Please Note: The HPCC Systems forums have moved to Stack Overflow.
We invite you to post your questions on Stack Overflow utilizing the tag hpcc-ecl.
HPCC Systems - HPCC Community Forum - hpccsystems.com
The new ECL Playground allows you to access and execute self-contained ECL code on your HPCC system without the use of any other tools.
\\nNew Memory management provides significant performance improvements when sorting variable-length records.
\\nAn updated set of Client Tools includes compatibility for Mac.
\\nAnd includes updates and enhancements from the prior version.
\\n\\nDownload: http://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2012-07-12 19:51:17\" },\n\t{ \"post_id\": 1548, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"Re: Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The HPCC Systems Community Edition 3.6.2 release is now available and includes the following changes:\\n\\n• Enhanced security including full LDAP support \\n• Improvements to ECL IDE & Client Tools \\n• Updates to Packaged Documentation & Graph Control \\n• Includes latest HPCC VM Image \\n• And much more - visit http://hpccsystems.com/download/free-co ... r-platform\", \"post_time\": \"2012-04-23 12:53:13\" },\n\t{ \"post_id\": 1440, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"A new Community Edition 3.6.2.4rc is now available. \\n\\nDownload at http://hpccsystems.com/download/release-candidates\\n\\nFor VM: http://hpccsystems.com/download/hpcc-vm-image/beta\", \"post_time\": \"2012-04-04 20:39:12\" },\n\t{ \"post_id\": 663, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"HPCC Community Edition 3.4\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest HPCC community edition 3.4 is now available.\\n\\nIncludes custom output visualization capabilities and other updates and bug fixes. \\n\\nhttp://hpccsystems.com/download/free-co ... r-platform\", \"post_time\": \"2011-11-29 22:33:21\" },\n\t{ \"post_id\": 592, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"HPCC 3.4 Release Candidate 1 Now Available\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest HPCC community edition, 3.4 Release Candidate 1, has been released.\\n\\nIncludes updates and bug fixes, including but not limited to, support for additional distributions (Ubuntu 11.10 & Debian "Lenny"), EE compatibility, Roxie queue support, new ECL command line, date support in the standard ECL library and more.\\n\\nDownload at http://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-11-11 15:05:28\" },\n\t{ \"post_id\": 449, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"Latest Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest HPCC Community Edition, version 3.2.2.1 has been released. \\n\\nDownload at http://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-10-12 21:37:13\" },\n\t{ \"post_id\": 241, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"UPDATE: Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The free HPCC Community Edition 3.0.4 including support for OpenSUSE is now available! Download at http://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-08-08 15:23:12\" },\n\t{ \"post_id\": 182, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"UPDATE: Free Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest HPCC Community Edition, version 3.0.3, has been released. Now supporting Ubuntu 11.04. Download today:\\n\\nhttp://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-07-21 21:44:14\" },\n\t{ \"post_id\": 150, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"Update: Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest HPCC Community Edition has been released! Version 3.0.2 now supports Ubuntu. 
Download today:\\n\\nhttp://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-07-14 12:33:22\" },\n\t{ \"post_id\": 107, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"UPDATE: Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"The free Community Edition now supports RedHat 5!\", \"post_time\": \"2011-07-07 15:11:59\" },\n\t{ \"post_id\": 88, \"topic_id\": 49, \"forum_id\": 7, \"post_subject\": \"Community Edition\", \"username\": \"HPCC Staff\", \"post_text\": \"Today we released our first set of binaries for the free Community Edition. This first phase includes binaries for CentOS.\\n\\nCheck out the Download page for documentation and instructions:\\nhttp://hpccsystems.com/download/free-community-edition\", \"post_time\": \"2011-06-29 19:58:30\" },\n\t{ \"post_id\": 3023, \"topic_id\": 62, \"forum_id\": 7, \"post_subject\": \"Re: What's the HPCC feature roadmap?\", \"username\": \"HPCC Staff\", \"post_text\": \"You can view the latest roadmap at the link below. If interested in assisting with any of these items, please contact us. \\n\\nhttp://cdn.hpccsystems.com/pdf/HPCC_Project_Roadmap.pdf\", \"post_time\": \"2012-12-18 18:52:57\" },\n\t{ \"post_id\": 288, \"topic_id\": 62, \"forum_id\": 7, \"post_subject\": \"Re: What's the HPCC feature roadmap?\", \"username\": \"HPCC Staff\", \"post_text\": \"The source code will be made available in September 2011.\", \"post_time\": \"2011-08-23 13:13:21\" },\n\t{ \"post_id\": 281, \"topic_id\": 62, \"forum_id\": 7, \"post_subject\": \"Re: What's the HPCC feature roadmap?\", \"username\": \"agnonchik\", \"post_text\": \"When do you plan to publish the source code? Am I right, it's not available so far?\", \"post_time\": \"2011-08-22 12:51:05\" },\n\t{ \"post_id\": 169, \"topic_id\": 62, \"forum_id\": 7, \"post_subject\": \"Re: What's the HPCC feature roadmap?\", \"username\": \"HPCC Staff\", \"post_text\": \"We are working on this document and targeting to have a draft by early 4Q. Suggestions and input for new features are welcome. Thank you!\", \"post_time\": \"2011-07-19 12:34:47\" },\n\t{ \"post_id\": 146, \"topic_id\": 62, \"forum_id\": 7, \"post_subject\": \"What's the HPCC feature roadmap?\", \"username\": \"yunchen\", \"post_text\": \"Can you provide a list of high-level features that are in the pipeline to be implemented?\", \"post_time\": \"2011-07-13 22:39:10\" },\n\t{ \"post_id\": 170, \"topic_id\": 69, \"forum_id\": 7, \"post_subject\": \"Training Promotion\", \"username\": \"HPCC Staff\", \"post_text\": \"For a limited time, HPCC Systems is running a training promotion in San Francisco! Register for Intro to ECL and Intro to Thor classes starting August 16 and attend the Roxie training classes the following week free! Visit the link below for more information:\\n\\nhttp://hpccsystems.com/community/training-events/training/roxie-promo-aug\", \"post_time\": \"2011-07-19 12:43:58\" },\n\t{ \"post_id\": 290, \"topic_id\": 97, \"forum_id\": 7, \"post_subject\": \"Re: HPCC Meetup at Alphratta Aug 25th\", \"username\": \"bforeman\", \"post_text\": \"I feel very fortunate that I am in Alpharetta this week and have the opportunity to see this and meet with the HPCC community!\", \"post_time\": \"2011-08-25 14:40:40\" },\n\t{ \"post_id\": 289, \"topic_id\": 97, \"forum_id\": 7, \"post_subject\": \"HPCC Meetup at Alpharetta Aug 25th\", \"username\": \"HPCC Staff\", \"post_text\": \"Looking forward to seeing everyone at the HPCC Systems Meetup! 
There is still room to attend http://hpccsystems.com/community/traini ... alph-aug25\", \"post_time\": \"2011-08-23 17:46:42\" },\n\t{ \"post_id\": 591, \"topic_id\": 98, \"forum_id\": 7, \"post_subject\": \"Meetup schedule for remainder of 2011\", \"username\": \"HPCC Staff\", \"post_text\": \"We've had some great discussions at our meetups! Join us in these cities as we wrap up our meetup schedule in 2011. \\n\\nSeattle, WA\\nNov 16, 7:00pm\\nDr. John Holt will be a guest speaker at the HPC Supercomputing Group meetup.\\nhttp://www.meetup.com/HPC-GPU-Supercomp ... /35959982/\\n\\nStanford, CA\\nDec 5, 5:30pm\\nhttp://www.meetup.com/Big-Data-Processi ... /40545232/\\n\\nCheck back for more meetups being planned in 2012.\", \"post_time\": \"2011-11-10 15:27:53\" },\n\t{ \"post_id\": 373, \"topic_id\": 98, \"forum_id\": 7, \"post_subject\": \"Upcoming Meetups planned\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems will be hosting more meetups!\\n\\nOct 5 / Washington DC: http://www.meetup.com/Big-Data-Processi ... /32041752/\\n\\nOct 25 / Atlanta GA: http://www.meetup.com/Big-Data-Processi ... /34292102/\\n\\nOct 27 / Boca Raton, FL: http://www.meetup.com/Big-Data-Processi ... /34940922/\\n\\nNov 2 / Israel: http://www.meetup.com/Big-Data-Processi ... /36950532/\\n\\nNov 8 / New York, NY: http://www.meetup.com/Big-Data-Processi ... /35344332/\\n\\nCheck back as we continue to add more in other cities.\", \"post_time\": \"2011-09-30 20:05:12\" },\n\t{ \"post_id\": 322, \"topic_id\": 98, \"forum_id\": 7, \"post_subject\": \"More Meetups scheduled\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems will be hosting a series of meetups in Boca Raton FL, Dayton OH & Washington DC:\\n\\nBig Data Analytics for Health - Insights from the Healthcare Industry\\nSanta Clara: RSVP at http://www.meetup.com/Big-Data-Processi ... /30568431/\\n \\nA comparison of HPCC Systems & Hadoop \\nBoca Raton: RSVP at http://www.meetup.com/Big-Data-Processi ... /31410732/ \\n\\nDayton: RSVP at http://www.meetup.com/Big-Data-Processi ... /32151642/\\n\\nWashington DC: RSVP at http://www.meetup.com/Big-Data-Processi ... /32041752/\\n\\nCheck back as we continue to add more in other cities.\", \"post_time\": \"2011-09-07 17:39:24\" },\n\t{ \"post_id\": 294, \"topic_id\": 98, \"forum_id\": 7, \"post_subject\": \"re: Join our Meetup Group!\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems is planning the next meetup in Boca Raton, FL!\\n\\nWhen: September 15, 5-7pm\\nWhere: Embassy Suites\\nWhat: HPCC Systems vs Hadoop\\n\\nFlavio Villanustre and Arjuna Chala will speak on the four key factors that differentiate HPCC from Hadoop, including ECL and Roxie. Open discussion to follow.\\n\\nFood & beverages provided. Don't miss it! RSVP today.\\nhttp://www.meetup.com/Big-Data-Processi ... /31410732/\", \"post_time\": \"2011-08-30 21:01:11\" },\n\t{ \"post_id\": 291, \"topic_id\": 98, \"forum_id\": 7, \"post_subject\": \"Join our Meetup Group!\", \"username\": \"HPCC Staff\", \"post_text\": \"We invite the community to join the Big Data Analytics and Processing (HPCC Systems) group on Meetup.com.\\n\\nWe'll be hosting (and attending) a series of Meetups around the country to bring business and technology professionals together to discuss big data challenges and solutions.\\n\\nJoin our group at the link below:\\nhttp://www.meetup.com/Big-Data-Processi ... 
C-Systems/\\n\\nUpcoming Meetups:\\nTODAY Thursday August 25 - Alpharetta, GA\\nThursday September 8 - Santa Clara, CA\", \"post_time\": \"2011-08-25 19:25:34\" },\n\t{ \"post_id\": 329, \"topic_id\": 110, \"forum_id\": 7, \"post_subject\": \"Source Code Now Available!\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems from LexisNexis Releases Source Code For Developers To Help Process and Deliver Big Data Analytics \\n\\nRead the press release:\\nhttp://hpccsystems.com/about-us/press_c ... ase-090911\\n\\nAccess the source code:\\nhttps://github.com/hpcc-systems\", \"post_time\": \"2011-09-11 00:16:04\" },\n\t{ \"post_id\": 597, \"topic_id\": 159, \"forum_id\": 7, \"post_subject\": \"HPCC Systems is looking for beta testers on AWS!\", \"username\": \"HPCC Staff\", \"post_text\": \"Be among the first to test the HPCC platform on the cloud! HPCC Systems is looking for testers on Amazon Web Services. \\n\\nContact us via beta@hpccsystems.com if you are interested in participating. \\n\\nThank you!\", \"post_time\": \"2011-11-14 13:58:39\" },\n\t{ \"post_id\": 678, \"topic_id\": 180, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Data Delivery Engine Now Available on AWS\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems™ from LexisNexis® Risk Solutions is now providing its Thor Data Refinery Cluster, which is a Big Data delivery engine for Big Data processing, on the Amazon Web Services platform.\\n\\nMore at http://hpccsystems.com/about-us/press_c ... hor_on_aws\", \"post_time\": \"2011-11-30 14:47:59\" },\n\t{ \"post_id\": 748, \"topic_id\": 191, \"forum_id\": 7, \"post_subject\": \"HPCC Systems from LexisNexis Breaks World Record on Terasort\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems 4 nodes cluster sorts 100 gigabytes in 98 seconds and is 25% faster than a 20 nodes Hadoop cluster. \\n\\nRead more at http://hpccsystems.com/about-us/press_c ... ark-121211\", \"post_time\": \"2011-12-13 14:07:46\" },\n\t{ \"post_id\": 951, \"topic_id\": 216, \"forum_id\": 7, \"post_subject\": \"HPCC Systems from LexisNexis Launches Open Source Machine Le\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems™ from LexisNexis® Risk Solutions announced today that it has released a beta version for its new open source Machine Learning (ML) and Matrix processing algorithms to assist data scientists and developers with business intelligence and predictive analytics in Big Data related problems. The algorithms cover supervised and unsupervised learning, document and text analysis, statistics and probabilities, and general inductive inference related problems. For more information, visit: http://hpccsystems.com/ml\\n\\nRead the full press release:\\nhttp://hpccsystems.com/about-us/press_c ... s_01302012\", \"post_time\": \"2012-01-30 22:17:54\" },\n\t{ \"post_id\": 844, \"topic_id\": 216, \"forum_id\": 7, \"post_subject\": \"Introducing Machine Learning Library\", \"username\": \"HPCC Staff\", \"post_text\": \"Now available! An extensible library of fully parallel machine learning routines for the HPCC Platform; covering supervised and unsupervised learning, document and text analysis, statistics and probabilities, and general inductive inference related problems. \\n\\nLearn more! 
http://hpccsystems.com/ml\\n\\nAlso, visit the dedicated ML forums: viewforum.php?f=23\", \"post_time\": \"2012-01-17 21:26:29\" },\n\t{ \"post_id\": 971, \"topic_id\": 239, \"forum_id\": 7, \"post_subject\": \"Re: Meetup presentations\", \"username\": \"HPCC Staff\", \"post_text\": \"Thanks for reaching out to us and joining our meetup group! If we do not have the opportunity to record the meetup session, the slides of the presentation are usually available on the event page after the meetup takes place. You can access past events here: http://hpccsystems.com/community/traini ... ts/archive\", \"post_time\": \"2012-02-01 21:31:48\" },\n\t{ \"post_id\": 966, \"topic_id\": 239, \"forum_id\": 7, \"post_subject\": \"Meetup presentations\", \"username\": \"curtkohler\", \"post_text\": \"I was curious if there were any plans to start recording the presentations from the MeetUp sessions and make them available on the web. I've seen a couple of interesting ones, but haven't been near a location to actually be able to attend. Making a recording available (even if it was just something as simple as a recorded Webex of the slides and associated audio for a topic done outside of the actual meetup) would significantly expand HPCC's exposure.\\n\\nCurt\", \"post_time\": \"2012-02-01 17:48:21\" },\n\t{ \"post_id\": 1525, \"topic_id\": 340, \"forum_id\": 7, \"post_subject\": \"HPCC Systems is a featured speaker at Database Week\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems will be the featured speaker on May 16 during Database Week in New York City.\\n\\nDr. Flavio Villanustre will speak on how the HPCC Systems platform and its declarative language, ECL, are leveraged for tackling Big Data and explain how ECL is to Big Data as SQL is to RDBMS, along with demos of our latest integration work with Pentaho Business Intelligence Suite, R & Hadoop. Flavio will also present a technical deep dive into the HPCC Systems architecture and its major components including Thor (data refinery engine) and Roxie (data delivery engine) and how it compares and integrates with other big data technologies like Hadoop. Flavio will also touch on the total cost of ownership (TCO) of big data analytics solutions and offer tips and recommendations for managing costs.\\n\\nMore details & RSVP: http://www.database-week.com/database/hpcc-big-data\", \"post_time\": \"2012-04-13 18:24:36\" },\n\t{ \"post_id\": 1782, \"topic_id\": 392, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Newsletter\", \"username\": \"HPCC Staff\", \"post_text\": \"Check out the inaugural newsletter released today! Includes platform and developer updates and the latest news and events: \\n\\nhttp://solutions.lexisnexis.com/content ... etter_9683\", \"post_time\": \"2012-06-14 15:53:12\" },\n\t{ \"post_id\": 2327, \"topic_id\": 516, \"forum_id\": 7, \"post_subject\": \"Re: Issue Tracker\", \"username\": \"richardkchapman\", \"post_text\": \"Issues from GitHub have been migrated to the corresponding issue numbers in Jira.\", \"post_time\": \"2012-09-12 15:08:16\" },\n\t{ \"post_id\": 2326, \"topic_id\": 516, \"forum_id\": 7, \"post_subject\": \"Issue Tracker\", \"username\": \"HPCC Staff\", \"post_text\": \"Please note that issue tracking for the HPCC-Platform repository on GitHub has been migrated to JIRA - http://track.hpccsystems.com \\n\\nUse your GitHub ID and the password reset mechanism to log in. \\n\\nONLY the issues from the HPCC-Platform repository have been migrated at this time. 
Other GitHub repositories will continue to use the GitHub issue tracker for the time being.\", \"post_time\": \"2012-09-12 15:03:21\" },\n\t{ \"post_id\": 2897, \"topic_id\": 637, \"forum_id\": 7, \"post_subject\": \"Events\", \"username\": \"HPCC Staff\", \"post_text\": \"Here's your chance to catch HPCC Systems at an event before the end of 2012! \\n\\nDec 12 - ITPalooza - NSU, South Florida http://itpalooza.e2mktg.com/\\nLexisNexis is a Silver sponsor and will have a table to showcase our latest demos. Our very own Bob Foreman, Sr Trainer, will be speaking at 4pm: Managing Big Data with LexisNexis HPCC Systems and the ECL Language\\n\\nDec 13 - ICMLA - Boca Raton, FL http://www.icmla-conference.org/icmla12/\\nDr Edin Muharemagic, LexisNexis Architect and Data Scientist, will be delivering a presentation Dec 14: The HPCC Systems Machine Learning Framework - A Scalable and Extensible ML platform for Big Data\\n\\nIf you are in the area, come join the fun!\", \"post_time\": \"2012-11-27 22:10:23\" },\n\t{ \"post_id\": 3461, \"topic_id\": 769, \"forum_id\": 7, \"post_subject\": \"Latest HPCC Systems Roadmap\", \"username\": \"HPCC Staff\", \"post_text\": \"You can view the latest roadmap at the link below. This includes a list of features in the pipeline from now through 2014. If interested in assisting with any of these items, please contact us. \\n\\nhttp://cdn.hpccsystems.com/pdf/HPCC_Project_Roadmap.pdf\", \"post_time\": \"2013-02-14 14:26:32\" },\n\t{ \"post_id\": 7140, \"topic_id\": 972, \"forum_id\": 7, \"post_subject\": \"Re: Community Edition 4.0 now available\", \"username\": \"HPCC Staff\", \"post_text\": \"Ali, \\n\\nThe source code is indeed available in our GitHub repository. (Our latest gold version is 5.0.6.):\\nhttps://github.com/hpcc-systems\\n\\nYou can view all the available releases here:\\nhttp://hpccsystems.com/download/free-co ... r-platform\\n\\nOur next release is 5.2 and a Release Candidate is available here:\\nhttp://hpccsystems.com/download/release-candidates\\n\\nYou can read about the new features and enhancements expected in the 5.2 version in our Blog:\\nhttp://hpccsystems.com/blog/whats-comin ... systems-52\\n\\nThank you for your interest!\", \"post_time\": \"2015-03-14 13:02:04\" },\n\t{ \"post_id\": 7139, \"topic_id\": 972, \"forum_id\": 7, \"post_subject\": \"Re: Community Edition 4.0 now available\", \"username\": \"Haider05\", \"post_text\": \"When do you plan to publish the source code? 
Am I right, it's not available so far???\\n\\n\\n\\nAli\", \"post_time\": \"2015-03-14 06:45:43\" },\n\t{ \"post_id\": 4512, \"topic_id\": 972, \"forum_id\": 7, \"post_subject\": \"Suggestions.\", \"username\": \"Lotus\", \"post_text\": \"1,Provide more detailed docs about architecture and it's implementation\\n\\nYou guys provided some docs,and few of them talk about architecture:what components exists primarily,but they can be taken as brief introductions only.What we really want to get are tow docs about:1,calculation model introduction,like google's paper's did;2,implementation details, like chapters 1~8 of <<Hadoop:The Definitive Guide>> did.If these cost time.would you please provide two detailed docs about 1, Detailed component's functions(Some thing can be found on wp_introduction_HPCC.pdf,but it is too simple) introduction; 2, Typical work-flow introduction:how and where data is prepared,distributed and computed and collected,by who and what format.These two will help CIO/CTOs to make decision from deeper level.\\n\\n\\n 2,Don't take too much energy on java.\\n\\nJava can decrease development cost but increase running cost.For not so large system,it is worth using it,but for larges,cost equation: x*RC+y*DC=TC(here,RC=running cost,dc=develop cost,tc=total cost,x=server count,y=developer count) changed.So when we start our own business,we try our best to de-javaing,use golang(it is growing quickly) as server side language and try different computing frameworks.Most of our members have >8 years experience on java,and after one week,we are all quite comfortable with ecl,so client language is a problem,but not so big. By the way,you can't use java to beat a java based ecosystem, but better focusing on you own strengths:better performance,make it "more" better.And maturity,your IDE,configuration and management tools are quite good.\\n\\n\\n2,No (dedicated) docs to talk about how to develop plugins for HPCC.\\n\\nIf you want to an ecosystem,you need plugins.So if possible,provide a good doc on plugin development.\\n\\n\\n3,Provide html based doc.\", \"post_time\": \"2013-08-30 01:13:32\" },\n\t{ \"post_id\": 4338, \"topic_id\": 972, \"forum_id\": 7, \"post_subject\": \"Community Edition 4.0 now available\", \"username\": \"HPCC Staff\", \"post_text\": \"http://hpccsystems.com/download/free-community-edition\\n\\nThe HPCC Systems Community Edition 4.0 includes many features and enhancements such as: \\nECL Language improvements including DICTIONARY - an efficient in-memory lookup from keys to values that is useful for implementing lookup tables, associative arrays, and other purposes; \\n\\nThe ability to embed Python, Javascript and R code within ECL, and to call external Java code simply from ECL programs; \\n\\nEclipse Language Plugin gold version which adds ECL awareness and HPCC Systems Platform integration to the Eclipse IDE allowing the user to manage the entire ECL life cycle from remotely writing and syntax checking ECL to submitting and monitoring ECL Workunits on the HPCC Systems Platform all from within the Eclipse IDE; \\n\\nRoxie package file improvements and additional functionality to assist in the support of Roxie queries;\\n\\nBundle support which is a framework for creating and managing installable ECL code components; \\n\\nTechnical preview of the new ECL Watch that uses the latest web technologies to deliver a simpler, more consistent user experience and provides a solid base and easier development life cycle for future web based user interfaces; \\n\\nMachine 
Learning improvements made to the ML.Regression library containing a new version of linear regression that uses the BLAS libraries for the matrix calculations; \\n\\nRoxie now supports direct access to queries via JSON (in addition to SOAP) adding the ability to call Roxie queries using standard JSON over HTTP as an alternative to SOAP;\\n\\nAnd more! A comprehensive list of changes is available in the Release Notes:\\nhttp://hpccsystems.com/download/free-co ... imitations\", \"post_time\": \"2013-07-19 13:21:45\" },\n\t{ \"post_id\": 4612, \"topic_id\": 1038, \"forum_id\": 7, \"post_subject\": \"Latest CE now available for download\", \"username\": \"HPCC Staff\", \"post_text\": \"Community Edition 4.0.2-2 is now available:\\nhttp://hpccsystems.com/download/free-co ... r-platform\\n\\nView release notes:\\nhttp://hpccsystems.com/download/free-co ... imitations\\n\\nThere is also now a 32bit and 64bit VM Image:\\nhttp://hpccsystems.com/download/hpcc-vm-image\\n\\nDid you know ECL is now listed on Rosetta Code? Help populate the page with some examples:\\nhttp://rosettacode.org/wiki/ECL\", \"post_time\": \"2013-09-19 15:48:57\" },\n\t{ \"post_id\": 4945, \"topic_id\": 1117, \"forum_id\": 7, \"post_subject\": \"Community Edition 4.2.0 ready for download\", \"username\": \"HPCC Staff\", \"post_text\": \"The latest version of HPCC Systems Community Edition 4.2.0-1 is now available. Download: http://hpccsystems.com/download/free-community-edition\\n\\nThis release includes many new features and improvements.\\n\\n-> New ECL Watch further improvements (Technical Preview)\\nThe highlights in 4.2 are:\\n•\\tQueries pages now allows Query manipulation (Suspend/Unsuspend, Activate/Deactivate, Delete, etc) \\n•\\tMultiple Fixed Spray options added (recfmv, recvmb, variable) \\n•\\tPublish Workunit now supports specifying a Remote Dali (allowing use of a foreign Dali for DFS resolution)\\n•\\tNew ZAP (Zipped Analysis Package) button in ECL Watch. This button is on the workunits details page. When users experience problems that require investigation by the developers, pressing this button collects any log file and workunit information and zips it up so it can be emailed or added to a JIRA ticket. Users complete a form giving details of the circumstances leading up to the discovery of the problem. This feature is available in both the old and new ECL Watch. https://track.hpccsystems.com/browse/HPCC-7984\\n\\n-> ECL Visualisations\\n•\\tA visualisations bundle is now available on github. This cellFormatter bundle makes use of the ability (in 4.0.2 and later) to display HTML and javascript code in eclwatch results pages, in order to simplify the display of various types of charts, graphs, and other diagrams in order to represent the relationships between data values in results. https://track.hpccsystems.com/browse/HPCC-7985\\n\\n•\\tEmbeddable web pages and manifest wildcard support. Users can use wildcards in the manifest to easily include a set of resources. It provides a URL format to use via WsWorkunits to access those resources directly from a browser. The resources can reference each other using relative paths. So for instance you can access a web page that references embedded javascript, css, jpg, etc. https://track.hpccsystems.com/browse/HPCC-10203\\n\\n-> Lookup join improvements\\n•\\tSmart Join - This attempts to perform an in-memory LOOKUP join. 
If there is insufficient memory, smart join will automatically ensure that both sides are efficiently distributed and attempt to perform a LOCAL LOOKUP join. If there is still insufficient memory, smart join will become a LOCAL HASH join which is not limited by memory. https://track.hpccsystems.com/browse/HPCC-8245\\n•\\tOptimized Many Lookup - In 4.2, a MANY LOOKUP join with a high number of matching right-hand-side key values is much faster and more efficient. https://track.hpccsystems.com/browse/HPCC-9244 \\n\\n-> Group join\\n•\\tThe GROUP JOIN syntax allows you to efficiently join two datasets on one condition, but have the result grouped by another condition. This is useful for efficiently solving some relationship matching problems. https://track.hpccsystems.com/browse/HPCC-10147\\n\\n-> Persist functionality improvements\\n•\\tPersist file per code hash – In 4.2, more than one user using older or newer versions of the same persist can both rebuild and co-exist independently\\n•\\tImprovements to the handling of the expiry of persist files – In 4.2 we now track the last access to a file and expire based on that.\\n•\\tRefactoring of the persist handling – in 4.2, intermediate persist files can be safely deleted without causing persists that are dependent on them to rebuild unnecessarily.\\nhttps://track.hpccsystems.com/browse/HPCC-9985\\n\\n-> ECL Plugin for Eclipse new features\\nThe highlights for 4.2 are:\\n•\\tAbility to submit/compile from editor to specific target\\n•\\tFiltered “history” workunit tree (available in 4.2 gold)\\n\\n-> Package map improvements \\n•\\tUsers can now specify which environment and or cluster to copy data from, from within the packagemap. https://track.hpccsystems.com/browse/HPCC-9916\\n•\\tIsolates packagemaps across target clusters by default making it easier to manage in a multi-cluster environment. https://track.hpccsystems.com/browse/HPCC-9869\\n\\n-> Roxie Monitoring Tool (Ganglia) Technical Preview\\nThe initial release leverages Ganglia, an open-source, scalable, distributed monitoring system, to produce a graphical view of a Roxie cluster's servers. With this tool, users can:\\n•\\tSee System information at a glance (such as, Uptimes, CPU Usage, Free Memory, Load, Network Activity, etc.) \\n•\\tView a grid of Roxie clusters\\n•\\tExamine Roxie Metrics (such as, Query Count, Activities, Disk Reads, Retires, Restarts etc)\\n•\\tLook at an overview of a cluster and drilldown to see information about a single node in that cluster \\n•\\tKeep and interrogate a running historical record of all metrics> Use this to help with capacity planning, disaster recovery planning, troubleshooting, and preventative maintenance.\\n\\nAnd more! Be sure to review the Release Notes and Known Limitations:\\nhttp://hpccsystems.com/download/free-co ... imitations\", \"post_time\": \"2013-11-14 17:18:42\" },\n\t{ \"post_id\": 5072, \"topic_id\": 1154, \"forum_id\": 7, \"post_subject\": \"Happy Holidays from HPCC Systems!\", \"username\": \"HPCC Staff\", \"post_text\": \"Check out the latest blog post from Flavio Villanustre as he recounts accomplishments from 2013 and what to expect in the new year. 
\\n\\nhttp://hpccsystems.com/blog/another-year-almost-gone\\n\\nPlease keep in mind during the holiday season, there might be a slight delay in response in the forums.\", \"post_time\": \"2013-12-19 22:17:31\" },\n\t{ \"post_id\": 5402, \"topic_id\": 1246, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Platform 4.2.2 now available\", \"username\": \"HPCC Staff\", \"post_text\": \"This is a maintenance release fixing a number of issues. Use the below links to download the latest 4.2.2 version and other information relating to this release: \\n\\nDownload HPCC Systems 4.2.2: http://hpccsystems.com/download/free-co ... r-platform\\n\\nView Release notes and known limitations for 4.2.2: http://hpccsystems.com/download/free-co ... imitations\\n\\nSee list of issues resolved in 4.2.2: https://track.hpccsystems.com/issues/?filter=11207\", \"post_time\": \"2014-03-20 13:53:56\" },\n\t{ \"post_id\": 6030, \"topic_id\": 1382, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Platform 5.0\", \"username\": \"HPCC Staff\", \"post_text\": \"HPCC Systems 5.0 is now available!\\n\\n•\\tHPCC Systems downloads page: http://hpccsystems.com/download/free-co ... r-platform. \\n•\\tHPCC Systems 5.0 Red Book pages: https://wiki.hpccsystems.com/display/hp ... elease+5.0\\n•\\tRelease notes and known limitations: http://hpccsystems.com/download/free-co ... imitations \\n•\\tList of issues fixed in 5.0: https://track.hpccsystems.com/issues/?filter=11328 \\n•\\tHPCC Systems Community Forums: http://hpccsystems.com/bb/\\n\\nHPCC Systems 5.0 is major release including the following significant new features and improvements:\\n\\n•\\tFirst internationalised release. We have provided the mechanism for translating ECL Watch into your native language and have also included translations for some languages including including Chinese, Spanish, Hungarian, Serbian and Brazilian Portugese.\\n\\n•\\tECL Watch redesign. You will already have seen evidence of the facelift in the technical preview released previously. The new ECL Watch uses modern techniques such as in place viewing so you can view related pages more easily. So for example, you can now view Workunit outputs, graphs, timers etc without having to navigate away from the Workunit Details page. The graph view has also been improved to handle larger graphs more efficiently and provide greater control over the details level. Spraying is now a much more efficient process. Where previously each spray had to be actioned separately, you can now spray multiple files in a single action saving time and effort. If you aren’t sure what type of spray to use or need more information to select the correct spray options, use the new hex preview mode which shows the contents of a file on the landing zone in hex for the first 32k. Read more at http://hpccsystems.com/bb/viewtopic.php?f=17&t=1383\\n\\n•\\tVisualizations of results for a number of chart/graph types including bar, scatter, pie, histogram etc. The ECL Playground includes two samples demonstrating the visualization of results as a bar chart or choropleth. You simply run the examples and click the chart button on completion to see the visualisations. Another way of viewing visualizations in ECL Watch is to include additional resources (such as an index web page) in your ECL code via the enhanced manifest mechanism, which can be viewed from within the workunit or a published query. Helper functions are also provided to facilitate calling Roxie from inside the web pages. 
\\n\\n•\\tEmbedded language features including the ability to pass records and datasets into embedded functions, and to return them as results from embedded functions. You can also pass records and datasets into embedded functions, and return them as results from embedded functions. Moreover, only those records which are actually required will be evaluated. We’ve also extended the list of embedded languages to include some interesting new ones including MySQL.\\n\\n•\\tExtended Roxie capabilities including the ability to read and write persist files and launch Thor graphs. To try this out, deploy a cluster that contains both a Roxie and Thor and submit a job. The parts that require Thor will be executed on Thor.\\n\\n•\\tRoxie query managements GUI features. We have extended ECL Watch to include a number of features for the management of Roxie queries which were previously available only from the command line. There are also more options available when publishing a query so, for example, you can now specify which cluster you want to get the files from and you can also get files from a remote DALI. You can also set a priority level for your published query and there is a comments field for recording information about a query you may want to refer back to in the future. In this release, you also get more information displayed about your deployed queries within ECL Watch. The queries page now shows a list of files used by the query and also indicates the reason why a query has been suspended. You can also clearly see which files and super files a query uses.\\n\\n•\\tErrors and warnings. The ECLCC compiler has always carried out static analysis on your code to spot potentially incorrect ECL code and would generate a warning if any was detected. Sometimes this has resulted in a large numbers of warnings which may be ignored and new/significant warnings might be missed as a result. In 5.0 we have added a new feature which enables you to customize the warning settings to indicate which warnings should be shown as warnings, which should be shown as fatal errors and which should be suppressed altogether. \\n\\n•\\tCode Generator improvements focussing on generating smaller C++ code which reduces the compile time for large queries.\\n\\n•\\tThor performance improvements which try to minimise spilling and increase the amount of row data that is retained in memory so that query throughput times are improved.\\n\\nAlso take note of the 3rd party plugins available for use with HPCC 5.0:\\n\\n•\\tHPCC Juju Charm. If you want to get going quickly using HPCC and you are using either Ubuntu 12.04 (Precise) or 13.10 (Saucy), then try using our new HPCC Juju charm. The charm sets up your VMs and installs the HPCC software really easily and quickly. You get a single node system containing both a thor and a roxie cluster, which you can then customize to suit your requirements. So for example, using the HPPC Charm you can add as many nodes as you need or using the HPCC Configuration Manager you can completely reconfigure the clusters according to your requirements. What’s more, you can set up an HPCC Platform on either a local machine (LXC) or on the Amazon Web Services Cloud. \\nMore information and download: http://manage.jujucharms.com/~xwang2713/precise/hpcc \\n\\n\\n•\\tNagios integration into HPCC. This technical preview harnesses Nagios’s monitoring and notification systems to help identify and resolve infrastructure problems before the affect critical processes. 
Nagios hardware notifications can help keep your system highly available and alerts can assist in pre-emptive maintenance for processes which are down or behaving outside expected parameters to ensure system stability, reliability, and uptime. Scripts and tools are now provided to extract HPCC Platform system metrics and easily integrate that data into Nagios. \\nDownload: http://hpccsystems.com/download/free-co ... dition/all \\nMore information: http://hpccsystems.com/download/docs/hp ... al-preview\\n\\n•\\tWsSQL Web Service (Coming to the HPCC Systems website end July 2014). This provides a mechanism for accessing HPCC data and published queries via SQL queries, supporting both ad-hoc and prepared queries. It provides an easy way to create read only connections from any SQL connection type to HPCC. There are a number of interesting benefits to be gained from using this service enabling you to harness the full power of HPCC ‘under the covers’. Your SQL requests generate ECL code which is submitted, compiled and executed on your target cluster. This means you can use the data processing features of HPCC without needing to learn and write ECL code. Moreover, you also get to take advantage of the Automatic Index fetching capabilities which produce faster data fetches. This service also supports simple SQL SELECT or CALL syntax so you can access HPCC data files as DB tables and published queries as DB Stored Procedures.\", \"post_time\": \"2014-07-11 13:04:37\" },\n\t{ \"post_id\": 6403, \"topic_id\": 1468, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Platform 5.0.2 and 4.2.10 available\", \"username\": \"HPCC Staff\", \"post_text\": \"Use the below links to download the latest version and other information relating to this release: \\n\\nDownload HPCC Systems Platform: http://hpccsystems.com/download/free-community-edition\\n\\nView Release notes and known limitations: http://hpccsystems.com/download/free-co ... imitations\", \"post_time\": \"2014-10-04 11:52:42\" },\n\t{ \"post_id\": 6404, \"topic_id\": 1469, \"forum_id\": 7, \"post_subject\": \"2014 HPCC Systems Engineering Summit\", \"username\": \"HPCC Staff\", \"post_text\": \"The purpose of the Summit is to gather our HPCC engineers, data scientists and technology team to share knowledge and future roadmap plans on the HPCC Systems platform.\\n\\nThis year, one full day on October 7 is dedicated to showcase our community and have our corporate and academic partners present their HPCC Systems use cases, research projects and share their experience. This includes speakers from Comrise, Archway Health Advisors, RNET Technologies, North Carolina State University, Clemson University, Florida Atlantic University and more.\\n\\nThe event is sold out but will be live-streamed on the HPCC Systems YouTube channel. For more information on the agenda and sessions times visit:\\nhttp://hpccsystems.com/community/traini ... ummit-2014\", \"post_time\": \"2014-10-04 12:01:02\" },\n\t{ \"post_id\": 6556, \"topic_id\": 1500, \"forum_id\": 7, \"post_subject\": \"New HPCC Systems Video\", \"username\": \"HPCC Staff\", \"post_text\": \"Check out this new video giving an introduction of HPCC Systems and the key components. It also includes our new HPCC Systems design we have incorporated into our communications and across our social media channels.\\n\\nhttp://hpccsystems.com/community/traini ... 
troduction\", \"post_time\": \"2014-11-03 18:13:19\" },\n\t{ \"post_id\": 8216, \"topic_id\": 1639, \"forum_id\": 7, \"post_subject\": \"Recap: 2015 HPCC Systems Summit Community Day\", \"username\": \"HPCC Staff\", \"post_text\": \"We had an amazing day with our speakers, guests, attendees and livestream audience! \\n\\nYou can catch the recordings of each of the four tracks here: https://hpccsystems.com/community/event ... ummit-2015\\n\\nThe full agenda including session descriptions and speaker bios are here:\\nhttp://cdn.hpccsystems.com/pdf/2015_HPC ... Agenda.pdf\\n\\nThank you for participating!\", \"post_time\": \"2015-10-01 21:14:12\" },\n\t{ \"post_id\": 8208, \"topic_id\": 1639, \"forum_id\": 7, \"post_subject\": \"2015 HPCC Systems Summit Community Day\", \"username\": \"HPCC Staff\", \"post_text\": \"We are all set for the 2015 HPCC Systems Summit Community Day! Tune in on our HPCC Systems YouTube channel on September 29, 2015. We have a fantastic lineup of sessions and speakers from academia and industry. \\n\\nView Agenda: http://cdn.hpccsystems.com/pdf/2015_HPC ... mit_LS.pdf\\n\\nWatch the Livestream: http://bit.ly/2015hpccsummit\\n\\nFollow the event on Twitter: https://twitter.com/hpccsystems Hashtag #HPCCSummit\", \"post_time\": \"2015-09-25 23:42:39\" },\n\t{ \"post_id\": 7281, \"topic_id\": 1670, \"forum_id\": 7, \"post_subject\": \"ECL behind proxy\", \"username\": \"rgoel_0112\", \"post_text\": \"Hi there.I am behind a proxy server and not able to connect to use Ecl ide as it requires login.It is giving error1003:not able to communicate with server.\", \"post_time\": \"2015-04-07 16:56:59\" },\n\t{ \"post_id\": 7816, \"topic_id\": 1781, \"forum_id\": 7, \"post_subject\": \"Happy 4th Anniversary HPCC Systems Open Source\", \"username\": \"lchapman\", \"post_text\": \"Come and see why we're celebrating. Read all about what we have achieved and get a sneak preview of HPCC Systems 5.4.0 from my blog here: http://hpccsystems.com/blog/celebrating ... ce-project\", \"post_time\": \"2015-06-24 09:32:19\" },\n\t{ \"post_id\": 8268, \"topic_id\": 1922, \"forum_id\": 7, \"post_subject\": \"Google opensource blogspot features HPCC Systems this week\", \"username\": \"lchapman\", \"post_text\": \"Google are featuring a GSoC organization every week on their open source blogspot. This week it's HPCC Systems® turn. Read our blog about how GSoC 2015 went for us here: http://google-opensource.blogspot.com/\", \"post_time\": \"2015-10-10 06:59:33\" },\n\t{ \"post_id\": 8300, \"topic_id\": 1934, \"forum_id\": 7, \"post_subject\": \"Wanted - Project Ideas for Students to complete in 2016!\", \"username\": \"lchapman\", \"post_text\": \"It's been announced! The closing date for organisations to apply to participate in Google Summer of Code 2016 is 19th February 2016.\\n\\nWe have already started compiling a list of project ideas. I want to reach out to everyone in our community for ideas to add to this list: https://wiki.hpccsystems.com/x/yIBc\\n\\nIs there a cool feature you think is missing or would like to see implemented in HPCC Systems? Perhaps there is a Machine Learning statistic that you would like to see added to our ML Library? More visualisations you would like HPCC Systems to provide? Or perhaps you need more IoT enabling features?\\n\\nThe chances are, if it will benefit you, it will benefit others. So tell us about it. 
You can either post here, email Lorraine.Chapman@lexisnexis.com, or create a new issue in our Community Issue Tracker (please say if it is in response to this call out): https://track.hpccsystems.com/secure/Dashboard.jspa. \\n\\nWhile we are looking for projects for students to complete over a 12 weeks period next summer, we'd love to hear about any ideas you have. So get in touch and have your say! Go and see what our students of 2015 contributed to the HPCC Systems project: https://wiki.hpccsystems.com/x/g4BR \\n\\nLorraine Chapman\", \"post_time\": \"2015-10-15 12:02:15\" },\n\t{ \"post_id\": 9012, \"topic_id\": 2102, \"forum_id\": 7, \"post_subject\": \"2015 Review of major new features in HPCC Systems\", \"username\": \"lchapman\", \"post_text\": \"Happy New Year!\\n\\nWe're celebrating what was achieved in 2015 in this new blog: https://hpccsystems.com/resources/blog/lchapman/review-hpcc-systems-major-new-features-added-2015\\n\\nRefresh your memory about what was included in 5.2.x and 5.4.x and get information about other useful resources available to support your use of HPCC Systems.\\n\\nWe're also looking forward to 2016 and the release of HPCC Systems 6.0.0. The beta is out there on the downloads page and you can read a blog about the features included here: https://hpccsystems.com/resources/blog/lchapman/hpcc-systems-beta-600-feature-details\\n\\nWatch out for further updates about 6.0.0 coming soon.\", \"post_time\": \"2016-01-06 13:38:42\" },\n\t{ \"post_id\": 9142, \"topic_id\": 2134, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 6.0.0 Beta 2 is on its way!\", \"username\": \"lchapman\", \"post_text\": \"You may have taken a look at the new features previewed in the HPCC Systems Beta version released in September 2015. The second beta, containing more new features and enhancements, will be available from the downloads page in March.\\n\\nWant to know more about what's coming? Read the blogs:\\n\\nHPCC Systems 6.0.0 Beta 1 feature details\\n\\nLook what's coming in HPCC Systems 6.0.0 Beta 2\", \"post_time\": \"2016-01-28 11:59:59\" },\n\t{ \"post_id\": 9166, \"topic_id\": 2140, \"forum_id\": 7, \"post_subject\": \"2016 HPCC Systems Summit Community Day\", \"username\": \"HPCC Staff\", \"post_text\": \"The call for presentations is now open for the 2016 HPCC Systems Engineering Summit Community Day!\\n\\nThis year the event will be held in Atlanta, GA on Wednesday October 12, 2016. We invite the community to participate and submit a speaking proposal to share their story or use case on how they use HPCC Systems.\\n\\nThe full announcement and details for submitting a speaking proposal can be found here:\\nhttps://hpccsystems.com/hpccsummit2016\", \"post_time\": \"2016-01-28 21:28:27\" },\n\t{ \"post_id\": 9316, \"topic_id\": 2186, \"forum_id\": 7, \"post_subject\": \"Students - Come and intern with us this summer!\", \"username\": \"lchapman\", \"post_text\": \"So we didn't get accepted for GSoC 2016, but hey, they accepted less than half the organisations that applied and spread the net wider for new open source organisations to get involved. We were lucky enough to be one of those in 2015 and we'll try our luck again in 2017.\\n\\nLast year, we also ran a summer intern program and it was such a success that we are going to do that again this year. So, if you've been looking at us already, keep on looking. We have some great projects available.\\n\\nI've written a blog about how you can get involved in the program so go and read it. It tells you what you need to know. 
If you have any questions, email lorraine.chapman@lexisnexis.com.\\n\\nhttps://hpccsystems.com/resources/blog/ ... d-find-out\", \"post_time\": \"2016-03-08 18:54:01\" },\n\t{ \"post_id\": 9374, \"topic_id\": 2206, \"forum_id\": 7, \"post_subject\": \"Revised deadline for non-machine learning intern proposals\", \"username\": \"lchapman\", \"post_text\": \"Good news students!\\n\\nThe HPCC Systems Summer Intern Program has extended the deadline for you to apply for non-machine learning projects! You now have a few extra weeks to get your proposal ready because the new deadline is Friday 15th April 2016.\\n\\nThere are a number of non-machine learning projects available including:\\n \\n
\\nGo and look at our list of projects here: https://wiki.hpccsystems.com/x/zYBc, there's bound to be something that catches your eye.\\n\\nYou can find out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (25th March). Please note that after 25th March, we will not be accepting any more proposal for machine learning projects for summer 2016.\\n\\nQuestions? \\nEmail lorraine.chapman@lexisnexis.com\", \"post_time\": \"2016-03-22 12:24:08\" },\n\t{ \"post_id\": 9516, \"topic_id\": 2248, \"forum_id\": 7, \"post_subject\": \"Deadline for intern proposals is this Friday 15th April 7pm\", \"username\": \"lchapman\", \"post_text\": \"The deadline for submitting non-machine learning intern proposals is this Friday 15th April 7pm UTC.\\n\\nIf you haven't yet prepared your proposal, there is still time. Look what's on our list: https://wiki.hpccsystems.com/x/zYBc. Here's a taster...\\n\\n
\\n\\nFind out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (15th April). You still have time to make changes right up to the last minute if you need to. Just make sure you send your final version to lorraine.chapman@lexisnexis.com before the deadline.\\n\\nWe have already reviewed the machine learning proposals and all students will be informed whether they have been successful by 22nd April.\\n\\nGood luck!\", \"post_time\": \"2016-04-12 11:32:10\" },\n\t{ \"post_id\": 9578, \"topic_id\": 2272, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 6.0.0 RC3 is now available for download\", \"username\": \"lchapman\", \"post_text\": \"This release is now available for download on the website here: https://hpccsystems.com/download/hpcc-platform\\n\\nIn the View area, for Release, select Candidate. Then you can filter according to the Operating System you are using.\\n\\nIf you want to know what this release is all about, take a look at the two blogs which showcase the main features and improvements included in HPCC Systems 6.0.0:\\n
\\nYou can also watch a video of Flavio Villanustre and Lorraine Chapman talking about some of the main highlights here: https://hpccsystems.com/resources/blog/ ... 00-release\\n\\nDo let us know what you think about the new features. We're particularly interested in any feedback you have about performance improvements between your previous version and 6.0.0. Please email lorraine.chapman@lexisnexis.com with your findings. \\n\\nIf you find issues, we want to know about those too. Please use our community issue tracker to report your findings and we'll take a look: https://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nWe expect the gold version of this release to be available towards the end of May. But don't wait until then to try it out. Get ahead of the game and take a look now.\", \"post_time\": \"2016-04-28 16:11:34\" },\n\t{ \"post_id\": 9742, \"topic_id\": 2318, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 6.0.0 Gold now available for download\", \"username\": \"lchapman\", \"post_text\": \"HPCC Systems 6.0.0 Gold includes a number of major new features, performance enhancements and usability improvements. Read my new blog illustrating how we have adapted our system to use multicore technology to get significant performance improvements.\\n\\nGet the latest downloads and information here:\\n\\n\\n\\n\\n\\n\\n\\nHitting an issue? Let us know. Either discuss it in the Developer Forum or raise an issue using our Community Issue Tracker. \\n\\nWant a quick idea of what's included? Read the following:\\n\\n\\n\\n\\nOr watch a video of Flavio Villanustre and Lorraine Chapman discussing some of the highlights of this release.\\n\\nWe're also on FaceBook and Twitter. Follow us there to keep up to date with announcements.\", \"post_time\": \"2016-06-13 15:10:18\" },\n\t{ \"post_id\": 12143, \"topic_id\": 3013, \"forum_id\": 7, \"post_subject\": \"6.2.0 RC3 is now available\", \"username\": \"lchapman\", \"post_text\": \"This release is now available for download on the website here: https://hpccsystems.com/download/hpcc-platform\\n\\nIn the View area, for Release, select Candidate. Then you can filter according to the Operating System you are using.\\n\\nThe first in a series of blogs about this release can be found here: https://hpccsystems.com/resources/blog/lchapman/hpcc-systems-62x-here-whats-it-you\\nTo receive updates as more blogs in this series are posted, click on the RSS feeds icon in the top right hand corner of the main blog page, click on the Blog link and any others you want to follow.\\n\\nAmongst the new features added, we have also built on improvements made in 6.0.0. If you haven't already done so, take a look at the two blogs which showcase the main features and improvements included in HPCC Systems 6.0.0:\\nFeature highlights of HPCC Systems 6.0.0 Part 1\\nFeature highlights of HPCC Systems 6.0.0 Part 2\\n\\nWatch the developers talking about some of the major features in HPCC Systems 6.0.0 on Community Day at the 2016 HPCC Systems Engineering Summit. \\n\\nThis is a release candidate, so do let us know if you find issues, so we can respond as soon as possible for the benefit of all our users. 
Please use our community issue tracker to report your findings: https://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nLook out for details about the gold version of this release towards the end of the year.\", \"post_time\": \"2016-10-19 12:09:44\" },\n\t{ \"post_id\": 12603, \"topic_id\": 3163, \"forum_id\": 7, \"post_subject\": \"Google Summer of Code 2017 has been announced\", \"username\": \"lchapman\", \"post_text\": \"HPCC systems will be applying to be an accepted organisation for GSoC 2017. Google have announced the timeline and our preparations are underway.\\n\\nCalling all HPCC Systems community members and users...\\nOur Ideas List is a work in progress. If you have any ideas for suitable projects, email lorraine.chapman@lexisnexis.com with details.\\n\\nCalling all students...\\nVisit our GSoC Wiki to take a look at our Ideas List. We will be updating it regularly, so keep calling back.\\n\\nWe were an accepted organisation for GSoC 2015. Read about the HPCC Systems projects students completed as part of the program in 2015.\\n\\nWe're looking forward to 2017 and hoping to take part in a program which provides such wonderful, collaborative opportunities for students and open source projects like HPCC Systems!\", \"post_time\": \"2016-10-27 17:56:36\" },\n\t{ \"post_id\": 12623, \"topic_id\": 3183, \"forum_id\": 7, \"post_subject\": \"Looking for an internship for summer 2017? Look no further!\", \"username\": \"lchapman\", \"post_text\": \"We know some of you are early birds and like to get your internship organised as soon as you can. If you are a student who aspires to be a software developer or data scientist and you're looking for summer internship, we want to hear from you.\\n\\nThe HPCC Systems intern program is a specialist program. To get accepted, you need to choose a project, and submit a proposal demonstrating how you would complete it and why you are the right person for the job.\\n\\nEach project has a mentor and we recommend that you submit a first draft of your proposal to the mentor to get some feedback, so you can improve it before you submit your final version to lorraine.chapman@lexisnexis.com. You have until Monday 3rd April 2017, but don't wait until the last minute. We are accepting proposals already so take a look and let us know if something catches your eye. \\n\\nWant to know more about HPCC Systems, our intern program and the projects? Look here:\\n\\n
\\n\\nMore blogs will be posted on the HPCC Systems website presenting the work students completed this summer. Here's the first one featuring Column Level Security on HPCC Systems.\\n\\nHave questions? Email lorraine.chapman@lexisnexis.com\", \"post_time\": \"2016-10-27 18:51:06\" },\n\t{ \"post_id\": 13333, \"topic_id\": 3363, \"forum_id\": 7, \"post_subject\": \"HPCC Systems November 2016 Developer Newsletter\", \"username\": \"HPCC Staff\", \"post_text\": \"In case you missed it, the latest HPCC Systems Developer Newsletter is now available! This edition includes information on our new workshop series - The Download Community Tech Talks, a recap on Community Day, student resources for the Internship programs, helpful ECL tips, upcoming events and more. \\n\\nNot on the distribution list? Sign up here.\", \"post_time\": \"2016-11-17 17:09:03\" },\n\t{ \"post_id\": 13903, \"topic_id\": 3523, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 6.2.0 Gold is now available\", \"username\": \"lchapman\", \"post_text\": \"This release is now available for download on the website here: https://hpccsystems.com/download/hpcc-platform. In the View area, for Release, select Gold. Then you can filter according to the Operating System you are using.\\n\\nChangelogs are available here: https://hpccsystems.com/download/release-notes\\n\\nTo see a list of all available ECL bundles, follow the link below. The “supported” ones at the top are particularly interesting. Take note of the ML_Core and PBblas bundles (part of the refactoring which is taking place in the HPCC Systems Machine Learning Library) and the ‘Visualizer’: https://github.com/hpcc-systems/ecl-bundles. Both are new in HPCC Systems 6.2.0.\\n\\nThere are a number of blogs related to improvements included in HPCC Systems 6.2.0:\\n\\n
\\n\\nIf you come across any issues, let us know using our Community Issue Tracker.\", \"post_time\": \"2016-12-23 15:23:01\" },\n\t{ \"post_id\": 15693, \"topic_id\": 3563, \"forum_id\": 7, \"post_subject\": \"Registration Open! HPCC Systems Tech Talks, Episode 3\", \"username\": \"HPCC Staff\", \"post_text\": \"Registration is now open for the next Tech Talk scheduled for March 30, 2017, 11am ET. \\n\\nFeatured speakers include:\\nJoselito (Joey) Chua , PhD, Manager Software Engineer, Optimal Decisions Group\\nJill Luber, Senior Architect, LexisNexis Risk Solutions\\nMichael Gardner, Software Engineer II, LexisNexis Risk Solutions\\nBob Foreman, Senior Software Engineer, LexisNexis Risk Solutions\\n\\nRegister now: \\nhttps://www.brighttalk.com/webcast/1509 ... ign=249491\\n\\nSave the date for upcoming episodes:\\nApril 20\\nMay 25\\n\\nFor more information about the HPCC Systems Tech Talks, visit: \\nhttps://wiki.hpccsystems.com/display/hp ... +30th+2017\", \"post_time\": \"2017-03-07 13:10:21\" },\n\t{ \"post_id\": 14833, \"topic_id\": 3563, \"forum_id\": 7, \"post_subject\": \"Register Now for The Download: HPCC Systems Tech Talks Episo\", \"username\": \"HPCC Staff\", \"post_text\": \"Registration is now open for the next Tech Talk scheduled for February 16, 11am ET. \\n\\nFeatured speakers include:\\n
\\n\\nFull agenda will be shared soon! \\n\\nRegister now: \\nhttps://www.brighttalk.com/webcast/15091/244033?utm_source=HPCC%20Systems%20from%20LexisNexis%20Risk%20Solutions%20and%20RELX%20Group&utm_medium=brighttalk&utm_campaign=244033\\n\\nSave the date for upcoming episodes:\\nMarch 30\\nApril 20\\n\\nFor more information about the HPCC Systems Tech Talks, visit: \\nhttps://hpccsystems.com/community/event ... -episode-2\", \"post_time\": \"2017-01-26 13:49:42\" },\n\t{ \"post_id\": 14023, \"topic_id\": 3563, \"forum_id\": 7, \"post_subject\": \"The Download: Tech Talks by the HPCC Systems Community\", \"username\": \"HPCC Staff\", \"post_text\": \"Introducing The Download - Tech Talks by the HPCC Systems Community! \\n\\nPlease join us for an all new series of workshops specifically designed for the community by the community. The purpose of the workshop will be to share knowledge, spark innovation and further build and link the relationships within our HPCC Systems community. Each series will feature a 20 minute talk from 3-4 speakers. \\n\\nOur first Tech Talk is scheduled for January 12 at 11am ET and we are excited about our lineup of speakers for the inaugural event:\\n\\nFlavio Villanustre, VP Technology, LexisNexis\\nAnirudh Shah, Co-Founder, 3Loq\\nAllan Wrobel, Sr Senior Engineer, LexisNexis\\nLorraine Chapman, Consulting Business Analyst, HPCC Systems \\n\\nVisit https://hpccsystems.com/community/events/download-tech-talks-hpcc-systems-community-episode-1 for the full agenda and register today!\", \"post_time\": \"2017-01-09 12:00:32\" },\n\t{ \"post_id\": 15053, \"topic_id\": 3713, \"forum_id\": 7, \"post_subject\": \"Google Summer of Code 2017. Our application is in!\", \"username\": \"lchapman\", \"post_text\": \"It was a great experience to take part in GSoC 2015 so we've applied to do it all over again in 2017. No guarantees - we know they like to share the experience with new open source projects. We will know whether we have had the good fortune to be accepted on Feb 27th. So wish us luck and stay tuned for news!\\n\\nOur own summer intern programme was born out of our involvement with GSoC 2015, because we knew we could mentor more students than the 2 slots we were given. This program will run for the third consecutive year in 2017 and for the foreseeable future. So if you are a student, know someone who is or have contacts in the academic world, let them know we have places for students (Bachelors, Masters and PhD) with coding abilities who want to get experience working in a development environment. We support remote working, having accepted students working from the USA, Ireland and India in the past and we can accommodate the differences in semester dates just in case you are wondering.\\n\\nThinking about it but want more information? Contact Lorraine.Chapman@lexisnexisrisk.com.\\n\\nFind out about the achievements of our student contributors in 2015 and 2016, check out the list of projects for 2017 or suggest a project yourself. \\n\\nYou don't have to be a student applicant to suggest a project. Are you an HPCC Systems user who has an idea of a new feature or improvement that would help you and other community members? 
Contact Lorraine.Chapman@lexisnexisrisk.com with details and let's get it done!\", \"post_time\": \"2017-02-08 09:33:13\" },\n\t{ \"post_id\": 16043, \"topic_id\": 4023, \"forum_id\": 7, \"post_subject\": \"2017 HPCC Systems Community Day - Call for Presentations\", \"username\": \"HPCC Staff\", \"post_text\": \"The Call for Presentations and Poster Abstracts is now open for HPCC Systems Community Day 2017!\\n\\n\\nThe 2017 HPCC Systems Summit Community Day will be held in Atlanta, GA on Wednesday October 4, 2017. \\n\\nThe purpose of the Summit is to gather engineers, data scientists and technology professionals to share knowledge, best practices, and future roadmap plans for the open source HPCC Systems platform. This event is dedicated to our community featuring industry and academia presenting their HPCC Systems use cases, research projects and share their experience on how they leverage HPCC Systems. \\n\\nWe will also have the Poster Presentations Competition on Tuesday October 3 and on display throughout the event on Wednesday October 4. \\n\\nNew this year, we are offering various levels of sponsorships to allow our community partners to position their brand in front of key decision makers and influencers. \\n \\nThe announcement along with more details on submitting a speaking proposal or poster abstract, and sponsorship information can be found on our Web site at the link below. \\nhttps://hpccsystems.com/hpccsummit2017\", \"post_time\": \"2017-03-27 15:14:51\" },\n\t{ \"post_id\": 16153, \"topic_id\": 4073, \"forum_id\": 7, \"post_subject\": \"Intern Project Proposal Deadline Extension\", \"username\": \"lchapman\", \"post_text\": \"Good news if you're a bit late finalising your intern opportunity this year. We've extended the deadline for HPCC Systems Intern Program proposals to Saturday 22nd April.\\n\\nThe HPCC Systems intern program is a specialist program. To get accepted, you need to choose a project, and submit a proposal demonstrating how you would complete it and why you are the right person for the job.\\n\\nWant to know more about HPCC Systems, our intern program and the projects? Look here:\\n\\n
\\n\\nBlogs about student projects completed in 2016:\\n
\\n\\nHave questions? Email lorraine.chapman@lexisnexisrisk.com\", \"post_time\": \"2017-04-05 16:05:06\" },\n	{ \"post_id\": 17463, \"topic_id\": 4353, \"forum_id\": 7, \"post_subject\": \"Meet the 2017 HPCC Systems interns\", \"username\": \"lchapman\", \"post_text\": \"We are pleased to announce that 5 students were awarded places on the 2017 HPCC Systems summer intern program.\\n\\nTo find out more about who they are, the projects they are working on and how it's going, read the blog: A very warm welcome to our 2017 intern students.\\n\\nThe proposal period for 2018 opens towards the end of September. Keep an eye on this forum for announcements and also on our available projects list. Remember, you can submit a proposal for an idea of your own, but it must leverage HPCC Systems and be of benefit to our open source community.\\n\\nWant to know more about our intern program or know someone who might be interested in applying? Visit our Student Wiki for more information or contact lorraine.chapman@lexisnexisrisk.com.\", \"post_time\": \"2017-06-19 14:57:43\" },\n	{ \"post_id\": 17473, \"topic_id\": 4363, \"forum_id\": 7, \"post_subject\": \"Take HPCC Systems 6.4.0 for a test drive\", \"username\": \"lchapman\", \"post_text\": \"HPCC Systems 6.4.0 RC3 is now available for download on our website. If you are an ECL IDE user and want the latest version, select Gold and set the Operating System to Windows. \\n\\nThe gold version is expected to be ready in the coming weeks. Want to contribute to this closedown? Take it for a test drive and let us know how you get on. If you hit a problem, log it in our Community Issue tracker.\\n\\nSome information about what's included in this release is available in this blog. More blogs featuring specific areas such as performance, machine learning and ECL IDE and more, are coming soon.\\n\\nUse these JIRA filters if you want to drill down into the detail for what really matters to you:\\n\\n
\", \"post_time\": \"2017-06-19 15:19:20\" },\n\t{ \"post_id\": 18073, \"topic_id\": 4513, \"forum_id\": 7, \"post_subject\": \"Issues posting to forums?\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello,\\n\\nFor anyone experiencing issues posting to the forum, please try logging out, clearing your browser's cookies for the hpccsystems.com domain, and then log back in to the forum.\\n\\nIn Chrome and Firefox, you should be able to selectively delete cookies only for this site. \\n\\nFor Chrome, visit chrome://settings/content/cookies, search for "hpccsystems", and delete these cookies by clicking the "Remove All Shown" button.\\n\\n\\n\\nFor Firefox, visit about:preferences#privacy, click the "remove individual cookies" link, search for "hpccsystems", and delete these cookies by clicking the "Remove All Shown" button.\\n\\n
\\n\\nFor Internet Explorer, press Ctrl + Shift + d to open the "Delete Browsing History" dialog and click "Delete".\", \"post_time\": \"2017-06-30 20:21:53\" },\n\t{ \"post_id\": 18473, \"topic_id\": 4613, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 6.4.0 is gold\", \"username\": \"lchapman\", \"post_text\": \"We are pleased to announce that this release is now available for download on our website. If you are an ECL IDE user and want the latest version of this and the Client Tools, select Gold and set the Operating System to Windows. \\n\\nHighlights of HPCC Systems 6.4.0\\n
\\nUse these JIRA filters for more details of features, enhancements and fixes included in HPCC Systems 6.4.0:\\n
\\nDon't forget to check out the Red Book, which is updated regularly. If you hit a problem, we want to hear about it. Let us know by logging it in our Community Issue tracker.\", \"post_time\": \"2017-08-02 11:24:21\" },\n	{ \"post_id\": 18643, \"topic_id\": 4683, \"forum_id\": 7, \"post_subject\": \"HPCC Systems JAPIs WsClient 1.2.0 now available\", \"username\": \"lchapman\", \"post_text\": \"HPCC Systems JAPIs WsClient 1.2.0 supplements HPCC Systems 6.4.0 and is now available for download. \\n\\nThis set of APIs standardizes and facilitates interaction with HPCC Systems web services. More information about this feature is available in the HPCC Systems JAPIs github repository.\\n \\nMore details about what's included in this new release are available in the change log. \\n\\nWant to know more about the features included in HPCC Systems 6.4.0? See our earlier announcement.\", \"post_time\": \"2017-08-31 13:22:47\" },\n	{ \"post_id\": 19733, \"topic_id\": 4973, \"forum_id\": 7, \"post_subject\": \"2018 internship application period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are pleased to announce that we are now accepting proposals from students who would like to complete an internship with HPCC Systems in the summer of 2018.\\n\\nThis paid program is open to high school students, undergraduate, masters or PhD students who are interested in joining our platform development team to get some real world experience coding a solution for a feature that will actively be used by our open source community in the future. \\n\\nInternships last for 12 weeks starting in late May running through to mid August and are available to students across the globe. As a result, we are flexible about start and end dates to take account of semester timings across different timezones.\\n\\nTake a look at the projects we are offering for internships in 2018. Students who want to suggest a project of their own may do so, however your project must:\\n\\n
\\nIf you choose to do this, let us know so we can assign a mentor with relevant experience to support you. \\n\\nOur Student Wiki provides more information about the program itself, how to prepare a proposal (including an example) and our expectations of interns. You may also want to read our blog about the program and find out about previously completed intern projects.\\n\\nNew to HPCC Systems? Familiarise yourself with our platform and how it works:\\n\\n\\n
\\nFinal deadline for proposals is April 6th 2018\\n\\nFinal versions of proposals must be sent to lorraine.chapman@lexisnexisrisk.com (not the mentor) by the deadline date. However, we strongly recommend that you discuss your proposal ideas with the project mentor before you submit your final version. \\n\\nWe reserve the right to make earlier offers to students who submit an excellent proposal we know we want to accept. \\n\\nFor more information contact lorraine.chapman@lexisnexisrisk.com\", \"post_time\": \"2017-10-26 12:12:54\" },\n\t{ \"post_id\": 22063, \"topic_id\": 5633, \"forum_id\": 7, \"post_subject\": \"Notice for Chrome 67.x Users\", \"username\": \"HPCC Staff\", \"post_text\": \"Last week, Chrome released version 67.x of the browser. This release affects the display of ECL Watch in all versions of the HPCC Systems Platform prior to 7.0 beta. The immediate workaround is to use another browser temporarily. You could also downgrade Chrome to version 66.x or lower, but this is not recommended. \\n\\nThe issue has been resolved and a fix has been merged into the source code. Updates to versions 6.4.x and 6.2.x will be available soon. Please check back for updates.\", \"post_time\": \"2018-06-05 19:09:47\" },\n\t{ \"post_id\": 22163, \"topic_id\": 5663, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 7.0.0 Beta is now available\", \"username\": \"lchapman\", \"post_text\": \"While we add the finishing touches to HPCC Systems 7.0.0, we have made this Beta release available, to give you a preview of the features and enhancements you can expect to see in the gold version. We have made a number of performance improvements and there are some great new features for you to try in our user interfaces, ECL language and library. \\n\\nGood news for WsSQL users! You no longer need to download a separate package to use WsSQL. It is now integrated into the HPCC Systems platform package. If you already have a separate WsSQL package installed, please uninstall it before you upgrade to avoid potential compatibility issues.\\n\\nDo you use Spark? Try our new Spark-HPCC Systems Connector. In our Beta version, you have the ability to read Thor files natively from Spark. We are working towards including the write capability for the gold version of our 7.0.0 release.\\n\\nFind out more about HPCC Systems 7.0.0 Beta using these resources:\\n\\n
\\n\\nWe want your feedback. Here's how to get in touch:\\n
\\n\\nWe look forward to hearing about your experience with our beta release and will make an announcement about HPCC Systems 7.0.0 Gold later in the year.\", \"post_time\": \"2018-06-21 14:44:06\" },\n\t{ \"post_id\": 22233, \"topic_id\": 5693, \"forum_id\": 7, \"post_subject\": \"Antivirus software reporting Client Tools as security risk\", \"username\": \"lchapman\", \"post_text\": \"We use the NSIS windows installation framework for installing and uninstalling HPCC Systems Client Tools and Graph Control. Users have reported that these executables are being flagged as a security risk by their antivirus software. \\n\\nThis is a known issue and more information is available on the NSIS sourceforge wiki: http://nsis.sourceforge.net/NSIS_False_Positives. \\n\\nUsing the links provided on the NSIS wiki, you can report the false positive and get it white listed for the antivirus software you are using.\", \"post_time\": \"2018-06-27 16:32:42\" },\n\t{ \"post_id\": 23473, \"topic_id\": 6043, \"forum_id\": 7, \"post_subject\": \"Intern with us in 2019 - The proposal period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2019. The deadline for proposals is Friday March 29th 2019. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. \\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2019!\", \"post_time\": \"2018-11-07 13:25:30\" },\n\t{ \"post_id\": 23493, \"topic_id\": 6063, \"forum_id\": 7, \"post_subject\": \"HPCC System 7.0.0 Gold is now available\", \"username\": \"lchapman\", \"post_text\": \"We are very pleased to announce that HPCC Systems 7.0.0 Gold is now available for download. \\n\\nRead our blog to find out about the main new features and enhancements or view the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note.\\n\\nThe following list shows the highlights which are covered in our HPCC Systems 7.0.0 Gold blog. There are also some feature specific blogs available and where more information is available on our website or GitHub, links are supplied. \\n\\nPerformance enhancements\\n\\n
\\n\\nUsability enhancements\\n\\n
\\n\\nECL Language and Library Improvements\\n\\n\\n
\\n\\nNew bundles\\n\\n
\\n\\nMachine Learning Library Improvements\\n\\n
\\n\\nHave questions or need a tip? Post in our Developer forum. \\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\", \"post_time\": \"2018-11-07 16:15:50\" },\n\t{ \"post_id\": 25573, \"topic_id\": 6753, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 7.2.0 now available for download\", \"username\": \"lchapman\", \"post_text\": \"We are very pleased to announce that HPCC Systems 7.2.0 is now available for download.\\n \\nView the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. Highlights of this release include:\\n \\nIDE Improvements\\n\\n \\nJava Embed Improvements\\n
\\n \\nAlternative way of embedding C and C++ Code\\nAllows files of embedded code rather than snippets. These can be disabled or allowed based on whether they are signed. Provides a new, more convenient way of allowing embedded code in a secure way.\\n
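As a rough illustration of the embedded-code feature described above (this sketch is not taken from the release notes: the function name and values are made up, and it assumes the EMBED(C++) ... ENDEMBED syntax available in recent platform versions):

INTEGER4 addTwo(INTEGER4 a, INTEGER4 b) := EMBED(C++)
    // The ECL parameters are visible here as ordinary C++ variables.
    return a + b;
ENDEMBED;

OUTPUT(addTwo(2, 3));   // expected result: 5

On a locked-down system the same mechanism extends to whole files of embedded code: they are allowed or disabled depending on whether the containing file is signed.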
\\n\\nSpark improvements and features\\n
\\n \\nStd date additions\\n\\n \\nThor new feature - Taking the load off Thor for jobs that don’t need to go to Thor\\n \\nGeo spatial library from Uber - Now available as a plugin to allow it to be used from ECL code\\n \\nView the full list of new features / full list of issues fixed in HPCC Systems 7.2.0.\\n \\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\", \"post_time\": \"2019-04-09 13:21:00\" },\n	{ \"post_id\": 26723, \"topic_id\": 7073, \"forum_id\": 7, \"post_subject\": \"Welcome to the students joining our intern program in 2019\", \"username\": \"lchapman\", \"post_text\": \"A warm welcome to the students joining the HPCC Systems Intern Program this summer!\\n\\nAs always, the standard of proposal submissions from students this year was extremely high. The review panel had a difficult task evaluating which proposals to accept, given the limited number of places available. We know a lot of hard work goes into each proposal, so we'd like to say thank you to all students who took an interest in our intern program this year.\\n\\nFive students have started work on their projects already, with five more joining the team by the end of June. Read our blog Meet the students joining our intern program in 2019 to find out more about the students who have been accepted on to the HPCC Systems Intern Program this year.\\n\\nWe encourage our interns to get involved with our open source community in a number of ways. In addition to their contribution to our open source project we encourage students to:\\n\\n\\n
\\nEvery year, the students who join our intern program achieve a lot in what is a relatively short period of time (12 weeks). We greatly value their contribution to our open source platform and community and are proud of their achievements. \\n\\nEach student is mentored by one of our LexisNexis Risk Solutions colleagues and in many cases, additional mentoring is provided by university professors from our Academic Partners. We thank all our mentors for the encouragement and support they provide to our interns and for giving up their time to support our program. \\n\\nApplication to join the HPCC Systems Intern Program is by the submission of a proposal to complete a specific project during the 12 week internship period. Students can choose from our list of available projects, or they can suggest one of their own, but it must leverage HPCC Systems in some way.\\n\\nThe proposal period reopens in the Fall for internships in 2020. Watch this forum for announcements. More information about the HPCC Systems Intern Program is available in our blog: Join the HPCC Systems team as an intern.\", \"post_time\": \"2019-06-06 16:11:22\" },\n	{ \"post_id\": 26943, \"topic_id\": 7163, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 7.4.0 is now available for download\", \"username\": \"lchapman\", \"post_text\": \"We are very pleased to announce that HPCC Systems 7.4.0 is now available for download.\\n \\nView the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. Highlights of this release include:\\n\\nSecurity\\n
\\n\\nSpark users\\n
\\n \\nEmbedded Java users\\n
\\n \\nECL Standard Library Features\\n
\\n \\nECL Language New Features\\n\\n \\nECL IDE\\n
\\n \\nECL Watch\\n
\\nNew Workunit Analyser\\nWe are working towards providing feedback to users regarding workunits that might not be performing optimally, by spotting some common errors/symptoms that we see on slow workunits. These new features are examples of this approach:\\n
\\n \\nDocumentation\\n
\\n\\n\\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\\nSubscribe to our Newsletter.\\nRead our blog.\\nJoin us at one of our monthly Tech Talk webcasts.\", \"post_time\": \"2019-07-15 17:10:56\" },\n\t{ \"post_id\": 27113, \"topic_id\": 7213, \"forum_id\": 7, \"post_subject\": \"2019 HPCC Systems Community Day\", \"username\": \"HPCC Staff\", \"post_text\": \"The Call for Presentations and Poster Abstracts is open for HPCC Systems Community Day\\n\\nThe 6th annual HPCC Systems Summit Community Day is scheduled for October 15-16, 2019 in Atlanta, Georgia. This event is dedicated to showcase our community and have industry and academia present their HPCC Systems use cases, research projects and share their experience on how they leverage the HPCC Systems platform.\\n\\nAs part of Community Day, we will also hold a hands-on ECL Workshop and our 4th annual Poster Presentations competition on Tuesday October 15. New this year, we are hosting an Interactive Expo where attendees can see HPCC Systems in action and talk to our experts. A day full of talks and technical sessions is planned for Wednesday October 16. Catch a glimpse on what's being planned over the two-day event: http://cdn.hpccsystems.com/events/2019- ... -Flyer.pdf\\n\\nThe call for presentations is open through August 5 and poster abstracts through September 30. Be sure to submit early as the agenda slots typically fill up quickly. \\n\\nThe announcement along with more details on submitting a speaking proposal or poster abstract, along with workshop, sponsorship and registration information can be found on the event page: \\nhttps://hpccsystems.com/community/event ... ummit-2019\", \"post_time\": \"2019-07-29 20:34:53\" },\n\t{ \"post_id\": 28113, \"topic_id\": 7463, \"forum_id\": 7, \"post_subject\": \"Intern with us in 2020 - Proposal period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2020. The deadline for proposals is Friday March 20th 2020. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. \\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA. In 2019, we accepted students on to the program from Europe and Asia.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2020!\", \"post_time\": \"2019-11-14 10:48:01\" },\n\t{ \"post_id\": 28991, \"topic_id\": 7661, \"forum_id\": 7, \"post_subject\": \"Re: This forum still work?\", \"username\": \"rtaylor\", \"post_text\": \"THIS FORUM IS AS ACTIVE AS THE COMMUNITY HAS QUESTIONS. \\n\\nSO IF YOU HAVE A QUESTION ABOUT HPCC/ECL, PLEASE POST IT.\", \"post_time\": \"2020-01-10 13:59:44\" },\n\t{ \"post_id\": 28981, \"topic_id\": 7661, \"forum_id\": 7, \"post_subject\": \"This forum still work?\", \"username\": \"ricardos\", \"post_text\": \"This forum offline?\", \"post_time\": \"2020-01-10 12:56:24\" },\n\t{ \"post_id\": 29933, \"topic_id\": 7883, \"forum_id\": 7, \"post_subject\": \"2020 Intern Program - Proposal Period is now closed\", \"username\": \"lchapman\", \"post_text\": \"Thanks to all those who have submitted proposals. We are now moving through the review process and will be in touch with all applicants in the next two weeks.\\n\\nThe proposal period for 2021 will open in the fall. Check back here for details.\\n\\nBest wishes\\nLorraine Chapman\\nHPCC Systems Intern Program Manager\", \"post_time\": \"2020-03-26 14:01:56\" },\n\t{ \"post_id\": 31333, \"topic_id\": 8243, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 7.10.0 Gold is now available\", \"username\": \"lchapman\", \"post_text\": \"We are very pleased to announce that HPCC Systems 7.10.0 is now available for download.\\n \\nView the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. \\n\\nTo find out more about what's included in this release read our blog What's New in HPCC Systems 7.10.0 Gold.\\n\\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\\nSubscribe to our Newsletter.\\nRead our blog.\\nJoin us at one of our monthly Tech Talk webcasts.\", \"post_time\": \"2020-07-02 18:26:11\" },\n\t{ \"post_id\": 32583, \"topic_id\": 8493, \"forum_id\": 7, \"post_subject\": \"NOW OPEN - 2021 HPCC Systems Intern Program Proposal Period\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2021. The deadline for proposals is Friday March 19th 2021. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. 
\\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2021!\", \"post_time\": \"2020-11-17 15:28:31\" },\n\t{ \"post_id\": 32613, \"topic_id\": 8513, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 7.12.x is now available, find out more...\", \"username\": \"lchapman\", \"post_text\": \"HPCC Systems 7.12.x is now available for download.\\n \\nView the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. \\n\\nTo find out more about what's included in this release read our blog Highlights included in HPCC Systems 7.12.0.\\n\\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\\nSubscribe to our Newsletter.\\nRead our blog.\", \"post_time\": \"2020-12-03 17:06:11\" },\n\t{ \"post_id\": 33183, \"topic_id\": 8643, \"forum_id\": 7, \"post_subject\": \"Intern Program Proposal Period 2021 is now CLOSED\", \"username\": \"lchapman\", \"post_text\": \"The proposal period for students applying to join the HPCC Systems Intern Program in 2021 is now closed.\\n\\nProposals are now being reviewed. \\n\\nThank you to all students who have submitted proposals this year. Thanks also to all professors and school teachers who have encouraged their students to apply, as well as our LexisNexis Risk Solutions Groups colleagues who have supported students with their proposals.\\n\\nAll applicants will receive an email response whether they have been successful or not, within 2 weeks.\\n\\nMore information about students who have been accepted onto the program this summer will be provided via a blog post towards the end of May. Keep visiting the HPCC Systems blog on our website for updates: https://hpccsystems.com/blog\\n\\nThe proposal period for internships in 2022 will open in the Fall. Register as a member of our Community Forum to be notified when new posts about this and other subjects are available.\\n\\nAny educator who would like us to present to students about the HPCC Systems Intern program when the 2022 academic year starts, please contact academia@hpccsystems.com to arrange a date and time.\", \"post_time\": \"2021-03-22 11:25:49\" },\n\t{ \"post_id\": 33353, \"topic_id\": 8703, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 8.0.x includes our New Cloud Native Platform\", \"username\": \"lchapman\", \"post_text\": \"HPCC Systems 8.0.x is now available for download.\\n \\nView the release notes for the full list of changes. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. \\n\\nRead these blogs for the latest information on new features and improvements added in HPCC Systems 8.0.x:\\n\\n
\\n\\nHPCC Systems Cloud Native Platform\\nHPCC Systems 8.0.0 includes the first release that is feature complete to the point where our Cloud Native platform is now ready for Cloud performance evaluation. More information about our Cloud Native platform is available on our Cloud Native Platform Wiki, which includes blog posts about getting setup on Azure EKS and AWS EKS, using a service mesh (Linkerd and Istio), using the Helm Charts and Data Handling. \\n\\nVisit our Helm Chart GitHub Repository and see some examples of use to help you get setup.\\n\\nHPCC Systems Bare Metal\\n\\nTo find out more about what's included in this release read our blog HPCC Systems 8.0.0 - Cross Platform Highlights. Get the latest version from the Download area of the HPCC Systems Website.\\n\\nContact us\\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\\nSubscribe to our Newsletter.\\nRead our blog.\", \"post_time\": \"2021-04-15 11:50:25\" },\n\t{ \"post_id\": 33483, \"topic_id\": 8753, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 10 Year Anniversary Podcast Series\", \"username\": \"HPCC Staff\", \"post_text\": \"2021 marks the 10 year anniversary of HPCC Systems as open source! \\n\\nOur success could not have been achieved without the support from our community. To commemorate this milestone, we are hosting a podcast series featuring members across our community who have supported HPCC Systems through innovative contributions, advocacy and adoption, and academic collaboration. \\n\\nListen to the stories and check back often as new episodes are being added!\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/10+Year+Anniversary+Podcast+Series\\n\\n\", \"post_time\": \"2021-05-12 16:35:27\" },\n\t{ \"post_id\": 33493, \"topic_id\": 8763, \"forum_id\": 7, \"post_subject\": \"2021 HPCC Systems Community Day\", \"username\": \"HPCC Staff\", \"post_text\": \"The Call for Presentations and Poster Abstracts is open for HPCC Systems Community Day\\n\\nThe 8th annual HPCC Systems Community Day Summit will be held virtually on October 4. This worldwide event will return to offer plenary and breakout sessions covering a wide variety of topics, presentations and technical posters from students working on HPCC Systems related projects as part of our annual Poster Competition, as well as a high quality virtual workshop October 5-7.\\n\\nVisit the event page for more information and instructions on how to submit your speaking proposal. \\n\\nhttps://hpccsystems.com/community/events/hpcc-systems-summit-2021\\n\\nMark your calendars!\", \"post_time\": \"2021-05-13 17:02:34\" },\n\t{ \"post_id\": 33653, \"topic_id\": 8863, \"forum_id\": 7, \"post_subject\": \"Celebrating 10 Years of Open Source\", \"username\": \"lchapman\", \"post_text\": \"On Wed Jun 15 2011, we welcomed you to the HPCC Systems Open Source Community for the first time. Today, we celebrate our 10 year anniversary and invite you celebrate with us. 
\\n\\nJoin Vijay Ragahavan (EVP and CTO) and Flavio Villanustre (CISO and VP of Technology) as they discuss this important milestone and look ahead to the future.\\n\\nThis is the latest in a series of interviews celebrating our anniversary and features LexisNexis Risk Solutions Group colleagues, including past interns as well academic and industry partners who have all contributed to the success of our open source journey. View our 10 Year Anniversary Podcast Series.\", \"post_time\": \"2021-06-15 11:14:00\" },\n\t{ \"post_id\": 33783, \"topic_id\": 8913, \"forum_id\": 7, \"post_subject\": \"HPCC Systems 8.2.x is now available\", \"username\": \"lchapman\", \"post_text\": \"Read these blogs for the latest information about HPCC Systems 8.2.x:\\n\\n
\\n
\\n\\nHPCC Systems Cloud Native Platform\\nMore information about our Cloud Native platform is available on our Cloud Native Platform Wiki, which includes blog posts about getting setup on Azure EKS and AWS EKS, using a service mesh (Linkerd and Istio), using the Helm Charts and Data Handling. \\n\\nVisit our Helm Chart GitHub Repository and see some examples of use to help you get setup.\\n\\nHPCC Systems Bare Metal Platform\\nGet the latest version from the Download area of the HPCC Systems Website.\\n\\nView the release notes for the full list of changes on both platforms. The supporting documentation is available on our website and we recommend that you browse the RedBook for additional information about specific items of note. \\n\\nContact us\\nHave questions or need a tip? Post in our Developer forum.\\nFound an issue or have a feature request? Raise a JIRA issue.\\nWant to contribute? Walk through the process and take a look at the notes for developers included in the readme in our GitHub repository.\\nSubscribe to our Newsletter.\\nRead our blog.\\nFollow us on Twitter, FaceBook and LinkedIn.\", \"post_time\": \"2021-07-15 16:00:27\" },\n\t{ \"post_id\": 34503, \"topic_id\": 9123, \"forum_id\": 7, \"post_subject\": \"CVE-2021-44228 Security Vulnerability Announcement\", \"username\": \"jmlorti\", \"post_text\": \"The HPCC Systems team has been made aware of the Apache Log4j2 Thread Context Message Pattern and Context Lookup Pattern vulnerable to a denial-of-service attack. More details can be found on the Apache Log4J 2 website: https://logging.apache.org/log4j/2.x/.\\n \\nThis announcement summarizes the currently known potential impacts to the HPCC Systems platform. At this point, our engineering and security teams have determined that impacts are limited to the HPCC4J, and the Spark-plugins. The base HPCC Systems Platform does not use or reference Log4j and is not directly affected by the CVE-2021-44228 vulnerability.\\n \\nOur engineering and security teams continue to actively work on the analysis and any additional actions our users should perform will be updated in this forum post on an ongoing basis until the issue is resolved.\\n\\n
\", \"post_time\": \"2021-12-14 21:27:40\" },\n\t{ \"post_id\": 35435, \"topic_id\": 9375, \"forum_id\": 7, \"post_subject\": \"HPCC Systems Forums moving to Stack Overflow\", \"username\": \"HPCC Staff\", \"post_text\": \"Please Note: The HPCC Systems Forums are moving to Stack Overflow. We invite you to post your questions on Stack Overflow utilizing the tag hpcc-ecl: https://stackoverflow.com/search?tab=ne ... a4264a68eb\\n\\nThis legacy Forum will be active and monitored during our transition to Stack Overflow but will become read only beginning September 1, 2022.\", \"post_time\": \"2022-05-06 17:03:36\" },\n\t{ \"post_id\": 62, \"topic_id\": 33, \"forum_id\": 8, \"post_subject\": \"Re: Fields\", \"username\": \"ewadler\", \"post_text\": \"Gotcha, I was going from the already structured IMDB demo.\", \"post_time\": \"2011-05-24 20:13:11\" },\n\t{ \"post_id\": 61, \"topic_id\": 33, \"forum_id\": 8, \"post_subject\": \"Re: Fields\", \"username\": \"sort\", \"post_text\": \"First spray the file. The file structure is specified at read time not spray time. \\n\\nAt the point in ECL when you define the dataset that will read the logical file you sprayed, you will define the matching record structure. Something along this line....\\n\\nrIn := \\n record\\n varstring column1;\\n varstring column2;\\n varstring column3;\\n end;\\n\\ndIn := dataset('<logicalfilenamehere>', rIn, csv);\\noutput(dIn);\\n\\nMore detailed info is in the Language Reference which is useful because it covers additional details like specifying a heading, delimiters etc, which I haven't covered here.\", \"post_time\": \"2011-05-24 20:09:43\" },\n\t{ \"post_id\": 60, \"topic_id\": 33, \"forum_id\": 8, \"post_subject\": \"Fields\", \"username\": \"ewadler\", \"post_text\": \"When I spray a file. How do I know the fields/column names that are created? \\nAre there ways of referencing column 1, column 2, etc...?\\nCan I assign column names before or after a file has been sprayed.\\n\\nFWIW, I sprayed a csv file and there are no headers in the file, the first through last lines are lines/rows of data.\\n\\nthanks\", \"post_time\": \"2011-05-24 19:33:49\" },\n\t{ \"post_id\": 68, \"topic_id\": 36, \"forum_id\": 8, \"post_subject\": \"Re: Functions VS Transforms\", \"username\": \"richard.chapman.lex\", \"post_text\": \"[quote="cmastrange3":1bx510ur]Is there a significant difference between transforms and functions? Could one replace the other?\\n\\nA transform is a special type of function that takes one or more input records as parameters and returns a newly-created output record. You can use functions for multiple purposes, but an activity that creates a new record requires a transform to be provided in order to know how the new record should be initialized.\\n\\nRichard\", \"post_time\": \"2011-06-17 07:53:43\" },\n\t{ \"post_id\": 67, \"topic_id\": 36, \"forum_id\": 8, \"post_subject\": \"Functions VS Transforms\", \"username\": \"cmastrange3\", \"post_text\": \"Is there a significant difference between transforms and functions? 
Could one replace the other?\", \"post_time\": \"2011-06-17 04:40:42\" },\n\t{ \"post_id\": 148, \"topic_id\": 58, \"forum_id\": 8, \"post_subject\": \"Re: How to join as a HPCC open source developer?\", \"username\": \"richardkchapman\", \"post_text\": \"github, most likely.\", \"post_time\": \"2011-07-14 04:53:27\" },\n\t{ \"post_id\": 145, \"topic_id\": 58, \"forum_id\": 8, \"post_subject\": \"Re: How to join as a HPCC open source developer?\", \"username\": \"yunchen\", \"post_text\": \"How do you plan to manage the open source code?\", \"post_time\": \"2011-07-13 22:37:05\" },\n\t{ \"post_id\": 127, \"topic_id\": 58, \"forum_id\": 8, \"post_subject\": \"Re: How to join as a HPCC open source developer?\", \"username\": \"richardkchapman\", \"post_text\": \"We'll be ready to make more definite announcements on that when we release the sources - hopefully in a month or so.\", \"post_time\": \"2011-07-12 15:38:58\" },\n\t{ \"post_id\": 126, \"topic_id\": 58, \"forum_id\": 8, \"post_subject\": \"How to join as a HPCC open source developer?\", \"username\": \"yunchen\", \"post_text\": \"There's a FAQ of "What is the contribution process?", but I couldn't find clear steps/link to join, as well as roles and responsibilities of developers, leads, project managers etc.\", \"post_time\": \"2011-07-12 15:27:23\" },\n\t{ \"post_id\": 293, \"topic_id\": 99, \"forum_id\": 8, \"post_subject\": \"Re: How to spray data without using the ECL IDE?\", \"username\": \"bforeman\", \"post_text\": \"There are three ways to spray/despray files and keys:\\n\\n1. DFU command line:\\nDFUPLUS.exe takes command line parameters which are sent to DFU via ESP server.\\nA number of options are available for you to control how the spray is done.\\n\\n2. ECL Watch:\\nProprietary GUI web application which is used for monitoring the LN HPCC components and query activity.\\nLogical files are listed for available Landing Zones.\\nAvailable clusters are shown.\\nSpraying is done by completing a form supplying the information required.\\nYou can check the progress of a spray/despray and confirm its success or failure.\\n\\n3. Programmatically, using file services:\\nSpraying can be controlled from within the ECL code.\\nThis method is often used when automating regular production jobs.\\nSee the EclLibrary in the ECL IDE Repository for more details.\\n\\nYou can always execute ECL code from the command line using ECL Plus.\\n\\nCheck out the Client Tools PDF at this link:\\n\\nhttp://hpccsystems.com/community/docs/e ... leinttools\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-08-30 19:13:35\" },\n\t{ \"post_id\": 292, \"topic_id\": 99, \"forum_id\": 8, \"post_subject\": \"How to spray data without using the ECL IDE?\", \"username\": \"hli\", \"post_text\": \"Hi,\\n\\nI want to spray data files to Thor inside of ECL program. I know, with the IDE, I need to load files into the landing zone first and then spray the file into Thor from there. 
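As a rough sketch of the 'programmatically, using file services' option Bob lists above (every name below is made up, the exact parameter list of the spray call differs between platform versions, and in older releases the same call lives in the FileServices module rather than STD.File, so check the Standard Library Reference for your release):

IMPORT STD;

// Spray a CSV file that is already sitting on the landing zone into Thor.
// IP address, paths, cluster name and logical file name are all hypothetical.
STD.File.SprayVariable('192.168.56.101',                               // landing zone (dropzone) IP
                       '/var/lib/HPCCSystems/mydropzone/persons.csv',  // file on the landing zone
                       ,,,,                                            // defaults: max record size, separator, terminator, quote
                       'mythor',                                       // destination cluster (group)
                       '~training::persons',                           // destination logical file name
                       ,,, TRUE);                                      // defaults for timeout/ESP/connections, then allow overwrite

The DFUPlus command line and ECL Watch routes Bob describes achieve the same result without writing any ECL.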
But how can I do this with an ECL program or external scripts outside of the IDE?\\n\\nA more general question is how to run ECL programs without using the IDE.\\n\\nAny help is appreciated.\\n\\nThanks,\\n\\n-Hongchao\", \"post_time\": \"2011-08-30 17:37:40\" },\n	{ \"post_id\": 350, \"topic_id\": 114, \"forum_id\": 8, \"post_subject\": \"Re: compiler errors\", \"username\": \"thildebrant\", \"post_text\": \"Gavin,\\nThank you, that really helps.\\n\\nTodd\", \"post_time\": \"2011-09-16 18:04:18\" },\n	{ \"post_id\": 349, \"topic_id\": 114, \"forum_id\": 8, \"post_subject\": \"Re: compiler errors\", \"username\": \"ghalliday\", \"post_text\": \"It looks like\\n\\nthorlcr/shared/thexception.hpp contains the error numbers for thor\\n\\nroxie/roxie/roxie.hpp contains the numbers for roxie\\n\\nsystem/include/errorlist.h contains the ranges of numbers reserved for each component.\", \"post_time\": \"2011-09-16 16:51:24\" },\n	{ \"post_id\": 344, \"topic_id\": 114, \"forum_id\": 8, \"post_subject\": \"Re: compiler errors\", \"username\": \"thildebrant\", \"post_text\": \"good stuff, thank you.\\nAre there similar references to the Thor and Roxie errors?\", \"post_time\": \"2011-09-15 15:20:13\" },\n	{ \"post_id\": 343, \"topic_id\": 114, \"forum_id\": 8, \"post_subject\": \"Re: compiler errors\", \"username\": \"ghalliday\", \"post_text\": \"The error codes, and most of the error messages are contained in two files:\\n\\necl/hql/hqlerrors.hpp - errors generated parsing the files/syntax checking\\n\\necl/hqlcpp/hqlcerrors.hpp - errors reported when processing and generating the c++\", \"post_time\": \"2011-09-15 08:08:09\" },\n	{ \"post_id\": 341, \"topic_id\": 114, \"forum_id\": 8, \"post_subject\": \"compiler errors\", \"username\": \"thildebrant\", \"post_text\": \"Hello,\\nIs there a reference for the different error messages that the compiler generates?\\nOr, which file(s) can I look at in the source code to find out?\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-09-14 20:20:52\" },\n	{ \"post_id\": 364, \"topic_id\": 121, \"forum_id\": 8, \"post_subject\": \"Re: record and dataset definition, dataset has to be exporte\", \"username\": \"dabayliss\", \"post_text\": \"If you can post EXACTLY what you were trying to compile - and the EXACT error message we may be able to help.\\n\\nDavid\", \"post_time\": \"2011-09-26 19:02:21\" },\n	{ \"post_id\": 363, \"topic_id\": 121, \"forum_id\": 8, \"post_subject\": \"Re: record and dataset definition, dataset has to be exporte\", \"username\": \"abi.abishek26\", \"post_text\": \"I TRIED WITH THE DATASET KEYWORD , WHILE COPYIN HERE I SOMEHOW MISSED IT . SO I THINK THAT WAS NOT THE REASON FOR THE ERROR THAT I GOT WHILE TRYING TO DO SO\", \"post_time\": \"2011-09-26 18:42:04\" },\n	{ \"post_id\": 362, \"topic_id\": 121, \"forum_id\": 8, \"post_subject\": \"Re: record and dataset definition, dataset has to be exporte\", \"username\": \"dabayliss\", \"post_text\": \"You need the 'dataset' on your dataset definition; other than that the exporting works as you wish. 
Only those labels explicitely EXPORTed are visible outside of a module.\", \"post_time\": \"2011-09-26 18:28:11\" },\n\t{ \"post_id\": 360, \"topic_id\": 121, \"forum_id\": 8, \"post_subject\": \"Re: record and dataset definition, dataset has to be exporte\", \"username\": \"aintnomyth\", \"post_text\": \"Not seeing any code attached...\", \"post_time\": \"2011-09-26 16:22:08\" },\n\t{ \"post_id\": 359, \"topic_id\": 121, \"forum_id\": 8, \"post_subject\": \"record and dataset definition, dataset has to be exported\", \"username\": \"abi.abishek26\", \"post_text\": \"hello \\n\\ni have the record definition and dataset definition in the file below. i want only the dataset definition to be exported and not the record definition. i believe that the coding part is all correct. pls correct me if i am wrong anywhere. thanks.\\n\\nLayout_Persons :=RECORD\\nUNSIGNED8 ID;\\nSTRING15 FirstName;\\nSTRING25 LastName;\\nSTRING15 MiddleName;\\nSTRING2 NameSuffix;\\nSTRING8 FileDate;\\nUNSIGNED2 BureauCode;\\nSTRING1 MaritalStatus;\\nSTRING1 Gender;\\nUNSIGNED1 DependantCount;\\nSTRING8 BirthDate;\\nSTRING42 StreetAddress ;\\nSTRING20 City;\\nSTRING2 State;\\nSTRING15 ZipCode;\\nEND;\\n\\nEXPORT File_Layout_Persons :=DATASET('PERSONS',Layout_Persons,THOR);\", \"post_time\": \"2011-09-26 16:03:44\" },\n\t{ \"post_id\": 390, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Re: Creating Boolean Definitions\", \"username\": \"Prabulg\", \"post_text\": \"Cool Thanks a lot for the clarification.\", \"post_time\": \"2011-10-04 21:05:54\" },\n\t{ \"post_id\": 380, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Re: Creating Boolean Definitions\", \"username\": \"bforeman\", \"post_text\": \"Hi Prabulg,\\n\\nThat error is expected, when you Submit the IsYougMaleFloridian code, there is really no action that the workunit can perform (it's just a Boolean expression, and is not referencing a recordset to return). \\n\\nTry this: Open a new builder window and then enter:\\n\\nIMPORT TrainingYourName AS X;\\nX.Persons(X.IsYougMaleFloridian);\\n\\nThat will get you some good results
\\n \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-04 12:32:29\" },\n\t{ \"post_id\": 379, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Re: Creating Boolean Definitions\", \"username\": \"Prabulg\", \"post_text\": \"I have a question about this. I have local boolean definitions that define the boolean i export. When i run check i get no errors but when i try to execute i get error.\\n my code is \\n\\nIMPORT $;\\nPersons := $.Persons;\\nBOOLEAN IsFloridian := Persons.State = 'FL';\\nBOOLEAN IsMale \\t\\t\\t:= Persons.Gender='M';\\nBOOLEAN IsBorn80 \\t\\t:= Persons.BirthDate>='1980' AND Persons.BirthDate!=' ';\\n\\nEXPORT BOOLEAN IsYougMaleFloridian := IsFloridian AND IsMale AND IsBorn80;\\n\\nThe Error i get is.\\nError: persons.state - no specified row for Table persons (3, 1), 2131, TrainingKirilAlexandrov\\\\IsYougMaleFloridian.ecl\\n\\nIn other executions i have no problem referencing persons for example \\nIMPORT $;\\nPersons := $.Persons;\\nflorida :=Persons(State='FL');\\nCOUNT(florida);\\n\\nRuns fine\", \"post_time\": \"2011-10-04 09:00:11\" },\n\t{ \"post_id\": 378, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Re: Creating Boolean Definitions\", \"username\": \"rondav9\", \"post_text\": \"Bob,\\n\\nThank you!\", \"post_time\": \"2011-10-03 20:09:59\" },\n\t{ \"post_id\": 377, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Re: Creating Boolean Definitions\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nCreating a Boolean definitions is pretty simple. Go to your Repository window, and right-click in your target folder and select Insert File. Give the file a name for the Boolean definition that you wish to export. For example, if I wanted to create a Boolean definition named IsFloridian, that will be the name of my file. The ECL IDE will create a helper line of code for you that looks like this:\\n\\nEXPORT IsFloridian := 'todo';\\n\\nNow, just change it to add your Boolean expression like this:\\n\\nEXPORT IsFloridian := Person.per_st = 'FL';\\n\\n...and you'll probably need to import from your repository the reference to the dataset, like this:\\n\\nIMPORT $;\\nEXPORT IsFloridian := $.Person.per_st = 'FL';\\n\\nThat's all you need, please reply back if you have any additional questions.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-03 13:55:30\" },\n\t{ \"post_id\": 375, \"topic_id\": 124, \"forum_id\": 8, \"post_subject\": \"Creating Boolean Definitions\", \"username\": \"rondav9\", \"post_text\": \"I've just started to learn ECL, and i would like to know the steps in creating an EXPORT and local Boolean definitions. I read the HPCC Data tutorial, however, i'm still lost. Can someone get me started?\\n\\nThank you.\", \"post_time\": \"2011-10-02 02:25:09\" },\n\t{ \"post_id\": 473, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"gsmith\", \"post_text\": \"[quote="Allan":25b6cj1d]Hi Gordon,\\nThere were items 1. and 2. which are also repeatable. Please don't miss out those.\\n\\n\\n2. 
Graph control (there may be differences based on IDE version and/or if you are refering to IDE or EclWatch graphs): But in general double click is your friend, no matter what you double click, it should zoom to fit (so works for zooming out as well as in).\", \"post_time\": \"2011-10-17 15:37:35\" },\n\t{ \"post_id\": 471, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi Gordon,\\n\\nThanks for taking these suggestions on board.\\n\\nThere were items 1. and 2. which are also repeatable. Please don't miss out those.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-17 15:17:05\" },\n\t{ \"post_id\": 466, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"gsmith\", \"post_text\": \"[quote="Allan":hgjsnpum]\\n3. Close ECL IDE with files open works ok (the changes are saved), however on restarting the IDE the same files are marked as requiring saving (* by name in tab).\\n\\n4. No way to move cursor to the matching block token, be that END,(, or {\\n\\n5. No 'goto definition' or 'goto reference'. Seems to be important given a project seems to require numerious source files.\\n\\n6. No context sensitive help.\\n\\n7. 'Print' -> 'Print Setup' Also beings up the 'ECL Reference' help.\\n\\n8. Ability to do f1 on an error message to get up its help. PLEASE! (it brings up the ECL reference dialog box which is a fat load of use.)\\n\\n9. On closing the 'preferences' dialog box a [0%] image is left on the screen and is still there on closeing the entire IDE! (I'm using Windows 7)\\n(Can't reproduce this one)\\n\\n10. If you go though all the tabs on the 'Preferences' dialog box, on canceling it prompts to save changes made even though you've not made any changes.\\n\\n11. If the 'Limit Results (rows)' in the 'Results' tab of the 'Preferences' dialog box is set to >= 1000 the number of rows output in a result set is set to 1.\\n\\n\\nAlan - Thank you for your feedback, I will be opening issue tickets on GIT Hub for most of these, but would like to address some of the minor ones:\\n\\n4. Match brace (and select to match brace) are available under the "Advanced" option either in the Ribbon or in the context menu (ctrl+E and ctrl+shit+E). It does not currently work with RECORD/END etc. but given we "know" about these and use them for the tree folding, it should be possible to add this.\\n\\n5. There are some limited locate functions, F12 will attempt to resolve an external "attribute" but its use is limited. FWIW Historically all ECL was stored remotely with the HPCC server and the client IDE had no access to all the ECL files, making "proper" resolving impossible. However this functionality will be part of the eclipse plugin which is under development.\\n\\n6. There is context help for ECL now (in later IDE builds).\\n\\n7. These worked fine on my machine (maybe fixed in later IDE build?)\\n\\n8. 
I shall forward this suggestion to documents guys and see if we can get an indexed help file for it.\\n\\nGordon.\", \"post_time\": \"2011-10-17 11:16:35\" },\n\t{ \"post_id\": 432, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"The fact that a single file contains a single EXPORT definition whose name must match the filename is one of those bits of information that sits on the cusp between the Language and the environment tools. \\n\\nThis code file/EXPORT definition relationship is highly stressed in our ECL classes (which you are most welcome to attend, take a look at http://hpccsystems.com/products-and-ser ... s/training). But as you pointed out, it is missing from our language reference documentation. I think that's because it most properly belongs as part of a guided tutorial -- so I have updated the Data Tutorial PDF to include this important fact. The updated PDF will be in the next release.\\n\\nThanks for bringing this to our attention.\", \"post_time\": \"2011-10-10 19:44:17\" },\n\t{ \"post_id\": 426, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nI was coming on the course next week but family problems mean I must delay joining LN till Mid November (hopefully) with a view to taking the January course.\\n\\nIn the mean time I'm attempting to learn ECL.\\n\\nYour suggestion worked a treat.\\n\\n
\\nOUTPUT (t,,'~thor::Genesis::C2',OVERWRITE);\\n
\\n\\ngenerates this error:\\n\\nError: WHEN must be used to associated an action with a definition (26, 1), 2325, \\n
\\nWhereas:\\n\\nx := OUTPUT (t,,'~thor::Genesis::C2',OVERWRITE);\\n
\\nCompiles cleanly!\\n\\nI don't think the code breakers at Bletchley Park would have got that one \\n\\nAs to displaying the cursor position (line,column) I can find preferences but I can't find this 'ECL IDE Orb' thing. My ECL IDE Version is 6.0.1.5.682.1\\n\\nI must admit I'm finding this whole IDE/ EClWatch a bit buggy and lacking in features.\\n1. With ECLWatch The 'Description' of a logical file is displayed when viewed from 'Browse Logical Files' but not displayed when viewing via 'Browse File by scope'.\\n\\n2. Attempting to Zoom in on a graph screen does not work properly, thats ctrl+. You get into a situation where you cannot scroll to see any part of the graph.\\n\\n3. Close ECL IDE with files open works ok (the changes are saved), however on restarting the IDE the same files are marked as requiring saving (* by name in tab).\\n\\n4. No way to move cursor to the matching block token, be that END,(, or {\\n\\n5. No 'goto definition' or 'goto reference'. Seems to be important given a project seems to require numerious source files.\\n\\n6. No context sensitive help.\\n\\n7. 'Print' -> 'Print Setup' Also beings up the 'ECL Reference' help.\\n\\n8. Ability to do f1 on an error message to get up its help. PLEASE! (it brings up the ECL reference dialog box which is a fat load of use.)\\n\\n9. On closing the 'preferences' dialog box a [0%] image is left on the screen and is still there on closeing the entire IDE! (I'm using Windows 7)\\n(Can't reproduce this one)\\n\\n10. If you go though all the tabs on the 'Preferences' dialog box, on canceling it prompts to save changes made even though you've not made any changes.\\n\\n11. If the 'Limit Results (rows)' in the 'Results' tab of the 'Preferences' dialog box is set to >= 1000 the number of rows output in a result set is set to 1.\\n\\nAnyway Thanks for your help. If you'r in a position to, could you please pass on my comments above. I have a feeling I have more in a short while.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-07 20:26:32\" },\n\t{ \"post_id\": 423, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"bforeman\", \"post_text\": \"Also, to turn on line numbers, go to the Preferences window (click on the ECL IDE Orb menu in the upper left corner, and click on the Preferences button in that window. The Line Numbers setting can be found in the Editor tab.\", \"post_time\": \"2011-10-07 19:09:48\" },\n\t{ \"post_id\": 421, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nPerchance are you the Allan that's coming to our training next week?\\n\\nI believe the error is caused by the nested OUTPUT action prior to the EXPORT. If you define and EXPORT the OUTPUT, you will get no errors. 
After that, open a new Builder window, IMPORT the MODULE, and then execute any of all of the definitions that you EXPORTed.\\n\\nThis code compiles fine without errors:\\n\\nIMPORT std;\\n\\nEXPORT Program := MODULE\\n\\nSHARED Layout_Book := RECORD\\n STRING Text;\\nEND;\\n\\nSHARED Layout_Verse := RECORD\\n UNSIGNED2 Chapter;\\n UNSIGNED2 Verse;\\n STRING Text;\\nEND;\\n\\nSHARED Raw := DATASET('~thor::genesis_csv',Layout_Book,CSV(HEADING(4)));\\n\\nSHARED Layout_Verse Split(Layout_Book pInput) := TRANSFORM\\n SELF.Chapter := (UNSIGNED2)pInput.Text[1..std.str.Find(pInput.Text,':',1)];\\n SELF.Verse := (UNSIGNED2)pInput.Text[std.str.Find(pInput.Text,':',1)+1..std.str.Find(pInput.Text,':',2)];\\n SELF.Text := pInput.Text[Std.Str.Find(pInput.Text,':',2)+1..];\\nEND;\\n\\nt := PROJECT(Raw,Split(LEFT));\\nEXPORT myout := OUTPUT(t,,'~thor::Genesis::C2',OVERWRITE);\\n\\nEXPORT With_Index := DATASET ('~thor::Genesis::C2',{Layout_Verse,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nEXPORT Index_By_Chapter :=\\n INDEX(With_Index,{Chapter,fpos},'~thor::Genesis::VerseByChapterINDEX');\\n\\nEXPORT BuildGenesis := BUILDINDEX(Index_By_Chapter,OVERWRITE);\\nEND;\\n\\n\\nNote the use of SHARED, which allows definitions to be used within the same module.\\n\\nHope this helps!\\n\\nBest regards,\\nBob\", \"post_time\": \"2011-10-07 18:52:18\" },\n\t{ \"post_id\": 413, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi David,\\n\\nI was going though the example 'Working with Data' from the 'HPCC Data Tutorial'\\nThis worked, but for this simple example I ended up with 7 source files.\\nI was finding navigation difficult. It would help if the ECL IDE had a 'goto definition' and 'goto reference' ability. (if it does please tell me)\\n\\nLacking easy navigation I decided to do much the same but use a Bible example from the reference above, but use a single source file (or two, one to construct the bibliographic and index files, a second to allow searchs)\\n\\nThat's when my troubles started.\\n\\nThe complete program for construction part is below, I've tried to incorporate all I've learnt from the above corrispondance but still get error:\\n
\\nError: WHEN must be used to associated an action with a definition (26, 1), 2325, \\n
\\nThe code compiles and works all the way up to the definition of 'With_Index', which is on line 26.\\nIf I make the last 3 definitions 'local' I get this error:\\n\\nError: Action side effect is not associated with a definition (3, 19), 2325, \\n
\\nIMPORT std;\\n\\nEXPORT Program := MODULE\\n\\nLayout_Book := RECORD\\n STRING Text;\\nEND;\\n\\nLayout_Verse := RECORD\\n UNSIGNED2 Chapter;\\n\\t UNSIGNED2 Verse;\\n STRING Text;\\nEND;\\n\\nRaw := DATASET('~thor::genesis_csv',Layout_Book,CSV(HEADING(4)));\\n\\nLayout_Verse Split(Layout_Book pInput) := TRANSFORM\\n SELF.Chapter := (UNSIGNED2)pInput.Text[1..std.str.Find(pInput.Text,':',1)];\\n\\t\\tSELF.Verse := (UNSIGNED2)pInput.Text[std.str.Find(pInput.Text,':',1)+1..std.str.Find(pInput.Text,':',2)];\\n\\t\\tSELF.Text := pInput.Text[Std.Str.Find(pInput.Text,':',2)+1..];\\nEND;\\n\\nt := PROJECT(Raw,Split(LEFT));\\nOUTPUT (t,,'~thor::Genesis::C2',OVERWRITE);\\n\\nEXPORT With_Index := DATASET ('~thor::Genesis::C2',{Layout_Verse,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nEXPORT Index_By_Chapter :=\\n INDEX(With_Index,{Chapter,fpos},'~thor::Genesis::VerseByChapterINDEX');\\n\\nEXPORT BuildGenesis := BUILDINDEX(Index_By_Chapter,OVERWRITE);\\nEND;\\n
\\n\\nAny idea's to help me proceed?\\nI can't see what I'm doing different form the example from 'HPCC Data Tutorual' apart from place all the source into a single file.\\n\\nOh by-the-way, when editing the line and column numbers used to be displayed on the bottom left of the screen, they don't appear now, how are they toggled back on?\\n\\nIn addition is there any way to view line numbers with the source code? I see no option in the 'View' pane.\\n\\nYours\\nAllan\", \"post_time\": \"2011-10-06 17:52:34\" },\n\t{ \"post_id\": 412, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi dabayliss,\\n\\nGreat - Thanks for pointing me to those libraries, I'll be reading those up.\\n\\nAs to the 'strangeness', your right its only a personal judgment. I was just peeved that this restriction is not documented. If it is, its not made prominent enough in the tutorials. I spent a good two evenings stuck on that point.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-06 16:04:39\" },\n\t{ \"post_id\": 411, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"dabayliss\", \"post_text\": \"GetNthWord is part of the ECL standard libraries - the routines are not yet documented in our language reference; but if you look in the ecllibrary directory (you can see it from the ECL IDL) all of the routines are visible to see and well documented.\\n\\nThe strangeness of the decision to encapsulate the code according to its logical structure depends upon where you are coming from. As someone who started in Pascal & Modula-2 it would strike me as very strange to allow code to leak all over your source files ....\\n\\nDavid\", \"post_time\": \"2011-10-06 12:54:31\" },\n\t{ \"post_id\": 410, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Thanks dabayliss,\\n\\nI'd actually found this web site and took the idea of using the Bible from it. Though decided to try a simpler example, as, for example, I could not find 'GetNthWord', used by author, in the ECL reference manual.\\n\\nI've been though LN's training videos and some, not all, documentation, and it not at all clear from these sources that, by default, a single definition can be exported from a file.\\n\\nIt seems a strange decision to tie what you can do in a program to the environment the program is compiling under i.e. the file name of containing file?\\n\\nIn C++ its good practice to have a single class in a file and tie the class name to the filename, but its not at all mandatory.\\n\\nYours - gradually making headway.\\n\\nAllan\", \"post_time\": \"2011-10-06 12:37:56\" },\n\t{ \"post_id\": 409, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"dabayliss\", \"post_text\": \"Alan,\\n\\nGiven your subject matter you might find this useful:\\n\\nhttp://www.dabhand.org/ECL/construct_a_ ... search.htm\\n\\nDavid\", \"post_time\": \"2011-10-06 11:43:41\" },\n\t{ \"post_id\": 406, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Thanks ghalliday,\\n\\nI had tried using the MODULE construct using it as a namespace. 
Then referring to items as modulename.item (and variations thereof) to no avail.\\n\\nI will now beaver away till the next brick wall (5 minutes I expect)\\n\\nYours Gratefully\\n\\nAllan\", \"post_time\": \"2011-10-05 16:17:42\" },\n\t{ \"post_id\": 398, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"ghalliday\", \"post_text\": \"The idea is that each source file exports a single symbol.\\n\\nHowever you can export a MODULE (think of it a bit like a C++ class/namespace), which means that you can publish a symbol with a structured type.\\n\\nWhat you could do in this instance is something like:\\n\\nEXPORT Program := MODULE\\n\\n EXPORT InputDatset := DATASET ('~thor::Genesis::C2',{Layout_Verse,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\n EXPORT Index_By_Chapter :=\\n INDEX(Program,{Chapter,fpos},'~thor::Genesis::VerseByChapterINDEX');\\n\\n EXPORT BuildGenesis := BUILDINDEX(Index_By_Chapter,OVERWRITE);\\n\\nEND;
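\\n(One caveat if you copy the sketch above: inside the module, the INDEX would normally be declared over the exported dataset definition rather than over the module's own name - assuming the dataset keeps the name used above, that line would read\\n\\n EXPORT Index_By_Chapter :=\\n INDEX(InputDatset,{Chapter,fpos},'~thor::Genesis::VerseByChapterINDEX');\\n\\nwith InputDatset replaced by whatever your dataset definition is actually called.)\\n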
\\n\\nHistorically we have tended to separate definitions from the queries that use them, since the definitions can be reused (in this case the file above might be called Genesis or Bible), and another file could contain\\n\\nIMPORT Program;\\nProgram.BuildGenesis;\\n
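\\nThe same approach works for any exported name, not just the index build - for example, a quick builder-window check (just a sketch, assuming the module file is saved as Program.ecl at the root of your repository folder and the logical file already exists) could be\\n\\nIMPORT Program;\\nOUTPUT(CHOOSEN(Program.InputDatset,10));\\n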
\\n\\nInstead of using a separate file to contain the query, you can include a main Attribute in your module:\\n\\n\\n EXPORT Main := BuildGenesis;\\n
\\n\\nWhen you submit a file that defines a module as a query, eclcc will check to see if it has an attribute called main. If it has one, that is used to build the query.\\n\\nNote, there isn't much difference between definitions with or without parameters in ECL, so the C++ side of you may prefer to say.\\n\\n\\n EXPORT BuildGenesis() := BUILDINDEX(Index_By_Chapter,OVERWRITE);\\n EXPORT Main() := BuildGenesis();\\n
\\n\\nThat will work equally well.\", \"post_time\": \"2011-10-05 12:16:23\" },\n\t{ \"post_id\": 395, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Thanks for these prompt reply's and actions it would seem - Wow!\\n\\nI have not got to grips with what definitions must tie up with a filename. (I come from a c++ background)\\n\\nMy original question was a simplification of my actual code (It would be nice if attachments could be put with a message, if someone could pass that suggestion on, its preferable to code inserts as they break up the flow of text and it allows spreadsheets etc to be passed around.)\\nA larger snippet is:\\n\\nEXPORT Program := DATASET ('~thor::Genesis::C2',{Layout_Verse,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nEXPORT Index_By_Chapter :=\\n INDEX(Program,{Chapter,fpos},'~thor::Genesis::VerseByChapterINDEX');\\n\\nBUILDINDEX(Index_By_Chapter,OVERWRITE);\\n
\\nThis 2nd definition cannot have the same name?\\nFrom what I see one will end up with hundreds of small files in one project, that can't be true?\\n\\nAnd with this refactored code I now get errors:\\n\\nError: WHEN must be used to associated an action with a definition (24, 1), 2325, \\nError: Definition program has a trailing public definition index_by_chapter (26, 1), 2380,\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-05 09:41:41\" },\n\t{ \"post_id\": 394, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"ghalliday\", \"post_text\": \"I've reworded the error message, so now it will say\\n\\nName of exported symbol 'With_index' does not match the expected name 'program'\\n\\nHopefully that would have been clearer.\", \"post_time\": \"2011-10-05 08:17:54\" },\n\t{ \"post_id\": 393, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with compiler errors.\", \"username\": \"richardkchapman\", \"post_text\": \"The error message is trying to tell you that the name of the exported symbol has to match the name of the ecl file - I agree it could have been better worded.\\n\\nOn a more general point, I agree that we need documentation with a paragraph or so expanding on the possible causes and fixes for each compiler error - I'll make sure the documentation team get to see this message.\", \"post_time\": \"2011-10-05 06:17:19\" },\n\t{ \"post_id\": 389, \"topic_id\": 125, \"forum_id\": 8, \"post_subject\": \"Newbe - getting to grips with compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've been working my way though the example ECL program in 'HPCC data Tutorial' and though I get that working, any of my own work is stymied though lack of documentation of compiler errors.\\n\\ne.g. The following code snippet\\n\\nEXPORT With_index := DATASET ('~thor::Genesis::C2',{Layout_Verse,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\ngenerates error:\\n\\nWarning: Definition program cannot define a public symbol with_index (24, 1), 2380, \\n\\nAnd I cannot find a way round it. (This needs to be exported so the roxie fetch definition can use it.) \\nOther posts talk of errors being held in files ecl/hql/hqlerrors.hpp \\nI should not have to download open source to get a list of errors, which, I expect, wont explain the error.\\n\\nSo 1. Can someone please help with this particular error and\\n 2. Tell me where error/warning messages are documented so I don't have to pester the forum every 2 minutes.\\n \\n Yours\\n \\n Allan\", \"post_time\": \"2011-10-04 18:48:56\" },\n\t{ \"post_id\": 429, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks, Gavin, that clears it up. I'm able to compile with eclcc and execute with eclplus.\", \"post_time\": \"2011-10-10 14:05:40\" },\n\t{ \"post_id\": 428, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"ghalliday\", \"post_text\": \"The way you access source code is slightly difference from the legacy system that many users may be familiar with.\\n\\nIf you have all your source code local, you use eclcc to compile the source locally, and can use the -I option to indicate where the source files are located. \\n\\neclplus is used to submit the query to a remote server - which generally won't be on the same machine, and won't have access to those local sources. In this case you need to use eclcc to create an archive (which encapsulates all the sources used by your query into a single file), and then submit that archive using eclplus.\\n\\nE.g.,\\n\\neclcc myQuery.ecl -I myExtraSources -E -output myQuery.eclxml\\neclplus ecl=@myQuery.eclxml\\n
\\n\\nThe -E option on eclcc indicates that the output should be an archive file. By default the archive is output to stdout, but it is often simpler to output it to a local file, and then submit that to eclplus. You can avoid the temporary file by piping the archive into eclplus. The command should be\\n\\n\\neclcc myQuery.ecl -I myExtraSources -E | eclplus\\n
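\\nAs a purely illustrative instance of the two-step flow (the file and directory names here are made up), the commands might look like\\n\\neclcc eclcode.ecl -I C:\\\\ecl -E -output eclcode.eclxml\\neclplus action=query ecl=@eclcode.eclxml\\n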
\\n\\nUsing separate steps avoids submitting workunits if there are syntax errors.\\n\\nIf you are using the enterprise edition, with a central source repository, you would submit just the source file, since the remote server can directly pull the required dependencies.\", \"post_time\": \"2011-10-10 11:56:59\" },\n\t{ \"post_id\": 425, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"I get the same warnings. \", \"post_time\": \"2011-10-07 20:09:29\" },\n\t{ \"post_id\": 424, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"sort\", \"post_text\": \"try removing the quotes when specifying the -I param\", \"post_time\": \"2011-10-07 19:58:33\" },\n\t{ \"post_id\": 422, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the help, when I run that I'm still getting the warnings:\\n\\neclplus action=query ecl=@eclcode.ecl -I"c:\\\\ecl"\\n...Explicit source file C not found\\n...Explicit source file \\\\ecl not found\\n\\nIt's like it can't parse the parameter or something. I tried dropping the .ecl file directly into the bin directory (C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0) but that doesn't help, the command was:\\neclplus action=query ecl=@eclcode.ecl -Iincoming.ecl\\n\\nIn theory, if my incoming.ecl code imported OTHER files, would those need to be explicitly defined in the -i argument too?\", \"post_time\": \"2011-10-07 18:54:13\" },\n\t{ \"post_id\": 420, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"sort\", \"post_text\": \"try something like the following:\\n\\neclplus action=query ecl=@eclcode.ecl -I"c:\\\\ecl"\\n\\n\\nI always set "action", I do not let the system default it based on params based in (and leading -).\", \"post_time\": \"2011-10-07 18:37:59\" },\n\t{ \"post_id\": 419, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"I'm missing something somewhere\\n\\nThis command causes the output: "Error: unknown action"\\n
eclplus ecl=@eclcode.ecl -I"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -L"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -g -E as eclcc
\\n\\n\\nAnd this command (removing the "as eclcc" portion): \\neclplus ecl=@eclcode.ecl -I"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -L"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -g -E
\\n\\nproduces this output:\\n[size=85:39g4cd89]C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0>eclplus ecl=@eclcode.ecl -I"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -L"C:\\\\ecl\\\\alldata\\\\test\\\\incoming.ecl" -g -E\\nWorkunit W20111007-135906 submitted\\n<Warning><source>eclcc</source><code>10</code><message> Explicit source file C not found</message></Warning>\\n<Warning><source>eclcc</source><code>10</code><message> Explicit source file \\\\ecl\\\\alldata\\\\test\\\\incoming.ecl not found</message></Warning>\\n<Error><source>eclcc</source><line>1</line><code>2081</code><message> Import names unknown module "alldata"</message></Error>\\n<Error><source>eclcc</source><line>2</line><code>2167</code><message> Unknown identifier "test"</message></Error>\\n
\\n\\nI get the same results if I remove the double-quotes from the paths.\", \"post_time\": \"2011-10-07 18:02:37\" },\n\t{ \"post_id\": 418, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"...now that I look I see the "-lpath -Lpath" in the command line help.\", \"post_time\": \"2011-10-07 17:41:19\" },\n\t{ \"post_id\": 417, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks, I'll try that. Do you have any docs on the command line switches? I checked the client tools .pdf but it only deals with named arguments (like arg=val).\", \"post_time\": \"2011-10-07 17:38:43\" },\n\t{ \"post_id\": 416, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"sort\", \"post_text\": \"try using the -I parameter when running eclplus. This will allow you to set and pass the include path to the eclcc compiler {-Ipath -Lpath -g -E as eclcc}.\", \"post_time\": \"2011-10-07 17:29:40\" },\n\t{ \"post_id\": 415, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"Re: eclplus and IMPORT\", \"username\": \"bforeman\", \"post_text\": \"As ECLPlus is a command line tool, I would guess that it is possibly a path issue. \\n\\nOn the command line, does it work if you set your path to the ecl code location prior to calling ECLPlus?\\n\\nExample - CD C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\", \"post_time\": \"2011-10-07 17:00:56\" },\n\t{ \"post_id\": 414, \"topic_id\": 128, \"forum_id\": 8, \"post_subject\": \"eclplus and IMPORT\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nI'm having trouble accessing code from my repository in eclplus.exe\\n\\nI've placed my code in a file for testing and created a test scenario to make sure my .INI file is good. \\n\\neclcode.ecl contents:\\ntest := 1+2;\\nOUTPUT(test);
\\nc:\\\\>eclplus ecl=@eclcode.ecl\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0>eclplus ecl=@eclcode.ecl\\nWorkunit W20111007-114850 submitted\\n[Result 1]\\nResult_1\\n3
\\n\\nThis scenario does not work:\\neclcode.ecl contents:\\nIMPORT alldata.test; \\nds := test.incoming; \\noutput(ds);
\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0>eclplus ecl=@eclcode.ecl\\nWorkunit W20111007-115227 submitted\\n<Error><source>eclcc</source><line>1</line><code>2081</code><message> Import names unknown module "alldata"</message></Error>\\n<Error><source>eclcc</source><line>2</line><code>2167</code><message> Unknown identifier "test"</message></Error>
\\nBut the above code does run in the ECL IDE.\\n\\nAny ideas?\", \"post_time\": \"2011-10-07 15:55:10\" },\n\t{ \"post_id\": 436, \"topic_id\": 131, \"forum_id\": 8, \"post_subject\": \"Re: Newbe - getting to grips with more compiler errors.\", \"username\": \"Allan\", \"post_text\": \"All,\\n\\nFound my own error!\\n\\n\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_pos := C;\\n\\t SELF.Word := ToUpperCase(GetNthWord($.Clean(le.verse_text),C));\\n\\t SELF := le;\\nEND:\\n
\\nShould be:\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_pos := C;\\n\\t SELF.Word := ToUpperCase(GetNthWord($.Clean(le.verse_text),C));\\n\\t SELF := le;\\nEND;\\n
\\n\\nSpot the difference in the terminating ';' - I suppose the STL in C++ generates as obscure error messages (but only just)\", \"post_time\": \"2011-10-11 15:01:34\" },\n\t{ \"post_id\": 435, \"topic_id\": 131, \"forum_id\": 8, \"post_subject\": \"Newbe - getting to grips with more compiler errors.\", \"username\": \"Allan\", \"post_text\": \"Hi Me Again.\\n\\nUnable to get any of my own ECL to work I've fallen back on attempting to get 'Bible Search' example working from the WEB.\\n\\nhttp://www.dabhand.org/ECL/construct_a_ ... search.htm\\n\\nand\\n\\nhttp://www.dabhand.org/ECL/construct_a_ ... archII.htm\\n\\n\\nI get the 1st part working, i.e. the defines for reading in, transforming and functions for future probes.\\nBy working there are no syntax errors, and submitting it creates a successfully completed workunit.\\n\\nI'm now stuck on the 2nd part. Creating something used to query the data.\\nThe code 'Inversion.ecl' I've entered so far is:\\n\\nIMPORT * FROM STD.Str;\\n\\nEXPORT Inversion := MODULE\\n\\n SHARED I := $.File_KJV.txt;\\n \\n SHARED R := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_pos:= 0;\\n STRING Word := '';\\n END;\\n\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_pos := C;\\n SELF.Word := ToUpperCase(GetNthWord($.Clean(le.verse_text),C));\\n SELF := le;\\nEND:\\n\\nN := NORMALIZE(I,WordCount($.Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\nEXPORT Records := N;\\n\\nEND;\\n
\\n\\nI've also created a 'clean.ecl' with a single EXPORT in it.\\n\\nIMPORT * FROM STD.Str;\\n\\nEXPORT Clean(STRING S) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n
\\n\\nHowever the syntax check on the 'Inversion.ecl' reports:\\n\\nError: Unknown identifier "N" (21, 1), 2167, \\nError: Incompatible types: can not assign Integer to Record r (15, 11), 2007, \\nError: Unknown identifier "N" (22, 19), 2167, \\n
\\n\\nAny pointers would be gratefully received as the error message is not enlightening.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-11 14:49:29\" },\n\t{ \"post_id\": 510, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"nvasil\", \"post_text\": \"Here is an interesting fact.\\n\\nI have sprayed my files setting the Quote to nothing. Each file can be read as a dataset without any problem. When I combine them to a superfile I get an error "File ~rr::super1 contained a line of length greater than 10485760 bytes". This is exactly the error I was getting when I wasn't setting quote to nothing. It seems to me that something is going wrong when you combine the files into a super file. Most likely the quote for the superfile is set to the default '. Is there a way to change it?\\n\\nNick\", \"post_time\": \"2011-10-21 19:23:33\" },\n\t{ \"post_id\": 479, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"Tony Kirk\", \"post_text\": \"Excellent.\", \"post_time\": \"2011-10-17 21:29:19\" },\n\t{ \"post_id\": 478, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"nvasil\", \"post_text\": \"It did work,\\n\\nMaybe it is better to have the default quote empty\\n\\nThanks a lot\", \"post_time\": \"2011-10-17 21:22:07\" },\n\t{ \"post_id\": 477, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"Tony Kirk\", \"post_text\": \"Before resorting to hand-parsing your data, you could try the suggestion of the empty QUOTE set to see if the file is readable as sprayed.\", \"post_time\": \"2011-10-17 21:13:49\" },\n\t{ \"post_id\": 476, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"nvasil\", \"post_text\": \"Thank's a lot\\n\\nI did scan my file and it turns out there are quotes. So this is probably what is causing the problem. Since I have no control over what kind of character each line will have, is it possible to read every line as a string and then do the parsing/cleaning with ECL. 
What kind of spray can I do?\", \"post_time\": \"2011-10-17 21:08:56\" },\n\t{ \"post_id\": 475, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"Re: File contained a line of length greater than 10485760 by\", \"username\": \"Tony Kirk\", \"post_text\": \"Hi.\\n\\nWithout knowing the dataset contents, it's conjecture, but see this response to a similar question: viewtopic.php?f=10&t=102&sid=af6f55cb58fd5d3a84df9e46ea98aee0#p307.\\n\\nTony\", \"post_time\": \"2011-10-17 20:52:28\" },\n\t{ \"post_id\": 474, \"topic_id\": 136, \"forum_id\": 8, \"post_subject\": \"File contained a line of length greater than 10485760 bytes\", \"username\": \"nvasil\", \"post_text\": \"Hi there\\n\\nI have the following code\\n\\nResourceRecord := RECORD, MAXLENGTH(8192)\\n STRING ip;\\n INTEGER rid;\\n STRING dns;\\n INTEGER volume;\\nEND;\\n\\nrdata := DATASET(rr_in_dir+'::'+rr_file, ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\nOUTPUT(COUNT(rdata));\\n\\nwhen I run it I get \\n\\n<Error><source>eclagent</source><code>0</code><message>System error: 0: Graph[1], csvread[2]: SLAVE 10.92.xxx.xxx:6600: File ~rr_files::rrsets_20110801 contained a line of length greater than 10485760 bytes.</message></Error>\\n\\nI have run python scripts and I have made sure the file does not contain lines larger than 8000 characters. Also all lines are tab separated and they all contain 4 attributes. I have tested the file before spraying and after spraying on the node. Everything looks ok.\\nAlso the file detail from ecl watch are\\n\\nLogical Name:\\trr_files::rrsets_20110801\\nDescription:\\t\\n \\nModification Time:\\t2011-10-17 16:17:02 (UTC/GMT)\\nDirectory:\\t/mnt/HPCCSystems/hpcc-data/thor/rr_files\\nPathmask:\\trrsets_20110801._$P$_of_$N$\\nWorkunit:\\tD20111017-161701\\nJob Name:\\trrsets_20110801\\nSize:\\t4,972,697,527\\nFormat:\\tcsv\\nMaxRecordSize:\\t8192\\nCsvSeparate:\\t\\\\t\\nCsvQuote:\\t'\\nCsvTerminate:\\t\\\\n,\\\\r\\\\n\\n\\n\\nFile Parts:\\n\\nNumber\\tIP\\tSize\\n1\\t10.xxx.xx.xx\\t2,486,348,712\\n2\\t10.xxx.xx.xx\\t2,486,348,815\\n\\n\\nThis file belongs to following superfile(s):\\n\\nrr_files::super1\\n\\n\\nAny ideas what might be wrong?\", \"post_time\": \"2011-10-17 20:22:47\" },\n\t{ \"post_id\": 509, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"Hi Jsmith,\\n\\nThanks for the reply.\\n\\nI shall avoid the construct for the moment.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-21 14:56:06\" },\n\t{ \"post_id\": 508, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"jsmith\", \"post_text\": \"I've identified the problem, it's caused by a bug in the nway-select activity, which is introduced when there's a select on a set of inputs, as in your example. See issue: https://github.com/hpcc-systems/HPCC-Pl ... issues/772\\n\\nUntil the fix is rolled out, this construct should be avoided in Thor.\\nAlthough it will not always fail, e.g. 
if the output had to be disk (OUTPUT(Raw1, , '~mydiskfile')) in the example, it would have circumvented the bug.\", \"post_time\": \"2011-10-21 14:40:33\" },\n\t{ \"post_id\": 506, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"jsmith\", \"post_text\": \"Thanks, but looks like neither of those (master+slave) are from a failed workunit.\\nIn fact, these logs haven't run any workunits.\\n\\nI note they are in logdir 10_20_2011_17_34_08*, the failed workunit under Helpers, must point to a different reference I think, you'll see same contents in the master log as you'll see when you click on it from the workunit.\", \"post_time\": \"2011-10-20 19:18:08\" },\n\t{ \"post_id\": 505, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"This is the THORMASTER.log from a run that failed.\\n\\n00000001 2011-10-20 17:34:49 16545 16545 Opened log file //192.168.65.128/var/log/HPCCSystems/mythor/10_20_2011_17_34_08/THORMASTER.log\\n00000002 2011-10-20 17:34:49 16545 16545 Build community_3.2.2-1\\n00000003 2011-10-20 17:34:49 16545 16545 calling initClientProcess Port 6500\\n00000004 2011-10-20 17:34:49 16545 16545 Found file 'thorgroup', using to form thor group\\n00000005 2011-10-20 17:34:49 16545 16545 Starting watchdog\\n00000006 2011-10-20 17:34:49 16545 16545 ThorMaster version 4.0, Started on 192.168.65.128:6500\\n00000007 2011-10-20 17:34:49 16545 16545 CThorRowManager initialized, memlimit = 2147483648\\n00000008 2011-10-20 17:34:49 16545 16545 Thor name = mythor, queue = mythor.thor, nodeGroup = mythor\\n00000009 2011-10-20 17:34:49 16545 16545 Creating sentinel file thor.sentinel for rerun from script\\n0000000A 2011-10-20 17:34:49 16545 16545 Waiting for 1 slaves to register\\n0000000B 2011-10-20 17:34:49 16545 16545 Verifying connection to slave 1\\n0000000C 2011-10-20 17:34:49 16545 16545 verified connection with 192.168.65.128:6600\\n0000000D 2011-10-20 17:34:49 16545 16545 Slaves connected, initializing..\\n0000000E 2011-10-20 17:34:49 16545 16545 Initialization sent to slave group\\n0000000F 2011-10-20 17:34:49 16545 16545 Registration confirmation from 192.168.65.128:6600\\n00000010 2011-10-20 17:34:49 16545 16545 Slave 1 (192.168.65.128:6600) registered\\n00000011 2011-10-20 17:34:49 16545 16545 Slaves initialized\\n00000012 2011-10-20 17:34:49 16545 16560 Started watchdog\\n00000013 2011-10-20 17:34:49 16545 16545 verifying mp connection to rest of cluster\\n00000014 2011-10-20 17:34:49 16545 16545 verified mp connection to rest of cluster\\n00000015 2011-10-20 17:34:49 16545 16545 ,Progress,Thor,Startup,mythor,mythor,mythor.thor,//192.168.65.128/var/log/HPCCSystems/mythor/10_20_2011_17_34_08/THORMASTER.log\\n00000016 2011-10-20 17:34:49 16545 16545 Listening for graph\\n00000017 2011-10-20 17:34:49 16545 16545 ThorLCR(192.168.65.128:6500) available, waiting on queue thor.thor\\n00000018 2011-10-20 17:35:49 16545 16559 SYS: PU= 6% MU= 8% MAL=226552 MMP=0 SBK=226552 TOT=276K RAM=220548K SWP=32K\\n00000019 2011-10-20 17:36:49 16545 16559 SYS: PU= 4% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221340K SWP=32K\\n0000001A 2011-10-20 17:36:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.1 w/s=1.4 kw/s=15.4 bsy=1 NIC: rxp/s=0.9 rxk/s=0.1 txp/s=1.0 txk/s=0.4 CPU: usr=0 sys=2 iow=0 idle=96\\n0000001B 2011-10-20 17:37:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221236K 
SWP=32K\\n0000001C 2011-10-20 17:37:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=17.4 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n0000001D 2011-10-20 17:38:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221388K SWP=32K\\n0000001E 2011-10-20 17:38:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=16.2 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n0000001F 2011-10-20 17:39:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221804K SWP=32K\\n00000020 2011-10-20 17:39:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.2 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=96\\n00000021 2011-10-20 17:40:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221688K SWP=32K\\n00000022 2011-10-20 17:40:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=17.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=1 idle=96\\n00000023 2011-10-20 17:41:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222092K SWP=32K\\n00000024 2011-10-20 17:41:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.9 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n00000025 2011-10-20 17:42:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221988K SWP=32K\\n00000026 2011-10-20 17:42:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=14.8 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=96\\n00000027 2011-10-20 17:43:49 16545 16559 SYS: PU= 4% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222012K SWP=32K\\n00000028 2011-10-20 17:43:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.5 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=96\\n00000029 2011-10-20 17:44:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221920K SWP=32K\\n0000002A 2011-10-20 17:44:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n0000002B 2011-10-20 17:45:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221816K SWP=32K\\n0000002C 2011-10-20 17:45:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.2 kw/s=14.8 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n0000002D 2011-10-20 17:46:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221720K SWP=32K\\n0000002E 2011-10-20 17:46:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.1 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n0000002F 2011-10-20 17:47:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221752K SWP=32K\\n00000030 2011-10-20 17:47:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n00000031 2011-10-20 17:48:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221776K SWP=32K\\n00000032 2011-10-20 17:48:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.2 kw/s=14.6 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=97\\n00000033 2011-10-20 17:49:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221928K SWP=32K\\n00000034 2011-10-20 17:49:49 16545 16559 DSK: [sda] r/s=0.0 
kr/s=0.0 w/s=1.4 kw/s=16.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n00000035 2011-10-20 17:50:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=221952K SWP=32K\\n00000036 2011-10-20 17:50:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=15.2 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n00000037 2011-10-20 17:51:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=221980K SWP=32K\\n00000038 2011-10-20 17:51:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.3 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n00000039 2011-10-20 17:52:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222008K SWP=32K\\n0000003A 2011-10-20 17:52:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n0000003B 2011-10-20 17:53:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222540K SWP=32K\\n0000003C 2011-10-20 17:53:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.3 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n0000003D 2011-10-20 17:54:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222556K SWP=32K\\n0000003E 2011-10-20 17:54:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=97\\n0000003F 2011-10-20 17:55:49 16545 16559 SYS: PU= 4% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222460K SWP=32K\\n00000040 2011-10-20 17:55:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=16.1 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n00000041 2011-10-20 17:56:49 16545 16559 SYS: PU= 6% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222364K SWP=32K\\n00000042 2011-10-20 17:56:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=15.1 bsy=4 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=4 idle=94\\n00000043 2011-10-20 17:57:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222392K SWP=32K\\n00000044 2011-10-20 17:57:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=16.0 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n00000045 2011-10-20 17:58:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222416K SWP=32K\\n00000046 2011-10-20 17:58:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=15.0 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n00000047 2011-10-20 17:59:49 16545 16559 SYS: PU= 4% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222564K SWP=32K\\n00000048 2011-10-20 17:59:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.3 kw/s=16.1 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n00000049 2011-10-20 18:00:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222592K SWP=32K\\n0000004A 2011-10-20 18:00:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.2 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96\\n0000004B 2011-10-20 18:01:49 16545 16559 SYS: PU= 3% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=222492K SWP=32K\\n0000004C 2011-10-20 18:01:49 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=15.2 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 
txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97\\n0000004D 2011-10-20 18:11:13 16545 16559 SYS: PU= 0% MU= 8% MAL=255344 MMP=0 SBK=255344 TOT=368K RAM=222684K SWP=32K\\n0000004E 2011-10-20 18:11:13 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.1 kw/s=1.1 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n0000004F 2011-10-20 18:12:13 16545 16559 SYS: PU= 15% MU= 8% MAL=255336 MMP=0 SBK=255336 TOT=368K RAM=223740K SWP=32K\\n00000050 2011-10-20 18:12:13 16545 16559 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=19.2 bsy=13 NIC: rxp/s=2.9 rxk/s=0.3 txp/s=0.8 txk/s=0.2 CPU: usr=0 sys=2 iow=4 idle=91\\n
\\n\\nThis is the THORSLAVE from the same run.\\n\\n00000000 2011-10-20 17:34:49 16542 16542 Opened log file //192.168.65.128/var/log/HPCCSystems/mythor/10_20_2011_17_34_08_6600/THORSLAVE.192.168.65.128_6600.log\\n00000001 2011-10-20 17:34:49 16542 16542 Build community_3.2.2-1\\n00000002 2011-10-20 17:34:49 16542 16542 calling initClientProcess\\n00000003 2011-10-20 17:34:49 16542 16542 registering 192.168.65.128:6600 - master 192.168.65.128:6500\\n00000004 2011-10-20 17:34:49 16542 16542 Initialization received\\n00000005 2011-10-20 17:34:49 16542 16542 Registration confirmation sent\\n00000006 2011-10-20 17:34:49 16542 16542 verifying mp connection to rest of cluster\\n00000007 2011-10-20 17:34:49 16542 16542 verified mp connection to rest of cluster\\n00000008 2011-10-20 17:34:49 16542 16542 registered 192.168.65.128:6600\\n00000009 2011-10-20 17:34:49 16542 16542 CThorRowManager initialized, memlimit = 2147483648\\n0000000A 2011-10-20 17:34:49 16542 16542 ThorSlave Version LCR - 4.0 started\\n0000000B 2011-10-20 17:34:49 16542 16542 Slave 192.168.65.128:6600 - thor_tmp_dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000C 2011-10-20 17:34:49 16542 16542 Using querySo directory: /var/lib/HPCCSystems/queries/mythor\\n0000000D 2011-10-20 17:34:49 16542 16542 FileCache: limit = 1800, purgeN = 10\\n0000000E 2011-10-20 17:34:49 16542 16562 Watchdog: thread running\\n
\\n\\nI hope this helps.\\n\\nYours\\nAllan\", \"post_time\": \"2011-10-20 18:17:23\" },\n\t{ \"post_id\": 504, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"reply to dabayliss,\\n\\nSure I expect there are other ways to kill a cat.\\n\\nBut I'm attempting to learn the ECL and need to understand errors when they occur.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-20 17:30:19\" },\n\t{ \"post_id\": 503, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"jsmith\", \"post_text\": \"You'll need access to the file system to get to, there are not links to them in the IDE/workunit...\\n\\nThey'll be on the thor cluster nodes, under /var/log/HPCCSystems/mythor/<logdir*>\\nThe logdir prefix is visible under Helpers, i.e. it is part of the link name, that takes you to the master log.\\nYou'll need to note that down and login to the node that's hosting the thor slave(s) and get the logs that way.\\n\\nHope that helps.\", \"post_time\": \"2011-10-20 15:39:02\" },\n\t{ \"post_id\": 501, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis is the thormaster log from the failed run.\\n\\nI cannot find any slave files.\\n\\n00000026 2011-10-19 15:14:23 14243 14243 Started wuid=W20111019-151422, user=hpccdemo, graph=graph1\\n**\\n00000027 2011-10-19 15:14:23 14243 14243 Query /var/lib/HPCCSystems/queries/mythor/V2664623750_libW20111019-151422.so loaded\\n00000028 2011-10-19 15:14:23 14243 14243 allocateMPTag: tag = 65537\\n00000029 2011-10-19 15:14:23 14243 14243 allocateMPTag: tag = 65538\\n0000002A 2011-10-19 15:14:23 14243 14243 allocateMPTag: tag = 65539\\n0000002B 2011-10-19 15:14:23 14243 14243 allocateMPTag: tag = 65540\\n0000002C 2011-10-19 15:14:23 14243 14243 allocateMPTag: tag = 65541\\n0000002D 2011-10-19 15:14:23 14243 14243 Graph graph1 created\\n0000002E 2011-10-19 15:14:23 14243 14243 Running graph=graph1\\n0000002F 2011-10-19 15:14:23 14243 14243 temp directory cleared\\n00000030 2011-10-19 15:14:23 14243 14243 Add: Launching graph thread for graphId=1\\n00000031 2011-10-19 15:14:23 14243 14464 Running graph [global] : <graph>\\n <node id="2" label="Csv Read">\\n <att name="definition" value="Examples\\\\ExProject.ecl(12,37)"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET(INTERNAL('gl4'), layout_book, CSV(header(4), separator(''))); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="3" label="Csv Read">\\n <att name="definition" value="Examples\\\\ExProject.ecl(13,37)"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET(INTERNAL('gl6'), layout_book, CSV(header(4), separator(''))); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="4" label="Csv Read">\\n <att name="definition" value="Examples\\\\ExProject.ecl(14,19)"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET(INTERNAL('gl8'), layout_book, CSV(header(4), separator(''))); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="5" label="Csv Read">\\n <att name="definition" value="Examples\\\\ExProject.ecl(15,19)"/>\\n <att name="_kind" value="99"/>\\n <att 
name="ecl" value="DATASET(INTERNAL('glA'), layout_book, CSV(header(4), separator(''))); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="6" label="Select Nway Input">\\n <att name="definition" value="Examples\\\\ExProject.ecl(17,8)"/>\\n <att name="_kind" value="137"/>\\n <att name="ecl" value="no_rowsetindex(raw, 2); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..?[memory]"/>\\n </node>\\n <node id="7" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(999); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="0..999[group]"/>\\n </node>\\n <node id="8" label="Output Result #1">\\n <att name="definition" value="Examples\\\\ExProject.ecl(1,1)"/>\\n <att name="name" value="exproject"/>\\n <att name="definition" value="Examples\\\\ExProject.ecl(17,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="6"/>\\n <edge id="3_0" source="3" target="6">\\n <att name="_targetIndex" value="1"/>\\n </edge>\\n <edge id="4_0" source="4" target="6">\\n <att name="_targetIndex" value="2"/>\\n </edge>\\n <edge id="5_0" source="5" target="6">\\n <att name="_targetIndex" value="3"/>\\n </edge>\\n <edge id="6_0" source="6" target="7"/>\\n <edge id="7_0" source="7" target="8"/>\\n </graph>\\n - graph(graph1, 1)\\n00000032 2011-10-19 15:14:23 14243 14464 getResultString(gl4,-3)\\n00000033 2011-10-19 15:14:23 14243 14464 ,FileAccess,Thor,READ,mythor,hpccdemo,thor::niv::genesis,W20111019-151422,graph1,207327,1,mythor\\n00000034 2011-10-19 15:14:23 14243 14464 getResultString(gl6,-3)\\n00000035 2011-10-19 15:14:23 14243 14464 ,FileAccess,Thor,READ,mythor,hpccdemo,thor::niv::exodus,W20111019-151422,graph1,177564,1,mythor\\n00000036 2011-10-19 15:14:23 14243 14464 getResultString(gl8,-3)\\n00000037 2011-10-19 15:14:23 14243 14464 ,FileAccess,Thor,READ,mythor,hpccdemo,thor::niv::levit,W20111019-151422,graph1,132582,1,mythor\\n00000038 2011-10-19 15:14:23 14243 14464 getResultString(glA,-3)\\n00000039 2011-10-19 15:14:23 14243 14464 ,FileAccess,Thor,READ,mythor,hpccdemo,thor::niv::numbers,W20111019-151422,graph1,184240,1,mythor\\n0000003A 2011-10-19 15:14:23 14243 14464 CONNECTING (id=2, idx=0) to (id=6, idx=0) - activity(nwayselect, 6)\\n0000003B 2011-10-19 15:14:23 14243 14464 CONNECTING (id=3, idx=0) to (id=6, idx=1) - activity(nwayselect, 6)\\n0000003C 2011-10-19 15:14:23 14243 14464 CONNECTING (id=4, idx=0) to (id=6, idx=2) - activity(nwayselect, 6)\\n0000003D 2011-10-19 15:14:23 14243 14464 CONNECTING (id=5, idx=0) to (id=6, idx=3) - activity(nwayselect, 6)\\n0000003E 2011-10-19 15:14:23 14243 14464 allocateMPTag: tag = 65542\\n0000003F 2011-10-19 15:14:23 14243 14464 CONNECTING (id=6, idx=0) to (id=7, idx=0) - activity(firstn, 7)\\n00000040 2011-10-19 15:14:23 14243 14464 allocateMPTag: tag = 65543\\n00000041 2011-10-19 15:14:23 14243 14464 CONNECTING (id=7, idx=0) to (id=8, idx=0) - activity(workunitwrite, 8)\\n00000042 2011-10-19 15:14:23 14243 14464 Query dll: /var/lib/HPCCSystems/queries/mythor/V2664623750_libW20111019-151422.so\\n00000043 2011-10-19 15:14:23 14243 14464 ,Progress,Thor,StartSubgraph,mythor,W20111019-151422,1,1,mythor,mythor.thor\\n00000044 2011-10-19 15:14:23 14243 14464 allocateMPTag: tag = 65544\\n00000045 2011-10-19 15:14:23 14243 14464 
sendGraph took 5 ms - graph(graph1, 1)\\n00000046 2011-10-19 15:14:23 14243 14464 Processing graph - graph(graph1, 1)\\n00000047 2011-10-19 15:14:23 14243 14471 activity(firstn, 7) : Graph[1], firstn[7]: MP link closed (192.168.65.128:6600), Master exception\\n00000048 2011-10-19 15:14:23 14243 14469 activity(workunitwrite, 8) : Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n00000049 2011-10-19 15:14:23 14243 14469 4: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n0000004A 2011-10-19 15:14:23 14243 14469 INFORM [EXCEPTION]\\n0000004B 2011-10-19 15:14:23 14243 14469 4: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n0000004C 2011-10-19 15:14:23 14243 14469 Posting exception: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception to agent 192.168.65.128 for workunit(W20111019-151422)\\n0000004D 2011-10-19 15:14:23 14243 14469 INFORM [EXCEPTION]\\n0000004E 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(workunitwrite, 8)\\n0000004F 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(firstn, 7)\\n00000050 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(nwayselect, 6)\\n00000051 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(csvread, 2)\\n00000052 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(csvread, 5)\\n00000053 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(csvread, 4)\\n00000054 2011-10-19 15:14:24 14243 14469 Abort condition set - activity(csvread, 3)\\n00000055 2011-10-19 15:14:24 14243 14469 Aborting master graph - graph(graph1, 1) : MP link closed (192.168.65.128:6600)\\n00000056 2011-10-19 15:14:25 14243 14469 Aborting slave graph - graph(graph1, 1) : MP link closed (192.168.65.128:6600)\\n00000057 2011-10-19 15:14:25 14243 14469 4: Reporting exception to WU : 4, Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception : Error aborting job, will cause thor restart\\n00000058 2011-10-19 15:14:25 14243 14469 Stopping jobManager\\n00000059 2011-10-19 15:14:25 14243 14471 4: Graph[1], firstn[7]: MP link closed (192.168.65.128:6600), Master exception\\n0000005A 2011-10-19 15:14:25 14243 14471 INFORM [EXCEPTION]\\n0000005B 2011-10-19 15:14:25 14243 14471 4: Graph[1], firstn[7]: MP link closed (192.168.65.128:6600), Master exception\\n0000005C 2011-10-19 15:14:25 14243 14471 INFORM [EXCEPTION]\\n0000005D 2011-10-19 15:14:28 14243 14257 SYS: PU= 6% MU= 7% MAL=304032 MMP=0 SBK=304032 TOT=556K RAM=189552K SWP=32K\\n0000005E 2011-10-19 15:14:28 14243 14257 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=3.7 kw/s=59.4 bsy=0 NIC: rxp/s=2.4 rxk/s=1.0 txp/s=3.0 txk/s=0.8 CPU: usr=0 sys=4 iow=0 idle=95\\n0000005F 2011-10-19 15:14:28 14243 14257 KERN_INFO: [68519.077420] thorslave_6600[14470]: segfault at 4a8d0855 ip 009f7e68 sp 024e0ff0 error 4 in libactivityslaves_lcr.so[8c6000+171000]\\n00000060 2011-10-19 15:14:53 14243 14243 Waiting on executing graphs to complete.\\n00000061 2011-10-19 15:14:53 14243 14243 Currently running graphId = 1\\n00000062 2011-10-19 15:15:23 14243 14243 Waiting on executing graphs to complete.\\n00000063 2011-10-19 15:15:23 14243 14243 Currently running graphId = 1\\n00000064 2011-10-19 15:15:23 14243 14465 4: /var/jenkins/workspace/Release-3.2.2/src/thorlcr/graph/thgraphmaster.cpp(73) : FAILED TO RECOVER FROM EXCEPTION, STOPPING THOR : Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n00000065 
2011-10-19 15:15:23 14243 14461 4: /var/jenkins/workspace/Release-3.2.2/src/thorlcr/graph/thgraphmaster.cpp(73) : FAILED TO RECOVER FROM EXCEPTION, STOPPING THOR : Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n00000066 2011-10-19 15:15:23 14243 14465 ,Timing,ThorGraph,mythor,W20111019-151422,1,1,1,60261,FAILED,mythor,mythor.thor\\n00000067 2011-10-19 15:15:23 14243 14461 ,Progress,Thor,Terminate,mythor,mythor,mythor.thor,exception\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-20 14:47:55\" },\n\t{ \"post_id\": 496, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"jsmith\", \"post_text\": \"There's also a implicit super file format, e.g.:\\n\\nd := DATASET(base+'{'+SetBooks[1]+','+SetBooks[2]+','+SetBooks[3]+','+SetBooks[4]+'}', Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\n\\n\\n>what and where are the 'slave' logs?\\n\\nFor Thor, there's a master log + a slave log per thor node in the cluster.\\nThe path to the master log is listed in the workunit under Helpers, the slave logs will have a very similar path on each thor node.\\ne.g. master log :\\n//192.168.16.101/var/log/HPCCSystems/mythor/10_18_2011_17_00_30/THORMASTER.log\\n\\nslave logs will be, e.g.:\\n//192.168.16.101/var/log/HPCCSystems/mythor/10_18_2011_17_00_30_6600/THORSLAVE.192.168.16.101_6600.log\", \"post_time\": \"2011-10-20 00:25:48\" },\n\t{ \"post_id\": 495, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"dabayliss\", \"post_text\": \"Are they in the same format?\\nIf so join them into the same superfile - then you can read them as one file ...\\n\\nDavid\", \"post_time\": \"2011-10-19 23:52:46\" },\n\t{ \"post_id\": 494, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"bforeman\", \"post_text\": \">>>Err - what and where are the 'slave' logs?\\n\\nJSmith is looking for the slave log(s), which are only available (as far as I know) by going to the appropriate cluster, selecting the appropriate slave, then clicking on the disk icon. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-19 19:51:25\" },\n\t{ \"post_id\": 491, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"Hi Jsmith,\\n\\nThe:\\n\\n\\n OUTPUT( DATASET(base+SetBooks[2],Layout_Book,CSV(HEADING(4),SEPARATOR('')) ));\\n
\\ninstead of:\\n\\nOUTPUT(Raw[2]);\\n
\\n\\nWorks fine.\\n\\nErr - what and where are the 'slave' logs?\\nYours\\nAllan\", \"post_time\": \"2011-10-19 18:27:46\" },\n\t{ \"post_id\": 485, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Re: Inexplicable error when submitting a job to thor.\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nOUTPUT(Raw[2]);\\n\\ndoes it also fail if you read 'exodus' directly, instead of via Raw[2] ?\\ni.e. OUTPUT( DATASET(base+SetBooks[2],Layout_Book,CSV(HEADING(4),SEPARATOR('')) ))\\n\\nI may need to see the slave logs, could you post them here?\", \"post_time\": \"2011-10-19 17:00:24\" },\n\t{ \"post_id\": 484, \"topic_id\": 139, \"forum_id\": 8, \"post_subject\": \"Inexplicable error when submitting a job to thor.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've been attempting to use multiple input files.\\nI have 4 sprayed in logical files:\\n~thor::niv::genesis\\n~thor::niv::exodus\\n~thor::niv::levit\\n~thor::niv::numbers\\n\\nThe code below passes the syntax checker but fails once submitted to thor.\\n\\nIMPORT * from STD.str;\\nIMPORT * from STD;\\n\\nLayout_Book := RECORD\\n STRING Text;\\nEND;\\n\\nbase:= IF(System.Job.platform()='standalone', '', '~thor::niv::') : GLOBAL;\\nSetBooks := ['genesis', 'exodus', 'levit','numbers'];\\n\\nSET OF DATASET(Layout_Book) Raw := [DATASET(base+SetBooks[1],Layout_Book,CSV(HEADING(4),SEPARATOR(''))),\\n DATASET(base+SetBooks[2],Layout_Book,CSV(HEADING(4),SEPARATOR(''))),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDATASET(base+SetBooks[3],Layout_Book,CSV(HEADING(4),SEPARATOR(''))),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDATASET(base+SetBooks[4],Layout_Book,CSV(HEADING(4),SEPARATOR('')))];\\nOUTPUT(Raw[2]);\\n\\n/*\\nRaw1 := DATASET(base+SetBooks[1],Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\nOutput(Raw1);\\nRaw2 := DATASET(base+SetBooks[2],Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\nOutput(Raw2);\\nRaw3 := DATASET(base+SetBooks[3],Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\nOutput(Raw3);\\nRaw4 := DATASET(base+SetBooks[4],Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\nOutput(Raw4);\\n*/\\n
\\n\\nThe eclagent.log file is:\\n\\n00000000 2011-10-19 15:14:23 14448 14448 ECLAGENT build community_3.2.2-1\\n00000001 2011-10-19 15:14:23 14448 14448 Waiting for workunit lock\\n00000002 2011-10-19 15:14:23 14448 14448 Obtained workunit lock\\n00000003 2011-10-19 15:14:23 14448 14448 Loading dll (libW20111019-151422.so) from location /var/lib/HPCCSystems/myeclccserver/libW20111019-151422.so\\n00000004 2011-10-19 15:14:23 14448 14448 Starting process\\n00000005 2011-10-19 15:14:23 14448 14448 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000006 2011-10-19 15:14:23 14448 14448 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x9d800000 alignment=1048576 bitmapSize=10\\n00000007 2011-10-19 15:14:23 14448 14448 Waiting for run lock\\n00000008 2011-10-19 15:14:23 14448 14448 Obtained run lock\\n00000009 2011-10-19 15:14:23 14448 14448 setResultString(gl2,-3,'~thor::niv::')\\n0000000A 2011-10-19 15:14:23 14448 14448 setResultString(gl4,-3,'~thor::niv::genesis')\\n0000000B 2011-10-19 15:14:23 14448 14448 setResultString(gl6,-3,'~thor::niv::exodus')\\n0000000C 2011-10-19 15:14:23 14448 14448 setResultString(gl8,-3,'~thor::niv::levit')\\n0000000D 2011-10-19 15:14:23 14448 14448 setResultString(glA,-3,'~thor::niv::numbers')\\n0000000E 2011-10-19 15:14:23 14448 14448 Enqueuing on thor.thor to run wuid=W20111019-151422, graph=graph1, timelimit=600 seconds, priority=0\\n0000000F 2011-10-19 15:14:23 14448 14448 Thor on 192.168.65.128:6500 running W20111019-151422\\n00000010 2011-10-19 15:14:23 14448 14448 ERROR: 4: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception (in item 1)\\n00000011 2011-10-19 15:14:23 14448 14448 Releasing run lock\\n00000012 2011-10-19 15:14:23 14448 14448 System error: 4: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n00000013 2011-10-19 15:14:23 14448 14448 4: System error: 4: Graph[1], workunitwrite[8]: MP link closed (192.168.65.128:6600), Master exception\\n00000014 2011-10-19 15:14:23 14448 14448 Process complete\\n00000015 2011-10-19 15:14:23 14448 14448 Workunit written complete\\n
\\n\\nI also have the 'thormaster' log if required but it seems to say much the same thing.\\n\\nI know the files themselves, and references to them, are fine, because if I uncomment the currently commented code (and comment the earlier code) the submitted workunit works fine.\\n\\nBy the way, if anyone could give a better way to load multiple datasets into one set\\n(by calling a TRANSFORM from a PROJECT, I expect) I would be very grateful.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-19 15:35:56\" },\n\t{ \"post_id\": 532, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"So I did try the most recent version HPCC Platform v03.2.2-1 and I opened the UDP port you suggested and it worked. \\n\\nThanks very much\", \"post_time\": \"2011-10-25 01:40:38\" },\n\t{ \"post_id\": 531, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"I would doubt it is a firewall issue. I am running the system on EC2 following instructions from HPCC. Some queries run fine, for example \\nif rdata:=rdata1+rdata2+rdata3 it runs fine. I can run many queries (simple ones) on the cluster. When they become more data intensive it fails.\\nInterestingly enough, if I add more things to be done it doesn't run. Also, did you notice that the process on the slave actually dies because of a JMALLOC error? \\n\\nPeople from HPCC have reassured me that they run everything fine on the cloud so it cannot be a cloud issue.\", \"post_time\": \"2011-10-24 23:09:54\" },\n\t{ \"post_id\": 530, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"jsmith\", \"post_text\": \"Thanks, this master log shows no jobs have run, but still lost contact with the slave nodes:\\n\\n0000002F 2011-10-24 15:35:06 31976 31990 Watchdog : Marking Machine as Down! [10.222.195.159:6600]\\n00000030 2011-10-24 15:35:06 31976 31990 Watchdog : Marking Machine as Down! [10.93.95.117:6600]\\n\\nThese messages will first be reported after ~10 minutes, if there have been no watchdog messages from the slaves.\\n15:35:06 is precisely 10 minutes after Thor started.\\nSo it looks like the master is not receiving any watchdog messages from the slaves, at least in this run.\\n\\nCould it be that the watchdog messages are being blocked by a firewall issue?\\nWatchdog messages are UDP packets and will be on port 6606 by default.\\n\\nThe slave logs won't have much in them during this period if they're not running a job, but assuming the problem persists, it would be interesting to check the slave processes during the 1st 10-15 minutes of a thor start.\\nIf the jobs are otherwise processing normally until the 10 minute mark, when 'Watchdog : Marking Machine as Down!' appears in the master log, followed 5 minutes later by the master reporting 'Watchdog has lost contact with Thor slave..' and restarting,\\nthen it definitively suggests the UDP packets are being blocked.\\n\\nHope that helps.\", \"post_time\": \"2011-10-24 22:46:25\" },\n\t{ \"post_id\": 523, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"This is the log of the slave that failed; there is clearly a JMALLOC error. 
Here is the log from the master.\\n\\n00000000 2011-10-24 15:25:06 31976 31976 Opened log file //10.90.213.34/mnt/HPCCSystems/mythor/10_24_2011_15_25_05/THORM\\nASTER.log\\n00000001 2011-10-24 15:25:06 31976 31976 Build community_3.2.0-2\\n00000002 2011-10-24 15:25:06 31976 31976 calling initClientProcess Port 6500\\n00000003 2011-10-24 15:25:06 31976 31976 Checking cluster replicate nodes\\n00000004 2011-10-24 15:25:06 31976 31976 Cluster replicate nodes check completed in 2ms\\n00000005 2011-10-24 15:25:06 31976 31976 Starting watchdog\\n00000006 2011-10-24 15:25:06 31976 31990 Started watchdog\\n00000007 2011-10-24 15:25:06 31976 31976 ThorMaster version 4.0, Started on 10.90.213.34:6500\\n00000008 2011-10-24 15:25:06 31976 31976 CThorRowManager initialized, memlimit = 2147483648\\n00000009 2011-10-24 15:25:06 31976 31976 Thor name = mythor, queue = mythor.thor, nodeGroup = mythor\\n0000000A 2011-10-24 15:25:06 31976 31976 Creating sentinel file thor.sentinel for rerun from script\\n0000000B 2011-10-24 15:25:06 31976 31976 Waiting for 2 slaves to register\\n0000000C 2011-10-24 15:25:06 31976 31976 Verifying connection to slave 2\\n0000000D 2011-10-24 15:25:06 31976 31976 verified connection with 10.222.195.159:6600\\n0000000E 2011-10-24 15:25:06 31976 31976 Verifying connection to slave 1\\n0000000F 2011-10-24 15:25:06 31976 31976 verified connection with 10.93.95.117:6600\\n00000010 2011-10-24 15:25:06 31976 31976 Slaves connected, initializing..\\n00000011 2011-10-24 15:25:06 31976 31976 Initialization sent to slave group\\n00000012 2011-10-24 15:25:06 31976 31976 Registration confirmation from 10.93.95.117:6600\\n00000013 2011-10-24 15:25:06 31976 31976 Slave 1 (10.93.95.117:6600) registered\\n00000014 2011-10-24 15:25:06 31976 31976 Registration confirmation from 10.222.195.159:6600\\n00000015 2011-10-24 15:25:06 31976 31976 Slave 2 (10.222.195.159:6600) registered\\n00000016 2011-10-24 15:25:06 31976 31976 Slaves initialized\\n00000017 2011-10-24 15:25:06 31976 31976 verifying mp connection to rest of cluster\\n00000018 2011-10-24 15:25:06 31976 31976 verified mp connection to rest of cluster\\n00000019 2011-10-24 15:25:06 31976 31976 ,Progress,Thor,Startup,mythor,mythor,mythor.thor,//10.90.213.34/mnt/HPCCSystems\\n/mythor/10_24_2011_15_25_05/THORMASTER.log\\n0000001A 2011-10-24 15:25:06 31976 31976 Listening for graph\\n0000001B 2011-10-24 15:25:06 31976 31976 ThorLCR(10.90.213.34:6500) available, waiting on queue thor.thor\\n0000001C 2011-10-24 15:26:06 31976 31989 SYS: PU= 0% MU= 4% MAL=220800 MMP=0 SBK=220800 TOT=264K RAM=673472K SWP=0K\\n0000001D 2011-10-24 15:27:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=670156K SWP=0K\\n0000001E 2011-10-24 15:27:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.4 kw/s=9.7 bsy=0 NIC: rxp/s=18.4 rxk/s=2.5 tx\\np/s=16.2 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n0000001F 2011-10-24 15:28:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=674784K SWP=0K\\n00000020 2011-10-24 15:28:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.5 kw/s=9.8 bsy=0 NIC: rxp/s=25.9 rxk/s=3.3 tx\\np/s=21.5 txk/s=9.9 CPU: usr=0 sys=0 iow=0 idle=99\\n00000021 2011-10-24 15:29:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675024K SWP=0K\\n00000022 2011-10-24 15:29:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.7 kw/s=10.7 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 t\\nxp/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000023 2011-10-24 15:30:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 
SBK=237328 TOT=264K RAM=675256K SWP=0K\\n00000024 2011-10-24 15:30:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.3 kw/s=9.3 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 tx\\np/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=100\\n00000025 2011-10-24 15:31:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675496K SWP=0K\\n00000026 2011-10-24 15:31:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.5 kw/s=10.0 bsy=0 NIC: rxp/s=18.3 rxk/s=2.5 t\\nxp/s=16.2 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000027 2011-10-24 15:32:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675432K SWP=0K\\n00000028 2011-10-24 15:32:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.6 kw/s=10.5 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 t\\nxp/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000029 2011-10-24 15:33:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675512K SWP=0K\\n0000002A 2011-10-24 15:33:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.3 kw/s=9.4 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 tx\\np/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n0000002B 2011-10-24 15:34:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675444K SWP=0K\\n0000002C 2011-10-24 15:34:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.1 w/s=2.6 kw/s=10.3 bsy=0 NIC: rxp/s=18.1 rxk/s=2.5 t\\nxp/s=16.1 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n0000002D 2011-10-24 15:35:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675368K SWP=0K\\n0000002E 2011-10-24 15:35:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.4 kw/s=9.7 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 tx\\np/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n0000002F 2011-10-24 15:35:06 31976 31990 Watchdog : Marking Machine as Down! [10.222.195.159:6600]\\n00000030 2011-10-24 15:35:06 31976 31990 Watchdog : Marking Machine as Down! 
[10.93.95.117:6600]\\n00000031 2011-10-24 15:36:06 31976 31989 SYS: PU= 0% MU= 4% MAL=235120 MMP=0 SBK=235120 TOT=264K RAM=675768K SWP=0K\\n00000032 2011-10-24 15:36:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.6 kw/s=10.2 bsy=0 NIC: rxp/s=18.1 rxk/s=2.5 t\\nxp/s=16.0 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000033 2011-10-24 15:37:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675856K SWP=0K\\n00000034 2011-10-24 15:37:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.4 kw/s=9.5 bsy=0 NIC: rxp/s=18.1 rxk/s=2.5 tx\\np/s=16.1 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000035 2011-10-24 15:38:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675788K SWP=0K\\n00000036 2011-10-24 15:38:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.6 kw/s=10.4 bsy=0 NIC: rxp/s=17.8 rxk/s=2.4 t\\nxp/s=15.8 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000037 2011-10-24 15:39:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=675876K SWP=0K\\n00000038 2011-10-24 15:39:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.5 kw/s=10.0 bsy=0 NIC: rxp/s=18.2 rxk/s=2.5 t\\nxp/s=16.2 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n00000039 2011-10-24 15:40:06 31976 31989 SYS: PU= 0% MU= 4% MAL=237328 MMP=0 SBK=237328 TOT=264K RAM=676116K SWP=0K\\n0000003A 2011-10-24 15:40:06 31976 31989 DSK: [sdb] r/s=0.0 kr/s=0.0 w/s=2.4 kw/s=9.5 bsy=0 NIC: rxp/s=17.9 rxk/s=2.5 tx\\np/s=15.9 txk/s=2.5 CPU: usr=0 sys=0 iow=0 idle=99\\n0000003B 2011-10-24 15:40:06 31976 31990 10056: /var/jenkins/workspace/Release-3.2.0/src/thorlcr/master/thgraphmanager.c\\npp(786) : abortThor : Watchdog has lost contact with Thor slave: 10.222.195.159:6600 (Process terminated or node down?)\\n0000003C 2011-10-24 15:40:06 31976 31990 abortThor called\\n0000003D 2011-10-24 15:40:06 31976 31990 Stopping jobManager\\n0000003E 2011-10-24 15:40:06 31976 31990 aborting any current active job\\n0000003F 2011-10-24 15:40:06 31976 31976 acceptConversation aborted - terminating\\n00000040 2011-10-24 15:40:06 31976 31976 ,Progress,Thor,Terminate,mythor,mythor,mythor.thor\\n00000041 2011-10-24 15:40:06 31976 31976 ThorMaster terminated OK\\n00000042 2011-10-24 15:40:07 31976 31976 priority set id=140408941676288 policy=0 pri=0 PID=31976\\n00000043 2011-10-24 15:40:07 31976 31976 Stopping watchdog\\n00000044 2011-10-24 15:40:07 31976 31976 Stopped watchdog\\n00000045 2011-10-24 15:40:07 31976 31976 Thor closing down 6\\n00000046 2011-10-24 15:40:07 31976 31976 Thor closing down 5\\n00000047 2011-10-24 15:40:07 31976 31976 Thor closing down 4\\n00000048 2011-10-24 15:40:07 31976 31976 Thor closing down 3\\n00000049 2011-10-24 15:40:07 31976 31976 Thor closing down 2\\n0000004A 2011-10-24 15:40:07 31976 31976 Thor closing down 1\", \"post_time\": \"2011-10-24 16:09:59\" },\n\t{ \"post_id\": 522, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"bforeman\", \"post_text\": \"Hmmm, I was hoping the log would show the posted error (Watchdog has lost contact with Thor slave) but I don't see it there. Wondering if you posted the correct log. I have forwarded this to the development team for review.\", \"post_time\": \"2011-10-24 15:53:10\" },\n\t{ \"post_id\": 521, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"I am not using the IDE, I work on ubuntu and I still haven't been able to make it work. 
By the way the fact that the development tools are in windows is the biggest pain for HPCC, if you are using it for deploying systems. \\n\\nEventually I went through the slave and I found this. It is complaining about a memory leak \\n\\n00000000 2011-10-24 14:51:35 7564 7564 Opened log file //10.222.195.159/mnt/HPCCSystems/mythor/10_24_2011_14_51_34/THORSLAVE.10.222.195.159_6600.log\\n00000001 2011-10-24 14:51:35 7564 7564 Build community_3.2.0-2\\n00000002 2011-10-24 14:51:35 7564 7564 calling initClientProcess\\n00000003 2011-10-24 14:51:35 7564 7564 registering 10.222.195.159:6600 - master 10.90.213.34:6500\\n00000004 2011-10-24 14:51:36 7564 7564 Initialization received\\n00000005 2011-10-24 14:51:36 7564 7564 Registration confirmation sent\\n00000006 2011-10-24 14:51:36 7564 7564 verifying mp connection to rest of cluster\\n00000007 2011-10-24 14:51:36 7564 7564 verified mp connection to rest of cluster\\n00000008 2011-10-24 14:51:36 7564 7564 registered 10.222.195.159:6600\\n00000009 2011-10-24 14:51:36 7564 7564 CThorRowManager initialized, memlimit = 2147483648\\n0000000A 2011-10-24 14:51:36 7564 7564 ThorSlave Version LCR - 4.0 started\\n0000000B 2011-10-24 14:51:36 7564 7564 Slave 10.222.195.159:6600 - thor_tmp_dir set to : /mnt/HPCCSystems/mythor/temp/\\n0000000C 2011-10-24 14:51:36 7564 7564 Using querySo directory: /mnt/HPCCSystems/queries/mythor\\n0000000D 2011-10-24 14:51:36 7564 7564 FileCache: limit = 1800, purgeN = 10\\n0000000E 2011-10-24 14:51:36 7564 7574 priority set id=140538337232640 policy=0 pri=0 PID=7564\\n0000000F 2011-10-24 14:51:36 7564 7575 Watchdog: thread running\\n00000010 2011-10-24 15:00:31 7564 7564 Using query: /mnt/HPCCSystems/queries/mythor/V179191376_libW20111024-150030.so\\n00000011 2011-10-24 15:00:31 7564 7564 New Graph started : graph1\\n00000012 2011-10-24 15:00:31 7564 7564 temp directory cleared\\n00000013 2011-10-24 15:00:31 7564 7564 Disk space: /mnt/HPCCSystems/hpcc-data/thor = 389773, /mnt/HPCCSystems/hpcc-mirror/thor = 0\\n00000014 2011-10-24 15:00:31 7564 7564 Key file cache size set to: 6\\n00000015 2011-10-24 15:00:31 7564 7564 GraphInit: W20111024-150030graph1\\n00000016 2011-10-24 15:00:31 7564 7564 deserializeMPTag: tag = 65542\\n00000017 2011-10-24 15:00:31 7564 7564 deserializeMPTag: tag = 65539\\n00000018 2011-10-24 15:00:31 7564 7564 deserializeMPTag: tag = 65540\\n00000019 2011-10-24 15:00:31 7564 7564 deserializeMPTag: tag = 65541\\n0000001A 2011-10-24 15:00:31 7564 7564 Add: Launching graph thread for graphId=1\\n0000001B 2011-10-24 15:00:31 7564 7590 Running graph [global] : <graph>\\n <node id="2" label="Csv Read '...::rrsets_20110801'">\\n <att name="definition" value="dedupnotos.ecl(21,1)"/>\\n <att name="name" value="rdata1"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~rr::rrsets_20110801', resourcerecord, CSV(maxlength(8192), separator(['\\\\t', ' ']))); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~rr::rrsets_20110801"/>\\n </node>\\n <node id="3" label="Csv Read '...::rrsets_20110802'">\\n <att name="definition" value="dedupnotos.ecl(23,1)"/>\\n <att name="name" value="rdata2"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~rr::rrsets_20110802', resourcerecord, CSV(maxlength(8192), separator(['\\\\t', ' ']))); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~rr::rrsets_20110802"/>\\n 
</node>\\n <node id="4" label="Csv Read '...::rrsets_20110803'">\\n <att name="definition" value="dedupnotos.ecl(25,1)"/>\\n <att name="name" value="rdata3"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~rr::rrsets_20110803', resourcerecord, CSV(maxlength(8192), separator(['\\\\t', ' ']))); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~rr::rrsets_20110803"/>\\n </node>\\n <node id="5" label="Csv Read '...::rrsets_20110804'">\\n <att name="definition" value="dedupnotos.ecl(27,1)"/>\\n <att name="name" value="rdata4"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~rr::rrsets_20110804', resourcerecord, CSV(maxlength(8192), separator(['\\\\t', ' ']))); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~rr::rrsets_20110804"/>\\n </node>\\n <node id="6" label="Csv Read '...::rrsets_20110805'">\\n <att name="definition" value="dedupnotos.ecl(29,1)"/>\\n <att name="name" value="rdata5"/>\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~rr::rrsets_20110805', resourcerecord, CSV(maxlength(8192), separator(['\\\\t', ' ']))); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~rr::rrsets_20110805"/>\\n </node>\\n <node id="7" label="Funnel">\\n <att name="definition" value="dedupnotos.ecl(32,1)"/>\\n <att name="name" value="rdata"/>\\n <att name="_kind" value="22"/>\\n <att name="ecl" value="rdata1 + rdata2 + rdata3 + rdata4 + rdata5; "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="8" label="Filter">\\n <att name="definition" value="dedupnotos.ecl(37,1)"/>\\n <att name="name" value="rdata_clean"/>\\n <att name="_kind" value="5"/>\\n <att name="ecl" value="FILTER(COUNT(std.str.dosplitwords(src, separator, std.str.calcwordsetsize(src, separator))) = ...); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="9" label="Disk Write 'rr_files_processed::rrsets_201108_clean'">\\n <att name="definition" value="dedupnotos.ecl(1,1)"/>\\n <att name="name" value="dedupnotos"/>\\n <att name="definition" value="dedupnotos.ecl(38,1)"/>\\n <att name="_kind" value="2"/>\\n <att name="ecl" value="OUTPUT(..., , 'rr_files_processed::rrsets_201108_clean'); "/>\\n <att name="recordSize" value="24..8192(536)"/>\\n <att name="_fileName" value="rr_files_processed::rrsets_201108_clean"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="7"/>\\n <edge id="3_0" source="3" target="7">\\n <att name="_targetIndex" value="1"/>\\n </edge>\\n <edge id="4_0" source="4" target="7">\\n <att name="_targetIndex" value="2"/>\\n </edge>\\n <edge id="5_0" source="5" target="7">\\n <att name="_targetIndex" value="3"/>\\n </edge>\\n <edge id="6_0" source="6" target="7">\\n <att name="_targetIndex" value="4"/>\\n </edge>\\n <edge id="7_0" source="7" target="8"/>\\n <edge id="8_0" source="8" target="9"/>\\n </graph>\\n - graph(graph1, 1)\\n0000001C 2011-10-24 15:00:31 7564 7590 CONNECTING (id=2, idx=0) to (id=7, idx=0) - activity(funnel, 7)\\n0000001D 2011-10-24 15:00:31 7564 7590 CONNECTING (id=3, idx=0) to (id=7, idx=1) - activity(funnel, 7)\\n0000001E 2011-10-24 15:00:31 7564 7590 CONNECTING (id=4, idx=0) to (id=7, idx=2) - activity(funnel, 
7)\\n0000001F 2011-10-24 15:00:31 7564 7590 CONNECTING (id=5, idx=0) to (id=7, idx=3) - activity(funnel, 7)\\n00000020 2011-10-24 15:00:31 7564 7590 CONNECTING (id=6, idx=0) to (id=7, idx=4) - activity(funnel, 7)\\n00000021 2011-10-24 15:00:31 7564 7590 CONNECTING (id=7, idx=0) to (id=8, idx=0) - activity(filter, 8)\\n00000022 2011-10-24 15:00:31 7564 7590 CONNECTING (id=8, idx=0) to (id=9, idx=0) - activity(diskwrite, 9)\\n00000023 2011-10-24 15:00:31 7564 7590 FUNNEL mode = PARALLEL, grouped=UNGROUPED - activity(funnel, 7)\\n00000024 2011-10-24 15:00:31 7564 7590 Watchdog: Start Job 1\\n00000025 2011-10-24 15:00:31 7564 7591 handling fname : /mnt/HPCCSystems/hpcc-data/thor/thor/rr_files_processed/rrsets_201108_clean._2_of_2 - activity(diskwrite, 9)\\n00000026 2011-10-24 15:00:31 7564 7591 Starting input - activity(diskwrite, 9)\\n00000027 2011-10-24 15:00:31 7564 7591 Starting input - activity(filter, 8)\\n00000028 2011-10-24 15:00:31 7564 7592 csvread[part=1]: reading physical file '/mnt/HPCCSystems/hpcc-data/thor/rr/rrsets_20110801._2_of_2' (logical file = ~rr::rrsets_20110801) - activity(csvread, 2)\\n00000029 2011-10-24 15:00:31 7564 7592 csvread[part=1]: Base offset to 2486348791 - activity(csvread, 2)\\n0000002A 2011-10-24 15:00:31 7564 7592 ITDL starting for output 0 - activity(csvread, 2)\\n0000002B 2011-10-24 15:00:31 7564 7594 csvread[part=1]: reading physical file '/mnt/HPCCSystems/hpcc-data/thor/rr/rrsets_20110803._2_of_2' (logical file = ~rr::rrsets_20110803) - activity(csvread, 4)\\n0000002C 2011-10-24 15:00:31 7564 7594 csvread[part=1]: Base offset to 2542483820 - activity(csvread, 4)\\n0000002D 2011-10-24 15:00:31 7564 7593 csvread[part=1]: reading physical file '/mnt/HPCCSystems/hpcc-data/thor/rr/rrsets_20110802._2_of_2' (logical file = ~rr::rrsets_20110802) - activity(csvread, 3)\\n0000002E 2011-10-24 15:00:31 7564 7593 csvread[part=1]: Base offset to 2480747096 - activity(csvread, 3)\\n0000002F 2011-10-24 15:00:31 7564 7591 ITDL starting for output 0 - activity(funnel, 7)\\n00000030 2011-10-24 15:00:31 7564 7591 ITDL starting for output 0 - activity(filter, 8)\\n00000031 2011-10-24 15:00:31 7564 7595 csvread[part=1]: reading physical file '/mnt/HPCCSystems/hpcc-data/thor/rr/rrsets_20110804._2_of_2' (logical file = ~rr::rrsets_20110804) - activity(csvread, 5)\\n00000032 2011-10-24 15:00:31 7564 7594 ITDL starting for output 0 - activity(csvread, 4)\\n00000034 2011-10-24 15:00:31 7564 7593 ITDL starting for output 0 - activity(csvread, 3)\\n00000035 2011-10-24 15:00:31 7564 7591 Writing to file: /mnt/HPCCSystems/hpcc-data/thor/thor/rr_files_processed/thtmp7564_1__partial.tmp - activity(diskwrite, 9)\\n00000036 2011-10-24 15:00:31 7564 7591 Created output stream for /mnt/HPCCSystems/hpcc-data/thor/thor/rr_files_processed/rrsets_201108_clean._2_of_2 - activity(diskwrite, 9)\\n00000037 2011-10-24 15:00:31 7564 7591 Ungrouped - activity(diskwrite, 9)\\n00000033 2011-10-24 15:00:31 7564 7595 csvread[part=1]: Base offset to 2478564725 - activity(csvread, 5)\\n00000038 2011-10-24 15:00:31 7564 7595 ITDL starting for output 0 - activity(csvread, 5)\\n00000039 2011-10-24 15:00:31 7564 7592 Record size (max) = 8192 - activity(csvread, 2)\\n0000003A 2011-10-24 15:00:31 7564 7594 Record size (max) = 8192 - activity(csvread, 4)\\n0000003B 2011-10-24 15:00:31 7564 7591 Record size (max) = 8192 - activity(funnel, 7)\\n0000003C 2011-10-24 15:00:31 7564 7591 Record size (max) = 8192 - activity(filter, 8)\\n0000003D 2011-10-24 15:00:31 7564 7596 csvread[part=1]: reading physical file 
'/mnt/HPCCSystems/hpcc-data/thor/rr/rrsets_20110805._2_of_2' (logical file = ~rr::rrsets_20110805) - activity(csvread, 6)\\n0000003E 2011-10-24 15:00:31 7564 7596 csvread[part=1]: Base offset to 2395982404 - activity(csvread, 6)\\n0000003F 2011-10-24 15:00:31 7564 7596 ITDL starting for output 0 - activity(csvread, 6)\\n00000040 2011-10-24 15:00:31 7564 7593 Record size (max) = 8192 - activity(csvread, 3)\\n00000041 2011-10-24 15:00:31 7564 7596 Record size (max) = 8192 - activity(csvread, 6)\\n00000042 2011-10-24 15:00:31 7564 7595 Record size (max) = 8192 - activity(csvread, 5)\\n00000043 2011-10-24 15:01:31 7564 7589 SYS: PU= 30% MU= 6% MAL=6980688 MMP=6602752 SBK=377936 TOT=7016K RAM=1001084K SWP=0K\\n00000044 2011-10-24 15:02:31 7564 7589 SYS: PU= 30% MU= 6% MAL=6997216 MMP=6602752 SBK=394464 TOT=7016K RAM=1020716K SWP=0K\\n00000045 2011-10-24 15:02:31 7564 7589 DSK: [sdb] r/s=0.1 kr/s=0.4 w/s=343.4 kw/s=14684.0 bsy=27 NIC: rxp/s=0.1 rxk/s=0.0 txp/s=0.1 txk/s=0.0 CPU: usr=14 sys=8 iow=8 idle=69\\n00000046 2011-10-24 15:03:31 7564 7589 SYS: PU= 30% MU= 6% MAL=6997216 MMP=6602752 SBK=394464 TOT=7016K RAM=1038516K SWP=0K\\n00000047 2011-10-24 15:03:31 7564 7589 DSK: [sdb] r/s=0.1 kr/s=0.4 w/s=208.4 kw/s=8738.5 bsy=17 NIC: rxp/s=0.1 rxk/s=0.0 txp/s=0.1 txk/s=0.0 CPU: usr=16 sys=9 iow=3 idle=69\\n00000048 2011-10-24 15:04:31 7564 7589 SYS: PU= 28% MU= 6% MAL=6997216 MMP=6602752 SBK=394464 TOT=7016K RAM=1032512K SWP=0K\\n00000049 2011-10-24 15:04:31 7564 7589 DSK: [sdb] r/s=19.3 kr/s=809.5 w/s=346.2 kw/s=14829.5 bsy=30 NIC: rxp/s=0.1 rxk/s=0.0 txp/s=0.1 txk/s=0.0 CPU: usr=14 sys=8 iow=5 idle=71\\n0000004A 2011-10-24 15:05:31 7564 7589 SYS: PU= 29% MU= 6% MAL=6997216 MMP=6602752 SBK=394464 TOT=7016K RAM=1028748K SWP=0K\\n0000004B 2011-10-24 15:05:31 7564 7589 DSK: [sdb] r/s=61.3 kr/s=2586.5 w/s=248.1 kw/s=10461.6 bsy=25 NIC: rxp/s=0.1 rxk/s=0.0 txp/s=0.2 txk/s=0.0 CPU: usr=14 sys=9 iow=5 idle=70\\n0000004C 2011-10-24 15:06:31 7564 7589 SYS: PU= 32% MU= 6% MAL=6997216 MMP=6602752 SBK=394464 TOT=7016K RAM=1020944K SWP=0K\\n0000004D 2011-10-24 15:06:31 7564 7589 DSK: [sdb] r/s=114.9 kr/s=4848.1 w/s=219.9 kw/s=9301.1 bsy=26 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.1 txk/s=0.0 CPU: usr=15 sys=9 iow=8 idle=67\\n0000004E 2011-10-24 15:06:38 7564 7564 GraphAbort: W20111024-150030graph1\\n0000004F 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(diskwrite, 9)\\n00000050 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(filter, 8)\\n00000051 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(funnel, 7)\\n00000052 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(csvread, 2)\\n00000053 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(csvread, 6)\\n00000054 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(csvread, 5)\\n00000055 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(csvread, 4)\\n00000056 2011-10-24 15:06:39 7564 7564 Abort condition set - activity(csvread, 3)\\n00000057 2011-10-24 15:06:39 7564 7591 Wrote 46387785 records, crc=0x195616B6 - activity(diskwrite, 9)\\n00000058 2011-10-24 15:06:39 7564 7591 Stopping input for - activity(diskwrite, 9)\\n00000059 2011-10-24 15:06:39 7564 7591 Stopping input for - activity(filter, 8)\\n0000005A 2011-10-24 15:06:39 7564 7594 ITDL output 0 stopped, count was 12513581 - activity(csvread, 4)\\n0000005C 2011-10-24 15:06:39 7564 7593 ITDL output 0 stopped, count was 12653054 - activity(csvread, 3)\\n0000005D 2011-10-24 15:06:39 7564 7592 ITDL output 0 stopped, count was 12227349 - 
activity(csvread, 2)\\n0000005E 2011-10-24 15:06:39 7564 7596 ITDL output 0 stopped, count was 12385313 - activity(csvread, 6)\\n0000005F 2011-10-24 15:06:39 7564 7596 funnel(4): Read 12385313 records - activity(funnel, 7) [ecl=rdata1 + rdata2 + rdata3 + rdata4 + rdata5;]\\n00000060 2011-10-24 15:06:39 7564 7593 funnel(1): Read 12653054 records - activity(funnel, 7) [ecl=rdata1 + rdata2 + rdata3 + rdata4 + rdata5;]\\n0000005B 2011-10-24 15:06:39 7564 7595 ITDL output 0 stopped, count was 12501224 - activity(csvread, 5)\\n00000061 2011-10-24 15:06:39 7564 7592 funnel(0): Read 12227349 records - activity(funnel, 7) [ecl=rdata1 + rdata2 + rdata3 + rdata4 + rdata5;]\\n00000062 2011-10-24 15:06:39 7564 7595 funnel(3): Read 12501224 records - activity(funnel, 7) [ecl=rdata1 + rdata2 + rdata3 + rdata4 + rdata5;]\\n00000063 2011-10-24 15:06:39 7564 7594 funnel(2): Read 12513581 records - activity(funnel, 7) [ecl=rdata1 + rdata2 + rdata3 + rdata4 + rdata5;]\\n00000064 2011-10-24 15:06:39 7564 7591 ITDL output 0 stopped, count was 62259797 - activity(funnel, 7)\\n00000065 2011-10-24 15:06:39 7564 7591 ITDL output 0 stopped, count was 46387785 - activity(filter, 8)\\n00000066 2011-10-24 15:06:39 7564 7590 Graph wait cancelled, aborted=true - graph(graph1, 1)\\n00000067 2011-10-24 15:06:39 7564 7590 End of sub-graph - graph(graph1, 1)\\n00000068 2011-10-24 15:06:39 7564 7590 Watchdog: Stop Job 1\\n00000069 2011-10-24 15:06:39 7564 7590 Socket statistics : connects=1\\nconnecttime=472us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=46\\nreadtime=785us\\nreadsize=142343 bytes\\nwrites=87\\nwritetime=2944us\\nwritesize=7605 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 1)\\n0000006A 2011-10-24 15:06:39 7564 7590 Graph Done - graph(graph1, 1)\\n0000006B 2011-10-24 15:06:39 7564 7590 PU= 28% MU= 6% MAL=2500112 MMP=2113536 SBK=386576 TOT=2632K RAM=1015248K SWP=0K DSK: [sdb] r/s=136.3 kr/s=5749.8 w/s=6.6 kw/s=26.3 bsy=6 NIC: rxp/s=4.4 rxk/s=0.6 txp/s=4.9 txk/s=0.7 CPU: usr=17 sys=10 iow=1 idle=71 - graph(graph1, 1)\\n0000006C 2011-10-24 15:06:39 7564 7590 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n0000006D 2011-10-24 15:06:39 7564 7564 QueryDone, removing W20111024-150030graph1 from jobs\\n0000006E 2011-10-24 15:06:39 7564 7564 Job ended : graph1\\n0000006F 2011-10-24 15:06:39 7564 7564 destroying ProcessSlaveActivity - activity(diskwrite, 9)\\n00000070 2011-10-24 15:06:39 7564 7564 ProcessSlaveActivity : joining process thread - activity(diskwrite, 9)\\n00000071 2011-10-24 15:06:39 7564 7564 AFTER ProcessSlaveActivity : joining process thread - activity(diskwrite, 9)\\n00000072 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(diskwrite, 9)\\n00000073 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(filter, 8)\\n00000074 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(csvread, 2)\\n00000075 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(csvread, 3)\\n00000076 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(csvread, 4)\\n00000077 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(csvread, 5)\\n00000078 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(csvread, 6)\\n00000079 2011-10-24 15:06:39 7564 7564 DESTROYED - activity(funnel, 7)\\n0000007A 2011-10-24 15:06:39 7564 7564 CJobBase resetting memory manager\\n0000007B 2011-10-24 15:06:39 7564 7564 JMALLOC LEAKCHECKING: 1 leaks, total memory 82\\n0000007C 2011-10-24 15:06:39 
7564 7564 JMALLOC OSBLOCKS: 4, total memory 4194304\\n0000007D 2011-10-24 15:06:39 7564 7564 CThorRowManager initialized, memlimit = 2147483648\\n0000007E 2011-10-24 15:06:39 7564 7564 QueryDone, removed W20111024-150030graph1 from jobs\\n0000007F 2011-10-24 15:06:40 7564 7564 Stopped watchdog\", \"post_time\": \"2011-10-24 15:37:00\" },\n\t{ \"post_id\": 520, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"bforeman\", \"post_text\": \"Where do I find the log file?\\nIs it in the esp server on the slave that has the problem?\\n\\nIn the ECL Watch from the ECL IDE, there should be a log in the helpers section, and then the slave log would be found by opening the ECL Watch in a browser, selecting target clusters/mythor and then clicking on the disk icon to drill down to the appropriate slave log. When you open the target slave log you can download and then post it here.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2011-10-24 15:27:54\" },\n\t{ \"post_id\": 518, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"Where do I find the log file?\\nIs it in the esp server on the slave that has the problem?\", \"post_time\": \"2011-10-24 15:16:06\" },\n\t{ \"post_id\": 514, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"Re: watchdog has lost contact with Thor slave\", \"username\": \"bforeman\", \"post_text\": \"I can't see anything jump out in the code that would cause a delay, perhaps if you posted the THOR log we could see what is causing the delay and loss of contact.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-24 12:53:35\" },\n\t{ \"post_id\": 511, \"topic_id\": 142, \"forum_id\": 8, \"post_subject\": \"watchdog has lost contact with Thor slave\", \"username\": \"nvasil\", \"post_text\": \"I am running the following query\\nand I always get the following error\\n\\nError><source>eclagent</source><code>10056</code><message>System error: 10056: Watchdog has lost contact with Thor slave: 10.127.xxx.xxx:6600\\n\\nI have checked the slave and is up and running. 
There is enough memory and disk space\\n\\nimport $, std;\\n\\nResourceRecord := RECORD, MAXLENGTH(8192)\\n STRING ip;\\n INTEGER rid;\\n STRING dns;\\n INTEGER volume;\\nEND;\\n\\nSTRING rr_in_dir := '~rr';\\nSTRING rr_file1 := 'rrsets_20110801';\\nSTRING rr_file2 := 'rrsets_20110802';\\nSTRING rr_file3 := 'rrsets_20110803';\\nSTRING rr_file4 := 'rrsets_20110804';\\nSTRING rr_file5 := 'rrsets_20110805';\\n\\nSTRING rr_file := 'rrsets_201108';\\nSTRING rr_out_dir := 'rr_files_processed';\\n\\n\\n// all the resource record data\\nrdata1 := DATASET(rr_in_dir+'::'+rr_file1, ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\nrdata2 := DATASET(rr_in_dir+'::'+rr_file2, record_defs.ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\nrdata3 := DATASET(rr_in_dir+'::'+rr_file3, ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\nrdata4 := DATASET(rr_in_dir+'::'+rr_file4, ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\nrdata5 := DATASET(rr_in_dir+'::'+rr_file5, ResourceRecord, \\n CSV(MAXLENGTH(8192), SEPARATOR(['\\\\t', ' '])));\\n\\nrdata:=DISTRIBUTE(rdata1+rdata2+rdata3+rdata4+rdata5);\\n\\n//OUTPUT(COUNT(rdata));\\n\\nrdata_clean := rdata(IsIP(rdata.ip));\\nOUTPUT(rdata_clean,,rr_out_dir+'::'+rr_file+'_clean');\\n\\nBOOLEAN IsIP(STRING x) := FUNCTION\\n words:=std.Str.SplitWords(x, '.');\\n BOOLEAN is4words := IF(COUNT(words)!=4, false, \\n true);\\n Rec :=RECORD\\n STRING str;\\n END;\\n ds:=DATASET(words, Rec);\\n valid_words:=ds((INTEGER)str>=0 AND (INTEGER)str<=255);\\n result:=IF(is4words AND COUNT(valid_words)=4, true, false);\\n return result;\\nEND;\\n\\n\\nEND;\", \"post_time\": \"2011-10-21 21:40:28\" },\n\t{ \"post_id\": 538, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Thanks dabayliss,\\n\\n\\nYour example explains a few things to me and as added bonus worked a treat.
\\n\\nYours\\nAllan\", \"post_time\": \"2011-10-26 13:57:06\" },\n\t{ \"post_id\": 534, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Thanks everyone for these replies.\\n\\nDefinitely enough here to keep me busy.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-25 14:28:12\" },\n\t{ \"post_id\": 529, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"dabayliss\", \"post_text\": \"Should be noted though that the 'pure' vertical slice is now redundant unless you have something such as a 'whole record' form of sort or an output/persist downstream. If you are 'just removing stuff for efficiency' - the compiler will now do it for you (and ignore your own attempts at doing so)\\n\\nDavid\", \"post_time\": \"2011-10-24 19:53:46\" },\n\t{ \"post_id\": 528, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"Allan,\\n\\n>>The source specification in the 'format' seems to have to contain information on the actual recordset being used. (the 1st parameter to the TABLE command)<<\\n\\nThe TABLE function creates a temporary "table" in memory by deriving data from the first parameter recordset and structuring it using the second parameter RECORD structure. Therefore, the RECORD structure has to define, for each field in the table it creates, the datatype (can be inferred), name of the field, AND how to construct the output field. \\n\\nNow, most TABLE functions (especially the vertical slice variety) simply specify the subset of fields from the dataset that you want to work with (making this a valuable tool for following the "operate only on the data you need" principle), but that is far from the only thing you can do with TABLE.\\n\\nHere's a simple example that demonstrates doing more than just slicing out some columns:\\n\\n
\\nIMPORT STD;\\nds := DATASET([{'A','B','C'},{'D','E','F'},{'G','H','I'}],\\n {STRING1 Ltr1, STRING1 Ltr2, STRING1 Ltr3});\\n\\nTrec := RECORD\\n STRING1 Ltr1 := ds.Ltr1; //explicitly specifies type, name, and source\\n ds.Ltr2; //implicitly specifies type, name, and source\\n STRING1 Ltr3 := STD.Str.ToLowerCase(ds.Ltr3); //modify the data\\n STRING3 F1 := ds.Ltr1 + ds.Ltr2 + ds.Ltr3; //build totally new fields\\n STRING20 F4 := 'I am a work field'; //create a work field\\nEND;\\t\\t\\t\\t\\t\\t\\t\\nt := TABLE(ds,Trec);\\nt;\\t\\t\\t\\t\\t\\t\\t
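\\n// Expected first output row (worked out by hand from the inline dataset above, so treat it as illustrative):\\n// Ltr1='A', Ltr2='B', Ltr3='c', F1='ABC', F4='I am a work field'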
\\n\\nNotice my input structure and output from the TABLE are completely different. But each field in Trec defines the datatype, name, and source of the result data, each in a different way.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-10-24 19:20:52\" },\n\t{ \"post_id\": 526, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"dabayliss\", \"post_text\": \"I don't have the data - so I've only checked that this syntax-checks - but it may not work \\n\\n
Layout_Book := RECORD\\n STRING Text;\\nEND;\\n\\nLayout_Verse := RECORD\\n\\t\\tUNSIGNED1 Book;\\n UNSIGNED2 Chapter;\\n UNSIGNED2 Verse;\\n\\t\\tUNSIGNED4 HashN := 0;\\n STRING Text;\\nEND;\\n\\nBooks := ['genesis','exodus','leviticus','numbers']; // etc\\n\\nLayout_Verse FromBook(UNSIGNED1 Bk) := FUNCTION\\n\\tRaw := DATASET('~thor::niv::'+Books[Bk],Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\n\\tLayout_Verse Split(Layout_Book pInput) := TRANSFORM\\n\\t SELF.Book := Bk;\\n SELF.Chapter := (UNSIGNED2)pInput.Text[1..Find(pInput.Text,':',1)];\\n SELF.Verse := (UNSIGNED2)pInput.Text[Find(pInput.Text,':',1)+1..Find(pInput.Text,':',2)];\\n SELF.Text := pInput.Text[Find(pInput.Text,':',2)+1..];\\n\\tEND;\\n\\tP1 := PROJECT(Raw,Split(LEFT));\\n\\tRETURN PROJECT(P1,TRANSFORM(Layout_Verse,SELF.HashN := HASH(LEFT.Text,LEFT.Chapter,LEFT.Verse), SELF := LEFT));\\n END;\\n\\nRawGen := FromBook(1);\\nRawExo := FromBook(2);\\n\\nRawGen+RawExo
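\\n// Side notes: the bare RawGen+RawExo on the last line is itself the action, so running the query should output the combined datastream.\\n// Find() above assumes the string library is in scope; if it is not, an import along the lines of IMPORT * FROM STD.Str; would be needed.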
\\n\\nFor this demo example it would be cleaner to pass in the name rather than have the set. The set is useful if you want to use Loop to bring in all the books to one datastream .... (and generally speaking - 1 datastream is WAY better than 66)\", \"post_time\": \"2011-10-24 17:04:56\" },\n\t{ \"post_id\": 525, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nWhen you change the specs to accommodate 102 different datasets, of course a TABLE may not be the best solution. In that case, a simple PROJECT would do the trick and you could use the same record structure for the different datasets.\\n\\nThere are many ways to the mountain top \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-24 16:26:39\" },\n\t{ \"post_id\": 524, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nSo that FUNCTION works with 2 instances, but how about 102 identical record sets?\\n\\nThis ECL will take a lot of getting used to. You seem to have to jump through hoops to do the simplest things.\\n\\nI suppose in real life one would merge all into one record-set, and somehow in the merge generate a field as a discriminator for use in future processing where the record sets need to be treated differently.\\n\\nThanks for your help\\n\\nAllan\", \"post_time\": \"2011-10-24 16:20:08\" },\n\t{ \"post_id\": 519, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan, \\n\\nI don't think you can with TABLE, since the second parameter format also requires the correct source. Here's a simple FUNCTION that generates one TABLE or the other:\\n\\n
IMPORT Training AS X;\\n\\nMyTableFunc(BOOLEAN cond) := FUNCTION\\n\\nmyrec := RECORD\\n\\t unsigned8 id := X.People.File.id;\\n\\t string15 firstname := X.People.File.firstname;\\n\\t string25 lastname := X.People.File.lastname;\\n END;\\nmyrec2 := RECORD \\n \\t unsigned8 id2 := X.DN.People.id;\\n\\t string15 firstname2 := X.DN.People.firstname;\\n\\t string25 lastname2 := X.DN.People.lastname;\\n END;\\t\\n\\n \\n MyTable := IF (cond = TRUE,\\n TABLE(X.People.File,myrec),\\n\\t TABLE(X.DN.People,myrec2));\\n \\n RETURN(MyTable);\\n \\n END;\\n \\n MyTableFunc(TRUE);\\n MyTableFunc(FALSE);
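\\n // For clarity: MyTableFunc(TRUE) should return the id/firstname/lastname slice of X.People.File,\\n // while MyTableFunc(FALSE) should return the equivalent slice of X.DN.People.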
\", \"post_time\": \"2011-10-24 15:22:16\" },\n\t{ \"post_id\": 517, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nI must admit I don't understand.\\n\\nBoth DATASETs Raw2 and Raw3 can use the same default values. e.g.\\n\\nBlankSet := DATASET([{0,0,'',0}],Verse_with_hash);\\n
\\n\\nSo how can I get the record definition 'Verse_with_hash' to be used by both datasets?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-24 14:22:09\" },\n\t{ \"post_id\": 516, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"bforeman\", \"post_text\": \"Ok, Allan, I think that the error makes sense. As you know, the record layout of the TABLE needs default values defined. If you are creating a TABLE using the Raw3 recordset, it will need to use the default values of the current record being read, and if you are referencing different default values from a different recordset, it does not know which values to use since they are not related. Even though they both have the same structure, they both indeed have different values. Hope this helps!\\n\\nBest regards,\\n\\nBob\", \"post_time\": \"2011-10-24 13:17:56\" },\n\t{ \"post_id\": 515, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nActually the syntax checker works for:\\n\\nWithHash := TABLE(Raw3,Verse_with_hash);\\n
\\nHowever, when submitting to Thor I get this error:\\n\\nWarning: (0,0): error C2131: raw2.chapter - Table raw2 is not related to raw3 (0, 0), 0, \\n
\\nIf everything is changed to use 'Raw3', as in your example above, then it works.\\nThe datasets have the same structure just contain different data.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-24 13:01:08\" },\n\t{ \"post_id\": 513, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Re: Getting my head round the TABLE command.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nWe need more information. What error (failure) are you receiving? If you modify the record structure to use the Raw3 defaults does it work after that?\\n\\nFor example:\\n\\nVerse_with_hash := RECORD\\n UNSIGNED2 Chapter := Raw3.Chapter;\\n UNSIGNED2 Verse := Raw3.Verse;\\n STRING Text := Raw3.Text;\\n UNSIGNED4 h := HASH(Raw3.Text,Raw3.Chapter,Raw3.Verse);\\nEND;\\n\\nWithHash := TABLE(Raw3,Verse_with_hash);\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-24 12:48:30\" },\n\t{ \"post_id\": 512, \"topic_id\": 143, \"forum_id\": 8, \"post_subject\": \"Getting my head round the TABLE command.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm trying to understand the 'Vertical Slice' type of TABLE command.\\nThe source specification in the 'format' seems to have to contain information on the actual recordset being used. (the 1st parameter to the TABLE command)\\n\\nI can't understand this tie up, as it seems to limit the use of that particular TABLE command to that specific record set. The syntax checker will complain if I attempt to supply a different record set to the TABLE command even though I know it's the same structure. Consequently there does not seem to be a reason for the 1st parameter to the TABLE command given the record set used is defined by the 'format' parameter.\\n\\nI have an example below which works when the table command is given the record set 'Raw2' but fails is supplied 'Raw3' Even though the structure of Raw2 and Raw3 are the same.\\n\\nLayout_Book := RECORD\\n STRING Text;\\nEND;\\n\\nLayout_Verse := RECORD\\n UNSIGNED2 Chapter;\\n\\t UNSIGNED2 Verse;\\n STRING Text;\\nEND;\\n\\nRawGen := DATASET('~thor::niv::genesis',Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\nRawExo := DATASET('~thor::niv::exodus',Layout_Book,CSV(HEADING(4),SEPARATOR('')));\\n\\nLayout_Verse Split(Layout_Book pInput) := TRANSFORM\\n SELF.Chapter := (UNSIGNED2)pInput.Text[1..Find(pInput.Text,':',1)];\\n\\t\\tSELF.Verse := (UNSIGNED2)pInput.Text[Find(pInput.Text,':',1)+1..Find(pInput.Text,':',2)];\\n\\t\\tSELF.Text := pInput.Text[Find(pInput.Text,':',2)+1..];\\nEND;\\n\\nRaw2 := PROJECT(RawGen,Split(LEFT));\\nRaw3 := PROJECT(RawExo,Split(LEFT));\\n\\nVerse_with_hash := RECORD\\n UNSIGNED2 Chapter := Raw2.Chapter;\\n UNSIGNED2 Verse := Raw2.Verse;\\n STRING Text := Raw2.Text;\\n UNSIGNED4 h := HASH(Raw2.Text,Raw2.Chapter,Raw2.Verse);\\nEND;\\n\\nWithHash := TABLE(Raw2,Verse_with_hash);\\nOUTPUT(WithHash);\\n
\", \"post_time\": \"2011-10-24 11:12:03\" },\n\t{ \"post_id\": 533, \"topic_id\": 144, \"forum_id\": 8, \"post_subject\": \"Re: Problems loading a superfile with QUOTE\", \"username\": \"nvasil\", \"post_text\": \"It turns out it was fixed a week ago in the latest release. Now it works\", \"post_time\": \"2011-10-25 01:55:41\" },\n\t{ \"post_id\": 527, \"topic_id\": 144, \"forum_id\": 8, \"post_subject\": \"Problems loading a superfile with QUOTE\", \"username\": \"nvasil\", \"post_text\": \"The following \\nrdata := DATASET(rr_in_dir+'::'+rr_file, record_defs.ResourceRecord, \\n CSV(MAXLENGTH(8192), QUOTE(''), SEPARATOR(['\\\\t', ' '])));\\n\\ncompiles fine but then when sent to thor it returns the following error\\nIn fact it returns error even when you do QUOTE('\\\\'') or QUOTE(['\\\\'']), see below\\n\\n<Error><source>eclcc</source><code>3000</code><message> Compile/Link failed for W20111024-173834 (see 'eclcc.log' for details)</message></Error>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>---------- compiler output --------------</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp: In member function ‘virtual size32_t cAc2::transform(ARowBuilder&, unsigned int*, const char**)’:</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: error: initializing argument 2 of ‘unsigned int user2(size32_t, char*, size32_t, char*)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: error: initializing argument 5 of ‘void user1(bool&, size32_t&, void*&, size32_t, char*, size32_t, char*, long long unsigned int)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-173834.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>--------- end compiler output -----------</message></Warning>\\n\\nrdata := DATASET(rr_in_dir+'::'+rr_file, record_defs.ResourceRecord, \\n CSV(MAXLENGTH(8192), QUOTE('\\\\''), SEPARATOR(['\\\\t', ' '])));\\n\\n<Error><source>eclcc</source><code>3000</code><message> Compile/Link failed for W20111024-174313 (see 'eclcc.log' for details)</message></Error>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>---------- compiler output --------------</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp: In member function ‘virtual size32_t cAc2::transform(ARowBuilder&, unsigned int*, const char**)’:</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: error: 
initializing argument 2 of ‘unsigned int user2(size32_t, char*, size32_t, char*)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: error: initializing argument 5 of ‘void user1(bool&, size32_t&, void*&, size32_t, char*, size32_t, char*, long long unsigned int)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174313.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>--------- end compiler output -----------</message></Warning>\\n\\nrdata := DATASET(rr_in_dir+'::'+rr_file, record_defs.ResourceRecord, \\n CSV(MAXLENGTH(8192), QUOTE(['\\\\'']), SEPARATOR(['\\\\t', ' '])));\\n\\n<Error><source>eclcc</source><code>3000</code><message> Compile/Link failed for W20111024-174552 (see 'eclcc.log' for details)</message></Error>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>---------- compiler output --------------</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp: In member function ‘virtual size32_t cAc2::transform(ARowBuilder&, unsigned int*, const char**)’:</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: error: initializing argument 2 of ‘unsigned int user2(size32_t, char*, size32_t, char*)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: error: invalid conversion from ‘const char*’ to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: error: initializing argument 5 of ‘void user1(bool&, size32_t&, void*&, size32_t, char*, size32_t, char*, long long unsigned int)’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>W20111024-174552.cpp:315: warning: deprecated conversion from string constant to ‘char*’</message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message></message></Warning>\\n<Warning><source>eclcc</source><code>0</code><message>--------- end compiler output -----------</message></Warning>\", \"post_time\": \"2011-10-24 17:46:15\" },\n\t{ \"post_id\": 543, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Re: Maximizing thor's performance\", \"username\": \"jsmith\", \"post_text\": \"The file meta data is stored in Dali, if you reconfigure the location of that component, then you will lose that data, or you will need manually migrate dali's meta data to the new node.\\n\\nBut assuming Dali stays put, and if you you reconfigure other components after ensuring all components are stopped first, you should be fine.\\n\\nHowever, any existing data on the thor 
cluster(s) will still reside across the original nodes, which is also fine, but it means that when reading those files you may want to DISTRIBUTE them early on in a query.\", \"post_time\": \"2011-10-26 15:28:17\" },\n\t{ \"post_id\": 542, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Re: Maximizing thor's performance\", \"username\": \"nvasil\", \"post_text\": \"Thanks a lot.\\n\\nNow if I have a 5 node cluster with 2 slaves and I shut it down, then change the configuration to 4 slaves and restart it, will everything be ok?\", \"post_time\": \"2011-10-26 14:46:43\" },\n\t{ \"post_id\": 541, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Re: Maximizing thor's performance\", \"username\": \"richardkchapman\", \"post_text\": \"I would probably put all my system services and the thormaster on node 1 and create 4 thor slaves on the other 4 nodes.\\n\\nIt _might_ be faster with thor slaves on all 5 nodes, but you'd be in danger of overloading the node that had all the system servers on.\", \"post_time\": \"2011-10-26 14:43:32\" },\n\t{ \"post_id\": 540, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Re: Maximizing thor's performance\", \"username\": \"nvasil\", \"post_text\": \"Thanks for the response. \\nI just ran the configuration tool and chose the default values. So if I have 5 nodes and I am the only user (I will not spray files while submitting a query; in fact I will submit one query at a time), what is the best configuration so that I maximize the performance?\", \"post_time\": \"2011-10-26 14:37:39\" },\n\t{ \"post_id\": 539, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Re: Maximizing thor's performance\", \"username\": \"richardkchapman\", \"post_text\": \"There are two possibilities here:\\n\\n1. Your thor has two slaves, and the other 3 nodes in your system were used for other components of the HPCC system (dali, esp, eclccserver etc). When you have a large system it's appropriate to give each of these services its own node, but on a smaller system it may be wasteful.\\n\\n2. Your data is skewed so that the distribute only placed rows onto two slaves even though there are 5 slaves on the thor. I don't know how you did the distribute, but if for example you did DISTRIBUTE(mydataset, myfield) where myfield had very few distinct values, you can get a skew like this. However, since you say that the initial spray only put data on 2 nodes it sounds more likely that it's the first case...\\n\\nHow did you do the configuration - using the wizard mode? And what version of the platform are you using?\", \"post_time\": \"2011-10-26 14:31:00\" },\n\t{ \"post_id\": 535, \"topic_id\": 145, \"forum_id\": 8, \"post_subject\": \"Maximizing thor's performance\", \"username\": \"nvasil\", \"post_text\": \"I am using a thor cluster with 5 nodes with the default configuration. I noticed that when I spray a file (a big one, around 4GB) it only uses 2 nodes. Also, even if I distribute it, the computations are happening on two nodes only. 
I suspect that the configuration decides to automatically allocate only 2 slaves and keep the other 3 for other tasks.\\n\\nSo suppose that I am the only user of thor when I want maximum utilization of nodes and CPU, how should I configure it?\", \"post_time\": \"2011-10-25 20:13:14\" },\n\t{ \"post_id\": 547, \"topic_id\": 146, \"forum_id\": 8, \"post_subject\": \"Re: Installing thor on a shared cluster\", \"username\": \"jsmith\", \"post_text\": \"Actually, thor's upper memory limit (per slave) is governed by globalMemorySize.\\nlargeMemSize is mostly a legacy setting, but should be set at approximately 75% of the globalMemorySize setting.\\n\\nYou could use ulimit, but the processes would only be aware of it when the limit was hit I think, so keep things running smoothly, you'll want to use the settings above.\", \"post_time\": \"2011-10-27 18:46:04\" },\n\t{ \"post_id\": 546, \"topic_id\": 146, \"forum_id\": 8, \"post_subject\": \"Re: Installing thor on a shared cluster\", \"username\": \"bforeman\", \"post_text\": \"Sure, THOR memory is set by the largememsize parameter in the config.\\n\\nYou could also use ulimit in linux.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-27 18:37:58\" },\n\t{ \"post_id\": 536, \"topic_id\": 146, \"forum_id\": 8, \"post_subject\": \"Installing thor on a shared cluster\", \"username\": \"nvasil\", \"post_text\": \"So here is the situation. We have a cluster that is using other systems. It runs cassandra and hadoop. Is it possible to limit the Thor cluster so that it doesn't use all the CPU/memory?\", \"post_time\": \"2011-10-25 20:16:11\" },\n\t{ \"post_id\": 575, \"topic_id\": 147, \"forum_id\": 8, \"post_subject\": \"Re: security on the cloud\", \"username\": \"ckaminski\", \"post_text\": \"Hi Nick,\\n\\nIt sounds like you are referring to a cluster you built in the Amazon AWS cloud. If so, then you have a second, less general but easier to implement, option through the AWS Management Console GUI.\\n\\nPorts opened for node-to-node communication can be limited to just the cluster. This is done by specifying the group-id for the source field in your security-group rule on Amazon. Group ID's can be made visible by clicking on the "show-hide" button in the right hand corner.\\n\\nFor locking down those ports that need to be opened to the outside world (for ESP and other things), you can limit those sources too. Again, you use the source field here. Instead of a Group ID, you would use Classless Inter-Domain Routing (CIDR) notation. \\n\\nWikipedia has a good article on CIDR notation:\\nhttp://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing\\n\\n-Charles\", \"post_time\": \"2011-11-04 17:00:06\" },\n\t{ \"post_id\": 562, \"topic_id\": 147, \"forum_id\": 8, \"post_subject\": \"Re: security on the cloud\", \"username\": \"flavio\", \"post_text\": \"Nick,\\n\\nWith regards to the open network ports and ESP web page, a simple set of iptables rules on the nodes can be a very effective method to prevent any access from outside the cluster and still allow you to manage the cluster from your workstation. 
\\n\\nAn example of such a list of rules could be:\\n\\n\\n# Flush all iptables, just in case\\n$ sudo iptables -F\\n\\n# Allow all the traffic into your loopback interface\\n$ sudo iptables -A INPUT -i lo -j ACCEPT\\n\\n# Accept established connections\\n$ sudo iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT\\n\\n# Replace "cluster_network/mask" with your own cluster IP network\\n$ sudo iptables -A INPUT -p tcp -s cluster_network/mask --dport 1024:65535 -j ACCEPT\\n\\n# You should really limit where you allow ssh from, but you get the idea\\n$ sudo iptables -A INPUT -p tcp --dport ssh -j ACCEPT\\n\\n# Replace "my_ip_address" with the IP address of your workstation\\n$ sudo iptables -A INPUT -p tcp -s my_ip_address --dport 7000:10000 -j ACCEPT\\n\\n# Block everything else coming to your nodes\\n$ sudo iptables -A INPUT -j DROP\\n
\\n\\nIf you are running Ubuntu you can use the following to save these iptables rules and execute them at startup. For other distributions, you will need to adapt these steps.\\n\\n\\n# To save your iptables rules to a file\\n$ sudo sh -c "iptables-save > /etc/iptables.rules"\\n
\\n\\nIn order to run your newly created /etc/iptables.rules at startup, just include the following entry into your /etc/network/interfaces file, in the scope for your eth0 interface (pre-up commands already run as root, so sudo is not needed):\\n\\n\\npre-up iptables-restore < /etc/iptables.rules\\n
\\n\\nPlease let me know if you have any problems.\\n\\nOn a related topic, for a more granular user based security model in general, we are planning to include our LDAP/Kerberos/AD based security, currently available only in our Enterprise Edition, into an upcoming version of the Community Edition (possibly around 3.6 or 3.8).\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-10-31 15:51:04\" },\n\t{ \"post_id\": 537, \"topic_id\": 147, \"forum_id\": 8, \"post_subject\": \"security on the cloud\", \"username\": \"nvasil\", \"post_text\": \"As far as I know it is easy to protect thor nodes on the cluster so that only authorized users can ssh to the machines. I see two security loopholes. The first one is that there are many ports open so that the nodes can communicate. I don't know but I suspect that the configuration might allow connections to these ports only from the other hpcc nodes, but I am not sure about it. The other problem is that the esp that runs the web-interface is also open. So if somebody knows the url he/she can have access to the thor cluster.\\n\\nAre there any solutions to these issues?\", \"post_time\": \"2011-10-26 01:37:50\" },\n\t{ \"post_id\": 567, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"jsmith\", \"post_text\": \"Ok, so that sounds like you've built a package from git sources post 3.2.0 release.\\nBut it looks like Thor is still on 3.2.0 given the message in the error itself.\\n\\nThe regressions I mention are fixed post 3.2.0, in 3.2.2. \\nCan you download 3.2.2 from the portal and ensure that your system is upgraded and try your query again?\", \"post_time\": \"2011-10-31 17:23:42\" },\n\t{ \"post_id\": 563, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"aintnomyth\", \"post_text\": \"The ECL watch page is showing "community_3.3.0-1trunk" \\n\\nThe error messages show "...Internal Error at /var/jenkins/workspace/Release-3.2.0/src/thorlcr..." if that matters.\", \"post_time\": \"2011-10-31 16:17:01\" },\n\t{ \"post_id\": 561, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"jsmith\", \"post_text\": \"Which version of the platform does this occur in?\\nThere were 2 separate regressions that affected splitter, which sounds like they may be implicated here..\", \"post_time\": \"2011-10-31 15:27:42\" },\n\t{ \"post_id\": 557, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"aintnomyth\", \"post_text\": \"So far the only sure way to avoid the issue has been to have a single OUTPUT attribute. 
Any other ideas for a workaround?\", \"post_time\": \"2011-10-31 14:53:21\" },\n\t{ \"post_id\": 551, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"aintnomyth\", \"post_text\": \"...and here is the rest of the log:\\n\\n000001A6 2011-10-28 09:19:56 22509 22811 CONNECTING (id=37, idx=0) to (id=39, idx=0) - activity(split, 39)\\n000001A7 2011-10-28 09:19:56 22509 22811 CONNECTING (id=39, idx=0) to (id=40, idx=0) - activity(filter, 40)\\n000001A8 2011-10-28 09:19:56 22509 22811 CONNECTING (id=40, idx=0) to (id=41, idx=0) - activity(countproject, 41)\\n000001A9 2011-10-28 09:19:56 22509 22811 CONNECTING (id=41, idx=0) to (id=42, idx=0) - activity(sort, 42)\\n000001AA 2011-10-28 09:19:56 22509 22811 CONNECTING (id=42, idx=0) to (id=43, idx=0) - activity(group, 43)\\n000001AB 2011-10-28 09:19:56 22509 22811 CONNECTING (id=43, idx=0) to (id=44, idx=0) - activity(iterate, 44)\\n000001AC 2011-10-28 09:19:56 22509 22811 CONNECTING (id=44, idx=0) to (id=45, idx=0) - activity(sort, 45)\\n000001AD 2011-10-28 09:19:56 22509 22811 CONNECTING (id=45, idx=0) to (id=46, idx=0) - activity(iterate, 46)\\n000001AE 2011-10-28 09:19:56 22509 22811 CONNECTING (id=46, idx=0) to (id=47, idx=0) - activity(project, 47)\\n000001AF 2011-10-28 09:19:56 22509 22811 HASHDISTRIB: createHashDistributeSlave - activity(hashdistribute, 48) [ecl=DISTRIBUTE(HASH32(sub_client_cd, policy_holder_id, suffix_id));]\\n000001B0 2011-10-28 09:19:56 22509 22811 CONNECTING (id=47, idx=0) to (id=48, idx=0) - activity(hashdistribute, 48)\\n000001B1 2011-10-28 09:19:56 22509 22811 CONNECTING (id=48, idx=0) to (id=49, idx=0) - activity(firstn, 49)\\n000001B2 2011-10-28 09:19:56 22509 22811 CONNECTING (id=49, idx=0) to (id=50, idx=0) - activity(workunitwrite, 50)\\n000001B3 2011-10-28 09:19:56 22509 22811 CONNECTING (id=39, idx=1) to (id=51, idx=0) - activity(diskwrite, 51)\\n000001B4 2011-10-28 09:19:56 22509 22811 deserializeMPTag: tag = 65547\\n000001B5 2011-10-28 09:19:56 22509 22811 deserializeMPTag: tag = 65556\\n000001B6 2011-10-28 09:19:56 22509 22811 deserializeMPTag: tag = 65552\\n000001B7 2011-10-28 09:19:56 22509 22811 HASHDISTRIB: init tag 65552 - activity(hashdistribute, 48)\\n000001B8 2011-10-28 09:19:56 22509 22811 CLocalSortSlaveActivity::init - activity(sort, 42)\\n000001B9 2011-10-28 09:19:56 22509 22811 deserializeMPTag: tag = 65554\\n000001BA 2011-10-28 09:19:56 22509 22822 Starting input - activity(workunitwrite, 50)\\n000001BB 2011-10-28 09:19:56 22509 22811 Watchdog: Start Job 36\\n000001BC 2011-10-28 09:19:56 22509 22826 registerTmpFile name=/var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25, usageCount=1\\n000001BD 2011-10-28 09:19:56 22509 22826 handling fname : /var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25 - activity(diskwrite, 51)\\n000001BE 2011-10-28 09:19:56 22509 22826 Starting input - activity(diskwrite, 51)\\n000001BF 2011-10-28 09:19:56 22509 22826 Starting input - activity(split, 39)\\n000001C0 2011-10-28 09:19:56 22509 22826 csvread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/thor/[ My Co ]/tst/tst1f0/in/membership._4_of_25' (logical file = ~thor::[ My Co ]::tst::tst1f0::in::membership) - activity(csvread, 37)\\n000001C1 2011-10-28 09:19:56 22509 22826 csvread[part=3]: Base offset to 52963530 - activity(csvread, 37)\\n000001C2 2011-10-28 09:19:56 22509 22826 ITDL starting for output 0 - activity(csvread, 37)\\n000001C3 2011-10-28 09:19:56 22509 22826 Spill is 'balanced' - activity(split, 
39)\\n000001C5 2011-10-28 09:19:56 22509 22828 Starting input - activity(firstn, 49)\\n000001C6 2011-10-28 09:19:56 22509 22828 Starting input - activity(hashdistribute, 48)\\n000001C7 2011-10-28 09:19:56 22509 22828 Starting input - activity(project, 47)\\n000001C8 2011-10-28 09:19:56 22509 22828 Starting input - activity(iterate, 46)\\n000001C9 2011-10-28 09:19:56 22509 22828 Starting input - activity(sort, 45)\\n000001CA 2011-10-28 09:19:56 22509 22828 Starting input - activity(iterate, 44)\\n000001CB 2011-10-28 09:19:56 22509 22828 GROUP: is local - activity(group, 43)\\n000001CC 2011-10-28 09:19:56 22509 22828 Starting input - activity(group, 43)\\n000001CD 2011-10-28 09:19:56 22509 22828 ITDL starting for output 0 - activity(sort, 42)\\n000001CE 2011-10-28 09:19:56 22509 22828 Starting input - activity(sort, 42)\\n000001CF 2011-10-28 09:19:56 22509 22828 COUNTPROJECT: Is Global - activity(countproject, 41)\\n000001D0 2011-10-28 09:19:56 22509 22828 Starting input - activity(countproject, 41)\\n000001D1 2011-10-28 09:19:56 22509 22828 Starting input - activity(filter, 40)\\n000001D2 2011-10-28 09:19:56 22509 22828 ITDL starting for output 0 - activity(split, 39)\\n000001D3 2011-10-28 09:19:56 22509 22828 ITDL starting for output 0 - activity(filter, 40)\\n000001D4 2011-10-28 09:19:56 22509 22828 ITDL starting for output 0 - activity(countproject, 41)\\n000001C4 2011-10-28 09:19:56 22509 22826 ITDL starting for output 1 - activity(split, 39)\\n000001D5 2011-10-28 09:19:56 22509 22833 Record size = 480 - activity(csvread, 37)\\n000001D6 2011-10-28 09:19:56 22509 22834 Record size = 480 - activity(split, 39)\\n000001D7 2011-10-28 09:19:56 22509 22834 Record size = 480 - activity(filter, 40)\\n000001D8 2011-10-28 09:19:56 22509 22826 Writing to file: /var/lib/HPCCSystems/mythor/temp/thtmp22509_3__partial.tmp - activity(diskwrite, 51)\\n000001D9 2011-10-28 09:19:56 22509 22826 Performing row compression on output file: /var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25 - activity(diskwrite, 51)\\n000001DA 2011-10-28 09:19:56 22509 22826 Created output stream for /var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25 - activity(diskwrite, 51)\\n000001DB 2011-10-28 09:19:56 22509 22826 Ungrouped - activity(diskwrite, 51)\\n000001DC 2011-10-28 09:19:56 22509 22826 Record size = 480 - activity(split, 39)\\n000001DD 2011-10-28 09:19:59 22509 22833 Splitter activity, hit end of input @ rec = 120009 - activity(split, 39)\\n000001DE 2011-10-28 09:19:59 22509 22826 Wrote 120009 records - activity(diskwrite, 51)\\n000001DF 2011-10-28 09:19:59 22509 22826 Stopping input for - activity(diskwrite, 51)\\n000001E0 2011-10-28 09:19:59 22509 22826 ITDL output 1 stopped, count was 120009 - activity(split, 39)\\n000001E1 2011-10-28 09:19:59 22509 22834 count is 120009 - activity(countproject, 41)\\n000001E2 2011-10-28 09:19:59 22509 22828 Record size = 50 - activity(countproject, 41)\\n000001E3 2011-10-28 09:19:59 22509 22834 Stopping input for - activity(filter, 40)\\n000001E4 2011-10-28 09:19:59 22509 22834 Stopping input for - activity(split, 39)\\n000001E5 2011-10-28 09:19:59 22509 22834 ITDL output 0 stopped, count was 120009 - activity(csvread, 37)\\n000001E6 2011-10-28 09:19:59 22509 22834 ITDL output 0 stopped, count was 120009 - activity(split, 39)\\n000001E7 2011-10-28 09:19:59 22509 22834 ITDL output 0 stopped, count was 120009 - activity(filter, 40)\\n000001E8 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(group, 43)\\n000001E9 2011-10-28 09:19:59 
22509 22828 Record size = 50 - activity(sort, 42)\\n000001EA 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(iterate, 44)\\n000001EB 2011-10-28 09:19:59 22509 22828 ITDL starting for output 268435457 - activity(sort, 45)\\n000001EC 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(iterate, 46)\\n000001ED 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(project, 47)\\n000001EE 2011-10-28 09:19:59 22509 22828 HASHDISTRIB: connect - activity(hashdistribute, 48)\\n000001EF 2011-10-28 09:19:59 22509 22883 Read loop start - activity(hashdistribute, 48)\\n000001F0 2011-10-28 09:19:59 22509 22828 HASHDISTRIB: connected - activity(hashdistribute, 48)\\n000001F1 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(hashdistribute, 48)\\n000001F2 2011-10-28 09:19:59 22509 22828 ITDL starting for output 0 - activity(firstn, 49)\\n000001F3 2011-10-28 09:19:59 22509 22828 Starting input - activity(firstn, 49)\\n000001F4 2011-10-28 09:19:59 22509 22884 Distribute send start - activity(hashdistribute, 48)\\n000001F5 2011-10-28 09:19:59 22509 22884 Record size = 50 - activity(group, 43)\\n000001F6 2011-10-28 09:19:59 22509 22884 Record size = 50 - activity(iterate, 44)\\n000001F7 2011-10-28 09:19:59 22509 22884 Record size = 50 - activity(sort, 45)\\n000001F8 2011-10-28 09:19:59 22509 22884 Record size = 50 - activity(iterate, 46)\\n000001F9 2011-10-28 09:19:59 22509 22884 Record size = 45 - activity(project, 47)\\n000001FA 2011-10-28 09:19:59 22509 22884 Distribute send finishing - activity(hashdistribute, 48)\\n000001FB 2011-10-28 09:19:59 22509 22822 WORKUNITWRITE: processing first block - activity(workunitwrite, 50)\\n000001FC 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 1, 23 left - activity(hashdistribute, 48)\\n000001FD 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 14, 22 left - activity(hashdistribute, 48)\\n000001FE 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 13, 21 left - activity(hashdistribute, 48)\\n000001FF 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 5, 20 left - activity(hashdistribute, 48)\\n00000200 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 20, 19 left - activity(hashdistribute, 48)\\n00000201 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 2, 18 left - activity(hashdistribute, 48)\\n00000202 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 3, 17 left - activity(hashdistribute, 48)\\n00000203 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 12, 16 left - activity(hashdistribute, 48)\\n00000204 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 19, 15 left - activity(hashdistribute, 48)\\n00000205 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 16, 14 left - activity(hashdistribute, 48)\\n00000206 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 10, 13 left - activity(hashdistribute, 48)\\n00000207 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 9, 12 left - activity(hashdistribute, 48)\\n00000208 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 11, 11 left - activity(hashdistribute, 48)\\n00000209 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 8, 10 left - activity(hashdistribute, 48)\\n0000020A 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 6, 9 left - activity(hashdistribute, 48)\\n0000020B 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 15, 8 left - activity(hashdistribute, 48)\\n0000020C 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 24, 7 left - activity(hashdistribute, 48)\\n0000020D 
2011-10-28 09:19:59 22509 22883 HDIST: finished slave 25, 6 left - activity(hashdistribute, 48)\\n0000020E 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 22, 5 left - activity(hashdistribute, 48)\\n0000020F 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 17, 4 left - activity(hashdistribute, 48)\\n00000210 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 23, 3 left - activity(hashdistribute, 48)\\n00000211 2011-10-28 09:19:59 22509 22884 HDIST: Send loop finished 1 rows sent - activity(hashdistribute, 48)\\n00000212 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(hashdistribute, 48)\\n00000213 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(project, 47)\\n00000214 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(iterate, 46)\\n00000215 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(sort, 45)\\n00000216 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(iterate, 44)\\n00000217 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(group, 43)\\n00000218 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(sort, 42)\\n00000219 2011-10-28 09:19:59 22509 22884 Stopping input for - activity(countproject, 41)\\n0000021A 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 120009 - activity(countproject, 41)\\n0000021B 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 2 - activity(sort, 42)\\n0000021C 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 1 - activity(group, 43)\\n0000021D 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 1 - activity(iterate, 44)\\n0000021E 2011-10-28 09:19:59 22509 22884 ITDL output 268435457 stopped, count was 1 - activity(sort, 45)\\n0000021F 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 1 - activity(iterate, 46)\\n00000220 2011-10-28 09:19:59 22509 22884 ITDL output 0 stopped, count was 1 - activity(project, 47)\\n00000221 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 7, 2 left - activity(hashdistribute, 48)\\n00000222 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 21, 1 left - activity(hashdistribute, 48)\\n00000223 2011-10-28 09:19:59 22509 22883 HDIST: finished slave 18, 0 left - activity(hashdistribute, 48)\\n00000224 2011-10-28 09:19:59 22509 22883 HDIST: Read loop done - activity(hashdistribute, 48)\\n00000225 2011-10-28 09:19:59 22509 22828 FIRSTN: Record limit is 99 0 - activity(firstn, 49)\\n00000226 2011-10-28 09:19:59 22509 22885 FIRSTN: Read 0 records, left to skip=0 - activity(firstn, 49)\\n00000227 2011-10-28 09:19:59 22509 22885 FIRSTN: maximum row count 0 - activity(firstn, 49)\\n00000228 2011-10-28 09:19:59 22509 22885 HASHDISTRIB: stopping - activity(hashdistribute, 48)\\n0000022A 2011-10-28 09:19:59 22509 22885 ITDL output 0 stopped, count was 0 - activity(hashdistribute, 48)\\n00000229 2011-10-28 09:19:59 22509 22828 Stopping input for - activity(firstn, 49)\\n0000022B 2011-10-28 09:19:59 22509 22828 ITDL output 0 stopped, count was 0 - activity(firstn, 49)\\n0000022C 2011-10-28 09:19:59 22509 22822 Stopping input for - activity(workunitwrite, 50)\\n0000022D 2011-10-28 09:19:59 22509 22811 End of sub-graph - graph(graph5, 36)\\n0000022E 2011-10-28 09:19:59 22509 22509 Entering getDone - graph(graph5, 36)\\n0000022F 2011-10-28 09:19:59 22509 22509 Watchdog: Stop Job 36\\n00000230 2011-10-28 09:19:59 22509 22509 Leaving getDone - graph(graph5, 36)\\n00000231 2011-10-28 09:19:59 22509 22811 CLocalSortSlaveActivity::kill - activity(sort, 42)\\n00000232 
2011-10-28 09:19:59 22509 22811 HASHDISTRIB: kill - activity(hashdistribute, 48)\\n00000233 2011-10-28 09:19:59 22509 22811 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=104\\nreadtime=419us\\nreadsize=459665 bytes\\nwrites=78\\nwritetime=763us\\nwritesize=3083 bytes\\nactivesockets=28\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph5, 36)\\n00000234 2011-10-28 09:19:59 22509 22811 Graph Done - graph(graph5, 36)\\n00000235 2011-10-28 09:19:59 22509 22811 PU= 68% MU= 3% MAL=2357680 MMP=0 SBK=2357680 TOT=8644K RAM=821452K SWP=0K - graph(graph5, 36)\\n00000236 2011-10-28 09:19:59 22509 22811 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n00000237 2011-10-28 09:19:59 22509 22509 GraphInit: W20111028-091953-4graph5\\n00000238 2011-10-28 09:19:59 22509 22509 deserializeMPTag: tag = 65559\\n00000239 2011-10-28 09:19:59 22509 22509 deserializeMPTag: tag = 65539\\n0000023A 2011-10-28 09:19:59 22509 22509 deserializeMPTag: tag = 65549\\n0000023B 2011-10-28 09:19:59 22509 22509 deserializeMPTag: tag = 65544\\n0000023C 2011-10-28 09:19:59 22509 22509 Add: Launching graph thread for graphId=52\\n0000023D 2011-10-28 09:19:59 22509 22509 destroying ProcessSlaveActivity - activity(workunitwrite, 50)\\n0000023E 2011-10-28 09:19:59 22509 22509 ProcessSlaveActivity : joining process thread - activity(workunitwrite, 50)\\n0000023F 2011-10-28 09:19:59 22509 22509 AFTER ProcessSlaveActivity : joining process thread - activity(workunitwrite, 50)\\n00000240 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(workunitwrite, 50)\\n00000241 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(split, 39)\\n00000242 2011-10-28 09:19:59 22509 22509 destroying ProcessSlaveActivity - activity(diskwrite, 51)\\n00000243 2011-10-28 09:19:59 22509 22509 ProcessSlaveActivity : joining process thread - activity(diskwrite, 51)\\n00000244 2011-10-28 09:19:59 22509 22509 AFTER ProcessSlaveActivity : joining process thread - activity(diskwrite, 51)\\n00000245 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(diskwrite, 51)\\n00000246 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(csvread, 37)\\n00000247 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(filter, 40)\\n00000248 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(countproject, 41)\\n00000249 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(sort, 42)\\n0000024A 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(group, 43)\\n0000024B 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(iterate, 44)\\n0000024C 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(sort, 45)\\n0000024D 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(iterate, 46)\\n0000024E 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(project, 47)\\n0000024F 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(hashdistribute, 48)\\n00000250 2011-10-28 09:19:59 22509 22509 DESTROYED - activity(firstn, 49)\\n00000251 2011-10-28 09:19:59 22509 22811 Running graph [global] : <graph>\\n <node id="53" label="Disk Read Spill">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(196,3)"/>\\n <att name="name" value="resultds"/>\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~spill::C1', RECORD string3 sub_client_cd; string32 policy_holder_id; string4 suffix_id; ... 
END, THOR, __compressed__); "/>\\n <att name="recordSize" value="480"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~spill::C1"/>\\n <att name="_isSpill" value="1"/>\\n <att name="spillReason" value="Resource limit spill: Heavyweight (2>1)"/>\\n </node>\\n <node id="54" label="Count Project">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(37,22)"/>\\n <att name="_kind" value="38"/>\\n <att name="ecl" value="PROJECT(keyxform(LEFT, COUNTER)); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="55" label="Local Sort">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(37,4)"/>\\n <att name="name" value="tempsort"/>\\n <att name="_kind" value="3"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="SORT(sub_client_cd, policy_holder_id, suffix_id, local); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="56" label="Local Group">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(38,4)"/>\\n <att name="name" value="tempgroup"/>\\n <att name="_kind" value="20"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="GROUP(sub_client_cd, policy_holder_id, suffix_id, local); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="57" label="Grouped Iterate">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(50,4)"/>\\n <att name="name" value="dedupresult"/>\\n <att name="_kind" value="9"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="ITERATE(dedupxform(LEFT, RIGHT, COUNTER)); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="58" label="Grouped Sort">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(51,4)"/>\\n <att name="name" value="revsort"/>\\n <att name="_kind" value="3"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="SORT(sub_client_cd, policy_holder_id, suffix_id, -duporder); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="59" label="Grouped Iterate">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(58,4)"/>\\n <att name="name" value="result"/>\\n <att name="_kind" value="9"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="ITERATE(dedupcountxform(LEFT, RIGHT, COUNTER)); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="60" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="507"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="61" label="Output Result #3">\\n <att name="definition" value="C:\\\\Users\\\\BROWNJ~1\\\\AppData\\\\Local\\\\Temp\\\\TFR56C2.tmp(5,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="507"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="53_0" source="53" target="54"/>\\n <edge id="54_0" source="54" target="55"/>\\n <edge id="55_0" source="55" target="56"/>\\n <edge id="56_0" source="56" target="57"/>\\n <edge id="57_0" source="57" target="58"/>\\n <edge id="58_0" source="58" target="59"/>\\n <edge id="59_0" source="59" target="60"/>\\n 
<edge id="60_0" source="60" target="61"/>\\n </graph>\\n - graph(graph5, 52)\\n00000252 2011-10-28 09:19:59 22509 22811 CONNECTING (id=53, idx=0) to (id=54, idx=0) - activity(countproject, 54)\\n00000253 2011-10-28 09:19:59 22509 22811 CONNECTING (id=54, idx=0) to (id=55, idx=0) - activity(sort, 55)\\n00000254 2011-10-28 09:19:59 22509 22811 CONNECTING (id=55, idx=0) to (id=56, idx=0) - activity(group, 56)\\n00000255 2011-10-28 09:19:59 22509 22811 CONNECTING (id=56, idx=0) to (id=57, idx=0) - activity(iterate, 57)\\n00000256 2011-10-28 09:19:59 22509 22811 CONNECTING (id=57, idx=0) to (id=58, idx=0) - activity(sort, 58)\\n00000257 2011-10-28 09:19:59 22509 22811 CONNECTING (id=58, idx=0) to (id=59, idx=0) - activity(iterate, 59)\\n00000258 2011-10-28 09:19:59 22509 22811 CONNECTING (id=59, idx=0) to (id=60, idx=0) - activity(firstn, 60)\\n00000259 2011-10-28 09:19:59 22509 22811 CONNECTING (id=60, idx=0) to (id=61, idx=0) - activity(workunitwrite, 61)\\n0000025A 2011-10-28 09:19:59 22509 22811 deserializeMPTag: tag = 65558\\n0000025B 2011-10-28 09:19:59 22509 22811 deserializeMPTag: tag = 65557\\n0000025C 2011-10-28 09:19:59 22509 22811 CLocalSortSlaveActivity::init - activity(sort, 55)\\n0000025D 2011-10-28 09:19:59 22509 22811 deserializeMPTag: tag = 65545\\n0000025E 2011-10-28 09:19:59 22509 22811 Watchdog: Start Job 52\\n0000025F 2011-10-28 09:19:59 22509 22895 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n00000260 2011-10-28 09:19:59 22509 22895 Starting input - activity(workunitwrite, 61)\\n00000261 2011-10-28 09:19:59 22509 22898 Starting input - activity(firstn, 60)\\n00000262 2011-10-28 09:19:59 22509 22898 Starting input - activity(iterate, 59)\\n00000263 2011-10-28 09:19:59 22509 22898 Starting input - activity(sort, 58)\\n00000264 2011-10-28 09:19:59 22509 22898 Starting input - activity(iterate, 57)\\n00000265 2011-10-28 09:19:59 22509 22898 GROUP: is local - activity(group, 56)\\n00000266 2011-10-28 09:19:59 22509 22898 Starting input - activity(group, 56)\\n00000267 2011-10-28 09:19:59 22509 22898 ITDL starting for output 0 - activity(sort, 55)\\n00000268 2011-10-28 09:19:59 22509 22898 Starting input - activity(sort, 55)\\n00000269 2011-10-28 09:19:59 22509 22898 COUNTPROJECT: Is Global - activity(countproject, 54)\\n0000026A 2011-10-28 09:19:59 22509 22898 Starting input - activity(countproject, 54)\\n0000026B 2011-10-28 09:19:59 22509 22898 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25' (logical file = ~spill::C1) - activity(diskread, 53)\\n0000026C 2011-10-28 09:19:59 22509 22898 diskread[part=3]: Base offset to 173099520 - activity(diskread, 53)\\n0000026D 2011-10-28 09:19:59 22509 22898 Reading block compressed file: /var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25 - activity(diskread, 53)\\n0000026E 2011-10-28 09:19:59 22509 22898 diskread[part=3]: fixed (/var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25) - activity(diskread, 53)\\n0000026F 2011-10-28 09:19:59 22509 22898 ITDL starting for output 0 - activity(diskread, 53)\\n00000270 2011-10-28 09:19:59 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n00000271 2011-10-28 09:19:59 22509 22898 ITDL starting for output 0 - activity(countproject, 54)\\n00000272 2011-10-28 09:19:59 22509 22898 COUNTPROJECT: row count pre-known to be 120009 - activity(countproject, 54)\\n00000273 2011-10-28 09:19:59 22509 22903 Record size = 480 - activity(diskread, 53)\\n00000274 2011-10-28 
09:19:59 22509 22898 Record size = 507 - activity(countproject, 54)\\n00000275 2011-10-28 09:20:00 22509 22903 ITDL output 0 stopped, count was 120009 - activity(diskread, 53)\\n00000276 2011-10-28 09:20:00 22509 22898 ITDL starting for output 0 - activity(group, 56)\\n00000277 2011-10-28 09:20:00 22509 22898 Record size = 507 - activity(sort, 55)\\n00000278 2011-10-28 09:20:00 22509 22898 ITDL starting for output 0 - activity(iterate, 57)\\n00000279 2011-10-28 09:20:00 22509 22898 ITDL starting for output 11156 - activity(sort, 58)\\n0000027A 2011-10-28 09:20:00 22509 22898 ITDL starting for output 0 - activity(iterate, 59)\\n0000027B 2011-10-28 09:20:00 22509 22898 ITDL starting for output 0 - activity(firstn, 60)\\n0000027C 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n0000027D 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n0000027E 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n0000027F 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n00000280 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n00000281 2011-10-28 09:20:00 22509 22898 DISKREAD: Number of rows to read: 120009 - activity(diskread, 53)\\n00000282 2011-10-28 09:20:00 22509 22898 Starting input - activity(firstn, 60)\\n00000283 2011-10-28 09:20:00 22509 22926 Record size = 507 - activity(group, 56)\\n00000284 2011-10-28 09:20:00 22509 22926 Record size = 507 - activity(iterate, 57)\\n00000285 2011-10-28 09:20:00 22509 22926 Record size = 507 - activity(sort, 58)\\n00000286 2011-10-28 09:20:00 22509 22926 Record size = 507 - activity(iterate, 59)\\n00000287 2011-10-28 09:20:00 22509 22895 WORKUNITWRITE: processing first block - activity(workunitwrite, 61)\\n00000288 2011-10-28 09:20:00 22509 22898 Row count pre-known to be 120009 - activity(firstn, 60)\\n00000289 2011-10-28 09:20:00 22509 22926 FIRSTN: Read 0 records, left to skip=0 - activity(firstn, 60)\\n0000028A 2011-10-28 09:20:00 22509 22926 FIRSTN: maximum row count 100 - activity(firstn, 60)\\n0000028B 2011-10-28 09:20:00 22509 22898 FIRSTN: Record limit is 0 0 - activity(firstn, 60)\\n0000028C 2011-10-28 09:20:00 22509 22898 Stopping input for - activity(firstn, 60)\\n0000028D 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(iterate, 59)\\n0000028E 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(sort, 58)\\n0000028F 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(iterate, 57)\\n00000290 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(group, 56)\\n00000291 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(sort, 55)\\n00000292 2011-10-28 09:20:00 22509 22926 Stopping input for - activity(countproject, 54)\\n00000293 2011-10-28 09:20:00 22509 22926 ITDL output 0 stopped, count was 120009 - activity(countproject, 54)\\n00000294 2011-10-28 09:20:00 22509 22926 ITDL output 0 stopped, count was 101 - activity(sort, 55)\\n00000295 2011-10-28 09:20:00 22509 22926 ITDL output 0 stopped, count was 100 - activity(group, 56)\\n00000296 2011-10-28 09:20:00 22509 22926 ITDL output 0 stopped, count was 100 - activity(iterate, 57)\\n00000297 2011-10-28 09:20:00 22509 22926 ITDL output 11156 stopped, count was 100 - activity(sort, 58)\\n00000298 2011-10-28 09:20:00 22509 22926 ITDL output 0 stopped, count was 100 - activity(iterate, 59)\\n00000299 2011-10-28 
09:20:00 22509 22898 ITDL output 0 stopped, count was 0 - activity(firstn, 60)\\n0000029A 2011-10-28 09:20:00 22509 22895 Stopping input for - activity(workunitwrite, 61)\\n0000029B 2011-10-28 09:20:00 22509 22811 End of sub-graph - graph(graph5, 52)\\n0000029C 2011-10-28 09:20:00 22509 22509 Entering getDone - graph(graph5, 52)\\n0000029D 2011-10-28 09:20:00 22509 22509 Watchdog: Stop Job 52\\n0000029E 2011-10-28 09:20:00 22509 22509 Leaving getDone - graph(graph5, 52)\\n0000029F 2011-10-28 09:20:00 22509 22811 deregisterTmpFile name=/var/lib/HPCCSystems/mythor/temp/c1__w20111028-091953-4._4_of_25\\n000002A0 2011-10-28 09:20:00 22509 22811 CLocalSortSlaveActivity::kill - activity(sort, 55)\\n000002A1 2011-10-28 09:20:00 22509 22811 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=15\\nreadtime=29us\\nreadsize=5375 bytes\\nwrites=17\\nwritetime=251us\\nwritesize=733 bytes\\nactivesockets=28\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph5, 52)\\n000002A2 2011-10-28 09:20:00 22509 22811 Graph Done - graph(graph5, 52)\\n000002A3 2011-10-28 09:20:00 22509 22811 PU= 80% MU= 4% MAL=1544464 MMP=0 SBK=1544464 TOT=8644K RAM=1099016K SWP=0K DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=72.7 rxk/s=31.8 txp/s=100.8 txk/s=61.6 CPU: usr=74 sys=6 iow=0 idle=19 - graph(graph5, 52)\\n000002A4 2011-10-28 09:20:00 22509 22811 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n000002A5 2011-10-28 09:20:00 22509 22509 GraphInit: W20111028-091953-4graph5\\n000002A6 2011-10-28 09:20:00 22509 22509 deserializeMPTag: tag = 65572\\n000002A7 2011-10-28 09:20:00 22509 22509 deserializeMPTag: tag = 65543\\n000002A8 2011-10-28 09:20:00 22509 22509 deserializeMPTag: tag = 65542\\n000002A9 2011-10-28 09:20:00 22509 22509 deserializeMPTag: tag = 65550\\n000002AA 2011-10-28 09:20:00 22509 22509 Add: Launching graph thread for graphId=62\\n000002AB 2011-10-28 09:20:00 22509 22509 destroying ProcessSlaveActivity - activity(workunitwrite, 61)\\n000002AC 2011-10-28 09:20:00 22509 22509 ProcessSlaveActivity : joining process thread - activity(workunitwrite, 61)\\n000002AD 2011-10-28 09:20:00 22509 22509 AFTER ProcessSlaveActivity : joining process thread - activity(workunitwrite, 61)\\n000002AE 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(diskread, 53)\\n000002AF 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(countproject, 54)\\n000002B0 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(sort, 55)\\n000002B1 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(group, 56)\\n000002B2 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(iterate, 57)\\n000002B3 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(sort, 58)\\n000002B4 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(iterate, 59)\\n000002B5 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(firstn, 60)\\n000002B6 2011-10-28 09:20:00 22509 22509 DESTROYED - activity(workunitwrite, 61)\\n000002B7 2011-10-28 09:20:00 22509 22811 Running graph [global] : <graph>\\n <node id="63" label="Disk Read '...::services'">\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~thor::temp::tst::tst1f0::linked::services', RECORD string3 sub_client_cd; string42 claim_id; unsigned8 service_sequence; ... 
END, THOR, __compressed__); "/>\\n <att name="recordSize" value="175"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~thor::temp::tst::tst1f0::linked::services"/>\\n </node>\\n <node id="64" label="Disk Read '...::membershipheader'">\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~thor::temp::tst::tst1f0::linked::membershipheader', RECORD string3 sub_client_cd; string32 policy_holder_id; string4 suffix_id; ... END, THOR, __compressed__); "/>\\n <att name="recordSize" value="45"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~thor::temp::tst::tst1f0::linked::membershipheader"/>\\n </node>\\n <node id="65" label="Split">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(321,24)"/>\\n <att name="_kind" value="6"/>\\n <att name="ecl" value="code.[ My Prod ].poc.dataload.linkeddatasets.headersort; "/>\\n <att name="recordSize" value="45"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="66" label="Lightweight Join">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(366,3)"/>\\n <att name="name" value="services"/>\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(367,7)"/>\\n <att name="_kind" value="15"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="JOIN(LEFT.sub_client_cd = RIGHT.sub_client_cd AND LEFT.policy_holder_id = RIGHT.policy_holder_id AND ..., TRANSFORM(RECORD unsigned6 eid; string3 sub_client_cd; string42 claim_id; unsigned8 service_sequence; ... END,...), nosort, local); "/>\\n <att name="recordSize" value="181"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="67" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="181"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="68" label="Output Result #5">\\n <att name="definition" value="C:\\\\Users\\\\BROWNJ~1\\\\AppData\\\\Local\\\\Temp\\\\TFR56C2.tmp(7,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="181"/>\\n </node>\\n <node id="69" label="Disk Read '...::diags'">\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~thor::temp::tst::tst1f0::linked::diags', RECORD string3 sub_client_cd; string42 claim_id; unsigned8 diag_sequence; string32 policy_holder_id; ... END, THOR, __compressed__); "/>\\n <att name="recordSize" value="114"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~thor::temp::tst::tst1f0::linked::diags"/>\\n </node>\\n <node id="70" label="Lightweight Join">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(350,3)"/>\\n <att name="name" value="diags"/>\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(351,7)"/>\\n <att name="_kind" value="15"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="JOIN(LEFT.sub_client_cd = RIGHT.sub_client_cd AND LEFT.policy_holder_id = RIGHT.policy_holder_id AND ..., TRANSFORM(RECORD unsigned6 eid; string3 sub_client_cd; string42 claim_id; unsigned8 diag_sequence; ... 
END,...), nosort, local); "/>\\n <att name="recordSize" value="120"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="71" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="120"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="72" label="Output Result #4">\\n <att name="definition" value="C:\\\\Users\\\\BROWNJ~1\\\\AppData\\\\Local\\\\Temp\\\\TFR56C2.tmp(6,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="120"/>\\n </node>\\n <node id="73" label="Disk Read '...::claims'">\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~thor::temp::tst::tst1f0::linked::claims', RECORD string3 sub_client_cd; string42 claim_id; string32 policy_holder_id; ... END, THOR, __compressed__); "/>\\n <att name="recordSize" value="127"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~thor::temp::tst::tst1f0::linked::claims"/>\\n </node>\\n <node id="74" label="Lightweight Join">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(334,3)"/>\\n <att name="name" value="claims"/>\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(335,7)"/>\\n <att name="_kind" value="15"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="JOIN(LEFT.sub_client_cd = RIGHT.sub_client_cd AND LEFT.policy_holder_id = RIGHT.policy_holder_id AND ..., TRANSFORM(RECORD unsigned6 eid; string3 sub_client_cd; string42 claim_id; string32 policy_holder_id; ... END,...), nosort, local); "/>\\n <att name="recordSize" value="133"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="75" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="133"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="76" label="Output Result #1">\\n <att name="definition" value="C:\\\\Users\\\\BROWNJ~1\\\\AppData\\\\Local\\\\Temp\\\\TFR56C2.tmp(3,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="133"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="64_0" source="64" target="65"/>\\n <edge id="63_0"\\n label="LEFT"\\n source="63"\\n target="66"/>\\n <edge id="65_0"\\n label="RIGHT"\\n source="65"\\n target="66">\\n <att name="_targetIndex" value="1"/>\\n </edge>\\n <edge id="66_0" source="66" target="67"/>\\n <edge id="67_0" source="67" target="68"/>\\n <edge id="69_0"\\n label="LEFT"\\n source="69"\\n target="70"/>\\n <edge id="65_1"\\n label="RIGHT"\\n source="65"\\n target="70">\\n <att name="_sourceIndex" value="1"/>\\n <att name="_targetIndex" value="1"/>\\n </edge>\\n <edge id="70_0" source="70" target="71"/>\\n <edge id="71_0" source="71" target="72"/>\\n <edge id="73_0"\\n label="LEFT"\\n source="73"\\n target="74"/>\\n <edge id="65_2"\\n label="RIGHT"\\n source="65"\\n target="74">\\n <att name="_sourceIndex" value="2"/>\\n <att name="_targetIndex" value="1"/>\\n </edge>\\n <edge id="74_0" source="74" target="75"/>\\n <edge id="75_0" source="75" target="76"/>\\n </graph>\\n - graph(graph5, 62)\\n000002B8 2011-10-28 09:20:00 22509 22811 CONNECTING (id=64, idx=0) to (id=65, idx=0) - activity(split, 65)\\n000002B9 2011-10-28 09:20:00 22509 22811 CONNECTING (id=63, idx=0) to (id=66, idx=0) - activity(join, 66)\\n000002BA 2011-10-28 09:20:00 22509 22811 
CONNECTING (id=65, idx=0) to (id=66, idx=1) - activity(join, 66)\\n000002BB 2011-10-28 09:20:00 22509 22811 CONNECTING (id=66, idx=0) to (id=67, idx=0) - activity(firstn, 67)\\n000002BC 2011-10-28 09:20:00 22509 22811 CONNECTING (id=67, idx=0) to (id=68, idx=0) - activity(workunitwrite, 68)\\n000002BD 2011-10-28 09:20:00 22509 22811 CONNECTING (id=69, idx=0) to (id=70, idx=0) - activity(join, 70)\\n000002BE 2011-10-28 09:20:00 22509 22811 CONNECTING (id=65, idx=1) to (id=70, idx=1) - activity(join, 70)\\n000002BF 2011-10-28 09:20:00 22509 22811 CONNECTING (id=70, idx=0) to (id=71, idx=0) - activity(firstn, 71)\\n000002C0 2011-10-28 09:20:00 22509 22811 CONNECTING (id=71, idx=0) to (id=72, idx=0) - activity(workunitwrite, 72)\\n000002C1 2011-10-28 09:20:00 22509 22811 CONNECTING (id=73, idx=0) to (id=74, idx=0) - activity(join, 74)\\n000002C2 2011-10-28 09:20:00 22509 22811 CONNECTING (id=65, idx=2) to (id=74, idx=1) - activity(join, 74)\\n000002C3 2011-10-28 09:20:00 22509 22811 CONNECTING (id=74, idx=0) to (id=75, idx=0) - activity(firstn, 75)\\n000002C4 2011-10-28 09:20:00 22509 22811 CONNECTING (id=75, idx=0) to (id=76, idx=0) - activity(workunitwrite, 76)\\n000002C5 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65563\\n000002C6 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65562\\n000002C7 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65571\\n000002C8 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65570\\n000002C9 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65567\\n000002CA 2011-10-28 09:20:00 22509 22811 deserializeMPTag: tag = 65566\\n000002CB 2011-10-28 09:20:00 22509 22973 Starting input - activity(workunitwrite, 76)\\n000002CC 2011-10-28 09:20:00 22509 22811 Watchdog: Start Job 62\\n000002CD 2011-10-28 09:20:00 22509 22974 Starting input - activity(workunitwrite, 72)\\n000002CE 2011-10-28 09:20:00 22509 22977 Starting input - activity(firstn, 75)\\n000002CF 2011-10-28 09:20:00 22509 22977 DISKREAD: Number of rows to read: 120006 - activity(diskread, 64)\\n000002D0 2011-10-28 09:20:00 22509 22977 JOIN: Starting R then L - activity(join, 74)\\n000002D1 2011-10-28 09:20:00 22509 22977 Starting input - activity(join, 74)\\n000002D2 2011-10-28 09:20:00 22509 22980 Starting input - activity(firstn, 71)\\n000002D3 2011-10-28 09:20:00 22509 22980 DISKREAD: Number of rows to read: 120006 - activity(diskread, 64)\\n000002D4 2011-10-28 09:20:00 22509 22980 JOIN: Starting R then L - activity(join, 70)\\n000002D5 2011-10-28 09:20:00 22509 22980 Starting input - activity(join, 70)\\n000002D6 2011-10-28 09:20:00 22509 22971 Starting input - activity(workunitwrite, 68)\\n000002D7 2011-10-28 09:20:00 22509 22977 Starting input - activity(join, 74)\\n000002D8 2011-10-28 09:20:00 22509 22977 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/claims._4_of_25' (logical file = ~thor::temp::tst::tst1f0::linked::claims) - activity(diskread, 73)\\n000002D9 2011-10-28 09:20:00 22509 22977 diskread[part=3]: Base offset to 70541261 - activity(diskread, 73)\\n000002DA 2011-10-28 09:20:00 22509 22977 Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/claims._4_of_25 - activity(diskread, 73)\\n000002DB 2011-10-28 09:20:00 22509 22977 diskread[part=3]: fixed (/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/claims._4_of_25) - activity(diskread, 73)\\n000002DC 2011-10-28 09:20:00 22509 22977 ITDL starting for output 0 - activity(diskread, 
73)\\n000002DD 2011-10-28 09:20:00 22509 22982 Starting input - activity(split, 65)\\n000002DE 2011-10-28 09:20:00 22509 22982 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/membershipheader._4_of_25' (logical file = ~thor::temp::tst::tst1f0::linked::membershipheader) - activity(diskread, 64)\\n000002DF 2011-10-28 09:20:00 22509 22980 Starting input - activity(join, 70)\\n000002E0 2011-10-28 09:20:00 22509 22980 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/diags._4_of_25' (logical file = ~thor::temp::tst::tst1f0::linked::diags) - activity(diskread, 69)\\n000002E1 2011-10-28 09:20:00 22509 22980 diskread[part=3]: Base offset to 1287639120 - activity(diskread, 69)\\n000002E2 2011-10-28 09:20:00 22509 22980 Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/diags._4_of_25 - activity(diskread, 69)\\n000002E3 2011-10-28 09:20:00 22509 22980 diskread[part=3]: fixed (/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/diags._4_of_25) - activity(diskread, 69)\\n000002E4 2011-10-28 09:20:00 22509 22980 ITDL starting for output 0 - activity(diskread, 69)\\n000002E5 2011-10-28 09:20:00 22509 22982 diskread[part=3]: Base offset to 16196265 - activity(diskread, 64)\\n000002E6 2011-10-28 09:20:00 22509 22982 Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/membershipheader._4_of_25 - activity(diskread, 64)\\n000002E7 2011-10-28 09:20:00 22509 22982 diskread[part=3]: fixed (/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/membershipheader._4_of_25) - activity(diskread, 64)\\n000002E8 2011-10-28 09:20:00 22509 22982 ITDL starting for output 0 - activity(diskread, 64)\\n000002E9 2011-10-28 09:20:00 22509 22982 Spill is 'balanced' - activity(split, 65)\\n000002EA 2011-10-28 09:20:00 22509 22987 ITDL starting for output 1 - activity(split, 65)\\n000002EB 2011-10-28 09:20:00 22509 22980 ITDL starting for output 0 - activity(join, 70)\\n000002EC 2011-10-28 09:20:00 22509 22980 DISKREAD: Number of rows to read: 3786268 - activity(diskread, 69)\\n000002ED 2011-10-28 09:20:00 22509 22980 ITDL starting for output 0 - activity(firstn, 71)\\n000002EE 2011-10-28 09:20:00 22509 22980 Starting input - activity(firstn, 71)\\n000002EF 2011-10-28 09:20:00 22509 22974 WORKUNITWRITE: processing first block - activity(workunitwrite, 72)\\n000002F0 2011-10-28 09:20:00 22509 22995 Starting input - activity(firstn, 67)\\n000002F1 2011-10-28 09:20:00 22509 22995 DISKREAD: Number of rows to read: 120006 - activity(diskread, 64)\\n000002F2 2011-10-28 09:20:00 22509 22995 JOIN: Starting R then L - activity(join, 66)\\n000002F3 2011-10-28 09:20:00 22509 22995 Starting input - activity(join, 66)\\n000002F4 2011-10-28 09:20:00 22509 22995 Starting input - activity(join, 66)\\n000002F5 2011-10-28 09:20:00 22509 22995 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/services._4_of_25' (logical file = ~thor::temp::tst::tst1f0::linked::services) - activity(diskread, 63)\\n000002F6 2011-10-28 09:20:00 22509 22995 diskread[part=3]: Base offset to 2487775325 - activity(diskread, 63)\\n000002F7 2011-10-28 09:20:00 22509 22995 Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/services._4_of_25 - activity(diskread, 63)\\n000002F8 2011-10-28 09:20:00 22509 22995 diskread[part=3]: fixed 
(/var/lib/HPCCSystems/hpcc-data/thor/thor/temp/tst/tst1f0/linked/services._4_of_25) - activity(diskread, 63)\\n000002F9 2011-10-28 09:20:00 22509 22995 ITDL starting for output 0 - activity(diskread, 63)\\n000002FA 2011-10-28 09:20:00 22509 22996 ITDL starting for output 0 - activity(split, 65)\\n000002FB 2011-10-28 09:20:00 22509 22995 ITDL starting for output 0 - activity(join, 66)\\n000002FC 2011-10-28 09:20:00 22509 22995 DISKREAD: Number of rows to read: 4757577 - activity(diskread, 63)\\n000002FD 2011-10-28 09:20:00 22509 22995 ITDL starting for output 0 - activity(firstn, 67)\\n000002FF 2011-10-28 09:20:00 22509 22995 Starting input - activity(firstn, 67)\\n000002FE 2011-10-28 09:20:00 22509 22982 ITDL starting for output 2 - activity(split, 65)\\n00000300 2011-10-28 09:20:00 22509 22977 ITDL starting for output 0 - activity(join, 74)\\n00000301 2011-10-28 09:20:00 22509 22977 DISKREAD: Number of rows to read: 188075 - activity(diskread, 73)\\n00000302 2011-10-28 09:20:00 22509 22977 ITDL starting for output 0 - activity(firstn, 75)\\n00000303 2011-10-28 09:20:00 22509 22977 Starting input - activity(firstn, 75)\\n00000304 2011-10-28 09:20:00 22509 22973 WORKUNITWRITE: processing first block - activity(workunitwrite, 76)\\n00000305 2011-10-28 09:20:00 22509 22971 WORKUNITWRITE: processing first block - activity(workunitwrite, 68)\\n00000306 2011-10-28 09:20:00 22509 22999 Record size = 175 - activity(diskread, 63)\\n00000307 2011-10-28 09:20:00 22509 22994 Record size = 114 - activity(diskread, 69)\\n00000308 2011-10-28 09:20:00 22509 22991 Record size = 45 - activity(diskread, 64)\\n00000309 2011-10-28 09:20:00 22509 22982 Record size = 45 - activity(split, 65)\\n0000030A 2011-10-28 09:20:00 22509 22987 Record size = 45 - activity(split, 65)\\n0000030B 2011-10-28 09:20:00 22509 22996 Record size = 45 - activity(split, 65)\\n0000030C 2011-10-28 09:20:00 22509 22999 Record size = 181 - activity(join, 66)\\n0000030D 2011-10-28 09:20:00 22509 22994 Record size = 120 - activity(join, 70)\\n0000030E 2011-10-28 09:20:00 22509 22997 Record size = 127 - activity(diskread, 73)\\n0000030F 2011-10-28 09:20:00 22509 22997 Record size = 133 - activity(join, 74)\\n00000310 2011-10-28 09:20:00 22509 22995 FIRSTN: Record limit is 0 0 - activity(firstn, 67)\\n00000311 2011-10-28 09:20:00 22509 22999 FIRSTN: Read 0 records, left to skip=0 - activity(firstn, 67)\\n00000312 2011-10-28 09:20:00 22509 22995 Stopping input for - activity(firstn, 67)\\n00000313 2011-10-28 09:20:00 22509 22999 FIRSTN: maximum row count 100 - activity(firstn, 67)\\n00000314 2011-10-28 09:20:00 22509 22999 Stopping input for - activity(join, 66)\\n00000315 2011-10-28 09:20:00 22509 22999 ITDL output 0 stopped, count was 100 - activity(diskread, 63)\\n00000316 2011-10-28 09:20:00 22509 22999 Stopping input for - activity(join, 66)\\n00000317 2011-10-28 09:20:00 22509 22996 JOIN: RHS input finished, 7129 rows read - activity(join, 66)\\n00000318 2011-10-28 09:20:00 22509 22996 ITDL output 0 stopped, count was 7129 - activity(split, 65)\\n00000319 2011-10-28 09:20:00 22509 22999 SortJoinSlaveActivity::stop - activity(join, 66)\\n0000031A 2011-10-28 09:20:00 22509 22999 ITDL output 0 stopped, count was 100 - activity(join, 66)\\n0000031B 2011-10-28 09:20:00 22509 22995 ITDL output 0 stopped, count was 0 - activity(firstn, 67)\\n0000031C 2011-10-28 09:20:00 22509 22987 ================================================\\n0000031D 2011-10-28 09:20:00 22509 22987 Signal: 11 Segmentation fault\\n0000031E 2011-10-28 09:20:00 22509 22987 
Fault IP: 00002B94D671B607\\n0000031F 2011-10-28 09:20:00 22509 22987 Accessing: 000000000000000C\\n00000320 2011-10-28 09:20:00 22509 22987 Registers:\\n00000321 2011-10-28 09:20:00 22509 22987 EAX:0000000000000077 EBX:0000000010651360 ECX:0000000010651418 EDX:0000000000000000 ESI:0000000000000000 EDI:0000000010651360\\n00000322 2011-10-28 09:20:00 22509 22987 CS:EIP:0033:00002B94D671B607\\n00000323 2011-10-28 09:20:00 22509 22987 ESP:000000004926FEF0 EBP:0000000000000001\\n00000324 2011-10-28 09:20:00 22509 22987 Stack[000000004926FEF0]: 000000001064E038 000000B700000000 00000000000000B7 0000000000000000 0000000100000000 4927100000000001 0000000049271000 0000000000000000\\n00000325 2011-10-28 09:20:00 22509 22987 Stack[000000004926FF10]: 0000000000000000 DA2978BB00000000 00002B94DA2978BB 106513C800002B94 00000000106513C8 1065136000000000 0000000010651360 1064937000000000\\n00000326 2011-10-28 09:20:00 22509 22987 Stack[000000004926FF30]: 0000000010649370 1065138800000000 0000000010651388 0000000100000000 0000000000000001 0000000100000000 0000000000000001 0000007700000000\\n00000327 2011-10-28 09:20:00 22509 22987 Stack[000000004926FF50]: 0000000000000077 D6719B0C00000000 00002B94D6719B0C 0000000000002B94 0000000000000000 106513F000000000 00000000106513F0 0000000000000000\\n00000328 2011-10-28 09:20:00 22509 22987 Stack[000000004926FF70]: 0000000000000000 4927094000000000 0000000049270940 0000000700000000 0000000000000007 1064939800000000 0000000010649398 1064937000000000\\n00000329 2011-10-28 09:20:00 22509 22987 Stack[000000004926FF90]: 0000000010649370 106513C800000000 00000000106513C8 0000000700000000 0000000000000007 4927100000000000 0000000049271000 0000100000000000\\n0000032A 2011-10-28 09:20:00 22509 22987 Stack[000000004926FFB0]: 0000000000001000 D671B90D00000000 00002B94D671B90D AC003A0000002B94 00002AAAAC003A00 D6EEBFFF00002AAA 00002B94D6EEBFFF DA5870E700002B94\\n0000032B 2011-10-28 09:20:00 22509 22987 Stack[000000004926FFD0]: 00002B94DA5870E7 AC00363000002B94 00002AAAAC003630 B400400000002AAA 00002AAAB4004000 1064FD3000002AAA 000000001064FD30 0000000700000000\\n0000032C 2011-10-28 09:20:00 22509 22987 Backtrace:\\n0000032D 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2b94d46121b6]\\n0000032E 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2b94d46131b5]\\n0000032F 2011-10-28 09:20:00 22509 22987 /lib64/libc.so.6 [0x2b94da2552d0]\\n00000330 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libgraph_lcr.so(_ZN20CSharedWriteAheadMem8readRowsEjj+0x97) [0x2b94d671b607]\\n00000331 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libgraph_lcr.so(_ZN21CSharedWriteAheadBase8loadMoreEj+0x9c) [0x2b94d6719b0c]\\n00000332 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libgraph_lcr.so(_ZN21CSharedWriteAheadBase7COutput7nextRowEv+0x18d) [0x2b94d671b90d]\\n00000333 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN14SplitterOutput7nextRowEv+0x6f) [0x2b94d6eec60f]\\n00000334 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN18ThorLookaheadCache3runEv+0x12f) [0x2b94d6f03def]\\n00000335 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2b94d46a28e7]\\n00000336 2011-10-28 09:20:00 22509 22987 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2b94d46a30bf]\\n00000337 2011-10-28 09:20:00 22509 22987 /lib64/libpthread.so.0 [0x2b94da58373d]\\n00000338 2011-10-28 
09:20:00 22509 22987 /lib64/libc.so.6(clone+0x6d) [0x2b94da2f94bd]\\n00000339 2011-10-28 09:20:00 22509 22987 ThreadList:\\n42465940 1111906624 22514: CLogMsgManager::MsgProcessor\\n42E66940 1122396480 22515: CMPNotifyClosedThread\\n40F75940 1089952064 22516: MP Connection Thread\\n44268940 1143376192 22518: CSocketSelectThread\\n43867940 1132886336 22655: CBackupHandler\\n44C69940 1153866048 22656: CGraphProgressHandler\\n4606B940 1174845760 22806: CMemoryUsageReporter\\n4566A940 1164355904 22811: CGraphExecutor pool\\n46A6C940 1185335616 22971: ProcessSlaveActivity\\n47E6E940 1206315328 22973: ProcessSlaveActivity\\n4BA74940 1269254464 22974: ProcessSlaveActivity\\n41976940 1100441920 22977: ThorLookaheadCache\\n4746D940 1195825472 22980: ThorLookaheadCache\\n4886F940 1216805184 22982: ThorLookaheadCache\\n49270940 1227295040 22987: ThorLookaheadCache\\n49C71940 1237784896 22991: CWriter\\n4A672940 1248274752 22994: ThorLookaheadCache\\n4B073940 1258764608 22995: Stopped ThorLookaheadCache\\n4CE76940 1290234176 22997: ThorLookaheadCache
\", \"post_time\": \"2011-10-28 14:40:46\" },\n\t{ \"post_id\": 550, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"aintnomyth\", \"post_text\": \"It's too big to post it all but here's the graph portion of the log from node 2:\\n\\n00000194 2011-10-28 09:19:56 22509 22509 Using query: /var/lib/HPCCSystems/queries/mythor/V3436643179_libW20111028-091953-4.so\\n00000195 2011-10-28 09:19:56 22509 22509 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n00000196 2011-10-28 09:19:56 22509 22509 Current reported version is FILESERVICES 2.1.3\\n00000197 2011-10-28 09:19:56 22509 22509 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n00000198 2011-10-28 09:19:56 22509 22509 Compatible version FILESERVICES 2.1.1\\n00000199 2011-10-28 09:19:56 22509 22509 Compatible version FILESERVICES 2.1.2\\n0000019A 2011-10-28 09:19:56 22509 22509 Compatible version FILESERVICES 2.1.3\\n0000019B 2011-10-28 09:19:56 22509 22509 New Graph started : graph5\\n0000019C 2011-10-28 09:19:56 22509 22509 temp directory cleared\\n0000019D 2011-10-28 09:19:56 22509 22509 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 785036, /var/lib/HPCCSystems/hpcc-mirror/thor = 785036\\n0000019E 2011-10-28 09:19:56 22509 22509 Key file cache size set to: 52\\n0000019F 2011-10-28 09:19:56 22509 22509 GraphInit: W20111028-091953-4graph5\\n000001A0 2011-10-28 09:19:56 22509 22509 deserializeMPTag: tag = 65546\\n000001A1 2011-10-28 09:19:56 22509 22509 deserializeMPTag: tag = 65548\\n000001A2 2011-10-28 09:19:56 22509 22509 deserializeMPTag: tag = 65541\\n000001A3 2011-10-28 09:19:56 22509 22509 deserializeMPTag: tag = 65540\\n000001A4 2011-10-28 09:19:56 22509 22509 Add: Launching graph thread for graphId=36\\n000001A5 2011-10-28 09:19:56 22509 22811 Running graph [global] : <graph>\\n <node id="37" label="Csv Read '...::membership'">\\n <att name="_kind" value="99"/>\\n <att name="ecl" value="DATASET('~thor::[ My Co ]::tst::tst1f0::in::membership', thislayout, CSV(separator('\\\\t'))); FILTER(~loadrulesfailed); "/>\\n <att name="recordSize" value="480"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(197,9)"/>\\n <att name="_fileName" value="~thor::[ My Co ]::tst::tst1f0::in::membership"/>\\n </node>\\n <node id="38" label="If">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(196,3)"/>\\n <att name="name" value="resultds"/>\\n <att name="_kind" value="35"/>\\n <att name="ecl" value="IF(std.file.fileexists(code.[ My Prod ].poc.dataload.getlogicalpaths.membership, false), DISKREAD(), ...); "/>\\n <att name="recordSize" value="480"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_graphIndependent" value="1"/>\\n </node>\\n <node id="39" label="Split">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(196,26)"/>\\n <att name="_kind" value="6"/>\\n <att name="ecl" value="resultds; "/>\\n <att name="recordSize" value="480"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="40" label="Filter">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(36,4)"/>\\n <att name="name" value="passds"/>\\n <att name="_kind" value="5"/>\\n <att name="ecl" value="FILTER(~loadrulesfailed); "/>\\n <att name="recordSize" value="480"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="41" label="Count Project">\\n <att name="definition" value="code\\\\[ My Prod 
]\\\\ingest\\\\Linking.ecl(37,22)"/>\\n <att name="_kind" value="38"/>\\n <att name="ecl" value="PROJECT(TRANSFORM(RECORD string3 sub_client_cd; string32 policy_holder_id; string4 suffix_id; ... END,...)); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="42" label="Local Sort">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(37,4)"/>\\n <att name="name" value="tempsort"/>\\n <att name="_kind" value="3"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="SORT(sub_client_cd, policy_holder_id, suffix_id, local); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="43" label="Local Group">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(38,4)"/>\\n <att name="name" value="tempgroup"/>\\n <att name="_kind" value="20"/>\\n <att name="local" value="1"/>\\n <att name="ecl" value="GROUP(sub_client_cd, policy_holder_id, suffix_id, local); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="44" label="Grouped Iterate">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(50,4)"/>\\n <att name="name" value="dedupresult"/>\\n <att name="_kind" value="9"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="ITERATE(TRANSFORM(<unnamed>,SELF.isdup := duprec;SELF.duporder := thisc;SELF.sub_client_cd := RIGHT.sub_client_cd;...)); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="45" label="Grouped Sort">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(51,4)"/>\\n <att name="name" value="revsort"/>\\n <att name="_kind" value="3"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="SORT(sub_client_cd, policy_holder_id, suffix_id, -duporder); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="46" label="Grouped Iterate">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(58,4)"/>\\n <att name="name" value="result"/>\\n <att name="_kind" value="9"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="ITERATE(TRANSFORM(<unnamed>,SELF.sub_client_cd := RIGHT.sub_client_cd;SELF.policy_holder_id := RIGHT.policy_holder_id;...)); "/>\\n <att name="recordSize" value="50"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="47" label="Grouped Filtered Project">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\ingest\\\\Linking.ecl(73,22)"/>\\n <att name="_kind" value="7"/>\\n <att name="grouped" value="1"/>\\n <att name="ecl" value="PROJECT(TRANSFORM(RECORD string3 sub_client_cd; string32 policy_holder_id; string4 suffix_id; ... 
END,...)); "/>\\n <att name="recordSize" value="45"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="48" label="Hash Distribute">\\n <att name="definition" value="code\\\\[ My Prod ]\\\\poc\\\\DataLoad.ecl(320,3)"/>\\n <att name="name" value="headerdist"/>\\n <att name="_kind" value="25"/>\\n <att name="ecl" value="DISTRIBUTE(HASH32(sub_client_cd, policy_holder_id, suffix_id)); "/>\\n <att name="recordSize" value="45"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n </node>\\n <node id="49" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="45"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="50" label="Output Result #2">\\n <att name="definition" value="C:\\\\Users\\\\BROWNJ~1\\\\AppData\\\\Local\\\\Temp\\\\TFR56C2.tmp(4,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="45"/>\\n </node>\\n <node id="51" label="Disk Write Spill File">\\n <att name="_kind" value="2"/>\\n <att name="_internal" value="1"/>\\n <att name="ecl" value="OUTPUT(..., , '~spill::C1', __compressed__); "/>\\n <att name="recordSize" value="480"/>\\n <att name="_fileName" value="~spill::C1"/>\\n <att name="_isSpill" value="1"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="37_0"\\n label="True"\\n source="37"\\n target="38"/>\\n <edge id="38_0" source="38" target="39"/>\\n <edge id="39_0" source="39" target="40"/>\\n <edge id="40_0" source="40" target="41"/>\\n <edge id="41_0" source="41" target="42"/>\\n <edge id="42_0" source="42" target="43"/>\\n <edge id="43_0" source="43" target="44"/>\\n <edge id="44_0" source="44" target="45"/>\\n <edge id="45_0" source="45" target="46"/>\\n <edge id="46_0" source="46" target="47"/>\\n <edge id="47_0" source="47" target="48"/>\\n <edge id="48_0" source="48" target="49"/>\\n <edge id="49_0" source="49" target="50"/>\\n <edge id="39_1" source="39" target="51">\\n <att name="_sourceIndex" value="1"/>\\n </edge>\\n </graph>\\n - graph(graph5, 36)
\", \"post_time\": \"2011-10-28 14:40:09\" },\n\t{ \"post_id\": 549, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed exception\", \"username\": \"jsmith\", \"post_text\": \"Afaics, from that part of the master log, there's no nwayselect activity involved, so doesn't look like it (You should be able to see a NWaySelect node on the graph in the IDE/Eclwatch btw to verify also).\\n\\nCould you post the corresponding log from the slave (.2) that the master lost contact with?\", \"post_time\": \"2011-10-28 14:00:25\" },\n\t{ \"post_id\": 548, \"topic_id\": 148, \"forum_id\": 8, \"post_subject\": \"MP link closed exception\", \"username\": \"aintnomyth\", \"post_text\": \"I have 3 JOINs sharing the same RIGHT recordset. Sometimes I get an exception and sometimes I don't. I'm wondering if this is the same "nway-select activity" bug that is fixed in issue 772.\\n\\n\\nError: System error: 4: Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception\\nInfo: 4, Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception : Error aborting job, will cause thor restart\\n\\n\\n\\nA snippet from the THOR log around where the exception starts:\\n00000193 2011-10-28 09:20:00 30031 30270 Processing graph - graph(graph5, 62)\\n00000194 2011-10-28 09:20:00 30031 30270 preStart - activity(join, 66)\\n00000195 2011-10-28 09:20:00 30031 30270 preStart - activity(join, 74)\\n00000196 2011-10-28 09:20:00 30031 30270 preStart - activity(join, 70)\\n00000197 2011-10-28 09:20:00 30031 30317 process - activity(join, 66)\\n00000198 2011-10-28 09:20:00 30031 30317 process exit - activity(join, 66)\\n00000199 2011-10-28 09:20:00 30031 30323 process - activity(join, 74)\\n0000019A 2011-10-28 09:20:00 30031 30323 process exit - activity(join, 74)\\n0000019B 2011-10-28 09:20:00 30031 30327 process - activity(join, 70)\\n0000019C 2011-10-28 09:20:00 30031 30327 process exit - activity(join, 70)\\n0000019D 2011-10-28 09:20:00 30031 30036 jsocket(9,2166) shutdown err = 107 : C! 
[My IP].2\\n0000019E 2011-10-28 09:20:00 30031 30315 activity(workunitwrite, 68) : Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception\\n0000019F 2011-10-28 09:20:00 30031 30315 4: Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception\\n000001A0 2011-10-28 09:20:00 30031 30315 INFORM [EXCEPTION]\\n000001A1 2011-10-28 09:20:00 30031 30315 4: Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception\\n000001A2 2011-10-28 09:20:00 30031 30315 Posting exception: Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception to agent [My IP].1 for workunit(W20111028-091953-4)\\n000001A3 2011-10-28 09:20:00 30031 30315 INFORM [EXCEPTION]\\n000001A4 2011-10-28 09:20:00 30031 30326 activity(firstn, 71) : Graph[62], firstn[71]: MP link closed ( [My IP].2:6600), Master exception\\n000001A5 2011-10-28 09:20:00 30031 30325 activity(workunitwrite, 72) : Graph[62], workunitwrite[72]: MP link closed ( [My IP].2:6600), Master exception\\n000001A6 2011-10-28 09:20:00 30031 30322 activity(firstn, 75) : Graph[62], firstn[75]: MP link closed ( [My IP].2:6600), Master exception\\n000001A7 2011-10-28 09:20:00 30031 30321 activity(workunitwrite, 76) : Graph[62], workunitwrite[76]: MP link closed ( [My IP].2:6600), Master exception\\n000001A8 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(workunitwrite, 68)\\n000001A9 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(firstn, 67)\\n000001AA 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(join, 66)\\n000001AB 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(diskread, 63)\\n000001AC 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(split, 65)\\n000001AD 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(diskread, 64)\\n000001AE 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(workunitwrite, 76)\\n000001AF 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(firstn, 75)\\n000001B0 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(join, 74)\\n000001B1 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(diskread, 73)\\n000001B2 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(workunitwrite, 72)\\n000001B3 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(firstn, 71)\\n000001B4 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(join, 70)\\n000001B5 2011-10-28 09:20:01 30031 30315 Abort condition set - activity(diskread, 69)\\n000001B6 2011-10-28 09:20:01 30031 30315 Aborting master graph - graph(graph5, 62) : MP link closed ( [My IP].2:6600)\\n000001B7 2011-10-28 09:20:02 30031 30315 Aborting slave graph - graph(graph5, 62) : MP link closed ( [My IP].2:6600)\\n000001B8 2011-10-28 09:20:02 30031 30315 4: Reporting exception to WU : 4, Graph[62], workunitwrite[68]: MP link closed ( [My IP].2:12600), Master exception : Error aborting job, will cause thor restart\\n000001B9 2011-10-28 09:20:02 30031 30315 Stopping jobManager\\n000001BA 2011-10-28 09:20:02 30031 30325 4: Graph[62], workunitwrite[72]: MP link closed ( [My IP].2:6600), Master exception\\n000001BB 2011-10-28 09:20:02 30031 30325 INFORM [EXCEPTION]\\n000001BC 2011-10-28 09:20:02 30031 30325 4: Graph[62], workunitwrite[72]: MP link closed ( [My IP].2:6600), Master exception\\n000001BD 2011-10-28 09:20:02 30031 30325 INFORM [EXCEPTION]
\", \"post_time\": \"2011-10-28 13:41:53\" },\n\t{ \"post_id\": 555, \"topic_id\": 149, \"forum_id\": 8, \"post_subject\": \"Re: submitting a query and going to bed\", \"username\": \"ghalliday\", \"post_text\": \"Try adding\\n\\nreturnOnWait=1\\n\\nto the command line.\", \"post_time\": \"2011-10-31 12:37:58\" },\n\t{ \"post_id\": 552, \"topic_id\": 149, \"forum_id\": 8, \"post_subject\": \"submitting a query and going to bed\", \"username\": \"nvasil\", \"post_text\": \"I am using eclplus to submit queries. Sometimes the query might take time and I might want to shutdown my laptop and leave the query running on the cluster. I noticed that if I press Ctr ^C or shutdown my computer the query is canceled on the cluster. Is there a way I can submit a query with eclplus and return to shell immediately\", \"post_time\": \"2011-10-28 19:17:43\" },\n\t{ \"post_id\": 554, \"topic_id\": 150, \"forum_id\": 8, \"post_subject\": \"Re: Cannot index ALL\", \"username\": \"ghalliday\", \"post_text\": \"That error means that you have a list containing the value ALL\\ne.g.\\n\\nnameList := ALL;\\n\\nwhich you are then trying to index either explicitly\\n\\nnameList[2]\\n\\nor implicitly e.g., by converting it to a dataset.\\n\\ndataset(namesList, { unsigned i; });\\n\\nI agree the error message for the latter could be impoved. I will also see if I can try and include the name of the set if it has one.\", \"post_time\": \"2011-10-31 09:45:15\" },\n\t{ \"post_id\": 553, \"topic_id\": 150, \"forum_id\": 8, \"post_subject\": \"Cannot index ALL\", \"username\": \"nvasil\", \"post_text\": \"I am getting the following runtime error\\n\\nGraph[1], csvread[2]: SLAVE 10.xxx.xxx.xxx:6600: Cannot index ALL [id=2]\\n\\nIt seems to be generated by this statement\\n\\nrdata1:=JOIN(rdata0, as_info2, \\n LEFT.ip>=RIGHT.ip and LEFT.ip<=RIGHT.ip+RIGHT.size,\\n MyTransform(LEFT, RIGHT), LEFT OUTER, ALL);\\n\\nalthough it is not clear\\n\\nECL errors have to be more informative. The compiler is good most of the time, but the runtime always gives me headaches\", \"post_time\": \"2011-10-29 01:45:59\" },\n\t{ \"post_id\": 573, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Re: Incremental dedup\", \"username\": \"joecella\", \"post_text\": \"You may also want to look at the MERGE function for your incremental process. It has a DEDUP option that may or may not be applicable to your situation.\", \"post_time\": \"2011-11-02 17:38:20\" },\n\t{ \"post_id\": 569, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Re: Incremental dedup\", \"username\": \"rtaylor\", \"post_text\": \"Now assume that I want to serve this file on Roxie. How can I incorporate these incremental dedups?
\\n\\nIf the target is Roxie, then you want to be using payload INDEXes that are aggregated into SuperKeys. That way, your DEDUP of the new data would actually be a LEFT ONLY JOIN, something like this:\\n\\n\\nNewrecs := JOIN(NewData,\\n OldDataIndex,\\n LEFT.KeyField=RIGHT.KeyField,\\n TRANSFORM(LEFT), \\n LEFT ONLY);\\n
\\nNote that the right dataset is an INDEX of the original deduped data, making this a half-keyed JOIN (thus, more efficient). Then you would just build the new INDEX to add to the SuperKey from NewRecs only those new records that have no match in the old data).\", \"post_time\": \"2011-10-31 18:07:51\" },\n\t{ \"post_id\": 564, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Re: Incremental dedup\", \"username\": \"bforeman\", \"post_text\": \"Of course this all depends on exactly what you need to do. \\n\\nAssuming that the large superfile (let's call it "Base") is already sorted/distributed, what you should only need to do is to first preprocess the new file (let's call it "Daily"), so I would DISTRIBUTE the Daily file first, and then SORT and DEDUP the Daily using the LOCAL option. After that, a simple JOIN using the Base as the Left Recordset and the Daily as the right, using a RIGHT ONLY Join type and the NOSORT Join Flag. OUTPUT the JOIN result to a new file, and then add that file to the Base Superfile. That should work fine and optimize very well.\", \"post_time\": \"2011-10-31 16:28:56\" },\n\t{ \"post_id\": 560, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Re: Incremental dedup\", \"username\": \"nvasil\", \"post_text\": \"Thanks Bob, but let me see if I understand it.\\n\\nI run DISTRIBUTE, SORT LOCAL, DEDUP on my superfile (100GB)\\nthen I add another file (100MB) on my superfile\\nI do again DISTRIBUTE, SORT LOCAL, DEDUP \\n\\nThat seems to be doing the whole work from scratch. Will it take advantage of the fact that the superfile (100GB) is presorted?\\n\\nIn a traditional Cassandra system I would do it with lookups since the 100GB >>100MB\\n\\nam I missing something?\", \"post_time\": \"2011-10-31 15:06:37\" },\n\t{ \"post_id\": 558, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Re: Incremental dedup\", \"username\": \"bforeman\", \"post_text\": \"If you want to use DEDUP, yes, I would recommend using DISTRIBUTE and SORT, LOCAL and DEDUP,LOCAL to optimize the DEDUP.\\n\\nYou might also look at using an inner JOIN, where a SORT would not be necessary and you could define the LEFT recordset as the main file and the new update as the RIGHT record set.\\n\\nRegarding deployment to ROXIE, Just updating the deduped sub-file should be all that you need.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-31 14:55:22\" },\n\t{ \"post_id\": 556, \"topic_id\": 151, \"forum_id\": 8, \"post_subject\": \"Incremental dedup\", \"username\": \"nvasil\", \"post_text\": \"Let's say I have run dedup on a superfile. Then I want to add another file and do dedup again. Do I have to run sort dedup again on the whole file?\\nIs there a fastest way. \\n\\nNow assume that I want to serve this file on Roxie. How can I incorporate these incremental dedups?\", \"post_time\": \"2011-10-31 14:15:22\" },\n\t{ \"post_id\": 570, \"topic_id\": 152, \"forum_id\": 8, \"post_subject\": \"Re: Join being slow\", \"username\": \"nvasil\", \"post_text\": \"Thank you, \\n\\nI will try to find out in the documentation how to build an index. 
I am not sure I know what is the half-key join\", \"post_time\": \"2011-10-31 18:16:43\" },\n\t{ \"post_id\": 568, \"topic_id\": 152, \"forum_id\": 8, \"post_subject\": \"Re: Join being slow\", \"username\": \"bforeman\", \"post_text\": \"Yes, of course, sorry, I missed that.\\n\\nIn that case I would say that your code as written looks to be optimized, but other factors need to be considered such as what does the actual data look like, how many nodes are you running on, etc.\\n\\nOne way to improve the performance would be to create an index on the right record set and then do a half-keyed join. That may be an option for you.\", \"post_time\": \"2011-10-31 17:23:46\" },\n\t{ \"post_id\": 566, \"topic_id\": 152, \"forum_id\": 8, \"post_subject\": \"Re: Join being slow\", \"username\": \"nvasil\", \"post_text\": \"I have tried LOOKUP but because the join condition doesn't have equality the compiler complains and suggests to use ALL\", \"post_time\": \"2011-10-31 16:55:24\" },\n\t{ \"post_id\": 565, \"topic_id\": 152, \"forum_id\": 8, \"post_subject\": \"Re: Join being slow\", \"username\": \"bforeman\", \"post_text\": \"Have you tried using LOOKUP instead of ALL? That should speed things up nicely.\", \"post_time\": \"2011-10-31 16:44:15\" },\n\t{ \"post_id\": 559, \"topic_id\": 152, \"forum_id\": 8, \"post_subject\": \"Join being slow\", \"username\": \"nvasil\", \"post_text\": \"Is there any way I can speed up this JOIN\\n\\n\\nmydata1:=JOIN(mydata0, my_lookup, \\n LEFT.key>=RIGHT.key_lo and LEFT.key<=RIGHT.key_hi,\\n DoLoopkup(LEFT, RIGHT), LEFT OUTER, ALL);\\n\\nMyRecord := RECORD\\n INTEGER key_lo;\\n INTEGER key_hi;\\n STRING value;\\nEND;\\n\\n\\nDATASET(MyRecord) my_lookup;\\n\\nmy_lookup is fairly small and it can fit in memory.\\n\\nDo I have to SORT it before using the JOIN or the JOIN will automatically sort it.\\n\\nI also want to point out that the ranges defined in my_lookup are non overlapping which means that once it finds a range that it fits it doesn't need to search more.\", \"post_time\": \"2011-10-31 15:01:32\" },\n\t{ \"post_id\": 615, \"topic_id\": 153, \"forum_id\": 8, \"post_subject\": \"Re: Can someone please explain the GRAPH operation?\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nWe will put something together, the trick is finding a cool example that goes beyond what is in the LRM.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-17 21:40:48\" },\n\t{ \"post_id\": 613, \"topic_id\": 153, \"forum_id\": 8, \"post_subject\": \"Re: Can someone please explain the GRAPH operation?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI just noticed some training videos on a few ECL functions (e.g. JOIN) on this site.\\n\\nIn my opinion these are excellent. 
Can we have one on the GRAPH function please?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-17 21:18:52\" },\n\t{ \"post_id\": 574, \"topic_id\": 153, \"forum_id\": 8, \"post_subject\": \"Re: Can someone please explain the GRAPH operation?\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nI actually found that passage myself, was not that much enlightened.\\n\\nI'll just have to nibble away at it, I just wish there was a simple example for one first bite.\\n\\nIt also seems a little odd that all indexes in ECL are from 1, yet the first dataset in a Graph is referenced as the Zeroth dataset.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-02 19:52:56\" },\n\t{ \"post_id\": 572, \"topic_id\": 153, \"forum_id\": 8, \"post_subject\": \"Re: Can someone please explain the GRAPH operation?\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nSorry for the delay in response, but our team is currently doing a little research on this statement. As soon as we get some more feedback from our development team, I will post additional information.\\n\\nFor now, what David Bayliss says on his web site is spot on. GRAPH is a language tool designed for ROXIE. To quote:\\n\\n"The GRAPH statement allows you to create a Roxie execution graph at QUERY time out of a collection of components constructed at COMPILE time. So, first bit of confusion to get over: GRAPH does not help you process graphs; GRAPH creates an execution graph which it then executes.\\n\\nThe heart of a GRAPH statement is a function. In your GRAPH statement you get to pick how many times the function is called; at each call the function must return a dataset."\\n\\nMore to follow...\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-02 14:19:49\" },\n\t{ \"post_id\": 571, \"topic_id\": 153, \"forum_id\": 8, \"post_subject\": \"Can someone please explain the GRAPH operation?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm failing to understand the GRAPH operation.\\n\\nThere is a rather complex example in the 'KJV Bible' project on the web.\\nPlus a simpler example in the ECL Reference manual.\\nI'll repeat it here.\\n\\nnamesRec := RECORD\\n STRING20 lname;\\n STRING10 fname;\\n UNSIGNED2 age := 25;\\n UNSIGNED2 ctr := 0;\\nEND;\\nnamesTable2 := DATASET([{'Flintstone','Fred',35},\\n {'Flintstone','Wilma',33},\\n {'Jetson','Georgie',10},\\n {'Mr. T','Z-man'}], namesRec);\\n\\nloopBody(SET OF DATASET(namesRec) ds, UNSIGNED4 c) :=\\n PROJECT(ds[c-1], //ds[0]=original input\\n TRANSFORM(namesRec,\\n SELF.age := LEFT.age+c; //c is graph COUNTER\\n SELF.ctr := COUNTER; //PROJECT’s COUNTER\\n SELF := LEFT));\\n\\ng1 := GRAPH(namesTable2,10,loopBody(ROWSET(LEFT),COUNTER));\\n\\nOUTPUT(g1);\\n
\\n\\nI've tried different values for GRAPH's 2nd parameter in an attempt to understand its workings, and I half understand what's going on.\\nI understand whats being attempted (a single pass multi-join), its just the mechanics of what's going on. \\n\\nI (and I expect others) could do with a very simple example to kick start me.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-01 19:45:44\" },\n\t{ \"post_id\": 579, \"topic_id\": 154, \"forum_id\": 8, \"post_subject\": \"Re: Missing argument in Example 'dfuplus'\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe HPCC Client Tools PDF documents the DFUplus command line program. At the top of page 52 of that doc there is a discussion of the DFUPLUS.INI file, which allows you to set constant parameters for the program. The first line in the file listing on that page is the server option that's missing. \\n\\nYou will note that immediately below that listing there is the sentence, "In all the examples below, we'll assume DFUPLUS.INI has the above content." Doing this allowed us to write cleaner code examples without repeating all the "standard stuff" in each one.\\n\\nThe Programmer's Guide article was also written the same way -- with the assumption of a DFUPLUS.INI containing all the "standard stuff" being present. I will update the article to expressly state this assumption and direct the reader to the DFUplus docs.\\n\\nThanks for pointing this out,\\n\\nRichard\", \"post_time\": \"2011-11-04 18:12:39\" },\n\t{ \"post_id\": 577, \"topic_id\": 154, \"forum_id\": 8, \"post_subject\": \"Re: Missing argument in Example 'dfuplus'\", \"username\": \"bforeman\", \"post_text\": \"Thanks Allan, I will forward this to our documentation team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-04 17:56:08\" },\n\t{ \"post_id\": 576, \"topic_id\": 154, \"forum_id\": 8, \"post_subject\": \"Missing argument in Example 'dfuplus'\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm attempting to import BLOB data as described in the 'ECL Programmers Guide' section 'Working with BLOBS'.\\n\\nI find the example 'dfuplus action=spray' command is missing, what seems to be, the mandatory 'server' argument. 
\\n\\nWithout it the dfuplus fails with error 'Esp server url not specified'.\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-04 17:03:49\" },\n\t{ \"post_id\": 586, \"topic_id\": 155, \"forum_id\": 8, \"post_subject\": \"Re: problems spraying in images.\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback Allan, I will follow this up with the development team.\", \"post_time\": \"2011-11-07 13:44:24\" },\n\t{ \"post_id\": 582, \"topic_id\": 155, \"forum_id\": 8, \"post_subject\": \"Re: problems spraying in images.\", \"username\": \"Allan\", \"post_text\": \"Hi Bforeman,\\n\\nWith a single file (no path) I got an error, indicating it was looking for the file under ../myesp, my default directory being mydropzone.\\n\\nSo I just moved all the files over to ../myesp, the subsequent re-run with default directory as 'myesp' worked a treat.\\n\\nSo the issue seems to be a lack of an error when specifying multiple files when running from the wrong default directory.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2011-11-05 09:41:17\" },\n\t{ \"post_id\": 580, \"topic_id\": 155, \"forum_id\": 8, \"post_subject\": \"Re: problems spraying in images.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nWhat happens if you test it with just one specific file name (Example: C:\\\\import\\\\image.jpg)\", \"post_time\": \"2011-11-04 20:02:30\" },\n\t{ \"post_id\": 578, \"topic_id\": 155, \"forum_id\": 8, \"post_subject\": \"problems spraying in images.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm attempting to import *.jpg,*.bmp files, i.e. learning about spraying in BLOB's.\\n\\nThe 'dfuplus' command I'm using\\n\\nC:\\\\Users\\\\ALLAN>dfuplus action=spray srcip=localhost srcfile=*.jpg,*.bmp dstcluster=mythor dstname=LE::imagedb overwrite=1 prefix=FILENAME,FILESIZE nosplit=1 server=192.168.117.128\\nChecking for local Dali File Server\\nStarted local Dali file server on 127.0.0.1:7100\\n\\nSpraying from *.jpg,*.bmp on 127.0.0.1:7100 to LE::imagedb\\nSubmitted WUID D20111104-174826\\n0% Done\\nD20111104-174826 Finished\\nTotal time taken\\nC:\\\\Users\\\\ALLAN>echo $?\\n0\\nC:\\\\Users\\\\ALLAN>\\n
\\n\\nSeems to complete ok (exit code 0) but the resultant sprayed file is 0 bytes in length, the operation has not worked, and yes, I do have *.jpg and *.bmp in the default directory.\\n\\nI've attempted this from a DOS command box and from within VMWare, no luck from either.\\n\\nSame result if I attempt to load a single file type e.g. *.bmp\\n\\nI've not specified a full file path to the input files, so is dfuplus looking in some other directory on the 'remote' i.e. localhost machine?\\nTheir currently under 'mydropzone'.\\nI've also run explicitly stating the hpccdemo username and password.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-04 18:09:12\" },\n\t{ \"post_id\": 606, \"topic_id\": 160, \"forum_id\": 8, \"post_subject\": \"Re: Decoding errors\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":1safoovj]Looks like the problem could be with FETCH, can you post some sample code regarding what you are trying to do?\\n\\nThe example you posted was slightly different from another I'd already seen, but it was enough to make a light go off in my head. I fixed my problem. Thanks!\\n\\nThere is likely a number of things wrong with what I was trying to do, so I'm not sure if the FETCH was the real culprit. Heavily elided:\\n\\n\\nEXPORT TableB := MODULE\\n\\nEXPORT Layout := RECORD\\n STRING40 lookupID;\\n // .. lots of other stuff\\nEND; // Layout\\n\\nEXPORT LayoutWithPos := RECORD\\n Layout;\\n UNSIGNED8 __pos {virtual(file position)};\\nEND; // Layout\\n\\nSHARED DS := DATASET('table_b',LayoutWithPos,THOR);\\n\\nEXPORT IDX_LookupID := INDEX(DS,{lookupID, __pos},'table_b_index');\\n\\nEXPORT GetAllForLookupID(STRING40 aLookupID) := FUNCTION\\n RETURN FETCH (DS,IDX_LookupID(lookupID = aLookupID),RIGHT.__pos);\\nEND;\\n\\nEND; // TableB\\n
\\n\\n\\nEXPORT TableA := MODULE\\n\\nEXPORT Layout := RECORD\\n STRING40 lookupID;\\n DATASET(TableB.LayoutWithPosition) tableBData;\\n // .. lots of other stuff\\nEND; // Layout\\n\\nETL(DATASET(Layout) ds) := FUNCTION\\n Layout ETLTransform(RawLayout r) := TRANSFORM\\n SELF.tableBData := TableB.GetAllForLookupID(r.id); // *** silly try, I know, but terribly convenient\\n SELF := r;\\n END; // ETLTransform\\n\\n RETURN PROJECT(ds, ETLTransform(LEFT));\\nEND; // ETL\\n\\nEND; // TableA\\n
\\n\\nThose two code blocks are in separate files, and it may not compile as-is because I'm combining, eliding, etc.. I'm testing on a four-node cluster. The error appeared when trying to execute the TableA.ETL() function.\\n\\nThanks again!\", \"post_time\": \"2011-11-15 18:18:04\" },\n\t{ \"post_id\": 605, \"topic_id\": 160, \"forum_id\": 8, \"post_subject\": \"Re: Decoding errors\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nLooks like the problem could be with FETCH, can you post some sample code regarding what you are trying to do?\\n\\nTypically, you can use NORMALIZE and DENORMALIZE to work with nested Child datasets, like this:\\n\\nParentRec := RECORD\\n INTEGER1 NameID;\\n STRING20 Name;\\nEND;\\nChildRec := RECORD\\n INTEGER1 NameID;\\n STRING20 Addr;\\nEND;\\nDenormedRec := RECORD\\n ParentRec;\\n INTEGER1 NumRows;\\n DATASET(ChildRec) Children {MAXCOUNT(5)};\\nEND;\\n\\nNamesTable := DATASET([ {1,'Gavin'},\\n {2,'Liz'},\\n\\t\\t\\t{3,'Mr Nobody'},\\n\\t\\t\\t{4,'Anywhere'}], \\n\\t\\t\\tParentRec); \\nNormAddrs := DATASET([{1,'10 Malt Lane'},\\t\\n\\t\\t {2,'10 Malt Lane'},\\t\\n\\t\\t {2,'3 The cottages'},\\t\\n\\t\\t {4,'Here'},\\t\\n\\t\\t {4,'There'},\\t\\n\\t\\t {4,'Near'},\\t\\n\\t\\t {4,'Far'}],\\n\\t \\t ChildRec);\\t\\n\\nDenormedRec ParentLoad(ParentRec L) := TRANSFORM\\n SELF.NumRows := 0;\\n SELF.Children := [];\\n SELF := L;\\nEND;\\n//Ptbl := TABLE(NamesTable,DenormedRec);\\nPtbl := PROJECT(NamesTable,ParentLoad(LEFT));\\nOUTPUT(Ptbl,NAMED('ParentDataReady'));\\n\\nDenormedRec DeNormThem(DenormedRec L, ChildRec R, INTEGER C) := TRANSFORM\\n SELF.NumRows := C;\\n SELF.Children := L.Children + R;\\n SELF := L;\\nEND;\\n\\nDeNormedRecs := DENORMALIZE(Ptbl, NormAddrs,\\n\\t\\t\\t LEFT.NameID = RIGHT.NameID,\\n\\t\\t\\t DeNormThem(LEFT,RIGHT,COUNTER));\\n\\nOUTPUT(DeNormedRecs,NAMED('NestedChildDataset'));\\n\\n// *******************************\\n\\nParentRec ParentOut(DenormedRec L) := TRANSFORM\\n SELF := L;\\nEND;\\n\\nPout := PROJECT(DeNormedRecs,ParentOut(LEFT));\\nOUTPUT(Pout,NAMED('ParentExtracted'));\\n\\n// /* Using Form 1 of NORMALIZE */\\nChildRec NewChildren(DenormedRec L, INTEGER C) := TRANSFORM\\n SELF := L.Children[C];\\nEND;\\nNewChilds := NORMALIZE(DeNormedRecs,LEFT.NumRows,NewChildren(LEFT,COUNTER));\\n\\n\\n// /* Using Form 2 of NORMALIZE */\\n// ChildRec NewChildren(ChildRec L) := TRANSFORM\\n // SELF := L;\\n// END;\\n\\n// NewChilds := NORMALIZE(DeNormedRecs,LEFT.Children,NewChildren(RIGHT));\\n\\n// /* Using Form 2 of NORMALIZE with inline TRANSFORM*/\\n //NewChilds := NORMALIZE(DeNormedRecs,LEFT.Children,TRANSFORM(RIGHT));\\n\\nOUTPUT(NewChilds,NAMED('ChildrenExtracted'));
\\n\\nThis is a little example that we use in our training classes.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-15 17:15:12\" },\n\t{ \"post_id\": 603, \"topic_id\": 160, \"forum_id\": 8, \"post_subject\": \"Decoding errors\", \"username\": \"DSC\", \"post_text\": \"I'm stumbling through the ECL learning process and ran into the following error message while attempting to populate a child dataset:\\n\\nError: System error: 0: Graph[22], csvread[23]: SLAVE 10.210.150.78:6600: Global child graph? : Global acts = Graph(25): [fetch(47)] (0, 0), 0, \\n\\nWhat is this error message telling me?\\n\\nOther than within the documentation here, are there any good examples of populating child datasets from recordsets? The scenario involves creating a decent data structure from two RDMS tables' worth of data. Each record in Table A references zero or more records in Table B via a single lookup value, and I'd like to create a single data structure where those Table B values are directly included instead. (And if I'm barking up the wrong tree here, please let me know. I'm learning!)\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2011-11-15 15:42:10\" },\n\t{ \"post_id\": 610, \"topic_id\": 161, \"forum_id\": 8, \"post_subject\": \"Re: Query on documented 'FUNCTION Structure' in ECL Ref manu\", \"username\": \"rtaylor\", \"post_text\": \"Well-spotted! You are correct, and that typo will be corrected ASAP.\", \"post_time\": \"2011-11-16 15:56:11\" },\n\t{ \"post_id\": 609, \"topic_id\": 161, \"forum_id\": 8, \"post_subject\": \"Query on documented 'FUNCTION Structure' in ECL Ref manual.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe very first example for the 'FUNCTION structure' in the ECL Reference guide either has a typo in it or I've miss-understood the example.\\n\\nEXPORT doProjectChild(parentRecord l,UNSIGNED idAdjust2) := FUNCTION\\nnewChildRecord copyChild(childRecord l) := TRANSFORM\\nSELF.person_id := l.person_id + idAdjust2;\\nSELF := l;\\nEND;\\nRETURN PROJECT(CHOOSEN(l.children, numChildren),copyChild(LEFT));\\nEND;\\n//And called from\\nSELF.children := doProjectChildren(l, 99);\\n
\\n\\nShould not the call to 'doProjectChildren' be 'doProjectChild'?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-16 14:33:05\" },\n\t{ \"post_id\": 620, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"Rob Pelley\", \"post_text\": \"Thanks again Richard Fully edited code is now as follows ...\\n\\n
\\nIMPORT STD;\\n\\n// Define Patterns\\n\\nSTRING nonUppercaseAlphaNumeric := '[^A-Z0-9]';\\nSTRING validUppercasePostcode := '[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}';\\nSTRING oneOrTwoUppercaseAlpha := '^[A-Z]{1,2}';\\n\\n// Define Records\\n\\npostcodeRec := RECORD\\n STRING original;\\n STRING normalised;\\n\\tSTRING validated;\\n\\tSTRING outward;\\n\\tSTRING inward;\\n\\tSTRING area;\\n\\tSTRING district;\\n\\tSTRING sector;\\n\\tSTRING unit;\\nEND;\\n\\nlocationRec := RECORD\\n STRING6 postcode;\\n\\tSTRING eastings;\\n\\tSTRING northings;\\n\\tSTRING latitude;\\n\\tSTRING longitude;\\n\\tSTRING town;\\n\\tSTRING region;\\n\\tSTRING country_code;\\n\\tSTRING country;\\nEND;\\n\\npostcodeLocationRec := RECORD\\n postcodeRec;\\n\\tlocationRec;\\nEND;\\n\\n// Define Inline Dataset\\n\\ntestDataDS := DATASET([{'OX12 8QD'},{'PO78JN'},{'RG30 6QA'},{'RG1 6QA'},\\n {'RG74RA'},{'SW1A4WW'},{'BS11 8BN'},{'BS22 8BN'},\\n {'GU31 4DP'},{'PO99RJ'}],{STRING10 original});\\n\\n// Define Transform\\n\\npostcodeRec T1 (TestDataDS pInput) := TRANSFORM\\n SELF.original := pInput.original;\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),'');\\n SELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0);\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1];\\n SELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..];\\n SELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0);\\n SELF.district := SELF.outward[LENGTH(SELF.area)+1..];\\n SELF.sector := SELF.inward[1];\\n SELF.unit := SELF.inward[2..];\\nEND;\\n\\n// Project each Record in the inline Dataset through the Transform \\n\\ntestData := PROJECT(testDataDS,T1(LEFT)); \\n\\n// Define Base Dataset\\n\\npostcodeLocations := DATASET('~tutorial::RP::TutorialPostcode', {locationRec,UNSIGNED8 fpos{virtual(fileposition)}},THOR);\\n\\n// Define Index on Base Dataset\\n\\nlocationIndex := INDEX(postcodeLocations,{postcode},{postcodeLocations},'~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n\\n// Join\\n\\nfullPostcodeData := JOIN(testData,locationIndex,KEYED(LEFT.outward=RIGHT.postcode));\\n\\n// Output\\n\\nOUTPUT(fullPostcodeData);\\n
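\\n\\nOne assumption worth flagging: the half-keyed JOIN above expects the index file '~tutorial::RP::LocationByPostcodeReferenceINDEX' to already exist on the cluster. If it has not been built yet, a minimal sketch of that one extra step (reusing the same locationIndex definition) would be:\\n
// hedged sketch: build the payload index declared above (a one-off step, run before the JOIN)\\nBUILD(locationIndex, OVERWRITE);\\n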
\", \"post_time\": \"2011-11-18 19:21:12\" },\n\t{ \"post_id\": 619, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"rtaylor\", \"post_text\": \"Rob,\\n\\nOnly one more comment -- you added postcode as a payload field in your INDEX when you already have it as a search key. The duplication is not necessary -- postcode will be picked up from the search key field. IOW, each field only needs to be in the INDEX once, as a search key or as a payload field. \\n\\nAlso, there's a shortcut way to represent this INDEX:\\n//instead of:\\nlocationIndex := INDEX(postcodeLocations,{postcode},{eastings,northings,latitude,longitude,town,region,country_code,country},'~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n\\n//just define it this way:\\nlocationIndex := INDEX(postcodeLocations,\\n {postcode},\\n {postcodeLocations},\\n '~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n
\\nThis defines the postcode as the search term, then all the rest of the fields from your postcodeLocations dataset as the payload fields. Remember, in ECL none of this is "executable code" -- it's all just definitions, so the compiler is smart enough to deduce that postcode is already there as a search key so it won't put it in a second time as a payload field.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-11-18 19:00:33\" },\n\t{ \"post_id\": 618, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"Rob Pelley\", \"post_text\": \"Richard,\\n\\nMany thanks for taking the time to review my code, your advice is very much appreciated. I'm sure that other ECL novices will also learn from my mistakes and benefit hugely from your advice.\\n\\nI have now revised the code as per your guidance and the final code follows ...\\n\\nRegards,\\n\\nRob.\\n\\n\\nIMPORT STD;\\n\\n// Define Patterns\\n\\nSTRING nonUppercaseAlphaNumeric := '[^A-Z0-9]';\\nSTRING validUppercasePostcode := '[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}';\\nSTRING oneOrTwoUppercaseAlpha := '^[A-Z]{1,2}';\\n\\n// Define Records\\n\\npostcodeRec := RECORD\\n STRING original;\\n STRING normalised;\\n\\tSTRING validated;\\n\\tSTRING outward;\\n\\tSTRING inward;\\n\\tSTRING area;\\n\\tSTRING district;\\n\\tSTRING sector;\\n\\tSTRING unit;\\nEND;\\n\\nlocationRec := RECORD\\n STRING6 postcode;\\n\\tSTRING eastings;\\n\\tSTRING northings;\\n\\tSTRING latitude;\\n\\tSTRING longitude;\\n\\tSTRING town;\\n\\tSTRING region;\\n\\tSTRING country_code;\\n\\tSTRING country;\\nEND;\\n\\npostcodeLocationRec := RECORD\\n postcodeRec;\\n\\tlocationRec;\\nEND;\\n\\n// Define Inline Dataset\\n\\ntestDataDS := DATASET([{'OX12 8QD'},{'PO78JN'},{'RG30 6QA'},{'RG1 6QA'},\\n {'RG74RA'},{'SW1A4WW'},{'BS11 8BN'},{'BS22 8BN'},\\n {'GU31 4DP'},{'PO99RJ'}],{STRING10 original});\\n\\n// Define Transform\\n\\npostcodeRec T1 (TestDataDS pInput) := TRANSFORM\\n SELF.original := pInput.original;\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),'');\\n SELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0);\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1];\\n SELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..];\\n SELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0);\\n SELF.district := SELF.outward[LENGTH(SELF.area)+1..];\\n SELF.sector := SELF.inward[1];\\n SELF.unit := SELF.inward[2..];\\nEND;\\n\\n// Project each Record in the inline Dataset through the Transform \\n\\ntestData := PROJECT(testDataDS,T1(LEFT)); \\n\\n// Define Base Dataset\\n\\npostcodeLocations := DATASET('~tutorial::RP::TutorialPostcode', {locationRec,UNSIGNED8 fpos{virtual(fileposition)}},THOR);\\n\\n// Define Index on Base Dataset\\n\\nlocationIndex := INDEX(postcodeLocations,{postcode},{postcode,eastings,northings,latitude,longitude,town,region,country_code,country},'~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n\\n// Join\\n\\nfullPostcodeData := JOIN(testData,locationIndex,KEYED(LEFT.outward=RIGHT.postcode));\\n\\n// Output\\n\\nOUTPUT(fullPostcodeData);\\n\\n
\", \"post_time\": \"2011-11-18 16:50:02\" },\n\t{ \"post_id\": 617, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"rtaylor\", \"post_text\": \"Rob,\\n\\nYour code is now using a half-keyed JOIN, which will be more much efficient than your previous version.\\n\\nI see you're using an inline TRANSFORM in your JOIN, and simply taking all fields from LEFT and RIGHT -- which, for JOIN, is exactly the same as not using a TRANSFORM at all, like this:\\nfullPostcodeData := JOIN(testData,locationIndex,LEFT.outward=RIGHT.postcode);\\n
\\nOf course, a JOIN without a TRANSFORM is pretty uncommon, as most JOINs do produce something other than a simple union of all the fields.\\n\\nOne "nicety" that could be added to this line of code would be KEYED, like this:\\nfullPostcodeData := JOIN(testData,locationIndex,KEYED(LEFT.outward=RIGHT.postcode));\\n
\\nThis form of KEYED (see KEYED/WILD in the Lang Ref) is very different from the KEYED option on JOIN, because it explicitly specifies use of the metakey (binary tree) of the INDEX to locate the correct entries. \\n\\nAnother "nicety" would be in your INDEX definition. Because you're defining a payload INDEX, the record pointer is not required, like this:\\n\\nlocationIndex := INDEX(postcodeLocations,\\n {postcode},\\n {eastings,northings,latitude,longitude,town,region,country_code,country},\\n '~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n
\\nAnd this part of your code is a very unusual style:\\nT1 (STRING10 pInput) := TRANSFORM(postcodeRec,\\n SELF.original := pInput,\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),''),\\n SELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0),\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1],\\n SELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..],\\n SELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0),\\n SELF.district := SELF.outward[LENGTH(SELF.area)+1..],\\n SELF.sector := SELF.inward[1],\\n SELF.unit := SELF.inward[2..]\\n);\\n\\ntestData := DATASET([T1('OX12 8QD'),T1('PO78JN'),T1('RG30 6QA'),T1('RG1 6QA'),T1('RG74RA'),T1('SW1A4WW'),\\n T1('BS11 8BN'),T1('BS22 8BN'),T1('GU31 4DP'),T1('PO99RJ')]);\\n
\\nYou've defined your TRANSFORM as if you were defining it inline instead of as a separate definition, and you're constructing your test dataset by making 10 explicit calls of the TRANSFORM. \\n\\nWhat you're trying to accomplish would more normally be coded like this:\\n\\n//start by defining the inline dataset\\ntestDataDS := DATASET([{'OX12 8QD'},{'PO78JN'},{'RG30 6QA'},{'RG1 6QA'},\\n {'RG74RA'},{'SW1A4WW'},{'BS11 8BN'},{'BS22 8BN'},\\n {'GU31 4DP'},{'PO99RJ'}],\\n {STRING10 original});\\n\\n//then define the TRANSFORM \\npostcodeRec T1 (TestDataDS pInput) := TRANSFORM\\n SELF.original := pInput.original;\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),'');\\n SELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0);\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1];\\n SELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..];\\n SELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0);\\n SELF.district := SELF.outward[LENGTH(SELF.area)+1..];\\n SELF.sector := SELF.inward[1];\\n SELF.unit := SELF.inward[2..];\\nEND;\\n\\n//then you can simply PROJECT the input data through the TRANSFORM to produce\\n// the actual testdata to "bang" against your INDEX\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n TestData := PROJECT(testDataDS,T1(LEFT));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n
\\nThe advantage of this code is the possibility of using it against any input dataset, not just the inline dataset you want to test with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-11-18 15:01:37\" },\n\t{ \"post_id\": 616, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"Rob Pelley\", \"post_text\": \"Here is my revised code ... ds1 has been renamed testData and ds2 is now an index named locationIndex ... a few coding shortcuts have been added too ...\\n\\n\\nIMPORT STD;\\n\\nSTRING nonUppercaseAlphaNumeric := '[^A-Z0-9]';\\nSTRING validUppercasePostcode := '[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}';\\nSTRING oneOrTwoUppercaseAlpha := '^[A-Z]{1,2}';\\n\\npostcodeRec := RECORD\\n STRING original;\\n STRING normalised;\\n\\tSTRING validated;\\n\\tSTRING outward;\\n\\tSTRING inward;\\n\\tSTRING area;\\n\\tSTRING district;\\n\\tSTRING sector;\\n\\tSTRING unit;\\nEND;\\n\\nlocationRec := RECORD\\n STRING6 postcode;\\n STRING eastings;\\n STRING northings;\\n STRING latitude;\\n STRING longitude;\\n STRING town;\\n STRING region;\\n STRING country_code;\\n STRING country;\\nEND;\\n\\npostcodeLocationRec := RECORD\\n postcodeRec;\\n locationRec AND NOT postcode;\\nEND;\\n\\nT1 (STRING10 pInput) := TRANSFORM(postcodeRec,\\n SELF.original := pInput,\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),''),\\n SELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0),\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1],\\n SELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..],\\n SELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0),\\n SELF.district := SELF.outward[LENGTH(SELF.area)+1..],\\n SELF.sector := SELF.inward[1],\\n SELF.unit := SELF.inward[2..]\\n);\\n\\ntestData := DATASET([T1('OX12 8QD'),T1('PO78JN'),T1('RG30 6QA'),T1('RG1 6QA'),T1('RG74RA'),T1('SW1A4WW'),T1('BS11 8BN'),T1('BS22 8BN'),T1('GU31 4DP'),T1('PO99RJ')]);\\n\\npostcodeLocations := DATASET('~tutorial::RP::TutorialPostcode', {locationRec,UNSIGNED8 fpos{virtual(fileposition)}},THOR);\\n\\nlocationIndex := INDEX(postcodeLocations,{postcode,fpos},{eastings,northings,latitude,longitude,town,region,country_code,country},'~tutorial::RP::LocationByPostcodeReferenceINDEX');\\n\\nfullPostcodeData := JOIN(testData,locationIndex,LEFT.outward=RIGHT.postcode,TRANSFORM(postcodeLocationRec,\\n SELF := LEFT,\\n SELF := RIGHT\\n));\\n\\nOUTPUT(fullPostcodeData);\\n
\", \"post_time\": \"2011-11-18 10:14:31\" },\n\t{ \"post_id\": 614, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"Rob Pelley\", \"post_text\": \"Richard,\\n\\nMany thanks for the prompt reply ... it helps a lot ... I was very pleased that my code did what I wanted it to do but I couldn't understand why ... trimming of trailing spaces makes a lot of sense.\\n\\nI had already defined an index on the file '~tutorial::RP::TutorialPostcode' but I'm not yet being familiar with half-keyed joins ... I'm looking forward to learning more in class \\n\\nRob.\", \"post_time\": \"2011-11-17 21:38:24\" },\n\t{ \"post_id\": 612, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Re: Joining Two Datasets\", \"username\": \"rtaylor\", \"post_text\": \"Rob,\\n\\n1) You get results because trailing spaces are always trimmed before comparison in ECL. Because our default string type is space-padded, this trim before compare makes things a lot easier, ensuring that 'ABC ' always equals 'ABC'.\\n\\n2) The short answer to this is, "we'll discuss that in great detail when you come to class in a couple of weeks." The (slightly) longer answer is that you would have to first define and build the index yourself, then you could use that index to replace ds2 to make it a half-keyed join. IOW, it is not automatic (very little in ECL is automatic) -- the HPCC is not an RDBMS.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-11-17 21:02:29\" },\n\t{ \"post_id\": 611, \"topic_id\": 162, \"forum_id\": 8, \"post_subject\": \"Joining Two Datasets\", \"username\": \"Rob Pelley\", \"post_text\": \"Hi,\\n\\nI have been attempting to write some UK Postcode validation in ECL ... the code follows ... apologies if it's a bit long ...\\n\\n
\\nIMPORT STD;\\n\\nSTRING nonUppercaseAlphaNumeric := '[^A-Z0-9]';\\nSTRING validUppercasePostcode := '[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}';\\nSTRING oneOrTwoUppercaseAlpha := '^[A-Z]{1,2}';\\n\\npostcodeRec := RECORD\\n STRING original;\\n STRING normalised;\\n\\tSTRING validated;\\n\\tSTRING outward;\\n\\tSTRING inward;\\n\\tSTRING area;\\n\\tSTRING district;\\n\\tSTRING sector;\\n\\tSTRING unit;\\nEND;\\n\\nlocationRec := RECORD\\n STRING6 postcode;\\n\\tSTRING eastings;\\n\\tSTRING northings;\\n\\tSTRING latitude;\\n\\tSTRING longitude;\\n\\tSTRING town;\\n\\tSTRING region;\\n\\tSTRING country_code;\\n\\tSTRING country;\\nEND;\\n\\npostcodeLocationRec := RECORD\\n STRING original;\\n STRING normalised;\\n\\tSTRING validated;\\n\\tSTRING outward;\\n\\tSTRING inward;\\n\\tSTRING area;\\n\\tSTRING district;\\n\\tSTRING sector;\\n\\tSTRING unit;\\n\\tSTRING eastings;\\n\\tSTRING northings;\\n\\tSTRING latitude;\\n\\tSTRING longitude;\\n\\tSTRING town;\\n\\tSTRING region;\\n\\tSTRING country_code;\\n\\tSTRING country;\\nEND;\\n\\nT1 (STRING10 pInput) := \\nTRANSFORM(postcodeRec,\\n SELF.original := pInput,\\n SELF.normalised := REGEXREPLACE(nonUppercaseAlphaNumeric,STD.Str.ToUpperCase(SELF.original),''),\\n\\tSELF.validated := REGEXFIND(validUppercasePostcode,(SELF.normalised[..LENGTH(SELF.normalised)-3] + ' ' + SELF.normalised[LENGTH(SELF.normalised)-2..]),0),\\n SELF.outward := SELF.validated[..STD.Str.Find(SELF.validated,' ', 1)-1],\\n\\tSELF.inward := SELF.validated[STD.Str.Find(SELF.validated,' ', 1)+1..],\\n\\tSELF.area := REGEXFIND(oneOrTwoUppercaseAlpha,SELF.outward,0),\\n\\tSELF.district := SELF.outward[LENGTH(SELF.area)+1..],\\n\\tSELF.sector := SELF.inward[1],\\n\\tSELF.unit := SELF.inward[2..]\\n);\\n\\nds1 := DATASET([T1('OX12 8QD'),T1('PO78JN'),T1('RG30 6QA'),T1('RG1 6QA'),T1('RG74RA'),T1('SW1A4WW'),T1('BS11 8BN'),T1('BS22 8BN'),T1('GU31 4DP'),T1('PO99RJ')]);\\n\\nds2 := DATASET('~tutorial::RP::TutorialPostcode',locationRec,THOR);\\n\\nfullPostcodeData := JOIN(ds1,ds2,LEFT.outward=RIGHT.postcode,\\nTRANSFORM(postcodeLocationRec,\\n SELF.original := LEFT.original,\\n\\tSELF.normalised := LEFT.normalised,\\n\\tSELF.validated := LEFT.validated,\\n\\tSELF.outward := LEFT.outward,\\n\\tSELF.inward := LEFT.inward,\\n\\tSELF.area := LEFT.area,\\n\\tSELF.district := LEFT.district,\\n\\tSELF.sector := LEFT.sector,\\n\\tSELF.unit := LEFT.unit,\\n\\tSELF.eastings := RIGHT.eastings,\\n\\tSELF.northings := RIGHT.northings,\\n\\tSELF.latitude := RIGHT.latitude,\\n\\tSELF.longitude := RIGHT.longitude,\\n\\tSELF.town := RIGHT.town,\\n\\tSELF.region := RIGHT.region,\\n\\tSELF.country_code := RIGHT.country_code,\\n\\tSELF.country := RIGHT.country\\n));\\n\\nOUTPUT(fullPostcodeData);\\n
\\n\\nI have two questions ...\\n\\n1) Given that I have a JOIN between ds1 and ds2 where there is an equality between LEFT.outward and RIGHT.postcode, and that the corresponding definitions are STRING (which is not space padded) and STRING6 (which is space padded) respectively, how come I am receiving results? Surely 'RG1' would not match 'RG1' with three trailing spaces?\\n\\n2) How can I ensure that the JOIN will use an index on ds2?\\n\\nThanks in advance for any help ...\\n\\nRob.\", \"post_time\": \"2011-11-17 19:30:34\" },\n\t{ \"post_id\": 679, \"topic_id\": 164, \"forum_id\": 8, \"post_subject\": \"Re: Minor typo on ECL Programmers Guide.\", \"username\": \"rtaylor\", \"post_text\": \"And corrected for future releases...\", \"post_time\": \"2011-11-30 15:28:38\" },\n\t{ \"post_id\": 628, \"topic_id\": 164, \"forum_id\": 8, \"post_subject\": \"Re: Minor typo on ECL Programmers Guide.\", \"username\": \"bforeman\", \"post_text\": \"Thanks Allan, I will pass this to the documentation team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-22 15:48:19\" },\n\t{ \"post_id\": 624, \"topic_id\": 164, \"forum_id\": 8, \"post_subject\": \"Minor typo on ECL Programmers Guide.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nSmall issue if extra quotes in XML example near bottom of Page 30 Section on 'Working with XML Data'\\n\\n<area code=""301"">\\n<zone>Eastern Time Zone</zone>\\n</area>\\n
\\n\\nNote the extra ""\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-21 20:21:25\" },\n\t{ \"post_id\": 676, \"topic_id\": 165, \"forum_id\": 8, \"post_subject\": \"Re: Question on Descending SORTED qualifier to MERGE.\", \"username\": \"ghalliday\", \"post_text\": \"I have released a fix to github. \\nI expect it to be included in 3.4.2.\", \"post_time\": \"2011-11-30 14:03:49\" },\n\t{ \"post_id\": 631, \"topic_id\": 165, \"forum_id\": 8, \"post_subject\": \"Re: Question on Descending SORTED qualifier to MERGE.\", \"username\": \"bforeman\", \"post_text\": \"Yep, it could be a compiler bug. I will submit a report to development. Thanks!\", \"post_time\": \"2011-11-22 20:21:24\" },\n\t{ \"post_id\": 630, \"topic_id\": 165, \"forum_id\": 8, \"post_subject\": \"Re: Question on Descending SORTED qualifier to MERGE.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nThe error is:\\n\\n\\nError: Type mismatch - numeric expression expected(String was given) (10, 30), 2004, \\n
\\n\\nI realise it's just an indicator to the compiler, however the documentation for SORTED specifically says '-' is allowed with no qualification on the context of its use.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-22 19:25:56\" },\n\t{ \"post_id\": 629, \"topic_id\": 165, \"forum_id\": 8, \"post_subject\": \"Re: Question on Descending SORTED qualifier to MERGE.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nThe SORTED attribute just alerts the compiler as to what element is SORTED. In your example, indeed the letter is NOT sorted in descending order. What specific error is being returned?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-22 15:54:29\" },\n\t{ \"post_id\": 625, \"topic_id\": 165, \"forum_id\": 8, \"post_subject\": \"Question on Descending SORTED qualifier to MERGE.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe ECL reference manual documents that the '-' descending indicator is allowed in the SORTED command.\\n\\nHowever I find this '-' is not allowed when using SORTED in the MERGE command.\\n\\ne.g. the following, lifted from the reference manual and doctored, does not seem to be allowed.\\n\\nds1 := SORTED(DATASET([{1,'A'},{1,'B'},{1,'C'},{1,'D'},{1,'E'},\\n{1,'F'},{1,'G'},{1,'H'},{1,'I'},{1,'J'}],\\n{INTEGER1 number,STRING1 Letter}),\\nletter,number);\\nds2 := SORTED(DATASET([{2,'A'},{2,'B'},{2,'C'},{2,'D'},{2,'E'},\\n{2,'F'},{2,'G'},{2,'H'},{2,'I'},{2,'J'}],\\n{INTEGER1 number,STRING1 Letter}),\\nletter,number);\\nds3 := MERGE(ds1,ds2,SORTED(-letter,number));\\nSetDS := [ds1,ds2];\\nds4 := MERGE(SetDS,letter,number);\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-21 20:26:14\" },\n\t{ \"post_id\": 662, \"topic_id\": 168, \"forum_id\": 8, \"post_subject\": \"Re: Handling XML File Errors\", \"username\": \"ghalliday\", \"post_text\": \"That error should be generated when you are parsing an xml file that contains an unquoted ampersand - e.g., "fred & jim" rather than "fred & jim".\\n\\nIt should be possible to find the offending text by looking at the line/file offset that is provided in the error message.\\n\\nIf the text at that point doesn't contain an example like that then it needs some more investigation. (It is possible that the & is much earlier).\", \"post_time\": \"2011-11-29 21:15:52\" },\n\t{ \"post_id\": 654, \"topic_id\": 168, \"forum_id\": 8, \"post_subject\": \"Re: Handling XML File Errors\", \"username\": \"bforeman\", \"post_text\": \"Hi, normally the error implies some bad XML that is somewhere in the target file.\\nI've seen this in the past, and correcting the bad XML took care of the error. It's possible that the development team will ask to see your log files. If you could attach them here it would be helpful.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-29 19:44:17\" },\n\t{ \"post_id\": 632, \"topic_id\": 168, \"forum_id\": 8, \"post_subject\": \"Handling XML File Errors\", \"username\": \"chargil\", \"post_text\": \"I am trying to define and out a dataset from an XML file and I keep running across this error \\n\\nError: System error: 2: Graph[17], xmlread[18]: SLAVE 192.168.255.128:6600: Error - syntax error "missing ';'" [line 4710, file offset 1859172]\\nLogical filename = ~CLASS::CG::WORDNET::FULL\\nLocal fileposition = 0x80000000001C5E64\\nher impassive remoteness</USAGE><USAGE>h*ERROR*e remained impassive, showing neither in (0, 0), 2,
\\n\\nI am not sure why a missing semi-colon would cause problems.\\n\\nHere is an example of the type tag I'm trying to read:\\n<SYNSET><ID>ENG30-00001740-a</ID><POS>a</POS><SYNONYM><LITERAL>able<SENSE>1</SENSE></LITERAL></SYNONYM><ILR><TYPE>near_antonym</TYPE>ENG30-00002098-a</ILR><ILR><TYPE>be_in_state</TYPE>ENG30-05200169-n</ILR><ILR><TYPE>be_in_state</TYPE>ENG30-05616246-n</ILR><ILR><TYPE>eng_derivative</TYPE>ENG30-05200169-n</ILR><ILR><TYPE>eng_derivative</TYPE>ENG30-05616246-n</ILR><DEF>(usually followed by `to') having the necessary means or skill or know-how or authority to do something</DEF><USAGE>able to swim</USAGE><USAGE>she was able to program her computer</USAGE><USAGE>we were at last able to buy a car</USAGE><USAGE>able to get a grant for the project</USAGE></SYNSET>
\\n\\nAnd the record structures\\nSynonym_Layout\\t:= RECORD\\n\\tSTRING\\tLiteral\\t{XPATH('LITERAL')};\\n\\tSTRING\\tSense\\t\\t{XPATH('SENSE')};\\nEND;\\n\\nILR_Layout\\t:= RECORD\\n\\tSTRING\\tIlrType\\t{XPATH('TYPE')};\\nEND;\\n\\nSynset_Layout\\t:= RECORD\\n\\tSTRING\\tId\\t{XPATH('ID')};\\n\\tSTRING\\tPos\\t{XPATH('POS')};\\n\\tDATASET(Synonym_Layout)\\tSynonyms\\t{XPATH('SYNONYM')};\\n\\tDATASET(ILR_Layout)\\tIrls\\t{XPATH('ILR')};\\nEND;\\n\\nSynsets_Layout\\t:= RECORD\\n\\tDATASET(Synset_Layout)\\tSynsets\\t{XPATH('SYNSET')};\\nEND;\\n\\nEXPORT WordNet := DATASET('~CLASS::CG::WORDNET::FULL',Synsets_Layout,XML('SYNSETS'));\\n
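\\n\\nFor anyone trying to track this down, one rough way to hunt for the offending text is to read the same file a line at a time and flag suspicious ampersands. This is only an untested sketch: the logical filename matches the one above, but the '& ' filter is just a heuristic and the CSV(SEPARATOR('')) line-at-a-time trick may need adjusting on your build.\\n

IMPORT STD;

// Untested sketch: read each physical line of the file as a single string field.
LineRec := RECORD
    STRING line;
END;

rawLines := DATASET('~CLASS::CG::WORDNET::FULL', LineRec, CSV(SEPARATOR('')));

// An ampersand followed by a space is a common sign of an unescaped entity
// (well-formed XML would use &amp; rather than a bare &).
suspects := rawLines(STD.Str.Find(line, '& ', 1) > 0);

OUTPUT(CHOOSEN(suspects, 100));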
\", \"post_time\": \"2011-11-24 18:34:14\" },\n\t{ \"post_id\": 648, \"topic_id\": 171, \"forum_id\": 8, \"post_subject\": \"Re: Date manipulation\", \"username\": \"sasi\", \"post_text\": \"Thank you. This is nice. \\nHope the new version comes with its API documentation and sample code. Just to know how to use those libraries.\\n\\nthanks\\nSasi.\", \"post_time\": \"2011-11-29 13:51:12\" },\n\t{ \"post_id\": 646, \"topic_id\": 171, \"forum_id\": 8, \"post_subject\": \"Re: Date manipulation\", \"username\": \"ghalliday\", \"post_text\": \"The next 3.4 version of the system (due to be released this month) contains a date module which should make life much easier.\\n\\nI'll reply more fully to your other post about a date library. But to answer this question, you would be able to say.\\n\\nDate.ToDaysSince1900(myDate) >= Date.ToDaysSince1900(Date.Today())-3;\", \"post_time\": \"2011-11-29 12:10:21\" },\n\t{ \"post_id\": 635, \"topic_id\": 171, \"forum_id\": 8, \"post_subject\": \"Date manipulation\", \"username\": \"sasi\", \"post_text\": \"Hello\\n\\n1. How to add 2 days with a date?\\n\\nI searched your site, programmer guide and reference guide. I could not find any good reference material with regard to date manipulation.\\n\\nMy problem:\\n\\nI want to filter data for the last 3 days. My data has date field.\\n\\nThanks in advance.\\nSasi.\", \"post_time\": \"2011-11-27 23:01:33\" },\n\t{ \"post_id\": 688, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"JimD\", \"post_text\": \"It is installed on Windows with the other Client Tools. \\nIt is also installed on Linux servers when you install the packages.\\n\\nYou can find it on a Linux server in /opt/HPCCSystems/bin/\\n\\nThe documentation for DFUPlus in the Client Tools.PDF. This is valid for either Windows or Linux versions of DFUPlus. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2011-11-30 20:40:29\" },\n\t{ \"post_id\": 687, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"rtaylor\", \"post_text\": \"1. I see it only in community edition version. Is it available in community edition only?\\n\\n2. Is it available on Linux?
\\n\\n1. DFUplus.exe is installed with the IDE in the /bin directory, so no, it is not just available in Community Edition.\\n\\n2. Since the IDE and Client Tools are available for download in Windows and several Linux versions, I would presume so.\", \"post_time\": \"2011-11-30 20:39:58\" },\n\t{ \"post_id\": 686, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"dmitriox\", \"post_text\": \"[quote="rtaylor":fsg92lmw]DFUplus.exe is documented in the Client Tools PDF, available here:\\n\\nhttp://hpccsystems.com/community/docs/e ... leinttools\\n(no, it is not misspelled -- that's the way the URL is )\\n\\nHTH,\\n\\nRichard\\n\\n1. I see it only in the community edition version. Is it available in community edition only?\\n\\n2. Is it available on Linux?\", \"post_time\": \"2011-11-30 20:16:40\" },\n\t{ \"post_id\": 685, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"rtaylor\", \"post_text\": \"DFUplus.exe is documented in the Client Tools PDF, available here:\\n\\nhttp://hpccsystems.com/community/docs/e ... leinttools\\n(no, it is not misspelled -- that's the way the URL is 
)\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-11-30 20:04:15\" },\n\t{ \"post_id\": 684, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"ghalliday\", \"post_text\": \"Richard Taylor can probably help you with the documentation. However, typing dfuplus /? gives you a fairly comprehensive help.\\n\\nIt doesn't look like the fileservices despray command does give you access to that option. The esp soap service does allow it to be configured though.\\n\\n
\\neclcc --version\\n
\\nwill display the version of the language and the version of the system. Version 3.0 was the first open source version - the previous (legacy) versions used a different numbering system. The row processing was changed between the last stable legacy system version and the open source version.\", \"post_time\": \"2011-11-30 16:56:21\" },\n\t{ \"post_id\": 680, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"dmitriox\", \"post_text\": \"[quote="ghalliday":xhr1nooo]\\n...\\nSee dfuplus despray with the /splitprefix option.\\n\\nEssentially you will need records in the form\\n[code]\\nmyRecord := RECORD\\n STRING filename;\\n DATA data;\\nEND;\\n\\nand then use something like /splitprefix=filename,filesize.\\n\\n1. Can you please refer me to the document where thus utility described? I'm despraing using the fileservises.despray() method, it there similar functionality?\\n2. How can I check the version (regarding you second post, where you mentioned 3.x - as I understood, this is not the IDE version)?\", \"post_time\": \"2011-11-30 15:31:13\" },\n\t{ \"post_id\": 670, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"ghalliday\", \"post_text\": \"I agree with Davi'ds comments about the record size. Generally the smaller the records the more efficient the processing is likely to be - but if you need large records to efficiently model your problem then use them.\\n\\nFor ECL users who are used to the legacy system, the 3.x series is much better at handling large variable length records.\\n\\n- Previously you needed to specify the maximum size that the record could take up, that size was allocated, and then shrunk back down to the actual size. The new mechanism is more dynamic.\\n- The rows in child datasets are now represented in thor, roxie and hthor as separate rows. This means there isn't a single very large allocation, but many more smaller allocations. It also significantly reduces the amount of data copied when child datasets are processed and copied in transforms.\", \"post_time\": \"2011-11-30 10:06:58\" },\n\t{ \"post_id\": 669, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"ghalliday\", \"post_text\": \"This isn't an answer to your original question, but...\\n\\nAn alternative to writing out lots of files in thor is to write out a single logical file, and then despray the file using the option to split it into separate physical files.\\n\\nSee dfuplus despray with the /splitprefix option.\\n\\nEssentially you will need records in the form\\n\\nmyRecord := RECORD\\n STRING filename;\\n DATA data;\\nEND;\\n
\\n\\nand then use something like /splitprefix=filename,filesize.\", \"post_time\": \"2011-11-30 09:59:54\" },\n\t{ \"post_id\": 665, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"dabayliss\", \"post_text\": \"So are you primarily collecting these results together just prior to writing them out?\\n\\nTo answer your principle question: pulling together very large variable length records will usually be slower than processing the pieces as smaller more homogenous units - but it still depends on what you are doing.\\n\\nSorry to be vague - but there is no 'best' answer here - if there was the compiler would enforce it!\\n\\nDavid\", \"post_time\": \"2011-11-30 00:22:33\" },\n\t{ \"post_id\": 651, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"dmitriox\", \"post_text\": \"David,\\n\\nSeems I have to give more details about what I'm trying to do:\\n\\n- The "book/page" structure is for illustrative purpose only, but still it's good enough to explain my situation. Only note that the order is not important.\\n\\n- Initially I have quite large dataset, each record of which have book name and the page of it. I need to despray it to set of flat files per book name .\\n\\nNatural solution in my understanding looks like the following:\\n\\n 1. Denormailize dataset to have one record per book with nested dataset of book's pages;\\n 2. Apply for each record - write child dataset to file matching the book name.\\n\\nAbove seems to me simple and straightforward. And if my understanding correct, parallel processing should be applied exactly on the top level, not within the record level (correct me if I'm wrong, please).\\n\\nIn procedural language, alternative solution will be to loop the file and for each record to write the page into it's corresponding file name.\\n\\nHowever, since we don't have a loop in the ECL, only possibility I see without denormalizing is to extract set of book names, and then for each one filter matching records of the source dataset, i. e.:\\n 1. Get vertical slice of source dataset - only the book names;\\n 2. Dedup it;\\n 3. For each record of above\\n 3a. - filter source data.\\n 3b. Get vertical slice of above;\\n 3c. Write result of of above.\\n\\nSo basically I'm trying to compare two way I've described above - it there any other solutions?\\n\\n\\nAnd BTW, couple more questions regarding above:\\n\\n - You said that record is the unit which won't be split. However, I create a new definition out of it, which will be the set of the records, will system still try to process as a single unit and to which point (I can apply "DISTRIBUTE" command to it anytime, isn't it?)? \\n\\n - Still I like to understand how the max size of the record will affect the efficiency of the system? ( For example, I expect normally up to 1 Mb recs but set max length to 100 Mb "just in case")\", \"post_time\": \"2011-11-29 16:35:29\" },\n\t{ \"post_id\": 644, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Re: Large nested datasets and efficiency\", \"username\": \"dabayliss\", \"post_text\": \"This is the kind of question that can lead to raised voices and blood all over the floor...\\n\\nThe thing to remember is that the record is the fundamental unit of work in ECL. 
Parallelization happens at record boundaries, records are passed in to transforms, records are copied around.\\n\\nIF you are completely sure that your processing model is really processing a book at a time - then it may be a reasonable memory model.\\n\\nWhen we do work with documents (and we do a lot) - then we will usually go for a much lower unit of granularity (a paragraph is not unusual) - we then using linking fields to retain the outer structure of the documents ...\\n\\nDavid\", \"post_time\": \"2011-11-28 21:59:35\" },\n\t{ \"post_id\": 643, \"topic_id\": 175, \"forum_id\": 8, \"post_subject\": \"Large nested datasets and efficiency\", \"username\": \"dmitriox\", \"post_text\": \"To have a nested (child) dataset in the record is really useful option. However, there are cases when the child dataset can be really large.\\n\\nFor example, say I have a book as a main record and pages from it as a nested dataset. In this case, the maximum size size of the record is huge and even hard to set a limit.\\n\\nSo, is there any general rule of thumb or any restriction on the record size? \\nIf I expect in average 10 Mb records but sometimes it can be up to 1 Gb, still can I be sure system will work efficiently? Anyway, what is the goal of limiting the maximum record size (especially keeping in mind it just 4K by default)?\", \"post_time\": \"2011-11-28 21:26:48\" },\n\t{ \"post_id\": 708, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Re: Trying to understand AGGREGATE\", \"username\": \"ghalliday\", \"post_text\": \"I have released fixes for two different problems. I would expect them to go into 3.4.2\", \"post_time\": \"2011-12-05 16:54:46\" },\n\t{ \"post_id\": 697, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Re: Trying to understand AGGREGATE\", \"username\": \"ghalliday\", \"post_text\": \"Your problem with AGGREGATE looks like a bug in the compiler. \\nI've added issue #1094 to github, and will investigate.\\n\\nAGGREGATE is a relatively recent addition to the language.\\n\\nYou are correct that you can do the same thing with ROLLUP. There are two main advantages of AGGREGATE over rollup:\\n- the output record doesn't need to match the input record\\n- the implementations in the different engines are designed to minimize the amount of record copying that takes place.\\n\\nOptimizing appending records to a dataset has already been special cased, and I'm planning to do the same for string concatenation when I get the time. (I've duplicated an existing issue into issue #1095 to make that more visible.)\", \"post_time\": \"2011-12-05 10:13:19\" },\n\t{ \"post_id\": 695, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Re: Trying to understand AGGREGATE\", \"username\": \"Rob Pelley\", \"post_text\": \"Thanks, yes your modified code seems to work but I thought that the advantage of AGGREGATE was that you don't have to SORT or GROUP beforehand. 
I think I'll stick to ROLLUP until I understand more about AGGREGATE ...\", \"post_time\": \"2011-12-02 21:45:43\" },\n\t{ \"post_id\": 693, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Re: Trying to understand AGGREGATE\", \"username\": \"aintnomyth\", \"post_text\": \"This example from the help file doesn't work for me either but here's a modified version that seems to work, hope that helps.\\n\\n inRecord := RECORD \\n UNSIGNED box; \\n STRING text{MAXLENGTH(10)}; \\n END; \\n\\n inTable := DATASET( [\\n\\t\\t\\t\\t\\t{1, 'A'},\\n\\t\\t\\t\\t\\t{2, 'B'},\\n\\t\\t\\t\\t\\t{3, 'C'},\\n\\t\\t\\t\\t\\t{1, 'D'},\\n\\t\\t\\t\\t\\t{2, 'E'} ] , inRecord);\\n \\n //Example 1: Produce a list of box contents by concatenating a string:\\n \\n outRecord1 := RECORD \\n UNSIGNED box; \\n STRING contents{MAXLENGTH(200)}; \\n END; \\n outRecord1 t1(inRecord l, outRecord1 r) := TRANSFORM \\n SELF.box := l.box; \\n SELF.contents := r.contents + IF(r.contents <> '', ',', '') + l.text; \\n END; \\n \\n outRecord1 t2(outRecord1 r1, outRecord1 r2) := TRANSFORM \\n SELF.box := r1.box; \\n SELF.contents := r1.contents + ',' + r2.contents; \\n END; \\n\\n\\t\\t\\t\\tgroupIn := GROUP( SORT(inTable,BOX), BOX);\\n\\t\\t\\t\\n OUTPUT(AGGREGATE(groupIn, outRecord1, t1(LEFT, RIGHT), t2(RIGHT1, RIGHT2) ));\\n \\n //This example could eliminate the merge transform if the SELF.contents expression in\\n //the t1 TRANSFORM were simpler, like this:\\n // SELF.contents := r.contents + ',' + l.text;\\n //which would make the AGGREGATE function like this:\\n // OUTPUT(AGGREGATE(inTable, outRecord1, t1(LEFT, RIGHT), LEFT.box));\\n \\n //Example 2: A PIGMIX style grouping operation:\\n \\n outRecord2 := RECORD \\n UNSIGNED box; \\n DATASET(inRecord) items; \\n END; \\n outRecord2 t3(inRecord l, outRecord2 r) := TRANSFORM \\n SELF.box := l.box; \\n SELF.items:= r.items + l; \\n END; \\n OUTPUT(AGGREGATE(inTable, outRecord2, t3(LEFT, RIGHT), LEFT.box));\\n
\\n\\n\\nResult 1:\\n1\\tA,D\\n2\\tB,E\\n3\\tC
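\\n\\nIf the concatenation logic is kept simple, the grouping-field form mentioned in the comments above should also be enough. This is an untested variant that reuses inRecord, outRecord1 and inTable from the code above, and it may still trip over the compiler issue discussed in this thread on older builds:\\n

// Simpler transform, no merge transform needed; AGGREGATE groups on the box field directly.
outRecord1 t1simple(inRecord l, outRecord1 r) := TRANSFORM
    SELF.box := l.box;
    SELF.contents := r.contents + ',' + l.text;  // first value in each group picks up a leading comma
END;

OUTPUT(AGGREGATE(inTable, outRecord1, t1simple(LEFT, RIGHT), LEFT.box));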
\", \"post_time\": \"2011-12-02 20:22:51\" },\n\t{ \"post_id\": 692, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Re: Trying to understand AGGREGATE\", \"username\": \"Rob Pelley\", \"post_text\": \"OK, I have answered the second question ... it's much easier to achieve the desired result using ROLLUP ... see below ... but I'd still like to know why I am getting my syntax error with the AGGREGATE function ...\\n\\n\\nmyRec := RECORD\\n UNSIGNED1 ID;\\n STRING text;\\nEND;\\n\\nmyDS := DATASET([{1,'A'},{1,'B'},{2,'C'},{2,'D'},{3,'E'},{2,'F'}],myRec);\\nmySortedDS := SORT(myDS,ID);\\n\\nmyRec XF(myRec L, myRec R) := TRANSFORM\\n SELF.ID := L.ID;\\n SELF.text := L.text + R.text;\\nEND;\\n\\n\\nOUTPUT(ROLLUP(mySortedDS,LEFT.ID=RIGHT.ID,XF(LEFT,RIGHT)));\\n
\", \"post_time\": \"2011-12-02 20:15:55\" },\n\t{ \"post_id\": 691, \"topic_id\": 182, \"forum_id\": 8, \"post_subject\": \"Trying to understand AGGREGATE\", \"username\": \"Rob Pelley\", \"post_text\": \"I'm trying to understand the AGGREGATE function ... it seems to be pretty complex!\\n\\nTo test AGGREGATE I have set up a simple inline dataset and I'm trying to produce an aggregated result set whereby the results are GROUPed by ID and the text strings related to each GROUP are concatenated (this is similar to the example in the ECL Language Reference.\\n\\nSo, for the input ...\\n\\n1 A\\n2 B\\n3 C\\n1 D\\n2 E\\n\\nI want the output ...\\n\\n1 AD\\n2 BE\\n3 C\\n\\nThe following code attempts to test the functionality but I keep getting the error : Error: Unknown identifier "LEFT" (23, 78) ... line 23 refers to the OUTPUT statement ...\\n\\n\\ninRec := RECORD\\n UNSIGNED1 ID;\\n STRING text;\\nEND;\\n\\nmyDS := DATASET([{1,'A'},{1,'B'},{2,'C'},{2,'D'},{3,'E'},{2,'F'}],inRec);\\n\\noutRec := RECORD\\n UNSIGNED1 ID;\\n STRING contents;\\nEND;\\n\\noutRec mainXF(inRec L, outRec R) := TRANSFORM\\n SELF.ID := L.ID;\\n SELF.contents := R.contents + IF(R.contents <> '', ',', '') + L.text;\\nEND;\\n\\noutRec mergeXF(outRec R1, outRec R2) := TRANSFORM\\n SELF.ID := R1.ID;\\n SELF.contents := R1.contents + ',' + R2.contents;\\nEND;\\n\\nOUTPUT(AGGREGATE(myDS, outRec, mainXF(LEFT, RIGHT), mergeXF(RIGHT1, RIGHT2), LEFT.ID));\\n
\\n\\nCan anyone throw any light on this error? Is there a simpler way of achieving the desired results?\\n\\nThanks in advance ...\\n\\nRob.\", \"post_time\": \"2011-12-02 19:57:54\" },\n\t{ \"post_id\": 753, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nGood analysis and thanks for following up with the feedback.\\nMerry Christmas and Happy Holidays!\\n\\n\\nBob\", \"post_time\": \"2011-12-15 16:16:38\" },\n\t{ \"post_id\": 752, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"I found my problem here, the diag_cd field was the last field on the file. The original DATASET was defined with TERMINATOR('\\\\n') instead of TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r']) so a line delimiter byte was carried through.\", \"post_time\": \"2011-12-15 16:09:11\" },\n\t{ \"post_id\": 742, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the reply, I changed the output back to THOR (from CSV) and changed the type from VARSTRING7 to STRING7 but no luck.\\n\\nInterestingly, I tried this code but got an error:\\n\\nError: Incompatible types: can not assign unicode4 to String (4, 23), 2007,\\ndiags := mockdata.diags;\\n\\nTYPEOF(diags.diag_cd) diagCodeCriteria := 'V502';\\nTYPEOF(diags.diag_cd) diagCodeCriteria2 := U'V502';\\n\\ndiags1 := diags(diag_cd = diagCodeCriteria);\\ndiags2 := diags(diag_cd = diagCodeCriteria2);\\n\\nOUTPUT(diags);\\nOUTPUT(diags1);\\nOUTPUT(diags2);
\\n\\n\\nAnd this code runs but since diagCodeCriteria is a STRING and not unicode (as verified by the above error), it seems logical that the diags4 attribute would return records, but it doesn't. It's puzzling. For now my only work around is to use the Std.Uni.CleanSpaces function.\\n\\ndiags := mockdata.diags;\\nTYPEOF(diags.diag_cd) diagCodeCriteria := 'V502';\\ndiagCodeCriteria2 := U'V502';\\n\\ndiags1 := diags( TRIM(diag_cd,LEFT,RIGHT) = diagCodeCriteria);\\ndiags2 := diags( TRIM(diag_cd,LEFT,RIGHT) = diagCodeCriteria2);\\ndiags3 := diags( uni.cleanspaces(diag_cd) = diagCodeCriteria);\\ndiags4 := diags( str.cleanspaces(diag_cd) = diagCodeCriteria);\\nOUTPUT(diags);\\nOUTPUT(diags1);\\nOUTPUT(diags2);\\nOUTPUT(diags3);\\nOUTPUT(diags4);
\\n\\nAnd the outputs are:\\n100+ Records (source)\\n0 Records\\n0 Records\\n100+ Records\\n0 Records\", \"post_time\": \"2011-12-09 18:31:34\" },\n\t{ \"post_id\": 741, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"ghalliday\", \"post_text\": \"That is probably the problem - varstrings were treated inconsistently with strings.\\n\\nThe code will be more efficient if you use a string7. If you want a variable length string you could use a string (no length) instead.\\n\\nThat has now been fixed (in 3.4 I think).\\n\\nSee issue #473 fixed by pull request #595.\", \"post_time\": \"2011-12-09 17:43:16\" },\n\t{ \"post_id\": 740, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"From the Logical File Details page in ECL Watch: varstring7 diag_cd;\", \"post_time\": \"2011-12-09 17:16:29\" },\n\t{ \"post_id\": 739, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"ghalliday\", \"post_text\": \"What is the datatype of diag_cd?\", \"post_time\": \"2011-12-09 16:10:48\" },\n\t{ \"post_id\": 738, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"Yes, this works:\\n(TRIM(diag_cd)[1..4] = 'V502')
\\n\\nAnd this too:\\n( diag_cd[1..4] = 'V502' )
\\n\\nAnd this too:\\n( diag_cd = U'V502')
\\n\\n\\nThe original file was a basic CSV text file. In ECL Watch that logical file shows "Format: csv". I'm not adding any unicode fields to the layouts to my knowledge but I am using record inheritance:\\n newLayout := RECORD(baseLayout)\\n   extraFields...\\nEND;
\\n\\nOne of my problems was I could not open a dataset on the THOR file using the newLayout above. I had to make another layout like this:\\n newLayout2 := RECORD\\n baseLayout;\\n extraFields...\\nEND;
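\\n\\nFor comparison, here are the two layout styles side by side as I understand them (sketch only; baseLayout and extra_cd are made-up stand-ins):\\n

baseLayout := RECORD
    STRING7 diag_cd;
END;

// Inheritance form - the one that would not open the existing THOR file for me:
newLayout := RECORD(baseLayout)
    STRING10 extra_cd;
END;

// Composition form - the work-around that did open it:
newLayout2 := RECORD
    baseLayout;
    STRING10 extra_cd;
END;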
\", \"post_time\": \"2011-12-09 14:47:12\" },\n\t{ \"post_id\": 737, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"bforeman\", \"post_text\": \"Thinking about your filter issue, is the 'V502' the first four characters in the search string?\\n\\nDid you try something like:\\n\\n (TRIM(diag_cd)[1..4] = 'V502')\", \"post_time\": \"2011-12-09 14:39:27\" },\n\t{ \"post_id\": 736, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"bforeman\", \"post_text\": \"I see in the docs:\\n\\n"If none of the ASCII, EBCDIC, or UNICODE options are specified, the default input is\\nin ASCII format with any UNICODE fields in UTF8 format."\\n\\nAre there any unicode fields in your record definition?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-12-09 14:20:52\" },\n\t{ \"post_id\": 733, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"Re: unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"After some more poking around I found this in the Logical File Details:\\nFormat:\\tutf8n
\\n\\n\\n[edit]\\nThis is after I OUTPUT the file in CSV format. Still getting the same filter issues though.\", \"post_time\": \"2011-12-08 20:54:53\" },\n\t{ \"post_id\": 732, \"topic_id\": 188, \"forum_id\": 8, \"post_subject\": \"unicode default for THOR files?\", \"username\": \"aintnomyth\", \"post_text\": \"I'm running into an odd problem doing a simple string filter on a THOR file. I saved the file using this code:\\nOUTPUT(DiagsOut, {DiagsOut;}, fName, COMPRESSED, OVERWRITE);
\\n\\nMy first problem was that I could not create a DATASET for the file using the same {DiagsOut;} record type that was used to create the file (sad face), but I'm passed that issue now so...\\n\\nThe file has a STRING7 field called diag_cd, values can be 4 to 7 digits in length.\\n\\nIMPORT std.Str;\\nIMPORT std.Uni;\\n\\nthisLayout := {DiagsOut;};\\na := '~thor::fname';\\nb := DATASET(a, thisLayout, thor);\\n\\nresult1 := b( diag_cd = 'V502' );\\nresult2 := b( str.CleanSpaces(diag_cd) = 'V502' );\\nresult3 := b( uni.CleanSpaces(diag_cd) = 'V502' );\\n
\\n\\nresult1 = 0 records \\nresult2 = 0 records\\nresult3 = 100+ records\\n\\nIs this normal?\", \"post_time\": \"2011-12-08 19:54:03\" },\n\t{ \"post_id\": 747, \"topic_id\": 190, \"forum_id\": 8, \"post_subject\": \"Re: "Dataset too large to output" and MP Link Closed Error\", \"username\": \"bforeman\", \"post_text\": \"So you are not using OUTPUT? In that case it might be a bug, let me ask the development team.\", \"post_time\": \"2011-12-12 23:10:50\" },\n\t{ \"post_id\": 746, \"topic_id\": 190, \"forum_id\": 8, \"post_subject\": \"Re: "Dataset too large to output" and MP Link Closed Error\", \"username\": \"chargil\", \"post_text\": \"I don't use ALL at all (heh). I however, have a function that takes a large dataset as a parameter. The dataset is just over 10 MB in fact (although ideally it would be quite a bit larger than that). Filtering out some values before using the function lets everything run without problems but I was surprised to learn that such a limit existed.\", \"post_time\": \"2011-12-12 22:13:54\" },\n\t{ \"post_id\": 745, \"topic_id\": 190, \"forum_id\": 8, \"post_subject\": \"Re: "Dataset too large to output" and MP Link Closed Error\", \"username\": \"bforeman\", \"post_text\": \"How are you outputing the dataset? Are you trying to write to a file or are you just outputing to the ECL IDE? Are you using the ALL option on OUTPUT?\\n\\nA student in one of my classes was using ALL, and saw that error. Removing it limited the result to the IDE and the error went away.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-12-12 17:25:34\" },\n\t{ \"post_id\": 744, \"topic_id\": 190, \"forum_id\": 8, \"post_subject\": \"Re: MP link closed error\", \"username\": \"chargil\", \"post_text\": \"I don't have access to the cluster that I'm using so I installed the latest Virtual Image. However, the error I get now is this: \\n\\nError: System error: 10099: Graph[119], workunitwrite[123]: Dataset too large to output to workunit (limit 10) megabytes, in result (name=auto8), Master exception (0, 0), 10099,
\\n\\nEDIT: I changed some things around with my datasets and I get my original MP Link Closed error again.\", \"post_time\": \"2011-12-10 23:46:13\" },\n\t{ \"post_id\": 743, \"topic_id\": 190, \"forum_id\": 8, \"post_subject\": \""Dataset too large to output" and MP Link Closed Error\", \"username\": \"chargil\", \"post_text\": \"I'm getting errors when I run ECL on THOR. I'm not sure if it's my code or if it's the cluster itself that's having issues. \\n\\nError: System error: 4: MP link closed (10.15.2.206:6600) (0, 0), 4,
\\n\\nThis does not occur when I run on hthor.\", \"post_time\": \"2011-12-10 17:45:15\" },\n\t{ \"post_id\": 21293, \"topic_id\": 199, \"forum_id\": 8, \"post_subject\": \"Re: 0: System error: 0: Graph Result 1 accessed before it is\", \"username\": \"janet.anderson\", \"post_text\": \"I am getting a similar but different error: Error: System error: 99: Graph Result 7 accessed before it is created (in Graph G214 E216). I don't understand the error and I don't see any graphs/subgraphs with IDs like G214 or E216. For internal LN reviewers, my workunit is W20180322-142502 on Dataland. \\n\\nI do not get this error if I run the same code with a simple dataset that I create inline.\", \"post_time\": \"2018-03-22 18:42:47\" },\n\t{ \"post_id\": 5111, \"topic_id\": 199, \"forum_id\": 8, \"post_subject\": \"Re: 0: System error: 0: Graph Result 1 accessed before it is\", \"username\": \"Rahul Jain\", \"post_text\": \"Do you have more than 1 output statement. If yes try removing them and run again.\\nI got same error some time back due to below logic -\\n\\n//attr1 and attr2 are functions in some other attribute.\\nMAP (bool_condition =>\\nProject(aatr1.x(parm1,parm2),Layout_prj1), \\nProject(aatr2.y(parm1,parm2),Layout_prj1)\\n);\\n\\nAnd I kept 1 output statement in Project2, while due to bool_condition the project2 was not called. Once I removed that output statement the error was gone.\\n\\nThis can be temporary fix for you.\", \"post_time\": \"2014-01-08 18:20:47\" },\n\t{ \"post_id\": 778, \"topic_id\": 199, \"forum_id\": 8, \"post_subject\": \"Re: 0: System error: 0: Graph Result 1 accessed before it is\", \"username\": \"ghalliday\", \"post_text\": \"Errors like that are normally caused by a problem in the code generator or one of the engines.\\n\\nIs this roxie/hthor/thor? It looks like thor from the source.\\n\\nIf you can provide an archive of your query that would help me debug the cause.\\n\\n(You can email me direct at ghalliday@hpccsystems.com)\", \"post_time\": \"2012-01-04 18:40:10\" },\n\t{ \"post_id\": 777, \"topic_id\": 199, \"forum_id\": 8, \"post_subject\": \"Re: 0: System error: 0: Graph Result 1 accessed before it is\", \"username\": \"bforeman\", \"post_text\": \"Based on the error I am just making an educated guess, but it looks like you are attempting to access the Results View prior to the actual results returned by the query, so something in your ECL code is causing a significant delay. Could it also be possible that you lost your connection to the target cluster? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-04 13:57:33\" },\n\t{ \"post_id\": 775, \"topic_id\": 199, \"forum_id\": 8, \"post_subject\": \"0: System error: 0: Graph Result 1 accessed before it is cre\", \"username\": \"shriram.soni\", \"post_text\": \"What are the scenario when this error occurs?\\n0: System error: 0: Graph Result 1 accessed before it is created\", \"post_time\": \"2012-01-04 12:37:15\" },\n\t{ \"post_id\": 798, \"topic_id\": 200, \"forum_id\": 8, \"post_subject\": \"Re: ECL in Thor vs hThor\", \"username\": \"aintnomyth\", \"post_text\": \"Try removing any LOCAL options and re-run the thor job. If the results match then something isn't distributed properly upstream.\", \"post_time\": \"2012-01-06 23:22:29\" },\n\t{ \"post_id\": 783, \"topic_id\": 200, \"forum_id\": 8, \"post_subject\": \"Re: ECL in Thor vs hThor\", \"username\": \"bforeman\", \"post_text\": \"In theory, there shouldn't be a difference. 
HThor is simply a single node process, versus your THOR which is using multiple nodes. perhaps it's how you are grouping and what you are doing with those groups, but we won't know for sure until we can look at some sample code.\", \"post_time\": \"2012-01-05 14:24:37\" },\n\t{ \"post_id\": 780, \"topic_id\": 200, \"forum_id\": 8, \"post_subject\": \"ECL in Thor vs hThor\", \"username\": \"shriram.soni\", \"post_text\": \"Hello,\\nWe have 4 nodes and running same ECL code in hThor and Thor. The results are different in both cases. We are using Rollup and group commands in our code. Can you please help in understanding why this difference.\", \"post_time\": \"2012-01-05 13:14:22\" },\n\t{ \"post_id\": 827, \"topic_id\": 208, \"forum_id\": 8, \"post_subject\": \"Re: Query on Joining sets of datasets.\", \"username\": \"rtaylor\", \"post_text\": \"Glad she likes them! You know how to get hold of me if she wants more... \", \"post_time\": \"2012-01-13 19:06:01\" },\n\t{ \"post_id\": 826, \"topic_id\": 208, \"forum_id\": 8, \"post_subject\": \"Re: Query on Joining sets of datasets.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nBy the way, Aly over moon about necklaces
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-13 19:01:01\" },\n\t{ \"post_id\": 824, \"topic_id\": 208, \"forum_id\": 8, \"post_subject\": \"Re: Query on Joining sets of datasets.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThanks for the heads-up on the newline problem (now fixed for the next doc release).\\n\\nThe LEFT OUTER issue is puzzling to me, too. IMO, both Ds2 and Ds4 should be matches for the A record, making the number of matching records 3, but my result is exactly the same as yours. Therefore, I will report his as a bug.\\n\\nRichard\", \"post_time\": \"2012-01-13 16:19:17\" },\n\t{ \"post_id\": 821, \"topic_id\": 208, \"forum_id\": 8, \"post_subject\": \"Query on Joining sets of datasets.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm working though this section of the ECL reference manual (page 200) and am attempting to understand the output generated for the 'LEFT OUTER' example.\\nI reproduce the example given in the manual below:\\n
\\nRec := RECORD,MAXLENGTH(4096)\\n STRING1 Letter;\\n UNSIGNED1 DS;\\n UNSIGNED1 Matches := 0;\\n UNSIGNED1 LastMatch := 0;\\n SET OF UNSIGNED1 MatchDSs := [];\\nEND;\\n\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\nSetDS := [ds1,ds2,ds3,ds4,ds5];\\n\\nRec XF(Rec L,DATASET(Rec) Matches) := TRANSFORM\\n SELF.Matches := COUNT(Matches);\\n\\t\\tSELF.LastMatch := MAX(Matches,DS);\\n\\t\\tSELF.MatchDSs := SET(Matches,DS);\\n\\t\\tSELF := L;\\nEND;\\n\\nj1 := JOIN(SetDS,STEPPED(LEFT.Letter=RIGHT.Letter),XF(LEFT,ROWS(LEFT)),SORTED(Letter));\\nj2 := JOIN(SetDS,STEPPED(LEFT.Letter=RIGHT.Letter),XF(LEFT,ROWS(LEFT)),SORTED(Letter),LEFT OUTER);\\nj3 := JOIN(SetDS,STEPPED(LEFT.Letter=RIGHT.Letter),XF(LEFT,ROWS(LEFT)),SORTED(Letter),LEFT ONLY);\\nj4 := JOIN(SetDS,STEPPED(LEFT.Letter=RIGHT.Letter),XF(LEFT,ROWS(LEFT)),SORTED(Letter),MOFN(3));\\nj5 := JOIN(SetDS,STEPPED(LEFT.Letter=RIGHT.Letter),XF(LEFT,ROWS(LEFT)),SORTED(Letter),MOFN(3,4));\\n\\nOUTPUT(j1,NAMED('inner'));\\nOUTPUT(j2,NAMED('left_outer'));\\nOUTPUT(j3,NAMED('left_only'));\\nOUTPUT(j4,NAMED('MOFN_3'));\\nOUTPUT(j5,NAMED('MOFN_3_4'));\\n
\\n\\nThe results I get for 'LEFT OUTER' are:\\nLetter ds matches lastmatch Matches Item\\nA 1 2 4 1,4\\nB 1 5 5 1,2,3,4,5\\nC 1 2 3 1,3\\nD 1 1 1 1\\nE 1 1 1 1\\n
\\nFor 'LEFT OUTER' the manual says:\\nAt least one record for every record in the first dataset in the setofdatasets.\\n\\nSo for the 'A' row, if set 4 was selected why was not set 2 also selected given set 2 also has a 'A' record in it. \\nConversely if set 2 should not be selected why is set 4 being selected.\\nSomething seems wrong whichever way you look at it.\\n\\nAs an aside, the manuals example is missing a few newlines.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-13 12:22:41\" },\n\t{ \"post_id\": 836, \"topic_id\": 210, \"forum_id\": 8, \"post_subject\": \"Re: Allocating Space in BeginC++\", \"username\": \"ghalliday\", \"post_text\": \"A variable length string is represented as a 4 byte little endian length followed by the string.\\n\\nSo [100, 'Test', 0.0] would be\\n\\n8 byte little-engian integer\\n4 byte length : 04 00 00 00\\n4 bytes of text: Test\\n\\n8 byte real value\\n\\nIf you're returning a dataset you'll either have to precalculate the length of all the strings, and then allocate, or build the result up in a dynamically resied buffer and return at the end.\\n\\nThere is also the option of generating a link counted dataset. In that case the result is returned as an array of rows. See ecl/regress/stream.ecl(46) in the github sources for an example ..\", \"post_time\": \"2012-01-16 17:35:53\" },\n\t{ \"post_id\": 829, \"topic_id\": 210, \"forum_id\": 8, \"post_subject\": \"Allocating Space in BeginC++\", \"username\": \"nvasil\", \"post_text\": \"I want to write a custom C++ function that returns a dataset of a simple record\\n\\nMyRec := RECORD\\n INTEGER i;\\n STRING id;\\n REAL value;\\nEND;\\n\\nHow do I allocate space inside BEGINC++ module? \\nIn the documentation the only example suggests something like that\\nchar * out = (char *)rtlMalloc(len);\\nIf I want to generate a dataset of 10 records, what is the value of len 10x?\\nI could possibly figure out how to do it if I only had REALs of Integers, but how can I handle STRINGS that have variable length?\", \"post_time\": \"2012-01-15 22:37:14\" },\n\t{ \"post_id\": 839, \"topic_id\": 214, \"forum_id\": 8, \"post_subject\": \"Re: Data saved in CSV is not proper (count) as shows In ECL \", \"username\": \"bforeman\", \"post_text\": \"Are you despraying the file to the landing zone? All you need to do is to Browse the Logical File, select the file and press the gray button just to the right of the checkbox. From there you will see the despray option.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2012-01-17 16:16:21\" },\n\t{ \"post_id\": 837, \"topic_id\": 214, \"forum_id\": 8, \"post_subject\": \"Data saved in CSV is not proper (count) as shows In ECL IDE\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi, \\nWe are having 4 nodes clustre and running a file on thor, while we are saving it in Landing Zone we are getting difference in count of records ,for Ex. 
It shows 20,457 rows in ECL IDE but saving it in landing zone after distributing it we are having only half the records in CSV, and .TMP fle of such name thtmp21035_7__partial.tmp gets created.\\n\\nHow can i have my output command to collate these two in one??\", \"post_time\": \"2012-01-17 04:02:01\" },\n\t{ \"post_id\": 846, \"topic_id\": 215, \"forum_id\": 8, \"post_subject\": \"Re: Asynchronous SOAP call\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":3mnrd0tx]What about nesting the SOAPCALL in a SEQUENTIAL statement?\\n\\nThe SOAPCALL isn't the problem, it's the SOAP call generated by the external service that executes the ECL code. That execution is synchronous, returning only after ECL does its bit, and that is what I want to turn asynchronous.\\n\\nOne solution is to create a stand-alone service that acts as a middleman, returning immediately to the first caller while also hanging around for ECL to finish, but that's a bit ugly.\", \"post_time\": \"2012-01-18 14:13:02\" },\n\t{ \"post_id\": 845, \"topic_id\": 215, \"forum_id\": 8, \"post_subject\": \"Re: Asynchronous SOAP call\", \"username\": \"bforeman\", \"post_text\": \"What about nesting the SOAPCALL in a SEQUENTIAL statement?\", \"post_time\": \"2012-01-18 14:08:22\" },\n\t{ \"post_id\": 841, \"topic_id\": 215, \"forum_id\": 8, \"post_subject\": \"Asynchronous SOAP call\", \"username\": \"DSC\", \"post_text\": \"Is there a way to make ECL code, published via the IDE, operate asynchronously?\\n\\nI'm working on a process that generates an XML data file based on an arbitrarily-large raw input file. The file would be transferred to a drop zone via an external service, and I was hoping to simply kick off the thor process and then have it issue a SOAPCALL when it was complete (and have another external process pick up the generated and desprayed XML file). I have working code, but it's synchronous -- the original SOAP call completes only when the XML file has been generated. I'm hoping there is a simple option somewhere that I've overlooked.\\n\\nThanks!\", \"post_time\": \"2012-01-17 19:45:13\" },\n\t{ \"post_id\": 859, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"DSC\", \"post_text\": \"Richard, thanks again for all your help with this. I truly appreciate the insight and explanations. Have a great weekend!\\n\\nDan\", \"post_time\": \"2012-01-20 19:17:37\" },\n\t{ \"post_id\": 858, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nDoes it then follow that this overly-verbose code:\\n...\\nis actually an OUTPUT under the covers? Because I do see a TRUE result, which I find extremely confusing.
Well, it is not an OUTPUT, but it IS an action that has a value to return (the boolean value TRUE) as opposed to an action that just does something (like spray) and does not directly return a value to the workunit.\\nAlso, you may want to modify the documentation for the file parameter of the OUTPUT statement.
Point taken. I have now re-written that text to say:\\nOptional. The logical name of the file to write the records to. See the Scope & Logical Filenames section of the Language Reference for more on logical filenames. If omitted, the formatted data stream only returns to the command issuer (command line or IDE) and is not written to a disk file.
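\\nFor example (a minimal sketch; the logical filename is made up):\\n

ds := DATASET([{'Fred'},{'George'}], {STRING10 name});

OUTPUT(ds);                                       // no file parameter: records only go back to the submitter
OUTPUT(ds,,'~thor::examples::names',OVERWRITE);   // file parameter: records are also written to a logical disk file
                                                  // (and, as discussed above, the action still shows up as a workunit result)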
You will eventually see this in the Language Reference (whenever a new build is produced that includes updated docs).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-20 19:08:46\" },\n\t{ \"post_id\": 857, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":18gozqeg]Several of the actions in your code do not produce output results (spray and despray, etc.) but you DO have two OUTPUT actions -- one that writes the file to disk and one that outputs the filename, and THAT's why you have the "extra" result.\\n\\nOK, that makes sense. Does it then follow that this overly-verbose code:\\n\\nSHARED foo() := FUNCTION\\n RETURN TRUE;\\nEND;
\\n\\nis actually an OUTPUT under the covers? Because I do see a TRUE result, which I find extremely confusing.\\n\\nAlso, you may want to modify the documentation for the file parameter of the OUTPUT statement. It currently reads:\\n\\nOptional. The logical name of the file to write the records to. See the Scope & Logical Filenames article in the Programmer's Guide for more on logical filenames. If omitted, the formatted data stream returns to the command issuer (command line or Query Builder).
\\n\\nThat implies, to me at least, that including a value for the file parameter would not return the data stream to the issuer (IDE, SOAP caller, etc.). That was my assumption when I wrote the code, which led to all of the typing in this forum.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-01-20 17:04:31\" },\n\t{ \"post_id\": 856, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"rtaylor\", \"post_text\": \"OK, if you're seeing the extra result, then it is deliberate. A colleague just emailed me this comment:\\n\\nNot sure what he’s expecting. Looks like he’s doing an OUTPUT of a file, in addition to an OUTPUT of the string filename, and somehow in his work is seeing both results. I’m not sure how he’s accessing the results, but I don’t know of a way a workunit is going to produce a file and NOT have it show up in results. If he’s doing some soap call to query WUDetails, for example, and parsing the results section (just thinking out loud), he could use NAMED() for the string output to locate it directly or parse the filename. But, he didn’t really describe how he’s getting those results from the external environment. For all I know, there’s a way to get results back via ECLPlus (in addition to the WUID).
\\n\\nIOW, what he's reminding me of is that, since you're actually doing multiple actions in your code, you -should- be getting a separate result for each of those actions. A single workunit may produce multiple results, like this:\\n\\nOUTPUT('Fred');\\nOUTPUT('George');
\\nRunning this code will give you two result tabs, one for Fred and one for George.\\n\\nSeveral of the actions in your code do not produce output results (spray and despray, etc.) but you DO have two OUTPUT actions -- one that writes the file to disk and one that outputs the filename, and THAT's why you have the "extra" result.\", \"post_time\": \"2012-01-20 16:54:27\" },\n\t{ \"post_id\": 855, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"DSC\", \"post_text\": \"Fantastic information, Richard! I really, sincerely, appreciate the hints, pointers, feedback, etc.!\\n\\n[quote="rtaylor":1rh70tpn]You DO use Thor exactly this way, but Thor was not designed to have its jobs launched with SOAP (so that may be the entire problem here). Thor is not an end-user tool -- it was designed to be a developer's single-threaded back office tool. And automating standard Thor jobs is exactly what ECLplus.exe was created for. Roxie is the tool designed to deliver end-user results with thousands of concurrent queries launched with SOAP (and now JSON, too).\\n\\nI'll need to take a closer look at ECLplus, then, as I thought it was only a command-line version of the IDE, at least as far as code execution goes.\\n\\n[quote="rtaylor":1rh70tpn]I don't know why you're getting extra results with SOAP, but my question is -- do you get those extra results when you simply run the job interactively on Thor (just hit Submit)? If you do, then we'll need to take a closer look at your code. But if not, then you simply need to use ECLplus.exe to automate your job launching instead of SOAP.\\n\\nI do see the extra result in the IDE, after just hitting the Submit button, and that result does change between the dataset and the TRUE value. It perfectly matches the SOAP result, so I don't think SOAP is the culprit, here. Er, but a related fact may help troubleshooting: The WSDL that is created does have the extra result defined. Something obviously believes the unwanted result is valid and not completely unintentional.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-20 15:59:55\" },\n\t{ \"post_id\": 854, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"rtaylor\", \"post_text\": \"And I'm not *using* side-effects at all. I simply thought that side-effects were a possible source of my problem.
\\nCorrect. Now that I see your code I see that your actions are not "side-effects" but the actual return values from your FUNCTIONs.\\n\\nI'm glad to hear that they're going away, though, as they seemed problematic. By the way, is this documented anywhere? The current docs on your site (http://hpccsystems.com/community/docs/e ... -structure) don't indicate that side-effects are deprecated.
\\nThe docs on side-effect actions are being updated to indicate that WHEN is now needed. Where you used to be able to do this:\\nfunc(string1 ltr) := FUNCTION\\n OUTPUT(ltr);\\n RETURN ltr;\\nEND;\\n\\nfunc('A');
\\nNow you have to do this:\\nfunc(string1 ltr) := FUNCTION\\n o := OUTPUT(ltr);\\n RETURN WHEN(ltr,o);\\nEND;\\n\\nfunc('A');
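\\nThe same pattern works when the FUNCTION returns a recordset rather than a string - a rough, untested sketch:\\n

dsFunc(STRING1 ltr) := FUNCTION
    ds  := DATASET([{'A'},{'B'},{'C'}], {STRING1 letter});
    log := OUTPUT(COUNT(ds(letter = ltr)), NAMED('match_count'));  // the side-effect action
    RETURN WHEN(ds(letter = ltr), log);                            // action runs when the result is used
END;

OUTPUT(dsFunc('A'));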
\\nYou're right: I'm using THOR exclusively. My reasoning was that, in this case, I'm processing an input file exactly one time to create a single output, then discarding all the work. No additional querying is needed. This seemed to match Thor's capabilities. What does *not* match? Why wouldn't one use Thor in this way?
\\nYou DO use Thor exactly this way, but Thor was not designed to have its jobs launched with SOAP (so that may be the entire problem here). Thor is not an end-user tool -- it was designed to be a developer's single-threaded back office tool. And automating standard Thor jobs is exactly what ECLplus.exe was created for. Roxie is the tool designed to deliver end-user results with thousands of concurrent queries launched with SOAP (and now JSON, too).\\n\\nI don't know why you're getting extra results with SOAP, but my question is -- do you get those extra results when you simply run the job interactively on Thor (just hit Submit)? If you do, then we'll need to take a closer look at your code. But if not, then you simply need to use ECLplus.exe to automate your job launching instead of SOAP.\\n\\nI didn't think I was doing anything wildly different than anyone else. I'm still on the uphill slope of this learning curve; hence, these questions!
\\nOK, then in the interest of helping you up the slope, here are a couple of minor, stylistic points:\\n\\n1) You are overusing the FUNCTION structure. Any ECL definition that takes parameters IS a function (note the lower case), and if that function is a single expression, you do not need the FUNCTION structure (IOW, you're working too hard -- doing too much typing ). So this function:\\n
SHARED STRING LogicalPathOfIncomingDataFile(STRING rawDataFileName) := FUNCTION\\n RETURN kIncomingDataLogicalDirectory + rawDataFileName;\\nEND;
\\nIs exactly the same as this one:\\nSHARED STRING LogicalPathOfIncomingDataFile(STRING rawDataFileName) :=\\n kIncomingDataLogicalDirectory + rawDataFileName;
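\\nWhere FUNCTION does earn its keep is when several local definitions feed a single RETURN, e.g. (a sketch only; kAnalyticsTempDirectory is assumed to be defined in the surrounding MODULE, as in your code):\\n

SHARED STRING AnalyticsFilePath(STRING rawDataFileName) := FUNCTION
    baseName := 'analytics_' + rawDataFileName;   // local helper definitions...
    withExt  := baseName + '.xml';
    RETURN kAnalyticsTempDirectory + withExt;     // ...combined into one returned value
END;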
\\nThe FUNCTION structure was added to the language as a "container" to encapsulate all the related code necessary to produce a single result. It is just a code organization tool.\\n\\n2) You're more "granular" with your IMPORTs than necessary. Your code:\\nIMPORT $.ConfidenceStats AS ConfidenceStats;\\nIMPORT $.EmployeeStats AS EmployeeStats;\\nIMPORT $.FileLayout AS FileLayout;\\nIMPORT $.GeographicStats AS GeographicStats;\\nIMPORT $.RevenueStats AS RevenueStats;\\nIMPORT STD.File;
\\nIs there to save yourself from needing to qualify your definition names with "$." and the standard library with "STD.File." but there's not a lot of savings here -- the number of characters you had to type to avoid the qualification is probably more than the number of instances of "$." that you've avoided. Once again, you're working too hard \\n\\nBut it can also be written like this and accomplish the same thing:\\n
IMPORT * FROM $;\\nIMPORT * FROM STD;
\\nBut I would just go with the $. full qualification in my code, making my list of IMPORTS just this:\\nIMPORT $, STD;
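\\nWith that in place every reference is simply qualified at the point of use, e.g. (sketch; the file path is made up):\\n

IMPORT $, STD;

upperName := STD.Str.ToUpperCase('fred');                  // standard library, fully qualified
rawData   := $.FileLayout.ETL_DS('~optimizer::somefile');  // your own module, fully qualified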
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-20 15:32:58\" },\n\t{ \"post_id\": 852, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":37q54bg8]Since you're using side-effect actions inside a FUNCTION I can tell you're still using the legacy 702 build and not the new Open Source environment (where side-effect actions in FUNCTION structures are deprecated).\\n\\nI believe I'm using the current Open Source release. ECL Watch is reporting 'community_3.4.0-1' at least.\\n\\nAnd I'm not *using* side-effects at all. I simply thought that side-effects were a possible source of my problem. I'm glad to hear that they're going away, though, as they seemed problematic. By the way, is this documented anywhere? The current docs on your site (http://hpccsystems.com/community/docs/e ... -structure) don't indicate that side-effects are deprecated.\\n\\n[quote="rtaylor":37q54bg8]And, from the fact that you're using OUTPUT to write a new dataset to disk, I can also tell that your SOAP service is running on Thor and not on Roxie (you cannot use OUTPUT in a Roxie query). And Thor was not really designed to operate that way.\\n\\nYou're right: I'm using THOR exclusively. My reasoning was that, in this case, I'm processing an input file exactly one time to create a single output, then discarding all the work. No additional querying is needed. This seemed to match Thor's capabilities. What does *not* match? Why wouldn't one use Thor in this way?\\n\\n[quote="rtaylor":37q54bg8]Some of our external customers have been running this scenario for years, and it does not require use of SOAP at all. Here's their process:\\n\\n1) External service creates a data file to process in the ECL drop zone.\\n2) ECLplus.exe launches the ECL code to process the file. That code then:\\n\\n a) Sprays file into the cluster.\\n b) Processes the sprayed file.\\n c) Writes the result dataset using OUTPUT.\\n d) Desprays the result file to the drop zone.\\n 3) Writes a semaphore file to the drop zone indicating a completed process.\\n\\nTheir "external service" is a simple daemon that monitors an FTP site, picking up new files as they come in, launches ECLplus.exe to process them through their Thor, and sending the results back to their customer when complete. Their ECL code is just a MACRO that generates the appropriate BWR ECL code for each new file instance to process.\\n\\nI didn't think I was doing anything wildly different than anyone else. I'm still on the uphill slope of this learning curve; hence, these questions! I'll investigate this avenue as well.\\n\\nBut, my original question stands: Where is my unwanted result coming from? 
Here is the main code, with the last function being the entry point:\\n\\nIMPORT $.ConfidenceStats AS ConfidenceStats;\\nIMPORT $.EmployeeStats AS EmployeeStats;\\nIMPORT $.FileLayout AS FileLayout;\\nIMPORT $.GeographicStats AS GeographicStats;\\nIMPORT $.RevenueStats AS RevenueStats;\\n\\nIMPORT std.File;\\n\\nEXPORT Analytics := MODULE\\n\\n\\t//==========================================================================\\n\\t// Module constants\\n\\t//==========================================================================\\n\\tSHARED\\tkSprayHost := '10.210.150.80';\\n\\tSHARED\\tkSprayDirectory := '/var/lib/HPCCSystems/dropzone';\\n\\tSHARED\\tkESPServer := GETENV('ws_fs_server');\\n\\tSHARED\\tkIncomingDataLogicalDirectory := '~optimizer::';\\n\\tSHARED\\tkAnalyticsTempDirectory := '~tmp::';\\n\\n\\t//==========================================================================\\n\\t// Module utilities\\n\\t//==========================================================================\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// Generate logical path raw data file.\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//--------------------------------------------------------------------------\\n\\tSHARED\\tSTRING LogicalPathOfIncomingDataFile(STRING rawDataFileName) := FUNCTION\\n\\t\\tRETURN kIncomingDataLogicalDirectory + rawDataFileName;\\n\\tEND;\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// Generate logical path temporary analytics file.\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//--------------------------------------------------------------------------\\n\\tSHARED\\tSTRING AnalyticsFileName(STRING rawDataFileName) := FUNCTION\\n\\t\\tRETURN 'analytics_' + rawDataFileName + '.xml';\\n\\tEND;\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// Generate logical path temporary analytics file.\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//--------------------------------------------------------------------------\\n\\tSHARED\\tSTRING LogicalPathOfTempAnalyticsFile(STRING rawDataFileName) := FUNCTION\\n\\t\\tRETURN kAnalyticsTempDirectory + AnalyticsFileName(rawDataFileName);\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Spray incoming raw data file into the cluster for analytic processing.\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//==========================================================================\\n\\tSHARED\\tSprayIncomingDataFile(STRING rawDataFileName) := FUNCTION\\n\\t\\tkSourceMaxRecordSize := 32767;\\n\\t\\tkSourceCsvSeparate := '\\\\\\\\,';\\n\\t\\tkSourceCsvTerminate := '\\\\\\\\n,\\\\\\\\r\\\\\\\\n';\\n\\t\\tkSourceCsvQuote := '"';\\n\\t\\tkDestinationGroup := 'mythor';\\n\\t\\tkTimeOut := -1;\\n\\t\\tkMaxConnections := -1;\\n\\t\\tkOverwriteIfDestExists := TRUE;\\n\\t\\tkReplicate := FALSE;\\n\\t\\tkCompress := FALSE;\\n\\t\\t\\n\\t\\tspraySourcePath := kSprayDirectory + '/' + rawDataFileName;\\n\\t\\tdestinationLogicalPath := LogicalPathOfIncomingDataFile(rawDataFileName);\\n\\t\\t\\n\\t\\tresultAction := 
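\\n\\t\\t// SprayVariable sprays a variable-length (CSV) file from the landing zone\\n\\t\\t// host/path into the cluster as the named logical file.\\n\\t\\t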
File.SprayVariable\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSprayHost,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tspraySourcePath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSourceMaxRecordSize,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSourceCsvSeparate,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSourceCsvTerminate,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSourceCsvQuote,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkDestinationGroup,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdestinationLogicalPath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkTimeOut,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkESPServer,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkMaxConnections,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkOverwriteIfDestExists,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkReplicate,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkCompress\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN resultAction;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Function to generate and return a dataset containing all the analytics\\n\\t// \\n\\t// dataFilePath\\t\\t\\tThe logical path to the raw source data, after it\\n\\t//\\t\\t\\t\\t\\t\\thas been sprayed into the cluster.\\n\\t//==========================================================================\\n\\tSHARED\\tDATASET GenerateAnalyticsDataset(STRING dataFilePath) := FUNCTION\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\t// Dataset definition\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\tds := FileLayout.ETL_DS(dataFilePath);\\n\\t\\t\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\t// Macros that define the individual analytic results. Defined here\\n\\t\\t// to avoid double definitions (once in the record, once in the\\n\\t\\t// invocation\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\tDATASET\\tConfidenceStatsResult() := MACRO\\n\\t\\t\\tConfidenceStats.GetResult(ds)\\n\\t\\tENDMACRO;\\n\\t\\t\\n\\t\\tDATASET\\tRevenueStatsResult() := MACRO\\n\\t\\t\\tRevenueStats.GetResult(ds)\\n\\t\\tENDMACRO;\\n\\t\\t\\n\\t\\tDATASET\\tEmployeeStatsResult() := MACRO\\n\\t\\t\\tEmployeeStats.GetResult(ds)\\n\\t\\tENDMACRO;\\n\\t\\t\\n\\t\\tDATASET\\tGeographicStatsResult() := MACRO\\n\\t\\t\\tGeographicStats.GetResult(ds)\\n\\t\\tENDMACRO;\\n\\t\\t\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\t// Record definition of the result of this function\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\tResultRecord := RECORD\\n\\t\\t\\tconfidence := ConfidenceStatsResult();\\n\\t\\t\\trevenue := RevenueStatsResult();\\n\\t\\t\\temployees := EmployeeStatsResult();\\n\\t\\t\\tgeographic := GeographicStatsResult();\\n\\t\\tEND;\\n\\t\\t\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\t// Create the result record\\n\\t\\t//----------------------------------------------------------------------\\n\\t\\tresult := DATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tConfidenceStatsResult(),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tRevenueStatsResult(),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tEmployeeStatsResult(),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tGeographicStatsResult()\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t\\tResultRecord\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN 
result;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Create the analytics data file. The resulting file is distributed across\\n\\t// our nodes\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//==========================================================================\\n\\tSHARED\\tGenerateTempAnalyticsFile(STRING rawDataFileName) := FUNCTION\\n\\t\\tlogicalFilePath := LogicalPathOfIncomingDataFile(rawDataFileName);\\n\\t\\tanalyticsDataset := GenerateAnalyticsDataset(logicalFilePath);\\n\\t\\tdestPath := LogicalPathOfTempAnalyticsFile(rawDataFileName);\\n\\t\\t\\n\\t\\tresultAction := OUTPUT\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tanalyticsDataset,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tdestPath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tXML(TRIM,OPT,HEADING('<?xml version="1.0" encoding="UTF-8" ?>\\\\n<Dataset>','</Dataset>')),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tOVERWRITE\\n\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN resultAction;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Combine distributed analytics data into one file\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//==========================================================================\\n\\tSHARED\\tDesprayTempAnalyticsFile(STRING rawDataFileName) := FUNCTION\\n\\t\\tkTimeout := -1;\\n\\t\\tkMaxConnections := -1;\\n\\t\\tkOverwriteIfDestExists := TRUE;\\n\\t\\t\\n\\t\\tsourceLogicalPath := LogicalPathOfTempAnalyticsFile(rawDataFileName);\\n\\t\\tdestFilename := AnalyticsFileName(rawDataFileName);\\n\\t\\tdesprayDestinationPath := kSprayDirectory + '/' + destFilename;\\n\\t\\t\\n\\t\\tresultAction := File.Despray\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tsourceLogicalPath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkSprayHost,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdesprayDestinationPath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkTimeout,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkESPServer,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkMaxConnections,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tkOverwriteIfDestExists\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN resultAction;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Combine distributed analytics data into one file\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//==========================================================================\\n\\tSHARED\\tDeleteTempFiles(STRING rawDataFileName) := FUNCTION\\n\\t\\tkAllowMissing := TRUE;\\n\\t\\t\\n\\t\\ttempLogicalPath := LogicalPathOfTempAnalyticsFile(rawDataFileName);\\n\\t\\tdeleteTempLogicalPath := IF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFile.FileExists(tempLogicalPath),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFile.DeleteLogicalFile(tempLogicalPath,kAllowMissing)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tsprayedLogicalPath := LogicalPathOfIncomingDataFile(rawDataFileName);\\n\\t\\tdeleteSprayedLogicalPath := IF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFile.FileExists(sprayedLogicalPath),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tFile.DeleteLogicalFile(sprayedLogicalPath,kAllowMissing)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tresultAction := PARALLEL\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdeleteTempLogicalPath,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdeleteSprayedLogicalPath\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN 
resultAction;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Combine distributed analytics data into one file\\n\\t// \\n\\t// rawDataFileName\\t\\t\\t\\tThe original name of the raw data file.\\n\\t//==========================================================================\\n\\tEXPORT\\tGenerateAnalyticsFromRawData(STRING rawDataFileName) := FUNCTION\\n\\t\\tdestFilename := AnalyticsFileName(rawDataFileName);\\n\\t\\t\\n\\t\\tresultAction := SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSprayIncomingDataFile(rawDataFileName),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tGenerateTempAnalyticsFile(rawDataFileName),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDesprayTempAnalyticsFile(rawDataFileName),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDeleteTempFiles(rawDataFileName),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tOUTPUT(destFilename)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\tRETURN resultAction;\\n\\tEND;\\n\\nEND; // Analytics Module\\n
\\n\\nThe 'GenerateTempAnalyticsFile' function seems to be the problem child. That is where the unwanted result seems to come from. That is also the function that, if you replace the entire body with 'RETURN TRUE;' actually puts an unwanted 'TRUE' result in the output.\\n\\nThis is a very long message and I apologize for that. If you have any suggestions on how I can debug this issue, I would appreciate it.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-20 14:05:15\" },\n\t{ \"post_id\": 851, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Re: Unwanted results\", \"username\": \"rtaylor\", \"post_text\": \"I'm probably confused about functions or their side effects, or I'm inadvertently abusing functions somehow, but I can't quite get my head around this. Any pointers on things to look for would be greatly appreciated.
\\n\\nSince you're using side-effect actions inside a FUNCTION I can tell you're still using the legacy 702 build and not the new Open Source environment (where side-effect actions in FUNCTION structures are deprecated). And, from the fact that you're using OUTPUT to write a new dataset to disk, I can also tell that your SOAP service is running on Thor and not on Roxie (you cannot use OUTPUT in a Roxie query). And Thor was not really designed to operate that way.\\n\\nSome of our external customers have been running this scenario for years, and it does not require use of SOAP at all. Here's their process:\\n\\n1) External service creates a data file to process in the ECL drop zone.\\n2) ECLplus.exe launches the ECL code to process the file. That code then:\\n\\n a) Sprays file into the cluster.\\n b) Processes the sprayed file.\\n c) Writes the result dataset using OUTPUT.\\n d) Desprays the result file to the drop zone.\\n 3) Writes a semaphore file to the drop zone indicating a completed process.\\n\\nTheir "external service" is a simple daemon that monitors an FTP site, picking up new files as they come in, launches ECLplus.exe to process them through their Thor, and sending the results back to their customer when complete. Their ECL code is just a MACRO that generates the appropriate BWR ECL code for each new file instance to process. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-19 20:20:33\" },\n\t{ \"post_id\": 850, \"topic_id\": 217, \"forum_id\": 8, \"post_subject\": \"Unwanted results\", \"username\": \"DSC\", \"post_text\": \"After temporarily moving past my other current question ("Asynchronous SOAP call"), I've rushed headlong into a new problem with the same implementation. I'm trying to implement the following scenario:\\n\\n1) External service creates a data file to process in my ECL drop zone.\\n\\n2) SOAP call into ECL triggers processing of file.\\n\\n3) ECL sprays file into the cluster.\\n\\n4) An 'analytics' dataset is created, based on data from the sprayed file.\\n\\n5) Analytics dataset is written to a logical file, via OUTPUT and with XML options.\\n\\n6) XML analytics file is desprayed back into the drop zone.\\n\\n7) Temporary files (sprayed data file and analytics file from #5) is deleted.\\n\\n8) As of this writing, the SOAP call (#2) returns to the external caller, who then picks up the analytics file from the drop zone.\\n\\nSteps 3-7 are, tactically, performed within a single FUNCTION, specifically within a SEQUENTIAL statement. The last action within the SEQUENTIAL is a simple OUTPUT that spits out a single string that represents the filename of the analytics file now sitting in the drop zone. That filename is the only thing I want returned to the SOAP caller.\\n\\nMy problem is that the dataset from #5 is also returned. At least, ECL Watch reports that result as coming from that logical file; the SOAP result shows the dataset.\\n\\nI'm thinking that the OUTPUT that creates the logical file is somehow echoing back, but the documentation seems to say that that shouldn't happen if the file option is used. What *utterly* confuses me is if I replace the body of the function that does step #5 with a simple 'return true' statement, "true" is returned as the unwanted result instead of the dataset. What on earth would cause that?\\n\\nI'm probably confused about functions or their side effects, or I'm inadvertently abusing functions somehow, but I can't quite get my head around this. 
Any pointers on things to look for would be greatly appreciated.\\n\\nThanks!\", \"post_time\": \"2012-01-19 18:45:18\" },\n\t{ \"post_id\": 896, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\nIf 'ROWS' is explained its in the body of text, I don't get a clear indication of the use of 'ROWS' as opposed to 'ROW'. I start wondering if they are the same thing. Is it a typo or not, I just don't know.
\\nFirst, this is not a "ROWS" function that we're talking about, but the keywords ROWS(LEFT) or ROWS(RIGHT) (depending on the requirement of the function in which it is used). These are specific code constructs only for the context in which they are used, and not built-in ECL functions for general use. Nor are they options on any of these functions (the way LOCAL is), so that is why there is no entry for them in the list of parameter and option descriptions for these functions. The only meaningful place to discuss them is in the text describing how the TRANSFORM functions are called (the only place where they are actually used).\\n\\nWith that said, your point is well taken, and I have just added a discussion of ROWS(RIGHT) and ROWS(LEFT) to the section on Reserved Keywords in the Language Reference.\\n\\nThanks for your input, and keep the comments coming -- that's how we make it all better for everybody,\\n\\nRichard\", \"post_time\": \"2012-01-25 18:51:54\" },\n\t{ \"post_id\": 895, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"Allan\", \"post_text\": \"Ok,\\n\\nBut it does not have a section of its own.\\n\\nFor example 'OPT' does not have a section of its own, but whereever its used their is an explanation, for example PROJECT page 250 and KEYED page 93.\\n\\nIf 'ROWS' is explained its in the body of text, I don't get a clear indication of the use of 'ROWS' as opposed to 'ROW'. I start wondering if they are the same thing. Is it a typo or not, I just don't know.\\n\\nIf 'ROW' and 'ROWS' are distinct, then perhaps a paragraph somewhere clarifying the difference would not go amiss?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-25 16:48:55\" },\n\t{ \"post_id\": 894, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nROWS(LEFT) and ROWS(RIGHT) are used only in these functions:\\n\\ncombine \\ndenormalize\\nrollup\\njoin (set of datasets)\\nloop
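\\n\\nFor instance, in the grouped form of ROLLUP the TRANSFORM receives ROWS(LEFT), the whole group as a child dataset, rather than a single RIGHT record. A quick sketch (record and field names here are just made up):\\n\\nInRec := {STRING1 letter; UNSIGNED1 num;};\\nds := DATASET([{'A',1},{'A',2},{'B',3}],InRec);\\nOutRec := {STRING1 letter; UNSIGNED total;};\\nOutRec RollGroup(InRec L, DATASET(InRec) allrows) := TRANSFORM\\n  SELF.letter := L.letter;\\n  SELF.total := SUM(allrows,num);\\nEND;\\nOUTPUT(ROLLUP(GROUP(SORT(ds,letter),letter),GROUP,RollGroup(LEFT,ROWS(LEFT))));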
\\n\\nConsequently, the docs for each reference their use in the specific circumstance appropriate for that function. That's why there is no central discussion in the language reference. Look at the "COMBINE TRANSFORM Function Requirements" paragraph on page 142 for an example of how it is documented.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-25 16:25:05\" },\n\t{ \"post_id\": 890, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"dabayliss\", \"post_text\": \"Rows is used to express that a transform is being passed a dataset of records rather than a single record. Examples are the rollup,group and loop\\n\\nDavid\", \"post_time\": \"2012-01-25 12:50:02\" },\n\t{ \"post_id\": 888, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nTwo examples form the Oct 2011 Version 3.2.2.0\\n\\nCOMBINE function page 142\\nROLLUP function page 261\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-25 10:19:49\" },\n\t{ \"post_id\": 876, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"Re: 'ROWS' used in ECL ref manual but never explained.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nCan you point me to a specific instance or two, please?\\n\\nRichard\", \"post_time\": \"2012-01-24 18:11:56\" },\n\t{ \"post_id\": 862, \"topic_id\": 220, \"forum_id\": 8, \"post_subject\": \"'ROWS' used in ECL ref manual but never explained.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThere are quite a few examples where the keyword 'ROWS' is used, but I don't find anywhere a page on or explanation of 'ROWS'.\\nThe 'ROW' function, singular, does have a pages of explanation (starting page 266).\\n\\nIs there a difference, or are ROW and ROWS synonymous?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-21 20:08:25\" },\n\t{ \"post_id\": 880, \"topic_id\": 222, \"forum_id\": 8, \"post_subject\": \"Re: Error in Ref Guide 'ROW'+ we need versions on documentat\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nGreat.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-24 20:10:07\" },\n\t{ \"post_id\": 877, \"topic_id\": 222, \"forum_id\": 8, \"post_subject\": \"Re: Error in Ref Guide 'ROW'+ we need versions on documentat\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThanks for the heads-up. It is fixed for the next release. 
We're looking into moving the version info.\\n\\nRichard\", \"post_time\": \"2012-01-24 18:42:00\" },\n\t{ \"post_id\": 867, \"topic_id\": 222, \"forum_id\": 8, \"post_subject\": \"Error in Ref Guide 'ROW'+ we need versions on documentation.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThere is an error in the 1st example for 'ROW' page 266.\\nSee the example copied below with the error commented and the correction also commented:\\n\\nAkaRec := {STRING20 forename,STRING20 surname };\\noutputRec := RECORD\\n UNSIGNED id,\\n DATASET(AkaRec) kids;\\nEND;\\n\\ninputRec := {UNSIGNED id,STRING20 forename,STRING20 surname };\\nInPeople := DATASET ([ {1,'Allan','Wrobel'},\\n {1,'Allan','Smith'},\\n {2,'Anna','Smith'},\\n {2,'Keith','Harrison'}],inputRec);\\noutputRec makeFatRecord(inputRec L) := TRANSFORM\\n SELF.id := L.id;\\n // In document as 'children' this is wrong.\\n SELF.children:= DATASET([{L.forename,L.surname}],AkaRec);\\n // Correct code is:\\n SELF.kids := DATASET([{L.forename,L.surname}],AkaRec);\\nEND;\\nFatIn := PROJECT(InPeople,makeFatRecord(LEFT));\\n\\noutputRec makeChildren(outputRec L,outputRec R) := TRANSFORM\\n SELF.id := L.id;\\n SELF.kids := L.kids + ROW({R.kids[1].forename,R.kids[1].surname},AkaRec);\\nEND;\\nOUTPUT(ROLLUP(FatIn,id,makeChildren(LEFT,RIGHT)));\\n
\\n\\nInterestingly the example for 'ROW Form 2' uses 'children' throughout instead of 'kids'.\\n\\nAs an aside, we need the documentation set versioned. \\nIn the current set I have I can only refer to page numbers, but I've no easy way at the moment to check that I'm NOT raising issues that are already fixed in a later version of the documentation.\\n\\nYours\\n\\nAllan\\n\\nPS \\nOk I've found the version information of the Ref manual secreted away on page 4 in very small type.\\nI'm working from October 2011 Version 3.2.2.0, this problem is still in the online version November 2011 Version 3.4.0.b\\n\\nCould the version be prominently on the first page under the title please?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-22 11:33:44\" },\n\t{ \"post_id\": 897, \"topic_id\": 225, \"forum_id\": 8, \"post_subject\": \"Re: Roxie Query gives: SAXParseException\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"This is a known issue and is already fixed in the next release which is scheduled to be released within a week or so. \\n\\n..but I can look into getting the fix patched into release 3.4.0.x if necessary.\", \"post_time\": \"2012-01-25 20:38:25\" },\n\t{ \"post_id\": 871, \"topic_id\": 225, \"forum_id\": 8, \"post_subject\": \"Roxie Query gives: SAXParseException\", \"username\": \"Apurv.Khare\", \"post_text\": \"If we publish a roxie query to fetch data containing '&' then it gives following error on the WsECl 3.0(but we can see the result in XML Tables) ,\\nException(s) occured:\\nReporter: WsEcl\\nCode Message \\n2 [failed to compile xml][SAXParseException: Unterminated entity reference, 'C' (, line 1, column 298)] \\n\\nbut if the data has no '&' it shows no error.Is there any workaround..\", \"post_time\": \"2012-01-23 03:07:28\" },\n\t{ \"post_id\": 886, \"topic_id\": 228, \"forum_id\": 8, \"post_subject\": \"Re: left padding zeros\", \"username\": \"aintnomyth\", \"post_text\": \"Exactly, thanks for the quick response.\", \"post_time\": \"2012-01-24 22:30:23\" },\n\t{ \"post_id\": 885, \"topic_id\": 228, \"forum_id\": 8, \"post_subject\": \"Re: left padding zeros\", \"username\": \"Tony Kirk\", \"post_text\": \"Are you referring to INTFORMAT()?\", \"post_time\": \"2012-01-24 22:29:29\" },\n\t{ \"post_id\": 884, \"topic_id\": 228, \"forum_id\": 8, \"post_subject\": \"left padding zeros\", \"username\": \"aintnomyth\", \"post_text\": \"I'm sure this is easy but it's slipping my mind...how does one left pad with a non-space value?\\n\\nThanks!\", \"post_time\": \"2012-01-24 22:28:28\" },\n\t{ \"post_id\": 960, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\ntotalFound is there; it's the attribute attached to all the COUNT() calls. When I calculate it there I get the non-DEDUP'd result count. If I counted the individual results or initialResults I'd get at most (mergeCount * 6) results due to the CHOOSEN calls embedded within the PROJECTs. This isn't a performance consideration so much as a correctness choice.
Yes, I saw your definition of totalFound (summing all the COUNTs), but I did not see you using that totalFound anywhere else, hence my comment.\\nI've worked with many declarative languages; they're pretty cool, when they aren't frustrating. Which pretty much defines all the languages I've learned, I guess. My fault for saying 'before a DEDUP' runs. I should have said, "on a data set that has not been truncated" or something like that.
No problem! I just like to take every public forum "soapbox opportunity" that comes along to point out the different nature of ECL as a declarative language compared to most programmers' experience with procedural languages. \\n\\nRichard\", \"post_time\": \"2012-01-31 15:07:27\" },\n\t{ \"post_id\": 959, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":kimpkbfv]Dan,\\n
Astute readers will note that the totalFound value may not be accurate because it is calculated before a DEDUP process runs. In my case, this fuzziness is an acceptable trade-off for performance.
Have you tried making the totalFound a COUNT(initialResults) to see if that affects performance?\\n\\nI ask because your phrase "is calculated before a DEDUP process runs" implies that you think your code is actually doing something, and a *major* point I like to make in class is that ECL code is NOT executable code, but simply definition of *what* you want -- the *how* it gets done is the compiler's job. IOW, ECL programmers have to stop thinking that they are telling the computer what to do, they are only defining what they want -- which means the order in which you define your process is not necessarily the order in which it will execute.\\n\\nAlso, since I don't see totalFound used anywhere in your code I can only presume it is used later in your process -- but if not used anywhere, the compiler will happily ignore it. \\n\\ntotalFound is there; it's the attribute attached to all the COUNT() calls. When I calculate it there I get the non-DEDUP'd result count. If I counted the individual results or initialResults I'd get at most (mergeCount * 6) results due to the CHOOSEN calls embedded within the PROJECTs. This isn't a performance consideration so much as a correctness choice.\\n\\nI've worked with many declarative languages; they're pretty cool, when they aren't frustrating. Which pretty much defines all the languages I've learned, I guess. My fault for saying 'before a DEDUP' runs. I should have said, "on a data set that has not been truncated" or something like that.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-01-31 14:48:08\" },\n\t{ \"post_id\": 957, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n
Astute readers will note that the totalFound value may not be accurate because it is calculated before a DEDUP process runs. In my case, this fuzziness is an acceptable trade-off for performance.
Have you tried making the totalFound a COUNT(initialResults) to see if that affects performance?\\n\\nI ask because your phrase "is calculated before a DEDUP process runs" implies that you think your code is actually doing something, and a *major* point I like to make in class is that ECL code is NOT executable code, but simply definition of *what* you want -- the *how* it gets done is the compiler's job. IOW, ECL programmers have to stop thinking that they are telling the computer what to do, they are only defining what they want -- which means the order in which you define your process is not necessarily the order in which it will execute.\\n\\nAlso, since I don't see totalFound used anywhere in your code I can only presume it is used later in your process -- but if not used anywhere, the compiler will happily ignore it. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-31 14:35:38\" },\n\t{ \"post_id\": 956, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="DSC":1y9o2o56]The sample search corpus contains over 6M records with only four U.S. states represented. About 1.6M records have Texas (TX) in their state field. If TX is entered as a search value the routine above executes in the 8-9 second range. If different fields are chosen, where the result set is much smaller, the routine executes in the Wicked Fast Range of timings.\\n\\nAssuming that the search criteria cannot be changed, is there a way to restructure the data or implement a different recombination algorithm to improve the speed?\\nMy apologies for replying to my own post, but I hit upon an acceptable solution that I thought I'd share with any interested readers.\\n\\n<Crickets>\\n\\nWhat I was looking for is a solution for a common search scenario. Most users don't go too deep into the search results, most of the time, so it makes sense to optimize the search for the top of the result list (the first few 'pages' of results, which can vary depending on need).\\n\\nAt any rate, what I hit upon was breaking the search down into initial, separate reads of all indexes, then trimming the results before transforming/sorting/merging them. Because the search allows the user to set an initial result offset and the number of records to return, and the final result can be sorted a number of different ways, the number of records to leave in each index before the merge is critical. That number turns out to be the sum of the offset plus the size of the result. Even if only one index fulfills the search parameters, there will be enough records to return to the user.\\n\\nCode snippet:\\n\\n
mergeCount := p.resultOffset + p.resultCount;\\n\\ncityInterim :=\\t\\tIDX_City(city IN cityNames);\\nstateInterim :=\\t\\tIDX_State(state IN stateAbbreviations);\\npostalInterim :=\\tIDX_PostalCode(postalCode IN postalCodes);\\ncountryInterim :=\\tIDX_CountryID(countryID IN countryIDs);\\ncountyInterim :=\\tIDX_CountyID(countyID IN countyIDs);\\nareaCodeInterim :=\\tIDX_AreaCode(areaCode IN areaCodes);\\n\\ntotalFound := \\t\\tCOUNT(cityInterim) +\\n\\t\\t\\t\\t\\tCOUNT(stateInterim) +\\n\\t\\t\\t\\t\\tCOUNT(postalInterim) +\\n\\t\\t\\t\\t\\tCOUNT(countryInterim) +\\n\\t\\t\\t\\t\\tCOUNT(countyInterim) +\\n\\t\\t\\t\\t\\tCOUNT(areaCodeInterim);\\n\\ncityResults := \\t\\tPROJECT(CHOOSEN(STEPPED(cityInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\nstateResults :=\\t\\tPROJECT(CHOOSEN(STEPPED(stateInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\npostalResults :=\\tPROJECT(CHOOSEN(STEPPED(postalInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\ncountryResults :=\\tPROJECT(CHOOSEN(STEPPED(countryInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\ncountyResults :=\\tPROJECT(CHOOSEN(STEPPED(countyInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\nareaCodeResults :=\\tPROJECT(CHOOSEN(STEPPED(areaCodeInterim,uniqueID),mergeCount),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT));\\n\\ninitialResults :=\\tMERGE\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tcityResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tstateResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tpostalResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tcountryResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tcountyResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tareaCodeResults\\n\\t\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t\\tSORTED(uniqueID),\\n\\t\\t\\t\\t\\t\\t\\t\\tDEDUP\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\nfinalResults := CHOOSEN(initialResults,p.resultCount,p.resultOffset);
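\\n// CHOOSEN(ds,n,startpos) returns at most n records starting at ordinal position startpos,\\n// so only the requested page of the merged, deduped stream is kept.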
\\n\\nAstute readers will note that the totalFound value may not be accurate because it is calculated before a DEDUP process runs. In my case, this fuzziness is an acceptable trade-off for performance.\\n\\nThere is probably a more generic way of performing all this, but at least the code is relatively clear.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-31 13:10:46\" },\n\t{ \"post_id\": 928, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="sort":2mjq8096]Dan - \\n Can you tell me how you deleted the data from roxie. We will need to see if we have an issue with file / meta information delete\\n\\nAll file manipulation was performed through the web interface (port 8010 stuff). Mainly, I would delete the query set and then the logical files (which always said they were on the mythor cluster). [Edit: Meaning, I would 'Browse Logical Files' and delete them from there.] In my earlier post, where I said that I found roxie files, this was through the command line after ssh'ing into each node. I indicated that they were orphaned, in that at that moment they appeared nowhere in the web interface and there wasn't a query set existing, so I deleted them from the command line. That is likely where the problem was introduced. PIBKAC.\\n\\nLet me know if you need any other info!\\n\\nDan\", \"post_time\": \"2012-01-27 19:51:48\" },\n\t{ \"post_id\": 927, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"sort\", \"post_text\": \"Dan - \\n Can you tell me how you deleted the data from roxie. We will need to see if we have an issue with file / meta information delete\", \"post_time\": \"2012-01-27 19:34:39\" },\n\t{ \"post_id\": 926, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"Yeah, this was definitely a sidetracking issue that I'm glad to have put behind me. Other than learning something (which is never a Bad Thing) I could have done without it.\\n\\nEnd of hijacked thread. 
Back to the original topic.\\n\\nThe following search routine, modified with the suggestions made earlier, technically works but seems to take a long time with certain input values:\\n\\n\\n//--------------------------------------------------------------------------\\n// Record definition containing only IDs\\n//--------------------------------------------------------------------------\\nEXPORT\\tUniqueIDSearchResultRecord := RECORD\\n\\tRecordLayout.uniqueID;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Record definition of search result; will contain only one record\\n//--------------------------------------------------------------------------\\nEXPORT\\tSearchResult := RECORD\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\ttotalNumberFound;\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\tresultOffset;\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\tresultCount;\\n\\tDATASET(UniqueIDSearchResultRecord)\\tuniqueIDs;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Actual search routine\\n//--------------------------------------------------------------------------\\nEXPORT\\tSearch(SearchParameters p) := FUNCTION\\n\\tcityNames := HUtil.SplitAndLowerString(p.cityNames);\\n\\tstateAbbreviations := HUtil.SplitAndLowerString(p.stateAbbreviations);\\n\\tpostalCodes := HUtil.SplitAndLowerString(p.postalCodes);\\n\\tcountryIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countryIDs);\\n\\tcountyIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countyIDs);\\n\\tareaCodes := HUtil.SplitAndLowerString(p.areaCodes);\\n\\t\\n\\tcityResults := \\t\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tcityNames != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_City(city IN cityNames),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\tstateResults :=\\t\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tstateAbbreviations != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_State(state IN stateAbbreviations),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\tpostalResults :=\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tpostalCodes != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_PostalCode(postalCode IN postalCodes),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\tcountryResults :=\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tcountryIDs != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_CountryID(countryID IN countryIDs),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\tcountyResults :=\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tcountyIDs != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_CountyID(countyID IN countyIDs),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\tareaCodeResults :=\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tareaCodes != [],\\n\\t\\t\\t\\t\\t\\t\\t\\tPROJECT(STEPPED(IDX_AreaCode(areaCode IN areaCodes),uniqueID),TRANSFORM(UniqueIDSearchResultRecord,SELF:=LEFT))\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tinitialResults 
:=\\tMERGE\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tcityResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tstateResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpostalResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tcountryResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tcountyResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tareaCodeResults\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tSORTED(uniqueID),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tDEDUP\\n\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tfinalResults := CHOOSEN(initialResults,p.resultCount,p.resultOffset);\\n\\t\\n\\tresultDS := DATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tCOUNT(initialResults),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tp.resultOffset,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tp.resultCount,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tfinalResults\\n\\t\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\tSearchResult\\n\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tRETURN resultDS;\\nEND;\\n
\\nThe sample search corpus contains over 6M records with only four U.S. states represented. About 1.6M records have Texas (TX) in their state field. If TX is entered as a search value the routine above executes in the 8-9 second range. If different fields are chosen, where the result set is much smaller, the routine executes in the Wicked Fast Range of timings.\\n\\nAssuming that the search criteria cannot be changed, is there a way to restructure the data or implement a different recombination algorithm to improve the speed?\\n\\nThanks a million for your help so far!\\n\\nDan\", \"post_time\": \"2012-01-27 19:16:29\" },\n\t{ \"post_id\": 925, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nHooray! You did exactly what I was about to suggest next! \\n\\nFor the "stale metadata" issue, someone on the infrastructure side of things will have to address that -- I stay on the ECL side of things. \\n\\nRichard\", \"post_time\": \"2012-01-27 19:06:09\" },\n\t{ \"post_id\": 924, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"Eureka!\\n\\nI was thinking that perhaps there was some metadata stuck in the system that was citing the old index record size -- 50 bytes was accurate at one time -- so I edited the code and simply renamed all my location index files. It now works.\\n\\nSo now the question becomes: What about this stale metadata? Where is it, can it be deleted, should it be deleted, and do I have to worry about it?\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-01-27 18:52:29\" },\n\t{ \"post_id\": 923, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":t5c5iamu]OK, when you have an INDEX with no payload, the system expects that it is a standard search index with a record pointer to the actual record in the DATASET (so FETCH can find it). You can get rid of this error by adding your __pos field as the third field, right after UniqueID.\\n\\nSo with this:\\n\\n
\\nEXPORT\\tRecordLayout := RECORD\\n\\t// Common fields\\n\\tSTRING20\\t\\tuniqueID;\\n\\t// Address fields\\n\\tSTRING50\\t\\tcity;\\n\\tSTRING30\\t\\tstate;\\n\\tSTRING9\\t\\t\\tpostalCode;\\n\\tINTEGER2\\t\\tcountryID;\\n\\tINTEGER2\\t\\tcountyID;\\n\\t// Phone fields\\n\\tSTRING5\\t\\t\\tareaCode;\\nEND;\\n\\nEXPORT\\tRecords := DATASET(kDataPath,{RecordLayout,UNSIGNED8 __pos{VIRTUAL(FILEPOSITION)}},FLAT);\\n\\nEXPORT\\tIDX_City := INDEX(Records,{city,uniqueID,__pos},kCityIndexPath);\\n
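// Keeping __pos as the final keyed field gives the INDEX the integral\\n// fileposition field the compiler expects when no payload is declared.\\n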
\\nRoxie throws this runtime error:\\n\\n\\nKey size mismatch on key /var/lib/HPCCSystems/hpcc-data/thor/bal/search/location/city_idx._4_of_4/3674185682 - size was 70, expected 50 (in Index Read 2)\\n\\nThe file cited in the error message is exactly 32K in size, whereas the total size of the logical file is a little over 80MB. Is it the header file for the distributed index?\", \"post_time\": \"2012-01-27 18:44:10\" },\n\t{ \"post_id\": 922, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"OK, when you have an INDEX with no payload, the system expects that it is a standard search index with a record pointer to the actual record in the DATASET (so FETCH can find it). You can get rid of this error by adding your __pos field as the third field, right after UniqueID.\", \"post_time\": \"2012-01-27 18:39:37\" },\n\t{ \"post_id\": 921, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="DSC":1w89v38l]I did try that at one point. I did again just now and received:\\n\\nEXPORT\\tIDX_City := INDEX(Records,{city,uniqueID},kCityIndexPath);\\n\\nError: Expected last field to be an integral fileposition field (89, 12), 2080\\n\\nRelatedly (is that a word?), when viewing the index file's details in the web browser:\\n\\n* If you have an empty payload only, the system adds an __internal_fpos field for you.\\n\\n* If you explicitly declare a file position field, that field shows up (with no virtual tag).\\n\\n* If you both declare a field and have an empty payload, both the declared field and the internal field show up, which is somewhat confusing.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-27 16:32:05\" },\n\t{ \"post_id\": 920, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":70ydb0g4]I would try changing this:\\nEXPORT IDX_City := INDEX(Records,{city,uniqueID},{},kCityIndexPath);
to this:\\nEXPORT IDX_City := INDEX(Records,{city,uniqueID},kCityIndexPath);
Since you have no payload fields, you should not be defining the INDEX with a payload fields parameter. I don't know if it will help this specific problem, but it may.\\n\\nI did try that at one point. I did again just now and received:\\n\\nEXPORT\\tIDX_City := INDEX(Records,{city,uniqueID},kCityIndexPath);\\n\\nError: Expected last field to be an integral fileposition field (89, 12), 2080\", \"post_time\": \"2012-01-27 16:28:57\" },\n\t{ \"post_id\": 919, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"I would try changing this:\\nEXPORT IDX_City := INDEX(Records,{city,uniqueID},{},kCityIndexPath);
to this:\\nEXPORT IDX_City := INDEX(Records,{city,uniqueID},kCityIndexPath);
Since you have no payload fields, you should not be defining the INDEX with a payload fields parameter. I don't know if it will help this specific problem, but it may.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-27 16:18:15\" },\n\t{ \"post_id\": 918, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"I was having problems with MERGE complaining at runtime (an exception that I didn't save even though it appeared to be an internal error -- sorry). Having reviewed STEPPED again, I've reimplemented it.\\n\\nUnfortunately, I've been fighting a different error all morning. Same error message no matter what I do. Here is a code excerpt:\\n\\n\\nEXPORT\\tRecordLayout := RECORD\\n\\t// Common fields\\n\\tSTRING20\\t\\tuniqueID;\\n\\t// Address fields\\n\\tSTRING50\\t\\tcity;\\n\\tSTRING30\\t\\tstate;\\n\\tSTRING9\\t\\t\\tpostalCode;\\n\\tINTEGER2\\t\\tcountryID;\\n\\tINTEGER2\\t\\tcountyID;\\n\\t// Phone fields\\n\\tSTRING5\\t\\t\\tareaCode;\\nEND;\\n\\nEXPORT\\tRecords := DATASET(kDataPath,{RecordLayout,UNSIGNED8 __pos{VIRTUAL(FILEPOSITION)}},FLAT);\\n\\nEXPORT\\tIDX_City := INDEX(Records,{city,uniqueID},{},kCityIndexPath);\\n
\\nI had made changes for the STEPPED implementation. Specifically, the uniqueID field was originally defined as STRING and I made it a STRING20, then I moved the uniqueID from the payload to the key declaration section in the index definition. I'm now getting the following error from Roxie at runtime:\\n\\n\\nKey size mismatch on key /var/lib/HPCCSystems/hpcc-data/thor/bal/search/location/city_idx._4_of_4/53348355 - size was 70, expected 50 (in Index Read 2)\\n\\n(BTW, explicitly declaring the __pos field and removing the empty payload in the index declaration makes no difference.)\\n\\nI noticed that Roxie seemed to retain copies of the data and index files even though they the web interface (browse logical files) showed that I had deleted them, so I manually cleaned them up on all nodes. I made sure all data, index, work units and query sets were deleted, then rebuilt everything. No dice.\\n\\nThere was another thread about this error and it seemed that it was solved, then, by deleting and rebuilding. Since I'm having less-than-stellar luck with that, is there something else I can try?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-27 15:50:47\" },\n\t{ \"post_id\": 913, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n \\nYes, there is a "Smart Stepping" article in the Programmer's Guide. I'll have to go back and re-read what I wrote now. \\n\\nRichard\", \"post_time\": \"2012-01-26 21:10:54\" },\n\t{ \"post_id\": 911, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"dabayliss\", \"post_text\": \"This is an advanced technique. Richard - do we not have a global smart stepping tutorial in the programmers guide? If not we may need to create one.\\n\\n@DSC: The basic deal is that you need to bring the results back from each part of the inversion pre-sorted (or you get a bottleneck at the farmer)\\n\\nThe index needs to look like\\n\\n{ myfield_to_search_on, UID }\\n\\nThe placement of the UID is CRITICAL - it must NOT be in the payload.\\n\\nThen when you do the index lookup you use STEPPED (see the manual) on the UID. This ensures that the results coming back from slave for the index fetch are sorted by UID (the slave does this via a merge sort; which is why the UID placement is critical).\\n\\nIf you want to do this on N different fields - you can use N different indexes to do it. You can then combine the results using the MERGE operator (easy - and fairly fast) - or you can use the JOIN([dataset]) variants inside a GRAPH (ice-pack on the head and large vat of coffee; but once you get used to it the results are incredible)\\n\\nThere are some even more advanced techniques we occasionally get into (this is absolutely our bread n butter) - but the UID placement and STEPPED get you >>80% of the way there ....\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2012-01-26 20:46:08\" },\n\t{ \"post_id\": 909, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
At any rate, I suspect that the SORT, DEDUP and MERGE in that code is the wrong way to do this. Is there a better way?
I don't see a MERGE in the code you posted. Are you actually using MERGE?\\n\\nSimply appending all the resulting uniqueIDs into a single recordset that you SORT and DEDUP is exactly what I would also do. The likelihood is that your result sets will be small enough that this would be a pretty efficient operation.\\n\\nTry it that way and see what kind of performance you get.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-26 18:20:46\" },\n\t{ \"post_id\": 908, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"Thanks for the info, Richard. I suspected that that was probably the best way, but how do you combine the separate search results efficiently? Here is a snippet of code that I currently have:\\n\\n\\nEXPORT\\tResultRec := RECORD\\n\\tSTRING20\\t\\tuniqueID;\\nEND;\\n\\ncityResults := PROJECT(IDX_City(city IN cityNames),TRANSFORM(ResultRec,SELF:=LEFT));\\nstateResults := PROJECT(IDX_State(state IN stateAbbreviations),TRANSFORM(ResultRec,SELF:=LEFT));\\n\\ninterimResults := SORT(cityResults + stateResults,uniqueID);\\n\\ninitialResults := DEDUP(interimResults,uniqueID);\\n\\nfinalResults := CHOOSEN(initialResults,p.resultCount,p.resultOffset);\\n
\\nThe uniqueID cited here is a payload field in the indexes. I require that a given ID appear only once. I'm not worried about the sort order of the final result just yet, but that will probably be coming. At any rate, I suspect that the SORT, DEDUP and MERGE in that code is the wrong way to do this. Is there a better way?\\n\\nThanks again!\", \"post_time\": \"2012-01-26 17:32:08\" },\n\t{ \"post_id\": 906, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Re: Recommended practice for searching\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nFirst off, I would definitely be using payload indexes with the unique ID to return as the payload. Next, I would create a separate payload index for each search term field, since your post indicates you see sub-second response on your 1-field queries. Then I would simply write the code to interrogate only those indexes for which a parameter was passed, doing the simple filter using IN, and combining all the resulting unique IDs to return.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-26 16:58:51\" },\n\t{ \"post_id\": 904, \"topic_id\": 232, \"forum_id\": 8, \"post_subject\": \"Recommended practice for searching\", \"username\": \"DSC\", \"post_text\": \"I'm currently working on a scenario where:\\n\\n* I have a dataset where the record contains a unique ID and a bunch of searchable fields.\\n\\n* Inbound Roxie queries may supply search parameters for one or more fields. Each search parameter is a SET of multiple values. Multiple parameters have an implied OR relationship at the moment.\\n\\n* The result of the search should be a dataset containing only the unique IDs of the matching records.\\n\\nI've been experimenting with different index and retrieval setups and I'm not terribly happy with the performance. I've tried using one index with all the fields cited as keys, and separate indexes for each field. I've tried inserting the unique ID as an index payload value to avoid a FETCH. What I'm currently seeing is that if I alter the code to search only field -- e.g., Foo_IDX(field IN values) -- then the code is blindingly fast. Subsecond response time is the norm. But as soon as I add an additional constraint -- e.g., Foo_IDX(field1 IN values1 OR field2 IN values2) -- the response slows down to the 3-4 second range. What is weird is that after adding even more search parameters, the response time remains in the 3-4 second range.\\n\\nWhat is the recommended architecture for this kind of scenario?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-26 15:38:45\" },\n\t{ \"post_id\": 917, \"topic_id\": 233, \"forum_id\": 8, \"post_subject\": \"Re: variable number of field arguments in a macro\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks, I didn't realize the #EXPAND could be used for any string like that. 
Good info!\", \"post_time\": \"2012-01-27 13:59:18\" },\n\t{ \"post_id\": 916, \"topic_id\": 233, \"forum_id\": 8, \"post_subject\": \"Re: variable number of field arguments in a macro\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"You can try this also:\\n\\n\\nEXPORT JoinThem(d01,d02,joinfields):=FUNCTIONMACRO\\n RETURN JOIN(d01,d02,#EXPAND(REGEXREPLACE(',([^,]+)',','+joinfields,' AND LEFT.$1=RIGHT.$1')[6..]));\\nENDMACRO;\\n\\nd01:=DATASET([{'a','b','a',1},{'a','b','c',2},{'a','a','b',3}],{STRING joinfield1;STRING joinfield2;STRING joinfield3;UNSIGNED val01;});\\nd02:=DATASET([{'a','b','a',4},{'a','b','c',5},{'a','a','b',6}],{STRING joinfield1;STRING joinfield2;STRING joinfield3;UNSIGNED val02;});\\nJoinThem(d01,d02,'joinfield1');\\nJoinThem(d01,d02,'joinfield1,joinfield2');\\nJoinThem(d01,d02,'joinfield1,joinfield2,joinfield3');\", \"post_time\": \"2012-01-27 12:05:28\" },\n\t{ \"post_id\": 912, \"topic_id\": 233, \"forum_id\": 8, \"post_subject\": \"Re: variable number of field arguments in a macro\", \"username\": \"rtaylor\", \"post_text\": \"You could do it this way:\\nMyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\nEND;\\nLeftFile := DATASET([{'C','A'},{'X','B'},{'A','C'}],MyRec);\\nRightFile := DATASET([{'C','X'},{'B','Y'},{'A','Z'}],MyRec);\\n\\nMyOutRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 LeftValue2;\\n\\tSTRING1 RightValue2;\\nEND;\\n\\nMyMacro(ds1, ds2, cond, res) := MACRO\\n\\tMyOutRec JoinThem(MyRec L, MyRec R) := TRANSFORM\\n\\t\\tSELF.Value1 := IF(L.Value1<>'', L.Value1, R.Value1);\\n\\t\\tSELF.LeftValue2 := L.Value2;\\n\\t\\tSELF.RightValue2 := R.Value2;\\n\\tEND;\\n\\tres := JOIN(ds1, ds2, cond, JoinThem(LEFT,RIGHT));\\nENDMACRO;\\t\\t\\t\\n\\nmyMacro(LeftFile, RightFile, LEFT.Value1=RIGHT.Value1, myResult1);\\n\\noutput(myResult1);\\n
\", \"post_time\": \"2012-01-26 20:51:29\" },\n\t{ \"post_id\": 910, \"topic_id\": 233, \"forum_id\": 8, \"post_subject\": \"variable number of field arguments in a macro\", \"username\": \"aintnomyth\", \"post_text\": \"I'm not sure if the macro syntax supports this but here's what I want to do\\n\\n\\nmyResult := myMacro(ds1, ds2, 'joinField1, joinField2, joinField3');\\n\\n//result should be the same as:\\nmyResult := JOIN(ds1, ds2, LEFT.joinField1 = RIGHT.joinField1\\n AND LEFT.joinField2 = RIGHT.joinField2\\n AND LEFT.joinField3 = RIGHT.joinField3);
\\n\\n\\nIs something like that even possible?\", \"post_time\": \"2012-01-26 19:55:48\" },\n\t{ \"post_id\": 952, \"topic_id\": 235, \"forum_id\": 8, \"post_subject\": \"Re: spraying multiple data files into single logical file\", \"username\": \"dean\", \"post_text\": \"Richard, \\nThanks. This did the trick. \\n\\nDean\", \"post_time\": \"2012-01-31 00:03:41\" },\n\t{ \"post_id\": 950, \"topic_id\": 235, \"forum_id\": 8, \"post_subject\": \"Re: spraying multiple data files into single logical file\", \"username\": \"rtaylor\", \"post_text\": \"Dean,\\n\\nHere's how I just did it:\\n\\n1. I split a small XML file I had (331 rows of data) into 8 "chunk" files, and numbered them 2-9.\\n2. I put those files on my landing zone.\\n3. I opened ECL Watch and went to the Spray XML page.\\n4. I selected the first file (filename2.xml) as the file to spray, using the Choose File button.\\n5. I changed the "2" in the filename to "?" (you can use * and ? as wildcards here).\\n6. I named my single result file and ran the spray job.\\n7. I opened the ECL IDE, defined the RECORD structure and DATASET definition for the single result file.\\n8. Ran OUTPUT(MyFile,ALL) and saw that I got all 331 rows of data returned.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-30 22:07:51\" },\n\t{ \"post_id\": 942, \"topic_id\": 235, \"forum_id\": 8, \"post_subject\": \"spraying multiple data files into single logical file\", \"username\": \"dean\", \"post_text\": \"Hey folks,\\nI've got 200+ XML data files, all with the same schema. How can I spray all of these files into one logical file?\\n\\nThanks,\\nDean\", \"post_time\": \"2012-01-30 19:30:32\" },\n\t{ \"post_id\": 962, \"topic_id\": 236, \"forum_id\": 8, \"post_subject\": \"Re: Errors with PIPE option within OUTPUT\", \"username\": \"DSC\", \"post_text\": \"[quote="richardkchapman":3phbia0i]The error is because cat does not know how to handle > - it's the command shell that handles > for it.\\n\\nIf you do\\n\\nOUTPUT(result,,PIPE('/bin/bash -c "/bin/cat - > /tmp/dantest"',XML));\\n
\\n\\nthen it should work. Of course, you could just output it to an xml file using thor directly.\\nAh. Thanks for reminding me of the execution environment!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-31 17:49:09\" },\n\t{ \"post_id\": 961, \"topic_id\": 236, \"forum_id\": 8, \"post_subject\": \"Re: Errors with PIPE option within OUTPUT\", \"username\": \"richardkchapman\", \"post_text\": \"The error is because cat does not know how to handle > - it's the command shell that handles > for it.\\n\\nIf you do\\n\\nOUTPUT(result,,PIPE('/bin/bash -c "/bin/cat - > /tmp/dantest"',XML));\\n
\\n\\nthen it should work. Of course, you could just output it to an xml file using thor directly.\", \"post_time\": \"2012-01-31 17:42:15\" },\n\t{ \"post_id\": 943, \"topic_id\": 236, \"forum_id\": 8, \"post_subject\": \"Errors with PIPE option within OUTPUT\", \"username\": \"DSC\", \"post_text\": \"I'm trying to execute this:\\n\\nOUTPUT(result,,PIPE('/bin/cat - > /tmp/dantest',XML));
\\n\\nMy idea is to examine the output and possibly Do Something Interesting With it. Unfortunately, I can't make the PIPE option actually succeed. The above example gives me the runtime error:\\n\\nError: System error: 10096: Graph[46], pipewrite[48]: SLAVE 10.210.150.81:6600: Process returned 1: - PIPE(/bin/cat - > /tmp/dantest) (0, 0), 10096, \\n\\nNo file is created on any node. Is there a way to perform this capture?\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-01-30 19:44:32\" },\n\t{ \"post_id\": 1018, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"rengolin\", \"post_text\": \"I can't reproduce your error, with or without transactions. All I get is the sensible error:\\n\\nSystem error: 1: DFS Exception: 1: logical name t7::superfile::basefiles already exists
\\n\\nIt could be a bit more informative, I agree.\\n\\nWhat version are you running? This bug may have been fixed already in the main trunk...\", \"post_time\": \"2012-02-06 09:39:47\" },\n\t{ \"post_id\": 1017, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"rengolin\", \"post_text\": \"I understand it was not intentional, and it should never segfault. I'm investigating it right now.\\n\\nThe main issue is that you're using the same name for files and superfiles. Think of superfiles as if they were TAR packages of similar files. It allows you to deal with all of them at the same time.\\n\\nYou can't add more files to a normal file, but you can to a TAR file. You can even add TAR files to other TAR files. This is exactly the same as SuperFiles, but the layout of the sub-files (and super-files) being added must be similar.\\n\\nYou need to be careful with your OVERWITEs and IFEXISTSs flags, as they can bite you later. For the same effect as OVERWRITE on superfiles, you can delete it first:\\n\\n\\nsequential(\\nStd.File.DeleteSuperFile('~insight_weblog::superfile::basefiles'),\\n...\\nStd.File.CreateSuperFile('~insight_weblog::superfile::basefiles'),\\n...\\n);\\n
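\\nSpelled out a bit more (untested, and the subfile names here are just placeholders), the whole sequence could look like:\\n\\nIMPORT Std;\\nSEQUENTIAL(\\n Std.File.DeleteSuperFile('~insight_weblog::superfile::basefiles'),\\n Std.File.CreateSuperFile('~insight_weblog::superfile::basefiles'),\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile('~insight_weblog::superfile::basefiles','~insight_weblog::superfile::subfile1'),\\n Std.File.AddSuperFile('~insight_weblog::superfile::basefiles','~insight_weblog::superfile::subfile2'),\\n Std.File.FinishSuperFileTransaction());\\n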
\\n\\nThis way, you always get a clean super-file.\\n\\nHowever, I think your code should fail when you try to create a super-file on top of a logical file, with an error message saying that the file is not super and following code will break.\", \"post_time\": \"2012-02-06 09:28:13\" },\n\t{ \"post_id\": 1014, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"Apurv.Khare\", \"post_text\": \"Appologize for delayed response. Here is the code to repeat this error:\\n// writing individual datasets into DFU\\nOUTPUT(waclickfact17dec,,'~insight_weblog::superfile::basefiles', OVERWRITE);\\nOUTPUT(waclickfact9jan,,'~insight_weblog::superfile::subfiles1', OVERWRITE);\\nOUTPUT(waclickfact16jan,,'~insight_weblog::superfile::subfiles2', OVERWRITE); \\n\\n// Creating Superfiles\\nSEQUENTIAL(\\n\\t Std.File.CreateSuperFile('~insight_weblog::superfile::basefiles',,TRUE),\\t// TRUE means ignore error if file exists\\n\\t Std.File.StartSuperFileTransaction(),\\n\\t Std.File.AddSuperFile('~insight_weblog::superfile::basefiles','~insight_weblog::superfile::subfiles1'),\\n\\t Std.File.AddSuperFile('~insight_weblog::superfile::basefiles','~insight_weblog::superfile::subfiles2'),\\n\\t Std.File.FinishSuperFileTransaction());
\\nAlthough it was not intentional, we were having a normal file (not superfile) created with same name, ~insight_weblog::superfile::basefiles'.\\nWhen we execute this code it ignores the error if file exits. And throws Segmentation Fault error.\", \"post_time\": \"2012-02-06 04:20:48\" },\n\t{ \"post_id\": 1005, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"rengolin\", \"post_text\": \"Hi,\\n\\nI'm trying to reproduce your error, but I always get the error message, rather than the segmentation fault.\\n\\nCan you send us a snippet of your code that shows the bug? If you can reduce it to just a few lines would be even better. \\n\\nbest,\\n--renato\", \"post_time\": \"2012-02-03 17:20:33\" },\n\t{ \"post_id\": 993, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"richardkchapman\", \"post_text\": \"I opened an issue at https://github.com/hpcc-systems/HPCC-Pl ... ssues/1432\", \"post_time\": \"2012-02-03 09:43:44\" },\n\t{ \"post_id\": 958, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Re: Problem in CreateSuperfile() Function\", \"username\": \"rtaylor\", \"post_text\": \"[quote="Apurv.Khare":20uhw6qr] we tried to add a subfile to a normal file(not a Superfile) which already exits, it gave us this error\\n\\nError: SIG: Segmentation fault(11), accessing 0000000000000000, IP=00007FB550092B3B (0, 0), 1000,You cannot add a sub-file to anything other than a Superfile. \\n\\nCan you show us your code please, so we can exactly duplicate what you were doing?\", \"post_time\": \"2012-01-31 14:42:50\" },\n\t{ \"post_id\": 954, \"topic_id\": 237, \"forum_id\": 8, \"post_subject\": \"Problem in CreateSuperfile() Function\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi we were using the CreateSuperfile() function and set the Allow Exit parameter as 'TRUE', and we tried to add a subfile to a normal file(not a Superfile) which already exits, it gave us this error\\n\\nError: SIG: Segmentation fault(11), accessing 0000000000000000, IP=00007FB550092B3B (0, 0), 1000,\\n\\nand if we remove the parameter 'TRUE' it showed the error as the file already Exits.\", \"post_time\": \"2012-01-31 08:16:21\" },\n\t{ \"post_id\": 973, \"topic_id\": 240, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT from folder\", \"username\": \"rtaylor\", \"post_text\": \"You just did
\", \"post_time\": \"2012-02-01 22:03:13\" },\n\t{ \"post_id\": 970, \"topic_id\": 240, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT from folder\", \"username\": \"nvasil\", \"post_text\": \"Thanks Richard\\n\\nI took a look at the documentation, but it wasn't obvious that . replaces '/' or '\\\\'. Also I tried relative directories and it wasn't working, I understand not that it cannot support it.\\n\\nIn my opinion this must become more clear in the documentation. Where can I submit it as request to the documentation team?\", \"post_time\": \"2012-02-01 21:00:50\" },\n\t{ \"post_id\": 969, \"topic_id\": 240, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT from folder\", \"username\": \"rtaylor\", \"post_text\": \"Assuming you added the ML folder to your My Files (as I did), then all you need do is:\\n
IMPORT ML;
\\nOr, if you want to use all the functions without qualification:\\nIMPORT * FROM ML;
\\nBut if you installed it in the home/nvasil directory under My Files, then it would need to be this:\\nIMPORT * FROM home.nvasil.ML;
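\\nAnd if you'd rather not spell out the folder path on every reference, IMPORT also supports aliasing (a small sketch, assuming the same folder layout):\\nIMPORT home.nvasil.ML AS ML;
\\nafter which the definitions can again be referenced with the short ML prefix.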
\\nThere are a number of examples on this page: http://hpccsystems.com/community/docs/e ... tml/import\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-01 20:55:16\" },\n\t{ \"post_id\": 967, \"topic_id\": 240, \"forum_id\": 8, \"post_subject\": \"IMPORT from folder\", \"username\": \"nvasil\", \"post_text\": \"I need to import a module (the machine learning one) which happens to live in a directory shared by many projects\\n\\nI tried several syntaxes but none seemed to work\\n\\nIMPORT /home/nvasil/ML;\\nIMPORT '/home/nvasil/ML';\\nIMPORT * FROM '/home/nvasil/';\\nIMPORT * FROM /home/nvasil/ML;\\nIMPORT * FROM /home/nvasil/ML;\\nIMPORT ML FROM '/home/nvasil';\\nIMPORT ML FROM /home/nvasil/;\\n\\nNothing seemed to work. What is the correct syntax. The documentation doesn't have an example\\n\\nAs a turnaround I used the -I option ins eclcc.\", \"post_time\": \"2012-02-01 20:02:54\" },\n\t{ \"post_id\": 1067, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"ghalliday\", \"post_text\": \"A fix has been applied to master/candidate 3.6 branches.\\n\\nIt now always prefixes the option with -Wl,\\n\\nso I think for that example you would need to say\\n\\n#option ('linkOptions', '-LMyPath,-lmylib');\\n\\nHopefully using #option library 'x' inside a begin c++ will work better.\", \"post_time\": \"2012-02-09 14:57:55\" },\n\t{ \"post_id\": 1062, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"richardkchapman\", \"post_text\": \"From the gcc docs at http://gcc.gnu.org/onlinedocs/gcc/Link-Options.html\\n\\n-Wl,option\\nPass option as an option to the linker. If option contains commas, it is split into multiple options at the commas. You can use this syntax to pass an argument to the option. For example, `-Wl,-Map,output.map' passes `-Map output.map' to the linker. When using the GNU linker, you can also get the same effect with `-Wl,-Map=output.map'. \\n\\nLooks to me that rather than forgetting to take commas out, it has forgotten to leave the -Wl in...\\n\\nI'll reopen the bug.\", \"post_time\": \"2012-02-09 08:34:36\" },\n\t{ \"post_id\": 1060, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"Same thing happens with -flinkOptions=xxx\", \"post_time\": \"2012-02-08 22:03:43\" },\n\t{ \"post_id\": 1059, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"I downloaded and compiled the current head and the -Wl option has a buug\\n\\nI used eclcc -Wl,-L../karnagio/debug.build/lib/,-lfastlib-\\ndbg,-lboost_thread-mt,-lboost_program_options-mt,-llapack,-lblas \\n\\nand here is the link command line that it generates\\n\\n"/usr/bin/g++" -L. -Wl,-E -fPIC -pipe -O0 -L/opt/HPCCSystems/lib -Wl,-\\nrpath -Wl,/opt/HPCCSystems/lib -L../karnagio/debug.build/lib/,-lfastlib-\\ndbg,-lboost_thread-mt,-lboost_program_options-mt,-llapack,-lblas "a.out\\n.o" -leclrtl -la.out.res.o -lhthor -o "a.out"\\n\\nIt seems that you forget to take the commas out\", \"post_time\": \"2012-02-08 21:58:23\" },\n\t{ \"post_id\": 1016, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"ghalliday\", \"post_text\": \"We don't yet have nightly builds available. 
At the moment the options are as you say to compile the source, or to wait for the next release.\\n\\nWe are aiming to close down a 3.6. release fairly soon so there should be a build that supports it in the near future.\\n\\nThe #option command I mentioned can't be used within the body of the beginc++. You would need to include it in the main query definition for the moment. Assuming that patch is taken you will be able to say\\n\\n#option library 'mylib'\\n\\ninside the beginc++.\", \"post_time\": \"2012-02-06 09:16:47\" },\n\t{ \"post_id\": 1013, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"Thanks for the quick response\\n\\nI saw that the issue was fixed at the GIThub. Do you provide nightly builds? What are my options?\\na)Compile the source\\nb)Wait for the next release?\\n\\nAlso the option command ghalliday mentioned\\n#option ('linkOptions', 'xxxx'); \\n\\nis working inside the BEGINC++ environment?\", \"post_time\": \"2012-02-06 03:39:20\" },\n\t{ \"post_id\": 995, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"ghalliday\", \"post_text\": \"It looks like\\n\\neclcc -flinkOptions=xxxx\\n\\nor\\n\\n#option ('linkOptions', 'xxxx');\\n\\nshould get passed through to the linker.\\nI'm looking at fixing -Wl, and other issues.\", \"post_time\": \"2012-02-03 10:51:51\" },\n\t{ \"post_id\": 987, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"richardkchapman\", \"post_text\": \"eclcc can be used on the client machine to compile a standalone ecl executable (a.out), but it is also used by eclccserver to compile a shared object that is executed on the thor cluster.\\n\\nIf the external library you are using is installed on the machines in the thor cluster and on the eclccserver machine, then you should be able to use the library in your ecl program on thor (just so long as you can persuade it to link...)\\n\\nThere should be an easier way to persuade eclcc to add a library - when calling the library direct from ECL code (using a service definition) the library is added to the link command automatically, but I'm not sure how you do so when calling the library from embedded c++ code. Perhaps we need to add one. I'll add that to the bug report too. Could you call the library function directly from ECL perhaps (or at least add a call to the library from ECL so that the correct -l gets generated)?\", \"post_time\": \"2012-02-02 16:55:29\" },\n\t{ \"post_id\": 985, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"Thanks Richard, That is what I did\\nI used -save-temps and then I did linking on my own.\\nBy the way -Wl does not appear in the help page of eclcc.\\n\\nI do have another question though:\\n\\nSo I have an ecl file that has C++ code inside and links to other libraries. \\nWhen I compile it with eclcc it generates an a.out file. What is this file. How do I submit this compiled query?\\n\\nIn general I want this ecl file to be a library. I want to expose only the function name and the rest C++ and library to be hidden from the user. Is there a way to install it inside the thor cluster so that the user doesn't need to compile it with eclcc all the time. 
For example the library that I am integrating is compiled in Linux, so if the user is using a window machine then the compiled file wouldn't execute in a linux thor cluster. I am not quite sure if I am 100% correct, I have never used windows, but I doubt the eclcc in windows would compile an executable since Thor runs only on linux clusters.\", \"post_time\": \"2012-02-02 15:34:44\" },\n\t{ \"post_id\": 984, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"richardkchapman\", \"post_text\": \"https://github.com/hpcc-systems/HPCC-Pl ... ssues/1426\", \"post_time\": \"2012-02-02 15:26:01\" },\n\t{ \"post_id\": 983, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"richardkchapman\", \"post_text\": \"Sounds like a bug to me (or at the very least a missing feature) - I'll open an issue on GitHub for it.\\n\\nYou can probably hack something using --save-temps then issue the link step yourself I guess.\", \"post_time\": \"2012-02-02 15:24:15\" },\n\t{ \"post_id\": 974, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"Re: linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"Ok \\nI found the -v flag for eclcc, which reveals what I was expecting. The -L, -l flags that I pass with -Wc go to the compiler and not to the linker. I could hack it and execute the linking stage on my own. I wonder if there is any way you can pass the flags to the linker. If not, is it possible to include it in a future version?\", \"post_time\": \"2012-02-02 04:37:56\" },\n\t{ \"post_id\": 968, \"topic_id\": 241, \"forum_id\": 8, \"post_subject\": \"linking to an external library\", \"username\": \"nvasil\", \"post_text\": \"I use the BEGINC++ environment where I call a function that is defined in an external library. I am trying to link to it and I fail. \\n\\nHere is what I am using\\n\\neclcc myfile.ecl -Wc,-Imy_include_path,-Llibrarypath,-llibrary \\n\\nthe -I seems to work and it finds the definition files. The linking flags don't seem to work and I suspect that this is because linking is happening in a separate step.\\n\\nAny ideas? Also is there a way to print the script that eclcc executes? Something like make VERBOSE=1\", \"post_time\": \"2012-02-01 20:45:22\" },\n\t{ \"post_id\": 991, \"topic_id\": 242, \"forum_id\": 8, \"post_subject\": \"Re: Schemas in SOAP reply\", \"username\": \"richardkchapman\", \"post_text\": \"Thanks for the report - I have opened a bug:\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/1430\", \"post_time\": \"2012-02-03 09:29:13\" },\n\t{ \"post_id\": 990, \"topic_id\": 242, \"forum_id\": 8, \"post_subject\": \"Re: Schemas in SOAP reply\", \"username\": \"DSC\", \"post_text\": \"Replying to my own post....\\n\\nI searched HPCC's source code and found many instances of these schemas being defined as strings. About half of them are correct (they contain the trailing slash) and half are not. This should probably be turned into a simple bug report.\\n\\nWe are working around this issue by moving everything to JSON for now.\\n\\nCheers,\\n\\nDSC\", \"post_time\": \"2012-02-02 18:32:17\" },\n\t{ \"post_id\": 972, \"topic_id\": 242, \"forum_id\": 8, \"post_subject\": \"Schemas in SOAP reply\", \"username\": \"DSC\", \"post_text\": \"We've run into a snag with using the SAAJ library to parse SOAP replies. 
The replies contain references to these two schemas:\\n\\nhttp://schemas.xmlsoap.org/soap/envelope\\nhttp://schemas.xmlsoap.org/soap/encoding\\n\\nThe problem is, it seems that the official SOAP spec (1.1, at least) calls for trailing slashes on both of these schemas. The SAAJ library is refusing to parse the results because it cannot resolve them.\\n\\nIs this something that can be fixed/adjusted on the HPCC side of things?\\n\\nThanks,\\n\\nDSC\", \"post_time\": \"2012-02-01 22:02:51\" },\n\t{ \"post_id\": 992, \"topic_id\": 243, \"forum_id\": 8, \"post_subject\": \"Re: possible eclcc bug\", \"username\": \"richardkchapman\", \"post_text\": \"Thanks for the report - I have reproduced the issue and opened a bug at https://github.com/hpcc-systems/HPCC-Pl ... ssues/1431\", \"post_time\": \"2012-02-03 09:34:33\" },\n\t{ \"post_id\": 975, \"topic_id\": 243, \"forum_id\": 8, \"post_subject\": \"possible eclcc bug\", \"username\": \"nvasil\", \"post_text\": \"Make a file dummy.ecl\\nthat has only OUTPUT('hello')\\n\\nAccording to the eclcc -help\\n\\neclcc -c dummy.ecl
\\n\\nmust create an a.out.o file, but it doesn't.\\n\\nIf you do\\neclcc -v -c dummy.ecl\\n
\\nthen the you get the following log on the screen\\n\\n\\nInclude directory set to /opt/HPCCSystems/componentfiles/cl/include\\nLibrary directory set to /opt/HPCCSystems/lib\\nCompiler path set to "/usr/bin/g++"\\nLoading plugin /opt/HPCCSystems/plugins/libauditlib.so[lib_auditlib] version = AUDITLIB 1.0.1\\nLoading plugin /opt/HPCCSystems/plugins/libstringlib.so[lib_stringlib] version = STRINGLIB 1.1.14\\nLoading plugin /opt/HPCCSystems/plugins/libdebugservices.so[lib_debugservices] version = DEBUGSERVICES 1.0.1\\nLoading plugin /opt/HPCCSystems/plugins/libworkunitservices.so[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.1\\nLoading plugin /opt/HPCCSystems/plugins/libunicodelib.so[lib_unicodelib] version = UNICODELIB 1.1.05\\nLoading plugin /opt/HPCCSystems/plugins/liblogging.so[lib_logging] version = LOGGING 1.0.1\\nLoading plugin /opt/HPCCSystems/plugins/libfileservices.so[lib_fileservices] version = FILESERVICES 2.1.3\\nLoading plugin /opt/HPCCSystems/plugins/libparselib.so[lib_parselib] version = PARSELIB 1.0.1\\nAdding library: eclrtl\\nAdding library: a.out.res.o\\naddLibrary eclrtl\\naddLibrary a.out.res.o\\naddLibrary hthor\\nCompiling a.out\\n"/usr/bin/g++" "a.out.cpp" -fvisibility=hidden -DUSE_VISIBILITY=1 -fPIC -pipe -O0 -m64 -c "-I/opt/HPCCSystems/componentfiles/cl/include"\\nTime taken for compile: 329298991 cycles (329M) = 220 msec\\n\\nCompiled a.out\\nOutput file 'a.out' created\\nTimings:\\n WorkUnit_lockRemote total=0ms max=2us count=1 ave=2us\\n Generate_code total=23ms max=23079us count=1 ave=23079us\\n Compile_code total=220ms max=220477us count=1 ave=220477us\\n WorkUnit_unlockRemote total=0ms max=1us count=1 ave=1us\\n
\\nAlthough we don't expect an a.out the message says it was created, but it doesn't. \\n\\nLooking at the g++ command line we would have expected a a.out.o\\nbut we don't get it.\\n\\nThe only way to get it is by doing this\\neclcc -c -save-temps dummy.ecl\", \"post_time\": \"2012-02-02 04:56:43\" },\n\t{ \"post_id\": 1015, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"ghalliday\", \"post_text\": \"If you want to use a macro that looks more like a conventional attribute, try using functionmacro instead (bad name I know)\\n\\nI would imagine something like:\\n\\n\\nSHARED MakeFacetRecordSet(ds,f) := FUNCTIONMACRO\\n summary := TABLE(ds,{STRING display := ds.f, UNSIGNED num := COUNT(GROUP)},f,MERGE,UNSORTED);\\n RETURN PROJECT(summary,TRANSFORM(FacetRecord,SELF:=LEFT));\\nENDMACRO;\\n
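\\nThe call site then reads like an ordinary function call returning a recordset, something like (sketch only, not compiled here):\\n\\ncityFacets := MakeFacetRecordSet(indexResults,city);\\nOUTPUT(cityFacets);\\n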
\", \"post_time\": \"2012-02-06 09:12:25\" },\n\t{ \"post_id\": 1010, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":1b8u2t8p]There are fundamentally two types of MACROs documented, and you're trying to use the form we rarely ever use. \\n\\nEgad. That works, and I can barely see why. This will definitely turn into a heavily-documented piece of code.\\n\\nMany thanks and have a great weekend.\\n\\nDan\", \"post_time\": \"2012-02-03 20:25:32\" },\n\t{ \"post_id\": 1009, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThere are fundamentally two types of MACROs documented, and you're trying to use the form we rarely ever use.
Try changing your code to this:
MakeFacetRecordSet(def,ds,f) := MACRO\\n def := PROJECT(TABLE(ds,\\n {STRING display := ds.f, UNSIGNED num := COUNT(GROUP)},\\n f,MERGE,UNSORTED),\\n TRANSFORM(FacetRecord,SELF:=LEFT))\\nENDMACRO;\\n//here's my test file\\nSomeFile := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t\\t\\t\\t{STRING1 Letter});\\n//call the macro\\nMakeFacetRecordSet(cityFacets,SomeFile,Letter)\\n\\t\\t\\t\\t\\t\\t\\t \\n//then show the result:\\ncityFacets;
Notice that I'm passing in the definition name to use so I can call it explicitly outside the MACRO.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-03 20:17:57\" },\n\t{ \"post_id\": 1008, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"I'm now trying to generalize the facet stuff with a macro. This actually is fairly close to the example in the language reference within the MACRO documentation. Consider this working snippet:\\n\\nSHARED\\tFacetRecord := RECORD\\n\\tSTRING\\t\\t\\t\\t\\t\\t\\t\\t\\tdisplay;\\n\\tUNSIGNED\\t\\t\\t\\t\\t\\t\\t\\tnum;\\nEND;\\n\\nSHARED\\tFacetCollectionRecord := RECORD\\n\\tDATASET(FacetRecord)\\t\\t\\t\\t\\tcities;\\nEND;\\n\\ncityFacets := PROJECT(TABLE(indexResults,{STRING display := indexResults.city, UNSIGNED num := COUNT(GROUP)},city,MERGE,UNSORTED),TRANSFORM(FacetRecord,SELF:=LEFT));\\n\\t\\nfacets := DATASET\\t(\\n\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t\\t\\t\\tcityFacets\\n\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\tFacetCollectionRecord\\n\\t\\t\\t\\t\\t);
\\n\\nThe desire is to generalize the creation of cityFacets. Neither this:\\n\\nSHARED\\tMakeFacetRecordSet(ds,f) := MACRO\\n\\tPROJECT(TABLE(ds,{STRING display := ds.f, UNSIGNED num := COUNT(GROUP)},f,MERGE,UNSORTED),TRANSFORM(FacetRecord,SELF:=LEFT))\\nENDMACRO;\\n\\ncityFacets := MakeFacetRecordSet(indexResults,city);
\\n\\nNor this (which is closer to the documented example):\\n\\nSHARED\\tMakeFacetRecordSet(ds,f) := MACRO\\n\\tPROJECT(TABLE(ds,{STRING display := f, UNSIGNED num := COUNT(GROUP)},f,MERGE,UNSORTED),TRANSFORM(FacetRecord,SELF:=LEFT))\\nENDMACRO;\\n\\ncityFacets := MakeFacetRecordSet(indexResults,indexResults.city);
\\n\\nWill compile. Both give identical errors:\\n\\nError: While expanding macro makefacetrecordset (280, 54), 3002\\nError: Initializer for field cities in inline dataset has the wrong type (284, 10), 3123\\nError: Unknown identifier before "." (expected :=) (4, 13), 2167,
\\n\\nBeating my head against my keyboard has also failed to produce reliable results. I'm sure that this is also a simple cause. Richard, can you kindly tell me where the heck I'm going wrong?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-03 19:38:37\" },\n\t{ \"post_id\": 1007, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI have some ideas about refactoring this code to make things a little more reusable. The facet-generating code, for example, will identical no matter what search functions are called (except for this dependency on an interim dataset, which is vexing).
I suggest you take a look at the Programmer's Guide article on Query Libraries -- making common functions available to any/all queries is exactly what they're all about.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-03 18:23:35\" },\n\t{ \"post_id\": 1006, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":1cdomgpl]OK, then I can immediately make one suggestion that should improve performance -- get rid of the FETCH, it's not necessary. You already have all the fields in your dataset as search terms in your INDEX, so just use the INDEX. Instead of using foundRecords in your TABLE, just use interimResults. \\n\\nWe have long since quit using FETCH (except in some arcane circumstances) in Roxie queries in favor of using payload INDEXes.\\n\\nIndeed. A chosen query to my test data (through Roxie) resulted in a 5 minute response when FETCH was involved and around 8 seconds when using only the index. I suspect that I'll need to investigate some further preprocessing/indexing schemes to make this faster still, as I'm eventually aiming for one-second response time (a result "page" with one-or-more facets, chosen at runtime). I started with FETCH because I could foresee times where I don't have access to a suitable index. I don't think that will work, though, for performance reasons.\\n\\nI have some ideas about refactoring this code to make things a little more reusable. The facet-generating code, for example, will identical no matter what search functions are called (except for this dependency on an interim dataset, which is vexing). If things don't pan out then I may be back to this thread for further ideas.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-03 17:33:16\" },\n\t{ \"post_id\": 1004, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOK, then I can immediately make one suggestion that should improve performance -- get rid of the FETCH, it's not necessary. You already have all the fields in your dataset as search terms in your INDEX, so just use the INDEX. Instead of using foundRecords in your TABLE, just use interimResults. \\n\\nWe have long since quit using FETCH (except in some arcane circumstances) in Roxie queries in favor of using payload INDEXes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-03 16:46:33\" },\n\t{ \"post_id\": 1003, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":3ny1tlj9]Is this code intended to end up in Roxie queries, or will it only be used in Thor jobs? The answer to that can affect the answers to your questions.\\n\\nIn this particular case, it's only Roxie. I would think that the technique would be applicable no matter what environment you're in, though. I'm a huge fan of non-duplication of code where possible.\", \"post_time\": \"2012-02-03 16:29:35\" },\n\t{ \"post_id\": 1002, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nIs this code intended to end up in Roxie queries, or will it only be used in Thor jobs? 
The answer to that can affect the answers to your questions.\\n\\nRichard\", \"post_time\": \"2012-02-03 16:16:57\" },\n\t{ \"post_id\": 1001, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":1nx1w0mg]I obviously can't test this, but try it and let me know how it works.\\n\\nHi Richard,\\n\\nThat worked perfectly. It looks like I was over-thinking this and making some bad assumptions about how datasets were referenced. Three record layouts needed to be moved into that routine to make it work. (Though it is not efficient; I'm probably going to get a call from one of our system administrators soon.)\\n\\nThis was only a fragment of a larger piece. Other search routines will want to have basically identical results. If portions of the final results need to be defined inline like this, where there are dependencies on interim datasets, how can one structure the code so there is not a lot of manual duplication?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-03 15:58:44\" },\n\t{ \"post_id\": 1000, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nSorry -- don't forget to remove the SHARED from your RECORD structures (like I did ) because now they are inside your FUNCTION structure.\\n\\nRichard\", \"post_time\": \"2012-02-03 15:37:19\" },\n\t{ \"post_id\": 999, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Re: Problem with grouping field in TABLE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nYour TABLE function is expecting a RECORD Structure containing fields from foundRecords, while your CityFacetRecord definition is referencing Records.city. The two can't be connected by the compiler (probably because of your FETCH), so that's why you get the warning and the error occurs.\\n\\nIf you re-organize your Facets and Search Results RECORD Structures to immediately follow your foundRecords definition (and change the CityFacetRecord to use foundRecords) then your code looks like this:\\n
//--------------------------------------------------------------------------\\n// Record definition of location information\\n//--------------------------------------------------------------------------\\nEXPORT RecordLayout := RECORD\\n // Common fields\\n STRING20 companyID;\\n // Address fields\\n STRING50 city;\\n STRING30 state;\\n STRING9 postalCode;\\n INTEGER2 countryID;\\n INTEGER2 countyID;\\n // Phone fields\\n STRING5 areaCode;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Dataset\\n//--------------------------------------------------------------------------\\nSHARED Records := DATASET(kDataPath,{RecordLayout,UNSIGNED8 __pos{VIRTUAL(FILEPOSITION)}},FLAT);\\n\\n//--------------------------------------------------------------------------\\n// Index\\n//--------------------------------------------------------------------------\\nEXPORT IDX_Combined := INDEX (\\n Records,\\n {city,postalCode,countyID,areacode,state,countryID,companyID,__pos},\\n kCombinedIndexPath\\n );\\n\\n//--------------------------------------------------------------------------\\n// Record definition containing only IDs\\n//--------------------------------------------------------------------------\\nSHARED CompanyIDSearchResultRecord := RECORD\\n RecordLayout.companyID;\\nEND;\\n\\n\\n//--------------------------------------------------------------------------\\n// Actual AND search routine\\n//--------------------------------------------------------------------------\\nEXPORT SearchAND(SearchParameters p) := FUNCTION\\n cityNames := HUtil.SplitAndLowerString(p.cityNames);\\n stateAbbreviations := HUtil.SplitAndLowerString(p.stateAbbreviations);\\n postalCodes := HUtil.SplitAndLowerString(p.postalCodes);\\n countryIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countryIDs);\\n countyIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countyIDs);\\n areaCodes := HUtil.SplitAndLowerString(p.areaCodes);\\n \\n hasCityParam := cityNames != [];\\n hasStateParam := stateAbbreviations != [];\\n hasPostalParam := postalCodes != [];\\n hasCountryParam := countryIDs != [];\\n hasCountyParam := countyIDs != [];\\n hasAreaCodes := areaCodes != [];\\n hasAnySearchCriteria := hasCityParam OR hasStateParam OR hasPostalParam OR hasCountryParam OR hasCountyParam OR hasAreaCodes;\\n \\n interimResults := IF (\\n hasAnySearchCriteria,\\n IDX_Combined (\\n (~hasCityParam OR city IN cityNames) AND\\n (~hasStateParam OR state IN stateAbbreviations) AND\\n (~hasPostalParam OR postalCode IN postalCodes) AND\\n (~hasCountryParam OR countryID IN countryIDs) AND\\n (~hasCountyParam OR countyID IN countyIDs) AND\\n (~hasAreaCodes OR areaCode IN areaCodes)\\n )\\n ) : ONWARNING(4523,ignore); // Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n \\n totalFound := COUNT(interimResults);\\n \\n displayedResults := PROJECT(CHOOSEN(interimResults,p.resultCount,p.resultOffset),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT));\\n \\n foundRecords := IF (\\n hasAnySearchCriteria,\\n FETCH(Records,interimResults,RIGHT.__pos)\\n );\\n \\n//--------------------------------------------------------------------------\\n// Facets\\n//--------------------------------------------------------------------------\\nSHARED CityFacetRecord := RECORD\\n foundRecords.city;\\n UNSIGNED num := COUNT(GROUP);\\nEND;\\n\\nSHARED FacetCollectionRecord := RECORD\\n DATASET(CityFacetRecord) cities;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// 
Record definition of search result; will contain only one record\\n//--------------------------------------------------------------------------\\nSHARED SearchResult := RECORD\\n INTEGER totalNumberFound;\\n INTEGER resultOffset;\\n INTEGER resultCount;\\n DATASET(CompanyIDSearchResultRecord) companyIDs;\\n DATASET(FacetCollectionRecord) facets;\\nEND;\\n\\n facets := IF (\\n hasAnySearchCriteria,\\n DATASET (\\n [\\n {\\n TABLE(foundRecords,CityFacetRecord,city)\\n }\\n ],\\n FacetCollectionRecord\\n )\\n );\\n \\n resultDS := DATASET (\\n [\\n {\\n totalFound,\\n p.resultOffset,\\n p.resultCount,\\n displayedResults,\\n facets\\n }\\n ],\\n SearchResult\\n );\\n \\n RETURN resultDS;\\nEND;
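\\nThe key change is that the facet crosstab is now built directly from foundRecords, so the grouping field in CityFacetRecord relates to the TABLE's own input, i.e. the call boils down to (sketch):\\n\\ncityFacets := TABLE(foundRecords,CityFacetRecord,city);\\n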
I obviously can't test this, but try it and let me know how it works.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-03 15:35:19\" },\n\t{ \"post_id\": 998, \"topic_id\": 247, \"forum_id\": 8, \"post_subject\": \"Problem with grouping field in TABLE\", \"username\": \"DSC\", \"post_text\": \"Continuing my plan to conquer the world the hard way, I'm trying to extend my previous ECL search routines so that the search result contain facets. (Facets are basically a TABLE's "cross tab report" that are executed against an entire search result, not just the data that is returned to the caller.) With ECL's TABLE command I thought this would be straightforward, but I'm running into a weird problem.\\n\\nHere is some code showing a trial-run of the facet idea, against the 'city' field in the dataset:\\n\\n//--------------------------------------------------------------------------\\n// Record definition of location information\\n//--------------------------------------------------------------------------\\nEXPORT\\tRecordLayout := RECORD\\n\\t// Common fields\\n\\tSTRING20\\t\\tcompanyID;\\n\\t// Address fields\\n\\tSTRING50\\t\\tcity;\\n\\tSTRING30\\t\\tstate;\\n\\tSTRING9\\t\\t\\tpostalCode;\\n\\tINTEGER2\\t\\tcountryID;\\n\\tINTEGER2\\t\\tcountyID;\\n\\t// Phone fields\\n\\tSTRING5\\t\\t\\tareaCode;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Dataset\\n//--------------------------------------------------------------------------\\nSHARED\\tRecords := DATASET(kDataPath,{RecordLayout,UNSIGNED8 __pos{VIRTUAL(FILEPOSITION)}},FLAT);\\n\\n//--------------------------------------------------------------------------\\n// Index\\n//--------------------------------------------------------------------------\\nEXPORT\\tIDX_Combined := INDEX\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tRecords,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{city,postalCode,countyID,areacode,state,countryID,companyID,__pos},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tkCombinedIndexPath\\n\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\n//--------------------------------------------------------------------------\\n// Record definition containing only IDs\\n//--------------------------------------------------------------------------\\nSHARED\\tCompanyIDSearchResultRecord := RECORD\\n\\tRecordLayout.companyID;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Facets\\n//--------------------------------------------------------------------------\\nSHARED\\tCityFacetRecord := RECORD\\n\\tRecords.city;\\n\\tUNSIGNED\\t\\t\\t\\t\\t\\t\\t\\tnum := COUNT(GROUP);\\nEND;\\n\\nSHARED\\tFacetCollectionRecord := RECORD\\n\\tDATASET(CityFacetRecord)\\t\\t\\t\\tcities;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Record definition of search result; will contain only one record\\n//--------------------------------------------------------------------------\\nSHARED\\tSearchResult := RECORD\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\ttotalNumberFound;\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\tresultOffset;\\n\\tINTEGER\\t\\t\\t\\t\\t\\t\\t\\t\\tresultCount;\\n\\tDATASET(CompanyIDSearchResultRecord)\\tcompanyIDs;\\n\\tDATASET(FacetCollectionRecord)\\t\\t\\tfacets;\\nEND;\\n\\n//--------------------------------------------------------------------------\\n// Actual AND search routine\\n//--------------------------------------------------------------------------\\nEXPORT\\tSearchAND(SearchParameters p) := FUNCTION\\n\\tcityNames := 
HUtil.SplitAndLowerString(p.cityNames);\\n\\tstateAbbreviations := HUtil.SplitAndLowerString(p.stateAbbreviations);\\n\\tpostalCodes := HUtil.SplitAndLowerString(p.postalCodes);\\n\\tcountryIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countryIDs);\\n\\tcountyIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countyIDs);\\n\\tareaCodes := HUtil.SplitAndLowerString(p.areaCodes);\\n\\t\\n\\thasCityParam := cityNames != [];\\n\\thasStateParam := stateAbbreviations != [];\\n\\thasPostalParam := postalCodes != [];\\n\\thasCountryParam := countryIDs != [];\\n\\thasCountyParam := countyIDs != [];\\n\\thasAreaCodes := areaCodes != [];\\n\\thasAnySearchCriteria := hasCityParam OR hasStateParam OR hasPostalParam OR hasCountryParam OR hasCountyParam OR hasAreaCodes;\\n\\t\\n\\tinterimResults :=\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\thasAnySearchCriteria,\\n\\t\\t\\t\\t\\t\\t\\t\\tIDX_Combined\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasCityParam OR city IN cityNames) AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasStateParam OR state IN stateAbbreviations) AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasPostalParam OR postalCode IN postalCodes) AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasCountryParam OR countryID IN countryIDs) AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasCountyParam OR countyID IN countyIDs) AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(~hasAreaCodes OR areaCode IN areaCodes)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t\\t\\t) : ONWARNING(4523,ignore);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\t\\n\\ttotalFound := COUNT(interimResults);\\n\\t\\n\\tdisplayedResults := PROJECT(CHOOSEN(interimResults,p.resultCount,p.resultOffset),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT));\\n\\t\\n\\tfoundRecords := IF\\t(\\n\\t\\t\\t\\t\\t\\t\\thasAnySearchCriteria,\\n\\t\\t\\t\\t\\t\\t\\tFETCH(Records,interimResults,RIGHT.__pos)\\n\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tfacets := IF\\t(\\n\\t\\t\\t\\t\\t\\thasAnySearchCriteria,\\n\\t\\t\\t\\t\\t\\tDATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTABLE(foundRecords,CityFacetRecord,city)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tFacetCollectionRecord\\n\\t\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t);\\n\\t\\n\\tresultDS := DATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t\\t\\t\\t\\ttotalFound,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tp.resultOffset,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tp.resultCount,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tdisplayedResults,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tfacets\\n\\t\\t\\t\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\tSearchResult\\n\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tRETURN resultDS;\\nEND;
\\n\\nI receive the following warning when checking syntax:\\n\\nWarning: Field 'city' in TABLE does not appear to be properly defined by grouping conditions (281, 47), 2168, C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\blahblahblah
\\n\\nThat corresponds to the single TABLE statement in the code. If I try to ignore the warning and compile the code, I get this error:\\n\\nWarning: (0,0): error C2131: records.city - Table records is not related to foundrecords
\\n\\nI've tried refactoring the attributes a number of different ways and always wind up with the same warning and error. What am I doing wrong? Is there an easier way to accomplish the same goal?\\n\\n[Edit: I originally ran into this problem with community_3.4.0-1 but duplicated it with community_3.4.2-1. If that makes a difference.]\\n\\nMany thanks!\\n\\nDan\", \"post_time\": \"2012-02-03 15:14:36\" },\n\t{ \"post_id\": 1042, \"topic_id\": 250, \"forum_id\": 8, \"post_subject\": \"Re: How to use Dictionary in ECL?\", \"username\": \"dabayliss\", \"post_text\": \"Inside: Installing & Running the HPCC Platform \\n\\nwe have an example called Anagram2 - that does precisely the dictionary lookup you refer to.\\n\\nECL can bring in all sorts of formats; including binary. Of course you will need to know what their format is\", \"post_time\": \"2012-02-07 13:38:24\" },\n\t{ \"post_id\": 1041, \"topic_id\": 250, \"forum_id\": 8, \"post_subject\": \"How to use Dictionary in ECL?\", \"username\": \"ashishbhagasra\", \"post_text\": \"Suppose i am having some strings, Say,\\nHard Times Are Over\\nNow, i want to validate each word with dictionary words.\\n\\nAlso, is there a way to use binary formatted files(of other languages) in ECL.\", \"post_time\": \"2012-02-07 06:23:17\" },\n\t{ \"post_id\": 1048, \"topic_id\": 251, \"forum_id\": 8, \"post_subject\": \"Re: Virtual field and record size\", \"username\": \"DSC\", \"post_text\": \"Thanks for the explanation.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-07 16:12:50\" },\n\t{ \"post_id\": 1047, \"topic_id\": 251, \"forum_id\": 8, \"post_subject\": \"Re: Virtual field and record size\", \"username\": \"ghalliday\", \"post_text\": \"I think the behaviour is justifiable. There is a difference between using a record, and using a record within a record.\\n\\n\\nrec1 := RECORD\\n STRING10 foo;\\nEND;\\n\\nrec2 := RECORD\\n rec1;\\nEND;\\n
\\n\\nWhen you include a record inside another record it adds all the fields from rec1 with names that aren't already contained in rec1. As part of that it doesn't include the attribute of whether it is a virtual field or not. (From memory the reason is to avoid strange issues where you use the record from another dataset.)\\n\\nI think what you wanted in you example is to just use the name of the record. i.e.,\\n\\n\\nr := DATASET(kPath,LayoutWithPos,FLAT);\\n
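\\nIn other words (a quick sketch along those lines, not run here):\\n\\n// the named record keeps the VIRTUAL(FILEPOSITION) attribute on __pos\\nr1 := DATASET(kPath,LayoutWithPos,FLAT);\\n\\n// wrapping it in { } re-derives the fields and drops the virtual attribute,\\n// so the expected disk record size becomes 18 bytes instead of 10\\nr2 := DATASET(kPath,{LayoutWithPos},FLAT);\\n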
\", \"post_time\": \"2012-02-07 15:45:11\" },\n\t{ \"post_id\": 1043, \"topic_id\": 251, \"forum_id\": 8, \"post_subject\": \"Virtual field and record size\", \"username\": \"DSC\", \"post_text\": \"I was experimenting with some ECL changes and ran across an error that I don't quite understand.\\n\\nSimple working example:\\n\\nLayout := RECORD\\n\\tSTRING10\\tfoo;\\nEND;\\n\\nr := DATASET(kPath,{Layout, UNSIGNED8 __pos{VIRTUAL(FILEPOSITION)}},FLAT);
\\n\\nThe DATASET line here uses the struct option to append a virtual field to the existing layout. If you do something like try to create an index that uses 'r' then it will work just fine.\\n\\nAccording to the documentation, the '{' and '}' characters are basically synonyms for RECORD and END and allow you to create an inline RECORD definition. On a whim, I tried the following:\\n\\nLayout := RECORD\\n\\tSTRING10\\tfoo;\\nEND;\\n\\nLayoutWithPos := RECORD\\n\\tLayout;\\n\\tUNSIGNED8 __pos{VIRTUAL(FILEPOSITION)};\\nEND;\\n\\nr := DATASET(kPath,{LayoutWithPos},FLAT);
\\n\\nThis seems to be equivalent and it even compiles, but if you try to create an index that references this new r then you get a runtime error along the lines of 'record has a size of 18 when 10 is expected' (sorry, I forgot to copy the actual error).\\n\\nIs this a bug or a misunderstanding on my part?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-07 14:03:11\" },\n\t{ \"post_id\": 1086, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"ghalliday\", \"post_text\": \"While looking at fixing the warning it looks like there is also a #option (that doesn't go inside a beginc++) to add a library to the link path:\\n\\n#link ('library');\\n\\nBTW If you specify a full path for the library it should automatically add the necessary path information as well.\", \"post_time\": \"2012-02-10 13:58:58\" },\n\t{ \"post_id\": 1085, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"ghalliday\", \"post_text\": \"Confusingly there are two forms of #option.\\n\\n#option ('value', 'name');\\n\\nwhich can be used outside a beginc++\\n\\nand without parameters e.g.,\\n\\n#option library 'mulib'\\n\\nwhich can be used inside a begin c++.\\nThey have different meanings, but it would make sense to allow\\n#option ('library', 'mylib');\\ninside a begin c++ as well.\\n\\nI have opened an issue https://github.com/hpcc-systems/HPCC-Pl ... ssues/1513 for it.\", \"post_time\": \"2012-02-10 13:47:18\" },\n\t{ \"post_id\": 1084, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"bforeman\", \"post_text\": \"I suspect that the inability of eclplus to compile it is because the warnings are real
\\n\\nWhen you say "inability to compile" are you receiving errors (instead of warnings), and what are they?\\n\\nAlso the #option() does not work inside the BEGINC++\\nIt only works if you put it outside
\\n\\nThat would be expected as #OPTION is not C++ \\n\\nBut let me check with development regarding the warnings and what they mean.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-10 12:51:16\" },\n\t{ \"post_id\": 1082, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"nvasil\", \"post_text\": \"Also the #option() does not work inside the BEGINC++\\n\\nIt only works if you put it outside\", \"post_time\": \"2012-02-10 05:38:37\" },\n\t{ \"post_id\": 1081, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"nvasil\", \"post_text\": \"Almost there\\n\\nWhen I compile with eclcc I get this warning\\n\\n\\nunknown(0,0): warning C4534: #option ('compileOptions') will have no effect - it needs to be set in the submitted workunit.\\nunknown(0,0): warning C4534: #option ('linkOptions') will have no effect - it needs to be set in the submitted workunit.\\n0 error, 2 warnings\\n\\nbut it compiles fine and both compiler and linker find all the paths (absolute paths).\\n\\nWhen I submit it to the thor cluster with eclplus I get the same warning and compilation fails. For the test I used one thor cluster running on my laptop. So basically the same absolute paths that eclcc finds with the #option are the same for the Thor cluster who is on the same machine. \\n\\nI suspect that the inability of eclplus to compile it is because the warnings are real\", \"post_time\": \"2012-02-10 05:36:03\" },\n\t{ \"post_id\": 1073, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"bforeman\", \"post_text\": \"Cool, good catch, and thanks for the feedback!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-09 19:57:59\" },\n\t{ \"post_id\": 1072, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"nvasil\", \"post_text\": \"Ok I tested it and it works. You have to be careful though\\nIt would be nice to clarify in the documentation that \\nthe linking parameters should not have spaces, but only commas.\\nFor example I had in my code\\n#option('linkOptions',' -LMypath -lmylib')\\nand it failed.\\n\\nI think you also pointed that in one the posts\", \"post_time\": \"2012-02-09 19:46:48\" },\n\t{ \"post_id\": 1065, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION('linkOptions',xxx) bug\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nCan you please log this into the Community Issue Tracker? One of our developers will have a look at it.\\n\\nhttps://github.com/hpcc-systems/HPCC-Platform/issues\\n\\nJust use your HPCC login to get access to GitHub.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2012-02-09 14:24:20\" },\n\t{ \"post_id\": 1061, \"topic_id\": 256, \"forum_id\": 8, \"post_subject\": \"#OPTION('linkOptions',xxx) bug\", \"username\": \"nvasil\", \"post_text\": \"Following up on this post http://hpccsystems.com/bb/viewtopic.php?f=8&t=241&sid=8c66e22a0b0fdda56b1eb114740df8a0 \\nI found out that there is a bug:\\n\\nHere is what I used:\\n
#OPTION('linkOptions', ' -LMyPath -lmylib ');\\n
\\n\\nAnd here is what eclcc generated:\\n"/usr/bin/g++" -L. -Wl,-E -fPIC -pipe -O0 -L/opt/HPCCSystems/lib -Wl,-rpath -Wl,/opt/HPCCSystems/lib -LMyPath -lmylib "a.out.o" -leclrtl -la.out.res.o -lhthor -o "a.out"
\\n\\nBut that is incorrect, it will not do the linking properly and for that reason the linker fails.\\nThis is because all the libraries must be appended after a.out.o\\nSo the correct one is this:\\n\\n"/usr/bin/g++" -L. -Wl,-E -fPIC -pipe -O0 -L/opt/HPCCSystems/lib -Wl,-rpath -Wl,/opt/HPCCSystems/lib "a.out.o" -LMyPath -lmylib -leclrtl -la.out.res.o -lhthor -o "a.out"
\\n\\nCan we get a fix on that?\", \"post_time\": \"2012-02-09 04:48:27\" },\n\t{ \"post_id\": 1071, \"topic_id\": 257, \"forum_id\": 8, \"post_subject\": \"Re: Issue using :digit: in regular expressions.\", \"username\": \"DSC\", \"post_text\": \"[quote="Allan":3ol6oh41]It's not at all clear form the Ref guide that you need those extra [] as the set definition is also delineated by [ ]\\n\\nFWIW, the extra brackets are part of regular expressions, not ECL. You can think of '[:digit:]' as a literal string replacement for '0-9'. If you used '0-9' in your pattern then you matching exactly those three characters; if you use '[0-9]' then you're matching 'any single numeric character'.\\n\\nA different example that highlights this is '[[:digit:][:alpha:]]' which means 'any single alphanumeric character' (and '[[:alnum:]]' means the same thing).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-09 18:41:26\" },\n\t{ \"post_id\": 1069, \"topic_id\": 257, \"forum_id\": 8, \"post_subject\": \"Re: Issue using :digit: in regular expressions.\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nThat worked.\\nIt's not at all clear form the Ref guide that you need those extra [] as the set definition is also delineated by [ ]\\n\\nAnyway that's now in very heavy blue crayon in my printed copy of the guide.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-09 15:55:07\" },\n\t{ \"post_id\": 1066, \"topic_id\": 257, \"forum_id\": 8, \"post_subject\": \"Re: Issue using :digit: in regular expressions.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nAfter brushing up on my regular expressions, try this:\\n\\nPATTERN UKOnly := '(' PATTERN('[[:digit:]]{1}') ')';\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-09 14:35:57\" },\n\t{ \"post_id\": 1064, \"topic_id\": 257, \"forum_id\": 8, \"post_subject\": \"Issue using :digit: in regular expressions.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nExperimenting with parsing patterns and have hit a problem using the shorthand :digit:' for 0-9.\\nIn the code below defining 'UKOnly' as \\n\\nPATTERN UKOnly := '(' PATTERN('[0-9]{1}') ')';\\n
\\n\\nmatches the input data: '(0)118-123456'\\nbut if I use the definition:\\n\\nPATTERN UKOnly := '(' PATTERN('[:digit:]{1}') ')';\\n
\\nThe above input data is NOT matched.\\nThe full example is:\\n\\nPATTERN UKOnly := '(' PATTERN('[0-9]{1}') ')';\\n//PATTERN UKOnly := '(' PATTERN('[:digit:]{1}') ')';\\nPATTERN area := PATTERN('[0-9]{1,4}');\\nPATTERN base := PATTERN('[0-9]{1,6}');\\nPATTERN TelNo := OPT(OPT(UKOnly)area '-')base;\\n\\nInD := DATASET([{'(0)118-123456'},\\n\\t\\t{'0118-123456'}\\n ],{string line});\\n\\t\\t\\t\\t\\t\\t\\t \\nRes := RECORD\\n ValidNum := MATCHTEXT(TelNo);\\nEND;\\n\\nOUTPUT(PARSE(InD,line,TelNo,Res,WHOLE),NAMED('Valid_Tel_Numbers'));\\n
\\n\\nAny idea's what' going on?\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-09 13:46:57\" },\n\t{ \"post_id\": 1083, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"ghalliday\", \"post_text\": \"Sets of datasets have some unfortunate restrictions at the moment (a weird side-effect of the way the language grammar is written). It needs fixing, but the last time I looked the fix was far from simple.\\n\\nI can look at fixing this particular instance though.\", \"post_time\": \"2012-02-10 11:55:07\" },\n\t{ \"post_id\": 1079, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"DSC\", \"post_text\": \"Thanks, Richard!\", \"post_time\": \"2012-02-09 21:59:07\" },\n\t{ \"post_id\": 1078, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"rtaylor\", \"post_text\": \"OK, I've alerted the "powers that be" to the issue and am awaiting their decision as to whether this behavior is intentional or a bug.\", \"post_time\": \"2012-02-09 21:50:05\" },\n\t{ \"post_id\": 1077, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"DSC\", \"post_text\": \"Actually, I was looking for the number of data sets in the set, rather than the aggregate total. That would match my expectation of how COUNT would work as well (shallow count, not deep).\", \"post_time\": \"2012-02-09 21:27:36\" },\n\t{ \"post_id\": 1076, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nWell, my example may be bad, but COUNT(setOfThings) works for everything except SET OF DATASET. Slight modifications to my example will show that. Plus, the documentation for valuelist does say:\\n\\nA comma-delimited list of expressions to count. This may also be a SET of values.\\n\\nAnd there is no restriction on the word 'values' in that description. I think this should be filed as a bug, either to support SET OF DATASET types or to explicitly omit them with an appropriate compiler error message.\\n
I agree that it would be nice if COUNT(SetOfDatasets) worked the way you would like, but my real point was that, even if it did work, all it would do would be to count the number of datasets in the set, not the aggregate total of records across all the datasets in the set (which is what I think you were going for).\\n\\nThe syntax error you got (expected '[') was indicating that it recognized that you were trying to COUNT a set of datasets, and it was asking you to specify which dataset in the set to count. If you had changed your code to:TestRec := RECORD\\n STRING foo;\\nEND;\\n\\nds := DATASET([{'bar'}],TestRec);\\n\\ndsSet := [ds];\\n\\nOUTPUT(COUNT(dsSet[1]));
It would have compiled and run, returning 1.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-09 21:24:51\" },\n\t{ \"post_id\": 1075, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":3dywcwk7]The COUNT function doc shows two forms: the first, COUNT(recordset), counts the number of records in the recordset, while the second, COUNT(valuelist), will count the number of entries in the list (and won't count the records in your set of datasets). The SET OF DATASETs functionality is fairly new to the language and was designed just for use in a couple of functions.\\n\\nWell, my example may be bad, but COUNT(setOfThings) works for everything except SET OF DATASET. Slight modifications to my example will show that. Plus, the documentation for valuelist does say:\\n\\nA comma-delimited list of expressions to count. This may also be a SET of values.
\\n\\nAnd there is no restriction on the word 'values' in that description. I think this should be filed as a bug, either to support SET OF DATASET types or to explicitly omit them with an appropriate compiler error message.\\n\\nWhile I'm on the subject of sets, and to further stray from the original topic: It would be great if the SET type had more support throughout ECL. More granular ways to slice and dice them, append to them, find intersections and unions, etc.. Put this down on a wish list, please!\\n\\n[quote="rtaylor":3dywcwk7]\\nRegarding your main issue, your approach should not be to remove the empties, but just to use the non-empties. Take a look at the RANGE function, then you can write code something like this to use in your MERGEJOIN:\\n\\nWhoa. That is exactly what I was looking for. Plus, it's a great/useful example of how to use the RANGE function.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-09 20:39:17\" },\n\t{ \"post_id\": 1074, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nIt looks like COUNT doesn't like SET OF DATASET types but can handle simpler types (at least). Is this documented somewhere?
The COUNT function doc shows two forms: the first, COUNT(recordset), counts the number of records in the recordset, while the second, COUNT(valuelist), will count the number of entries in the list (and won't count the records in your set of datasets). The SET OF DATASETs functionality is fairly new to the language and was designed just for use in a couple of functions.\\n\\nRegarding your main issue, your approach should not be to remove the empties, but just to use the non-empties. Take a look at the RANGE function, then you can write code something like this to use in your MERGEJOIN:IncDS := DATASET([{IF(EXISTS(ds1),1,0)},\\n {IF(EXISTS(ds2),2,0)},\\n {IF(EXISTS(ds3),3,0)},\\n {IF(EXISTS(ds4),4,0)}],{integer ds});\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nSetDS := RANGE([ds1,ds2,ds3,ds4],SET(IncDS(ds<>0),ds));
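A hedged sketch of what the MERGEJOIN over that set might then look like, assuming (this is an assumption, the layouts of ds1..ds4 are not shown in this thread) that the datasets all carry a companyID field and are sorted by it:

// SetDS is the set built with RANGE above; companyID is assumed to be the
// shared, sorted key of ds1..ds4.
Joined := MERGEJOIN(SetDS,
                    LEFT.companyID = RIGHT.companyID,
                    SORTED(companyID));
OUTPUT(Joined);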
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-09 20:09:17\" },\n\t{ \"post_id\": 1070, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Re: Manipulating SET OF contents\", \"username\": \"DSC\", \"post_text\": \"That syntax error I cited above really bugs me, along with another I ran into. I isolated the other one to this snippet:\\n\\nTestRec := RECORD\\n\\tSTRING\\tfoo;\\nEND;\\n\\nds := DATASET([{'bar'}],TestRec);\\n\\ndsSet := [ds];\\n\\nOUTPUT(COUNT(dsSet));
\\n\\nCompiling this produces:\\n\\nError: syntax error near ")" : expected '[' (9, 19), 3002,
\\n\\nWhile this simpler example compiles fine:\\n\\nds := [1];\\n\\ndsSet := [ds];\\n\\nOUTPUT(COUNT(dsSet));
\\n\\nIt looks like COUNT doesn't like SET OF DATASET types but can handle simpler types (at least). Is this documented somewhere?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-09 18:22:51\" },\n\t{ \"post_id\": 1068, \"topic_id\": 258, \"forum_id\": 8, \"post_subject\": \"Manipulating SET OF contents\", \"username\": \"DSC\", \"post_text\": \"I have a set of datasets populated manually:\\n\\nds1 := PROJECT();\\nds2 := PROJECT();\\nds3 := PROJECT();\\n\\noneSet := [ds1,ds2,ds3]
\\n\\nI now need to remove recordsets from the set that don't have records. In other words, if ds2 in the above example doesn't actually contain anything (say, due to a failed index filter), I want it removed:\\n\\nnextSet := [ds1,ds3]
\\n\\nThe reason I want it removed is so that something like MERGEJOIN will work correctly, as an empty set element will cause a distributed inner join to fail completely.\\n\\nI've tried writing a function that implements a GRAPH to create a new set with only non-empty record sets, but that is not going well. I'm being plagued by obtuse syntax errors, among other things, like this supposedly-simple TRANSFORM attempt:\\n\\nResultRec := RECORD\\n\\tSET OF DATASET\\tresultSet := [];\\nEND;\\n\\nResultRec AppendSet(ResultRec l, DATASET ds) := TRANSFORM,SKIP(~EXISTS(ds))\\n\\tSELF.resultSet := l.resultSet + [ds];\\nEND;\\n\\nError: syntax error near "resultSet" : expected < (346, 9), 3002
\\n\\nI'm thinking that there is an easier way to do this but I'm just not seeing it. Thoughts?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-09 15:49:18\" },\n\t{ \"post_id\": 1112, \"topic_id\": 267, \"forum_id\": 8, \"post_subject\": \"Re: hqlfold.cpp error\", \"username\": \"ghalliday\", \"post_text\": \"The easiest way is to send me an email containing an archive of the query. \\n(ghalliday@hpccsystems.com).\\n\\nOtherwise you'll need to build a debug build of the system and try and work out what is happening.\", \"post_time\": \"2012-02-14 17:05:39\" },\n\t{ \"post_id\": 1111, \"topic_id\": 267, \"forum_id\": 8, \"post_subject\": \"hqlfold.cpp error\", \"username\": \"thildebrant\", \"post_text\": \"Hello,\\nAm seeing this error:\\n"eclserver 3000:assert(lhs->queryChild(0)->getOperator()==no_self) failed - file: hqlfold.cpp, line 926"\\n\\nIs there any easy way to trouble shoot this?\\n\\nThank you,\\nTodd\", \"post_time\": \"2012-02-14 15:38:11\" },\n\t{ \"post_id\": 1156, \"topic_id\": 269, \"forum_id\": 8, \"post_subject\": \"Re: Logging inside a BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Thanks a lot\\n\\nI will see the logging plug in. Debugging is not really that much of an issue as I can just output everything to a file. My worry has to do with error reporting (logging). If a user calls a BEGINC++ function and something goes wrong it has to be reported somewhere\", \"post_time\": \"2012-02-21 17:31:35\" },\n\t{ \"post_id\": 1151, \"topic_id\": 269, \"forum_id\": 8, \"post_subject\": \"Re: Logging inside a BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"No one seems to want to answer this one...\\n\\nI would strongly recomend debugging your c++ code as a separate c++ project, and using standard logging to stderr/stdout or dgb etc. to debug the program.\\n\\nIf that doesn't work I would try and debug the code by using the standalone option of eclcc to create a local executable, and run that locally. Again you can use the normal debugging techniques.\\n\\nIf that really doesn't work you can use some logging calls as a last resort. See the logging plugin for details of what can be done from ecl. That might get tricky to call from c++, but you could probably call dbglog somewhere else in the code and then call logDbgLog directly from the beginc++. (See the source of plugins/logging for the plugin code.)\", \"post_time\": \"2012-02-21 14:34:18\" },\n\t{ \"post_id\": 1119, \"topic_id\": 269, \"forum_id\": 8, \"post_subject\": \"Logging inside a BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Hi \\nI have been able to run and link successfully a BEGINC++ session\\nI need to do some debugging. I wonder what is the best way? For the moment I tries to write the messages to standard output. But they are obviously redirected somewhere else. 
What is the best way to output/log messages?\", \"post_time\": \"2012-02-17 06:28:24\" },\n\t{ \"post_id\": 1171, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"OK I think I got it\", \"post_time\": \"2012-02-22 00:26:10\" },\n\t{ \"post_id\": 1170, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"I think we are almost done.\\n\\nI apologize for the long ping-pong, but the goal of all this is really to enhance the tutorials that I am writing.\\n\\nLet me recap\\n\\nI have published a query where the input is given through the STORED('InputFileName'); \\n\\nLet's call it myquery\\n\\nNow I want to trigger it with a new dataset\\n\\nSo I need to send to thor the following commands\\n\\nSTRING logicalname := 'defaultfile' : STORED('InputFileName');\\nx := DATASET(MyRec, logicalname);\\n\\nHow do I send them?\\n\\nIs it eclplus @myfile server=. cluster=thor ? where myfile has the two commands mentioned above?\\n\\nHow do I specify that this is going to the specific myquery? Will THOR understand it by matching 'InputFileName' to the stored variables in the cluster?\", \"post_time\": \"2012-02-22 00:25:15\" },\n\t{ \"post_id\": 1169, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"And for the input dataset case you can pass it through eclplus using the query=xml parameter or query=@file.\\n\\nFor the "ecl <command>" style command line its\\n\\necl run queryset query --input=xml or --input=filename.xml\", \"post_time\": \"2012-02-22 00:22:16\" },\n\t{ \"post_id\": 1168, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Ah, sorry, I think what you want is:\\n\\nfor cases where you want the input to specify the file to use for input\\n\\nSTRING logicalname := 'defaultfile' : STORED('InputFileName');\\nx := DATASET(MyRec, logicalname);\\n\\nFor cases where you want the entire dataset to come from query input rather than file:\\n\\nx := DATASET([], MyRec) : STORED('InputDataset');\", \"post_time\": \"2012-02-22 00:16:07\" },\n\t{ \"post_id\": 1167, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"Sorry My mistake\\n\\nI understand all about spraying, what I don't get is how I trigger the query with the new DATASET\\n\\nFrom what I understand the published query is like a function which has an input and an output. I want to tell the system that this is the new input \\n\\nx:=DATASET(MyRec, 'myfile')\\n\\nSo if the input in the published query is STORED('MyInput')\\nI want to trigger the published query by telling it that MyInput is x, give me the new output\", \"post_time\": \"2012-02-21 23:55:15\" },\n\t{ \"post_id\": 1166, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You want to get the file onto the system, and then run your ECL code against it.\\n\\nStart by looking at the HPCC Data Tutorial document. \\n\\nhttp://hpccsystems.com/community/docs/d ... rial-guide\\n\\nBut the basics are that you first get your file onto the clusters "landing zone" and then use a process called "spraying" that distributes the file across the cluster. 
You give it a logical file name, and that is what you refer to in your ECL code.\\n\\nSpraying can be done through EclWatch, or via the dfuplus command line tool.\\n\\nDFS is the distributed file system.. accessed through DFU.\", \"post_time\": \"2012-02-21 23:51:13\" },\n\t{ \"post_id\": 1165, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"Ok Great thanks for the answer, things start to become more clear.\\nI think I am missing a last detail\\n\\nLets say I have published a query that has its input in a stored variable called MyInput and its output in the MyOutput\\n\\nNow I have a dataset from my file that I want to send it to the published query\\n\\nHow do I submit this file so that it will trigger the query and give the result. \\n\\nThis is what I will sort of do:\\n\\nMake a file with this content\\nx:=DATASET(MyRec, 'myfile');\\n\\nand then send it with eclplus somehow.\\n\\nCan you elaborate on that?\", \"post_time\": \"2012-02-21 23:38:30\" },\n\t{ \"post_id\": 1164, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"I'm a bit suprised when you say that your ecl has no direct or file output, but\\n\\nyes, eclplus @file1.ecl server=. cluster=thor would create a workunit.\\n\\nEclplus should output the workunit id followed by any direct output, if you have any.\\n\\neclplus @myfile.ecl server=10.239.219.10 cluster=hthor\\nWorkunit W20120221-175025 submitted\\n[myoutput]\\nname age\\nJohn 21\\nJane 22\\n\\nYou can then lookup the workunit W20120221-175025 in EclWatch.\\n\\nFrom the workunit details page, assign it a name, ("myquery") and click on\\nthe publish button.\\n\\nAny STORED values become query input parameters. Any "direct" output statements are query outputs... although hthor and thor queries would often output to files.\\n\\nThis would create a query in the thor queryset named "myquery". You could then find the query in WsECL and submit it muliple times. Submitting the query to the thor would create a new result workunit each time that would contain the new results, if any.\\n\\nPublished queries can also then called via SOAP, REST, JSON, etc.\\n\\nI don't think eclplus @file1 @file2 @file3 serve=. cluster=thor would actually work... it looks like it would just take the last file as input.\\n\\nThere are new command line tools "ecl <command>". These take one file, but gather all of the dependencies before creating the workunit.\\n\\nFor example if file1 depended on file2 and file3...\\n\\necl deploy file1.ecl -cl=thor\\n\\nwill create a workunit from file1 and its dependencies.\\n\\necl run file1.ecl -cl=thor\\n\\nwould do the same, but also execute the workunit.\\n\\nelc publish file1.ecl -cl=thor --name=myquery --activate\\n\\nwould actually combine all the steps, creating a workunit and then publishing it as a query right away.\", \"post_time\": \"2012-02-21 23:19:55\" },\n\t{ \"post_id\": 1163, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"Thanks \\n\\nThis is very helpful. A critical problem with ECL/HPCC which I am trying to solve is match your terminology with what people already know from other systems. The term "Publish" is something I wasn't aware of and it starts making things more clear. But let's go one step back and define what query is. 
I am not sure I understand where the input and output is defined.\\n\\nSo typically what I do is to write modules, functions etc.\\nSo let's say I have three files that export modules.\\nfile1.ecl, file2.ecl, file3.ecl\\n\\nif I do eclplus @file1.ecl server=. cluster=thor \\nis this creating a workunit?\\nRemember there is no output or DATASET(..., file) command.\\nBy the way which commands define something as an input and which ones define something as an output? So far I know that if I use OUTPUT(...) this defines something as an output and if I use DATASET(..., file) it defines something as an input.\\n\\nAlso if I submit eclplus @file1 @file2 @file3 serve=. cluster=thor will it put all the modules in the same workunit\\n\\nLast question. Let's say that I have published a workunit, how do I send an input to the specific one, and how do I get the output?\", \"post_time\": \"2012-02-21 21:16:45\" },\n\t{ \"post_id\": 1159, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"It might help here to define the term "Publish". \\n\\nYou can take an existing workunit, whether it has already been executed, or simply compiled, and publish it so that it can be run repeatedly with different inputs. When you publish a query you give it a name and add it to a collection of queries known as a queryset. Currently each queryset corresponds to one of the available clusters.\\n\\nThe page in eclwatch where you view details about a workunit has a button that makes publishing it very simple.\\n\\nYou can access all of the currently published queries by viewing the WsECL page via your browser. Current default uses the the eclwatch IP and port 8002, so http://eclwatch_ip:8002.\\n\\nQuery inputs are defined using the stored keyword.\\n\\nThe WsECL page will show you the currently published queries, and provide forms and other ways of calling them that allow you to input values for all of the STORED definitions.\\n\\nIf the queryset is associated with a roxie, that roxie will automatically load the query once its published.\\n\\nYou can also call published queries using SOAP/XML through WsECL (WSDL definitions are automatically provided). For roxie queries you also have the option of sending SOAP directly to the roxie.\", \"post_time\": \"2012-02-21 19:04:10\" },\n\t{ \"post_id\": 1153, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"bforeman\", \"post_text\": \"Thank David for your input. What I was abouit to say I think is in harmony with what you just said:\\n\\nThe way that I understand STORED, the workflow service is useful for triggering external (RUNTIME) input for Roxie queries, and for that it is essential. \\n\\nTake a look at the Data Tutorial's FetchPeopleByZipService and publish the query to Roxie with the STORED workflow service removed. What you will see in WS_ECL is a published query with NoStoredRequest and no way to test the query using the service.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-21 14:42:58\" },\n\t{ \"post_id\": 1152, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"dabayliss\", \"post_text\": \"For those of you old enough to remember them; a workunit is a job folder. The little manilla folder that contained the parameters and results for a 'job' as it floats around a system.\\n\\nLogically a work-unit is created when a job is executed. 
As part of job execution it is possible to specify values; these are stored in the WU. The WU also contains (or contains references to) all the other things the environment needs to execute the job - including the code.\\n\\nAs a job progresses the WU will 'grow' - it will acquire execution information and may acquire one or more results (or pointers to results) from the system. If a job has workflow instructions within it then the WU may become inactive and active during its life time - the WU is that 'state' of the job in progress. Once the job is complete the WU is the result of the job (again - much data may only be pointed to)\\n\\nThe 'multiple queries' bit is a little confusing. It should really be: "a single job with multiple outputs" - the WU stores all of the outputs using 'NAMED's if given.\\n\\nSTORED() - is a way of obtaining the run-time parameters specified by the WU. Contrary to what some claim - these are neither global nor variable. They are RUNTIME constants; the closest equivalent is environment variables in Linux.\\nThey are useful in thor; but their use is most obvious in roxie. If you publish a query with stored variables; then each stored variable is extracted from the XML query that is sent to roxie.\\n\\nDavid\", \"post_time\": \"2012-02-21 14:40:09\" },\n\t{ \"post_id\": 1149, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"Thanks Bob, I think we have some progress in the understanding, but can you give me an example that defies my following argument:\\n\\nAssume the following job \\n\\nEXPORT INTEGER x:=5*3;\\n.....\\n....\\n.....\\n\\nINTEGER y:=x+1;\\n\\n\\nNow if that works, why do I need to use \\n\\nINTEGER x:=5*3 : STORED('x');\\n.....\\n.....\\n.....\\nINTEGER x:=x+1\", \"post_time\": \"2012-02-21 14:20:54\" },\n\t{ \"post_id\": 1142, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"Re: WORKUNIT STORED\", \"username\": \"bforeman\", \"post_text\": \"Here is what I currently know about workunits \\n\\nA workunit can be thought of simply as an ECL job. The Dali Server creates and maintains these workunits, and the Sacha server archives them. Inside the workunit is everything that the HPCC needs to know about the ECL job, and stores the code, timings, graph information, input file information, and even the results returned by the workunit. It also includes C++ code generated and system logs for the job. \\n\\nA typical ECL job needs to return at least one result, but often they can return multiple query results (for example, show me the data in this format and then show me the data in another format). I'm focusing on ECL workunits, but also there are DFU workunits that handle the spraying and despaying jobs applied to the target HPCC.\\n\\nThe STORED workflow service stores the result of the expression with the workunit that uses the ECL definition so that it remains available for use throughout the workunit. If a definition name is provided then the value of that definition will be pulled from storage, if it has not yet been set it will be computed, stored and then used from storage. This service implicitly causes the definition to be evaluated at a global scope instead of the enclosing scope.\\n\\nSTORED is also essential for developing queries for ROXIE. The WS_ECL service scans these STORED services and creates input fields for testing the queries from WS_ECL. 
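For example, a minimal sketch of a query input defined with STORED (the names and the PeopleDS dataset are only placeholders, not something from the tutorial):

// 'ZIPValue' becomes an input field on the WS_ECL form once the query is
// published; PeopleDS stands in for whatever dataset the query reads.
STRING5 zipFilter := '' : STORED('ZIPValue');
OUTPUT(PeopleDS(zip = zipFilter), NAMED('PeopleByZip'));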
\\n\\nHope this information helps you!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-21 14:04:54\" },\n\t{ \"post_id\": 1133, \"topic_id\": 272, \"forum_id\": 8, \"post_subject\": \"WORKUNIT STORED\", \"username\": \"nvasil\", \"post_text\": \"I am a little bit puzzled about what a WORKUNIT is.\\n\\nFrom the reference manual and other related ECL documentation it seems to me that a WORKUNIT is the same thing as a query. For example, when I do \\n\\n
eclplus @myquery server=. cluster=thor \\n
\\nI implicitly create a workunit. I am not sure if a workunit can have more than one query. Also, once I submit a query, is it possible to modify it by attaching more queries to it? There needs to be clarification about the scope and life of a workunit.\\n\\nI am also trying to understand the meaning and purpose of STORED.\\nIt seems to me that STORED was invented so that WORKUNITS can communicate. So if I submit a query (or workunit?) like this:\\n\\nSTRING x:="nick" : STORED('the_idiot');\\n
\\n\\nthen somebody else executing a different query (WORKUNIT?) can have a statement like this:\\n\\n\\nSTRING my_husband:=the_idiot;\\n
\\n\\nbut this doesn't look right, as there might be more than one workunit that exports the_idiot. I checked the WORKUNIT command, but it doesn't seem to provide scoping.\\n\\nLastly, if on a different WORKUNIT I am executing \\n\\nSTRING my_husband:=the_idiot;\\n
\\nhow do I know that the_idiot exists, or when it is created, or if it will ever be created\", \"post_time\": \"2012-02-20 22:00:28\" },\n\t{ \"post_id\": 1139, \"topic_id\": 273, \"forum_id\": 8, \"post_subject\": \"Re: TEMPLATE language\", \"username\": \"ghalliday\", \"post_text\": \"Having looked at the manual I agree it is confusing. We should come up with a better term for the main use.\\n\\nI think part of the confusion is that template language used in the language reference has historical connotations going back to a previous product from a long while ago (a rapid application generator), which the developers and documentation team both worked on. The concept in that product was you had a "template" which defined various attributes, and rules for generated program source from those attributes.\\n\\nWhen the language reference describes the template language (e.g., page 323?->) in c++ terms it is really describing a more powerful preprocessor. As well as #IF it supports loops, setting variables, examining variables etc.. They can be used (with great care!) to examine the types of datasets and generate ECL based on the fields in a record etc.. A long time ago it was used to generate ECL source code from an xml definition - which is the reason for the capabilities.\\n\\n\\nThere is another language feature closer to the c++ idea of templates.\\n\\nHistorically we have used macros to provide some level of template definitions. The original syntax was the MACRO keyword, but for complex macros the newer FUNCTIONMACRO keyword is often more appropriate because it has better encapulation of the symbols defined within it, and it can be used more like a normal attribute.\\n\\nThere are two disadvantages with macros though\\n- The parameters are treated differently from normal attributes. Most of the time it isn't a problem, but it can be confusing.\\n- The syntax isn't checked until the macro is used.\\n\\nThere is some work in progress to add elements to the language that allow you to define more flexible definitions but without either of those two drawbacks. Sometimes "template" is used to describe these elements of the language - which are much closer to the c++ idea of templates.\\n\\nI hope that has added some clarity. If not please come back with any questions.\", \"post_time\": \"2012-02-21 09:41:40\" },\n\t{ \"post_id\": 1134, \"topic_id\": 273, \"forum_id\": 8, \"post_subject\": \"TEMPLATE language\", \"username\": \"nvasil\", \"post_text\": \"I am a little bit confused with the template library in ECL. Maybe we can start with some easy examples. It would be nice if an analogy with C++ templates can be given.\\n\\nI noticed for example that in the ML library in the Types.ecl file \\nyou have the following definition\\n\\n\\nEXPORT NumericField := RECORD\\n t_RecordID id;\\n t_FieldNumber number;\\n t_FieldReal value;\\n
\\n\\nLet's say I wanted to be able to control the precision of the value attribute, So I would like to be able to write something like this, inspired by the C++ templated:\\n\\n\\nEXPORT\\ntemplate<typename MyPrecision> \\nNumericField := RECORD\\n t_RecordID id;\\n t_FieldNumber number;\\n MyPrecision value;\\n
\\n\\nThen I would like to instantiate it:\\n\\n\\nDATASET(NumericField<REAL>) mydataset; \\n
\\n\\nLet's take it a little bit further. I noticed that there is a specific type of templates called MACRO which can be used for functions only. Is there any way to unify all of them?\\n\\nI would like to be able to do the following for FUNCTION, TRANSFORM, MODULE\\n\\n\\ntemplate<typename MyRecordType>\\nSTRING MyFunction(DATASET(MyRecordType) x) := MODULE\\n.....\\nEND; \\n
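A parameterised record along these lines can be approximated today with a FUNCTIONMACRO; a minimal sketch (the field names follow the NumericField layout above, the macro name is just illustrative):

NumericFieldOf(IdType, ValueType) := FUNCTIONMACRO
  RETURN RECORD
    IdType id;
    IdType number;
    ValueType value;
  END;
ENDMACRO;

RealField := NumericFieldOf(INTEGER, REAL);  // the "instantiation"
mydataset := DATASET([], RealField);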
\", \"post_time\": \"2012-02-20 22:16:47\" },\n\t{ \"post_id\": 1150, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"They're similar in that they are compound objects. The most natural expression of a module in c++ would probably be a class.\", \"post_time\": \"2012-02-21 14:21:47\" },\n\t{ \"post_id\": 1148, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"It can be, but most of the time isn't.\\n\\nGenerally rows are streamed through the query graph, and each row is held as a separate allocation. If a row has child datasets then they may be stored inline, or may be stored as separate rows depending on the context.\\n\\nA c++ function that returns a dataset typically returns a block of memory - however there are options to return it as a array of rows, and options to stream it a record at a time.\", \"post_time\": \"2012-02-21 14:19:42\" },\n\t{ \"post_id\": 1146, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"I had the impression that the DATASET is a memory mapped file. Is that correct?\", \"post_time\": \"2012-02-21 14:13:53\" },\n\t{ \"post_id\": 1145, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"If your dataset doesn't fit in memory then using beginc++ is a bit more challenging.\\n\\nThere is some support for datasets that stream (see stream.ecl and other examples in the github repository under ecl/regress).\\n\\nTo return multiple datasets you are going to find it hard.\", \"post_time\": \"2012-02-21 14:11:42\" },\n\t{ \"post_id\": 1144, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Something else that came out of this conversation.\\nIs MODULE in ECL an equivalent to a C++ class?\\nWhat are the similarities and differences?\\nI want to add them in my tutorial about ECL\\n\\nThanks\", \"post_time\": \"2012-02-21 14:09:03\" },\n\t{ \"post_id\": 1141, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Thanks for the response. In a previous conversation with David I think, I was discouraged to use this solution, because a RECORD has to fit in memory. So if the DATASETs within a record were big, that would be a problem. Is that correct?\", \"post_time\": \"2012-02-21 13:51:31\" },\n\t{ \"post_id\": 1137, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"Re: returning more than one arguments in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"The natural way for this to be supported would be for the beginc++ to define a module which could map to a c++ class. 
I do have long term plans for supporting something like that, but that is part of a more substantial piece of work, so is likely to be a while before it sees the light of day.\\n\\nIn the short term you can return a dataset with 1 record which has two child datasets.\", \"post_time\": \"2012-02-21 09:13:57\" },\n\t{ \"post_id\": 1135, \"topic_id\": 274, \"forum_id\": 8, \"post_subject\": \"returning more than one arguments in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"I guess if someone wants to return more than one variables in a beginc++ section he would better pack them in a RECORD. But what if somebody wants to return two DATASETS putting them in one RECORD is inefficient. Would it be too difficult to to support variable number of return arguments in BEGINC++\", \"post_time\": \"2012-02-21 06:22:59\" },\n\t{ \"post_id\": 1147, \"topic_id\": 275, \"forum_id\": 8, \"post_subject\": \"Re: Some more clarifications in BEGINC++ side effects\", \"username\": \"ghalliday\", \"post_text\": \"Yes that would define a function with state. However there is no guarantee of how many times the function would be called, or even which process/slave it is called from, so it may or may not work how you want it to.\\n\\nIf you want to stream a dataset you shpuld use the streamed modifier (see other posts)\", \"post_time\": \"2012-02-21 14:16:42\" },\n\t{ \"post_id\": 1143, \"topic_id\": 275, \"forum_id\": 8, \"post_subject\": \"Re: Some more clarifications in BEGINC++ side effects\", \"username\": \"nvasil\", \"post_text\": \"Thanks a lot\\n\\nIf I want my function in C++ to have side effects (also known as persistent variable), would the following work\\n\\n\\nINTEGER MyFunc(INTEGER time_called) := BEGINC++\\n #include <vector>\\n #body \\n \\n static std::vector<long long> *my_vec;\\n if (time_called==0) {\\n my_vec=new std::vector<long long>(5);\\n __result=0;\\n return;\\n }\\n if (time_called==5) {\\n __result=0;\\n delete my_vec;\\n return;\\n }\\n __result=my_vec->size();\\nENDC++\\n\\n\\nAnd then I call it like this\\nMyFun(0)\\nMyFunc(1)\\nMyFunc(5)\", \"post_time\": \"2012-02-21 14:06:08\" },\n\t{ \"post_id\": 1138, \"topic_id\": 275, \"forum_id\": 8, \"post_subject\": \"Re: Some more clarifications in BEGINC++ side effects\", \"username\": \"ghalliday\", \"post_text\": \"The issue if side-effects is more to do with whether the function has any internal state. If it is pure passing the same arguments will give you the same result.\\n\\nThe classic example of a function with side-effects/state is rand(). Each call returns a different answer.\\n\\nThere are some functions (e.g., day of the year) which do have state, but you don't really want them being re-executed each time they are used. I don't think it is quite the correct construct, but currently once is there to indicate it does have state, but should only be evaluated once per query.\", \"post_time\": \"2012-02-21 09:17:56\" },\n\t{ \"post_id\": 1136, \"topic_id\": 275, \"forum_id\": 8, \"post_subject\": \"Some more clarifications in BEGINC++ side effects\", \"username\": \"nvasil\", \"post_text\": \"I have some difficulty understanding the difference between\\n#option pure
\\nand\\n#option once
\\nthey both seem to indicate that there are no side effects, according to this:\\nhttp://hpccsystems.com/community/docs/ecl-language-reference/html/beginc%20%20-structure\\n\\nWhat I don't understand is how the C++ functions can have side effects, since the data structures are either passed by value or, if they are pointers, they are always const.\\nDoes this mean that I can modify a dataset \\ndataset(r) ABC -> size32_t lenAbc, const void * abc
\\nby const casting\\n\\nconst_cast<void*>(abc)[0]=1
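A small, hedged BEGINC++ sketch of the distinction discussed above: pure tells the code generator the function has no internal state (same arguments, same result), while once asks for a single evaluation per query. The example is illustrative only:

// A stateless helper: safe to mark pure, so repeated calls with the same
// argument can be commoned-up by the compiler.
INTEGER4 AddOne(INTEGER4 x) := BEGINC++
  #option pure
  #body
  return x + 1;
ENDC++;

OUTPUT(AddOne(41));  // 42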
\", \"post_time\": \"2012-02-21 07:10:09\" },\n\t{ \"post_id\": 1208, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nGlad it worked for you.PS: Next up, in another thread, on another day: How to optimize when those field-level search results return a lot of records.
Well, that's part of what LIMIT, CHOOSEN, and TOPN are for. \\n\\nRichard\", \"post_time\": \"2012-02-24 20:53:15\" },\n\t{ \"post_id\": 1206, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"DSC\", \"post_text\": \"w00t!
\\n\\nRichard, your code worked beautifully. Well, I changed it a little, but the basic idea you put forth solved the performance problem easily.\\n\\nI hope you still keep the other issue open, though, as I do think there is a gremlin lurking in the SET OF DATASET version of JOIN.\\n\\nThanks again!\\n\\nDan\\n\\nPS: Next up, in another thread, on another day: How to optimize when those field-level search results return a lot of records.\", \"post_time\": \"2012-02-24 20:33:52\" },\n\t{ \"post_id\": 1205, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
Wouldn't it be more efficient to simply use TABLE(ResIDs,{CompanyID},CompanyID) when spitting out the AND result? Or use some other technique for picking out the CompanyID values? Is that filter against dsCorpus[1] really necessary?
If that gets you where you need to go, sure. I was just offering an alternative approach and maintaining orthogonality with your code (not knowing what else was in the records). It's not like I was actually able to test it, after all. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-24 20:24:25\" },\n\t{ \"post_id\": 1202, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":c9flecrl]Dan,\\n\\nHere's a suggestion -- why not get rid of the JOIN for the AND and try it this way:\\n\\nIt took me five minutes to figure out why that code will work. Wow.\\n\\nWouldn't it be more efficient to simply use TABLE(ResIDs,{CompanyID},CompanyID) when spitting out the AND result? Or use some other technique for picking out the CompanyID values? Is that filter against dsCorpus[1] really necessary?\\n\\n[quote="rtaylor":c9flecrl]I'm also not getting your use of DEDUP for the first MAP result. Is it possible to get multiple instances for each parameter?\\n\\nDEDUP was necessary, but I've changed things since then. Now my BUILD statements have a DEDUP option, which is better.\\n\\nI'll take the code revision for a spin and let you know the results.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-24 20:06:02\" },\n\t{ \"post_id\": 1195, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nHere's a suggestion -- why not get rid of the JOIN for the AND and try it this way:
\\nParmCnt := (integer)hasCityParam +\\n\\t (integer)hasStateParam +\\n \\t (integer)hasPostalParam +\\n \\t (integer)hasCountryIDParam +\\n \\t (integer)hasCountyIDParam +\\n \\t (integer)hasAreaCodeParam;\\n\\ndsResAll := cityRecords + stateRecords +\\n postalRecords + countryIDRecords +\\n countyIDRecords + areaCodeRecords;\\n\\nResIDs := TABLE(dsResAll,{CompanyID,Cnt := COUNT(GROUP)},Companyid)(Cnt = ParmCnt);\\nSetResIDs := SET(ResIDs ,Companyid);\\n\\t \\n \\nresults := MAP (\\n COUNT(dsIndexesUsed) = 1 => DEDUP(dsCorpus[1],companyID),\\n COUNT(dsIndexesUsed) > 1 => IF (p.useANDOperator,\\n\\t\\t\\tdsCorpus[1](companyID IN SetResIDs),\\n MERGE(dsCorpus,companyID,SORTED(companyID),DEDUP)\\n )\\n );\\n \\n
I'm also not getting your use of DEDUP for the first MAP result. Is it possible to get multiple instances for each parameter?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-24 19:10:47\" },\n\t{ \"post_id\": 1180, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"bforeman\", \"post_text\": \"Thanks Dan, I have a message posted to the development team to see if I might have overlooked anything. Will keep you posted when I get some more information.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-23 15:10:10\" },\n\t{ \"post_id\": 1179, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"DSC\", \"post_text\": \"Thanks, Bob! Glad to hear that I've found the right tree to bark up.\\n\\nI have tried a MERGEJOIN, but it gave virtually identical results (to the accuracy of a wall clock) to JOIN. To give you a bit more information on the rest of this environment:\\n\\n* While the function I posted is a complete search, my test actually outputs only a COUNT of the final dataset. That's still valid for a Real Search, which is a different Roxie query, as the result of a real search would include the total number of found records. Still, I'm doing only a count at the moment.\\n\\n* Passing one search parameter for a city, one parameter for postal code, and setting p.useANDOperator to FALSE gets a response pretty much instantly. I'm using the web form for testing, with XML output. The returned value is a whopping 4250. If you search for these parameters individually, 4197 and 4117 are returned.\\n\\n* The same search with p.useANDOperator set to TRUE takes between 8-9 seconds to return. The returned value is 4174 (which makes sense because the AND version is not currently deduping).\\n\\nBottom line, we're not talking about a lot of records at all.\\n\\nBTW, I was asking about the IN operator because it also seems to create a delay out of proportion to its input. With the above example, adding a second city to the query (with p.useANDOperator FALSE) results in a 1-second response of 79K. Not terrible, just possibly out of line. Or I'm just jumping at things after JOIN debugging sessions.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-23 15:07:13\" },\n\t{ \"post_id\": 1178, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Re: Performance of JOIN versus MERGE\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nFirst, a very nice bit of ECL coding. Good job. Your approach looks good to me.\\n\\nI'm wondering if a MERGEJOIN might be better than a JOIN in this usage? Have you tried that?\\n\\n...and using the IN operator in a SET of values is the best way to search/filter in the way you set it up.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-23 14:23:58\" },\n\t{ \"post_id\": 1174, \"topic_id\": 276, \"forum_id\": 8, \"post_subject\": \"Performance of JOIN versus MERGE\", \"username\": \"DSC\", \"post_text\": \"I'm testing Roxie's search performance. In my test, the user can submit zero or more search parameters for up to six different fields. Within the a particular field, the results are OR'd together (using ECL's IN command), but the user can choose whether to OR or AND the results of each field. For example, the user can choose to search for either 'austin OR texas' or 'austin AND texas'.\\n\\nWhat I'm seeing are *vast* differences in the performance of the OR versus AND combining. 
The OR version seems to absolutely fly -- the above OR example would be easily sub-second response time -- while the AND version seems to take 10x longer, or more. The code is the same, except for the final 'combine the individual results' bit:\\n\\nEXPORT\\tPerformSearch(SearchParameters p) := FUNCTION\\n\\tcityNames := HUtil.SplitAndLowerString(p.cityNames);\\n\\tstateAbbreviations := HUtil.SplitAndLowerString(p.stateAbbreviations);\\n\\tpostalCodes := HUtil.SplitAndLowerString(p.postalCodes);\\n\\tcountryIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countryIDs);\\n\\tcountyIDs := (SET OF INTEGER)HUtil.SplitAndLowerString(p.countyIDs);\\n\\tareaCodes := HUtil.SplitAndLowerString(p.areaCodes);\\n\\t\\n\\t//----------------------------------------------------------------------\\n\\t// Determine which parameters were actually provided\\n\\t//----------------------------------------------------------------------\\n\\t\\n\\thasCityParam := cityNames != [];\\n\\thasStateParam := stateAbbreviations != [];\\n\\thasPostalParam := postalCodes != [];\\n\\thasCountryIDParam := countryIDs != [];\\n\\thasCountyIDParam := countyIDs != [];\\n\\thasAreaCodeParam := areaCodes != [];\\n\\t\\n\\t//----------------------------------------------------------------------\\n\\t// Individual searches for each parameter\\n\\t//----------------------------------------------------------------------\\n\\t\\n\\tcityRecords := PROJECT(STEPPED(IDX_City(city IN cityNames),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\tstateRecords := PROJECT(STEPPED(IDX_State(state IN stateAbbreviations),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\tpostalRecords := PROJECT(STEPPED(IDX_Postal(postalCode IN postalCodes),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\tcountryIDRecords := PROJECT(STEPPED(IDX_CountryID(countryID IN countryIDs),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\tcountyIDRecords := PROJECT(STEPPED(IDX_CountyID(countyID IN countyIDs),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\tareaCodeRecords := PROJECT(STEPPED(IDX_AreaCode(areaCode IN areaCodes),companyID),TRANSFORM(CompanyIDSearchResultRecord,SELF:=LEFT)) : ONWARNING(4523,IGNORE);\\t// Ignore 'Neither LIMIT() nor CHOOSEN() supplied for index read' warning\\n\\t\\n\\t//----------------------------------------------------------------------\\n\\t// Create a SET containing search results for those parameters that\\n\\t// were actually submitted\\n\\t//----------------------------------------------------------------------\\n\\t\\n\\tdsResults :=\\t[\\n\\t\\t\\t\\t\\t\\tcityRecords,\\n\\t\\t\\t\\t\\t\\tstateRecords,\\n\\t\\t\\t\\t\\t\\tpostalRecords,\\n\\t\\t\\t\\t\\t\\tcountryIDRecords,\\n\\t\\t\\t\\t\\t\\tcountyIDRecords,\\n\\t\\t\\t\\t\\t\\tareaCodeRecords\\n\\t\\t\\t\\t\\t];\\n\\t\\n\\tdsMask := 
DATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasCityParam,1,0)},\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasStateParam,2,0)},\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasPostalParam,3,0)},\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasCountryIDParam,4,0)},\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasCountyIDParam,5,0)},\\n\\t\\t\\t\\t\\t\\t\\t\\t{IF(hasAreaCodeParam,6,0)}\\n\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t{INTEGER dsIndex}\\n\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tdsIndexesUsed := SET(dsMask(dsIndex>0),dsIndex);\\n\\t\\n\\tdsCorpus := RANGE(dsResults,dsIndexesUsed);\\n\\t\\n\\t//----------------------------------------------------------------------\\n\\t// Reduce the individual search results to a single, final result\\n\\t//----------------------------------------------------------------------\\n\\t\\n\\tresults :=\\tMAP\\t(\\n\\t\\t\\t\\t\\t\\tCOUNT(dsIndexesUsed) = 1\\t=>\\tDEDUP(dsCorpus[1],companyID),\\n\\t\\t\\t\\t\\t\\tCOUNT(dsIndexesUsed) > 1\\t=>\\tIF\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tp.useANDOperator,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tJOIN(dsCorpus,LEFT.companyID=RIGHT.companyID,TRANSFORM(LEFT),SORTED(companyID)),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tMERGE(dsCorpus,companyID,SORTED(companyID),DEDUP)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t);\\n\\t\\n\\tRETURN results;\\nEND;
\\n\\nIs there a different way of approaching this problem?\\n\\nRelated to performance: Is using the IN statement the most performant way of searching for multiple values within a single index?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-22 20:10:58\" },\n\t{ \"post_id\": 1211, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"ghalliday\", \"post_text\": \"I think you can achieve what you want by using the form of TRANSFORM that takes the target record as its first parameter:\\n\\nTRANSFORM(record, assignments);\\n\\nYou could then have another macro that calculates the type of the record required, something like the following:\\n\\n\\nmytransform(actiondataset m):=transform(FUNMAC_JoinRecord(outlay,dname,fname),\\n outlay:= m.outlayout;\\n dname:=m.dataset_name;\\n fname:=m.fieldname;\\n FUNMAC_JoinDataSets(outlay,dname,fname));\\nend;\\n
\\n\\nYou probably want the whole thing inside a single FUNCTIONMACRO e.g.,\\n\\n\\nmytransform(actiondataset m):= FUNCTIONMACRO\\n myRecord := .....;\\n myRecord myTransform := TRANSFORM\\n ...\\n END;\\n RETURN myTransform;\\nEND;\\n
\\n\\nWhichever way you do it you need to define the type of the value you are creating before you have the assignments.\", \"post_time\": \"2012-02-27 08:28:39\" },\n\t{ \"post_id\": 1184, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"bforeman\", \"post_text\": \"I'm not sure about APPLY, but your FUNCTIONMACRO needs a definition that will hold your return value, and if you want the result to be a part of the TRANSFORM output that definition should be a part of the record structure of the TRANSFORM result.\", \"post_time\": \"2012-02-24 13:17:44\" },\n\t{ \"post_id\": 1183, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"nparasher\", \"post_text\": \"instead of transform i used APPLY also because there is no return type but that is also giving macro expended error.\", \"post_time\": \"2012-02-24 06:27:11\" },\n\t{ \"post_id\": 1182, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"nparasher\", \"post_text\": \"i called my function macro inside this transform.it wants return type but i want to return the same what function macro is returning.If i write dataset as return type then that is given various error such as tExecRecJoin is unknown identifier and macro expand error.\\n\\nmytransform(actiondataset m):=transform\\n\\toutlay:= m.outlayout;\\n\\tdname:=m.dataset_name;\\n\\tfname:=m.fieldname;\\nFUNMAC_JoinDataSets(outlay,dname,fname);\\n\\t \\n\\t end;\\n\\t finaldataset:=project(actiondataset,mytransform(left));\\n\\t \\n\\t \\n\\t finaldataset;\", \"post_time\": \"2012-02-24 06:16:14\" },\n\t{ \"post_id\": 1177, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"dlingle\", \"post_text\": \"Defining a string value as a Qstring will also get you automatically capitalized chars.\", \"post_time\": \"2012-02-23 14:16:10\" },\n\t{ \"post_id\": 1176, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Re: Is function Macro is used inside the transform?\", \"username\": \"bforeman\", \"post_text\": \"Hi nprasher,\\n\\nI don't think you should have any problem nesting a call to your FUNCTIONMACRO inside of another TRANSFORM. 
You may have to define the RecType and LeftDS locally within the target TRANSFORM prior to calling the FUNCTIONMACRO, but I believe it should work fine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-23 13:53:06\" },\n\t{ \"post_id\": 1175, \"topic_id\": 277, \"forum_id\": 8, \"post_subject\": \"Is function Macro is used inside the transform?\", \"username\": \"nparasher\", \"post_text\": \"I want to use function macro inside the transform.\\nThe requirement is one by one read the record of one dataset and pass those record as a parameter to the function Macro.Let me know if it is possible.or recommend me some other way..\\n\\nDATASET FUNMAC_JoinDataSets(RecType,LeftDS,fieldname) := FUNCTIONMACRO\\n \\t\\tRecType tExecRecJoin(LeftDS L) := TRANSFORM\\n\\t\\t\\t\\n \\t\\t\\tSELF := L;\\n \\t\\tself.u_FIRSTNAME:=Std.Str.ToUpperCase(l.fieldname);\\n\\t\\t\\tself.l_FIRSTNAME:=Std.Str.Tolowercase(l.fieldname);\\n \\t\\tEND;\\n \\t\\t\\n \\t\\tOutDataSet :=project(LeftDS,tExecRecJoin(LEFT));\\n \\t\\tReturn OutDataSet;\\n ENDMACRO;\\n\\nthis is my function macro,it is working fine as i call like this\\nj1 := FUNMAC_JoinDataSets(outrec,ds_people,LASTNAME);\\n\\n\\ninstead of outrec,ds_people,lastname passing directly i want to read this coloumns name from the file.\", \"post_time\": \"2012-02-23 08:59:07\" },\n\t{ \"post_id\": 1207, \"topic_id\": 281, \"forum_id\": 8, \"post_subject\": \"Re: Dedup child records in a rollup transform function\", \"username\": \"dean\", \"post_text\": \"Richard, I'm running on boca duff!!\\n\\n... good luck strata next week. \\n\\ndean\", \"post_time\": \"2012-02-24 20:46:05\" },\n\t{ \"post_id\": 1203, \"topic_id\": 281, \"forum_id\": 8, \"post_subject\": \"Re: Dedup child records in a rollup transform function\", \"username\": \"rtaylor\", \"post_text\": \"Dean,\\n\\nYou must be working on the legacy 702 build, not the Open Source system, because this code runs perfectly on my training Thor:expertise_rec:={ unicode20 expertise };\\nperson_rec:={integer id, dataset(expertise_rec) expertises {maxcount(10)}};\\n\\npersons := dataset([{1,[{'butcher'},{'baker'}]},{1,[{'beggar'},{'baker'}]},\\n {2,[{'beggar'},{'baker'}]},{2,[{'thief'},{'baker'}]}\\n ],person_rec);\\n\\nperson_rec trollup(person_rec l, person_rec r):=transform\\n self.expertises:= dedup(SORT(l.expertises+r.expertises,record),record);\\n self:=l\\nend;\\nperson_ds:=rollup(persons,left.Id=right.Id,trollup(left,right));\\noutput(person_ds);
Try it on an OSS build.\\n\\nAlso note the SORT I added -- DEDUP needs it to find the duplicates.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-24 20:17:30\" },\n\t{ \"post_id\": 1201, \"topic_id\": 281, \"forum_id\": 8, \"post_subject\": \"Dedup child records in a rollup transform function\", \"username\": \"dean\", \"post_text\": \"Hey folks, \\n\\nI'm extracting person records from raw data. Each extracted person record has zero or more expertise subrecords:\\n\\nexpertise_rec:={ unicode20 expertise };\\nperson_rec:={integer id, dataset(expertise_rec) expertises {maxcount(10)}};\\n\\nAfter extracting person records, I find that there many with the same ID, but with different expertises. I'd like to do a rollup on the persons that merges all of their expertises into a single dataset. I'd also like to dedup the expertises.\\n\\nperson_rec trollup(person_rec l, person_rec r):=transform\\n self.expertises:= dedup(l.expertises+r.expertises,record);\\n self:=l\\nend;\\nperson_ds:=rollup(persons,left.Id=right.Id,trollup(left,right));\\n\\nI'm getting a "Thor does not yet support nested child queries" error. \\n\\nNote that if I do l.expertises+r.expertises (no dedup), it works fine, but there's a huge amount of redundant expertise records. \\n\\nThanks for assistance.\\n\\nDean\", \"post_time\": \"2012-02-24 19:51:43\" },\n\t{ \"post_id\": 1210, \"topic_id\": 282, \"forum_id\": 8, \"post_subject\": \"Re: Some help with MACRO\", \"username\": \"ghalliday\", \"post_text\": \"I think what you are trying to do is \\n\\nDatumMacro(IdType, ValueType) := FUNCTIONMACRO\\n RETURN RECORD\\n IdType id;\\n IdType number;\\n ValueType value;\\n END;\\nENDMACRO;\\n\\nMyDatum := DatumMacro(INTEGER, REAL);\\n
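A short follow-on usage sketch: once the FUNCTIONMACRO has been called, MyDatum behaves like any other RECORD definition:

// id and number are INTEGER, value is REAL, per DatumMacro(INTEGER, REAL).
ds := DATASET([{1, 1, 3.14159}], MyDatum);
OUTPUT(ds);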
\\n\\nI suspect almost all MACROs would be cleaner/simpler using FUNCTIONMACRO instead.\", \"post_time\": \"2012-02-27 08:18:29\" },\n\t{ \"post_id\": 1209, \"topic_id\": 282, \"forum_id\": 8, \"post_subject\": \"Re: Some help with MACRO\", \"username\": \"rtaylor\", \"post_text\": \"but I don't have anyway to capture the RECORD name that it has internally created.
If you want to use this MACRO multiple times in the same workunit, then just pass the name you want to use each time to the MACRO. Otherwise, you can hard code the name in the MACRO (and only use it once/workunit).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-24 20:58:07\" },\n\t{ \"post_id\": 1204, \"topic_id\": 282, \"forum_id\": 8, \"post_subject\": \"Some help with MACRO\", \"username\": \"nvasil\", \"post_text\": \"I have the following code\\n\\n\\n\\nDatumMacro(IdType, ValueType) := MACRO\\n #UNIQUENAME(numeric_field); \\n %numeric_field%:= RECORD\\n IdType id;\\n\\t IdType number;\\n\\t ValueType value;\\n END;\\nENDMACRO;\\n\\nMyDatum := DatumMacro(INTEGER, REAL);\\n\\nOUTPUT('hi');\\n
\\n\\nBasically, I want to define a RECORD with templated types.\\n\\nThe above code fails and the error message is very cryptic as it gives wrong cursor position\\n\\nemp.ecl(2,-85): error C3002: syntax error near the end of the line : expected APPLY, BIG_ENDIAN, BUILD, BEGINC++, CRON, DEFINE, DISTRIBUTION, ENUM, EVENT, expression, FULL, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, RANGE, RECORD, ROWSET, RULE, SERVICE, SET, <typename>, SUCCESS, TRANSFORM, TYPE, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, pattern, constant, MACRO, complex-macro, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', '$'\\ntemp.ecl(11,34): error C3002: While expanding macro datummacro\\n2 errors, 0 warning\\n
\\n\\nIt needs some fixing\\n\\nIf instead I use \\n\\n\\nDatumMacro(INTEGER, REAL);\\n
\\n\\nit compiles, but I don't have anyway to capture the RECORD name that it has internally created.\\n\\nAny thoughts?\", \"post_time\": \"2012-02-24 20:24:14\" },\n\t{ \"post_id\": 1213, \"topic_id\": 283, \"forum_id\": 8, \"post_subject\": \"Re: how can i use the Counter value of LOOP inside the Trans\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Try wrapping the loop body in a function:\\n\\n\\nlData:={STRING s1};\\ndData:=DATASET([{'no value yet'}],lData);\\n\\nfTransformIt(DATASET(lData) d,UNSIGNED loop_counter):=FUNCTION\\n lData dotransform(d L):= TRANSFORM\\n SELF.s1:=IF(loop_counter<5,'good','bad');\\n END;\\n RETURN PROJECT(d,dotransform(LEFT));\\nEND;\\n\\nLOOP(dData,6,fTransformIt(ROWS(LEFT),COUNTER));
\", \"post_time\": \"2012-02-27 14:41:30\" },\n\t{ \"post_id\": 1212, \"topic_id\": 283, \"forum_id\": 8, \"post_subject\": \"how can i use the Counter value of LOOP inside the Transform\", \"username\": \"Ghost\", \"post_text\": \"IS there a way to use the value of 'counter' in my dotransform function??\\ni don't want to pass a [counter] inside the dotransform function. As per the requirement i had to use the Counter value of LOOP??\\n\\n{string s1} dotransform(ds1 L):= transform\\nself.s1:= if('GetMeTheValueOfOuterCounter'< 5, 'good','bad');\\nend;\\n\\nget:= loop(ds1, counter<=10, project(ds1,dotransform(left)));\", \"post_time\": \"2012-02-27 10:11:42\" },\n\t{ \"post_id\": 1236, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"dabayliss\", \"post_text\": \"Yes - you want to narrow down your possibilities as much as possible as soon as possible. The 'rarer' the term - the smaller the result set ...\\n\\nDavid\", \"post_time\": \"2012-03-02 20:36:18\" },\n\t{ \"post_id\": 1235, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="dabayliss":1pjgxlev]Incidentally - the same IDF numbers can be used to shuffle the search sequence to optimize our global smart stepping system ...\\n\\nThis comment just came back and slapped me up the back of the head.\\n\\nHow can IDF numbers influence the performance of the stepping system? Are you talking about ordering things so that small datasets are considered first during GRAPH calls, for instance, in order to potentially minimize the number items that must be scanned?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-03-02 19:50:06\" },\n\t{ \"post_id\": 1226, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="dabayliss":34zcqdyc]The disjunction works too - but the math is a little nastier:\\n\\nNumber of records for David OR Florida = (15%+5%-15%*5%) * Population\\n\\nThe conditional co-occurrence is completely general purpose - the only 'gotcha' is that the number of 2 field co-occurences is N.(N-1) in the number of fields, three field is N.(N-1).(N-2) - etc, which quickly gets big. \\n\\nTherefore we chose to ask people to 'call out' the ones they want us to handle.\\n\\nThat looks like the "Inclusion–Exclusion Principle" I've been reading about this morning.\\n\\nThanks for the math. That saved me a bunch of work!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-28 17:15:32\" },\n\t{ \"post_id\": 1225, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"dabayliss\", \"post_text\": \"The disjunction works too - but the math is a little nastier:\\n\\nNumber of records for David OR Florida = (15%+5%-15%*5%) * Population\\n\\nThe conditional co-occurrence is completely general purpose - the only 'gotcha' is that the number of 2 field co-occurences is N.(N-1) in the number of fields, three field is N.(N-1).(N-2) - etc, which quickly gets big. 
\\n\\nTherefore we chose to ask people to 'call out' the ones they want us to handle.\\n\\nDavid\", \"post_time\": \"2012-02-28 16:55:17\" },\n\t{ \"post_id\": 1224, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="dabayliss":2i8c32sb]Firstly I should state that when you move from 'trial' to 'implementation' - you should seriously consider our SALT module - it tackles many, many things in this area.\\n\\nUnderstood, and I thank you for summarizing some of this here so quickly. It really helps!\\n\\nLet us assume that I know that 15% of my records are in Florida, lets assume that 5% of my records have the first name of David - then absent of other information the number of David's in Florida = 15% * 5% * my total population\\n\\nThe above mathematics assumes that the fields are independent; that the probability of getting a David is uniform from state to state. For 'David' that is fairly true; for 'Rodrigo' it might not be. Therefore you can refine your model my capturing combinations of field occurrence (in the SALT literature these are referred to as 'concepts') As the label may indicate; this is usually useful if there is some logical reason for the fields being correlated - for example some first and last names are heavily correlated.
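\n\nA minimal sketch of that arithmetic in ECL (the 15% and 5% figures are the ones above; the population value is only an illustrative assumption):\n\nPopulation := 1000000;\nPctFlorida := 0.15;\nPctDavid := 0.05;\n// David AND Florida, assuming the fields are independent\nEstAnd := ROUND(PctFlorida * PctDavid * Population);\n// David OR Florida (inclusion-exclusion)\nEstOr := ROUND((PctFlorida + PctDavid - PctFlorida * PctDavid) * Population);\nOUTPUT(EstAnd, NAMED('EstimatedAndCount'));  // 7500\nOUTPUT(EstOr, NAMED('EstimatedOrCount'));    // 192500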
\\n\\nThis makes perfect sense, especially for the intersection (AND) version. Between that and a bunch of research I've just done, I think I can get what I need for my testing.\\n\\nYour comment about correlation is also spot-on, and dovetails with my earlier thoughts. In my particular case, four of the six search fields are geographic and can largely be defined in terms of one another. Taking that into account would certain improve performance and accuracy at the expense of reusability. I was hoping for both generalization/reusability and performance, so I was avoiding hand-tooling the algorithm. That may have to be revisited....\\n\\nAnyway, thanks again for the pointers and the information. I have some interesting reading ahead.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-02-28 16:18:42\" },\n\t{ \"post_id\": 1223, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"dabayliss\", \"post_text\": \"Firstly I should state that when you move from 'trial' to 'implementation' - you should seriously consider our SALT module - it tackles many, many things in this area.\\n\\nA fuller mathematical treatment of what I am about to discuss is here (http://www.faqs.org/patents/app/20090271397) - and fragments and extensions to it are littered throughout our patent disclosures of the last five years ...\\n\\nSimple version is this (and as you are fielded - i will go fielded):\\n\\nLet us assume that I know that 15% of my records are in Florida, lets assume that 5% of my records have the first name of David - then absent of other information the number of David's in Florida = 15% * 5% * my total population\\n\\nThe above mathematics assumes that the fields are independent; that the probability of getting a David is uniform from state to state. For 'David' that is fairly true; for 'Rodrigo' it might not be. Therefore you can refine your model my capturing combinations of field occurrence (in the SALT literature these are referred to as 'concepts') As the label may indicate; this is usually useful if there is some logical reason for the fields being correlated - for example some first and last names are heavily correlated.\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2012-02-28 15:54:14\" },\n\t{ \"post_id\": 1222, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="dabayliss":hncqpyhe]The total count is a natural fallout of the IDF; if you know the approximate frequency of each term in the document caucus then you know the approximate number of documents containing any given combination of the terms. Secondary and tertiary effects are computed using co-occurrence stats on the common terms.\\n\\nHi David,\\n\\nI get inverse document frequency, or I think I do, but you lost me with pretty much everything after that. I see how that would apply for natural text searches (a la Google's search) but does it apply to fielded searches? Co-occurence, specifically, seems to be a sentence-oriented statistic.\\n\\nAnd for the next dumb question: How does one compute an estimated search result count from the IDFs (or related metrics) of multiple terms? If a given word appears in 50% of the corpus and another word appears in 25% of the corpus, what conclusions can be drawn in both AND and OR combination scenarios? I can see an entire range from complete overlap to no overlap of individual results, so how is that handled? 
Assume a middle-of-the-road 50% overlap?\\n\\nAny insight would be greatly appreciated!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-28 15:19:31\" },\n\t{ \"post_id\": 1218, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"dabayliss\", \"post_text\": \"Yeah - but they don't actually COUNT those 10M results and then return the top 20 \\n\\nThe total count is a natural* fallout of the IDF; if you know the approximate frequency of each term in the document caucus then you know the approximate number of documents containing any given combination of the terms. Secondary and tertiary effects are computed using co-occurrence stats on the common terms.\\n\\n(If you are not using IDF in your searches then the Doc module of our ML libraries shows you how to compute them <although not how to use them to predict a search>)\\n\\nIncidentally - the same IDF numbers can be used to shuffle the search sequence to optimize our global smart stepping system ...\\n\\nDavid\\n\\n* Natural if your friends and family have all learned NOT to ask: "what are the odds" because you usually have the answer ...\", \"post_time\": \"2012-02-27 19:38:20\" },\n\t{ \"post_id\": 1217, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":27r3sx7s]Yes, we call that a "pathological query" wherein the result set is so large it becomes meaningless. In the case of an AND query it may be useful to include all those results (or not), but for an OR query it would probably be better to throw them out and not inundate the end-user with TMI. Your call. :) \\n\\nShowing the number of records matching a query is never pathological: It explicitly tells the user how far out in the weeds they are. Presenting a ton of results all at once is pathological, though; I agree with that.\\n\\nNot to drag Google into this, but they're a decent example. Search for 'fubar' and Google gives you the first few results under a line of text that reads "About 10,900,000 results". In Google's use-case the user probably does want something near the top of the results, won't dive pathologically far into the results, and may or may not care about the total number. A user paying for downloadable information, on the other hand, may care a great deal about that total count (they have a goal of not exceeding some value important to them) and they use the few returned records only to confirm that their criteria is correct.\\n\\n[quote="rtaylor":27r3sx7s]The Programmer's Guide has a section on Roxie queries and one of the articles deals with these issues (I forget which one and I'm on my way to the airport as soon as I sign off). Take a look at those articles and maybe that will help.\\n\\nI will revisit the guide. I've read through it several times, gleaning more information each time; maybe this time something will jump out at me. I don't fully grok the subtleties of manual distribution of data (I tried it before, thinking I understood it, and managed to simply make a mess of things). I think it is likely that that is where my answer lies, as it cannot logically reside elsewhere. I'll follow up here with any positive results, for any Interested Readers.\\n\\nEnjoy your conference!\\n\\nDan\", \"post_time\": \"2012-02-27 18:34:46\" },\n\t{ \"post_id\": 1216, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Re: Performance of large queries\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n
That second one is the gotcha. Whatever your search algorithm, it means you cannot prematurely LIMIT, CHOOSEN, or TOPN your results. Before truncating them, you have to count them.
I wasn't just referring to truncation. Yes, truncation using CHOOSEN or TOPN is the final step, after all results are determined and you want to just show the end-users the best, but LIMIT is all about simply throwing out all results when there are "too many" (along with the LIMIT and ATMOST options on JOIN).\\n\\nThe problem pops up when one (or more) of those interim results is large, usually because there are few unique search keys or the user enters a search criteria that manages to 'hit' a lot of records.
Yes, we call that a "pathological query" wherein the result set is so large it becomes meaningless. In the case of an AND query it may be useful to include all those results (or not), but for an OR query it would probably be better to throw them out and not inundate the end-user with TMI. Your call. \\n\\nThe Programmer's Guide has a section on Roxie queries and one of the articles deals with these issues (I forget which one and I'm on my way to the airport as soon as I sign off). Take a look at those articles and maybe that will help.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-27 18:11:43\" },\n\t{ \"post_id\": 1214, \"topic_id\": 284, \"forum_id\": 8, \"post_subject\": \"Performance of large queries\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":10q5pvlv]
PS: Next up, in another thread, on another day: How to optimize when those field-level search results return a lot of records.
Well, that's part of what LIMIT, CHOOSEN, and TOPN are for. :D \\n\\nRichard\\n\\nI wish it was that simple.\\n\\nConsider the output from a run-of-the-mill search engine. A search request response typically contains at least two different things:\\n\\n1) A 'page' of results, usually defined by an offset into the total list and a number of items.\\n\\n2) The total number of items found.\\n\\nThat second one is the gotcha. Whatever your search algorithm, it means you cannot prematurely LIMIT, CHOOSEN, or TOPN your results. Before truncating them, you have to count them.\\n\\nIn my current test, I'm allowing the user to search against any combination of multiple fields. Tactically, my code searches against each field individually, transforms the results into a 'standard' record, then merges those interim results according to the current rules (AND or OR). From what I can tell, only after that final merge I will be able to accurately COUNT the total number of found records. The problem pops up when one (or more) of those interim results is large, usually because there are few unique search keys or the user enters a search criteria that manages to 'hit' a lot of records.\\n\\nIs there a common ECL pattern for dealing with cases like this? Or a standard way of producing fuzzy-yet-fairly-account result counts? I'm hoping I'm simply overlooking something, but I find myself running into the same wall over and over again.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-27 14:43:15\" },\n\t{ \"post_id\": 1221, \"topic_id\": 286, \"forum_id\": 8, \"post_subject\": \"Re: Removing nested transforms??\", \"username\": \"bforeman\", \"post_text\": \"I will have a more detailed look later today, but at first glance it looks like you need to use DENORMALIZE. Have you tried that?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-28 13:42:23\" },\n\t{ \"post_id\": 1220, \"topic_id\": 286, \"forum_id\": 8, \"post_subject\": \"Removing nested transforms??\", \"username\": \"Ghost\", \"post_text\": \"In the below code, i had used two nested transforms, and its working fine. But now, as per the requirement i have to do the same 'work' in a SINGLE transform. \\n\\nCan someone help me along with the sample code..
.. tx\\n\\n\\nrec:=record\\ninteger id;\\nend;\\n\\nr1:= record\\ninteger id;\\nstring name;\\nend;\\nds1:= dataset([{1,'john'},{2,'smith'},{3,'bruce'},{4,'william'},{5,'jena'}], r1);\\n\\nr2:= record\\ndataset(rec) ids;\\nend;\\nds2:= dataset([{[{1},{2},{3}]},\\n {[{6},{4},{5}]} ], r2);\\n\\n r2 outerT(ds2 L):= transform\\n currentIdDs:=L.ids;\\n \\n {integer id} innerT(ds1 L):= transform\\n self.id:= if(currentIdDs[1].id= L.id, 33,44);\\n end; \\n \\n getIdDs:= project(ds1, innerT(left)); \\n self.ids:= getIdDs;\\n End;\\n \\n t_outer:= project( ds2, outerT(left));\\noutput(t_outer);\", \"post_time\": \"2012-02-28 06:04:11\" },\n\t{ \"post_id\": 1231, \"topic_id\": 287, \"forum_id\": 8, \"post_subject\": \"Re: ALLNODES error\", \"username\": \"DSC\", \"post_text\": \"Thanks, Richard. That worked perfectly. Getting results at all is important for code refining, even if there are too many of them and they take too long to return.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-03-01 16:22:32\" },\n\t{ \"post_id\": 1229, \"topic_id\": 287, \"forum_id\": 8, \"post_subject\": \"Re: ALLNODES error\", \"username\": \"richardkchapman\", \"post_text\": \"There's a default limit of 10000 on this (and some other activities) to protect Roxie from being swamped by rogue data. You can change the limit using a LIMIT clause on the allnodes, but be aware that data being returned from ALLNODES is not divided into chunks to be returned on demand in the way that data returned from an index read is, so if your ALLNODES returns a lot of data, but you then only use a bit of it, you will still end up sending it all.\\n\\nIn general you want to try to put any aggregation you are doing with this data on the slave side rather than the server side. For example, supposing you were doing a MAX operation, you might find it was more efficient to code something that looked like:\\n\\nMAX(ALLNODES(MAX(mydata))\\n\\nrather than \\n\\nMAX(ALLNODES(mydata))\\n\\nRichard\", \"post_time\": \"2012-03-01 14:32:54\" },\n\t{ \"post_id\": 1228, \"topic_id\": 287, \"forum_id\": 8, \"post_subject\": \"ALLNODES error\", \"username\": \"DSC\", \"post_text\": \"I'm messing around with distributed data handling and I'm running into an undocumented error at runtime:\\n\\n
<countResponse>\\n <Result>\\n <Exception>\\n <Source>Roxie</Source>\\n <Code>1</Code>\\n <Message>\\n Too many records returned from ALLNODES()\\n </Message>\\n </Exception>\\n </Result>\\n</countResponse>\\n
\\n\\nThe message is straightforward, but 1) how many is "too many"? and 2) what can be done about it?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-02-29 19:34:53\" },\n\t{ \"post_id\": 1233, \"topic_id\": 289, \"forum_id\": 8, \"post_subject\": \"Re: ALLNODES ordering\", \"username\": \"richardkchapman\", \"post_text\": \"Yes, it should be deterministic.\\n\\nWhen we were first writing Roxie we decided that raw performance was paramount and we would make as few guarantees about record order as we could. We soon found that this meant every real query ended up with lots of SORT activities in, which were far less efficient than having Roxie take steps to ensure that queries returned consistent results from run to run. So we changed the policy to make the results deterministic in the vast majority of cases, including cases where data is returned from slave nodes (such as allnodes, index reads, and disk reads).\\n\\nThe only activities that are not deterministic should be the unordered concat operations. \\n\\nNote that this deterministic guarantee only holds for as long as you don't change the configuration (e.g. change the Roxie cluster size).\\n\\nRichard\", \"post_time\": \"2012-03-02 15:39:00\" },\n\t{ \"post_id\": 1232, \"topic_id\": 289, \"forum_id\": 8, \"post_subject\": \"ALLNODES ordering\", \"username\": \"DSC\", \"post_text\": \"The ECL Programmer's Guide notes the following regarding ALLNODES:\\n\\nALLNODES() causes the portion of the query it encloses to execute on all slave nodes in parallel. The results are calculated independently on each node then merged together, without ordering the records.
\\n\\nThe ECL Language Reference indicates that "ALLNODES returns a record set or index."\\n\\nWhile the resulting record set is unordered, is it at least deterministic? In other words, given two identical queries/tasks with identical environments (input data, machine configuration, etc.) will ALLNODES produce identical (unordered) results?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-02 15:31:24\" },\n\t{ \"post_id\": 1242, \"topic_id\": 290, \"forum_id\": 8, \"post_subject\": \"Re: Possible scoping bug inside modules\", \"username\": \"bforeman\", \"post_text\": \"Use SHARED for that A1 FUNCTION if both EXPORTS need to use it.\\n\\nSHARED makes the definition available within the entire MODULE. Local defiitions are only good until the first EXPORT or SHARED.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-03-05 17:33:57\" },\n\t{ \"post_id\": 1237, \"topic_id\": 290, \"forum_id\": 8, \"post_subject\": \"Possible scoping bug inside modules\", \"username\": \"nvasil\", \"post_text\": \"I am not sure what the scoping rule is for nested modules. From the reference manual it is inferred that non exported variables inside a module are visible only inside a module which implies that thy are visible to modules defined inside module. There is though some inconsistent behavior:\\n\\nConsider this:\\n\\n\\nA := MODULE\\n INTEGER A1() := FUNCTION\\n INTEGER x:=1;\\n return x;\\n END;\\n EXPORT SubModule1 := MODULE\\n EXPORT INTEGER a:=A1();\\n END;\\n EXPORT SubModule2 := MODULE\\n EXPORT INTEGER a:=A1();\\n END;\\nEND;\\n\\nx:=A;\\nOUTPUT('hi');\\n
\\n\\nCompiler gives the following error:\\ntemp.ecl(11,23): error C2167: Unknown identifier "A1"\\n1 error, 0 warning\\nA1 is a local function (I think you call that shared) and it is visible in SubModule1, but it is not visible in SubModule2 and compiler complains. If you EXPORT A1() then everything works fine.\\n\\nSo the problem is that the compiler has inconsistent behavior as it makes non exported variables visible to the first nested module but not in the other ones.\", \"post_time\": \"2012-03-04 05:02:39\" },\n\t{ \"post_id\": 1246, \"topic_id\": 291, \"forum_id\": 8, \"post_subject\": \"Re: Reading from a local file\", \"username\": \"william.whitehead@lexisnexis.com\", \"post_text\": \"ECLCC by default creates an executable, a.out, which is executed using the EclAgent/Hthor runtime engine. If you want to target THOR or ROXIE you need to specify the “-target=xxx” option (run “eclcc –help” for the correct syntax).\\n\\nFilename resolution depends on how you specified the filename. In “stand-alone” mode (eclcc without DALI), fully qualified filenames are resolved exactly as you specified them. Partial file specifications, and ones with nothing but a file name, are resolved relative to the current directory. So, you are correct that in your example, it locates the file in the current directory.\\n\\nIn standalone mode, it is incorrect for an eclcc generated executable to produce the “getDaliServixVersion” error that you are seeing. This should be raised as an issue on GitHub, or if you can provide ECL and a small datafile I can debug it.\", \"post_time\": \"2012-03-05 22:37:37\" },\n\t{ \"post_id\": 1238, \"topic_id\": 291, \"forum_id\": 8, \"post_subject\": \"Reading from a local file\", \"username\": \"nvasil\", \"post_text\": \"In previous conversations it was mentioned that if you do\\n\\n\\nelccc myfile.ecl\\n
\n\nyou get a file a.out, which is a standalone executable. You can run it, and the query executes as if you were running on a single Thor node.\n\nIf myfile.ecl reads from a file:\n\n\nx:=DATASET('myfile', MyRec, CSV);\n
\\n\\nwhere will it try to read the file from?\\n\\nI tried it and it did read the file successfully form the local directory but I also got this exception:\\n\\nhpcc-platform/common/remote/rmtfile.cpp(129) : getDaliServixVersion : connection failed\\nTarget: T>192.168.1.74, Raised in: hpcc-platform/system/jlib/jsocket.cpp, line 1211\\n\\nI believe what is going on, is it tries to find the dali server and if it fails it looks for it locally.\", \"post_time\": \"2012-03-04 15:24:48\" },\n\t{ \"post_id\": 1243, \"topic_id\": 292, \"forum_id\": 8, \"post_subject\": \"Re: Parsing pdf documents\", \"username\": \"bforeman\", \"post_text\": \"Hi Nick, \\n\\nNo, not that I'm aware of. Of course any document can be parsed line by line, but I can understand that a PDF would be problematic to get any meaningful data.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-03-05 17:37:33\" },\n\t{ \"post_id\": 1240, \"topic_id\": 292, \"forum_id\": 8, \"post_subject\": \"Parsing pdf documents\", \"username\": \"nvasil\", \"post_text\": \"Is there any support for parsing pdf documents in HPCC?\", \"post_time\": \"2012-03-04 23:41:03\" },\n\t{ \"post_id\": 1284, \"topic_id\": 293, \"forum_id\": 8, \"post_subject\": \"Re: Reading through Different Formats and Indexing for retri\", \"username\": \"rtaylor\", \"post_text\": \"I want to use HPCC for storing all my documents in original format.\\n\\nAs you suggested we should be able to read the data into binary format and spray. (can you please share any sample code)
OK, you can do that several ways, depending on exactly what format the actual data is in. There is a Programmer's Guide article called "Working with BLOBs" that describes one way of spraying and despraying binary files as BLOB fields. If you are working with files that are predominately (or all) text, you could simply spray each file as a separate record in a single dataset (we've done this with EDGAR data before), making it easily possible to parse the text in the files as free-form text fields in a dataset.\\nRegarding indexing: If we have very large number of documents then needs to search documents for retrieval. For this is there any functionality available? Or is there any way we can plug external search and index systems like Windows Search?
AFAIK, there are no external search and index systems currently available for HPCC. \\n\\nSearching documents for relevant information would usually be done using our PARSE technology (which I just spoke about at last week's Strata conference). Parsing text-based doc formats (.txt or .xml or .rtf) would be more straight-forward than the more binary-type formats (.pdf or .doc), therefore you might want to think about converting from the original format for extracting the relevant search terms and building your indexes.\\n\\nWe discuss Natural Language Parsing in our Advanced Thor classes (http://hpccsystems.com/products-and-ser ... s/training) and the class schedule is here: http://hpccsystems.com/community/traini ... s/training Our classes are designed to answer most of the kinds of questions people new to HPCC have, and the ability to dialog with the instructor can be invaluable as you begin designing your own HPCC solutions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-08 20:19:48\" },\n\t{ \"post_id\": 1247, \"topic_id\": 293, \"forum_id\": 8, \"post_subject\": \"Re: Reading through Different Formats and Indexing for retri\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hello,\\nI want to use HPCC for storing all my documents in original format.\\n\\nAs you suggested we should be able to read the data into binary format and spray. (can you please share any sample code)\\n\\nRegarding indexing: If we have very large number of documents then needs to search documents for retrieval. For this is there any functionality available? Or is there any way we can plug external search and index systems like Windows Search?\", \"post_time\": \"2012-03-06 11:17:16\" },\n\t{ \"post_id\": 1244, \"topic_id\": 293, \"forum_id\": 8, \"post_subject\": \"Re: Reading through Different Formats and Indexing for retri\", \"username\": \"rtaylor\", \"post_text\": \"Can we read through .pdf, .doc or mails saved as .pst format. If yes then how can we index mails and retrieve them ??
That's quite a broad-range question. The generic answer is that you can certainly read all these file types as binary or free-form text data, and you can extract meaningful information from them using the PARSE technology. \\n\\nHowever, "indexing them for retrieval" depends on what you're trying to accomplish. If you simply want to identify documents that contain certain key words, then you could construct a list of your key words and the document names that contain them, which could also include additional information, such as the location of the word within the document, if you set it up that way. \\n\\nSo, the real question is -- exactly what are you trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-05 19:05:14\" },\n\t{ \"post_id\": 1241, \"topic_id\": 293, \"forum_id\": 8, \"post_subject\": \"Reading through Different Formats and Indexing for retrieval\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nCan we read through .pdf, .doc or mails saved as .pst format. If yes then how can we index mails and retrieve them ??\", \"post_time\": \"2012-03-05 08:48:36\" },\n\t{ \"post_id\": 1251, \"topic_id\": 294, \"forum_id\": 8, \"post_subject\": \"Re: #option perceived as an action?\", \"username\": \"bforeman\", \"post_text\": \"Hi Nicholas,\\n\\nLet me review this and forward it to the development team for comment. Thank you for your time in reporting this to us.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-03-06 13:41:05\" },\n\t{ \"post_id\": 1245, \"topic_id\": 294, \"forum_id\": 8, \"post_subject\": \"#option perceived as an action?\", \"username\": \"nvasil\", \"post_text\": \"Take a look at the following piece of code that fails\\n\\n\\n#option('compileOptions', '-g');\\nINTEGER MyFunc() := FUNCTION\\n x:=1;\\n RETURN 1;\\nEND;\\n\\nk:=MyFunc();\\n\\nINTEGER MyFunc2() := BEGINC++\\n #include <string>\\n #body\\n std::string xl;\\nENDC++;\\n\\nu:=MyFunc2();\\n\\nEXPORT TEMP :=MODULE\\n EXPORT a:=1;\\nEND;\\n
\\n\\nThe compiler complaints with cryptic error " error C2325: WHEN must be used to associated an action with a definition"\\nThe compiler points to the EXPORT TEMP line\\nIf I comment out the #option it works. Also if I remove the EXPORT and use the Module locally it works again.\\n\\n#option shouldn't really affect exports. If it has to, then the error message must be different\", \"post_time\": \"2012-03-05 22:07:50\" },\n\t{ \"post_id\": 1294, \"topic_id\": 299, \"forum_id\": 8, \"post_subject\": \"Re: Odd error during thor submit\", \"username\": \"ghalliday\", \"post_text\": \"Yes it should be. Issue number is #1515 for reference.\", \"post_time\": \"2012-03-09 11:56:15\" },\n\t{ \"post_id\": 1275, \"topic_id\": 299, \"forum_id\": 8, \"post_subject\": \"Re: Odd error during thor submit\", \"username\": \"DSC\", \"post_text\": \"[quote="ghalliday":1i3j5ei1]There is an open issue about projects with child datasets being combined with csv read activities. Try adding aNOFOLD() around the csv dataset.\\n\\nPerfect. That fixed my problem.\\n\\nOnce this issue is addressed, should NOFOLD be removed?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-08 12:41:12\" },\n\t{ \"post_id\": 1271, \"topic_id\": 299, \"forum_id\": 8, \"post_subject\": \"Re: Odd error during thor submit\", \"username\": \"ghalliday\", \"post_text\": \"There is an open issue about projects with child datasets being combined with csv read activities.\\n\\nTry adding aNOFOLD() around the csv dataset. I am guessing in this case it is:\\n\\n\\nbaggedRecords := PROJECT(NOFOLD(CompanyNote.RawRecords),BagRecordTransform(LEFT));\\n
\", \"post_time\": \"2012-03-07 21:15:40\" },\n\t{ \"post_id\": 1269, \"topic_id\": 299, \"forum_id\": 8, \"post_subject\": \"Odd error during thor submit\", \"username\": \"DSC\", \"post_text\": \"This one is pretty odd. I tried writing some code that leveraged the tokenizing portion of the ML library and ran into this error during thor submit (Check was fine):\\n\\nError: Compile/Link failed for W20120307-140556 (see '//10.210.150.81/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0), 3000, W20120307-140556\\nWarning: Mismatch in subminor version number (3.6.0 v 3.6.0) (0, 0), 3118, unknown\\nWarning: (0, 0), 0, \\nWarning: ---------- compiler output -------------- (0, 0), 0, \\nWarning: W20120307-140556.cpp: In member function ‘virtual size32_t cAc84::transform(ARowBuilder&, unsigned int*, const char**)’: (0, 0), 0, \\nWarning: W20120307-140556.cpp:3256: error: ‘Src’ was not declared in this scope (0, 0), 0, \\nWarning: W20120307-140556.cpp: In member function ‘virtual size32_t cAc87::getRowSingle(ARowBuilder&)’: (0, 0), 0, \\nWarning: W20120307-140556.cpp:3297: error: ‘lenno_deref’ was not declared in this scope (0, 0), 0, \\nWarning: W20120307-140556.cpp:3297: error: ‘datano_deref’ was not declared in this scope (0, 0), 0, \\nWarning: (0, 0), 0, \\nWarning: --------- end compiler output ----------- (0, 0), 0,
\\n\\nThe mentioned log file contains:\\n\\n00000000 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libstringlib.so[lib_stringlib] version = STRINGLIB 1.1.14\\n00000001 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libparselib.so[lib_parselib] version = PARSELIB 1.0.1\\n00000002 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libworkunitservices.so[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.1\\n00000003 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libunicodelib.so[lib_unicodelib] version = UNICODELIB 1.1.05\\n00000004 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libfileservices.so[lib_fileservices] version = FILESERVICES 2.1.3\\n00000005 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libauditlib.so[lib_auditlib] version = AUDITLIB 1.0.1\\n00000006 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libdebugservices.so[lib_debugservices] version = DEBUGSERVICES 1.0.1\\n00000007 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/liblogging.so[lib_logging] version = LOGGING 1.0.1\\n00000008 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (8/13)[companyid,addresstypeid,city,state,postalcode1,postalcode2,countryid,countyid]\\n00000009 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (4/6)[companyid,addresstypeid,phonetypeid,areacode]\\n0000000A 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (3/7)[tagid,userid,accountid]\\n0000000B 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (2/4)[tagid,companyid]\\n0000000C 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (5/9)[noteid,userid,accountid,companyid,note]\\n0000000D 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize baggedrecords PROJECT now (5/6)[companyid,userid,accountid,noteid,words]\\n0000000E 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000000F 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000010 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000011 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000012 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000013 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000014 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000015 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000016 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000017 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000018 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000019 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001A 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001B 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001C 2012-03-07 14:05:56 18459 18459 Optimizer: Merge PROJECT[clean] and DATASET[noterec]\\n0000001D 2012-03-07 14:05:56 18459 18459 Optimizer: Merge PROJECT and PROJECT[docleanup]\\n0000001E 2012-03-07 14:05:56 18459 18459 Optimizer: Merge 
NEWTABLE and NEWTABLE[records]\\n0000001F 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000020 2012-03-07 14:05:56 18459 18459 Adding library: eclrtl\\n00000021 2012-03-07 14:05:56 18459 18459 Adding library: /opt/HPCCSystems/plugins/libfileservices\\n00000022 2012-03-07 14:05:56 18459 18459 Adding library: /opt/HPCCSystems/plugins/libstringlib\\n00000023 2012-03-07 14:05:56 18459 18459 Adding library: W20120307-140556.res.o\\n00000024 2012-03-07 14:05:56 18459 18459 Compiling W20120307-140556\\n00000025 2012-03-07 14:05:58 18459 18459 Failed to compile W20120307-140556\\n00000026 2012-03-07 14:05:58 18459 18459 \\n---------- compiler output --------------\\nW20120307-140556.cpp: In member function ‘virtual size32_t cAc84::transform(ARowBuilder&, unsigned int*, const char**)’:\\nW20120307-140556.cpp:3256: error: ‘Src’ was not declared in this scope\\n...skipping...\\n00000004 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libfileservices.so[lib_fileservices] version = FILESERVICES 2.1.3\\n00000005 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libauditlib.so[lib_auditlib] version = AUDITLIB 1.0.1\\n00000006 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/libdebugservices.so[lib_debugservices] version = DEBUGSERVICES 1.0.1\\n00000007 2012-03-07 14:05:56 18459 18459 Loading plugin /opt/HPCCSystems/plugins/liblogging.so[lib_logging] version = LOGGING 1.0.1\\n00000008 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (8/13)[companyid,addresstypeid,city,state,postalcode1,postalcode2,countryid,countyid]\\n00000009 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (4/6)[companyid,addresstypeid,phonetypeid,areacode]\\n0000000A 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (3/7)[tagid,userid,accountid]\\n0000000B 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (2/4)[tagid,companyid]\\n0000000C 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize docleanup PROJECT now (5/9)[noteid,userid,accountid,companyid,note]\\n0000000D 2012-03-07 14:05:56 18459 18459 ImplicitProject: Minimize baggedrecords PROJECT now (5/6)[companyid,userid,accountid,noteid,words]\\n0000000E 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000000F 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000010 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000011 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000012 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000013 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000014 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000015 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000016 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000017 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000018 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000019 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001A 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001B 2012-03-07 14:05:56 18459 18459 
Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001C 2012-03-07 14:05:56 18459 18459 Optimizer: Merge PROJECT[clean] and DATASET[noterec]\\n0000001D 2012-03-07 14:05:56 18459 18459 Optimizer: Merge PROJECT and PROJECT[docleanup]\\n0000001E 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n0000001F 2012-03-07 14:05:56 18459 18459 Optimizer: Merge NEWTABLE and NEWTABLE[records]\\n00000020 2012-03-07 14:05:56 18459 18459 Adding library: eclrtl\\n00000021 2012-03-07 14:05:56 18459 18459 Adding library: /opt/HPCCSystems/plugins/libfileservices\\n00000022 2012-03-07 14:05:56 18459 18459 Adding library: /opt/HPCCSystems/plugins/libstringlib\\n00000023 2012-03-07 14:05:56 18459 18459 Adding library: W20120307-140556.res.o\\n00000024 2012-03-07 14:05:56 18459 18459 Compiling W20120307-140556\\n00000025 2012-03-07 14:05:58 18459 18459 Failed to compile W20120307-140556\\n00000026 2012-03-07 14:05:58 18459 18459 \\n---------- compiler output --------------\\nW20120307-140556.cpp: In member function ‘virtual size32_t cAc84::transform(ARowBuilder&, unsigned int*, const char**)’:\\nW20120307-140556.cpp:3256: error: ‘Src’ was not declared in this scope\\nW20120307-140556.cpp: In member function ‘virtual size32_t cAc87::getRowSingle(ARowBuilder&)’:\\nW20120307-140556.cpp:3297: error: ‘lenno_deref’ was not declared in this scope\\nW20120307-140556.cpp:3297: error: ‘datano_deref’ was not declared in this scope\\n\\n--------- end compiler output -----------\\n(END)
\\n\\nMy code is basically a clone of something else that works. The wrinkle is really the ML stuff, so naturally I'm suspicious of that code:\\n\\nEXPORT\\tRecordLayout := RECORD\\n\\tCompanyIDSearchResultRecord.companyID;\\n\\tINTEGER4\\t\\t\\tuserID;\\n\\tINTEGER4\\t\\t\\taccountID;\\n\\tINTEGER4\\t\\t\\tnoteID;\\n\\tSTRING20\\t\\t\\tword;\\nEND;\\n\\nSHARED\\tDATASET(RecordLayout) CreateCombinedRecordset() := FUNCTION\\n\\tBagRecordLayout := RECORD\\n\\t\\tRecordLayout;\\n\\t\\tDATASET(ML.Docs.Types.WordElement)\\twords;\\n\\tEND;\\n\\t\\n\\tBagRecordLayout BagRecordTransform(CompanyNote.RawRecordLayout l) := TRANSFORM\\n\\t\\tnoteRec := DATASET([{l.noteID,l.note}],ML.Docs.Types.Raw);\\n\\t\\tcleanedNote := ML.Docs.Tokenize.Clean(noteRec);\\n\\t\\tSELF.words := ML.Docs.Tokenize.Split(cleanedNote);\\n\\t\\tSELF.word := '';\\n\\t\\tSELF := l;\\n\\tEND;\\n\\t\\n\\tbaggedRecords := PROJECT(CompanyNote.RawRecords,BagRecordTransform(LEFT));\\n\\t\\n\\tRecordLayout RecordLayoutTransform(BagRecordLayout l, UNSIGNED c) := TRANSFORM\\n\\t\\tSELF.word := Str.ToLowerCase(l.words[c].word);\\n\\t\\tSELF := l;\\n\\tEND;\\n\\t\\n\\trs := NORMALIZE(baggedRecords,COUNT(LEFT.words),RecordLayoutTransform(LEFT,COUNTER));\\n\\t\\n\\tRETURN rs;\\nEND;
\\n\\nI'm probably doing something silly and/or dumb, but I don't see it. Any thoughts would be appreciated!\\n\\nDan\", \"post_time\": \"2012-03-07 20:12:19\" },\n\t{ \"post_id\": 1321, \"topic_id\": 303, \"forum_id\": 8, \"post_subject\": \"Re: Strange behavior when reading a dataset in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Thanks\\n\\nCan you also address this issue too?\\nviewtopic.php?f=8&t=291\", \"post_time\": \"2012-03-12 14:28:45\" },\n\t{ \"post_id\": 1316, \"topic_id\": 303, \"forum_id\": 8, \"post_subject\": \"Re: Strange behavior when reading a dataset in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"It looks like a code generator bug - the call to evaluate the dataset is in the incorrect place because no fields are referenced from the dataset.\\n\\nI've added issue 1821 to github to track it.\", \"post_time\": \"2012-03-12 11:27:16\" },\n\t{ \"post_id\": 1297, \"topic_id\": 303, \"forum_id\": 8, \"post_subject\": \"Re: Strange behavior when reading a dataset in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"It was really small\\n\\n\\n0,0,-0.90265,0\\n0,1,-0.91719,0\\n0,2,0.74029,0\\n1,0,-0.079012,0\\n1,1,-0.79218,0\\n1,2,2.4023,0\\n2,0,-0.79442,0\\n2,1,0.40984,0\\n2,2,-0.65182,0\\n3,0,0.4469,0\\n3,1,0.86663,0\\n3,2,-0.15092,0\\n4,0,-0.52346,0\\n4,1,0.44427,0\\n4,2,0.33556,0\\n
\\n\\nI can read the file and output it and it is fine. The problem is when I use a beginc++ to process it.\", \"post_time\": \"2012-03-09 12:58:17\" },\n\t{ \"post_id\": 1293, \"topic_id\": 303, \"forum_id\": 8, \"post_subject\": \"Re: Strange behavior when reading a dataset in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"How big was the file you were applying the function to, and which platform? ( I am assuming default).\", \"post_time\": \"2012-03-09 11:50:00\" },\n\t{ \"post_id\": 1292, \"topic_id\": 303, \"forum_id\": 8, \"post_subject\": \"Strange behavior when reading a dataset in BEGINC++\", \"username\": \"nvasil\", \"post_text\": \"Consider the following code that just reads a csv file and the in a BEGINC++ statement it tries to just access the number of elemets\\n\\n\\n\\nMyRec := RECORD\\n INTEGER a1;\\n INTEGER a2;\\n REAL a3;\\n INTEGER a4;\\nEND;\\n\\nDATASET(MyRec) xx := DATASET('3gaussians.thor', MyRec, CSV(SEPARATOR(','))); \\n\\nx:=xx(a1<=89999);\\n\\nINTEGER MyFunc(DATASET(MyRec) a) := BEGINC++\\n #include <iostream>\\n #body\\n\\n std::cout<<"size="<<lenA<<std::endl;\\n return lenA;\\nENDC++;\\n\\nOUTPUT(MyFunc(x));\\n
\\n\\nThis code crashes when you compile it as a standalone executable with eclcc. I haven't tried it with a thor cluster.\\n\\n\\nerror: C1000 SIG: Segmentation fault(11), accessing 0000000000000000, IP=00007FF59561409A\\nKilled\\n\\n\\nIf I change the last statement to \\n\\nOUTPUT(MyFunc(xx));\\n
\\n\\nit works fine. Basically xx selects implicitly the x. \\n\\nSo I have played a lot with it and my verdict is that BEGINC++ code cannot read a dataset that has been imported from a file. But if you create a dataset as a result of ECL code then BEGINC++ works fine.\", \"post_time\": \"2012-03-09 07:44:22\" },\n\t{ \"post_id\": 1301, \"topic_id\": 304, \"forum_id\": 8, \"post_subject\": \"Re: Question calling EXISTS with a non-existent dataset.\", \"username\": \"Allan\", \"post_text\": \"Ah!\\n\\nEXISTS(valuelist)\\n\\nSilly me!\\n\\nThanks\", \"post_time\": \"2012-03-09 14:14:14\" },\n\t{ \"post_id\": 1300, \"topic_id\": 304, \"forum_id\": 8, \"post_subject\": \"Re: Question calling EXISTS with a non-existent datas\", \"username\": \"DSC\", \"post_text\": \"[quote="Allan":2n311wnf]Hi,\\n\\nI found that EXISTS returns 'true' when passed an non-existent dataset?\\n\\nIs this correct behaviour, I would have expected 'false' or an exception?\\n\\nOUTPUT(EXISTS('zzzz::qqq:zzzzYYYYSS'),NAMED('TEST_EXISTS'));\\n
\\n\\nI think you're testing for the existence of a string value, not the contents of a recordset.\", \"post_time\": \"2012-03-09 14:12:06\" },\n\t{ \"post_id\": 1299, \"topic_id\": 304, \"forum_id\": 8, \"post_subject\": \"Question calling EXISTS with a non-existent dataset.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI found that EXISTS returns 'true' when passed an non-existent dataset?\\n\\nIs this correct behaviour, I would have expected 'false' or an exception?\\n\\nOUTPUT(EXISTS('zzzz::qqq::zzzzYYYYSS'),NAMED('TEST_EXISTS'));\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-09 14:09:44\" },\n\t{ \"post_id\": 1322, \"topic_id\": 306, \"forum_id\": 8, \"post_subject\": \"Re: How can we secure Data in THOR.\", \"username\": \"rtaylor\", \"post_text\": \"Security issues like these in Thor are handled by LDAP, support for which should be forthcoming in an upcoming release (pretty soon). You should be able to configure your file permissions in LDAP so that users may read/write, or read-only, or not even see the file, along with many other permissions. For how to handle user security in your Roxie queries, someone else will have to address that issue, as I currently do not know what the plan is.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-12 18:26:56\" },\n\t{ \"post_id\": 1317, \"topic_id\": 306, \"forum_id\": 8, \"post_subject\": \"How can we secure Data in THOR.\", \"username\": \"Apurv.Khare\", \"post_text\": \"For securing our data(in THOR) and Queries(in Roxie),do we have any Role Based Access in HPCC??\", \"post_time\": \"2012-03-12 13:14:12\" },\n\t{ \"post_id\": 1337, \"topic_id\": 307, \"forum_id\": 8, \"post_subject\": \"Re: Modifying and Updating data on THOR\", \"username\": \"rtaylor\", \"post_text\": \"What type of OLTP tools are available that can be integrated with HPCC?
Typically, it would be a SQL database (Oracle, MySQL, etc.).\\n\\nHow to structure our Roxie query to invoke a OLTP tool? Can we have any code snippet for such OLTP tools?
You would simply form your Roxie query to include a call to that database to provide the latest data that had not yet been integrated into the Roxie. You could use SOAPCALL to accomplish that, or the EncodeRfsQuery function from the Standard Library like this:\\nIMPORT Std;\\nrfsserver := '10.173.207.1:7080';\\nrec := RECORD\\n STRING mydata{MAXLENGTH(8192)};\\nEND;\\nOUTPUT(DATASET(STD.File.EncodeRfsQuery( rfsserver,'SELECT data FROM xml_testnh'),\\n rec,CSV(MAXLENGTH(8192))));
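\n\nFor the SOAPCALL route, a minimal sketch might look like this (the URL, service name, and record layouts are hypothetical placeholders for whatever the OLTP side exposes, not a real service):\n\nInRec := RECORD\n  STRING20 acctno{XPATH('AcctNo')} := '12345';  // constant input value for the call\nEND;\nOutRec := RECORD\n  STRING100 note{XPATH('Note')};\nEND;\nlatest := SOAPCALL('http://oltpserver:8010/WsUpdates', 'GetRecentUpdates', InRec, DATASET(OutRec));\nOUTPUT(latest);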
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-13 19:00:48\" },\n\t{ \"post_id\": 1336, \"topic_id\": 307, \"forum_id\": 8, \"post_subject\": \"Re: Modifying and Updating data on THOR\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nWhat type of OLTP tools are available that can be integrated with HPCC? \\nHow to structure our Roxie query to invoke a OLTP tool? Can we have any code snippet for such OLTP tools?\", \"post_time\": \"2012-03-13 18:49:18\" },\n\t{ \"post_id\": 1323, \"topic_id\": 307, \"forum_id\": 8, \"post_subject\": \"Re: Modifying and Updating data on THOR\", \"username\": \"rtaylor\", \"post_text\": \"If a query is published and called through a Soapcall,we need to allow the user to add any modifications,that will be saved in the original file.(As UPDATE in SQL). DO HPCC provide any such feature??
The short answer is "no" -- because HPCC is not designed to be an OLTP system. We do not support updating existing data files. In fact, when you are reading a file in your process, you may not write back to that same file -- you must write any changes to a new file. And, Roxie doesn't write to disk, it only returns result sets to the caller.\\n\\nWith that said, it is possible to emulate such an OLTP system by using HPCC in conjunction with tools designed for OLTP. You would need to capture the updates and changes in the OLTP tool and design your Roxie queries to query both the Roxie data and the OLTP tool for a given result. You would also need to have operational processes in place to periodically "flush" the OLTP tool's datastore and use Thor to incorporate the latest data updates into the Roxie data (for best performance). The "period" you choose for the update could be weekly, daily, hourly, every 10 minutes ... whatever, depending on the overall volume of data through the system and your response time requirements. The more data volume you have and/or the faster your speed requirements are, the shorter your period should be.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-12 18:47:03\" },\n\t{ \"post_id\": 1318, \"topic_id\": 307, \"forum_id\": 8, \"post_subject\": \"Modifying and Updating data on THOR\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nIf a query is published and called through a Soapcall,we need to allow the user to add any modifications,that will be saved in the original file.(As UPDATE in SQL). DO HPCC provide any such feature??\", \"post_time\": \"2012-03-12 13:20:29\" },\n\t{ \"post_id\": 1324, \"topic_id\": 308, \"forum_id\": 8, \"post_subject\": \"Re: How to decide Size of Cluster?\", \"username\": \"rtaylor\", \"post_text\": \"we need to decide the size of cluster(no. of nodes),based on what parameters we can decide that??\\nIf our Roxie is being used constantly by certain no of users and data is of particular size in GB\\\\TB, then what should be the approach to choose cluster size??
The number of concurrent users on Roxie is less of a concern than data size, so let's address that first: \\n\\nEach Roxie node can handle a number of concurrent queries (let's say 30), so you multiply that by the number of nodes (our standard Production Roxie is 100 nodes) and that gives you the total number of simultaneous queries your Roxie can handle (in our case, 3000). When you factor in the average response time for your queries (for most of our Prodcution queries, that's about a quarter of a second), then you can determine the total queries/second your Roxie can handle (ours is about 12,000/second).\\n\\nOK, so the real determinant is how much data do you have. For most of our Production Roxie queries, the data is contained in payload indexes -- meaning that the search terms allow quick access, and the payload eliminates the need to read the record from the base dataset (less disk I/O = faster response times). In ECL, an INDEX is always LZW compressed, so the real determinant of data size is the end-result size of the final production data after it has been compressed by the INDEX build. For our Production data we build the INDEXes on 400-node Thor and publish them to 100-node Roxie, making each Roxie node receive 4 file parts (in addition to the binary tree file part for the INDEX, which is always cached in RAM on Roxie). So you always want to size your Roxie and Thor as even multiples of each other (Thor, of course, always being the big guy, since he has to mangle all the data into production form - -Roxie only ever gets the final result "product" data).\\n\\nBottom line, is that it all depends on how much data you have. If you put a small amount of data on a large Roxie and require ultra-fast response time, you can have all the data PRELOADed into memory. A single Roxie, of course, can also have many different queries (with vastly different data) published to it, so it can serve up a multitude of products for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-12 19:08:27\" },\n\t{ \"post_id\": 1320, \"topic_id\": 308, \"forum_id\": 8, \"post_subject\": \"How to decide Size of Cluster?\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nwe need to decide the size of cluster(no. of nodes),based on what parameters we can decide that??\\nIf our Roxie is being used constantly by certain no of users and data is of particular size in GB\\\\TB, then what should be the approach to choose cluster size??\", \"post_time\": \"2012-03-12 13:24:44\" },\n\t{ \"post_id\": 1413, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"HPCC Staff\", \"post_text\": \"The team has identified the problem and has opened an issue/pull request in GitHub for the fix. Thank you for reporting it!\", \"post_time\": \"2012-03-28 14:41:22\" },\n\t{ \"post_id\": 1406, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"\", \"post_time\": \"2012-03-26 19:03:18\" },\n\t{ \"post_id\": 1401, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"As an educational experience, I wanted to see if I could do this a different way, applying the stuff I learned in this thread. Here is what I have now:\\n\\n
ProcessUniquePairs(rs,combinerTransform) := FUNCTIONMACRO\\n\\t// Empty record set derived from transform to be used to collect the transform's results\\n\\temptyFRS := PROJECT(DATASET([],RECORDOF(rs)),combinerTransform(LEFT,LEFT));\\n\\t\\n\\t// Calling the transform with pairs of records\\n\\tloopBody(INTEGER i) := PROJECT(rs[i+1..],combinerTransform(rs[i],LEFT));\\n\\t\\n\\t// Iterating through all but the last item in the record set\\n\\tresult := LOOP(emptyFRS,COUNT(rs)-1,FALSE,loopBody(COUNTER));\\n\\t\\n\\tRETURN result;\\n\\t\\nENDMACRO;\\n\\nSampleRec := RECORD\\n\\tINTEGER\\t\\tn;\\nEND;\\n\\nNewSampleRec := RECORD\\n\\tINTEGER\\t\\tn1;\\n\\tINTEGER\\t\\tn2;\\nEND;\\n\\nd1 := DATASET\\t(\\n\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t{1},\\n\\t\\t\\t\\t\\t\\t{2},\\n\\t\\t\\t\\t\\t\\t{3},\\n\\t\\t\\t\\t\\t\\t{4},\\n\\t\\t\\t\\t\\t\\t{5}\\n\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\tSampleRec\\n\\t\\t\\t\\t);\\n\\nNewSampleRec DoXForm(SampleRec l, SampleRec r) := TRANSFORM\\n\\tSELF.n1 := l.n;\\n\\tSELF.n2 := r.n;\\nEND;\\n\\nOUTPUT(ProcessUniquePairs(d1,DoXForm));
\\n\\nThis code works in hthor but appears to hang (infinite loop) in thor. Is there a fundamental problem with the code?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-03-21 13:17:45\" },\n\t{ \"post_id\": 1399, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"[quote="dabayliss":3d8doayr]Inside the generic function - and I am not defining the return format. I am using a simple defined format as the INPUT to the PROJECT - but the output of the project is in the format defined by the result of the transform (which is passed in)\\n\\n(Remember: the type of the result of a project is the type of the return of the transform - in fact this is true of many of the ECL components )\\n\\nThanks for the clarification. I see where I went wrong in my thinking, which was confusing the type of the first argument to PROJECT with the type in something like a GRAPH (where that type had to match the output). I'll go ahead and blame a general lack of caffeine for this one, too.\\n\\nGood stuff.\\n\\nDan\", \"post_time\": \"2012-03-20 20:39:37\" },\n\t{ \"post_id\": 1397, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dabayliss\", \"post_text\": \"Inside the generic function - and I am not defining the return format. I am using a simple defined format as the INPUT to the PROJECT - but the output of the project is in the format defined by the result of the transform (which is passed in)\\n\\n(Remember: the type of the result of a project is the type of the return of the transform - in fact this is true of many of the ECL components )\\n\\nDavid\", \"post_time\": \"2012-03-20 20:13:31\" },\n\t{ \"post_id\": 1396, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"Ah! The ALL option to the JOIN function was the thing I was overlooking. Thanks!\\n\\nThe 'clean' way to solve the generic problem would be to pass in the layout of the returned data. If you really don't want to do that then something like:\\n\\n
\\n\\nIs that code inside or outside the generic function? If it's inside, it looks like you're still defining the returned record format, just in the PROJECT instead of the LOOP. Or does the {INTEGER i} bit get thrown away in this case?\", \"post_time\": \"2012-03-20 15:03:12\" },\n\t{ \"post_id\": 1395, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dabayliss\", \"post_text\": \"JOIN,ALL requires all of one side of the JOIN to be on every node; I use the generic form of JOIN that allows the data to be evenly distributed.\\n\\nThe 'clean' way to solve the generic problem would be to pass in the layout of the returned data. If you really don't want to do that then something like:\\n\\nMyNull := PROJECT( DATASET([],{INTEGER i}), PassedInTransform(0));\\n\\nL := LOOP(MyNull, etc);\\n
\\nshould work\\n\\nDavid\", \"post_time\": \"2012-03-20 14:53:22\" },\n\t{ \"post_id\": 1394, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"That is a very interesting way iterating through the record set. I admit to not fully understanding why it would distribute better, though. Obviously, I see that the elements are chosen differently, but how does this version avoid problems where the other does not? (We need a duncecap emoticon, I think.)\\n\\nQuestion related to the code snippet: My original goal was to write a generic function that would accept a transform from the caller. How would you change your snippet to account for that? Specifically, the loop is seeded with an empty recordset whose type is really determined by the transform. Is there a way of dynamically determining a transform's result type?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-03-20 14:26:06\" },\n\t{ \"post_id\": 1386, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dabayliss\", \"post_text\": \"Here is a version that will distribute out rather better for large lists:\\n\\nd := DATASET([{'a'},{'b'},{'c'},{'d'},{'e'},{'f'}],{STRING1 l});\\n\\nenum := PROJECT(d,TRANSFORM({STRING1 l, UNSIGNED c},SELF.c := COUNTER, SELF := LEFT));\\n\\nhigh := COUNT(enum); // Use max if c is not 1..COUNT(enum)\\n\\npair := RECORD\\n STRING1 l;\\n\\tSTRING1 r;\\n\\tEND;\\n\\n// On iteration N we will create pairs for everything 'N' steps apart\\nlbody(UNSIGNED N) := FUNCTION\\n\\tRETURN JOIN(enum(c<=high-N),enum(c>N),LEFT.c+N=RIGHT.c,TRANSFORM(pair,SELF.r := RIGHT.l, SELF := LEFT));\\n END;\\n// The false causes the RETURN of each iteration to simply land in the output\\nl := LOOP(DATASET([],pair),high-1,FALSE,lbody(COUNTER));\\n\\nl
\\n\\nAssuming a 'proper' dataset is input; this will go across the full width of the cluster. Note: this is still N^2 and for big datasets will generate a TON of data - but if it is what you need \", \"post_time\": \"2012-03-19 15:13:56\" },\n\t{ \"post_id\": 1356, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"Got it. Thanks for the explanation.\\n\\nDan\", \"post_time\": \"2012-03-14 18:21:31\" },\n\t{ \"post_id\": 1355, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
How do you know that the record set must fit into a single node? What tells you this?
The fact that record #1 must bang against every other record in the file. We divide the work by how we divide the data, and each node is a separate box, so there is no "cross-node execution" but simply a passing of required data between multiple discrete computers. Therefore, any problem wherein any single record must interact with every other record from the same file implies they all must be on the same node to perform the process.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-14 18:19:12\" },\n\t{ \"post_id\": 1346, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"In the original use-case for this, the record set size would be very small (less than 100 records), so I don't think I'll have a problem with fitting it into a single node. However, this begs a question: How do you know that the record set must fit into a single node? What tells you this?\\n\\nI'll admit that I'm not happy with 1) rewriting the record set at all and 2) having to do a JOIN to make this work. In procedural code, you could just use a couple of integer variables to index into the original array (record set) and call the transform. Very little additional memory overhead necessary. I tried to do the same thing with LOOPs but it made my head hurt. I would love to find a low-memory way to do this, and a way that did not enforce any kind of single-node limit would be a plus. Any pointers would be appreciated.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-03-14 11:39:41\" },\n\t{ \"post_id\": 1342, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dustinskaggs\", \"post_text\": \"It's even a little worse than that, the initial dataset will need to be able fit on a single node with room to spare. If we do the math, the average number of result records per node will be x * (x - 1) / (2 * clustersize) where x is the number of total records initially. So for an example if we have 1000 total initial records on a 10 node cluster, the resulting dataset will have 49,500 records per node on average.\\n\\nAlso, the way you're using the counter will skew your results toward the first node. To get a more even distribution, you'll want to randomly distribute your dataset after you add the counter.\", \"post_time\": \"2012-03-14 01:50:58\" },\n\t{ \"post_id\": 1341, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dabayliss\", \"post_text\": \"That solution will only work for data that fits on a single-node - is that ok?\", \"post_time\": \"2012-03-13 21:38:06\" },\n\t{ \"post_id\": 1340, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \" Old dogs can learn new tricks!\\n\\nThis is almost most as much fun as the "Ah ha!" moment you get when Lisp finally clicks into place.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-03-13 21:11:48\" },\n\t{ \"post_id\": 1339, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
My goal is to write something I can stick into a Util module and make generically usable by any other code. Other than a pure MACRO, I don't know of a way to avoid passing the dataset. Do you?
It appears to me as if you've achieved the goal. The FUNCTIONMACRO is about as good as you're going to get, and defining the TRANSFORM and passing it to the FUNCTIONMACRO is a neat trick -- hats off to you! \\n\\nRichard\", \"post_time\": \"2012-03-13 20:41:43\" },\n\t{ \"post_id\": 1338, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"[quote=&quot;dustinskaggs&quot;:25inabfb]When running on thor, I typically try to avoid passing datasets into transforms. But since you want to match against every record, it probably doesn't make a difference. I think you can get the self join to work the way you want if you add in an extra condition of &quot;left.field1 &lt; right.field1&quot;.\\n\\nThat's exactly how the final result looks.\\n\\nMy goal is to write something I can stick into a Util module and make generically usable by any other code. Other than a pure MACRO, I don't know of a way to avoid passing the dataset. Do you?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-03-13 20:33:09\" },\n\t{ \"post_id\": 1334, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"dustinskaggs\", \"post_text\": \"When running on thor, I typically try to avoid passing datasets into transforms. But since you want to match against every record, it probably doesn't make a difference. I think you can get the self join to work the way you want if you add in an extra condition of &quot;left.field1 &lt; right.field1&quot;.\", \"post_time\": \"2012-03-13 16:53:08\" },\n\t{ \"post_id\": 1333, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"OK, I adapted your idea and mine to come up with a generic function macro:\\n\\n
ProcessPermutations(rs,combinerTransform) := FUNCTIONMACRO\\n\\t// Define the record layout within the record set\\n\\trsRecType := RECORDOF(rs);\\n\\t\\n\\t// Interim record set containing index numbers for each record\\n\\tRSContainerRec := RECORD\\n\\t\\tINTEGER\\t\\t\\t\\tidx;\\n\\t\\trsRecType\\t\\t\\tvalueRec;\\n\\tEND;\\n\\t\\n\\t// Transform to create interim record set\\n\\tRSContainerRec MakeRSContainerRec(rsRecType l, UNSIGNED c) := TRANSFORM\\n\\t\\tSELF.idx := c;\\n\\t\\tSELF.valueRec := l;\\n\\tEND;\\n\\t\\n\\t// Create the interim record set\\n\\texpandedRS := PROJECT(rs,MakeRSContainerRec(LEFT,COUNTER));\\n\\t\\n\\t// Perform JOIN\\n\\tresult := JOIN(expandedRS,expandedRS,LEFT.idx < RIGHT.idx,combinerTransform(LEFT.valueRec,RIGHT.valueRec),ALL);\\n\\t\\n\\tRETURN result;\\n\\t\\nENDMACRO;
\\n\\nCalling from this:\\n\\n// Sample record layout\\nNumRec := RECORD\\n\\tINTEGER\\t\\tnum;\\nEND;\\n\\n// Sample data\\nd1 := DATASET\\t(\\n\\t\\t\\t\\t\\t[\\n\\t\\t\\t\\t\\t\\t{3},\\n\\t\\t\\t\\t\\t\\t{5},\\n\\t\\t\\t\\t\\t\\t{7},\\n\\t\\t\\t\\t\\t\\t{11},\\n\\t\\t\\t\\t\\t\\t{13}\\n\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\tNumRec\\n\\t\\t\\t\\t);\\n\\n// Transform to pass to permutation processor\\nNumRec MyXForm(NumRec l, NumRec r) := TRANSFORM\\n\\tSELF.num := l.num * r.num;\\nEND;\\n\\nOUTPUT(ProcessPermutations(d1,MyXForm));
\\n\\nResults in:\\n\\n15\\n21\\n33\\n39\\n35\\n55\\n65\\n77\\n91\\n143
\\n\\nDoes that look reasonable? (I take it as a given that you'll tell me that it isn't concise enough. )\\n\\nDan\", \"post_time\": \"2012-03-13 16:49:41\" },\n\t{ \"post_id\": 1332, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Why not just change the self-join condition to LEFT.val<RIGHT.val. This is how we avoid duplicate pairing in the ML Library.\", \"post_time\": \"2012-03-13 16:44:05\" },\n\t{ \"post_id\": 1331, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOK, then how about using NORMALIZE, like this:\\n\\n
ds := dataset([{1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'}],{unsigned1 recid,string F1});\\nCntDS := COUNT(ds);\\n\\n{STRING F1} XF(ds L, integer C) := TRANSFORM,SKIP(C <= L.RecID)\\n\\tSELF.F1 := L.F1 + ds[C].F1;\\nEND;\\n\\npermuted := NORMALIZE(ds,CntDS,XF(LEFT,COUNTER));\\n\\noutput(permuted);\\n\\n/*This gets me: \\n AB\\n AC\\n AD\\n AE\\n BC\\n BD\\n BE\\n CD\\n CE\\n DE*/\\n
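For comparison, David Wheelock's LEFT.val < RIGHT.val self-JOIN suggestion from the post just above can be sketched against the same toy dataset. This is only an illustrative sketch, not code from the thread; the pair layout and the use of the ALL option are my assumptions:

// Duplicate-free pairing via a self-JOIN; ALL is required because the
// join condition contains no equality test.
ds := DATASET([{1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'}],{UNSIGNED1 recid, STRING F1});
PairRec := {STRING F1};
pairs := JOIN(ds, ds,
              LEFT.recid < RIGHT.recid,   // each unordered pair is built exactly once
              TRANSFORM(PairRec, SELF.F1 := LEFT.F1 + RIGHT.F1),
              ALL);
OUTPUT(pairs); // AB AC AD AE BC BD BE CD CE DE (row order may vary)

Like the NORMALIZE version, this yields each unordered pair exactly once; note that on Thor the ALL option copies one side of the join to every node, which is the caveat raised elsewhere in this thread.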
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-13 15:32:19\" },\n\t{ \"post_id\": 1330, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":nofml7ak]Would a self-JOIN (left and right the same dataset) where the condition is always TRUE get you where you want to be?\\n\\nClose, if I make the JOIN condition 'LEFT != RIGHT'. The only remaining problem is that pairs of records are called twice. If the record set was [a,b,c] then both [a,b] and [b,a] are found and transformed, for instance. I need one and only one pair.\", \"post_time\": \"2012-03-13 14:54:46\" },\n\t{ \"post_id\": 1327, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Re: Permutation\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThe DEDUP function has an ALL option that checks for duplicates amongst all possible commutative pairs, but I don't think that's what you want. Would a self-JOIN (left and right the same dataset) where the condition is always TRUE get you where you want to be?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-13 14:06:22\" },\n\t{ \"post_id\": 1325, \"topic_id\": 309, \"forum_id\": 8, \"post_subject\": \"Permutation\", \"username\": \"DSC\", \"post_text\": \"I could have sworn that there was an ECL (or standard library) function for iterating through a dataset as a permutation, calling a user-supplied transform with pairs of records. By 'permutation' I mean something like 'each record paired with every other record.' Iteratively, the records could be chosen like in a shell sort algorithm.\\n\\nI've written an ECL function macro that performs this work, but I doubt it's an optimal implementation. I'm hoping that there is a built-in function I've overlooked.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-13 12:25:08\" },\n\t{ \"post_id\": 1329, \"topic_id\": 310, \"forum_id\": 8, \"post_subject\": \"Re: Std: EncodeRfsQuery and RfsAction clarification\", \"username\": \"DSC\", \"post_text\": \"Excellent. Thanks for the clarification/validation, Richard.\\n\\nDan\", \"post_time\": \"2012-03-13 14:46:35\" },\n\t{ \"post_id\": 1328, \"topic_id\": 310, \"forum_id\": 8, \"post_subject\": \"Re: Std: EncodeRfsQuery and RfsAction clarification\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nIt appears that these methods simply open a TCP/IP port and execute a command. The former allows a response to be treated as input (as if from a file) while the latter either ignores any response or simply closes the port after delivering the command. Is that a fair description?
I'd say yes, that's a pretty fair description.\\n\\nHow are these methods actually called/used in a cluster setup? Would every node participating in the query execute them individually unless special coding is added? If yes, what would that special coding typically be?
That depends on whether the code is running on Thor or Roxie. On Thor, yes, each node would be making their own separate call for data (presumably different, since each node would be processing a different set of data). On Roxie, it would be the one node handling the query making the call (unless it was wrapped in an ALLNODES).\\n\\nI would typically see this as being used primarily in Roxie queries, since, depending on what kind of box the RFS server is, a large Thor could easily overwhelm it. This would be very applicable to the OLTP-emulation scenario I posted yesterday to Apurv (viewtopic.php?f=8&t=307)\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-13 14:24:11\" },\n\t{ \"post_id\": 1326, \"topic_id\": 310, \"forum_id\": 8, \"post_subject\": \"Std: EncodeRfsQuery and RfsAction clarification\", \"username\": \"DSC\", \"post_text\": \"I'm looking for a bit of clarification on EncodeRfsQuery and RfsAction, two methods defined in the standard library.\\n\\nIt appears that these methods simply open a TCP/IP port and execute a command. The former allows a response to be treated as input (as if from a file) while the latter either ignores any response or simply closes the port after delivering the command. Is that a fair description?\\n\\nHow are these methods actually called/used in a cluster setup? Would every node participating in the query execute them individually unless special coding is added? If yes, what would that special coding typically be?\\n\\nThanks for any pointers!\\n\\nDan\", \"post_time\": \"2012-03-13 12:46:58\" },\n\t{ \"post_id\": 1380, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"Thanks a bunch. You have been very helpful!\", \"post_time\": \"2012-03-16 18:07:58\" },\n\t{ \"post_id\": 1379, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"My latest request was if there was a way to consolidate all the data into a single AllTimeLogicalFile that is currently added to the SuperFile. The answer seems to be no since the file is 'in use'. Here is the error that I am seeing:\\n\\nError: System error: -1: Graph[10], CDistributedFileDirectory::removeEntry Cannot remove file gsptel::alltime as owned by SuperFile gsptel::superfile::alltime
Correct -- you will see this type of error message any time you try to write to a file that is already being read from in the job -- there is no UPDATE in ECL.\\n\\nThor does not allow you to overwrite a file you are reading, you must write the newly consolidated data to a new logical file, then make that new logical file the one subfile in your superfile -- that's how the data consolidation is supposed to work. The idea is to keep a single superfile from ever having more than ~100 subfiles by simply writing all the superfile data out to a single new logical file and replacing the plethora of subfiles with the new consolidated file, so you can begin again adding new subfiles as they come in.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-16 17:59:06\" },\n\t{ \"post_id\": 1378, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"Yes, I understand the FUNCTIONMACRO from your code snippet and have read the Programmer's Guide. I have been using nested SuperFiles but simplified the code for the problem at hand.\\n\\nMy latest request was if there was a way to consolidate all the data into a single AllTimeLogicalFile that is currently added to the SuperFile. The answer seems to be no since the file is 'in use'. Here is the error that I am seeing:\\n\\nError: System error: -1: Graph[10], CDistributedFileDirectory::removeEntry Cannot remove file gsptel::alltime as owned by SuperFile gsptel::superfile::alltime\\n\\nMore precisely, the AllTimeLogicalFile is the logical file that has all the data till date. Once the DailyLogicalFile has been populated, I like to take all this data and put them in the AllTimeLogicalFile so that the daily file can be used for the data from the following day. Turns out this is not possible since the AllTimeLogicalFile has already been added to the SuperFile.\", \"post_time\": \"2012-03-16 17:44:32\" },\n\t{ \"post_id\": 1376, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\n\\nNow if I want to consolidate the SubFile2 data into the AllTimeLogicalFile, can I do this without creating a temporary file?
Using the code I posted earlier, you would need to move every CSV data file into a flat file before adding it to your superfile (if that's what you meant by "creating a temporary file") -- that's why I wrote the FUNCTIONMACRO to accomplish that for you. Your problem comes in because the system thinks you're trying to add a file that is not exactly the same format as the subfiles already in the superfile -- all subfiles must be exactly the same format. \\n\\nHOWEVER, we've come up with a second (simpler/more efficient) workaround -- when you initially spray the files, select UTF8N as the format instead of the default ASCII and your code as you initially wrote it will work.\\n\\nOne more item: Have you looked at the Programmer's Guide article "Creating and Maintaining Superfiles?" That article describes exactly what you're trying to accomplish, I think. One thing that I don't see you doing is nesting superfiles within another superfile (this IS allowed), so that the only "dataset" your code ever needs to reference is the top level "all data" superfile -- all the rest of the code is about maintaining the superfile/subfile structure.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-16 15:28:56\" },\n\t{ \"post_id\": 1369, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"Thanks Richard. This is getting close.\\n\\nNow if I want to consolidate the SubFile2 data into the AllTimeLogicalFile, can I do this without creating a temporary file?\\n\\nThanks for all your help so far.\", \"post_time\": \"2012-03-15 21:33:07\" },\n\t{ \"post_id\": 1364, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\n\\nI got your files, have duplicated the problem, and reported it. The problem appears to be the way the OUTPUT is writing the new CSV file to disk from the superfile.\\n\\nOne possible workaround would be to take your sprayed data and write it to flat files before putting it in superfiles, like this:\\n\\nIMPORT STD;\\nLayout_Gsp_Actions := RECORD\\n .... fieldds\\nEND;\\n\\nDailyFile := '~RTTEST::DailyFile';\\n\\nSubFile1 := '~gsptel::gspactions_2012020100';\\nSubFile2 := '~gsptel::gspactions_2012020101';\\nAllTimeLogicalFile := '~RTTEST::alltime';\\n\\nSubFileDS(SF) := FUNCTIONMACRO\\n subfile := DATASET(SF,Layout_Gsp_Actions,CSV(SEPARATOR('\\\\t'),TERMINATOR('\\\\t\\\\n')));\\n filename := SF + '_Flat';\\n WriteFlatFile := OUTPUT(subfile,,filename,overwrite);\\n RETURN WHEN(filename,WriteFlatFile);\\nENDMACRO;\\n\\nDailyData := DATASET(DailyFile,Layout_Gsp_Actions,FLAT);\\n\\nStep0 := SEQUENTIAL(\\n\\tStd.File.CreateSuperFile(DailyFile),\\n\\tOUTPUT('0. Done creating superfiles')\\n\\t);\\n\\nStep1 := SEQUENTIAL(\\n\\tStd.File.StartSuperFileTransaction(),\\n\\tStd.File.AddSuperFile(DailyFile,SubFileDS(SubFile1)),\\n\\tStd.File.FinishSuperFileTransaction(),\\n\\tOUTPUT('1. Done adding a logical file to the daily file'),\\n\\tCOUNT(DailyData(Id <> 0)),\\t\\n\\tOUTPUT(DailyData(Id <> 0)),\\n\\tOUTPUT(DailyData,,AllTimeLogicalFile,OVERWRITE)\\n\\t);\\n\\nStep2 := SEQUENTIAL(\\n\\tStd.File.StartSuperFileTransaction(),\\n\\tStd.File.ClearSuperFile(DailyFile),\\n\\tStd.File.AddSuperFile(DailyFile,AllTimeLogicalFile),\\n\\tStd.File.AddSuperFile(DailyFile,SubFileDS(SubFile2)),\\n\\tStd.File.FinishSuperFileTransaction(),\\n\\tOUTPUT('2. 
Done '),\\n\\tCOUNT(DailyData(Id <> 0)),\\t\\n\\tOUTPUT(DailyData(Id <> 0))\\n\\t);\\n\\nSEQUENTIAL(Step0,Step1,Step2);
The key to this is the FUNCTIONMACRO that writes the flat file and returns the filename for use by the AddSuperFile function. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-15 15:50:27\" },\n\t{ \"post_id\": 1361, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\nI am unable to upload a file. The error message is: "The extension is not allowed".\\nThe file size is < 200 KB and I have tried with and without extension.
OK, no problem. You can just zip the file and email it to me -- richard.taylor@lexisnexis.com\\n\\nAnd if that doesn't work either, now that I have the structure I should be able to create a CSV file to play with (but I'd rather have yours, just in case there are some anomalies).\\n\\n\\nRichard\", \"post_time\": \"2012-03-14 21:10:13\" },\n\t{ \"post_id\": 1360, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"Hey Richard,\\nI am unable to upload a file. The error message is: "The extension is not allowed".\\nThe file size is < 200 KB and I have tried with and without extension.\", \"post_time\": \"2012-03-14 20:55:29\" },\n\t{ \"post_id\": 1359, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"Here is the record structure:\", \"post_time\": \"2012-03-14 20:52:59\" },\n\t{ \"post_id\": 1358, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"I will upload a data file shortly.
Great.\\n\\nI don't think I ever used the UTF8N format. To rule out any such possibility, I sprayed the CSV file from ECLWatch and ensured that the selected format is ASCII. I saw the same error and there has been no change in the result.
Hmm, then why is the error saying it's in utf8n format? Can you post the RECORD structures you're using, please?\\nOn a side note, when I view logical files on clicking the 'Browse Logical Files' link, I find the sprayed file does not have any associated 'Records' information while the logical file from superfile data consolidation does have the 'Records' information. This matches the COUNT output for the superfile dataset. The file size in both cases is identical. Any idea why ECLWatch shows the records info in one case but not the other?
Sprayed CSV files never show a record count because spray doesn't ever bother to count variable-length records (it would be inefficient to its operation). Spray only cares about getting the data onto the Thor nodes as quickly as possible, while ensuring a siongle record never spans multiple nodes (a record must be whole and complete on a single node, always). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-14 20:50:50\" },\n\t{ \"post_id\": 1357, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"I will upload a data file shortly.\\n\\nI don't think I ever used the UTF8N format. To rule out any such possibility, I sprayed the CSV file from ECLWatch and ensured that the selected format is ASCII. I saw the same error and there has been no change in the result.\\n\\nOn a side note, when I view logical files on clicking the 'Browse Logical Files' link, I find the sprayed file does not have any associated 'Records' information while the logical file from superfile data consolidation does have the 'Records' information. This matches the COUNT output for the superfile dataset. The file size in both cases is identical. Any idea why ECLWatch shows the records info in one case but not the other?\", \"post_time\": \"2012-03-14 20:42:38\" },\n\t{ \"post_id\": 1354, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"Can you attach a small example data file that I can use to try to duplicate the issue?\\n\\nAlso, please confirm whether you are using the UTF8N Format when you initially spray the data -- and do you really need that (IOW, what happens if you spray as ASCII format?).\", \"post_time\": \"2012-03-14 18:10:12\" },\n\t{ \"post_id\": 1353, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"I had tried this earlier and did so again but am still seeing the same issue.\\n\\nAllTimeData := DATASET(AllTimeFile,Layout_Gsp_Actions,CSV(SEPARATOR('\\\\t'),TERMINATOR('\\\\t\\\\n'),UNICODE));\\n\\nOUTPUT(AllTimeData,,AllTimeLogicalFile,CSV(SEPARATOR('\\\\t'),TERMINATOR('\\\\t\\\\n'),UNICODE),OVERWRITE),\\n\\nError: System error: -1: addSubFile: gsptel::gspactions_2012020101's format (utf8n) is different than gsptel::alltime's (csv) (0, 0), -1, \\n\\nAny suggestions?\\nThanks\\nVinod\", \"post_time\": \"2012-03-14 17:59:37\" },\n\t{ \"post_id\": 1352, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Re: Data Consolidation with SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"How do I make the new logical file (alltime file) format same as other logical files format (utf8n)?
The UTF8N indicates that you sprayed the file as a UTF8 UNICODE file, so your OUTPUT to create the consolidated file should include the UNICODE option within the CSV, like this:\\nOUTPUT(AllTimeData,,\\n AllTimeLogicalFile,\\n CSV(SEPARATOR('\\\\t'),TERMINATOR('\\\\t\\\\n'),UNICODE),OVERWRITE)
Per the "OUTPUT CSV Files" section of the OUTPUT docs: "UNICODE Specifies all output is in Unicode UTF8 format "\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-14 17:46:13\" },\n\t{ \"post_id\": 1344, \"topic_id\": 313, \"forum_id\": 8, \"post_subject\": \"Data Consolidation with SuperFiles\", \"username\": \"vinod.mamtani\", \"post_text\": \"I have a set of CSV files in the dropzone. After spraying these files, I added the corresponding logical files to my superfile. Thereafter, I defined a superfile dataset to output all the data to a single logical file. Thereafter I cleared the superfile and added this new logical file to the superfile. So far so good.\\n\\nThereafter, when I try to add another set of logical files to the same superfile, I get the following error:\\n\\nError: System error: -1: addSubFile: gsptel::gspactions_2012020101's format (utf8n) is different than gsptel::alltime's (csv)\\n\\nIn order to narrow down the problem, I repeated this exercise with 2 logical files on a single node cluster. Here is the sequence:\\n\\n1. Add a logical file to the daily superfile:\\nSubFile1 := '~gsptel::gspactions_2012020100';\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(DailyFile,SubFile1),\\n Std.File.FinishSuperFileTransaction(),\\n //OUTPUT('Done adding a logical file to the daily file')\\n\\t);\\n\\n2. Roll over this data into the weekly file:\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(WeeklyFile,DailyFile,,TRUE),\\n Std.File.ClearSuperFile(DailyFile),\\n Std.File.FinishSuperFileTransaction(),\\n // OUTPUT('Done rolling over daily file to the weekly file'),\\n // COUNT(WeeklyData(Id <> 0)),\\t\\n // OUTPUT(WeeklyData(Id <> 0))\\n\\t);\\n\\n3. Add weekly file to the all time file.\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(AllTimeFile,WeeklyFile,,TRUE),\\n Std.File.ClearSuperFile(WeeklyFile),\\n Std.File.FinishSuperFileTransaction(),\\n // OUTPUT('Done rolling over weekly file to the all time file'),\\n // COUNT(AllTimeData(Id <> 0)),\\n // OUTPUT(AllTimeData(Id <> 0))\\n\\t);\\n\\n4. Output all time data to all time logical file.\\nAllTimeLogicalFile := '~gsptel::alltime';\\nSEQUENTIAL(\\n OUTPUT(AllTimeData,,AllTimeLogicalFile,CSV(SEPARATOR('\\\\t'),TERMINATOR('\\\\t\\\\n')),OVERWRITE),\\n Std.File.StartSuperFileTransaction(),\\n Std.File.ClearSuperFile(AllTimeFile),\\n Std.File.AddSuperFile(AllTimeFile,AllTimeLogicalFile),\\n Std.File.FinishSuperFileTransaction(),\\n OUTPUT('Done adding a logical file to the all time file'),\\n COUNT(AllTimeData(Id <> 0)),\\n OUTPUT(AllTimeData(Id <> 0))\\n\\t);\\n\\nI ran the diff utility on 'alltime' file and 'gspactions_2012020100' file and did not see any difference. \\n\\n5. 
Repeat this for more data.\\nSubFile2 := '~gsptel::gspactions_2012020101';\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(DailyFile,SubFile2),\\n Std.File.FinishSuperFileTransaction(),\\n //OUTPUT('Done adding a second logical file to the daily file')\\n\\t);\\n\\n// Roll over this data into the weekly file.\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(WeeklyFile,DailyFile,,TRUE),\\n Std.File.ClearSuperFile(DailyFile),\\n Std.File.FinishSuperFileTransaction(),\\n OUTPUT('Done rolling over daily file for the second logical file to the weekly file'),\\n COUNT(WeeklyData(Id <> 0)),\\n OUTPUT(WeeklyData(Id <> 0))\\n\\t);\\n\\n// Add weekly file to the all time file.\\nSEQUENTIAL(\\n Std.File.StartSuperFileTransaction(),\\n Std.File.AddSuperFile(AllTimeFile,WeeklyFile,,TRUE),\\n Std.File.ClearSuperFile(WeeklyFile),\\n Std.File.FinishSuperFileTransaction(),\\n OUTPUT('Done rolling over weekly file to the all time file'),\\n COUNT(AllTimeData(Id <> 0)),\\n OUTPUT(AllTimeData(Id <> 0))\\n\\t);\\n\\nThis is when I see this error.\\nError: System error: -1: addSubFile: gsptel::gspactions_2012020101's format (utf8n) is different than gsptel::alltime's (csv)\\n\\nSure enough, the EclWatch shows these files as:\\nLogicalName Size Records Modified Owner Cluster Parts\\ngsptel::alltime 15,156,976 77,144 2012-03-14 05:02:46 hpccdemo mythor 1\\ngsptel::gspactions_2012020100 15,156,976 - 2012-03-06 00:35:57 - mythor 1\\ngsptel::gspactions_2012020101 12,289,356 - 2012-03-06 22:49:42 - mythor 1\\n\\nNote that the alltime file has number of records as 77,144.\\n\\nQuestion:\\nHow do I make the new logical file (alltime file) format same as other logical files format (utf8n)?\\n\\nThanks in advance.\\nVinod\", \"post_time\": \"2012-03-14 05:16:03\" },\n\t{ \"post_id\": 1403, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"Thank you Tony. All information provided has been very useful.\\n\\nThanks again!\\nVinod\", \"post_time\": \"2012-03-22 17:36:26\" },\n\t{ \"post_id\": 1400, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You can also then publish the ecl as a precompiled thor query:\\n\\n\\necl publish SprayFile2.ecl --name=APublishedQuery --Activate --server=IP --cluster=thor\\n
\\n\\nThen by going to http://IP:8002 and expanding the &quot;thor&quot; queryset and finding the query named APublishedQuery, you can access a form, fill in the parameter values, and click submit.\\n\\nYou can also run the published query (won't need to recompile) using:\\n\\n\\necl run thor APublishedQuery --server=IP --cluster=thor --input=&quot;<any><TheFile>/var/lib/HPCCSystems/mydropzone/share2.tsv</TheFile><TheSize>8192</TheSize><TheSeparator>\\\\t</TheSeparator><TheTerminator>\\\\t\\\\n</TheTerminator><TheOutfile>~gsptel::share2</TheOutfile></any>&quot;\\n
\\n\\nRegards,\\nTony\", \"post_time\": \"2012-03-20 20:48:58\" },\n\t{ \"post_id\": 1398, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"I think the easiest way is actually to create SprayFile2.ecl as follows, using the STORED keyword to create input parameters:\\n\\n\\nIMPORT Std;\\nEXPORT SprayFile2 := MODULE\\n\\n EXPORT SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile) := MACRO\\n Std.File.SprayVariable('xx.xxx.xx.xx',\\n Infile,\\n RecordSize,\\n CSVSeparator,\\n CSVTerminator,\\n ,\\n 'mythor',\\n Outfile,\\n ,,,\\n TRUE,\\n ,);\\n\\n OUTPUT('Done spraying');\\n ENDMACRO;\\n\\nEND;\\n\\nInfile := ''/var/lib/HPCCSystems/mydropzone/share2.tsv'' : STORED('TheFile');\\nRecordSize := 8192 : STORED('TheSize');\\nCSVSeparator := '\\\\t' : STORED('TheSeparator');\\nCSVTerminator := '\\\\t\\\\n' : STORED('TheTerminator');\\nOutfile := '~gsptel::share2' : STORED('TheOutfile');\\n\\nSprayFile2.SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile);\\n
\\n\\nAnd then run it with eclplus as follows:\\n\\n\\neclplus cluster=thor server=http://xxx.onlive.net ecl=@SprayFile2.ecl /TheFile="/var/lib/HPCCSystems/mydropzone/share2.tsv" /TheSize=8192 /TheSeparator="\\\\t" /TheTerminator="\\\\t\\\\n" /TheOutfile="~gsptel::share2"\\n
\\n\\nor via "ecl run" as\\n\\n\\necl run SprayFile2.ecl --server=xxx.onlive.net --cluster=thor --input="<any><TheInfile>\\n/var/lib/HPCCSystems/mydropzone/share2.tsv</TheInfile><TheSize>8192</TheSize><TheSeparator>\\\\t</TheSeparator><TheTerminator>\\\\t\\\\n</TheTerminator><TheOutfile>~gsptel::share2</TheOutfile></any>"\\n
\", \"post_time\": \"2012-03-20 20:33:18\" },\n\t{ \"post_id\": 1393, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"This works:\\nSprayFile2.ecl:\\nIMPORT Std;\\nEXPORT SprayFile2 := MODULE\\n\\n EXPORT SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile) := MACRO\\n Std.File.SprayVariable('xx.xxx.xx.xx',\\n Infile,\\n RecordSize,\\n CSVSeparator,\\n CSVTerminator,\\n ,\\n 'mythor',\\n Outfile,\\n ,,,\\n TRUE,\\n ,);\\n\\n OUTPUT('Done spraying');\\n ENDMACRO;\\n\\nEND;\\n\\nInfile := '/var/lib/HPCCSystems/mydropzone/share2.tsv';\\nRecordSize := 8192;\\nCSVSeparator := '\\\\t';\\nCSVTerminator := '\\\\t\\\\n';\\nOutfile := '~gsptel::share2';\\n\\nSprayFile2.SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile);
\\n\\nInvocation from EclPlus:\\neclplus cluster=thor server=http://xxx.onlive.net ecl=@SprayFile2.ecl\\n
\\nThis doesn’t work\\nIn SprayFile2.ecl, comment out the following lines:\\n//Infile := '/var/lib/HPCCSystems/mydropzone/share2.tsv';\\n//RecordSize := 8192;\\n//CSVSeparator := '\\\\t';\\n//CSVTerminator := '\\\\t\\\\n';\\n//Outfile := '~gsptel::share2';
\\n\\nInvocation from EclPlus:\\neclplus cluster=thor server=http://xxx.onlive.net ecl=$SprayFile2.SprayFileFromDropZoneToThor('/var/lib/HPCCSystems/mydropzone/share2.tsv',8192,'\\\\t','\\\\t\\\\n','~gsptel::share2')
\\n\\nError code:\\nsyntax error near unexpected token `('\", \"post_time\": \"2012-03-20 14:21:36\" },\n\t{ \"post_id\": 1392, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"Yes, I had tried that earlier but got this error then:\\nWorkunit W20120320-140722 submitted\\n<Error><source>eclcc</source><line>1</line><code>3002</code><message> syntax error near "."</message></Error>\\n\\nAllow me to send you the files.\\nThanks\\nVinod\", \"post_time\": \"2012-03-20 14:10:54\" },\n\t{ \"post_id\": 1391, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\n\\nI have pasted the code in the original post
OK, then looking at this call:\\n\\neclplus cluster=thor server=IP address of the server ecl=$SprayFile2.SprayFileFromDropZoneToThor{_Infile='/var/lib/HPCCSystems/mydropzone/share2.tsv' _ RecordSize=8192 _CSVSeparator='\\\\t' _CSVTerminator='\\\\t\\\\n' _Outfile='~gsptel::share2'}
the one thing that jumps out at me (once I pasted it into the IDE and increased the font size) is the use of curly braces instead of parentheses and your use of the parameter names. It appears as though you're trying to pass in a RECORD structure instead of the individual parameters, and that's not how your MACRO is designed. Try it this way:\\n\\neclplus cluster=thor server=IP address of the server ecl=$SprayFile2.SprayFileFromDropZoneToThor('/var/lib/HPCCSystems/mydropzone/share2.tsv',8192,'\\\\t','\\\\t\\\\n','~gsptel::share2')
The point here being that, what appears following the "ecl=" command line parameter should simply be valid ECL code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-20 13:34:43\" },\n\t{ \"post_id\": 1390, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"Thanks Richard. I have pasted the code in the original post and will be happy to send them to you as well. \\n\\nYou are right. I have successfully used DfuPlus. I want to write few macros that take file names as parameters and can be invoked from EclPlus, hence the exercise.\", \"post_time\": \"2012-03-20 13:04:42\" },\n\t{ \"post_id\": 1389, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\n\\nIt would help if I could see the actual code in your SprayFile.ecl, however, since what you are attempting to do is to "automate" spraying files to Thor, my next question is: Why are you not using DFUplus.exe to do that? DFUplus.exe was designed specifically to "automate" file spray/despray jobs in a command line or batch file mode.\\n\\nRichard\", \"post_time\": \"2012-03-20 12:56:11\" },\n\t{ \"post_id\": 1388, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"Yes I have tried what you suggest.\\n\\n1. Write ECL code to a file called SprayFile.ecl\\n This file defines a macro that takes in few parameters.\\n\\n2. Write an ECL invoker file called SprayFileInvoker.sh\\n This file calls the macro in SprayFile.ecl with parameters\\n\\n3. Call SprayFileInvoker.sh from eclplus\\n This works.\\n\\n4. Call SprayFile.ecl from eclplus\\n This fails. I am trying to understand the correct ways to pass parameters to the macro in SprayFile.ecl\", \"post_time\": \"2012-03-19 22:18:22\" },\n\t{ \"post_id\": 1387, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Re: Eclplus and parameter passing\", \"username\": \"rtaylor\", \"post_text\": \"Vinod,\\n\\nHave you tried writing the ECL code to run to a file, then calling eclplus and just passing it the file containing the code to run? With as many parameters as you want to pass to the macro, that's the way I would do it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-19 20:21:36\" },\n\t{ \"post_id\": 1385, \"topic_id\": 317, \"forum_id\": 8, \"post_subject\": \"Eclplus and parameter passing\", \"username\": \"vinod.mamtani\", \"post_text\": \"My goal is to get a simple example of parameter passing working with eclplus.\\n\\nHere is my ecl file:\\nIMPORT Std;\\nEXPORT SprayFile2 := MODULE\\n\\n EXPORT SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile) := MACRO\\n Std.File.SprayVariable('IP address of the machine',\\n Infile,\\n RecordSize,\\n CSVSeparator,\\n CSVTerminator,\\n ,\\n 'mythor',\\n Outfile,\\n ,,,\\n TRUE,\\n ,);\\n\\n OUTPUT('Done spraying');\\n ENDMACRO;\\n\\nEND;
\\n\\nMy question is how do I pass these parameters for macro expansion from eclplus?\\n\\nWhen I add and uncomment the following lines to the same ecl file:\\n\\n//Infile := '/var/lib/HPCCSystems/mydropzone/share2.tsv';\\n//RecordSize := 8192;\\n//CSVSeparator := '\\\\t';\\n//CSVTerminator := '\\\\t\\\\n';\\n//Outfile := '~gsptel::share2';\\n\\n//SprayFile2.SprayFileFromDropZoneToThor(Infile,RecordSize,CSVSeparator,CSVTerminator,Outfile);
\\n\\nthe following eclplus invocation works without any issues:\\n\\neclplus cluster=thor server=IP address of the server ecl=@SprayFile2.ecl
\\n\\nHere is what I have tried for parameter passing with no success:\\n\\neclplus cluster=thor server=IP address of the server ecl=$SprayFile2.SprayFileFromDropZoneToThor{_Infile='/var/lib/HPCCSystems/mydropzone/share2.tsv' _ RecordSize=8192 _CSVSeparator='\\\\t' _CSVTerminator='\\\\t\\\\n' _Outfile='~gsptel::share2'}
\\n\\nThanks in advance for your help.\\nVinod\", \"post_time\": \"2012-03-18 21:07:35\" },\n\t{ \"post_id\": 1411, \"topic_id\": 318, \"forum_id\": 8, \"post_subject\": \"Re: Control over 'separator' used by Str.GetNthWord\", \"username\": \"Allan\", \"post_text\": \"Thanks\\n\\nDavid\", \"post_time\": \"2012-03-27 21:40:42\" },\n\t{ \"post_id\": 1405, \"topic_id\": 318, \"forum_id\": 8, \"post_subject\": \"Re: Control over 'separator' used by Str.GetNthWord\", \"username\": \"dabayliss\", \"post_text\": \"This is an open issue on github:\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/1650\\n\\nThey currently have it pencilled in from 3.8\\n\\nDavid\", \"post_time\": \"2012-03-26 16:05:21\" },\n\t{ \"post_id\": 1404, \"topic_id\": 318, \"forum_id\": 8, \"post_subject\": \"Control over 'separator' used by Str.GetNthWord\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIt would be nice if one could supply a separator to 'GetNthWord' in the same manner as that used in 'SplitWords'.\\n\\nI have a comma separated list (a normal thing to have) and have had to write a curious workaround. \\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-26 10:10:23\" },\n\t{ \"post_id\": 1410, \"topic_id\": 319, \"forum_id\": 8, \"post_subject\": \"Re: while doing output xml file, how to add xml namespace\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"I don't think there is currently a way for ECL to do this automatically, rather you have to treat the namespace declaration as an attribute, and populate it with the URI. You then have to treat the namespace prefixes as part of the tag names:\\n\\n\\nrightRec := RECORD\\n string xmlns {xpath('@xmlns:right')};\\n string field1 {xpath('right:field1')};\\n string field2 {xpath('right:field2')};\\n string field3 {xpath('right:field3')};\\nEND;\\nleftRec := RECORD\\n string xmlns {xpath('@xmlns:left')};\\n string field1 {xpath('left:field1')};\\n string field2 {xpath('left:field2')};\\n string field3 {xpath('left:field3')};\\nEND;\\nrowRec := RECORD\\n int id;\\n string name;\\n leftRec lrec {xpath('left:left')};\\n rightRec rrec {xpath('right:right')};\\nEND;\\n
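A self-contained sketch of how record definitions of this shape get used follows (my own illustration; the dataset, logical file name, and values are hypothetical). The namespace declaration is just an attribute on the containing element, and the prefix is written literally into each tag name via the xpath:

nsRec := RECORD
    STRING xmlns  {xpath('@xmlns:left')};
    STRING field1 {xpath('left:field1')};
END;
outRec := RECORD
    INTEGER id;
    nsRec lrec {xpath('left:left')};
END;
d := DATASET([{1, {'http://www.abc.com/lit/left', 'value1'}}], outRec);
OUTPUT(d,, '~demo::namespaced_out', XML('row'), OVERWRITE);
// Expected shape of each row:
// <row><id>1</id><left:left xmlns:left="http://www.abc.com/lit/left"><left:field1>value1</left:field1></left:left></row>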
\\n\\nRegards,\\nTony\", \"post_time\": \"2012-03-27 15:10:02\" },\n\t{ \"post_id\": 1407, \"topic_id\": 319, \"forum_id\": 8, \"post_subject\": \"while doing output xml file, how to add xml namespace\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nWhile doing recordset to xml file in output, how to add xml namespace for the fields.\\n\\nFor example,\\n\\n<row>\\n<id></id>\\n<name></name>\\n<left:left xmlns:left=”http://www.abc.com/lit/left”>\\n <left:field1>\\n <left:field2>\\n <left:field3>\\n</left:left>\\n< right: right xmlns: right =”http://www.abc.com/lit/right”>\\n < right:field1>\\n < right:field2>\\n < right:field3>\\n</left:left>\\n</row>\\n\\nlike this output format i need, how to add these concept in output?\", \"post_time\": \"2012-03-27 09:59:17\" },\n\t{ \"post_id\": 1409, \"topic_id\": 320, \"forum_id\": 8, \"post_subject\": \"Re: Pagination for Search Result\", \"username\": \"DSC\", \"post_text\": \"[quote="shriram.soni":3m0rl6ve]We are planning to show the search result into an UI. We have considered to limit number of results to 1000. And at UI side we need to display 50 results at a time and user can navigate it. What is the best strategy to implement it.\\n\\nYour strategy depends entirely on your environment.\\n\\nIf your search results are very small (like, a unique ID and a single string for display) then you can likely just dump all of the results to the client and let it handle the rest without further server interaction.\\n\\nIf your results are larger, you will need to perform the pagination on the server side instead. Include pagination parameters with your search query, such as 'starting offset' and possibly 'page size' if your users can change the number of items to display at one time, then use the CHOOSEN() ECL function to limit the results sent to the client. You may also want to include the offset of the returned results and the total number of items found in the response, so the client knows whether or not to present controls that allow forward- and backward-paging.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-03-27 13:10:43\" },\n\t{ \"post_id\": 1408, \"topic_id\": 320, \"forum_id\": 8, \"post_subject\": \"Pagination for Search Result\", \"username\": \"shriram.soni\", \"post_text\": \"We are planning to show the search result into an UI. We have considered to limit number of results to 1000. And at UI side we need to display 50 results at a time and user can navigate it. 
What is the best strategy to implement it.\", \"post_time\": \"2012-03-27 12:09:02\" },\n\t{ \"post_id\": 29533, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"harshdesai\", \"post_text\": \"IMPORT mysql;\\ninteger ExtractIt(String Query) := EMBED(mysql : user('rchapman'),database('test'),server('127.0.0.1'), port('3306'))\\n?\\nENDEMBED;\\n\\nQuery := 'select count(1) from transaction_log' ;\\nExtractIt(Query);\\n\\n\\n\\n\\nEven if there is query ,How to negotiate quote with escape sequence .\\n\\nSELECT * FROM transaction_log WHERE account_number NOT IN ('10000001','10000024') AND batch_job_id IS NOT NULL AND SUBSTR(date_added, 1, 10) = SUBDATE(CURDATE(),1) ORDER BY date_added DESC;\", \"post_time\": \"2020-02-14 04:19:38\" },\n\t{ \"post_id\": 28513, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team ,\\nIs there a limit to fetch data at one go or we can fetch all the data(hunderd thousands of data) at once\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2019-12-17 09:38:50\" },\n\t{ \"post_id\": 28393, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team,\\nCan you please suggest how can i pass query runtime \\nTrying to pass query runtime to extract as this mainly for count respectives.\\n\\nIMPORT mysql;\\ninteger ExtractIt(String Query) := EMBED(mysql : user('rchapman'),database('test'),server('127.0.0.1'), port('3306'))\\n ?\\n ENDEMBED;\\n\\nQuery := 'select count(1) from transaction_log' ;\\nExtractIt(Query);\\n\\n\\n\\n\\nEven if there is query ,How to negotiate quote with escape sequence .\\n\\nSELECT * FROM transaction_log WHERE account_number NOT IN ('10000001','10000024') AND batch_job_id IS NOT NULL AND SUBSTR(date_added, 1, 10) = SUBDATE(CURDATE(),1) ORDER BY date_added DESC;\", \"post_time\": \"2019-12-11 11:27:02\" },\n\t{ \"post_id\": 1469, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"flavio\", \"post_text\": \"Arti,\\n\\nAlthough I'm not sure if the rfsserver for MySQL gets built by default in our current version, we could certainly add it to our build scripts in future versions, if this is useful to people.\\n\\nIn any case, the sources are at: https://github.com/hpcc-systems/HPCC-Platform/tree/master/dali/rfs/rfsmysql and there is even a makefile, so building it would be as simple as running "make" in the directory that you download it to.\\n\\nPlease let me know if you need any help there.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-04-10 12:10:27\" },\n\t{ \"post_id\": 1462, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"Arti\", \"post_text\": \"Hi,\\n\\n we are trying to understand the connection management between any RDBMS and HPCC. If its not possible with MSSQL we can try with MySql. \\n\\nHere we are not looking for any bulk load of Data for now, just trying with a database which has only 1 table with less than 10 records, so can you please guide us regarding this approach.\\nThanks\", \"post_time\": \"2012-04-10 05:51:44\" },\n\t{ \"post_id\": 1454, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"flavio\", \"post_text\": \"Arti,\\n\\nthe data access from/to RDBMS using rfs requires an rfsserver running on the database server. 
As far as I know, the only rfsserver that exists for now is the one for MySQL (not Ms SQL).\\n\\nThis is not a recommended way of accessing data residing on an RDBMS system, as bulk dump/load operations tend to perform significantly better; although there are occassions where access to a small number of records without the intermediate step of the dump/load could justify using it.\\n\\nPlease let me know.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-04-09 18:02:59\" },\n\t{ \"post_id\": 1452, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"Arti\", \"post_text\": \"Hi,\\nIs there any update on this issue.\", \"post_time\": \"2012-04-09 13:54:52\" },\n\t{ \"post_id\": 1417, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Re: Fetch data through SQL.\", \"username\": \"bforeman\", \"post_text\": \"The development team is currently investigating this issue.\", \"post_time\": \"2012-04-03 12:46:45\" },\n\t{ \"post_id\": 1415, \"topic_id\": 322, \"forum_id\": 8, \"post_subject\": \"Fetch data through SQL.\", \"username\": \"Arti\", \"post_text\": \"Hi,\\nI am trying to fetch data from a table stored in SQL Server 2008 databse and to save it as CSV in HPCC??\\nI'm trying through the following code:\\n\\n\\nIMPORT Std;\\nrfsserver := '10.173.207.1:1433';\\nrec := RECORD,MAXLENGTH(8192)\\nSTRING mydata;\\nEND;\\nOUTPUT(DATASET(STD.File.EncodeRfsQuery( rfsserver,\\n'SELECT * FROM [TextAnalysis_HPCC].[dbo].[Test_text]'),rec,CSV(MAXLENGTH(8192))));\\n
\\n\\nError: System error: 1: socket not opened\\nTarget: C!10.173.207.1, Raised in: /var/jenkins/workspace/Candidate-3.4.2/HPCC-Platform/system/jlib/jsocket.cpp, line 1681\", \"post_time\": \"2012-03-30 11:32:18\" },\n\t{ \"post_id\": 1559, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"bforeman\", \"post_text\": \"Driving in to work this morning, I was thinking about your code again. If all you need to do is to convert a bitmap into a text string, why not just use ASSTRING instead of calling an external program?\\n\\nSELF.textdata := ASSSTRING(l.text[5..]);\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-24 12:30:57\" },\n\t{ \"post_id\": 1557, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"bforeman\", \"post_text\": \"The only thing I see is that you do not need to define a "tikaout", as the input looks to be the same as the input.\\n\\ntxtDS := PIPE(binDS, 'java -jar tika-app-1.0.jar -t',,CSV);\\n\\nIs the input data format for TIKA CSV?\", \"post_time\": \"2012-04-23 18:12:37\" },\n\t{ \"post_id\": 1546, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi, \\n\\nI have tried the Path(/var/lib/HPCCSystems/mythor/) as suggested bu BOB, so i'm not getting the error of Watchdog now,but now i'm trying to read file through command line DFU and trying to read that logical file in the DATASET.\\nAfter that i want to give only the binary data to PIPE command so that we can invoke TIKA through PIPE command.\\n\\nBut when i try this following Code it runs but still my Binary DATA is not getting converted in text through TIKA.\\n\\nSo i would like to confirm few things that if i'm on right path:\\n\\n1. Am i extracting the binary data correctly after removing the first 4 bytes of size.\\n2. In PIPE command second format is \\n PIPE( recordset, command [, recorddef ] [, REPEAT] [, CSV | XML ] [, OUTPUT( CSV | XML ) ] [, GROUP] )\\nso what will be recorddef of the recordset which contains only the Binary DATA after removing first 4 bytes of size \\n\\nThis is the code i'm using for this scenario:-\\ntextRec := RECORD\\nSTRING filename;\\nDATA text; //first 4 bytes contain the length of the image data\\nEND;\\n\\nreadSprayedFixed := DATASET('~test::pipe::dfu',textRec,flat); //test::pipe::dfu\\nreadSprayedFixed;\\n\\noutrec := RECORD\\nSTRING filename;\\nDATA size;\\nDATA bindata; //first 4 bytes contain the length of the image data\\ndata textdata;\\nEND;\\n\\ntikain := record\\n data bindata;\\nend;\\n\\ntikaout := record\\n data textdata;\\nend;\\n\\n\\n//Do this follwoing steps in project\\noutrec xform(readSprayedFixed l) := transform\\n\\tself.filename := l.filename;\\n\\tself.size := l.text[1..4];\\n\\tself.bindata := l.text[5..];\\n\\n\\tbinDS := DATASET([{l.text[5..]}], tikain);\\n\\n\\ttxtDS := PIPE(binDS, 'java -jar tika-app-1.0.jar -t', tikaout,CSV);\\n\\t//txtDS := PIPE(binDS, 'cat', tikaout);\\n\\n\\tself.textdata := txtDS[1].textdata;\\nend;\\n\\noutds := project(readSprayedFixed,xform(left));\\n\\ntikaoutds := project(outds,tikaout);\\n\\ncount(tikaoutds);\\n\\noutput(tikaoutds,,'~pipe::tryouts::test::tikaout',OVERWRITE);
\", \"post_time\": \"2012-04-21 12:42:06\" },\n\t{ \"post_id\": 1468, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"bforeman\", \"post_text\": \"Development just commented that programs that you run from PIPE should use (be in) the default directory that he mentioned. You can override it if needed by setting 'externalProgDir' environment variable. Verify that your java program is in that default folder. You might also try moving the TIKA program (tika-app-1.0.jar -t)to that default folder.\\n\\nBest regards,\\n\\nBob\", \"post_time\": \"2012-04-10 12:08:19\" },\n\t{ \"post_id\": 1463, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"Apurv.Khare\", \"post_text\": \"HI,\\nWe have tried through SEQUENTIAL as suggested by Richard but its giving the same error.\\n\\nI am not able to understand the solution given by Bob.\\nSo Bob can you please elaborate the solution you have suggested about the problem we are facing.\", \"post_time\": \"2012-04-10 06:03:08\" },\n\t{ \"post_id\": 1443, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"bforeman\", \"post_text\": \"Apurv,\\n\\nOur developer added this comment:\\n\\nI believe the default would be the thor instance directory, e.g. \\n/var/lib/HPCCSystems/mythor/\\nThis can be overridden by setting 'externalProgDir' for the thor cluster in the environment.\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2012-04-05 12:05:11\" },\n\t{ \"post_id\": 1425, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,\\n\\nThere may be a timing issue here, too. Remember that ECL code is NOT "executable code" but simply definitions of what you want -- how it all gets done is the compiler's job, and the compiler usually thinks it can do everything at once, in parallel. So, try adding SEQUENTIAL, like this:\\nIMPORT std;\\n\\ntextRecord := RECORD\\n STRING filename;\\n DATA text;\\nEND;\\n\\nspray := std.File.SprayFixed('172.20.15.168',\\n '/var/lib/HPCCSystems/dropzone/ECLProgrammersGuide.pdf',\\n 564168,\\n 'mythor',\\n '~fixed::PDF::read1',,\\n 'http://172.20.15.168:8010/FileSpray',,true,,); \\n\\n //reading binary data in blob(here pdf) \\nreadSprayedFixed := DATASET('~fixed::PDF::read1',textRecord,FLAT); \\n\\nt := PIPE(readSprayedFixed,\\n 'java -jar /home/hpcc/lz_data/tika-app-1.0.jar -t');\\n\\npipeoutput := OUTPUT(t);\\n\\nSEQUENTIAL(spray, pipeoutput);
HTH,\\n\\nRichard\", \"post_time\": \"2012-04-04 14:02:20\" },\n\t{ \"post_id\": 1423, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi i have tried to spary the file and again desparyed it with the same code and parameters,i am able to save it and can open it easily. So i think the problem is giving the logical file through PIPE to TIKA.\\nCan you tell us how to invoke TIKA(or any other external command) through PIPE \", \"post_time\": \"2012-04-04 05:21:17\" },\n\t{ \"post_id\": 1420, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Re: Problem reading file in BLOB\", \"username\": \"bforeman\", \"post_text\": \"Can you spray the target file using the ECL Watch or DFUPlus tools?\\n\\nIf you can, perhaps something is wrong with your spray parameters.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-03 18:07:31\" },\n\t{ \"post_id\": 1416, \"topic_id\": 323, \"forum_id\": 8, \"post_subject\": \"Problem reading file in BLOB\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi i am trying to read a file in BLOB (a pdf file).\\n\\nthen using 'PIPE' gave this input to TIKA to extract text from pdf file and display it using OUTPUT.\\n\\nI'm trying with the following code:\\n
\\nIMPORT std;\\n\\ntextRecord := RECORD\\nSTRING filename;\\nDATA text;\\nEND;\\n\\nstd.File.SprayFixed('172.20.15.168','/var/lib/HPCCSystems/dropzone/ECLProgrammersGuide.pdf',564168,\\n 'mythor','~fixed::PDF::read1',,'http://172.20.15.168:8010/FileSpray',,true,,); \\n\\t//reading binary data in blob(here pdf) \\nreadSprayedFixed := DATASET('~fixed::PDF::read1',textRecord,FLAT);\\t\\t\\t\\t\\t\\t \\nt := PIPE(readSprayedFixed,'java -jar /home/hpcc/lz_data/tika-app-1.0.jar -t');\\nOUTPUT(t);\\n
\\n\\nI am getting the following error \\nError: System error: 10056: Watchdog has lost contact with Thor slave: 172.20.14.10:6600 (Process terminated or node down?)\", \"post_time\": \"2012-04-02 10:31:40\" },\n\t{ \"post_id\": 1471, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"sort\", \"post_text\": \"the version file can be found in the following directory:\\n/etc/HPCCSystems\", \"post_time\": \"2012-04-10 13:07:27\" },\n\t{ \"post_id\": 1466, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"chhaya\", \"post_text\": \"Hi i have tried the Second approach and it worked fine for me. \\nThanks \\n\\nBut i would also like to know the steps for carrying out the first approach, as i'm not able to find the version file.\", \"post_time\": \"2012-04-10 10:05:41\" },\n\t{ \"post_id\": 1444, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"pschwartz\", \"post_text\": \"chhaya,\\n\\nI now understand the problem that happened with your upgrade. There is currently a bug in the generation of the contents of our version file (/etc/HPCCSystems/version). The status of this issue can be followed in our issue tracker on github.\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/1154\\n\\nThis is causing 3.6.0 to be seen as a the installed version when it is not, which in turn is preventing the upgrade. \\n\\nYou currently have 2 options to complete the upgrade.\\n\\nOption 1:\\n1. Connect to each server and modify the version file to contain the following:\\n community_3.4.2\\n2. Rerun the previous install-cluster.sh command to now install the package correctly.\\n\\nOption 2:\\n1. Copy the 3.6.0 package to each server.\\n2. Run the `sudo dpkg -i <3.6.0 package>` to upgrade.\", \"post_time\": \"2012-04-05 12:47:05\" },\n\t{ \"post_id\": 1441, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\nwe tried the command `dpkg -l | grep hpccsystems` it shows the following result,\\nbut before this we tried to upgrade the newer version of 3.6\\n\\nii hpccsystems-clienttools 3.2.1 hpccsystems-clienttools built using CMake\\nii hpccsystems-documentation 3.4.1 hpccsystems-documentation built using CMake\\nii hpccsystems-graphcontrol 3.4.1 hpccsystems-graphcontrol built using CMake\\nii hpccsystems-platform 3.4.2.1 hpccsystems-platform built using CMake\\n\\nplease can you guide what we are missing in following the Upgradation.\", \"post_time\": \"2012-04-05 05:44:14\" },\n\t{ \"post_id\": 1435, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"pschwartz\", \"post_text\": \"chhaya,\\n\\nCan you please start by running `dpkg -l | grep hpccsystems` on the 2 servers to determine what version is currently installed.\\n\\n- Philip\", \"post_time\": \"2012-04-04 18:12:27\" },\n\t{ \"post_id\": 1432, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"rtaylor\", \"post_text\": \"
Ya sure we did that, just want to ask whether the command mentioned in the Installation Guide also upgrades the version.
It was a valid question (I've certainly made plenty of "bonehead" mistakes in my career ), and I apologize if you were offended by it. The answer is yes, running this command should install the package, which should upgrade your version (if that's what the specified package file does). If it did not, then one of our ops folks needs to chime in with the possibilities for why it might not have.\", \"post_time\": \"2012-04-04 15:23:55\" },\n\t{ \"post_id\": 1431, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"chhaya\", \"post_text\": \"Ya sure we did that, just want to ask whether the command mentioned in the Installation Guide also upgrades the version.\", \"post_time\": \"2012-04-04 14:40:08\" },\n\t{ \"post_id\": 1428, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"Re: How to Upgrade the HPCC platform.\", \"username\": \"rtaylor\", \"post_text\": \"
sudo /opt/HPCCSystems/sbin/install-cluster.sh -k <package-file-name> \\n
Is that literally what you typed in, or did you replace "<package-file-name>" with the name of your package file?\", \"post_time\": \"2012-04-04 14:12:50\" },\n\t{ \"post_id\": 1424, \"topic_id\": 324, \"forum_id\": 8, \"post_subject\": \"How to Upgrade the HPCC platform.\", \"username\": \"chhaya\", \"post_text\": \"Hi, \\nRight now we are using the Community Edition for HPCC Platform Ubuntu 10.04 LTS (version 3.4.2-1).\\n\\nAs the new version 3.6 has been released we want to upgrade our HPCC platform, so we tried to use this command given below as per Installation Guide on console.\\nIt says its done but we are not able to see the new version in the ECL Watch.\\n\\n the command we are using is for two node cluster:\\n\\nsudo /opt/HPCCSystems/sbin/install-cluster.sh -k <package-file-name> \\n\\nIs this the right approach or guide us through this problem..\", \"post_time\": \"2012-04-04 05:36:35\" },\n\t{ \"post_id\": 1430, \"topic_id\": 325, \"forum_id\": 8, \"post_subject\": \"Re: Subfile replacement\", \"username\": \"DSC\", \"post_text\": \"Understood. Thanks for the pointer!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-04 14:33:36\" },\n\t{ \"post_id\": 1429, \"topic_id\": 325, \"forum_id\": 8, \"post_subject\": \"Re: Subfile replacement\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nIs it permissible, or even a good idea, to overwrite the contents of a superfile's (or superkey's) subfile? Or should we always go through the a removal/addition (or swap) process on the superfile?
One of our primary dictums around here is "never throw anything away" so I would suggest that you might want to consider never overwriting a production dataset/index, but instead writing a new file and just changing the subfile list the superfile uses. That way, you're just changing metadata in the DFU and going back to the previous version is easy.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-04 14:21:55\" },\n\t{ \"post_id\": 1426, \"topic_id\": 325, \"forum_id\": 8, \"post_subject\": \"Subfile replacement\", \"username\": \"DSC\", \"post_text\": \"Is it permissible, or even a good idea, to overwrite the contents of a superfile's (or superkey's) subfile? Or should we always go through the a removal/addition (or swap) process on the superfile?\\n\\nI'm asking because I'm running into problems updating the single subfile (index) within a superkey for use in Roxie.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-04 14:06:15\" },\n\t{ \"post_id\": 2796, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"g2pis\", \"post_text\": \"--\", \"post_time\": \"2012-11-16 07:12:26\" },\n\t{ \"post_id\": 2181, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"HPCC Staff\", \"post_text\": \"The documentation is currently in progress for inclusion in an upcoming release. In the interim, we will reach out to you directly.\\n\\nThank you!\", \"post_time\": \"2012-08-08 13:54:39\" },\n\t{ \"post_id\": 2158, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"[quote="JimD":21hdggtd]Dan,\\n\\nSorry this did not make the 3.8 release. Our focus shifted toward Thor for 3.8 and will shift back to Roxie for 3.10.\\n\\nIn the meantime, I will provide some of the basic information next week. \\n\\nThanks for your patience. I apologize for the delay.\\n\\nHi Jim,\\n\\nHas there been any progress on this? I'd really like to check this feature out, but I cannot deduce its usage from the command line options or the few references in the documentation.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-08-06 11:32:43\" },\n\t{ \"post_id\": 1959, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"JimD\", \"post_text\": \"Dan,\\n\\nSorry this did not make the 3.8 release. Our focus shifted toward Thor for 3.8 and will shift back to Roxie for 3.10.\\n\\nIn the meantime, I will provide some of the basic information next week. \\n\\nThanks for your patience. I apologize for the delay.\", \"post_time\": \"2012-07-12 18:42:32\" },\n\t{ \"post_id\": 1954, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"I just re-downloaded the documentation (3.8.0) to look for the Roxie Packages stuff and, sadly, it seems that that portion of the document has not been updated. Where can I find documentation on implementing and using Packages?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-07-12 17:44:39\" },\n\t{ \"post_id\": 1553, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"Excellent. 
Thanks for the pointer.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-23 14:30:46\" },\n\t{ \"post_id\": 1552, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"JimD\", \"post_text\": \"There are details on how to use the ECL command line with packages in the Client Tools manual (starting on page 61).\\n\\nThe section on packages in the Rapid Data Delivery Engine Reference (Roxie) will be updated in the next release.\\n\\nJim\", \"post_time\": \"2012-04-23 14:16:11\" },\n\t{ \"post_id\": 1549, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"Version 3.6.2 CE is out, but it appears that the documentation concerning packages in Roxie has not been updated. Is that documentation located somewhere else?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-23 13:03:11\" },\n\t{ \"post_id\": 1472, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"I just downloaded the documentation associated with 3.6.2rc4 and cannot find anything on the Packages feature. Where is it/will it be documented?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-10 14:05:15\" },\n\t{ \"post_id\": 1439, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"Good to hear. I'm looking forward that feature!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-04 19:22:14\" },\n\t{ \"post_id\": 1438, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"sort\", \"post_text\": \"You are correct. I forgot in 3.6.0 it is an enterprise feature. In 3.6.2 it has been moved to the community release. \\n\\nWe are in the process of updating the portal with a beta version of 3.6.2. It should be fully available by the end of the day\", \"post_time\": \"2012-04-04 18:56:52\" },\n\t{ \"post_id\": 1436, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"Aren't packages available only in the enterprise version?\\n\\nIs there any documentation on packages other than the single page within http://cdn.hpccsystems.com/install/docs/3_6_0_1_CE/RDDERef.pdf? That page just describes the feature, states that it's supported in the Enterprise Edition, then concludes with "More to follow."\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-04 18:18:26\" },\n\t{ \"post_id\": 1434, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Re: Updating/replacing index used by Roxie\", \"username\": \"sort\", \"post_text\": \"You are correct, roxie queries hold on to a reference to all files used when the queries get loaded. In the scenario you described, if you change the contents of a superkey in dali, in order for roxie to know about it, you will need to either:\\n. stop / start (or restart) the roxie cluster\\n. use package files (a feature introduced in 3.6) to tell the roxie cluster what subfiles to use when loading a query. 
(feature ecl pacakge add ...)\", \"post_time\": \"2012-04-04 18:09:46\" },\n\t{ \"post_id\": 1433, \"topic_id\": 326, \"forum_id\": 8, \"post_subject\": \"Updating/replacing index used by Roxie\", \"username\": \"DSC\", \"post_text\": \"I'm missing something regarding data updates and queries deployed to Roxie.\\n\\nI can create and initial data set and the associated index file, both as subfiles within a superfile, without a problem. I can deploy a Roxie query that references only the index superfile (superkey) without a problem.\\n\\nThe problem comes with adding data to the mess. The superfile containing the raw data is no problem, but the index is. It appears that the deployed Roxie query is hanging on to a reference to the superkey's subfile. The superfile transaction that replaces that superkey's subfile hangs until the Roxie query is removed from the query set (or the work unit is deleted, I'm not sure which).\\n\\nWhat is the recommended practice for updating an index that is currently in use by Roxie? I've read Programmer's Guide numerous times, but I've failed enlightenment.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-04 16:01:32\" },\n\t{ \"post_id\": 1523, \"topic_id\": 337, \"forum_id\": 8, \"post_subject\": \"Re: default scope name for a cluster?\", \"username\": \"jeremy\", \"post_text\": \"I see, so default scope name = cluster name, which can be changed via configmgr.\\nThanks much!\\nJeremy\", \"post_time\": \"2012-04-13 15:22:25\" },\n\t{ \"post_id\": 1522, \"topic_id\": 337, \"forum_id\": 8, \"post_subject\": \"Re: default scope name for a cluster?\", \"username\": \"rtaylor\", \"post_text\": \"Basically, that's what it defaults to, but you can change it to anything you want it to be using configmgr.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-13 15:14:55\" },\n\t{ \"post_id\": 1519, \"topic_id\": 337, \"forum_id\": 8, \"post_subject\": \"Re: default scope name for a cluster?\", \"username\": \"jeremy\", \"post_text\": \"so the default scope name is just the name of the cluster?\", \"post_time\": \"2012-04-13 14:57:02\" },\n\t{ \"post_id\": 1518, \"topic_id\": 337, \"forum_id\": 8, \"post_subject\": \"Re: default scope name for a cluster?\", \"username\": \"bforeman\", \"post_text\": \"Hi Jeremy,\\n\\nOpen up the ECL Watch and look at Topology and Target Clusters. The default folders for Thor and Roxie and more are listed there.\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2012-04-13 14:19:49\" },\n\t{ \"post_id\": 1509, \"topic_id\": 337, \"forum_id\": 8, \"post_subject\": \"default scope name for a cluster?\", \"username\": \"jeremy\", \"post_text\": \"According to the Scope & Logical Filenames section of the ECL language reference, there is a notion of a default scope name for a cluster, but I can't seem to find where it is defined?\", \"post_time\": \"2012-04-12 21:11:48\" },\n\t{ \"post_id\": 1583, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"Thanks for the follow-up. I'll watch the Git issue, with interest.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-05-02 19:21:36\" },\n\t{ \"post_id\": 1582, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThis is now know to be a issue, reported in GIT HPCC-platform issue #2213, and the workaround is to explicitly define the CSV QUOTE option as blank for the superfile, like this: SF := DATASET('SFname',recstruct,CSV(QUOTE('')));
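For the tab-separated files discussed in this thread, a minimal, untested sketch of that workaround (hypothetical superfile and layout names) would look like:

// Blank QUOTE works around the superfile CSV read issue described above
SF := DATASET('~company::SF1', OracleTableLayout, CSV(SEPARATOR('\t'), QUOTE('')));
OUTPUT(COUNT(SF));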
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-02 17:49:28\" },\n\t{ \"post_id\": 1558, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOK, I have been unable to duplicate the problem, so here's my code -- maybe you can tweak it to create the problem.IMPORT STD;\\nCompany := MODULE\\n \\n //==========================================================================\\n // Module constants\\n //==========================================================================\\n EXPORT kRawDataFilePath := '~RTTEST::company';\\n \\n //==========================================================================\\n // Record Definitions\\n //==========================================================================\\n \\n //--------------------------------------------------------------------------\\n // Oracle Table\\n //\\n // All fields are defined as STRING in order to help NULL value testing.\\n //--------------------------------------------------------------------------\\n EXPORT OracleTableLayout := RECORD\\n STRING field01 := '';\\n STRING field02 := '';\\n STRING field03 := '';\\n STRING field04 := '';\\n STRING field05 := '';\\n STRING field06 := '';\\n STRING field07 := '';\\n STRING field08 := '';\\n STRING field09 := '';\\n STRING field10 := '';\\n STRING field11 := '';\\n STRING field12 := '';\\n STRING field13 := '';\\n STRING field14 := '';\\n STRING field15 := '';\\n STRING field16 := '';\\n STRING field17 := '';\\n STRING field18 := '';\\n STRING field19 := '';\\n STRING field20 := '';\\n STRING field21 := '';\\n STRING field22 := '';\\n STRING field23 := '';\\n STRING field24 := '';\\n STRING field25 := '';\\n STRING field26 := '';\\n STRING field27 := '';\\n STRING field28 := '';\\n STRING field29 := '';\\n STRING field30 := '';\\n STRING field31 := '';\\n STRING field32 := '';\\n STRING field33 := '';\\n STRING field34 := '';\\n STRING field35 := '';\\n STRING field36 := '';\\n STRING field37 := '';\\n STRING field38 := '';\\n STRING field39 := '';\\n STRING field40 := '';\\n STRING field41 := '';\\n STRING field42 := '';\\n STRING field43 := '';\\n STRING field44 := '';\\n STRING field45 := '';\\n STRING field46 := '';\\n STRING field47 := '';\\n STRING field48 := '';\\n STRING field49 := '';\\n STRING field50 := '';\\n STRING field51 := '';\\n STRING field52 := '';\\n END;// \\n \\n //==========================================================================\\n // Dataset definitions\\n //==========================================================================\\n \\n EXPORT rawDataDS := DATASET(kRawDataFilePath,OracleTableLayout,CSV(SEPARATOR('\\\\t')));\\n \\n //==========================================================================\\n // Utilities\\n //==========================================================================\\n \\n //--------------------------------------------------------------------------\\n // Determine fill rate of raw, incoming data\\n //--------------------------------------------------------------------------\\n EXPORT ComputeFillRates(DATASET(OracleTableLayout) rs) := FUNCTION\\n InterimFillLayout := RECORD\\n INTEGER4 field01 := COUNT(GROUP,rs.field01 != '');\\n INTEGER4 field02 := COUNT(GROUP,rs.field02 != '');\\n INTEGER4 field03 := COUNT(GROUP,rs.field03 != '');\\n INTEGER4 field04 := COUNT(GROUP,rs.field04 != '');\\n INTEGER4 field05 := COUNT(GROUP,rs.field05 != '');\\n INTEGER4 field06 := COUNT(GROUP,rs.field06 != '');\\n INTEGER4 field07 := 
COUNT(GROUP,rs.field07 != '');\\n INTEGER4 field08 := COUNT(GROUP,rs.field08 != '');\\n INTEGER4 field09 := COUNT(GROUP,rs.field09 != '');\\n INTEGER4 field10 := COUNT(GROUP,rs.field10 != '');\\n INTEGER4 field11 := COUNT(GROUP,rs.field11 != '');\\n INTEGER4 field12 := COUNT(GROUP,rs.field12 != '');\\n INTEGER4 field13 := COUNT(GROUP,rs.field13 != '');\\n INTEGER4 field14 := COUNT(GROUP,rs.field14 != '');\\n INTEGER4 field15 := COUNT(GROUP,rs.field15 != '');\\n INTEGER4 field16 := COUNT(GROUP,rs.field16 != '');\\n INTEGER4 field17 := COUNT(GROUP,rs.field17 != '');\\n INTEGER4 field18 := COUNT(GROUP,rs.field18 != '');\\n INTEGER4 field19 := COUNT(GROUP,rs.field19 != '');\\n INTEGER4 field20 := COUNT(GROUP,rs.field20 != '');\\n INTEGER4 field21 := COUNT(GROUP,rs.field21 != '');\\n INTEGER4 field22 := COUNT(GROUP,rs.field22 != '');\\n INTEGER4 field23 := COUNT(GROUP,rs.field23 != '');\\n INTEGER4 field24 := COUNT(GROUP,rs.field24 != '');\\n INTEGER4 field25 := COUNT(GROUP,rs.field25 != '');\\n INTEGER4 field26 := COUNT(GROUP,rs.field26 != '');\\n INTEGER4 field27 := COUNT(GROUP,rs.field27 != '');\\n INTEGER4 field28 := COUNT(GROUP,rs.field28 != '');\\n INTEGER4 field29 := COUNT(GROUP,rs.field29 != '');\\n INTEGER4 field30 := COUNT(GROUP,rs.field30 != '');\\n INTEGER4 field31 := COUNT(GROUP,rs.field31 != '');\\n INTEGER4 field32 := COUNT(GROUP,rs.field32 != '');\\n INTEGER4 field33 := COUNT(GROUP,rs.field33 != '');\\n INTEGER4 field34 := COUNT(GROUP,rs.field34 != '');\\n INTEGER4 field35 := COUNT(GROUP,rs.field35 != '');\\n INTEGER4 field36 := COUNT(GROUP,rs.field36 != '');\\n INTEGER4 field37 := COUNT(GROUP,rs.field37 != '');\\n INTEGER4 field38 := COUNT(GROUP,rs.field38 != '');\\n INTEGER4 field39 := COUNT(GROUP,rs.field39 != '');\\n INTEGER4 field40 := COUNT(GROUP,rs.field40 != '');\\n INTEGER4 field41 := COUNT(GROUP,rs.field41 != '');\\n INTEGER4 field42 := COUNT(GROUP,rs.field42 != '');\\n INTEGER4 field43 := COUNT(GROUP,rs.field43 != '');\\n INTEGER4 field44 := COUNT(GROUP,rs.field44 != '');\\n INTEGER4 field45 := COUNT(GROUP,rs.field45 != '');\\n INTEGER4 field46 := COUNT(GROUP,rs.field46 != '');\\n INTEGER4 field47 := COUNT(GROUP,rs.field47 != '');\\n INTEGER4 field48 := COUNT(GROUP,rs.field48 != '');\\n INTEGER4 field49 := COUNT(GROUP,rs.field49 != '');\\n INTEGER4 field50 := COUNT(GROUP,rs.field50 != '');\\n INTEGER4 field51 := COUNT(GROUP,rs.field51 != '');\\n INTEGER4 field52 := COUNT(GROUP,rs.field52 != '');\\n INTEGER4 total_count := COUNT(GROUP);\\n END;\\n \\n interimFillRS := TABLE(rs,InterimFillLayout,MERGE);\\n \\n FillLayout := RECORD\\n REAL field01;\\n REAL field02;\\n REAL field03;\\n REAL field04;\\n REAL field05;\\n REAL field06;\\n REAL field07;\\n REAL field08;\\n REAL field09;\\n REAL field10;\\n REAL field11;\\n REAL field12;\\n REAL field13;\\n REAL field14;\\n REAL field15;\\n REAL field16;\\n REAL field17;\\n REAL field18;\\n REAL field19;\\n REAL field20;\\n REAL field21;\\n REAL field22;\\n REAL field23;\\n REAL field24;\\n REAL field25;\\n REAL field26;\\n REAL field27;\\n REAL field28;\\n REAL field29;\\n REAL field30;\\n REAL field31;\\n REAL field32;\\n REAL field33;\\n REAL field34;\\n REAL field35;\\n REAL field36;\\n REAL field37;\\n REAL field38;\\n REAL field39;\\n REAL field40;\\n REAL field41;\\n REAL field42;\\n REAL field43;\\n REAL field44;\\n REAL field45;\\n REAL field46;\\n REAL field47;\\n REAL field48;\\n REAL field49;\\n REAL field50;\\n REAL field51;\\n REAL field52;\\n END;\\n \\n FillLayout XFormToFillLayout(InterimFillLayout l) := 
TRANSFORM\\n FillRate(x) := ROUND(x / l.total_count * 10000) / 100;\\n \\n SELF.field01 := FillRate(l.field01);\\n SELF.field02 := FillRate(l.field02);\\n SELF.field03 := FillRate(l.field03);\\n SELF.field04 := FillRate(l.field04);\\n SELF.field05 := FillRate(l.field05);\\n SELF.field06 := FillRate(l.field06);\\n SELF.field07 := FillRate(l.field07);\\n SELF.field08 := FillRate(l.field08);\\n SELF.field09 := FillRate(l.field09);\\n SELF.field10 := FillRate(l.field10);\\n SELF.field11 := FillRate(l.field11);\\n SELF.field12 := FillRate(l.field12);\\n SELF.field13 := FillRate(l.field13);\\n SELF.field14 := FillRate(l.field14);\\n SELF.field15 := FillRate(l.field15);\\n SELF.field16 := FillRate(l.field16);\\n SELF.field17 := FillRate(l.field17);\\n SELF.field18 := FillRate(l.field18);\\n SELF.field19 := FillRate(l.field19);\\n SELF.field20 := FillRate(l.field20);\\n SELF.field21 := FillRate(l.field21);\\n SELF.field22 := FillRate(l.field22);\\n SELF.field23 := FillRate(l.field23);\\n SELF.field24 := FillRate(l.field24);\\n SELF.field25 := FillRate(l.field25);\\n SELF.field26 := FillRate(l.field26);\\n SELF.field27 := FillRate(l.field27);\\n SELF.field28 := FillRate(l.field28);\\n SELF.field29 := FillRate(l.field29);\\n SELF.field30 := FillRate(l.field30);\\n SELF.field31 := FillRate(l.field31);\\n SELF.field32 := FillRate(l.field32);\\n SELF.field33 := FillRate(l.field33);\\n SELF.field34 := FillRate(l.field34);\\n SELF.field35 := FillRate(l.field35);\\n SELF.field36 := FillRate(l.field36);\\n SELF.field37 := FillRate(l.field37);\\n SELF.field38 := FillRate(l.field38);\\n SELF.field39 := FillRate(l.field39);\\n SELF.field40 := FillRate(l.field40);\\n SELF.field41 := FillRate(l.field41);\\n SELF.field42 := FillRate(l.field42);\\n SELF.field43 := FillRate(l.field43);\\n SELF.field44 := FillRate(l.field44);\\n SELF.field45 := FillRate(l.field45);\\n SELF.field46 := FillRate(l.field46);\\n SELF.field47 := FillRate(l.field47);\\n SELF.field48 := FillRate(l.field48);\\n SELF.field49 := FillRate(l.field49);\\n SELF.field50 := FillRate(l.field50);\\n SELF.field51 := FillRate(l.field51);\\n SELF.field52 := FillRate(l.field52);\\n END;\\n \\n RETURN PROJECT(interimFillRS,XFormToFillLayout(LEFT));\\n END;\\n \\nEND; // Company Module\\n\\nRand := random() % 100 + 1;\\nSetRand := [rand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand,\\n\\t\\t\\t\\t\\t\\trand,rand,rand,rand,rand,rand,rand,rand,rand,rand] : global;\\n\\nBlankDS := dataset([{''}],company.OracleTableLayout);\\n\\ncompany.OracleTableLayout XF1(company.OracleTableLayout L, integer C) := TRANSFORM\\n STRING FillChar(fld) := FUNCTION\\n\\t num := SetRand[(fld + c) % 100 + 1];\\n\\t STRING1 char1 := (>STRING<)((fld+C-2)%26 + 65);\\n\\t char2 := char1 + char1;\\n\\t char3 := char2 + char1;\\n\\t char4 := char2 + char2;\\n\\t char5 := char3 + char2;\\n\\t char10 := char5 + char5;\\n\\t char20 := char10 + char10;\\n\\t char30 := char20 + char10;\\n\\t char40 := char20 + char20;\\n\\t char50 := 
char30 + char20;\\n\\t char60 := char30 + char30;\\n\\t char70 := char40 + char30;\\n\\t char80 := char40 + char40;\\n\\t char90 := char50 + char40;\\n\\t char100 := char50 + char50;\\n\\t\\tRetStr1 := CHOOSE(num DIV 10,char10,char20,char30,char40,char50,char60,char70,char80,char90,char100,'');\\n\\t\\tRetStr2 := CHOOSE(num%10+1,char1,char2,char3,char4,char5,char3+char3,char4+char3,char4+char4,char5+char4,'');\\n\\t RETURN IF((fld*c+num)%(num%5+2) = 0,'',(string)num + '-' + TRIM(RetStr1) + TRIM(RetStr2));\\n\\tEND;\\t\\n\\tSELF.field01 := FillChar(1);\\n\\tSELF.field02 := FillChar(2);\\n\\tSELF.field03 := FillChar(3);\\n\\tSELF.field04 := FillChar(4);\\n\\tSELF.field05 := FillChar(5);\\n\\tSELF.field06 := FillChar(6);\\n\\tSELF.field07 := FillChar(7);\\n\\tSELF.field08 := FillChar(8);\\n\\tSELF.field09 := FillChar(9);\\n\\tSELF.field10 := FillChar(10);\\n\\tSELF.field11 := FillChar(11);\\n\\tSELF.field12 := FillChar(12);\\n\\tSELF.field13 := FillChar(13);\\n\\tSELF.field14 := FillChar(14);\\n\\tSELF.field15 := FillChar(15);\\n\\tSELF.field16 := FillChar(16);\\n\\tSELF.field17 := FillChar(17);\\n\\tSELF.field18 := FillChar(18);\\n\\tSELF.field19 := FillChar(19);\\n\\tSELF.field20 := FillChar(20);\\n\\tSELF.field21 := FillChar(21);\\n\\tSELF.field22 := FillChar(22);\\n\\tSELF.field23 := FillChar(23);\\n\\tSELF.field24 := FillChar(24);\\n\\tSELF.field25 := FillChar(25);\\n\\tSELF.field26 := FillChar(26);\\n\\tSELF.field27 := FillChar(27);\\n\\tSELF.field28 := FillChar(28);\\n\\tSELF.field29 := FillChar(29);\\n\\tSELF.field30 := FillChar(30);\\n\\tSELF.field31 := FillChar(31);\\n\\tSELF.field32 := FillChar(32);\\n\\tSELF.field33 := FillChar(33);\\n\\tSELF.field34 := FillChar(34);\\n\\tSELF.field35 := FillChar(35);\\n\\tSELF.field36 := FillChar(36);\\n\\tSELF.field37 := FillChar(37);\\n\\tSELF.field38 := FillChar(38);\\n\\tSELF.field39 := FillChar(39);\\n\\tSELF.field40 := FillChar(40);\\n\\tSELF.field41 := FillChar(41);\\n\\tSELF.field42 := FillChar(42);\\n\\tSELF.field43 := FillChar(43);\\n\\tSELF.field44 := FillChar(44);\\n\\tSELF.field45 := FillChar(45);\\n\\tSELF.field46 := FillChar(46);\\n\\tSELF.field47 := FillChar(47);\\n\\tSELF.field48 := FillChar(48);\\n\\tSELF.field49 := FillChar(49);\\n\\tSELF.field50 := FillChar(50);\\n\\tSELF.field51 := FillChar(51);\\n\\tSELF.field52 := FillChar(52);\\nEND;\\n\\n// ds1 := normalize(BlankDS,100000,XF1(LEFT,COUNTER));\\n// ds2 := normalize(BlankDS,50000,XF1(LEFT,COUNTER));\\n\\n// output(ds1,,company.kRawDataFilePath + '::ds1',CSV(separator('\\\\t')),overwrite);\\n// output(ds2,,company.kRawDataFilePath + '::ds2',CSV(separator('\\\\t')),overwrite);\\n\\nds1 := dataset(company.kRawDataFilePath + '::ds1',company.OracleTableLayout,CSV(separator('\\\\t')));\\nds2 := dataset(company.kRawDataFilePath + '::ds2',company.OracleTableLayout,CSV(separator('\\\\t')));\\nSF1 := dataset(company.kRawDataFilePath + '::SF1',company.OracleTableLayout,CSV(separator('\\\\t')));\\n\\n// OUTPUT(Company.ComputeFillRates(ds1));\\n// OUTPUT(Company.ComputeFillRates(ds2));\\n// OUTPUT(Company.ComputeFillRates(ds1+ds2));\\n// Std.File.CreateSuperFile(company.kRawDataFilePath + '::SF1');\\n// Std.File.AddSuperFile(company.kRawDataFilePath + '::SF1',company.kRawDataFilePath + '::ds1');\\n// Std.File.AddSuperFile(company.kRawDataFilePath + '::SF1',company.kRawDataFilePath + '::ds2');\\nOUTPUT(Company.ComputeFillRates(SF1));\\n
I just ran all this in a single builder window.\\n\\nHope you can make this code break it, \\n\\nRichard\", \"post_time\": \"2012-04-23 22:18:01\" },\n\t{ \"post_id\": 1550, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"FYI: This problem exists in the released 3.6.2 CE as well.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-23 13:07:51\" },\n\t{ \"post_id\": 1544, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"Hi Richard,\\n\\nFWIW, I was trying to find a combination of subfile sizes that would trigger this failure. I thought I was making progress, but then it got weird. Weirder, actually.\\n\\nI used the Linux split command to reduce my original files into different segment lengths, put two segments into a superfile, then test. The following combination of sizes worked:\\n\\n575,702,399 bytes\\n461,801,495 bytes\\n\\nBut this did not:\\n\\n352,920,830 bytes\\n461,801,495 bytes\\n\\nNow, I had been reducing the sizes and was quite happy to find something that actually worked. It was dismaying to find a *smaller* total size that failed with the same error message. I was secretly hoping to find something that hovered around the 2^31 mark, as that was near my file sizes, but I guess I'll just have to suffer with disappointment.\\n\\nAnyway, I thought I'd pass this along. It may be helpful.\\n\\nCheers,\\n\\nDSC\", \"post_time\": \"2012-04-19 13:25:46\" },\n\t{ \"post_id\": 1543, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"I've sanitized the code to protect the innocent:\\n\\n
EXPORT Company := MODULE\\n\\t\\n\\t//==========================================================================\\n\\t// Module constants\\n\\t//==========================================================================\\n\\tSHARED\\tkRawDataFilePath := '~company';\\n\\t\\n\\t//==========================================================================\\n\\t// Record Definitions\\n\\t//==========================================================================\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// Oracle Table\\n\\t//\\n\\t// All fields are defined as STRING in order to help NULL value testing.\\n\\t//--------------------------------------------------------------------------\\n\\tSHARED\\tOracleTableLayout := RECORD\\n\\t\\tSTRING\\t\\t\\tfield01;\\n\\t\\tSTRING\\t\\t\\tfield02;\\n\\t\\tSTRING\\t\\t\\tfield03;\\n\\t\\tSTRING\\t\\t\\tfield04;\\n\\t\\tSTRING\\t\\t\\tfield05;\\n\\t\\tSTRING\\t\\t\\tfield06;\\n\\t\\tSTRING\\t\\t\\tfield07;\\n\\t\\tSTRING\\t\\t\\tfield08;\\n\\t\\tSTRING\\t\\t\\tfield09;\\n\\t\\tSTRING\\t\\t\\tfield10;\\n\\t\\tSTRING\\t\\t\\tfield11;\\n\\t\\tSTRING\\t\\t\\tfield12;\\n\\t\\tSTRING\\t\\t\\tfield13;\\n\\t\\tSTRING\\t\\t\\tfield14;\\n\\t\\tSTRING\\t\\t\\tfield15;\\n\\t\\tSTRING\\t\\t\\tfield16;\\n\\t\\tSTRING\\t\\t\\tfield17;\\n\\t\\tSTRING\\t\\t\\tfield18;\\n\\t\\tSTRING\\t\\t\\tfield19;\\n\\t\\tSTRING\\t\\t\\tfield20;\\n\\t\\tSTRING\\t\\t\\tfield21;\\n\\t\\tSTRING\\t\\t\\tfield22;\\n\\t\\tSTRING\\t\\t\\tfield23;\\n\\t\\tSTRING\\t\\t\\tfield24;\\n\\t\\tSTRING\\t\\t\\tfield25;\\n\\t\\tSTRING\\t\\t\\tfield26;\\n\\t\\tSTRING\\t\\t\\tfield27;\\n\\t\\tSTRING\\t\\t\\tfield28;\\n\\t\\tSTRING\\t\\t\\tfield29;\\n\\t\\tSTRING\\t\\t\\tfield30;\\n\\t\\tSTRING\\t\\t\\tfield31;\\n\\t\\tSTRING\\t\\t\\tfield32;\\n\\t\\tSTRING\\t\\t\\tfield33;\\n\\t\\tSTRING\\t\\t\\tfield34;\\n\\t\\tSTRING\\t\\t\\tfield35;\\n\\t\\tSTRING\\t\\t\\tfield36;\\n\\t\\tSTRING\\t\\t\\tfield37;\\n\\t\\tSTRING\\t\\t\\tfield38;\\n\\t\\tSTRING\\t\\t\\tfield39;\\n\\t\\tSTRING\\t\\t\\tfield40;\\n\\t\\tSTRING\\t\\t\\tfield41;\\n\\t\\tSTRING\\t\\t\\tfield42;\\n\\t\\tSTRING\\t\\t\\tfield43;\\n\\t\\tSTRING\\t\\t\\tfield44;\\n\\t\\tSTRING\\t\\t\\tfield45;\\n\\t\\tSTRING\\t\\t\\tfield46;\\n\\t\\tSTRING\\t\\t\\tfield47;\\n\\t\\tSTRING\\t\\t\\tfield48;\\n\\t\\tSTRING\\t\\t\\tfield49;\\n\\t\\tSTRING\\t\\t\\tfield50;\\n\\t\\tSTRING\\t\\t\\tfield51;\\n\\t\\tSTRING\\t\\t\\tfield52;\\n\\tEND;// \\n\\t\\n\\t//==========================================================================\\n\\t// Dataset definitions\\n\\t//==========================================================================\\n\\t\\n\\tEXPORT\\trawDataDS := DATASET(kRawDataFilePath,OracleTableLayout,CSV(SEPARATOR('\\\\t')));\\n\\t\\n\\t//==========================================================================\\n\\t// Utilities\\n\\t//==========================================================================\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// Determine fill rate of raw, incoming data\\n\\t//--------------------------------------------------------------------------\\n\\tEXPORT\\tComputeFillRates(DATASET(OracleTableLayout) rs) := FUNCTION\\n\\t\\tInterimFillLayout := RECORD\\n\\t\\t\\tINTEGER4\\t\\tfield01 := COUNT(GROUP,rs.field01 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield02 := COUNT(GROUP,rs.field02 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield03 := COUNT(GROUP,rs.field03 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield04 := COUNT(GROUP,rs.field04 != 
'');\\n\\t\\t\\tINTEGER4\\t\\tfield05 := COUNT(GROUP,rs.field05 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield06 := COUNT(GROUP,rs.field06 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield07 := COUNT(GROUP,rs.field07 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield08 := COUNT(GROUP,rs.field08 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield09 := COUNT(GROUP,rs.field09 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield10 := COUNT(GROUP,rs.field10 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield11 := COUNT(GROUP,rs.field11 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield12 := COUNT(GROUP,rs.field12 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield13 := COUNT(GROUP,rs.field13 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield14 := COUNT(GROUP,rs.field14 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield15 := COUNT(GROUP,rs.field15 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield16 := COUNT(GROUP,rs.field16 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield17 := COUNT(GROUP,rs.field17 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield18 := COUNT(GROUP,rs.field18 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield19 := COUNT(GROUP,rs.field19 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield20 := COUNT(GROUP,rs.field20 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield21 := COUNT(GROUP,rs.field21 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield22 := COUNT(GROUP,rs.field22 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield23 := COUNT(GROUP,rs.field23 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield24 := COUNT(GROUP,rs.field24 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield25 := COUNT(GROUP,rs.field25 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield26 := COUNT(GROUP,rs.field26 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield27 := COUNT(GROUP,rs.field27 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield28 := COUNT(GROUP,rs.field28 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield29 := COUNT(GROUP,rs.field29 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield30 := COUNT(GROUP,rs.field30 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield31 := COUNT(GROUP,rs.field31 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield32 := COUNT(GROUP,rs.field32 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield33 := COUNT(GROUP,rs.field33 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield34 := COUNT(GROUP,rs.field34 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield35 := COUNT(GROUP,rs.field35 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield36 := COUNT(GROUP,rs.field36 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield37 := COUNT(GROUP,rs.field37 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield38 := COUNT(GROUP,rs.field38 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield39 := COUNT(GROUP,rs.field39 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield40 := COUNT(GROUP,rs.field40 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield41 := COUNT(GROUP,rs.field41 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield42 := COUNT(GROUP,rs.field42 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield43 := COUNT(GROUP,rs.field43 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield44 := COUNT(GROUP,rs.field44 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield45 := COUNT(GROUP,rs.field45 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield46 := COUNT(GROUP,rs.field46 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield47 := COUNT(GROUP,rs.field47 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield48 := COUNT(GROUP,rs.field48 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield49 := COUNT(GROUP,rs.field49 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield50 := COUNT(GROUP,rs.field50 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield51 := COUNT(GROUP,rs.field51 != '');\\n\\t\\t\\tINTEGER4\\t\\tfield52 := COUNT(GROUP,rs.field52 != '');\\n\\t\\t\\tINTEGER4\\t\\ttotal_count := COUNT(GROUP);\\n\\t\\tEND;\\n\\t\\t\\n\\t\\tinterimFillRS := TABLE(rs,InterimFillLayout,MERGE);\\n\\t\\t\\n\\t\\tFillLayout := 
RECORD\\n\\t\\t\\tREAL\\t\\tfield01;\\n\\t\\t\\tREAL\\t\\tfield02;\\n\\t\\t\\tREAL\\t\\tfield03;\\n\\t\\t\\tREAL\\t\\tfield04;\\n\\t\\t\\tREAL\\t\\tfield05;\\n\\t\\t\\tREAL\\t\\tfield06;\\n\\t\\t\\tREAL\\t\\tfield07;\\n\\t\\t\\tREAL\\t\\tfield08;\\n\\t\\t\\tREAL\\t\\tfield09;\\n\\t\\t\\tREAL\\t\\tfield10;\\n\\t\\t\\tREAL\\t\\tfield11;\\n\\t\\t\\tREAL\\t\\tfield12;\\n\\t\\t\\tREAL\\t\\tfield13;\\n\\t\\t\\tREAL\\t\\tfield14;\\n\\t\\t\\tREAL\\t\\tfield15;\\n\\t\\t\\tREAL\\t\\tfield16;\\n\\t\\t\\tREAL\\t\\tfield17;\\n\\t\\t\\tREAL\\t\\tfield18;\\n\\t\\t\\tREAL\\t\\tfield19;\\n\\t\\t\\tREAL\\t\\tfield20;\\n\\t\\t\\tREAL\\t\\tfield21;\\n\\t\\t\\tREAL\\t\\tfield22;\\n\\t\\t\\tREAL\\t\\tfield23;\\n\\t\\t\\tREAL\\t\\tfield24;\\n\\t\\t\\tREAL\\t\\tfield25;\\n\\t\\t\\tREAL\\t\\tfield26;\\n\\t\\t\\tREAL\\t\\tfield27;\\n\\t\\t\\tREAL\\t\\tfield28;\\n\\t\\t\\tREAL\\t\\tfield29;\\n\\t\\t\\tREAL\\t\\tfield30;\\n\\t\\t\\tREAL\\t\\tfield31;\\n\\t\\t\\tREAL\\t\\tfield32;\\n\\t\\t\\tREAL\\t\\tfield33;\\n\\t\\t\\tREAL\\t\\tfield34;\\n\\t\\t\\tREAL\\t\\tfield35;\\n\\t\\t\\tREAL\\t\\tfield36;\\n\\t\\t\\tREAL\\t\\tfield37;\\n\\t\\t\\tREAL\\t\\tfield38;\\n\\t\\t\\tREAL\\t\\tfield39;\\n\\t\\t\\tREAL\\t\\tfield40;\\n\\t\\t\\tREAL\\t\\tfield41;\\n\\t\\t\\tREAL\\t\\tfield42;\\n\\t\\t\\tREAL\\t\\tfield43;\\n\\t\\t\\tREAL\\t\\tfield44;\\n\\t\\t\\tREAL\\t\\tfield45;\\n\\t\\t\\tREAL\\t\\tfield46;\\n\\t\\t\\tREAL\\t\\tfield47;\\n\\t\\t\\tREAL\\t\\tfield48;\\n\\t\\t\\tREAL\\t\\tfield49;\\n\\t\\t\\tREAL\\t\\tfield50;\\n\\t\\t\\tREAL\\t\\tfield51;\\n\\t\\t\\tREAL\\t\\tfield52;\\n\\t\\tEND;\\n\\t\\t\\n\\t\\tFillLayout XFormToFillLayout(InterimFillLayout l) := TRANSFORM\\n\\t\\t\\tFillRate(x) := ROUND(x / l.total_count * 10000) / 100;\\n\\t\\t\\t\\n\\t\\t\\tSELF.field01 := FillRate(l.field01);\\n\\t\\t\\tSELF.field02 := FillRate(l.field02);\\n\\t\\t\\tSELF.field03 := FillRate(l.field03);\\n\\t\\t\\tSELF.field04 := FillRate(l.field04);\\n\\t\\t\\tSELF.field05 := FillRate(l.field05);\\n\\t\\t\\tSELF.field06 := FillRate(l.field06);\\n\\t\\t\\tSELF.field07 := FillRate(l.field07);\\n\\t\\t\\tSELF.field08 := FillRate(l.field08);\\n\\t\\t\\tSELF.field09 := FillRate(l.field09);\\n\\t\\t\\tSELF.field10 := FillRate(l.field10);\\n\\t\\t\\tSELF.field11 := FillRate(l.field11);\\n\\t\\t\\tSELF.field12 := FillRate(l.field12);\\n\\t\\t\\tSELF.field13 := FillRate(l.field13);\\n\\t\\t\\tSELF.field14 := FillRate(l.field14);\\n\\t\\t\\tSELF.field15 := FillRate(l.field15);\\n\\t\\t\\tSELF.field16 := FillRate(l.field16);\\n\\t\\t\\tSELF.field17 := FillRate(l.field17);\\n\\t\\t\\tSELF.field18 := FillRate(l.field18);\\n\\t\\t\\tSELF.field19 := FillRate(l.field19);\\n\\t\\t\\tSELF.field20 := FillRate(l.field20);\\n\\t\\t\\tSELF.field21 := FillRate(l.field21);\\n\\t\\t\\tSELF.field22 := FillRate(l.field22);\\n\\t\\t\\tSELF.field23 := FillRate(l.field23);\\n\\t\\t\\tSELF.field24 := FillRate(l.field24);\\n\\t\\t\\tSELF.field25 := FillRate(l.field25);\\n\\t\\t\\tSELF.field26 := FillRate(l.field26);\\n\\t\\t\\tSELF.field27 := FillRate(l.field27);\\n\\t\\t\\tSELF.field28 := FillRate(l.field28);\\n\\t\\t\\tSELF.field29 := FillRate(l.field29);\\n\\t\\t\\tSELF.field30 := FillRate(l.field30);\\n\\t\\t\\tSELF.field31 := FillRate(l.field31);\\n\\t\\t\\tSELF.field32 := FillRate(l.field32);\\n\\t\\t\\tSELF.field33 := FillRate(l.field33);\\n\\t\\t\\tSELF.field34 := FillRate(l.field34);\\n\\t\\t\\tSELF.field35 := FillRate(l.field35);\\n\\t\\t\\tSELF.field36 := FillRate(l.field36);\\n\\t\\t\\tSELF.field37 := FillRate(l.field37);\\n\\t\\t\\tSELF.field38 := 
FillRate(l.field38);\\n\\t\\t\\tSELF.field39 := FillRate(l.field39);\\n\\t\\t\\tSELF.field40 := FillRate(l.field40);\\n\\t\\t\\tSELF.field41 := FillRate(l.field41);\\n\\t\\t\\tSELF.field42 := FillRate(l.field42);\\n\\t\\t\\tSELF.field43 := FillRate(l.field43);\\n\\t\\t\\tSELF.field44 := FillRate(l.field44);\\n\\t\\t\\tSELF.field45 := FillRate(l.field45);\\n\\t\\t\\tSELF.field46 := FillRate(l.field46);\\n\\t\\t\\tSELF.field47 := FillRate(l.field47);\\n\\t\\t\\tSELF.field48 := FillRate(l.field48);\\n\\t\\t\\tSELF.field49 := FillRate(l.field49);\\n\\t\\t\\tSELF.field50 := FillRate(l.field50);\\n\\t\\t\\tSELF.field51 := FillRate(l.field51);\\n\\t\\t\\tSELF.field52 := FillRate(l.field52);\\n\\t\\tEND;\\n\\t\\t\\n\\t\\tRETURN PROJECT(interimFillRS,XFormToFillLayout(LEFT));\\n\\tEND;\\n\\t\\nEND; // Company Module
\\n\\nAll of that is kicked off by a simple:\\n\\nOUTPUT($.Company.ComputeFillRates($.Company.rawDataDS));
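For context, the superfile in question was assembled from two sprayed, tab-delimited subfiles. A minimal, untested sketch of that kind of assembly (hypothetical subfile names), using the same standard library calls that appear in the test code above and wrapped in a superfile transaction, is:

IMPORT Std;

SEQUENTIAL(
  Std.File.CreateSuperFile('~company'),
  Std.File.StartSuperFileTransaction(),
  Std.File.AddSuperFile('~company', '~company::subfile1'),
  Std.File.AddSuperFile('~company', '~company::subfile2'),
  Std.File.FinishSuperFileTransaction()
);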
\\n\\nUpdate: This morning I tried to a simpler test. I peeled 10 records off one of my subfiles, creating two subfiles with only five records each. I created the same setup in Thor with those tiny subfiles and the superfile and ... it worked. So this issue, whatever it is, may be related to both the size and count of the subfiles.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-19 11:57:12\" },\n\t{ \"post_id\": 1541, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOK, then I'm fairly well stumped. Can I see your file definition code so I can create some garbage files of my own and experiment?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2012-04-18 21:16:36\" },\n\t{ \"post_id\": 1540, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"This is all Thor.\", \"post_time\": \"2012-04-18 20:02:50\" },\n\t{ \"post_id\": 1537, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nIs this superfile on Roxie or Thor? Roxie only supports a single subfile in any given superfile but it does support multiple subfiles in a superkey).\\n\\nRichard\", \"post_time\": \"2012-04-18 18:35:23\" },\n\t{ \"post_id\": 1535, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"Update:\\n\\nI trimmed the second subfile down, down, down ... to a single record. The whole file was 152 bytes in size. The exact error message still occurs if I try to access the superfile with two subfiles.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-18 14:27:03\" },\n\t{ \"post_id\": 1534, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"I wish it was as easy as that.\\n\\nI ran six tests this morning; here is the Executive Summary:\\n\\n* Only subfile #1: Success\\n* Only subfile #2: Success\\n* Superfile with only subfile #1: Success\\n* Superfile with only subfile #2: Success\\n* Superfile with subfile #1 then subfile #2: Fail\\n* Superfile with subfile #2 then subfile #1: Fail\\n\\nI've enclosed the full thor log from the node doing the complaining (individual runs delimited with ==== and comments). The file was too large to include here, inline.\\n\\nPersonally, I'm a bit confused by the references to the recordSize value within the graph XML nodes and in the log entry a bit past that point. I sprayed the files with a default 8192 record length, but the logs indicate a 4096 length (which is the default value in a DATASET declaration). BTW, explicitly adding a MAXLENGTH() to that DATASET does not change the references to 4096 in the logs. I saw that yesterday when I was experimenting. That may not be relevant to this problem, but I just thought it was odd.\\n\\nThoughts?\\n\\nDan\", \"post_time\": \"2012-04-18 12:08:41\" },\n\t{ \"post_id\": 1532, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Re: Error scanning superfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThe MAXLENGTH the error is referring to is the maximum length of a single record, specified in the RECORD structure. \\n\\nI would expect that if it errors out with the SF, it should also error out with at least one of the two files. 
\\n\\nThe only way I can imagine it wouldn't would be if you're using a different RECORD structure for the SF's DATASET declaration than for the individual file DATASET declarations. Are you?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-17 21:33:44\" },\n\t{ \"post_id\": 1531, \"topic_id\": 341, \"forum_id\": 8, \"post_subject\": \"Error scanning superfile\", \"username\": \"DSC\", \"post_text\": \"I have a superfile composed of two tab-delimited files. These subfiles have just arrived into the cluster via a spray (so they were not created with OUTPUT or PERSIST).\\n\\nI have a bit of code that simply calculates the fill rate of the individual fields. There are 50+ fields, but all are fairly short length. The longest record is probably 500 bytes or so.\\n\\nMy code deals with the individual subfiles just fine, and with the superfile when only one subfile is included, but when I try to execute it against the superfile containing both subfiles I receive this probably-erroneous error:\\n\\nError: System error: 99: Graph[1], csvread[2]: SLAVE 10.210.150.81:6600: MAXLENGTH for CSV file is not large enough (0, 0), 99,
\\n\\nOne subfile is 2,289,439,116 bytes in length, the other is 2,308,639,854; the total is therefore 4,598,078,970.\\n\\nEdit: I should mention that I'm using a multi-node cluster and these files are spread across three Thor nodes. Each segment of each subfile is around 760MB in size.\\n\\nIs there a maximum length for a superfile in this format?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-04-17 16:59:59\" },\n\t{ \"post_id\": 1566, \"topic_id\": 346, \"forum_id\": 8, \"post_subject\": \"Re: PayLoad Indexing\", \"username\": \"bforeman\", \"post_text\": \"One of the suggestions during our discussion was to make binary data as payload for the index.
\\n\\nI think I mentioned that binary data would have to be retrieved using FETCH if the record size exceeded 8K, and using a NON-Payload INDEX. Sorry for any confusion.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-25 15:05:40\" },\n\t{ \"post_id\": 1565, \"topic_id\": 346, \"forum_id\": 8, \"post_subject\": \"Re: PayLoad Indexing\", \"username\": \"naveenchitrapu\", \"post_text\": \"Hi Bob,\\n\\nWe would like to retrieve original file through ROXIE that was sprayed on thor. One of the suggestions during our discussion was to make binary data as payload for the index. Does it mean that even payload field length is part of the maximum index length of 8K? Thanks.\", \"post_time\": \"2012-04-25 15:01:41\" },\n\t{ \"post_id\": 1563, \"topic_id\": 346, \"forum_id\": 8, \"post_subject\": \"Re: PayLoad Indexing\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nExpected behavior \\n\\nThe Language Reference says the following regarding INDEX:\\n\\nA single index record must be defined as less than 32K and result in a less than 8K page after compression.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-25 14:06:09\" },\n\t{ \"post_id\": 1562, \"topic_id\": 346, \"forum_id\": 8, \"post_subject\": \"PayLoad Indexing\", \"username\": \"Bhagwant\", \"post_text\": \"Hi,\\nWe are trying to Index ResultDS on TxtData which is of the size 200 bytes and Payload Field as BinaryData is of the size 41174 bytes. But when we run the code we are getting an Error: \\nSystem error: 0: Graph[5], indexwrite[8]: Index minimum record length (41382) exceeds 32767 internal limit.\\nWhen we remove the payload Field the code works fine. Is it that the Payload Field Data should not exceed 32767 bytes?\\n\\n Code snippet for the above problem is as follows:\\n
\\nBinaryDataRec := RECORD\\nDATA41174 BinaryData;\\nend;\\n\\nBinaryDataDs := DATASET('~test::spraycsv',BinaryDataRec,thor);\\nBinaryDataDs;\\n\\nResultDSRec:=Record\\nInteger id;\\nString FileName;\\nString200 TxtData;\\nBinaryDataDs.BinaryData;\\nEnd;\\n\\nResultDS:=Project(BinaryDataDs,Transform(ResultDSRec,Self.id:=1;Self.FileName:='Cookbook.pdf',Self.TxtData:='Changing the query above to the query below will greatly reduce the amount of data being carried through the map and reduce phases by pig. Pig Cookbook the Apache Software Foundation', Self: =Left));\\n\\nop := OUTPUT(ResultDS, ,'~Dataset::myBinaryDS',OVERWRITE);\\n\\nmyDataset := DATASET('~Dataset::myBinaryDS',{ResultDSRec,UNSIGNED8 RecPtv{virtual(fileposition)}},THOR);\\n \\nmyIndex := INDEX(myDataset,{TxtData,RecPtv},{BinaryData},'~IDX::Index::BinaryTxt.RecPtv');\\n \\nbld := BUILDINDEX(myIndex,OVERWRITE);\\n\\nbld;\\n\\n
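For reference, a minimal, untested sketch of the FETCH-based alternative Bob mentions above, which keeps the large binary field out of the index entirely and reads it back from the base file by file position (reusing the names from the snippet above, with a hypothetical index filename and search value):

// Non-payload index: only the 200-byte search field and the file position are keyed,
// so the index record stays well under the 32K / 8K limits
myIndex2 := INDEX(myDataset, {TxtData, RecPtv}, '~IDX::Index::Txt.RecPtv');

SEQUENTIAL(
  BUILDINDEX(myIndex2, OVERWRITE),
  // FETCH pulls the full record, including the DATA41174 BinaryData field, from the base dataset
  OUTPUT(FETCH(myDataset, myIndex2(REGEXFIND('cookbook', TxtData, NOCASE)), RIGHT.RecPtv))
);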
\", \"post_time\": \"2012-04-25 13:56:57\" },\n\t{ \"post_id\": 20223, \"topic_id\": 349, \"forum_id\": 8, \"post_subject\": \"Re: Dataset too large to output to workunit Exception\", \"username\": \"kovacsbv\", \"post_text\": \"First, the first parameter is a string, and needs to be quoted.\\nSecond, the number is the number of megabytes to use.\\nThird, the strings that you give are case sensitive.\\nFourth, there are two settings: outputLimit and outputLimitMb. These are synonyms. Both take the size in megabytes.\\n\\nWorking and tested example:\\nIMPORT std;\\n#OPTION('outputLimitMb', 100);\\nStd.File.LogicalFileList('*', 1, 1, FALSE);\\n
\\n\\nLet's just say we have a lot of files \", \"post_time\": \"2017-12-12 15:30:24\" },\n\t{ \"post_id\": 1573, \"topic_id\": 349, \"forum_id\": 8, \"post_subject\": \"Re: Dataset too large to output to workunit Exception\", \"username\": \"bforeman\", \"post_text\": \"Try adding the #OPTION(outputlimit,<new value>) to your query. The default is 10 MB.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-27 11:58:58\" },\n\t{ \"post_id\": 1571, \"topic_id\": 349, \"forum_id\": 8, \"post_subject\": \"Dataset too large to output to workunit Exception\", \"username\": \"Bhagwant\", \"post_text\": \"I want to return a dataset in Roxie which contains only 2 rows, but the data size is more than 10 MB. So when I compile my Roxie query and try to query it from ESP (port 8002), it gives me an exception:\\nReported by: eclagent\\nMessage: System error: 0: Dataset too large to output to workunit (limit 10) megabytes, in result (sequence=0).\\nThe Roxie query is as follows:\\n\\n
\\nIMPORT std;\\nEXPORT NgramRoxie := FUNCTION\\n\\nSTRING text_value := '': STORED('search'); ;\\n\\ntext_value_regexPattern := '\\\\\\\\b'+ text_value +'\\\\\\\\b';\\n\\nngramRec:=Record\\nUNSIGNED id;\\nString200 word;\\nEnd;\\n\\nTokenizeDataset := DATASET('~poc::naveen::result::ngramsdata',{ngramRec,UNSIGNED8 RecPos{virtual(fileposition)}},THOR);\\n\\n \\nmyIndex := INDEX(TokenizeDataset,{id,word,RecPos},'~IDX::Email::NewIndex');\\n\\n//mySet := FETCH(myText, myIndex(REGEXFIND(text_value_regexPattern,text)), RIGHT.RecPos);\\n\\nmySet := FETCH(TokenizeDataset, myIndex(REGEXFIND(text_value_regexPattern,word,NOCASE)), RIGHT.RecPos); //,TRUE\\n\\noutout := Dedup(mySet,id);\\n\\nreadDataRec := RECORD\\n STRING filename;\\n DATA inputfile;\\n String10000000 text; //{maxlength(10000000)};\\n END;\\n \\n\\nreadDataRec1 := RECORD\\n integer id;\\n STRING filename;\\n String10000000 text; //{maxlength(10000000)};\\n END;\\n \\n filedata := dataset('~poc::naveen::result::wedidit3',readDataRec,thor);\\n filedatawithid:=Project(filedata,Transform(readDataRec1,Self.id:=Counter,Self:=Left));\\n \\n \\n JoinedDS:=join(outout,filedatawithid,Left.id=Right.id);\\n// ret:=OUTPUT(JoinedDS,{text});\\n\\nRETURN JoinedDS;\\n\\nEND;\\n
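The fix suggested elsewhere in this thread is to raise the workunit output limit with #OPTION. An untested sketch of how that could be added near the top of the query file above (assuming a hash command ahead of the EXPORT is acceptable; 100 MB chosen arbitrarily):

IMPORT std;
#OPTION('outputLimitMb', 100);  // raise the default 10 MB workunit output limit
// ... rest of the NgramRoxie definition unchanged ...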
\", \"post_time\": \"2012-04-26 19:49:30\" },\n\t{ \"post_id\": 1575, \"topic_id\": 350, \"forum_id\": 8, \"post_subject\": \"Re: Issue with Roxie WSDL.\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Is this happening when you try to use the WSDL, or later when you make the call to the ECL query?\\n\\nIf it's query time, and you are using release version 3.6, it's probably due to a known regression that was fixed in 3.6.2.\\n\\nRegards,\\nTony\", \"post_time\": \"2012-04-27 16:26:54\" },\n\t{ \"post_id\": 1574, \"topic_id\": 350, \"forum_id\": 8, \"post_subject\": \"Re: Issue with Roxie WSDL.\", \"username\": \"bforeman\", \"post_text\": \"Checking with development regarding this issue. What version of the Community Edition do you have deployed?\", \"post_time\": \"2012-04-27 12:16:06\" },\n\t{ \"post_id\": 1572, \"topic_id\": 350, \"forum_id\": 8, \"post_subject\": \"Issue with Roxie WSDL.\", \"username\": \"Bhagwant\", \"post_text\": \"Hi,\\nI am using hthor Published Wsdl in Asp.net Application it works fine but when the same query is complied and published as wsdl on Roxie that give me an error "Response is not well Formed XML(Root Element Missing)".\\n\\nThe Code For Roxie Query\\n\\nIMPORT WebAnalytics;\\nIMPORT std;\\n\\nEXPORT searchDataRoxie := FUNCTION\\n\\nSTRING text_value := '' : STORED('search');\\n\\ntext_value_regexPattern := '\\\\\\\\b'+ text_value +'\\\\\\\\b';\\n \\n \\n DatasetRec:=Record\\n\\tinteger id;\\n\\tString FileName;\\n\\tString200 TxtData;\\n\\tData6188 BinaryData;\\nEnd;\\n\\n\\nmyEmailSet := DATASET('~Email::shree::myBinaryDS',{DatasetRec,UNSIGNED8 RecPtv{virtual(fileposition)}},THOR);\\n\\n\\nmyIndex := INDEX(myEmailSet,{TxtData,RecPtv},{BinaryData},'~IDX::Email::BinaryTxt.RecPtv');\\n\\nmySet := FETCH(myEmailSet,myIndex(REGEXFIND(text_value_regexPattern,TxtData,NOCASE)),RIGHT.RecPtv);\\t//,TRUE\\t\\t\\t\\t\\t \\n\\noutout := OUTPUT(mySet);\\n\\nRETURN outout;\\n\\nEND;\\n
\", \"post_time\": \"2012-04-26 19:50:07\" },\n\t{ \"post_id\": 1592, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"DSC\", \"post_text\": \"Got it. Thanks, Richard!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-04 19:04:40\" },\n\t{ \"post_id\": 1591, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nWell, I've tried, and it seems that this specific payload syntax is the only way to build an INDEX that contains nested child data in the payload. Therefore, here's a way that allows you to specify just the fields you want for the payload (IDX2, of course)://\\n// Example code - use without restriction. \\n//\\nLinkRec := RECORD\\n INTEGER1 NameID;\\nEND;\\nParentRec := RECORD(LinkRec)\\n STRING20 Name;\\nEND;\\nChildRec := RECORD(LinkRec)\\n STRING20 Addr;\\n INTEGER4 Phone;\\nEND;\\nChildNestedRec := RECORD\\n ChildRec AND NOT [NameID];\\nEND;\\nDenormedRec := RECORD\\n ParentRec;\\n INTEGER1 NumRows;\\n DATASET(ChildNestedRec) Children{MAXCOUNT(5)};\\nEND;\\n\\nNamesTable := DATASET([ {1,'Gavin'},\\n {2,'Liz'},\\n {3,'Mr Nobody'},\\n {4,'Anywhere'}], \\n ParentRec); \\nNormAddrs := DATASET([{1,'10 Malt Lane',12345},\\t\\n {2,'10 Malt Lane',54321},\\t\\n {2,'3 The cottages',45678},\\t\\n {4,'Here',87654},\\t\\n {4,'There',97531},\\t\\n {4,'Near',13579},\\t\\n {4,'Far',99999}],\\n ChildRec);\\t\\n\\nDenormedRec ParentLoad(ParentRec L) := TRANSFORM\\n SELF.NumRows := 0;\\n SELF.Children := [];\\n SELF := L;\\nEND;\\n\\nPtbl := PROJECT(NamesTable,ParentLoad(LEFT));\\n\\nDenormedRec DeNormThem(DenormedRec L, ChildRec R, INTEGER C) := TRANSFORM\\n SELF.NumRows := C;\\n SELF.Children := L.Children + ROW({R.Addr,R.Phone},ChildNestedRec);\\n SELF := L;\\nEND;\\n\\nDeNormedRecs := DENORMALIZE(Ptbl, NormAddrs,\\n LEFT.NameID = RIGHT.NameID,\\n\\t\\t\\t DeNormThem(LEFT,RIGHT,COUNTER));\\n\\nIDX1 := INDEX(DenormedRecs,{NameID},\\n {DenormedRecs},'~RTTEST::KEY::NestedChildPayloadTest1');\\n\\nOutRec := RECORD\\n ParentRec;\\n DATASET(ChildNestedRec) Children{MAXCOUNT(5)};\\nEND;\\n\\nJustMyFields := PROJECT(DenormedRecs,OutRec);\\nIDX2 := INDEX(JustMyFields,{NameID},\\n {JustMyFields},'~RTTEST::KEY::NestedChildPayloadTest2');\\n\\nBUILD(IDX1,overwrite);\\nBUILD(IDX2,overwrite);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-04 18:58:42\" },\n\t{ \"post_id\": 1590, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"DSC\", \"post_text\": \"That does work, and I do understand why. I had forgotten about that variation of citing payload fields.\\n\\nWhat about the case where you don't want every non-indexed field in the dataset included in the payload, though? Not that I need that now, but it seems like something that should work.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-04 16:44:16\" },\n\t{ \"post_id\": 1589, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nYour code:INDEX(rs,{someID},{someName,childRecs},'~the_tick::little_wooden_boy')
And this code:INDEX(rs,{someID},{rs},'~the_tick::little_wooden_boy')
Should yield exactly the same index file (IOW, exactly what you want), so try it this way and see if it gets you past the error. If it does, then we can talk about why. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-04 15:38:27\" },\n\t{ \"post_id\": 1588, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"DSC\", \"post_text\": \"That example makes sense, but it wasn't what I was aiming for. My fault for not more fully describing the question.\\n\\nGiven a record like:\\n\\n
MyLayout := RECORD\\n\\tUNSIGNED2\\t\\t\\t\\t\\tsomeID;\\n\\tSTRING70\\t\\t\\t\\t\\tsomeName;\\n\\tDATASET(EmbeddedLayout)\\t\\tchildRecs;\\nEND;
\\n\\n(Where EmbeddedLayout is some other simple record.)\\n\\nThe idea would be to search only someID and have the other two as payload fields:\\n\\nINDEX(rs,{someID},{someName,childRecs},'~the_tick::little_wooden_boy')
\\n\\nIn your example, the child dataset really winds up describing the entire payload. In this example, it's only a small (named) part of it. That's where I get the error.\\n\\nIs this kind of thing possible?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-05-04 14:56:58\" },\n\t{ \"post_id\": 1587, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Re: Payload indexes and child datasets\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nYes, Child datasets are supported with payload indexes. Here is a quick example that Richard Taylor and I worked up for you:\\n\\n//\\n// Example code - use without restriction. \\n//\\nLinkRec := RECORD\\n INTEGER1 NameID;\\nEND;\\nParentRec := RECORD(LinkRec)\\n STRING20 Name;\\nEND;\\nChildRec := RECORD(LinkRec)\\n STRING20 Addr;\\n INTEGER4 Phone;\\nEND;\\nChildNestedRec := RECORD\\n ChildRec AND NOT [NameID];\\nEND;\\nDenormedRec := RECORD\\n ParentRec;\\n INTEGER1 NumRows;\\n DATASET(ChildNestedRec) Children{MAXCOUNT(5)};\\nEND;\\n\\nNamesTable := DATASET([ {1,'Gavin'},\\n {2,'Liz'},\\n\\t\\t\\t{3,'Mr Nobody'},\\n\\t\\t\\t{4,'Anywhere'}], \\n\\t\\t\\tParentRec); \\nNormAddrs := DATASET([{1,'10 Malt Lane',12345},\\t\\n\\t {2,'10 Malt Lane',54321},\\t\\n\\t {2,'3 The cottages',45678},\\t\\n\\t {4,'Here',87654},\\t\\n\\t {4,'There',97531},\\t\\n\\t {4,'Near',13579},\\t\\n\\t {4,'Far',99999}],\\n\\t\\t ChildRec);\\t\\n\\nDenormedRec ParentLoad(ParentRec L) := TRANSFORM\\n SELF.NumRows := 0;\\n SELF.Children := [];\\n SELF := L;\\nEND;\\n\\nPtbl := PROJECT(NamesTable,ParentLoad(LEFT));\\nOUTPUT(Ptbl,NAMED('ParentDataReady'));\\n\\nDenormedRec DeNormThem(DenormedRec L, ChildRec R, INTEGER C) := TRANSFORM\\n SELF.NumRows := C;\\n SELF.Children := L.Children + ROW({R.Addr,R.Phone},ChildNestedRec);\\n SELF := L;\\nEND;\\n\\nDeNormedRecs := DENORMALIZE(Ptbl, NormAddrs,\\n\\t\\t\\t\\t LEFT.NameID = RIGHT.NameID,\\n\\t\\t\\t\\t DeNormThem(LEFT,RIGHT,COUNTER));\\n\\nOUTPUT(DeNormedRecs,NAMED('NestedChildDataset'));\\n\\nPayIDX := INDEX(DeNormedRecs,\\n {NameID,Name},{DeNormedRecs},'~test::nestchildpay');\\n\\nBUILD(PayIDX,overwrite);
\\n\\n\\nNote the use of ROW in the transform to filter out the foreign key info.\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-04 14:36:29\" },\n\t{ \"post_id\": 1586, \"topic_id\": 354, \"forum_id\": 8, \"post_subject\": \"Payload indexes and child datasets\", \"username\": \"DSC\", \"post_text\": \"Does ECL support child datasets as payload fields in indexes?\\n\\nI tried to include just that and received a "this is not related" runtime error (referencing a field within the child dataset, IIRC) when attempting to build the index. I could be just doing it wrong, though.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-05-04 12:47:32\" },\n\t{ \"post_id\": 2809, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"ghalliday\", \"post_text\": \"It is a different issue, and it is still present in 3.10.0.\\n\\nI have added an issue https://track.hpccsystems.com/browse/HPCC-8299 to track it.\", \"post_time\": \"2012-11-19 13:31:19\" },\n\t{ \"post_id\": 2802, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"oleg\", \"post_text\": \"Yes, forgot to mention, it works just fine on hthor.\", \"post_time\": \"2012-11-16 17:54:11\" },\n\t{ \"post_id\": 2790, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"ghalliday\", \"post_text\": \"I tried that example code for hthor/roxie on 3.10.0 and it worked as you would expect.\\n\\nI'll try on thor tomorrow.\", \"post_time\": \"2012-11-15 18:55:00\" },\n\t{ \"post_id\": 2783, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Verifying with development, 99% sure this is fixed in the next update.\\n\\nBob\", \"post_time\": \"2012-11-15 15:39:37\" },\n\t{ \"post_id\": 2782, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"oleg\", \"post_text\": \"Is problem mentioned by Dan has been solved?\\n\\nI'm also having problem with AGGREGATE, it looks little different, so not sure if this is related:\\n\\nCode below compiles just fine, but produces an error \\n[color=#800000:3u7rm9wh]"3000: System error: 3000: Graph[1], aggregate[5]: SLAVE 10.222.64.1:7600: assert(!HeapletBase::isShared(original)) failed - file: /var/jenkins/workspace/LN-Candidate-3.8.2/LN/centos_5_x86_64/HPCC-Platform/roxie/roxiemem/roxiemem.cpp, line 2126"\\n\\nWhat is also interesting, if I comment out first OUTPUT and uncomment either second or third OUTPUT, it will work fine producing expected result. 
However, if I uncomment both second and third OUTPUTs, error returns !\\n\\n\\nr := {UNSIGNED n};\\n\\ninRecord := {UNSIGNED ds_number; DATASET(R) dsRecs };\\n\\ninTable := DATASET([\\n {1, DATASET([11, 12, 13, 14], R)}\\n ,{2, DATASET([21, 22, 23, 24], R)}\\n ,{2, DATASET([121, 122, 123, 124], R)}\\n ,{2, DATASET([221, 222, 223, 224], R)}\\n ,{3, DATASET([31, 32, 33, 34, 35], R)}\\n ,{3, DATASET([131, 132, 133, 134, 135], R)}\\n ,{1, DATASET([111, 112, 113, 114, 115], R)}\\n ], inRecord);\\n\\ndTable := DISTRIBUTE(inTable, HASH(ds_number));\\noutRecord := {\\n UNSIGNED ds_number; DATASET(R) dsRecs\\n};\\n\\noutRecord t1(inRecord l, outRecord r) := TRANSFORM\\nSELF.ds_number := l.ds_number;\\nSELF.dsRecs := r.dsRecs + l.dsRecs;\\nEND;\\n\\noutRecord t2(outRecord r1, outRecord r2) := TRANSFORM\\nSELF.ds_number := r1.ds_number;\\nSELF.dsRecs := r1.dsRecs + r2.dsRecs;\\nEND;\\n\\nOUTPUT(AGGREGATE(inTable, outRecord, t1(LEFT, RIGHT), t2(RIGHT1, RIGHT2), LEFT.ds_number));\\n// OUTPUT(AGGREGATE(dTable, outRecord, t1(LEFT, RIGHT), t2(RIGHT1, RIGHT2), LEFT.ds_number));\\n// OUTPUT(AGGREGATE(dTable, outRecord, t1(LEFT, RIGHT), LEFT.ds_number, LOCAL));\\n
\", \"post_time\": \"2012-11-15 14:37:46\" },\n\t{ \"post_id\": 1632, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"DSC\", \"post_text\": \"Thanks for the follow-up. Glad to hear I'm not totally crazy, all of the time.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-23 11:39:42\" },\n\t{ \"post_id\": 1631, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nThanks again for your example, the development team has also reproduced the issue and we will work on a fix as soon as possible.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-23 11:38:41\" },\n\t{ \"post_id\": 1626, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Outstanding Dan! Thanks! I will verify your test results and pass this on to our development team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-21 13:59:21\" },\n\t{ \"post_id\": 1625, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"DSC\", \"post_text\": \"I finally got around to duplicating this problem with a smaller dataset and test code. Surprisingly, I got it to fail with an extremely low number of records in the test dataset: 10.\\n\\nHere is the code:\\n\\nIMPORT * FROM Std;\\n\\n// OUTPUT($.SliceData.XTab_US_SIC4.xtabUSSIC4ByCityDS[1..10],,'~test_file',OVERWRITE,COMPRESSED);\\n\\n\\nOriginalLayout := RECORD\\n\\tUNSIGNED2\\t\\t\\t\\t\\t\\t\\tcountryID;\\n\\tSTRING70\\t\\t\\t\\t\\t\\t\\tstate;\\n\\tSTRING50\\t\\t\\t\\t\\t\\t\\tcity;\\n\\tSTRING4\\t\\t\\t\\t\\t\\t\\t\\tusSIC4;\\n\\tUNSIGNED8\\t\\t\\t\\t\\t\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\t\\t\\t\\t\\t\\ttotalRevenue;\\n\\tUNSIGNED8\\t\\t\\t\\t\\t\\t\\ttotalEmployees;\\n\\tSTRING70\\t\\t\\t\\t\\t\\t\\tcountryName;\\n\\tSTRING180\\t\\t\\t\\t\\t\\t\\tindustryName;\\nEND;\\n\\nEmbeddedCitiesLayout := RECORD\\n\\tSTRING\\t\\t\\t\\t\\t\\t\\t\\tcity;\\n\\tUNSIGNED8\\t\\t\\t\\t\\t\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\t\\t\\t\\t\\t\\ttotalRevenue;\\nEND;\\n\\nDatasetLayout := RECORD\\n\\tSTRING4\\t\\t\\t\\t\\t\\t\\t\\tusSIC4;\\n\\tSTRING\\t\\t\\t\\t\\t\\t\\t\\tindustryName;\\n\\tUNSIGNED2\\t\\t\\t\\t\\t\\t\\tcountryID;\\n\\tSTRING\\t\\t\\t\\t\\t\\t\\t\\tcountryName;\\n\\tSTRING\\t\\t\\t\\t\\t\\t\\t\\tstate;\\n\\tUNSIGNED8\\t\\t\\t\\t\\t\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\t\\t\\t\\t\\t\\ttotalRevenue;\\n\\tDATASET(EmbeddedCitiesLayout)\\t\\tcities;\\nEND;\\n\\nrs1 := DATASET('~test_file',OriginalLayout,THOR);\\n\\nDatasetLayout MainAggregateXForm(rs1 l, DatasetLayout r) := TRANSFORM\\n\\tSELF.companyCount := l.companyCount + r.companyCount;\\n\\tSELF.totalRevenue := l.totalRevenue + r.totalRevenue;\\n\\tSELF.cities := r.cities + PROJECT(DATASET(l),TRANSFORM(EmbeddedCitiesLayout,SELF:=LEFT));\\n\\tSELF := l;\\nEND;\\n\\nDatasetLayout MergeAggregateXForm(DatasetLayout r1, DatasetLayout r2) := TRANSFORM\\n\\tSELF.companyCount := r1.companyCount + r2.companyCount;\\n\\tSELF.totalRevenue := r1.totalRevenue + r2.totalRevenue;\\n\\tSELF.cities := r1.cities + r2.cities;\\n\\tSELF := r1;\\nEND;\\n\\nrs2 := 
AGGREGATE\\t(\\n\\t\\t\\t\\t\\t\\trs1,\\n\\t\\t\\t\\t\\t\\tDatasetLayout,\\n\\t\\t\\t\\t\\t\\tMainAggregateXForm(LEFT,RIGHT),\\n\\t\\t\\t\\t\\t\\tMergeAggregateXForm(RIGHT1,RIGHT2),\\n\\t\\t\\t\\t\\t\\tLEFT.usSIC4,LEFT.countryID,Str.ToUpperCase(LEFT.state)\\n\\t\\t\\t\\t\\t);\\n\\nOUTPUT(rs2);\\n
\\n\\nI have a three-way Thor (one master and three slaves). Spray the enclosed file into your cluster (fixed, record length 400) and name it ~test_file, then simply execute this code. At least one Thor instance will crash (sometimes two) immediately, and the IDE will report an MP Link error. The stack dump is pretty much a duplicate of what I posted earlier.\\n\\nWhen creating this test, I used the commented-out line to grab just the first few records from my original dataset. Both the original and the generated dataset was compressed, and I thought that had something to do with it. However, I desprayed and re-sprayed the small file uncompressed and that didn't make a difference.\\n\\nI hope this gives you enough information to troubleshoot. If you need anything else, just ask.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-21 13:44:31\" },\n\t{ \"post_id\": 1615, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"DSC\", \"post_text\": \"Hi Bob,\\n\\nI didn't snapshot the problem once I worked around it, so I'll have to do a bit of recreating. The code has progressed significantly since then. I'll let you know when I have something.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-05-16 11:45:09\" },\n\t{ \"post_id\": 1614, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nDevelopment has requested an example that reproduces the error. If you can reproduce with 1000 records can we get some sample data along with the ECL?\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2012-05-16 11:41:46\" },\n\t{ \"post_id\": 1605, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Thanks Dan, checking with development now.\", \"post_time\": \"2012-05-14 13:54:11\" },\n\t{ \"post_id\": 1604, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"DSC\", \"post_text\": \"Single definition used throughout the code:\\n\\nEXPORT\\txtabUSSIC4ByCityDS := DATASET(kXTabUSSIC4ByCityFilePath,USSIC4IDByCityXTab,THOR);
\\n\\nThe code that called the function containing the AGGREGATE simply passed in xtabUSSIC4ByCityDS as the argument.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-14 13:44:49\" },\n\t{ \"post_id\": 1603, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Re: Segfault aggregating records of variable length\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nWhat does your DATASET statement look like that references the USSIC4IDByCityXTab RECORD?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-14 13:39:50\" },\n\t{ \"post_id\": 1593, \"topic_id\": 355, \"forum_id\": 8, \"post_subject\": \"Segfault aggregating records of variable length\", \"username\": \"DSC\", \"post_text\": \"I spent too much time over the weekend debugging a strange problem with a simple AGGREGATE call. I discovered the workaround, but I thought I'd publish what I found to see where the problem actually resides (me or the system, basically). All of this is in version 3.6.2CE.\\n\\nConsider these three layouts:\\n\\nUSSIC4IDByCityXTab := RECORD\\n\\t// Group by fields\\n\\tUNSIGNED2\\t\\tcountryID;\\n\\tSTRING\\t\\t\\tstate;\\n\\tSTRING\\t\\t\\tcity;\\n\\tSTRING\\t\\t\\tusSIC4;\\n\\t// Aggregation fields\\n\\tUNSIGNED8\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\ttotalRevenue;\\n\\tUNSIGNED8\\t\\ttotalEmployees;\\n\\t// Appended fields\\n\\tSTRING\\t\\t\\tcountryName;\\n\\tSTRING\\t\\t\\tindustryName;\\n\\tSTRING\\t\\t\\tstateUpper;\\n\\tSTRING\\t\\t\\tcityUpper;\\nEND;\\n\\nIndustriesLayout := RECORD\\n\\tSTRING\\t\\t\\tusSIC4;\\n\\tSTRING\\t\\t\\tindustryName;\\n\\tUNSIGNED8\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\ttotalRevenue;\\nEND;\\n\\nTopIndustriesInCityByTotalRevenueLayout := RECORD\\n\\tUNSIGNED2\\t\\t\\t\\t\\tcountryID;\\n\\tSTRING\\t\\t\\t\\t\\t\\tcountryName;\\n\\tSTRING\\t\\t\\t\\t\\t\\tstate;\\n\\tSTRING\\t\\t\\t\\t\\t\\tcity;\\n\\tUNSIGNED8\\t\\t\\t\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\t\\t\\t\\ttotalRevenue;\\n\\tDATASET(IndustriesLayout)\\tindustries;\\nEND;
\\n\\nThe first one defines the layout of a dataset created via TABLE and some lookup-style JOINs. That dataset is created without a problem.\\n\\nThe last record will be the output of an AGGREGATE. It uses the second record layout, but I don't think that's relevant here (but it may be).\\n\\nThis function:\\n\\nCreateTopIndustriesInCityByTotalRevenue(DATASET(USSIC4IDByCityXTab) rs) := FUNCTION\\n\\tTopIndustriesInCityByTotalRevenueLayout MainCityAggregateXForm(rs l, TopIndustriesInCityByTotalRevenueLayout r) := TRANSFORM\\n\\t\\tSELF.companyCount := l.companyCount + r.companyCount;\\n\\t\\tSELF.totalRevenue := l.totalRevenue + r.totalRevenue;\\n\\t\\tSELF.industries := r.industries + PROJECT(DATASET(l),TRANSFORM(IndustriesLayout,SELF:=LEFT));\\n\\t\\tSELF := l;\\n\\tEND;\\n\\t\\n\\tTopIndustriesInCityByTotalRevenueLayout MergeCityAggregateXForm(TopIndustriesInCityByTotalRevenueLayout r1, TopIndustriesInCityByTotalRevenueLayout r2) := TRANSFORM\\n\\t\\tSELF.companyCount := r1.companyCount + r2.companyCount;\\n\\t\\tSELF.totalRevenue := r1.totalRevenue + r2.totalRevenue;\\n\\t\\tSELF.industries := r1.industries + r2.industries;\\n\\t\\tSELF := r1;\\n\\tEND;\\n\\t\\n\\tcityLevelData := AGGREGATE\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\trs,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tTopIndustriesInCityByTotalRevenueLayout,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tMainCityAggregateXForm(LEFT,RIGHT),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tMergeCityAggregateXForm(RIGHT1,RIGHT2),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tLEFT.countryID,LEFT.stateUpper,LEFT.cityUpper\\n\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tRETURN cityLevelData;\\nEND;
\\n\\nIs a test for aggregating the original dataset into the new one. Unfortunately, when I run this on my 3-way Thor, at least one node will always crash. A snippet from the thor log is:\\n\\n0000001E 2012-05-07 07:00:12 29384 29482 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(sort, 3)\\n0000001F 2012-05-07 07:00:12 29384 29482 MSortSlaveActivity::createMSortSlave - activity(sort, 3) [ecl=SORT(countryid, stateupper, cityupper);]\\n00000020 2012-05-07 07:00:12 29384 29482 CONNECTING (id=3, idx=0) to (id=4, idx=0) - activity(group, 4)\\n00000021 2012-05-07 07:00:12 29384 29482 CONNECTING (id=4, idx=0) to (id=5, idx=0) - activity(aggregate, 5)\\n00000022 2012-05-07 07:00:12 29384 29482 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(firstn, 6)\\n00000023 2012-05-07 07:00:12 29384 29482 CONNECTING (id=6, idx=0) to (id=7, idx=0) - activity(workunitwrite, 7)\\n00000024 2012-05-07 07:00:12 29384 29482 deserializeMPTag: tag = 65545\\n00000025 2012-05-07 07:00:12 29384 29482 deserializeMPTag: tag = 65544\\n00000026 2012-05-07 07:00:12 29384 29482 deserializeMPTag: tag = 65542\\n00000027 2012-05-07 07:00:12 29384 29482 deserializeMPTag: tag = 65543\\n00000028 2012-05-07 07:00:12 29384 29482 MSortSlaveActivity::init portbase = 20103, mpTagRPC = 65542 - activity(sort, 3)\\n00000029 2012-05-07 07:00:12 29384 29484 Creating SortSlaveServer on tag 65542 MP - activity(sort, 3)\\n0000002A 2012-05-07 07:00:12 29384 29482 Watchdog: Start Job 1\\n0000002B 2012-05-07 07:00:12 29384 29483 Starting input - activity(workunitwrite, 7)\\n0000002C 2012-05-07 07:00:12 29384 29485 Starting input - activity(firstn, 6)\\n0000002D 2012-05-07 07:00:12 29384 29485 Starting input - activity(aggregate, 5)\\n0000002E 2012-05-07 07:00:12 29384 29485 GROUP: is local - activity(group, 4)\\n0000002F 2012-05-07 07:00:12 29384 29485 Starting input - activity(group, 4)\\n00000030 2012-05-07 07:00:12 29384 29485 Starting input - activity(sort, 3)\\n00000031 2012-05-07 07:00:12 29384 29485 diskread[part=0]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/timeline/xtab_data/revenue/us_sic4_by_city_xtab._1_of_3' (logical file = ~timeline::xtab_data::revenue::us_sic4_by_city_xtab) - activity(diskread, 2)\\n00000032 2012-05-07 07:00:12 29384 29485 diskread[part=0]: Base offset to 0 - activity(diskread, 2)\\n00000033 2012-05-07 07:00:12 29384 29485 Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/timeline/xtab_data/revenue/us_sic4_by_city_xtab._1_of_3 - activity(diskread, 2)\\n00000034 2012-05-07 07:00:12 29384 29485 diskread[part=0]: variable (/var/lib/HPCCSystems/hpcc-data/thor/timeline/xtab_data/revenue/us_sic4_by_city_xtab._1_of_3) - activity(diskread, 2)\\n00000035 2012-05-07 07:00:12 29384 29485 ITDL starting for output 0 - activity(diskread, 2)\\n00000036 2012-05-07 07:00:12 29384 29485 ITDL starting for output 0 - activity(sort, 3)\\n00000037 2012-05-07 07:00:12 29384 29485 Gather in - activity(sort, 3)\\n00000038 2012-05-07 07:00:12 29384 29484 Connected to slave 0 of 3 - activity(sort, 3)\\n00000039 2012-05-07 07:00:12 29384 29486 CSortTransferServerThread started port 20104\\n0000003A 2012-05-07 07:00:12 29384 29484 Start Gather - activity(sort, 3)\\n0000003B 2012-05-07 07:00:12 29384 29485 SORT: Gather - activity(sort, 3)\\n0000003C 2012-05-07 07:00:12 29384 29485 Record size (max) = 4096 - activity(diskread, 2)\\n0000003D 2012-05-07 07:00:40 29384 29485 Local run sort(s) done - activity(sort, 3)\\n0000003E 2012-05-07 07:00:40 29384 29485 Sort done, rows sorted = 4927794, bytes sorted 
= 668237112 overflowed to disk 0 times - activity(sort, 3)\\n0000003F 2012-05-07 07:00:40 29384 29485 Gather finished - activity(sort, 3)\\n00000040 2012-05-07 07:00:40 29384 29485 Stopping input for - activity(sort, 3)\\n00000041 2012-05-07 07:00:40 29384 29485 ITDL output 0 stopped, count was 4927794 - activity(diskread, 2)\\n00000042 2012-05-07 07:00:40 29384 29485 SORT waiting barrier.1 - activity(sort, 3)\\n00000043 2012-05-07 07:00:40 29384 29485 SORT barrier.1 raised - activity(sort, 3)\\n00000044 2012-05-07 07:00:40 29384 29485 SORT Merge Waiting - activity(sort, 3)\\n00000045 2012-05-07 07:00:40 29384 29484 Min =(117): , 1, 0, 0, 0, 0, 0, c, 0, 0, 0,"15300 Azazga" 4, 0, 0, 0,"7539" 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,32, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0,"Algeria"1c, 0, 0, 0,"Automotive repair shops, nec" 0, 0, 0, 0, c, 0, 0, 0,"15300 AZAZGA"\\n00000046 2012-05-07 07:00:40 29384 29484 Max =(112): , 0, 1, 0, 0, 0, 0, d, 0, 0, 0,"Phoenix 85016" 4, 0, 0, 0,"7011" 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, b, 0, 0, 0,"Wake Island"11, 0, 0, 0,"Hotels and motels" 0, 0, 0, 0, d, 0, 0, 0,"PHOENIX 85016"\\n00000047 2012-05-07 07:00:40 29384 29484 Ave Rec Size = 135\\n00000048 2012-05-07 07:00:40 29384 29484 Output start = 0, num = 4875515\\n00000049 2012-05-07 07:00:40 29384 29484 SORT Merge READ: Stream(0) local, pos=0 len=1628988\\n0000004A 2012-05-07 07:00:40 29384 29484 SORT Merge READ: Stream(1) 10.210.150.81:20103, pos=0 len=1600612\\n0000004B 2012-05-07 07:00:40 29384 29484 SORT Merge READ: Stream(1) connected to 10.210.150.81:20103\\n0000004C 2012-05-07 07:00:40 29384 29484 SORT Merge READ: Stream(2) 10.210.150.78:20103, pos=0 len=1645915\\n0000004D 2012-05-07 07:00:40 29384 29484 SORT Merge READ: Stream(2) connected to 10.210.150.78:20103\\n0000004E 2012-05-07 07:00:40 29384 29562 SORT Merge WRITE: start 10.210.150.81:60914, pos=1628988, len=1643934\\n0000004F 2012-05-07 07:00:40 29384 29562 SORT Merge WRITE: start 10.210.150.78:55968, pos=3272922, len=1654872\\n00000050 2012-05-07 07:00:40 29384 29484 Global Merger Created: 3 streams - activity(sort, 3)\\n00000051 2012-05-07 07:00:40 29384 29485 SORT Merge Start - activity(sort, 3)\\n00000052 2012-05-07 07:00:40 29384 29485 ITDL starting for output 0 - activity(group, 4)\\n00000053 2012-05-07 07:00:40 29384 29485 Record size (max) = 4096 - activity(sort, 3)\\n00000054 2012-05-07 07:00:40 29384 29485 ITDL starting for output 0 - activity(aggregate, 5)\\n00000055 2012-05-07 07:00:40 29384 29485 ITDL starting for output 0 - activity(firstn, 6)\\n00000056 2012-05-07 07:00:40 29384 29485 Starting input - activity(firstn, 6)\\n00000057 2012-05-07 07:00:40 29384 29563 Record size (max) = 4096 - activity(group, 4)\\n00000058 2012-05-07 07:00:40 29384 29563 Record size (max) = 4096 - activity(aggregate, 5)\\n00000059 2012-05-07 07:00:40 29384 29483 WORKUNITWRITE: processing first block - activity(workunitwrite, 7)\\n0000005A 2012-05-07 07:00:40 29384 29485 FIRSTN: Record limit is 100 0 - activity(firstn, 6)\\n0000005B 2012-05-07 07:00:40 29384 29485 Record size (max) = 4096 - activity(firstn, 6)\\n0000005C 2012-05-07 07:00:40 29384 29563 ================================================\\n0000005D 2012-05-07 07:00:40 29384 29563 Signal: 11 Segmentation fault\\n0000005E 2012-05-07 07:00:40 29384 29563 Fault IP: 00002AAAAAAE5D81\\n0000005F 2012-05-07 07:00:40 29384 29563 Accessing: 00002AAAF5A819B2\\n00000060 2012-05-07 07:00:40 29384 29563 Registers:\\n00000061 2012-05-07 07:00:40 29384 29563 
EAX:00002AAAF5A819B2 EBX:00002AAAAC0015C0 ECX:00002AAADBE61DB8 EDX:00002AAADBE619AC ESI:0000000048B22FD0 EDI:0000000048B22FD0\\n00000062 2012-05-07 07:00:40 29384 29563 CS:EIP:0033:00002AAAAAAE5D81\\n00000063 2012-05-07 07:00:40 29384 29563 ESP:0000000048B22F90 EBP:0000000048B22FB0\\n00000064 2012-05-07 07:00:40 29384 29563 Stack[0000000048B22F90]: 0000000048B22FD0 AC00130000000000 00002AAAAC001300 19C2000400002AAA 0000000019C20004 1877DF1000000000 000000001877DF10 1877898000000000\\n00000065 2012-05-07 07:00:40 29384 29563 Stack[0000000048B22FB0]: 0000000018778980 34760DF200000000 00002B9E34760DF2 48B22FD000002B9E 0000000048B22FD0 24A870F300000000 00781EBD24A870F3 3332B61000781EBD\\n00000066 2012-05-07 07:00:40 29384 29563 Stack[0000000048B22FD0]: 00002B9E3332B610 DBE619AC00002B9E 00002AAADBE619AC 3332B65800002AAA 00002B9E3332B658 0000000100002B9E 0000000000000001 1877898000000000\\n00000067 2012-05-07 07:00:40 29384 29563 Stack[0000000048B22FF0]: 0000000018778980 0000040000000000 00002B9E00000400 0000000000002B9E 0000000000000000 AC0015C000000000 00002AAAAC0015C0 1877DFF000002AAA\\n00000068 2012-05-07 07:00:40 29384 29563 Stack[0000000048B23010]: 000000001877DFF0 1877DF1000000000 000000001877DF10 0000000700000000 0000000000000007 48B2400000000000 0000000048B24000 0000100000000000\\n00000069 2012-05-07 07:00:40 29384 29563 Stack[0000000048B23030]: 0000000000001000 347610CB00000000 00002B9E347610CB AC00173800002B9E 00002AAAAC001738 1877DFF000002AAA 000000001877DFF0 1877DF1000000000\\n0000006A 2012-05-07 07:00:40 29384 29563 Stack[0000000048B23050]: 000000001877DF10 347F2F6F00000000 00002B9E347F2F6F 0000000000002B9E 0000000000000000 0000000000000000 0000000000000000 1877E2C800000000\\n0000006B 2012-05-07 07:00:40 29384 29563 Stack[0000000048B23070]: 000000001877E2C8 48B2305000000000 0000000048B23050 0000000000000000 0000000000000000 1877DFF000000000 000000001877DFF0 1877DFF000000000\\n0000006C 2012-05-07 07:00:40 29384 29563 Backtrace:\\n0000006D 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2b9e31ec5b96]\\n0000006E 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2b9e31ec6be5]\\n0000006F 2012-05-07 07:00:40 29384 29563 /lib64/libpthread.so.0 [0x3f8660eb70]\\n00000070 2012-05-07 07:00:40 29384 29563 /var/lib/HPCCSystems/queries/mythor/V2164797438_libW20120507-070715.so [0x2aaaaaae5d81]\\n00000071 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN27GroupAggregateSlaveActivity14nextRowNoCatchEv+0x132) [0x2b9e34760df2]\\n00000072 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN27GroupAggregateSlaveActivity7nextRowEv+0x1b) [0x2b9e347610cb]\\n00000073 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN18ThorLookaheadCache3runEv+0x12f) [0x2b9e347f2f6f]\\n00000074 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2b9e31f4f877]\\n00000075 2012-05-07 07:00:40 29384 29563 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2b9e31f503ef]\\n00000076 2012-05-07 07:00:40 29384 29563 /lib64/libpthread.so.0 [0x3f8660673d]\\n00000077 2012-05-07 07:00:40 29384 29563 /lib64/libc.so.6(clone+0x6d) [0x3f85ed44bd]\\n00000078 2012-05-07 07:00:40 29384 29563 ThreadList:\\n40A6E940 1084680512 29387: CMPNotifyClosedThread\\n42719940 1114741056 29388: MP Connection Thread\\n4311A940 1125230912 29389: CSocketSelectThread\\n43B1B940 1135720768 29391: 
CBackupHandler\\n4451C940 1146210624 29392: CGraphProgressHandler\\n44F1D940 1156700480 29481: CMemoryUsageReporter\\n41584940 1096304960 29482: CGraphExecutor pool\\n4591E940 1167190336 29483: ProcessSlaveActivity\\n4631F940 1177680192 29484: CSortSlaveThread\\n46D20940 1188170048 29485: ThorLookaheadCache\\n47721940 1198659904 29486: SortTransferServer\\n48122940 1209149760 29562: CSocketSelectThread\\n48B23940 1219639616 29563: ThorLookaheadCache
\\n\\nThe original dataset is 14M records or so. If I alter the recordset argument to AGGREGATE to choose fewer records (e.g. CHOOSEN(rs,1000)) I can make different nodes crash, but they still crash. With absurdly low record numbers, like 100, it succeeds.\\n\\nThe workaround I have in place right now is to define the length of all the STRING fields in the record layouts:\\n\\nUSSIC4IDByCityXTab := RECORD\\n\\t// Group by fields\\n\\tUNSIGNED2\\t\\tcountryID;\\n\\tSTRING70\\t\\tstate;\\n\\tSTRING50\\t\\tcity;\\n\\tSTRING4\\t\\t\\tusSIC4;\\n\\t// Aggregation fields\\n\\tUNSIGNED8\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\ttotalRevenue;\\n\\tUNSIGNED8\\t\\ttotalEmployees;\\n\\t// Appended fields\\n\\tSTRING70\\t\\tcountryName;\\n\\tSTRING180\\t\\tindustryName;\\n\\tSTRING70\\t\\tstateUpper;\\n\\tSTRING50\\t\\tcityUpper;\\nEND;\\n\\nIndustriesLayout := RECORD\\n\\tSTRING4\\t\\t\\tusSIC4;\\n\\tSTRING180\\t\\tindustryName;\\n\\tUNSIGNED8\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\ttotalRevenue;\\nEND;\\n\\nTopIndustriesInCityByTotalRevenueLayout := RECORD\\n\\tUNSIGNED2\\t\\t\\t\\t\\tcountryID;\\n\\tSTRING70\\t\\t\\t\\t\\tcountryName;\\n\\tSTRING70\\t\\t\\t\\t\\tstate;\\n\\tSTRING50\\t\\t\\t\\t\\tcity;\\n\\tUNSIGNED8\\t\\t\\t\\t\\tcompanyCount;\\n\\tREAL8\\t\\t\\t\\t\\t\\ttotalRevenue;\\n\\tDATASET(IndustriesLayout)\\tindustries;\\nEND;
\\n\\nThat may be overkill, but it works. The full 14M record dataset can be processed without a problem.\\n\\nAnyone have any thoughts on this?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-07 15:18:33\" },\n\t{ \"post_id\": 1597, \"topic_id\": 356, \"forum_id\": 8, \"post_subject\": \"Re: Rollup gives error when runs in thor\", \"username\": \"rtaylor\", \"post_text\": \"Shriram,\\n\\nWe have a utility to download Facebook posts in XML. As of now it create a file for each page, we are looping thru this and downloading previous pages. Although there are multiple XMLs, these should be in a single DataSet.
OK, I understand all that. What I don't get is why you're using ROLLUP to put all the XML files into a single record in the dataset.\\n\\nThe approach I would use would be to spray the files so that each XML file becomes a separate record in the dataset. Then I would use PARSE on each record (each XML file) to extract the data I'm looking for. Once that was done, only then would I consider doing a ROLLUP on any data that needs to be aggregated.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-10 13:24:57\" },\n\t{ \"post_id\": 1596, \"topic_id\": 356, \"forum_id\": 8, \"post_subject\": \"Re: Rollup gives error when runs in thor\", \"username\": \"shriram.soni\", \"post_text\": \"Hello Richard,\\nUsing DFUPlus with the format set to XML seems like a good idea; we will try our code with it.\\nWe have a utility to download Facebook posts in XML. As of now it creates a file for each page, and we are looping through this and downloading previous pages. Although there are multiple XMLs, these should be in a single DataSet.\\n\\nWe have also faced the MP Link issue in another scenario. Using NaiveBias we created the model and saved it to a file. In other code we read the model from the file and apply it to multiple datasets in a TRANSFORM, and it is inside that transform that we load the model. It seems that here, too, multiple nodes are trying to access the model, causing the MP Link error.\\nWe will take the model-loading code out of the transform and test our code.\", \"post_time\": \"2012-05-10 05:38:41\" },\n\t{ \"post_id\": 1595, \"topic_id\": 356, \"forum_id\": 8, \"post_subject\": \"Re: Rollup gives error when runs in thor\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,\\n\\nOK, first off, you can simply use DFUPLUS.exe (documented in HPCCClientTools.PDF) to spray multiple XML files into a single logical file on your Thor cluster (using wildcards in the filename). Just doing that should likely eliminate the link closed error, because you will no longer have multiple slave nodes all trying to talk to the same landing zone box to get at the files remotely.\\n\\nSecond, I'm curious as to what you are trying to accomplish by putting all these XML files into a single record. 
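For illustration, a minimal ECL sketch of the approach Richard suggests above (the logical file name and row tag are hypothetical; the fields mirror Apurv's wallEntry layout): spray the whole directory of XML files into one logical file with a wildcard, then read it as a single dataset in which each XML document is an ordinary record.

// Hypothetical layout and logical file name - adjust to the real spray target.
wallEntry := RECORD
    STRING fromName {XPATH('from/name')};
    STRING strMsg   {XPATH('message')};
    STRING strType  {XPATH('type')};
END;

// One logical file produced by a wildcard XML spray of the whole directory;
// every row-tag element becomes one record.
allPosts := DATASET('~web_analytics::facebook::all_posts', wallEntry, XML('json/data'));

// Each post is now an ordinary record - filter, PARSE, or aggregate as needed.
OUTPUT(COUNT(allPosts));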
What will that give you that you won't be able to do with each file as a separate record?\\n\\nRichard\", \"post_time\": \"2012-05-08 15:02:33\" },\n\t{ \"post_id\": 1594, \"topic_id\": 356, \"forum_id\": 8, \"post_subject\": \"Rollup gives error when runs in thor\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nI'm trying to read multiple Xml files through RemoteDirectory and then storing them in Dataset.\\nThen i want to roll them up so that i can have all xml files in one row of RecordSet.\\n\\nI'm trying this code:\\n\\nIMPORT mylib;\\nimport std, std.File, std.Str;\\n\\nSTRING landing_zone := '172.20.15.168';\\nSTRING directory := '/home/hpcc/lz_data/web_analytics/facebook/';\\nfilelist := std.File.RemoteDirectory(landing_zone, directory, '*.xml', FALSE);\\nfilelist;\\n\\nwallComments := RECORD\\n STRING cmtFromName{XPATH('from/name')};\\n STRING cmtMsg{XPATH('message')};\\n STRING createdTime{XPATH('created_time')}; \\n DATASET( mylib.tagTXT.tagRec) tagCmtMsg ;\\nEND;\\n\\nwallEntry := RECORD\\n STRING fromName {XPATH('from/name')};\\n STRING toName {XPATH('to/data/name')};\\n STRING strMsg {XPATH('message')};\\n STRING strType {XPATH('type')};\\n DATASET(wallComments) Comments {XPATH('comments/data')};\\n DATASET(mylib.tagTXT.tagRec) tagWallMsg ;\\nEND;\\n\\n \\n xmlDataRec := record\\n dataset(wallEntry) xmldata ;\\n end;\\n\\ndictInfoData := DATASET('~file::172.20.15.168::home::hpcc::lz_data::web_analytics::emotiondictionary.csv',\\n mylib.tagTXT.dataDictRec, CSV(HEADING(1), QUOTE('\\\\"'))) : GLOBAL; \\n \\n \\n xmlDataRec xform3 (filelist l) := Transform\\n self.xmldata := DATASET('~file::172.20.15.168::home::hpcc::lz_data::web_analytics::facebook::' + l.name, wallEntry,XML('json/data'));\\n end;\\n \\n combinedFBposts:= project(filelist,xform3(left));\\n combinedFBposts;\\n \\n\\ncombinedFBposts doRollup(combinedFBposts l,combinedFBposts r) := transform\\n\\tself.xmldata := l.xmldata + r.xmldata;\\nend;\\n\\n\\nrolledPostsDS := rollup(combinedFBposts,true,doRollup(left,right));\\nrolledPostsDS;\\n
\\nThe problem is that on the Thor cluster it gives me this error:\\nError: System error: 4: MP link closed (172.20.15.168:6600) (0, 0), 4, \\n\\nbut when I tried it on hthor it worked for me. Can you let me know whether this is the expected behavior? I have used ROLLUP many times before and never faced such issues.\\n\\nAlso, is this the right approach for reading multiple XML files?\", \"post_time\": \"2012-05-08 04:31:07\" },\n\t{ \"post_id\": 1609, \"topic_id\": 358, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT and relative parent directories\", \"username\": \"bforeman\", \"post_text\": \"Sure. Since you posted it here, I'm sure someone from development will see it, but I will go ahead and ping them as well.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-14 14:11:36\" },\n\t{ \"post_id\": 1608, \"topic_id\": 358, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT and relative parent directories\", \"username\": \"DSC\", \"post_text\": \"Thanks, Bob. I just wanted to make sure that I wasn't missing something.\\n\\nGiven that, though: It would be really nice if the compiler did support relative upward path resolution. Could this be considered as a low-priority feature request?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-05-14 14:08:25\" },\n\t{ \"post_id\": 1607, \"topic_id\": 358, \"forum_id\": 8, \"post_subject\": \"Re: IMPORT and relative parent directories\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nNo, I don't believe that relative paths are supported. I know that you probably referenced the LRM on this, and here are the current variations of IMPORT that are supported:\\n\\nIMPORT $; //makes all definitions from the same folder available\\n\\nIMPORT $, Std; //makes the standard library functions available, also\\n\\nIMPORT MyModule; //makes available the definitions from MyModule folder\\n\\nIMPORT SomeFolder.SomeFile; //makes the specific file available\\n\\nIMPORT SomeReallyLongFolderName AS SN; //alias the long name as "SN"\\n\\nIMPORT Def1,Def2 FROM Fred; //makes Def1 and Def2 from Fred available, unqualified\\n\\nIMPORT * FROM Fred; //makes everything from Fred available, unqualified\\n\\nIMPORT Dev.Me.Project1; //makes the Dev/Me/Project1 folder available
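To illustrate the usual workaround for the original question (the folder and definition names here are hypothetical, not from the thread): since a relative ".." step is not supported, qualify the IMPORT from the root of the repository instead.

// Hypothetical repository layout:
//   Dev/Me/Project1/SomeCode.ecl   <- the file doing the importing
//   Dev/Shared/Utils.ecl           <- the "../Shared" target you actually want
// There is no relative upward syntax, so spell the path out from the root:
IMPORT Dev.Shared.Utils;

result := Dev.Shared.Utils.SomeDefinition;   // hypothetical definition exported by Utils.ecl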
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-14 14:03:39\" },\n\t{ \"post_id\": 1602, \"topic_id\": 358, \"forum_id\": 8, \"post_subject\": \"IMPORT and relative parent directories\", \"username\": \"DSC\", \"post_text\": \"In C++ you can include a file with something like this:\\n\\n#include "../foo/bar.h"
\\n\\nIn ECL, is it possible to create a relative IMPORT that moves up a directory like in the C++ example? I can get to sibling and child files, but not (for example) child files of parent directories. I've tried a number of syntax variations without positive result.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-05-14 13:20:53\" },\n\t{ \"post_id\": 1610, \"topic_id\": 359, \"forum_id\": 8, \"post_subject\": \"Re: Normalize\", \"username\": \"bforeman\", \"post_text\": \"Try this:\\n\\nChildRec := RECORD\\n//INTEGER1 NameID;\\nSTRING20 Name := '';\\nSTRING20 Addr;\\nEND;\\n\\nDenormedRec := RECORD\\n//INTEGER1 NameID;\\nSTRING20 Name;\\nDATASET(ChildRec) Children;\\nEND;\\n\\nds := DATASET([ {'Kevin',[ {'','10 Malt Lane'}]},\\n {'Liz', [ {'','10 Malt Lane'}, {'','3 The cottages'}]},\\n {'Mr Nobody', []},\\n {'Anywhere',[ {'','Far'}, {'','Here'}, {'','There'},{'','Near'}]} ],DenormedRec);\\n\\n\\n\\nChildRec NewChildren(ds L,ChildRec R) := TRANSFORM\\n SELF.Name := L.Name;\\n SELF := R;\\nEND;\\nds;\\nNewChilds := NORMALIZE(ds,LEFT.Children,NewChildren(LEFT,RIGHT));\\nNewChilds;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-14 14:50:22\" },\n\t{ \"post_id\": 1606, \"topic_id\": 359, \"forum_id\": 8, \"post_subject\": \"Normalize\", \"username\": \"Bhagwant\", \"post_text\": \"ChildRec := RECORD\\n//INTEGER1 NameID;\\nSTRING20 Name := '';\\nSTRING20 Addr;\\nEND;\\n\\nDenormedRec := RECORD\\n//INTEGER1 NameID;\\nSTRING20 Name;\\nDATASET(ChildRec) Children;\\nEND;\\n\\nds := DATASET([ {'Kevin',[ {'','10 Malt Lane'}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'Liz', [ {'','10 Malt Lane'}, {'','3 The cottages'}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'Mr Nobody', []},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'Anywhere',[ {'','Far'}, {'','Here'}, {'','There'},{'','Near'}]} ],DenormedRec);\\n\\nChildRec NewChildren(ChildRec R) := TRANSFORM\\n\\tSELF := R;\\nEND;\\nds;\\nNewChilds := NORMALIZE(ds,LEFT.Children,NewChildren(RIGHT));\\nNewChilds;\\n
\\n\\nIn above code I need to copy value of field Name in parent to child while normalizing it. Please suggest how to do this.\", \"post_time\": \"2012-05-14 13:58:41\" },\n\t{ \"post_id\": 3859, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"rtaylor\", \"post_text\": \"David,Strangely this code picked straight from the language reference does not even compile
I'll check that out. \\n\\nMeantime, you can try using the LOCALE option on a RECORD structure (instead of on the UNICODE datatype), something like this:MyRec := RECORD,LOCALE('DE')\\n UNICODE5 MyUnicodeString := U'abcd353';\\nEND;
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-31 13:27:30\" },\n\t{ \"post_id\": 3858, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"jeeves\", \"post_text\": \"Dan,\\n\\nStrangely this code picked straight from the language reference does not even compile\\nUNICODEde5 MyUnicodeString := U'abcd353';\\n\\nError: Unknown type 'unicodede5' (5, 1), 2324, \\n
\\nI get same result for UNICODEzh too.\\n\\nI should probably use some character encoding validator to check the integrity of my xml file - the "file" utility in linux does not indicate any issues.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-03-31 07:04:57\" },\n\t{ \"post_id\": 3857, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Have you tried specifying a locale? I'm wondering if using something like 'UNICODEzh' (or 'UNICODEcmn') as a datatype would change that behavior. FWIW, I think your original UTF-8 spray is probably fine. It's the interpretation afterwards that is probably causing the problem.\\n\\nHonestly, I'm just guessing right now. All of my unicode-oriented stuff has just worked out of the box.\\n\\nDan\", \"post_time\": \"2013-03-31 00:09:39\" },\n\t{ \"post_id\": 3856, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"jeeves\", \"post_text\": \"I tried a few things which did not work.\\n\\n- Specfifying encoding as UTF-8 while spraying using DFUPlus\\n\\n- Converting my UTF-8 xml to UTF-16 using iconv and then spraying with encoding set to UTF-16. In this case I could not even read the sprayed data using ECL's UNICODE string - it resulted in a strange error during runtime. After switching to the STRING type I was able to get the data into a dataset but the data was garbled.\", \"post_time\": \"2013-03-30 19:44:30\" },\n\t{ \"post_id\": 3855, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"jeeves\", \"post_text\": \"Dan,\\n\\nThanks for the reply. Will let you know how it goes.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-03-29 20:26:05\" },\n\t{ \"post_id\": 3854, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Hi David,\\n\\nMy understanding is that spraying should always match your source format (UTF-8 in your case), and that a destination data type of UNICODE will accept UTF-8 without trouble from sprays. That's talking about working through RECORD definitions; I'm not sure how PARSE() would play into it. Storage of UNICODE in a logical file is UTF-16, though. I'm not sure if internally (RAM) the data is stored in UTF-8 or UTF-16 but I suspect the latter.\\n\\nI don't have any direct experience with Chinese data. It may be that something there is throwing the parsers for a loop.\\n\\nI wish I had more to offer.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-29 20:21:40\" },\n\t{ \"post_id\": 3853, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"jeeves\", \"post_text\": \"Dan,\\n\\nI tried that and it appeared to mess everything up. The strings looked garbled when displayed in ECL IDE and string operations(STD.Uni.find for example) went hay wire. Perhaps I should try that again and double check.\\n\\nA related point.. Should I choose encoding as UTF-16 while spraying using dfuplus? 
The xml encoding is however UTF-8\\n\\nThanks,\\nDavid\", \"post_time\": \"2013-03-29 20:13:06\" },\n\t{ \"post_id\": 3852, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Can you use the UNICODE datatype instead?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-29 19:04:04\" },\n\t{ \"post_id\": 3851, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"jeeves\", \"post_text\": \"We have a UTF-8 xml with some chinese characters and the parse in ECL fails like this\\n\\nError: System error: 0: Graph[1], xmlparse[4]: SLAVE 10.253.58.81:20100: XMLParse actId(4) INTERNAL ERROR 2: Error - syntax error "Mismatched opening and closing tags"...
\\n\\nI am reading the xml content from the dataset into a ECL "String".\\nEXPORT r_RAW := RECORD\\n STRING filename;\\n STRING textXml;\\nEND;
\\n"textXML" has a well formed UTF-8 xml. The parse fails only when there are chinese characters. Is there some way to fix this?\", \"post_time\": \"2013-03-29 18:15:35\" },\n\t{ \"post_id\": 1654, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Would it be possible to save the incoming XML (UTF-8) document as a STRING type, then cast it to UNICODE when I need to parse it? That would be an obvious space/time trade-off, but if we're talking about halving the required disk space, it may be worth it.\\n\\nBTW, there is a list of type casting rules in the language reference guide that, strangely, does not explicitly name STRING -> UNICODE casting. UNICODE -> STRING is present, though.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-25 11:30:12\" },\n\t{ \"post_id\": 1653, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"The incoming data is XML, in UTF-8 encoding. Each file contains multiple documents, each with its own (identical) top-level tag. I found that using the NOROOT option for the DATASET structure works well with this.\\n\\n(Side note, for Interested Readers: An outside process was compiling this information and it was including the <xml> header tag for each. That header is not part of the structure; it precedes it, for reach record. I wound up removing the <xml> tag from the feed files prior to spraying them into the cluster using the 'sed' command. Otherwise, I wound up with troubles extracting data later on, such as with a PARSE command.)\\n\\nMy test file is about 2.4G, containing about 80K records. A use-case for my testing involves returning the entire XML document after querying for a single value, so I'm extracting that value and saving the two fields (value and XML as UNICODE) into a separate DATASET. Because of the lack of UTF-8 support, the new dataset is over 4.8GB.\\n\\nThat's a lot for 80K records, especially when I'm facing around 250M records in the final implementation. Hence my intense interest in UTF-8 support versus UTF-16. For the record, all this works fine, it just takes up a lot of space. (Though I will be opening a new topic on Roxie, super files, lack of super key support due to payload size limitations and retrieving those potentially-large XML strings.)\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-24 20:23:47\" },\n\t{ \"post_id\": 1640, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"ghalliday\", \"post_text\": \"What format is the incoming data?\\n\\nThere are options on csv read to indicate the input is utf8 (and xml is assumed to be utf-8). If you have UNICODE fields in your datasets they will be convert as the data is read.\", \"post_time\": \"2012-05-23 15:31:58\" },\n\t{ \"post_id\": 1639, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Great! That's exactly the clarification I need. I don't really like it, as I'm looking at terabytes of incoming UTF-8 data that needs full support for non-ISO-8859-1 (latin1) characters, but knowing is better than not knowing. 
I guess I'm faced with converting it all to UTF-16 (ugh) to ensure compatibility.\\n\\nCan you disclose when full UTF-8 support will be fully supported, if ever?\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-05-23 15:16:44\" },\n\t{ \"post_id\": 1638, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"ghalliday\", \"post_text\": \"Can you assign 'Heizölrückstoßabdämpfung' to a STRING?\\n\\nIt will work as long as all of the UNICODE characters are included in the iso-8859-1 code page. Since that covers German, and most European languages, then it will almost certainly work. If it was a Chinese name then it wouldn't.\\n\\n(See http://en.wikipedia.org/wiki/ISO/IEC_8859-1 for details of letters included)\", \"post_time\": \"2012-05-23 15:12:44\" },\n\t{ \"post_id\": 1637, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"ghalliday\", \"post_text\": \"Query time\\n\\nAt query time you are dealing with structured rows which can contain some fields/columns with different types. We support the following:\\n\\n<n> is always the number of logical characters, not necessarily the length.\\n\\nSTRING<n> - n 8-bit character string using ASCII or EBCDIC encoding. (ASCII is actually iso-8859-1 which also defines the values for characters > 128)\\n\\nDATA<n> - n byte 8-bit characters with no assumed encoding.\\nUNICODE<n> - n 16-bit characters using utf-16 encoding. \\n\\n(Undocumented and only partially implemented:\\nUTF8 - utf8 encoded string)\\n\\nSay you have the following definitions:\\n\\nx1 := 'ààààà'\\nx3 := U'ààààà'\\nx2 := D'ààààà'\\n
\\n\\nThe source code is utf8. 'à' has unicode value 0xe0 and is represented in the source code by the utf-8 sequence C3 A0.\\n\\nThe first definition will convert it to a STRING - E0 E0 E0 E0 E0\\nThe second definition will convert it to utf 16 - 00E0 00E0 00E0 00E0 00E0\\nThe third definition will perform no conversion - C3 A0 C3 A0 C3 A0 C3 A0 C3 A0.\\n\\nIf you convert between string and unicode the system will try and preserve the string that is represented.\\nSo (utf8)'à' will produce the a one character two byte result with value C3 A0.\\n\\nIf a character in the source cannot be represented in the target type it will be replaced with a placeholder. (I can't off-hand remember which character it is).\", \"post_time\": \"2012-05-23 15:09:15\" },\n\t{ \"post_id\": 1636, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"Thanks for the additional information. That helps clarify some of my confusion.\\n\\nTo more concretely demonstrate my remaining questions, let's say we have dataset, not a source code literal, containing this German word, encoded in UTF-8: Heizölrückstoßabdämpfung.\\n\\n1) Can I assign this word to a STRING variable and retrieve it, intact?\\n\\n2) Can I assign this word to a STRING24 variable and retrieve it, intact?\\n\\n3) Should I just store 'Fuel oil recoil absorber' instead?\\n\\nI could probably do a number of trial-by-fire tests to figure some of this stuff out, but I'm hoping to uncover any gotchas I could miss by asking for the official word.\\n\\nThanks again,\\n\\nDan\", \"post_time\": \"2012-05-23 14:45:46\" },\n\t{ \"post_id\": 1635, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"Re: UTF-8 support\", \"username\": \"ghalliday\", \"post_text\": \"There are two separate issues - the source code and the values stored/represented when the query is run. I'll break the reply into two parts.\\n\\nSource Code:\\n\\nIt is really a requirement for a single plain text file to be in a single format. It would be tricky for part of the file to be utf8, and other parts ascii, unless you had some meta information to indicate the different extents. So each source file provided to ECL is assumed to be a single format. There were several options for which inputs were supported:\\n\\ni) ascii. A standard for a long time but you wouldn't be able to directly enter unicode characters into the text file.\\n\\nii) utf8. If you only use characters < 128 then it is compatible with ascii, but does allow unicode to be entered into the source files.\\n\\niii) Some other unicode format. It would be problematic to mandate this because it would prevent most simple editors from being used.\\n\\nNormally ECL source files are encoded as utf-8. This means that for the source code\\nmyString := 'Gavin';\\n
\\neven though the literal 'Gavin' has type string it is represented in the source in utf8 format - because the whole file is utf8.\\n\\nQuite possibly undocumented, but ecl source code files can be in any Unicode format if they are prefixed with the appropriate BOM. It will also guess the format of any that don't.\", \"post_time\": \"2012-05-23 14:03:35\" },\n\t{ \"post_id\": 1633, \"topic_id\": 365, \"forum_id\": 8, \"post_subject\": \"UTF-8 support\", \"username\": \"DSC\", \"post_text\": \"I'm finding the documentation, both the official docs and some of the postings here, regarding Unicode support a little ambiguous.\\n\\nThe language reference manual indicates that STRING types support either ASCII or EBCDIC, and that UNICODE types are UTF-16. However, the manual also states that all ECL code, including string literals, are UTF-8. Implicit or explicit support for UTF-8 is included in XML and CSV processing.\\n\\nWhat is the real story here? It seems that UTF-8 is actually supported by the STRING data type, despite what the documentation says. Is that correct? If it is, how does the STRINGn variations play into that if the value contains multibyte encodings (does n indicate byte count or character count)?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-05-23 13:23:22\" },\n\t{ \"post_id\": 1683, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"No Problem! \\n\\nLet me know how it all works out in practice,\\n\\nRichard\", \"post_time\": \"2012-05-31 14:27:27\" },\n\t{ \"post_id\": 1681, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"Thanks for the clear explanation, Richard!\\n\\nI think I did understand how it all worked. I was just concerned about the performance of managing the set of superkeys during updates. It appears that if I keep things small enough, it will just work itself out.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-05-31 13:54:20\" },\n\t{ \"post_id\": 1680, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI understand what you're saying, but I'm still wrestling with the problem of multiple superkeys, each providing a different search handle into the data, and how they are managed in tandem. You probably have already addressed my problem and I'm just too dense to get it without a great deal more coffee.\\n\\nSay you have one superkey that really acts as your data store (it indexes a unique DocID and has a payload of data). Then you have a bunch (>10, whatever) other superkeys that have indexes built for other parts of the data (e.g., geographic locations, industry codes, financial information, etc.) and the payloads contain the DocID so you can get back to the original data. Then you get the typical update stream, with adds, deletes and changes (whole-document replacements). The updates are based on the DocID.\\n\\nMy question is, is there an efficient pattern for managing the updates in those 'other' superkeys, since there isn't a fast way of retrieving affected records because the DocID isn't indexed? Other than file-scanning through them to find the DocIDs that had changes? 
Or are you saying that the most efficient way of dealing with that is to simply keep the recent superkey updates small enough so that file-scanning is viable? If that's the case, could DISTRIBUTE and LOCAL be used to partition the work more effectively?
OK, the whole point is that you're managing your data in this "batch mode-only" HPCC environment so that it emulates an OLTP-type system (not in real-time, obviously). \\n\\nSo you are correct that for your base data you will have multiple superkeys, each containing just a single INDEX (using superkeys as a data-aliasing mechanism so your queries don't have to be re-compiled every time the data changes). One will be the DocID with whole doc payload, and all the others will just be search term indexes with just the DocID as a payload.\\n\\nSeparately, you will have an identically-structured set of superkeys that will contain all the new and changed records. And, you will have one more superkey that just contains the DocIDs of the records that have been deleted.\\n\\nSo, your query comes in. Based on the actual parameters passed, your code determines which search indexes to use. It then queries those search indexes for the Base data AND the updates, giving you two sets of DocIDs representing your first candidate result set. You can at that point remove any of the DocIDs that exist in your set of deleted DocIDs. Then you get the docs themselves, making sure that you get the doc from the updates for any changed records, and that's your result set to deliver to the customer.\\n\\nMaintaining the superkeys is a periodic process. Start with all the data you have that's current, that's your Base data, and just build all the indexes on that. Then, as data comes in (adds, changes, deletes) you incorporate those records into your other superkeys for Adds/Changes and deletions. Each "update period" you take whatever adds/changes/deletions have come in since your last period and build all the indexes for the query on just those records. Then you take those indexes and add them to the superkeys for Adds/Changes and Deletions and publish the new DFU metadata for all those superkeys. At that point, your query will then be using all the latest data.\\n\\nBased on your volume of data, you decide what period is appropriate to roll all the adds/changes/deletions into a new Base Data set of superkeys and empty out the other superkeys for Adds/Changes and deletions to basically create a new starting point.\\n\\nThe whole point of using superkeys here is so that you're just maintaining DFU metadata to make an update to the actual data hit by a query. All the data maintenance happens on Thor and gets published to Roxie only when it's ready to go. How often you do your publishing is dependent on your volume and how much data latency you can live with. If you can live with a 1-day latency and you have an overnight "dead period", then you could conceivably just get by with Base Data only and rebuild "the world" every night. If you have a 30-minute data latency requirement, then you absolutely would need the base data along with latest data superkeys (these would be added to probably every 15 minutes to achieve your 30-minute requirement) and you would probably want to rebuild "the world" every night (you want to keep the number of sub-files in a given superfile down to about 100).\\n\\nClear as mud, right? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-31 13:38:54\" },\n\t{ \"post_id\": 1672, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":282300sa]Well, I would structure it so that the Base data is easily searchable with any number of search terms. 
As long as you include the DocID in the result set you're OK. Then your Added and Changed records all go into a separate superkey, searchable in exactly the same ways as the Base data. Again, as long as you include the DocID in the result set you're OK. You're basically going to do the same query twice -- once against the Base Data superkey, then again against the Adds/Changes superkey.\\n\\nI understand what you're saying, but I'm still wrestling with the problem of multiple superkeys, each providing a different search handle into the data, and how they are managed in tandem. You probably have already addressed my problem and I'm just too dense to get it without a great deal more coffee.\\n\\nSay you have one superkey that really acts as your data store (it indexes a unique DocID and has a payload of data). Then you have a bunch (>10, whatever) other superkeys that have indexes built for other parts of the data (e.g., geographic locations, industry codes, financial information, etc.) and the payloads contain the DocID so you can get back to the original data. Then you get the typical update stream, with adds, deletes and changes (whole-document replacements). The updates are based on the DocID.\\n\\nMy question is, is there an efficient pattern for managing the updates in those 'other' superkeys, since there isn't a fast way of retrieving affected records because the DocID isn't indexed? Other than file-scanning through them to find the DocIDs that had changes? Or are you saying that the most efficient way of dealing with that is to simply keep the recent superkey updates small enough so that file-scanning is viable? If that's the case, could DISTRIBUTE and LOCAL be used to partition the work more effectively?\\n\\nThanks for your insight and assistance!\\n\\nDan\", \"post_time\": \"2012-05-30 14:21:02\" },\n\t{ \"post_id\": 1668, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
This technique works well for superkeys that have their DocID as the actual key, but what about superkey updates where the key is something else? We may have several superkeys that index different portions of the same dataset. While those superkeys will have the DocID embedded, the DocID may not be something that is (easily) searchable.
Well, I would structure it so that the Base data is easily searchable with any number of search terms. As long as you include the DocID in the result set you're OK. Then your Added and Changed records all go into a separate superkey, searchable in exactly the same ways as the Base data. Again, as long as you include the DocID in the result set you're OK. You're basically going to do the same query twice -- once against the Base Data superkey, then again against the Adds/Changes superkey.\\n\\nSo now you have two candidate result sets and all you have to do is merge the two recordsets, ensuring that any record in the final result whose DocID exists in both the base and update result sets comes only from the updates dataset (this takes care of changed records).\\n\\nThen, you take that combined result and filter out all those records whose DocID exists in the set of "Deleted" records (DocID is the only field you actually need in the deleted records file). And that gives you the final result to deliver.\\n\\nIf you keep your "rollup into new base data" period fairly short, the Add/Update and DeletedRecs datasets should never be so large they slow things down appreciably.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-29 19:25:50\" },\n\t{ \"post_id\": 1667, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":3etwyllu]Dan,\\n\\nThe updates will be either adds, deletes or whole-document replacements.
To me, that would make it simpler, allowing me to put all adds and changes into a single "updates" dataset, so a query only has to: \\n1) Query the base data, \\n2) Query the "updates" data, \\n3) filter the Base Data result against the SET(SelectedUpdatedRecs,DocID), then \\n4) append the two result sets and filter them against the SET(DeletedRecs,DocID).\\n\\nActually, this brings up another data-architecture-style question.\\n\\nThis technique works well for superkeys that have their DocID as the actual key, but what about superkey updates where the key is something else? We may have several superkeys that index different portions of the same dataset. While those superkeys will have the DocID embedded, the DocID may not be something that is (easily) searchable.\\n\\nSo my question becomes: Is there a 'standard' way of managing sets of superkeys for a dataset that is being updated as described? I realize that file-scanning the indexes is an option; I'm hoping for something a little more performance-friendly.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-05-29 17:21:49\" },\n\t{ \"post_id\": 1666, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":2j933gzu]Now you're into problems with how the internal stuff does all the "magic" -- that makes it more of a Gavin or Richard question (that's the "other" Richard, not me), they're the "magicians". \\n\\nI'm looking forward to their response. On a related note, you guys need to get a popcorn smiley for this forum.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-05-29 14:33:50\" },\n\t{ \"post_id\": 1665, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nNow you're into problems with how the internal stuff does all the "magic" -- that makes it more of a Gavin or Richard question (that's the "other" Richard, not me), they're the "magicians".
\\n\\nRichard\", \"post_time\": \"2012-05-29 14:15:54\" },\n\t{ \"post_id\": 1664, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"Hi Richard,\\n\\nThor is complaining if I use what I believe to be a reasonable chunk size. The error in the logs is:\\n\\n
assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359
\\n\\nI did get it to work with a chunk size of 2000, but 5000 and up generates this error. I was hoping to use at least 10000, if not 20000 in order to reduce the number of records generated.\\n\\nPerhaps a bit more of the error log should be cited here, as I think there is a disconnect between the found record size and the size some other part of the code is expecting (but I'm probably wrong):\\n\\n00000373 2012-05-29 08:28:42 9742 10626 Ave Rec Size = 20040\\n00000374 2012-05-29 08:28:42 9742 10626 Output start = 0, num = 52725\\n00000375 2012-05-29 08:28:42 9742 10626 SORT Merge READ: Stream(0) local, pos=0 len=52538\\n00000376 2012-05-29 08:28:42 9742 10626 SORT Merge READ: Stream(1) 10.210.150.81:20102, pos=0 len=187\\n00000377 2012-05-29 08:28:42 9742 10626 SORT Merge READ: Stream(1) connected to 10.210.150.81:20102\\n00000378 2012-05-29 08:28:42 9742 10626 Global Merger Created: 2 streams - activity(sort, 7)\\n00000379 2012-05-29 08:28:42 9742 10625 SORT Merge Start - activity(sort, 7)\\n0000037A 2012-05-29 08:28:42 9742 10625 INDEXWRITE: process: handling fname : /var/lib/HPCCSystems/hpcc-data/thor/gsrl/idx_file13._1_of_4 - activity(indexwrite, 8)\\n0000037B 2012-05-29 08:28:42 9742 10625 Writing to file: /var/lib/HPCCSystems/hpcc-data/thor/gsrl/thtmp9742_8__partial.tmp - activity(indexwrite, 8)\\n0000037C 2012-05-29 08:28:42 9742 10625 INDEXWRITE: created fixed output stream /var/lib/HPCCSystems/hpcc-data/thor/gsrl/idx_file13._1_of_4 - activity(indexwrite, 8)\\n0000037D 2012-05-29 08:28:42 9742 10625 INDEXWRITE: write - activity(indexwrite, 8)\\n0000037E 2012-05-29 08:28:42 9742 10625 Record size (max) = 4096 - activity(sort, 7)\\n0000037F 2012-05-29 08:28:42 9742 10625 Backtrace:\\n00000380 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2b29bd1bdb96]\\n00000381 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x26) [0x2b29bd1be756]\\n00000382 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjhtree.so(_ZN10CWriteNode3addEyPKvjy+0x278) [0x2b29bf07b998]\\n00000383 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjhtree.so(_ZN11CKeyBuilder14processKeyDataEPKcyj+0x5c) [0x2b29bf09565c]\\n00000384 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN23IndexWriteSlaveActivity10processRowEPKv+0x13a) [0x2b29bfa93b1a]\\n00000385 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN23IndexWriteSlaveActivity7processEv+0xdbb) [0x2b29bfa94adb]\\n00000386 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN20ProcessSlaveActivity4mainEv+0x34) [0x2b29bfa54674]\\n00000387 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent4mainEv+0x2d) [0x2b29bd247f1d]\\n00000388 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent8CAThread3runEv+0x10) [0x2b29bd24b4e0]\\n00000389 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2b29bd247877]\\n0000038A 2012-05-29 08:28:42 9742 10625 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2b29bd2483ef]\\n0000038B 2012-05-29 08:28:42 9742 10625 /lib64/libpthread.so.0 [0x3f8660673d]\\n0000038C 2012-05-29 08:28:42 9742 10625 /lib64/libc.so.6(clone+0x6d) [0x3f85ed44bd]\\n0000038D 2012-05-29 08:28:42 9742 10625 assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 
359\\n0000038E 2012-05-29 08:28:42 9742 10625 activity(indexwrite, 8) : Graph[4], indexwrite[8]: assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359
\\n\\nThoughts?\\n\\nDan\", \"post_time\": \"2012-05-29 14:05:28\" },\n\t{ \"post_id\": 1659, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThe updates will be either adds, deletes or whole-document replacements.
To me, that would make it simpler, allowing me to put all adds and changes into a single "updates" dataset, so a query only has to: \\n1) Query the base data, \\n2) Query the "updates" data, \\n3) filter the Base Data result against the SET(SelectedUpdatedRecs,DocID), then \\n4) append the two result sets and filter them against the SET(DeletedRecs,DocID).\\n\\nThanks for the idea about 'chunking' the data. That makes sense. It also complicates updates, as you can imagine. But, if the chunk size was properly chosen then I could reduce the number of multi-chunk records significantly, making them relatively rare. That's assuming the average size of my records, plus index key and other payload fields, is small enough to fit into a Roxie index record. Food for thought, at any rate.\\n\\nThanks for the insight!
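A minimal ECL sketch of that four-step flow, assuming one search-term superkey over the base data, an identically structured one over the adds/changes, and a small deleted-DocID key; the index names and the SearchTerm field are hypothetical, not from the thread:

BaseIDX := INDEX({STRING20 SearchTerm}, {UNSIGNED8 DocID}, '~key::docs::base::search_term');
UpdIDX  := INDEX({STRING20 SearchTerm}, {UNSIGNED8 DocID}, '~key::docs::updates::search_term');
DelIDX  := INDEX({UNSIGNED8 DocID}, '~key::docs::deleted::docid');

CandidateDocIDs(STRING20 term) := FUNCTION
  baseHits   := BaseIDX(SearchTerm = term);                   // 1) query the base data
  updHits    := UpdIDX(SearchTerm = term);                    // 2) query the "updates" data
  updatedIDs := SET(updHits, DocID);
  deletedIDs := SET(DelIDX, DocID);
  merged     := updHits + baseHits(DocID NOT IN updatedIDs);  // 3) changed docs come only from the updates
  RETURN merged(DocID NOT IN deletedIDs);                     // 4) drop anything that has been deleted
END;

The surviving DocIDs would then be looked up in the DocID-keyed payload superkeys (base plus updates) to retrieve the documents themselves.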
No Problem! Have fun with it! \\n\\nRichard\", \"post_time\": \"2012-05-25 17:41:45\" },\n\t{ \"post_id\": 1658, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"The updates will be either adds, deletes or whole-document replacements. I designed a simple system to handle those scenarios and it seems to work, though I admit to not knowing if it scales well. I think it does, but I have yet to send it through an acid bath. I modeled it after how the FAST search engine performs incremental document updates.\\n\\nThanks for the idea about 'chunking' the data. That makes sense. It also complicates updates, as you can imagine. But, if the chunk size was properly chosen then I could reduce the number of multi-chunk records significantly, making them relatively rare. That's assuming the average size of my records, plus index key and other payload fields, is small enough to fit into a Roxie index record. Food for thought, at any rate.\\n\\nThanks for the insight!\\n\\nDan\", \"post_time\": \"2012-05-25 16:09:41\" },\n\t{ \"post_id\": 1657, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI knew I hit Submit too soon!
\\n\\n
\\nTHIS LINE:\\n EndPos := StartPos + ChunkSize;\\n\\nSHOULD BE:\\n EndPos := StartPos + ChunkSize -1;\\n
Sorry...\\n\\nRichard\", \"post_time\": \"2012-05-25 15:21:13\" },\n\t{ \"post_id\": 1656, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Re: Architecture help: Updating large records vs. superkeys\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n* The dataset will be updated very often. Estimated 10M records per day, spread throughout the day. I would imagine that the standard "update small datasets and roll up changes within superfiles" pattern is the way to go here.
Will the updates all be additions to the dataset, or adds, changes, and deletes? Additions-only is the obvious simple case, while changes/deletions require a more complex solution where you would need to query all existing records first for your candidate result set, then bang that result against a changed records dataset to get the updated data, then bang that result against a deletion dataset to filter out any deleted records before returning the result.\\n\\n* Different searches require different return values, and there is one that I'm having trouble with: Given a unique ID, return the corresponding original XML document in its entirety (which can be quite large).\\n\\nUsing superfiles is no problem; I get that part. Indexing the data and delivering it through Roxie, though, seems to be a problem. Here is what I think I know:\\n\\n* I cannot use superkeys because I cannot fit all the data into the payload portion of the index, and I must use the payload in order to use superkeys (FETCH isn't feasible with superkeys and superfiles).
\\nWell, since the raw documents are XML, and XML is just a pile of text, we can just treat it as a blob of text.\\n\\nOK, so how about this for a "squirrelly" idea -- build a payload index of the XML documents broken up into reasonable-sized "chunks" something like this:ChunkSize := 10000; //whatever size makes sense\\nChunkRec := RECORD\\n INTEGER DocID;\\n UNSIGNED1 ChunkID;\\n STRING TextChunk;\\nEND;\\nChunkRec XF(MyData L, INTEGER C) := TRANSFORM\\n SELF.DocID := L.DocID;\\n SELF.ChunkID := C; \\n StartPos := ((C-1)*ChunkSize)+1;\\n EndPos := StartPos + ChunkSize;\\n SELF.TextChunk := L.XmlText[StartPos..EndPos];\\nEND;\\nChunked := NORMALIZE(MyData,LENGTH(LEFT.XmlText)/ChunkSize,XF(LEFT,COUNTER));\\n\\nPayIDX := INDEX(Chunked,{DocID,ChunkID},{Chunked},'filename');\\nBUILD(PayIDX);
Then, when your query needs to return the entire document you can get all the payloads from the index and just concatenate them back together, something like this:\\nGetXMLDocs(SET OF INTEGER ResDocIDs) := FUNCTION \\n ResRecs := SORT(PayIDX(DocID IN ResDocIDs),DocID,ChunkID);\\n UnChunked := ROLLUP(ResRecs,\\n LEFT.DocID=RIGHT.DocID,\\n TRANSFORM(ChunkRec,\\n SELF.TextChunk := \\n LEFT.TextChunk + RIGHT.TextChunk,\\n SELF := RIGHT));\\n RETURN PROJECT(UnChunked,\\n TRANSFORM({STRING line},SELF.line := LEFT.TextChunk));\\nEND;
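Pulling the two snippets together, here is a self-contained sketch of the same pattern. It folds in the "EndPos - 1" correction Richard posts in his follow-up, rounds the chunk count up so a trailing partial chunk is kept, and bounds the chunk field's size (the sizing question is explored further in the "Thor crash when writing big index records" topic below). The dataset, field, and file names are hypothetical:

ChunkSize := 10000;                        // characters per chunk -- whatever size makes sense

DocRec := RECORD
  UNSIGNED8 DocID;
  STRING    XmlText {MAXLENGTH(200000)};   // raw XML document text (up to ~200K per the thread)
END;

MyDocs := DATASET('~thor::docs::raw', DocRec, THOR);   // hypothetical logical file

ChunkRec := RECORD
  UNSIGNED8 DocID;
  UNSIGNED1 ChunkID;
  STRING    TextChunk {MAXLENGTH(10000)};  // explicit maximum, kept equal to ChunkSize
END;

ChunkRec XF(DocRec L, UNSIGNED4 C) := TRANSFORM
  SELF.DocID     := L.DocID;
  SELF.ChunkID   := C;
  StartPos       := ((C - 1) * ChunkSize) + 1;
  EndPos         := StartPos + ChunkSize - 1;   // the "-1" correction from this thread
  SELF.TextChunk := L.XmlText[StartPos .. EndPos];
END;

// ROUNDUP keeps the final partial chunk that truncating division would drop.
Chunked := NORMALIZE(MyDocs, ROUNDUP(LENGTH(LEFT.XmlText) / ChunkSize), XF(LEFT, COUNTER));

PayIDX := INDEX(Chunked, {DocID, ChunkID}, {Chunked}, '~key::docs::chunks');
BUILD(PayIDX);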
All the rest of your search keys can be standard search indexes with the DocID as a payload field, so you can get back to your original document no matter what your search terms are.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-25 15:16:42\" },\n\t{ \"post_id\": 1655, \"topic_id\": 369, \"forum_id\": 8, \"post_subject\": \"Architecture help: Updating large records vs. superkeys\", \"username\": \"DSC\", \"post_text\": \"Here is my scenario:\\n\\n* I have incoming XML data, where each XML document is a record. Each record will likely be fairly small but some are quite large (up to 200K).\\n\\n* The entire dataset will likely be around 250M records.\\n\\n* The dataset will be updated very often. Estimated 10M records per day, spread throughout the day. I would imagine that the standard "update small datasets and roll up changes within superfiles" pattern is the way to go here.\\n\\n* There are interesting data points within each record (meaning, unique IDs for lookups and values that need to be searchable). These will obviously be extracted and put into indexes as keys.\\n\\n* Different searches require different return values, and there is one that I'm having trouble with: Given a unique ID, return the corresponding original XML document in its entirety (which can be quite large).\\n\\nUsing superfiles is no problem; I get that part. Indexing the data and delivering it through Roxie, though, seems to be a problem. Here is what I think I know:\\n\\n* I cannot use superkeys because I cannot fit all the data into the payload portion of the index, and I must use the payload in order to use superkeys (FETCH isn't feasible with superkeys and superfiles).\\n\\n* The only other way to index a superfile is to create a single index built against the entire superfile, which could be a problem given the amount of data I have.\\n\\nThe obvious desire is to quickly update the dataset and reindex only the stuff that's changed, in order to reduce data latency to its lowest possible value.\\n\\nDo I understand the limitations correctly? Is there another way to approach this problem?\\n\\nThanks for any insight!\\n\\nDan\", \"post_time\": \"2012-05-25 12:46:48\" },\n\t{ \"post_id\": 1699, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":3g7jg8g2]Yes, but you can ensure they're always both the same this way:CODE: SELECT ALL\\nChunkText_t := STRING10000; // Size must match kChunkSize\\nkChunkSize := SIZEOF(ChunkText_t); // Must match size of ChunkText_t\\n
\\n\\nThat's good most of the time but doesn't work in all cases, at least for how I'm dealing with it right now:\\n\\nSTRING15\\tfoo1 := '';\\n\\nOUTPUT(SIZEOF(foo1)); // 15\\n\\nUNICODE15\\tfoo2 := U'';\\n\\nOUTPUT(SIZEOF(foo2)); // 30
\\n\\nThe size that's required for the chunk calculation is the length in characters rather than in bytes, which is what SIZEOF seems to return.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-04 12:20:33\" },\n\t{ \"post_id\": 1698, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nBut then you have to use the chunk size in the calculation to create the chunks, right?
\\nYes, but you can ensure they're always both the same this way:\\nChunkText_t := STRING10000; // Size must match kChunkSize\\nkChunkSize := SIZEOF(ChunkText_t); // Must match size of ChunkText_t\\n
HTH,\\n\\nRichard\", \"post_time\": \"2012-06-01 19:39:52\" },\n\t{ \"post_id\": 1694, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":2o8qpl51]Dan,\\n It makes the code a little more fragile, in that 'chunk size' needs to be defined identically in more than one place, but it works.
Not necessarily. \\n\\nYou can do it this way:\\n
MyChunkType := STRING5000;\\n\\nChunkRec := RECORD\\n\\tINTEGER2\\tdocID;\\n\\tINTEGER2\\tchunkID;\\n\\tMyChunkType\\tchunk;\\nEND;\\n
\\n\\nBut then you have to use the chunk size in the calculation to create the chunks, right? Here's what I now have:\\n\\nkChunkSize := 10000;\\t\\t// Must match size of ChunkText_t\\nChunkText_t := STRING10000;\\t// Size must match kChunkSize
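If the chunk type were UNICODE rather than STRING, the two definitions can still be derived from a single source, because SIZEOF reports bytes and ECL's UNICODEn occupies two bytes per character (as the SIZEOF(foo2) = 30 example above shows). A small sketch under that assumption, not from the thread:

ChunkText_t := UNICODE10000;               // chunk type, sized in characters
kChunkSize  := SIZEOF(ChunkText_t) DIV 2;  // SIZEOF is in bytes; UNICODE stores 2 bytes per character
// For a STRING chunk type the byte and character counts coincide:
// kChunkSize := SIZEOF(ChunkText_t);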
\\n\\nDan\", \"post_time\": \"2012-06-01 13:15:18\" },\n\t{ \"post_id\": 1693, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n It makes the code a little more fragile, in that 'chunk size' needs to be defined identically in more than one place, but it works.
Not necessarily. \\n\\nYou can do it this way:\\n
MyChunkType := STRING5000;\\n\\nChunkRec := RECORD\\n\\tINTEGER2\\tdocID;\\n\\tINTEGER2\\tchunkID;\\n\\tMyChunkType\\tchunk;\\nEND;\\n
HTH,\\n\\nRichard\", \"post_time\": \"2012-06-01 13:12:22\" },\n\t{ \"post_id\": 1691, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"Great info. Specifying an exact length-based datatype is a doable workaround for this problem. It makes the code a little more fragile, in that 'chunk size' needs to be defined identically in more than one place, but it works.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-01 12:52:19\" },\n\t{ \"post_id\": 1690, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"bforeman\", \"post_text\": \"A quick test here of your original code. \\n\\nkChunkSize := 4088; \\nAnything greater than 4088 will produce the runtime error.\\n\\nAlso, if you explicitly set the STRING length of the ChunkRecord to match the chunk, it works!\\n\\nChunkRec := RECORD\\n\\tINTEGER2\\tdocID;\\n\\tINTEGER2\\tchunkID;\\n\\tSTRING5000\\tchunk;\\nEND;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-01 12:40:43\" },\n\t{ \"post_id\": 1689, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"Unfortunately, I get the same error if I add MAXLENGTH annotations to either the chunk field or the record definition. I was using (kChunkSize * 2) to make sure it was big enough.\\n\\nHere is the log from the node that crashed. The version of the code put (kChunkSize * 2) on the field itself rather than the record. Note that the record size was correctly calculated, but the assert a few lines later still showed the same error.\\n\\n00000293 2012-06-01 06:38:32 20474 20474 Using query: /var/lib/HPCCSystems/queries/mythor/V4079901544_libW20120601-064620.so\\n00000294 2012-06-01 06:38:32 20474 20474 CThorRowManager initialized, memlimit = 0\\n00000295 2012-06-01 06:38:32 20474 20474 Global memory size = 0 MB, large mem size = 1536 MB\\n00000296 2012-06-01 06:38:32 20474 20474 New Graph started : graph1\\n00000297 2012-06-01 06:38:32 20474 20474 temp directory cleared\\n00000298 2012-06-01 06:38:32 20474 20474 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 11918, /var/lib/HPCCSystems/hpcc-mirror/thor = 11918\\n00000299 2012-06-01 06:38:32 20474 20474 Key file cache size set to: 8\\n0000029A 2012-06-01 06:38:32 20474 20474 GraphInit: W20120601-064620graph1, graphId=1\\n0000029B 2012-06-01 06:38:32 20474 20474 deserializeMPTag: tag = 65551\\n0000029C 2012-06-01 06:38:32 20474 20474 deserializeMPTag: tag = 65549\\n0000029D 2012-06-01 06:38:32 20474 20474 deserializeMPTag: tag = 65544\\n0000029E 2012-06-01 06:38:32 20474 20474 deserializeMPTag: tag = 65547\\n0000029F 2012-06-01 06:38:32 20474 20474 Add: Launching graph thread for graphId=1\\n000002A0 2012-06-01 06:38:32 20474 20632 Running graph [global] : <graph>\\n <node id="2" label="Inline Row {'Louis Wu ...}">\\n <att name="definition" value="\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\fail.ecl(15,1)"/>\\n <att name="name" value="rs"/>\\n <att name="_kind" value="152"/>\\n <att name="ecl" value="ROW(TRANSFORM(textrec,SELF.s := 'Louis Wu woke aflame with new life, under a coffin lid.\\\\n\\\\nDisplays glowed abov...';)); "/>\\n <att name="recordSize" value="4..4096(260)"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="3" label="Normalize">\\n <att name="definition" value="\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\fail.ecl(26,1)"/>\\n <att name="name" value="ds"/>\\n <att name="_kind" value="27"/>\\n <att name="ecl" value="NORMALIZE(4, TRANSFORM(RECORD integer2 docid; integer2 chunkid; string chunk{maxlength(10000)}; unsigned8 __internal_fpos__; END,...)); "/>\\n <att name="recordSize" value="16..10016(481)"/>\\n <att name="recordCount" value="4..4[tiny]"/>\\n </node>\\n <node id="4" label="Sort">\\n <att name="_kind" value="3"/>\\n <att name="ecl" value="SORT(1, chunkid, chunk); "/>\\n <att name="recordSize" value="16..10016(481)"/>\\n <att name="recordCount" value="4..4[tiny]"/>\\n </node>\\n <node id="5" label="Index Write">\\n <att name="definition" value="\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\fail.ecl(1,1)"/>\\n <att name="name" value="fail"/>\\n <att name="definition" value="\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\fail.ecl(30,1)"/>\\n <att name="_kind" value="44"/>\\n <att name="ecl" value="BUILDINDEX(..., , '~test_fail', sorted); "/>\\n <att name="recordSize" value="16..10016(481)"/>\\n <att name="_fileName" value="~test_fail"/>\\n 
</node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="3"/>\\n <edge id="3_0" source="3" target="4"/>\\n <edge id="4_0" source="4" target="5"/>\\n </graph>\\n - graph(graph1, 1)\\n000002A1 2012-06-01 06:38:32 20474 20632 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(normalize, 3)\\n000002A2 2012-06-01 06:38:32 20474 20632 CONNECTING (id=3, idx=0) to (id=4, idx=0) - activity(sort, 4)\\n000002A3 2012-06-01 06:38:32 20474 20632 MSortSlaveActivity::createMSortSlave - activity(sort, 4) [ecl=SORT(1, chunkid, chunk);]\\n000002A4 2012-06-01 06:38:32 20474 20632 CONNECTING (id=4, idx=0) to (id=5, idx=0) - activity(indexwrite, 5)\\n000002A5 2012-06-01 06:38:32 20474 20632 deserializeMPTag: tag = 65540\\n000002A6 2012-06-01 06:38:32 20474 20632 CMPServer::flush(65540) discarded 1 buffers\\n000002A7 2012-06-01 06:38:32 20474 20632 deserializeMPTag: tag = 65548\\n000002A8 2012-06-01 06:38:32 20474 20632 deserializeMPTag: tag = 65550\\n000002A9 2012-06-01 06:38:32 20474 20632 deserializeMPTag: tag = 65546\\n000002AA 2012-06-01 06:38:32 20474 20632 CMPServer::flush(65546) discarded 1 buffers\\n000002AB 2012-06-01 06:38:32 20474 20632 MSortSlaveActivity::init portbase = 20102, mpTagRPC = 65550 - activity(sort, 4)\\n000002AC 2012-06-01 06:38:32 20474 20634 Creating SortSlaveServer on tag 65550 MP - activity(sort, 4)\\n000002AD 2012-06-01 06:38:32 20474 20632 Watchdog: Start Job 1\\n000002AE 2012-06-01 06:38:32 20474 20633 INDEXWRITE: Start - activity(indexwrite, 5)\\n000002AF 2012-06-01 06:38:32 20474 20633 Starting input - activity(indexwrite, 5)\\n000002B0 2012-06-01 06:38:32 20474 20633 Starting input - activity(sort, 4)\\n000002B1 2012-06-01 06:38:32 20474 20633 Starting input - activity(normalize, 3)\\n000002B2 2012-06-01 06:38:32 20474 20633 ITDL starting for output 0 - activity(temprow, 2)\\n000002B3 2012-06-01 06:38:32 20474 20633 ITDL starting for output 0 - activity(normalize, 3)\\n000002B4 2012-06-01 06:38:32 20474 20633 ITDL starting for output 0 - activity(sort, 4)\\n000002B5 2012-06-01 06:38:32 20474 20633 Gather in - activity(sort, 4)\\n000002B6 2012-06-01 06:38:32 20474 20634 Connected to slave 0 of 3 - activity(sort, 4)\\n000002B7 2012-06-01 06:38:32 20474 20635 CSortTransferServerThread started port 20103\\n000002B8 2012-06-01 06:38:32 20474 20634 Start Gather - activity(sort, 4)\\n000002B9 2012-06-01 06:38:32 20474 20633 SORT: Gather - activity(sort, 4)\\n000002BA 2012-06-01 06:38:32 20474 20633 Record size (max) = 4096 - activity(temprow, 2)\\n000002BB 2012-06-01 06:38:32 20474 20633 Record size (max) = 10016 - activity(normalize, 3)\\n000002BC 2012-06-01 06:38:32 20474 20633 Local run sort(s) done - activity(sort, 4)\\n000002BD 2012-06-01 06:38:32 20474 20633 Sort done, rows sorted = 4, bytes sorted = 20128 overflowed to disk 0 times - activity(sort, 4)\\n000002BE 2012-06-01 06:38:32 20474 20633 Gather finished - activity(sort, 4)\\n000002BF 2012-06-01 06:38:32 20474 20633 Stopping input for - activity(sort, 4)\\n000002C0 2012-06-01 06:38:32 20474 20633 Stopping input for - activity(normalize, 3)\\n000002C1 2012-06-01 06:38:32 20474 20633 ITDL output 0 stopped, count was 1 - activity(temprow, 2)\\n000002C2 2012-06-01 06:38:32 20474 20633 ITDL output 0 stopped, count was 4 - activity(normalize, 3)\\n000002C3 2012-06-01 06:38:32 20474 20633 SORT waiting barrier.1 - activity(sort, 4)\\n000002C4 2012-06-01 06:38:32 20474 20633 SORT barrier.1 raised - activity(sort, 4)\\n000002C5 2012-06-01 06:38:32 20474 20633 SORT Merge Waiting - activity(sort, 
4)\\n000002C6 2012-06-01 06:38:32 20474 20634 Min =(5015): , 1, 0, 1, 0,87,13, 0, 0,"Louis Wu woke aflame with new life, under a coffin lid." a, a,"Displays glowed above his eyes. Bone composition, blood parameters, deep reflexes, urea and potassium and zinc balance: he could identify most of these. The damage listed wasn't great. Punctures and gouges; fatigue; torn ligaments and extensive bruises; two ribs cracked; all relics of the battle with the Vampire protector, Bram. All healed now. The 'doc would have rebuilt him cell by cell. He'd felt dead and cooling when he climbed into the Intensive Care Cavity." a, a,"Eighty-four days ago, the display said." a, a,"Sixty-seven Ringworld days. Almost a falan; a falan was ten Ringworld rotations, seventy-five thirty-hour days. Twenty or thirty days should have healed him! But he'd known he was injured. What with all the general bruising from the battle with Bram, he hadn't even noticed puncture wounds in his back." a, a,"He'd been under repair for twice that long the first time he lay in this box. Then, his int"...\\n000002C7 2012-06-01 06:38:32 20474 20634 Max =(5016): , 1, 0, 4, 0,88,13, 0, 0,"ir last duel. "It's functional?"" a, a,""The stepping-disk link is functional."" a, a,""What about the lander?"" a, a,""Life support is marginal. Drive systems and weaponry have failed."" a, a,""Can some of these service stacks be locked out of the system?"" a, a,""That has been done." Lines spread across the map to link the blinking lights. Some had crossed-circle verboten marks on them: closed. The maze was complicated, and Louis didn't try to understand it. "My Master has override codes," the Voice said." a, a,""May I have those?"" a, a,""No."" a, a,""Number these stepping-disk sites for me. Then print out a map."" a, a,"As the Ringworld was vast, the scale was extreme. His naked eye would never get any detail out of it. When the map extruded, he folded it and stuffed it in a pocket anyway." a, a,"He broke for lunch and came back." a, a,"He set two service stacks moving and changed a number of links. The Hindmost's Voice printed another map with his changes added. 
He p"...\\n000002C8 2012-06-01 06:38:32 20474 20634 Ave Rec Size = 5032\\n000002C9 2012-06-01 06:38:32 20474 20634 Output start = 0, num = 1\\n000002CA 2012-06-01 06:38:32 20474 20634 SORT Merge READ: Stream(0) local, pos=0 len=1\\n000002CB 2012-06-01 06:38:32 20474 20634 Global Merger Created: 1 streams - activity(sort, 4)\\n000002CC 2012-06-01 06:38:32 20474 20633 SORT Merge Start - activity(sort, 4)\\n000002CD 2012-06-01 06:38:32 20474 20633 INDEXWRITE: process: handling fname : /var/lib/HPCCSystems/hpcc-data/thor/test_fail._1_of_4 - activity(indexwrite, 5)\\n000002CE 2012-06-01 06:38:32 20474 20633 Writing to file: /var/lib/HPCCSystems/hpcc-data/thor/thtmp20474_6__partial.tmp - activity(indexwrite, 5)\\n000002CF 2012-06-01 06:38:32 20474 20633 INDEXWRITE: created fixed output stream /var/lib/HPCCSystems/hpcc-data/thor/test_fail._1_of_4 - activity(indexwrite, 5)\\n000002D0 2012-06-01 06:38:32 20474 20633 INDEXWRITE: write - activity(indexwrite, 5)\\n000002D1 2012-06-01 06:38:32 20474 20633 Record size (max) = 10016 - activity(sort, 4)\\n000002D2 2012-06-01 06:38:32 20474 20633 Backtrace:\\n000002D3 2012-06-01 06:38:32 20474 20636 SORT Merge WRITE: start 10.210.150.78:51706, pos=2, len=2\\n000002D4 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2ac5db48eb96]\\n000002D5 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x26) [0x2ac5db48f756]\\n000002D6 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjhtree.so(_ZN10CWriteNode3addEyPKvjy+0x278) [0x2ac5dd34c998]\\n000002D7 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjhtree.so(_ZN11CKeyBuilder14processKeyDataEPKcyj+0x5c) [0x2ac5dd36665c]\\n000002D8 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN23IndexWriteSlaveActivity10processRowEPKv+0x13a) [0x2ac5ddd64b1a]\\n000002D9 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN23IndexWriteSlaveActivity7processEv+0xdbb) [0x2ac5ddd65adb]\\n000002DA 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN20ProcessSlaveActivity4mainEv+0x34) [0x2ac5ddd25674]\\n000002DB 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent4mainEv+0x2d) [0x2ac5db518f1d]\\n000002DC 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent8CAThread3runEv+0x10) [0x2ac5db51c4e0]\\n000002DD 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2ac5db518877]\\n000002DE 2012-06-01 06:38:32 20474 20633 /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2ac5db5193ef]\\n000002DF 2012-06-01 06:38:32 20474 20633 /lib64/libpthread.so.0 [0x3f8660673d]\\n000002E0 2012-06-01 06:38:32 20474 20633 /lib64/libc.so.6(clone+0x6d) [0x3f85ed44bd]\\n000002E1 2012-06-01 06:38:32 20474 20633 assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359\\n000002E2 2012-06-01 06:38:32 20474 20633 activity(indexwrite, 5) : Graph[1], indexwrite[5]: assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359\\n000002E3 2012-06-01 06:38:32 20474 20633 Stopping input for - activity(indexwrite, 5)\\n000002E4 2012-06-01 06:38:32 20474 20633 SORT waiting barrier.2 - activity(sort, 4)\\n000002E5 2012-06-01 06:38:32 20474 20636 SORT Merge WRITE: start 
10.210.150.81:34472, pos=1, len=1\\n000002E6 2012-06-01 06:38:32 20474 20636 SORT Merge: finished 10.210.150.78:51706, 2 rows merged\\n000002E7 2012-06-01 06:38:32 20474 20636 SORT Merge: finished 10.210.150.81:34472, 1 rows merged\\n000002E8 2012-06-01 06:38:32 20474 20633 SORT barrier.2 raised - activity(sort, 4)\\n000002E9 2012-06-01 06:38:32 20474 20633 Local merge finishing - activity(sort, 4)\\n000002EA 2012-06-01 06:38:32 20474 20633 Local merge finished - activity(sort, 4)\\n000002EB 2012-06-01 06:38:32 20474 20633 SORT waiting for merge - activity(sort, 4)\\n000002EC 2012-06-01 06:38:32 20474 20633 ITDL output 0 stopped, count was 1 - activity(sort, 4)\\n000002ED 2012-06-01 06:38:32 20474 20632 graph(graph1, 1) : Graph[1], indexwrite[5]: assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359\\n000002EE 2012-06-01 06:38:32 20474 20632 End of sub-graph - graph(graph1, 1)\\n000002EF 2012-06-01 06:38:32 20474 20634 Global Merged completed - activity(sort, 4)\\n000002F0 2012-06-01 06:38:32 20474 20634 Global Merge exit - activity(sort, 4)\\n000002F1 2012-06-01 06:38:32 20474 20634 Close - activity(sort, 4)\\n000002F2 2012-06-01 06:38:32 20474 20634 Close finished - activity(sort, 4)\\n000002F3 2012-06-01 06:38:32 20474 20634 Disconnecting from slave 0 of 3 - activity(sort, 4)\\n000002F4 2012-06-01 06:38:32 20474 20634 CSortTransferServerThread::stop\\n000002F5 2012-06-01 06:38:32 20474 20635 CSortTransferServerThread finished\\n000002F6 2012-06-01 06:38:32 20474 20634 CSortTransferServerThread::stopped\\n000002F7 2012-06-01 06:38:32 20474 20634 Disconnected from slave 0 of 3 - activity(sort, 4)\\n000002F8 2012-06-01 06:38:32 20474 20634 Exiting SortSlaveServer on tag 65550 - activity(sort, 4)\\n000002F9 2012-06-01 06:38:35 20474 20474 GraphAbort: W20120601-064620graph1\\n000002FA 2012-06-01 06:38:36 20474 20474 Abort condition set - activity(indexwrite, 5)\\n000002FB 2012-06-01 06:38:36 20474 20474 Abort condition set - activity(sort, 4)\\n000002FC 2012-06-01 06:38:36 20474 20474 Abort condition set - activity(normalize, 3)\\n000002FD 2012-06-01 06:38:36 20474 20474 Abort condition set - activity(temprow, 2)\\n000002FE 2012-06-01 06:38:36 20474 20632 Watchdog: Stop Job 1\\n000002FF 2012-06-01 06:38:36 20474 20632 MSortSlaveActivity::kill - activity(sort, 4)\\n00000300 2012-06-01 06:38:36 20474 20632 Joining Sort Slave Server - activity(sort, 4)\\n00000301 2012-06-01 06:38:36 20474 20632 ~CSortSlaveBase - activity(sort, 4)\\n00000302 2012-06-01 06:38:36 20474 20632 Socket statistics : connects=1\\nconnecttime=159us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=77\\nreadtime=1428us\\nreadsize=186185 bytes\\nwrites=64\\nwritetime=2662us\\nwritesize=36460 bytes\\nactivesockets=6\\nnumblockrecvs=0\\nnumblocksends=2\\nblockrecvsize=0\\nblocksendsize=15048\\nblockrecvtime=0\\nblocksendtime=226\\nlongestblocksend=135\\nlongestblocksize=10032\\n - graph(graph1, 1)\\n00000303 2012-06-01 06:38:36 20474 20632 graph(graph1, 1) : Graph[1], indexwrite[5]: assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359\\n00000304 2012-06-01 06:38:36 20474 20632 Graph Done - graph(graph1, 1)\\n00000305 2012-06-01 06:38:36 20474 20632 PU= 0% MU= 2% MAL=2866976 MMP=495616 SBK=2371360 TOT=4988K RAM=380512K SWP=88K - graph(graph1, 1)\\n00000306 2012-06-01 06:38:36 20474 20632 graph(graph1, 1) : Graph[1], indexwrite[5]: 
assert(insize<=keyLen) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Centos-5.7-x86_64/HPCC-Platform/system/jhtree/ctfile.cpp, line 359\\n00000307 2012-06-01 06:38:36 20474 20632 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n00000308 2012-06-01 06:38:36 20474 20474 QueryDone, removing W20120601-064620graph1 from jobs\\n00000309 2012-06-01 06:38:36 20474 20474 Job ended : graph1\\n0000030A 2012-06-01 06:38:36 20474 20474 destroying ProcessSlaveActivity - activity(indexwrite, 5)\\n0000030B 2012-06-01 06:38:36 20474 20474 ProcessSlaveActivity : joining process thread - activity(indexwrite, 5)\\n0000030C 2012-06-01 06:38:36 20474 20474 AFTER ProcessSlaveActivity : joining process thread - activity(indexwrite, 5)\\n0000030D 2012-06-01 06:38:36 20474 20474 DESTROYED - activity(indexwrite, 5)\\n0000030E 2012-06-01 06:38:36 20474 20474 DESTROYED - activity(temprow, 2)\\n0000030F 2012-06-01 06:38:36 20474 20474 DESTROYED - activity(normalize, 3)\\n00000310 2012-06-01 06:38:36 20474 20474 DESTROYED - activity(sort, 4)\\n00000311 2012-06-01 06:38:36 20474 20474 CJobBase resetting memory manager\\n00000312 2012-06-01 06:38:36 20474 20474 JMALLOC OSBLOCKS: 1, total memory 1048576\\n00000313 2012-06-01 06:38:36 20474 20474 QueryDone, removed W20120601-064620graph1 from jobs\\n\\n
\\n\\nThe mystery continues!\\n\\nDan\", \"post_time\": \"2012-06-01 11:40:02\" },\n\t{ \"post_id\": 1687, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"rtaylor\", \"post_text\": \"\", \"post_time\": \"2012-05-31 21:34:52\" },\n\t{ \"post_id\": 1686, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"[quote="DSC":3bkzaped][quote="rtaylor":3bkzaped]That should be correctable by adding the MAXLENGTH option to the variable-length fields in the RECORD structure. Something like this:\\n\\nIDX := INDEX(ds,{DocID},{,MAXLENGTH(10000) ds},'filename');\\n\\nWhat if you don't know, or cannot guarantee, the maximum size of the incoming data? Or should we just put an unreasonably-large number in there?\\n\\nNever mind. That was a couple of silly questions. If you're breaking the blob up, of course you know how big that record will be (or at least how big the chunk field is).\\n\\nCoffee....\\n\\nDan\", \"post_time\": \"2012-05-31 16:23:07\" },\n\t{ \"post_id\": 1685, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":hriic6yi]That should be correctable by adding the MAXLENGTH option to the variable-length fields in the RECORD structure. Something like this:\\n\\nIDX := INDEX(ds,{DocID},{,MAXLENGTH(10000) ds},'filename');\\n\\nWhat if you don't know, or cannot guarantee, the maximum size of the incoming data? Or should we just put an unreasonably-large number in there?\", \"post_time\": \"2012-05-31 15:16:32\" },\n\t{ \"post_id\": 1684, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Re: Thor crash when writing big index records\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThat should be correctable by adding the MAXLENGTH option to the variable-length fields in the RECORD structure. Something like this:\\n\\nIDX := INDEX(ds,{DocID},{,MAXLENGTH(10000) ds},'filename');\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-31 15:00:56\" },\n\t{ \"post_id\": 1682, \"topic_id\": 373, \"forum_id\": 8, \"post_subject\": \"Thor crash when writing big index records\", \"username\": \"DSC\", \"post_text\": \"In another post I talked about experiencing a crash when trying to break up a large data string into chunks in order to create payload-only indexes to go into superkeys. I've managed to replicate the problem on a small scale (enclosed).\\n\\nI think what is happening here is that the anticipated record size for the index is calculated too small (4096 bytes rather than whatever it should be). In my sample code, there is a kChunkSize constant that can be adjusted to make the code succeed or fail. A value of 4000 works, a value of 5000 fails.\\n\\nEnjoy!\\n\\nDan\", \"post_time\": \"2012-05-31 13:59:38\" },\n\t{ \"post_id\": 8634, \"topic_id\": 374, \"forum_id\": 8, \"post_subject\": \"Re: Timing Roxie queries\", \"username\": \"ckaminski\", \"post_text\": \"The below is not exactly what you're asking for but writing it did stop me from digging through logs to find timings for a few thousand test requests.\\n\\n====== Roxie Query ECL Code =======\\n
\\n[...]\\n\\nresults := OUTPUT([some results from your query], NAMED('Results'));\\n\\nINTEGER8 ms() := BEGINC++\\n #option pure\\n #include <sys/time.h>\\n\\n #body\\n struct timeval tp;\\n gettimeofday(&tp, NULL);\\n //get current timestamp in milliseconds\\n long long mslong = (long long) tp.tv_sec * 1000L + tp.tv_usec / 1000; \\n return mslong;\\nENDC++;\\n\\nstart_time := output(ms(), NAMED('Start_Time'));\\nfinish_time := output(ms(), NAMED('End_Time'));\\n\\nSEQUENTIAL(start_time, results, finish_time);
\\n\\n====== Calling Python Code ======\\n\\nimport requests\\nimport json\\nfrom time import sleep\\n\\nbase_url = 'http://someipaddress:8131/someroxiename/cfk_ecl_example.somequery/json?param1=%s&param2=%s'\\n\\nuser = 'youruserid'\\npw = 'yourpassword'\\nd = 'C:\\\\\\\\Users\\\\\\\\youruserid\\\\\\\\Documents\\\\\\\\Source\\\\\\\\RoxieTestData\\\\\\\\'\\nparam2 = '2'\\n\\nj_head = 'cfk_ecl_example.somequery'\\n\\nif __name__ == '__main__':\\n params = open(d + 'params.txt').readlines()\\n params = [x.strip() for x in params]\\n results = open(d + 'results.txt', 'a')\\n i = 0\\n for param1 in params:\\n i = i + 1\\n print i\\n url = base_url % (param1, param2)\\n resp = requests.post(url, auth=(user, pw))\\n \\n j = json.loads(resp.content)\\n j = j[j_head]['Results']\\n s_time = j['Start_Time']['Row'][0]['Start_Time']\\n result = j['Results']['Row'][0]['Results']\\n e_time = j['End_Time']['Row'][0]['End_Time']\\n elapsed_time = int(e_time) - int(s_time)\\n out = '%s,%s,%s,%s\\\\n' % (param1, param2, int(result), elapsed_time)\\n results.write(out)\\n results.flush()\\n sleep(0.1)\\n \\n print 'Done.'\\n
\\n\\n-Charles Kaminski\", \"post_time\": \"2015-11-18 14:42:18\" },\n\t{ \"post_id\": 1709, \"topic_id\": 374, \"forum_id\": 8, \"post_subject\": \"Re: Timing Roxie queries\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":3e822wep]I've pinged Documentation as to where this information might be, but I also checked with a few developers and they told me that the best place to review Roxie timings is in the logs.\\n\\nThey can be found at:\\n\\n/var/log/HPCCSystems/myroxie \\n\\nWill post back when I have more info.\\n\\nThanks for the info. However, I was hoping to incorporate timing info in a SOAP or JSON response, as a 'user-facing feature' rather than for development. This just looks like something that needs to be incorporated into middleware instead, which is really more accurate (and useful) anyway.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-06-04 18:41:28\" },\n\t{ \"post_id\": 1708, \"topic_id\": 374, \"forum_id\": 8, \"post_subject\": \"Re: Timing Roxie queries\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI've pinged Documentation as to where this information might be, but I also checked with a few developers and they told me that the best place to review Roxie timings is in the logs.\\n\\nThey can be found at:\\n\\n/var/log/HPCCSystems/myroxie \\n\\nWill post back when I have more info.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-04 18:37:15\" },\n\t{ \"post_id\": 1692, \"topic_id\": 374, \"forum_id\": 8, \"post_subject\": \"Timing Roxie queries\", \"username\": \"DSC\", \"post_text\": \"What is the best way to calculate the internal duration of a Roxie query? I'm asking because I realize there are subtleties with variable assignments that I don't fully understand. Plus, I remember reading about exactly this somewhere in the documentation but I can't find it again.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-01 13:08:40\" },\n\t{ \"post_id\": 1734, \"topic_id\": 382, \"forum_id\": 8, \"post_subject\": \"Re: Macro Expansion\", \"username\": \"rtaylor\", \"post_text\": \"Eric,\\n\\nSorry, but the short answer is -- no. You are far from the first to ask for this, and the answer has always been no ever since macros were added to the language (about 10 years ago or more). 
When I'm asked this in class my usual reply is, "the guy that always says no to this is a lot smarter than me, and I've seen him do 'impossible' things over the years we've worked together, so when he says it's just not possible I take him at his word."\\n\\nRichard\", \"post_time\": \"2012-06-08 13:45:38\" },\n\t{ \"post_id\": 1729, \"topic_id\": 382, \"forum_id\": 8, \"post_subject\": \"Macro Expansion\", \"username\": \"eric.scott\", \"post_text\": \"Is there a way to render the code produced by a macro expansion?\\n\\nFor example, from the language reference example:\\n\\nDATASET CrossTab(File,X,Y) := MACRO\\nTABLE(File,{X, Y, COUNT(GROUP)},X,Y)\\nENDMACRO;\\n// and would be used something like this:\\nOUTPUT(CrossTab(Person,person.per_st,Person.per_sex))\\n// this macro usage is the equivalent of:\\n// OUTPUT(TABLE(Person,{person.per_st,Person.per_sex,COUNT(GROUP)},\\n// person.per_st,Person.per_sex)\\n\\nIs there a way to automatically generate the 'this macro usage is the equivalent of' part, as there is with say Lisp's 'macroexpand' form?\\n\\nI know there is #expand, but as I understand it that is more of a means of inserting content into the macro.\\n\\nThanks,\", \"post_time\": \"2012-06-08 00:02:33\" },\n\t{ \"post_id\": 1760, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Re: Workaround for Nested Child Queries error on legacy syst\", \"username\": \"dustin.skaggs@lexisnexis.com\", \"post_text\": \"The error is caused by trying to do dataset operations in a single record context. For the code that's in the other post, it's the DEDUP(SORT(..)) in the transform that's causing it. The way to get this to run on a legacy build of thor is flatten out the child records with a normalize, do the work on the flat dataset, and then roll it back up to a dataset with a child dataset. Here's some example code that runs on legacy thor:\\n\\nexpertise_rec:={ unicode20 expertise };\\nperson_rec:={integer id, dataset(expertise_rec) expertises {maxcount(10)}};\\n\\npersons := dataset([{1,[{'butcher'},{'baker'}]},{1,[{'beggar'},{'baker'}]},\\n {2,[{'beggar'},{'baker'}]},{2,[{'thief'},{'baker'}]}\\n ],person_rec);\\n\\nperson_recFlat:={integer id, unicode20 expertise};\\npersonsFlat := normalize(persons,\\n left.expertises,\\n transform(person_rec,\\n self.expertises := dataset(row({right.expertise}, expertise_rec));\\n self := left));\\n\\npersonsDedup := dedup(sort(personsFlat, record), record);\\n\\nperson_rec trollup(person_rec l, person_rec r):=transform\\n self.expertises:= l.expertises+r.expertises;\\n self:=l\\nend;\\n\\nperson_ds:=rollup(personsDedup,left.Id=right.Id,trollup(left,right));\\noutput(person_ds);
\", \"post_time\": \"2012-06-11 14:36:43\" },\n\t{ \"post_id\": 1757, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Re: Workaround for Nested Child Queries error on legacy syst\", \"username\": \"rtaylor\", \"post_text\": \"Eric,\\n\\nTry running the job on hThor -- that is you rbest chance of getting it to work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-10 13:48:22\" },\n\t{ \"post_id\": 1753, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Re: Workaround for Nested Child Queries error on legacy syst\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric,\\n\\nI gather the (non) workaround is just to normalize over the child dataset?\\n
\\n\\nYep , that sounds correct!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-08 19:03:17\" },\n\t{ \"post_id\": 1751, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Re: Workaround for Nested Child Queries error on legacy syst\", \"username\": \"eric.scott\", \"post_text\": \"I believe for the time being I'm restricted to the legacy system. The thing I'm trying to do is pretty much tied to using THOR. I gather the (non) workaround is just to normalize over the child dataset?\\n\\nThanks, Bob!\", \"post_time\": \"2012-06-08 18:26:54\" },\n\t{ \"post_id\": 1750, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Re: Workaround for Nested Child Queries error on legacy syst\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric, I can't find any workaround in the bug reports. Are you restricted to running this on legacy, or can you get access to an OSS cluster?\\n\\nIf not, have you tried running it using the NOTHOR directive?\\n\\n[attributename := ] NOTHOR( action )\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-08 18:20:26\" },\n\t{ \"post_id\": 1748, \"topic_id\": 385, \"forum_id\": 8, \"post_subject\": \"Workaround for Nested Child Queries error on legacy system?\", \"username\": \"eric.scott\", \"post_text\": \"I'm on the legacy system, and encountering the exact problem discussed here:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=281&p=1201&hilit=nested+child+queries&sid=5709bcd591b9a409a1f26c70270813fe&sid=5709bcd591b9a409a1f26c70270813fe#p1201\\n\\nWhen I copy and paste Richard's example code, I still get the 'Nested Child Queries' error.\\n\\nWhat exactly is the problem? Is there a workaround for this on the legacy system?\\n\\nThanks,\", \"post_time\": \"2012-06-08 17:39:56\" },\n\t{ \"post_id\": 1775, \"topic_id\": 386, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a CSV File\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot..tat worked..\", \"post_time\": \"2012-06-13 18:09:45\" },\n\t{ \"post_id\": 1758, \"topic_id\": 386, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a CSV File\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThe data in your file is not in a null-terminated form, so try changing all the VARSTRING to STRING in your RECORD structure.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-10 13:52:09\" },\n\t{ \"post_id\": 1755, \"topic_id\": 386, \"forum_id\": 8, \"post_subject\": \"Spraying a CSV File\", \"username\": \"ksviswa\", \"post_text\": \"Hi All,\\n\\nI am very new to HPCC, was just working on some basic programs in ECL IDE in a virtual machine.\\n\\nI am able to spray a fixed length file. 
While spraying a csv and then when i try to check the output in ECL IDE, i get junk values or no records in ECL IDE.\\n\\nFor Ex :\\n \\nCSV File :\\n101,John,23,1980\\n102,Ram,21,1976\\n103,KSV,22,1990 \\n\\nLayout structure :\\nLayout_people := record\\nvarstring id;\\nvarstring name;\\nvarstring age;\\nvarstring yearofbirth;\\nend;\\n\\nWhen i try to specify the same in ECL ide, i get some junk output.\\n\\nPeopleDS := DATASET('File Location',Layout_people,CSV);\\noutput(PeopleDS); \\n\\nIs it because i need to specify the delimiter or some additional settings in ECL watch / IDE or something needs to be done in ECL code..?\\n\\nPlease help me regarding this.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-06-09 09:59:15\" },\n\t{ \"post_id\": 1774, \"topic_id\": 387, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a XML File\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot fr the info..\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-06-13 18:08:49\" },\n\t{ \"post_id\": 1759, \"topic_id\": 387, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a XML File\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa1.) How do we access different elements for a XML Sprayed file in ECL IDE..?
In the RECORD structure documentation there is a discussion of the XPATH field modifier and a section on the varieties of XPATH that we support.\\n\\n2.) How do we spray data other than xml/csv or fixed length..?
When you understand that the "Spray CSV" page in ECL Watch should more appropriately be named "Spray Variable-length Files" then you'll see that virtually any kind of data can be sprayed onto Thor using these three options. The only kind of data files that cannot simply be sprayed as-is are those that contain variable-length records with no record delimiter in the file (these must be pre-processed to at least add a record delimiter so the Spray CSV (variable) can operate correctly on them).\\n\\nPlease help me regarding these topics, as am very new to HPCC and ECL IDE.
All these "getting started" topics are full covered in the classes we offer. Here is the current class schedule: http://hpccsystems.com/community/training-events/training\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-10 14:07:01\" },\n\t{ \"post_id\": 1756, \"topic_id\": 387, \"forum_id\": 8, \"post_subject\": \"Spraying a XML File\", \"username\": \"ksviswa\", \"post_text\": \"Hi all,\\n\\n1.) How do we access different elements for a XML Sprayed file in ECL IDE..?\\n\\n2.) How do we spray data other than xml/csv or fixed length..?\\n\\nPlease help me regarding these topics, as am very new to HPCC and ECL IDE.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-06-09 10:10:05\" },\n\t{ \"post_id\": 9952, \"topic_id\": 388, \"forum_id\": 8, \"post_subject\": \"Re: Uni.SplitWords ?\", \"username\": \"Allan\", \"post_text\": \"I would add a 'TRIM' to the example code above:\\n\\nds := DATASET([{U'the cat and th\\\\353 rat'}],{UNICODE line});\\nPATTERN Delim := U' ';\\nPATTERN Ltrs := PATTERN('[[:alpha:][:digit:]\\\\\\\\-]')+;\\nPATTERN Words := Ltrs OPT(Delim);\\nPARSE(QueryAddressDS,line,Words,{UNICODE Word := TRIM(MATCHUNICODE(Words),LEFT,RIGHT)},BEST,MANY);\\n
\\nTo get the expected results.\\nYours\\nAllan\", \"post_time\": \"2016-07-13 08:19:13\" },\n\t{ \"post_id\": 1764, \"topic_id\": 388, \"forum_id\": 8, \"post_subject\": \"Re: Uni.SplitWords ?\", \"username\": \"DSC\", \"post_text\": \"I guess I'll go with a function to emulate what I think Uni.SplitWords() should do, then. Casting to a string would likely have Bad Consequences I'd rather not track down at some unspecified future date.\\n\\nThanks, Richard!\\n\\nDan\", \"post_time\": \"2012-06-11 22:19:35\" },\n\t{ \"post_id\": 1762, \"topic_id\": 388, \"forum_id\": 8, \"post_subject\": \"Re: Uni.SplitWords ?\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI would say no, since this code:IMPORT STD;\\nS := U'the cat and th\\\\353 rat';\\nSTD.Str.SplitWords(S,' ');
Gives a parameter type mismatch error. But if I cast to STRING, it does work:IMPORT STD;\\nS := U'the cat and th\\\\353 rat';\\nSTD.Str.SplitWords((STRING)S,' ');
Depending on your actual UNICODE data, that may or may not be good enough. If it's not, then I suggest you use PARSE to split the words yourself, something like this:ds := DATASET([{U'the cat and th\\\\353 rat'}],{UNICODE line});\\nPATTERN Delim := U' ';\\nPATTERN Ltrs := PATTERN('[[:alpha:]]')+;\\nPATTERN Words := Ltrs OPT(Delim);\\n\\nPARSE(ds,line,Words,{UNICODE Word := MATCHUNICODE(Words)},BEST,MANY);
HTH,\\n\\nRichard\", \"post_time\": \"2012-06-11 18:20:42\" },\n\t{ \"post_id\": 1761, \"topic_id\": 388, \"forum_id\": 8, \"post_subject\": \"Uni.SplitWords ?\", \"username\": \"DSC\", \"post_text\": \"There doesn't seem to be a unicode version of Str.SplitWords(). Will Str.SplitWords() reliably work on a UNICODE datatype if the separator is a simple ASCII character?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-11 15:22:05\" },\n\t{ \"post_id\": 1797, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"ghalliday\", \"post_text\": \"I just tried that example on 3.8, and it compiled fine for me.\\n\\nI suspect the bug has already been fixed.\", \"post_time\": \"2012-06-19 08:43:07\" },\n\t{ \"post_id\": 1785, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"ghalliday\", \"post_text\": \"I will see if I can reproduce the problem\\n\\nI have fixed one problem with csv reading - if I remember correctly it was since 3.6.\", \"post_time\": \"2012-06-15 17:52:40\" },\n\t{ \"post_id\": 1781, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"DSC\", \"post_text\": \"Addendum: In all my failing cases, the source file is a just-sprayed CSV (tab-delimited) file. Since I get identical errors with two different input files, I don't think the actual contents of the file are the problem.\\n\\nDan\", \"post_time\": \"2012-06-14 15:11:31\" },\n\t{ \"post_id\": 1780, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"DSC\", \"post_text\": \"I ran into the same problem but with an entirely different file and without complicating things with Str.SplitWords(). The enclosed folder contains all the information (though it does not have my dataset). Here is what is in the folder:\\n\\nCo_Change.ecl: Code file that generates the error. I was calling CreateDatasources() within that file to kick things off.\\n\\neclcc.log: Log file showing the compilation error, generated at runtime, when trying to execute CreateDatasources().\\n\\nW20120614-092611.cpp: The C++ source for the failing file (from Co_Change.ecl).\\n\\nExcerpt.ecl: I pulled code from Co_Change.ecl and attempted to replicate the problem with less noise and with an inline dataset. This code works, and the meat of it is identical.\\n\\nW20120614-092735.cpp: The C++ source for the working file (from Excerpt.ecl).\\n\\nHopefully this information will help determine the problem and, I hope, a workaround.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-14 15:08:51\" },\n\t{ \"post_id\": 1779, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"JimD\", \"post_text\": \"To keep CPP files with the WU, add the debug option under the More button in the ECL IDE. You can then view the CPP file(s) from the Workunit Details page in ECL Watch (in the Helpers section).\\n\\nsaveCppTempFiles=1
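The same debug option can also be set per-workunit from the ECL itself, which may be handy for a one-off investigation (a sketch using the option name above; untested here):\n\n#OPTION('saveCppTempFiles', TRUE); // keep the generated C++ files with the workunit\n\n// ... the rest of the query follows as normal ...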
\\n\\nHope this helps,\\n\\nJim\", \"post_time\": \"2012-06-14 12:36:37\" },\n\t{ \"post_id\": 1778, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"DSC\", \"post_text\": \"Further update: I spent some time yesterday trying to recreate this problem using toy inline datasets. For whatever reason, I could not replicate the problem.\\n\\nOne thought I had was to compare the generated C++ code to see what differences there are in toy version versus the problem version. Is there any way to capture the C++ code file of a successful compilation? It is normally deleted.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-14 11:42:39\" },\n\t{ \"post_id\": 1771, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Re: Runtime compiler error\", \"username\": \"DSC\", \"post_text\": \"Update:\\n\\nI converted all the SET OF [STRING|UNICODE] datatypes in DatasourceLayout record to simple [STRING|UNICODE] and removed the Str.SplitWords() functions (and a Unicode version of that) from the transform. The code compiles and executes.\\n\\nCome to think of it, the LOOP function I referenced earlier was using local attributes whose values were generated with calls to Str.SplitWords(). So the problem may revolve entirely around that standard library call somehow.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-13 13:28:03\" },\n\t{ \"post_id\": 1770, \"topic_id\": 390, \"forum_id\": 8, \"post_subject\": \"Runtime compiler error\", \"username\": \"DSC\", \"post_text\": \"The following is with 3.6.2-2CE.\\n\\nI have a transform that I thought was relatively straightforward. The compiler, however, doesn't agree. It spits out the following error:\\n\\nError: Compile/Link failed for W20120613-072954 (see '//10.210.150.81/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0 - W20120613-072954)
\\n\\nFollowed by an awful lot of "... was not declared in this scope" messages. I've looked at the interim C++ code and it looks like the built-up code is just flat-out wrong. I've enclosed the ECL file, the interim C++ file and the log file for analysis. I was calling the CreateDatasources() exported function from another file to start the whole process.\\n\\nFWIW, I experienced this identical error on an earlier incarnation of this same transform but when it was about 25% of the current size and much simpler. In that case, the error seemed to revolve around a LOOP I was using to marry pairs of values from two different SET variables -- if I commented-out the LOOP, the code ran. I eventually just built the TRANSFORM differently to work around the issue. But now it's back, and I don't really know how to proceed.\\n\\nThoughts?\\n\\nDan\", \"post_time\": \"2012-06-13 12:33:41\" },\n\t{ \"post_id\": 2122, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"dabayliss\", \"post_text\": \"Just a note: I have not tested them myself yet - but I pulled a couple of change requests last week that add Decision Trees to our unified classifier interface ... they are on the master branch\\n\\nDavid\", \"post_time\": \"2012-07-31 14:25:19\" },\n\t{ \"post_id\": 2092, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"nvasil\", \"post_text\": \"Hi there \\n\\nand sorry to jump in a little bit late. \\nSome clarifications. \\n\\n\\nI read if the number of training records are more then number of passes should also be high.According to me in 1-2 passes correct weights won't be given.Would like to know your comments about it.\\n
\\n\\nThat is not quite true, in fact the opposite is most likely. Let me give you an example. Imagine that you have a dataset with N records and you train it with k iterations. Now take the same dataset and duplicate it k times. Now you have a dataset with kN records. If you train it with one iteration then it is equivalent to the training you did with the Nrecords k times. This is of course an extreme case, but it gives you a hint. In practical cases larger datasets mean larger duplication of the data. The more data you add the more records you will have that look like.\\n\\nComing to the ECL implementation, I haven't looked how it is implemented. I can look at it if your problem is urgent although I am running out of time. Judging from David's responses, I can recommend a quick hack:\\nIf the problem is multiple iterations then take your dataset create k random subsamples. Train in parallel k perceptrons and using a voting scheme to evaluate your test data. \\nAnother strategy would be to run one iteration for every subsample get k different perceptrons. Average the weights of the perceptron and retrain each perceptron starting from the averaged weights\\n\\nNick\", \"post_time\": \"2012-07-26 14:41:56\" },\n\t{ \"post_id\": 2000, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"yea i agree with your point i tried that, it gives clasification as only 0.\\n\\nI read if the number of training records are more then number of passes should also be high.According to me in 1-2 passes correct weights won't be given.Would like to know your comments about it.\\n\\nwe are having only 2 node cluster is that the issue ?\", \"post_time\": \"2012-07-18 10:30:57\" },\n\t{ \"post_id\": 1967, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"dabayliss\", \"post_text\": \"The perceptron code was written to demonstrate an online learning algorithm; it is actually performing the computations 'one record at a time' - so if you have 1000K records over 50 iterations - you performed the computation 50M times.\\n\\nAs I am sure you are aware; convergence for a perceptron is only guaranteed in the case that the data is linearly separable. Even if you have linearly separable data; too high a learn rate can lead to a result that oscillates.\\n\\nGiven the large exemplar count - my approach to this would be to set the learning rate quite low - perform 1 & 2 passes and compare the results\", \"post_time\": \"2012-07-13 12:22:58\" },\n\t{ \"post_id\": 1966, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\nOK..When i run perceptron on 1000k training data with 50 passes it took 9 hours to run what can be the issue ?\\n\\nIn every pass output of perceptron changes.Is there any way using which i can determine appropriate number of passes which will give me accurate output?\", \"post_time\": \"2012-07-13 06:01:31\" },\n\t{ \"post_id\": 1924, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"rtaylor\", \"post_text\": \" Is there support for Multilayer Perceptron?
In looking at the code, I would expect the answer to be: not at this time.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-09 20:53:49\" },\n\t{ \"post_id\": 1907, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nYea i did . Is there support for Multilayer Perceptron?\\n\\nThanks\", \"post_time\": \"2012-07-06 10:35:45\" },\n\t{ \"post_id\": 1821, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"rtaylor\", \"post_text\": \"The Trees code file does contain extensive comments. Have you looked at those?\", \"post_time\": \"2012-06-21 14:20:48\" },\n\t{ \"post_id\": 1805, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"thanks but i already checked this document there is nothing about trees.\", \"post_time\": \"2012-06-20 07:46:55\" },\n\t{ \"post_id\": 1793, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"arjuna chala\", \"post_text\": \"The documentation is available at - http://hpccsystems.com/community/docs/machine-learning\", \"post_time\": \"2012-06-18 13:30:22\" },\n\t{ \"post_id\": 1790, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"hi,\\nIs there any documentation available for the same?\", \"post_time\": \"2012-06-16 14:15:15\" },\n\t{ \"post_id\": 1788, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Re: Code Samples on Trees and Perceptron\", \"username\": \"arjuna chala\", \"post_text\": \"Chhaya,\\n\\nWe are in the process of publishing some simple examples on the web. In the meantime, we will email you what we have.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2012-06-15 22:46:49\" },\n\t{ \"post_id\": 1783, \"topic_id\": 393, \"forum_id\": 8, \"post_subject\": \"Code Samples on Trees and Perceptron\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\nWe are trying to explore some of the algorithms in Machine Learning.\\nIs there any code samples or documentation available on trees,perceptron and linear regression.\\n\\nPlease help us in this regard.\", \"post_time\": \"2012-06-15 11:00:09\" },\n\t{ \"post_id\": 1787, \"topic_id\": 394, \"forum_id\": 8, \"post_subject\": \"Re: Sort order retained after filter?\", \"username\": \"DSC\", \"post_text\": \"Good to hear. Thanks, Richard!\", \"post_time\": \"2012-06-15 18:36:18\" },\n\t{ \"post_id\": 1786, \"topic_id\": 394, \"forum_id\": 8, \"post_subject\": \"Re: Sort order retained after filter?\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI'll go out on a limb here and say: Yes! \\n\\nSeriously, because you're filtering an ordered recordset, I would expect the order to be maintained and would report it as a bug if I ever found it wasn't. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-15 18:26:40\" },\n\t{ \"post_id\": 1784, \"topic_id\": 394, \"forum_id\": 8, \"post_subject\": \"Sort order retained after filter?\", \"username\": \"DSC\", \"post_text\": \"Made-up example:\\n\\n
Foo := RECORD\\n\\tSTRING\\t\\tsomeText;\\n\\tINTEGER\\t\\tsomeNumber;\\nEND;\\n\\nds := DATASET('~foo',Foo,THOR);\\n\\nrs1 := SORT(ds,someNumber);\\n\\nrs2 := rs1(someNumber > 10);\\n\\nOUTPUT(rs1);\\nOUTPUT(rs2);
\\n\\nIs the output of rs2 guaranteed to be in the same sort order as rs1 in all cases?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-15 17:20:27\" },\n\t{ \"post_id\": 1816, \"topic_id\": 395, \"forum_id\": 8, \"post_subject\": \"Re: How to use DEFINE in conjunction with USE for NLP parsin\", \"username\": \"ideal\", \"post_text\": \"Simply perfect ! \\n\\nJust a copy paste and it works as you say ! (I tried only the first one).\\n\\nThank you !\\n\\nJM.\", \"post_time\": \"2012-06-20 15:31:05\" },\n\t{ \"post_id\": 1812, \"topic_id\": 395, \"forum_id\": 8, \"post_subject\": \"Re: How to use DEFINE in conjunction with USE for NLP parsin\", \"username\": \"ghalliday\", \"post_text\": \"The alternative syntax would be\\n\\n
\\n..\\nRULE(Btree) args := arg OPT(suiteargs) addarg($1,$2) : define('args');\\n..\\n
\\n\\ninstead of adding an extra rule for args.\", \"post_time\": \"2012-06-20 11:20:02\" },\n\t{ \"post_id\": 1811, \"topic_id\": 395, \"forum_id\": 8, \"post_subject\": \"Re: How to use DEFINE in conjunction with USE for NLP parsin\", \"username\": \"ghalliday\", \"post_text\": \"The reason is it (unfortunately) necessary is that ECL doesn't have any support for forward references.\\n\\nNormally you would expect to say:\\n\\nRULE a := s;\\nRULE b := 'pattern';\\nRULE s := b;\\n\\nBut because there are no forward references, the link from a to s needs to be done indirectly. The USE/DEFINE pair provide a string that is used to link the two.\\n\\nSo for your example I would add a couple of extra rules:\\n\\n\\n....\\nRULE(Btree) argsref := USE(Btree, 'args');\\nRULE(Btree) predicat := foncteur '(' argsRef ')' conspred($1,$3);\\nRULE(Btree) arg := atome consarg($1) | variable consarg($1) | predicat transarg($1);\\nRULE(Btree) argsrule := arg OPT(suiteargs) addarg($1,$2);\\nRULE(Btree) args := define(argsrule, 'args');\\nRULE(Modele) fonction := foncteur '(' args ')' consmodele($1,$3);\\n....\\n
\\n\\nargsref being a forward reference to the args rule.\\nand having a new rule which just associates the args rule with the string.\", \"post_time\": \"2012-06-20 11:10:25\" },\n\t{ \"post_id\": 1801, \"topic_id\": 395, \"forum_id\": 8, \"post_subject\": \"Re: How to use DEFINE in conjunction with USE for NLP parsin\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for posting! The team is currently reviewing your question and a response will be posted soon.\", \"post_time\": \"2012-06-19 16:59:05\" },\n\t{ \"post_id\": 1789, \"topic_id\": 395, \"forum_id\": 8, \"post_subject\": \"How to use DEFINE in conjunction with USE for NLP parsing\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI am trying to use a forward reference in a PARSE grammar. \\nSample given in the documentation is not clear enough to me. It is said necessary to define the symbol in a rule as below :\\n\\nRULE a := USE('symbol');\\nRULE b := 'pattern';\\nRULE s := DEFINE(b,'symbol');
\\n\\nI don't understand why it is necessary to have this complex references. I think naively that it should be transparent with the Tomita parsing.\\n\\nAnyway, my code is as below : \\n\\n....\\nRULE(Btree) predicat := foncteur '(' USE(Btree,'args') ')' conspred($1,$3);\\nRULE(Btree) arg := atome consarg($1) | variable consarg($1) | predicat transarg($1);\\nRULE(Btree) args := arg OPT(suiteargs) addarg($1,$2);\\nRULE(Modele) fonction := foncteur '(' args ')' consmodele($1,$3);\\n....
\\n\\nI dont know where to put DEFINE action (and everything else) to refer to recursive symbol args which is argument of a function and function also. \\nCould you please help me ?\\n\\nThanks.\\nJM.\", \"post_time\": \"2012-06-16 10:43:56\" },\n\t{ \"post_id\": 1819, \"topic_id\": 403, \"forum_id\": 8, \"post_subject\": \"Re: HPCC/HDFS Connector\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi Chhaya,\\nThe H2H connector requires hadoop v1.x, it looks like you’re using an older version.\\nRemember to install the H2H connector and libhdfs on each of the HPCC nodes.\\n\\nAlso, HADOOP_LOCATION should point to the top level directory of your Hadoop install location. \\n\\nIt might be necessary to add the libhdfs location to the runtime ld_library_path by adding a line to the conf file (/opt/HPCCSystems/etc/HPCCSystems/hdfsconnector.conf):\\n\\n"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/hadoop/hadoop-0.20.203.0/Linux-i386-32/lib"\", \"post_time\": \"2012-06-21 13:19:45\" },\n\t{ \"post_id\": 1818, \"topic_id\": 403, \"forum_id\": 8, \"post_subject\": \"HPCC/HDFS Connector\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nWe are trying to use H2H connector to read from and write to HDFS.\\nWe have a two node HPCC cluster and Hadoop installed on separate machine.\\n \\nInstalled H2H connector by following the instructions given in the documentation for installing the H2H connector. These are the steps we followed and some observations :\\n1.\\tInstalled H2H on Thor node. \\n1.\\tSet the HADOOP_LOCATION as /home/hadoop/hadoop-0.20.203.0 in hdfsconnector.conf\\n2.\\tPush the configuration file in both the nodes\\nBut the problem is when i run code for reading or writing from/to HDFS it gives an error:\\n Script starting\\nRunning as user: hpcc\\nRunning mode: \\nIncoming params: -si -nodeid 0 -clustercount 1 -reclen 27 -filename /user/hadoop/Test/MyData1 -format FLAT -host 172.20.12.62 -port 50070 nodeid: 0\\n/opt/HPCCSystems/bin/hdfsconnector: error while loading shared libraries: libhdfs.so.0: cannot open shared object file: No such file or directory\\nIn var/log/HPCCSytems/mydataconnector\\n \\nI checked the hadoop server for presence of libhdfs.so.0, but it is there in /home/hadoop/hadoop-0.20.203.0/Linux-i386-32/lib.\\n1. Is there any problem in setting the HADOOO_LOCATION? \\n2. We would also like to know H2H is compatible with which versions of hadoop.\", \"post_time\": \"2012-06-21 06:14:31\" },\n\t{ \"post_id\": 1829, \"topic_id\": 406, \"forum_id\": 8, \"post_subject\": \"Re: HPCC/SAS Connector\", \"username\": \"arjuna chala\", \"post_text\": \"Arti,\\n\\nThank you for inquiring.\\n\\nUnfortunately we do not have the plugin ready at this moment. However, we just completed a release of a R plugin if you would be interested. 
For SAS, we are planning a 3rd quarter plugin release.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2012-06-25 18:10:46\" },\n\t{ \"post_id\": 1828, \"topic_id\": 406, \"forum_id\": 8, \"post_subject\": \"HPCC/SAS Connector\", \"username\": \"Arti\", \"post_text\": \"Hi,\\n\\nIs their any interfacing available between HPCC and SAS?\", \"post_time\": \"2012-06-25 13:09:48\" },\n\t{ \"post_id\": 1863, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI was afraid that would be the result, but "hope springs eternal" \\n\\nRichard\", \"post_time\": \"2012-06-28 14:36:57\" },\n\t{ \"post_id\": 1862, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":311dd4y4]FWIW, you only actually need the SORTed records for your median and top100 calculations. All the rest can be accomplished without sorting at all. What happens if you split this into two jobs -- one with and one without SORT?\\n\\nI did notice that, but concluded that it probably wouldn't make a difference in the final analysis. However, since I've been wrong a couple of times before, I decided to test the theory this morning. Fortunately, I guessed correctly. I used another TABLE in a separate job to create the other stats and it took almost exactly the same amount of time as my final code. In yet another job I created the two stats you mentioned without the others getting involved, but with my split/sort idea, and that one took around the same amount of time as well. No gain. A third job with only the two stats and a naive sort ran for an hour before I killed it.\\n\\nLessons learned. Fun stuff.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-28 14:30:10\" },\n\t{ \"post_id\": 1858, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nFWIW, you only actually need the SORTed records for your median and top100 calculations. All the rest can be accomplished without sorting at all. What happens if you split this into two jobs -- one with and one without SORT?\\n\\nRichard\", \"post_time\": \"2012-06-27 20:05:24\" },\n\t{ \"post_id\": 1856, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"[quote="dustin.skaggs@lexisnexis.com":1qgj32um]I don't think that the '&' is doing what you think. '&' is a local operation and not a global append. You might need to do a MERGE instead.\\n\\nThanks, Dustin. I missed that in the documentation for that operator. Here is my revised code, for any Interested Readers:\\n\\n
initialDS := $.Data.ds;\\n\\nPeopleSummary := RECORD\\n STRING company_id := initialDS.company_id;\\n UNSIGNED4 num := COUNT(GROUP);\\nEND;\\n\\nsummaryDS := TABLE(initialDS,PeopleSummary,company_id,UNSORTED,MERGE);\\n\\nmultiEmployeeDS := SORT(summaryDS(num > 1),-num);\\noneEmployeeDS := summaryDS(num = 1);\\nsortedFinalDS := MERGE(multiEmployeeDS,oneEmployeeDS,SORTED(-num));\\n\\ntotalCompanyCount := COUNT(sortedFinalDS);\\n\\nOUTPUT(totalCompanyCount,NAMED('TotalCompanies'));\\nOUTPUT(SUM(sortedFinalDS,num),NAMED('TotalPeople'));\\nOUTPUT(SUM(sortedFinalDS[1..100],num),NAMED('TotalPeopleInTop100Companies'));\\nOUTPUT(sortedFinalDS[1].num,NAMED('MaximumPeopleCount'));\\nOUTPUT(AVE(sortedFinalDS,num),NAMED('AveragePeopleCount'));\\nOUTPUT(sortedFinalDS[totalCompanyCount DIV 2].num,NAMED('MedianPeopleCount'));\\nOUTPUT(COUNT(sortedFinalDS(num=1)),NAMED('CountOfOneEmployeeCompanies'));
\\n\\nThis ran three minutes slower than the last version, but that may have been due to variances in the environment (I'm using virtual hosts for my nodes).\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-06-27 19:57:09\" },\n\t{ \"post_id\": 1854, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"dustin.skaggs@lexisnexis.com\", \"post_text\": \"I don't think that the '&' is doing what you think. '&' is a local operation and not a global append. You might need to do a MERGE instead. Here's an example that shows the potential problem. (I ran this code on a 10 node thor.)\\n\\nrec := {\\n\\tstring s1,\\n\\tunsigned4 cnt,\\n};\\n\\nds1 := dataset([{'a',1},{'b',1},{'c',1}], rec);\\nds2 := sort(dataset([{'d', 2},{'e', 3},{'f',4},{'g',5}], rec), -cnt);\\nds3 := ds2 & ds1;\\n\\noutput(ds3);\\n/* This was the result.\\ng\\t5\\na\\t1\\nb\\t1\\nc\\t1\\nf\\t4\\ne\\t3\\nd\\t2\\n*/
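For completeness, a minimal sketch of the MERGE alternative mentioned above, assuming both inputs are first sorted on the merge key:\n\nds1s := sort(ds1, -cnt);\nds2s := sort(ds2, -cnt);\nds3m := MERGE(ds1s, ds2s, SORTED(-cnt)); // global merge preserving the -cnt order\noutput(ds3m);\n/* expected order: g 5, f 4, e 3, d 2, a 1, b 1, c 1 */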
\\n\\n-Dustin\", \"post_time\": \"2012-06-27 19:12:33\" },\n\t{ \"post_id\": 1853, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nI would be interested in any feedback or pointers for further enhancements. I'd like to be able to understand this stuff so new code will be relatively high-performance the first time around and not take days to optimize. Knowing if/when to explicitly distribute record sets, in particular, seems to be a black art except for a few cases.
KUDOS!!! This is a great example of the intersection between knowing the tool (using TABLE's MERGE option, the & instead of + to append, and SORTED -- all three relatively little-used bits of language) and knowing your data (understanding that splitting out the 1-guy results would give you a much faster SORT).\\n\\nI can certainly tell you've read the Language Reference! Congrats!!\\n\\nThe key to DISTRIBUTE is simply understanding that its real purpose is to allow you to use the LOCAL option on subsequent operations (usually making things go faster).\\n\\nRichard\", \"post_time\": \"2012-06-27 18:29:57\" },\n\t{ \"post_id\": 1852, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"bforeman\", \"post_text\": \"That's a nice trick with MERGE. It seems in your case that was the secret to avoid the skews.\\n\\nThanks for the feedback!\\n\\nBob\", \"post_time\": \"2012-06-27 18:02:35\" },\n\t{ \"post_id\": 1851, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"I've been doing a bit of playing, trying for better performance, and I wound up with this code:\\n\\n
initialDS := $.Data.ds;\\n\\nPeopleSummary := RECORD\\n STRING company_id := initialDS.company_id;\\n UNSIGNED4 num := COUNT(GROUP);\\nEND;\\n\\nsummaryDS := TABLE(initialDS,PeopleSummary,company_id,UNSORTED,MERGE);\\n\\nmultiEmployeeDS := SORT(summaryDS(num > 1),-num);\\noneEmployeeDS := summaryDS(num = 1);\\nsortedFinalDS := SORTED(multiEmployeeDS & oneEmployeeDS,-num);\\n\\ntotalCompanyCount := COUNT(sortedFinalDS);\\n\\nOUTPUT(totalCompanyCount,NAMED('TotalCompanies'));\\nOUTPUT(SUM(sortedFinalDS,num),NAMED('TotalPeople'));\\nOUTPUT(SUM(sortedFinalDS[1..100],num),NAMED('TotalPeopleInTop100Companies'));\\nOUTPUT(sortedFinalDS[1].num,NAMED('MaximumPeopleCount'));\\nOUTPUT(AVE(sortedFinalDS,num),NAMED('AveragePeopleCount'));\\nOUTPUT(sortedFinalDS[totalCompanyCount DIV 2].num,NAMED('MedianPeopleCount'));\\nOUTPUT(COUNT(sortedFinalDS(num=1)),NAMED('CountOfOneEmployeeCompanies'));
\\n\\nOther than some options on TABLE, the other big change was optimizing the code using knowledge of the data. I split the summarized record set into two, with all the one-employee records in one set and everything else in the other, then sorted only the latter and glued them back together. The above code runs in half the time as the original (30 minutes versus 61 minutes).\\n\\nI would be interested in any feedback or pointers for further enhancements. I'd like to be able to understand this stuff so new code will be relatively high-performance the first time around and not take days to optimize. Knowing if/when to explicitly distribute record sets, in particular, seems to be a black art except for a few cases.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-27 17:14:26\" },\n\t{ \"post_id\": 1841, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"bforeman\", \"post_text\": \"So are you saying that the LOCAL SORT is more efficient, and the results are the same? Sometimes SKEW can be expected, as when you are outputing just the first 100 records it will be reading off of one node. Could that be what you are seeing?\", \"post_time\": \"2012-06-26 18:20:05\" },\n\t{ \"post_id\": 1840, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"Hmmm. It may be that the sort that is taking so long is part of the TABLE function rather than the explicit SORT further down the file. I just noticed that there was a second sort in the graph and it was labeled "Local Sort" whereas the first sort (the one taking so long) was just "Sort". Does that make sense?\", \"post_time\": \"2012-06-26 17:14:43\" },\n\t{ \"post_id\": 1839, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"Okay, I got the explanation but the results seem to be what they were before. As I'm writing this, my three-way Thor is showing 1%, 1% and 74% CPU load during the sort process. It seems unchanged as compared to the original code.\\n\\nThoughts?\", \"post_time\": \"2012-06-26 16:40:33\" },\n\t{ \"post_id\": 1838, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"Got it. Thanks, Bob!\", \"post_time\": \"2012-06-26 15:51:32\" },\n\t{ \"post_id\": 1837, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nDid you try this?\\n\\nValidnum := summaryDS(num > 0);\\nDistDS := DISTRIBUTE(validnum,HASH32(num));\\nsortedFinalDS := SORT(DistDS,-num,LOCAL);
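As a side note, one quick way to see how evenly a DISTRIBUTE has spread the rows is to count records per node. A sketch (it assumes STD.System.Thorlib.Node(), which returns the local node number, and the DistDS attribute above):\n\nIMPORT STD;\nperNode := TABLE(DistDS,\n                 {UNSIGNED2 node := STD.System.Thorlib.Node(),\n                  UNSIGNED4 cnt  := COUNT(GROUP)},\n                 STD.System.Thorlib.Node(), LOCAL);\nOUTPUT(SORT(perNode, node)); // one row per node with its record count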
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-26 15:45:53\" },\n\t{ \"post_id\": 1836, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"I did try a DISTRIBUTE but ran into an error telling me that I was misusing it (basically; I don't remember the exact error). I couldn't find anything in the documentation to tell me how to do this properly, so I punted with the SKEW. Can you supply some details on how to implement the DISTRIBUTE properly?\", \"post_time\": \"2012-06-26 15:10:35\" },\n\t{ \"post_id\": 1835, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Re: Avoiding skew\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nHave you tried using a DISTRIBUTE using HASH on the SORT field, and then SORT with the LOCAL option?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-26 15:06:43\" },\n\t{ \"post_id\": 1834, \"topic_id\": 407, \"forum_id\": 8, \"post_subject\": \"Avoiding skew\", \"username\": \"DSC\", \"post_text\": \"Consider:\\n\\ninitialDS := $.Data.ds;\\n\\nPeopleSummary := RECORD\\n\\tSTRING\\t\\tcompany_id := initialDS.company_id;\\n\\tUNSIGNED4\\tnum := COUNT(GROUP);\\nEND;\\n\\nsummaryDS := TABLE(initialDS,PeopleSummary,company_id);\\nsortedFinalDS := SORT(summaryDS(num > 0),-num,SKEW(1.0));\\ntotalCompanyCount := COUNT(sortedFinalDS);\\n\\nOUTPUT(totalCompanyCount,NAMED('TotalCompanies'));\\nOUTPUT(SUM(sortedFinalDS,num),NAMED('TotalPeople'));\\nOUTPUT(SUM(sortedFinalDS[1..50],num),NAMED('TotalPeopleInTop50Companies'));\\nOUTPUT(sortedFinalDS[1].num,NAMED('MaximumPeopleCount'));\\nOUTPUT(AVE(sortedFinalDS,num),NAMED('AveragePeopleCount'));\\nOUTPUT(sortedFinalDS[totalCompanyCount DIV 2].num,NAMED('MedianPeopleCount'));\\nOUTPUT(COUNT(sortedFinalDS(num=1)),NAMED('CountOfOneEmployeeCompanies'));
\\n\\nThe general idea here is that I have a file containing records of people who work for companies and I want some general statistics on that dataset. The original dataset is fairly well distributed over three Thor nodes and contains a little over 183M records. The summaryDS record set winds up being heavily weighted in one direction, in that over 94M records will have their num field set to a value of 1.\\n\\nThe problem is with the SORT() function. When processing the sort, approximately 140M records are sorted quickly over all three nodes (I'm getting this by watching the graphs update and by watching the Cluster Processes section of ECL Watch) but the final 43M are sorted by only one node. This not only takes an incredibly long time, but it requires that I add the SKEW option to the SORT (if I don't add the SKEW then the sort will fail but only after burning an hour and getting almost finished).\\n\\nThe above code works, but it is not efficient and I would like to better understand how to make it more efficient. Any recommendations?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-26 12:12:28\" },\n\t{ \"post_id\": 2067, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Just a heads up, as described in topic: cannot call function in non-global context, you can use NOTHOR around the APPLY.\", \"post_time\": \"2012-07-24 19:46:19\" },\n\t{ \"post_id\": 1955, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nDeleting old sub-files after their data has been consolidated would be an operational question. We tend to "never throw anything away" but that doesn't mean that we leave it all on the production disks, either. I actually don't know how we handle it.\\n\\nYou can certainly try the DeleteLogicalFile within an APPLY -- let me know if it works. \\n\\nRichard\", \"post_time\": \"2012-07-12 18:22:34\" },\n\t{ \"post_id\": 1952, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Following the steps proposed above, and in the Creating and Maintaining SuperFiles section of the Docs, I notice that there isn't really a description of what to do with all of the newly-redundant detail files that have been consolidated. What is standard practice here? Do you all tend to just leave those around as orphaned logical files, or is there a best practices way to then loop back through the list and delete those files? Perhaps STD.File.DeleteLogicalFile behaves differently in an APPLY loop than STD.File.AddSuperFile, in which case the logic presented further above in this thread would work?\", \"post_time\": \"2012-07-12 17:15:28\" },\n\t{ \"post_id\": 1864, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Richard,\\n\\nAgreed... that's pretty much where I started... but then I was thinking it would be good to allow for the fact that the daily aggregation could fail or be delayed, at which point the 'staging' SuperFile might contain hourly logical files for more than one day. I'd like the daily file to only contain hourly logical files from one day, so that I can use that daily file to run queries against a single day very quickly, as opposed to using the entire "all data" SuperFile. 
This is why I was experimenting with pulling out only those hourly files that met some criteria.\\nAt any rate, it sounds like my specific use case might not be trivial in ECL, and I'm probably over-engineering this anyway, so I'll look at some alternatives, like using ECL after the aggregation to clean up the daily logical files based on timestamps of individual records, or else adding logic to the hourly loader so that it puts data in different staging SuperFiles depending on the date. \\n\\nThanks,\\nJeremy\", \"post_time\": \"2012-06-28 14:37:56\" },\n\t{ \"post_id\": 1861, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n
In order to facilitate iterative loading into HPCC, I'm running an hourly process that pulls data from our servers into an hourly logical file, which is then added to a "stage" SuperFile. I'd then like to run a process each day that:\\nScans the SuperFile for logical files belonging to a given day.\\nMoves those files to a new "daily" SuperFile, which might then be aggregated into a single logical file.\\nRemoves them from the "stage" SuperFile.
OK, now I got what you want -- the process I recommend is described in the "Creating and Maintaining Superfiles" article in the Programmer's Guide, and we also teach it in our Advanced Thor class. Here's the way I would do it:\\n
So the code to do this is something like this:IMPORT STD;\\nnewfilename := 'DailyFile_' + (STRING8)STD.Date.Today();\\nSEQUENTIAL(\\n OUTPUT($.HourlySF_DS,,newfilename),\\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile($.DailySFname,newfilename),\\n STD.File.ClearSuperFile($.HourlySFname),\\n STD.File.FinishSuperFileTransaction());
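The $.HourlySF_DS, $.HourlySFname and $.DailySFname attributes in that snippet are assumed to be defined elsewhere in the same module; purely as an illustration (names and layout below are placeholders, not part of the original example), they might look like:\n\nLayout_Person := RECORD\n  STRING20 firstname;\n  STRING20 lastname;\nEND;\nHourlySFname := '~thor::people::hourly_sf'; // the stage superfile the hourly loads land in\nDailySFname  := '~thor::people::daily_sf';  // the superfile the consolidated daily files go into\nHourlySF_DS  := DATASET(HourlySFname, Layout_Person, FLAT);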
If the data inflow happens 24/7, then I suggest this process be run every night right after the 11PM file is added to the "Hourly" superfile. I would also suggest that you run a similar data consolidation process on a weekly basis.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-28 14:00:02\" },\n\t{ \"post_id\": 1860, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Not sure if that's going to do what I want... maybe I can describe my use case a bit more clearly...\\nIn order to facilitate iterative loading into HPCC, I'm running an hourly process that pulls data from our servers into an hourly logical file, which is then added to a "stage" SuperFile. I'd then like to run a process each day that:\\n\\n
\\nAre there best-practices for iterative loading like this? I know that I can accomplish this via dynamic ECL that is built from a bash script or something, but I was looking for something with just ECL.\", \"post_time\": \"2012-06-28 03:15:12\" },\n\t{ \"post_id\": 1855, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nYou might try using STD.File.PromoteSuperFileList()\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-27 19:52:04\" },\n\t{ \"post_id\": 1849, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Unfortunately, same issue:\\n\\nError: Cannot call function addsuperfile in a non-global context (101, 3), 4055... \\n
\", \"post_time\": \"2012-06-27 15:01:32\" },\n\t{ \"post_id\": 1848, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"Re: SuperFile handling in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nTry this:\\nSEQUENTIAL(STD.File.StartSuperFileTransaction(),\\n APPLY( listOfFiles , STD.File.AddSuperFile( someOtherSuperFile, name )),\\n STD.File.FinishSuperFileTransaction());
A superfile transaction frame must be in SEQUENTIAL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-27 14:55:31\" },\n\t{ \"post_id\": 1847, \"topic_id\": 410, \"forum_id\": 8, \"post_subject\": \"SuperFile handling in ECL\", \"username\": \"jeremy\", \"post_text\": \"Greetings,\\nI'm trying to move logical files around in SuperFiles via ECL. Specifically, I'd like to list the contents of a SuperFile and filter that list a bit... This seems to work:\\n\\nlistOfFiles := STD.File.SuperFileContents( someSuperFile )( STD.Str.WildMatch( name, '*someFilterString*', TRUE ));\\n
\\n\\nThen, I'd like to move those files to a different SuperFile... I thought this would work:\\n\\nSTD.File.StartSuperFileTransaction();\\nAPPLY( listOfFiles , STD.File.AddSuperFile( someOtherSuperFile, name ));\\nSTD.File.FinishSuperFileTransaction();\\n
\\n\\nBut I'm getting an error: \\n\\nError: Cannot call function addsuperfile in a non-global context (98, 1), 4055,...\\n
\\n\\nThoughts?\", \"post_time\": \"2012-06-27 14:15:17\" },\n\t{ \"post_id\": 1950, \"topic_id\": 430, \"forum_id\": 8, \"post_subject\": \"Invoking eclcc programmatically\", \"username\": \"eric.scott\", \"post_text\": \"I'm trying to interact with Thor programmatically from the JVM, using shell calls to ECLPlus.\\n\\nI encountered the problem described in http://hpccsystems.com/bb/viewtopic.php?f=8&t=128&sid=ff72fc552cb3f732ae20849c4bceb262, which provided good guidence in getting the call to work from the command line.\\n\\nSo given:\\n\\nC:/HPPC/ECLSource/ECL/My Files/Test/Test1.ecl\\n\\nEXPORT test1 := module\\n\\tExport testValue := 7;\\nend;
\\n\\nand the test file importTest.ecl:\\n\\nImport Test.test1 as T;\\n\\noutput (T.testValue); \\n
\\nI can call from the windows command line:\\n\\neclcc importTest.ecl -I "C:/HPCC/ECLSource/ECL/My Files/" -E -o importTest.eclxml\\neclplus query owner=hpccdemo server=192.168.203.129 cluster=thor password=hpccdemo @importTest.eclxml
\\n\\nand get:\\n\\nWorkunit W20120711-202538 submitted\\n[Result 1]\\nResult_1\\n7
\\n\\nFine so far, however when I make exactly the same call to eclcc programmatically as a shell call in the JVM, I get this error:\\n\\nimportTest.ecl(1,26): error C2081: Import names unknown module "test"\\nimportTest.ecl(3,9): error C2167: Unknown identifier "T"
\\n\\nThinking that this might have something to do with parsing the arguments, I moved the call into a batch file, and had the same discrepency: testThis.bat worked fine when invoked manually, but generated the error above when invoked programmatically.\\n\\nWhat could be causing this?\\n\\nThanks.\", \"post_time\": \"2012-07-12 14:50:17\" },\n\t{ \"post_id\": 1987, \"topic_id\": 434, \"forum_id\": 8, \"post_subject\": \"Re: Issues With RECORD IFBLOCK's\", \"username\": \"Allan\", \"post_text\": \"Thanks for this.\\n\\nI've actually got this working due to the fact that only fields common to all record types have the same name. I can put these with 'RecordType' and then use\\nPOL AND NOT [fieldlist of common fields] in each IFBLOCK definition.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-16 20:01:32\" },\n\t{ \"post_id\": 1981, \"topic_id\": 434, \"forum_id\": 8, \"post_subject\": \"Re: Issues With RECORD IFBLOCK's\", \"username\": \"ghalliday\", \"post_text\": \"I can see the problem. Unfortunately your definition is actually creating the structure\\n\\n\\nR := RECORD\\n STRING5 RecordType;\\n IFBLOCK(SELF.RecordType = 'POL01')\\n STRING7 a;\\n STRING8 b;\\n STRING9 c;\\n END;\\n IFBLOCK(SELF.RecordType = 'SUB01')\\n STRING1 d;\\n STRING1 e;\\n END;\\nEND;
\\n\\nBecause the record already has fields a,b,c so they are not added within the ifblock.\\n\\nI think I would be tempted in your situation to use a nested record. E.g.,\\n\\n\\nPOL := RECORD\\n STRING7 a;\\n STRING8 b;\\n STRING9 c;\\nEND;\\n\\nSUB := RECORD\\n STRING1 a;\\n STRING1 b;\\n STRING1 C;\\n STRING1 d;\\n STRING1 e;\\nEND;\\nR := RECORD\\n STRING5 RecordType;\\n IFBLOCK(SELF.RecordType = 'POL01')\\n POL POL01;\\n END;\\n IFBLOCK(SELF.RecordType = 'SUB01')\\n SUB SUB01;\\n END;\\nEND;\\n
\\n\\nAnd you would then refer to the fields as ds.pol01.a or ds.sub01.a.\", \"post_time\": \"2012-07-16 07:56:43\" },\n\t{ \"post_id\": 1978, \"topic_id\": 434, \"forum_id\": 8, \"post_subject\": \"Re: Issues With RECORD IFBLOCK's\", \"username\": \"Allan\", \"post_text\": \"Thanks for this.\\n\\nActually my big issue is with the inline initialisation. (further down in my original post)\\n\\nI an attempting to create test-data that holds different record formats.\\nThis inline initialisation works fine, if all the fields have different names. However I'm attempting to use the actual record definitions in the definition of the test data record harness, and these do contain fields with the same name.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-15 08:12:42\" },\n\t{ \"post_id\": 1973, \"topic_id\": 434, \"forum_id\": 8, \"post_subject\": \"Re: Issues With RECORD IFBLOCK's\", \"username\": \"ghalliday\", \"post_text\": \"Yes, it looks confusing.\\n\\nIt is because when a dataset or a record is included within a record definition it adds a field for each field in the original record..... except if there is one there already with the same name.\\n\\nI agree it is a bit strange, and quite possibly it should be a warning for a record.\\n\\nI think it might go back to the syntax for defining an index- where you first specify which fields you want keyed, and then can say the whole dataset to mean use everything else in the payload. \\n\\niu := index(ds, { f1, f2 }, { ds });\\n\\nAlthough thinking about it - that is slightly different, so it must have been something else.\\n\\nIf you add an issue I'll at least look at adding a warning for a record - like your example.\", \"post_time\": \"2012-07-13 19:16:55\" },\n\t{ \"post_id\": 1969, \"topic_id\": 434, \"forum_id\": 8, \"post_subject\": \"Issues With RECORD IFBLOCK's\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIf I attempt this construct:\\n\\nR := RECORD\\n STRING5 RecordType;\\n IFBLOCK(SELF.RecordType = 'POL01')\\n STRING7 a;\\n STRING8 b;\\n STRING9 c;\\n END;\\n IFBLOCK(SELF.RecordType = 'SUB01')\\n STRING1 a;\\n STRING1 b;\\n STRING1 C;\\n STRING1 d;\\n STRING1 e;\\n END;\\nEND;\\n
\\nI get error:\\n\\nA field called a is already defined in this record.\\n
\\nFare enough, however if I try:\\n\\nPOL := RECORD\\n STRING7 a;\\n STRING8 b;\\n STRING9 c;\\nEND;\\n\\nSUB := RECORD\\n STRING1 a;\\n STRING1 b;\\n STRING1 C;\\n STRING1 d;\\n STRING1 e;\\nEND;\\nR := RECORD\\n STRING5 RecordType;\\n IFBLOCK(SELF.RecordType = 'POL01')\\n POL;\\n END;\\n IFBLOCK(SELF.RecordType = 'SUB01')\\n SUB;\\n END;\\nEND;\\n
\\nIt syntax checks ok.\\nBut that is not all, If I attempt to do an inline initialisation with say:\\n\\nTestData := DATASET([{'POL01','12345','ABI123','C'},\\n {'SUB01','1' ,'2' ,'3', '4' ,'5' }],R);\\n
\\nI get error: Too many initializers (value '3')\\nIf I try:\\n\\nTestData := DATASET([{'POL01','12345','ABI123','C'},\\n {'SUB01','1' ,'2' }],R);\\n
\\nThis syntax checks ok but is obviously wrong for the initialisaion of the 'SUB01' record.\\n\\nThis is server 3.8 with IDE of 6.6.2.4.682.1\\n\\nAny idea what's going on?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-13 16:09:12\" },\n\t{ \"post_id\": 2214, \"topic_id\": 435, \"forum_id\": 8, \"post_subject\": \"Re: XML parsing question\", \"username\": \"dlingle\", \"post_text\": \"The other idea on this thread is if your XML is broken out into fields instead of a single string\\nthen you can do as richard suggested and have implement record structure that includes all the fields but in the transform from a project/join operation perform a self := [] which will set the\\nnon existing fields (e.g. in your case <cd-title> which is an optional field)\\nto null automatically.\", \"post_time\": \"2012-08-13 20:28:47\" },\n\t{ \"post_id\": 1985, \"topic_id\": 435, \"forum_id\": 8, \"post_subject\": \"Re: XML parsing question\", \"username\": \"rtaylor\", \"post_text\": \"Michael,\\n\\nBasically, you just need to code for the superset of all possible fields, something,like this:\\nd := DATASET([{'<library><book isbn="123456789X">' +\\n\\t'<author>Bayliss</author><title>A Way Too Far</title></book>' +\\n\\t'<book isbn="1234567801">' +\\n '<cd-title>DEF</cd-title>' +// <!-- This book has a CD with it -->\\n\\t'<author>Smith</author><title>A Way Too Short</title></book>' +\\n\\t'</library>'}],\\t{STRING line });\\n\\nrform := RECORD\\n STRING author := XMLTEXT('author');\\n STRING title \\t:= XMLTEXT('title');\\n STRING isbn \\t:= XMLTEXT('@isbn');\\n STRING CD \\t:= XMLTEXT('cd-title');\\nEND;\\nbooks := PARSE(d,line,rform,XML('library/book'));\\noutput(books);
HTH,\\n\\nRichard\", \"post_time\": \"2012-07-16 17:37:08\" },\n\t{ \"post_id\": 1984, \"topic_id\": 435, \"forum_id\": 8, \"post_subject\": \"XML parsing question\", \"username\": \"michael-mason\", \"post_text\": \"Let's say I have some XML structured like this:\\n\\n<library>\\n <book>\\n <title>XYZ</title>\\n <author>John Smith</author>\\n </book>\\n <book>\\n <title>ABC</title>\\n <author>Jame Doe</author>\\n <cd-title>DEF</cd-title> <!-- This book has a CD with it -->\\n </book>\\n</library>\\n\\nBasically, I have some XML data where each record doesn't have the same fields. Some have less, some have more. How would I compose an ECL expression so that I could get to 'cd-title' in the records that have 'cd-title' data?\\n\\nThanks,\", \"post_time\": \"2012-07-16 17:11:50\" },\n\t{ \"post_id\": 2029, \"topic_id\": 436, \"forum_id\": 8, \"post_subject\": \"Re: Is our dataset suitable for parsing with ECL?\", \"username\": \"michael-mason\", \"post_text\": \"Thank you for your reply. I'll start looking deeper into PARSE. I've read about PARSE, but I wasn't aware that you could associate actions to fire when certain patterns are matched -- that may be just what we're looking for.\\n\\nThanks,\\n-Mike\", \"post_time\": \"2012-07-19 15:45:56\" },\n\t{ \"post_id\": 1988, \"topic_id\": 436, \"forum_id\": 8, \"post_subject\": \"Re: Is our dataset suitable for parsing with ECL?\", \"username\": \"ghalliday\", \"post_text\": \"I'm not an expert on using ECL, but my approach would be...\\n\\nRead in the file as an xml dataset, and then use PARSE to process the contents of the <data> field to extract the contents.\\n\\nIn this case I would be very tempted to use the version of parse that allows you to associate actions with matched patterns. You could have a dataset of headers, a dataset of rows, each containing a dataset of values, and have them each built up as the data is matched.\\n\\nUnfortunately that version of PARSE only really works with STRING data at the moment - rather than UNICODE/UTF8, but it looks like your data may well fit into that category anyway.\", \"post_time\": \"2012-07-17 08:30:13\" },\n\t{ \"post_id\": 1986, \"topic_id\": 436, \"forum_id\": 8, \"post_subject\": \"Is our dataset suitable for parsing with ECL?\", \"username\": \"michael-mason\", \"post_text\": \"We have a semi-structured data set with an XML-like format. Something like this:\\n\\n<document>\\n... Lots of free form text ...\\n<page>\\n .. Lots of free form text ...\\n <table>\\n <data> \\n total-widgets-made total-items-sold total-profit\\n ------------------------------------------------\\n 2009 5524 5000 585.55\\n 2010 4500 4400 333.33\\n\\n </data>\\n </table>\\n</page>\\n...\\n<page>\\n <table>\\n another table with a different number of rows/columns\\n </table>\\n</page>\\n</document>\\n
\\nSo, the meat of what we want is deep inside of a <data> tag which is inside of a <table> tag. We're not sure on which <page> the data is located, but we need to go into the <data> for each <table>, and find a specific column name. In this case, the column name is in free form text.\\n\\nLet's say we want to search through the <data> tags to find the words "total-widgets-made". We'd have to go through all <page>s and then search through all <table>s. Get the <data> for each <table>, somehow pull out the first N words (these will be the column-name headings). We don't know how many columns each table has, so I suppose we'd be searching for a newline to indicate we're done looking at column names. Depending on which word matched "total-widgets-made", we'd have to process the free form text and find the Nth column in each row to pull out the data we want.\\n\\nSo, I guess the question is: could we use text within the <data> tag to figure out how to parse further into the <data> tag to pull out an integer value from within the <data> tag? Is ECL an appropriate tool for a job like this? I'm guessing that the data is too unstructured to effectively apply ECL to it. I'm currently trying to figure out how we can pre process the data to put it into a more amenable format for HPCC/ECL.\", \"post_time\": \"2012-07-16 18:30:52\" },\n\t{ \"post_id\": 2026, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"jsmith\", \"post_text\": \"It looks like a core dump, so there should be a core* file in /var/lib/HPCCSystems/<thorinstancedir>/\\n\\nIf that's there and you have gdb, it should be possible to get a stack dump from the core dump, by running:\\n\\ncd /var/lib/HPCCSystems/<thorinstancedir>\\ngdb /opt/HPCCSystems/bin/thorslave_lcr <corefilename>\\n\\nand then running 'where' at the gdb prompt.\", \"post_time\": \"2012-07-19 12:34:20\" },\n\t{ \"post_id\": 2025, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"nvasil\", \"post_text\": \"It is a little bit difficult\\n\\nBasically it has a BEGINC++ that links to some external libraries. It works perfectly locally and on hthor. It fails though on thor. It used to work on a previous version before 3.6\\n\\nIf I compile HPCC on debug mode is it possible to get anything? 
\\nIf Thor dies can we find out why?\", \"post_time\": \"2012-07-19 12:04:49\" },\n\t{ \"post_id\": 2024, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"jsmith\", \"post_text\": \"Would it be possible to provide me with an example that has this problem?\\nThanks.\", \"post_time\": \"2012-07-19 11:54:41\" },\n\t{ \"post_id\": 2023, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"nvasil\", \"post_text\": \"As a note this example used to work on a version before 3.6\", \"post_time\": \"2012-07-19 11:49:47\" },\n\t{ \"post_id\": 2022, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"nvasil\", \"post_text\": \"I have actually upgraded to 3.8.1 and I get the same\\n\\n/var/log/HPCCSystems/mythor/thorslave.1.2012_07_18.log\\n\\n\\n00000000 2012-07-18 21:24:10 17491 17491 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thorslave.1.2012_07_18.log\\n00000001 2012-07-18 21:24:10 17491 17491 Build community_3.8.0-1\\n00000002 2012-07-18 21:24:10 17491 17491 registering 192.168.1.67:20100 - master 192.168.1.67:20000\\n00000003 2012-07-18 21:24:10 17491 17491 Initialization received\\n00000004 2012-07-18 21:24:10 17491 17491 Master build: community_3.8.0-1\\n00000005 2012-07-18 21:24:10 17491 17491 Registration confirmation sent\\n00000006 2012-07-18 21:24:10 17491 17491 verifying mp connection to rest of cluster\\n00000007 2012-07-18 21:24:10 17491 17491 verified mp connection to rest of cluster\\n00000008 2012-07-18 21:24:10 17491 17491 registered 192.168.1.67:20100\\n00000009 2012-07-18 21:24:10 17491 17491 calling initClientProcess\\n0000000A 2012-07-18 21:24:10 17491 17491 ThorSlave Version LCR - 4.1 started\\n0000000B 2012-07-18 21:24:10 17491 17491 Slave 192.168.1.67:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000C 2012-07-18 21:24:10 17491 17491 Using querySo directory: /var/lib/HPCCSystems/queries/mythor\\n0000000D 2012-07-18 21:24:10 17491 17491 RoxieMemMgr: Setting memory limit to 6239027200 bytes (5950 pages)\\n0000000E 2012-07-18 21:24:10 17491 17491 RoxieMemMgr: 5952 Pages successfully allocated for the pool - memsize=6241124352 base=0x7fe9dbf00000 alignment=1048576 bitmapSize=186\\n0000000F 2012-07-18 21:24:10 17491 17491 FileCache: limit = 1800, purgeN = 10\\n00000010 2012-07-18 21:24:10 17491 17510 Watchdog: thread running\\n00000011 2012-07-18 21:25:11 17491 17491 Started wuid=W20120718-212451, user=, graph=graph1\\n\\n00000012 2012-07-18 21:25:11 17491 17491 Using query: /var/lib/HPCCSystems/queries/mythor/V971381087_libW20120718-212451.so\\n00000013 2012-07-18 21:25:11 17491 17491 CRC allocator OFF\\n00000014 2012-07-18 21:25:11 17491 17491 Packed allocator OFF\\n00000015 2012-07-18 21:25:11 17491 17491 Global memory size = 5950 MB, large mem size = 4462 MB\\n00000016 2012-07-18 21:25:12 17491 17491 New Graph started : graph1\\n00000017 2012-07-18 21:25:12 17491 17491 temp directory cleared\\n00000018 2012-07-18 21:25:12 17491 17491 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 142102, /var/lib/HPCCSystems/hpcc-mirror/thor = 0\\n00000019 2012-07-18 21:25:12 17491 17491 Key file cache size set to: 4\\n0000001A 2012-07-18 21:25:12 17491 17491 GraphInit: W20120718-212451graph1, graphId=4\\n0000001B 2012-07-18 21:25:12 17491 17491 deserializeMPTag: tag = 65546\\n0000001C 2012-07-18 21:25:12 17491 17491 deserializeMPTag: tag = 65542\\n0000001D 2012-07-18 21:25:12 17491 17491 
deserializeMPTag: tag = 65543\\n0000001E 2012-07-18 21:25:12 17491 17491 deserializeMPTag: tag = 65544\\n0000001F 2012-07-18 21:25:12 17491 17491 Add: Launching graph thread for graphId=4\\n00000020 2012-07-18 21:25:12 17491 17562 Running graph [global] : <graph>\\n <node id="5" label="Child Dataset">\\n <att name="definition" value="/e/ismion/git/paperboat/ecl-pb/pb/karnagio.ecl(69,3)"/>\\n <att name="name" value="getuint32tables"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="getuint32tables(' --references_in=dense$double$0 --k_neighbors=1 --distances_out=dense$double$3...', INTERNAL('gl2')); "/>\\n <att name="recordSize" value="17"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="6" label="Output Result #4">\\n <att name="definition" value="stdin:(45,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="17"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="5_0" source="5" target="6"/>\\n </graph>\\n - graph(graph1, 4)\\n00000021 2012-07-18 21:25:12 17491 17562 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(workunitwrite, 6)\\n00000000 2012-07-18 21:25:13 17797 17797 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thorslave.1.2012_07_18.log\\n00000001 2012-07-18 21:25:13 17797 17797 Build community_3.8.0-1\\n00000002 2012-07-18 21:25:13 17797 17797 registering 192.168.1.67:20100 - master 192.168.1.67:20000\\n00000003 2012-07-18 21:25:13 17797 17797 Initialization received\\n00000004 2012-07-18 21:25:13 17797 17797 Master build: community_3.8.0-1\\n00000005 2012-07-18 21:25:13 17797 17797 Registration confirmation sent\\n00000006 2012-07-18 21:25:13 17797 17797 verifying mp connection to rest of cluster\\n00000007 2012-07-18 21:25:13 17797 17797 verified mp connection to rest of cluster\\n00000008 2012-07-18 21:25:13 17797 17797 registered 192.168.1.67:20100\\n00000009 2012-07-18 21:25:13 17797 17797 calling initClientProcess\\n0000000A 2012-07-18 21:25:13 17797 17797 ThorSlave Version LCR - 4.1 started\\n0000000B 2012-07-18 21:25:13 17797 17797 Slave 192.168.1.67:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000C 2012-07-18 21:25:13 17797 17797 Using querySo directory: /var/lib/HPCCSystems/queries/mythor\\n0000000D 2012-07-18 21:25:13 17797 17797 RoxieMemMgr: Setting memory limit to 6239027200 bytes (5950 pages)\\n0000000E 2012-07-18 21:25:13 17797 17797 RoxieMemMgr: 5952 Pages successfully allocated for the pool - memsize=6241124352 base=0x7f3b43f00000 alignment=1048576 bitmapSize=186\\n0000000F 2012-07-18 21:25:13 17797 17797 FileCache: limit = 1800, purgeN = 10\\n00000010 2012-07-18 21:25:13 17797 17816 Watchdog: thread running\\n
\\n\\n/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n\\n\\n00000001 2012-07-18 21:24:10 17494 17494 Build community_3.8.0-1\\n00000002 2012-07-18 21:24:10 17494 17494 calling initClientProcess Port 20000\\n00000003 2012-07-18 21:24:10 17494 17494 Found file 'thorgroup', using to form thor group\\n00000004 2012-07-18 21:24:10 17494 17494 RoxieMemMgr: Setting memory limit to 6239027200 bytes (5950 pages)\\n00000005 2012-07-18 21:24:10 17494 17494 RoxieMemMgr: 5952 Pages successfully allocated for the pool - memsize=6241124352 base=0x7f6007700000 alignment=1048576 bitmapSize=186\\n00000006 2012-07-18 21:24:10 17494 17494 Starting watchdog\\n00000008 2012-07-18 21:24:10 17494 17494 ThorMaster version 4.1, Started on 192.168.1.67:20000\\n00000007 2012-07-18 21:24:10 17494 17505 Started watchdog\\n00000009 2012-07-18 21:24:10 17494 17494 Thor name = mythor, queue = thor.thor, nodeGroup = mythor\\n0000000A 2012-07-18 21:24:10 17494 17494 Creating sentinel file thor.sentinel for rerun from script\\n0000000B 2012-07-18 21:24:10 17494 17494 Waiting for 1 slaves to register\\n0000000C 2012-07-18 21:24:10 17494 17494 Verifying connection to slave 1\\n0000000D 2012-07-18 21:24:10 17494 17494 verified connection with 192.168.1.67:20100\\n0000000E 2012-07-18 21:24:10 17494 17494 Slaves connected, initializing..\\n0000000F 2012-07-18 21:24:10 17494 17494 Initialization sent to slave group\\n00000010 2012-07-18 21:24:10 17494 17494 Registration confirmation from 192.168.1.67:20100\\n00000011 2012-07-18 21:24:10 17494 17494 Slave 1 (192.168.1.67:20100) registered\\n00000012 2012-07-18 21:24:10 17494 17494 Slaves initialized\\n00000013 2012-07-18 21:24:10 17494 17494 verifying mp connection to rest of cluster\\n00000014 2012-07-18 21:24:10 17494 17494 verified mp connection to rest of cluster\\n00000015 2012-07-18 21:24:10 17494 17494 ,Progress,Thor,Startup,mythor,mythor,thor.thor,//192.168.1.67/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n00000016 2012-07-18 21:24:10 17494 17494 Listening for graph\\n00000017 2012-07-18 21:24:10 17494 17494 ThorLCR(192.168.1.67:20000) available, waiting on queue thor.thor\\n00000018 2012-07-18 21:25:10 17494 17504 SYS: PU= 18% MU= 28% MAL=1947408304 MMP=1947209728 SBK=198576 TOT=1901836K RAM=4653960K SWP=112640K\\n00000019 2012-07-18 21:25:11 17494 17494 Processing wuid=W20120718-212451, graph=graph1 from agent: 192.168.1.67\\n0000001A 2012-07-18 21:25:11 17494 17494 ,Progress,Thor,Start,mythor,W20120718-212451,graph1,,mythor,thor.thor\\n0000001B 2012-07-18 21:25:11 17494 17494 Saving dll: /var/lib/HPCCSystems/queries/mythor/V971381087_libW20120718-212451.so\\n0000001C 2012-07-18 21:25:11 17494 17494 Started wuid=W20120718-212451, user=, graph=graph1\\n\\n0000001D 2012-07-18 21:25:11 17494 17494 Query /var/lib/HPCCSystems/queries/mythor/V971381087_libW20120718-212451.so loaded\\n0000001E 2012-07-18 21:25:11 17494 17494 CRC allocator OFF\\n0000001F 2012-07-18 21:25:11 17494 17494 Packed allocator OFF\\n00000020 2012-07-18 21:25:11 17494 17494 Global memory size = 5950 MB, large mem size = 4462 MB\\n00000021 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65537\\n00000022 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65538\\n00000023 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65539\\n00000024 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65540\\n00000025 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65541\\n00000026 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65542\\n00000027 2012-07-18 21:25:11 17494 17494 
allocateMPTag: tag = 65543\\n00000028 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65544\\n00000029 2012-07-18 21:25:11 17494 17494 Graph graph1 created\\n0000002A 2012-07-18 21:25:11 17494 17494 Running graph=graph1\\n0000002B 2012-07-18 21:25:11 17494 17494 temp directory cleared\\n0000002C 2012-07-18 21:25:11 17494 17494 Running graph [global] : <graph>\\n <node id="5" label="Child Dataset">\\n <att name="definition" value="/e/ismion/git/paperboat/ecl-pb/pb/karnagio.ecl(69,3)"/>\\n <att name="name" value="getuint32tables"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="getuint32tables(' --references_in=dense$double$0 --k_neighbors=1 --distances_out=dense$double$3...', INTERNAL('gl2')); "/>\\n <att name="recordSize" value="17"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="6" label="Output Result #4">\\n <att name="definition" value="stdin:(45,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="17"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="5_0" source="5" target="6"/>\\n </graph>\\n - graph(graph1, 4)\\n0000002D 2012-07-18 21:25:11 17494 17494 getResultString(gl2,-3)\\n0000002E 2012-07-18 21:25:11 17494 17494 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(workunitwrite, 6)\\n0000002F 2012-07-18 21:25:11 17494 17494 allocateMPTag: tag = 65545\\n00000030 2012-07-18 21:25:11 17494 17494 Query dll: /var/lib/HPCCSystems/queries/mythor/V971381087_libW20120718-212451.so\\n00000031 2012-07-18 21:25:12 17494 17494 ,Progress,Thor,StartSubgraph,mythor,W20120718-212451,1,4,mythor,thor.thor\\n00000032 2012-07-18 21:25:12 17494 17494 allocateMPTag: tag = 65546\\n00000033 2012-07-18 21:25:12 17494 17494 sendGraph took 22 ms - graph(graph1, 4)\\n00000001 2012-07-18 21:25:13 17800 17800 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n00000002 2012-07-18 21:25:13 17800 17800 Build community_3.8.0-1\\n00000003 2012-07-18 21:25:13 17800 17800 calling initClientProcess Port 20000\\n00000004 2012-07-18 21:25:13 17800 17800 Found file 'thorgroup', using to form thor group\\n00000005 2012-07-18 21:25:13 17800 17800 RoxieMemMgr: Setting memory limit to 6239027200 bytes (5950 pages)\\n00000006 2012-07-18 21:25:13 17800 17800 RoxieMemMgr: 5952 Pages successfully allocated for the pool - memsize=6241124352 base=0x7fc767f00000 alignment=1048576 bitmapSize=186\\n00000007 2012-07-18 21:25:13 17800 17800 Starting watchdog\\n00000009 2012-07-18 21:25:13 17800 17800 ThorMaster version 4.1, Started on 192.168.1.67:20000\\n00000008 2012-07-18 21:25:13 17800 17809 Started watchdog\\n0000000A 2012-07-18 21:25:13 17800 17800 Thor name = mythor, queue = thor.thor, nodeGroup = mythor\\n0000000B 2012-07-18 21:25:13 17800 17800 Creating sentinel file thor.sentinel for rerun from script\\n0000000C 2012-07-18 21:25:13 17800 17800 Waiting for 1 slaves to register\\n0000000D 2012-07-18 21:25:13 17800 17800 Verifying connection to slave 1\\n0000000E 2012-07-18 21:25:13 17800 17800 verified connection with 192.168.1.67:20100\\n0000000F 2012-07-18 21:25:13 17800 17800 Slaves connected, initializing..\\n00000010 2012-07-18 21:25:13 17800 17800 Initialization sent to slave group\\n00000011 2012-07-18 21:25:13 17800 17800 Registration confirmation from 192.168.1.67:20100\\n00000012 2012-07-18 21:25:13 17800 17800 Slave 1 (192.168.1.67:20100) registered\\n00000013 2012-07-18 21:25:13 17800 17800 Slaves initialized\\n00000014 2012-07-18 21:25:13 17800 17800 
verifying mp connection to rest of cluster\\n00000015 2012-07-18 21:25:13 17800 17800 verified mp connection to rest of cluster\\n00000016 2012-07-18 21:25:13 17800 17800 ,Progress,Thor,Startup,mythor,mythor,thor.thor,//192.168.1.67/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n00000017 2012-07-18 21:25:13 17800 17800 Listening for graph\\n00000018 2012-07-18 21:25:13 17800 17800 ThorLCR(192.168.1.67:20000) available, waiting on queue thor.thor\\n
\", \"post_time\": \"2012-07-19 11:47:36\" },\n\t{ \"post_id\": 2021, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Re: Strange error from thor\", \"username\": \"jsmith\", \"post_text\": \"By the looks of the abrupt ending to the slave process logging, i.e.:\\n0000001D 2012-07-18 02:24:04 15442 16228 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(workunitwrite, 6)\\n[new process]\\n00000000 2012-07-18 02:24:05 16272 16272 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thorslave.192.168.1.67_20100.2012_07_18.log\\n\\nit looks like it cored. I may need either an example or to see a stack from the core dump to know more.\\nBut if you can, I would recommend upgrading to 3.8.0.1.CE and retrying.\\nI can't say at the moment, if it's directly related to a bug in 3.6, but a number of issues were result between 3.6 and 3.8, so I think it's worth a shot if possible.\", \"post_time\": \"2012-07-19 11:33:48\" },\n\t{ \"post_id\": 1995, \"topic_id\": 439, \"forum_id\": 8, \"post_subject\": \"Strange error from thor\", \"username\": \"nvasil\", \"post_text\": \"I have installed hpcc on my laptop and I am running a Thor with the default configuration. I can submit trivial queries and they work. I did try to submit one of my paperboat tests and it fails with this message\\n\\n<Error><source>eclagent</source><code>-1</code><message>System error: -1: Failed to receive reply from thor 192.168.1.67:20000; (-1, Failed to receive reply from thor 192.168.1.67:20000)</message></Error>\\n
\\n\\nI did try it on hthor and it worked fine. Note that the query has to compile and link to the paperboat library. I checked on the eclwatch and the compilation step is done. \\n\\nHere are the thor logfiles\\n/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n\\n00000042 2012-07-18 02:24:04 15445 15445 Processing wuid=W20120718-022352, graph=graph1 from agent: 192.168.1.67\\n00000043 2012-07-18 02:24:04 15445 15445 ,Progress,Thor,Start,mythor,W20120718-022352,graph1,,mythor,thor.thor\\n00000044 2012-07-18 02:24:04 15445 15445 Saving dll: /var/lib/HPCCSystems/queries/mythor/V2856230807_libW20120718-022352.so\\n00000045 2012-07-18 02:24:04 15445 15445 Started wuid=W20120718-022352, user=, graph=graph1\\n**\\n00000046 2012-07-18 02:24:04 15445 15445 Query /var/lib/HPCCSystems/queries/mythor/V2856230807_libW20120718-022352.so loaded\\n00000047 2012-07-18 02:24:04 15445 15445 CThorRowManager initialized, memlimit = 0\\n00000048 2012-07-18 02:24:04 15445 15445 Global memory size = 0 MB, large mem size = 1536 MB\\n00000049 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65537\\n0000004A 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65538\\n0000004B 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65539\\n0000004C 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65540\\n0000004D 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65541\\n0000004E 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65542\\n0000004F 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65543\\n00000050 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65544\\n00000051 2012-07-18 02:24:04 15445 15445 Graph graph1 created\\n00000052 2012-07-18 02:24:04 15445 15445 Running graph=graph1\\n00000053 2012-07-18 02:24:04 15445 15445 temp directory cleared\\n00000054 2012-07-18 02:24:04 15445 15445 Running graph [global] : <graph>\\n <node id="5" label="Child Dataset">\\n <att name="definition" value="/e/ismion/git/paperboat/ecl-pb/pb/karnagio.ecl(69,3)"/>\\n <att name="name" value="getuint32tables"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="getuint32tables(' --references_in=dense$double$0 --k_neighbors=1 --distances_out=dense$double$3...', INTERNAL('gl2')); "/>\\n <att name="recordSize" value="17"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="6" label="Output Result #4">\\n <att name="definition" value="stdin:(45,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="17"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="5_0" source="5" target="6"/>\\n </graph>\\n - graph(graph1, 4)\\n00000055 2012-07-18 02:24:04 15445 15445 getResultString(gl2,-3)\\n00000056 2012-07-18 02:24:04 15445 15445 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(workunitwrite, 6)\\n00000057 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65545\\n00000058 2012-07-18 02:24:04 15445 15445 Query dll: /var/lib/HPCCSystems/queries/mythor/V2856230807_libW20120718-022352.so\\n00000059 2012-07-18 02:24:04 15445 15445 ,Progress,Thor,StartSubgraph,mythor,W20120718-022352,1,4,mythor,thor.thor\\n0000005A 2012-07-18 02:24:04 15445 15445 allocateMPTag: tag = 65546\\n0000005B 2012-07-18 02:24:04 15445 15445 sendGraph took 8 ms - graph(graph1, 4)\\n00000001 2012-07-18 02:24:05 16275 16275 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n00000002 2012-07-18 02:24:05 16275 16275 Build community_3.6.2-3\\n00000003 2012-07-18 02:24:05 16275 16275 calling 
initClientProcess Port 20000\\n00000004 2012-07-18 02:24:05 16275 16275 Found file 'thorgroup', using to form thor group\\n00000005 2012-07-18 02:24:05 16275 16275 Starting watchdog\\n00000006 2012-07-18 02:24:05 16275 16275 ThorMaster version 4.0, Started on 192.168.1.67:20000\\n00000007 2012-07-18 02:24:05 16275 16286 Started watchdog\\n00000008 2012-07-18 02:24:05 16275 16275 Thor name = mythor, queue = thor.thor, nodeGroup = mythor\\n00000009 2012-07-18 02:24:05 16275 16275 Creating sentinel file thor.sentinel for rerun from script\\n0000000A 2012-07-18 02:24:05 16275 16275 Waiting for 1 slaves to register\\n0000000B 2012-07-18 02:24:05 16275 16275 Verifying connection to slave 1\\n0000000C 2012-07-18 02:24:05 16275 16275 verified connection with 192.168.1.67:20100\\n0000000D 2012-07-18 02:24:05 16275 16275 Slaves connected, initializing..\\n0000000E 2012-07-18 02:24:05 16275 16275 Initialization sent to slave group\\n0000000F 2012-07-18 02:24:05 16275 16275 Registration confirmation from 192.168.1.67:20100\\n00000010 2012-07-18 02:24:05 16275 16275 Slave 1 (192.168.1.67:20100) registered\\n00000011 2012-07-18 02:24:05 16275 16275 Slaves initialized\\n00000012 2012-07-18 02:24:05 16275 16275 verifying mp connection to rest of cluster\\n00000013 2012-07-18 02:24:05 16275 16275 verified mp connection to rest of cluster\\n00000014 2012-07-18 02:24:05 16275 16275 ,Progress,Thor,Startup,mythor,mythor,thor.thor,//192.168.1.67/var/log/HPCCSystems/mythor/thormaster.2012_07_18.log\\n00000015 2012-07-18 02:24:05 16275 16275 Listening for graph\\n00000016 2012-07-18 02:24:05 16275 16275 ThorLCR(192.168.1.67:20000) available, waiting on queue thor.thor\\n00000017 2012-07-18 02:25:05 16275 16285 SYS: PU= 31% MU= 25% MAL=206000 MMP=0 SBK=206000 TOT=264K RAM=4220128K SWP=55412K\\n00000018 2012-07-18 02:26:06 16275 16285 SYS: PU= 33% MU= 25% MAL=476336 MMP=270336 SBK=206000 TOT=528K RAM=4220212K SWP=55412K\\n00000019 2012-07-18 02:26:06 16275 16285 DSK: [sda] r/s=117.6 kr/s=472.7 w/s=54.0 kw/s=1091.5 bsy=99 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=7 sys=1 iow=23 idle=66\\n
\\n\\n/var/log/HPCCSystems/mythor/thorslave.192.168.1.67_20100.2012_07_18.log\\n\\n\\n0000000F 2012-07-18 02:24:04 15442 15442 Using query: /var/lib/HPCCSystems/queries/mythor/V2856230807_libW20120718-022352.so\\n00000010 2012-07-18 02:24:04 15442 15442 CThorRowManager initialized, memlimit = 0\\n00000011 2012-07-18 02:24:04 15442 15442 Global memory size = 0 MB, large mem size = 1536 MB\\n00000012 2012-07-18 02:24:04 15442 15442 New Graph started : graph1\\n00000013 2012-07-18 02:24:04 15442 15442 temp directory cleared\\n00000014 2012-07-18 02:24:04 15442 15442 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 142467, /var/lib/HPCCSystems/hpcc-mirror/thor = 0\\n00000015 2012-07-18 02:24:04 15442 15442 Key file cache size set to: 4\\n00000016 2012-07-18 02:24:04 15442 15442 GraphInit: W20120718-022352graph1, graphId=4\\n00000017 2012-07-18 02:24:04 15442 15442 deserializeMPTag: tag = 65546\\n00000018 2012-07-18 02:24:04 15442 15442 deserializeMPTag: tag = 65542\\n00000019 2012-07-18 02:24:04 15442 15442 deserializeMPTag: tag = 65543\\n0000001A 2012-07-18 02:24:04 15442 15442 deserializeMPTag: tag = 65544\\n0000001B 2012-07-18 02:24:04 15442 15442 Add: Launching graph thread for graphId=4\\n0000001C 2012-07-18 02:24:04 15442 16228 Running graph [global] : <graph>\\n <node id="5" label="Child Dataset">\\n <att name="definition" value="/e/ismion/git/paperboat/ecl-pb/pb/karnagio.ecl(69,3)"/>\\n <att name="name" value="getuint32tables"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="getuint32tables(' --references_in=dense$double$0 --k_neighbors=1 --distances_out=dense$double$3...', INTERNAL('gl2')); "/>\\n <att name="recordSize" value="17"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="6" label="Output Result #4">\\n <att name="definition" value="stdin:(45,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="17"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="5_0" source="5" target="6"/>\\n </graph>\\n - graph(graph1, 4)\\n0000001D 2012-07-18 02:24:04 15442 16228 CONNECTING (id=5, idx=0) to (id=6, idx=0) - activity(workunitwrite, 6)\\n00000000 2012-07-18 02:24:05 16272 16272 Opened log file //192.168.1.67/var/log/HPCCSystems/mythor/thorslave.192.168.1.67_20100.2012_07_18.log\\n00000001 2012-07-18 02:24:05 16272 16272 Build community_3.6.2-3\\n00000002 2012-07-18 02:24:05 16272 16272 registering 192.168.1.67:20100 - master 192.168.1.67:20000\\n00000003 2012-07-18 02:24:05 16272 16272 Initialization received\\n00000004 2012-07-18 02:24:05 16272 16272 Master build: community_3.6.2-3\\n00000005 2012-07-18 02:24:05 16272 16272 Registration confirmation sent\\n00000006 2012-07-18 02:24:05 16272 16272 verifying mp connection to rest of cluster\\n00000007 2012-07-18 02:24:05 16272 16272 verified mp connection to rest of cluster\\n00000008 2012-07-18 02:24:05 16272 16272 registered 192.168.1.67:20100\\n00000009 2012-07-18 02:24:05 16272 16272 calling initClientProcess\\n0000000A 2012-07-18 02:24:05 16272 16272 ThorSlave Version LCR - 4.0 started\\n0000000B 2012-07-18 02:24:05 16272 16272 Slave 192.168.1.67:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000C 2012-07-18 02:24:05 16272 16272 Using querySo directory: /var/lib/HPCCSystems/queries/mythor\\n0000000D 2012-07-18 02:24:05 16272 16272 FileCache: limit = 1800, purgeN = 10\\n0000000E 2012-07-18 02:24:05 16272 16291 Watchdog: thread running\\n
\", \"post_time\": \"2012-07-18 04:34:40\" },\n\t{ \"post_id\": 2068, \"topic_id\": 450, \"forum_id\": 8, \"post_subject\": \"Re: Consolidating SuperFiles\", \"username\": \"jeremy\", \"post_text\": \"Hi Richard,\\nThanks... I've worked a bit more on it, and I'm specifically trying to use ECLPlus... so I'd like to pass in a STRING via the /variable construct, and then convert that STRING to a RECORD, either via doing something analogous to Class.forName() in Java, or else by CASE'ing the STRING and calling one of several different DATASET commands, each one with a distinct RECORD type... which I can then pass to a generic OUTPUT command... this would let me consolidate any number of schemas with the same script. I've worked through several examples, but haven't found anything successful yet... will post a few attempts as I can test them and clean them up.\", \"post_time\": \"2012-07-24 19:53:41\" },\n\t{ \"post_id\": 2066, \"topic_id\": 450, \"forum_id\": 8, \"post_subject\": \"Re: Consolidating SuperFiles\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nGiven that you already need a DATASET declaration of the superfile in order to work with it, then the only additional code would be the OUTPUT, which needs only two parameters specified to perform its job, so there's little there to optimize. \\n\\nTherefore, the real opportunity for code genericization lies in the process of clearing the superfile and replacing the sub-files with the new base file. That process should also be in the article (and we do also cover that in our Advanced Thor class).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-24 19:14:46\" },\n\t{ \"post_id\": 2051, \"topic_id\": 450, \"forum_id\": 8, \"post_subject\": \"Consolidating SuperFiles\", \"username\": \"jeremy\", \"post_text\": \"Greetings,\\nFrom the Programmer's Guide, I see the following as an example for consolidating the contents of a SuperFile into a single logical file:\\n\\nEXPORT SuperFile2 := DATASET(AllPeople,Layout_Person,FLAT);\\nOUTPUT($.DeclareData.SuperFile2,,'~$.DeclareData::SUPERFILE::People14',OVERWRITE);
\\n\\nAre there best practices for making this more generic? i.e. I'd like to be able to call code like this for several different SuperFiles, logical files, and Record types.\", \"post_time\": \"2012-07-24 03:25:31\" },\n\t{ \"post_id\": 4415, \"topic_id\": 451, \"forum_id\": 8, \"post_subject\": \"Re: Fault-Tolerance in HPCC\", \"username\": \"DSC\", \"post_text\": \"Bringing an old thread back to life is so much fun.\\n\\n You can then select the extra nodes as spares. Additionally, you can make this swap node function work automatically by navigating to the SwapNode tab and the setting the AutoSwapNode value to true. The spares are swapped in when the Thor recycles and the thormaster notices that it cannot communicate with the slaves.
\\nObviously, one instance of "recycling" is restarting Thor itself. Does this happen on timed basis, though? In other words, will Thor eventually notice that a node is out of communication and swap in the spare without human intervention?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-08-01 17:26:09\" },\n\t{ \"post_id\": 2056, \"topic_id\": 451, \"forum_id\": 8, \"post_subject\": \"Re: Fault-Tolerance in HPCC\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nThere are several ways that the HPCC System handles fault tolerance in relation to Thor.\\n\\nFirst, in regards to the mirroring, when you have a system configured to run on multiple nodes, the hpcc-mirror directory for node n is typically on node n+1. This is automatically set up when you configure the system through the configuration wizard.\\n\\nSecond, regarding when a node goes down, there is the option to run the swapnode function. This function allows you to swap in a spare node to replace the failed one. However, you'll have to configure this spare node and attach it to the thor component with the configmgr tool. \\n\\nHere are the steps to do so:\\n\\n1. Add the spare node to the hardware section\\n2. Navigate to the Thor Component > Topology tab \\n3. Ensure that Write Access is enabled \\n4. Right-click on the master node and select add spares. \\n\\nYou can then select the extra nodes as spares. Additionally, you can make this swap node function work automatically by navigating to the SwapNode tab and the setting the AutoSwapNode value to true. The spares are swapped in when the Thor recycles and the thormaster notices that it cannot communicate with the slaves. \\n\\nI found a similar post on the forum that hopefully answers any other questions you might have on redundancy.\\nviewtopic.php?f=16&t=46&hilit=swap+node\", \"post_time\": \"2012-07-24 15:29:17\" },\n\t{ \"post_id\": 2054, \"topic_id\": 451, \"forum_id\": 8, \"post_subject\": \"Fault-Tolerance in HPCC\", \"username\": \"Ankita Singla\", \"post_text\": \"hello\\nIts urgent...\\n\\nI am new to HPCC and not getting any idea abt\\nhow fault-tolerance is achieved especially for Thor cluster.\\nHow mirroring takes place?\\nwhat happens if a node goes down...\\nIs any manual intervention required?\\nDoes the fail-over happen automatically?\\n\\nplz Help....\", \"post_time\": \"2012-07-24 10:25:56\" },\n\t{ \"post_id\": 2102, \"topic_id\": 459, \"forum_id\": 8, \"post_subject\": \"Re: How do we deal with an unstructured data..?\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nTried using a pattern and did parsing in ECL and extracted few contents, but the pattern is not specific all the time and hence the ECL code may not work all the time.\\n
\\n\\nThere are two things you can explore...\\n1. Use Boolean expressions in your patterns or rules so that your parsing statement can be flexible for different combinations.\\n\\n2. Using multiple parsing statements that look for different combinations is also a good technique.\\n\\n\\nHow do we deal with such a data and other than parsing any other options are available..?\\n
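\\nAs a tiny illustration of the pattern-flexibility idea above (the record layout, sample text and patterns here are invented, not taken from your data):\\n\\nrec := {STRING line};\\nds := DATASET([{'DATED: 12/07/2012 No. 123 of 2012'}], rec);\\nPATTERN num := PATTERN('[0-9]+');\\nPATTERN datepat := num '/' num '/' num;\\nPATTERN casepat := 'No. ' num ' of ' num;\\nPATTERN target := datepat | casepat;  // alternation lets one PARSE find either shape\\nhits := RECORD\\n STRING txt := MATCHTEXT(target);\\nEND;\\nOUTPUT(PARSE(ds, line, target, hits, SCAN));\\n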
\\n\\nI saw that the new Machine Learning library has a section on documents that also may help with your parsing journey.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-27 19:39:57\" },\n\t{ \"post_id\": 2090, \"topic_id\": 459, \"forum_id\": 8, \"post_subject\": \"How do we deal with an unstructured data..?\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow do we deal with an unstructured data..?\\n\\nSample of an unstructured data :\\n\\nxaxxaxa\\n DATED: xx/yy/zzzz\\n abc abc abc \\n No. xxx of yyyy\\n\\naaa\\nbbb\\nccc\\nddd ... TEXT1\\n\\n\\n Vs.\\n1.) aaa\\nbbbb\\n\\n2.) ccc\\ndddd\\neeee ... TEXT2\\n\\nTried using a pattern and did parsing in ECL and extracted few contents, but the pattern is not specific all the time and hence the ECL code may not work all the time.\\n\\nHow do we deal with such a data and other than parsing any other options are available..?\\n\\nPlease help regarding the same.\\n\\nThanks in Advance\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-07-26 14:29:42\" },\n\t{ \"post_id\": 2103, \"topic_id\": 460, \"forum_id\": 8, \"post_subject\": \"Re: How do we do a full text search for a given data..?\", \"username\": \"rtaylor\", \"post_text\": \"The example used in Bible Project in the web looks very complex especially the graph function..\\n\\nCan anybody explain a simpler example of graph function and the same how it can be used for text search..?
Searching unstructured text is usually done just by defining patterns and using them with the PARSE function. The GRAPH function may be useful in doing that, but it is absolutely not required.\\n\\nRichard\", \"post_time\": \"2012-07-27 20:09:17\" },\n\t{ \"post_id\": 2091, \"topic_id\": 460, \"forum_id\": 8, \"post_subject\": \"How do we do a full text search for a given data..?\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow do we do a full text search for a structured or unstructured data..?\\n\\nThe example used in Bible Project in the web looks very complex especially the graph function..\\n\\nCan anybody explain a simpler example of graph function and the same how it can be used for text search..?\\n\\nThe example in ECL Language reference is simple but not able to relate with text search.\\n\\nPlease help regarding the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-07-26 14:35:39\" },\n\t{ \"post_id\": 2126, \"topic_id\": 463, \"forum_id\": 8, \"post_subject\": \"Re: How Queries execute in roxie??????\", \"username\": \"bforeman\", \"post_text\": \"The default configuration of ROXIE is to copy whatever data it needs to its own cluster, but sometimes a query can be set up to store the query on ROXIE, and reference data from a remote THOR cluster, via SOAPCALLs.\\n\\nPersonally all of my queries copy any needed data to the target ROXIE when the query is published, and that is the standard I believe most of us use.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-31 15:34:47\" },\n\t{ \"post_id\": 2113, \"topic_id\": 463, \"forum_id\": 8, \"post_subject\": \"Re: How Queries execute in roxie??????\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanx for the reply........\\nIt helped me alot...but now i am stuck with the different problm..\\n\\nFrom the pdf u referred i got info that\\n\\nDepending on the configuration, Roxie may read data remotely from a Thor cluster where it was prepared, or if preferred, it may be copied to the Roxie for local access.\\nTypically a development system might refer to data in situ on the Thor cluster, while a production system may prefer the performance benefits of copying data locally to the Roxie.\\n\\nCan u explain me which configurations they are talking about......\\nplz help..\", \"post_time\": \"2012-07-31 06:48:36\" },\n\t{ \"post_id\": 2111, \"topic_id\": 463, \"forum_id\": 8, \"post_subject\": \"Re: How Queries execute in roxie??????\", \"username\": \"bforeman\", \"post_text\": \"Here is a quick overview of the process:\\n\\nQueries in the ECL IDE that have been compiled using the Roxie target platform may be published to a QuerySet using ECL Watch. (ECL Watch is a Web Service running on an ESP Server, easily accessed through any browser):\\n\\nThe ECL Watch submits the ECL source code to an ECL Server/Agent, which compiles it.\\nOne of the Servers (or Farmers) in the ROXIE cluster receives:\\n1. The compiled ECL source code (the query).\\n2. 
Other information about the query, including the location of one or more compiled shared library files, the physical location of the data, and the physical location of the index file parts.\\n\\nThe Server (or Farmer) sends this information to the rest of the Servers in the ROXIE cluster.\\n\\nThe cluster copies all needed data files from their remote locations which may be a THOR, ECL Agent or even another ROXIE.\\n\\nOnce the cluster is aware of the query, any Server can execute it on demand.\\n\\nFor even more detailed information, check out the following PDF available on this web site:\\n\\nhttp://hpccsystems.com/download/docs/roxie-guide\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-30 22:42:52\" },\n\t{ \"post_id\": 2107, \"topic_id\": 463, \"forum_id\": 8, \"post_subject\": \"How Queries execute in roxie??????\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi....\\nI am new to ECL...\\nWhen the data is uploaded and sprayed, it is stored in the Thor cluster. Now when the queries are executed extensively on Roxie, what happens exactly – \\nIs the data stored in Thor moved to the Roxie clusters? \\nor\\nDo the queries reside temporarily on the Thor cluster?\\n\\nplzz help\", \"post_time\": \"2012-07-30 07:41:56\" },\n\t{ \"post_id\": 2132, \"topic_id\": 465, \"forum_id\": 8, \"post_subject\": \"Re: Copying data from Thor to Roxie\", \"username\": \"JimD\", \"post_text\": \"The data files and index files referenced by the query’s ECL code are made available in one of these ways, depending on the configuration of the Roxie cluster. \\n\\nTwo configuration settings in Configuration Manager determine how this works: \\n\\ncopyResources \\nCopies necessary data and key files from the current\\nlocation when the query is published.\\n\\nuseRemoteResources \\nLoads necessary data and key files from the current\\nlocation when the query is published. \\n\\nThese options may appear to be mutually exclusive, but the chart below shows what each possible combination means.\\n\\n----------------------------------------------------------------------------------\\ncopyResources T\\nuseRemoteResources T\\n\\nDirects the Roxie cluster to use the remote copy of the data until it can copy\\nthe data locally. This allows a query to be available immediately using the \\nremote data until the copy completes.\\n----------------------------------------------------------------------------------\\ncopyResources T\\nuseRemoteResources F\\n\\nDirects the Roxie cluster to copy the data locally. The query cannot be\\nexecuted until the data copy completes. This ensures optimum performance but \\nmay delay the query's availability until the file copy completes.\\n----------------------------------------------------------------------------------\\ncopyResources F\\nuseRemoteResources T\\n\\nDirects the Roxie cluster to load the data from a remote location and never copy locally. The query can be executed immediately, but performance is\\nlimited by network bandwidth. \\nThis allows queries to run without using any Roxie node disk space, but reduces its throughput capabilities. 
This is the default for a single node because Thor and Roxie are on the same node and share the same disk drives.\\n----------------------------------------------------------------------------------\\ncopyResources F\\nuseRemoteResources F\\n\\nWill use data and indexes previously loaded but will not copy or read\\nremote data.\", \"post_time\": \"2012-08-01 15:58:10\" },\n\t{ \"post_id\": 2112, \"topic_id\": 465, \"forum_id\": 8, \"post_subject\": \"Copying data from Thor to Roxie\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi...\\n\\nCan anyone tell me vat configurations they are talking about.....\\n\\nDepending on the configuration, Roxie may read data remotely from a Thor cluster where it was prepared, or if\\npreferred, it may be copied to the Roxie for local access.\\nTypically a development system might refer to data in situ on the Thor cluster, while a production system may prefer\\nthe performance benefits of copying data locally to the Roxie.\\n\\nReffered from http://hpccsystems.com/download/docs/roxie-guide\", \"post_time\": \"2012-07-31 06:41:04\" },\n\t{ \"post_id\": 2124, \"topic_id\": 468, \"forum_id\": 8, \"post_subject\": \"Re: More detail about Ecl supporting NLP/Queries of EclWatch\", \"username\": \"rtaylor\", \"post_text\": \"Case 1:\\nQuestion is : \\n\\n1.What is the business scenario wherein the data is in 'natural language' and how ECL can help in uploading and analyzing it?
Any free-form text can be parsed in ECL by using the PARSE function and all its supporting pattern definition and matching technology. \\n\\nOne very common use in the business world is to parse log files. It can also be used for cleaning/standardizing data. For example, 10-digit US phone numbers are normally entered into data files in multiple "standard" formats. Using PARSE's pattern matching easily enables your code to recognize that 561.999.4400 and (561)999-4400 and 561/999-4400 are all the same phone number.Case 2:\\n\\nThrough EclWatch i have successfully uploaded data which is in Mb.But what if \\n1. Data is huge let's say in Tbs and in multiple files
The upload/download file capability in ECL watch is simply an easy way to put files up to 2 Gb in size onto your landing zone (AKA dropzone) so they may be sprayed to your Thor. You may use any other method you want to get these files onto the landing zone -- FTP, sFTP, whatever.2. The party that uploads it is at a remote location and isn't interested in using ECL Watch - it will simply dump the files in a folder/directory and expect some script to upload it\\n\\nWhat can be done in this case?Can ECL help here?
We have no pre-built tool to monitor a folder/directory and automatically spray it to Thor, but you can easily create one. \\n\\nYou could write a program (in any language you want) to monitor a directory, and when a file appears there have that program write the script to spray the file to Thor and launch a "standard" job to process the file. Take a look in the ClientTools.PDF at the DFUplus.exe and ECLplus.exe documentation -- this type of automation is exactly what they were created for.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-31 14:46:22\" },\n\t{ \"post_id\": 2118, \"topic_id\": 468, \"forum_id\": 8, \"post_subject\": \"More detail about Ecl supporting NLP/Queries of EclWatch\", \"username\": \"prachi\", \"post_text\": \"Case 1:\\nI have gone through Introduction pdf of HPCC, in that it is mentioned :\\n\\nAn additional important capability provided in the ECL programming language is support for natural language processing (NLP) with PATTERN statements and the built-in PARSE operation. PATTERN statements allow matching patterns including regular expressions to be defined and used to parse information from unstructured data such as raw text.PATTERN statements can be combined to implement complex parsing operations or complete grammars from BNF definitions.\\n\\nQuestion is : \\n\\n1.What is the business scenario wherein the data is in 'natural language' and how ECL can help in uploading and analyzing it?\\n\\nCase 2:\\n\\nThrough EclWatch i have successfully uploaded data which is in Mb.But what if \\n1. Data is huge let's say in Tbs and in multiple files\\n2. The party that uploads it is at a remote location and isn't interested in using ECL Watch - it will simply dump the files in a folder/directory and expect some script to upload it\\n\\nWhat can be done in this case?Can ECL help here?\", \"post_time\": \"2012-07-31 11:40:37\" },\n\t{ \"post_id\": 2125, \"topic_id\": 469, \"forum_id\": 8, \"post_subject\": \"Re: Some Questions on Child dataset?\", \"username\": \"rtaylor\", \"post_text\": \"Ghost,Suppose i have some dataset of below format:\\nGrandFather\\nFather\\nChild1, Child2\\nGrandFather is having 'father' dataset as child dataset.\\n'father' is having 'Child1' and 'Child2' datasets as child datasets.
We cover a very similar type of data structure in our Advanced ECL class. We create a \\n Parent \\n - Child1 \\n - Child2 \\n - GrandChild \\n
structure (using DENORMALIZE), learn how ECL can easily accomplish complex multi-level relational querying when data is structured in this "nested child dataset" type of format, then learn to de-construct it all into separate related tables (using NORMALIZE). If you're going to be working with this type of data, I would strongly recommend taking the class.1. How can i know whether there is a child dataset in a parent dataset dynamically?
How do you mean "dynamically"? If you mean that you are attempting to work with data for which you have no foreknowledge of the structure, then you certainly have an interesting problem because ECL usually needs to know the structure of the data file in order to work with it (so you can define the RECORD structure that tells the system what data is in which fields). \\n\\nThe only way I know of to work with data for which you have absolutely no foreknowledge would be to either manually explore it to determine the structure, or simply define each record as a single variable-length STRING field, then use PARSE to extract data (and that also implies foreknowledge of what you're looking for).2. How can i flatten the 'GrandFather' dataset at one go.
If, by "flatten" you mean simply extract the Grandfather data into a separate table, then you can use either TABLE or PROJECT to accomplish that.3. In case, i want to update some field in 'Child1' dataset, Is it possible to do so without flattening the GrandFather?
One basic rule in HPCC is that, when you are reading data from a file in your job, you never get to write to that file in the same job. IOW, HPCC is not an OLTP system nor an RDBMS. There is no "update" ever. You can read records from a file, add to, change, and delete the data you read from that file, and then write a new file containing the updated data, but there is no "update in place" allowed.4. How can i know whether 'child1' has more no. of records than 'child2' dataset?
The COUNT function would help here.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-31 15:15:32\" },\n\t{ \"post_id\": 2119, \"topic_id\": 469, \"forum_id\": 8, \"post_subject\": \"Some Questions on Child dataset?\", \"username\": \"Ghost\", \"post_text\": \"Suppose i have some dataset of below format:\\nGrandFather\\n Father\\n Child1, Child2\\nGrandFather is having 'father' dataset as child dataset.\\n'father' is having 'Child1' and 'Child2' datasets as child datasets.\\n\\nNow,\\n1. How can i know whether there is a child dataset in a parent dataset dynamically?\\n2. How can i flatten the 'GrandFather' dataset at one go.\\n3. In case, i want to update some field in 'Child1' dataset, Is it possible to do so without flattening the GrandFather?\\n4. How can i know whether 'child1' has more no. of records than 'child2' dataset?\", \"post_time\": \"2012-07-31 11:54:33\" },\n\t{ \"post_id\": 2127, \"topic_id\": 470, \"forum_id\": 8, \"post_subject\": \"Re: WsEcl (:8002) and EclWatch (:8010)?\", \"username\": \"bforeman\", \"post_text\": \"Hi Prachi,\\n\\n1.What is the difference between WsEcl (:8002) (which is interface of HPCC) and EclWatch 3.8.0-1 (:8010)?\\n
\\n\\nBoth are considered ESP services. Both can be called independently of each other via your favorite browser, but ECL Watch includes a link from the System Servers/MyESP to allow you to open a new page or tab directly from ECL Watch. The difference is that they are indeed two different services, but WS_ECL can be called by ECL Watch if desired.\\n\\n2.Is it that WsEcl is integrated in EclWatch 3.8.0-1?
\\n\\nWell WS_ECL can be called from ECLWatch, but it can also be called independently from any browser.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-31 15:55:27\" },\n\t{ \"post_id\": 2120, \"topic_id\": 470, \"forum_id\": 8, \"post_subject\": \"WsEcl (:8002) and EclWatch (:8010)?\", \"username\": \"prachi\", \"post_text\": \"I have published a query in each i.e Thor and Roxie. In WsEcl i do get a textarea to enter text( for ex. Zip code )and then it will search rows related to it and give the output.\\n\\nI am using EclWatch 3.8.0-1 which has 'Query Sets'. Now when i click on it it shows me 'myroxie' and 'thor' link. By clicking resp on these links i do see my resp published query name.\\n\\nWhat i want to know is :\\n1.What is the difference between WsEcl (:8002) (which is interface of HPCC) and EclWatch 3.8.0-1 (:8010)?\\n\\n2.Is it that WsEcl is integrated in EclWatch 3.8.0-1?\", \"post_time\": \"2012-07-31 12:03:27\" },\n\t{ \"post_id\": 2150, \"topic_id\": 473, \"forum_id\": 8, \"post_subject\": \"Re: Can we spray binary data on multiple nodes\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanks for the reply.....\\nMay be it can help me in some way... \", \"post_time\": \"2012-08-03 12:57:07\" },\n\t{ \"post_id\": 2141, \"topic_id\": 473, \"forum_id\": 8, \"post_subject\": \"Re: Can we spray binary data on multiple nodes\", \"username\": \"rtaylor\", \"post_text\": \"Ankita,\\n
But in binary data my one record can be on 50 rows and other can be on 2 rows. Also each row can differ in columns then how spraying works...?????
We work with binary data files all the time, with both fixed-length and variable-length records. Typically, each record has the same structure and all the data is contained in that one record with a delimiter indicating the end of the record. Spraying this type of data simply requires knowing what the record delimiter is.\\n\\nThe closest thing that I can think of to what I think you're describing would be a file wherein the data (either binary or textual) is comprised of multiple rows of different record types which are grouped together to describe a single logical entity in your data. These multiple physical records would be grouped either by a grouping identifier in the data (the logical entity identifier), or simply the physical positioning of the records in the file -- such as:
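(To give a made-up illustration of that shape -- these record types and values are invented, not from your file:)\\n\\nHEADER CUST001 2012-08-02\\nDETAIL CUST001 item-1\\nDETAIL CUST001 item-2\\nTRAILER CUST001 2\\nHEADER CUST002 2012-08-03\\nDETAIL CUST002 item-1\\nTRAILER CUST002 1\\n
\\n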
If this is the kind of data you're working with, then spraying is not a problem as long as each record type has the same delimniter, because it doesn't matter whether the logical grouping is spread across nodes, only that each physical record is whole and complete on a single node. The spray maintains the physical order of records in the file that is sprayed, so that the first n number of records are put on node 1, then the first record on node 2 is n+1, etc.\\n\\nYour real problem with this kind of data is in defining the RECORD structure for the file so that you can easily work with it. Take a look at the IFBLOCK discussion in the RECORD structure documentation for one way to accomplish this.\\n\\nHowever, if I've misunderstood and this is not the type of data you're working with, then can you please expand on your description so we can better help you?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-02 15:20:48\" },\n\t{ \"post_id\": 2139, \"topic_id\": 473, \"forum_id\": 8, \"post_subject\": \"Can we spray binary data on multiple nodes\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi..\\n\\nAs if we spray CSV, XML or FLAT files, then distribution of the file across all the nodes of the target cluster is such that each single record is always whole and complete on a single node and the records are "evenly" distributed across the nodes.\\n\\nBut in binary data my one record can be on 50 rows and other can be on 2 rows. Also each row can differ in columns then how spraying works...?????\\nIs there any option to define delimiters so that it know different records and spray it on different nodes...\\n\\nPlzz help..\", \"post_time\": \"2012-08-02 13:12:13\" },\n\t{ \"post_id\": 2281, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob and Richard,\\n\\nI'll give them a whorl.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-08-31 19:06:19\" },\n\t{ \"post_id\": 2278, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nIt may not be quite what you're going for, but this code works:process_SUFFIX(STRING pSuffix) := MODULE\\n EXPORT field := pSuffix;\\nEND;\\n\\nPersonName(STRING pName) := MODULE\\n EXPORT field := pName;\\nEND;\\n\\nR := RECORD\\n STRING s;\\nEND;\\n\\n R tSUB01Prep(r L) := transform\\n a := Process_SUFFIX('aaa').field;\\n b := PersonName('bbb').field;\\n\\t\\t\\tself.s := IF(TRUE,a,b) + L.s ;\\n end;\\n\\ndataset(R) SUB01(dataset(R) pSUB01) := project(pSUB01, tSUB01Prep(LEFT));\\n\\na := DATASET([ {'Smith'},{'Blow'},{'Jane'}],r);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nSUB01(a);\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\n
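\\nThe trick is that .field is taken from each MODULE instance before the IF, so a and b are already plain STRING values by the time IF chooses between them. Written inline (same modules and layout as above, just as a sketch), the assignment could equally be:\\n\\nSELF.s := IF(TRUE, Process_SUFFIX('aaa').field, PersonName('bbb').field) + L.s;\\n
\\n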
HTH,\\n\\nRichard\", \"post_time\": \"2012-08-31 14:54:49\" },\n\t{ \"post_id\": 2274, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"bforeman\", \"post_text\": \"Is this a BUG or is all working as specified?
\\n\\nAllan, I think maybe neither \\n\\nChange the TRANSFORM to this:\\n\\n
R tSUB01Prep() := transform\\n a := 'Process_SUFFIX(\\\\'aaa\\\\')';\\n b := 'PersonName(\\\\'bbb\\\\')';\\n self.s := (STRING) IF(TRUE,a,b) + '.field' ;\\n //self.s := IF(TRUE,a,a).field;\\n //self.s := IF(TRUE,a.field,b.field);\\n end;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-31 13:35:36\" },\n\t{ \"post_id\": 2251, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIs this a BUG or is all working as specified?\\n\\nNot sure of the state of this issue.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-08-23 09:34:08\" },\n\t{ \"post_id\": 2148, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"gsmith\", \"post_text\": \"Ahh - I am not too suprised that dataset and module would behave differently...\", \"post_time\": \"2012-08-03 12:50:25\" },\n\t{ \"post_id\": 2147, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"Allan\", \"post_text\": \"Ok Gordan,\\n\\nYour example works, but mine does not:\\n\\nEXPORT Process_SUFFIX(STRING pSuffix) := MODULE\\n EXPORT field := pSuffix;\\nEND;\\n\\nEXPORT PersonName(STRING pName) := MODULE\\n EXPORT field := pName;\\nEND;\\n\\nR := RECORD\\n STRING s;\\nEND;\\n\\n R tSUB01Prep() := transform\\n a := Process_SUFFIX('aaa');\\n b := PersonName('bbb');\\n self.s := IF(TRUE,a,b).field;\\n //self.s := IF(TRUE,a,a).field;\\n //self.s := IF(TRUE,a.field,b.field);\\n end;\\n\\nexport dataset(R) SUB01(dataset(R) pSUB01) := project(pSUB01, tSUB01Prep());\\n
\\nOne might accept that where 'a' and 'b' are different types the expression might not work, but even my example\\n\\nself.s := IF(TRUE,a,a).field;\\n
\\ndoes not work, where the type is identical.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-08-03 12:31:45\" },\n\t{ \"post_id\": 2144, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"Re: implicit dereferencing\", \"username\": \"gsmith\", \"post_text\": \"The following example works fine:\\n\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\nEND;\\n\\na := DATASET([ {1,'Fred','Smith'},\\n {2,'Joe','Blow'},\\n {3,'Jane','Smith'}],Layout_Person);\\n\\n\\nb := DATASET([ {1,'FredXXX','SmithXXX'},\\n {2,'JoeXXX','BlowXXX'},\\n {3,'JaneXXX','SmithXXX'}],Layout_Person);\\n\\nf := IF(false,a,b).FirstName;\\nd := IF(false,a,b);\\n \\noutput(d, {f});
\\n\\n(Not that you would write it that way)\", \"post_time\": \"2012-08-03 10:35:42\" },\n\t{ \"post_id\": 2143, \"topic_id\": 475, \"forum_id\": 8, \"post_subject\": \"implicit dereferencing\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIn ECL one can do things like:\\n\\na := IF(<boolean expression>,b.field,c.field);\\n
\\nBut one cannot do:\\n\\na := IF(<boolean expression>,b,c).field;\\n
\\nThe latter would be very nice where you wanted to pass a parameter to a MACRO which could then access many fields from one input parameter.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-08-03 08:54:20\" },\n\t{ \"post_id\": 2179, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"rtaylor\", \"post_text\": \"It worked because the ECLplus docs say:ecl= The ECL code to execute. Optionally, this may be replaced by the name of an input file containing the ECL to execute (in the form: @inputfile), or the name of a stored ECL file to execute (in the form: $folder.eclfile).
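\\nSo, schematically (the file name here is just an example), either of these invocations points ECLplus at the same code -- the first passes the file via ecl=, the second replaces ecl= with the @inputfile form:\\n\\neclplus cluster=thor server=172.25.37.10 ecl=@SprayFile3.ecl\\neclplus cluster=thor server=172.25.37.10 @SprayFile3.ecl\\n
\\n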
HTH,\\n\\nRichard\", \"post_time\": \"2012-08-08 12:49:24\" },\n\t{ \"post_id\": 2173, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"prachi\", \"post_text\": \"This eclplus query worked :\\n\\nroot@cloudx-767-700:/var/lib/HPCCSystems/mydropzone# eclplus owner=root password=newuser_123 cluster=thor server=172.25.37.10 @/usr/share/dumphere/hpcc/SprayFile3.ecl\\nWorkunit W20120806-222758 submitted\\n[Result 1]\\nResult_1\\nD20120806-222759
\\n\\n\\nI have omitted 'ecl=' and 'http://' in the above query.\\nI have used '/var/lib/HPCCSystems/mydropzone' path because the file which i uploaded is in this.\", \"post_time\": \"2012-08-08 05:18:53\" },\n\t{ \"post_id\": 2167, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"rtaylor\", \"post_text\": \"Also when i removed 'ecl=' (would like to know reason for this) and 'http://' word eclplus query also worked.
Please show me full syntax of the variations that also worked.\\n\\nRichard\", \"post_time\": \"2012-08-07 14:59:35\" },\n\t{ \"post_id\": 2165, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"prachi\", \"post_text\": \"Thanks...\\nit works..\\nAlso when i removed 'ecl=' (would like to know reason for this) and 'http://' word eclplus query also worked. \\neclplus cluster=thor server=http://172.25.37.10 ecl=@SprayFile4.ecl\", \"post_time\": \"2012-08-07 04:23:20\" },\n\t{ \"post_id\": 2162, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"rtaylor\", \"post_text\": \"
I would also like too know what is wrong in my query Because as mentioned above in my post the output is containing 'Query is empty' state.
The 'Query is empty' error is telling you that the file does not contain any actions, but only definitions. \\n\\n"Executable" ECL code (what we call Builder Window Runnable -- BWR code) is what the command line program is expecting in the file, and you gave it a Definition. \\n\\nThere are two differences between a Definition file and a BWR file:
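1. A Definition file contains only definitions (typically a single EXPORT definition whose name matches the file name) and therefore performs no action.\\n2. BWR code contains at least one action -- an expression that actually executes and produces a result.\\n
\\n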
So the code file you listed contained this:IMPORT Std;\\n\\nEXPORT SprayFile3 := STD.File.fSprayVariable( '172.25.37.10', \\n '/var/lib/HPCCSystems/mydropzone/Emp.csv',,,,, \\n 'mythor','~EclSpray::csvFile',,\\n 'http://172.25.37.10:8010/FileSpray',\\n TRUE,TRUE,FALSE);
which is a definition of something called "SprayFile3" -- and as a definition, it simply defines WHAT a "SprayFile3" is.\\n\\nChanging your code to this:IMPORT Std;\\nSTD.File.fSprayVariable( '172.25.37.10',\\n '/var/lib/HPCCSystems/mydropzone/Emp.csv',,,,, \\n 'mythor','~EclSpray::csvFile',,\\n 'http://172.25.37.10:8010/FileSpray',\\n TRUE,TRUE,FALSE);
changes the file contents from a Definition to a function (STD.File.fSprayVariable) acting as an action (an expression) to produce a result.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-06 14:54:09\" },\n\t{ \"post_id\": 2157, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"prachi\", \"post_text\": \" \\nThanks...\\nI would also like too know what is wrong in my query
Because as mentioned above in my post the output is containing 'Query is empty' state.\", \"post_time\": \"2012-08-05 17:47:27\" },\n\t{ \"post_id\": 2154, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS Query\", \"username\": \"rtaylor\", \"post_text\": \"
1. My ultimate objective is to spray a single/multiple file(s) whose size will be in TB; also the file(s) will be one with no extension and can't be viewed due to massive size(It is said to have some data from a Physics experiment). Will the similar command spray such a massive file properly?Will an ECL query on this data render a proper dataset?
The total size of the file and lack of filename extension do not affect the spray operation (of course, the larger the file the longer it will take), no matter whether the spray is done through ECL Watch, DFUplus, or the Standard Library functions. \\n\\nIf you cannot view the file, then you will have to rely on the information given to you by the file's provider as to its structure and field layout. \\n\\nOnce the file has been sprayed and correctly defined by its ECL RECORD structure and DATASET declaration, then ECL will be able to work with the data, no matter how big the file is. NB: you will need a much larger cluster than 3 nodes to handle files in the multi-Terabyte range (we use our 400-node clusters for our large datasets -- you may even want to go larger). \\n2. What if the file is a multimedia file like mp4? How should the ECL for it be written?
To HPCC it would all just be binary data.(To summarize points 2. and 3., which ECL clauses/functions etc. are used to set ONE 'logical' record/row in a file?
See http://hpccsystems.com/bb/viewtopic.php?f=8&t=473&sid=91a5b96a6eea55fbf263bf4f30a3b436 and http://hpccsystems.com/bb/viewtopic.php?f=12&t=476&sid=91a5b96a6eea55fbf263bf4f30a3b436\\n Ex. A file may have one record that spans to multiple lines
Which implies the presence of record delimiters? Or just the way the data wraps in a data viewer? and in each record, the columns are bits separated by different delimiters.
What exactly do you mean here -- can you provide an example file please??\\n3. The record length isn't specified anywhere unlike ECL Watch - is it correct to assume it need not be given if the size of the file is huge and the format unknown ?
If not specified, it defaults to 8K, and if your records are larger then you must explicitly set the maxrecordsize parameter (the SprayVariable function's optional third parameter which your code omitted, thereby invoking the default).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-03 15:06:56\" },\n\t{ \"post_id\": 2146, \"topic_id\": 477, \"forum_id\": 8, \"post_subject\": \"ECLPLUS Query\", \"username\": \"prachi\", \"post_text\": \"Hello,\\n\\nI have a 3-node HPCC set-up. The different nodes and the processes running on them are shown below :\\n\\nroot@cloudx-767-700:~# service hpcc-init status\\nmydafilesrv ( pid 21286 ) is running...\\nmydfuserver ( pid 6558 ) is running...\\nmyeclagent ( pid 6639 ) is running...\\nmyeclccserver ( pid 6720 ) is running...\\nmyesp ( pid 6800 ) is running...\\nmysasha ( pid 6883 ) is running...\\n
\\n\\n\\nroot@cloudx-798-730:~# service hpcc-init status\\nmydafilesrv ( pid 30555 ) is running...\\nmyroxie ( pid 31107 ) is running...\\n
\\n\\nroot@cloudx-799-731:~# sudo service hpcc-init status\\nmydafilesrv ( pid 10293 ) is running...\\nmydali ( pid 10856 ) is running...\\nmyeclscheduler ( pid 10963 ) is running...\\nmythor ( pid 16028 ) is running..\\n
\\n\\n\\nI want to spray file through EclPlus.\\n\\nI have transferred the file,Emp.csv, to mydropzone on cloudx-767-700 via ftp\\n\\nName,PsNo,BU,Designation,addr\\nPrachi,20001060,BU,SET,Vashi\\nAnkita,20001060,BU3,SET,Powai-II
\\n\\nThe ECL file for the spraying purpose viz.SprayFile3.ecl is :\\n\\nIMPORT Std;\\n\\nEXPORT SprayFile3 := STD.File.fSprayVariable( '172.25.37.10', '/var/lib/HPCCSystems/mydropzone/Emp.csv',,,,, \\n'mythor','~EclSpray::csvFile',,'http://172.25.37.10:8010/FileSpray', \\n,TRUE,TRUE,FALSE);
\\n\\nI executed the following command on cloudx-767-700(IP 172.25.37.10) :\\n\\neclplus cluster=thor server=http://172.25.37.10 ecl=@SprayFile4.ecl
\\n\\nThe output that I am getting is :\\n\\nWorkunit W20120803-220951 submitted\\n<Error><source>eclcc</source><line>1</line><code>3</code><message> Query is empty</message></Error>\\n
\\n\\nThe workunit's state appears as 'failed' when viewed via ECL Watch.\\n\\nPlease guide as to how this can be solved.\\n\\nIn addition to the above error, I also have the following queries :\\n\\n1. My ultimate objective is to spray a single/multiple file(s) whose size will be in TB; also the file(s) will be one with no extension and can't be viewed due to massive size(It is said to have some data from a Physics experiment). Will the similar command spray such a massive file properly?Will an ECL query on this data render a proper dataset?\\n2. What if the file is a multimedia file like mp4? How should the ECL for it be written?\\n(To summarize points 2. and 3., which ECL clauses/functions etc. are used to set ONE 'logical' record/row in a file? Ex. A file may have one record that spans to multiple lines and in each record, the columns are bits separated by different delimiters.)\\n3. The record length isn't specified anywhere unlike ECL Watch - is it correct to assume it need not be given if the size of the file is huge and the format unknown ?\\n\\nThanks and regards !!!\", \"post_time\": \"2012-08-03 11:43:27\" },\n\t{ \"post_id\": 2151, \"topic_id\": 478, \"forum_id\": 8, \"post_subject\": \"Re: How to delete data from the nodes physically\", \"username\": \"rtaylor\", \"post_text\": \"Ankita,\\n\\nYou can delete logical files from your environment through the ECL Watch "Browse Logical Files" page. Just select the file to delete by checking the box immediately left of its name, then the Delete button at the bottom of the page will become available -- press that and the file (all physical parts on all nodes) will be deleted.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-03 13:22:53\" },\n\t{ \"post_id\": 2149, \"topic_id\": 478, \"forum_id\": 8, \"post_subject\": \"How to delete data from the nodes physically\", \"username\": \"Ankita Singla\", \"post_text\": \"Hii...\\n\\nI have 3 nodes and m spraying data on 3 nodes..When i despray , it creates the copy of original data that is sprayed but the data remain physically present on each node...\\nSo vat should be done to delete the physical data present on the nodes...as when the data is in terabytes, we cant delete it manually....so Is there any method to delete it physically....\\n\\nPlzz help..\\nThanks and Regards...\", \"post_time\": \"2012-08-03 12:55:01\" },\n\t{ \"post_id\": 2178, \"topic_id\": 480, \"forum_id\": 8, \"post_subject\": \"Re: Query related Record size in DFUPlus\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanks for the reply...\\nGot meaningful info..... \", \"post_time\": \"2012-08-08 11:47:43\" },\n\t{ \"post_id\": 2163, \"topic_id\": 480, \"forum_id\": 8, \"post_subject\": \"Re: Query related Record size in DFUPlus\", \"username\": \"rtaylor\", \"post_text\": \"Ankita,\\n\\nThe error message you got:
Failed: Source file //172.25.37.10:7100/var/lib/HPCCSystems/mydropzone/test.txt is not a valid multiple of the expected record size (4096)
is the result of attempting to do a fixed-length record spray (specified by your "recordsize=4096" command line option) on a file that does not contain fixed-length records. The system quite correctly divided your file size by 4096 and discovered that it was not evenly divisible -- hence the error.\\nso i want to ask:\\nHOW TO CALCULATE THE RECORD LENGTH FOR ANY GIVEN FILE
The short answer is -- you do not "calculate" the record length but you instead "determine" it by either looking at the file beforehand yourself (using any tool you want to use), or asking the data provider to give you that information. This is the standard practice in every data handling system I have ever used. \\n\\nWhat system have you used in the past that did this automatically for you? How did they accomplish it? I could envision a tool that could do this pretty easily for CSV or XML data, but how can it be done with binary data when you do not know the record delimiter? \\n\\nIf you DO know the record delimiter, then it becomes exactly the same process you would use to determine the max length of a CSV or XML record -- parse the file looking for the greatest distance between the known delimiters and there's your max size.\\n\\nYour other option, since you apparently know nothing about the file structure beforehand, would be to do a fixed-length record spray using a record size of "1" just to get the file onto the HPCC. The downside to this is that you will absolutely have records "split" across nodes, but if your file is a PDF (as your test case seems to be), there may or may not be an inherent "record" structure to work with anyway.\\n\\nSo, the question once again is -- what are you trying to accomplish? If you can provide a small example file of what you really want to work with, then maybe we can help you more.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-06 15:17:58\" },\n\t{ \"post_id\": 2159, \"topic_id\": 480, \"forum_id\": 8, \"post_subject\": \"Query related Record size in DFUPlus\", \"username\": \"Ankita Singla\", \"post_text\": \"Hiii....\\n\\nI am trying uploading and spraying file through DFUPlus...by \\n\\ndfuplus action=spray srcip=172.25.37.10 srcfile=/var/lib/HPCCSystems/mydropzone/test.pdf dstname=testCoe::poc::dfuplus::sprayed6text dstcluster=mythor prefix=FILENAME,FILESIZE nosplit=0 server=http://172.25.37.10:8010 username=root password=newuser_123 overwrite=1 replicate=1 recordsize=4096\\n
\\nbut it is giving this error:\\n\\nChecking for local Dali File Server\\n\\nSpraying from /var/lib/HPCCSystems/mydropzone/test.txt on 172.25.37.10:7100 to testCoe::poc::dfuplus::sprayed6text\\nSubmitted WUID D20120806-195734\\nD20120806-195734 status: queued\\nFailed: Source file //172.25.37.10:7100/var/lib/HPCCSystems/mydropzone/test.txt is not a valid multiple of the expected record size (4096)
\\n\\nso i want to ask:\\nHOW TO CALCULATE THE RECORD LENGTH FOR ANY GIVEN FILE\\n\\nThanks and Regards..\", \"post_time\": \"2012-08-06 12:07:21\" },\n\t{ \"post_id\": 2192, \"topic_id\": 482, \"forum_id\": 8, \"post_subject\": \"Re: Control Spraying operation in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Again, no matter what the data is, if the records are variable-length and there is no record delimiter in the file, then you will have to pre-process the file to add record delimiters before you can spray the data. \\n\\nHowever, if you already have a program that extracts the relevant information from the file, why why not just run that against the segy files to build a more manageable-format data file to spray?\\n\\nRichard\", \"post_time\": \"2012-08-08 18:15:52\" },\n\t{ \"post_id\": 2189, \"topic_id\": 482, \"forum_id\": 8, \"post_subject\": \"Re: Control Spraying operation in ECL\", \"username\": \"prachi\", \"post_text\": \"Thanks for the reply,Richard !\\n\\nThe segy file(min. size 80MB) can be viewed only using a seg y viewer - text editors render junk.\\n\\nThe Textual File Header looks like this\\n\\nThe Trace Header looks like this\\n\\nNote : I was not able to find a suitable image for the trace data\\n\\nWe have written a small Java program that reads the files, byte by byte, and stores the Textual File Header and the Trace Header in program variables - is something similar possible with ECL and will it help to spray the file 'logically'?\", \"post_time\": \"2012-08-08 17:31:58\" },\n\t{ \"post_id\": 2180, \"topic_id\": 482, \"forum_id\": 8, \"post_subject\": \"Re: Control Spraying operation in ECL\", \"username\": \"rtaylor\", \"post_text\": \"1. If ONE such record in my case consists of Textual File Header (3200 bytes) + Binary Header File (400 bytes) + Trace Header (240 bytes) + Trace Data (variable size), how to specify this while spraying(else, it will spray randomly) the file?\\n2. If ONE such record in my case consists of Trace Header (240 bytes) + Trace Data (variable size), how to specify this while spraying(else, it will spray randomly) the file?\\n
The answers to both these questions are another two questions:
If the answers are YES and NO, then you simply do a CSV spray and specify the record delimiter.\\n\\nIf the answers are NO and YES, then you must first pre-process the file to add your own record delimiter, then simply do a CSV spray and specify the record delimiter.\\n\\nThe one type of file that cannot be sprayed is a variable-length record file that contains no record delimiters -- these must always be pre-processed using any tool you choose to add your own delimiters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-08 13:01:28\" },\n\t{ \"post_id\": 2174, \"topic_id\": 482, \"forum_id\": 8, \"post_subject\": \"Control Spraying operation in ECL\", \"username\": \"prachi\", \"post_text\": \"Hello,\\nI have one seg-y format file named 'sample.sgy'. Its structure is :\\nhttp://en.wikipedia.org/wiki/File:SEGY_file_byte_stream_structure.svg\\n\\nAs the above link depicts :\\n\\nFirst 3200 bytes is Textual File Header\\nNext 400 bytes is Binary Header File\\nNext 240 bytes 1st Trace Header\\nNext n bytes data corresponding to 1st Trace Header\\nNext 240 bytes 2nd Trace Header\\nNext n bytes data corresponding to 2nd Trace Header\\n.\\n.\\n.\\n\\n\\n\\nCurrently when i am uploading and spraying sample.sgy file through Eclwatch --> Spray Variable(Spray Csv), it is spraying it on 3 Thor slaves :\\n\\nFile parts:\\n\\nNumber\\tIP\\tSize\\n1\\t172.25.38.214\\t1,354\\n2\\t172.25.38.214\\t1,463\\n3\\t172.25.38.214\\t1,415
\\n\\nHPCC documentation says that it keeps one complete logical record on a single node for faster processing of that data.\\n\\n\\nNow, my queries for different scenarios are:\\n\\n1. If ONE such record in my case consists of Textual File Header (3200 bytes) + Binary Header File (400 bytes) + Trace Header (240 bytes) + Trace Data (variable size), how to specify this while spraying (else, it will spray randomly) the file?\\n2. If ONE such record in my case consists of Trace Header (240 bytes) + Trace Data (variable size), how to specify this while spraying (else, it will spray randomly) the file?\\n\\nThanks and regards !\", \"post_time\": \"2012-08-08 09:10:25\" },\n\t{ \"post_id\": 2196, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nTake a look at the example code in my previous post. It's all about spraying multiple physical files, where each file is meant to be a \"record\" in the resulting logical file on the HPCC. \\n\\nA more relevant example might be something like this://Multiple spray all .XML files under\\n// c:\\\\import on 10.150.51.26 to single logical file called RT::XMLtoParse\\nC:\\\\>dfuplus action=spray srcip=10.150.51.26\\n srcfile=c:\\\\import\\\\*.xml\\n dstcluster=le_thor dstname=RT::XMLtoParse overwrite=1\\n prefix=FILENAME nosplit=1\\n\\n//this would result in a RECORD structure like this:\\nimageRecord := RECORD\\n STRING filename;\\n STRING xmltext; \\nEND;
This should spray all .XML files to a single logical file in the HPCC -- each XML file ends up in a single record in the logical file so that you can then use PARSE to extract the data in a meaningful manner.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-08 19:38:40\" },\n\t{ \"post_id\": 2194, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nThat is my take on it as well!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-08 18:44:38\" },\n\t{ \"post_id\": 2191, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"DSC\", \"post_text\": \"Out of curiosity, when is nosplit a good option? When a file is very small and you know that it will be copied to all nodes anyway during certain operations?\\n\\nDan\", \"post_time\": \"2012-08-08 18:11:05\" },\n\t{ \"post_id\": 2187, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"rtaylor\", \"post_text\": \"The nosplit=1 option specifies that each individual file should be complete on a single node, since the intent is to be spraying multiple physical files from the landing zone into a single logical file on the HPCC wherein each physical file sprayed is a single record in the resulting logical file on the HPCC. \\n\\nLike this example from the HPCCclientTools.PDF://Multiple spray all .JPG and .BMP files under\\n// c:\\\\import on 10.150.51.26 to single logical file called LE::imagedb:\\nC:\\\\>dfuplus action=spray srcip=10.150.51.26\\n srcfile=c:\\\\import\\\\*.jpg,c:\\\\import\\\\*.bmp\\n dstcluster=le_thor dstname=LE::imagedb overwrite=1\\n prefix=FILENAME,FILESIZE nosplit=1\\n\\n//this would result in a RECORD structure like this:\\nimageRecord := RECORD\\n STRING filename;\\n DATA image; //first 4 bytes contain the length of the image data\\nEND;
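Once a multiple-file spray like that completes, the resulting logical file can be read back with a DATASET declaration matching the same RECORD structure. A minimal sketch, assuming the LE::imagedb name from the example above (FLAT is the usual file type here; adjust the name to your own environment):
imageRecord := RECORD
    STRING filename;
    DATA   image;   // first 4 bytes contain the length of the image data
END;

imageData := DATASET('~LE::imagedb', imageRecord, FLAT);

// For example, list the names of the physical files that were rolled into the logical file
OUTPUT(TABLE(imageData, {filename}));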
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-08 17:20:45\" },\n\t{ \"post_id\": 2184, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"bforeman\", \"post_text\": \"Hi Ankita,\\n\\nWith all due respect, it does not matter what node on the cluster the file will get sprayed to. You do not have to worry about what node is storing the data, as you always refer to all files on your cluster as a single logical file name. That is the real power of HPCC. Other developers on other big data platforms have to be aware of wher data is on what node, but not HPCC. The DFU Server always finds the optimum location to spray, whether you have split or not. By the way, why is "no split" important to you? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-08 15:37:12\" },\n\t{ \"post_id\": 2175, \"topic_id\": 483, \"forum_id\": 8, \"post_subject\": \"Spraying using DFUPlus\", \"username\": \"Ankita Singla\", \"post_text\": \"hii...\\n\\nIf i run ECLPlus & DFUPlus with nosplit=1 to spray multiple files. \\n\\ndfuplus action=spray srcip=172.25.37.10 srcfile=/var/lib/HPCCSystems/mydropzone/Emp1.csv dstname=testCoe::poc::dfuplus::spraycheck22k dstcluster=mythor prefix=FILENAME,FILESIZE nosplit=1 server=http://172.25.37.10:8010 username=root password=newuser_123 overwrite=1 replicate=1 format=csv\\n
\\n\\nIs it guarantee that the file will go on different nodes...\\nIf not How can i ensure that each complete file will go on different node.\\n\\nThanks and regards..\", \"post_time\": \"2012-08-08 10:11:37\" },\n\t{ \"post_id\": 2190, \"topic_id\": 484, \"forum_id\": 8, \"post_subject\": \"Re: Incorporating external libraries through ECL\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanks for the reply,Bob !\\n\\nI'll try the approach you suggested for 1.\\n\\nAs for 2., let me give one example to make my direction clearer.\\n\\nSuppose there is an social application like Facebook/some share market application that exposes a Web Service(RESTful or requires WS-Clients to communicate over SOAP). When the WS are invoked, the response is typically a stream from which a file,a string etc. can be constructed. Now is it possible that HPCC can help pull such data and spray it directly into the Thor cluster? There will be three logical steps I suppose :\\n\\n1. Pulling the data physically from the WS\\n2. Interpreting the stream to construct the necessary file from it\\n3. Spraying the file onto Thor\\n\\nThanks and regards !\", \"post_time\": \"2012-08-08 18:10:56\" },\n\t{ \"post_id\": 2185, \"topic_id\": 484, \"forum_id\": 8, \"post_subject\": \"Re: Incorporating external libraries through ECL\", \"username\": \"bforeman\", \"post_text\": \"Hi Ankita,\\n\\nRegarding (1), all you need is an IMPORT statement in your ECL code:\\n\\nIMPORT STD;
\\n\\nThen, any existing libraries you need can be referenced but must be fully qualified. For example:\\n\\nupperstring := STD.STR.ToUpperCase($.yourfieldname);
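To make that concrete, here is a minimal self-contained sketch of the same idea; the record layout and inline test data are invented purely for illustration:
IMPORT STD;

rec := RECORD
    STRING20 name;
END;

people := DATASET([{'smith'},{'jones'}], rec);

// Fully qualified call to the standard string library inside a PROJECT
upperPeople := PROJECT(people,
                       TRANSFORM(rec, SELF.name := STD.Str.ToUpperCase(LEFT.name)));

OUTPUT(upperPeople);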
\\n\\nIf you wish to include your own libraries, see the Language Reference PDF and search for SERVICE structures.\\n\\nAs for (2), I think that the examples for SOAPCALL are pretty good in the LRM, maybe if you could attach some sample code with more details as to what you are trying to dowe could help you debug it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-08 16:00:10\" },\n\t{ \"post_id\": 2176, \"topic_id\": 484, \"forum_id\": 8, \"post_subject\": \"Incorporating external libraries through ECL\", \"username\": \"Ankita Singla\", \"post_text\": \"Hii...\\n\\nCan anyone guide me abt.\\n\\n1. How we can incorporate external library files and external programs in ECL..\\nif possible plzz explain it with example.\\n\\n2. working example of SOAPCALL interface to access the webservices..\\n\\nplzz help..\\nThanks and Regards...\", \"post_time\": \"2012-08-08 11:45:47\" },\n\t{ \"post_id\": 2242, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"ghalliday\", \"post_text\": \"CSV actually has a similar problem. If the end of line character can be quoted then you can't jump into the middle of the file to find the record boundaries - you need to scan linearly.\\n\\nThe variable length data is further complicated by having to decode the record structure to evaluate the record length. But yes it could work in a similar way if it was needed.\\n\\nAn alternative which we have also discussed is to save multiple split points for each file part as it was generated. These could then be used to divide the file up.\", \"post_time\": \"2012-08-21 12:02:35\" },\n\t{ \"post_id\": 2241, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"Done: https://github.com/hpcc-systems/HPCC-Pl ... ssues/3195\\n\\nI would have thought that you could leverage the CSV spray code to pull the file linearly across the network and redistribute the records. But I'm truly guessing, though, as I haven't perused that portion of the source code to know if it's even possible. It just feels like the same problem.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-08-21 11:38:58\" },\n\t{ \"post_id\": 2239, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"ghalliday\", \"post_text\": \"The reason it is tricky is because you need to linearly scan the files to determine where each of the records starts/ends. With fixed length it can be calculated.\\n\\nPlease can you add an issue to github, and we can improve the error message, and also check that it defaults to nosplit for variable length.\\n\\n(I think there is a separate bug for ensuring that option can be specified.)\", \"post_time\": \"2012-08-21 08:36:51\" },\n\t{ \"post_id\": 2237, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"I think you're exactly right. I spot-checked several of the files I remembered as succeeding and failing via 'View File Details' in ECL Watch and it appears that all failing files had variable-length records and all succeeding files had fixed-length records. Good catch.\\n\\nIt would be helpful for this feature to work in all cases. 
If that turns out to be impossible, or wildly difficult, then perhaps in the short term a better error message should be provided.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-08-20 11:40:01\" },\n\t{ \"post_id\": 2236, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"ghalliday\", \"post_text\": \"I think the error about not being able to repartition would be triggered when your source file contains variable length records. That would include child datasets, but also variable length strings.\\n\\nI expect it is triggered when the number of source slaves doesn't match the number of target slaves. If they match I would hope that it should succeed. It might default to redistributing, but it should at least fall back to not if it can't. If not, we should fix that.\", \"post_time\": \"2012-08-20 09:45:34\" },\n\t{ \"post_id\": 2230, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"HPCC Staff\", \"post_text\": \"Hi Dan, thanks for keeping us updated on this issue. We will look into this further and circle back on our progress.\", \"post_time\": \"2012-08-16 12:38:49\" },\n\t{ \"post_id\": 2229, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"Update: It appears that if a destination cluster has only one node containing Thor slaves (either one slave or more) then nosplit=1 must be enabled. If the destination has multiple nodes with Thor slaves, nosplit=0 needs to be enabled. If I follow that simple rule then my datasets (which are sourced on a four node cluster) do wind up at the destination and appear to be dispersed correctly (even on the one-node-multiple-Thor-slave-destination setup).\\n\\nThis doesn't seem right.\\n\\nDan\", \"post_time\": \"2012-08-16 12:29:32\" },\n\t{ \"post_id\": 2216, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"Update: I managed to make this work using dfuplus and setting the nosplit option. This may be an artifact of my configurations. The destination cluster is two nodes, with two Thor slaves enabled (which means that one node has those two slaves). Despite the nosplit, the files do seem to be split among the two slaves.\\n\\nDan\", \"post_time\": \"2012-08-14 14:48:56\" },\n\t{ \"post_id\": 2213, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"Update: I managed to find a 'simple' dataset (one without embedded child datasets) that fails to copy with the same error. That means my initial thought that only 'complex' datasets were the problem is false.\\n\\nI've examined the particulars between two 'simple' datasets, one that fails and one that doesn't, and I honestly cannot find anything significant. Both are fairly small, distributed across three Thor nodes, etc.. 
What can I look at to help troubleshoot this?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-13 19:58:55\" },\n\t{ \"post_id\": 2205, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"The same error occurs using Remote Copy within ECL Watch, for those datasets that caused the error in Std.File.RemotePull.\\n\\nDan\", \"post_time\": \"2012-08-10 14:54:27\" },\n\t{ \"post_id\": 2202, \"topic_id\": 488, \"forum_id\": 8, \"post_subject\": \"Std.File.RemotePull failure\", \"username\": \"DSC\", \"post_text\": \"I've run into the following error while trying to copy datasets from one cluster to another (both running 3.8.0CE):\\n\\nError: System error: 0: Graph[1], apply[3]: SLAVE 10.210.150.80:20100: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning (0, 0), 0,
\\n\\nI have several datasets to copy, and it appears that the ones giving me this error have child datasets in their record definitions. Simple (flat) datasets are copied without a problem. Are child datasets supported? Or is this error telling me something else?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-10 13:41:59\" },\n\t{ \"post_id\": 2245, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"DSC\", \"post_text\": \"Oh yeah. I don't know why I keep forgetting named parameters. They resemble keyed arguments in Lisp and that's one of the more useful self-documenting features in a language. Thanks for the reminder!\\n\\nDan\", \"post_time\": \"2012-08-21 12:41:38\" },\n\t{ \"post_id\": 2243, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"rengolin\", \"post_text\": \"Hi Dan,\\n\\nYou can use named parameters, so no need to keep the order and avoid unused parameters.\\n\\nIt's as simple as:\\n\\nRemotePull(..., ..., ..., COMPRESS := TRUE, ..., ... )
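Spelled out with the argument names listed later in this thread (remoteEspFsURL, sourceLogicalName, destinationGroup, destinationLogicalName, compress), a call might look like the sketch below; the URL and logical file names are placeholders rather than values from this thread:
IMPORT STD;

STD.File.RemotePull('http://10.210.150.80:8010/FileSpray',  // remoteEspFsURL (placeholder)
                    '~myscope::source::dataset',            // sourceLogicalName (placeholder)
                    'mythor',                               // destinationGroup
                    '~myscope::copied::dataset',            // destinationLogicalName (placeholder)
                    compress := TRUE);                      // named, so the optional arguments in between are skipped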
\\n\\nThe issue to update the docs is still open, should be going out soon.\\n\\ncheers,\\n--renato\", \"post_time\": \"2012-08-21 12:19:00\" },\n\t{ \"post_id\": 2231, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"rengolin\", \"post_text\": \"[quote="DSC":1263q8du]Learning from the workaround to that other problem, however, leads to me recommend that you also add a 'no split' option to Std.File.RemotePull. That would help achieve greater parity with Remote Copy.\\n\\nHi Dan,\\n\\nI've added this to the issue in GitHub, I think it's a good idea, at least to re-code it to make it consistent.\\n\\ncheers,\\n--renato\", \"post_time\": \"2012-08-16 13:17:47\" },\n\t{ \"post_id\": 2228, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"DSC\", \"post_text\": \"Hi Renato,\\n\\nThanks for the update. I admit to not trying the code with the compression option, so I don't know if it works or not. I discovered this particular problem while tracking down the other problem (http://hpccsystems.com/bb/viewtopic.php?f=8&t=488&sid=684d6a38196f3ec39c8de9785075f251) I was experiencing. I'll try it out after I solve some of the other issues on my plate.\\n\\nLearning from the workaround to that other problem, however, leads to me recommend that you also add a 'no split' option to Std.File.RemotePull. That would help achieve greater parity with Remote Copy.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-08-16 12:24:52\" },\n\t{ \"post_id\": 2227, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"rengolin\", \"post_text\": \"Hi Dan,\\n\\nIs it working with the additional compression options in ECL?\\n\\nThere are some undocumented features in HPCC, as they meant to be internal implementation details (like buffer size) and we didn't want users having to use them wrongly. But the compression option is clearly a user option.\\n\\nI've open the following Docs issue in GitHub to deal with the documentation side of it:\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/3155\\n\\nIf the functionality is working on ECL Watch and ECL code, I think we just need to update the docs.\\n\\nMaybe, to hide the other (still internal options), we might have to move the ```compress``` option up, and that might create some problems. For now, I think if you pass nothing to the other intermediate arguments and ```true``` to ```compress```, it should work.\\n\\nI created the following issue to track that discussion:\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/3156\\n\\ncheers,\\n--renato\", \"post_time\": \"2012-08-16 11:17:27\" },\n\t{ \"post_id\": 2206, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"DSC\", \"post_text\": \"OK, I figured this out by examining the File.ecl source code.\\n\\nThe documentation for RemotePull shows the following arguments:\\n\\nremoteURL\\nsourcelogicalname\\ndestinationGroup\\ndestinationlogicalName\\ntimeout=-1\\nmaxConnections=1\\nallowoverwrite=false\\nreplicate=false\\nasSuperfile=false
\\n\\nThe code, however, shows these arguments:\\n\\nremoteEspFsURL\\nsourceLogicalName\\ndestinationGroup\\ndestinationLogicalName\\ntimeOut=-1\\nmaxConnections=-1\\nallowoverwrite=false\\nreplicate=false\\nasSuperfile=false\\nforcePush=false\\ntransferBufferSize=0\\nwrap=false\\ncompress=false
\\n\\nforcePush, transferBufferSize, wrap, and compress are undocumented. I think I can accurately guess what transferBufferSize and compress is, but what do forcePush and wrap mean?\\n\\nAlso, note the difference between the documented default value for maxConnections and what the code sets (-1).\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-10 15:06:56\" },\n\t{ \"post_id\": 2204, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.RemotePull compression\", \"username\": \"DSC\", \"post_text\": \"FWIW, the Remote Copy feature within ECL Watch does offer a compression checkbox.\\n\\nDan\", \"post_time\": \"2012-08-10 14:53:49\" },\n\t{ \"post_id\": 2203, \"topic_id\": 489, \"forum_id\": 8, \"post_subject\": \"Std.File.RemotePull compression\", \"username\": \"DSC\", \"post_text\": \"It appears that compression is ignored for datasets copied via RemotePull. In other words, a source dataset that is compressed will wind up uncompressed on the destination cluster. In addition, the documentation for RemotePull shows no option for managing compression. Is RemotePull supposed to copy the compression setting? Is there a hidden compression argument?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-10 13:45:57\" },\n\t{ \"post_id\": 2211, \"topic_id\": 490, \"forum_id\": 8, \"post_subject\": \"Re: SegFault testing Std.File.MonitorFile\", \"username\": \"richardkchapman\", \"post_text\": \"I have opened GitHub issues https://github.com/hpcc-systems/HPCC-Pl ... ssues/3115 and https://github.com/hpcc-systems/HPCC-Pl ... ssues/3116 for these. Will investigate.\", \"post_time\": \"2012-08-13 15:00:35\" },\n\t{ \"post_id\": 2210, \"topic_id\": 490, \"forum_id\": 8, \"post_subject\": \"Re: SegFault testing Std.File.MonitorFile\", \"username\": \"DSC\", \"post_text\": \"Relatedly, while I was trying various combinations of things to try to make this work, I was deleting the failed work units via the IDE. I later discovered that all the jobs were scheduled, as they appear in the Scheduler within ECL Watch. Unfortunately, I now cannot deschedule those jobs because the work units have been deleted. How can I clear those jobs?\\n\\nDan\", \"post_time\": \"2012-08-13 14:04:18\" },\n\t{ \"post_id\": 2209, \"topic_id\": 490, \"forum_id\": 8, \"post_subject\": \"SegFault testing Std.File.MonitorFile\", \"username\": \"DSC\", \"post_text\": \"I'm experimenting with Std.File.MonitorFile and, sadly, failing. Here is my sample code, which is closely modeled after the example code associated with MonitorFile:\\n\\nIMPORT * FROM Std;\\n\\nfoundFileEventName := 'FoundAFile';\\n\\nFile.MonitorFile\\t(\\n\\t\\t\\t\\t\\t\\tfoundFileEventName,\\n\\t\\t\\t\\t\\t\\t'10.210.150.80',\\n\\t\\t\\t\\t\\t\\t'/var/lib/HPCCSystems/dropzone/*'\\n\\t\\t\\t\\t\\t);\\n\\nOUTPUT('File Found') : WHEN(EVENT(foundFileEventName,'*'),COUNT(1));\\n
\\n\\nWhen I submit this to Thor, I receive a segfault. Here is the log from eclagent:\\n\\n00000014 2012-08-13 08:03:35 24596 24597 AgentExec: Dequeued workunit request 'W20120813-081507'\\n00000015 2012-08-13 08:03:35 24596 24597 AgentExec: Executing 'start_eclagent WUID=W20120813-081507 DALISERVERS=10.210.150.78:7070'\\n00000016 2012-08-13 08:03:35 24596 24597 Execution started\\n00000017 2012-08-13 08:03:35 24596 24597 AgentExec: Waiting on queue(s) 'hthor.agent,thor.agent'\\n00000000 2012-08-13 08:03:35 25289 25289 Logging to /var/log/HPCCSystems/myeclagent/eclagent.2012_08_13.log\\n00000001 2012-08-13 08:03:35 25289 25289 ECLAGENT build community_3.8.0-1\\n00000002 2012-08-13 08:03:35 25289 25289 Waiting for workunit lock\\n00000003 2012-08-13 08:03:35 25289 25289 Obtained workunit lock\\n00000004 2012-08-13 08:03:35 25289 25289 Loading dll (libW20120813-081507.so) from location /var/lib/HPCCSystems/dllserver/temp/libW20120813-081507.so\\n00000005 2012-08-13 08:03:35 25289 25289 Starting process\\n00000006 2012-08-13 08:03:35 25289 25289 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000007 2012-08-13 08:03:35 25289 25289 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x2aaaab600000 alignment=1048576 bitmapSize=10\\n00000008 2012-08-13 08:03:35 25289 25289 Waiting for run lock\\n00000009 2012-08-13 08:03:35 25289 25289 Obtained run lock\\n0000000A 2012-08-13 08:03:35 25289 25289 ================================================\\n0000000B 2012-08-13 08:03:35 25289 25289 Signal: 11 Segmentation fault\\n0000000C 2012-08-13 08:03:35 25289 25289 Fault IP: 0000003F85E79B80\\n0000000D 2012-08-13 08:03:35 25289 25289 Accessing: 0000000000000000\\n0000000E 2012-08-13 08:03:35 25289 25289 Registers:\\n0000000F 2012-08-13 08:03:35 25289 25289 EAX:0000000000000000 EBX:00002B2E6EEE27C8 ECX:0000000000000000 EDX:0000000000000000 ESI:00007FFFD2F69C30 EDI:0000000000000000\\n00000010 2012-08-13 08:03:35 25289 25289 CS:EIP:0033:0000003F85E79B80\\n00000011 2012-08-13 08:03:35 25289 25289 ESP:00007FFFD2F69B78 EBP:0000000000000000\\n00000012 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69B78]: 00002B2E7159BFAC D2F69C6000002B2E 00007FFFD2F69C60 85A1298200007FFF 0000003F85A12982 000000000000003F 0000000000000000 0000000000000000\\n00000013 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69B98]: 0000000000000000 0000000000000000 0000000000000000 014B5E2000000000 00000000014B5E20 D2F69C6000000000 00007FFFD2F69C60 014B5E5000007FFF\\n00000014 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69BB8]: 00000000014B5E50 0000000200000000 0000000000000002 0000000200000000 0000000000000002 D2F69FA000000000 00007FFFD2F69FA0 AB3E925900007FFF\\n00000015 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69BD8]: 00002AAAAB3E9259 0000000100002AAA 0000000000000001 014B5E5000000000 00000000014B5E50 0000000400000000 0000000000000004 0000000200000000\\n00000016 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69BF8]: 0000000200000002 6EECF15000000002 00002B2E6EECF150 71AB0B6500002B2E 0000000271AB0B65 D2F69F7000000002 00007FFFD2F69F70 014B210000007FFF\\n00000017 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69C18]: 00000000014B2100 014B64B000000000 00000000014B64B0 0000000000000000 0000000000000000 0000000000000000 0000000000000000 71AB0C8C00000000\\n00000018 2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69C38]: 00002B2E71AB0C8C 0000000000002B2E 0000000000000000 D2F69F7800000000 00007FFFD2F69F78 0101010100007FFF 0101010101010101 014B5E2001010101\\n00000019 
2012-08-13 08:03:35 25289 25289 Stack[00007FFFD2F69C58]: 00000000014B5E20 014B64B000000000 00000000014B64B0 715C31C400000000 00002B2E715C31C4 014B5E2000002B2E 00000000014B5E20 0000000200000000\\n0000001A 2012-08-13 08:03:35 25289 25289 Backtrace:\\n0000001B 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2b2e71ac6e06]\\n0000001C 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2b2e71ac7e55]\\n0000001D 2012-08-13 08:03:35 25289 25289 /lib64/libpthread.so.0 [0x3f8660eb70]\\n0000001E 2012-08-13 08:03:35 25289 25289 /lib64/libc.so.6(strlen+0x30) [0x3f85e79b80]\\n0000001F 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libeclrtl.so(_Z13rtlExtractTagRjRPcPKcS3_S3_+0x5c) [0x2b2e7159bfac]\\n00000020 2012-08-13 08:03:35 25289 25289 /var/lib/HPCCSystems/dllserver/temp/libW20120813-081507.so [0x2aaaab3e9259]\\n00000021 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x2b2e715c31c4]\\n00000022 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x3f) [0x2b2e715c3b3f]\\n00000023 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x26a) [0x2b2e715c35da]\\n00000024 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x139) [0x2b2e715c41b9]\\n00000025 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x14a) [0x2b2e6eec501a]\\n00000026 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x283) [0x2b2e6eec6fb3]\\n00000027 2012-08-13 08:03:35 25289 25289 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x6af) [0x2b2e6eecbeef]\\n00000028 2012-08-13 08:03:35 25289 25289 eclagent(main+0x61) [0x4011a1]\\n00000029 2012-08-13 08:03:35 25289 25289 /lib64/libc.so.6(__libc_start_main+0xf4) [0x3f85e1d994]\\n0000002A 2012-08-13 08:03:35 25289 25289 eclagent(__gxx_personality_v0+0xe9) [0x401079]\\n0000002B 2012-08-13 08:03:35 25289 25289 ThreadList:\\n4202B940 1107474752 25290: CMPNotifyClosedThread\\n4402C940 1141033280 25291: MP Connection Thread\\n4802E940 1208150336 25293: CSocketSelectThread\\n4602D940 1174591808 25294: LogMsgParentReceiver\\n4A02F940 1241708864 25295: LogMsgFilterReceiver\\n4C030940 1275267392 25296: EclAgent Abort Monitor\\n4E031940 1308825920 25297: CDaliPublisherClient\\n\\n0000002C 2012-08-13 08:03:35 25289 25289 SIG: Segmentation fault(11), accessing 0000000000000000, IP=0000003F85E79B80\\n\\n
\\n\\nAny thoughts or ideas about this?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-13 13:08:00\" },\n\t{ \"post_id\": 2226, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"rtaylor\", \"post_text\": \"Sid,\\n\\nThis code does the job:\\nFile2 := DATASET('~test::sb::dates.csv',\\n {UNSIGNED1 ID,STRING9 DateString},CSV);\\nOUTPUT(File2);\\nLayout := RECORD\\n UNSIGNED1 ID;\\n Date.date_t dateField;\\nEND;\\nFile3 := PROJECT(File2,\\n TRANSFORM(Layout,\\n SELF.ID:=LEFT.ID,\\n SELF.dateField:=Date.FromString(LEFT.dateString,'%d%b%Y')));\\nOUTPUT(File3,NAMED('Explicit_conversion_dates'));
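Writing the converted values back out in the other format discussed in this thread is then just another PROJECT over File3. A minimal sketch, reusing the Date.ToString call and '%Y%m%d' format string that appear in the posted TYPE code (assumed to exist in the Std.Date version you have installed):
ISOLayout := RECORD
    UNSIGNED1 ID;
    STRING8   isoDate;
END;

File4 := PROJECT(File3,
                 TRANSFORM(ISOLayout,
                           SELF.ID      := LEFT.ID,
                           SELF.isoDate := Date.ToString(LEFT.dateField,'%Y%m%d')));

OUTPUT(File4,NAMED('ISO_dates'));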
Let's discuss what you're trying to accomplish with the TYPE structure this afternoon in class.\\n\\nRichard\", \"post_time\": \"2012-08-15 18:42:36\" },\n\t{ \"post_id\": 2225, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"sbagaria\", \"post_text\": \"I corrected the code and put in more comments to explain what I am trying to do at each step and what I am expecting. The data file I refer to in the code is simply\\n\\n1,01JAN2000\\n2,21AUG2012\\n3,05DEC1992\\n4,32JAN2012\\n
\\n\\nIMPORT Std.Date;\\n\\nDATE_T(STRING format) := TYPE\\n EXPORT Date.Date_t STORE(STRING dateStr) := Date.FromString(dateStr,format);\\n EXPORT STRING LOAD(Date.Date_t dateVal) := Date.ToString(dateVal,format);\\n //EXPORT BOOLEAN GETISVALID(Date.Date_t dateVal) := IsValidDate(dateVal); // This function has not been provided to keep the example simple.\\nEND;\\n \\nLocaleDateFormatStr := '%d%b%Y';\\nISODateFormatStr := '%Y%m%d';\\n \\nISODate := DATE_T(ISODateFormatStr);\\nLocaleDate := DATE_T(LocaleDateFormatStr);\\n\\n\\n/* Compile time constant example */\\n// Convert from one format to another. date\\nLocaleDate date1 := '21AUG2012';\\n//ISODate date2 := (STRING)date1; // This will not solve our problem as we do not want to call the LOAD function for date1 and STORE function for date2.\\n// The idea is to use the same data for the internal representation of both these variables.\\n// So we try type transfers.\\n//ISODate date2 := (>ISODate<)date1; //ERROR\\n// This fails because date1 is a compile time constant and is always replaced by its string constant value in the generated C++ code.\\n// So this is trying to type transfer a string constant which will obviously not work. But sounds reasonable.\\n// The error message is:\\n// Error: Incompatible types: can not assign date to String (18, 15), 2007, \\n\\nOUTPUT(date1); // Should output '21AUG2012'. Does so.\\n//OUTPUT(date2); // Should output '20120821' ideally. But gives the above error.\\n\\n\\n/* Reading in a RECORD structure so that the date is not a compile time constant and the STORE function gets called when reading for the first time. */\\n//dates1 := DATASET([{'02JAN1992'}],{LocaleDate val});\\n//dates2 := (>ISODate<)dates1[1].val; //ERROR\\n\\n//OUTPUT(dates1);\\n//OUTPUT(dates2);\\n// This gives the following error suggesting that type transfer is not implemented for alien data types based on the physicaltype.\\n// Error: assert(!"Unknown copy source type") failed - file: ..\\\\..\\\\..\\\\..\\\\src\\\\HPCC-Platform\\\\ecl\\\\hqlcpp\\\\hqlcpp.cpp, line 10740\\n\\n/* The following code works fine on a locally compiled executable but does not work when the same input file is sprayed on thor and the job is run on thor */\\n\\n/* Reading in from a CSV with implicit conversion */\\nLayout := RECORD\\n UNSIGNED1 ID;\\n LocaleDate dateField;\\nEND;\\nFile := DATASET('~test::sb::dates.csv',Layout,CSV);\\n//OUTPUT(File,NAMED('Implicit_conversion_dates')); //ERROR\\n// Exception(s):\\n// 1000: 2012-08-15 17:23:31 GMT: Assignment to field 'datefield' causes row overflow. Size 14 exceeds the maximum size specified(5)\\n\\n/* Reading in from a CSV with explicit conversion */\\nFile2 := DATASET('~test::sb::dates.csv',{UNSIGNED1 ID,STRING9 dateString},CSV);\\nOUTPUT(File2);\\nFile3 := PROJECT(File2,TRANSFORM(Layout,SELF.ID:=LEFT.ID,SELF.dateField:=LEFT.dateString));\\n//OUTPUT(File3,NAMED('Explicit_conversion_dates')); //ERROR\\n// Same exception as above during the implicit conversion.\\n
\", \"post_time\": \"2012-08-15 17:30:30\" },\n\t{ \"post_id\": 2224, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"bforeman\", \"post_text\": \"When you corrected the compiler casting error, do the runtime errors remain the same?\\nCan you attach some sample data for us so we can reproduce here?\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2012-08-15 15:41:15\" },\n\t{ \"post_id\": 2223, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"sbagaria\", \"post_text\": \"I am sorry for the Utils reference. There was some copy paste involved in creating this example.\\n\\nThe idea is that this code gives compile errors for some things, runtime errors for yet some others and does not give the desired output for the rest.\", \"post_time\": \"2012-08-15 14:48:05\" },\n\t{ \"post_id\": 2222, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"bforeman\", \"post_text\": \"Removing the Utils references in the following lines:\\n\\n/* Reading in a RECORD structure so that the STORE function gets called. */\\ndates1 := DATASET([{'21AUG2012'}],{LocaleDate val});\\ndates2 := (>ISODate<)dates1[1].val;\\n\\nYour code now compiles cleanly on my machine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-15 13:04:42\" },\n\t{ \"post_id\": 2220, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Re: Type casting alien data types\", \"username\": \"bforeman\", \"post_text\": \"The compiler apparently doesn't like:\\n\\n//ISODate date2 := (>ISODate<)date1;\\n\\nISODate date2 := (STRING)date1; \\n\\nThis eliminates that compile error, but I'm seeing a lot of other errors that need more investigation.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-15 12:52:29\" },\n\t{ \"post_id\": 2219, \"topic_id\": 491, \"forum_id\": 8, \"post_subject\": \"Type casting alien data types\", \"username\": \"sbagaria\", \"post_text\": \"I am trying to define my own format specific date types. With the LOAD and STORE functions, I believe I should be able to explicitly, if not implicitly, type cast from one date format to another. I would expect type transfers to work in this case since the physicaltype is the same for all. However, ECL does not behave as expected.\\n\\nIMPORT Std.Date;\\n\\nDATE(STRING format) := TYPE\\n\\tEXPORT Date.Date_t STORE(STRING dateStr) := Date.FromString(dateStr,format);\\n\\tEXPORT STRING LOAD(Date.Date_t dateVal) := Date.ToString(dateVal,format);\\n\\t//EXPORT BOOLEAN GETISVALID(Date.Date_t dateVal) := IsValidDate(dateVal); // This function has not been provided to keep the example simple.\\nEND;\\n\\t\\nLocaleDateFormatStr := '%d%b%Y';\\nISODateFormatStr := '%Y%m%d';\\n\\t\\t\\nISODate := DATE(ISODateFormatStr);\\nLocaleDate := DATE(LocaleDateFormatStr);\\n\\n\\n/* Compile time constant example */\\nLocaleDate date1 := '21AUG2012';\\nISODate date2 := (>ISODate<)date1;\\n\\nOUTPUT(date2); // Should output '20120821' ideally, but fails because date1 is a compile time constant and is always replaced by its string constant value in the generated C++ code.\\n// The error message is:\\n// Error: Incompatible types: can not assign date to String (18, 15), 2007, \\n\\n\\n/* Reading in a RECORD structure so that the STORE function gets called. 
*/\\ndates1 := DATASET([{'21AUG2012'}],{Utils.LocaleDate val});\\ndates2 := (>Utils.ISODate<)dates1[1].val;\\n\\nOUTPUT(dates1);\\nOUTPUT(dates2);\\n\\n// This gives the following error suggesting that type transfer is not implemented for alien data types based on the physicaltype.\\n// Error: assert(!"Unknown copy source type") failed - file: ..\\\\..\\\\..\\\\..\\\\src\\\\HPCC-Platform\\\\ecl\\\\hqlcpp\\\\hqlcpp.cpp, line 10740\\n\\n/* Reading in from a CSV with implicit conversion */\\nLayout := RECORD\\n\\tLocaleDate dateField;\\nEND;\\nFile := DATASET('~test::sb::dates.csv',Layout,CSV);\\nOUTPUT(File);\\n// This gives empty rows with no fields\\n\\n/* Reading in from a CSV with explicit conversion later */\\nFile2 := DATASET('~test::sb::dates.csv',{STRING9 dateField},CSV);\\nFile3 := PROJECT(File2,Layout);\\nOUTPUT(File3); // This also gives the same error as when doing an implicit conversion while reading the csv\\n
\", \"post_time\": \"2012-08-15 07:20:24\" },\n\t{ \"post_id\": 2235, \"topic_id\": 492, \"forum_id\": 8, \"post_subject\": \"Re: Std.File.MonitorFile - finding many files\", \"username\": \"DSC\", \"post_text\": \"I've figured out a workaround, but I consider this to be suboptimal. Interested Readers may find this useful, though (or perhaps only humorous):\\n\\nIMPORT * FROM Std;\\n\\n// Useful constants\\nkLandingZoneHost := '10.210.150.80';\\nkLandingZoneDir := '/var/lib/HPCCSystems/dropzone';\\nkFilenamePattern := kLandingZoneDir + '/foo*';\\nkFoundFileEventName := 'FoundAFile';\\n\\n// Parsing patterns for picking apart the found file's hostpath\\n// (e.g.: '//10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo1')\\nPATTERN Digit := PATTERN('[[:digit:]]');\\nPATTERN IPv4Octet := REPEAT(Digit,1,3);\\nPATTERN IPv4Port := REPEAT(Digit,1,5);\\nPATTERN HostIPv4Address := IPv4Octet '.' IPv4Octet '.' IPv4Octet '.' IPv4Octet;\\nPATTERN HostSource := HostIPv4Address ':' IPv4Port;\\nPATTERN FilePath := '/' ANY+;\\nPATTERN FullFileSource := '//' HostSource FilePath;\\n\\n// Utility function\\nGetLastItemFromString(STRING text, STRING delimiter = ' ') := FUNCTION\\n SET OF STRING items := Str.SplitWords(text,delimiter);\\n RETURN items[COUNT(items)];\\nEND;\\n\\n// Record definition for temporarily holding the found file's hostpath\\nFilePathRec := RECORD\\n STRING path;\\nEND;\\n\\n// Record definition that will contain the host, full path and filename of found file\\nFilePathComponentsRec := RECORD\\n STRING host := MATCHTEXT(HostIPv4Address);\\n STRING path := MATCHTEXT(FilePath);\\n STRING filename := GetLastItemFromString(MATCHTEXT(FilePath),'/');\\nEND;\\n\\n// The function that would actually do the work, handling the found file\\nHandleFoundFile(FilePathComponentsRec f) := FUNCTION\\n // Do something interesting, like spray the file into Thor\\n RETURN OUTPUT('Found file ' + f.path + ' on host ' + f.host);\\nEND;\\n\\n// Definition of the file monitoring call\\nMonitorFileAction := File.MonitorFile(kFoundFileEventName,kLandingZoneHost,kFilenamePattern);\\n\\n// Function called when a file is found\\nProcessFoundFileEvent(STRING fullFilePath) := FUNCTION\\n rs := DATASET([{fullFilePath}],FilePathRec);\\n parsedRS := PARSE(rs,path,FullFileSource,FilePathComponentsRec,FIRST);\\n \\n actions := SEQUENTIAL (\\n // Handle the file\\n HandleFoundFile(parsedRS[1]),\\n \\n // Delete the file so it won't be found again\\n File.DeleteExternalFile(parsedRS[1].host,parsedRS[1].path),\\n \\n // Instantiate file monitoring again\\n MonitorFileAction\\n );\\n \\n RETURN actions;\\nEND;\\n\\n// Event handler for the file monitoring\\nProcessFoundFileEvent(EVENTEXTRA) : WHEN(EVENT(kFoundFileEventName,'*'));\\n\\n// Initial instantiation of file monitoring\\nMonitorFileAction;
\\n\\nThe basic trick was to have MonitorFile find only one file at a time, then set things up so that MonitorFile is instantiated again once the found file is processed. This requires that the found file is deleted or otherwise made unfindable (renamed, moved, etc.) so you don't end up in an infinite loop.\\n\\nThe code above uses OUTPUT to show that it's running, but that's a little misleading: Only the last file's information is viewable even though all the files are processed.\\n\\nI would still like to know why MonitorFile allows a 'shotCount' parameter and precisely how it is supposed to be used. That would seem to be a much cleaner implementation.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-08-17 17:28:50\" },\n\t{ \"post_id\": 2232, \"topic_id\": 492, \"forum_id\": 8, \"post_subject\": \"Std.File.MonitorFile - finding many files\", \"username\": \"DSC\", \"post_text\": \"Consider:\\n\\nIMPORT * FROM Std;\\n\\nfoundFileEventName := 'FoundAFile';\\n\\nFile.MonitorFile (\\n foundFileEventName,\\n '10.210.150.80',\\n '/var/lib/HPCCSystems/dropzone/foo*',\\n FALSE,\\n -1\\n );\\n\\nOUTPUT(EVENTEXTRA) : WHEN(EVENT(foundFileEventName,'*'));
\\n\\nIf I place five matching files (foo1, foo2, foo3, foo4 and foo5) into the cited directory, I would expect the OUTPUT to be executed five times. Instead, I see only the first result. The log file, however, shows all five:\\n\\n00001577 2012-08-16 13:25:46 17886 17894 "DFU Monitor running job: D20120816-132341"\\n00001578 2012-08-16 13:25:46 17886 17894 "MONITOR(D20120816-132341): triggering event: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo3"\\n00001579 2012-08-16 13:25:46 17886 17894 "MONITOR(D20120816-132341): triggering event: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo1"\\n0000157A 2012-08-16 13:25:46 17886 17894 "MONITOR(D20120816-132341): triggering event: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo4"\\n0000157B 2012-08-16 13:25:46 17886 17894 "MONITOR(D20120816-132341): triggering event: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo2"\\n0000157C 2012-08-16 13:25:46 17886 17894 "MONITOR(D20120816-132341): triggering event: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo5"\\n0000157D 2012-08-16 13:25:46 17886 17894 "DFUMON Event Pushed: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo3"\\n0000157E 2012-08-16 13:25:46 17886 17894 "DFUMON Event Pushed: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo1"\\n0000157F 2012-08-16 13:25:46 17886 17894 "DFUMON Event Pushed: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo4"\\n00001580 2012-08-16 13:25:46 17886 17894 "DFUMON Event Pushed: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo2"\\n00001581 2012-08-16 13:25:46 17886 17894 "DFUMON Event Pushed: FoundAFile, //10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo5"\\n00001582 2012-08-16 13:25:46 17886 17894 "DFU Monitor waiting on queue dfuserver_monitor_queue"
\\n\\nThis code is simplified. My current code creates a subfile for each invocation and stuffs it into a superfile, so I can easily see what may be going on behind the scenes. Unfortunately, it matches what I just described: Only the first subfile is created.\\n\\nHow can I capture all five files?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-16 18:18:03\" },\n\t{ \"post_id\": 2268, \"topic_id\": 501, \"forum_id\": 8, \"post_subject\": \"Re: Error handling in ECL\", \"username\": \"bforeman\", \"post_text\": \"What handler are you calling in the FAILURE workflow service? Have you tried restarting the ECL action from there?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-30 12:10:50\" },\n\t{ \"post_id\": 2264, \"topic_id\": 501, \"forum_id\": 8, \"post_subject\": \"Error handling in ECL\", \"username\": \"tmurphy\", \"post_text\": \"My ECL code uses Std.File.MonitorFile to look for an incoming file in the landing zone, do something with it, and then wait a while and repeat. The code works except when an error occurs while "doing something" in which case the workunit fails and stops monitoring. I tried placing a FAILURE clause at various places but the workunit still fails. I want to "catch" the error and keep going. How can I do this?\", \"post_time\": \"2012-08-29 17:33:57\" },\n\t{ \"post_id\": 2283, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a ton..\\n\\nThat worked.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-31 19:22:50\" },\n\t{ \"post_id\": 2280, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"bforeman\", \"post_text\": \"
Tried MAX(category1,category2,category3)\\n\\nGot the output as \\n\\nname category1 category2 category3 result\\nxxx 2.1 2.6 2.4 2.6\\nyyy 3.2 3.4 3.5 3.5\\n
\\n\\nYou're almost there! All you need to do is associate the value with a field name result. Try using CASE, like this:\\n\\nresult := CASE(MAX(category1,category2,category3),\\n category1 => 'category1',\\n category2 => 'category2',\\n \\t\\t\\t 'category3');
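Put together with the sample rows from earlier in the thread, a minimal end-to-end sketch looks like this (the record layout and inline data are assumed from the example in the question):
rec := RECORD
    STRING10 name;
    REAL     category1;
    REAL     category2;
    REAL     category3;
END;

ds := DATASET([{'xxx',2.1,2.6,2.4},
               {'yyy',3.2,3.4,3.5}], rec);

outRec := RECORD(rec)
    STRING10 result;
END;

outRec XF(rec L) := TRANSFORM
    SELF.result := CASE(MAX(L.category1,L.category2,L.category3),
                        L.category1 => 'category1',
                        L.category2 => 'category2',
                        'category3');
    SELF := L;
END;

OUTPUT(PROJECT(ds,XF(LEFT)));
// xxx  2.1  2.6  2.4  category2
// yyy  3.2  3.4  3.5  category3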
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-31 18:56:31\" },\n\t{ \"post_id\": 2279, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot for the reply.\\n\\nThat helps for question 1.\\n\\nBut for question 2, still not sure how do we get the result as the column/field name rather than the value when some computation happens across a row for different columns.\\n\\nEx :\\n\\nInput\\nname category1 category2 category3 \\nxxx 2.1 2.6 2.4\\nyyy 3.2 3.4 3.5\\n\\nIf i need to find the MAX for 'xxx' and then use the column/field name as the result. (To find the category name for xxx rather than the value)\\n\\nTried MAX(category1,category2,category3)\\n\\nGot the output as \\n\\nname category1 category2 category3 result\\nxxx 2.1 2.6 2.4 2.6\\nyyy 3.2 3.4 3.5 3.5\\n\\n\\nRequired Output :\\nname category1 category2 category3 result\\nxxx 2.1 2.6 2.4 category2\\nyyy 3.2 3.4 3.5 category3\\n\\n\\nPlease help regarding the same.\\n\\nThanks a lot in Advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-31 18:39:55\" },\n\t{ \"post_id\": 2275, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Try this:rec := RECORD\\n UNSIGNED1 ID;\\n\\tSTRING10 Col1;\\n\\tSTRING10 Col2;\\n\\tSTRING10 Col3;\\nEND;\\n\\nds := dataset([{1, 'Row11', 'Row12', 'Row13'},\\n\\t {2, 'Row21', 'Row22', 'Row23'}],rec);\\n\\noutrec := RECORD\\n UNSIGNED1 ID;\\n\\tSTRING10 Name;\\n\\tSTRING10 Value;\\nEND;\\n\\noutrec XF(ds L, integer C) := TRANSFORM\\n SELF.Name := CHOOSE(C,'Col1','Col2','Col3');\\n SELF.Value := CHOOSE(C,L.Col1,L.Col2,L.Col3);\\n SELF := L;\\nEND;\\nn := NORMALIZE(ds,3,XF(LEFT,COUNTER));\\n\\nn;
HTH,\\n\\nRichard\", \"post_time\": \"2012-08-31 13:37:30\" },\n\t{ \"post_id\": 2272, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot for the reply.\\n\\nMax worked across the various columns for a given row, but need the result as the name of the column(field) and not the value.\\n\\nNeed the column/field name as the result and not their value.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-30 19:01:45\" },\n\t{ \"post_id\": 2270, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Re: Transpose in ECL\", \"username\": \"bforeman\", \"post_text\": \"For question 2:\\n\\nhighcat := MAX(category1,category2,category3);\\n\\nFor question 1:\\n\\nLook at the section in the Language Reference that discusses #TEXT. Perhaps that is what you want.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-30 18:29:48\" },\n\t{ \"post_id\": 2269, \"topic_id\": 502, \"forum_id\": 8, \"post_subject\": \"Transpose in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nFew questions related to ECL .\\n\\n1.) How to transpose in ECL while getting a handle on the column names ?\\n\\nEx : \\nInput:\\nId Column1 Column2 Column3\\n1 Row11 Row12 Row13\\n2 Row21 Row22 Row23\\n\\nOutput:\\nId Name Value\\n1 Column1 Row11\\n1 Column2 Row12\\n1 Column3 Row13\\n2 Column1 Row21\\n2 Column2 Row22\\n2 Column3 Row23\\n\\nI tried using the NORMALIZE action , but wasnt able to get a handle on the column names, could get only their values.\\n\\n2.) How to do aggregate functions such as sum or max or avg across colummns (fields) in a row for a recordset.\\n\\nEx :\\nname category1 category2 category3 \\nxxx 2.1 2.6 2.4\\nyyy 3.2 3.4 3.5\\n\\nNeed a max value for xxx and yyy across the columns for a given row and assign the result to the column name.\\n\\nSample Output :\\n\\nname category1 category2 category3 result\\nxxx 2.1 2.6 2.4 category2\\nyyy 3.2 3.4 3.5 category3\\n\\nPlease help regarding the same.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-30 18:23:41\" },\n\t{ \"post_id\": 2282, \"topic_id\": 503, \"forum_id\": 8, \"post_subject\": \"Re: JSON Feed\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot..\\n\\nThat helped by taking the entire JSON feed as a single text terminated with newline.\\n\\nThen using parse function to get the actual text.\\n\\nEx for JSON Feed:\\n\\nhttp://search.twitter.com/search.json?q ... 
rapi&rpp=2\\n\\n{\\n "completed_in": 0.129,\\n "max_id": 241613919431569408,\\n "max_id_str": "241613919431569408",\\n "next_page": "?page=2&max_id=241613919431569408&q=twitterapi&rpp=2",\\n "page": 1,\\n "query": "twitterapi",\\n "refresh_url": "?since_id=241613919431569408&q=twitterapi",\\n "results": [\\n {\\n "created_at": "Fri, 31 Aug 2012 19:10:22 +0000",\\n "from_user": "JuanLuisAguiGle",\\n "from_user_id": 187638747,\\n "from_user_id_str": "187638747",\\n "from_user_name": "\\\\u2020Schei\\\\u00dfe \\\\u00dfe Mine!!!",\\n "geo": null,\\n "id": 241613919431569408,\\n "id_str": "241613919431569408",\\n "iso_language_code": "es",\\n "metadata": {\\n "result_type": "recent"\\n },\\n "profile_image_url": "http:\\\\/\\\\/a0.twimg.com\\\\/profile_images\\\\/2541810122\\\\/8p5i5on02175aqqojv1y_normal.jpeg",\\n "profile_image_url_https": "https:\\\\/\\\\/si0.twimg.com\\\\/profile_images\\\\/2541810122\\\\/8p5i5on02175aqqojv1y_normal.jpeg",\\n "source": "<a href="http:\\\\/\\\\/twitter.com\\\\/tweetbutton">Tweet Button<\\\\/a>",\\n "text": "Haus Laboratories http:\\\\/\\\\/t.co\\\\/gwp095yW v\\\\u00eda @twitterapi",\\n "to_user": null,\\n "to_user_id": 0,\\n "to_user_id_str": "0",\\n "to_user_name": null\\n },\\n {\\n "created_at": "Fri, 31 Aug 2012 19:06:16 +0000",\\n "from_user": "yayahuang36",\\n "from_user_id": 188726460,\\n "from_user_id_str": "188726460",\\n "from_user_name": "Yaya Huang",\\n "geo": null,\\n "id": 241612890736562176,\\n "id_str": "241612890736562176",\\n "iso_language_code": "en",\\n "metadata": {\\n "result_type": "recent"\\n },\\n "profile_image_url": "http:\\\\/\\\\/a0.twimg.com\\\\/profile_images\\\\/1812996949\\\\/387320_10151104781780646_524285645_22402992_2063866852_n_normal.jpg",\\n "profile_image_url_https": "https:\\\\/\\\\/si0.twimg.com\\\\/profile_images\\\\/1812996949\\\\/387320_10151104781780646_524285645_22402992_2063866852_n_normal.jpg",\\n "source": "<a href="http:\\\\/\\\\/twitter.com\\\\/tweetbutton">Tweet Button<\\\\/a>",\\n "text": "never ever EVER do.\\\\nhttp:\\\\/\\\\/t.co\\\\/H6l4dhwr via @twitterapi",\\n "to_user": null,\\n "to_user_id": 0,\\n "to_user_id_str": "0",\\n "to_user_name": null\\n }\\n ],\\n "results_per_page": 2,\\n "since_id": 0,\\n "since_id_str": "0"\\n}\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-31 19:14:50\" },\n\t{ \"post_id\": 2277, \"topic_id\": 503, \"forum_id\": 8, \"post_subject\": \"Re: JSON Feed\", \"username\": \"arjuna chala\", \"post_text\": \"Hi ksviswa,\\n\\nCan you please provide us with some more information? We are specifically looking for an example feed with at least 2 records. \\n\\nThank You\\n\\nArjuna\", \"post_time\": \"2012-08-31 14:53:30\" },\n\t{ \"post_id\": 2276, \"topic_id\": 503, \"forum_id\": 8, \"post_subject\": \"Re: JSON Feed\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nAs long as there is some kind of record delimiter other than the terminating curly brace (because of the possible nesting of brace pairs), you could just define the file as a CSV file (no field delimiter) with a single variable-length string field contain all the text, something like this:rec := RECORD\\n STRING txt;\\nEND;\\n\\nds := DATASET('MyFile',rec,CSV(SEPARATOR(''),TERMINATOR));
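From a record set like ds, the individual "text" values can then be pulled out with PARSE pattern matching. This is only a rough sketch: it assumes the whole feed was read as one STRING field per line as above, and it ignores escaped quote characters inside the tweet text:
PATTERN ws        := PATTERN(' *');
PATTERN quotedVal := '"' PATTERN('[^"]*') '"';
PATTERN textAttr  := '"text"' ws ':' ws quotedVal;

tweetRec := RECORD
    STRING tweet := MATCHTEXT(quotedVal);  // includes the surrounding quote characters
END;

tweets := PARSE(ds, txt, textAttr, tweetRec, SCAN);
OUTPUT(tweets);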
HTH,\\n\\nRichard\", \"post_time\": \"2012-08-31 14:42:50\" },\n\t{ \"post_id\": 2271, \"topic_id\": 503, \"forum_id\": 8, \"post_subject\": \"JSON Feed\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs it possible to convert a json feed (such as a twitter feed) into a recordset having just the text?\\n\\nPlease help regarding this.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-08-30 18:39:31\" },\n\t{ \"post_id\": 2314, \"topic_id\": 504, \"forum_id\": 8, \"post_subject\": \"Re: Spell Checker\", \"username\": \"bforeman\", \"post_text\": \"The ECL IDE currently does not have a spell checker. I believe the developer was just running the sources through an external editor to correct comments and notes.\\n\\nOF course, the ECL compiler does a great side job of spell checking if you mistype any language keyword \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-10 14:51:42\" },\n\t{ \"post_id\": 2284, \"topic_id\": 504, \"forum_id\": 8, \"post_subject\": \"Spell Checker\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nCame through the following in HPCC Community Edition release notes "Correct some spelling mistakes and typos\\n Ran sources through spell checker and corrected various spelling mistakes\\n and typos. Not exhaustive, didn't look at all dirs."\\n\\nIs there a spell checker available in ECL..?\\n\\nIf so is it updated in the ECL repository or we need to update the same from some external source.?\\n\\nIf not will it be a feature added in the upcoming release.?\\n\\nplease clarify.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-08-31 19:37:08\" },\n\t{ \"post_id\": 2352, \"topic_id\": 507, \"forum_id\": 8, \"post_subject\": \"Re: Example from std lib ref manual does not compile\", \"username\": \"ghalliday\", \"post_text\": \"Just to feed back, a fix will be included in 3.8.4.\", \"post_time\": \"2012-09-14 19:22:25\" },\n\t{ \"post_id\": 2295, \"topic_id\": 507, \"forum_id\": 8, \"post_subject\": \"Re: Example from std lib ref manual does not compile\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you both for reporting this!\", \"post_time\": \"2012-09-06 02:02:18\" },\n\t{ \"post_id\": 2294, \"topic_id\": 507, \"forum_id\": 8, \"post_subject\": \"Re: Example from std lib ref manual does not compile\", \"username\": \"DSC\", \"post_text\": \"I opened an issue on github: https://github.com/hpcc-systems/HPCC-Platform/issues/3299\", \"post_time\": \"2012-09-05 14:28:18\" },\n\t{ \"post_id\": 2293, \"topic_id\": 507, \"forum_id\": 8, \"post_subject\": \"Re: Example from std lib ref manual does not compile\", \"username\": \"DSC\", \"post_text\": \"It looks like there is an IMPORT missing from Std.System.Util:\\n\\n
IMPORT lib_fileservices;
\\nAdding that makes the code compile.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-05 13:47:36\" },\n\t{ \"post_id\": 2292, \"topic_id\": 507, \"forum_id\": 8, \"post_subject\": \"Example from std lib ref manual does not compile\", \"username\": \"tmurphy\", \"post_text\": \"IMPORT * FROM STD;\\nhost := 'dataland_dali.br.seisint.com';\\nOUTPUT(STD.System.Util.ResolveHostName(host));\\n\\ngenerates errors when compiling Util.ecl:\\n\\nError: Unknown identifier "lib_fileservices" (20, 5), 2167, \\nError: Incompatible types: can not assign Integer to String (19, 25), 2007, \\nError: Unknown identifier "lib_fileservices" (30, 5), 2167, \\nError: Incompatible types: can not assign Integer to varstring (29, 29), 2007, \\nError: Unknown identifier "lib_fileservices" (40, 5), 2167, \\nError: Incompatible types: can not assign Integer to varstring (39, 33), 2007, \\nError: Unknown identifier "lib_fileservices" (50, 5), 2167, \\nError: Unknown identifier "lib_fileservices" (20, 5), 2167, \\nError: Incompatible types: can not assign Integer to String (19, 25), 2007, \\nError: Unknown identifier "lib_fileservices" (30, 5), 2167, \\nError: Incompatible types: can not assign Integer to varstring (29, 29), 2007, \\nError: Unknown identifier "lib_fileservices" (40, 5), 2167, \\nError: Incompatible types: can not assign Integer to varstring (39, 33), 2007, \\nError: Unknown identifier "lib_fileservices" (50, 5), 2167,\", \"post_time\": \"2012-09-05 12:59:22\" },\n\t{ \"post_id\": 2795, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"g2pis\", \"post_text\": \"--\", \"post_time\": \"2012-11-16 07:12:04\" },\n\t{ \"post_id\": 2305, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\n\\nThanks a lot..\", \"post_time\": \"2012-09-06 18:51:35\" },\n\t{ \"post_id\": 2303, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Like this:
ds1 := dataset([{'abcd'},{'vwxyz'}],{string f1});\\nds2 := dataset([{'e'},{'f'},{'g'}],{string1 f1});\\n\\nrecordof(ds1) XF2(ds2 L, integer C, STRING Str, integer N) := TRANSFORM\\n self.f1 := MAP(C=1 => L.F1 + Str[2..],\\n\\t C=N => Str[1..N-1] + L.F1,\\n\\t Str[1..C-1] + L.F1 + Str[C+1..]);\\nEND;\\n\\n\\nOutRec := RECORD\\n DATASET(recordof(ds1)) child;\\nEND;\\t\\n\\noutrec XF1(ds1 L) := TRANSFORM\\n Len := LENGTH(TRIM(L.f1));\\n SELF.Child := NORMALIZE(ds2,Len,XF2(LEFT,COUNTER,L.F1,Len));\\nEND;\\nres := PROJECT(ds1,XF1(LEFT));\\n\\nres.child;
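\\nFor the sample data that should return the 4 x 3 combinations you listed for 'abcd' (ebcd, aecd, abed, abce, fbcd, and so on) plus the 5 x 3 equivalents for 'vwxyz' -- the COUNTER-driven slicing keys everything off LENGTH(TRIM(L.f1)), so it is not tied to 4-character strings.\\n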
HTH,\\n\\nRichard\", \"post_time\": \"2012-09-06 14:58:52\" },\n\t{ \"post_id\": 2302, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks Richard..\\n\\nBut in case of ds1 (first dataset )having a length greater than 4 , then this code may not work.\\n\\nAny generic solution possible based on the length of the data in the first dataset ..?\\n\\nThanks in advance..\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-09-06 13:16:59\" },\n\t{ \"post_id\": 2301, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"chhaya\", \"post_text\": \"Can i use graph here ? if not then why ? what exactly graph does?\", \"post_time\": \"2012-09-06 13:03:14\" },\n\t{ \"post_id\": 2300, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Re: Nested Loops in ECL\", \"username\": \"rtaylor\", \"post_text\": \"How about this:ds1 := dataset([{'abcd'}],{string4 f1});\\nds2 := dataset([{'e'},{'f'},{'g'}],{string1 f1});\\n\\nrecordof(ds1) XF(ds2 L, integer C) := TRANSFORM\\n Str := ds1[1].f1;\\n self.f1 := CHOOSE(C,L.F1 + Str[2..4],\\n Str[1] + L.F1 + Str[3..4],\\n Str[1..2] + L.F1 + Str[4],\\n Str[1..3] + L.F1);\\nEND;\\n\\nres := NORMALIZE(ds2,4,XF(LEFT,COUNTER));\\n\\nres;
HTH,\\n\\nRichard\", \"post_time\": \"2012-09-06 12:35:06\" },\n\t{ \"post_id\": 2297, \"topic_id\": 508, \"forum_id\": 8, \"post_subject\": \"Nested Loops in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow are nested loops handled in ECL..? \\n\\nCan we specify multiple conditions in Normalize Function..?\\n\\nFor Ex :\\n\\nSample Input :\\n\\nRecordSet1 : 'abcd'\\nRecordSet2 : e,f,g(3 records)\\n\\nSample Output :\\n\\nebcd,fbcd,gbcd, aecd, afcd, agcd , abed, abfd, abgd, abce, abcf, abcg (4 * 3 Records ).\\n\\nHow can i achieve the same in ECL..?\\n\\nTried using the normalize and loop, but it didnt help.. \\n\\nAre there any inbuilt function for nested loops in ECL..?\\n\\nPlease help..\\n\\nThanks a lot in advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-09-06 10:24:39\" },\n\t{ \"post_id\": 2358, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"No problem Allan, I will open an issue on this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-14 20:02:03\" },\n\t{ \"post_id\": 2357, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Bob,\\n\\nThanks for looking into this.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-14 19:59:00\" },\n\t{ \"post_id\": 2346, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"ghalliday\", \"post_text\": \"I think the reason is that the expression isn't constant folded even though it could be. If you open an issue on jira then I can investigate what would be needed.\", \"post_time\": \"2012-09-14 19:12:34\" },\n\t{ \"post_id\": 2345, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Bob,\\n\\nThanks for working on this.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-14 14:59:18\" },\n\t{ \"post_id\": 2337, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nVery strange, since the SET function and the static set should be the same result. I'll investigate further with our development team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-13 19:04:08\" },\n\t{ \"post_id\": 2334, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nThe code below syntax checks, but fails at runtime.\\n
\\nR := RECORD\\n STRING5 Txt;\\nEND;\\n\\ne := DATASET([{'Info1'},{'Info2'}],R);\\ndesc := SET(e,Txt);\\nOUTPUT(e,NAMED(desc[1]));\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-13 18:03:30\" },\n\t{ \"post_id\": 2333, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nYou still can have your descriptions in the dummy field, but just use the SET function to extract the set of constant strings to use in the NAMED attribute. \\n\\nmydescriptions := SET(mydataset,descriptionfield);\\n\\nand then use the indexing of the mydescriptions instead of the descriptionfield, which would essentially give you the same result.\\n\\nBut I'll check with some other colleagues to see if they have any alternative suggestions. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-13 11:58:29\" },\n\t{ \"post_id\": 2332, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nThat works, but rather defeats the object, which is to have the description in the test defined in one place (a spare field in the test data records).\\n\\nI could not maintain, i.e. keep in sync, two repositories of data.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-13 07:57:04\" },\n\t{ \"post_id\": 2331, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"Would this get you there?\\n\\n\\nR := RECORD\\n STRING5 Txt;\\nEND;\\n\\nSETNamed := ['Info1','Info2'];\\n\\ne := DATASET([{'Info1'},{'Info2'}],R);\\nOUTPUT(e,NAMED(SETNamed[1]));\\nOUTPUT(e,NAMED(SETNamed[2]));\\n\\n
\\n\\nYou could use the SET function to create a dynamic set of strings in sync with the actual data.\\n\\nBob\", \"post_time\": \"2012-09-12 20:02:17\" },\n\t{ \"post_id\": 2330, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"OK, I see what you want now, a dummy field that stores string values that you want to use in the Result tab via NAMED. I'm not sure it can be done, let me sleep on it! \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-12 19:56:03\" },\n\t{ \"post_id\": 2328, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Well Bob,\\n\\nFinally got around to trying this out, but no luck.\\nThe program below gives error (at run time) of: Error: Name 'e[ 1] . Txt' must be a valid identifier. (18, 21), 2257, \\n\\n
\\nR := RECORD\\n STRING5 Txt;\\nEND;\\n\\ne := DATASET([{'Info1'},{'Info2'}],R);\\nOUTPUT(e,NAMED(#TEXT(e[1].Txt)));\\n
\\n\\nAny idea's?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-12 19:33:37\" },\n\t{ \"post_id\": 2320, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob - I will \", \"post_time\": \"2012-09-10 18:41:09\" },\n\t{ \"post_id\": 2316, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT(...NAMED('x')) 'x'is restricted to being a const\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nTry this:\\n\\n
t012 := RunTest('12');\\nOUTPUT(t012,NAMED(#TEXT(t012.Filler)));
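\\nOne caveat as I understand #TEXT: it expands to the text of the expression itself at compile time, so the line above amounts to OUTPUT(t012,NAMED('t012.Filler')) -- a fixed label -- rather than picking up the runtime contents of the Filler field.\\n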
\\n\\nYes, NAMED needs a string constant output, and #TEXT was built for this. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-10 15:04:09\" },\n\t{ \"post_id\": 2307, \"topic_id\": 510, \"forum_id\": 8, \"post_subject\": \"OUTPUT(...NAMED('x')) 'x'is restricted to being a constant?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI have a dataset with a large number of test cases in it.\\nI thought it would be good to use one of the filler fields in the test records as an readable identifier/description of the particular test exercised by that row of data.\\n\\nI could then output the said filler field as the NAMED paramter to OUTPUT, something like:\\n
\\nt012 := RunTest('12'); OUTPUT(t012,NAMED(t012.Filler));\\n
\\nBut, of course, I get error '4082 Expression Not constant.'\\n\\nThis seems a strange restriciton to have, in addition the ECL Reference manual does not mention that the 'NAMED' parameter to OUTPUT has to be constant.\\n\\nAny comments?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-07 09:43:28\" },\n\t{ \"post_id\": 2350, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Re: Issue in nested PROJECT-TRANSFORM\", \"username\": \"ghalliday\", \"post_text\": \"I think you have hit a fairly obscure bug where LEFT is ambiguous. In 3.6 it is differentiated by the record type, but that can cause problems - with nested loops and with examples like this.\\n\\nA fix is included in 3.8, so your original example should work on a new system.\", \"post_time\": \"2012-09-14 19:17:03\" },\n\t{ \"post_id\": 2324, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Re: Issue in nested PROJECT-TRANSFORM\", \"username\": \"jeeves\", \"post_text\": \"I think you might be right..\", \"post_time\": \"2012-09-12 07:13:53\" },\n\t{ \"post_id\": 2322, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Re: Issue in nested PROJECT-TRANSFORM\", \"username\": \"bforeman\", \"post_text\": \"Yes, initially when I read that you were trying to join two datasets together, I just immediately offered a solution. JOIN is the best function for joining two datasets. \\n\\nReading the docs for PROJECT, I believe that you are using it incorrectly in the TRANSFORM. It is only designed to process a single recordset, and the second of the TRANSFORM that PROJECT calls is an optional COUNTER, or a GROUPed recordset. The compiler probably assumed that SELF.forDebug was the current GROUP in the recordset. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-11 12:22:36\" },\n\t{ \"post_id\": 2321, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Re: Issue in nested PROJECT-TRANSFORM\", \"username\": \"jeeves\", \"post_text\": \"Yes. This works. But I am still wondering why my first post did not work. It indicates that either I have misunderstood something or there is a bug!\", \"post_time\": \"2012-09-11 05:35:41\" },\n\t{ \"post_id\": 2317, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Re: Issue in nested PROJECT-TRANSFORM\", \"username\": \"bforeman\", \"post_text\": \"How about this?\\n\\n
Word1 :=['h','e','l','l','o'];\\n\\nWord2 := ['a','b'];\\n\\nR_word1 := {STRING char};\\n\\nR_word2 := {STRING char};\\n\\nR_word3 := {STRING char};\\n\\nWord1_dataset := DATASET(Word1,R_word1);\\nWord2_dataset := DATASET(Word2,R_word2);\\n\\nR_word3 mix(R_word1 L,R_word1 R) := TRANSFORM\\n \\n self.char := L.char+R.char;\\n \\n END;\\n\\n\\nmyout := JOIN(Word1_dataset,Word2_dataset,TRUE,Mix(LEFT,RIGHT),INNER,ALL);\\n\\nmyout;
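\\nThe ALL option is what allows this: since the match condition is simply TRUE (no equality test), every Word1 record is paired with every Word2 record, which should give the 5 x 2 combinations 'ha','hb','ea','eb', and so on.\\n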
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-10 15:28:25\" },\n\t{ \"post_id\": 2308, \"topic_id\": 511, \"forum_id\": 8, \"post_subject\": \"Issue in nested PROJECT-TRANSFORM\", \"username\": \"jeeves\", \"post_text\": \"I tried combining two data sets [‘h’,’e’,’l’,’l’,’o’] and [‘a’,’b’]\\n\\nExpecting to get something like\\n\\n[[‘ha’],[‘hb’]], [[‘ea’],[‘eb’]], [[‘la’],[‘lb’]] ..\\n\\nBut I end up with..\\n\\n[[‘aa’],[‘bb’]], [[‘aa’],[‘bb’]], [[‘aa’],[‘bb’]] ..\\n\\nI am using the community edition (HPCCSystemsVM-3.6.2.3)\\n\\nThe code is given below\\n=======================\\n\\nWord1 :=['h','e','l','l','o'];\\n\\nWord2 := ['a','b'];\\n\\nR_word1 := {STRING char};\\n\\nR_word2 := {STRING char};\\n\\n\\nWord1_dataset := DATASET(Word1,R_word1);\\n\\nWord2_dataset := DATASET(Word2,R_word2);\\n\\nR_result := RECORD\\n\\nResult_dataset := DATASET([],R_word2);\\nSTRING forDebug; \\n\\nEND;\\n\\nR_word2 mix(R_word2 rec,String str) :=\\n\\tTRANSFORM\\n\\t\\n\\tself.char := rec.char+str;\\n\\t\\n\\tEND;\\n\\t\\n R_result mixAndStore(R_word1 rec) :=\\n\\tTRANSFORM\\n\\t\\n\\tSELF.forDebug := rec.char;\\n\\t\\n\\tSELF.Result_dataset :=\\tPROJECT(Word2_dataset,mix(LEFT,SELF.forDebug));\\n\\t\\n\\t//self.char := rec.char+rec.char;\\n\\t\\n\\t\\n\\tEND;\\n\\t\\n\\n\\n//projected_dataset := PROJECT \\n\\nmixedWord := PROJECT(Word1_dataset,mixAndStore(LEFT));\\n\\nOUTPUT(mixedWord);\\n\\n
\\n\\nIf I change \\n\\nR_word1 := {STRING char};\\nR_word2 := {STRING char};\\n
\\nto\\n\\nR_word1 := {STRING char1};\\nR_word2 := {STRING char2};\\n
\\nand make the corresponding changes elsewhere the code starts to work as expected\", \"post_time\": \"2012-09-07 09:47:34\" },\n\t{ \"post_id\": 2353, \"topic_id\": 512, \"forum_id\": 8, \"post_subject\": \"Re: Datasets in virtual modules\", \"username\": \"sbagaria\", \"post_text\": \"Thanks. Both suggestions work!\", \"post_time\": \"2012-09-14 19:26:21\" },\n\t{ \"post_id\": 2351, \"topic_id\": 512, \"forum_id\": 8, \"post_subject\": \"Re: Datasets in virtual modules\", \"username\": \"ghalliday\", \"post_text\": \"I haven't checked, but I think the following might work:\\n\\n\\nMdata := module(Idata)\\n export dataset(rec) attrib := dataset([{1,2}],rec);\\nend;\\n
\\n\\nIf that doesn't try\\n\\n\\nMdata := module(Idata)\\n export dataset attrib := dataset([{1,2}],rec);\\nend;\\n
\\n\\nThe problem is an ambiguity in the grammar which I haven't found a simple solution to. (The full solution is to rewrite it, but that's quite a big project!)\", \"post_time\": \"2012-09-14 19:20:54\" },\n\t{ \"post_id\": 2319, \"topic_id\": 512, \"forum_id\": 8, \"post_subject\": \"Re: Datasets in virtual modules\", \"username\": \"bforeman\", \"post_text\": \"I think the compiler is telling you that the use of DATASET in this context is not supported. In your INTERFACE, it looks like you are using a nested child DATASET definition, but then in the module you are using inline DATASET syntax.\\n\\nI could not find any reference in the docs that supported what you are trying to do.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-10 18:26:44\" },\n\t{ \"post_id\": 2309, \"topic_id\": 512, \"forum_id\": 8, \"post_subject\": \"Datasets in virtual modules\", \"username\": \"sbagaria\", \"post_text\": \"Hi!\\n\\nI don't know if this is supposed to work or not. Or if I am doing something really stupid. But here is my code.\\n\\nrec := RECORD\\n\\tinteger a;\\n\\tinteger b;\\nEND;\\n\\nIdata := interface\\n\\texport dataset(rec) attrib;\\nend;\\n\\nMdata := module(Idata)\\n\\texport attrib := dataset([{1,2}],rec);\\nend;\\n\\nMdata.attrib;
\\n\\nAnd here's the error.\\n\\nError: syntax error near ":=" : expected datarow, identifier, pattern-name, action, pattern (11, 16), 3002, \\nError: Cannot use an abstract MODULE in this context (attrib undefined) (14, 1), 2343,
\\n\\nThe second error is understandably the consequence of the first. But what is the compiler telling me in the first error?\", \"post_time\": \"2012-09-07 09:59:18\" },\n\t{ \"post_id\": 2355, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"sbagaria\", \"post_text\": \"Will do. Thanks!\", \"post_time\": \"2012-09-14 19:34:06\" },\n\t{ \"post_id\": 2354, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"bforeman\", \"post_text\": \"Yes, please log it as a bug, Gavin requested it I think.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2012-09-14 19:32:41\" },\n\t{ \"post_id\": 2349, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"sbagaria\", \"post_text\": \"Did you want me to log a bug?\", \"post_time\": \"2012-09-14 19:16:03\" },\n\t{ \"post_id\": 2348, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"sbagaria\", \"post_text\": \"Moving the files to a different location resolved the error. So I can confirm this was the cause.\", \"post_time\": \"2012-09-14 19:14:54\" },\n\t{ \"post_id\": 2347, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"ghalliday\", \"post_text\": \"Even if that is the cause it would be worth logging it as a bug, and I can improve the error message.\", \"post_time\": \"2012-09-14 19:13:49\" },\n\t{ \"post_id\": 2344, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"sbagaria\", \"post_text\": \"Ah!! I think you spotted the problem. \\n\\nThe file system being used is VMware Host Guest File System. That could very well be the culprit. Let me ask my user to move over the folders somewhere else.\\n\\nThe other laptop had the files in a different place.\\n\\nMany thanks! \", \"post_time\": \"2012-09-14 14:08:14\" },\n\t{ \"post_id\": 2342, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"ghalliday\", \"post_text\": \"Nothing has changed in that piece of code for a long while.\\n\\nThe only thing I can think of is that when the size of the file is requested it comes back as -1 for some reason.\\nIs the file local or remote? If remote what file system (NFS?).\", \"post_time\": \"2012-09-14 13:30:26\" },\n\t{ \"post_id\": 2339, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"Re: ecl complains about .ecl file being larger than 4Gb\", \"username\": \"bforeman\", \"post_text\": \"It's quite possible that we had a bug that was fixed in the later version. Checking with development now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-14 12:13:49\" },\n\t{ \"post_id\": 2335, \"topic_id\": 517, \"forum_id\": 8, \"post_subject\": \"ecl complains about .ecl file being larger than 4Gb\", \"username\": \"sbagaria\", \"post_text\": \"What could possibly cause this error on a laptop running Ubuntu 12.04 with HPCC Platform community_3.8.0-1 installed on it?\\n\\n
\\nsbagaria@ubuntu:/mnt/hgfs/HPCC/trunk/Citi$ make allsignals ecl run --server=xxx.xx.xx.xxx --cluster=thor --username=sbagaria --password= -I"/mnt/hgfs/HPCC/trunk" --limit=0 "/mnt/hgfs/HPCC/trunk/Project/Signals/BWR_Output.ecl"\\nFile /mnt/hgfs/HPCC/trunk/Project/Signals/BWR_Output.ecl is larger than 4Gb\\n
\\n\\nThe same command will work perfectly on another laptop running Ubuntu 12.04 with HPCC Platform community_3.8.2-1.\", \"post_time\": \"2012-09-13 18:23:45\" },\n\t{ \"post_id\": 2343, \"topic_id\": 518, \"forum_id\": 8, \"post_subject\": \"Re: IFBLOCK in roxie queries\", \"username\": \"sbagaria\", \"post_text\": \"Can I take the default values of the fields from the stored variables? If I can do that, then I can still use the IFBLOCK by having another boolean field in the layout. I am guessing that this is not possible as default values for fields should be compile time constants.\\n\\nI hope it is clear that I am trying for the user to select which columns he wants to see in his output.\", \"post_time\": \"2012-09-14 14:04:33\" },\n\t{ \"post_id\": 2341, \"topic_id\": 518, \"forum_id\": 8, \"post_subject\": \"Re: IFBLOCK in roxie queries\", \"username\": \"ghalliday\", \"post_text\": \"There is a restriction on the conditions for IFBLOCKs - the condition can only be based on the previous fields in the record, rather than stored variables.\\n\\nIn some cases an alternative way of solving the problem is to define record structures for the different options, and then define datasets which project the records to a common format, and then select between those datasets.\\n\\nThat isn't going to work in this situation though. I can't immediately think of a good alternative.\\n\\nThe error message should be improved though.\", \"post_time\": \"2012-09-14 13:17:02\" },\n\t{ \"post_id\": 2340, \"topic_id\": 518, \"forum_id\": 8, \"post_subject\": \"Re: IFBLOCK in roxie queries\", \"username\": \"bforeman\", \"post_text\": \"Checking with our development team now. Thanks for your post!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-14 12:36:12\" },\n\t{ \"post_id\": 2336, \"topic_id\": 518, \"forum_id\": 8, \"post_subject\": \"IFBLOCK in roxie queries\", \"username\": \"sbagaria\", \"post_text\": \"This code syntax checks but gives a compilation error.\\n\\niVariables := INTERFACE\\n\\tEXPORT INTEGER num;\\n\\tEXPORT BOOLEAN doSquare;\\nEND;\\n\\nStoredVariables := STORED(IVariables);\\n\\nOutputLayout := RECORD\\n\\tINTEGER num;\\n\\tIFBLOCK(StoredVariables.doSquare) INTEGER numSquare; END;\\nEND;\\n\\nOutputLayoutAll := RECORD\\n\\tINTEGER num;\\n\\tINTEGER numSquare;\\nEND;\\n\\nnum := StoredVariables.num;\\nnumSquare := StoredVariables.num*StoredVariables.num;\\n\\nOUTPUT(PROJECT(DATASET([{num,numSquare}],OutputLayoutAll),OutputLayout));
\\n\\nThe error is:\\n\\nWarning: Mismatch in subminor version number (3.6.0 v 3.6.1) (0, 0 - unknown)\\nError: Unexpected operator ':' in: HqlCppTranslator::buildExpr(EXPORT boolean dosquare := false : STORED('dosquare'); (21, 8 - C:\\\\DOCUME~1\\\\SIDDHA~1.BAG\\\\LOCALS~1\\\\Temp\\\\TFR401D.tmp)\\nWarning: \\nWarning: dosquare;\\nWarning: )
\\n\\nIs it not supposed to work?\", \"post_time\": \"2012-09-13 18:31:44\" },\n\t{ \"post_id\": 2392, \"topic_id\": 520, \"forum_id\": 8, \"post_subject\": \"Re: How to set 'NULL' for INTEGER/REAL attributes in ECL?\", \"username\": \"ghalliday\", \"post_text\": \"For real numbers you can cheat and use NaNs. You can then use the function ISVALID() to check if it is a valid real number (which excluded Nans). \\n\\nYou can create a Nan using code like the following:\\n\\n\\ntransfer(x'0100807F', real4),\\ntransfer(x'010000000000F07F', real8)\\n
\\n\\nSee example ecl/regress/isvalid2.ecl in the source code for some more examples. \\n(Not all the features in there are necessarily documented/supported.)\\n\\nThe problem with integers is that there is no "invalid" representation, so you generally need to use special legal values, or extra booleans.\", \"post_time\": \"2012-09-19 08:59:47\" },\n\t{ \"post_id\": 2364, \"topic_id\": 520, \"forum_id\": 8, \"post_subject\": \"Re: How to set 'NULL' for INTEGER/REAL attributes in ECL?\", \"username\": \"bforeman\", \"post_text\": \"So we have to use a additional flag to identify the NULL on the numeric fields currently
\\n\\nAfter chatting with several ECL developers, that seems the best strategy to use.\\nIf you wish to store your own nulls – using a Boolean to do this makes perfect sense.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-17 13:27:37\" },\n\t{ \"post_id\": 2363, \"topic_id\": 520, \"forum_id\": 8, \"post_subject\": \"Re: How to set 'NULL' for INTEGER/REAL attributes in ECL?\", \"username\": \"dsun\", \"post_text\": \"Thanks Bob for you information.\\nSo we have to use a additional flag to identify the NULL on the numeric fields currently.\\n\\nI'm checking with some other developers now to research how this would be traditionally handled.
\\nWaiting for the progress on this topic.\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-17 13:15:22\" },\n\t{ \"post_id\": 2362, \"topic_id\": 520, \"forum_id\": 8, \"post_subject\": \"Re: How to set 'NULL' for INTEGER/REAL attributes in ECL?\", \"username\": \"bforeman\", \"post_text\": \"There is no NULL support in ECL value types. All empty strings are treated as blanks, and all empty numeric types are treated as zero. \\n\\nPerhaps you just need to set some BYTE fields that mark another field as "NULL", and the when you export the file to your external SQL source handle the NULL condition there.\\n\\nI'm checking with some other developers now to research how this would be traditionally handled.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-17 12:15:31\" },\n\t{ \"post_id\": 2360, \"topic_id\": 520, \"forum_id\": 8, \"post_subject\": \"How to set 'NULL' for INTEGER/REAL attributes in ECL?\", \"username\": \"dsun\", \"post_text\": \"Hi, \\n\\nFor some business logic requirement, I need set the INTEGER/REAL attribute to NULL, but it seems I could not make it, I have try several approaches, but do not work, the default value is '0.0' for REAL and 0 for INTEGER.\\n\\nI have tried the code like this : REAL4 val := (REAL4)'';\\n\\nCould you provide some help on this?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-17 08:10:51\" },\n\t{ \"post_id\": 2402, \"topic_id\": 521, \"forum_id\": 8, \"post_subject\": \"Re: Need help on Operating on a Child DataSet and return a v\", \"username\": \"ghalliday\", \"post_text\": \"The problem with using COUNTER is that it would be a bit ambiguous. Aggregates are generally done locally and then merged between the nodes. We would need to arrange it so that COUNTER started from 1 on each of the nodes, which is what you want, but doesn't fit in the its semantics elsewhere.\\n\\nThen when you are merging two partial results, what is the value of COUNTER? You would need COUNTER(LEFT) and COUNTER(RIGHT).\\n\\nI think adding an extra field is the best workaround if non-zero values are allowed, until there is support for default-values.\", \"post_time\": \"2012-09-20 11:42:19\" },\n\t{ \"post_id\": 2401, \"topic_id\": 521, \"forum_id\": 8, \"post_subject\": \"Re: Need help on Operating on a Child DataSet and return a v\", \"username\": \"dsun\", \"post_text\": \"1. \\n\\nThe code isn't ideal. The problem is there is no way of specifying the initial values for the fields (in this case 1). To work around it the code above needs to perform a test if the value is 0, and if so assume it was 1.\\n
\\n\\nThat's true, but the work around above is not good enough, if in some time, the value is 0, however, it's already not the initial case, then it will bring some problem.\\n\\nI think we add one more field ('cnt' in below code) to identify the initial case make more sense:\\n\\n inRec := { real value; };\\n ds := dataset([0.99,0.80,0.50,2.0], inRec);\\n\\n // Add one more field to work around the initial problem\\n resultRec := { real value, INTEGER cnt};\\n\\t\\t\\n //Code that corresponds to the work you need to do...\\n resultRec t1(inRec l, resultRec r) := TRANSFORM\\n prevValue := IF(r.cnt = 0, 1, r.value);\\n SELF.value := prevValue * (1+l.value/100);\\n\\t\\t\\t\\tSELF.cnt := r.cnt + 1;\\n END;\\n\\n filtered := ds(ISVALID(value));\\n agg := AGGREGATE(filtered, resultRec, t1(LEFT,RIGHT));\\n\\n result := agg[1].value;\\n output(result);\\n
\\n\\n2.\\n\\nI have already seen something very similar in another situation, and think it should be addressed by allowing the ECL user to default the default values for the output rows. (Probably via another TRANSFORM.)\\nI have opened issue http://track.hpccsystems.com/browse/HPCC-7844 to make sure it doesn't get lost.\\n
\\n\\nI'm not sure whether we can add the 'COUNTER' in the nested 'TRANSFORM' of 'AGGREGATE', which can implement the indicator for the initial case.\\n\\nAlso add the comment in the JIRA.\", \"post_time\": \"2012-09-20 07:37:18\" },\n\t{ \"post_id\": 2389, \"topic_id\": 521, \"forum_id\": 8, \"post_subject\": \"Re: Need help on Operating on a Child DataSet and return a v\", \"username\": \"ghalliday\", \"post_text\": \"This is one of the reasons the AGGREGATE operator was introduced. The following code produces something close to what you want\\n\\n\\ninRec := { real value; };\\nds := dataset([0.99,0.80,0.50,2.0], inRec);\\n\\nresultRec := { real value };\\n\\n//Code that corresponds to the work you need to do...\\nresultRec t1(inRec l, resultRec r) := TRANSFORM\\n prevValue := IF(r.value = 0, 1, r.value);\\n SELF.value := prevValue * l.value;\\nEND;\\n\\nfiltered := ds(ISVALID(value));\\nagg := AGGREGATE(filtered, resultRec, t1(LEFT,RIGHT));\\n\\nresult := agg[1].value;\\noutput(result);\\n\\n
\\n\\nA couple things to note:\\nISVALID is true unless the number is a Nan. I'm not sure if that is documented.\\nThe code isn't ideal. The problem is there is no way of specifying the initial values for the fields (in this case 1). To work around it the code above needs to perform a test if the value is 0, and if so assume it was 1.\\n\\nI have already seen something very similar in another situation, and think it should be addressed by allowing the ECL user to default the default values for the output rows. (Probably via another TRANSFORM.)\\nI have opened issue http://track.hpccsystems.com/browse/HPCC-7844 to make sure it doesn't get lost.\", \"post_time\": \"2012-09-19 07:35:14\" },\n\t{ \"post_id\": 2367, \"topic_id\": 521, \"forum_id\": 8, \"post_subject\": \"Re: Need help on Operating on a Child DataSet and return a v\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,I have tried to nest the Transform/Aggregation on a Transform to get it, but it does not work,
Could you please post the ECL code that did not work (you might have been closer than you thought)? \\n\\nRichard\", \"post_time\": \"2012-09-18 14:17:26\" },\n\t{ \"post_id\": 2365, \"topic_id\": 521, \"forum_id\": 8, \"post_subject\": \"Need help on Operating on a Child DataSet and return a value\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nAssume that in a RecordSet, we have a ChildSet, and I want to operate on the elements in the ChildSet and apply the following logic:\\n\\n\\t\\t// double[] childSet;\\n\\t\\tdouble r = 1;\\n\\t\\tfor (int i = 0; i < childSet.length; i++)\\n\\t\\t{\\n\\t\\t\\tdouble y = childSet[i];\\n\\t\\t\\tif (Double.compare(y, Double.NaN)!=0)\\n\\t\\t\\t{\\n\\t\\t\\t\\tr = r * (1 + y / 100);\\n\\t\\t\\t}\\n\\t\\t}\\n\\n\\t\\treturn r;\\n
\\n\\nI have tried to nest the Transform/Aggregation on a Transform to get it, but it does not work, is there a good structure(Loop or anyelse) to achieve this and any sample code for this?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-18 03:05:24\" },\n\t{ \"post_id\": 2386, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"Well, the 51 files (not 58 -- I counted them) don't have the same structure. The incoming data is a fairly complex XML document that I'm breaking up into 51 different XML nodes. Those nodes are further parsed into different record layouts, depending on their node contents. Furthermore, the rollups of each superfile can have different priorities and timings. This additional complexity is why I'm splitting this stuff up in this manner.\\n\\nAll of that said, I'm sure there is a method more compatible with The ECL Way™ out there. I just need to find it. It apparently is not what I'm doing now, which should reduce the answer space by a tiny margin.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-18 19:19:00\" },\n\t{ \"post_id\": 2384, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,As a matter of fact, I am writing 58 logical files. They wind up in superfiles and are processed later in batches. It's turning an incoming data scenario 90 degrees, more or less.
Well then, since they all have exactly the same structure and are all going to be treated as the same logical file anyways, why bother to write them to separate files when you're creating them all in a single process? Just create 58 recordset definitions and append them together (using either the + or the & operator) to write the data as one file on disk. \\n\\nOr possibly, since you're working with XML data, you could accomplish splitting out all 58 new "one piece of data and a UID" records from each input record in one pass of a properly configured PARSE? That's the approach I would think long and hard about. Something like this:\\nds := dataset([{1,'<tag1>data11</tag1><tag2>data21</tag2><tag3>data31</tag3>'},\\n {2,'<tag1>data12</tag1><tag2>data22</tag2><tag3>data32</tag3>'},\\n {3,'<tag1>data13</tag1><tag2>data23</tag2><tag3>data33</tag3>'}],\\n {UNSIGNED1 UID, STRING xmldata});\\n\\nPATTERN Alphameric := PATTERN('[a-zA-Z0-9]')+;\\nPATTERN MyData := '>' Alphameric '<';\\n\\nOutRec := RECORD\\n UNSIGNED1 UID; \\n\\tSTRING Dat;\\nEND;\\n\\nOutRec XF(ds L) := TRANSFORM\\n SELF.UID := L.UID;\\n\\tSELF.Dat := MATCHTEXT(Alphameric);\\nEND;\\n\\nres := PARSE(ds,xmldata,MyData,XF(LEFT));\\t\\n\\nres;
\\nWhat other 'wrapper' can I use for actions besides PARALLEL or SEQUENTIAL?
None that I know of. But the "hammer" is irrelevant when it's a "screwdriver" that you need.\\n
And here I thought I was doing the right thing.... Sigh.
Welcome to MY world! Every day I learn something new about this stuff. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-09-18 19:02:46\" },\n\t{ \"post_id\": 2382, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"As a matter of fact, I am writing 58 logical files. They wind up in superfiles and are processed later in batches. It's turning an incoming data scenario 90 degrees, more or less.\\n\\nWhat other 'wrapper' can I use for actions besides PARALLEL or SEQUENTIAL?\\n\\nAnd here I thought I was doing the right thing.... Sigh.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-18 18:27:50\" },\n\t{ \"post_id\": 2381, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nHere's my usual question -- what exactly are you trying to accomplish here?
\\nAre you actually writing 58 separate files to disk?\\n\\nAnd, what happens if you remove all the SEQUENTIAL and PARALLEL actions and just let the compiler figure out what to do (usually the first/best recourse)? \\n\\nRichard\", \"post_time\": \"2012-09-18 18:19:38\" },\n\t{ \"post_id\": 2380, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"I must need a bit more sleep or something.\\n\\nIf you wrap PARALLEL around a bunch of SEQUENTIALs then the behavior (and graphs) make it look like PARALLEL doesn't exist.\\n\\nIf you wrap SEQUENTIAL around a bunch of PARALLELs you get the right-size chunks. In my case, it's 10 graphs, executed in order.\\n\\nFrom a performance (system management) perspective, I would prefer to have a 10 chunks executing in parallel, each executing their tasks sequentially. That's my goal, but I can't quite seem to make it work.\\n\\nDan\", \"post_time\": \"2012-09-18 18:11:38\" },\n\t{ \"post_id\": 2374, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"I did almost exactly that (10 SEQUENTIALs embedded within a single PARALLEL wrapper). That's what resulted in 58 independent graphs, as if the PARALLEL didn't exist. That's why I was hoping there was a different way of doing this.\\n\\nNow, I am actually doing all of this within a function. Here is an excerpted version:\\n\\n
EXPORT\\tProcessFile(STRING logicalFilePath) := FUNCTION\\n\\t// ...\\n\\tdistributedData := DISTRIBUTE(canonicalData,HASH32(uniqueID));\\n\\t\\n\\t// Unique ID to append to each created MPF file\\n\\tfileID := (STRING)Std.System.Util.GetUniqueInteger() : INDEPENDENT;\\n\\t\\n\\t// Actions for creating MPFs from the incoming data\\n\\totherActions := PARALLEL\\t\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tMPFLib.Other.PayloadHeader.MPF.ProcessIncomingData(distributedData,fileID)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\t// ...\\n\\t\\n\\tfinActions := PARALLEL\\t\\t\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tMPFLib.Financials.FinancialStatement.MPF.ProcessIncomingData(distributedData,fileID),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tMPFLib.Financials.FinancialNorm.MPF.ProcessIncomingData(distributedData,fileID)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\t// ...\\n\\t\\n\\tallActions := PARALLEL\\t\\t\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\totherActions,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// ...\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tfinActions\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tRETURN allActions;\\nEND;
\\n\\nSimply compiling different combinations of SEQUENTIAL and PARALLEL and then looking at the graphs show this behavior. Pretty much any introduction of SEQUENTIAL creates individual graphs for smallest action.\\n\\nCould that create a different call graph than just placing them in a bare file?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-09-18 17:49:11\" },\n\t{ \"post_id\": 2371, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"bforeman\", \"post_text\": \"Is there a pattern to breaking up tasks like this? Ideally it would be a chunk of 4-6 parallel tasks each executing sequential tasks. I don't see a way of doing that, though.\\n\\n
\\n\\nHow about this?\\n\\na := OUTPUT('a');\\nb := OUTPUT('b');\\nc := OUTPUT('c');\\nd := OUTPUT('d');\\ne := OUTPUT('e');\\nf := OUTPUT('f');\\n\\nPARALLEL(SEQUENTIAL(a,b),SEQUENTIAL(c,d),SEQUENTIAL(e,f));
\\n\\nBut I think that implicitly:\\n\\nSEQUENTIAL(a,b)\\nSEQUENTIAL(c,d)\\nSEQUENTIAL(e,f)
\\n\\nwill give you the same result.\\n\\nChecking my timings they are almost exact \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-18 17:32:41\" },\n\t{ \"post_id\": 2370, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"Good to hear, Bob. Thanks! I do have a follow up question, though, unrelated to the previous question but within this flow.\\n\\nI have a large number of these subtasks that I'm executing in parallel. 58 of them, I believe. If I actually wrap PARALLEL around them all I see a split in the graph and then all of them executing at once. At this point my systems guys come barging in, asking why I've driven the load on my nodes to over 40.\\n\\nSo I've been experimenting with trying to break up the task into more manageable chunks but I haven't been successful yet. Introducing SEQUENTIAL into the mix anywhere tends to make all 58 tasks run sequentially, from beginning to end. INDEPENDENT is nearly useless, as the first place I can really use it is after the very efficient initial read, and the output is just a spill file that has to be read again anyway. I seem to remember seeing a github task for commoning-up code within sequential tasks where possible, but I don't think that's in the current release.\\n\\nIs there a pattern to breaking up tasks like this? Ideally it would be a chunk of 4-6 parallel tasks each executing sequential tasks. I don't see a way of doing that, though.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-09-18 17:19:33\" },\n\t{ \"post_id\": 2369, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"Re: PARSE and DISTRIBUTE\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI think that the DISTRIBUTE should be fine as you suggest. You're using the XML version of the PARSE statement so each record is treated and processed by the row tag and you should have great performance. Please let me know if you have any issues, but I think your design flow is sound.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-18 17:07:15\" },\n\t{ \"post_id\": 2366, \"topic_id\": 522, \"forum_id\": 8, \"post_subject\": \"PARSE and DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"I'm currently working with this kind of scenario:\\n\\n1. Given a bunch of XML documents.\\n2. Construct a recordset with only a unique ID (extracted via XPATH) and the full XML saved as a single UNICODE field.\\n3. In parallel, pass the records in that recordset to several separate functions that extract various portions of each XML document and save them into separate datasets, along with the unique ID.\\n\\nMy desire is to DISTRIBUTE the records in each dataset so that XML data with the same ID wind up on the same HPCC node. My question is, can I perform the DISTRIBUTE once, during step #1, and assume that the subsequent PARSE in step #3 retains the distribution? Or do I need to DISTRIBUTE after the PARSE? There is no LOCAL option to PARSE and the documentation doesn't talk about distributions, or at least I didn't see anything.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-09-18 12:53:37\" },\n\t{ \"post_id\": 2395, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"Agreed.
\", \"post_time\": \"2012-09-19 11:54:22\" },\n\t{ \"post_id\": 2394, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"Thanks for your input Gavin. I wish these forum posts had a "Like" button
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-19 11:53:29\" },\n\t{ \"post_id\": 2391, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"Thanks Gavin!\\n\\nI am going to try it out.\", \"post_time\": \"2012-09-19 08:55:45\" },\n\t{ \"post_id\": 2390, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"ghalliday\", \"post_text\": \"There are different levels of answer I think.\\n\\nThe most basic is to define a file with a single field of a user-defined datatype in it. Use that to read the blocks and then manually split them apart.\\n\\nSomething along the lines of the following for reading a 1 byte prefixed string.\\n\\n
\\nexport pstring := type\\n export integer physicallength(string x) := transfer(x[1], unsigned1)+1;\\n export string load(string x) := x[2..transfer(x[1], unsigned1)+1];\\n export string store(string x) := transfer(length(x), string1)+x;\\n export integer maxLength := 256;\\nend;\\n
\\n\\nThe next step is to use a dataset to split the blocks apart. I think the following should work.\\n\\n\\nvariableRecord := RECORD\\n BIG_ENDIAN INTEGER2 recordLength;\\n //replace with the real contents of your records\\n STRING10 id;\\n IFBLOCK (SELF.id <> '')\\n STRING5 name;\\n END;\\n //end of variable record\\nEND;\\n\\n\\nblockRecord := RECORD\\n BIG_ENDIAN INTEGER4 blockSize;\\n DATASET(variableRecord, SIZEOF(SELF.blockSize)) blockrecords;\\nEND;\\n
\\n\\nI would also recommend projecting your data as soon as you can (i.e. straight away) into a RECORD that doesn't use the ,SIZE(SELF.xxx) attribute on the DATASET, and then normalizing the records out of the blocks.\\n\\nThat should involve something like the following:\\n\\nsimpleRecord := RECORD\\n DATASET(variableRecord) blockrecords;\\nEND;\\n\\nds := DATASET('ds', blockRecord, THOR);\\np := PROJECT(ds, TRANSFORM(simpleRecord, SELF := LEFT));\\nrecords := p.blockRecords;\\n
\\n\\nCaveats:\\nI'm not an expert on IBM format (e.g., endianness and sizes of the size fields), I haven't tested the code, etc. etc. Hopefully it should get you going.\\n\\nThere may be an option to remove the block headers when spraying which may make things easier, although the code above should be reasonably efficient.\", \"post_time\": \"2012-09-19 08:51:28\" },\n\t{ \"post_id\": 2388, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"Can I give arbitrary byte patterns as line terminators for the spray operation?\", \"post_time\": \"2012-09-19 07:29:23\" },\n\t{ \"post_id\": 2387, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"I think there are problems with the line terminators in my file. I will have to fix that first and then try your solution.\\n\\nBy the way, is there a way to deal with files which have the length of each line as the first 4 bytes of the line, and no line terminators and field separators. There is only one variable length field at the end. Traditionally, that is the true representation of recfmv format. (Figure 5-1 in http://www.redbooks.ibm.com/redbooks/SG ... wwhelp.htm)\\n\\nI am hoping this is supported because there is an option in dfuplus for spraying recfmv files.\\n\\nI know this is a wild hope but if this works, then we can ask the clients to provide us with the data in its original format.\", \"post_time\": \"2012-09-19 07:22:51\" },\n\t{ \"post_id\": 2385, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"Yes, I think you need to define the SEPARATOR('') and TERMINATOR in the DATASET statement.\", \"post_time\": \"2012-09-18 19:15:40\" },\n\t{ \"post_id\": 2383, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"I tried CSV too. It complained about file "contained a line of length greater than 10485760 bytes". Maybe something wrong with my input file. I will have to check.\\n\\nThe length of my fixed length record is about 469 bytes. So it should not reach that number. 
I opened the data file sprayed as CSV and all the lines made sense.\", \"post_time\": \"2012-09-18 19:00:35\" },\n\t{ \"post_id\": 2379, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"If you spray as CSV, the DATASET statement needs to be CSV.\", \"post_time\": \"2012-09-18 18:08:27\" },\n\t{ \"post_id\": 2378, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"As THOR\", \"post_time\": \"2012-09-18 18:07:32\" },\n\t{ \"post_id\": 2377, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"Are you defining the DATASET as CSV?\", \"post_time\": \"2012-09-18 18:06:45\" },\n\t{ \"post_id\": 2376, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"If I define everything as fixed length, then it complains about the split file sizes not being a multiple of the record size.\\n\\nIf I put in a variable length field at the end, it complains about memory pool exhausted.\", \"post_time\": \"2012-09-18 18:05:45\" },\n\t{ \"post_id\": 2375, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"OK. I managed to spray as a CSV with no separators. What should my record layout look like if I need to pull out fixed length fields with a variable length string at the end?
\\n\\nJust define as you would with a fixed length record. Just make sure that your fixed length fields handle the maximum data size for that field. For example, if the largest last name is "Flugenheimer" a STRING13 would work great.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-18 17:54:15\" },\n\t{ \"post_id\": 2373, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"OK. I managed to spray as a CSV with no separators. What should my record layout look like if I need to pull out fixed length fields with a variable length string at the end?\", \"post_time\": \"2012-09-18 17:35:14\" },\n\t{ \"post_id\": 2372, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"Re: IBM mainframe VB files\", \"username\": \"bforeman\", \"post_text\": \"Wouldn't a simple Spray CSV do the trick?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-18 17:33:14\" },\n\t{ \"post_id\": 2368, \"topic_id\": 523, \"forum_id\": 8, \"post_subject\": \"IBM mainframe VB files\", \"username\": \"sbagaria\", \"post_text\": \"The fixed format deals with all fixed length fields. How do I import an IBM mainframe variable-blocked file? I know that dfuplus has an option to spray a file as recfmvb or recfmv. But I can't find any documentation concerning this on how to load this as a dataset.\", \"post_time\": \"2012-09-18 16:41:06\" },\n\t{ \"post_id\": 2408, \"topic_id\": 525, \"forum_id\": 8, \"post_subject\": \"Re: Value of attribute keeps changing\", \"username\": \"tmurphy\", \"post_text\": \"Deleting the file is not a side effect, it's the primary and desired effect.\", \"post_time\": \"2012-09-20 15:12:47\" },\n\t{ \"post_id\": 2403, \"topic_id\": 525, \"forum_id\": 8, \"post_subject\": \"Re: Value of attribute keeps changing\", \"username\": \"ghalliday\", \"post_text\": \"Even doing the DeleteLogicalFile() is going against the idea of HPCC being procedural. Once you have side-effects going on then you are likely to hit some issues.\\n\\nYou could mark fileExists as independent. That way it only gets evaluated once, but you have less control about when. It also won't work very well inside a dataset operation (e.g., applying it to a list of files.)\\n\\nOn a modern system something like the following should work:\\n\\nexistedBeforeBeingDeleted := WHEN(fileExists, STD.File.DeleteLogicalFile(path, FALSE));\\n\\nYou're associating the side-effect when the expression so it shouldn't be evaluated twice. You may find other interesting issues though - the delete may happen too early.\\n\\nIf it does please open a bug.\", \"post_time\": \"2012-09-20 11:51:04\" },\n\t{ \"post_id\": 2398, \"topic_id\": 525, \"forum_id\": 8, \"post_subject\": \"Value of attribute keeps changing\", \"username\": \"tmurphy\", \"post_text\": \"I'm trying to write a query that checks if a file exists, and then deletes it if it does exist. I want the query to return true if the file exists (and was deleted) or false otherwise. So here is (a stripped down version of) my code:\\n\\nIMPORT * FROM Std;\\nSTRING path := '~myfilenamehere';\\nBOOLEAN fileExists := STD.File.FileExists(path); \\nOUTPUT(fileExists); // inserted for debug purposes only; shows 'true'\\nIF (fileExists, STD.File.DeleteLogicalFile(path, FALSE));\\nOUTPUT(fileExists); // shows 'false'; I assume this is because 'fileExists' gets *re-evaluated* and since the file is now gone, the value is changed to false\\n\\nWhat do I do? 
I could put the OUTPUT at the spot where it's true, but that seems to defy the whole idea that HPCC is "non-procedural" (and I don't even know if that will work).\\n\\nTom\", \"post_time\": \"2012-09-19 21:54:31\" },\n\t{ \"post_id\": 2594, \"topic_id\": 526, \"forum_id\": 8, \"post_subject\": \"Re: External Services\", \"username\": \"chhaya\", \"post_text\": \"Any suggestions ?\", \"post_time\": \"2012-10-23 09:24:20\" },\n\t{ \"post_id\": 2409, \"topic_id\": 526, \"forum_id\": 8, \"post_subject\": \"Re: External Services\", \"username\": \"chhaya\", \"post_text\": \"i want guidelines to make .SO file how can i create my own service and call it.\", \"post_time\": \"2012-09-21 09:00:17\" },\n\t{ \"post_id\": 2407, \"topic_id\": 526, \"forum_id\": 8, \"post_subject\": \"Re: External Services\", \"username\": \"bforeman\", \"post_text\": \"...and here's another link with some good information:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=441&p=2008&hilit=so+files+shared+object&sid=7fae5bfc315ee40e95e66506d8757ee2&sid=7fae5bfc315ee40e95e66506d8757ee2#p2008\", \"post_time\": \"2012-09-20 13:51:11\" },\n\t{ \"post_id\": 2404, \"topic_id\": 526, \"forum_id\": 8, \"post_subject\": \"Re: External Services\", \"username\": \"bforeman\", \"post_text\": \"In the Language Reference manual, there is a great example on Page 365 (look for An Example Service)\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-20 12:10:13\" },\n\t{ \"post_id\": 2400, \"topic_id\": 526, \"forum_id\": 8, \"post_subject\": \"External Services\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nGoing through External Services in ECL and what i have understood is i have to create .SO file but not getting a clear idea.\\n\\nCan you guide me how can i do that with some examples.\\n\\nThanks\", \"post_time\": \"2012-09-20 07:25:35\" },\n\t{ \"post_id\": 2418, \"topic_id\": 527, \"forum_id\": 8, \"post_subject\": \"Re: Use of BNOT\", \"username\": \"Allan\", \"post_text\": \"Thanks,\\n\\nThe documentation could do with an example.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-21 15:35:11\" },\n\t{ \"post_id\": 2406, \"topic_id\": 527, \"forum_id\": 8, \"post_subject\": \"Re: Use of BNOT\", \"username\": \"ghalliday\", \"post_text\": \"BNOT is a unary operator to provide a bitwise inverse of an expression.\\n\\nI think you mean\\n\\n3 & BNOT 1\\n\\ne.g.,\\n\\n127 & BNOT 15 == 112\", \"post_time\": \"2012-09-20 13:14:29\" },\n\t{ \"post_id\": 2405, \"topic_id\": 527, \"forum_id\": 8, \"post_subject\": \"Use of BNOT\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI want to clear some bits down and see 'BNOT' documented in the REF guide under 'bitwise operators'\\n\\nHowever if I attempt to use BNOT in the same fashion as the other logical operators it syntax errors.\\nThe following is ok:\\n\\na := 3 ^ 1;\\n
\\nThe following does not parse:\\n\\na := 3 BNOT 1;\\n
\\n\\nThere are no examples of BNOT's use.\\n\\nErr help - I need to do be able to do this.\\n\\nYours\\nAllan\", \"post_time\": \"2012-09-20 12:47:05\" },\n\t{ \"post_id\": 2412, \"topic_id\": 528, \"forum_id\": 8, \"post_subject\": \"Re: External Service\", \"username\": \"bforeman\", \"post_text\": \"Hi Jeniba,\\n\\nIn order to consolidate the information to a single post, there is already an active thread on this topic:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=526&sid=762dcc9434b194500780e7354df50209\\n\\nPlease refer to this thread for more information, and feel free to post as needed there.\\n\\nTo answer your question, SO files are used with the ECL SERVICE structure to extend the capabilities of ECL. You can build libraries in the form of Shared Object files (SO files) that can enhance your data processing. Look at the already created libraries in the STD folder for examples of functions that extend the power of ECL.\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-21 12:47:09\" },\n\t{ \"post_id\": 2410, \"topic_id\": 528, \"forum_id\": 8, \"post_subject\": \"External Service\", \"username\": \"Jeniba\", \"post_text\": \"After reading the Language reference.Iam still not clear about external service.Need to know how to create .SO and how to use it?What is the purposed of .SO?\", \"post_time\": \"2012-09-21 09:04:10\" },\n\t{ \"post_id\": 2414, \"topic_id\": 529, \"forum_id\": 8, \"post_subject\": \"Re: COMPRESSED option: DATASET vs INDEX\", \"username\": \"oleg\", \"post_text\": \"So how the records being accessed by record id from compressed datasets (Like in full keyed join I've mentioned earlier)?\\n\\nMy understanding is block address should be part of record id and then whole block should be decompressed - correct?\", \"post_time\": \"2012-09-21 13:30:43\" },\n\t{ \"post_id\": 2413, \"topic_id\": 529, \"forum_id\": 8, \"post_subject\": \"Re: COMPRESSED option: DATASET vs INDEX\", \"username\": \"richardkchapman\", \"post_text\": \"By default, indexes are compressed per block.\\n\\nDatasets are always compressed in blocks\\n\\nWe don't provide options for compressing datasets mainly because we've never had a call to. Datasets are typically not accessed randomly, and the compression is designed primarily to save disk space (and disk bandwidth). Indexes on the other hand are almost always accessed randomly, and the compression is designed more around that pattern of usage. The main reason there are a lot of options for compressing indexes is that it's harder to come up with a scheme that suits this pattern well, and the various options largely represent experiments in getting a good fit. Generally I'd recommend sticking with the default...\", \"post_time\": \"2012-09-21 13:04:57\" },\n\t{ \"post_id\": 2411, \"topic_id\": 529, \"forum_id\": 8, \"post_subject\": \"COMPRESSED option: DATASET vs INDEX\", \"username\": \"oleg\", \"post_text\": \"On Index, I can specify row level compression which said to be faster then the regular.\\nHowever, with the dataset, I can only specify that file is compressed.\\n\\nFew questions regarding that:\\n\\n- what is the default level for the index? Is it block?\\n- is it same for datasets?\\n- why we don't have the same options for the datasets? 
(At least it can be beneficial for full keyed join/access).\", \"post_time\": \"2012-09-21 09:50:40\" },\n\t{ \"post_id\": 2535, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"ideal\", \"post_text\": \"Confirmed by test !\\n\\nJM.\", \"post_time\": \"2012-10-18 09:04:18\" },\n\t{ \"post_id\": 2478, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"ideal\", \"post_text\": \"Sorry, I have no time to test right now but I think you're right. \\nIt is smart, I wish I could have found it by myself.\\nThis close the subject unless test fails but I don't think so.\\nthanks a lot,\\nJM.\", \"post_time\": \"2012-10-09 10:18:52\" },\n\t{ \"post_id\": 2464, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nTry this:SHARED PATTERN chaineAtome := PATTERN('[a-z0-9àéèêîïôùûç<>+*/=_][-a-zA-Z0-9àéèêîïôùûç<>+*/=_]*');
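A quick sanity check might look something like this (just a sketch, using a one-record inline dataset and a simplified character class):\n\n
ds := DATASET([{'2+2-3'}],{STRING line});\n// '-' placed first in the class, so it is treated as a literal dash:\nPATTERN tok := PATTERN('[-a-z0-9<>+*/=_]')+;\nOUTPUT(PARSE(ds,line,tok,{STRING t := MATCHTEXT(tok)}));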
Placing the "-" as the first character in the pattern worked for me when I needed a dash in addition to letters and numbers. It's just a matter of placing it in the pattern where it is unambiguously a dash character.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-02 18:34:07\" },\n\t{ \"post_id\": 2463, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\nSorry for my late answer. \\n\\nI am willing to decode a string like "2+2-3".\\nThen my code is\\nSHARED PATTERN chaineAtome := PATTERN('[a-z0-9àéèêîïôùûç<>+*/=_][a-zA-Z0-9àéèêîïôùûç<>+*/=_]*');
\\n\\nAs you can see, character "-" is missing. When I try to use the escape character "\\\\" before it to make it available, as I could with "\\\\" itself, it is still recognized as a special character. \\nWith the code you propose, I don't have a chance to ignore it, nor to have one in the middle of the string.\\n\\nIn regular expressions, it is (or should be?) always possible to escape any special character.\\n\\nthanks,\\nJM\", \"post_time\": \"2012-10-02 18:21:02\" },\n\t{ \"post_id\": 2424, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nLike this:AlphaNum := PATTERN('[a-z123]');\\nFirstTwo := AlphaNum '-';
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-09-24 15:34:26\" },\n\t{ \"post_id\": 2422, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"Re: How to escape character - in PATTERN\", \"username\": \"bforeman\", \"post_text\": \"Hi JM,\\n\\nYou should be able to use '\\\\-' pattern I would think, what specific string pattern are you trying to detect?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-24 12:28:09\" },\n\t{ \"post_id\": 2420, \"topic_id\": 530, \"forum_id\": 8, \"post_subject\": \"How to escape character - in PATTERN\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nHow can I recognize the character "-" in second position in PATTERN('[a-z123-]');\\n\\nthanks,\\nJM\", \"post_time\": \"2012-09-23 15:27:36\" },\n\t{ \"post_id\": 2462, \"topic_id\": 533, \"forum_id\": 8, \"post_subject\": \"Re: Need help with suggestions to optimize ECL\", \"username\": \"arunarav\", \"post_text\": \"Richard: Many thanks for your recommendations - I'll try as suggested and report back with updates.\", \"post_time\": \"2012-10-01 10:01:45\" },\n\t{ \"post_id\": 2439, \"topic_id\": 533, \"forum_id\": 8, \"post_subject\": \"Re: Need help with suggestions to optimize ECL\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nI can't address your major issues, but I can make a couple of comments that might help overall.\\n\\n1) Your Lexicon file is not consistent -- it contains many records with a single word in each, but it also contains a large number of records in this format:9421,10,TIME\\n9421,11,TO\\n9421,12,PART\\n9422,1,ARMS\\n
I cleaned these out by doing a regular expression search and replace in TextPad, searching for "[0-9]+,[0-9]+," and replacing it with nothing.\\n\\n2) Your code starts with a TABLE of the words and their number of occurrences. If you're only running on a 1-node cluster (like the VM version), the LOCAL option does nothing, but if you're running on a multi-node cluster, the LOCAL option will give you a result where the same word will most likely have several entries in the result. If that's your intention, then so be it.\\n\\n3) If your lexicon of words and their counts is not likely to change often, then you would be better off writing that to disk as an INDEX, something like this:TBL := TABLE(dictionaryDS,wordCountRec,word);\\ndictWordsWithCount := INDEX(TBL,\\n {STRING50 word := TBL.word,\\n integer count_wrd := TBL.count_wrd},\\n '~spellcheck::hpcc::dictWordsWithCountIDX');\\n\\nBUILD(dictWordsWithCount);\\n
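A half-keyed JOIN against that index might then look something like this (a sketch only -- "candidates" here is just a made-up recordset of words to look up):\n\n
candidates := DATASET([{'introductin'},{'speling'}],{STRING50 word});\nmatches := JOIN(candidates,dictWordsWithCount,\n                LEFT.word = RIGHT.word,\n                TRANSFORM({STRING50 word, INTEGER cnt},\n                          SELF.word := LEFT.word,\n                          SELF.cnt  := RIGHT.count_wrd));\nOUTPUT(matches);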
Once it's built, you can comment out the BUILD and just use the INDEX in your code, so that you could use half-keyed JOINs instead of recordset JOINs (which should run faster).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-09-26 16:03:09\" },\n\t{ \"post_id\": 2432, \"topic_id\": 533, \"forum_id\": 8, \"post_subject\": \"Need help with suggestions to optimize ECL\", \"username\": \"arunarav\", \"post_text\": \"My colleagues and I are trying to port Peter Norvig's spelling corrector to ECL. We have created a couple of ECL files that compute the correction at a distance of one and two:\\n\\n * CorrectSpelling.ecl : Corrects the spelling with a distance of 1 - eg. 'introductin' would be corrected as 'introduction')\\n\\n * CorrectSpelling_distance2.ecl : (Corrects the spelling with a distance of 2 - eg. 'introdctin' would be corrected as 'introduction')\\n\\nThe code is available at github [1].\\n\\nThe screenshots describing the time taken is at [2].\\n\\nDuring the course of development, we noticed that the performance on a single node VM is much slower than the equivalent code in java [3]. For instance:\\n\\n * the PROJECT operation in the distance of two takes 25+ seconds while the java code takes a couple of seconds.\\n\\nIf the experts in this forum could suggest how the code could be optimized (especially the PROJECT operation), that would be very helpful.\\n\\n[1] Source hosted at Github : https://github.com/arunarav/hpcc-spelling-corrector\\n\\n[2] https://github.com/arunarav/hpcc-spelli ... ector/wiki\\n\\n[3] Equivalent java code: http://pastebin.ubuntu.com/1226178/. The lexicon used is the same as the one used in the ECL code. \\n\\nThanks\\n-Arun\", \"post_time\": \"2012-09-25 08:54:14\" },\n\t{ \"post_id\": 2443, \"topic_id\": 534, \"forum_id\": 8, \"post_subject\": \"Re: NOSORT option in JOIN does not work if field descending\", \"username\": \"bforeman\", \"post_text\": \"Thanks for your comments Richard, I will pass this to documentation as perhaps we can mention that NOSORT assumes an ascending sort.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-27 13:49:36\" },\n\t{ \"post_id\": 2442, \"topic_id\": 534, \"forum_id\": 8, \"post_subject\": \"Re: NOSORT option in JOIN does not work if field descending\", \"username\": \"richardkchapman\", \"post_text\": \"I'll let Gavin comment on the bug report in Jira, but I suspect that the answer will be that specifying ,NOSORT means that the system assumes (and is supposed to assume) that the fields are already 'properly sorted' meaning that it is sorted and partitioned by the join fields, in ascending order.\\n\\nYou'd actually be better off NOT in most cases telling the system not to sort on the join, but rather letting it work that out for itself from the sortedness (or otherwise) of the inputs. The NOSORT flag doesn't give any information about HOW it is sorted (and can't therefore tell that you meant descending).\\n\\nAlso, I rather suspect that even if your sorts had been ascending, the code may not have done what you expected if you had put NOSORT on the join, since the two sorts will not have divided the records between nodes at the same boundaries. 
On a multi-node system this could lead to issues.\", \"post_time\": \"2012-09-27 13:45:49\" },\n\t{ \"post_id\": 2437, \"topic_id\": 534, \"forum_id\": 8, \"post_subject\": \"Re: NOSORT option in JOIN does not work if field descending\", \"username\": \"bforeman\", \"post_text\": \"I've confirmed your report, checking with development to confirm a bug or perhaps user error \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-26 12:06:50\" },\n\t{ \"post_id\": 2435, \"topic_id\": 534, \"forum_id\": 8, \"post_subject\": \"Re: NOSORT option in JOIN does not work if field descending\", \"username\": \"bforeman\", \"post_text\": \"You have a small typo in your example, that should be "LEFT OUTER"\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-26 11:47:10\" },\n\t{ \"post_id\": 2434, \"topic_id\": 534, \"forum_id\": 8, \"post_subject\": \"NOSORT option in JOIN does not work if field descending\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nNot sure whether this is a bug or need more configuration, NOSORT option in JOIN does not work if field descending, i.e.\\n\\n1. run into Error.\\n
\\n// col2 descending\\nset1_1 := sort(set1, col1, -col2);\\nset2_1 := sort(set2, col1, -col2);\\nJOIN(set1_1, set2_1, LEFT.col1 = RIGHT.col1 AND LEFT.col2 = RIGHT.col2,\\n NOSORT,\\n LEFT OUTTER)\\n
\\n2. run successfully\\n\\n// col2 ascending\\nset1_1 := sort(set1, col1, col2);\\nset2_1 := sort(set2, col1, col2);\\nJOIN(set1_1, set2_1, LEFT.col1 = RIGHT.col1 AND LEFT.col2 = RIGHT.col2,\\n NOSORT,\\n LEFT OUTTER)\\n
\\n\\nI just want to keep the order (col1, -col2), otherwise I have to sort the JOIN result manually.\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-26 10:52:58\" },\n\t{ \"post_id\": 2450, \"topic_id\": 535, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND IN FunctionMacro gives Error\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,If I use Any hard Coded String in Macro, it works fine and gives me a Desired Output...\\n\\nBut if use the Same string to be read from an XML file and then pass the Value to the Macro,it gives me the \\n\\nError: Constant expression expected...
Yes, that's exactly what I was saying -- you cannot do it this way.\\n\\nSo, again, what exactly are you trying to accomplish? Why are you writing this FUNCTIONMACRO to define a simple TABLE function? Perhaps there is another approach that will work better to get you where you want to go.\\n\\nRichard\", \"post_time\": \"2012-09-27 18:20:22\" },\n\t{ \"post_id\": 2441, \"topic_id\": 535, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND IN FunctionMacro gives Error\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Richard,\\n\\nIf I use Any hard Coded String in Macro, it works fine and gives me a Desired Output\\n PolicyData := DATASET($.common.constants.AccuRUSI_PolicyData_OUTPUT,$.common.Layout_policyDataRec,THOR);\\n\\nfieldStr:='RiskAge,RiskSICCode,RiskValue';\\n$.AglonConf_Macro2(PolicyData,fieldStr);
\\n\\nBut if use the Same string to be read from an XML file and then pass the Value to the Macro,it gives me the \\n\\nError: Constant expression expected (4, 42), 2071, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\AccuRUSI_try\\\\AglonConf_Macro2.ecl\\n\\nI have already pasted the XML and The Macro Code in the above post thread.\", \"post_time\": \"2012-09-27 09:53:04\" },\n\t{ \"post_id\": 2440, \"topic_id\": 535, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND IN FunctionMacro gives Error\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,\\n\\nThe "Constant expression expected" error is indicating that TABLE requires a constant expression as its RECORD structure and you're trying to build it "on the fly" and the compiler won't let you. IOW, it's telling you that you can't do what you want to do in the way you're trying to do it.\\n\\nSo, what are you trying to accomplish here?\\n\\nRichard\", \"post_time\": \"2012-09-26 18:24:24\" },\n\t{ \"post_id\": 2438, \"topic_id\": 535, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND IN FunctionMacro gives Error\", \"username\": \"bforeman\", \"post_text\": \"I think #EXPAND requires the use of LOADXML. From the docs:\\n\\nLOADXML opens an active XML scope for Template language statements or symbols to act on. LOADXML must be the first line of code to function correctly.\\n\\nA valid XML scope is required for most Template Language statements to work. This is also used in "drilldown" MACRO code.\\n
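The usual idiom looks something like this (just a sketch of the pattern from the docs, not tested against your layouts):\n\n
LOADXML('<xml/>');   // open a (dummy) active XML scope first\n#DECLARE(fields);\n#SET(fields,'RiskAge,RiskSICCode,RiskValue');\nOUTPUT(%'fields'%);  // the template symbol is expanded at compile time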
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-26 12:16:38\" },\n\t{ \"post_id\": 2436, \"topic_id\": 535, \"forum_id\": 8, \"post_subject\": \"#EXPAND IN FunctionMacro gives Error\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi \\nWe are trying to read a string Value from XML but when we Provide that String Value to the FunctionMacro it gives the below given Error:\\n\\nError: Constant expression expected (4, 42), 2071, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\AccuRUSI_try\\\\AglonConf_Macro2.ecl\\n\\nMy XML Looks like:\\n<configuration>\\n\\t<add Key='RiskAge,RiskSICCode,RiskValue'/>\\n \\n</configuration>
\\n\\nMy Macro Code is:\\nEXPORT AglonConf_Macro2(datasetToTable,TableFormat) :=FUNCTIONMACRO\\n\\n\\nRequiredDS:=TABLE(datasetToTable,{#EXPAND(TableFormat)});\\n\\n\\nRETURN RequiredDS;\\nENDMACRO;
\\n\\n\\nand the code where i'm calling this macro contains:\\n\\n PolicyData := DATASET($.common.constants.AccuRUSI_PolicyData_OUTPUT,$.common.Layout_policyDataRec,THOR);\\n\\nConfigData := DATASET('~file::172.20.15.168::home::hpcc::lz_data::apurv::config.xml',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{STRING Read_XMLasSTRING {XPATH('<>')}},XML('configuration/add'));// Here {XPATH('<>')} reads the whole XML in a single STRING i.e. in Read_XMLasSTRING\\n\\noutrec :=record\\nString Key := xmltext('@Key');\\nend;\\n\\nparsedXML := parse(ConfigData,Read_XMLasSTRING,outrec,XML('add'));\\n\\nstr:=(STRING)parsedXML[1].Key;\\n\\n$.AglonConf_Macro2(PolicyData,str);
\\n\\n\\nPlease can you help us with this issue.We need to develop something on Urgent Basis.\", \"post_time\": \"2012-09-26 12:03:01\" },\n\t{ \"post_id\": 2699, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"ghalliday\", \"post_text\": \"I have added a link from that issue back to this forum post. I'll update when there is any progress.\\n\\nIf it is any comfort I really want to get this issue resolved once and for all!\", \"post_time\": \"2012-11-06 11:37:26\" },\n\t{ \"post_id\": 2461, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"ghalliday\", \"post_text\": \"I will try and reply more fully next week. From a quick glance it looks like it is related to issue 2946 in jira to do with the way volatile functions are handled.\\n\\nI have a branch which improves it, but I have hit several nasty issues which means it hasn't been possible to merge it in yet. Hopefully once 3.10 is stable I will be able to return to it.\", \"post_time\": \"2012-09-28 19:24:04\" },\n\t{ \"post_id\": 2460, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"bforeman\", \"post_text\": \"Update: I just tested using RANDOM() instead of STD.system.Util.GetUniqueInteger() and it shows the same behavior.\\n\\nFun stuff!\\n\\n
\\n\\nIndeed, verified here as well.\", \"post_time\": \"2012-09-28 14:55:16\" },\n\t{ \"post_id\": 2459, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"DSC\", \"post_text\": \"Well, that would work if you could guarantee that no other process attached to that Dali (the source of the numbers, I think) don't ask for unique integers as well. That's very fragile, though.\\n\\nActually, I think STD.system.Util.GetUniqueInteger() is the perfect test case for this issue. It will always generate a unique number, because that's what it's designed for. Using other things, like the number of seconds since the epoch or whatnot, won't show the full problem except in boundary cases (when the value happens to change).\\n\\nUpdate: I just tested using RANDOM() instead of STD.system.Util.GetUniqueInteger() and it shows the same behavior.\\n\\nFun stuff!\\n\\nDan\", \"post_time\": \"2012-09-28 13:55:12\" },\n\t{ \"post_id\": 2458, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"bforeman\", \"post_text\": \"Dan,\\n\\nI understand what you are saying, but my point is that the problem is in the usage of the GetUniqueInteger function that is somehow violating the scoping rules.\\n\\nHow about something like this?\\n\\nIMPORT STD;\\n\\nWriteData(STRING filename,INTEGER val) := FUNCTION\\n\\t fileSuffix := filename + val;\\n\\t fileSuffix2 := filename + (val-1);\\n RETURN SEQUENTIAL(\\n OUTPUT(fileSuffix),\\n //File.StartSuperFileTransaction(),\\n OUTPUT(fileSuffix2)\\n );\\nEND;\\n\\nCreateData(STRING sourceLogicalFilePath,INTEGER num) := FUNCTION\\n //fileSuffix := STD.system.Util.GetUniqueInteger();\\n RETURN WriteData(sourceLogicalFilePath,num);\\nEND;\\n\\nSEQUENTIAL(CreateData('~mydata1',STD.system.Util.GetUniqueInteger()),\\n CreateData('~mydata2',STD.system.Util.GetUniqueInteger()),\\n CreateData('~mydata3',STD.system.Util.GetUniqueInteger()));
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-28 13:40:30\" },\n\t{ \"post_id\": 2457, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"DSC\", \"post_text\": \"Hi Bob,\\n\\nYour example isn't the same. In your case, the suffix is generated entirely outside the code (in your head, to be precise). I fully expect that to work right.\\n\\nThe problem is when the suffix needs to be generated within the not just extracted, and the generation occurs more times than expected. One use-case for this is to create a unique logical filename for a particular directory, which is where I was going when all this started. Using other functions, such as something that constructs a date/time string for the suffix, doesn't work for the same reason but would fail less often.\\n\\nThinking about this more, this may be related to the use of SEQUENTIAL and the way code is *not* commoned-up. Avoiding SEQUENTIAL is not possible when dealing with superfiles, however.\\n\\nAfter my last post it occurred to me that a new built-in function like VALUEOF(scalar) would work well. It would evaluate its argument and return it as a literal, thereby 'freezing' the value at that time. That would be useful for all kinds of things.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-28 13:13:22\" },\n\t{ \"post_id\": 2456, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nIt certainly is weird \\n\\nI think that it has something to do with the GetUniqueInteger() function, since that assigns a unique integer to each slave node.\\n\\nThat said, I tried something different. I used some simple parsing to extract a unique number from the passed parameter:\\n\\n
IMPORT STD;\\n\\nWriteData(STRING filename) := FUNCTION\\n val := LENGTH(TRIM(filename)); \\n suffix := filename[val..]; \\n fileSuffix := filename + suffix;\\n RETURN SEQUENTIAL(\\n OUTPUT(fileSuffix),\\n //File.StartSuperFileTransaction(),\\n \\t\\t OUTPUT(fileSuffix)\\n );\\nEND;\\n\\nCreateData(STRING sourceLogicalFilePath) := FUNCTION\\n\\n //fileSuffix := STD.system.Util.GetUniqueInteger();\\n RETURN WriteData(sourceLogicalFilePath);\\nEND;\\n\\nSEQUENTIAL(CreateData('~mydata1'),CreateData('~mydata2'),CreateData('~mydata3'));\\n
\\n\\nI know the code is trivial and borders on silly , but as you can see I get a unique and persistent number for each output. So I'm thinking the scoping is related to your use of GetUniqueInteger.\\n\\nJust my two cents, I'm sure Richard or one of the development team may want to jump in here.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-28 13:04:28\" },\n\t{ \"post_id\": 2453, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"DSC\", \"post_text\": \"Moving the evaluation of the attribute doesn't work. The ECL compiler apparently just sees it all as a big blob.\\n\\nHere are some runnable test cases that illustrate the issue, using OUTPUT instead of trying complicate things with file creation. I'm still using System.Util.GetUniqueInteger() because that easily shows the differing values, but I don't believe that function is at fault at all.\\n\\nStarting point:\\n\\n
IMPORT * FROM Std;\\n\\nFinalWork(STRING s) := FUNCTION\\n\\tRETURN SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t);\\nEND;\\n\\nInterimWork(STRING s) := FUNCTION\\n\\tsuffix := (STRING)System.Util.GetUniqueInteger();\\n\\ts2 := s + ':' + suffix;\\n\\tRETURN FinalWork(s2);\\nEND;\\n\\nInterimWork('SomeString1');\\nInterimWork('SomeString2');
\\n\\nResults:\\n\\nResult 1 = 'SomeString1:47250401956'\\nResult 2 = 'SomeString1:47250401957' // Expected same as Result 1\\nResult 3 = 'SomeString2:47250401958'\\nResult 4 = 'SomeString2:47250401959' // Expected same as Result 3
\\n\\nSame as before, but add INDEPENDENT:\\n\\nIMPORT * FROM Std;\\n\\nFinalWork(STRING s) := FUNCTION\\n\\tRETURN SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t);\\nEND;\\n\\nInterimWork(STRING s) := FUNCTION\\n\\tsuffix := (STRING)System.Util.GetUniqueInteger() : INDEPENDENT;\\n\\ts2 := s + ':' + suffix;\\n\\tRETURN FinalWork(s2);\\nEND;\\n\\nInterimWork('SomeString1');\\nInterimWork('SomeString2');
\\n\\nResults:\\n\\nResult 1 = 'SomeString1:47250407152'\\nResult 2 = 'SomeString1:47250407152'\\nResult 3 = 'SomeString2:47250407152' // Expected different value\\nResult 4 = 'SomeString2:47250407152' // Expect same as Result 3
\\n\\nMoving the evaluation up to the beginning:\\n\\nIMPORT * FROM Std;\\n\\nFinalWork(STRING s) := FUNCTION\\n\\tRETURN SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(s);\\n\\t\\t\\t\\t\\t\\t);\\nEND;\\n\\nInterimWork(STRING s) := FUNCTION\\n\\tRETURN FinalWork(s);\\nEND;\\n\\nInterimWork('SomeString1:' + (STRING)System.Util.GetUniqueInteger());\\nInterimWork('SomeString2:' + (STRING)System.Util.GetUniqueInteger());
\\n\\nResults (basically same as first iteration):\\n\\nResult 1 = 'SomeString1:47250411851'\\nResult 2 = 'SomeString1:47250411852' // Expected same as Result 1\\nResult 3 = 'SomeString2:47250411853'\\nResult 4 = 'SomeString2:47250411854' // Expected same as Result 3
\\n\\nI've commented in the results on what I thought I should be seeing. I don't think this is a bug, per se, but I think that this behavior is confusing. As I mentioned before, having a ONCE_AT_SCOPE service (or whatever) would be an ideal solution. It would be used in the second example instead of INDEPENDENT, and it would cause a reevaluation of the suffix attribute every time the InterimWork() was called but not again thereafter (e.g. within FinalWork()). Or, perhaps there is a workaround where I can copy an attribute value and break its association with its original definition; that would also solve the problem.\\n\\nThoughts?\\n\\nDan\", \"post_time\": \"2012-09-28 12:04:47\" },\n\t{ \"post_id\": 2451, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Re: Evaluating attribute once within scope\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nWhat happens when you do it like this:WriteData(DATASET(foo) ds, STRING fileSuffix) := FUNCTION\\n outfilePath := '~whatever_' + fileSuffix;\\n RETURN SEQUENTIAL (\\n OUTPUT(ds,,outFilePath),\\n File.StartSuperFileTransaction(),\\n File.AddSuperFile(kSuperfilePath,outfilePath),\\n File.FinishSuperFileTransaction()\\n );\\nEND;\\n\\nCreateData(STRING sourceLogicalFilePath, STRING fileSuffix) := FUNCTION\\n // fileSuffix := System.Util.GetUniqueInteger();\\n ds := DATASET(sourceLogicalFilePath,foo,THOR);\\n RETURN WriteData(ds,fileSuffix);\\nEND;\\n\\nCreateData('~mydata1',(STRING)System.Util.GetUniqueInteger());\\nCreateData('~mydata2',(STRING)System.Util.GetUniqueInteger());\\nCreateData('~mydata3',(STRING)System.Util.GetUniqueInteger());
\\n\\nRichard\", \"post_time\": \"2012-09-27 18:40:33\" },\n\t{ \"post_id\": 2444, \"topic_id\": 536, \"forum_id\": 8, \"post_subject\": \"Evaluating attribute once within scope\", \"username\": \"DSC\", \"post_text\": \"There are times when it would be useful to have an attribute evaluate its value exactly once within its scope (say, within a function) but then reevaluated if the scope is reentered. To be less obtuse, consider this incomplete, contrived example:\\n\\nWriteData(DATASET(foo) ds, STRING fileSuffix) := FUNCTION\\n\\toutfilePath := '~whatever_' + fileSuffix;\\n\\tRETURN SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\tOUTPUT(ds,,outFilePath),\\n\\t\\t\\t\\t\\t\\t\\tFile.StartSuperFileTransaction(),\\n\\t\\t\\t\\t\\t\\t\\tFile.AddSuperFile(kSuperfilePath,outfilePath),\\n\\t\\t\\t\\t\\t\\t\\tFile.FinishSuperFileTransaction()\\n\\t\\t\\t\\t\\t\\t);\\nEND;\\n\\nCreateData(STRING sourceLogicalFilePath) := FUNCTION\\n\\tfileSuffix := System.Util.GetUniqueInteger();\\n\\tds := DATASET(sourceLogicalFilePath,foo,THOR);\\n\\tRETURN WriteData(ds,fileSuffix);\\nEND;\\n\\nCreateData('~mydata1');\\nCreateData('~mydata2');\\nCreateData('~mydata3');
\\n\\nThe above code will fail in an interesting way. Within WriteData(), the outfilePath attribute will be evaluated twice. An output file will be created with one name, then the superfile will be updated with another (and then fail, because that new filepath doesn't exist).\\n\\nThat error can be mitigated by adding an INDEPENDENT to the fileSuffix attribute assignment within CreateData(), but then fileSuffix is evaluated only once for this entire run. That means that even though we're dealing with three input files, they will all wind up with exactly the same outfilePath name.\\n\\nIn this example, it would be handy to have fileSuffix behave more like a variable in C++. Specifically, evaluate it's value once in that scope only and then pass that value on to WriteData() without further evaluation. Is there a way to do that, perhaps by copying or casting the attribute value in a particular way? Is there maybe an undocumented ONCE_IN_SCOPE workflow service? Or am I just approaching this whole thing badly and there is a better way?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-09-27 14:11:12\" },\n\t{ \"post_id\": 2449, \"topic_id\": 537, \"forum_id\": 8, \"post_subject\": \"Re: Building an index with LOCAL\", \"username\": \"DSC\", \"post_text\": \"Great. Thanks for the clarification.\\n\\nDan\", \"post_time\": \"2012-09-27 16:59:43\" },\n\t{ \"post_id\": 2448, \"topic_id\": 537, \"forum_id\": 8, \"post_subject\": \"Re: Building an index with LOCAL\", \"username\": \"bforeman\", \"post_text\": \"Yep, as long as the data to be built is pre-distributed, you should be good to go with LOCAL!\\n\\nBob\", \"post_time\": \"2012-09-27 16:57:28\" },\n\t{ \"post_id\": 2447, \"topic_id\": 537, \"forum_id\": 8, \"post_subject\": \"Re: Building an index with LOCAL\", \"username\": \"DSC\", \"post_text\": \"So you're saying that there is no usage restriction on an index built with LOCAL versus one built without? That would be great!\\n\\nDan\", \"post_time\": \"2012-09-27 16:55:31\" },\n\t{ \"post_id\": 2446, \"topic_id\": 537, \"forum_id\": 8, \"post_subject\": \"Re: Building an index with LOCAL\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI believe that LOCAL option simply allows the nodes to build the index independently on each node (on THOR), eliminating cross-talk between the slave nodes. Primarily the option is there to optimize the process, particularly for large index files on a large cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-27 16:49:29\" },\n\t{ \"post_id\": 2445, \"topic_id\": 537, \"forum_id\": 8, \"post_subject\": \"Building an index with LOCAL\", \"username\": \"DSC\", \"post_text\": \"Given a non-distributed index (i.e. there will be a root) that will be built on a DISTRIBUTED dataset, what are the benefits and shortfalls of using the LOCAL option in the BUILD command? The manual is entirely unclear on why you would want to use LOCAL (though it does indicate that previously-distributed data retains its distribution, which begs the question of what happens to the distribution when you don't use LOCAL).\\n\\nThe eventual desire is to both perform non-local FETCHs through the index as well as treat the index like a dataset and perform local rollups.\\n\\nAny information would be appreciated. 
Thanks!\\n\\nDan\", \"post_time\": \"2012-09-27 15:27:23\" },\n\t{ \"post_id\": 2468, \"topic_id\": 539, \"forum_id\": 8, \"post_subject\": \"Re: PROCESS execution\", \"username\": \"bforeman\", \"post_text\": \"You can also look at this example in action on the ECL Playground \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-04 15:43:07\" },\n\t{ \"post_id\": 2467, \"topic_id\": 539, \"forum_id\": 8, \"post_subject\": \"Re: PROCESS execution\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nThe first example in the Language Reference (which you can cut, paste, and run) demonstrates exactly what PROCESS does:\\n
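// Not the verbatim Language Reference example -- just a small sketch in the same\n// spirit, showing how the two TRANSFORMs interact:\nInRec    := {STRING20 Line};\nStateRec := {UNSIGNED1 Cnt};\nds := DATASET([{'Apple'},{'Banana'},{'Cherry'}],InRec);\n\nInRec DSxform(InRec L, StateRec R) := TRANSFORM\n  SELF.Line := (STRING)R.Cnt + ':' + L.Line;  // LEFT = input record, RIGHT = state row\nEND;\n\nStateRec ROWxform(InRec L, StateRec R) := TRANSFORM\n  SELF.Cnt := R.Cnt + 1;                      // produces the next RIGHT record\nEND;\n\nOUTPUT(PROCESS(ds,ROW({1},StateRec),DSxform(LEFT,RIGHT),ROWxform(LEFT,RIGHT)));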
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-04 15:18:36\" },\n\t{ \"post_id\": 2466, \"topic_id\": 539, \"forum_id\": 8, \"post_subject\": \"PROCESS execution\", \"username\": \"oleg\", \"post_text\": \"Here is the definition from the manual:\\n\\nPROCESS(recordset, datarow, datasettransform, rowtransform [, LOCAL ] )\\nrecordset The set of records to process.\\ndatarow The initial RIGHT record to process, typically expressed by the ROW function.\\ndatasettransform The TRANSFORM function to call for each record in the recordset.\\nrowtransform The TRANSFORM function to call to produce the next RIGHT record for the datasettransform.\\n\\nFor both TRANSFORMs left argument is the row of the 'recordset', and right is the 'datarow'\\n\\nQUESTIONS:\\n\\n1. Are both of the TRANSFORMs arguments are the result of the previous iteration? \\n\\n2. If not, in which order TRANSFORMs will be executed?\", \"post_time\": \"2012-10-04 14:21:18\" },\n\t{ \"post_id\": 2472, \"topic_id\": 540, \"forum_id\": 8, \"post_subject\": \"Re: XPATH formatting\", \"username\": \"rtaylor\", \"post_text\": \"Glad to help! \", \"post_time\": \"2012-10-05 14:54:06\" },\n\t{ \"post_id\": 2471, \"topic_id\": 540, \"forum_id\": 8, \"post_subject\": \"Re: XPATH formatting\", \"username\": \"DSC\", \"post_text\": \"Ah, nevermind. This was actually a problem with my data. One set of files did not have embedded tabs (for pretty-printing XML) and other sets did. XPATH is functioning as it should.\\n\\nThanks anyway!\\n\\nDan\", \"post_time\": \"2012-10-05 14:50:58\" },\n\t{ \"post_id\": 2470, \"topic_id\": 540, \"forum_id\": 8, \"post_subject\": \"Re: XPATH formatting\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOffhand, I don't know of any switch to do that (assuming I understand what I think you're describing). Can you give an example of what you're doing so I can see exactly what you're doing and dealing with?\\n\\nRichard\", \"post_time\": \"2012-10-05 14:20:48\" },\n\t{ \"post_id\": 2469, \"topic_id\": 540, \"forum_id\": 8, \"post_subject\": \"XPATH formatting\", \"username\": \"DSC\", \"post_text\": \"I'm consuming XML documents using XPATH as a field modifier in a record layout. What I'm picking out are XML nodes rather than individual values, by appending '<>' to the end of the path. It's working great, but tabs are being inserted into the parsed values, presumably for pretty-printing. I realize that I can strip them out later, but that consumes time and resources and it would be better if I could simply flip a switch somewhere that disables this behavior at the beginning, during the parse. Is there such a switch?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-05 13:36:19\" },\n\t{ \"post_id\": 2481, \"topic_id\": 542, \"forum_id\": 8, \"post_subject\": \"Re: Xml and Xpath in ECL\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n
1.) In case of xmls having open and closed tags \\nEx : <a id = 123 /a>\\n\\nWhat is the way to read 'id' field using Xpath in ECL..?
When you want to reference data in an XML attribute (data inside a tag, and note that this data is always enclosed in double-quotes and there are never any spaces on either side of the = sign) you use the @ like this:Ex: <a id="123"/> \\n\\nis referenced in your xpath as a/@id to extract the 123 value
2.) In case of nested/embedded dataset in ECL, how does PROJECT work for the same?\\n\\nEx :\\n<a>\\n<id>1</id>\\n<id>2</id>\\n<id>3</id>\\n</a>
PROJECT is not particularly relevant to XML, per se. I think you may mean PARSE. Take a look at the XML option of PARSE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-09 12:23:11\" },\n\t{ \"post_id\": 2479, \"topic_id\": 542, \"forum_id\": 8, \"post_subject\": \"Xml and Xpath in ECL\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nNeed some clarification for XML Related data.\\n\\n1.) In case of xmls having open and closed tags \\nEx : <a id = 123 /a>\\n\\nWhat is the way to read 'id' field using Xpath in ECL..?\\n\\n2.) In case of nested/embedded dataset in ECL, how does PROJECT work for the same?\\n\\nEx :\\n<a>\\n<id>1</id>\\n<id>2</id>\\n<id>3</id>\\n</a>\\n\\nPlease help regarding the same.\\n\\nThanks\\nksviswa\", \"post_time\": \"2012-10-09 11:22:18\" },\n\t{ \"post_id\": 2482, \"topic_id\": 543, \"forum_id\": 8, \"post_subject\": \"Re: Is NORMALIZE implicitly LOCAL?\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nYes, I believe you are probably correct. There's no particular reason to re-distribute the data when all you're doing is extracting child records. I'm sure one of the developers will chime in here though if we're wrong. \\n\\nRichard\", \"post_time\": \"2012-10-09 12:26:53\" },\n\t{ \"post_id\": 2480, \"topic_id\": 543, \"forum_id\": 8, \"post_subject\": \"Is NORMALIZE implicitly LOCAL?\", \"username\": \"DSC\", \"post_text\": \"Subject says it all. The documentation makes no mention of any redistribution of the data, and there is no LOCAL option, so I would think that newly-created records would exist on the same nodes the source records came from. Just wanted to confirm, though.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-10-09 12:05:20\" },\n\t{ \"post_id\": 2502, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nAnd even if you can't, supplying the code and log files may give the guys enough to figure it out.\\n\\nRichard\", \"post_time\": \"2012-10-11 13:56:03\" },\n\t{ \"post_id\": 2500, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"DSC\", \"post_text\": \"If I can simplify the code and make it repeatable without huge dataset, I will do that. Thanks, Richard.\\n\\nDan\", \"post_time\": \"2012-10-11 13:48:37\" },\n\t{ \"post_id\": 2499, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"rtaylor\", \"post_text\": \"Dan,
(Before anyone asks: I'm pursuing this LOCAL stuff because I get a "memory resources exhausted" error in Roxie otherwise.)
If you can, you should submit a bug report on this and include your code that creates the issue. \\n\\nRule of thumb: if the code runs in Thor and won't in Roxie (or vice versa), that's definitely a "reportable" issue. \\n\\nRichard\", \"post_time\": \"2012-10-11 13:45:06\" },\n\t{ \"post_id\": 2497, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"DSC\", \"post_text\": \"Thanks, Richard. Does that mean that in my original code example, the Search.IDsMatchingXPATH() function would actually be executed locally?\\n\\nTwo things keep me poking at this question. First, Thor's execution of this query (without the ALLNODES and LOCAL) is faster than Roxie. That makes me think that in Roxie's case, it starts out local but some buried code is reaching out to data across all nodes and probably duplicating effort. Second, if I modify the original example and add ALLNODES(LOCAL()) to the first function as well as the second, I get a cryptic error along the lines of "attempt to use the results of graph 3 before it is calculated" (which, IIRC, is solved by wrapping idSet in THISNODE). But that seems to point to me not understanding how the execution graph is being constructed.\\n\\n(Before anyone asks: I'm pursuing this LOCAL stuff because I get a "memory resources exhausted" error in Roxie otherwise.)\\n\\nI've successfully used NOROOT and DISTRIBUTED indexes in the past. I was trying to keep things more generic, but it looks like I may need to go down that path as well.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-10-11 11:41:48\" },\n\t{ \"post_id\": 2495, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"richardkchapman\", \"post_text\": \"LOCAL(x) means the entire graph calculating X is local, I believe.\\n\\nGavin is the expert here though. LOCAL support in Roxie is not a commonly-used feature, though this case (inside an ALLNODES) is one of the few places where it makes sense (and is used).\\n\\nYou may want to look at NOROOT option in indexes too, if you are going down this route.\", \"post_time\": \"2012-10-11 09:11:15\" },\n\t{ \"post_id\": 2494, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"DSC\", \"post_text\": \"Thanks for the information.\\n\\nIf you surround a function call with LOCAL(), does that indicate that only the local portion of datasets or indexes referenced within the function are accessed? Provided there aren't occurrences of any other scoping functions like NOLOCAL and friends, of course. If so, does that LOCAL() attribute extend to functions called from the first function, and so on?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-10 18:45:01\" },\n\t{ \"post_id\": 2493, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Re: Pervasive LOCAL\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nYes, I believe that the first function needs to be made explicitly LOCAL.\\nIn all of our examples that we show in class, LOCAL operation of any statement needs to be explicit stated and is never implied.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-10 18:29:45\" },\n\t{ \"post_id\": 2490, \"topic_id\": 546, \"forum_id\": 8, \"post_subject\": \"Pervasive LOCAL\", \"username\": \"DSC\", \"post_text\": \"Consider this working Roxie query:\\n\\n
IMPORT Search;\\n\\nSTRING\\tpath := '' : STORED('xpath');\\n\\nidRS := Search.IDsMatchingXPATH(path);\\nidSet := SET(idRS,id);\\n\\nresult := ALLNODES(LOCAL(Search.GSRLFromIDSet(idSet)),LIMIT(10000));\\n\\nOUTPUT(result,ALL,NAMED('Result'));
\\n\\nThe desire is that both functions in the Search module execute independently on each Roxie slave. Does the code above do that? In other words, does the LOCAL() function 'propagate' to all attributes that go into the execution of LOCAL's argument? Or do I need to wrap the first function call in a LOCAL() as well?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-10 14:54:05\" },\n\t{ \"post_id\": 2506, \"topic_id\": 547, \"forum_id\": 8, \"post_subject\": \"Re: Understanding Thinking Declaratively\", \"username\": \"ahmedsha\", \"post_text\": \"[quote="rtaylor":a19q4vqz]I would take the position that is not specifying "how" but simply specifying "what" data to work with. \\n\\nHTH,\\n\\nRichard\\n\\nThanks, Richard.\", \"post_time\": \"2012-10-11 15:08:35\" },\n\t{ \"post_id\": 2504, \"topic_id\": 547, \"forum_id\": 8, \"post_subject\": \"Re: Understanding Thinking Declaratively\", \"username\": \"rtaylor\", \"post_text\": \"ahmedsha,The "HPCC Systems: Thinking Declaratively" document explains that we should think of "what you want, not how to get there". However, in the IMDB sample, the Actor record is slimmed down to it's bare essential - surely, in doing that we are actually suggesting "how"?
\\nI would take the position that is not specifying "how" but simply specifying "what" data to work with. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-11 14:06:17\" },\n\t{ \"post_id\": 2496, \"topic_id\": 547, \"forum_id\": 8, \"post_subject\": \"Understanding Thinking Declaratively\", \"username\": \"ahmedsha\", \"post_text\": \"The "HPCC Systems: Thinking Declaratively" document explains that we should think of "what you want, not how to get there". However, in the IMDB sample, the Actor record is slimmed down to it's bare essential - surely, in doing that we are actually suggesting "how"? \\n\\nE.g.\\n//Slim the records down to bare essentials for searching AND joining\\nslim_IMDB_rec := RECORD\\n STRING50 actor;\\n STRING150 movie;\\nEND;\\n\\n\\nDoesn't ECL automatically identify unused fields in records and make appropriate optimisations?\\n\\nIt may be difficult to generalise but in what circumstances, does additional transformations improve performance and when may it hinder performance?\", \"post_time\": \"2012-10-11 09:40:06\" },\n\t{ \"post_id\": 2523, \"topic_id\": 548, \"forum_id\": 8, \"post_subject\": \"Re: Spray using dfuplus ina virtual machine\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot...\\n\\nWas using ECL Watch to upload and download only, just was curious and wanted to know how it works using the DFUPlus. \", \"post_time\": \"2012-10-15 05:29:30\" },\n\t{ \"post_id\": 2510, \"topic_id\": 548, \"forum_id\": 8, \"post_subject\": \"Re: Spray using dfuplus ina virtual machine\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThe problem is you're trying to use DFUplus.exe outside of the VM. The error message is indicating that the dafilesrv program has to be running on the landing zone you're trying to spray from.\\n\\nThe VM environment is meant to be a learning tool, not a production tool. Therefore, it does not have all the configuration abilities you would have by configuring a non-VM cluster (even a 1-node "cluster"). That means that you can't configure your host machine (10.242.50.67) as a dropzone (landing zone) to the VM environment.\\n\\nGetting data into and out of the VM environment should be done through ECL Watch. The Upload/download page will allow you to put files on the VM's pre-configured dropzone (and you can use programs like WinSCP if you need to put files > 2Gb there), and then you can use the ECL Watch spray pages to "spray" the data to the Thor cluster.\\n\\nTo fully exercise DFUplus.exe, you'll need to download the Community Edition and configure a non-VM environment to work with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-12 13:40:41\" },\n\t{ \"post_id\": 2507, \"topic_id\": 548, \"forum_id\": 8, \"post_subject\": \"Re: Spray using dfuplus ina virtual machine\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nTried that..Thanks for the help..\\n\\nBut now facing a different error.\\n\\n"Failed: Failed to connect to dafilesrv/daliservix on 10.242.50.67:7100"\\n\\nDont know what could be he reason for the same, is it because of some proxy settings..?\\n\\nKindly help..\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-10-12 05:50:06\" },\n\t{ \"post_id\": 2501, \"topic_id\": 548, \"forum_id\": 8, \"post_subject\": \"Re: Spray using dfuplus ina virtual machine\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThis error tells you the problem:\\n
Cluster thor not found
In my VM installations, the name of the Thor cluster has always defaulted to "mythor". Take a look in ECL Watch for your VM environment at the Target Clusters and Cluster Processes pages and you'll see the names of your VM Thor cluster (which should also be "mythor").\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-11 13:53:28\" },\n\t{ \"post_id\": 2498, \"topic_id\": 548, \"forum_id\": 8, \"post_subject\": \"Spray using dfuplus ina virtual machine\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nNeed clarification, on how we spray using dfuPlus on a virtual machine.\\n\\nTried the following way,\\n\\n[code][code]D:\\\\HPCC\\\\bin\\\\ver_3_6>dfuplus server=http://192.168.59.129:8010/ username=hpccdemo\\n password=hpccdemo overwrite=1 replicate=1 action=spray srcip=192.168.59.129 src\\nfile=D:test\\\\People_Test dstname=RTTEMP::people.csv dstcluster=thor format=csv\\n\\nSpraying from D:\\\\334054\\\\hpcc on 192.168.59.129 to RTTEMP::people.csv\\nSubmitted WUID D20121011-124403\\nFailed: CDFUfileSpec: Cluster thor not found
\\n\\nBut getting the following error, and unable to create ini file also.\\n\\nNot sure if i am missing something really basic.\\n\\nKindly help.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-10-11 13:00:50\" },\n\t{ \"post_id\": 2513, \"topic_id\": 549, \"forum_id\": 8, \"post_subject\": \"Re: Parse XML\", \"username\": \"DSC\", \"post_text\": \"Thanks, Bob. I appreciate the kind words.\\n\\nHappy Friday!\\n\\nDan\", \"post_time\": \"2012-10-12 14:14:34\" },\n\t{ \"post_id\": 2511, \"topic_id\": 549, \"forum_id\": 8, \"post_subject\": \"Re: Parse XML\", \"username\": \"bforeman\", \"post_text\": \"Dan,\\n\\nI think that what you are doing is great, I can't think of a better way. \\n\\nI've learned when working with ECL that the compiler does a great job of optimizing your code, and if you are doing something that is less efficient, it may warn you that "this might take a little time to complete" \\n\\nIf you are getting good results and the process is reasonable regarding time, sleep well tonight!
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-12 14:11:05\" },\n\t{ \"post_id\": 2503, \"topic_id\": 549, \"forum_id\": 8, \"post_subject\": \"Parse XML\", \"username\": \"DSC\", \"post_text\": \"I have a large number of records with a structure similar to this:\\n\\n
PartRec := RECORD\\n\\tSTRING5\\t\\tuniqueID;\\n\\tUNSIGNED2\\tpartKind;\\n\\tSTRING\\t\\tpartData;\\nEND;
\\n\\npartData contains XML. The desire is to efficiently search these records using XPath. The technique I'm currently using is similar to this:\\n\\nMatchRec := RECORD(PartRec)\\n\\tSTRING\\t\\tmatchingXMLData;\\nEND;\\n\\nMatchRec ExtractMatchingData(PartRec l, STRING xpathToFind) := TRANSFORM\\n\\tSELF.matchingXMLData := XMLTEXT(xpathToFind);\\n\\tSELF := l;\\nEND;\\n\\nfirstPass := PARSE(myRecordSet,partData,ExtractMatchingData(LEFT,xpathToFind + '<>'),XML('Part'));\\nfoundRecords := firstPass(matchingXMLData != '');
\\n\\nxpathToFind is an attribute that will be passed in to Roxie via a SOAP or JSON call.\\n\\nThis works, but is it efficient? Is there a smarter way to extract records matching an unknown XPath?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-10-11 13:59:21\" },\n\t{ \"post_id\": 2567, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"DSC\", \"post_text\": \"Since this thread is referenced elsewhere (http://hpccsystems.com/bb/viewtopic.php?f=9&t=559) I thought I'd update it with my current findings. In short, I've created a workaround that is not quite optimal (more later) but mostly works.\\n\\nThe core of the workaround is the following module:\\n\\nIMPORT * FROM Std;\\n\\nEXPORT Manager := MODULE\\n\\t\\n\\t//==========================================================================\\n\\t// Record Definitions\\n\\t//==========================================================================\\n\\t\\n\\tEXPORT\\tRoxieQueryWorkunits := RECORD\\n\\t\\tSTRING\\t\\tworkUnitID;\\n\\t\\tSTRING\\t\\tname;\\n\\tEND;\\n\\t\\n\\tSHARED CommandRec := RECORD\\n\\t\\tSTRING\\t\\tcommand;\\n\\tEND;\\n\\t\\n\\t//==========================================================================\\n\\t// Functions\\n\\t//==========================================================================\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// TODO\\n\\t//--------------------------------------------------------------------------\\n\\tEXPORT\\tCurrentRoxieQueryWUIDList() := FUNCTION\\n\\t\\t// Extract info on the current Roxie queries\\n\\t\\trs := System.Workunit.WorkunitList('',cluster:='roxie');\\n\\t\\t\\n\\t\\t// Pick out the few items we need\\n\\t\\tresult := PROJECT(rs,TRANSFORM(RoxieQueryWorkunits,SELF.workUnitID := LEFT.wuid,SELF.name := LEFT.job));\\n\\t\\t\\t\\n\\t\\tRETURN result;\\n\\tEND;\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// TODO\\n\\t//--------------------------------------------------------------------------\\n\\tSHARED\\tDoPipe(DATASET(CommandRec) rs) := FUNCTION\\n\\t\\tdoNothing := TRUE;\\n\\t\\t\\n\\t\\t// We need the failure handler because all the nodes will be calling\\n\\t\\t// this function but only one will succeed\\n\\t\\taction := OUTPUT(rs,,PIPE('/bin/bash',CSV,REPEAT)) : FAILURE(EVALUATE(doNothing));\\n\\t\\t\\n\\t\\tRETURN action;\\n\\tEND;\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// TODO\\n\\t//--------------------------------------------------------------------------\\n\\tEXPORT\\tDisableRoxieQueries(DATASET(RoxieQueryWorkunits) rs) := FUNCTION\\n\\t\\t// Create a record set of commands that unpublish all the queries;\\n\\t\\t// we're not able to get the actual query name (e.g. 
"myquery.1") so\\n\\t\\t// we're manually appending a '.1' to the name, and this will break if\\n\\t\\t// there is ever a .2 or something published and active; eclwatchhost\\n\\t\\t// is defined in /etc/hosts.\\n\\t\\tcommands := PROJECT\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\trs,\\n\\t\\t\\t\\t\\t\\t\\t\\tTRANSFORM\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCommandRec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSELF.command := '/usr/bin/ecl unpublish --server=eclwatchhost myroxie ' + LEFT.name + '.1'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\taction := DoPipe(commands);\\n\\t\\t\\n\\t\\tRETURN action;\\n\\tEND;\\n\\t\\n\\t//--------------------------------------------------------------------------\\n\\t// TODO\\n\\t//--------------------------------------------------------------------------\\n\\tEXPORT\\tEnableRoxieQueries(DATASET(RoxieQueryWorkunits) rs) := FUNCTION\\n\\t\\t// Create a record set of commands that publish all the queries;\\n\\t\\t// eclwatchhost is defined in /etc/hosts.\\n\\t\\tcommands := PROJECT\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\trs,\\n\\t\\t\\t\\t\\t\\t\\t\\tTRANSFORM\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tCommandRec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSELF.command := '/usr/bin/ecl publish --server=eclwatchhost --cluster=roxie --activate ' + LEFT.workUnitID\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\n\\t\\taction := DoPipe(commands);\\n\\t\\t\\n\\t\\tRETURN action;\\n\\tEND;\\n\\t\\nEND; // Manager Module\\n
\\n\\nThe module is called from the following code fragment that modifies two different superkeys (slightly edited):\\n\\nroxieQueryInfo := Queries.Manager.CurrentRoxieQueryWUIDList() : INDEPENDENT;\\n\\nactions := SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\t// Create the index files on disk\\n\\t\\t\\t\\t\\t\\t\\tPARALLEL\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tBUILD(oneIDX,DISTRIBUTED,OVERWRITE);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tBUILD(twoIDX,DISTRIBUTED,OVERWRITE);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Make sure all of our superfiles and superkeys exist\\n\\t\\t\\t\\t\\t\\t\\tEnsureSuperfilesExist();\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Disable all queries\\n\\t\\t\\t\\t\\t\\t\\tQueries.Manager.DisableRoxieQueries(roxieQueryInfo);\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Start the transaction\\n\\t\\t\\t\\t\\t\\t\\tFile.StartSuperFileTransaction();\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Add new indexes to superfile\\n\\t\\t\\t\\t\\t\\t\\tFile.AddSuperFile(kOneSuperkey,oneIndexFileName);\\n\\t\\t\\t\\t\\t\\t\\tFile.AddSuperFile(kTwoSuperkey,twoIndexFileName);\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Finalize the transaction\\n\\t\\t\\t\\t\\t\\t\\tFile.FinishSuperFileTransaction();\\n\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t// Enable all queries\\n\\t\\t\\t\\t\\t\\t\\tQueries.Manager.EnableRoxieQueries(roxieQueryInfo);\\n\\t\\t\\t\\t\\t\\t);
\\n\\nIn short, this workaround:\\n\\n1) Gathers a list of all Roxie queries.\\n\\n2) Creates the subkeys that will be appended to the superkeys.\\n\\n3) Deletes the Roxie queries from the query set. This unlocks the superkeys for modification.\\n\\n4) Adds the subkeys to the superkeys.\\n\\n5) Re-adds the Roxie queries previously deleted.\\n\\nThe biggest problem with this implementation is that it deletes the queries. Callers would experience a hard failure while the superfile was being updated. If something besides a deletion would work here, I'd love to hear it (suspending/resuming does not work, BTW).\\n\\nOther problems include 1) deleting and re-adding all queries, not just the ones associated with the affected superkeys (but that's just my implementation); and 2) the inability to determine the exact job name of the published queries (any version beyond .1 would cause a problem).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-19 14:45:45\" },\n\t{ \"post_id\": 2521, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"sort\", \"post_text\": \"sorry, the ecl-roxie command will be available in 3.10\", \"post_time\": \"2012-10-12 16:04:31\" },\n\t{ \"post_id\": 2520, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"DSC\", \"post_text\": \"I don't seem to have a 'ecl-roxie' executable. At least, I cannot locate one within /opt/HPCCSystems. Could it be named differently?\\n\\nIf I did have one, though, this would be something I would have to execute on each node using Std.System.Util.CmdProcess(), right? Or is there another way to execute binaries?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-12 15:40:27\" },\n\t{ \"post_id\": 2519, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"sort\", \"post_text\": \"Dan,\\n If you change the contents of your superkey definition in dali and want to get a running roxie to use it, you can try the following command to reload everything without stopping / suspending\\n\\n ecl-roxie reload\\n\\n\\n Support for packages in 3.6.x and 3.8.x is not supported (some things way work). We are working on it for 3.10 and will also be updating the documentation\", \"post_time\": \"2012-10-12 15:32:41\" },\n\t{ \"post_id\": 2515, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nThat's unfortunate that Std.System.Util.CmdProcess() is broken, as that's the only way that I know via ECL.\\n\\nLet me check with development to see if there is another way.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-12 14:29:07\" },\n\t{ \"post_id\": 2514, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"DSC\", \"post_text\": \"I did not try to bounce the query with a suspend. I'll try that next time I test an update cycle.\\n\\nHow would one go about triggering query management from within ECL? Execute the ecl command-line application via Std.System.Util.CmdProcess()? (Please tell me that there is another way, as that particular function is broken in 3.8.4.1CE.)\\n\\nIf the command-line tool is used, are you specifically talking about the deactivate/active subcommands?\\n\\nAlso: I believe that Package support was included in 3.6.2CE or something around there. 
It's the documentation that is missing.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-12 14:20:13\" },\n\t{ \"post_id\": 2512, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Re: Updating a superkey\", \"username\": \"bforeman\", \"post_text\": \"Dan, the last time I heard, packages support for the Community Edition would be in the next update.\\n\\nInstead of having to delete and re-add the queries, did you try to simply suspend them?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-12 14:14:15\" },\n\t{ \"post_id\": 2505, \"topic_id\": 550, \"forum_id\": 8, \"post_subject\": \"Updating a superkey\", \"username\": \"DSC\", \"post_text\": \"I'm still missing something when it comes to updating a superkey that is currently in use by Roxie.\\n\\nI can build the superkey and use it just fine.\\n\\nWhen I create a new subkey and try to add it to the superkey, the process hangs until all deployed Roxie queries that use the superkey are bounced (deleted and re-added).\\n\\nHow can I quickly and automatically make the new data available to Roxie? Are packages the key -- ha! -- here? If so, can someone supply example code?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-11 14:40:48\" },\n\t{ \"post_id\": 2532, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"Re: count number of words\", \"username\": \"rtaylor\", \"post_text\": \"arun_s,\\n\\nHere's the way I would approach that problem:import std;\\n\\nCountWordsInPara(STRING txt) := FUNCTION\\n ds := DATASET([{txt}],{STRING para});\\n\\n\\tPATTERN ws := [' ',',','.',';']+;\\n\\tPATTERN Alpha := PATTERN('[A-Za-z]')+;\\n\\tPATTERN Word := Alpha ws;\\n\\n\\toutrec := RECORD\\n\\t\\tSTRING word := MATCHTEXT(Alpha);\\n\\t\\tINTEGER cnt := 1;\\n\\tEND;\\t\\n\\t\\t\\n\\tp := PARSE(ds,para,Word,outrec);\\n\\ts := SORT(p,word);\\n\\toutrec XF(s L,s R) := TRANSFORM\\n\\t\\tSELF.cnt := L.cnt + R.cnt;\\n\\t\\tSELF := L;\\n\\tEND;\\n\\tr := ROLLUP(s,word,XF(LEFT,RIGHT));\\n\\tRETURN sort(r,-cnt);\\t\\nEND;\\t\\n\\nparagraph := record\\n string para;\\nend;\\ndat:=dataset([{'The following discussion applies principally to local sorts, since Thor is the only platform that performs global sorts,and Thor does not provide a choice of algorithms.'}],\\n paragraph);\\n\\t\\t\\t\\t\\t\\t \\nCountWordsInPara(dat[1].para);
HTH,\\n\\nRichard\", \"post_time\": \"2012-10-17 21:06:39\" },\n\t{ \"post_id\": 2530, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"Re: count number of words\", \"username\": \"bforeman\", \"post_text\": \"If you want to calculate the number of times that a word occurs in each paragraph, it would probably be a good idea to first combine your multiple datasets (paragraphs) into a single recordset using a PROJECT (or NORMALIZE), and in the TRANSFORM add a unique ID for each paragraph. Then when you run the cross tab report, the TABLE can be grouped by ID and then the word.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2012-10-17 12:13:13\" },\n\t{ \"post_id\": 2527, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"Re: count number of words\", \"username\": \"arun_S\", \"post_text\": \"thank you Bob,\\ni wanted to find the number of times a word occur in a paragraph..\\ne.g\\nThe 1\\nfollowing 1\\nthor 2\", \"post_time\": \"2012-10-17 06:00:16\" },\n\t{ \"post_id\": 2526, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"Re: cout number of words\", \"username\": \"bforeman\", \"post_text\": \"This also works well in your example:\\n\\noutput(LENGTH(TRIM(dat[1].para,LEFT,RIGHT)) - LENGTH(TRIM(dat[1].para,ALL)) + 1);
\", \"post_time\": \"2012-10-16 15:53:00\" },\n\t{ \"post_id\": 2525, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"Re: cout number of words\", \"username\": \"bforeman\", \"post_text\": \"Have you seen the WordCount function?\\nEXPORT unsigned4 WordCount(string text) := lib_stringlib.StringLib.StringWordCount(text);\\n\\n\\nSTD.Str.WordCount(dat[1].para);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-16 15:41:49\" },\n\t{ \"post_id\": 2524, \"topic_id\": 553, \"forum_id\": 8, \"post_subject\": \"count number of words\", \"username\": \"arun_S\", \"post_text\": \"Hi,\\nI'am trying to count number of times the word occur in a paragraph.\\n\\nimport std;\\nparagraph:=record\\nstring para;\\nend;\\ndat:=dataset([{'The following discussion applies principally to local sorts, since Thor is the only platform that performs global sorts,and Thor does not provide a choice of algorithms.'}],paragraph);\\n\\na:= std.Str.SplitWords(dat[1].para,' ',FALSE);\\nb:=dataset(a,paragraph);\\noutput(b);
\\nafter this will use groupby function to get the number of counts..\\nthis code is working for single dataset,i want to count the words from multiple dataset.\\nI'am new to hpcc plz help me..\\nThanks and Regards,\\nArun S\", \"post_time\": \"2012-10-16 13:13:48\" },\n\t{ \"post_id\": 2700, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"dlingle\", \"post_text\": \"Let me suggest the following to try and reduce the memory limit exceeded issue. If you're able to cast to a string7 instead of a string then I think you would save some memory for each assignment. Give that a shot. Normally if record layouts are defined with fixed length strings instead of just string it may save memory.\", \"post_time\": \"2012-11-06 13:20:14\" },\n\t{ \"post_id\": 2619, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback, I will pass this to the development team.\", \"post_time\": \"2012-10-25 12:16:45\" },\n\t{ \"post_id\": 2613, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nThe problem is that, I'm operating on a large dataset (around 25MM records), and in the Transform of the Aggregate function I explicitly cast all of the REAL4 to STRING, then the memory limit exceed exception.\\n\\nBut if I do not do the cast in the Aggregate Transform, but in the next step, the problem is gone.\\n\\nI'm now use this walk around, hope the information can help find potential problem in the Aggregate.\\n\\nThanks,\", \"post_time\": \"2012-10-25 05:40:20\" },\n\t{ \"post_id\": 2548, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"bforeman\", \"post_text\": \"We need to see a little more to nail this down.\\n\\nA complete slave log from a run of this job might reveal more, as we can't see all the activities from the snippet.\\n\\nAn archive of the query would also be useful.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-18 15:17:15\" },\n\t{ \"post_id\": 2531, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"dsun\", \"post_text\": \"Sorry for the mistake, I just try to attach the log, but failed, it always complain the extension is not allowed.\\n\\nThe story is that, in an Aggregation, I do some calculation and then explicitly cast the REAL4 to STRING, like this:\\n\\nSELF.EQUITIES_NET_RETURN := IF(l.Asset_Class = 1, (STRING)(l.ABSOLUTERETURNPCTNET/l.cnt), r.EQUITIES_NET_RETURN);\\n...\\n...\\n
\\n\\nBelow is part of the log:\\n\\n000002B1 2012-10-15 15:45:08 3660 4337 Starting input - activity(diskwrite, 37)\\n000002B2 2012-10-15 15:45:08 3660 4338 Connected to slave 0 of 1 - activity(join, 36)\\n000002B3 2012-10-15 15:45:08 3660 4338 Start Gather - activity(join, 36)\\n000002B4 2012-10-15 15:45:08 3660 4342 CSortTransferServerThread started port 20103\\n000002B5 2012-10-15 15:45:08 3660 4337 JOIN: Starting R then L - activity(join, 36)\\n000002B6 2012-10-15 15:45:08 3660 4337 Starting input - activity(join, 36)\\n000002B7 2012-10-15 15:45:08 3660 4337 Starting input - activity(join, 36)\\n000002B8 2012-10-15 15:45:08 3660 4337 diskread[part=0]: reading physical file '/var/lib/HPCCSystems/mythor/temp/3__w20121015-152121._1_of_1' (logical file = ~spill::3) - activity(diskread, 31)\\n000002B9 2012-10-15 15:45:08 3660 4347 Starting input - activity(aggregate, 35)\\n000002BA 2012-10-15 15:45:08 3660 4337 diskread[part=0]: Base offset to 0 - activity(diskread, 31)\\n000002BB 2012-10-15 15:45:08 3660 4347 GROUP: is local - activity(group, 34)\\n000002BC 2012-10-15 15:45:08 3660 4337 Reading block compressed file: /var/lib/HPCCSystems/mythor/temp/3__w20121015-152121._1_of_1 - activity(diskread, 31)\\n000002BD 2012-10-15 15:45:08 3660 4347 Starting input - activity(group, 34)\\n000002BE 2012-10-15 15:45:08 3660 4347 Starting input - activity(hashdistributemerge, 33)\\n000002BF 2012-10-15 15:45:08 3660 4337 diskread[part=0]: variable (/var/lib/HPCCSystems/mythor/temp/3__w20121015-152121._1_of_1) - activity(diskread, 31)\\n000002C0 2012-10-15 15:45:08 3660 4337 ITDL starting for output 0 - activity(diskread, 31)\\n000002C1 2012-10-15 15:45:08 3660 4347 diskread[part=0]: reading physical file '/var/lib/HPCCSystems/mythor/temp/5__w20121015-152121._1_of_1' (logical file = ~spill::5) - activity(diskread, 32)\\n000002C2 2012-10-15 15:45:08 3660 4347 diskread[part=0]: Base offset to 0 - activity(diskread, 32)\\n000002C3 2012-10-15 15:45:08 3660 4347 Reading block compressed file: /var/lib/HPCCSystems/mythor/temp/5__w20121015-152121._1_of_1 - activity(diskread, 32)\\n000002C4 2012-10-15 15:45:08 3660 4347 diskread[part=0]: variable (/var/lib/HPCCSystems/mythor/temp/5__w20121015-152121._1_of_1) - activity(diskread, 32)\\n000002C5 2012-10-15 15:45:08 3660 4347 ITDL starting for output 0 - activity(diskread, 32)\\n000002C6 2012-10-15 15:45:08 3660 4347 HASHDISTRIB: connect - activity(hashdistributemerge, 33)\\n000002C7 2012-10-15 15:45:08 3660 4347 HASHDISTRIB: connected - activity(hashdistributemerge, 33)\\n000002C8 2012-10-15 15:45:08 3660 4347 ITDL starting for output 0 - activity(hashdistributemerge, 33)\\n000002CA 2012-10-15 15:45:08 3660 4347 ITDL starting for output 0 - activity(group, 34)\\n000002C9 2012-10-15 15:45:08 3660 4350 Distribute send start - activity(hashdistributemerge, 33)\\n000002CB 2012-10-15 15:45:09 3660 4350 Record size (max) = 4096 - activity(diskread, 32)\\n000002CC 2012-10-15 15:45:10 3660 4350 CRowPullDistributor spilling to /var/lib/HPCCSystems/mythor/temp/thtmp3660_28__hashdistspill.tmp - activity(hashdistributemerge, 33)\\n000002CD 2012-10-15 15:45:10 3660 4349 Read loop start - activity(hashdistributemerge, 33)\\n000002CE 2012-10-15 15:45:10 3660 4347 Record size (max) = 4096 - activity(hashdistributemerge, 33)\\n000002CF 2012-10-15 15:45:10 3660 4347 ITDL starting for output 0 - activity(aggregate, 35)\\n000002D0 2012-10-15 15:45:10 3660 4347 Record size (max) = 4096 - activity(group, 34)\\n000002D1 2012-10-15 15:45:10 3660 4347 Record size (max) = 4096 - 
activity(aggregate, 35)\\n000002D2 2012-10-15 15:45:10 3660 4337 ITDL starting for output 0 - activity(join, 36)\\n000002D3 2012-10-15 15:45:10 3660 4347 RoxieMemMgr: CChunkingRowManager::allocate(size 942815768) allocated new HugeHeaplet size 943718400 - addr=0x7fdb97b00000 pages=900 pageLimit=1497 peakPages=1497 rowMgr=0x1c00bc8\\n000002D4 2012-10-15 15:45:10 3660 4337 Record size (max) = 4096 - activity(diskread, 31)\\n000002D5 2012-10-15 15:45:10 3660 4337 Gather in - activity(join, 36)\\n000002D6 2012-10-15 15:45:10 3660 4337 SORT: Gather - activity(join, 36)\\n000002D7 2012-10-15 15:45:10 3660 4337 SORT: Gather not sorting - activity(join, 36)\\n000002D8 2012-10-15 15:45:15 3660 4347 CThorSpillableRowArray::save 59 rows - activity(join, 36)\\n000002D9 2012-10-15 15:45:33 3660 4347 CThorSpillableRowArray::save done, bytes = 942822682 - activity(join, 36)\\n000002DA 2012-10-15 15:45:33 3660 4347 RoxieMemMgr: CChunkingRowManager::allocate(size 808464129) allocated new HugeHeaplet size 809500672 - addr=0x7fdb9fb00000 pages=772 pageLimit=1497 peakPages=1497 rowMgr=0x1c00bc8\\n000002DB 2012-10-15 15:45:35 3660 4062 SYS: PU= 89% MU= 27% MAL=1578366928 MMP=1578110976 SBK=255952 TOT=1541520K RAM=881088K SWP=253052K\\n000002DC 2012-10-15 15:45:35 3660 4062 DSK: [sda] r/s=55.9 kr/s=4722.7 w/s=81.7 kw/s=40071.6 bsy=83 NIC: rxp/s=437.4 rxk/s=0.0 txp/s=7383.8 txk/s=0.0 CPU: usr=20 sys=23 iow=45 idle=10\\n000002DD 2012-10-15 15:45:36 3660 4347 CThorSpillableRowArray::save 195 rows - activity(join, 36)\\n000002DE 2012-10-15 15:45:52 3660 4347 CThorSpillableRowArray::save done, bytes = 808511393 - activity(join, 36)\\n000002DF 2012-10-15 15:45:52 3660 4347 RoxieMemMgr: CChunkingRowManager::allocate(size 825702436) allocated new HugeHeaplet size 826277888 - addr=0x7fdb9eb00000 pages=788 pageLimit=1497 peakPages=1497 rowMgr=0x1c00bc8\\n000002E0 2012-10-15 15:45:53 3660 4347 RoxieMemMgr: Memory limit exceeded - current 870, requested 817, limit 1497\\n000002E1 2012-10-15 15:45:55 3660 4347 ThorLookaheadCache get exception - activity(join, 36) : Graph[30], aggregate[35]: memory limit exceeded\\n000002E2 2012-10-15 15:45:55 3660 4347 JOIN: RHS input finished, 235 rows read - activity(join, 36)\\n000002E3 2012-10-15 15:45:55 3660 4347 Stopping input for - activity(aggregate, 35)\\n000002E4 2012-10-15 15:45:55 3660 4347 Stopping input for - activity(group, 34)\\n000002E5 2012-10-15 15:45:55 3660 4347 HASHDISTRIB: stopping - activity(hashdistributemerge, 33)\\n000002E6 2012-10-15 15:45:56 3660 4337 1300: /var/jenkins/workspace/CE-Candidate-3.8.4/CE/ubuntu_12_04_x86_64/HPCC-Platform/thorlcr/msort/tsorts.cpp(1206) : **Exception(2) : Graph[30], aggregate[35]: memory limit exceeded\\n000002E7 2012-10-15 15:45:56 3660 4337 activity(diskwrite, 37) : Graph[30], aggregate[35]: memory limit exceeded\\n000002E8 2012-10-15 15:45:56 3660 4063 graph(graph1, 30) : Graph[30], aggregate[35]: memory limit exceeded\\n000002E9 2012-10-15 15:45:56 3660 4063 End of sub-graph - graph(graph1, 30)\\n000002EA 2012-10-15 15:45:58 3660 3660 GraphAbort: W20121015-152121graph1\\n000002EB 2012-10-15 15:46:02 3660 3660 GraphAbort: W20121015-152121graph1\\n000002EC 2012-10-15 15:46:04 3660 3660 GraphAbort: W20121015-152121graph1\\n000002ED 2012-10-15 15:46:06 3660 3660 GraphAbort: W20121015-152121graph1\\n000002EE 2012-10-15 15:46:08 3660 3660 Abort condition set - activity(join, 36)\\n000002EF 2012-10-15 15:46:08 3660 3660 Abort condition set - activity(diskread, 31)\\n000002F0 2012-10-15 15:46:08 3660 3660 Abort condition set - 
activity(aggregate, 35)\\n000002F1 2012-10-15 15:46:08 3660 3660 Abort condition set - activity(group, 34)\\n000002F2 2012-10-15 15:46:08 3660 3660 Abort condition set - activity(hashdistributemerge, 33)\\n000002F3 2012-10-15 15:46:08 3660 3660 Abort condition set - activity(diskread, 32)\\n000002F4 2012-10-15 15:46:08 3660 4063 Watchdog: Stop Job 30\\n
\\n\\nThanks a lot,\", \"post_time\": \"2012-10-17 12:41:55\" },\n\t{ \"post_id\": 2529, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \"Re: "memory limit exceed" Exception\", \"username\": \"bforeman\", \"post_text\": \"Dongliang,\\n\\nYou said the log file was attached, but I do not see it.\\nAlso, it might be helpful to post the code that is producing this error with a brief description of what you are trying to do.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2012-10-17 11:59:26\" },\n\t{ \"post_id\": 2528, \"topic_id\": 554, \"forum_id\": 8, \"post_subject\": \""memory limit exceed" Exception\", \"username\": \"dsun\", \"post_text\": \"Hi, \\n\\nI got below Exception when I run the ecl on Thor:\\n\\nException(2) : Graph[30], aggregate[35]: memory limit exceeded\\n
\\n\\nHow does it happen? I think there may be some configuration need to modify, which parameters should I update?\\n\\nSome log file is attached.\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-10-17 10:25:05\" },\n\t{ \"post_id\": 2546, \"topic_id\": 558, \"forum_id\": 8, \"post_subject\": \"Re: Output a file to CSV\", \"username\": \"rtaylor\", \"post_text\": \"A standard mechanism in CSV files it to surround fields whose data may contain record or field delimiters with quotes. That way the system knows to ignore the delimiters it finds inside those fields.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-18 14:01:11\" },\n\t{ \"post_id\": 2543, \"topic_id\": 558, \"forum_id\": 8, \"post_subject\": \"Output a file to CSV\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where I output the record set to a CSV format.\\n\\nConsider an Example :\\n\\nSample Output :\\n\\n1001 , <abc> , John, Florida\\n <id>1</id>\\n </abc>\\n\\n\\n2 , <abc> , Jane, Texas\\n <id>1</id>\\n </abc>
\\n\\nThe 2nd column in the above data is an xml that has line feeds within it (ie spans many lines)\\n\\nI try to output the recordset to a csv format file such as this:\\n\\noutput (data, , '~thor::in::output.dat', CSV );
\\n\\nThis shows the output as:\\n1001 , <abc>
\\n\\nI am assuming this is the case because the default terminator is \\\\n and since ECL encounters line feeds within the xml, it treats the rest of the XML as a separate line of output.\\n\\nPlease suggest how to distinguish between records with the new line but to ignore newline within the given XML.\\n\\n\\nThanks and Regards,\\nViswa\", \"post_time\": \"2012-10-18 13:31:49\" },\n\t{ \"post_id\": 2584, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nGlad to be of help,\\n\\nRichard\", \"post_time\": \"2012-10-21 19:38:01\" },\n\t{ \"post_id\": 2577, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"arunarav\", \"post_text\": \"Richard, \\n\\nYes I was generating randomized data rather than spraying 8GB. \\n\\nI followed the example code as pointed out by you in ECLProgrammersGuide and was successful in getting a pretty even distribution across nodes. My mistake - I was doing something silly with the way I was invoking DISTRIBUTE. Many thanks for your help.\\n\\nRegards\\nArun\", \"post_time\": \"2012-10-20 21:24:11\" },\n\t{ \"post_id\": 2563, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nIs the 8Gb data file generated by your code, or was the base data sprayed in? \\n\\nI ask because the only way I can think of to duplicate your result is to start with an inline dataset as a base (which is always on only a single node to begin with) and use NORMALIZE to generate a bunch of "garbage" data, then write that to disk without first doing a DISTRIBUTE somewhere in the process to get all the nodes doing some work and having some data.\\n\\nTake a look at the "Creating Example Data" article in the Programmer's Guide. At the bottom of page 8 I discuss this exact issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-19 13:28:53\" },\n\t{ \"post_id\": 2562, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"bforeman\", \"post_text\": \"We tested your report on two separate AWS configurations and cannot reproduce.\\nCheck your ECL code, are you targeting THOR or HTHOR?\", \"post_time\": \"2012-10-19 13:22:26\" },\n\t{ \"post_id\": 2560, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"bforeman\", \"post_text\": \"Yes, working on it, and will let you know if I can reproduce.\", \"post_time\": \"2012-10-19 12:23:39\" },\n\t{ \"post_id\": 2558, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"arunarav\", \"post_text\": \"Bob,\\n\\nUnfortunately, I don't have access to another cluster (other than AWS). Could you kindly help with replicating the issue on AWS? \\n\\nRegards\\nArun\", \"post_time\": \"2012-10-19 12:22:33\" },\n\t{ \"post_id\": 2556, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Re: Output to cluster not spraying uniformly\", \"username\": \"bforeman\", \"post_text\": \"Hi Arun,\\n\\nDo you have access to another cluster besides the AWS? 
I can't duplicate this on a standard cluster, I'm just trying to verify if this issue is isolated to the one-click configuration.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-19 12:18:26\" },\n\t{ \"post_id\": 2552, \"topic_id\": 560, \"forum_id\": 8, \"post_subject\": \"Output to cluster not spraying uniformly\", \"username\": \"arunarav\", \"post_text\": \"I am working on a 5 node cluster on Amazon AWS (One click Thor).\\n\\nI have a 8GB file that I write to the file system using the regular OUTPUT command as follows:\\n\\n\\n\\noutput (dbNormalize, , '~thor::in::output.dat', CSV, CLUSTER('mythor') ,OVERWRITE );
\\n\\n\\nThe DFUQuery page accordingly shows the presence of the 8GB logical file:\\n\\nLogical Name\\t Description\\t Size\\t Records\\tModified (UTC/GMT)\\tOwner\\tCluster\\tParts\\n\\t\\t\\t\\nthor::in::output.dat\\t\\t8,220,094,150\\t100,000\\t2012-10-19 09:59:47\\thpccdemo\\tmythor\\t5
\\n\\nHowever, upon checking the file system of the individual nodes in the cluster (via SSH), the DALI server which also happens to be the drop zone, seems to have the file in its entirety:\\n\\nubuntu@ip-10-244-32-85:/mnt/var/lib/HPCCSystems/hpcc-data/thor/thor/in$ ls -lt\\ntotal 8035392\\n-rw-r--r-- 1 hpcc hpcc 8220090286 2012-10-19 09:27 output.dat._1_of_5
\\n\\n\\nThe other nodes have zero size files:\\n\\n\\nubuntu@ip-10-244-134-250:/mnt/var/lib/HPCCSystems/hpcc-data/thor/thor/in$ ls -lttotal 4\\n-rw-r--r-- 1 hpcc hpcc 0 2012-10-19 09:46 output.dat._3_of_5
\\n\\n\\n\\n\\nIf I manually de-spray and spray again using ECLWatch, the 8GB file gets split uniformly into 5 equal parts:\\n\\nubuntu@ip-10-244-134-250:/mnt/var/lib/HPCCSystems/hpcc-data/thor/thor/in$ ls -lttotal 160716\\n-rw-r--r-- 1 hpcc hpcc 164399988 2012-10-19 09:55 output.dat._3_of_5
\\n\\nI tried using the default version of OUTPUT (without the optional CLUSTER option) - both don't help.\\n\\nI've also used the DISTRIBUTE command prior to invoking the OUTPUT in the following versions and both don't help:\\n\\nDISTRIBUTE(dbNormalize, RANDOM());\\nDISTRIBUTE(dbNormalize);
\\n\\nPlease indicate how to change the OUTPUT statement to achieve uniform distribution of the file across all 5 nodes. \\n\\nRegards\\nArun\", \"post_time\": \"2012-10-19 10:26:45\" },\n\t{ \"post_id\": 2581, \"topic_id\": 561, \"forum_id\": 8, \"post_subject\": \"Re: Natural order of index files\", \"username\": \"DSC\", \"post_text\": \"Ah, I hadn't thought of that. I always thought of STEPPED as 'digging into' an index, not enforcing its overall ordering. Nice!\\n\\nDan\", \"post_time\": \"2012-10-21 14:02:20\" },\n\t{ \"post_id\": 2579, \"topic_id\": 561, \"forum_id\": 8, \"post_subject\": \"Re: Natural order of index files\", \"username\": \"dabayliss\", \"post_text\": \"If you want to ensure the order of returned you need to use the STEPPED function\", \"post_time\": \"2012-10-21 00:10:34\" },\n\t{ \"post_id\": 2553, \"topic_id\": 561, \"forum_id\": 8, \"post_subject\": \"Natural order of index files\", \"username\": \"DSC\", \"post_text\": \"If I have a superkey whose subkeys have been built using DISTRIBUTED, then access (filter against) that superkey as a dataset, do the records naturally come out in sorted order?\\n\\nI'm asking because in one scenario I have, I'm doing the above and then running the result through a local ROLLUP, which requires that the recordset be in sorted order. It would be helpful to skip the sort if possible.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-10-19 11:36:27\" },\n\t{ \"post_id\": 2634, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"dabayliss\", \"post_text\": \"There almost certainly isn't; however it is an error to rely upon that.\\n\\nIf you WANT it explicitly distributed and are relying upon that; then you should say so using distribute.\\n\\nIf you want the JOIN to 'do its thing' and are not reliant upon the distribution; then use JOIN,HASH\\n\\nDavid\", \"post_time\": \"2012-10-26 14:53:09\" },\n\t{ \"post_id\": 2633, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"oleg\", \"post_text\": \"[quote="dabayliss":sf8ns8dn]Both global join forms implicitly redistribute the data. In the 'unqualified' case this is by a fairly sophisticated algorithm that tries to balance data equally across the nodes. In the HASH case it is via a hash function on the fixed (equivalence) portion of the join condition\\nYes, that exactly how I understood, but my question was about the second case - if it will distribute joining datasets via hash function, does it mean it is exactly the same as to explicitly call DISTRIBUTE on join condition, i.e. is there any difference at all between two following code snippets:\\n\\n\\nDISTRIBUTE(d1, HASH(join_condition) ;\\nDISTRIBUTE(d2, HASH(join_condition) ;\\nJOIN(d1,d2,join_condition);\\n
\\n\\n\\nJOIN(d1,d2,join_condition, HASH);\\n
\", \"post_time\": \"2012-10-26 14:44:13\" },\n\t{ \"post_id\": 2632, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"dabayliss\", \"post_text\": \"Both global join forms implicitly redistribute the data. In the 'unqualified' case this is by a fairly sophisticated algorithm that tries to balance data equally across the nodes. In the HASH case it is via a hash function on the fixed (equivalence) portion of the join condition\", \"post_time\": \"2012-10-26 14:13:04\" },\n\t{ \"post_id\": 2630, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"oleg\", \"post_text\": \"[quote="dabayliss":3apsmiyf]\\n....\\nJOIN,HASH is then a hint to the global join that a 'clever' distribution is not required, a hash function will cause a uniform distribution.\\n\\nSo, does it mean that in this case JOIN function will implicitly do hash distribution for both datasets?\", \"post_time\": \"2012-10-26 12:58:18\" },\n\t{ \"post_id\": 2580, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"DSC\", \"post_text\": \"Thanks for the clarification!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-21 13:59:21\" },\n\t{ \"post_id\": 2578, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"dabayliss\", \"post_text\": \",LOCAL presumes that the data has been previously distributed in some manner. It can be useful to do\\n\\nDISTRIBUTE(,bysomething)\\n\\nand a -series- of ,LOCAL operations\\n\\nJOIN,LOCAL should be thought of as changing the semantic of the join to only consider local data.\\n\\nJOIN (no options) - is then a global join where they system does some pre-work to distribute the optimal split points or partitions.\\n\\nJOIN,HASH is then a hint to the global join that a 'clever' distribution is not required, a hash function will cause a uniform distribution.\", \"post_time\": \"2012-10-21 00:07:57\" },\n\t{ \"post_id\": 2566, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"DSC\", \"post_text\": \"Thanks, Bob. Looking forward to the clarification.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-19 14:34:12\" },\n\t{ \"post_id\": 2565, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"Re: JOIN options LOCAL versus HASH\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI think that HASH simply operates locally without a prior DISTRIBUTE (it says "Implicitly distributed") where LOCAL looks for a prior DISTRIBUTE. Verifying with development now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-19 14:03:11\" },\n\t{ \"post_id\": 2554, \"topic_id\": 562, \"forum_id\": 8, \"post_subject\": \"JOIN options LOCAL versus HASH\", \"username\": \"DSC\", \"post_text\": \"What is the practical difference between LOCAL and HASH options in the JOIN() function? 
From the language reference manual:\\n\\nLOCAL: Specifies the operation is performed on each supercomputer node independently, without requiring interaction with all other nodes to acquire data; the operation maintains the distribution of any previous DISTRIBUTE.\\n\\nHASH: Specifies implicit distribution of the leftrecset and rightrecset across the supercomputer nodes so each node can do its job with local data.\\n\\nThe only thing that jumps out at me is that HASH may not preserve distribution, but only because that isn't explicitly mentioned in the description. The two options sound roughly the same to me.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-19 11:51:30\" },\n\t{ \"post_id\": 2600, \"topic_id\": 565, \"forum_id\": 8, \"post_subject\": \"Re: PATTERN unstable ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nIt appears to me as though your obviously extensive knowledge of regular expressions might be getting in the way here. ECL has a much less arcane approach to parsing. IOW, I think there's a reason I've never seen the ECL PATTERN function used to define a regular expression that was not simply a character set within square brackets.\\n\\nYour comment parsing example in your last post would try to use the PATTERN function when it is not necessary (and, in fact, won't work in ECL). Here's my more "ECLish" way of accomplishing it:// PATTERN BENGINC := PATTERN('/*');\\n// PATTERN ENDC := PATTERN('*/'); //gets "illegal pattern" error\\n\\nPATTERN StartC := '/*';\\nPATTERN EndC := '*/'; \\nPATTERN Txt := ANY+;\\nPATTERN Comment := StartC Txt EndC;\\n\\nds := DATASET([{'/*This is a comment*/'},\\n {'/*Another comment*/'}],{STRING line});\\nr := RECORD\\n STRING txt := MATCHTEXT(Txt); \\nEND;\\nP1 := PARSE(ds,line,Comment,r);\\nP1;
Trying to use "PATTERN('*/')" produces an "illegal pattern" error from the syntax checker. The PATTERN function is meant to supplement the ECL parsing syntax, not replace it, so when you need a string constant in your pattern (like 'abc') you can simply use the constant itself without using the PATTERN function.\\n\\nIn fact, you could make the code this simple:PATTERN Txt := ANY+;\\nPATTERN Comment := '/*' Txt '*/';\\n\\nds := DATASET([{'/*This is the comment*/'},\\n {'/*Another comment*/'}],{STRING line});\\n\\nr := RECORD\\n STRING txt := MATCHTEXT(Txt); \\nEND;\\nP1 := PARSE(ds,line,Comment,r);\\nP1;
And the only reason to define "Txt" separately is to have a name to reference in the MATCHTEXT function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-23 15:01:11\" },\n\t{ \"post_id\": 2596, \"topic_id\": 565, \"forum_id\": 8, \"post_subject\": \"Re: PATTERN unstable ?\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\n\\nThere is a work-arround with REGEXPREPLACE function, since it is only a simple preprocessing to do. Anyway, PATTERN function should work correctly.\\n\\nPATTERN calls split makes more readable grammar. \\n\\nFor example \\nBENGINC := PATTERN('/*')\\nENDC := PATTERN('*/')\\nTEXTC := PATTERN('(([*][^/])|[^*])*'); // it means everything but not "*/"\\nCOMMENT := BEGINC TEXTE ENDC\\n\\n'[abc]' and 'abc' is not the same : '[abc]' represents the character a,or b, or c, when 'abc' represents the string "abc".\\n\\nJM.\", \"post_time\": \"2012-10-23 13:03:29\" },\n\t{ \"post_id\": 2570, \"topic_id\": 565, \"forum_id\": 8, \"post_subject\": \"Re: PATTERN unstable ?\", \"username\": \"JimD\", \"post_text\": \"Thanks for reporting the display issue!\\n\\nWe are working on the Language Reference HTML formatting issue for the ParsePattern Definitions topic.\\n\\nWe have located the the cause, and now only have to find a solution \\nOften times, finding the cause is the hardest part. We entered an issue in Jira to keep track of this. \\nhttp://track.hpccsystems.com/browse/HPCC-8119\", \"post_time\": \"2012-10-19 16:08:49\" },\n\t{ \"post_id\": 2564, \"topic_id\": 565, \"forum_id\": 8, \"post_subject\": \"Re: PATTERN unstable ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,
First, I suggest you could fix documentation online : "ParsePattern Definitions" chapter is not readable as lines are cut in the middle from the title to the end.
I see what you mean. We will see what we can do to fix that in the HTML version. Until we do, I suggest that your better resource for the Language Reference would be the PDF or the compiled help file (press F1 in the ECL IDE to bring that up) -- neither of which have these formatting issues.\n\nSecondly and this is the main point, it seems PATTERN function is not working properly. \nFor instance, in a standard rule body, \n\nPATTERN('abcde') \nis recognized when \nPATTERN('a') PATTERN('bcde')\nis not !!!
OK, so I can fully understand exactly what you're talking about, which of these three is your code:PATTERN a := 'abcde';\\nPATTERN b := 'a' 'bcde';\\n\\nPATTERN a := PATTERN('abcde');\\nPATTERN b := PATTERN('a') PATTERN('bcde');\\n\\nPATTERN a := PATTERN('[abcde]');\\nPATTERN b := PATTERN('[a]') PATTERN('[bcde]');\\n
I would expect the first and third versions to work correctly, but I would not necessarily expect the middle one to do so, since the PATTERN function specifies a perl-standard regular expression, and I've never seen one used in ECL that was not enclosed in square brackets (but I'm always open to learning more -- I am not the "world's foremost expert" on regular expressions and parsing).\\n\\nRichard\", \"post_time\": \"2012-10-19 13:43:02\" },\n\t{ \"post_id\": 2561, \"topic_id\": 565, \"forum_id\": 8, \"post_subject\": \"PATTERN unstable ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI am really struggling with ECL, trying to use parsing support functions but it seems to be buggy.\\n\\nFirst, I suggest you could fix documentation online : "ParsePattern Definitions" chapter is not readable as lines are cut in the middle from the title to the end.\\n\\nSecondly and this is the main point, it seems PATTERN function is not working properly. \\nFor instance, in a standard rule body, \\nPATTERN('abcde') \\n
is recognized when \\nPATTERN('a') PATTERN('bcde')
is not !!!\\n\\nOff course, this is a simplified version of more complex issue. But logically speaking, even like this, it should work because this is the same request. Did I missed something implicit in PATTERN mechanism ?\\n\\nThanks for your support,\\nJM.\", \"post_time\": \"2012-10-19 13:09:52\" },\n\t{ \"post_id\": 2598, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nHere's the way I would write that code:rec1 := RECORD\\n string10 id;\\n string xml; \\nEND;\\n\\nsamIndex := DATASET('~RTTEMP::SampleData::FetchXml',{rec1 ,UNSIGNED8 RecPtr {virtual(fileposition)}},FLAT);\\nsamIdx1 := INDEX(samIndex ,{id,RecPtr},{samIndex},'~RTTEMP::SampleData::FetchXml');\\n\\nEXPORT TestQuery := FUNCTION\\n STRING10 id_test:= '' : STORED('id_value');\\n resultSet := FETCH(samIndex ,samIdx1(id=id_test),RIGHT.RecPtr);\\n RETURN resultSet;\\nEND;\\t
The name of the file containing this code must be "TestQuery.ecl" (because the EXPORT definition name and the filename must always match). Try it this way and see what happens.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-23 13:56:36\" },\n\t{ \"post_id\": 2589, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nJust trying to index based on id and search the same in roxie.\\n\\nThe same query after i refactored the file names ran properly in thor but not in roxie, where i specify a given id and the corresponding xml is fetched.\\n\\nI get this "Unknown query Error" only in roxie. Anything in particular to be done based on target clusters.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-10-22 19:25:43\" },\n\t{ \"post_id\": 2582, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,I did re run and rebuild with the changed names, now i encounter a different error..
Which indicates that re-running the index builds did work to clear that naming problem."Message: Unknown query TestData"\\n\\nIs it because the index is not created properly, i encounter these errors..
No, because this is a completely different error, which is telling you that the system can't find the "TestData" query. \\n\\nWhat exactly are you doing that makes this one occur?\\n\\nRichard\", \"post_time\": \"2012-10-21 19:32:10\" },\n\t{ \"post_id\": 2575, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI did re run and rebuild with the changed names, now i encounter a different error..\\n\\n"Message: Unknown query TestData"\\n\\nIs it because the index is not created properly, i encounter these errors..?\\n\\nThanks in advance..\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-10-20 05:15:58\" },\n\t{ \"post_id\": 2574, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"rtaylor\", \"post_text\": \""Message: Query FetchData.1 is suspended because Could not resolve filename RTTEMP::SampleData::FetchXml"
This error indicates the file can't be found using the name you supplied.\\nTried using ~ (leading tildes) for the file names, still the same error exists
After changing the code, did you re-run everything to re-build files with the changed names? If not, then the files were still mis-named.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-19 20:16:09\" },\n\t{ \"post_id\": 2573, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi Richard,\\n\\nTried using ~ (leading tildes) for the file names, still the same error exists. \\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-10-19 19:42:59\" },\n\t{ \"post_id\": 2571, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Re: Index Error\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nWhen you look at the tutorial code again, you will note that every time a file is named in the doc, the filename string always begins with a leading tilde (~) -- and I don't see leading tildes in your code. So I would suggest adding them and re-running everything.\\n\\nThe purpose of that leading tilde is discussed here: http://hpccsystems.com/community/docs/ecl-language-reference/html/scope-and-logical-filenames\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-19 16:40:19\" },\n\t{ \"post_id\": 2569, \"topic_id\": 566, \"forum_id\": 8, \"post_subject\": \"Index Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am creating an index and publishing the same in Roxie.\\n\\nBut while querying in roxie, get the following error :\\n\\n"Message: Query FetchData.1 is suspended because Could not resolve filename RTTEMP::SampleData::FetchXml"\\n\\nSample Code :\\n\\n
rec1 := RECORD\n string10 id;\n\tstring xml; \nEND;\n\nsampleOut:= TABLE(data1,rec1);\nsampleOut2 := OUTPUT(sampleOut,,'RTTEMP::SampleData::FetchXml');\n\nsamIndex := DATASET('RTTEMP::SampleData::FetchXml',{rec1 ,UNSIGNED8 RecPtr {virtual(fileposition)}},FLAT);\n\nsamIdx1 := INDEX(samIndex ,{id,RecPtr},{xml},'RTTEMP::SampleData::FetchXml');\n\nSTRING10 id_test:= '' : STORED('id_value');\nresultSet := FETCH(samIndex ,samIdx1(id=id_test),RIGHT.RecPtr);\nbldIndex := BUILDINDEX(samIdx1);\nresultSet;
\\n\\nGiven a ID, i need to fetch the xml.\\n\\nTried publishing without a stored value,in that case i could fetch all the details , But in case to fetch a particular xml based on the id, get the following error.\\n\\nI think i am doing some very silly mistake.. , tried the same way as "Tutorial Person Example"\\n\\nKindly help.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-10-19 15:55:08\" },\n\t{ \"post_id\": 2674, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Dan,\\n\\nThanks for the details!!
\\n\\nI'll try out this XSLT transform.\\n\\n\\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-11-01 06:30:10\" },\n\t{ \"post_id\": 2649, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"DSC\", \"post_text\": \"XSLT stands for "XSL Transformations" and it's a technology unrelated to HPCC. Basically, it allows you to transform an XML document into something else, with quite a bit of control over the process. That "something else" can be anything, not just another XML document. My suggestion is simply to make that "anything" an ECL file containing your RECORD and TABLE declarations, based on the values found in the XML document. Once you write the XSLT transform (which is a file) you would pass it and your XML document to something like xsltproc to create the ECL file. Integrating that ECL file, so that it's actually usable in your workflow, depends more on your workflow than anything else. At any rate, I would suggest searching the 'net for XSLT for more information on that technology. It's pretty extensive.\\n\\nRichard's suggestion is spot-on, though. Get started by modifying the ECL manually. Once you get it working the way you want, you'll then be in a good place to try to automate the process as you'll know exactly what the XSLT needs to produce.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2012-10-29 14:36:32\" },\n\t{ \"post_id\": 2648, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Dan,\\n\\n
If you feel like this process really must be automated further, consider using XSLT to translate the XML into ECL
\\n\\nI didn't understand this use of XSLT to translate the xml into ECL?\\n\\nCan you explain in detail a bit, I'm new to this ECL and i dont know \\nall the concepts? \\n\\nIf u see previous posts, u'll come to know that i'm not able to use the xml, the way i want!!\\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-29 14:26:23\" },\n\t{ \"post_id\": 2646, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Richard, \\n\\n
Since the frequency is only once a Month, then I suggest that you simply change the ECL code at the same time that you update the XML file
\\n\\nYou mean to say, i should use ECL File only, should not try xml again? \\n\\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-29 14:11:03\" },\n\t{ \"post_id\": 2644, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"DSC\", \"post_text\": \"If you feel like this process really must be automated further, consider using XSLT to translate the XML into ECL. For most XML, that should be a fairly simple translation.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-29 00:13:08\" },\n\t{ \"post_id\": 2642, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"rtaylor\", \"post_text\": \"Pradeep,
Frequency of fields getting changed is Monthly!!\\n\\nYes it must be an external xml file. \\nBut why an xml file? for this right now i dont've answer.
Since the frequency is only once a Month, then I suggest that you simply change the ECL code at the same time that you update the XML file (presumably this XML file will be used by some other system) -- that way you satisfy both requirements.\\n\\nRichard\", \"post_time\": \"2012-10-28 14:05:28\" },\n\t{ \"post_id\": 2641, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Richard,\\n SetFldList is a local definition, but in a production code-base I would make it a separate EXPORT definition (a single .ecl file), maintainable as easily as an external XML file:\\n
\\nMaking separate ecl file and then exporting definition is fine, i've tried previously\\nand it works.\\n\\nFrequency of fields getting changed is Monthly!!\\n\\nYes it must be an external xml file. \\nBut why an xml file? for this right now i dont've answer. \\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-28 11:26:42\" },\n\t{ \"post_id\": 2631, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"rtaylor\", \"post_text\": \"Pradeep,
Now the requirement is like, i should not hard code fields of interest. They should come from an external file like XML because fields will change, so i tried this Functionmacro and #expand. Passing constant string 'a, b, c' to macro is same like mentioning fields in TABLE(recordset, {field1, field2, filed3}) which we dont want.
OK, I understand the issue -- you want to be able to change the set of interesting fields and do it in just one place. But what I'm not getting from your description is exactly how often those fields will change. Will that be daily? Hourly? Monthly?\\n\\nI can understand your desire to have a single place to update, whenever you want to change the fields. But unless that "single place to update" is used by multiple platforms I see absolutely no reason why it must be an external XML file. Updating a single ECL definition file is exactly the same amount of work as updating the content of an XML file -- they are both, after all, simply text files.\\n\\nIn this example, SetFldList is a local definition, but in a production code-base I would make it a separate EXPORT definition (a single .ecl file), maintainable as easily as an external XML file:\\nRec := RECORD\\n UNSIGNED1 ID;\\n STRING1 F1;\\n STRING1 F2;\\n STRING1 F3;\\n STRING1 F4;\\nEND;\\nDS := DATASET([{1,'A','B','C','D'},\\n {2,'E','F','G','H'},\\n {3,'I','J','K','L'}],Rec);\\nDSflds := DATASET([{'ID,F1,F2'},{'ID,F2,F3'},{'ID,F3,F4'}],{STRING list});\\n\\nFM_Vslice(RecSet,fields) := FUNCTIONMACRO\\n t := TABLE(RecSet,{#EXPAND(fields)});\\n RETURN t;\\nENDMACRO;\\n\\nSetFldList := ['ID,F1,F2','ID,F2,F3','ID,F3,F4'];\\nFM_Vslice(DS,SetFldList[1]);\\nFM_Vslice(DS,SetFldList[2]);\\nFM_Vslice(DS,SetFldList[3]);
\\nSo, my next question is -- MUST it be an external XML file? And, if so, why?\\n\\nRichard\", \"post_time\": \"2012-10-26 13:42:45\" },\n\t{ \"post_id\": 2627, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Richard,\\n\\nThank You for detailed reply. \\n
it's back to the "drawing board" and time to ask once again -- what task are you actually trying to accomplish?
The Task is I'm getting data which contains around 15 to 20 columns. After spraying and reading, i need only few columns like age, location etc. so that i can apply clustering using Machine Learning(ML) library. \\n\\nSo i used TABLE to get columns/fields of interest from dataset(which contains 15 to 20 columns) and it worked fine.\\n\\nNow the requirement is like, i should not hard code fields of interest. They should come from an external file like XML because fields will change, so i tried this Functionmacro and #expand. Passing constant string 'a, b, c' to macro is same like mentioning fields in TABLE(recordset, {field1, field2, filed3}) which we dont want.\\n\\nThis is the big picture!! \\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-26 11:19:45\" },\n\t{ \"post_id\": 2621, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"rtaylor\", \"post_text\": \"Pradeep,
But this is where i'm gettin stuck "passing in constant string each time it is called"
The operative term here is "constant string." You do that like this:Rec := RECORD\\n UNSIGNED1 ID;\\n\\tSTRING1 F1;\\n\\tSTRING1 F2;\\n\\tSTRING1 F3;\\n\\tSTRING1 F4;\\nEND;\\nDS := DATASET([{1,'A','B','C','D'},\\n {2,'E','F','G','H'},\\n {3,'I','J','K','L'}],Rec);\\nDSflds := DATASET([{'ID,F1,F2'},{'ID,F2,F3'},{'ID,F3,F4'}],{STRING list});\\n\\nFM_Vslice(RecSet,fields) := FUNCTIONMACRO\\n t := TABLE(RecSet,{#EXPAND(fields)});\\n\\tRETURN t;\\nENDMACRO;\\n\\n// FM_Vslice(DS,DSflds[1].list);\\n// FM_Vslice(DS,DSflds[2].list);\\n// FM_Vslice(DS,DSflds[3].list);\\nFM_Vslice(DS,'ID,F1,F2');\\nFM_Vslice(DS,'ID,F2,F3');\\nFM_Vslice(DS,'ID,F3,F4');
The commented out code does not work, creating a "constant expression expected" error, which is solved by the uncommented version that passes the field list as a string constant (not as a variable field value). That's the difference between trying to pass in a variable containing text and passing in a constant string of that text.\\n\\nSo, it's back to the "drawing board" and time to ask once again -- what task are you actually trying to accomplish? Your first-choice solution to the problem (trying to generate a different TABLE each time) simply won't work unless you write your own external ECL code-generation program to write that code outside of the HPCC environment and then run it using ECLplus.exe, so we need to find another way to solve the actual problem you're trying to solve. IOW, what's the "big picture" problem we're solving?\\n\\nRichard\", \"post_time\": \"2012-10-25 15:40:06\" },\n\t{ \"post_id\": 2615, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Richard,\\n\\n\\nHowever, if the purpose here is to dynamically define what fields are returned each time its called, and that is going to change multiple times within the same workunit.\\n
This is exactly what I want and you're saying FUNCTIONMACRO is the right option!!\n\n\n\npassing in a different constant string each time it is called (and being a constant string, the #EXPAND will then operate the way you want it to).
\\nBut this is where i'm gettin stuck "passing in constant string each time it is called"\\n\\nHas anyone done like this, or is it possible using FUCNTIONMACRO?\\nOr am i wasting my time in doing this ?\\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-25 07:18:57\" },\n\t{ \"post_id\": 2605, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"rtaylor\", \"post_text\": \"Pradeep,
I tried using #EXPAND in macro but it didnt work. One of you people replied it can't be done in TABLE.
What I said was that it can't be done in a FUNCTIONMACRO, because #EXPAND expects a constant string and not a variable value from a file -- your problem is not with the TABLE function but with #EXPAND. http://hpccsystems.com/bb/viewtopic.php?f=8&t=535&hilit=+TABLE&sid=1eb42fd0e5caaef85d1b43038d97907e\\n\\nIf the purpose here is to have exactly one place to change the fields to return when/if you want to change them, then I submit that the TABLE code itself is the right and proper place to do that, and not an external xml file read at runtime. Alternatively, you could define the RECORD structure for this TABLE function as a separate definition in its own file and have that be the one place it is changed/maintained.\\n\\nHowever, if the purpose here is to dynamically define what fields are returned each time its called, and that is going to change multiple times within the same workunit, then I would submit that the call to the FUNCTIONMACRO is the right and proper place to do that, passing in a different constant string each time it is called (and being a constant string, the #EXPAND will then operate the way you want it to).\\n\\nOr is there some overriding reason this information has to come from an xml file? Is this a design issue that someone unfamiliar with HPCC has imposed?\\n\\nRichard\", \"post_time\": \"2012-10-23 17:37:42\" },\n\t{ \"post_id\": 2592, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Hi Bob,\\n\\nThanks for the reply.\\n\\nI read your Clean_BandsArtists.txt, what i understood from that is you're reading complex xml and extracting artist from that, is that correct?\\n\\nBut what i'm trying is Dynamic struture for TABLE/Project!!\\ni.e TABLE/PROJECT takes 2nd param as "format", where you mention output record struture or you mention like explicit field names {name,Address,email}.\\n\\nNow these 3 fields{name,Address,email} are coming from xml, and table should output data related to 3 fields and if i add "cellphone" in xml {name,Address,email, cellphone} table should output data of 4 fields.\\n\\nMy Files are\\n\\nXML File\\n- <configuration>\\n <add Key="RiskAge,RiskSICCode,RiskValue" /> \\n </configuration>\\n
\\n\\nECL Code\\n\\n ConfigData := DATASET('~file::172.20.104.226::home:: user ::hpcc::lz_data::xml::config.xml',{STRING xmlstring {XPATH('<>')}}, XML('configuration/add'));\\n\\n PolicyData := DATASET('~accurusi_poc::output::policydatacsv',$.common.Layout_policyDataRec,csv);\\n\\n\\toutrec := RECORD\\n\\tString Key := XMLTEXT('@Key');\\n\\tEND;\\n\\n\\tparsedXML := PARSE(ConfigData,xmlstring,outrec,XML('add'));\\n\\n\\tSTRING str:=parsedXML[1].key;\\n\\n\\tRequiredDS := TABLE(PolicyData,{#EXPAND(str)});\\n\\n\\t OUTPUT(RequiredDS);\\n\\n
\\n\\nRight now it is giving "constant expression error"\\n\\nand if use like this\\n\\n\\nRequiredDS := TABLE(PolicyData,{#EXPAND('RiskAge,RiskSICCode,RiskValue')});\\n
\\nthen it's working fine!!\\n\\nJust wanna know is there any other way of doing this?\\n\\nAm i using #EXPAND incorrectly?\\n\\nThank you,\\nPradeep\", \"post_time\": \"2012-10-23 07:17:40\" },\n\t{ \"post_id\": 2591, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Re: Use of TABLE Functionality\", \"username\": \"bforeman\", \"post_text\": \"I think that a PROJECT is also a good alternative to a TABLE. I used it to read a very complex XML file and just slice the data that I needed from it.\\n\\nLook at the following code example for more details:\\n\\nhttp://hpccsystems.com/community/contributions/data-descriptors-and-simple-example-programs/musicmoz-artists-albums-and-tra\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-22 20:12:58\" },\n\t{ \"post_id\": 2586, \"topic_id\": 569, \"forum_id\": 8, \"post_subject\": \"Use of TABLE Functionality\", \"username\": \"Pradeep\", \"post_text\": \"Hi,\\n\\nAs my understanding we use TABLE to get particular fields(columns) of Recordset or dataset. i.e vertical slice. \\n\\nBut i want something like, an xml file, which contains fields(columns) of interest and use this file in TABLE, so that when fields need to be changed, i dont've to change the code just the xml file.\\n\\nI tried using #EXPAND in macro but it didnt work. One of you people replied it can't be done in TABLE.\\n\\nIs there any other way with or without TABLE?\\n\\n\\nThank You,\\nPradeep\", \"post_time\": \"2012-10-22 06:45:04\" },\n\t{ \"post_id\": 2595, \"topic_id\": 570, \"forum_id\": 8, \"post_subject\": \"Re: Runtime count value to TOPN\", \"username\": \"Ghost\", \"post_text\": \"May be this is what you were looking for :\\nchildPersonRecord := {STRING fname,UNSIGNED1 age};\\n\\npersonRecord := RECORD\\n\\tSTRING20 fname;\\n\\tSTRING20 lname;\\n\\tUNSIGNED cnt;\\n\\tDATASET(childPersonRecord) children;\\nEND;\\n\\n\\n\\npersonDataset := DATASET([{'Kevin','Hall', 0,[{'Abby',2},{'Nat',2}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Kevin','Simms', 0,[{'Jen',18},{'Ali',16},{'Andy',13}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Kevin','Simms', 0,[{'Jen',18},{'Ali',16},{'Andy',13}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Suman','Hall', 0,[{'Abby',2},{'Nat',2}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Suman','Simms', 0,[{'Jen',18},{'Ali',16},{'Jen',18},{'Ali',16},{'Andy',13}]}],\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpersonRecord);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ngroupedPerson := sort(personDataset, fname);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nrollupPerson := rollup(groupedPerson, left.fname = right.fname,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ttransform(personRecord,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.cnt := count(left.children) + count(right.children),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.children := left.children + right.children;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left));\\n\\nprojectPerson := project(rollupPerson, transform(personRecord,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.children := topn(left.children, (25 * left.cnt)/100, fname),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\noutput(personDataset, named('personDataset'));\\noutput(rollupPerson, named('rollupPerson'));\\noutput(projectPerson, named('projectPerson'));
\", \"post_time\": \"2012-10-23 10:38:32\" },\n\t{ \"post_id\": 2587, \"topic_id\": 570, \"forum_id\": 8, \"post_subject\": \"Runtime count value to TOPN\", \"username\": \"sbagaria\", \"post_text\": \"I want to give a runtime dynamic count argument to TOPN inside a transform.\\n\\nBasically I want to select the top 25% of the rows of the table. So the count argument depends on how many rows are present in the dataset. However, when I use this transform inside a grouped rollup or a project for a nested dataset, I only get back the top 2 rows regardless of the size of the child dataset.\\n\\nThe compiler does not throw any warnings.\\n\\nIs there a way to get around it?\", \"post_time\": \"2012-10-22 14:07:56\" },\n\t{ \"post_id\": 2618, \"topic_id\": 574, \"forum_id\": 8, \"post_subject\": \"Re: Questions for maintransform & mergetransform in AGGREGAT\", \"username\": \"DSC\", \"post_text\": \"The next part of the documentation for AGGREGATE talks about the mergetransform. Basically, it can be deduced if all of the fields in maintransform are created relatively simply, with just a few operators (MAX, MIN, SUM, +, &, |, ^ and * according to the manual). If you do something different or more complex, like choosing left or right values based on a timestamp, then you will have to supply your own mergetransform to ensure that records coming together from different nodes actually merge correctly.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-25 11:39:25\" },\n\t{ \"post_id\": 2614, \"topic_id\": 574, \"forum_id\": 8, \"post_subject\": \"Questions for maintransform & mergetransform in AGGREGATE\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nI could not make the mergetransform work in an AGGREGATE, it always use the maintransform, I found the definition in the ECLLanguageReferrence:\\n\\n(d) If multiple records match on multiple nodes, then step (c) performs on each node, and then the summary records are merged. This requires a mergetransform that takes two records of type RIGHT. Whenever possible the code generator tries to deduce the mergetransform from the maintransform. If it can't, then the user will need to specify one.\\n
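As a concrete illustration of the case Dan describes in the reply above (a main transform whose result is not a simple MAX, MIN or SUM of a field, so the merge cannot be deduced), here is a hedged sketch with an explicit merge transform; every record, field and value in it is invented:

inRec := RECORD
  STRING10  key;
  UNSIGNED8 ts;     // timestamp of the source record
  STRING20  val;
END;

outRec := RECORD
  STRING10  key;
  UNSIGNED8 ts;
  STRING20  latestVal;
END;

ds := DATASET([{'A',1,'old'},{'A',5,'new'},{'B',2,'only'}], inRec);

// Keep the value from the most recent record seen for each key.
outRec mainXF(inRec l, outRec r) := TRANSFORM
  SELF.key       := l.key;
  SELF.ts        := IF(l.ts >= r.ts, l.ts, r.ts);
  SELF.latestVal := IF(l.ts >= r.ts, l.val, r.latestVal);
END;

// Combining two per-node summaries needs the same "most recent wins" rule,
// which cannot be inferred from mainXF, so it is spelled out explicitly.
outRec mergeXF(outRec r1, outRec r2) := TRANSFORM
  SELF.ts        := IF(r1.ts >= r2.ts, r1.ts, r2.ts);
  SELF.latestVal := IF(r1.ts >= r2.ts, r1.latestVal, r2.latestVal);
  SELF           := r1;
END;

latest := AGGREGATE(ds, outRec, mainXF(LEFT, RIGHT), mergeXF(RIGHT1, RIGHT2), LEFT.key);
OUTPUT(latest);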
\\n\\nSo in which case, it can't deduce the mergetransform from maintransform and we need to specify?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-10-25 05:58:23\" },\n\t{ \"post_id\": 2794, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"g2pis\", \"post_text\": \"--\", \"post_time\": \"2012-11-16 07:11:17\" },\n\t{ \"post_id\": 2711, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"[quote="rtaylor":23p1obkn]\\nSo, just being curious, why do these need to be separate files on disk?\\n\\nRichard\\n\\nThey need to be desprayed and shipped to separate destinations (as a regular files).\", \"post_time\": \"2012-11-07 10:35:29\" },\n\t{ \"post_id\": 2662, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,Interesting enough, I was able to make it work on hthor (it does output to the file, even in WU it did look as a mess). I'm out of office now and going out of town for a week, so I don't have a working sample handy, let me know it you want to see it.
If you run my last example code in hThor you'll see that it runs but only shows a single result (not the three that I expected) in the IDE and in ECL Watch, but I just looked at my Logical Files list and all three files are indeed there (1-part files, of course, since they were produced on hThor).\\n\\nSo, just being curious, why do these need to be separate files on disk?\\n\\nRichard\", \"post_time\": \"2012-10-30 21:33:59\" },\n\t{ \"post_id\": 2659, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"Thanks for the reply, Richard. I've tried quite a few ways myself, including an attempt to hide the OUTPUT inside a function as a side effect - can't trick HPCC.\\n\\nInterestingly enough, I was able to make it work on hthor (it does output to the file, even if in the WU it looked like a mess). I'm out of the office now and going out of town for a week, so I don't have a working sample handy; let me know if you want to see it.\\n\\nI'll definitely follow your advice and add it to JIRA when I'm back in the office.\\nI can definitely output to a single file and split it in awk
, so it's not an emergency, but it would be nice to be able to do it inside the platform.\\n\\nThanks a lot for your help!\\n\\nOleg.\", \"post_time\": \"2012-10-30 19:03:45\" },\n\t{ \"post_id\": 2658, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nSorry, but I have no better answer. I've tried a number of ways to accomplish it, like this APPLY attempt:
FlatRec := RECORD\\n STRING1 Value1;\\n STRING1 Value2;\\n STRING1 CVal2_1;\\n STRING1 CVal2_2;\\nEND;\\n\\nFlatFile := DATASET([{'C','A','X','W'},\\n {'B','B','S','Y'},\\n {'A','C','Z','T'}],FlatRec);\\n\\nOutRec := RECORD\\n FlatFile.Value1;\\n FlatFile.Value2;\\nEND;\\nP_Recs := TABLE(FlatFile, OutRec);\\n\\n\\nOutRec NormThem(FlatRec L, INTEGER C) := TRANSFORM\\n SELF.Value2 := CHOOSE(C,L.CVal2_1, L.CVal2_2);\\n SELF := L;\\nEND;\\nChildRecs := NORMALIZE(FlatFile,2,NormThem(LEFT,COUNTER));\\n\\nSetVal1 := SET(P_Recs,Value1);\\nApplyDS := DATASET(SetVal1,{STRING1 Value1});\\n\\nAPPLY(ApplyDS,OUTPUT(ChildRecs(Value1 = ApplyDS.Value1),,'~RTTEST::Child_'+ApplyDS.Value1));\\n
BTW, this APPLY "runs" in hThor but does not produce the expected result, so it won't work that way either. \\n\\nI tried using the Template language to generate the appropriate OUTPUT actions, but that had a different set of issues.\\n\\nI suggest you add this as an issue in JIRA and let developers have a go at it.\\n\\nRichard\", \"post_time\": \"2012-10-30 18:50:48\" },\n\t{ \"post_id\": 2645, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"Richard, honestly, if it were that simple, I wouldn't bother people on the forum!\\n\\nI need this to work for an unlimited number of unspecified (at the moment of writing the code) 'ds_number' variables from my previous sample.\\n\\nSo at the moment of writing the code I know neither the number of files to output nor the possible values.\", \"post_time\": \"2012-10-29 10:33:06\" },\n\t{ \"post_id\": 2640, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nYou can simply extend my previous example to write the separate child files by filtering like this:OUTPUT(ChildRecs(Value1='A'),,'ChildData1');\\nOUTPUT(ChildRecs(Value1='B'),,'ChildData2');\\nOUTPUT(ChildRecs(Value1='C'),,'ChildData3');
HTH,\\n\\nRichard\", \"post_time\": \"2012-10-27 19:12:11\" },\n\t{ \"post_id\": 2638, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"Sorry, I didn't put accent right: I want just output this, but output each of nested sets to it's own file.\\nPlease look at the following snippet:\\n\\nR1 := {UNSIGNED n};\\n\\nR2 := {UNSIGNED ds_number; DATASET(R1) dsRecs };\\n\\nd := DATASET([\\n\\t\\t{1, DATASET([1, 1, 1], R1)},\\n\\t\\t{2, DATASET([2, 2, 2], R1)}\\n\\t ], R2);\\n
\\n\\nFor the data above I need to create two files:\\n\\nFile '1.csv' : \\n1\\n1\\n1\\n\\nFile '2.csv' : \\n2\\n2\\n2\", \"post_time\": \"2012-10-26 22:46:31\" },\n\t{ \"post_id\": 2636, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"rtaylor\", \"post_text\": \"oleg,DATASET I have is the actually dataset of the dataset, and I want to output each nested set to it's own file.
That sounds like a nested child dataset that you want to extract, which is exactly what the NORMALIZE function is designed to do, as in this example://\\n// Example code - use without restriction. \\n//\\nFlatRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\n\\tSTRING1 CVal2_1;\\n\\tSTRING1 CVal2_2;\\nEND;\\n\\nFlatFile := DATASET([{'C','A','X','W'},\\n {'B','B','S','Y'},\\n {'A','C','Z','T'}],FlatRec);\\n\\nOutRec := RECORD\\n\\tFlatFile.Value1;\\n\\tFlatFile.Value2;\\nEND;\\nP_Recs := TABLE(FlatFile, OutRec);\\n\\nOUTPUT(P_Recs,NAMED('ParentData'));\\n/*\\nP_Recs result set is:\\n\\tRec#\\tValue1\\tValue2\\n\\t1\\t\\tC\\t\\tA\\n\\t2\\t\\tB\\t\\tB\\t\\n\\t3\\t\\tA\\t\\tC\\n*/\\n\\nOutRec NormThem(FlatRec L, INTEGER C) := TRANSFORM\\n\\tSELF.Value2 := CHOOSE(C,L.CVal2_1, L.CVal2_2);\\n\\tSELF := L;\\nEND;\\nChildRecs := NORMALIZE(FlatFile,2,NormThem(LEFT,COUNTER));\\n\\nOUTPUT(ChildRecs,NAMED('ChildData'));\\n/*\\nChildRecs result set is:\\n\\tRec#\\tValue1\\tValue2\\n\\t1\\t\\tC\\t\\tX\\n\\t2\\t\\tC\\t\\tW\\n\\t3\\t\\tB\\t\\tS\\n\\t4\\t\\tB\\t\\tY\\t\\n\\t5\\t\\tA\\t\\tZ\\n\\t6\\t\\tA\\t\\tT\\n*/
HTH,\\n\\nRichard\", \"post_time\": \"2012-10-26 17:59:48\" },\n\t{ \"post_id\": 2635, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"Re: APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"I understood that problem caused by OUTPUT(), basically the Language Reference does say that in the first place. \\n\\nStill I need some help to resolve this:\\nDATASET I have is the actually dataset of the dataset, and I want to output each nested set to it's own file.\\n\\nSo APPLY() seemed to me the prefect match, except unfortunately I can not use it (despite somehow it works on hthor !).\\n\\nI can think of writing a function with a side effect, and call it inside a TRANSFORM - than just use PROJECT. From the first look it should work. \\n\\nAre there any other alternatives?\", \"post_time\": \"2012-10-26 15:14:21\" },\n\t{ \"post_id\": 2629, \"topic_id\": 577, \"forum_id\": 8, \"post_subject\": \"APPLY() returns an error on THOR, but not on HTHOR\", \"username\": \"oleg\", \"post_text\": \"Here is a simplified example:\\n\\n===================================\\nr := {UNSIGNED n};\\nd := DATASET([1, 2, 3], R);\\nd;\\nAPPLY(d, output(n));\\n------------------------------------\\n\\nCode above works fine on hthor, but returns an error on thor. I did not find any usage restriction in docs. \\n\\nPlatform: OSS 3.8.2\\nError text:\\n\\neclagent\\t9999: System error: 9999: Graph[1], SLAVE 10.222.64.1:7600: Graph[1], apply[4]: Internal Error at /var/jenkins/workspace/LN-Candidate-3.8.2/LN/centos_5_x86_64/HPCC-Platform/thorlcr/graph/thgraphslave.cpp(908)\", \"post_time\": \"2012-10-26 12:07:34\" },\n\t{ \"post_id\": 2656, \"topic_id\": 578, \"forum_id\": 8, \"post_subject\": \"Re: Deleting work unit with libary function / command line t\", \"username\": \"bforeman\", \"post_text\": \"From our development team:\\n\\nNot via ECL / lib function,but via sasha cmdline you can archive (which removes from Dali), via:\\n\\nsasha server=<sasha-server-ip> action=ARCHIVE <wu-specifier>\\n\\nYou _could_ also delete workunits using daliadmin to directly delete meta data from Dali... with something like : \\ndaliadmin <dali-ip> delete /WorkUnits/<wuid>\\nbut you should not unless you really know what you're doing..\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-30 14:18:47\" },\n\t{ \"post_id\": 2652, \"topic_id\": 578, \"forum_id\": 8, \"post_subject\": \"Re: Deleting work unit with libary function / command line t\", \"username\": \"bforeman\", \"post_text\": \"I don't see anything in our service libraries. Perhaps the CmdProcess utility can be used to do this, checking now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-30 12:48:16\" },\n\t{ \"post_id\": 2637, \"topic_id\": 578, \"forum_id\": 8, \"post_subject\": \"Deleting work unit with libary function / command line tool\", \"username\": \"tmurphy\", \"post_text\": \"Is there some way to delete a workunit programmatically? I'm looking for a command line tool or library function to do this. Does such a thing exist?\", \"post_time\": \"2012-10-26 22:18:18\" },\n\t{ \"post_id\": 2663, \"topic_id\": 580, \"forum_id\": 8, \"post_subject\": \"Re: APPLY actionlist - is it sequential?\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,Manual says:\\n"The actions execute in the order they appear in the actionlist." Sounds like sequential execution of actions for each record.\\n\\nJust want to make sure my understanding is correct (i.e. each new action will start only after previous will finish).
Try an experiment, and if it doesn't work that way let me know. \\n\\nRichard\", \"post_time\": \"2012-10-30 21:35:55\" },\n\t{ \"post_id\": 2650, \"topic_id\": 580, \"forum_id\": 8, \"post_subject\": \"Re: APPLY actionlist - is it sequential?\", \"username\": \"bforeman\", \"post_text\": \"Hi Oleg,\\n\\nYes, I am sure that it is sequential as documented. That's how we've always presented APPLY in class.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-30 12:32:18\" },\n\t{ \"post_id\": 2647, \"topic_id\": 580, \"forum_id\": 8, \"post_subject\": \"APPLY actionlist - is it sequential?\", \"username\": \"oleg\", \"post_text\": \"Manual says:\\n"The actions execute in the order they appear in the actionlist." Sounds like sequential execution of actions for each record.\\n\\nJust want to make sure my understanding is correct (i.e. each new action will start only after previous will finish).\", \"post_time\": \"2012-10-29 14:20:14\" },\n\t{ \"post_id\": 4030, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"sbagaria\", \"post_text\": \"The original message in this thread suggests that CmdProcess was hanging. This will now be resolved in 3.10.8. Please refer viewtopic.php?f=41&t=899.\", \"post_time\": \"2013-04-30 11:58:58\" },\n\t{ \"post_id\": 2742, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,
Please note that the book example said nothing about the execution details
But it will as soon as I update the doc. \\n\\nRichard\", \"post_time\": \"2012-11-08 14:54:43\" },\n\t{ \"post_id\": 2739, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"oleg\", \"post_text\": \"Richard,\\nPlease note that the book example said nothing about the execution details, so it leaves readers with a wrong impression that the result DATASET will always consist of just a single element\", \"post_time\": \"2012-11-08 10:19:24\" },\n\t{ \"post_id\": 2736, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"DSC\", \"post_text\": \"Wow. I've read those #option options several times and didn't remember that one.\\n\\nOK, so it's obscure but documented. Got it.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-07 20:27:55\" },\n\t{ \"post_id\": 2735, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"rtaylor\", \"post_text\": \"And this use of #OPTION also gets you through to Thor every time:
//Form 2 with XML input:\\n#OPTION('pickBestEngine',false);\\nIMPORT STD;\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George' + STD.system.Thorlib.Node() + '</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nOUTPUT(p);
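// Untested note: with pickBestEngine turned off the workunit should stay on Thor, so the
// result should be one George<n>/Jetson row per slave node, rather than the single
// George0 row you get when hThor grabs the job.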
HTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 20:24:13\" },\n\t{ \"post_id\": 2734, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nGood question. I've been having a think on that since I started reading this thread and I haven't yet figured out why the two would be different. Note that, without SEQUENTIAL the job seems to execute on node 1 (zero returned from STD.system.ThorLib.Node()), but run the same job on hThor and it also gets zero from that function (seemingly node 1). Therefore, my speculation is that, without SEQUENTIAL the job is simple enough that ECL Agent is "hijacking" it and not allowing it to run on Thor, but with the SEQUENTIAL thrown in there we've managed to "complicate" it enough that it lets it go through to Thor.\\n\\nOK, so that's the hypothesis I just tested with this code://Form 2 with XML input:\\nIMPORT STD;\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George' + STD.system.Thorlib.Node() + '</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nOUTPUT(p);\\n// SEQUENTIAL(OUTPUT(p));\\n\\nSomeFile := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'},{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t\\t\\t\\t{STRING1 Letter});\\n\\t\\t\\t\\t\\t\\nOUTPUT(DISTRIBUTE(SomeFile));\\t\\t
You will note that the first result is now 3 records instead of 1, so it is confirmed -- NOT A BUG, but just a standard, garden-variety hThor "hijacking" instance. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 20:18:31\" },\n\t{ \"post_id\": 2733, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"DSC\", \"post_text\": \"Richard, do you believe that the different output with and without SEQUENTIAL highlights a bug? Or is that expected behavior?\", \"post_time\": \"2012-11-07 20:07:59\" },\n\t{ \"post_id\": 2732, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"rtaylor\", \"post_text\": \"And it becomes more interesting when you add the Node() function, like this:
IMPORT STD;\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George' + STD.system.Thorlib.Node() + '</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nSEQUENTIAL(OUTPUT(p));
\", \"post_time\": \"2012-11-07 20:06:32\" },\n\t{ \"post_id\": 2731, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"rtaylor\", \"post_text\": \"And it becomes more interesting when you add the Node() function, like this:IMPORT STD;\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George' + STD.system.Thorlib.Node() + '</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nSEQUENTIAL(OUTPUT(p));
\", \"post_time\": \"2012-11-07 20:06:15\" },\n\t{ \"post_id\": 2726, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"DSC\", \"post_text\": \"I think you're right, because the following two pieces of code produce different results on Thor:\\n\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nOUTPUT(p);
\\n\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nSEQUENTIAL(OUTPUT(p));
\\n\\nThe second one, with the SEQUENTIAL, produces a recordset with the number of records equal to the number of nodes. If it's not a bug then something should be clarified, I think.\\n\\nDan\", \"post_time\": \"2012-11-07 15:45:30\" },\n\t{ \"post_id\": 2725, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"oleg\", \"post_text\": \"so why OUTPUT in the first sample prints just one line? I think it's a bug.\", \"post_time\": \"2012-11-07 15:37:26\" },\n\t{ \"post_id\": 2724, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"DSC\", \"post_text\": \"This actually makes sense.\\n\\nYour initial PIPE command is, in reality, creating the dataset with the echo command. It is executing on every node, so p winds up holding a single record actually located on each of your 50 nodes. The first result you see is actually the result of your first OUTPUT(), where those 50 records are created. Your second OUTPUT, with the PIPE, write the single record contained on that node to /tmp/names.all located on that node.\\n\\nEdit: My earlier comments about using PIPE were explicitly about the PIPE option to OUTPUT. The PIPE built-in function is an input pipe, for getting data into the system. The PIPE() function runs on all nodes all the time, as far as I can tell.\\n\\nDan\", \"post_time\": \"2012-11-07 15:27:51\" },\n\t{ \"post_id\": 2720, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"oleg\", \"post_text\": \"Thanks, now it's getting even more interesting:\\n\\nFirst, I've tried input pipe example from the book (BTW, It's really only example I can try because other using some non-standard commands for the pipe process):\\n\\n\\n//Form 2 with XML input:\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nOUTPUT(p);\\n
\\nIt did output only one record. We have 50 logical nodes on 5 physical machines, so I assume this way it randomly decided which one of the nodes to use to execute this command. \\nIt's not clear, however, how it would work if the command generated a number of records - how would they move across the nodes?\\n\\nBut more interesting things happen after I decided to add an output pipe:\\n\\n\\nnamesRecord := RECORD\\nSTRING10 Firstname{xpath('/Name/FName')};\\nSTRING10 Lastname{xpath('/Name/LName')};\\nEND;\\np := PIPE('echo <Name><FName>George</FName><LName>Jetson</LName></Name>', namesRecord, XML);\\nOUTPUT(p);\\nOUTPUT(p,,PIPE('tee /tmp/names.all')); \\n
\\n\\nNow simple output printed me 50 exactly the same rows! \\nSo this means that OUTPUT which clearly should happen after input (I'm using the result dataset in it) is affecting the way input PIPE being executed!\\n\\nRegarding the output PIPE, I've got 5 files 1 line each, i.e. 5 records total - files got overwritten since I have 10 logical nodes per one physical.\", \"post_time\": \"2012-11-07 14:18:58\" },\n\t{ \"post_id\": 2714, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"DSC\", \"post_text\": \"PIPE works, at least in 3.8.4 and 3.8.6. I'm using it.\\n\\n(Clarification: I'm using the PIPE version of OUTPUT; what follows talks about that, rather than the PIPE built-in function.)\\n\\nUnless you go to great lengths, the command would be executed on all active nodes. Note that that doesn't mean all nodes, though. For instance, if you manipulated the data in such a way that all the records you're processing wind up on one node, then only that node's external command would be executed. In most cases, this is exactly what you want.\\n\\nWhere this becomes a problem is when you want to execute commands based on the overall action instead of individual records (e.g. "disable one external service before processing a group of records, then re-enable it afterwards"). You would have every node disabling and re-enabling the service in that case, so care must be taken.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-07 12:44:00\" },\n\t{ \"post_id\": 2713, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"oleg\", \"post_text\": \"[quote="bforeman":2u1u48sq]\\n2. Yes CMDProcess was reported not working in Version 3.8 but is fixed in the next update.\\n\\nIs there same problem with the PIPE (action and/or OUTPUT option)?\\n\\n[quote="bforeman":2u1u48sq]\\n3: Anyway, where the target command suppose to be executed - on each node in parallel or in one place? If this is a single place where is it? And same for PIPE.
\\n\\n3. I think it depends on the command that you are executing. If it affects the cluster than it will be across all nodes as expected.\\n\\n\\nHmm. It's hard to believe that ECL will know anything about UNIX command line. I.e. my understanding it should just blindly execute a process and pass arguments to it.\", \"post_time\": \"2012-11-07 10:58:50\" },\n\t{ \"post_id\": 2668, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess and PIPE\", \"username\": \"bforeman\", \"post_text\": \"Hi Oleg,\\n\\n1: it is described in the Library reference as STD.System.Util.CmdProcess(), but what is in the system is lib_fileservices.FileServices.CmdProcess() - I assume it must be the same, correct?\\n
\\n\\n1. Correct. They are indeed the same, legacy version versus open source version. Depends on what cluster version you are connected to.\\n\\n2: I tried an example from the reference :\\nOUTPUT(lib_fileservices.FileServices.CmdProcess('echo','George Jetson'));\\nand it just stays in the execution state, not generating any output.\\n
\\n\\n2. Yes CMDProcess was reported not working in Version 3.8 but is fixed in the next update.\\n\\n3: Anyway, where the target command suppose to be executed - on each node in parallel or in one place? If this is a single place where is it? And same for PIPE.
\\n\\n3. I think it depends on the command that you are executing. If it affects the cluster than it will be across all nodes as expected.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-31 13:42:25\" },\n\t{ \"post_id\": 2655, \"topic_id\": 581, \"forum_id\": 8, \"post_subject\": \"CmdProcess and PIPE\", \"username\": \"oleg\", \"post_text\": \"While CmdProcess being discussed in another topic, I have a few questions as well.\\n\\n1: it is described in the Library reference as STD.System.Util.CmdProcess(), but what is in the system is lib_fileservices.FileServices.CmdProcess() - I assume it must be the same, correct?\\n\\n2: I tried an example from the reference :\\nOUTPUT(lib_fileservices.FileServices.CmdProcess('echo','George Jetson'));\\nand it just stays in the execution state, not generating any output.\\n\\n3: Anyway, where the target command suppose to be executed - on each node in parallel or in one place? If this is a single place where is it? And same for PIPE.\", \"post_time\": \"2012-10-30 13:42:16\" },\n\t{ \"post_id\": 2743, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"oleg\", \"post_text\": \"Wow! This is really helpful. Thank you for digging it to the bottom, Richard!\", \"post_time\": \"2012-11-08 15:36:11\" },\n\t{ \"post_id\": 2741, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nThanks for bringing this up -- I have learned something new about the way JOIN operates (and now need to update the JOIN docs). So here's the result of my new understanding:\\n\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-08 14:48:11\" },\n\t{ \"post_id\": 2740, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"oleg\", \"post_text\": \"As I understood from your earlier explanation, LOOKUP is implicitly a LOCAL operation only for the LEFT dataset, but global for the RIGHT one (since it's being copied entirely to all the nodes) - correct?\\nTherefore, the LOCAL modifier should either be ignored (so LOOKUP behaviour will not change), or followed (so ECL will not copy the RIGHT dataset to each node and will use just a local portion of it instead).\\n\\nI.e. in the "LOOKUP, LOCAL" combination at least one of the keywords behaves differently than expected. The reason I'm emphasizing this is because I saw quite a few pieces of code where this combination is being used.\", \"post_time\": \"2012-11-08 11:10:48\" },\n\t{ \"post_id\": 2728, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,Considering above, how "LOOKUP, LOCAL" will work? Would it make any sense at all?
Not really, because LOOKUP is implicitly a LOCAL operation.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 19:41:18\" },\n\t{ \"post_id\": 2712, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"oleg\", \"post_text\": \"Considering above, how "LOOKUP, LOCAL" will work? Would it make any sense at all?\", \"post_time\": \"2012-11-07 10:42:40\" },\n\t{ \"post_id\": 2675, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"Ghost\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2012-11-01 07:01:53\" },\n\t{ \"post_id\": 2672, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"rtaylor\", \"post_text\": \"That means if i am not using local (even when all is ON), the JOIN's will be global in all 3 cases.
The LOCAL, LOOKUP, and ALL options are all done locally, not globally. \\n
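\\n\\nFor example (just a sketch with made-up inline datasets, nothing from your code):\\n\\nrec := {UNSIGNED id, STRING1 val};\\nds1 := DATASET([{1,'A'},{2,'B'},{3,'C'}], rec);\\nds2 := DATASET([{1,'X'},{2,'Y'}], rec);\\nj := JOIN(ds1, ds2, LEFT.id = RIGHT.id,\\n          TRANSFORM(rec, SELF.val := RIGHT.val, SELF := LEFT),\\n          LOOKUP); // LOCAL would use just the local parts of both sides\\nOUTPUT(j);\\n\\nWith LOOKUP (and likewise ALL) the right-hand recordset is copied to every node, so the matching itself happens locally against whatever LEFT records each node already holds; with LOCAL only the local portions of both datasets are used.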
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-31 16:44:19\" },\n\t{ \"post_id\": 2669, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"Ghost\", \"post_text\": \"Thanks for replying \\n\\nThat means if i am not using local (even when all is ON), the JOIN's will be global in all 3 cases.\", \"post_time\": \"2012-10-31 14:48:33\" },\n\t{ \"post_id\": 2667, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"Re: How Distribution of data is effected with Join's ?\", \"username\": \"bforeman\", \"post_text\": \"LEFT OUTER – At least one record for every record in the leftset.\\nRIGHT OUTER – At least one record for every record in the rightset.\\nFULL OUTER – At least one record for every record in both the leftset and rightset.\\n\\nThese joins are based on the condition. If the condition is true, the above will happen accordingly based on the join type.\\n\\nAs to distribution, the new recordset resulting from the join will be distributed evenly across all nodes regardless of the type of JOIN.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-31 13:27:15\" },\n\t{ \"post_id\": 2666, \"topic_id\": 584, \"forum_id\": 8, \"post_subject\": \"How Distribution of data is effected with Join's ?\", \"username\": \"Ghost\", \"post_text\": \"Suppose we have 2 datasets already distributed on some field (may or may not be same)\\n\\nNeed to clarify below concepts:\\n1. Join(ds1, ds2, ..... , left outer/only) // ,all)\\nThe output distribution of data will be as per ds1 ?\\n\\n2. Join(ds1, ds2, ..... , right outer/only)\\nThe output distribution of data will be as per ds2 ?\\n\\n3. Join(ds1, ds2, ..... , full outer/only) // ,all)\\nHow the distribution be effected in this case ?\", \"post_time\": \"2012-10-31 10:09:19\" },\n\t{ \"post_id\": 2750, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"aguynamedryan\", \"post_text\": \"Thanks Gavin (and Bob)! I appreciate you putting this on your radar.\\n\\nI think I found another issue: If XMLPROJECT includes XMLTEXT in its xmltag argument, XMLPROJECT fails to interpret the xmltag argument correctly.\\n\\nI've expanded my example from before to illustrate the problem I've encountered. Please see my code below for details, specifically definitions U and V.\\n\\nThanks!\\n\\n
\\n/*\\nI've encountered some odd behavior when trying to parse some XML.\\n\\nThe XML I was parsing was really some very convoluted HTML, but I was\\ngetting very inconsistent behavior, so I grabbed the example code\\nfrom here:\\nhttp://hpccsystems.com/community/docs/ecl-language-reference/html/xml-parsing-record-and-transform-functions\\n\\nAnd then massaged the second example a bit to make it actually run.\\n\\nThe code that is below is largely the same as the original,\\nbut I've added some comments and some asserts to illustrate\\nwhere I think there are some issues with the way XPATH behaves.\\n\\nI used the interactive XPATH tester here:\\nhttp://www.whitebeam.org/library/guide/TechNotes/xpathtestbed.rhtm\\nto validate the XPATHs I used below.\\n*/\\n\\nextractedValueRec := RECORD\\n STRING check;\\n STRING value;\\n UNSIGNED cnt;\\nEND;\\n\\nextractedRec := RECORD\\n STRING name;\\n UNSIGNED cnt;\\n DATASET(extractedValueRec) values;\\nEND;\\n\\nextractedValueRec t2 := TRANSFORM\\n SELF.check := XMLTEXT('<>');\\n SELF.value := XMLTEXT(''),\\n SELF.cnt := (UNSIGNED)XMLTEXT('@count');\\nEND;\\n\\nextractedRec t1 := TRANSFORM\\n SELF.name := XMLTEXT('@name');\\n SELF.cnt := (UNSIGNED)XMLTEXT('@distinct');\\n SELF.values := XMLPROJECT('Value', t2)(cnt > 1);\\nEND;\\n\\nxmlSample := DATASET([{\\n'<XML>' +\\n '<Value count="4">Bogus</Value>' +\\n '<Field name="surname" distinct="2">' +\\n '<Value count="3">Halliday</Value>' +\\n '<Value count="2">Chapman</Value>' +\\n '</Field>' +\\n '<Field name="title" distinct="2">' +\\n '<Value count="3">Mr</Value>' +\\n '<Value count="2">Mrs</Value>' +\\n '</Field>' +\\n'</XML>'\\n}], { STRING line} );\\n \\n /*\\nWe'll start with the example's original XPATH.\\nThis one behaves as expected.\\n*/\\np := PARSE(xmlSample, line , t1, XML('/XML/Field'));\\n//OUTPUT(p);\\nASSERT(COUNT(p) = 2);\\nASSERT(p[1].name = 'surname');\\nASSERT(p[2].name = 'title');\\nASSERT(p[1].values[1].value = 'Halliday');\\nASSERT(p[2].values[2].value = 'Mrs');\\n\\n/*\\nTo my way of thinking about XPATH, appending [1] to the end of\\nthe previous XPATH should limit my results to the first Field node and\\nignore the second one.\\n\\nThis isn't what happens. Both rows are returned instead.\\n\\nIt's as if the [1] isn't there at all.\\n*/\\nq := PARSE(xmlSample, line , t1, XML('/XML/Field[1]'));\\n//OUTPUT(q);\\nASSERT(COUNT(q) = 1);\\nASSERT(q[1].name = 'surname');\\n\\n/* Similarly, if we're returning single nodes when passed an index\\nthen this should return just the second Field node.\\n\\nInstead, it returns _no_ rows at all.\\n*/\\nr := PARSE(xmlSample, line , t1, XML('/XML/Field[2]'));\\n//OUTPUT(r);\\nASSERT(COUNT(r) = 1);\\nASSERT(r[1].name = 'title');\\n\\n/*\\nThere also seem to be issues when qualifiers are used in the middle\\nof a path.\\n\\nThis should return just the Values for the first Field.\\n\\nPlease note that I'm now parsing using the t2 transformation,\\nso I'm only parsing Value nodes.\\n\\nWe do get two Value nodes back, but they don't both belong\\nto the surname field. 
Instead, we get the second Value node\\nfrom each of the two Fields.\\n*/\\ns := PARSE(xmlSample, line , t2, XML('/XML/Field[1]/Value'));\\n//OUTPUT(s);\\nASSERT(COUNT(s) = 2);\\nASSERT(s[1].value = 'Halliday');\\nASSERT(s[2].value = 'Chapman');\\n\\n/*\\nAnd I believe XPATH should return the two Value nodes for the second\\nField node here, but instead no rows are returned.\\n*/\\nt := PARSE(xmlSample, line , t2, XML('/XML/Field[2]/Value'));\\n//OUTPUT(t);\\nASSERT(COUNT(t) = 2);\\nASSERT(t[1].value = 'Mr');\\nASSERT(t[2].value = 'Mrs');\\n\\n\\n/*\\nNew examples from 2012-11-08\\n\\nFirst off, it appears that XMLTEXT and XMLPROJECT \\nboth handle node indexing just fine!\\n\\nBut, I've encountered another odd issue.\\n\\nIt appears that using XMLPROJECT(myPath, transFunc) where\\nmyPath has a XMLTEXT in its definition causes XMLPROJECT\\nto fail to interpret myPath correctly.\\n\\n=== What I'm Trying to Do ===\\nSome of the HTML that I'm processing has slight variations in \\nthe XPATH I need to use.\\n\\nFor instance, some pages will have an extra element inserted in the DOM\\nand so the XPATHs I need to define need to first check if the extra\\nelement exists and, if so, increment the index value in my XPATH.\\n\\ne.g. I might have an extra row in a table that I don't need so my\\nXPATH needs to go from table/tr[1]/td to table/tr[2]/td\\n\\nMy approach was to define a local attribute at the top of the\\ntransform function that uses XMLTEXT('table/tr[1]/td/@href') or\\nsomething like that to determine if the element was present.\\n\\nI wrapped that in an IF() like so:\\nmyPath := 'table/tr[' + IF(XMLTEXT('table/tr[1]/td/@href') != '', '2', '1') + ']/td';\\n\\nThen I could use myPath when in some of my XMLTEXTs' xmltag arguments like so:\\nSELF.some_attr := XMLTEXT(myPath + '/p', someTransform);\\n\\n=== What's Failing ===\\nXMLPROJECT seems to fail when there is XMLTEXT \\nsomewhere in the definition in its xmltag argument.\\n\\nI have included two examples. The first works, verifying that my crazy\\napproach works when there is no XMLTEXT in the definition.\\n\\nThe second fails once I introduce XMLTEXT.\\n*/\\nextractedRec t3 := TRANSFORM\\n localNum := IF('some text' != '', '2', '1');\\n SELF.values := XMLPROJECT('/XML/Field[' + localNum + ']/Value', t2);\\n SELF := [];\\nEND;\\n\\nu := PARSE(xmlSample, line , t3, XML);\\n//OUTPUT(u);\\nASSERT(COUNT(u) = 1);\\nASSERT(COUNT(u[1].values) = 2);\\nASSERT(u[1].values[1].value = 'Mr');\\nASSERT(u[1].values[2].value = 'Mrs');\\n\\n/*\\nThis one fails, but as far as I can tell, it should be just like\\nthe example before it.\\n\\nI realize that we're not supposed to worry about order of execution\\nbut it seems to me there might be a timing issue here where the\\nXMLPROJECT occurs before localNum's XMLTEXT is finished?\\n\\nOr, if I'm just flat-out abusing local attribute definitions in a\\ntransform function, let me know.\\n*/\\nextractedRec t4 := TRANSFORM\\n localNum := IF(XMLTEXT('<>') != '', '2', '1');\\n \\n SELF.name := XMLTEXT('<>'); // Sanity check -- make sure XMLTEXT('<>') is really defined\\n SELF.cnt := (UNSIGNED) localNum; // Sanity check -- we'll see the number is set!\\n SELF.values := XMLPROJECT('/XML/Field[' + localNum + ']/Value', t2);\\nEND;\\n\\nv := PARSE(xmlSample, line , t4, XML);\\nOUTPUT(v);\\nOUTPUT(v[1].name);\\nOUTPUT(v[1].cnt);\\nASSERT(COUNT(v) = 1);\\nASSERT(COUNT(v[1].values) = 2);\\nASSERT(v[1].values[1].value = 'Mr');\\nASSERT(v[1].values[2].value = 'Mrs');\\n
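\\n\\nIn the meantime, one possible workaround (untested sketch, reusing t1 and xmlSample from the code above) might be to parse at the unqualified path and do the indexing on the parsed result rather than inside the XPATH:\\n\\nallFields := PARSE(xmlSample, line, t1, XML('/XML/Field'));\\nsecondField := allFields[2];\\nOUTPUT(secondField.values);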
\", \"post_time\": \"2012-11-09 02:01:38\" },\n\t{ \"post_id\": 2698, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"ghalliday\", \"post_text\": \"I have opened an issue in jira to track the issue. See https://track.hpccsystems.com/browse/HPCC-8210\", \"post_time\": \"2012-11-06 10:51:27\" },\n\t{ \"post_id\": 2680, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"aguynamedryan\", \"post_text\": \"Hi Bob,\\n\\nPerhaps I was a bit careless in my wording. When I said "poorly formed" you might have thought ECL's XML parser wouldn't even be able to parse the HTML and throw an exception. In fact, that was exactly what happened to me at first.\\n\\nI've run all the HTML through a program called "tidy" which at least turns the HTML into something ECL's XML parser is able parse without throwing an exception. So while ECL can now provide a parsed XML document, I now need a robust XPATH command to let me navigate it.\\n\\nI appreciate your proposed solution but I think it would only work nicely if the tags near the data I need had some sort of unique name or attribute. Unfortunately, most of the data is surrounded by <td> or even <font> tags. So, by "poorly formed" I meant "lacking semantic markup that would allow me to easily navigate to the specific tags and bits of data I need". Even the data itself isn't always something I can match with a pattern so I can't just grab the contents of ALL <td> tags and match a pattern against them.\\n\\nI imagine I'm not the first to try to tackle this issue, I too eagerly await some advice from the community.\\n\\nThanks!\", \"post_time\": \"2012-11-01 17:48:34\" },\n\t{ \"post_id\": 2679, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"bforeman\", \"post_text\": \"Hi Ryan,\\n\\nWhen XML or HTML is poorly formed, the only recourse that you have sometimes is to treat the whole document as one large record and just parse what you need.\\n\\nHere is an example:\\n\\nr := RECORD\\nSTRING txt;\\nEND;\\n\\nindata := DATASET('~class::bmf::parsedoc',{STRING txt},\\n CSV(TERMINATOR(['</SEC-DOCUMENT>'])));\\n\\npattern ptag := 'rr:AnnualReturn2009';\\npattern ptag2 := 'rr:AnnualReturn2010';\\npattern pratio := any*;\\npattern p := '<' ptag any* '>' pratio '</' ptag '>'|'<' ptag2 any* '>' pratio '</' ptag2 '>';\\n\\noutrec := RECORD\\nstring ratio := MATCHTEXT(p/pratio);\\nend;\\n\\nout := PARSE(indata, txt, p, outrec, FIRST);\\nout;\\n
\\n\\nThis file was a dirty XBRL file, but there was information with interesting tags that needed to be parsed. \\n\\nNote that the DATASET treats the XBRL file as a variable length (CSV), with no field separators and a TERMINATOR string that marks the end of the document.\\n\\nFrom there you can PARSE the whole stream and dig out what you need with PATTERN statements as shown.\\n\\nHopefully others will see this thread and post other examples to help get you started.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-01 17:31:47\" },\n\t{ \"post_id\": 2678, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"aguynamedryan\", \"post_text\": \"Hi Bob,\\n\\nThanks for looking into this.\\n\\nUltimately, I'm trying to ingest some HTML that doesn't provide many class names or other identifying attributes for me to tease out the tags/data I need.\\n\\nDo you know of other projects that have successfully ingested poorly formed HTML from a variety of sources? I'd love some pointers!\\n\\nI've tried looking through the Data Descriptors and Simple Example Programs to see how others have parsed XML/HTML but most have well-formed XML to use and so they aren't exercising XPATH, XMLPROJECT, and XMLTEXT like I need probably will need to.\\n\\nThanks again!\", \"post_time\": \"2012-11-01 17:19:07\" },\n\t{ \"post_id\": 2677, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"bforeman\", \"post_text\": \"Ryan,\\n\\nOn further review:\\n\\nThe documentation does say the following for using an xmltag in parsing:\\n\\nA string constant naming the XPATH to the tag containing the data (see the XPATH\\nSupport section under the RECORD structure discussion). This may contain an\\ninstance number (such as tagname[1]).
\\n\\nFrom this you would assume it should work. However, there may be some restrictions on where/how it can be used. Checking with the development team now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-01 13:17:35\" },\n\t{ \"post_id\": 2676, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"Re: XPATH Oddities\", \"username\": \"bforeman\", \"post_text\": \"Hi Ryan,\\n\\nReading the documentation, I noticed the following:\\n\\nXPATH support is a limited subset of the full XPATH specification, basically expressed as:\\n\\nnode[qualifier] / node[qualifier] ...\\n\\nnode Can contain wildcards. \\n\\nqualifier Can be a node or attribute, or a simple single expression of equality, inequality, or numeric or alphanumeric comparisons, or node index values. No functions or inline arithmetic, etc. are supported. String comparison is indicated when the right hand side of the expression is quoted. \\n\\nThese operators are valid for comparisons: <, <=, >, >=, =, !=\\n\\nAn example of a supported xpath:\\n\\n/a/*/c*/*d/e[@attr]/f[child]/g[@attr="x"]/h[child>="5"]/i[@x!="2"]/j
\\n\\nSo at first glance you may be using some XPATH syntax that is simply not supported in ECL. Checking with some colleagues right now to confirm this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-01 13:01:50\" },\n\t{ \"post_id\": 2673, \"topic_id\": 585, \"forum_id\": 8, \"post_subject\": \"XPATH Oddities\", \"username\": \"aguynamedryan\", \"post_text\": \"I'm quite new to HPCC/ECL and I've been tasked with parsing some HTML files into usable data.\\n\\nI've been trying to use PARSE, XML(xmltag), XMLPROJECT, and XPATH to parse out the data, but I keep running into problems that I don't quite understand.\\n\\nI finally decided to tinker with the example code from:\\nhttp://hpccsystems.com/community/docs/ecl-language-reference/html/xml-parsing-record-and-transform-functions\\nto see if I couldn't figure out how to get XPATH to work for me.\\n\\nI think I've run across some odd behavior from XPATH. I wrote a file that had sample code for the issues I found, but I can't seem to attach it to this post. I've copied and pasted the contents below this message instead.\\n\\nI'm considering submitting a bug against what I've found, but I figured I'd hit the forums first to see if:\\n1) This is simply user error and someone can correct me\\n2) These are known issues and I just missed seeing them in the documentation\\n\\nThanks!\\n- Ryan\\n\\n\\n/*\\nI've encountered some odd behavior when trying to parse some XML.\\n\\nThe XML I was parsing was really some very convoluted HTML, but I was\\ngetting very inconsistent behavior, so I grabbed the example code\\nfrom here:\\nhttp://hpccsystems.com/community/docs/ecl-language-reference/html/xml-parsing-record-and-transform-functions\\n\\nAnd then massaged the second example a bit to make it actually run.\\n\\nThe code that is below is largely the same as the original,\\nbut I've added some comments and some asserts to illustrate\\nwhere I think there are some issues with the way XPATH behaves.\\n\\nI used the interactive XPATH tester here:\\nhttp://www.whitebeam.org/library/guide/TechNotes/xpathtestbed.rhtm\\nto validate the XPATHs I used below.\\n*/\\n\\nextractedValueRec := RECORD\\n STRING value;\\n UNSIGNED cnt;\\nEND;\\n\\nextractedRec := RECORD\\n STRING name;\\n UNSIGNED cnt;\\n DATASET(extractedValueRec) values;\\nEND;\\n\\nextractedValueRec t2 := TRANSFORM\\n SELF.value := XMLTEXT(''),\\n SELF.cnt := (UNSIGNED)XMLTEXT('@count');\\nEND;\\n\\nextractedRec t1 := TRANSFORM\\n SELF.name := XMLTEXT('@name');\\n SELF.cnt := (UNSIGNED)XMLTEXT('@distinct');\\n SELF.values := XMLPROJECT('Value', t2)(cnt > 1);\\n END;\\n\\nxmlSample := DATASET([{\\n'<XML>' +\\n '<Value count="4">Bogus</Value>' +\\n '<Field name="surname" distinct="2">' +\\n '<Value count="3">Halliday</Value>' +\\n '<Value count="2">Chapman</Value>' +\\n '</Field>' +\\n '<Field name="title" distinct="2">' +\\n '<Value count="3">Mr</Value>' +\\n '<Value count="2">Mrs</Value>' +\\n '</Field>' +\\n'</XML>'\\n}], { STRING line} );\\n\\n /*\\nWe'll start with the example's original XPATH.\\nThis one behaves as expected.\\n*/\\np := PARSE(xmlSample, line , t1, XML('/XML/Field'));\\nASSERT(COUNT(p) = 2);\\nASSERT(p[1].name = 'surname');\\nASSERT(p[2].name = 'title');\\nASSERT(p[1].values[1].value = 'Halliday');\\nASSERT(p[2].values[2].value = 'Mrs');\\n\\n/*\\nTo my way of thinking about XPATH, appending [1] to the end of\\nthe previous XPATH should limit my results to the first Field node and\\nignore the second one.\\n\\nThis isn't what happens. 
Both rows are returned instead.\\n\\nIt's as if the [1] isn't there at all.\\n*/\\nq := PARSE(xmlSample, line , t1, XML('/XML/Field[1]'));\\nASSERT(COUNT(q) = 1);\\nASSERT(q[1].name = 'surname');\\n\\n/* Similarly, if we're returning single nodes when passed an index\\nthen this should return just the second Field node.\\n\\nInstead, it returns _no_ rows at all.\\n*/\\nr := PARSE(xmlSample, line , t1, XML('/XML/Field[2]'));\\nASSERT(COUNT(r) = 1);\\nASSERT(r[1].name = 'title');\\n\\n/*\\nThere also seem to be issues when qualifiers are used in the middle\\nof a path.\\n\\nThis should return just the Values for the first Field.\\n\\nPlease note that I'm now parsing using the t2 transformation,\\nso I'm only parsing Value nodes.\\n\\nWe do get two Value nodes back, but they don't both belong\\nto the surname field. Instead, we get the second Value node\\nfrom each of the two Fields.\\n*/\\ns := PARSE(xmlSample, line , t2, XML('/XML/Field[1]/Value'));\\nASSERT(COUNT(s) = 2);\\nASSERT(s[1].value = 'Halliday');\\nASSERT(s[2].value = 'Chapman');\\n\\n/*\\nAnd I believe XPATH should return the two Value nodes for the second\\nField node here, but instead no rows are returned.\\n*/\\nt := PARSE(xmlSample, line , t2, XML('/XML/Field[2]/Value'));\\nASSERT(COUNT(t) = 2);\\nASSERT(t[1].value = 'Mr');\\nASSERT(t[2].value = 'Mrs');\\n
\", \"post_time\": \"2012-10-31 22:51:22\" },\n\t{ \"post_id\": 2683, \"topic_id\": 586, \"forum_id\": 8, \"post_subject\": \"Re: How to define a record with hundreds of fields of same t\", \"username\": \"rtaylor\", \"post_text\": \"I am trying to define a record with hundreds or thousands of fields, all of the same type real.
What kind of file are they in? Is it a fixed-length record flat file, or are the fields delimited somehow? Or are there a variable number of fields in each record? Or ... ??\\n\\nBottom line is that Bob's suggestion to look at our ECL code generation tools is probably what you need, but I'd like more information so i can have a think on alternatives.\\n\\nRichard\", \"post_time\": \"2012-11-01 20:41:53\" },\n\t{ \"post_id\": 2682, \"topic_id\": 586, \"forum_id\": 8, \"post_subject\": \"Re: How to define a record with hundreds of fields of same t\", \"username\": \"bforeman\", \"post_text\": \"Look at the Language Reference for the use of MACRO and the Template Language. You can probably build a MACRO that will build this type of RECORD structure fairly easily. Look at #FOR to start.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-01 19:54:33\" },\n\t{ \"post_id\": 2681, \"topic_id\": 586, \"forum_id\": 8, \"post_subject\": \"How to define a record with hundreds of fields of same type\", \"username\": \"hzhang\", \"post_text\": \"I am trying to define a record with hundreds or thousands of fields, all of the same type real. How to do this? Here is an example I can think of working for small number of fields that I can still enumerate. \\n\\nmyrec := record\\nset of real fieldset := [1,2,3,4,5,6,7,8];\\nend;\\n\\nHow about I have 100 fields. Do I have to enumerate all fields one by one?\\n\\nThanks.\", \"post_time\": \"2012-11-01 19:30:00\" },\n\t{ \"post_id\": 2696, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks Dan...\\n\\nLooks like some proxy issues..Will check the same..\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-11-05 14:50:17\" },\n\t{ \"post_id\": 2695, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"DSC\", \"post_text\": \"This works for me in Thor:\\n\\nOutRec1 := RECORD\\n string ip {xpath('/Response/Ip')};\\n string country_code {xpath('/Response/CountryCode')};\\n string region_code {xpath('/Response/RegionCode')};\\n string region_name {xpath('/Response/RegionName')};\\n string city {xpath('/Response/City')};\\n string zip_code {xpath('/Response/ZipCode')};\\n string latitude {xpath('/Response/Latitude')};\\n string longitude {xpath('/Response/Longitude')};\\n string metro_code {xpath('/Response/MetroCode')};\\nEND;\\n\\nraw := HTTPCALL('http://freegeoip.net/xml/203.99.197.54', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);
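\\n\\nAnd if the address isn't hard-coded, the URL can just be assembled by concatenation (sketch; ip here is an arbitrary name, OutRec1 as above):\\n\\nip := '203.99.197.54';\\nraw2 := HTTPCALL('http://freegeoip.net/xml/' + ip, 'GET', 'text/xml', OutRec1);\\nOUTPUT(raw2);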
\", \"post_time\": \"2012-11-05 14:39:37\" },\n\t{ \"post_id\": 2694, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIt was an example host name.\\n\\nThe original host name was \\n\\nhttp://freegeoip.net/xml/203.99.197.54\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-11-05 14:27:07\" },\n\t{ \"post_id\": 2693, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"DSC\", \"post_text\": \"The error message you cite makes me believe that the system cannot resolve the hostname correctly.. Was that an example hostname or were you really trying to reach abc.com?\", \"post_time\": \"2012-11-05 14:11:41\" },\n\t{ \"post_id\": 2692, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThnks a lot dan..\\n\\nTried using httpcall, getting the error of this format.\\n\\nError: System error: 3000: <Error><text>assert(hostname) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Ubuntu-10.04-i386/HPCC-Platform/system/jlib/jsocket.cpp, line 775</text><url>http://abc.com:80/xml/10.42.63.122</url></Error> (in HTTP dataset G1 E2)
\\n\\nSince it's just a GET method, I'm not using SOAPCALL.\\n\\nNot sure if I need to add something else.\\n\\n\\nSample Code:\\n\\nOutRec1 := RECORD\\n    STRING500 Location{xpath('Response/location')};\\nEND;\\n\\nip := 'http://abc.com';\\n\\nraw := HTTPCALL(ip,'GET', 'text/xml', OutRec1);\\n\\noutput(raw);
\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-11-05 06:59:35\" },\n\t{ \"post_id\": 2685, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Re: Outbound HTTP Call\", \"username\": \"DSC\", \"post_text\": \"You might want to check out this older thread: https://hpccsystems.com/bb/viewtopic.php?f=10&t=456.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-02 11:29:11\" },\n\t{ \"post_id\": 2684, \"topic_id\": 587, \"forum_id\": 8, \"post_subject\": \"Outbound HTTP Call\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a log file with Client IP Addresses which I want to geo-ip encode using a service that exposes a HTTP URL and returns an XML.\\n\\nHow can I make an outbound HTTP call (and thereafter use the response XML to parse for further processing).\\n\\nRegards\\nksviswa\", \"post_time\": \"2012-11-02 06:42:22\" },\n\t{ \"post_id\": 2765, \"topic_id\": 592, \"forum_id\": 8, \"post_subject\": \"Re: Command line to deschedule a job\", \"username\": \"JimD\", \"post_text\": \"Sorry you had trouble. We also encountered this and opened an issue about it. (https://track.hpccsystems.com/browse/HPCC-8202.) \\n\\nI see now that the issue was resolved. (will be in a future release)\\n\\nJim\", \"post_time\": \"2012-11-12 17:44:09\" },\n\t{ \"post_id\": 2763, \"topic_id\": 592, \"forum_id\": 8, \"post_subject\": \"Re: Command line to deschedule a job\", \"username\": \"tmurphy\", \"post_text\": \"I tried 'scheduleadmin <my dali ip> remove <my wuid>' and I get a segmentation fault.\", \"post_time\": \"2012-11-12 16:32:13\" },\n\t{ \"post_id\": 2723, \"topic_id\": 592, \"forum_id\": 8, \"post_subject\": \"Re: Command line to deschedule a job\", \"username\": \"JimD\", \"post_text\": \"There is a scheduleadmin command line interface in /opt/HPCCSystems/bin/\\n\\nscheduleadmin <daliip> remove <wuid>\\n\\nfor example:\\n\\nscheduleadmin 192.168.11.11 remove W20121103-100635\\n\\nA guide for ECL Scheduling is being finalized and will be available soon.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2012-11-07 15:20:42\" },\n\t{ \"post_id\": 2722, \"topic_id\": 592, \"forum_id\": 8, \"post_subject\": \"Re: Command line to deschedule a job\", \"username\": \"rtaylor\", \"post_text\": \"Not that I am aware of. It sounds to me like such a function would be a good addition to the Standard Library. You could either submit a JIRA issue for this, or you could delve into the platform source code and write one that you contribute to the community.\\n\\nRichard\", \"post_time\": \"2012-11-07 15:04:16\" },\n\t{ \"post_id\": 2708, \"topic_id\": 592, \"forum_id\": 8, \"post_subject\": \"Command line to deschedule a job\", \"username\": \"tmurphy\", \"post_text\": \"We use the File.MonitorFile feature to schedule a job that shows up in EclWatch under Clusters -> Scheduler. \\n\\nIs there a way to de-schedule such a job from the command line (or with a library function)? We don't want to have to do this manually in EclWatch.\", \"post_time\": \"2012-11-06 21:15:24\" },\n\t{ \"post_id\": 3767, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"maruyue\", \"post_text\": \"I strongly recommend that HPCC developers should learn something related to hadoop. \\n\\nIn hadoop, hive/impala is similar to thor. 
To reduce disk I/O, RCFile was developed.\\n\\nIn Google, Dremel uses ColumnIO to reduce disk I/O.\", \"post_time\": \"2013-03-18 15:18:37\" },\n\t{ \"post_id\": 2806, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"arunarav\", \"post_text\": \"Richard mentioned in a previous post:\\n\\nonce we have the data in memory we tend to try to keep it in memory throughout the rest of the process
\\n\\nIs it really cost effective to hold say 1 TB of dataset in memory? We may end up having 100s of nodes each with high RAM capacity. \\n\\n\\nbut the first thing that you should do (as you're working your standard ETL process to prep the data for actual use in queries) is to get that data into flat files\\n...\\n\\nI would also suggest extracting the actual data from that XML \\n
\\n\\nThe proof-of-concept we are currently dealing with is extremely XML centric with very complex nested structures. The base data is all in XML. If we have to code up ETL code to transform to-and-fro, it would introduce significant coding effort especially in the case of 'XML retrieval' where we have to return complex XML structures for various queries as opposed to the current way where we simple use PARSE and XPATH to return the XML.\\n\\nIn addition, there are continuous updates of sub-sections of the XML for which we would have to write custom ETLs. Currently we just store a blob of XML into a field and use PARSE/ Xpath to serve queries.\\n\\nIn general, we would also lose the xpath capabilities in HPCC if we go down the flat file path. HPCC's XML syntax is much more elegant than competing frameworks. \\n\\nIt had seemed that HPCC's XML parsing strengths would benefit XML-centric dataset situations but the 'reading file from disk' seems to be a core issue we have to solve for. (In fact, I am running a test on a 10-way 'instant cloud' cluster (Amazon aws) for a 500 GB dataset (XML) and it is taking an hour to complete the read operation).\\n\\n\\nI am just pointing out that in this particular use case, getting XML <-> Flat files is very effort intensive (and therefore error-prone and introduce dependencies) just to bring the performance on par with Hadoop (which allows storing blobs of XML in Hbase thereby not needing any extra ETL and retrieves equally quickly).\\n\\nRegards\\nArun\", \"post_time\": \"2012-11-18 12:35:55\" },\n\t{ \"post_id\": 2777, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"richardkchapman\", \"post_text\": \"When reading a file from disk, that is arranged as records end to end, there is really very little alternative (so far as disk IO is concerned) to reading the entire contents. Reading a few bytes then seeking to the end of the record would not reduce the amount transferred from the disk, which will be transferring blocks at a time. What ECL _will_ do in such cases is ensure that only the fields you want are kept in memory and passed on to the rest of the code for processing. \\n\\nWhen reading from HBase, you are reading from an index that is arranged as key-value pairs (with the key corresponding typically to the column) ad are thus reading from a file that has been specifically arranged in order to be fast to read column by column. If you want similar behaviour in HPCC, you can create an INDEX and read from that (you'd want to take a little care to ensure that the index was reasonably distributed though - for this purpose a NOROOT index might be the most effective).\\n\\nAs Richard T has pointed out though, generally you would be reading the entire data in your ETL phase (in thor), and selecting individual records (or sets of records) via INDEXes in the delivery phase (using Roxie).\", \"post_time\": \"2012-11-14 16:19:03\" },\n\t{ \"post_id\": 2770, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"rtaylor\", \"post_text\": \"Arun,In addition, I can filter by column family and a host of other filter criteria in HBase.\\n\\nJust trying to get solutions to avoid the data transfer and disk I/O.
The underlying design of data handling in HPCC is fundamentally different from the Hadoop HDFS. \\n\\nI am no Hadoop expert, but my understanding is that in HDFS, data is stored in key-value pairs that are aggregated into blocks of data. I know little about HBase other than that it appears to be a file system built on top of HDFS to allow you to treat Hadoop data as if it were in standard data files.\\n\\nIn HPCC, on the other hand, the data is actually stored in standard ISAM-type files. The data is distributed across the nodes as a single logical dataset comprised of separate physical files on each node.\\n\\nSince HBase is sitting on top of HDFS, I would expect that it would be optimized for reading a single field (a single set of key-value pairs) from one of its logical files. HPCC, however, must operate the way I described in my previous post. \\n\\nTherefore, my first suggestion, if you want to make disk reads go faster, is to NOT use the CSV format. Sure, data will come in that way from your data sources, but the first thing that you should do (as you're working your standard ETL process to prep the data for actual use in queries) is to get that data into flat files and then build indexes to serve up the data in your queries. Also, I note in your previous example code that the individual fields in the file contain XML text. I would also suggest extracting the actual data from that XML and storing it in its own separate fields. We have ECL classes available that will teach the entire process. Just to add context, we are conducting some perf benchmarks for a customer - Hadoop vs HPCC. The file read on HPCC takes 20+ minutes but the actual "meat" of the report processing (parse, project, output) takes seconds. The equivalent HBase-based report (same volume) takes just a couple of minutes for the complete processing presumably because of the above filter which cuts down on the unnecessary data transfer.
Sure, I would expect that kind of result from that kind of test -- try it again with flat files instead of CSV and let me know what happens to your timings. My expectation is that the HPCC time will be faster than with CSV but still not as fast as Hadoop (because of the differences between HDFS and the HPCC file system).\\n\\nBut my question is -- is this test a valid comparison between the two tools? Is this kind of simple read of an un-indexed dataset the kind of actual work the customer will want to do with HPCC? A simple one-field read and parse into relevant information? In HPCC a more standard way of operating would be to process the data contained in all the fields in that single pass. \\n\\nInitial disk read speed certainly is important, but for the kind of Big Data work we do normally it is a very small aspect of the whole job, because, once we have the data in memory we tend to try to keep it in memory throughout the rest of the process. By utilizing the sheer "horsepower" of hundreds of nodes working in parallel we can get a lot of work done at once with a "brute force" approach instead of trying to be "elegant" and finesse a solution to a given problem. IOW, why try to find matching records in multiple datasets for a single person at query time when you have the power to pre-build all matching records for all people in all your datasets at once and just use the result at query time?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-13 15:48:43\" },\n\t{ \"post_id\": 2769, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"arunarav\", \"post_text\": \"Richard,\\n\\nIn HPCC, what are my options to reduce disk I/O and needless data transfer if all I am interested is just the contents of one column?\\n\\nMy intention is not to compare but to show a parallel. In HBase for example, I could use a filter like this which reads the entire equivalent dataset with significantly lesser time:\\n\\n\\nScan scan = new Scan();\\nscan.addColumn(Bytes.toBytes("column_family"),Bytes.toBytes("some_column"))\\n....\\n
\\n\\nIn addition, I can filter by column family and a host of other filter criteria in HBase.\\n\\nJust trying to get solutions to avoid the data transfer and disk I/O. \\n\\nJust to add context, we are conducting some perf benchmarks for a customer - Hadoop vs HPCC. The file read on HPCC takes 20+ minutes but the actual "meat" of the report processing (parse, project, output) takes seconds. The equivalent HBase-based report (same volume) takes just a couple of minutes for the complete processing presumably because of the above filter which cuts down on the unnecessary data transfer.\\n\\nThanks\\nArun\", \"post_time\": \"2012-11-13 09:10:59\" },\n\t{ \"post_id\": 2764, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"rtaylor\", \"post_text\": \"Arun,My question pertains to how to optimize reading the large logical file? If HPCC knows that only one column is to be used in subsequent operations, why does HPCC read the entire file?
Given that the file you're reading from is a CSV file, how else would you suggest the data be found other than reading each record in turn from the entire file? \\n\\nSince CSV is a variable-length record format text file without indexing, the only way to determine where the next record begins is to read the previous records. IOW, how can you know where record #2 starts if you haven't read record #1 to see how long it is? Also, since the CSV format uses field delimiters to indicate where each individual field within each record begins, how can you know where the fifth field's data starts unless you first see how many bytes fields one through four occupy? Yes, you could certainly optimize the file read process by indexing the file, but that would not help a lot here since what you want is a single field from each record. \\n\\nYou need to understand that HPCC is not an RDBMS system that will create and use indexes for you "under the covers" in some magical way, but simply a brute-force, massively parallel data manipulation machine using simple ISAM files (and the "I" part of ISAM, the indexes, are built by you, generally for use only in Roxie queries) -- the "magic" here is in the raw computing power it can bring to bear on a given task.\\n\\nMy suggestion of using TABLE was all about optimizing memory usage once your data is in memory -- that's where you can make a difference in performance. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-12 16:59:50\" },\n\t{ \"post_id\": 2762, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"arunarav\", \"post_text\": \"When I ran a test, the time to perform CSV read is similar in spite of using a vertical slice. \\n\\nWithout Table operation test code:\\n\\n\\nSomeRecordStruct := RECORD\\n\\nINTEGER ID;\\n// 50 snippets of varying sizes of XML in each field\\nSTRING XMLSnippet1;\\nSTRING XMLSnippet2;\\n...\\n...\\nSTRING XMLSnippet50;\\nEND;\\n\\nSome_Recordset := DATASET('~thor::db::Sample.CSV',SomeRecordStruct,CSV);\\n\\noutput(Some_Recordset,,'~thor::db::output_test.CSV',CSV,OVERWRITE);
\\n\\n\\nWith Table operation:\\n\\n\\n\\nSomeRecordStruct := RECORD\\n\\nINTEGER ID;\\n// 50 snippets of varying sizes of XML in each field\\nSTRING XMLSnippet1;\\nSTRING XMLSnippet2;\\n...\\n...\\nSTRING XMLSnippet50;\\nEND;\\n\\nSome_Recordset := DATASET('~thor::db::Sample.CSV',SomeRecordStruct,CSV);\\n\\nSliceOfTheData := RECORD\\n\\n\\tjustOneColumn := SomeRecordStruct.XMLSnippet2;\\n\\nEND;\\n\\n\\njustOneColumn_recordset := Table (Some_Recordset , SliceOfTheData );\\n\\noutput(justOneColumn_recordset,,'~thor::db::justOneColumn_output_test.CSV',CSV,OVERWRITE);
\\n\\n> The time to perform CSV read is the same in both tests. \\n> The difference is in the time taken for the output operation since the 'justOneColumn_recordset' references a lesser number of columns hence the output operation is faster.\\n\\n~~~~~\\n\\nMy question pertains to how to optimize reading the large logical file? If HPCC knows that only one column is to be used in subsequent operations, why does HPCC read the entire file? \\n\\nthanks\\narun\", \"post_time\": \"2012-11-12 05:57:29\" },\n\t{ \"post_id\": 2757, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Re: Does HPCC optimize logical file reads?\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nThat's the purpose of the TABLE function http://hpccsystems.com/community/docs/ecl-language-reference/html/table\\n\\nThe "vertical slice" form is what you want.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-09 19:49:29\" },\n\t{ \"post_id\": 2751, \"topic_id\": 599, \"forum_id\": 8, \"post_subject\": \"Does HPCC optimize logical file reads?\", \"username\": \"arunarav\", \"post_text\": \"Does HPCC optimize reading large sized logical files when only one or two fields in the record structure of the file are to be used in subsequent 'actions'?\\n\\nExample: Logical file: Employee.csv (say 1 TB size)\\nName | Age | Address | Department \\n\\nIf the action uses only the address field, which is only 10 MB out of the entire 1 TB, does HPCC perform Disk I/O for the entire logical file or just the subset (ie the address)?\\n\\nThanks\\nArun\", \"post_time\": \"2012-11-09 02:10:13\" },\n\t{ \"post_id\": 2768, \"topic_id\": 602, \"forum_id\": 8, \"post_subject\": \"Re: Std lib CmdProcess does not work\", \"username\": \"tmurphy\", \"post_text\": \"Ok, I found another post that has a reply indicating that CmdProcess is broken in our version. So never mind.\", \"post_time\": \"2012-11-12 23:08:15\" },\n\t{ \"post_id\": 2767, \"topic_id\": 602, \"forum_id\": 8, \"post_subject\": \"Std lib CmdProcess does not work\", \"username\": \"tmurphy\", \"post_text\": \"If I run this:\\n\\nIMPORT STD;\\nOUTPUT(STD.System.Util.CmdProcess('echo','hello'));\\n\\nI get this:\\n\\nError: System error: 0: Error loading /var/lib/HPCCSystems/myeclccserver/libW20121112-153602.so: /var/lib/HPCCSystems/myeclccserver/libW20121112-153602.so: undefined symbol: fsCmdProcess2\\n\\nWe're running community_3.8.2-1\", \"post_time\": \"2012-11-12 21:40:41\" },\n\t{ \"post_id\": 3533, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"The log output seems to be from the libhdfs based H2H. I would uninstall the previous package (hpccsystems-hdfsconnector), then reinstall the new webhdfs based connector (hpccsystems-webhdfsconnector) and retry.\", \"post_time\": \"2013-02-22 13:53:28\" },\n\t{ \"post_id\": 3528, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"hi,\\nI installed H2H but when i run code for reading or writing from/to hadoop i get error\\n\\nScript starting\\nRunning as user: hpcc\\nIncoming params: \\n-si -nodeid 0 -clustercount 1 -reclen 27 -filename /user/hduser/mydata -format FLAT -host 172.20.104.226 -port 50070 \\nnodeid: 0\\nFormat: FLATlog4j:ERROR Could not connect to remote log4j server at [localhost]. 
We will try again later.\\n13/02/21 15:25:25 ERROR security.UserGroupInformation: PriviledgedActionException as:hpcc cause:java.io.IOException: Call to INFPH01464U/172.20.104.226:50070 failed on local exception: java.io.EOFException\\nException in thread "main" java.io.IOException: Call to INFPH01464U/172.20.104.226:50070 failed on local exception: java.io.EOFException\\n\\tat org.apache.hadoop.ipc.Client.wrapException(Client.java:1144)\\n\\tat org.apache.hadoop.ipc.Client.call(Client.java:1112)\\n\\tat org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)\\n\\tat $Proxy1.getProtocolVersion(Unknown Source)\\n\\tat org.apache.hadoop.ipc.RPC.getProxy(RPC.java:411)\\n\\tat org.apache.hadoop.hdfs.DFSClient.createRPCNamenode(DFSClient.java:135)\\n\\tat org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:276)\\n\\tat org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:241)\\n\\tat org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:100)\\n\\tat org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:1411)\\n\\tat org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:66)\\n\\tat org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:1429)\\n\\tat org.apache.hadoop.fs.FileSystem.get(FileSystem.java:254)\\n\\tat org.apache.hadoop.fs.FileSystem$1.run(FileSystem.java:117)\\n\\tat org.apache.hadoop.fs.FileSystem$1.run(FileSystem.java:115)\\n\\tat java.security.AccessController.doPrivileged(Native Method)\\n\\tat javax.security.auth.Subject.doAs(Subject.java:416)\\n\\tat org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)\\n\\tat org.apache.hadoop.fs.FileSystem.get(FileSystem.java:115)\\nCaused by: java.io.EOFException\\n\\tat java.io.DataInputStream.readInt(DataInputStream.java:392)\\n\\tat org.apache.hadoop.ipc.Client$Connection.receiveResponse(Client.java:848)\\n\\tat org.apache.hadoop.ipc.Client$Connection.run(Client.java:786)\\nCall to org.apache.hadoop.fs.Filesystem::get(URI, Configuration) failed!\\nH2H Error: Could not connect to hdfs on 172.20.104.226:50070\\nCould not connect to HDFS on 172.20.104.226:50070\\nH2H exited with: 1
\\n\\nI have installed Hadoop with a different user; is that the problem?\", \"post_time\": \"2013-02-22 05:01:01\" },\n\t{ \"post_id\": 3244, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"A new version of H2H which doesn't require local Hadoop has been released:\\nhttp://hpccsystems.com/products-and-ser ... ntegration\\n\\nIt uses the webHDFS interface rather than the libhdfs library.\", \"post_time\": \"2013-01-30 14:01:09\" },\n\t{ \"post_id\": 3002, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hello Chhaya,\\nYes, as I mentioned, H2H requires that Hadoop be installed via a standard install (otherwise the Linux package manager will not be aware that Hadoop has been installed). BTW, the DEB/RPM Hadoop install packages are available since the Hadoop 1.x release.\\n\\nHowever, if you are locked into using the Hadoop tar file distribution, you can install H2H with the ignore dependencies flag (I don't recommend it). If you do this, you will need to inform H2H where your Hadoop files reside. To do this you'll need to edit the /etc/HPCCSystems/hdfsconnector.conf file.\\n\\nLet us know if that works for you.\", \"post_time\": \"2012-12-13 14:58:02\" },\n\t{ \"post_id\": 2998, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nI have Ubuntu 12.04 LTS. I installed Apache Hadoop using the tar file; I did not use the deb package.\\n\\nAm I missing something?\", \"post_time\": \"2012-12-13 10:14:35\" },\n\t{ \"post_id\": 2987, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hello Chhaya,\\nWhat platform are you attempting to install H2H on? As the release notes point out, there are a couple of platforms on which you need to install ignoring dependencies:\\n\\nWhen installing the rpm (centos and opensuse) use the following command to install the plugin:\\nsudo rpm -Uvh --nodeps
\\n\\nAlso, the H2H install process requires a Hadoop standard install (not available until Hadoop 1.x.x). Did you install Hadoop via a deb/rpm package? If not, the system won't know that Hadoop is present. Please let me know. Thanks.\", \"post_time\": \"2012-12-12 13:33:54\" },\n\t{ \"post_id\": 2985, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nI have Hadoop and HPCC both installed on the machine. I need H2H, but it is still giving an error while installing, saying the Hadoop package is not installed.\\nAm I missing some configuration changes? I have installed Hadoop using manual settings\", \"post_time\": \"2012-12-12 09:38:27\" },\n\t{ \"post_id\": 2819, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi,\\n\\nHPCC+H2H requires a distribution of Hadoop to be installed on the same machines in order to satisfy the LibHDFS requirement. This distribution of Hadoop does not need to be configured, running, and/or contain any data.\\n\\nYour target Hadoop system (where your data is stored) does not need to be on the same machines as (HPCC+H2H+libhdfs). Thanks, Rodrigo.\", \"post_time\": \"2012-11-19 17:25:37\" },\n\t{ \"post_id\": 2811, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nWhat I have understood from your reply is that HPCC, Hadoop and H2H need to be on the same machines.\\nAm I correct? If not, then what do I have to install where? I am a bit confused; please correct me if I am wrong\", \"post_time\": \"2012-11-19 16:09:59\" },\n\t{ \"post_id\": 2799, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi. You are correct, H2H and the target Hadoop cluster (where you have your data) don't have to be installed on the same machines. However, a Hadoop installation is required on every one of the machines on which H2H is to be installed. \\n\\nThe reason H2H requires a local Hadoop install is because H2H uses the libhdfs library provided by Hadoop. The Hadoop installed on the same machine as H2H doesn't need to be configured and/or started, it only needs to be present in order to make libhdfs available. \\n\\nThanks, I hope that helps clear up the Hadoop requirement issue. 
Let us know if there's anything else.\", \"post_time\": \"2012-11-16 13:57:24\" },\n\t{ \"post_id\": 2793, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nWe have the HPCC cluster on one set of machines and the Hadoop cluster on a different set of machines.\\nThe installation guide says H2H should be installed on all the Thor nodes.\\nIs it necessary that HPCC and Hadoop be installed on the same machine?\\nI read in the H2H connector article that both clusters can be on different machines.\\n\\nCan you please elaborate more about the H2H installation?\", \"post_time\": \"2012-11-16 06:58:07\" },\n\t{ \"post_id\": 2780, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"Re: H2H connector installation\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi, the H2H plugin is dependent on the libhdfs library provided by Hadoop, and therefore requires that Hadoop be installed alongside H2H on all nodes.\\n\\nThe key concept is that H2H doesn't have to be installed on the same machines as the target Hadoop system. Let us know if you have any questions or concerns. -Rodrigo\", \"post_time\": \"2012-11-15 14:18:29\" },\n\t{ \"post_id\": 2778, \"topic_id\": 605, \"forum_id\": 8, \"post_subject\": \"H2H connector installation\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nI tried to install the H2H connector on Ubuntu 12.04 LTS but it says the Hadoop package is not there.\\n\\nI read that to install H2H you do not need to have Hadoop installed on the same machine. What can be the problem?\\n\\nThanks\", \"post_time\": \"2012-11-15 11:51:27\" },\n\t{ \"post_id\": 2787, \"topic_id\": 606, \"forum_id\": 8, \"post_subject\": \"Re: Error with DISTRIBUTION command\", \"username\": \"rtaylor\", \"post_text\": \"JM,secondly, to fix documentation : it should be specified that DISTRIBUTION can be turned into a definition, as it is for OUTPUT, by using a variable, like this :
Yes, just as every action in ECL can be (and we NEVER refer to them as "variables" because, as definitions, they never "vary" ).\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2012-11-15 18:03:57\" },\n\t{ \"post_id\": 2786, \"topic_id\": 606, \"forum_id\": 8, \"post_subject\": \"Re: Error with DISTRIBUTION command\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\n\\nIt works perfectly ! Thanks. \\n\\nI will add a comment to the DISTRIBUTION documentation entry for two reasons : \\n
\\n[attr := ] DISTRIBUTION(recordset [, fields ] [, NAMED( name ) ] )
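\\n\\nFor instance (purely an illustration of that suggested syntax; the definition name is arbitrary):\\n\\ndist := DISTRIBUTION(ds, i, v, NAMED('stats'));\\n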
\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-11-15 17:33:04\" },\n\t{ \"post_id\": 2784, \"topic_id\": 606, \"forum_id\": 8, \"post_subject\": \"Re: Error with DISTRIBUTION command\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nThe error message is telling you that you must use the WHEN function to allow your FUNCTION to execute the DISTRIBUTION action, like this:\\nrec := RECORD\\nINTEGER i;\\nINTEGER v;\\nEND;\\n\\nds := DATASET([{1,11},{2,22},{1,33},{2,44},{3,55}],{INTEGER i;INTEGER v});\\n\\nDATASET({STRING line}) fonc2(DATASET(rec) ds) := FUNCTION\\n dist := DISTRIBUTION(ds,i,v,NAMED('stats'));\\n xmlDis := DATASET(ROW(TRANSFORM({STRING line},SELF.line := WORKUNIT('stats', STRING))));\\n RETURN WHEN(xmlDis,dist,BEFORE);\\nEND;\\n\\nOUTPUT(fonc2(ds));
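\\n\\nThe same WHEN pattern applies any time an action needs to fire as a side-effect of a definition -- a minimal sketch (the names here are invented purely for illustration):\\n\\nds2 := DATASET([{1},{2},{3}],{INTEGER n});\\nact := OUTPUT(COUNT(ds2),NAMED('cnt')); // any action\\nfiltered := WHEN(ds2(n > 1), act, BEFORE); // act runs before filtered is used\\nOUTPUT(filtered);\\n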
HTH,\\n\\nRichard\", \"post_time\": \"2012-11-15 16:18:05\" },\n\t{ \"post_id\": 2779, \"topic_id\": 606, \"forum_id\": 8, \"post_subject\": \"Error with DISTRIBUTION command\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI get an error Error: WHEN must be used to associated an action with a definition (32, 2), 2325,
when using DISTRIBUTION action in a function. \\nin the following code : \\n\\nrec := RECORD\\nINTEGER i;\\nINTEGER v;\\nEND;\\n\\nds := DATASET([{1,11},{2,22},{1,33},{2,44},{3,55}],{INTEGER i;INTEGER v});\\n\\nDATASET({STRING line}) fonc2(DATASET(rec) ds) := FUNCTION\\n\\tDISTRIBUTION(ds,i,v,NAMED('stats'));\\n\\txmlDis := DATASET(ROW(TRANSFORM({STRING line},SELF.line := WORKUNIT('stats', STRING))));\\n\\tRETURN xmlDis;\\nEND;\\n\\nOUTPUT(fonc2(ds));
\\n\\nI just want to get stats on the dataset to distribute it properly over the cluster.\\nHow can I do this?\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-11-15 13:03:47\" },\n\t{ \"post_id\": 2861, \"topic_id\": 610, \"forum_id\": 8, \"post_subject\": \"Re: Getting errors when performing JOIN or build index\", \"username\": \"bforeman\", \"post_text\": \"I received a message from the development team:\\n\\nIt should not be caused by lack of memory.\\n\\nI'm probably going to need (at a minimum) a thor log from a slave that reported the error.
\\n\\nIf you could post (attach) your slave log, that would be great.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 13:56:08\" },\n\t{ \"post_id\": 2859, \"topic_id\": 610, \"forum_id\": 8, \"post_subject\": \"Re: Getting errors when performing JOIN or build index\", \"username\": \"bforeman\", \"post_text\": \"Checking with development regarding the error, it may be as you guess simply that you are running short on memory. Can you check the ECL Watch and look at your Topology > Target Clusters information?\\n\\nWhat does your JOIN and BUILD look like?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 13:32:40\" },\n\t{ \"post_id\": 2801, \"topic_id\": 610, \"forum_id\": 8, \"post_subject\": \"Getting errors when performing JOIN or build index\", \"username\": \"arunarav\", \"post_text\": \"We are getting the following error while performing JOIN or build index. This happens intermittently. Is this a symptom of insufficient memory (surmising since this happens on memory intensive operations)? Any pointers appreciated.\\n\\nEnvironment: 10 node THOR on AWS \\nData size- 85 million records.\\n\\nError: System error: 1303: Graph[1], join[4]: SLAVE 10.249.1.5:20100: Attempt to free invalid pointer (0, 0), 1303,
\", \"post_time\": \"2012-11-16 15:14:34\" },\n\t{ \"post_id\": 2805, \"topic_id\": 611, \"forum_id\": 8, \"post_subject\": \"Re: workunit blocked on HPCC VM\", \"username\": \"larry_tempe\", \"post_text\": \"[quote="rtaylor"]On your VM there's a "power down" option -- try doing that and then re-starting the HPCC VM. That should get you a completely new starting point.\\n\\nThanks for help!\\nYes, in fact I did it and restarted my VM and my host Windows 7 system several times and this problem is still there.\", \"post_time\": \"2012-11-17 18:37:25\" },\n\t{ \"post_id\": 2804, \"topic_id\": 611, \"forum_id\": 8, \"post_subject\": \"Re: workunit blocked on HPCC VM\", \"username\": \"rtaylor\", \"post_text\": \"On your VM there's a "power down" option -- try doing that and then re-starting the HPCC VM. That should get you a completely new starting point.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-17 14:44:18\" },\n\t{ \"post_id\": 2803, \"topic_id\": 611, \"forum_id\": 8, \"post_subject\": \"workunit blocked on HPCC VM\", \"username\": \"larry_tempe\", \"post_text\": \"I installed the HPCC VM on my local machine and tried HPCC.\\n\\nThe VM works perfectly in the first few days. But today when I submitted new jobs, all workunits have been blocked. I have restarted my pc and my VM several times and my workunits keep being blocked. How can I resolve this?\\n\\nThanks!\", \"post_time\": \"2012-11-17 02:31:15\" },\n\t{ \"post_id\": 2898, \"topic_id\": 613, \"forum_id\": 8, \"post_subject\": \"Re: Could anyone help explain how does PIPE work?\", \"username\": \"dsun\", \"post_text\": \"Bob,\\n\\nThanks a lot, it's really the path issue.\\nIn the pipe command, we'd better use the absolute path for the command/script file.\\n\\nRegards,\\nDongliang\", \"post_time\": \"2012-11-28 03:12:39\" },\n\t{ \"post_id\": 2891, \"topic_id\": 613, \"forum_id\": 8, \"post_subject\": \"Re: Could anyone help explain how does PIPE work?\", \"username\": \"bforeman\", \"post_text\": \"Yes, in the LRM, this code works fine:\\n\\nnamesRecord := RECORD\\n STRING10 Firstname{xpath('/Name/FName')};\\n STRING10 Lastname{xpath('/Name/LName')};\\nEND;\\n\\np := PIPE('echo <Name><FName>George</FName><LName>Jetson</LName></Name>', namesRecord, XML); \\nOUTPUT(p);
\\n\\nSo looking at your example I would suspect it might be a path issue.\\n\\nThe easiest way to debug that kind of thing is probably to fully qualify the path to the executable and make it a script that traces to a separate log (of its own).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-27 15:29:50\" },\n\t{ \"post_id\": 2889, \"topic_id\": 613, \"forum_id\": 8, \"post_subject\": \"Re: Could anyone help explain how does PIPE work?\", \"username\": \"dsun\", \"post_text\": \"Bob,\\n\\nI just want to be clear on how the 3 types of PIPE work. I have tried to make the examples in the Reference work, but somehow failed. Maybe I did not use the format correctly.\\nAnyway, I put the sample code here, and need your help to point out where the problem is and the correct ways to use PIPE.\\n\\n\\nrec := RECORD\\nSTRING str1;\\nEND;\\n\\np1 := PIPE('cat tmp.csv', rec, csv);\\np1;\\n
\\n\\nThis is the easiest pipe; it works if we just output to the IDE, but errors if we use the following code:\\n\\nOUTPUT(p1,,'~testpipe::pipe1.csv',CSV,overwrite);\\n
\\n\\nError message\\n\\nError: System error: 10096: Graph[1], piperead[2]: SLAVE 192.168.5.139:20100: Process returned 1: - PIPE(cat tmp.csv) (0, 0), 10096, \\n
\\n\\nThanks a lot,\\nDongliang\", \"post_time\": \"2012-11-27 10:51:10\" },\n\t{ \"post_id\": 2863, \"topic_id\": 613, \"forum_id\": 8, \"post_subject\": \"Re: Could anyone help explain how does PIPE work?\", \"username\": \"bforeman\", \"post_text\": \"Hi Dongliang,\\n\\nPIPE is used to launch external command programs. I'm sure you have read the Language Reference Manual on PIPE, and indeed there is a fairly good example on the Form 2 (input/output) usage.\\n\\nBut as one of my colleagues always says, what are you trying to do? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 14:02:44\" },\n\t{ \"post_id\": 2808, \"topic_id\": 613, \"forum_id\": 8, \"post_subject\": \"Could anyone help explain how does PIPE work?\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nI'm now trying to understand how PIPE works in ECL, I know that there are 3 types PIPE (input, through, output), the follow sample code works:\\n
\\nrec1 := RECORD\\nSTRING str1;\\nSTRING str2;\\nSTRING str3;\\nEND;\\n\\npipe1 := PIPE('python test.py', rec1, csv(separator(' ')));\\n
\\n\\nThen how can I use the pipe1, it seems I only can do 'output(pipe1)'?\\nAnd also how to write the input/output pipe? \\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-11-19 11:42:25\" },\n\t{ \"post_id\": 2818, \"topic_id\": 614, \"forum_id\": 8, \"post_subject\": \"Re: Non local PROJECT on THOR\", \"username\": \"oleg\", \"post_text\": \"I.e. PROJECT(recordset,record) and PROJECT(recordset,record, LOCAL) are exactly the same?\", \"post_time\": \"2012-11-19 17:01:57\" },\n\t{ \"post_id\": 2816, \"topic_id\": 614, \"forum_id\": 8, \"post_subject\": \"Re: Non local PROJECT on THOR\", \"username\": \"ghalliday\", \"post_text\": \"Yes. The simple versions of PROJECT and TABLE preserve the sort order and distribution.\", \"post_time\": \"2012-11-19 16:51:01\" },\n\t{ \"post_id\": 2813, \"topic_id\": 614, \"forum_id\": 8, \"post_subject\": \"Non local PROJECT on THOR\", \"username\": \"oleg\", \"post_text\": \"The question is if I'm NOT using COUNTER in transform, what difference the LOCAL option should make?\\nI.e. is PROJECT will be (or can be) implicitly LOCAL even I didn't specify it, since there is no connection between nodes anyway?\\nAnd in any case, will it preserve DISTRIBUTION / SORT order?\\n\\nAnd same question for TABLE as well (for the plain TABLE with no aggregation etc.).\", \"post_time\": \"2012-11-19 16:23:29\" },\n\t{ \"post_id\": 2893, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,\\n\\nThat makes perfect sense to me. \\n\\nCSV is a text-based format, so there's no need to format the data in a particular binary format, which is essentially what LOAD and STORE are for.\\n\\n The "classic" example of an alien data type is the Pascal-style string (variable-length string field with a leading length byte indicating the number of characters in the data). This requires LOAD to read the string data from its binary format, and to write back to a binary file (Thor/Flat file) STORE is required to properly re-format the data back into a Pascal string. But to write that string data back to a CSV file, no special formatting is needed, so none would be applied. \\n\\nHowever, if you consider it a bug that STORE does not operate for CSV file output, then by all means add an issue to JIRA about it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-27 19:39:16\" },\n\t{ \"post_id\": 2888, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"dsun\", \"post_text\": \"Thanks a lot, it can explain my question if we consider the LOAD/STORE are opposite.\\n\\nOne more question I mentioned in my last post:\\n\\nAlso, if we output to a CSV file instead of the Thor/Flat file, the result2 will be always the same with result1.\\n
\\n\\nThat means, if the code is below, we can see the STORE works:\\n\\nOUTPUT(ds,,'~RTTEST::TestTYPE',OVERWRITE);\\n
\\n\\nBut now, if output as a CSV file:\\n\\nOUTPUT(ds,,'~RTTEST::TestTYPE',CSV,OVERWRITE);\\n
\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-11-27 09:59:23\" },\n\t{ \"post_id\": 2843, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,Thanks for your detail explanation, but I still found some problems by a little change in your code (strL[1..4] --> strL[3..6]).
\\nThe purpose of TYPE is to de-format and re-format data -- that means, coded properly, LOAD and STORE must be "opposites" where LOAD de-formats the data for use in HPCC and STORE re-formats the data for storage (as demonstrated by my previous example). \\n\\nYour change makes LOAD and STORE no longer opposite functions. So the answer as to why it behaves as it does when LOAD and STORE are coded "improperly" would have to be found by examining the source code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-23 15:26:17\" },\n\t{ \"post_id\": 2837, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"dsun\", \"post_text\": \"Richard,\\n\\nThanks for your detail explanation, but I still found some problems by a little change in your code (strL[1..4] --> strL[3..6]).\\n\\n YYString := TYPE\\n EXPORT STRING4 LOAD(STRING6 strL) := strL[3..6];\\n EXPORT STRING6 STORE(STRING4 strS) := strS + 'YY';\\n END;\\n r := RECORD\\n YYString F1;\\n END; \\n ds := dataset([{'abcdYY'},{'efghYY'}],r);\\n\\n ds;\\n OUTPUT(ds,,'~RTTEST::TestTYPE',OVERWRITE);\\n
\\n\\nresult1 is 'cdYY', for result2 'cdYY' in IDE, and I checked the Thor/Flat file in the disk, it's 'abcdYY'.\\n\\nresult1 looks OK, since only 'LOAD' is invoked, but result2 not expected.\\n\\nIf the flow for result2 in IDE is LOAD-->STORE-->LOAD, the result should be 'YYYY', but now it's 'cdYY', that means when it's showed in the IDE, only LOAD is invoked.\\nIf the flow for result2 in FLAT File is LOAD-->STORE, the result should be 'cdYYYY', but now it's 'abcdYY', that means when it's output to Flat File, only STORE invoked.\\n\\nAlso, if we output to a CSV file instead of the Thor/Flat file, the result2 will be always the same with result1.\\n\\nCould you have a look at this?\\n\\nThanks a lot!\\nDongliang\", \"post_time\": \"2012-11-22 04:26:15\" },\n\t{ \"post_id\": 2834, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,\\n\\nThe purpose of the TYPE structure is to define an "alien" data type (not one of the built-in datatypes) so that you can read the data from the file and re-write data to a file with the proper formatting for that "alien" data type. Therefore, there are a couple of important points to understand:
\\nThis code shows a complete functional example:YYString := TYPE\\n EXPORT STRING4 LOAD(STRING6 strL) := strL[1..4];\\n EXPORT STRING6 STORE(STRING4 strS) := strS + 'YY';\\nEND;\\nr := RECORD\\n YYString F1;\\nEND;\\t\\nds := dataset([{'abcdYY'},{'efghYY'}],r);\\n\\nds;\\nOUTPUT(ds,,'~RTTEST::TestTYPE',OVERWRITE);
When you run this code you will note that result 1 contains just 'abcd' and 'efgh' -- showing that LOAD has stripped the 'YY' formatting from the data to work with.\\n\\nYou will also note that result 2 shows 'abcd' and 'efgh' as the data values but that the displayed __fileposition__ values are 0 and 6. This demonstrates that STORE has re-appended the 'YY' formatting to the data written to disk. The reason the 'YY' is not displayed is because the LOAD function is called again to format the display values sent back to the IDE (the data is being read from disk using your LOAD function). If you look at the raw data in the disk file (any text editor will work to do that) you will see those 'YY' formatting characters in the file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-21 14:31:32\" },\n\t{ \"post_id\": 2830, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"dsun\", \"post_text\": \"So I feel a little confused on the logic, i.e. we defined a new Type below:\\n\\nTestString4 := TYPE\\n EXPORT STRING LOAD(STRING4 strL) := strL[2..3]+'YY';\\n EXPORT STRING4 STORE(STRING strS) := strS[3..4]+'XX';\\nEND;\\n
\\n\\nIf we give an input of 'abcd', the result will be 'dXYY'; that means the flow is 'abcd' --> STORE --> LOAD.\\n\\nAlso, it seems that the new Type keeps both the logical type and the physical type; when we assign it to another variable (another type, e.g. STRING4), the value will always be the physical one, which means the intermediate value (after LOAD or STORE) cannot be used.\\n\\nCould you help investigate this?\\n\\nThanks a lot!\", \"post_time\": \"2012-11-21 04:30:09\" },\n\t{ \"post_id\": 2827, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Re: Question about 'TYPE Structure Special Functions'\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,\\n\\nFirst, LOAD reads the data from disk to memory. Then you work with it until you're ready to write to disk, then STORE writes the data from memory to disk.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-20 17:36:20\" },\n\t{ \"post_id\": 2822, \"topic_id\": 617, \"forum_id\": 8, \"post_subject\": \"Question about 'TYPE Structure Special Functions'\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nFrom the ECL Language Reference, there are some definitions:\\n\\nLOAD defines the callback function to be applied to the bytes of the record to create the data value to be used in the computation. This function defines how the system reads the data from disk.\\n\\nSTORE defines the callback function to be applied to the computed value to store it within the record. This function defines how the system writes the data to disk.\\n
\\n\\n1. So it invokes 'STORE' first, then 'LOAD'?\\n2. How do I identify which operations should be in 'STORE' and which in 'LOAD'?\\n3. When are STORE and LOAD actually invoked?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-11-20 08:52:10\" },\n\t{ \"post_id\": 2868, \"topic_id\": 618, \"forum_id\": 8, \"post_subject\": \"Re: DISTRIBUTE/SORT/GROUP - Keep track and carrying over\", \"username\": \"ghalliday\", \"post_text\": \"As far as possible the code generator tries to keep track of grouping/sort order/distribution. If you distribute a dataset and it is already distributed then the code generator should remove that second distribute. (If it does thor should still be fairly efficient.)\\n\\nThere is a #option you might find very useful.\\n\\nTry adding \\n\\n#option ('showMetaInGraph', true);\\n\\nto your query.\\n\\nWhat it will do is add information about what the code generator thinks is the current distribution/sort order/grouping.\\n\\nI could easily default it to on - would that be generally useful or would it add too much information to the graph?\", \"post_time\": \"2012-11-26 15:13:44\" },\n\t{ \"post_id\": 2865, \"topic_id\": 618, \"forum_id\": 8, \"post_subject\": \"Re: DISTRIBUTE/SORT/GROUP - Keep track and carrying over\", \"username\": \"bforeman\", \"post_text\": \"Hi Oleg,\\n\\nDoes system keep track if dataset already been DISTRIBUTEd/SORTed/GROUPed so the redundant action at least can be ignored by the system?
\\n\\nI'm checking with the development team, but my educated guess would be YES to this question.\\n\\nYou can also view the underlying C++ at any time, using the following switch in the Debug prompt:\\n\\nSaveCPPTempFiles=1\\nThis might help you to verify that indeed redundancies are handled appropriately.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 14:20:52\" },\n\t{ \"post_id\": 2824, \"topic_id\": 618, \"forum_id\": 8, \"post_subject\": \"DISTRIBUTE/SORT/GROUP - Keep track and carrying over\", \"username\": \"oleg\", \"post_text\": \"It is not always obvious (and sometimes much less then obvious) when result dataset carrying over distribution/grouping/sort order after some transformation.\\n\\nThe easiest way is to do add 'DISTRIBUTE' each time we need to make sure we are dealing with the data the way we need it, and so on... \\n\\nDoes system keep track if dataset already been DISTRIBUTEd/SORTed/GROUPed so the redundant action at least can be ignored by the system?\\n\\nIf not, how bad is this redundancy? For example, in case of DISTRIBUTE it shouldn't be that bad since if dataset already distributed, nothing need to be moved.\\n\\nAlso, the simple consolidated table in the Language Reference which explains the state of the dataset after the certain action will be helpful as well.\", \"post_time\": \"2012-11-20 13:32:37\" },\n\t{ \"post_id\": 2847, \"topic_id\": 623, \"forum_id\": 8, \"post_subject\": \"Re: Issue with parameters to NOTIFY\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nWill do\", \"post_time\": \"2012-11-24 10:35:18\" },\n\t{ \"post_id\": 2844, \"topic_id\": 623, \"forum_id\": 8, \"post_subject\": \"Re: Issue with parameters to NOTIFY\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nEither the code or the doc is wrong -- probably the doc, either because I was told incorrect information to begin with years ago when NOTIFY was added to the language (doubtful), or something changed along the way that I was not informed of at the time (the more likely culprit).\\n\\nMake a bug report in JIRA and it'll be addressed either as a code or doc issue, whichever it actually is.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-23 15:38:15\" },\n\t{ \"post_id\": 2839, \"topic_id\": 623, \"forum_id\": 8, \"post_subject\": \"Issue with parameters to NOTIFY\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe ECL reference guide (Feb 2012) has the second parameter to NOTIFY as optional.\\n\\nHowever I've found it is mandatory.\\nThe following code does not syntax check:\\n\\nNOTIFY ('Allan');\\n
\\nThis code passes syntax check:\\n\\nNOTIFY ('Allan','1');\\n
\\n\\nAny comments?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-11-22 14:34:05\" },\n\t{ \"post_id\": 2926, \"topic_id\": 624, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS - referencing modules\", \"username\": \"bforeman\", \"post_text\": \"Yes, particularly there are switches on the ECL command line to specify paths:\\n\\n-Ipath Add path to locations to search for ecl imports\\n-Lpath Add path to locations to search for system libraries\\n \\nThese could be set in an INI file\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-12-03 13:22:23\" },\n\t{ \"post_id\": 2924, \"topic_id\": 624, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS - referencing modules\", \"username\": \"jeeves\", \"post_text\": \"Thanks. I will try this and get back.\", \"post_time\": \"2012-12-03 09:33:28\" },\n\t{ \"post_id\": 2874, \"topic_id\": 624, \"forum_id\": 8, \"post_subject\": \"Re: ECLPLUS - referencing modules\", \"username\": \"bforeman\", \"post_text\": \"Use the ECL command line instead of ECLPLUS.\\n\\nSee: http://hpccsystems.com/download/docs/ecl-ide-client-tools\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 16:38:36\" },\n\t{ \"post_id\": 2840, \"topic_id\": 624, \"forum_id\": 8, \"post_subject\": \"ECLPLUS - referencing modules\", \"username\": \"jeeves\", \"post_text\": \"I am running a ECL script like this.\\n\\neclplus server=10.244.155.144 cluster=thor username=hpccdemo ecl=@script.ecl\\n\\nthe script.ecl file contains import statements like this\\n\\n\\nIMPORT poc.bc;\\nIMPORT poc.cons;
\\n\\nand the command fails with the error "Import names unknown module poc".\\n\\nHow can I make these modules available? Can i somehow upload/store them in HPCC?\", \"post_time\": \"2012-11-23 09:23:44\" },\n\t{ \"post_id\": 2851, \"topic_id\": 628, \"forum_id\": 8, \"post_subject\": \"Re: Operations in Thor\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThe only thing that immediately comes to mind is that Roxie does not explicitly write disk files, therefore PERSIST and OUTPUT(ds,,'diskfilename') are not supported. Other than that, Roxie can do pretty much everything in ECL (unless I've forgotten something).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-25 13:16:01\" },\n\t{ \"post_id\": 2850, \"topic_id\": 628, \"forum_id\": 8, \"post_subject\": \"Operations in Thor\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nCan anybody share a consolidated list of operations that are supported in THOR but not in ROXIE..?\\n\\nKindly share any pointers regarding the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-11-25 09:05:16\" },\n\t{ \"post_id\": 2854, \"topic_id\": 629, \"forum_id\": 8, \"post_subject\": \"Re: Store random Values\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot..\", \"post_time\": \"2012-11-25 20:27:12\" },\n\t{ \"post_id\": 2853, \"topic_id\": 629, \"forum_id\": 8, \"post_subject\": \"Re: Store random Values\", \"username\": \"rtaylor\", \"post_text\": \"Like this:
randVal := RANDOM() % 5 : GLOBAL; // If it generates 4.\\n\\noutVal := randVal + 1; // Result to be 5
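\\n\\nA couple of extra lines, just as a sketch, to see the value hold steady when it is used more than once:\\n\\nOUTPUT(randVal,NAMED('FirstUse'));\\nOUTPUT(outVal,NAMED('SecondUse')); // always randVal + 1, because randVal is evaluated only once\\n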
It will retain its value through the end of the job.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-25 17:57:29\" },\n\t{ \"post_id\": 2852, \"topic_id\": 629, \"forum_id\": 8, \"post_subject\": \"Store random Values\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs there any way to retain a random generated value to be a constant and use the same value to be referenced in the future..?\\n\\nEx :\\n\\n\\nrandVal := RANDOM() % 5 // If it generates 4.\\n\\noutVal := randVal + 1 // Result to be 5 \\n
\\n\\nBut the outVal is not consistently 5, the random is called again and we get a different value as the output.\\n\\nAny way to retain the random generated value..?\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-11-25 15:24:08\" },\n\t{ \"post_id\": 2969, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"Re: ECL filtering syntax parsing trouble\", \"username\": \"bforeman\", \"post_text\": \"I'm sure that the development team would love to have a look at your slave logs, if you can attach them here. This will help them track down the cause of the error.\\n\\nThe slave logs can be found in the Workunit Details in the Helpers section.\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2012-12-06 12:39:54\" },\n\t{ \"post_id\": 2968, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"Re: ECL filtering syntax parsing trouble\", \"username\": \"ideal\", \"post_text\": \"I found a way with NORMALIZE and DENORMALIZE to avoid previous error. It was due to the fact that a so-called child dataset was embedded in the first one.\\n\\nNow, I have got another obscure message (not helpful at all but I understand it must be a part of the game because there is no documentation on error messages) :\\nError: System error: 3000: Graph[52], join[135]: SLAVE 10.36.106.98:20100: assert(onStartCalled) failed - file: /var/jenkins/workspace/CE-Candidate-3.8.0/CE/Ubuntu-12.04-amd64/HPCC-Platform/thorlcr/graph/thgraph.cpp, line 525 (0, 0), 3000,
\\nIt is correlated to the fact that a JOIN function is encapsulated in another JOIN function. I don't understand now but maybe, after spending some hours trying to understand, I will find a solution.\", \"post_time\": \"2012-12-06 10:40:32\" },\n\t{ \"post_id\": 2958, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"Re: ECL filtering syntax parsing trouble\", \"username\": \"ideal\", \"post_text\": \"Now, if I am using RIGHT OUTER instead of FULL OUTER and after the operator + with the first dataset, then performances are collapsing with a factor 20.\\n\\nGreat !\\n\\nJM.\", \"post_time\": \"2012-12-05 11:26:56\" },\n\t{ \"post_id\": 2952, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"Re: ECL filtering syntax parsing trouble\", \"username\": \"ideal\", \"post_text\": \"In the same way, you have \\n\\nJOIN( tupleDejaFus.dv\\n , tupleRP.dv\\n , LEFT.symbol = RIGHT.symbol\\n // AND NOT ( LEFT.type=TypeVariable.TOUVAR OR RIGHT.type=TypeVariable.TOUVAR )\\n , RIGHT ONLY\\n // , LOCAL\\n ));
\\n\\nis ok\\n\\nand\\n\\nJOIN( tupleDejaFus.dv\\n , tupleRP.dv\\n , LEFT.symbol = RIGHT.symbol\\n // AND NOT ( LEFT.type=TypeVariable.TOUVAR OR RIGHT.type=TypeVariable.TOUVAR )\\n , FULL ONLY\\n // , LOCAL\\n ));
\\nis not and gives this error :\\n\\nError: INTERNAL: Dataset is not active: '_EMPTY_(noeud)' (50, 7), 4153, C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\divers\\\\perf\\\\test_distribute1\\\\interp_fusionner.ecl
\\n\\nActually, I don't see how it can work with some join types and not with others, without being buggy. Here, it does not work with the FULL OUTER or FULL ONLY jointypes when I really need it.\\nI know there is not enough data to investigate but I don't have time and again I must find a workaround. \\n\\nJM.\", \"post_time\": \"2012-12-05 00:00:58\" },\n\t{ \"post_id\": 2883, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"Re: ECL filtering syntax parsing trouble\", \"username\": \"bforeman\", \"post_text\": \"Without seeing the actual code and results involved, I'm making an educated guess here, but perhaps the difference relates to scoping.\\n\\nWhen C is used inline with the COUNT, could it have a different value than when it is not inline? Enclosing scope might be different from global scope.\\n\\nI would like to see more code if possible, showing where you are actually using it. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 19:32:41\" },\n\t{ \"post_id\": 2858, \"topic_id\": 633, \"forum_id\": 8, \"post_subject\": \"ECL filtering syntax parsing trouble\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nIn my code, in GRAPH context, there is something like :\\n\\nRETURN tuplesFusPlusPrec(COUNT(regleUnif.tete)<=(C-1))
\\n\\nIt does not return the same as : \\n\\nvar:=C-1\\nRETURN tuplesFusPlusPrec(COUNT(regleUnif.tete)<=var)
\\n\\nC is a parameter of the function called by GRAPH and initialized with COUNTER.\\n\\nJM.\", \"post_time\": \"2012-11-26 11:45:23\" },\n\t{ \"post_id\": 2885, \"topic_id\": 636, \"forum_id\": 8, \"post_subject\": \"Re: Nested Project\", \"username\": \"bforeman\", \"post_text\": \"This might be a little cleaner and more accurate:\\n\\ntestIp := RECORD\\nstring ip;\\nEND;\\n\\ntest1Ds := DATASET([{'138.240.235.120'},{'205.142.197.100'},{'103.223.233.124'}],testIp);\\n\\ncntryRec := RECORD\\nstring startIp;\\nstring EndIp;\\nstring country;\\nEND;\\n\\ncntryData := DATASET([{'138.240.0.0','138.240.255.255','United States'},\\n {'103.223.0.0','103.22.255.255','India'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'205.142.0.0','205.142.255.255','United Kingdom'}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t ],cntryRec);\\n\\ncntry := RECORD\\nstring countryName;\\nEND;\\n\\nresultRec := RECORD\\nSTRING ipVal;\\nstring cntryVal1;\\nend;\\n\\nresultRec xfrm(test1Ds L, cntryData R) := TRANSFORM\\n self.ipVal := L.Ip;\\n self.cntryVal1 := R.country\\n END;\\n\\njoinout := JOIN(test1Ds,cntryData,\\n LEFT.IP BETWEEN RIGHT.startIP AND RIGHT.EndIp,\\n\\t\\t\\t\\t\\t\\t\\t\\txfrm(LEFT,RIGHT),LEFT OUTER,ALL);\\n\\njoinout;\\n\\nresultRec addnomatch(joinout L) := TRANSFORM\\n SELF.cntryVal1 := IF(L.cntryVal1 = '','No Match',L.cntryVal1);\\n SELF := L;\\nEND;\\n\\ncleanedRecs := PROJECT(joinout, addnomatch(LEFT));\\n\\ncleanedrecs;
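\\n\\nIf you prefer a single pass, the 'No Match' default could also be folded straight into the JOIN TRANSFORM instead of using the follow-up PROJECT -- an untested variation on the same code:\\n\\nresultRec xfrm2(test1Ds L, cntryData R) := TRANSFORM\\n SELF.ipVal := L.ip;\\n SELF.cntryVal1 := IF(R.country = '','No Match',R.country);\\nEND;\\n\\njoinout2 := JOIN(test1Ds,cntryData,\\n LEFT.IP BETWEEN RIGHT.startIP AND RIGHT.EndIp,\\n xfrm2(LEFT,RIGHT),LEFT OUTER,ALL);\\n\\njoinout2;\\n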
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 20:12:34\" },\n\t{ \"post_id\": 2878, \"topic_id\": 636, \"forum_id\": 8, \"post_subject\": \"Nested Project\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThere is a scenario where i have a list of IP's and another dataset which has the range of the Ip and country information.\\n\\nI am trying to match the list of IP's to a respective country.\\n\\nSample Code :\\n\\n\\ntestIp := RECORD\\n string ip;\\nEND;\\n\\ntest1Ds := DATASET([{'138.240.235.120'},{'205.142.197.100'},{'103.223.233.124'}],testIp);\\n \\ncntryRec := RECORD\\n string startIp;\\n string EndIp;\\n string country;\\nEND;\\n \\ncntryData := DATASET([{'138.240.0.0','138.240.255.255','United States'}, {'103.223.0.0','103.22.255.255','India'}, {'205.142.0.0','205.142.255.255','United Kingdom'} ],cntryRec);\\n \\ncntry := RECORD\\n string countryName;\\nEND;\\n \\nresultRec := RECORD\\n STRING ipVal;\\n string cntryVal1;\\nend;\\n\\ncntry chk2(cntryRec L,string ipV) := TRANSFORM\\n self.countryName := if(ipV between L.startIp and L.endIp,L.country,'No match');\\nEND;\\n \\nresultRec ipTrans(testIp L, integer C) := TRANSFORM\\n self.ipVal := L.Ip;\\n cntryVal := PROJECT(cntryData, chk2(LEFT,self.ipVal));\\n self.cntryVal1 :=cntryVal[C].countryName;\\n \\nEND;\\nprojOut := PROJECT(test1Ds,ipTrans(LEFT,COUNTER));\\nprojOut;\\n\\n
\\n\\nThis code will work if the list of Ip's and the range of Ip's are in the same order.\\n\\nSample Output :\\n\\n\\n138.240.235.120 United States\\n205.142.197.100 United Kingdom\\n103.223.233.124 India\\n
\\n\\nKindly help me how i can optimize the code further in such scenarios.\\n\\nThanks a lot in advance.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-11-26 17:50:39\" },\n\t{ \"post_id\": 2904, \"topic_id\": 639, \"forum_id\": 8, \"post_subject\": \"Re: extreme skew after distribute\", \"username\": \"DSC\", \"post_text\": \"Having a 1:1 correlation between those keys doesn't translate into something that distributes the same.\\n\\nAs I understand it, the distribution is based on a numeric value (the output of the HASH32() function in your example), modulo the number of thor nodes you have running. For sake of example, let's say that instead of using HASH32() you use function Foo(), which converts its input into an integer and returns only the last four bits of it. Then you have a two node cluster and the following data:\\n\\nMyRec := RECORD\\n STRING someChar;\\n INTEGER someInt;\\nEND;\\n\\nds1 := DATASET([{'A',1},{'Q',2}],MyRec);\\n\\nds2 := DISTRIBUTE(ds1,Foo(someChar));\\nds3 := DISTRIBUTE(ds1,Foo(someInt));\\n\\nAs far as Foo() in concerned, the keys would result in these returned values:\\n\\n'A' -> 65 -> 01000001 -> 0001\\n1 -> 00000001 -> 0001\\n'Q' -> 81 -> 01010001 -> 0001\\n2 -> 00000002 -> 0002\\n\\nSo, distributing the data on someChar would result in all records winding up on your first node, while distributing on someInt would result in them evenly distributed. And that's with a 1:1 mapping of your two fields values.\\n\\nOne thing you might want to try doing is double-hashing values, possibly mixing different hashes as well. Say, write a separate function that performs returns HASH32(HASH64(key1)) and use that as your key distribution. I haven't tested that, but I suspect it may result in a slightly more even distribution, or at least a more random one.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-29 12:50:00\" },\n\t{ \"post_id\": 2902, \"topic_id\": 639, \"forum_id\": 8, \"post_subject\": \"extreme skew after distribute\", \"username\": \"anag\", \"post_text\": \"Hi,\\nI'm doing a simple distribute of a dataset based on a key. I see that while doing the distribute, there is a severe skew (+2200%, -90%). As a result, the rest of my computation is coming to a virtual standstill. \\n\\nI use this code. \\n\\nStep1 := DISTRIBUTE( DS, HASH32(key1) );\\n\\nInitially, key1 was declared as an INTEGER, but I tried changing it to STRING, and I still get similar skews.\\n\\nI tried with a different key (key2; which is a long string), and that seems to work great - no skews. Distribution of key1 and key2 are almost identical. They are a near 1-to-1 mapping. \\n\\nDoes anybody have insights into this?\\n\\nRegards,\\nAN\", \"post_time\": \"2012-11-29 10:42:17\" },\n\t{ \"post_id\": 2951, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\n\\nThank you for your detailed answer. It shows that LOCAL option is working implicitly for PROJECT's dataset argument only. You made me understand that my code example was not reflecting my issue and is even trivial. 
Anyway, discussion made me think about it and help me to find a good solution, thanks to Dan and you.\\n\\nJM.\", \"post_time\": \"2012-12-04 20:38:15\" },\n\t{ \"post_id\": 2936, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"DSC\", \"post_text\": \"Great explanation, Richard!\\n\\nThis kind of elucidation would be a perfect addendum to PROJECT's documentation. Or at least a note, clarifying that LOCAL applies specifically (and only) to PROJECT's dataset.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-12-03 21:52:09\" },\n\t{ \"post_id\": 2935, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nI made one modification to your original code that might clear things up a bit -- I added a field to get the node# on which the PROJECT code runs:\\nIMPORT STD;\\nrec := RECORD\\n INTEGER i:=-1;\\n INTEGER v:=-1;\\n SET OF INTEGER lc:=[];\\n INTEGER n:=-1;\\nEND;\\nds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);\\nds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);\\nds1D := DISTRIBUTE(ds1,1);\\nds2D := DISTRIBUTE(ds2,2);\\nfus := PROJECT(ds1D, TRANSFORM(rec,SELF.v:=ds2D[COUNTER].v;SELF.n:=STD.system.Thorlib.Node();SELF:=LEFT),LOCAL);\\nOUTPUT(fus);
The output from this looks like this:1\\t111\\t'1', '2'\\t1\\n2\\t222\\t'3', '4'\\t1\\n3\\t333\\t'5', '6'\\t1\\n4\\t444\\t'7', '8'\\t1\\n5\\t555\\t'9', '10'\\t1\\n
Note that each output record is coming from node #1 (which is where you put all the ds1D records). \\n\\nNow, if your question is -- how is it getting anything from ds2D when all its records are on node #2, then the answer is simple -- the LOCAL process is pulling the data required from wherever it is because you are explicitly asking for data from a specific record in ds2D. The PROJECT is running on node #1, processing each rec on node #1, but pulling data from both Node #1 and node#2 because the LOCAL only applies to the dataset PROJECT is using (the LEFT one -- and PROJECT has no RIGHT record set). \\n\\nWhen I change the code to this to get a random distribution of both datasets:IMPORT STD;\\nrec := RECORD\\n INTEGER i:=-1;\\n INTEGER v:=-1;\\n SET OF INTEGER lc:=[];\\n INTEGER n:=-1;\\nEND;\\nds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);\\nds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);\\nds1D := DISTRIBUTE(ds1);\\nds2D := DISTRIBUTE(ds2);\\nfus := PROJECT(ds1D, TRANSFORM(rec,SELF.v:=ds2D[COUNTER].v;SELF.n:=STD.system.Thorlib.Node();SELF:=LEFT),LOCAL);\\nOUTPUT(fus);
I get a result that looks like this:1\\t333\\t'1', '2'\\t0\\n5\\t111\\t'9', '10'\\t0\\n2\\t333\\t'3', '4'\\t1\\n4\\t111\\t'7', '8'\\t1\\n3\\t333\\t'5', '6'\\t2
and now you see it has processed on all three of my nodes, separately.\\n\\nAnd this last version demonstrates the operation of LOCAL even better:IMPORT STD;\\nrec := RECORD\\n INTEGER i;\\n INTEGER v;\\n SET OF INTEGER lc;\\nEND;\\noutrec1 := RECORD\\n rec;\\n INTEGER n;\\nEND;\\noutrec2 := RECORD\\n rec;\\n INTEGER v2;\\n SET OF INTEGER lc2;\\n INTEGER n1;\\n INTEGER n2;\\nEND;\\nds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);\\nds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);\\nds1D := DISTRIBUTE(ds1);\\nds2D := DISTRIBUTE(ds2);\\n\\noutrec1 XF1(ds1 L, integer C) := TRANSFORM\\n\\tSELF.n := STD.system.Thorlib.Node()+1;\\n\\tSELF:=L;\\nEND;\\t\\n\\nfus1 := PROJECT(ds1D,XF1(LEFT,COUNTER) ,LOCAL);\\nfus2 := PROJECT(ds2D,XF1(LEFT,COUNTER) ,LOCAL);\\n\\noutrec2 XF2(outrec1 L, outrec1 R) := TRANSFORM\\n\\tSELF.n1 := L.n;\\n\\tSELF.n2 := R.n;\\n\\tSELF.v2 := R.v;\\n\\tSELF.lc2 := R.lc;\\n\\tSELF:=L;\\nEND;\\t\\n\\nfus := JOIN(fus1,fus2,LEFT.i=RIGHT.i,XF2(LEFT,RIGHT) ,LEFT OUTER,LOCAL);\\nOUTPUT(fus);
The result of this code looks like this on my cluster:3\\t33\\t'5', '6'\\t0\\t\\t1\\t0\\n5\\t55\\t'9', '10'\\t555\\t'19', '20'\\t1\\t1\\n4\\t44\\t'7', '8'\\t0\\t\\t2\\t0\\n1\\t11\\t'1', '2'\\t0\\t\\t3\\t0\\n2\\t22\\t'3', '4'\\t0\\t\\t3\\t0
JOIN has both LEFT and RIGHT record sets in its operation, so both are limited by the LOCAL. Now you can see where the LEFT record set has a match on the LOCAL RIGHT record set and where it does not.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-03 21:25:44\" },\n\t{ \"post_id\": 2931, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"ideal\", \"post_text\": \"Hello Dan,\\nI think. In my mind, that TRANSFORM should be executed locally and the behavior you found is a bug
\\nActually, I am not sure regarding what I said in my previous post. I think it is an architectural choice. \\n\\nYour real world issue sounds like a perfect case for the LOOKUP version of JOIN
\\nYes, I did this with option ALL, which is inherently local and copies ds2 on each node, exactly as I wished: JOIN(ds1,ds2,true,fonc(LEFT,RIGHT),ALL);
\\nVery efficient according to my first tests. I think of generalizing this to other similar parts of my code.\\n\\nI think, despite problems I encoutered, that HPCC is efficient but the difficulty is to understand what is does actually and to find the correct ECL command to use. \\n\\nThanks,\\nJM.\", \"post_time\": \"2012-12-03 17:19:46\" },\n\t{ \"post_id\": 2928, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"DSC\", \"post_text\": \"I've had questions about LOCAL before; there's a thread somewhere in here where I questioned just how pervasive LOCAL really is. This is another example of its fuzziness, I think. In my mind, that TRANSFORM should be executed locally and the behavior you found is a bug. I'm probably wrong, though.\\n\\nYour real world issue sounds like a perfect case for the LOOKUP version of JOIN. Have you tried that? I've used it with great success in a couple of my tests. Also, that option makes JOIN inherently local, IIRC.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-03 13:54:18\" },\n\t{ \"post_id\": 2927, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"ideal\", \"post_text\": \"Hello Dan,\\n\\nThanks for your answer. I reach the same conclusion : apart from COUNTER parameter, I guess nothing else is concerned by LOCAL option in PROJECT function. It is reducing the scope of PROJECT's arguments (ds1D) but not improving performances because, as I said before, PROJECT is local by definition. To generalize to other ECL functions, I would say that LOCAL option concerns only ECL function's direct parameters, and first of all, dataset parameters.\\n\\nThen, your suggestion is surely the good one (a JOIN function, instead of PROJECT) if I am willing to gain some performances by distributing crossing data on a cluster. In my real world issue, the large dataset ds1D is equally distributed among all nodes and the little dataset ds2D is copied to each node. \\n\\nThanks,\\nJM.\", \"post_time\": \"2012-12-03 13:46:58\" },\n\t{ \"post_id\": 2918, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"Re: LOCAL PROJECT option explanation\", \"username\": \"DSC\", \"post_text\": \"I don't understand all the details of ECL and the optimizations performed, but I think the 'gotcha' here is that the TRANSFORM() is not following the LOCAL directive. Whether it should or not is something others will have to chime in on. At any rate, it looks like the TRANSFORM() is pulled out into its own scope and that scope allows for global data resolution. The TRANSFORM() is executing on the one node that contains ds1D, because that is where the data is, but because of the global scope it can lookup information on any of the nodes. 
That's my theory, anyway.\\n\\nHere is another version of your code that produces what you expect (nothing), for reasons that you would expect:\\n\\nrec := RECORD\\n INTEGER i:=-1;\\n INTEGER v:=-1;\\n SET OF INTEGER lc:=[];\\nEND;\\n\\nds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);\\nds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);\\n\\nds1D := DISTRIBUTE(ds1,1);\\nds2D := DISTRIBUTE(ds2,2);\\n\\n// fus := PROJECT(ds1D, TRANSFORM(rec,SELF.v:=ds2D[COUNTER].v;SELF:=LEFT),LOCAL);\\n\\nfus := JOIN(ds1D,ds2D,LEFT.i=RIGHT.i,TRANSFORM(rec,SELF.v:=RIGHT.v,SELF:=LEFT),LOCAL);\\n\\n// Following line replicates the PROJECT() output\\n// fus := JOIN(ds1,ds2,LEFT.i=RIGHT.i,TRANSFORM(rec,SELF.v:=RIGHT.v,SELF:=LEFT));\\n\\nOUTPUT(fus);
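For the LOOKUP form of JOIN mentioned elsewhere in this thread, a minimal sketch reusing the same inline datasets; this is an illustration, not code from the original posts:

rec := RECORD
    INTEGER i := -1;
    INTEGER v := -1;
    SET OF INTEGER lc := [];
END;
ds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);
ds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);

// LOOKUP copies the (small) right-hand dataset to every node, so neither
// input needs an explicit DISTRIBUTE and no LOCAL flag is required:
fusLookup := JOIN(ds1, ds2,
                  LEFT.i = RIGHT.i,
                  TRANSFORM(rec, SELF.v := RIGHT.v, SELF := LEFT),
                  LOOKUP);
OUTPUT(fusLookup);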
\\n\\nUnrelated: If you're goal was to distribute the data onto the first two nodes of your system, remember that your first node has an index of zero, not one. This code above distributes the data to the second and third nodes.\\n\\nThis probably doesn't help to answer your original question, but maybe it helps highlight the behavior.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-02 15:27:23\" },\n\t{ \"post_id\": 2917, \"topic_id\": 642, \"forum_id\": 8, \"post_subject\": \"LOCAL PROJECT option explanation\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nThe sentence about LOCAL option : operation is performed on each supercomputer node independently, without requiring interaction with all other nodes to acquire data
\\n\\nis not clear to me.\\n\\nWhen this code is running:\\n\\nrec := RECORD\\n\\tINTEGER i:=-1;\\n\\tINTEGER v:=-1;\\n\\tSET OF INTEGER lc:=[];\\nEND;\\nds1 := DATASET([{1,11,[1,2]},{2,22,[3,4]},{3,33,[5,6]},{4,44,[7,8]},{5,55,[9,10]}],rec);\\nds2 := DATASET([{1,111,[11,12]},{2,222,[13,14]},{3,333,[15,16]},{4,444,[17,18]},{5,555,[19,20]}],rec);\\nds1D := DISTRIBUTE(ds1,1);\\nds2D := DISTRIBUTE(ds2,2);\\nfus := PROJECT(ds1D, TRANSFORM(rec,SELF.v:=ds2D[COUNTER].v;SELF:=LEFT),LOCAL);\\nOUTPUT(fus);
\\n\\n\\nI don't understand how is it possible that node1 does not require interaction with node2 to acquire data when ds1D data is on node1 and ds2D on node2 ?\\n\\nIf it concerns only ds1D, I don't understand neither, because in this case, PROJECT should execute on nodes independently with or without LOCAL option.\\n\\nThis understanding is essential to me because I though that distribution could give me some substantial performance gains by reducing network traffic between nodes. Actually, I don't understand how optimizer takes its decisions because timings results are sometimes hard to understand.\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-12-01 22:53:56\" },\n\t{ \"post_id\": 2934, \"topic_id\": 643, \"forum_id\": 8, \"post_subject\": \"Re: Calculate Median\", \"username\": \"rtaylor\", \"post_text\": \"Answered here: http://hpccsystems.com/bb/viewtopic.php?f=23&t=641&sid=0704c9ef44429f391dd2ecec6e379a71\", \"post_time\": \"2012-12-03 20:42:43\" },\n\t{ \"post_id\": 2920, \"topic_id\": 643, \"forum_id\": 8, \"post_subject\": \"Calculate Median\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow to calculate the median for a particular column..?\\n\\nTried looking at the concept of FieldAggregates.Medians, but not clear about the usage.\\n\\nCan anybody share a example on how to use Median..?\\n\\nCan median be used for cross tab reports (to calculate median based on a grouped value)...?\\n\\nKindly help me regarding this.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-12-03 06:35:26\" },\n\t{ \"post_id\": 2949, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Re: Max size of internal, temporary results\", \"username\": \"ghalliday\", \"post_text\": \"BTW we are hoping to add some support for dictionaries in 4.0.\\n\\nWith that you will be able to say\\n\\n ds(field in myDictionary);\", \"post_time\": \"2012-12-04 15:18:25\" },\n\t{ \"post_id\": 2947, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Re: Max size of internal, temporary results\", \"username\": \"ghalliday\", \"post_text\": \"Yes. In that situation I would do a LOOKUP join against the dataset. \\n\\nI think you'll find it executes *much* quicker.\", \"post_time\": \"2012-12-04 15:17:01\" },\n\t{ \"post_id\": 2945, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Re: Max size of internal, temporary results\", \"username\": \"DSC\", \"post_text\": \"In this case your reason (b) is the cause, and my some-dataset is 5M STRING10 fields.\\n\\nWhat would be a well-performing workaround? 
JOIN, perhaps?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-12-04 14:24:07\" },\n\t{ \"post_id\": 2943, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Re: Max size of internal, temporary results\", \"username\": \"ghalliday\", \"post_text\": \"The normal causes for causing some-dataset to be written to a work unit variable are \\n\\na) using a some-dataset in a child query\\n\\nb) using dataset(field in SET(some-dataset, value))\\n\\nBoth of those typically require the entire dataset to be in memory - which is why they are written to a workunit temporary rather than a disk file.\\n\\nIf you want me to examine the particular cause for your query I would probably need an archive of the query.\\n\\nGavin\", \"post_time\": \"2012-12-04 13:52:20\" },\n\t{ \"post_id\": 2941, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Re: Max size of internal, temporary results\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI believe that if the size limit is exceeded, it will automatically spill to disk. The point is that the workunit should not fail, is that what you are seeing?\\n\\nRegards, \\n\\nBob\", \"post_time\": \"2012-12-04 13:01:07\" },\n\t{ \"post_id\": 2929, \"topic_id\": 644, \"forum_id\": 8, \"post_subject\": \"Max size of internal, temporary results\", \"username\": \"DSC\", \"post_text\": \"I have some ECL code that, after compiling, results in a dataset being read and distributed, then split so it can be processed by two different execution graphs. The data on one side of the split gets stuffed into a temporary named output('a').\\n\\nIf the dataset is too large, that temporary output exceeds the default maximum size for workunit results (10MB). I can override that maximum size with an #OPTION, but should I have to? The split/temporary storage is not something I have directly, knowingly requested. Shouldn't this be handled invisible, or perhaps differently, by the compiler?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-12-03 14:02:31\" },\n\t{ \"post_id\": 2953, \"topic_id\": 647, \"forum_id\": 8, \"post_subject\": \"-\", \"username\": \"naier1\", \"post_text\": \"--\", \"post_time\": \"2012-12-05 05:24:10\" },\n\t{ \"post_id\": 2977, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND Error -- Constant Expression Expected\", \"username\": \"rtaylor\", \"post_text\": \"Lu,And you are right that we have to use the field names as the input since we would like to make the function more flexible so that it can be further connect to user interface and let the clinet to make the decision on which fields to group.
OK, since the object is to allow end-users to select which fields they want to see these calculations on, then let me suggest a more HPCC-style alternative.\\n\\nHPCC has three major components: Thor, Roxie, and ECL:\\n\\n
\\nSo the way HPCC is designed to work is to use Thor to do all your ETL and data preparation work, and Roxie to deliver the results to end-users. End-users don't send queries to Thor, only ECL developers do that. End-users only interface with pre-defined queries on Roxie (usually through some GUI that you've built to allow the interaction).\\n\\nTherefore, given that you want your end-users to select which fields they are interested in calculating on, I would suggest that you simply use Thor to pre-calculate ALL the values that end-users might want to see from ALL your data. Then you just structure your Roxie queries to deliver only the set of fields that the end-user specifies for each given query. This way you're working with the system's design and not against it.\\n\\nBTW, this kind of process is exactly what we teach in our ECL classes that you can sign up for here: http://hpccsystems.com/community/training-events/training\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-10 16:11:11\" },\n\t{ \"post_id\": 2974, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND Error -- Constant Expression Expected\", \"username\": \"dreamer1118\", \"post_text\": \"Thanks, Richard. I got your point through your instructive explanation.\\nAnd you are right that we have to use the field names as the input since we would like to make the function more flexible so that it can be further connect to user interface and let the clinet to make the decision on which fields to group. \\n\\nWe will refer to ECL code-generation program to see if it can solve this issue. Thanks again!\\n\\n\\nLu\", \"post_time\": \"2012-12-07 03:55:02\" },\n\t{ \"post_id\": 2973, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND Error -- Constant Expression Expected\", \"username\": \"rtaylor\", \"post_text\": \"dreamer,We are really new to ECL and the way we do it is to concatenate all the variables we need to do the aggregation function together, not one at a time. Say, we have five variables, v1, v2, v3, v4 ad v5 and group by key1. The output is expected to be like Key1, v1_sum, v2_sum, v3_sum, v4_sum, v5_sum.
OK, that's what I thought you were doing. \\n\\nBut the question remains -- why are you getting the "variable" names from another dataset and not simply writing them in the TABLE code? (BTW, in ECL there are no "variables," only definitions and fields in datasets, so these would normally be termed "fields").\\n\\nBottom line, to do what you want you just write this kind of code:rec := RECORD\\n INTEGER key1;\\n INTEGER v1; \\n INTEGER v2; \\n INTEGER v3; \\n INTEGER v4; \\n INTEGER v5; \\nEND;\\n\\nds := DATASET([{1,1,1,1,1,1},{3,1,1,1,1,1},{2,1,1,1,1,1},\\n {2,1,1,1,1,1},{3,1,1,1,1,1},{3,1,1,1,1,1}],rec);\\n\\t\\t\\t\\t\\t\\t\\t \\nOutRec := RECORD\\n ds.key1;\\n s1 := SUM(GROUP,ds.v1);\\n s2 := SUM(GROUP,ds.v2);\\n s3 := SUM(GROUP,ds.v3);\\n s4 := SUM(GROUP,ds.v4);\\n s5 := SUM(GROUP,ds.v5);\\nEND;\\t\\t\\t\\t\\t\\t\\t \\n\\nt := TABLE(ds,OutRec,key1);\\nt;
Note that, in order to work with the dataset (ds) you must define its RECORD structure, and you have named the fields here already. Since you have done this, the amount of work required to put those field names inside some other dataset that "drives" your TABLE function is the same amount of work required to simply write the TABLE function's RECORD structure with those field names or to write those field names into a constant string that you pass to your FUNCTIONMACRO like this:rec := RECORD\\n INTEGER key1;\\n INTEGER v1; \\n INTEGER v2; \\n INTEGER v3; \\n INTEGER v4; \\n INTEGER v5; \\nEND;\\n\\nds := DATASET([{1,1,1,1,1,1},{3,1,1,1,1,1},{2,1,1,1,1,1},\\n {2,1,1,1,1,1},{3,1,1,1,1,1},{3,1,1,1,1,1}],rec);\\n\\t\\t\\t\\t\\t\\t\\t \\nrec2 := RECORD\\n INTEGER key2;\\n INTEGER f1; \\n INTEGER f2; \\n INTEGER f3; \\n INTEGER f4; \\n INTEGER f5; \\nEND;\\nds2 := DATASET([{4,1,1,1,1,1},{6,1,1,1,1,1},{5,1,1,1,1,1},\\n {5,1,1,1,1,1},{6,1,1,1,1,1},{6,1,1,1,1,1}],rec2);\\t\\t\\t\\t\\t\\t\\t \\n\\nFM_GenXtabSUMs (InDS,OutRec,KeyField) := FUNCTIONMACRO\\n RETURN TABLE(InDS,#EXPAND(OutRec),KeyField);\\nENDMACRO;\\n\\nPassRec1 := '{ds.key1,s1 := SUM(GROUP,ds.v1),s2 := SUM(GROUP,ds.v2),' +\\n 's3 := SUM(GROUP,ds.v3),s4 := SUM(GROUP,ds.v4),' + \\n\\t\\t\\t\\t\\t\\t's5 := SUM(GROUP,ds.v5)}';\\nFM_GenXtabSUMs(ds,PassRec1,Key1);\\n\\nPassRec2 := '{ds2.key2,s1 := SUM(GROUP,ds2.f1),s2 := SUM(GROUP,ds2.f2),' +\\n 's3 := SUM(GROUP,ds2.f3),s4 := SUM(GROUP,ds2.f4),'+\\n\\t\\t\\t\\t\\t\\t's5 := SUM(GROUP,ds2.f5)}';\\nFM_GenXtabSUMs(ds2,PassRec2,Key2);
Passing the constant string keeps #EXPAND happy.\\n\\nIf there is some over-arching reason why you must get those field names from an external configuration file, then you should look at writing an external ECL code-generation program that reads your configuration file and generates the ECL code for you, which you could then launch with the command-line tool ECL.EXE (documented in the Client Tools PDF).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-06 15:56:35\" },\n\t{ \"post_id\": 2967, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND Error -- Constant Expression Expected\", \"username\": \"dreamer1118\", \"post_text\": \"Thanks, Richard. \\n\\nWhat we are trying to accomplish here is to extract information from an input data which contains a series of variable names that needs to do some aggregation (max,sum,ect.).\\n\\nWe are really new to ECL and the way we do it is to concatenate all the variables we need to do the aggregation function together, not one at a time. Say, we have five variables, v1, v2, v3, v4 ad v5 and group by key1. The output is expected to be like Key1, v1_sum, v2_sum, v3_sum, v4_sum, v5_sum.\\n\\nThe way we did is to use an iterate function to glue the variables together and then use #EXPAND() to transfer the information to TABLE function.\\n\\nHopefully I have made my question clearer. Thanks again for your help!\", \"post_time\": \"2012-12-06 02:28:37\" },\n\t{ \"post_id\": 2963, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"Re: #EXPAND Error -- Constant Expression Expected\", \"username\": \"rtaylor\", \"post_text\": \"dreamer,The result from the function cancatVarnames returns a string I want but it fails and gives the error Constant Expression Expected. However it works when I copy the string vars and use the named string as an input of the #EXPAND() function below.
Yes, the error is telling you exactly what the problem is -- #EXPAND expects a string constant and you're giving it a non-constant string. Of course it works when you give it a string constant.I am aware of the issue that \\n\\nThe "Constant expression expected" error is indicating that TABLE requires a constant expression as its RECORD structure and you're trying to build it "on the fly" and the compiler won't let you.\\n\\nfrom viewtopic.php?t=535&p=2450
OK, then I have to ask you the same questions the person in that thread has not yet answered:
\\nLet me explain why I ask. It is my experience that file formats rarely change quickly. Certainly it is possible that, given that you receive a file from a particular source periodically that from period to period different programmers may introduce changes, but if the files are coming from any kind of automated process then their structure only changes when someone changes the process.\\n\\nTherefore, since file formats rarely change, the best place to make any changes is in the code itself. Trying to generate new code from some "configuration file" simply introduces unnecessary complexity -- because it is exactly the same amount of work to change the one line of code as it is to change a "configuration file." \\n\\nI am willing to be convinced that your scenario is an exception to my previous experience so please expand on the reasons you are trying to do things this way. \\n\\nRichard\", \"post_time\": \"2012-12-05 20:27:44\" },\n\t{ \"post_id\": 2956, \"topic_id\": 650, \"forum_id\": 8, \"post_subject\": \"#EXPAND Error -- Constant Expression Expected\", \"username\": \"dreamer1118\", \"post_text\": \"I am trying to extract all the records from one column in the datasets by using the function ITERATE to concatenate them to be part of the input of macro #EXPAND and apply it in the TABLE function to do sum/max. However, it gives the error msg as "Constant Expression Expected". \\n\\nI am aware of the issue that \\n
The "Constant expression expected" error is indicating that TABLE requires a constant expression as its RECORD structure and you're trying to build it "on the fly" and the compiler won't let you.
\\nfrom viewtopic.php?t=535&p=2450\\n\\nI am wondering if there is any alternative to accompolish this task. The code that gives the error is attached below:\\n\\nDATA:\\nEXPORT BureauLayoutSummary := MODULE\\n\\nEXPORT Bureau_Layout:= RECORD\\n\\nSTRING VariableName;\\nSTRING Description;\\nSTRING6 VarType;\\nSTRING12 Format;\\nSTRING1 Key_IND;\\nSTRING20 MissingImputationBefore;\\nSTRING2 MissingImputationAfter;\\nSTRING AggregationFunc;\\n\\nEND;\\n\\nEXPORT Bureau_Summary_Data := \\nDATASET('~citi_bureau::burear_aggrfunc_test.csv',Bureau_Layout,CSV(HEADING(1)));\\nEND;
\\n\\nConcatenate Function:\\n\\n\\nRollUpKey := 'citi_cons_lnk,per_num';\\nSubFuncKeyWord :='SUM';\\nAggrVarNameSet := BureauLayoutSummary.Bureau_Summary_Data(AggregationFunc = SubFuncKeyWord);\\n\\nSTRING cancatVarnames(string RollUpKey, string SubFuncKeyWord) := function\\n AggrVarNameSet cancatenateVars(AggrVarNameSet l, AggrVarNameSet r) := TRANSFORM\\n pre_res := IF (l.VariableName = '','',l.VariableName+',');\\n self.VariableName := pre_res + r.VariableName + '_' + SubFuncKeyWord +':=' + SubFuncKeyWord + '(GROUP,(REAL)'+r.VariableName+')';\\n\\t\\t self := r;\\n END;\\n tempDS := ITERATE(AggrVarNameSet, cancatenateVars(LEFT, RIGHT));\\n string tmp:= tempDS[count(tempDS)].VariableName;\\n\\t return tmp;\\n\\t return '{'+RollUpKey+','+tmp+'},'+ RollUpKey;\\nEND;\\nSTRING vars := cancatVarnames(RollUpKey, SubFuncKeyWord);\\nDataset_aggr := DataPre.Aggregation_func(BUREAUDataImpt.RawData, vars);\\n
\\n\\nThe result from the function cancatVarnames returns a string I want but it fails and gives the error Constant Expression Expected. However it works when I copy the string vars and use the named string as an input of the #EXPAND() function below.\\n\\nCall #EXPAND() macro:\\n\\nEXPORT Aggregation_func(InputData,tempVars) := FUNCTIONMACRO\\n\\n StrRollUp := '{'+RollUpKey+','+tempVars+'},'+ RollUpKey;\\n Result := TABLE(InputData,#EXPAND(StrRollUp));\\nRETURN Result;\\nENDMACRO;
\\n\\nThanks!\", \"post_time\": \"2012-12-05 06:43:57\" },\n\t{ \"post_id\": 3008, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"[quote="markk":9we3yywv]About 8x faster on this small dataset.\\n\\nGreat! The 8x increase makes perfect sense.\\n\\nYou have only one node, so all HPCC processes (Dali, Sasha, ESP, etc.) are all running on that system as well. Not to mention the Thor master process, which coordinates all the slaves. You have a 12 core CPU, so really you're running 15-18 active processes simultaneously (not counting OS work) and there is some contention between those processes within the CPU. You may actually experience a slight speedup by dropping the number of slaves slightly. In a 'real' cluster, you would probably have physical nodes dedicated to Thor and you could closely match the number of slaves with the number of cores without as much fear of contention. With network latency and the need to coordinate the individual slaves' results, I doubt you'd ever achieve a pure 12x speedup, but it's fun trying.\\n\\nExcellent results. Cheers!\\n\\nDan\", \"post_time\": \"2012-12-13 21:58:25\" },\n\t{ \"post_id\": 3007, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"markk\", \"post_text\": \"Hi,\\nAbout 8x faster on this small dataset. I will try it on the full size soon and report the gain.\\nthx\", \"post_time\": \"2012-12-13 21:45:33\" },\n\t{ \"post_id\": 3006, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"[quote="markk":2m2dp7ei]I changed the env to use slavesPerNode="12" and it does help performance,\\nthanks so much!\\n\\nDon't leave me hanging; I'm still learning, too! How much did performance improve?\", \"post_time\": \"2012-12-13 21:23:25\" },\n\t{ \"post_id\": 3005, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"markk\", \"post_text\": \"Hi,\\nI changed the env to use slavesPerNode="12" and it does help performance,\\nthanks so much!\\nStill want to learn how to add a SERVICE func for APPLY(), but that's next week \\nThanks again everyone for all your help, this is cool stuff.\", \"post_time\": \"2012-12-13 21:13:34\" },\n\t{ \"post_id\": 3004, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"rtaylor\", \"post_text\": \"Dan,
I hope Richard and others will chime in if I've said anything incorrect. I'm still learning this system and I don't pretend to know all the ins and outs.
You're doing just fine! \\n\\nRichard\", \"post_time\": \"2012-12-13 17:05:59\" },\n\t{ \"post_id\": 3003, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"[quote="markk":3ks6mye1]The performance with an expression seems (at least to me) to be slow. I'm just curious if this is expected or is there something else I should check/do with my ECL/cluster/setup/etc. ?\\nDoes ~1 M recs/sec _seem_ right ?\\n\\n1M records per second isn't terribly far off, though there are some configuration changes that could make it go faster.\\n\\nThe format of the data is important. If your dataset is living on the filesystem in CSV for XML format then the code has to find field and record delimiters during the read operation, and that will slow things down. Going through an ETL process, where the incoming file is stored in 'native' ECL for (i.e. you use the 'thor' or 'flat' option in an OUTPUT() call to write the data) will show an improvement in subsequent reads. Especially with your example, as all the fields have a defined size.\\n\\nYour system has four spindles but presents itself to the OS as a single drive. This code walks the file from beginning to end. If you have only one Thor slave for that single Thor cluster, then one file descriptor is being used to access the data. Therefore, all the data is sequentially moving through that pipe. You could change your configuration so that multiple Thor slaves are used in that cluster; my hunch is that 12 slaves per node would be a maximum decent value, but you may want to start smaller (like with four). The configuration attribute is 'slavesPerNode' IIRC. After going through the ETL process again, and making sure you DISTRIBUTE() your data before the OUTPUT(), your dataset will be split evenly amongst all the slaves. All the I/O will still go through your RAID0 setup, but each slave will have its own file descriptor. Combined with a fixed-width record, you should see a dramatic I/O speedup. Of course, multiple slaves will start pushing performance bottlenecks elsewhere (CPU, disk I/O speed, backplane, etc.) but I think you will see some kind of improvement. You'll probably have to play with the configurations a bit to see what works best in your environment.\\n\\nI hope Richard and others will chime in if I've said anything incorrect. I'm still learning this system and I don't pretend to know all the ins and outs.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-13 16:47:35\" },\n\t{ \"post_id\": 2997, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"markk\", \"post_text\": \"Hi,\\nThanks for the filter example. It also runs in 70+ seconds, so not any faster than the\\nPROJECT(TRANSFORM()) method.\\nI understand an optimization could occur without the expression. The performance with an expression seems (at least to me) to be slow. I'm just curious if this is expected or is there something else I should check/do with my ECL/cluster/setup/etc. ?\\nDoes ~1 M recs/sec _seem_ right ?\\nthanks again, I really appreciate all the help you all have shown me.\", \"post_time\": \"2012-12-12 23:55:16\" },\n\t{ \"post_id\": 2996, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"Also, if you don't need to save the new dataset -- the one with the 'check' attribute set -- then you don't need the project at all. 
Try a simple filter instead:\\n\\n
ds1 := ds((m_hashIdx_l - m_hashIdx_r) < 180);\\nOUTPUT(COUNT(ds1));
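Picking up the earlier suggestion in this thread to run the raw file through an ETL pass (DISTRIBUTE the data, then write it back in native THOR format) before timing anything, a rough sketch; the record layout and logical filenames below are hypothetical:

PairRec := RECORD
    UNSIGNED8 id_l;
    UNSIGNED1 m_hashIdx_l;
    UNSIGNED8 id_r;
    UNSIGNED1 m_hashIdx_r;
END;

rawDS  := DATASET('~thor::in::pairs_csv', PairRec, CSV);   // delimited source, read once
distDS := DISTRIBUTE(rawDS, HASH32(id_l));                 // spread evenly across the slaves
OUTPUT(distDS, , '~thor::in::pairs_flat', OVERWRITE);      // written as a fixed-width THOR file

// Subsequent jobs read the already-distributed, fixed-width copy:
ds := DATASET('~thor::in::pairs_flat', PairRec, THOR);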
\\n\\nRichard's probably right. That 6 second response is probably due to the compiler noticing that the attribute was basically a constant.\", \"post_time\": \"2012-12-12 22:21:14\" },\n\t{ \"post_id\": 2995, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"rtaylor\", \"post_text\": \"Mark,\\n\\nSince the SELF.check := false version runs so quickly I'm suspicious that the compiler might be optimizing out doing the real work because all the fields stay the same except throwing in a default value of false for that one field, whereas the expression form requires that every record actually go through the process to determine the value to put in the check field. So, my next question is -- where did the input ds come from and was the initial value of that check field already false? \\n\\nYou can also compare the two graphs to see if my suspicion may be right.\\n\\nRichard\", \"post_time\": \"2012-12-12 22:18:17\" },\n\t{ \"post_id\": 2994, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"markk\", \"post_text\": \"Hi,\\nSure. I know its nearly impossible to answer this sort of question without a lot more detail. Right now I have just one node. I can add more or configure it to act like more nodes if that is required. It has 96gb of phys mem and 12 cores. \\nI have a 4 disk raid0 stripe for the /var/lib/HPCCSystems fs. \\nECL is below. The setting of check to false runs in 6 sec, but the expression form runs in 78 sec. I'm sure I'm brain dead on this, thanks again for your advice.\\n\\nmyrec := RECORD\\n unsigned8 id_l;\\n unsigned1 m_hashIdx_l;\\n unsigned8 id_r;\\n unsigned1 m_hashIdx_r;\\n boolean check;\\nEND;\\n\\nmyrec myfunc(myrec ds1) := TRANSFORM\\n SELF.id_l := ds1.id_l;\\n SELF.id_r := ds1.id_r;\\n SELF.m_hashIdx_l := ds1.m_hashIdx_l;\\n SELF.m_hashIdx_r := ds1.m_hashIdx_r;\\n SELF.check := false; // fast\\n // SELF.check := ((ds1.m_hashIdx_l - ds1.m_hashIdx_r) >= 180); // slow\\nEND;\\n\\nds2 := PROJECT(ds, myfunc(LEFT));\\n\\ncount(ds2(check=true));\", \"post_time\": \"2012-12-12 22:02:53\" },\n\t{ \"post_id\": 2993, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"rtaylor\", \"post_text\": \"Mark,I tried PROJECT() and it does what I need to do (thanks!), but the performance is hard to understand. Do you suppose APPLY() could be any faster ? APPLY() requires a SERVICE function, correct ? I will go learn about how to do that now.
PROJECT and APPLY are generally for different purposes. If PROJECT will do the job, that is probably your better choice.\\nThe PROJECT() sub-graph timing takes over a minute to update one member of about 60 million records with a very simple expression in the TRANSFORM function. Is this normal/expected for something small enough to fit into memory ? Interesting if I change the expression to be just an assignment then its about 10x faster.
Can you post your code for this so we can see exactly what you're doing? Also, how many nodes do you have in your cluster? \\n\\nRichard\", \"post_time\": \"2012-12-12 21:45:45\" },\n\t{ \"post_id\": 2992, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"markk\", \"post_text\": \"Hi,\\nok, thanks. I tried PROJECT() and it does what I need to do (thanks!), but the performance is hard to understand. Do you suppose APPLY() could be any faster ? APPLY() requires a SERVICE function, correct ? I will go learn about how to do that now.\\nThe PROJECT() sub-graph timing takes over a minute to update one member of about 60 million records with a very simple expression in the TRANSFORM function. Is this normal/expected for something small enough to fit into memory ? Interesting if I change the expression to be just an assignment then its about 10x faster.\\nthanks much for any guidance,\\nmark\", \"post_time\": \"2012-12-12 21:20:38\" },\n\t{ \"post_id\": 2988, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"Hi Mark,\\n\\nDepending on what want to do, see either PROJECT() or APPLY() -- both built-in functions. Both iterate through the recordset, one record at a time.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-12 14:26:36\" },\n\t{ \"post_id\": 2983, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a custom function\", \"username\": \"markk\", \"post_text\": \"Hi,\\n\\nPerhaps this should be a separate post, \\nbut I want to do something similar, as in -\\n\\nFooRec := RECORD\\n....\\nEND;\\nds1 := dataset('<SomeDataset>', FooRec, ...);\\nMYFUNC(ds1);\\n\\nWhere MYFUNC() operates on one or more members of every element of ds1.\\n\\nIs this possible ? Would you recommend a similar plan ? Where can I find\\nRenato's blog ?\\n\\nthanks,\\nmark\", \"post_time\": \"2012-12-12 00:45:51\" },\n\t{ \"post_id\": 2966, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nUnless Gavin jumps in here and educates us both, I can't think of a way. \\n\\nRichard\", \"post_time\": \"2012-12-05 21:41:51\" },\n\t{ \"post_id\": 2965, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"Hi Richard,\\n\\nActually, I was looking to see if it was possible to write that kind of function purely in ECL (not adding a function via C++). Can I infer from your answer that it isn't possible?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-12-05 21:03:40\" },\n\t{ \"post_id\": 2964, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Re: Writing a TABLE aggregator function\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI don't have an answer for you, but I would probably start with the source code for the AVE function, modify it to become a new MEDIAN function, then go through Renato's "Compiler Tutorial" blog postings (which I have not yet found time to read myself
) to learn the ins and outs of adding a new function to the language.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-05 20:44:33\" },\n\t{ \"post_id\": 2960, \"topic_id\": 651, \"forum_id\": 8, \"post_subject\": \"Writing a TABLE aggregator function\", \"username\": \"DSC\", \"post_text\": \"Is it possible to write an ECL function that could be used in a TABLE scenario, like SUM, COUNT, AVE, MIN, and MAX? Specifically, an ECL function that understands the GROUP keyword correctly? To leverage a question from another thread, given the pseudo-code:\\n\\n
ds1 := <SomeDataset>;\\n\\nFooRec := RECORD\\n\\tINTEGER\\tsomeKey := ds1.someKey;\\n\\tINTEGER\\ttheMedian := Median(GROUP,ds1.someValue);\\nEND;\\n\\nds2 := TABLE(ds1,FooRec,someKey);
\\n\\nWhat would the function Median() look like?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-12-05 13:01:10\" },\n\t{ \"post_id\": 2989, \"topic_id\": 654, \"forum_id\": 8, \"post_subject\": \"Re: mp link closed\", \"username\": \"DSC\", \"post_text\": \"Anecdotally, I've seen this most often when a Thor slave has crashed. The error comes from the Thor master attempting to communicate with that slave. The reason the slave is offline is usually due to a segfault, which generates a core dump and stack trace, both on the slave's node (not the master's). You may be able to find some more information there.\\n\\nHope that helps, or at least points you in the right direction.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-12 14:29:57\" },\n\t{ \"post_id\": 2986, \"topic_id\": 654, \"forum_id\": 8, \"post_subject\": \"mp link closed\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI am not sure I am in the good thread, sorry.\\n\\nWhat could be the main reasons for getting a "system error: 4: mp link closed" error message ? I got one such message when computing a normalisation, and I did not find any clue in the logs.\\n\\nThanks, Thierry.\", \"post_time\": \"2012-12-12 12:06:32\" },\n\t{ \"post_id\": 3014, \"topic_id\": 657, \"forum_id\": 8, \"post_subject\": \"Re: Overwriting Dataset attributes in interfaces\", \"username\": \"sban\", \"post_text\": \"Ah.. thanks very much! this works!\", \"post_time\": \"2012-12-14 19:39:41\" },\n\t{ \"post_id\": 3013, \"topic_id\": 657, \"forum_id\": 8, \"post_subject\": \"Re: Overwriting Dataset attributes in interfaces\", \"username\": \"rtaylor\", \"post_text\": \"sban,My question is, how does one overwrite a dataset attribute of an interface?
You can do it like this:r := RECORD\\n INTEGER x;\\n INTEGER y;\\nEND;\\n\\niGeneric := INTERFACE\\n EXPORT INTEGER a;\\n EXPORT INTEGER b;\\n EXPORT DATASET(r) DS;\\nEND;\\n\\ngeneric2(INTEGER x, INTEGER y) := MODULE(iGeneric)\\n EXPORT a:=x;\\n EXPORT b:=y;\\n EXPORT DATASET(r) DS := DATASET([{5,6},{12,26}],r);\\nEND;\\n\\nMyFunc(iGeneric P) := PROJECT(P.DS,\\n TRANSFORM(r,\\n SELF.x := LEFT.x + P.a,\\n SELF.y := LEFT.y + P.b));\\n\\nMyFunc(generic2(22,10));
The key to working with INTERFACE is to understand that the fundamental structure of the members within the INTERFACE is similar to the fields in a RECORD structure, with the addition of the leading EXPORT. Therefore, your problem began in the INTERFACE itself with your DS definition. As soon as I used the DATASET(recstruct) data type to define the DS member's data type, then I can override that in the MODULE structure byu simply extending that with a default value (the inline DATASET to the roight of the := definition operator).\\n\\nYou could also pass the actual dataset to use in as a parameter to the MODULE, like this:r := RECORD\\n INTEGER x;\\n INTEGER y;\\nEND;\\n\\niGeneric := INTERFACE\\n EXPORT INTEGER a;\\n EXPORT INTEGER b;\\n EXPORT DATASET(r) DS;\\nEND;\\n\\nMyFunc(iGeneric P) := PROJECT(P.DS,\\n TRANSFORM(r,\\n SELF.x := LEFT.x + P.a,\\n SELF.y := LEFT.y + P.b));\\n\\ngeneric3(INTEGER x, INTEGER y, DATASET(r) z) := MODULE(iGeneric)\\n EXPORT a:=x;\\n EXPORT b:=y;\\n EXPORT DATASET(r) DS := z; \\nEND;\\n\\nMyFunc(generic3(10,20,DATASET([{1,2},{3,4}],r)));
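The essential difference between the declaration from the original question and the working one shown above, reduced to a two-line contrast (sketch only):

// EXPORT DS := DATASET([],r);   // value definition - re-declaring DS in a MODULE(iGeneric) gives the syntax error from the original question
// EXPORT DATASET(r) DS;         // typed member - each MODULE(iGeneric) instance can supply its own dataset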
HTH,\\n\\nRichard\", \"post_time\": \"2012-12-14 15:40:14\" },\n\t{ \"post_id\": 3010, \"topic_id\": 657, \"forum_id\": 8, \"post_subject\": \"Overwriting Dataset attributes in interfaces\", \"username\": \"sban\", \"post_text\": \"Hello,\\n\\nWhat I'm trying to do is create an interface which has (among others) a DATASET attribute.\\nThe problem is, when I attempt to instantiate the INTERFACE with a MODULE where I overwrite the attribute with a new dataset (of the same layout), I see an error. \\n\\nMinimal code below:\\n\\nr:={\\nINTEGER x;\\nINTEGER y;\\n};\\n\\niGeneric:=INTERFACE\\n EXPORT INTEGER a;\\n EXPORT INTEGER b;\\n EXPORT DS:=DATASET([],r);\\nEND;\\n\\ngeneric1(INTEGER x, INTEGER y):=MODULE(iGeneric)\\nEXPORT a:=x;\\nEXPORT b:=y;\\nEND;\\n\\ngeneric2(INTEGER x, INTEGER y):=MODULE(iGeneric)\\nEXPORT a:=x;\\nEXPORT b:=y;\\nEXPORT DS:= DATASET([{5,6}],r);\\n Error: syntax error near ":=" : expected datarow, identifier, pattern-name, \\n action, pattern (20, 10), 3002,\\nEND;\\n
\\n\\nMy question is, how does one overwrite a dataset attribute of an interface?\\n(I'm pretty new to this, apologies if I missed something obvious)\", \"post_time\": \"2012-12-14 07:21:22\" },\n\t{ \"post_id\": 3012, \"topic_id\": 658, \"forum_id\": 8, \"post_subject\": \"Re: thor disk file read error\", \"username\": \"bforeman\", \"post_text\": \"Can you post some sample code that is causing the error? Probably the development team may want to see your slave logs, if you could attach them.\\n\\n"File too short" should be easy to track down.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-12-14 14:50:26\" },\n\t{ \"post_id\": 3011, \"topic_id\": 658, \"forum_id\": 8, \"post_subject\": \"thor disk file read error\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nWhile reading from disk file, it throw the following error\\n\\n0: System error: 0: Graph[1], SLAVE [color=#0040FF:ok10mq91][ip address with port]: Error loading /var/lib/HPCCSystems/queries/[color=#0040FF:ok10mq91][target_folder_name]/V2955893466_libW20121214-065200.so: /var/lib/HPCCSystems/queries/[color=#0040FF:ok10mq91][target_folder_name]/V2955893466_libW20121214-065200.so: file too short\\n\\nplease any one suggest me to solve the error.\\n\\nAdvance Thanks,\\n\\nBy\\nGopi\", \"post_time\": \"2012-12-14 12:40:04\" },\n\t{ \"post_id\": 3073, \"topic_id\": 672, \"forum_id\": 8, \"post_subject\": \"Re: 3.10.0 documentation?\", \"username\": \"DSC\", \"post_text\": \"Great! Thanks for the update.\\n\\nDan\", \"post_time\": \"2013-01-08 17:20:37\" },\n\t{ \"post_id\": 3072, \"topic_id\": 672, \"forum_id\": 8, \"post_subject\": \"Re: 3.10.0 documentation?\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello Dan! We are working to deploy 3.10.0 to the portal today. The binaries and VM are available now, along with the release notes. The documentation will be added soon and the overall release will be promoted shortly after. \\n\\nThank you!\", \"post_time\": \"2013-01-08 17:18:28\" },\n\t{ \"post_id\": 3071, \"topic_id\": 672, \"forum_id\": 8, \"post_subject\": \"3.10.0 documentation?\", \"username\": \"DSC\", \"post_text\": \"I noticed that 3.10.0CE has been posted for download. Thanks!\\n\\nIt does not appear that any of the documentation has been updated. Are there documentation changes that encompass changes in 3.10.0? If so, when will they be made available?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-01-08 16:23:40\" },\n\t{ \"post_id\": 3098, \"topic_id\": 675, \"forum_id\": 8, \"post_subject\": \"Re: Session Map Analysis\", \"username\": \"arjuna chala\", \"post_text\": \"Hi ksviswa,\\n\\nPlease take a look at\\n\\nhttps://github.com/hpcc-systems/ECL-WLAM \\n\\nIt is a Web Log Analytics module. It performs session tracking etc. and it might be a good start for finding a solution to your problem.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2013-01-14 15:19:38\" },\n\t{ \"post_id\": 3088, \"topic_id\": 675, \"forum_id\": 8, \"post_subject\": \"Session Map Analysis\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where for a session the user traverses or navigates through different links.\\n\\nWhich is the best way to analyse the most common path the user traverses or the percentage split up for each different paths.\\n\\nFor Ex :\\n\\n\\nSession\\tPage Category\\nsession1\\tabc\\nsession1\\tdef\\nsession1\\tghi\\nsession2\\tdef\\nsession2\\txxx\\nsession3\\tabc\\nsession3\\txxx\\n\\nSession\\t Path\\nsession1\\tabc->def->ghi\\nsession2\\tdef->xxx\\nsession3\\tabc->xxx\\n\\n
\\n\\nThe idea of the above is to find out where a user lands and what are the series of clicks he does for a given session.\\n\\nCan we provide some analysis based on the above..? \\n\\nFor Ex : \\n\\n1.)How many users landed on abc or the percentage of users who landed on abc.\\n2.) How many traversed from abc to xxx and so on..?\\n\\nThought of using string utility or matching the patterns and get the solution accordingly but not sure if its the best approach.\\n\\nKindly help me with a better approach to the above problem scenario.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-01-11 06:43:31\" },\n\t{ \"post_id\": 3091, \"topic_id\": 676, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM, SKIP and multiple functions\", \"username\": \"markk\", \"post_text\": \"Hi,\\n\\nI got the answer - It bails after the first SKIP, but can't guarantee order of ops.\\n\\n-mark\", \"post_time\": \"2013-01-11 13:08:45\" },\n\t{ \"post_id\": 3089, \"topic_id\": 676, \"forum_id\": 8, \"post_subject\": \"TRANSFORM, SKIP and multiple functions\", \"username\": \"markk\", \"post_text\": \"Hi,\\n\\nQuick question, if inside a TRANSFORM() I have:\\n\\nSELF.score1 := IF( func1(l), 30, SKIP);\\nSELF.score2 := IF( func2(l), 15, SKIP);\\nSELF.score3 := IF( func3(l), 5, SKIP);\\n\\nDo all 3 always get run, or does it bail out after the first SKIP encountered and not call any remaining functions ?\\n\\nthanks,\\nmark\", \"post_time\": \"2013-01-11 12:46:33\" },\n\t{ \"post_id\": 3116, \"topic_id\": 683, \"forum_id\": 8, \"post_subject\": \"Re: Arbitrary functions as aggregate table\", \"username\": \"tdelbecque\", \"post_text\": \"Oups, I have just seen that there was a previous post on this subject a little earlier (december the 13th), sorry for the disturbance. It seems the answer is no, indeed ...\\n\\nT.\", \"post_time\": \"2013-01-17 12:53:17\" },\n\t{ \"post_id\": 3115, \"topic_id\": 683, \"forum_id\": 8, \"post_subject\": \"Arbitrary functions as aggregate table\", \"username\": \"tdelbecque\", \"post_text\": \"Hello\\n\\nI was wondering if there was an obvious way of using percentiles un the cross-tabulation form of the TABLE function. For example one could write:\\n\\nTABLE (ds, {ds.countryname, UNSIGNED n := PERCENT(GROUP, age, 0.01)}, countryname);\\n\\nto get the first percentile of the age in each country. \\n\\nThank you,\\n\\nThierry.\", \"post_time\": \"2013-01-17 12:20:57\" },\n\t{ \"post_id\": 3143, \"topic_id\": 689, \"forum_id\": 8, \"post_subject\": \"Re: Can Local be supported by Thor?\", \"username\": \"rtaylor\", \"post_text\": \"On Thor, all your code ends up being compiled into a single .SO and that one .SO is distributed to every node in the cluster, so that every node does exactly the same "work" (including your inline C++ functions) at runtime. We divide the work by distributing the data to the "proper" node at runtime so that every node does their appropriate share of the processing -- that's the point of parallel processing. So trying to "trick" the compiler into executing your functions on the same node is unnecessary -- it's already doing that. It sounds to me like what you actually want is to run all your data through those two functions on a single node -- which obviates the reason for parallel computation in the first place. 
\\n\\nSo, the real question is: what are you trying to accomplish?\\n\\nRichard\", \"post_time\": \"2013-01-21 23:35:32\" },\n\t{ \"post_id\": 3140, \"topic_id\": 689, \"forum_id\": 8, \"post_subject\": \"Re: Can Local be supported by Thor?\", \"username\": \"nvasil\", \"post_text\": \"The reason I am asking for local on Thor is because I need a couple of functions (BEGINC++) to be executed on the same Thor node. I am trying to trick the optimizer by having both take as an input a dataset, hoping they will both get executed on the same node. It doesn't happen though. Is there a way I can enforce it?\", \"post_time\": \"2013-01-21 21:01:26\" },\n\t{ \"post_id\": 3137, \"topic_id\": 689, \"forum_id\": 8, \"post_subject\": \"Re: Can Local be supported by Thor?\", \"username\": \"rtaylor\", \"post_text\": \"nvasil,I noticed that the Local(DATASET) command can only be used for Roxie according to the documentation. It would be very useful if you could support THOR too.
The LOCAL(dataset) function is only used in Roxie within the scope of the ALLNODES action and the purpose of ALLNODES is to make Roxie operate the way that Thor operates (where all nodes are operating in the data they have on each node, instead of simply delivering requested data back to the farmer node and letting it do all the work). IOW, Thor already operates on distributed datasets in parallel on each node, so LOCAL is not necessary on Thor.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-21 20:50:11\" },\n\t{ \"post_id\": 3132, \"topic_id\": 689, \"forum_id\": 8, \"post_subject\": \"Can Local be supported by Thor?\", \"username\": \"nvasil\", \"post_text\": \"I noticed that the Local(DATASET) command can only be used for Roxie according to the documentation. It would be very useful if you could support THOR too. Would that be possible?\", \"post_time\": \"2013-01-21 13:46:59\" },\n\t{ \"post_id\": 3155, \"topic_id\": 690, \"forum_id\": 8, \"post_subject\": \"Re: problems with the executable output of eclcc\", \"username\": \"ghalliday\", \"post_text\": \"I agree - I was hitting the same problems today.\\n\\nThere is an issue open (https://track.hpccsystems.com/browse/HPCC-1960) which is on the list of issues to look at in this sprint.\", \"post_time\": \"2013-01-23 16:31:47\" },\n\t{ \"post_id\": 3133, \"topic_id\": 690, \"forum_id\": 8, \"post_subject\": \"problems with the executable output of eclcc\", \"username\": \"nvasil\", \"post_text\": \"The following runs fine on THOR but when you produce a local executable on your machine you get this\\n/var/jenkins/workspace/CE-Candidate-3.10.0/CE/ubuntu_12_04_x86_64/HPCC-Platform/ecl/eclagent/eclgraph.cpp(1756) : EclAgent::executeGraph : CFile::open /var/lib/HPCCSystems//temp/WLOCAL_24512.~spill__scope__1__WLOCAL_24512, No such file or directory\\nerror: C0002 System error: 2: CFile::open /var/lib/HPCCSystems//temp/WLOCAL_24512.~spill__scope__1__WLOCAL_24512, No such file or directory\\nSystem error: 2: CFile::open /var/lib/HPCCSystems//temp/WLOCAL_24512.~spill__scope__1__WLOCAL_24512, No such file or directory\\n\\nI had raised this issue before. In general OUTPUT seems not to function properly on local executables. I am running 3.10.0\\n\\n\\n//session_id:=DATASET(['1'], z.STRING_REC );\\n STRING_REC := RECORD\\n STRING s;\\n END;\\n\\nDATASET(STRING_REC) GenSession() := BEGINC++\\n #include <iostream> \\n #include <sys/time.h>\\n #include "boost/lexical_cast.hpp"\\n #body\\n \\n timeval tv;\\n gettimeofday(&tv, NULL);\\n std::string random_tag =boost::lexical_cast<std::string>(tv.tv_usec); \\n // we will store it in a dataset so we need an extra 4 bytes\\n // for the length of the string first\\n __result=(char*)rtlMalloc(random_tag.size()+4); \\n int random_tag_size=random_tag.size();\\n memcpy(__result, &random_tag_size, 4);\\n memcpy((char*)__result+4, random_tag.data(), random_tag.size());\\n __lenResult=random_tag.size()+4;\\n std::cout<<"random tag="<<random_tag<<std::endl;\\n ENDC++;\\n\\nsession_id:=GenSession();\\ntemp1:=session_id+session_id;\\ntemp2:=session_id;\\nOUTPUT(temp1);\\nOUTPUT(temp2);\\nOUTPUT(session_id);\\n\\n
\", \"post_time\": \"2013-01-21 15:26:18\" },\n\t{ \"post_id\": 3138, \"topic_id\": 692, \"forum_id\": 8, \"post_subject\": \"Re: utility functions for converting strings to numbers\", \"username\": \"rtaylor\", \"post_text\": \"Simple type casting does that.\\nhttp://hpccsystems.com/community/docs/ecl-language-reference/html/type-casting\\n\\nHere's an example:STRING10 Fred := '12345';\\nGeorge := (INTEGER)Fred;\\nOUTPUT(Fred);\\nOUTPUT(George * 2);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-21 20:55:48\" },\n\t{ \"post_id\": 3135, \"topic_id\": 692, \"forum_id\": 8, \"post_subject\": \"utility functions for converting strings to numbers\", \"username\": \"nvasil\", \"post_text\": \"I haven't found them anywhere. I need to convert strings to numbers and the reverse. Any ideas?\", \"post_time\": \"2013-01-21 17:29:20\" },\n\t{ \"post_id\": 3154, \"topic_id\": 693, \"forum_id\": 8, \"post_subject\": \"Re: Need an explanation for this\", \"username\": \"ghalliday\", \"post_text\": \"To force a single instance per query I would use\\n\\nsessionId := RANDOM() : independent;\\n\\nBTW. I know of several issues with the handling of RANDOM() and other non-pure functions. The current support isn't correct. \\nI've planned the changes that are needed to fix it, but unfortunately those changes are dependent on quite a few other changes which will need to be made first.\", \"post_time\": \"2013-01-23 16:28:34\" },\n\t{ \"post_id\": 3136, \"topic_id\": 693, \"forum_id\": 8, \"post_subject\": \"Need an explanation for this\", \"username\": \"nvasil\", \"post_text\": \"The following code generates a random tag. This tag goes through a function that makes a copy. If you run it you will see that you get two different session_ids. I tried adding #option pure and #option once to inform the compiler that the function does not have side effects, but I am getting the same behavior. I also tried putting the two OUTPUT statements under a sequential statement SEQUENTIAL(OUTPUT(session_id);\\nOUTPUT(Func1(session_id))); but no luck\\n\\n\\nSTRING_REC:= RECORD \\n STRING s;\\nEND;\\nsession_id:=DATASET([''+RANDOM()], STRING_REC);\\n\\nDATASET(STRING_REC) Func1(DATASET(STRING_REC) session_id) := BEGINC++\\n __lenResult=lenSession_id;\\n __result=rtlMalloc(lenSession_id);\\n memcpy(__result, session_id, lenSession_id);\\nENDC++;\\n\\nOUTPUT(session_id);\\nOUTPUT(Func1(session_id));\\n\\n
\\n\\nSo the question is how do I freeze the session_id to a random value once through the execution of the query. I did try Persist and it worked. The problem is that in the subsequent queries it remained the same and it wasn't updated. Is there a way to remove it "unpersist" it\", \"post_time\": \"2013-01-21 17:56:06\" },\n\t{ \"post_id\": 3285, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"rocky.li\", \"post_text\": \"Hi Flavio\\ncould you give some example case for your two method?\\n\\nvery Thanks\", \"post_time\": \"2013-02-01 11:02:25\" },\n\t{ \"post_id\": 3210, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"ghalliday\", \"post_text\": \"As it currently stands the ECL platform doesn't support recursive operations particularly well. However I can think of two possible solutions.\\n\\nFirstly you may be able to express the problem iteratively - which is supported by the LOOP construct.\\n\\nE.g., you could have a list of actions which need to be performed, process each of those actions - each of which could either returning a result or a list of other actions to be performed. Keep processing the outstanding actions until there are none left.\\n\\nThe other possibility depends on what processing you are doing to the data.\\nIf you are running queries which is essentially summarising and pulling together various pieces of data (rather than bulk ETL processing), then you could use SOAPCALL within a roxie query to execute a recursive query. I imagine this might be more what you are trying to achieve, so it may fit quite well.\\n\\nIf you could give any more details of the complexity of the processing at each stage, size of data likely to be returned/processed, and total size of the data being searched/processed it might help.\", \"post_time\": \"2013-01-28 10:57:42\" },\n\t{ \"post_id\": 3201, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"rocky.li\", \"post_text\": \"Hi Flavio,\\nvery thank your help. we only want to know if ECL can help us implement our business model. I do not know if I have descirbe clearly our requirement. It is a recursive model, we must get every level information from another server. Your ever advice me distrubute D1, then process recursive operation in every node. But this way is not good for us, becuase some other IDs need be disturbuted calcuated in every level except the sub jobs.\\n\\nThanks\", \"post_time\": \"2013-01-27 16:04:06\" },\n\t{ \"post_id\": 3181, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"flavio\", \"post_text\": \"Rocky, if you just declare a new activity, the ECL compiler will take care of parallelizing it, if estimates that there are enough resources in the cluster. 
\\n\\nA single Thor cluster with a single execution queue in its scheduler, technically will not run more than one job at a time; however, within a single job, all activities contained in the same subgraph are executed in parallel (the ECL compiler decides what goes in each subgraph, based on the type of activities and the estimated resources in the system).\\n\\nECL plugins are usually blocks of C++ code called from within ECL, normally used, for example, when in need to call an external system, passing records back and forth.\\n\\nI think that the main problem that you currently have, is that you are trying to imperatively control what, how, when and where your code is executed, while in a declarative paradigm like ECL, you are supposed to just tell the system "What" you want to get done, and the system will decide "how" to do it in the most efficient way. You can always take a look at the graphical execution plan to verify that the system is doing things the way you expected them to get done.\\n\\nFlavio\", \"post_time\": \"2013-01-25 12:18:56\" },\n\t{ \"post_id\": 3180, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"rocky.li\", \"post_text\": \"Hi Flavio\\n Your advice bring me some idea, could you help confirm it? following is a example\\n\\nKVpair := RECORD //structure definition of the dataset\\n STRING Key;\\n STRING Value;\\nEND;\\n\\n//Could we sumbit a new job in here?\\nSetFuncLib := SERVICE\\nSTRING SumbitSubJob(STRING value,String Key) : library='examplelib',entrypoint='elSumbitSubJob';\\nEND;\\n\\nDS := DATASET([ {'Key1','12'},\\n {'Key2','13'},\\n {'Key3','14'},\\n {'Key2','15'},\\n {'Key1','16'},\\n {'Key2','17'},\\n {'Key3','18'},\\n {'Key3','19'},\\n {'Key1','20'},\\n {'Key3','21'},\\n {'Key1','22'},\\n {'Key2','23'},\\n {'Key2','24'},\\n {'Key1','25'},\\n {'Key1','26'}\\n ],KVpair); //the set of data\\n \\nDistDS := DISTRIBUTE(DS); \\n\\nNodeNum := STD.system.Thorlib.Node() + 1; //define node number\\nKVpair XF1(KVpair L) := TRANSFORM\\n SELF.Key := L.Key + ' - started on node ' + NodeNum + ':';\\n SELF.Value := SumbitSubJob(L.Value,L.Key);\\nEND;\\n\\nShowDist := PROJECT(DistDS,XF1(LEFT));\\nOUTPUT(ShowDist,NAMED('ShowDist')); \\n\\n///following is c++ code\\nEXAMPLELIB_API void elSumbitSubJob(size32_t & __lenResult,char * & __result,size32_t lenValue, char * value,size32_t lenKey,char * Key)\\n{\\n if (IsSubAcct(Key))\\n {\\n sumbitSubJob();\\n }\\n else\\n {\\n //do some calculate\\n Calculate();\\n }\\n}\\n
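As a hedged illustration of the iterative LOOP approach ghalliday describes earlier in this thread (the record layout and the expansion rule are invented for the sketch): keep a work list of pending items and re-process it until nothing still qualifies.

WorkItem := RECORD
  UNSIGNED1 level;
  STRING    id;
END;

pending := DATASET([{0, 'root'}], WorkItem);

// Pretend each pass expands an item into its child, one level down.
WorkItem expand(WorkItem L) := TRANSFORM
  SELF.level := L.level + 1;
  SELF.id    := L.id + '.child';
END;

// Rows satisfying the loop filter are fed through the body again;
// rows that no longer satisfy it are emitted as finished.
expanded := LOOP(pending, LEFT.level < 3, PROJECT(ROWS(LEFT), expand(LEFT)));

OUTPUT(expanded);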
\\n\\nCould I use ECL plugin to submit a new task to framework when I found it is a sub job. But it seem current node must wait the result of another job. I worry this logic will cause the node resource is lock.\", \"post_time\": \"2013-01-25 08:52:34\" },\n\t{ \"post_id\": 3179, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"flavio\", \"post_text\": \"Rocky,\\n\\nCouldn't you distribute your initial dataset D1 among the nodes of the system and execute the recursive jobs in each node for just the initial subset of records present in that node? \\n\\nFor example, if your initial Dataset D1 has 1000 records, and you have 10 nodes in your Thor cluster, each node would receive 100 records. Every subsequent transformation of these records involving the recursive operations would be processed in the local node (each thread accessing remote data through an ECL plugin that takes care of the data retrieval from the remote system).\\n\\nAs you are describing it, I don't see the need to spawn remote processes in other nodes for the recursive jobs. I assume that you are doing this as an attempt to load balance the system among the available nodes, but if you already have each node starting with a subset of the total records for processing, the load balancing is done for you automatically, across the entire job. This is the same as saying that you use a divide and conquer approach, by just dividing the size of your initial job across the available nodes.\\n\\nIf this works for you, coding this in ECL is relatively trivial.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-01-25 03:33:37\" },\n\t{ \"post_id\": 3177, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"rocky.li\", \"post_text\": \"Flavio\\n\\nYes, we need run following steps for ever sub job. \\n1. we need perform a remoter call to get information.\\n2. we need extract the IDs from these information.\\n3. Some IDs can be caculated. Some IDs is another sub job, they need do same work from step1.\\n3. we want to distributethese IDs to many machine, then calculate them. \\nSo before step2, we can not create the dataset. \\n\\nwe can perpare D1 when we start process. But in fact its action is same with sub job, it is a topest job. so we hope we can also do it in framework.\\n\\nof cause we can generate all information of sub job recursively before we start process.But that is not parallel, the performance is very low.\\n\\nIn fact we also meet similar question in Hadoop. Its map/reduce is distributed, but the split can not be distributed. But we can call another job in MAP function of Hadoop, then merge the result of sub job. It seem HPCC difficulty implement it.\", \"post_time\": \"2013-01-25 02:13:31\" },\n\t{ \"post_id\": 3164, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"Re: how to create a new dataset dynamically?\", \"username\": \"flavio\", \"post_text\": \"Rocky,\\n\\nWhen you say that every job needs to get information from another server, are you indicating that they would need to perform a remote call to a service running in a completely different system to get it?\\n\\nIf I understand your graph correctly, you have a dataset D1, for which you want to iterate over every record and perform a number of independent computations (let's call them transformations). 
As a result of each of these independent computations you will want to generate an independent resultset (that you could save for future reference).\\n\\nAnd based on your previous messages, you would want these computations to happen in parallel, as much as possible.\\n\\nNow a couple of questions so that we can help you better: \\n\\nAre these calculations also depending on information from a remote system, or are they completely self contained and depending only on the input record?\\n\\nIs the the D1 dataset complete by the time you start the process, or do you expect new records to be appended to it as the process runs?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-01-24 12:45:32\" },\n\t{ \"post_id\": 3148, \"topic_id\": 695, \"forum_id\": 8, \"post_subject\": \"how to create a new dataset dynamically?\", \"username\": \"rocky.li\", \"post_text\": \"our example include mutiple level sub jobs, every job need get information from another server, then generate the dataset. It like following struct\\n\\n |--Calculation Item\\n |--sub level2--|--Calculation Item\\n |\\n |-- sub1--|--Calculation Item\\n | |--Calculation Item\\nD1-| \\n | |-Calculation Item\\n |-- sub2-|\\n |-Calculation Item\\n\\nshould we get lowest level item firstly before submit job, then generate mutiple datasets, then call ECL job? \\nCould we generate the sub dataset dynamically in ECL? when we find a item is a sub job, we can call a function to generate a dataset.\\nBecause we can analyse the sub job in C++, could we use call the plugin function for it?\", \"post_time\": \"2013-01-23 04:48:40\" },\n\t{ \"post_id\": 3188, \"topic_id\": 698, \"forum_id\": 8, \"post_subject\": \"Re: Does VL Library Support Voronoi Charts\", \"username\": \"david.wheelock\", \"post_text\": \"We will add Voronoi to the suite of available D3 interfaces in the VL. The current target will be to have it checked into the git repository by mid-February.\", \"post_time\": \"2013-01-25 16:25:46\" },\n\t{ \"post_id\": 3163, \"topic_id\": 698, \"forum_id\": 8, \"post_subject\": \"Does VL Library Support Voronoi Charts\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\n\\nI'm working on Visualization available in HPCC.\\nEalier Version of VL library includes Voronoi Chart, but the latest version have Graph and i cant find any Voronoi Module.\\n\\nI want to Work on Voronoi charts for some client requirement. Can i have some code explaining the use and how to call Voronoi charts through the Latest Vl library.\", \"post_time\": \"2013-01-24 05:48:41\" },\n\t{ \"post_id\": 3206, \"topic_id\": 700, \"forum_id\": 8, \"post_subject\": \"Re: BEGINC++ and RECORD of DATASETS\", \"username\": \"ghalliday\", \"post_text\": \"If you have a single row containing two datasets the rows from the datasets aren't going to stream efficiently. Have you considered returning a single stream of records, and having different functions that return them for different types?\\n\\nE.g.,\\n\\ndoXReturnDoubles\\ndoXReturnIntegers\\n\\nEspecially if you combine it with generating a streaming datasets from an external function.\", \"post_time\": \"2013-01-28 09:47:20\" },\n\t{ \"post_id\": 3205, \"topic_id\": 700, \"forum_id\": 8, \"post_subject\": \"Re: BEGINC++ and RECORD of DATASETS\", \"username\": \"ghalliday\", \"post_text\": \"Here is my first example of that working. (It has also revealed a bug that needs addressing - hence the use of {EMBEDDED}). The code only generates a single row in the output dataset - although I suspect that is what you want for your code. 
If you need code to extend it to more than one row then I will extend the example.\\n\\n\\nRec1 := RECORD\\ninteger x;\\nEND;\\n\\nRec2 := RECORD\\nSTRING y;\\nEND;\\n\\nRec := RECORD\\nDATASET(Rec1) x_{EMBEDDED};\\nDATASET(Rec2) y_{EMBEDDED};\\nEND;\\n\\nDATASET(Rec) MyFunc() := BEGINC++\\n\\n unsigned size = sizeof(size32_t) + 5 * sizeof(__int64) +\\n sizeof(size32_t) + (sizeof(size32_t) + 5 + sizeof(size32_t) + 3);\\n \\n __lenResult = size;\\n __result = rtlMalloc(size);\\n byte * cur = (byte *)__result;\\n\\n //Size of the first dataset is 5 * the size of the elements\\n *(size32_t *)cur = 5 * sizeof(__int64);\\n cur += sizeof(size32_t);\\n //write 1,2,3,4,5\\n for (int i=1; i <=5 ; i++)\\n {\\n *(__int64 *)cur = i;\\n cur += sizeof(__int64);\\n }\\n\\n //Second dataset, this is possibly easier way of doing it (especially if variable size rows)...\\n size32_t * leny = (size32_t *)cur;\\n cur += sizeof(size32_t);\\n\\n byte * start = cur;\\n //Write "Gavin"\\n *(size32_t *)cur = 5;\\n cur += sizeof(size32_t);\\n memcpy(cur, "Gavin", 5);\\n cur += 5;\\n \\n //Next row - "Jim"\\n *(size32_t *)cur = 3;\\n cur += sizeof(size32_t);\\n memcpy(cur, "Jim", 3);\\n cur += 3;\\n \\n //Now back patch the length of the second dataset\\n *leny = (cur - start);\\n\\nENDC++;\\n\\noutput(MyFunc());\\n\\n
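An illustrative follow-on (not part of the original reply), assuming the Rec1/Rec2/Rec/MyFunc definitions above: the embedded child datasets can be pulled back out on the ECL side with NORMALIZE.

// RIGHT refers to the child record inside the nested dataset.
nums  := NORMALIZE(MyFunc(), LEFT.x_, TRANSFORM(Rec1, SELF := RIGHT));
names := NORMALIZE(MyFunc(), LEFT.y_, TRANSFORM(Rec2, SELF := RIGHT));

OUTPUT(nums,  NAMED('ChildIntegers'));
OUTPUT(names, NAMED('ChildStrings'));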
\", \"post_time\": \"2013-01-28 09:17:22\" },\n\t{ \"post_id\": 3169, \"topic_id\": 700, \"forum_id\": 8, \"post_subject\": \"BEGINC++ and RECORD of DATASETS\", \"username\": \"nvasil\", \"post_text\": \"One of the problems with ECL functions is that they cannot return more than one Arguments. If you have more you need to wrap it on a RECORD of DATASETS.\\n\\nI wonder how you create it. Here is an example\\n\\n\\nRec1 := RECORD\\n int x;\\nEND;\\n\\nRec2 := RECORD\\n STRING y;\\nEND;\\n\\nRec : = RECORD\\n DATASET(Rec1) x_;\\n DATASET(Rec2) y_;\\nEND;\\n\\nDATASET(Rec) MyFunc() := BEGINC++\\n \\nENDC++\\n
\\n\\nhow do I create a DATASET(Rec) inside the BEGINC++?\", \"post_time\": \"2013-01-24 16:40:51\" },\n\t{ \"post_id\": 3176, \"topic_id\": 702, \"forum_id\": 8, \"post_subject\": \"Re: Parsing functionality\", \"username\": \"jacksock\", \"post_text\": \"This was very helpful. Thank You both.\", \"post_time\": \"2013-01-24 23:31:27\" },\n\t{ \"post_id\": 3175, \"topic_id\": 702, \"forum_id\": 8, \"post_subject\": \"Re: Parsing functionality\", \"username\": \"arjuna chala\", \"post_text\": \"Jack,\\n\\nPlease review the following code example:\\n\\n\\n\\nIMPORT Std.Uni;\\n\\nRawRec := RECORD\\n UNICODE line;\\nEND;\\n\\nds := DATASET([{u'#BigData=@HPCCSystems. Visit http://hpccsystems.com'}], RawRec);\\n\\t\\t\\t\\t\\t\\t\\t \\n\\nPATTERN UserNameExp := PATTERN(U'[[:alnum:]]')+;\\nPATTERN HashExp := PATTERN(U'[[:alnum:]]')+;\\n\\nPATTERN AtSign := U'@';\\nPATTERN HashSign := U'#';\\n\\nPATTERN UrlExp := PATTERN(U'[A-Za-z]+://[A-Za-z0-9_]+.[A-Za-z0-9_:%&~\\\\?/.=]+');\\nPATTERN UserNameRule := AtSign UserNameExp;\\nPATTERN HashtagRule := HashSign HashExp;\\n\\nRULE MatchRule := (UserNameRule | HashtagRule | UrlExp);\\n\\nParsedRec := RECORD\\n UNICODE username := Uni.toUpperCase(MATCHUNICODE(UserNameExp)); \\n UNICODE hashname := Uni.toUpperCase(MATCHUNICODE(HashExp)); \\n UNICODE url := Uni.toUpperCase(MATCHUNICODE(UrlExp)); \\nEND;\\n\\nfinalOut := PARSE(ds, line, MatchRule, ParsedRec, SCAN, MAX, MANY);\\n\\nOUTPUT(finalOut);\\n\\n
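A small follow-on sketch (not from the original reply), assuming the finalOut result above: filter to the rows where a hashtag actually matched and count occurrences per tag.

hashOnly   := finalOut(hashname != u'');
hashCounts := TABLE(hashOnly, {hashname, UNSIGNED cnt := COUNT(GROUP)}, hashname);

OUTPUT(hashCounts, NAMED('HashtagCounts'));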
\\n\\nHope this helps.\\n\\nArjuna\", \"post_time\": \"2013-01-24 23:30:23\" },\n\t{ \"post_id\": 3173, \"topic_id\": 702, \"forum_id\": 8, \"post_subject\": \"Re: Parsing functionality\", \"username\": \"bforeman\", \"post_text\": \"Hi JS,\\n\\nOne resource to get you started is located here:\\n\\nhttp://hpccsystems.com/community/contributions/data-descriptors-and-simple-example-programs/sentilyze-twitter-sentiment-ana\\n\\nThey are using the Machine Learning libraries, but you can also parse very effectively in ECL.\\n\\nThe ECL Playground shows a pretty good ECL Parse example that is part of the ECL Watch. Since you were asking a question about graphs in another post, I assume you have access to a cluster or an HPCC VM.\\n\\nHere's another example that shows how to PARSE a log file:\\n\\n//Declare the record to store each record from \\n//the raw input file. Since the file has lines of log data,\\n//the record will need one string field to store each line.\\nRawLayout := RECORD\\n STRING rawTxt;\\nEND;\\n\\n//Declare the file. In this example, \\n//for simplicity, the content is shown inline\\nfileRaw := DATASET(\\n [{'127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'},\\n {'This record is absolute garbage and does not match any patterns'}],\\n RawLayout);\\n\\nPATTERN alphaFmt := PATTERN('[a-zA-Z]')+;\\nPATTERN alphaNumbFmt := PATTERN('[a-zA-Z0-9]')+;\\nPATTERN sepFmt := ' '+;\\nPATTERN numFmt := PATTERN('[0-9]')+;\\nPATTERN ipFmt := numFmt '.' numFmt '.' numFmt '.' numFmt; \\nPATTERN identFmt := '-';\\nPATTERN authuserFmt := alphaNumbFmt;\\nPATTERN hoursFromGMT := PATTERN('[\\\\\\\\-\\\\\\\\+]') numFmt;\\nPATTERN yearFmt := numFmt;\\nPATTERN monthFmt := alphaNumbFmt;\\nPATTERN dayFmt := numFmt;\\nPATTERN hoursFmt := numFmt;\\nPATTERN minutesFmt := numFmt;\\nPATTERN secondsFmt := numFmt;\\nPATTERN dateFmt := '[' dayFmt '/' monthFmt '/' yearFmt ':' hoursFmt ':' minutesFmt ':' secondsFmt ' ' hoursFromGMT ']';\\nPATTERN cmdFmt := alphaFmt;\\nPATTERN notQuoteFmt := PATTERN('[^"]')*;\\nPATTERN paramsFmt := OPT('?' notQuoteFmt);\\nPATTERN urlFmt := PATTERN('[^"\\\\\\\\?]')*;\\nPATTERN httpMethodFmt := 'HTTP/' numFmt '.' numFmt;\\nPATTERN requestFmt := '"' cmdFmt urlFmt paramsFmt httpMethodFmt '"';\\nPATTERN statusFmt := numFmt;\\nPATTERN bytesFmt := numFmt;\\n\\nPATTERN line := ipFmt sepFmt identFmt sepFmt authUserFmt sepFmt dateFmt sepFmt requestFmt sepFmt statusFmt sepFmt bytesFmt; \\n\\nLogLayout := RECORD \\n STRING ip := MATCHTEXT(ipFmt);\\n STRING authUser := MATCHTEXT(authuserFmt);\\n STRING date := MATCHTEXT(dateFmt);\\n STRING request := MATCHTEXT(requestFmt);\\n STRING status := MATCHTEXT(statusFmt);\\n STRING bytes := MATCHTEXT(bytesFmt);\\nEND;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nlogFile := PARSE(fileRaw,\\n rawTxt,\\n line,\\n LogLayout,FIRST);\\n\\nOUTPUT(logFile);\\n\\n//How to record error lines that do not match the specified pattern? \\n//In other words malformed input.\\n\\nErrorLayout := RECORD\\nSTRING t := fileRaw.rawTxt;\\nend;\\n\\ne := parse(fileRaw,\\n rawTxt,\\n line,\\n ErrorLayout,NOT MATCHED ONLY);\\n\\nOUTPUT(e);\\n
\\n\\nThe next step is to dive into the Language Reference Manual and see the chapter on Free-form text parsing.\\n\\nHope this helps to get you started!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-24 21:38:12\" },\n\t{ \"post_id\": 3172, \"topic_id\": 702, \"forum_id\": 8, \"post_subject\": \"Parsing functionality\", \"username\": \"jacksock\", \"post_text\": \"Hello,\\n\\nCan somebody help me understand how to parse a block of text and extract key structures from the text? For example, I am trying to parse twitter text and identify all hash tags, re-tweets, URL, user mentions etc. \\n\\nThank You\\n\\nJS\", \"post_time\": \"2013-01-24 19:02:36\" },\n\t{ \"post_id\": 3202, \"topic_id\": 708, \"forum_id\": 8, \"post_subject\": \"Re: Writing ecl-services\", \"username\": \"nvasil\", \"post_text\": \"I think I found it\\nI downloaded the source code and I saw the example plugins/examplelib/examplelib.cpp\\n\\nwhich by the way should also be updated in the documentation\", \"post_time\": \"2013-01-27 16:48:15\" },\n\t{ \"post_id\": 3200, \"topic_id\": 708, \"forum_id\": 8, \"post_subject\": \"Writing ecl-services\", \"username\": \"nvasil\", \"post_text\": \"Two points here\\n--Looking at the documentation I see that you need to include a file hqlplugins.hpp in your code. This file seems to be GPL license which I think it is wrong, because that will require proprietary software to be come free\\n\\n--In C++ of the service I need to allocate memory. In BEGINC++ I used to do it with rtlMalloc. The hqlplugins.hpp header has something else called CTXMALLOC(ct, l)\\nnow ct is supposed to be a variable of type IPluginContext or IPluginContextEx\\nThe question is how do we get access to this variable?\\n\\nNick\", \"post_time\": \"2013-01-27 15:39:54\" },\n\t{ \"post_id\": 3225, \"topic_id\": 709, \"forum_id\": 8, \"post_subject\": \"Re: Cannot import a file containing a service\", \"username\": \"bforeman\", \"post_text\": \"Hi Nick,\\n\\nDon't you need to use the module name as well that the SERVICE is stored in?\\n\\nFor example, the HPCC String libraries are stored in a root STD folder, so:\\n\\nIMPORT STD:\\nSTD.STR.ToUppercase(myfield);
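Applied to the PB service discussed in this thread, that suggestion would look roughly like the sketch below; the name of the folder (module) containing pb.ecl is an assumption.

IMPORT MyFolder;          // the folder that holds pb.ecl (name assumed)

MyFolder.PB.MyFunction(); // qualify with the module, then the EXPORTed PB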
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-29 13:41:28\" },\n\t{ \"post_id\": 3203, \"topic_id\": 709, \"forum_id\": 8, \"post_subject\": \"Cannot import a file containing a service\", \"username\": \"nvasil\", \"post_text\": \"I am having the following problem\\n\\nI have this file pb.ecl\\nwhich exports a service\\n\\nEXPORT PB:=SERVICE\\n MyFunction() : LIBRARY='mylib', entrypoint='MyFunction';\\nEND;\\n
\\n\\nWhen, in a different file, I try to import it as\\n\\nIMPORT PB\\n\\nPB.MyFunction()\\n
\\n\\nIt does not recognize the function\", \"post_time\": \"2013-01-28 00:16:51\" },\n\t{ \"post_id\": 3267, \"topic_id\": 710, \"forum_id\": 8, \"post_subject\": \"Re: Problem passing linking flags with service\", \"username\": \"nvasil\", \"post_text\": \"I did try it, it doesn't work\", \"post_time\": \"2013-01-31 12:51:07\" },\n\t{ \"post_id\": 3263, \"topic_id\": 710, \"forum_id\": 8, \"post_subject\": \"Re: Problem passing linking flags with service\", \"username\": \"richardkchapman\", \"post_text\": \"I assume you mean you have declared some external functions to be called from ECL via a SERVICE definition\\n\\nYou can avoid the need to explicitly link the library in such cases by adding : LIBRARY('libname') after the SERVICE keyword.\", \"post_time\": \"2013-01-31 09:51:57\" },\n\t{ \"post_id\": 3254, \"topic_id\": 710, \"forum_id\": 8, \"post_subject\": \"Re: Problem passing linking flags with service\", \"username\": \"bforeman\", \"post_text\": \"Hi Nick,\\n\\nI'm glad that you found a workaround, but we really need to get this resolved. It is possible if you can open an issue on this on the Issue Tracker? If not I can copy your information to a new issue but if you do it you will get direct feedback from the development team.\\n\\nRegards and thanks,\\n\\nBob\", \"post_time\": \"2013-01-30 21:53:50\" },\n\t{ \"post_id\": 3204, \"topic_id\": 710, \"forum_id\": 8, \"post_subject\": \"Problem passing linking flags with service\", \"username\": \"nvasil\", \"post_text\": \"So I try to compile with eclcc code that has a service. That means I have to add a -L flag with the location of the so object of the service. It doesn't work. This is because the -Lmyservicelocation flag is not appended right before -lmyservice. In fact when I checked the default flags appended by eclcc the -L:/opt/HPCCSystems/lib is appended but not the -L:/opt/HPCCSystems/plugin.\\n\\nThis is probably why when I run the query with ecl run ...\\nI see that in the log my service which I have placed in :/opt/HPCCSystems/plugin is found but then the linker throws an error because it cannot find my service .so. If I copy now my service .so to the :/opt/HPCCSystems/lib directory the ecl run executed properly\", \"post_time\": \"2013-01-28 00:37:30\" },\n\t{ \"post_id\": 3276, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Re: Integrating Fish Eye Chart with ECL\", \"username\": \"jprichard\", \"post_text\": \"Hi Neha\\n\\nI posted the code to the site so that you can get to it at http://hpccsystems.com/community/contributions/data-descriptors-and-simple-example-programs/sigmajs-gexf-example-sudoku\\n\\nThis example is simply to showcase how simple it is to integrate Sigma.js with a roxie service to visualize graphs on the flight. The neat thing is that the graph behind it can have billions of edges but you can hit the roxie query (like the Wikigraph demo) and visualize any point in the graph in a second (or less). \\n\\nIn the example:\\nThere is some ecl for the roxie service that returns gexf xml and also some html/js/css for the webserver to interact with the service.\\n\\nThe crack hpccsystems.com team will hopefully put a working version up on this site soon and I then I will do a blog post giving a more granular explanation about how it works. 
\\n\\nJo\", \"post_time\": \"2013-01-31 20:47:50\" },\n\t{ \"post_id\": 3238, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Re: Integrating Fish Eye Chart with ECL\", \"username\": \"Neha Singh\", \"post_text\": \"Thank you\", \"post_time\": \"2013-01-30 05:17:24\" },\n\t{ \"post_id\": 3234, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Re: Integrating Fish Eye Chart with ECL\", \"username\": \"jprichard\", \"post_text\": \"I will look to drop an example on this site in the next day and post a link to it. \\n\\n(will try and do the simplest example and you can take it from there)\\n\\nRegards\\n\\nJo\", \"post_time\": \"2013-01-29 21:32:58\" },\n\t{ \"post_id\": 3223, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Re: Integrating Fish Eye Chart with ECL\", \"username\": \"Neha Singh\", \"post_text\": \"Thanks a lot. It would be very helpfull if i can get an example code to try out.\", \"post_time\": \"2013-01-29 11:25:44\" },\n\t{ \"post_id\": 3219, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Re: Integrating Fish Eye Chart with ECL\", \"username\": \"jprichard\", \"post_text\": \"Hi Neha\\n\\nusing sigma.js is fairly straightforward and it is a pretty nifty visualization tool. Putting together a visualization using it isn't too complicated but I would probably need to walk you through the pieces.\\n\\n1. You need to figure out how to output gexf format graph xml from a service in ws_ecl. ( a little tricky, but luckily I have an example to send to you)\\n2. You need to plug that service and parameters into sigma.js and it will just work, because it is geared to consume gexf.\\n\\nIf you take a look at http://hpccsystems.com/demos/wikidemo you will see an example where I use sigma.js It is making a call through a proxy to a ws_ecl roxie service but the code stays identical. If you look at the source for the demo you can see that the js code is pretty straightforward. I think I took one of the demos and retrofitted the load gexf call to hit the roxie service.\\n\\nI will speak to the powers that be and see if we can host the code for returning gexf somewhere for you to download.\\n\\nRegards\\n\\nJo Prichard\", \"post_time\": \"2013-01-28 21:38:29\" },\n\t{ \"post_id\": 3207, \"topic_id\": 711, \"forum_id\": 8, \"post_subject\": \"Integrating Fish Eye Chart with ECL\", \"username\": \"Neha Singh\", \"post_text\": \"Can i create fish eye chart (sigma.js) using ecl.How can this be done?\", \"post_time\": \"2013-01-28 10:22:06\" },\n\t{ \"post_id\": 3213, \"topic_id\": 714, \"forum_id\": 8, \"post_subject\": \"Re: Documentation for streamed datasets\", \"username\": \"ghalliday\", \"post_text\": \"As for streamed, I'm not sure if I know of any code that currently uses it. In this case a function is prototyped as returning \\n\\nstreamed dataset(record)\\n\\nThe C++ function needs to create a class instance which implements a kind of iterator, which is then returned. The calling code requests each result record in turn.\\n\\necl/regress/stream.ecl in the HPCC source code contains an example of this and the other dataset return types. 
I've extracted a part of it below:\\n\\n\\nstreamed dataset(namesRecord) streamedNames(string prefix) := BEGINC++\\n\\n#define numElements(x) (sizeof(x)/sizeof(x[0]))\\n\\nclass StreamDataset : public RtlCInterface, implements IRowStream\\n{\\npublic:\\n StreamDataset(IEngineRowAllocator * _resultAllocator, unsigned _lenPrefix, const char * _prefix)\\n : resultAllocator(_resultAllocator),lenPrefix(_lenPrefix), prefix(_prefix)\\n {\\n count = 0;\\n }\\n RTLIMPLEMENT_IINTERFACE\\n\\n virtual const void *nextRow()\\n {\\n const char * const names[] = {"Gavin","John","Bart"};\\n if (count >= numElements(names))\\n return NULL;\\n\\n const char * name = names[count++];\\n size32_t lenName = strlen(name);\\n\\n RtlDynamicRowBuilder rowBuilder(resultAllocator);\\n unsigned len = sizeof(size32_t) + lenPrefix + lenName;\\n byte * row = rowBuilder.ensureCapacity(len, NULL);\\n *(size32_t *)(row) = lenPrefix + lenName;\\n memcpy(row+sizeof(size32_t), prefix, lenPrefix);\\n memcpy(row+sizeof(size32_t)+lenPrefix, name, lenName);\\n return rowBuilder.finalizeRowClear(len);\\n }\\n virtual void stop()\\n {\\n count = (unsigned)-1;\\n }\\n\\n\\nprotected:\\n Linked<IEngineRowAllocator> resultAllocator;\\n unsigned count;\\n unsigned lenPrefix;\\n const char * prefix;\\n};\\n\\n#body\\n return new StreamDataset(_resultAllocator, lenPrefix, prefix);\\nENDC++;\\n
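On the ECL side the streamed function is consumed like any other dataset; a minimal usage sketch (assuming namesRecord is a single variable-length STRING field, which the extract above does not show):

OUTPUT(streamedNames('Mr '));          // Mr Gavin, Mr John, Mr Bart
OUTPUT(COUNT(streamedNames('Ms ')));   // 3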
\\n\\nNote that the example uses classes defined in other files in directory /var/HPCCSystems/componentfiles/cl/include\\n\", \"post_time\": \"2013-01-28 13:55:57\" },\n\t{ \"post_id\": 3212, \"topic_id\": 714, \"forum_id\": 8, \"post_subject\": \"Re: Documentation for streamed datasets\", \"username\": \"ghalliday\", \"post_text\": \"There are two different things - link counted datasets, and streamed datasets.\\n\\nLink counted datasets are now the default representation internally in the engines. The interfaces are in eclagent.hpp, which would need to be included by any plugin that started creating them.\\n\\nThe normal call sequence for variable-length rows is\\n\\n\\nsize32_t initialSize;\\nrow = allocator->createRow(initialSize);\\n\\nsize32_t updatedSize;\\nrow = allocator->resizeRow(<size-needed>, row, updatedSize);\\n\\nreturn allocator->finalizeRow(<final-size>, row, updatedSize);\\n
\\n\\nThe following is the code from the previous example which has been changed to return a link counted dataset:\\n\\n\\nRec1 := RECORD\\ninteger x;\\nEND;\\n\\nRec2 := RECORD\\nSTRING y;\\nEND;\\n\\nRec := RECORD\\nDATASET(Rec1) x_{EMBEDDED};\\nDATASET(Rec2) y_{EMBEDDED};\\nEND;\\n\\n_linkcounted_ DATASET(Rec) MyFunc() := BEGINC++\\n\\n unsigned size = sizeof(size32_t) + 5 * sizeof(__int64) +\\n sizeof(size32_t) + (sizeof(size32_t) + 5 + sizeof(size32_t) + 3);\\n\\n size32_t actualSize;\\n byte * row = (byte *)_resultAllocator->createRow(actualSize);\\n row = (byte *)_resultAllocator->resizeRow(size, row, actualSize);\\n byte * cur = (byte *)row;\\n\\n //Size of the first dataset is 5 * the size of the elements\\n *(size32_t *)cur = 5 * sizeof(__int64);\\n cur += sizeof(size32_t);\\n //write 1,2,3,4,5\\n for (int i=1; i <=5 ; i++)\\n {\\n *(__int64 *)cur = i*100;\\n cur += sizeof(__int64);\\n }\\n\\n //A possibly easier way of doing it...\\n size32_t * leny = (size32_t *)cur;\\n cur += sizeof(size32_t);\\n\\n byte * start = cur;\\n //Write "Gavin"\\n *(size32_t *)cur = 5;\\n cur += sizeof(size32_t);\\n memcpy(cur, "Gavin", 5); \\n cur += 5;\\n \\n //Next row - "Jim"\\n *(size32_t *)cur = 3;\\n cur += sizeof(size32_t);\\n memcpy(cur, "Jim", 3);\\n cur += 3;\\n \\n //Now back patch the length\\n *leny = (cur - start);\\n\\n __countResult = 1;\\n __result = _resultAllocator->createRowset(1);\\n __result[0] = (byte *)_resultAllocator->finalizeRow(size, row, actualSize);\\n\\nENDC++;\\n\\noutput(MyFunc());\\n
\\n\\nNote the child datasets are still marked as EMBEDDED. This is because the called function doesn't currently have access to any allocators that could be used for those children. (Again another issue is open for that.)\", \"post_time\": \"2013-01-28 13:46:41\" },\n\t{ \"post_id\": 3211, \"topic_id\": 714, \"forum_id\": 8, \"post_subject\": \"Documentation for streamed datasets\", \"username\": \"nvasil\", \"post_text\": \"Hi\\n\\nI know there is a way to create a streaming dataset. I have seen somewhere the keyword streamed, but I can't find documentation. I know that in C++ environment this is done with the keyword _LINKCOUNTED_\\n\\nNow here is the thing I am writing a C++ function that returns a bunch of numerical values as strings. There is a reason why I cannot export them as numerical values so bare with me. The problem is that if I use rtlmalloc I need to go through all the values covert them to strings get their sizes and then allocate the memory and eventually do another scan to copy them to the output. \\nI have seen here and there a way to dynamically allocate the elements one by one with \\n_resultAllocator->createRowset(cnt);\\n_resultAllocator->createRow(allocSize);\\n_resultAllocator->finalizeRow(allocSize, row, allocSize);\\n\\nI am not sure how they work. Also in the hqlplugin.hpp I don't seem to find these functions.\\n\\nAny help\\n\\nNick\", \"post_time\": \"2013-01-28 13:20:12\" },\n\t{ \"post_id\": 3229, \"topic_id\": 717, \"forum_id\": 8, \"post_subject\": \"Re: HPCC as a back-end???\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Bob,\\n\\nI will check out the superfiles approach.\\n\\nActually, I had thought about the HPCC-JDBC driver and working on it but I'm stuck - would be glad if you help http://hpccsystems.com/bb/viewtopic.php?f=34&t=719&sid=d743978e56d5a8ed4482dc2adee50081\\n\\nThanks and regards !\", \"post_time\": \"2013-01-29 14:12:09\" },\n\t{ \"post_id\": 3228, \"topic_id\": 717, \"forum_id\": 8, \"post_subject\": \"Re: HPCC as a back-end???\", \"username\": \"bforeman\", \"post_text\": \"How do insert my data directly into HPCC - I don't have any kind of file with me(csv, flat, xml etc.)?
\\n\\nAll data moved directly into HPCC need to be sprayed, and a file needs to be on a landing zone. That file can be exported from MySQL, or perhaps you can use the JDBC driver. I will discuss your option with my colleagues.\\n\\n\\nThe BLOBs columns in the tables are the result of certain external operations which finish after the initial insert i.e the BLOBs are later 'updated' into the tables - how do I handle such a scenario in HPCC(ECL approach?)
\\n\\nWell, of course, updates are possible on HPCC Perhaps you need to explore the superfiles technique, where files that are alike can be updated when needed. A Superfile is a single logical file that contains many "sub-files" of the exact same layout, and using the superfiles libraries you can add and update as needed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-29 13:58:07\" },\n\t{ \"post_id\": 3226, \"topic_id\": 717, \"forum_id\": 8, \"post_subject\": \"Re: HPCC as a back-end???\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi bforeman,\\n\\nI had gone through the PDF that you have suggested.\\n\\nI'm currently stuck with the two questions that I have posted.\\n\\nThanks and regards !\", \"post_time\": \"2013-01-29 13:41:42\" },\n\t{ \"post_id\": 3224, \"topic_id\": 717, \"forum_id\": 8, \"post_subject\": \"Re: HPCC as a back-end???\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nFirst, let me clarify the term "CSV" - You would normally think of CSV as an Excel file, but it can really be any variable length text, you can even treat a whole document as a single record, just specify no seperators, no quotes and go.\\n\\nIf I remember correctly, there is a great article in the Programmer's Guide regarding processing BLOBs in HPCC. Check it out:\\n\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-3.10.0/docs/ECLProgrammersGuide-3.10.0-1.pdf\\n\\nSee Page 36.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-29 13:32:02\" },\n\t{ \"post_id\": 3221, \"topic_id\": 717, \"forum_id\": 8, \"post_subject\": \"HPCC as a back-end???\", \"username\": \"kaliyugantagonist\", \"post_text\": \"I'm a HPCC beginner.\\n\\nI sprayed multiple csv files using ECL and DFU and also have fetched the data using ECL record structure. I read that XML files can also be the input and that there is a JDBC driver too which supports read-only queries to the HPCC data.\\n\\nIn my Java-based web application, I'm currently using a MySQL database. It consists of several tables which have BLOB columns but the data arriving is huge and we have decided to switch to a HPCC for quick storage and retrieval.\\n\\nMy questions are as follows:\\n\\n
How do I insert my data directly into HPCC - I don't have any kind of file with me (csv, flat, xml, etc.)?
\\nThe BLOB columns in the tables are the result of certain external operations which finish after the initial insert, i.e. the BLOBs are later 'updated' into the tables - how do I handle such a scenario in HPCC (ECL approach)?
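As background for the BLOB question above, a minimal illustrative layout (the logical file name and field list are assumptions; the Programmer's Guide section referenced in the replies covers the full spray workflow): ECL stores binary content in a variable-length DATA field.

BlobRec := RECORD
  STRING filename;
  DATA   payload;    // variable-length binary field holding the BLOB
END;

// Hypothetical logical file produced by a BLOB spray.
blobs := DATASET('~thor::crawl::blobs', BlobRec, THOR);

OUTPUT(COUNT(blobs));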
\", \"post_time\": \"2013-01-29 04:16:07\" },\n\t{ \"post_id\": 3257, \"topic_id\": 723, \"forum_id\": 8, \"post_subject\": \"Re: Incorporating external programs\", \"username\": \"bforeman\", \"post_text\": \"Duplicate post. Please see:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=722&sid=6c752b3c8f185acb8ff910bdb07a5c33\", \"post_time\": \"2013-01-30 22:06:03\" },\n\t{ \"post_id\": 3237, \"topic_id\": 723, \"forum_id\": 8, \"post_subject\": \"Incorporating external programs\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nWe have several algorithms for sentiment calculations that access multiple tables which have large no. of records. The processing time is high and we are looking to HPCC as a solution.\\n\\nThe algorithms are tried and tested but written in the Java programming language. Now, even if we import our database into HPCC and use the HPCC-JDBC driver, I don't think it will reduce the processing time - the algorithms will still run in a JVM on the machine where my application is deployed and we won't be able run these algorithms on HPCC nodes in-parallel.\\n\\nIn the 'Introduction' documentation of HPCC, I read the following :\\n\\nECL is compiled into optimized C++ code for execution on the HPCC system platform, and can be used for complex data processing and analysis jobs on a Thor cluster or for comprehensive query and report processing on a Roxie cluster.ECL allows inline C++ functions to be incorporated into ECL programs, and external programs in other languages can be incorporated and parallelized through a PIPE facility\\n\\nHow do we push our java code into HPCC for parallel processing? Is the mentioned 'PIPE' facility relevant and helpful in our scenario? Or is it that we will have to re-write our algorithms in ECL?\", \"post_time\": \"2013-01-30 03:46:23\" },\n\t{ \"post_id\": 3527, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Bo,\\n\\nMost of the new command line tools can be found under:\\n\\nHPCC-Platform/ecl/eclcmd and it's sub directories.\\n\\nWhere web service calls are made you'll see code like:\\n\\n\\n//build webservice request\\nOwned<IClientWUDeployWorkunitRequest> req = client->createWUDeployWorkunitRequest();\\nreq->setFileName(filename);\\n...fill in more request values...\\n\\n//call webservice\\nOwned<IClientWUDeployWorkunitResponse> resp = client->WUDeployWorkunit(req);\\n\\n//process response\\nconst char *workunitId = resp->getWorkunit().getWuid();\\n
\\n\\nRegards,\\nTony\", \"post_time\": \"2013-02-21 22:43:56\" },\n\t{ \"post_id\": 3514, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nIn the previous message, you are saying that:\\n\\nLooking at our command line tool source code may be one good way of seeing how we call some of these services ourselves.\\n\\nCould you point me those entry file names, so I can access them?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-20 23:09:58\" },\n\t{ \"post_id\": 3322, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Or if you prefer JSON, go to http://ip:8010/WsWorkunits/WUInfo.json? ... 204-122853 \", \"post_time\": \"2013-02-04 17:41:13\" },\n\t{ \"post_id\": 3321, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"The .ecm extension is just historical.\\n\\nESP - Enterprise Service Platform\\nESDL - Enterprise Services Definition Language\\nECM - Enterprise component module\\n\\nFirst of all our web pages are implemented on top of our abstract services. We use xslt to generate the basic page, but more and more we try to use Ajax or the JSON equivalent calls into our services to implement our UI.\\n\\nThere is a new EclWatch UI in the works so you would probably want input from someone else how to best extend that.\\n\\nBut from the service stand point, your first decision would be whether this is new functionality or should be part of an existing service like WsWorkunits. Is it new options on an existing operation, or a brand new operation.\\n\\nYou can look at the ESDL to see how to define services and operations. And if xslt is needed for the EclWatch page generated by that operation you can define that in ESDL as well.\\n\\nI would play with some existing operations first until you get a feel for it.\\n\\nThere are a lot of tricks that can be used to test out ESP services, but you might start by going to http://IP:8010/WsWorkunits/WUInfo?form entering a valid Workunit ID and hitting submit. You'll see it actually displays the results after calling the XSLT specified in the ESDL definition for WUInfo. Then go to http://IP:8010/WsWorkunits/WUInfo.xml?W ... 204-122853 (replacing the WUID) and you can see the unprocessed xml (without it calling the XSLT).\", \"post_time\": \"2013-02-04 17:37:27\" },\n\t{ \"post_id\": 3320, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"buptkang\", \"post_text\": \"Hey Tony,\\n\\nYour explanation is so worthy for me, Super nice, thank you. \\n\\nI have several questions based on your explain;\\n\\n1. why do you name as the ".ecm" file format?\\n2. Based on your explained work flow, if I want to create a web service and add some responsive web page, then how could I do it? For example, I want to add a new tab with 2 functional sub tab. Each tab corresponds to a new response web page. \\n\\nThanks a lot.\\nBo\", \"post_time\": \"2013-02-04 17:18:24\" },\n\t{ \"post_id\": 3319, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Bo,\\n\\nWe don't have much external documentation on our source code.\\n\\nBut I can give you some pointers.\\n\\n1. The place you probably want to start looking is under esp/services. 
Each service you are interested in should have a directory under that location.\\n\\n2. ESP has its own interface definition language. ESDL. You will find the\\nESDL definitions for the services in esp/scm with a file extension of ".ecm".\\nFor the most part that will match what you see in the WSDL / XSDs.\\n\\n3. ESP services have 2 main classes. A service class and a binding class. For typical services each operation will have an onOPERATIONNAME() method in the service class that is the entry point for the implementation of that operation.\\n\\nSo for example if you were looking for the implementation of the WsWorkunits WURun operation you would go to the WsWorkunits service class (CWsWorkunitsEx) and look for the method named onWURun().\\n\\nOperation entry point methods have parameters of a context, a request and a response. The request and response will match what you find in the ESDL (.ecm) definition.\\n\\nHope that gets you started. Let me know if you have any questions.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-02-04 16:59:05\" },\n\t{ \"post_id\": 3305, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nThanks a lot for your tips. \\n\\nCurrently I am reading the source code of ESP module in HPCC. Do we have any documentation on those source dev?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-02 20:10:54\" },\n\t{ \"post_id\": 3279, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Ah right. That's the opposite of what I tend to call ECL Web Services.
\\n\\nWe have a whole collection of operations oriented web services in the HPCC. In fact pretty much anything you can do from EclWatch, the command line, or ECL IDE can be done via a web service.\\n\\nThe main problem is the lack of documentation since these have thus far been used mostly for our own development, and perhaps you will find they are a bit finicky toward being used in a specific way.\\n\\nAs a starter all of the WSDLS are available directly from your ESP, as are many test forms, that will allow you to play with the operations without doing development work.\\n\\nEclWatch is composed of many services, and each service supports many operations.\\n\\nFor example WsWorkunits contains all of the operations involved in creating, running and operating workunits. \\n\\nYou can see a list of the the operations with links to test forms and WSDLs by browsing to http://EclWatchIp:8010/WsWorkunits/?list_forms\\n\\nThat is true of each webservice by replacing WsWorkunits with the name of the service.\\n\\nWithought going into detail here and now, here are some of the services you should be able to browse / access: \\n\\nWsSMC - cluster activity\\nWsWorkunits - create, compile, run, publish workunits\\nWsTopology - system topology and configuration\\nWsDfu - file information\\nWsDfuXRef - file system maintenance\\nEclDirect - simple way to run some ECL\\nFileSpray - copy files to and from the HPCC\\nWsPackageProcess - manipulate query file references (via file packages)\\nws_machine - system monitoring\\nws_account - security accounts if enabled\\nws_access - security access if enabled\\n\\nLooking at our command line tool source code may be one good way of seeing how we call some of these services ourselves.\", \"post_time\": \"2013-01-31 22:54:27\" },\n\t{ \"post_id\": 3278, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nThanks for your comments. \\n\\nBut what I am really want to do is to transfer data from my developed client side toward HPCC system using soap web service if possible. I do not intend to retrieve query result from Roxie but instead try to pull ECL script and dependency ECL scripts into HPCC through SOAPCall, then retrieve result from Thor cluster. \\n\\nIf I want to do it, do you know what I should do? Currently I am trying to add code inside of HPCC ECLWatcher source code in order to add my own components. But finally I still want to seperate my web components aside of the HPCC, and try to push and pull data through HPCC web service.\\n\\nIf any comment, that will be appreciated. \\n\\nThanks\\nBo\", \"post_time\": \"2013-01-31 22:26:21\" },\n\t{ \"post_id\": 3277, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"Re: ESP web service through ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Bo,\\n\\nSeveral of our documents refer to the process involved to create a web service via ECL. But I'm not sure if there is anything that puts all the information together in one place.\\n\\nBasically the act of "publishing a query" among other things makes that query accessible as a web service. 
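A minimal sketch of what such a publishable query can look like (names and data invented for illustration): STORED turns the definition into a web-service input, and the OUTPUT becomes the service's result.

STRING25 searchName := '' : STORED('SearchName');   // web-service input

people := DATASET([{'Smith'}, {'Jones'}, {'Wayne'}], {STRING25 lname});

OUTPUT(people(lname = searchName));                 // web-service output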
If you follow the data tutorial available on this site, once you pass the stage of publishing the query you will have a service that is accessible via quite a few web service standards.\\n\\nAny runable ECL workunit can be published, but generally the ones that are useful are parameterized first.\\nWeb service inputs are declared in your ECL code using the "STORED" keyword.\\nDirect outputs (as opposed to creating a file) become web service output.\\n\\nThere is an ESP service called WsECL (i.e. ECL WebServices) that typically runs on port 8002 on the same server as EclWatch. Browsing to http://EclWatchIP:8002 will usually allow you to browse all of your published queries. Access them as Forms. Get the WSDL documents and XML schemas for them. \\n\\nThere you can also submit SOAP, HTTP GET URLs, FORM POSTs, and JSON requests to execute the published queries.\\n\\nYou can also have queries published for any cluster type but roxie clusters are particularly designed for many low latency high throughput queries running simultaneously.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-01-31 22:13:05\" },\n\t{ \"post_id\": 3245, \"topic_id\": 727, \"forum_id\": 8, \"post_subject\": \"ESP web service through ECL\", \"username\": \"buptkang\", \"post_text\": \"Hello there,\\n\\nCould somebody do me a favor to explain the following concept:\\n\\nhttp://hpccsystems.com/products-and-services/products/modules/enterprise-service-platform-esp\\n\\nIn the above link, it mentions that ESP also allows custom Web services to be written using the ECL language itself.. Are there some demos on it or which parts of source code tackle with that?\\n\\nThanks a lot\\nBo\", \"post_time\": \"2013-01-30 15:05:52\" },\n\t{ \"post_id\": 3314, \"topic_id\": 730, \"forum_id\": 8, \"post_subject\": \"Re: ECL - some difficulties with the basics\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nThanks for the inputs!\\n\\nYou may close this question.\\n\\nThanks and regards !\", \"post_time\": \"2013-02-04 11:46:35\" },\n\t{ \"post_id\": 3296, \"topic_id\": 730, \"forum_id\": 8, \"post_subject\": \"Re: ECL - some difficulties with the basics\", \"username\": \"rtaylor\", \"post_text\": \"
At T1, push one csv per module onto the dropzone\\nAt T2, make a call to the HPCC cluster master to execute a command that sprays the above csv files\\nAt T3, via the HPCC-JDBC driver, query the super-file for the data
Since you are "pushing" this whole process from your external data source, your code become pretty simple:IMPORT STD;\\nEXPORT SprayAndAdd(STRING filename) := SEQUENTIAL(\\n\\n/*Spray the csv file from the dropzone*/\\nSTD.File.SprayVariable('10.101.2.171',\\n\\t'/var/lib/HPCCSystems/mydropzone/clientwebcrawldata/'+ filename, \\n\\t,';',,, \\n\\t'mythor','~sapphire::'+filename[..LENGTH(filename)-4],,'http://10.101.2.170:8010/FileSpray', \\n\\t,TRUE,TRUE,FALSE),\\n\\n/*Add logical file*/\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(ClientWebCrawlDataBaseFile,\\n\\t'~sapphire::superfile::parts::'+filename[..LENGTH(filename)-4]),\\nStd.File.FinishSuperFileTransaction()\\n);
Note that I collapsed your process into a single SEQUENTIAL action defined to take a filename as its one parameter. I also changed to SprayVariable instead of fSprayVariable. Passing in the filename allows your code to be generic and puts the file naming job back onto your source.\\n\\nCalling this code from the command line would be something like this:eclplus owner=root password=newuser_123 cluster=thor server=10.101.2.170 ecl=SprayAndAdd('clientwebcrawldata_293705.csv')
You will probably have to play with the "ecl=" parameter a bit to get the directory structure right so the compiler can find the code, but that's essentially all you need.\\n\\nOf course, you could add a second STRING parameter to SparyAndAdd to make it add the filename to whichever superfile you choose to call it for, like this:IMPORT STD;\\nEXPORT SprayAndAdd(STRING filename,STRING SFfilename) := SEQUENTIAL(\\n\\n/*Spray the csv file from the dropzone*/\\nSTD.File.SprayVariable('10.101.2.171',\\n\\t'/var/lib/HPCCSystems/mydropzone/clientwebcrawldata/'+ filename, \\n\\t,';',,, \\n\\t'mythor','~sapphire::'+filename[..LENGTH(filename)-4],,'http://10.101.2.170:8010/FileSpray', \\n\\t,TRUE,TRUE,FALSE),\\n\\n/*Add logical file*/\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(SFfilename,\\n\\t'~sapphire::superfile::parts::'+filename[..LENGTH(filename)-4]),\\nStd.File.FinishSuperFileTransaction()\\n);
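If the external source cannot guarantee unique file names, one possible alternative (an assumption, not something proposed in the thread) is to build the sub-file name from the WORKUNIT built-in, which returns the current workunit id and is therefore unique per execution:

// WORKUNIT returns something like 'W20130201-152531', unique per job.
uniqueSub := '~sapphire::superfile::parts::clientwebcrawldata_' + WORKUNIT;

OUTPUT(uniqueSub);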
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-01 15:25:31\" },\n\t{ \"post_id\": 3280, \"topic_id\": 730, \"forum_id\": 8, \"post_subject\": \"Re: ECL - some difficulties with the basics\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nThe source of the csv files is a remote web application which will generate csv every n minutes. To avoid any errors, we have given the control to push data onto HPCC to this web application - to summarize, the web application does the following:\\nAt T1, push one csv per module onto the dropzone
\\nAt T2, make a call to the HPCC cluster master to execute a command that sprays the above csv files
\\nAt T3, via the HPCC-JDBC driver, query the super-file for the data
\\n\\nThe ECL code to spray csv :\\n\\n/*Spray the csv file from the dropzone*/\\nSprayCSVFile_withContent :=STD.File.fSprayVariable('10.101.2.171','/var/lib/HPCCSystems/mydropzone/clientwebcrawldata/clientwebcrawldata_293705.csv',\\n,';',,, \\n'mythor','~sapphire::ClientWebCrawlData_1',,'http://10.101.2.170:8010/FileSpray', \\n,TRUE,TRUE,FALSE);\\nSprayCSVFile_withContent;\\n
\\n\\nAdd to super-file is :\\n\\n/*Add logical file*/\\naddSubFile := SEQUENTIAL(/*SprayCSVFile_withContent,*/\\nStd.File.StartSuperFileTransaction(),\\n//Std.File.AddSuperFile(ClientWebCrawlDataBaseFile,'~sapphire::superfile::parts::clientwebcrawldata_293705'),\\n//Std.File.AddSuperFile(ClientWebCrawlDataBaseFile,'~sapphire::superfile::parts::clientwebcrawldata_293798'),\\nStd.File.AddSuperFile(ClientWebCrawlDataBaseFile,'~sapphire::superfile::parts::clientwebcrawldata_294895'),\\nStd.File.FinishSuperFileTransaction()\\n);\\naddSubFile;\\n
\\n\\nCurrently, we have executed these codes via ECL IDE manually, thus, it was possible to give different logical names to but in real-time scenario, irrespective of whether the web application overwrites the same csv or pushes csv with unique names, we have to provide unique logical names to each sprayed csv - how do we achieve that? We couldn't find any eclplus command etc. to 'parameterize' the spraying code :\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=9&t=725&sid=a6d75b0b73b88225ef960c7ae7020e5f\\n\\nI hope I have highlighted our issues correctly.\\n\\nThanks and regards !\", \"post_time\": \"2013-02-01 04:03:14\" },\n\t{ \"post_id\": 3271, \"topic_id\": 730, \"forum_id\": 8, \"post_subject\": \"Re: ECL - some difficulties with the basics\", \"username\": \"rtaylor\", \"post_text\": \"Regarding your code, it should simply be:\\nif(NOT Std.File.SuperFileExists('~.::ClientWebCrawlDataBaseFile'),\\n Std.File.CreateSuperFile(ClientWebCrawlDataBaseFile));
\\nIf you look up SKIP in the Language Reference you'll find that it is only used inside a TRANSFORM structure, and SEQUENTIAL is not needed if there's only one action to perform.\\nI have created 10 super-files pertaining to 10 different business modules. Every 10 min. or so, one csv per module needs to be added to the respective super-file, thus, creating a need of giving a unique logical name to the csv (that will be arriving every 10 min.). I was planning to use module-name_timestamp for this purpose but couldn't find a function in ECL for the same. Please note that RANDOM() and GetUniqueInteger() may return the same value when invoked multiple times (please correct me if I'm wrong)
Where are the new CSV files coming from? I would assume they would be named at the source, eliminating the need for you to create names for them. All you would need to do would be to detect the new file and add its name to the appropriate superfile.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-31 15:01:33\" },\n\t{ \"post_id\": 3265, \"topic_id\": 730, \"forum_id\": 8, \"post_subject\": \"ECL - some difficulties with the basics\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nI have created 10 super-files pertaining to 10 different business modules. Every 10 min. or so, one csv per module needs to be added to the respective super-file, thus, creating a need of giving a unique logical name to the csv (that will be arriving every 10 min.). I was planning to use module-name_timestamp for this purpose but couldn't find a function in ECL for the same. Please note that RANDOM() and GetUniqueInteger() may return the same value when invoked multiple times (please correct me if I'm wrong)\\n
\\n\\nThis may sound silly, but I'm really stuck with this - a simple if-else statement! I want to check whether a superfile exists - if it does, don't do anything, but if it doesn't, create one. I tried multiple syntaxes, like:\\n\\n
\\n\\nbut got syntax errors !\\n\\nPlease guide me.\\n\\nThanks and regards !\", \"post_time\": \"2013-01-31 12:06:27\" },\n\t{ \"post_id\": 3365, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"Re: ECL - if-else clarity\", \"username\": \"rtaylor\", \"post_text\": \"if(Std.File.SuperFileExists('~.::ClientWebCrawlDataBaseFile'),SKIP,SEQUENTIAL(\\n\\t\\tStd.File.CreateSuperFile(ClientWebCrawlDataBaseFile)));\\n
\\n\\nif(!Std.File.SuperFileExists('~.::ClientWebCrawlDataBaseFile')){\\nSEQUENTIAL(Std.File.CreateSuperFile(ClientWebCrawlDataBaseFile));\\n}\\nRollAdd := SEQUENTIAL(\\n OUTPUT(BaseSuperfile,,'consolidated_ clientwebcrawldata',OVERWRITE),\\n\\t\\t\\tStd.File.StartSuperFileTransaction(),\\n\\t\\t\\tStd.File.ClearSuperFile(BaseSuperfile), \\n\\t\\t\\tStd.File.AddSuperFile(BaseSuperfile, NewSubFile),\\n\\t\\t\\tStd.File.FinishSuperFileTransaction());\\nJustAdd := SEQUENTIAL(\\n\\t\\t\\tStd.File.StartSuperFileTransaction(),\\n\\t\\t\\tStd.File.AddSuperFile(BaseSuperfile, NewSubFile),\\n\\t\\t\\tStd.File.FinishSuperFileTransaction());\\n\\nIF(STD.File.GetSuperFileSubCount(BaseSuperfile) = 99,RollAdd,JustAdd);
BTW, the concept of "max 100 subfiles in a superfile" is not a hard and fast rule, but only a guideline. It's just a suggestion that you should look at your volume of data coming in and decide what period would be most appropriate for rolling all your data into a new base file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-06 18:24:54\" },\n\t{ \"post_id\": 3361, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"Re: ECL - if-else clarity\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nThanks for your inputs.\\n\\nCan you please have a look at the expected if-else code snippet pertaining to checking if the max. no of sub-files has been reached for a super-file in my original post(where IF(true) then a series of actions, ELSE some other actions)?\\n\\nThanks and regards !\", \"post_time\": \"2013-02-06 17:18:55\" },\n\t{ \"post_id\": 3357, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"Re: ECL - if-else clarity\", \"username\": \"rtaylor\", \"post_text\": \"kaliyugantagonist To summarize, I want some guidelines/examples etc. to handle ACTIONS, FUNCTIONs with complex return types within an IF statement.
For actions, you simply use the SEQUENTIAL and/or PARALLEL actions to reduce the IF to a single true or false action to perform, like this:ActTrue := SEQUENTIAL(OUTPUT('Step 1'),OUTPUT('Step 2'),OUTPUT('Step 3'));\\nActFalse := PARALLEL(OUTPUT('False Value 1'),OUTPUT('False Value 2'));\\n\\nIF(x = 1,ActTrue,ActFalse);\\nIF(y = 1,ActTrue,ActFalse);
You will note that most superfile example code is contained within SEQUENTIAL actions, for the reasons expressed in the Programmer's Guide articles.\\n\\nAnd this example is based on your pseudo code:\\n\\nTrueFlag1 := TRUE;\\nTrueFlag2 := FALSE;\\n\\nds := DATASET([{'W',1},{'X',2},{'Y',3},{'Z',4}],{STRING1 Ltr, UNSIGNED1 Nbr});\\n\\nRecStruct := MODULE\\n EXPORT Layout1 := RECORD\\n\\t ds.Ltr;\\n\\t ds.Nbr;\\n\\tEND;\\n EXPORT Layout2 := RECORD\\n\\t ds.Nbr;\\n\\t ds.Ltr;\\n\\tEND;\\nEND;\\n\\nIF(TrueFlag1,OUTPUT(ds,RecStruct.Layout1),OUTPUT(ds,RecStruct.Layout2)); \\nIF(TrueFlag2,OUTPUT(ds,RecStruct.Layout1),OUTPUT(ds,RecStruct.Layout2));
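\\n\\nAnd since you also asked about FUNCTIONs: the only firm requirement is that the true and false results of an IF have the same type. As a quick sketch (reusing ds and TrueFlag1 from above; the filters are made up purely for illustration), FUNCTIONs that each return a recordset of ds work fine as the two results:\\n\\nEvenRecs(DATASET(RECORDOF(ds)) inDS) := FUNCTION\\n RETURN inDS(Nbr % 2 = 0);\\nEND;\\nOddRecs(DATASET(RECORDOF(ds)) inDS) := FUNCTION\\n RETURN inDS(Nbr % 2 = 1);\\nEND;\\n\\nOUTPUT(IF(TrueFlag1,EvenRecs(ds),OddRecs(ds)));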
If these examples don't fully cover what you're looking for, please expand on what you mean by "FUNCTIONs with complex return types." \\n\\nHTH,\\n\\nRichard", \"post_time\": \"2013-02-06 14:48:21\" },\n\t{ \"post_id\": 3352, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"Re: ECL - if-else clarity\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nI had seen the videos and read the documentation too before I posted the question - the action part of the IF statement is where both my concept and syntax are unclear. I will quote the specific examples:\\nIn my original question, I want to execute several steps if the super-file has more than 99 sub-files - how and where do I fit these actions in an IF statement?
\\nSuppose I have a file (a 'RECSTRUCT' MODULE), say recordstruct.ecl, where I have kept all the RECORD definitions, and in another file, say moduleops.ecl, I want to do something like this (pseudo-code):\\n
\\n\\n\\nmoduleName := ...;\\nmoduleRecStruct := ...;\\n\\nIF(moduleName is equal to 'clientwebcrawldata'){\\n moduleRecStruct := RECSTRUCT.Layout_CLIENTWEBCRAWLDATA;\\n}\\n.\\n.\\n/*use moduleRecStruct in OUTPUT etc.*/\\n
Suppose I create multiple functions in an ECL file and want to call them from the trueresult/falseresult of an IF statement - again there will be restrictions on the return type of the functions. How do I proceed?
\\n\\nTo summarize, I want some guidelines/examples etc. to handle ACTIONS, FUNCTIONs with complex return types within an IF statement.\\n\\nThanks and regards !", \"post_time\": \"2013-02-06 11:44:10\" },\n\t{ \"post_id\": 3323, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"Re: ECL - if-else clarity\", \"username\": \"rtaylor\", \"post_text\": \"Your problem is simply one of syntax. There is no keyword "ELSE" in ECL and the true and false return values are never contained within curly braces.\\n\\nHere is an example of the correct syntax for the IF function (using actions as the true and false results):\\n\\nx := 1;\\ny := 2;\\n\\nActTrue := OUTPUT('True Value');\\nActFalse := OUTPUT('False Value');\\n\\nIF(x = 1,ActTrue,ActFalse);\\nIF(y = 1,ActTrue,ActFalse);
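\\n\\nAs another small sketch of the same rule - an IF whose result is an action, with the false result omitted (only allowed because the result is an action; this assumes IMPORT STD and a hypothetical superfile name):\\n\\nIMPORT STD;\\nsfName := '~myscope::mysuperfile';\\nIF(NOT STD.File.SuperFileExists(sfName),STD.File.CreateSuperFile(sfName));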
If you haven't yet, I would suggest that you might take a look at these videos explaining ECL's basic concepts and syntax: \\nhttp://hpccsystems.com/community/training-videos/ecl-concepts-1\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-04 19:16:36\" },\n\t{ \"post_id\": 3313, \"topic_id\": 735, \"forum_id\": 8, \"post_subject\": \"ECL - if-else clarity\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nAs per the ECL Programmer's guide, max. of 100 sub-files can be added to a super-file, hence, some kind of a rolling is required. Accordingly, before adding a sub-file, we need to check if that threshold is reached. The sample code is :\\n\\n/*ECL file used to spray the csv files*/\\nif(BaseSuperfile. GetSuperFileSubCount(‘clientwebcrawldata’) == 99){\\n\\ttempBaseSuperfile := OUTPUT(BaseSuperfile,,’consolidated_ clientwebcrawldata’,OVERWRITE);\\n\\tSEQUENTIAL(\\n\\t\\ttempBaseSuperfile,\\nStd.File.StartSuperFileTransaction(),\\nStd.File.ClearSuperFile(BaseSuperfile), \\nStd.File.AddSuperFile($.DeclareData.BaseFile, tempBaseSuperfile),Std.File.FinishSuperFileTransaction(),\\nSTD.File.DeleteLogicalFile(tempBaseSuperfile)\\n);\\n}else{\\n/*Add subfile normally to the superfile*/\\n}
\\n\\nNow, as per the IF statement doc. :\\n\\nIF(expression, trueresult [, falseresult ])\\n\\nIFF(expression, trueresult [, falseresult ])\\n\\nexpression A conditional expression. \\ntrueresult The result to return when the expression is true. This may be a single value, a SET of values, a recordset, or an action to perform. \\nfalseresult The result to return when the expression is false. This may be a single value, a SET of values, a recordset, or an action to perform. This may be omitted only if the result is an action. \\nReturn: IF returns a single value, set, recordset, or action.
\\n\\nWhile it is possible to call an action, we are not able to achieve the behavior mentioned above, i.e. if(true){some action}else{some action}
\\n\\nThanks and regards !\", \"post_time\": \"2013-02-04 11:44:00\" },\n\t{ \"post_id\": 3382, \"topic_id\": 736, \"forum_id\": 8, \"post_subject\": \"Re: Pausing workunits\", \"username\": \"tdelbecque\", \"post_text\": \"I realize now that my description was maybe confusing. Actually the program was building an attribute that was to be persisted, so there is no issue with the PERSIST mechanism. \\nI think that there was no problem indeed, but just the fact that I did not know that 'Pause now' was making the current graph to abort rather than just making it to freeze. \\n\\nThanks again.\", \"post_time\": \"2013-02-07 16:23:47\" },\n\t{ \"post_id\": 3381, \"topic_id\": 736, \"forum_id\": 8, \"post_subject\": \"Re: Pausing workunits\", \"username\": \"jsmith\", \"post_text\": \"PauseNow - means interrupt the current subgraph, i.e. aborted it, and put the job into a paused state.\\nPause - means, pause after the current subgraph has completed.\\n\\nSo resuming after a pausenow, will require it to redo the subgraph it was currently some way through.\\n\\nNeither explain why a PERSIST that had already built would rebuild on resubmit if neither the ECL had change, nor any input dataset though.\", \"post_time\": \"2013-02-07 16:14:42\" },\n\t{ \"post_id\": 3380, \"topic_id\": 736, \"forum_id\": 8, \"post_subject\": \"Re: Pausing workunits\", \"username\": \"tdelbecque\", \"post_text\": \"Thank you for this answer. \\n\\nNor the code or data had changed. If I happen to find a reproductible manip I will describe it. Btw, I think I clicked on 'Pause now', rather than 'Pause', may be this has an impact ...\\n\\nThierry.\", \"post_time\": \"2013-02-07 16:09:26\" },\n\t{ \"post_id\": 3379, \"topic_id\": 736, \"forum_id\": 8, \"post_subject\": \"Re: Pausing workunits\", \"username\": \"bforeman\", \"post_text\": \"Regarding PERSIST, the only reason it would be rebuilt would be if either your code OR data changed during the process.\\n\\nBut if it's rebuilding your PERSIST on a resubmit and neither of the above are true, then it may be a bug, in which case we'll probably need an example.\\n\\nRegarding Pause, AFAIK it was designed to pause a job, such that it paused after the subgraph it was currently processed had finished and resumed where it had left off when 'Resume' was hit.\\n\\nBut we will test a little more here and will let you know if we find any issues.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-07 16:03:42\" },\n\t{ \"post_id\": 3318, \"topic_id\": 736, \"forum_id\": 8, \"post_subject\": \"Pausing workunits\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI do not think I am in the good thread, but I do not see what should be the good one, so excuse me.\\n\\nI run an heavy workunit for some hours, and I paused it in order to allow a smaller work unit that was blocked to pass and do its job. After resuming my heavy workunit and looking at the graph, I had the feeling that the workunit had started from the begining again (there was an attribute definition with a PERSIST service, and the graph shows clearly that writing process has been reinitiated), and so the computing hours spent previously are lost. Is it the expected behavior ? 
If this is so, 'Pause' is a rather misleading name for this functionality ...\\n\\nThanks, \\n\\nThierry.\\n\\nPS, btw, I have never been lucky with this Pause feature.", \"post_time\": \"2013-02-04 15:24:17\" },\n\t{ \"post_id\": 3377, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"rtaylor\", \"post_text\": \"No, because the WIDTH works with LOCAL on INDEX and not on BUILD.\\n\\nGood try though! \", \"post_time\": \"2013-02-07 15:30:20\" },\n\t{ \"post_id\": 3376, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"DSC\", \"post_text\": \"Is it perhaps the case that LOCAL simply has to match between the INDEX and BUILD, like DISTRIBUTED?\", \"post_time\": \"2013-02-07 15:24:08\" },\n\t{ \"post_id\": 3373, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"rtaylor\", \"post_text\": \"By golly, you're right - I see that the LOCAL option is not documented on INDEX. Jake already submitted JIRA ticket 8771 to address this WIDTH and BUILD issue, so I'll update the docs when they've decided how it's going to actually be.
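\\n\\nIn the meantime, the combination this thread converges on - LOCAL on the INDEX declaration, WIDTH on the BUILD, with the WIDTH value chosen to divide evenly into the node count of the Thor cluster doing the build - would look roughly like this (only a sketch; the filenames and layout are hypothetical):\\n\\nds := DATASET('~myscope::somedata',{UNSIGNED4 somekey, STRING20 somevalue, UNSIGNED8 fpos {VIRTUAL(fileposition)}},THOR);\\nidx := INDEX(ds,{somekey,fpos},'~myscope::narrowidx',LOCAL);\\nBUILD(idx,WIDTH(4),OVERWRITE);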
\", \"post_time\": \"2013-02-07 15:15:06\" },\n\t{ \"post_id\": 3370, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"DSC\", \"post_text\": \"OK, now you've confused me more.\\n\\nFuther, I just looked at the ECL reference manual and, according to it, INDEX does not support LOCAL option. Does it really, and the option is undocumented?\\n\\nI understand that LOCAL on BUILD basically means "sort each Thor node's data independently" but what I don't understand is if that adversely affects the root node -- the node that is consulted during a general search (like, a simple index filter). I seem to remember having problems in the past with not finding records in that scenario and my theory was the BUILD(LOCAL) misconfigured the root, making a binary search infeasible. That was just a theory, though.\", \"post_time\": \"2013-02-07 14:28:37\" },\n\t{ \"post_id\": 3369, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOne more missing piece to the puzzle (from one of the developers):
I think, confusingly, LOCAL on a BUILD means perform a local sort rather than building a local index. I can’t remember why – possibly if it has been preceded by a keyed distribute.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-07 13:55:25\" },\n\t{ \"post_id\": 3368, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"DSC\", \"post_text\": \"Thanks, Richard!\\n\\nRelated: If you use LOCAL in both the definition of an index and in the BUILD for that index, does that impose any limitations on how that index can be used in either Thor or Roxie?\\n\\nThanks again,\\n\\nDan\", \"post_time\": \"2013-02-07 12:46:07\" },\n\t{ \"post_id\": 3364, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nMystery solved!\\n\\nThe LOCAL option has to be on the INDEX definition (not the BUILD). And there's an additional restriction I just learned of -- the WIDTH number must be < the number of nodes on the Thor cluster building it AND that number must be evenly divisible into the number of nodes on the Thor cluster building it.\\n\\nThe docs will be updated with this new information. Thanks for bringing it up,\\n\\nRichard\", \"post_time\": \"2013-02-06 18:09:27\" },\n\t{ \"post_id\": 3355, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":25fx1yqk]Reading the error, it sounds like LOCAL is missing from the BUILD, did you try adding that option?\\n\\nSorry, I forgot to mention that. I did try adding LOCAL to the BUILD but that didn't change the error. That was the only thing I tried, though. I surfed the source code and located the checks that produced the error, but could not determine the conditions under which the WIDTH option would work when the number of segments was less than the number of nodes in the thor cluster. I gave up at that point and posted the question.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-06 13:34:53\" },\n\t{ \"post_id\": 3353, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"Re: WIDTH option for BUILD\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nReading the error, it sounds like LOCAL is missing from the BUILD, did you try adding that option?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-06 13:29:03\" },\n\t{ \"post_id\": 3333, \"topic_id\": 739, \"forum_id\": 8, \"post_subject\": \"WIDTH option for BUILD\", \"username\": \"DSC\", \"post_text\": \"I'm experimenting with the idea of building indexes on one cluster for use on another. The clusters are of different size, so I'm trying to use the WIDTH option in the BUILD() function to indicate the size of the destination Roxie cluster (which is smaller than the source, BTW, and not by a multiple). I'm seeing the following error during execution:\\n\\nError: System error: 0: Graph[81], indexwrite[84]: Unsupported, refactoring to few parts only supported for local indexes. (0, 0), 0,
\\n\\nThis is a payload index, sourced from a recordset built within a function. The actual BUILD statement is pretty simple (segmentCount is an argument to the function):\\n\\naction := BUILD(idx,WIDTH(segmentCount),OVERWRITE);
\\n\\nI see nothing in the documentation describing a limitation on using the WIDTH option. Is my expectation out of line? Is it possible to build a smaller index? What are the limitations I'm running into?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-02-05 13:40:15\" },\n\t{ \"post_id\": 3525, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"kovacsbv\", \"post_text\": \"[quote="pschwartz":xyokskuj]With the ecl command line, an ini file is not required. Just set the correct options to a command and you will be off and running.\\n\\nRight, they're not required, but save a lot of typing when trying things out before automating them.\\n\\nIt seems Windows has them (the HPCC Client Tools manual talks about them), so if the answer is "Windows has them but Linux doesn't," that's the answer I'm looking for. But it seems there is one.\\n\\nI ran some greps on the source code looking for it, and right at the beginning of dfuplus' main.cpp main() definition, it seems the format is dfuplus.ini (all lowercase, no dot on the front, but with .ini on the end.).\\n\\n265 int main(int argc, const char* argv[])\\n266 {\\n267 InitModuleObjects();\\n268\\n269 if ((argc >= 2) && ((stricmp(argv[1], "/version") == 0) || (stricmp(argv[1], "-version") == 0)))\\n270 {\\n271 printVersion();\\n272 return 0;\\n273 }\\n274\\n275 Owned<IFile> inifile = createIFile("dfuplus.ini");\\n276 if(argc < 2 && !(inifile->exists() && inifile->size() > 0))\\n277 {\\n278 handleSyntax();\\n279 return 0;\\n280 }\\n
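From that, it looks like the file is opened with no path at all, i.e. from whatever directory dfuplus is run in, and named exactly dfuplus.ini. So my guess (and it is only a guess) is that a small file called dfuplus.ini in the current working directory, with the ESP URL spelled out with scheme and port rather than just 'localhost', is what it wants, e.g.:\\n\\nserver=http://localhost:8010\\n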
\\n\\nHowever, finding out where the Owned class is at is at and what directories its implementation looks in is beyond the scope of this book.\\n\\nI'll try the current local directory that the script is running from.\", \"post_time\": \"2013-02-21 19:09:22\" },\n\t{ \"post_id\": 3524, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"pschwartz\", \"post_text\": \"With the ecl command line, an ini file is not required. Just set the correct options to a command and you will be off and running.\", \"post_time\": \"2013-02-21 18:08:06\" },\n\t{ \"post_id\": 3523, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"kovacsbv\", \"post_text\": \"The output doesn't really have much on where to put the .INI file:\\n\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ ecl help\\n\\nUsage:\\n ecl [--version] <command> [<args>]\\n\\nCommonly used commands:\\n deploy create a workunit from an ecl file, archive, or dll\\n publish add a workunit to a query set\\n unpublish remove a query from a query set\\n run run the given ecl file, archive, dll, wuid, or query\\n activate activate a published query\\n deactivate deactivate the given query alias name\\n queries show or manipulate queries and querysets\\n\\nRun 'ecl help <command>' for more information on a specific command\\n
\\n\\necl help <subcommands> didn't have anything either.\", \"post_time\": \"2013-02-21 18:05:46\" },\n\t{ \"post_id\": 3522, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"pschwartz\", \"post_text\": \"Try to use the ecl command line on your linux system.\\n\\nThere is a link to it in /usr/bin so it should be on your users path.\\n\\nrun `ecl help` to see how to use it.\", \"post_time\": \"2013-02-21 15:52:37\" },\n\t{ \"post_id\": 3521, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"kovacsbv\", \"post_text\": \"I tried creating ones in all uppercase and all lowercase, but haven't had any luck with it. \\n\\nEditing in details. I should probably go further into detail of what I tried.\\n\\n which eclplus\\n/usr/bin/eclplus
\\n\\nAlso, I probably won't be able to write to /usr/bin/ for this-and-that because that would be a privileged operation for an administrator.\\n\\nSo, I tried creating a ~/ECLPLUS.INI, a ~/.eclplus.ini, a ~/.eclplus, but that didn't work either:\\n\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ dfuplus action=spray dstcluster=mythor format=csv encoding=utf8 quote=\\\\" srcip=localhost srcfile=/mnt/hpcc_storage/HPCCSystems/mydropzone/2013021519.BLECK dstname=~logs::tagged::201302112\\nERROR: Esp server url not specified.\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ echo -n "server=localhost\\\\n" > ~/.eclplus\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ dfuplus action=spray dstcluster=mythor format=csv encoding=utf8 quote=\\\\" srcip=localhost srcfile=/mnt/hpcc_storage/HPCCSystems/mydropzone/2013021519.BLECK dstname=~logs::tagged::201302112\\nERROR: Esp server url not specified.\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ echo -n "server=localhost\\\\n" > ~/ECLPLUS.INI\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ dfuplus action=spray dstcluster=mythor format=csv encoding=utf8 quote=\\\\" srcip=localhost srcfile=/mnt/hpcc_storage/HPCCSystems/mydropzone/2013021519.BLECK dstname=~logs::tagged::201302112\\nERROR: Esp server url not specified.\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ echo -n "server=localhost\\\\n" > ~/eclplus.ini\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$ dfuplus action=spray dstcluster=mythor format=csv encoding=utf8 quote=\\\\" srcip=localhost srcfile=/mnt/hpcc_storage/HPCCSystems/mydropzone/2013021519.BLECK dstname=~logs::tagged::201302112\\nERROR: Esp server url not specified.\\n/mnt/hpcc_storage/HPCCSystems/mydropzone$\\n
\\n\\nAny other ideas?\\n\\nVic\", \"post_time\": \"2013-02-21 15:46:30\" },\n\t{ \"post_id\": 3359, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Re: Where is ECLPLUS.INI in Linux?\", \"username\": \"bforeman\", \"post_text\": \"Don't know if this is helpful or not, but I always run ECLPLUS from the Windows environment, and the INI by default is in the same folder as ECLPLUS.EXE\\n\\nIf it doesn't find it in the same folder, it would then look in the Windows root folder next.\\n\\nHere's a good link on using INI files in Linux:\\nhttp://www.linuxquestions.org/questions/programming-9/how-to-create-and-handle-ini-files-in-linux-405765/\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-06 16:44:51\" },\n\t{ \"post_id\": 3356, \"topic_id\": 745, \"forum_id\": 8, \"post_subject\": \"Where is ECLPLUS.INI in Linux?\", \"username\": \"kovacsbv\", \"post_text\": \"Does anybody know where it is?\\nAnd if more than one place is possible, what is the precedence?\\n\\nThanks,\\nVic\", \"post_time\": \"2013-02-06 14:12:25\" },\n\t{ \"post_id\": 3403, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"rtaylor\", \"post_text\": \"prachi,\\n\\nThe Programmer's Guide has a section (four articles) on working with superfiles. Each article contains example code and discusses that code in depth. The code for each article is fully functional and it is all available for download here: http://hpccsystems.com/download/docs/learning-ecl\\n\\nThe point of giving you fully function example code that operates with data you generate on your system (see the Creating Example Data article first) is to allow you to run all that code on your own system and learn how to work with HPCC and ECL. \\n\\nBeyond that, we also offer classes: http://hpccsystems.com/community/training-events/training that you can attend right now. We are also in th eprocess of developing online and remote training courses, too, the first of which should become available in the next couple of weeks.\\n\\nI suggest you first work through the code in the Programmer's Guide articles and come to fully understand how that code works before trying to create your own solution to your own data problems. I think that will get you to where you want to be faster and with less frustration.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-09 21:29:30\" },\n\t{ \"post_id\": 3399, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"prachi\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply !\\nBut i am totally
\\n\\nPlease refer to my scenario and guide me which path should i take:\\nPath 1: \\n
Create Superfile and create INDEX on Superfile.\\nKeep adding subfiles to Superfiles.\\nRe-build INDEX on Superfile as subfiles are added.
\\n\\nBut if my Superfile contains around 100 subfiles with 1000 or more number of record then rebuilding INDEX everytime is time consuming.\\n\\nPath 2:\\nUsing Superkey, how to proceed?\\n\\n\\nThanks and regards !\", \"post_time\": \"2013-02-09 18:34:26\" },\n\t{ \"post_id\": 3391, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"rtaylor\", \"post_text\": \"prachi,\\n\\nYour idea of creating separate INDEXes into each subfile and putting them in a superkey is exactly what is being described in the section titled "There is a Problem."\\n\\nAnd that article continues with the next section titled "And the Solution Is ..." which states "The way around this problem is to create a single INDEX into the SuperFile, as shown by this code (contained in IndexSuperFile4.ECL):"\\n\\nThat is the way to do it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-08 19:02:00\" },\n\t{ \"post_id\": 3388, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"prachi\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply !\\n\\nThe Indexing into SuperFiles in ECL Programmer's Guide has a following statement:\\n\\nHowever, a SuperKey may not contain INDEX sub-files that directly reference the sub-files of a SuperFile using\\nthe {virtual(fileposition)} “record pointer” mechanism (used by FETCH and full-keyed JOIN operations). This is\\nbecause the {virtual(fileposition)} field is a virtual (exists only when the file is read from disk) field containing the\\nrelative byte position of each record within the single logical entity.
\\n\\nBased on your inputs, I have made the following assumptions(please correct if anything is wrong )\\n
Create a plain superfile, say SUPERFILE_TWITTER on THOR
\\nCreate an empty superkey(which is a superfile), say SUPERKEY_IDX_TWITTER on THOR
\\nAdd a sub-file to SUPERFILE_TWITTER, say SUBF_TWITTER_1 and create an INDEX, say IDX_SUBF_1 on this sub-file
\\nAdd IDX_SUBF_1 to SUPERKEY_IDX_TWITTER
\\nFor the subsequent sub-files (SUBF_TWITTER_2, SUBF_TWITTER_3 and so on), create INDEXes IDX_SUBF_2, IDX_SUBF_3 and keep on adding them to SUPERKEY_IDX_TWITTER
\\nSend queries to ROXIE which refer to SUPERKEY_IDX_TWITTER(and not SUPERFILE_TWITTER as ROXIE allows only one sub-file per super-file for querying)
\\n\\nI do have some basic queries pertaining to the use of 'Payload' INDEXes in this scenario but will post them once the above flow is clear !\\n\\nThanks and regards !\", \"post_time\": \"2013-02-08 11:50:11\" },\n\t{ \"post_id\": 3386, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"rtaylor\", \"post_text\": \"prachi,1. I need to create Superfile only once along with one subfile with no records in it(so that i can build INDEX on superfile)\\n2. Then after every 2 mins new subfile along with 1000 (or more number) of records is added to Superfile
The best scenario for you depends on what kind of data you're working with.\\n\\nThe only difference between a superfile and a superkey is the content. A superfile contains DATASETs as its subfiles, while a superkey contains INDEXes (which may or may not be payload indexes).\\n\\nTherefore, you need only create the superfile (superkey) once. Then you can begin adding subfiles to it and it can be used in queries.\\n\\nIf you add your new DATASETs as subfiles, then you need to BUILD the INDEX into that superfile every time you add a new file before you can begin using the INDEX to query the data in the superfile. The more data you accumulate, the longer time it will take to build the INDEX. \\n\\nBut if you take the new data (your 1000 record file) and BUILD an INDEX on each new file that contains your search fields and the rest of the data as a payload (pretty quick to do on 1000 records), then you can add that new INDEX to your superkey and instantly start querying your superkey as if it were a single payload INDEX.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-07 20:34:25\" },\n\t{ \"post_id\": 3384, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"prachi\", \"post_text\": \"Hi rtaylor,\\nThanks for reply.\\nI need more clarification about what exactly to use and how.\\nThe detailed scenario of job is like this:\\n1. I need to create Superfile only once along with one subfile with no records in it(so that i can build INDEX on superfile)\\n2. Then after every 2 mins new subfile along with 1000 (or more number) of records is added to Superfile\\n\\nAbove 2 steps needs to be performed for 20 different modules.\\n\\nCurrently when we are trying to fetch data for one module only without INDEX, much time is required.\\n\\nThere are No Payloads in INDEXes!\\n\\nQuestions:\\n1. In the scenario stated previously(first post), after adding new subfile(Step 5) to the superfile and again REBUILDING INDEX, we are facing same problem that is 0 records fetched.\\n2. Which is better solution for the scenario stated above :SUPERFILE or SUPERFILE (with INDEX) or SUPERKEY? Why and How to be implemented?\", \"post_time\": \"2013-02-07 17:49:20\" },\n\t{ \"post_id\": 3375, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Re: Superfile Indexing Problem\", \"username\": \"rtaylor\", \"post_text\": \"Prachi,Is it that when a new subfile is added, the index on superfile needs to be recreated?
YES.\\n\\nHPCC is not an RDBMS, so indexes are only re-built when you re-BUILD them.\\n\\nOf course, you can also have a superfile of INDEXes (a superkey) and if those INDEXes have payloads, then your data access can be faster than using an INDEX to FETCH records from a dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-07 15:23:42\" },\n\t{ \"post_id\": 3371, \"topic_id\": 747, \"forum_id\": 8, \"post_subject\": \"Superfile Indexing Problem\", \"username\": \"prachi\", \"post_text\": \"Hello,\\nI need to access data of superfile using INDEX of superfile.Steps which i follwed are:\\nStep 1:Created Superfile(with no subfile in it)\\nStep 2:Added subfile with no records in that file(to create index)\\nStep 3:Created INDEX on superfile(with {Upload_id,fpos} column in record structure)\\nStep 4:Got 0 records when accessed superfile using index(where clause:upload_id=12)\\nStep 5:Added subfile with 5 records in it\\nStep 6:Again got 0 records using index(where clause:upload_id=12)\\n\\n\\nIs it that when a new subfile is added, the index on superfile needs to be recreated?\\n\\nThanks and Regards!\", \"post_time\": \"2013-02-07 14:38:23\" },\n\t{ \"post_id\": 3445, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"Re: csv escape char for STD.file.sprayVariable?\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"This looks like a potentially helpful future H2H feature. It is currently not supported in H2H.\", \"post_time\": \"2013-02-13 15:06:11\" },\n\t{ \"post_id\": 3434, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"Re: csv escape char for STD.file.sprayVariable?\", \"username\": \"lblau\", \"post_text\": \"I don't see escape('\\\\') being passed in there. I didn't mean that the value of the csv option is an escaped character, I mean the csv option for an escape character.\", \"post_time\": \"2013-02-13 01:16:46\" },\n\t{ \"post_id\": 3426, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"Re: csv escape char for STD.file.sprayVariable?\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi lblau, yes the latest H2H allow escaped characters as the CSV options.\\nFor example:\\n\\nDataConnectors.HDFSConnector.PipeIn(yourdataset,\\n '/your/hdfs/file',\\n Your_CSV_Layout,\\n CSV(TERMINATOR('\\\\n'),quote('\\\\'')),\\n 'xxx.yyy.zzz.www', 50070)\\n
\", \"post_time\": \"2013-02-12 15:39:16\" },\n\t{ \"post_id\": 3413, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"Re: csv escape char for STD.file.sprayVariable?\", \"username\": \"lblau\", \"post_text\": \"A follow-on questions is whether the HDFS connector library (and ecl macros) has also been updated to allow an escape character to be set as a csv option?\", \"post_time\": \"2013-02-11 09:57:12\" },\n\t{ \"post_id\": 3406, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"Re: csv escape char for STD.file.sprayVariable?\", \"username\": \"bforeman\", \"post_text\": \"I'm pretty sure that if it's supported in the ECL Watch interface, it should be supported on the command line. Pinging the documentation team, but try looking at the DFU workunit details or the log if you can locate it and perhaps the setting is embedded there.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-10 15:55:00\" },\n\t{ \"post_id\": 3401, \"topic_id\": 750, \"forum_id\": 8, \"post_subject\": \"csv escape char for STD.file.sprayVariable?\", \"username\": \"lblau\", \"post_text\": \"I see where the csv escape character has been added to the ecl watch sprayCSV, and to the dfuplus capability, but it does not seem to have been added to the standard file services library. Is this deliberate? Is there some way to use sprayVariable through ecl with the csvEscape character set?\", \"post_time\": \"2013-02-09 20:16:09\" },\n\t{ \"post_id\": 3432, \"topic_id\": 754, \"forum_id\": 8, \"post_subject\": \"Re: Index Problems\", \"username\": \"bforeman\", \"post_text\": \"Just a quick glance at your code:\\n\\n/*Create super-file index*/\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fileposition},index_filename);\\nbuild_superfile_index := BUILDINDEX(INDEX(SuperFile_Dataset,{InsertID},{fileposition},index_filename),OVERWRITE);\\n
\\n\\nI see that IDX_Superfile is not used anywhere, was this intended?\\n\\nWhy not:\\n\\nbuild_superfile_index := BUILD(IDX_SuperFile);
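\\n\\nThat is, roughly (a sketch only, using the names from your own code and carrying over the OVERWRITE you already had):\\n\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fileposition},index_filename);\\nbuild_superfile_index := BUILD(IDX_SuperFile,OVERWRITE);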
\\n\\nI'm also not clear why you are using fileposition as a payload field in your BUILDINDEX statement above.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-12 21:47:40\" },\n\t{ \"post_id\": 3416, \"topic_id\": 754, \"forum_id\": 8, \"post_subject\": \"Index Problems\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nI am executing following steps:\\nStep1:Create Superfile\\nStep2:Create one logical file(having 750 records) with record structure and add it to Superfile as subfile\\nStep3:Create INDEX on Superfile(index is created on {insertid,fileposition} cols)\\n\\nNow, when i have a look in ECL Watch, an INDEX is created but with 98 records only.When i select view data file of INDEX, insertid column consists of data as '0'(zero) though subfile consists valid numeric data in insertid column and NOT ZERO.\\n\\nAs and when i add subfiles to superfile, INDEX is re-built.But still its consists improper data.\\n\\nWhen i query this Superfile from HPCC-JDBC driver java code, it gives the ouput as records only when where clause is insertid=0\\n\\nI am attaching CSV file used as subfile and screenshot os ECL Watch for Index file.\\n\\n\\nCode:(for spraying and adding file to superfile and creating index)\\n\\n\\nIMPORT Std;\\n\\nSuperfilename := '~with_index::superfile::clientwebcrawldata';\\nindex_filename := '~with_index::superfile::index';\\ncreateSuperFile:=Std.File.CreateSuperFile(Superfilename);\\n\\nIF(NOT Std.File.SuperFileExists(Superfilename),createSuperFile);\\n\\nVARSTRING timeStamp := '10101010';\\nVARSTRING fileName := 'clientwebcrawldata' ;\\nVARSTRING destinationlogicalname := '~with_index::' +fileName + '_' + timeStamp;\\nVARSTRING sourceIP := '10.101.2.171';\\nVARSTRING sourcepath := '/var/lib/HPCCSystems/mydropzone/clientwebcrawldata/' +fileName +'.csv';\\nVARSTRING srcCSVseparator := ';';\\nVARSTRING destinationgroup := 'mythor';\\nVARSTRING espserverIPport := 'http://10.101.2.170:8010/FileSpray';\\nVARSTRING subFileDestinationLogicalname := '~with_index::subfile::' +fileName + '_' + timeStamp;\\n\\n/*Spray the csv file from the dropzone*/\\nSprayCSVFile :=STD.File.fSprayVariable(sourceIP,sourcepath,,srcCSVseparator,,,\\ndestinationgroup,destinationlogicalname,,espserverIPport,\\n,TRUE,TRUE,FALSE);\\n\\n/*Create Dataset of sprayed file*/\\nLayout_ClientWebCrawlData := RECORD\\nVARSTRING Controller_ID;\\nVARSTRING User_ID;\\nVARSTRING URL_Link;\\nVARSTRING URL_Content;\\nREAL URL_Sentiment;\\nVARSTRING URL_Date;\\nINTEGER Unique_Search_ID;\\nINTEGER InsertID;\\nVARSTRING StatusID;\\nVARSTRING Search_Pattern;\\nVARSTRING Word_Ignored;\\nVARSTRING Search_Date;\\nVARSTRING Detected_Language;\\nEND;\\n\\nFile_Layout_Subfile_Dataset :=\\nDATASET(destinationlogicalname,Layout_ClientWebCrawlData,CSV);\\n\\n/*create logical file with record structure*/\\nsubfileCreation := OUTPUT(File_Layout_Subfile_Dataset,,subFileDestinationLogicalname);\\n\\n/*delete previous logical file without record structure*/\\ndeleteSprayedLogicalFile := STD.File.DeleteLogicalFile(destinationlogicalname);\\n\\nSuperFile_Dataset := DATASET(Superfilename,{Layout_ClientWebCrawlData, UNSIGNED8 fileposition {VIRTUAL(fileposition)}}, CSV);\\n\\n/*Create super-file index*/\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fileposition},index_filename);\\nbuild_superfile_index := 
BUILDINDEX(INDEX(SuperFile_Dataset,{InsertID},{fileposition},index_filename),OVERWRITE);\\n\\nSEQUENTIAL(\\nSprayCSVFile,\\nsubfileCreation,\\ndeleteSprayedLogicalFile,\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(Superfilename,subFileDestinationLogicalname),\\nStd.File.FinishSuperFileTransaction(),\\nbuild_superfile_index\\n);\\n
\", \"post_time\": \"2013-02-11 14:33:29\" },\n\t{ \"post_id\": 3447, \"topic_id\": 764, \"forum_id\": 8, \"post_subject\": \"Roxie files (used by published queries) are locked\", \"username\": \"janssend\", \"post_text\": \"I get some trouble ‘updating’ super-files under Roxie, I use super-files under Roxie only to get indirection : one single sub-file, no super-key with payload indexes.\\n\\nIn our HPCC project, some dataset and index files are periodically pushed from Thor to Roxie cluster by doing this :\\n\\n-\\tcopy a list of Thor files (dataset and index) to Roxie cluster .\\n-\\tstart Superfile transaction \\no\\t clear Roxie super files\\no\\tadd the new Roxie files (one per super-file) \\n-\\tfinish Superfile transaction\\n\\nIt works fine as long as there is no published Roxie queries which ‘use’ these super-files. \\nOnce a Roxie query has been published, the ‘associated’ super-files seem to been locked forever.\\n\\nI would have thought the write-locks on Superfiles (and files) would be ‘active’ only in case the published queries were ‘used’ simultaneously…and not forever .\\n\\n\\nI found this work around described in the HPCC Community Forum : \\nviewtopic.php?f=8&t=550&p=2567&hilit=roxie+published&sid=b25349ff7e250a5b1e64d3c5cddb22eb#p2567 \\n\\nBut it happens to fail really often (creation of /bin/bash process failed). My main concern is to have to do this kind of 'tricks' : it wouldn't be possible in a 'Production' environment. \\n\\nWhat is the correct answer to avoid this lock trouble ? \\nAm I the only one to encounter this issue ? \\n\\n\\nRegards\\nD. Janssen\\n\\n\\n\\nHere is the log file error (community_3.8.6-4) : \\n\\n[color=#0000FF:3txeig8b]\\n<- Work around start\\n00000074 2013-02-13 16:41:48 18105 18105 Unpublishing all roxie queries\\n00000075 2013-02-13 16:41:48 18105 18105 Enqueuing on thor.thor to run wuid=W20130213-163209, graph=graph13, timelimit=600 seconds, priority=0\\n00000076 2013-02-13 16:41:48 18105 18105 Thor on 192.168.1.239:20000 running W20130213-163209\\n00000077 2013-02-13 16:41:48 18105 18105 DisableRoxieQueries done ! \\n Work around end ->\\n00000078 2013-02-13 16:41:48 18105 18105 StartSuperFileTransaction\\n00000079 2013-02-13 16:41:48 18105 18105 ClearSuperFile ('hlcr::rxdb::daily::content', del) trans\\n0000007A 2013-02-13 16:41:48 18105 18105 ,FileAccess,FileServices,RemoveSuperFile,W20130213-163209,hpcc,hlcr::rxdb::daily::content\\n0000007B 2013-02-13 16:41:48 18105 18105 AddSuperFile ('hlcr::rxdb::daily::content', 'hlcr::rxdb::daily::content.base0') trans\\n0000007C 2013-02-13 16:41:48 18105 18105 ,FileAccess,FileServices,AddSuperFile,W20130213-163209,hpcc,hlcr::rxdb::daily::content,hlcr::rxdb::daily::content.base0\\n... \\n... [color=#0000FF:3txeig8b]<Trying to perform Std.File.FinishSuperFileTransaction>\\n... LOCKED\\n... 
\\n\\n0000009D 2013-02-13 16:41:53 18105 18105 safeChangeModeWrite - temporarily releasing lock on hlcr::rxdb::daily::content to avoid deadlock\\n0000009E 2013-02-13 16:41:58 18105 18105 safeChangeModeWrite on hlcr::rxdb::daily::content waiting for 10s\\n0000009F 2013-02-13 16:41:58 18105 18105 Backtrace:\\n000000A0 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x7ff0dbf44cb6]\\n000000A1 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libdalibase.so(_Z19safeChangeModeWriteP17IRemoteConnectionPKcRbj+0x1ff) [0x7ff0daedc0df]\\n000000A2 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libdalibase.so(_ZN20CDistributedFileBaseI21IDistributedSuperFileE14lockPropertiesEj+0x64) [0x7ff0dae71874]\\n000000A3 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile20cRemoveSubFileAction7prepareEv+0xd3) [0x7ff0dae79a43]\\n000000A4 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction6commitEv+0x4e) [0x7ff0dae807be]\\n000000A5 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/plugins/libfileservices.so(fslFinishSuperFileTransaction+0x4f) [0x7ff0cdb09b6f]\\n000000A6 2013-02-13 16:41:58 18105 18105 /var/lib/HPCCSystems/myeclccserver/libW20130213-163209.so(+0x691e3) [0x7ff0cd08d1e3]\\n000000A7 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x7ff0da7505c4]\\n000000A8 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7ff0da7511e1]\\n000000A9 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2b7) [0x7ff0da750dd7]\\n000000AA 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x17c) [0x7ff0da75158c]\\n000000AB 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x147) [0x7ff0dc3740a7]\\n000000AC 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x277) [0x7ff0dc3765c7]\\n000000AD 2013-02-13 16:41:58 18105 18105 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x7a0) [0x7ff0dc3790f0]\\n000000AE 2013-02-13 16:41:58 18105 18105 eclagent(main+0x51) [0x400f41]\\n000000AF 2013-02-13 16:41:58 18105 18105 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed) [0x7ff0db5cc76d]\\n000000B0 2013-02-13 16:41:58 18105 18105 eclagent() [0x401019]\\n000000B1 2013-02-13 16:41:58 18105 18105 CDFAction lock timed out on hlcr::rxdb::daily::content\\n000000B2 2013-02-13 16:41:58 18105 18105 CDistributedFileTransaction: Transaction pausing\\n.....\\n.....\\n[color=#FF4040:3txeig8b]..... LOCKED until I perform a 'manual' unpublish on roxie queries \", \"post_time\": \"2013-02-13 16:42:16\" },\n\t{ \"post_id\": 3451, \"topic_id\": 765, \"forum_id\": 8, \"post_subject\": \"Re: nested loops\", \"username\": \"bforeman\", \"post_text\": \"Yep, the JOIN Left and Right recordsets can be different formats and the output of the transform can also be a different format. JOIN rocks \\n\\nBob\", \"post_time\": \"2013-02-13 18:04:59\" },\n\t{ \"post_id\": 3450, \"topic_id\": 765, \"forum_id\": 8, \"post_subject\": \"Re: nested loops\", \"username\": \"gouldbrfl\", \"post_text\": \"will that work if it's the same dataset as the source? 
I guess it might since I just load 2 datasets and then do a join.\\n\\nThanks\", \"post_time\": \"2013-02-13 18:03:11\" },\n\t{ \"post_id\": 3449, \"topic_id\": 765, \"forum_id\": 8, \"post_subject\": \"Re: nested loops\", \"username\": \"bforeman\", \"post_text\": \"Mike, have you tried a simple JOIN? Create a LEFT recordset of your seed values 0 to 99999 and then JOIN it with the RIGHT record set with duplicate zips. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-13 18:01:27\" },\n\t{ \"post_id\": 3448, \"topic_id\": 765, \"forum_id\": 8, \"post_subject\": \"nested loops\", \"username\": \"gouldbrfl\", \"post_text\": \"I have a single source file which requires using a set of ECL functions to access it. It is essentially a database holding all zipcode information including primary cities and vanity cities. What I need out of this data is all of the zipcodes associated with all vanity cities. At the time we start to read the input file we don't know anything about the data and therefore use a seed for the zipcode starting a 0 and going to 99999. At this point we're only retrieving the first row for every zipcode and I need to get all records for that zipcode so that I can create on the output all vanity cities within that zipcode.\\n\\nBest Regards\\n\\nMike Gould\", \"post_time\": \"2013-02-13 16:52:32\" },\n\t{ \"post_id\": 3506, \"topic_id\": 766, \"forum_id\": 8, \"post_subject\": \"Re: ECL Re-Run in ECL Watch Playground\", \"username\": \"buptkang\", \"post_text\": \"Thanks for all the explanations. \\n\\nBo\", \"post_time\": \"2013-02-20 15:10:50\" },\n\t{ \"post_id\": 3499, \"topic_id\": 766, \"forum_id\": 8, \"post_subject\": \"Re: ECL Re-Run in ECL Watch Playground\", \"username\": \"gsmith\", \"post_text\": \"The ECL Playground was primarily designed for "playing" with ECL Snippets and Samples.\\n\\nOnce the ECL starts importing external ECL then its usefulness ends (the fact you can open it for a given workunit may not have been the best idea). 
\\n\\nThere is a plan for supporting the entire ECL Archive with a "tree" of ECL and once that is done it will behave the way you expect.\\n\\n/Gordon.", \"post_time\": \"2013-02-20 07:47:39\" },\n\t{ \"post_id\": 3497, \"topic_id\": 766, \"forum_id\": 8, \"post_subject\": \"Re: ECL Re-Run in ECL Watch Playground\", \"username\": \"buptkang\", \"post_text\": \"Hi Bob,\\n\\nBut if I refer to a lot of other ECL code - for instance, the ECL-ML code - how could I paste all of that referenced code into the inline ECL Playground?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-20 02:46:32\" },\n\t{ \"post_id\": 3456, \"topic_id\": 766, \"forum_id\": 8, \"post_subject\": \"Re: ECL Re-Run in ECL Watch Playground\", \"username\": \"bforeman\", \"post_text\": \"The ECL Playground is only meant for inline code, IOW, if you want to play with the code and reference other definitions, you need to copy and paste it inline.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-14 03:01:19\" },\n\t{ \"post_id\": 3455, \"topic_id\": 766, \"forum_id\": 8, \"post_subject\": \"ECL Re-Run in ECL Watch Playground\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nHas anybody noticed that if we run an ECL job that refers to other ECL code, then after it runs properly locally, the same code cannot be run in the published workunit's ECL Playground?\\n\\nCould anybody give me a hint on how I could run the same ECL code in the ECL Playground successfully?\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-02-13 23:05:26\" },\n\t{ \"post_id\": 7480, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"DSC\", \"post_text\": \"I'm not sure what you tried, but it does seem to work. I used the earlier example and pulled just the first two elements using that technique and the result looks right:\\n\\n
// Given values\\na := ['S1','S2','S3'];\\nkDelimiter := ',';\\n\\n// Define a record layout to hold both input and output\\nStringRec := RECORD\\n STRING s;\\nEND;\\n\\n// Simplest way to convert a SET to a DATASET\\nt := DATASET(a[..2],StringRec);\\n\\n// Transform to combine records in the input\\nStringRec MakeStringRec(StringRec l, StringRec r, STRING sep) := TRANSFORM\\n SELF.s := l.s + IF(l.s != '',sep,'') + r.s;\\nEND;\\n\\n// This version of ROLLUP always results in one record, and simply\\n// combines one record with the next; there is no need to SORT\\n// the input when using this version\\nr := ROLLUP(t,TRUE,MakeStringRec(LEFT,RIGHT,kDelimiter));\\n\\nOUTPUT(r[1].s); // 'S1,S2' as a STRING
\\nFWIW, I tested this with a version 5.2 cluster.\\n\\nDan\", \"post_time\": \"2015-04-28 18:04:13\" },\n\t{ \"post_id\": 7479, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"micevepay\", \"post_text\": \"I tried that before and got an error:\\n\\nError: Unexpected operator 'no_range' in: HqlCppTranslator::buildExpr(..3)\", \"post_time\": \"2015-04-28 17:44:50\" },\n\t{ \"post_id\": 7478, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"DSC\", \"post_text\": \"You could use this instead:\\n\\nt := DATASET(a[..3],StringRec);
\\nThat will process the first three elements of the set.\\n\\nDan\", \"post_time\": \"2015-04-28 17:05:25\" },\n\t{ \"post_id\": 7477, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"micevepay\", \"post_text\": \"What if I only wanted to concatenate a subset of the SET? Like the first 3 of a set of 5?\", \"post_time\": \"2015-04-28 16:42:29\" },\n\t{ \"post_id\": 3472, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"sbagaria\", \"post_text\": \"t := DATASET(a,StringRec);
\\n\\nThis is what I was missing. I did not know you could make a dataset out of a set like this.\\n\\nThanks a lot!\", \"post_time\": \"2013-02-16 14:42:03\" },\n\t{ \"post_id\": 3471, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"DSC\", \"post_text\": \"Here is one way to do what you're asking. There may be a more efficient way.\\n\\n\\n// Given values\\na := ['S1','S2','S3'];\\nkDelimiter := ',';\\n\\n// Define a record layout to hold both input and output\\nStringRec := RECORD\\n\\tSTRING\\ts;\\nEND;\\n\\n// Simplest way to convert a SET to a DATASET\\nt := DATASET(a,StringRec);\\n\\n// Transform to combine records in the input\\nStringRec MakeStringRec(StringRec l, StringRec r, STRING sep) := TRANSFORM\\n\\tSELF.s := l.s + IF(l.s != '',sep,'') + r.s;\\nEND;\\n\\n// This version of ROLLUP always results in one record, and simply\\n// combines one record with the next; there is no need to SORT\\n// the input when using this version\\nr := ROLLUP(t,TRUE,MakeStringRec(LEFT,RIGHT,kDelimiter));\\n\\nOUTPUT(r[1].s); // 'S1,S2,S3' as a STRING\\n
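\\nDepending on your platform version, the standard library may also be able to do this in one call - worth checking your version's Standard Library Reference before relying on it (sketch only):\\n\\nIMPORT Std;\\nOUTPUT(Std.Str.CombineWords(['S1','S2','S3'],',')); // 'S1,S2,S3'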
\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-16 14:39:57\" },\n\t{ \"post_id\": 3469, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"sbagaria\", \"post_text\": \"For now, I just wrote my own C++ function to do this.\\n\\n\\nEXPORT STRING CONCATENATE(SET OF VARSTRING elements, STRING sep=',') := BEGINC++\\n #include<string.h>\\n\\n (void)isAllElements;\\n\\n // elements is a contiguous array of length lenElements consisting of null-terminated strings\\n char *strElements = (char *) elements;\\n\\n // Compute the number of elements in the set\\n int numElements = 0;\\n for (int i=0; i<lenElements;) {\\n i += strlen(strElements+i)+1; // TODO: Store these values in an array to avoid recomputation\\n numElements++;\\n }\\n __lenResult = lenElements - (numElements); // Strings without the null terminators\\n __lenResult += lenSep * (numElements - 1); // For separators\\n // No need for a null terminator when returning a STRING ECL type.\\n\\n __result = (char *)malloc(__lenResult+1); // +1 for the null terminator at the end when using strcpy\\n int dstidx = 0; // Current pointer in the result string\\n for (int srcidx=0; srcidx<lenElements;) {\\n strcpy(__result+dstidx,strElements+srcidx);\\n int len = strlen(strElements+srcidx);\\n dstidx += len;\\n srcidx += len+1;\\n numElements--;\\n if (numElements>0) {\\n strcpy(__result+dstidx,sep);\\n dstidx += lenSep;\\n }\\n }\\n ENDC++;\\n
\", \"post_time\": \"2013-02-16 13:14:51\" },\n\t{ \"post_id\": 3468, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Re: Concatenate set elements\", \"username\": \"sbagaria\", \"post_text\": \"On a more abstract level, how can we aggregate or rollup sets of values? One possible way could be to convert them to a dataset and then do a ROLLUP. But I cannot find a way to convert a set to a dataset.\", \"post_time\": \"2013-02-16 11:25:27\" },\n\t{ \"post_id\": 3467, \"topic_id\": 772, \"forum_id\": 8, \"post_subject\": \"Concatenate set elements\", \"username\": \"sbagaria\", \"post_text\": \"I have a set of strings which I want to output inside a dataset. The problem is I would like to be able to concatenate the elements of the set so that they form a single string which can then be output as a string field. I know I can achieve this functionality using the IFELSE statements when the number of elements in the set are expected to be small. But that just results in ugly ECL code. Is there a more elegant way to do this? Perhaps an ECL function which I missed in the manual?\\n\\n\\nA := ['S1','S2','S3'];\\n// OUTPUT(CONCATENATE(A, NAMED separator:=', ')); // How I want my code to look like\\nOUTPUT(A[1]+', '+A[2]+', '+A[3]); // How I want my output to look like\\n
\", \"post_time\": \"2013-02-16 11:19:26\" },\n\t{ \"post_id\": 3486, \"topic_id\": 773, \"forum_id\": 8, \"post_subject\": \"Re: How to push data and index from Thor to Roxie ?\", \"username\": \"DSC\", \"post_text\": \"Hi David,\\n\\nI can't speak to the code working in older versions of the platform and not the newer versions. I've never tried doing that myself. Maybe someone from HPCC can chime in on that one, as I don't know which behavior is the bug (you may have been exploiting something that shouldn't have worked in the first place).\\n\\nI think you're right re: copyResources=FALSE and useRemoteResources=FALSE, though. In that scenario, data would have to be manually migrated from Thor to Roxie. But would that situation actually arise in real life? Is there a use-case where it is beneficial to manually move that data?\\n\\nAlso, note that there is an outstanding bug that makes those configuration parameters not behave quite as advertised. A good description of the bug and the parameters is at http://hpccsystems.com/bb/viewtopic.php?f=14&t=541.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-19 12:48:54\" },\n\t{ \"post_id\": 3483, \"topic_id\": 773, \"forum_id\": 8, \"post_subject\": \"Re: How to push data and index from Thor to Roxie ?\", \"username\": \"janssend\", \"post_text\": \"Thank for this answer, \\n\\nI should have said before :\\n\\n[color=#0080BF:2hk0s79r]My main concern is the previously provided ECL scripts work fine on community_3.8.6-4; I got the error message as long as I have updated the HPCC system (and tools) to community_3.10.2-1.\\n\\n\\n\\nI perform data copy from Thor to Roxie because of this paragraph in 'Rapid Data Delivery Engine Reference' (page 9) :\\n\\n'Depending on the configuration, Roxie may read data remotely from a Thor cluster where it was prepared, or if\\npreferred, it may be copied to the Roxie for local access.\\nTypically a development system might refer to data in situ on the Thor cluster, while a production system may prefer\\nthe performance benefits of copying data locally to the Roxie'\\n\\nFrom what you said (and the manual also), the Roxie data access have to be setup by using two parameters : copyResources and useRemoteResources\\nBut unfortunately, I didn't found the following page in the Enterprise Service Platform.\\n \\n[attachment=0:2hk0s79r]file4.png\\n\\nI guess, in case both copyResources and useRemoteResources have been set to FALSE, I have still to manually copy data and index to Roxie ?\\n\\n\\nCheers,\\nDavid\", \"post_time\": \"2013-02-19 09:21:23\" },\n\t{ \"post_id\": 3479, \"topic_id\": 773, \"forum_id\": 8, \"post_subject\": \"Re: How to push data and index from Thor to Roxie ?\", \"username\": \"DSC\", \"post_text\": \"What prompted you to manually perform this copy?\\n\\nWhen you publish a Roxie query, the system automatically copies the referenced data files if needed (this can actually vary somewhat depending on settings in your configuration). At any rate, you normally don't need to do anything special to make a logical file available to Roxie; just reference the same filename from your Thor and Roxie code and you should be good to go.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-18 16:13:18\" },\n\t{ \"post_id\": 3478, \"topic_id\": 773, \"forum_id\": 8, \"post_subject\": \"How to push data and index from Thor to Roxie ?\", \"username\": \"janssend\", \"post_text\": \"I have some trouble to push data (and related index) from Thor to Roxie. 
\\n\\nI build the dataset and the related index on Thor cluster. Once it's done, I copy these files to Roxie in order to use its with roxie queries (FETCH).\\n\\ncommunity_3.10.2-1\\n\\nTHOR ECL SCRIPT :\\n\\nIMPORT Std, Std.File, Std.Str;\\n\\nMyRecord := {INTEGER id, STRING value};\\nMyDataSet := DATASET([{1,'toto'},{2,'titi'}], MyRecord);\\n\\nMyThorData:= DATASET('~thor::data', {MyRecord, UNSIGNED8 fpos {virtual(fileposition)}}, FLAT);\\nMyThorIndex := INDEX(MyThorData,{id, fpos}, '~thor::index');\\n\\nMain := SEQUENTIAL( \\n// create thor file within 2 records \\nOUTPUT(MyDataSet, ,'~thor::data',OVERWRITE),\\n// build thor index on thor file\\nBUILDINDEX(MyThorIndex,OVERWRITE),\\nOUTPUT(FETCH(MyThorData,MyThorIndex(id = 2),\\tRIGHT.fpos)),\\n// copy both thor index and file to roxie cluster\\nSTD.File.Copy('~thor::data','myroxie','~roxie::data', , , , ,TRUE , ,),\\nSTD.File.Copy('~thor::index','myroxie','~roxie::index', , , , ,TRUE , ,),\\n);\\nMain;\\n
\\n\\nFILES \\n[attachment=0:3tdtsur7]file1.png\\n\\nROXIE ECL SCRIPT :\\n\\nIMPORT Std, Std.File, Std.Str;\\n\\nSimpleRecord := {INTEGER id, STRING value};\\nSimpleDSFile:= DATASET('~roxie::data', {SimpleRecord, UNSIGNED8 fpos {virtual(fileposition)}}, FLAT);\\nSimpleIndex := INDEX(SimpleDSFile,{id, fpos}, '~roxie::index');\\n\\nOUTPUT(FETCH(SimpleDSFile,SimpleIndex(id = 2),\\tRIGHT.fpos));\\n
\\n\\nRESULT :\\nError: Query W20130218-165610 is suspended because Could not open file /var/lib/HPCCSystems/hpcc-data/roxie/roxie/index._2_of_2 at any remote location - (0, 0), 1402,\", \"post_time\": \"2013-02-18 16:08:39\" },\n\t{ \"post_id\": 3518, \"topic_id\": 774, \"forum_id\": 8, \"post_subject\": \"Re: how can I get landing zone IP by function ?\", \"username\": \"DSC\", \"post_text\": \"Absolutely! That function works great if the landing zone you're interested in is on the Dali server. It all depends on your configuration.\\n\\nNitpick: If you use that function you may want to use Str.SplitWords() to extract the IP address, just in case the string length of the port number ever changes:\\n\\nIMPORT Std;\\n\\naddressItems := Std.Str.SplitWords(Std.System.Thorlib.DaliServer(),':');\\n\\nipAddress := addressItems[1];\\nport := addressItems[2];\\n\\nOUTPUT(ipAddress,NAMED('ipAddress'));\\nOUTPUT(port,NAMED('port'));
\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-21 12:37:35\" },\n\t{ \"post_id\": 3516, \"topic_id\": 774, \"forum_id\": 8, \"post_subject\": \"Re: how can I get landing zone IP by function ?\", \"username\": \"ideal\", \"post_text\": \"Hello Dan,\\n\\nFYI, there is a function that seems convenient, although I just tested it when landing zone and master are on the same server. \\nIt is daliServer() function in Thorlib.\\n\\nTo get exact ip and use it in file names, you just have to do this :\\nchr_ip := daliServer();\\nlong:=LENGTH(chr_ip);\\nip := chr_ip[..long-5];
\\n\\nJM.\", \"post_time\": \"2013-02-21 09:41:07\" },\n\t{ \"post_id\": 3481, \"topic_id\": 774, \"forum_id\": 8, \"post_subject\": \"Re: how can I get landing zone IP by function ?\", \"username\": \"DSC\", \"post_text\": \"[quote="ideal":2l4xl74x]How can I get landing zone ip and path by function ? \\nI don't believe that there are functions that provide this information. Part of the complication here is that any server running the right server process (dafilesrv) can host a landing zone, and you can have multiple landing zones defined.\\nI've tackled this in the past by defining a 'constants file' and setting exported attribute values to the landing zone path and hostname you need. I name the hostname as something generic, then map the generic hostname to a specific host IP in /etc/hosts. That allows you to use the same code in multiple environments.\\nHow can I use it in DATAFILE function ?
\\nI think you're talking about the DATASET() function, where the first argument is a file path. If you really want to reference a file on a landing zone, which means an external file not yet sprayed into your cluster, then see the File.ExternalLogicalFileName() standard library function.\\nHow can I upload a file from my computer and put it in the landing zone ?
\\nThe landing zone is just a directory on some system. You can use any file copy utility that works. I use scp.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-02-18 20:24:38\" },\n\t{ \"post_id\": 3480, \"topic_id\": 774, \"forum_id\": 8, \"post_subject\": \"how can I get landing zone IP by function ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nHow can I get landing zone ip and path by function ?\\nHow can I use it in DATAFILE function ?\\n\\nHow can I upload a file from my computer and put it in the landing zone ?\\n\\nThe purpose of my question is to avoid setup manually IPs directly in the code and do it automatically. If necessary, I can handle with my pc IP but at least, I would like to get landing zone IP automatically.\\n\\nThanks,\\nJM.\", \"post_time\": \"2013-02-18 17:19:28\" },\n\t{ \"post_id\": 3510, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"rtaylor\", \"post_text\": \"OK, that means what I said in this thread (http://hpccsystems.com/bb/viewtopic.php?f=10&t=777&sid=927ed1bdd4e80abcd64d814b052c5451) is what you need to do. Here's the information repeated:\\nFor a development/testbed system, simply un-publishing a query and re-publishing it manually is a perfectly appropriate solution.\\n\\nFor a Production system, however, you don't want any interruption of service to the end-users. That's why we typically configure our Production Roxies with an "extra" offline Roxie that is used to update queries, data, do QA work, etc. before we make the new version available to end-users. That makes the switch to new data/query versions a simple matter of telling the load balancer to start sending all new queries to the Roxie that was just updated, bringing it online and taking the previous version Roxie offline (ready to be updated with even newer stuff). That way, if there is a problem with the new release you can just as easily switch back to the previous version.
HTH,\\n\\nRichard\", \"post_time\": \"2013-02-20 19:18:22\" },\n\t{ \"post_id\": 3498, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nMy fear turned real - I'm not able to add the newly created INDEX into the super-key \\n\\n
IMPORT Std;\\n\\nsuperkeyCWCD :=\\t'~hpcc::superkey::cwcd';\\n\\nSEQUENTIAL(\\n//Std.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(superkeyCWCD,'~hpcc::index::idxpayload_cwcd_second')\\n//Std.File.FinishSuperFileTransaction()\\n);
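For reference, the transaction-framed variant is simply the same block with the commented calls restored; a RemoveSuperFile of the previous sub-index would sit inside the same frame if the old index also had to be swapped out (its name below is only a hypothetical placeholder):

IMPORT Std;

superkeyCWCD := '~hpcc::superkey::cwcd';

SEQUENTIAL(
  Std.File.StartSuperFileTransaction(),
  // hypothetical: swap out the previous sub-index inside the same transaction
  // Std.File.RemoveSuperFile(superkeyCWCD, '~hpcc::index::idxpayload_cwcd_first'),
  Std.File.AddSuperFile(superkeyCWCD, '~hpcc::index::idxpayload_cwcd_second'),
  Std.File.FinishSuperFileTransaction()
);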
\\nIrrespective of the presence of the super-file transaction :\\n\\n000000C0 2013-02-20 15:31:03 27190 27190 CDFAction lock timed out on hpcc::superkey::cwcd\\n000000C1 2013-02-20 15:31:03 27190 27190 CDistributedFileTransaction: Transaction pausing
\\n\\nAs known, the lock/control to the super-key, hence, the indexes is with the Roxie cluster. Therefore, the manipulation of the super-keys and INDEXes has to be initiated by Roxie - Thor workunits will simply get suspended.\\n\\nI foolishly attempted a super-key(super-file) transaction on Roxie and was shooed away:\\n\\nReported by: Roxie\\nMessage: UNIMPLEMENTED at /var/jenkins/workspace/CE-Candidate-3.10.0/CE/ubuntu_12_04_x86_64/HPCC-Platform/roxie/ccd/ccdserver.cpp:30276
\\n\\nTo summarize the scenario/open questions:\\nThor can build new INDEXes but cannot add them to the super-key
\\nI'm not sure whether a super-file transaction and manipulation can take place on Roxie (the docs do not mention anything about this)
\\nIs there a way for Thor to request that Roxie temporarily release the lock on the super-key (and suspend the query/show 'stale' data till the super-key is rebuilt)?
\\n\\nThanks and regards !!!\", \"post_time\": \"2013-02-20 04:58:45\" },\n\t{ \"post_id\": 3496, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"rtaylor\", \"post_text\": \"Meanwhile, I just want to confirm(as I don't have the environment access) that in a Thor code itself, I can acquire the lock to the super-key, now also present on Roxie,add this new index and remove the old one.
I can neither confirm nor deny. \\n\\nThat's an "operational" type of question, and my expertise is on the ECL side of things (I am not a hardware/network guy). Try it, and if it doesn't work, then the production scheme outlined in the thread I referenced in my previous post will be your most likely solution.\", \"post_time\": \"2013-02-19 16:43:41\" },\n\t{ \"post_id\": 3495, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard !\\n\\nI'll try the new index approach that you have suggested.\\n\\nMeanwhile, I just want to confirm(as I don't have the environment access) that in a Thor code itself, I can acquire the lock to the super-key, now also present on Roxie,add this new index and remove the old one.\\n\\nThanks and regards !\", \"post_time\": \"2013-02-19 16:07:58\" },\n\t{ \"post_id\": 3494, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"rtaylor\", \"post_text\": \"kaliyugantagonist (is that actually your name?),\\n
I'm not building a new index for each sub-file but simply trying to overwrite the INDEX built on the super-file whenever a new sub-file is sprayed.
Yeah, I got that\\n\\n(Please correct me if I'm wrong) As per your suggestion, I must create a new index every time and write it to a new file. But I didn't get the point of 'updating' the super-key - does it mean I should start a transaction for the super-key, add this new INDEX to it, delete the old one (this would again give a locking issue, I believe), and complete the transaction? Even if my assumption is correct, will I be able to secure the lock for the super-key (will Roxie release it)?
Yes, that's what I was saying -- "updating" the Superkey means using the superfile functions to change the subfile (within a transaction frame) to the new INDEX just built.\\n\\nOf course, this is the same kind of issue raised on this thread: http://hpccsystems.com/bb/viewtopic.php?f=10&t=777&sid=cd85e3e5eb9ddbc85963e39de79aa5cd so there may be considerations of this kind, too.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-19 15:58:50\" },\n\t{ \"post_id\": 3493, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply !\\n\\nApologies about I asking the question in plain-text without any code as I don't have the access to the environment at this point of time \\n\\nI'm not building a new index for each sub-file but simply trying to overwrite the INDEX built on the super-file whenever a new sub-file is sprayed.\\n\\n(Please correct if I'm wrong)As per you, I must create a new index every time and write it in a new file. But I didn't get the point of 'updating' the super-key - does it mean that start a transaction for the super-key, add this new INDEX in it, delete the old one(this would again give locking issue I believe), complete the transaction?Even if my assumption is correct, will I be able to secure the lock for super-key(Will Roxie release it)?\\n\\nGiven the high frequency of the new sub-files, Thor would soon have hundreds of new indexes
\\n\\nThanks and regards !\", \"post_time\": \"2013-02-19 15:29:32\" },\n\t{ \"post_id\": 3490, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re: Re-building of super-key\", \"username\": \"rtaylor\", \"post_text\": \"Try building the new INDEX to a new filename then update the superkey to use the new INDEX file instead of the old.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-19 15:07:12\" },\n\t{ \"post_id\": 3485, \"topic_id\": 776, \"forum_id\": 8, \"post_subject\": \"Re-building of super-key\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nI have a scenario where a Roxie query \\n
/*****ROXIE QUERY*****/\\n\\nExport testSuperKey :=Function\\n\\nLayout_ClientWebCrawlData := RECORD\\nString15 User_ID;\\nVARSTRING1000 URL_Link;\\nVARSTRING URL_Content;\\nREAL URL_Sentiment;\\nVARSTRING URL_Date;\\nINTEGER8 Unique_Search_ID;\\nVARSTRING URL_ContextName;\\nVARSTRING Search_Keyword;\\nINTEGER8 InsertID;\\nVARSTRING45 StatusID;\\nVARSTRING200 Search_Pattern;\\nVARSTRING2000 Word_Ignored;\\nVARSTRING Search_Date;\\nVARSTRING45 Detected_Language;\\nVARSTRING IdeaCloude;\\nVARSTRING ActualContent;\\nEND;\\n\\nString userid := '' : STORED('user_id');\\n\\ncwcdSet := DATASET('~hpcc::superfile::clientwebcrawldata',{Layout_ClientWebCrawlData,UNSIGNED8 fpos {virtual(fileposition)}},thor);\\n//cwcdSet;\\n\\ncwcdPayloadIdxTemp := INDEX (cwcdSet,{User_ID},{URL_Link , URL_Sentiment , InsertID , URL_Content,fpos},'~hpcc::superkey::cwcd');\\n//cwcdPayloadIdxTemp;\\n\\n//ds := FETCH(cwcdSet,cwcdPayloadIdxTemp(user_id=userid),RIGHT.fpos);\\nds := cwcdPayloadIdxTemp(KEYED(user_id=userid));\\nop := output(ds);\\nreturn op;\\n\\nend;
\\n\\nuses a super-file(on Thor) with multiple sub-files. I have built an INDEX on this super-file and added this to super-key. When I publish the Roxie query, only the above INDEX(and not the sub-files)are transferred to Roxie.\\n\\nAs per my understanding, the INDEX must be rebuilt every time on Thor a new sub-file is added to the super-file, else, the Roxie doesn't retrieve the data pertaining to the new sub-file. But the rebuild cannot be achieved as Roxie would not allow me to overwrite the index(it holds a lock). \\n00000017 2013-02-19 21:32:23 28416 28416 ERROR: -1: Graph[4], CDistributedFileDirectory::removeEntry Cannot remove file cwcd::idx as owned by SuperFile(s): cwcd::superkey::key (in item 1)
\\n\\nHow can I 're-build' the super-key so that Roxie query returns the new data too?\\n\\nThanks and regards !\", \"post_time\": \"2013-02-19 11:28:31\" },\n\t{ \"post_id\": 3505, \"topic_id\": 778, \"forum_id\": 8, \"post_subject\": \"Re: Payload Index Usage\", \"username\": \"DSC\", \"post_text\": \"Your already-written index must have that field defined, then. If that's the case then you'll need to include it in this newer INDEX definition. I don't think you can actually use RecPos in a payload index, so you might want to consider removing it and saving some space. But simply including it in your index definition here, now, should be fine.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-20 14:28:01\" },\n\t{ \"post_id\": 3504, \"topic_id\": 778, \"forum_id\": 8, \"post_subject\": \"Re: Payload Index Usage\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot Dan..\\n\\nIt works but we need to specify RecPos also in the INDEX command i suppose else we get the following error : \\n\\n\\nError: System error: 0: Index layout does not match published layout for index
\\n\\nThanks and Regards\\nksviswa\", \"post_time\": \"2013-02-20 14:13:24\" },\n\t{ \"post_id\": 3502, \"topic_id\": 778, \"forum_id\": 8, \"post_subject\": \"Re: Payload Index Usage\", \"username\": \"DSC\", \"post_text\": \"The first parameter of the INDEX (a dataset) is optional. If you omit it, however, then you have to specify the datatype of all the fields that are in the index and (if I remember correctly) you cannot use variable-length fields.\\n\\nAdjusting your field sizes slightly, and omitting RecPos because you really don't need that in a payload index, you may have something like:\\n\\nStudent_index := INDEX\\n (\\n {\\n STRING5 id;\\n },\\n {\\n STRING10 fname;\\n UNSIGNED1 age;\\n },\\n '~index::students'\\n );
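To round that out, a minimal (untested) sketch of building such a standalone payload index from an inline recordset might be; the record, dataset and index names here are just local names for the sketch and the values are made up:

// throwaway inline recordset with matching fields
studentRec := {STRING5 id; STRING10 fname; UNSIGNED1 age};
inlineDS   := DATASET([{'00001','Ann',23},{'00002','Raj',31}], studentRec);

// build the payload index file straight from that recordset...
buildIdx := INDEX(inlineDS, {id}, {fname, age}, '~index::students');
BUILDINDEX(buildIdx, OVERWRITE);

// ...then read it back through the dataset-free declaration above,
// e.g. OUTPUT(Student_index);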
\\n\\nBottom line, payload indexes don't need to reference their source dataset. You can, in fact, create a recordset on the fly, BUILD an index from that, then reference it with something like the above declaration to access it. No base dataset needs to be involved.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-02-20 12:51:07\" },\n\t{ \"post_id\": 3500, \"topic_id\": 778, \"forum_id\": 8, \"post_subject\": \"Payload Index Usage\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have created a payload index file ‘index::students’ and I simply want to OUTPUT the contents of the payload index file. Given below is the code I am forced to use.\\n\\n// The record structure\\n\\nstudent := RECORD\\n\\nSTRING5 id;\\nSTRING fname;\\nSTRING lname;\\nUNSIGNED age;\\n\\nEND;\\n\\n// The record structure with record pointer\\n\\nRec_with_recpos := {student,UNSIGNED8 RecPos{virtual(fileposition)}};\\n\\n// why do I have to do this ?!\\n\\nStudent_dataset:= DATASET('data::student_data', Rec_with_recpos,thor);\\n\\n//declare the index - the index file index::students has already been built and exists.\\n\\nStudent_index := INDEX(Student_dataset,{id},{fname,age,RecPos},'~index::students');\\n\\n// Ouput the contents of the payload index file\\nOUTPUT(Student_index);\\n\\n
\\n\\nThe problem is that I still have to refer to the logical file('data::student_data') which was used to create the payload index. This looks ugly and un-necessary and I want to know if there is a better way of doing this.\\n\\nRegards\\nksviswa\", \"post_time\": \"2013-02-20 09:50:54\" },\n\t{ \"post_id\": 3681, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"[quote="rtaylor":1zryzara]Bo,\\n\\nUsing a constantly-changing pivot table of your data is going to present you with major problem each period -- the need to re-write all your query code to use the new RECORD structure and incorporate any new fields that you added. \\n\\nIf it were me designing your ECL solution, here's what I would do:\\n\\n1. Eliminate the pivot table completely.\\n2. Instead, keep the data in its key-value pair format.\\n3. Build two INDEXes to allow searching the data:\\n
4. Write my query processing code to use these two INDEXes to return all the data values.\\n\\nThe advantage of using INDEXes is fast access to the data, which is also LZW compressed. And, most importantly, you will NOT need to re-write your code every period to handle changing RECORD structures.\\n\\nHTH,\\n\\nRichard\\n\\nHi Richard,\\n\\nThe idea is clear enough. \\n\\nThanks a lot. \\n\\nI will try it and give you some feedback later. \\n\\nThanks again\\nBo\", \"post_time\": \"2013-03-08 16:49:15\" },\n\t{ \"post_id\": 3680, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"rtaylor\", \"post_text\": \"Bo,\\n\\nUsing a constantly-changing pivot table of your data is going to present you with major problem each period -- the need to re-write all your query code to use the new RECORD structure and incorporate any new fields that you added. \\n\\nIf it were me designing your ECL solution, here's what I would do:\\n\\n1. Eliminate the pivot table completely.\\n2. Instead, keep the data in its key-value pair format.\\n3. Build two INDEXes to allow searching the data:\\n
4. Write my query processing code to use these two INDEXes to return all the data values.\\n\\nThe advantage of using INDEXes is fast access to the data, which is also LZW compressed. And, most importantly, you will NOT need to re-write your code every period to handle changing RECORD structures.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-08 15:54:25\" },\n\t{ \"post_id\": 3679, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"[quote="bforeman":1ktaauqt]Hi Bo,\\n\\nWith a FUNCTION, the parameters need to be specific valuetypes (STRING, INTEGER, DATASET, etc.), where a MACRO can be more generalized in regards to the arguments. \\nA MACRO is essentially an ECL code generator, and what you pass in to the MACRO must result in generating valid ECL code, so usually you would write a working model first and then begin to "macroize" the process.\\n\\nThere is even a new FUNCTIONMACRO that combines the best of both structures. \\n\\nYes, for a dynamic row to column conversion you would need to consider one of these options, but if I may ask before we continue, what are you trying to do with your big data project, and why is this a requirement?\\n\\nI've chatted with a couple of colleagues, and most of their row to column processes always target a specific DATASET, so there was never a need to make it generic.\\n\\nRegards,\\n\\nBob\\n\\nHi Bob, \\n\\nMy input data format is n by 3 DATASET\\n(attributeID(VARSTRING) rowID(VARSTRING) attributeValue(REAL))
\\n \\nThe data will be like\\nAGE, Terry, 24\\nGender, Lee, 0\\nPayment, Terry, 25.89\\n.....
\\n\\nWe want to convert it like:\\n \\n AGE Gender Payment\\nTerry 24 *** 25.89\\nLee *** 0 ***\\n
\\n\\n*** means null.\\n\\n\\nThe dataset will have dynamical number of AttributeIDs, therefore I need to decide the structure of the inverted three column DATASET, and then do the JOIN and ROLLUP operation to transform all the data into the new data structure.\\n\\nOur dataset will be retrieved once per a period, then every time we get a new DATASET, we need to do JOIN manipulation on existing DATASET with the new coming DATASET.\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-03-08 15:10:41\" },\n\t{ \"post_id\": 3675, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"bforeman\", \"post_text\": \"Hi Bo,\\n\\nWith a FUNCTION, the parameters need to be specific valuetypes (STRING, INTEGER, DATASET, etc.), where a MACRO can be more generalized in regards to the arguments. \\nA MACRO is essentially an ECL code generator, and what you pass in to the MACRO must result in generating valid ECL code, so usually you would write a working model first and then begin to "macroize" the process.\\n\\nThere is even a new FUNCTIONMACRO that combines the best of both structures. \\n\\nYes, for a dynamic row to column conversion you would need to consider one of these options, but if I may ask before we continue, what are you trying to do with your big data project, and why is this a requirement?\\n\\nI've chatted with a couple of colleagues, and most of their row to column processes always target a specific DATASET, so there was never a need to make it generic.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-08 14:17:53\" },\n\t{ \"post_id\": 3654, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"Hey,\\n\\nThe difficult session is to dynamically decide the structure of RECORD based on the DATASET. \\n\\nAs I am still learning the ECL, should I write FUNCTION or MACRO? Till now, I am unclear what is the difference between those two?\\n\\nIf providing some sample code will be so helpful for me.\\n\\nThanks\\nBo\", \"post_time\": \"2013-03-06 22:27:59\" },\n\t{ \"post_id\": 3652, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"[quote="bforeman":3phjz851]I know this post had a similar feel to another \\nTake a look at this thread, I think it is related:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=502&p=2269&hilit=row+to+column&sid=c6039ea902c6ce725292bfcf6d5b4b98&sid=c3f3d1e96f5e1e79facbf5b706c7636c#p2269\\n\\nI'm guessing that you probaly want to write a FUNCTION that passes a DATASET as a parameter, and then returns the DATASET in the converted column format.\\n\\nRegards,\\n\\nBob\\n\\nHi Bob, \\n\\nIf possible, could you give me some working sample code based on your previous example?\\n\\nThanks\\nBo\", \"post_time\": \"2013-03-06 21:51:57\" },\n\t{ \"post_id\": 3519, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"bforeman\", \"post_text\": \"I know this post had a similar feel to another
\\nTake a look at this thread, I think it is related:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=502&p=2269&hilit=row+to+column&sid=c6039ea902c6ce725292bfcf6d5b4b98&sid=c6039ea902c6ce725292bfcf6d5b4b98#p2269\\n\\nI'm guessing that you probaly want to write a FUNCTION that passes a DATASET as a parameter, and then returns the DATASET in the converted column format.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-21 12:53:32\" },\n\t{ \"post_id\": 3512, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"Thanks Bob, \\n\\nContinually, if I cannot statically determine the number of attributes. For instance, I do not know the number of "something" only after computing the list of attributes from dataset each time. Then how could I dynamically create my record structure to store the data?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-20 20:49:04\" },\n\t{ \"post_id\": 3511, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"bforeman\", \"post_text\": \"Here's one way to do it:\\n\\n
/* id name value\\n ------------------------------\\n 0 timezone Europe/London\\n 0 language en\\n 0 country 45\\n 0 something x\\n 1 timezone Europe/Paris\\n 1 language fr\\n 1 country 46\\n \\n convert to:\\n \\n id timezone language country something\\n ---------------------------------------------------\\n 0 Europe/London en 45 x\\n 1 Europe/Paris fr 46\\n*/\\nVal_Rec := RECORD\\n UNSIGNED1 rid := 0;\\n STRING10 name := '';\\n STRING15 myvalue := '';\\nEND;\\n\\nout_rec := RECORD\\n UNSIGNED1 rid := 0;\\n STRING15 timezone := '';\\n STRING2 language := '';\\n STRING2 country := '';\\n STRING1 something := '';\\n END; \\n\\nd := dataset([{0,'timezone','Europe/London'},\\n {0,'language','en'},\\n {0,'country','45'},\\n {0,'something','x'},\\n {1,'timezone','Europe/Paris'},\\n {1,'language','fr'},\\n {1,'country','46'}\\n ],Val_Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\nid_list := DEDUP(PROJECT(d,out_rec));\\nid_list;\\n\\nout_rec BuildIt(out_rec L,val_rec R) := TRANSFORM\\n SELF.timezone := IF (R.name = 'timezone',R.myvalue,L.timezone);\\n\\tSELF.language := IF (R.name = 'language',R.myvalue,L.language);\\n\\tSELF.country := IF (R.name = 'country',R.myvalue,L.country);\\n\\tSELF.something := IF (R.name = 'something',R.myvalue,L.something);\\n\\tSELF := L;\\n END; \\n\\n\\nCol_Out := JOIN(id_list,d,LEFT.Rid = RIGHT.RID,BuildIt(LEFT,RIGHT));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\ncol_out Rollem(col_out L,col_out R) := TRANSFORM\\n SELF.timezone := IF (R.timezone <> '',R.timezone,L.timezone);\\n\\tSELF.language := IF (R.language <> '',R.language,L.language);\\n\\tSELF.country := IF (R.country <> '',R.country,L.country);\\n\\tSELF.something := IF (R.something <> '',R.something,L.something);\\n\\tSELF := L;\\n END;\\n\\nFinal_out := ROLLUP(col_out,LEFT.Rid = RIGHT.RID,Rollem(LEFT,RIGHT));\\nFinal_out;
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-20 20:17:38\" },\n\t{ \"post_id\": 3509, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"Hi Bob, \\n\\nAs I am still new toward ECL, could you give me some more detailed peusdo code to refer to?\\n\\nFor instance, my original data format is:\\n\\nattributeID, indexID, attributeValue\\nattr1 index attr1Value\\n\\nI want to build a Table or Matrix by fitting to data format\\nindex attr1 attr1Value attr2 attr2Value....\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-20 16:42:57\" },\n\t{ \"post_id\": 3508, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"Re: ECL- Row data to columns convertion like PIVOT\", \"username\": \"bforeman\", \"post_text\": \"Yep, ECL can handle this pretty well.\\n\\nFirst, use PROJECT, followed by DEDUP and DISTRIBUTE to build the column lists. \\n\\nAfter the lists are built use multiple LEFT OUTER JOINS to combine those lists. The output of the first JOIN is used as the input to the subsequent JOIN, etc.\\n\\nHopefully this is enough to get you started \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-20 15:55:42\" },\n\t{ \"post_id\": 3503, \"topic_id\": 779, \"forum_id\": 8, \"post_subject\": \"ECL- Row data to columns convertion like PIVOT\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nCould somebody here give me some hints how could I transform row data to columns using ECL?\\n\\nThe relevant link is like this:\\n\\n
http://stackoverflow.com/questions/10925445/mysql-select-dynamic-row-values-as-column-names-another-column-as-value
\\n\\nAlso, does ECL RECORD provide mechanism to support variable length of members in the structure. What I want to do is to dynamically build a dataset with multiple variable length columns based on existing row values?\\n\\nThanks a lot\\nBo\", \"post_time\": \"2013-02-20 14:08:57\" },\n\t{ \"post_id\": 3517, \"topic_id\": 782, \"forum_id\": 8, \"post_subject\": \"Passing parameters to standalone executable\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI would like to pass parameters to an ecl compiled executable.\\nThe only way I see would be to create c++ source code with eclcc -S option and then create a main by trying to connect external parameters with internal variables.\\n\\nIs there something more hard wired in ECL to do it simply ?\\n\\nThanks,\\nJM.\", \"post_time\": \"2013-02-21 11:36:05\" },\n\t{ \"post_id\": 3548, \"topic_id\": 783, \"forum_id\": 8, \"post_subject\": \"Re: WHEN Function in ECL\", \"username\": \"Pradeep\", \"post_text\": \"Thank You Bob and Dan!!\", \"post_time\": \"2013-02-26 05:00:59\" },\n\t{ \"post_id\": 3532, \"topic_id\": 783, \"forum_id\": 8, \"post_subject\": \"Re: WHEN Function in ECL\", \"username\": \"DSC\", \"post_text\": \"Here is another example that may help. The goal here is to write to a log file at a certain point in the execution of a function.\\n\\nIMPORT Std;\\n\\nSomeRec := RECORD\\n UNSIGNED n;\\nEND;\\n\\nAddOne(DATASET(SomeRec) ds) := FUNCTION\\n SomeRec XForm(SomeRec l) := TRANSFORM\\n SELF.n := l.n + 1;\\n END;\\n \\n t := PROJECT(ds,XForm(LEFT));\\n \\n result := WHEN(t,EVALUATE(Std.System.Log.dbglog('*** value=' + t[1].n + ' ***')));\\n \\n RETURN result;\\nEND;\\n\\ninDS := DATASET([5],SomeRec);\\n\\nOUTPUT(AddOne(inDS));
\\n\\nExcerpted from the eclagent log:\\n\\n0000000A 2013-02-22 07:33:25 11125 11125 Executing hthor graph graph1\\n0000000B 2013-02-22 07:33:25 11125 11125 Executing subgraph 3\\n0000000C 2013-02-22 07:33:25 11125 11125 Executing subgraph 1\\n0000000D 2013-02-22 07:33:25 11125 11125 *** value=6 ***\\n0000000E 2013-02-22 07:33:25 11125 11125 Completed subgraph 1\\n0000000F 2013-02-22 07:33:26 11125 11125 Completed subgraph 3\\n
\\n\\nWhat isn't clearly documented is that WHEN() seems to return its first value. You can leverage that to insert side-effect actions.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-22 13:38:27\" },\n\t{ \"post_id\": 3531, \"topic_id\": 783, \"forum_id\": 8, \"post_subject\": \"Re: WHEN Function in ECL\", \"username\": \"bforeman\", \"post_text\": \"In the submitted example, DS represents a recordset trigger. As the recordset is successfully executed, the OUTPUT action O is then executed. \\n\\nIt is a way to ensure in ECL that an action will be successful based on the trigger of another successful recordset definition.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2013-02-22 13:16:46\" },\n\t{ \"post_id\": 3529, \"topic_id\": 783, \"forum_id\": 8, \"post_subject\": \"WHEN Function in ECL\", \"username\": \"Pradeep\", \"post_text\": \"Hi, i'm trying to understand this function WHEN but there is only one example and it is confusing me. Please help me to understand this as i'm not getting any clear picture on this.\\n\\n\\n//a FUNCTION with side-effect Action\\nnamesTable := FUNCTION\\nnamesRecord := RECORD\\nSTRING20 surname;\\nSTRING10 forename;\\nINTEGER2 age := 25;\\nEND;\\no := OUTPUT('namesTable used by user <x>');\\nds := DATASET([{'x','y',22}],namesRecord);\\nRETURN WHEN(ds,O);\\nEND;\\nz := namesTable : PERSIST('z');\\n//the PERSIST causes the side-effect action to execute only when the PERSIST is re-built\\nOUTPUT(z);\\n
\", \"post_time\": \"2013-02-22 07:07:55\" },\n\t{ \"post_id\": 3536, \"topic_id\": 784, \"forum_id\": 8, \"post_subject\": \"Re: qualified names in XML datasets\", \"username\": \"tdelbecque\", \"post_text\": \"Thank you for this quick answer !\\n\\nThierry.\", \"post_time\": \"2013-02-22 16:06:47\" },\n\t{ \"post_id\": 3535, \"topic_id\": 784, \"forum_id\": 8, \"post_subject\": \"Re: qualified names in XML datasets\", \"username\": \"bforeman\", \"post_text\": \"This is why I think the HPCC VM is so cool. I was able to test very quickly across multiple versions.\\n\\nYour original code that uses the tag:cid in XPATH works in version 3.6.2, but returns no data on all versions after that. I will open an issue and post back here.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-8845\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-22 14:42:18\" },\n\t{ \"post_id\": 3534, \"topic_id\": 784, \"forum_id\": 8, \"post_subject\": \"Re: qualified names in XML datasets\", \"username\": \"bforeman\", \"post_text\": \"I can confirm your report here as well, it looks like a possible bug in the latest version, checking some back versions and development.\\n\\nThanks for the report!\\n\\nBob\", \"post_time\": \"2013-02-22 14:15:45\" },\n\t{ \"post_id\": 3530, \"topic_id\": 784, \"forum_id\": 8, \"post_subject\": \"qualified names in XML datasets\", \"username\": \"tdelbecque\", \"post_text\": \"Hello, \\n\\ndoe's somebody know how qualified names are handled in XML data sets ?\\n\\nWhen I try to read the following dataset:\\n\\n<tag:doc xmlns:tag="http://www.sodad.com/xml/tag/dtd">\\n<tag:meta><tag:cid>270967</tag:cid></tag:meta>\\n<tag:meta><tag:cid>270968</tag:cid></tag:meta>\\n</tag:doc>\\n\\nwith this code:\\n\\nR := RECORD\\n STRING cid {XPATH('tag:cid')};\\nEND;\\n\\nds := DATASET ('~thy::test::barxml', R, XML('tag:doc/tag:meta'));\\n\\nOUTPUT(ds);\\n\\n\\nI get the following error: Namespace prefix tag on cid is not defined\\n\\nBut when I remove the qualifier from cid:\\n\\n<tag:doc xmlns:tag="http://www.sodad.com/xml/tag/dtd">\\n<tag:meta><cid>270967</cid></tag:meta>\\n<tag:meta><cid>270968</cid></tag:meta>\\n</tag:doc>\\n\\n\\n and from the XPATH spec:\\n\\nR := RECORD\\n STRING cid {XPATH('cid')};\\nEND;\\n\\nds := DATASET ('~thy::test::barxml', R, XML('tag:doc/tag:meta'));\\n\\nOUTPUT(ds);\\n\\n\\nthen it works. What am I missing there ?\\n\\nThanks, Thierry.\", \"post_time\": \"2013-02-22 11:54:07\" },\n\t{ \"post_id\": 3565, \"topic_id\": 786, \"forum_id\": 8, \"post_subject\": \"Re: KEYED INDEX Clarity\", \"username\": \"bforeman\", \"post_text\": \"As I mentioned in my earlier post, because you are referencing all elements (fields) of the INDEX, there is no need to use KEYED in that instance, but it looks like you are getting a slightly different error now. 
\\n\\nWhat is your target cluster, are you running this on THOR, or publishing to ROXIE?\\n\\nThe EMPTY message seems to indicate that your INDEX is empty or hasn't been built yet.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-27 13:04:52\" },\n\t{ \"post_id\": 3559, \"topic_id\": 786, \"forum_id\": 8, \"post_subject\": \"Re: KEYED INDEX Clarity\", \"username\": \"prachi\", \"post_text\": \"Hi Bob,\\n\\nThanks for your response!\\nAs suggested by you i have made changes but error still persists.\\n\\nCode1:\\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\n\\n// create index on superfile\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nds := IDX_SuperFile(KEYED(UserID=user_id) AND KEYED(ParentTweetUniqueID=parent_TweetUniqueID) OR \\nKEYED(TwitterUniqueID=twitter_UniqueID));\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , ds.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(ds,newRec);\\n\\nRETURN ds_tbl;\\nEND;\\n\\n\\n\\n
\\n\\n\\nCode2:\\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\n\\n// create index on superfile\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nds := IDX_SuperFile(KEYED(UserID=user_id) , KEYED(ParentTweetUniqueID=parent_TweetUniqueID) OR \\nKEYED(TwitterUniqueID=twitter_UniqueID));\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , ds.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(ds,newRec);\\n\\nRETURN ds_tbl;\\nEND;\\n\\n\\n\\n
\\n\\n\\nError:\\nError: KEYED(INDEX(_EMPTY_(RECORD (24, 11 - D:\\\\HPCC_data\\\\files\\\\Sapphire\\\\Superkey_TweetsMonitor_AVG_TweetsSentiment.ecl)\\nWarning: userid;\\nWarning: twitteruniqueid;\\nWarning: parenttweetuniqueid;\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: fpos;\\nWarning: END), { userid, twitteruniqueid, parenttweetuniqueid }, RECORD\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: END, '~thor::tweetsmonitor::idx').userid = STORED('userid')) could not be implemented by the key
\\n\\n\\nNeed solution for this error.\\n\\nThanks and Regards!!\", \"post_time\": \"2013-02-27 05:38:03\" },\n\t{ \"post_id\": 3545, \"topic_id\": 786, \"forum_id\": 8, \"post_subject\": \"Re: KEYED INDEX Clarity\", \"username\": \"bforeman\", \"post_text\": \"For 1.:\\n\\nBased on the error that you are getting:\\n\\nChange \\n\\nds := IDX_SuperFile(KEYED(UserID=user_id AND ParentTweetUniqueID=parent_TweetUniqueID OR TwitterUniqueID=twitter_UniqueID ));\\n
\\n\\nTo:\\n\\nds := IDX_SuperFile(KEYED(UserID=user_id),\\n KEYED(ParentTweetUniqueID=parent_TweetUniqueID) OR \\n\\t KEYED(TwitterUniqueID=twitter_UniqueID));
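If the compiler still objects that the OR could not be implemented by the key, an alternative (untested) sketch is to run the two keyed reads separately and concatenate the results; dsA and dsB are just local names for the sketch:

dsA := IDX_SuperFile(KEYED(UserID = user_id),
                     WILD(TwitterUniqueID),
                     KEYED(ParentTweetUniqueID = parent_TweetUniqueID));
dsB := IDX_SuperFile(WILD(UserID),
                     KEYED(TwitterUniqueID = twitter_UniqueID));
ds  := dsA + dsB;   // de-duplicate afterwards if the two branches can overlap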
\\n\\nKEYED and WILD are essentially needed when you are filtering on an INDEX lower component, and do not care about the upper component. So you would WILD the upper key component and use KEYED for the lower component. Looking at your INDEX, since you are filtering on all of the INDEX components I don't think you would need to use KEYED.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-25 14:07:52\" },\n\t{ \"post_id\": 3542, \"topic_id\": 786, \"forum_id\": 8, \"post_subject\": \"KEYED INDEX Clarity\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nI need to use INDEX with KEYED option created on Superfile.\\nI tried different scenario:\\n\\n1.Using INDEX with KEYED \\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\n\\n// create index on superfile\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nds := IDX_SuperFile(KEYED(UserID=user_id AND ParentTweetUniqueID=parent_TweetUniqueID OR TwitterUniqueID=twitter_UniqueID ));\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , ds.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(ds,newRec);\\n\\nRETURN ds_tbl;\\nEND;\\n
\\n\\n2.Using FETCH with INDEX(with KEYED)\\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment_trial():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\nSuperFile_name :='~thor::tweetsmonitor::superfile';\\n\\nLayout_TweetsMonitoring := RECORD\\nSTRING100 UserID;\\nSTRING5000 TweetsText;\\nSTRING1000 TweetedBy;\\nSTRING1000 TweetsDate;\\nSTRING2000 TweetsLocation;\\nSTRING200 TweetsCountry;\\nDECIMAL3_2 TweetsSentiment;\\nSTRING5000 Retweets;\\nINTEGER8 TwitterUniqueID;\\nSTRING2000 TweetsID;\\nSTRING50 TweetsType;\\nSTRING500 ParentTweetUniqueID;\\nINTEGER8 TwitterUserID;\\nINTEGER8 NoOfFollowers;\\nINTEGER8 NoOfFriends;\\nEND;\\n\\nSuperFile_Dataset := DATASET(SuperFile_name,{Layout_TweetsMonitoring,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nfetched_records := FETCH(SuperFile_Dataset, IDX_SuperFile(KEYED(UserID=user_id AND ParentTweetUniqueID=parent_TweetUniqueID OR TwitterUniqueID=twitter_UniqueID))\\n, RIGHT.fpos);\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , fetched_records.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(fetched_records,newRec);\\n\\nRETURN ds_tbl;\\nEND;
\\n\\n\\nBut it fails and Error is:\\nError: Cannot OR together conditions on multiple key fields (KEYED(INDEX(_EMPTY_(RECORD (39, 45 - D:\\\\HPCC_data\\\\files\\\\Sapphire\\\\Superkey_TweetsMonitor_AVG_TweetsSentiment_trial.ecl)\\nWarning: userid;\\nWarning: twitteruniqueid;\\nWarning: parenttweetuniqueid;\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: fpos;\\nWarning: END), { userid, twitteruniqueid, parenttweetuniqueid }, RECORD\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: END, '~thor::tweetsmonitor::idx').userid = STORED('userid') AND INDEX(_EMPTY_(RECORD\\nWarning: userid;\\nWarning: twitteruniqueid;\\nWarning: parenttweetuniqueid;\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: fpos;\\nWarning: END), { userid, twitteruniqueid, parenttweetuniqueid }, RECORD\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: END, '~thor::tweetsmonitor::idx').parenttweetuniqueid = STORED('parenttweetuniqueid') OR INDEX(_EMPTY_(RECORD\\nWarning: userid;\\nWarning: twitteruniqueid;\\nWarning: parenttweetuniqueid;\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: fpos;\\nWarning: END), { userid, twitteruniqueid, parenttweetuniqueid }, RECORD\\nWarning: tweetstext;\\nWarning: tweetedby;\\nWarning: tweetsdate;\\nWarning: tweetslocation;\\nWarning: tweetscountry;\\nWarning: tweetssentiment;\\nWarning: retweets;\\nWarning: tweetsid;\\nWarning: tweetstype;\\nWarning: twitteruserid;\\nWarning: nooffollowers;\\nWarning: nooffriends;\\nWarning: END, '~thor::tweetsmonitor::idx').twitteruniqueid - 9223372036854775808 = STORED('twitteruniqueid')))
\\n\\n\\nBut following scenario works(without Error)\\n3.Using FETCH with INDEX( without KEYED)\\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment_trial():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\nSuperFile_name :='~thor::tweetsmonitor::superfile';\\n\\nLayout_TweetsMonitoring := RECORD\\nSTRING100 UserID;\\nSTRING5000 TweetsText;\\nSTRING1000 TweetedBy;\\nSTRING1000 TweetsDate;\\nSTRING2000 TweetsLocation;\\nSTRING200 TweetsCountry;\\nDECIMAL3_2 TweetsSentiment;\\nSTRING5000 Retweets;\\nINTEGER8 TwitterUniqueID;\\nSTRING2000 TweetsID;\\nSTRING50 TweetsType;\\nSTRING500 ParentTweetUniqueID;\\nINTEGER8 TwitterUserID;\\nINTEGER8 NoOfFollowers;\\nINTEGER8 NoOfFriends;\\nEND;\\n\\nSuperFile_Dataset := DATASET(SuperFile_name,{Layout_TweetsMonitoring,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nfetched_records := FETCH(SuperFile_Dataset, IDX_SuperFile(UserID=user_id AND ParentTweetUniqueID=parent_TweetUniqueID OR TwitterUniqueID=twitter_UniqueID)\\n, RIGHT.fpos);\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , fetched_records.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(fetched_records,newRec);\\n\\nRETURN ds_tbl;\\nEND;
\\n\\n\\n4.Using only INDEX(without KEYED)\\nIMPORT STD;\\n\\nExport Superkey_TweetsMonitor_AVG_TweetsSentiment():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING500 parent_TweetUniqueID:= '' : STORED('ParentTweetUniqueID');\\nINTEGER8 twitter_UniqueID:= 0 : STORED('TwitterUniqueID');\\n\\nindexfile_name := '~thor::tweetsmonitor::idx';\\n\\n// create index on superfile\\n\\nIDX_SuperFile := INDEX({STRING100 UserID,INTEGER8 TwitterUniqueID,STRING500 ParentTweetUniqueID},\\n{STRING5000 TweetsText,STRING1000 TweetedBy,STRING1000 TweetsDate,STRING2000 TweetsLocation,\\nSTRING200 TweetsCountry,DECIMAL3_2 TweetsSentiment,STRING5000 Retweets,STRING2000 TweetsID,\\nSTRING50 TweetsType,INTEGER8 TwitterUserID,INTEGER8 NoOfFollowers,INTEGER8 NoOfFriends,UNSIGNED8 fpos},indexfile_name);\\n\\nds := IDX_SuperFile(UserID=user_id AND ParentTweetUniqueID=parent_TweetUniqueID OR TwitterUniqueID=twitter_UniqueID);\\n\\nnewRec := RECORD\\nAVG_TweetsSentiment := AVE( GROUP , ds.TweetsSentiment );\\nEND;\\n\\nds_tbl := TABLE(ds,newRec);\\n\\nRETURN ds_tbl;\\nEND;\\n
\\n\\nWithout KEYED it works but i need to use KEYED for performance benefit.\\nNeed explanation and solution for using KEYED option in INDEX query.\\n\\nTHANKS AND REGARDS!!\", \"post_time\": \"2013-02-25 12:00:47\" },\n\t{ \"post_id\": 3544, \"topic_id\": 787, \"forum_id\": 8, \"post_subject\": \"Re: keyed index - CFileSerialStream::get read past end of st\", \"username\": \"bforeman\", \"post_text\": \"Could we please see some sample code? That error normally indicates that there is something wrong with your INDEX or DATASET definition.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-25 13:52:05\" },\n\t{ \"post_id\": 3543, \"topic_id\": 787, \"forum_id\": 8, \"post_subject\": \"keyed index - CFileSerialStream::get read past end of stream\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\ni using keyed index for join in ROXIE, i am getting below error, \\n\\nError: System error: -1: Graph[1], keyedjoin[5]: SLAVE 10.144.240.49:16600: CFileSerialStream::get read past end of stream\\n\\nThanks in advance\\n\\nBy\\nGopi\", \"post_time\": \"2013-02-25 12:23:28\" },\n\t{ \"post_id\": 3812, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"DSC\", \"post_text\": \"I can think of two ways of doing this. The first is to not use File.MonitorFile() but use a CRON-based action instead, along with File.RemoteDirectory(). That action would simply look for files in the remote directory and then act on them. The advantage to that method is a more flexible scanning interval, rather than the single interval specified in your configuration. The downside is that if one process takes too long and overruns the next execution, you could start running into trouble.\\n\\nThe second method is one that I've implemented. In short, the action that processes the File.MonitorFile() firing basically sets itself up for monitoring again. Here is the original test code I wrote:\\n\\nIMPORT * FROM Std;\\n\\n// Useful constants\\nkLandingZoneHost := 'mydali.somedomain.com';\\nkLandingZoneDir := '/var/lib/HPCCSystems/dropzone';\\nkFilenamePattern := kLandingZoneDir + '/foo*';\\nkFoundFileEventName := 'FoundFile';\\n\\n// Parsing patterns for picking apart the found file's hostpath\\n// (e.g.: '//10.210.150.80:7100/var/lib/HPCCSystems/dropzone/foo1')\\nPATTERN Digit := PATTERN('[[:digit:]]');\\nPATTERN IPv4Octet := REPEAT(Digit,1,3);\\nPATTERN IPv4Port := REPEAT(Digit,1,5);\\nPATTERN HostIPv4Address := IPv4Octet '.' IPv4Octet '.' IPv4Octet '.' 
IPv4Octet;\\nPATTERN HostSource := HostIPv4Address ':' IPv4Port;\\nPATTERN FilePath := '/' ANY+;\\nPATTERN FullFileSource := '//' HostSource FilePath;\\n\\n// Utility function\\nGetLastItemFromString(STRING text, STRING delimiter = ' ') := FUNCTION\\n SET OF STRING items := Str.SplitWords(text,delimiter);\\n \\n RETURN items[COUNT(items)];\\nEND;\\n\\n// Record definition for temporarily holding the found file's hostpath\\nFilePathRec := RECORD\\n STRING path;\\nEND;\\n\\n// Record definition that will contain the host, full path and filename of found file\\nFilePathComponentsRec := RECORD\\n STRING host := MATCHTEXT(HostIPv4Address);\\n STRING path := MATCHTEXT(FilePath);\\n STRING filename := GetLastItemFromString(MATCHTEXT(FilePath),'/');\\nEND;\\n\\n// The function that would actually do the work, handling the found file\\nHandleFoundFile(FilePathComponentsRec f) := FUNCTION\\n // Do something interesting, like spray the file into Thor\\n RETURN OUTPUT('Found file ' + f.path + ' on host ' + f.host);\\nEND;\\n\\n// Definition of the file monitoring call\\nMonitorFileAction := File.MonitorFile(kFoundFileEventName,kLandingZoneHost,kFilenamePattern);\\n\\n// Function called when a file is found\\nProcessFoundFileEvent(STRING fullFilePath) := FUNCTION\\n rs := DATASET([{fullFilePath}],FilePathRec);\\n parsedRS := PARSE(rs,path,FullFileSource,FilePathComponentsRec,FIRST);\\n \\n actions := SEQUENTIAL\\n (\\n // Handle the file\\n HandleFoundFile(parsedRS[1]),\\n \\n // Delete the file so it won't be found again\\n File.DeleteExternalFile(parsedRS[1].host,parsedRS[1].path),\\n \\n // Instantiate file monitoring again\\n MonitorFileAction\\n );\\n \\n RETURN actions;\\nEND;\\n\\n// Event handler for the file monitoring\\nProcessFoundFileEvent(EVENTEXTRA) : WHEN(EVENT(kFoundFileEventName,'*'));\\n\\n// Initial instantiation of file monitoring\\nMonitorFileAction;
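And for comparison, the first (CRON-based) approach could be sketched roughly like this; it is untested, reuses the kLandingZoneHost and kLandingZoneDir constants defined above, and the five-minute schedule is arbitrary:

// Poll the landing zone on a timer instead of relying on the DFU file monitor
foundFiles := File.RemoteDirectory(kLandingZoneHost, kLandingZoneDir, 'foo*');

// Each row carries the file name, size and modified time; hand it on to
// whatever spray/processing logic you need
OUTPUT(foundFiles, NAMED('FilesFound'), EXTEND) : WHEN(CRON('0-59/5 * * * *'));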
\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-22 11:46:37\" },\n\t{ \"post_id\": 3811, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Hi Bob,\\n\\nI'm monitoring physical file(single log file) on dropzone, which gets updated as logs are generated. When the event occurs this file is getting sprayed.\\n\\nNow the issue is that once the file is spryed, the workunit completes and monitoring stops. After updation of the file if i've to spray it again then i've to submit\\nthat workunit again.\\n\\nIs there way like when file gets updated event should fire and file should be sprayed without manual intervene(No need of submitting workunit again)?\\n\\nI used shotcount parameter as -1 so that it continues monitoring until manually aborted. but after spraying workunit got completed!!\\n\\n\\n\\nhere is the code\\n\\nFileEventName := 'FoundFile';\\n\\t\\t lz := '172.20.104.226';\\n\\t\\t FileName := '/var/lib/HPCCSystems/mydropzone/FlumeLogs/1363778142876-1'; \\t\\t\\n\\t\\tSTD.File.MonitorFile (FileEventName, \\n\\t\\t\\t\\t lz, \\n\\t\\t\\t\\t FileName\\n\\t\\t\\t\\t );\\nSTD.File.SprayVariabl('*.*.*.*', '/var/lib/HPCCSystems/mydropzone/Logs/13637781428761',,\\n'\\\\\\\\,','\\\\\\\\n','\\\\"','mythor', \\n '~test::prad::TestLogFileSpary', -1, 'http://172.20.104.226:8010/FileSpray',,TRUE) : WHEN(EVENT(FileEventName,'*'), COUNT(1));\\n
\", \"post_time\": \"2013-03-22 11:07:22\" },\n\t{ \"post_id\": 3692, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"change:\\nOUTPUT('File FOUND') : WHEN(EVENT(FileEventName,'*'));
\\n\\nto:\\n\\nOUTPUT('File FOUND') : WHEN(EVENT(FileEventName,'*'),COUNT(1));
\", \"post_time\": \"2013-03-11 13:06:24\" },\n\t{ \"post_id\": 3691, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nWe changed the ECL IDE to latest one. Now the event is getting fired at \\nexactly 15 mins eventhough the interval is 1min (i.e. 60 seconds, i've changed in config manger from 900 sec to 60 sec).\\n\\nI also tried the following example for MonitorFile\\n\\n \\n FileEventName := 'FoundFile';\\n\\t\\t lz := '172.20.104.223';\\n\\t\\t FileName := '/var/lib/HPCCSystems/mydropzone/foo.txt';\\n\\t\\n\\t\\t\\tSTD.File.MonitorFile (FileEventName, \\n\\t\\t\\t\\t \\t lz, \\n\\t\\t\\t\\t\\t FileName\\n\\t\\t\\t\\t\\t );\\n\\n OUTPUT('File FOUND') : WHEN(EVENT(FileEventName,'*'));
\\n\\nNow this is not firing, the value of "lz" and "Filename" are correct.\\n\\nI'm running the above example in the latest ECL IDE.\", \"post_time\": \"2013-03-11 11:28:36\" },\n\t{ \"post_id\": 3659, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"My test is also successful on the 3.8.6 and 3.6.2 HPCC VM versions. If you have the same error with the 3.10.2 VM, I'm straing to think that your problem might be machine related. Let me know after you test the 3.10 VM.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-07 13:05:52\" },\n\t{ \"post_id\": 3658, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nI've attached the environmental.xml(environmental.txt) file.\\n\\nAnd tried the same code on vm in my local machine. But i got segmentation fault error !!.\\nMy VM Version 3.8.2. \\n\\nI'll download latest one and try again.\\n\\n\\nPradeep\", \"post_time\": \"2013-03-07 11:12:29\" },\n\t{ \"post_id\": 3647, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"Sure, please send me your environment.xml\\nFrankly I am out of ideas, except for one more.\\n\\nCan you install the latest VM and test your code there? I just did on my local machine and again the event fired perfectly. If you can verify that the example runs on your VM, then we know that it is a configuration issue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-06 14:13:12\" },\n\t{ \"post_id\": 3645, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nI've checked the configuration manager, but dont know which attribute to check for enabling scheduler. Even read the Installing_and_RunningTheHPCCPlatform-3.10.2-1 pdf but didnt get any idea on how to enable ECL scheduler.\\n\\nI'm attaching screen shot of environment.xml and highlighted attribute which i changed from false to true. Is that the correct attribute look for?\\n\\nIf it required i can attach environment.xml file ?\\n\\n\\nPradeep\", \"post_time\": \"2013-03-06 13:40:13\" },\n\t{ \"post_id\": 3627, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"Pradeep, your logs seem to match mine, although your log in the ECLAgent shows the workunit state before the event is fired.\\n\\nThat said, if the code is identical, targets are identical, but results are different, then perhaps it is a configuration issue. 
Can you ask your administrator to check to make sure that the Scheduler is enabled, and that the pre-flight metrics show that the DFU Server is active and monitoring the file activity?\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2013-03-05 13:56:28\" },\n\t{ \"post_id\": 3622, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nHere are the log files from Helpers section.\\n\\n\\n\\nPradeep\", \"post_time\": \"2013-03-05 10:52:54\" },\n\t{ \"post_id\": 3615, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"I think we can start by comparing my successful log files with yours, can you download and then attach the log files found in the Helpers area of your workunit?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-04 12:55:34\" },\n\t{ \"post_id\": 3614, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nI tried the above code also and got the same result, event didnt fire!!.\\nSince the same code is working on your machine and not on mine \\ndoes that mean HPCC platform is not installed properly.\\nCluster : Single Standalone Hpcc cluster 3.10.2-1 Ubuntu 12.04 LTS\\n\\nOr\\nIs there any "service or component" related to events that should be started before trying these examples.\\n\\nDocumentation says that ECL scheduler is installed with HPCC platform. \\nIt Starts and stops using hpcc-init just as other components.
\\nI'm, still unable to find any solution?\", \"post_time\": \"2013-03-04 11:28:31\" },\n\t{ \"post_id\": 3610, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"Without using NOTIFY, this code works correctly for me:\\n\\nIMPORT STD;\\n FileEventName := 'FileFoundEvent';\\n FileName := '~atest::whenevent::fire';\\n \\n IF (STD.File.FileExists(FileName),STD.File.DeleteLogicalFile(FileName));\\n STD.File.MonitorLogicalFileName(FileEventName,FileName);\\n \\n OUTPUT('FileFound') : WHEN(EVENT(FileEventName,'*'),COUNT(1));\\n \\n rec := RECORD\\n STRING10 key;\\n STRING10 val;\\n END;\\n \\n afile := DATASET([{ 'A', '0'}], rec);\\n OUTPUT(afile,,FileName,OVERWRITE);\\n
\\n\\nOnly slightly modified from your original.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-01 20:23:55\" },\n\t{ \"post_id\": 3609, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nI'm using 3.10.2-1 version. \\nCluster : Single Standalone Hpcc cluster 3.10.2-1 Ubuntu 12.04 LTS\\n\\nEven changed the interval to 1 min using configuration manager. Still the workunit is waiting for an event to fire. As mentioned earlier, the event is getting fired through NOTIFY FileEventName := 'FileFoundEvent';\\nNOTIFY('FileEventName ', '*');
and by manually pushing the event through the ECL scheduler interface. \\n\\n\\n\\nPradeep\", \"post_time\": \"2013-03-01 17:04:49\" },\n\t{ \"post_id\": 3595, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"Regarding 3.) \\nThe subtype is used to filter out which object of many may be posting the event, so you could look for an event from a particular WU that fired it. I've only seen it used in one place, when detecting a CRON event at a specific time:\\n\\n
EVENT('CRON',(STRING)minute + ' ' + (STRING)hour + ' * * *');
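Put another way, the second argument to EVENT is a filter on the subtype string that the firing side supplies. A minimal sketch of that pairing, assuming an illustrative subtype value (not taken from the posts above, just an example of the mechanism):

// Waiting workunit: react only when the subtype matches this string
// ('*' would accept any subtype, as in the earlier examples).
OUTPUT('Daily feed arrived') : WHEN(EVENT('FileFoundEvent', 'daily::feed'), COUNT(1));

// Fired from a separate builder window (or by the DFU monitor itself,
// which uses the triggering object's name as the subtype):
// NOTIFY('FileFoundEvent', 'daily::feed');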
\\n\\nWhat OSS version are you currently running this on? As I said in my last message, my event eventually fired after the 15 minute default interval, and I am using the 3.10.2 cluster (the latest)\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-28 18:28:42\" },\n\t{ \"post_id\": 3594, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob,\\n\\nI waited for more than 2 hours but workunit was still in wait state and even created the logical file('test::pradeep::prad') in seperate builder still event didnt fire. But when i pushed the same event(using "PushEvent" button in ECL scheduler interface) it got fired and workunit completed.\\n\\n1. I suppose this is not the way to fire event. As you said it should fire when it detects appropriate logical file name.\\n\\n2. When i used NOTIFY like this in a seperate builder NOTIFY('test::pradeep::prad', '*');
the event got fired. \\n If an appropriately named file arrives in this interval it will fire the event with the name of the triggering object as the event subtype
\\n\\n3. What is the significance of event subtypes in above statement? i mean what are event subtypes? \\n\\nPradeep\", \"post_time\": \"2013-02-28 17:45:09\" },\n\t{ \"post_id\": 3569, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"OK, after further study, the code works as documented (kind of):\\n\\nThe MonitorLogicalFileName function creates a file monitor job in the DFU Server. Once the job is received it goes into a 'monitoring' mode (which can be seen in the eclwatch DFU Workunit display), which polls at a fixed interval (default 15 mins). If an appropriately named file arrives in this interval it will fire the event with the name of the triggering object as the event subtype (see the EVENT function).\\n\\nRun your workunit and wait about 20 minutes, the event will eventually fire (at least it did on my machine).\\n\\nYou can change that interval if you wish in the configuration manager settings.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-27 15:35:19\" },\n\t{ \"post_id\": 3568, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"I am going to open an issue on this, as the event should fire after the file is detected. I even tried to create the file in a seperate workunit, and I still could not get the event to fire.\\n\\nThanks for your report.\\n\\nBob\", \"post_time\": \"2013-02-27 14:41:43\" },\n\t{ \"post_id\": 3567, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"I need to clarify what you are posting.\\n\\nThe workunit is not "blocked", it is "waiting" for the event to fire. Perhaps the blocked icon on the results tab needs to be changed. You can verify this easily by running another workunit while this one is waiting.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-27 14:35:12\" },\n\t{ \"post_id\": 3563, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Bob, \\nI tried running by commenting IF statement even then it is getting blocked. \\n\\n
\\nIMPORT STD;\\n\\nFileEventName := 'FileFoundEvent';\\nFileName := 'test::pradeep::prad';\\n\\t\\n\\t// IF (STD.File.FileExists(FileName),STD.File.DeleteLogicalFile(FileName));\\n\\t\\n\\tSTD.File.MonitorLogicalFileName(FileEventName,FileName, 1);\\n \\n\\tOUTPUT('FileFound') : WHEN(EVENT(FileEventName,'*'),COUNT(1));\\n\\t\\n\\trec := RECORD\\n\\t\\tSTRING10 key;\\n\\t\\tSTRING10 val;\\n END;\\n afile := DATASET([{ 'A', '0'}], rec);\\n OUTPUT(afile,,'test::pradeep::prad', OVERWRITE);\\n
\\n1) If a matching file already exists when the DFU Monitoring job is started, that file will not generate an event. It will only generate an event once the file has been deleted and recreated.
\\nTo satisfy this condition I deleted and created the logical file again. But I get the same behavior (Blockage).\\nIf a file is created and then deleted (or deleted then re-created) between polling intervals, it will not be seen by the monitor and will not trigger an event.\\n
How to satisfy this condition?\\n\\nI've taken examples from the documentation "TheECLScheduler-3.10.4-2rc(page no 21 & 22)". \\nAre there any other examples which show the exact behavior of MonitorLogicalFile and MonitorFile?\\n\\nPradeep\", \"post_time\": \"2013-02-27 09:33:55\" },\n\t{ \"post_id\": 3554, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"bforeman\", \"post_text\": \"Why is workunit blocked?
\\nIt is waiting for an EVENT to fire that never does, because the file was deleted just above it.\\n\\nHow is the event 'FileFoundEvent' fired ?
\\nWhen the MonitorLogicalFileName function detects the Filename\\n\\nWhen this event is fired is it creating a logical file 'test::pradeep::prad'.Is my understanding correct here?
\\n\\nYes, it detects that the file exists on the target cluster.\\n\\nIf you comment out the IF function just above it then the event will fire and the workunit will complete. I'm not sure if this is expected behavior, still testing, but note the following in the documentation regarding the MonitorLogicalFileName function:\\n\\n1) If a matching file already exists when the DFU Monitoring job is started, that file will not generate an event. It will only generate an event once the file has been deleted and recreated.\\n\\n2) If a file is created and then deleted (or deleted then re-created) between polling intervals, it will not be seen by the monitor and will not trigger an event.\\n\\n3) Events are only generated on the polling interval.
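Taken together, those three points suggest starting the monitor first and only then creating the file, so that it "arrives" between two polls. A sketch of that split, reusing the names from this thread (this is one reading of the documentation, not a tested recipe):

// Workunit 1 - clear any old copy, start the monitor, then wait for the event.
IMPORT STD;
FileEventName := 'FileFoundEvent';
FileName      := 'test::pradeep::prad';

IF(STD.File.FileExists(FileName), STD.File.DeleteLogicalFile(FileName));
STD.File.MonitorLogicalFileName(FileEventName, FileName);
OUTPUT('FileFound') : WHEN(EVENT(FileEventName, '*'), COUNT(1));

// Workunit 2 - submitted from a separate builder window while workunit 1 is waiting:
// rec := RECORD
//   STRING10 key;
//   STRING10 val;
// END;
// OUTPUT(DATASET([{'A', '0'}], rec),, 'test::pradeep::prad', OVERWRITE);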
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-26 13:57:02\" },\n\t{ \"post_id\": 3549, \"topic_id\": 789, \"forum_id\": 8, \"post_subject\": \"STD.File.MonitorFile and MonitorLogicalFileName\", \"username\": \"Pradeep\", \"post_text\": \"Hi, when i run the following example, the workunit is getting blocked even though there are no other workunits running at that time. \\n\\n\\nFileEventName := 'FileFoundEvent';\\n\\tFileName := 'test::pradeep::prad';\\n\\t\\n\\tIF (STD.File.FileExists(FileName),STD.File.DeleteLogicalFile(FileName));\\n\\tSTD.File.MonitorLogicalFileName(FileEventName,FileName);\\n \\n\\tOUTPUT('FileFound') : WHEN(EVENT(FileEventName,'*'),COUNT(1));\\n\\t\\n rec := RECORD\\n\\t\\tSTRING10 key;\\n\\t\\tSTRING10 val;\\n END;\\n afile := DATASET([{ 'A', '0'}], rec);\\n OUTPUT(afile,,FileName);\\n
\\n\\nWhy is workunit blocked?\\nHow is the event 'FileFoundEvent' fired ?\\nWhen this event is fired is it creating a logical file 'test::pradeep::prad'.Is my understanding correct here?\", \"post_time\": \"2013-02-26 06:08:30\" },\n\t{ \"post_id\": 25413, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"rtaylor\", \"post_text\": \"newportm,\\n\\nMy understanding is that UNGROUP would remove the grouping but leave the data where it is at the point of the UNGROUP.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-21 13:45:03\" },\n\t{ \"post_id\": 25403, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"newportm\", \"post_text\": \"I am wondering if the ungroup will lose distribution of the dataset if the group was created on a distributed dataset locally. Based on what I can see, it does not, however I am looking for verification. \\n\\nThanks.\", \"post_time\": \"2019-03-21 07:34:45\" },\n\t{ \"post_id\": 3577, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"Leofei\", \"post_text\": \"Thx, Richard. It did help me to understand more detials in the code.\", \"post_time\": \"2013-02-27 21:02:30\" },\n\t{ \"post_id\": 3576, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"rtaylor\", \"post_text\": \"Leo,\\n\\nYes, GROUP and DISTRIBUTE sound similar, but how similar are they? I don't know precisely (you can look at the source code if you really want that answer ). \\n\\nThe big difference between the two is that DISTRIBUTE does not create the subgroups that GROUP does, so subsequent operations will not operate the same if you just DISTRIBUTE instead of using GROUP.\\n\\nUNGROUP is not always necessary, but it may solve some problems if they occur. In most cases a GROUPed dataset can be used just like a non-GROUPed dataset for operations that don't work on subgroups separately.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-27 20:05:13\" },\n\t{ \"post_id\": 3572, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"Leofei\", \"post_text\": \"Hi, Richard, Thank you for your efficient reply!\\n\\nIt sounds like GROUP is doing the same thing as DISTRIBUTE does. May I think they are dealing with the data in the same way?\\n\\nAlso, is there any problem if I don’t UNGROUP the dataset after the GROUP operation?\\n\\nThanks and looking forward to your answer.\\n\\n-Leo\", \"post_time\": \"2013-02-27 18:34:14\" },\n\t{ \"post_id\": 3571, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"Re: A question about the GROUP function\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nThe GROUP function is meant to make processing huge datasets faster by allowing operations to work on smaller chunks of data.\\n\\nFor example, let's say you have a 10 BILLION record dataset that you need to SORT by lastname, firstname, middlename, and gender. You could just do it this way:
Rec := RECORD\\n STRING30 lastname;\\n STRING20 firstname;\\n STRING20 middlename;\\n STRING1 gender;\\n\\t//and a bunch of other fields\\nEND;\\nds := DATASET('MyTenBillionRecordFile',Rec,FLAT);\\n\\nSortedRecs := SORT(ds,lastname,firstname,middlename,gender);
This code would work, but it would be a single 10 billion record global sort, which could take quite a bit of time (depending on the size of your cluster).\\n\\nSo an alternative would be to do it this way:Rec := RECORD\\n STRING30 lastname;\\n STRING20 firstname;\\n STRING20 middlename;\\n STRING1 gender;\\n\\t//and a bunch of other fields\\nEND;\\nds := DATASET('MyTenBillionRecordFile',Rec,FLAT);\\n\\nSortedRecs := SORT(ds,lastname);\\nGrpRecs := GROUP(SortedRecs,lastname);\\nFinalRecs := SORT(GrpRecs,firstname,middlename,gender);
The difference here is that the initial global sort by lastname will go reasonably fast, then the GROUP by lastname creates a separate subgroup (each on a single node) of records for each unique lastname, so that the last SORT by firstname, middlename, and gender will happen separately and independently on each subgroup.\\n\\nSo, if you had exactly 10,000 last names and a completely even distribution of data, that last SORT would actually do 10,000 1-million-record sorts instead of a single 10-BILLION-record sort. And since each subgroup is contained on a single node, if you were running a 400-node cluster you would be doing at least 400 of those 10,000 1-million-record sorts simultaneously at all times until the entire sorting job is done.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-27 17:37:48\" },\n\t{ \"post_id\": 3570, \"topic_id\": 793, \"forum_id\": 8, \"post_subject\": \"A question about the GROUP function\", \"username\": \"Leofei\", \"post_text\": \"Hi, I'm a new guy studying ECL. I have a question here about the GROUP function:\\n\\nI have some question about using GROUP function. \\n\\n1. In the reference, it said that "The GROUP function fragments a recordset into a set of sets." What is the meaning of "set of sets"? What is the differences between a grouped dataset and a ungrouped dataset (like a set of records)?\\n\\n2. In the reference, "This allows aggregations and other operations (such as ITERATE, DEDUP, ROLLUP, SORT and others)", I always use these functions (like DEDUP, ROLLUP) directly. Don't understand when I need the GROUP function.\\n\\nThanks a lot if anyone can answer my question!\", \"post_time\": \"2013-02-27 16:51:19\" },\n\t{ \"post_id\": 5703, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Re: Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"bforeman\", \"post_text\": \"A JIRA was opened some time back: \\nhttps://track.hpccsystems.com/browse/HPCC-10168\\nIt was recently closed as cannot reproduce.\\nWhat build are you on? \\nYou should try with 4.2.4, it may be fixed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-16 13:04:46\" },\n\t{ \"post_id\": 5700, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Re: Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"jwilt\", \"post_text\": \"One of the issues with this situation is this:\\nWe're not aware of any other operations that help to solve the problem (without manual intervention).\\nIf a file-name remains, DeleteLogicalFile(...) also fails.\\nNothing else works.\\n\\nWhile ideally, one would try to find the "root cause" - given a live situation with large clusters, having a programmatic work-around would beat finding a failed overnight job. (Tracking down what caused the problem can be very difficult.)\\n\\nMaybe adding some other feature? Like a "kill all file-parts" option in DeleteLogicalFile(...)? Or a whole new function?\\nWe've been wondering if overwriting with a 0-sized file and re-attempting the DeleteLogicalFile(...) might improve our chances.\\nPossibly with an intervening sleep(...) 
function?\\n\\nAny advice would be appreciated.\\nThanks.\", \"post_time\": \"2014-05-15 20:43:41\" },\n\t{ \"post_id\": 5376, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Re: Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"jwilt\", \"post_text\": \"This issue occurs with logic like the following (all in a SEQUENTIAL):\\n\\noutput file_new\\ndelete file_old\\nrename file file_old\\nrename file_new file\\n\\nVery often, we see failures of the 1st rename because the previous delete hasn't "completely finished" yet.\\nMaybe the above sequence is just a bad idea. Open to suggestions.\", \"post_time\": \"2014-03-11 23:01:00\" },\n\t{ \"post_id\": 3616, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Re: Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"bforeman\", \"post_text\": \"Comments from development:\\n\\nWe shouldn't be leaving lingering [clashing] file parts around, they should all be temporary whilst in transit and named to final in a short space of time. If it's common that there are files being left behind (e.g. detected by XREF), then we should investigate those problems separately.\\n\\nDo you know under what circumstances there were lingering file parts ?\\n\\nI'm not convinced Rename should have an overwrite flag, as opposed to an explicit delete, followed by separate rename operation. Most file systems don't allow rename if dest exist I think.\\n
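For reference, the swap pattern being discussed looks roughly like this in ECL, with the explicit delete-then-rename described in the comments above (a sketch only; the logical file names and the newData recordset are illustrative, and it assumes the live file already exists):

IMPORT STD;
liveName := '~demo::data::current';       // file read by downstream jobs
newName  := '~demo::data::current_new';   // freshly built replacement
oldName  := '~demo::data::current_old';   // previous generation, kept one cycle

rec     := RECORD STRING10 key; STRING10 val; END;
newData := DATASET([{'A', '1'}], rec);    // stand-in for the real build

SEQUENTIAL(
  OUTPUT(newData,, newName, OVERWRITE),
  IF(STD.File.FileExists(oldName), STD.File.DeleteLogicalFile(oldName)),
  STD.File.RenameLogicalFile(liveName, oldName),   // the step that fails if parts of oldName linger
  STD.File.RenameLogicalFile(newName, liveName)
);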
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-04 13:46:15\" },\n\t{ \"post_id\": 3574, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Re: Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"bforeman\", \"post_text\": \"Suggestion passed to the development team, thanks!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-27 20:00:08\" },\n\t{ \"post_id\": 3573, \"topic_id\": 794, \"forum_id\": 8, \"post_subject\": \"Fileservices.RenameLogicalFile with OVERWRITE?\", \"username\": \"jwilt\", \"post_text\": \"Hey, just wondering if this enhancement has ever been considered.\\nNot a huge deal, but it would be handy.\\nIt would even be great if, say, there were leftover file-parts on the cluster, and the OVERWRITE option even allowed overwriting those. \\nUnder 7.02, eclagent fails in this case - i.e, a lingering file-part exists and stops a later rename to that file name - and requiring manual intervention.\\nThanks.\", \"post_time\": \"2013-02-27 19:52:23\" },\n\t{ \"post_id\": 3588, \"topic_id\": 795, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: 1: Error - end of stream\", \"username\": \"rtaylor\", \"post_text\": \"Mohan,\\n\\nGlad to be of help.\\n\\nRichard\", \"post_time\": \"2013-02-28 15:41:32\" },\n\t{ \"post_id\": 3587, \"topic_id\": 795, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: 1: Error - end of stream\", \"username\": \"mohan\", \"post_text\": \"Hi Richard\\n\\nThe "Row Tag" comment from you really got me thinking. Soo.....\\n\\nI deleted the logical file and sprayed it again. I can now see the results. \\nI tried the Row Tag with "DOCUMENT" and "DOCUMENTS" and it seems to work.\\nI deleted the logical file again and tried it with a blank Row Tag. \\nAnd I managed to get the same error.\\n\\nSo, I do know now that I messed up on the "Row Tag".\\n\\nThanks a lot Richard.\\nMohan\", \"post_time\": \"2013-02-28 14:37:21\" },\n\t{ \"post_id\": 3585, \"topic_id\": 795, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: 1: Error - end of stream\", \"username\": \"mohan\", \"post_text\": \"Hi Richard\\n\\nTo answer you questions:\\nWhen you sprayed, what was the Row Tag you specified?\\nDOCUMENT (see attached picture)\\nCan we see a small example of the data?\\n <DOCUMENTS>\\n <DOCUMENT>\\n <ProjectID>9999</ProjectID>\\n <PROID>999999</PROID>\\n <ProjectName />\\n <CountyID>99</CountyID>\\n <State>AK</State>\\n <StateID>99</StateID>\\n <Value>9999.0000</Value>\\n <Stage>ABC</Stage>\\n <StageTypeID>99</StageTypeID>\\n <StageTypeParentDescr>ABCD</StageTypeParentDescr>\\n <StageTypeParentID>9999</StageTypeParentID>\\n <CreateDate>2011-03-02</CreateDate>\\n <UpdateDate>2011-03-17</UpdateDate>\\n<Notes>test</Notes>\\n </DOCUMENT>\\n <DOCUMENT>\\n <ProjectID>9999</ProjectID>\\n <PROID>999999</PROID>\\n <ProjectName />\\n <CountyID>99</CountyID>\\n <State>AK</State>\\n <StateID>99</StateID>\\n <Value>9999.0000</Value>\\n <Stage>ABC</Stage>\\n <StageTypeID>99</StageTypeID>\\n <StageTypeParentDescr>ABCD</StageTypeParentDescr>\\n <StageTypeParentID>9999</StageTypeParentID>\\n <CreateDate>2011-03-02</CreateDate>\\n <UpdateDate>2011-03-17</UpdateDate>\\n<Notes>test 2</Notes>\\n </DOCUMENT>\\n</DOCUMENTS>\\nWhat is the maximum length of the longest DOCUMENT tag in the file (in bytes)?\\n20000\\n\\nI did try the xml file with just 2 documents and that worked. 
I am just trying a larger dataset that is all.\", \"post_time\": \"2013-02-28 14:22:36\" },\n\t{ \"post_id\": 3583, \"topic_id\": 795, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: 1: Error - end of stream\", \"username\": \"rtaylor\", \"post_text\": \"Mohan,\\n\\nSome questions:
\\nBased on the error message, I suspect that at line 496 it found an open record tag and did not find the matching close record tag within the maximum number of bytes specified as your maximum record size. Since your RECORD structure has no MAXLENGTH in it, that maximum would be 4096 bytes. Or possibly there is a missing end of record tag?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-28 13:39:33\" },\n\t{ \"post_id\": 3578, \"topic_id\": 795, \"forum_id\": 8, \"post_subject\": \"Error: System error: 1: Error - end of stream\", \"username\": \"mohan\", \"post_text\": \"Hi\\n\\nI am trying to Export the xml data from the file that I did a "spray XML" to.\\nI am getting this error:\\nError: System error: 1: Error - end of stream "End of stream encountered whilst parsing" [line 496, file offset 52175]\\ntDescr>POST BID</StageTypeParentDescr>\\n*ERROR* (//10.0.1.3:7100/var/lib/HPCCSystems/hpcc-data/thor/testproject/yn/projectsample1._1_of_5) (in Xml Read G1 E2) (0, 0), 1, \\n\\nI am thinking it is because of some setting. The file offset tells me that it is keeping a track of the whole document while scanning the sprayed xml document rather than each DOCUMENT element inside the xml file.\\n\\nAny insight would be helpful.\\n\\nThanks,\\n\\nThis is the code:\\n_01Layout_Project := RECORD\\nINTEGER ProjectID {XPATH('ProjectID')};\\nSTRING50 StageTypeParentDescr {XPATH('StageTypeParentDescr')} ;\\nEND;\\n\\nEXPORT _02File_OriginalProject := DATASET('~testproject::YN::projectsample1',_01Layout_Project, XML('DOCUMENTS/DOCUMENT'));\\n\\n
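For what it's worth, a sketch of the same layout with an explicit MAXLENGTH, along the lines of the reply above (the 20000 figure is the longest-DOCUMENT estimate given elsewhere in this thread):

_01Layout_Project := RECORD,MAXLENGTH(20000)
  INTEGER  ProjectID            {XPATH('ProjectID')};
  STRING50 StageTypeParentDescr {XPATH('StageTypeParentDescr')};
END;

EXPORT _02File_OriginalProject := DATASET('~testproject::YN::projectsample1',
                                          _01Layout_Project,
                                          XML('DOCUMENTS/DOCUMENT'));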
\", \"post_time\": \"2013-02-27 21:53:00\" },\n\t{ \"post_id\": 3603, \"topic_id\": 797, \"forum_id\": 8, \"post_subject\": \"Re: Roxie Package Map Issue\", \"username\": \"bforeman\", \"post_text\": \"No, I wouldn't think that reinstalling HPCC would be a solution for any issue \\nIf you could post the log for the similar issue that would be helpful to help us analyze.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-01 12:39:15\" },\n\t{ \"post_id\": 3601, \"topic_id\": 797, \"forum_id\": 8, \"post_subject\": \"Re: Roxie Package Map Issue\", \"username\": \"Bhagwant\", \"post_text\": \"Hi Bob,\\nWe had to reinstall HPCC to get rid of that error [color=#FF0000:3o6caf73]"Unknown query roxie_query_using_superkey". \\nOn another instance we are facing similar issue and option of reinstalling HPCC is not available. \\nIs there any other way out?\", \"post_time\": \"2013-03-01 10:02:47\" },\n\t{ \"post_id\": 3584, \"topic_id\": 797, \"forum_id\": 8, \"post_subject\": \"Re: Roxie Package Map Issue\", \"username\": \"bforeman\", \"post_text\": \"Thanks for your post, I have passed this to our development team for review.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-28 14:16:52\" },\n\t{ \"post_id\": 3582, \"topic_id\": 797, \"forum_id\": 8, \"post_subject\": \"Roxie Package Map Issue\", \"username\": \"Bhagwant\", \"post_text\": \"Hi,\\nI am trying to update a SuperKey used in a Published Roxie Query Using PackageMap Command.But When we try to add packageMap,Superkey Data is not Updated instead the Roxie Query Published Earlier which were working properly started displaying \\nError:[color=#FF0000:3bxxiz4w]"Unknown query roxie_query_using_superkey".\\n\\nIs this a known issue in 3.10.2-1 platform?\\nAttaching the log file generated by Roxie\\n\\nCluster Specification: Single Standalone Hpcc cluster 3.10.2-1 Ubuntu 12.04 LTS\\n\\nLog snippet\\n
\\n00000E9A 2013-02-28 14:58:20 15106 15109 "DSK: [sda] r/s=0.1 kr/s=0.6 w/s=1.6 kw/s=11.6 bsy=1 NIC: rxp/s=10744.9 rxk/s=0.0 txp/s=81413.4 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=98"\\n00000E9B 2013-02-28 14:58:35 15106 15302 "Loading package set default_#, process spec *"\\n00000E9C 2013-02-28 14:58:35 15106 15302 "Loading empty package for QuerySet roxie"\\n00000E9D 2013-02-28 14:58:35 15106 15302 "Loaded packages"\\n00000E9E 2013-02-28 14:58:35 15106 15302 "Loading package set default_#, process spec *"\\n00000E9F 2013-02-28 14:58:35 15106 15302 "Loading package map roxiepackagemap.pkg, active false"\\n00000EA0 2013-02-28 14:58:35 15106 15302 "Loaded packages"\\n00000EA1 2013-02-28 14:59:17 15106 15297 "PING: 0 replies received, average delay 0"\\n00000EA2 2013-02-28 14:59:20 15106 15109 "SYS: PU= 13% MU= 29% MAL=1075154384 MMP=1074794496 SBK=359888 TOT=1050076K RAM=2335700K SWP=27724K"\\n00000EA3 2013-02-28 14:59:20 15106 15109 "DSK: [sda] r/s=1.4 kr/s=22.8 w/s=8.1 kw/s=2162.7 bsy=5 NIC: rxp/s=15830.6 rxk/s=0.0 txp/s=105350.5 txk/s=0.0 CPU: usr=9 sys=2 iow=1 idle=86"\\n00000EA4 2013-02-28 14:59:20 15106 15109 "KERN_INFO: [270481.764262] esp[23104]: segfault at 0 ip 00007ff00a885fd2 sp 00007feffe081840 error 4 in libws_packageprocess.so[7ff00a631000+2ca000]"\\n00000EA5 2013-02-28 14:59:56 15106 18905 "[(null)] ERROR: Unknown query roxie_query_using_superkey"\\n00000EA6 2013-02-28 14:59:56 15106 18905 "[172.20.104.223:9876{13}] FAILED: <roxie_query_using_superkey xmlns='urn:hpccsystems:ecl:roxie_query_using_superkey'/>"\\n00000EA7 2013-02-28 14:59:56 15106 18905 "[172.20.104.223:9876{13}] EXCEPTION: Unknown query roxie_query_using_superkey"\\n00000EA8 2013-02-28 15:00:00 15106 18905 "[(null)] ERROR: Unknown query roxie_query_using_superkey"\\n00000EA9 2013-02-28 15:00:00 15106 18905 "[172.20.104.223:9876{14}] FAILED: <roxie_query_using_superkey xmlns='urn:hpccsystems:ecl:roxie_query_using_superkey'><id>5</id></roxie_query_using_superkey>"\\n00000EAA 2013-02-28 15:00:00 15106 18905 "[172.20.104.223:9876{14}] EXCEPTION: Unknown query roxie_query_using_superkey"\\n00000EAB 2013-02-28 15:00:04 15106 18905 "[(null)] ERROR: Unknown query roxie_query_using_superkey"\\n00000EAC 2013-02-28 15:00:04 15106 18905 "[172.20.104.223:9876{15}] FAILED: <roxie_query_using_superkey xmlns='urn:hpccsystems:ecl:roxie_query_using_superkey'><id>5</id></roxie_query_using_superkey>"\\n00000EAD 2013-02-28 15:00:04 15106 18905 "[172.20.104.223:9876{15}] EXCEPTION: Unknown query roxie_query_using_superkey"\\n
\", \"post_time\": \"2013-02-28 12:58:10\" },\n\t{ \"post_id\": 3592, \"topic_id\": 798, \"forum_id\": 8, \"post_subject\": \"Re: Package not getting deleted\", \"username\": \"DSC\", \"post_text\": \"This problem has been reported and a fix in place for a future release. See https://track.hpccsystems.com/browse/HPCC-8777. Included in the comments is information on how to delete the package through daliadmin.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-28 16:10:25\" },\n\t{ \"post_id\": 3586, \"topic_id\": 798, \"forum_id\": 8, \"post_subject\": \"Package not getting deleted\", \"username\": \"prachi\", \"post_text\": \"Hi,\\nI have created one package (named buzzmonitoring_11111.pkg) using packagemap.But when i am trying to delete it, it is not getting deleted and giving error.\\n\\nCommand:\\nroot@cldx-1088-982:/# ecl packagemap info roxie\\n<PackageMaps id="buzzmonitoring_11111.pkg">\\n <Package id="pkg_buzzmonitoring">\\n <SuperFile id="sapphire::superkey::buzzmonitoring">\\n <SubFile value="sapphire::index::buzzmonitoring_11111"/>\\n </SuperFile>\\n </Package>\\n <Package id="buzz_uniqueid_count.1">\\n <Base id="pkg_buzzmonitoring"/>\\n </Package>\\n <Package id="buzz_uniqueid_groupby_followers.1">\\n <Base id="pkg_buzzmonitoring"/>\\n </Package>\\n</PackageMaps>\\n
\\n\\nError:\\nroot@cldx-1088-982:/# ecl packagemap delete roxie buzzmonitoring_11111.pkg\\n\\n ... deleting package map buzzmonitoring_11111.pkg now\\n\\n\\nException(s):\\n11004: No package sets defined for roxie\\n\\nroot@cldx-1088-982:/#\\n
\\n\\nNeed solution for this.\\nThanks and Regards !\", \"post_time\": \"2013-02-28 14:33:25\" },\n\t{ \"post_id\": 3591, \"topic_id\": 799, \"forum_id\": 8, \"post_subject\": \"Re: Watchdog has lost contact with Thor slave\", \"username\": \"janssend\", \"post_text\": \"Slave 3 (192.168.1.200)\\n00000051 2013-02-28 16:13:10 18031 18031 Started wuid=W20130228-161229, user=hpcc, graph=graph1\\n\\n00000052 2013-02-28 16:13:10 18031 18031 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n00000053 2013-02-28 16:13:10 18031 18031 CRC allocator OFF\\n00000054 2013-02-28 16:13:10 18031 18031 Packed allocator OFF\\n00000055 2013-02-28 16:13:10 18031 18031 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n00000056 2013-02-28 16:13:10 18031 18031 maxActivityCores = [unbound]\\n00000057 2013-02-28 16:13:10 18031 18031 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n00000058 2013-02-28 16:13:10 18031 18031 Current reported version is FILESERVICES 2.1.3\\n00000059 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n0000005A 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.1\\n0000005B 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.2\\n0000005C 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.3\\n0000005D 2013-02-28 16:13:10 18031 18031 New Graph started : graph1\\n0000005E 2013-02-28 16:13:10 18031 18031 temp directory cleared\\n0000005F 2013-02-28 16:13:10 18031 18031 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 113271, /var/lib/HPCCSystems/hpcc-mirror/thor = 113271\\n00000060 2013-02-28 16:13:10 18031 18031 Key file cache size set to: 8\\n00000061 2013-02-28 16:13:10 18031 18031 GraphInit: W20130228-161229graph1, graphId=1\\n00000062 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65546\\n00000063 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65542\\n00000064 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65541\\n00000065 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65540\\n00000066 2013-02-28 16:13:10 18031 18031 Add: Launching graph thread for graphId=1\\n00000067 2013-02-28 16:13:10 18031 18305 Running graph [global] : <graph>\\n <node id="2" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::content', false); "/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="3" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="4" label="Store Internal('a1')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a1')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="3"/>\\n <edge id="3_0" source="3" target="4"/>\\n </graph>\\n - graph(graph1, 1)\\n00000068 2013-02-28 16:13:10 18031 18305 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(countaggregate, 3)\\n00000069 2013-02-28 16:13:10 18031 18305 CONNECTING (id=3, idx=0) to (id=4, idx=0) - 
activity(remoteresult, 4)\\n0000006A 2013-02-28 16:13:10 18031 18305 deserializeMPTag: tag = -7854\\n0000006B 2013-02-28 16:13:10 18031 18305 deserializeMPTag: tag = 65545\\n0000006C 2013-02-28 16:13:10 18031 18305 Watchdog: Start Job 1\\n0000006D 2013-02-28 16:13:10 18031 18306 Starting input - activity(remoteresult, 4)\\n0000006E 2013-02-28 16:13:10 18031 18306 Starting input - activity(countaggregate, 3)\\n0000006F 2013-02-28 16:13:10 18031 18306 ITDL starting for output 0 - activity(linkedrawiterator, 2)\\n00000070 2013-02-28 16:13:10 18031 18306 ITDL starting for output 0 - activity(countaggregate, 3)\\n00000071 2013-02-28 16:13:10 18031 18306 Stopping input for - activity(countaggregate, 3)\\n00000072 2013-02-28 16:13:10 18031 18306 ITDL output 0 stopped, count was 0 - activity(linkedrawiterator, 2)\\n00000073 2013-02-28 16:13:10 18031 18306 Stopping input for - activity(remoteresult, 4)\\n00000074 2013-02-28 16:13:10 18031 18306 ITDL output 0 stopped, count was 0 - activity(countaggregate, 3)\\n00000075 2013-02-28 16:13:10 18031 18031 Entering getDone - graph(graph1, 1)\\n00000076 2013-02-28 16:13:10 18031 18031 Watchdog: Stop Job 1\\n00000077 2013-02-28 16:13:10 18031 18031 Leaving getDone - graph(graph1, 1)\\n00000078 2013-02-28 16:13:10 18031 18305 End of sub-graph - graph(graph1, 1)\\n00000079 2013-02-28 16:13:10 18031 18305 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=35\\nreadtime=330us\\nreadsize=158452 bytes\\nwrites=32\\nwritetime=860us\\nwritesize=925 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 1)\\n0000007A 2013-02-28 16:13:10 18031 18305 Graph Done - graph(graph1, 1)\\n0000007B 2013-02-28 16:13:10 18031 18305 PU= 64% MU= 8% MAL=1611883904 MMP=1611665408 SBK=218496 TOT=1574156K RAM=346580K SWP=15628K - graph(graph1, 1)\\n0000007C 2013-02-28 16:13:10 18031 18305 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n0000007D 2013-02-28 16:13:10 18031 18031 GraphInit: W20130228-161229graph1, graphId=5\\n0000007E 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65548\\n0000007F 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65539\\n00000080 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65543\\n00000081 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65544\\n00000082 2013-02-28 16:13:10 18031 18031 Add: Launching graph thread for graphId=5\\n00000083 2013-02-28 16:13:10 18031 18031 destroying ProcessSlaveActivity - activity(remoteresult, 4)\\n00000084 2013-02-28 16:13:10 18031 18031 ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n00000085 2013-02-28 16:13:10 18031 18031 AFTER ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n00000086 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(linkedrawiterator, 2)\\n00000087 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(countaggregate, 3)\\n00000088 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(remoteresult, 4)\\n00000089 2013-02-28 16:13:10 18031 18305 Running graph [global] : <graph>\\n <node id="6" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::meta', false); 
"/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="7" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="8" label="Store Internal('a2')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a2')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="6_0" source="6" target="7"/>\\n <edge id="7_0" source="7" target="8"/>\\n </graph>\\n - graph(graph1, 5)\\n0000008A 2013-02-28 16:13:10 18031 18305 CONNECTING (id=6, idx=0) to (id=7, idx=0) - activity(countaggregate, 7)\\n0000008B 2013-02-28 16:13:10 18031 18305 CONNECTING (id=7, idx=0) to (id=8, idx=0) - activity(remoteresult, 8)\\n0000008C 2013-02-28 16:13:10 18031 18305 deserializeMPTag: tag = -9038\\n0000008D 2013-02-28 16:13:10 18031 18305 deserializeMPTag: tag = 65547\\n0000008E 2013-02-28 16:13:10 18031 18305 Watchdog: Start Job 5\\n0000008F 2013-02-28 16:13:10 18031 18307 Starting input - activity(remoteresult, 8)\\n00000090 2013-02-28 16:13:10 18031 18307 Starting input - activity(countaggregate, 7)\\n00000091 2013-02-28 16:13:10 18031 18307 ITDL starting for output 0 - activity(linkedrawiterator, 6)\\n00000092 2013-02-28 16:13:10 18031 18307 ITDL starting for output 0 - activity(countaggregate, 7)\\n00000093 2013-02-28 16:13:10 18031 18307 Stopping input for - activity(countaggregate, 7)\\n00000094 2013-02-28 16:13:10 18031 18307 ITDL output 0 stopped, count was 0 - activity(linkedrawiterator, 6)\\n00000095 2013-02-28 16:13:10 18031 18307 Stopping input for - activity(remoteresult, 8)\\n00000096 2013-02-28 16:13:10 18031 18307 ITDL output 0 stopped, count was 0 - activity(countaggregate, 7)\\n00000097 2013-02-28 16:13:10 18031 18031 Entering getDone - graph(graph1, 5)\\n00000098 2013-02-28 16:13:10 18031 18031 Watchdog: Stop Job 5\\n00000099 2013-02-28 16:13:10 18031 18031 Leaving getDone - graph(graph1, 5)\\n0000009A 2013-02-28 16:13:10 18031 18305 End of sub-graph - graph(graph1, 5)\\n0000009B 2013-02-28 16:13:10 18031 18305 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=14\\nreadtime=87us\\nreadsize=1899 bytes\\nwrites=20\\nwritetime=672us\\nwritesize=561 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 5)\\n0000009C 2013-02-28 16:13:10 18031 18305 Graph Done - graph(graph1, 5)\\n0000009D 2013-02-28 16:13:10 18031 18305 PU= 20% MU= 8% MAL=1611883968 MMP=1611665408 SBK=218560 TOT=1574156K RAM=346580K SWP=15628K DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=57360.0 rxk/s=0.0 txp/s=27060.0 txk/s=0.0 CPU: usr=0 sys=20 iow=0 idle=80 - graph(graph1, 5)\\n0000009E 2013-02-28 16:13:10 18031 18305 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n0000009F 2013-02-28 16:13:10 18031 18031 QueryDone, removing W20130228-161229graph1 from jobs\\n000000A0 2013-02-28 16:13:10 18031 18031 Job ended : graph1\\n000000A1 2013-02-28 16:13:10 18031 18031 destroying ProcessSlaveActivity - activity(remoteresult, 8)\\n000000A2 2013-02-28 16:13:10 18031 18031 ProcessSlaveActivity : joining process thread - activity(remoteresult, 8)\\n000000A3 2013-02-28 16:13:10 18031 18031 AFTER ProcessSlaveActivity : 
joining process thread - activity(remoteresult, 8)\\n000000A4 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(remoteresult, 8)\\n000000A5 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(linkedrawiterator, 6)\\n000000A6 2013-02-28 16:13:10 18031 18031 DESTROYED - activity(countaggregate, 7)\\n000000A7 2013-02-28 16:13:10 18031 18031 CJobBase resetting memory manager\\n000000A8 2013-02-28 16:13:10 18031 18031 Unloading dll /opt/HPCCSystems/plugins//libfileservices.so\\n000000A9 2013-02-28 16:13:10 18031 18031 Roxiemem stats: Heap size 1536 pages, 1536 free, largest block 1536\\n000000AA 2013-02-28 16:13:10 18031 18031 Heap usage : 270336 bytes\\n000000AB 2013-02-28 16:13:10 18031 18031 QueryDone, removed W20130228-161229graph1 from jobs\\n000000AC 2013-02-28 16:13:10 18031 18031 Finished wuid=W20130228-161229, graph=graph1\\n000000AD 2013-02-28 16:13:10 18031 18031 Started wuid=W20130228-161229, user=hpcc, graph=graph2\\n\\n000000AE 2013-02-28 16:13:10 18031 18031 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n000000AF 2013-02-28 16:13:10 18031 18031 CRC allocator OFF\\n000000B0 2013-02-28 16:13:10 18031 18031 Packed allocator OFF\\n000000B1 2013-02-28 16:13:10 18031 18031 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n000000B2 2013-02-28 16:13:10 18031 18031 maxActivityCores = [unbound]\\n000000B3 2013-02-28 16:13:10 18031 18031 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n000000B4 2013-02-28 16:13:10 18031 18031 Current reported version is FILESERVICES 2.1.3\\n000000B5 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n000000B6 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.1\\n000000B7 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.2\\n000000B8 2013-02-28 16:13:10 18031 18031 Compatible version FILESERVICES 2.1.3\\n000000B9 2013-02-28 16:13:10 18031 18031 New Graph started : graph2\\n000000BA 2013-02-28 16:13:10 18031 18031 temp directory cleared\\n000000BB 2013-02-28 16:13:10 18031 18031 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 113271, /var/lib/HPCCSystems/hpcc-mirror/thor = 113271\\n000000BC 2013-02-28 16:13:10 18031 18031 Key file cache size set to: 8\\n000000BD 2013-02-28 16:13:10 18031 18031 GraphInit: W20130228-161229graph2, graphId=9\\n000000BE 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65541\\n000000BF 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65545\\n000000C0 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65546\\n000000C1 2013-02-28 16:13:10 18031 18031 deserializeMPTag: tag = 65540\\n000000C2 2013-02-28 16:13:10 18031 18031 Add: Launching graph thread for graphId=9\\n000000C3 2013-02-28 16:13:10 18031 18311 Running graph [global] : <graph>\\n <node id="10" label="Disk Read '...::content'">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(14,1)"/>\\n <att name="name" value="ds_dailycontent"/>\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~hlcr::db::daily::content', layout_textfile, THOR); "/>\\n <att name="recordSize" value="400..4096(656)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~hlcr::db::daily::content"/>\\n </node>\\n <node id="11" label="Disk Write">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(22,2)"/>\\n <att 
name="_kind" value="2"/>\\n <att name="ecl" value="OUTPUT(..., , hlcr.util.superfile.getsuperfilebasesubname(dbdailycontentfile)); "/>\\n <att name="recordSize" value="400..4096(656)"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="10_0" source="10" target="11"/>\\n </graph>\\n - graph(graph2, 9)\\n000000C4 2013-02-28 16:13:10 18031 18311 CONNECTING (id=10, idx=0) to (id=11, idx=0) - activity(diskwrite, 11)\\n000000C5 2013-02-28 16:13:10 18031 18311 Watchdog: Start Job 9\\n000000C6 2013-02-28 16:13:10 18031 18312 handling fname : /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._3_of_3 - activity(diskwrite, 11)\\n000000C7 2013-02-28 16:13:10 18031 18312 Starting input - activity(diskwrite, 11)\\n000000C8 2013-02-28 16:13:10 18031 18312 diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._3_of_3' (logical file = ~hlcr::db::daily::content) - activity(diskread, 10)\\n000000C9 2013-02-28 16:13:10 18031 18312 diskread[part=3]: Base offset to 15365772008 - activity(diskread, 10)\\n000000CA 2013-02-28 16:13:10 18031 18312 diskread[part=3]: variable (/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._3_of_3) - activity(diskread, 10)\\n000000CB 2013-02-28 16:13:10 18031 18312 ITDL starting for output 0 - activity(diskread, 10)\\n000000CC 2013-02-28 16:13:10 18031 18312 Writing to file: /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/thtmp18031_2__partial.tmp - activity(diskwrite, 11)\\n000000CD 2013-02-28 16:13:10 18031 18312 Created output stream for /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._3_of_3 - activity(diskwrite, 11)\\n000000CE 2013-02-28 16:13:10 18031 18312 Ungrouped - activity(diskwrite, 11)\\n000000CF 2013-02-28 16:13:10 18031 18312 Record size (max) = 4096 - activity(diskread, 10)\\n000000D0 2013-02-28 16:14:10 18031 18310 SYS: PU= 99% MU= 8% MAL=1613978800 MMP=1613762560 SBK=216240 TOT=1576204K RAM=358660K SWP=15628K\\n000000D1 2013-02-28 16:15:10 18031 18310 SYS: PU=100% MU= 8% MAL=1613978800 MMP=1613762560 SBK=216240 TOT=1576204K RAM=360980K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=80 ST=304 UT=770\\n TT: PI=13104 PN=java PC=5 ST=36 UT=38\\n TT: PI=22 PN=kswapd0 PC=3 ST=43 UT=0\\n000000D2 2013-02-28 16:15:10 18031 18310 DSK: [sda] r/s=118.2 kr/s=15094.5 w/s=125.4 kw/s=15811.4 bsy=93 NIC: rxp/s=5279.9 rxk/s=0.0 txp/s=1098.2 txk/s=0.0 CPU: usr=13 sys=7 iow=78 idle=0\\n000000D3 2013-02-28 16:16:10 18031 18310 SYS: PU=100% MU= 8% MAL=1613978800 MMP=1613762560 SBK=216240 TOT=1576204K RAM=361704K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=81 ST=293 UT=799\\n TT: PI=13104 PN=java PC=5 ST=35 UT=43\\n TT: PI=22 PN=kswapd0 PC=3 ST=42 UT=0\\n000000D4 2013-02-28 16:16:13 18031 18310 DSK: [sda] r/s=126.4 kr/s=16154.3 w/s=124.9 kw/s=15739.5 bsy=97 NIC: rxp/s=5472.1 rxk/s=0.0 txp/s=1190.0 txk/s=0.0 CPU: usr=14 sys=7 iow=78 idle=0\\n000000D5 2013-02-28 16:17:13 18031 18310 SYS: PU=100% MU= 8% MAL=1613978800 MMP=1613762560 SBK=216240 TOT=1576204K RAM=358420K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=80 ST=286 UT=833\\n TT: PI=13104 PN=java PC=5 ST=40 UT=40\\n TT: PI=22 PN=kswapd0 PC=3 ST=44 UT=0\\n000000D6 2013-02-28 16:17:13 18031 18310 DSK: [sda] r/s=115.5 kr/s=14737.5 w/s=116.5 kw/s=14677.3 bsy=89 NIC: rxp/s=4989.3 rxk/s=0.0 txp/s=1065.6 txk/s=0.0 CPU: usr=13 sys=7 iow=78 idle=0\\n000000D7 2013-02-28 16:18:13 18031 18310 SYS: PU=100% MU= 8% MAL=1613978800 MMP=1613762560 SBK=216240 TOT=1576204K RAM=359052K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=81 ST=282 
UT=780\\n TT: PI=13104 PN=java PC=5 ST=33 UT=43\\n TT: PI=22 PN=kswapd0 PC=3 ST=41 UT=0\\n000000D8 2013-02-28 16:18:13 18031 18310 DSK: [sda] r/s=119.7 kr/s=15307.9 w/s=118.9 kw/s=14956.4 bsy=92 NIC: rxp/s=5148.9 rxk/s=0.0 txp/s=1122.4 txk/s=0.0 CPU: usr=13 sys=7 iow=78 idle=0\\n000000D9 2013-02-28 16:18:53 18031 18312 diskread[part=3]: CRC Stored=e70f95bf, calculated=e70f95bf file(/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._3_of_3) - activity(diskread, 10)\\n000000DA 2013-02-28 16:18:53 18031 18038 CThorBackupHandler, copying to target: //192.168.1.241/var/lib/HPCCSystems/hpcc-mirror/thor/hlcr/db/daily/content.base0._3_of_3\\n000000DB 2013-02-28 16:18:53 18031 18312 Wrote 334414 records, crc=0xE70F95BF - activity(diskwrite, 11)\\n000000DC 2013-02-28 16:18:53 18031 18312 Stopping input for - activity(diskwrite, 11)\\n000000DD 2013-02-28 16:18:53 18031 18312 ITDL output 0 stopped, count was 334414 - activity(diskread, 10)\\n000000DE 2013-02-28 16:19:13 18031 18310 SYS: PU= 97% MU= 8% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=363496K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=69 ST=474 UT=563\\n TT: PI=44342 PN=dafilesrv PC=11 ST=154 UT=10\\n TT: PI=13104 PN=java PC=5 ST=44 UT=40\\n000000DF 2013-02-28 16:19:13 18031 18310 DSK: [sda] r/s=120.7 kr/s=15425.4 w/s=141.1 kw/s=17810.7 bsy=89 NIC: rxp/s=7461927.4 rxk/s=0.0 txp/s=5399335.8 txk/s=0.0 CPU: usr=10 sys=15 iow=71 idle=2\\n000000E0 2013-02-28 16:20:13 18031 18310 SYS: PU= 91% MU= 8% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=360864K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=57 ST=812 UT=82\\n TT: PI=44342 PN=dafilesrv PC=23 ST=340 UT=22\\n TT: PI=13104 PN=java PC=6 ST=60 UT=36\\n000000E1 2013-02-28 16:20:13 18031 18310 DSK: [sda] r/s=110.3 kr/s=14083.8 w/s=119.4 kw/s=15028.1 bsy=79 NIC: rxp/s=16440102.6 rxk/s=0.0 txp/s=15101780.9 txk/s=0.0 CPU: usr=2 sys=27 iow=61 idle=8\\n000000E2 2013-02-28 16:21:13 18031 18310 SYS: PU= 92% MU= 8% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=361492K SWP=15628K\\n TT: PI=18031 PN=thorslave_lcr PC=54 ST=866 UT=84\\n TT: PI=44342 PN=dafilesrv PC=25 ST=417 UT=22\\n TT: PI=13104 PN=java PC=6 ST=59 UT=51\\n000000E3 2013-02-28 16:21:13 18031 18310 DSK: [sda] r/s=108.0 kr/s=13635.9 w/s=133.8 kw/s=16899.1 bsy=79 NIC: rxp/s=16247086.5 rxk/s=0.0 txp/s=14598210.2 txk/s=0.0 CPU: usr=2 sys=32 iow=57 idle=7\\n00000000 2013-02-28 16:37:23 19052 19052 Opened log file //192.168.1.200/var/log/HPCCSystems/mythor/thorslave.3.2013_02_28.log\\n00000001 2013-02-28 16:37:23 19052 19052 Build community_3.8.6-4\\n00000002 2013-02-28 16:37:23 19052 19052 registering 192.168.1.200:20100 - master 192.168.1.239:20000\\n00000003 2013-02-28 16:37:23 19052 19052 Initialization received\\n00000004 2013-02-28 16:37:23 19052 19052 Master build: community_3.8.6-4\\n00000005 2013-02-28 16:37:23 19052 19052 Registration confirmation sent\\n00000006 2013-02-28 16:37:23 19052 19052 verifying mp connection to rest of cluster\\n00000007 2013-02-28 16:37:23 19052 19052 verified mp connection to rest of cluster\\n00000008 2013-02-28 16:37:23 19052 19052 registered 192.168.1.200:20100\\n00000009 2013-02-28 16:37:23 19052 19052 calling initClientProcess\\n0000000A 2013-02-28 16:37:23 19052 19052 setIORetryCount set to : 0\\n0000000B 2013-02-28 16:37:23 19052 19052 ThorSlave Version LCR - 4.1 started\\n0000000C 2013-02-28 16:37:23 19052 19052 Slave 192.168.1.200:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000D 2013-02-28 16:37:23 19052 19052 Using 
querySo directory: /var/lib/HPCCSystems/queries/mythor_20100\\n0000000E 2013-02-28 16:37:23 19052 19052 RoxieMemMgr: Setting memory limit to 1605369856 bytes (1531 pages)\\n0000000F 2013-02-28 16:37:23 19052 19052 RoxieMemMgr: 1536 Pages successfully allocated for the pool - memsize=1610612736 base=0x7f9633f00000 alignment=1048576 bitmapSize=48\\n00000010 2013-02-28 16:37:23 19052 19052 FileCache: limit = 1800, purgeN = 10\\n00000011 2013-02-28 16:37:23 19052 19059 priority set id=140284906174208 policy=0 pri=0 PID=19052\\n00000012 2013-02-28 16:37:23 19052 19060 Watchdog: thread running
\", \"post_time\": \"2013-02-28 16:09:25\" },\n\t{ \"post_id\": 3590, \"topic_id\": 799, \"forum_id\": 8, \"post_subject\": \"Re: Watchdog has lost contact with Thor slave\", \"username\": \"janssend\", \"post_text\": \"Slave 1 (192.168.1.241)\\n00000055 2013-02-28 16:13:07 18330 18330 Started wuid=W20130228-161229, user=hpcc, graph=graph1\\n\\n00000056 2013-02-28 16:13:07 18330 18330 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n00000057 2013-02-28 16:13:07 18330 18330 CRC allocator OFF\\n00000058 2013-02-28 16:13:07 18330 18330 Packed allocator OFF\\n00000059 2013-02-28 16:13:07 18330 18330 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n0000005A 2013-02-28 16:13:07 18330 18330 maxActivityCores = [unbound]\\n0000005B 2013-02-28 16:13:07 18330 18330 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n0000005C 2013-02-28 16:13:07 18330 18330 Current reported version is FILESERVICES 2.1.3\\n0000005D 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n0000005E 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.1\\n0000005F 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.2\\n00000060 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.3\\n00000061 2013-02-28 16:13:07 18330 18330 New Graph started : graph1\\n00000062 2013-02-28 16:13:07 18330 18330 temp directory cleared\\n00000063 2013-02-28 16:13:07 18330 18330 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 112392, /var/lib/HPCCSystems/hpcc-mirror/thor = 112392\\n00000064 2013-02-28 16:13:07 18330 18330 Key file cache size set to: 8\\n00000065 2013-02-28 16:13:07 18330 18330 GraphInit: W20130228-161229graph1, graphId=1\\n00000066 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65546\\n00000067 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65542\\n00000068 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65541\\n00000069 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65540\\n0000006A 2013-02-28 16:13:07 18330 18330 Add: Launching graph thread for graphId=1\\n0000006B 2013-02-28 16:13:07 18330 18603 Running graph [global] : <graph>\\n <node id="2" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::content', false); "/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="3" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="4" label="Store Internal('a1')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a1')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="3"/>\\n <edge id="3_0" source="3" target="4"/>\\n </graph>\\n - graph(graph1, 1)\\n0000006C 2013-02-28 16:13:07 18330 18603 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(countaggregate, 3)\\n0000006D 2013-02-28 16:13:07 18330 18603 CONNECTING (id=3, idx=0) to (id=4, idx=0) - activity(remoteresult, 4)\\n0000006E 2013-02-28 16:13:07 18330 18603 
deserializeMPTag: tag = -7854\\n0000006F 2013-02-28 16:13:07 18330 18603 deserializeMPTag: tag = 65545\\n00000070 2013-02-28 16:13:07 18330 18603 Watchdog: Start Job 1\\n00000071 2013-02-28 16:13:07 18330 18604 Starting input - activity(remoteresult, 4)\\n00000072 2013-02-28 16:13:07 18330 18604 Starting input - activity(countaggregate, 3)\\n00000073 2013-02-28 16:13:07 18330 18604 ITDL starting for output 0 - activity(linkedrawiterator, 2)\\n00000074 2013-02-28 16:13:07 18330 18604 ITDL starting for output 0 - activity(countaggregate, 3)\\n00000075 2013-02-28 16:13:07 18330 18604 Record size (max) = 259 - activity(linkedrawiterator, 2)\\n00000076 2013-02-28 16:13:07 18330 18604 Stopping input for - activity(countaggregate, 3)\\n00000077 2013-02-28 16:13:07 18330 18604 ITDL output 0 stopped, count was 2 - activity(linkedrawiterator, 2)\\n00000078 2013-02-28 16:13:07 18330 18604 Record size = 8 - activity(countaggregate, 3)\\n00000079 2013-02-28 16:13:07 18330 18604 Stopping input for - activity(remoteresult, 4)\\n0000007A 2013-02-28 16:13:07 18330 18604 ITDL output 0 stopped, count was 1 - activity(countaggregate, 3)\\n0000007B 2013-02-28 16:13:07 18330 18330 Entering getDone - graph(graph1, 1)\\n0000007C 2013-02-28 16:13:07 18330 18330 Watchdog: Stop Job 1\\n0000007D 2013-02-28 16:13:07 18330 18330 Leaving getDone - graph(graph1, 1)\\n0000007E 2013-02-28 16:13:07 18330 18603 End of sub-graph - graph(graph1, 1)\\n0000007F 2013-02-28 16:13:07 18330 18603 Socket statistics : connects=1\\nconnecttime=198us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=302\\nreadtime=1021ms\\nreadsize=160176 bytes\\nwrites=166\\nwritetime=986ms\\nwritesize=133617910 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 1)\\n00000080 2013-02-28 16:13:07 18330 18603 Graph Done - graph(graph1, 1)\\n00000081 2013-02-28 16:13:07 18330 18603 PU= 75% MU= 8% MAL=1611883904 MMP=1611665408 SBK=218496 TOT=1574156K RAM=338676K SWP=37764K - graph(graph1, 1)\\n00000082 2013-02-28 16:13:07 18330 18603 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n00000083 2013-02-28 16:13:07 18330 18330 GraphInit: W20130228-161229graph1, graphId=5\\n00000084 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65548\\n00000085 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65539\\n00000086 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65543\\n00000087 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65544\\n00000088 2013-02-28 16:13:07 18330 18330 Add: Launching graph thread for graphId=5\\n00000089 2013-02-28 16:13:07 18330 18330 destroying ProcessSlaveActivity - activity(remoteresult, 4)\\n0000008A 2013-02-28 16:13:07 18330 18330 ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n0000008B 2013-02-28 16:13:07 18330 18330 AFTER ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n0000008C 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(linkedrawiterator, 2)\\n0000008D 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(countaggregate, 3)\\n0000008E 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(remoteresult, 4)\\n0000008F 2013-02-28 16:13:07 18330 18603 Running graph [global] : <graph>\\n <node id="6" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" 
value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::meta', false); "/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="7" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="8" label="Store Internal('a2')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a2')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="6_0" source="6" target="7"/>\\n <edge id="7_0" source="7" target="8"/>\\n </graph>\\n - graph(graph1, 5)\\n00000090 2013-02-28 16:13:07 18330 18603 CONNECTING (id=6, idx=0) to (id=7, idx=0) - activity(countaggregate, 7)\\n00000091 2013-02-28 16:13:07 18330 18603 CONNECTING (id=7, idx=0) to (id=8, idx=0) - activity(remoteresult, 8)\\n00000092 2013-02-28 16:13:07 18330 18603 deserializeMPTag: tag = -9038\\n00000093 2013-02-28 16:13:07 18330 18603 deserializeMPTag: tag = 65547\\n00000094 2013-02-28 16:13:07 18330 18603 Watchdog: Start Job 5\\n00000095 2013-02-28 16:13:07 18330 18605 Starting input - activity(remoteresult, 8)\\n00000096 2013-02-28 16:13:07 18330 18605 Starting input - activity(countaggregate, 7)\\n00000097 2013-02-28 16:13:07 18330 18605 ITDL starting for output 0 - activity(linkedrawiterator, 6)\\n00000098 2013-02-28 16:13:07 18330 18605 ITDL starting for output 0 - activity(countaggregate, 7)\\n00000099 2013-02-28 16:13:07 18330 18605 Record size (max) = 259 - activity(linkedrawiterator, 6)\\n0000009A 2013-02-28 16:13:07 18330 18605 Stopping input for - activity(countaggregate, 7)\\n0000009B 2013-02-28 16:13:07 18330 18605 ITDL output 0 stopped, count was 2 - activity(linkedrawiterator, 6)\\n0000009C 2013-02-28 16:13:07 18330 18605 Record size = 8 - activity(countaggregate, 7)\\n0000009D 2013-02-28 16:13:07 18330 18605 Stopping input for - activity(remoteresult, 8)\\n0000009E 2013-02-28 16:13:07 18330 18605 ITDL output 0 stopped, count was 1 - activity(countaggregate, 7)\\n0000009F 2013-02-28 16:13:07 18330 18330 Entering getDone - graph(graph1, 5)\\n000000A0 2013-02-28 16:13:07 18330 18330 Watchdog: Stop Job 5\\n000000A1 2013-02-28 16:13:07 18330 18330 Leaving getDone - graph(graph1, 5)\\n000000A2 2013-02-28 16:13:07 18330 18603 End of sub-graph - graph(graph1, 5)\\n000000A3 2013-02-28 16:13:07 18330 18603 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=18\\nreadtime=95us\\nreadsize=1971 bytes\\nwrites=18\\nwritetime=426us\\nwritesize=533 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 5)\\n000000A4 2013-02-28 16:13:07 18330 18603 Graph Done - graph(graph1, 5)\\n000000A5 2013-02-28 16:13:07 18330 18603 PU= 25% MU= 8% MAL=1611883968 MMP=1611665408 SBK=218560 TOT=1574156K RAM=338676K SWP=37764K DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=75150.0 rxk/s=0.0 txp/s=38075.0 txk/s=0.0 CPU: usr=0 sys=25 iow=0 idle=75 - graph(graph1, 5)\\n000000A6 2013-02-28 16:13:07 18330 18603 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n000000A7 2013-02-28 16:13:07 18330 18330 QueryDone, removing W20130228-161229graph1 from jobs\\n000000A8 2013-02-28 
16:13:07 18330 18330 Job ended : graph1\\n000000A9 2013-02-28 16:13:07 18330 18330 destroying ProcessSlaveActivity - activity(remoteresult, 8)\\n000000AA 2013-02-28 16:13:07 18330 18330 ProcessSlaveActivity : joining process thread - activity(remoteresult, 8)\\n000000AB 2013-02-28 16:13:07 18330 18330 AFTER ProcessSlaveActivity : joining process thread - activity(remoteresult, 8)\\n000000AC 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(remoteresult, 8)\\n000000AD 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(linkedrawiterator, 6)\\n000000AE 2013-02-28 16:13:07 18330 18330 DESTROYED - activity(countaggregate, 7)\\n000000AF 2013-02-28 16:13:07 18330 18330 CJobBase resetting memory manager\\n000000B0 2013-02-28 16:13:07 18330 18330 Unloading dll /opt/HPCCSystems/plugins//libfileservices.so\\n000000B1 2013-02-28 16:13:07 18330 18330 Roxiemem stats: Heap size 1536 pages, 1536 free, largest block 1536\\n000000B2 2013-02-28 16:13:07 18330 18330 Heap usage : 270336 bytes\\n000000B3 2013-02-28 16:13:07 18330 18330 QueryDone, removed W20130228-161229graph1 from jobs\\n000000B4 2013-02-28 16:13:07 18330 18330 Finished wuid=W20130228-161229, graph=graph1\\n000000B5 2013-02-28 16:13:07 18330 18330 Started wuid=W20130228-161229, user=hpcc, graph=graph2\\n\\n000000B6 2013-02-28 16:13:07 18330 18330 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n000000B7 2013-02-28 16:13:07 18330 18330 CRC allocator OFF\\n000000B8 2013-02-28 16:13:07 18330 18330 Packed allocator OFF\\n000000B9 2013-02-28 16:13:07 18330 18330 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n000000BA 2013-02-28 16:13:07 18330 18330 maxActivityCores = [unbound]\\n000000BB 2013-02-28 16:13:07 18330 18330 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n000000BC 2013-02-28 16:13:07 18330 18330 Current reported version is FILESERVICES 2.1.3\\n000000BD 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n000000BE 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.1\\n000000BF 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.2\\n000000C0 2013-02-28 16:13:07 18330 18330 Compatible version FILESERVICES 2.1.3\\n000000C1 2013-02-28 16:13:07 18330 18330 New Graph started : graph2\\n000000C2 2013-02-28 16:13:07 18330 18330 temp directory cleared\\n000000C3 2013-02-28 16:13:07 18330 18330 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 112392, /var/lib/HPCCSystems/hpcc-mirror/thor = 112392\\n000000C4 2013-02-28 16:13:07 18330 18330 Key file cache size set to: 8\\n000000C5 2013-02-28 16:13:07 18330 18330 GraphInit: W20130228-161229graph2, graphId=9\\n000000C6 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65541\\n000000C7 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65545\\n000000C8 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65546\\n000000C9 2013-02-28 16:13:07 18330 18330 deserializeMPTag: tag = 65540\\n000000CA 2013-02-28 16:13:07 18330 18330 Add: Launching graph thread for graphId=9\\n000000CB 2013-02-28 16:13:07 18330 18609 Running graph [global] : <graph>\\n <node id="10" label="Disk Read '...::content'">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(14,1)"/>\\n <att name="name" value="ds_dailycontent"/>\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~hlcr::db::daily::content', layout_textfile, THOR); "/>\\n <att name="recordSize" 
value="400..4096(656)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~hlcr::db::daily::content"/>\\n </node>\\n <node id="11" label="Disk Write">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(22,2)"/>\\n <att name="_kind" value="2"/>\\n <att name="ecl" value="OUTPUT(..., , hlcr.util.superfile.getsuperfilebasesubname(dbdailycontentfile)); "/>\\n <att name="recordSize" value="400..4096(656)"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="10_0" source="10" target="11"/>\\n </graph>\\n - graph(graph2, 9)\\n000000CC 2013-02-28 16:13:07 18330 18609 CONNECTING (id=10, idx=0) to (id=11, idx=0) - activity(diskwrite, 11)\\n000000CD 2013-02-28 16:13:07 18330 18609 Watchdog: Start Job 9\\n000000CE 2013-02-28 16:13:07 18330 18610 handling fname : /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._1_of_3 - activity(diskwrite, 11)\\n000000CF 2013-02-28 16:13:07 18330 18610 Starting input - activity(diskwrite, 11)\\n000000D0 2013-02-28 16:13:07 18330 18610 diskread[part=0]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._1_of_3' (logical file = ~hlcr::db::daily::content) - activity(diskread, 10)\\n000000D1 2013-02-28 16:13:07 18330 18610 diskread[part=0]: Base offset to 0 - activity(diskread, 10)\\n000000D2 2013-02-28 16:13:07 18330 18610 diskread[part=0]: variable (/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._1_of_3) - activity(diskread, 10)\\n000000D3 2013-02-28 16:13:07 18330 18610 ITDL starting for output 0 - activity(diskread, 10)\\n000000D4 2013-02-28 16:13:07 18330 18610 Writing to file: /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/thtmp18330_2__partial.tmp - activity(diskwrite, 11)\\n000000D5 2013-02-28 16:13:07 18330 18610 Created output stream for /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._1_of_3 - activity(diskwrite, 11)\\n000000D6 2013-02-28 16:13:07 18330 18610 Ungrouped - activity(diskwrite, 11)\\n000000D7 2013-02-28 16:13:08 18330 18610 Record size (max) = 4096 - activity(diskread, 10)\\n000000D8 2013-02-28 16:14:07 18330 18608 SYS: PU= 99% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=352996K SWP=37764K\\n000000D9 2013-02-28 16:15:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=354476K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=79 ST=244 UT=585\\n TT: PI=783 PN=java PC=6 ST=43 UT=25\\n TT: PI=22 PN=kswapd0 PC=3 ST=32 UT=0\\n000000DA 2013-02-28 16:15:08 18330 18608 DSK: [sda] r/s=89.2 kr/s=11390.6 w/s=97.9 kw/s=12356.6 bsy=98 NIC: rxp/s=4958.7 rxk/s=0.0 txp/s=385.6 txk/s=0.0 CPU: usr=10 sys=6 iow=83 idle=0\\n000000DB 2013-02-28 16:16:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=356116K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=78 ST=197 UT=595\\n TT: PI=783 PN=java PC=7 ST=42 UT=29\\n TT: PI=57526 PN=daserver PC=3 ST=22 UT=13\\n000000DC 2013-02-28 16:16:08 18330 18608 DSK: [sda] r/s=90.1 kr/s=11510.8 w/s=80.1 kw/s=10025.4 bsy=98 NIC: rxp/s=4880.1 rxk/s=0.0 txp/s=418.5 txk/s=0.0 CPU: usr=10 sys=5 iow=83 idle=0\\n000000DD 2013-02-28 16:17:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=355320K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=77 ST=199 UT=575\\n TT: PI=783 PN=java PC=7 ST=46 UT=30\\n TT: PI=57526 PN=daserver PC=3 ST=26 UT=7\\n000000DE 2013-02-28 16:17:08 18330 18608 DSK: [sda] r/s=88.6 
kr/s=11267.0 w/s=96.8 kw/s=12153.7 bsy=99 NIC: rxp/s=4904.6 rxk/s=0.0 txp/s=425.9 txk/s=0.0 CPU: usr=10 sys=5 iow=83 idle=0\\n000000DF 2013-02-28 16:18:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=355004K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=78 ST=196 UT=595\\n TT: PI=783 PN=java PC=6 ST=46 UT=22\\n TT: PI=57526 PN=daserver PC=3 ST=25 UT=9\\n000000E0 2013-02-28 16:18:08 18330 18608 DSK: [sda] r/s=89.8 kr/s=11444.9 w/s=101.1 kw/s=12742.9 bsy=98 NIC: rxp/s=4846.4 rxk/s=0.0 txp/s=407.4 txk/s=0.0 CPU: usr=10 sys=5 iow=83 idle=0\\n000000E1 2013-02-28 16:19:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=359884K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=70 ST=230 UT=627\\n TT: PI=44407 PN=dafilesrv PC=8 ST=99 UT=7\\n TT: PI=783 PN=java PC=7 ST=89 UT=2\\n000000E2 2013-02-28 16:19:08 18330 18608 DSK: [sda] r/s=95.4 kr/s=12179.3 w/s=121.8 kw/s=15324.3 bsy=96 NIC: rxp/s=4599136.2 rxk/s=0.0 txp/s=9578.4 txk/s=0.0 CPU: usr=11 sys=9 iow=78 idle=0\\n000000E3 2013-02-28 16:20:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=360416K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=59 ST=281 UT=702\\n TT: PI=44407 PN=dafilesrv PC=20 ST=321 UT=20\\n TT: PI=783 PN=java PC=5 ST=79 UT=18\\n000000E4 2013-02-28 16:20:08 18330 18608 DSK: [sda] r/s=104.0 kr/s=13228.9 w/s=216.3 kw/s=27554.1 bsy=96 NIC: rxp/s=15169496.1 rxk/s=0.0 txp/s=28208.1 txk/s=0.0 CPU: usr=13 sys=17 iow=69 idle=0\\n000000E5 2013-02-28 16:21:08 18330 18608 SYS: PU=100% MU= 9% MAL=1611881648 MMP=1611665408 SBK=216240 TOT=1574156K RAM=360384K SWP=37764K\\n TT: PI=18330 PN=thorslave_lcr PC=57 ST=402 UT=689\\n TT: PI=44407 PN=dafilesrv PC=21 ST=382 UT=19\\n TT: PI=783 PN=java PC=5 ST=86 UT=27\\n000000E6 2013-02-28 16:21:10 18330 18608 DSK: [sda] r/s=106.3 kr/s=13321.3 w/s=208.6 kw/s=26538.4 bsy=98 NIC: rxp/s=15137653.2 rxk/s=0.0 txp/s=27754.7 txk/s=0.0 CPU: usr=13 sys=22 iow=63 idle=0\\n00000000 2013-02-28 16:21:23 18992 18992 Opened log file //192.168.1.241/var/log/HPCCSystems/mythor/thorslave.1.2013_02_28.log\\n00000001 2013-02-28 16:21:23 18992 18992 Build community_3.8.6-4\\n00000002 2013-02-28 16:21:23 18992 18992 registering 192.168.1.241:20100 - master 192.168.1.241:20000\\n00000003 2013-02-28 16:21:23 18992 18992 Initialization received\\n00000004 2013-02-28 16:21:23 18992 18992 Master build: community_3.8.6-4\\n00000005 2013-02-28 16:21:23 18992 18992 Registration confirmation sent\\n00000006 2013-02-28 16:21:23 18992 18992 verifying mp connection to rest of cluster\\n00000007 2013-02-28 16:21:23 18992 18992 verified mp connection to rest of cluster\\n00000008 2013-02-28 16:21:23 18992 18992 registered 192.168.1.241:20100\\n00000009 2013-02-28 16:21:23 18992 18992 calling initClientProcess\\n0000000A 2013-02-28 16:21:23 18992 18992 setIORetryCount set to : 0\\n0000000B 2013-02-28 16:21:23 18992 18992 ThorSlave Version LCR - 4.1 started\\n0000000C 2013-02-28 16:21:23 18992 18992 Slave 192.168.1.241:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000D 2013-02-28 16:21:23 18992 18992 Using querySo directory: /var/lib/HPCCSystems/queries/mythor_20100\\n0000000E 2013-02-28 16:21:23 18992 18992 RoxieMemMgr: Setting memory limit to 1605369856 bytes (1531 pages)\\n0000000F 2013-02-28 16:21:23 18992 18992 RoxieMemMgr: 1536 Pages successfully allocated for the pool - memsize=1610612736 base=0x7fb473f00000 alignment=1048576 bitmapSize=48\\n00000010 2013-02-28 16:21:23 18992 18992 
FileCache: limit = 1800, purgeN = 10\\n00000011 2013-02-28 16:21:23 18992 19015 Watchdog: thread running\\n00000000 2013-02-28 16:37:20 19345 19345 Opened log file //192.168.1.241/var/log/HPCCSystems/mythor/thorslave.1.2013_02_28.log\\n00000001 2013-02-28 16:37:20 19345 19345 Build community_3.8.6-4\\n00000002 2013-02-28 16:37:20 19345 19345 registering 192.168.1.241:20100 - master 192.168.1.239:20000\\n00000003 2013-02-28 16:37:20 19345 19345 Initialization received\\n00000004 2013-02-28 16:37:20 19345 19345 Master build: community_3.8.6-4\\n00000005 2013-02-28 16:37:20 19345 19345 Registration confirmation sent\\n00000006 2013-02-28 16:37:20 19345 19345 verifying mp connection to rest of cluster\\n00000007 2013-02-28 16:37:20 19345 19345 verified mp connection to rest of cluster\\n00000008 2013-02-28 16:37:20 19345 19345 registered 192.168.1.241:20100\\n00000009 2013-02-28 16:37:20 19345 19345 calling initClientProcess\\n0000000A 2013-02-28 16:37:20 19345 19345 setIORetryCount set to : 0\\n0000000B 2013-02-28 16:37:20 19345 19345 ThorSlave Version LCR - 4.1 started\\n0000000C 2013-02-28 16:37:20 19345 19345 Slave 192.168.1.241:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000D 2013-02-28 16:37:20 19345 19345 Using querySo directory: /var/lib/HPCCSystems/queries/mythor_20100\\n0000000E 2013-02-28 16:37:20 19345 19345 RoxieMemMgr: Setting memory limit to 1605369856 bytes (1531 pages)\\n0000000F 2013-02-28 16:37:20 19345 19345 RoxieMemMgr: 1536 Pages successfully allocated for the pool - memsize=1610612736 base=0x7f031ff00000 alignment=1048576 bitmapSize=48\\n00000010 2013-02-28 16:37:20 19345 19345 FileCache: limit = 1800, purgeN = 10\\n00000011 2013-02-28 16:37:20 19345 19352 priority set id=139653218948864 policy=0 pri=0 PID=19345\\n00000012 2013-02-28 16:37:20 19345 19353 Watchdog: thread running
\\n\\nSlave 2 (192.168.1.196)\\n00000051 2013-02-28 16:13:09 24043 24043 Started wuid=W20130228-161229, user=hpcc, graph=graph1\\n\\n00000052 2013-02-28 16:13:09 24043 24043 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n00000053 2013-02-28 16:13:09 24043 24043 CRC allocator OFF\\n00000054 2013-02-28 16:13:09 24043 24043 Packed allocator OFF\\n00000055 2013-02-28 16:13:09 24043 24043 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n00000056 2013-02-28 16:13:09 24043 24043 maxActivityCores = [unbound]\\n00000057 2013-02-28 16:13:09 24043 24043 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n00000058 2013-02-28 16:13:09 24043 24043 Current reported version is FILESERVICES 2.1.3\\n00000059 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n0000005A 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.1\\n0000005B 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.2\\n0000005C 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.3\\n0000005D 2013-02-28 16:13:09 24043 24043 New Graph started : graph1\\n0000005E 2013-02-28 16:13:09 24043 24043 temp directory cleared\\n0000005F 2013-02-28 16:13:09 24043 24043 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 112324, /var/lib/HPCCSystems/hpcc-mirror/thor = 112324\\n00000060 2013-02-28 16:13:09 24043 24043 Key file cache size set to: 8\\n00000061 2013-02-28 16:13:09 24043 24043 GraphInit: W20130228-161229graph1, graphId=1\\n00000062 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65546\\n00000063 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65542\\n00000064 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65541\\n00000065 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65540\\n00000066 2013-02-28 16:13:09 24043 24043 Add: Launching graph thread for graphId=1\\n00000067 2013-02-28 16:13:09 24043 24316 Running graph [global] : <graph>\\n <node id="2" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::content', false); "/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="3" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="4" label="Store Internal('a1')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a1')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="3"/>\\n <edge id="3_0" source="3" target="4"/>\\n </graph>\\n - graph(graph1, 1)\\n00000068 2013-02-28 16:13:09 24043 24316 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(countaggregate, 3)\\n00000069 2013-02-28 16:13:09 24043 24316 CONNECTING (id=3, idx=0) to (id=4, idx=0) - activity(remoteresult, 4)\\n0000006A 2013-02-28 16:13:09 24043 24316 deserializeMPTag: tag = -7854\\n0000006B 2013-02-28 16:13:09 24043 24316 deserializeMPTag: tag = 65545\\n0000006C 2013-02-28 16:13:09 24043 24316 Watchdog: Start Job 1\\n0000006D 2013-02-28 16:13:09 24043 24317 
Starting input - activity(remoteresult, 4)\\n0000006E 2013-02-28 16:13:09 24043 24317 Starting input - activity(countaggregate, 3)\\n0000006F 2013-02-28 16:13:09 24043 24317 ITDL starting for output 0 - activity(linkedrawiterator, 2)\\n00000070 2013-02-28 16:13:09 24043 24317 ITDL starting for output 0 - activity(countaggregate, 3)\\n00000071 2013-02-28 16:13:09 24043 24317 Stopping input for - activity(countaggregate, 3)\\n00000072 2013-02-28 16:13:09 24043 24317 ITDL output 0 stopped, count was 0 - activity(linkedrawiterator, 2)\\n00000073 2013-02-28 16:13:09 24043 24317 Stopping input for - activity(remoteresult, 4)\\n00000074 2013-02-28 16:13:09 24043 24317 ITDL output 0 stopped, count was 0 - activity(countaggregate, 3)\\n00000075 2013-02-28 16:13:09 24043 24043 Entering getDone - graph(graph1, 1)\\n00000076 2013-02-28 16:13:09 24043 24043 Watchdog: Stop Job 1\\n00000077 2013-02-28 16:13:09 24043 24043 Leaving getDone - graph(graph1, 1)\\n00000078 2013-02-28 16:13:09 24043 24316 End of sub-graph - graph(graph1, 1)\\n00000079 2013-02-28 16:13:09 24043 24316 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=33\\nreadtime=329us\\nreadsize=158452 bytes\\nwrites=34\\nwritetime=1485us\\nwritesize=989 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 1)\\n0000007A 2013-02-28 16:13:09 24043 24316 Graph Done - graph(graph1, 1)\\n0000007B 2013-02-28 16:13:09 24043 24316 PU= 66% MU= 9% MAL=1611883920 MMP=1611665408 SBK=218512 TOT=1574156K RAM=325716K SWP=56608K - graph(graph1, 1)\\n0000007C 2013-02-28 16:13:09 24043 24316 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n0000007D 2013-02-28 16:13:09 24043 24043 GraphInit: W20130228-161229graph1, graphId=5\\n0000007E 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65548\\n0000007F 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65539\\n00000080 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65543\\n00000081 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65544\\n00000082 2013-02-28 16:13:09 24043 24043 Add: Launching graph thread for graphId=5\\n00000083 2013-02-28 16:13:09 24043 24043 destroying ProcessSlaveActivity - activity(remoteresult, 4)\\n00000084 2013-02-28 16:13:09 24043 24043 ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n00000085 2013-02-28 16:13:09 24043 24043 AFTER ProcessSlaveActivity : joining process thread - activity(remoteresult, 4)\\n00000086 2013-02-28 16:13:09 24043 24043 DESTROYED - activity(linkedrawiterator, 2)\\n00000087 2013-02-28 16:13:09 24043 24043 DESTROYED - activity(countaggregate, 3)\\n00000088 2013-02-28 16:13:09 24043 24043 DESTROYED - activity(remoteresult, 4)\\n00000089 2013-02-28 16:13:09 24043 24316 Running graph [global] : <graph>\\n <node id="6" label="Child Dataset">\\n <att name="definition" value="C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\ecllibrary\\\\std\\\\File.ecl(477,1)"/>\\n <att name="name" value="superfilecontents"/>\\n <att name="_kind" value="149"/>\\n <att name="ecl" value="superfilecontents('~hlcr::db::daily::meta', false); "/>\\n <att name="recordSize" value="4..259(45)"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="7" label="Count">\\n <att name="_kind" value="141"/>\\n <att name="ecl" value="TABLE({ integer8 value := COUNT(group) }); "/>\\n <att name="recordSize" 
value="8"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="8" label="Store Internal('a2')">\\n <att name="_kind" value="28"/>\\n <att name="ecl" value="extractresult(value, named('a2')); "/>\\n <att name="recordSize" value="8"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="6_0" source="6" target="7"/>\\n <edge id="7_0" source="7" target="8"/>\\n </graph>\\n - graph(graph1, 5)\\n0000008A 2013-02-28 16:13:09 24043 24316 CONNECTING (id=6, idx=0) to (id=7, idx=0) - activity(countaggregate, 7)\\n0000008B 2013-02-28 16:13:09 24043 24316 CONNECTING (id=7, idx=0) to (id=8, idx=0) - activity(remoteresult, 8)\\n0000008C 2013-02-28 16:13:09 24043 24316 deserializeMPTag: tag = -9038\\n0000008D 2013-02-28 16:13:09 24043 24316 deserializeMPTag: tag = 65547\\n0000008E 2013-02-28 16:13:09 24043 24316 Watchdog: Start Job 5\\n0000008F 2013-02-28 16:13:09 24043 24318 Starting input - activity(remoteresult, 8)\\n00000090 2013-02-28 16:13:09 24043 24318 Starting input - activity(countaggregate, 7)\\n00000091 2013-02-28 16:13:09 24043 24318 ITDL starting for output 0 - activity(linkedrawiterator, 6)\\n00000092 2013-02-28 16:13:09 24043 24318 ITDL starting for output 0 - activity(countaggregate, 7)\\n00000093 2013-02-28 16:13:09 24043 24318 Stopping input for - activity(countaggregate, 7)\\n00000094 2013-02-28 16:13:09 24043 24318 ITDL output 0 stopped, count was 0 - activity(linkedrawiterator, 6)\\n00000095 2013-02-28 16:13:09 24043 24318 Stopping input for - activity(remoteresult, 8)\\n00000096 2013-02-28 16:13:09 24043 24318 ITDL output 0 stopped, count was 0 - activity(countaggregate, 7)\\n00000097 2013-02-28 16:13:09 24043 24043 Entering getDone - graph(graph1, 5)\\n00000098 2013-02-28 16:13:09 24043 24043 Watchdog: Stop Job 5\\n00000099 2013-02-28 16:13:09 24043 24043 Leaving getDone - graph(graph1, 5)\\n0000009A 2013-02-28 16:13:09 24043 24316 End of sub-graph - graph(graph1, 5)\\n0000009B 2013-02-28 16:13:09 24043 24316 Socket statistics : connects=0\\nconnecttime=0us\\nfailedconnects=0\\nfailedconnecttime=0us\\nreads=14\\nreadtime=70us\\nreadsize=1899 bytes\\nwrites=20\\nwritetime=435us\\nwritesize=561 bytes\\nactivesockets=5\\nnumblockrecvs=0\\nnumblocksends=0\\nblockrecvsize=0\\nblocksendsize=0\\nblockrecvtime=0\\nblocksendtime=0\\nlongestblocksend=0\\nlongestblocksize=0\\n - graph(graph1, 5)\\n0000009C 2013-02-28 16:13:09 24043 24316 Graph Done - graph(graph1, 5)\\n0000009D 2013-02-28 16:13:09 24043 24316 PU= 0% MU= 9% MAL=1611884000 MMP=1611665408 SBK=218592 TOT=1574156K RAM=325716K SWP=56608K DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=73350.0 rxk/s=0.0 txp/s=33825.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=100 - graph(graph1, 5)\\n0000009E 2013-02-28 16:13:09 24043 24316 CGraphExecutor running=0, waitingToRun=0, dependentsWaiting=0\\n0000009F 2013-02-28 16:13:09 24043 24043 QueryDone, removing W20130228-161229graph1 from jobs\\n000000A0 2013-02-28 16:13:09 24043 24043 Job ended : graph1\\n000000A1 2013-02-28 16:13:09 24043 24043 destroying ProcessSlaveActivity - activity(remoteresult, 8)\\n000000A2 2013-02-28 16:13:09 24043 24043 ProcessSlaveActivity : joining process thread - activity(remoteresult, 8)\\n000000A3 2013-02-28 16:13:09 24043 24043 AFTER ProcessSlaveActivity : joining process thread - activity(remoteresult, 8)\\n000000A4 2013-02-28 16:13:09 24043 24043 DESTROYED - activity(remoteresult, 8)\\n000000A5 2013-02-28 16:13:09 24043 24043 DESTROYED - activity(linkedrawiterator, 6)\\n000000A6 2013-02-28 16:13:09 24043 24043 DESTROYED - 
activity(countaggregate, 7)\\n000000A7 2013-02-28 16:13:09 24043 24043 CJobBase resetting memory manager\\n000000A8 2013-02-28 16:13:09 24043 24043 Unloading dll /opt/HPCCSystems/plugins//libfileservices.so\\n000000A9 2013-02-28 16:13:09 24043 24043 Roxiemem stats: Heap size 1536 pages, 1536 free, largest block 1536\\n000000AA 2013-02-28 16:13:09 24043 24043 Heap usage : 270336 bytes\\n000000AB 2013-02-28 16:13:09 24043 24043 QueryDone, removed W20130228-161229graph1 from jobs\\n000000AC 2013-02-28 16:13:09 24043 24043 Finished wuid=W20130228-161229, graph=graph1\\n000000AD 2013-02-28 16:13:09 24043 24043 Started wuid=W20130228-161229, user=hpcc, graph=graph2\\n\\n000000AE 2013-02-28 16:13:09 24043 24043 Using query: /var/lib/HPCCSystems/queries/mythor_20100/V1366914267_libW20130228-161229.so\\n000000AF 2013-02-28 16:13:09 24043 24043 CRC allocator OFF\\n000000B0 2013-02-28 16:13:09 24043 24043 Packed allocator OFF\\n000000B1 2013-02-28 16:13:09 24043 24043 Global memory size = 1531 MB, memory spill at = 80%, large mem size = 1148 MB\\n000000B2 2013-02-28 16:13:09 24043 24043 maxActivityCores = [unbound]\\n000000B3 2013-02-28 16:13:09 24043 24043 Loaded DLL /opt/HPCCSystems/plugins//libfileservices.so\\n000000B4 2013-02-28 16:13:09 24043 24043 Current reported version is FILESERVICES 2.1.3\\n000000B5 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]\\n000000B6 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.1\\n000000B7 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.2\\n000000B8 2013-02-28 16:13:09 24043 24043 Compatible version FILESERVICES 2.1.3\\n000000B9 2013-02-28 16:13:09 24043 24043 New Graph started : graph2\\n000000BA 2013-02-28 16:13:09 24043 24043 temp directory cleared\\n000000BB 2013-02-28 16:13:09 24043 24043 Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 112324, /var/lib/HPCCSystems/hpcc-mirror/thor = 112324\\n000000BC 2013-02-28 16:13:09 24043 24043 Key file cache size set to: 8\\n000000BD 2013-02-28 16:13:09 24043 24043 GraphInit: W20130228-161229graph2, graphId=9\\n000000BE 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65541\\n000000BF 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65545\\n000000C0 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65546\\n000000C1 2013-02-28 16:13:09 24043 24043 deserializeMPTag: tag = 65540\\n000000C2 2013-02-28 16:13:09 24043 24043 Add: Launching graph thread for graphId=9\\n000000C3 2013-02-28 16:13:09 24043 24322 Running graph [global] : <graph>\\n <node id="10" label="Disk Read '...::content'">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(14,1)"/>\\n <att name="name" value="ds_dailycontent"/>\\n <att name="_kind" value="82"/>\\n <att name="ecl" value="DATASET('~hlcr::db::daily::content', layout_textfile, THOR); "/>\\n <att name="recordSize" value="400..4096(656)"/>\\n <att name="recordCount" value="0..?[disk]"/>\\n <att name="_fileName" value="~hlcr::db::daily::content"/>\\n </node>\\n <node id="11" label="Disk Write">\\n <att name="definition" value="D:\\\\dev\\\\projects\\\\HLCR\\\\trunk\\\\sources\\\\HLCR\\\\merge\\\\ConsolidateDaily.ecl(22,2)"/>\\n <att name="_kind" value="2"/>\\n <att name="ecl" value="OUTPUT(..., , hlcr.util.superfile.getsuperfilebasesubname(dbdailycontentfile)); "/>\\n <att name="recordSize" value="400..4096(656)"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="10_0" source="10" target="11"/>\\n 
</graph>\\n - graph(graph2, 9)\\n000000C4 2013-02-28 16:13:09 24043 24322 CONNECTING (id=10, idx=0) to (id=11, idx=0) - activity(diskwrite, 11)\\n000000C5 2013-02-28 16:13:09 24043 24322 Watchdog: Start Job 9\\n000000C6 2013-02-28 16:13:09 24043 24323 handling fname : /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._2_of_3 - activity(diskwrite, 11)\\n000000C7 2013-02-28 16:13:09 24043 24323 Starting input - activity(diskwrite, 11)\\n000000C8 2013-02-28 16:13:09 24043 24323 diskread[part=2]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._2_of_3' (logical file = ~hlcr::db::daily::content) - activity(diskread, 10)\\n000000C9 2013-02-28 16:13:09 24043 24323 diskread[part=2]: Base offset to 9973643034 - activity(diskread, 10)\\n000000CA 2013-02-28 16:13:09 24043 24323 diskread[part=2]: variable (/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._2_of_3) - activity(diskread, 10)\\n000000CB 2013-02-28 16:13:09 24043 24323 ITDL starting for output 0 - activity(diskread, 10)\\n000000CC 2013-02-28 16:13:09 24043 24323 Writing to file: /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/thtmp24043_2__partial.tmp - activity(diskwrite, 11)\\n000000CD 2013-02-28 16:13:09 24043 24323 Created output stream for /var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base0._2_of_3 - activity(diskwrite, 11)\\n000000CE 2013-02-28 16:13:09 24043 24323 Ungrouped - activity(diskwrite, 11)\\n000000CF 2013-02-28 16:13:09 24043 24323 Record size (max) = 4096 - activity(diskread, 10)\\n000000D0 2013-02-28 16:14:09 24043 24321 SYS: PU= 99% MU= 9% MAL=1613929664 MMP=1613713408 SBK=216256 TOT=1576156K RAM=335836K SWP=56608K\\n000000D1 2013-02-28 16:15:09 24043 24321 SYS: PU=100% MU= 9% MAL=1613929664 MMP=1613713408 SBK=216256 TOT=1576156K RAM=336372K SWP=56608K\\n TT: PI=24043 PN=thorslave_lcr PC=81 ST=287 UT=802\\n TT: PI=766 PN=java PC=5 ST=54 UT=19\\n TT: PI=22 PN=kswapd0 PC=3 ST=41 UT=0\\n000000D2 2013-02-28 16:15:09 24043 24321 DSK: [sda] r/s=120.7 kr/s=15414.9 w/s=115.1 kw/s=14523.8 bsy=94 NIC: rxp/s=4984.7 rxk/s=0.0 txp/s=419.1 txk/s=0.0 CPU: usr=14 sys=7 iow=78 idle=0\\n000000D3 2013-02-28 16:16:09 24043 24321 SYS: PU=100% MU= 9% MAL=1613929664 MMP=1613713408 SBK=216256 TOT=1576156K RAM=337284K SWP=56608K\\n TT: PI=24043 PN=thorslave_lcr PC=82 ST=263 UT=830\\n TT: PI=766 PN=java PC=5 ST=73 UT=1\\n TT: PI=22 PN=kswapd0 PC=3 ST=42 UT=0\\n000000D4 2013-02-28 16:16:09 24043 24321 DSK: [sda] r/s=124.2 kr/s=15872.6 w/s=125.8 kw/s=15878.1 bsy=95 NIC: rxp/s=4891.8 rxk/s=0.0 txp/s=422.4 txk/s=0.0 CPU: usr=14 sys=6 iow=78 idle=0\\n000000D5 2013-02-28 16:17:09 24043 24321 SYS: PU=100% MU= 9% MAL=1613978816 MMP=1613762560 SBK=216256 TOT=1576204K RAM=336408K SWP=56608K\\n TT: PI=24043 PN=thorslave_lcr PC=79 ST=267 UT=824\\n TT: PI=766 PN=java PC=5 ST=47 UT=32\\n TT: PI=22 PN=kswapd0 PC=3 ST=43 UT=0\\n000000D6 2013-02-28 16:17:12 24043 24321 DSK: [sda] r/s=129.1 kr/s=16466.0 w/s=133.4 kw/s=16806.2 bsy=100 NIC: rxp/s=5157.9 rxk/s=0.0 txp/s=435.3 txk/s=0.0 CPU: usr=14 sys=7 iow=77 idle=0\\n000000D7 2013-02-28 16:18:12 24043 24321 SYS: PU=100% MU= 9% MAL=1613978816 MMP=1613762560 SBK=216256 TOT=1576204K RAM=336048K SWP=56608K\\n TT: PI=24043 PN=thorslave_lcr PC=81 ST=292 UT=879\\n TT: PI=766 PN=java PC=5 ST=29 UT=56\\n TT: PI=22 PN=kswapd0 PC=3 ST=45 UT=0\\n000000D8 2013-02-28 16:18:12 24043 24321 DSK: [sda] r/s=118.5 kr/s=15148.1 w/s=116.5 kw/s=14681.1 bsy=89 NIC: rxp/s=4570.1 rxk/s=0.0 txp/s=386.6 txk/s=0.0 CPU: usr=14 sys=7 iow=77 idle=0\\n000000D9 
2013-02-28 16:18:46 24043 24323 diskread[part=2]: CRC Stored=b5f76965, calculated=b5f76965 file(/var/lib/HPCCSystems/hpcc-data/thor/hlcr/db/daily/content.base1._2_of_3) - activity(diskread, 10)\\n000000DA 2013-02-28 16:18:46 24043 24050 CThorBackupHandler, copying to target: //192.168.1.200/var/lib/HPCCSystems/hpcc-mirror/thor/hlcr/db/daily/content.base0._2_of_3\\n000000DB 2013-02-28 16:18:46 24043 24323 Wrote 341238 records, crc=0xB5F76965 - activity(diskwrite, 11)\\n000000DC 2013-02-28 16:18:46 24043 24323 Stopping input for - activity(diskwrite, 11)\\n000000DD 2013-02-28 16:18:46 24043 24323 ITDL output 0 stopped, count was 341238 - activity(diskread, 10)\\n000000DE 2013-02-28 16:19:12 24043 24321 SYS: PU= 95% MU= 9% MAL=1611881664 MMP=1611665408 SBK=216256 TOT=1574156K RAM=333280K SWP=56608K\\n TT: PI=24043 PN=thorslave_lcr PC=82 ST=541 UT=516\\n TT: PI=766 PN=java PC=6 ST=54 UT=26\\n TT: PI=57579 PN=daserver PC=3 ST=24 UT=15\\n000000DF 2013-02-28 16:19:12 24043 24321 DSK: [sda] r/s=126.8 kr/s=16200.0 w/s=83.4 kw/s=10505.9 bsy=88 NIC: rxp/s=19100.9 rxk/s=0.0 txp/s=7520570.3 txk/s=0.0 CPU: usr=9 sys=13 iow=73 idle=4\\n000000E0 2013-02-28 16:20:12 24043 24321 SYS: PU= 89% MU= 9% MAL=1611881664 MMP=1611665408 SBK=216256 TOT=1574156K RAM=325796K SWP=56608K\\n000000E1 2013-02-28 16:20:12 24043 24321 DSK: [sda] r/s=120.3 kr/s=15378.4 w/s=1.1 kw/s=13.0 bsy=81 NIC: rxp/s=33960.1 rxk/s=0.0 txp/s=16464509.1 txk/s=0.0 CPU: usr=1 sys=18 iow=68 idle=10\\n000000E2 2013-02-28 16:21:12 24043 24321 SYS: PU= 88% MU= 9% MAL=1611881664 MMP=1611665408 SBK=216256 TOT=1574156K RAM=325840K SWP=56608K\\n000000E3 2013-02-28 16:21:12 24043 24321 DSK: [sda] r/s=120.2 kr/s=15179.0 w/s=1.3 kw/s=13.6 bsy=77 NIC: rxp/s=32873.4 rxk/s=0.0 txp/s=16224540.0 txk/s=0.0 CPU: usr=2 sys=22 iow=63 idle=11\\n00000000 2013-02-28 16:37:22 25060 25060 Opened log file //192.168.1.196/var/log/HPCCSystems/mythor/thorslave.2.2013_02_28.log\\n00000001 2013-02-28 16:37:22 25060 25060 Build community_3.8.6-4\\n00000002 2013-02-28 16:37:22 25060 25060 registering 192.168.1.196:20100 - master 192.168.1.239:20000\\n00000003 2013-02-28 16:37:22 25060 25060 Initialization received\\n00000004 2013-02-28 16:37:22 25060 25060 Master build: community_3.8.6-4\\n00000005 2013-02-28 16:37:22 25060 25060 Registration confirmation sent\\n00000006 2013-02-28 16:37:22 25060 25060 verifying mp connection to rest of cluster\\n00000007 2013-02-28 16:37:22 25060 25060 verified mp connection to rest of cluster\\n00000008 2013-02-28 16:37:22 25060 25060 registered 192.168.1.196:20100\\n00000009 2013-02-28 16:37:22 25060 25060 calling initClientProcess\\n0000000A 2013-02-28 16:37:22 25060 25060 setIORetryCount set to : 0\\n0000000B 2013-02-28 16:37:22 25060 25060 ThorSlave Version LCR - 4.1 started\\n0000000C 2013-02-28 16:37:22 25060 25060 Slave 192.168.1.196:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/\\n0000000D 2013-02-28 16:37:22 25060 25060 Using querySo directory: /var/lib/HPCCSystems/queries/mythor_20100\\n0000000E 2013-02-28 16:37:22 25060 25060 RoxieMemMgr: Setting memory limit to 1605369856 bytes (1531 pages)\\n0000000F 2013-02-28 16:37:22 25060 25060 RoxieMemMgr: 1536 Pages successfully allocated for the pool - memsize=1610612736 base=0x7fdfa3f00000 alignment=1048576 bitmapSize=48\\n00000010 2013-02-28 16:37:22 25060 25060 FileCache: limit = 1800, purgeN = 10\\n00000011 2013-02-28 16:37:22 25060 25067 priority set id=140600350934784 policy=0 pri=0 PID=25060\\n00000012 2013-02-28 16:37:22 25060 25068 Watchdog: thread running
\", \"post_time\": \"2013-02-28 16:08:57\" },\n\t{ \"post_id\": 3589, \"topic_id\": 799, \"forum_id\": 8, \"post_subject\": \"Watchdog has lost contact with Thor slave\", \"username\": \"janssend\", \"post_text\": \"Hello.\\n\\nI have a Thor cluster of 4 nodes : 1 thor master and 3 slaves.\\n\\nI used to successfully run a thor process; but as long as the data are bigger I got this message : Watchdog has lost contact with Thor slave.\\nHere is the ECL code. It's mainly a data consolidating process. (merge every sub files into one single file)\\n\\nThe workunit has been submit by using ECL IDE client, with a maxruntime set to 0.\\n\\nI do not see where is the problem ? \\n\\n\\n\\nIMPORT Hlcr;\\nIMPORT Hlcr.Util;\\nIMPORT Std, Std.File, Std.Str;\\n\\n\\n// merge every db daily sub files into one single file \\ndbDailyContentFile := Hlcr.DeclareData.FS_LNBIS_DB_DAILY_CONTENT;\\ndbDailyMetaFile := Hlcr.DeclareData.FS_LNBIS_DB_DAILY_META;\\n\\ndbDailyContentBaseFile := Hlcr.Util.SuperFile.GetSuperFileBaseSubName(dbDailyContentFile);\\ndbDailyMetaBaseFile := Hlcr.Util.SuperFile.GetSuperFileBaseSubName(dbDailyMetaFile);\\n\\nDS_DailyContent := DATASET(dbDailyContentFile, Hlcr.DeclareData.Layout_TextFile, THOR);\\nDS_DailyMeta := DATASET(dbDailyMetaFile, Hlcr.DeclareData.Layout_Meta, THOR);\\n \\n// As roxie does not allow multiple sub-files, we consolidate \\n// daily in order to push file onto roxie cluster\\nIF (COUNT(STD.File.SuperFileContents(dbDailyContentFile)) > 1,\\nSEQUENTIAL(\\n\\tStd.File.CreateSuperFile(dbDailyContentFile,,TRUE),\\n\\tOUTPUT(DS_DailyContent,,dbDailyContentBaseFile), \\n\\tStd.File.StartSuperFileTransaction(),\\n\\tStd.File.ClearSuperFile(dbDailyContentFile, TRUE), \\n\\tStd.File.AddSuperFile(dbDailyContentFile, dbDailyContentBaseFile),\\n\\tStd.File.FinishSuperFileTransaction(),\\n ));\\n\\nIF (COUNT(STD.File.SuperFileContents(dbDailyMetaFile)) > 1,\\nSEQUENTIAL(\\n\\tStd.File.CreateSuperFile(dbDailyMetaFile,,TRUE),\\n\\tOUTPUT(DS_DailyMeta,,dbDailyMetaBaseFile),\\n\\tStd.File.StartSuperFileTransaction(),\\n\\tStd.File.ClearSuperFile(dbDailyMetaFile,TRUE), \\n\\tStd.File.AddSuperFile(dbDailyMetaFile, dbDailyMetaBaseFile),\\n\\tStd.File.FinishSuperFileTransaction(),\\n ));\\n
\\n\\n\\nAnd the thor master log (192.168.1.239):\\n00000000 2013-02-28 16:12:31 32973 32973 Logging to /var/log/HPCCSystems/myeclagent/eclagent.2013_02_28.log\\n00000001 2013-02-28 16:12:31 32973 32973 ECLAGENT build community_3.8.6-4\\n00000002 2013-02-28 16:12:31 32973 32973 Waiting for workunit lock\\n00000003 2013-02-28 16:12:31 32973 32973 Obtained workunit lock\\n00000004 2013-02-28 16:12:31 32973 32973 Loading dll (libW20130228-161229.so) from location /var/lib/HPCCSystems/myeclccserver/libW20130228-161229.so\\n00000005 2013-02-28 16:12:31 32973 32973 Starting process\\n00000006 2013-02-28 16:12:31 32973 32973 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000007 2013-02-28 16:12:31 32973 32973 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x7feb93f00000 alignment=1048576 bitmapSize=10\\n00000008 2013-02-28 16:12:31 32973 32973 Waiting for run lock\\n00000009 2013-02-28 16:12:31 32973 32973 Obtained run lock\\n0000000A 2013-02-28 16:12:31 32973 32973 Enqueuing on thor.thor to run wuid=W20130228-161229, graph=graph1, timelimit=600 seconds, priority=0\\n0000000B 2013-02-28 16:12:31 32973 32973 Thor on 192.168.1.239:20000 running W20130228-161229\\n0000000C 2013-02-28 16:12:32 32973 32973 ,FileAccess,FileServices,CreateSuperFile,W20130228-161229,hpcc,hlcr::db::daily::content\\n0000000D 2013-02-28 16:12:32 32973 32973 CreateSuperFile ('hlcr::db::daily::content') done\\n0000000E 2013-02-28 16:12:32 32973 32973 Enqueuing on thor.thor to run wuid=W20130228-161229, graph=graph2, timelimit=600 seconds, priority=0\\n0000000F 2013-02-28 16:12:32 32973 32973 Thor on 192.168.1.239:20000 running W20130228-161229\\n00000010 2013-02-28 16:35:41 32973 32973 ERROR: 10056: Watchdog has lost contact with Thor slave: 192.168.1.241:20100 (Process terminated or node down?) (in item 1)\\n00000011 2013-02-28 16:35:41 32973 32973 Releasing run lock\\n00000012 2013-02-28 16:35:41 32973 32973 System error: 10056: Watchdog has lost contact with Thor slave: 192.168.1.241:20100 (Process terminated or node down?)\\n00000013 2013-02-28 16:35:41 32973 32973 10056: System error: 10056: Watchdog has lost contact with Thor slave: 192.168.1.241:20100 (Process terminated or node down?)\\n00000014 2013-02-28 16:35:41 32973 32973 Process complete\\n00000015 2013-02-28 16:35:41 32973 32973 Workunit written complete\\n
\\n See next message for slave logs (The maximum number of allowed characters is 60000.)\", \"post_time\": \"2013-02-28 16:07:57\" },\n\t{ \"post_id\": 3638, \"topic_id\": 803, \"forum_id\": 8, \"post_subject\": \"Re: IN type of ROXIE query\", \"username\": \"prachi\", \"post_text\": \"Hi,\\nThanks for your response!\\nIt worked!\", \"post_time\": \"2013-03-06 05:05:21\" },\n\t{ \"post_id\": 3612, \"topic_id\": 803, \"forum_id\": 8, \"post_subject\": \"Re: IN type of ROXIE query\", \"username\": \"DSC\", \"post_text\": \"Your caller can provide delimited detailed claim steps as a single string. If the delimiter was a comma, then you could use something like this:\\n\\ndetailed_ClaimSteps_Set := Std.Str.SplitWords(detailed_ClaimSteps,',');
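\\n\\nA minimal sketch of that idea, reusing the STORED name from your function (the KEYED line is abbreviated here, so treat it as a placeholder for your existing index read rather than working code):\\n\\nIMPORT Std;\\nSTRING1000 detailed_ClaimSteps := '' : STORED('DetailedClaimSteps'); // e.g. the caller passes 'Humming,Damage,Communication' in one field\\n// SplitWords returns a SET OF STRING, one element per comma-separated value\\ndetailed_ClaimSteps_Set := Std.Str.SplitWords(detailed_ClaimSteps, ',');\\n// ds := IDX_SuperFile(KEYED(... AND DetailedClaimSteps IN detailed_ClaimSteps_Set ...));\\nOUTPUT(detailed_ClaimSteps_Set); // inspect the resulting set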
\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-03 14:32:39\" },\n\t{ \"post_id\": 3611, \"topic_id\": 803, \"forum_id\": 8, \"post_subject\": \"IN type of ROXIE query\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nI have an SQL query like :\\n\\nSELECT Sentence,Category,Intention FROM sapphire::superfile::claim_process where UserID='294895' and ClaimNumber='Claim003' and DetailedClaimSteps IN \\n("Humming,Damage,Communication") and Status ='Active';\\n
\\n\\nThe equivalent Roxie query is :\\nIMPORT STD;\\n\\nExport claim_process_in_detailedclaimsteps():=Function\\nSTRING100 user_id := '' : STORED('UserID');\\nSTRING100 claim_Number := '' : STORED('ClaimNumber');\\nSTRING1000 detailed_ClaimSteps := '': STORED('DetailedClaimSteps');\\nSTRING100 claim_Status := '' : STORED('Status');\\n\\ndetailed_ClaimSteps_Set := [detailed_ClaimSteps];\\n\\n/*Name of the Superkeyname*/\\nsuperkey_name := '~sapphire::superkey::claim_process';\\n\\n// Declare Payload INDEX\\nIDX_SuperFile := INDEX({STRING100 UserID,STRING100 Domain,STRING100 Status,STRING100 ClaimNumber,STRING1000 DetailedClaimSteps},\\n{STRING1000 Sentence,STRING100 Category,\\nSTRING100 Feedback_date,STRING100 UploadDate,STRING1000 Intention,\\nUNSIGNED8 fpos {virtual(fileposition)}},superkey_name);\\n\\nds := IDX_SuperFile(KEYED(UserID=user_id AND WILD(Domain) AND Status=claim_Status AND ClaimNumber=claim_Number AND DetailedClaimSteps IN detailed_ClaimSteps_Set ));\\n\\nnewRec := RECORD\\nSTRING1000 Sentence := ds.Sentence;\\nSTRING100 Category := ds.Category;\\nSTRING1000 Intention := ds.Intention;\\nEND;\\n\\n//store output\\nds_tbl := TABLE(ds,newRec);\\n\\nRETURN ds_tbl;\\nEND;\\n
\\n\\nI am attaching screenshot of ESP and records of INDEX.\\nI am getting output for one particular word (ex. detailedclaimsteps = 'Humming'), but how to change STRING to SET STRING so that i can have list of STRING (ex. detailedclaimsteps = 'Humming','Communication','Damage' )and how to take multiple input strings from ROXIE ESP (also where to specify delimeter like ',' or ';')?\\n\\nIn simple word, is it possible to accept multiple values in a field(either comma separated or custom separator) in a Roxie query?\\n\\nThanks and regards !\", \"post_time\": \"2013-03-02 13:11:46\" },\n\t{ \"post_id\": 3613, \"topic_id\": 804, \"forum_id\": 8, \"post_subject\": \"PIPE command help required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nI'm trying to invoke a Java program which reads from its standard input and writes to its standard output.\\n\\n\\nimport java.io.IOException;\\nimport java.io.InputStream;\\n\\npublic class Tester {\\n\\n\\t/**\\n\\t * @param args\\n\\t */\\n\\tpublic static void main(String[] args) {\\n\\t\\t// TODO Auto-generated method stub\\n\\n\\t\\tSystem.out.println("In Tester.main(...): length is : " + args.length);\\n\\n\\t\\tTester tester = new Tester();\\n\\t\\tdouble sentiment = tester.getSentiment(args[0]);\\n\\n\\t\\tSystem.out.println(sentiment);\\t\\t\\n\\t}\\n\\n\\tprivate double getSentiment(String sentence) {\\n\\n\\t\\tSystem.out.println("In Tester.getSentiment(...)");\\n\\n\\t\\treturn 4.0;\\n\\t}\\n}\\n
\\n\\nThe csv, whose logical name is omkartest::pipetweetsentiments :\\n\\n298790;HPCC as a real time system - hmm!\\n298790;HPCC multi-thor requires connection pooling !!!\\n29870;HPCC RC version - shall be hold high hopes???
\\n\\nCase 1: Using only output recorddef\\n\\noutputRSSentimentAnalysis := RECORD\\nSTRING tweetSentiment;\\nEND;\\n\\nlogicalname := '~omkartest::pipetweetsentiments';\\npipeOutput := PIPE('java Tester "This argument is not used"',outputRSSentimentAnalysis,csv);\\n \\nOUTPUT(pipeOutput);
\\n\\n\\nThe output (ECL IDE Result)is:\\n\\nIn Tester.main(...): length is : 1\\nIn Tester.getSentiment(...)\\n4.0fromIs : -1
\\n\\nAs evident, the statements written by the Java program to its standard output(via System.out.println(...)) are reflected in the ECL IDE.\\n\\nCase 2: Using both input recordset and output recorddef\\n\\ninputRSSentimentAnalysis := RECORD\\nSTRING100 UserID;\\nSTRING100 TweetText;\\nEND;\\n\\noutputRSSentimentAnalysis := RECORD\\nSTRING tweetSentiment;\\nEND;\\n\\nlogicalname := '~omkartest::pipetweetsentiments';\\n\\ninputDSSentimentAnalysis := DATASET(logicalname,inputRSSentimentAnalysis,CSV(SEPARATOR(';')));\\n\\nOUTPUT(inputDSSentimentAnalysis);\\n\\npipeOutput := PIPE(inputDSSentimentAnalysis,'java Tester "This argument is not used"',outputRSSentimentAnalysis,csv);\\n\\n \\nOUTPUT(pipeOutput);
\\n\\nECL IDE Result1 :\\n298790 \\tHPCC as a real time system - hmm! \\n298790 \\tHPCC multi-thor requires connection pooling !!! \\n29870 \\tHPCC RC version - shall be hold high hopes??? \\n
\\n\\nException is:\\n\\nError: System error: 10096: Graph[1], pipethrough[4]: SLAVE 172.25.38.21:20100: Process returned 1: - PIPE(java Tester "This argument is not used")
\\n\\nI'm attaching the 'Helper' logs of the workunit reflected in the ECL Watch\\n\\nThe only thing I could figure out is that,probably, the DATASET is not getting written to the Java's stdin.\\n\\nMy general question is that how does the ECL push its 'DATASET' into the command's code? Ex. If a C++,Java etc. code is being invoked which needs the access to the DATASET's data, how does it construct it? Or is it that only strings, integers, decimals etc. can be passed as arguments?\\n\\nThanks and regards !!!\", \"post_time\": \"2013-03-04 10:43:01\" },\n\t{ \"post_id\": 3618, \"topic_id\": 805, \"forum_id\": 8, \"post_subject\": \"WsECL creation and call work flow\", \"username\": \"buptkang\", \"post_text\": \"Hey there,\\n\\nCurrently I am attempting to build a WsECL and publish it as a query for later online computing purpose. \\n\\nIn the pre-computed phase, I should create and export the ECL as FUNCTION, then STORED its each OUTPUT variable. \\n\\nAfter publishing the query, during online computing phase, I could use SOAPCALL to access data through WsECL.\\n\\nMy question now is \\n1. During pre-computing process, after running the ECL through ECL-IDE, could ECL itself has some mechanism to publish itself as WsECL instead of going to EclWatcher to publish it manually?\\n\\n2. Do we have any sample code to show this push and pull process?\\n\\nThanks \\nBo\", \"post_time\": \"2013-03-04 19:02:05\" },\n\t{ \"post_id\": 3643, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback! That certainly was a strange error, glad you were able to track it down!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-06 12:51:53\" },\n\t{ \"post_id\": 3641, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"battleman\", \"post_text\": \"[quote="battleman":2odgp6cw][quote="bforeman":2odgp6cw]What is your target? THOR or HTHOR?\\nAre you performing a syntax check (F7) before submitting?\\n\\nRegards,\\n\\nBob\\nI have checked the syntax before submit. I tried both THOR and HTHOR ,and got the same error .\\n\\nFinially,I worked out this problem . I got this error because not all of slaves was installed with the same version of HPCC System. My fault!\\nThanks Bob, anyway. \", \"post_time\": \"2013-03-06 11:17:21\" },\n\t{ \"post_id\": 3637, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"battleman\", \"post_text\": \"[quote="bforeman":58zfm0ps]What is your target? THOR or HTHOR?\\nAre you performing a syntax check (F7) before submitting?\\n\\nRegards,\\n\\nBob\\nI have checked the syntax before submit. I tried both THOR and HTHOR ,and got the same error .\", \"post_time\": \"2013-03-06 02:46:18\" },\n\t{ \"post_id\": 3629, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"bforeman\", \"post_text\": \"What is your target? 
THOR or HTHOR?\\nAre you performing a syntax check (F7) before submitting?\\n\\nRegards,\\n\\nBob\\nI have checked the syntax before submitting. I tried both THOR and HTHOR, and got the same error.\\n\\nFinally, I worked out this problem. I got this error because not all of the slaves had the same version of the HPCC System installed. My fault!\\nThanks Bob, anyway. \", \"post_time\": \"2013-03-06 11:17:21\" },\n\t{ \"post_id\": 3637, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"battleman\", \"post_text\": \"[quote="bforeman":58zfm0ps]What is your target? THOR or HTHOR?\\nAre you performing a syntax check (F7) before submitting?\\n\\nRegards,\\n\\nBob\\nI have checked the syntax before submitting. I tried both THOR and HTHOR, and got the same error.\", \"post_time\": \"2013-03-06 02:46:18\" },\n\t{ \"post_id\": 3629, \"topic_id\": 807, \"forum_id\": 8, \"post_subject\": \"Re: Workunit was compiled for eclagent interface version 0\", \"username\": \"bforeman\", \"post_text\": \"What is your target? 
\\n\\nJM.\", \"post_time\": \"2013-03-05 11:14:27\" },\n\t{ \"post_id\": 3631, \"topic_id\": 810, \"forum_id\": 8, \"post_subject\": \"Re: Incremental Index\", \"username\": \"bforeman\", \"post_text\": \"There is a great article in the Programmer's Guide that discusses Superfiles and Superkeys, intended for exactly what you want to achieve.\\n\\nTake a look starting on page 46. Here is the link to download the Programmer's Guide:\\n\\nhttp://hpccsystems.com/download/docs/programmers-guide\\n\\nAfter you look it over and try the examples come back here and post back if you have any questions.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-05 16:03:54\" },\n\t{ \"post_id\": 3625, \"topic_id\": 810, \"forum_id\": 8, \"post_subject\": \"Incremental Index\", \"username\": \"arun\", \"post_text\": \"Hi Friends,\\n I am new to ECL and i have to do incremental index as per below usecases.\\n 1.I will be getting daily new files.\\n 2.I need to add the new files to existing indexes, instead of re-indexing again completely.\\n Grateful for any suggestions! …\", \"post_time\": \"2013-03-05 12:42:12\" },\n\t{ \"post_id\": 3636, \"topic_id\": 811, \"forum_id\": 8, \"post_subject\": \"Re: regression on DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"OK, you got around to posting it before I did. \\n\\nRichard\", \"post_time\": \"2013-03-05 19:05:40\" },\n\t{ \"post_id\": 3634, \"topic_id\": 811, \"forum_id\": 8, \"post_subject\": \"Re: regression on DISTRIBUTE\", \"username\": \"ideal\", \"post_text\": \"HPCC-8908 - regression on DISTRIBUTE\", \"post_time\": \"2013-03-05 17:32:16\" },\n\t{ \"post_id\": 3630, \"topic_id\": 811, \"forum_id\": 8, \"post_subject\": \"Re: regression on DISTRIBUTE\", \"username\": \"ideal\", \"post_text\": \"I let you post the bug in JIRA.\\n\\nStrange things happen also with GRAPH : I don't see expected temporary results in set of dataset. See my post "GRAPH data accumulation".\", \"post_time\": \"2013-03-05 15:27:38\" },\n\t{ \"post_id\": 3628, \"topic_id\": 811, \"forum_id\": 8, \"post_subject\": \"Re: regression on DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nConfirmed. On a 3.8.2-2 3-node cluster the result is:\\n\\n4\\t1\\n2\\t1\\n1\\t1\\n3\\t2\\n5\\t2\\n\\nOn a 3.10.2-1 3-node cluster the result is:\\n\\n1\\t1\\n\\nI will report this issue in JIRA (unless you would prefer to or already have done so).\\n\\nRichard\", \"post_time\": \"2013-03-05 14:15:53\" },\n\t{ \"post_id\": 3626, \"topic_id\": 811, \"forum_id\": 8, \"post_subject\": \"regression on DISTRIBUTE\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI think there is a regression on DISTRIBUTE. I don't understand how such regression can happen. Don't you test before commiting new releases ? \\n\\nTry this code on a 3 slaves cluster : \\n
rec := RECORD\\nINTEGER a;\\nINTEGER b;\\nEND;\\n\\n\\nds1:=DATASET([{1,1},{2,2},{3,1},{4,2},{5,1}],rec);\\n\\nfGraph(SET OF DATASET(rec) srec,INTEGER c) := FUNCTION\\n\\tdr := srec[C-1];\\n\\tdr2:=DISTRIBUTE(dr,RANDOM());\\n\\tdr3:=PROJECT(dr2,TRANSFORM(rec,SELF.b:=COUNTER;SELF:=LEFT),LOCAL);\\n\\tRETURN dr3;\\nEND;\\n\\n\\nres:=GRAPH(ds1,2,fGraph(ROWSET(LEFT),COUNTER));\\nOUTPUT(res);\\n
\\n\\nJM.\", \"post_time\": \"2013-03-05 13:46:00\" },\n\t{ \"post_id\": 3655, \"topic_id\": 812, \"forum_id\": 8, \"post_subject\": \"Re: Handling errors\", \"username\": \"DSC\", \"post_text\": \"The whole cron-based process is an attempt to have one environment pull certain changes that occur in another environment. It's a pull-based replication scheme, basically, handling the basic create/delete logical file activities as well as some other task-specific activities. I opted to go this route -- asynchronous polling and pulling of data -- in order to reduce the complexity of having multiple environments perfectly available at all times (which is what would happen if the replication occurred synchronously, inline with the original activity).\\n\\nThe environment performs a standard "reference a remote dataset and apply a filter" to determine what activities, if any, need to be replicated. A file copy activity is performed with Std.File.FileCopy() with an appropriately-named foreign path. All other activities occur locally. The activities are driven by the records that pass the filter, and they are called from within an APPLY(). If no errors pop up, everything works very well.\\n\\nConsider removing the responsibility for the copy from the CRON-launched workunit.
\\n\\nMoving all those activities to external workunits is an excellent idea, one that I had not considered. That would isolate any failures to just those actions and not interfere with the cron task.\\n\\nAnother off-the-wall possibility, again with the caveat that a failure might be due to an actual problem that needs to be addressed, is performing "I don't care if this fails" activities in SUCCESS clauses. Unless something has changed recently, something that fails in a SUCCESS clause will be reported, but it will not fail the workunit.
\\n\\nNow that's an interesting tidbit. I'm certainly going to try it out.\\n\\nThanks for the information and tips!\\n\\nDan\", \"post_time\": \"2013-03-06 22:47:06\" },\n\t{ \"post_id\": 3653, \"topic_id\": 812, \"forum_id\": 8, \"post_subject\": \"Re: Handling errors\", \"username\": \"Tony Kirk\", \"post_text\": \"I'm not sure what method you are using to copy these files (ECL via ~foreign read? DFU workunit?), and specifics might change the suggestions.\\n\\nConsider removing the responsibility for the copy from the CRON-launched workunit. The question then becomes how you would deal with actually performing the copy. Possibilities range from NOTIFY to another waiting workunit to utilizing the workunit publishing or cloning capabilities of the platform. Any of these, however, would take you into a world of monitoring other workunits (you can get workunit lists), and perhaps into SOAPCALLs to ESP to clone/submit those other workunits. Still, failures of those workunits are probably something you would want to monitor, report, perhaps even try to correct.\\n\\nAnother off-the-wall possibility, again with the caveat that a failure might be due to an actual problem that needs to be addressed, is performing "I don't care if this fails" activities in SUCCESS clauses. Unless something has changed recently, something that fails in a SUCCESS clause will be reported, but it will not fail the workunit.\\n\\nInsight into your process (current copy method, frequency, etc) might help. Any thoughts about the above possibilities?\", \"post_time\": \"2013-03-06 22:20:37\" },\n\t{ \"post_id\": 3651, \"topic_id\": 812, \"forum_id\": 8, \"post_subject\": \"Re: Handling errors\", \"username\": \"DSC\", \"post_text\": \"Thanks, John. I was actually already handling the specific case of "file not found" in a similar manner, using Std.File.ForeignLogicalFileName(). The copy process can still fail in other ways, however -- network failure, unresponsive remote Dali, etc. -- and I'm looking for an elegant way to handle all of that without aborting the workunit.\\n\\nDan\", \"post_time\": \"2013-03-06 21:25:15\" },\n\t{ \"post_id\": 3650, \"topic_id\": 812, \"forum_id\": 8, \"post_subject\": \"Re: Handling errors\", \"username\": \"john holt\", \"post_text\": \"I assume that by different environment that you mean clusters running with a different DALI Server. If the same DALI server, just drop the foreign Dali prefix.\\n\\nThe code:\\nIMPORT STD;\\nforeign_dali_prefix := '~foreign::' + _control.IPAddress.prod_thor_dali + '::';\\n// the prod_thor_dali atribute is just for my testing, use something\\n//that poioints to you other Dali server\\ndsname := foreign_dali_prefix + 'THOR::missing:dataset';\\n\\nWork1 := RECORD\\n STRING keyname;\\n STRING keyvalue;\\nEND;\\n\\nds := DATASET(dsname, Work1, THOR);\\nd1 := IF(STD.File.FileExists(dsname), ds);\\nOUTPUT(d1);\\n\\n\\nwill run without an error and produce an empty d1 recordset. You could use the EXISTS(...) function to determine if d1 had records or was empty if you wish.\\n\\nIn any event, your job will run whether the dataaset is present or missing.\", \"post_time\": \"2013-03-06 20:10:19\" },\n\t{ \"post_id\": 3635, \"topic_id\": 812, \"forum_id\": 8, \"post_subject\": \"Handling errors\", \"username\": \"DSC\", \"post_text\": \"I have a small ECL cron-based process that attempts to copy logical files from one environment to another (one environment is pulling files from another, not pushing). 
It is possible that a copy will fail, perhaps because the file is actually missing from the source environment.\\n\\n(Edit: A 'file not found' error is only one possible error. I'm looking to skip/ignore all errors.)\\n\\nHow can I cause ECL to ignore the error that would arise in this case? The FAILURE work service option seems to execute actions in addition to aborting the work unit, instead of preventing the abort, and RECOVERY seems to retry. I'm looking for a SKIP, basically. Also, the process could be copying a number of files and I would like to continue processing the list if, say, one file in the middle of the list is causing the problem, so skipping the entire remainder of the process would be less than ideal.\\n\\nThe overall problem is that in the event of an error, the work unit will abort and become descheduled, preventing further activities.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-03-05 19:02:27\" },\n\t{ \"post_id\": 3676, \"topic_id\": 816, \"forum_id\": 8, \"post_subject\": \"Re: Distributed local sort\", \"username\": \"DSC\", \"post_text\": \"Done: https://track.hpccsystems.com/browse/HPCC-8933\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-03-08 14:28:00\" },\n\t{ \"post_id\": 3674, \"topic_id\": 816, \"forum_id\": 8, \"post_subject\": \"Re: Distributed local sort\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nMy question is specifically concerning the 'distributed local sort' version. The manual states that all three of these produce identical output,
You're correct. Only the two LOCAL versions produce the same result.\\n\\nIf you could please add this issue to JIRA, it will get on my plate to address re-writing that article (which was written quite a few years ago) to reflect the way things actually are.\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2013-03-08 14:12:17\" },\n\t{ \"post_id\": 3661, \"topic_id\": 816, \"forum_id\": 8, \"post_subject\": \"Distributed local sort\", \"username\": \"DSC\", \"post_text\": \"The Programmer's Guide has a section on using the GROUP() function. Embedded within the discussion is some sample code comparing a global sort, a distributed local sort, and a grouped local sort:\\n\\nbf := NORMALIZE(accounts,\\n CLUSTERSIZE * 2,\\n TRANSFORM(RECORDOF(ProgGuide.Accounts),\\n SELF := LEFT));\\nds0 := DISTRIBUTE(bf,RANDOM()) : PERSIST('~PROGGUIDE::PERSIST::TestGroupSort');\\nds1 := DISTRIBUTE(ds,HASH32(personid)); // -- I think 'ds' should be 'ds0' here\\n\\n// do a global sort\\ns1 := SORT(ds0,personid,opendate,-balance);\\na := OUTPUT(s1,,'~PROGGUIDE::EXAMPLEDATA::TestGroupSort1',OVERWRITE);\\n\\n// do a distributed local sort\\ns3 := SORT(ds1,personid,opendate,-balance,LOCAL);\\nb := OUTPUT(s3,,'~PROGGUIDE::EXAMPLEDATA::TestGroupSort2',OVERWRITE);\\n\\n// do a grouped local sort\\ns4 := SORT(ds1,personid,LOCAL);\\ng2 := GROUP(s4,personid,LOCAL);\\ns5 := SORT(g2,opendate,-balance);\\nc := OUTPUT(s5,,'~PROGGUIDE::EXAMPLEDATA::TestGroupSort3',OVERWRITE);\\n\\nSEQUENTIAL(a,b,c);
\\nMy question is specifically concerning the 'distributed local sort' version. The manual states that all three of these produce identical output, but in my own tests I find that the distributed local sort does not. Furthermore, I don't see how it could work unless you had a very few number of specially-crafted key values (personid, in the example), or you had only one node, or you ran the code only in hthor. Virtually any example with a sizable number of unique key values and records seems to fail. After the sort, all the records on one node will be sorted, but the first record of the next node is not necessarily the next record in the list. It's more likely to be a low value, one that should have been included much earlier in the sorted result.\\n\\nCan someone shed some light on this and maybe describe what is going on with the DISTRIBUTE() to make it work? Or is the example incorrect?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-03-07 18:39:04\" },\n\t{ \"post_id\": 3702, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"battleman\", \"post_text\": \"[quote="DSC":308t0g1b]This worked for me under 3.10.2CE:\\n\\nLayout_transform := RECORD\\n\\tSTRING10 name;\\n\\tSTRING2 gender;\\n\\tSTRING10 score;\\nEND; \\n\\nds := DATASET([{'James','1','67.5'},{'Tom','1','76.4'},{'Emily','0','80.0'}],Layout_transform);\\n\\nTotBal := SUM (ds,(REAL8)ds.score) ;\\n\\nOUTPUT(TotBal);
\\n\\nThis is a stripped-down, inline version of your original posting, basically. The coercion to a REAL8 does work on my system, with a result of 223.9. What happens when you execute this against thor or hthor?\\n\\nDan\\n\\n\\n May be I found a bug for you . I have two files where their contents are exactly the same .But the older file turns out an error :"Workunit was compiled for eclagent interface version 0, this eclagent requires version 138..140" ,but the newer-created file was executed successfully ,and I got 223.9 too . \\n Thanks Dan !I finally can use SUM() on this fixed file .\", \"post_time\": \"2013-03-12 05:02:57\" },\n\t{ \"post_id\": 3701, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"battleman\", \"post_text\": \"[quote="rtaylor":1qshl95f]battleman,\\n\\nI think that you're confusing type transfer with type casting.\\n\\nYour syntax:TotBal := SUM(DS_tramsform,(>REAL8<)DS_tramsform.score)
is using the type transfer operator -- (>REAL8<) -- when Dan's suggested code was using the type casting operator -- (REAL8) -- note the absence of the angle brackets.\\n\\nType transfer is "treat this as a different data type" -- meaning that the bit pattern does not change. That means that with (>REAL8<) you were telling the system to treat the STRING10 field as if it were a 10-byte REAL (and REAL is only valid in 4 and 8-byte formats).\\n\\nType casting is "convert this value to a different data type" -- meaning that the bit pattern does change. So using (REAL8) instead would take your STRING10 value and convert it to an 8-byte REAL.\\n\\nHTH,\\n\\nRichard\\n\\nThanks Richard!Your explanation helps me a lot .\", \"post_time\": \"2013-03-12 04:53:39\" },\n\t{ \"post_id\": 3695, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"rtaylor\", \"post_text\": \"battleman,\\n\\nI think that you're confusing type transfer with type casting.\\n\\nYour syntax:TotBal := SUM(DS_tramsform,(>REAL8<)DS_tramsform.score)
is using the type transfer operator -- (>REAL8<) -- when Dan's suggested code was using the type casting operator -- (REAL8) -- note the absence of the angle brackets.\\n\\nType transfer is "treat this as a different data type" -- meaning that the bit pattern does not change. That means that with (>REAL8<) you were telling the system to treat the STRING10 field as if it were a 10-byte REAL (and REAL is only valid in 4 and 8-byte formats).\\n\\nType casting is "convert this value to a different data type" -- meaning that the bit pattern does change. So using (REAL8) instead would take your STRING10 value and convert it to an 8-byte REAL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-11 15:26:25\" },\n\t{ \"post_id\": 3694, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"DSC\", \"post_text\": \"This worked for me under 3.10.2CE:\\n\\nLayout_transform := RECORD\\n\\tSTRING10 name;\\n\\tSTRING2 gender;\\n\\tSTRING10 score;\\nEND; \\n\\nds := DATASET([{'James','1','67.5'},{'Tom','1','76.4'},{'Emily','0','80.0'}],Layout_transform);\\n\\nTotBal := SUM (ds,(REAL8)ds.score) ;\\n\\nOUTPUT(TotBal);
\\n\\nThis is a stripped-down, inline version of your original posting, basically. The coercion to a REAL8 does work on my system, with a result of 223.9. What happens when you execute this against thor or hthor?\\n\\nDan\", \"post_time\": \"2013-03-11 14:45:49\" },\n\t{ \"post_id\": 3693, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"battleman\", \"post_text\": \"[quote="DSC":23zghua1]From your first example, where score is defined as a STRING10, try this instead for the SUM():\\n\\nTotBal := SUM(DS_tramsform,(REAL8)DS_tramsform.score) ;
\\n\\nECL's type coercion is a lot like C's. You might want to read through the section on type casting in the language reference manual.\\n\\nHope this helps.\\n\\nDan\\n\\nYour advice didnt work out.I got this error: \\n[color=#FF0000:23zghua1]Workunit was compiled for eclagent interface version 0, this eclagent requires version 138..140\\nand TotBal := SUM(DS_tramsform,(>REAL8<)DS_tramsform.score) got the same error .\\nI want to create a function to do this type-casting first,this is my code :\\n\\nIMPORT test;\\n test.File_transform StringToReal(test.File_transform pInput)\\n := TRANSFORM\\n\\t\\t SELF.name := pInput.name;\\n\\t\\t SELF.gender := pInput.gender;\\n\\t\\t SELF.score := (>REAL4<)pInput.score ; \\n END ;\\nOrigDataset := test.File_transform;\\nfinalDataSet := PROJECT(OrigDataset,StringToReal(LEFT));\\nOUTPUT(finalDataSet,,'~~learn::transform',OVERWRITE);\\n\\nand I got this error:\\n\\n[color=#FF0000:23zghua1]Error: Can not assign Real to String (field SELF.score) (8, 4), 2007, \\n\\nThanks for helping!\", \"post_time\": \"2013-03-11 14:29:00\" },\n\t{ \"post_id\": 3672, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"Re: fixed file record definition and field data type problem\", \"username\": \"DSC\", \"post_text\": \"From your first example, where score is defined as a STRING10, try this instead for the SUM():\\n\\nTotBal := SUM(DS_tramsform,(REAL8)DS_tramsform.score) ;
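To make the cast-versus-transfer distinction concrete, here is a minimal sketch (the literal value is made up): only the cast converts the characters into a numeric value, while a transfer would merely reinterpret the existing bytes and also requires a value that is already 4 or 8 bytes wide.

s := '67.5';               // a 4-character STRING

castVal := (REAL8)s;       // type CAST: the text is converted to the number 67.5
// xferVal := (>REAL8<)s;  // type TRANSFER: would reinterpret the raw bytes as-is,
                           // and is rejected here because the string is not 8 bytes

OUTPUT(castVal);           // 67.5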
\\n\\nECL's type coercion is a lot like C's. You might want to read through the section on type casting in the language reference manual.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-08 13:21:47\" },\n\t{ \"post_id\": 3671, \"topic_id\": 820, \"forum_id\": 8, \"post_subject\": \"fixed file record definition and field data type problem\", \"username\": \"battleman\", \"post_text\": \"I have a fixed data file,the length of a record is 22(10+2+10):\\nJames 1 67.5 \\nTom 1 76.4 \\nEmily 0 80.0 \\nI want to get a total value of the third field ,this is my code :\\nEXPORT Layout_transform := RECORD\\n STRING10 name;\\n STRING2 gender;\\n STRING10 score;\\nEND; \\nDS_tramsform := DATASET('~learn::transform',Layout_transform,THOR);\\nTotBal := SUM(DS_tramsform,DS_tramsform.score) ;\\nOUTPUT(TotBal); \\n \\nsyntax checking turns out this error :\\n[color=#FF0000:20mrtkyc]Error: Type mismatch - Integer or real value expected (String was given) (5, 28), 2004, \\nSO I change Layout_transform to this :\\nEXPORT Layout_transform := RECORD\\n STRING10 name;\\n STRING2 gender;\\n REAL8 score;\\nEND; \\n\\nsyntax checking is ok,but when I submit,I got this error: \\n[color=#FF0000:20mrtkyc]Published record size 22 for file ~learn::transform, does not match coded record size 20\\n\\nSO I change Layout_transform to this :\\nEXPORT Layout_transform := RECORD\\n STRING10 name;\\n STRING2 gender;\\n UNSIGNED10 score;\\nEND; \\nand get this error:\\n[color=#FF0000:20mrtkyc]Invalid size for UNSIGNED type: can only be 1 to 8\\n\\nI am so confused.I didnt find any function that can change a date field from STRING to REAL .Then how can I do this SUM operation ?\\nthanks!\\nregards,\", \"post_time\": \"2013-03-08 11:20:12\" },\n\t{ \"post_id\": 3728, \"topic_id\": 824, \"forum_id\": 8, \"post_subject\": \"Re: XML Parse for a poorly formed xml\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot Bob..\\n\\nLooks like CDATA is only creating the problem, will try to use pattern and tweak the code more.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-03-13 08:58:08\" },\n\t{ \"post_id\": 3700, \"topic_id\": 824, \"forum_id\": 8, \"post_subject\": \"Re: XML Parse for a poorly formed xml\", \"username\": \"bforeman\", \"post_text\": \"Hi ksviswa,\\n\\nUnless I'm missing something it's that CDATA stuff that "souring the milk".\\n\\nI think you have to treat the document as free form text, and look for those patterns.\\n\\nYou can spray the data as variable length (Spray CSV) and just specify the end root tag as the line terminator, and then parse the whole document as a long text stream.\\n\\nSomething like this:\\n
IMPORT STD;\\n/* <content><![CDATA[<block><p>aaa <person>person1</person> xxxx\\n <person>person2</person></p></block>\\n ]]></content>\\n*/\\n\\n\\nr := RECORD\\nSTRING line;\\nEND;\\n\\nindata := DATASET('~a::xmltest',{STRING line},\\n CSV(TERMINATOR(['</content>'])));\\n\\npattern btag := '<person>';\\npattern etag := '</person>';\\npattern txt := any*;\\npattern p := btag txt etag;\\n\\noutrec := RECORD\\n STRING name := MATCHTEXT(p/txt);\\nend;\\n\\nout := PARSE(indata, line, p, outrec,SCAN ALL);\\nout(NOT std.Str.contains(name,'<',1));
\\n\\nThe function at the end was used to filter out my false matches, I'm sure the patterns could be tweaked to eliminate the need for that.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-03-11 18:57:30\" },\n\t{ \"post_id\": 3696, \"topic_id\": 824, \"forum_id\": 8, \"post_subject\": \"XML Parse for a poorly formed xml\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a xml similar to the below format :\\n\\n\\n<content><![CDATA[\\n \\n<block><p>aaa <person>person1</person> xxxx\\n <person>person2</person></p></block>\\n ]]></content>\\n
\\n\\n I need to extract the person details and give them a unique Id. There could be few other tags other than person tag in the content. \\n\\nThere are many such files of the same format. Is it possible to provide a unique id/sequence number for each person..?\\n\\n\\n100 | person1\\n101 | person2\\n
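On the unique id part of the question above, one hedged sketch: once the person values have been extracted (by whatever parsing approach ends up working), a PROJECT with COUNTER can assign the running numbers. The inline dataset here only stands in for the parsed output.

extracted := DATASET([{'person1'}, {'person2'}], {STRING name});

outRec := RECORD
  UNSIGNED4 id;
  STRING    name;
END;

withIds := PROJECT(extracted,
                   TRANSFORM(outRec,
                             SELF.id := 99 + COUNTER;   // yields 100, 101, ...
                             SELF := LEFT));
OUTPUT(withIds);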
\\n\\nTried using XMLPROJECT since person is repeated, but i wasn't able to extract the required pattern.\\n\\n\\n\\nper_content:= RECORD\\n string person;\\nEND;\\n\\nrecStr := RECORD\\n DATASET(per_content) personRec;\\nEND;\\n\\nrecStr t1 := TRANSFORM\\n\\nSELF.personRec:= XMLPROJECT('block/person',\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t TRANSFORM(per_content,\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.person:= XMLTEXT('')));\\n\\nEND;\\nparseData := parse(RawData,textXml,t1,XML('content'));\\n\\n
\\n\\nTried for all these patterns 'block/person' or 'block/p/person' or 'person', it didnt match.\\n\\nI sprayed it as a single field and tried to use PARSE for text data but again the pattern didn't match.\\n\\nNot sure if i am doing something wrong here.\\n\\nKindly help. Is there any other approach to solve such problems, kindly share the same.\\n\\nThanks a lot in advance\\n\\nRegards,\\nksviswa\", \"post_time\": \"2013-03-11 15:59:20\" },\n\t{ \"post_id\": 3714, \"topic_id\": 829, \"forum_id\": 8, \"post_subject\": \"Re: JOIN,GROUP help\", \"username\": \"bforeman\", \"post_text\": \"The first thing I see is that you might be confusing the GROUP function with the GROUP keyword that is used in cross-tabulation reports.\\n\\nTry modifying your code (shown here):\\n\\nopStruct := RECORD\\ntempDS.Country;\\ncnt := COUNT(GROUP(tempDS,Country));\\nEND;\\n//tempDSGrp := GROUP(tempDS,Country);\\n\\n//tempDSGrpCnt := COUNT(GROUP(tempDS,Country),KEYED);\\n\\nopDS := TABLE(tempDS,opStruct);\\n\\nreturn opDS;\\n
\\n\\nTo this:\\n\\nopStruct := RECORD\\ntempDS.Country;\\ncnt := COUNT(GROUP);\\nEND;\\n\\nopDS := TABLE(tempDS,opStruct,country);\\n\\nreturn opDS;\\n
\\n\\nDoes that help?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-12 13:13:12\" },\n\t{ \"post_id\": 3708, \"topic_id\": 829, \"forum_id\": 8, \"post_subject\": \"JOIN,GROUP help\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nI have a simple SQL query for which I'm trying to write an equivalent ECL :\\n\\nSELECT tul.country,count(rtm.TwitterUserID) FROM retweetsmonitoring rtm\\ninner join twitteruserlocation tul on rtm.TwitterUserID=tul.TwitterUserID\\nwhere rtm.TwitterUniqueID=257\\nand rtm.UserID='293705'\\nand tul.country not in ('cannot','NotDetected')\\nand tul.city<>'Delhi'\\ngroup by tul.country;\\n
\\n\\nDue to unavailability of large amount of data, I'm getting identical outputs(which frighteningly matches with the output of SQL query) for all the variants I wrote \\n\\n
EXPORT ReTweetMonitoring_CountryCntUsrId_JoinTUL := FUNCTION\\nINTEGER4 inputTwitterUniqueID := 0 : STORED('TwitterUniqueID');\\nSTRING100 inputUserID := '' : STORED('UserID');\\n//STRING200 Country := '' : STORED('Country');\\nSTRING200 inputCity := '' : STORED('City');\\n\\n\\nsuperkeyReTweetsMonitoring :=\\t'~sapphire::superkey::retweetsmonitoring';\\nsuperkeyTwitterUserLocation :=\\t'~sapphire::superkey::twitteruserlocation';\\n\\nIDX_SK_RETWEETSMONITORING := INDEX({INTEGER4 TwitterUserID,INTEGER4 TwitterUniqueID,STRING100 UserID,STRING25 ParentTweetsSentiment}\\n,{STRING200 TwitterUserName,STRING200 Location,STRING200 Country,INTEGER4 NoOfFollowers,INTEGER4 NoOfFriends,UNSIGNED8 fpos {virtual(fileposition)}}\\n,superkeyReTweetsMonitoring);\\n\\nIDX_SK_TWITTERUSERLOCATION := INDEX({INTEGER8 TwitterUserID,STRING200 Country,STRING200 City}\\n,{STRING100 TwitterUserName,STRING200 Location,STRING200 Latitude,STRING200 Longitude,STRING50 HPCCStatus,UNSIGNED8 fpos {virtual(fileposition)}}\\n,superkeyTwitterUserLocation);\\n\\ninvalidCountrySet := ['cannot','NotDetected'];\\n\\n\\njoinOpStruct := RECORD\\nSTRING200 Country;\\nINTEGER4 TwitterUserID;\\nEND;\\n\\njoinOpStruct returnJoinOutput(IDX_SK_RETWEETSMONITORING rtm,IDX_SK_TWITTERUSERLOCATION tul) := TRANSFORM\\nSELF.Country := tul.Country;\\nSELF.TwitterUserID := rtm.TwitterUserID;\\n\\nEND;\\n\\ntempDS := SORT(JOIN(IDX_SK_RETWEETSMONITORING(WILD(TwitterUserID) AND TwitterUniqueID=inputTwitterUniqueID AND UserID=inputUserID)\\n ,IDX_SK_TWITTERUSERLOCATION\\n ,LEFT.TwitterUserID=RIGHT.TwitterUserID AND RIGHT.Country NOT IN invalidCountrySet AND RIGHT.City <> inputCity,returnJoinOutput(LEFT,RIGHT)),Country);\\n\\nopStruct := RECORD\\ntempDS.Country;\\ncnt := COUNT(GROUP(tempDS,Country));\\nEND;\\n//tempDSGrp := GROUP(tempDS,Country);\\n\\n//tempDSGrpCnt := COUNT(GROUP(tempDS,Country),KEYED);\\n\\nopDS := TABLE(tempDS,opStruct);\\n\\nreturn opDS;\\n\\nEND;
\\n\\n\\n\\n/*SELECT tul.country,count(rtm.TwitterUserID) FROM retweetsmonitoring rtm\\ninner join twitteruserlocation tul on rtm.TwitterUserID=tul.TwitterUserID\\nwhere rtm.TwitterUniqueID=257\\nand rtm.UserID='293705'\\nand tul.country not in ('cannot','NotDetected')\\nand tul.city<>'Delhi'\\ngroup by tul.country;*/\\n\\nEXPORT ReTweetMonitoring_CountryCntUsrId_JoinTUL := FUNCTION\\nINTEGER4 inputTwitterUniqueID := 0 : STORED('TwitterUniqueID');\\nSTRING100 inputUserID := '' : STORED('UserID');\\n//STRING200 Country := '' : STORED('Country');\\nSTRING200 inputCity := '' : STORED('City');\\n\\n\\nsuperkeyReTweetsMonitoring :=\\t'~sapphire::superkey::retweetsmonitoring';\\nsuperkeyTwitterUserLocation :=\\t'~sapphire::superkey::twitteruserlocation';\\n\\nIDX_SK_RETWEETSMONITORING := INDEX({INTEGER4 TwitterUserID,INTEGER4 TwitterUniqueID,STRING100 UserID,STRING25 ParentTweetsSentiment}\\n,{STRING200 TwitterUserName,STRING200 Location,STRING200 Country,INTEGER4 NoOfFollowers,INTEGER4 NoOfFriends,UNSIGNED8 fpos {virtual(fileposition)}}\\n,superkeyReTweetsMonitoring);\\n\\nIDX_SK_TWITTERUSERLOCATION := INDEX({INTEGER8 TwitterUserID,STRING200 Country,STRING200 City}\\n,{STRING100 TwitterUserName,STRING200 Location,STRING200 Latitude,STRING200 Longitude,STRING50 HPCCStatus,UNSIGNED8 fpos {virtual(fileposition)}}\\n,superkeyTwitterUserLocation);\\n\\ninvalidCountrySet := ['cannot','NotDetected'];\\n\\n\\njoinOpStruct := RECORD\\nSTRING200 Country;\\nINTEGER4 TwitterUserID;\\nEND;\\n\\njoinOpStruct returnJoinOutput(IDX_SK_RETWEETSMONITORING rtm,IDX_SK_TWITTERUSERLOCATION tul) := TRANSFORM\\nSELF.Country := tul.Country;\\nSELF.TwitterUserID := rtm.TwitterUserID;\\n\\nEND;\\n\\ntempDS := SORT(JOIN(IDX_SK_RETWEETSMONITORING(WILD(TwitterUserID) AND TwitterUniqueID=inputTwitterUniqueID AND UserID=inputUserID)\\n ,IDX_SK_TWITTERUSERLOCATION\\n ,LEFT.TwitterUserID=RIGHT.TwitterUserID AND RIGHT.Country NOT IN invalidCountrySet AND RIGHT.City <> inputCity,returnJoinOutput(LEFT,RIGHT)),Country);\\n\\nopStruct := RECORD\\ntempDS.Country;\\ncnt := COUNT(tempDS.TwitterUserID);\\nEND;\\n//tempDSGrp := GROUP(tempDS,Country);\\n\\n//tempDSGrpCnt := COUNT(GROUP(tempDS,Country),KEYED);\\n\\nopDS := TABLE(tempDS,opStruct,Country);\\n\\nreturn opDS;\\n\\nEND;\\n
\\n\\nI went through the ECL reference for the different constructs in the following order:\\n\\nJOIN\\nSORT\\nGROUP\\nCOUNT\\nTABLE\\n\\nSomehow, I'm not able to write the exact ECL equivalent of the SQL query.\\n\\nPlease go through the SQL query, point out my ECL mistake and guide me \\n\\nThanks and regards !!!\", \"post_time\": \"2013-03-12 11:39:24\" },\n\t{ \"post_id\": 3734, \"topic_id\": 830, \"forum_id\": 8, \"post_subject\": \"Re: Need Incremental Index solution\", \"username\": \"bforeman\", \"post_text\": \"Good post here regarding updating indexes in superkeys:\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=837\", \"post_time\": \"2013-03-13 12:57:58\" },\n\t{ \"post_id\": 3725, \"topic_id\": 830, \"forum_id\": 8, \"post_subject\": \"Re: Need Incremental Index solution\", \"username\": \"prachi\", \"post_text\": \"Hi,\\nBob and Tony,\\n\\nTo clear confusions, if any, listing the facts :\\n\\n
In our Roxie queries, we are using super-keys which in turn use payload indexes built on super-files
\\n\\n
Once such a Roxie query is published, one cannot update the super-key i.e addition of new indexes, removal of the old etc. cannot be done. This is because Roxie acquires a lock on the super-key and its child components. To get around this, the packagemap needs to be used.
\\n\\n
Now, as it is obvious, every time a sub-file is added to a super-file, a new PAYLOAD index needs to be created. There are two problems now - the overhead of creating a new PAYLOAD index every time on the entire super-file AND updating the super-key in such a way that it has access to the latest data with least no. of PAYLOAD indexes
\\n\\nThe background and the known issues are already posted on the forums :\\n\\nhttp://hpccsystems.com/bb/viewtopic.php ... d028#p3477\\n\\nI'm listing my queries again in as terse manner as possible :\\n\\nHow to get a SINGLE/ONE index on a super-file such that it has the latest data and also is built in an 'incremental' manner i.e NOT BUILT on the entire super-file
\\nWithout using packagemap, how to update the super-key which is already locked by Roxie?
\\n\\nThanks and regards !!!\", \"post_time\": \"2013-03-13 04:57:49\" },\n\t{ \"post_id\": 3719, \"topic_id\": 830, \"forum_id\": 8, \"post_subject\": \"Re: Need Incremental Index solution\", \"username\": \"bforeman\", \"post_text\": \"The main question seems to actually be:\\n\\nThe core point is that instead of building a new INDEX every time a sub-file is added(which is slow as it built on the entire super-file), is there a way wherein the super-key can get the 'incremental update' i.e a new/overwritten INDEX which has the latest data?
\\n\\nI think the only way to have an incremental superkey of a superfile with multiple sub files is to create a “Payload Index”… that is an index that has all the data you need in it rather than needing to resolve the related records from a datafile via the filepos. If you can use a Payload index, you should be able to index the new subfile, and just append that index to the Super Key…. But if you need to fetch records from the data superfile, you need to re-index the whole superfile every time.\\n\\nHow to overcome stale indexes?? (Note. if unpublish Roxie queries is the solution then we cant unpublish queries)
\\n\\nNot sure exactly what the question is here, but if you do re-index every time and only refer to the index via superkey you should be able to delete the old index once the superkey is updated, right? i.e. all queries go through the superkey and that no longer refers to the old index, so it can be deleted?\\n\\nRegards,\\n\\nBob and Tony\", \"post_time\": \"2013-03-12 15:19:01\" },\n\t{ \"post_id\": 3710, \"topic_id\": 830, \"forum_id\": 8, \"post_subject\": \"Need Incremental Index solution\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nI am having one scenario in which after every few mins new data is added to Superfile and as and when new data comes, we are creating INDEX on Superfile.\\n\\nThis latest INDEX we are using in Package map to update our Roxie queries.\\n\\nECL code for Spraying and adding file to Superfile and creating INDEX on Superfile is:\\n\\nIMPORT STD;\\n\\nVARSTRING timeStamp := '' : stored('timeStamp');\\nVARSTRING fileName := '' : stored('filename');\\nVARSTRING thorip := '' : stored('thorip');\\nVARSTRING destinationlogicalname := '~sprayed::' +fileName + '_' + timeStamp;\\nVARSTRING sourceIP := '' : stored('roxieip');;\\nVARSTRING sourcepath := '/var/lib/HPCCSystems/mydropzone/buzzmonitoring/' +fileName +'.csv';\\nVARSTRING srcCSVseparator := ';';\\nVARSTRING destinationgroup := 'mythor';\\nVARSTRING espserverIPport := 'http://' +thorip + ':8010/FileSpray';\\nVARSTRING subFileDestinationLogicalname := '~sapphire::subfile::buzzmonitoring::' +fileName + '_' + timeStamp;\\n\\nVARSTRING superfile_name := '~sapphire::superfile::buzzmonitoring';\\nVARSTRING indexfile_name := '~sapphire::index::buzzmonitoring::buzzmonitoring_' +timeStamp;\\n\\n/*Spray the csv file from the dropzone*/\\nSprayCSVFile :=STD.File.fSprayVariable(sourceIP,sourcepath,,srcCSVseparator,,,\\ndestinationgroup,destinationlogicalname,,espserverIPport,\\n,TRUE,TRUE,FALSE);\\n\\n/*Create Dataset of sprayed file*/\\nLayout_buzzmonitoring := RECORD\\nSTRING100 UserID;\\nSTRING1000 Search_Keyword;\\nINTEGER8 TwitterUniqueID;\\nINTEGER8 TwitterUserID;\\nSTRING1000 TwitterUserName;\\nSTRING1000 TwitterProfileName;\\nINTEGER8 NoOfFollowers;\\nINTEGER8 NoOfFriends;\\nSTRING1000 Search_Date;\\nSTRING1000 Tweets_Date;\\nEND;\\n\\nFile_Layout_Subfile_Dataset :=\\nDATASET(destinationlogicalname,Layout_buzzmonitoring,CSV(SEPARATOR(';')));\\n\\n/*create logical file with record structure*/\\nsubfileCreation := OUTPUT(File_Layout_Subfile_Dataset,,subFileDestinationLogicalname,THOR,OVERWRITE);\\n\\n/*delete previous logical file without record structure*/\\ndeleteSprayedLogicalFile := STD.File.DeleteLogicalFile(destinationlogicalname);\\n\\nSuperFile_Dataset := DATASET(superfile_name,{Layout_buzzmonitoring,UNSIGNED8 fpos{virtual(fileposition)}},THOR);\\n\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{UserID,TwitterUniqueID,Search_Keyword,Tweets_Date},\\n{TwitterUserID,TwitterUserName,TwitterProfileName,NoOfFollowers,NoOfFriends,Search_Date,fpos},indexfile_name);\\nidx := BUILDINDEX(IDX_SuperFile,OVERWRITE);\\n\\nSEQUENTIAL(\\nSprayCSVFile,\\nsubfileCreation,\\ndeleteSprayedLogicalFile,\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(superfile_name,subFileDestinationLogicalname),\\nStd.File.FinishSuperFileTransaction(),\\nidx\\n);\\n\\n
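Tying the payload-index suggestion from Bob and Tony to the job above, a minimal sketch (all logical names here are placeholders): index only the newly added sub-file and append that small index to the super-key inside a transaction, rather than rebuilding an index over the whole super-file.

IMPORT STD;

kSuperkey   := '~demo::superkey::buzzmonitoring';         // placeholder names
kNewSubfile := '~demo::subfile::buzzmonitoring::latest';
kNewSubkey  := '~demo::subkey::buzzmonitoring::latest';

Layout := RECORD
  STRING100  UserID;
  INTEGER8   TwitterUniqueID;
  STRING1000 Search_Keyword;
END;

// Read only the rows of the NEW sub-file, not the whole super-file.
newRows := DATASET(kNewSubfile, {Layout, UNSIGNED8 fpos{virtual(fileposition)}}, THOR);

// Payload index over just that sub-file...
newKey := INDEX(newRows, {UserID, TwitterUniqueID}, {Search_Keyword, fpos}, kNewSubkey);

// ...built and then appended to the super-key in a superfile transaction.
SEQUENTIAL(
  BUILD(newKey, OVERWRITE),
  STD.File.StartSuperFileTransaction(),
  STD.File.AddSuperFile(kSuperkey, kNewSubkey),
  STD.File.FinishSuperFileTransaction()
);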
\\n\\nThe newly (latest) created INDEX is having all the contents of the Superfile.\\nSo the problem here is that previously created INDEXes are of no use now and number of INDEXes are increasing. We can say that only latest INDEX is of use and all previously created INDEXes are STALE. Inshort redundancy is occuring on THOR and ROXIE (on ROXIE because we are using PackageMap to update Roxie queries).\\n\\nHow to overcome stale indexes?? (Note. if unpublish Roxie queries is the solution then we cant unpublish queries)\\n\\nThe core point is that instead of building a new INDEX every time a sub-file is added(which is slow as it built on the entire super-file), is there a way wherein the super-key can get the 'incremental update' i.e a new/overwritten INDEX which has the latest data ?\", \"post_time\": \"2013-03-12 11:59:20\" },\n\t{ \"post_id\": 3732, \"topic_id\": 832, \"forum_id\": 8, \"post_subject\": \"Re: How to do aggregation that "group by" tow fields\", \"username\": \"bforeman\", \"post_text\": \"If I have already created two indexs on year and atype,does those indexs take effects when I do those:\\n1.TABLE(mydata,rec,year,atype);\\n2.queryresult = mydata(year='2011',atype='B');\\nIf they dont,How could I do?
\\n\\nIndexes are built on THOR for the express purpose of using them in ROXIE. The short answer is YES, you can treat an INDEX like a DATASET and substitute "myindex" where "mydata" is used, but the real power of INDEXes are where they are used in ROXIE with FETCH or Keyed JOINs.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-13 12:16:13\" },\n\t{ \"post_id\": 3730, \"topic_id\": 832, \"forum_id\": 8, \"post_subject\": \"Re: How to do aggregation that "group by" tow fields\", \"username\": \"battleman\", \"post_text\": \"[quote="bforeman":ac7nzj9u]Hi Battleman!\\n\\nEasy, use a cross-tab report!\\n\\nHere is the code to do it:\\n\\nmydata := DATASET([{2011,'A',12,4},\\n {2011,'B',14,3},\\n {2011,'A',4,1},\\n {2012,'A',13,5},\\n {2012,'B',5,2}],\\n{INTEGER Year,STRING1 atype,INTEGER cost,INTEGER profit});\\n\\nmydata;\\n\\nrec := RECORD\\nyear := mydata.year;\\natype := mydata.atype;\\nsumcost := SUM(GROUP,mydata.cost);\\nsumprofit := SUM(GROUP,mydata.profit);\\nEND;\\n\\nmytable := TABLE(mydata,rec,year,atype);\\n\\nmytable;\\n
\\n\\nThe GROUP keyword is where the magic happens. The "year" and "atype" in the TABLE is the GROUP BY equivalent.\\n\\nBTW, This is a topic in our Introduction to THOR Online Training, head on over there if you get a chance:\\n\\nhttp://learn.lexisnexis.com/hpcc \\n\\nRegards,\\n\\nBob\\n\\nThank you very much Bob!! It's really worked for me. But I have other questions:\\n If I have already created two indexs on year and atype,does those indexs take effects when I do those:\\n 1.TABLE(mydata,rec,year,atype);\\n 2.queryresult = mydata(year='2011',atype='B');
\\n If they dont,How could I do?\", \"post_time\": \"2013-03-13 11:43:21\" },\n\t{ \"post_id\": 3718, \"topic_id\": 832, \"forum_id\": 8, \"post_subject\": \"Re: How to do aggregation that "group by" tow fields\", \"username\": \"bforeman\", \"post_text\": \"Hi Battleman!\\n\\nEasy, use a cross-tab report!\\n\\nHere is the code to do it:\\n\\nmydata := DATASET([{2011,'A',12,4},\\n {2011,'B',14,3},\\n {2011,'A',4,1},\\n {2012,'A',13,5},\\n {2012,'B',5,2}],\\n{INTEGER Year,STRING1 atype,INTEGER cost,INTEGER profit});\\n\\nmydata;\\n\\nrec := RECORD\\nyear := mydata.year;\\natype := mydata.atype;\\nsumcost := SUM(GROUP,mydata.cost);\\nsumprofit := SUM(GROUP,mydata.profit);\\nEND;\\n\\nmytable := TABLE(mydata,rec,year,atype);\\n\\nmytable;\\n
\\n\\nThe GROUP keyword is where the magic happens. The "year" and "atype" in the TABLE is the GROUP BY equivalent.\\n\\nBTW, This is a topic in our Introduction to THOR Online Training, head on over there if you get a chance:\\n\\nhttp://learn.lexisnexis.com/hpcc \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-12 15:15:35\" },\n\t{ \"post_id\": 3715, \"topic_id\": 832, \"forum_id\": 8, \"post_subject\": \"How to do aggregation that "group by" tow fields\", \"username\": \"battleman\", \"post_text\": \"Hi:\\n \\n I have a SQL statement for the data beneath this SQL ,and want the result beneath the data:\\n\\n SELECT t.year,t.type,sum(t.cost) costsum,sum(t.profit) profitsum \\n FROM profitandcost t \\n group by t.year,t.type \\n --------------data ------------\\n 2011 A 12 4\\n 2011 B 14 3\\n 2011 A 4 1\\n 2012 A 13 5\\n 2012 B 5 2\\n---------------result------------\\n 2011 A 16 5\\n 2011 B 14 3\\n 2012 A 13 5\\n 2012 B 5 2
\\n\\nHow could I do it by an equivalent ECL code?\", \"post_time\": \"2013-03-12 14:33:53\" },\n\t{ \"post_id\": 3742, \"topic_id\": 835, \"forum_id\": 8, \"post_subject\": \"Re: Function in ECL for GROUP_CONCAT\", \"username\": \"DSC\", \"post_text\": \"This certainly looks doable, though I admit to not typing any of the code into an IDE and testing.\\n\\nBy examining your SQL query, it looks like you're basically joining two tables where the first table is actually an aggregation (temporary) table based on a physical table. If you look at it like that, the ECL version should be easier to construct.\\n\\nYour entire first line of SQL can be translated into ECL as "construct a new recordset to manipulate". Focus on only twitterdata_user first and create a recordset that contains the attributes you need. It may be that TABLE() is the only function you'll use here and you can do it in one step. It may take multiple steps, however; I can't quite tell without running code.\\n\\nOnce you have a twitterdata_user recordset in the format you want, focus on the join to create a second recordset. Once you have that recordset, create a third recordset that is the result of the final GROUP BY.\\n\\nWhile it looks like you're doing more work with all that ECL, I suspect that under the covers you're doing exactly what an SQL interpreter does and it will be much faster due to parallelization across nodes.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-14 12:35:37\" },\n\t{ \"post_id\": 3741, \"topic_id\": 835, \"forum_id\": 8, \"post_subject\": \"Re: Function in ECL for GROUP_CONCAT\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\nWe have MySQL query such as\\nSELECT tul.Latitude,tul.Longitude,tul.City,avg(tdu.Sentiment),count(tul.City),group_concat(tdu.IdeaCloudWord separator ',') as IdeaCloudWord\\nFROM twitterdata_user tdu inner join twitteruserlocation tul on tdu.TwitterUserID=tul.TwitterUserID\\nwhere tdu.TwitterUniquID=2147\\nand tdu.UserID='293705'\\nand tul.city not in ('','NotDetected')\\nand tul.Latitude not in ('','NotDetected')\\nand tul.Longitude not in ('','NotDetected')\\ngroup by tul.city;
\\n\\nCorresponding ECL code is:\\n\\n/*SELECT tul.Latitude,tul.Longitude,tul.City,avg(tdu.Sentiment),count(tul.City),group_concat(tdu.IdeaCloudWord separator ',') as IdeaCloudWord\\nFROM twitterdata_user tdu inner join twitteruserlocation tul on tdu.TwitterUserID=tul.TwitterUserID\\nwhere tdu.TwitterUniquID=2147\\nand tdu.UserID='293705'\\nand tul.city not in ('','NotDetected')\\nand tul.Latitude not in ('','NotDetected')\\nand tul.Longitude not in ('','NotDetected')\\ngroup by tul.city;*/\\n\\nINTEGER4 inputTwitterUniqueID := 2147;\\nSTRING100 inputUserID := '293705';\\n\\n\\nsuperkeyTwitterDataUser := '~sapphire::superkey::twitterdata_user';\\nsuperkeyTwitterUserLocation := '~sapphire::superkey::twitteruserlocation';\\n\\nIDX_SK_TWITTERDATAUSER := INDEX({STRING500 UserID,INTEGER8 TwitterUniquID,STRING100 Date,INTEGER8 InsertID,INTEGER8 TwitterUserID}\\n,{STRING500 SearchKeyWord,STRING5000 TwitterText,STRING100 Sentiment,STRING100 Male_per,STRING100 Female_per,STRING500 TwitterUser,STRING500 TwitterLocation,STRING100 CountryLocation,STRING5000 TwitterText_Sentiment,STRING5000 IdeaCloudWord,STRING5000 IdeaCloudeText,STRING1500 DetectedLanguage,UNSIGNED8 fpos{virtual(fileposition)}}\\n,superkeyTwitterDataUser);\\n\\nIDX_SK_TWITTERUSERLOCATION := INDEX({INTEGER8 TwitterUserID,STRING200 Country,STRING200 City,STRING200 Latitude,STRING200 Longitude}\\n,{STRING100 TwitterUserName,STRING200 Location,STRING50 HPCCStatus,UNSIGNED8 fpos {virtual(fileposition)}}\\n,superkeyTwitterUserLocation);\\n\\ninvalidSet := ['','NotDetected'];\\n\\njoinOpStruct := RECORD\\n\\nSTRING200 Latitude;\\nSTRING200 Longitude;\\nSTRING200 City;\\nREAL4 Sentiment;\\nSTRING100 IdeaCloudWord;\\n\\nEND;\\n\\njoinOpStruct returnJoinOutput(IDX_SK_TWITTERDATAUSER tdu,IDX_SK_TWITTERUSERLOCATION tul) := TRANSFORM\\n\\nSELF.Latitude := tul.Latitude;\\nSELF.Longitude := tul.Longitude;\\nSELF.City := tul.City;\\nSELF.Sentiment := (REAL4)tdu.Sentiment;\\nSELF.IdeaCloudWord := tdu.IdeaCloudWord;\\n\\nEND;\\n\\ntempDS := SORT(JOIN(IDX_SK_TWITTERDATAUSER(UserID=inputUserID AND TwitterUniquID=inputTwitterUniqueID)\\n,IDX_SK_TWITTERUSERLOCATION\\n,LEFT.TWITTERUSERID = RIGHT.TWITTERUSERID AND RIGHT.City NOT IN invalidSet AND RIGHT.Latitude NOT IN invalidSet AND RIGHT.Longitude NOT IN invalidSet\\n,returnJoinOutput(LEFT,RIGHT),KEYED),City);\\n\\n//test just JOIN\\ntempDS;\\n\\nopStruct := RECORD\\ntempDS.Latitude;\\ntempDS.Longitude;\\ntempDS.City;\\navgSentiment := AVE(GROUP,tempDS.Sentiment);\\ncntCity := COUNT(GROUP);\\n//cntIdeaCloudWord := cntIdeaCloudWord + ',' + cntIdeaCloudWord;\\ncntIdeaCloudWord := COUNT(GROUP);\\nEND;\\n\\nopDS := TABLE(tempDS,opStruct,city);\\n\\nopDS;\\n\\n
\\n\\nI have attached CSV files for data.\\n\\nHow do I get the comma-sep. string for IdeaCloudWord ? Can ROLLUP really help in this scenario where JOIN is involved?\\n\\nThanks and Regards!\", \"post_time\": \"2013-03-14 11:54:24\" },\n\t{ \"post_id\": 3737, \"topic_id\": 835, \"forum_id\": 8, \"post_subject\": \"Re: Function in ECL for GROUP_CONCAT\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\n\\nThanks for your response!\\n\\nIts works for current scenario.\", \"post_time\": \"2013-03-14 09:11:36\" },\n\t{ \"post_id\": 3731, \"topic_id\": 835, \"forum_id\": 8, \"post_subject\": \"Re: Function in ECL for GROUP_CONCAT\", \"username\": \"DSC\", \"post_text\": \"ROLLUP() would work well here.\\n\\nRecLayout := RECORD\\n\\tSTRING\\tpub_id;\\n\\tSTRING\\tcate_id;\\nEND;\\n\\nds := DATASET\\n\\t(\\n\\t\\t[\\n\\t\\t\\t{'P001','CA002'},\\n\\t\\t\\t{'P001','CA004'},\\n\\t\\t\\t{'P002','CA003'},\\n\\t\\t\\t{'P002','CA003'},\\n\\t\\t\\t{'P003','CA001'},\\n\\t\\t\\t{'P003','CA003'},\\n\\t\\t\\t{'P004','CA005'},\\n\\t\\t\\t{'P004','CA002'},\\n\\t\\t\\t{'P005','CA001'},\\n\\t\\t\\t{'P005','CA004'},\\n\\t\\t\\t{'P006','CA005'},\\n\\t\\t\\t{'P006','CA001'},\\n\\t\\t\\t{'P007','CA005'},\\n\\t\\t\\t{'P007','CA002'},\\n\\t\\t\\t{'P008','CA005'},\\n\\t\\t\\t{'P008','CA004'}\\n\\t\\t],\\n\\t\\tRecLayout\\n\\t);\\n\\nRecLayout RollupRecLayout(RecLayout l, RecLayout r) := TRANSFORM\\n\\tSELF.pub_id := l.pub_id;\\n\\tSELF.cate_id := l.cate_id + ',' + r.cate_id;\\nEND;\\n\\nrs1 := SORT(ds,pub_id);\\nrs2 := ROLLUP\\n\\t(\\n\\t\\trs1,\\n\\t\\tLEFT.pub_id = RIGHT.pub_id,\\n\\t\\tRollupRecLayout(LEFT,RIGHT)\\n\\t);\\n\\nOUTPUT(rs2)\\n
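On the follow-up question above about the comma-separated IdeaCloudWord after a JOIN: the same ROLLUP pattern applies to the joined recordset once it is sorted by the grouping field. A minimal sketch, where the inline dataset stands in for the tempDS produced by the JOIN in the earlier post:

rec := RECORD
  STRING200 City;
  STRING    IdeaCloudWord;
END;

joined := DATASET([{'Pune','alpha'}, {'Pune','beta'}, {'Delhi','gamma'}], rec);

rec doRoll(rec l, rec r) := TRANSFORM
  SELF.City          := l.City;
  SELF.IdeaCloudWord := l.IdeaCloudWord + ',' + r.IdeaCloudWord;
END;

// One row per City with the words concatenated; the input must be sorted by City.
perCity := ROLLUP(SORT(joined, City), LEFT.City = RIGHT.City, doRoll(LEFT, RIGHT));
OUTPUT(perCity);

The counts and averages from the TABLE in the post above could then be joined back to perCity on City if a single result set is needed.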
\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-13 11:44:51\" },\n\t{ \"post_id\": 3727, \"topic_id\": 835, \"forum_id\": 8, \"post_subject\": \"Function in ECL for GROUP_CONCAT\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nWe have MySQL query such as :\\n\\n\\nSELECT pub_id,GROUP_CONCAT(cate_id) \\nFROM book_mast \\nGROUP BY pub_id;\\n
\\n\\nI have attached image of output we get for this query and also the data worked on.\\n\\nWe need ECL function similar to GROUP_CONCAT().\\nMySQL GROUP_CONCAT() function returns a string with concatenated non-NULL value from a group.\\n\\nLink for the same is: http://www.w3resource.com/mysql/aggrega ... concat.php\", \"post_time\": \"2013-03-13 08:55:02\" },\n\t{ \"post_id\": 25893, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"Using the OPT flag on either the INDEX() or DATASET() function will allow ROXIE to work with files that may not exist, whether you use package maps or DYNAMIC().\\n\\nDYNAMIC() -- which is a function surrounding the logical filename in the INDEX() or DATASET() function -- just tells ROXIE to always resolve the pathname at runtime, for every query, which means ROXIE does not preload the data, lock the file, maintain caches, etc. If you don't put an OPT flag in there and the file is missing, you will get a runtime error even with DYNAMIC().\", \"post_time\": \"2019-04-23 13:56:52\" },\n\t{ \"post_id\": 25873, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"abaruchi\", \"post_text\": \"Just adding some information for this thread.\\nRecently I read an article written by Dan Camper (link below) and he explains how to update an superkey in a Roxie Query without downtime using packages. \\nAs far as I understand, using packages, is the "official" way to update Superkeys that is being used by Roxie. On the other hand, the DYNAMIC file is a way to refer to a file that, for some reason, do not exist yet (but it will). \\n\\nSummary:\\n- If you want to update a superkey that is used by a Roxie, use packages;\\n- If you want to refer to a file that is not there (for some reason that is not under your control), use DYNAMIC;\\n\\nLink for the Dan Camper article:\\nhttps://hpccsystems.com/blog/real-time- ... s-in-roxie\\n\\n- Artur Baruchi\", \"post_time\": \"2019-04-22 14:28:26\" },\n\t{ \"post_id\": 3804, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"richardkchapman\", \"post_text\": \"There is an option in the ecl compiler 'allFilesDynamic' that you may find useful. Should be able to set it using #option or on the command line.\\n\\nYou are right though that packages are the recommended way to go long term.\", \"post_time\": \"2013-03-21 17:17:56\" },\n\t{ \"post_id\": 3784, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"Excellent news, David!\\n\\nI think the real solution to this problem of updating superkeys on the fly lies with Packages, though. One day when I have some time I'm going to try to figure out the "recipe" for making that work. If/when I do, I'll be sure to publish that to the forums. I'm hoping, however, that someone beats me to the punch and publishes first!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-20 11:33:32\" },\n\t{ \"post_id\": 3783, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"janssend\", \"post_text\": \"Thank you Dan,\\n\\nI have done some changes in my ECL scripts in order to avoid using same subfile names. It seems to work. I have to check with simultaneous roxie querie calls and superfile updating. 
But in my case, DYNAMIC files make roxie data update easier.\\n\\nThank you again.\\nRegards\\nDavid\", \"post_time\": \"2013-03-20 09:51:06\" },\n\t{ \"post_id\": 3765, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"[quote="janssend":2isut13x]Message: Different version of dynamic_file_test::subkey_1 already loaded: sizes = 32768 32768 Date = 2013-03-18T13:06:44 2013-03-18T13:06:25 \\n\\nI think I failed to fully read your earlier reply; my apologies.\\n\\nI've seen this "different version" error pop up when Roxie copies a logical file from Thor and then something goes wrong with the tracking of that file. What's happening is that the file physically exists in the Roxie portion of the distributed file system but Dali does not know about it (it doesn't show up when you Browse Logical Files, for instance). I've found that physically deleting the Roxie version of the file usually clears the problem, which means going into each of your nodes and deleting those file parts. They should all be within /var/lib/HPCCSystems/hpcc-data/roxie/ on a standard installation. If you don't have any published Roxie queries you can simply delete that entire directory.\\n\\nThere is almost certainly something wrong with what I just wrote, either the diagnosis or the fix. That 'fix' is really heavy-handed and there is probably a much more elegant way to resolve the issue. Plus, I don't really know what's going on under the covers; this is just the explanation I've come up with.\\n\\nIf you're uncomfortable deleting files like this, there is one easy thing to try: Rename the subkeys to some other name. You won't collide with anything that's already existing if you start with a new name.\\n\\nLet me know what you find!\\n\\nDan\", \"post_time\": \"2013-03-18 14:30:19\" },\n\t{ \"post_id\": 3764, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"janssend\", \"post_text\": \"Hi Dan (and thanks for you answer), \\n\\nI have done what you said : publish 'roxie_query' before adding subkey file to 'dynamic' superkey file... but I get the same result when calling the published query :\\n\\ndynamic superkey test Response\\nException\\nReported by: Roxie\\nMessage: Different version of dynamic_file_test::subkey_1 already loaded: sizes = 32768 32768 Date = 2013-03-18T13:06:44 2013-03-18T13:06:25 \\n\\nI have probably misunderstood something, but what? Do you known if there is a way to setup Roxie (using HPCC configuration manager) to force Roxie nodes to 're-load' changed dynamic files ? (I would have thought it was done by default using 'DYNAMIC') ?\\n\\nTo be honest, this issue is crucial for us, because we want to be able to update really often data of published roxie queries. (we may have to think about 'package' ?)\\n\\nRegards. \\nDavid\", \"post_time\": \"2013-03-18 13:26:58\" },\n\t{ \"post_id\": 3760, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"Hi David,\\n\\nIn my testing I found -- or thought I found -- that if the Roxie query was published when only an empty superkey was present then everything Just Worked. You could add, remove, and swap contents without any problems. If, however, a subkey was present when the query was published then Roxie would indeed latch onto that subkey and copy it (if the configuration is setup that way). 
The subkey would then be subject to the same restrictions as before. So the key step was ensuring that the superkey was empty when the query was published.\\n\\nDoes that match with your findings?\\n\\nDan\", \"post_time\": \"2013-03-15 16:35:09\" },\n\t{ \"post_id\": 3759, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"janssend\", \"post_text\": \"Thanks for this sample,\\n\\nA see a restriction with DYNAMIC files; DYNAMIC superfiles are not 'locked' but published queries still retain references to superkey contents.\\n\\nWhich means this is not possible to replace the content of a given superkey sub file. (for a roxie published query)\\n\\nHere is the create_files.ecl sample (I made some change to add subfiles to superkey) \\n\\nIMPORT Std;\\n\\n//------------------------------------------------------------------------------\\n\\nkSuperFilePath := '~dynamic_file_test::superfile';\\nkMaxRecordsPerSubkey := 1000;\\n\\n//------------------------------------------------------------------------------\\n\\nDataRec := RECORD\\n\\tUNSIGNED4\\tmyKey;\\n\\tUNSIGNED8\\tmyValue;\\nEND;\\n\\nCreateSubkey(UNSIGNED1 keyCount, UNSIGNED4 recordCount) := FUNCTION\\n\\tsubkeyPath := '~dynamic_file_test::subkey_' + (STRING)keyCount;\\n\\t\\n\\tDataRec MakeDataRec(UNSIGNED c) := TRANSFORM\\n\\t\\tSELF.myKey := c;\\n\\t\\tSELF.myValue := RANDOM();\\n\\tEND;\\n\\t\\n\\tsubkeyData := DISTRIBUTE(DATASET(recordCount,MakeDataRec(COUNTER)));\\n\\t\\n\\tidx := INDEX\\n\\t\\t(\\n\\t\\t\\tsubkeyData,\\n\\t\\t\\t{\\n\\t\\t\\t\\tmyKey\\n\\t\\t\\t},\\n\\t\\t\\t{\\n\\t\\t\\t\\tmyValue\\n\\t\\t\\t},\\n\\t\\t\\tsubkeyPath\\n\\t\\t);\\n\\t\\n\\t\\n\\tRETURN SEQUENTIAL(BUILD(idx,OVERWRITE), STD.File.AddSuperFile(kSuperFilePath, subkeyPath));\\nEND;\\n\\n//------------------------------------------------------------------------------\\n\\nSEQUENTIAL(\\nStd.File.CreateSuperFile(kSuperFilePath,allow_exist:=TRUE),\\nSTD.File.ClearSuperFile(kSuperFilePath, TRUE),\\nPARALLEL(CreateSubkey(1,RANDOM() % kMaxRecordsPerSubkey),CreateSubkey(2,RANDOM() % kMaxRecordsPerSubkey),CreateSubkey(3,RANDOM() % kMaxRecordsPerSubkey)),\\n);
\\n\\nAfter publishing 'roxie_query.ecl'; you would be able to submit it until you don't 're-submit' create_files.ecl.\\n\\nIn case you submit 'create_files.ecl' (after a call to published 'roxie_query.ecl'); you would get this response :\\n\\n[color=#0040FF:1zg494mz]dynamic superkey test Response\\nException\\nReported by: Roxie\\nMessage: Different version of dynamic_file_test::subkey_1 already loaded: sizes = 32768 32768 Date = 2013-03-15T16:14:40 2013-03-15T16:14:09 \\n\\nThe only possibility to dynamically update superkeys: we should add new subfiles using brand new logical names. \\n\\nRegards\\nDavid\", \"post_time\": \"2013-03-15 16:26:36\" },\n\t{ \"post_id\": 3752, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"Durai\", \"post_text\": \"It is quite interesting discussion, I can already map some use cases around it. Thanks Dan for detailed explanation on Dynamic reference to superkeys.\\n\\nPackages can also be good candidate on this workaround, But (In my understanding) packages may requires cluster restart. I am not sure though! but in specific cases package can keep data in memory(without having to restart the cluster, need some validation here!!!), if this is true Package can be used we can use simply as superfiles. And we can reindex files to consolidate the data when there is Roxie idle time.\", \"post_time\": \"2013-03-15 04:43:49\" },\n\t{ \"post_id\": 3740, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"Can you provide a comparison - Packages v/s DYNAMIC files ?
\\nThis dynamic technique is simpler to set up and use, but has a runtime performance penalty and requires a fairly standard Thor/Roxie configuration. Personally, I think it's a perfectly good solution providing your requirements fit within those constraints.\\n\\nI admit to not fully understanding packages. Version 3.10.2 suffered from some problems with package management that prevented me from easily experimenting with that feature (you couldn't easily delete mistakes, basically). Version 3.10.4 was just released and it addresses those problems, so I should go back and experiment some more. From what I understand, though, packages should provide a performant method for updating superkeys but at the expense of increasing subkey management complexity. Specifically, packages seem to update the superkey/subkey relationships for queries but they do not update the superkeys and subkeys themselves. That means that it is not easy to see and manage the superkey/subkey relationships through another tool, such as ECL Watch. I could be wrong about that, however; that's what I want to experiment with.\\n\\nThe only real oddity I've found with this technique involves an empty superkey. In that case, the Roxie query returns nothing rather three empty results. There may be other strange behaviors, but I haven't run into them yet.
\\nThe problem is as described. If you have a SOAP-based caller that is expecting three responses, then having an empty superkey (a superkey with no subkeys) then the response will be invalid. Instead of seeing three empty results, you'll see absolutely nothing in the response. You will get a reply to the SOAP call, it just won't be formatted correctly. That's less of a problem with a JSON interface, though, as the response isn't strongly defined anyway.\\n\\nI hope this helps.\\n\\nDan\", \"post_time\": \"2013-03-14 11:51:43\" },\n\t{ \"post_id\": 3736, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"Re: One method for dynamically updating superkeys\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\n\\nGood work with the that DYNAMIC stuff \\n\\nI have used the 'packages' approach to push the newly created INDEXes to Roxie.\\n\\nActually, I was re-directed to this post from the below one :\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=830&p=3725#p3725\\n\\nI tried your approach in my the context of my requirement and it worked. Still, I have the following queries:\\n\\n
Can you provide a comparison - Packages v/s DYNAMIC files ?
\\nI didn't get the below part - can you elaborate :
\\n\\nThe only real oddity I've found with this technique involves an empty superkey. In that case, the Roxie query returns nothing rather three empty results. There may be other strange behaviors, but I haven't run into them yet.
\\n\\nThanks and regards !\", \"post_time\": \"2013-03-14 09:06:35\" },\n\t{ \"post_id\": 3733, \"topic_id\": 837, \"forum_id\": 8, \"post_subject\": \"One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"There have been a lot of questions on the forum recently concerning dynamically updating superkeys. For those of you who aren't clear on what superkeys are: A superkey is a superfile composed only of payload indexes and (optionally) other superkeys. Payload indexes are indexes that do not reference datasets and contain all the fields you need for further work, both keyed (indexed) values and read-only fields (the payload).\\n\\nThe typical usage pattern is to create an initial superkey, usually as a superfile with one payload index. A Roxie query is then written to access the superkey. Everyone seems to have no problem with this part.\\n\\nThe difficulty surrounds the fact that Roxie usually obtains a lock/reference to the superkey for that query. While the query is published, the superkey cannot be modified. Since the usual desire is to update the superkey with new data on the fly, this can pose a challenge.\\n\\nThe usual answer -- which is the best-performing and most correct way of doing this -- is to unpublish the query, update the superkey, then republish the query. If your requirement is to keep that query available to an external caller at all times, then the problem becomes "how do I keep the query alive?" rather than "how do I update the superkey?" One answer to this is to have two Roxie clusters and switch between them. If the Roxie clusters are configured to copy their underlying data resources (which is the default setting) then each will have independent copies of the data. Point all your callers to Roxie cluster A, update cluster B, point all your callers to B, then update A. This technique has many advantages, such as giving you time to QA the update, roll back any problem updates, etc. without impacting callers. Another advantage of this technique is performance: You're using the system as it is designed to be used, and all the data- and code-performance optimizations are in place. The expense is that your infrastructure will be somewhat bigger.\\n\\nHowever, there are cases where you don't need all that performance, or you can't afford the extra infrastructure, or both. Or maybe you're just doing a proof-of-concept test and you don't want to go to all the extra work.\\n\\nI was rereading a section of the language reference manual and stumbled across something I'd read before hadn't used in practice. In the Scope and Logical Filenames section of the manual there is a subsection titled "Dynamic Files." It reads:\\n\\nDynamic Files\\n\\nIn Roxie queries (only) you can also read files that may not exist at query deployment time, but that will exist at query runtime by making the filename DYNAMIC.\\nThe syntax looks like this:\\n\\nDYNAMIC('<filename>')\\n\\nFor example,\\n\\n
\\nMyFile := DATASET(DYNAMIC('~training::import::myfile'),RecStruct,FLAT);\\n\\nThis causes the file to be resolved when the query is executed instead of when it is deployed.
\\nThis formed the basis for a different method for updating superkeys dynamically.\\n\\nThe executive summary is: Create an empty superkey and a Roxie query that references it dynamically. Because it is marked dynamic, the query will not retain a reference to it and it therefore will not have a 'lock' on the superkey or its contents. You can therefore update the superkey's contents at will, without performing the unpublish-update-republish task.\\n\\nI've found some limitations with this technique and there could be more. Specifically:\\n\\nPayload indexes are not copied to the Roxie cluster. Roxie will always "reach back" to Thor in order to read those files. If your Roxie and Thor clusters are mismatched or reside on different nodes, this will incur a network penalty.
\\nDepending on configuration settings, Roxie will open file handles to the payload index files at either query publish time or lazily. Either way, Roxie normally tries to keep those file handles open in order to avoid the performance penalty of reopening the files for every query. This technique prevents Roxie from doing that; the indexes will be reopened for every query. This will also impose a performance penalty, and it gets worse if there is a network penalty. This performance penalty will only get worse as the number of payload indexes within the superkey increases, so it becomes vital that some periodic task rolls up and aggregates those updates frequently.
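A minimal sketch of such a periodic roll-up, assuming one keyed field and one payload field; the consolidated index name and the layout are hypothetical, and this is not the code from the attached zip:

IMPORT STD;

superKey     := '~dynamic_file_test::superfile';            // superkey the Roxie query reads via DYNAMIC()
consolidated := '~dynamic_file_test::consolidated_subkey';  // hypothetical name for the rolled-up payload index

// Assumed layout of the existing payload indexes
KeyRec := RECORD
    STRING15 id;
END;
PayRec := RECORD
    STRING100 payload;
END;

currentKey := INDEX(KeyRec, PayRec, superKey);   // everything currently referenced by the superkey

// Copy the combined contents into a plain recordset, then build one new payload index from it
CombinedRec := RECORD
    KeyRec;
    PayRec;
END;
combined  := PROJECT(currentKey, TRANSFORM(CombinedRec, SELF := LEFT));
newSubkey := INDEX(combined, {id}, {payload}, consolidated);

SEQUENTIAL(
    BUILD(newSubkey, OVERWRITE),
    // Swap the consolidated index in as the sole subfile; the old subkeys can be deleted afterwards
    STD.File.StartSuperFileTransaction(),
    STD.File.ClearSuperFile(superKey),
    STD.File.AddSuperFile(superKey, consolidated),
    STD.File.FinishSuperFileTransaction()
);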
\\n\\nEnclosed with this posting is a zip file containing two ECL files that demonstrate this technique.\\n\\ncreate_files.ecl: This is Thor code that builds the files needed for the test. An empty superkey named '~dynamic_file_test::superfile' is created along with three payload indexes named '~dynamic_file_test::subkey_1', '~dynamic_file_test::subkey_2' and '~dynamic_file_test::subkey_3'.
\\nroxie_query.ecl: This is a Roxie query that references the superkey. It has no parameters and outputs three results just to show that the query worked.
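For readers who want a feel for the query side, a hedged sketch follows; it is not the actual roxie_query.ecl from the zip, the field layout is invented, and it assumes DYNAMIC() is accepted for INDEX filenames just as it is for DATASET, which is what this technique relies on:

KeyRec := RECORD
    STRING15 id;
END;
PayRec := RECORD
    STRING100 payload;
END;

// DYNAMIC() defers filename resolution to query runtime, so Roxie keeps no lock on the superkey
PeopleKey := INDEX(KeyRec, PayRec, DYNAMIC('~dynamic_file_test::superfile'));

OUTPUT(COUNT(PeopleKey), NAMED('record_count'));
OUTPUT(CHOOSEN(PeopleKey, 100), NAMED('sample_records'));
OUTPUT(COUNT(PeopleKey(id != '')), NAMED('nonblank_ids'));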
\\n\\nTo use this example, first execute the code within create_files.ecl to build the data, then publish the contents of roxie_query.ecl as a Roxie query. Then it's play time:\\n\\nUsing a web browser and the query interface (port 8002) on your cluster, submit the query. No parameters are needed.
\\nUsing a web browser and ECL Watch (port 8010) adjust the contents of the superkey by manually adding and removing payload indexes. Go back to the first step and try the query again. Notice that you don't have to unpublish the query before manipulating the contents of the superkey.
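The same manual ECL Watch step can also be done programmatically; a minimal sketch, assuming a fourth payload index with the same layout has already been built (the subkey name is hypothetical):

IMPORT STD;

superKey  := '~dynamic_file_test::superfile';
newSubkey := '~dynamic_file_test::subkey_4';   // hypothetical: built the same way as subkey_1..3

// Attach another payload index to the superkey; no unpublish/republish of the Roxie
// query is needed because the query references the superkey with DYNAMIC().
SEQUENTIAL(
    STD.File.StartSuperFileTransaction(),
    STD.File.AddSuperFile(superKey, newSubkey),
    STD.File.FinishSuperFileTransaction()
);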
\\n\\nWhile I didn't supply any code for automatically generating updates to the superkey, I wouldn't think that creating such code would be difficult for any decent ECL programmer.\\n\\nThe only real oddity I've found with this technique involves an empty superkey. In that case, the Roxie query returns nothing rather than three empty results. There may be other strange behaviors, but I haven't run into them yet.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-13 12:40:47\" },\n\t{ \"post_id\": 3757, \"topic_id\": 838, \"forum_id\": 8, \"post_subject\": \"Re: ECL Remote WS port cannot be accessed\", \"username\": \"buptkang\", \"post_text\": \"Thanks, problem solved by network team.\", \"post_time\": \"2013-03-15 13:30:56\" },\n\t{ \"post_id\": 3744, \"topic_id\": 838, \"forum_id\": 8, \"post_subject\": \"Re: ECL Remote WS port cannot be accessed\", \"username\": \"bforeman\", \"post_text\": \"I am pretty sure that the 8002 port needs to be opened.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-14 15:30:25\" },\n\t{ \"post_id\": 3735, \"topic_id\": 838, \"forum_id\": 8, \"post_subject\": \"ECL Remote WS port cannot be accessed\", \"username\": \"buptkang\", \"post_text\": \"Hello there,\\n\\nWhen I deploy a service to my local virtual machine, I can access the data through the port 8002 ESP_WS site by using SOAPCALL. \\n\\nHowever, when I want to remotely access non-local production clusters, after deploying the same service there, I cannot access the 8002 port to call the same method.\\n\\nAs I have no experience on the HPCC admin side, can somebody tell me how I could do this? Or does the admin need to open this port to make it accessible?\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-03-13 15:57:21\" },\n\t{ \"post_id\": 3745, \"topic_id\": 839, \"forum_id\": 8, \"post_subject\": \"Re: Strange behavior using IF on file DataSet (3.10.4-1)\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI can confirm the error, and I would suggest that you please open an issue regarding this.\\n\\nGood news, there is a workaround:\\n\\nMyThorDataSet := IF(MyFileExists,OUTPUT(MyInlineDataSet),OUTPUT(MyFileDataSet));\\n\\nOUTPUT(MyFileExists); // ok\\nOUTPUT(MyInlineDataSet); // ok\\nOUTPUT(MyFileDataSet); // ok \\nMyThorDataSet; // ok this way
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-14 15:49:10\" },\n\t{ \"post_id\": 3738, \"topic_id\": 839, \"forum_id\": 8, \"post_subject\": \"Strange behavior using IF on file DataSet (3.10.4-1)\", \"username\": \"janssend\", \"post_text\": \"I got an error message when submitting the following code. The environment is a cluster of 4 thor nodes and 2 roxie nodes.\\n\\nThis error happened after upgrading from 3.8.6 to 3.10.4-1. \\n\\nIMPORT Std, Std.File, Std.Str;\\n\\nMyRecord := {INTEGER id, STRING value};\\nMyInlineDataSet := DATASET([{1,'rec1'},{2,'rec2'}], MyRecord);\\nMyFileDataSet := DATASET('~tdata', {MyRecord}, FLAT);\\n\\nMyFileExists := Std.File.FileExists('~tdata');\\n\\nMyThorDataSet := IF (MyFileExists,\\tMyInlineDataSet, MyFileDataSet);\\n\\nOUTPUT(MyFileExists);\\t\\t// ok\\nOUTPUT(MyInlineDataSet);\\t// ok\\nOUTPUT(MyFileDataSet);\\t\\t// ok \\nOUTPUT(MyThorDataSet);\\t\\t// failed
\\n\\nECL Error message :\\neclagent -1: System error: -1: Graph[1], if[5]: SLAVE 192.168.1.196:20100: No active dali server connection available\\n\\n\\nThor slave logs :\\n000007C5 2013-03-14 10:22:58 7282 7282 /var/jenkins/workspace/CE-Candidate-3.10.4/CE/ubuntu_12_04_x86_64/HPCC-Platform/dali/base/dadfs.cpp(341) : [color=#0000FF:2bg56adl]CConnectLock CDistributedFileDirectory::exists Files/Scope[@name="."]/File[@name="tdata"] : No active dali server connection available\\n000007C6 2013-03-14 10:22:58 7282 7282 /var/jenkins/workspace/CE-Candidate-3.10.4/CE/ubuntu_12_04_x86_64/HPCC-Platform/thorlcr/slave/slavmain.cpp(389) : Graph[1], if[5]: No active dali server connection available\\n\\n\\nIn short, the FileExists function works, except when called from IF condition.\\n\\nRegards\\nDavid\", \"post_time\": \"2013-03-14 09:48:48\" },\n\t{ \"post_id\": 3739, \"topic_id\": 840, \"forum_id\": 8, \"post_subject\": \"STD.File.Copy from thor to roxie failed (3.10.4-1)\", \"username\": \"janssend\", \"post_text\": \"I got an error message when submitting the following code. The environment is a cluster of 4 thor nodes and 2 roxie nodes.\\n\\nIMPORT Std, Std.File, Std.Str;\\n\\nMyRecord := {INTEGER id, STRING value};\\nMyInlineDataSet := DATASET([{1,'rec1'},{2,'rec2'}], MyRecord);\\n\\n\\nSEQUENTIAL(\\n//OUTPUT(MyInlineDataSet ,, '~tdata' , OVERWRITE), \\t\\t// ok\\nSTD.File.Copy('~tdata','myroxie', '~rdata' , , , , ,TRUE , ,FALSE) // failed\\n);\\n\\n
\\n\\nECL error :\\neclagent 0: System error: 0: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning \\n\\n\\n\\nECL logs :\\n\\n0000000C 2013-03-14 11:43:46 57295 57295 fileservices using esp URL: http://192.168.1.239:8010/FileSpray\\n0000000D 2013-03-14 11:43:47 57295 57295 ERROR: 0: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning (in item 1)\\n0000000E 2013-03-14 11:43:47 57295 57295 Releasing run lock\\n0000000F 2013-03-14 11:43:47 57295 57295 System error: 0: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning\\n00000010 2013-03-14 11:43:47 57295 57295 0: System error: 0: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning\\n00000011 2013-03-14 11:43:47 57295 57295 Process complete\\n\\n\\nBut when I use DFU under ESP to copy 'tdata' file (from thor cluster) to 'tdata' file (to roxie cluster); it works.\\n\\nWhat's wrong with my STD.File.Copy parameters ?\\n\\n\\nRegards\\nDavid\\n\\n[attachment=2:hyb83neu]file8.png\\n[attachment=1:hyb83neu]file7.png\\n[attachment=0:hyb83neu]file6.png\", \"post_time\": \"2013-03-14 11:04:07\" },\n\t{ \"post_id\": 3751, \"topic_id\": 842, \"forum_id\": 8, \"post_subject\": \"Re: count(group) exceed skew limit\", \"username\": \"DSC\", \"post_text\": \"Addendum: If you see this particular error during a SORT() or JOIN() instead of a TABLE(), and there is no way to restructure your data to avoid the error, SORT() and JOIN() both have a SKEW option that allows you to basically ignore the error. Technically, the option allows you to set exactly when the error would pop up -- how bad the skew has to get -- but you can set it such that the skew is ignored entirely.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-14 20:47:57\" },\n\t{ \"post_id\": 3748, \"topic_id\": 842, \"forum_id\": 8, \"post_subject\": \"Re: count(group) exceed skew limit\", \"username\": \"DSC\", \"post_text\": \"More than likely, you have a very few number of dataset1.title values and a large number of records associated with those values. I would suggest breaking up the problem a little.\\n\\nr1 := RECORD\\n\\tdataset1.title;\\n\\tn := COUNT(GROUP);\\nEND;\\n\\nrs1 := DISTRIBUTE(dataset1,HASH32(title));\\nrs2 := TABLE(rs1,r1,title,LOCAL,MERGE);\\n\\nOUTPUT(rs2);
\\nI haven't executed the above code, but it shows what I'm getting at. Basically, you distribute the data so that all the identical titles wind up on the same node, then you perform the TABLE() function locally (which avoids all skew issues).\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-14 19:05:53\" },\n\t{ \"post_id\": 3747, \"topic_id\": 842, \"forum_id\": 8, \"post_subject\": \"count(group) exceed skew limit\", \"username\": \"rachel\", \"post_text\": \"Hi\\nMy code like this \\nr1:=record\\ndataset1.title;\\nn:=count(group);\\nEnd;\\no1=Table(dataset1,r1,number);\\n\\nwhen I do the same thing again count(group)of n from the above result. A system error show exceed skew limit. But o1 is much smaller than dataset1. Why this happened? How to solve it?\\nThank you\", \"post_time\": \"2013-03-14 18:16:55\" },\n\t{ \"post_id\": 3758, \"topic_id\": 844, \"forum_id\": 8, \"post_subject\": \"Re: Hacking Superfiles\", \"username\": \"Durai\", \"post_text\": \"Hi Dan, \\n\\nThanks for the reply. I figured the problem with the help from Joe, Initially when I I wrote the superfile/subfile, I wrote it as indexed file. But when I read it through different eCL code ( not from regular process), I edited and saved it as normal output file. So it complained about index layout mismatch. \\n\\nBy editing the files, I meant exactly do read it and modify it using ECL and writing it/replacing it programmatically. However this was not done through the regular process I am supposed to do, but from outside the process, to test the results for new incoming data. Since I can't change the incoming data, I am modifying the subfile to mimic the process I need to validate. \\n\\nThanks\\nDurai\", \"post_time\": \"2013-03-15 15:03:43\" },\n\t{ \"post_id\": 3754, \"topic_id\": 844, \"forum_id\": 8, \"post_subject\": \"Re: Hacking Superfiles\", \"username\": \"DSC\", \"post_text\": \"The same record layout you use for the superfile should work for the subfile. I've done something similar in the past without a problem.\\n\\nYou might want to compare your ECL code with the actual layout as defined in the Details section for the subfile (from the Browse Logical Files option in ECL Watch). That may highlight any differences. If that fails, post some more information here and we'll take a crack at it.\\n\\n(I doubt you really want to update a subfile, though. Do you mean that you want to read it, create a new subfile through some process, then replace the old subfile with the new one?)\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-15 05:08:12\" },\n\t{ \"post_id\": 3753, \"topic_id\": 844, \"forum_id\": 8, \"post_subject\": \"Hacking Superfiles\", \"username\": \"Durai\", \"post_text\": \"Hi,\\n\\nHow do I read a particular subfile directly. I am planning for a testing for which I need to update some data in one of the subfiles. Then add it back to its superfile. \\n\\nWhen I try to access the particular subfile, I get layout mismatch error,though the layout is clearly defined and I validated the same in ESP workunit display page. any insight/solutions is highly appreciated. \\n\\nThanks\\nDurai\", \"post_time\": \"2013-03-15 04:56:28\" },\n\t{ \"post_id\": 3756, \"topic_id\": 845, \"forum_id\": 8, \"post_subject\": \"Re: Error while publishing a query to roxie\", \"username\": \"bforeman\", \"post_text\": \"I've seen this once before, it usually indicates that the Roxie cluster is down. 
You can still publish...\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-15 12:06:38\" },\n\t{ \"post_id\": 3755, \"topic_id\": 845, \"forum_id\": 8, \"post_subject\": \"Error while publishing a query to roxie\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI encounter an error while publishing the query to roxie.\\n\\n\\nError : Published to Queryset.\\nBut request to update cluster failed.\\n
\\n\\nThe same works in hthor. There were some posts related to the same issue on GitHub, but I couldn't find any solution.\\n\\nKindly suggest some pointers for the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-03-15 07:31:56\" },\n\t{ \"post_id\": 3808, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"bforeman\", \"post_text\": \"Hi Gavin,\\n\\nThe issue I reported says:\\n\\nA simple SUM, followed by a COUNT on the same file.\\n
\\n\\nI thought that was the same thing?\\nI agree that THOR files are much faster than CSV files, but the issue that I was reporting was that a CSV file doubles the time when another aggregate (like SUM) is added to the workunit. With THOR, the time is not doubled and is almost just as fast.\\n\\nSorry for any confusion!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-21 18:06:44\" },\n\t{ \"post_id\": 3807, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"ghalliday\", \"post_text\": \"That issue you opened didn't test the same thing.\\n\\nThe claim (I think) was that processing two aggregates is twice as slow as processing one. \\n\\nI can believe that is true for hthor reading from csv, but I would be very surprised if it was true for Thor.\\n\\nYou can process multiple aggregates at once explicitly by saying\\n\\n\\noutput(TABLE(alldata, { COUNT(GROUP), SUM(GROUP, pv) }));\\n
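A variant of the same single-pass aggregate with named result columns (a sketch, using the same alldata and pv fields referenced in the one-liner):

// One scan of the file, both aggregates computed together
summary := TABLE(alldata, { UNSIGNED8 rec_count := COUNT(GROUP); UNSIGNED8 total_pv := SUM(GROUP, pv); });
OUTPUT(summary);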
\", \"post_time\": \"2013-03-21 17:49:38\" },\n\t{ \"post_id\": 3806, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"bforeman\", \"post_text\": \"Hi Gavin,\\n\\nThe same behavior was seen on both targets.\\n\\nI used the #OPTION('pickBestEngine', false), and manually set both targets. Results were the same for THOR and hTHOR.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-21 17:41:58\" },\n\t{ \"post_id\": 3805, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"ghalliday\", \"post_text\": \"Are these queries executing in thor or hthor?\", \"post_time\": \"2013-03-21 17:21:39\" },\n\t{ \"post_id\": 3785, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"bforeman\", \"post_text\": \"You can track the progress here, we have opened an issue at the request of the development team.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-8982\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-20 11:53:11\" },\n\t{ \"post_id\": 3781, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"maruyue\", \"post_text\": \"Now as to why it does this, I can only say that I see this only with CSV files.
\\n\\nis there some progress? \\n\\nthanks a lot.\", \"post_time\": \"2013-03-20 08:58:55\" },\n\t{ \"post_id\": 3766, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"Re: why does it cost so long time?\", \"username\": \"bforeman\", \"post_text\": \"The answer as to why it is doubling the time can be found in the ECL Watch graph. Look at the Workunit Details and then the graph and it shows that the job is splitting and reading the file twice. \\n\\nNow as to why it does this, I can only say that I see this only with CSV files. Try the same test with THOR files and you will see a completely different graph. Checking with development for more details.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-18 14:47:02\" },\n\t{ \"post_id\": 3763, \"topic_id\": 846, \"forum_id\": 8, \"post_subject\": \"why does it cost so long time?\", \"username\": \"maruyue\", \"post_text\": \"1. \\npv_record := RECORD\\n UNSIGNED id1;\\t\\n UNSIGNED id2;\\n UNSIGNED id3;\\n UNSIGNED pv;\\nEND;\\n\\nalldata := DATASET('~test::pv_fact', pv_record, CSV(SEPARATOR('\\\\t')));\\n\\noutput(SUM(alldata, alldata.pv));\\n\\n==== cost 7 secs.\\n\\n2. \\npv_record := RECORD\\n UNSIGNED id1;\\t\\n UNSIGNED id2;\\n UNSIGNED id3;\\n UNSIGNED pv;\\nEND;\\n\\nalldata := DATASET('~test::pv_fact', pv_record, CSV(SEPARATOR('\\\\t')));\\n\\noutput(count(alldata));\\n\\n==== cost 7 secs\\n\\n3. \\n\\npv_record := RECORD\\n UNSIGNED id1;\\t\\n UNSIGNED id2;\\n UNSIGNED id3;\\n UNSIGNED pv;\\nEND;\\n\\nalldata := DATASET('~test::pv_fact', pv_record, CSV(SEPARATOR('\\\\t')));\\n\\noutput(SUM(alldata, alldata.pv));\\noutput(count(alldata));\\n\\n==== cost 14 secs\\n\\n[color=#FF0000:1t61c7n0]why the third ecl program cost 14 secs? it should cost 7 secs. Could you explain the reason?\", \"post_time\": \"2013-03-18 11:07:46\" },\n\t{ \"post_id\": 3774, \"topic_id\": 848, \"forum_id\": 8, \"post_subject\": \"Re: how to get each word in a string\", \"username\": \"rtaylor\", \"post_text\": \"Rachel,\\n\\nOK, you don't need any TRANSFORM. the SplitWords function returns a SET OF STRING, so in order to treat that set as a DATASET, you can simply define it as one, like this:IMPORT std;\\n\\ns1 := 'hpcc systems community forum';\\ns2 := 'advanced forum home';\\n\\nds(STRING s) := DATASET(Std.Str.SplitWords(s,' '),{STRING word});\\n\\nds(s1);\\nds(s2);
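And if a single combined word list is wanted, as in the original question, the two results can simply be appended - a small sketch built on the definitions above:

// One DATASET of {STRING word} containing every word from both strings
allWords := ds(s1) + ds(s2);
OUTPUT(allWords);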
HTH,\\n\\nRichard\", \"post_time\": \"2013-03-18 20:53:56\" },\n\t{ \"post_id\": 3772, \"topic_id\": 848, \"forum_id\": 8, \"post_subject\": \"Re: how to get each word in a string\", \"username\": \"rachel\", \"post_text\": \"Hi Richard,\\nI know the splitwords function. I am very comfused about how to write the transform. \\nI write code like this \\nIMPORT std.Str as Str;\\n \\n Preclean(STRING s):=FUNCTION \\n\\t\\tsplit := Str.SplitWords(s,' ');\\n\\t\\tRETURN\\tsplit;\\n\\tEND;\\n\\t\\n\\tLayout:=RECORD \\n\\tstring100 name;\\n\\tEND;\\n\\t\\nLayout TC(File L) := TRANSFORM\\n SELF. name:= $.Preclean(L.names);\\n\\tSELF := L;\\nEND;\\n\\nBut the transform is wrong. The result of preclean is set of strings, but the name here is string. So my problem is how to transform it to string?\\n\\nThanks \\n\\n[quote="rtaylor":1mb3nhfl]Rachel,\\n\\nYou can simply use the STD.Str.SplitWords function from our standard library. The documentation is in the IDE's help file (press F1).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-18 18:19:24\" },\n\t{ \"post_id\": 3771, \"topic_id\": 848, \"forum_id\": 8, \"post_subject\": \"Re: how to get each word in a string\", \"username\": \"bforeman\", \"post_text\": \"What Richard said, or here's another way:\\n\\n InputDS := DATASET([{'hpcc systems community forum'},\\n {'advanced forum home'}],{STRING100 inline});\\n\\n\\n PATTERN Ltrs := PATTERN('[A-Za-z]');\\n PATTERN Char := Ltrs | '-' | '\\\\'';\\n TOKEN Word := Char+;\\n \\t\\n\\n res := PARSE(Inputds,inline,Word,{STRING100 Pword := MATCHTEXT(Word)});\\n\\t\\n\\tres;
\\n\\n Regards,\\n\\n Bob\", \"post_time\": \"2013-03-18 18:18:45\" },\n\t{ \"post_id\": 3770, \"topic_id\": 848, \"forum_id\": 8, \"post_subject\": \"Re: how to get each word in a string\", \"username\": \"rtaylor\", \"post_text\": \"Rachel,\\n\\nYou can simply use the STD.Str.SplitWords function from our standard library. The documentation is in the IDE's help file (press F1).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-18 18:10:34\" },\n\t{ \"post_id\": 3769, \"topic_id\": 848, \"forum_id\": 8, \"post_subject\": \"how to get each word in a string\", \"username\": \"rachel\", \"post_text\": \"Hi\\n\\nA dataset like\\n{ 'hpcc systems community forum',\\n 'advanced forum home' , String100\\n};\\neach word is seperated by space\\nI want to pick each word, and build a new data set like \\n{ 'hpcc','systems','community','forum','advanced','forum','home',String100}\\n\\nHow to write the transform? \\nThanks\", \"post_time\": \"2013-03-18 17:14:56\" },\n\t{ \"post_id\": 3825, \"topic_id\": 850, \"forum_id\": 8, \"post_subject\": \"Re: packagemap issue(s)\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nI'm in process of creating and testing multiple scenarios involving Roxie data update using packagemap.\\n\\nAs per the doc.(RDDERef-3.10.4-1.pdf) :\\n\\nThe definition of a superfile or superkey inside of a package file overrides the definition in Dali, but does NOT change the superfile or superkey definition in Dali Server's DFU.
\\n\\nAs expected, I couldn't retrieve the latest contents of the super-key - on Thor, I used :\\n\\n\\nIMPORT STD;\\n\\nsuperKeyName := '~test::superkey::facebook';\\nOUTPUT(STD.File.SuperFileContents(superKeyName));\\n
\\n\\nwhich didn't return the latest contents(the recently pushed payload INDEX)\\n\\nand on Roxie :\\n\\n\\nIMPORT STD;\\n\\nEXPORT echoSFContents() := FUNCTION\\nSTRING superKeyName := '~test::superkey::facebook';\\nreturn STD.File.SuperFileContents(superKeyName);\\nEND;\\n
\\nI got this error :\\n\\nReported by: Roxie\\nMessage: UNIMPLEMENTED at /var/jenkins/workspace/CE-Candidate-3.10.4/CE/ubuntu_12_04_x86_64/HPCC-Platform/roxie/ccd/ccdserver.cpp:30268\\n
\\n\\nIs it possible to read the current/latest definition of the super-key from the Dali server? Can this information be made available on Thor?\\n\\nThanks and regards !!!\", \"post_time\": \"2013-03-25 11:19:45\" },\n\t{ \"post_id\": 3790, \"topic_id\": 850, \"forum_id\": 8, \"post_subject\": \"Re: packagemap issue(s)\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Dan,\\n\\nYeah, I had read the doc. and knew what you are pointing at but suppose at some point of time, I need to find out that which all sub-files(payload INDEXes) are the part of a super-key, how do I get it. Since the lock is owned by Roxie, one can't start a super-file transaction and update the key - packagemap is the only way out !\\n\\nSurely, I'll reproduce the code and the steps, also, I have some more doubts related to packagemap which would be better explained with code.\\n\\nThanks and regards !!!\", \"post_time\": \"2013-03-20 15:45:01\" },\n\t{ \"post_id\": 3786, \"topic_id\": 850, \"forum_id\": 8, \"post_subject\": \"Re: packagemap issue(s)\", \"username\": \"DSC\", \"post_text\": \"[quote="kaliyugantagonist":2zar0ogk]What exactly happens when a package file is executed using packagemap? Why is it that though the data pertaining to the new INDEX is reflected in the Roxie query, the INDEX doesn't become a part of the super-key?\\nIn the "Rapid Data Delivery Engine Reference" (found here for version 3.10.4) there is a section on Packages and Packagemaps. On the first page of that section there is this paragraph:\\n\\nThe definition of a superfile or superkey inside of a package file overrides the definition in Dali, but does NOT change the superfile or superkey definition in Dali Server's DFU.
\\nSo, it appears that the behavior you see is by design. I wish it were otherwise.\\n\\nCan you by chance publish a small, complete example for maintaining a superkey using packages and packagemaps? That would help everyone (or maybe just myself) that knows that the feature exists but is unsure what the exact steps are to implement it.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-20 12:06:03\" },\n\t{ \"post_id\": 3780, \"topic_id\": 850, \"forum_id\": 8, \"post_subject\": \"packagemap issue(s)\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nOur (business)application scenario is explained in the below post(please ignore the question itself, the link is just to provide the usage scenario) :\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=830\\n\\nIn general, whenever a super-file/super-key is viewed via ECL Watch/Browse Logical Files, its 'cluster' column is blank - why is it so? If I'm creating my super-file and my super-key on Thor and the latter is then pushed to Roxie, the ECL Watch must reflect the cluster for the same.\\n\\nSo we are using packagemap to push the new INDEXes to Roxie so that the Roxie queries can return the latest data. A sample file is :\\n\\n<RoxiePackages>\\n<Package id="facebook_get_date_sentiment.1">\\n<Base id="sapphire::superkey::facebook"/>\\n</Package>\\n<Package id="facebook_get_top_negative_comments.1">\\n<Base id="sapphire::superkey::facebook"/>\\n</Package>\\n<Package id="facebook_get_top_positive_comments.1">\\n<Base id="sapphire::superkey::facebook"/>\\n</Package>\\n<Package id="facebook_select_count_and_sentiment_by_date.1">\\n<Base id="sapphire::superkey::facebook"/>\\n</Package>\\n<Package id="facebook_show_comments_with_sentiment.1">\\n<Base id="sapphire::superkey::facebook"/>\\n</Package>\\n<Package id="sapphire::superkey::facebook">\\n<SuperFile id="~sapphire::superkey::facebook">\\n<SubFile value="~sapphire::index::facebook::facebook_20130319144237967"/>\\n</SuperFile>\\n</Package>
\\n\\nAs seen, a new INDEX sapphire::index::facebook::facebook_20130319144237967 is being added to a super-key sapphire::superkey::facebook. This operation succeeds - ECL Watch shows the INDEX present on both Thor & Roxie, the data is reflected in the Roxie queries. But, if I do a 'View Details' on the super-key, it still doesn't reflect the new INDEX - why is this so? \\nI tried using \\nds := STD.File.SuperFileContents('~sapphire::superkey::facebook'); \\n
\\n\\non both Thor(where the new INDEX name is not returned) and Roxie :\\n\\nException\\nReported by: Roxie\\nMessage: UNIMPLEMENTED at /var/jenkins/workspace/CE-Candidate-3.10.4/CE/ubuntu_12_04_x86_64/HPCC-Platform/roxie/ccd/ccdserver.cpp:30268
\\n\\nWhat exactly happens when a package file is executed using packagemap? Why is it that though the data pertaining to the new INDEX is reflected in the Roxie query, the INDEX doesn't become a part of the super-key?\\n\\nThanks and regards !\", \"post_time\": \"2013-03-20 06:25:50\" },\n\t{ \"post_id\": 3802, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"richardkchapman\", \"post_text\": \"The behaviour of the compiler is by design - the length is passed to the inline C++ separately in a parameter called lenXXXX (where XXXX is the name of the STRING parameter).\\n\\nI'll look into the documentation to make sure it makes that clear. \\n\\nI find when writing inline C++ the best approach is to look at the generated C++ code (look for functions called user1, user2 etc) to get the exact c++ prototype that is generated.\", \"post_time\": \"2013-03-21 16:02:35\" },\n\t{ \"post_id\": 3801, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"DSC\", \"post_text\": \"Great!\\n\\nI opened a bug for this issue: https://track.hpccsystems.com/browse/HPCC-9019. It may be a documentation bug or a code-generator bug, but I think it's a bug regardless.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-21 15:15:23\" },\n\t{ \"post_id\": 3799, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"ksviswa\", \"post_text\": \"Dan,\\n\\nThanks a lot.. it worked..didnt think about this at all..\", \"post_time\": \"2013-03-21 15:08:09\" },\n\t{ \"post_id\": 3797, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"DSC\", \"post_text\": \"I needed more coffee before replying the first time. I was able to duplicate your results.\\n\\nI did find the problem, though: Change the declaration of the C++ function from:\\n\\n
EXPORT STRING StringFormatTimestamp(UNSIGNED4 timestamp,\\n STRING format = '%FT%T',\\n BOOLEAN uselocaltimezone = FALSE) := BEGINC++
\\nTo:\\n\\nEXPORT STRING StringFormatTimestamp(UNSIGNED4 timestamp,\\n VARSTRING format = '%FT%T',\\n BOOLEAN uselocaltimezone = FALSE) := BEGINC++
\\nVARSTRING is a null-terminated string, but I would have thought that the ECL compiler would have translated the incoming STRING to a VARSTRING. At any rate, once I made that change your code spit out the right values, at least on my system.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-21 14:41:17\" },\n\t{ \"post_id\": 3795, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"ksviswa\", \"post_text\": \"Dan,\\n\\nIts still the same no luck..\\n\\nI think each time in the transform function '\\\\n' gets appended to the value. \\n\\nIs there anyway we can clear the buffer in transform function...?\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-03-21 14:26:21\" },\n\t{ \"post_id\": 3793, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"DSC\", \"post_text\": \"Interesting. I wasn't able to get the same incorrect results on my cluster, but that may be due to different distributions or library versions. I'm testing on RHEL6.\\n\\nIt appears that strftime() is not thread safe, as it calls getenv() internally. That may have something to do with the garbage characters, but that seems odd. Still, threading issues can cause some strange results.\\n\\nHere is a modified version of the function I posted. This one uses a mutex to protect the call to strftime(), initializes the buffer to zero, and also checks the return value for strftime(). Give it a shot and let me know how it works in your environment.\\n\\n
EXPORT STRING StringFormatTimestamp(UNSIGNED4 timestamp,\\n STRING format = '%FT%T',\\n BOOLEAN uselocaltimezone = FALSE) := BEGINC++\\n #option pure\\n #option once\\n #include <time.h>\\n #include <pthread.h>\\n #body\\n \\n struct tm timeComponents;\\n time_t theTime = timestamp;\\n int kBufferSize = 256;\\n char buffer[kBufferSize];\\n pthread_mutex_t strftimeMutex = PTHREAD_MUTEX_INITIALIZER;\\n \\n memset(buffer,kBufferSize,0);\\n \\n if (uselocaltimezone)\\n {\\n localtime_r(&theTime,&timeComponents);\\n }\\n else\\n {\\n gmtime_r(&theTime,&timeComponents);\\n }\\n \\n pthread_mutex_lock(&strftimeMutex);\\n if (strftime(buffer,kBufferSize,format,&timeComponents) == 0)\\n {\\n buffer[0] = '\\\\0';\\n }\\n pthread_mutex_unlock(&strftimeMutex);\\n \\n __lenResult = strlen(buffer);\\n __result = NULL;\\n \\n if (__lenResult > 0)\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(__lenResult));\\n memcpy(__result,buffer,__lenResult);\\n }\\nENDC++;
\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-21 12:02:15\" },\n\t{ \"post_id\": 3791, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"ksviswa\", \"post_text\": \"Dan,\\n\\nEncountered a strange error when i used this function.\\n\\n\\n\\ntimestamp := RECORD\\n\\tSTRING strTime;\\n\\tSTRING strFormat;\\n\\tUNSIGNED4 epochtime;\\nEND;\\n\\ntst:=DATASET([\\n\\t{'May-12-2013 14:33', '%b-%d-%Y %H:%M', 0}\\n\\t,{'May-12-2013 14:33:01 -0400', '%b-%d-%Y %H:%M:%S %z', 0}\\n\\t,{'2013-05-12 14:33:23 -0700', '%Y-%m-%d %H:%M:%S %z', 0}\\n\\t,{'2013-05-12 14:33:23 GMT', '%Y-%m-%d %H:%M:%S %Z', 0}\\n\\t,{'2013-05-12 14:33:23 +0000', '%Y-%m-%d %H:%M:%S %z', 0}\\n], timestamp);\\n\\n\\nUNSIGNED4 ConvertToEpochTime(STRING timestamp, STRING format = '%FT%T') := BEGINC++\\n\\t time_t timeinsecs=0LL;\\n\\t struct tm theTime;\\n\\t memset(&theTime, 0, sizeof(struct tm));\\n\\t \\n\\t if( strptime(timestamp, format, &theTime) == NULL) {\\n\\t printf("error\\\\n");\\n }\\n\\t timeinsecs = mktime(&theTime);\\n\\t \\n return timeinsecs;\\nENDC++;\\n\\n\\ntimestamp epochTime(timestamp ts) := TRANSFORM\\n\\tSELF.strTime := ts.strTime;\\n\\tSELF.strFormat := ts.strFormat;\\n\\tSELF.epochTime := ConvertToEpochTime(ts.strTime, ts.strFormat);\\nEND;\\n\\n\\ntstEpoch:=PROJECT(tst, epochTime(LEFT));\\noutput(tstEpoch);\\n\\nSTRING ConvertFromEpochTime(UNSIGNED4 timestamp,\\n STRING format = '%FT%T') := BEGINC++\\n \\n struct tm timeComponents;\\n time_t theTime = timestamp;\\n int kBufferSize = 256;\\n char buffer[kBufferSize];\\n \\n \\n gmtime_r(&theTime,&timeComponents);\\n \\n strftime(buffer,kBufferSize,format,&timeComponents);\\n \\n __lenResult = strlen(buffer);\\n __result = NULL;\\n \\n if (__lenResult > 0)\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(__lenResult));\\n memcpy(__result,buffer,__lenResult);\\n }\\nENDC++;\\n\\n\\ntimeresult := RECORD\\n\\ttimestamp;\\n\\tSTRING resultString;\\nEND;\\n\\ntimeresult StringTime(timestamp ts) := TRANSFORM\\n\\tSELF.strTime := ts.strTime;\\n\\tSELF.strFormat := ts.strFormat;\\n\\tSELF.epochTime := ts.epochTime;\\n\\tSELF.resultString := ConvertFromEpochTime(ts.epochTime,ts.strFormat);\\nEND;\\n\\ntestresult := PROJECT(tstEpoch, StringTime(LEFT));\\n\\noutput(testresult);\\n\\n
\\n\\nThe code works properly in the virtual image locally, But in case of a cluster i get some junk characters appended at the end of the result string.\\n\\n\\n2013-05-12 14:33:23 GMT\\t%Y-%m-%d %H:%M:%S %Z\\t1368387203\\t2013-05-12 19:33:23 GMT[b][u]îQ[/u][/b]\\n2013-05-12 14:33:23 -0700\\t%Y-%m-%d %H:%M:%S %z\\t1368387203\\t2013-05-12 19:33:23 +0000[b][u]îQ[/u][/b]\\n2013-05-12 14:33:23 +0000\\t%Y-%m-%d %H:%M:%S %z\\t1368387203\\t2013-05-12 19:33:23 +0000[b][u]îQ[/u][/b]\\nMay-12-2013 14:33\\t%b-%d-%Y %H:%M\\t1368387180\\tMay-12-2013 19:33[b][u]lîQ[/u][/b]\\nMay-12-2013 14:33:01 -0400\\t%b-%d-%Y %H:%M:%S %z\\t1368387181\\tMay-12-2013 19:33:01 +0000[b][u]mîQ[/u][/b]\\n
\\n\\nIn case i don't pass any format or pass any default format in the transform function, i get the results properly. Not sure if i am missing out something really simple..\\n\\nThanks a lot in advance..\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-03-21 07:09:13\" },\n\t{ \"post_id\": 3789, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Dan..
\\n\\nRegards,\\nksviswa\", \"post_time\": \"2013-03-20 15:27:14\" },\n\t{ \"post_id\": 3787, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Re: Time Conversion\", \"username\": \"DSC\", \"post_text\": \"I have a set of functions for manipulating time, and I wound up using inline C++ functions for that purpose. While I don't have anything that converts something like "Tue, 19 Mar 2013 05:11:54 GMT" to epoch time, I do have the reverse (timestamp to human-readable string, via strftime()):\\n\\n
//==========================================================================\\n// StringFormatTimestamp\\n//\\n// timestamp: Integer representing the number of seconds since\\n// midnight on Jan. 1, 1970 UTC.\\n// format: The string format to use to create the result. See\\n// man page for strftime. Optional, defaults to '%FT%T'\\n// which is the date and time in YYYY-MM-DDTHH:MM:SS\\n// format.\\n// useLocalTimeZone: If TRUE, the timestamp is converted to local time\\n// during the conversion to a readable value.\\n// Optional, defaults to FALSE.\\n// \\n// Returns: The given time converted to a readable date/time,\\n// depending on the value of the format argument.\\n//==========================================================================\\nEXPORT STRING StringFormatTimestamp(UNSIGNED4 timestamp,\\n STRING format = '%FT%T',\\n BOOLEAN uselocaltimezone = FALSE) := BEGINC++\\n #option pure\\n #option once\\n #include <time.h>\\n #body\\n struct tm timeComponents;\\n time_t theTime = timestamp;\\n int kBufferSize = 256;\\n char buffer[kBufferSize];\\n \\n if (uselocaltimezone)\\n {\\n localtime_r(&theTime,&timeComponents);\\n }\\n else\\n {\\n gmtime_r(&theTime,&timeComponents);\\n }\\n \\n strftime(buffer,kBufferSize,format,&timeComponents);\\n \\n __lenResult = strlen(buffer);\\n __result = NULL;\\n \\n if (__lenResult > 0)\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(__lenResult));\\n memcpy(__result,buffer,__lenResult);\\n }\\nENDC++;
\\nMy convert-to-epoch-time functions all use individual date/time components as arguments. Well, I do have one for a UTC datetime string (YYYY-MM-DDTHH:MM:SS).\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-20 12:12:56\" },\n\t{ \"post_id\": 3782, \"topic_id\": 851, \"forum_id\": 8, \"post_subject\": \"Time Conversion\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nWhat is the best way to convert a date to an epoch time and vice versa..?\\n\\n\\n\\nEx: Date String : "Tue, 19 Mar 2013 05:11:54 GMT"\\n Epoch Time in Seconds : 1363669914\\n\\n Epoch Time : 1363669914\\n Date String : "Tue, 19 Mar 2013 05:11:54 GMT"\\n\\n
\\n\\nIs there a built in function for the same or any standard library function available..?\\n\\nI came across a way where we add inline C++ to ECL code. Is that the only way..?\\n\\nKindly help.\\n\\nThanks a lot in advance.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-03-20 09:13:09\" },\n\t{ \"post_id\": 3860, \"topic_id\": 853, \"forum_id\": 8, \"post_subject\": \"Re: Multilingual File problem\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\n\\nMultilingual data is now shown properly in ECL Watch and WsECL.\\nI am now using UNICODE as datatype for multilingual col and UNICODE option in DATASET as well.\", \"post_time\": \"2013-04-01 03:58:57\" },\n\t{ \"post_id\": 3794, \"topic_id\": 853, \"forum_id\": 8, \"post_subject\": \"Re: Multilingual File problem\", \"username\": \"DSC\", \"post_text\": \"Two questions:\\n\\n1) Your initial read of the CSV file, into File_Layout_Subfile_Dataset, does not contain a UNICODE option but after you create the output file you're reading that output file with the UNICODE option. Was that intentional?\\n\\n2) Why are you writing the output file as CSV? Writing it as a THOR/FLAT file would be much more efficient, and you have a record structure that would easily support that.\\n\\nDan\", \"post_time\": \"2013-03-21 12:08:09\" },\n\t{ \"post_id\": 3792, \"topic_id\": 853, \"forum_id\": 8, \"post_subject\": \"Multilingual File problem\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nA CSV file (Unicode UTF-8) which consists multilingual data is sprayed and written to disk as logical file properly.But when i am creating INDEX on logical file, the data is not shown properly. I am building INDEX to fetch data from INDEX (as DATASET) in ROXIE queries.\\n\\nSteps performed:\\n1.Spraying file through DFUPLUS (which gives option for encoding-UTF8).\\n2.Writing logical file by reading sprayed file.\\n3.Creating INDEX on logical file.\\n\\n\\nCode of dfuplus:\\ndfuplus server=172.25.37.135 username=root password=newuser_123 action=spray srcip=172.25.37.135 srcfile=/var/lib/HPCCSystems/mydropzone/twitterdata_user/twitterdata_user.csv\\ndstcluster=mythor dstname=~multilingual::sprayed_datafile_csv format=csv encoding=utf8 separator=;\\n
\\n\\nECL code to write logical file:\\nIMPORT STD;\\n\\nLayout_twitterdata_user := RECORD\\nSTRING100 UserID;\\nSTRING100 SearchKeyWord;\\nUNICODE5000 TwitterText;\\nSTRING100 Sentiment;\\nSTRING50 Date;\\nSTRING50 Male_per;\\nSTRING50 Female_per;\\nSTRING100 TwitterUser;\\nINTEGER8 TwitterUniquID;\\nSTRING100 TwitterLocation;\\nINTEGER8 InsertID;\\nSTRING100 CountryLocation;\\nSTRING5000 TwitterText_Sentiment;\\nINTEGER8 TwitterUserID;\\nSTRING5000 IdeaCloudWord;\\nSTRING5000 IdeaCloudeText;\\nSTRING1500 DetectedLanguage;\\nEND;\\n\\nFile_Layout_Subfile_Dataset :=\\nDATASET('~multilingual::sprayed_datafile_csv',Layout_twitterdata_user,CSV(SEPARATOR(';')));\\n\\n/*create logical file with record structure*/\\nsubfileCreation := OUTPUT(File_Layout_Subfile_Dataset,,'~multilingual::csv_newrecords',CSV,OVERWRITE);\\n\\nsubfileCreation;\\n
\\n\\n\\nECL code to build INDEX file:\\nLayout_twitterdata_user := RECORD\\nSTRING100 UserID;\\nSTRING100 SearchKeyWord;\\nUNICODE5000 TwitterText;\\nSTRING100 Sentiment;\\nSTRING50 Date;\\nSTRING50 Male_per;\\nSTRING50 Female_per;\\nSTRING100 TwitterUser;\\nINTEGER8 TwitterUniquID;\\nSTRING100 TwitterLocation;\\nINTEGER8 InsertID;\\nSTRING100 CountryLocation;\\nSTRING5000 TwitterText_Sentiment;\\nINTEGER8 TwitterUserID;\\nSTRING5000 IdeaCloudWord;\\nSTRING5000 IdeaCloudeText;\\nSTRING1500 DetectedLanguage;\\nEND;\\n\\n\\n\\nSuperFile_Dataset := DATASET('~multilingual::csv_newrecords',{Layout_twitterdata_user,UNSIGNED8 fpos{virtual(fileposition)}},CSV(UNICODE));\\n\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{UserID},\\n{SearchKeyWord,TwitterText,Sentiment,Date,Male_per,Female_per,TwitterUser,TwitterUniquID,TwitterLocation,\\nInsertID,CountryLocation,TwitterText_Sentiment,TwitterUserID,\\nIdeaCloudWord,IdeaCloudeText,DetectedLanguage,fpos},'~multilingual::datafile_index');\\nidx := BUILDINDEX(IDX_SuperFile,OVERWRITE);\\n\\nidx;\\n
\", \"post_time\": \"2013-03-21 11:56:08\" },\n\t{ \"post_id\": 3869, \"topic_id\": 857, \"forum_id\": 8, \"post_subject\": \"Re: ToXML producing empty tags\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI for one think the only viable option is a qualifier to 'toxml' allowing the supression of empty tags. (Note Perl has a 'supressempty' option)\\n\\nREGEXREPLACE cannot be fullproof, as there are too many situations to consider.\\nEscape Characters\\nTags inside quotes (so not tags)\\nother constructs e.g. CDATA\\n\\nIt's a common requirment, and will be faster if built into ECL/ECL libraries.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-04-02 08:43:46\" },\n\t{ \"post_id\": 3829, \"topic_id\": 857, \"forum_id\": 8, \"post_subject\": \"Re: ToXML producing empty tags\", \"username\": \"bforeman\", \"post_text\": \"Not that I know of, unless the entire field was always blank on export.\\nMaybe you could try filtering the recordset prior to export to xml, but it may be a wasted effort as you need all records regardless of values.\\n\\nBob\", \"post_time\": \"2013-03-25 17:02:15\" },\n\t{ \"post_id\": 3828, \"topic_id\": 857, \"forum_id\": 8, \"post_subject\": \"Re: ToXML producing empty tags\", \"username\": \"ahmedsha\", \"post_text\": \"Thanks Bob for responding.\\n\\nThe tags are not always blank - which tags are empty and which hold values varies from query to query.\\n\\nWe have already experimented with REGEXREPLACE. It works but, ideally, we would like to avoid generating them in the first place.\\n\\nI guess the answer is there is no option to do this?\", \"post_time\": \"2013-03-25 15:37:00\" },\n\t{ \"post_id\": 3827, \"topic_id\": 857, \"forum_id\": 8, \"post_subject\": \"Re: ToXML producing empty tags\", \"username\": \"bforeman\", \"post_text\": \"You could try pre-processing or post-processing the data.\\n\\nIf the tags are always blank, could you just create a TABLE that contains the information you need, and then export that toxml?\\n\\nOr, with post processing, you could possibly just PARSE the XML and only extract what you need in a new output, or maybe just a simple REGEXREPLACE (look for the empty tags and replace with a blank string). \\n\\nI think you have a few options to try \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-25 15:31:18\" },\n\t{ \"post_id\": 3810, \"topic_id\": 857, \"forum_id\": 8, \"post_subject\": \"ToXML producing empty tags\", \"username\": \"ahmedsha\", \"post_text\": \"When we use toxml() produce XML, all empty tags are included. \\n\\nThe empty tags form 90% of the xml structure. Having to store such a large structure that is nearly entirely empty tags, means that cluster is substantially larger than it has to be.\\n\\nIs there a way to get toxml() to exclude all empty tags?\\n\\nThanks\", \"post_time\": \"2013-03-22 10:56:19\" },\n\t{ \"post_id\": 3843, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"Re: how to replace stop word\", \"username\": \"rtaylor\", \"post_text\": \"You could try using STD.Str.FindReplace instead of REGEXREPLACE\", \"post_time\": \"2013-03-28 15:45:25\" },\n\t{ \"post_id\": 3842, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"Re: how to replace stop word\", \"username\": \"rachel\", \"post_text\": \"so, if don't use REGEXREPLACE. 
How to realize it by other methods?\", \"post_time\": \"2013-03-28 12:47:36\" },\n\t{ \"post_id\": 3834, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"Re: how to replace stop word\", \"username\": \"rtaylor\", \"post_text\": \"rachel,\\n\\nAre you reading from and trying to write to the same filename? If so, that's not allowed in HPCC. Try changing the name of the file to OUTPUT.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-26 20:22:42\" },\n\t{ \"post_id\": 3833, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"Re: how to replace stop word\", \"username\": \"rachel\", \"post_text\": \"but it seems some problem when I replace stopwords dataset by a dataset created from a file. syntax is correct, but output nothing for stopWordRegex.\", \"post_time\": \"2013-03-26 18:07:57\" },\n\t{ \"post_id\": 3820, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"Re: how to replace stop word\", \"username\": \"DSC\", \"post_text\": \"Here is one way to do it, using REGEXREPLACE():\\n\\n
WordsRec := RECORD\\n STRING w;\\nEND;\\n\\nstopwords := DATASET\\n (\\n [\\n {'of'},{'the'},{'a'},{'for'},{'and'}\\n ],\\n WordsRec\\n );\\n\\n// Combine the stop words into a single regex pattern\\nWordsRec CombineStopWords(WordsRec l, WordsRec r) := TRANSFORM\\n SELF.w := l.w + IF(l.w != '', '|', '') + r.w;\\nEND;\\n\\ncombinedStopWords := ROLLUP\\n (\\n stopwords,\\n TRUE,\\n CombineStopWords(LEFT,RIGHT)\\n );\\n\\nstopWordRegex := '\\\\\\\\b(' + combinedStopWords[1].w + ')\\\\\\\\b';\\n\\nOUTPUT(stopWordRegex,NAMED('stopWordRegex'));\\n\\nDescRec := RECORD\\n STRING name;\\n STRING pro;\\nEND;\\n\\ndes := DATASET\\n (\\n [\\n {'mike','a lawer in the goverment'},\\n {'john','a professor in the state university'}\\n ],\\n DescRec\\n );\\n\\nOUTPUT(des,NAMED('des'));\\n\\n// Apply the regex against the data\\nDescRec RemoveStopWords(DescRec l) := TRANSFORM\\n SELF.pro := REGEXREPLACE(stopWordRegex,l.pro,'',NOCASE);\\n SELF := l;\\nEND;\\n\\nnewDes := PROJECT(des,RemoveStopWords(LEFT));\\n\\nOUTPUT(newDes,NAMED('newDes'));\\n
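A small follow-up sketch (not part of the original reply): stripping the stop words leaves doubled spaces behind, which a second REGEXREPLACE over the same field can squeeze out:

// Collapse runs of spaces left by the removed words and trim the ends
DescRec TidySpaces(DescRec l) := TRANSFORM
    SELF.pro := TRIM(REGEXREPLACE(' +', l.pro, ' '), LEFT, RIGHT);
    SELF := l;
END;

tidyDes := PROJECT(newDes, TidySpaces(LEFT));
OUTPUT(tidyDes, NAMED('tidyDes'));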
\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-23 13:58:11\" },\n\t{ \"post_id\": 3814, \"topic_id\": 858, \"forum_id\": 8, \"post_subject\": \"how to replace stop word\", \"username\": \"rachel\", \"post_text\": \"the stop words like\\nstopwords:=DATASET ([{'of'},{'the'},{'a'},{'for'},{'and'}], STRING5 key});\\ndata file like des:=DATASET ([{'mike','a lawer in the goverment'},{'john','a professor in the state university'}],{string5 name;string20 pro}); \\n\\nHow to replace stop word in data as space'' ?\\n\\nThanks\", \"post_time\": \"2013-03-22 15:23:30\" },\n\t{ \"post_id\": 3886, \"topic_id\": 859, \"forum_id\": 8, \"post_subject\": \"Re: BACKUP of Cluster\", \"username\": \"ultima_centauri\", \"post_text\": \"In Configuration Manager, under the Agents configuration tab; the full redundancy option will provide you "data backup" and in this case that means that you'll effectively be using twice the disk space on each of the nodes, since the that is replicated. E.g. In a 10 node myroxie cluster: node 1 and 6 will have each others data, 2 and 7 share, etc.\", \"post_time\": \"2013-04-05 00:07:13\" },\n\t{ \"post_id\": 3841, \"topic_id\": 859, \"forum_id\": 8, \"post_subject\": \"Re: BACKUP of Cluster\", \"username\": \"DSC\", \"post_text\": \"A little more information can be found in another old post: http://hpccsystems.com/bb/viewtopic.php?f=14&t=634.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-28 11:37:48\" },\n\t{ \"post_id\": 3838, \"topic_id\": 859, \"forum_id\": 8, \"post_subject\": \"Re: BACKUP of Cluster\", \"username\": \"ultima_centauri\", \"post_text\": \"In regards the first question check the thread below:\\n\\nviewtopic.php?f=15&t=321\\n\\n\\nHow to swap a node with other node in case of :\\n1. A node fails on which Mydali is running
\\n\\nIf you refer to the "SwapNode" option that only applies to the mythor slave nodes, it doesn't apply for mydali or mythor master, as far as the myroxie nodes, I will need to do a little more research on the agents configuration option under myroxie in configmanager.\", \"post_time\": \"2013-03-28 00:50:54\" },\n\t{ \"post_id\": 3826, \"topic_id\": 859, \"forum_id\": 8, \"post_subject\": \"BACKUP of Cluster\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nI have large amount of data (INDEXes, Superfiles, Superkeys) on 4 node HPCC cluster. If situation arises in which a node or all 4 nodes crashes/closed down(shutdown)/ip changed, then :\\n\\n1. How to take backup of complete cluster such that in above situation we are able to restore the queries and data. Please elaborate ThorCluster Backup topic in UsingConfigManager.pdf\\n\\n2. How to swap a node with other node in case of :\\n1. A node fails on which Mydali is running\\n2. A node fails on which MyThor/MyRoxie master are running and so on
\\n\\nThanks and Regards!!\", \"post_time\": \"2013-03-25 11:56:41\" },\n\t{ \"post_id\": 3840, \"topic_id\": 861, \"forum_id\": 8, \"post_subject\": \"Re: How to stop thor slaves\", \"username\": \"DSC\", \"post_text\": \"I supposed you could go to each node and use the Linux kill command for that. But why would you kill the slaves and not the master? The master without slaves is relatively useless (as well as the converse). If you're goal is to reset Thor then you should probably reset the entire Thor system rather than just the slaves.\\n\\nAlso, if you kill only the slaves then eventually the watchdog process will kick in and restart them.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-28 11:33:44\" },\n\t{ \"post_id\": 3839, \"topic_id\": 861, \"forum_id\": 8, \"post_subject\": \"Re: How to stop thor slaves\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\n\\nIf i need to stop Thor master and slaves i will use the command which you mentioned above.\\n\\nBut my requirement is to stop slave/slaves (currently having 2 slaves on each 2 diff nodes) and not Thor master i.e MyThor.\\n\\nWhich command to use for this?\", \"post_time\": \"2013-03-28 06:00:53\" },\n\t{ \"post_id\": 3832, \"topic_id\": 861, \"forum_id\": 8, \"post_subject\": \"Re: How to stop thor slaves\", \"username\": \"DSC\", \"post_text\": \"You probably want to stop Thor itself, which means stopping both the master and all of the associated slaves. On your controller node, issue:\\n\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -c mythor stop
\\n'stop' could also be 'start' or 'restart'. The 'mythor' bit refers to the name of your Thor cluster.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-03-26 14:45:41\" },\n\t{ \"post_id\": 3831, \"topic_id\": 861, \"forum_id\": 8, \"post_subject\": \"How to stop thor slaves\", \"username\": \"prachi\", \"post_text\": \"Hi ,\\n\\nUsing service hpcc-init status/start/stop command we can start/stop/status of components.\\n\\nWhich command is used to start/stop slaves of THOR?\", \"post_time\": \"2013-03-26 12:00:31\" },\n\t{ \"post_id\": 3848, \"topic_id\": 862, \"forum_id\": 8, \"post_subject\": \"Re: How to check a record in a Superfile?\", \"username\": \"Leofei\", \"post_text\": \"I see, thank you! Richard.\\n\\nLeo\", \"post_time\": \"2013-03-28 20:41:03\" },\n\t{ \"post_id\": 3847, \"topic_id\": 862, \"forum_id\": 8, \"post_subject\": \"Re: How to check a record in a Superfile?\", \"username\": \"rtaylor\", \"post_text\": \"Leo,\\n\\nLots of work, but you could define each subfile as a DATASET, then output each dataset, filtering for the one record value, and see which one has the record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-28 20:37:22\" },\n\t{ \"post_id\": 3846, \"topic_id\": 862, \"forum_id\": 8, \"post_subject\": \"Re: How to check a record in a Superfile?\", \"username\": \"Leofei\", \"post_text\": \"Hi Richard,\\n\\nThanks for your quick reply. Because the files in the superfile have already been processed. But we found there are some problem in a couple of records, and we want to track back to check with the original files. If we know the records belong to which subfile, it's easier to find the original file. Do you have any suggestion? Thx!\\n\\n-Leo\", \"post_time\": \"2013-03-28 20:13:40\" },\n\t{ \"post_id\": 3845, \"topic_id\": 862, \"forum_id\": 8, \"post_subject\": \"Re: How to check a record in a Superfile?\", \"username\": \"rtaylor\", \"post_text\": \"Leo,\\n\\nSince the whole point of a superfile is to be able to treat a set of logical files as a single logical file, my question back to you is -- why do you need to know? What purpose does it serve to know this?\\n\\nAnd my answer to your question is -- I have no idea how you would discover that. You could build the filename in as a field in each subfile, but why?\\n\\nRichard\", \"post_time\": \"2013-03-28 20:02:14\" },\n\t{ \"post_id\": 3844, \"topic_id\": 862, \"forum_id\": 8, \"post_subject\": \"How to check a record in a Superfile?\", \"username\": \"Leofei\", \"post_text\": \"Hi, \\n\\nI have a question like this:\\n\\nA superfile contians hundreds of subfiles. There is one record in that superfile with a unique ID. I'm wondering how I can know which subfile this record is in? There are hundreds of subfiles in that superfile, it's difficult to check it one by one. Thx a lot!\\n\\n-Leo\", \"post_time\": \"2013-03-28 19:56:22\" },\n\t{ \"post_id\": 3873, \"topic_id\": 863, \"forum_id\": 8, \"post_subject\": \"Re: URL Encoding /Decoding\", \"username\": \"DSC\", \"post_text\": \"To my knowledge, there are no built-in functions for encoding and decoding URL-type strings. 
Here is some inline C++ to do the trick inspired by this code.\\n\\nSTRING DecodeURL(VARSTRING url) := BEGINC++\\n #include <string.h>\\n #body\\n const char HEX2DEC[256] = \\n {\\n /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */\\n /* 0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 1 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 2 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,\\n\\n /* 4 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 5 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 6 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 7 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n\\n /* 8 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* 9 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* A */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* B */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n\\n /* C */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* D */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* E */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,\\n /* F */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1\\n };\\n const int urlLength = strlen(url);\\n const char* beginInputPtr = url;\\n const char* endInputPtr = beginInputPtr + urlLength;\\n const char* endCheckPtr = endInputPtr - 2; // last decodable '%' \\n char* endOutputPtr = NULL;\\n \\n if (urlLength > 0)\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(urlLength));\\n endOutputPtr = __result;\\n \\n while (beginInputPtr < endCheckPtr)\\n {\\n if (*beginInputPtr == '%')\\n {\\n char dec1, dec2;\\n if (-1 != (dec1 = HEX2DEC[*(beginInputPtr + 1)]) && -1 != (dec2 = HEX2DEC[*(beginInputPtr + 2)]))\\n {\\n *endOutputPtr++ = (dec1 << 4) + dec2;\\n beginInputPtr += 3;\\n continue;\\n }\\n }\\n\\n *endOutputPtr++ = *beginInputPtr++;\\n }\\n \\n // the last 2- chars\\n while (beginInputPtr < endInputPtr)\\n {\\n *endOutputPtr++ = *beginInputPtr++;\\n }\\n \\n __lenResult = endOutputPtr - __result;\\n }\\n else\\n {\\n __lenResult = 0;\\n __result = NULL;\\n }\\nENDC++;\\n\\nString EncodeURL(VARSTRING url) := BEGINC++\\n #include <string.h>\\n #body\\n const char SAFE[256] =\\n {\\n /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */\\n /* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,\\n\\n /* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,\\n /* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,\\n /* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,\\n /* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,\\n\\n /* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n\\n /* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\\n /* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0\\n };\\n const char DEC2HEX[16 + 1] = "0123456789ABCDEF";\\n const int urlLength = strlen(url);\\n const char* beginInputPtr = url;\\n const char* endInputPtr = beginInputPtr + urlLength;\\n char* endOutputPtr = NULL;\\n \\n if (urlLength > 0)\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(urlLength * 3));\\n endOutputPtr = __result;\\n\\n for (; beginInputPtr < endInputPtr; ++beginInputPtr)\\n {\\n if (SAFE[*beginInputPtr]) \\n {\\n *endOutputPtr++ = 
*beginInputPtr;\\n }\\n else\\n {\\n // escape this char\\n *endOutputPtr++ = '%';\\n *endOutputPtr++ = DEC2HEX[*beginInputPtr >> 4];\\n *endOutputPtr++ = DEC2HEX[*beginInputPtr & 0x0F];\\n }\\n }\\n \\n __lenResult = endOutputPtr - __result;\\n }\\n else\\n {\\n __lenResult = 0;\\n __result = NULL;\\n }\\nENDC++;\\n\\n//======================================================================\\n\\ns := 'http%3A%2f%2f';\\n\\ndecoded := DecodeURL(s);\\n\\nOUTPUT(decoded);\\n\\nencoded := EncodeURL(decoded);\\n\\nOUTPUT(encoded);
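One caveat, offered as a hedge rather than a guarantee: the DecodeURL above only translates %XX escapes, so if your input also uses '+' for spaces (form-encoded query strings), you could swap those out first with the standard string library before decoding, for example:

IMPORT Std;
decodedForm := DecodeURL(Std.Str.FindReplace(s, '+', ' '));
OUTPUT(decodedForm);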
\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-04-02 12:07:51\" },\n\t{ \"post_id\": 3849, \"topic_id\": 863, \"forum_id\": 8, \"post_subject\": \"URL Encoding /Decoding\", \"username\": \"arun_S\", \"post_text\": \"Hi,\\n\\nIs there any inbuilt HPCC method or function which can be used for URL encoding or decoding..?\\n\\n\\nEx : \\n\\nURL : http%3A%2F%2F\\nDecoded URL : http://\\n\\n
\\n\\nIf not available, which is the best way to approach the same..?\\n\\nKindly help.\\n\\nThanks and Regards\\narun\", \"post_time\": \"2013-03-29 07:17:58\" },\n\t{ \"post_id\": 3903, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Re: Performance Issue using Parse and JOIN\", \"username\": \"DSC\", \"post_text\": \"[quote="ksviswa":3q5va3fy]Not able to think of any logic where i can convert to equality condition.\\nHere is some code showing how you can convert your IpCityBlock dataset into an IP-specific structure:\\n\\nIPCityBlockRangeRec := RECORD\\n UNSIGNED4 startIPNum;\\n UNSIGNED4 endIPNum;\\n UNSIGNED4 locationCode\\nEND;\\n\\nds1 := DATASET\\n (\\n [\\n // {1,10,100}\\n {1123631104,1123631124,32191},\\n {1123633104,1123635124,32191},\\n {1123733104,1123733204,32192}\\n ],\\n IPCityBlockRangeRec\\n );\\n\\nIPCityBlockRec := RECORD\\n UNSIGNED4 ipNum;\\n UNSIGNED4 locationCode;\\nEND;\\n\\nIPCityBlockRec DoTransform(IPCityBlockRangeRec l,\\n UNSIGNED4 c) := TRANSFORM\\n SELF.ipNum := l.startIPNum + c - 1;\\n SELF.locationCode := l.locationCode;\\nEND;\\n\\nds2 := NORMALIZE\\n (\\n ds1,\\n LEFT.endIPNum - LEFT.startIPNum + 1,\\n DoTransform(LEFT,COUNTER)\\n );\\n\\nds3 := DISTRIBUTE(ds2,ipNum);\\n\\nOUTPUT(ds3,ALL);\\n
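If it helps, here is a rough, untested sketch of what the runtime lookup could then look like. It assumes DecimalAddr is the record set from your original JOIN (with its decimal_address field) and ds3 is the expanded, distributed lookup built above; the output record is purely illustrative:

LocRec := RECORD
    RECORDOF(DecimalAddr);
    UNSIGNED4 locationCode;
END;

// put the log records on the same key used to distribute ds3
DecimalAddrDist := DISTRIBUTE(DecimalAddr, decimal_address);

LocationCode_Local := JOIN(DecimalAddrDist, ds3,
    LEFT.decimal_address = RIGHT.ipNum,   // equality test instead of BETWEEN
    TRANSFORM(LocRec,
              SELF.locationCode := RIGHT.locationCode,
              SELF := LEFT),
    LEFT OUTER, LOCAL);

The expansion itself stays a separate, occasional job.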
\\nYou would execute this code only once, or whenever the original IpCityBlock data changed, write it out to a new dataset, then reference the new dataset in your runtime code.\\n\\nYou have a choice on what this new dataset actually is. In my example, I distributed the records based on the ipNum value and the assumption is that the data would exist as a logical flat (Thor) file. In your runtime code, you could then distribute your incoming log records on the decimal version of the IP number and use a local join to quickly lookup the right locationCode. It's quick because you can test for an equality rather than a range.\\n\\nAn alternate method would be to build a payload index file instead, then use a half-keyed join. A variation on that -- and I suspect the fastest-performing version -- would be to build a distributed payload index and use a local half-keyed join.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-04-09 12:00:18\" },\n\t{ \"post_id\": 3901, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Re: Performance Issue using Parse and JOIN\", \"username\": \"rtaylor\", \"post_text\": \"Is there a possible way to spray the file without distributing and then use distribute to each nodes so that this ipcityblock dataset copy is present in all the nodes and then use local in the join condition..?
You're already using the ALL option on the JOIN, which means the entire right-hand-side dataset is copied in memory to each node to do the JOIN (making it an implicitly local operation).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-09 10:44:59\" },\n\t{ \"post_id\": 3898, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Re: Performance Issue using Parse and JOIN\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nNot able to think of any logic where i can convert to equality condition.\\n\\nI did try indexing the file as it is , but the performance was the same. \\n\\nExpanding the dataset to discrete IP addresses may not work because the range is not consistent. Sometimes there can be a difference of 2000, sometimes it can be 5000 also.\\n\\nIs there a possible way to spray the file without distributing and then use distribute to each nodes so that this ipcityblock dataset copy is present in all the nodes and then use local in the join condition..? \\n\\nKindly suggest.. let me know if there's any other possibility also.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-04-08 19:15:28\" },\n\t{ \"post_id\": 3890, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Re: Performance Issue using Parse and JOIN\", \"username\": \"DSC\", \"post_text\": \"Specifically, regarding that JOIN, you might want to try expanding your IpCityBlock dataset into discrete IP addresses.\\n\\nIt looks like the existing dataset contains fields like 'LocationCode,BeginIPAddress,EndIPAddress'. Try expanding it so that every record contains 'LocationCode,IPAddress' instead. Explode that range into unique values, in other words, and turn it into an index if it isn't already.\\n\\nWhile the resulting dataset will be much bigger, that JOIN operation will go much faster because you'll be matching an exact value (IP address).\\n\\nBonus points for distributing both datasets according to IP address and using LOCAL.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-05 14:34:06\" },\n\t{ \"post_id\": 3889, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Re: Performance Issue using Parse and JOIN\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nHave you tried putting the second PARSE pass logic into the first PARSE? That would eliminate one whole table scan (if it's possible to do so).\\n\\nNext, your JOIN logic is purely "fuzzy" which is always going to slow things down. Is there some logic to the start-end IPs in the lookup table that would allow you to reduce this to an equality condition? That might make things go faster.\\n\\nLastly, 12 minutes on a 50-node cluster to process through 250Gb of data seems to be decent performance to me (but I am not the expert in that area), but if you really really need it to go faster, then you could try running it on a larger cluster (100 or 200 nodes).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-05 14:02:37\" },\n\t{ \"post_id\": 3883, \"topic_id\": 868, \"forum_id\": 8, \"post_subject\": \"Performance Issue using Parse and JOIN\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where there are many log files and do some analysis on the same.\\n\\nThe code works properly but takes lot of time to execute.\\n\\nEnvironment Details :\\n\\n\\nCluster : 50 Node thor cluster\\nSize of the File : 250 GB\\nNo. of Records : 162 Million (The size and number can increase more)\\n
\\n\\nSample Log File :\\n\\nxxx.xxx.xxx.xxx - - [dd/mm/yyyy:00:00:00 -0500] "GET /abc?a=123&b=135&c=135&d=145&e=135&f=135 HTTP/1.1" 204 - "http://www.abc.com" "MSIE 9"\\n
\\n\\nInitial Parse Code : Getting all the individual fields.\\n\\nlog_init := PARSE(log_seq, line, pattrn, logFields, FIRST,maxlength(8192));\\n
\\n\\nSecond Parse Code : Get all the key value pair from the request field \\n\\nEx : \\na | 123 \\nb | 135 \\nc | 135 ...\\n\\nkeyvals_init := PARSE(log_init,request,key_val,keyFields,many,max,SCAN,nocase); \\n
\\n\\nDenormalize the key val pairs to separate fields.\\n\\n ip ..... status browser a b c\\nxxx.xxx.xxx.xxx 204 MSIE 123 135 135\\n
\\n\\nMap the IP to respective region or location :\\n\\n\\nFirst convert the respective IP Address to a decimal notation and then check the range and accordingly get the location code.\\n\\nLocationCode_Details_RecordSet \\n\\t\\t:= JOIN(DecimalAddr,\\n\\t\\t\\tIpCityBlock,\\n\\t\\t\\tLEFT.decimal_address BETWEEN RIGHT.startIpNum and RIGHT.endIpNum,\\n\\t\\t\\t\\tjoinCityBlockInfo(LEFT,RIGHT),\\n\\t\\t\\t\\tLEFT OUTER,ALL);\\n\\nEx : \\n\\nIP Address | Decimal Notation\\n66.249.71.23 1123632919\\n\\nIP City Block Ex :\\n\\nstartIpNum | EndIPNum | locationCode\\n1123631104\\t1123633663\\t32191\\n\\n
\\n\\nThe geoip city block file has around 1 million records.\\n\\nThe initial parse and the join take up the majority of the time.\\nJust for 10000 records, the initial parse takes around 600 seconds and the join operation takes around 200 seconds.\\n
\\n\\n\\nHow can i optimize the same, the data is distributed uniformly. Am i missing something..? Kindly help regarding the same.\\n\\nScreenshot for the time taken\\n\\n[attachment=1:2z7kr0e6]Sample1.jpg\\n\\n[attachment=0:2z7kr0e6]sample2.jpg\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-04-04 07:20:23\" },\n\t{ \"post_id\": 3888, \"topic_id\": 869, \"forum_id\": 8, \"post_subject\": \"Re: Full Keyed Vs Half Keyed Joins\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nIn a full-keyed JOIN, the right recordset (JOIN's second parameter) is a DATASET and the KEYED option names an INDEX into that DATASET that is used to find the matching records.\\n\\nIn a half-keyed JOIN, the right recordset (JOIN's second parameter) is an INDEX (usually with payload fields).\\n\\nYou can accomplish the exact same functionality of a full-keyed JOIN by doing a half-keyed JOIN and then a FETCH. But if your INDEX has payload fields, then you can eliminate that FETCH, resulting in one less disk access = faster performance.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-05 13:07:13\" },\n\t{ \"post_id\": 3885, \"topic_id\": 869, \"forum_id\": 8, \"post_subject\": \"Re: Full Keyed Vs Half Keyed Joins\", \"username\": \"DSC\", \"post_text\": \"There is a section in the ECL Programmer's Guide devoted to indexes. It does a great job describing simple index usage as well as full- and half-key joins. Look for the "Using ECL Keys (INDEX Files)" section.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-04 14:45:30\" },\n\t{ \"post_id\": 3884, \"topic_id\": 869, \"forum_id\": 8, \"post_subject\": \"Full Keyed Vs Half Keyed Joins\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nWhat is the exact difference between Full keyed and Half Keyed Joins..? Which is preferable to be used in joins and does each have a impact on performance.\\n\\nFull Keyed Joins : we use additional KEYED in the JOIN condition.\\nHalf Keyed Joins : We dont use KEYED word.\\n\\nNot able to find any other differences from the language manual..\\n\\nFull keyed join in a multi node cluster gives me an error :\\n\\n
\\nError: System error: -1: Graph[14], keyedjoin[18]: SLAVE 10.0.1.3:20100: CFileSerialStream::get read past end of stream\\n
\\n\\nAny suggestions on the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-04-04 14:36:22\" },\n\t{ \"post_id\": 3902, \"topic_id\": 872, \"forum_id\": 8, \"post_subject\": \"Template Language\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nI'm working on Template Language.Right now i'm confused with the template keywords #CONSTSANT and #STORED.According to my understsnding #CONSTSNT value can't be over-writable at runtime but #STORED value can be over-writable.\\nI have witten small piece of code to understand the difference between these 2 keywords but both are giving same output i.e., #constant is not keeping its value constant at runtime.\\n\\nLayout_per := RECORD\\n\\t\\t\\t UNSIGNED8 ID;\\n\\t\\t\\t STRING15 FirstName ; \\n\\t\\t\\t STRING25 LastName ;\\n\\t\\t\\t UNSIGNED3 Age;\\nEND;\\npersonDS := DATASET([{1,'Rachel','Green',29},{2,'Ross','Gellar',30},{4,'Monica','Gellar',29},{5,'Joey','Tribayani',31},{6,'Phoebe','Buffe',31}],Layout_per); \\npersonDS;\\nPersonCount := COUNT(personDS) : STORED('myname'); \\nPersonCount;\\t\\n\\t\\n#CONSTANT('myname',100);\\n\\nINTEGER myfunction(INTEGER x):= FUNCTION\\nINTEGER y := x;\\nRETURN Y;\\nEND; \\t\\n#STORED('myname',myfunction(11)); // myfunction returns 11\\t\\n
\\n\\nOutput is : 100 and the expected output is 100 (because 'myname' is set to a value of 100 using #CONSTANT, so it is not over-written by the #STORED statement)\\n\\nIf I use #STORED instead of #CONSTANT, the output is 100 but the expected output is 11, as a #STORED value is over-writable at runtime. Below is the code\\n\\n#STORED('myname',100);\\n\\nINTEGER myfunction(INTEGER x):= FUNCTION\\nINTEGER y := x;\\nRETURN Y;\\nEND; \\t\\n#STORED('myname',myfunction(11)); // myname should be set to 11 ideally\\n
\\n\\nIs my understsnding correct for these keywords?\", \"post_time\": \"2013-04-09 11:27:51\" },\n\t{ \"post_id\": 3916, \"topic_id\": 873, \"forum_id\": 8, \"post_subject\": \"Re: How to pass the parameters to the hthor/get the results\", \"username\": \"rtaylor\", \"post_text\": \"You can simply type the values into the entry controls at the right and then press the Submit button. Your result will be displayed as soon as it comes back.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-11 10:28:14\" },\n\t{ \"post_id\": 3908, \"topic_id\": 873, \"forum_id\": 8, \"post_subject\": \"How to pass the parameters to the hthor/get the results\", \"username\": \"wei xu\", \"post_text\": \"Hello there,\\n\\nI have a problem when I try to query the results from the published hthor, can anyone help me with how to pass the parameters to the hthor (like state, lastnmae, etc) /\\n get the query results (in what format? can we have a .xml or .csv?) and make use of the results table to draw graphs like using excel?\\n\\nthe following is the screenshot of the queries hthor.\\n\\nThank you!\", \"post_time\": \"2013-04-09 21:07:56\" },\n\t{ \"post_id\": 3930, \"topic_id\": 877, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL\", \"username\": \"peter\", \"post_text\": \"I was using the 3.10.4.1 VM, I upgraded to 3.10.6.1 and it all works! Thanks!\", \"post_time\": \"2013-04-16 02:07:10\" },\n\t{ \"post_id\": 3924, \"topic_id\": 877, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL\", \"username\": \"DSC\", \"post_text\": \"I have no troubles running this code snippet on my 3.10.6 cluster. What version are you running?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-15 14:08:49\" },\n\t{ \"post_id\": 3921, \"topic_id\": 877, \"forum_id\": 8, \"post_subject\": \"HTTPCALL\", \"username\": \"peter\", \"post_text\": \"Hello,\\n\\nLove the language! In the help, I see this function documented: \\nHTTPCALL\\n\\n\\nWhen I try to run the example code:\\n\\nworldBankSource := RECORD\\n STRING name {XPATH('name')}\\nEND;\\nOutRec1 := RECORD\\n DATASET(worldBankSource) Fred{XPATH('/source')};\\nEND;\\nraw := HTTPCALL('http://api.worldbank.org/sources', 'GET', 'text/xml', OutRec1);\\nOUTPUT(raw);\\n\\n\\n\\nI get:\\nError: System error: 10107: Graph[13], http[14]: Unsupported activity kind: http (0, 0), 10107, \\n\\nIs this still a supported feature? I am running this from the VM.\", \"post_time\": \"2013-04-14 01:52:24\" },\n\t{ \"post_id\": 3971, \"topic_id\": 878, \"forum_id\": 8, \"post_subject\": \"Re: reading and conerting a packed decimal\", \"username\": \"DSC\", \"post_text\": \"TRIM() removes only trailing spaces by default. If you want to remove leading spaces, add the LEFT argument (e.g. TRIM(' ',LEFT)).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-25 12:15:20\" },\n\t{ \"post_id\": 3970, \"topic_id\": 878, \"forum_id\": 8, \"post_subject\": \"Re: reading and conerting a packed decimal\", \"username\": \"swapna\", \"post_text\": \"Thanks Richard. This worked but still there is one issue. \\n\\npacked decimal occupies 5 fields in the file. The leading spaces should be trimmed before converting to packed decimal. So the length of the String after triming May vary from one to 5. 
This has to be decided dynamically.\\n\\ni used the following code for the same.\\nIt works when the length passed to the functions is hardcoded example: convertDataToInt(dataVal,1);\\n\\nIt works even if i pass it as convertDataToInt(dataVal,length(sample_DataSet[2].EMP_HERE_NBR));\\n\\nbut the same does not work when i pass the trimmed string convertDataToInt(dataVal,length(trim(sample_DataSet[2].EMP_HERE_NBR))). please help. \\n\\n\\n\\nconvertDataToInt(Data abc,integer len) := FUNCTION\\na:=choose (len, (>decimal1<)abc,(>decimal3<)abc,(>decimal5<)abc,(>decimal7<)abc,(>decimal9<)abc); \\nRETURN a;\\nEND;\\n\\nString emp_here:=sample_DataSet[2].EMP_HERE_NBR; \\nEBCDIC String EBCStringVal:=trim(emp_here,left); \\nData dataVal:=(DATA)EBCStringVal; \\nretval:=convertDataToInt(dataVal,length(EBCStringVal));\\nretval;\", \"post_time\": \"2013-04-25 06:22:15\" },\n\t{ \"post_id\": 3928, \"topic_id\": 878, \"forum_id\": 8, \"post_subject\": \"Re: reading and conerting a packed decimal\", \"username\": \"rtaylor\", \"post_text\": \"Swapna,\\n\\nI would start by trying to use our built in DECIMAL data type. It should work if the mainframe puts the sign nibble in the rightmost nibble, as in this example:DATA2 d1 := x'123C'; //result is 123\\nDATA2 d2 := x'123D'; //result is -123\\n\\n(>DECIMAL3<)d1; \\n(>DECIMAL3<)d2;
The use of the DATA data type allows me to put any hex values in. The C and D are standard ways of representing the sign nibble in Packed Decimal format (see the discussion at http://en.wikipedia.org/wiki/Binary-coded_decimal) -- C is positive and D is negative.\\n\\nHowever, if your mainframe puts the sign nibble in the leftmost position, then you'll need to write a converter, like this:STRING StringToNumString(STRING DataIn) := FUNCTION\\n STRING2 HexToString(STRING1 HexIn) := FUNCTION\\n STRING1 HexVal(UNSIGNED1 val) :=\\n CHOOSE(val,'1','2','3','4','5','6','7','8',\\n '9','A','B','C','D','E','F','0');\\n UNSIGNED1 Char1 := (((>UNSIGNED1<)HexIn & 11110000b) >> 4);\\n UNSIGNED1 Char2 := ((>UNSIGNED1<)HexIn & 00001111b);\\n RETURN HexVal(Char1) + HexVal(Char2);\\n END;\\n OutRec := {STRING HexOut{MAXLENGTH(1024)}}; \\n BlankDS := DATASET([{''}],OutRec);\\n ds3 := NORMALIZE(BlankDS,\\n LENGTH(TRIM(DataIn)),\\n TRANSFORM(OutRec, \\n SELF.HexOut := HexToString(DataIn[COUNTER])));\\n HexOut := ROLLUP(ds3,\\n TRUE,\\n TRANSFORM(OutRec, \\n SELF.HexOut := LEFT.HexOut + RIGHT.HexOut));\\n Sign := IF(HexOut[1].HexOut[1]='D','-',' ');\\n RETURN Sign + HexOut[1].HexOut[2..];\\nEND;\\n\\nStringToNumString((STRING)x'C123'); //result is 123\\nStringToNumString((STRING)x'D123'); //result is -123
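As a rough illustration only (the layout, field names, and logical file name below are all made up), the converter is typically applied after reading the packed field as a plain STRING:

InRec := RECORD
    STRING10 acct;       // whatever else is in your record
    STRING5  packedAmt;  // the packed decimal bytes, read as-is
END;
inDS := DATASET('~test::mainframe_sample', InRec, THOR);

OutRec := RECORD
    STRING10 acct;
    INTEGER8 amt;
END;
outDS := PROJECT(inDS,
                 TRANSFORM(OutRec,
                           SELF.amt := (INTEGER8)StringToNumString(LEFT.packedAmt),
                           SELF := LEFT));
OUTPUT(outDS);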
To use this function, you just define your packed decimal field as a STRING and then pass its value to the function. It will come back as a STRING, so you simply need to cast that result to any numeric data type you want to use.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-15 15:04:19\" },\n\t{ \"post_id\": 3923, \"topic_id\": 878, \"forum_id\": 8, \"post_subject\": \"reading and converting a packed decimal\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\ni am trying to read a mainframe generated fixed width file that has packed decimal values , can you suggest me on what should be data type to be used while reading from the file and how to unpack and get the value stored in it\\n\\nThanks,\\nSwapna.p\", \"post_time\": \"2013-04-15 09:45:27\" },\n\t{ \"post_id\": 3952, \"topic_id\": 885, \"forum_id\": 8, \"post_subject\": \"Re: APPLY(...,Std.File.DeleteExternalFile(...)) failed on 6.\", \"username\": \"janssend\", \"post_text\": \"Thanks Bob, \\nI would probably rollback to the 6.10.4. \\nRegards.\", \"post_time\": \"2013-04-19 13:02:26\" },\n\t{ \"post_id\": 3951, \"topic_id\": 885, \"forum_id\": 8, \"post_subject\": \"Re: APPLY(...,Std.File.DeleteExternalFile(...)) failed on 6.\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nCan you rollback to the previous version? This may be something that you need to log to the issue tracker as a possible regression. Apologies for the inconvenience.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-04-19 12:54:46\" },\n\t{ \"post_id\": 3950, \"topic_id\": 885, \"forum_id\": 8, \"post_subject\": \"APPLY(...,Std.File.DeleteExternalFile(...)) failed on 6.10.6\", \"username\": \"janssend\", \"post_text\": \"Hi,\\n\\nI used to upload large amount of files from dropzone to one single HPCC file (called 'incoming.raw') by using STD.File.DfuPlusExec. (blob mode)\\n\\nThen once these data have been handled, I delete every single drop-zone files listed by the 'incoming.raw' file by using this command :\\n\\n\\n// delete every physical file listed by the incoming raw file \\nAPPLY(DATASET('incoming.raw', Layout_Blob, THOR), STD.File.DeleteExternalFile('192.168.1.239', '/var/lib/HPCCSystems/mydropzone' + '/' + TRIM(filename)));\\n
\\n\\nIt used to work fine on previous HPCC release (3.8.6-4 and 3.10.4-1); but I got this error message on the 3.10.6-1 plateform.\\n\\n[color=#0000FF:12q4ih3k]\\nError: System error: -1: Graph[15], apply[17]: SLAVE 192.168.1.239:20100: No active dali server connection available (0, 0), -1,\\n\\nIs there a workaround ?\\n\\n\\nThanks\\nDavid\", \"post_time\": \"2013-04-18 15:40:22\" },\n\t{ \"post_id\": 3963, \"topic_id\": 888, \"forum_id\": 8, \"post_subject\": \"Re: Despray as CSV\", \"username\": \"DSC\", \"post_text\": \"Sure. A despray operation (a standard library function) requires a logical file as the source, and the OUTPUT() function can create a CSV-formatted logical file. XML is also supported, for the same reason.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-24 11:54:29\" },\n\t{ \"post_id\": 3961, \"topic_id\": 888, \"forum_id\": 8, \"post_subject\": \"Despray as CSV\", \"username\": \"peter\", \"post_text\": \"Hello,\\n\\nIs it possible to despray a file to a CSV layout (in the dropzone) rather than a fixed length?\", \"post_time\": \"2013-04-23 16:25:46\" },\n\t{ \"post_id\": 3966, \"topic_id\": 890, \"forum_id\": 8, \"post_subject\": \"Re: SOAP Communication Error\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Generally when you see a SOAP Communication Error on the command line, either an exception or core in the eclwatch ESP caused the socket connection to be dropped.\\n\\nAs you noticed that wouldn't necessarily prevent the workunit from running.\\n\\nCan you check the ESP log file and see if there is anything revealing in it?\", \"post_time\": \"2013-04-24 20:23:56\" },\n\t{ \"post_id\": 3964, \"topic_id\": 890, \"forum_id\": 8, \"post_subject\": \"SOAP Communication Error\", \"username\": \"rajunagarajan\", \"post_text\": \"All,\\n\\nSince we upgraded to internal_3.10.6-1, the ECLPlus.exe invoke from command line returns a "SOAP Communication Error" message ( from time to time ).. sometimes works, while the WU is still active / running in the thor.\\nThe code sprays data from a set of files and works fine when run in the IDE.\\nAnyone experiencing the same issue?\", \"post_time\": \"2013-04-24 12:30:22\" },\n\t{ \"post_id\": 3992, \"topic_id\": 893, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: -1: No active dali server connec\", \"username\": \"bohman\", \"post_text\": \"Resolved. This was an oversight on my part; the Target in the IDE was local. When I changed it to thor I was able to spray the file...\", \"post_time\": \"2013-04-26 12:15:49\" },\n\t{ \"post_id\": 3989, \"topic_id\": 893, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: -1: No active dali server connec\", \"username\": \"bohman\", \"post_text\": \"Thanks for the response Richard. Yes, I was able to execute the examine the data section. I got the exact count as specified in Figure 15; I also completed the Process the Data section and viewed results as in Figure 17. I stopped there...\\n\\nI've been through the Installing and Running the HPCC Platform document(version 3.10.4-1.pdf). I didn't do anything with LDAP. 
Is there a specific section I might want to review closer?\\n\\nAny nudge or nugget you can provide will be appreciated.\\n\\nThanks Again.\", \"post_time\": \"2013-04-26 11:01:17\" },\n\t{ \"post_id\": 3987, \"topic_id\": 893, \"forum_id\": 8, \"post_subject\": \"Re: Error: System error: -1: No active dali server connec\", \"username\": \"rtaylor\", \"post_text\": \"bohman,When I execute a "top -u hpcc" on the machine HPCC is installed on I see dafilesrv and daserver listed; I'm assuming one or both of these are the dali server?
No, neither of these is the dali server.\\n\\nHave you looked at these docs? -- http://cdn.hpccsystems.com/releases/CE-Candidate-3.10.6/docs/Installing_and_RunningTheHPCCPlatform-3.10.6-1.pdf\\n\\nI can execute the steps in the HPCC Data Tutorial and spray content to the THOR cluster using ECL Watch without error.
Did you do all of the Data Tutorial, including the "Examine the Data" section that has you use the IDE to look at the content of a data file? I ask because the inability to find Dali suggests to me an IDE configuration issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-25 20:49:16\" },\n\t{ \"post_id\": 3985, \"topic_id\": 893, \"forum_id\": 8, \"post_subject\": \"Error: System error: -1: No active dali server connection\", \"username\": \"bohman\", \"post_text\": \"I am new to HPCC (newb). I am trying to create a logical file within THOR by reading a file I dropped on the machine (/var/uspostaldata/ohwrklinux is a file containing fixed length records). As a first step, I am trying to read the file and echo the records back within the results window of the IDE. The second step is write them to a logical file within THOR (see commented out OUTPUT statement below). Currently, I have a one node configuration running on Ubuntu 12.04 LTS; I can execute the steps in the HPCC Data Tutorial and spray content to the THOR cluster using ECL Watch without error. When I try the following code:\\n\\n[size=85:11m0kd4s]IMPORT STD;\\nIMPORT usPostal.recordFormats.unitedStatesPostalRecords;\\n\\nsourceMachineIp := '193.169.1.100';\\nsourceMachineFile := '/var/uspostaldata/ohwrklinux';\\nohWrkDs := DATASET(STD.File.ExternalLogicalFileName(sourceMachineIp, sourceMachineFile), unitedStatesPostalRecords.CityStateBaseRecord, FLAT);\\nOUTPUT(ohWrkDs);\\n//OUTPUT(ohWrkDs, , 'testdata::ohwrk', OVERWRITE);\\n\\nI get this error:\\n\\n[size=85:11m0kd4s]Error: No active dali server connection available\\nError: ..\\\\..\\\\..\\\\..\\\\..\\\\HPCC-Platform\\\\dali\\\\base\\\\dadfs.cpp(369) : CConnectLock CNamedGroup::lookup Groups : No active dali server connection available\\nError: ..\\\\..\\\\..\\\\..\\\\..\\\\HPCC-Platform\\\\ecl\\\\eclagent\\\\eclgraph.cpp(1755) : EclAgent::executeGraph : No active dali server connection available\\nError: C4294967295 System error: -1: No active dali server connection available\\nError: System error: -1: No active dali server connection available\\n\\nI also tried the following ECL code:\\n\\n[size=85:11m0kd4s]IMPORT STD;\\n\\nsourceMachineIp := '193.169.1.100';\\nsourceMachineFile := '/var/uspostaldata/ohwrklinux';\\nSTD.File.SPrayFixed(sourceMachineIp, sourceMachineFile, 129, 'm100' , 'testdata::ohwrk', -1,);\\n\\nand received a similar error:\\n\\n[size=85:11m0kd4s]Error: No active dali server connection available\\nError: C4294967295 System error: -1: No active dali server connection available\\nError: System error: -1: No active dali server connection available\\n\\nWhen I execute a "top -u hpcc" on the machine HPCC is installed on I see dafilesrv and daserver listed; I'm assuming one or both of these are the dali server? \\n\\nI've been stumped for a while and any insights are much appreciated.\\n\\nAlso, I'm wondering if there is an operations guide or a document describing the HPCC processes running, installation directories, configuration files, log file locations and etc?\\n\\n[color=#BF0000:11m0kd4s]Thanks in advance for your time.\", \"post_time\": \"2013-04-25 19:32:18\" },\n\t{ \"post_id\": 4072, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sort\", \"post_text\": \"make sure all package names, file names are in lower case. 
Builds prior to 3.10.8 have an issue with mixed case\", \"post_time\": \"2013-05-10 20:20:37\" },\n\t{ \"post_id\": 4066, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"clo\", \"post_text\": \"Hi, also make sure that the flag allFilesDynamic in the roxie is set to false. This may affect your tests.\", \"post_time\": \"2013-05-08 20:01:36\" },\n\t{ \"post_id\": 4048, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"It's strange because it does not work for me. What I did not mention before was that the two versions were on different computers with different operating systems. I should try your approach of just upgrading to 4.0.0rc from 3.10.6 and see if that works.\", \"post_time\": \"2013-05-03 17:13:30\" },\n\t{ \"post_id\": 4047, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"clo\", \"post_text\": \"Hi. I just recreated the query and package for 3.10.6-1. Ater I upgraded my system to 4.0.0 release candidate, I deleted the package, recompiled and republished temp.ecl query because the compiler had changed. After that, I added the package again and it seems to work fine for me.\", \"post_time\": \"2013-05-03 13:39:55\" },\n\t{ \"post_id\": 4034, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"But then there is this\\n<Package id="temp">\\n <Base id="temp::file"/>\\n</Package>
\\n\\nI have followed the same convention as in the documentation.\\n\\nMoreover, the exact steps work for me in 3.10.4-1. Did you try to repeat the steps in 3.10.4 or 3.10.6? Did they work?\", \"post_time\": \"2013-04-30 21:58:03\" },\n\t{ \"post_id\": 4032, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"clo\", \"post_text\": \"I'm concerned about your use of temp::file as the package id. I'd expect it to be the same as your query name.\", \"post_time\": \"2013-04-30 17:18:00\" },\n\t{ \"post_id\": 4015, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"OK. Version string of my build: \\ncommunity_4.0.0-rc5-Debug[community_4.0.0-rc5-12-gc8f517]\\n\\nStep 1:\\n\\n/* File name: createFiles.ecl */\\nIMPORT Std.File;\\nLayout := RECORD\\n STRING line; \\nEND;\\nA := DATASET([{'File1'}],Layout);\\nB := DATASET([{'File2'}],Layout);\\nC := DATASET([{'File3'}],Layout);\\nSEQUENTIAL(\\nPARALLEL(\\nOUTPUT(A,,'~temp::file1',THOR),\\nOUTPUT(B,,'~temp::file2',THOR),\\nOUTPUT(C,,'~temp::file3',THOR)\\n),\\nFile.StartSuperFileTransaction(),\\nFile.CreateSuperFile('~temp::file'),\\nFile.AddSuperFile('~temp::file','~temp::file1'),\\nFile.FinishSuperFileTransaction()\\n);\\n
\\n\\necl run -t thor createFiles.ecl\\n
\\n\\nStep 2:\\n\\n/* File name: temp.ecl */\\nLayout := RECORD\\n STRING line;\\nEND;\\nOUTPUT(DATASET('~temp::file',Layout,THOR));\\n
\\n\\necl publish -t roxie temp.ecl\\n
\\n\\nStep 3:\\n\\n<!-- File name: temp.pkg -->\\n<RoxiePackages>\\n <Package id="temp">\\n <Base id="temp::file"/>\\n </Package>\\n <Package id="temp::file">\\n <SuperFile id="~temp::file">\\n <SubFile value="~temp::file3"/>\\n </SuperFile>\\n </Package>\\n</RoxiePackages>\\n
\\n\\necl packagemap add roxie temp.pkg -O -A -v\\n
\\n\\nState of my package system:\\necl packagemap list roxie
\\nPackage Name = temp.pkg active = 1\\n\\t\\tid = temp::file\\n\\t\\tid = temp
\\n\\nContents of my package:\\necl packagemap info roxie
\\n<PackageMaps id="temp.pkg">\\n <Package id="temp::file">\\n <SuperFile id="temp::file">\\n <SubFile value="temp::file3"/>\\n </SuperFile>\\n </Package>\\n <Package id="temp">\\n <Base id="temp::file"/>\\n </Package>\\n</PackageMaps>
\", \"post_time\": \"2013-04-29 16:28:32\" },\n\t{ \"post_id\": 4014, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"clo\", \"post_text\": \"Hi, I was wondering if you'd be able to post the exact commandline calls you're making as well the contents of the current package you're using now. \\n\\nThanks,\\nChris\", \"post_time\": \"2013-04-29 15:53:41\" },\n\t{ \"post_id\": 4012, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"anthony.fishbeck\", \"post_text\": \"You're welcome but it's a bit concerning that 4.0.0rc5 isn't working for you. I'll have to take a look.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-04-29 14:55:19\" },\n\t{ \"post_id\": 4011, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"anthony.fishbeck\", \"post_text\": \"The packagemap file actually applies to all of the queries on the roxie. The name is arbitrary and just has to be unique.\\n\\nPackages within the file are matched to particular queries using either the id or the "queries" attribute.\\n\\nIf you change the package id from "temp" to "test" it should work.\\n\\nThere was an issue at some point where you had to include the query version in the id. id="test.1". But that's no longer the case in the latest code line.\\n\\nThe "queries" attribute is a fuzzy match, so you could use <Package queries="te*"> but I think using "id" is preferred if you're only matching a single query.\", \"post_time\": \"2013-04-29 14:53:41\" },\n\t{ \"post_id\": 4010, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"I took your suggestion and changed the package id to refer to the exact query name. And it started working in 3.10.6-1 for me.\\n\\n4.0.0rc5 does not work even after that change, so something might be broken there. But at least 3.10.6-1 is working as expected.\\n\\nThe documentation needs to be slightly clear that the "query reference" needs to refer to the exact query as the package information binds to specific queries.\\n\\nThank you for the nudge in the right direction.
\", \"post_time\": \"2013-04-29 14:52:34\" },\n\t{ \"post_id\": 4009, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"I can see that, in both versions, the new subfile is background copied to the Roxie cluster so something is definitely happening, although that happens even there is no query published so it does not conclude that the query is linked to the new superfile definition. Also, if I remove the subfiles in Dali, the Roxie query fails completely saying that '~temp::file' cannot be resolved.\", \"post_time\": \"2013-04-29 14:36:49\" },\n\t{ \"post_id\": 4008, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"Oh and the version I tried yesterday was 3.10.6-1. I just finished testing on my local build of 4.0.0rc5. Same results.\", \"post_time\": \"2013-04-29 14:23:07\" },\n\t{ \"post_id\": 4007, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"My query is called test, the same as the name of the package file I inserted called 'test.pkg'. Is there a relation between the name of the query and any of the package specification elements?\", \"post_time\": \"2013-04-29 14:19:10\" },\n\t{ \"post_id\": 4006, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Re: Packagemap simplest use case\", \"username\": \"anthony.fishbeck\", \"post_text\": \"The documentation seems to focus on the approach of adding additional files, but completely changing the file referenced by the SuperFile is fine as well.\\n\\nCan you tell me which version of the HPCC you are running? There have been some issues related to packagemaps that were fixed in later builds.\\n\\nAlso, what is the name of the query you are publishing? In your example, does the temp in "<Package id="temp">" refer to the name of your query?\\n\\nRegards,\\nTony\", \"post_time\": \"2013-04-29 14:16:12\" },\n\t{ \"post_id\": 4004, \"topic_id\": 898, \"forum_id\": 8, \"post_subject\": \"Packagemap simplest use case\", \"username\": \"sbagaria\", \"post_text\": \"I have a live client facing system in which the client sends us data every day, and the cumulative data then needs to be aggregated by some complex logic into a single file. This operation can not be completely incremental and so the set of files (the one aggregated base file and some payload indices computed off that) needs to be replaced each day. The problem is that since this a live system, the published queries can not afford downtime.\\n\\nMy current solution is to keep two sets of files (where the set of files is specified by a suffix attached to all the file names). If 'set1' suffixed files are currently active, I write the day's new files as 'set2' suffixed. Then I recompile and republish my queries. Now, the older queries can be deleted/unpublished and the set1 files can be overwritten the next day.\\n\\nThis is clearly too convoluted and something someday will eventually get messed up as there are code changes and recompilation involved in a live system. \\n\\nI read through the posts on the forum on dynamic files and packagemaps. I personally think packagemaps are the future, so I went about creating a simplest use case.\\n\\nI create three files like this:\\n
Layout := RECORD\\n\\tSTRING line; \\nEND;\\n\\nA := DATASET([{'File1'}],Layout);\\nB := DATASET([{'File2'}],Layout);\\nC := DATASET([{'File3'}],Layout);\\n\\nOUTPUT(A,,'~temp::file1',THOR,CLUSTER('mythor'));\\nOUTPUT(B,,'~temp::file2',THOR,CLUSTER('mythor'));\\nOUTPUT(C,,'~temp::file3',THOR,CLUSTER('mythor'));\\n
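(Equivalently, the superfile setup could be scripted with the Std.File superfile functions, roughly as sketched below; the ECL Watch route described next is what I actually used.)

IMPORT Std.File;
SEQUENTIAL(
    File.CreateSuperFile('~temp::file'),
    File.StartSuperFileTransaction(),
    File.AddSuperFile('~temp::file', '~temp::file1'),
    File.FinishSuperFileTransaction()
);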
\\n\\nThen, through ECL Watch, I create a superfile called '~temp::file' containing only '~temp::file1' so that Dali registers the superfile as a valid file, which then allows me to publish my Roxie query like this:\\nLayout := RECORD\\n\\tSTRING line; \\nEND;\\n\\nOUTPUT(DATASET('~temp::file',Layout,THOR));\\n
\\n\\nEverything is working fine now and my published query returns 'File1'. Now I add a packagemap to my cluster which reads like this with the info command:\\n<PackageMaps id="test.pkg">\\n <Package id="temp::file">\\n <SuperFile id="temp::file">\\n <SubFile value="temp::file2"/>\\n </SuperFile>\\n </Package>\\n <Package id="temp">\\n <Base id="temp::file"/>\\n </Package>\\n</PackageMaps>
\\n\\nI would now expect my published query to return 'File2'. But it does not.\\n\\nOn another note, the documentation on packagemaps always suggests the use case where more sub files are being added to the superfile. I just want to replace the one subfile I will have in there, such that the original subfile could be deleted or overwritten.\", \"post_time\": \"2013-04-29 02:44:15\" },\n\t{ \"post_id\": 4061, \"topic_id\": 903, \"forum_id\": 8, \"post_subject\": \"Re: Record Layout definition\", \"username\": \"swapna\", \"post_text\": \"Thanks for your input Richard. \\nIt is clear now that, as the data itself is present as string in the file, the data type defined should be string initially. It can latter be converted to appropriate type as needed. \\n\\nSalaray is a packed decimal. String3ToDecimal5 function is to convert the salary to decimal format. Following is the function definition \\n\\n String3ToDecimal5(String3 pinput) :=Function \\n\\n\\t\\tString trimmedStr:=trim(pinput,left);\\n\\n\\t\\tlen:=length(trimmedStr);\\n\\n\\t\\tEBCDIC String ebcdicStr:=choose (\\n\\t\\tlen ,\\n\\t\\t'\\\\000\\\\000'+trimmedStr,\\n\\t\\t'\\\\000'+trimmedStr,\\n\\t\\ttrimmedStr);\\n\\n\\t\\tDATA3 dataVal:=(DATA)ebcdicStr;\\n\\t\\t \\n\\t\\tDecimal5 decimalVal:=(>Decimal5<)dataVal;\\n\\n\\t\\tReturn decimalVal; \\n\\nEnd;\", \"post_time\": \"2013-05-08 03:00:57\" },\n\t{ \"post_id\": 4060, \"topic_id\": 903, \"forum_id\": 8, \"post_subject\": \"Re: Record Layout definition\", \"username\": \"rtaylor\", \"post_text\": \"swapna,\\n\\nThe purpose of the RECORD structure for a dataset is to define what IS -- IOW, the exact format of the data on disk. So given this input, \\nfile content: \\ntest29`a<11111111description
the only field that I see as binary and not text would be the Salary field, which you are converting using a function you did not include - can we see that function, please?\\n\\nYour age and balance fields are numeric values but the data itself is string, so that's how it should be initially defined.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-07 14:09:08\" },\n\t{ \"post_id\": 4059, \"topic_id\": 903, \"forum_id\": 8, \"post_subject\": \"Re: Record Layout definition\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\nThe integer value that i have in the file is of lenghth 2. Value stored in it is 29. when i define the data type as integer2 the value printed was 14642 instead of 29. \\n\\nAlso if the integer stored in the file is of length 9, we will not be able to have the data type as integer9. Hence defining the datatype as integer while providing the Record layout of file does not work. \\n\\nI have provided two examples \\n1. defining the type as string while reading from the file and converting it to specified data type \\n2. defining the type as integer while redaing from the file it self. \\n\\nThe second one is not giving proper result. please let me know if i can go with the first approach itself. Or let me know if i am missing something in the second approach \\n\\nfile content: \\n test29`a<11111111description\\n\\n\\nApproach 1:\\n//Record definition while reading from the file \\ntestLayoutstr := Record \\nString6 Name;\\nstring2 age; \\nstring3 salary;\\nString8 balance;\\nString11 description;\\nend;\\n//convert to data set \\ntest_DataSet := Dataset('~test::sample',testLayoutstr,THOR);\\n\\n//Record set definition with proper data Type \\ntestLayoutconverted := Record \\nString6 Name;\\ninteger age; \\nUdecimal5 salary;\\ninteger balance;\\nString11 description;\\nend;\\n\\n//Transform function for converting to actual data type \\n\\ntestLayoutconverted convertToActual(testLayoutstr pInput):=Transform\\nself.Name:=pInput.Name;\\nself.age:=(integer)pInput.age;\\nself.salary:=String3ToDecimal5(pInput.salary);\\nself.balance:=(integer)pInput.balance;\\nself.description:=pInput.description\\nend;\\n\\n//converted data set \\nFinal_Converted_DataSet:=Project(test_DataSet,convertToActual(LEFT));\\n\\noutput(Final_Converted_DataSet);\\n\\nOut of this approach 1:\\nname\\tage\\tsalary\\tbalance\\tdescription\\n test\\t29\\t79814\\t11111111\\tdescription\\n\\n\\nApproach 2:\\n\\n//record layout definition\\n\\ntestLayouttype := Record \\nString6 Name;\\ninteger2 age; \\nUdecimal5 salary;\\ninteger8 balance;\\nString11 description;\\nend;\\n\\n//convert to data set \\ntest_DataSet1 := Dataset('~test::sample',testLayouttype,THOR);\\n\\noutput(test_DataSet1); \\n\\nOut of this approach 2:\\n\\nname\\tage\\tsalary\\tbalance\\t description\\n test\\t14642\\t####\\t 3544668469065756977 description\", \"post_time\": \"2013-05-07 11:23:01\" },\n\t{ \"post_id\": 4058, \"topic_id\": 903, \"forum_id\": 8, \"post_subject\": \"Re: Record Layout definition\", \"username\": \"rtaylor\", \"post_text\": \"swapna,I am new to ECL. I am trying to read from a fixed with file which has data of all types(String,Integer,packed decimal, binary etc...). following is the sample of my Record Layout\\n\\nsample_Layout:=Record\\nString5 name;\\nString2age;\\nString5 salary;\\nEnd;\\n\\nwhen i read from a file, i read everything as a string and then convert that to corresponding data type. Is this approach correct. Is there a way to define appropriate data type when reading the file itself. 
\\n\\nI tried with the following layout, but it failed reading the file stating the record length of the sprayed file does not match the record definition. \\n\\nsample_Layout:=Record\\nString5 name;\\nInteger1 age;\\nUdecimal9 salary; \\nEnd;
The second one doesn't work because the overall record size doesn't match the first structure's size. Try changing from integer1 to integer2 and the sizes should then match up.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-06 21:06:24\" },\n\t{ \"post_id\": 4057, \"topic_id\": 903, \"forum_id\": 8, \"post_subject\": \"Record Layout definition\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\nI am new to ECL. I am trying to read from a fixed with file which has data of all types(String,Integer,packed decimal, binary etc...). following is the sample of my Record Layout\\n\\nsample_Layout:=Record\\nString5 name;\\nString2age;\\nString5 salary;\\nEnd;\\n\\nwhen i read from a file, i read everything as a string and then convert that to corresponding data type. Is this approach correct. Is there a way to define appropraite data type when reading the file itself. \\n\\nI tried with the following layout, but it failed reading the file stating the record length of the sprayed file does not match the record definition. \\n\\nsample_Layout:=Record\\nString5 name;\\nInteger1 age;\\nUdecimal9 salary; \\nEnd;\", \"post_time\": \"2013-05-06 06:54:35\" },\n\t{ \"post_id\": 4073, \"topic_id\": 905, \"forum_id\": 8, \"post_subject\": \"Re: Automate the Spraying Process\", \"username\": \"JimD\", \"post_text\": \"The ECL Scheduler manual should help. \\n\\nYou can find it here:\\nhttp://hpccsystems.com/download/docs/ecl-scheduler\\n\\nI would use the STD.File.MonitorFile to "listen" for a file's arrival on the landing zone.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2013-05-10 20:40:05\" },\n\t{ \"post_id\": 4068, \"topic_id\": 905, \"forum_id\": 8, \"post_subject\": \"Automate the Spraying Process\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where i need to automate the spraying process, to spray the files as and when it comes in the dropzone to the thor cluster.\\n\\nI tried using the following code but the workunit keeps waiting. 
\\n\\n\\n\\nEXPORT Sample_Config := MODULE \\n\\t\\n\\t\\tEXPORT STRING LandingZoneIP := '192.168.80.129';\\n\\t\\tEXPORT STRING DestinationGroup\\t:= 'mythor';\\n\\t\\tEXPORT STRING LandingZoneRoot := '/var/lib/HPCCSystems/mydropzone';\\n\\t\\tEXPORT STRING Sample_Test := LandingZoneRoot + '/Test';\\n\\t\\tEXPORT STRING Sample_Test_Server1 := Sample_Test + '/Server1';\\n\\t\\tEXPORT STRING Sample_Analysed := Sample_Test + '/Logs_Analysed';\\n\\t\\tEXPORT STRING Sample_Logical_Name := '~thor::sampleTestLog';\\nEND;\\n\\n\\nIMPORT Test_Mod.Sample_Config AS Constants;\\nIMPORT STD;\\nIMPORT STD.File AS fservices;\\n\\nscheduleFileUpload() := FUNCTION\\n\\t//Move the sprayed file to a different directory\\n\\tfMoveRemoteFile(STRING pFileToMove, STRING pSourceDir, STRING pTargetDir) := FUNCTION\\n\\tSTRING\\tlTrimFilename :=\\tTRIM(pFileToMove, LEFT, RIGHT);\\n\\tSTRING\\tlSourceFullPath\\t:=\\tConstants.Sample_Test_Server1 + '/' + lTrimFilename;\\n\\tSTRING\\tlTargetFullPath\\t:=\\tConstants.Sample_Analysed + '/' + lTrimFilename;\\n\\treturn\\tfservices.MoveExternalFile(Constants.LandingZoneIP, lSourceFullPath, lTargetFullPath);\\n\\tEND;\\n\\n\\t//Spray the file\\n\\tfSprayFile(STRING pFileName, BOOLEAN pOverwrite = false) := FUNCTION\\n\\t STRING\\tlFileNameTrim := TRIM(pFileName, left, right);\\n\\t STRING\\tlSourceFullPath\\t:= Constants.Sample_Test_Server1 + '/' + lFileNameTrim;\\n\\t string\\tlTargetThorName\\t:=\\tConstants.DestinationGroup;\\n\\tRETURN fservices.SprayVariable( Constants.LandingZoneIP, lSourceFullPath, , , , , Constants.DestinationGroup,Constants.Sample_Logical_Name , , , , pOverwrite, false, TRUE);\\n\\tEND;\\n\\n\\tfMoveSprayedFile(string pFileName) :=\\tfMoveRemoteFile( pFileName, Constants.Sample_Test_Server1 , Constants.Sample_Analysed );\\n\\n\\t//Spray and Move the files to another folder sequentially\\n\\t fSprayAndMoveFile(STRING pFileToSpray) := FUNCTION \\n STRING lTrimFilename := TRIM(pFileToSpray,left,right);\\n\\t ReturnAction := PARALLEL( fSprayFile(lTrimFilename, true), fMoveSprayedFile(lTrimFileName) );\\n return ReturnAction;\\n\\tEND;\\n\\n\\tdFilesSpraying := fservices.RemoteDirectory(Constants.LandingZoneIP, Constants.Sample_Test_Server1 );\\n\\tret := IF ( EXISTS(dFilesSpraying) , APPLY(dFilesSpraying, fSprayAndMoveFile(name)), OUTPUT('No files to spray'));\\n\\tRETURN ret;\\nEND;\\n\\nscheduleFileUpload() : WHEN ( CRON ( '0-59/5 * * * *' ) ); //SCHEDULE A JOB every 5 minutes\\n\\n
\\n\\nScheduling a job to pick up the files to spray every 5 minutes. \\n\\nI am sure i am missing something really simple How do i use EVENT to trigger this job since it goes in a wait state. where do i specify the event name..? Kindly help regarding the same.\\n\\nIs there a different approach to automate the spraying process in general ? \\n \\n\\nThanks a lot in advance.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-05-09 15:53:41\" },\n\t{ \"post_id\": 4139, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"rtaylor\", \"post_text\": \"swapna,
i am not able to access the tutorial "http://learn.lexisnexis.com/hpcc". Getting "All prerequisites for this activity have not been met." error But it did not mention what prerequisites is missed.
Before you can take Intro to Thor you must first take the Intro to ECL class. It's not a waste of time to do so. It will answer some of the basic questions you have.\\nI did read about PERSIST in the ECL reference manual. I have few queries. \\n\\n1. There is a file, i read from the file and create new dataset out of it.\\nOr \\nThere is a existing dataset and i perform Transform and project to create a new dataset. \\n2. The newly created data Set is used in Five other ecl commands\\n\\nIf i am not using Persist, does it mean that when ever i use the Dataset it will be Rebuilt? And if i use Persist,it will build once and persist in a file, whenever i use the dataset it will return the same dataset unless it is modified?
This is all explained in the online courses (specifically, the differentiation of the terms "dataset" and "record set"). PERSISTing a record set definition creates a record set that may be used in subsequent definitions without re-doing all the work each time. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-22 13:13:26\" },\n\t{ \"post_id\": 4136, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"swapna\", \"post_text\": \"Richard, \\n\\ni am not able to access the tutorial "http://learn.lexisnexis.com/hpcc". Getting "All prerequisites for this activity have not been met." error But it did not mention what prerequisites is missed. \\n\\nI did read about PERSIST in the ECL reference manual. I have few queries. \\n\\n1. There is a file, i read from the file and create new dataset out of it.\\n Or \\nThere is a existing dataset and i perform Transform and project to create a new dataset. \\n2. The newly created data Set is used in Five other ecl commands\\n\\nIf i am not using Persist, does it mean that when ever i use the Dataset it will be Rebuilt? And if i use Persist,it will build once and persist in a file, whenever i use the dataset it will return the same dataset unless it is modified?\", \"post_time\": \"2013-05-22 06:25:46\" },\n\t{ \"post_id\": 4132, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"rtaylor\", \"post_text\": \"Swapna,\\n\\nIt pretty much depends on what your final intention is for the data and how much processing of that data you intend to do in HPCC. Right away, I can see three possibilities here:\\n\\n1) You intend to copy the data in from the mainframe and work with that copy in HPCC, without returning anything to the mainframe.\\n\\n2) You intend to copy the data in from the mainframe, work with that copy in HPCC then return the result to the mainframe.\\n\\n3) You intend to copy the data in from the mainframe, work with that copy in HPCC and return the result to the mainframe, using the result in both.\\n\\nSo, for all these scenarios, I would use your option #2 (slightly modified), to bring in the data, modify the data types and transform the data to its final format. \\n\\nThe slight modification I would make would be to use PERSIST on intermediate steps until I got the data into its final, transformed, "product" format, and only then write a new dataset to disk. This is the technique we teach in our "Introduction to Thor" class, which, if you haven't already taken it, I highly recommend (available free, online, here: http://learn.lexisnexis.com/hpcc).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-21 13:35:37\" },\n\t{ \"post_id\": 4130, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"Tony Kirk\", \"post_text\": \"Hi, Swapna.\\n\\nWithout exact knowledge of your data, it's difficult to say definitively, but I have seen many mainframe-generated flat files read in ECL with records that include DECIMAL, BIG_ENDIAN INTEGER, and STRING field types. Is it EBCDIC? If so, I would suggest you define as much of your record as those types as expected, look at the ASCII() and EBCDIC() functions, and at least eliminate those that work automatically (or have you already?).\", \"post_time\": \"2013-05-21 13:33:36\" },\n\t{ \"post_id\": 4127, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"swapna\", \"post_text\": \"Hi Richard, \\n\\nThanks for the response. 
My objective is to read from a mainframe-generated fixed width file that has all types of data (String, Integer, packed decimal, decimal, etc.). In response to another question, you said that while reading from a file everything has to be read as a string. So I have arrived at the following options; please suggest the correct option for reading from a mainframe-generated file\\n\\n\\n1. Read all the field data as strings. Whenever a math operation or comparison is performed, convert to the appropriate data type and perform the intended operation\\n2. Read all the fields as strings. Convert all the fields to the appropriate data types and save them in a dataset. Use this converted dataset for further processing \\n3. Convert to the appropriate data type while reading from the file itself (I used a Type Structure for this)\\n\\nI feel the first option is the best. Need your opinion on this. \\nThanks,\\nSwapna.P\", \"post_time\": \"2013-05-21 12:40:55\" },\n\t{ \"post_id\": 4080, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Re: Type Structure\", \"username\": \"rtaylor\", \"post_text\": \"swapna,\\n
I am trying to create a Type structure which reads a string and converts to integer while processing. It works for String of length 2. But not for String of Length 8.
Not quite correct. In order to work at all correctly, you need to change your code to this (note the addition at the end of the LOAD function):INTEGERTYPE(INTEGER len) := TYPE\\nEXPORT integer LOAD(STRING S) := (integer)S[1..len];\\nEXPORT STRING STORE(integer I) := (string)I;\\nEXPORT INTEGER PHYSICALLENGTH(STRING S) := len;\\nEND;
But even after making this change, it will still only work correctly for the first instance -- IOW, it will work for either the 2-byte OR the 8-byte string, but not both. \\n\\nHowever, your real problem is that you're creating the TYPE structure at all, when the only thing you're doing is type casting from string to integer and back. IOW, you're "OVER-COMPLIFICATING" the problem (yes, I did just create that word ).\\n\\nAll you really need to do is a simple typecast to/from STRING/INTEGER and you only need to do that when you actually want to do some integer-type operations on the data. There's no real reason this data can't stay string for the whole time, unless you're going to do some math on it.\\n\\nSo, the real question here is: what actual "problem" are you trying to solve with this TYPE structure?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-13 19:16:58\" },\n\t{ \"post_id\": 4077, \"topic_id\": 909, \"forum_id\": 8, \"post_subject\": \"Type Structure\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\nI am trying to create a Type structure which reads a string and converts to integer while processing. It works for String of length 2. But not for String of Length 8. \\n\\nType Structure definition:\\n\\nINTEGERTYPE(INTEGER len) := TYPE\\nEXPORT integer LOAD(STRING S) := (integer)S;\\nEXPORT STRING STORE(integer I) := (string)I;\\nEXPORT INTEGER PHYSICALLENGTH(STRING S) := len;\\nEND; \\n\\nFile Layout:\\n===========\\n\\nABCDEF2934512345678description\\nEFGHIJ3056734567890description\\n\\nRecord Definition:\\n==================\\ntestLayoutstr1 := Record \\nString6 Name;\\nINTEGERTYPE(2) age; \\nstring3 salary;\\nINTEGERTYPE(8) balance;\\nString11 description;\\nstring2 dummy;\\nend;\\n\\nData Set Definition\\n===================\\ntest_DataSet2 := Dataset('~test::sample',testLayoutstr1,THOR);\\n\\noutput(test_DataSet2);\\n\\nThis is not yielding proper result. It gives the following instead of priniting the dataset\\n\\n(#1)\\n(#2)\\n\\nIt works when the Type Structure is used only for String of length 2. Example:\\n\\ntestLayoutstr1 := Record \\nString6 Name;\\nINTEGERTYPE(2) age; \\nstring3 salary;\\nString8 balance;\\nString11 description;\\nstring2 dummy;\\nend;\\n\\ntest_DataSet2 := Dataset('~test::sample',testLayoutstr1,THOR);\\noutput(test_DataSet2); //This prints the data Set properly.\", \"post_time\": \"2013-05-13 09:23:22\" },\n\t{ \"post_id\": 4159, \"topic_id\": 915, \"forum_id\": 8, \"post_subject\": \"Re: Std lib CmdProcess does not work (in 3.10)\", \"username\": \"sort\", \"post_text\": \"There is an issue with this in 3.10.6 which can cause the lockup. Please upgrade to 3.10.8. (https://track.hpccsystems.com/browse/HPCC-9226)\\n\\nThere is an issue in the documentation as well.\\nIMPORT STD;\\noutput(STD.System.Util.CmdProcess('echo','hello world'));\\n\\n\\nshould be \\nIMPORT STD;\\noutput(STD.System.Util.CmdProcess('cat','hello world'));\\n\\n(echo does not read from stdin)\", \"post_time\": \"2013-05-29 21:16:45\" },\n\t{ \"post_id\": 4158, \"topic_id\": 915, \"forum_id\": 8, \"post_subject\": \"Re: Std lib CmdProcess does not work (in 3.10)\", \"username\": \"sort\", \"post_text\": \"Development will look into this\", \"post_time\": \"2013-05-29 20:45:49\" },\n\t{ \"post_id\": 4115, \"topic_id\": 915, \"forum_id\": 8, \"post_subject\": \"Std lib CmdProcess does not work (in 3.10)\", \"username\": \"tmurphy\", \"post_text\": \"We upgraded to community_3.10.6-1 and the example code in the ECL Reference still does not work. 
The problem now is the job runs but never ends (or outputs anything). Here is the code: \\n\\nIMPORT STD;\\noutput(STD.System.Util.CmdProcess('echo','hello world'));\", \"post_time\": \"2013-05-17 19:48:11\" },\n\t{ \"post_id\": 4149, \"topic_id\": 918, \"forum_id\": 8, \"post_subject\": \"Re: join and denormalize\", \"username\": \"cjohn\", \"post_text\": \"Thank You Richard.
\", \"post_time\": \"2013-05-27 17:17:06\" },\n\t{ \"post_id\": 4135, \"topic_id\": 918, \"forum_id\": 8, \"post_subject\": \"Re: join and denormalize\", \"username\": \"rtaylor\", \"post_text\": \"cjohn,
1. What is the difference between join and denormalize?
\\nJOIN allows you to find matching records in two datasets and call a TRANSFORM function to produce the result you want from those matching records.\\n\\nDENORMALIZE is designed to match Parent and Child records for the purpose of producing a "nested child dataset" where all the related child records are contained within the same file and same physical record as their parent. This makes DENORMALIZE a highly-specialized form of join.\\n2. Which is better join or denormalize?
Which is better, Red or Blue -- it depends on whether you're painting a seascape or a fire truck, doesn't it? When you want to produce a nested child dataset, then DENORMALIZE is usually the tool to use. For all other dataset-matching purposes, JOIN is probably going to be the better choice.\\n3. how to decide whether to use join or denormalize?
Pretty much comes down to what you want to produce.\\nI heard people saying the joins are expensive, they take much time. Does that mean denormalize is better than join?
A JOIN (or a DENORMALIZE) can take time, depending on the size of the datasets (and your cluster) and the complexity of your join condition. But JOIN is the tool that does all the "heavy lifting" for record matching -- which is why you'll find it has a large number of options available to customize its operation.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-21 21:12:23\" },\n\t{ \"post_id\": 4133, \"topic_id\": 918, \"forum_id\": 8, \"post_subject\": \"join and denormalize\", \"username\": \"cjohn\", \"post_text\": \"Hi,\\nThis question may sound basic but I'm not clear about these things.\\n\\n1. What is the difference between join and denormalize?\\n2. Which is better join or denormalize?\\n3. how to decide whether to use join or denormalize?\\n\\nI heard people saying the joins are expensive, they take much time. Does that mean denormalize is better than join?\\n\\nThank you.\", \"post_time\": \"2013-05-21 17:28:57\" },\n\t{ \"post_id\": 4160, \"topic_id\": 922, \"forum_id\": 8, \"post_subject\": \"Re: Read Mainframe Binary Format\", \"username\": \"swapna\", \"post_text\": \"Thanks a lot Richard. This helps\", \"post_time\": \"2013-05-30 03:08:15\" },\n\t{ \"post_id\": 4155, \"topic_id\": 922, \"forum_id\": 8, \"post_subject\": \"Re: Read Mainframe Binary Format\", \"username\": \"rtaylor\", \"post_text\": \"swapna,\\n\\nYour COMP field should simply be defined as a BIG_ENDIAN INTEGER4 data type in ECL.\\n\\nThen you just need to do a simple type cast to an INTEGER4 to get it into Intel's LITTLE_ENDIAN format so you can work with it in HPCC.\\n\\nHere's an example://define a field with the right value\\nBIG_ENDIAN INTEGER4 X := 1004035;\\n\\n//display the Hex content of that field\\n(>DATA4<)X; //000F5203\\n\\n//cast it for use in ECL\\n(INTEGER4)X; //1004035
I defined X as a BIG_ENDIAN INTEGER4 then the first result (the type transfer to DATA4) will show you the hex value of that and the second result (the type cast to INTEGER4) shows you its decimal value.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-29 18:40:13\" },\n\t{ \"post_id\": 4146, \"topic_id\": 922, \"forum_id\": 8, \"post_subject\": \"Read Mainframe Binary Format\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\nI am reading from a mainframe generated file which contains data in binary format(Mainframe data Type- COMP ). Is there any built in function available to convert this to integer format? \\n\\n EBCDIC string ebcdicStr:= fileDataSet[1].data; \\n \\n DATA4 dataVal:=(DATA4)ebcdicStr;\\n\\nHex value received is 000F5203.This should be Converted to 1004035. \\n\\nConversion logic to be used:\\nHex value : 000F5203\\nBinary Equivalent : 00000000 00001111 01010010 00000011 \\nDecimal/integer Equivalent : 1004035\", \"post_time\": \"2013-05-27 05:31:20\" },\n\t{ \"post_id\": 4154, \"topic_id\": 923, \"forum_id\": 8, \"post_subject\": \"Re: New answer to "What is the technical advantage of LexisN\", \"username\": \"rtaylor\", \"post_text\": \"This reply was posted today in the Quora thread:\\nhttp://www.quora.com/What-is-the-technical-advantage-of-LexisNexis-HPCC-over-Hadoop\\n*******************************************************************\\n@Anonymous -- Let me address a few specific items from your post:\\n Maintenance is a time suck. No unit testing, no compile time checking on interfaces. Oh yeah, there are no interfaces. You have a choice between a macro, module, or a function. Thats it.
INTERFACE is documented here (http://hpccsystems.com/community/docs/ecl-language-reference/html/interface-structure) in the Language Reference, and there are also Programmer's Guide articles that discuss its use. These docs both can be downloaded here: http://hpccsystems.com/download/docs/learning-ecl You can only use two languages. Either ECL (stab me in the eye), or embedded C in ECL.
True today. But will be no longer true when version 4.0 is released (within the next month or so) with support for embedding other languages, starting with Java, Javascript, Python, and R.It looses its "declarative" nature when you have to describe each minute step to execute. For example: I want to dedup a result set:\\n uniquedata := dedup(mydata, somekey).\\n\\n This doesnt work. You have to do this:\\n sortedData := sort(mydata, someKey)\\n distData := distribute(sortedData, someKey)\\n uniqueData: dedup(distData,somekey)\\n\\n 1: Why did I have to tell it to sort the data? Doesnt dedup implicitly declare that I want that to happen?\\n 2: Why did I have to distribute the data? WTF?! Its a damn super computer! Yeah, I want the data distributed. \\n 3: Oh now I can actually dedup the data! Thanks LexisNexis. That so "declarative".
This DEDUP (http://hpccsystems.com/community/docs/ecl-language-reference/html/dedup) could simply be coded as:\\n uniquedata := DEDUP(mydata, somekey,ALL);
The ALL option on DEDUP specifies that all possible commutative record pairs are checked for duplicity, instead of checking only contiguous pairs. This can be expensive on large datasets, which is why SORT is more commonly used in conjunction with DEDUP instead of the ALL option. \\n\\nAlso, in this DEDUP code example the DISTRIBUTE is unnecessary. It should simply be written as: sortedData := SORT(mydata, someKey);\\n uniqueData := DEDUP(sortedData,somekey);
\\n\\nThe only purpose of defining the SORT separately here is that it may be useful elsewhere. If that is not the case, then you could just write it like this: uniqueData := DEDUP(SORT(mydata, someKey),somekey);
And to properly use DISTRIBUTE in this example, the LOCAL option would need to be present on subsequent operations, like this:\\n distData := DISTRIBUTE(myData, HASH(someKey));\\n sortedData := SORT(distdata, someKey, LOCAL);\\n uniqueData := DEDUP(sortedData,somekey,LOCAL);
In an HPCC cluster the data is already distributed across all the nodes of the cluster, so the DISTRIBUTE function is actually used to RE-distribute the data so that all records with the same HASH values end up on the same node, thus enabling local operation on each node instead of global operation across all the nodes. In some cases this could provide a performance benefit by eliminating global crosstalk amongst the nodes.\\n \\nAll these possible ways of accomplishing the same task in ECL allows greater programmer control over exactly how the DEDUP is accomplished.\\nIts expensive. Yes there is a free version, but you will run into so many problems, that you will be forced to buy a license and ask LexisNexis to polish their turd.
The Community Edition is free and fully supported in the on-line forums (http://hpccsystems.com/bb/). For example, a Fortune 500 company recently developed their new HPCC system and put it into production, entirely using Community Edition -- by just downloading and reading the docs, and posting questions in the Forum.Their integrated IDE is crap. I wont even get into this, because most developers like to use the IDE of their choice (eclipse, VIM, emacs, etc...)
The Beta version of the "ECL Plugin for Eclipse" is available for download here: http://hpccsystems.com/products-and-services/products/plugins/eclipse-idePerformance is sketchy. I've ported ECL code into hadoop. Hadoop outperformed the original ECL code with 20% less hardware. I'm not saying Hadoop is faster, I'm just saying that performance lies mostly in the developer (in this case)
\\nAs with every language/platform, the better you know the tool the more likely you are to write optimal code for it. Creating a true apples-to-apples comparison of the two systems would require an equal expertise with both.\\n\\nFor example, in October, 2011, SGI announced their results for a "terasort" type of benchmark: http://www.sgi.com/company_info/newsroom/press_releases/2011/october/hadoop.html\\nSo in November, 2011, HPCC Systems duplicated their "terasort" effort on the HPCC platform (same algorithms, same size data, same hardware configuration, and SGI probably knows Hadoop equally as well as HPCC Systems knows HPCC). Here is the result comparison:\\n\\nSGI Performance: 130 seconds\\nHPCC Performance: 98 seconds\\n\\nSGI code: 700 lines of Java\\nHPCC code: 3 lines of ECL \\n\\nSGI cluster size: 20 nodes\\nHPCC cluster size: 4 nodes \\nNo support. Look at their forums. They are empty. Almost no activity at all.
I cannot answer a question that has not been asked. Read some previous postings and you will find that the responses to Forum postings have always been timely and comprehensive.No books. Search on Amazon. Not one damn book on ECL!!
Obviously an opportunity that is awaiting the right author. Should I be approached for assistance by any author taking on an HPCC book project, I would be more than glad to help. \\n\\nHowever, there are FREE online introductory courses available to get folks started coding with ECL at: http://learn.lexisnexis.com/hpcc\\n\\nThere are also competitively-priced online, remote (WebEx), and on-site training classes that go well beyond the basics: http://hpccsystems.com/community/training-events/training\", \"post_time\": \"2013-05-29 13:57:57\" },\n\t{ \"post_id\": 4148, \"topic_id\": 923, \"forum_id\": 8, \"post_subject\": \"Re: New answer to "What is the technical advantage of LexisN\", \"username\": \"maruyue\", \"post_text\": \"Could you (HPCC dev) give some comments for the post?\", \"post_time\": \"2013-05-27 06:18:14\" },\n\t{ \"post_id\": 4147, \"topic_id\": 923, \"forum_id\": 8, \"post_subject\": \"New answer to "What is the technical advantage of LexisNexis\", \"username\": \"maruyue\", \"post_text\": \"http://www.quora.com/What-is-the-techni ... ver-Hadoop.\\n\\n-----------------\\n\\nI use both hadoop and hpcc everyday. Hpcc sucks. Its horrible.\\n- Crashes regularly. Since it is a message based architecture, if one node goes down, the whole cluster goes down.\\n- ECL is cryptic. Its a bastardized version of SQL. Lots of syntax that makes no sense at all, or is not-necessary. Completely absent of any modern software practices like unit testing.\\n- You can only use two languages. Either ECL (stab me in the eye), or embedded C in ECL.\\n- Bugs in ECL. Not a day goes by that one of dont find some obscure bug in ECL. So much wasted time troubleshooting this POS.\\n- Maintenance is a time suck. No unit testing, no compile time checking on interfaces. Oh yeah, there are no interfaces. You have a choice between a macro, module, or a function. Thats it.\\n- It looses its "declarative" nature when you have to describe each minute step to execute. For example: I want to dedup a result set:\\n uniquedata := dedup(mydata, somekey).\\n\\n This doesnt work. You have to do this:\\n sortedData := sort(mydata, someKey)\\n distData := distribute(sortedData, someKey)\\n uniqueData: dedup(distData,somekey)\\n\\n 1: Why did I have to tell it to sort the data? Doesnt dedup implicitly declare that I want that to happen?\\n 2: Why did I have to distribute the data? WTF?! Its a damn super computer! Yeah, I want the data distributed.\\n 3: Oh now I can actually dedup the data! Thanks LexisNexis. That so "declarative".\\n \\n- Its expensive. Yes there is a free version, but you will run into so many problems, that you will be forced to buy a license and ask LexisNexis to polish their turd.\\n- Their integrated IDE is crap. I wont even get into this, because most developers like to use the IDE of their choice (eclipse, VIM, emacs, etc...)\\n- Performance is sketchy. I've ported ECL code into hadoop. Hadoop outperformed the original ECL code with 20% less hardware. I'm not saying Hadoop is faster, I'm just saying that performance lies mostly in the developer (in this case)\\n- No support. Look at their forums. They are empty. Almost no activity at all.\\n- Corporate support. Look at their Jira system. Its a ghost town. There are 10 developers listed. 10!! Your corporate super computer will be dependent on 10 devs!\\n- No books. Search on Amazon. Not one damn book on ECL!!\\n- No jobs. Search indeed. I think there are 102, world wide. Holy Crap! 102! Hadoop, 6,889.\\n- No integration. 
No one is coding new technology against HPCC. All the new compelling work is occurring on Hadoop.\\n\\nWhy is hadoop awesome?\\n- Its robust. I lost a whole rack once. The jobs kept running, no data lost. Hell yeah, its production ready.\\n- Integration. Tons of third party tools that work in Hadoop\\n- Cost. FREE FREE FREE!!!\\n- Labor. More and more developers are learning Hadoop.\\n- Support. Tons of blogs, websites, books, conferences.\\n- Diversity. You can run distributed jobs in any language you like. This is a life saver for legacy code.\\n- Flexibility If you dont like one of the components in the ecosystem, its likely you can find a replacement.\\n- Corporate support. All the major players are contributing to Hadoop. Look at the "powered by" page, and the jira system.\\n\\n\\nDont buy into the LexisNexis propaganda. They want you too believe its free. Its not. You will soon be on the phone with them begging for support. Hadoop is solid. Ask yourself, where do you want to spend your dollars. LexisNexis licenses, or development and hardware (more nodes). For the price of a lexisnexis license, we could have constructed a massive hadoop cluster. But no...we have to make sure LexisNexis gets their chunk of change to fix their bugs!\\n\\n\\nTo see the question with all answers, visit:\\nhttp://www.quora.com/What-is-the-techni ... rs/2531638\", \"post_time\": \"2013-05-27 06:16:47\" },\n\t{ \"post_id\": 4197, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Re: Abstract and concrete modules\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\n1. "Mod1" is declared as VIRTUAL so is it abstract module? am i right?
Yes.\\n2. "Mod2" inherits "Mod1" so does this inheritance makes "Mod2" as abstract one or not?
As its comment indicates, Mod2 is a concrete instance.\\nOUTPUT(Mod2.func(5)); //result is 15\\nThis gives me 15 as result. Because it is overriding inherited value val=1 (of Mod1). if i want the Mod1's value of Val attribute, what should i do?
Change Mod2 to Mod1, as in this code://VIRTUAL examples\\nMod1 := MODULE,VIRTUAL //an abstract module\\n EXPORT val := 1;\\n EXPORT func(INTEGER sc) := val * sc;\\nEND;\\n \\nMod2 := MODULE(Mod1) //a concrete instance\\n EXPORT val := 3; //override inherited default value\\nEND;\\n \\nMod3 := MODULE(Mod1) //a concrete instance\\n EXPORT func(INTEGER sc) := val + sc; //override inherited func\\nEND;\\nOUTPUT(Mod1.func(5)); //result is 5\\nOUTPUT(Mod2.func(5)); //result is 15\\nOUTPUT(Mod3.func(5)); //result is 6
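Since most abstract modules end up being written as INTERFACEs (a point made elsewhere in this thread), here is a minimal sketch of the same idea using an INTERFACE; the names IFace and ModA are invented for illustration:

IFace := INTERFACE //abstract: neither member has a value
  EXPORT INTEGER val;
  EXPORT INTEGER func(INTEGER sc);
END;

ModA := MODULE(IFace) //concrete: supplies both members
  EXPORT INTEGER val := 3;
  EXPORT INTEGER func(INTEGER sc) := val * sc;
END;

OUTPUT(ModA.func(5)); //result is 15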
3. and final one what is"pure member attribute" if possible with an example?
You'll note that this sentence no longer exists in the 4.0 docs so it's no longer relevant to discuss.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-12 14:55:13\" },\n\t{ \"post_id\": 4196, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Re: Abstract and concrete modules\", \"username\": \"cjohn\", \"post_text\": \"Richard, \\nI'll check 4.0 release docs.\\n \\nPlease help me in understanding this example.\\n\\n/VIRTUAL examples\\nMod1 := MODULE,VIRTUAL //an abstract module\\n EXPORT val := 1;\\n EXPORT func(INTEGER sc) := val * sc;\\nEND;\\n \\nMod2 := MODULE(Mod1) //a concete instance\\n EXPORT val := 3; //override inherited default value\\nEND\\n \\nMod3 := MODULE(Mod1) //a concete instance\\n EXPORT func(INTEGER sc) := val + sc; //override inherited func\\nEND\\nOUTPUT(Mod2.func(5)); //result is 15\\nOUTPUT(Mod3.func(5)); //result is 6
\\n\\n1. "Mod1" is declared as VIRTUAL so is it abstract module? am i right?\\n\\n2. "Mod2" inherits "Mod1" so does this inheritance makes "Mod2" as abstract one or not?\\n\\nOUTPUT(Mod2.func(5)); //result is 15
This gives me 15 as result. Because it is overriding inherited value val=1 (of Mod1). if i want the Mod1's value of Val attribute, what should i do?\\n\\n3. and final one what is"pure member attribute" if possible with an example?\\n\\nJohn\", \"post_time\": \"2013-06-12 14:29:13\" },\n\t{ \"post_id\": 4195, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Re: Abstract and concrete modules\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nThe MODULE doc has been re-written for the 4.0 release. Pre-4.0 the entire MODULE was either abstract (VIRTUAL) or not. In 4.0 that will change so that individuals members can be either abstract (VIRTUAL) or not.\\n\\nYou can get the 4.0 doc version right now by downloading the latest 4.0 release candidate from GIT (I think the doc updates are in a ZIP file).\\nThese questions may sound silly but i think, these things should be clear before moving on to other stuff?
There are no silly questions, but the concept of abstract MODULES is fairly arcane to begin with, so I would suggest looking at the 4.0 docs first, but go ahead and move on since most abstract modules are defined as INTERFACEs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-12 14:04:10\" },\n\t{ \"post_id\": 4194, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Re: Abstract and concrete modules\", \"username\": \"cjohn\", \"post_text\": \"Hi Richard,\\n I know why interface and module are used. But \\n1. I'm not getting difference between abstract and concrete module? what is the role of Virtual in that example?\\nAn abstract MODULE is one that contains at least one pure member attribute (an attribute with no value definition).
\\n2. what is the meaning of above statement? what is "pure member attribute"?\\n\\nThese questions may sound silly but i think, these things should be clear before moving on to other stuff? \\n\\nJohn\", \"post_time\": \"2013-06-12 13:50:38\" },\n\t{ \"post_id\": 4156, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Re: Abstract and concrete modules\", \"username\": \"rtaylor\", \"post_text\": \"cjohn,\\n\\nYou might want to take a look at the "Query Libraries" article in the Programmer's Guide (press F1 in the IDE and you'll find the Programmer's Guide is in the Help file as well as available for download as a PDF). That article discusses the INTERFACE and MODULE structures in depth.\\n\\nAny questions after that, please ask away!\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-29 18:50:32\" },\n\t{ \"post_id\": 4150, \"topic_id\": 924, \"forum_id\": 8, \"post_subject\": \"Abstract and concrete modules\", \"username\": \"cjohn\", \"post_text\": \"Hi,\\nI'm not getting clear picture of what are abstract and concrete modules, and i got confused with examples in the documentation. Can anyone tell me with simple example?\\nand also i didn't get the below line\\nAn abstract MODULE is one that contains at least one pure member attribute (an attribute with no value definition).
Specially the bold part?\\n\\n//VIRTUAL examples\\nMod1 := MODULE,VIRTUAL //an abstract module\\n EXPORT val := 1;\\n EXPORT func(INTEGER sc) := val * sc;\\nEND;\\n \\nMod2 := MODULE(Mod1) //a concete instance\\n EXPORT val := 3; //override inherited default value\\nEND\\n \\nMod3 := MODULE(Mod1) //a concete instance\\n EXPORT func(INTEGER sc) := val + sc; //override inherited func\\nEND\\nOUTPUT(Mod2.func(5)); //result is 15\\nOUTPUT(Mod3.func(5)); //result is 6
\\n\\nWhat is the use of VIRTUAL option here?\", \"post_time\": \"2013-05-27 17:24:58\" },\n\t{ \"post_id\": 4201, \"topic_id\": 928, \"forum_id\": 8, \"post_subject\": \"Re: Keyed and Wild\", \"username\": \"ghalliday\", \"post_text\": \"The indexes are implemented using a btree representation.\\n\\nRecords can be found by either\\n1) Using the btree structure to narrow down which records match or\\n\\n2) Walking a part of the btree, and testing the matching rows.\\n\\nIn general (1) is faster - and it corresponds to the KEYED portion of a filter. However it is sometimes more efficient to do (2) for part of the filter - post filter the records being walked in the btree.\\n\\nThe example was based on a hypothetical index\\n\\ni := INDEX({ integer a, integer b, integer c }, 'myindex');\", \"post_time\": \"2013-06-13 10:17:51\" },\n\t{ \"post_id\": 4200, \"topic_id\": 928, \"forum_id\": 8, \"post_subject\": \"Re: Keyed and Wild\", \"username\": \"chhaya\", \"post_text\": \"hi \\nI didn't understand meaning of this sentence\\n\\nit can be filtered as the index structures are walked, or as a post filter once the rows have been matched\\n\\nwhat does these two sentences mean? \\n\\nand i didn't get the example too can you please elaborate it more.\\n\\nThanks\", \"post_time\": \"2013-06-13 10:01:57\" },\n\t{ \"post_id\": 4175, \"topic_id\": 928, \"forum_id\": 8, \"post_subject\": \"Re: Keyed and Wild\", \"username\": \"ghalliday\", \"post_text\": \"There are two ways for an index to be filtered (within the implementation). It can be filtered as the index structures are walked, or as a post filter once the rows have been matched. The KEYED/WILD keywords are there to provide control over which of these is used.\\n\\nIf KEYED or WILD is used within a filter on an index, then only the conditions within those KEYED expressions will be done directly as the index is walked. Any other conditions will be done as post filters.\\n\\nIf there are no KEYED or WILD expressions in the filter then the code generator does its best to make as many conditions as it can KEYED. Most of the time it gets it correct, but there are some situations where this may not be optimal. The KEYED constructs allow you to override that behaviour.\\n\\n(One potential situations is where you have an index that has keyed fields a,b,c. There is a filter on a and c, but not on b. It probably depends on the cardinality of the fields, and the filter being applied to c whether\\n\\nKEYED(a),WILD(b),KEYED(c)\\nor\\nKEYED(a),c\\n\\nis the best way to evaluate it. The KEYED/WILD keywords provide the control you need. \\n\\nP.S. 
I suspect the number of times it makes a significant difference these days is fairly small.\", \"post_time\": \"2013-06-06 16:46:53\" },\n\t{ \"post_id\": 4174, \"topic_id\": 928, \"forum_id\": 8, \"post_subject\": \"Keyed and Wild\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nwhat is the difference in filtering index using with/without keyed option\\n\\nThanks\", \"post_time\": \"2013-06-06 13:32:02\" },\n\t{ \"post_id\": 4193, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Re: Alien datatypes\", \"username\": \"rtaylor\", \"post_text\": \"Pstring := TYPE\\n EXPORT INTEGER PHYSICALLENGTH(STRING X) := TRANSFER(X[1],UNSIGNED1)+1;\\n EXPORT STRING LOAD(STRING X) := X[2..TRANSFER(X[1],UNSIGNED1)+1];\\n EXPORT STRING STORE(STRING X) := TRANSFER(LENGTH(X),STRING1)+X;\\nEND;\\n\\nR := RECORD\\n STRING A;\\nEND;\\n\\n\\nD := DATASET([{'abc'},{'83'},{'RICHARD'}],R);\\nFilename := '~RTTEST::OUT::PstringTest';\\nAct1 := OUTPUT(D,{Pstring Pdat := D.A},Filename,OVERWRITE);\\n\\nPRec := RECORD\\n Pstring Pdat;\\nEND;\\nds := DATASET(Filename,Prec,THOR);\\nAct2 := OUTPUT(ds);\\n\\nSEQUENTIAL(Act1,Act2);
This code will write a file to disk with Pstring data in it, then read it. \\n\\nNote that result 1 (the write to disk action) looks like this:\\npdat __fileposition__\\nabc 0\\n83 4\\nRICHARD 7
The first record contains 3 characters, but occupies 4 bytes of storage, the second record contains 2 characters, but occupies 3 bytes of storage... This demonstrates that the data was written to disk with the leading length byte prepended to the data.\\n\\nBut the second result (the read from disk action) just shows the data and not the leading length byte.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-12 13:33:49\" },\n\t{ \"post_id\": 4191, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Re: Alien datatypes\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nCan you give some more examples?\", \"post_time\": \"2013-06-12 03:17:18\" },\n\t{ \"post_id\": 4183, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Re: Alien datatypes\", \"username\": \"rtaylor\", \"post_text\": \"The example is defining the TYPE structure for a Pascal string type. The LOAD function is the callback function the system will use when it wants to read the data from disk into memory. The STORE function is the callback function the system will use to write the data back to disk in the proper format. The PHYSICALLENGTH function is the callback function the system will use when it needs to know how many bytes of storage the field will occupy in the disk file.\\n\\nTo use it, you simply define your pascal string field in your RECORD structure for the DATASET containing the pascal string as a PSTRING.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-11 12:58:25\" },\n\t{ \"post_id\": 4182, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Re: Alien datatypes\", \"username\": \"chhaya\", \"post_text\": \"hi Richard,\\n\\nUnderstood.Thanks for the details.But i didn't get what this example is exactly doing can you please elaborate it more? and is there any other example which can help me in understanding?\", \"post_time\": \"2013-06-11 05:37:28\" },\n\t{ \"post_id\": 4179, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Re: Alien datatypes\", \"username\": \"rtaylor\", \"post_text\": \"chhaya,\\n\\nWe have many built-in data types in ECL. When you receive a new file from someone and some portion of the data it contains is in a form that is not directly supported by our built-in data types, you have two choices:\\n\\n1) Define the field as a STRING (or DATA) type then write some ECL code to extract the meaning out of the bitmaps yourself.\\n\\n2) Create a TYPE structure that defines for the system (at a minimum) how to LOAD the data from the file into memory and how to STORE the data back on disk.\\n\\nAn example is a Pascal string -- up to 255 characters in a variable-length format with a leading length byte indicating the number of actual characters following. ECL does not have a "PSTRING" data type, but you can construct one with the TYPE structure, like this:Pstring := TYPE\\n EXPORT INTEGER PHYSICALLENGTH(STRING x) := transfer(x[1],UNSIGNED1)+1;\\n EXPORT STRING LOAD(STRING x) := x[2 .. TRANSFER(x[1],UNSIGNED1)+1];\\n EXPORT STRING STORE(STRING x) := TRANSFER(LENGTH(x),STRING1)+x;\\nEND;
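Option (1) above needs no TYPE structure at all; a rough sketch with invented names, keeping the raw bytes in a DATA field and slicing the text out with a plain typecast:

RawRec := RECORD
  DATA5 raw; //1 length byte followed by up to 4 characters
END;
//the leading byte holds the character count; casting DATA to STRING is a straight byte copy
GetText(DATA5 d) := (STRING)d[2 .. TRANSFER(d[1],UNSIGNED1)+1];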
\\nBottom line -- the TYPE structure is rarely ever needed, since we have almost all of the most common data types already built-in.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-10 13:20:58\" },\n\t{ \"post_id\": 4178, \"topic_id\": 930, \"forum_id\": 8, \"post_subject\": \"Alien datatypes\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\n\\nI want to know what is alien datatype and its real time uses? in which scenarios it will be used?its not clear in documentation.\\n\\nThanks\", \"post_time\": \"2013-06-10 11:40:39\" },\n\t{ \"post_id\": 4184, \"topic_id\": 931, \"forum_id\": 8, \"post_subject\": \"Re: ClusterSize command.\", \"username\": \"rtaylor\", \"post_text\": \"This option does not actually change the physical number of nodes in the cluster, just the value of CLUSTERSIZE, so it should have no impact on performance and could have a negative impact if your ECL code is using CLUSTERSIZE and expecting it to represent the actual number of nodes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-11 14:09:08\" },\n\t{ \"post_id\": 4181, \"topic_id\": 931, \"forum_id\": 8, \"post_subject\": \"ClusterSize command.\", \"username\": \"Rahul Jain\", \"post_text\": \"Hi I tried to execute the command :-\\n\\n#option('clusterSize','2');\\noutput(clustersize);\\n\\non Hthor. It converts the target cluster size to 2 from the old size which was 1.\\nBelow are my questions:-\\n1. Can HTHOR have cluster size greater than 1.( As per my knowlege it can be only 1)\\n2. Increasing the cluster size like above will increase the execution speed or it will cause some negative imapact. (Ideally more the no. of nodes more the execution speed but will it be same for HTHOR?)\\n\\nThanks,\\nRahul Jain\", \"post_time\": \"2013-06-10 21:00:15\" },\n\t{ \"post_id\": 4216, \"topic_id\": 936, \"forum_id\": 8, \"post_subject\": \"Re: Automate the file write process\", \"username\": \"bforeman\", \"post_text\": \"I am using PARALLEL command, as my module has multiple output statements.
\\n\\n\\nOK thanks, I believe that you do not need to use PARALLEL, as that is an implicit operation for multiple outputs. \\n\\nWill pass on the errors to the development team.\\n\\nJust to verify, does your function work correctly when used outside of APPLY?\\n\\nFor example:\\nExtractLogic('yourFilename');
\\n\\nMaybe APPLY is not what you are looking for. How about this?\\n\\nresult := If(EXISTS(lfiles), EVENT('fileexists',*), 'No files present.');\\nExtractLogic(lfiles.name) : WHEN('fileexists'); \\n
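Another shape that is sometimes tried for per-row actions is to lift the small list of filenames out of the Thor graph before applying the action. This is only a sketch built on the lFiles and ExtractLogic definitions already shown, and it is not verified against the NOTHOR error reported later in this thread:

//pull the (small) filename list out of the graph, then run the action once per row
NOTHOR(APPLY(GLOBAL(lFiles, FEW), ExtractLogic(name)));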
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-18 12:10:16\" },\n\t{ \"post_id\": 4214, \"topic_id\": 936, \"forum_id\": 8, \"post_subject\": \"Re: Automate the file write process\", \"username\": \"RSP\", \"post_text\": \"While executing APPLY command I get the follwoing error:\\nError: System error: -1: Failed to receive reply from thor
\\n\\nI tried using NOTHOR around the APPLY command. Then I get the error:\\nWarning: (0,0): error C4818: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. (0, 0), 0,
\\n\\nI am using PARALLEL command, as my module has multiple output statements.\\n\\nRegards,\\nRSP.\", \"post_time\": \"2013-06-18 05:59:58\" },\n\t{ \"post_id\": 4211, \"topic_id\": 936, \"forum_id\": 8, \"post_subject\": \"Re: Automate the file write process\", \"username\": \"bforeman\", \"post_text\": \"It looks like what you are doing is sound, but when you say that APPLY is not working, what errors are you receiving? And why are you using PARALLEL? Are you mixing it with a sequential operation?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-17 13:40:39\" },\n\t{ \"post_id\": 4206, \"topic_id\": 936, \"forum_id\": 8, \"post_subject\": \"Automate the file write process\", \"username\": \"RSP\", \"post_text\": \"Hi,\\nI want to automate and schedule a module that writes the output to an csv file. \\n\\nExtractLogic(String filename) := Function \\n\\tCE := $.MyModule(filename);\\n\\tret := CE.cbl_action;\\n\\treturn ret;\\nEnd;\\n\\n//lfiles is a dataset with filesnames in the column 'name'\\nresult := If(Exists(lfiles), APPLY(lFiles, ExtractLogic(name)), Output('No files present.'));\\nresult;\\n
\\n\\nThe function ExtractLogic(), mentioned above calls 'MyModule' where multiple OUTPUT actions are put in a Parallel command and assigned to cbl_action.\\n\\nNow I want to execute this ExtractLogic() function for every filename in dataset lFiles.\\nUsing APPLY as in the above example is not working in thor / hthor. \\nWHEN does does not seem to work either. \\nCan a function returning an action attribute be called in any other way?\", \"post_time\": \"2013-06-14 10:27:27\" },\n\t{ \"post_id\": 4212, \"topic_id\": 937, \"forum_id\": 8, \"post_subject\": \"Re: Joining multiple datasets\", \"username\": \"bforeman\", \"post_text\": \"Have you tried simply using successive JOINs? JOIN A to B and then use that result to JOIN to C. All datasets can be different in a JOIN; LEFT, RIGHT, and the JOIN Output.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-17 13:43:39\" },\n\t{ \"post_id\": 4210, \"topic_id\": 937, \"forum_id\": 8, \"post_subject\": \"Joining multiple datasets\", \"username\": \"PManickam\", \"post_text\": \"Hi,\\n\\nHow to join more than 2 datasets which are not identical in ecl? \\nwhen i try to create a set of datasets it says datasets should be identical. \\n\\n\\nThanks,\", \"post_time\": \"2013-06-17 12:41:39\" },\n\t{ \"post_id\": 4222, \"topic_id\": 938, \"forum_id\": 8, \"post_subject\": \"Re: TRIM and Unicode\", \"username\": \"chucks\", \"post_text\": \""In the Unicode standard, U+200B and U+FEFF are not included in the table of space characters, as they have no width and are not supposed to have any visible glyph."\\n\\nUgh\", \"post_time\": \"2013-06-18 16:43:17\" },\n\t{ \"post_id\": 4219, \"topic_id\": 938, \"forum_id\": 8, \"post_subject\": \"Re: TRIM and Unicode\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nI tested it this way:
UNICODE10 MyUnicodeString1 := U'abcd\\\\u00A0';\\nUNICODE10 MyUnicodeString2 := U'abcd\\\\u200B'; \\n\\nLENGTH(TRIM(MyUnicodeString1)); //results in 4 - trimmed\\nLENGTH(TRIM(MyUnicodeString2)); //results in 5 - not trimmed
I found that \\\\u00A0 is trimmed and \\\\u200B is not (however, it will be trimmed if u_isspace is true).\\n\\nThe Unicode library we use is documented here: http://icu-project.org/apiref/icu4c44/classUnicodeString.html#abb3f5f6eb82c5689fcfeb8f79d5ca2bf\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-18 14:52:36\" },\n\t{ \"post_id\": 4213, \"topic_id\": 938, \"forum_id\": 8, \"post_subject\": \"TRIM and Unicode\", \"username\": \"chucks\", \"post_text\": \"Unicode has several code points that represent a space of some form. The built-in TRIM function does not seem to remove them. Is that deliberate?\\n\\nExamples: u+00A0 u+200B\", \"post_time\": \"2013-06-17 19:21:47\" },\n\t{ \"post_id\": 4245, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess to publish a query\", \"username\": \"bforeman\", \"post_text\": \"Thank you Gordon, nice alternative!\", \"post_time\": \"2013-06-21 13:17:16\" },\n\t{ \"post_id\": 4244, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess to publish a query\", \"username\": \"gsmith\", \"post_text\": \"The following ECL will publish a WU:\\nOutRec1 := RECORD\\n string1 result;\\nEND;\\nraw := HTTPCALL('http://192.168.1.201:8010/WsWorkunits/WUPublishWorkunit?Wuid=W20130609-230229&JobName=yxy&Activate=1', 'GET', 'text/xml', OutRec1, onfail(skip));\\nraw;
\\n\\nNote: The publish action works, but the submitted ECL will fail as there is no response (I think), adding "onfail(skip)" lets the submitted WU succeed at least.\", \"post_time\": \"2013-06-21 13:14:08\" },\n\t{ \"post_id\": 4243, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess to publish a query\", \"username\": \"bforeman\", \"post_text\": \"Well, first I would try other ECL options instead of publish, perhaps a simple compile. I'm not sure that copying to the Roxie cluster was correct, the target indeed is Roxie, but you are still publishing from the Thor cluster. Try copying the ecl executable to the THOR cluster instead.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-21 12:00:38\" },\n\t{ \"post_id\": 4241, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess to publish a query\", \"username\": \"Trilokesh\", \"post_text\": \"Hi Bob,\\n\\nWe ran the below code on cmd prompt and it executed. \\n\\necl publish --target=roxie --server=192.168.142.130 --name=hello.3 -A <wuid>
\\n\\nWe are unable to get it working from ECL IDE though.\\n\\nTried copying the program to the roxie cluster, so that cmdprocess would be able to find it, but it was a road block.\\n\\necl publish --target=roxie --server=192.168.142.130 --name=hello.3 -A hello.ecl
\\n\\n\\nIt throws a error: Cant determine content typr of argument hello.ecl\\n\\nIs there any workaround to this?\\n\\nJust want to explore the available options of auto publish to roxie.\\n\\nDo u have any suggestions?\", \"post_time\": \"2013-06-21 05:58:09\" },\n\t{ \"post_id\": 4227, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"Re: CmdProcess to publish a query\", \"username\": \"bforeman\", \"post_text\": \"A few things to check:\\n\\n1. Make sure that your ECL script works on the command line.\\n\\n2. I believe that the program that CmdProcess uses needs to be copied to the target cluster so that CmdProcess can find the program. \\n\\n3. Auto-publishing a ROXIE query is not a good practice. You should use the manual Compile and Publish from the Workunit details. Just a recommendation.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-19 14:51:07\" },\n\t{ \"post_id\": 4217, \"topic_id\": 939, \"forum_id\": 8, \"post_subject\": \"CmdProcess to publish a query\", \"username\": \"Trilokesh\", \"post_text\": \"Hi,\\n\\nI would like to publish a query in roxie, for which i use the following code\\n\\noutput('hello');\\na := STD.System.Util.CmdProcess('ecl publish --target=roxie --name=hello -A' + workunit ,'');\\na;
\\nbut i dont seem to be able to reflect the same..\\ncould you please help me with how i can best do the above.\\n\\nIs there any better way to publish a query to roxie through ECL?\\n\\n-Trilokesh\", \"post_time\": \"2013-06-18 12:41:07\" },\n\t{ \"post_id\": 4234, \"topic_id\": 943, \"forum_id\": 8, \"post_subject\": \"Re: Question on "overflow" error using JOIN function\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nJust for fun, try this code:Testset := {\\n string50 search_term,// {maxlength(50)},\\n string50 pid,// {maxlength(50)},\\n string2 type,\\n string28 pii,// {maxlength(28)},\\n string1024 source_title,// {maxlength(1024)},\\n string4096 item_title,// {maxlength(4096)},\\n};\\nds1 := dataset(file_path1,testset,csv(maxlength(10000),separator(','),quote('\\\\"')));\\n\\nTestset2 := Record\\n UNICODE80 search_term;//{maxlength(80)};\\n UNICODE28 pii;//{maxlength(28)};\\nEnd;\\nds1_pro := project(ds1, Transform(Testset2, Self.search_term := Left.search_term,\\n Self.pii := Left.pii)); \\n\\nLayout := record\\n UNICODE pii;\\n UNICODE doi;\\n UNICODE item_id{MAXLENGTH(40)};\\n UNICODE descType{MAXLENGTH(15)};\\n UNICODE mainterm{MAXLENGTH(80)};\\n UNICODE weight{MAXLENGTH(3)};\\n BOOLEAN candidateFlag;\\nEnd;\\n\\nds2_ClassFile := Dataset(file_path2, Layout, Thor);\\nds2_join := join(ds2_ClassFile, ds1_pro, Left.mainterm = Right.search_term, \\n Transform(Testset2, Self := Right));\\n\\noutput(ds2_join);\\noutput(count(ds2_join));
I'm suspecting that it may be a data issue. Specifying fixed field sizes for the CSV file will work in ECL, but I'm also wondering if the problem might be in the ds2_Classfile, where you have several fields defined as variable-length UNICODE and the DATASET as a THOR file. So -- how was the ds2_Classfile created?\\n\\nIt would help if you could upload small test files with representative data (or you can email them to me directly, if it's sensitive: richard.taylor@lexisnexis.com).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-20 14:51:49\" },\n\t{ \"post_id\": 4233, \"topic_id\": 943, \"forum_id\": 8, \"post_subject\": \"Re: Question on "overflow" error using JOIN function\", \"username\": \"Leofei\", \"post_text\": \"Sorry for the confusing question. Met this kind of problem many times, so I mixed up the error message. Here is the one just run. Both Dataset "ds2_ClassFile" and "ds1_pro" can OUTPUT. So the issue happened on JOIN function. While, the MAXLENGTH for each field keeps the same. \\n\\nTestset := {\\n string search_term {maxlength(50)},\\n string pid {maxlength(50)},\\n string2 type,\\n string pii {maxlength(28)},\\n string source_title {maxlength(1024)},\\n string item_title {maxlength(4096)},\\n};\\nds1 := dataset(file_path1,testset,csv(maxlength(10000),separator(','),quote('\\\\"')));\\n\\nTestset2 := Record\\n\\t\\t\\tUNICODE search_term{maxlength(80)};\\n\\t\\t\\tUNICODE pii{maxlength(28)};\\nEnd;\\nds1_pro := project(ds1, Transform(Testset2, Self.search_term := Left.search_term,\\n Self.pii := Left.pii)); \\n\\nLayout := record\\n UNICODE pii;\\n\\tUNICODE doi;\\n\\tUNICODE item_id{MAXLENGTH(40)};\\n\\tUNICODE descType{MAXLENGTH(15)};\\n\\tUNICODE mainterm{MAXLENGTH(80)};\\n\\tUNICODE weight{MAXLENGTH(3)};\\n\\tBOOLEAN candidateFlag;\\nEnd;\\n\\nds2_ClassFile := Dataset(file_path2, Layout, Thor);\\nds2_join := join(ds2_ClassFile, ds1_pro, Left.mainterm = Right.search_term, \\n\\t\\tTransform(Testset2, Self := Right));\\n\\noutput(ds2_join);\\noutput(count(ds2_join));\\n
\\n\\nError: System error: 1000: Graph[1], pipeoutput[8]: SLAVE 10.144.90.1:16600: 1000, Graph[1], newdiskread[2]: Assignment to field 'mainterm' causes row overflow. Size 480 exceeds the maximum size specified(164), Error handling file: /c$/thordata/pandc/dedup_class._1_of_50 : ProcessSlaveActivity exception id=8\", \"post_time\": \"2013-06-20 14:06:39\" },\n\t{ \"post_id\": 4232, \"topic_id\": 943, \"forum_id\": 8, \"post_subject\": \"Re: Question on "overflow" error using JOIN function\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nOK, you've managed to confuse me.\\n\\nThe error message you quote "Assignment to field 'weight' causes row overflow. Size 236 exceeds the maximum size specified(234)"
does not seem to relate to the example code provided, for these reasons:
\\nCan you please provide more information?\\n\\nRichard\", \"post_time\": \"2013-06-20 09:31:59\" },\n\t{ \"post_id\": 4231, \"topic_id\": 943, \"forum_id\": 8, \"post_subject\": \"Question on "overflow" error using JOIN function\", \"username\": \"Leofei\", \"post_text\": \"Hi,\\n\\nI meet a problem on JOIN function. When I run the following code, it reported this error:\\n\\nError: System error: 1000: Graph[9], diskwrite[19]: SLAVE 10.144.90.2:16600: 1000, Assignment to field 'weight' causes row overflow. Size 236 exceeds the maximum size specified(234) : ProcessSlaveActivity exception id=19\\n\\nI'm curious what this error is? Last time I met a similar error when I use ROLLUP function to combine several records into one, and adjusted the MAXLENGTH to fix it. Don't understand why this happened in JOIN function? \\n\\n\\nLayout1 := {\\n string search_term {maxlength(80)},\\n string pid {maxlength(50)},\\n string2 type,\\n string pii {maxlength(28)},\\n string source_title {maxlength(1024)},\\n string item_title {maxlength(4096)},\\n};\\n\\nds1 := Dataset(file_path1, Layout1, csv(maxlength(10000),separator(','),quote('\\\\"')));\\n\\nLayout2 := Record\\n UNICODE pii;\\n\\tUNICODE doi;\\n\\tUNICODE item_id{MAXLENGTH(40)};\\n\\tUNICODE descType{MAXLENGTH(15)};\\n\\tUNICODE mainterm{MAXLENGTH(80)};\\n\\tUNICODE weight{MAXLENGTH(3)};\\n\\tBOOLEAN candidateFlag;\\nEnd;\\n\\nds2 := Dataset(file_path2, Layout2, Thor);\\n\\nLayout_Join := Record\\n\\tUNICODE search_term {maxlength(80)};\\n\\tUNICODE pii{maxlength(28)};\\nEnd;\\n\\nds_join := Join(ds2, ds1, Left.mainterm = Right.search_term, \\n\\t\\t\\t\\tTransform(Layout_Join, Self := Right));\\n\\noutput(ds_join);\\noutput(count(ds_join));\\n
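For readers who hit the same "causes row overflow" message elsewhere: it means a value being placed in a field is larger than the maximum size the record layout allows for it, so the MAXLENGTH attributes, and whether the declared layout really matches the file on disk, are the first things to check. A tiny, made-up illustration of the size limit it refers to:

NarrowRec := RECORD
  STRING s{MAXLENGTH(4)}; //room for at most 4 characters
END;
WideRec := RECORD
  STRING s{MAXLENGTH(64)};
END;
//moving an 8-character value into NarrowRec is the kind of mismatch that
//produces the error; WideRec has room for it
ds := DATASET([{'abcdefgh'}], WideRec);
OUTPUT(ds);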
\", \"post_time\": \"2013-06-19 20:48:19\" },\n\t{ \"post_id\": 4239, \"topic_id\": 944, \"forum_id\": 8, \"post_subject\": \"Re: Append data to file\", \"username\": \"abhisr\", \"post_text\": \"Excellant Dan.\\nI come from java background and moved to ECL a couple of weeks back so sometimes I think from java perspective for implementations.\\n\\nThanks a lot!!\", \"post_time\": \"2013-06-20 16:17:59\" },\n\t{ \"post_id\": 4238, \"topic_id\": 944, \"forum_id\": 8, \"post_subject\": \"Re: Append data to file\", \"username\": \"DSC\", \"post_text\": \"Well, you could it that way, but you may run into problems and it certainly won't scale past a few "programs." What I was suggesting was more along the lines of:\\n\\n1) Prog.A creates logical file 'data_a1' and appends it to superfile 'all_data'.\\n2) Prog.B creates logical file 'data_b1' and appends it to superfile 'all_data'.\\n3) Prog.A creates logical file 'data_a2' and appends it to superfile 'all_data'.\\n... (etc)\\n\\nECL code that reads the data already written would reference only 'all_data'. At all points in time, all_data would contain everything. Basically, the append process occurs when you add the new logical file to the superfile. Make sense?\\n\\nDan\", \"post_time\": \"2013-06-20 15:56:36\" },\n\t{ \"post_id\": 4237, \"topic_id\": 944, \"forum_id\": 8, \"post_subject\": \"Re: Append data to file\", \"username\": \"abhisr\", \"post_text\": \"Thanks Dan for the answer,i was mentioning about logical files only .\\nSo in my case there are two programs that run on a daily basis to generate daily reports that needs to be written(append) on same file .\\nSo as you said it should run like, prog.A writes to file1, prog.B reads file1 and adds its own data and writes to a new file file2, which is going to be final output file .Delete file1 .\\n\\nThanks in advance.\", \"post_time\": \"2013-06-20 15:49:15\" },\n\t{ \"post_id\": 4236, \"topic_id\": 944, \"forum_id\": 8, \"post_subject\": \"Re: Append data to file\", \"username\": \"DSC\", \"post_text\": \"I assume you're talking about writing appending to a logical file, not an external file.\\n\\nIf so, then the short answer is "you can't." The way appends are normally accomplished is to write your updates to separate logical files and group all of them in a superfile. In your ECL code, you can reference the superfile just like a "regular" logical file and you'll be able to see all the data. The Programmer's Guide has an entire section on superfiles that should help you get started.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-06-20 15:30:53\" },\n\t{ \"post_id\": 4235, \"topic_id\": 944, \"forum_id\": 8, \"post_subject\": \"Append data to file\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI am working on a scenario where i need to append data to an already existing file.\\nWhen i tried to write data after fetching data from the same file it showing we cant write data to a read file.\\nhow can we fix this ?\\n\\nthanks\\nabhi\", \"post_time\": \"2013-06-20 15:10:29\" },\n\t{ \"post_id\": 4261, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Re: Multivariate index?\", \"username\": \"flavio\", \"post_text\": \"Ahh, I see! \\n\\nUnfortunately the "multivariate" "feature" didn't do this (or anything else that I know of) \\n\\nFlavio\", \"post_time\": \"2013-06-26 19:19:42\" },\n\t{ \"post_id\": 4259, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Re: Multivariate index?\", \"username\": \"DSC\", \"post_text\": \"Oh, I agree! I'm absolutely abusing terminology. 
It's one of my favorite pastimes. It sometimes leads me to startling revelations that, upon inspection, have no basis in reality and were really just big wastes of time.\\n\\nThe idea of indexing set values makes a bit more sense if you extend the test syntax. To make something up on the spur of the moment:\\n\\n
SET OF UNSIGNED4 foo := [100,200,300];\\n\\nOUTPUT(ANY(foo) < 50); // FALSE\\nOUTPUT(ANY(foo) < 150); // TRUE
\\nThe implied usage here is that the system tests to see if any value in the set satisfies the condition. In this example, 'ANY(foo) = 200' would be identical to the already-supported '200 IN foo'.\\n\\nAnyway, the ability to generically search on multiple values within a single field, like the above, was where I was going with this whole thing. 'Multiple values' -> 'multivariate' -- probably wildly incorrectly.\\n\\nFun thought excercise, though.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-06-26 12:48:51\" },\n\t{ \"post_id\": 4257, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Re: Multivariate index?\", \"username\": \"flavio\", \"post_text\": \"Well... I think that you're stretching the terminology there, unless your set of scalars has more than one dimension (to apply the "multivariate" qualifier).\\n\\nBut for the particular application, wouldn't a hashmap, lookup table or a dictionary be a better choice? The b-tree would give you an O(log N) complexity, while the others could give you an O(1).\\n\\nFlavio\", \"post_time\": \"2013-06-26 11:59:36\" },\n\t{ \"post_id\": 4256, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Re: Multivariate index?\", \"username\": \"DSC\", \"post_text\": \"Thanks for the clarification, Flavio. While Hidden Field Equations would be terribly exciting to a teeny tiny fraction of the audience, I suspect the ROI on that work would just miss the mark.\\n\\nPersonally, I was hoping that the phrase meant that you could index a set of scalars (e.g. SET OF UNSIGNED4). The idea being that a match on any value from the set in that field returns the associated record. I think you could conceivably argue that that would be a "multivariate index" but that may just be a case of abusing the terminology.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2013-06-26 11:53:44\" },\n\t{ \"post_id\": 4255, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Re: Multivariate index?\", \"username\": \"flavio\", \"post_text\": \"Dan,\\n\\nI think that you just spotted a problem with the description in our features page. I personally don't know what a multivariate index is (I do know what multivariate statistics, multivariate calculus and multivariate cryptography are, though ). I did ask too, and nobody around seemed to know how the term "multivariate" got messed up with "indexes" in that page.\\n\\nI think that what that feature is referring to is compound (aka multi-field or multi-component) keys, equivalent to those of more traditional RDBM Systems.\\n\\nI could ask Richard to see if he wants to develop a Hidden Field Equations encrypted index format. Hidden Field Equations are a practical public key encryption system based on polynomials over finite fields (a type of multivariate cryptography), so that we can justify having that feature there, but I'm pretty sure that he'll just respond: "send me a pull request with your patch"... On a second thought, I think I'll just change that entry in the features page to "compound keys" and move along
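To make the "compound keys" wording concrete, here is a minimal sketch of a multi-field (compound) index and a filtered read of it; the file and field names are invented:

PersonRec := RECORD
  STRING20  lastname;
  STRING20  firstname;
  STRING2   state;
  UNSIGNED8 id;
END;
persons := DATASET('~demo::persons', PersonRec, THOR);

//compound key on lastname, firstname, state with id carried as payload
byName := INDEX(persons, {lastname, firstname, state}, {id}, '~demo::key::persons_byname');
//BUILD(byName); //build it once

//keyed on the leading field, wildcard the second, post-filter the third
OUTPUT(byName(KEYED(lastname = 'SMITH'), WILD(firstname), state = 'FL'));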
\\n\\nFlavio\", \"post_time\": \"2013-06-26 11:33:32\" },\n\t{ \"post_id\": 4253, \"topic_id\": 948, \"forum_id\": 8, \"post_subject\": \"Multivariate index?\", \"username\": \"DSC\", \"post_text\": \"The features page on this site contains the following verbiage in the Database Capabilities section:\\n\\n"The HPCC platform includes the capability to build multi-key, multivariate indexes on DFS files."\\n\\nWhat are multivariate indexes? Can someone provide an ECL example of one?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-06-25 19:23:01\" },\n\t{ \"post_id\": 4483, \"topic_id\": 949, \"forum_id\": 8, \"post_subject\": \"Re: Download image & store in THOR\", \"username\": \"richardkchapman\", \"post_text\": \"I think I would be inclined to try to use PIPE and wget - but I haven't worked through all the details.\", \"post_time\": \"2013-08-20 17:16:53\" },\n\t{ \"post_id\": 4266, \"topic_id\": 949, \"forum_id\": 8, \"post_subject\": \"Re: Download image & store in THOR\", \"username\": \"manojgvr\", \"post_text\": \"[quote="bforeman":loudvtr1]Did you try using HTTPCALL and then simply convert the text in STRING format to a DATA type?\\nThanks for the idea. We will try it and let you know .\\n\\nThanks\\nManoj\", \"post_time\": \"2013-06-28 04:52:09\" },\n\t{ \"post_id\": 4262, \"topic_id\": 949, \"forum_id\": 8, \"post_subject\": \"Re: Download image & store in THOR\", \"username\": \"bforeman\", \"post_text\": \"Did you try using HTTPCALL and then simply convert the text in STRING format to a DATA type?\", \"post_time\": \"2013-06-27 12:53:30\" },\n\t{ \"post_id\": 4258, \"topic_id\": 949, \"forum_id\": 8, \"post_subject\": \"Download image & store in THOR\", \"username\": \"manojgvr\", \"post_text\": \"Is there a way we can download an image using ECL program ? . We are parsing dynamic content which contains image urls . We need to download images from these URL\\n's and store this in THOR. ( in hexa decimal format ). We tried using HTTPCALL function ( but documentation says response mime type supported is only text/xml ) ? It will be great if you could throw some light on this ?\", \"post_time\": \"2013-06-26 12:28:05\" },\n\t{ \"post_id\": 4271, \"topic_id\": 950, \"forum_id\": 8, \"post_subject\": \"Re: Failure keyword does not work with MonitorFile\", \"username\": \"tmurphy\", \"post_text\": \"Issue has been opened. HPCC-9586.\", \"post_time\": \"2013-06-28 13:56:34\" },\n\t{ \"post_id\": 4270, \"topic_id\": 950, \"forum_id\": 8, \"post_subject\": \"Re: Failure keyword does not work with MonitorFile\", \"username\": \"bforeman\", \"post_text\": \"Thanks for your example code!\\n\\nI can confirm your behavior on my machine as well, but I'm not certain by reading the docs if this is an issue or expected behavior. \\n\\nCan you please open up an issue at the JIRA site?\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2013-06-28 12:42:31\" },\n\t{ \"post_id\": 4260, \"topic_id\": 950, \"forum_id\": 8, \"post_subject\": \"Failure keyword does not work with MonitorFile\", \"username\": \"tmurphy\", \"post_text\": \"When I add a FAILURE clause to handle errors that arise while processing files with MonitorFile, the normal feature that passes the file name to my event handler function stops working, and hpcc instead passes empty string. You can see this by commenting out the FAILURE clause in the sample; the file name then comes through just fine (assuming you have some files at the default lz location of course). 
I tried moving the FAILURE clause inside the handler function (attaching it to the logic that might fail - here shown as '...real logic would go here') and it again does not pass the file name. Is this a bug? \\n\\nIMPORT * FROM Std;\\n\\nkFoundFileEventName := 'FoundIncomingMessageFile';\\nkLandingZoneHostName := 'localhost'; \\nkLandingZoneHostIP := Std.System.Util.ResolveHostName(kLandingZoneHostName);\\nkLandingZoneDir := '/var/lib/HPCCSystems/mydropzone/';\\nkFilenamePattern := kLandingZoneDir + '*.*';\\n\\nMonitorFileAction := STD.File.MonitorFile(kFoundFileEventName,kLandingZoneHostIP,kFileNamePattern);\\n\\nRecoverFromError(STRING fullFilePath) := FUNCTION\\n RETURN SEQUENTIAL\\n (\\n STD.System.Log.addWorkunitInformation('at RecoverFromError' + fullFilePath,1),\\n MonitorFileAction\\n );\\nEND;\\n\\nHandleFoundFileEvent(STRING fullFilePath) := FUNCTION\\n\\thandleAction := SEQUENTIAL\\n (\\n STD.System.Log.addWorkunitInformation('at HandleFoundFile' + fullFilePath,1),\\n // ... real logic would go here\\n\\t// MonitorFileAction \\n ); \\n RETURN handleAction;\\nEND;\\n\\nHandleFoundFileEvent(EVENTEXTRA) : WHEN(EVENT(kFoundFileEventName,'*')) ,FAILURE(RecoverFromError(EVENTEXTRA));\\n\\nMonitorFileAction;\", \"post_time\": \"2013-06-26 19:06:53\" },\n\t{ \"post_id\": 4274, \"topic_id\": 951, \"forum_id\": 8, \"post_subject\": \"Re: Reading Logical files\", \"username\": \"abhisr\", \"post_text\": \"Yes ,\\nI used that approach.\\n\\n
COUNT(DATASET('~my::LogicalFile',{STRING a},CSV(SEPARATOR(''))));
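Another sketch that avoids reading the file contents entirely is to ask the DFU metadata for the stored record count. It assumes STD.File.GetLogicalFileAttribute on your build accepts 'recordCount' as an attribute name, so verify that before relying on it:

IMPORT STD;
cnt := (UNSIGNED) STD.File.GetLogicalFileAttribute('~my::LogicalFile', 'recordCount');
OUTPUT(cnt);
//for files held by the other cluster, addressing them with a '~foreign::<dali-ip>::...'
//logical name is one possible way to fetch the figure to compare against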
\\n\\nThankyou !!\", \"post_time\": \"2013-06-28 18:15:31\" },\n\t{ \"post_id\": 4273, \"topic_id\": 951, \"forum_id\": 8, \"post_subject\": \"Re: Reading Logical files\", \"username\": \"bforeman\", \"post_text\": \"The best thing I could recommend would be to create a "generic" one field variable string record structure and then parse through it with ECL.\\n\\nBut it's hard to get record counts without knowing the record structure.\\n\\nAs I said above parsing would be your only other alternative, to look for end of line terminators and then just count that.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-28 17:44:48\" },\n\t{ \"post_id\": 4263, \"topic_id\": 951, \"forum_id\": 8, \"post_subject\": \"Reading Logical files\", \"username\": \"abhisr\", \"post_text\": \"hi,\\n\\nI have a set of logical file names which I need to read from two different clusters and do a comparison of their record counts but the layout is not available with me [DATASET needs Rec.Structure to read the files] ;\\n\\nI can find the layout of the files manually, however I have 1000's of logical files available to do the comparison operation .\\n\\nHow can I achieve this ?\", \"post_time\": \"2013-06-27 13:34:04\" },\n\t{ \"post_id\": 6726, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"rtaylor\", \"post_text\": \"Emma,\\n\\nThis should work for a single field:
MyFunc(ds,grp) := FUNCTIONMACRO\\n R := RECORD\\n ds.grp;\\n Cnt := COUNT(GROUP);\\n END;\\n RETURN TABLE(ds,R,grp);\\nENDMACRO;
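A hypothetical call of the FUNCTIONMACRO above (the dataset and field names are invented purely for illustration):
Persons := DATASET('~class::persons',{STRING25 state,STRING1 gender},THOR);
OUTPUT(MyFunc(Persons,state)); // one row per state value, with its record count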
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-27 15:52:23\" },\n\t{ \"post_id\": 6725, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"alicia\", \"post_text\": \"I m trying to write a functionmacro that creates a simple crosstab report. The input parameters are a dataset and a CSV separated string of column names on which to perform the GROUP\", \"post_time\": \"2014-12-25 10:53:15\" },\n\t{ \"post_id\": 4305, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"soumyadip\", \"post_text\": \"Ok, I give up, it doesn't look like a FUNCTIONAMACRO is suitable for what I am trying to get done. I converted the thing into a plain-jane MACRO.\\n\\nOne of my colleagues helped simplify the code a bit, and here is what I have working right now:\\nEXPORT MAC_Xtab(ds,lCol='') := MACRO\\n LOADXML('<xml/>');\\n #DECLARE(xtab_layout);\\n #EXPORTXML(fields,RECORDOF(ds)); \\n #IF(#TEXT(lCol)<>'')\\n\\t#SET(xtab_layout,STD.Str.FindReplace(lCol,',',';'));\\n\\tOUTPUT(TABLE(ds,{%xtab_layout%, Cnt:= COUNT(GROUP);},#EXPAND(lcol)),NAMED('Group')); \\n #ELSE\\n\\t#FOR(fields)\\n\\t\\t#FOR(Field)\\n\\t\\t\\tOUTPUT(TABLE(ds,{%{@label}%; Cnt := COUNT(GROUP);},%{@label}%),NAMED(#TEXT(%{@label}%)),ALL);\\n\\t\\t#END\\n #END;\\n #END\\nENDMACRO;
\\nObviously the BWR had to be modified slightly:\\nIMPORT STD,$;\\n\\nfact_table_layout := $.Constants.fact_table_layout;\\ndsIn := DATASET('~undata::trade::commodity::facttable',fact_table_layout,THOR);\\n\\n$.Mac_Xtab(dsIn,'country,commodity_code');
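For comparison, a hedged sketch of the same call with the column-list string omitted (as described next), which makes the MACRO fall through to its #ELSE branch and emit one count table per field:
$.Mac_Xtab(dsIn); // no column list: one crosstab OUTPUT per field in dsIn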
\\nTake out the CS string in the last line, and the MACRO generates counts for each of the fields in the DS.\\n\\nHope that helps.\", \"post_time\": \"2013-07-10 15:14:05\" },\n\t{ \"post_id\": 4302, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"soumyadip\", \"post_text\": \"[quote="rtaylor":3soobdb1]1. You could look at using OUTPUT's EXTEND option, which of course requires that each dataset be exactly the same structure, since it is creating a single dataset (not a set of datasets).\\n\\n2. Using #UNIQUENAME would give you generated definition names, but they are only known internally to the MACRO unless you use its optional second "pattern" parameter to specify how the names must be generated.\\n\\nHTH,\\n\\nRichard\\nThanks a ton Richard for the pointer on #UNIQUENAME, I missed that completely. Has the syntax been modified in 4.0.0 to expect a string as the pattern like:\\n\\n#UNIQUENAME(crosstab_table, '_Groupby_$_');
\\n\\nI ask because I keep getting an "Expected string" error during syntax check when I did not put in the quotes for the pattern parameter.\\n\\nI suspect treating the pattern as a string may have been an incorrect step, since I can't get a subsequent SET append operation to work:\\n\\nSET OF DATASET Outputfile:=[];\\n...\\n#UNIQUENAME(set_of_table, '_Table_$_');\\n...\\n%crosstab_table% := [TABLE(InputFile,%crosstab_layout%,%{@name}%)];\\n#SET(%set_of_table%,Outputfile + %crosstab_table%);\\nOutputfile := %set_of_table%;
\\n\\nor a variation:\\n\\n%set_of_table:=Outputfile + %crosstab_table%;\\nOutputfile := %set_of_table%;
\\n\\nThis is my last question in this thread, to avoid going off-topic. If I have further questions, I will be creating a new thread.\", \"post_time\": \"2013-07-09 19:28:36\" },\n\t{ \"post_id\": 4294, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"rtaylor\", \"post_text\": \"Now I would have ideally loved to return a set of datasets/tables. However, iterating through the fields one by one, I can't figure out a way to:\\n1.Sequentially append to a set of datasets/tables or\\n2.Generate separate n tables with template-generated names to create a set from
1. You could look at using OUTPUT's EXTEND option, which of course requires that each dataset be exactly the same structure, since it is creating a single dataset (not a set of datasets).\\n\\n2. Using #UNIQUENAME would give you generated definition names, but they are only known internally to the MACRO unless you use its optional second "pattern" parameter to specify how the names must be generated.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-08 20:55:16\" },\n\t{ \"post_id\": 4283, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"soumyadip\", \"post_text\": \"I'll switch this topic into a question of approach. Trying to make the macro a little more useful, I tried changing the macro so that if it detects that there is no groupcolumn parameter, it will create count tables for distinct values in all the fields of the dataset. So given a dataset of this type:\\n\\ncountry\\tyear\\tcommodity_code\\tdirection_flag\\tamount\\t weight_flag\\nAlbania\\t2011\\t100110\\t 1\\t 4018470 1\\t\\nAlbania\\t2011\\t100190\\t 1\\t 301925387 1\\nAlbania\\t2011\\t100190\\t 2\\t 87360\\t 1
\\nthere will be 6 tables with rows grouped by each of the fields (I know it is a little pointless, still...).\\n\\nNow I would have ideally loved to return a set of datasets/tables. However, iterating through the fields one by one, I can't figure out a way to:\\n1.Sequentially append to a set of datasets/tables or\\n2.Generate separate n tables with template-generated names to create a set from\\n\\nI would imagine the second approach to be ideal, but how do I create a table with the name:\\ntGrouped_by_%'{@name}'%
or tGrouped_by_%'Ndx'%
\\n\\nOr should I just convert this into a macro (instead of the functionmacro as it is currently) and stick it inside a module, exporting the tables as soon as they are created. Even then, the problem of naming the tables comes back to haunt me.\\n\\nIs there any other obvious approach that I am missing?\", \"post_time\": \"2013-07-02 20:53:27\" },\n\t{ \"post_id\": 4281, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for circling back and sharing your result! And there are never stupid questions...ask away! \", \"post_time\": \"2013-07-02 19:55:19\" },\n\t{ \"post_id\": 4280, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"Re: String Library functions inside a FunctionMacro\", \"username\": \"soumyadip\", \"post_text\": \"I was being stupid. Here is a working version (this does not handle errors yet):\\n
IMPORT STD,$;\\nEXPORT MAC_Crosstab(InputFile,GroupColumn) := FUNCTIONMACRO\\nLOADXML('<xml/>');\\n\\n#DECLARE(crosstab_layout);\\n#DECLARE(ndx);\\n#SET (Ndx, 0);\\n#SET(crosstab_layout, '{');\\n#EXPORTXML(AllFields, recordof(inputFile));\\n#FOR (AllFields)\\n\\t#FOR (Field)\\n\\t#SET (Ndx, %Ndx% + 1);\\n\\t\\t#IF(STD.Str.Contains(GroupColumn,%'{@name}'%,TRUE))\\n\\t\\t\\t#IF(%Ndx%=1)\\n\\t\\t\\t\\t#APPEND(crosstab_layout,%'{@name}'%);\\n\\t\\t\\t#ELSE\\n\\t\\t\\t\\t#APPEND(crosstab_layout,','+%'{@name}'%);\\n\\t\\t\\t#END\\n\\t\\t#END\\n\\t#END\\n#END\\n#APPEND(crosstab_layout,',COUNT(GROUP)}');\\n#IF(%Ndx%=0)\\n\\t#SET(crosstab_layout, '');\\n#END\\nOutputfile:=TABLE(InputFile, %crosstab_layout%, #EXPAND(GroupColumn));\\nRETURN Outputfile;\\nENDMACRO;
\\n\\nAnd here is a BWR to test this:\\nIMPORT STD,$;\\n\\nfact_table_layout := $.Constants.fact_table_layout;\\ndsIn := DATASET('~undata::trade::commodity::facttable',fact_table_layout,THOR);\\ndsOut:= $.Mac_Crosstab(dsIn,'country,commodity_code');\\nOUTPUT(dsOut);
\", \"post_time\": \"2013-07-02 15:09:40\" },\n\t{ \"post_id\": 4275, \"topic_id\": 956, \"forum_id\": 8, \"post_subject\": \"String Library functions inside a FunctionMacro\", \"username\": \"soumyadip\", \"post_text\": \"I'm probably doing something very stupid, but it's my first time writing a macro in ECL so please forgive my stupidity.\\n\\nI'm trying to write a functionmacro that creates a simple crosstab report. The input parameters are a dataset and a CSV separated string of column names on which to perform the GROUP. \\n\\nI am using the HPCC VM (ver. 3.10.8). Here is the functionmacro definition:\\n\\nIMPORT STD;\\nEXPORT MAC_Crosstab(InputFile,GroupColumn) := FUNCTIONMACRO\\nLOADXML('<xml/>');\\n\\n#DECLARE(crosstab_layout);\\n#SET(crosstab_layout, 'RECORD');\\n#EXPORTXML(AllFields, recordof(inputFile));\\n#FOR (AllFields)\\n\\t#FOR (Field)\\n\\t\\tBOOLEAN IsGrpColumn := STD.Str.Contains(GroupColumn,%'{@name}'%);\\n\\t\\t#IF(IsGrpColumn)\\n\\t\\t\\t#APPEND(crosstab_layout,' '+%'{@type}'%+%'{@size}'%+' '+%'{@name}'%+';')\\n\\t\\t#END\\n\\t#END\\n#END\\n#APPEND(crosstab_layout,' COUNT(GROUP); END;')\\n\\nRETURN OutputFile:=TABLE(InputFile, %crosstab_layout%, #EXPAND(GroupColumn));\\nENDMACRO;
\\n\\nThe layout for the input dataset is:\\n\\nEXPORT fact_table_layout := RECORD\\n\\tstring32\\t\\tcountry;\\n\\tunsigned4\\t\\tyear;\\n\\tstring6\\t\\t\\tcommodity_code;\\n\\tunsigned1\\t\\tdirection_flag;\\n\\tunsigned8\\t\\tamount;\\n\\tunsigned1\\t\\tweight_flag;\\nEND;
\\n\\nThe target result is to have something like:\\n\\nIMPORT $;\\n\\nfact_table_layout := $.Constants.fact_table_layout;\\ndsIn := DATASET('~undata::trade::commodity::facttable',fact_table_layout,THOR);\\ndsOut:=$.Mac_Crosstab(dsIn,'country,commodity_code');\\nOUTPUT(dsOut);
\\n\\nThe problem is that whenever I do a syntax check on the last bit of code in a builder window, I get this error:\\nError: Unknown identifier "STD" (10, 32), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\UNdata\\\\MAC_CrossTab.ecl
\\n\\nIs it possible to use the standard library in the macro? Is there a better way to create a RECORD definition?\", \"post_time\": \"2013-06-29 06:15:49\" },\n\t{ \"post_id\": 4310, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"rtaylor\", \"post_text\": \"ceejac,My .xml file is a logical file with single part. It contains an xml w/o any cr/lf. i gave as loadxml('~comparator::myfile.xml') but it says failed to load it.
Try just putting the contents of that XML file in your LOADXML argument and see if that works. IOW, make sure your code will correctly parse the text. Once you've done that, then you can decide how to get the XML string to LOADXML -- as a file parameter, or by passing the XML string itself to the MACRO. \\n\\n I had one more doubt.\\nis ther anyway to give a value other than constant expressions inside #if\\nfor eg:\\nn:=count(<any dataset>);\\n#SET (Ndx, 1);\\n#LOOP\\n#IF (%Ndx% >n) \\nwhen i give 'n' like this i get the error saying constant expression expected.
The Template Language was originally designed to work with "static" text as its input, so the error is telling you that. Once again, you can pass that COUNT(dataset) value to the MACRO and use that passed parameter and it should work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-11 13:04:42\" },\n\t{ \"post_id\": 4308, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"ceejac\", \"post_text\": \"My .xml file is a logical file with single part. It contains an xml w/o any cr/lf. i gave as loadxml('~comparator::myfile.xml') but it says failed to load it. \\n\\nI had one more doubt.\\nis ther anyway to give a value other than constant expressions inside #if\\nfor eg:\\nn:=count(<any dataset>);\\n#SET (Ndx, 1);\\n #LOOP\\n #IF (%Ndx% >n) \\nwhen i give 'n' like this i get the error saying constant expression expected.\", \"post_time\": \"2013-07-11 12:31:27\" },\n\t{ \"post_id\": 4300, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"rtaylor\", \"post_text\": \"Where is the XML file coming from? Is it a logical file with multiple physical parts? If so, it will not work.\\n\\nLOADXML is designed to load an XML datastream so the Template Language can parse it to generate ECL code. It can take the XML as a single STRING, or from a file that contains the single XML string to parse. Therefore, this LOADXML will work:LOADXML('<section><item type="count"><set>person</set></item></section>')
While this will also work if MyFile.XML is a single-part file that contains only that same string:LOADXML('MyFile.XML') //process the XML in MyFile.XML
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-09 14:22:49\" },\n\t{ \"post_id\": 4298, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"bforeman\", \"post_text\": \"Is your XML file well formed? Does it follow the requirements of the LOADXML statement? No CR/LF characters allowed.\", \"post_time\": \"2013-07-09 11:54:56\" },\n\t{ \"post_id\": 4295, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"ceejac\", \"post_text\": \"Hi Bob,\\nThanks for the reply. Tried both the ways but i am still getting the same error.\", \"post_time\": \"2013-07-09 03:59:07\" },\n\t{ \"post_id\": 4292, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Re: Using XMLfiles in LOADXML\", \"username\": \"bforeman\", \"post_text\": \"Did you try it with the overriding scope symbol? Example:\\n\\nLoadXML('~comparator::xml_code.xml')
\\n\\nOr the fully qualified logical name? Example:\\n\\nLoadXML('thor::comparator::xml_code.xml')
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-07-08 17:35:18\" },\n\t{ \"post_id\": 4287, \"topic_id\": 959, \"forum_id\": 8, \"post_subject\": \"Using XMLfiles in LOADXML\", \"username\": \"ceejac\", \"post_text\": \"Hi,\\nI need to build a dataset dynamically and read this dataset as an xml for use in template function.\\n For this i need to use loadxml() to read the xml from a file.\\n but to use the syntax loadxml(myfilename.xml) \\n myfilename - is it a logical file?\\n if so how come it is given with an extension in the language reference\\n even if i give a logical file name i get the following error'Error: Load XML('comparator::xml_code.xml') failed '..(thats the logical file i am trying to read)\\n Kindly suggest how to use the loadxml command?\", \"post_time\": \"2013-07-04 09:57:23\" },\n\t{ \"post_id\": 4291, \"topic_id\": 960, \"forum_id\": 8, \"post_subject\": \"Re: Update data using PackageMap\", \"username\": \"bforeman\", \"post_text\": \"It looks like what you are doing is correct, thanks for the detailed report.\\n\\nThere were some issues reported with packagemaps in version 3.10 that have been fixed in Version 4.0 - you can download and test with the release candidate now.\\n\\nhttp://hpccsystems.com/download/free-community-edition/server-platform/beta\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-07-08 17:21:39\" },\n\t{ \"post_id\": 4289, \"topic_id\": 960, \"forum_id\": 8, \"post_subject\": \"Update data using PackageMap\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThere is a scenario where i need to automatically deploy the query to roxie and get the latest data.\\n\\nI am trying that approach using the PackageMap, but i dont see the data getting updated, instead just have the old data.\\n\\nSteps:\\n\\n1.) Create a superkey with the initial set of data.\\n\\n\\nIMPORT STD;\\nFile1 := '~thor::sample_test_roxie_1';\\n\\nrecStr := RECORD\\n STRING10 name;\\n UNSIGNED4 ID;\\n UNSIGNED8 amount;\\nEND;\\n\\n\\nDsOut := DATASET(File1,recStr , CSV(separator('\\\\t')));\\n\\nSubKey1 := '~thor::subkey_test_roxie_1';\\nSubIDX1 := '~thor::subIndex_test_roxie_1';\\n\\nBldDat := IF(~Std.File.FileExists(SubKey1),\\n OUTPUT(DsOut,,SubKey1));\\n\\nRecPlus := {recStr,UNSIGNED8 RecPos{virtual(fileposition)}};\\n\\nd1 := DATASET(SubKey1,RecPlus,THOR);\\n\\ni1 := INDEX(d1,{Id},\\n {name,amount,RecPos},\\n SubIDX1);\\n\\nBldIDX := IF(~Std.File.FileExists(SubIDX1),\\n BUILDINDEX(i1));\\n\\nCreate_Superkeys := SEQUENTIAL(BldDat,BldIDX);\\n\\n//adding logical files to superfile\\n\\nkSuperFilePath := '~thor::sample::SuperKey_Master';\\n\\naddtosuperfile:= sequential(\\n STD.File.CreateSuperFile(kSuperFilePath,allow_exist:=TRUE);\\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile(kSuperFilePath, SubIDX1),\\n STD.File.finishSuperFileTransaction(),\\n );\\nFinal_Output := SEQUENTIAL(Create_Superkeys,addtosuperfile);\\n\\nFinal_Output; \\n\\n
\\n\\n2.) Publish the query using the initial data in roxie. Published the data using the query name : "publish_test"\\n\\n\\nkSuperFilePath := '~thor::sample::SuperKey_Master';\\n\\ndsInd := INDEX({UNSIGNED4 Id},\\n {string10 NAME, UNSIGNED8 AMOUNT , UNSIGNED8 recPos} ,kSuperFilePath);\\n\\nrecStr_no_fpos := RECORD\\n UNSIGNED4 ID := dsInd.Id;\\n STRING10 name := dsInd.name;\\n UNSIGNED8 Amt := dsInd.Amount;\\n\\nEND;\\n\\nFinal_Output := TABLE(dsInd, recStr_no_fpos);\\n\\nOUTPUT(Final_Output);\\n
\\n\\n3.) Create a packagemap containing a package defining the contents of the superkey\\n\\nPackage Name : "mypackagemap.pkg"\\n\\n\\n<RoxiePackages>\\n <!-- Begin Queries -->\\n <Package id="publish_test">\\n <Base id="thor::sample::superkey_master"/>\\n </Package>\\n <!-- End Queries -->\\n <!-- Begin File references -->\\n <Package id="thor::sample::superkey_master">\\n <SuperFile id="~thor::sample::superkey_master">\\n <SubFile value="~thor::subindex_test_roxie_1"/>\\n </SuperFile>\\n </Package>\\n<!--End File references -->\\n</RoxiePackages>\\n
\\n\\n4.) Add the packagemap by associating the package information with a QuerySet\\n\\n\\n ecl packagemap add -s=192.xxx.xxx.xxx roxie mypackagemap.pkg -O -A -v\\n
\\n\\n5.) Prepare the data and create a new subfile.\\n\\n New File : ~thor::subindex_test_roxie_2"\\n
\\n\\n6.) Create a package with a superkey definition that includes the new subfile\\n\\n<RoxiePackages>\\n <!-- Begin Queries -->\\n <Package id="publish_test">\\n <Base id="thor::sample::superkey_master"/>\\n </Package>\\n <!-- End Queries -->\\n <!-- Begin File references -->\\n <Package id="thor::sample::superkey_master">\\n <SuperFile id="~thor::sample::superkey_master">\\n <SubFile value="~thor::subindex_test_roxie_1"/>\\n <SubFile value="~thor::subindex_test_roxie_2"/>\\n </SuperFile>\\n </Package>\\n<!--End File references -->\\n</RoxiePackages>\\n \\n
\\n\\n7.) Add the packagemap redefining the contents of the superkey.\\n\\n ecl packagemap add -s=192.xxx.xxx.xxx roxie mypackagemap.pkg -O -A -v\\n
\\n\\nNow when i query in roxie, i am not able to view the updated data, instead it just shows the "roxie_1" content. \\n\\nAnd manually do we have to change the package each time we get the new data by adding the new subfile in the package..?\\n\\nI have set the flag "allFilesDynamic : false" based on a forum post.\\n\\nI am testing this in a virtual machine and version is 3.10.8.9\\n\\nAm i missing something..?\\n\\nKindly help me regarding the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-08 07:26:46\" },\n\t{ \"post_id\": 4306, \"topic_id\": 961, \"forum_id\": 8, \"post_subject\": \"SOAP call to ESP Webservice for WUQuery fails\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI need to get all the Workunit list written in a production box.As I am runing this from a diffrent environment i cant use the STD.System.Workunit.WorkunitList(''));\\n
.\\nSo i relied on the GetDFUWorkunits
of ESP , and am trying to make a SOAP call from my ECl code. I am getting error.\\n\\nMy code here\\n\\n\\nip\\t:='http://my_server_ip:8010/FileSpray' ;\\nsvc := 'GetDFUWorkunits';\\n\\n\\noutrec :=\\nRECORD\\n\\tSTRING Results {xpath('results')};\\nEND;\\n\\nOneRec1 := SOAPCALL(ip,svc,{STRING CLUSTER := 'thor11'},outrec,xpath('GetDFUWorkunitsResponse'));\\nOUTPUT(OneRec1);
\\n\\nError\\nError: System error: -1: <Error><text>HTTP error (500) in processQuery</text><soapresponse><?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext"><soap:Body><soap:Fault><faultcode>400</faultcode><faultstring>[400: Bad Request [Method GetDFUWorkunitsRequest not available in service FileSpray]] </faultstring><faultactor>Esp</faultactor><detail><Exceptions xmlns="urn:hpccsystems:ws:filespray" xsi:schemaLocation="urn:hpccsystems:ws:filespray http://10.194.10.2:8010/FileSpray/?xsd"><Source>Esp</Source><Exception><Code>400</Code><Audience>user</Audience><Message>Bad Request [Method GetDFUWorkunitsRequest not available in service FileSpray]</Message></Exception></Exceptions></detail></soap:Fault></soap:Body></soap:Envelope></soapresponse><url>http://10.194.10.2:8010/FileSpray</url></Error> (in SOAP dataset G1 E2)
\\n\\nhelp !!\", \"post_time\": \"2013-07-10 16:08:04\" },\n\t{ \"post_id\": 4325, \"topic_id\": 963, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL/SOAPCALL for ECL Package Maps\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi ksviswa,\\n\\nYou've left the "wsdl" parameter in the URL you are using. Adding that parameter will always just return the wsdl for that operation. Try the same call without it.\\n\\nBtw, you don't need the . in front of each parameter, although it should work with or without it anyway.\\n\\nAlso, the result is going to be a nested structure of the form:\\n\\n<AddPackageResponse>\\n<status>\\n<Code>0</Code>\\n<Description>Successfully loaded test.pkg</Description>\\n</status>\\n</AddPackageResponse>\\n\\nYou may want to change your outRec1 structure to reflect that.\\n\\nI'll have to get back to you on your request for a SOAPCALL example... and to verify the encoding requirements for HTTPCALL.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-07-15 14:11:28\" },\n\t{ \"post_id\": 4323, \"topic_id\": 963, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL/SOAPCALL for ECL Package Maps\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot..\\n\\nI was able to create the web service for adding a package.\\n\\nFew doubts :\\n\\n1.) Do we need to create a .pkg file initially..? Incase we are specifying the contents of the packagemap in the "INFO" parameter\\n\\nSample HTTPCALL :\\n\\n\\n\\nOutRec1 := RECORD\\n string1 result;\\nEND;\\n \\nraw := HTTPCALL('http://54.212.48.67:8010/WsPackageProcess/AddPackage?Wsdl&ver_=1&.Info=%3CRoxiePackages%3E%3CPackage%20id%3D%22publish_test2%22%3E%3CBase%20id%3D%22thor%3A%3Asample%3A%3Asuperkey_master%22%2F%3E%3C%2FPackage%3E%3CPackage%20id%3D%22thor%3A%3Asample%3A%3Asuperkey_master%22%3E%3CSuperFile%20id%3D%22~thor%3A%3Atest%3A%3Asample%3A%3Asuperkey_master_new%22%3E%3CSubFile%20value%3D%22~thor%3A%3Atest%3A%3Asubindex_test_roxie_2%22%2F%3E%3C%2FSuperFile%3E%3C%2FPackage%3E%3C%2FRoxiePackages%3E&.Target=roxie&.Activate=1&.OverWrite=1&.PackageMap=test.pkg','GET', 'text/xml', OutRec1, onfail(skip));\\nOUTPUT(raw); \\n\\n
\\n\\nIs this sample code correct for HTTPCALL..? \\n\\nI tried the above approach and was able to view the updated result in roxie only if the .pkg file was created initially.\\n\\nKindly advise me with the sample code for SOAPCALL for the above scenario..?\\n\\n2.) Do we need to encode the URL in HTTPCALL..?\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-15 12:52:00\" },\n\t{ \"post_id\": 4313, \"topic_id\": 963, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL/SOAPCALL for ECL Package Maps\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi ksviswa,\\n\\nAll of the web services for dealing with packagemaps can be found under:\\n\\nhttp://IP:8010/WsPackageProcess/\\n\\nIf you browse that location you should find a list of available operations.\\n\\nFor packagemap-add the wsdl is located at:\\n\\nhttp://IP:8010/WsPackageProcess/AddPackage?wsdl\\n\\nAnd the test form is:\\n\\nhttp://IP:8010/WsPackageProcess/AddPackage?form\\n\\nFYI for AddPackage the contents of the packagemap go in the "INFO" parameter.\\n\\nFor calling from ECL, you can use HTTPCALL or SOAPCALL to call them, but for operations with large or complex input parameters (like sending the contents of a packagemap) I would probably recommend SOAPCALL.\\n\\nHTH, Regards,\\nTony\", \"post_time\": \"2013-07-11 17:02:23\" },\n\t{ \"post_id\": 4311, \"topic_id\": 963, \"forum_id\": 8, \"post_subject\": \"HTTPCALL/SOAPCALL for ECL Package Maps\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nDo we have any HTTPCALL/SOAPCALL for ecl package maps.\\n\\nEx : \\n\\necl packagemap add -s=192.xxx.xxx.xxx roxie mypackagemap.pkg -O -A -v\\n\\n\\nHow would this be done using HTTPCALL/SOAPCALL..?\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-11 13:20:21\" },\n\t{ \"post_id\": 4318, \"topic_id\": 964, \"forum_id\": 8, \"post_subject\": \"Re: Activate/DeActivate Roxie Clusters\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Richard.\\n\\nI was referring for 3 Roxie Clusters only, sorry for the typo.\\n\\nAny idea how we configure multiple roxie clusters manually using AWS..?\\n\\nAny documentation available for the same..?\\n\\nKindly suggest..\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-12 09:36:24\" },\n\t{ \"post_id\": 4315, \"topic_id\": 964, \"forum_id\": 8, \"post_subject\": \"Re: Activate/DeActivate Roxie Clusters\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,I have a scenario where there are 3 Roxie nodes in a cluster.\\n\\nHow do we activate or deactivate the roxie nodes using HTTPCALL/SOAPCALL.?
Simple answer: you don't. \\n\\nThe nodes in an HPCC cluster (Thor or Roxie) are not dynamically configurable. Once in use, a cluster stays configured as it is until you manually bring it down and re-configure the nodes in some other manner. IOW, a 3-node Roxie stays a 3-node Roxie until you bring it down and specify that those boxes are now going to be something other than a 3-node Roxie.\\n\\nUnless, of course, what you actually mean is you have 3 Roxie clusters in an environment and you want to know how to dynamically change which ones are actively receiving queries at any given time (which would be controlled by your load balancer, not the Roxie clusters themselves).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-11 19:53:27\" },\n\t{ \"post_id\": 4312, \"topic_id\": 964, \"forum_id\": 8, \"post_subject\": \"Activate/DeActivate Roxie Clusters\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where there are 3 Roxie nodes in a cluster.\\n\\nHow do we activate or deactivate the roxie nodes using HTTPCALL/SOAPCALL.?\\n\\nHow we get to know the list of roxie nodes within a cluster and which of them are active or deactive..? Do we have any inbuilt function for the same..?\\n\\nKindly help regarding the same..\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-11 13:33:25\" },\n\t{ \"post_id\": 4321, \"topic_id\": 965, \"forum_id\": 8, \"post_subject\": \"Re: Logging\", \"username\": \"rtaylor\", \"post_text\": \"Rajaganesh,\\n\\nThe error message just indicates that the STD.System.Log.addWorkunitInformation('Process Started',1); code is an inappropriately placed action within the context of your ECL code. You simply need to use the WHEN function to indicate to the system what exactly will trigger that action's performance. \\n\\nThe WHEN function is documented here: http://hpccsystems.com/community/docs/ecl-language-reference/html/when-function\\nDont we have alternate way like where we used to write logs where ever we need in .net
ECL code and .NET code are very different because ECL is a declarative, non-procedural language and all the .NET languages I've looked at are procedural. IOW, you have to think about your code differently in ECL than in any procedural language. This fundamental concept is discussed in our Intro to ECL online course, available for free here: http://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-12 15:25:39\" },\n\t{ \"post_id\": 4320, \"topic_id\": 965, \"forum_id\": 8, \"post_subject\": \"Logging\", \"username\": \"Rajaganesh\", \"post_text\": \"Hi All,\\n\\nI am new to ECL. Right now I am coding in thor. I have written Few Function below the MODULE. I just tried to get the log details by passing below code inbetween different functionalities\\n\\nSTD.System.Log.addWorkunitInformation('Process Started',1); \\n\\nBut, I am getting a error to implement "When" Function. Dont we have alternate way like where we used to write logs where ever we need in .net\", \"post_time\": \"2013-07-12 14:55:41\" },\n\t{ \"post_id\": 4369, \"topic_id\": 976, \"forum_id\": 8, \"post_subject\": \"Re: Foreign Files\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nAccessing a file in another environment is possible in the community edition -- I just tested it between two of my training clusters, so the foreign dali filename syntax does work.\\n\\nTherefore, this must be a limitation with the AWS implementation. I am told by the guys that setup the AWS access that there is no guarantee that the clusters in the cloud would be visible to each other. Therefore, it may be time to think about moving away from AWS, either to your own hardware, our cloud, or another hosting provider. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-30 03:44:18\" },\n\t{ \"post_id\": 4351, \"topic_id\": 976, \"forum_id\": 8, \"post_subject\": \"Foreign Files\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where i have launched 2 clusters using AWS.\\n\\nI create a superkey in one of the cluster and trying to read the same from the 2nd cluster using foreign in the logical file.\\n\\nEx:\\n\\n\\n\\nCluster 1 :\\n\\nIMPORT STD;\\nFile1 := '~thor::test::sample_test_roxie_1';\\n\\nrecStr := RECORD\\n STRING10 name;\\n UNSIGNED4 ID;\\n UNSIGNED8 amount;\\nEND;\\n\\n\\nDsOut := DATASET(File1,recStr , CSV(separator('\\\\t')));\\n\\nSubKey1 := '~thor::test::subkey_test_roxie_1';\\nSubIDX1 := '~thor::test::subIndex_test_roxie_1';\\n\\nBldDat := IF(~Std.File.FileExists(SubKey1),\\n OUTPUT(DsOut,,SubKey1));\\n\\nRecPlus := {recStr,UNSIGNED8 RecPos{virtual(fileposition)}};\\n\\nd1 := DATASET(SubKey1,RecPlus,THOR);\\n\\ni1 := INDEX(d1,{Id},\\n {name,amount},\\n SubIDX1);\\n\\nBldIDX := IF(~Std.File.FileExists(SubIDX1),\\n BUILDINDEX(i1));\\n\\nCreate_Superkeys := SEQUENTIAL(BldDat,BldIDX);\\n\\n//adding logical files to superfile\\nkSuperFilePath := '~thor::test::sample::SuperKey_Master_New_23_July_2013';\\n\\naddtosuperfile:= sequential(\\n STD.File.CreateSuperFile(kSuperFilePath,allow_exist:=TRUE);\\n STD.File.StartSuperFileTransaction(),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTD.File.clearSuperfile(kSuperFilePath);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTD.File.AddSuperFile(kSuperFilePath, SubIDX1),\\n STD.File.finishSuperFileTransaction(),\\n );\\nFinal_Output := SEQUENTIAL(Create_Superkeys,addtosuperfile);\\n\\nFinal_Output;\\n\\n
\\n\\nThis will create a superkey.\\n\\n\\nI am trying to access the superkey from a different cluster\\n\\n\\n\\ncluster 2:\\n\\nkSuperFilePath := '~foreign::<dali ip for the first cluster>::thor::test::sample::SuperKey_Master_New_23_July_2013';\\n\\ndsInd := INDEX({UNSIGNED4 Id},\\n {string10 NAME, UNSIGNED8 AMOUNT} ,kSuperFilePath);\\n\\n\\nOUTPUT(dsInd);\\n\\n
\\n\\nI get the following error when i execute this query in the second cluster \\n\\n\\nError: System error: 7: DFS Exception: 7: Timeout connecting to Dali Server on 54.xxx.xxx.xxx (0, 0), 7, \\n
\\n\\nI have tried a similar approach using STD.File.RemotePull,still i get a similar error.\\n\\n\\nimport STD;\\n\\nSTD.File.RemotePull('<ESP IP of 2nd cluster >/FileSpray',\\n '~thor::test::sample::SuperKey_Master_New_23_July_2013',\\n 'thor',\\n '~remotethor::test::sample::SuperKey_Master_New_23_July_2013');\\n
\\n\\nWhat is the procedure to read or access a file from a different cluster..? \\n\\nAm i missing something or is it because of AWS configuration..?\\n\\nKindly advise\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-07-23 12:03:20\" },\n\t{ \"post_id\": 4368, \"topic_id\": 979, \"forum_id\": 8, \"post_subject\": \"Re: Please explain PROJECT(<module>, <interface>)\", \"username\": \"rtaylor\", \"post_text\": \"rhimbo,\\n\\nPer the docs: "This allows you to create a module for one interface with the values\\nbeing provided by another interface." \\n\\nThat means your statement: It seems to me that the only purpose is to take attributes of a MODULE definition and "package them up" as an INTERFACE.
is absolutely correct. The MODULE, in this case, is a concrete instance of some existing INTERFACE whose format is not the same as that required by the function you want to pass it to. \\n\\nSo, given an interface that provides the correct information but not in the correct form, you can use PROJECT(module, interface) to make the required adjustment in form so you can call the function expecting the interface despite the fact that you're not getting the parameters in the correct format, thus eliminating a need to write two instances of the same function whose only difference would be the format of the input parameters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-29 05:18:47\" },\n\t{ \"post_id\": 4366, \"topic_id\": 979, \"forum_id\": 8, \"post_subject\": \"Please explain PROJECT(<module>, <interface>)\", \"username\": \"rhimbo\", \"post_text\": \"Could someone explain how PROJECT(<module>, <interface>) works? I don't understand the example given in the ECL Reference manual. It seems to me that the only purpose is to take attributes of a MODULE definition and "package them up" as an INTERFACE. \\n\\nIs this specifically intended to be used to pass attribute values to FUNCTION calls?\\n\\nI tried to contrive some code that would help me understand. Here is what I did:\\n\\nOut_Rec := RECORD\\n STRING20 firstName;\\n STRING1 gender;\\nEND;\\n\\n EXPORT MyModule := MODULE\\n EXPORT ds := DATASET([{'George', 'M'}, {'Alice', 'F'}], Out_Rec);\\n EXPORT STRING25 firstName := '';\\n EXPORT STRING1 gender := '';\\nEND;\\n\\nIArgs := INTERFACE\\n EXPORT STRING25 firstName;\\n EXPORT STRING1 gender;\\n \\nEND;\\n\\nSTRING myFunc(IArgs args) := FUNCTION\\n STRING name := TRIM(args.firstName) + ', ' + TRIM(args.gender);\\n return name;\\nEND;\\n\\nname := myFunc(PROJECT($.MyModule, IArgs)); \\nOUTPUT(name);\\n
\", \"post_time\": \"2013-07-26 07:06:45\" },\n\t{ \"post_id\": 4388, \"topic_id\": 983, \"forum_id\": 8, \"post_subject\": \"Re: Executing ECL code using a label name\", \"username\": \"sameermsc\", \"post_text\": \"Thanks a lot Richard,\\nwill try the more option\\n\\nSameer\", \"post_time\": \"2013-07-31 08:52:42\" },\n\t{ \"post_id\": 4386, \"topic_id\": 983, \"forum_id\": 8, \"post_subject\": \"Re: Executing ECL code using a label name\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nThe fact that you're using the term "label" indicates to me you're working in a legacy-style central-repository environment, because the Open Source Community Edition does not have the option of "labelling" the Reopository. So this answer only applies to LN-internal environments right now.\\n\\nThe "Label" is a "snapshot" of the state of the entire Repository at the time the label is applied. To use that specific version of code, you just need to click the "More" button on the builder window and select the appropriate Label to use from the droplist -- your code will then run with that version of the Repository code.\\n\\nFWIW, all that "label" stuff goes away in OSS, since your ECL code is stored locally in text files on each developer's machine and the version control you use is whatever you choose to install and use.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-31 08:15:17\" },\n\t{ \"post_id\": 4385, \"topic_id\": 983, \"forum_id\": 8, \"post_subject\": \"Re: Executing ECL code using a label name\", \"username\": \"sameermsc\", \"post_text\": \"I think i was not clear with the question i have asked, sorry about the confusion. Let me rephrase it\\n\\nI have some ECL "BWR1" code which is checked in say about 5 timse till date, lets call the versions as V1, V2, V3, V4 and V5 (latest) and i also assign a label(not sure if i can call it a snapshot) to all of the associated files of each version as L1, L2, L3, L4 and L5 (latest) respectively. \\n\\nActually, the BWR1 imports code from around 10 Modules, which are continuously modified and checked in by different users, and we cannot restrict them from checking in code in their respective modules as those changes are prod updates.\\n\\nNow, say for example, three users are calling the ECL code "To be specific its an NFD call". following are few scenarios i am looking at,\\n1) when User 1 executes the BWR1 it should execute the last checked in version V5 (by default this is what will get executed, so no question here)\\n2) when User 2 executes the BWR1 it should execute version V3 which has been labelled as L3\\n3) when User 3 executes the BWR1 it should execute version V1 which has been labelled as L1\\n\\nare there any options either in Client Tools or in NFD which will allow me to achieve the above scenarios.\\n\\nlet me know your lexis id, i can chat if required.\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-07-31 06:33:49\" },\n\t{ \"post_id\": 4379, \"topic_id\": 983, \"forum_id\": 8, \"post_subject\": \"Re: Executing ECL code using a label name\", \"username\": \"bforeman\", \"post_text\": \"Hi Sameer,\\n\\nECL definitions in other modules simply must be fully qualified. For example, if you have a folder named "Sameer", and you create an EXPORT definition in your folder named "JoinThem", if I had a folder named "Bob" and had code where I wanted to reference your definition, I would do the following:\\n\\nIMPORT Sameer;\\nSameer.JoinThem;
\\n\\nBy using this type of dot syntax qualification, developers can share definitions with each other with clashes.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2013-07-30 18:57:11\" },\n\t{ \"post_id\": 4372, \"topic_id\": 983, \"forum_id\": 8, \"post_subject\": \"Executing ECL code using a label name\", \"username\": \"sameermsc\", \"post_text\": \"We have a situation where multiple users call a specific ECL module, Assuming that each user is testing a specific functionality and would like to see only those ECL code changes specific to their functionality but not all other changes done till then. \\n\\nIf a Label the ECL code in such a way that each label reflects only the changes desired by a specific user, is there any mechanism to execute a specific labelled code?\\nif yes, what options/syntax i need to use\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-07-30 13:08:43\" },\n\t{ \"post_id\": 4398, \"topic_id\": 984, \"forum_id\": 8, \"post_subject\": \"Re: Error fetching child dataset from HTTP response in Thor\", \"username\": \"bforeman\", \"post_text\": \"The first error (unsupported activity HTTPCALL) is addressed in HPCC-9349.\", \"post_time\": \"2013-07-31 15:22:15\" },\n\t{ \"post_id\": 4381, \"topic_id\": 984, \"forum_id\": 8, \"post_subject\": \"Error fetching child dataset from HTTP response in Thor\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n \\n I am making a http call to the WsDfu/DFUDefFile service by passing the file name. I am getting the response and am able to view it .But the system is throwing error when trying to access the child data set in THOR . The error is given below\\n Error: System error: 10107: Graph[1], http[2]: Unsupported activity kind: http (0, 0), 10107,
\\nMy code goes here\\n\\nSTRING IP := '10.194.10.2:8010';\\nSTRING \\tFileName :='thor::test::cricketscore';\\n\\nSTRING IpAddrs := 'http://'+IP+'/WsDfu/DFUDefFile/abhisr?Name='+FileName+'&FileName=abhisr&Format=xml';\\n\\nFields_R := RECORD\\n\\t STRING EclType{XPATH('@ecltype'),MAXLENGTH(30)};\\n\\t STRING Label {XPATH('@label'),MAXLENGTH(30)};\\n\\t STRING Name {XPATH('@name'),MAXLENGTH(30)};\\n\\t INTEGER Position{XPATH('@position'),MAXLENGTH(30)};\\nEND;\\n\\n\\nOutResponse := RECORD\\n\\t DATASET(Fields_R) Fields{XPATH('/Table/Field')};\\n\\t STRING Filename {XPATH('/Table/filename')}\\nEND;\\n\\n\\nParsedResponse := HTTPCALL(IpAddrs,'GET', 'text/xml', OutResponse);\\nParsedResponse.Fields;\\n\\n
\\n\\n But it returns child data set successfully in hThor. Another thing I noticed is even it fails to give result for ParsedResponse.Fields
, its giving result for ParsedResponse.Fields[1]
in thor\\n Also I am not able to write the response to a file using OUTPUT in either clusters.\\n\\nJira ID: https://track.hpccsystems.com/browse/HPCC-9797\", \"post_time\": \"2013-07-30 19:41:29\" },\n\t{ \"post_id\": 4414, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"bforeman\", \"post_text\": \"Well, there's always light with ECL - you just need to parse your own dynamic string and strip out the quotes. Look at the string function library or even REGEXREPLACE to help you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-08-01 13:23:22\" },\n\t{ \"post_id\": 4413, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"abhisr\", \"post_text\": \"Thanks Bob.The above code works fine.\\nWhere I am stuck is i have \\n
dynamicS := ResultOfFunction_returnsString();\\n//'{STRING player, Integer Score, String CenturyAgainst, Integer StrikeRate, Integer Year}\\n
. and not a constant String \\n\\n
s:= '{STRING player, Integer Score, String CenturyAgainst, Integer StrikeRate, Integer Year}';
\\n\\nIn this case as MyRec := #EXPAND(dynamicS)
won't work\\n[color=#800030:3lsiogvh]Error: Constant expression expected (27, 17), 2071, \\n as Dan commented\\n I don't think you can use this to (for instance) load a record definition from an external source on the fly, then read a logical file using that definition
.\\nAs I am trying to read a list of files written by a set of jobs , i need to load the layout on the fly and read the files and do some comparison operation.\\n\\nIs there any light at the end of the tunnel ?\", \"post_time\": \"2013-08-01 13:19:25\" },\n\t{ \"post_id\": 4410, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"bforeman\", \"post_text\": \"s:= '{STRING player, Integer Score, String CenturyAgainst, Integer StrikeRate, Integer Year}';\\n\\nMyRec := #EXPAND(s);\\n\\nString fName := '~thor::test::cricketscore';\\n\\nd := DATASET(fName,MyRec,THOR);\\n\\nOUTPUT(d);
\", \"post_time\": \"2013-08-01 12:01:19\" },\n\t{ \"post_id\": 4409, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"abhisr\", \"post_text\": \"Yes my case is to load the record definition on the fly and read a logical file ; am developing an automation tool .\\nFor inline data set it will work fine, but for file names its error\\n\\n \\ns:= '{STRING player,\\tInteger Score,\\tString CenturyAgainst,\\tInteger StrikeRate,\\tInteger Year}';\\n\\nMyRec := #EXPAND(s);\\n\\nd := DATASET(fName,MyRec);\\n\\nOUTPUT(d);
Is there any alternatives workarounds to achieve this?\", \"post_time\": \"2013-07-31 19:10:30\" },\n\t{ \"post_id\": 4408, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"bforeman\", \"post_text\": \"What Dan said, but if your file is a logical one, not inline, remove the single quotes after the curly braces, and you need to add the type of file as the third parameter of DATASET.\\n\\nYou have:\\n\\nString fName := '~thor::test::cricketscore';\\nDS2 :=DATASET(fName,{'STRING player, Integer Score, String CenturyAgainst, Integer StrikeRate, Integer Year'});\\nDS2;
\\n\\nShould be:\\n\\nString fName := '~thor::test::cricketscore';\\nDS2 :=DATASET(fName,{STRING player,Integer Score, String CenturyAgainst,Integer StrikeRate, Integer Year},THOR);\\nDS2;\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-07-31 18:58:59\" },\n\t{ \"post_id\": 4407, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"DSC\", \"post_text\": \"This works:\\n\\ns := '{STRING name, INTEGER count}';\\n\\nMyRec := #EXPAND(s);\\n\\nd := DATASET([{'Dan',10}],MyRec);\\n\\nOUTPUT(d);
\\nThis may not be what you want, though. I believe #EXPAND requires that its arguments are static. In other words, I don't think you can use this to (for instance) load a record definition from an external source on the fly, then read a logical file using that definition.\\n\\nDan\", \"post_time\": \"2013-07-31 18:56:42\" },\n\t{ \"post_id\": 4406, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"abhisr\", \"post_text\": \"Hi Bob,\\n\\nIt doesnt work for DATASET, its showing error \\n \\nString fName := '~thor::test::cricketscore';\\nDS2 :=DATASET(fName,{'STRING player,\\tInteger Score,\\tString CenturyAgainst,\\tInteger StrikeRate,\\tInteger Year'});\\nDS2;
\\n[color=#BF0040:3iybpgjh]Error: Expected a list (18, 16), 2376,\", \"post_time\": \"2013-07-31 18:49:20\" },\n\t{ \"post_id\": 4401, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"Re: STRING to Layout\", \"username\": \"bforeman\", \"post_text\": \"Well, I guess you could parse it and convert it to an inline RECORD structure right?\\n\\nSo using find and replace, you could easily convert this:\\n\\n'RECORD STRING name; INTEGER count;END;'
\\n\\nto this:\\n\\n{STRING name, INTEGER count};
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-07-31 15:45:24\" },\n\t{ \"post_id\": 4399, \"topic_id\": 988, \"forum_id\": 8, \"post_subject\": \"STRING to Layout\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nIs there any way I can convert a STRING which has a RECORD structure to a actual RECORD Structure. \\n\\n\\nString Str_Lay := 'RECORD STRING name; INTEGER count;END;';\\n\\nActual_Lay := RECORD STRING name; INTEGER count;END;;\\nDS := DATASET([{'Sweet','21'}],Actual_Lay);\\nDS;\\n
.\\n\\nCan I convert 'Str_Lay' to 'Actual_Lay' .\", \"post_time\": \"2013-07-31 15:30:56\" },\n\t{ \"post_id\": 4427, \"topic_id\": 989, \"forum_id\": 8, \"post_subject\": \"Re: Running multiple versions of IDE\", \"username\": \"gsmith\", \"post_text\": \"For your set up I would recommend:\\n\\n1. Install IDE 4.0 (which includes 4.0 Client Tools)\\n2. Install 3.10 Client Tools\\n\\nAs Bob said, the IDE will auto pick the correct tools for the server.\\n\\nNote: 4.0 and 3.10 IDE will install "Side by Side", but once either is configured with both sets of Client Tools you don't really need the other...\\n\\nGordon.\", \"post_time\": \"2013-08-04 15:28:43\" },\n\t{ \"post_id\": 4419, \"topic_id\": 989, \"forum_id\": 8, \"post_subject\": \"Re: Running multiple versions of IDE\", \"username\": \"bforeman\", \"post_text\": \"...and yes, the 4.0 ECL IDE will work with an older version cluster. There is a new feature in the Compiler Preferences that auto-detects compiler versions!\", \"post_time\": \"2013-08-01 19:57:08\" },\n\t{ \"post_id\": 4418, \"topic_id\": 989, \"forum_id\": 8, \"post_subject\": \"Re: Running multiple versions of IDE\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nThe 4.0 install will not clash with the previous 3.x versions, so you can run both at the same time. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-08-01 19:55:47\" },\n\t{ \"post_id\": 4416, \"topic_id\": 989, \"forum_id\": 8, \"post_subject\": \"Running multiple versions of IDE\", \"username\": \"tmurphy\", \"post_text\": \"I have a 3.10 cluster and I use the 3.10 native IDE (windows). Now I want to hit a 4.0 dev cluster using the 4.0 IDE. But I'm worried that if I install the 4.0 IDE, it will delete my 3.10 IDE and I won't be able to access my 3.10 cluster. Is there some way to run multiple versions of the IDE on the same computer? Or can the 4.0 IDE work with a 3.10 cluster (in which case I won't need the 3.10 IDE)?\", \"post_time\": \"2013-08-01 18:00:22\" },\n\t{ \"post_id\": 4482, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"richardkchapman\", \"post_text\": \"FWIW, my view is that they should be documented, with appropriate caveats (e.g. that they are subject to change...)\", \"post_time\": \"2013-08-20 17:12:27\" },\n\t{ \"post_id\": 4447, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"DSC\", \"post_text\": \"Fair enough, and I certainly understand the desire to keep some undocumented features private. I'll open a Jira ticket for all of these items and we'll see what happens.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-08 11:34:41\" },\n\t{ \"post_id\": 4446, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"ghalliday\", \"post_text\": \"That's what you get when you have that platform developers writing ecl.\\n\\nI'm not sure all of it should be exposed!\\n\\nNOBOUNDCHECK is there to remove the internal out-of-bounds check on a dataset. Not recommended unless you really need it for efficiency and you can guarantee that the row exists in all situations.\\n\\nProbably worth creating jira tickets for the documentation - then we can at least track them.\", \"post_time\": \"2013-08-08 11:27:07\" },\n\t{ \"post_id\": 4445, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"DSC\", \"post_text\": \"Thanks, Gavin. That explains a lot. 
I did see the __self usage and wondered about that as well. There is really quite a lot of interesting coding in this single file. Here is another I found:\\n\\n
EXPORT buildDS := AGGREGATE(ds, myBloomRec, addTransform(LEFT), myBloomFilter.mergeBloom(ROWS(RIGHT)[NOBOUNDCHECK 2]));
\\nNOBOUNDCHECK?\\n\\nAre all of these things parts of the language that you want exposed? If so, should I create a Jira ticket asking for documentation updates?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-08 10:36:54\" },\n\t{ \"post_id\": 4444, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"ghalliday\", \"post_text\": \"Yes\\n\\n\\nEXPORT TRANSFORM(bloomrec) addBloom(UNSIGNED4 hash1, UNSIGNED4 hash2, UNSIGNED4 _numhashes = numHashes, UNSIGNED _tablesize=tableSize) := BEGINC++\\n
\\n\\nIs a way of defining a transform function in C++. It uses a __self parameter to reserve the space needed for the record that is returned, and returns the size of the final row (this is the same mechanism used by the code generated from ECL by eclcc).\", \"post_time\": \"2013-08-08 09:21:13\" },\n\t{ \"post_id\": 4442, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"DSC\", \"post_text\": \"Ask about the 'L.<keyfields>' usage as well, please. They're tied together, I'm sure.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-08-07 15:15:13\" },\n\t{ \"post_id\": 4440, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"bforeman\", \"post_text\": \"What does '<?> ANY keyfields' mean?
\\n\\nWell, ANY refers to a typeless parameter, and the <?> syntax seems to be new to me as well. Checking with the development team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-08-07 14:35:43\" },\n\t{ \"post_id\": 4439, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"DSC\", \"post_text\": \"Hi Bob,\\n\\nOK, I revisited the code. Here is a two-line excerpt:\\n\\nTRANSFORM(myBloomRec) addTransform(ds L) := myBloomFilter.addBloom64(hash64(L.<keyfields>));\\n\\nEXPORT buildDS := AGGREGATE(ds, myBloomRec, addTransform(LEFT), myBloomFilter.mergeBloom(ROWS(RIGHT)[NOBOUNDCHECK 2]));
\\nThe usage within the AGGREGATE makes me think that this is just an alternate way of declaring a TRANSFORM. Or really, a way to call a function as a TRANSFORM. The following is not correct code, but conceptually, these would be equivalent:\\n\\nTRANSFORM(FooRec) myFunction(DATASET(OtherRec) l) := someOtherFunction(l);\\n\\nFooRec myFunction(DATASET(OtherRec) l) := TRANSFORM\\n\\tSELF := someOtherFunction(l);\\nEND;
\\nDoes that make sense?\\n\\nAnd....\\n\\nThat excerpt contains a new thing I didn't spot earlier: 'L.<keyfields>' -- what is that? Tracing 'keyfields' backwards finds this:\\n\\n EXPORT buildBloomFilter(UNSIGNED DECIMAL6_3 fpProb,\\n UNSIGNED INTEGER8 cardinality,\\n VIRTUAL DATASET ds, <?> ANY keyfields) := MODULE
\\nWhat does '<?> ANY keyfields' mean?\\n\\nThanks,\\n\\nDan\\n\\nPS: Looking forward to meeting you as well! I wish I was staying for the entire week. It looks like there will be a lot of very interesting stuff to learn.\", \"post_time\": \"2013-08-07 13:40:42\" },\n\t{ \"post_id\": 4438, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"Re: TRANSFORM(FooRec) as return type\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI'm querying development regarding this, but on reading the docs it looks like TRANSFORM is passing a single record, where DATASET would pass all.\\n\\nTRANSFORM(datarow)\\nA single record to transform, typically the keyword LEFT.
\\n\\nBloom.ecl indeed is interesting code. \\nLooking forward to meeting you in September!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-08-07 12:48:26\" },\n\t{ \"post_id\": 4422, \"topic_id\": 990, \"forum_id\": 8, \"post_subject\": \"TRANSFORM(FooRec) as return type\", \"username\": \"DSC\", \"post_text\": \"I was reading through the example Bloom.ecl code housed in the new Bundle repository and spotted something interesting (to me, at least).\\n\\nMany of the functions in that module are embedded C++ functions. Many of them also look like this:\\n\\n
EXPORT bloomrec := RECORD\\n DATA bits { maxlength(tablesize) };\\nEND;\\n\\nEXPORT TRANSFORM(bloomrec) addBloom(UNSIGNED4 hash1, UNSIGNED4 hash2, UNSIGNED4 _numhashes = numHashes, UNSIGNED _tablesize=tableSize) := BEGINC++\\n // Do stuff\\nENDC++;
\\nMy question is about the return type of the addBloom() function. How is 'TRANSFORM(bloomrec)' interpreted here? Is it only a mechanism for ensuring that the returned value is in that particular record format? How is that different from using 'DATASET(bloomrec)' as the return type?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-02 13:15:29\" },\n\t{ \"post_id\": 4450, \"topic_id\": 995, \"forum_id\": 8, \"post_subject\": \"Re: Unicode in OUTPUT XML\", \"username\": \"ghalliday\", \"post_text\": \"That looks like a bug (or more than one bug) - the values shouldn't be restricted to strings, and they should be converted to utf8 when output. Please could you add an issue in jira, and we can look at fixing it.\\n\\n(https://track.hpccsystems.com)\\n\\nIf not, I'll add one later.\\n\\nYou may be able to temporarily work around the issue by using the FROMUNICODE function e.g.,\\n\\nheading(FROMUNICODE(header, 'utf8'))\\n\\nunfortunately I think the work around is likely to break when the bug is fixed.\", \"post_time\": \"2013-08-13 08:23:41\" },\n\t{ \"post_id\": 4449, \"topic_id\": 995, \"forum_id\": 8, \"post_subject\": \"Unicode in OUTPUT XML\", \"username\": \"chucks\", \"post_text\": \"The XML option in the OUTPUT statement requires strings for the header and footer.\\ne.g.\\nxml('Entity', heading((string)header,(string)Footer),trim, OPT)\\n\\nIf header and footer are defined as unicode strings, there will be a syntax error.\\nHowever, the conversion from unicode to string will result in invalid UTF-8 characters.\\nE.g.:\\nsuppose this value is included in the header:\\nunicode montreal := U'The Montréal Exchange - Disciplinary Decision';\\n\\nThe resulting XML file cannot be read, because the accented e is converted to xE0, which is an invalid UTF-8 character. (It is actually the 8859-1 encoding. The UTF-8 encoding is c3 a9.)\\nI don't understand that in the light of this statement in the ECL reference manual:\\n“All ECL code is UTF-8 encoded, which means\\nthat all strings are also UTF-8 encoded, whether Unicode or non-Unicode strings.”\", \"post_time\": \"2013-08-12 20:10:23\" },\n\t{ \"post_id\": 4506, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"DSC\", \"post_text\": \"From an eclccserver log:\\n\\n00000470 2013-08-16 11:06:22.046 7863 16498 "Agent request 'W20130816-110620' enqueued on 'thor.agent'"\\n00000471 2013-08-16 11:23:05.980 7863 16498 "eclcc: Creating PIPE program process : 'eclcc -shared - -main .Report_BWR --timings -oW20130816-112305 -platform=thorlcr -fcreated_by=ws_workunits -fcreated_for=dcamper -fapplyInstantEclTransformations=1 -fapplyInstantEclTransformationsLimit=100' - hasinput=1, hasoutput=0 stderrbufsize=0"\\n00000472 2013-08-16 11:23:06.184 7863 18803 "assert(expr->queryName()) failed - file: /var/lib/jenkins/workspace/CE-Candidate-4.0.0-1-with-plugins/CE/centos-6.4-x86_64/HPCC-Platform/ecl/hql/hqlexpr.cpp, line 7500"\\n00000473 2013-08-16 11:23:06.189 7863 16498 "eclcc: Pipe: process 18801 complete 2"
\\nAccording to grep, 'Report_BWR' appears in no other logs.\\n\\nDan\", \"post_time\": \"2013-08-29 12:41:08\" },\n\t{ \"post_id\": 4503, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"ghalliday\", \"post_text\": \"That error looks like it is coming about because eclcc is trying to define a symbol to, but for some reason it thinks there isn't a name to associated with it.\\n\\nIs there a stack trace or anything else in the log file?\", \"post_time\": \"2013-08-29 10:27:11\" },\n\t{ \"post_id\": 4478, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"DSC\", \"post_text\": \"Well, I didn't originally have one that works, as that was an unsaved BWR window. But I created one by simply saving that BWR window to a new file, and it seems to work (it's executing now).\\n\\nThe full path of the file that does not work is:\\n\\n\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\sybase\\\\Report_BWR.ecl
\\nThe full path of the (new) file that works is:\\n\\n\\\\\\\\psf\\\\Home\\\\Desktop\\\\ECL\\\\hoovers\\\\sybase\\\\dantest.ecl
\\nThe odd pathname comes from my environment. I use a Mac, running the IDE within Parallels. All of my source code is actually located in a folder in the OS X environment. I've used this setup successfully for quite some time now.\", \"post_time\": \"2013-08-19 12:27:06\" },\n\t{ \"post_id\": 4477, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"gsmith\", \"post_text\": \"What is the full path the file that is failing and the full path to the one that works?\", \"post_time\": \"2013-08-19 12:17:23\" },\n\t{ \"post_id\": 4476, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"DSC\", \"post_text\": \"Sigh. It's Monday, right? I can just tell.\\n\\nThe file really was rebuilt with the BOM. I did not, however, go back and see if adding the BOM would change the results. I just assumed they would, based on your response. My mistake, there. The problem still exists. Just now, I opened the file in the IDE and submitted it to Thor. I immediately saw the same error as before.\\n\\nI take it the apparent transformation of "Report_BWR" to ".Report_BWR" is not normal?\", \"post_time\": \"2013-08-19 12:13:53\" },\n\t{ \"post_id\": 4474, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"gsmith\", \"post_text\": \"Hmmm - It should work with or without the BOM (I was just wondering if the BOM was somehow invalid).\\n\\nEclipse does not support BOMs for example...\", \"post_time\": \"2013-08-19 11:55:22\" },\n\t{ \"post_id\": 4473, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"DSC\", \"post_text\": \"The file is named "Report_BWR.ecl".\\n\\nGordon's option #2 turned out to be the correct one. I checked the file and it was missing the UTF-8 BOM (0xEF 0xBB 0xBF). I copied the text into a new editor window and saved over the original file to correct the problem.\\n\\nI'm 99% sure that the file originated from the IDE, but I also think it originated as a "scratch" file where I was writing most of my code elsewhere and using this one to test that code as I went along. In that case, the file probably wasn't saved at all for several days, was restored by the IDE upon relaunching multiple times, and was modified countless more. Maybe something in that series of actions caused this issue. If I come up with a repeatable bug, I'll open a Jira ticket.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-19 11:49:19\" },\n\t{ \"post_id\": 4469, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"richardkchapman\", \"post_text\": \"The assert seems to suggest that the filename of the BWR file may be ".Report_BWR" - not sure how that happened, but I can see that it might confuse any code expecting to deduce the query name from the filename (prior to the extension)\\n\\nPresumably the BWR file SHOULD have been called Report_BWR.ecl ?\", \"post_time\": \"2013-08-19 09:27:25\" },\n\t{ \"post_id\": 4467, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Re: Strange query name failure\", \"username\": \"gsmith\", \"post_text\": \"Given a copy and paste into new attribute fixes this, it sounds like it could be one of the following:\\n1. An invalid hidden character (possibly in the upper Unicode range) - this sometimes happens when code is copy and pasted from PDF docs.\\n2. 
An invalid BOM on the ECL file (BOM is a few bytes at the start of the file which tells the OS what charset its using).\\n3. Some invalid (for that OS) line endings.\\n\\nIf you still have the orig ECL file we can hex edit and see if there is anything "unusual".\\n\\nGordon.\", \"post_time\": \"2013-08-19 06:36:43\" },\n\t{ \"post_id\": 4463, \"topic_id\": 999, \"forum_id\": 8, \"post_subject\": \"Strange query name failure\", \"username\": \"DSC\", \"post_text\": \"I have this amazingly simple BWR file:\\n\\nIMPORT SBRI;\\n\\nd := SBRI.CreateReportData();\\n\\nOUTPUT(d);
\\nIt's saved as an ECL file. When I go to execute it under Thor, I receive the following runtime error:\\n\\nWarning: assert(expr->queryName()) failed - file: /var/lib/jenkins/workspace/CE-Candidate-4.0.0-1-with-plugins/CE/centos-6.4-x86_64/HPCC-Platform/ecl/hql/hqlexpr.cpp, line 7500\\nError: assert(expr->queryName()) failed - file: /var/lib/jenkins/workspace/CE-Candidate-4.0.0-1-with-plugins/CE/centos-6.4-x86_64/HPCC-Platform/ecl/hql/hqlexpr.cpp, line 7500 (1, 0 - .Report_BWR)
\\nIf I copy and paste this code into a new, unnamed/unsaved BWR window, it works perfectly.\\n\\nThis is with 4.0.0-1 and the IDE associated with that version.\\n\\nHelp?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-08-16 16:26:23\" },\n\t{ \"post_id\": 4497, \"topic_id\": 1004, \"forum_id\": 8, \"post_subject\": \"Re: Difference between ROW and RECORD\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. I never thought of using transform this way till I saw the implementation in the Standard Library a few days back \\n\\nStd.Date.CreateDate\\nStd.Date.DateFromRec\\n\\nAnyways, It was more to satisfy my curiosity than anything. Thank you for all the help.\\nRegards\\nSrini\", \"post_time\": \"2013-08-27 20:46:44\" },\n\t{ \"post_id\": 4492, \"topic_id\": 1004, \"forum_id\": 8, \"post_subject\": \"Re: Difference between ROW and RECORD\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nMy best guess is that it comes down to your non-standard use of TRANSFORM in your example code. \\n\\nThe TRANSFORM structure was designed to work in conjunction with the operation that calls it. The ECL operations that call TRANSFORMs are: PROJECT, ITERATE, JOIN, ROLLUP, NORMALIZE, DENORMALIZE, PROCESS, AGGREGATE, PARSE, FETCH (have I forgotten any?). You may recall from class that I always state: "a TRANSFORM function never exists alone -- it is always inextricably linked to the operation that uses it." \\n\\nSo changing your Fn2 code to this:
r1 Fn2(aa,ba) := FUNCTION\\n r1 Ifn2(r1 L) := TRANSFORM\\n SELF.a := aa;\\n SELF.b := ba;\\n END;\\n RETURN PROJECT(DATASET([],r1),Ifn2(LEFT));\\nEND;
Makes all of your additional cases work except this one:\\n
DATASET(Recs,r1); // Does not work
And that version is simply redundant, since Recs is already a recordset in the r1 format and doesn't need to be converted to a DATASET to be used as such.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-27 14:32:31\" },\n\t{ \"post_id\": 4491, \"topic_id\": 1004, \"forum_id\": 8, \"post_subject\": \"Re: Difference between ROW and RECORD\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you for the explanation, Richard. But, I still don't feel I am getting it \\nIf Fn2 is returning a record set, I believe the following operations should be possible. None of the below works. \\n\\n
\\nRecs := Fn2(1,2);\\n\\n// All the below operations will work if Recs is a recordset with 1 record\\nRecs[1..1]; // Does not work\\nDATASET(Recs,r1); // Does not work\\nOUTPUT(Recs); // Does not work\\na := Recs[1]; // Does not work\\nDataset(a); // Does not work\\n
\\n\\n-Srini\", \"post_time\": \"2013-08-27 01:16:08\" },\n\t{ \"post_id\": 4490, \"topic_id\": 1004, \"forum_id\": 8, \"post_subject\": \"Re: Difference between ROW and RECORD\", \"username\": \"rtaylor\", \"post_text\": \"Srini,In RDBMS world, we interchangeably use ROW and RECORD. I guess it's different in HPCC.\\nCan someone help me understand what is the difference between the two functions.
The keyword RECORD in ECL code is always referring only to a field layout definition -- the RECORD structure (which is not a function). I think you're confusing it here with the term "recordset" that describes a set of records from a dataset.\\n\\nThe ROW function specifically returns a single record, whose layout is defined by the RECORD structure referenced in its second parameter. The ROW function can only return a single record and not a recordset.\\nFrom the results, I can see that it's not a bug and it's a conscious decision to keep it different. But, why? [ I am inferring that Fn1 returns a ROW and Fn2 returns a RECORD
Not quite. Your Fn1 function is returning a ROW (a single record), but your Fn2 function is returning a recordset, not a RECORD (structure).\\n\\nThere are contexts in ECL code where a single record (as in the return result from the ROW function) is the only thing appropriate for that context. A TRANSFORM function always produces a recordset, which is why your non-standard use of a TRANSFORM function in your Fn2 function makes it return a 1-record recordset, not a single record (as the ROW function does).\\n\\nSo for your first two examples, you're using the inline form of DATASET:DATASET([fn1(1,2)],r1); // works -- 1-rec \\nDATASET([fn2(1,2)],r1); // does not work -- recordset
\\nThe first one works because your first parameter is a set ([]) of the return results from Fn1, which is a single record, so you have a single record and the inline form is "happy" with that.\\n\\nThe second one does not work because your first parameter is a set ([]) of the return results from Fn2, which is a recordset, so you have a set of recordsets and the inline form is NOT "happy" with that.\\n\\nSo for your next two examples, you're still using the inline form of DATASET and you're simply omitting the second parameter:DATASET([fn2(1,2)]) ; // works -- recordset\\nDATASET([fn1(1,2)]); // does not works -- 1-rec
Note that, when omitting the second parameter for this form, the docs say that second parameter is: "Omittable only if the recordset parameter is just one record or a list of in-line transform functions."\\n\\nThe first one does NOT work because your set ([]) contains the return results from Fn2, which is a recordset, so you have a set of recordsets (not a single record) and this form is NOT "happy" with that -- it wants only a single record.\\n\\nThe second one works because your set ([]) is the return results from Fn1, which is a single record, so you have a single record in the set and the form is "happy" with that.\\n\\nFor your last two examples, you're still using the inline form of DATASET and omitting the second parameter:DATASET(fn1(1,2)); // works\\t\\t -- 1 rec\\nDATASET(fn2(1,2)); // does not work -- recordset
The first one works because the return results from Fn1 is a single record, so the form is "happy" with that.\\n\\nThe second one does NOT work because your return results from Fn2 is a 1-record recordset (not just a single record), and the form is NOT "happy" with that -- it wants only a single record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-26 20:09:39\" },\n\t{ \"post_id\": 4489, \"topic_id\": 1004, \"forum_id\": 8, \"post_subject\": \"Difference between ROW and RECORD\", \"username\": \"omnibuzz\", \"post_text\": \"In RDBMS world, we interchangeably use ROW and RECORD. I guess it's different in HPCC.\\nCan someone help me understand what is the difference between the two functions. From the results, I can see that it's not a bug and it's a conscious decision to keep it different. But, why? [ I am inferring that Fn1 returns a ROW and Fn2 returns a RECORD]\\n\\n\\nr1 := RECORD\\n\\tinteger a;\\n\\tinteger b;\\nEND;\\n\\nr1 Fn1(a,b) := FUNCTION\\n\\tRETURN ROW({a,b},r1);\\nEND;\\n\\nr1 Fn2(a,b)\\t:= FUNCTION\\n\\tr1 Ifn2 := \\tTRANSFORM\\n\\t\\t\\tSELF.a \\t:= a;\\n\\t\\t\\tSELF.b := b;\\n\\t\\tEND;\\n\\tRETURN ifn2;\\nEND;\\n\\n\\nDATASET([fn1(1,2)],r1); // works\\nDATASET([fn2(1,2)],r1); // does not work\\n\\nDATASET([fn2(1,2)]); // works\\nDATASET([fn1(1,2)]); // does not works\\n\\nDATASET(fn1(1,2)); // works\\nDATASET(fn2(1,2)); // does not work\\n
\", \"post_time\": \"2013-08-25 03:08:42\" },\n\t{ \"post_id\": 4519, \"topic_id\": 1006, \"forum_id\": 8, \"post_subject\": \"Re: xml dataset ignoring record?\", \"username\": \"lblau\", \"post_text\": \"HPCCSystemsVM-4.0.0-9.ova\", \"post_time\": \"2013-09-02 13:49:43\" },\n\t{ \"post_id\": 4518, \"topic_id\": 1006, \"forum_id\": 8, \"post_subject\": \"Re: xml dataset ignoring record?\", \"username\": \"gsmith\", \"post_text\": \"I just got around to testing this and it worked as expected (this is on a 4.0.2-rc1 build).\\n\\nWhat version where you seeing the issue on?\", \"post_time\": \"2013-09-02 12:28:18\" },\n\t{ \"post_id\": 4498, \"topic_id\": 1006, \"forum_id\": 8, \"post_subject\": \"xml dataset ignoring record?\", \"username\": \"lblau\", \"post_text\": \"I just downloaded the latest vm and tried uploading, spraying and reading both a csv file and an xml file.\\nEither I've forgotten everything I ever knew about ECL and xml, or something is broken.\\nThe structure of my xml file is simple:\\n <kbpentlink>\\n <query id="EL_ENG_00001">\\n <name>Lucy</name>\\n <docid>eng-WL-110-174612-12992627</docid>\\n <beg>799</beg>\\n <end>802</end>\\n </query>\\n <query id="EL_ENG_00002">\\n <name>GID</name>\\n <docid>AFP_ENG_20080408.0495.LDC2009T13</docid>\\n <beg>831</beg>\\n <end>833</end>\\n </query>\\n...\\n</kbpentlink>\\nlayout_linking_queries := record\\n String id {xpath('@id')};\\n String mention {xpath('name')};\\n String docid {xpath('docid')};\\n integer m_start {xpath('beg')};\\n integer m_end {xpath('end')};\\nend;\\n DATASET('~.::queries',Layout_linking_queries,xml('/kbpentlink/query')); \\n\\nI've tried this both on the thor and the hthor. (I am using the ecl playground through ecl watch).\\nThe results tab on the playground looks as if the compiler completely ignored my record structure, it shows 5 columns:\\n## name docid beg end\\n\\nIf I use the workunit browser and select the workunit and use the 'show' link next to the result I see the same thing. However, if I open the results piece, and click on the number of rows, I see 6 columns\\n##(not labeled) \\t@id\\tname\\tdocid\\tbeg\\tend\\n\\n\\nwhile that is capturing all the data in the xml, it does not match the record I defined. Am I suffering a bad memory, or has something changed radically in how ecl handles XML?\\n\\nInterestingly, I can join using the field names defined in my record structure.\\n\\nThanks,\\nlauren\", \"post_time\": \"2013-08-28 19:09:36\" },\n\t{ \"post_id\": 4499, \"topic_id\": 1007, \"forum_id\": 8, \"post_subject\": \"DICTIONARY to/from embedded C++ function\", \"username\": \"DSC\", \"post_text\": \"Is it possible to pass a DICTIONARY to an embedded C++ function? Can an embedded C++ function return a DICTIONARY? 
If so, can someone provide an example of each?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-28 21:58:58\" },\n\t{ \"post_id\": 4515, \"topic_id\": 1010, \"forum_id\": 8, \"post_subject\": \"Re: Determining capabilities within embedded C++ functions\", \"username\": \"ghalliday\", \"post_text\": \"There isn't currently any way of doing that.\\n\\nAs a work around you might be able to base the decision on the type/version of the compiler.\\n\\nWe could add something that provides the functionality - add a Jira issue, and we it can be discussed there.\", \"post_time\": \"2013-08-30 14:30:18\" },\n\t{ \"post_id\": 4510, \"topic_id\": 1010, \"forum_id\": 8, \"post_subject\": \"Re: Determining capabilities within embedded C++ functions\", \"username\": \"DSC\", \"post_text\": \"I don't think #OPTION provides the capabilities that I'm looking for, but I could be wrong. If I am, I hope someone corrects me.\\n\\nIn a regular C/C++ application, you typically #include a header file that was generated by autoconf/automake (or a similar tool). That header file contains the #define statements that act as flags, telling you whether a particular feature is present or not. The code you write can then test those flags and determine what to compile and what to ignore (e.g. whether to use localtime_r() or just localtime()).\\n\\nIdeally, what would be nice would be an HPCC-provided header file that could be included in the embedded C++ function that would provide those #defines. Granted, the header would have to be maintained as more features and functionality are uncovered, but at least it provides a single place to find that information.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-08-29 14:37:30\" },\n\t{ \"post_id\": 4508, \"topic_id\": 1010, \"forum_id\": 8, \"post_subject\": \"Re: Determining capabilities within embedded C++ functions\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI'm sure Gavin will want to add here, but isn't #OPTION the best approach here?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-08-29 12:49:43\" },\n\t{ \"post_id\": 4504, \"topic_id\": 1010, \"forum_id\": 8, \"post_subject\": \"Determining capabilities within embedded C++ functions\", \"username\": \"DSC\", \"post_text\": \"In a typical C/C++ application dev environment you have the opportunity to query the build system for capabilities. Using autoconf, for instance, you can test for the presence of certain system calls. The result of a particular test is an #ifdef that you can use in your source to code to determine what system calls are available at compile time. One specific example would be testing for localtime_r(): localtime() is standard and all Linux systems support it, but localtime_r() is not supported everywhere.\\n\\nIs it possible to query the compiler environment in this manner from within embedded C++ functions?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-08-29 10:39:59\" },\n\t{ \"post_id\": 4511, \"topic_id\": 1012, \"forum_id\": 8, \"post_subject\": \"STD.File.CompareFiles\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI am using the function STD.File.CompareFiles( file1, file2 );
. Can any one help me to explain how the comparison is working in the background ? Is there a hash comparison happening ?\\n\\n\\nRegards\\nabhi.\", \"post_time\": \"2013-08-29 19:18:07\" },\n\t{ \"post_id\": 4530, \"topic_id\": 1016, \"forum_id\": 8, \"post_subject\": \"Re: INTERNAL: Dataset is not active: 'left'\", \"username\": \"abhisr\", \"post_text\": \"https://track.hpccsystems.com/browse/EPE-53\", \"post_time\": \"2013-09-04 14:02:24\" },\n\t{ \"post_id\": 4526, \"topic_id\": 1016, \"forum_id\": 8, \"post_subject\": \"Re: INTERNAL: Dataset is not active: 'left'\", \"username\": \"bforeman\", \"post_text\": \"Can you please log this in the Community Issue Tracker?\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nYour code looks OK to me, we might have a bug in this release.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-04 13:44:37\" },\n\t{ \"post_id\": 4522, \"topic_id\": 1016, \"forum_id\": 8, \"post_subject\": \"INTERNAL: Dataset is not active: 'left'\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI am using the function STD.File.CompareFiles , but when i run this code its throwing error \\n\\n[color=#800040:2gfkyn1l] CompareLogicalFiles: No active dali server connection available, \\nSo I wrapped the comapre code with NOTHOR \\n\\nFileCompRes := NOTHOR(STD.File.CompareFiles(file1,file2));
\\nNow the exception has changed to \\n[color=#800040:2gfkyn1l]Error: INTERNAL: Dataset is not active: 'left'\\nCan any one help me to solve this . My code goes here\\nf1 := '~thor::test::20130902::cricketscore';\\nf2 := '~thor::test::20130903::cricketscore';\\nf3 := '~thor::test::20130901::cricketscore';\\nf4 := '~thor::test::20130828::cricketscore';\\nf5 := '~thor::test::20130827::cricketscore';\\nf6 := '~thor::test::20130826::cricketscore';\\n\\nFileDS := DATASET([{f1,f2},{f3,f4},{f5,f6}],{STRING f1, STRING f2});\\nCompLay := RECORD\\n\\tSTRING F1;\\n\\tSTRING F2;\\n\\tINTEGER CompResult\\nEND;\\n\\n\\nCompLay FILE_TRANS(FileDS L) := TRANSFORM\\n\\tfile1 := L.f1;\\n\\tfile2 := L.f2;\\n\\t\\n\\tFileCompRes := STD.File.CompareFiles(file1,file2);\\n\\t//FileCompRes := NOTHOR(STD.File.CompareFiles(file1,file2));\\n\\n\\tSELF.CompResult := FileCompRes;\\n\\tSELF.F1 := file1;\\n\\tSELF.F2 := file2;\\n\\t\\nEND;\\nComapreResult := PROJECT(FileDS, FILE_TRANS(LEFT));\\nComapreResult;
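\\nOne workaround I am still experimenting with (not verified yet, so treat it as a sketch) is to take the compare out of the per-row TRANSFORM altogether and wrap the whole PROJECT in NOTHOR instead, since CompareFiles needs a Dali connection rather than record-level work on the Thor slaves:\\n\\n
CompareNoThor := NOTHOR(PROJECT(FileDS, FILE_TRANS(LEFT)));\\nCompareNoThor;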
\\n\\nVersion :community-4.0.0-2\\nServer :internal_4.0.0-9\\n\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-09-03 18:22:07\" },\n\t{ \"post_id\": 4529, \"topic_id\": 1017, \"forum_id\": 8, \"post_subject\": \"Re: Casting of Boolean FALSE to STRING returns EMPTY\", \"username\": \"abhisr\", \"post_text\": \"Thanks Bob.\\n\\nJust now I checked the Casting Rules in Language Refrence ,in that \\n\\nCasting Rules\\n\\nFrom BOOLEAN\\tTo STRING\\tResults in\\n FALSE = '', TRUE = '1'.
\\nSo the code is working as per the rule but not as my expectation \", \"post_time\": \"2013-09-04 13:56:49\" },\n\t{ \"post_id\": 4524, \"topic_id\": 1017, \"forum_id\": 8, \"post_subject\": \"Re: Casting of Boolean FALSE to STRING returns EMPTY\", \"username\": \"bforeman\", \"post_text\": \"The behavior is in accordance with the casting rules. From the Language Reference:\\n
From BOOLEAN to STRING: FALSE = '', TRUE = '1'
\\n\\nA workaround is to cast the BOOLEAN to an INTEGER and then to a STRING:\\n\\nBOOLEAN isTRUE := TRUE;\\nBOOLEAN isFalse := FALSE;\\nSTRING castTRUE := (STRING)isTRUE;\\nINTEGER num := (INTEGER)isFalse;\\nSTRING castFALSE := (STRING)num;\\n\\n\\nOUTPUT(castTRUE,NAMED('CastTrue'));\\nOUTPUT(castFALSE,NAMED('CastFalse'));
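\\nAn equivalent one-step sketch (untested here, but it skips the intermediate INTEGER) is to map the value explicitly with IF:\\n\\n
BOOLEAN isFalse2 := FALSE;\\nSTRING1 castFalse2 := IF(isFalse2,'1','0');\\nOUTPUT(castFalse2,NAMED('CastFalseExplicit'));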
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-04 13:28:17\" },\n\t{ \"post_id\": 4523, \"topic_id\": 1017, \"forum_id\": 8, \"post_subject\": \"Casting of Boolean FALSE to STRING returns EMPTY\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nWhen I tried to cast a Boolean FALSE to STRING it returns EMPTY, instead of Zero. But when I cast TRUE to STRING it returns One.\\n\\nIdeally FALSE should return Zero ?\\nMy code goes here\\n\\nBOOLEAN isTRUE := TRUE;\\nBOOLEAN isFalse := FALSE;\\nSTRING castTRUE := (STRING)isTRUE;\\nSTRING castFALSE := (STRING)isFalse;\\n\\nOUTPUT(castTRUE,NAMED('CastTrue'));\\nOUTPUT(castFALSE,NAMED('CastFalse'));\\n\\n
\", \"post_time\": \"2013-09-04 13:11:55\" },\n\t{ \"post_id\": 4565, \"topic_id\": 1026, \"forum_id\": 8, \"post_subject\": \"Re: Weird Problem with DECIMAL after 90's\", \"username\": \"abhisr\", \"post_text\": \"Thanks Dan for pointing the mistake.\\nI mistook the first number after DECIMAL as the size of Whole number part. \\n\\nRegards\\nAbhi\", \"post_time\": \"2013-09-11 20:03:59\" },\n\t{ \"post_id\": 4564, \"topic_id\": 1026, \"forum_id\": 8, \"post_subject\": \"Re: Weird Problem with DECIMAL after 90's\", \"username\": \"DSC\", \"post_text\": \"I think DECIMAL2_2 is incorrect here. From the language reference manual:\\n\\n[UNSIGNED] DECIMALn[ _y ]\\n\\nA packed decimal value of n total digits (to a maximum of 32). If the _y value is present, the y defines the number of decimal places in the value.
\\n\\nIf you change your example code to DECIMAL4_2 -- four is the total number of digits in your data -- then the output looks as expected.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-11 19:21:34\" },\n\t{ \"post_id\": 4557, \"topic_id\": 1026, \"forum_id\": 8, \"post_subject\": \"Weird Problem with DECIMAL after 90's\", \"username\": \"abhisr\", \"post_text\": \"Hi ,\\n\\nI ran this simple code \\n\\nEXPORT N_LAYOUT := RECORD\\n\\tINTEGER Ranks;\\n\\tSTRING Name;\\n\\tSTRING SurName;\\n\\tDECIMAL2_2 Score;\\n\\tSTRING year;\\n\\t\\nEND;\\nClass2_DS := DATASET([{2,'Base','B',89.00,'2000'},{3,'Case','C',85.00,'2000'},{1,'Dace','DZ',92.00,'2000'},{4,'Dacer','Z',72.00,'2000'},{5,'Dace*','Z',67.00,'2000'}],N_LAYOUT);\\nOUTPUT(Class2_DS, NAMED('Class1_DS'));
\\nAnd when I check the result my score column is filled with 0.
\\nOnce you change the DECIMAL2_2 Score; to DECIMAL Score;
and run this code again you can see the score column 92.00 is shown as 91.99999999999999 and all the rest of the scores are populated correctly.\\n\\nSo I think there is some problem with Decimal . I am raising a jira ticket \\n\\nhttps://track.hpccsystems.com/browse/HPCC-9973\\n\\nVersion : community_4.0.0-2\\nServer : internal_4.0.0-9\\n\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-09-11 14:59:24\" },\n\t{ \"post_id\": 4594, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"Leofei\", \"post_text\": \"Thanks, Dustin! Good to know this!\", \"post_time\": \"2013-09-17 15:33:49\" },\n\t{ \"post_id\": 4581, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"dustinskaggs\", \"post_text\": \"By default, SORT doesn't change the order of duplicate records so you should be able to rely on records with the same ID being in the same order after a sort. This can save you from needing the PROJECT with a COUNTER. A global SORT followed by a global DEDUP with the RIGHT option to keep the last record for each id should accomplish what you want:\\n\\ndsSort := SORT(ds, id);\\ndsDedup := DEDUP(dsSort, id, RIGHT);
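\\nA self-contained sketch of that pattern with made-up data (not your file) would be:\\n\\n
ds := DATASET([{'Id1','data'},{'Id2','data'},{'Id1','data2'},{'Id2','data2'}],\\n              {STRING id, STRING val});\\ndsSort := SORT(ds, id);              // stable sort: ties keep their input order\\ndsDedup := DEDUP(dsSort, id, RIGHT); // keep the last record in each id group\\nOUTPUT(dsDedup);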
\\n\\nDISTRIBUTE doesn't currently make any guarantees about the ordering of the records so you'd need to use the COUNT PROJECT as in Dan's example if you want to DISTRIBUTE. From a performance standpoint, Dan's example is better because it avoids doing a global SORT, which is an expensive operation. The only change I would make to it is to use DEDUP(..., RIGHT) instead of a ROLLUP.\\n\\n-Dustin\", \"post_time\": \"2013-09-16 20:46:50\" },\n\t{ \"post_id\": 4563, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"Leofei\", \"post_text\": \"Adding an index at the beginning can solve all issues. Thx a lot! \", \"post_time\": \"2013-09-11 18:04:41\" },\n\t{ \"post_id\": 4562, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"DSC\", \"post_text\": \"I just realized that I didn't answer your question about DISTRIBUTE. You could certainly use that, along with LOCAL, if you're dealing with a truly large number of records:\\n\\n
// Create sample data\\nDRec := RECORD\\n STRING theID;\\n STRING theValue;\\n UNSIGNED2 i := 0;\\nEND;\\n\\nd1 := DATASET\\n (\\n [\\n {'Id1','data'},\\n {'Id2','data'},\\n {'Id1','data2'},\\n {'Id2','data2'},\\n {'Id2','data3'},\\n {'Id1','data3'},\\n {'Id2','data4'}\\n ],\\n DRec\\n );\\n\\n// Add an index value\\nd2 := PROJECT\\n (\\n d1,\\n TRANSFORM\\n (\\n DRec,\\n SELF.i := COUNTER,\\n SELF := LEFT\\n )\\n );\\n\\n// Distribute the records\\nd3 := DISTRIBUTE(d2,HASH32(theID));\\n\\n// Roll up records on theID value\\nDRec KeepLast(DRec l, DRec r) := TRANSFORM\\n SELF := r\\nEND;\\n\\nd4 := ROLLUP\\n (\\n SORT(d3,theID,i,LOCAL),\\n LEFT.theID = RIGHT.theID,\\n KeepLast(LEFT,RIGHT),\\n LOCAL\\n );\\n\\n// Output only the fields we're interested in\\nOUTPUT(d4,{theID,theValue});
\\nHere, DISTRIBUTE puts all of the matching IDs on one node. You can then both SORT and ROLLUP locally, because you know that all the right records are already co-located.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-11 18:01:16\" },\n\t{ \"post_id\": 4561, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"Leofei\", \"post_text\": \"I see your point! It's a great idea. Thanks.\", \"post_time\": \"2013-09-11 17:51:22\" },\n\t{ \"post_id\": 4559, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"Re: A question about Distribute/Sort\", \"username\": \"DSC\", \"post_text\": \"You can use ROLLUP to collapse records on one value.\\n\\nYour example doesn't include a field that would determine which record is "later" than another. I added a simple numeric field to help in this example:\\n\\n// Create sample data\\nDRec := RECORD\\n STRING theID;\\n STRING theValue;\\n UNSIGNED2 i := 0;\\nEND;\\n\\nd1 := DATASET\\n (\\n [\\n {'Id1','data'},\\n {'Id2','data'},\\n {'Id1','data2'},\\n {'Id2','data2'},\\n {'Id2','data3'},\\n {'Id1','data3'},\\n {'Id2','data4'}\\n ],\\n DRec\\n );\\n\\n// Add an index value\\nd2 := PROJECT\\n (\\n d1,\\n TRANSFORM\\n (\\n DRec,\\n SELF.i := COUNTER,\\n SELF := LEFT\\n )\\n );\\n\\n// Roll up records on theID value\\nDRec KeepLast(DRec l, DRec r) := TRANSFORM\\n SELF := r\\nEND;\\n\\nd3 := ROLLUP\\n (\\n SORT(d2,theID,i),\\n LEFT.theID = RIGHT.theID,\\n KeepLast(LEFT,RIGHT)\\n );\\n\\n// Output only the fields we're interested in\\nOUTPUT(d3,{theID,theValue});
\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-09-11 17:31:21\" },\n\t{ \"post_id\": 4558, \"topic_id\": 1027, \"forum_id\": 8, \"post_subject\": \"A question about Distribute/Sort\", \"username\": \"Leofei\", \"post_text\": \"Hi, here is the problem I meet:\\n\\nA dataset has duplicate ID. We need to keep the last unique ID record in the raw data. But we don't have any criteria to sort the data except the "ID" field. For one specific unique ID, will the function "sort" change the order of records? Plus, may I use "distribute"? Any suggestions?\\n\\nExample:\\nId1 data\\nId2 data\\nId1 data2\\nId2 data2\\nId2 data3\\nId1 data3\\nId2 data4\\n\\nThese records want to be kept:\\nId1 data3\\nId2 data4\", \"post_time\": \"2013-09-11 15:17:03\" },\n\t{ \"post_id\": 4815, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Re: Question about verifying a pattern\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nThanks for the link - good to know!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-23 13:50:31\" },\n\t{ \"post_id\": 4791, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Re: Question about verifying a pattern\", \"username\": \"Allan\", \"post_text\": \"I've found REGEXFIND very powerful and use it in preference to library functions such as STD.File.contains()\\n\\nJust learning regular expressions is very useful. I use 'regexbuddy' www.regexbuddy.com to play around with regular expressions. If you use 'regexbuddy' remember to select 'Perl'.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-10-21 13:15:51\" },\n\t{ \"post_id\": 4578, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Re: Question about verifying a pattern\", \"username\": \"Leofei\", \"post_text\": \"It's great to learn this. Thank you for all your help!\", \"post_time\": \"2013-09-16 13:11:39\" },\n\t{ \"post_id\": 4577, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Re: Question about verifying a pattern\", \"username\": \"DSC\", \"post_text\": \"A somewhat shorter version that satisfies your original requirements:\\n\\nIsValidDate(STRING s) := REGEXFIND('^\\\\\\\\d\\\\\\\\d-\\\\\\\\w\\\\\\\\w\\\\\\\\w-\\\\\\\\d\\\\\\\\d\\\\\\\\d\\\\\\\\d$',s,NOCASE);\\n\\nOUTPUT(IsValidDate('10-JAN-1999'));\\nOUTPUT(IsValidDate('AA-ABC-2000'));
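\\nOne caveat: the word-character class used for the month portion also matches digits and underscores, so a slightly stricter sketch (untested) swaps in explicit character ranges:\\n\\n
IsValidDate2(STRING s) := REGEXFIND('^[0-9]{2}-[A-Za-z]{3}-[0-9]{4}$',s);\\n\\nOUTPUT(IsValidDate2('10-JAN-1999'));\\nOUTPUT(IsValidDate2('AA-ABC-2000'));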
\\nNote, however, that PARSE is really the way to go if you need to do anything more fancy (such as validate the name of the month, make sure the month is uppercase, etc.). If all you need is to make sure the pattern is correct, then REGEXFIND should work. Note the double-escaping of the regex pattern flags.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-16 12:54:04\" },\n\t{ \"post_id\": 4575, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Re: Question about verifying a pattern\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nYou could use PARSE technology, but here's a simple generic function the does this specific validation:BOOLEAN IsDateValid(STRING11 d) := FUNCTION\\n IsDash(STRING1 d) := d = '-';\\n IsNum(STRING1 n) := n IN [1,2,3,4,5,6,7,8,9,0];\\n IsChar(STRING1 c) := c IN ['a','b','c','d','e','f','g','h','i','j',\\n 'k','l','m','n','o','p','q','r','s','t',\\n 'u','v','w','x','y','z',\\n 'A','B','C','D','E','F','G','H','I','J',\\n 'K','L','M','N','O','P','Q','R','S','T',\\n 'U','V','W','X','Y','Z'];\\n RETURN IsNum(d[1]) AND IsNum(d[2]) AND \\n IsDash(d[3]) AND\\n IsChar(d[4]) AND IsChar(d[5]) AND IsChar(d[6]) AND \\n IsDash(d[7]) AND\\n IsNum(d[8]) AND IsNum(d[9]) AND IsNum(d[10]) AND IsNum(d[11]);\\nEND;
Or you could make it a lot more specific, like this:BOOLEAN IsDateValid(STRING11 d) := FUNCTION\\n IsDash(STRING1 d) := d = '-';\\n IsDD(STRING2 n) := (INTEGER)n BETWEEN 1 AND 31;\\n IsMM(STRING3 m) := m IN ['JAN','FEB','MAR','APR','MAY','JUN',\\n 'JUL','AUG','SEP','OCT','NOV','DEC'];\\n IsYYYY(STRING4 y) := (INTEGER)y BETWEEN 1980 AND 2050; //whatever valid range is\\n RETURN IsDD(d[1..2]) AND \\n IsDash(d[3]) AND\\n IsMM(d[4..6]) AND \\n IsDash(d[7]) AND\\n IsYYYY(d[8..11]);\\nEND;
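\\nAgainst the two example values from the original post, the stricter version should come back as follows (I have not re-run this, so please verify):\\n\\n
OUTPUT(IsDateValid('10-JAN-1999')); // TRUE\\nOUTPUT(IsDateValid('AA-ABC-2000')); // FALSE - the day is not 1..31 and the month is not in the list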
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-15 16:44:10\" },\n\t{ \"post_id\": 4573, \"topic_id\": 1031, \"forum_id\": 8, \"post_subject\": \"Question about verifying a pattern\", \"username\": \"Leofei\", \"post_text\": \"Hi, I have a question here:\\n\\nHow to verify a string following this "DD-MMM-YYYY" format? DD and YYYY must be numbers, MMM must be letters, the sign between them must be "-".\\n\\nFor example: \\n10-JAN-1999, return TRUE value;\\nAA-ABC-2000, return FALSE value;\\n\\nDoes anyone have suggestions?\", \"post_time\": \"2013-09-13 19:25:42\" },\n\t{ \"post_id\": 4882, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"DSC\", \"post_text\": \"Ah! I see it now. Thanks!\\n\\nNow I'm wondering why my PDF reader couldn't find "IFF". But that's not appropriate for this forum.\\n\\nThanks, guys!\\n\\nDan\", \"post_time\": \"2013-11-01 17:42:30\" },\n\t{ \"post_id\": 4881, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"rtaylor\", \"post_text\": \"BTW, although IFF does not have it's own page in the docs (therefore does not appear in the table of contents) it is indexed separately, so looking up IFF or IFF function in the index would have found it.\", \"post_time\": \"2013-11-01 17:39:50\" },\n\t{ \"post_id\": 4880, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nIFF is "if and only if" and is documented along with IF here: http://hpccsystems.com/download/docs/ecl-language-reference/html/IF.html\\n\\nThat's where it says:The IFF function performs the same functionality as IF, but ensures that an expression containing complex boolean logic is evaluated exactly as it appears.
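\\nA trivial sketch of where that matters (my own toy definitions, not from the docs): when the branch values come from something with side effects, such as the SOAPCALLs in this thread, you may care that a compound condition is evaluated exactly as written:\\n\\n
getA := TRUE;\\nhasB := FALSE;\\nhasC := TRUE;\\nresult := IFF(getA AND (hasB OR hasC), 'call gateway A', 'call gateway B');\\nOUTPUT(result);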
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-01 17:36:19\" },\n\t{ \"post_id\": 4879, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"DSC\", \"post_text\": \"What version is IFF() compatible with? My 4.0.2-2 version of LRM does not have IFF() documented.\", \"post_time\": \"2013-11-01 17:35:30\" },\n\t{ \"post_id\": 4878, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"greg.whitaker\", \"post_text\": \"Just another form of IF.\\nFrom LRM:\\n"The IFF function performs the same functionality as IF, but ensures that an expression containing complex boolean logic is evaluated exactly as it appears."\", \"post_time\": \"2013-11-01 17:33:45\" },\n\t{ \"post_id\": 4877, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"DSC\", \"post_text\": \"Greg, you have this line in your example:\\n\\nresult2 := IFF (getA, callA, callB);
\\nWhat is IFF()?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-11-01 17:30:09\" },\n\t{ \"post_id\": 4875, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"greg.whitaker\", \"post_text\": \"IMPORTANT NOTE about conditionally making SOAPCALL()s:\\n\\nWhen you need to conditionaly call a SOAPCALL() you will need to create a request that will fail because the SOAPCALLs will execute no matter what.\\nIn the 3 examples below both SOAPCALLs are executed:\\ngetA := TRUE;\\ncallA := SOAPCALL(requestA, gatewayurlA,,,,,);\\ncallB := SOAPCALL(requestB, gatewayurlB,,,,,);\\nresult1 := IF (getA, callA, callB);\\nresult2 := IFF (getA, callA, callB);\\nresult3 := MAP(getA => callA, callB);\\n\\nWorkaround: 1 method is to conditionally make the request invalid like clearing the gatewayURL value (second parameter to the SOAPCALL).\\nExample:\\ngetA := TRUE;\\ngatewayurlB_fix := if(getA,'', gatewayurlB);\\ncallA := SOAPCALL(requestA, gatewayurlA,,,,,);\\ncallB := SOAPCALL(requestB, gatewayurlB_fix,,,,,);\\nresult1 := IF (getA, callA, callB);\\nresult2 := IFF (getA, callA, callB);\\nresult3 := MAP(getA => callA, callB);\", \"post_time\": \"2013-10-31 13:59:18\" },\n\t{ \"post_id\": 4843, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"How to view ROXIE LOG file (NOTE:this is for a 1 node roxie):\\n1)Use ECL Watch page.\\n2)Look for Topology on the left hand side and click on CLUSTERS.\\n3)locate the roxie cluster you ran the query on.\\n4)click on the Name link provided\\n5)on a one node roxie only one name will appear here, click on the gray image of a harddrive.\\n6)a list of log files will appear, highlight the dated file you want and click on the SELECT button at the bottom of page.\\n7)click the radio button for the type of filtering you want to use.\\nExample to just see the last hour click on "or last:" and enter "1" for hours.\", \"post_time\": \"2013-10-29 13:46:39\" },\n\t{ \"post_id\": 4842, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL Testing\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Yes, add LOG as a parameter to your SOAPCALL. This will dump the soap info into the Roxie LOG file. \\nExample: \\nSoapResult := SOAPCALL(qry,gateway_url,'Sql',{qry}, xform(left),DATASET(response_layout) ,xpath('Response'),onFail(FailSoapY(left)),RETRY(0),TIMEOUT(60),LOG);\", \"post_time\": \"2013-10-29 13:35:54\" },\n\t{ \"post_id\": 4621, \"topic_id\": 1041, \"forum_id\": 8, \"post_subject\": \"SOAPCALL Testing\", \"username\": \"jacob\", \"post_text\": \"I'm trying to test a job that is submitting a SOAP request via the SOAPCALL function. I'd like to be able to view my full SOAP request XML for testing. Is there a way that I could output the SOAP envelope for testing/debug purposes?\", \"post_time\": \"2013-09-20 17:55:31\" },\n\t{ \"post_id\": 4719, \"topic_id\": 1063, \"forum_id\": 8, \"post_subject\": \"Re: Super File question.\", \"username\": \"Leofei\", \"post_text\": \"Thank you, Richard! I will do that.\", \"post_time\": \"2013-10-01 14:54:46\" },\n\t{ \"post_id\": 4718, \"topic_id\": 1063, \"forum_id\": 8, \"post_subject\": \"Re: Super File question.\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nJIRA is the bug reporting system we use. 
All HPCC source code is stored in the GitHub (https://github.com ) HPCC-Platform repository and all bug reporting is done through JIRA (https://track.hpccsystems.com) -- the two operate very well together.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-01 14:39:13\" },\n\t{ \"post_id\": 4717, \"topic_id\": 1063, \"forum_id\": 8, \"post_subject\": \"Re: Super File question.\", \"username\": \"Leofei\", \"post_text\": \"Excuse me, JIRA? What's that? It shows this issue on my virtual machine(thor and hthor) and DEV cluster(thor).\", \"post_time\": \"2013-10-01 14:27:39\" },\n\t{ \"post_id\": 4716, \"topic_id\": 1063, \"forum_id\": 8, \"post_subject\": \"Re: Super File question.\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nI duplicated your issue on my machine, but only when targeting hthor. When I target Thor it works perfectly and produces this result:aaa \\tthor::thor::data::name1\\nbbb \\tthor::thor::data::name1\\nccc \\tthor::thor::data::name1\\nddd \\tthor::thor::data::name2
\\nPlease raise an issue in JIRA for this.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-01 14:11:14\" },\n\t{ \"post_id\": 4715, \"topic_id\": 1063, \"forum_id\": 8, \"post_subject\": \"Super File question.\", \"username\": \"Leofei\", \"post_text\": \"Hi, a question about Super File. I need to output all records in one superfile, and add a new field to show the subfile logic name that record belongs to. I use the following sample code:\\n\\nIMPORT STD;\\nsf1 := 'thor::data::test::sf';\\npath1 := 'thor::data::name1';\\npath2 := 'thor::data::name2';\\n\\nnRecord := RECORD\\nSTRING20 name;\\nEND;\\nds1 := DATASET([ {'aaa'},\\n{'bbb'},\\n{'ccc'} ],\\nnRecord);\\nds2 := DATASET([{'ddd'}], nRecord);\\n\\nSEQUENTIAL(\\nOUTPUT(ds1,,path1, OVERWRITE),\\nOUTPUT(ds2,,path2, OVERWRITE),\\nSTD.File.CreateSuperFile(sf1),\\nSTD.File.StartSuperFileTransaction(),\\nSTD.File.AddSuperFile(sf1, path1),\\nSTD.File.AddSuperFile(sf1, path2),\\nSTD.File.FinishSuperFileTransaction());\\n\\nds3 := DATASET(sf1, {nRecord, string255 logicalFile{virtual(logicalfilename)}}, THOR);\\nOUTPUT(ds3);
\\n\\nSEQUENTIAL part creates the superfile and subfiles. ds3 is the result I want. I'm confused by the result in ds3. In the field named “logicalfile”, the value is “MORE!” instead of the really logicalfilename. What does this mean? How can I get the result I want? Really appreciate any suggestion.\", \"post_time\": \"2013-10-01 13:54:49\" },\n\t{ \"post_id\": 4728, \"topic_id\": 1064, \"forum_id\": 8, \"post_subject\": \"Re: Embedded C++ code returning fixed-size STRING\", \"username\": \"DSC\", \"post_text\": \"Perfect!\\n\\nI will open a Jira ticket requesting clarification in the language reference manual.\\n\\nThanks, Dustin!\\n\\nDan\", \"post_time\": \"2013-10-03 15:29:13\" },\n\t{ \"post_id\": 4727, \"topic_id\": 1064, \"forum_id\": 8, \"post_subject\": \"Re: Embedded C++ code returning fixed-size STRING\", \"username\": \"dustinskaggs\", \"post_text\": \"For fixed length return values, __result has already been allocated so you don't need to malloc space for it. If you remove the one line with your rtlMalloc, it works.\", \"post_time\": \"2013-10-03 15:22:49\" },\n\t{ \"post_id\": 4726, \"topic_id\": 1064, \"forum_id\": 8, \"post_subject\": \"Re: Embedded C++ code returning fixed-size STRING\", \"username\": \"DSC\", \"post_text\": \"That works, but feels a little kludgy. I'm hoping that I'm just doing something wrong. The code executes, just not correctly.\\n\\nThe language reference section referring to return values is here. The subsection beginning with "Return types are handled as C++ functions returning the same types with some exceptions" shows that fixed-length value types (STRING<nnn>, QSTRING<nnn>, UNICODE<nnn>, and DATA<nnn>) can all be returned. Presumably, the compiler notices the return type of the function and supplies the length of the data pointed to by __result to the caller, leaving me to set only that __result variable.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-10-03 14:26:05\" },\n\t{ \"post_id\": 4725, \"topic_id\": 1064, \"forum_id\": 8, \"post_subject\": \"Re: Embedded C++ code returning fixed-size STRING\", \"username\": \"gsmith\", \"post_text\": \"I suspect BEGINC++ can only return variable length strings, you could then wrap it in a function to encapsulate the STRINTG10 requirement:\\n\\n\\nSTRING10 BadTest() := FUNCTION \\n STRING cppString() := BEGINC++\\n int fooSize = 10;\\n char foo[fooSize];\\n\\t \\n memset(foo,32,sizeof(foo));\\n memcpy(foo,"DSC",3);\\n\\t \\n __result = reinterpret_cast<char*>(rtlMalloc(fooSize));\\n memcpy(__result,foo,fooSize);\\n ENDC++;\\n RETURN cppString();\\nEND;\\n
\", \"post_time\": \"2013-10-03 13:50:11\" },\n\t{ \"post_id\": 4724, \"topic_id\": 1064, \"forum_id\": 8, \"post_subject\": \"Embedded C++ code returning fixed-size STRING\", \"username\": \"DSC\", \"post_text\": \"I've run into a problem trying to write an embedded C++ function that returns a fixed-length string. The language reference manual indicates that only __result needs to be set in such cases, and the code compiles and runs, but only garbage is returned.\\n\\nHere is a contrived, poorly-written example demonstrating what I'm seeing:\\n\\nSTRING GoodTest() := BEGINC++\\n char foo[10];\\n \\n memset(foo,32,sizeof(foo));\\n memcpy(foo,"DSC",3);\\n \\n __lenResult = 10;\\n __result = reinterpret_cast<char*>(rtlMalloc(__lenResult));\\n memcpy(__result,foo,__lenResult);\\nENDC++;\\n\\nSTRING10 BadTest() := BEGINC++\\n int fooSize = 10;\\n char foo[fooSize];\\n \\n memset(foo,32,sizeof(foo));\\n memcpy(foo,"DSC",3);\\n \\n __result = reinterpret_cast<char*>(rtlMalloc(fooSize));\\n memcpy(__result,foo,fooSize);\\nENDC++;\\n\\nOUTPUT(GoodTest(),NAMED('Good'));\\nOUTPUT(BadTest(),NAMED('Bad'));
\\nHow should a STRING10 be built and returned by BadTest()?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-10-03 13:12:55\" },\n\t{ \"post_id\": 4740, \"topic_id\": 1065, \"forum_id\": 8, \"post_subject\": \"Re: Converting a set into a dataset.\", \"username\": \"Allan\", \"post_text\": \"Thanks Dan,\\n\\nI thought it would be something simple like that \\n\\nCheers\\n\\nAllan\", \"post_time\": \"2013-10-08 10:32:55\" },\n\t{ \"post_id\": 4739, \"topic_id\": 1065, \"forum_id\": 8, \"post_subject\": \"Re: Converting a set into a dataset.\", \"username\": \"DSC\", \"post_text\": \"Try something like this:\\n\\n
stringSet := ['a','b','c'];\\n\\nstringRS := DATASET(stringSet,{STRING s});\\n\\nOUTPUT(stringRS);
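\\nGoing the other direction (a quick sketch I have not re-checked), the SET function turns a dataset field back into a set:\\n\\n
backToSet := SET(stringRS, s); // a SET OF STRING again\\nOUTPUT(backToSet);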
\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-10-08 10:16:46\" },\n\t{ \"post_id\": 4738, \"topic_id\": 1065, \"forum_id\": 8, \"post_subject\": \"Converting a set into a dataset.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've been passed a set of strings as an input parameter, their easier to manipulate in the FUNCTION as a DATASET.\\n\\nWhats the easiest way to convert a set of something into a dataset of something?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-10-08 09:54:08\" },\n\t{ \"post_id\": 4759, \"topic_id\": 1071, \"forum_id\": 8, \"post_subject\": \"Re: Dataset sorting after merging Record using "+" operator\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nThe neither the + nor the & append operators merge the two datasets to maintain their sorted order -- the MERGE function does that. Here's some example code that shows the differences:IMPORT STD;\\nRec := {INTEGER1 number,STRING1 Letter,UNSIGNED1 NodeID := 0};\\nSomeFile1 := DATASET([{1,'A'},{1,'B'},{1,'C'},{1,'D'},{1,'E'},\\n {1,'F'},{1,'G'},{1,'H'},{1,'I'},{1,'J'}],\\n rec);\\nSomeFile2 := DATASET([{2,'A'},{2,'B'},{2,'C'},{2,'D'},{2,'E'},\\n {2,'F'},{2,'G'},{2,'H'},{2,'I'},{2,'J'}],\\n rec);\\n\\t\\t\\t\\t\\t\\nD1 := DISTRIBUTE(Somefile1);\\t\\t//spread across nodes\\t\\t\\t\\nD2 := DISTRIBUTE(Somefile2);\\t\\t\\t\\t\\t\\nP1 := PROJECT(D1,\\n TRANSFORM(rec,\\n SELF.NodeID := STD.system.Thorlib.Node()+1,\\n SELF := LEFT),LOCAL);\\nP2 := PROJECT(D2,\\n TRANSFORM(rec,\\n SELF.NodeID := STD.system.Thorlib.Node()+1,\\n SELF := LEFT),LOCAL);\\nP1;P2; //show those results\\n\\nS1 := SORT(P1,letter,number);\\t\\t//global sort\\t\\t\\t\\nS2 := SORT(P2,letter,number);\\t\\t\\t\\t\\t\\nS1;S2; //show those results\\n\\nMerged := MERGE(S1,S2,SORTED(letter,number));\\n\\nOUTPUT(Merged,NAMED('Merged'));\\nOUTPUT(S1 + S2,NAMED('Append_Plus'));\\nOUTPUT(S1 & S2,NAMED('Append_Amp'));\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-11 20:28:05\" },\n\t{ \"post_id\": 4757, \"topic_id\": 1071, \"forum_id\": 8, \"post_subject\": \"Dataset sorting after merging Record using "+" operator\", \"username\": \"Rahul Jain\", \"post_text\": \"If I have 2 dataset sorted on specific column and then I combine them then will sorting order remain same or will get distorted.\\n\\nEx :- \\n\\nex_rec := RECORD\\nstring name;\\nstring st;\\nEND;\\n\\nin_rec := DATASET([{'Rahul','FL'},{'Tom','TX'},{'Jerry','DT'}],ex_rec);\\n\\nfltr_ds1 := in_rec(st = 'FL');\\nsrted_ds1 := SORT(fltr_ds1,name,st);\\n\\nfltr_ds2 := in_rec(st <> 'FL');\\nsrted_ds2 := SORT(fltr_ds2,st);\\n\\nfltrstrall := srted_ds1 + srted_ds2; // Will the sorted order remain as it is ?\\nget_frst_rec := CHOOSEN(fltrstrall,1); // If not I will get random first record?\\n//fltrstrall := srted_ds1 & srted_ds2; //Will this be a better option ?\\n\\n\\nOUTPUT(get_frst_rec,NAMED('fetch_first_record'));\\n\\n
\", \"post_time\": \"2013-10-11 17:50:05\" },\n\t{ \"post_id\": 4762, \"topic_id\": 1072, \"forum_id\": 8, \"post_subject\": \"Re: SIG: Segmentation fault(11), accessing 000032313335373\", \"username\": \"rtaylor\", \"post_text\": \"Then it's time to post this code as an issue in JIRA https://track.hpccsystems.com\", \"post_time\": \"2013-10-11 20:52:51\" },\n\t{ \"post_id\": 4761, \"topic_id\": 1072, \"forum_id\": 8, \"post_subject\": \"Re: SIG: Segmentation fault(11), accessing 000032313335373\", \"username\": \"abhisr\", \"post_text\": \"Hi rtaylor,\\n\\nI made two modification in the code as per the comment.\\nI removed the STD.System.Job.WUID() function and also used the STD.System.email.sendemail() to send the mail.\\n\\nStill I encounter the same error\\n\\n\\noutputFileName := QA_DataPackage.CC_KeyFile_Defs.QA_COMPA_CustSupport_REPORT_FILE_NAME;\\n// outputFileName := 'Constant name';\\n\\nmailBody := '\\\\nWorkunit : '+(STRING)STD.System.Job.WUID ( ) +'\\\\nOutput File : '+outputFileName ;\\n\\t\\nOUTPUT(DS1,,'~thor::test::testfile',OVERWRITE,NAMED('Bezos')) : SUCCESS(STD.System.email.sendemail(\\t'abhilash.nair@lexisnexis.com',\\noutputFileName,'Hello'));\\n
\", \"post_time\": \"2013-10-11 20:48:26\" },\n\t{ \"post_id\": 4760, \"topic_id\": 1072, \"forum_id\": 8, \"post_subject\": \"Re: SIG: Segmentation fault(11), accessing 000032313335373\", \"username\": \"rtaylor\", \"post_text\": \"abhisr,\\n\\nI see you're using the STD.System.Job.WUID() function in your code, which implies you're running on OSS, but your SUCCESS service is calling fileservices.sendemail() (a legacy-system plugin library function) and not the STD.System.email.sendemail() Standard Library function that I would expect you to use in OSS. Could that possibly be an issue here?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-11 20:36:55\" },\n\t{ \"post_id\": 4758, \"topic_id\": 1072, \"forum_id\": 8, \"post_subject\": \"SIG: Segmentation fault(11), accessing 000032313335373\", \"username\": \"abhisr\", \"post_text\": \"Hi ,\\n\\nI am using SUCCESS to send mail after an OUTPUT operation.\\nHere is my piece of code.\\n\\nWhen I execute this am getting this error.\\nECLAGENT 1000: SIG: Segmentation fault(11), accessing 0000323133353735, IP=00002AAAAD06DD7C \\n\\nHere is my code\\n\\nexport REC := record\\n\\tstring player;\\n\\tinteger8 Score;\\n\\tstring CenturyAgainst;\\n\\tinteger8 StrikeRate;\\n\\tinteger8 Year;\\nend;\\n\\n\\nDS1 := DATASET\\n([{'Sehwag',219,'West Indies',157,2011}\\n\\t\\t,{'Sachin',200,'South Africa',189,2010}\\n\\t\\t,{'Coventry',219,'Bangladesh',157,2009}\\n\\t],REC);\\noutputFileName := AnotherPackage.ModuleName.REPORT_FILE_NAME;\\n// outputFileName := 'Constant name';\\n\\nmailBody := '\\\\nWorkunit : '+(STRING)STD.System.Job.WUID ( ) +'\\\\nOutput File : '+outputFileName ;\\n\\t\\nOUTPUT(DS1,,'~thor::test::testfile',OVERWRITE,NAMED('Bezos')) : SUCCESS(fileservices.sendemail(\\t'abhisr@domain.com',\\noutputFileName,mailBody));
\\n\\nIn the above code outputFileName
is defined in another module as a STRING which is exported .Instead of reading it from another attribute if I define it here my code is working fine .\", \"post_time\": \"2013-10-11 19:34:14\" },\n\t{ \"post_id\": 4769, \"topic_id\": 1074, \"forum_id\": 8, \"post_subject\": \"Re: Workunit State\", \"username\": \"abhisr\", \"post_text\": \"thanks smith\", \"post_time\": \"2013-10-15 11:35:14\" },\n\t{ \"post_id\": 4768, \"topic_id\": 1074, \"forum_id\": 8, \"post_subject\": \"Re: Workunit State\", \"username\": \"gsmith\", \"post_text\": \"From: https://github.com/hpcc-systems/HPCC-Platform/blob/master/common/workunit/workunit.hpp\\n\\nenum WUState\\n{\\n WUStateUnknown = 0,\\n WUStateCompiled = 1,\\n WUStateRunning = 2,\\n WUStateCompleted = 3,\\n WUStateFailed = 4,\\n WUStateArchived = 5,\\n WUStateAborting = 6,\\n WUStateAborted = 7,\\n WUStateBlocked = 8,\\n WUStateSubmitted = 9,\\n WUStateScheduled = 10,\\n WUStateCompiling = 11,\\n WUStateWait = 12,\\n WUStateUploadingFiles = 13,\\n WUStateDebugPaused = 14,\\n WUStateDebugRunning = 15,\\n WUStatePaused = 16,\\n WUStateSize = 17\\n};\\n
\\n\\nFrom: https://github.com/hpcc-systems/HPCC-Platform/blob/master/common/workunit/workunit.cpp\\n\\nmapEnums states[] = {\\n { WUStateUnknown, "unknown" },\\n { WUStateCompiled, "compiled" },\\n { WUStateRunning, "running" },\\n { WUStateCompleted, "completed" },\\n { WUStateFailed, "failed" },\\n { WUStateArchived, "archived" },\\n { WUStateAborting, "aborting" },\\n { WUStateAborted, "aborted" },\\n { WUStateBlocked, "blocked" },\\n { WUStateSubmitted, "submitted" },\\n { WUStateScheduled, "scheduled" },\\n { WUStateCompiling, "compiling" },\\n { WUStateWait, "wait" },\\n { WUStateUploadingFiles, "uploading_files" },\\n { WUStateDebugPaused, "debugging" },\\n { WUStateDebugRunning, "debug_running" },\\n { WUStatePaused, "paused" },\\n { WUStateSize, NULL }\\n};\\n
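\\nIf you would rather look the state up from ECL than from the sources, something along these lines should work (sketch only -- check STD.System.Workunit.WorkunitList and its field names in your version of the Standard Library):\\n\\n
IMPORT STD;\\nwus := STD.System.Workunit.WorkunitList(''); // workunits visible to you, state included\\nOUTPUT(wus);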
\", \"post_time\": \"2013-10-15 06:00:29\" },\n\t{ \"post_id\": 4766, \"topic_id\": 1074, \"forum_id\": 8, \"post_subject\": \"Workunit State\", \"username\": \"abhisr\", \"post_text\": \"hi,\\n\\nIs there any library function or service available to know all the valid states of a workunit ?\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-10-14 13:02:44\" },\n\t{ \"post_id\": 4844, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"Allan\", \"post_text\": \"Thnaks for this Richard,\\n\\nYour example works.\\nMinor point, types cannot be supplied to a FUNCTIONMACRO so the definition is:\\n\\nfilt(prefix,postfix,itm) := FUNCTIONMACRO\\n
\\n\\nPardon the delay in replying (pressure of other work)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-10-29 15:28:43\" },\n\t{ \"post_id\": 4824, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nTry Dustin's suggestion of wrapping your filter logic in a FUNCTIONMACRO that returns the boolean filter expression you want, something like this:r(STRING itm) := REGEXFIND('^(\\\\\\\\S+\\\\\\\\s+|\\\\\\\\s*)Allan(\\\\\\\\s.*$|$)',itm,NOCASE);\\ns(STRING itm) := REGEXFIND('^.*1978-02-01.*$',itm,NOCASE);\\n\\nfilt(string prefix,string postfix,boolean itm(string x)) := FUNCTIONMACRO\\n txt := itm(prefix+'1_'+postfix)\\n OR itm(prefix+'2_'+postfix)\\n OR itm(prefix+'3_'+postfix)\\n OR itm(prefix+'4_'+postfix)\\n OR itm(prefix+'5_'+postfix);\\n RETURN #EXPAND(txt);\\nENDMACRO;\\n\\nd := t(filt('subject','first_name',r) AND filt('subject','dob',s));
I have not tried this code, so you may need to play with it a bit.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-24 13:24:29\" },\n\t{ \"post_id\": 4823, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"Allan\", \"post_text\": \"Unfortunatly the code I posted above does not work with either:\\n\\nd := t((filt('subject','first_name',r)));\\n
\\nor\\n\\nd := t((#EXPAND(filt('subject','first_name',r))));\\n
\\n\\nThe filter gets constant folded to d(false) and with no other actions to do the WU just ends after the compilation phase.\\n\\nAny other ideas?\\n\\nYours\", \"post_time\": \"2013-10-24 13:12:25\" },\n\t{ \"post_id\": 4796, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"Allan\", \"post_text\": \"Thnaks both of you for the replies.\\nThe x := filt(...)\\ndid not work.\\n\\nSo I've used the 2nd approach thus:\\n\\nr(STRING itm) := REGEXFIND('^(\\\\\\\\S+\\\\\\\\s+|\\\\\\\\s*)Allan(\\\\\\\\s.*$|$)',itm,NOCASE);\\ns(STRING itm) := REGEXFIND('^.*1978-02-01.*$',itm,NOCASE);\\n\\nfilt(string prefix,string postfix,boolean itm(string x)) := itm(prefix+'1_'+postfix)\\n OR itm(prefix+'2_'+postfix)\\n OR itm(prefix+'3_'+postfix)\\n OR itm(prefix+'4_'+postfix)\\n OR itm(prefix+'5_'+postfix);\\n\\nd := t(#EXPAND(filt('subject','first_name',r)) AND #EXPAND(filt('subject','dob',s)));\\n
\\n\\nI had not realised one could use the #EXPAND construct in this context. Good hint to know.\\nI will in future use FUNCTIONMACRO more.\\nThat being said, MACROs should not be difficult to write; I believe there is a real usability issue here, not helped by error messages that tell you nothing useful about the error!\\n\\nOnce again thank you both for your replies.\\n\\nAllan\", \"post_time\": \"2013-10-22 08:49:18\" },\n\t{ \"post_id\": 4795, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"dustinskaggs\", \"post_text\": \"I tend to avoid MACROs in favor of FUNCTIONMACROs. MACROs tend to be much more difficult to write and debug. For your example, you could get away without using either and just build a string and #EXPAND it.\\n\\nfilt(string prefix,string postfix,string itm) := prefix+'1'+postfix+' = \\\\''+itm+'\\\\'';\\nds(#expand(filt('subject','_name','Allan')))
;\\n\\nDepending on what you're trying to accomplish, it may make sense to write a FUNCTIONMACRO that takes the dataset as one of the parameters along with the filter info and have the FUNCTIONMACRO return the filtered dataset.\\n\\n-Dustin\", \"post_time\": \"2013-10-21 20:12:23\" },\n\t{ \"post_id\": 4794, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Re: Using MACRO's in Filters.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nHave you tried:\\nmyfilter := filt('subject','_name','Allan');\\nmydata := d(myfilter);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-21 16:04:14\" },\n\t{ \"post_id\": 4792, \"topic_id\": 1080, \"forum_id\": 8, \"post_subject\": \"Using MACRO's in Filters.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nYet another simple MACRO question that is baffling me.\\n\\nI have a record with fields named in simmiler ways e.g. 'subject1_name', 'subject2_name', 'subject1_dob', 'subject2_dob' etc.\\n\\nI'm attempting to write a MACRO that generates a filter against all simmilary named fields. i.e. to end up with:\\n\\nd(subject1_name = 'Allan' or subject2_name = 'Allan');\\n
\\nI can write a macro that generates the correct text just fine:\\n\\nloadxml('<xml/>');\\nfilt(prefix,postfix,itm) := MACRO\\n #DECLARE(f);\\n #SET(f,prefix+'1'+postfix+' = \\\\''+itm+'\\\\'');\\n %'f'%\\nENDMACRO;\\nfilt('subject','_name','Allan');\\n
\\nBut when I attempt to use the generated text as a filter to a DATASET I get syntax errors.\\nI've tried:\\n\\nd(filt('subject','_name','Allan'));\\n
\\nand\\n\\nd(#EXPAND(filt('subject','_name','Allan')));\\n
\\nAll to no avail, any pointers would be greatly appreciated.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-10-21 13:31:15\" },\n\t{ \"post_id\": 4829, \"topic_id\": 1089, \"forum_id\": 8, \"post_subject\": \"Re: Location of #OPTION()\", \"username\": \"DSC\", \"post_text\": \"@Richard: I guess I should have mentioned what the error was, huh? That probably would have helped. At any rate, it was a straightforward "symbol not found" type of error, indicating that the embedded C++ code was calling a function that the linker knew nothing about, which in turn indicated that the UUID library was not loaded. I did try moving the #OPTION() around, before and after my IMPORTs (but always preceding the MODULE itself). I didn't try embedding #OPTION() within the module.\\n\\n@Jim: I actually messed around with the compiler option as well, but ended up simply citing (within a comment in the module's file) the need to insert the #OPTION() statement in the BWR. That seemed to be a better alternative, from a code management standpoint.\\n\\nI actually took a look at the source code to see if I could insert a scan-and-promote step into the parser but I got scared.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-10-24 14:38:22\" },\n\t{ \"post_id\": 4828, \"topic_id\": 1089, \"forum_id\": 8, \"post_subject\": \"Re: Location of #OPTION()\", \"username\": \"JimD\", \"post_text\": \"Another option for #OPTION is the command line eclcc. \\n\\neclcc -foption[=value]
\\n\\nallows you to pass a #OPTION on the command line.\\nThis may not answer your encapsulation question, but it came up while discussing it.\", \"post_time\": \"2013-10-24 14:13:23\" },\n\t{ \"post_id\": 4827, \"topic_id\": 1089, \"forum_id\": 8, \"post_subject\": \"Re: Location of #OPTION()\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nSince #OPTION is part of the template language and usually a compiler directive (as it is in this case), I have never put it anywhere but the beginning of the BWR code I'm hitting Submit on.\\n\\nWhen you put it in your definition file, are you making it the first line there? Or are you placing it inside the MODULE structure?\\n\\nAnd when you say "does not seem to work" -- what result (or non-result) are you seeing that raises that conclusion?\\n\\nRichard\", \"post_time\": \"2013-10-24 14:09:15\" },\n\t{ \"post_id\": 4821, \"topic_id\": 1089, \"forum_id\": 8, \"post_subject\": \"Location of #OPTION()\", \"username\": \"DSC\", \"post_text\": \"I wrote a tiny module that provides an ECL interface to libuuid. It works well, but it needs to inform the compiler to link in libuuid during the compile phase. This is easily done:\\n\\n#OPTION('linkOptions','-luuid');
\\nMy desire is to place this line within the file containing the module to make it self contained, but that does not seem to work. If I insert that line in the top-most ECL file, it works fine but it breaks encapsulation.\\n\\nWhat are the rules regarding the visibility/placement for #OPTION() directives?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-10-24 11:46:38\" },\n\t{ \"post_id\": 4832, \"topic_id\": 1090, \"forum_id\": 8, \"post_subject\": \"Re: Delete Logical Files in an automated way\", \"username\": \"rtaylor\", \"post_text\": \"Abhi,\\n\\nThat code should work, but does not in the 4.0.2 release. I have reported the issue in JIRA: https://track.hpccsystems.com/browse/HPCC-10302\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-24 18:52:53\" },\n\t{ \"post_id\": 4830, \"topic_id\": 1090, \"forum_id\": 8, \"post_subject\": \"Re: Delete Logical Files in an automated way\", \"username\": \"abhisr\", \"post_text\": \"Hi Viswa,\\n\\nI have written a code chunk to delete the logical files and it works fine for me.\\n\\n\\nSTRING filePatt := '*::sample*';\\nsprayedFileList := STD.File.LogicalFileList(filePatt);\\nFilenames := TABLE(sprayedFileList,{name});\\n\\nAct := APPLY(Filenames,FileServices.DeleteLogicalFile('~'+Filenames.name));\\nAPPLY(Filenames,FileServices.DeleteLogicalFile('~'+Filenames.name));\\n\\n
.\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-10-24 18:16:39\" },\n\t{ \"post_id\": 4826, \"topic_id\": 1090, \"forum_id\": 8, \"post_subject\": \"Delete Logical Files in an automated way\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThere is a scenario where every day new index files are created and after every one week or one month , need to delete the old index files.\\n\\nIs there any automated way to delete the old logical files present in both thor and roxie..?\\n\\nI tried a sample code for the same.\\n\\nSample Logical Files\\n\\n~thor::sample::1\\n~thor::sample::2\\n~thor::sample::3\\n~thor::sample::4\\n~thor::sample::5\\n
\\n\\n\\nimport std;\\nfile_name := 'thor::sample';\\n\\nfiles_list := STD.File.LogicalFileList(file_name + '*');\\nfiles_list;\\n\\nsubset_files_list := files_list[2..4];\\nsubset_files_list;\\n\\nfile_Rec := RECORD\\n\\tSTRING result;\\nEND;\\n\\nfile_Rec DelFiles(subset_files_list L) := TRANSFORM\\n \\tself.result := IF (STD.File.FileExists('~' + L.name), STD.File.DeleteLogicalFile('~' + L.name), 'False');\\n END;\\n\\ndelete_files := PROJECT(subset_files_list ,DelFiles(LEFT));\\ndelete_files;\\n
\\n\\n\\nI am not able to delete logical files using this logic and encounter the following error:\\n\\n\\nError: syntax error near "DeleteLogicalFile" : expected datarow, identifier, macro-name (22, 68), 3002, \\n
\\n\\n\\nI can delete manually or run DeleteLogicalFile for individual files, but the number of files is in few hundreds.\\n\\nKindly help regarding the same.\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-10-24 13:55:32\" },\n\t{ \"post_id\": 4837, \"topic_id\": 1093, \"forum_id\": 8, \"post_subject\": \"JSON response via HTTP POST call\", \"username\": \"sanjayssn\", \"post_text\": \"Hi,\\n\\nI tried to get a JSON response from a published ECL query and I was able to do it successfully. But I have a doubt regarding how the HTTP call works. Following is the Java code that I wrote to retrieve results from an ECL query which has only one parameter named 'state'. \\n\\n\\n HttpClient client = new DefaultHttpClient();\\n\\t\\tHttpPost post = new HttpPost(\\n\\t\\t\\t\\t"http://ip:port/WsEcl/submit/query/hthor/persons");\\n\\t\\tpost.addHeader(BasicScheme.authenticate(\\n\\t\\t\\tnew UsernamePasswordCredentials ("username", "password"),\\n\\t\\t\\t\\t"UTF-8", false));\\n\\n\\t\\tStringEntity input = new StringEntity(\\n\\t\\t\\t\\t"{\\\\"persons\\\\":{\\\\"state\\\\":\\\\"FL\\\\"}}");\\n\\t\\tinput.setContentType("application/json");\\n\\t\\tpost.setEntity(input);\\n\\n\\t\\tHttpResponse response = client.execute(post);\\n\\n\\t\\tBufferedReader in = new BufferedReader(new InputStreamReader(response\\n\\t\\t\\t\\t.getEntity().getContent()));\\n\\t\\tStringBuilder sb = new StringBuilder();\\n\\t\\tString line;\\n\\t\\twhile ((line = in.readLine()) != null) {\\n\\t\\t\\tsb.append(line);\\n\\t\\t}\\n
\\n\\nThe code returned an InputStream response in JSON format and I was able to convert it into JSON. My question is, in the above code I have set the request content type as "application/json", but I have not specified the content type in which I expect the response. So how do I receive a JSON-formatted response for this request? Does HPCC automatically identify the response format from the request format? I'm just trying to understand the workflow in a detailed way.\\n\\nThanks,\\nSanjay\", \"post_time\": \"2013-10-28 05:19:52\" },\n\t{ \"post_id\": 6757, \"topic_id\": 1094, \"forum_id\": 8, \"post_subject\": \"Re: Expanding a range of dates.\", \"username\": \"rtaylor\", \"post_text\": \"Sara,\\n\\nYou can use this service:http://xxx.xxx.xxx.xxx:8010/WsTopology
where the "xxx"s are your environment's IP for its ESP server (port 8010 is the default port for ECL Watch). This takes you to a page listing services you can use. These services are currently undocumented.\\n\\nThe specific one I think you want is this one:http://xxx.xxx.xxx.xxx:8010/WsTopology/TpListTargetClusters?form
\\nOur Eclipse plugin consumes these services, so you can look at that source code for usage examples.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-07 14:56:30\" },\n\t{ \"post_id\": 4847, \"topic_id\": 1094, \"forum_id\": 8, \"post_subject\": \"Re: Expanding a range of dates.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nActually the actual job I want to do is better accomplished with a cross-tab table nested in another cross-tab.\\n\\nBut this answers my original questions and will come in useful in other contexts.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2013-10-29 19:35:30\" },\n\t{ \"post_id\": 4846, \"topic_id\": 1094, \"forum_id\": 8, \"post_subject\": \"Re: Expanding a range of dates.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's my code to create your dataset of each date within your date range. I assumed an inclusive range:IMPORT STD;\\nDateRec := {STRING8 d};\\t\\nDateRangeDS(STRING8 dFrom,STRING8 dTo) := FUNCTION\\n jFrom := STD.Date.FromGregorianDate((UNSIGNED4)dFrom);\\n jTo := STD.Date.FromGregorianDate((UNSIGNED4)dTo);\\n NumDays := JTo - jFrom + 1;\\n ds := DATASET(NumDays,\\n TRANSFORM(DateRec,\\n SELF.d := (STRING8)STD.Date.ToGregorianDate(jFrom+COUNTER-1)));\\n RETURN ds;\\nEND;\\n\\nDateRangeDS('20130101','20130103');
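From there, the per-day stats could be produced with a simple PROJECT over the generated dates rather than a LOOP. An untested sketch, assuming your StatsByHour function and LayoutStats.DayStats layout:

// One output row per date in the range, each carrying that day's stats
Stats(STRING AccNo, STRING8 dFrom, STRING8 dTo) :=
    PROJECT(DateRangeDS(dFrom, dTo),
            TRANSFORM(LayoutStats.DayStats,
                      SELF.Day   := LEFT.d,
                      SELF.Stats := StatsByHour(AccNo, LEFT.d)));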
\\nI'll leave the rest to you. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-29 18:35:07\" },\n\t{ \"post_id\": 4845, \"topic_id\": 1094, \"forum_id\": 8, \"post_subject\": \"Expanding a range of dates.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI need to perform some processing on every date between a supplied range of dates.\\n\\nI'm having trouble generating the list of dates from a supplied range.\\n\\nI've tried using LOOP, this compiles and runs but does not do whats expected at run time (1 row returned of the 1st day with no data for that day).\\nIf I could just generate a dataset of STRING8 holding a list of every day I could just use PROJECT.\\nThis is what I have so far:\\n
\\n EXPORT DATASET(LayoutStats.DayStats) Stats (STRING AccNo,STRING8 dFrom,STRING8 dTo) := FUNCTION\\n\\n ValidDate(STRING8 itm) := REGEXFIND('^(19|20)[0-9]{2}(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])$',itm);\\n \\n cnt := MAP(NOT ValidDate(dFrom) => ERROR('Date From paramter is not YYYYMMDD'),\\n NOT ValidDate(dTo) => ERROR('Date To paramter is not YYYYMMDD'),\\n ut.daysApart(dFrom,dTo)+1);\\n\\n startDate := IF(dFrom < dTo,dFrom,dTo);\\n \\n ds := DATASET([{startDate,[]}],LayoutStats.DayStats);\\n\\n LayoutStats.DayStats DoDay(STRING AccNo,LayoutStats.DayStats L,INTEGER C) := TRANSFORM\\n STRING8 d := ut.date_math(L.Day,C-1);\\n SELF.Day := d;\\n SELF.Stats := StatsByHour(AccNo,d);\\n END;\\n \\n RETURN LOOP(ds,COUNTER <= cnt,PROJECT(ROWS(LEFT),DoDay(AccNo,LEFT,COUNTER)));\\n END;\\n
\\n\\nHopefully I don't have to go down the LOOP route and can just generate the dataset I mentioned above.\\n\\nYours\\n\\nAllan\\n\\nPS. I think, given the complexity of LOOP, its would be useful to have an example added to the 'ECL Playgound'.\", \"post_time\": \"2013-10-29 15:54:42\" },\n\t{ \"post_id\": 4853, \"topic_id\": 1095, \"forum_id\": 8, \"post_subject\": \"Re: Available cluster names in ECL\", \"username\": \"abhisr\", \"post_text\": \"Thanks smith.\\n\\nHow can i get the server IP (ESP) from ECL code ,the one we gave in preferences in ECL IDE?\", \"post_time\": \"2013-10-30 12:37:33\" },\n\t{ \"post_id\": 4849, \"topic_id\": 1095, \"forum_id\": 8, \"post_subject\": \"Re: Available cluster names in ECL\", \"username\": \"gsmith\", \"post_text\": \"http://IP:8010/WsTopology/TpLogicalClusterQuery?rawxml_\\nhttp://IP:8010/WsTopology/TpLogicalClusterQuery.json\", \"post_time\": \"2013-10-30 08:06:31\" },\n\t{ \"post_id\": 4848, \"topic_id\": 1095, \"forum_id\": 8, \"post_subject\": \"Available cluster names in ECL\", \"username\": \"abhisr\", \"post_text\": \"hi,\\n\\nIs there any library function or service available to know the available Clusters ?\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-10-29 21:19:00\" },\n\t{ \"post_id\": 4864, \"topic_id\": 1099, \"forum_id\": 8, \"post_subject\": \"Re: Meet an system error\", \"username\": \"Leofei\", \"post_text\": \"I see. Thank you for your info. I will talk to my colleges about this. \\n-Fan\", \"post_time\": \"2013-10-30 17:16:29\" },\n\t{ \"post_id\": 4863, \"topic_id\": 1099, \"forum_id\": 8, \"post_subject\": \"Re: Meet an system error\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,Q1:\\nIn EclWatch, I found the XRef, there are five clusters:\\nSuperFiles/thor11/thor21/thor400_72/thor50_42b\\n\\nHow can I know in which cluster these orphan files located?
They are probably in the same cluster that you were targetting when you got the error message.\\nQ2:\\nThere are five available link under "Available Reports" field in each cluster(except SuperFiles, it seems it's not related to this issue, we can ignore this)\\n\\nFound Files/Orphan Files/Lost Files/Directories/ErrorsWarnings\\n\\nActually, I tried all "Found Files" and "Orphan Files" for these four clusters, and I cannot find a path like "var/lib/HPCCSystems/hpcc-mirror/thor/\\nthor_data400" in the error message. Any suggestion?
Found Files will list all the physical files for which there are a complete set of files that could be re-attached to the HPCC (re-added to the DFU).\\n\\nOrphan files will list all the physical files for which there are NOT a complete set of files that could be re-attached to the HPCC.\\n\\nI'd would suggest just doing a general cleanup of all Found and Orphan files (my default action would be to simply delete them all) on all your clusters. This is a good periodic maintenance step to perform.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-30 17:10:32\" },\n\t{ \"post_id\": 4861, \"topic_id\": 1099, \"forum_id\": 8, \"post_subject\": \"Re: Meet an system error\", \"username\": \"Leofei\", \"post_text\": \"Thank you for your reply, Richard!\\n\\nThe size of the screen shoot is bigger then the attachment limitation, I'm trying describe my further questions here:\\n\\nQ1:\\nIn EclWatch, I found the XRef, there are five clusters:\\nSuperFiles/thor11/thor21/thor400_72/thor50_42b\\n\\nHow can I know in which cluster these orphan files located?\\n\\nQ2:\\nThere are five available link under "Available Reports" field in each cluster(except SuperFiles, it seems it's not related to this issue, we can ignore this)\\n\\nFound Files/Orphan Files/Lost Files/Directories/ErrorsWarnings\\n\\nActually, I tried all "Found Files" and "Orphan Files" for these four clusters, and I cannot find a path like "var/lib/HPCCSystems/hpcc-mirror/thor/\\nthor_data400" in the error message. Any suggestion?\\n\\nThank you!\", \"post_time\": \"2013-10-30 16:54:50\" },\n\t{ \"post_id\": 4860, \"topic_id\": 1099, \"forum_id\": 8, \"post_subject\": \"Re: Meet an system error\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nIt helps if you break the message up into its component parts so you can read it more easily, like this:Error: System error: -1: rename: could not rename logical file \\nthor_data400::out::mvr_delta::pdate::temp \\nto \\nthor_data400::out::mvr_delta::pdate: \\n\\n[ 5: DFS Exception: 5: physical part \\n//10.194.10.16/var/lib/HPCCSystems/hpcc-mirror/thor/\\nthor_data400/out/mvr_delta/pdate._5_of_30 \\nalready exists] \\n\\n[ 5: DFS Exception: 5: physical part \\n//10.194.10.13/var/lib/HPCCSystems/hpcc-mirror/thor/\\nthor_data400/out/mvr_delta/pdate._2_of_30 \\nalready exists] ...
This error is saying the filename you're trying to change to has existing physical parts with those names on disk. Therefore, if the new filename is not listed in your logical files, those file parts are orphans and you need to run XREF (in ECL Watch) and clean those orphan parts up.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-30 15:15:54\" },\n\t{ \"post_id\": 4858, \"topic_id\": 1099, \"forum_id\": 8, \"post_subject\": \"Meet an system error\", \"username\": \"Leofei\", \"post_text\": \"Hi, meet a system error with the following message, does anyone have idea about it? Please specify more required info you need any. Any suggestion is appreciated!\\n\\nError: System error: -1: rename: could not rename logical file thor_data400::out::mvr_delta::pdate::temp to thor_data400::out::mvr_delta::pdate: [ 5: DFS Exception: 5: physical part //10.194.10.16/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._5_of_30 already exists] [ 5: DFS Exception: 5: physical part //10.194.10.13/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._2_of_30 already exists] [ 5: DFS Exception: 5: physical part //10.194.10.14/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._3_of_30 already exists] [ 5: DFS Exception: 5: physical part //10.194.10.33/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._22_of_30 already exists] [ 5: DFS Exception: 5: physical part //10.194.10.15/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._4_of_30 already exists] [ 5: DFS Exception: 5: physical part //10.194.10.17/var/lib/HPCCSystems/hpcc-mirror/thor/thor_data400/out/mvr_delta/pdate._6_of_30 already exists] [ 5: DFS Ex...\", \"post_time\": \"2013-10-30 14:15:10\" },\n\t{ \"post_id\": 4876, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"rtaylor\", \"post_text\": \"I've added this to JIRA: https://track.hpccsystems.com/browse/HPCC-10332\", \"post_time\": \"2013-10-31 14:37:21\" },\n\t{ \"post_id\": 4874, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"DSC\", \"post_text\": \"Excellent. I see those two functions defined in the Str standard library module. Dustin, will you be creating a Jira issue requesting documentation?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-10-31 11:28:37\" },\n\t{ \"post_id\": 4869, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"dustinskaggs\", \"post_text\": \"I think what you want is:\\n\\nstd.str.fromHexPairs(stringValue);\\nstd.str.toHexPairs(dataValue);\\n\\nIt doesn't appear that these are documented. \\n\\n-Dustin\", \"post_time\": \"2013-10-30 20:09:49\" },\n\t{ \"post_id\": 4868, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"DSC\", \"post_text\": \"Thanks, Richard. I was hoping that I was simply missing something easy, but I guess that's not the case.\\n\\nI'm not sure if this is so much a bug as a missing feature, though. 
If there was a built-in function that emulated that 'x' string literal prefix, like TOUNICODE() does for the 'U' prefix, I would argue that the casting is working correctly today.\\n\\nNow I'll pop some popcorn and watch Jira.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2013-10-30 18:59:43\" },\n\t{ \"post_id\": 4867, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nYou've got a point there. \\n\\nJIRA issue created: https://track.hpccsystems.com/browse/HPCC-10322\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-30 17:48:57\" },\n\t{ \"post_id\": 4866, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"DSC\", \"post_text\": \"It doesn't actually work, though. Within the IDE:\\n\\n
UUID_t := DATA16;\\n\\ni := '0AB911267E8C4C389A983B9A10B5FA2B';\\n\\ni; // 0AB911267E8C4C389A983B9A10B5FA2B\\n(UUID_t)i; // 30414239313132363745384334433338\\n\\nj := x'0AB911267E8C4C389A983B9A10B5FA2B';\\n\\nj; // 0AB911267E8C4C389A983B9A10B5FA2B\\n(UUID_t)j; // 0AB911267E8C4C389A983B9A10B5FA2B
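(For reference, the undocumented helpers Dustin mentions above look like the way to do this when the value is not a literal; a minimal untested sketch, assuming the signatures he quotes:)

IMPORT STD;
UUID_t := DATA16;
i := '0AB911267E8C4C389A983B9A10B5FA2B';
packed   := (UUID_t)STD.Str.FromHexPairs(i);   // 32-char hex string -> 16 raw bytes
readable := STD.Str.ToHexPairs(packed);        // 16 raw bytes -> 32-char hex string
packed;
readable;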
\\nHow can I mimic that 'x' prefix when it's not a literal?\", \"post_time\": \"2013-10-30 17:31:02\" },\n\t{ \"post_id\": 4865, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Re: Converting string value to data\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nA simple type cast should do it, like this:UUID_t := DATA16;\\ni := '0AB911267E8C4C389A983B9A10B5FA2B';\\nUUID_t myID := (UUID_t)i;\\n\\nSIZEOF(i);\\t\\t//32\\nSIZEOF(myID);\\t//16\\ni;\\nMyID;
Unless I'm misunderstanding the issue here, this should be all you need.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-30 17:22:18\" },\n\t{ \"post_id\": 4862, \"topic_id\": 1100, \"forum_id\": 8, \"post_subject\": \"Converting string value to data\", \"username\": \"DSC\", \"post_text\": \"I ran into a silly problem, or at least a problem that seems like it should be silly.\\n\\nI implemented a module for libuuid support. There are two formats for UUIDs, a packed hex version and a string version. The former maps perfectly to a DATA16, the latter to a STRING36. I use the DATA16 incarnation in my datasets to save on space.\\n\\nWhen the IDE displays a record containing a DATA16 UUID, it automatically casts it as a string. That works well, because it gives something for us humans to read. Converting a string version of that value back into a DATA16 type is easy if you're working with literals:\\n\\nUUID_t myID := (UUID_t)x'0AB911267E8C4C389A983B9A10B5FA2B';
\\nThe 'x' prefix on the string literal does the magic here. But what if the value is in an attribute, or is coming in from an external resource such as a stored parameter in a Roxie query? The following does not work (it runs, but the final result is invalid):\\n\\ni := '0AB911267E8C4C389A983B9A10B5FA2B';\\nUUID_t myID := (UUID_t)i;
\\nI was unable to find a way to perform this kind of conversion in that scenario. I wrote a converter function as a workaround, but I think there is probably a simple technique or method I'm missing.\\n\\nThoughts?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-10-30 17:02:59\" },\n\t{ \"post_id\": 4904, \"topic_id\": 1106, \"forum_id\": 8, \"post_subject\": \"Re: A system error\", \"username\": \"Leofei\", \"post_text\": \"Got the answer. Thanks. \\nFYI, it's because of Line Terminators of this platform version. Using '\\\\n' instead of '\\\\n,\\\\r\\\\n' can solve the issue.\\n\\n-Fan\", \"post_time\": \"2013-11-08 16:42:15\" },\n\t{ \"post_id\": 4901, \"topic_id\": 1106, \"forum_id\": 8, \"post_subject\": \"A system error\", \"username\": \"Leofei\", \"post_text\": \"Hi, when using "SprayVariable" function(it was working well, this error have been happening since yesterday), I meet the following error message: \\n\\nError: System error: 0: DFUServer Error Failed: Duplicate entry "\\n" added to string matcher\\n\\nAny suggestion about what this is? Thanks a lot!\\n\\n-Fan\", \"post_time\": \"2013-11-08 14:50:34\" },\n\t{ \"post_id\": 4938, \"topic_id\": 1113, \"forum_id\": 8, \"post_subject\": \"Re: Error running ECL referencing Samples.IMDB in ECL Playgr\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nYes, unfortunately the code in the ECL Playground must be inline, as it has no knowledge of your other active target repository folders, so what you would need to do is copy and paste the code from the Playground to a builder window in the ECL IDE.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-13 16:24:00\" },\n\t{ \"post_id\": 4927, \"topic_id\": 1113, \"forum_id\": 8, \"post_subject\": \"Error running ECL referencing Samples.IMDB in ECL Playground\", \"username\": \"drea.leed@lnssi.com\", \"post_text\": \"When running ECL in the ecl playground, how do you include additional local files?\\n\\nFor example, running the following ECL works in the ECL IDE:\\nimport IMDB;\\noutput(1)\\n\\nBut fails in ECL Playground with\\n"Import names unknown module imdb."\", \"post_time\": \"2013-11-12 20:30:01\" },\n\t{ \"post_id\": 20913, \"topic_id\": 1118, \"forum_id\": 8, \"post_subject\": \"Re: Spray delimited\", \"username\": \"Gopala Rudraraju\", \"post_text\": \"Thanks Brian\", \"post_time\": \"2018-03-01 15:08:49\" },\n\t{ \"post_id\": 4948, \"topic_id\": 1118, \"forum_id\": 8, \"post_subject\": \"Re: Spray delimited\", \"username\": \"omnibuzz\", \"post_text\": \"Brilliant!! Thank you, Brian \\nRegards\\nSrini\", \"post_time\": \"2013-11-15 12:39:29\" },\n\t{ \"post_id\": 4947, \"topic_id\": 1118, \"forum_id\": 8, \"post_subject\": \"Re: Spray delimited\", \"username\": \"BrianB644\", \"post_text\": \"There are two parts to moving a file into the HPCC. The first part is "spraying". The second part is "interpreting the sprayed file as a dataset".\\n\\nMost likely your original data was sprayed without being altered. However, the default settings for making a dataset from delimited data is to trim spaces.\\n\\nAdding the "NOTRIM" option when you define your DATASET will probably do what you want.\\n\\ne.g. ...\\n\\n DATASET(myfile, mylayout, CSV(..., ..., NOTRIM));\\n\\nCheers,\\n\\nBrian\", \"post_time\": \"2013-11-15 07:02:36\" },\n\t{ \"post_id\": 4946, \"topic_id\": 1118, \"forum_id\": 8, \"post_subject\": \"Spray delimited\", \"username\": \"omnibuzz\", \"post_text\": \"I am uploading a delimited text file where each of the rows have some leading spaces and trailing spaces. 
When I try to spray the file and view the data file, the records are trimmed of leading and trailing spaces. Is there a flag to set to not trim the spaces?\\n\\nThanks\\nSrini\", \"post_time\": \"2013-11-15 02:34:19\" },\n\t{ \"post_id\": 4960, \"topic_id\": 1121, \"forum_id\": 8, \"post_subject\": \"Re: Modulus operation - wrong result\", \"username\": \"bforeman\", \"post_text\": \"Yes, if any numeric value is not explicitly defined, it is always assumed to be an 8-byte signed integer.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-19 14:31:55\" },\n\t{ \"post_id\": 4959, \"topic_id\": 1121, \"forum_id\": 8, \"post_subject\": \"Re: Modulus operation - wrong result\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Bob,\\n\\nbut the range of unsigned8 is\\t0 to 18,446,744,073,709,551,615 (from the documentation)\\nand the value i have assigned to v is 9,223,779,370,063,604,652 which is within the range\\n\\ntypecast 15 to unsigned8 made it to work correctly, i guess 15 is treated as a signed integer\\n\\nunsigned8 v := 9223779370063604652;\\nunsigned8 w := v % (unsigned8)15;\\noutput(v % 15); // result is -4\\noutput(v % (unsigned8)15); // result is 12\\noutput(w); // result is 12\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-11-19 14:25:13\" },\n\t{ \"post_id\": 4957, \"topic_id\": 1121, \"forum_id\": 8, \"post_subject\": \"Re: Modulus operation - wrong result\", \"username\": \"bforeman\", \"post_text\": \"Hi Sameer,\\n\\nThis is expected behavior. When you do a simple output of a modulus result, the compiler explicitly casts the result to a SIGNED INTEGER. \\n\\nSo:\\n
output(v % 15); // result is -4
\\n\\nIs giving you the expected result in SIGNED INTEGER format.\\n\\nBut when you explicitly move the expression to an UNSIGNED value:\\n\\nunsigned8 w := v % 15;
\\n\\nThen you are getting a negative number cast to a positive UNSIGNED integer.\\n\\n...and the reason for the negative result? Your v value is out of range, so you are getting an overflow result in both cases.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2013-11-19 13:37:20\" },\n\t{ \"post_id\": 4953, \"topic_id\": 1121, \"forum_id\": 8, \"post_subject\": \"Modulus operation - wrong result\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\ni have observed this when i was trying to perform a modulus operation\\nhere is a sample code with two cases\\n\\ncase 1:\\nunsigned8 v := 8223779370063604652;\\nunsigned8 w := v % 15;\\noutput(v % 15); // result is 2\\noutput(w); // result is 2\\n\\n\\ncase 2:\\nunsigned8 v := 9223779370063604652;\\nunsigned8 w := v % 15;\\noutput(v % 15); // result is -4\\noutput(w); // result is 18446744073709551612\\n\\nis this some kind of bug or what, i have not tested till which value it gives correct output\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-11-19 11:29:10\" },\n\t{ \"post_id\": 4994, \"topic_id\": 1123, \"forum_id\": 8, \"post_subject\": \"Re: Removing Non AlphaNumeric Characters\", \"username\": \"rtaylor\", \"post_text\": \"arun,\\n\\nYou're getting the result you're getting in F3 because your pattern to match is "ANY+" -- and ANY literally means "any character" so your result shows everything that is there in the input data.\\n\\nHowever, your expected result indicates that you want to get rid of the comma (,) and the space, but not the dollar sign ($). Your commented out REGEXREPLACE does not achieve this, since it also removes the dollar sign, along with the comma and space.\\n\\nThis code does produce your expected result:Ds := DATASET([{'A@-N#D$RÉ,VÉRONIQU 1234'}],{UNICODE str});\\nDs;\\n\\nPATTERN sepChar := PATTERN('[^[:alnum:]]+'); \\nPATTERN Name := PATTERN('[[:alnum:]]')+;\\nPATTERN pat1 := Name ;\\nPATTERN pat2 := Name;\\nPATTERN pat3 := any+;\\nRULE Namet := (pat1 sepChar pat2 sepChar pat3)| (pat1 sepChar pat2) |(pat1);\\n\\nresults := RECORD\\nUNICODE F1 := MATCHTEXT(pat1);\\nUNICODE F2 := MATCHTEXT(pat2);\\n// UNICODE F3 := MATCHTEXT(pat3);\\nUNICODE F3 := REGEXREPLACE('[ ,]+',MATCHTEXT(pat3),'');\\nEND;\\n\\noutfile1 := PARSE(Ds,str,Namet,results,FIRST,NOT MATCHED);\\noutfile1;
Note that I only changed the regular expression in your REGEXREPLACE and flipped the comment marks.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-12-02 16:17:51\" },\n\t{ \"post_id\": 4965, \"topic_id\": 1123, \"forum_id\": 8, \"post_subject\": \"Removing Non AlphaNumeric Characters\", \"username\": \"arun\", \"post_text\": \"Hi,\\nI am able to parse a string but I have one issue in the F3 field. In the F3 field only alphanumeric characters are allowed, but I am getting all the characters.\\n\\nInput => A@-N#D$RÉ,VÉRONIQU 1234\\nActual Output:\\nF1 => A, F2 =>N, F3 =>D$RÉ,VÉRONIQU 1234\\n\\nExpected Output:\\nF1 => A, F2 =>N, F3 =>D$RÉVÉRONIQU1234\\n\\n\\nIMPORT STD;\\nDs := DATASET([{'A@-N#D$RÉ,VÉRONIQU 1234'}],{UNICODE str});\\nDs;\\n\\nPATTERN sepChar := PATTERN('[^[:alnum:]]+'); \\nPATTERN Name := PATTERN('[[:alnum:]]')+;\\nPATTERN pat1 := Name ;\\nPATTERN pat2 := Name;\\nPATTERN pat3 := any+;\\nRULE Namet := (pat1 sepChar pat2 sepChar pat3)| (pat1 sepChar pat2) |(pat1);\\n\\nresults := RECORD\\nUNICODE F1 := MATCHTEXT(pat1);\\nUNICODE F2 := MATCHTEXT(pat2);\\nUNICODE F3 := MATCHTEXT(pat3);\\n//UNICODE F3 := REGEXREPLACE('[^[:alnum:]]+',MATCHTEXT(pat3),'');\\nEND;\\n\\noutfile1 := PARSE(Ds,str,Namet,results,FIRST,NOT MATCHED);\\noutfile1;\\n
\\n\\n\\nI can able to achieve by using REGEXREPLACE but still i am looking for best one.\\nAny suggestions?\", \"post_time\": \"2013-11-21 20:51:40\" },\n\t{ \"post_id\": 4970, \"topic_id\": 1125, \"forum_id\": 8, \"post_subject\": \"Re: Cartesian Join\", \"username\": \"DanielJW\", \"post_text\": \"Excellent! That worked - thank you.\", \"post_time\": \"2013-11-22 15:18:42\" },\n\t{ \"post_id\": 4968, \"topic_id\": 1125, \"forum_id\": 8, \"post_subject\": \"Re: Cartesian Join\", \"username\": \"bforeman\", \"post_text\": \"We have an example of this in the Programmer's Guide, look at the aptly named Cartesian.ECL file \\n\\nhttp://hpccsystems.com/download/docs/programmers-guide\\n\\nHopefully this is what you need.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-22 15:04:32\" },\n\t{ \"post_id\": 4967, \"topic_id\": 1125, \"forum_id\": 8, \"post_subject\": \"Cartesian Join\", \"username\": \"DanielJW\", \"post_text\": \"How can I join 2 datasets that have no fields in common - in other words a Cartesian join?\", \"post_time\": \"2013-11-22 14:49:45\" },\n\t{ \"post_id\": 5007, \"topic_id\": 1127, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.LogicalFileSuperSubList error\", \"username\": \"abhisr\", \"post_text\": \"Thanks Bob.\", \"post_time\": \"2013-12-02 19:07:39\" },\n\t{ \"post_id\": 5005, \"topic_id\": 1127, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.LogicalFileSuperSubList error\", \"username\": \"bforeman\", \"post_text\": \"Can your system administrator clean them up for you? You can use DaliAdmin to do this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 18:52:34\" },\n\t{ \"post_id\": 4999, \"topic_id\": 1127, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.LogicalFileSuperSubList error\", \"username\": \"abhisr\", \"post_text\": \"Yes , i do have some corrupted superfiles in my target cluster which are unable to delete using ESP\", \"post_time\": \"2013-12-02 17:57:00\" },\n\t{ \"post_id\": 4991, \"topic_id\": 1127, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.LogicalFileSuperSubList error\", \"username\": \"bforeman\", \"post_text\": \"My test is successful, I can't reproduce your error. The development team thinks that you might have a corrupted superfile on your target cluster:\\n\\n
RKC thinks this is caused by a corrupt superfile, will you take a look and see if you agree?
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 15:19:42\" },\n\t{ \"post_id\": 4973, \"topic_id\": 1127, \"forum_id\": 8, \"post_subject\": \"STD.File.LogicalFileSuperSubList error\", \"username\": \"abhisr\", \"post_text\": \"I am trying to look for a function that will return me the list of all subfiles of a super file.\\n\\nOUTPUT(STD.File.LogicalFileSuperSubList());
\\n\\nWhen I execute the above code, it throws an exception in Thor and hThor:\\n\\neclagent 0: System error: 0: IPropertyTree: Ambiguous xpath used getProp: ambiguous xpath "SubFile[@num="1"]" (in Child Dataset G1 E2)
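If the goal is just the subfiles of one specific superfile, STD.File.SuperFileContents takes the superfile name as a parameter and returns its subfile names. A minimal sketch (the file name below is a made-up placeholder):

IMPORT STD;
// '~mytest::mysuperfile' is only an example name
subfiles := STD.File.SuperFileContents('~mytest::mysuperfile');
OUTPUT(subfiles);   // one row per subfile logical name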
\\n\\nI would love have my super files name passed as parameter rather than filtering .i haven't so far seen the result hoping it will return all the child files.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10464\", \"post_time\": \"2013-11-25 19:02:42\" },\n\t{ \"post_id\": 5020, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"bforeman\", \"post_text\": \"After digging a little more, it might be a simple matter of what the Linux file system supports. How would you normally write a filename with an ampersand character to a Linux folder? Because we are using a web service to despray, the escape character causes an error (\\\\&) - it's probably a best practice to avoid it's use and substitute the & for the word "and". \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-03 15:49:41\" },\n\t{ \"post_id\": 5014, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"bforeman\", \"post_text\": \"Here's the link:\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 20:36:26\" },\n\t{ \"post_id\": 5013, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"deben18\", \"post_text\": \"Where can I find Issue Tracking System? I never heard of it. thanks...Deb\", \"post_time\": \"2013-12-02 20:34:33\" },\n\t{ \"post_id\": 5012, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"bforeman\", \"post_text\": \"Looks like a bug in the DFU - I tried despraying in the ECL Watch and using an ampersand in the file name to despray it also fails with the same error. Can you please log this as an issue in the Issue Tracking System?\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2013-12-02 20:08:31\" },\n\t{ \"post_id\": 5010, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"deben18\", \"post_text\": \"Thanks a lot Bob for looking at my issue. I tried wrapping the text within a double quote but ECL throws same type of error.\\n\\nThanks\\nDeb\", \"post_time\": \"2013-12-02 20:03:41\" },\n\t{ \"post_id\": 4993, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Re: Unspary fails with filename having symbol '&'\", \"username\": \"bforeman\", \"post_text\": \"Hi Deb,\\n\\nDid you try wrapping the filename around quotes? \\n\\n '"Sanctions & Enforcements.xml"' \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 16:10:43\" },\n\t{ \"post_id\": 4977, \"topic_id\": 1129, \"forum_id\": 8, \"post_subject\": \"Unspary fails with filename having symbol '&'\", \"username\": \"deben18\", \"post_text\": \"Hi,\\n\\nI am trying to unspray a file with second parameter as filename like 'Sanctions & Enforcements.xml' and it throws following error:\\n\\nError: System error: 0: DFUServer Error Failed: invalid escaped sequence (0, 0), 0,\\n\\nCan you please what is the way around?\\n\\nThanks\\nDeb\", \"post_time\": \"2013-11-27 14:48:50\" },\n\t{ \"post_id\": 5008, \"topic_id\": 1130, \"forum_id\": 8, \"post_subject\": \"Re: COMPRESSED results\", \"username\": \"rtaylor\", \"post_text\": \"arun,\\n\\nThe icon indicates a compressed file, but the size displayed is always the UNcompressed amount of data. 
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-12-02 19:14:20\" },\n\t{ \"post_id\": 4978, \"topic_id\": 1130, \"forum_id\": 8, \"post_subject\": \"COMPRESSED results\", \"username\": \"arun\", \"post_text\": \"I want to compress my output to save the disk space and i compressed 100GB flat file.\\nActually file was compressed but still file size was same.\\n\\nFor testing purpose, i have taken the small Datasets with repeated letters\\n\\n
MyRec := {STRING10 Value1,STRING10 Value2, INTEGER1 Value3};\\nSomeFile := DATASET([{'appleapple','appleapple',1},{'appleapple','appleapple',2},{'appleapple','appleapple',3},\\n {'appleapple','appleapple',4},{'appleapple','appleapple',5}],MyRec);\\nOUTPUT(SomeFile,,'~arun::temp::comp3',NAMED('Fred'),OVERWRITE);\\nOUTPUT(SomeFile,,'~arun::temp::comp4',NAMED('Fred1'),COMPRESSED,OVERWRITE);
\\n\\nBut still there is no reduction in size, and I have attached the result snapshots.\\n\\nLet me know where I went wrong.\", \"post_time\": \"2013-11-27 17:17:23\" },\n\t{ \"post_id\": 5094, \"topic_id\": 1149, \"forum_id\": 8, \"post_subject\": \"Re: RemoveSuperFile - fails\", \"username\": \"abhisr\", \"post_text\": \"Thanks Sameer,\\n\\nI rewrote my code into two functions and ran them sequentially.\\n\\nFirst I found all the superfile owners with STD.File.LogicalFileSuperowners(fileName) and then I removed the relation using STD.File.RemoveSuperFile.\\n\\nSecond, I used the DeleteSuperFile library function, which deletes the superfile along with its subfiles.\\n\\n\\nRegards\\nAbhilash\", \"post_time\": \"2013-12-26 14:31:50\" },\n\t{ \"post_id\": 5093, \"topic_id\": 1149, \"forum_id\": 8, \"post_subject\": \"Re: RemoveSuperFile - fails\", \"username\": \"sameermsc\", \"post_text\": \"Hi Abhi,\\n\\nIf I understand your requirement correctly, you want to remove the subfiles from all of the superfiles to which they are associated.\\n\\nYour previous code should give you the desired result, since the sub file -> super file relation is already specified in '~thor::test::ab1::relFile'\\n\\nso, remove the line below from your code\\n STD.File.RemoveFileRelationship(superFile, subFile)
\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-12-26 10:39:23\" },\n\t{ \"post_id\": 5066, \"topic_id\": 1149, \"forum_id\": 8, \"post_subject\": \"Re: RemoveSuperFile - fails\", \"username\": \"abhisr\", \"post_text\": \"Thanks sameer, the solution worked fine for the sample .\\n\\nNow I have a multiple files associated to a super file. So i tried to remove the relationship between them before removing the super file.\\nI have the whole set of data written to a file which has the list of subfiles and their related super files, so subfile1 -> SuperFile1, subfile1 -> SuperFile2, subfile2 -> SuperFile2; like that.\\n\\ni have the sames data that I wrote as inline data set in my above code as output file , but this time its available in a file, when I changed to its throwing this error \\nNOTHOR(APPLY(mySubSupeFiles,RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile)));\\n
\\nnow it throws an error about something invalid inside a NOTHOR:\\nError: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. (0, 0), 4818,
\\n\\n\\n\\nRegards\\nAbhi\\n\\n\\nSHARED supSub_Lay := {STRING70 SubFile,STRING70 superFile};\\nEXPORT toDetachDS := DATASET('~thor::test::ab1::relFile', supSub_Lay,THOR);\\n\\n\\n\\n EXPORT RemoveRelation(STRING superFile,STRING subFile ) := FUNCTION\\n\\nRETURN SEQUENTIAL(\\n\\t\\tSTD.File.StartSuperFileTransaction(),\\t\\n\\t\\tSTD.File.RemoveFileRelationship(superFile, subFile),\\n\\t\\tSTD.File.RemoveSuperFile(superFile,subFile),\\n\\t\\tSTD.File.FinishSuperFileTransaction()\\n\\t );\\n\\t \\n\\t \\nEND;\\n\\ntoDetachDS;\\n\\nNOTHOR(APPLY(toDetachDS,RemoveRelation(toDetachDS.superFile,toDetachDS.subFile)));\\n\\n\\n
\", \"post_time\": \"2013-12-18 20:46:24\" },\n\t{ \"post_id\": 5064, \"topic_id\": 1149, \"forum_id\": 8, \"post_subject\": \"Re: RemoveSuperFile - fails\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nCalling APPLY inside NOTHOR will fix this\\n\\n\\n NOTHOR(APPLY(mySubSupeFiles,RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile)));\\n\\n
\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-12-18 11:32:07\" },\n\t{ \"post_id\": 5060, \"topic_id\": 1149, \"forum_id\": 8, \"post_subject\": \"RemoveSuperFile - fails\", \"username\": \"abhisr\", \"post_text\": \"I have a data set that contains a pair of subfiles, superfiles that are attached. I want to remove the sub file from the super file.\\n\\nI have written a standalone sample code for this .\\n\\n\\n\\nSHARED logFile1 := '~thor::test::hp1::logicalfile1';\\nSHARED logFile2 := '~thor::test::hp1::logicalfile2';\\n\\nCreateLogicalFiles(STRING l1, STRING l2):= FUNCTION\\nREC := record\\n\\tstring player;\\n\\tinteger8 Score;\\n\\tstring CenturyAgainst;\\n\\tinteger8 StrikeRate;\\n\\tinteger8 Year;\\nend;\\n\\nDS1 := DATASET([{'Sehwag',219,'West Indies',157,2011}],REC);\\nDS2 := DATASET([{'Coventry',219,'Bangladesh',157,2009}],REC);\\t\\n\\na := OUTPUT(DS1,,l1,OVERWRITE);\\nb := OUTPUT(DS2,,l2,OVERWRITE);\\n\\nRETURN SEQUENTIAL(a,b);\\n\\nEND;\\n\\nCreateLogicalFiles(logFile1,logFile2);\\n\\nSHARED supFile1 := '~thor::test::hp1::superfile1';\\nSTD.File.CreateSuperFile(supFile1);\\n\\nSHARED supFile2 := '~thor::test::hp1::superfile2';\\nSTD.File.CreateSuperFile(supFile2);\\n\\n\\nSEQUENTIAL(\\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile(supFile1,logFile1),\\n STD.File.FinishSuperFileTransaction()\\n);\\n\\nSEQUENTIAL(\\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile(supFile2,logFile2),\\n STD.File.FinishSuperFileTransaction()\\n);\\n\\nSHARED supSub_Lay := {STRING70 SubFile,STRING70 superFile};\\n\\nSHARED mySubSupeFiles:= DATASET([{logFile1,supFile1},{logFile2,supFile2}],supSub_Lay);\\n\\nEXPORT RemoveRelation(STRING superFile, STRING subFile) := FUNCTION\\n\\nRETURN SEQUENTIAL(\\n\\t\\tSTD.File.StartSuperFileTransaction(),\\t\\t\\n\\t\\tSTD.File.RemoveSuperFile(superFile,subFile),\\n\\t\\tSTD.File.FinishSuperFileTransaction()\\n\\t );\\n\\t \\n\\t \\nEND;\\n\\n/* APPLY(GLOBAL(mySubSupeFiles),NOTHOR(RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile))); */\\nAPPLY(mySubSupeFiles,NOTHOR(RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile)));\\n\\n\\n\\n
\\n\\nWhen I run this code APPLY(mySubSupeFiles,RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile));\\n
it will throw the error (61,1) : 4055: Cannot call function startsuperfiletransaction in a non-global context
\\n\\nSo I tried to wrap it in GLOBAL like this APPLY(GLOBAL(mySubSupeFiles),RemoveRelation(mySubSupeFiles.superFile,mySubSupeFiles.subFile));\\n
it will throw the error (470,1) : 2131: mysubsupefiles.superfile - Table mysubsupefiles is not related to GLOBAL(mysubsupefiles)
.\\n\\n\\nHow can i fix this.\", \"post_time\": \"2013-12-17 22:02:55\" },\n\t{ \"post_id\": 5090, \"topic_id\": 1159, \"forum_id\": 8, \"post_subject\": \"Re: UnicodeFind\", \"username\": \"sameermsc\", \"post_text\": \"Hi Shank,\\n\\nrefer to String Handling -> Find section of ECLStandardLibraryReference for details, here is the url\\nhttp://cdn.hpccsystems.com/releases/CE- ... .2.0-1.pdf\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-12-24 09:32:39\" },\n\t{ \"post_id\": 5089, \"topic_id\": 1159, \"forum_id\": 8, \"post_subject\": \"UnicodeFind\", \"username\": \"shank\", \"post_text\": \"Hi,\\n\\ninteger4 lib_unicodelib.UnicodeLib.UnicodeFind(src, tofind, instance)
\\n\\nIn the above unicode function, I'd like to know what do the following represent:\\n\\n1. What does the last parameter instance represent? What would be the corresponding integer values for this parameter?\\n2. What does the function return exactly? Does it return the index in the original string where the tofind string was found?\\n\\nAlso, is there a complete unicode string reference available? I was not able to find this function in the ECL Language reference manual.\\n\\nThanks\\nShank\", \"post_time\": \"2013-12-23 15:26:51\" },\n\t{ \"post_id\": 5101, \"topic_id\": 1162, \"forum_id\": 8, \"post_subject\": \"Re: Question about String handle\", \"username\": \"Leofei\", \"post_text\": \"Does it mean I need to know what will appear after \\\\? BTW, this issue has been solved. We removed \\\\ in UNIX system. Thank you all the same!\\n-Fan\", \"post_time\": \"2014-01-06 18:05:40\" },\n\t{ \"post_id\": 5100, \"topic_id\": 1162, \"forum_id\": 8, \"post_subject\": \"Re: Question about String handle\", \"username\": \"sameermsc\", \"post_text\": \"check this one\\nIMPORT STD;\\n\\nrec := {\\n\\tunsigned id;\\n\\tstring txt;\\n};\\n\\nin_ds := dataset([{1, 'This is a sample te\\\\\\\\xt'},\\n\\t\\t\\t\\t\\t\\t\\t{2, 'This is a\\\\nother sample'},\\n\\t\\t\\t\\t\\t\\t\\t{3, 'This is thi\\\\rd sample'},\\n\\t\\t\\t\\t\\t\\t\\t{4, '\\\\another \\\\\\\\sample su\\\\\\\\bcode'}], rec); \\n\\nstring func(string in_txt) := function\\n\\tin1 := regexreplace('\\\\\\\\a', in_txt, 'a');\\n\\tin2 := regexreplace('\\\\\\\\n', in1, 'n');\\n\\tin3 := regexreplace('\\\\\\\\r', in2, 'r');\\n\\tin4 := regexreplace('\\\\\\\\\\\\\\\\x', in3, 'x');\\t\\t\\n\\tin5 := regexreplace('\\\\\\\\\\\\\\\\s', in4, 's');\\t\\t\\n\\tin6 := regexreplace('\\\\\\\\\\\\\\\\b', in5, 'b');\\t\\t\\n\\treturn in6; \\nend;\\nout_ds := project(in_ds, transform(rec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.id := left.id;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.txt := func(left.txt)));\\nin_ds;\\nout_ds;\\t\\t
\\n\\nregards,\\nSameer\", \"post_time\": \"2014-01-06 07:28:55\" },\n\t{ \"post_id\": 5099, \"topic_id\": 1162, \"forum_id\": 8, \"post_subject\": \"Re: Question about String handle\", \"username\": \"Leofei\", \"post_text\": \"What about if it's a field in a dataset? I want to remove \\\\ in one field of a dataset. It seems this way will treat the variable name as a regular text.\", \"post_time\": \"2014-01-03 15:31:26\" },\n\t{ \"post_id\": 5098, \"topic_id\": 1162, \"forum_id\": 8, \"post_subject\": \"Re: Question about String handle\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nthe sample text contains an escape character '\\\\b', to treat it as a regular text use #TEXT template\\n\\n\\nIMPORT STD;\\n\\nout1 := STD.Str.FindReplace(#TEXT('a\\\\bc'), '\\\\\\\\', '');\\nout2 := regexreplace('\\\\\\\\\\\\\\\\', #TEXT('a\\\\bc'), '');\\noutput(out1);\\noutput(out2);
\\n\\nHope this helps\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-01-03 07:39:57\" },\n\t{ \"post_id\": 5097, \"topic_id\": 1162, \"forum_id\": 8, \"post_subject\": \"Question about String handle\", \"username\": \"Leofei\", \"post_text\": \"Hi,\\n\\nI have a question regarding STD.Str.FindReplace function. How can I remove the \\\\ sign in a string? For example, \\n\\nIMPORT STD;\\nSTD.Str.FindReplace('a\\\\bc','\\\\\\\\','');
\\n\\nThanks a lot!\\n\\n-Fan\", \"post_time\": \"2014-01-02 23:00:32\" },\n\t{ \"post_id\": 5110, \"topic_id\": 1163, \"forum_id\": 8, \"post_subject\": \"Re: Error: More than 10000 match candidates in keyed join fo\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the example, I think that the issue might be your JOIN condition, and the fact that the index in the right recordset does not contain all of the fields in your condition. I'm glad that setting limits has removed the error.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-01-08 14:46:47\" },\n\t{ \"post_id\": 5109, \"topic_id\": 1163, \"forum_id\": 8, \"post_subject\": \"Re: Error: More than 10000 match candidates in keyed join fo\", \"username\": \"balajisampath\", \"post_text\": \"Thanks for reply Bob,\\n\\nHere is the sample code. I am running in THOR and as of now able to get rid of this using LIMIT and SKIP\\n\\nSMPLRECCD :=RECORD\\n string4 col1.....\\nEND;\\n \\n EXPORT Daily_Scrub_DS\\t\\t:= DATASET ('~thor::filename',SMPLRECCD,THOR,OPT);\\n EXPORT Daily_Scrub_CNTCD\\t:= COUNT(Daily_Scrub_DS);\\n EXPORT MySampleSetCD \\t\\t:= DISTRIBUTE(Daily_Scrub_DS,RANDOM());\\n EXPORT SMPL_DSCD \\t\\t\\t:= ENTH(MySampleSetCD,5,10,1,LOCAL)[1..(Daily_Scrub_CNTCD*10)/100];\\n\\nREC013 := RECORD\\n string6 col1...;\\nEND; \\n\\nDS99999998_13 \\t:= DATASET([],REC013);\\nIdx99999998_13 \\t:= INDEX(DS99999998_13,{col1,col2},{DS99999998_13},'~foreign::filename');\\nSMPLJOIN13\\t\\t:= JOIN(SMPL_DSCD,Idx99999998_13,\\n\\t\\t\\t\\t\\t\\tLEFT.col1=RIGHT.col1 AND \\n\\t\\t\\t\\t\\t\\tLEFT.col3=RIGHT.col3 AND \\n\\t\\t\\t\\t\\t\\tLEFT.col4=RIGHT.col4 AND \\n\\t\\t\\t\\t\\t\\tLEFT.col5=RIGHT.col5,\\n\\t\\t\\t\\t\\t\\tKEEP(10000));\\n
\", \"post_time\": \"2014-01-08 14:29:41\" },\n\t{ \"post_id\": 5108, \"topic_id\": 1163, \"forum_id\": 8, \"post_subject\": \"Re: Error: More than 10000 match candidates in keyed join fo\", \"username\": \"bforeman\", \"post_text\": \"Can you please post your ECL code? The 10000 error means that your results are not limited, are you running this on THOR or as a ROXIE query?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-01-07 22:21:03\" },\n\t{ \"post_id\": 5102, \"topic_id\": 1163, \"forum_id\": 8, \"post_subject\": \"Error: More than 10000 match candidates in keyed join for..\", \"username\": \"balajisampath\", \"post_text\": \"I am getting the below error while joining two datasets (one data file and index file)\\n\\nError: System error: 0: Graph[4], keyedjoin[10]: SLAVE 10.194.10.16:16600: More than 10000 match candidates in keyed join for row........ \\n\\nThe index file has two key columns and several non key columns. I am using only one key column and some non key columns in the join.\\nIf I use just one key column alone in join without non key columns its working fine.\\n\\nI tried using KEEP inside JOIN and still it fails with same message.\\nNot sure if i am missing something. Can someone please help me to understand what is wrong?\", \"post_time\": \"2014-01-06 19:35:24\" },\n\t{ \"post_id\": 5194, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"Re: DFU SOAP call clarification\", \"username\": \"balajisampath\", \"post_text\": \"I tried with the modified code as suggested and its not working.\\nThe index file i tried has 51 columns with 50 key columns and 1 non key columns,SOAP call returns 22 key columns and 1 non key columns rest of the columns are missing\\n\\nPlease send me your email address to balaji.sampath@lexisnexis.com and I shall share the file name and location if you want to have a look at.\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-02-11 19:07:21\" },\n\t{ \"post_id\": 5177, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"Re: DFU SOAP call clarification\", \"username\": \"balajisampath\", \"post_text\": \"Thanks for looking into this issue,\\n\\nI remember that columns were missing. Because I did a count of columns and it didn't match. Anyway I shall test the modified code soon and post the results.\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-02-05 20:48:17\" },\n\t{ \"post_id\": 5175, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"Re: DFU SOAP call clarification\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"Richard Taylor added a comment - 04/Feb/14 5:00 PM - edited\\nBalaji,\\nIn testing your code, I found that the order of the fields being returned was not correct, because you are using the + append operator to append all your TABLEs, which does not guarantee maintenance of the order. I changed them all to the & append operator (which does maintain order) and my 29+ field INDEXes all came out correctly. 
It is possible that your problem is not missing fields, but mis-ordered fields.\\nTry making these changes to your code, please, and let us know how it works:\\nKeyColumns := KeyColumns1 & KeyColumns2 & KeyColumns3 & KeyColumns4 & KeyColumns5 &\\nKeyColumns6 & KeyColumns7 & KeyColumns8 & KeyColumns9 & KeyColumns10 &\\nKeyColumns11 & KeyColumns12 & KeyColumns13 & KeyColumns14 & KeyColumns15 &\\nKeyColumns16 & KeyColumns17 & KeyColumns18 & KeyColumns19 & KeyColumns20;\\nNonKeyColumns := NonKeyColumns1 & NonKeyColumns2 & NonKeyColumns3 & NonKeyColumns4 & NonKeyColumns5 &\\nNonKeyColumns6 & NonKeyColumns7 & NonKeyColumns8 & NonKeyColumns9 & NonKeyColumns10 &\\nNonKeyColumns11 & NonKeyColumns12 & NonKeyColumns13 & NonKeyColumns14 & NonKeyColumns15 &\\nNonKeyColumns16 & NonKeyColumns17 & NonKeyColumns18 & NonKeyColumns19 & NonKeyColumns20;\\nResult := IF(include_fpos = 'N', KeyColumns & NonKeyColumns (ColumnLabel <> 'fpos' AND ColumnLabel <> 'fileposition' AND ColumnLabel <> 'internal_fpos_'),\\nKeyColumns & NonKeyColumns );\", \"post_time\": \"2014-02-05 13:33:50\" },\n\t{ \"post_id\": 5138, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"Re: DFU SOAP call clarification\", \"username\": \"balajisampath\", \"post_text\": \"Thank You Richard,\\n\\nCreated ticket in JIRA https://track.hpccsystems.com/browse/HPCC-10648\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-01-16 15:46:09\" },\n\t{ \"post_id\": 5136, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"Re: DFU SOAP call clarification\", \"username\": \"rtaylor\", \"post_text\": \"If your code works on some files and not others, then it is probably an issue that needs to be fixed. Since your problem is with using SOAPCALL to talk to "internal" (as in non-ECL) DFU processes, I suggest you post this problem as an issue in JIRA. \\n\\nBy posting it to JIRA yourself you automatically are included in the discussion and can easily track the progress of the issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-01-16 14:48:13\" },\n\t{ \"post_id\": 5126, \"topic_id\": 1170, \"forum_id\": 8, \"post_subject\": \"DFU SOAP call clarification\", \"username\": \"balajisampath\", \"post_text\": \"I am using DFU SOAP calls to get the index file metadata(layout/column details).Please refer the code attached.\\n\\nSome of key columns are not returned by the soap call when the file has more than 20 key columns.Otherwise it works fine. 
I couldn't find any other pattern other than this.\\n\\nThe "Result{XPATH('Result')" column displays all the columns in xml format as string\\nbut not in DFUDataKeyedColumns.\\n\\nIf someone has used this SOAP call please help me to identify the issue.\\n\\n\\n/*\\n\\nFunction to return Columns with datatypes of the given INDEX/DATA (logical file name)\\n\\nINPUT : Logical File name with IP,PORT,CLUSTER\\nOUTPUT : Columns with data types and indicator\\n\\n*/\\nEXPORT fn_GetCoulmnInfo(STRING IP ='',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTRING port_= '8010',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTRING FileName = '', \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTRING Cluster = '',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSTRING include_fpos = 'N') := FUNCTION\\n\\n// Build the soap URL\\nIMPORT STD;\\nSTRING URl \\t\\t:= IP+':'+(STRING)port_+'/WsDfu/';\\nSTRING fName \\t:= IF(Cluster <>'',Cluster+'::'+FileName,FileName);\\n\\n\\n// SOAP request data structure\\nDFUSearchDataRequest := \\n\\t\\tRECORD\\n\\t\\t\\tSTRING OpenLogicalName{XPATH('OpenLogicalName')} := fName;\\n\\t\\t\\tSTRING Cluster{XPATH('Cluster')} := '';\\n\\t\\tEND;\\n\\n// SOAP result exception data structure\\n ESPExceptions_Lay :=\\n\\t\\tRECORD\\n\\t\\t\\t\\tSTRING Code{XPATH('Code')};\\n\\t\\t\\t\\tSTRING Audience{XPATH('Audience')};\\n\\t\\t\\t\\tSTRING Source{XPATH('Source')};\\n\\t\\t\\t\\tSTRING Message{XPATH('Message')};\\n\\t\\tEND;\\n\\n// SOAP result data structure\\t\\t\\nDFUDataColumn_Lay := RECORD \\t\\n\\n\\t STRING \\tColumnLabel{XPATH('ColumnLabel')};\\n STRING \\tColumnType{XPATH('ColumnType')};\\n\\t STRING \\tColumnValue{XPATH('ColumnValue')};\\n\\t INTEGER \\tColumnSize{XPATH('ColumnSize')};\\n\\t INTEGER\\tMaxSize{XPATH('MaxSize')};\\n\\t\\t \\nEND;\\n\\t\\t\\nDFUDataCols :=RECORD \\n\\n\\tDATASET (DFUDataColumn_Lay) DFUDataColumn {XPATH ('DFUDataColumn')};\\n\\n\\nEND;\\nDFUSearchDataResponse := RECORD\\n\\t\\n\\t\\tSTRING \\tLogicalName{XPATH('LogicalName')};\\n\\t\\tINTEGER Total{XPATH('Total')};\\n\\t\\tSTRING Cluster{XPATH('Cluster')};\\n\\t\\tSTRING \\t\\tParentName{XPATH('ParentName')};\\n\\t\\tINTEGER \\tStartIndex{XPATH('StartIndex')};\\n\\t\\tINTEGER \\tEndIndex{XPATH('EndIndex')};\\n\\t\\tSTRING \\t\\tResult{XPATH('Result')};\\n\\t\\t\\n DATASET(ESPExceptions_Lay) Exceptions{XPATH('Exceptions/ESPException')};\\n\\t\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns1 {XPATH('DFUDataKeyedColumns1')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns2 {XPATH('DFUDataKeyedColumns2')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns3 {XPATH('DFUDataKeyedColumns3')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns4 {XPATH('DFUDataKeyedColumns4')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns5 {XPATH('DFUDataKeyedColumns5')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns6 {XPATH('DFUDataKeyedColumns6')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns7 {XPATH('DFUDataKeyedColumns7')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns8 {XPATH('DFUDataKeyedColumns8')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns9 {XPATH('DFUDataKeyedColumns9')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns10 {XPATH('DFUDataKeyedColumns10')};\\t\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns11 {XPATH('DFUDataKeyedColumns11')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns12 {XPATH('DFUDataKeyedColumns12')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns13 {XPATH('DFUDataKeyedColumns13')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns14 
{XPATH('DFUDataKeyedColumns14')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns15 {XPATH('DFUDataKeyedColumns15')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns16 {XPATH('DFUDataKeyedColumns16')};\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns17 {XPATH('DFUDataKeyedColumns17')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns18 {XPATH('DFUDataKeyedColumns18')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns19 {XPATH('DFUDataKeyedColumns19')};\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataKeyedColumns20 {XPATH('DFUDataKeyedColumns20')};\\t\\t\\n\\t\\t\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns1{XPATH('DFUDataNonKeyedColumns1')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns2{XPATH('DFUDataNonKeyedColumns2')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns3{XPATH('DFUDataNonKeyedColumns3')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns4{XPATH('DFUDataNonKeyedColumns4')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns5{XPATH('DFUDataNonKeyedColumns5')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns6{XPATH('DFUDataNonKeyedColumns6')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns7{XPATH('DFUDataNonKeyedColumns7')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns8{XPATH('DFUDataNonKeyedColumns8')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns9{XPATH('DFUDataNonKeyedColumns9')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns10{XPATH('DFUDataNonKeyedColumns10')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns11{XPATH('DFUDataNonKeyedColumns11')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns12{XPATH('DFUDataNonKeyedColumns12')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns13{XPATH('DFUDataNonKeyedColumns13')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns14{XPATH('DFUDataNonKeyedColumns14')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns15{XPATH('DFUDataNonKeyedColumns15')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns16{XPATH('DFUDataNonKeyedColumns16')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns17{XPATH('DFUDataNonKeyedColumns17')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns18{XPATH('DFUDataNonKeyedColumns18')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns19{XPATH('DFUDataNonKeyedColumns19')};\\n\\t\\tDATASET(DFUDataCols) DFUDataNonKeyedColumns20{XPATH('DFUDataNonKeyedColumns20')};\\nEND;\\n\\nDFUSearchDataSoapCall := SOAPCALL(URl\\n\\t\\t\\t\\t,'DFUSearchData'\\n\\t\\t\\t\\t,DFUSearchDataRequest\\n\\t\\t\\t\\t,DFUSearchDataResponse\\n\\t\\t\\t\\t,XPATH('DFUSearchDataResponse')\\n\\t\\t\\t\\t);\\n\\nKeyColumns1 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns1.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns2 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns2.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns3 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns3.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns4 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns4.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns5 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns5.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns6 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns6.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns7 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns7.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns8 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns8.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns9 
:= TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns9.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns10 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns10.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns11 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns11.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns12 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns12.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns13 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns13.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns14 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns14.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns15 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns15.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns16 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns16.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns17 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns17.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns18 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns18.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns19 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns19.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\nKeyColumns20 := TABLE(DFUSearchDataSoapCall.DFUDataKeyedColumns20.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'Y'});\\n\\n\\nNonKeyColumns1 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns1.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns2 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns2.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns3 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns3.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns4 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns4.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns5 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns5.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns6 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns6.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns7 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns7.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns8 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns8.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns9 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns9.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns10 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns10.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns11 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns11.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns12 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns12.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns13 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns13.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns14 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns14.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns15 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns15.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns16 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns16.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns17 := 
TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns17.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns18 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns18.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns19 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns19.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\nNonKeyColumns20 := TABLE(DFUSearchDataSoapCall.DFUDataNonKeyedColumns20.DFUDataColumn,{ColumnLabel,ColumnType,KeyCol := 'N'});\\n\\nKeyColumns := KeyColumns1 + KeyColumns2 + KeyColumns3 + KeyColumns4 + KeyColumns5+\\n\\t\\t\\t\\tKeyColumns6 + KeyColumns7 + KeyColumns8 + KeyColumns9 + KeyColumns10 +\\n\\t\\t\\t\\tKeyColumns11 + KeyColumns12 + KeyColumns13 + KeyColumns14 + KeyColumns15+\\n\\t\\t\\t\\tKeyColumns16 + KeyColumns17 + KeyColumns18 + KeyColumns19 + KeyColumns20;\\n\\nNonKeyColumns := NonKeyColumns1 + NonKeyColumns2 + NonKeyColumns3 + NonKeyColumns4 + NonKeyColumns5+\\n\\t\\t\\t\\t\\t\\t\\t\\t NonKeyColumns6 + NonKeyColumns7 + NonKeyColumns8 + NonKeyColumns9 + NonKeyColumns10+\\n\\t\\t\\t\\t\\t\\t\\t\\t NonKeyColumns11 + NonKeyColumns12 + NonKeyColumns13 + NonKeyColumns14 + NonKeyColumns15+\\n\\t\\t\\t\\t\\t\\t\\t\\t NonKeyColumns16 + NonKeyColumns17 + NonKeyColumns18 + NonKeyColumns19 + NonKeyColumns20;\\n\\nResult := IF(include_fpos = 'N', KeyColumns + NonKeyColumns (ColumnLabel <> '_fpos' AND ColumnLabel <> '__fileposition__' AND ColumnLabel <> '__internal_fpos__'),\\n\\t\\t\\t\\t\\t\\t KeyColumns + NonKeyColumns );\\nRETURN Result ;\\n\\nEND;
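\n\nFor reference, a minimal self-contained sketch of the + versus & append behaviour discussed earlier in this thread (the record and dataset names here are illustrative only, not taken from the code above):\n\n
r := RECORD
  STRING1 letter;
END;
dsA := DATASET([{'A'}, {'B'}], r);
dsB := DATASET([{'C'}, {'D'}], r);

appendPlus := dsA + dsB;   // + appends the datasets but does not guarantee output order
appendAmp  := dsA & dsB;   // & appends and preserves order: all dsA rows before all dsB rows

OUTPUT(appendPlus);
OUTPUT(appendAmp);
\n\nThis is why the suggestion earlier in the thread is to build KeyColumns and NonKeyColumns with & rather than +.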
\", \"post_time\": \"2014-01-14 23:49:10\" },\n\t{ \"post_id\": 5137, \"topic_id\": 1171, \"forum_id\": 8, \"post_subject\": \"Re: How to get index file information using ECL command?\", \"username\": \"rtaylor\", \"post_text\": \"Balaji,\\n\\nReply posted to your other thread.\\n\\nRichard\", \"post_time\": \"2014-01-16 14:49:12\" },\n\t{ \"post_id\": 5131, \"topic_id\": 1171, \"forum_id\": 8, \"post_subject\": \"Re: How to get index file information using ECL command?\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Richard for reply,\\n\\nI need to generate ECL code dynamically to read any given index file and return the results. My input will be just a logical file name. \\n\\nCurrently I am using DFU SOAP calls to get this done but for some files its not working for all index files. Please refer my post http://hpccsystems.com/bb/viewtopic.php?f=8&t=1170&sid=18a2b50b92f6beba3ee1d844f4803292\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-01-15 21:11:15\" },\n\t{ \"post_id\": 5130, \"topic_id\": 1171, \"forum_id\": 8, \"post_subject\": \"Re: How to get index file information using ECL command?\", \"username\": \"rtaylor\", \"post_text\": \"balajisampath,Is there any ECL function/SOAP call available to get the list of Keys,Non-Key columns and data type of Index files?
Every logical file in an HPCC environment has an entry in the DFU, and you can use ECL Watch to open the Logical File Details page for the file and see all that metadata about the file -- its structure, file size, number of records, etc.\\n\\nYou can also take a look at using #EXPORT and/or #EXPORTXML to generate the structure.\\n\\nWhat exactly are you trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-01-15 20:09:43\" },\n\t{ \"post_id\": 5128, \"topic_id\": 1171, \"forum_id\": 8, \"post_subject\": \"How to get index file information using ECL command?\", \"username\": \"balajisampath\", \"post_text\": \"Is there any ECL function/SOAP call available to get the list of Keys,Non-Key columns and data type of Index files?\", \"post_time\": \"2014-01-15 15:51:50\" },\n\t{ \"post_id\": 5146, \"topic_id\": 1176, \"forum_id\": 8, \"post_subject\": \"Re: fileservices.renamelogicalfile overwrite option\", \"username\": \"bforeman\", \"post_text\": \"Will pass your request to the development team. It might be a good idea to log your feature request in the Community Issue Tracker.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2014-01-21 13:24:05\" },\n\t{ \"post_id\": 5142, \"topic_id\": 1176, \"forum_id\": 8, \"post_subject\": \"fileservices.renamelogicalfile overwrite option\", \"username\": \"spmurphy\", \"post_text\": \"Would it be possible to modify the renamelogical file to include an overwrite option?\\n\\nI delete a logical file using fileservices.deletelogical file, and then I rename the new file the same as the original. Often a piece of the logical file is not deleted, so the rename fails. \\n\\nIn our operation, it would be nice if the renamelogical file had an option overwrite.\", \"post_time\": \"2014-01-20 18:24:16\" },\n\t{ \"post_id\": 5151, \"topic_id\": 1179, \"forum_id\": 8, \"post_subject\": \"Re: Job attempts to run DALI call on thorslave\", \"username\": \"jsmith\", \"post_text\": \"Thor slaves are prevented from talking directly to Dali for efficiency reasons (a cluster of slaves all in constant communication with Dali can bring it to it's knees).\\n\\nUsually this error is seen with plugin calls that need Dali access, e.g. fileservice calls as in your case. Typically the attribute involved can be wrapped with a NOTHOR declaration to avoid it executing on the slaves.\\nI hope that helps.\", \"post_time\": \"2014-01-24 14:42:33\" },\n\t{ \"post_id\": 5150, \"topic_id\": 1179, \"forum_id\": 8, \"post_subject\": \"Job attempts to run DALI call on thorslave\", \"username\": \"jgostylo\", \"post_text\": \"I am running community_4.2.0-4 on a cluster. I have a long running job that is set up as a monitor (in the scheduler) and I published the work unit to thor. There are some file system manipulation steps (read a file in the dropzone, move files within the dropzone) and for some reason the cluster tries to run these steps on a thorslave instead of the thormaster.\\n\\nThe job is listening for an event. I push an event via SOAP and I have also tried pushing the event with the ECLWatch page in the Scheduler section with the PushEvent button (which should be the same thing as my SOAP call).\\n\\nThis is the error I am seeing:\\n\\nError: System error: -1: Graph[7], if[14]: SLAVE 10.210.150.115:20100: No access to Dali - this normally means a plugin call is being called from a thorslave, (0, 0), -1, \\n\\nI have verified the ip address is one of the cluster slaves.\\n\\nI tried publishing this job to hthor and everything works. 
This only happens when the job is pushed to thor.\\n\\nCan someone explain what may cause these DALI calls to execute on a server other than the master?\", \"post_time\": \"2014-01-23 16:33:15\" },\n\t{ \"post_id\": 5154, \"topic_id\": 1181, \"forum_id\": 8, \"post_subject\": \"when SET is used inside a DATASET parameter Roxie query fail\", \"username\": \"clo\", \"post_text\": \"A user reported this issue:\\nhttps://track.hpccsystems.com/browse/HPCC-10694\", \"post_time\": \"2014-01-24 19:58:09\" },\n\t{ \"post_id\": 5174, \"topic_id\": 1186, \"forum_id\": 8, \"post_subject\": \"Re: Reading Dataset Erro in ECL Watch\", \"username\": \"Leofei\", \"post_text\": \"It works. Thanks a lot, Richard!\", \"post_time\": \"2014-02-04 21:29:17\" },\n\t{ \"post_id\": 5171, \"topic_id\": 1186, \"forum_id\": 8, \"post_subject\": \"Re: Reading Dataset Erro in ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nI duplicated your issue. The ECL Watch page will not handle viewing an INDEX with a BLOB field. But you can certainly view it through the ECL IDE. Here's example code I wrote to create the INDEX, then display the content:IMPORT STD;\\n\\nNumRecs := 10000;\\nStartDate := STD.Date.FromGregorianDate(STD.Date.Today()) - NumRecs - 1;\\nThisDate(UNSIGNED4 C) := (STRING)STD.Date.ToGregorianDate(StartDate + C);\\n\\nRec := RECORD\\n STRING20 transaction_id;\\n STRING14 date_added;\\n STRING content {BLOB, MAXLENGTH(1000000)};\\nEND;\\n\\nds := DATASET(NumRecs,TRANSFORM(Rec,\\n SELF.Transaction_id := INTFORMAT(COUNTER,20,0),\\n SELF.Date_Added := ThisDate(COUNTER),\\n\\t\\t\\t\\tSELF.content := 'XXX'));\\n\\nidx := INDEX(ds,{transaction_id,date_added},{content},'~RTTEST::KEY::BLOBindex');\\nSEQUENTIAL(OUTPUT(ds), BUILD(idx,OVERWRITE),OUTPUT(idx));
Try it with your index.\\n\\nIn ECL, you can always treat an INDEX as if it were a DATASET if you want to work with the entire set of leaf node records instead of using it to access only a specified few of them.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-03 20:50:11\" },\n\t{ \"post_id\": 5170, \"topic_id\": 1186, \"forum_id\": 8, \"post_subject\": \"Re: Reading Dataset Erro in ECL Watch\", \"username\": \"Leofei\", \"post_text\": \"Here is the layout:\\n\\t\\tSTRING20 transaction_id;\\n\\t\\tSTRING14 date_added;\\n\\t\\tSTRING \\t content {BLOB, MAXLENGTH(1000000)};\", \"post_time\": \"2014-02-03 19:58:38\" },\n\t{ \"post_id\": 5168, \"topic_id\": 1186, \"forum_id\": 8, \"post_subject\": \"Re: Reading Dataset Erro in ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nWhat is the structure of this INDEX?\\n\\nRichard\", \"post_time\": \"2014-02-03 19:44:15\" },\n\t{ \"post_id\": 5164, \"topic_id\": 1186, \"forum_id\": 8, \"post_subject\": \"Reading Dataset Erro in ECL Watch\", \"username\": \"Leofei\", \"post_text\": \"Hi, when I'm trying to view a KEY file in thor cluster thorough ECL Watch, it prompt the following error:\\n\\nMessage:2014-01-31 23:17:38 GMT: Cannot view complex key 'thor::key::**::**:****'\\n\\nFor most other key files, I can read it in this way, but this one I cannot. Is there any way to read into it through some commands in ECL IDE? Since this is an indexed file, I cannot use DATASET function. Thanks!\\n\\n-Fan\", \"post_time\": \"2014-01-31 23:21:40\" },\n\t{ \"post_id\": 5173, \"topic_id\": 1187, \"forum_id\": 8, \"post_subject\": \"Re: Handle Job or a action in case of a failure\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nTo test FAILURE, run this code:Rec := RECORD\\n unsigned1 id;\\n string20 txt;\\nEND;\\nds := DATASET(5,TRANSFORM(Rec,SELF.id := COUNTER, SELF.txt := 'ABCD'));\\n// ds := DATASET('ABCD',Rec,FLAT);\\n\\nFailOut := OUTPUT('ouch');\\nCnt := COUNT(ds) : FAILURE(FailOut);\\nCnt;
This should run successfully. \\n\\nNow, to make it fail at runtime, simply comment out the inline dataset and uncomment the DATASET with the non-existent filename. Your second run should result in a system error AND the "ouch" text in the result 1 tab. A more typical use of FAILURE would be to have it send an email alert (as the example in the Language Reference does).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-04 15:32:08\" },\n\t{ \"post_id\": 5172, \"topic_id\": 1187, \"forum_id\": 8, \"post_subject\": \"Re: Handle Job or a action in case of a failure\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI looked at the following workflow services.\\n\\nHow does the FAILURE workflow service work because both the compile time and run time errors are handled by ECL IDE and in which scenario we can effectively test the failure workflow service..?\\n\\nDo we have any sample code for the same..?\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2014-02-04 09:16:20\" },\n\t{ \"post_id\": 5169, \"topic_id\": 1187, \"forum_id\": 8, \"post_subject\": \"Re: Handle Job or a action in case of a failure\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nHave you looked at the FAILURE and SUCCESS workflow services?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-03 19:45:19\" },\n\t{ \"post_id\": 5165, \"topic_id\": 1187, \"forum_id\": 8, \"post_subject\": \"Handle Job or a action in case of a failure\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nDo we have any way to handle failure scenarios in case any action or job fails..?\\n\\nFor Ex a parse may fail but it should continue with the job instead of terminating from the workunit and continue with the job and perform some other action.\\n\\nI am referring to a try and catch scenario in HPCC ECL..?\\n\\nThere is a CATCH command but that has very specific action in case a recset fails.\\n\\nKindly help.\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2014-02-03 06:30:10\" },\n\t{ \"post_id\": 5176, \"topic_id\": 1189, \"forum_id\": 8, \"post_subject\": \"Improving the ECL Template Language\", \"username\": \"BenJones\", \"post_text\": \"The ECL Template Language is a useful code generation mechanism built into the ECL language that allows us to generate ECL code based on the structure of previously declared ECL attributes. In its current form, it is limited. I would like to show how much more useful it could be with a few improvements.\\n\\nHere is how it works currently. We have the #export and #exportxml commands:\\n\\n\\n#export(symbol,data)\\n#exportxml(symbol,data)\\n
\\n\\nWhat the #export command does is analyze data, which is an ECL attribute, previously defined as a record, field, or dataset. It generates an XML string representing the structure of that attribute and stores it in the template variable symbol.\\n\\nThe #exportxml command does the same thing except that the symbol doesn't have to have previously been defined with #declare and the loadxml command doesn't have to be invoked in order to use the #for statement.\\n\\nThe XML is generated with the following format:\\n\\n\\n<Data>\\n <Field name="<name-of-field>"\\n type="<ecl-type-without-size>"\\n size="<n>"\\n isRecord="<flag>"\\n isDataset="<flag>"\\n isEnd="<flag>"\\n />\\n ...\\n</Data>\\n
\\n\\nHere is what attributes of <Data> mean:\\n\\n\\n
\\n\\nThe #for command may then be used to walk through the XML and generate ECL code based on the attributes in the <Field> tag (see ECL Language Reference).\\n\\nAmong other things, this capability is used by the SALT.MAC_Default_SPC macro to generate a preliminary SALT specification file using a previously defined record structure.\\n\\nHowever, this capability would be much more useful if more attributes of <Field> were extracted from the record definition.\\n\\nSuppose that you have the following ECL code:\\n\\n\\nNameType := string;\\nAddressType := string;\\nPhoneType := string;\\n\\nPhoneBookLayout := record\\n NameType name;\\n AddressType address;\\n PhoneType phone;\\nend;\\n...\\n#export(PhoneBookStruct,PhoneBookLayout)\\n
\\n\\nThe resulting XML (simplified to show the essentials) would be:\\n\\n\\n<Data>\\n <Field name="name" type="string" />\\n <Field name="address" type="string" />\\n <Field name="phone" type="string" />\\n </Data> \\n
\\n\\nThis XML code does not reflect the fact that I created some typedefs: NameType, AddressType, and PhoneType to describe the various fields. Now it is true that underneath, these are all strings. However, imagine what you could do if you use the typedef names used in the declaration of the PhoneBookLayout to customize the ECL code that you might generate. For example, if you knew that the field phone was declared with the PhoneType, you could generate code to automatically normalize the phone number (remove punctuation, add area code and country code).\\n\\nNow, I'm sure that someone is likely to point out that I could define NameType, AddressType, and PhoneType as record structures:\\n\\n\\nNameType := record\\n string text;\\nend;\\n\\nAddressType := record\\n string text;\\nend;\\n\\nPhoneType := record\\n string text;\\nend;\\n\\nPhoneBookLayout := record\\n NameType name;\\n AddressType address;\\n PhoneType phone;\\nend;\\n
\\n\\nThe resulting XML (simplified) would indeed reflect the information:\\n\\n\\n<Data>\\n <Field isRecord="1" name="name" type="NameType" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="name" />\\n <Field isRecord="1" name="address" type="AddressType" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="address" />\\n <Field isRecord="1" name="phone" type="PhoneType" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="phone" />\\n</Data>\\n
\\n\\nThe only problem here is that semantics of accessing the fields are completely changed. Regular ECL code accessing the name field would have to refer to name.text rather than name, etc.\\n\\nThat being the case, it would be nice if ECL code understood that referencing a field name, whose underlying structure was a single field, implied an automatic reference to that single field.\\n\\nIt gets even worse if we use a child dataset:\\n\\n\\nPhoneBookLayout := record\\n NameType name;\\n AddressType address;\\n dataset(PhoneType) phones;\\nend;\\n
\\n\\nThe resulting XML (simplified) only partially reflects the information:\\n\\n\\n<Data>\\n <Field isRecord="1" name="name" type="NameType" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="name" />\\n <Field isRecord="1" name="address" type="AddressType" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="address" />\\n <Field isDataset="1" name="phones" type="table of <unnamed>" />\\n <Field name="text" type="string" />\\n <field isEnd="1" name="phones" />\\n</Data>\\n
\\n\\nNote that in the case of phones , which is specified as a child dataset, we lose the fact that we were using PhoneType as the underlying type.\\n\\nWhile we're at it, I should point out that the #export doesn't reflect inheritance at all nor does it indicate that any field modifiers are used.\\n\\nSo, what I'd like to propose is a few more attributes for the <Field> tag:\\n\\n\\n
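\n\nSetting the specific proposal aside for a moment, here is a minimal sketch, for background, of how the exported XML is typically consumed with #FOR today. It uses a simplified, self-contained copy of the PhoneBookLayout record above and only the name and type attributes already described; the template symbol names are arbitrary:\n\n
LOADXML('<xml/>');               // open an XML scope for the #DECLAREd accumulator (harmless here)

PhoneBookLayout := RECORD
  STRING name;
  STRING address;
  STRING phone;
END;

#DECLARE(out)
#EXPORTXML(PBStruct, PhoneBookLayout)
#FOR (PBStruct)
  #FOR (Field)
    #APPEND(out, %'{@name}'% + ' : ' + %'{@type}'% + '\n')
  #END
#END
OUTPUT(%'out'%);                 // one line per field: its name and its underlying type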
\", \"post_time\": \"2014-02-05 16:16:02\" },\n\t{ \"post_id\": 5182, \"topic_id\": 1192, \"forum_id\": 8, \"post_subject\": \"Improving Macros\", \"username\": \"BenJones\", \"post_text\": \"One of the reasons people shy away from using macros in ECL is that if you misspell a macro argument, obscure errors may result which are very hard to debug. One solution is of course to include in the macro body your own error checking so that the macro can issue an intelligible error message up front. This is not always possible in the current version of ECL because of the lack of certain primitives:\\n\\n[list=1:1f5drnd5]\\n\\nloadxml('<xml/>');\\n
\\nIt is very perplexing if someone tries to invoke a macro and then gets the message that an XML scope is not active.\\n\\nSo, I would recommend that a default XML scope always be available and that instead of loadxml, we should provide the following built-in ECL functions:\\n\\n\\npushxml([xmlstring] | [symbol [,branch]])\\n
\\n\\n\\nStart or load a new XML scope. If no arguments are given, start a new unnamed scope.\\n\\nIf #exportxml is performed, after doing pushxml(), it simply loads that unnamed XML scope with the generated XML. Alternatively, if a pushxml() is performed, an #export and a loadxml may be called to set symbols into that unnamed scope.\\n
\\n\\npopxml()\\n
\\n\\nReturn to the previous XML scope.\\n\\nExistence\\n\\nWe may want a macro to define a name which was passed as an argument. As we all know, if we try to redefine an attribute, we often get an obscure error message depending on how that attribute was previously defined.\\n\\nWe also may want a macro to reference some attribute that needed to have been defined by a previous macro. If we forgot to invoke that other macro first, obscure errors result.\\n\\nIf we use an undefined template variable name (i.e. %A%) in an expression, it is assumed to be defined as 0. That can be useful but it would be better to know whether it was defined already because if we try to #append or #set using that variable, an error will result.\\n\\nThere is currently no way in ECL to test to see if an attribute or template variable is already defined.\\n\\nIt is true that we have the #inmodule function:\\n
\\n\\n#inmodule(module,attribute)\\n
\\nReturn true if attribute is defined in the module.\\n\\nThis function returns false if anything other than a folder name (or repository module name) is passed as the module argument or if anything other than attribute[.ecl] (file) defined in that folder is specified. \\n\\nA more general solution would be to provide the following template function:\\n
\\n\\n#exists([qualifier.]identifier)\\n
\\n\\nReturn true if identifier is defined in the current scope. This identifier may be qualified. For example:\\n\\n#exists(A) returns true if A is defined in the current scope.\\n#exists(A.B) returns true if B is defined in A.\\n#exists(^.A) returns true if A is defined in the outer scope.\\n#exists(*.A) returns true if A is defined in any outer scope.\\n#exists(%A%) returns true if A is defined as a template variable.\\n\\nCheck For a Valid Identifier or Expression\\n\\nIf you specify the wrong characters in a macro argument that was intended to be used as an identifier or expression in the context in which it is to be expanded, even more obscure errors may result. How about providing the following template function:\\n
\\n\\n#validname(argument)\\n
\\n\\nReturn true if argument is a valid ECL identifier.\\n
\\n\\n#validexpression(argument)\\n\\nReturn true if argument is a valid ECL expression.\\n
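\n\nTo make the first point above concrete, this is the kind of ceremony being objected to; a minimal sketch (macro, dataset, and variable names are arbitrary) of a macro that uses a template variable, which only compiles because the caller has already opened an XML scope:\n\n
LOADXML('<xml/>');              // an XML scope must be active before template variables are used

CountAndLabel(ds, lbl) := MACRO
  #DECLARE(msg)
  #SET(msg, 'Row count for ' + lbl)
  OUTPUT(COUNT(ds), NAMED('cnt'));
  OUTPUT(%'msg'%, NAMED('label'));
ENDMACRO;

someDS := DATASET([{1}, {2}, {3}], {UNSIGNED1 n});
CountAndLabel(someDS, 'someDS');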
\", \"post_time\": \"2014-02-06 16:42:12\" },\n\t{ \"post_id\": 5186, \"topic_id\": 1193, \"forum_id\": 8, \"post_subject\": \"Re: Search\", \"username\": \"kumar2k14\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2014-02-07 14:50:33\" },\n\t{ \"post_id\": 5185, \"topic_id\": 1193, \"forum_id\": 8, \"post_subject\": \"Re: Search\", \"username\": \"rtaylor\", \"post_text\": \"kumar2k14,\\n\\nIn ECL itself we have an entire pattern matching technology built around the PARSE function and its supporting PATTERN, TOKEN, and RULE definitions, along with the REGEXFIND and REGEXREPLACE functions that both do pattern matching using Perl-standard regular expressions.\\n\\nFor simple patterns, like your "%S" example, we also have several standard library functions available: STD.STR.EndsWith(), STD.STR.StartsWith(), STD.STR.Contains(), STD.STR.Find(), and a number of others that you may find useful -- all these are documented in the Standard Library Reference (press F1 in the IDE and its the third "book" in the help file, along with the Language Reference and Programmer's Guide).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-07 14:41:19\" },\n\t{ \"post_id\": 5183, \"topic_id\": 1193, \"forum_id\": 8, \"post_subject\": \"Search\", \"username\": \"kumar2k14\", \"post_text\": \"Please let me know what is equivalent in ECl which is something use in sql like '%S'\\nThanks for yur help\", \"post_time\": \"2014-02-06 22:07:17\" },\n\t{ \"post_id\": 5191, \"topic_id\": 1194, \"forum_id\": 8, \"post_subject\": \"Re: Can we import from the folder containing this one?\", \"username\": \"ghalliday\", \"post_text\": \"This looks like it is related to the jira issue:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10150\\n\\nI have been experimenting with the changes that would be required to allow\\n\\nimport $.^ as parent;\\n\\nto work.\", \"post_time\": \"2014-02-10 14:05:43\" },\n\t{ \"post_id\": 5190, \"topic_id\": 1194, \"forum_id\": 8, \"post_subject\": \"Re: Can we import from the folder containing this one?\", \"username\": \"BenJones\", \"post_text\": \"Actually, now that I think about it, I don't really want to "import * from M;" as I indicated in the previous post. In order for the folder structure to behave the same as the module structure I originally started with, I would need to be able to see the exported symbols from M but not necessarily bring them into the namespace of M11, M12, M21, or M22. The reason is that I may want to override those symbols in those inner namespaces.\\n\\nNow, why would I want to do that? Because I could conceivably have an action that I want execute at the innermost module level that, if I applied it to an outer module level might apply that action to each module in that outer level.\\n\\nAlternatively, I might have an attribute at an inner level representing the data associated with that inner level, and an attribute of the same name at an outer level that consists of the concatenation of all the data of the same name at the inner levels.\\n\\nSo here is what I'm really after:\\n\\nI simply want folders to behave like module structures, just like they did in the legacy ECL except that I want it to work on multiple folder levels. And I want ECL files in those folders to behave just like attributes did in the legacy ECL, which is to say that each file should contain an exportable attribute of the same name. 
In addition, I would like to be able to apply the #export and #exportxml template functions to a module or folder name so that I can iterate over the names of folders and files contained in any given folder (or the exportable attributes in a module structure) to generate ECL code, just like we can use it to iterate over the names of fields in a record structure.\", \"post_time\": \"2014-02-10 04:08:29\" },\n\t{ \"post_id\": 5189, \"topic_id\": 1194, \"forum_id\": 8, \"post_subject\": \"Re: Can we import from the folder containing this one?\", \"username\": \"BenJones\", \"post_text\": \"I don't think this is a silly question. Let's say that I define a module structure in a single attribute file:\\n
\\n\\nexport M := module\\n export M1 := module\\n export M11 := module\\n // Definitions in M11\\n end;\\n export M12 := module\\n // Definitions in M12\\n end;\\n end;\\n export M2 := module\\n export M21 := module\\n // Definitions in M21\\n end;\\n export M22 := module\\n // Definitions in M22\\n end;\\n end;\\nend;\\n
\\n\\nInside of M22, I can reference M2.M21 without having to say M.M2.M21 and I can reference M1.M11 without having to say M.M1.M11.\\n\\nNow, let's suppose that I decide to make M a folder; M1 and M2 folders inside of M; M11 and M12 .ecl files inside of M1; and M21 and M22 .ecl files inside of M2. Now I have to put imports at the top of each file as follows so that all the code within M11, M12, M21, and M22 still works:\\n\\n\\nM11.ecl:\\n import * from M;\\n import * from $;\\n export M11 := module\\n // Definitions in M11\\n end;\\n\\nM12.ecl:\\n import * from M;\\n import * from $;\\n export M12 := module\\n // Definitions in M12\\n end;\\n\\nM21.ecl:\\n import * from M;\\n import * from $;\\n export M21 := module\\n // Definitions in M21\\n end;\\n\\nM22.ecl:\\n import * from M;\\n import * from $;\\n export M22 := module\\n // Definitions in M22\\n end;\\n
\\n\\nSo far so good, although I'm a little annoyed that I have to tell ECL what the file system can already easily figure out, that M11 and M12 are inside of the same folder M1, that M21 and M22 are inside of the same folder M2, and that M1 and M2 are inside of the same folder M.\\n\\nNow suppose that I need to move folder M to be inside of folder X. Now, in order for the code to still work, I need to go back and change all the "import * from M;" lines to be "import X;import * from X.M;". If I could have said "import * from ^.$;" or something like that, then I wouldn't have to change anything else. Better yet, if we let ECL let the file system discover the nesting relationships, we wouldn't have to even do that. \\n\\nOf course this is a contrived example. However, in the real world, I might have lots of independent modules that need to be brought together into some kind of folder hierarchy so that they can now refer to each other. The mere fact of renaming or moving higher level folders around shouldn't change the relationships that files at inner levels already have with each other.\", \"post_time\": \"2014-02-08 12:56:00\" },\n\t{ \"post_id\": 5188, \"topic_id\": 1194, \"forum_id\": 8, \"post_subject\": \"Re: Can we import from the folder containing this one?\", \"username\": \"rtaylor\", \"post_text\": \"Yes. This does it:\\nIMPORT * FROM ContainingFolder;
\\nIf this were an ECL class and you asked this question, I would answer the same way and then proceed to enlighten you on the fact that the "IMPORT * FROM folder" syntax was designed only as a shortcut to allow our multi-years-worth of legacy code to be easily ported into the OSS world by just adding IMPORT * FROM Default;
because in the pre-OSS world only the EXPORT definitions in the "Default" module were truly global and could be referenced without qualification. \\n\\nTherefore, using that construct in new code just to get around having to qualify definitions is a bad habit to get into (and will likely get you into trouble eventually). I would do what you want this way:IMPORT $;\\nIMPORT ContainingFolder AS CF;\\n\\nEXPORT MyDef := $.AnotherDef + CF.ThaDef;
But, just like my students, I expect you'll probably ignore that advice. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-07 21:06:03\" },\n\t{ \"post_id\": 5187, \"topic_id\": 1194, \"forum_id\": 8, \"post_subject\": \"Can we import from the folder containing this one?\", \"username\": \"BenJones\", \"post_text\": \"We can get direct access to all exported attributes (files) in the same module (folder) by saying:\\n\\n
\\n import * from $;\\n
\\nIs there a way to a way to do the same for the containing folder (analogous to ../ in Linux)?\", \"post_time\": \"2014-02-07 20:28:57\" },\n\t{ \"post_id\": 5208, \"topic_id\": 1196, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to WAIT until the submitted WU completes?\", \"username\": \"balajisampath\", \"post_text\": \"Thanks for looking at this issue Richard ,\\n\\nWAIT command doesn't allow variables. If more than one instance of code needs to be executed then I have to change the code for every new instance and this is why I am looking for another option. Also this command is not supported inside FUNCTION. \\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-02-13 18:35:53\" },\n\t{ \"post_id\": 5198, \"topic_id\": 1196, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to WAIT until the submitted WU completes?\", \"username\": \"rtaylor\", \"post_text\": \"Balaji,\\n\\nThis is exactly what WAIT and NOTIFY were designed to do, so what is the problem/issue with them that you want to resolve? \\n\\nRichard\", \"post_time\": \"2014-02-12 13:40:38\" },\n\t{ \"post_id\": 5195, \"topic_id\": 1196, \"forum_id\": 8, \"post_subject\": \"Is there a way to WAIT until the submitted WU completes?\", \"username\": \"balajisampath\", \"post_text\": \"I am generating ECL code and using SOAP calls to create and submit workunit. My parent workunits completes once the SOAP call succeeds. I want the parent program to wait until the submitted workunit completes.\\n\\nCurrently I am achieving this using WAIT and NOTIFY commands. I want to avoid using WAIT and NOTIFY\\n\\nIf there is any alternate way please help me with pointers/suggesions\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-02-11 22:16:30\" },\n\t{ \"post_id\": 5487, \"topic_id\": 1203, \"forum_id\": 8, \"post_subject\": \"Re: Setting the seed for RANDOM()\", \"username\": \"tlhumphrey2\", \"post_text\": \"The C++ function I posted back in February doesn't work. The following works but only on hthor.\\n\\nDOrtlSeedRandom(unsigned i) := BEGINC++\\n rtlSeedRandom(i);\\nENDC++;\\n\\nThen, before you use/call the code that calls RANDOM(), do this:\\n\\nDOrtlSeedRandom(<yourseed>);\", \"post_time\": \"2014-04-08 13:45:00\" },\n\t{ \"post_id\": 5253, \"topic_id\": 1203, \"forum_id\": 8, \"post_subject\": \"Re: Setting the seed for RANDOM()\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard Chapman just gave me a work-around that I can use. He suggested I make an embedded c++ that simply sets the seed of srand and then execute it at the beginning of my run. He believes this should work when running on an hthor (but something more complicated would have to be done on thors).\\n\\nBut, is will work for me. Although I'm currently doing much of my evaluation on thors, the part of the evaluation that needs to have results stay constant from run to run could be done on an hthor.\\n\\nBy the way, the embedded c++ he suggested was:\\n\\nDoSrand(integer i) := BEGINC++\\n srand(i);\\n return I;\\nENDC++;\", \"post_time\": \"2014-02-20 15:00:24\" },\n\t{ \"post_id\": 5252, \"topic_id\": 1203, \"forum_id\": 8, \"post_subject\": \"Re: Setting the seed for RANDOM()\", \"username\": \"rtaylor\", \"post_text\": \"Not that I know of. Sorry, Tim. \", \"post_time\": \"2014-02-20 14:51:44\" },\n\t{ \"post_id\": 5218, \"topic_id\": 1203, \"forum_id\": 8, \"post_subject\": \"Setting the seed for RANDOM()\", \"username\": \"tlhumphrey2\", \"post_text\": \"Is there away to do this? 
\\n\\nI'm verifying some of the machine learning library functions and several of them use RANDOM() to generate their results. So, from run to run their results differ even when their input stays the same.\\n\\nBut, to verify their results, I need their results to stay the same from run to run. So, if I could set RANDOM's seed, I could get the same result every run.\", \"post_time\": \"2014-02-17 19:12:41\" },\n\t{ \"post_id\": 5224, \"topic_id\": 1204, \"forum_id\": 8, \"post_subject\": \"Re: Difference between MAP and CASE!\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot Bob.\", \"post_time\": \"2014-02-18 13:59:50\" },\n\t{\n\t\t\"post_id\": 5222, \"topic_id\": 1204, \"forum_id\": 8, \"post_subject\": \"Re: Difference between MAP and CASE!\", \"username\": \"bforeman\", \"post_text\": \"Hi Pius,\\n\\nIn a nutshell, MAP allows you you test multiple conditions where CASE allows you to test multiple results of a single expression. At times what you can do with MAP can also be done with CASE, but MAP extends your options a little bit farther.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-02-18 13:19:07\"\n\t},\n\t{ \"post_id\": 5219, \"topic_id\": 1204, \"forum_id\": 8, \"post_subject\": \"Difference between MAP and CASE!\", \"username\": \"pius_francis\", \"post_text\": \"Hi All,\\n Can someone help me out in differentiating MAP and CASE conditional Statement. I infer it to be same. Is there any difference in its functionality\\nThanks,\\nPius\", \"post_time\": \"2014-02-18 06:30:04\" },\n\t{ \"post_id\": 5270, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Will do Richard, thank you.\\n\\nDavid\", \"post_time\": \"2014-02-21 13:26:35\" },\n\t{ \"post_id\": 5268, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nGo to http://learn.lexisnexis.com/hpcc and register (if you haven't already) or login (if you're already registered). That will take you to the HPCC Systems Learning Resource Center page. The Advanced ECL, Advanced Thor, Intro to Roxie, and Advanced Roxie course links will take you to the page for that course where it says in the first paragraph:
You will need a promo code. If you don't have a promo code yet and need to purchase this course, Click Here. You will be taken to Eventbrite to register and pay for the course and then receive the promo code to enter on the following screen.
Let me know how it goes,\\n\\nRichard\", \"post_time\": \"2014-02-21 12:11:09\" },\n\t{ \"post_id\": 5266, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Richard\\n\\nDo you have a link for me to buy the Online courses? I don't qualify for a discount.\\n\\nDavid\", \"post_time\": \"2014-02-21 09:35:57\" },\n\t{ \"post_id\": 5265, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nYes. All the courses we currently teach are now available online (self-paced and pre-recorded) and through Webex (instructor-led and live), in addition to our on-site classes. All three delivery forms cover the same material.\\n\\nThe first two online Intro courses are free. The rest of the online courses are $495 each unless you work for LexisNexis, Reed Elsevier, or one of our HPCC Systems' partner companies (in which case, you can get a discount code by emailing training@hpccsystems.com).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-21 09:32:52\" },\n\t{ \"post_id\": 5264, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Thanks, is that the one that is available online? I've done the basic courses, however I noticed there some that you can pay for.\\n\\nDavid\", \"post_time\": \"2014-02-21 09:01:24\" },\n\t{ \"post_id\": 5263, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nGreat. It was an extrapolation of a bit of lab exercise code from our Intro to Roxie class.\\n\\nRichard\", \"post_time\": \"2014-02-21 08:58:04\" },\n\t{ \"post_id\": 5259, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Richard, that looks so much better than mine \\n\\nIt works a treat.\\n\\nThanks \\n\\nDavid\", \"post_time\": \"2014-02-20 21:17:16\" },\n\t{ \"post_id\": 5255, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nTry something like this:
F1 := InJobNo = '' OR SitexData.JobNo = InJobNo;\nF2 := InService = '' OR SitexData.ServiceName = InService;\nF3 := InSearchEngine = '' OR \n     STD.Str.Contains(SitexData.SearchEngine,InSearchEngine,TRUE);\nF4 := InJobType = '' OR SitexData.JobType = InJobType;\nF5 := InFromDate = '' OR \n     (SitexData.Orderdate >= uFromDate AND SitexData.OrderDate <= uToDate);\nF6 := InJobNoIn = '' OR SitexData.JobNo IN Std.Str.SplitWords(InJobNoIn, ',');\nF7 := InStatus = '' OR \n     CASE(STD.Str.ToUpperCase(InStatus),\n          'INCOMPLETE' => Sitexdata.Status IN ['BOOKED','PLANNED','SUSPENDED',\n                                               'EN ROUTE','BOOKED','ARRIVED'],\n          'PLANNED'    => Sitexdata.Status IN ['PLANNED','EN ROUTE',\n                                               'BOOKED','ARRIVED'],\n          'COMPLETE'   => Sitexdata.Status IN ['COMPLETE','DEFERRED',\n                                               'NON DEFERRED'],\n          'CANCELLED'  => Sitexdata.Status <> 'CANCELLED',\n          Sitexdata.Status = STD.Str.ToUpperCase(InStatus)); \n\nF8 := (InJobNo = '' AND \n       InService = '' AND \n       InSearchEngine = '' AND \n       InJobType = '' AND \n       InFromDate = '' AND \n       InJobNoIn = '' AND \n       InStatus = '') OR\n      (SitexData.orderdate >= Date.ToDaysSince1900(Date.Today()-30));\n\nFilter := F1 AND F2 AND F3 AND F4 AND F5 AND F6 AND F7 AND F8;\n\nSiteConditionData := SitexData(Filter);
That's the approach I would use.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-20 15:43:43\" },\n\t{ \"post_id\": 5223, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Many thanks for your reply Bob. I'll take a look and come back to you.\\n\\nDavid\", \"post_time\": \"2014-02-18 13:57:17\" },\n\t{ \"post_id\": 5221, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Re: Map Statement\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI really think that MAP is the best way to go in this situation. You could try to use a MACRO or FUNCTIONMACRO to generalize the inputs, but then you have to account for different value types and how many search parameters are valid. In my opinion, that method would almost have as much work and complexity as just having MAPs and nested MAPs to achieve the results.\\n\\nWe actually do something exactly like this in our ROXIE classes, and make the FUNCTION parameters more manageable by using an INTERFACE with the STORED function.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-02-18 13:15:13\" },\n\t{ \"post_id\": 5220, \"topic_id\": 1205, \"forum_id\": 8, \"post_subject\": \"Map Statement\", \"username\": \"David Dasher\", \"post_text\": \"Hi all\\n\\nI'm using a MAP statement to check which parameters have been passed into a query and then filtering my dataset based on which parameter has been used. My client now wants to be able to use combinations i.e. parameter 1 and 4. Without doing every possible combination is there a way I can build a condition based on which parameters have values?\\n\\nI hope that makes sense. I have placed some of my code below.\\n\\nKind regards\\n\\nDavid \\n\\nSiteConditionData := Map(\\n\\tInJobNo <> '' => SitexData(JobNo = InJobNo),\\n\\tInService <> '' => SitexData(ServiceName = InService),\\n\\tInSearchEngine <> '' => SitexData(STD.Str.Contains(SearchEngine,InSearchEngine,true)= true),\\n\\tInJobType <> '' => SitexData(JobType = InJobType),\\n\\tInFromDate <> '' => SitexData(Orderdate >= uFromDate and OrderDate <= uToDate),\\n\\tInJobNoIn <> '' => SitexData(JobNo in Std.Str.SplitWords(InJobNoIn, ',')),\\n\\tInStatus <> '' => \\n\\tMap(\\n\\t\\tSTD.Str.ToUpperCase(InStatus) = 'INCOMPLETE' => Sitexdata(Status in['BOOKED','PLANNED','SUSPENDED','EN ROUTE','BOOKED','ARRIVED']),\\n\\t\\tSTD.Str.ToUpperCase(InStatus) = 'PLANNED' => Sitexdata(Status in['PLANNED','EN ROUTE','BOOKED','ARRIVED']),\\n\\t\\tSTD.Str.ToUpperCase(InStatus) = 'COMPLETE' => Sitexdata(Status in['COMPLETE','DEFERRED','NON DEFERRED']),\\n\\t\\tSTD.Str.ToUpperCase(InStatus) = 'CANCELLED' => Sitexdata(Status <> 'CANCELLED'),\\n\\t\\tSitexdata(Status = STD.Str.ToUpperCase(InStatus))\\n\\t ), \\n\\tSitexData(orderdate >= Date.ToDaysSince1900(Date.Today()-30))\\n\\t);\\n\\t
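\n\nFor completeness, a minimal sketch of the INTERFACE-with-STORED pattern Bob refers to above; the interface and member names here are illustrative and not taken from the query (each member of the INTERFACE becomes a stored input of the same name):\n\n
SearchArgs := INTERFACE
  EXPORT STRING InJobNo   := '';    // empty string means "not supplied"
  EXPORT STRING InService := '';
  EXPORT STRING InStatus  := '';
END;

Args := STORED(SearchArgs);         // gathers all the stored inputs in one definition

// Args.InJobNo, Args.InService, Args.InStatus can then drive optional filters
// such as the F1..F8 definitions shown earlier in this thread.
OUTPUT(Args.InJobNo);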
\", \"post_time\": \"2014-02-18 09:27:09\" },\n\t{ \"post_id\": 5273, \"topic_id\": 1206, \"forum_id\": 8, \"post_subject\": \"Re: Compilation error when not connected to MySQL repository\", \"username\": \"balajisampath\", \"post_text\": \"Thank You Richard\", \"post_time\": \"2014-02-21 15:18:13\" },\n\t{ \"post_id\": 5256, \"topic_id\": 1206, \"forum_id\": 8, \"post_subject\": \"Re: Compilation error when not connected to MySQL repository\", \"username\": \"rtaylor\", \"post_text\": \"Balaji,\\n\\nThe difference is between Open Source and the legacy systems. In old legacy ECL code, side-effect actions were allowed. Now, in the Open Source world, side-effect actions are not allowed and the WHEN function (http://hpccsystems.com/download/docs/ecl-language-reference/html/WHEN_Function.html) must be used to accomplish the same thing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-20 15:49:54\" },\n\t{ \"post_id\": 5225, \"topic_id\": 1206, \"forum_id\": 8, \"post_subject\": \"Compilation error when not connected to MySQL repository\", \"username\": \"balajisampath\", \"post_text\": \"My project is migrating from Mysql repository to Github. Some of the existing code fails to compile when I access the code via local folder.\\n\\nThe below function shows compilation error when I try to use my local directory\\n\\nf1:= function\\noutput(10);\\nexport x:= 10;\\nreturn x;\\nend;\\nf1;
\\n\\nError: WHEN must be used to associate an action with a definition (3, 1), 2325, \\n\\nThe same code compiles when I am connected to MySQL repository.\\n\\nPlease help me to identify the issue.\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-02-18 16:21:34\" },\n\t{ \"post_id\": 5235, \"topic_id\": 1210, \"forum_id\": 8, \"post_subject\": \"Re: Problem using REGEXFIND\", \"username\": \"David Dasher\", \"post_text\": \"Thank you so much, it now works perfectly.\\n\\nDavid\", \"post_time\": \"2014-02-19 16:03:49\" },\n\t{ \"post_id\": 5234, \"topic_id\": 1210, \"forum_id\": 8, \"post_subject\": \"Re: Problem using REGEXFIND\", \"username\": \"tlhumphrey2\", \"post_text\": \"Change: string100 InSearchEngine := 'DAVID' : STORED('InSearchEngine');\\n\\nto: string InSearchEngine := 'DAVID' : STORED('InSearchEngine');\\n\\nAnd it should work. Why?\\n\\nstring100 pads with spaces so instead of 'DAVID' you have 'DAVID ', i.e. you have 100 - length('DAVID') spaces to the right of DAVID.\", \"post_time\": \"2014-02-19 15:57:59\" },\n\t{ \"post_id\": 5230, \"topic_id\": 1210, \"forum_id\": 8, \"post_subject\": \"Problem using REGEXFIND\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nI'm trying to use REGEXFIND to filter some results in a Dataset as STD.Str.Contains does not do what I need on this occasion and I'm having trouble getting results to come back when using a parameter.\\n\\nIf I Test my filter using\\n\\nfiltered := SiteConditionData(REGEXFIND('DAVID', SearchEngine));\\n\\nEverything is fine.\\n\\nIf I set a parameter \\n\\nstring100 InSearchEngine := 'DAVID' : STORED('InSearchEngine');\\n\\nfiltered := SiteConditionData(REGEXFIND(InSearchEngine, SearchEngine));\\n\\nNo data is returning. I'm sure it's me doing something stupid, I just can't see it.\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-02-19 13:59:54\" },\n\t{ \"post_id\": 5315, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"rtaylor\", \"post_text\": \"We do try \", \"post_time\": \"2014-03-03 18:53:44\" },\n\t{ \"post_id\": 5311, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"Leofei\", \"post_text\": \"Thanks a lot, Richard. You are always helpful!\\n\\n-Fan\", \"post_time\": \"2014-03-03 16:06:47\" },\n\t{ \"post_id\": 5289, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nThe STD.File.RemotePull() function (documented in the Standard Library Reference included in the IDE's online Help file) does the Remote Copy in code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-26 10:30:34\" },\n\t{ \"post_id\": 5285, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"Leofei\", \"post_text\": \"Forgot to mention, I also tried to wrap SEQUENTIAL function by NOTHOR function, it still report the same message. Thanks!\\n\\n-Fan\", \"post_time\": \"2014-02-25 16:15:30\" },\n\t{ \"post_id\": 5284, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"Leofei\", \"post_text\": \"Richard,\\n\\nThanks for the reply. Is it possible to finish it by code? Because there are like 15 files need to be copied, and this procedure need to repeat a couple of times. 
It will really take time to do this one by one manually. The naming conflict is because we have a same named SF in DEV, so the one I copied from PROD should be renamed by adding my initials to it.\\n\\nCurrently, the sample code reports error message like this:\\n\\nError: System error: -1: Graph[1], SLAVE 10.*.*.*:*: No access to Dali - this normally means a plugin call is being called from a thorslave\\n\\nThanks for any suggestion!\\n-Fan\", \"post_time\": \"2014-02-25 16:11:50\" },\n\t{ \"post_id\": 5283, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nThen you should be able to just copy the PROD subfile to DEV by using the Remote Copy feature from your DEV ECL Watch (the next to last selection in the DFU Files section of the menu on the left). Then you can just create the Superfile on DEV and populate it with the DEV version of the subfile. Since they are in separate environments (Dali's), the names of the subfiles and superfiles on DEV can be the same as those you use on PROD.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-25 15:11:44\" },\n\t{ \"post_id\": 5281, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"Leofei\", \"post_text\": \"Richard,\\n\\nThey are two different Dali's. Here is the definition of ut.foreign_prod in the code.\\nforeign_prod := '~foreign::10.*.*.*::';\\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-02-25 14:25:12\" },\n\t{ \"post_id\": 5280, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Re: Regarding copy files from different HPCC server\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nAre your DEV and PROD separate environments (two different Dali's) or just separate clusters in a single environment (one Dali)?\\n\\nRichard\", \"post_time\": \"2014-02-25 10:40:59\" },\n\t{ \"post_id\": 5278, \"topic_id\": 1218, \"forum_id\": 8, \"post_subject\": \"Regarding copy files from different HPCC server\", \"username\": \"Leofei\", \"post_text\": \"Hi, I have a question to copy files from a different server:\\n\\nI know how to copy a single file from a different server. But how can I copy a SuperFile? Here is the issue details: \\n\\nThe SF is on PROD, with an known logical name. While it has only one subFile, which I need to copy to DEV, keep the sub file name as it is(because it has date info). The SF file I need to assign a new name(Because of the name conflict in DEV). I'm wondering how can I do this?\\n\\nHere is some code I have:\\n
File:= 'thor::known::logic::name';\\n\\nfilecopy (STRING SFpath ) := FUNCTION\\n\\tr_sf := ut.foreign_prod + SFpath ;\\n\\tr_file := fileservices.SuperFileContents(r_sf)[1].name : independent;\\n\\td_file := '~' + fileservices.SuperFileContents(r_file)[1].name[23..];\\n\\td_sf := '~' + SFpath[1..22] + 'new::' + SFpath[23..] :independent;\\n\\tres := sequential(\\n\\t\\t\\t\\t\\t\\t\\tfileservices.copy('~'+r_file, 'thor11', d_file,,,,,true,true),\\n\\t\\t\\t\\t\\t\\t\\tfileservices.startsuperfiletransaction(),\\n\\t\\t\\t\\t\\t\\t\\tif(fileservices.SuperFileExists(d_sf),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tfileservices.clearsuperfile(d_sf),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tfileservices.CreateSuperFile(d_sf)),\\n\\t\\t\\t\\t\\t\\t\\tfileservices.addsuperfile(d_sf,'~'+d_file),\\n\\t\\t\\t\\t\\t\\t\\tfileservices.finishsuperfiletransaction()\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\treturn res;\\n\\nEND;\\n\\nfilecopy(File);\\n
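As a side note on the STD.File.RemotePull() route Richard points to earlier in this thread, a rough sketch of that call is below. The URL, logical names and cluster group are placeholders, and the optional-parameter order (and which environment the URL should point at) should be checked against the Standard Library Reference before use.

IMPORT STD;

remoteEsp := 'http://10.0.0.1:8010/FileSpray';             // ESP FileSpray URL (placeholder)
srcName   := '~thor::known::logic::name::subfile';          // subfile on the other environment (placeholder)
destGroup := 'mythor';                                       // destination cluster group (placeholder)
destName  := '~thor::dev::known::logic::name::subfile';      // new name in this environment (placeholder)

// Remote copy in code; the copied file can then be added to a local superfile as usual
STD.File.RemotePull(remoteEsp, srcName, destGroup, destName,
                    /*timeout*/ -1, /*maxConnections*/ 1, /*allowoverwrite*/ TRUE);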
\\n\\nThanks a lot!\\n-Fan\", \"post_time\": \"2014-02-24 19:24:33\" },\n\t{ \"post_id\": 5303, \"topic_id\": 1220, \"forum_id\": 8, \"post_subject\": \"Re: Error on Roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nSorry for the delay in reply, this is an error that is rarely seen or reported, so I needed to dig and ask other members of the HPCC team.\\n\\nThe error indicates that a roxie slave is failing to contact the roxie server node to get dynamic file information. \\n\\nIt has been seen in testing with too many parallel queries, leaving no threads available for the callback to the server, but I think it only applies where there are child queries of child queries.\\n\\nThe important thing is that if this can be consistently reproduced, you should probably post an issue to the Community Issue Tracker, with enough information and code samples for our team to be able to reproduce this. The Community Issue Tracker is located at this link:\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-02-28 13:44:05\" },\n\t{ \"post_id\": 5291, \"topic_id\": 1220, \"forum_id\": 8, \"post_subject\": \"Error on Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nWe are receiving an intermittent error when running a job on Roxie. I have pasted the message below. Can anybody help?\\n\\n<Source>Roxie</Source><Code>1454</Code><Message>Failed to get response from server for dynamic file callback</Message> \\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-02-26 12:44:44\" },\n\t{ \"post_id\": 5317, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"tlhumphrey2\", \"post_text\": \"Great! Glad I could help.\", \"post_time\": \"2014-03-03 21:05:19\" },\n\t{ \"post_id\": 5316, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"kereno\", \"post_text\": \"Hi, It helped! Adding the -I flag + path to the import libraries solved the issue. Thanks!\", \"post_time\": \"2014-03-03 20:41:02\" },\n\t{ \"post_id\": 5313, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"tlhumphrey2\", \"post_text\": \"We may have to get someone else evolved in this discussion because I don't have experience with eclplus. But, maybe the following will help you.\\n\\nI’m fairly sure that you are missing a path that the compiler, eclcc.exe, needs, which tells the compiler where to search for ecl imports. So, besides the command line arguments that you currently have for eclcc.exe, you also need “-I <path>” (without the quotes), where <path> is the full path to where Test can be found (don’t include Test at the end of that path).\\n\\nI don’t know how you provide command line arguments to eclcc.exe. With ECLIDE, there is a tab for the compiler in perferences where these arguments are placed.\\n\\nHope this helps.\", \"post_time\": \"2014-03-03 18:10:02\" },\n\t{ \"post_id\": 5312, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"kereno\", \"post_text\": \"Thank you Timothy. I am actually launching my queries with eclplus, rather than using the ECLIDE as I am running on Ubuntu and there isn't a version of ECL IDE for Ubuntu. 
\\nSo it looks like this: \\neclplus @myquery.txt \\n\\nI am wondering what would be the equivalent configuration of the attachment you sent me in eclplus?\\n\\nThank you,\\nKeren\", \"post_time\": \"2014-03-03 17:31:43\" },\n\t{ \"post_id\": 5308, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"tlhumphrey2\", \"post_text\": \"I was hoping you could attach something like my attachment, which is a screen shot of what my repository looks like in IDE. Why? \\n\\nIt shows me two things: 1) that the folder, Test, can be seen by the ecl compiler (if I saw it in the repository then the ecl compiler can see it) and 2) for the IMPORT to recognize it as a module (i.e. you won’t get the error message you’re seeing), then it would have to appear in the repository as a brown folder.\", \"post_time\": \"2014-02-28 19:51:19\" },\n\t{ \"post_id\": 5307, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"kereno\", \"post_text\": \"Sure,\\n\\n ~/forum_eclplus 1$ ls -al\\ntotal 20\\ndrwxrwxr-x 3 kereno kereno 4096 Feb 28 11:13 .\\ndrwxr-xr-x 81 kereno kereno 4096 Feb 28 11:13 ..\\n-rw------- 1 kereno kereno 43 Jan 31 14:25 eclplus.ini\\n-rw-rw-r-- 1 kereno kereno 31 Feb 27 13:38 forum_question.txt\\ndrwxrwxr-x 2 kereno kereno 4096 Feb 27 13:37 Test\\n\\nNote that this is a simpler reproduction of my issue, and the above file/folder were created for this purpose (hence the names )\", \"post_time\": \"2014-02-28 19:16:31\" },\n\t{ \"post_id\": 5302, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"Re: problem using IMPORT\", \"username\": \"tlhumphrey2\", \"post_text\": \"Can you take a snapshot of what you respository looks like with the Test module visable, please?\", \"post_time\": \"2014-02-28 13:30:27\" },\n\t{ \"post_id\": 5300, \"topic_id\": 1223, \"forum_id\": 8, \"post_subject\": \"problem using IMPORT\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am getting an error when using an "IMPORT Test" command. I exported my ECL definitions under a folder named "Test" (located at the same directory where my script is) . The error message using eclplus:\\n<Error><source>eclcc</source><line>2</line><code>2081</code><message> Import names unknown module "Test"</message></Error>\\n\\nI also tried IMPORT * FROM Test and got the same error.\\nMy script merely contains the import command:\\nIMPORT Test;\\n\\nAny clue what am I doing wrong?
\\n\\nThanks,\\nKeren\", \"post_time\": \"2014-02-27 21:41:49\" },\n\t{ \"post_id\": 5348, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Richard and Dan,\\n\\nI am not very specific to get/use the IP or Hostname. All I want is to make SOAP call to currently connected server. Since the SOAP call require IP/Hostname address, I am trying to find it automatically.\\n\\nI am building component which will generate ECL code by accessing WsDfu and WsWorkunits SOAP calls. I prefer this component to work in any environment without configurations.\\n\\nSince there is an option to get Dali IP, I thought something could be available for ESP also.\\n\\nMy last option is to add configuration to my component(i.e. to give IP/Hostname as parameters)\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-03-05 14:21:53\" },\n\t{ \"post_id\": 5345, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"richardkchapman\", \"post_text\": \"There is no requirement that an environment has a single ESP - there may be multiple. So asking for the IP of "the" esp is not really meaningful.\", \"post_time\": \"2014-03-05 13:10:37\" },\n\t{ \"post_id\": 5344, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"DSC\", \"post_text\": \"What we've done in the past is create an application-specific hostname, then register that name with the IP address you need on a per-cluster basis. This can be done through either /etc/hosts entries or via zoned DNS entries. If your code references only the hostname, it becomes portable between clusters. Of course, you do have to maintain the DNS entries, but that's typically a one-time or at least low-maintenance activity.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-03-05 13:07:13\" },\n\t{ \"post_id\": 5341, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"balajisampath\", \"post_text\": \"No, I don't want to hard code either IP/hostname\", \"post_time\": \"2014-03-05 01:23:35\" },\n\t{ \"post_id\": 5335, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"tlhumphrey2\", \"post_text\": \"Will Std.system.util.resolvehostname help you?\", \"post_time\": \"2014-03-04 18:58:52\" },\n\t{ \"post_id\": 5334, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"balajisampath\", \"post_text\": \"Thanks for your reply\\n\\nI want my code to work in any environment without any code changes. \\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-03-04 18:52:56\" },\n\t{ \"post_id\": 5325, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I can get the IP address of one of the ESPs I work on using the following:\\nStd.system.util.resolvehostname('dataland_esp.br.seisint.com');\\n\\nSo, if you know the name then you can get the IP address using the above. 
But, if you know the name, you can enter that name into hour SOAPCALL.\", \"post_time\": \"2014-03-04 15:47:30\" },\n\t{ \"post_id\": 5324, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Re: Is there a way to get ESP IP using ECL?\", \"username\": \"tlhumphrey2\", \"post_text\": \"There are many functions in the Std.system library that get this type of information, for example Std.system.Job.daliserver(), gets the IP address of the dali. But, I don't see any function that gets the IP of the ESP.\\n\\nMaybe if you tells us what you need the ESP IP address for, we can find something else that will help you.\", \"post_time\": \"2014-03-04 15:31:37\" },\n\t{ \"post_id\": 5318, \"topic_id\": 1225, \"forum_id\": 8, \"post_subject\": \"Is there a way to get ESP IP using ECL?\", \"username\": \"balajisampath\", \"post_text\": \"Is there a way to get the ESP IP address from ECL code?\\nI need to identify the currently connected ESP IP for making SOAP calls\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-03-04 02:46:26\" },\n\t{ \"post_id\": 5350, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"jsmith\", \"post_text\": \"Yes I agree.\\n\\nA Keyed Join will be used automatically if the RHS is an index on fields that are in your join expression. A Keyed Join, is fundamentally a disk based and random access event and since it's a b-tree, each lookup may involve several key page loads, and since the key is distributed, unless you've co-distributed your LHS, it will be pulling those key pages over the network from other nodes.\\n\\nA memory cache helps mitigate these pain points to some extent, but if your LHS is large and your index is much bigger than the cache, then the cache will be ineffective and the per lookup avg. will become expensive.\\nThis cache is separate from the 'globalMemorySize' config. option (it should probably be integrated to use the same memory pool at some point). There is limited control over it's size at the moment.\\n\\nYou're probably better off using a standard join with the size you involved.\", \"post_time\": \"2014-03-05 17:42:05\" },\n\t{ \"post_id\": 5343, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"richardkchapman\", \"post_text\": \"If you are doing a join to an index in thor, with a large left-hand-side, it's often advantageous to force it to use a standard join rather than a keyed-join. The exact crossover point where this becomes worthwhile will depend on the size of your RHS.\\n\\nYou can force a standard join rather than a keyed one by putting a PULL() around the RHS index.\", \"post_time\": \"2014-03-05 10:29:37\" },\n\t{ \"post_id\": 5337, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"rtaylor\", \"post_text\": \"I would try using config manager to change that value from 40Gb to 98Gb and see what happens.
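Going back to richardkchapman's PULL() suggestion a few posts up, the change is a one-liner; the dataset and index names here are only placeholders for illustration.

// Wrapping the index RHS in PULL() reads it as an ordinary dataset,
// so a standard join is used instead of a keyed join against the index.
j := JOIN(bigLeftDs, PULL(myIndex),
          LEFT.key = RIGHT.key,
          TRANSFORM(RIGHT));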
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-04 19:03:17\" },\n\t{ \"post_id\": 5333, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"ubarsmx\", \"post_text\": \"Thanks Richard. Would the memory limit be set in the globalMemorySize attribute of the ThorCluster tag in environment.xml? If so, and using your numbers, that would explain it as it is set to about 40GB per node (single instance of Thor). I can see if the admin can up it.\", \"post_time\": \"2014-03-04 18:51:37\" },\n\t{ \"post_id\": 5331, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"rtaylor\", \"post_text\": \"So, given 14 nodes, each index leaf node part should be about 457Mb for 400 million, 4.57Gb for 4 billion and 45.7Gb for 40 billion. And, given 98Gb RAM on each node ...\\n\\nAre you configured for multiple instances of Thor on this hardware? If so, then that could explain the issue. Otherwise, I'd suggest reporting it in JIRA so the developers can have a look at it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-04 18:36:12\" },\n\t{ \"post_id\": 5330, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"ubarsmx\", \"post_text\": \"It is JOIN(mydataset, myindex, LEFT.key = RIGHT.key)\\nI've never used LOOKUP or ALL with an INDEX, as copying the index parts to each node seems to defeat the purpose of using an index.\", \"post_time\": \"2014-03-04 18:27:11\" },\n\t{ \"post_id\": 5328, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"rtaylor\", \"post_text\": \"Is it a LOOKUP JOIN? If so, then it makes the JOIN operate the same as the ALL option.\", \"post_time\": \"2014-03-04 18:05:27\" },\n\t{ \"post_id\": 5327, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"ubarsmx\", \"post_text\": \"The join isn't an ALL join. Perhaps there's a setting in the environment.xml that determines the maximum memory for each Thor process or maybe there is some overhead that needs to be added in.\", \"post_time\": \"2014-03-04 17:46:08\" },\n\t{ \"post_id\": 5326, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"rtaylor\", \"post_text\": \"
The index is composed of an 8-byte integer key and an 8-byte integer payload.
Since the INDEX is comprised of two binary fields, and we know that compression algorithms don't work as well on binary data as they do on text, I would just multiply the record size (16) times the number of records and use that as a "ballpark figure" for the amount of memory required.\\n16 * 4 = 64 -- then just add the right number of zeroes: \\n400 million = 6.4 Gb\\n4 billion = 64 Gb\\n40 billion = 640 Gb
Since you're running out of memory, and you have a total of 1372 Gb of RAM in your cluster, it sounds to me like you're doing something like an ALL JOIN with the index as the right dataset. So what you're really asking is how much RAM you need on each node to fully contain the index. \\n\\nI can see why the 400 million and 4 billion both work (with 98 Gb RAM on each node) but with 40 billion records, the OS would need to swap to disk, slowing everything down considerably. \\n\\nTherefore, the ALL JOIN (if that's what you're doing) won't scale to 40 billion index entries and you need to find a different solution for that much data. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-04 16:23:04\" },\n\t{ \"post_id\": 5323, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"ubarsmx\", \"post_text\": \"The index is composed of an 8-byte integer key and an 8-byte integer payload. System is 14 nodes, 98GB RAM per node.\\nThe real question would be, how do I calculate the approximate amount of memory needed for the index to be entirely cached in memory?\", \"post_time\": \"2014-03-04 15:18:54\" },\n\t{ \"post_id\": 5322, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"rtaylor\", \"post_text\": \"When the index has 40 billion records the join slows to the point that it will take 600+ hours to complete.
What is the RECORD structure of the INDEX, what size cluster are you currently running on, and how much RAM does each node have? \\n\\nRichard\", \"post_time\": \"2014-03-04 15:08:33\" },\n\t{ \"post_id\": 5320, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Re: Index keys too large to fit in RAM?\", \"username\": \"bforeman\", \"post_text\": \"Interesting question!\\n\\nI believe that the actual size of the index record also plays a role in how many records can be loaded into memory. If you are working with that many records, you should try to use a non-payload or standard index instead of a payload index. \\n\\nWith that said, the amount of memory that an index will allocate is the size of the index record times the number of actual records, and I'm sure there is a little overhead or boundary in each record.\\n\\nI will check with the development team to see if we have a more exact formula.\\n\\nAlso, one of my colleagues reminded me correctly that all indexes are compressed, \\nThe amount of compression varies depending on the actual data, so a formula can only be approximate and should be based upon a valid sampling of the data. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-04 13:52:17\" },\n\t{ \"post_id\": 5319, \"topic_id\": 1226, \"forum_id\": 8, \"post_subject\": \"Index keys too large to fit in RAM?\", \"username\": \"ubarsmx\", \"post_text\": \"I am working with an index and I am seeing a point where I believe the index keys no longer fit into memory. When the index has 400 million records I can run a join in 0.25 hours, when the index has 4 billion records the join runs in 0.50 hours (looks like O (log N) behavior). When the index has 40 billion records the join slows to the point that it will take 600+ hours to complete.\\n\\nIs there a way to calculate the amount of memory an index will need for it to be cached entirely in memory?\", \"post_time\": \"2014-03-04 12:45:34\" },\n\t{ \"post_id\": 5354, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nEverything appears to be working perfectly including the queries.\\n\\nAnd the dummy RECORD never gets written anywhere since it is only in the INTERFACE. In fact I could have kept the interface clean with no code in it like this. \\n\\n\\nEXPORT FileCopier := INTERFACE\\n\\n\\n EXPORT Copy(STRING source, STRING destination) := FUNCTION\\n \\n \\n RETURN SEQUENTIAL(OUTPUT('Please Override this'));\\n \\n END;\\n \\n\\nEND;\\n
\", \"post_time\": \"2014-03-07 05:27:45\" },\n\t{ \"post_id\": 5352, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nMy concern is that the DFU keeps the metadata about logical files written within the HPCC, and that includes your "dummy" RECORD structure. So have you tested that the resulting logical file is still usable in the ECL code that queries the superfile it becomes part of?\\n\\nRichard\", \"post_time\": \"2014-03-06 16:02:00\" },\n\t{ \"post_id\": 5351, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nWe were able to use the OUTPUT workaround and resolve this.\\n\\nSince the client is unaware of the superfile name and the intricacies of rollup it cannot pass in the correct dataset. All that the client does is do something like\\n\\nrollup.write(someDataset);\\n\\nSo we defined an interface called FileCopier\\n\\n\\nEXPORT FileCopier := INTERFACE\\n\\n\\tSHARED DummyRec := RECORD\\n\\t\\n\\t\\tSTRING hello;\\n\\t\\t\\n\\tEND;\\n\\n\\tEXPORT Copy(STRING source, STRING destination) := FUNCTION\\n\\t\\n\\t\\tcontents := DATASET(source,DummyRec,THOR);\\n\\t\\t\\n\\t\\tRETURN SEQUENTIAL(OUTPUT(contents,,destination,THOR,OVERWRITE));\\n\\t\\t\\n\\tEND;\\n\\t\\n\\nEND;\\n
\\n\\nAnd then had the client pass in an concrete implementation of that. There might be better ways of doing this, but this worked.\\n\\nConcrete copier:\\n\\n\\nIMPORT MutableFile;\\n\\nEXPORT MessageCopier := MODULE(MutableFile.FileCopier)\\n\\n\\t\\n\\tEXPORT Copy(STRING source, STRING destination) := FUNCTION\\n\\t\\n\\t\\tcontents := DATASET(source,$.DataStructure.MessageRec,THOR);\\n\\t\\t\\n\\t\\tRETURN OUTPUT(contents,,destination,THOR);\\n\\t\\t\\n\\t\\t\\n\\tEND;\\n\\nEND;\\n
\\n\\nThe actual copying happens like this:\\n\\n\\n\\tEXPORT DoHourlyRollup() := FUNCTION\\n\\t\\n\\t\\trolledUpFileName := GetHourlyRollupFileName();\\n\\t\\tact := SEQUENTIAL\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t//Copy Hour SF as Hour LF\\n\\t\\t\\t\\t\\t\\t\\t\\tcopier.copy(kCurrentHourSF,rolledUpFileName);\\n.\\n.\\n.\\n
\", \"post_time\": \"2014-03-06 06:26:18\" },\n\t{ \"post_id\": 5349, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"rtaylor\", \"post_text\": \"Dan, that discussion relates to this JIRA issue: https://track.hpccsystems.com/browse/HPCC-3195 which is still showing as unresolved. So we need to find a workaround for these guys.If we change the code to use OUTPUT, the client code will also have to pass the record structure in some way - would that be possible?
David, in order to actually use the superfile in any other ECL code, there must be a DATASET definition somewhere (with its requisite RECORD structure); otherwise you can't write any ECL code to query that superfile.\\n\\nTherefore, you need to have that DATASET definition name passed to your CopyFile, which now just becomes a FUNCTIONMACRO to accomplish the OUTPUT appropriately, like this:CopyFile(Source,Destination) := FUNCTIONMACRO\\n RETURN OUTPUT(Source,,Destination,OVERWRITE);\\nENDMACRO;
\\nWhich you can then call like this:\\nSomeFile1 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t\\t\\t\\t{STRING1 Letter});\\n\\t\\t\\t\\t\\t\\nMyRec := RECORD\\n STRING1 Value1;\\n STRING1 Value2;\\nEND;\\n\\nSomeFile2 := DATASET([{'C','G'},\\n {'C','C'},\\n {'A','X'},\\n {'B','G'},\\n {'A','B'}],MyRec);\\t\\t\\t\\nCopyFile(Somefile1,'~RTTEST::TST::Destination1');\\nCopyFile(Somefile2,'~RTTEST::TST::Destination2');
\\nThis way, your actual rollup code stays "RECORD structure agnostic" and your caller just needs to pass the DATASET defintion name instead of the superfile's logical filename.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-05 15:15:27\" },\n\t{ \"post_id\": 5346, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"DSC\", \"post_text\": \"I ran into this before as well. Here's a forum post for it: http://hpccsystems.com/bb/viewtopic.php?t=488&p=2237\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-03-05 13:11:50\" },\n\t{ \"post_id\": 5342, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nViswa is referring to the fact that the OUTPUT only takes a DATASET as the first parameter. The STD.File.Copy function takes a superfile name(STRING). We(I am working in the same project as well) are being forced to use the Copy function because we do not "know" the RECORD structure of the data behind the superfile. \\n\\nThe whole code is inside a reusable(hopefully) "Rollup" MODULE and we are trying to keep the code RECORD structure agnostic. \\n\\nIf we change the code to use OUTPUT, the client code will also have to pass the record structure in some way - would that be possible?\\n\\n\\n\\nThanks,\\n-David\", \"post_time\": \"2014-03-05 06:31:35\" },\n\t{ \"post_id\": 5340, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nYes, you are incorrect. \\n\\nThe first parameter to OUTPUT can simply be the superfile, just like with Copy. This is discussed in the Programmer's Guide article Creating and Maintaining Superfiles -- specifically in the section titled Data Consolidation. My version of your code should work better than the code you already had, since it should NOT get the error you've been seeing. If it does, then there's some other problem at work here.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-04 21:00:07\" },\n\t{ \"post_id\": 5339, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI tried to use copy because its faster than using output. I hope so...
\\n\\nOutput i need to read the entire hourly superfile in to a recordset and then write it to a file, this could downgrade the performance if the size of the file increases each time. Using copy i could just use the name of the superfile and copy it as a new logical file.\\n\\nCorrect me incase i am wrong.\\n\\nNot sure if there is a change in the output syntax.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2014-03-04 19:54:17\" },\n\t{ \"post_id\": 5338, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nI would re-write your code to this:
SHARED BOOLEAN CopyFile(STRING Source,STRING Destination) := \n OUTPUT(Source,,Destination,OVERWRITE);\n\nAddSFAct(STRING SFName, STRING partName) := STD.File.AddSuperFile(SFName,partName);\n\nrolledUpFileName := GetHourlyRollupFileName(); // get some new file name\n\nSEQUENTIAL(\n//Copy Hour SF as Hour LF \n CopyFile(kCurrentHourSF,rolledUpFileName), \n\n//Add Hour LF to Day SF\n AddSFAct(kCurrentDaySF,rolledUpFileName),\n \n//Clear Hour SF\n STD.File.ClearSuperFile(kCurrentHourSF,TRUE)\n);
Using a FUNCTION structure as a wrapper around a single expression or action is just doing too much typing -- there is no benefit to it.\\n\\nNote also that instead of the STD.File.Copy() function, I'm using OUTPUT to accomplish your rollup. The fact that the failure happens when the file size gets around 2Gb sounds suspiciously like a file size limitation in the Copy function code that is definitely NOT present in OUTPUT.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-04 19:37:46\" },\n\t{ \"post_id\": 5336, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThe hourly rollup code and the copy file logic.\\n\\nCopy Logic :\\n\\nSHARED BOOLEAN CopyFile(STRING Source,STRING Destination, BOOLEAN AsSuperFile) := FUNCTION\\n\\nAct\\t:= STD.File.Copy(Source,'',Destination,,-1,,,allowoverwrite:=TRUE,AsSuperfile:=AsSuperFile);\\n\\nRETURN WHEN(TRUE,Act);\\nEND;\\n
\n\nAdd Superfile Logic :\n\nAddSFAct(STRING SFName, STRING partName) := FUNCTION\n\nact := STD.File.AddSuperFile(SFName,partName);\nRETURN act;\nEND;\n
\n\nHourly Rollup Logic :\n\nDoHourlyRollup() := FUNCTION\n\nrolledUpFileName := GetHourlyRollupFileName(); // get some new file name\n\nact := SEQUENTIAL(\n//Copy Hour SF as Hour LF\nCopyFile(kCurrentHourSF,rolledUpFileName,FALSE),\n\n//Add Hour LF to Day SF\nAddSFAct(kCurrentDaySF,rolledUpFileName),\n\n//Clear Hour SF\nSTD.File.ClearSuperFile(kCurrentHourSF,TRUE)\n);\n\nRETURN act;\nEND;\n
\\n\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2014-03-04 18:59:04\" },\n\t{ \"post_id\": 5332, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Re: Error while copying from source to destination\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nWhat does your rollup code look like?\\n\\nRichard\", \"post_time\": \"2014-03-04 18:38:40\" },\n\t{ \"post_id\": 5329, \"topic_id\": 1228, \"forum_id\": 8, \"post_subject\": \"Error while copying from source to destination\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where i do a hourly rollup , daily rollup etc.\\n\\nThe rollup logic works perfectly well for small sets of data.\\n\\nIncase the dataset size increases more than 1 or 2 GB rollup fails and i get the following error :\\n\\n\\n"Source file format is not specified or is unsuitable for (re-)partitioning"\\n
\\nSteps :\\n\\n1.) Create a hour sf\\n2.) After a hour do a hourly rollup , copy the hourly superfile to a new file and add the same in to a new superfile and clear the hour sf.\\n\\nEncounter this error when i try to do a copy using "std.file.copy" and when the hour superfile size is comparatively large ( around 1 GB to 2 GB )\\n\\nDo we need to change any settings in the environment xml to increase the memory size for copy..?\\n\\nI use the following version : community_4.0.2-2\\n\\nKindly suggest.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2014-03-04 18:26:13\" },\n\t{ \"post_id\": 6814, \"topic_id\": 1231, \"forum_id\": 8, \"post_subject\": \"Re: Is there a nosplit option for a remote Std.File.Copy ?\", \"username\": \"Allan\", \"post_text\": \"Thanks very much dustinskaggs\\n\\nYour post has helped out immensely.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-01-20 16:53:22\" },\n\t{ \"post_id\": 5366, \"topic_id\": 1231, \"forum_id\": 8, \"post_subject\": \"Re: Is there a nosplit option for a remote Std.File.Copy ?\", \"username\": \"dustinskaggs\", \"post_text\": \"I've always just made FileSpray soapcalls from ECL code to do copies in order to have access to all of the available options, including nosplit. Below is some example code for making the soapcall but I'm not sure I ever tested it on OSS.\\n\\nimport ClusterInfo;\\n// TODO: don't use ClusterInfo, get url from parm defaulted to GETENV('ws_fs_server')\\n// TODO: make defaults the same as the normal call to fCopy\\nimport lib_system;\\n\\nurl := ClusterInfo.Systems.espsmc_url +'/FileSpray?ver_=1.03';\\nsrvc := 'Copy';\\nlocalDali := ClusterInfo.Systems.Dali;\\n\\n// Returns the dfu workunit that was created.\\nexport fCopy(string sourceName,\\n string destinationGroup,\\n string destinationName,\\n string sourceDali = localDali,\\n integer timeout = -1, // not using\\n string espServerIpPort = lib_system.ws_fs_server, // not using\\n integer maxConnection = 200,\\n boolean allowOverwrite = false,\\n boolean replicate = false,\\n boolean asSuperFile = false,\\n boolean noSplit = false,\\n integer transferBufferSize = 100000000) := soapcall(\\n\\t\\turl,\\n\\t\\tsrvc,\\n\\t\\t{string sourceLogicalName {maxlength(512), xpath('sourceLogicalName')} := sourceName,\\n\\t\\tstring sourceDali {maxlength(30), xpath('sourceDali')} := sourceDali,\\n\\t\\tstring destGroup {maxlength(100), xpath('destGroup')} := destinationGroup,\\n\\t\\tstring destLogicalName {maxlength(512), xpath('destLogicalName')} := destinationName, \\n\\t\\tinteger maxConnections {xpath('maxConnections')} := maxConnection, \\n\\t\\tinteger bufferSize {xpath('transferBufferSize')} := transferBufferSize, \\n\\t\\t// nosplit is needed to copy a dataset to a smaller thor\\n\\t\\tboolean nosplitOpt {xpath('nosplit')} := noSplit,\\n\\t\\tboolean doOverwrite {xpath('overwrite')} := allowOverwrite, \\n\\t\\tboolean supercopy {xpath('superCopy')} := asSuperFile}, \\n\\t\\t{string result {maxlength(30), xpath('result')}}, xpath('CopyResponse'),\\n\\t\\ttimeout(0), retry(0), literal\\n).result; // result contains the dfu workunit
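For reference, a hypothetical call to the fCopy wrapper above might look like the following, assuming the definition is visible in scope (for example via IMPORT). The logical names and target group are placeholders; named parameters are used so the nosplit flag can be set without spelling out every default.

dfuWuid := fCopy('~thor::some::source::file',        // source logical name
                 'mythor',                            // destination cluster group
                 '~thor::some::destination::file',    // destination logical name
                 allowOverwrite := TRUE,
                 noSplit := TRUE);                    // keep part boundaries when copying to a smaller cluster
OUTPUT(dfuWuid);                                      // the DFU workunit id returned by the service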
\", \"post_time\": \"2014-03-10 14:13:43\" },\n\t{ \"post_id\": 5363, \"topic_id\": 1231, \"forum_id\": 8, \"post_subject\": \"Re: Is there a nosplit option for a remote Std.File.Copy ?\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nNot sure about the correct answer, here is what i think are the possible alternatives (may be a temporary solution)\\n1) Despray the data and then spray it on to the target cluster\\n2) As you have already mentioned that the DFUPlus and the ESP File Copy UI has the required options to do this (manually test and confirm the correctness), you can use the command line client tools and call the required command from a Java code which is embedded/called into/from ECL\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-03-10 09:27:42\" },\n\t{ \"post_id\": 5356, \"topic_id\": 1231, \"forum_id\": 8, \"post_subject\": \"Is there a nosplit option for a remote Std.File.Copy ?\", \"username\": \"drealeed\", \"post_text\": \"I'm trying to use Std.File.Copy to copy a file from a remote (8-node) thor cluster to a 1-node thor cluster.\\n\\nWhen I do this, I get the error: "Source file format is not specified or is unsuitable for (re-)partitioning".\\n\\nI did some research and found that this can happen when copying a variable length record (i.e., one with child datasets) from a multi-node cluster to a single-node thor. DFUPlus and the ESP File Copy UI have the nosplit option, which eliminates the problem; but there's no nosplit parameter for the File.Copy.\\n\\nIs there a way to do this programatically from ECL?\", \"post_time\": \"2014-03-07 19:22:21\" },\n\t{ \"post_id\": 5362, \"topic_id\": 1233, \"forum_id\": 8, \"post_subject\": \"Re: Extracting xml data issue in particular format\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nCan you go through "Working with XML Data" section of ECLProgrammersGuide\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-4.2.0/docs/ECLProgrammersGuide-4.2.0-1.pdf\\n\\nThis might help you to create proper records layouts and parse/extract the data in required format\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-03-10 09:17:28\" },\n\t{ \"post_id\": 5359, \"topic_id\": 1233, \"forum_id\": 8, \"post_subject\": \"Extracting xml data issue in particular format\", \"username\": \"gopi\", \"post_text\": \"Hi All,\\n\\nI am facing an issue in extracting data in particular format from HPCC ECL code. Can anyone please help me in extracting the data. I have explained below about the xml, ECL code and the expected output. 
\\n\\n\\n[color=#4080FF:183bwjkt]I sprayed the following xml file with “product” as a row tag.\\n\\n/* an XML file called "MyFile" contains this XML data:\\n<?xml version="1.0" encoding="UTF-8" standalone="no"?>\\n<Product name="la" >\\n <Line>\\n <Parameter name="hostip">198.185.23.226</Parameter>\\n <Parameter name="identifier">-</Parameter>\\n <Parameter name="userid">-</Parameter>\\n <Parameter name="time">01/Jan/2014:07:38:57 -0500</Parameter>\\n <RequestURI>\\n <Parameter name="httpMethod">GET</Parameter>\\n <Parameter name="url">/abc.watag</Parameter>\\n </RequestURI>\\n </Line>\\n <Line>\\n <Parameter name="hostip">198.185.23.225</Parameter>\\n <Parameter name="identifier">-</Parameter>\\n <Parameter name="userid">-</Parameter>\\n <Parameter name="time">01/Jan/2014:07:39:04 -0500</Parameter>\\n <RequestURI>\\n <Parameter name="httpMethod">GET</Parameter>\\n <Parameter name="url">/xyz.watag</Parameter>\\n </RequestURI>\\n </Line>\\n</Product>\\n*/\\n\\n[color=#4080FF:183bwjkt]And I am reading the xml file using below ECL code, \\n\\nLay_Data := Record,maxlength(5000000)\\n\\tString hostip{xpath('Parameter[@name=\\\\'hostip\\\\']')};\\n\\tString identifier{xpath('Parameter[@name=\\\\'identifier\\\\']')};\\n\\tString userid{xpath('Parameter[@name=\\\\'userid\\\\']')};\\n\\tString time{xpath('Parameter[@name=\\\\'time\\\\']')};\\nEnd;\\nds_data := DATASET('~base::sample.xml',Lay_Data,XML('Product/Line'));\\nOutput(ds_data);\\n\\n[color=#4080FF:183bwjkt]I am getting the below output (all field values in single column - Parameter),\\n\\nParameter\\n198.185.23.226--01/Jan/2014:07:38:57 -0500\\n198.185.23.225--01/Jan/2014:07:39:04 -0500\\n\\n\\n[color=#4080FF:183bwjkt]But I need the output in the below format only (4 column),\\nhostip\\tidentifier\\tuserid\\ttime\\n198.185.23.226\\t-\\t-\\t01/Jan/2014:07:38:57 -0500\\n198.185.23.225\\t-\\t-\\t01/Jan/2014:07:39:04 -0500\\n\\ncan anyone help me in getting the output in the above mentioned format?\\n\\nThanks\", \"post_time\": \"2014-03-10 06:39:16\" },\n\t{ \"post_id\": 5382, \"topic_id\": 1234, \"forum_id\": 8, \"post_subject\": \"Re: Is there any function for Time manipulation?\", \"username\": \"kevinLv\", \"post_text\": \"Thanks alot, it's really helpful.\", \"post_time\": \"2014-03-13 03:08:28\" },\n\t{ \"post_id\": 5365, \"topic_id\": 1234, \"forum_id\": 8, \"post_subject\": \"Re: Is there any function for Time manipulation?\", \"username\": \"DSC\", \"post_text\": \"I wrote a Bundle that had some time-oriented code that you may find interesting. 
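On the XML-extraction question above: one pattern from the "Working with XML Data" section Sameer points to is to read the repeating Parameter elements into a child dataset and then pick out the named values in a PROJECT. A rough sketch follows, reusing the file and tag names from the post; the XPATH('') form used for the element text is an assumption here and should be verified against the Programmer's Guide.

ParamRec := RECORD
  STRING pname {XPATH('@name')};   // the name attribute, e.g. 'hostip'
  STRING pval  {XPATH('')};        // the element's text content
END;

LineRec := RECORD
  DATASET(ParamRec) params {XPATH('Parameter')};
END;

raw := DATASET('~base::sample.xml', LineRec, XML('Product/Line'));

OutRec := RECORD
  STRING hostip;
  STRING identifier;
  STRING userid;
  STRING logtime;    // 'time' renamed here just to avoid any clash with built-in names
END;

OutRec getFields(LineRec l) := TRANSFORM
  SELF.hostip     := l.params(pname = 'hostip')[1].pval;
  SELF.identifier := l.params(pname = 'identifier')[1].pval;
  SELF.userid     := l.params(pname = 'userid')[1].pval;
  SELF.logtime    := l.params(pname = 'time')[1].pval;
END;

OUTPUT(PROJECT(raw, getFields(LEFT)));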
It's up on github: https://github.com/dcamper/ecl-bundles/blob/SysTime/SysTime.ecl.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-03-10 13:59:41\" },\n\t{ \"post_id\": 5361, \"topic_id\": 1234, \"forum_id\": 8, \"post_subject\": \"Re: Is there any function for Time manipulation?\", \"username\": \"sameermsc\", \"post_text\": \"Hi Kevin,\\n\\nPer my knowledge, There are now standard library function available to do this, probably you should try writing your own functions using C++ code (BEGINC++)\\n\\nalso check if you can make use of any of the existing functions in "Workunit Services" to do your tasks\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-03-10 09:12:52\" },\n\t{ \"post_id\": 5360, \"topic_id\": 1234, \"forum_id\": 8, \"post_subject\": \"Is there any function for Time manipulation?\", \"username\": \"kevinLv\", \"post_text\": \"In standrad ecl library, I just found date module, do we have any modules for time manipulation, for example \\nGetCurrentTime; AddHours; AddMinutes; GetHourPart; GetMinutesPart; TimeDiffer.... ?\", \"post_time\": \"2014-03-10 07:04:36\" },\n\t{ \"post_id\": 5388, \"topic_id\": 1241, \"forum_id\": 8, \"post_subject\": \"Re: Alternatives to LOOP for sampling with unequal probabili\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks a lot. I will work to incorporate this solution and test with actual data.\\n\\nI had actually gone down a similiar road earlier (see an early version of my code below), but gotten very wrong results. Actually, just perusing your code, I'm not sure how my results veered so wildly. \\n\\n\\nIMPORT ML_Mat.Vec AS Vec;\\n\\nInput_rec := RECORD\\n\\tunsigned8 idl;\\n \\tunsigned2 f1;\\n\\tunsigned2 f2;\\n\\tinteger8 f3;\\nEND;\\n \\nDesiredDist_Rec := RECORD\\n \\tunsigned2 f1;\\n\\tunsigned2 f2;\\n\\tinteger8 f3;\\n\\tinteger8 cnt;\\nEND;\\n\\nInputData := DATASET( '~testdata1k', Input_rec, csv(heading(1), separator(','),quote('"'),TERMINATOR(['\\\\n', '\\\\r\\\\n']), MAXLENGTH(1000)));\\nDesiredDist := DATASET( '~desireddist', DesiredDist_rec, csv(heading(1), separator(','),quote('"'),TERMINATOR(['\\\\n', '\\\\r\\\\n']), MAXLENGTH(1000)));\\nSampleSize := 100;\\n\\nSize := JOIN(InputData, DesiredDist,\\n\\t\\t\\t\\tleft.f1 = right.f1 and\\n\\t\\t\\t\\tleft.f2 = right.f2 and\\n\\t\\t\\t\\tleft.f3 = right.f3,\\n\\t\\t\\t\\tTRANSFORM({InputData.idl, weight := DesiredDist.cnt, integer8 ProgSum, integer8 rnd, integer8 rndid, integer8 select},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.idl := left.idl;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.weight := right.cnt;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.ProgSum := 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.rnd := RANDOM();\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.rndid := 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.select := 0),\\n\\t\\t\\t\\tLeft Outer, LOOKUP);\\n\\noutput(Size, named('Size'));\\n\\nrndSortSize := distribute(sort(Size, rnd), hash32(rnd));\\n\\nTransRec := RECORD\\n\\trecordof(rndSortSize);\\nEND;\\n\\nTransRec Cumulative(TransRec l, TransRec r, integer8 C):= TRANSFORM\\n\\t\\t\\tSELF.ProgSum := r.weight + l.progsum;\\n\\t\\t\\tSELF.rndid := C;\\n\\t\\t\\tSELF:=r;\\nEND;\\n\\nProgressiveSum := ITERATE(rndSortSize, Cumulative(LEFT,RIGHT,COUNTER));\\n\\noutput(ProgressiveSum, named('ProgressiveSum'));\\n\\nN:= count(ProgressiveSum);\\nX := max(ProgressiveSum, ProgSum);\\nStep := roundup(X/SampleSize);\\n\\noutput(N, named('N'));\\noutput(X, named('X'));\\noutput(Step, named('Step'));\\n\\nStepVector := Vec.From(Step);\\nStepRandom := PROJECT(StepVector,\\n\\t\\t\\t\\t\\t\\t\\tTRANSFORM({StepVector.x, 
integer8 rnd},\\n\\t\\t\\t\\t\\t\\t\\tSELF.rnd := RANDOM();\\n\\t\\t\\t\\t\\t\\t\\tSELF := LEFT));\\nMaxRnd := MAX(StepRandom, rnd);\\nStart := MAX(StepRandom(rnd = MaxRnd), x);\\n\\noutput(Start, named('Start'));\\n\\nCutoffVec := Vec.From(SampleSize);\\nCutoff := PROJECT(CutoffVec,\\n\\t\\t\\t\\t\\t\\t\\tTRANSFORM({CutoffVec.x, integer8 k},\\n\\t\\t\\t\\t\\t\\t\\tSELF.k := Start + (left.x-1)*Step;\\n\\t\\t\\t\\t\\t\\t\\tSELF := LEFT));\\n\\noutput(Cutoff, named('Cutoff'));\\n\\ndistCutoff := distribute(sort(Cutoff, k), hash32(k));\\n\\nSetSelect := JOIN(ProgressiveSum, distCutoff,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t left.ProgSum >= right.k and \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t left.ProgSum < (right.k + step),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t transform({recordof(left)}, self.select := right.k; self := left),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t all, local);\\n\\noutput(SetSelect, named('SetSelect'));\\n\\nGrpSelect := TABLE(SetSelect, {select, selectrnd := min(group, rndid)}, select);\\n\\noutput(GrpSelect, named('GrpSelect'));\\n\\nGetIDs := JOIN(ProgressiveSum, GrpSelect,\\n\\t\\t\\t\\t\\tLEFT.rndid = right.selectrnd,\\n\\t\\t\\t\\t\\tTRANSFORM({ProgressiveSum.idl}, SELF := LEFT));\\n\\noutput(GetIDs, named('GetIDs'));\\n
\", \"post_time\": \"2014-03-14 19:54:40\" },\n\t{ \"post_id\": 5387, \"topic_id\": 1241, \"forum_id\": 8, \"post_subject\": \"Re: Alternatives to LOOP for sampling with unequal probabili\", \"username\": \"DSC\", \"post_text\": \"Based more on the graphic you included in your post than anything else, here is a different way to approach the problem:\\n\\nInputRec := RECORD\\n UNSIGNED4 unitNumber;\\n UNSIGNED4 size;\\n UNSIGNED8 progressiveSum := 0;\\nEND;\\n\\n// Recordset with raw data\\nd1 := DATASET\\n (\\n [\\n {1,15},\\n {2,81},\\n {3,26},\\n {4,42},\\n {5,20},\\n {6,16},\\n {7,45},\\n {8,55}\\n ],\\n InputRec\\n );\\n\\n// Add running total\\nd2 := ITERATE\\n (\\n d1,\\n TRANSFORM\\n (\\n InputRec,\\n SELF.progressiveSum := LEFT.progressiveSum + RIGHT.size,\\n SELF := RIGHT\\n )\\n );\\n\\nOUTPUT(d2,NAMED('raw_data'));\\n\\n// Create a recordset containing the selection criteria values\\nCriteriaRec := RECORD\\n UNSIGNED8 n;\\nEND;\\n\\nkOffset := 36;\\n\\nCriteriaRec MakeCriteriaRec (UNSIGNED4 c) := TRANSFORM\\n SELF.n := 100 * (c - 1) + kOffset;\\nEND;\\n\\ns := DATASET(COUNT(d2),MakeCriteriaRec(COUNTER));\\n\\nOUTPUT(s,NAMED('criteria_scratchpad'));\\n\\n// Join the raw data against the selection criteria values, retaining\\n// all matches where the running total exceeds the criteria value\\nj := JOIN\\n (\\n d2,\\n s,\\n LEFT.progressiveSum >= RIGHT.n,\\n ALL\\n );\\n\\nOUTPUT(j,NAMED('joined_data'));\\n\\n// Sort and dedup the data\\njs := SORT(j,n,unitNumber);\\n\\nOUTPUT(js,NAMED('joined_data_sorted'));\\n\\ndd := DEDUP(js,n);\\n\\nOUTPUT(dd,NAMED('final_result'));
\\nThere are a number of OUTPUT() statements in there in order to show what is going on. The actual result winds up in the 'dd' attribute. Hopefully I didn't butcher the creation of the sample data or the selection criteria too much, making either inappropriate as a solution.\\n\\nThere are probably some optimizations that could be applied. Choosing a more appropriate number of selection criteria records to generate, for one. Adjusting the JOIN criteria and options, for another.\\n\\nHope this helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-03-14 19:03:15\" },\n\t{ \"post_id\": 5386, \"topic_id\": 1241, \"forum_id\": 8, \"post_subject\": \"Alternatives to LOOP for sampling with unequal probabilities\", \"username\": \"janet.anderson\", \"post_text\": \"I need to sample a population, but some records should be weighted such that they are more likely to be selected than others. I am trying to implement this with a LOOP, but I get the following error: \\nError: INTERNAL: Dataset is not active: 'left' (75, 8), 4153\\n\\nI also suspect that implementing this on a large population will not be very efficient. Attached is a pic explaining the general algorithm that I am trying to implement: basically, I have the records sorted in a random order and a field with the cumulative sum of the weights. I want to select the first record that has a cumulative sum greater than or equal to a multiple of the LOOP counter. \\n\\nCan you suggest a way to implement the LOOP or an alternative way to think about the problem?\\n\\n// Similar to Horvitz and Thompson theory for unequal probability sampling without replacement\\n\\nIMPORT ML_Mat.Vec AS Vec;\\n\\nInput_rec := RECORD\\n\\tunsigned8 idl;\\n \\tunsigned2 f1;\\n\\tunsigned2 f2;\\n\\tinteger8 f3;\\nEND;\\n \\nDesiredDist_Rec := RECORD\\n \\tunsigned2 f1;\\n\\tunsigned2 f2;\\n\\tinteger8 f3;\\n\\tinteger8 cnt;\\nEND;\\n\\nInputData := DATASET( '~testdata1k', Input_rec, csv(heading(1), separator(','),quote('"'),TERMINATOR(['\\\\n', '\\\\r\\\\n']), MAXLENGTH(1000)));\\nDesiredDist := DATASET( '~desireddist', DesiredDist_rec, csv(heading(1), separator(','),quote('"'),TERMINATOR(['\\\\n', '\\\\r\\\\n']), MAXLENGTH(1000)));\\nSampleSize := 100;\\n\\nSize := JOIN(InputData, DesiredDist,\\n\\t\\t\\t\\tleft.f1 = right.f1 and\\n\\t\\t\\t\\tleft.f2 = right.f2 and\\n\\t\\t\\t\\tleft.f3 = right.f3,\\n\\t\\t\\t\\tTRANSFORM({InputData.idl, weight := DesiredDist.cnt, integer8 ProgSum, integer8 rnd, integer8 rndid, integer8 select},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.idl := left.idl;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.weight := right.cnt;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.ProgSum := 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.rnd := RANDOM();\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.rndid := 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.select := 0),\\n\\t\\t\\t\\tLeft Outer, LOOKUP);\\n\\noutput(Size, named('Size'));\\n\\nrndSortSize := distribute(sort(Size, rnd), hash32(rnd));\\n\\nTransRec := RECORD\\n\\trecordof(rndSortSize);\\nEND;\\n\\nTransRec Cumulative(TransRec l, TransRec r, integer8 C):= TRANSFORM\\n\\t\\t\\tSELF.ProgSum := r.weight + l.progsum;\\n\\t\\t\\tSELF.rndid := C;\\n\\t\\t\\tSELF:=r;\\nEND;\\n\\nProgressiveSum := ITERATE(rndSortSize, Cumulative(LEFT,RIGHT,COUNTER));\\n\\noutput(ProgressiveSum, named('ProgressiveSum'));\\n\\nN:= count(ProgressiveSum);\\nX := max(ProgressiveSum, ProgSum);\\nStep := roundup(X/SampleSize);\\n\\noutput(N, named('N'));\\noutput(X, named('X'));\\noutput(Step, named('Step'));\\n\\nStepVector := Vec.From(Step);\\nStepRandom := 
PROJECT(StepVector,\\n\\t\\t\\t\\t\\t\\t\\tTRANSFORM({StepVector.x, integer8 rnd},\\n\\t\\t\\t\\t\\t\\t\\tSELF.rnd := RANDOM();\\n\\t\\t\\t\\t\\t\\t\\tSELF := LEFT));\\nMaxRnd := MAX(StepRandom, rnd);\\nStart := MAX(StepRandom(rnd = MaxRnd), x);\\n\\noutput(Start, named('Start'));\\n\\nSetSelect := LOOP(ProgressiveSum,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t LEFT.progsum < (start + COUNTER*step),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t LEFT.progsum >= (start + COUNTER*step),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t PROJECT(ROWS(LEFT),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t TRANSFORM(TransRec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t SELF.select := COUNTER;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t SELF := LEFT)));\\n\\noutput(SetSelect, named('SetSelect'));\\n
\", \"post_time\": \"2014-03-14 15:32:46\" },\n\t{ \"post_id\": 5411, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"tlhumphrey2\", \"post_text\": \"I was told that LOOKUP by itself in a JOIN or with LOCAL causes a hash lookup into the right dataset. So, I'm surprised that LOCAL causes the JOIN to run 10 times slower.\\n\\nWhat build are you build running on?\", \"post_time\": \"2014-03-21 19:01:29\" },\n\t{ \"post_id\": 5410, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"jsmith\", \"post_text\": \"What build are you running on?\\nI can't think why the LOCAL variety would be slower - it should be substantially quicker and in a very brief ECL kludged test of my own it was (over 10x quicker). Which is what I'd expect.\\nThe LOCAL variety doesn't need to duplicate the RHS across all slaves.\\n\\n\\nDS1 := DISTRIBUTE(DS0, HASH32(strFunc(line1 + line2, FALSE)));\\n..\\njDS := JOIN(DS1, DS2, strFunc(LEFT.line1 + LEFT.line2, FALSE) = RIGHT.Uncleaned, LOOKUP, LOCAL);\\n
\\n\\nIn the std. (non-local) case - the DISTRIBUTE (of DS0) isn't needed. It _could_ be streamed locally, since the entire global RHS is replicated (in a HT) on all slaves.\\nI wonder if the code optimizer is spotting that and that you don't use the distribution beyond the JOIN and optimizing the DISTRIBUTE away in the non-local case..?\\n\\nCan you attach a full example?\", \"post_time\": \"2014-03-21 18:34:45\" },\n\t{ \"post_id\": 5409, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"tlhumphrey2\", \"post_text\": \"I looked at past emails that our compiler expert sent me regarding ECL problems I've had. Once I used both LOOKUP and LOCAL in a JOIN and our expert told me that LOCAL on a LOOKUP JOIN means the right dataset won’t be fully copied to every node like it normally would be for a LOOKUP JOIN.\\n\\nTherefore, I’m thinking that possibly this may also mean that a hash lookup into the right dataset may not occur, too, which might explain why this JOIN is slower than the one with just LOOKUP.\", \"post_time\": \"2014-03-21 14:27:32\" },\n\t{ \"post_id\": 5408, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"tlhumphrey2\", \"post_text\": \"Above, Richard said that LOOKUP and LOCAL are mutually exclusive, meaning they can't be used together. So, first I'm surprised you didn't get an error for your 1st JOIN (containing both LOOKUP and LOCAL).\\n\\nBut, since you did use them together without getting an error, I'm not sure what the behavior will be. Let me get someone who really knows the compiler to respond to this.\", \"post_time\": \"2014-03-21 14:02:18\" },\n\t{ \"post_id\": 5407, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"oleg\", \"post_text\": \"DS1 := DISTRIBUTE(DS0, HASH32(strFunc(line1 + line2, FALSE)));\\nAnd the I've tried either one of the following:\\njDS := JOIN(DS1, DS2, strFunc(LEFT.line1 + LEFT.line2, FALSE) = RIGHT.Uncleaned, LOOKUP, LOCAL);\\njDS := JOIN(DS1, DS2, strFunc(LEFT.line1 + LEFT.line2, FALSE) = RIGHT.Uncleaned, LOOKUP);\\n\\nThe DS sizes: DS1 ~28 M rec, DS2 ~375 K recs, THOR size is 10 nodes, strFunc – just performs some straight text manipulations\", \"post_time\": \"2014-03-21 13:52:55\" },\n\t{ \"post_id\": 5406, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'd like to see the different JOINs. Can you provide them, please?\\n\\nAnd, for the LOCAL JOIN, how was the two datasets distributed? Were they distributed in the same manner?\\n\\nApproximately how large were the two datasets? And, how many nodes in the thor cluster?\", \"post_time\": \"2014-03-21 12:58:41\" },\n\t{ \"post_id\": 5405, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"rtaylor\", \"post_text\": \"Yes, definitely a valid question. And since I don't know the answer, then someone else will have to respond as to why that is.\\n\\nRichard\", \"post_time\": \"2014-03-21 02:12:10\" },\n\t{ \"post_id\": 5401, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. 
LOCAL, LOOKUP\", \"username\": \"oleg\", \"post_text\": \"Thanks for clarification, Richard!\\n\\nHowever, that means my original question is valid - why LOCAL performs 10 times worse?\", \"post_time\": \"2014-03-20 10:23:25\" },\n\t{ \"post_id\": 5400, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nNo, but thanks for bringing that up. The docs are no longer on my plate and I obviously didn't get the bandwidth to update them with the information in that thread before it slipped off my radar.\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2014-03-20 05:57:52\" },\n\t{ \"post_id\": 5396, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"oleg\", \"post_text\": \"So should I disregard this:\\n\\nviewtopic.php?f=8&t=584&p=2740&hilit=lookup&sid=7c1d961c4cac6ddba55203dacb423265#p2741\\n\\nPlease also note that graphs actually show that 'Local Lookup Join' has been executed.\", \"post_time\": \"2014-03-19 09:42:44\" },\n\t{ \"post_id\": 5394, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"Re: JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nThe JOIN docs say: The following options are mutually exclusive and may only be used to the exclusion of the others in this list: PARTITION LEFT | PARTITION RIGHT | [MANY] LOOKUP | GROUPED | ALL | NOSORT | HASH\\n\\nIn addition to this list, the KEYED and LOCAL options are also mutually exclusive with the options listed above, but not to each other. When both KEYED and LOCAL options are specified, only the INDEX part(s) on each node are accessed by that node.
(emphasis added)\\n\\nSo at one time I had been informed that LOCAL and LOOKUP were mutually exclusive options. Therefore, it does not surprise me that LOCAL,LOOKUP is slower than LOOKUP alone. I'm a bit surprised that LOCAL, LOOKUP is supported at all.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-19 06:50:33\" },\n\t{ \"post_id\": 5391, \"topic_id\": 1242, \"forum_id\": 8, \"post_subject\": \"JOIN: LOOKUP vs. LOCAL, LOOKUP\", \"username\": \"oleg\", \"post_text\": \"I expected about the same kind of performance out of this two join types - hash table access should not depend too much of the table size. \\nProbably LOCAL may be slightly better since the whole data copy does not need to be distributed onto each node.\\n\\nHowever, to my surprise, the LOCAL join performed almost 10 times slower! (~16 sec vs. 1.9 sec).\\n\\nThe DS sizes: LEFT ~28 M rec, RIGHT ~375 K recs, THOR size is 10 nodes.\", \"post_time\": \"2014-03-18 17:45:05\" },\n\t{ \"post_id\": 5395, \"topic_id\": 1243, \"forum_id\": 8, \"post_subject\": \"Re: Restriction\", \"username\": \"bforeman\", \"post_text\": \"I'm pretty sure that is controlled by LDAP, the User accounts and permissions. If I am wrong I will post back.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-19 08:35:11\" },\n\t{ \"post_id\": 5393, \"topic_id\": 1243, \"forum_id\": 8, \"post_subject\": \"Restriction\", \"username\": \"kumar2k14\", \"post_text\": \"How to restrict the logical file with read only and write ....\\n\\nThanks for your help..\", \"post_time\": \"2014-03-18 23:53:21\" },\n\t{ \"post_id\": 5416, \"topic_id\": 1248, \"forum_id\": 8, \"post_subject\": \"Re: Unsupported activity kind: sequential on 4.2\", \"username\": \"bforeman\", \"post_text\": \"What kind of activity is it pointing to? \\n\\nGavin Halliday posted a blog a few days ago regarding SEQUENTIAL, ORDERED, and PARALLEL. It might give you an insight behind the error:\\n\\nhttp://hpccsystems.com/blog/actions-1-sequential-ordered-parallel\\n\\nIf you are certain that what you are doing is acceptable, you should open an issue on JIRA as soon as possible.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-24 13:57:05\" },\n\t{ \"post_id\": 5414, \"topic_id\": 1248, \"forum_id\": 8, \"post_subject\": \"Unsupported activity kind: sequential on 4.2\", \"username\": \"oleg\", \"post_text\": \"We're start getting messages like "Unsupported activity kind: sequential" after switching to 4.2 version. \\nIs there some generic changes in SEQUENTIAL/PARALLEL behavior and what can/cannot be used within such definitions?\", \"post_time\": \"2014-03-24 12:01:43\" },\n\t{ \"post_id\": 5418, \"topic_id\": 1249, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.Copy to copy SuperFiles - Error\", \"username\": \"abhisr\", \"post_text\": \"My err :\\n\\nSTD.File.Copy(srcFile,'',destFile,,-1,,,TRUE,,[color=#FF40FF:2b9kfgi5]TRUE);\\n\\nshould have set asSuperfile (last param) to TRUE\", \"post_time\": \"2014-03-24 15:05:04\" },\n\t{ \"post_id\": 5417, \"topic_id\": 1249, \"forum_id\": 8, \"post_subject\": \"STD.File.Copy to copy SuperFiles - Error\", \"username\": \"abhisr\", \"post_text\": \"\\nsrcFile := '~thor::qc::aut::keyscompare::2014:;testsrc';\\n\\ndestFile := 'thor::qc::aut::keyscompare::2014::analytics_dup';\\nSTD.File.Copy(srcFile,'thor11',destFile,,-1,,,TRUE,TRUE,FALSE);
\\nI am getting the error Error: \\n[color=#FF0000:25wdueo8]Error: System error: 0: DFUServer Error Failed: Source file format is not specified or is unsuitable for (re-)partitioning (0, 0), 0, \\nwhen i tries to copy the super file.\\n\\nis it possible to copy super file using Std.File.Copy\", \"post_time\": \"2014-03-24 14:24:47\" },\n\t{ \"post_id\": 5446, \"topic_id\": 1254, \"forum_id\": 8, \"post_subject\": \"Re: Using both maxlength and maxcount for sets of strings\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nIf your opinion is that they should be enforced, then submitting a JIRA ticket would be your next move. I don't have an opinion on whether they should or should not be. \\n\\nRichards\", \"post_time\": \"2014-04-01 13:07:57\" },\n\t{ \"post_id\": 5444, \"topic_id\": 1254, \"forum_id\": 8, \"post_subject\": \"Re: Using both maxlength and maxcount for sets of strings\", \"username\": \"jwilt\", \"post_text\": \"Hmm... should these be enforced, in your example?\\nThanks again.\", \"post_time\": \"2014-04-01 04:49:54\" },\n\t{ \"post_id\": 5430, \"topic_id\": 1254, \"forum_id\": 8, \"post_subject\": \"Re: Using both maxlength and maxcount for sets of strings\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nThis syntax works on my machine:
MyStringType := STRING{MAXLENGTH(10)};\\n\\nrec := RECORD\\n SET OF MyStringType mySet {MAXCOUNT(10)};\\nEND;\\n\\nds := DATASET([{['a','bcd','ef','ghi','jklmnopqrstuvwxyz']},\\n {['1','2','3','4','5','6','7','8','9','10','11']}],rec);\\n\\nOUTPUT(DS);
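// The two modifiers can also be combined directly on the SET field, as in the original
// pseudo-code -- a sketch only (note that MAXLENGTH placed here bounds the stored size
// of the whole set rather than each member, which is why the element-level MAXLENGTH
// typedef above is used to cap the individual strings):
rec2 := RECORD
  SET OF STRING mySet2 {MAXCOUNT(10), MAXLENGTH(1000)};
END;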
Although you will note when you run it that neither of the constraints is enforced in this example. Whether that's a problem or not ...\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-26 20:47:24\" },\n\t{ \"post_id\": 5428, \"topic_id\": 1254, \"forum_id\": 8, \"post_subject\": \"Using both maxlength and maxcount for sets of strings\", \"username\": \"jwilt\", \"post_text\": \"Is there some way to get the effect of the following pseudo-code?\\n\\nl := RECORD\\n SET of STRING mySet {maxcount(10), maxlength(1000)};\\nEND;\\n\\n...where maxcount indicates the maximum members in the set,\\n...and maxlength indicates the maximum length of any one of the members.\\n\\nIs this as simple as combining maxcount and maxlength? If so, how?\\n\\nWe'd prefer not to use, e.g., STRING1000, since the data is sparse and we only occasionally need large strings.\\n\\nThanks.\", \"post_time\": \"2014-03-26 17:45:26\" },\n\t{ \"post_id\": 5436, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Re: Accessing Prev N and Next N records inside Project/Trans\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nAFAIK I do not have access to BUZZ (never heard of it til now ), so you can just email me the code to look at: richard.taylor@lexisnexis.com\\n\\nRichard\", \"post_time\": \"2014-03-27 14:16:23\" },\n\t{ \"post_id\": 5435, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Re: Accessing Prev N and Next N records inside Project/Trans\", \"username\": \"sameermsc\", \"post_text\": \"i have a similar layout to hold the current, previous N and next N records, as mentioned in my last reply (i have edited it), the performance is slow on larger dataset (few hundred million records) due to duplication of data.\\n\\nregards,\\nSameer\", \"post_time\": \"2014-03-27 14:11:31\" },\n\t{ \"post_id\": 5434, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Re: Accessing Prev N and Next N records inside Project/Trans\", \"username\": \"tlhumphrey2\", \"post_text\": \"Make a dataset whose record structure is like the following:\\n\\nLayout_SlidingWindow:=RECORD\\n unsigned window_id;\\n OriginalLayout original_record;\\n dataset(OriginalLayout) previousN;\\n dataset(OriginalLayout) nextN;\\nEND;\\n\\nWhere each record contains one of your original dataset records (i.e., original_record), a child dataset containing the previous N records for original_record (i.e., previousN), and another child dataset containing the next N records for original_record (i.e., nextN).\", \"post_time\": \"2014-03-27 14:05:29\" },\n\t{ \"post_id\": 5433, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Re: Accessing Prev N and Next N records inside Project/Trans\", \"username\": \"sameermsc\", \"post_text\": \"Hi Richard,\\n\\nI have tried to use something similar to what you have suggested.\\nMy requirement is to perform nested looping, and the built in functions i am using to achieve this are not supporting (i get a Runtime Error) the said usage.\\n\\nI have an implementation which works, but the thor timings are too high on larger dataset, so i am looking at other approaches which can improve the performance\\n\\nIf you have access to "BUZZ" kindly share your lexis mail id. 
i can send you the WU details (working WU and the one which gives runtime exception)\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-03-27 14:00:44\" },\n\t{ \"post_id\": 5432, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Re: Accessing Prev N and Next N records inside Project/Trans\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nHow about just doing it like this:
rec := {STRING Ltr};\\nds := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'},{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t\\t\\t\\t rec);\\n\\nrec XF(rec L, INTEGER C) := TRANSFORM\\n //define the previous records to use\\n P_2 := ds[IF(C IN [1, 2], 0, C-2)];\\n P_1 := ds[IF(C=1, 0, C-1)];\\n\\n //define the next records to use\\n Cds := COUNT(ds);\\n N_1 := ds[IF(C=Cds, 0, C+1)];\\n N_2 := ds[IF(C IN [Cds, Cds-1], 0, C+2)];\\n\\n //do the work\\n SELF.ltr := P_2.ltr + P_1.ltr + L.Ltr + N_1.ltr + N_2.ltr; \\nEND;\\n\\t\\t\\t\\t\\t\\nres := PROJECT(ds, XF(LEFT,COUNTER));\\t\\t\\t\\t\\t\\nres;
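// Note on the guards above: indexing a dataset out of range (e.g. ds[0]) just returns
// an empty, all-default row rather than failing, so mapping the first/last positions
// to index 0 simply concatenates empty strings at the boundaries.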
This type of code works where N is a fixed number and your "previous" records can be the original records and not the result records from your PROJECT -- which is what your description sounds like you're currently doing. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-27 13:48:20\" },\n\t{ \"post_id\": 5431, \"topic_id\": 1256, \"forum_id\": 8, \"post_subject\": \"Accessing Prev N and Next N records inside Project/Transform\", \"username\": \"sameermsc\", \"post_text\": \"I am working on a project where at any point of time i need to have access to current, previous N and Next N available records.\\n\\nTo make life easy i would like to see a special structure/dataset which allows me to access the current, previous N and next N records inside a project/transform.\\n\\nI know there are ways to achieve this behavior, one of them is by transforming 2N + 1 rows data into a single row (new layout to hold all the data under existing/new field names), which i am doing, but it requires few additional steps to transform the data into the required format. This approach slows down the process when the new layout has many columns and lot of data gets duplicated.\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-03-27 08:57:51\" },\n\t{ \"post_id\": 5440, \"topic_id\": 1258, \"forum_id\": 8, \"post_subject\": \"Re: Base32 algorithm in ECL?\", \"username\": \"jwilt\", \"post_text\": \"(And, yes, we're considering just wrapping C++ code with BeginC++... Just thought I'd see if this has already been done...)\", \"post_time\": \"2014-03-28 17:27:23\" },\n\t{ \"post_id\": 5439, \"topic_id\": 1258, \"forum_id\": 8, \"post_subject\": \"Base32 algorithm in ECL?\", \"username\": \"jwilt\", \"post_text\": \"Hey, just checking if anyone has written a base32 algorithm in ECL?\\nOr even in BeginC++.\\nBase64 would also be helpful, if no base32 exists.\\n\\nE.g.:\\nhttp://code.google.com/p/basic-algorith ... 32.c?r=175\\n\\nThanks.\", \"post_time\": \"2014-03-28 17:22:53\" },\n\t{ \"post_id\": 5465, \"topic_id\": 1264, \"forum_id\": 8, \"post_subject\": \"Re: GROUP - unexpected result\", \"username\": \"rtaylor\", \"post_text\": \"Keren,\\n\\nYou're confusing the GROUP() function with the "group by" form of the TABLE function (AKA - crosstab reports) where the GROUP keyword is used in the RECORD structure for the TABLE to specify aggregating by each sub-group (using the COUNT, SUM, MIN, MAX, or AVE functions). This produces one record in the result for each unique value of the "group by" field(s).\\n\\nThe GROUP function will simply sub-group the records in your recordset so that subsequent operations (like SORT or DEDUP) operate separately and independently on each sub group -- it does not aggregate the records. It is used primarily to improve performance on very large datasets by breaking the work up into more manageable chunks. The input recordset usually has to be SORTed so all the records to GROUP together are contiguous. \\n\\nHere's an example of both:
//GROUP keyword usage\\nMyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\n\\tINTEGER1 Value3;\\nEND;\\nSomeFile := DATASET([{'C','G',1},\\n \\t\\t\\t\\t\\t {'C','C',2},\\n \\t\\t\\t\\t\\t {'A','X',3},\\n \\t\\t\\t\\t\\t {'B','G',4},\\n \\t\\t\\t\\t\\t {'A','B',5}],MyRec);\\nMyOutRec := RECORD\\n\\tSomeFile.Value1;\\n\\tGrpCnt := COUNT(GROUP);\\n\\tGrpSum := SUM(GROUP,SomeFile.Value3);\\nEND;\\n\\nMyTable := TABLE(SomeFile,MyOutRec,Value1);\\n\\nOUTPUT(MyTable);\\n\\n//GROUP function usage\\nS := SORT(Somefile, value1);\\nG := GROUP(S, value1);\\n\\nD := DEDUP(G,ABS(LEFT.value3-RIGHT.Value3)=1);\\nOUTPUT(D);\\n//the DEDUP only operates within each sub-group so the \\n// result removes only the "C,C,2" record, since 5-3=2
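// Applied to the original question, the "group by" TABLE form gives the per-user
// counts directly -- a sketch, assuming the field names user/timestamp and the 43200
// split from that post:
// perUser := TABLE(b,
//                  {user,
//                   amCnt := COUNT(GROUP, timestamp < 43200),
//                   pmCnt := COUNT(GROUP, timestamp >= 43200)},
//                  user);
// OUTPUT(perUser);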
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-03 13:45:39\" },\n\t{ \"post_id\": 5459, \"topic_id\": 1264, \"forum_id\": 8, \"post_subject\": \"Re: GROUP - unexpected result\", \"username\": \"sameermsc\", \"post_text\": \"Hi Keren,\\n\\nyou have missed the expression in Table syntax\\nhere is the corrected code\\n\\n\\nb := TABLE(ds,{user,ts}, user, ts);\\nc := GROUP(b, user, all);\\n//d := TABLE(c,{user,COUNT(c(timestamp<43200)),COUNT(c(timestamp>=43200))});\\nOUTPUT(c, NAMED('res_7'));
\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-03 07:41:01\" },\n\t{ \"post_id\": 5458, \"topic_id\": 1264, \"forum_id\": 8, \"post_subject\": \"GROUP - unexpected result\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am executing the following query:\\nb := TABLE(a,{user,timestamp});\\nc := GROUP(b, user, ALL); \\n//d := TABLE(c,{user,COUNT(c(timestamp<43200)),COUNT(c(timestamp>=43200))});\\nOUTPUT(c, NAMED('res_7'));\\n\\nExpecting to see my records grouped-by user name, however the output shows:\\nuser timestamp\\nkeren 44\\nkeren 44\\n\\nFor some reason, the two users named keren weren't grouped together - any clue why?\\n\\nThanks,\\nKeren\", \"post_time\": \"2014-04-03 06:02:05\" },\n\t{ \"post_id\": 5549, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"BenJones\", \"post_text\": \"I'm also looking into the possibility that we could modify the dfuserver to take a URL as a source file string. The idea would be to spray an Internet file or RESTFUL service call directly into the system without first storing it on the landing zone. The main reason for doing it this way is that the file might be too big to store on a single node, including the landing zone.\", \"post_time\": \"2014-04-25 10:54:42\" },\n\t{ \"post_id\": 5531, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"rtaylor\", \"post_text\": \"Sounds to me like you've written some code that might be useful to others, too. \\n\\nYou could contribute it to the community for the benefit of all (this is Open Source, after all). The process is basically: create a JIRA issue, create a GIT branch, check your code into that branch, and submit a Pull request.\", \"post_time\": \"2014-04-22 14:48:15\" },\n\t{ \"post_id\": 5508, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"BenJones\", \"post_text\": \"I discovered that that the ECL Standard Library contains the function:\\n\\n\\nSTD.File.EncodeRfsQuery(server,query)\\n
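// e.g. (an illustrative sketch only -- the server string, query text and record layout
// below are assumptions, not taken from this thread):
// rfsName := STD.File.EncodeRfsQuery('10.0.0.99:7080', 'SELECT id, name FROM people');
// people  := DATASET(rfsName, {UNSIGNED4 id, STRING name}, CSV);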
\\nIt returns a string of the form '~file::server::>query'. I also discovered that the HPCC-Platform source tree contains code for creating services that will respond to these. It is found in:\\n\\n\\nHPCC-Platform/dali/rfs\\n
\\n\\nOne of the programs found in subfolders is called "rfsmysql". If run on a landing zone with the appropriate parameters, it allows DATASET's in ECL to send the 'query' to a MySQL server running on a remote computer, provided that the landing zone has access to the Internet.\\n\\nAnother one is called "simplerfs". It allows the DATASET to pass a 'filename' on the landing zone as a query string. It simply opens that file and passes the contents back. Apparently, if EncodeRfsQuery is used in an OUTPUT statement, the "simplerfs" can create files and directories if needed on the landing zone.\\n\\nIt looks like the base classes provided in rfs.cpp and rfs.h anticipate all sorts of operations, as yet undocumented anywhere.\\n\\nI built a variation on this called "rfsinternet" which allows a DATASET to pass a 'URL' as a query string. In this case, it goes out to the Internet and fetches the file referenced in the URL and passes the contents back to the DATASET.\\n\\nOne issue, however, is that if you try to pass '*' or '?' in the 'query', the system complains that the filename cannot be resolved. I was able to work around this by passing '%2a' instead of '*' and '%3f' instead of '?' and then fixing the server code to translate '%xx' back from a hexadecimal number back to a character by interpreting the 'xx' as a hexadecimal number representing the appropriate character.\", \"post_time\": \"2014-04-14 21:36:31\" },\n\t{ \"post_id\": 5483, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"rtaylor\", \"post_text\": \"Then I guess you'll have to submit a feature request with JIRA. I don't know of any way to get data into Thor and not write it to Thor's disks, at the very least.\", \"post_time\": \"2014-04-07 21:04:48\" },\n\t{ \"post_id\": 5482, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"BenJones\", \"post_text\": \"The idea here is to not have copies of the raw data lying around on the landing zone or on the THOR perhaps because they are accessed using some sort of certificate.\", \"post_time\": \"2014-04-07 20:59:24\" },\n\t{ \"post_id\": 5481, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"rtaylor\", \"post_text\": \"Ben,\\n\\nIf it's a disk space issue on the Landing Zone, then I would just write a utility program to get the file, put it on the LZ, spray it to Thor (using DFUplus.exe), and then delete it from the LZ once the spray is complete -- it could also automatically launch any standard job you want to run on the sprayed file (using ecl.exe). \\n\\nIf your LZ box has SSDs, then that whole process should all go fairly quickly too, if speed is also an issue and not just space.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-07 20:36:46\" },\n\t{ \"post_id\": 5480, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"BenJones\", \"post_text\": \"I've been asked to look at the possibility of accessing files on the Internet for processing on the HPCC without having to go through the step of pulling them down to a landing zone and then spraying them onto the system. My understanding is that the HPCC accesses everything through internal IP addresses but that only landing zones can be set up to access the internet. 
My thought was that perhaps "dafilesrv" could be modified to recognize a file string representing a URL and then implement a kind of proxy that would forward the request out to the Internet.\", \"post_time\": \"2014-04-07 20:09:12\" },\n\t{ \"post_id\": 5479, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"Re: What is the difference between Spray and ~file?\", \"username\": \"rtaylor\", \"post_text\": \"Ben, does that effectively do a spray and despray without storing any intermediate results, while still utilizing the parallelism of the HPCC?
No.\\n\\nIt just does a remote read of the file from the landing zone. Since the landing zone is a single node, all the records get read into node 1 (I just tested this to verify). To achieve parallel execution on all the nodes of whatever process you to run on the file you would need to use the DISTRIBUTE function.\\n\\nHere's the code I ran on a 3-node training cluster that demonstrates this:IMPORT TrainingYourName, STD;\\n\\nds := DATASET('~file::10.173.248.1::mnt::disk1::var::lib::^H^P^C^C^Systems::dropzone::persons',\\n TrainingYourName.File_Persons.Layout,\\n\\t\\t\\t\\t\\t\\t\\tThor);\\n\\nP1 := PROJECT(ds,TRANSFORM({ds.ID, UNSIGNED1 N},\\n SELF.N := STD.system.Thorlib.NODE()+1, \\n SELF := LEFT));\\nP2 := PROJECT(DISTRIBUTE(ds),\\n TRANSFORM({ds.ID, UNSIGNED1 N},\\n SELF.N := STD.system.Thorlib.NODE()+1, \\n SELF := LEFT));\\n\\nCOUNT(P1(N=1)); //841400\\nCOUNT(P1(N=2)); //0\\nCOUNT(P1(N=3)); //0\\n\\nCOUNT(P2(N=1)); //280277\\nCOUNT(P2(N=2)); //280525\\nCOUNT(P2(N=3)); //280598
I also tried writing a new file back to the LZ, and was unsuccessful each time, so I will be submitting a JIRA issue against that problem.\\n\\nSo, what's the real issue here? What are you trying to accomplish with these Landing Zone files?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-07 18:49:22\" },\n\t{ \"post_id\": 5478, \"topic_id\": 1269, \"forum_id\": 8, \"post_subject\": \"What is the difference between Spray and ~file?\", \"username\": \"BenJones\", \"post_text\": \"According to the ECL Reference Manual, you can reference a landing zone file using the filename string '~file::ip-address::filename'. When you create a dataset using such a filename string, do some transformations or joins on it, and then output the result to another landing zone file; does that effectively do a spray and despray without storing any intermediate results, while still utilizing the parallelism of the HPCC?\", \"post_time\": \"2014-04-07 16:04:08\" },\n\t{ \"post_id\": 5492, \"topic_id\": 1270, \"forum_id\": 8, \"post_subject\": \"Re: Using local repositories with ECLPLUS.exe\", \"username\": \"tlhumphrey2\", \"post_text\": \"I consulted with experts who really know the clienttools. And, from that consultation, I determined that I need both ECLPLUS and ECL RUN. First, I execute ECL RUN to compile and execute my ECL code. Here is the specific DOS command line:\\n\\necl.exe run -Ic:\\\\myeclcode;d:\\\\mlrepository --target=thor --server=10.239.40.2 --username <myuserid> --password <mypassword> FileOfECLCodeToRun.ecl\\n\\nwhere the –I is an option for the ecl compiler that provides the paths to the 2 code repositories I need. By the way, I needed these because of the IMPORTs in my ECL code named folders (modules) that are in these two directories. -–username provides the userid I use to access the HPCC server (whose IP address is given by –server)(Note. Replace <myuserid> in the above with your userid). And, --password gives the password I use to access the HPCC server (Note. Replace <mypassword> in the above with your password). \\n\\nNext, I execute ECLPLUS to get the timings (I needed the timings for an experiment) from the dump. Here is the specific DOS command line:\\n\\nEclplus.exe action=dump wuid=W20140408-164620 cluster=thor server=10.239.40.2 > dump.txt\\n\\nI got the workunit number from the output of the above run of ecl.exe run. And, I’m redirecting the dump to a file, i.e. dump.txt.\", \"post_time\": \"2014-04-09 13:42:22\" },\n\t{ \"post_id\": 5489, \"topic_id\": 1270, \"forum_id\": 8, \"post_subject\": \"Using local repositories with ECLPLUS.exe\", \"username\": \"tlhumphrey2\", \"post_text\": \"I’m attempting to use ECLPLUS.exe. I have two local code repositories that I want to use with ECLPLUS.exe. 
The path of these repositories are:\\n\\nC:\\\\d\\\\ecl\\\\ml\\\\ecl\\nC:\\\\users\\\\humphrtl\\\\documents\\\\github\\\\ecl-ml\\n\\nWhen I use IDE, I place these paths in the ECL Folders textbox under the compiler tab of preferences.\\n\\nHow do I reference them when using ECLPLUS.exe?\", \"post_time\": \"2014-04-08 14:20:25\" },\n\t{ \"post_id\": 5621, \"topic_id\": 1273, \"forum_id\": 8, \"post_subject\": \"Re: Questions about ECL publish\", \"username\": \"tlhumphrey2\", \"post_text\": \"Bob,\\n\\nThanks for the response.\\n\\ntim\", \"post_time\": \"2014-05-02 15:18:55\" },\n\t{ \"post_id\": 5614, \"topic_id\": 1273, \"forum_id\": 8, \"post_subject\": \"Re: Questions about ECL publish\", \"username\": \"bforeman\", \"post_text\": \"Hi Tim,\\n\\nThis post has been unanswered for a while, but let me tell you at least what I think I know about query sets.\\n\\nI look at a single "query set" as a pre-compiled published workunit, and a query can be any ECL workunit. Prior to OSS, you had Roxie Config that did the publishing, but now in OSS that is now built into the ECL Watch Workunit Detail Page via the Publish button. Queries can actually be published to THOR, ROXIE or hTHOR, and all of them can be tested using the WS_ECL service.\\n\\nIn the ECL Watch, the Query Set option shows a list of the published workunits, and there you have the ability to delete them or activate/suspend them.\\n\\nPublishing a query is the method of "pushing" or copying a workunit and its associated datasets to ROXIE, but you can now also emulate the same in hTHOR and THOR to test the queries before they go into production.\\n\\nThat's the way I see them \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-02 14:25:35\" },\n\t{ \"post_id\": 5500, \"topic_id\": 1273, \"forum_id\": 8, \"post_subject\": \"Questions about ECL publish\", \"username\": \"tlhumphrey2\", \"post_text\": \"What is a query set? And, if you publish a workunit to a query set, how can that query set be used?\", \"post_time\": \"2014-04-10 15:19:08\" },\n\t{ \"post_id\": 5604, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Re: Creating Modules - Error 2386\", \"username\": \"micevepay\", \"post_text\": \"I definitely learned that in class. Thank you for your help rtaylor and sorry bforeman for not taking better notes.
\", \"post_time\": \"2014-05-01 15:43:17\" },\n\t{ \"post_id\": 5545, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Re: Creating Modules - Error 2386\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nOK, so that's not the issue. I just duplicated your problem by creating a similar MODULE structure definition file, then trying to just hit "Submit" to run a job. I got the same error you did.\\n\\nYour problem is that you "got away" with doing that when your EXPORT definition was a DATASET declaration, because the compiler allowed the DATASET to default as the action (expression) to run, showing you records from that DATASET. As I say in every class I teach -- this is a bad habit to get into. Why? because it only works sometimes.\\n\\nWhen the EXPORT definition is a MODULE structure the compiler cannot simply default to treating that as the action (expression) to run. Therefore, this is one of those "sometimes" when just hitting "Submit" won't work.\\n\\nYou need to open a separate builder window (or a BWR file) and the action (expression) to run will be like this:
IMPORT YourFolderName;\\nYourFolderName.File_publications.File;
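// Or, written as an explicit action rather than relying on the default behaviour:
// OUTPUT(YourFolderName.File_Publications.File);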
HTH,\\n\\nRichard\", \"post_time\": \"2014-04-24 17:54:47\" },\n\t{ \"post_id\": 5544, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Re: Creating Modules - Error 2386\", \"username\": \"tlhumphrey2\", \"post_text\": \"The query in which you got this error, what does it look like, i.e the code that calls this module (or file)?\", \"post_time\": \"2014-04-24 17:52:37\" },\n\t{ \"post_id\": 5543, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Re: Creating Modules - Error 2386\", \"username\": \"micevepay\", \"post_text\": \"For the second piece of code the file is named the same as the EXPORT. It is named "File_Publications.ecl" which is why I am confused. Or maybe I am misunderstanding what you are saying.\", \"post_time\": \"2014-04-24 17:39:31\" },\n\t{ \"post_id\": 5542, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Re: Creating Modules - Error 2386\", \"username\": \"rtaylor\", \"post_text\": \"The name of the .ecl file must exactly match the name of the *one* EXPORT definition it contains (in this case, the MODULE structure). \\n\\nSo, since your first code works well, I presume the file is named "publications.ecl" which, after changing your code to the MODULE structure you posted, now contains the EXPORT File_publications definition, so the EXPORT definition name and the filename are out of synch -- thus the error message.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-24 17:00:20\" },\n\t{ \"post_id\": 5541, \"topic_id\": 1283, \"forum_id\": 8, \"post_subject\": \"Creating Modules - Error 2386\", \"username\": \"micevepay\", \"post_text\": \"Hello,\\n\\nI'm trying to create a module and keep getting Error 2386 \\n\\nI am able to bring in my data normally\\n\\nLayout_Publications := RECORD\\n\\tString\\tfilename;\\n\\tSTRING \\txmldata;\\n\\tUNSIGNED8 RecPos{virtual(fileposition)};\\nEND;\\n\\n\\n\\nEXPORT Publications := DATASET('~mike::data::publications::articles::rawxml::publicationsxmlblob', Layout_Publications, FLAT);
\\n\\nBut this is when I get the error\\n\\nEXPORT File_Publications := MODULE;\\n\\n\\tEXPORT Layout := RECORD\\n\\t\\tString\\tfilename;\\n\\t\\tSTRING \\txmldata;\\n\\t\\tUNSIGNED8 RecPos{virtual(fileposition)};\\n\\tEND;\\n\\n\\tEXPORT FILE := DATASET('~mike::data::publications::articles::rawxml::publicationsxmlblob', Layout, FLAT);\\n\\nEND;
\\n\\n"[color=#BF0000:1sgdqysa]Error: Module publicationdata.File_Publications does not EXPORT an attribute main() (1, 0), 2386, ".\\n\\n\\nI've even went back to examples from training manual and I get the same error.\", \"post_time\": \"2014-04-24 15:19:38\" },\n\t{ \"post_id\": 5560, \"topic_id\": 1287, \"forum_id\": 8, \"post_subject\": \"Re: SALT Linking\", \"username\": \"bforeman\", \"post_text\": \"Hi Viswa,\\n\\nThe very best source where linking is explained is in the SALT User's Guide PDF. \\nLook near the end of the PDF, and check out the Record Linking/Clustering article.\\n\\nInternal linking essentially is the process that looks for matching records in the same file or entity.\\n\\nExternal Linking looks at records in a designated related file and links them back to a central base or authority file.\\n\\nCONCEPT simply lets you combine fields that "go together" and can be used to contribute to the overall record linking score. Again, there is a pretty good explanation in the SALT User's Guide.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-04-28 17:24:17\" },\n\t{ \"post_id\": 5558, \"topic_id\": 1287, \"forum_id\": 8, \"post_subject\": \"SALT Linking\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nAm very new to SALT and want to know more about internal and external linking.\\n\\nI was going through the SALT Videos for the same.\\n\\nDo we have any other material or link where SALT Linking is explained in a more detailed fashion. ?\\n\\nAlso what is the use of CONCEPT field in the specification file ? Not able to relate the same with the existing examples.\\n\\nEx :\\n\\n\\nCONCEPT:locale:+:zip:state:city:msa:0,0\\nCONCEPT:address:prim_range+:sec_range:prim_name+:zip4:unit_desig:addr_suffix:0,0\\n
\\n\\nWhat does the above statement mean exactly and how we can use it for other entities or other data ?\\n\\nKindly advise.\\n\\nThanks and Regards,\\nViswa\", \"post_time\": \"2014-04-28 16:51:41\" },\n\t{ \"post_id\": 5597, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"dbang\", \"post_text\": \"Hi Richard\\n\\nThanks for the heads up.\\n\\nKind regards\\nDaniel\", \"post_time\": \"2014-04-30 14:09:48\" },\n\t{ \"post_id\": 5596, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nFYI, there will be expanded EMBED support coming in release 5.0 (due sometime in the next several weeks or so) that will enable you to simply host your temporary file in, say, MySQL and have your Roxie query directly access that database using embedded SQL code. That would move your temporary file update processes to the MySQL side of things and eliminate any changes to the HPCC side of things (assuming the response times work well for your situation).\\n\\nJust a heads-up,\\n\\nRichard\", \"post_time\": \"2014-04-30 13:58:34\" },\n\t{ \"post_id\": 5587, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nThen you should just use a Superfile as the temporary file in your query, so you only need to compile the query once. That Superfile will contain a single sub-file (Roxie requires that) so it's only used for indirection. \\n\\nThe easiest way to accomplish this is to have two Roxies that switch off being the active online Roxie.\\n \\nThen you put in place a process to every ten minutes:\\n1. automatically copy the latest temporary file to your offline Roxie \\n2. update the Package file to change the name of the Superfile's sub-file to the new temp file \\n3. tell the load balancer to send all queries to the just-updated Roxie\\n\\nThis scheme has the advantage that all your data for the Roxie queries are always local to the Roxie, making your query response time as fast as possible.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-29 15:51:24\" },\n\t{ \"post_id\": 5586, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"dbang\", \"post_text\": \"Hi Richard\\n\\nI'd say around 10 minutes.\\n\\nKind regards\\nDaniel\", \"post_time\": \"2014-04-29 15:31:08\" },\n\t{ \"post_id\": 5585, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nWhat is the period of change for this temporary file?\\n\\nRichard\", \"post_time\": \"2014-04-29 15:28:54\" },\n\t{ \"post_id\": 5584, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nWhat is the period of change for this temporary file?\\n\\nRichard\", \"post_time\": \"2014-04-29 15:28:35\" },\n\t{ \"post_id\": 5583, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"dbang\", \"post_text\": \"Hi Richard\\n\\nClarity always helps \\n\\nThe parameter works fine but this means that the data has to be passed every time. I was hoping that we could somehow persist the data for that session. So the flow would be to upload the data once for the session and then on-the-fly calculations could be performed with different parameters on the same data. 
Kinda like persist in thor.\\n\\nKind regards\\nDaniel\", \"post_time\": \"2014-04-29 13:53:56\" },\n\t{ \"post_id\": 5580, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nIf the temporary dataset is always small, you could just pass it in as a parameter to the Roxie query. \\n\\nWhere is the temporary data originating from? An SQL database, maybe? If so, then you could also consider creating a service in that RDBMS and using SOAPCALL in your Roxie query to get the data.\\n\\nOr ... it all depends on your circumstance
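For the small-dataset-as-parameter route, a minimal sketch looks like the following (all names, the record layout and the base filename are illustrative assumptions, and a production Roxie query would normally read the base data through an INDEX rather than a flat THOR file):

TempRec := RECORD
  STRING20  acct;
  UNSIGNED8 amount;
END;
// The caller supplies this row-set on each request via the STORED input:
tempRows := DATASET([], TempRec) : STORED('TempRows');
baseData := DATASET('~thor::base::accounts', TempRec, THOR);
accum := JOIN(baseData, tempRows,
              LEFT.acct = RIGHT.acct,
              TRANSFORM(TempRec,
                        SELF.amount := LEFT.amount + RIGHT.amount,
                        SELF := LEFT));
OUTPUT(accum);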
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-29 13:46:00\" },\n\t{ \"post_id\": 5578, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"dbang\", \"post_text\": \"Hi\\n\\nBrief overview:\\nWe have a predefined dataset we are deploying to Roxie. However this needs to be merged with a temporary dataset to calculate accumulations on the fly. So we were looking at ways of doing this. Doesnt necessarily needs to be this function but something along those lines. We have another solution where we post the data to workunit but that would have to be done at every call. I was afraid of going through thor since the calculation can vary depending on the job queue.\\n\\nHope this makes sense.\\n\\nKind regards\\nDaniel\", \"post_time\": \"2014-04-29 13:34:32\" },\n\t{ \"post_id\": 5577, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Re: Landing zone file\", \"username\": \"rtaylor\", \"post_text\": \"dbang,\\n\\nIt probably should work. But since it doesn't, you should report the issue in JIRA.\\n\\nHowever, my more fundamental question is why you would want to do that on Roxie at all? The purpose of Roxie is to deliver end-users the fastest possible response to their queries. Therefore, "correct design" for a Roxie query should include having all the data needed to answer the query locally available on the Roxie itself. \\n\\nSo, what are you trying to accomplish?\\n\\nRichard\", \"post_time\": \"2014-04-29 13:26:47\" },\n\t{ \"post_id\": 5576, \"topic_id\": 1291, \"forum_id\": 8, \"post_subject\": \"Landing zone file\", \"username\": \"dbang\", \"post_text\": \"Hi \\n\\nI am trying to access landing zone files directly via\\n‘~file::<LZ-ip>::<path>::<filename>’ using STD.File.ExternalLogicalFileName\\n\\nThis works in THOR but not in Roxie which gives an error (end of post). Question is whether accessing landing zone files are meant to work in Roxie at all or I am doing something wrong?\\n\\nKind regards\\nDaniel\\n\\nRoxie error:\\nError: IPropertyTree: xpath parse error\\nXPath Exception: Qualifier expected e.g. [..]\\nin xpath = ^h^p^c^c^systems::mydropzone::test.txt\\n ^ (0, 0), 1, \\nError: IPropertyTree: xpath parse error\\nXPath Exception: Qualifier expected e.g. [..]\\nin xpath = ^h^p^c^c^systems::mydropzone::test.txt\\n ^ (0, 0), 1,\", \"post_time\": \"2014-04-29 12:13:23\" },\n\t{ \"post_id\": 5603, \"topic_id\": 1293, \"forum_id\": 8, \"post_subject\": \"Re: SALT error\", \"username\": \"ernest_lyubchik\", \"post_text\": \"Thank you, your suggestion worked out the problem!\", \"post_time\": \"2014-05-01 15:37:48\" },\n\t{ \"post_id\": 5599, \"topic_id\": 1293, \"forum_id\": 8, \"post_subject\": \"Re: SALT error\", \"username\": \"bforeman\", \"post_text\": \"Hi Ernest,\\n\\nThe Hygiene module makes a call to MAC_Character_Counts, which is located in your SALTxx repository folder (xx is the version number - i.e., SALT29)\\n\\nGo to line 3 in that file and increase the 256 value: \\n\\n
export MAC_Character_Counts := module\\nshared MaxExamples := 300;\\nshared MaxChars := 256; // Change to allow more than 256 different characters in string
\\n\\nAll the sizes for the profiling output are in the SALTnn. MAC_Character_Counts module. The definition for the Words child dataset uses the MaxChars value to define the maximum number of rows.\\n\\ndataset(Words_Layout) Words {MAXCOUNT(MaxChars)} := dataset([],Words_Layout);
\\n\\nIt sounds like you have some sort of large string field (like an entire document) so you are going to have a lot of words. SALT was really not meant for extremely large text fields, but sometimes it can be made to work. You may not break the row count, but you could still end up exceeding the maximum size of the record. For data profiling, you may have to PROJECT a version of the dataset that contains only the regular types of fields, i.e. eliminates the extremely large text fields.\\n\\nThat should eliminate the DENORMALIZE error. Happy hunting!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-01 13:42:04\" },\n\t{ \"post_id\": 5598, \"topic_id\": 1293, \"forum_id\": 8, \"post_subject\": \"SALT error\", \"username\": \"ernest_lyubchik\", \"post_text\": \"Hi,\\n\\nI am new to SALT , and using a SALT generated hygiene file to profile data. \\nI received the following error:\\n\\nError: System error: 0: Graph[146], denormalize[149]: SLAVE 10.0.9.57:20000: Too many rows assigned to field words(256) [id=149], \\n\\nAfter the workunit run for a long time. It generated some data in the summary. Covering about 7645544, but the all_profiles has no data.\", \"post_time\": \"2014-05-01 09:58:17\" },\n\t{ \"post_id\": 5645, \"topic_id\": 1294, \"forum_id\": 8, \"post_subject\": \"Re: HashMap modeling\", \"username\": \"ernest_lyubchik\", \"post_text\": \"No one encountered this issue?\\n\\nMaybe I am missing some different approach, of how to list O(1) key accessible elements of sparse runtime defined column type in a Recordset form?\", \"post_time\": \"2014-05-06 16:24:22\" },\n\t{ \"post_id\": 5609, \"topic_id\": 1294, \"forum_id\": 8, \"post_subject\": \"HashMap modeling\", \"username\": \"ernest_lyubchik\", \"post_text\": \"Hi,\\n\\nI have data which is currently in RecordSet format. \\nI would like to transform this data into a different format RecordSet, where each record contains a HashMap as one of its fields. The number of keys in the HashMap is determined in runtime, and can be different for each record entry. Naturally, access by key should be O(1). \\nBasically, this is what I would represent in HBase in the form of a column family, and then be able to add columns sparsely, keeping a HashMap access structure. \\nI think Dictionary resembles the desired data structure, but I am not sure how to use it correctly in this case.\\n\\nWhat is the best way to do this in HPCC?\\nHow should I go about transforming an existing RecordSet by taking some of its fields and putting them in the new HashMap (while adding to it additional computed data)?\\nBasically, for record: \\nrecA := RECORD\\n STRING10 x1;\\n STRING10 x2; \\n STRING10 x3;\\nEND;\\n\\nI would like to have:\\nrecB := RECORD\\n STRING10 xyz;\\n HashMap { (x1, (foo1(x1),foo2(x1)), (x3, (foo1(x3),foo2(x3)) ) } //x2 not selected on purpose.\\nEND;\\n\\nThanks,\\nErnest\", \"post_time\": \"2014-05-02 09:18:10\" },\n\t{ \"post_id\": 5637, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Vijay,Since my Data is huge – 50 GB, I want the Index file to be spread across all Nodes, Instead of occupying a single node within the cluster.
An INDEX built on a 50-node cluster will always have 50 leaf node file parts (and one btree part), unless you specifically use the FEW or WIDTH options on your BUILD. \\n\\nTo test this, I just wrote a dataset to one node, then built an INDEX on that 1-node dataset and the result (on my 3-node training cluster) was an INDEX with 3 leaf node parts and 1 btree part. I want the Index creation process (Creating Index and writing to THOR) to be quick. Now to create Index for 50 GB of data it takes 2 hrs and 30 minutes.
How big is the cluster you're building it on? The easiest way to make this kind of process faster is to throw more hardware at it. For a 50-Gb dataset, I would consider a 50-node cluster to be a good size to run on.Please clarify - When should we use DISTRIBUTE. In the below code I am trying to DISTRIBUTE the dataset based on EmpId and FName_Hash and then I am trying to BUILD Index for it.
EmployeeLayout := RECORD\\nINTEGER EmpId;\\nINTEGER FName_Hash;\\nINTEGER LName_Hash;\\nEND;\\n\\nEmpDS := DATASET('ds::empDetails', {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, FLAT);\\n\\nEMPDS_Dist := DISTRIBUTE(EmpDS , {EmpId,FName_Hash});\\n\\nIDX_EMP_DS := INDEX({INTEGER EmpId, INTEGER FName_Hash}, {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, 'ds:IDX_empDetails');\\n\\nBLD_EMP_DS:= BUILD(EMPDS_Dist , {EmpId, FName_Hash}, {EmpId, FName_Hash, LName_Hash, filepos}, 'ds:IDX_empDetails' );
You have a couple of problems with this code.\\n\\nFirst, the DISTRIBUTE, as written, will not syntax check because the second parameter of DISTRIBUTE must be an integer expression (you're trying to give it a RECORD structure).\\n\\nSecond, your INDEX definition is attempting to duplicate the EmpID and Fname_hash fields in both the key fields and the payload. This is unnecessary. The key field are in each leaf node already so you only need to payload the additional fields. \\n\\nThird, the point of using DISTRIBUTE (in almost all cases) is so you can then use the LOCAL option on subsequent operations. You're not using LOCAL on your BUILD, so the likelihood of this being faster is negligible.\\n\\nI would suggest correcting these problems then running a test to see which way builds the INDEX more quickly on your current hardware. Here's the way I would write it:EmployeeLayout := RECORD\\n INTEGER EmpId;\\n INTEGER FName_Hash;\\n INTEGER LName_Hash;\\nEND;\\n\\nEmpDS := DATASET('ds::empDetails', \\n {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, FLAT);\\n\\nEMPDS_Dist := DISTRIBUTE(EmpDS , EmpId);\\n\\nIDX_EMP_DS_glob := INDEX(EmpDS,{EmpId, FName_Hash}, \\n {EMPS_Dist}, 'ds::IDX_empDetails_glob');\\nIDX_EMP_DS_dist := INDEX(EMPS_Dist,{EmpId, FName_Hash}, \\n {EMPS_Dist}, 'ds::IDX_empDetails_dist');\\n\\nBUILD(IDX_EMP_DS_glob,OVERWRITE);\\nBUILD(IDX_EMP_DS_dist,LOCAL,OVERWRITE);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-05 16:00:59\" },\n\t{ \"post_id\": 5627, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Thanks for the reply Bob.\\n\\nRichard, \\n\\nMy requirement is to DISTRIBUTE the Index file which is getting created in THOR. I am trying to read a Dataset (50 GB) from THOR and I am trying to create Index for the same. I am not sure whether the Index which gets created will be DISTRIBUTED by default or should I DISTRIBUTE it explicitly.\\n\\nReason for using DSITRIBUTE while creating Index is \\n\\nSince my Data is huge – 50 GB, I want the Index file to be spread across all Nodes, Instead of occupying a single node within the cluster.\\n\\nI want the Index creation process (Creating Index and writing to THOR) to be quick. Now to create Index for 50 GB of data it takes 2 hrs and 30 minutes.\\n\\n\\nWhich Index creation would be quicker.\\n1.) Creating Index file for Dataset which has been distributed.\\n2.) Creating Index file for Dataset which has been read from THOR directly.\\n\\nPlease clarify - When should we use DISTRIBUTE. In the below code I am trying to DISTRIBUTE the dataset based on EmpId and FName_Hash and then I am trying to BUILD Index for it. \\n\\nEmployeeLayout := RECORD\\nINTEGER EmpId;\\nINTEGER FName_Hash;\\nINTEGER LName_Hash;\\nEND;\\n\\nEmpDS := DATASET('ds::empDetails', {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, FLAT);\\n\\nEMPDS_Dist := DISTRIBUTE(EmpDS , {EmpId,FName_Hash});\\n\\nIDX_EMP_DS := INDEX({INTEGER EmpId, INTEGER FName_Hash}, {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, 'ds:IDX_empDetails');\\n\\nBLD_EMP_DS:= BUILD(EMPDS_Dist , {EmpId, FName_Hash}, {EmpId, FName_Hash, LName_Hash, filepos}, 'ds:IDX_empDetails' );
\\n\\n\\n\\nThanks\\nVijay\", \"post_time\": \"2014-05-05 05:57:24\" },\n\t{ \"post_id\": 5620, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Vijay,I am trying to DISTRIBUTE the DataSet while creating Index for it.
Why? What advantage do you expect this DISTRIBUTE to give you?\\n\\nIt does not matter how the DATASET is distributed, the INDEX will end up the same either way. All you need to do is this to accomplish what you want:EmployeeLayout := RECORD\\n INTEGER EmpId;\\n INTEGER FName_Hash;\\n INTEGER LName_Hash;\\nEND;\\n\\nEmpDS := DATASET('ds::empDetails', {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, FLAT);\\n\\nIDX_EMP_DS := INDEX(EmpDS,{EmpId, FName_Hash}, {EmpDS}, 'ds:IDX_empDetails');\\n\\nBLD_EMP_DS:= BUILD(IDX_EMP_DS);
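// Once built, the payload index can be filtered directly on its leading key field(s),
// returning the payloaded columns without a FETCH back to the base file, e.g. (sketch):
// OUTPUT(IDX_EMP_DS(KEYED(EmpId = 42)));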
This INDEX will have leaf nodes sorted by EmpID and Fname_Hash, every other field in the DATASET will be the payload in each leaf node record, and the binary tree will be built on the two key fields. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-02 15:14:18\" },\n\t{ \"post_id\": 5619, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"bforeman\", \"post_text\": \"Yeah, DISTRIBUTE needs an INDEX definition, not field names \\n\\nThis works:\\n\\n
BLD_EMP_DS:= BUILD(EmpDS, {EmpId, FName_Hash}, {EmpId, FName_Hash, LName_Hash, filepos}, 'ds:IDX_empDetails' , DISTRIBUTE(IDX_EMP_DS));
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-02 15:08:59\" },\n\t{ \"post_id\": 5617, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Thanks for the reply Bob. Now I understood how Index based DISTRIBUTE works.\\n\\nI am trying to DISTRIBUTE the DataSet while creating Index for it. \\n\\nPlease find the below code for your reference.\\n\\nIn the below code I have given DISTRIBUTE based on EmpId and FName_Hash while building the Index. but I am getting the below error.\\n\\nError: syntax error near "EmpId" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (12, \\n\\nHow to overcome this error. Should I specify the DataType for the fields on which I created the Index.\\n\\n\\nEmployeeLayout := RECORD\\nINTEGER EmpId;\\nINTEGER FName_Hash;\\nINTEGER LName_Hash;\\nEND;\\n\\nEmpDS := DATASET('ds::empDetails', {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, FLAT);\\n\\nIDX_EMP_DS := INDEX({INTEGER EmpId, INTEGER FName_Hash}, {EmployeeLayout, UNSIGNED filepos{virtual(fileposition)}}, 'ds:IDX_empDetails');\\n\\nBLD_EMP_DS:= BUILD(EmpDS, {EmpId, FName_Hash}, {EmpId, FName_Hash, LName_Hash, filepos}, 'ds:IDX_empDetails' , DISTRIBUTE(EmpId,FName_Hash));\\n\\n
\\n\\n\\nThanks\\nVijay\", \"post_time\": \"2014-05-02 14:59:51\" },\n\t{ \"post_id\": 5613, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"Re: How to do Index-based DISTRIBUTE\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nThe docs look pretty good here:\\n\\nDISTRIBUTE(recordset, index [, joincondition ] )\\n\\nThis form redistributes the recordset based on the existing distribution of the specified index, where the linkage between the two is determined by the joincondition. Records for which the joncondition is true will end up on the same node.\\n\\n
\\nExample Code:\\nmainTable := DATASET('~keyed.d00',mainRecord,THOR);\\nnameKey := INDEX(mainTable, {surname,forename,filepos}, 'name.idx');\\nincTable := DATASET('~inc.d00',mainRecord,THOR);\\nx := DISTRIBUTE(incTable, nameKey,\\n LEFT.surname = RIGHT.surname AND\\n LEFT.forename = RIGHT.forename);\\nOUTPUT(x);
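// The usual point of this form is to co-locate the recordset with the matching index
// parts, so that a later keyed join against the same key can use the LOCAL option --
// a hedged sketch only:
// j := JOIN(x, nameKey,
//           LEFT.surname = RIGHT.surname AND LEFT.forename = RIGHT.forename,
//           TRANSFORM(LEFT), LOCAL);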
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-02 13:27:24\" },\n\t{ \"post_id\": 5611, \"topic_id\": 1295, \"forum_id\": 8, \"post_subject\": \"How to do Index-based DISTRIBUTE\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nI have done two types of DISTRIBUTE\\n\\nUsing RANDOM:\\nDISTRIBUTE(Dataset, RANDOM())\\n\\nUsing HASH32 on a specific field:\\nDISTRIBUTE(Dataset, HASH32('Name'));\\n\\nI need an example for Index-Based DISTRIBUTE. \\n\\nThanks\\nVijay\", \"post_time\": \"2014-05-02 11:58:51\" },\n\t{ \"post_id\": 5643, \"topic_id\": 1296, \"forum_id\": 8, \"post_subject\": \"Re: Lock Error when try to add a file to an existing SuperKe\", \"username\": \"bforeman\", \"post_text\": \"Hi Arjun,\\n\\nDoes the XREF utilty in the ECL Watch tell you anything about that superkey?\\nCheck with your system administrator before running any report as they warn you that it will strain the servers. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-06 11:54:25\" },\n\t{ \"post_id\": 5641, \"topic_id\": 1296, \"forum_id\": 8, \"post_subject\": \"Re: Lock Error when try to add a file to an existing SuperKe\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Bob,\\n\\nThere is no issue when publishing the query to THOR, we can able to add/remove the sub key with THOR. But when it executes in ROXIE we are facing the issue. Even though we are not published it, we are facing the issue.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-05-06 09:54:40\" },\n\t{ \"post_id\": 5638, \"topic_id\": 1296, \"forum_id\": 8, \"post_subject\": \"Re: Lock Error when try to add a file to an existing SuperKe\", \"username\": \"bforeman\", \"post_text\": \"Hi Arjun,\\n\\nIs there a possibility that somebody also published a query to either Thor or hThor that the SuperKey could be using? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-05 17:08:13\" },\n\t{ \"post_id\": 5628, \"topic_id\": 1296, \"forum_id\": 8, \"post_subject\": \"Lock Error when try to add a file to an existing SuperKey\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nWe’re getting a lock error when try to add an index file to an existing Superkey. \\n\\nHere is the error:\\n\\n[color=#FF4000:cltex2x3]"SDS: Lock held SDS Reply Error : SDS: Lock held Lock is held performing changeMode on connection to ": \\n\\nWe verified all the roxie queries that might probably be referring to this particular super key and we even deleted all of the published roxie queries but still we are getting the same lock issue whenever we try to add/remove any subfile to/from this super key. 
We are not sure how to proceed further with this issue.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-05-05 06:32:33\" },\n\t{ \"post_id\": 5696, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"dabayliss\", \"post_text\": \"SALT is a proprietary LN technology we cannot discuss in open forum (some of the technologies are still undergoing patent protection and public disclosure threatens that status).\\n\\nAssuming you have SALT and have the evaluation agreement then Trish McCall or Arjuna Chala should be able to arrange for you to have access to the SALT forum where we can answer your questions.\", \"post_time\": \"2014-05-15 17:50:40\" },\n\t{ \"post_id\": 5689, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am not a direct employee of RE , but we partner with LN risk solutions for different HPCC projects.\\n\\nThanks,\\nviswa\", \"post_time\": \"2014-05-13 22:22:10\" },\n\t{ \"post_id\": 5688, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you are an employee of RE, you should be able to get access. \\n\\nLet me know if you are, please.\", \"post_time\": \"2014-05-13 18:20:03\" },\n\t{ \"post_id\": 5687, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"ksviswa\", \"post_text\": \"Tim,\\n\\nThanks a lot..\\n\\nDo you have the salt forum link ?\\n\\nThe one you mentioned gives me a error , am not authorized to use this forum. \\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-13 17:59:02\" },\n\t{ \"post_id\": 5686, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"tlhumphrey2\", \"post_text\": \"Viswa,\\n\\nYou would get quicker response and probably better answers if you posted your SALT questions on the SALT forum, http://hpccsystems.com/bb/viewforum.php?f=44.\\n\\nTim\", \"post_time\": \"2014-05-13 17:54:05\" },\n\t{ \"post_id\": 5685, \"topic_id\": 1307, \"forum_id\": 8, \"post_subject\": \"SALT External Linking\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHave few clarifications regarding external linking.\\n\\n1.) The external linking result will just tell which all records could be linked to the base file ..? \\n\\nIncase we want a collated result of both the files we need to perform a join separately based on the entity id ( base file ) and the unique id ( external file ) based on the linking result.\\n\\nFor Ex :\\n\\nConsider a base data file "abc" with these fields \\n\\nfield1 field2 field3 field4\\n123 abc xxx xyz\\n\\nExternal File With these fields :\\n\\nfield_1 field_2 field_3 field_4\\n1 xxx def zzz \\n\\nLinking of internal file and external file is based on field3 and field_2 respectively.\\n\\nDo we get the result of this format after linking ?\\n\\nfield1 field2 field3 field4 field_1 field_2 field_3 field_4\\n123 abc xxx xyz 1 xxx def zzz\\n\\n2.) What exactly is the difference between "External Files" and "Attribute Files"\\n\\n3.) If the base internal file and external file do not have many similarities , which is better to use External linking or remote linking ?\\n\\n4.) 
Can you explain with an example of "INITIAL , ABBR " which we specify in the FIELD statement in the specification file.\\n\\nEx : if "abc def" is present as abbr / initial , abc / def alone should be matched Is that correct ?\\n\\nKindly advise.\\n\\nThanks a lot in advance.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-13 17:18:31\" },\n\t{ \"post_id\": 5706, \"topic_id\": 1309, \"forum_id\": 8, \"post_subject\": \"Re: WHEN is a function return\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nTry making your OUTPUT in the LOOP a "Named OUTPUT" using the NAMED and EXTEND options -- then you should see the result of each iteration of the LOOP in the file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-16 18:00:49\" },\n\t{ \"post_id\": 5705, \"topic_id\": 1309, \"forum_id\": 8, \"post_subject\": \"Re: WHEN is a function return\", \"username\": \"tlhumphrey2\", \"post_text\": \"The WHEN does produce output each time the function is called by the LOOP statement. But, that output shows up as only one file to the workunit.\\n\\nThis is different behavior than what happens when I call the function when it isn't the body of a LOOP statement. In this scenario, each time I call the function, I get that many files out to the workunit.\", \"post_time\": \"2014-05-16 17:45:33\" },\n\t{ \"post_id\": 5704, \"topic_id\": 1309, \"forum_id\": 8, \"post_subject\": \"WHEN is a function return\", \"username\": \"tlhumphrey2\", \"post_text\": \"Is the following true? \\n\\nIf there is a WHEN statement, e.g. WHEN(ReturnedDataset,O), on the RETURN of a function, F, and O is, O:=OUTPUT(ds), then everytime F is executed there is output to the workunit.\\n\\nI thought it was true. But, it isn't for a function that is the body of a LOOP statement.\", \"post_time\": \"2014-05-16 17:30:45\" },\n\t{ \"post_id\": 5721, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"ksviswa\", \"post_text\": \"Thanks..\\n\\nLet me check with SprayXML ones.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-19 16:27:16\" },\n\t{ \"post_id\": 5720, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"bforeman\", \"post_text\": \"Yes, you should be able to set the parameters in SprayXML or fSprayXML just like you are trying to do with STD.File.DfuPlusExec, which is more generic. Worth a look!\", \"post_time\": \"2014-05-19 15:54:18\" },\n\t{ \"post_id\": 5719, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nCan i use SprayXML for list of XML files or a single XML File ..?\\n\\nEach directory in my scenario would have hundreds of xml files.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-19 15:50:32\" },\n\t{ \"post_id\": 5718, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"bforeman\", \"post_text\": \"Why not just call the SprayXML function directly?\\n\\nSprayXML returns a null-terminated string containing the DFU workunit ID (DFUWUID).\", \"post_time\": \"2014-05-19 15:46:33\" },\n\t{ \"post_id\": 5717, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks Bob..\\n\\nI have modified the code , but am getting some error in the "IF" statement, not sure if am missing something
\\n\\n
\\n\\nSprayFiles(String dir_name , String status ) := FUNCTION \\n \\n dropzone_path := /abc/';\\n dest_file := '~xxx' + dir_name;\\n \\n ESPPort := '8010';\\n ESPIp := 'xx.xx.xx.xx';\\n serv := 'server=http://'+ ESPIp + ':' + ESPPort + '/ ';\\n user := 'username=asdf ';\\n pswd := 'password=xxx ';\\n over := 'overwrite=1 ';\\n action := 'action=spray ';\\n srcip := 'srcip=x.x.x.x ';\\n srcfile := 'srcfile=' + dropzone_path + dir_name + '/*.xml ';\\n dstcluster := 'dstcluster=thor ';\\n dstname := 'dstname=' + dest_file + ' ';\\n prefix := 'prefix=FILENAME,';\\n recordsize := 'FILESIZE ';\\n nosplit := 'nosplit=1 ';\\n replicate := 'replicate=0 ';\\n cmdline := action + serv + user + pswd + srcip + srcfile + dstcluster + dstname + prefix + recordsize + over + nosplit+replicate;\\n // output(cmdline);\\n SprayFilesRes := STD.File.DfuPlusExec(cmdline);\\n status := if(SprayFilesRes , 'Spraying Completed' , 'Spraying Failed');\\n \\n return status;\\nEND;\\n\\nsample_rec := RECORD\\n STRING dirname;\\n STRING final_status;\\nEND;\\n\\nsample_ds := DATASET([{'dir1',''},{'dir2',''}],sample_rec);\\n\\nsample_rec sprayTransform(sample_ds le) := TRANSFORM\\n self.dirname := le.dirname;\\n self.final_Status := SprayFiles(le.dirname, le.final_Status);\\nEND; \\n\\nsample_spray := PROJECT(sample_ds , sprayTransform(LEFT));\\n\\n
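// Note: as posted, the dropzone_path line is missing its opening quote (it should read
// dropzone_path := '/abc/';), and the FUNCTION both takes a parameter named status and
// defines a local attribute named status -- renaming one of the two is most likely what
// the compiler needs before the IF() line will be accepted.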
\\n\\nKindly help.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-19 15:38:32\" },\n\t{ \"post_id\": 5713, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Re: Spray using PROJECT\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nI think you are on the right track, but pay attention to the error:\\n\\nI tried using project and then calling for each record(directory name) the spray function , encountered some error as the return types don't match.
\\n\\nYour function should just return a TRUE, or "spray completed" as a string to a new field in your DATASET. So you have a directory name and then a status field.\\n\\nAnother approach might be to use SEQUENTIAL in the spray process, like this\\n\\nSEQUENTIAL(SprayFiles(sample_rec[1].dirname),\\n SprayFiles(sample_rec[2].dirname),\\n SprayFiles(sample_rec[3].dirname))
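One way to resolve that return type, sketched minimally here (the command line is abbreviated and the WHEN idiom simply attaches the spray action to a fixed status string, so treat this as an illustration rather than tested code):

    IMPORT STD;
    SprayFiles(STRING dir_name) := FUNCTION
      // build the full dfuplus command line exactly as in the code above
      cmdline := 'action=spray ... srcfile=/abc/' + dir_name + '/*.xml ...';
      // DfuPlusExec is an action with no boolean result to test in an IF, so
      // return the status string and let WHEN run the spray as the side effect
      RETURN WHEN('Spraying Completed', STD.File.DfuPlusExec(cmdline));
    END;

The TRANSFORM can then simply assign SELF.final_status := SprayFiles(LEFT.dirname); with the status parameter dropped.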
\\n\\nBut I think if you can resolve your return type in the function your approach looks good.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-19 13:29:31\" },\n\t{ \"post_id\": 5707, \"topic_id\": 1310, \"forum_id\": 8, \"post_subject\": \"Spray using PROJECT\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where i have multiple directories and each directory having hundreds of xmls.\\n\\nI need to spray these xmls based on the directory.\\n\\nFor Ex : \\n\\ndir1 : 100's of xmls\\ndir2 : 100's of xmls\\ndir3 : 100's of xmls\\n\\n\\nindividually passing a directory name to a spray function, am able to spray.\\n\\nHow do we do the similar logic in a loop ? Loop through the directory list and spray the xml files for each directory name present in the list.?\\n\\nI tried using project and then calling for each record(directory name) the spray function , encountered some error as the return types don't match.\\n\\n\\n\\nSprayFiles(String dir_name ) := FUNCTION\\t\\t\\n\\t\\t\\n\\t\\tdropzone_path := /abc/';\\n\\t\\tdest_file := '~xxx' + dir_name;\\n\\t\\t\\t\\n\\t\\tESPPort := '8010';\\n\\t\\tESPIp := 'xx.xx.xx.xx';\\n\\t\\tserv := 'server=http://'+ ESPIp + ':' + ESPPort + '/ ';\\n user := 'username=asdf ';\\n\\t\\tpswd := 'password=xxx ';\\n\\t\\tover := 'overwrite=1 ';\\n action := 'action=spray ';\\n srcip := 'srcip=x.x.x.x ';\\n srcfile := 'srcfile=' + dropzone_path + dir_name + '/*.xml ';\\n\\t\\tdstcluster := 'dstcluster=thor ';\\n\\t\\tdstname := 'dstname=' + dest_file + ' ';\\n\\t\\tprefix := 'prefix=FILENAME,';\\n\\t\\trecordsize := 'FILESIZE ';\\n\\t\\tnosplit := 'nosplit=1 ';\\n\\t\\treplicate := 'replicate=0 ';\\n\\t\\tcmdline := action + serv + user + pswd + srcip + srcfile + dstcluster + dstname + prefix + recordsize + over + nosplit+replicate;\\n\\t\\t// output(cmdline);\\n\\t\\tSprayFiles := STD.File.DfuPlusExec(cmdline);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\t\\treturn SprayFiles;\\nEND;\\n\\nsample_rec := RECORD\\n STRING dirname;\\nEND;\\n\\nsample_ds := DATASET([{'dir1'},{'dir2'}],sample_rec);\\n\\nsample_rec sprayTransform(sample_ds le) := TRANSFORM\\n\\t\\tself.dirname := SprayFiles(le.dirname);\\nEND; \\n\\nsample_spray := PROJECT(sample_ds , sprayTransform(LEFT));\\n
\\n\\nCan we approach this scenario using some other logic ?\\n\\nKindly advise.\\n\\nThanks,\\nviswa\", \"post_time\": \"2014-05-17 01:37:27\" },\n\t{ \"post_id\": 5734, \"topic_id\": 1319, \"forum_id\": 8, \"post_subject\": \"Re: ECL EVENT Not getting Triggered\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nIt has to be a configuration issue on your end. I ran your code on our training cluster and it worked perfectly. Perhaps you have a bad path?\\n\\nHere is my code that works:\\n\\nIMPORT $, STD;\\nSTRING landingZoneIP := '10.173.248.1';\\nSTRING fileName := '/mnt/disk1/var/lib/HPCCSystems/dropzone/MyTestFile.txt';\\nSTRING espServerIP := 'http://10.173.248.1:8010/filespray';\\nEventName_1 := 'FileDroppedToLandingZone';\\n\\n\\nSTD.File.MonitorFile( EventName_1, landingZoneIP , fileName, FALSE, 1 , espServerIP);
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-22 13:33:37\" },\n\t{ \"post_id\": 5733, \"topic_id\": 1319, \"forum_id\": 8, \"post_subject\": \"Re: ECL EVENT Not getting Triggered\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Bob, \\n\\nThanks for the Reply.\\n\\nI tried with the first three parameters now its throwing the below mentioned error.\\n\\nNo access to Dali - this normally means a plugin call is being called from a thorslave,
\\n\\nThanks\\nvijay\", \"post_time\": \"2014-05-22 13:11:11\" },\n\t{ \"post_id\": 5732, \"topic_id\": 1319, \"forum_id\": 8, \"post_subject\": \"Re: ECL EVENT Not getting Triggered\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nTry using the function with just the first three parameters. It looks like your ESPServerIP definition is incorrect. The docs say:\\n\\nOptional. A null-terminated string containing the protocol, IP, port, and directory, or the DNS equivalent, of the ESP server program. This is usually the same IP and port as ECL Watch, with “/FileSpray” appended. If omitted, the default is the value contained in the lib_system.ws_fs_server attribute.
\\n\\nSo try your function with the parameter omitted, and then if you need it, try it with the "/FileSpray" string, like this:\\n\\nSTRING espServerIP := 'http://xx.xx.xx.xxx:8010/FileSpray';
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-22 11:48:34\" },\n\t{ \"post_id\": 5731, \"topic_id\": 1319, \"forum_id\": 8, \"post_subject\": \"ECL EVENT Not getting Triggered\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nI am trying to automate the SPRAYING process for which I am using ECL Scheduler.\\nMy requirement is to monitor the landing zone for a specific file. If that file is placed in the landing Zone I should pick that file and SPRAY the same into THOR.\\n\\nI have written a sample code to monitor the Landing Zone for the file. If that file is present then I am triggering an output in another function - OUTPUT('FILE IN LANDING ZONE')\\n\\nPlease find the below code for your reference.\\n\\nIMPORT $, STD;\\nIMPORT AutomateSpraying.AutoSpray_Module;\\n\\nSTRING landingZoneIP := 'xx.xxx.xxx.xx';\\n//STRING directory := '/var/lib/HPCCSystems/ftp/in'; \\nSTRING fileName := '/var/lib/HPCCSystems/ftp/in/2013-10-01_1.xml';\\nSTRING espServerIP := 'http://xx.xx.xx.xxx:8010';\\nEventName_1 := 'FileDroppedToLandingZone';\\n\\n\\nSTD.File.MonitorFile( EventName_1, landingZoneIP , fileName, FALSE, 1 , espServerIP);\\n\\n\\nAutoSpray_Module.EventTriggered() : WHEN(EventName_1, COUNT(1));\\n\\nOUTPUT('DONE');
\\n\\nI am missing something when specifying the ESP server IP and LANDING ZONE IP. I am getting the below mentioned error.\\n\\nError: System error: -2: SOAP server error[HTTP Status 500 Internal Server ErrorSOAP fault: string=[400: Bad Request [Method DfuMonitorRequest not available in service WsSMC]] .] (0, 0), -2,\\n\\nKindly Help.\\n\\nThanks\\nVijay\", \"post_time\": \"2014-05-22 11:05:52\" },\n\t{ \"post_id\": 5764, \"topic_id\": 1320, \"forum_id\": 8, \"post_subject\": \"Re: Dataset transform question\", \"username\": \"Leofei\", \"post_text\": \"Richard, it's helpful! Thanks for the optimization. \\n\\n-Fan\", \"post_time\": \"2014-05-28 14:39:39\" },\n\t{ \"post_id\": 5761, \"topic_id\": 1320, \"forum_id\": 8, \"post_subject\": \"Re: Dataset transform question\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nThis version (Dan's code very slightly modified) eliminates the need for the final PROJECT to re-increment the records:// Original data structure\\nRawDataRec := RECORD\\n UNSIGNED i;\\n STRING firstName;\\n STRING lastName;\\n STRING thing;\\nEND;\\n\\n// Original data\\nr10 := DATASET\\n (\\n [\\n {1,'John','Smith','One house'},\\n {2,'','','Two Cars'},\\n {3,'Fran','Bush','One house'},\\n {4,'','','One company'},\\n {5,'','','One dog'},\\n {6,'','','One car'}\\n ],\\n RawDataRec\\n );\\n\\nOUTPUT(r10,NAMED('OriginalData'));\\n\\n// Coerce the index field so that the records we need to group\\n// have identical index values\\nr20 := ITERATE\\n (\\n r10,\\n TRANSFORM\\n (\\n RawDataRec,\\n SELF.i := IF(RIGHT.firstName = '', LEFT.i, LEFT.i+1),\\n SELF := RIGHT\\n )\\n );\\n\\nOUTPUT(r20,NAMED('IterateResult'));\\n\\n// New data structure\\nThingRec := RECORD\\n STRING thing;\\nEND;\\n\\nFinalDataRec := RECORD\\n UNSIGNED i;\\n STRING firstName;\\n STRING lastName;\\n DATASET(ThingRec) things;\\nEND;\\n\\n// Collapse records based on index value\\nr30 := AGGREGATE\\n (\\n r20,\\n FinalDataRec,\\n TRANSFORM\\n (\\n FinalDataRec,\\n SELF.things := RIGHT.things + ROW({LEFT.thing},ThingRec),\\n SELF := LEFT\\n ),\\n LEFT.i\\n );\\n\\nOUTPUT(r30,NAMED('CollapsedResult'));
I just changed the TRANSFORM for the initial ITERATE to create the final numbering in that pass.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-28 14:22:18\" },\n\t{ \"post_id\": 5742, \"topic_id\": 1320, \"forum_id\": 8, \"post_subject\": \"Re: Dataset transform question\", \"username\": \"Leofei\", \"post_text\": \"It works well. Thanks a lot, Dan!\", \"post_time\": \"2014-05-23 14:01:06\" },\n\t{ \"post_id\": 5739, \"topic_id\": 1320, \"forum_id\": 8, \"post_subject\": \"Re: Dataset transform question\", \"username\": \"DSC\", \"post_text\": \"Here is one way to do that:\\n\\n// Original data structure\\nRawDataRec := RECORD\\n UNSIGNED i;\\n STRING firstName;\\n STRING lastName;\\n STRING thing;\\nEND;\\n\\n// Original data\\nr10 := DATASET\\n (\\n [\\n {1,'John','Smith','One house'},\\n {2,'','','Two Cars'},\\n {3,'Fran','Bush','One house'},\\n {4,'','','One company'},\\n {5,'','','One dog'},\\n {6,'','','One car'}\\n ],\\n RawDataRec\\n );\\n\\nOUTPUT(r10,NAMED('OriginalData'));\\n\\n// Coerce the index field so that the records we need to group\\n// have identical index values\\nr20 := ITERATE\\n (\\n r10,\\n TRANSFORM\\n (\\n RawDataRec,\\n SELF.i := IF(RIGHT.firstName = '', LEFT.i, RIGHT.i),\\n SELF := RIGHT\\n )\\n );\\n\\nOUTPUT(r20,NAMED('IterateResult'));\\n\\n// New data structure\\nThingRec := RECORD\\n STRING thing;\\nEND;\\n\\nFinalDataRec := RECORD\\n UNSIGNED i;\\n STRING firstName;\\n STRING lastName;\\n DATASET(ThingRec) things;\\nEND;\\n\\n// Collapse records based on index value\\nr30 := AGGREGATE\\n (\\n r20,\\n FinalDataRec,\\n TRANSFORM\\n (\\n FinalDataRec,\\n SELF.things := RIGHT.things + ROW({LEFT.thing},ThingRec),\\n SELF := LEFT\\n ),\\n LEFT.i\\n );\\n\\nOUTPUT(r30,NAMED('CollapsedResult'));\\n\\n// Rewrite the index value so it is a simple increment\\nr40 := PROJECT\\n (\\n r30,\\n TRANSFORM\\n (\\n FinalDataRec,\\n SELF.i := COUNTER,\\n SELF := LEFT\\n )\\n );\\n\\nOUTPUT(r40,NAMED('FinalResult'));
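For comparison, the same collapse can also be written with the GROUP / ROLLUP(GROUP) pattern that appears elsewhere in these threads; a minimal sketch, assuming the r20, RawDataRec, FinalDataRec and ThingRec definitions above:

    FinalDataRec rollThem(RawDataRec L, DATASET(RawDataRec) allRows) := TRANSFORM
      SELF.things := PROJECT(allRows, TRANSFORM(ThingRec, SELF := LEFT));
      SELF := L;
    END;
    r30alt := ROLLUP(GROUP(SORT(r20, i), i), GROUP, rollThem(LEFT, ROWS(LEFT)));
    OUTPUT(r30alt, NAMED('RollupGroupResult'));

Both forms group on the coerced index value; AGGREGATE folds rows into the result one at a time, while ROLLUP(GROUP) hands the whole group to a single TRANSFORM call.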
\\nHope this helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-05-23 13:21:37\" },\n\t{ \"post_id\": 5735, \"topic_id\": 1320, \"forum_id\": 8, \"post_subject\": \"Dataset transform question\", \"username\": \"Leofei\", \"post_text\": \"I have a question regarding the table transform, not sure whether there is a good way to do it:\\n\\nFor example, here is the table I have right now:\\n\\n1 John Smith One house\\n2 Two Cars\\n3 Fran Bush One house\\n4 One company\\n5 One dog\\n6 One Car\\n\\nThe Table I want is a dataset with a child dataset: the first two records associated with John, the last four records associated with Fran. I cannot edit a good table format here, so I updated a picture to show the question. \\n\\nThe only criteria to decide the next record belongs to whom is whether there is a new name or not.\\n\\nPlease let me know if the question is not clear enough. \\nThanks a lot!\\n-Fan\", \"post_time\": \"2014-05-22 21:57:43\" },\n\t{ \"post_id\": 5772, \"topic_id\": 1323, \"forum_id\": 8, \"post_subject\": \"Re: CRON is not working as expected\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nMaybe the problem is with the clock settings on your cluster. CRON will most likely be using the clock on the Dali or ECL Agent machine, so that's the clock you really need to look at, not the time on your PC that launched the job.\\n\\nNote also this bit from the docs:The time parameter is unix-standard cron time, expressed in UTC (aka Greenwich Mean Time) as a string containing the following, space-delimited components:
And your setting for every day at midnight (AKA 12:00 AM) should be:dailyAtMidnight := CRON('0 0 * * *');
Which, of course, should fire at 7:00 PM EST every day (unless your cluster's clocks are set to GMT). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-29 19:27:40\" },\n\t{ \"post_id\": 5768, \"topic_id\": 1323, \"forum_id\": 8, \"post_subject\": \"Re: CRON is not working as expected\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Bob,\\n\\nThe below example should fire at 0523 each day, but it is not firing, instead it went to wait state.\\n\\nWHEN(EVENT('CRON','23 5 * * *'));\\n\\nand i tried using below example where it fires at 23 mins of current hour.\\n\\nWHEN(EVENT('CRON','23 0-23/1 * * *'),count(1));\\n\\nwe are confusing with the pattern as its not working as expected, we have a requirement to trigger the event daily at 12:00 AM.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-05-29 07:29:40\" },\n\t{ \"post_id\": 5756, \"topic_id\": 1323, \"forum_id\": 8, \"post_subject\": \"Re: CRON is not working as expected\", \"username\": \"bforeman\", \"post_text\": \"Hi Arjun,\\n\\nWhat times are you targeting? \\n
WHEN(EVENT('CRON','23 5 * * *'));
\\nThis example should fire at 0523 each day.\\n\\nWHEN(EVENT('CRON','4 9 23 5 5'));
\\nThe second example should trigger at 0904 on the 23rd of May on Friday, is this intended? Is the day of week really necessary? Did you try this without the last parameter?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-28 13:40:05\" },\n\t{ \"post_id\": 5738, \"topic_id\": 1323, \"forum_id\": 8, \"post_subject\": \"CRON is not working as expected\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nwe are facing issue with CRON actually. Please find the below two syntaxs.\\n\\n#1\\nAutoSpray_Module.EventTriggered('someevent') : WHEN(EVENT('CRON','* * * * *'));\\n\\n#2\\nAutoSpray_Module.EventTriggered('someevent') : WHEN(EVENT('CRON','[color=#FF4040:3trvzgfu]23 5 * * *'));\\nAutoSpray_Module.EventTriggered('someevent') : WHEN(EVENT('CRON','[color=#FF4040:3trvzgfu]04 9 23 5 5'));\\n\\nwhen we ran syntax #1 it is running, it was triggering the event every minute. but when we are running the syntax #2 it went to wait state and its not running at particular time as specified. we have a requirement to trigger an event on daily basis at a particular time. Appreciate your suggestion on this.\\n\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-05-23 13:21:19\" },\n\t{ \"post_id\": 5757, \"topic_id\": 1324, \"forum_id\": 8, \"post_subject\": \"Re: How to Invoke STD.File.fSprayXML within a function\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nMy gut feeling tells me that one of your parameters in the function is causing that error.\\n\\nTo verify this, just test the statement stand alone in a builder window. If you see the same error, you know that one of your parameters is wrong or not needed. My bet is on the dropzoneIP, try it with the default setting.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-28 13:44:56\" },\n\t{ \"post_id\": 5740, \"topic_id\": 1324, \"forum_id\": 8, \"post_subject\": \"How to Invoke STD.File.fSprayXML within a function\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nI trying to spray files present with a specific folder from the landing zone.\\n\\nIMPORT $, STD;\\n\\nSprayFiles(String dir_name , String status ) := FUNCTION \\n\\t\\t\\tlandingZoneIp := 'xx.xx.xx.xx';\\n\\t\\t\\tfileName := 'c:/DATA/RA_RE' + dir_name + '/ *.xml';\\n\\t\\t\\trecordTag := 'useractivity';\\n\\t\\t\\tdropZoneIP := ''xx.xx.xx.xx'';\\n\\t\\t\\tlogicalFileName := '~asdf::' + dir_name + '::23rd_May_14.xml';\\n\\t\\t\\t\\t\\t \\n\\t\\t\\tdfuwuid := STD.File.fSprayXML( landingZoneIp , fileName, 10000 , recordTag , ,dropZoneIP, logicalFileName);\\n\\t\\t\\tRETURN dfuwuid ;\\nEND;\\n\\nsample_rec := RECORD\\n STRING dirname;\\n\\tSTRING final_Status;\\nEND;\\n\\nsample_ds := DATASET([{'RA_RE', ''},{'RA_USER', ''}],sample_rec);\\n\\nsample_rec sprayTransform(sample_ds l) := TRANSFORM\\n self.dirname := l.dirname;\\n self.final_Status := SprayFiles(l.dirname, l.final_Status);\\nEND; \\n\\nsample_spray := PROJECT(sample_ds , sprayTransform(LEFT));\\n\\nOUTPUT(sample_spray);
\\n\\nI am facing the below mentioned error when I try to return dfuwuid present within the function.\\n\\nError: System error: -1: Graph[1], project[3]: SLAVE xx.xx.xx.xx:6060: No access to Dali - this normally means a plugin call is being called from a thorslave, (0, 0), -1,
\\n\\nIf I return a STRING as 'success' from the folder, I am not getting any error but the file is not getting sprayed into THOR.\\n\\nKindly advise how to proceed further.\\n\\nThanks\\nVijay\", \"post_time\": \"2014-05-23 13:46:53\" },\n\t{ \"post_id\": 5765, \"topic_id\": 1325, \"forum_id\": 8, \"post_subject\": \"Re: "May not be supported" runtime warning\", \"username\": \"DSC\", \"post_text\": \"Done: https://track.hpccsystems.com/browse/HPCC-11537\\n\\nThanks, everyone!\", \"post_time\": \"2014-05-28 19:35:42\" },\n\t{ \"post_id\": 5762, \"topic_id\": 1325, \"forum_id\": 8, \"post_subject\": \"Re: "May not be supported" runtime warning\", \"username\": \"ghalliday\", \"post_text\": \"The logging is probably not worth reporting any more - if you open a jira then I will remove it.\\n\\nIt is because there were some instances where implicitly denormalizing the child dataset didn't work, but I think most of them have been fixed for a long time.\", \"post_time\": \"2014-05-28 14:29:36\" },\n\t{ \"post_id\": 5760, \"topic_id\": 1325, \"forum_id\": 8, \"post_subject\": \"Re: "May not be supported" runtime warning\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nDon't know about the warning, but this is the way I usually code an inline dataset with a nested child dataset:SubRec := RECORD\\n UNSIGNED s;\\nEND;\\n\\nDataRec := RECORD\\n UNSIGNED i;\\n DATASET(SubRec) d;\\nEND;\\n\\nr := DATASET\\n (\\n [\\n {1,[100,200]},\\n {2,[300,400]}\\n ],\\n DataRec\\n );\\nr;\\nr.d;
Note that this version gets the same warning.\\n\\nRichard\", \"post_time\": \"2014-05-28 14:11:54\" },\n\t{ \"post_id\": 5741, \"topic_id\": 1325, \"forum_id\": 8, \"post_subject\": \""May not be supported" runtime warning\", \"username\": \"DSC\", \"post_text\": \"Demonstration code:\\n\\nSubRec := RECORD\\n UNSIGNED s;\\nEND;\\n\\nDataRec := RECORD\\n UNSIGNED i;\\n DATASET(SubRec) d;\\nEND;\\n\\nr := DATASET\\n (\\n [\\n {1,DATASET([100,200],SubRec)},\\n {2,DATASET([300,400],SubRec)}\\n ],\\n DataRec\\n );\\n\\nr;\\nr.d;
\\nThis compiles and executes correctly but emits the following warning at runtime:\\n\\nWarning: dataset r.d may not be supported without using NORMALIZE (0, 0 - unknown)
\\nIs this warning significant? Is it safer/better to extract the child dataset before using it?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2014-05-23 13:58:59\" },\n\t{ \"post_id\": 5759, \"topic_id\": 1328, \"forum_id\": 8, \"post_subject\": \"Re: Scheduling the task getting an issue\", \"username\": \"bforeman\", \"post_text\": \"Your code looks OK to me, I don't see anything reported on this, can you please open an issue in the Issue Tracker, and attach all logs and as much information as you can.\\n\\nThanks,\\n\\nBob\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\", \"post_time\": \"2014-05-28 14:03:21\" },\n\t{ \"post_id\": 5747, \"topic_id\": 1328, \"forum_id\": 8, \"post_subject\": \"Scheduling the task getting an issue\", \"username\": \"Balachandar\", \"post_text\": \"Hi Team,\\n\\nI am trying to schedule the job. But while scheduling only i got an issue. Without schedule code its working fine.\\nHere is the sample code\\n\\n Execution := SEQUENTIAL(\\n SpryingData_Module.SprayFiles(FileName.landingZonePath, FileName.sprayedFileName),\\n Module.Result,\\n AddingSubKey_Module.addSubFile(FileName.SuperKey,FileName.Index)\\n\\t );\\n\\n Execution: WHEN(EVENT('CRON','12 0-23/1 * * *'),COUNT(1));
\\n\\nThe Error i am getting the below one\\nError: assert(info->isActivity) failed - file: /var/lib/jenkins/workspace/LN-Candidate-4.2.4-rc2/LN/centos-5.7-x86_64/HPCC-Platform/ecl/hqlcpp/hqlresource.cpp, line 3217\", \"post_time\": \"2014-05-27 13:00:52\" },\n\t{ \"post_id\": 5749, \"topic_id\": 1329, \"forum_id\": 8, \"post_subject\": \"Re: dfuplus - Spray multiple XML files\", \"username\": \"DSC\", \"post_text\": \"Take a look at the NOROOT open to the DATASET() declaration you're using to access the logical file. That parameter allows you to have repeated top-level tags. You may have to adjust any XPATH values to accommodate that change, as this effectively turns your XML documents into a series of rows within a single document that has no root tag.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-05-27 14:36:42\" },\n\t{ \"post_id\": 5748, \"topic_id\": 1329, \"forum_id\": 8, \"post_subject\": \"dfuplus - Spray multiple XML files\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Dear Team,\\nMy requirement is to spray multiple XML files, which has same structure(contains same root and child tag names). I used dfuplus command to spray multiple XML files and mapped into a single logical file. \\nIn my ECL program, I am creating a dataset using this logical file as below\\n[color=#40BF00:hyv44rb9]EXPORT file := DATASET('~presales::demo::files',rform,XML('rfp-document/document'));\\n\\nHowever the execution of the ECL/WorkUnit failed with syntax error "Trailing tag open after close of root tag" on my logical file.\\nThis is obvious because I am spraying XML files with similar root tag into single logical file and there can be multiple instances of root tag.\\nI don't face the issue while I spray single XML file, because it containe only one root tag.\\n\\nCould you please help me by providing a solution for resolving the error(either on how to make Logical file to contain single root tag or a way for ECL to handle that).\", \"post_time\": \"2014-05-27 13:22:43\" },\n\t{ \"post_id\": 5784, \"topic_id\": 1331, \"forum_id\": 8, \"post_subject\": \"Re: Memory limit exceeded problem\", \"username\": \"fanglimian\", \"post_text\": \"Hi Bob, \\n\\nThanks for your reply!\\nI did a simple lookup inner join, I ran the same code before and it was fine. \\nThe problem is solved now as it was working again the next day.\", \"post_time\": \"2014-05-30 18:05:04\" },\n\t{ \"post_id\": 5778, \"topic_id\": 1331, \"forum_id\": 8, \"post_subject\": \"Re: Memory limit exceeded problem\", \"username\": \"bforeman\", \"post_text\": \"Hi Limian,\\n\\nIt's always helpful if we can see the actual JOIN code. What kind of a JOIN type are you attempting? Is the LEFT recordset the larger of the two datasets? Have you tried local operation using LOOKUP, ALL, or even the new SMART option?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-30 12:38:46\" },\n\t{ \"post_id\": 5766, \"topic_id\": 1331, \"forum_id\": 8, \"post_subject\": \"Memory limit exceeded problem\", \"username\": \"fanglimian\", \"post_text\": \"Hi ,\\n\\nI was trying to do a simple join between my dataset ( about 200,000 records) and a database in prod. I received the error below saying memory limit exceeded.\\nI am wondering whether it is a system problem or there is something I can do with my codes to avoid this error? 
Thank you!\\n\\n\\nError: System error: 1300: Graph[44], diskread[47]: SLAVE 10.194.96.10:6600: memory limit exceeded, memory limit exceeded - handling file: //10.194.198.37:7100/var/lib/HPCCSystems/hpcc-data/thor/thor/base/cdw/20140408/marketview_ut._37_of_400 (0, 0), 1300,\", \"post_time\": \"2014-05-28 21:19:58\" },\n\t{ \"post_id\": 5810, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Re: Ignore Case while searching on Text\", \"username\": \"rtaylor\", \"post_text\": \"Rajesh,\\n\\nAnd if, after you try it, you find it is inefficient to do change case for the target, then you'll just need to do multiple case-sensitive searches (one for each possible permutation), like this:IMPORT STD;\\ns := 'Fred FRed FRED';\\n\\nr1 := STD.Str.Find(s,'Fred',1);\\nr2 := STD.Str.Find(s,'FRED',1);\\nr1;\\nr2
NB -- this code did not find the instance of "FRed"\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-02 14:23:29\" },\n\t{ \"post_id\": 5809, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Re: Ignore Case while searching on Text\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Thanks Bob!\\nIt is fast .\", \"post_time\": \"2014-06-02 14:12:58\" },\n\t{ \"post_id\": 5808, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Re: Ignore Case while searching on Text\", \"username\": \"bforeman\", \"post_text\": \"The functions are pretty efficient, my suggestion is to give it a try and see how it performs!
\", \"post_time\": \"2014-06-02 14:11:39\" },\n\t{ \"post_id\": 5807, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Re: Ignore Case while searching on Text\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Hi Bob,\\n\\nTarget is a text data which can be of huge size.\\nIs it advisable to upper case the target?\", \"post_time\": \"2014-06-02 14:06:40\" },\n\t{ \"post_id\": 5802, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Re: Ignore Case while searching on Text\", \"username\": \"bforeman\", \"post_text\": \"Hi Rajesh,\\n\\nIf I understand you correctly, you are looking for a case insensitive search.\\n\\nIn that case, you could simply upper case the source and target:\\n\\n
A := IF(STD.Str.Find(STD.Str.ToUpperCase(MySource), STD.Str.ToUpperCase(MyTarget),1) = 2,\\n 'Success',\\n 'Failure - 1'); //success\\n
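If uppercasing a very large target ever does prove costly, REGEXFIND offers a case-insensitive option as well; a minimal sketch, assuming the search term is a plain substring (regex metacharacters would need escaping) and MyTarget is the target string from the example above:

    hasMatch := REGEXFIND('fred', MyTarget, NOCASE); // TRUE if found, ignoring case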
\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-06-02 12:31:27\" },\n\t{ \"post_id\": 5799, \"topic_id\": 1339, \"forum_id\": 8, \"post_subject\": \"Ignore Case while searching on Text\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Dear Team,\\n\\nPlease advice on how to enforce an ignore case sensitive search while using STD.Str.find() function.\", \"post_time\": \"2014-06-02 11:38:33\" },\n\t{ \"post_id\": 5812, \"topic_id\": 1340, \"forum_id\": 8, \"post_subject\": \"Re: How to automate Spraying?\", \"username\": \"rtaylor\", \"post_text\": \"KatyChow,\\n\\nYes. There is a command line utility (DFUplue.exe -- documented in the Client Tools PDF) that sprays files, and several functions in the Standard Library (documented in the Standard Library Reference, available online and as part of the compiled help file for the IDE) that will spray files from within ECL code. \\n\\nThese issues are covered in the Applied ECL: ECL Code Generation Tools course available online at http://learn.lexisnexis.com/hpcc.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-02 15:30:09\" },\n\t{ \"post_id\": 5811, \"topic_id\": 1340, \"forum_id\": 8, \"post_subject\": \"How to automate Spraying?\", \"username\": \"KatyChow\", \"post_text\": \"Is there simple code or function that will automate the spraying process?\", \"post_time\": \"2014-06-02 15:17:05\" },\n\t{ \"post_id\": 5832, \"topic_id\": 1343, \"forum_id\": 8, \"post_subject\": \"Re: Is there an easy way to find file size with ECL?\", \"username\": \"JimD\", \"post_text\": \"If you want to get the size of the sprayed file (the logical file) then \\n\\nOUTPUT(STD.File.GetLogicalFileAttribute(file,'size'));
\\nThis is from the Standard Library Reference section on GetLogicalFileAttribute:\\n\\nIf you want the size of a file on disk (such as one on your landing zone), then you would use a linux command inside STD.System.Util.CmdProcess (also found in the Standard Library)\\n\\nwc -c may be the command you want, depending on your server's os.\\n\\nHope this helps,\\n\\nJim\", \"post_time\": \"2014-06-04 16:04:27\" },\n\t{ \"post_id\": 5831, \"topic_id\": 1343, \"forum_id\": 8, \"post_subject\": \"Re: Is there an easy way to find file size with ECL?\", \"username\": \"bforeman\", \"post_text\": \"Hi Katy,\\n\\nWhy will file size verify that your file is complete? \\n\\nIf you are spraying multiple files from a designated landing zone, a best practice is to place a small dummy file on the landing zone that flags you that files are uploaded and are ready to spray. Use the MonitorFile function to detect if that dummy file is there and then start your sprays. After your sprays you can then delete the dummy file.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-06-04 15:57:02\" },\n\t{ \"post_id\": 5830, \"topic_id\": 1343, \"forum_id\": 8, \"post_subject\": \"Is there an easy way to find file size with ECL?\", \"username\": \"KatyChow\", \"post_text\": \"Hi there,\\n\\nI have searched around on the web to try to see if there is an easy function to determine the size of my file. I want a quick way to be able to verify what I am spraying is the complete file because of the number of files I have to spray.\\n\\nThanks so much!\\n\\nKaty\", \"post_time\": \"2014-06-04 14:51:09\" },\n\t{ \"post_id\": 21671, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"jwilt\", \"post_text\": \"A couple examples of a slightly different approach...\\n\\nrec := RECORD\\n UNSIGNED Elevation;\\n UNSIGNED Aspect;\\n UNSIGNED Slope;\\nEND;\\nZipFilename := '10.18.9.212:/var/lib/HPCCSystems/mydropzone/myfile_head3.csv.gz'; //the zipped container\\ntempf := '/tmp/myFile' + WORKUNIT + '.gz';\\nUnzipCommandRaw := ''\\n 'scp ' + ZipFileName + ' ' + tempf + ' 2>&1;' +\\n 'gzip -d --stdout ' + tempf + ' 2>&1; ' +\\n 'rm -rf ' + tempf + ';' +\\n '';\\n\\nUnzipCommand := 'bash -c \\\\'' + UnzipCommandRaw + '\\\\'';\\nunzippedDS := PIPE(UnzipCommand, rec, CSV(TERMINATOR(['\\\\n','\\\\n\\\\r','\\\\r\\\\n']), SEPARATOR(','), QUOTE('"')));\\n\\n// Save the unzippedDS...\\n
\\n\\n...this one runs on the web at play.hpccsystems.com:8010:\\n\\nrec := RECORD\\n UNICODE YearofBirth;\\n UNICODE Gender;\\n UNICODE Ethnicity;\\n UNICODE ChildsFirstName;\\n UNICODE Count;\\n UNICODE Rank;\\nEND;\\n\\nZipFilename := '10.0.0.208:/var/lib/HPCCSystems/mydropzone/Most_Popular_Baby_Names_by_Sex_and_Mother_s_Ethnic_Group__New_York_City.csv.gz'; //the uncompressed file\\ntempf := '/tmp/myFile_' + WORKUNIT + '.gz';\\n\\n// Copy the file to a local temp file, unzip it to STDOUT, remove the temp file\\nUnzipCommandraw := '' +\\n 'scp ' + ZipFilename + ' ' + tempf + ' 2>&1;' +\\n 'gzip -d --stdout ' + tempf + ' 2>&1;' +\\n 'rm -rf ' + tempf + ' 2>&1;' +\\n '';\\n\\n// Wrap whatever CMDraw script with a bash command\\nUnzipCommand := 'bash -c \\\\'' + UnzipCommandraw + '\\\\'';\\n\\nunzippedDS := PIPE(UnzipCommand, rec, \\n CSV(HEADING(1), \\n TERMINATOR(['\\\\n','\\\\n\\\\r','\\\\r\\\\n']), \\n SEPARATOR(','), \\n QUOTE('"')));\\nOUTPUT(unzippedDS,NAMED('unzippedDS'));\\n
\", \"post_time\": \"2018-04-15 23:32:47\" },\n\t{ \"post_id\": 21641, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"tlhumphrey2\", \"post_text\": \"The following isn't my original problem. But, it is very similar. And, it works. Below, I give the code that unzips a file I have on the dropzone. And, below it I give the unzipped file. There is one oddity. I expected the 1st OUTPUT statement to output to the workunit the contents of the unzipped file. But, it only outputs the column headers. So, I added a 2nd OUTPUT statement which does output the contents of the unzipped file.\\n\\nrec := RECORD\\n UNSIGNED Elevation;\\n UNSIGNED Aspect;\\n UNSIGNED Slope;\\nEND;\\nUnzipFilename := '~file::10.18.9.212::var::lib::^H^P^C^C^Systems::mydropzone::myfile_head3.csv'; //the uncompressed file\\nZipFilename := '/var/lib/HPCCSystems/mydropzone/myfile_head3.csv.gz'; //the zipped container\\nUnzipCommand := 'gunzip ' + ZipFilename; //the command line that produces the uncompressed file\\nunzippedDS :=DATASET(UnzipFilename,rec,PIPE(UnzipCommand,CSV));\\nOUTPUT(unzippedDS,NAMED('unzippedDS'));\\nOUTPUT(DATASET(UnzipFilename,rec,CSV(HEADING(1),SEPARATOR(','),TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r']))),NAMED('the_unzipped_dataset'));\\n
\\n\\nHere is the unzipped file's content:\\nElevation,Aspect,Slope\\n2596,51,3\\n2590,56,2\\n2804,139,9\\n2785,155,18\\n2595,45,2\\n2579,132,6\\n2606,45,7\\n2605,49,4\\n2617,45,9\\n2612,59,10\\n
\", \"post_time\": \"2018-04-11 17:20:21\" },\n\t{ \"post_id\": 21593, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"jwilt\", \"post_text\": \"Can someone point us to a working example of this type of thing?\\n\\nE.g... a gz/tar/zip file sprayed to the cluster, then read and passed to a PIPE command to decompress the file via Linux commands?\\n\\nI'm sure there are actual-working examples, just haven't found them.\\nThanks.\", \"post_time\": \"2018-04-10 17:31:29\" },\n\t{ \"post_id\": 5845, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nThis error usually indicates it's not finding any record delimiters (which it wouldn't be likely to do in a compressed binary file) so it sounds to me like the unzip is not happening. \\n\\nFirst, you should verify that gzip is present and capable of unzipping the file. \\n\\nIf it is present, it's possible that it's not getting the proper command line parameters to do the unzip. I would test that first by doing a command line unzip yourself to discover what it takes to make that work, and then take what you learn from that and apply it to the PIPE option.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-06 14:29:10\" },\n\t{ \"post_id\": 5844, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"tlhumphrey2\", \"post_text\": \"Here is what my new code looks like:\\n\\n\\nrec := RECORD\\n STRING field1;\\n STRING field2;\\n STRING field3;\\n STRING field4;\\nEND;\\nfilename:='~file::10.239.40.5::var::lib::^H^P^C^C^Systems::dropzone::head100_1025b_election_retweets.csv.gz';\\nhiggsDS :=DATASET(filename,rec,PIPE('gzip',CSV));\\nOUTPUT(higgsDS);
\\n\\nBut, I'm getting the following error message: Error: System error: -1: Graph[1], diskread[2]: SLAVE 10.239.40.6:20100: CFileSerialStream::get read past end of stream, CFileSerialStream::get read past end of stream - handling file: //10.239.40.5:7100/var/lib/HPCCSystems/dropzone/head100_1025b_election_retweets.csv.gz\\n\\nWorkunit on 10.239.40.2 is W20140606-094758.\\n\\nBy the way, this dataset only has 100 lines (records).\", \"post_time\": \"2014-06-06 13:58:16\" },\n\t{ \"post_id\": 5838, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Re: Use PIPE to unzip file.\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nI would use the PIPE option on the DATASET declaration (an input pipe) instead of the PIPE function (a through pipe), since the file on disk is zipped. Since the file seems to be on the dropzone, the PIPE option on DATASET should unzip it (assuming the gzip program is available to use) as it reads the file.\\n\\nYour RECORD structure, however, should reflect the structure of the unzipped records, and not just define a single blob field. So I would try doing it something like this:rec := RECORD\\n //DATA D; //put your real field definitions in here\\nEND;\\nfilename:='~file::10.239.40.5::var::lib::^H^P^C^C^Systems::dropzone::head100_1025b_election_retweets.csv.gz';\\nhiggsDS :=DATASET(filename,rec,PIPE('gzip',CSV));\\nOUTPUT(higgsDS);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-05 20:25:03\" },\n\t{ \"post_id\": 5837, \"topic_id\": 1345, \"forum_id\": 8, \"post_subject\": \"Use PIPE to unzip file.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have the following code that should use pipe to unzip a .gz file:\\n\\n\\nrec := RECORD\\n DATA D;\\nEND;\\nfilename:='~file::10.239.40.5::var::lib::^H^P^C^C^Systems::dropzone::head100_1025b_election_retweets.csv.gz';\\nhiggsDS :=DATASET(filename,rec,FLAT);\\nunzipped_higgsDS := PIPE(higgsDS,'gzip',OUTPUT(CSV));\\n
\\n\\nI'm unsure if I have my command, 'gzip' in the correct form. In addition, I don't know if gzip is available on the cluster I'm using, i.e. thor on the Machine Learning Dev Cluster, 10.239.40.2.\", \"post_time\": \"2014-06-05 17:10:43\" },\n\t{ \"post_id\": 5897, \"topic_id\": 1351, \"forum_id\": 8, \"post_subject\": \"Re: Rollup with GROUP\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Richard..\\n\\nViswa\", \"post_time\": \"2014-06-15 23:58:48\" },\n\t{ \"post_id\": 5886, \"topic_id\": 1351, \"forum_id\": 8, \"post_subject\": \"Re: Rollup with GROUP\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nYour desired output:id fname lname email id1 id2\\n1 abc def abc.com 12 15\\n a d.e abc.xyz\\n defg
looks to me almost like the kind of result you get from the DISTRIBUTION action (for each field you get a breakout of each unique value in that field and count of the records with that unique value).\\n\\nOtherwise, I can only see accomplishing what you want with multiple passes (probably crosstab reports), not a single operation.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-13 13:29:41\" },\n\t{ \"post_id\": 5880, \"topic_id\": 1351, \"forum_id\": 8, \"post_subject\": \"Re: Rollup with GROUP\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow can i achieve the desired result ?\\n\\nIncase i am trying to group by other fields along with id, i would still have some duplicates in the result and have multiple rows ?\\n\\nThanks,\\nviswa\", \"post_time\": \"2014-06-12 19:42:27\" },\n\t{ \"post_id\": 5879, \"topic_id\": 1351, \"forum_id\": 8, \"post_subject\": \"Re: Rollup with GROUP\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nYour result is the way it is because you did your GROUP by ID.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-12 18:17:44\" },\n\t{ \"post_id\": 5876, \"topic_id\": 1351, \"forum_id\": 8, \"post_subject\": \"Rollup with GROUP\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am trying to perform a rollup with group but not getting the intended result.\\n\\nInitial Dataset :\\n\\n\\nid fname lname email id1 id2\\n1 abc def abc.com 12 0\\n1 abc d.e abc.com 0 15\\n1 abc defg abc.com 12 0\\n1 a def abc.xyz 0 15\\n
\\n\\nRequired Output :\\n\\n\\nid fname lname email id1 id2\\n1 abc def abc.com 12 15\\n a d.e abc.xyz\\n defg\\n
\\n\\nThe code using ROLLUP and GROUP :\\n\\n\\ninitialds_grouped := GROUP(initial_ds , id);\\n\\nnameRec := RECORD\\n STRING name;\\t\\nEND;\\t\\n\\nid_rec := RECORD\\n UNSIGNED7 id;\\nEND;\\n\\nfinalRec := RECORD\\n UNSIGNED6 id_Val;\\n DATASET(nameRec) lnames;\\n DATASET(nameRec) fnames;\\n DATASET(nameRec) emails;\\n DATASET(id_rec) id1;\\n DATASET(id_rec) id2;\\nEND;\\n\\nfinalRec doRollUp(initial_rec L , DATASET(initial_rec ) AllRows ) := TRANSFORM\\n SELF.id_Val:= L.id;\\n SELF.lnames := DEDUP(PROJECT( AllRows , TRANSFORM( nameRec ,\\n SELF.name := L.lname)),name);\\n SELF.fnames := DEDUP(PROJECT( AllRows , TRANSFORM( nameRec ,\\n SELF.name := L.fname)),name);\\n SELF.emails := DEDUP(PROJECT( AllRows , TRANSFORM( nameRec ,\\n SELF.name := L.email)),name);\\n SELF.id1 := DEDUP(PROJECT( AllRows(id1 <> 0) , TRANSFORM( id_rec , \\n SELF.id := L.id1)),id);\\n SELF.id2 := DEDUP(PROJECT( AllRows(id2 <> 0) , TRANSFORM( id_rec , \\n SELF.id := L.id2)),id);\\nEND;\\n\\nrollup_Output := ROLLUP(initialds_grouped , GROUP , doRollUp(LEFT , ROWS(left)));\\n\\n
\\n\\nNot sure what am doing wrong here, get the output of the first record alone.\\n\\n\\nSample Output\\n\\nid fname lname email id1 id2\\n1 abc def abc.com 12 0\\n\\n
\\n\\nKindly help..\\n\\nThanks and Regards,\\nViswa\", \"post_time\": \"2014-06-12 15:59:31\" },\n\t{ \"post_id\": 5905, \"topic_id\": 1354, \"forum_id\": 8, \"post_subject\": \"Re: How to store count(ds) in #set\", \"username\": \"rtaylor\", \"post_text\": \"Shank,The dataset I am trying to output wont come out as an Excel file from ECL watch when it has more than 4k records.
Then you can just OUTPUT it as a CSV file, then despray it and open the file in Excel that way.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-18 14:03:12\" },\n\t{ \"post_id\": 5901, \"topic_id\": 1354, \"forum_id\": 8, \"post_subject\": \"Re: How to store count(ds) in #set\", \"username\": \"shank\", \"post_text\": \"hi Richard,\\nThe dataset I am trying to output wont come out as an Excel file from ECL watch when it has more than 4k records. So I am trying to split my dataset into chunks of 4k or less and output them to multiple logical files named ds_1 , ds_2 etc.\\nI am trying to use #LOOP to accomplish this . I need to dynamically get the number of records in a dataset and split them into chunks of 4k or lesser records.\\n\\nThanks,\\nShank\", \"post_time\": \"2014-06-17 04:45:15\" },\n\t{ \"post_id\": 5899, \"topic_id\": 1354, \"forum_id\": 8, \"post_subject\": \"Re: How to store count(ds) in #set\", \"username\": \"rtaylor\", \"post_text\": \"Shank,\\n\\nThe Template language that you're trying to use is an ECL code generation tool -- it operates at compile time to generate the ECL code that defines your job. The COUNT function is a run-time type of operation. Therefore, the simple answer to your question is, "You can't." \\n\\nSo, what are you actually trying to accomplish here?\\n\\nRichard\", \"post_time\": \"2014-06-16 13:52:32\" },\n\t{ \"post_id\": 5898, \"topic_id\": 1354, \"forum_id\": 8, \"post_subject\": \"How to store count(ds) in #set\", \"username\": \"shank\", \"post_text\": \"I have a dataset named ds. I need to set the value of COUNT(ds) to MySymbol here below. How can I achieve it?:\\n\\n\\n#DECLARE(MySymbol);\\n#SET(MySymbol,1);\\n
\\n\\n\\nThanks,\\nShank\", \"post_time\": \"2014-06-16 12:46:12\" },\n\t{ \"post_id\": 5910, \"topic_id\": 1355, \"forum_id\": 8, \"post_subject\": \"Re: Hexadecimal to Octal\", \"username\": \"rtaylor\", \"post_text\": \"Abhi,\\n\\nIf you wanted to write it in ECL, you could do it something like this:Hex2Oct(UNSIGNED1 Int) := FUNCTION\\n O1 := (STRING1)(Int & 00000111b);\\n O2 := (STRING1)((Int & 00111000b) >> 3);\\n O3 := (STRING1)((Int & 11000000b) >> 6);\\n\\tRETURN O3 + O2 + O1;\\nEND;\\n\\nUNSIGNED1 Str2Hex(STRING2 InStr) := FUNCTION\\n UNSIGNED1 Char2Bits(STRING1 Char) := \\n CASE(Char,\\n\\t'0'=>00000000b,\\n\\t'1'=>00000001b,\\n\\t'2'=>00000010b,\\n\\t'3'=>00000011b,\\n\\t'4'=>00000100b,\\n\\t'5'=>00000101b,\\n\\t'6'=>00000110b,\\n\\t'7'=>00000111b,\\n\\t'8'=>00001000b,\\n\\t'9'=>00001001b,\\n\\t'A'=>00001010b,\\n\\t'B'=>00001011b,\\n\\t'C'=>00001100b,\\n\\t'D'=>00001101b,\\n\\t'E'=>00001110b,\\n\\t'F'=>00001111b,\\n\\t 00000000b);\\n Char1 := Char2Bits(InStr[1]) << 4;\\n Char2 := Char2Bits(InStr[2]);\\n RETURN Char1 | Char2;\\nEND;\\n\\nHex2Oct(Str2Hex('FF'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-18 21:25:48\" },\n\t{ \"post_id\": 5903, \"topic_id\": 1355, \"forum_id\": 8, \"post_subject\": \"Re: Hexadecimal to Octal\", \"username\": \"bforeman\", \"post_text\": \"Hi Abhi,\\n\\nI could not find one, but there are many C++ examples that you could use in a BEGINC++ structure.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-18 12:19:08\" },\n\t{ \"post_id\": 5900, \"topic_id\": 1355, \"forum_id\": 8, \"post_subject\": \"Hexadecimal to Octal\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nIs there any function available in ECL to convert \\nHexadecimal to Octal or vice versa.\\n\\nRegards\\nAbhi\", \"post_time\": \"2014-06-16 20:36:45\" },\n\t{ \"post_id\": 5955, \"topic_id\": 1359, \"forum_id\": 8, \"post_subject\": \"Re: JOIN for a child dataset\", \"username\": \"Leofei\", \"post_text\": \"ghalliday,\\n\\nIt's really good to know it. I noticed that the LOOKUP JOIN copied the right records to each rows, but I didn't have a good way to solve it. Thanks for the info, I will try DICTIONARY.\\n\\n-Fan\", \"post_time\": \"2014-06-24 18:34:15\" },\n\t{ \"post_id\": 5948, \"topic_id\": 1359, \"forum_id\": 8, \"post_subject\": \"Re: JOIN for a child dataset\", \"username\": \"ghalliday\", \"post_text\": \"Just to be pedantic (!) LOOKUP clones all the rows locally, rather than being implicitly local. (LOOKUP,LOCAL is also supported which doesn't clone all the rows.)\\n\\nOne other possibility you could try would be to your PROJECT example, but use a DICTIONARY to map the values in a nested project - instead of a JOIN. The advantage is that the code may be generated inline, avoiding the overhead of processing a subgraph.\", \"post_time\": \"2014-06-24 09:58:34\" },\n\t{ \"post_id\": 5928, \"topic_id\": 1359, \"forum_id\": 8, \"post_subject\": \"Re: JOIN for a child dataset\", \"username\": \"Leofei\", \"post_text\": \"Richard,\\n\\nYes, you are right. It should be LOOKUP. I can try the performance and see the result. Feel free to let me know if you have any other idea.\\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-06-19 17:33:08\" },\n\t{ \"post_id\": 5924, \"topic_id\": 1359, \"forum_id\": 8, \"post_subject\": \"Re: JOIN for a child dataset\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nThe only change I would suggest is to use LOOKUP instead of LOCAL on the JOIN. LOOKUP is implicitly local and makes sure the entire rhs dataset is copied in memory to every node (which is what I think you probably want).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-19 16:19:38\" },\n\t{ \"post_id\": 5917, \"topic_id\": 1359, \"forum_id\": 8, \"post_subject\": \"JOIN for a child dataset\", \"username\": \"Leofei\", \"post_text\": \"Hi,\\n\\nI have a question here. \\n\\nThe child dataset has the fields need to be updated(i.e. the example in the picture). The main DS is a big file, and the reference table is a small one which contains the info to update the child dataset. There are two options in my mind to do this:\\n\\n 1. Normalize the main DS, JOIN, Denormalize. Since the main table a big DS, I'm not quite sure the performance. The last step Denormalize seems to happen between two big files.\\n\\n 2. Use project function, in this project function each step use a JOIN function. I looked into the GRAPH, it seems the RIGHT table will become really big. 
Here is the code:\\n\\nchildrec := {string2 code, string10 desc};\\nparentrec := {String2 name, dataset(childrec) child};\\n\\n\\nds1 := dataset([{'X', [{'aa', 'aa desc'}, {'bb', ''}]}, \\n\\t\\t\\t\\t\\t\\t\\t\\t{'Y', [{'cc', 'cc desc'}, {'bb', ''}]},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'Z', [{'aa', 'aa desc'}, {'dd', ''}]}\\n\\t\\t\\t\\t\\t\\t\\t\\t], parentrec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\nds2 := dataset([{'aa', 'aa new desc'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'bb', 'bb new desc'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'cc', 'cc new desc'}\\n\\t\\t\\t\\t\\t\\t\\t\\t], childrec);\\n\\nds := PROJECT(ds1, transform(parentrec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.child := join(left.child, ds2, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tleft.code = right.code, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ttransform(childrec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := if(left.code = right.code, right, left),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left), \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tleft outer, LOCAL);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left\\n\\t\\t\\t\\t\\t\\t\\t));\\n\\n\\noutput(ds);
\\n\\nDo you have any suggestion which way I should do? Or any other way to do it? Please correct me if I was wrong some where. Any suggestion will be appreciated!\\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-06-19 14:19:58\" },\n\t{ \"post_id\": 5930, \"topic_id\": 1361, \"forum_id\": 8, \"post_subject\": \"Re: "String contains" check in JOIN condition\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot richard\", \"post_time\": \"2014-06-20 07:05:42\" },\n\t{ \"post_id\": 5929, \"topic_id\": 1361, \"forum_id\": 8, \"post_subject\": \"Re: "String contains" check in JOIN condition\", \"username\": \"rtaylor\", \"post_text\": \"pius_francis,\\n\\nPer the docs, the ALL keyword: "Specifies the rightrecset is a small file that can be fully copied to every node, which allows the compiler to ignore the lack of any "equality" portion to the condition, eliminating the "join too complex" error that the condition would normally produce. If an "equality" portion is present, the JOIN is internally executed as a MANY LOOKUP. The KEEP option is supported in conjunction with this option."\\n\\nI had to use it in this example because my join condition had no equality portion. If your actual code has any equality logic to it, then you could remove the ALL option.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-19 19:05:39\" },\n\t{ \"post_id\": 5926, \"topic_id\": 1361, \"forum_id\": 8, \"post_subject\": \"Re: "String contains" check in JOIN condition\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\n Thanks for response. The tried out this but i dont get desired output without the use of keep(1).Can you please explain the use of ALL keyword here.\", \"post_time\": \"2014-06-19 16:54:08\" },\n\t{ \"post_id\": 5925, \"topic_id\": 1361, \"forum_id\": 8, \"post_subject\": \"Re: "String contains" check in JOIN condition\", \"username\": \"rtaylor\", \"post_text\": \"pius_francis,\\n\\nDo you mean like this?IMPORT STD;\\n\\nMyRec := RECORD\\n\\tSTRING10 Value1;\\n\\tSTRING10 Value2;\\nEND;\\n\\nLeftFile := DATASET([{'C','A'},\\n {'X','B'},\\n {'A','C'}],MyRec);\\n\\nRightFile := DATASET([{'AC','X'},\\n {'DB','Y'},\\n {'AA','Z'}],MyRec);\\n\\nMyOutRec := RECORD\\n STRING10 LeftValue1;\\n STRING10 LeftValue2;\\n STRING10 RightValue1;\\n STRING10 RightValue2;\\nEND;\\n\\nMyOutRec JoinThem(MyRec L, MyRec R) := TRANSFORM\\n SELF.LeftValue1 := L.Value1; \\n SELF.RightValue1 := R.Value1;\\n SELF.LeftValue2 := L.Value2;\\n SELF.RightValue2 := R.Value2;\\nEND;\\n\\nJ := JOIN(LeftFile,RightFile,\\n STD.Str.Contains(RIGHT.Value1,LEFT.Value1, TRUE),\\n JoinThem(LEFT,RIGHT),ALL);\\nJ;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-19 16:34:11\" },\n\t{ \"post_id\": 5921, \"topic_id\": 1361, \"forum_id\": 8, \"post_subject\": \""String contains" check in JOIN condition\", \"username\": \"pius_francis\", \"post_text\": \"Hi,\\n I need to perform a JOIN operation only when the "left side" string 'contains' the "right side" string. Is there is any way to perfrom such operation in ecl.Please help me regarding this? Thanks.\", \"post_time\": \"2014-06-19 15:16:42\" },\n\t{ \"post_id\": 5936, \"topic_id\": 1362, \"forum_id\": 8, \"post_subject\": \"Re: Outputting a Keyed logical file\", \"username\": \"rlbars5\", \"post_text\": \"Richard I clearly understand what you are saying. Considering the differences between a dataset and an INDEX. Thanks alot, helped me\\n\\n-Rahul\", \"post_time\": \"2014-06-20 17:07:54\" },\n\t{ \"post_id\": 5935, \"topic_id\": 1362, \"forum_id\": 8, \"post_subject\": \"Re: Outputting a Keyed logical file\", \"username\": \"rtaylor\", \"post_text\": \"rlbars5 ,\\n\\nYou have to understand that you're working with standard, old-fashioned ISAM files on the HPCC platform -- this is not an RDBMS.\\n\\nAn INDEX declares there is a file on disk for use in your ECL code, which is the same thing a DATASET declaration does for a data file. The difference between the two is the fact that the INDEX can be used for fast random access to individual record using its binary tree (IOW, by filtering the INDEX on its key field values -- the search terms) while DATASET implements filtering with a full table scan. B oth methods are appropriate for use in different circumstances.\\n\\nGiven your INDEX declaration:Key_payload := INDEX(recordset,{Key1,key2},{recordset},'Filename');
\\nall you need to do to use that INDEX as a dataset is to use it just as you would a DATASET, something like this:OUTPUT(Key_payload);
\\nand to treat it as a INDEX, you just filter it by the key field(s), something like this:OUTPUT(Key_payload(Key1="some value"));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-20 16:07:37\" },\n\t{ \"post_id\": 5934, \"topic_id\": 1362, \"forum_id\": 8, \"post_subject\": \"Re: Outputting a Keyed logical file\", \"username\": \"rlbars5\", \"post_text\": \"I am trying to understand what you replied,\\nYes this file is an Index that I created with a build\\nKey_payload := INDEX(recordset,{Key1,key2},{recordset},'Filename');
\\nAfter which I executed the buildindex. The Indexed file which I have now is 'thor::keyed::file'. You mentioned I can use this after I have correctly defined, as an dataset, Can you give me an example I think am confused.\", \"post_time\": \"2014-06-20 15:21:07\" },\n\t{ \"post_id\": 5933, \"topic_id\": 1362, \"forum_id\": 8, \"post_subject\": \"Re: Outputting a Keyed logical file\", \"username\": \"rtaylor\", \"post_text\": \"rlbars5,\\n\\nIf by "keyed file" you mean that this file is an index that you created with the BUILD action, then you just need to define it as an INDEX (and not as a DATASET). Once you have the file correctly defined you can use it as an index or a dataset as you choose.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-20 15:01:55\" },\n\t{ \"post_id\": 5931, \"topic_id\": 1362, \"forum_id\": 8, \"post_subject\": \"Outputting a Keyed logical file\", \"username\": \"rlbars5\", \"post_text\": \"I am trying to output a keyed file from a dataset is it posible ?\\nsomething like this ds:= DATASET('~thor::keyed::file',Layout,thor);\\noutput(ds);
\\nI tried it and it always gives me an error below:\\nDataset layout does not match published layout for file thor::keyed::file\\nDo I have to Mention somewhere that this file is Keyed ?\\nAny help would be appreciated.\", \"post_time\": \"2014-06-20 14:28:08\" },\n\t{ \"post_id\": 5951, \"topic_id\": 1364, \"forum_id\": 8, \"post_subject\": \"Re: Converting Multiple recordset into single recordset\", \"username\": \"rlbars5\", \"post_text\": \"Richard,\\n\\nI agree looks straight forward. \\nAppreciate your time and your expert advise \", \"post_time\": \"2014-06-24 14:01:12\" },\n\t{ \"post_id\": 5946, \"topic_id\": 1364, \"forum_id\": 8, \"post_subject\": \"Re: Converting Multiple recordset into single recordset\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nThis would be the more "usual" way of doing it (although your method works well, too):
outrec := RECORD\\n STRING20 Types;\\n STRING20 brand;\\nEND;\\n\\nchildrec := RECORD\\n STRING20 brand;\\nEND;\\n\\nParentRec := RECORD\\n STRING20 Types;\\n DATASET(childrec) brands;\\nEND;\\n\\ni1 := DATASET([ \\n{'SUV', 'JEEP'},\\n{'SEDAN', 'JEEP'},\\n{'SUV', 'NISSAN'},\\n{'SEDAN', 'NISSAN'},\\n{'SUV', 'TOYOTA'},\\n{'SEDAN', 'TOYOTA'},\\n{'SUV', 'MAZDA'},\\n{'SEDAN', 'MAZDA'},\\n{'SUV', 'HONDA'},\\n{'SEDAN', 'HONDA'},\\n{'SUV','SUBURU'},\\n{'SEDAN','SUBURU'}\\n], outrec);\\n\\nTypesDS := TABLE(i1,{Types},Types);\\n\\nP_recs := PROJECT(TypesDS,TRANSFORM(ParentRec,SELF.Types := LEFT.Types, SELF.brands := []));\\n\\nParentRec denorm(ParentRec l, outRec kid) :=TRANSFORM\\n SELF.Types:= l.Types;\\n SELF.brands := L.brands + ROW({kid.brand},childrec);\\nEND;\\n\\nresults := DENORMALIZE(P_recs, i1, LEFT.types=RIGHT.types,denorm(LEFT,RIGHT));\\n\\noutput(results);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-23 19:39:04\" },\n\t{ \"post_id\": 5944, \"topic_id\": 1364, \"forum_id\": 8, \"post_subject\": \"Re: Converting Multiple recordset into single recordset\", \"username\": \"rlbars5\", \"post_text\": \"Bob,\\nThanks for your suggestion, I tried it doing this way (code below)\\n\\noutrec := RECORD\\nSTRING20 Types;\\nSTRING20 brand;\\nEND;\\n\\nchildrec := RECORD\\nSTRING20 brand;\\nEND;\\n\\nParentRec := RECORD\\nSTRING20 Types;\\nDATASET(childrec) brands;\\nEND;\\n\\ni1 := DATASET([ \\n{'SUV', 'JEEP'},\\n{'SUV', 'NISSAN'},\\n{'SUV', 'TOYOTA'},\\n{'SUV', 'MAZDA'},\\n{'SUV', 'HONDA'},\\n{'SUV','SUBURU'}], outrec);\\n\\nGrouping := GROUP(i1,Types);\\n\\nParentRec doRollup(outRec l, DATASET(outRec) allRows) :=TRANSFORM\\nSELF.Types:= l.Types;\\nSELF.brands := PROJECT(allRows,TRANSFORM(childrec, SELF := LEFT));\\nEND;\\n\\nresults := ROLLUP(Grouping, GROUP, doRollup(LEFT,ROWS(LEFT)));\\noutput(results);
\\n\\nThis gives me the output which is desired\\n[attachment=0:wneuz2xs]eg2.png\\n\\nI will look at your suggestion as well, just wanted to point out what I worked on \\nAppreciate your help Bob\\n\\nThanks,\\nRahul\", \"post_time\": \"2014-06-23 17:25:09\" },\n\t{ \"post_id\": 5938, \"topic_id\": 1364, \"forum_id\": 8, \"post_subject\": \"Re: Converting Multiple recordset into single recordset\", \"username\": \"bforeman\", \"post_text\": \"Hi Rahul,\\n\\nThere is something available in ECL that provides exactly what you need. Take a look at the DENORMALIZE statement and the use of nested child datasets. \\n\\nYou would first define a child record structure for the types and brands. You need types in the child record to link to your parent.\\n\\nAfter that, define the parent record with the parent fields and a nested child DATASET using your child record in the parent record structure.\\n\\nAfter that it's a simple PROJECT to build your parent records and then a DENORMALIZE to add the children.\\n\\nThere is a great example in the ECL playground named NORM_DENORM_ChildDatasets that shows you how this is done.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-23 07:35:40\" },\n\t{ \"post_id\": 5937, \"topic_id\": 1364, \"forum_id\": 8, \"post_subject\": \"Converting Multiple recordset into single recordset\", \"username\": \"rlbars5\", \"post_text\": \"I have a child dataset (below is the screenshot of the output)\\n[attachment=1:7aad4vn9]example.png\\nThe following dataset has 5 records for (parentrec)'SUV' having different/multiple (child)brands. Is there a way to produce this dataset into a single parentrec with multiple child rec below (refer to the desired output screenshot below)?\\n[attachment=0:7aad4vn9]example2.png\\n\\nAny thoughts or suggestions would be appreciated\\nThanks,\\nRahul\", \"post_time\": \"2014-06-20 19:40:48\" },\n\t{ \"post_id\": 6150, \"topic_id\": 1366, \"forum_id\": 8, \"post_subject\": \"Re: Number of logical files in superfile\", \"username\": \"gouldbrfl\", \"post_text\": \"I have one superfile that has about 1000 logical files in it. Does it run yes, however this is a monster that I've taken over from someone else. I plan on consolidating them this weekend into a single file and then have another job run from a Unix cron on the weekends which take the previous 6 days and creates another consolidated file so that I never have more than 7 files in the superfile at any one time. In my tests, it didn't take less time to read the files, but it means that there are 1000 less entries in the daili to manage.\", \"post_time\": \"2014-08-01 10:15:19\" },\n\t{ \"post_id\": 5945, \"topic_id\": 1366, \"forum_id\": 8, \"post_subject\": \"Re: Number of logical files in superfile\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nNo, it hasn't changed, as it is based on our experience. One hundred is just a nice round number, easy to remember and work with. You may or may not see performance degradation if you push it to 200, you're more likely to if you push it to 300 or 400, etc. It's up to you.\\n\\nRichard\", \"post_time\": \"2014-06-23 19:15:43\" },\n\t{ \"post_id\": 5942, \"topic_id\": 1366, \"forum_id\": 8, \"post_subject\": \"Re: Number of logical files in superfile\", \"username\": \"oleg\", \"post_text\": \"Yes, Richard, \\n\\nI heard of this few years ago - is anything changed since?\\n\\nAlso can I ask what this limit is based on? 
From my IT experience, this number looks a bit suspicious - I would rather expect some round number, like 256 or 1024 \", \"post_time\": \"2014-06-23 15:52:53\" },\n\t{ \"post_id\": 5941, \"topic_id\": 1366, \"forum_id\": 8, \"post_subject\": \"Re: Number of logical files in superfile\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\n100 is the maximum recommended, but that is not a hard limit.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-23 15:47:49\" },\n\t{ \"post_id\": 5940, \"topic_id\": 1366, \"forum_id\": 8, \"post_subject\": \"Number of logical files in superfile\", \"username\": \"oleg\", \"post_text\": \"Is there limit (either recommended or physical)?\", \"post_time\": \"2014-06-23 15:32:41\" },\n\t{ \"post_id\": 5967, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"Leofei\", \"post_text\": \"ghalliday,\\n\\nThanks for the info. I will try it later.\\n\\n-Fan\", \"post_time\": \"2014-06-25 15:59:06\" },\n\t{ \"post_id\": 5962, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"ghalliday\", \"post_text\": \"The workaround is to execute the dataset in thor, and spill the result to a workunit temporary. The following change should help:\\n\\nds := GLOBAL(Files.configDS, FEW);\\n\\nI'll link that Jira up with the existing issues.\", \"post_time\": \"2014-06-25 06:41:44\" },\n\t{ \"post_id\": 5960, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"Leofei\", \"post_text\": \"Richard, I reported it. Here is the detail:\\n\\n HPCC-11781 - APPLY function issue \\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-06-24 21:10:28\" },\n\t{ \"post_id\": 5959, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nYes, I get the same error message. You should report the issue in JIRA.\\n\\nRichard\", \"post_time\": \"2014-06-24 20:48:15\" },\n\t{ \"post_id\": 5958, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"Leofei\", \"post_text\": \"Richard,\\n\\nIt seems the inline dataset could work. Whereas, if it's the dataset read from THOR cluster, it reports the following error:\\n\\nError: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR.\\n\\n
IMPORT STD;\\n\\n// ds := dataset([{'AL'},{'KS'}], {STRING2 SPC});\\nds := Files.configDS;\\nSEQUENTIAL(\\n STD.File.StartSuperFileTransaction(),\\n NOTHOR(\\n\\t\\tAPPLY(ds, STD.File.ClearSuperFile('~thor::data::test' + SPC, TRUE))\\n\\t\\t),\\n STD.File.FinishSuperFileTransaction()\\n);\\n
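For illustration only, a minimal sketch of how the GLOBAL(...,FEW) workaround mentioned earlier in this thread could slot into the code above; Files.configDS and the SPC field are taken from the original post, everything else is unchanged:
IMPORT STD;
// Materialize the Thor dataset into a workunit temporary first, so the
// APPLY inside NOTHOR no longer reads directly from a Thor disk file.
ds := GLOBAL(Files.configDS, FEW);
SEQUENTIAL(
  STD.File.StartSuperFileTransaction(),
  NOTHOR(APPLY(ds, STD.File.ClearSuperFile('~thor::data::test' + SPC, TRUE))),
  STD.File.FinishSuperFileTransaction()
);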
\\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-06-24 20:23:42\" },\n\t{ \"post_id\": 5957, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Re: Do action for each records in a Dataset\", \"username\": \"rtaylor\", \"post_text\": \"Fan,I tried APPLY function, it seems it doesn't work.
That is exactly what APPLY is supposed to do, so what problem are you having with it? Can you post the code and whatever error messages you're getting, please?\\n\\nRichard\", \"post_time\": \"2014-06-24 20:07:59\" },\n\t{ \"post_id\": 5956, \"topic_id\": 1370, \"forum_id\": 8, \"post_subject\": \"Do action for each records in a Dataset\", \"username\": \"Leofei\", \"post_text\": \"Hi,\\n\\nI have a question here: I have a dataset contains some state postal code. For example, KS, AL, FL. For each state in this list, I want to do the same action. For example, clearSuperFile('~thor::data::state'+ SPC, TRUE).\\n\\nIs there any way to do it? I tried APPLY function, it seems it doesn't work. Any suggestion will be appreciated. \\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-06-24 18:50:10\" },\n\t{ \"post_id\": 5971, \"topic_id\": 1372, \"forum_id\": 8, \"post_subject\": \"Re: Understanding Basic ECL concepts\", \"username\": \"rlbars5\", \"post_text\": \"Wow Makes practical sense now. I will enroll for the course so as to get clear with some basics. Thanks alot and appreciate your help\", \"post_time\": \"2014-06-25 21:05:20\" },\n\t{ \"post_id\": 5970, \"topic_id\": 1372, \"forum_id\": 8, \"post_subject\": \"Re: Understanding Basic ECL concepts\", \"username\": \"rtaylor\", \"post_text\": \"rlbars5,\\n\\nI strongly suggest that you register and take the online ECL courses (http://learn.lexisnexis.com/hpcc). The first two courses (Intro to ECL and Intro to Thor) are free for everybody, and the questions you asked are discussed in the Intro to Thor course.\\n\\nDISTRIBUTE will RE-distribute the records in your dataset based on the integer expression you specify as its second parameter (usually the result of one of our hashing functions) so that all records with the same value will end up on the same node. \\n\\nThe following examples all use these datasets:IMPORT STD;\\nMyRec := RECORD\\n\\tUNSIGNED1 ID;\\n\\tSTRING10 Fname;\\n\\tSTRING10 Lname;\\nEND;\\n\\nLeftFile := DATASET([{1,'FRED','JONES'},\\n {2,'GEORGE','JONES'},\\n {3,'SUE','JONES'},\\n {4,'FRED','TAYLOR'},\\n {5,'GEORGE','TAYLOR'},\\n {6,'SUE','TAYLOR'},\\n {7,'FRED','MASON'},\\n {8,'GEORGE','MASON'},\\n {9,'SUE','MASON'}\\n ],MyRec);\\n\\nRightFile := DATASET([{1,'GEORGE','TAYLOR'},\\n {2,'FRED','JONES'},\\n {3,'SUE','JONES'},\\n {4,'GEORGE','MASON'},\\n {5,'FRED','TAYLOR'},\\n {6,'SUE','MASON'},\\n {7,'GEORGE','JONES'},\\n {8,'FRED','MASON'},\\n {9,'SUE','TAYLOR'}\\n ],MyRec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nLDS := TABLE(DISTRIBUTE(LeftFile),\\n {LeftFile,NodeID := STD.system.Thorlib.Node()+1});\\nRDS := TABLE(DISTRIBUTE(RightFile),\\n {RightFile,NodeID := STD.system.Thorlib.Node()+1});\\nOUTPUT(LDS,NAMED('LDS_random'));\\nOUTPUT(RDS,NAMED('EDS_random'));
When you run this code you will see that the records are randomly distributed across the nodes.\\n\\nThen, if you want to find matching records in the two datasets based on last and first names, you could do it with a global JOIN like this:OutRec := RECORD\\n UNSIGNED1 L_ID;\\n UNSIGNED1 R_ID;\\n STRING10 Fname;\\n STRING10 Lname;\\n UNSIGNED1 L_NodeID;\\n UNSIGNED1 R_NodeID;\\n UNSIGNED1 J_NodeID;\\nEND;\\nOutRec XF1(LDS L, RDS R) := TRANSFORM\\n SELF.L_ID := L.ID;\\n SELF.R_ID := R.ID;\\n SELF.L_NodeID := L.NodeID;\\n SELF.R_NodeID := R.NodeID;\\n SELF.J_NodeID := STD.system.Thorlib.Node()+1;\\n SELF := L\\nEND;\\nGlobalJoin := JOIN(LDS,RDS,\\n LEFT.fname=RIGHT.fname AND LEFT.lname=RIGHT.lname,\\n XF1(LEFT,RIGHT) );\\nOUTPUT(GlobalJoin,NAMED('Global_Join'));
You will note that the node where the JOIN happens is not necessarily the same node as either record started out on. That's because a global JOIN will automatically sort and redistribute the records so that it can easily do the matching. We call this the "cluster talk" or "cluck" phase, and on large datasets it can take a while to accomplish.\\n\\nHowever, you can use DISTRIBUTE and LOCAL to eliminate the "cluck" phase, like this:D_LDS := TABLE(DISTRIBUTE(LeftFile,HASH32(fname,lname)),\\n {LeftFile,NodeID := STD.system.Thorlib.Node()+1});\\nD_RDS := TABLE(DISTRIBUTE(RightFile,HASH32(fname,lname)),\\n {RightFile,NodeID := STD.system.Thorlib.Node()+1});\\n\\nLocalJoin := JOIN(D_LDS,D_RDS,\\n LEFT.fname=RIGHT.fname AND LEFT.lname=RIGHT.lname,\\n XF1(LEFT,RIGHT),LOCAL );\\nOUTPUT(LocalJoin,NAMED('Local_Join'));
Now you see that the JOIN happens on the same node that the data is already on, eliminating the need to move records to do the matching (the "cluck" phase).\\n\\nObviously, with these small datasets you won't see any performance difference between the Global and Local JOINs, but given sizable datasets to work with, you should.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-25 19:40:23\" },\n\t{ \"post_id\": 5969, \"topic_id\": 1372, \"forum_id\": 8, \"post_subject\": \"Understanding Basic ECL concepts\", \"username\": \"rlbars5\", \"post_text\": \"I am naive when it comes to basic options in ECL, I am trying to understand their basic functionality so that I can use them effectively, referred ECL language reference but still the concept has not seeped in. \\n\\nWhat is the Basic use or example when a Distribute is used,\\n -> I understand the data is distributed across nodes, but in which situation would I use this ?\\nWhy do we use local once we have distributed, what if I don't mention local would it still work?\\nWhat would be a basic example of MANY&LOOKUP in JOIN ?\\n\\nAny help you be appreciated\", \"post_time\": \"2014-06-25 18:02:33\" },\n\t{ \"post_id\": 6005, \"topic_id\": 1376, \"forum_id\": 8, \"post_subject\": \"Re: uploading/spraying with eclplus/command line?\", \"username\": \"JimD\", \"post_text\": \"You can also use DFUPlus command line interface. \\n\\nSee the Client Tools manual for details.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2014-07-01 18:30:59\" },\n\t{ \"post_id\": 5996, \"topic_id\": 1376, \"forum_id\": 8, \"post_subject\": \"Re: uploading/spraying with eclplus/command line?\", \"username\": \"bforeman\", \"post_text\": \"Try looking at the ECL library support for spraying. Look at SprayXML, you should be able to automate the process nicely right from the ECL IDE and a workunit.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-01 07:01:43\" },\n\t{ \"post_id\": 5995, \"topic_id\": 1376, \"forum_id\": 8, \"post_subject\": \"uploading/spraying with eclplus/command line?\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI would like to upload and spray (XML) files on HPCC using a bash script (it's many files so it wouldn't be efficient to use the web UI). Is there a way to express that with eclplus or another command line tool?\\nI looked in eclplus options and it doesn't seem to include upload/spray:\\neclplus action=[list|view|dump|delete|abort|query|graph]\\n\\nThanks,\\nKeren\\n\\nPS: I don't think it makes a difference: my files are located on the network (remote machine) so I couldn't browse to them from the web ui anyway.\", \"post_time\": \"2014-07-01 03:25:45\" },\n\t{ \"post_id\": 6015, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nThat would be most easily done with a crosstab report (covered at the beginning of the Intro to Thor course), like this:IMPORT $;\\n\\nLayout_PersonsWithAge := RECORD\\n $.Persons;\\n INTEGER2 Age;\\nEND;\\n\\nTodaysYear := 2014;\\n\\nLayout_PersonsWithAge calcAge($.Persons l) := TRANSFORM\\n SELF.Age := IF(l.BirthDate<>'',TodaysYear - (INTEGER)l.BirthDate[1..4],0);\\n SELF := l;\\nEND;\\n\\nRecordsWithAge := PROJECT($.Persons, calcAge(LEFT));\\n\\n// # Get All the records with age above 60\\nRecordsWithAgeOver60 := RecordsWithAge(Age > 60);\\n\\nXtabOver60ByState := TABLE(RecordsWithAgeOver60,{state,cnt := COUNT(GROUP)},state);\\nXtabOver60ByState;
The "group by" form of the TABLE function is extremely useful for this type of thing.\\n\\nHowever, it could also be accomplished this way:IMPORT $;\\n\\nLayout_PersonsWithAge := RECORD\\n $.Persons;\\n INTEGER2 Age;\\nEND;\\n\\nTodaysYear := 2014;\\n\\nLayout_PersonsWithAge calcAge($.Persons l) := TRANSFORM\\n SELF.Age := IF(l.BirthDate<>'',TodaysYear - (INTEGER)l.BirthDate[1..4],0);\\n SELF := l;\\nEND;\\n\\nRecordsWithAge := PROJECT($.Persons, calcAge(LEFT));\\n\\n// # Get All the records with age above 60\\nRecordsWithAgeOver60 := RecordsWithAge(Age > 60);\\n\\nXtabOver60ByState := TABLE(RecordsWithAgeOver60,{state,cnt := COUNT(GROUP)},state);\\nSORT(XtabOver60ByState,state);\\n\\nTblStateCnt := TABLE(RecordsWithAgeOver60,{state,Cnt := 1});\\n\\nSortedTblStateCnt := SORT(TblStateCnt,state);\\n\\nOver60ByState := ROLLUP(SortedTblStateCnt,\\n LEFT.state=RIGHT.state, \\n TRANSFORM(RECORDOF(SortedTblStateCnt),\\n SELF.Cnt := LEFT.Cnt+1,\\n SELF.state := RIGHT.state));\\n\\nOver60ByState;
The vertical slice form of the TABLE function and the ROLLUP function are also both covered in the Intro to Thor course. Note that I added a SORT to make the two results come out in the same order so it's easier to see that both methods produce exactly the same result.\\n\\nAs a general rule, when you can do what you need either with the TABLE function or some other function that uses a TRANSFORM, then the TABLE function is usually the "better" choice for simpler code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-07 14:11:23\" },\n\t{ \"post_id\": 6014, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"nileshdchavan\", \"post_text\": \"Thanks i had tried this but my syntax was wrong so that did not work.\\n\\nAnyway, how can i get the count of people living in each state? I have all the unique states in the \\n\\nUniqueStates := DEDUP(SortedStates,state);\\n\\nwhen i use below statement, it gives me total count from all the states -\\n\\nCountInEachState := COUNT(RecordsWithAgeOver60(State IN SET(UniqueStates,state)));\\n\\nHow can i achieve to list the count of each state separately.\\n\\nBTW, i have gone through the free online training but since the lab assignments were limited corresponding to the commands/constrcuts, i'm getting so many issues. Apologies for troubling you with so many queries. \\n\\nPlease advise. Thank you.\", \"post_time\": \"2014-07-07 05:16:09\" },\n\t{ \"post_id\": 6013, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,
The program gives error at the second last line where i'm using State IN UniqueStates filter.
That's because the IN operator expects to be looking in a set of values, not a recordset. Your code would work if you did it this way (but may or may not get you the result that you're trying to achieve):\nCountInEachState := COUNT(RecordsWithAgeOver60(State IN SET(UniqueStates,state)));
\\nSo my next question to you is: have you gone through the FREE online ECL courses? Many of your basic issues would be resolved by doing that. Here's the link to register and take the courses: http://learn.lexisnexis.com/hpcc \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-03 15:51:53\" },\n\t{ \"post_id\": 6012, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"nileshdchavan\", \"post_text\": \"Hi Rich,\\n\\nI wrote a following program to get the count of persons in each state -\\n\\nIMPORT $;\\n\\nLayout_PersonsWithAge := RECORD\\n $.Persons;\\n INTEGER2 Age;\\nEND;\\n\\nTodaysYear := 2014;\\n\\nLayout_PersonsWithAge calcAge($.Persons l) := TRANSFORM\\n SELF.Age := IF(l.BirthDate<>'',TodaysYear - (INTEGER)l.BirthDate[1..4],0);\\n SELF := l;\\nEND;\\n\\nRecordsWithAge := PROJECT($.Persons, calcAge(LEFT));\\nRecordsWithAge;\\n\\n# Get All the records with age above 60\\nRecordsWithAgeOver60 := RecordsWithAge(Age > 60);\\nRecordsWithAgeOver60;\\n\\n//Get All the state Names\\nStatesNames := TABLE(RecordsWithAgeOver60,{state});\\n//StatesNames;\\n//COUNT(StatesNames);\\n\\n// Get Unique State Names\\nSortedStates := SORT(StatesNames,state); \\nUniqueStates := DEDUP(SortedStates,state);\\n//UniqueStates;\\n\\n// # of peoples in each state\\nCountInEachState := COUNT(RecordsWithAgeOver60(State IN UniqueStates));\\n//CountInEachState;\\n\\nThe program gives error at the second last line where i'm using State IN UniqueStates filter.\\n\\nCould you please advise what could be the issue here? When i use State = 'MN' it works fine. Not sure how to filter for each state.\\n\\nPlease advise. Thanks in advance.\\n\\n-Nilesh\", \"post_time\": \"2014-07-03 02:28:02\" },\n\t{ \"post_id\": 6011, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"nileshdchavan\", \"post_text\": \"Thanks Richard. This worked fine. \", \"post_time\": \"2014-07-02 14:45:41\" },\n\t{ \"post_id\": 6010, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"rtaylor\", \"post_text\": \"nileshdchavan,\\n\\nHere's how I would do it:
IMPORT $;\\n\\nLayout_PersonsWithAge := RECORD\\n $.Persons;\\n INTEGER2 Age;\\nEND;\\n\\nTodaysYear := 2014;\\n\\nLayout_PersonsWithAge calcAge($.Persons l) := TRANSFORM\\n SELF.Age := IF(l.BirthDate<>'',TodaysYear - (INTEGER)l.BirthDate[1..4],0);\\n SELF := l;\\nEND;\\n\\nRecordsWithAge := PROJECT($.Persons, calcAge(LEFT));\\n\\nRecordsWithAge;
Note that I changed your RECORD structure to simply inherit all the fields from your Persons dataset. This accomplishes exactly the same thing as your RECORD structure, but with less typing. \\n\\nI also changed all your dataset references to be fully-qualified ("$.Persons" not just "Persons"). Full qualification is a requirement in ECL -- this was the cause of your syntax errors.\\n\\nThe one logic change I made was to use IF to do the actual calculation so that your result does not end up with a bunch of people whose "age" is 2014. I also added RecordsWithAge as an action to the end, otherwise you would have defined what you wanted, but not asked to see the result.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-02 13:39:15\" },\n\t{ \"post_id\": 6007, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Re: Calculate a new column and adding it to layouut for disp\", \"username\": \"nileshdchavan\", \"post_text\": \"I tried as shown below, but this did not work.\\n\\nIMPORT $;\\n$.Persons;\\n\\nLayout_PersonsWithAge := RECORD\\n INTEGER4 RECID;\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING2 NameSuffix;\\n STRING8 FileDate;\\n UNSIGNED2 BureauCode;\\n STRING1 MaritalStatus;\\n STRING1 Gender;\\n UNSIGNED1 DependentCount;\\n STRING8 BirthDate;\\n STRING42 StreetAddress;\\n STRING20 City;\\n STRING2 State;\\n STRING5 ZipCode;\\n INTEGER2 Age;\\nEND;\\n\\nTodaysYear := 2014;\\n\\nLayout_PersonsWithAge calcAge(Persons l) := TRANSFORM\\nSELF.Age := TodaysYear - l.BirthDate[1..4];\\nSELF := l;\\nEND;\\n\\nRecordsWithAge := PROJECT(Persons, calcAge(LEFT));\", \"post_time\": \"2014-07-02 04:16:30\" },\n\t{ \"post_id\": 6006, \"topic_id\": 1378, \"forum_id\": 8, \"post_subject\": \"Calculate a new column and adding it to layouut for display\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello -\\n\\nI'm working on online_persons sample data file provided in the ECL training.\\n\\nI want to calculate the age of each person in this dataset. I've done this however, i also want to output this Age field as an additional column. I've created a separate layout with this additional field, however, i'm not sure how to add this age column as additional field to existing resultset. Please help.\\n\\nI'm calculating age as follows -\\nINTEGER4 CurrentYear := 2014;\\nfn_getAge(INTEGER bdate) := IF (bdate != 0, (CurrentYear - bdate), 0);\\nOUTPUT($.Persons, {fn_getAge( (INTEGER) BirthDate[1..4])}, NAMED('AGE'));\\n\\nHere is my layout with age -\\n\\nLayout_PersonsWithAge := RECORD\\n\\tINTEGER4 RECID;\\n\\tSTRING15 FirstName;\\n\\tSTRING25 LastName;\\n\\tSTRING15 MiddleName;\\n\\tSTRING2 NameSuffix;\\n\\tSTRING8 FileDate;\\n\\tUNSIGNED2 BureauCode;\\n\\tSTRING1 MaritalStatus;\\n\\tSTRING1 Gender;\\n\\tUNSIGNED1 DependentCount;\\n\\tSTRING8 BirthDate;\\n\\tSTRING42 StreetAddress;\\n\\tSTRING20 City;\\n\\tSTRING2 State;\\n\\tSTRING5 ZipCode;\\n\\tINTEGER2 Age;\\nEND;\", \"post_time\": \"2014-07-01 21:02:27\" },\n\t{ \"post_id\": 6024, \"topic_id\": 1379, \"forum_id\": 8, \"post_subject\": \"Re: Spray Dataset Created at runtime into THOR\", \"username\": \"rtaylor\", \"post_text\": \"Vijay,In the code above dirList holds the list of file Names present in landing zone.\\n\\nMy requirement is to write the file Names as a dataset into THOR.
Take a look at the STD.File.RemoteDirectory() function in the Standard Library Reference. It does exactly that.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-10 13:30:57\" },\n\t{ \"post_id\": 6021, \"topic_id\": 1379, \"forum_id\": 8, \"post_subject\": \"Re: Spray Dataset Created at runtime into THOR\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Bob,\\n\\nThanks for the reply. In the code above dirList holds the list of file Names present in landing zone.\\n\\nMy requirement is to write the file Names as a dataset into THOR.\\n\\nHow can I achieve this? Kindly help.\\n\\nThanks\\nVijay\", \"post_time\": \"2014-07-10 12:37:24\" },\n\t{ \"post_id\": 6018, \"topic_id\": 1379, \"forum_id\": 8, \"post_subject\": \"Re: Spray Dataset Created at runtime into THOR\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nHow are you getting your file to the landing zone? Are you despraying it? I don't see anything in your code that tells me that. \\n\\nFurthermore, If you are creating a dataset in THOR, all you would really need to do is a simple OUTPUT using the file parameter. That will effectively "spray" your file to the THOR cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-10 12:19:39\" },\n\t{ \"post_id\": 6016, \"topic_id\": 1379, \"forum_id\": 8, \"post_subject\": \"Spray Dataset Created at runtime into THOR\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nI am trying to spray Dataset created at RUNTIME into THOR using APPLY and FileServices.SprayVariable API. \\n\\nBut I am getting the below mentioned error.\\n\\nIn the below code dirList contains the dataset of string values, this string value is being passed to a project function, which inturn passes the same as a variable to a module file.\\n\\n\\nSUCCESS_LAYOUT := RECORD\\nSTRING fileName;\\nSTRING Status;\\nEND;\\n\\nreturnSuccess(String file) := FUNCTION \\n\\nMOD := AppSharedSvc_Recommendation_AutoSpraying.Collab_Sequential_Module(file);\\n//Call the code in Module File\\nMOD.collabExecution;\\nreturn 'success';\\nEND;\\n\\nSUCCESS_LAYOUT TransformFunction(Layout_FileName l) := TRANSFORM\\nSELF.fileName := l.lname;\\nSELF.Status := returnSuccess(l.lname);\\nEND;\\n\\nfinalOutput := PROJECT(dirList, TransformFunction(LEFT));\\n\\nOUTPUT(finalOutput);\\n
\\n\\nIn the Module file I have the below mentioned code.\\n\\n\\nEXPORT Collab_Sequential_Module (String fileName) := MODULE\\n\\nEXPORT fileNameRecord := RECORD\\n\\tSTRING fileName;\\nEND;\\n\\nEXPORT FileNameDS := DATASET([{fileName}], fileNameRecord);\\n\\nEXPORT sprayVars := APPLY(FileNameDS, FileServices.SprayVariable(Configurations_File_Module.dropZoneIP, 'appshrdsvc::test::in::12345.xml',1000,\\t\\t,,, \\t\\t\\t'thor100_810_a', fileName,,,,TRUE,TRUE,TRUE));\\n\\t \\nEXPORT collabExecution := SEQUENTIAL(sprayVars);\\n\\n\\t\\t \\t \\nEND;\\n
\\n\\nThe error what I get is \\n\\n\\nError: System error: 0: DFUServer Error Failed: Could not open source file //1x.xxx.xx.x/mnt/disk1/var/lib/HPCCSystems/esp/appshardsvc::sample::12345 (in Disk Read G11 E12) (0, 0), 0, \\n
\\n\\nCan we spray Datasets created at runtime into THOR.\\n\\nKindly help.\\n\\nThanks\\nVijay\", \"post_time\": \"2014-07-10 10:40:41\" },\n\t{ \"post_id\": 6045, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"rtaylor\", \"post_text\": \"Balachandar,\\n\\nWhen you read the WHEN function docs, notice that the first parameter must be a record set or an action. Your code has the Boolean constant TRUE, which is neither.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-14 15:33:46\" },\n\t{ \"post_id\": 6042, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"Balachandar\", \"post_text\": \"Hi Richard,\\n\\nI tried as you mentioned in the previous post. But still i got the same ERROR\\n\\nPlease take a look at the sample code below, \\n\\nSUCCESS_LAYOUT := RECORD\\nSTRING fileName;\\nboolean Status;\\nEND;\\n\\n\\nboolean returnSuccess(String file) := FUNCTION\\n\\n fileNameSuffix := DATASET([{file}], fileNameLayout);\\n FileOut := OUTPUT(fileNameSuffix,,'~appshrdsvc::recsvc::in::activities::bala', thor); \\n RETURN WHEN(true,FileOut);\\n \\nEND;\\n\\n\\nSUCCESS_LAYOUT TransformFunction(Layout_FileName l) := TRANSFORM\\n \\nSELF.fileName := l.lname;\\nSELF.Status := returnSuccess(l.lname);\\n\\nEND;\\n\\nfinalOutput := PROJECT(dirList, TransformFunction(LEFT));\\n\\nOUTPUT(finalOutput);\", \"post_time\": \"2014-07-14 07:34:06\" },\n\t{ \"post_id\": 6027, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"rtaylor\", \"post_text\": \"You're still trying to make the FUNCTION structure operate like a "C" function and not ECL. In the FUNCTION structure docs (http://hpccsystems.com/download/docs/ecl-language-reference/html/FUNCTION_Structure.html) it says: Side-effect actions contained in the code of the FUNCTION must have definition names that must be referenced by the WHEN function to execute.
That means that your FUNCTION should look more like this:returnSuccess(String file) := FUNCTION\\n\\n fileNameSuffix := DATASET([{file}], fileNameLayout);\\n FileOut := OUTPUT(fileNameSuffix,,'~appshrdsvc::recsvc::in::activities::1231111'); \\n ds := DATASET([{'success'}],{STRING10 s});\\n RETURN WHEN(ds,FileOut);\\n\\nEND;
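As a fully self-contained sketch of the same WHEN pattern (every name below is invented for the example rather than taken from this thread):
// The side-effect OUTPUT gets a definition name (FileOut) and is only
// referenced through WHEN, as the FUNCTION structure docs quoted above describe.
DemoWrite(STRING suffix) := FUNCTION
  outDS   := DATASET([{suffix}], {STRING20 txt});
  FileOut := OUTPUT(outDS,,'~demo::when::' + suffix, OVERWRITE);
  RETURN WHEN(DATASET([{'success'}], {STRING10 status}), FileOut);
END;
OUTPUT(DemoWrite('test1'));  // writes the file and returns the 'success' row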
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-10 14:21:47\" },\n\t{ \"post_id\": 6026, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"bforeman\", \"post_text\": \"Try this:\\n\\nreturnSuccess(String file) := FUNCTION\\n\\nfileNameSuffix := DATASET([{file}], fileNameLayout);\\nRETURN OUTPUT(fileNameSuffix,,'~appshrdsvc::recsvc::in::activities::1231111');\\n\\n//return 'success';\\n\\nEND;
\", \"post_time\": \"2014-07-10 14:05:55\" },\n\t{ \"post_id\": 6025, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"Balachandar\", \"post_text\": \"Hi Bob,\\n Still we are facing one more issue.\\nIf you gave the string as static its working fine. If you passed the string using project function getting the same error.\\n\\nHere is the sample code.\\n\\n\\nreturnSuccess(String file) := FUNCTION\\n\\n fileNameSuffix := DATASET([{file}], fileNameLayout);\\n SEQUENTIAL(\\n OUTPUT(fileNameSuffix,,'~appshrdsvc::recsvc::in::activities::1231111')\\n ); \\n\\t\\t\\t\\nreturn 'success';\\n \\nEND;\\n\\nSUCCESS_LAYOUT TransformFunction(Layout_FileName l) := TRANSFORM\\n \\nSELF.fileName := l.lname;\\nSELF.Status := returnSuccess(l.lname);\\n\\nEND;\\n\\nfinalOutput := PROJECT(dirList, TransformFunction(LEFT));\\n\\nOUTPUT(finalOutput);\", \"post_time\": \"2014-07-10 14:00:01\" },\n\t{ \"post_id\": 6023, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"Balachandar\", \"post_text\": \"Thanks Bob. Its work fine for me also. Thanks a lot.\", \"post_time\": \"2014-07-10 12:57:47\" },\n\t{ \"post_id\": 6022, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"bforeman\", \"post_text\": \"This works for me:\\n\\nIMPORT MyFolder AS X;\\n\\nMyFunc(STRING filename) := FUNCTION\\n RETURN OUTPUT(X.MyRecordSet,,filename);\\nEND;\\n\\nMyFunc('~CLASS::BMF::TestThis');
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-10 12:37:43\" },\n\t{ \"post_id\": 6020, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"Balachandar\", \"post_text\": \"Trying to write a DATA into thor. Is there any possible way to achieve this funcationality\", \"post_time\": \"2014-07-10 12:33:10\" },\n\t{ \"post_id\": 6019, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Re: Write a DATASET into THOR inside Function Block\", \"username\": \"bforeman\", \"post_text\": \"That's an interesting error When you call your function, are you specifically targeting THOR and not HTHOR?\", \"post_time\": \"2014-07-10 12:28:46\" },\n\t{ \"post_id\": 6017, \"topic_id\": 1380, \"forum_id\": 8, \"post_subject\": \"Write a DATASET into THOR inside Function Block\", \"username\": \"Balachandar\", \"post_text\": \"Is it possible to use output action inside FUNCTION block.\\nI tried got a error \\n'Error: OUTPUT to file is not supported inside NOTHOR()'.\\n\\nPlease suggest how to do my code is like this\\n\\nreturnSuccess(String file) := FUNCTION \\n\\nfileNameSuffix := DATASET([{file}], fileNameLayout);\\n\\n\\nOUTPUT(fileNameSuffix,,'~thor100_240_a::appshrdsvc::recsvc::in::activities::12345',THOR);\\nreturn 'success';\\nEND;\", \"post_time\": \"2014-07-10 11:01:24\" },\n\t{ \"post_id\": 6036, \"topic_id\": 1386, \"forum_id\": 8, \"post_subject\": \"Re: Recursive filename dependency Issue\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nI think you may be running into this problem because of the fundamental design of HPCC. In a single workunit it is not allowed to read from a dataset and then write to that same dataset. What you're doing is the reverse of that -- writing to the file and then trying to read from it. Either way, you're both reading and writing the same dataset in the same workunit.\\n\\nAnother way to approach this would be to do the spray as one workunit, which can use NOTIFY to fire an event that another workunit (the one that reads the dataset) is waiting for.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-11 15:17:12\" },\n\t{ \"post_id\": 6034, \"topic_id\": 1386, \"forum_id\": 8, \"post_subject\": \"Recursive filename dependency Issue\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nI am facing the [color=#FF4040:2za2mwn3]Recursive filename dependency error while executing SEQUENTIAL api.\\n\\nresults := SEQUENTIAL(BWR_1,BWR_2,..);\\nfinal_result := SEQUENTIAL(SpryingData_Module.SprayFiles(... ),\\n results);\\n\\nin the above example, 1st iam trying to spray a logical file into THOR. 2nd i am running another SEQUENTIAL (results) which will run all BWR files where actually i read the sprayed logical file and used some values out of it.\\n\\ni am unable to achieve this functionality. 
please help on this.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-07-11 13:54:44\" },\n\t{ \"post_id\": 6037, \"topic_id\": 1387, \"forum_id\": 8, \"post_subject\": \"Re: Move file from one folder to another using ECL\", \"username\": \"rtaylor\", \"post_text\": \"Vijay,\\n\\nYou could try using the STD.File.MoveExternalFile() standard library function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-11 15:19:42\" },\n\t{ \"post_id\": 6035, \"topic_id\": 1387, \"forum_id\": 8, \"post_subject\": \"Move file from one folder to another using ECL\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nMy landing zone has two folder\\n\\nFOLDER A\\nFOLDER B\\n\\nI want to move files present in folder A to folder B. How can I achieve this in HPCC. What API should I use?\\n\\nThanks\\nVijay\", \"post_time\": \"2014-07-11 15:07:40\" },\n\t{ \"post_id\": 6055, \"topic_id\": 1388, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT to file is not supported inside NOTHOR()\", \"username\": \"rtaylor\", \"post_text\": \"Vijay,\\n\\nTry it this way:
EXPORT Collab_Sequential_Module := MODULE\\n\\n\\tEXPORT CollabResult := AppSharedSvc_Recommendation_Collab_BWR_Files_New.BWR_Write_Doc_To_Session_Records_Run_1;\\n\\n\\tEXPORT fileList := FileServices.RemoteDirectory('xx.xxx.xx.xx', 'c:/DATA/RA_RE_1day/', '*'+'.xml');\\n\\n\\tEXPORT landingZonePath := Configurations_File_Module.dir_root + Configurations_File_Module.dir_name_collab + fileList[1].name;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\tEXPORT collabExecution := SEQUENTIAL( SpryingData_Module.SprayFiles(landingZonePath, Configurations_File_Module.sprayedFileName, Configurations_File_Module.recordTag_collab )\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,CollabResult);\\n\\nEND;
And your AppSharedSvc_Recommendation_Collab_BWR_Files_New.BWR_Write_Doc_To_Session_Records_Run_1 should contain: EXPORT BWR_Write_Doc_To_Session_Records_Run_1 := SEQUENTIAL( \\nOUTPUT(NormalizeCoreDataItem_Module.NormalizedCoreDataItemTable,,FileName_Module_Collab.Normalized_CoreDataItemDS, OVERWRITE); \\nOUTPUT(NormalizeCoreDataItem_Module.DistDocToSessionRecords,,FileName_Module_Collab.Doc_To_Session_Records, OVERWRITE, EXPIRE(5)); \\n);
IOW, move the SEQUENTIAL action (it is an "action" not an "API") to surround the two OUTPUT actions you actually want to execute in sequence. And, you need to give it an ECL definition name to reference it the way you're trying to do.\\n\\nOne other minor point, CollabResult is a definition, not a "variable" (since ECL is a declarative language, it does not have "variables" - only "definitions"). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-15 16:46:28\" },\n\t{ \"post_id\": 6046, \"topic_id\": 1388, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT to file is not supported inside NOTHOR()\", \"username\": \"bforeman\", \"post_text\": \"Hi Vijay,\\n\\nPlease verify, is your target set to THOR or hTHOR (e.g. NOTHOR)?\\n\\nIf set to THOR, you may have to force the compiler to actually use THOR.\\n\\nTry setting the following #OPTION at the start of your code:\\n\\n#OPTION(pickBestEngine,FALSE)
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-14 16:09:50\" },\n\t{ \"post_id\": 6044, \"topic_id\": 1388, \"forum_id\": 8, \"post_subject\": \"OUTPUT to file is not supported inside NOTHOR()\", \"username\": \"VijayaKumar_Dhanasekaran\", \"post_text\": \"Hi Team,\\n\\nWith SEQUENTIAL API I am trying to achieve two functionality.\\n\\n1.)Spray Data using Lib_Fileservices.Fileservices.DfuPlusExec.\\n2.)Read the Sprayed File Name from action (1) and create logical files.\\n\\nWith SEQUENTIAL API if I run the first action (1) alone the data is getting sprayed.\\nWhen I specify the logic to create logical its throwing the below mentioned error.\\n\\nError: OUTPUT to file is not supported inside NOTHOR() \\n\\nMy requirement is to read a list of files from landing zone folder, spray the same into THOR and create logical files (from which Super key or super file will be created) by reading the name of the sprayed files.\\n\\nEXPORT Collab_Sequential_Module := MODULE\\n\\nEXPORT CollabResult := SEQUENTIAL( \\nAppSharedSvc_Recommendation_Collab_BWR_Files_New.BWR_Write_Doc_To_Session_Records_Run_1\\n);\\n\\n\\nEXPORT fileList := FileServices.RemoteDirectory('xx.xxx.xx.xx', 'c:/DATA/RA_RE_1day/', '*'+'.xml');\\n\\nEXPORT landingZonePath := Configurations_File_Module.dir_root + Configurations_File_Module.dir_name_collab + fileList[1].name;\\n \\nEXPORT collabExecution := SEQUENTIAL( SpryingData_Module.SprayFiles(landingZonePath, Configurations_File_Module.sprayedFileName, Configurations_File_Module.recordTag_collab )\\n ,CollabResult);\\n\\nEND;
\\n\\n\\n\\nCode Present in AppSharedSvc_Recommendation_Collab_BWR_Files_New.BWR_Write_Doc_To_Session_Records_Run_1\\n\\nOUTPUT(NormalizeCoreDataItem_Module.NormalizedCoreDataItemTable,,FileName_Module_Collab.Normalized_CoreDataItemDS, OVERWRITE); \\nOUTPUT(NormalizeCoreDataItem_Module.DistDocToSessionRecords,,FileName_Module_Collab.Doc_To_Session_Records, OVERWRITE, EXPIRE(5)); \\n
\\n\\nWhen I call CollabResult variable within SEQUENTIAL its throwing an error.\\n\\nKindly Help.\\n\\nThanks\\nVijay\", \"post_time\": \"2014-07-14 13:49:19\" },\n\t{ \"post_id\": 6056, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"xiaolou\", \"post_text\": \"[quote="dustinskaggs":1phx2wbx]UNICODE within ECL is utf16 so that's why a simple cast of the utf8 data doesn't work. Since your data is utf8, the TOUNICODE function is required.\\n\\nYou are right. Thanks a lot! \\n\\n\\nAaron\", \"post_time\": \"2014-07-16 01:22:23\" },\n\t{ \"post_id\": 6054, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"dustinskaggs\", \"post_text\": \"UNICODE within ECL is utf16 so that's why a simple cast of the utf8 data doesn't work. Since your data is utf8, the TOUNICODE function is required.\", \"post_time\": \"2014-07-15 15:08:04\" },\n\t{ \"post_id\": 6053, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"xiaolou\", \"post_text\": \"[quote="dustinskaggs":3e69kcas]You'll want to specify the Content field as "DATA" instead of "UNICODE". Then you can run it through a PROJECT to convert it to UNICODE with TOUNICODE(rec.content, 'UTF-8'). This is assuming the data is utf-8 encoded. \\n\\n-Dustin\\nThe data is utf-8 encoded.\\nI tried convert "DATA" to "UNICODE" with (UNICODE)rec.content before, but I got the garbled content. \\nI will try your suggestion.\\nThanks for your reply.\\n\\nAaron\", \"post_time\": \"2014-07-15 15:05:30\" },\n\t{ \"post_id\": 6052, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"dustinskaggs\", \"post_text\": \"You'll want to specify the Content field as "DATA" instead of "UNICODE". Then you can run it through a PROJECT to convert it to UNICODE with TOUNICODE(rec.content, 'UTF-8'). This is assuming the data is utf-8 encoded. \\n\\n-Dustin\", \"post_time\": \"2014-07-15 14:55:43\" },\n\t{ \"post_id\": 6051, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"xiaolou\", \"post_text\": \"[quote="bforeman":1nsq54hv]How is your DATASET defined?\\n\\nRegards,\\n\\nBob\\nHi Bob,\\n\\nHere the DATASET defination\\n\\n
Layout := RECORD\\n STRING FileName;\\n UNICODE Content;\\n END;\\nEXPORT originfiles := DATASET('~online::aaron::hyperlink::origindocs',Layout,thor);
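For reference, a rough sketch of the DATA-plus-TOUNICODE approach suggested earlier in this thread; the logical filename is the one above, UTF-8 is assumed as in the discussion, and RawLayout/UniLayout are invented names:
RawLayout := RECORD
  STRING FileName;
  DATA   Content;   // raw bytes exactly as sprayed
END;
rawFiles := DATASET('~online::aaron::hyperlink::origindocs', RawLayout, THOR);
UniLayout := RECORD
  STRING  FileName;
  UNICODE Content;
END;
uniFiles := PROJECT(rawFiles,
                    TRANSFORM(UniLayout,
                              SELF.Content := TOUNICODE(LEFT.Content, 'UTF-8'),
                              SELF := LEFT));
OUTPUT(uniFiles);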
\\n\\nThanks,\\nAaron\", \"post_time\": \"2014-07-15 14:53:14\" },\n\t{ \"post_id\": 6050, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Re: Spray multiple xml files to single logic file\", \"username\": \"bforeman\", \"post_text\": \"How is your DATASET defined?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-15 14:41:16\" },\n\t{ \"post_id\": 6049, \"topic_id\": 1390, \"forum_id\": 8, \"post_subject\": \"Spray multiple xml files to single logic file\", \"username\": \"xiaolou\", \"post_text\": \"Hi, \\n\\nI sprayed multiple xml files to a single logic file by using following dfuplus command.\\n\\ndfuplus server=http://192.168.32.128:8010 username=hpccdemo password=hpccdemo overwrite=1 replicate=1 action=spray srcip=192.168.32.128 srcfile=/home/hpccdemo/Input/*.xml dstcluster=mythor dstname=online::aaron::hyperlink::origindocs prefix=FILENAME,FILESIZE nosplit=1 \\n
\\nIt works fine, however, when I try output the records as following structure , the ECL IDE returns a error "System error: 1301: Memory pool exhausted (in Disk Read G1 E2) (0, 0)".\\n Layout := RECORD\\n\\t\\t STRING FileName;\\n\\t\\t UNICODE Content;\\n\\t\\tEND;
\\n\\nIf I change the type of the field "Content" to "STRING" , ECL IDE returns no error.\\nHowever , the "Content" is garbled , because there are some Japanese characters in the files.\\n\\nIs there any way to get the correct content?\\n\\nMy platform version is "community_4.2.2-1".\", \"post_time\": \"2014-07-15 13:36:54\" },\n\t{ \"post_id\": 6107, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"Rob Pelley\", \"post_text\": \"I've now taken a look at the Python documentation and your code is now self-explanatory.\\n\\nThanks again for the useful tip.\\n\\nRegards,\\n\\nRob.\", \"post_time\": \"2014-07-24 11:31:56\" },\n\t{ \"post_id\": 6106, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"Rob Pelley\", \"post_text\": \"That's a very useful tip ... I'll take a look at the Python documentation as you suggested.\\n\\nWhat output does your ECL code produce?\\n\\nCan you provide a little more explanation?\\n\\nMany thanks,\\n\\nRob.\", \"post_time\": \"2014-07-24 11:19:51\" },\n\t{ \"post_id\": 6105, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"Gleb Aronsky\", \"post_text\": \"I was able to generate data using embeded python code. The python library rstr is required to be installed. \\n\\nOn Ubuntu 14.04 LTS I had to install a few pacakges first:\\n\\n\\nsudo apt-get install python-setuptools\\ndownload the rstr library from https://pypi.python.org/pypi/rstr/2.1.2\\nunpack the library into /usr/lib/python2.7/dist-packages\\nrun 'sudo python setup.py install' from /usr/lib/python2.7/dist-packages/rstr-2.1.2\\n
\\nRefer to rstr python docs for more details on the rstr.rstr method and parameters.\\nHere is the sample ECL code to generate random data using regular expressions after installing rstr.\\n\\nimport python;\\n\\nstring regexgen1(string s1) := embed(Python)\\n import rstr\\n return rstr.rstr(s1)\\nendembed;\\n\\nstring regexgen2(string s1, s2) := embed(Python)\\n import rstr\\n return rstr.rstr(s1, s2)\\nendembed;\\n\\nstring regexgen3(string s1, s2, s3) := embed(Python)\\n import rstr\\n return rstr.rstr(s1, s2, s3)\\nendembed;\\n\\nstring regexgen4(string s1, string s3) := embed(Python)\\n import rstr\\n return rstr.rstr(s1, include=s3)\\nendembed;\\n\\nStr1 := ('ABC');\\nStr2 := ('&');\\n\\nregexgen1(Str1);\\n//regexgen2(Str1, 5);\\n//regexgen3(Str1, 1, 10);\\n//regexgen4(Str1, Str2);
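A possible follow-up sketch, not from the original post: the question asked for a set number of generated values returned as a DATASET of STRINGs. One way to wrap the generator above reuses regexgen1 and Str1 exactly as defined there; note that ECL assumes functions are pure, so identical calls may be evaluated only once, and a per-row argument to the generator may be needed if every row must differ:
SampleRec  := {UNSIGNED seq, STRING val};
NumSamples := 10;
samples := DATASET(NumSamples,
                   TRANSFORM(SampleRec,
                             SELF.seq := COUNTER,            // row number
                             SELF.val := regexgen1(Str1)));  // one generated value per row
OUTPUT(samples);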
\", \"post_time\": \"2014-07-23 16:19:59\" },\n\t{ \"post_id\": 6099, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"Rob Pelley\", \"post_text\": \"Hi Richard,\\n\\nI can't find much in the way of pre-written, open source c or c++ code ...\\n\\nI agree that it would be fantastic if we could simply use in-line c++ for this! \\n\\nI'll do some more digging ...\\n\\nRegards,\\n\\nRob.\", \"post_time\": \"2014-07-22 17:52:04\" },\n\t{ \"post_id\": 6098, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"rtaylor\", \"post_text\": \"Rob,\\n\\nIs this something that is already written in C/C++ that you could just incorporate and use in ECL?\\n\\nRichard\", \"post_time\": \"2014-07-22 17:13:37\" },\n\t{ \"post_id\": 6097, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"Rob Pelley\", \"post_text\": \"Hi Bob,\\n\\nI'll be sure to take a look at the Machine Learning Library as you suggested ...\\n\\nThanks for the link.\\n\\nHi Aintnomyth,\\n\\nI'll try your code sample but I think, as you have suggested, that it may too slow when generating a large set of sample data ... \\n\\nI was really looking for something more generic.\\n\\nThanks for your help.\\n\\nRegards,\\n\\nRob.\", \"post_time\": \"2014-07-22 17:10:03\" },\n\t{ \"post_id\": 6095, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"aintnomyth\", \"post_text\": \"Here's a brute force method combining a couple tutorials from the ECL reference...I added the ^ and $ to the regex to limit it to 6 characters, you can add more iterations but brute force gets pretty slow (in my VM at least).\\n\\n\\nrec := RECORD\\n VARSTRING Letters;\\nEND;\\nInds1 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'},{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'},{'Z'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'1'},{'2'},{'3'},{'4'},{'5'},{'6'},{'7'},{'8'},{'9'},{'0'}\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t],\\n rec);\\nCntInDS1 := COUNT(Inds1);\\nSetInDS1 := SET(inds1,letters);\\n\\nrec CartProd(rec L, INTEGER C) := TRANSFORM\\n SELF.Letters := L.Letters + SetInDS1[C]; \\nEND;\\n\\n\\nGetCP(dsIn) := FUNCTIONMACRO\\n\\tresult := NORMALIZE(dsIn,CntInDS1,CartProd(LEFT,COUNTER));\\n\\treturn result;\\nENDMACRO;\\n\\nsp := '^((A|B|C){3})((1|2|3){3})$';\\n\\nCP2 := GetCP(Inds1);//AA\\nCP3 := GetCP(CP2);\\t//AAA\\nCP4 := GetCP(CP3);\\t//AAA1\\nCP5 := GetCP(CP4);\\t//AAA11\\nCP6 := GetCP(CP5);\\t//AAA111\\n\\nfiltered := CP6(REGEXFIND(sp,letters));\\n\\nOUTPUT(CP6);\\nOUTPUT(filtered);
\", \"post_time\": \"2014-07-22 15:36:01\" },\n\t{ \"post_id\": 6093, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Re: Generating Data that matches a Regular Expression\", \"username\": \"bforeman\", \"post_text\": \"Hi Rob,\\n\\nAs a start I think you might want to take a look at the Machine Learning Library. Although they do not have anything specific for regular expressions that I can see, they do have an interesting chapter in Generating Test Data, and several built in functions that look close to what you are trying to do.\\n\\nThe latest docs can be downloaded here:\\nhttp://hpccsystems.com/download/docs/machine-learning\\n\\nMeanwhile, let me check around with some other ECL wizards. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-22 12:45:42\" },\n\t{ \"post_id\": 6092, \"topic_id\": 1397, \"forum_id\": 8, \"post_subject\": \"Generating Data that matches a Regular Expression\", \"username\": \"Rob Pelley\", \"post_text\": \"ECL has great support for Regular Expressions through the built in functions REGEXFIND and REGEXREPLACE.\\n\\nIs it possible to generate sample data that matches a supplied Regular Expression?\\n\\nFor example ...\\n\\nGiven the Regular Expression ((A|B|C){3})((1|2|3){3}) can I generate sample data that matches?\\n\\nABC123\\nCAB231\\nACB132\\n\\netc ...\\n\\nIdeally I'd like to be able to define a Regular Expression and then generate a defined number of matching data values ... with a function of the form ...\\n\\nGenerateSampleData(thisRegEx STRING, SamplesRequired INTEGER)\\n\\n... and have the results returned as a DATASET of STRINGs.\", \"post_time\": \"2014-07-22 10:17:17\" },\n\t{ \"post_id\": 6103, \"topic_id\": 1399, \"forum_id\": 8, \"post_subject\": \"Re: cast boolean to unsigned\", \"username\": \"rtaylor\", \"post_text\": \"aintnomyth,
Am I crazy or did the implicit conversion work in prior versions?
I honestly don't know. I tend to just use BOOLEAN to store boolean data. \\n\\nThe only real reason I can see to use UNSIGNED1 would be to send the data on to some external tool that expects its boolean fields to be 0 or 1. If the data is going to stay in the HPCC world (Thor and/or Roxie) then there's no reason not to just use BOOLEAN.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-22 20:10:24\" },\n\t{ \"post_id\": 6102, \"topic_id\": 1399, \"forum_id\": 8, \"post_subject\": \"Re: cast boolean to unsigned\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the quick reply. \\n\\nI had tried:\\nSELF.flag := (UNSIGNED1) LEFT.val1 = LEFT.val2;
\\n\\nBut I was missing the parenthesis so the cast was applied to LEFT.val1 (and still throwing the same error), now it's working like your example:\\nSELF.flag := (UNSIGNED1) (LEFT.val1 = LEFT.val2);
\\n\\nThanks again!\\n\\nAm I crazy or did the implicit conversion work in prior versions?\", \"post_time\": \"2014-07-22 20:05:21\" },\n\t{ \"post_id\": 6101, \"topic_id\": 1399, \"forum_id\": 8, \"post_subject\": \"Re: cast boolean to unsigned\", \"username\": \"rtaylor\", \"post_text\": \"aintnomyth,\\n\\nTry it this way:rec1 := RECORD\\n UNSIGNED1 val1;\\n UNSIGNED1 val2;\\nEND;\\n\\nds1 := DATASET([{1,2},{1,1}], rec1);\\n\\nrec2 := RECORD\\n rec1;\\n UNSIGNED1 flag;\\n BOOLEAN bool;\\nEND;\\nproj1 := PROJECT(ds1, \\n TRANSFORM(rec2,\\n SELF.Bool := LEFT.val1 = LEFT.val2;\\n SELF.flag := (UNSIGNED1)(LEFT.val1 = LEFT.val2);\\n SELF := LEFT));\\n \\nOUTPUT(proj1);
Your code had two problems:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-22 19:54:20\" },\n\t{ \"post_id\": 6100, \"topic_id\": 1399, \"forum_id\": 8, \"post_subject\": \"cast boolean to unsigned\", \"username\": \"aintnomyth\", \"post_text\": \"I'm hitting the error "C2007: Can not assign Boolean to Integer" and wondering if I'm misreading the ECL Reference. Under the Type Casting -> Casting Rules section:\\n
From: BOOLEAN\\nTo: INTEGER\\nResults in: FALSE = 0, TRUE = 1
\\n\\nThis kind of code worked on an older version (3.6 I think):\\nrec1 := RECORD\\n\\tUNSIGNED1 val1;\\n\\tUNSIGNED1 val2;\\n\\tUNSIGNED1 flag := 0;\\nEND;\\n\\nds1 := DATASET([{1,2}], rec1);\\n\\n\\nproj1 := PROJECT(ds1, TRANSFORM(rec1,\\n\\t\\tSELF.flag := LEFT.val1 = LEFT.val2;\\n\\t\\tSELF := LEFT;\\n\\t));\\n\\t\\nOUTPUT(ds1);
\\n\\nThoughts?\", \"post_time\": \"2014-07-22 18:31:02\" },\n\t{ \"post_id\": 6143, \"topic_id\": 1404, \"forum_id\": 8, \"post_subject\": \"Re: Date format\", \"username\": \"bforeman\", \"post_text\": \"Hi Limian,\\n\\nThe Standard Library Reference has three functions, Year, Month and Day where you can extract the pieces of any date and format it however you like using indexing and concatenation.\\n\\nSee the Standard Library Reference PDF for more information.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-30 20:02:32\" },\n\t{ \"post_id\": 6142, \"topic_id\": 1404, \"forum_id\": 8, \"post_subject\": \"Date format\", \"username\": \"fanglimian\", \"post_text\": \"Hi,\\n\\nThis might be a dump question..but is there a way to convert a date to a specific format? The original format of the file on HPCC is yyyymmdd , I would like to convert it mm/dd/yy. if there is missing value , I would want it to still be in this format as 00/00/00 or 0/0/0.\\n\\nIs there a function to do that? Thanks!!\", \"post_time\": \"2014-07-30 16:20:35\" },\n\t{ \"post_id\": 6152, \"topic_id\": 1405, \"forum_id\": 8, \"post_subject\": \"Re: Concat 2 rows as 1 row in ECL\", \"username\": \"bforeman\", \"post_text\": \"See the following post:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=502&p=2269&hilit=row+to+column&sid=c3f3d1e96f5e1e79facbf5b706c7636c&sid=7855f7e6dcd7fd2b7cf3f5fb889786ac#p2269\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-01 13:37:57\" },\n\t{ \"post_id\": 6151, \"topic_id\": 1405, \"forum_id\": 8, \"post_subject\": \"Concat 2 rows as 1 row in ECL\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\ni have a requirement to concate two or more rows into a signle row. Pls check the below table.\\n\\nName\\n--------\\nA\\nB\\nC\\n\\nNow i want to convert it as a single row as below\\nName\\n--------\\nA B C\\n\\ncan we achieve this in ECL?. Please suggest some thing.\\n\\nThanks in Advance,\\nArjun\", \"post_time\": \"2014-08-01 10:36:34\" },\n\t{ \"post_id\": 6164, \"topic_id\": 1406, \"forum_id\": 8, \"post_subject\": \"Re: how to remove logical files from a superfile\", \"username\": \"gouldbrfl\", \"post_text\": \"Bob,\\n\\nI know that I can do that, however this we are doing some major migrations so we only want the files that are currently in the superfile. These will be copied, the original renamed, the copy renamed back to the original and then the real original will be deleted. We're doing this to move hundreds of files from one cluster to another.\\n\\nWhat we need is a \\nStartLogicalFileTransaction() and a FinishLogicalFileTransaction(). IN my case I don't want to delete any files if the renames fail or the verifyFiles does not return an OK result.\\n\\nBest Regards\\n\\nMichael Gould\", \"post_time\": \"2014-08-04 20:31:56\" },\n\t{ \"post_id\": 6162, \"topic_id\": 1406, \"forum_id\": 8, \"post_subject\": \"Re: how to remove logical files from a superfile\", \"username\": \"bforeman\", \"post_text\": \"Hi Michael,\\n\\nThe ClearSuperFile function can remove them in one shot. Example code:\\n\\nIMPORT $,STD;\\nSEQUENTIAL(OUTPUT($.DS.AllData,,'mytemp'),\\n STD.File.StartSuperFileTransaction(),\\n STD.File.ClearSuperFile($.SF.AllData),\\n STD.File.AddSuperFile($.SF.AllData,'mytemp'),\\n STD.File.FinishSuperFileTransaction());
\\n\\n\\nSo you output the data to a temporary file on the cluster, clear all sub files, and then just add back the single file.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-04 19:34:50\" },\n\t{ \"post_id\": 6155, \"topic_id\": 1406, \"forum_id\": 8, \"post_subject\": \"how to remove logical files from a superfile\", \"username\": \"gouldbrfl\", \"post_text\": \"I am consolidating a superfile that is made up of hundreds of logical files. 99% of these files have 0 records in them and I want to exclude them. Why the code was written like this I don't know but for every day, there is a data file and a corrections file that is created. The files are in XML format. I want to remove any logical file during the conversion process has a size of 0\\n\\n\\nBest Regards\\n\\nMichael Gould\", \"post_time\": \"2014-08-03 13:28:52\" },\n\t{ \"post_id\": 6172, \"topic_id\": 1409, \"forum_id\": 8, \"post_subject\": \"Re: Updating Published Query - PackageMap\", \"username\": \"bforeman\", \"post_text\": \"Hi Arjun,\\n\\nUpdates to the superkey should be done by simply adding a new sub key to the published superkey. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-05 12:32:16\" },\n\t{ \"post_id\": 6165, \"topic_id\": 1409, \"forum_id\": 8, \"post_subject\": \"Updating Published Query - PackageMap\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nIs there any option to update the Published Super Key using PackageMap. When i gave the Overwrite option as TURE it deletes the existing data and add new data, when i gave FALSE no action was done. Is there any way to Update the data to the Published Super Key instead of deleting and adding? \\n\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-08-05 07:54:10\" },\n\t{ \"post_id\": 6177, \"topic_id\": 1412, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Not Active Error\", \"username\": \"ksviswa\", \"post_text\": \"Thanks Richard,\\n\\nI will check the same and let you know.\\n\\nViswa\", \"post_time\": \"2014-08-05 23:13:43\" },\n\t{ \"post_id\": 6175, \"topic_id\": 1412, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Not Active Error\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nHere's the code I ran on both 4.2.2-1 and 5.0.0-1:rec1 := RECORD\\n unsigned id;\\n string name;\\nEND;\\n\\ninitial_ds := DATASET('~RTTEST::Test::file1',rec1,THor);\\n// initial_ds := DATASET([{1,'file1'},{2,'file1'},{3,'file1'}],rec1);\\n// OUTPUT(initial_ds,,'~RTTEST::Test::file1');\\n\\nrec2 := RECORD\\n unsigned uid;\\n string email;\\n string text1;\\nEND;\\n\\nadditional_ds := DATASET('~RTTEST::Test::file2',rec2,THOR);\\n// additional_ds := DATASET([{1,'file2','file2'},{2,'file2','file2'}],rec2);\\n// OUTPUT(additional_ds,,'~RTTEST::Test::file2');\\n\\nchildRec := RECORD\\nstring f1;\\nstring f2;\\nEND;\\n\\nrec3 := record\\n UNSIGNED ID;\\n string name;\\n string email;\\n string text1;\\n DATASET(childRec) Children;// := DATASET([],childRec);\\nEND;\\n\\nJOIN(initial_ds , additional_ds ,\\n LEFT.id = RIGHT.uid ,\\n Transform(rec3 ,\\n SELF.email := RIGHT.email,\\n SELF.text1 := RIGHT.text1,\\n SELF.children := [],\\n self := LEFT;\\n ),\\n LEFT OUTER\\n );
I tried with inline datasets and disk files. I worked correctly with both. Try this code on your cluster.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-05 14:10:09\" },\n\t{ \"post_id\": 6169, \"topic_id\": 1412, \"forum_id\": 8, \"post_subject\": \"Dataset Not Active Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am trying to a normal join operation and assign one of the child dataset value to be null, get the following error.\\n\\n\\nError: INTERNAL: Dataset is not active: '_EMPTY_(childRec)' (111, 8)\\n
\\n\\nThe code which am using :\\n\\n\\nrec1 := RECORD\\n unsigned id;\\n string name;\\nEND;\\n\\ninitial_ds := DATASET('file1',rec1,THor);\\n\\nrec2 := RECORD\\n unsigned uid;\\n string email;\\n string text1;\\nEND;\\n\\nadditional_ds := DATASET('file2',rec2,THOR);\\n\\nchildRec := RECORD\\n string f1;\\n string f2;\\nEND;\\n\\nrec3 := record\\n UNSIGNED ID;\\n string name;\\n string email;\\n string text1;\\n DATASET(childRec) Children := DATASET([],childRec);\\nEND;\\n\\nJOIN(initial_ds , additional_ds \\n LEFT.id = RIGHT.uid ,\\n Transform(rec3 ,\\n SELF.email := RIGHT.email,\\n SELF.text1 := RIGHT.text1,\\n SELF.children := [],\\n self := LEFT;\\n ),\\n LEFT OUTER\\n );\\n \\n
\\n\\nAm i missing something here ? Is this the correct way to initialize child datasets with null or we have to initialize the same in another way .?\\n\\nHPCC Version : 4.2.2-rc9 , 50 Node Cluster\\n\\n\\nKindly suggest.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-05 09:39:50\" },\n\t{ \"post_id\": 6212, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Re: Invalid Destination Filename\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nSo the problem was you did not have the STORED workflow service on a definition in your ECL code, so SOAP had no place to put the passed parameter. OK, thanks for letting us know.\\n\\nRichard\", \"post_time\": \"2014-08-11 13:00:13\" },\n\t{ \"post_id\": 6208, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Re: Invalid Destination Filename\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThose are queryset and query parameters, similar to this syntax from the documentation.\\n\\n\\necl run [--target=<c>][--input=<file|xml>][--wait=<ms>] <queryset> <query>\\n
\\n\\nI was able to fix the issue, i wasn't having a stored variable in ecl code to take the user input value and hence the error.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-11 09:45:20\" },\n\t{ \"post_id\": 6187, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Re: Invalid Destination Filename\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,--input="<request><batch_id>batchid_input</batch_id></request>" thor_50 <ecl query>
That part looks suspect to me. The double quotes enclose XML but after the ending quote there's two other items: "thor_50" and "<ecl query>" -- In my reading of the syntax diagram in the docs I don't see what parameters these are.\\n\\nWhat are these meant to be? \\n\\nRichard\", \"post_time\": \"2014-08-06 22:59:35\" },\n\t{ \"post_id\": 6186, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Re: Invalid Destination Filename\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\n/opt/HPCCSystems/bin/ecl run -s=XX.XX.XX.XX -u=xyz -cl=thor_50 -pw=XXXX --input="<request><batch_id>batchid_input</batch_id></request>" thor_50 <ecl query>\\n\\nKindly let me know if any other details are required.\\n\\nThe same "batchid_input" input when i pass as a parameter to the ECL Code is running successfully.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-06 17:19:34\" },\n\t{ \"post_id\": 6182, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Re: Invalid Destination Filename\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nPlease show us the command line that failed.\\n\\nRichard\", \"post_time\": \"2014-08-06 12:37:39\" },\n\t{ \"post_id\": 6178, \"topic_id\": 1414, \"forum_id\": 8, \"post_subject\": \"Invalid Destination Filename\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nWhile trying to run a ECL code through the command line utility using ECL run, encounter this error. \\n\\n\\n\\nW20140805-165756 failed\\n<Result>\\n<Exception><Source>eclagent</Source><Message>System error: 0: [20052: 2014-08-05 20:57:57 GMT: invalid destination filename] </Message></Exception>\\n</Result>\\n\\n
\\n\\nI was able to run the same initially without any errors for previous scenarios. \\nThe same ECL code am able to run successfully using ECL IDE. \\n\\nThe ECL Code basically does a spray XML , parse and add the same in a superfile.\\n\\n\\nKindly suggest.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-05 23:30:41\" },\n\t{ \"post_id\": 6207, \"topic_id\": 1416, \"forum_id\": 8, \"post_subject\": \"Re: Question about child dataset transform\", \"username\": \"Leofei\", \"post_text\": \"I see. Thanks for the review, Richard!\\n\\n-Fan\", \"post_time\": \"2014-08-08 19:36:16\" },\n\t{ \"post_id\": 6206, \"topic_id\": 1416, \"forum_id\": 8, \"post_subject\": \"Re: Question about child dataset transform\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nYour way is probably more efficient, for precisely the reasons you stated. \\n\\nBut as a matter of ECL coding style, I would suggest writing your TRANSFORMs as separate functions immediately preceding the operation that uses them instead of inline. Inline TRANSFORMs are fine if there are very few (as in 1, 2, or 3) simple transformations to define. \\n\\nIn my code you see this demonstrated in the two separate TRANSFORM functions for NORMALIZE and DENORMALIZE and then an inline TRANSFORM for my PROJECT. \\n\\nMy reason for suggesting this is simply code readability/maintainability. Smaller code chunks are easier to "digest" and understand and are also more of an "ECL-ish" style, instead of the "C-style" formatting of inline code nesting. \\n\\nSo I would have written your code like this:IMPORT std;\\n\\nrec := {UNSIGNED id, STRING50 line};\\n\\nds1 := DATASET([{1, 'one bedroom|two bathroom|one living room'},\\n {2, 'two bedroom|two bathroom|one living room|one garage'}],\\n rec);\\n\\nOUTPUT(ds1);\\n\\nChildRec := {UNSIGNED seq, STRING50 newline};\\nrec2 := {UNSIGNED id,DATASET(ChildRec) childDS};\\n\\nrec2 XF1(ds1 L) := TRANSFORM\\n SELF.id := L.id;\\n lineSet := STD.Str.SplitWords(L.line, '|');\\n lineDS := DATASET(lineSet, {STRING50 line}); \\n\\t\\n ChildRec XF2(lineDS L, INTEGER C) := TRANSFORM\\n SELF.seq := C;\\n SELF.newline := L.line;\\n END;\\n SELF.childDs := PROJECT(lineDS,XF2(LEFT,COUNTER));\\n\\nEND;\\t\\nds2 := PROJECT(ds1,XF1(LEFT)); \\n\\nOUTPUT(ds2);
This eliminates the nesting within nesting that can get very confusing very quickly if you're coming at the code "cold" (as in, someone else wrote it or you did a long time ago).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-08 18:23:30\" },\n\t{ \"post_id\": 6201, \"topic_id\": 1416, \"forum_id\": 8, \"post_subject\": \"Re: Question about child dataset transform\", \"username\": \"Leofei\", \"post_text\": \"Thanks for the modification, Richard! I see my problem. I made a stupid mistake and I forgot to add DATASET() into the layout. \\n\\nAt the same time, do you think it could be more efficient to do it in the way I proposed? Since it only need to read through the whole table one time. But the NORMALIZE and DENORMALIZE need to read through the table at least three time(NORMALIZE function is the first time to read the whole table, and then DENORMALIZE needs to read LEFT and RIGHT table once). Whereas, using NORMALIZE can guarantee the order of the segments. This is really good.\\n\\nPlease correct me if I was wrong somewhere. I'd like to make sure I understand it in the right way. \\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-08-08 16:04:05\" },\n\t{ \"post_id\": 6200, \"topic_id\": 1416, \"forum_id\": 8, \"post_subject\": \"Re: Question about child dataset transform\", \"username\": \"rtaylor\", \"post_text\": \"Leofei,\\n\\nYour code needed two minor tweaks to work:
import std;\\n\\nrec := {UNSIGNED id, STRING50 line};\\n\\nds1 := dataset([{1, 'one bedroom|two bathroom|one living room'},\\n {2, 'two bedroom|two bathroom|one living room|one garage'}],\\n rec\\n );\\n\\noutput(ds1);\\n\\nrec2 := {UNSIGNED id, DATASET({UNSIGNED seq, STRING50 newline}) childDS};\\n\\nds2 := project(ds1, \\n transform(rec2,\\n self.id := left.id;\\n lineSet := STD.Str.SplitWords(left.line, '|');\\n \\n self.childDs := PROJECT(dataset(lineSet, {STRING50 line}),\\n transform(RECORDOF(rec2.childDS),\\n self.seq := counter;\\n self.newline := left.line\\n ))\\n ));\\n\\noutput(ds2);
Note the addition of DATASET in your rec2 RECORD structure and the addition of the RECORDOF() function in your TRANSFORM. \\n\\nThe DATASET is required to eliminate the error message you're getting, and the RECORDOF() function is required because that first parameter to an inline TRANSFORM must be a RECORD structure and not a dataset name.\\n\\nHere's an alternative method of doing this, using NORMALIZE and DENORMALIZE:import std;\\n\\nrec := {UNSIGNED id, STRING50 line};\\n\\nds1 := dataset([{1, 'one bedroom|two bathroom|one living room'},\\n {2, 'two bedroom|two bathroom|one living room|one garage'}],\\n rec);\\n\\noutput(ds1);\\n\\nChildRec := {UNSIGNED seq, STRING50 newline};\\nrec2 := {UNSIGNED id,DATASET(ChildRec) childDS};\\n\\nMidrec := {UNSIGNED id,ChildRec};\\n\\nMidrec XF1(rec L,INTEGER C) := TRANSFORM\\n SELF.id := L.id;\\n SELF.seq := C;\\n lineSet := STD.Str.SplitWords(L.line, '|');\\n SELF.newline := lineset[C];\\nEND;\\n\\nds2 := NORMALIZE(ds1,COUNT(STD.Str.SplitWords(left.line, '|')),XF1(LEFT, COUNTER));\\noutput(ds2);\\n\\npRecs := PROJECT(ds1,TRANSFORM(rec2,SELF.id:=LEFT.id,SELF.ChildDS := []));\\n\\nrec2 XF2(rec2 Parent, MidRec Child,INTEGER Ctr) := TRANSFORM\\n SELF.id := Parent.id;\\n SELF.ChildDS := Parent.ChildDS + ROW({Child.seq,Child.newline},ChildRec);\\nEND;\\nds3 := DENORMALIZE(pRecs,ds2,LEFT.id=RIGHT.id,XF2(LEFT,RIGHT,COUNTER));\\n\\noutput(ds3);
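\\nOn larger data, the same pattern can be kept node-local by distributing both sides on the match key before the DENORMALIZE; a sketch along those lines, assuming id hashes reasonably evenly across the nodes:\\n\\nds2d := DISTRIBUTE(ds2,HASH32(id));\\npRecsd := DISTRIBUTE(pRecs,HASH32(id));\\nds3local := DENORMALIZE(pRecsd,ds2d,LEFT.id=RIGHT.id,XF2(LEFT,RIGHT,COUNTER),LOCAL);\\noutput(ds3local);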
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-08 15:26:59\" },\n\t{ \"post_id\": 6195, \"topic_id\": 1416, \"forum_id\": 8, \"post_subject\": \"Question about child dataset transform\", \"username\": \"Leofei\", \"post_text\": \"Hi,\\n\\nI have a question. I don't know how to fulfill this transform, could anyone help with it? \\n\\nOne field in my table contains some text info. This text field contains some small segments with pipe delimiter (i.e. |). I want to load these text into a child dataset. Each record in the child dataset contains one segment. Plus, they need to keep the same sequence as they appear in the text info. (see the pic attached)\\n\\nI did some work, but it reports error. \\n\\nimport std;\\n\\nrec := {UNSIGNED id, STRING50 line};\\n\\nds1 := dataset([{1, 'one bedroom|two bathroom|one living room'},\\n\\t\\t\\t\\t\\t\\t\\t {2, 'two bedroom|two bathroom|one living room|one garage'}],\\n\\t\\t\\t\\t\\t\\t\\t\\trec\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\noutput(ds1);\\n\\nrec2 := {UNSIGNED id, {UNSIGNED seq, STRING50 newline} childDS};\\n\\nds2 := project(ds1, \\n\\t\\t\\t\\t\\ttransform(rec2,\\n\\t\\t\\t\\t\\tself.id := left.id;\\n\\t\\t\\t\\t\\tlineSet := STD.Str.SplitWords(left.line, '|');\\n\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\tself.childDs := PROJECT(dataset(lineSet, {STRING50 line}),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ttransform(rec2.childDS,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.seq := counter;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.newline := left.line\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t))\\n\\t\\t\\t\\t\\t));\\n\\noutput(ds2);\\n
\\n\\nAny thought will be appreciated!!\\n\\nThanks,\\n-Fan\", \"post_time\": \"2014-08-07 23:08:24\" },\n\t{ \"post_id\": 6228, \"topic_id\": 1418, \"forum_id\": 8, \"post_subject\": \"Re: Filter in Child Dataset\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot Bob..\\n\\nViswa\", \"post_time\": \"2014-08-18 10:18:18\" },\n\t{ \"post_id\": 6211, \"topic_id\": 1418, \"forum_id\": 8, \"post_subject\": \"Re: Filter in Child Dataset\", \"username\": \"bforeman\", \"post_text\": \"Look at the EXISTS function.\\n\\nExample:\\n\\n myout := ParentRecord(EXISTS(ChildRecord(anyfilter)));
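\\n\\nA small self-contained sketch of the idea (the record and field names here are just illustrative):\\n\\nchildRec := {STRING ref_val};\\nparentRec := {UNSIGNED id, DATASET(childRec) children};\\nsample_ds := DATASET([{1,[{'A'}]},{2,[]}],parentRec);\\nwithChildren := sample_ds(EXISTS(children)); // child dataset is not empty\\nnoChildren := sample_ds(NOT EXISTS(children)); // child dataset is empty\\nOUTPUT(withChildren);\\nOUTPUT(noChildren);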
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-11 12:24:01\" },\n\t{ \"post_id\": 6209, \"topic_id\": 1418, \"forum_id\": 8, \"post_subject\": \"Filter in Child Dataset\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs it possible to apply a filter directly to check if the child dataset is null or not null ?\\n\\nEx : \\n\\n\\nsample_ds(ref_val = '');\\n
\\n\\nI tried different ways but encountered errors.\\n\\nWe can normalize the data, but normalize will not consider the null values in the child dataset. \\n\\nKindly provide suggestions on the same.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-11 10:05:58\" },\n\t{ \"post_id\": 6229, \"topic_id\": 1419, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles\", \"username\": \"ksviswa\", \"post_text\": \"Thank a lot bob and Richard.\\n\\nI will try with APPLY option and share the results.\\n\\nRichard : They are mainly xml blob files, retaining them temporarily so that incase if we want to extract few other fields at a later point of time.\\n\\n1.) They are available in the thor cluster.\\n2.) They are produced through an ECL code while spraying the data.\\n3.) They are valid for some time may be a month or so and then deleted.\\n4.) They are around approximately 10K.\\n5.) They are all in the same format.\\n6.) 80 was just an example, may be each superfile can contain 100 of them , because if i collapse the file and try to read, the xml offset is lost and hence just adding the same in multiple superfiles.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-18 10:28:48\" },\n\t{ \"post_id\": 6214, \"topic_id\": 1419, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles\", \"username\": \"bforeman\", \"post_text\": \"Hi Viswa,\\n\\nYou can build a DATASET that contains the filenames that you need to add to your superfile by using the STD.File.LogicalFileList FUNCTION.\\n\\nAfter that, use APPLY through the DATASET that calls an action that adds each filename in the list to your target superfile(s).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-11 15:14:27\" },\n\t{ \"post_id\": 6213, \"topic_id\": 1419, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nThe short answer is: probably. The real answer depends a lot on the circumstances. \\n\\nYou call these "temp" files, so:
\\nRichard\", \"post_time\": \"2014-08-11 15:07:09\" },\n\t{ \"post_id\": 6210, \"topic_id\": 1419, \"forum_id\": 8, \"post_subject\": \"Superfiles\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where i have a lot of temp files.\\n\\nI need these files to be added to a superfile in a programmed way without collapsing / merging.\\n\\nEx : \\n\\nFirst 80 temp files to superfile1 , next 80 files to superfile2 and so on for the list of all the temp files present.\\n\\nIs it possible to accomplish this scenaio ?\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-11 10:16:06\" },\n\t{ \"post_id\": 6237, \"topic_id\": 1421, \"forum_id\": 8, \"post_subject\": \"Re: Download of Desprayed logical file is incomplete\", \"username\": \"jgostylo\", \"post_text\": \"Yeah, sorry. The file was complete on the machine, it think it was a browser/web server setting that was preventing the full download.\", \"post_time\": \"2014-08-18 21:47:57\" },\n\t{ \"post_id\": 6222, \"topic_id\": 1421, \"forum_id\": 8, \"post_subject\": \"Re: Download of Desprayed logical file is incomplete\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"To download/upload a large file, you will need a tool that supports the secure copy protocol, such as WinSCP.\", \"post_time\": \"2014-08-15 14:21:31\" },\n\t{ \"post_id\": 6221, \"topic_id\": 1421, \"forum_id\": 8, \"post_subject\": \"Re: Download of Desprayed logical file is incomplete\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"I think that your file is too big for downloading using ECLWatch download function. The download function is built on web browser's HTTP request. Different browsers have different limits (1G or 2G) for a file size to be defined in an HTTP request. But, your file size is > 4G. \\n\\nPlease see: http://cdn.hpccsystems.com/releases/CE- ... .0.0-1.pdf\\nPage 81: " For smaller data files, maximum of 2GB, you can use the upload/download file utility in ECL Watch. "\", \"post_time\": \"2014-08-15 14:12:59\" },\n\t{ \"post_id\": 6217, \"topic_id\": 1421, \"forum_id\": 8, \"post_subject\": \"Download of Desprayed logical file is incomplete\", \"username\": \"jgostylo\", \"post_text\": \"I am attempting to get a flat file exported from an indexed superfile. Everything looks good until I try to download the file from the landingzone and I only get the first 13.6MB of the 4.3GB file.\\n\\nHere is the code I am using:\\n\\nFirst I export the superfile to a non-indexed logical file.\\n\\n\\ncreateTempFile := OUTPUT\\n(\\n\\trecs,,\\n\\ttempFile,\\n\\tCSV\\n\\t(\\n\\t\\tSEPARATOR('|'),\\n\\t\\tTERMINATOR('\\\\n'),\\n\\t\\tQUOTE(''),\\n\\t\\tHEADING(headerText,SINGLE)\\n\\t),\\n\\tOVERWRITE\\n);\\n
\\n\\nThis works and I look at the logical file defined by tempFile in ECLWatch and all the records are there.\\n\\nThen I despray the temp logical file with this code:\\n\\n\\ndesprayTempFile := Std.File.Despray\\n(\\n\\ttempFile,\\n\\tdestHost,\\n\\tdestPath,\\n\\tallowOverwrite := TRUE\\n);\\n
\\n\\nWhen this is done I can look in the landingzone in ECLWatch under Upload/Download files and it reports this file as 4.3GB.\\n\\nWhen I click on the file to download it I only get 13.6MB downloaded.\\n\\nWhat am I missing that will allow me to download (or create) the entire file? When I spray a 2.5GB file from the landingzone and then despray it to another file I can then download the entire thing. I feel like there is some parameter that is needed to build a single contiguous file.\", \"post_time\": \"2014-08-14 03:25:41\" },\n\t{ \"post_id\": 6238, \"topic_id\": 1424, \"forum_id\": 8, \"post_subject\": \"Re: MD5 Decryption\", \"username\": \"bforeman\", \"post_text\": \"To my knowledge, the support for encryption and decryption in ECL is limited to the OUTPUT and DATASET statements, where an encrypted OUTPUT file is decrypted by a corresponding DATASET statement. \\n\\nIn your case I like what you propose with the INDEX, and that's what we normally do as well, in the RECORD structure is a decrypted field next to the original one. Normally the vendor will provide us with both.\\n\\nOther than that, if you have a decryption function in another language you can always use the EMBED or BEGINC++ structures to use it in ECL.\\n\\nAnother developer mentioned to me that MD5 is one-way, especially without a "lock in which to try the key" to know if your result is correct.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-19 12:46:06\" },\n\t{ \"post_id\": 6230, \"topic_id\": 1424, \"forum_id\": 8, \"post_subject\": \"MD5 Decryption\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThere is a built in function HASHMD5 to encrypt the data. Is it possible to decrypt an MD5 data in HPCC ?\\n\\nSay for an example, i receive a file from an external source with one of the fields encrypted using MD5 , Is it possible to get back the original data in HPCC ?\\n\\nThe only way i can think is to have an index file which has both the original and encrypted data and then use the same file to retrieve the original data.\\n\\nAre there any other ways ? Kindly suggest.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-08-18 10:41:23\" },\n\t{ \"post_id\": 6288, \"topic_id\": 1431, \"forum_id\": 8, \"post_subject\": \"Re: Can't get eclplus to run in interactive mode.\", \"username\": \"Richard_Wyant\", \"post_text\": \"Ok. I got an updated version of the documentation from a colleague. Thanks for clearing that up.\", \"post_time\": \"2014-09-10 14:25:08\" },\n\t{ \"post_id\": 6275, \"topic_id\": 1431, \"forum_id\": 8, \"post_subject\": \"Re: Can't get eclplus to run in interactive mode.\", \"username\": \"bforeman\", \"post_text\": \"Hi Richard,\\n\\nThe Interactive Mode was deprecated a long while ago. Unfortunately it was still in the Version 3 docs. This mode was removed from the docs in Version 4.2 and greater.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-08 16:34:35\" },\n\t{ \"post_id\": 6274, \"topic_id\": 1431, \"forum_id\": 8, \"post_subject\": \"Re: Can't get eclplus to run in interactive mode.\", \"username\": \"Richard_Wyant\", \"post_text\": \"Does anyone have suggestions on getting eclplus to run in interactive mode? When I try eclplus at the command prompt, all I get is the help text.\", \"post_time\": \"2014-09-08 12:23:36\" },\n\t{ \"post_id\": 6267, \"topic_id\": 1431, \"forum_id\": 8, \"post_subject\": \"Can't get eclplus to run in interactive mode.\", \"username\": \"Richard_Wyant\", \"post_text\": \"Hello all. 
I'm attempting to learn some command line parameters for automating some ECL. I thought I'd try using eclplus in interactive mode from my Windows 7 laptop. All I get though is the help text and I get kicked back to the command prompt.\\n\\nI noticed eclplus.ini didn't exist on my system. I made a short one consisting of:\\n\\nserver=<ip address of our thor cluster, redacted>\\ncluster=ThorCluster\\n*queue=<redacted>\\n\\nEclplus seems not to be picking it up. I tried putting copies in the same folder in these locations and rebooting:\\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.0.0\\\\clienttools\\\\bin (where eclplus.exe lives)\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.0.0\\\\eclide\\nC:\\\\Users\\\\<my user name>\\\\AppData\\\\Roaming\\\\HPCC Systems\\\\eclide\\n\\nI'm still getting command prompt when I run "eclplus" without any options. What am I doing wrong?\\n\\nUpdate: I proceeded with individual commands and found it was reading the .ini file. I got a wrong name for my cluster form it:\\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.0.0\\\\clienttools\\\\bin>eclplus user=rwyant pas\\nsword=<redacted> server=<redacted> ecl=@kick_off_my_script.txt\\n\\nSo the only question is why can't I get into interactive mode?\\n[20049: 2014-09-02 13:55:32 GMT: Invalid cluster name: ThorCluster]\", \"post_time\": \"2014-09-02 13:18:48\" },\n\t{ \"post_id\": 6271, \"topic_id\": 1432, \"forum_id\": 8, \"post_subject\": \"Re: Filter / Eliminating input records issue\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nPreviously we used SET actually but we faced some performance issue as our data is huge. I think using LEFT ONLY JOIN we can achieve it even performance wise also. Thank You so much for your ideas.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-09-04 06:38:34\" },\n\t{ \"post_id\": 6270, \"topic_id\": 1432, \"forum_id\": 8, \"post_subject\": \"Re: Filter / Eliminating input records issue\", \"username\": \"DSC\", \"post_text\": \"If you have a really large number of records you may be better off with a LEFT ONLY option on a JOIN:\\n\\nmainR := RECORD\\n INTEGER no;\\nEND;\\n\\nmainDS := DATASET([{1},{2},{3}], mainR);\\n\\ninputDS := DATASET([{1},{2}], mainR);\\n\\nresultDS := JOIN(mainDS,inputDS,LEFT.no = RIGHT.no,LEFT ONLY);\\n\\nOUTPUT(resultDS);
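\\nIf inputDS is small enough to fit in memory on each node, the LOOKUP flag may also be worth trying, since it avoids redistributing the larger file (a variant to consider, not a guaranteed win):\\n\\nresultLookup := JOIN(mainDS,inputDS,LEFT.no = RIGHT.no,LEFT ONLY,LOOKUP);\\nOUTPUT(resultLookup);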
\\nCheers,\\n\\nDan\", \"post_time\": \"2014-09-03 14:50:56\" },\n\t{ \"post_id\": 6269, \"topic_id\": 1432, \"forum_id\": 8, \"post_subject\": \"Re: Filter / Eliminating input records issue\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nThis will do it:mainR := RECORD\\n INTEGER no;\\nEND;\\n\\nmainDS := DATASET([{1},{2},{3}], mainR);\\n\\ninputDS := DATASET([{1},{2}], mainR);\\n\\nSetNo := SET(inputDS,no); \\n\\nOUTPUT(mainDS(no NOT IN SetNo));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-03 13:44:18\" },\n\t{ \"post_id\": 6268, \"topic_id\": 1432, \"forum_id\": 8, \"post_subject\": \"Filter / Eliminating input records issue\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nAssume there are two DATASET's called mainDS and inputDS. i want to filter inputDS from mainDS, as per the below example after filtering i should get 3 as a output. but i am getting 2,3,1,3 as output. Please look into below code and suggest to achieve my requirement.\\n\\n====================================================================================\\nmainR := RECORD\\n\\tINTEGER no;\\nEND;\\n\\nmainDS := DATASET([{1},{2},{3}], mainR);\\n\\ninputDS := DATASET([{1},{2}], mainR);\\n\\nmainDSR := RECORD\\n\\tDATASET(mainR) ds;\\nEND;\\n\\nmainDSR filterFunc(mainR L) := TRANSFORM\\n\\t\\tSELF.ds := mainDS(no<>L.no);\\nEND;\\nfilterOutp := PROJECT(inputDS, filterFunc(LEFT));\\n\\n//OUTPUT(filterOutp);\\n\\nmainR normFunc(mainDSR L, mainR R) := TRANSFORM\\n\\tSELF.no := R.no;\\t\\t\\nEND;\\n\\nnormValue := NORMALIZE(filterOutp,LEFT.ds,normFunc(LEFT,RIGHT));\\n\\nOUTPUT(normValue);\\n====================================================================================\\n\\nAfter filter 1,2 from 1,2,3 i should get 3 as output.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-09-03 07:43:44\" },\n\t{ \"post_id\": 6316, \"topic_id\": 1435, \"forum_id\": 8, \"post_subject\": \"Re: Save unknown format type Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIt was some permission issue for a folder within a s3 bucket. \\n\\nChanging the permission issue for the same , was able to spray the file successfully.\\n\\nunmounted the s3 bucket and remounted it with proper permissions for the user.\\n\\nRegards,\\nViswa\", \"post_time\": \"2014-09-17 10:17:37\" },\n\t{ \"post_id\": 6279, \"topic_id\": 1435, \"forum_id\": 8, \"post_subject\": \"Re: Save unknown format type Error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Viswa,\\n\\nPlease, show us your spray. The path you give in it must be the path of a file on your landing zone. And, since you are working on EC2, that path should look something like the following: /var/lib/HPCCSystems/mydropzone/your_file_name\\n\\nI have a strong feeling that isn't the case in your situation.\", \"post_time\": \"2014-09-09 18:08:34\" },\n\t{ \"post_id\": 6277, \"topic_id\": 1435, \"forum_id\": 8, \"post_subject\": \"Re: Save unknown format type Error\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nTypically, sprays are done from a sub-directory under the default starting point, so that may be your issue.\\n\\nYou can try changing the default path in your Landing Zone's configuration to the new mount. Or, if you need to have both paths available, you can try creating a new LZ with the same IP but a default path pointing to the s3 mount and do the spray using that LZ.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-09 13:22:01\" },\n\t{ \"post_id\": 6276, \"topic_id\": 1435, \"forum_id\": 8, \"post_subject\": \"Save unknown format type Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI get this error while trying to do a sprayxml call.\\n\\nWe had mounted s3 on our landing zone using s3fs and tried to spray few xml files based on the s3 mounted path, but it failed to spray and we encounter this error .\\n\\n\\nError : 0: System error: 0: DFUServer Error Failed: INTERNAL: Save unknown format type\\n
\\n\\nThe spray works if we copy the files from mounted s3 in to the landingzone.\\n\\nAny thoughts on the same..?\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-09-09 11:41:00\" },\n\t{ \"post_id\": 6281, \"topic_id\": 1437, \"forum_id\": 8, \"post_subject\": \"Re: FileServices is not being recognized.\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nOn AWS you need to use the OSS syntax for the service library functions, not the old 702-style syntax.\\n\\nThis should work:IMPORT STD;\\nlayout_myfile := RECORD\\n STRING Elevationfield1;\\n STRING Aspectfield2;\\n STRING Slopefield3;\\nEND;\\n\\ndistributed_logical_filename:='thor::myfile_head_distributed_by_aspect';\\ndistributed_myfile_head := DATASET(distributed_logical_filename,layout_myfile,THOR);\\n\\ncsv_filename := 'thor::distributed_myfile_head_csv';\\noutput(distributed_myfile_head, ,csv_filename,CSV,OVERWRITE);\\n\\nSTD.File.DeSpray(\\n csv_filename\\n ,'10.202.169.151'\\n ,'/var/lib/HPCCSystems/mydropzone/distributed_myfile_head_csv'\\n ,-1,,,true\\n);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-09 18:59:27\" },\n\t{ \"post_id\": 6280, \"topic_id\": 1437, \"forum_id\": 8, \"post_subject\": \"FileServices is not being recognized.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have deployed a thor cluster to AWS.\\n\\nI'm attempting to do a DeSpray using FileServices.DeSpray. But, I getting the error: "Error: Unknown identifier before "." (expected :=) (13, 13)". My code follows:\\n\\nlayout_myfile := RECORD\\n STRING Elevationfield1;\\n STRING Aspectfield2;\\n STRING Slopefield3;\\nEND;\\n\\ndistributed_logical_filename:='thor::myfile_head_distributed_by_aspect';\\ndistributed_myfile_head := DATASET(distributed_logical_filename,layout_myfile,THOR);\\n\\ncsv_filename := 'thor::distributed_myfile_head_csv';\\noutput(distributed_myfile_head, ,csv_filename,CSV,OVERWRITE);\\n\\nFileServices.DeSpray(\\n csv_filename\\n ,'10.202.169.151'\\n ,'/var/lib/HPCCSystems/mydropzone/distributed_myfile_head_csv'\\n ,-1,,,true\\n);\\n
\", \"post_time\": \"2014-09-09 18:17:08\" },\n\t{ \"post_id\": 6298, \"topic_id\": 1440, \"forum_id\": 8, \"post_subject\": \"Re: Moving Files within Landing Zone\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nThe SEQUENTIAL action works for me with this code:IMPORT STD;\\nIP := '10.173.248.7';\\ninfile := '/mnt/disk1/var/lib/HPCCSystems/dropzone/myfile.xml';\\noutfile := '/mnt/disk1/var/lib/HPCCSystems/dropzone/NewDir/myfile.xml';\\n\\nM1 := STD.File.MoveExternalFile(IP,infile,outfile);\\nO1 := OUTPUT(DATASET(STD.File.ExternalLogicalFileName(IP,outfile),{STRING1 Char},FLAT));\\nM2 := STD.File.MoveExternalFile(IP,outfile,infile);\\nSEQUENTIAL(M1,O1,M2);
\\nCan you post the code that produces the error?\\n\\nRichard\", \"post_time\": \"2014-09-12 19:04:30\" },\n\t{ \"post_id\": 6295, \"topic_id\": 1440, \"forum_id\": 8, \"post_subject\": \"Re: Moving Files within Landing Zone\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Richard,\\n\\nI used STD.File.MoveExternalFile, But as i mentioned in the second post i am facing the Folder Exist issue with SEQUENTIAL API.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-09-12 09:04:34\" },\n\t{ \"post_id\": 6293, \"topic_id\": 1440, \"forum_id\": 8, \"post_subject\": \"Re: Moving Files within Landing Zone\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nThat would be part of the Standard Library, the STD.File.MoveExternalFile function, documented in the Standard Library Reference in the "External Files" section. That doc is available for download as PDF here: http://hpccsystems.com/download/docs/standard-library-reference\\n\\nIt is also in the online help file (press F1 in the IDE or Eclipse).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-11 20:09:53\" },\n\t{ \"post_id\": 6291, \"topic_id\": 1440, \"forum_id\": 8, \"post_subject\": \"Re: Moving Files within Landing Zone\", \"username\": \"ArjunKumar\", \"post_text\": \"Actually we used SERVICE api to moving files, below is the code.\\n\\nMoveExternalFile(const varstring location, const varstring frompath, const varstring topath): c,action,context,entrypoint='fsMoveExternalFile'; \\n\\nMy requirement is as below\\n1. Move file from SOURCE to TEMP\\n2. Run some logic by reading file from TEMP\\n3. Move file form TEMP to DESTINATION\\n\\nfor 1st and 3rd steps i am calling the above function to move file. It is working fine as normal, but when i use SEQUENTIAL, it throws [color=#BF0000:gpa14yfk]Folder Exists Already error. The magic here is 1st and 2nd steps are successfully, throws error at step 3.\\n\\nPlease suggest is any other way to move files within landing zone.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-09-11 11:31:16\" },\n\t{ \"post_id\": 6290, \"topic_id\": 1440, \"forum_id\": 8, \"post_subject\": \"Moving Files within Landing Zone\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi team,\\n\\ncan you specify any ECL API's which helps to move file / files from one path to another path within landing zone. It will be great if you specify sample code. \\n\\nThanks,\\nArjun\", \"post_time\": \"2014-09-11 09:47:43\" },\n\t{ \"post_id\": 6320, \"topic_id\": 1447, \"forum_id\": 8, \"post_subject\": \"Re: XML Parsing\", \"username\": \"bforeman\", \"post_text\": \"Hi Viswa,\\n\\nTo add to Richard's comment, when the XML is not well formed, you have to resort to Plan B, which is to treat the "XML" as free form text and use standard parsing with PATTERN, TOKEN or RULE. It's a lot more work but it can be done if you have to live with the data as is.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-18 12:34:34\" },\n\t{ \"post_id\": 6319, \"topic_id\": 1447, \"forum_id\": 8, \"post_subject\": \"Re: XML Parsing\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nIn my experimentation, I found no way around the fact that the data needs to already be "XML-friendly" and encoded as proper XML text, as in this example:
d := DATASET([{'<library><book isbn="123456789X">' +\\n\\t'<author>Bayliss</author><title>A Way Too Far</title></book>' +\\n\\t'<book isbn="1234567801">' +\\n '<cd-title>DEF Hello A &amp; B; x &lt; y </cd-title>' +\\n\\t'<author>Smith</author><title>A Way Too Short</title></book>' +\\n\\t'</library>'}],\\t{STRING line });\\n\\nrform := RECORD\\n STRING author := XMLTEXT('author');\\n STRING title \\t:= XMLTEXT('title');\\n STRING isbn \\t:= XMLTEXT('@isbn');\\n STRING CD \\t:= XMLTEXT('cd-title');\\nEND;\\n\\nbooks := PARSE(d,line,rform,XML('library/book'));\\noutput(books);
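\\nThe entity-encoded cd-title value in that example is the kind of text XMLENCODE produces from the raw characters; a quick way to see the encoding:\\n\\nOUTPUT(XMLENCODE('DEF Hello A & B; x < y')); // '&' and '<' come back as &amp; and &lt;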
\\nIf your XML data is being produced by an HPCC cluster, then you just need to use the XMLENCODE function to accomplish that. However, if it's coming from an external vendor, then they need to give you properly formed XML data.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-17 14:33:46\" },\n\t{ \"post_id\": 6317, \"topic_id\": 1447, \"forum_id\": 8, \"post_subject\": \"XML Parsing\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nDo we have any ways to parse the xml if we have some special characters like '&' , '<' , '>' within xml tags.?\\n\\n\\nEx : \\n\\n<book> \\n <author>xx</author>\\n <title><Hello> World</title>\\n <description>Hello A & B; x < y </description>\\n</book>\\n\\n
\\n\\nCan we escape these special characters in the XML parse only, or do we need to have a clean xml with these characters replaced by their entities (&amp; , &lt; , &gt;) before we parse the same in HPCC?\\n\\nKindly suggest.\\n\\nRegards,\\nViswa\", \"post_time\": \"2014-09-17 10:27:06\" },\n\t{ \"post_id\": 6342, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"rtaylor\", \"post_text\": \"Richard,\\n\\nYes. I did the best I could with the labs. I struggled with them and I had no one I could ask at the time. That's why I started coming here. I was under the impression this was a safe place where to ask ecl questions that others might also have.
This community forum is absolutely a safe place to ask questions, but your comment suggests to me that you took offense where none was intended. If so, I apologize. My only mission here is to educate everybody about ECL and HPCC, and sometimes that information may come across as too blunt.\\n\\nThe only point I was trying to make is that it is apparent from both your code and your comments about your intentions regarding that code, that you have a fundamental misunderstanding of what PERSIST is and how it is meant to be used. Therefore, I suggest that you go back and listen again to the course lectures covering PERSIST -- I believe it is discussed in both the Intro to ECL and Intro to Thor courses.\\n\\nAs I said in my first response, PERSIST is meant to make intermediate results in a multi-step process "stick around" so that the work is done only once, even though the PERSISTed definition is used multiple times in the process. Therefore, although it can be used on all the standard "building block" definition types (Boolean, value, set, and recordset), it is most commonly used only on recordset definitions to store intermediate result records so that the work of producing that intermediate result is not repeated every time the definition is subsequently used. \\n\\nBut you used PERSIST on DATASET and OUTPUT, which are not recordset definitions. DATASET is a file declaration and OUTPUT is an action. That's why your use of PERSIST was problematic. It had nothing to do with the number of PERSISTs in your code.\\n\\nWe also have a forum dedicated to questions about the online course material and exercises here: http://hpccsystems.com/bb/viewforum.php?f=40&sid=cdb2e83676b483a9b30a043b6cbc6cb2\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-19 14:28:20\" },\n\t{ \"post_id\": 6340, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"bforeman\", \"post_text\": \"Yes. I did the best I could with the labs. I struggled with them and I had no one I could ask at the time.
\\n\\nHi Richard,\\n\\nWe created a special forum for the online courses where you can post questions or comments. \\n\\nhttp://hpccsystems.com/bb/viewforum.php?f=40&sid=7b8e0fa9c5b8f3215a129bcb504cb5e1\\n\\nWe are here to help and yes, this is a safe forum to post your ECL questions! \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-19 13:40:38\" },\n\t{ \"post_id\": 6337, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"Richard_Wyant\", \"post_text\": \"[quote="rtaylor":3cypvdkt]
Did you take the online courses? And did you do the exercises or just fast-forward to the explanations?
\\n\\nYes. I did the best I could with the labs. I struggled with them and I had no one I could ask at the time. That's why I started coming here. I was under the impression this was a safe place where to ask ecl questions that others might also have.\", \"post_time\": \"2014-09-19 11:38:57\" },\n\t{ \"post_id\": 6330, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"rtaylor\", \"post_text\": \"So the walk away from this is if I have to *many* persists, I could get this "undefined" error I saw in ECL Watch. Is that correct?
No.\\n\\nThe walkaway is that you need to understand what PERSIST is and does so you don't mis-use it and create problems for yourself.\\n\\nI took the courses, but I'm having a hard time finding examples on the internet like I would with SQL or Java.
And part of your problem is that you're trying to work with ECL like you would SQL or Java, and it is quite simply different enough that that approach doesn't work well. Did you take the online courses? And did you do the exercises or just fast-forward to the explanations?\\n\\nThe biggest issue anybody has in learning ECL is to change your thinking 180 degrees from the way you think about coding in other languages. That's why the exercises are actually important to do and understand. The type of code you're trying to write here is covered in the first two courses -- simple queries and transforms. The code you wrote for the exercises in class is the same kind of code you need to write to do what you want to do here. \\n\\nBEWARE OF TRYING TO OVER-COMPLICATE THINGS -- ECL is actually a fairly simple language. So the simpler you think, the more in line with the way the language is designed to work you will be.\\n\\nRichard\", \"post_time\": \"2014-09-18 20:03:40\" },\n\t{ \"post_id\": 6329, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"Richard_Wyant\", \"post_text\": \"I took the courses, but I'm having a hard time finding examples on the internet like I would with SQL or Java. I need to be able to submit ecl, see what I get, and adjust accordingly. The persists were in there because the job was taking too long to complete. I wanted to speed it up by only precessing things when I needed to.\\n\\nIn the first post, I edited the original code I was working with and in the process mistakenly took out the transform, so I'm still going to need that project. I took out the persists and I'm seeing what I want to see.\\n\\nSo the walk away from this is if I have to *many* persists, I could get this "undefined" error I saw in ECL Watch. Is that correct?\", \"post_time\": \"2014-09-18 19:45:51\" },\n\t{ \"post_id\": 6328, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Re: Why would a chosen dataset come back as undefined?\", \"username\": \"rtaylor\", \"post_text\": \"Richard,\\n\\nYour code has a couple of problems. Try it like this:temp_ds :=\\n dataset('~test::file',input_rec,CSV(\\n heading(1),\\n SEPARATOR('|'),\\n TERMINATOR(['\\\\n', '\\\\r\\\\n']),\\n QUOTE('"'),\\n MAXLENGTH(2000))\\n );\\n\\nfirst_100_ds := choosen(temp_ds,100); // I only want the 1st 100, like a limit in SQL.\\n\\noutput(first_100_ds,named('first_100_ds'));
The first issue was that your PROJECT only had one parameter (it always takes at least two), and PROJECT is unnecessary here, since you're not transforming the data (which is what PROJECT is designed to do). \\n\\nThe next issue(s) are your PERSISTs (both of them). You don't need to PERSIST a DATASET definition because it is already a file on disk (therefore persistent). You also do not need to PERSIST an OUTPUT action, because that data will either be written to a disk file (if you name a file to write to), or be stored in the workunit (as your code does) -- either way the result is also persistent. PERSIST is meant to make intermediate results in a multi-step process "stick around" so that the work is done only once, even though the PERSISTed definition is used multiple times in the process.\\n\\nHave you attended any of our ECL training courses (available on-site, WebEx, and online)?\\n\\nRichard\", \"post_time\": \"2014-09-18 19:03:11\" },\n\t{ \"post_id\": 6327, \"topic_id\": 1452, \"forum_id\": 8, \"post_subject\": \"Why would a chosen dataset come back as undefined?\", \"username\": \"Richard_Wyant\", \"post_text\": \"I simply want to output the first 100 records of a file:\\n\\n\\ntemp_ds :=\\nproject(\\n\\tdataset('~test::file',input_rec,CSV(\\n\\t\\theading(1),\\n\\t\\tSEPARATOR('|'),\\n\\t\\tTERMINATOR(['\\\\n', '\\\\r\\\\n']),\\n\\t\\tQUOTE('"'),\\n\\t\\tMAXLENGTH(2000))\\n\\t),\\t\\n) :persist('test::persist');\\n\\nfirst_100_ds := choosen(temp_ds,100); // I only want the 1st 100, like a limit in SQL.\\n\\noutput(first_100_ds,named('first_100_ds'))\\n\\t:persist('test::first_100_ds'); // debugging\\n
\\n\\nWhat happens is the output statement just has "undefined" when I look at it in ECL Watch. Why? If I use the whole file, I can see it just fine. Why would choosen mess that up?\", \"post_time\": \"2014-09-18 18:43:10\" },\n\t{ \"post_id\": 6356, \"topic_id\": 1454, \"forum_id\": 8, \"post_subject\": \"Re: dfuplus\", \"username\": \"bforeman\", \"post_text\": \"Thanks Keren, I have passed this information to our development team for further review.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-19 20:06:08\" },\n\t{ \"post_id\": 6355, \"topic_id\": 1454, \"forum_id\": 8, \"post_subject\": \"Re: dfuplus\", \"username\": \"kereno\", \"post_text\": \"Thanks Bob, I followed your advice and compared the DFU workunits (in their XML format). I think that the difference which causes the problem is that a rowtag is mandatory in the dfuplus command line but not mentioned in the web ui (where the correct spray happens). I tried to set rowtag to different values: empty, tuple, dataset and none of these worked. (also sending you a full comparison in a screenshot by email).\\n\\nThanks,\\nKeren\", \"post_time\": \"2014-09-19 19:51:42\" },\n\t{ \"post_id\": 6347, \"topic_id\": 1454, \"forum_id\": 8, \"post_subject\": \"Re: dfuplus\", \"username\": \"AttilaV\", \"post_text\": \"Hi Karen,\\n\\nThis is strange. A couple of minutes ago I tried to spray a ~1GB xml file locally (HPCC runs inside a VM) and remotely (from HPCC runs one VM to other HPCC runs on other VM with different IP address).\\n\\nThe src file structure is:\\n<Corporations>\\n<Corporation>\\n...\\n</Corporation>\\n<Corporation>\\n...\\n</Corporation>\\n</Corporations>\\n\\nCommand for local spray:\\ndfuplus action=spray srcip=. srcfile=/home/ati/shared/Xml/CorporationsDataExtract.xml dstname=ati::test_ati-Corp-2013-09-20_13-30 jobname=spray_xmlupload3-1 server=. dstcluster=mythor format=xml rowtag=Corporation prefix=FILENAME,FILESIZE username=xxxx password=xxxx overwrite=1 \\n\\nIf the file and the target cluster is local , you can use '.' for both srcip= and server= address. \\n\\nCommand for remote spray (copy local file to remote cluster) :\\ndfuplus action=spray srcip=192.168.1.50 srcfile=/home/ati/shared/Xml/rporationsDataExtract.xml dstname=ati::test_ati-Corp-2014-09-19_17-45 jobname=spray_xmlupload3-1 server=http://192.168.1.52 dstcluster=mythor format=xml rowtag=Corporation prefix=FILENAME,FILESIZE username=xxxx password=xxxx overwrite=1 \\n\\nIf I put invalid IP address into srcip= then I got \\nFailed: Failed to connect to dafilesrv/daliservix on 192.168.1.55:7100\\n\\nIf I put invalid IP address to server=\\nthen I got:\\n00000000 2014-09-19 18:04:51.915 8123 8123 "ERROR: Error connecting to 192.168.1.55:8010"\\n00000001 2014-09-19 18:04:51.916 8123 8123 "-3: connection failed\\nTarget: T>192.168.1.55, Raised in: /home/ati/HPCC-Platform/system/jlib/jsocket.cpp, line 1242"\\nSOAP Connection error\\n\\nSorry, but I can't reproduce this problem. \\n\\nSo I think I need a little bit more information to try reproducing your problem, e.g.: your HPCC version, workunit, log of DFUserver, etc.\\n\\nRegards\\n\\nAttila\", \"post_time\": \"2014-09-19 17:16:11\" },\n\t{ \"post_id\": 6338, \"topic_id\": 1454, \"forum_id\": 8, \"post_subject\": \"Re: dfuplus\", \"username\": \"bforeman\", \"post_text\": \"Hi Keren,\\n\\nI guess the place to look is in the log and XML of the DFU workunit that was successful, and then compare that with your DFUPlus script to see what might be missing or different. 
The ECL Watch interface let's you browse the DFU Workunits and you can see all of the settings used for the spray.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-19 13:34:04\" },\n\t{ \"post_id\": 6335, \"topic_id\": 1454, \"forum_id\": 8, \"post_subject\": \"dfuplus\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am currently testing the spray of a 100M file on my HPCC cluster using dfuplus as follow:\\ndfuplus action=spray srcip=xxx srcfile=/home/kereno/more/200M-sens/pigmix/page_views/part-m-00000 dstname=thor::page_views.xml dstcluster=mythor prefix=FILENAME,FILESIZE nosplit=1 server=http://xxx format=xml username=kereno overwrite=1 replicate=1 rowtag=tuple \\n\\nI would like to scale to 600GB, and since eclplus has a known limitation of 2GB amx, I am using dfuplus as this command line tool doesnt have this limitation.\\n\\nMy dataset looks as follow:\\n<dataset>\\n<tuple>\\n...\\n</tuple>\\n<tuple>\\n...\\n</tuple>\\n</dataset>\\n\\nFollowing the spraying, I execute my query and I get the following error:\\n<Error><source>eclagent</source><code>2</code><message>System error: 2: Graph[1], xmlread[2]: SLAVE 128.195.11.31:20100: Error - syntax error "Expecting "<"" [file offset 2]\\nLogical filename = page_views.xml\\nLocal fileposition = 0x8000000000000002\\n\\nI looked into the sprayed xml file from the web interface, and it seems missing the first <dataset> tag (hence row #2 in the error message perhaps?).\\n\\nNote that when I sprayed the exact same file from the web interface, and executed the query I got fine results, so the pb is related to a wrong rowtag or some other parameters of dfuplus's. Any clue? Thanks for the help \\n\\nKeren\", \"post_time\": \"2014-09-19 03:37:13\" },\n\t{ \"post_id\": 6424, \"topic_id\": 1456, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles working as synonyms\", \"username\": \"Ignacio\", \"post_text\": \"Hi Richard, \\n\\nThat makes sense, but I´m afraid we are getting a little bit lost here these days. Before going forward I feel that we will probably have to learn a bit about the use of package files. Could you please redirect me to an entry point in the documentation about them that we can begin with?\\n\\nThanks again.\", \"post_time\": \"2014-10-10 16:41:58\" },\n\t{ \"post_id\": 6378, \"topic_id\": 1456, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles working as synonyms\", \"username\": \"rtaylor\", \"post_text\": \"Ignacio,
But our main concern is that we see that as soon as we have a Roxie query deployed in the cluster, any file included in a superfile which is in use, would be locked.
This problem can be resolved by configuring your environments so that your Production Roxies have their own Dali, separate from the Dali for your Thor (development) clusters. It's Dali that locks the file(s), so having a separate Dali (separate HPCC environment) for your Roxie clusters eliminates the issue. This specific issue is discussed in our Advanced Roxie course online.\\n\\nSo correct me if I am wrong, but I can't see it working if we want to rename the single file in the superfile (or remove it from it) because it would be locked. So by now I didn't manage to change the data and not having to re-compile the Roxie queries.
You don't rename or remove the sub-file, you replace it with a new sub-file by using a package file that simply changes the sub-file referenced by the superfile to the new file. That makes the Roxie data update process something like this:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-25 18:32:33\" },\n\t{ \"post_id\": 6376, \"topic_id\": 1456, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles working as synonyms\", \"username\": \"Ignacio\", \"post_text\": \"Hi Richard, \\n\\nThanks for your response. We took some time to debate this issue with superfiles but we didn't come up with a clear vision of this. \\n\\nWe are aware that once that you publish something you should not overwrite it. I was more thinking about development times, in that case. \\n\\nBut our main concern is that we see that as soon as we have a Roxie query deployed in the cluster, any file included in a superfile which is in use, would be locked. So correct me if I am wrong, but I can't see it working if we want to rename the single file in the superfile (or remove it from it) because it would be locked. So by now I didn't manage to change the data and not having to re-compile the Roxie queries.\\n\\nRegards,\\n\\nIgnacio.\", \"post_time\": \"2014-09-25 16:43:37\" },\n\t{ \"post_id\": 6354, \"topic_id\": 1456, \"forum_id\": 8, \"post_subject\": \"Re: Superfiles working as synonyms\", \"username\": \"rtaylor\", \"post_text\": \"Ignacio,\\n\\nYour use of Superfiles for data indirection is perfectly acceptable-- we do this every day. The problem you're experiencing can be resolved by configuring your environments so that your production Roxies have their own Dali, separate from the Dali for your Thor clusters. \\n\\nThen you can use package files to update the data on your Roxies by simply changing the name of the single sub-file in the superfile. That way you just change the data and don't have to re-compile your queries every time you get new data.
The least important one, is that once that we deploy a Roxie query working on an index on our DEV server, another developer will not be able to mess up with it on Thor, for example, to add new fields, improve performance, etc.
This one actually points up a fundamental difference in the way you need to think about working in HPCC. Once you have a file in its end format and released to the world, you never want to overwrite that. If you need to mess about,change formats, add fields, etc.then its time to write a new file.\\n\\nThese concepts are all discussed in our Intro to Roxie and Advanced Roxie classes, available online.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-19 18:47:53\" },\n\t{ \"post_id\": 6341, \"topic_id\": 1456, \"forum_id\": 8, \"post_subject\": \"Superfiles working as synonyms\", \"username\": \"Ignacio\", \"post_text\": \"Probably we are too influenced by the Oracle world. In the past, our code would access a synonym would point to a table containing current data. When a new version of the data was released, we just needed to change the synonym to point to it. \\n\\nRegardless the normal use of superfiles explained in the documentation (as a list of sub-files), we thought we could use it as some kind of synonym. So we would have our ECL code point to a superfile, which would contain a file with current (latest) data. We would use an index for our queries on it. When the new version is released, we would regenerate the index, remove previous file from the superfile, and add that new one, so that the transition is as smooth as possible.\\n\\nThis seems to work until we publish our code to Roxie. For example, when we try to alter the index, we are not allowed to do so because it is locked :\\n\\nCannot delete (filename) [ 30: SDS: Lock held SDS Reply Error : SDS: Lock held Lock is held performing changeMode on connection to : (etc..) ]
\\n\\nEven more, once that the Roxie query is deployed, we can't modify the Thor index. \\n\\nSo we have too issues here. The least important one, is that once that we deploy a Roxie query working on an index on our DEV server, another developer will not be able to mess up with it on Thor, for example, to add new fields, improve performance, etc.\\n\\nAnd the main one, is that we wonder whether we can actually use superfiles to point to the latest version of our data, or if there is a smooth way to do so. We must take into account that we have an online application, so we can't really wait for a few minutes (even hours) to republish and reindex all our data.\", \"post_time\": \"2014-09-19 14:23:43\" },\n\t{ \"post_id\": 6357, \"topic_id\": 1457, \"forum_id\": 8, \"post_subject\": \"Re: Spray vs DISTRIBUTE\", \"username\": \"lpezet\", \"post_text\": \"Thank you all!\\n\\nIt sure helps.\", \"post_time\": \"2014-09-19 20:16:22\" },\n\t{ \"post_id\": 6352, \"topic_id\": 1457, \"forum_id\": 8, \"post_subject\": \"Re: Spray vs DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Luc,\\n\\nSpray is a "dumb" operation, and it exists just to quickly get the data from the landing zone to the nodes of the Thor cluster so you can work with the data. \\n\\nFor example, spraying a 3 Gb file to a 3-node cluster will result in the first 1 Gb of the original file written to a file on node #1, the second 1 Gb of the original file written to a file on node #2, and the third 1 Gb of the original file written to a file on node #3. \\n\\nIf the original file has its records in no particular order, then the logical file on Thor (after the spray) will also have its records in no particular order. If the original file does have its records in some particular order, then the logical file on Thor (after the spray) will also have its records in the same order across all the nodes, but they will not necessarily break the way you would want them to from node to node.\\n\\nDISTRIBUTE, however, does a RE-distribute of the records, based on its second parameter (which must be an integer value), so that all records with the same second-parameter value end up on the same node together. \\n\\nFor example:rds := DISTRIBUTE(MyDS,HASH32(lastname,firstname));
will re-distribute the records (leaving them in memory) to the nodes such that all records with the same hash value end up on the same node. IOW, all the JOHN SMITH records will be on the same node together, and all the JOE SCHMOE records will also be together on some node.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-19 18:04:48\" },\n\t{ \"post_id\": 6349, \"topic_id\": 1457, \"forum_id\": 8, \"post_subject\": \"Re: Spray vs DISTRIBUTE\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nI think one difference between these two is the Spray instruction process file from any path with different internal structure (CSV, XML, etc.) in OS directory structure (restricted by the user privileges) and generate one file in cluster data directory or split it (distribute) as many chunks as the number of the slaves.\\nThe Distribute can work with the DATASET (Created with DS, Spray etc. instructions) only and not directly with files.\\n\\nRegards\\n\\nAttila\", \"post_time\": \"2014-09-19 17:51:53\" },\n\t{ \"post_id\": 6346, \"topic_id\": 1457, \"forum_id\": 8, \"post_subject\": \"Spray vs DISTRIBUTE\", \"username\": \"lpezet\", \"post_text\": \"Hello,\\n\\nI'm trying to understand the full extent of Spray and DISTRIBUTE.\\nAs far as I could gather, there isn't much documentation on what Spraying Files does exactly (but very complete doc on how to do it).\\nMy assumption is that it will distribute the file across all (?) nodes in the cluster.\\nSo then my question is, what is the difference with DISTRIBUTE then?\\n\\nI've seen some examples of ECL code doing the following (in an ETL/ELT kind of phase):\\n\\nLandingZoneIP := '172.xx.xx.xx';\\nFilePath := '/var/lib/HPCCSystems/mydropzone/somefolder/myfile.csv';\\nds := DATASET(Std.File.ExternalLogicalFilename(LandingZoneIP, FilePath), some_layout, CSV(HEADING(1)));\\ndds := DISTRIBUTE(ds, somefield);\\n
\\n\\nThe code goes on and uses dds from that point on.\\nSo here I see no spraying... or am I wrong?\\nIt seems to me that, if myfile.csv had been sprayed, then maybe the DISTRIBUTE could have been more efficient, since each node would have some data to work on. Not being sprayed, only 1 node is executing the DISTRIBUTE. Is that right?\\n\\n\\nThanks for the help!\\nLuc.\", \"post_time\": \"2014-09-19 17:06:25\" },\n\t{ \"post_id\": 6436, \"topic_id\": 1460, \"forum_id\": 8, \"post_subject\": \"Re: Abstract modules and function inheritance\", \"username\": \"rtaylor\", \"post_text\": \"Ignacio,\\n\\nI just wanted to flag that in my opinion, the outcome was not very intuitive when the compiler would override the function behavior but not the return type definition.
I'll have to disagree on this one. \\n\\nThe purpose of the VIRTUAL function is to define the structure of the function -- the parameters and return type -- along with the default behavior. Therefore, it makes perfect sense to me that the compiler would not allow you to override the return type, just as it won't allow you to make changes to the parameter types being passed. \\n\\nThe purpose of the concrete instance of the function is precisely to change the default behavior of the function -- to do something different to those parameters to create return values different from what the default would be but with exactly the same return type.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-14 13:42:46\" },\n\t{ \"post_id\": 6423, \"topic_id\": 1460, \"forum_id\": 8, \"post_subject\": \"Re: Abstract modules and function inheritance\", \"username\": \"Ignacio\", \"post_text\": \"Hi Richard,\\n\\nSorry for the mess in my code, and thanks for your response. It always helps a lot to read somebody else's code (specially as I have a very limited experience on the field). You can bet the workaround I had taken at first was worse, so I was happy to make some adjustments. \\n\\nI just wanted to flag that in my opinion, the outcome was not very intuitive when the compiler would override the function behavior but not the return type definition. \\n\\nThanks again.\", \"post_time\": \"2014-10-10 16:28:11\" },\n\t{ \"post_id\": 6381, \"topic_id\": 1460, \"forum_id\": 8, \"post_subject\": \"Re: Abstract modules and function inheritance\", \"username\": \"rtaylor\", \"post_text\": \"Ignacio,\\n\\nOK, I made a couple of small changes to your code and now it returns exactly what you would have originally expected:THE_PARENT := MODULE,VIRTUAL\\n\\n SHARED GREEN := 'GREEN';\\n SHARED AMBER := 'AMBER';\\n SHARED RED := 'RED';\\n SHARED ONE := '1';\\n SHARED TWO := '2';\\n SHARED THREE := '3';\\n\\n SHARED VIRTUAL STRING calculateAlias(INTEGER in_value) := \\n CHOOSE(in_value,ONE,TWO,THREE);\\n \\nEND;\\n\\nTHE_CHILD := MODULE(THE_PARENT)\\n\\n SHARED STRING calculateAlias(INTEGER in_value):= CHOOSE(in_value,RED,AMBER,GREEN);\\n\\n EXPORT testing(INTEGER in_value) := calculateAlias(in_value);\\nEND;\\n\\noutput(THE_CHILD.testing(1), NAMED('ONE'));\\noutput(THE_CHILD.testing(2), NAMED('TWO'));\\noutput(THE_CHILD.testing(3), NAMED('THREE'));
My first change was to eliminate the EXPORTs on the MODULE structures and the IMPORTs so I could test it all in a single builder window. My next change was required to eliminate a syntax error -- I got rid of the INTEGER data type in the calculateAlias function call in the testing function. \\n\\nOnce the code passed syntax check, I ran it. This got me what I expect was the same surprise result you got: the first character only of each of the colors ('R', 'A', and 'G'). \\n\\nSo next, I added the STRING data type as the return type to both instances of the calculateAlias function definitions. I ran the code again, and got the full string for each of the colors, not just the first character.\\n\\nSince you did not specify a return type in the VIRTUAL function's definition, the compiler defaulted to the type of the actual data returned by default -- STRING1. \\n\\nNote also that I changed your MAP to CHOOSE, because CHOOSE is the more efficient function for this circumstance. I also eliminated the FUNCTION structures because you don't need it when your function contains only a single expression.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-26 14:32:57\" },\n\t{ \"post_id\": 6377, \"topic_id\": 1460, \"forum_id\": 8, \"post_subject\": \"Abstract modules and function inheritance\", \"username\": \"Ignacio\", \"post_text\": \"This is more a thinking exercise for myself than anything else, so do not spend too much time on this if you are a bit busy. \\n\\nLet's say that we have an abstract module with a parent function that would return string constants of '1', '2' and '3' for inputs 1, 2 and 3. A child module would inherit from that parent. It would have a function with the same name returning string constants of 'RED', 'AMBER' and 'GREEN' for those values 1, 2 and 3.\\n\\nIf I call the function from the child module, with value 1, what would I get back? I would have gone for the first of the following :\\n\\na) Child module function, that is 'RED', as the one from the parent module would be overridden. \\nb) Parent module function, that is '1', as the compiler would not be smart enough with the inheritance, and may do something funny.\\nc) None of the above. \\n\\nI don't want you to spoil you too much with the result I got, if you want to try it yourself copying and pasting the code examples below. I will just say I was surprised when I checked what the actual answer was. My guess is that the compiler gets the definition from the parent function, then overrides it all with the child's one, except for the return type length, although in ECL we never specified an explicit type. A bit tricky in my opinion. \\n\\n//THE_PARENT.ecl\\nEXPORT THE_PARENT := MODULE,VIRTUAL\\n\\n\\tSHARED GREEN:='GREEN';\\n\\tSHARED AMBER:='AMBER';\\n\\tSHARED RED:='RED';\\n\\tSHARED ONE:='1';\\n\\tSHARED TWO:='2';\\n\\tSHARED THREE:='3';\\n\\n SHARED VIRTUAL calculateAlias(INTEGER in_value):=FUNCTION\\n\\t RETURN MAP(in_value=1=>ONE,\\n\\t\\t\\t\\t\\t\\tin_value=2=>TWO,\\n\\t\\t\\t\\t\\t\\tTHREE);\\n\\tEND;\\n\\t\\nEND;\\n
\\n\\n//THE_CHILD.ecl\\nIMPORT $.THE_PARENT as THE_PARENT;\\n\\nEXPORT THE_CHILD := MODULE(THE_PARENT)\\n\\n SHARED calculateAlias(INTEGER in_value):=FUNCTION\\n\\t RETURN MAP(in_value=1=>RED,\\n\\t\\t\\t\\t\\t\\tin_value=2=>AMBER,\\n\\t\\t\\t\\t\\t\\tGREEN);\\n\\tEND;\\n\\n\\tEXPORT testing(INTEGER in_value) := FUNCTION\\n\\t\\t RETURN calculateAlias(INTEGER in_value);\\n\\tEND;\\nEND;\\n
\\n\\n//TESTING.ecl\\nIMPORT $.THE_CHILD as THE_CHILD;\\noutput(THE_CHILD.testing(1), NAMED('ONE'));\\noutput(THE_CHILD.testing(2), NAMED('TWO'));\\noutput(THE_CHILD.testing(3), NAMED('THREE'));\\n
\", \"post_time\": \"2014-09-25 16:50:20\" },\n\t{ \"post_id\": 6385, \"topic_id\": 1463, \"forum_id\": 8, \"post_subject\": \"Re: Group Function Improving Performance\", \"username\": \"rtaylor\", \"post_text\": \"JoseThomas,\\n\\nYou generally want to use GROUP whenever you are working with very large amounts of data and GROUPing the data is possible to do and get the result you need from the operation you want to perform. In your case SORT/DEDUP is almost always a candidate for SORT/GROUP/DEDUP, depending of course on your DEDUP condition.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-29 12:51:50\" },\n\t{ \"post_id\": 6384, \"topic_id\": 1463, \"forum_id\": 8, \"post_subject\": \"Group Function Improving Performance\", \"username\": \"joseThomas\", \"post_text\": \"I have typically omitted group in my code as I distribute then do a sort & dedup locally. I read a white paper on using ECL that proposed as using grouping to improve performance. I tested performance by inserting a group between a sort and dedup -> Sort, Group, and then dedup on a small 300 meg file. The performance was dramatically faster. \\n\\nMy question is related to when do you use grouping and when do you not group.\", \"post_time\": \"2014-09-27 18:11:30\" },\n\t{ \"post_id\": 6390, \"topic_id\": 1464, \"forum_id\": 8, \"post_subject\": \"Re: Normalize Child Datasets\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Richard..\\n\\nViswa\", \"post_time\": \"2014-09-30 13:41:32\" },\n\t{ \"post_id\": 6389, \"topic_id\": 1464, \"forum_id\": 8, \"post_subject\": \"Re: Normalize Child Datasets\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nOK, here are two ways to do what you want. \\n\\nThe first is your original approach with the empty child dataset records added back in for the ROLLUP (which requires a PROJECT to get the "no kids" recs ready to be rolled in, and then also requires a SORT for the ROLLUP)://input data\\nIdentifiers := RECORD\\n\\tSTRING name;\\n\\tSTRING name_val;\\nEND;\\ndoc_rec := RECORD\\n\\tSTRING DocID;\\n\\tSTRING title;\\n\\tDATASET(Identifiers) identifiers_list;\\nEND;\\ndoc_file_ds := DATASET([{'101','abc',[{'a1','201'},{'a2','205'},{'a3','301'}]},\\n {'102','def',[]},\\n {'103','efg',[{'b1','401'},{'a2','204'}]}],doc_rec);\\n\\n//Your NORMALIZE Form 2 approach:\\nnormalize_rec := RECORD\\n\\tstring docid;\\n\\tstring title;\\n\\tstring a1 := '';\\n\\tstring a2 := '';\\n\\tstring a3 := '';\\n\\tstring b1 := '';\\nEND;\\n\\nnormalize_rec normalizeID1(doc_rec L , identifiers R) := TRANSFORM\\n SELF.docid:= L.docid;\\n SELF.title := L.title;\\n SELF.a1:= If ( R.name = 'a1', R.name_val ,'');\\n SELF.a2:= If ( R.name = 'a2', R.name_val ,'');\\n SELF.a3:= If ( R.name = 'a3', R.name_val ,'');\\n SELF.b1:= If ( R.name = 'b1', R.name_val ,'');\\nEND;\\n\\ndoc_file_normalized_ds1 := NORMALIZE(doc_file_ds , \\n LEFT.identifiers_list , \\n normalizeID1(LEFT , RIGHT));\\n//get the recs with no children\\nNoKids := PROJECT(doc_file_ds(NOT EXISTS(identifiers_list)),normalize_rec);\\nOUTPUT(NoKids,NAMED('NoKids'));\\n\\nnormalize_rec RollXF1(normalize_rec L, normalize_rec R) := TRANSFORM\\n SELF.a1:= If ( L.a1 = '',R.a1,L.a1);\\n SELF.a2:= If ( L.a2 = '',R.a2,L.a2);\\n SELF.a3:= If ( L.a3 = '',R.a3,L.a3);\\n SELF.b1:= If ( L.b1 = '',R.a1,L.b1);\\n SELF := L;\\nEND;\\n\\n//concatenate the NoKids with the NORMALIZE result and SORT it for thr ROLLUP:\\nR1 := ROLLUP(SORT(doc_file_normalized_ds1 + 
NoKids,DocID),docid,RollXF1(LEFT,RIGHT));\\nOUTPUT(R1,NAMED('Rollup_1'));\\n\\n//one-pass approach, using Form 1 of NORMALIZE\\nnormalize_rec normalizeID2(doc_rec L , INTEGER C) := TRANSFORM\\n SELF.docid:= L.docid;\\n SELF.title := L.title;\\n R := L.identifiers_list[C];\\n SELF.a1:= If ( R.name = 'a1', R.name_val ,'');\\n SELF.a2:= If ( R.name = 'a2', R.name_val ,'');\\n SELF.a3:= If ( R.name = 'a3', R.name_val ,'');\\n SELF.b1:= If ( R.name = 'b1', R.name_val ,'');\\nEND;\\n\\ndoc_file_normalized_ds2 := NORMALIZE(doc_file_ds ,\\n IF(NOT EXISTS(LEFT.identifiers_list),\\n 1,\\n COUNT(LEFT.identifiers_list)) , \\n normalizeID2(LEFT , COUNTER));\\n\\nR2 := ROLLUP(doc_file_normalized_ds2,docid,RollXF1(LEFT,RIGHT));\\nOUTPUT(R2,NAMED('Rollup_2'));
The second approach does it all in one pass by simply using Form 1 of NORMALIZE, eliminating the need for the PROJECT and SORT. The "trick" here lies in the "counter" logic to force a single TRANSFORM call when there are no child records. The TRANSFORM is already written to put in blanks in all the child slots so the end result is the same.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-29 19:54:34\" },\n\t{ \"post_id\": 6388, \"topic_id\": 1464, \"forum_id\": 8, \"post_subject\": \"Re: Normalize Child Datasets\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nSorry for that, should have posted some example codes.\\n\\n\\n\\nnormalize_rec := RECORD\\n string docid;\\n string title;\\n string a1;\\n string a2;\\n string a3;\\n string b1;\\nEND;\\n\\nnormalize_rec normalizeID(doc_rec L , identifiers R) := TRANSFORM\\n SELF.docid:= L.docid;\\n\\tSELF.title := L.title;\\n\\tSELF.a1:= If ( R.name = 'a1', R.name_val ,'');\\n\\tSELF.a2:= If ( R.name = 'a2', R.name_val ,'');\\n\\tSELF.a3:= If ( R.name = 'a3', R.name_val ,'');\\n\\tSELF.b1:= If ( R.name = 'b1', R.name_val ,'');\\n\\nEND;\\n\\ndoc_file_normalized_ds := NORMALIZE(doc_file_ds , LEFT.identifiers_list , normalizeID(LEFT , RIGHT));\\n\\n
\\n\\nThis will give me just the result for ids 101 and 103 only and id 102 will be missed from the normalized output because there is no child dataset for id 102.\\n\\nIs there a way to retain those records too that were missed during the normalize process ?\\n\\nKindly suggest.\\n\\nViswa\", \"post_time\": \"2014-09-29 17:50:48\" },\n\t{ \"post_id\": 6387, \"topic_id\": 1464, \"forum_id\": 8, \"post_subject\": \"Re: Normalize Child Datasets\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nI'm not sure I fully understand what you're trying to accomplish here. Can you please post your example code and explain what result you want to see?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2014-09-29 17:29:02\" },\n\t{ \"post_id\": 6386, \"topic_id\": 1464, \"forum_id\": 8, \"post_subject\": \"Normalize Child Datasets\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a dataset in this format, attached is the sample test data.\\n\\nHow do we consider all the records when a normalize (form 2) operation is performed when a child dataset is blank ?\\n\\nI was able to get the result in the below format by using normalize form 2 and a rollup, but those records were missed for which the child dataset was blank from the base data.\\n\\nKindly suggest.\\n\\nViswa\", \"post_time\": \"2014-09-29 17:08:15\" },\n\t{ \"post_id\": 6542, \"topic_id\": 1470, \"forum_id\": 8, \"post_subject\": \"Re: Merging Indexed logical files into single logical file\", \"username\": \"DSC\", \"post_text\": \"Hi Arjun,\\n\\nYou're right; what I wrote was incorrect. You should treat the file as a dataset, in the BUILD command, but you reference it as an INDEX (just like what you discovered).\\n\\nSorry for the confusion.\\n\\nDan\", \"post_time\": \"2014-10-31 11:29:42\" },\n\t{ \"post_id\": 6541, \"topic_id\": 1470, \"forum_id\": 8, \"post_subject\": \"Re: Merging Indexed logical files into single logical file\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi dan,\\n\\nAs you said above (point 2) i tried to refer the super key with a DATASET command which gives the below error.\\n\\n[color=#FF0040:2vx75na8]"10124: System error: 10124: Graph[1], diskread[2]: diskread: Layout does not match published layout. Superfile: " \\n\\nPlease let me know whether i am using correct syntax.\\n\\nweeklySK_DS := DATASET('~somelogicalSuperKey', {MinActivityRecord, UNSIGNED filepos{virtual(fileposition)}}, THOR);
\n\nBut if we reference it with INDEX, as below, it works.\n\nweeklySK_DS := INDEX( {INTEGER TargetDocID_Hash, BOOLEAN LawSchool, BOOLEAN Internal}, \n{MinActivityRecord,UNSIGNED filepos}, '~somelogicalSuperKey');
\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-10-31 11:11:45\" },\n\t{ \"post_id\": 6430, \"topic_id\": 1470, \"forum_id\": 8, \"post_subject\": \"Re: Merging Indexed logical files into single logical file\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nYou can also look at the MERGE function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-13 21:02:20\" },\n\t{ \"post_id\": 6426, \"topic_id\": 1470, \"forum_id\": 8, \"post_subject\": \"Re: Merging Indexed logical files into single logical file\", \"username\": \"DSC\", \"post_text\": \"You can combine indexes using (mostly) the same mechanism as combining regular data files:\\n\\n1) Create a superkey containing your indexes.\\n\\n2) In ECL, reference the superkey with a DATASET command. This basically treats the superkey as a data file.\\n\\n3) Write a BUILD statement that creates a new index from the data. The result will be a single index file composed of all the records from all of your indexes.\\n\\nLogistically, you should probably use superkeys all the time and manipulate the superkey's contents:\\n\\n1) Create the superkey and reference it all time in your ECL code.\\n\\n2) When you create a new index file, add it to the superkey. This makes the new index available to code.\\n\\n3) Periodically, as you've found, you'll want to combine the individual index files. Perform the above steps to do that, but when you're done clear the superkey's contents and then add the new (combined) index you just created.\\n\\nHope this helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-10-13 12:58:51\" },\n\t{ \"post_id\": 6405, \"topic_id\": 1470, \"forum_id\": 8, \"post_subject\": \"Merging Indexed logical files into single logical file\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nwe have a requirement to merge all index sub files into a single index logical file. please suggest how to achieve this. Using Super Key concept we are facing performance issue so we thought of merging as a single index logical file.\\n\\n\\nThanks in Advance,\\nArjun\", \"post_time\": \"2014-10-08 06:31:29\" },\n\t{ \"post_id\": 6451, \"topic_id\": 1473, \"forum_id\": 8, \"post_subject\": \"Re: Multiple BLOB Field Modifiers\", \"username\": \"oleg\", \"post_text\": \"Thanks, Jake,\\n\\nThat's exactly what I wanted to hear \", \"post_time\": \"2014-10-15 09:54:10\" },\n\t{ \"post_id\": 6429, \"topic_id\": 1473, \"forum_id\": 8, \"post_subject\": \"Re: Multiple BLOB Field Modifiers\", \"username\": \"jsmith\", \"post_text\": \"All BLOBs in the record definition will be stored in the same blob block, space permitting, so there is no overhead in terms of seeks or space as far as the blob storage is concerned. However, there is a 8-byte id stored in the record as a reference id to the blob data, so increasing the number of blobs does increase the record size store in the index.\\n\\nHope that helps.\", \"post_time\": \"2014-10-13 21:00:52\" },\n\t{ \"post_id\": 6408, \"topic_id\": 1473, \"forum_id\": 8, \"post_subject\": \"Multiple BLOB Field Modifiers\", \"username\": \"oleg\", \"post_text\": \"Is there any difference in using multiple BLOB Field Modifiers for the multiple fields within the same record or group them together into the sub-record and then declare it as as BLOB?\\n\\nFrom the manual:\\n-----------------------\\n{ BLOB } Specifies the field is stored separately from the leaf node entry in\\nthe INDEX. This is applicable specifically to fields in the payload\\nof an INDEX to allow more than 32K of data per index entry. 
The\\nBLOB data is stored within the index file, but not with the rest of\\nthe record. Accessing the BLOB data requires an additional seek.\\n---------------------------\\n\\nSo, if I have multiple BLOB fields, would they will be stored in the same block or each one will have it's own partition and so multiple seeks will be required?\", \"post_time\": \"2014-10-08 16:25:05\" },\n\t{ \"post_id\": 6572, \"topic_id\": 1474, \"forum_id\": 8, \"post_subject\": \"Re: Probable Race Condition in Thor\", \"username\": \"jsmith\", \"post_text\": \"It is difficult to say from that 1 log, whether it is actually stuck or just very delayed.\\nHave you confirmed there's zero progress on the subgraph that's running in Eclwatch/IDE ? i.e. absolutely no counts are increasing.\\n\\nThis slave is certainly reporting that it is blocked, to be specific the splitter (84) is saying that it is being blocked, probably because one or more of it's downstream arms is not pulling.\\n\\nThe graph view, will also indicate skew, e.g. which slaves are +% ahead and which are -% behind (ahead/behind the average). It is also worth examining the timeMaxMs figures for each activity on the graph to see where/if a lot of time is being consumed.\\n\\nHope that helps to point toward the underlying issue.\", \"post_time\": \"2014-11-06 16:39:42\" },\n\t{ \"post_id\": 6409, \"topic_id\": 1474, \"forum_id\": 8, \"post_subject\": \"Probable Race Condition in Thor\", \"username\": \"lokesh\", \"post_text\": \"Hi,\\n\\nI have created a single node system with 20 cores and 15 slaves.\\nIt has 64 GB of RAM, and 2TB of disk space.\\n\\nNow I am running Association Rule Mining (EclatN) on the data I have with following properties:\\n1) 298000 rows\\n2) 94 columns\\n3) Most columns have 3 levels.\\n\\nI am running ARM only on the 10000 rows subset.\\n\\nThe process got stuck after certain point.\\n\\nI have attached the log from the first slave.\\n\\nPS: The file was sprayed on the system with single slave, and then environment file was changed to have 15 slaves.\", \"post_time\": \"2014-10-08 17:40:47\" },\n\t{ \"post_id\": 6420, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you are always getting the same value then the learning is saying that everything belongs to the same class.\\n\\nNumber if the field number of the class label. For example, if your class label was the 1st field then number will be 1. If it was the 4th field then number will be 4.\", \"post_time\": \"2014-10-09 22:34:53\" },\n\t{ \"post_id\": 6419, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n number is still not very clear to me. number of the class label means? and does value will always be an integer? Because every time I get the same result.\\n\\n can you post me your example for my better understanding.\\n\\n Thanks for the help.\", \"post_time\": \"2014-10-09 17:56:45\" },\n\t{ \"post_id\": 6418, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"tlhumphrey2\", \"post_text\": \"I can explain some of it. Refer to my example below.\\n\\n'id' is the same as 'id' in your input. 'number' is the 'number' of the class label. 'value' is the predicted class label. I'm not sure what 'conf' or 'closest_conf' are. 
I believe 'conf' is supposed to be the confidence, expressed as a percentage. \n \n
id number value conf closest_conf\\n1 4 2 7.55 10.84\\n2 4 2 7.55 10.84\\n3 4 2 8.53 9.16\\n4 4 2 9.66 14.25\\n
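\nIf you just want the id and the predicted label side by side, a small vertical-slice TABLE like this works (Results here being your ClassifyD output):\n\npredicted := TABLE(Results, {id, predicted_label := value});\nOUTPUT(predicted);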
\", \"post_time\": \"2014-10-09 17:42:22\" },\n\t{ \"post_id\": 6414, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n Yes I have read that, but when I give below line \\n\\n Results := BayesModule.ClassifyD(indep_t,Model);\\n Results;\\n\\n I am getting following output,\\nid number value conf closest_conf\\n 0\\t1\\t0\\t50.66890346797192\\t0.0\\n5\\t1\\t0\\t3.748890859009343\\t0.0\\n6\\t1\\t0\\t20.156382301427\\t0.0\\n\\n So I am trying to understand this output. What actually I need is using Bayes, I want to predict the class label for my test data. So in the above output, which one should I consider as predicted class label for my test data.\\n\\n Also, I have string as my class label, all the outputs are integers, so I am bit confused about the outputs.\\n\\n I am new to machine learning, so please correct me if my understanding is wrong somewhere.\\n\\nThanks for the help.\", \"post_time\": \"2014-10-09 16:01:18\" },\n\t{ \"post_id\": 6413, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n Yes I have read that, but when I give below line \\n\\n Results := BayesModule.ClassifyD(indep_t,Model);\\n Results;\\n\\n I am getting following output,\\n\\n 0\\t1\\t0\\t50.66890346797192\\t0.0\\n5\\t1\\t0\\t3.748890859009343\\t0.0\\n6\\t1\\t0\\t20.156382301427\\t0.0\", \"post_time\": \"2014-10-09 15:57:54\" },\n\t{ \"post_id\": 6412, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"Re: help: to understand output for naive bayes algorithm\", \"username\": \"tlhumphrey2\", \"post_text\": \"Have you read, http://hpccsystems.com/download/docs/machine-learning? On page 21, it gives you a description of the outputs: Raw, CrossAssignment, PrecisionByClass, and Headline.\\n\\nYou may want to use something like the following FUNCTIONMACRO to compare predicted class label with actual class label.\\n\\nEXPORT\\ncalcProportionCorrect(actualDep, predictedDep) := FUNCTIONMACRO\\n actual_v_predicted_rec := RECORD\\n unsigned id;\\n unsigned number;\\n real a_value;\\n real e_value;\\n real a_e_diff;\\n END;\\n\\n actual_v_predicted :=\\n JOIN(actualDep\\n ,predictedDep\\n ,LEFT.id=RIGHT.id\\n ,TRANSFORM(actual_v_predicted_rec\\n ,SELF.a_value:=LEFT.value\\n ,SELF.e_value:=RIGHT.value\\n ,SELF.a_e_diff := ABS(LEFT.value-RIGHT.value)\\n ,SELF:=LEFT\\n )\\n );\\n //OUTPUT(actual_v_predicted,NAMED('actual_v_predicted'));\\n\\n proportion_error := COUNT(actual_v_predicted(a_value<>e_value))/COUNT(actual_v_predicted);\\n //OUTPUT(proportion_error,NAMED('proportion_error'));\\n proportion_correct := 1.0 - proportion_error;\\n //OUTPUT(proportion_correct,NAMED('proportion_correct'));\\n return proportion_correct;\\nENDMACRO;
\", \"post_time\": \"2014-10-09 15:01:24\" },\n\t{ \"post_id\": 6410, \"topic_id\": 1475, \"forum_id\": 8, \"post_subject\": \"help: to understand output for naive bayes algorithm\", \"username\": \"chennapooja\", \"post_text\": \"Hello All,\\n\\n can someone brief me the output which we get when we call naïve bayes algorithm. How will Naïve bayes represent the classification in the final output.\\n\\nBelow is my program where id5 is class label and it should be predicted for the test data I give as input. I am leaving the 5th column as blank while giving input for test data where as training data has all the values.\\n\\n IMPORT * FROM ML;\\nIMPORT ML.Mat;\\n//IMPORT ML.Tests.Explanatory as TE;\\n\\n//This is the tennis-weather dataset transformed to discrete number values.\\nweatherRecord := RECORD\\n\\tTypes.t_RecordID id1;\\n\\tTypes.t_FieldNumber id2;\\n\\tTypes.t_FieldNumber id3;\\n\\tTypes.t_FieldNumber id4;\\n\\tTypes.t_FieldNumber id5;\\n\\t\\nEND;\\n\\ntrain_Data := dataset('~thor::iris', weatherRecord,\\nCSV(heading(1),separator(','),quote('')));\\n\\ntest_Data := dataset('~thor::iris_test', weatherRecord,\\nCSV(heading(1),separator(','),quote('')));\\n\\nindep_data:= TABLE(train_Data,{id1,id2,id3,id4});\\ndep_data:= TABLE(train_Data,{id1, id5});\\n\\nindep_test:= TABLE(test_Data,{id1,id2,id3,id4});\\n\\nToField(indep_data, pr_indep);\\nindep := ML.Discretize.ByRounding(pr_indep);\\nToField(dep_data, pr_dep);\\ndep := ML.Discretize.ByRounding(pr_dep);\\n\\nToField(indep_test, test_indep);\\nindep_t := ML.Discretize.ByRounding(test_indep);\\n\\nBayesModule := ML.Classify.NaiveBayes;\\n\\nTestModule := BayesModule.TestD(indep,dep);\\nTestModule.Raw;\\nTestModule.CrossAssignments;\\nTestModule.PrecisionByClass;\\nTestModule.Headline;\\n\\nModel := BayesModule.LearnD(indep,dep);\\nResults := BayesModule.ClassifyD(indep_t,Model);\\nResults;\\n\\nIn the output for Results, I have id, number, value, conf, closest_conf. Please give description for these fields and which one should be taken as prediction for my class label.\\n\\nThanks in advance.\", \"post_time\": \"2014-10-08 23:58:15\" },\n\t{ \"post_id\": 6472, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"tlhumphrey2\", \"post_text\": \"The monk dataset, ML.Test.Explanatory.MonkDS.ecl, we got from the UCI repository.\\n\\nYour training dataset must be randomly selected if you want good training. I use the following ECL code to randomly order a dataset, and then I take the top N records of the randomly ordered dataset as my training data and use the rest for my test data. N is the size of your training dataset.\\n\\nEXPORT\\nrandomize( ds ) := FUNCTIONMACRO\\n ran_rec := RECORD\\n REAL ran;\\n ds\\n END;\\n\\n return\\n PROJECT(\\n SORT(\\n PROJECT(ds\\n ,TRANSFORM(ran_rec\\n ,SELF.ran:=RANDOM()\\n ,SELF := LEFT\\n )\\n )\\n ,ran\\n )\\n ,recordof(ds)\\n );\\nENDMACRO;
\\n\\nHere is an example of how to convert string labels to integers. Let use say your string labels were ('fair', 'hot', 'cold'). You have 3 labels, converted to integers this would be (1, 2, 3). Where 1 is equal to 'fair', 2 is equal to 'hot' and 3 is equal to 'cold'.\", \"post_time\": \"2014-10-16 18:30:19\" },\n\t{ \"post_id\": 6470, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\nThanks for the response. Can I get some sample data set name from UCI repository which I can use for classification with decision trees and naïve bayes?\\n\\n3. My test data is separate all together, it does not have original data again. I am taking a sample data set from UCI repository, dividing it into two parts, one for training and other for test. Using train data, I am learning the model and using test data I am classifying with classifyD. Is this approach wrong?\\nLinke monkDS dataset, should test data contain all of train data also?\\n\\n4. How to convert a character into an integer? Also what if I have string as class label? ( I encountered this in Iris dataset - we have three strings in class label).\", \"post_time\": \"2014-10-16 17:58:49\" },\n\t{ \"post_id\": 6468, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"tlhumphrey2\", \"post_text\": \"1. About MonkDS Results:\\n\\nIf you are attempting to use ML.Tests.Explanatory.DecisionTree to get your MonkDS results, I can tell you there are problems with it. I've attached a simplified version of that code.\\n\\n2. Cross assign:\\n\\nIf you have your training and test data separate, how did you pick the records you used for training? And, I assume your test data was all the original weather record minus those in the training data. Is that correct? \\n\\nWhen I look at my results for MonkDS, using the code I attached, I see both 0 and 1 in the predicted classifications.\\n\\n3. If your class label is character, you need to convert it to integer.\\n\\n4. I've worked with datasets from UCI and have gotten acceptable results.\", \"post_time\": \"2014-10-16 17:48:32\" },\n\t{ \"post_id\": 6455, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"chennapooja\", \"post_text\": \"Thanks for the response.\\n\\nI have few questions here:\\n\\n1. When I use weather data set, I am getting output which is fine because in that there is no test data, but when I use monkDS, the prediction for class is not proper. Is it like output wont be same as given in monkDS test data?(in test data we have value for class label also, so output through decision trees classification-classifyD value and test data class label are not same for all records)\\n\\n2. Cross assig output is clearly showing how many records are classified as 0 and how many classified as 1 for weather data. But when I have train data and test data separate, cross assig output is showing all records classified as 1 though there are some records under 0 classifier.(I have tested this for monkDS).\\n\\n3. I have taken some sample classification data sets from UCI(Irvine University) repository, like Breast Cancer data, but I am not getting any output for classification. 
Also if I have my class label as character type, should I give it other than Types.t_FieldNumber classlbl; in my record definition because there is nothing specific for characters and strings in Types.ecl.\\n\\n4. can I get any sample working UCI datasets for decision trees, kmeans, naïve bayes?\\n\\nThanks in advance.\", \"post_time\": \"2014-10-16 02:29:11\" },\n\t{ \"post_id\": 6442, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"tlhumphrey2\", \"post_text\": \"LearnD learns the model from the training set, while ClassifyD uses the learned model to classify a test set.\\n\\nCurrently, the only documentation for the ML library is machinelearning.pdf and of course the code.\", \"post_time\": \"2014-10-14 17:18:07\" },\n\t{ \"post_id\": 6440, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"chennapooja\", \"post_text\": \"Thanks for sharing...\\n\\nDo we have classification in decision trees or it is just for building model? That means, can I classify my test data into separate categories using decision trees?\\n\\nAlso, is there any manual for understanding outputs obtained for algorithms, if so please share....in machinelearning.pdf, explanation is shared for only few.\", \"post_time\": \"2014-10-14 15:46:14\" },\n\t{ \"post_id\": 6438, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"tlhumphrey2\", \"post_text\": \"I believe you are talking about the function, AUC_ROC, which stands for "area under the curve" "receiver operating characteristic". A simple but accurate explanation of what this is http://metaoptimize.com/qa/questions/988/simple-explanation-of-area-under-the-roc-curve.\\n\\nI'll have to respond a second time after I fix syntax errors I'm getting.\", \"post_time\": \"2014-10-14 14:18:31\" },\n\t{ \"post_id\": 6437, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"Re: help: understand output for decision trees\", \"username\": \"tlhumphrey2\", \"post_text\": \"I believe you are talking about the function, AUC_ROC, which stands for "area under the curve" "receiver operating characteristic". A simple but accurate explanation of what this is http://metaoptimize.com/qa/questions/988/simple-explanation-of-area-under-the-roc-curve.\\n\\nI'll have to respond a second time after I fix syntax errors I'm getting.\", \"post_time\": \"2014-10-14 14:18:13\" },\n\t{ \"post_id\": 6431, \"topic_id\": 1478, \"forum_id\": 8, \"post_subject\": \"help: understand output for decision trees\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n There is newly introduced code for classify test data in github for decision trees.\\ncan I get some help understanding that what AUC is and what do we get from that as result.\\n\\n I have id,classifier,threso,fpr,tpr,deltapos,deltaneg,cumneg,auc as headings in output. Need description for these outputs. Please help me.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2014-10-14 03:44:17\" },\n\t{ \"post_id\": 6489, \"topic_id\": 1483, \"forum_id\": 8, \"post_subject\": \"Re: PigMix Benchmark\", \"username\": \"tmiddleton\", \"post_text\": \"Hi,\\n\\nWe ran this benchmark several years ago. We used a custom translation program called Bacon to translate the Pig scripts into ECL at the time. Since then the Pig language and ECL have changed with new enhancements, and the current PigMix scripts may need to be translated manually. 
The biggest issue in running the benchmark is generating the PigMix data in the data structures which are used by the HPCC platform and ECL. When we first did this, it was a fairly major effort to get everything to work. We used a cluster that had both a current version at the time of both Hadoop and HPCC installed. One other person has done this independently, she was a graduate student at Hebrew University in Israel, and I believe she did publish a paper with her results.\\n\\nWe welcome independent benchmarks, but using PigMix is a challenging effort and requires considerable language and platform knowledge. Hadoop has many complex configuration parameters which can affect performance as well. It might be better to try a benchmark like the Terabyte Sort (or other sort at http://sortbenchmark.org/ ) which can demonstrate performance differences with the same number of nodes between Hadoop and HPCC. However, the ECL is very simple for this and does not show the difference between ECL and other languages like Pig for more complex data intensive computing. You could also zero in on a couple of the Pigmix tests, and write equivalent ECL and see what results you get. Again the challenge will be generating the data in data structures and types used by ECL versus the data structures and types used by PIG.\", \"post_time\": \"2014-10-21 17:50:21\" },\n\t{ \"post_id\": 6477, \"topic_id\": 1483, \"forum_id\": 8, \"post_subject\": \"PigMix Benchmark\", \"username\": \"tinebp\", \"post_text\": \"HI,\\n\\nI have a recent build of HPCC Systems Platform installed on my computer and I'm interested in running some benchmark on it.\\n\\nI saw the PigMix ECL sources posted on the website (http://hpccsystems.com/Why-HPCC/HPCC-vs ... pigmix_ecl) and would like to run them on my computer.\\n\\nI would like to generate a batch job on my server that runs the full benchmark and generates to nice performance report at the end, similar to those available here (https://cwiki.apache.org/confluence/display/PIG/PigMix).\\n\\nI'm not sure where to start, has someone already done something similar? \\nHow do I log performance metrics in HPCC?\\nHow do I create a batch job?\\n\\nThanks,\\n-Blaise\", \"post_time\": \"2014-10-20 07:04:11\" },\n\t{ \"post_id\": 6521, \"topic_id\": 1487, \"forum_id\": 8, \"post_subject\": \"Re: testing with UCI data sets\", \"username\": \"tlhumphrey2\", \"post_text\": \"All three of these functions, Naïve Bayes, DecisionTrees, and K-Means have not successfully been tested with large datasets. There are issues with all three. Their results have been compared to the results of R equivalent ONLY ON SMALL DATASETS.\\n\\nWhat I suggest you do is use a smaller randomly selected subset of your data as the training set (I'm thinking no more that 5000 records). You should be able to use all the data during classification/testing.\\n\\nYou should get good prediction with a smaller dataset UNLESS a class or classes is rare.\", \"post_time\": \"2014-10-28 18:24:25\" },\n\t{ \"post_id\": 6512, \"topic_id\": 1487, \"forum_id\": 8, \"post_subject\": \"Re: testing with UCI data sets\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n can anyone please help me with above queries?\\n My job with 48000 records dataset ran for three days, and still I did not get any result, there are no errors too. 
\\n Thanks in advance.\", \"post_time\": \"2014-10-28 16:12:49\" },\n\t{ \"post_id\": 6496, \"topic_id\": 1487, \"forum_id\": 8, \"post_subject\": \"testing with UCI data sets\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n I am using 3 algorithms naïve bayes, decision trees, and kmeans for testing very large datasets from UCI data repository.\\n I took Adult dataset which has 48000 records, 32000 as training and 16000 as testing records. Tried to run decision tree algorithm on it. I did not get result even after some 900 minutes. It still shows the status as running. Any idea how much time will it take to run such a big data.\\n Also any suggestions which dataset has to be used and is already tested with above algorithms from UCI.\\n\\nThanks in advance.\", \"post_time\": \"2014-10-24 22:09:16\" },\n\t{ \"post_id\": 6507, \"topic_id\": 1489, \"forum_id\": 8, \"post_subject\": \"Re: 'Memory Limit Exceeded' err while trying to merge huge d\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Dan,\\n\\nyes we tried that approach also like we have a monthly super key which contains weekly subfiles, where this weekly sub files has daily sub files n so on. But still we faced performance issue. so we moved to this merging concept. \\n\\nThanks,\\nArjun\", \"post_time\": \"2014-10-27 13:51:53\" },\n\t{ \"post_id\": 6506, \"topic_id\": 1489, \"forum_id\": 8, \"post_subject\": \"Re: 'Memory Limit Exceeded' err while trying to merge huge d\", \"username\": \"DSC\", \"post_text\": \"That makes perfect sense.\\n\\nI still think the way to go here is via superfiles, though. Remember, superfiles can be members of other superfiles, so it is possible to create some pretty nifty "groupings" that will go a long way toward solving any performance problems like this. In the Programmer's Guide manual, within the superfiles section, there is some discussion about nesting superfiles and how you use that to manage a large influx of data like what you're describing. In essence, you pump your individual data files into a superfile that "wraps" a small unit of time like an hour, day, or week (depends on your volume) then you periodically roll up that superfile into the next-higher superfile (e.g. day -> week -> month), and do the same periodically for other superfiles up the chain. The idea is to keep the number of individual files within any one superfile relatively small.\\n\\nI'm describing it poorly. The Programmer's Guide manual describes the process much better, and has examples.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-10-27 13:20:27\" },\n\t{ \"post_id\": 6505, \"topic_id\": 1489, \"forum_id\": 8, \"post_subject\": \"Re: 'Memory Limit Exceeded' err while trying to merge huge d\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Dan,\\n\\nYes you are correct, we used Super Key concept in similar way. Actualy our requirement is like we have hourly data, after we did some business logic on it each hourly data will be added to super key. But when days are increased the no.of sub files are incresed in super key, at this time we are facing performance issue(since the super key holds no.of sub files).\\n To over come the performance issue we are trying to merge hourly files into daily files and then daily files into weekly file and weekly files into monthly file. so that we can reduce the no. of sub files from Super Key. 
When we tried to merge the daily files into weekly we got [color=#FF0000:2oc9z9hw]'Memory Limit Exceeded' Error because the daily logical files contain huge data/size as mentioned in the above post.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-10-27 13:02:18\" },\n\t{ \"post_id\": 6503, \"topic_id\": 1489, \"forum_id\": 8, \"post_subject\": \"Re: 'Memory Limit Exceeded' err while trying to merge huge d\", \"username\": \"DSC\", \"post_text\": \"The end result of your example is a dataset where each row contains a child dataset. You don't mention what you really want to do with this result, but I imagine that your actual goal is to operate on the contents of file as if it were one gigantic dataset, right?\\n\\nIf so, the easiest way to do that is create a superfile containing each of your individual files, then reference only the superfile in your ECL code. Superfiles are covered extensively in the ECL Programmer's Guide manual. If you don't want to create an actual superfile you could use a temporary superfile (described in the ECL Language Reference manual) instead.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-10-27 12:21:12\" },\n\t{ \"post_id\": 6502, \"topic_id\": 1489, \"forum_id\": 8, \"post_subject\": \"'Memory Limit Exceeded' err while trying to merge huge data\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nwe are facing [color=#FF0000:3swi0ge6]'memory limit exceeded' error while trying to merge data by reading logical files. I we are dealing with small size data it works fine, but when the data is huge we are facing this issue. please find the below code.\\n\\n-------------------------------------------------------------------------------------\\nFileName := RECORD\\n STRING name;\\nEND;\\n\\nsubfilesList := DATASET([{'app::data1'},{'app::data2'},{'app::data3'},{'app::data4'}],FileName);\\n\\nResType := RECORD\\n DATASET(Layout_Module.MinActivityRecord) resultDS;\\nEND;\\n\\nResType TFNew(FileName L) := TRANSFORM\\t\\n SELF.resultDS := DATASET(('~'+L.Name), Layout_Module.MinActivityRecord, FLAT);\\nEND; \\n\\nsubfilesNRM := PROJECT(subfilesList, TFNew(LEFT));\\n-------------------------------------------------------------------------------------\\n\\nplease suggest is there any other approach to achieve this. Any API is available to link the existing dataset/logical files without reading or some thing. How to overcome this issue.\\n\\nbelow are the size details of the file we are using.\\n\\nFile\\tSize\\t rowcount\\n----- ----- --------\\nfile1\\t6474926\\t 87499\\nfile2\\t225402298\\t 3045977\\nfile3\\t589980836\\t 7972714\\nfile4\\t1015461300\\t 13722450\\nfile5\\t984673452\\t 13306398\\nfile6\\t1144512860\\t 15466390\\nfile7\\t1183469864\\t 15992836\\n\\nThanks\\nArjun\", \"post_time\": \"2014-10-27 06:51:14\" },\n\t{ \"post_id\": 8422, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"ghalliday\", \"post_text\": \"Since the expression is a scalar you need to use IFF()\\n\\nresult := IFF(ds[1].number='1', doFunction1, doFunction2);\\n\\nI would also avoid relying on side-effects associated with expressions unless there is no other way of achieving the same thing.\", \"post_time\": \"2015-11-04 16:05:20\" },\n\t{ \"post_id\": 8420, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"Ignacio\", \"post_text\": \"Hi all, \\n\\nThanks again for your previous comments. 
For some time that looked like working in our application, but although we are getting the right answer, when looking at the graph, I'm not sure that the inner behavior is what we want. \\n\\nIn Gavin's example, we are not getting that output to "Pay for a service" (which is correct) but still we are getting an empty tab, as if actually some code was executed in that branch of the IFF. When looking at the graph in Roxie, I can see as attached in the picture below.\\n\\nWe have several real life examples, some more complex than other. The simplest one is when depending on an input we want to output one value calculated from a function or from other. \\n\\nIFF(NOFOLD(pIn_ClientId)=ClientID1,\\n\\t\\tgetDataById1(pIn_ID),\\nIFF(NOFOLD(pIn_ClientId)=ClientID2,\\n\\t\\tgetDataById2(pIn_ID),\\n\\t\\tDS_EMPTY_RESULT));\\n
\\nAnd although with your approach we fixed some issues that happened when using IF, we can still see in the graph that for some reason both branches are evaluated even though the return value is a dataset (as opposed to scalars as in the example).\\n\\nAre we missing anything? How do you think this can be possible? Can you think of any other workaround?\\n\\nBest regards.\\n\\nIgnacio.\", \"post_time\": \"2015-11-04 11:55:20\" },\n\t{ \"post_id\": 6583, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"ghalliday\", \"post_text\": \"Yes that can be true if it is a scalar expression. If that's a problem use IFF instead.\\nGavin\", \"post_time\": \"2014-11-08 19:08:10\" },\n\t{ \"post_id\": 6582, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"nawaz\", \"post_text\": \"Hi all, \\n\\nImagine that depending on a condition I want to get a my result value from a certain function or another one. When I specify this using an IF statement, I can see that both are actually executed, although then I am given the right value. \\n\\n\\n\\n\\n\\n_______________\\nwww.solitairechamp.info\", \"post_time\": \"2014-11-08 17:39:50\" },\n\t{ \"post_id\": 6560, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"Ignacio\", \"post_text\": \"Thanks both, Bob and Gavin. \\n\\n[quote="bforeman":3gbxtts5]I don't see how your code example will even compile. Rule #1 in ECL is that you cannot mix EXPORTed definitions with actions.\\nSorry for that bad practice.\\n\\n[quote="ghalliday":3gbxtts5]IF() is executed if the type of the results is an action or a dataset.\\nI was not aware of it, good to know.\\n\\n[quote="ghalliday":3gbxtts5]To ensure that the other branch isn't executed you can use IFF().\\nAs you guessed, in my real life example the use of IFF() was fine to do the trick. No need for the other tweaks, although again, good learning. \\n\\nThanks all again.\\n\\nIgnacio.\", \"post_time\": \"2014-11-04 17:04:39\" },\n\t{ \"post_id\": 6548, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"ghalliday\", \"post_text\": \"Bob: You want to avoid using SEQUENTIAL unless you really need to use it.\\n\\nIt has significant effects - including stopping code being shared between branches of the sequential.\\n\\n\\nIgnacio:\\n\\nThe short answer to your question is that IF() only tries to ensure that only the correct branch of the IF() is executed if the type of the results is an action or a dataset. For scalars (and rows) both branches may be executed. \\n\\nTo ensure that the other branch isn't executed you can use IFF(). Changing to IFF() prevents any work involved in calculating RETURN '1' or RETURN '2'. (If they were complex expressions) - which is likely to be the issue in your real life example.\\n\\n[Somewhat strangely in this case that causes the entire expression to be constant folded - I'm not 100% sure why it isn't with the IF() - but it means we need to add a NOFOLD into the example code to prevent that happening.]\\n\\n\\nHowever... in this case that isn't the whole story. Your "work" is being done in your two outputs. These are associated with the next expression, but only loosely. In this example they are spotted as being invariant, and moved so they are executed globally.\\n\\nYou should be able to use WHEN to directly tie the outputs to the expressions:\\n\\n
EXPORT rec := RECORD\\n STRING1 number;\\nEND;\\n\\nEXPORT doFunction1 := FUNCTION\\n o1 := OUTPUT('Boil the ocean');\\n RETURN WHEN('1', o1, SUCCESS);\\nEND;\\n\\nEXPORT doFunction2 := FUNCTION\\n o1 := OUTPUT('Pay for a service');\\n RETURN WHEN('2', o1, SUCCESS);\\nEND;\\n\\nEXPORT ds := DATASET([{'1'},{'2'}],rec);\\n\\nresult := IFF(NOFOLD(ds[1].number)='1', doFunction1, doFunction2);\\n//(The NOFOLD prevents the test condition being optimized away).\\n\\nOUTPUT(result);\\n
\\n\\nbut there is one problem - https://track.hpccsystems.com/browse/HPCC-10243 - the SUCCESS option hasn't been implement for scalars. \\n\\nSo... If you really want your example to work you need to return datasets from your functions:\\n\\n\\nEXPORT rec := RECORD\\n STRING1 number;\\nEND;\\n\\nEXPORT doFunction1 := FUNCTION\\n o1 := OUTPUT('Boil the ocean');\\n RETURN WHEN(DATASET(['1'], rec), o1, SUCCESS);\\nEND;\\n\\nEXPORT doFunction2 := FUNCTION\\n o1 := OUTPUT('Pay for a service');\\n RETURN WHEN(DATASET(['2'], rec), o1, SUCCESS);\\nEND;\\n\\nEXPORT ds := DATASET([{'1'},{'2'}],rec);\\n\\nresult := IF(NOFOLD(ds[1].number)='1', doFunction1, doFunction2);\\n\\nOUTPUT(result[1].number);\\n
\\n\\nI suspect in real life using IFF will fix your problem, but if your costs really are with associated actions then you'll need to adopt something similar to the code above.\", \"post_time\": \"2014-11-03 11:10:38\" },\n\t{ \"post_id\": 6538, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Re: Conditional execution\", \"username\": \"bforeman\", \"post_text\": \"Hi Ignacio,\\n\\nI don't see how your code example will even compile. Rule #1 in ECL is that you cannot mix EXPORTed definitions with actions. \\n\\nThat said, removing the FUNCTIONs and replacing with SEQUENTIAL actions handles the condition well:\\n\\nrec := RECORD\\n STRING1 number;\\n END;\\n\\n doFunction1 := SEQUENTIAL(OUTPUT('Boil the Ocean'),OUTPUT('1'));\\n doFunction2 := SEQUENTIAL(OUTPUT('Pay for a service'),OUTPUT('2'));\\n ds := DATASET([{'1'},{'2'}],rec);\\n\\n result := IF(ds[1].number='1', doFunction1, doFunction2);\\n result;\\n
\\n\\nHere's another alternative:\\n\\nds := DATASET([{'1'},{'2'}],{STRING1 choice});\\n\\nMyChoice(STRING1 whatfunction) := FUNCTION\\n FirstResult := SEQUENTIAL(OUTPUT('Boil the ocean'),OUTPUT('1'));\\n SecondResult := SEQUENTIAL(OUTPUT('Pay for a service'),OUTPUT('2'));\\n ChoiceOut := IF (whatfunction = '1',\\t\\t\\n FirstResult,\\n\\t SecondResult);\\n RETURN ChoiceOut;\\t\\t\\t\\t\\t\\t\\t\\t \\nEND;\\n\\n\\n MyChoice(ds[1].choice);\\n MyChoice(ds[2].choice);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-30 15:01:49\" },\n\t{ \"post_id\": 6527, \"topic_id\": 1491, \"forum_id\": 8, \"post_subject\": \"Conditional execution\", \"username\": \"Ignacio\", \"post_text\": \"Hi all, \\n\\nImagine that depending on a condition I want to get a my result value from a certain function or another one. When I specify this using an IF statement, I can see that both are actually executed, although then I am given the right value. \\n\\nThis can be good when the calculation is simple enough, but it can be the case when one of the functions will perform complex calculations ("boil the ocean") and the other may call an external service we have to pay for. We may not want to call both every time, as it would be bad in terms of performance and we will be wasting money.\\n\\nSo in the following code :\\n\\nEXPORT rec := RECORD\\n STRING1 number;\\nEND;\\n\\nEXPORT doFunction1 := FUNCTION\\n OUTPUT('Boil the ocean');\\n RETURN '1';\\nEND;\\n\\nEXPORT doFunction2 := FUNCTION\\n OUTPUT('Pay for a service');\\n RETURN '2';\\nEND;\\n\\nEXPORT ds := DATASET([{'1'},{'2'}],rec);\\n\\nresult := IF(ds[1].number='1', doFunction1, doFunction2);\\n\\nOUTPUT(result);\\n
\\n\\nI can see we are getting the right value ('1') but both outputs are displayed ('Boil the ocean' and 'Pay for a service'), so I can tell that both functions were executed.\\n\\nThanks again for your time.\", \"post_time\": \"2014-10-29 15:55:24\" },\n\t{ \"post_id\": 6544, \"topic_id\": 1494, \"forum_id\": 8, \"post_subject\": \"Re: "No access to Dali" error\", \"username\": \"vikram\", \"post_text\": \"Bob,\\n\\n Thanks for your reply. We were trying to isolate the specific cause of this issue. And it seems the filter condition on line 30 in WUID: W20141031-091858 is causing this issue! It is actually fetching the required dataset, however, when I apply the filter it throws the "No access to Dali" error.\\n We successfully replicated the issue on multiple clusters and even had the same issue from Boca Dataland. (It happened before for a file on Boca Prod, but that doesn't happen now!) However, this issue keeps recurring for a specific dataset on fcra logs thor(We tried calling from alpha and boca devs). Also, as I said before, it works fine from a single node cluster!\\n\\nVikram\", \"post_time\": \"2014-10-31 13:30:44\" },\n\t{ \"post_id\": 6536, \"topic_id\": 1494, \"forum_id\": 8, \"post_subject\": \"Re: "No access to Dali" error\", \"username\": \"bforeman\", \"post_text\": \"Hi Vikram,\\n\\nIt could mean that a plugin is not installed on the master THOR on the target cluster where the file is located. \\n\\nTry creating a small sample file on another cluster and see if the error goes away. Also, try writing some ECL which accesses that file outside of the MACRO. If that works, something in the MACRO is calling a plugin that is probably not installed on the Boca cluster.\\n\\nYou may also need to contact Boca Operations and they can have a look at the log files in your workunit.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-30 14:16:36\" },\n\t{ \"post_id\": 6535, \"topic_id\": 1494, \"forum_id\": 8, \"post_subject\": \""No access to Dali" error\", \"username\": \"vikram\", \"post_text\": \"Hi ECL users,\\n\\n I have having a issue in my code wherein I am trying to run a macro - which runs perfectly fine when I run it from hthor. However, it gives out the following error when I run it from any other thor(I am reading a boca file from alpha dev):\\n\\n"No access to Dali - this normally means a plugin call is being called from a thorslave,"\\n\\nDoes anyone have an idea of what this error means?\\n\\nThank you,\\nVikram\", \"post_time\": \"2014-10-30 13:11:15\" },\n\t{ \"post_id\": 6558, \"topic_id\": 1495, \"forum_id\": 8, \"post_subject\": \"Re: Use of CONST flag in ASSERT()\", \"username\": \"ghalliday\", \"post_text\": \"CONST forces the expression to be constant folded and evaluated when the code is generated, rather than at runtime. The reason that many of the standard library functions have tests that include CONST and don't is partly to test the code generator - to ensure they are evaluated correctly at compile and runtime.\\n\\nIf your altered function complains it means the code generator couldn't constant fold it - probably because it contained something that isn't constant folded for some reason.\\n\\nNo working with CONST doesn't mean there is an issue with your function. The main advantage is that if the function is call with constant arguments it will be evaluated at compile time, rather than imposing a runtime overhead. 
If you think it should have been easy to constant fold the function then it may show an optimization that should be included in the code generator.\", \"post_time\": \"2014-11-04 11:12:23\" },\n\t{ \"post_id\": 6539, \"topic_id\": 1495, \"forum_id\": 8, \"post_subject\": \"Use of CONST flag in ASSERT()\", \"username\": \"DSC\", \"post_text\": \"I'm trying to update some existing test cases where ASSERT() is used to verify a function's output. The existing line looked like this:\\n\\nASSERT(myFunc(12345, 'fubar') = 'correct_result', CONST);
\\nI altered myFunc() and then discovered that this test failed:\\n\\nWarning: (8,12): error C4082: Expression is not constant
\\nThis led me back to the language reference manual. The note for CONST says:\\n\\nOptional. Specifies the condition is evaluated during code generation.
\\nI'm at a bit of a loss. The test passes if I remove the CONST flag, so now I'm just wondering what's going on.\\n\\nThe suite of tests have several marked with CONST and many more that are not. What determines when CONST can/should be used in an ASSERT()? Is it better to have a function that can pass a CONST-flagged test?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2014-10-30 20:02:18\" },\n\t{ \"post_id\": 6661, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"ArjunKumar\", \"post_text\": \"Thank you Richard, its working \\n\\nThanks\\nArjun\", \"post_time\": \"2014-12-04 04:31:37\" },\n\t{ \"post_id\": 6625, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nThe way to make this work is to add a NOTHOR around the APPLY. This is due to a change made in the 3.10 release (we're now up to 5.0) and NOTHOR is the way to implement APPLY actions that need to talk to Dali (as this one does).\\n\\nSo, your APPLY now becomes:
NOTHOR(APPLY(ddd,STD.File.DeleteLogicalFile(ddd.name)));
All the rest of your code stays the same.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-21 19:31:45\" },\n\t{ \"post_id\": 6552, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nIt did the same for me. I reported the issue in JIRA: https://track.hpccsystems.com/browse/HPCC-12514\\n\\nRichard\", \"post_time\": \"2014-11-03 14:43:35\" },\n\t{ \"post_id\": 6550, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Richard,\\n\\nI tried with same code but it gives the below error. but when i execute without apply the logical file is getting deleted.\\n\\n[color=#FF0040:1duafnen]Error: System error: 0: Graph[1], SLAVE 10.144.240.11:16600: Graph[1], apply[3]: Could not delete file appsharedsvc::collab::test, (0, 0), 0, \\n\\nAPPLY(ddd,STD.File.DeleteLogicalFile(ddd.name));
\\n\\nPlease suggest.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-11-03 12:08:50\" },\n\t{ \"post_id\": 6547, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nYour code doesn't work because you're not passing the filename to delete to the STD.File.DeleteLogicalFile() function. Try it this way:sampR := RECORD\\n STRING name;\\nEND;\\n\\nddd := DATASET([{'~appsharedsvc::collab::test'},{'~appsharedsvc::collab::test1'}],sampR);\\n\\nAPPLY(ddd,STD.File.DeleteLogicalFile(ddd.name));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-31 14:20:31\" },\n\t{ \"post_id\": 6546, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Richard,\\n\\nI am confuse with APPLY api, please look the below code and suggest where can i pass my dataset exactly. If this is wrong give me some example related to my requirement.\\n\\nsampR := RECORD\\n STRING name;\\nEND;\\n\\nddd := DATASET([{'~appsharedsvc::collab::test'},{'~appsharedsvc::collab::test1'}],sampR);\\n\\nAPPLY(ddd,STD.File.DeleteLogicalFile());
\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-10-31 13:59:05\" },\n\t{ \"post_id\": 6545, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Re: Deleting No.of Logical Files by passing DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nSince the STD.File.DeleteLogicalFile() function is essentially an action, then instead of PROJECT you should take a look at APPLY.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-31 13:39:57\" },\n\t{ \"post_id\": 6543, \"topic_id\": 1497, \"forum_id\": 8, \"post_subject\": \"Deleting No.of Logical Files by passing DATASET\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nUsing STD.File.DeleteLogicalFile() function we can delete one file. I have a requirement to delete some no.of logical files at a time. How to do it using PROJECT function where i can send the list of logical files names as a DATASET. How can we make a call to the delete function inside the PROJECT function. Do we have any other approach to do. Please suggest.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-10-31 13:26:19\" },\n\t{ \"post_id\": 6574, \"topic_id\": 1501, \"forum_id\": 8, \"post_subject\": \"Re: Dictionary intended usage and limitations\", \"username\": \"aintnomyth\", \"post_text\": \"Yes, I'm targeting THOR. I'll give your suggestion a shot, Thanks!\", \"post_time\": \"2014-11-06 18:00:44\" },\n\t{ \"post_id\": 6571, \"topic_id\": 1501, \"forum_id\": 8, \"post_subject\": \"Re: Dictionary intended usage and limitations\", \"username\": \"ghalliday\", \"post_text\": \"Out of interest which platform are you using. I suspect thor which has had the least testing with dictionaries.\\n\\nYou could try using an option\\n\\n#option ('useResultsForChildSpills', true);\\n\\nAs far as I know it is fully functional, but it hasn't been completely regression tested - so is currently disabled by default.\\n\\nThis should stop the dictionaries being stored in the workunit. It will probably be more efficient as well.\\n\\nGavin\", \"post_time\": \"2014-11-06 15:12:11\" },\n\t{ \"post_id\": 6562, \"topic_id\": 1501, \"forum_id\": 8, \"post_subject\": \"Re: Dictionary intended usage and limitations\", \"username\": \"DSC\", \"post_text\": \"I don't have a concrete answer regarding the limitations of a dictionary, but the error you're seeing looks a lot like the problem of filtering a dataset against a large SET of values:\\n\\nvalidCodes := SET(codeDS(someValue > 10),code);\\n\\nvalidRecs := recDS(recCode IN validCodes);
\\nIn that example, if validCodes contains "many" records (with different values for many) and it needs to spill, you'll see the same error when filtering recDS. The only workaround, as far as I know, is to use a JOIN instead.\\n\\nIn your example, it may be that your females-only dictionary is causing the problem (based solely on thinking that you may have a lot of people, but not that many diagnostic codes). So, you may be able to use a JOIN to filter the claims, then a dictionary lookup of the diagnostic codes to filter that result. It's a compromise, perhaps with better performance then doing JOINs everywhere.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-11-05 12:36:24\" },\n\t{ \"post_id\": 6557, \"topic_id\": 1501, \"forum_id\": 8, \"post_subject\": \"Dictionary intended usage and limitations\", \"username\": \"aintnomyth\", \"post_text\": \"This is one of those I'm-looking-to-understand-this-feature-better posts...\\n\\nI’m using DICTIONARY to simplify code and to facilitate the use of PROJECT instead of JOIN on multiple datasets and/or nesting child datasets. I’m cleaning healthcare claims, we have a zillion rules but the pattern is consistent, each rule looks something like this:\\n\\nRule: “For Female members, flag related ClaimDiagnosis records that have male-only diagnosis codes (prostate cancer, for example)”\\n1.\\tFind Members who are Female – create the FemaleDictionary\\n2.\\tFind diagnosis codes that are not valid for Females – create MaleOnlyDiagsDictionary\\n3.\\tPROJECT through the ClaimDiags dataset \\na.\\tChecking to see if:\\n
\\nthe MemberID is in FemaleDictionary, and
the diagnosis code is in MaleOnlyDiagsDictionary
b.\\tSet the ClaimDiags.MaleDiagForFemale flag for records with matches in both dictionaries
\\n4.\\tFeed the results into the next rule\\n\\nThe performance is good, much better than performing JOINs or generating intermediate nested child datasets, but it seems to suffer from a major drawback (or maybe I’m misusing it) - the required DICTIONARY must fit into a certain ideal size range:\\n•\\tCan’t be too small - the DICTIONARY has some initial overhead so small / trivial datasets perform better with a JOIN because the operation which uses the dictionary never overcomes the overhead.\\n•\\tCan’t be too big - the DICTIONARY has an upper size limit so it’s not a consideration when millions of keys are possible. \\nThe workunit fails when the dictionary exceeds the size defined by outputLimit: Error: System error: 10099: Graph[4], dictionaryworkunitwrite[7]: Dataset too large to output to workunit (limit is set to 10) megabytes, in result (name=spill1), Master exception\\n
\\nUnfortunately the size limit is not documented and we don’t always know how many keys will be present at design time (“know your data” notwithstanding) so I can take advantage of this super cool feature in just a few ideal cases. Is this the intended usage? \\n\\nCan you offer some guidance on the outputLimit and the default 10MB size? The error indicates the dictionary is stored in the workunit output which was a surprise, does that actually scale? Why is the default size 10MB? And does the answer to that question also answer my next question - what if I bump the outputLimit to a super large value?\\n\\nTo see an example of the size limit, run this ECL against the Certification dataset:\\nIMPORT Std;\\nIMPORT _Certification as Certification;\\n\\n//works\\n//maxDictRows := 1310721;\\n\\n//does not work\\nmaxDictRows := 1310722;\\n\\nds := DATASET(Certification.Setup.filename, Certification.Layout_FullFormat,THOR);\\n\\ndsCount := COUNT(ds);\\nidSize := SIZEOF(ds.id);\\nid_MB := (dsCount * idSize) / POWER(1024,2);\\n\\nid_10MB_Dictionary := DICTIONARY(ds(id<maxDictRows ),{id});\\n\\ndictCount := COUNT(id_10MB_Dictionary);\\ndict_MB := (dictCount * idSize) / POWER(1024,2);\\n\\nOUTPUT(id_MB, named('idMB') );\\nOUTPUT(dictCount, named('dictCount') );\\nOUTPUT(dict_MB, named('dict_MB') );
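As an illustration only, the rule pattern described at the start of this post might be sketched like this (every dataset, record, and field name here is hypothetical):

FemaleDict       := DICTIONARY(Members(gender = 'F'), {MemberID});
MaleOnlyDiagDict := DICTIONARY(DiagCodes(maleOnly = TRUE), {DiagCode});

ClaimDiagRec FlagRow(ClaimDiagRec L) := TRANSFORM
  // flag rows whose member is female AND whose diagnosis code is male-only
  SELF.MaleDiagForFemale := L.MemberID IN FemaleDict AND
                            L.DiagCode IN MaleOnlyDiagDict;
  SELF := L;
END;

flaggedClaimDiags := PROJECT(ClaimDiags, FlagRow(LEFT));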
\\n\\nThanks in advance!\", \"post_time\": \"2014-11-03 22:31:34\" },\n\t{ \"post_id\": 6567, \"topic_id\": 1502, \"forum_id\": 8, \"post_subject\": \"Re: Key layout mismatch detected for index\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for posting what you resolved regardless!\", \"post_time\": \"2014-11-05 18:40:20\" },\n\t{ \"post_id\": 6565, \"topic_id\": 1502, \"forum_id\": 8, \"post_subject\": \"Re: Key layout mismatch detected for index\", \"username\": \"lpezet\", \"post_text\": \"Strangely though, it was in the Payload and not in the Keys...\", \"post_time\": \"2014-11-05 16:49:08\" },\n\t{ \"post_id\": 6564, \"topic_id\": 1502, \"forum_id\": 8, \"post_subject\": \"Re: Key layout mismatch detected for index\", \"username\": \"lpezet\", \"post_text\": \"Nevermind.\\nWhen building the index, one of the fields was INTEGER2, and when loading it, it was defined as UNSIGNED INTEGER2.\\n\\nAs usual, my bad.\\nLuc.\", \"post_time\": \"2014-11-05 16:38:59\" },\n\t{ \"post_id\": 6559, \"topic_id\": 1502, \"forum_id\": 8, \"post_subject\": \"Key layout mismatch detected for index\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nI'm having a problem with Roxie.\\nI was able first to develop some Roxie Queries, creating indices as SubFiles and referencing a SuperFile in my queries. \\nI followed the Development Path in the "Rapid Data Delivery Engine Reference" doc and everything went just fine.\\n\\nI needed to tweak the layout of that same query, and that's when hell broke loose.\\nI ended up deleting everything (packagemaps, queries, indices) and just starting from scratch.\\nI build an index, add it to a SuperFile, and use that SuperFile in my query.\\nWhen running that query against Thor/hThor, everything is fine and I get results. Great!\\nI compile it, publish it, add packagemap and all but the query is said to be suspended (in ECL Watch its says "On Cluster(s): roxie").\\nSo I just tried submitting the same ECL code against Roxie and I got the following error:\\nError: Query W20141104-151403 is suspended because Key layout mismatch detected for index ..... (0, 0), 1402, \\n
\\n\\nI don't understand it at all. The layout I use when I create the index is the same (I believe) as the layout I use when loading it. If it wasn't the case, wouldn't Thor/hThor choke as well?\\n\\nI tweaked my query to explicitly use that SubFile (i.e. the sole index I built and used in that SuperFile) and not the SuperFile. I get the same error (which I expected).\\nIs there a way to find more information about the "mismatch"? Mismatch between what and what?\\nIs that the value type of a field? The order of the fields?\\n\\nOn a side note, is there a way to define the layout of an Index with Payload and use it AS IS when both building an index AND loading the index?\\nIdea being to avoid copy-paste-tweak which is very error prone. Problem is when building the index, I need to specify which fields to use from my dataset to populate the fields of the index. Is there a better way?\\n\\n\\nThanks,\\nLuc.\", \"post_time\": \"2014-11-04 15:36:18\" },\n\t{ \"post_id\": 6590, \"topic_id\": 1508, \"forum_id\": 8, \"post_subject\": \"Re: KEL Tutorial Error\", \"username\": \"rtaylor\", \"post_text\": \"\", \"post_time\": \"2014-11-11 14:37:15\" },\n\t{ \"post_id\": 6589, \"topic_id\": 1508, \"forum_id\": 8, \"post_subject\": \"Re: KEL Tutorial Error\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Richard, apologies, I must have skipped over that part.\\n\\nAll sorted\\n\\nDavid\", \"post_time\": \"2014-11-10 21:13:29\" },\n\t{ \"post_id\": 6586, \"topic_id\": 1508, \"forum_id\": 8, \"post_subject\": \"Re: KEL Tutorial Error\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nDid you open the KEL.MOD file in the ECL IDE to import the KEL defintions into the KEL04 directory? Your first error indicates the KEL04 directory is not there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-10 14:01:18\" },\n\t{ \"post_id\": 6585, \"topic_id\": 1508, \"forum_id\": 8, \"post_subject\": \"KEL Tutorial Error\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nApologies if this is not the correct Forum.\\n\\nI've downloaded KEL Lite and started going through the tutorial, everything was fine until I got to the section on executing a query. I get the following errors, can anybody help?\\n\\nError: Import names unknown module "KEL04" (1, 23 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\Q_showall.ecl)\\nError: Import names unknown module "KEL04" (1, 23 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Import names unknown module "KEL04" (3, 26 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "KEL" (5, 17 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." 
(expected :=) (7, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (6, 22 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "KEL" (17, 94 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "KEL" (18, 67 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "KEL" (19, 18 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (22, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (21, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (26, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (25, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (30, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (29, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (34, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (33, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (38, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (37, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (42, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (41, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." (expected :=) (46, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (45, 27 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." 
(expected :=) (50, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (49, 20 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Object 'SELF' does not have a field named '__ST26' (61, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST26" (61, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (62, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST27" (62, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (63, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST28" (63, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (64, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST29" (64, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (65, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST30" (65, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (66, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST31" (66, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (67, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__ST32" (67, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: SELF not legal here (68, 5 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__RecordCount" (68, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: syntax error near ":=" : expected '.' (69, 10 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: syntax error near "InData" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (71, 35 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier "__UNWRAP" (72, 20 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Unknown identifier before "." 
(expected :=) (73, 92 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Record must not be zero length (73, 88 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Too many initializers (value COUNT([kel_tutorial_file_person_small_file_invalid])) for inline dataset definition (73, 34 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: syntax error near "END" (74, 1 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\E_Person.ecl)\\nError: Object 'KEL_Tutor' does not have a field named 'E_Person' (2, 8 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\Q_showall.ecl)\\nError: Import names unknown module "KEL04" (3, 26 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\Q_showall.ecl)\\nError: Unknown identifier "E_Person" (5, 20 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\Q_showall.ecl)\\nError: Unknown identifier "__UNWRAP" (6, 18 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\KEL_Tutor\\\\Q_showall.ecl)\\nError: Object 'kel_tutor' does not have a member named 'Q_showall' (2, 3)\\nError: Unknown identifier "Q_showall" (2, 3)\\nError: Object 'kel_tutor' does not have a member named 'res0' (2, 13)\\n\\nThanks \\n\\nDavid\", \"post_time\": \"2014-11-10 08:46:21\" },\n\t{ \"post_id\": 6602, \"topic_id\": 1510, \"forum_id\": 8, \"post_subject\": \"Re: Distribute and Super Files\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. That's great news. \\nSrini\", \"post_time\": \"2014-11-17 20:54:22\" },\n\t{ \"post_id\": 6595, \"topic_id\": 1510, \"forum_id\": 8, \"post_subject\": \"Re: Distribute and Super Files\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nI'm told the answer is Yes, as long as they are distributed on the same size cluster. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-14 17:57:35\" },\n\t{ \"post_id\": 6594, \"topic_id\": 1510, \"forum_id\": 8, \"post_subject\": \"Distribute and Super Files\", \"username\": \"omnibuzz\", \"post_text\": \"Let's say I have a file F1 with the record structure\\n
\nrec := RECORD\n STRING Col1;\n STRING Col2;\n STRING Col3;\nEND;\n
\\n\\nI distributed the file based on HASH of Col1 and I added the resulting file to a super file, say 'SF1'. \\n\\nIf I bring in a new file F2 with the same layout, but with different data. And if I distribute it on the same column and add it to the super file, Can I expect the super file to be distributed (so that an operation like a local dedup on the superfile will not result in duplicates).\\n-Srini\", \"post_time\": \"2014-11-14 17:25:58\" },\n\t{ \"post_id\": 6601, \"topic_id\": 1512, \"forum_id\": 8, \"post_subject\": \"Re: Sort small file with local option giving different resul\", \"username\": \"RMBerger\", \"post_text\": \"Thank You Richard for your response, I now understand why the output action gives me different result when sorting across multiple nodes with local, vs not using local option.\", \"post_time\": \"2014-11-17 20:17:41\" },\n\t{ \"post_id\": 6600, \"topic_id\": 1512, \"forum_id\": 8, \"post_subject\": \"Re: Sort small file with local option giving different resul\", \"username\": \"rtaylor\", \"post_text\": \"RMBerger,\\n\\nThis issue has nothing to do with CSV versus flat or XML files with fixed or variable-length fields. The problem is your use of the LOCAL option.\\n\\nThe LOCAL option on SORT tells the system to do the sort separately and independently on each node with whatever data is already on that node. I assume, from your posting, that what you're expecting to see is a globally correct sort result, where all the "AA"s are at the top and the "ZZ"s are at the bottom. That means you simply need to not use the LOCAL option, which gives you a global SORT. \\n\\nThis example code shows the difference:IMPORT STD;\\nSomeFile := DISTRIBUTE(DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'K'},{'L'},{'M'},{'N'},{'O'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'},{'Z'}],\\n\\t\\t\\t\\t\\t{STRING1 Letter}));\\nr := RECORD\\n UNSIGNED1 Node := STD.system.Thorlib.NODE()+1;\\n Somefile.Letter;\\nEND;\\t\\n\\nSrtTbl := TABLE(SomeFile,r,LOCAL);\\nSrtTbl;\\t\\n\\nSORT(SrtTbl,Letter,LOCAL); //LOCAL sort\\nSORT(SrtTbl,Letter); //Global sort
I added the node number so you can see exactly which node does what on the LOCAL sort. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-17 19:07:54\" },\n\t{ \"post_id\": 6598, \"topic_id\": 1512, \"forum_id\": 8, \"post_subject\": \"Sort small file with local option giving different result\", \"username\": \"RMBerger\", \"post_text\": \"I'm sorting a file with about 1,000 records, csv, with fields defined as string (variable lengths), and if I sort by one field using the option local I get a wrong result, if I remove the option local I get the correct result. I have tried with distribute, without distribute, with option few, option stable, and option unstable. As long as I use option local the result is incorrect. Is there an option that I should use to make sure the result is correct? Is this happening because the file is csv with variable length fields?\", \"post_time\": \"2014-11-17 15:09:37\" },\n\t{ \"post_id\": 6657, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"gsmith\", \"post_text\": \"If you want to avoid the redirect and still send the request as a HTTP URL, simply add "rawxml_" as a param. In summary:\\n\\nDeschedule with JSON response:\\n\\nhttp://x.x.x.x:8010/WsWorkunits/WUAction.json?Wuids_i0=W20141120-051657&ActionType=Abort\\n
\\n\\nDeschedule with redirect (useful for hyperlinks):\\n\\nhttp://x.x.x.x:8010/WsWorkunits/WUAction?Wuids_i0=W20141120-051657&ActionType=Abort\\n
\\n\\nDeschedule with XML response (I suspect this is the one you want):\\n\\nhttp://x.x.x.x:8010/WsWorkunits/WUAction?Wuids_i0=W20141120-051657&ActionType=Abort&rawxml_=1\\n
\", \"post_time\": \"2014-12-03 08:44:34\" },\n\t{ \"post_id\": 6655, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi James, the redirect is a good sign (the WsWorkunits.WUAction() method was contacted, and it responded). Unfortunately there doesn't seem to be a mechanism for preventing a redirect upon a REST based request. However soap requests do not redirect.\\n\\nLet's do this, go to the form page for that method:\\nhttp://X.X.X.X:8010/WsWorkunits/WUAction?ver_=1.53&form\\n\\nType in the WUID you're interested in descheduling into the Wuids field, and "Deschedule" in the ActionType field.\\n\\nNext, at the bottom, click the "SOAP Test" button. This will fill in a soap request with the values you've entered. In the SOAP Test page, click the "Send Request" button. You should see the XML response on the right pane.\\nYou should be able to recreate the soap request programmatically, you're not tied to REST based requests, right?\\n\\nAnyway, if you're attempting to do this in java, there's a set of apis that help you do this. Please look at the hpcc ws client project here:\\nhttps://github.com/hpcc-systems/HPCC-JA ... .ws.client\\n\\nYou could do something like this:\\n\\nPlatform platform = Platform.get("http", "yourHPCCip", 8010, "youruser", "yourpass");\\n\\nboolean pingServer = platform.pingServer(); //optional\\n\\n//platform the platform provides a ws client\\nHPCCWSClient connector = platform.getHPCCWSClient();\\n\\n//the soapproxy provides the WSDL based service methods\\nWsWorkunitsServiceSoapProxy soapProxy = connector.getWsWorkunitsClient().getSoapProxy();\\n\\n//create a wuaction request\\nWUAction deschedule = new WUAction();\\ndeschedule.setWuids(new String[]{"yourWUID"});\\ndeschedule.setActionType("Deschedule");\\n \\n//submit the request\\nWUActionResponse wuAction = soapProxy.WUAction(deschedule);
\", \"post_time\": \"2014-12-02 19:28:00\" },\n\t{ \"post_id\": 6652, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"james.wilson\", \"post_text\": \"Hi Rodrigo\\n\\nI tried that but no luck, and when I tried it from a browser it simply forwarded me to the WU list page.\\n\\nYes, if you've got some Java code I'd appreciate seeing it, I come from a Java background so hopefully should be able to read it...\\n\\nThanks\\n\\nJames\", \"post_time\": \"2014-12-02 09:45:55\" },\n\t{ \"post_id\": 6648, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"I don't think you need the Action.json. Try this:\\nhttp://X.X.X.X:8010/WsWorkunits/WUAction?ver_=1.53&ActionType=Deschedule&Wuids_i0=W2014MMDD-HHMMSS\\n\\nAlso, we have a soap client for JAVA based on the ESP WSDLs which facilitate making soap requests to the HPCC ESP services, let me know if that would help you...\", \"post_time\": \"2014-12-01 17:04:02\" },\n\t{ \"post_id\": 6623, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"james.wilson\", \"post_text\": \"I've tried the HTTPCALL but got the following error:\\nError - syntax error "Expecting "<"" [file offset 1] {*ERROR*"WUActionResponse": {}}
\\n\\nMy guess is this is because the call to the server appears to be returning JSON but I'm calling HTMLCALL with a responsemimetype of 'text/xml' (the only one the documentation says it accepts). I did try using 'text/plain' but got the same response, so suspect that the argument is being ignored and is currently a placeholder. I've tried searching through the source for HTMLCALL to see if I can verify that but can't find it at the moment. I looked in the HPCC-Platform-master repository, is that right?\\n\\nDo you have any ideas of ways round the error?\\n\\nThanks\\n\\nJames\", \"post_time\": \"2014-11-21 13:50:07\" },\n\t{ \"post_id\": 6620, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"JimD\", \"post_text\": \"There is also an ActionType of Deschedule. That would deschedule it without aborting. This is the action ECL Watch performs when you select a scheduled job and press the deschedule button.\\n\\nhttp://x.x.x.x:8010/WsWorkunits/WUAction.json?Wuids_i0=W20141120-051657&ActionType=Deschedule
\", \"post_time\": \"2014-11-20 17:49:15\" },\n\t{ \"post_id\": 6619, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"james.wilson\", \"post_text\": \"Thanks! I'll give that a shot, and when I know a bit more about HPCC & ECL I'll delve in to the source and see if I can add something.\\n\\nJames\", \"post_time\": \"2014-11-20 16:43:56\" },\n\t{ \"post_id\": 6618, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"JimD\", \"post_text\": \"The previous response truncated the command to use :\\n\\nhttp://x.x.x.x:8010/WsWorkunits/WUAction.json?Wuids_i0=W20141120-051657&ActionType=Abort\\n
\", \"post_time\": \"2014-11-20 16:41:28\" },\n\t{ \"post_id\": 6613, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"gsmith\", \"post_text\": \"You could make a HTTPCALL to:\\nhttp://X.X.X.X:8010/WsWorkunits/WUActio ... Type=Abort\\n\\n(You will need to insert the correct IP and Wuid)\", \"post_time\": \"2014-11-20 05:21:11\" },\n\t{ \"post_id\": 6612, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Re: Programattically deschedule workunits\", \"username\": \"rtaylor\", \"post_text\": \"Not that I am aware of through ECL. But since this is an open source platform, I'm sure you can look through the source code and find a way to accomplish the task. Creating a tool to do this would be a good candidate for a contribution to the platform.\\n\\nHTH\\n\\nRichard\", \"post_time\": \"2014-11-19 16:13:05\" },\n\t{ \"post_id\": 6611, \"topic_id\": 1517, \"forum_id\": 8, \"post_subject\": \"Programattically deschedule workunits\", \"username\": \"james.wilson\", \"post_text\": \"Is it possible to deschedule workunits programatically? The background is that we have a set of workunits that are usually in a wait state (they each wake up every 10 minutes to check for an input, if it exists then they get to work on it otherwise they go back to sleep). Sometimes it's necessary to stop them all for a while, and I've been tasked with writing something that will stop or start them all in one operation rather than doing it all manually. I think I should be able to start them, but can't see any way of stopping them.\", \"post_time\": \"2014-11-19 15:33:40\" },\n\t{ \"post_id\": 6635, \"topic_id\": 1521, \"forum_id\": 8, \"post_subject\": \"Re: HPCC cluster in distributed environment\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nSome of the ML functions were originally written specifically for sparse matrices, so their performance on dense matrices is not so good. That's why we're in the process of re-writing/expanding/improving those functions with algorithms designed for dense matrices. We are using PB-BLAS in some cases, if you're familiar with that. I am not an ML expert, so that's as much as I really know about the situation. Perhaps next week, after the Thanksgiving holiday someone more knowledgeable in this area will be able to respond.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-25 16:33:13\" },\n\t{ \"post_id\": 6630, \"topic_id\": 1521, \"forum_id\": 8, \"post_subject\": \"Re: HPCC cluster in distributed environment\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n can anybody share your experiences here? \\n\\nThanks,\\nPooja.\", \"post_time\": \"2014-11-24 20:44:34\" },\n\t{ \"post_id\": 6624, \"topic_id\": 1521, \"forum_id\": 8, \"post_subject\": \"HPCC cluster in distributed environment\", \"username\": \"chennapooja\", \"post_text\": \"Dear All,\\n\\n I initially used VM image and virtual machine to test my machine learning algorithms. My motive is to check how much time naïve bayes and kmeans algorithms took to classify big data of 10 lakhs instances. It took me nearly 8 minutes. \\n Now I tried to test the same scenario with HPCC cluster which I have brought up with 3 nodes to test in distributed system like Hadoop. But the performance degraded, it took me nearly 10 mins for naïve bayes and did not get any result for kmeans. \\n Did any one test in such a environment and faced similar problems? 
Also, if you have any ideas about this case, please provide me some inputs.\n\nThanks And Regards,\nPooja.", "post_time": "2014-11-21 16:23:37" },\n\t{ "post_id": 6636, "topic_id": 1526, "forum_id": 8, "post_subject": "Re: Issue in Outputting large dataset to a logical file", "username": "rtaylor", "post_text": "pius_francis,Currently i am facing an issue where it takes around 1 hour to write 1.4 million records to logical file. Is there is a way to optimize it.
The old saying is: "The Devil is in the details" -- we would need a lot more information about your exact circumstances before we could even speculate what the issue may be and how it might be improved.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-25 16:37:37\" },\n\t{ \"post_id\": 6633, \"topic_id\": 1526, \"forum_id\": 8, \"post_subject\": \"Issue in Outputting large dataset to a logical file\", \"username\": \"pius_francis\", \"post_text\": \"Hi all,\\nCurrently i am facing an issue where it takes around 1 hour to write 1.4 million records to logical file. Is there is a way to optimize it. Thanks in Advance.\", \"post_time\": \"2014-11-25 07:29:36\" },\n\t{ \"post_id\": 6653, \"topic_id\": 1527, \"forum_id\": 8, \"post_subject\": \"Re: Layout mismatch\", \"username\": \"bforeman\", \"post_text\": \"Use the ECL Watch and look at the layout of '~result::w20141127-190256_0', and then compare that with your ECL record layout. There has to be a mismatch somewhere.\\n\\nIf the field names match, then the next thing you need to look at is the order of the fields. If the addition of the nested child data is causing the error, you have to make sure that the declarations are in the exact order that you used to output the file.\\n\\nBob\", \"post_time\": \"2014-12-02 13:51:16\" },\n\t{ \"post_id\": 6651, \"topic_id\": 1527, \"forum_id\": 8, \"post_subject\": \"Re: Layout mismatch\", \"username\": \"kereno\", \"post_text\": \"Thank you Bob. If you look at the layout field names and the query names, it seems to me they match. That's why I am perplexed.\", \"post_time\": \"2014-12-01 22:36:23\" },\n\t{ \"post_id\": 6646, \"topic_id\": 1527, \"forum_id\": 8, \"post_subject\": \"Re: Layout mismatch\", \"username\": \"bforeman\", \"post_text\": \"When you see this error, this simply means that a field name(or names) that you used to output or create this file on the cluster DO NOT MATCH the field names that you are declaring in the layout. They must match exactly.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-01 13:24:24\" },\n\t{ \"post_id\": 6637, \"topic_id\": 1527, \"forum_id\": 8, \"post_subject\": \"Layout mismatch\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am reading a file in XML, and writing it to THOR. Then I run a query on the THOR file, but I get a layout mismatch error. \\nTo make sure the layouts are the same, I used a SOAP request to extract the layout of the thor file, and compared it to the layout in my query. The only difference is that the definition of the nested data:\\nSOAP says Others while the query defines it as dataset. When I experiment without the nested data (which is obviously not a solution..) 
I don't get an error.\\n\\nSee the layout below:\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext">\\n <soap:Body>\\n <DFUGetDataColumnsResponse xmlns="urn:hpccsystems:ws:wsdfu">\\n <LogicalName>~result::w20141127-190256_0</LogicalName>\\n <StartIndex>1</StartIndex>\\n <EndIndex>100</EndIndex>\\n <DFUDataNonKeyedColumns1>\\n <DFUDataColumn>\\n <ColumnLabel>user</ColumnLabel>\\n <ColumnType>String</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>40</ColumnSize>\\n <MaxSize>40</MaxSize>\\n </DFUDataColumn>\\n <DFUDataColumn>\\n <ColumnLabel>action</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n <DFUDataColumn>\\n <ColumnLabel>timespent</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns1>\\n <DFUDataNonKeyedColumns2>\\n <DFUDataColumn>\\n <ColumnLabel>query_term</ColumnLabel>\\n <ColumnType>String</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>40</ColumnSize>\\n <MaxSize>40</MaxSize>\\n </DFUDataColumn>\\n <DFUDataColumn>\\n <ColumnLabel>ip_addr</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n <DFUDataColumn>\\n <ColumnLabel>timestamp</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns2>\\n <DFUDataNonKeyedColumns3>\\n <DFUDataColumn>\\n <ColumnLabel>estimated_revenue</ColumnLabel>\\n <ColumnType>Real</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>32</ColumnSize>\\n <MaxSize>32</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns3>\\n <DFUDataNonKeyedColumns4>\\n <DFUDataColumn>\\n <ColumnLabel>page_info</ColumnLabel>\\n <ColumnType>Others</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>128</ColumnSize>\\n <MaxSize>128</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns4>\\n <DFUDataNonKeyedColumns5>\\n <DFUDataColumn>\\n <ColumnLabel>page_links</ColumnLabel>\\n <ColumnType>Others</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>128</ColumnSize>\\n <MaxSize>128</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns5>\\n <DFUDataNonKeyedColumns6>\\n <DFUDataColumn>\\n <ColumnLabel>__fileposition__</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns6>\\n <RowCount>10</RowCount>\\n <ChooseFile>0</ChooseFile>\\n </DFUGetDataColumnsResponse>\\n </soap:Body>\\n</soap:Envelope>\\n\\nAnd the query is:\\nlayout_kv := record\\nstring k,\\nstring v,\\nend;\\n\\nlayout_kvd := record\\n dataset(layout_kv) kvd,\\nend;\\n\\npage_views_rec := record\\n string30 user,\\n integer8 action,\\n integer8 timespent,\\n string30 query_term,\\n integer8 ip_addr,\\n integer8 timestamp,\\n real8 estimated_revenue,\\n dataset (layout_kv) page_info,\\n dataset (layout_kvd) page_links,\\n integer8 __fileposition__\\n\\nend;\\n\\na := dataset ('~result::w20141127-190256_0', page_views_rec, THOR);\\noutput (a);\\n\\nHowever, when I execute it, I get:\\nSystem error: 10124: Graph[1], diskread[2]: diskread: Layout does not match published layout. 
File: result::w20141127-190256_0\", \"post_time\": \"2014-11-28 03:43:50\" },\n\t{ \"post_id\": 6665, \"topic_id\": 1532, \"forum_id\": 8, \"post_subject\": \"Re: Best way to remotely delete a roxie superfile?\", \"username\": \"bforeman\", \"post_text\": \"Hi Drea,\\n\\nThanks for the feedback, I was just about to forward your post to the development team, but it looks like you found a good method.
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-04 14:18:26\" },\n\t{ \"post_id\": 6660, \"topic_id\": 1532, \"forum_id\": 8, \"post_subject\": \"Re: Best way to remotely delete a roxie superfile?\", \"username\": \"drealeed\", \"post_text\": \"Found an answer to this, posting for future reference:\\n\\nYou can make a SOAPCALL to wsDfu/DFUArrayAction with the following parameters\\n\\nType: Delete (this is case sensitive)\\nLogicalFiles: superfilename here\", \"post_time\": \"2014-12-03 21:01:53\" },\n\t{ \"post_id\": 6649, \"topic_id\": 1532, \"forum_id\": 8, \"post_subject\": \"Best way to remotely delete a roxie superfile?\", \"username\": \"drealeed\", \"post_text\": \"I need to remotely delete a roxie superfile from THOR ecl. For deleting the physical files we're using STD.File.DeleteExternalFile(IP,infile), which works fine.\\n\\nIs there a wsDFU soap method for deleting a superfile available? I tried using the SuperfileAction service but couldn't find a way to make it delete a given superfile definition when the superfile was empty.\", \"post_time\": \"2014-12-01 17:22:23\" },\n\t{ \"post_id\": 6679, \"topic_id\": 1536, \"forum_id\": 8, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"bforeman\", \"post_text\": \"Yes, this means that you have lost the conne3ction with the cluster. It could be a network issue. What kind of cluster are you connecting to? Cloud? VM? Internal?\\n\\nIf the error persists you should contact the systems administrator and ask them to restart the cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-09 14:24:22\" },\n\t{ \"post_id\": 6674, \"topic_id\": 1536, \"forum_id\": 8, \"post_subject\": \"Failed to receive reply from thor\", \"username\": \"chanbchen\", \"post_text\": \"I have seen this error a few times. Does this mean that the thor has timed out? What does this error mean in essence?\", \"post_time\": \"2014-12-08 15:05:11\" },\n\t{ \"post_id\": 6686, \"topic_id\": 1541, \"forum_id\": 8, \"post_subject\": \"Re: PATTERN - Not Empty\", \"username\": \"DSC\", \"post_text\": \"The key is to wrap the optional bits in the OPT() parse pattern definition. You will still have to define a pattern for the last delimiter and the random text (which could be an ANY+ pattern) so you can assign it in the transform, but by wrapping it in OPT() the parser won't fail if it is not present.\\n\\nHere is the code I wrote to test this, possibly for comedic relief:\\n\\n
TextLayout := RECORD\\n STRING s;\\nEND;\\n\\nds := DATASET\\n (\\n [\\n 'Video Player - Tuesday - text I do not care about here',\\n 'Haptic Player - Thursday - ',\\n 'Olfactory Player - Monday',\\n 'Audio Player - Saturday - more ignored text'\\n ],\\n TextLayout\\n );\\n\\nPATTERN pWS := PATTERN('[ \\\\t]')+;\\nPATTERN pDelim := pWS '-' pWS;\\n\\nPATTERN pThing := PATTERN('[A-Za-z0-9 ]')+ PATTERN('[A-Za-z0-9]');\\nPATTERN pDay := PATTERN('[A-Za-z]')+;\\nPATTERN pRemainder := ANY+;\\n\\nRULE pLine := FIRST pThing pDelim pDay OPT(pDelim pRemainder);\\n\\nParsedLayout := RECORD\\n STRING thing;\\n STRING day;\\n STRING remainder;\\nEND;\\n\\nParsedLayout ExtractData(TextLayout l) := TRANSFORM\\n SELF.thing := MATCHTEXT(pThing);\\n SELF.day := MATCHTEXT(pDay);\\n SELF.remainder := MATCHTEXT(pRemainder);\\nEND;\\n\\nresult := PARSE\\n (\\n ds,\\n s,\\n pLine,\\n ExtractData(LEFT),\\n FIRST\\n );\\n\\nOUTPUT(result);
\\nResults:\\n\\nthing day remainder\\n============================================================\\nVideo Player Tuesday text I do not care about here\\nHaptic Player Thursday \\nOlfactory Player Monday \\nAudio Player Saturday more ignored text\\n
\\nEdit: Note the use of FIRST as flag to the PARSE() command. Without that, you'll get every possible pattern match. In this case, you will get two records for each of the incoming records that have 'remainder text' because they match with and without the pRemainder pattern.\\n\\nEdit2: I reread your original post and realized that the remainder text may not be optional, you just don't care about it. In that case, you can remove the OPT() clause in this pattern and it should still work. You could probably remove the FIRST flag on the PARSE() statement as well. You might need to wrap some of the assignments in the transform with TRIM() to make sure you don't get spaces mixed in.\\n\\nHope this helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-12-12 16:00:06\" },\n\t{ \"post_id\": 6685, \"topic_id\": 1541, \"forum_id\": 8, \"post_subject\": \"PATTERN - Not Empty\", \"username\": \"househippo\", \"post_text\": \"I have a string below\\n\\nVideo Player - Tuesday - text I do not care about here
\\n\\nIn the last part of the string I do not want to parse it. In fact I just want to keep it as is. I have a pattern match that works fine for everything before the last part but how do I PATTERN the last part of "text I do not care about here" or just make sure its not empty.\\n\\nThank you\", \"post_time\": \"2014-12-12 08:18:44\" },\n\t{ \"post_id\": 6704, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"Richard,\\n I got a solution there is an option to access the repository source in ecl.exe.\\nThe syntax is the below one\\n\\necl run --target=<cluster> --main=<repository path : folder.eclquery>\\n\\nThanks,\\nBala\", \"post_time\": \"2014-12-17 08:03:15\" },\n\t{ \"post_id\": 6701, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"I have looked at the query tab in the work unit nothing is there. Through ECLplus the ecl code it runs directly instead of how to compile and publish the code like eclcommand utility\", \"post_time\": \"2014-12-16 07:31:06\" },\n\t{ \"post_id\": 6700, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"Hi Richard,\\n\\nI tried as you said i got the below error\\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\4.2.0\\\\clienttools\\\\bin>Eclplus server=10.144.2\\n40.4 cluster=roxie ecl=$Bala.RollUpEx\\nWorkunit W20141216-021744 submitted\\n[color=#FF4000:rs7ioj04]<Error><source>eclserver</source><line>1</line><code>2167</code><message>Unknown\\n identifier before "." (expected :=)</message></Error>\", \"post_time\": \"2014-12-16 07:23:18\" },\n\t{ \"post_id\": 6699, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"rtaylor\", \"post_text\": \"Bala,\\n\\nHere's the old text (no longer in the docs) that talks about running central repository code with the ECLplus.exe program.\\n\\nThis first is the description of the ecl= command line option:ecl= The ECL code to execute. Optionally, this may be replaced by the name of an input file containing the ECL to execute (in the form: @inputfile), or the name of a stored attribute to execute (in the form: $module.attribute).
And this next one shows an example of the third form:In the third form, your ECL code is in an attribute in the Repository. For example, if you have a module A that contains an attribute B, then you can run it directly with:\\n\\n C:\\\\>Eclplus $Training.Query_Utilization_Stat
HTH,\\n\\nRichard\", \"post_time\": \"2014-12-15 16:05:24\" },\n\t{ \"post_id\": 6698, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"rtaylor\", \"post_text\": \"Bala,\\n\\nUsing a central ECL code Repository (a MySQL database) is legacy technology from the pre-Open Source days. It will be deprecated. As long as your code is in that central repository you will need to use ECLplus.exe instead of ECL.exe. ECLplus.exe is also documented in the ClientTools.PDF.\\n\\nOnce you migrate to local repositories, then you will be able to use ECL.exe.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-15 15:48:23\" },\n\t{ \"post_id\": 6697, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"Yes Richard. We are using central repository. Planning to do some workunits run from java code through the client tools. But i couldn't run the repository code from my local machine.\\nIn this example i have created sample.ecl in the local directory. The below query ran successfully.\\n 'ecl run--target=thor100_240_a --name=samplepub --activate sample.ecl'\\n\\nThe same way i have to run the central repository code like this \\n 'ecl run--target=thor100_240_a --name=samplepub --activate <repository ecl file>'\\n\\nThanks\", \"post_time\": \"2014-12-15 15:41:36\" },\n\t{ \"post_id\": 6696, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"rtaylor\", \"post_text\": \"Bala,\\n\\nAre you working with a central Repository? If so, specifically what do you want to accomplish?\\n\\nRichard\", \"post_time\": \"2014-12-15 15:32:12\" },\n\t{ \"post_id\": 6695, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"Richard,\\n\\nI went through the pdf and i tried but i couldn't run the repository sources through ecl command line utility. I can able to run only in the local directory files.\\n\\nThanks,\\nBala\", \"post_time\": \"2014-12-15 15:25:39\" },\n\t{ \"post_id\": 6694, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"rtaylor\", \"post_text\": \"Bala,\\n\\nThat should all be covered in the Client Tools PDF, available here: http://hpccsystems.com/download/docs/eclide-and-clienttools\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-15 15:12:58\" },\n\t{ \"post_id\": 6693, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"Hi Richard,\\n I have created ecl files through ECL IDE and it stored it in ESP server repository. How to run or deploy that ecl files using command line tools.\\n\\nThanks,\\nBala\", \"post_time\": \"2014-12-15 15:10:11\" },\n\t{ \"post_id\": 6692, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"Re: publish the repository source through Command line tools\", \"username\": \"rtaylor\", \"post_text\": \"Balachandar,\\n\\nI think you might be confusing the meaning of the word "publish" in the HPCC environment. 
\\n\\nThe "publish" action you accomplished with the ecl.exe command line tool might also be termed "deploy" since that is what it actually does -- deploys the compiled .SO for the query to the specified cluster (in this case, thor100_240_a), making that pre-compiled query available through SOAP or JSON calls from an end-user GUI. \\n\\nHowever, saying you want to "publish the repository source" indicates to me that you want to see the ECL code itself "published" in some manner. Since the ECL source code files are simply text files on disk, you can just open them in any text editor. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-15 15:02:36\" },\n\t{ \"post_id\": 6690, \"topic_id\": 1543, \"forum_id\": 8, \"post_subject\": \"publish the repository source through Command line tools\", \"username\": \"Balachandar\", \"post_text\": \"I publish successfully the local ecl code through command line \\t'ecl publish --target=thor100_240_a --name=samplepub --activate sample.ecl' the same way publish the repository source. Is there any way to handle this ??\", \"post_time\": \"2014-12-15 07:50:00\" },\n\t{ \"post_id\": 6713, \"topic_id\": 1545, \"forum_id\": 8, \"post_subject\": \"Re: Roxie requires constant filenames Error\", \"username\": \"Balachandar\", \"post_text\": \"Thanks Richard. I ran in roxie instead of thor, thats why i am getting this error.\", \"post_time\": \"2014-12-22 07:33:22\" },\n\t{ \"post_id\": 6706, \"topic_id\": 1545, \"forum_id\": 8, \"post_subject\": \"Re: Roxie requires constant filenames Error\", \"username\": \"rtaylor\", \"post_text\": \"Bala,\\n\\nThere are two fundamentally different purposes for Thor and Roxie. Thor is designed to do data ETL work (transforming raw input data into final product data) while Roxie is designed to serve that final product data to end users.\\n\\nYour code looks like the type of "go get some raw data" operation that Thor does very well and Roxie is not designed to do at all. So why are you trying to do this on Roxie? 
\\n\\nRichard\", \"post_time\": \"2014-12-17 15:52:49\" },\n\t{ \"post_id\": 6703, \"topic_id\": 1545, \"forum_id\": 8, \"post_subject\": \"Roxie requires constant filenames Error\", \"username\": \"Balachandar\", \"post_text\": \"Error: Roxie requires constant filenames - expression alias cannot be computed at deployment time (8, 25) , 4029, AppSharedSvc_Recommendation_Collab_BWR_Files_CD.NormalizeCoreDataItem_Module\\n\\nMy ECL code is below and highlighted line number for error \\n\\nIMPORT STD;\\nIMPORT lib_fileservices.fileservices;\\nIMPORT AppSharedSvc_Recommendation_CollabAutomation_Process.Collab_Sequential_Module;\\nIMPORT AppSharedSvc_Recommendation_CollabAutomation_Process.Configurations_File_Module;\\n\\nSTRING dateValue := '2013-12-01' : STORED('dateValue');\\n\\ngetFileName(STRING path) := FUNCTION\\n[color=#FF4040:1tc5nl69] fileList := FileServices.RemoteDirectory(Configurations_File_Module.landingZoneIp, path, '*.xml');\\nRETURN fileList[1].name;\\nEND;\", \"post_time\": \"2014-12-17 07:45:32\" },\n\t{ \"post_id\": 6820, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"balajisampath\", \"post_text\": \"Here is sample SOAP call code to get workunit information\\n\\nSTRING URL := IP+':'+(STRING)Port+'/WsWorkunits/';\\n\\n\\n\\n WuinfoInRecord :=\\n\\trecord, maxlength(100)\\n\\t\\tstring eclWorkunit{xpath('Wuid')} := WUId;\\n\\tend;\\n\\nrESPExceptions\\t:=\\nRECORD\\n\\tstring\\t\\tCode{XPATH('Code'),maxlength(10)};\\n\\tstring\\t\\tAudience{XPATH('Audience'),maxlength(50)};\\n\\tstring\\t\\tSource{XPATH('Source'),maxlength(30)};\\n\\tstring\\t\\tMessage{XPATH('Message'),maxlength(200)};\\nEND;\\nsrcFile := RECORD\\n\\tString FILEName {XPATH('Name')};\\n\\tString Cluster {XPATH('Cluster')};\\n\\tSTRING RecCount {XPATH('Count')};\\nEND; \\n\\n\\nResultFiles_Lay := RECORD\\n\\n\\tINTEGER Seq {XPATH('Sequence')};\\n\\tString Values {XPATH('Value')};\\n\\tString Link {XPATH('Link')};\\n\\tString FileName {XPATH('FileName')};\\n\\tSTRING XmlSchema {XPATh('XmlSchema')};\\n\\t\\n\\t\\nEND;\\n\\nWuinfoResponse\\t:= RECORD\\n\\n\\tstring\\tOwner{XPATH('Workunit/Owner'),maxlength(20)};\\n\\tstring\\tCluster{XPATH('Workunit/Cluster'),maxlength(20)};\\n\\tSTRING Jobname{xpath('Workunit/Jobname'),maxlength(20)};\\n\\tSTRING State{xpath('Workunit/State'),maxlength(20)};\\n\\tSTRING Query{xpath('Workunit/Query'),maxlength(100)};\\n\\tdataset(srcFile) \\tSrcFiles{xpath('Workunit/SourceFiles/ECLSourceFile')};\\n\\tdataset(rESPExceptions)\\t\\tExceptions{XPATH('Exceptions/ESPException'),maxcount(110)};\\n\\tdataset(ResultFiles_Lay) \\tResultFiles{xpath('Workunit/Results/ECLResult')};\\nEND;\\n\\nWuInfoSoapCall\\t:=\\t\\nsoapcall(URL\\n\\t,'WUInfo'\\n\\t, wuinfoInRecord\\n\\t,wuinfoResponse\\n\\t,XPATH('WUInfoResponse')\\n\\t);\\n\\n\\noutput(WuInfoSoapCall);
\", \"post_time\": \"2015-01-21 15:31:50\" },\n\t{ \"post_id\": 6819, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Balaji,\\n\\nwould you please share the sample code if you have anything. I never used SOAPCALL api and i am bit confused about it. Thanks in advance.\\n\\nThanks,\\nArjun\", \"post_time\": \"2015-01-21 10:31:29\" },\n\t{ \"post_id\": 6738, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"Gleb Aronsky\", \"post_text\": \"You can use Nagios and/or Ganglia to monitor HPCC clusters. You can find more information on using Nagios and Ganglia with HPCC here: \\n\\nhttp://hpccsystems.com/download/docs/hp ... al-preview\\n\\nAnd get the downloads from here:\\n\\nhttp://hpccsystems.com/download/free-co ... monitoring\\n\\nThe HPCC VM that is available on the portal demonstrates some of this functionality out of the box.\\n\\nIf you just want daily checks that alert when there is an outage, then Nagios would probably be what you are looking for.\\n\\n-Gleb\", \"post_time\": \"2014-12-31 14:41:08\" },\n\t{ \"post_id\": 6730, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"balajisampath\", \"post_text\": \"Arjun,\\n\\nYou can get the information by a SOAP call to ws_machine/GetMachineInfo.\\nSOAP call will return the Condition, State,Processes Down, Physical Memory,CPU Load etc.,\\n\\nPlease check the WSDL documentation for more information\\n\\nThanks,\\nBalaji\", \"post_time\": \"2014-12-29 21:20:02\" },\n\t{ \"post_id\": 6722, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Richard,\\n\\nwe want to check it on daily basis, so that we have a plan to write some code and scheduled it. Is there any other way to do it. Please suggest.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-12-23 06:49:48\" },\n\t{ \"post_id\": 6716, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"Re: ECL code to confirm all clusters are running\", \"username\": \"rtaylor\", \"post_text\": \"Arjun,\\n\\nNo, ECL is not designed to do that. You have the ECL Watch website for your environment that provides that kind of capability.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-22 17:30:31\" },\n\t{ \"post_id\": 6714, \"topic_id\": 1547, \"forum_id\": 8, \"post_subject\": \"ECL code to confirm all clusters are running\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nIs there any ECL code is available to confirm all the Clusters(THOR / ROXIE) are Up and Running without any issues. Please share if you have anything.\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-12-22 07:44:29\" },\n\t{ \"post_id\": 6724, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Richard.\\n\\nIts working now \", \"post_time\": \"2014-12-23 16:32:00\" },\n\t{ \"post_id\": 6723, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nNotice that the URL begins with HTTPS and not HTTP. 
That means you need to check the SSL box and then all the ports will automatically configure themselves to the SSL defaults for a secure connection across the internet (instead of just across a network).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-23 14:42:59\" },\n\t{ \"post_id\": 6721, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Richard.\\n\\nUsing Legacy ECL watch, I am able to upload and spray the file. Now, in ECL IDE if I give the IP "216.19.105.7" in preferences and say OK, I am not able to connect to the server. Please let me know what might go wrong here.\\n\\nThanks,\\nPooja.\", \"post_time\": \"2014-12-23 04:44:45\" },\n\t{ \"post_id\": 6720, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nOK, I duplicated the problem. It appears that the new 5.0 interface is missing the ability to choose which directory to do uploads to. Trying to upload to "mydropzone" creates the problem. You need to upload to one of the directories below that \\n\\nYour workaround is to click on the three lines at the top right corner of the window and select "Open Legacy ECL Watch" then use the old interface to upload your file(s) to:\\n
/var/landingzone/dropzone/download/
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-22 22:52:09\" },\n\t{ \"post_id\": 6719, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"chennapooja\", \"post_text\": \"Dear Richard,\\n\\n Yes, I am able to open the link with given credentials. Then I tried to upload one file in landing zone so that it can be sprayed to the cluster. But while upload itself .... I am getting error saying permission denied. Once if spray is successful, we want to test machine learning algorithms with some big data.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2014-12-22 22:06:46\" },\n\t{ \"post_id\": 6718, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nThat is the URL for our external training cluster's ECL Watch page. Are you able to open that page with the login credentials I emailed to you on 12/12/14? If so, then what exactly are you trying to do that causes this error?\\n\\nRichard\", \"post_time\": \"2014-12-22 21:16:35\" },\n\t{ \"post_id\": 6717, \"topic_id\": 1548, \"forum_id\": 8, \"post_subject\": \"ECL Watch\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n We are trying to use URL https://216.19.105.7:18010 where upload is failing with permission denied error. May I know whether using this URL, needs some kind of access permissions, if so, please provide me contacts. \\n\\nThanks,\\nPooja.\", \"post_time\": \"2014-12-22 18:38:30\" },\n\t{ \"post_id\": 6743, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"DSC\", \"post_text\": \"OK. First, here is some working code specifically for SHA-256:\\n\\nDATA DoHash(STRING s) := BEGINC++\\n #include <openssl/evp.h>\\n #include <openssl/crypto.h>\\n \\n #body\\n \\n EVP_MD_CTX* mdctxPtr = EVP_MD_CTX_create();\\n const unsigned int kMaxDigestSize = EVP_MAX_MD_SIZE;\\n unsigned int finalDigestSize = 0;\\n unsigned char digest[kMaxDigestSize];\\n \\n if (mdctxPtr)\\n {\\n if (EVP_DigestInit_ex(mdctxPtr, EVP_sha256(), NULL) == 1)\\n {\\n if (EVP_DigestUpdate(mdctxPtr, s, lenS) == 1)\\n {\\n if (EVP_DigestFinal_ex(mdctxPtr, digest, &finalDigestSize) == 1)\\n {\\n __lenResult = finalDigestSize;\\n __result = reinterpret_cast<char*>(rtlMalloc(__lenResult));\\n memcpy(__result,digest,__lenResult);\\n }\\n }\\n }\\n \\n EVP_MD_CTX_destroy(mdctxPtr);\\n }\\nENDC++;\\n\\n//--------------------------------------\\n\\nDoHash('this is a test');\\n\\n// hex result = 2E99758548972A8E8822AD47FA1017FF72F06F3FF6A016851F45C398732BC50C
\\nThere is a caveat, however. My earlier thinking regarding OpenSSL availability was possibly incorrect. Most of the 4.x and 5.x clusters I tested this against did not have OpenSSL installed. Or, if it was installed, it was installed in a different location and I didn't find it (a runtime error of "openssl/evp.h: no such file" is something like what you will see in this case). Maybe someone else will come along and tell me I'm wrong about all this. Anyway, I did successfully test the code against an HPCC 5.x cluster (CentOS) I built from source which had OpenSSL installed in its normal location (headers in /usr/include/openssl/).\\n\\n(Edit: What may actually be happening is that HPCC itself uses OpenSSL and is linked against the appropriate libraries, but when you write embedded C++ code you also need access to the OpenSSL header files. An RPM installation of HPCC requires the libraries but won't install those header files; you will have to install them yourself.)\\n\\nI would highly recommend going with a function like this if possible for performance reasons if you're dealing with a large number of hashes and can ensure that OpenSSL is installed. Otherwise, you can use the PIPE command and execute an external binary to compute the hash.\\n\\nFinal note: It's possible to run an HPCC cluster that was not built with OpenSSL on an OS that has OpenSSL installed. In that situation, the OpenSSL libraries won't be included in the HPCC executable and you'll have to import those libraries at runtime. The way to do that is to include this line somewhere in the toplevel Thor or Roxie code:\\n\\n#OPTION('linkOptions','-lssl -lcrypto');
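As an illustration only (not part of the original post; the dataset and field names below are made up), the DoHash function above could be applied with a PROJECT to replace a sensitive key field before the data is published to Roxie:

// Hypothetical records for illustration; only DoHash comes from the post.
InRec  := {STRING9 ssn, STRING40 name};
people := DATASET([{'123456789','SMITH'}], InRec);
OutRec := {DATA32 hashed_key, STRING40 name};   // SHA-256 digest is 32 bytes
hashed := PROJECT(people,
                  TRANSFORM(OutRec,
                            SELF.hashed_key := DoHash(LEFT.ssn);
                            SELF := LEFT;));
OUTPUT(hashed);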
\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-12-31 17:29:24\" },\n\t{ \"post_id\": 6742, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"lpezet\", \"post_text\": \"Sorry for the confusion.\\n\\nThanks a lot Dan!\", \"post_time\": \"2014-12-31 16:24:08\" },\n\t{ \"post_id\": 6741, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"DSC\", \"post_text\": \"Hi Luc,\\n\\nI should read things more closely. I thought you were looking for encryption/decryption, not hashing. Hashing is considerably easier, as you don't have to deal with quickly and securely generating IVs and such.\\n\\nPIPE should work, provided you know that you have a binary to call (and where it is) on each node. There is a performance hit though, as the system will do an internal fork-and-execv set of steps for each call to the binary. If you're dealing with a large number of records, you will certainly want something with a little more performance.\\n\\nI'll keep looking at this, focusing on hash functions this time. An ECL code module would be a much cleaner solution, I think.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-12-31 16:18:26\" },\n\t{ \"post_id\": 6740, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"lpezet\", \"post_text\": \"Hi Dan!\\n\\nI'm actually looking for secure hash functions, and if I may be picky I'd even say SHA-256.\\nIn the meantime, should I just be using PIPE and output say "key,hashed key", to JOIN it with my data and keep only the "hashed key" from that point on?\\n\\nThanks!\\nLuc.\", \"post_time\": \"2014-12-31 16:02:17\" },\n\t{ \"post_id\": 6739, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"DSC\", \"post_text\": \"Hi Luc,\\n\\nAs you found, there aren't any built-in cryptographic functions beyond MD5 (which is really a hash). The platform, however, does support OpenSSL and if it is enabled (which I believe is the default) then the low-level functionality does exist. I'll take a look at an implementation. You're strictly looking for symmetric encryption/decryption, right?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-12-31 15:17:46\" },\n\t{ \"post_id\": 6735, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Re: Cryptographic functions\", \"username\": \"lpezet\", \"post_text\": \"...well MD5 is a crypto function but still...SHA is the minimum I need here \", \"post_time\": \"2014-12-30 20:33:54\" },\n\t{ \"post_id\": 6733, \"topic_id\": 1550, \"forum_id\": 8, \"post_subject\": \"Cryptographic functions\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nIs there any cryptographic hashing functions in ECL?\\nI know of the HASH and Co. functions but as far as I can tell (correct me if I'm wrong) they are all non-cryptographic (including FNV).\\n\\nWhat I'm trying to accomplish here is hashing sensitive information (i.e. some fields, not all) before sending over the data to production Roxie cluster for web delivery of that same data. 
Only SHA-type hashing functions are approved by NIST for example.\\n\\n\\nThanks!\\nLuc.\", \"post_time\": \"2014-12-30 18:28:41\" },\n\t{ \"post_id\": 6799, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nI noticed that my previous example code was not quite correct, in that it would aallow the same author pairs if duplicated more than once. Here's the corrected code that should work in all cases:
Rec := RECORD\\n UNSIGNED ID;\\n STRING10 Author;\\nEND; \\n\\nds := DATASET([ {1, 'A1'},\\n {1, 'A2'},\\n {1, 'A3'},\\n {2, 'A1'} ,\\n {2, 'A2'} ,\\n {2, 'A4'} ,\\n {3, 'A5'},\\n {3, 'A6'}, \\n {3, 'A7'}, \\n {4, 'A1'},\\n {4, 'A2'},\\n {4, 'A4'},\\n {4, 'A5'},\\n {5, 'A9'}],Rec);\\n\\n\\nAuthors := RECORD\\n STRING10 Author1;\\n STRING10 Author2;\\nEND;\\nOutRec := RECORD\\n ds.ID;\\n Authors;\\nEND;\\n\\nOutRec XF(ds L, ds R) := TRANSFORM\\n SELF.Author1 := L.Author;\\n SELF.Author2 := R.Author;\\n SELF := L;\\nEND;\\n\\nJres := JOIN(ds, ds, \\n LEFT.ID=RIGHT.ID AND \\n LEFT.Author <> RIGHT.Author,\\n XF(LEFT,RIGHT), LEFT OUTER);\\nJres;\\nGrp1 := GROUP(SORT(Jres,ID),ID);\\nDres := DEDUP(Grp1,LEFT.Author1 = RIGHT.Author2 AND LEFT.Author2 = RIGHT.Author1,ALL);\\nDres;\\n\\nHashRec := RECORD\\n outRec;\\n\\tUNSIGNED Hash1;\\n\\tUNSIGNED Hash2;\\nEND;\\t\\nHashRec AddHash(Dres L, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n\\tSELF.Hash1 := HASH64(L.Author1,L.Author2);\\n\\tSELF.Hash2 := HASH64(L.Author2,L.Author1);\\n\\tSELF := L;\\nEND;\\n\\nHashed := PROJECT(UNGROUP(Dres),AddHash(LEFT,COUNTER));\\nHashed;\\n\\nDup1 := JOIN(Hashed,Hashed,LEFT.ID<>RIGHT.ID AND LEFT.Hash1=RIGHT.Hash1);\\t\\n\\nSetDups1 := SET(Dup1,ID);\\nFirstCut := Hashed(ID NOT IN SetDups1);\\n\\nDup2 := JOIN(FirstCut,FirstCut,LEFT.ID<>RIGHT.ID AND LEFT.Hash1=RIGHT.Hash2);\\t\\n\\nAllDups := SORT(Dup1 + Dup2, Hash1, ID);\\nKeepers := DEDUP(AllDups,Hash1);\\n\\nSetDupIDs := SET(AllDups(ID NOT IN SET(Keepers,ID)),ID);\\nFinalRes := PROJECT(Hashed(ID NOT IN SetDupIDs),Authors);\\n\\nSORT(FinalRes,Author1,Author2);
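For comparison only (a sketch, not part of the original reply; it reuses the Authors record and the Dres result defined above): the interchanged pairs can also be collapsed by putting each pair into a canonical order first, after which a plain SORT/DEDUP is enough.

Canon := PROJECT(UNGROUP(Dres),
                 TRANSFORM(Authors,
                           SELF.Author1 := IF(LEFT.Author1 < LEFT.Author2, LEFT.Author1, LEFT.Author2);
                           SELF.Author2 := IF(LEFT.Author1 < LEFT.Author2, LEFT.Author2, LEFT.Author1);));
// Note: the single-author row kept by the LEFT OUTER join (A9) ends up with a
// blank Author1 here, because the blank value sorts before 'A9'.
UniquePairs := DEDUP(SORT(Canon, Author1, Author2), Author1, Author2);
OUTPUT(UniquePairs);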
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-16 16:46:26\" },\n\t{ \"post_id\": 6790, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,Incase we require unique pairs , Is it possible to carry a global dedup without ALL ?
Short answer -- no. A global DEDUP,ALL will generate a runtime error telling you ALL is not supported on a global DEDUP.
Sorting accordingly before the join should solve this problem.
Again, the answer is NO -- there is no way to sort such that the A1,A2 and A2,A1 pairs will be be appropriately deduped to a single A1,A2 pair.\\n\\nHere's my previous example that I extended to eliminate any duplicates across the articles:Rec := RECORD\\n UNSIGNED ID;\\n STRING10 Author;\\nEND; \\n\\nds := DATASET([ {1, 'A1'},\\n {1, 'A2'},\\n {1, 'A3'},\\n {2, 'A1'} ,\\n {2, 'A2'} ,\\n {2, 'A4'} ,\\n {3, 'A5'},\\n {3, 'A6'}, \\n {3, 'A7'}, \\n {4, 'A2'},\\n {4, 'A4'},\\n {4, 'A5'},\\n {5, 'A9'}],Rec);\\n\\n\\nAuthors := RECORD\\n STRING10 Author1;\\n STRING10 Author2;\\nEND;\\nOutRec := RECORD\\n ds.ID;\\n Authors;\\nEND;\\n\\nOutRec XF(ds L, ds R) := TRANSFORM\\n SELF.Author1 := L.Author;\\n SELF.Author2 := R.Author;\\n SELF := L;\\nEND;\\n\\nJres := JOIN(ds, ds, \\n LEFT.ID=RIGHT.ID AND \\n LEFT.Author <> RIGHT.Author,\\n XF(LEFT,RIGHT), LEFT OUTER);\\nJres;\\n\\nGrp1 := GROUP(SORT(Jres,ID),ID);\\nDres := DEDUP(Grp1,LEFT.Author1 = RIGHT.Author2 AND LEFT.Author2 = RIGHT.Author1,ALL);\\nDres;\\n\\nHashRec := RECORD\\n outRec;\\n UNSIGNED Hash1;\\n UNSIGNED Hash2;\\nEND;\\t\\nHashRec AddHash(Dres L, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n SELF.Hash1 := HASH64(L.Author1,L.Author2);\\n SELF.Hash2 := HASH64(L.Author2,L.Author1);\\n SELF := L;\\nEND;\\nHashed := PROJECT(UNGROUP(Dres),AddHash(LEFT,COUNTER));\\nHashed;\\n\\nDup1 := JOIN(Hashed,Hashed,LEFT.ID<>RIGHT.ID AND LEFT.Hash1=RIGHT.Hash1);\\t\\nDup2 := JOIN(Hashed,Hashed,LEFT.ID<>RIGHT.ID AND LEFT.Hash1=RIGHT.Hash2);\\t\\n\\nDups := DEDUP(SORT(Dup1 + Dup2, Hash1, -ID),Hash1);\\nSetDupIDs := SET(Dups,ID);\\nSetDupIDs;\\n\\nFinalRes := PROJECT(Hashed(ID NOT IN SetDupIDs),Authors);\\nSORT(FinalRes,Author1,Author2);
Notice that in my previous code section I added the Authors RECORD structure and changed the OutRec RECORD structure, and I also renamed the "article" field to an "ID" field. Other than that, all the rest of the changes follow the Dres from the previous example.\\n\\nI start by doing a PROJECT to change the ID field content from the article number to a record number and at the same time add two HASH64 values on the pair of author fields. \\n\\nI then define two self JOINs that will find all the duplicated author pairs. I split this into two to avoid the need for an ALL option on the one JOIN form. These JOINs find the duplicated author pairs by comparing the HASH64 values.\\n\\nThe SORT/DEDUP on the combined JOIN results then gives me the SET of record IDs to remove, so that the final PROJECT just filters out the duplicates and strips off the extraneous fields from the final result. \\n\\nThe last SORT of the final result just makes it easy to see that we've achieved what we set out to do.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-14 15:49:42\" },\n\t{ \"post_id\": 6789, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nSorting accordingly before the join should solve this problem.\\n\\nThanks.\\n\\nViswa\", \"post_time\": \"2015-01-14 14:38:53\" },\n\t{ \"post_id\": 6787, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIncase we require unique pairs , Is it possible to carry a global dedup without ALL ?\\n\\nThere can be scenarios where the pairs are interchanged across all the articles.\\n\\nArticle1 : A1 , A2 , A3\\narticle2 : A2 , A1\\narticle3 : A1 , A3\\n\\nIs it possible to have something similar like this ?\\n\\nUnique Pairs across all the articles:\\n\\nA1 , A2 \\nA1 , A3\\nA2 , A3\\n\\n\\n\\n { A1 , A2 and A2 , A1} are similar pairs but interchanged accordingly.\\n\\n{ A1 , A3 and A1 , A3} can be deduped once we ungroup the same and dedup accordingly.\\n\\nThanks a lot in advance.\\n\\nViswa\", \"post_time\": \"2015-01-14 13:55:52\" },\n\t{ \"post_id\": 6781, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"ksviswa\", \"post_text\": \"Thanks Richard...\", \"post_time\": \"2015-01-12 19:45:49\" },\n\t{ \"post_id\": 6780, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Re: Compute Pairs of Fields\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nHere's code that does what you want, using a self JOIN and a DEDUP, ALL:
Rec := RECORD\\n UNSIGNED1 Article;\\n\\tSTRING10 Author;\\nEND;\\t\\n\\nds := DATASET([ {1, 'A1'},\\n {1, 'A2'},\\n {1, 'A3'},\\n {2, 'A1'} ,\\n {2, 'A4'} ,\\n {3, 'A5'},\\n {3, 'A6'}, \\n {3, 'A7'}, \\n {4, 'A2'},\\n {4, 'A4'},\\n {4, 'A5'},\\n {5, 'A9'}],Rec);\\n\\nOutRec := RECORD\\n ds.Article;\\n STRING10 Author1;\\n STRING10 Author2};\\nEND;\\n\\nOutRec XF(ds L, ds R) := TRANSFORM\\n SELF.Author1 := L.Author;\\n SELF.Author2 := R.Author;\\n SELF := L;\\nEND;\\n\\nJres := JOIN(ds, ds, \\n LEFT.Article=RIGHT.article AND \\n LEFT.Author <> RIGHT.Author,\\n XF(LEFT,RIGHT), LEFT OUTER);\\nJres;\\nGrp1 := GROUP(SORT(Jres,article),article);\\nDres := DEDUP(Grp1,LEFT.Author1 = RIGHT.Author2 AND LEFT.Author2 = RIGHT.Author1,ALL);\\nDres;
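As a side note (an illustrative addition, not from the original reply): because the self JOIN is LEFT OUTER, a single-author article such as A9 survives as a row with a blank Author2, so real co-author pairs and singletons can be separated with simple filters on Dres:

Pairs      := Dres(Author2 <> '');   // genuine co-author pairs
Singletons := Dres(Author2 = '');    // articles with only one author
OUTPUT(Pairs);
OUTPUT(Singletons);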
You need to use the GROUP function so the ALL option on DEDUP will work (you can't do a global DEDUP, ALL), which is why I carried forward the Article ID to GROUP by. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-12 14:48:16\" },\n\t{ \"post_id\": 6779, \"topic_id\": 1559, \"forum_id\": 8, \"post_subject\": \"Compute Pairs of Fields\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI have a scenario where am planning to get all of the possible pairs of authors for a particular article.\\n\\nEx : \\n\\nArticle Id1 : A1 , A2 , A3 , A4\\nArticle Id2 : A1, A4 \\nArticle Id3 : A5 , A6 , A7 \\nArticle Id4 : A2 , A4 , A5\\nArticle Id5 : A9\\n\\nOutput :\\n\\nA1 , A2 \\nA1 , A3\\nA1 , A4\\nA2 , A3\\nA2 , A4 \\nA3 , A4\\nA1 , A4\\nA5 , A6\\nA5 , A7\\nA6 , A7\\nA2 , A4\\nA2 , A5\\nA4 , A5\\nA9 \\n\\nTried using project and normalize , not able to get the required output.\\n\\nAny thoughts on the same.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nViswa\", \"post_time\": \"2015-01-12 14:08:46\" },\n\t{ \"post_id\": 6783, \"topic_id\": 1560, \"forum_id\": 8, \"post_subject\": \"Re: JOIN() on a CSV-File\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nYour first two error messages are telling you that your first two parameters to the JOIN are backwards. It should be:IMPORT $, AerzteDaten, ArztQuartileDaten;\\n\\nr := RECORD\\n AerzteDaten.Class_Arzt;\\n ArztQuartileDaten.Class_Rank;\\nEND;\\n\\n\\nr Xform(AerzteDaten.Dataset_AerzteDaten AD,\\n ArztQuartileDaten.Dataset_ArztQuartileDaten AQD) := TRANSFORM\\n SELF := AD;\\n SELF := AQD;\\nEND;\\n\\n\\nJ1 := JOIN( AerzteDaten.Dataset_AerzteDaten,\\n ArztQuartileDaten.Dataset_ArztQuartileDaten,\\n LEFT.ArztID = RIGHT.ArztID,\\n Xform(LEFT,RIGHT),\\n KEYED(AerzteDaten.IDX_AerzteDaten)\\n);\\n\\nOUTPUT(J1);
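For reference, the payload-index / half-keyed JOIN suggested a little further down might look roughly like this (a sketch only; it assumes AerzteDaten.IDX_AerzteDaten is, or is rebuilt as, a payload INDEX keyed on ArztID that carries the Arzt fields needed in r):

// Half-keyed JOIN sketch: the INDEX itself is the right-hand recordset.
J2 := JOIN(ArztQuartileDaten.Dataset_ArztQuartileDaten,
           AerzteDaten.IDX_AerzteDaten,
           LEFT.ArztID = RIGHT.ArztID,
           TRANSFORM(r, SELF := LEFT; SELF := RIGHT;));
OUTPUT(J2);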
\\nA full-KEYED JOIN means that you pass the name of an INDEX into the right dataset (second parameter to JOIN) as the argument to the KEYED option so that the join condition can use the INDEX to find the appropriate right dataset records before passing them to the TRANSFORM. I can't tell from your code whether your AerzteDaten.IDX_AerzteDaten is in fact an INDEX, but from your name I expect that it is, so once you get past these first two errors your next problem is the file type.\\n\\nYour last error: Error: RIGHT side of a full keyed join must be a THOR disk file (CSV/XML) not currently supported (24, 7), 2036,
says exactly what it means -- a full-KEYED JOIN must have a THOR/FLAT file as the right dataset that you index into; CSV and XML are not yet supported.\\n\\nHowever, if you really want to keep that right dataset as a CSV file (usually a lot slower to work with than THOR/FLAT files), then I suggest you make the INDEX a payload index and do a half-keyed JOIN instead. We find that we use half-keyed JOINs much more often than full-keyed.\\n\\nAs a general note, CSV and XML files are perfectly fine for receiving data and spraying it into your HPCC platform, but once you start doing real work in HPCC then THOR/FLAT files will be a much more efficient storage format to work with. That means your ETL process will typically produce THOR/FLAT files for you to do your "real" work with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-13 13:51:45\" },\n\t{ \"post_id\": 6782, \"topic_id\": 1560, \"forum_id\": 8, \"post_subject\": \"JOIN() on a CSV-File\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI've taken a look into the ECL Programmers Guide (page 39 ff.). There I've tried to recode the full-keyed join with following code:\\n\\nIMPORT $, AerzteDaten, ArztQuartileDaten;\\n\\nr := RECORD\\n\\tAerzteDaten.Class_Arzt;\\n\\tArztQuartileDaten.Class_Rank;\\nEND;\\n\\n\\nr Xform(AerzteDaten.Dataset_AerzteDaten AD,\\n\\t\\t\\t\\tArztQuartileDaten.Dataset_ArztQuartileDaten AQD) := TRANSFORM\\n\\t\\tSELF := AD;\\n\\t\\tSELF := AQD;\\nEND;\\n\\n\\nJ1 := JOIN(\\n\\t\\t\\tArztQuartileDaten.Dataset_ArztQuartileDaten,\\n\\t\\t\\tAerzteDaten.Dataset_AerzteDaten,\\n\\t\\t\\tLEFT.ArztID = RIGHT.ArztID,\\n\\t\\t\\tXform(LEFT,RIGHT),\\n\\t\\t\\tKEYED(AerzteDaten.IDX_AerzteDaten)\\n);\\n\\nOUTPUT(J1);
\\n\\n\\nAs an Error i got:\\nError: Parameter AD type mismatch - expected row of <unnamed>, given row of <unnamed> (28, 10), 2064,
\\n\\nError: Omitted parameter AQD has no default value (28, 4), 2062,
\\n\\nError: RIGHT side of a full keyed join must be a THOR disk file (CSV/XML) not currently supported (24, 7), 2036,
\\n\\nThe last one confuses me, does that mean, I can't join CSV-Files ?\\n\\nIf you need more code, please let me know (e.g. the Layouts[Class_] or Datasets)\", \"post_time\": \"2015-01-13 13:29:17\" },\n\t{ \"post_id\": 6796, \"topic_id\": 1564, \"forum_id\": 8, \"post_subject\": \"Re: HPCC cluster - Login\", \"username\": \"rtaylor\", \"post_text\": \"richard.taylor@lexisnexis.com\", \"post_time\": \"2015-01-15 18:25:53\" },\n\t{ \"post_id\": 6795, \"topic_id\": 1564, \"forum_id\": 8, \"post_subject\": \"Re: HPCC cluster - Login\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nCould you please tell me whom shall I contact to get the login credentials??\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2015-01-15 18:23:03\" },\n\t{ \"post_id\": 6794, \"topic_id\": 1564, \"forum_id\": 8, \"post_subject\": \"Re: HPCC cluster - Login\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nThat cluster is an external-facing training cluster for use by ECL training classes. As such, the passwords and logins assigned for class use do expire and are then deleted.\\n\\nAnything further you have on this should be communicated through email, please.\\n\\nRichard\", \"post_time\": \"2015-01-15 18:14:22\" },\n\t{ \"post_id\": 6793, \"topic_id\": 1564, \"forum_id\": 8, \"post_subject\": \"HPCC cluster - Login\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nI was unable to login hpcc cluster (https://216.19.105.7:18010) with my login credentials, where as I was able to login at the start of this month.\\n\\nPlease help me to login.\\n\\nSwetha\", \"post_time\": \"2015-01-15 17:57:22\" },\n\t{ \"post_id\": 7009, \"topic_id\": 1567, \"forum_id\": 8, \"post_subject\": \"Re: Number Format\", \"username\": \"NSD\", \"post_text\": \"thx, worked. I created a module for this, so i can easily import the function\", \"post_time\": \"2015-02-22 15:03:41\" },\n\t{ \"post_id\": 6811, \"topic_id\": 1567, \"forum_id\": 8, \"post_subject\": \"Re: Number Format\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nAs you have already noticed, ECL has no GUI aspects to it at all. The language is purely, simply, and only a data manipulation tool. The concept is that, once you have manipulated the data such that the values are correct, you will usually provide end-users with a GUI interface to access that data (usually from Roxie). That GUI interface can be a website, or an end-user app. In either case, the languages used to create those GUI interfaces are designed to do exactly that kind of formatting, so that would be the "proper" place to do it.\\n\\nWith that said, formatting numbers with commas and monetary symbols can be accomplished in ECL, but not as easily as your SQL example. 
You would need to write a function that would return a formatted string, something like this simple example:\\n\\nval1 := 100000;\\nval2 := 100000.5;\\nval3 := 1000;\\nval4 := 100.25;\\n\\nSTRING13 FormatMoney(REAL InVal, STRING1 Currency) := FUNCTION\\n STRING12 MoneyVal := REALFORMAT(InVal,12,2);\\n NumLen := LENGTH(TRIM(MoneyVal,ALL));\\n CommaStr := MAP(NumLen <= 6 => MoneyVal,\\n NumLen BETWEEN 7 AND 9 =>\\n MoneyVal[1..6] + ',' + MoneyVal[7..12], \\n NumLen BETWEEN 10 AND 12 =>\\n MoneyVal[1..3] + ',' + MoneyVal[4..6] + ',' + MoneyVal[7..12], \\n MoneyVal);\\n CurrStr := Currency + TRIM(CommaStr,ALL);\\n Spaces := 13 - LENGTH(CurrStr) + 1;\\n RETURN CHOOSE(Spaces,\\n CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr,\\n ' ' + CurrStr);\\nEND;\\n\\nFormatMoney(val1,'$'); // $100,000.00\\nFormatMoney(val2,'$'); // $100,000.50\\nFormatMoney(val3,'$'); // $1,000.00\\nFormatMoney(val4,'$'); // $100.25
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-20 16:00:50\" },\n\t{ \"post_id\": 6804, \"topic_id\": 1567, \"forum_id\": 8, \"post_subject\": \"Number Format\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI've successfully coded computed fields in payload keys (was necessary to do so, due to csv-files). In (Postgre-)SQL I can show numbers in different formats like below:\\n\\nTO_CHAR(ROUND(SUM(KD."GLK TN")), '999G999G990 €') "GLK"\\n--> 100,000.00 €
\\n\\nis that possible in HPCC too (1 line code) ? Ive already tried REALFORMAT and INTEGERFORMAT, but that wasn't what i wanted. \\n\\nthx!\", \"post_time\": \"2015-01-19 12:56:05\" },\n\t{ \"post_id\": 7008, \"topic_id\": 1568, \"forum_id\": 8, \"post_subject\": \"Re: Different Computed Fields in Payload Keys\", \"username\": \"NSD\", \"post_text\": \"thx, worked.\", \"post_time\": \"2015-02-22 15:03:15\" },\n\t{ \"post_id\": 6812, \"topic_id\": 1568, \"forum_id\": 8, \"post_subject\": \"Re: Different Computed Fields in Payload Keys\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nSince I don't know your data, there are some aspects of your code that I can only guess at. Given that, here are my suggestions (which may be incorrect, if I've assumed incorrectly):\\n\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-20 16:28:11\" },\n\t{ \"post_id\": 6805, \"topic_id\": 1568, \"forum_id\": 8, \"post_subject\": \"Different Computed Fields in Payload Keys\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI have two Datasets and i need to Compute different Values from both Sets and merge them into one. I tried this with the Section "Computed Fields in Payload Keys" from the Programmers Guide, but I got stucked at the merging. Here's the code:\\n\\nr1 countem (t1 L, t2 R) := TRANSFORM\\n SELF.Programm_Kosten := L.Programm_Kosten;\\n SELF := R;\\n SELF := L;\\nEND;
which should eliminate your 0 problem in the Programm_Kosten fieldj := JOIN(t1,t2,LEFT.Prog_Quartal=RIGHT.Programm_Quartal,countem(LEFT,RIGHT));\\n
#OPTION('outputlimit',100);\\t\\t//Needed, due to DataSet too large\\n\\nIMPORT $;\\n\\nKostenDaten \\t\\t\\t:= $.KostenDaten.CSVDataset;\\nKostenDaten_IDX\\t\\t:= $.KostenDaten.IDX;\\n\\nProgrammPhase\\t\\t\\t:= $.ProgrammPhase.CSVDataset;\\nProgrammPhase_IDX\\t:= $.ProgrammPhase.IDX;\\n\\n\\n\\nr1 := RECORD\\n\\tProgrammPhase.Prog_Quartal;\\n\\tUNSIGNED2\\t\\tAnzahl_Teilnehmer \\t:= 0;\\n\\tUNSIGNED2\\t\\tJahr\\t\\t\\t\\t\\t\\t\\t\\t:= 0;\\n\\tUNSIGNED1\\t\\tQuartal\\t\\t\\t\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tArzt_Kosten\\t\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tApoth_Kosten\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tKH_Kosten\\t\\t\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tGLK\\t\\t\\t\\t\\t\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tProgramm_Kosten\\t\\t\\t:= ROUND(SUM(GROUP,ProgrammPhase.Prog_Kosten));\\n\\tREAL8\\t\\t\\t\\tZuweisung\\t\\t\\t\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tGesamtkosten\\t\\t\\t\\t:= 0;\\nEND;\\n\\n\\nt1\\t:= TABLE(ProgrammPhase,r1, Prog_Quartal);\\nst1 := DISTRIBUTE(t1,HASH32(Prog_Quartal));\\n\\n\\n\\n\\nr2 := RECORD\\n\\tKostenDaten.Programm_Quartal;\\n\\tUNSIGNED2\\t\\tAnzahl_Teilnehmer \\t:= COUNT(GROUP);\\n\\tUNSIGNED2\\t\\tJahr\\t\\t\\t\\t\\t\\t\\t\\t:= KostenDaten.Jahr;\\n\\tUNSIGNED1\\t\\tQuartal\\t\\t\\t\\t\\t\\t\\t:= KostenDaten.Quartal;\\n\\tREAL8\\t\\t\\t\\tArzt_Kosten\\t\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.Arzt_Kosten_TN));\\n\\tREAL8\\t\\t\\t\\tApoth_Kosten\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.Apoth_Kosten_TN));\\n\\tREAL8\\t\\t\\t\\tKH_Kosten\\t\\t\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.KH_Kosten_TN));\\n\\tREAL8\\t\\t\\t\\tGLK\\t\\t\\t\\t\\t\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.GLK_TN));\\n\\tREAL8\\t\\t\\t\\tProgramm_Kosten\\t\\t\\t:= 0;\\n\\tREAL8\\t\\t\\t\\tZuweisung\\t\\t\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.Zuweisung_TN));\\n\\tREAL8\\t\\t\\t\\tGesamtkosten\\t\\t\\t\\t:= ROUND(SUM(GROUP,KostenDaten.Gesamt_TN));\\nEND;\\n\\nt2\\t:= TABLE(KostenDaten,r2, Jahr, Quartal, Programm_Quartal);\\nst2 := DISTRIBUTE(t2,HASH32(Jahr, Quartal, Programm_Quartal));\\n\\n\\n\\nr1 countem (t1 L, t2 R) := TRANSFORM\\n\\tSELF := R;\\n\\tSELF := L;\\nEND;\\n\\nj := JOIN(st1,st2,LEFT.Prog_Quartal=RIGHT.Programm_Quartal,countem(LEFT,RIGHT),LOCAL);\\n\\n\\n\\nout := SORT(j, {Jahr, Quartal});\\n\\n\\nOUTPUT(out,ALL);
\\n\\nSure, the field "Programm_Kosten" is always 0, but I don't know how to fix this. My next and final step will be, to restrict the SUM() of "Programm_Kosten" by defined search keys in SQL it would be\\nWHERE\\n\\tPP."Prog Phase" NOT IN ('Matching') OR\\n\\tPP."Prog Phase" IS NULL\\n
\\n\\nAny hints where I can get these Informations?\", \"post_time\": \"2015-01-19 17:54:07\" },\n\t{ \"post_id\": 6926, \"topic_id\": 1592, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL\", \"username\": \"lpezet\", \"post_text\": \"Ok done: https://track.hpccsystems.com/browse/HPCC-12999\\n\\nThanks Richard!\\nLuc.\", \"post_time\": \"2015-02-09 18:25:08\" },\n\t{ \"post_id\": 6924, \"topic_id\": 1592, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL\", \"username\": \"rtaylor\", \"post_text\": \"Luc,\\n\\nI just tried it and got the same error in 5.0.4-1 so it looks like a regression. Please report this in JIRA.\\n\\nRichard\", \"post_time\": \"2015-02-09 18:14:13\" },\n\t{ \"post_id\": 6921, \"topic_id\": 1592, \"forum_id\": 8, \"post_subject\": \"HTTPCALL\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI was just trying HTTPCALL using the code provided in the documentation:\\n\\nworldBankSource := RECORD\\n STRING name {XPATH('name')}\\nEND;\\n\\nOutRec1 := RECORD\\n DATASET(worldBankSource) Fred{XPATH('/source')};\\nEND;\\n\\nraw := HTTPCALL('http://api.worldbank.org/sources/', 'GET', 'application/xml', OutRec1);\\n\\nOUTPUT(raw);\\n
\\n\\nHere's what I got:\\n\\nError: System error: -1: <Error><text>HTTP error (404) in processQuery</text><soapresponse><?xml version='1.0' encoding='UTF-8'?><fault><faultstring>Not Found</faultstring><detail><errorcode>CLASSIFICATION_FAILURE</errorcode></detail></fault></soapresponse><url>http://api.worldbank.org:80/sources/</url></Error> (in HTTP dataset G1 E2) (0, 0), -1, \\n
\\n\\nThe url seems to return what's expected. I'm using 4.2.2-1.\\nWhat did I do wrong?\\n\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-02-09 15:57:34\" },\n\t{ \"post_id\": 6952, \"topic_id\": 1599, \"forum_id\": 8, \"post_subject\": \"Upload File Programatically\", \"username\": \"omnibuzz\", \"post_text\": \"Is there a way to upload a small file programatically to the landing zone. Either using a command line utility like DFUPlus or through a published webservice like DFUSpray that allows me to spray using the "HPCC" credentials. \\n\\nI don't want the ftp/scp option as I would need the login ID and password to the machine. \\n \\nCheers\\nSrini\", \"post_time\": \"2015-02-12 19:36:15\" },\n\t{ \"post_id\": 6967, \"topic_id\": 1601, \"forum_id\": 8, \"post_subject\": \"Re: Loading fixed-length records file\", \"username\": \"rtaylor\", \"post_text\": \"Luc,\\n\\nNo problem! \\n\\nRichard\", \"post_time\": \"2015-02-17 16:08:21\" },\n\t{ \"post_id\": 6966, \"topic_id\": 1601, \"forum_id\": 8, \"post_subject\": \"Re: Loading fixed-length records file\", \"username\": \"lpezet\", \"post_text\": \"Hi Richard!\\n\\nMost of the time it's just files I download (without any further processing).\\nThat test file was created using "vi".\\nI simply put one record in it, but there would be multiple (1 record = 1 line) within one file.\\n\\nEntering hex mode in "vi" I see the 0a character at the end of the line...which is the Line Feed. Makes sense now.\\n\\nMy bad, sorry for the inconvenience.\", \"post_time\": \"2015-02-17 15:37:54\" },\n\t{ \"post_id\": 6965, \"topic_id\": 1601, \"forum_id\": 8, \"post_subject\": \"Re: Loading fixed-length records file\", \"username\": \"rtaylor\", \"post_text\": \"Luc,
I do see the size of the file is 38 bytes...but then what can I do about it?\\nIf I just add a "garbage" STRING1 field at the end of my layout, it will work just fine.\\nAm I missing something or doing something wrong?
Given that the file size is 38 and your record size should be 37, I would open that file in a Hex editor and see exactly what's in that extra byte before doing anything else.\\n\\nSo my list of questions \\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-17 15:26:27\" },\n\t{ \"post_id\": 6957, \"topic_id\": 1601, \"forum_id\": 8, \"post_subject\": \"Re: Loading fixed-length records file\", \"username\": \"lpezet\", \"post_text\": \"Looks like when it's a Slave doing it I get the following error instead:\\n\\nError: System error: -1: Graph[1], diskread[2]: SLAVE 172.31.38.72:20100: CFileSerialStream::get read past end of stream, CFileSerialStream::get read past end of stream - handling file: //172.31.38.70:7100/var/lib/HPCCSystems/mydropzone/weather/ghcnd-stations.txt (0, 0), -1, \\n
\\nDifferent file but same fixed-length format...adding an extra STRING1 field solves the problem as well.\\n\\nForgot to mention I'm using Community Edition 5.0.2-1.\", \"post_time\": \"2015-02-16 03:24:04\" },\n\t{ \"post_id\": 6956, \"topic_id\": 1601, \"forum_id\": 8, \"post_subject\": \"Loading fixed-length records file\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nSomething really stupid I'm sure.\\nI'm trying to load a file with fixed-length records.\\nLike so:\\n\\nlayout := RECORD\\n\\tSTRING11 id;\\n\\t\\tSTRING4 year;\\n\\t\\tSTRING2 month;\\n\\t\\tSTRING4 element;\\n\\t\\tSTRING5 value1;\\n\\t\\tSTRING1 mflag1;\\n\\t\\tSTRING1 qflag1;\\n\\t\\tSTRING1 sflag1;\\n\\t\\tSTRING5 value2;\\n\\t\\tSTRING1 mflag2;\\n\\t\\tSTRING1 qflag2;\\n\\t\\tSTRING1 sflag2;\\nEND;\\n
\\nThe content of the file is as follow:\\nUS1WIPC0011201502PRCP 0T N 0 N
\\n(yes, it's weather data).\\n\\nMy ECL code to load it:\\nDATASET(std.File.ExternalLogicalFilename('172.xx.xx.xx', '/var/lib/HPCCSystems/mydropzone/weather/test.txt'), layout,FLAT,UNSORTED);
\\n\\nNow as is I'm getting:\\nError: System error: 1: File //172.31.38.70:7100/var/lib/HPCCSystems/mydropzone/weather/test.txt size is 38 which is not a multiple of 37 (0, 0), 1,
\\n\\nDouble checking the length of my awesome data file:\\n$ cat test.txt | awk '{print length}'\\n37
\\n\\nI do see the size of the file is 38 bytes...but then what can I do about it?\\nIf I just add a "garbage" STRING1 field at the end of my layout, it will work just fine.\\nAm I missing something or doing something wrong?\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-02-14 18:34:58\" },\n\t{ \"post_id\": 6989, \"topic_id\": 1606, \"forum_id\": 8, \"post_subject\": \"Execute publish query using Jquery or any other client side\", \"username\": \"Balachandar\", \"post_text\": \"We have executed published queries from Java end. Is there any way to call the publish query using client side scripting JQuery\\nPlease find the below sample code, i am getting bad request\\n\\n$.ajax({\\n type: "GET",\\n url: "http://10.144.240.9:8021/WsEcl/forms/default/query/hthor/test",\\n //data: markers,\\n contentType: "application/json; charset=utf-8",\\n dataType: "json",\\n beforeSend: function (xhr) {\\n xhr.setRequestHeader('Authorization', 'Basic c2l2YXByZ3g6UEBzc3dvcmQ=');\\n \\t}.\\n\\nThanks,\\nBala\", \"post_time\": \"2015-02-19 14:32:59\" },\n\t{ \"post_id\": 7040, \"topic_id\": 1609, \"forum_id\": 8, \"post_subject\": \"Re: "No access to Dali" when saving a CSV file\", \"username\": \"LY\", \"post_text\": \"Thank you, Bob.\", \"post_time\": \"2015-02-28 06:03:10\" },\n\t{ \"post_id\": 7018, \"topic_id\": 1609, \"forum_id\": 8, \"post_subject\": \"Re: "No access to Dali" when saving a CSV file\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nTry wrapping the superfile function around NOTHOR, like this:\\n\\nDataset(Std.File.FsLogicalFileNameRecord) fileList := NOTHOR(STD.File.SuperFileContents('~test::wc::superfile'));\\n\\nOUTPUT(fileList,,'~test::wc::filelist',CSV(SEPARATOR(','), TERMINATOR('\\\\n')),OVERWRITE);
\\n\\nLooking at the output file, it looks like it is distributed properly across the cluster after execution.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-24 15:38:46\" },\n\t{ \"post_id\": 7006, \"topic_id\": 1609, \"forum_id\": 8, \"post_subject\": \""No access to Dali" when saving a CSV file\", \"username\": \"LY\", \"post_text\": \"Hello,\\n\\nI am a beginner to ECL. I am trying to read all subfile names of a super file.\\n\\nI found the below code worked fine on hthor, but failed on thor. The error message was "No access to Dali - this normally means a plugin call is being called from a thorslave".\\n\\nAnd I found the issue was on the second OUTPUT statement. If I change it to OUTPUT(filelist), there will be no error.\\n\\n\\nDataset(Std.File.FsLogicalFileNameRecord) fileList := STD.File.SuperFileContents('~test::wc::superfile');\\n\\nOUTPUT(fileList,,'~test::wc::filelist',CSV(SEPARATOR(','), TERMINATOR('\\\\n')),OVERWRITE);\\n
\\n\\nCould anyone help me to know what I should do to avoid the error?\\n\\nThanks in advance.\", \"post_time\": \"2015-02-21 00:23:27\" },\n\t{ \"post_id\": 7150, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"passban\", \"post_text\": \"Looking at the output file, it looks like it is distributed properly across the cluster after execution.\\n\\nRegards,\\n\\nBob\\n\\n________________________\\nSolitaireCardGame.org\", \"post_time\": \"2015-03-18 09:58:35\" },\n\t{ \"post_id\": 7149, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"passban\", \"post_text\": \"Looking at the output file, it looks like it is distributed properly across the cluster after execution.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-18 09:57:46\" },\n\t{ \"post_id\": 7065, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"Leofei\", \"post_text\": \"Thank you, Bob. Btw, I also tried 'FROMXML', but it seems it cannot recognize the 'Dataset[@name="Results"]/Row' piece.\\n\\nThanks,\\n-Fan\", \"post_time\": \"2015-03-03 16:38:24\" },\n\t{ \"post_id\": 7063, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"bforeman\", \"post_text\": \"See revised code below. After a lot of testing and reading the docs again, it looks like you need to define and expose the child fields that you need to extract, if you want to use XML parsing. Of course, if you treat the document as free-form text, you could write a pattern that looks for an explicit tag and then extract what you need, but you still need the tag name. No way around it I guess.\\n\\nRegards,\\n\\nBob\\n\\nxmlData := '<roottag>'+\\n '<Dataset name="RecordsAvailable">'+\\n ' <Row><RecordsAvailable>2</RecordsAvailable></Row>'+\\n '</Dataset>'+\\n '<Dataset name="Results">'+\\n ' <Row><id>001</id><lname>Smith</lname><fname>John</fname><addr>1000 Alderman Dr</addr><st>GA</st></Row>'+\\n ' <Row><id>002</id><lname>Smith</lname><fname>Jason</fname><addr>1000 Alderman Dr</addr><st>GA</st></Row>'+\\n '</Dataset>'+\\n '<Dataset name="BatchResults">03'+\\n '</Dataset>'+\\n '</roottag>';\\n\\t\\t\\n\\t\\tchildrecord := RECORD\\n\\t\\t STRING id;\\n\\t\\t STRING lname;\\n\\t\\t\\tSTRING fname;\\n\\t\\t\\tSTRING addr;\\n\\t\\t\\tSTRING st;\\n\\t\\tEND;\\t\\n\\n ds := dataset([{xmlData}], {string line});\\n\\n outrec := RECORD\\n STRING RecordsAvailable;\\n dataset(childrecord) Results;\\n STRING BatchResults;\\n END;\\n\\n outrec t({string line} L) := TRANSFORM\\n SELF.RecordsAvailable := XMLTEXT('Dataset[@name="RecordsAvailable"]/Row/RecordsAvailable');\\n SELF.Results := XMLPROJECT('Dataset[@name="Results"]/Row/',\\n transform(childrecord,\\n self.id := XMLTEXT('id'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.lname := XMLTEXT('lname'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.fname := XMLTEXT('fname'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.addr := XMLTEXT('addr'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.st := XMLTEXT('st'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t ));\\n\\n SELF.BatchResults := XMLTEXT('Dataset[@name="BatchResults"]');\\n end;\\n\\n textout := PARSE(ds, line, t(LEFT), XML('/roottag'));\\n\\n output(textout);\\n
\", \"post_time\": \"2015-03-03 16:05:11\" },\n\t{ \"post_id\": 7051, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"Leofei\", \"post_text\": \"Bob, thanks for the response!! I didn't know we can put the wildcard into the XPATH.\\n\\nBut the results still looks not good. I highlighted the result xml tags, the original tags have been replaced with the new ones...\", \"post_time\": \"2015-03-02 19:25:57\" },\n\t{ \"post_id\": 7049, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Parse Function\", \"username\": \"bforeman\", \"post_text\": \"Hello!\\n\\nThank you for the nice example! \\n\\nSimply add a wildcard to your XPATH of the XMLPROJECT, like this:\\n\\n
outrec t({string line} L) := TRANSFORM\\n SELF.RecordsAvailable := XMLTEXT('Dataset[@name="RecordsAvailable"]/Row/RecordsAvailable');\\n SELF.Results := XMLPROJECT('Dataset[@name="Results"]/Row/*',\\n transform({string line},\\n self.line := XMLTEXT('')\\n ));
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-02 16:45:55\" },\n\t{ \"post_id\": 7039, \"topic_id\": 1613, \"forum_id\": 8, \"post_subject\": \"Regarding Parse Function\", \"username\": \"Leofei\", \"post_text\": \"Hi, I'm trying to get this result from the following XML string. Is there any simple way to do this? I wrote some code, but I cannot get the result I expected. \\n\\nMeanwhile, I know I could use some functions like XMLPROJECT to populate all the fields of the child dataset one by one, but since the real child layout is much more complicated than the sample I provided here. I'm wondering if there is an easy way to just store all the info under the <Dataset name="Results"> tag into a field?\\n\\nxmlData := '<roottag>'+\\n'<Dataset name="RecordsAvailable">'+\\n' <Row><RecordsAvailable>2</RecordsAvailable></Row>'+\\n'</Dataset>'+\\n'<Dataset name="Results">'+\\n' <Row><id>001</id><lname>Smith</lname><fname>John</fname><addr>1000 Alderman Dr</addr><st>GA</st></Row>'+\\n' <Row><id>002</id><lname>Smith</lname><fname>Jason</fname><addr>1000 Alderman Dr</addr><st>GA</st></Row>'+\\n'</Dataset>'+\\n'<Dataset name="BatchResults">03'+\\n'</Dataset>'+\\n'</roottag>';\\n\\nds := dataset([{xmlData}], {string line});\\n\\noutrec := RECORD\\n STRING RecordsAvailable;\\n dataset({string line}) Results;\\n STRING BatchResults;\\nEND;\\n\\noutrec t({string line} L) := TRANSFORM\\n SELF.RecordsAvailable := XMLTEXT('Dataset[@name="RecordsAvailable"]/Row/RecordsAvailable');\\n SELF.Results := XMLPROJECT('Dataset[@name="Results"]/Row',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ttransform({string line},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.line := XMLTEXT('')\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t));\\n SELF.BatchResults := XMLTEXT('Dataset[@name="BatchResults"]/Row/BatchResults');\\nend;\\n\\ntextout := PARSE(ds, line, t(LEFT), XML('/roottag'));\\n\\noutput(textout);\\n
\", \"post_time\": \"2015-02-27 22:41:04\" },\n\t{ \"post_id\": 7089, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh,BTW, is there some way in which I can store an action in a string and then execute it,
Not that I know of.\\n\\nRichard\", \"post_time\": \"2015-03-05 16:32:26\" },\n\t{ \"post_id\": 7088, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"Thanks Richard,\\n\\nBTW, is there some way in which I can store an action in a string and then execute it,\\n\\nE.g. \\n\\nSTRING str := 'PATTERN pat1 := [\\\\''+dsWords[1].word+'\\\\'];';\\n//Execute str\\n
\\n\\nSorry for an incomplete example, its just a curious questions. \\nI thought of multiple approaches and this was just one of them.\", \"post_time\": \"2015-03-05 16:19:27\" },\n\t{ \"post_id\": 7087, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh,\\n\\nUnfortunately, PARSE expects static patterns. My attempts to make it take dynamic data for your PATTERN by encapsulating the code within a FUNCTIONMACRO were unsuccessful due to constraints of the ECL Template Language.\\n\\nHowever, I think you'll find this code accomplishes the same purpose:IMPORT STD;\\n\\nMatchWords(DATASET({STRING sentence}) SearchDS,DATASET({STRING word}) WordDS) := FUNCTION\\n\\tResRec := RECORD\\n\\t\\tSTRING sentence;\\n\\t\\tSTRING word;\\n\\tEND;\\n\\tResRec ParseWords(SearchDS Src) := TRANSFORM\\n\\t\\n\\t\\t{STRING word} FindMatches(WordDS L) := TRANSFORM\\n\\t\\t\\tSELF.word := IF(STD.Str.Find(Src.sentence,L.word,1)>0,L.Word,''); \\n\\t\\tEND;\\n\\t\\tFindMatch := PROJECT(WordDS,FindMatches(LEFT))(word<>'');\\n\\t\\t\\n\\t\\tSELF.word := IF(EXISTS(FindMatch),FindMatch[1].word,SKIP);\\n\\t\\tSELF := Src;\\n\\tEND;\\n\\tparseText := PROJECT(SearchDS,ParseWords(LEFT));\\n RETURN parseText;\\nEND;\\n\\nds1 := DATASET([{'Hi'},{'HELLO'},{'THIS IS TEST'}],{STRING word});\\nds3 := DATASET([{'Hello'},{'another'},{'more'}],{STRING word});\\n\\nds2 := DATASET([{'Hi This is a test'},\\n {'HELLO This is another test'},\\n {'Hello THIS IS TEST'},\\n {'Yet one more test'}],{STRING sentence});\\n\\n\\nOUTPUT(MatchWords(ds2,ds1));\\nOUTPUT(MatchWords(ds2,ds3));
The only real "trick" here is the nesting of one PROJECT within the TRANSFORM of another PROJECT. I also encaopsulated it in a FUNCTION so it will work with any DATASETs with the same structures.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-05 15:59:16\" },\n\t{ \"post_id\": 7085, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"\\nds := DATASET([{'Hi'},{'HELLO'},{'THIS IS TEST'}],{STRING word});\\n\\nSET OF STRING words := (SET OF STRING) SET(ds,word);\\n\\n//PATTERN pat := words; //INVALID\\n\\nPATTERN pat2 := ['Hi', 'HELLO', 'THIS IS TEST']; //VALID\\n\\nds2 := DATASET([{'Hi This is a test'}],{STRING sentence});\\n\\nparseText := PARSE(ds2,sentence,pat2,{ds2.sentence,STRING match := MATCHTEXT(pat2)},FIRST);\\n\\nOUTPUT(parseText);\\n
\\n\\nNow the problem I am facing is that the dataset which contains list of words is generated dynamically and might change everytime, so I need a way to create a pattern definition for all the words in the dataset dynamically.\", \"post_time\": \"2015-03-04 20:57:25\" },\n\t{ \"post_id\": 7084, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh,\\n\\nThe syntax error I get from your code is, "This expression cannot be included in a pattern." That's a clear statement.\\n\\nSo, you still have not shown your code past the pattern definition. I need to see what you're trying to DO with it, please.\\n\\nRichard\", \"post_time\": \"2015-03-04 20:45:56\" },\n\t{ \"post_id\": 7083, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh.\\n\\nI see that you posted the example code while I was forming my last reply.\\n\\nThank you,\\n\\nRichard\", \"post_time\": \"2015-03-04 20:41:40\" },\n\t{ \"post_id\": 7082, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh,\\n\\nYes, I understood all that from your first post.\\n\\nWhat you haven't shown is how you intend to use that pattern in your parse. IOW, you said that what you want to do works when you have an explicitly defined set of words as your pattern. Can you please show me the code that works?\\n\\nRichard\", \"post_time\": \"2015-03-04 20:40:17\" },\n\t{ \"post_id\": 7081, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"[quote="rtaylor":1q5o1vtq]Mragesh,\\n\\nWithout more context, it's very difficult to say where you're going wrong. \\n\\nIf the setOFWords is only used as part of a filter expression, then it may not need to be in a PATTERN definition at all. \\n\\nWhat exactly are you trying to do?\\n\\nHTH,\\n\\nRichard\\n\\nHope this gives you a clear picture, created a small example:\\n\\nds := DATASET([{'HI'},{'HELLO'},{'THIS IS TEST'}],{STRING word});\\n\\nSET OF STRING words := (SET OF STRING) SET(ds,word);\\n\\nPATTERN pat := words; //INVALID\\n\\nPATTERN pat2 := ['HI', 'HELLO', 'THIS IS TEST']; //VALID\\n
\", \"post_time\": \"2015-03-04 20:34:49\" },\n\t{ \"post_id\": 7080, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"\\nSET OF STRING setOFWords := (SET OF STRING)SET(topWords,word);\\nPattern patOfWords := setOFWords;\\n
\\n\\nI hope it makes more sense now.\", \"post_time\": \"2015-03-04 20:25:54\" },\n\t{ \"post_id\": 7079, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"I have a dataset with the following layout:\\n\\ntopWordsLayout := RECORD\\n\\tSTRING word := filteredOut.word;\\nEND;\\n
\\n\\nNow each row in this dataset is a word.\\n\\nI want to dynamically create a PATTERN definition from all the words in the dataset and then use it to PARSE some text and see words that match.\\n\\nI know for a fact that \\n\\n[list-of-patterns]: A comma-delimited list of alternative patterns, useful for string sets. This is the same as OR.\\n
\\n\\nis a valid parsepattern definition.\", \"post_time\": \"2015-03-04 20:24:17\" },\n\t{ \"post_id\": 7078, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Re: Creating PATTERN on the fly\", \"username\": \"rtaylor\", \"post_text\": \"Mragesh,\\n\\nWithout more context, it's very difficult to say where you're going wrong. \\n\\nIf the setOFWords is only used as part of a filter expression, then it may not need to be in a PATTERN definition at all. \\n\\nWhat exactly are you trying to do?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-04 20:18:29\" },\n\t{ \"post_id\": 7076, \"topic_id\": 1618, \"forum_id\": 8, \"post_subject\": \"Creating PATTERN on the fly\", \"username\": \"Mragesh\", \"post_text\": \"Hi,\\n\\nI have been trying to create a PATTERN on the fly, but most of the approaches didn't work out so far.\\n\\nThe one which I feel should be working is:\\n\\n\\ntopWordsLayout := RECORD\\n\\tSTRING word := filteredOut.word;\\nEND;\\n\\ntopWords := TABLE(filteredOut[..10],topWordsLayout);\\n\\nsetOFWords := SET(topWords,word);\\n\\nPattern patOfWords := setOFWords ;\\n
\\n\\nNow, The following PATTERN creation code works fine:\\n\\nPattern patOfWords := ['Hi','Hello'] ;\\n
\\n\\nCan you please suggest, what exactly am I missing?\", \"post_time\": \"2015-03-04 17:55:49\" },\n\t{ \"post_id\": 18883, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"RoqScheer\", \"post_text\": \"The compiler supports a "-legacy" parameter on the command line. When using the ECL IDE, this parameter can be added in the "Arguments" field under the "Compiler" tab of the "Preferences" dialog box.\\n\\nThis parameter forces the compiler to look for definitions in the same folder without the need to explicitly IMPORT and fully qualify them. This option in mentioned in the SALT Users Guide.\", \"post_time\": \"2017-09-21 14:29:52\" },\n\t{ \"post_id\": 7112, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nSALT does support Open Source.\\n\\nRichard\", \"post_time\": \"2015-03-09 19:46:16\" },\n\t{ \"post_id\": 7111, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"tlhumphrey2\", \"post_text\": \"Proc_Iterate is in APSALTVER21A.\\n\\nAnother question. Is there a version of SALT that can be used with the open source platform? I'm using SALT 3.0 and when I do salt APSALTVER21A.spc > APSALTVER21A.mod
\\nThe ECL code that is generated by SALT doesn't have the called attributes fully qualified and doesn't use '$'.\", \"post_time\": \"2015-03-09 19:43:02\" },\n\t{ \"post_id\": 7110, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nWhat directory is Proc_Iterate in?\\n\\nRichard\", \"post_time\": \"2015-03-09 19:36:43\" },\n\t{ \"post_id\": 7109, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I changed my query so it matches your suggested query. Now I get the following error:\\nError: Object '_local_directory_' does not have a member named 'Proc_Iterate' (9, 8)
\", \"post_time\": \"2015-03-09 19:03:41\" },\n\t{ \"post_id\": 7108, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"rtaylor\", \"post_text\": \"Tim,I'm having to add an IMPORT APSALTVER21A, to everyone of these attributes and fully qualify any attributes called that are in APSALTVER21A.
If I understand correctly what the problem is, it looks like your Proc_Iterate is in the APSALTVER21A directory, and your question is why you have to add IMPORT APSALTVER21A to every .ecl file in that directory and fully qualify every reference to any definition from the same directory.\\n\\nThe answer is that in the change from legacy pre-OSS ECL to the current Open Source ECL syntax, the most significant change was to definition qualification. In the current syntax, every definition referenced must be fully qualified, therefore an IMPORT is also required.\\n\\nYour solution of doing an explicit IMPORT and qualification will work, but the shorthand (IMPORT $) syntax is the better solution. The dollar sign ($) indicates "the current directory" so you only need the $ to qualify. That would change your code to:#workunit('name','APSALTVER21A.BWR_Iterate(it34) - SALT30a11');\\n#option('multiplePersistInstances',false);\\n\\n//Brian Input file name below \\n#CONSTANT('LAST_ITERATION_ID','33}');\\n\\nIMPORT $,SALT30;\\nIMPORT SALTTOOLS30;\\n\\nP := $.Proc_Iterate('34');\\nP.DoAll;\\n// P.DoAllAgain;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-09 18:27:21\" },\n\t{ \"post_id\": 7107, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"tlhumphrey2\", \"post_text\": \"Let me see if I can insert a image of my repository, here.\\n[attachment=0:2idx45a4]THMyRepository.jpg\", \"post_time\": \"2015-03-09 17:22:54\" },\n\t{ \"post_id\": 7106, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Re: Why must I fully qualify these?\", \"username\": \"tlhumphrey2\", \"post_text\": \"It might help if you saw my repository. So, I've attached a screenshot of it.\", \"post_time\": \"2015-03-09 17:16:58\" },\n\t{ \"post_id\": 7102, \"topic_id\": 1626, \"forum_id\": 8, \"post_subject\": \"Why must I fully qualify these?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have the following query which I got from the module APSALTVER21A:\\n#workunit('name','APSALTVER21A.BWR_Iterate(it34) - SALT30a11');\\n#option('multiplePersistInstances',false);\\n\\n//Brian Input file name below \\n#CONSTANT('LAST_ITERATION_ID','33}');\\n\\nIMPORT APSALTVER21A,SALT30;\\nIMPORT SALTTOOLS30;\\n\\nP := APSALTVER21A.Proc_Iterate('34');\\nP.DoAll;\\n// P.DoAllAgain;\\n
\\n\\nAnd, as you can see I have IMPORTed APSALTVER21A. But, I'm getting the syntax error, "unknown identifier ...", on attributes and modules in APSALTVER21A, that are referenced in APSALTVER21A.Proc_Iterate and other attributes called by Proc_Iterate.\\n\\nI'm having to add an IMPORT APSALTVER21A, to everyone of these attributes and fully qualify any attributes called that are in APSALTVER21A.\\n\\nWhy?\", \"post_time\": \"2015-03-09 15:16:18\" },\n\t{ \"post_id\": 7117, \"topic_id\": 1627, \"forum_id\": 8, \"post_subject\": \"Re: file path changes with the clusters I submitted the job \", \"username\": \"bforeman\", \"post_text\": \"What is the value of currentcarrier.files.ds_base_ambest?\\n\\nTry adding a tilde (~) in front of the filename string, like this:\\n\\nds_base_ambest := DATASET('~THOR::BASE::CurrentCarrier::SUPPRESSED::QA::AMBEST, yourrecorddefinition,THOR);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-10 12:09:43\" },\n\t{ \"post_id\": 7114, \"topic_id\": 1627, \"forum_id\": 8, \"post_subject\": \"file path changes with the clusters I submitted the job with\", \"username\": \"fanglimian\", \"post_text\": \"I have been having really weird errors. \\n\\nI have this really simple code that I used all the time to access the CC ambest table, but error started to appear since last Thursday:\\n\\nimport CurrentCarrier;\\n#CONSTANT ('CCFCEnv', 'Y');\\n#CONSTANT ('CurrentCarrierEnv', 'N');\\n#CONSTANT ('CCFCCustomerTestEnv', 'N');\\n#CONSTANT ('CurrentCarrierCustomerTestEnv', 'N');\\n\\nambest := currentcarrier.files.ds_base_ambest;\\nOUTPUT(ambest, NAMED('ambest'));
\\n\\n\\nIf I submit the job on Thor50_42, I got the error below:\\nError: System error: 0: Read: Logical file name 'thor50_42::THOR::BASE::CurrentCarrier::SUPPRESSED::QA::AMBEST' could not be resolved (0, 0), 0,\\n\\nIf I submit the job on thor21, I got the error below: \\nError: System error: 0: Read: Logical file name 'thor21::THOR::BASE::CurrentCarrier::SUPPRESSED::QA::AMBEST' could not be resolved (0, 0), 0,\\n\\n\\nThis file THOR::BASE::CurrentCarrier::SUPPRESSED::QA::AMBEST clearly exist, but ECL somehow changed the path when I submit the job.\", \"post_time\": \"2015-03-09 20:02:00\" },\n\t{ \"post_id\": 7127, \"topic_id\": 1634, \"forum_id\": 8, \"post_subject\": \"Re: SOAPCALL input layout defaults overriding dataset values\", \"username\": \"bforeman\", \"post_text\": \"Hi Drea,\\n\\nYour SOAPCALL was missing a TRANSFORM...see your private email.\\n\\nInputLayout_ScoredSearch DefParams(InputLayout_ScoredSearch Le) := TRANSFORM\\n SELF.St := '= CA';\\n SELF.RecordsToReturn:=3;\\n SELF := Le;\\nEND;\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\n\\nresult:= SOAPCALL(InputDataset,\\n'http://10.xxx.xxx.x:8002/WsEcl/submit/query/roxie',\\n'leeddx_issue_637_visualizescoredsearch.Ins002_salt_scoredsearchservice', \\nInputLayout_ScoredSearch,DefParams(LEFT),\\nDATASET(OutputLayout_ScoredSearch), XPATH('leeddx_issue_637_visualizescoredsearch.Ins002_salt_scoredsearchserviceResponse'));
\\n\\nIn the SOAPCALL docs:\\nhttp://hpccsystems.com/download/docs/ecl-language-reference/html/SOAPCALL.html\\n\\nWe read:\\n\\ninstructure\\nA RECORD structure containing the input field definitions from which the XML input to the SOAP service is constructed. The name of the tags in the XML are derived from the names of the fields in the input record; this can be overridden by placing an xpath on the field ( {xpath('tagname')} — see the XPATH Support section of the RECORD Structure discussion). If the recset parameter is not present, each field definition must contain a default value that will constitute the single input record. If the recset parameter is present, each field definition must contain a default value unless a transform is also specified to supply that data values.
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-11 13:26:26\" },\n\t{ \"post_id\": 7126, \"topic_id\": 1634, \"forum_id\": 8, \"post_subject\": \"SOAPCALL input layout defaults overriding dataset values\", \"username\": \"drealeed\", \"post_text\": \"I have a SOAPCALL command that’s working. It takes in an input dataset and outputs a dataset.\\n\\nHowever, the default values for the input layout in the soapcall are overriding the values set in the incoming dataset. The “St” field is set to “= CA” in the input dataset, and has the value “= MA” in the defined input layout. “= MA” is what’s getting used in the query.\\n\\nThe inputlayout requires default values; otherwise it throws a compile error. Below is the ecl in question.Any ideas?\\n\\n\\n\\nEXPORT OutputLayout_Result1 := RECORD\\n INTEGER Field1;\\n STRING MyName;\\n STRING Addr1;\\n STRING Addr2;\\n STRING City;\\n STRING St;\\n STRING Zip;\\n REAL score;\\n END;\\nEXPORT OutputLayout_Inputs := RECORD\\n STRING state{XPATH('Result_4')}; \\n END;\\nEXPORT OutputLayout_ScoredSearch:=RECORD\\n\\tDATASET(OutputLayout_Result1) Result1{xpath('Results/Result/Dataset[@name="Result 1"]/Row')};\\n\\tDATASET(OutputLayout_Inputs) Result4{xpath('Results/Result/Dataset[@name="Result 4"]/Row')};\\nEND;\\n\\nEXPORT InputLayout_ScoredSearch := RECORD\\n STRING Field1:='';\\n STRING MyName:='';\\n STRING Addr1:='';\\n STRING Addr2:='';\\n STRING City:='';\\n STRING St:='= MA';\\n STRING Zip:='';\\n STRING1 ScoreCombine:='';\\n UNSIGNED RecordsToReturn:=10;\\n UNSIGNED Threshold:=0;\\n END;\\n\\nEXPORT InputDataset:= PROJECT(DATASET([{''}],{STRING temp}),\\n\\tTRANSFORM(InputLayout_ScoredSearch,\\n\\tSELF.St := '= CA';\\n\\tSELF.RecordsToReturn:=3;\\n\\tSELF:=LEFT;));\\n\\nresult:= SOAPCALL(InputDataset,\\n\\t\\t'http://10.173.147.1:8002/WsEcl/submit/query/roxie/',\\n\\t\\t'leeddx_issue_637_visualizescoredsearch.Ins002_salt_scoredsearchservice', \\n\\t\\tInputLayout_ScoredSearch,\\n\\t\\tDATASET(OutputLayout_ScoredSearch),\\n\\t\\tXPATH('leeddx_issue_637_visualizescoredsearch.Ins002_salt_scoredsearchserviceResponse')\\n\\t);\\n\\t\\t\\noutput(inputdataset[1].St,named('userstate'));\\noutput(result[1].result1,named('results'));\\noutput(result[1].result4,named('stateusedbyservice'));\\n
\", \"post_time\": \"2015-03-10 22:10:12\" },\n\t{ \"post_id\": 7790, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"bforeman\", \"post_text\": \"No problem Srini,\\n\\nThere is also a setting in eclserver:\\n\\neclserver thorConnectTimeoutSeconds\\n\\nBob\", \"post_time\": \"2015-06-17 18:31:47\" },\n\t{ \"post_id\": 7789, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"omnibuzz\", \"post_text\": \"Brilliant. Thanks, Bob. Will check and let you know. \\nCheers\\nSrini\", \"post_time\": \"2015-06-17 18:28:30\" },\n\t{ \"post_id\": 7788, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"bforeman\", \"post_text\": \"In the Configuration Manager for the target cluster, there is a setting in the ECL Agent section:\\n\\nthorConnectTimeout - Default connection timeout when sending query to Thor - 600 \\n\\nThat looks like the attribute that controls this. You would have to contact the system administrator to adjust that setting.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-17 18:06:43\" },\n\t{ \"post_id\": 7786, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"micevepay\", \"post_text\": \"How exactly does maxRunTime relate to waiting? Reference states that maxRunTime "Sets the maximum number of seconds a job runs before it times out" which seems to refer to that job itself running. I, like the original poster, want jobs in the queue not to time out will waiting to be executed.\", \"post_time\": \"2015-06-17 17:26:10\" },\n\t{ \"post_id\": 7144, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"bforeman\", \"post_text\": \"Srini, two things.\\n\\n1. Is the #OPTION the very first line in your code?\\n\\n2. Can you try an incremental test, like \\n\\n#OPTION('maxRunTime',200);\\n\\nJust to make sure that it's working. Still waiting to hear from development.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-16 12:34:10\" },\n\t{ \"post_id\": 7142, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"omnibuzz\", \"post_text\": \"Bob - I tried that. \\nI gave this:\\n#OPTION('maxRunTime',20000);\\n\\nIt still fails after 10 minutes with the error as:\\n System error: 0: Query W20150315-005748 failed to start within specified timelimit (600) seconds\\n\\nRegards\\nSrini\", \"post_time\": \"2015-03-15 01:24:08\" },\n\t{ \"post_id\": 7134, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"Re: thor queue workunit timeout\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nYou can control it from the workunit interface by increasing the Max Runtime parameter. You can also control it using the #OPTION statement:\\n\\nmaxRunTime Default: none Sets the maximum number of seconds a job runs before it times out
\\n\\nNot sure if there is a \"wait forever\" setting. Checking with development now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-13 12:37:55\" },\n\t{ \"post_id\": 7132, \"topic_id\": 1636, \"forum_id\": 8, \"post_subject\": \"thor queue workunit timeout\", \"username\": \"omnibuzz\", \"post_text\": \"How do I make sure that the ECL job I submitted doesn't time out when it's WAITING in queue for its turn? The default timeout looks like 10 minutes. Is there a way to make it wait indefinitely?\\n\\nThanks\\nSrini\", \"post_time\": \"2015-03-13 03:30:19\" },\n\t{ \"post_id\": 7135, \"topic_id\": 1637, \"forum_id\": 8, \"post_subject\": \"Re: Logical File Name Error while building INDEX\", \"username\": \"bforeman\", \"post_text\": \"Hi Subbu,\\n\\nLooks like you are missing the BUILD statement. The INDEX statement simply declares the INDEX; the BUILD statement actually creates it on the cluster:\\n\\nBUILD(IDX_EmployeeByID);
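For reference, the usual declare / build / read sequence, reusing the filenames from the post below with a deliberately simplified layout (a sketch only, not the full 20-field record):

EmpRec := RECORD
  STRING6   EmployeeID;
  STRING    Field2;
  UNSIGNED8 fpos {virtual(fileposition)};
END;

Emp    := DATASET('~subbu::training::roxie::EmployeeDetails', EmpRec, THOR);
EmpIDX := INDEX(Emp, {EmployeeID, fpos}, '~subbu::training::roxie::EmployeeByID_INDEX');

BUILD(EmpIDX);                              // creates the index file on the cluster
// Only after the BUILD action has actually run can the index be read:
// OUTPUT(EmpIDX(EmployeeID = '000123'));   // '000123' is a made-up id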
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-13 13:00:09\" },\n\t{ \"post_id\": 7133, \"topic_id\": 1637, \"forum_id\": 8, \"post_subject\": \"Logical File Name Error while building INDEX\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI am trying to build index for Roxie query and received the error of Logical File Name could not be resolved. \\n\\nPlease help in resolving this issue. Here is the code snippet.\\n\\nEXPORT EmployeeDetails := MODULE\\n\\nEXPORT Layout := RECORD\\n\\tSTRING6 EmployeeID;\\n\\tSTRING Field2;\\n\\tSTRING Field3;\\n\\tSTRING Field4;\\n\\tSTRING Field5;\\n\\tSTRING Field6;\\n\\tSTRING Field7;\\n\\tSTRING Field8;\\n\\tSTRING Field9;\\n\\tSTRING Field10;\\n\\tSTRING Field11;\\n\\tSTRING Field12;\\n\\tSTRING Field13;\\n\\tSTRING Field14;\\n\\tSTRING Field15;\\n\\tSTRING Field16;\\n\\tSTRING Field17;\\n\\tSTRING Field18;\\n\\tSTRING Field19;\\n\\tSTRING Field20;\\nEND;\\n\\nEXPORT File := DATASET('~subbu::training::employeedetailscsv',Layout,CSV(HEADING(1)));\\n\\nEND;\\n\\nOUTPUT($.EmployeeDetails.File,,'~subbu::training::roxie::EmployeeDetails', OVERWRITE);\\n\\nEXPORT QueryEmployeeDetails := DATASET('~subbu::training::roxie::EmployeeDetails',{$.EmployeeDetails.Layout, UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nEXPORT IDX_EmployeeByID := INDEX($.QueryEmployeeDetails, {employeeid, fpos}, '~subbu::training::roxie::EmployeeByID_INDEX');\\n\\nError: System error: 0: IndexRead: Logical file name 'subbu::training::roxie::EmployeeByID_INDEX' could not be resolved (0, 0), 0, \\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-13 03:59:11\" },\n\t{ \"post_id\": 7154, \"topic_id\": 1642, \"forum_id\": 8, \"post_subject\": \"Re: WebService SOAPCALL\", \"username\": \"ravishankar\", \"post_text\": \"Thanks a lot for your answer. \\n\\nIt make lot of sense for me to comprehend things better. \\n\\nThanks again.\", \"post_time\": \"2015-03-19 04:55:09\" },\n\t{ \"post_id\": 7152, \"topic_id\": 1642, \"forum_id\": 8, \"post_subject\": \"Re: WebService SOAPCALL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Beyond the error caused by outputting a record layout , there are several problems here.\\n\\n\\n
\\n\\nPutting it all together:\\n\\n STRING Esp := 'http://www.webservicex.net/geoipservice.asmx';\\n \\n ReqLayout := RECORD\\n String statenme1 {XPATH('IPAddress')} := '74.125.236.161';\\n END;\\n \\n ResLayout := RECORD\\n integer statenme1 {XPATH('ReturnCode')};\\n String state_val1 {XPATH('IP')};\\n String statenme2 {XPATH('ReturnCodeDetails')};\\n String state_val2 {XPATH('CountryName')};\\n String statenme3 {XPATH('CountryCode')};\\n END;\\n \\n GetVerseSoap:=SOAPCALL(Esp,'GetGeoIP', ReqLayout, ResLayout, LITERAL, NAMESPACE('http://www.webservicex.net/'), XPATH('GetGeoIPResponse/GetGeoIPResult'));\\n \\n GetVerseSoap;
\", \"post_time\": \"2015-03-18 15:24:35\" },\n\t{ \"post_id\": 7151, \"topic_id\": 1642, \"forum_id\": 8, \"post_subject\": \"Re: WebService SOAPCALL\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,Cannot return a result of this type from a workunit
I see that the second action in your ECL code is "reslayout" -- the name of a RECORD structure definition (which is not a valid action).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-18 13:23:16\" },\n\t{ \"post_id\": 7148, \"topic_id\": 1642, \"forum_id\": 8, \"post_subject\": \"WebService SOAPCALL\", \"username\": \"ravishankar\", \"post_text\": \"I am trying to access the public webservice - http://www.webservicex.net/geoipservice.asmx by the following method.\\n\\n \\t\\t\\t STRING Esp := 'http://www.webservicex.net/geoipservice.asmx';\\n\\t\\t\\t \\n\\t\\t\\t ReqLayout := RECORD\\n\\t\\t\\t\\t\\t\\t\\t\\tString statenme1 {XPATH('GetGeoIP/IPAddress')} := '74.125.236.161';\\n\\t\\t\\t END;\\n \\t\\t \\n\\t\\t\\tResLayout := RECORD\\n\\t\\t\\t\\t\\t\\t\\t\\tinteger statenme1 {XPATH('GetGeoIPResponse/GetGeoIPResult/ReturnCode')};\\n\\t\\t\\t\\t\\t\\t\\t\\tString state_val1 {XPATH('GetGeoIPResponse/GetGeoIPResult/IP')};\\n \\t\\t\\t\\t\\tString statenme2 {XPATH('GetGeoIPResponse/GetGeoIPResult/ReturnCodeDetails')};\\n \\t\\t\\t\\t\\tString state_val2 {XPATH('GetGeoIPResponse/GetGeoIPResult/CountryName')};\\n \\t\\t\\t\\t\\tString statenme3 {XPATH('GetGeoIPResponse/GetGeoIPResult/CountryCode')};\\n\\t\\t\\t END;\\n\\t\\t\\t\\t\\t\\n\\t\\t\\t GetVerseSoap:=SOAPCALL(Esp,'GeoIPService',ReqLayout,ResLayout);\\n\\t\\t\\t\\t\\n\\t\\t\\t GetVerseSoap;\\n\\t\\t\\t ResLayout;\\t
\\n\\nI also tried removing the GetGeoIP/ from request record structure and GetGeoIPResponse/GetGeoIPResult/ from response record structure.\\n\\nI am getting Error like \\nWarning: (0,0): error C4157: Cannot return a result of this type from a workunit (0, 0), 0, \\n\\nThe request and Response SOAP XML,End Point URL and WSDL can be find in http://www.webservicex.net/ws/WSDetails ... 12&WSID=64\\n\\nCould you please let us know if I am missing anything here.\", \"post_time\": \"2015-03-18 05:34:44\" },\n\t{ \"post_id\": 7161, \"topic_id\": 1644, \"forum_id\": 8, \"post_subject\": \"Re: Access HPCC data out of Cluster\", \"username\": \"ravishankar\", \"post_text\": \"Thanks Richard for highlighting the options.\", \"post_time\": \"2015-03-20 04:10:34\" },\n\t{ \"post_id\": 7159, \"topic_id\": 1644, \"forum_id\": 8, \"post_subject\": \"Re: Access HPCC data out of Cluster\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,\\n\\nYou left out despraying the file and making it available through any other tool.\\n\\nI also believe that JSON support for Roxie queries and an ODBC driver are both in the works.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-19 13:16:28\" },\n\t{ \"post_id\": 7155, \"topic_id\": 1644, \"forum_id\": 8, \"post_subject\": \"Access HPCC data out of Cluster\", \"username\": \"ravishankar\", \"post_text\": \"Is there any other better way of Accessing HPCC data out of Cluster than the below three options.\\n\\n1) Making a SOAP Request to ECL Published Code/Query - Writing a Web Service client and accessing the data\\n2) Making a HTPP Request to ECL Published Code/Query - Writing a HTTP Request and retrieving the data in the response URL\\n3) JDBC Driver - Accessing HPCC Data file as table and querying \\n\\nPlease let me know, If there is any others ways of getting HPCC data to the outer world.\", \"post_time\": \"2015-03-19 05:21:32\" },\n\t{ \"post_id\": 7210, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"rtaylor\", \"post_text\": \"eamaro,I have workarounded the issue by creating another thor node, and then copying the index from thor to roxie before publishing the query. This way, I get the index distributed in roxie and I can run the published query correctly.
Creating another Thor node was the right thing to do, since the number of Thor nodes determines the number of file parts in the INDEX. However, you should not need to copy the INDEX to the Roxie manually since Publishing the compiled query that uses the INDEX will automatically copy it over to the Roxie, correctly distributed.\\n\\nTypically your Thor is always much larger than your Roxie because Thor needs a lot of disk and memory to handle interim results as it processes your Big Data. But Roxie only ever gets final (distilled) result data ready to go to the customer. That's why our Production Thors are 400-node machines and our Production Roxies are 100-node machines.\\n\\nIf you only have three nodes to work with, then your "proper" configuration would be a 2-node Thor to do all your "heavy lifting" and a 1-node Roxie to deliver final results.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-26 18:54:09\" },\n\t{ \"post_id\": 7201, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"eamaro\", \"post_text\": \"I have reproduced the failure again. I have attached the roxie.log (node1.log and node2.log) files of the two nodes right after publishing the query.\\n\\nI am also attaching the log (log3.log) produced when trying to start roxie again by issuing ".../hpcc-init start".\\n\\nI have workarounded the issue by creating another thor node, and then copying the index from thor to roxie before publishing the query. This way, I get the index distributed in roxie and I can run the published query correctly.\", \"post_time\": \"2015-03-25 22:24:24\" },\n\t{ \"post_id\": 7194, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"bforeman\", \"post_text\": \"Can you please attach your roxie log files? \\n\\nWhen you say you could not bring up the daemon what did the init log say? Could it be possible that roxie was already running?\\n\\nAnd as I mentioned earlier, the configuration you have is wrong to use. There should never be a case were the number of roxie nodes exceeds the number of thor nodes since it will mean that roxie nodes will not be slaves, but it should not bring down ROXIE. \\n\\nI was incorrect in an earlier post, when you publish from a THOR cluster with only one file part, only one file part will be copied to ROXIE.\\n\\nIf you look at the following PDF:\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-5.0.4/docs/HPCCSystemAdministratorsGuide-5.0.4-1.pdf\\n\\nCheck out the System Sizings topic.\\n\\nAlso, the Wiki has a good topic on this:\\nhttps://wiki.hpccsystems.com/display/hpcc/Sample+Sizing+Guide+for+HPCC+-+High+Data+volume+-+Typical+scenario\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-25 15:52:38\" },\n\t{ \"post_id\": 7192, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"bforeman\", \"post_text\": \"Usually, I always execute my queries from the ECL IDE (if the target is THOR or hTHOR) and the WS_ECL service if I had published the query to ROXIE. Not sure why publishing a query would bring down your ROXIE, checking with some members of my team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-25 15:00:18\" },\n\t{ \"post_id\": 7191, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"eamaro\", \"post_text\": \"Bob,\\n\\nI understand my configuration is not typical. 
Right now I don't have more resources available, and given that I wanted to test the distributed index scenario in roxie nodes, it made sense to create a configuration like the one I currently have. I am coming to the conclusion that hpcc does not handle this scenario correctly.\\n\\nBased on your input, I performed these steps:\\n - eclplus action=query cluster=thor server=esp_ip @build_idx.ecl . Index is created properly.\\n - eclplus action=query cluster=thor server=esp_ip @query.ecl . Query is executed correctly.\\n - eclplus action=query cluster=roxie server=esp_ip @query.ecl . Query is executed correctly.\\n - In ECLWatch, I go to the previous work unit and publish the query. After this step, things started to behave bad. After some debugging I realized that after publishing the query, roxie binaries in both nodes died (both init_roxie and roxie). Moreover, "hpcc-init start" is not able to bring up the roxie daemons anymore. To get out of this state, I had to reinstall the stack.\\n\\nI can access the logical file list, and this is the partition that is generated for the index in the roxie cluster, which seems strange:\\n\\npart copy node cluster size\\n1\\t1\\tnode74\\tmyroxie\\t6,823,936\\t\\n1\\t2\\tnode75\\tmyroxie\\t6,823,936\\t\\n2\\t1\\tnode75\\tmyroxie\\t32,768\\t\\n2\\t2\\tnode74\\tmyroxie\\t32,768\\t\\n2\\t3\\tnode74\\tmyroxie\\t32,768\\t\\n2\\t4\\tnode75\\tmyroxie\\t32,768\\n\\nI guess I'll try again with the same number of thor and roxie nodes.\", \"post_time\": \"2015-03-25 14:53:54\" },\n\t{ \"post_id\": 7188, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"bforeman\", \"post_text\": \"Let's make sure that a few ideas are clear, and then we can go from there. \\n\\nFirst, all indexes used for ROXIE are always built on THOR. The fact that you only have a single node THOR confirms that the index created on THOR will only have one part.\\n\\nAFTER YOU PUBLISH your query to ROXIE, the publish process will copy the index from THOR to ROXIE. If you have a two-node ROXIE, the index will be split into two-parts on ROXIE. So you can confirm this in the ECL Watch. Your original INDEX that you built on a single node THOR should have one part, the index published to ROXIE should be in two parts.\\n\\nCorrection: After consulting with my team this is not the case with your configuration. If you have a single node THOR configured, when you publish it will only copy one part to ROXIE \\n\\nYour configuration is not typical. Usually a THOR cluster will have several slave nodes in order to do the "heavy lifting" Typically in production you will see a 400-node THOR and a 100-node ROXIE, so often the parts on THOR are consolidated before they reach ROXIE.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-25 12:38:35\" },\n\t{ \"post_id\": 7185, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"eamaro\", \"post_text\": \"Bob,\\n\\nI am using version 5.2.0-rc3Debug and I am using the OriginalPerson data file used in several examples. I uploaded the file to the landing zone and then sprayed to thor, both using ECLwatch.\\n\\nThis is exactly what I am doing:\\n - Upload the file and spray it to thor. I get one file of ~100mb. I only have one thor node so this makes sense.\\n - Execute "eclplus action=query cluster=roxie server=esp_ip @build_idx.ecl". build_idx.ecl is the first snippet of code I referenced in my previous post. 
At this point I can see the test::key_fn_person file in EclWatch, with only one partition.\\n - Execute "eclplus action=query cluster=roxie server=esp_ip @query.ecl". query.ecl is the 2nd snippet of code I referenced in my previous post. The query is executed correctly.\\n - Publish the previous query.\\n - I go to esp_ip:8002 and execute the published query. The query is executed correctly.\\n\\nI should probably also mention that if I copy the sprayed file from thor to roxie using EclWatch, then I get 2 parts of the file. This isn't an index yet. When I run the create index ecl code, the parts get merged into one index only in a roxie node :/.\\n\\nApologies, I think I am missing something obvious, just can't seem to find out what.\\n\\nThanks for the help.\", \"post_time\": \"2015-03-25 03:42:24\" },\n\t{ \"post_id\": 7181, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Re: Distributed index in roxie\", \"username\": \"bforeman\", \"post_text\": \"The distribution of the index across the nodes of the ROXIE cluster should be automatic when you publish the query. I looked at some of my test clusters using a 2-node ROXIE and confirmed this. Where are you getting your file parts information from, and what version are you currently using?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-24 16:41:52\" },\n\t{ \"post_id\": 7178, \"topic_id\": 1650, \"forum_id\": 8, \"post_subject\": \"Distributed index in roxie\", \"username\": \"eamaro\", \"post_text\": \"Hello,\\n\\nI understand that the specific strategy to distribute an index in roxie is undefined. Nonetheless, I am interested in doing some experimentation in this specific scenario. Could someone please provide a skeleton for a query that would force this? I have tried several ways of accomplishing this, however, at the end the resulting index has only one part.\\n\\nExample:\\n\\nEcl code to build index:\\n
\\ndata_layout := RECORD\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING5 Zip;\\n STRING42 Street;\\n STRING20 City;\\n STRING2 State;\\nEND;\\n\\ndataa := DATASET('~test::originalperson',{data_layout, UNSIGNED8 RecPtr {virtual(fileposition)}},THOR);\\ndatax := INDEX(dataa, {FirstName, RecPtr}, '~test::key_fn_person');\\nBUILDINDEX(datax);\\n
\\n\\nEcl query code:\\n\\ndata_layout := RECORD\\n ... same as before\\nEND;\\n\\ndataa := DATASET('~test::originalperson',{data_layout, UNSIGNED8 RecPtr {virtual(fileposition)}},FLAT);\\nfn_x := INDEX(dataa, {FirstName, RecPtr}, '~test::key_fn_person');\\n\\nfilterdata := FETCH(dataa, fn_x(FirstName='Chaeli'), RIGHT.RecPtr);\\nfilterdata;\\n
\\n\\nMy test cluster currently has only 2 roxie nodes. Thus, I am trying to generate a 2-way partition of the index.\\n\\nThanks for the help.\", \"post_time\": \"2015-03-24 13:46:48\" },\n\t{ \"post_id\": 7189, \"topic_id\": 1651, \"forum_id\": 8, \"post_subject\": \"Re: ECL code for date manipulation\", \"username\": \"DSC\", \"post_text\": \"The next major release of HPCC, version 5.2, has a greatly expanded Std.Date module that contains many more date- and time-related functions. If you're feeling adventurous, you can download a release candidate from http://hpccsystems.com/download/release-candidates and try it out.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2015-03-25 13:58:29\" },\n\t{ \"post_id\": 7180, \"topic_id\": 1651, \"forum_id\": 8, \"post_subject\": \"Re: ECL code for date manipulation\", \"username\": \"bforeman\", \"post_text\": \"Hi Gopi,\\n\\nIf you look at the Standard Library Reference you will see a section that contains a few Date functions:\\n\\nhttp://hpccsystems.com/download/docs/standard-library-reference\\n\\nIn addition, in our training classes, we provide the following helper function which converts standard dates (Zdate) into Julian dates. With Julian dates, you can subtract one from another to get an accurate age by days.\\n\\nHere is the code:\\n\\nEXPORT Z2JD(STRING8 Zdate) := FUNCTION\\n // adapted from an algorithm described here:\\n // http://quasar.as.utexas.edu/BillInfo/JulianDatesG.html\\n A(Y) := TRUNCATE(Y/100);\\n B(Aval) := TRUNCATE(Aval/4);\\n C(Y) := 2-A(Y)+B(A(Y));\\n E(Y) := TRUNCATE(365.25 * (Y+4716));\\n F(M) := TRUNCATE(30.6001 * (M+1));\\n Yval := IF((INTEGER1)(Zdate[5..6]) < 3,\\n (INTEGER2)(Zdate[1..4])-1,\\n\\t (INTEGER2)(Zdate[1..4]));\\n Mval := IF((INTEGER1)(Zdate[5..6]) < 3,\\n (INTEGER1)(Zdate[5..6])+12,\\n\\t (INTEGER1)(Zdate[5..6]));\\n Dval := (INTEGER1)(Zdate[7..8]);\\n RETURN IF(Zdate='',0,TRUNCATE(C(Yval) + Dval + E(Yval) + F(Mval)- 1524.5));\\nEND;\\t\\t\\t\\t\\t\\t
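A quick usage sketch for the helper above (dates chosen arbitrarily, and assuming Z2JD is in scope): because Z2JD returns a Julian day number, subtracting two results gives the gap in whole days:

d1 := Z2JD('20150101');                  // 1 Jan 2015
d2 := Z2JD('20150324');                  // 24 Mar 2015
OUTPUT(d2 - d1, NAMED('DaysBetween'));   // expected: 82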
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-24 16:33:04\" },\n\t{ \"post_id\": 7179, \"topic_id\": 1651, \"forum_id\": 8, \"post_subject\": \"ECL code for date manipulation\", \"username\": \"gopi\", \"post_text\": \"How to add or subtract days with in ECL code.\\nCould you please any one share me ECL code for date manipulation?\\n\\nOR\\n\\nHow I can use c++ code for date manipulation in ECL code?\\n\\nThanks in advance.\\nGopi\", \"post_time\": \"2015-03-24 15:33:33\" },\n\t{ \"post_id\": 7190, \"topic_id\": 1652, \"forum_id\": 8, \"post_subject\": \"Re: When a module is not a module\", \"username\": \"DSC\", \"post_text\": \"The IMPORT command does not support modules that have arguments. To use a module like that, you need to make sure you import the module's parent directory, then instantiate the module explicitly. Something like this (if your files are in the same directory):\\n\\nIMPORT $;\\n\\nm := $.testModule('foo');\\nOUTPUT(m.message);
\\nEdit: To be clear, the compiler message you saw was due to the mismatch between the definition of the module, which included the argument, and your reference to it, which did not have an argument. Because the signatures did not match, the compiler believed that your reference was to a module that did not exist.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2015-03-25 14:03:32\" },\n\t{ \"post_id\": 7186, \"topic_id\": 1652, \"forum_id\": 8, \"post_subject\": \"When a module is not a module\", \"username\": \"subba\", \"post_text\": \"I seem to be getting a weird compiler error message:\\n\\n$ cat testModule.ecl \\nEXPORT testModule(STRING msg) := MODULE\\n EXPORT STRING message := 'abc' + msg; \\nEND;\\n\\n$ cat testImport.ecl\\nIMPORT testModule;\\nOUTPUT('hello');\\n\\n$ eclcc -syntax testImport.ecl\\ntestImport.ecl(1,19): error C2081: Import item "testModule" is not a module\\n1 error, 0 warning
\\n\\nWhy is 'testModule' not a module? If I remove the 'msg' parameter then the compiler has no problem in accepting 'testModule' as a module.\\n\\nThanks in advance,\\nsubba\", \"post_time\": \"2015-03-25 05:23:08\" },\n\t{ \"post_id\": 7226, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"Brilliant. That was exactly what I was looking for. Never seen this DATASET syntax before. I NOW notice that it WAS there in the ECL reference. \\n\\nds1 := DATASET(CntInDS,TRANSFORM({DStoDup},SELF := DStoDup[COUNTER]),LOCAL);
\\n\\nAnd it took me a while to understand what you did.\\nThanks\\nSrini\", \"post_time\": \"2015-03-30 16:04:32\" },\n\t{ \"post_id\": 7225, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nYour code used DISTRIBUTED on your DATASET, whereas to accomplish what you want you need to use LOCAL, like this:IMPORT STD,TrainingYourName;\\n\\n//first you need the dataset you want to have fully available on each node. \\n//I'm just using a training dataset, but you can replace this with whatever.\\n//Note that I'm restricting the fields to just those that I need\\n//and that this TABLE is global, not LOCAL.\\nDStoDup := TABLE(TrainingYourName.File_Persons.File,{lastname,firstname});\\nCntInDS := COUNT(DStoDup);\\nCntInDS;\\n\\n//Then you need to get all the recs onto each node.\\n//Using the LOCAL option on the TRANSFORM form of DATASET accomplishes that. \\nds1 := DATASET(CntInDS,TRANSFORM({DStoDup},SELF := DStoDup[COUNTER]),LOCAL);\\n\\nCOUNT(ds1); // result is CntInDS * Number of nodes;\\n\\n//Then this vertical slice form of TABLE using LOCAL adds the node numbers\\n//to all the records on each node separately and independently. \\nr := RECORD\\n ds1;\\n MyNode := STD.system.Thorlib.Node();\\nEND;\\nTout := TABLE(DS1,r,LOCAL);\\n\\n//And here's the proof that we now have duplicated the entire dataset on each node.\\nCOUNT(tOut(Mynode = 0)); //output is same as CntInDS\\nCOUNT(tOut(Mynode = 1)); //output is same as CntInDS\\nCOUNT(tOut(Mynode = 2)); //output is same as CntInDS
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-30 15:47:12\" },\n\t{ \"post_id\": 7224, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"Bob - Pardon my ignorance. I have never used TABLE in that way and I am really interested in this option. I tried to recreate the scenario. Please let me know what I am doing wrong here. \\nThe code below is running in a 40 node cluster. and I want ds1 to be replicated in every node. It doesn't seem to happen. It is behaving the way I thought it would. \\n\\n\\nIMPORT STD;\\n\\nds1 := DATASET(10000,TRANSFORM({INTEGER Num},SELF.Num := COUNTER),DISTRIBUTED);\\n\\nCOUNT(ds1); // output is 10000;\\n\\nr := RECORD\\n\\tds1;\\n\\tMyNode := STD.system.Thorlib.Node();\\nEND;\\n \\nTout := TABLE(DS1,r,num,LOCAL);\\nCOUNT(tOut(Mynode = 0)); //output is 250 = 10000/40\\nCOUNT(tOut(Mynode = 1)); //output is 250 = 10000/40\\nCOUNT(tOut(Mynode = 2)); //output is 250 = 10000/40\\n
\\n\\nThanks again.\\nSrini\", \"post_time\": \"2015-03-30 14:21:27\" },\n\t{ \"post_id\": 7215, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nEasy! Just use LOCAL on the TABLE \\n\\nThis example is trivial, but shows that the entire TABLE was duplicated on every node in my training cluster (I was using our class Persons data):\\n\\n
r := RECORD\\n $.File_Persons.File.BureauCode;\\n MyNode := STD.system.Thorlib.Node();\\n END;\\n \\n Tout := TABLE($.File_Persons.File,r,BureauCode,LOCAL);\\n COUNT(tOut(Mynode = 0));\\n COUNT(tOut(Mynode = 1));\\n COUNT(tOut(Mynode = 2));
\", \"post_time\": \"2015-03-27 15:46:41\" },\n\t{ \"post_id\": 7214, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"JimD\", \"post_text\": \"http://hpccsystems.com/download/docs/ec ... TABLE.html\\n\\nhth,\\nJim\", \"post_time\": \"2015-03-27 15:46:25\" },\n\t{ \"post_id\": 7213, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"Bob - Can you give an example code on how to designate the table as LOCAL. I am not able to find it in the ECL Reference.\\nI am looking at having the table available locally in each node. \\nThank you.\\n-Srini\", \"post_time\": \"2015-03-27 15:33:02\" },\n\t{ \"post_id\": 7212, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"bforeman\", \"post_text\": \"Srini,\\nMy first instinct would be to simply load up the DATASET into a TABLE (which you can designate as LOCAL). Of course, an inline DATASET is always implicitly local in scope. The new DATASET(count) form also supports LOCAL.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-27 12:15:51\" },\n\t{ \"post_id\": 7211, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"On the same note, if I want to use a DATASET inside a project and would like copy that DATASET locally to each node so that I can use it inside the TRANSFORM function in PROJECT LOCAL, how do I do it?\\nThanks\\nSrini\", \"post_time\": \"2015-03-27 00:52:39\" },\n\t{ \"post_id\": 7207, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"That is wonderful news, Bob. I didn't realize. Thanks for clarifying.\\nRegards\\nSrini\", \"post_time\": \"2015-03-26 17:43:12\" },\n\t{ \"post_id\": 7204, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Re: Accessing a DICTIONARY locally\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nReading the docs on DICTIONARY I see:\\n\\nA DICTIONARY allows you to efficiently check whether a particular data value is in a list (using the IN operator), or to simply map data. It is similar to a LOOKUP JOIN that can be used in any context.\\n\\n
\\n\\nSince it is similar to a LOOKUP JOIN, a LOOKUP JOIN is an implicit LOCAL operation, since the entire right recordset is loaded on to every node. I'm pretty sure that a DICTIONARY works in the same way. The entire contents are loaded to each node.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-26 12:51:34\" },\n\t{ \"post_id\": 7196, \"topic_id\": 1654, \"forum_id\": 8, \"post_subject\": \"Accessing a DICTIONARY locally\", \"username\": \"omnibuzz\", \"post_text\": \"Say I want to use a DICTIONARY in my PROJECT, I would ideally like to use it as a LOOKUP with each node having it's own copy of the entire data. And doing a PROJECT with LOCAL and in the TRANSFORM search the dictionary. \\nIs there a way by which I can force the DICTIONARY to replicate to all the nodes?\\n\\nIf not, how do I solve for such a requirement.\\n\\nThanks\\nSrini\", \"post_time\": \"2015-03-25 16:16:50\" },\n\t{ \"post_id\": 7227, \"topic_id\": 1656, \"forum_id\": 8, \"post_subject\": \"Re: Autoselecting hthor?\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. \\nJira ticket created:\\nhttps://track.hpccsystems.com/browse/HPCC-13315\\n\\n-Srini\", \"post_time\": \"2015-03-30 16:19:19\" },\n\t{ \"post_id\": 7200, \"topic_id\": 1656, \"forum_id\": 8, \"post_subject\": \"Re: Autoselecting hthor?\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nStrange behavior! I sort of duplicated your problem (not hundreds, but 15 on my 3-node cluster). I'd say it has to do with your PIPE being called multiple times (most likely once on each node). Please submit a JIRA ticket.\\n\\nI suggest you just write the file to disk and see if that changes the behavior (I expect it will).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-25 20:43:05\" },\n\t{ \"post_id\": 7199, \"topic_id\": 1656, \"forum_id\": 8, \"post_subject\": \"Autoselecting hthor?\", \"username\": \"omnibuzz\", \"post_text\": \"I am running this on a 40 node thor cluster. When I run this code:\\n\\ninput := '0!1!2|3!4!5|5!6!7|8!>!9|';\\n\\nRec := RECORD\\n\\tINTEGER a;\\n\\tINTEGER b;\\n\\tREAL c;\\nEND;\\n\\nds := PIPE('echo ' + input, Rec, CSV(SEPARATOR('!'),TERMINATOR('|')));\\n\\nds;\\n
\\n\\nI get 5 records.\\n\\nWhen I add a project after the last line, I get more than 100 records for the same ds.\\n\\n\\ninput := '0!1!2|3!4!5|5!6!7|8!>!9|';\\n\\nRec := RECORD\\n\\tINTEGER a;\\n\\tINTEGER b;\\n\\tREAL c;\\nEND;\\n\\nds := PIPE('echo ' + input, Rec, CSV(SEPARATOR('!'),TERMINATOR('|')));\\n\\nds;\\n\\nPROJECT(ds(a <> 0 OR b <> 0), \\n\\t\\t\\t\\t\\t\\tTRANSFORM(Rec,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSELF.b := IFF(LEFT.b = 0,10,LEFT.b);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSELF := LEFT));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n
\\n\\nI take it that the cluster automatically moved the job to hthor when I didn't use the project. I still feel it warrants at least a warning of some sort to expect a different result. \\n\\nNow to the real issue. How do I get just the 5 records using the PROJECT in Thor. The output from ds is being used as part of a bigger job which can run only in Thor.\\n\\n-Srini\", \"post_time\": \"2015-03-25 19:56:07\" },\n\t{ \"post_id\": 7267, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"Distributing the PData didn't seem to help the original problem, which was that the job was taking a very long time:\\n\\nI cut down the size of the inputs to just 5 files from the original 25 and ran the job both writing things out specifically, like in my original post, and using the PROJECT/DENORM combination we've been talking about.\\n\\nThe brute force method finished in 10 seconds on my cluster, and I killed the DENORMALIZE job after ten minutes or so.\\n\\nOn a whim, I added LOCAL to the DENORMALIZE action and re-ran the job. It finished in 45 seconds. So it adds overhead, but I think it's minimal enough to make this approach reasonable, depending on how it scales to the full job.\\n\\nThanks for your help, Richard.\", \"post_time\": \"2015-04-06 20:20:05\" },\n\t{ \"post_id\": 7266, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"EDIT: using "Distribute(PData, HASH32(PData.identifier)) made the error go away. I'll re-run and see what happens.\\n\\nNo joy there:\\n\\nError: INTERNAL: Dataset is not active: '_EMPTY_(result_rec)'
\", \"post_time\": \"2015-04-06 19:36:05\" },\n\t{ \"post_id\": 7265, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"rtaylor\", \"post_text\": \"Then try using DISTRIBUTE around the Pdata.\", \"post_time\": \"2015-04-06 19:29:22\" },\n\t{ \"post_id\": 7264, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"No, there are about 25 different values.\", \"post_time\": \"2015-04-06 19:27:24\" },\n\t{ \"post_id\": 7263, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"rtaylor\", \"post_text\": \"Alex,estimated skew 1.0000
This tells me that all the records are trying to go on one node. That implies that the UIDs TABLE only has one record in it. Is that the case?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-06 19:14:42\" },\n\t{ \"post_id\": 7262, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"Hi again Richard.\\n\\nI tried something like what you suggested, using DENORMALIZE:\\n\\n result_rec := RECORD\\n \\t// Field List\\n END;\\n\\n processedData := RECORD\\n \\tstring3 identifier;\\n \\tDATASET(result_rec) resultsFromDoSomething;\\n END:\\n\\n UIDs := TABLE(Superfile,{IDfield},IDfield);\\n Pdata := PROJECT(UIDs,TRANSFORM(processedData,SELF.identifier:=LEFT.IDfield,SELF := []));\\n\\n processedData xform(processedData P, DATASET(theLayout) R) := TRANSFORM\\n \\n SELF.identifier:= R.Identifier;\\n SELF.resultsFromDoSomething := doSomething(R);\\n END;\\n\\n fullProcess := DENORMALIZE(Pdata,\\n Superfile,\\n LEFT.identifier=RIGHT.IDfield,\\n GROUP,\\n xform(LEFT, ROWS(RIGHT)));\\n\\n
\\n\\nThis caused a runtime error:\\nSystem error: 10083: Graph[9], denormalizegroup[13], JOIN failed, RHS skewed, based on distribution of LHS partition points. Graph[9] denormalizegroup[13]: Exceeded skew limit: 0.008333, estimated skew 1.0000
\", \"post_time\": \"2015-04-06 19:08:51\" },\n\t{ \"post_id\": 7255, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"Hi Richard; thanks for the response.\\n\\nWhat I am specifically trying to do is perform the SALT-generated hygiene process to each member of the superfile. So for example, knowing that a field is 90% populated across the entire superfile is not particularly illustrative, but knowing that 90% of the files have that field 100% populated but the other 10% do not is useful.\\n\\nETA: There is already a layout change from the original files to the superfile, where several fields are deleted. Since I'm trying to profile those fields as well, I was just using the super file contents as a convenient way to get a list of all the fields I care about. It may be that's not the smartest way to do it.\", \"post_time\": \"2015-04-06 14:42:44\" },\n\t{ \"post_id\": 7254, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"Re: working on superfile contents individually\", \"username\": \"rtaylor\", \"post_text\": \"Alex,However, since there are a few hundred files in the superfile, and the exact number is subject to change, I'd like to have a way to do this all at once.
First off, the recommended maximum number of sub-files in a superfile is around 100. Yes, as you already know, you can have more than that, but performance issues can start to creep in the more sub-files you have. That's why we teach the periodic data consolidation techniques in our Advanced Thor class section on superfiles. \\n\\nSo the real question is -- what do you need to do to each sub-file separately that you cannot simply do to the entire superfile? \\n\\nIf it is a layout change, then that is the perfect opportunity to also consolidate the data into a single sub-file as the \"new base\" to which you can then add additional sub-files.\\n\\nHere's your second example, re-written to operate on the entire superfile at once (and obviously not tested):result_rec := RECORD\\n// Field List\\nEND;\\n\\nUIDs := TABLE(Superfile,{IDfield},IDfield);\\n\\nprocessedData := RECORD\\n\\tstring3 identifier;\\n\\tDATASET(result_rec) resultsFromDoSomething;\\nEND;\\n\\nPdata := PROJECT(UIDs,TRANSFORM(processedData,SELF.identifier:=LEFT.IDfield,SELF := []));\\n\\nprocessedData xform(processedData P, \\n DATASET(STD.File.FsLogicalFileNameRecord) L) := TRANSFORM\\n current := L;\\n SELF.identifier := P.identifier;\\n SELF.resultsFromDoSomething := doSomething(current);\\nEND;\\n\\nfullProcess := DENORMALIZE(Pdata, \\n Superfile, \\n LEFT.identifier=RIGHT.IDfield, \\n GROUP, \\n xform(LEFT, RIGHT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-06 14:03:43\" },\n\t{ \"post_id\": 7253, \"topic_id\": 1663, \"forum_id\": 8, \"post_subject\": \"working on superfile contents individually\", \"username\": \"alex\", \"post_text\": \"Hello HPCC folks. I have a superfile, and I need to be able to perform an operation on its constituent files:\\n\\nfile_list := FileServices.GetSuperFileContents('~path::to::superfile');\\nds1 := DATASET(file_list[1].name, theLayout, thor);\\n...\\ndsN := DATASET(file_list[N].name, theLayout, thor);\\n\\nr1 := doSomething(ds1);\\n...\\nrN := doSomething(dsN);\\n
\\n\\nIn my experimentation, writing out each line like above works, and is very fast. However, since there are a few hundred files in the superfile, and the exact number is subject to change, I'd like to have a way to do this all at once.\\n\\nThis also works:\\nresult_rec := RECORD\\n// Field List\\nEND;\\n\\nprocessedData := RECORD\\n string3 identifier;\\n DATASET(result_rec) resultsFromDoSomething;\\nEND;\\n\\nprocessedData xform( STD.File.FsLogicalFileNameRecord L, integer C) := TRANSFORM\\ncurrent := DATASET(L.name, theLayout, thor);\\nSELF.identifier:= current[1].IDfield;\\nSELF.resultsFromDoSomething := doSomething(current);\\nEND;\\n\\nfullProcess := Normalize(file_list, 1, xform(LEFT, COUNTER));
\\n\\nHowever, using Normalize like that makes the whole process take much much longer than just writing it out one at a time (30 seconds in the first case to longer than an hour in the second). So I assume that is an inherently inefficient way to accomplish my goal. I tried PROJECT and PROCESS and had similar results. \\n\\nWhat am I doing wrong? \\n\\nThanks in advance.\", \"post_time\": \"2015-04-06 12:48:30\" },\n\t{ \"post_id\": 7268, \"topic_id\": 1666, \"forum_id\": 8, \"post_subject\": \"Re: ECL Interface\", \"username\": \"rtaylor\", \"post_text\": \"Brandon,\\n\\nAn INTERFACE is designed to pre-define a set of parameters that can be passed to a function as a single unit. If you look in the ECL Language Reference under Function Definitions (Parameter Passing) at the Passing DATASET parameters section, you'll find that to pass a DATASET parameter to a function requires a DATASET data type that specifies the exact layout (RECORD structure) of the type of DATASET that will be passed. Try just passing the DATASET as a separate parameter, not in the INTERFACE/MODULE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-06 20:36:38\" },\n\t{ \"post_id\": 7261, \"topic_id\": 1666, \"forum_id\": 8, \"post_subject\": \"ECL Interface\", \"username\": \"brandon.walker\", \"post_text\": \"I am trying to create an interface, but am having an issue when a member is a dataset and am hoping someone can help. For example:\\n\\nITest := INTERFACE\\nEXPORT STRING BuildVerion;\\nEXPORT DATASET File;\\nEND;\\n\\nTest := MODULE(ITest)\\nEXPORT BuildVersion := '20150406';\\nEXPORT File := DATASET(LogicalFileName, Layout, THOR);\\nEND;\\n\\nI'm getting a syntax error for the Test module when declaring File: \\nsyntax error near ":=" : expected datarow, identifier, pattern-name, action, pattern\", \"post_time\": \"2015-04-06 18:41:03\" },\n\t{ \"post_id\": 13303, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,select column1,coulumn2 from table_name group by column1;\\n\\nWhat would be the equivalent of this?
How about this:\\nSORT(TABLE(table_name,{column1,coulumn2}),column1);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-17 14:07:35\" },\n\t{ \"post_id\": 13293, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"vchinta\", \"post_text\": \"select column1,coulumn2 from table_name group by column1;\\n\\nWhat would be the equivalent of this?\\n\\nThanks for your help,\\nVishnu\", \"post_time\": \"2016-11-16 23:33:47\" },\n\t{ \"post_id\": 7278, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"rtaylor\", \"post_text\": \"Srini,select col1, count(distinct col2) from table_name GROUP BY Col1
\\nOK, that's a little more complex, but still fairly straight-forward:\\ntable_name := DATASET([{2,1},{2,5},{1,4},{1,1},{1,5},\\n {1,4},{3,1},{1,5},{3,4},{3,4},{3,4}],{INTEGER col1,INTEGER col2});\\nt := TABLE(table_name,{col1,col2});\\ns := SORT(t,col1,col2);\\nr := ROLLUP(s,TRANSFORM(LEFT),col1,col2);\\nTABLE(r,{col1,DistinctCol2Cnt := COUNT(GROUP)},col1);
I start with the vertical slice TABLE, but that isn't necessary for this example (just good form for when you're working with a lot more fields in your dataset). The SORT and ROLLUP do the real work, then it's just a crosstab for the result.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-07 15:22:43\" },\n\t{ \"post_id\": 7275, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"omnibuzz\", \"post_text\": \"I also wanted to pre-empt the obvious solution. I wanted to know if there is a way to do it in one pass. \\n\\nI am aware of this solution (typing directly, so it may have errors). But, I feel it is inefficient.\\n\\ntbl1 := TABLE(table_name,{col1,col2}, col1,col2);\\nTABLE(tbl1,{col1,COUNT(col2)},col1);\\n
\\n\\nThanks\\nSrini\", \"post_time\": \"2015-04-07 14:58:10\" },\n\t{ \"post_id\": 7274, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"omnibuzz\", \"post_text\": \"Of course. My bad. I ended up over simplifying my question. I wanted an equivalent of this.\\n\\nselect col1, count(distinct col2) from table_name GROUP BY Col1\\n
\\n\\nThanks\\nSrini\", \"post_time\": \"2015-04-07 14:54:25\" },\n\t{ \"post_id\": 7273, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"Re: ECL equivalent to SQL\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nThis SQL:select count(distinct column_name) from table_name
should just translate to this ECL:COUNT(TABLE(table_name,{column_name},column_name));
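A tiny worked example (values made up): the crosstab TABLE keeps one row per distinct value, so counting it gives the distinct count:

table_name := DATASET([{'A'},{'B'},{'A'},{'C'},{'B'}], {STRING1 column_name});
OUTPUT(COUNT(TABLE(table_name,{column_name},column_name)));   // 3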
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-07 14:47:14\" },\n\t{ \"post_id\": 7272, \"topic_id\": 1669, \"forum_id\": 8, \"post_subject\": \"ECL equivalent to SQL\", \"username\": \"omnibuzz\", \"post_text\": \"What is the equivalent ECL code for the following SQL Query (assuming that there is a dataset table_name already created with the record definition having a STRING column_name)?\\n\\nselect count(distinct column_name) from table_name
\\n\\nThanks\\nSrini\", \"post_time\": \"2015-04-07 14:42:30\" },\n\t{ \"post_id\": 7288, \"topic_id\": 1671, \"forum_id\": 8, \"post_subject\": \"Re: RIGHT OUTER JOIN - Expected Behavior?\", \"username\": \"kps_mani\", \"post_text\": \"Thanks a lot Bob for clarifying it.\", \"post_time\": \"2015-04-08 17:10:43\" },\n\t{ \"post_id\": 7287, \"topic_id\": 1671, \"forum_id\": 8, \"post_subject\": \"Re: RIGHT OUTER JOIN - Expected Behavior?\", \"username\": \"bforeman\", \"post_text\": \"I don't think it is an inconsistency but more of an efficiency issue. Never assume that the result of any JOIN type will be automatically sorted. If you need to have your results sorted in some way simple use a SORT after your JOIN, and let the compiler do the optimizing for you.\\n\\nThere is an excellent blog by the compiler writer himself, Gavin Halliday, regarding JOIN behavior:\\n\\nhttp://hpccsystems.com/blog/joins\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-08 16:44:35\" },\n\t{ \"post_id\": 7286, \"topic_id\": 1671, \"forum_id\": 8, \"post_subject\": \"Re: RIGHT OUTER JOIN - Expected Behavior?\", \"username\": \"kps_mani\", \"post_text\": \"Thanks for confirming. In that case, why are we not having LEFT Outer Join also sorted. Are we not seeing an inconsistency here wherein LEFT OUTER JOIN will not do the default sorting and on the other hand RIGHT OUTER JOIN does the default sorting?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-04-08 16:40:00\" },\n\t{ \"post_id\": 7285, \"topic_id\": 1671, \"forum_id\": 8, \"post_subject\": \"Re: RIGHT OUTER JOIN - Expected Behavior?\", \"username\": \"bforeman\", \"post_text\": \"Hi Subbu,\\n\\nI am pretty sure this is expected behavior. The RIGHT recordset is sorted to efficiently compare with the LEFT recordset, so the RIGHT OUTER JOIN appears to be sorted.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-08 16:27:36\" },\n\t{ \"post_id\": 7284, \"topic_id\": 1671, \"forum_id\": 8, \"post_subject\": \"RIGHT OUTER JOIN - Expected Behavior?\", \"username\": \"kps_mani\", \"post_text\": \"I was testing the ECL Playground - LEFT and RIGHT Outer Join ECL for learning purpose. It seems that LEFT OUTER JOIN produces the result without sorting on the Joined Value (Value 1). However, RIGHT OUTER JOIN produces the result set with Sorting on Joined value (Value 1). I could see the similar thing for FULL OUTER JOIN as well. Is it the expected behavior of RIGHT OUTER and FULL OUTER JOIN? I was under impression that RIGHT OUTER JOIN keeps the same order in the RIGHT file without sorting as like SQL. \\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-04-07 21:11:29\" },\n\t{ \"post_id\": 7293, \"topic_id\": 1673, \"forum_id\": 8, \"post_subject\": \"How to fetch the logical file list in scheduler concept\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nWhen i try to run the below code in scheduler concept, i am getting the error. \\n\\nds1 := STD.File.LogicalFileList(search_pattern);
\\n\\nError:\\nFileServices.LogicalFileList cannot access Dali in this context - this normally means it is being called from a thor slave\\n\\nhow to fix the error or how to fetch the logical file list in scheduler concept.\\n\\nThanks\", \"post_time\": \"2015-04-09 11:40:12\" },\n\t{ \"post_id\": 7295, \"topic_id\": 1674, \"forum_id\": 8, \"post_subject\": \"Fetch Logical File List error\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nWhen i try to run the below code in scheduler concept, i am getting the error. \\n\\nImport STD;\\n\\nEXPORT GetTime() := FUNCTION\\n//function to get time\\nstring6 getTime() := BEGINC++\\n// Declarations\\nstruct tm localt; // localtime in "tm" structure\\ntime_t timeinsecs; // variable to store time in secs\\n\\n// Get time in sec since Epoch\\ntime(&timeinsecs); \\n// Convert to local time\\nlocaltime_r(&timeinsecs,&localt);\\n// Format the local time value\\nstrftime(__result, 8, "%H%M%S", &localt); // Formats the localtime to HHMMSS\\n\\nENDC++;\\n\\nreturn getTime();\\nEND;\\n\\nExport Normalized_Term_Process := Module\\nbase_pattern := 'innovation::case_law::test_schedule';\\nbasepath := '~' + base_pattern;\\nschedulelog := 'schedulelog';\\noutfile_name := 'rfc_anchors';\\nsearch_pattern := base_pattern+'::'+outfile_name+'_*';\\nds1 := STD.File.LogicalFileList(search_pattern);\\n\\nsplit_count := count(ds1)+1;\\n//split_count := 1;\\ncount_split := 2;\\nstart_index := ((split_count-1)*count_split)+1;\\nend_index := split_count * count_split;\\n\\nmyrec := record\\ninteger id;\\nstring25 name;\\nend;\\n\\nrec_log := Record\\ninteger split_index;\\nstring file_name;\\nstring12 file_time;\\nEnd;\\n\\nall_data := DATASET([{1, 'bala'},{2, 'arun'},{3,'David'}, {4, 'mike'}, {5, 'Michael'},{6,'Hari'}], myrec)[start_index..end_index]; \\n\\n\\nOutputFile := basepath + '::' + outfile_name + '_' + split_count;\\nOutputFileName := basepath + '::' + outfile_name + '_' + (split_count-1);\\nDellog := basepath +'::'+schedulelog+'_'+(split_count-3);\\nInlog := basepath +'::'+schedulelog+'_'+(split_count-2);\\nOutlog := basepath +'::'+schedulelog+'_'+ (split_count-1);\\n\\nSplit_Data_Output := Output(all_data,, OutputFile, Overwrite);\\n\\nt11 := GetTime();\\nt12 := t11[1..2]+':'+t11[3..4]+':'+t11[5..6];\\n//t2 := GetTime();\\n\\nds_schedule := if(split_count>2, DATASET(Inlog, rec_log, thor) , DATASET([], rec_log) );\\nds_addition := DATASET([{(split_count-1), OutputFileName, t12}], rec_log);\\nds_lognew := SORT(ds_schedule + ds_addition, split_index);\\n\\nschedule_log := Output(ds_lognew, , Outlog, overwrite);\\n\\n\\nExport Run := Sequential\\n(\\n\\tSplit_Data_Output,\\n\\tSTD.File.DeleteLogicalFile(Dellog, TRUE),\\n\\tschedule_log,\\n\\tNOTIFY(EVENT('WRITE_SPLIT','Starts writing the split files'));\\n);\\nEnd;\\n\\nNormalized_Term_Process.Run : WHEN(EVENT('WRITE_SPLIT','*'), COUNT(3));\\n\\n
\\n\\nError:\\nFileServices.LogicalFileList cannot access Dali in this context - this normally means it is being called from a thor slave\\n\\nhow to fix the error or how to fetch the logical file list in scheduler concept.\\n\\nThanks\", \"post_time\": \"2015-04-09 14:23:35\" },\n\t{ \"post_id\": 7326, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi Rich,\\n\\nThanks for the help.\\n\\nJust to play around a bit I changed the layout of my dataset to this:\\n\\n\\nunsigned id;\\nstring name;\\nunsigned8 __internal_fpos__ := 0;\\n
\\n\\nAfter building the index, when I looked into the layout of the index's logical file I see the exact same layout. This feels odd. \\n\\nSo, technically I should not be getting this error as both the layouts (the dataset and the index) are exactly the same.\\n\\nPlease explain.\\n\\nThanks\", \"post_time\": \"2015-04-13 09:40:01\" },\n\t{ \"post_id\": 7313, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"rtaylor\", \"post_text\": \"I had given a small version of the actual scenario that am facing in my realtime application. In my application I have a dataset which is indexed in 10 different ways and I need to merge the fetch from all these 10 indexes and do some further processing in the merged dataset. So I need to transform the result of every fetch into a common layout and then do a merge.\\n\\nAnd thes index calls and transform need to be done a million times. I thought if there would be some way to avoid the transform, that would improve my performance quite a bit.
OK, the way I would approach this would be to have 11 INDEXes defined. One would only have each record's unique identifier field as its single search term, and a payload of all the rest of the fields. The other ten would have your 10 different search terms and each would only payload the unique identifier of the record. Your query then would use the search criteria to get the list of unique ids for that criteria. Then you just combine the 10 sets of unique ids to get your result set from the master INDEX. \\n\\nThis code demonstrates the concept, but with only 3 search keys and one master payload INDEX:EmployeeLayout := RECORD\\n UNSIGNED id;\\n STRING15 name;\\n STRING15 field1;\\n STRING15 field2;\\nEND;\\n\\nEmployeeDS := DATASET([{10, 'JOHN','a','A'}, \\n {20, 'ADAM','b','B'}, \\n {25, 'ADAM','a','B'}, \\n {30, 'STEVE','c','C'}], EmployeeLayout);\\n\\n //first the payload key --search only by the record's unique identifier\\nID_Key := INDEX(EmployeeDS, {id}, {EmployeeDS}, '~test::ID_Key');\\n //then the search keys --payload is only the record's unique identifier\\nEmployeeNameKey := INDEX(EmployeeDS, {name}, {ID}, '~test::employeeNameKey');\\nEmployeeField1Key := INDEX(EmployeeDS, {field1}, {ID}, '~test::employeeField1Key');\\nEmployeeField2Key := INDEX(EmployeeDS, {field2}, {ID}, '~test::employeeField2Key');\\nB1 := BUILDINDEX(ID_Key, OVERWRITE);\\nB2 := BUILDINDEX(EmployeeNameKey, OVERWRITE);\\nB3 := BUILDINDEX(EmployeeField1Key, OVERWRITE);\\nB4 := BUILDINDEX(EmployeeField2Key, OVERWRITE);\\nBldIdx := PARALLEL(B1,B2,B3,B4);\\nNumSearchKeys := 3; //how many search keys are there?\\n\\n//******************************************************\\n//get the search results from each INDEX, just the ID field and put them in a SET\\nSetName := SET(EmployeeNameKey(name = 'ADAM'),ID);\\nSetField1 := SET(EmployeeField1Key(field1 = 'a'),ID);\\nSetField2 := SET(EmployeeField2Key(field2 = 'B'),ID);\\n\\n//aggregate all the result sets\\nAllIDs := SetName + SetField1 + SetField2;\\n\\n//For match of all criteria:\\nRes1DS := DATASET(AllIDs,{UNSIGNED id});\\nSetAllMatches := SET(TABLE(Res1DS,{id,Cnt := COUNT(GROUP)},ID)(Cnt = NumSearchKeys),ID);\\nResultAllMatches := ID_Key(ID IN SetAllMatches);\\n\\n//For match of at least one criteria:\\nRes2DS := DATASET(AllIDs,{UNSIGNED id});\\nSetAnyMatches := SET(TABLE(Res1DS,{id},ID),ID);\\nResultAnyMatches := ID_Key(ID IN SetAnyMatches);\\n\\nSEQUENTIAL(BldIdx,OUTPUT(ResultAllMatches),OUTPUT(ResultAnyMatches));
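\\n\\nIf it helps, the same lookup logic can be wrapped in a reusable function so the search-and-combine code is written only once. This is just a sketch built on the definitions above; the function name and the hard-coded match count are invented for illustration:\\n\\n
//sketch only -- reuses the INDEX definitions from the example above\\nGetAllMatches(STRING15 nm, STRING15 f1, STRING15 f2) := FUNCTION\\n  //combine the IDs returned by each search key\\n  AllIDs := SET(EmployeeNameKey(name = nm),ID) +\\n            SET(EmployeeField1Key(field1 = f1),ID) +\\n            SET(EmployeeField2Key(field2 = f2),ID);\\n  //count how many search keys matched each ID\\n  Cnts := TABLE(DATASET(AllIDs,{UNSIGNED id}),{id, Cnt := COUNT(GROUP)},id);\\n  //keep only the IDs that matched all three keys, then pull the payload records\\n  RETURN ID_Key(ID IN SET(Cnts(Cnt = 3),id));\\nEND;\\n//usage: OUTPUT(GetAllMatches('ADAM','a','B'));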
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 19:10:09\" },\n\t{ \"post_id\": 7312, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi Rich\\n\\nThank you for the reply again.\\n\\nI have one more question.\\n\\nI agree that this is not a scenario where we should worry about performance. \\n\\nBut what if I need to merge the 2 datasets (fetch by id and fetch by name) and do some operation on the merged dataset?\\n\\nI had given a small version of the actual scenario that am facing in my realtime application. In my application I have a dataset which is indexed in 10 different ways and I need to merge the fetch from all these 10 indexes and do some further processing in the merged dataset. So I need to transform the result of every fetch into a common layout and then do a merge.\\n\\nAnd thes index calls and transform need to be done a million times. I thought if there would be some way to avoid the transform, that would improve my performance quite a bit.\\n\\nAny tips here please? \\n\\nThanks\", \"post_time\": \"2015-04-10 18:24:47\" },\n\t{ \"post_id\": 7310, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"rtaylor\", \"post_text\": \"Just like this:EmployeeLayout := RECORD\\n UNSIGNED id;\\n STRING15 name;\\nEND;\\n\\nEmployeeDS := DATASET([{10, 'JOHN'}, {20, 'ADAM'}, {30, 'STEVE'}], EmployeeLayout);\\nEmployeeKey := INDEX(EmployeeDS, {id}, {EmployeeDS}, '~test::employeeKey');\\nEmployeeNameKey := INDEX(EmployeeDS, {name}, {EmployeeDS}, '~test::employeeNameKey');\\n\\nEmployeeFunction1 ( DATASET(RECORDOF(EmployeeKey)) employees) := \\n PROJECT(employees\\n , TRANSFORM({unsigned id; string name; unsigned salary}\\n , SELF.SALARY := 60000; SELF := LEFT));\\n\\nEmployeeFunction2 ( DATASET(RECORDOF(EmployeeNameKey)) employees) := \\n PROJECT(employees\\n , TRANSFORM({unsigned id; string name; unsigned salary}\\n , SELF.SALARY := 60000; SELF := LEFT));\\n\\nsample2 := EmployeeFunction1(EmployeeKey(id = 30));\\nsample3 := EmployeeFunction2(EmployeeNameKey(name = 'ADAM'));\\n\\nSEQUENTIAL(PARALLEL(BUILDINDEX(EmployeeKey, OVERWRITE),\\n BUILDINDEX(EmployeeNameKey, OVERWRITE)),\\n PARALLEL(sample2,\\n sample3));
\\nRemember that ECL is a declarative language -- you are not writing executable code!! There are just a few areas in ECL where you need to really think about performance, and this isn't one of them. There is no reason not to simply define two separate functions that take different parameters but do essentially the same thing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 17:12:35\" },\n\t{ \"post_id\": 7309, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Thank you again for the reply.\\n\\nThat brings up my next question:\\n\\nSay I have another index built on the same dataset but with a different key field.\\n\\n\\nEmployeeNameKey := INDEX(EmployeeDS, {name}, {EmployeeDS}, '~test::employeeNameKey');\\nBUILDINDEX(EmployeeNameKey, OVERWRITE);\\n
\\n\\nThere are times when I would need to use one index and there are times when I would need to use the other index. But I would be able to write a common function to handle results from both the index so that I need not transform the index result everytime (this seems to be a performance overhead).\\n\\nWhat can I do to achieve this?\\n\\n\\nEmployeeLayout := RECORD\\n\\tUNSIGNED id;\\n\\tSTRING15 name;\\nEND;\\n\\nEmployeeDS := DATASET([{10, 'JOHN'}, {20, 'ADAM'}, {30, 'STEVE'}], EmployeeLayout);\\n\\nEmployeeKey := INDEX(EmployeeDS, {id}, {EmployeeDS}, '~test::employeeKey');\\nBUILDINDEX(EmployeeKey, OVERWRITE);\\n\\nEmployeeNameKey := INDEX(EmployeeDS, {name}, {EmployeeDS}, '~test::employeeNameKey');\\nBUILDINDEX(EmployeeNameKey, OVERWRITE);\\n\\nEmployeeFunction ( DATASET(EmployeeLayout) employees) := FUNCTION\\n\\tsal := PROJECT(employees\\n\\t\\t, TRANSFORM({unsigned id; string name; unsigned salary}\\n\\t\\t\\t\\t, SELF.SALARY := 60000; SELF := LEFT;));\\n\\tRETURN sal;\\nEND;\\n\\naSample := EmployeeKey(id = 30);\\n\\nsample2 := EmployeeFunction(aSample);\\n\\nsample2;\\n\\nanotherSample := EmployeeNameKey(name = 'ADAM');\\n\\nsample3 := EmployeeFunction(anotherSample);\\n\\nsample3;\\n
\\n\\nBottomline even though the different indexes are indexed on different fields of the employee dataset, they are all basically the same employee data and I would like to be able to treat them using a common layout. How do I do it?\\n\\nThanks\", \"post_time\": \"2015-04-10 15:23:56\" },\n\t{ \"post_id\": 7308, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"rtaylor\", \"post_text\": \"Try it this way:\\nEmployeeLayout := RECORD\\n UNSIGNED id;\\n STRING name;\\nEND;\\n\\nEmployeeDS := DATASET([{10, 'JOHN'}, {20, 'ADAM'}, {30, 'STEVE'}], EmployeeLayout);\\n\\nEmployeeKey := INDEX(EmployeeDS, {id}, {EmployeeDS}, '~test::employeeKey');\\nBUILDINDEX(EmployeeKey, OVERWRITE);\\n\\n\\nEmployeeFunction ( DATASET(RECORDOF(EmployeeKey)) employees) := FUNCTION\\n sal := PROJECT(employees\\n , TRANSFORM({unsigned id; string name; unsigned salary}\\n , SELF.SALARY := 60000; SELF := LEFT;));\\n RETURN sal;\\nEND;\\n\\naSample := EmployeeKey(id = 30);\\n\\nsample2 := EmployeeFunction(aSample);\\n\\nsample2;
You're not passing a record from the DATASET, you're passing a record from the INDEX, so your FUNCTION needs to be defined to expect the INDEX layout records.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 15:10:43\" },\n\t{ \"post_id\": 7307, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Thanks again for the reply.\\n\\nHere's what I tried:\\n\\n\\nEmployeeLayout := RECORD\\n\\tUNSIGNED id;\\n\\tSTRING name;\\nEND;\\n\\nEmployeeDS := DATASET([{10, 'JOHN'}, {20, 'ADAM'}, {30, 'STEVE'}], EmployeeLayout);\\n\\nEmployeeKey := INDEX(EmployeeDS, {id}, {EmployeeDS}, '~test::employeeKey');\\nBUILDINDEX(EmployeeKey, OVERWRITE);\\n\\n\\nEmployeeFunction ( DATASET(EmployeeLayout) employees) := FUNCTION\\n\\tsal := PROJECT(employees\\n\\t\\t, TRANSFORM({unsigned id; string name; unsigned salary}\\n\\t\\t\\t\\t, SELF.SALARY := 60000; SELF := LEFT;));\\n\\tRETURN sal;\\nEND;\\n\\naSample := EmployeeKey(id = 30);\\n\\nsample2 := EmployeeFunction(aSample);\\n\\nsample2;\\n
\\n\\nI get this error:\\n\\nError: Parameter employees type mismatch - expected Table of employeelayout, given Table of <unnamed> (19, 29), 2064,
\\n\\nThe layout don't seem to match. What am I missing here ... ?\\n\\nThanks\", \"post_time\": \"2015-04-10 15:02:13\" },\n\t{ \"post_id\": 7306, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"rtaylor\", \"post_text\": \"chanbchen,\\n\\nIf your id field is NOT unique then you will simply get the set of records that match the search criteria. That is also true if only one record is found. In both scenarios (using FETCH or not) the result is a record set, even if there is only one record in it. \\n\\nUsing a payload INDEX means you do not need a FETCH, therefore your performance is always better with a payload INDEX because it requires fewer disk reads to get the same data.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 14:14:46\" },\n\t{ \"post_id\": 7305, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThank you for the reply.\\n\\nSorry to ask a silly question - say if id field is NOT unique. In that case, would fpos be mandatory? And would it be mandatory to use fetch() function in that case? or would this be okay:\\n\\nemployeeKey(KEYED(id=30));
\\n\\nAnd should I use a transform in this case?\\n\\nThanks\", \"post_time\": \"2015-04-10 13:54:25\" },\n\t{ \"post_id\": 7303, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"rtaylor\", \"post_text\": \"My question basically is say I have a function that takes dataset(recordof(indexDs)) as input; when I call this function by passing fetch, this seems to give a layout mismatch error. So the records fetched from a index - will they not have the same layout as the dataset from which the index was built originally (in this case, indexDS)?\\n\\nSo everytime I have to do a transform after the fetch. I'd think this should add a performance overhead. Or am I missing something...?
In your first post you based your question on this line of code:fetch := employeeKey(id = 20);
and called it a "fetch" but this is actually just a filter on an INDEX, which, assuming the id is unique, will return one record from the INDEX. \\n\\nBut your question about using a TRANSFORM implies that you're actually asking about using the FETCH function. Here is your code that I have modified to show only what is needed by FETCH: l := {unsigned4 id; string name};\\nds := dataset('~sample::employee', {l; unsigned __fpos {virtual(fileposition)}}, thor);\\n\\nemployeekey := index(indexDs, {id, __fpos}, '~indexes:employeekey');\\nbuildindex(employeekey, overwrite);\\n\\n//the employeekey INDEX is then used in FETCH like this:\\nfetched := FETCH(ds,employeeKey(id = 20),__fpos);
Note that I did not need to define a TRANSFORM since FETCH defaults to returning all the fields from the DATASET that you're FETCHing from.\\n\\nHowever, your code appears not to be defining a simple search-term INDEX but a payload INDEX. That means you do not need the FETCH function at all. And you also do not need the __fpos field that is only needed by FETCH, so here's that version of your code:l := {unsigned4 id; string name};\\nds := dataset('~sample::employee', l, thor);\\n\\nemployeekey := index(ds, {id}, {ds}, '~indexes:employeekey');\\nbuildindex(employeekey, overwrite);\\n\\n//the employeekey INDEX is then used without FETCH like this:\\nfetched := employeeKey(id = 20);
Again assuming the id is unique, this code will return one record from the INDEX, which will include all the fields that are in the dataset, because the payload on the INDEX adds all the rest of the fields from the DATASET that have not already been used as search terms in the INDEX.\\n\\nAlso note that the PROJECT you used is not required in either version.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 13:09:39\" },\n\t{ \"post_id\": 7301, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Re: Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"My question basically is say I have a function that takes dataset(recordof(indexDs)) as input; when I call this function by passing fetch, this seems to give a layout mismatch error. So the records fetched from a index - will they not have the same layout as the dataset from which the index was built originally (in this case, indexDS)?\\n\\nSo everytime I have to do a transform after the fetch. I'd think this should add a performance overhead. Or am I missing something...?\\n\\nThanks\", \"post_time\": \"2015-04-10 12:02:32\" },\n\t{ \"post_id\": 7300, \"topic_id\": 1676, \"forum_id\": 8, \"post_subject\": \"Layout of Index ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have a index as shown below:\\n\\n\\nl := {unsigned4 id; string name};\\nds := dataset('~sample::employee', l, thor);\\n\\nil := {l; unsigned __fpos {virtual(fileposition));\\n\\nindexDs := project(ds, transform(il, self := left));\\nemployeekey := index(indexDs, {id, __fpos}, {ids}, '~indexes:employeekey');\\nbuildindex(employeekey, overwrite);\\n
\\n\\nNow I do a fetch :\\n\\n\\nfetch := employeeKey(id = 20);\\n
\\n\\nWill fetch have the same layout as indexDs? Or should I do a transform?\\n\\nThanks\", \"post_time\": \"2015-04-10 11:48:35\" },\n\t{ \"post_id\": 7393, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"ghalliday\", \"post_text\": \"It looks like it might be an instance of this issue:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-12677\", \"post_time\": \"2015-04-17 12:00:03\" },\n\t{ \"post_id\": 7378, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"bforeman\", \"post_text\": \"I am asking the original reporter to confirm.\\n\\nYou can also verify this by downloading the 5.2 HPCC VM. \\n\\nhttp://hpccsystems.com/download/hpcc-vm-image\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 14:10:44\" },\n\t{ \"post_id\": 7377, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Hi Bob,\\nThink this issue is solved in version servser 5.2.0. Can you please confirm that? Got to know from below link.\\nhttps://track.hpccsystems.com/i#browse/HPCC-12677\\nthanks,\\nPius\", \"post_time\": \"2015-04-16 13:59:23\" },\n\t{ \"post_id\": 7375, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Hi Bob,\\nThink this issue is resolved in version 5.2.0 . Can you please confirm ? \\nGot to know from the below link.\\nhttps://track.hpccsystems.com/i#browse/HPCC-12677\\n\\nThanks,\\nPius\", \"post_time\": \"2015-04-16 13:09:00\" },\n\t{ \"post_id\": 7372, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"bforeman\", \"post_text\": \"Since this is an XML stream, I would try to set the TERMINATOR to identify the closing tag as the RECORD terminator. Example: (TERMINATOR(['</END-DOCUMENT>'])\\n\\nOther than that, I am out of ideas The next step would be to log this as an issue and have the development team review this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 12:14:59\" },\n\t{ \"post_id\": 7370, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Hi Bob,\\n Thanks a lot again for reply. I figured out the error is dataset creation itself. I tried what you suggested,but it is also throwing error.Explaining you in detail.\\n\\nI am having a file in which each line is a xml. I am spraying it as delimited file and reading it as csv. While creating the dataset from the sprayed file , i encounter this issue. \\nError: System error: 0: Graph[1], csvread[2]: SLAVE 10.144.110.3:21000: File ~hpcc_serialset3 contained a line of length greater than 10485760 bytes. (0, 0), 0, \\nCode : \\n\\n#option('outputlimit',2000)\\nwordLayout := RECORD,maxlength(20000000)\\n\\tSTRING word;\\nEND;\\nsourceFile := DATASET('~hpcc_serialset3',wordLayout,CSV(maxlength(20000000),SEPARATOR([''])));\\noutput(sourceFile,,'~francip1::input',thor);\", \"post_time\": \"2015-04-16 07:24:18\" },\n\t{ \"post_id\": 7363, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"bforeman\", \"post_text\": \"Yes, MAXLENGTH is not valid in parsing, but what about the source RECORD of the parsing?\\n\\nSomething like this:\\n\\n
rec := RECORD,MAXLENGTH(1000000000)\\n INTEGER2 seq;\\n STRING line; //this field contains my XML to parse\\nEND;
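\\n\\nFor the single-field "one XML document per line" layout described in this thread, that might look like the following sketch (the logical file name is made up here, and the sizes should be adjusted to the data). Note that MAXLENGTH appears both on the RECORD and in the CSV options of the DATASET:\\n\\n
//sketch only -- hypothetical file name\\nwordLayout := RECORD,MAXLENGTH(20000000)\\n  STRING line; //one complete XML document per physical line\\nEND;\\n//no field separator, so the whole line lands in the single field\\nsourceFile := DATASET('~sample::xml_lines', wordLayout, CSV(MAXLENGTH(20000000), SEPARATOR('')));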
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-15 16:25:10\" },\n\t{ \"post_id\": 7362, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Hi Bob,\\nThe solution provided is not suitable for xml parsing. MAXLENGTH option is not valid in xml parsing.\\n\\nI am spraying the file as xml and parsing it as xml as well. I encounter the issue while doing so. \\n\\nCan you please provide me any other alternative solution? \\n\\nThanks,\\nPius\", \"post_time\": \"2015-04-15 16:06:18\" },\n\t{ \"post_id\": 7343, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"bforeman\", \"post_text\": \"Try adding the MAXLENGTH attribute as well to your RECORD structure.\\n\\nHere is a post where the programmer was running into a similar issue when parsing a long line. He needed to add MAXLENGTH to the RECORD as well.\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=1661 \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-14 15:15:11\" },\n\t{ \"post_id\": 7342, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Hallday for ur reply.But unfortunately i am getting same error after trying your suggestion.\\n\\nIs there a limitation in HPCC, that a single record cannot be more than 10MB in length?\\n\\nPlease enlighten me.\", \"post_time\": \"2015-04-14 15:06:23\" },\n\t{ \"post_id\": 7339, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"ghalliday\", \"post_text\": \"I think you need to add a MAXLENGTH attribute as an attribute of CSV inside the dataset definition.\\n\\nE.g.,\\n\\n\\ninputTable := dataset('~.::csvin',inputRecord,CSV(MAXLENGTH(100000000)));\\n
\\n\\nThe system is protecting you against missing line termination characters accidentally creating giant rows.\", \"post_time\": \"2015-04-14 13:17:44\" },\n\t{ \"post_id\": 7338, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"Hi Bob,\\nThanks for your repsonse. I have already tried the suggestion which you gave. I used #option('outputlimit',1000) it failed,so i tried #option('outputlimit',2000)but still i get the same error shown below.\\n\\nError: System error: 0: Graph[6], csvread[9]: SLAVE 10.144.110.3:22000: File ~~francip1::gibberish::serialset contained a line of length greater than 10485760 bytes.\\n\\nCan you please let me know what is the reason for this issue?\", \"post_time\": \"2015-04-14 10:49:11\" },\n\t{ \"post_id\": 7332, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Re: Error reading line which greater than 10 MB\", \"username\": \"bforeman\", \"post_text\": \"See #OPTION and outputLimit \\nDefault: 10 Sets maximum size (in Mb) of result stored in workunit. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-13 20:13:25\" },\n\t{ \"post_id\": 7317, \"topic_id\": 1681, \"forum_id\": 8, \"post_subject\": \"Error reading line which greater than 10 MB\", \"username\": \"pius_francis\", \"post_text\": \"I am having a dataset where each record corresponds to a xml file. I am getting an erro while processing the dataset if the size of the record or xml is greater than 10 MB. Is there is a way to rectify it.\", \"post_time\": \"2015-04-11 08:48:20\" },\n\t{ \"post_id\": 7340, \"topic_id\": 1684, \"forum_id\": 8, \"post_subject\": \"Re: MP Link Closed - Fetch Query Test\", \"username\": \"bforeman\", \"post_text\": \"Hi Subbu,\\n\\nIf you are running this on a HPCC VM then you are aware that targeting hTHOR and THOR are essentially equivalent, since your VM is a single node THOR. I would be curious if you could test it on a multi-node THOR. My test on my 3-node THOR does not show this error.\\n\\nI think the development team will want to look at your THOR logs to see if something can be determined prior to the ERROR log. If you could attach the THOR master and slave logs I will ask development to look at it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-14 14:58:06\" },\n\t{ \"post_id\": 7336, \"topic_id\": 1684, \"forum_id\": 8, \"post_subject\": \"MP Link Closed - Fetch Query Test\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI have facing a strange problem of getting MP Link closed error in Thor (I am using VM with version 5.0.4). However, when I run the same ECL in HTHOR, it is working perfectly fine. 
I believe that this issue has been reported earlier version of the HPCC platform and the response is that it will get fixed in the later versions.\\n\\nHere is what I have in the Fetch Query\\n\\nImport $;\\nbasefile := $.File_Persons_Slim.FilePlus;\\nbasekey := $.File_Persons_Slim.IDX_CSZ_lname_fname;\\ncszfile := $.File_LookupCSZ.FilePlus;\\ncszkey := $.File_LookupCSZ.IDX_St_City;\\n\\nEXPORT Fetch_Persons_StateLFName (STRING25 LName, STRING15 FName, String2 StateID) := FUNCTION\\n\\n\\tStateRecs := FETCH(cszfile, cszkey(State=StateID), RIGHT.RecPos);\\n\\tSetCSZIDs := SET(StateRecs, CSZ_ID);\\n\\tFilteredKey := IF (FName ='', basekey(CSZ_ID IN SetCSZIDs, LastName=LName), basekey(CSZ_ID IN SetCSZIDs, LastName=LName, FirstName=FName));\\n\\tFetchPersons := FETCH(Basefile, FilteredKey, Right.RecPos);\\n\\t\\n\\tOutRec := RECORD\\n\\t\\tRECORDOF(basefile) AND NOT [RecPos, CSZ_ID];\\n\\t\\tRECORDOF(cszfile) AND NOT [RecPos, CSZ_ID];\\n\\tEND;\\n\\n\\tOutRec JoinThem(cszfile R, basefile L) := TRANSFORM\\n\\t\\tSELF := L;\\n\\t\\tSELF := R;\\n\\tEND; \\n\\n\\tRETURN JOIN(StateRecs, FetchPersons, LEFT.CSZ_ID = RIGHT.CSZ_ID, JoinThem(LEFT, RIGHT), ALL); \\nEND;\\n\\n\\nHere is what I have in the BWR for testing the above Fetch Query.\\nimport $;\\nOutput($.Fetch_Persons_StateLFName('SMITH','', 'PA'));\\n\\nIt seems pretty simple. However, it is throwing the MP Link closed error while running the BWR. There was no error while compiling the Export function listed above. \\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-04-14 04:25:18\" },\n\t{ \"post_id\": 7355, \"topic_id\": 1685, \"forum_id\": 8, \"post_subject\": \"Re: Regarding running BWR_Specificities in SALT.\", \"username\": \"bforeman\", \"post_text\": \"Can you post your first 10 errors? As I said, sometimes fixing the first one can fix all of the rest. If you say your folder structure is correct, you may be missing a definition somewhere. Without seeing your specific errors I can only guess.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-15 12:08:50\" },\n\t{ \"post_id\": 7352, \"topic_id\": 1685, \"forum_id\": 8, \"post_subject\": \"Re: Regarding running BWR_Specificities in SALT.\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Bob,\\n\\nThe errors are coming since specificities files are not getting correctly generated and it is becoming tedious to fix 100+ errors, yes the folder structures are maintained correctly as shown in the videos, can you please provide inputs?\\n\\n\\nRegards,\\nAbhishek,\\n9962210131.\", \"post_time\": \"2015-04-15 05:43:47\" },\n\t{ \"post_id\": 7341, \"topic_id\": 1685, \"forum_id\": 8, \"post_subject\": \"Re: Regarding running BWR_Specificities in SALT.\", \"username\": \"bforeman\", \"post_text\": \"Hi Abhishek,\\n\\nWhen you get that many errors, usually it is caused by a referencing error, and correcting one error can resolve all others.\\n\\nDoes your repository folder layout match the layout shown in the training videos?\\n\\nWhat specific errors are you getting? 
That would be a good start to finding your issue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-14 15:02:37\" },\n\t{ \"post_id\": 7337, \"topic_id\": 1685, \"forum_id\": 8, \"post_subject\": \"Regarding running BWR_Specificities in SALT.\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nCan you please let me know that how can I generate specificities file brand new that is when I am following the training videos, my previously generated specificities field values and other staffs are creating impacts hence I am getting 102 errors even though in the training slides its showing error free while running BWR_Specificities file from first Lesson in Advanced SALT section. Can you please guide me so that I can start from scratch and get the output just like it has been shown in the videos.\\n\\nA quick reply would help since its urgent.\\n\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-14 07:49:08\" },\n\t{ \"post_id\": 7358, \"topic_id\": 1687, \"forum_id\": 8, \"post_subject\": \"Re: Parallelism\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nYes! And of course, if one output depended on completing before running the second, you could wrap each action inside of a SEQUENTIAL statement.\\n\\nThe output graph of this workunit will also verify that these two OUTPUTs are running in parallel.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-15 14:01:46\" },\n\t{ \"post_id\": 7357, \"topic_id\": 1687, \"forum_id\": 8, \"post_subject\": \"Parallelism\", \"username\": \"chanbchen\", \"post_text\": \"\\n\\nresult1 := operation1(input1);\\nresult2 := operation2(input2);\\n\\noutput(result1);\\noutput(result2);\\n\\n
\\n\\nIn the above example, operation1 and 2 are mutually exclusive operations and one does not depend on the other for it's input. So, will operation1 and operation2 run in parallel by default?\\n\\nPlease advise.\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-15 13:39:48\" },\n\t{ \"post_id\": 7390, \"topic_id\": 1688, \"forum_id\": 8, \"post_subject\": \"Re: Query Cache\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nIs the whole index cached? or is only the result of the index read cached?\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-16 21:18:32\" },\n\t{ \"post_id\": 7376, \"topic_id\": 1688, \"forum_id\": 8, \"post_subject\": \"Re: Query Cache\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nNo, when you construct a workunit that accesses an INDEX multiple times, it is auto-cached. \\n\\nIt's when you close one query and call another where the PRELOAD would come in handy, and keep the file in memory as you are stringing one query to another.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 13:13:08\" },\n\t{ \"post_id\": 7374, \"topic_id\": 1688, \"forum_id\": 8, \"post_subject\": \"Re: Query Cache\", \"username\": \"chanbchen\", \"post_text\": \"Hi Bob,\\n\\nThanks for the reply.\\n\\nJust wanted to clarify - so if I do NOT use the PRELOAD option, the caching will NOT be in effect?\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-16 13:07:12\" },\n\t{ \"post_id\": 7373, \"topic_id\": 1688, \"forum_id\": 8, \"post_subject\": \"Re: Query Cache\", \"username\": \"bforeman\", \"post_text\": \"Hi Chan,\\n\\nWhen repeated calls to a query are inline (in the same file) the auto-caching is in effect. \\n\\nYou can ensure that the INDEX file is left in memory for calls to other queries that use the same index by using the PRELOAD option.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 12:42:12\" },\n\t{ \"post_id\": 7368, \"topic_id\": 1688, \"forum_id\": 8, \"post_subject\": \"Query Cache\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have employee data in a dataset employeeDS indexed on the employee id field empId.\\n\\nSay on one of the nodes on a multi node thor, I happen to run an index query employeeKey(empId = 500) multiple times while running my application. Would HPCC have a built in auto cache mechanism of some sort wherein repeated occurrences of the same index reads are stored and read from a cache instead of reading from the index's logical file everytime?\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-15 21:17:41\" },\n\t{ \"post_id\": 7432, \"topic_id\": 1693, \"forum_id\": 8, \"post_subject\": \"Re: Child Queries\", \"username\": \"dabayliss\", \"post_text\": \"What release of the system are you using? There was a time when THOR could not support the use of various primitives (sort/join/rollup etc) inside a transform. \\n\\nBut that was fixed a long, long time ago...\", \"post_time\": \"2015-04-23 01:13:53\" },\n\t{ \"post_id\": 7431, \"topic_id\": 1693, \"forum_id\": 8, \"post_subject\": \"Child Queries\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have seen this error quite a few times:\\n\\nThor master does not support the execution of child queries
\\n\\nAnd I managed to get around this error.\\n\\nBut I was always curious to understand what this error means. What exactly is a child query? How can I simulate one (I always wanted to see how the graph of a child query looks like but never managed to view the graphs as they were all huge when I got this error and the graph view plugin always crashed)?\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-22 12:30:04\" },\n\t{ \"post_id\": 7471, \"topic_id\": 1696, \"forum_id\": 8, \"post_subject\": \"Re: PARALLEL\", \"username\": \"ghalliday\", \"post_text\": \"The answer depends on the context that o1 and o2 are used in.\\n\\nIf you say\\n\\n\\noutput(o1);\\noutput(o2);\\n
\\n\\nThen it is likely that they will be done one after another.\\n\\nIf the code is\\n\\noutput(o1 + o2);\\n
\\n\\nThen they will be done in parallel.\\n\\nIf the activities are executed in parallel, then normally the code will be being executed on multiple threads - so both will be executed at the same time - it is impossible to predict how the sequence of processing rows would proceed.\\n\\n(The operating system is likely to serialize access to the disk files, but everything else should be possible to execute in parrallel.)\\n\\nI don't think there is currently a way of explicitly requesting multiple outputs execute in parallel. Often the extra seeks that it would introduce on the disk files would outweigh the advantages in executing them both at the same time, but if the processing is very expensive the balance may well change.\\n\\n\\n(I have added https://track.hpccsystems.com/browse/HPCC-13468 to allow us to discuss that issue.)\", \"post_time\": \"2015-04-27 16:34:01\" },\n\t{ \"post_id\": 7439, \"topic_id\": 1696, \"forum_id\": 8, \"post_subject\": \"PARALLEL\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have 2 distributed datasets (on say a 20 node thor):\\n\\n\\nds1Dist := distribute(ds1, hash32(rid));\\nds2Dist := distribute(ds2, hash32(rid));\\n
\\n\\nSay I run 2 operations - 1 on each distributed dataset:\\n\\n\\no1 := project(ds1Dist, runOperation1(left), local);\\no2 := project(ds2Dist, runOperation2(left), local);\\n
\\n\\nThese 2 operations are mutually exclusive and are not dependent on one another in any way. So I believe both will run in parallel. Is that correct?\\n\\nNow, say I have 20 million records on each of ds1 and ds2. And say I have 20 nodes. So each node will have 1 million records each of ds1Dist and ds2Dist. So if operations 1 and 2 run in parallel, what does that mean exactly? Will thor alternate between operations 1 and 2 (say 1 record at a time) so that both run in parallel? What will the sequence be here?\\n\\nThanks\\nChan\", \"post_time\": \"2015-04-23 16:21:32\" },\n\t{ \"post_id\": 7457, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"dnordahl\", \"post_text\": \"That worked great. And swapping in my original data structure (wikiArticles), I was able to get the titles included with:\\n\\n\\nresults := RECORD\\n wikiArticles.title;\\n STRING braces := MatchText(squareBraces);\\n STRING linkTitles := MatchText(wikiLinkPattern[1]);\\nEND;\\n
\\n\\nAnd once I get it defined as a table object, it looks like I can probably join to a copy of itself to filter out the dead links. \\n\\nThanks a bunch!\", \"post_time\": \"2015-04-24 21:44:58\" },\n\t{ \"post_id\": 7456, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"Mragesh\", \"post_text\": \"For the second part, let us know if this is what you wanted:\\n\\ndatafile := DATASET([{'one','blah blah [[Alexander of Paris|poet named Alexander]] bldfkas'}, \\n {'two','blah blah [[Alexander of Paris]] bldfkas'}], {STRING title,STRING body});\\n\\nPATTERN squareBraces := PATTERN('\\\\\\\\[\\\\\\\\[');\\nPATTERN wikiLinkPattern := PATTERN('([a-zA-Z0-9 ]+)(?=\\\\\\\\||\\\\\\\\]\\\\\\\\])');\\nRULE allPatts := squareBraces wikiLinkPattern;\\nresults := RECORD\\n\\tdataFile;\\n\\tSTRING braces := MatchText(squareBraces);\\n STRING linkTitles := MatchText(wikiLinkPattern[1]);\\nEND;\\n\\nOUTPUT(PARSE(datafile,body,allPatts,results,SCAN ALL));\\n
\\n\\nYou can use "TABLE" to get vertical slices of the fields you want and discard the rest.\", \"post_time\": \"2015-04-24 21:34:16\" },\n\t{ \"post_id\": 7455, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"Mragesh\", \"post_text\": \"For the first part of your question, You might want to break up reg-ex into multiple patterns and use the pattern you want to.\\ndatafile := DATASET([{'blah blah [[Alexander of Paris|poet named Alexander]] bldfkas'}, \\n {'blah blah [[Alexander of Paris]] bldfkas'}], {STRING body});\\n\\nPATTERN squareBraces := PATTERN('\\\\\\\\[\\\\\\\\[');\\nPATTERN wikiLinkPattern := PATTERN('([a-zA-Z0-9 ]+)(?=\\\\\\\\||\\\\\\\\]\\\\\\\\])');\\nRULE allPatts := squareBraces wikiLinkPattern;\\nresults := RECORD\\n\\tSTRING braces := MatchText(squareBraces);\\n STRING linkTitles := MatchText(wikiLinkPattern[1]);\\nEND;\\n\\nOUTPUT(PARSE(datafile,body,allPatts,results,SCAN ALL));\\n
\\nNow you can use the column "linkTitles" for the desired result.\", \"post_time\": \"2015-04-24 21:23:13\" },\n\t{ \"post_id\": 7454, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"dnordahl\", \"post_text\": \"I got it mostly working, however it's returning the two leading square brackets with the match returned which it doesn't do with REGEXFIND. The documentation says that the () is how to define the groupings so I'm not sure what I'm doing incorrectly:\\n\\n\\ndatafile := DATASET([{'blah blah [[Alexander of Paris|poet named Alexander]] bldfkas'}, \\n {'blah blah [[Alexander of Paris]] bldfkas'}], {STRING body});\\n\\nPATTERN wikiLinkPattern := PATTERN('(\\\\\\\\[\\\\\\\\[)([a-zA-Z0-9 ]+)(?=\\\\\\\\||\\\\\\\\]\\\\\\\\])');\\nresults := RECORD\\n STRING linkTitles := MatchText(wikiLinkPattern[1]);\\nEND;\\n\\nOUTPUT(PARSE(datafile,body,wikiLinkPattern,results,SCAN ALL));\\n
\\n\\nYields:\\n\\n[[Alexander of Paris\\n[[Alexander of Paris\\n\\nOnce I point this to my actual wikiArticles record set, how can I tie the title field to the matches it parses out of each corresponding body field?\", \"post_time\": \"2015-04-24 20:41:19\" },\n\t{ \"post_id\": 7453, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"dnordahl\", \"post_text\": \"Thanks for helping pin it down to the regex pattern compatibility. It looks like the ?: is the part it doesn't like. I'll play around with getting an equivalent regex using the supported syntax choices.\", \"post_time\": \"2015-04-24 20:04:08\" },\n\t{ \"post_id\": 7451, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nIf you want to use a regular expression in a PATTERN statement in ECL, it has to look as follows:\\n\\nPATTERN mypattern PATTERN('regexphere');
\\n\\nThat said, your regex is not valid in this context, using:\\n\\nPATTERN wikiLinkPattern := PATTERN('\\\\\\\\[\\\\\\\\[([\\\\\\\\w ]+)(?:\\\\\\\\||\\\\\\\\]\\\\\\\\])');
\\n\\nGives me an "Illegal Pattern" error.\\n\\nYou may have to tweak your expression to use in parsing. The rules for valid regular expression context can be found here:\\n\\nhttp://hpccsystems.com/download/docs/ecl-language-reference/html/ParsePattern_Definitions.html\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-24 19:53:48\" },\n\t{ \"post_id\": 7450, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Re: Applying Regex within the contents of XML\", \"username\": \"Mragesh\", \"post_text\": \"You might want to look at the reg-ex, it might not be supported by PATTERN.\\nHere is something I tried, not exactly what you want but it worked:\\n\\n\\nPATTERN wikiLinkRegex := PATTERN('[a-zA-Z]+ ');\\nwikiArticles := DATASET([{'blah blah [[Alexander of Paris|poet named Alexander]] bldfkas'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'blah blah [[Alexander of Paris]] bldfkas'}], {STRING body});\\n\\nresults := RECORD\\n STRING linkTitles := MatchText(wikiLinkRegex);\\nEND;\\n\\nOUTPUT(PARSE(wikiArticles,body,wikiLinkRegex,results,FIRST));\\n
\", \"post_time\": \"2015-04-24 19:42:21\" },\n\t{ \"post_id\": 7448, \"topic_id\": 1697, \"forum_id\": 8, \"post_subject\": \"Applying Regex within the contents of XML\", \"username\": \"dnordahl\", \"post_text\": \"I'm trying to learn ECL with a project to parse wikipedia dataset XML and perform a page rank algorithm on it. The first step is to extract the titles and link titles from the body, and put in CSV with two columns, and remove any links that don't contain match a title in the dataset. \\n\\nSo far I've managed to parse the XML successfully and get a regex pattern matching correctly in ECL. But I cannot seem to successfully apply the examples of the PARSE function which I've found in the documentation online to execute my regex on my dataset.\\n\\nBtw, I have found the HPCC wiki page rank sample code online, but it is done on an older sql based wikipedia export format. \\n\\nThe following works for me:\\n\\n\\narticle := RECORD\\n STRING title; //data from title tag -- tag name matches field name\\n STRING body {XPATH('revision/text')}; //article body from text tag, renaming the field\\nEND;\\nwikiArticles := DATASET('~wikipedia-cs.xml',article,XML('mediawiki/page'));\\nOUTPUT(wikiArticles);\\n\\n// Verify Regex:\\n\\nwikiLinkRegex := '\\\\\\\\[\\\\\\\\[([\\\\\\\\w ]+)(?:\\\\\\\\||\\\\\\\\]\\\\\\\\])';\\ntestContent := 'blah blah [[Alexander of Paris|poet named Alexander]] bldfkas';\\ntestContent2 := 'blah blah [[Alexander of Paris]] bldfkas';\\nOUTPUT(REGEXFIND(wikiLinkRegex, testContent, 1)); // returns 'Alexander of Paris'\\nOUTPUT(REGEXFIND(wikiLinkRegex, testContent2, 1)); // returns 'Alexander of Paris'\\n
\\n\\nHowever, all my attempts to test functionality of combing regex and the parse function do not work:\\n\\n\\nPATTERN wikiLinkPattern := wikiLinkRegex;\\nresults := RECORD\\n STRING linkTitles := MatchText(wikiLinkPattern);\\nEND;\\nOUTPUT(PARSE(wikiArticles,body,wikiLinkPattern,results,SCAN ALL));\\n
\\n\\nIt would be handy if there were a version of REGEXFIND that would return a list of all matches, because that's essentially all I need. Ideally I would just like to get to a dataset of these:\\n\\n\\narticleLink := RECORD\\n STRING title; \\n STRING linkedTitle;\\nEND;\\n
\", \"post_time\": \"2015-04-24 18:36:23\" },\n\t{ \"post_id\": 7467, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Re: Case insensitive JOIN and deduplication\", \"username\": \"dnordahl\", \"post_text\": \"Yes.. Removing the col3 did the trick. \\n\\nThank you!\", \"post_time\": \"2015-04-27 14:19:39\" },\n\t{ \"post_id\": 7466, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Re: Case insensitive JOIN and deduplication\", \"username\": \"DSC\", \"post_text\": \"If you want to sum the values on only one field, you need to supply only that one field to the TABLE function. Removing the 'col3' should do that:\\n\\nIMPORT Std;\\n\\nmySet := DATASET([{'A', 1, 'C', 1},{'B', 1, 'A', 2},{'C', 2, 'A', 2},{'C', 2, 'B', 1}], {STRING col1, UNSIGNED col2, STRING col3, UNSIGNED col4});\\n\\nr1 := ROLLUP\\n (\\n mySet,\\n TRANSFORM(LEFT),\\n Std.Str.ToUpperCase(col1), col2, col4\\n );\\n\\nmyAggregatedColSet := TABLE(r1,{col1, newCol := SUM(GROUP, col2 + col4)}, col1 ); \\n\\nOUTPUT(myAggregatedColSet);
\\nThat results in only three rows. Is that what you were looking for?\\n\\nDan\", \"post_time\": \"2015-04-27 13:58:11\" },\n\t{ \"post_id\": 7465, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Re: Case insensitive JOIN and deduplication\", \"username\": \"dnordahl\", \"post_text\": \"Adding GROUP to the SUM function made the warning go away, but its still not grouping the rows for C. I'm expecting to have three rows from the output, where the last is C 7\", \"post_time\": \"2015-04-27 13:51:23\" },\n\t{ \"post_id\": 7464, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Re: Case insensitive JOIN and deduplication\", \"username\": \"DSC\", \"post_text\": \"This may do what you want:\\n\\nIMPORT Std;\\n\\nmySet := DATASET([{'A', 1, 'C', 1},{'B', 1, 'A', 2},{'C', 2, 'A', 2},{'C', 2, 'B', 1}], {STRING col1, UNSIGNED col2, STRING col3, UNSIGNED col4});\\n\\nr1 := ROLLUP\\n\\t(\\n\\t\\tmySet,\\n\\t\\tTRANSFORM(LEFT),\\n\\t\\tStd.Str.ToUpperCase(col1), col2, col4\\n\\t);\\n\\nmyAggregatedColSet := TABLE(r1,{col1, newCol := SUM(GROUP, col2 + col4)}, col1, col3 ); \\n\\nOUTPUT(myAggregatedColSet);
\\nWithin the ROLLUP, the fields you list for the match don't really have to be field names. They really just need to resolve to scalar values. So, you can perform your case conversion there to effectively perform a case-insensitive ROLLUP.\\n\\nAs far as the warning goes, use the GROUP keyword in your computation to limit that computation to the unique field value combinations cited in the TABLE. That brings col2 and col4 into scope.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2015-04-27 11:02:29\" },\n\t{ \"post_id\": 7461, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Re: Case insensitive JOIN and deduplication\", \"username\": \"dnordahl\", \"post_text\": \"Along the lines of unexpected duplicate or un-merged columns, I'm also getting two columns in the example below that aren't aggregated as I was expecting:\\n\\n\\nmySet := DATASET([{'A', 1, 'C', 1},{'B', 1, 'A', 2},{'C', 2, 'A', 2},{'C', 2, 'B', 1}], {STRING col1, UNSIGNED col2, STRING col3, UNSIGNED col4});\\n\\nr1 := ROLLUP(mySet, TRANSFORM(LEFT), col1, col2, col4);\\nmyAggregatedColSet := TABLE(r1, {col1, newCol := SUM( col2 + col4)}, col1, col3 ); \\n\\nOUTPUT(myAggregatedColSet);\\n
\\n\\nYields:\\n\\n\\nCol NewCol\\nA 2\\nB 3\\nC 4\\nC 3\\n
\\n\\nIt also has a warning: Field 'newCol' in TABLE does not appear to be properly defined by grouping conditions (4, 68), 2168, unknown\\n\\nHow can I modify the sample code above to properly aggregate for all distinct values in col1?\", \"post_time\": \"2015-04-26 23:29:50\" },\n\t{ \"post_id\": 7459, \"topic_id\": 1699, \"forum_id\": 8, \"post_subject\": \"Case insensitive JOIN and deduplication\", \"username\": \"dnordahl\", \"post_text\": \"I'm having trouble figuring out how to do a case insensitive inner join in on the sample below: (Or at least perform a case insensitive de-dup the results of the join).\\n\\n\\nIMPORT STD;\\n\\narticleLink := RECORD\\n\\tSTRING title := parsedLinks.title;\\n\\tSTRING linkTitle := parsedLinks.linkTitle;\\nEND;\\n\\narticleLinks := DATASET([\\n{'A', 'B'},{'A', 'B'},{'A', 'b'},\\n{'B', 'C'},{'B', 'C'},\\n{'D', 'E'},\\n{'B', 'G'},\\n{'B', 'D'}], articleLink);\\n\\ndeadLinksFiltered := JOIN(articleLinks, articleLinks, STD.Str.ToLowerCase(LEFT.linkTitle)=STD.Str.ToLowerCase(RIGHT.title), INNER, KEEP(1));\\n\\n
\\n\\nThe results I get are:\\n\\n\\nA\\tB\\nA\\tB\\nA\\tb\\nB\\tD\\n
\\n\\nThe KEEP(1) seems to prevent a lot of duplicates, but I still would like to have complete deduplication including any that would match case insensitive. Is it possible to define a join function of some sort to avoid two steps of computation?\", \"post_time\": \"2015-04-25 00:07:39\" },\n\t{ \"post_id\": 7470, \"topic_id\": 1700, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCall Http Headers option\", \"username\": \"bforeman\", \"post_text\": \"Hi Luc,\\n\\nI think a JIRA is a great idea! In my opinion the statement should have that capability as an option.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-27 15:59:56\" },\n\t{ \"post_id\": 7460, \"topic_id\": 1700, \"forum_id\": 8, \"post_subject\": \"HTTPCall Http Headers option\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nI was thinking of using HTTPCall to collect data from REST APIs.\\nFor example, weather data: http://www.ncdc.noaa.gov/cdo-web/webservices/v2\\n\\nI could setup an Event (CRON) requesting weather data every hour or so, and storing it on Thor for further processing. Problem is that most REST APIs out there require "header based" authentication (Basic or other, like the "token" header for that NOAA REST service) and HTTPCall doesn't provide any control over http headers.\\n\\nIs that the wrong use-case for HTTPCall?\\nIf not, would it be sensical and feasible to add support for http headers in HTTPCall function?\\n(I'd happily submit a JIRA Suggestion or Improvement if so )\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-04-25 03:38:52\" },\n\t{ \"post_id\": 7531, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"bforeman\", \"post_text\": \"
"record of KEYED index does not contain references to the dataset" error comes even though the index have the file position in the right order, do you have any other suggestions why it may fail?\\n
\\n\\nSomething in your INDEX that you are using in the KEYED option does not match the right dataset of the JOIN. That's what it is telling me. If that is not the case, could you put together an example with an inline dataset that can reproduce the issue?\\n\\nLook at the KEYED JOIN example in the ECL Watch Playground to help you with that.\\n\\nRegarding the 20 node THOR to single node ROXIE, it is good for testing perhaps, but other than that you are losing the concurrent power of ROXIE by limiting it to only one node. It's simply a matter of performance.\\n\\nSome smart guys wrote this a couple of years ago:\\nhttps://wiki.hpccsystems.com/display/hpcc/Sample+Sizing+Guide+for+HPCC+-+High+Data+volume+-+Typical+scenario\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-04 17:43:43\" },\n\t{ \"post_id\": 7530, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"omnibuzz\", \"post_text\": \"Thanks, Bob. I understand. So, given that "record of KEYED index does not contain references to the dataset" error comes even though the index have the file position in the right order, do you have any other suggestions why it may fail?\\n\\nI also have a question on your side comment \\nWhy should a 20 node thor not publish to a single node Roxie? Is there an operational issue?\\nHere is a cooked up situation. How would you size Thor and Roxie for this?\\nMy input dataset to Thor is about 1TB. But, after doing the transformations, my resulting payload index is just 2GB, which needs to be pushed to Roxie. \\n\\nCheers\\nSrini\", \"post_time\": \"2015-05-04 17:14:44\" },\n\t{ \"post_id\": 7528, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nHere is a related topic:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=1650&sid=a496f108d190358aa67d2e514c24944e\\n\\nI'm sure you know that having a 20-node THOR copy to a one way ROXIE is not a best practice
\\n\\nNevertheless, here is what is happening behind the scenes when you publish from THOR to ROXIE.\\n\\n
4 Node THOR >> 2 Node ROXIE\\nMetaKey (32K) Node 1 contains:\\nPart 1 Metakey(32K) (to Farmer)\\nPart 2 Part 1 and Part 2 (to Data Channel – Slave)\\nPart 3 Node 2 contains:\\n Metakey(32K) (to Farmer)\\n Part 3 and Part 4 (to Data Channel – Slave)\\n\\nOne Index Copy (Publish), bad configuration \\n1 Node THOR >> 2 Node ROXIE\\nMetaKey (32K) Node 1 contains:\\nPart 1 Metakey(32K) (to Farmer)\\n Part 1 (to Data Channel – Slave)\\n\\n Node 2 contains:\\n Metakey(32K) (to Farmer)\\n Part 1 (to Data Channel – Slave)\\n\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-04 12:50:33\" },\n\t{ \"post_id\": 7526, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"omnibuzz\", \"post_text\": \"Bob - It has the file position in the right order. I have another question now. \\nIf my thor used to build the index is 20 nodes and Roxie 1 node. \\nYou will essentially have the file and the index split into 20 parts in Thor. How will the keyed join know which file part of the dataset to go and get the results with just the fpos from the index? Does the index go to the same part Id of the base file as itself. Let me know if I don't make sense.\\nThanks\\nSrini\", \"post_time\": \"2015-05-02 16:19:14\" },\n\t{ \"post_id\": 7476, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\nThe KEYED attribute in the Full-Keyed JOIN specifies the INDEX that will be used to FETCH into the RIGHT recordset. By definition, the compiler knows what that field is because it needs to be the last field in the INDEX. It always assumes that the last field in the INDEX is the FILEPOSITION byte record pointer.\\n\\nSo when I see the "record of KEYED index does not contain references to the dataset" error, I am assuming that your INDEX might have the FILEPOSITION in the wrong order.\\n\\nLet me ask around with some other colleagues.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-28 15:42:07\" },\n\t{ \"post_id\": 7475, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"omnibuzz\", \"post_text\": \"Hi Bob - I included the file position. In fact, I am able to use the index to FETCH from the base dataset in roxie. However, I get an error when I try to do a full keyed join.\\n\\nI have a question though. Here is the example from our Programmers guide for full key join\\nJ1 := JOIN($.DeclareData.Person.FilePlus(PersonID BETWEEN 1 AND 100),\\n $.DeclareData.Accounts,\\n LEFT.PersonID=RIGHT.PersonID,\\n Xform1(LEFT,RIGHT),\\n KEYED($.DeclareData.IDX_Accounts_PersonID));\\n
\\n\\nI don't see us specifying what is the column in $.DeclareData.IDX_Accounts_PersonID that will hold the fpos. We do that in fetch. Is there a convention that is being followed?\\nI expected that last line to look something like this....\\n\\nKEYED($.DeclareData.IDX_Accounts_PersonID,RecPos));
\\n\\nLet me know what I am missing.\\nCheers\\nSrini\", \"post_time\": \"2015-04-28 14:45:24\" },\n\t{ \"post_id\": 7474, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"Re: full keyed join in roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini!\\n\\nDid you include the FILEPOSITION field in the index?\\n\\nExample:\\n\\nPtbl1 := DATASET(DataFile,{PtblRec,UNSIGNED8 filepos {virtual(fileposition)}},FLAT);
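\\n\\nPutting the pieces together, a minimal sketch looks like this (all of the file names and layouts below are invented for illustration). The virtual fileposition field goes on the base DATASET, it becomes the last field of the INDEX, and the full-keyed JOIN then simply names that INDEX in its KEYED option:\\n\\n
//sketch only -- hypothetical names and logical files\\nAcctRec := RECORD\\n  UNSIGNED4 PersonID;\\n  STRING20  AcctNumber;\\nEND;\\nPersonRec := RECORD\\n  UNSIGNED4 PersonID;\\n  STRING25  LastName;\\nEND;\\nPersons  := DATASET('~thor::persons', PersonRec, THOR);\\nAccounts := DATASET('~thor::accounts',\\n                    {AcctRec, UNSIGNED8 RecPos {VIRTUAL(fileposition)}}, THOR);\\n//RecPos is the last field of the INDEX -- that is how the JOIN fetches the base records\\nAcctKey  := INDEX(Accounts, {PersonID, RecPos}, '~key::accounts_personid');\\n\\nOutRec := RECORD\\n  PersonRec;\\n  STRING20 AcctNumber;\\nEND;\\nOutRec XF(PersonRec L, RECORDOF(Accounts) R) := TRANSFORM\\n  SELF.AcctNumber := R.AcctNumber;\\n  SELF := L;\\nEND;\\n//full-keyed JOIN: the RIGHT dataset is the base file, KEYED names the fetch INDEX\\nJ := JOIN(Persons, Accounts, LEFT.PersonID = RIGHT.PersonID, XF(LEFT, RIGHT), KEYED(AcctKey));\\n//OUTPUT(J);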
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-28 14:01:01\" },\n\t{ \"post_id\": 7472, \"topic_id\": 1701, \"forum_id\": 8, \"post_subject\": \"full keyed join in roxie\", \"username\": \"omnibuzz\", \"post_text\": \"I built a dataset and a keyed access index in thor. I now pushed both of these to roxie and when I try to do a full keyed join with it in a roxie query, it throws an error: "record of KEYED index does not contain references to the dataset".\\n\\nDoes that mean I need to push just the base dataset to roxie and build the keyed access index in the ROXIE side? or is there something fundamental I am missing here.\\n\\nUpdate Info: The Thor is a separate cluster, with it's own Dali. The Roxie pulls the index and the base dataset by using File.Copy.\\nCheers\\nSrini\", \"post_time\": \"2015-04-27 18:59:58\" },\n\t{ \"post_id\": 7576, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"Chen,My question is - will approach 2 be faster than approach 1?
I would just code it both ways, then run each in separate workunits against the same data (a significant amount of data, not just a small test set) several times and see which method consistently performs better on your data in your environment.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-11 14:48:23\" },\n\t{ \"post_id\": 7572, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Thanks for the replies Rich.\\n\\nJust one more question please.\\n\\nI am just curious as to how many index reads a denormalize does. Is it essentially implemented as 1 join internally?\\n\\nWhat I would like to know essentially is - say I have these 2 approaches (I have a dataset with say 10 records):\\n\\n1. In approach 1, I read the index once for each record in my dataset (and I do denormalize to load the child dataset)\\n\\n2. In approach 2, I just use 1 DENORMALIZE (instead of the join itself) instead of one index read for each record\\n\\nMy question is - will approach 2 be faster than approach 1?\\n\\nThanks\", \"post_time\": \"2015-05-11 11:12:14\" },\n\t{ \"post_id\": 7549, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"Chen,If I use denormalize instead of join, would that be a left outer join by default?
Yes. DENORMALIZE is, under the covers, just a specialized form of a left outer JOIN.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-06 14:14:28\" },\n\t{ \"post_id\": 7547, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThanks for all the replies. I have one more question on this please.\\n\\nIf I use denormalize instead of join, would that be a left outer join by default? (If there are records in the right dataset for a record in the left, would the left record be ignored?)\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-06 11:21:18\" },\n\t{ \"post_id\": 7521, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"Just use the first form of DENORMALIZE\", \"post_time\": \"2015-05-01 17:36:06\" },\n\t{ \"post_id\": 7520, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThanks again for the reply.\\n\\nOne more question please? What if I do not want to group by that fields? I would like to use DENORMALIZE in the place of JOIN but I do not want to group the child dataset by any field at all. How can I achieve this? Can I just skip the GROUP keyword to achieve this?\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-01 17:04:39\" },\n\t{ \"post_id\": 7519, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nYes. That's what "based on the join condition" means -- whatever child dataset fields you use in the join condition are used to group the records.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-01 16:55:46\" },\n\t{ \"post_id\": 7518, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nTHanks for the reply.\\n\\nIf I have more fields in the join condition - say f4, f5 & f6, then will grouping happen on all fields f3-6?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-01 16:51:55\" },\n\t{ \"post_id\": 7517, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"Chen,is the grouping done on f2?
No. \\n\\nThe doc says, "GROUP Specifies grouping the childrecset records based on the join condition so all the related child records are passed as a dataset parameter to the transform."\\n\\nThat means the grouping is on the child dataset (second parameter to DENORMALIZE) so it would be on your f3 field.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-01 16:45:03\" },\n\t{ \"post_id\": 7516, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"I am trying to use a DENORMALIZE instead of JOIN as you suggested\\n\\nI have a complex join condition and the first field in the join is id. Followed by id, I have a few other fields used in the join condition.\\n\\nSo, when I use GROUP in the DERNORMALIZE as shown in my earlier post, will grouping be done on the first field in the join condition (id in my case)?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-01 15:34:51\" },\n\t{ \"post_id\": 7515, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"When I use this line:\\n\\nDENORMALIZE(d1, d2, LEFT.f2 = RIGHT.f3, GROUP, f(LEFT, ROWS(RIGHT)))
\\n\\nis the grouping done on f2?\\n\\nTHanks\\nChen\", \"post_time\": \"2015-05-01 15:28:26\" },\n\t{ \"post_id\": 7512, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"rtaylor\", \"post_text\": \"It is probably more efficient to first join the child and the grandchild, and then join that with the parent.
Precisely the way we teach this in the Advanced ECL class, whose class exercises construct a 3-level, 4-table nested child dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-01 12:42:28\" },\n\t{ \"post_id\": 7509, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"ghalliday\", \"post_text\": \"The simplest would be to have a project, which called a transform. That transform would contain a line:\\n\\nSELF.children := DENORMALIZE(l.children, someOtherFile);\\n\\nHowever that is likely to be not so efficient - because the join will be done for each row - which means someOtherFile may be read multiple times. If the file you are joining against is a relatively small then you might want to consider using a dictionary, especially if someOtherFile was all constants.\\n\\nIt is probably more efficient to first join the child and the grandchild, and then join that with the parent. The are generally quite a few ways of solving the same problem, the one that is the most efficient often corresponds to the simplest graph.\", \"post_time\": \"2015-05-01 08:30:32\" },\n\t{ \"post_id\": 7500, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Amazing!\\n\\nOne more question please. \\n\\nSay, I have to do a DENORMALIZE (join basically) one more time, only this time, I would like to join the child dataset formed in the previous step against another dataset, and thereby create a child2 which would be a child dataset inside child.\\n\\nTHanks \\nChen\", \"post_time\": \"2015-04-30 16:46:24\" },\n\t{ \"post_id\": 7495, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"ghalliday\", \"post_text\": \"yes, use the DENORMALIZE keyword instead of JOIN.\\n\\nTo use a similar transform use something like\\n\\n\\nDENORMALIZE(d1, d2, LEFT.f2 = RIGHT.f3, GROUP, f(LEFT, ROWS(RIGHT)))\\n
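\\n\\nA fuller self-contained sketch of that, using the d1/d2 layouts from this thread (the nested layout and the pre-projection of the parent into that layout are the parts you still have to add yourself), would be:\\n\\nl1 := {STRING f1; STRING f2};\\nl2 := {STRING f3; STRING f4};\\nd1 := DATASET([{'v11','v21'},{'v12','v21'}], l1);\\nd2 := DATASET([{'v21','v41'},{'v21','v42'}], l2);\\n\\nlayNorm := RECORD\\n STRING f1;\\n STRING f2;\\n DATASET(l2) children;\\nEND;\\n\\n// the parent must already be in the output (nested) format\\nP := PROJECT(d1, TRANSFORM(layNorm, SELF.children := DATASET([], l2), SELF := LEFT));\\n\\n// all matching child rows arrive together as ROWS(RIGHT)\\nlayNorm f(layNorm L, DATASET(l2) R) := TRANSFORM\\n SELF.children := R;\\n SELF := L;\\nEND;\\n\\nDENORMALIZE(P, d2, LEFT.f2 = RIGHT.f3, GROUP, f(LEFT, ROWS(RIGHT)));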
\", \"post_time\": \"2015-04-30 14:34:28\" },\n\t{ \"post_id\": 7494, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Hi Gavin,\\n\\nThanks for the tip. I have a slightly different version of this question.\\n\\nSay a join is creating this dataset that I showed you. Is it possible to do a denormalize during the join itself?\\n\\n\\nl1 := {string f1; string f2};\\nl2 := {string f3; string f4};\\n\\nd1 := dataset([{'v11', 'v21'}, {'v12', 'v21'}], l1);\\nd2 := dataset([{'v21', 'v41'}, {'v21', 'v42'}], l2);\\n\\nd1; d2;\\n\\njoin(d1, d2, left.f2 = right.f3);\\n
\\n\\nThis join generates the following output:\\n\\n[attachment=1:36z5a07x]norm.PNG\\n\\nI would like to denormalize this so that it looks this way:\\n\\n[attachment=0:36z5a07x]denorm.PNG\\n\\nHow can I achieve it?\\n\\nThanks\\nChen\", \"post_time\": \"2015-04-30 14:23:32\" },\n\t{ \"post_id\": 7491, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"ghalliday\", \"post_text\": \"In that situation is is simplest to use a grouped rollup:\\n\\n\\nlay := record\\n string f1;\\n string f2;\\n string f3;\\n string f4;\\nend;\\n\\nlayChild := {string f3; string f4;};\\n\\nlayNorm := record\\n string f1;\\n string f2;\\n dataset(layChild) child;\\nend;\\n\\nd := dataset([{'v11', 'v21', 'v31', 'v41'}, {'v11', 'v21', 'v32', 'v42'}, {'v11', 'v21', 'v33', 'v43'}], lay);\\n\\nlayNorm doRollup(lay l, dataset(lay) matches) := TRANSFORM\\n SELF.child := PROJECT(matches, TRANSFORM(layChild, SELF := LEFT));\\n SELF := l;\\nEND;\\n\\nr := ROLLUP(GROUP(d, f1, f2), GROUP, doRollup(LEFT, ROWS(LEFT)));\\n\\noutput(r);\\n
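\\n\\nOne note: GROUP expects the records to arrive already sorted (or at least contiguous) on the grouping fields, so on real data the usual pattern is:\\n\\nr := ROLLUP(GROUP(SORT(d, f1, f2), f1, f2), GROUP, doRollup(LEFT, ROWS(LEFT)));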
\", \"post_time\": \"2015-04-30 11:03:31\" },\n\t{ \"post_id\": 7490, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"I would like to denormalize d now:\\n\\n\\nlay := record\\n\\tstring f1;\\n\\tstring f2;\\n\\tstring f3;\\n\\tstring f4;\\nend;\\n\\nlayChild := {string f3; string f4;};\\n\\nlayNorm := record\\n\\tstring f1;\\n\\tstring f2;\\n\\tdataset(layChild);\\nend;\\n\\nd := dataset([{'v11', 'v21', 'v31', 'v41'}, {'v11', 'v21', 'v32', 'v42'}, {'v11', 'v21', 'v33', 'v43'}], lay);\\n
\", \"post_time\": \"2015-04-30 10:54:52\" },\n\t{ \"post_id\": 7489, \"topic_id\": 1706, \"forum_id\": 8, \"post_subject\": \"Denormalize\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have a dataset with 4 fields (f1, 2, 3 & 4) that looks like this:\\n\\n[attachment=1:3qcbg7lo]norm.PNG\\n\\nI would like to denormalize this so that it looks like this:\\n\\n[attachment=0:3qcbg7lo]denorm.PNG\\n\\nHow do I achieve this denormalize within the same dataset?\\n\\nThanks\\nChen\", \"post_time\": \"2015-04-30 10:54:12\" },\n\t{ \"post_id\": 7676, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"rtaylor\", \"post_text\": \"This example is even more fun \\n
DS := DATASET(10,\n TRANSFORM({INTEGER number},\n SELF.number := IF(COUNTER % 2 = 1, SKIP, COUNTER)\n));\n\nDS;\n// result is\n// 2\n// 4\n// 6\n// 8\n// 10
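\\n\\nAnd when the skip condition is computed inside the TRANSFORM, putting SKIP on the TRANSFORM itself avoids repeating it per field. A minimal sketch (InRec, OutRec, inDS and someField are made-up names, with OutRec assumed to be a subset of InRec's fields):\\n\\nOutRec XF(InRec L) := TRANSFORM, SKIP(TRIM(L.someField) = '')\\n SELF := L;\\nEND;\\nresult := PROJECT(inDS, XF(LEFT));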
\", \"post_time\": \"2015-05-28 13:19:47\" },\n\t{ \"post_id\": 7673, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"pius_francis\", \"post_text\": \"Hi chanbchen, \\n\\nBut remember the 'counter' will get incremented even if you SKIP. See example below\\n\\nCode:\\nDS := DATASET(10,\\nTRANSFORM({integer number},\\nSELF.number := if(counter = 1 ,skip,counter)\\n));\\n\\noutput :\\n\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n10\", \"post_time\": \"2015-05-28 10:08:06\" },\n\t{ \"post_id\": 7623, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"chanbchen\", \"post_text\": \"Thank you very much for the help.\", \"post_time\": \"2015-05-20 19:05:23\" },\n\t{ \"post_id\": 7622, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"rtaylor\", \"post_text\": \"Will the entire record be skipped if "condition" is false?
\\nYes\", \"post_time\": \"2015-05-20 19:03:44\" },\n\t{ \"post_id\": 7621, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"chanbchen\", \"post_text\": \"So, say I have 2 fields in the output layout. Would this be good enough?\\n\\n\\nSELF.field1 := if(condition = TRUE, value1, SKIP);\\nSELF := L;\\n
\\n\\nWill the entire record be skipped if "condition" is false?\", \"post_time\": \"2015-05-20 19:02:39\" },\n\t{ \"post_id\": 7620, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nYou only need one SKIP in the TRANSFORM if the condition will be the same for each field.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-20 18:56:58\" },\n\t{ \"post_id\": 7619, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"chanbchen\", \"post_text\": \"Thanks for the reply.\\n\\nIf I need to calculate the SKIP condition inside the TRANSFORM in say several lines of code and do a skip at the end of the TRANSFORM, how can I do this without having to mention skip for every field in the output layout?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-20 18:53:37\" },\n\t{ \"post_id\": 7504, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"Re: SKIP in TRANSFORM\", \"username\": \"rtaylor\", \"post_text\": \"Yes. Put SKIP(condition) on the TRANSFORM itself -- http://hpccsystems.com/download/docs/ecl-language-reference/html/TRANSFORM_Structure.html\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-30 18:25:25\" },\n\t{ \"post_id\": 7499, \"topic_id\": 1709, \"forum_id\": 8, \"post_subject\": \"SKIP in TRANSFORM\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have a TRANSFORM function churning out a layout with 100 fields. Based on one condition derived inside the TRANSFORM, I would like to either keep or skip the whole record.\\n\\nIs there a simple way to do this instead of having to do the following step for all 100 fields?\\n\\nSELF.field1 := if(condition = true, l.field1, skip);
\\n\\nThanks\\nChen\", \"post_time\": \"2015-04-30 15:57:24\" },\n\t{ \"post_id\": 7522, \"topic_id\": 1710, \"forum_id\": 8, \"post_subject\": \"Re: Mergejoin issue\", \"username\": \"bforeman\", \"post_text\": \"Hi Alex,\\n\\nIt appears that MERGEJOIN only supports LOCAL operations in THOR with the latest release.\\n\\nThis code works as expected. Note that the use of STEPPED is not needed when working with DATASETs instead of INDEXes.\\n\\n rec:=RECORD\\n\\t\\t unsigned8 keyfield;\\n string3 category;\\n END;\\n\\nf1 := DATASET([{1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'}],Rec);\\nf2 := DATASET([{2,'A'},{3,'B'},{3,'H'},{3,'I'},{6,'J'}],Rec); \\t\\t\\n\\t\\t\\n\\n // these have a few million records each:\\n s1:= DISTRIBUTE(sort(f1, keyfield, category),HASH32(keyfield));\\n s2:= DISTRIBUTE(sort(f2, keyfield, category),HASH32(keyfield));\\n\\n i1:= 3;\\n r1:= s1(keyfield=i1);\\n r2:= s2(keyfield=i1);\\n\\t\\tr1;\\n\\t\\tr2;\\n\\n j1 := MERGEJOIN([r1,r2], LEFT.keyfield=RIGHT.keyfield, sorted(keyfield, category),LOCAL);\\n\\t\\t\\n\\t\\tj1;
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-01 19:29:25\" },\n\t{ \"post_id\": 7505, \"topic_id\": 1710, \"forum_id\": 8, \"post_subject\": \"Mergejoin issue\", \"username\": \"alex\", \"post_text\": \"Hello HPCC. I'm seeing some behavior from MERGEJOIN that I don't understand. I have reduced the problem to this trivial case:\\n\\nrec:=RECORD\\n unsigned8 keyfield;\\n string3 category;\\nEND;\\n\\n// these have a few million records each:\\ns1:= sort(DATASET(f1, rec, thor), keyfield, category);\\ns2:= sort(DATASET(f2, rec, thor), keyfield, category);\\n\\ni1:= 14526414584674218874;\\nr1:= s1(keyfield=i1);\\nr2:= s2(keyfield=i1);\\n\\nj1 := MERGEJOIN([r1,r2], STEPPED(LEFT.keyfield=RIGHT.keyfield), sorted(keyfield, category), DEDUP);\\n\\n
\\n\\nr1 and r2 above have exactly one record, with the same value for the category field. I would expect the mergejoined set to also have one record. However:\\n\\n\\nr1; // prints the record\\nr2; // prints the record\\nr1-r2; // outputs an empty set, as expected\\nj1; // ALSO outputs an empty set\\n
\\n\\nWhat makes this particularly weird is that there are plenty of records in s1 and s2 that will match and be added to j1 (if I do the mergejoin on the full set, not the filtered ones). That record and some others, however, refuse to match, and I don't know what is preventing them.\\n\\nA possible connection is that when executing the code above, I get a compiler warning for the 'r1 := ...' and 'r2 := ...' lines that the condition is always false. This puzzles me, because outputting r1 and r2 shows the matching record.\\n\\nFinally, defining two one-record datasets with the same data as the filtered r1, r2 sets and running a mergejoin on them works just fine.\", \"post_time\": \"2015-04-30 19:33:33\" },\n\t{ \"post_id\": 7525, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"omnibuzz\", \"post_text\": \"This looks like it will work. I am worried that it may require an extra lookup on table B. But, may be not. Let me run the job next week and update you on my findings.\\nThanks\\nSrini\", \"post_time\": \"2015-05-02 16:16:01\" },\n\t{ \"post_id\": 7514, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nOK, then how about this way:SortedB := SORT(B,SortCol);\\nT := TABLE(SortedB,{UNSIGNED Num := 0,SortedB});\\n\\nRECORDOF(T) XF(T L, T R, INTEGER C) TRANSFORM\\n SELF.Num := C;\\n SELF := R;\\nEND;\\nI := ITERATE(T,XF(LEFT,RIGHT,COUNTER));\\n\\nFirstRec(TYPEOF(joinCol) JoinVal) := MIN(I(JoinCol=JoinVal),Num); \\n\\nJOIN(A,I, LEFT.joinCol = RIGHT.JoinCol AND \\n RIGHT.Num = FirstRec(LEFT.joinCol),\\n MANY LOOKUP,LEFT OUTER);
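\\n\\nNote the definition operator on the transform: it should read RECORDOF(T) XF(T L, T R, INTEGER C) := TRANSFORM.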
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-01 14:09:56\" },\n\t{ \"post_id\": 7513, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - Like I mentioned in my comment in the code. I cannot dedup prior to the join because the first row in the RIGHT table (after sorting) is not the row that I want. I need the first row after the join conditions are satisfied. I realize that I am not making much sense. I apologize for that. I will come up with a proper example to elucidate my question. It will take a while to build that example. Let me get back.\\nThank you for the help. Really appreciate it.\\nCheers\\nSrini\", \"post_time\": \"2015-05-01 13:35:35\" },\n\t{ \"post_id\": 7511, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nSorry -- my bad. Here's the "right" code for that example:SortedB := SORT(B,SortCol);\\nT := TABLE(SortedB,{UNSIGNED Num := 0,SortedB});\\n\\nRECORDOF(T) XF(T L, T R, INTEGER C) TRANSFORM\\n SELF.Num := IF(L.SortCol = R.SortCol, 0, C);\\n SELF := R;\\nEND;\\nI := ITERATE(T,XF(LEFT,RIGHT,COUNTER));\\nJOIN(A,I, LEFT.joinCol = RIGHT.JoinCol AND RIGHT.Num <> 0,MANY LOOKUP,LEFT OUTER);
You just use the Num field as part of the JOIN condition to make sure it joins to the one rec you want. \\n\\nBut I thought of an easier way:SortedB := SORT(B,SortCol);\\nR := DEDUP(SortedB,SortCol);\\n\\nJOIN(A,R, LEFT.joinCol = RIGHT.JoinCol,MANY LOOKUP,LEFT OUTER);
Since you only want to JOIN to a single possible record for each SortCol value, why not just get rid of all the other records?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-01 12:23:44\" },\n\t{ \"post_id\": 7510, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - I am not able to understand how it will help.\\nThe only job of the iterate seems to be to add the Num field which is not used subsequently. I guess it was a typo and the JOIN was supposed to have RIGHT.Num <> 0.\\nIf that is the case, it seems to be an equivalent of DEDUPing the right table even before the join. I guess I did a poor job of explaining my problem. Here is a contrived example.Let me know if I am making sense.\\n\\n// Generate some random data\\nA := DATASET(20,TRANSFORM({INTEGER joinCol},SELF.joinCol := COUNTER%10 + 1));\\nB := DATASET(100,TRANSFORM({INTEGER joinCol,INTEGER SortCol},SELF.joinCol := COUNTER%10 + 1;SELF.SortCol := RANDOM()%10 + 1));\\n\\n// My suggested code.. Seems to work in hthor (this is my expected result.. Get the top 1.. But, I am not sure if it's expected behavior or random \\nSortedB := SORT(B,SortCol) : PERSIST('persisted::random');\\nSortedB;\\n// The join condition below is a lot more complex, so I cannot pre-eliminate the unwanted data in the right table before the join. \\n// I need to take the top 1 records from the right table \\n// resulting after the join but based on the sort criteria above. And it's a many to many relationship between A and B.\\n\\nJOIN(A,SortedB, LEFT.joinCol = RIGHT.JoinCol,MANY LOOKUP, KEEP(1)); \\n\\n\\n// Here is the code that you provided, for comparison.\\nT := TABLE(SortedB,{UNSIGNED Num := 0,SortedB});\\n\\nRECORDOF(T) XF(T L, T R, INTEGER C) := TRANSFORM\\n SELF.Num := IF(L.SortCol = R.SortCol, 0, C);\\n SELF := R;\\nEND;\\nI := ITERATE(T,XF(LEFT,RIGHT,COUNTER));\\nJOIN(A,I, LEFT.joinCol = RIGHT.JoinCol AND RIGHT.SortCol <> 0,MANY LOOKUP,LEFT OUTER);
\", \"post_time\": \"2015-05-01 11:37:06\" },\n\t{ \"post_id\": 7507, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"Re: JOIN KEEP(1)\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nWhy not simply add a number field to your SortedB and do it like this:\\n\\nSortedB := SORT(B,SortCol);\\nT := TABLE(SortedB,{UNSIGNED Num := 0,SortedB});\\n \\nRECORDOF(T) XF(T L, T R, INTEGER C) TRANSFORM\\n SELF.Num := IF(L.SortCol = R.SortCol, 0, C);\\n SELF := R;\\nEND;\\nI := ITERATE(T,XF(LEFT,RIGHT,COUNTER));\\nJOIN(A,I, LEFT.joinCol = RIGHT.JoinCol AND RIGHT.SortCol <> 0,MANY LOOKUP,LEFT OUTER);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-30 20:45:23\" },\n\t{ \"post_id\": 7506, \"topic_id\": 1711, \"forum_id\": 8, \"post_subject\": \"JOIN KEEP(1)\", \"username\": \"omnibuzz\", \"post_text\": \"I want to join a LEFT table A with about a billion records with another RIGHT table B that has about 100K Records. The join will result in many RIGHT records for every LEFT record, of which I need to choose the top 1 RIGHT record from the resulting joined records based on a particular sort order. I am trying to avoid a join and a roll up and see if I can accomplish by using KEEP(1)\\n\\nWould something of this order work (Consider it a pseudocode, may contain typos)\\n\\n\\nSortedB := SORT(B,SortCol);\\nJOIN(A,SortedB, LEFT.joinCol = RIGHT.JoinCol,LOOKUP MANY, KEEP(1),LEFT);\\n
\\n\\nWill the join use the old sort order in SortedB to decide which record to keep?\\nLet me know if I need to elaborate this further.\\n\\nThanks\\nSrini\", \"post_time\": \"2015-04-30 20:06:06\" },\n\t{ \"post_id\": 7551, \"topic_id\": 1714, \"forum_id\": 8, \"post_subject\": \"Re: In built funtion to get current date in different format\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nThe STD.Date.ConvertFormat() function will convert a date from any standard format to any other.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-06 15:13:45\" },\n\t{ \"post_id\": 7550, \"topic_id\": 1714, \"forum_id\": 8, \"post_subject\": \"Re: In built funtion to get current date in different format\", \"username\": \"bforeman\", \"post_text\": \"Hi Pius,\\n\\nIn the latest 5.2 release the Date Standard Library has been expanded. \\n\\nYou didn't say what format you wanted, but I suspect that using Std.Date.DateToString() will get you most of the way there.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-06 14:53:58\" },\n\t{ \"post_id\": 7535, \"topic_id\": 1714, \"forum_id\": 8, \"post_subject\": \"In built funtion to get current date in different format\", \"username\": \"pius_francis\", \"post_text\": \"Hi all,\\nIs there any in built funtion to get current date in different formats? Currently working on a function to do the same. Just wanted to know whether its already present in ECL.\\nThanks,\\nPius\", \"post_time\": \"2015-05-05 06:13:18\" },\n\t{ \"post_id\": 7556, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"Re: JDBC Driver\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Chen, you can find some high level documentation here:\\nhttp://hpccsystems.com/products-and-ser ... ugins/JAPI\\n\\nLet me know if that helps.\", \"post_time\": \"2015-05-07 17:42:29\" },\n\t{ \"post_id\": 7553, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"Re: JDBC Driver\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThanks for the info. This was definitely helpful. I was wondering if I could get some documentation for this package? Like are there any current limitations in this package ... ? Any detail would be very helpful.\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-07 09:54:25\" },\n\t{ \"post_id\": 7539, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"Re: JDBC Driver\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Chen, \\nThe JDBC Driver currently supports read-only operations.\\nSince you're presumably writing a java application to read oracle data with their JDBC, and you want to write that data to HPCC, you could use the JAPI project to get that data onto HPCC. \\nTake a look at https://github.com/hpcc-systems/HPCC-JAPIs particularly org.hpccsystems.ws.client\\n\\nIf this data migration is only going to be done once, I would prob handle it manually. Let us know if this helps. Thanks.\", \"post_time\": \"2015-05-05 16:02:48\" },\n\t{ \"post_id\": 7538, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"Re: JDBC Driver\", \"username\": \"chanbchen\", \"post_text\": \"Can I use this driver to ingest data into HPCC? Say I want to read an Oracle schema data using Oracle JDBC and insert that data into HPCC. 
Will this HPCC JDBC driver help me with that?\\n\\n- Chen\", \"post_time\": \"2015-05-05 13:34:29\" },\n\t{ \"post_id\": 7537, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"Re: JDBC Driver\", \"username\": \"HPCC Staff\", \"post_text\": \"Chen, \\n\\nThere is a JDBC Driver available which allows you to connect to the HPCC Systems platform through a JDBC client. Take a look here:\\nhttp://hpccsystems.com/products-and-ser ... DBC-Driver\\n\\nThank you for your post!\", \"post_time\": \"2015-05-05 11:49:13\" },\n\t{ \"post_id\": 7536, \"topic_id\": 1715, \"forum_id\": 8, \"post_subject\": \"JDBC Driver\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nIs there a JDBC driver currently available that I could use to interact with HPCC?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-05 11:04:07\" },\n\t{ \"post_id\": 7592, \"topic_id\": 1716, \"forum_id\": 8, \"post_subject\": \"Re: Creating a single dataset from a list of filenames\", \"username\": \"bforeman\", \"post_text\": \"Here is one approach that works:\\n\\n\\nsuperfileName := '~CLASS::BMF::SF::Test';\\n\\n// STD.File.CreateSuperFile('~CLASS::BMF::SF::Test'); //this created the superfile for testing \\n\\nfileList:=NOTHOR(STD.file.LogicalFileList('ecltraining::*'));\\nOUTPUT(filelist);\\n \\naddToSuperfile(string filename) := STD.File.AddSuperFile(superfileName, filename);\\n\\n SEQUENTIAL(STD.File.StartSuperfileTransaction(),\\n NOTHOR(APPLY(filelist,addToSuperfile('~'+filelist.name))),\\n STD.File.FinishSuperfileTransaction()\\n );\\n
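\\n\\nOnce the subfiles have been added, the superfile reads like any other logical file (the layout name here is just a placeholder):\\n\\ncombined := DATASET(superfileName, myLayout, THOR);\\nOUTPUT(COUNT(combined));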
\", \"post_time\": \"2015-05-12 23:14:06\" },\n\t{ \"post_id\": 7541, \"topic_id\": 1716, \"forum_id\": 8, \"post_subject\": \"Creating a single dataset from a list of filenames\", \"username\": \"elouche\", \"post_text\": \"Hello there!\\n\\nI have a dataset that contains a list of filenames and from this list I am trying to create a single dataset with all of the data from each file. I have attempted to create a superfile and add each file to it, however, I get "Cannot call function addsuperfile in a non-global context" when I run the following:\\n\\n\\nSHARED batchNumber:='b15120117';\\nSHARED superfileName:=ut.foreign_prod + 'scrub::cc::tmp::rejreport';\\n\\nSHARED fileList:=lib_fileservices.fileservices.LogicalFileList('scrub::cc*' + batchNumber + '*rejreport', , , ,'10.194.12.1');\\n\\naddToSuperfile(string filename) := FileServices.AddSuperFile(superfileName, filename);\\n\\nAPPLY(fileList, addToSuperfile(ut.foreign_prod + name));\\t\\n
\\n\\nFirst off I would like to know if this is the best way to accomplish combining logical files into one dataset, and second I am wondering why I am unable to add to a superfile in this manner. Let me know if you need any additional information.\\n\\nI appreciate your help in advance!\", \"post_time\": \"2015-05-05 19:36:27\" },\n\t{ \"post_id\": 7562, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Re: Join Conditions\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nThe order of the terms in the join condition is irrelevant. What is relevant is that, if you're doing a half-keyed JOIN (second parameter is an INDEX, typically with a payload), then the join condition becomes an implicit filter on that INDEX. When you are filtering an INDEX, if you do not filter on the leading field(s) of the INDEX's search terms, then you must use KEYED/WILD. If you are using the leading field(s), then you do not need KEYED/WILD.\\n\\nThis code demonstrates what I'm talking about. Because of the generated data used here, all four of these JOINs will produce the same result -- that would not be the case with "real world" data.ds1_rec := RECORD\\n UNSIGNED4 F1;\\n UNSIGNED4 F2;\\n UNSIGNED4 F3;\\n STRING20 P1;\\n STRING20 P2;\\nEND;\\nds2_rec := RECORD\\n UNSIGNED4 F1;\\n UNSIGNED4 F2;\\n UNSIGNED4 F3;\\n STRING20 D1;\\n STRING20 D2;\\nEND;\\nds1 := DATASET(1000,\\n TRANSFORM(ds1_rec,\\n SELF.F1:=COUNTER,\\n SELF.F2:=COUNTER + 10000,\\n SELF.F3:=COUNTER + 1000000,\\n SELF.P1:='Payload1 ' + INTFORMAT(COUNTER,10,1),\\n SELF.P2:='Payload2 ' + INTFORMAT(COUNTER,10,1)\\n\\t\\t\\t));\\nds2 := DATASET(1000,\\n TRANSFORM(ds2_rec,\\n SELF.F1:=COUNTER,\\n SELF.F2:=COUNTER + 10000,\\n SELF.F3:=COUNTER + 1000000,\\n SELF.D1:='DS2 - 1 ' + INTFORMAT(COUNTER,10,1),\\n SELF.D2:='DS2 - 2 ' + INTFORMAT(COUNTER,10,1)\\n\\t\\t\\t));\\nidx := INDEX(ds1,{F1,F2,F3},{ds1},'~RTTEST::IDX::KeyedJoinTest');\\n\\nbld := BUILD(idx,OVERWRITE);\\n\\nj1 := JOIN(DS2,idx,LEFT.f1=RIGHT.f1);\\nj2 := JOIN(DS2,idx,LEFT.f1=RIGHT.f1 AND LEFT.f2=RIGHT.f2);\\nj3 := JOIN(DS2,idx,KEYED(LEFT.f2=RIGHT.f2) AND WILD(RIGHT.f1));\\nj4 := JOIN(DS2,idx,KEYED(LEFT.f1=RIGHT.f1 AND LEFT.f3=RIGHT.f3) AND WILD(RIGHT.f2));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nPara := PARALLEL(OUTPUT(j1,NAMED('j1')),\\n OUTPUT(j2,NAMED('j2')),\\n OUTPUT(j3,NAMED('j3')),\\n OUTPUT(j4,NAMED('j4')));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nSEQUENTIAL(Bld,Para);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-08 14:04:11\" },\n\t{ \"post_id\": 7560, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Re: Join Conditions\", \"username\": \"chanbchen\", \"post_text\": \"Thank you Rich.\\n\\nJust to reiterate, so it is okay not to use the field that we're joining on to be the 2nd condition in the join clause?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-07 21:51:16\" },\n\t{ \"post_id\": 7554, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Re: Join Conditions\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nAH!! I missed that. In that case, your original code was correct.\\n\\nRichard\", \"post_time\": \"2015-05-07 13:21:18\" },\n\t{ \"post_id\": 7552, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Re: Join Conditions\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThanks for the reply. But in my case f1 is also a keyed field - it's the 2nd key field in the index. \\n\\nThanks\\nChen\", \"post_time\": \"2015-05-06 16:40:20\" },\n\t{ \"post_id\": 7548, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Re: Join Conditions\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nIt should probably be this:JOIN(ds, myKey, KEYED(myKeysFirstKeyField = 10) AND left.f1 = right.f1);
Note that this is KEYED as in KEYED and WILD (since it is part of the join condition) and not the KEYED option on JOIN.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-06 13:35:29\" },\n\t{ \"post_id\": 7546, \"topic_id\": 1720, \"forum_id\": 8, \"post_subject\": \"Join Conditions\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have a dataset ds that I would like to do a join with an index myKey.\\n\\nCan I use a key field in myKey which is not a part of the join condition first in my join clause? Or should the join clause always contain the join condition at the very beginning?\\n\\n JOIN(ds, myKey, myKeysFirstKeyField = 10 AND left. f1 = right.f1);
\\n\\nIs the above usage okay? f1 is the actual join condition but I have not used it in the beginning of the join clause. (This is because in my scenario, I have f1 as the 2nd key field in my index and myKeysFirstKeyField is the first key field).\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-06 09:42:23\" },\n\t{ \"post_id\": 7559, \"topic_id\": 1722, \"forum_id\": 8, \"post_subject\": \"Re: Roxie vs Thor\", \"username\": \"rtaylor\", \"post_text\": \"Chen,I have heard everyone say roxie would be faster than thor.
That's an "apples and oranges" type of question. \\n\\nThor and Roxie are two different tools, with two different purposes, which both use a common infrastructure and programming language (ECL).\\n\\nThor is a back-office massive data processing platform, designed to work with huge amounts of data and produce huge result sets. It does one job at a time and is NOT designed to be customer-facing.\\n\\nRoxie is designed as a customer-facing rapid data delivery engine, delivering thousands of small concurrent results to end-users. An end user isn't interested in all the people in the USA, just the "Smiths" that live on "Main Street" in "AnyTown" or the "Taylors" that live on "High Street" in "MyTown". \\n\\nThor prepares data for deployment to Roxie, mostly by cleaning and standardizing raw input into final delivery form, then building indexes for rapid access to specific records. Then that indexed data is copied to Roxie for use by end user queries. \\n\\nRoxie then provides that data to end-users, using the indexes to deliver the small result sets based on what the specific customer wants in each separate query transaction.I am curious to understand what makes roxie faster.
Those indexes that rapidly get just the bit of information the end user wants to see this time.And on an average how faster would roxie be compared to thor?
Not a fair question to ask.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-07 20:38:13\" },\n\t{ \"post_id\": 7558, \"topic_id\": 1722, \"forum_id\": 8, \"post_subject\": \"Roxie vs Thor\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have heard everyone say roxie would be faster than thor.\\n\\nI am curious to understand what makes roxie faster.\\n\\nAnd on an average how faster would roxie be compared to thor?\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-07 19:54:32\" },\n\t{ \"post_id\": 7596, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"Hi Richard,\\n\\n Thanks!. The Join is now working. The issue was with the data. I should have checked this earlier. \\nThere were too many null values in both datasets and this caused the issue.\\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-13 15:59:31\" },\n\t{ \"post_id\": 7595, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nGiven that your records are very long and you're matching on just a couple of fields, I would suggest doing it something like this:\\n
LeftTable := TABLE(LeftDataset,{UniqueIDfield,MatchingField1,MatchingField2});\\nRightTable := TABLE(RightDataset,{UniqueIDfield,MatchingField1,MatchingField2});\\n\\nJoinRec := RECORD\\n TYPEOF(LeftTable.UniqueIDfield) LeftID;\\n TYPEOF(RightTable.UniqueIDfield) RightID;\\n LeftTable.MatchingField1;\\n LeftTable.MatchingField2;\\nEND;\\n\\nJoinIDs := JOIN(LeftTable,RightTable,\\n LEFT.MatchingField1 = RIGHT.MatchingField1 AND\\n LEFT.MatchingField2 = RIGHT.MatchingField2,\\n TRANSFORM(JoinRec,\\n SELF.LeftID := LEFT.UniqueIDfield, \\n SELF.RightID := RIGHT.UniqueIDfield,\\n SELF := LEFT));
Using vertical slice TABLEs to reduce the record size to only those fields needed for the JOIN should cure the memory issue. The JOIN then produces a "join table" of just the unique record identifiers of the matching records. This is an old-school many-many technique for joins of this type.\\n\\nAnother possible solution would be to use the GROUP function, something like this:\\nLeftSort := SORT(LeftDataset,MatchingField1);\\nLeftGroup := GROUP(LeftSort,MatchingField1);\\n\\nRightSort := SORT(RightDataset,MatchingField1);\\nRightGroup := GROUP(RightSort,MatchingField1);\\n\\nJoinRec := RECORD\\n LeftDataset;\\n RightDataset;\\nEND;\\n\\nJoinIDs := JOIN(LeftGroup,RightGroup,\\n LEFT.MatchingField1 = RIGHT.MatchingField1 AND\\n LEFT.MatchingField2 = RIGHT.MatchingField2,\\n TRANSFORM(JoinRec,\\n SELF := LEFT\\n SELF := RIGHT),GROUPED);
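\\n\\nOne nit in that inline TRANSFORM: the two wildcard assignments need a comma between them, i.e. TRANSFORM(JoinRec, SELF := LEFT, SELF := RIGHT).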
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-13 13:13:01\" },\n\t{ \"post_id\": 7593, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"The left recordset(larger one) has a record length of 568(count is around 290 million) and the right(smaller one) has record length of 633(count is around 28 million). I am trying to match using postcode field for equality condition of the join. This will match many records on the right set. To reduce the number of matches i also tried to match building name and building number but am still getting a "memory limit exceeded" error.\\n\\nThe number of fields are 45 for left and 38 for the right and i am using only 3 of the fields in join condition. \\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-13 09:47:46\" },\n\t{ \"post_id\": 7585, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nOK, what sizes are the RECORD structures for the two files, what field(s) are you JOINING on (and how many are you not), and how granular are the matches (IOW, does each record on the left match a bazillion recs on the right)?\\n\\nRichard\", \"post_time\": \"2015-05-12 17:05:40\" },\n\t{ \"post_id\": 7584, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"Hi Richard,\\n\\n I tried that too. It still throws out of memory error.\\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-12 16:03:49\" },\n\t{ \"post_id\": 7583, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nAlso try just a global JOIN without SMART, please.\\n\\nRichard\", \"post_time\": \"2015-05-12 15:48:41\" },\n\t{ \"post_id\": 7582, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"Hi Richard,\\n\\n Yes. Am still getting a "out of memory" error. I tried using local join instead of smart.\\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-12 15:46:24\" },\n\t{ \"post_id\": 7581, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nAnd if you remove the SMART option, does it still error out?\\n\\nRichard\", \"post_time\": \"2015-05-12 15:31:46\" },\n\t{ \"post_id\": 7580, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"Hi Richard ,\\n\\n Thanks! I tried interchanging the left and right datasets so that the left data set is the larger one. 
But I am still facing another issue.\\n\\n"Error: System error: 0: Graph[12], smartjoin[15]: SLAVE 10.193.65.12:8500: Graph[12], smartjoin[15]: Out of memory, allocating row array, had 1966072, trying to allocate 1966073 elements, (0, 0), 0, "\\n\\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-12 15:20:02\" },\n\t{ \"post_id\": 7578, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Re: Memory Limit exceeded issue in smart join\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nThe standard "rule" for JOINs is that the larger file should be the left file, so I would first try reversing the two files (or just add PARTITION RIGHT to your existing JOIN).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-12 14:17:10\" },\n\t{ \"post_id\": 7577, \"topic_id\": 1725, \"forum_id\": 8, \"post_subject\": \"Memory Limit exceeded issue in smart join\", \"username\": \"manish_jaychand\", \"post_text\": \"Hi,\\n\\n I am facing a issue while trying to use smart join. The left recordset has around 28 million records and the right record set has around 290 million. I have also attached the error log for this. \\n "System error: 1300: Graph[12], smartjoin[15]: SLAVE 10.193.65.12:8500: memory limit exceeded".\\n\\nThanks,\\nManish\", \"post_time\": \"2015-05-12 13:05:37\" },\n\t{ \"post_id\": 7587, \"topic_id\": 1726, \"forum_id\": 8, \"post_subject\": \"Re: Key Filters\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nI do not see any problems using OR in an INDEX filter, the compiler should optimize it efficiently.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-12 17:48:36\" },\n\t{ \"post_id\": 7579, \"topic_id\": 1726, \"forum_id\": 8, \"post_subject\": \"Key Filters\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nIn general, is it a good idea to use OR conditions in a key's filter?\\n\\nSay I have a key myKey which has key fields kf1, kf2, kf3 and kf4.\\n\\nIs it a good idea to use the following condition:\\n\\n
myKey(kf1 = 1 or kf2 = 2 or kf3 = 3 or kf4 = 4)
\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-12 15:09:33\" },\n\t{ \"post_id\": 7609, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Richard.\", \"post_time\": \"2015-05-18 14:06:04\" },\n\t{ \"post_id\": 7608, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"rtaylor\", \"post_text\": \"Looking for the best way to compare two (same layout) files
I suggest you look at using ROWDIFF in a JOIN.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-15 19:14:56\" },\n\t{ \"post_id\": 7606, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"balajisampath\", \"post_text\": \"I executed your code and it worked in THOR.\\n\\nLooking for the best way to compare two (same layout) files, so trying to find out if I can use the results of KEYDIFF file.\\n\\nCurrently I am using STD.File.CompareFiles but looking for next level which can produce column level comparison.\", \"post_time\": \"2015-05-15 17:57:05\" },\n\t{ \"post_id\": 7604, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"bforeman\", \"post_text\": \"Did you run my code example? I could not get it to fail.\\n\\nAre you just experimenting with KEYDIFF, or what is it exactly that you are trying to do?\\n\\nYou do know that KEYDIFF is designed to work with KEYPATCH?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-15 15:30:03\" },\n\t{ \"post_id\": 7603, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"balajisampath\", \"post_text\": \"Still could not get the output i was looking for.\\n\\nI got the same error after including file position also while executing in HTHOR\\n\\nWhen executed in THOR with file position the results were not not accessible. Results doesn't shows up in IDE, also I cant read the output file written.\\n\\nWhen executed in THOR without file position the job runs indefinitely.\\n\\nThanks,\\nBalaji\", \"post_time\": \"2015-05-15 15:06:42\" },\n\t{ \"post_id\": 7600, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"Re: KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"bforeman\", \"post_text\": \"The attached code works fine for me. I think that KEYDIFF requires that the INDEX definitions expose the VIRTUAL file position field:\\n\\n rec:= RECORD\\n string10 name;\\n unsigned2 age;\\n END;\\n\\n ds1 := DATASET([{'Name1',10},{'Name2',20}],{rec});\\n ds2 := DATASET([{'Name1',10},{'Name3',20}],{rec});\\n a := output(ds1,,'~BMFtest::file1',overwrite);\\n b := output(ds2,,'~BMFtest::file2',overwrite);\\n\\n newds1 := dataset('~BMFtest::file1',{rec,UNSIGNED8 recpos {virtual(fileposition)}},thor);\\n newds2 := dataset('~BMFtest::file2',{rec,UNSIGNED8 recpos {virtual(fileposition)}},thor);\\n // newds1;\\n // newds2;\\n i1 := INDEX(newds1,{name,recpos},'~BMFtest::file1_idx');\\n i2 := INDEX(newds2,{name,recpos},'~BMFtest::file2_idx');\\n C := buildindex(i1);\\n d := buildindex(i2);\\n\\t\\t// SEQUENTIAL(PARALLEL(A,B),PARALLEL(C,D)); //BUILD THIS FIRST, THEN RUN KEYDIFF\\n KEYDIFF(i1,i2,'~BMFtest::DIFF::i1i2',OVERWRITE);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-15 12:37:46\" },\n\t{ \"post_id\": 7599, \"topic_id\": 1729, \"forum_id\": 8, \"post_subject\": \"KEYDIFF - UNIMPLEMENTED activity error\", \"username\": \"balajisampath\", \"post_text\": \"I am getting error while trying KEYDIFF. Not sure if I am missing anything. Please help\\n\\nError: System error: -1: UNIMPLEMENTED activity 'keydiff'(kind=59) at /var/lib/jenkins/workspace/LN-Candidate-5.2.4-rc1/LN/centos-6.4-x86_64/HPCC-Platform/ecl/eclagent/eclgraph.cpp(321) (0, 0), -1, \\n\\nrec:= RECORD\\n\\tstring10 name;\\n\\tunsigned2 age;\\n END;\\n\\n// ds1 := DATASET([{'Name1',10},{'Name2',20}],{rec});\\n// ds2 := DATASET([{'Name1',10},{'Name3',20}],{rec});\\n// output(ds1,,'~thor::file1',overwrite);\\n// output(ds2,,'~thor::file2',overwrite);\\n\\nds1:= dataset('~thor::file1',rec,thor);\\nds2:= dataset('~thor::file2',rec,thor);\\nds1;\\nds2;\\ni1 := INDEX(ds1,\\n{name},\\n'~thor::file1_idx');\\ni2 := INDEX(ds2,\\n{name},\\n'~thor::file2_idx');\\n//buildindex(i1);\\n//buildindex(i2);\\nKEYDIFF(i1,i2,'~thor::DIFF::i1i2',OVERWRITE);
\", \"post_time\": \"2015-05-14 20:07:35\" },\n\t{ \"post_id\": 7607, \"topic_id\": 1730, \"forum_id\": 8, \"post_subject\": \"Re: How to make sequential and when\", \"username\": \"kovacsbv\", \"post_text\": \"Found it. A logical file got deleted by accident somehow. Works fine, thanks!\", \"post_time\": \"2015-05-15 18:44:10\" },\n\t{ \"post_id\": 7605, \"topic_id\": 1730, \"forum_id\": 8, \"post_subject\": \"Re: How to make sequential and when\", \"username\": \"kovacsbv\", \"post_text\": \"The definitions were causing errors such that when I resolved them by not using them, the errors went away.\\n\\nI ran your edited file (it passed error checks), and produced the output logical file.\\n\\nOk, so I run the query:\\n\\nIMPORT forensics;\\nResult := DATASET(\\n '~forensics::tagged::search', \\n forensics.search, \\n CSV(\\n HEADING(1),\\n SEPARATOR(['\\\\t']),\\n TERMINATOR(['\\\\n']),\\n UNICODE\\n )\\n);\\n\\nFilteredResult := Result(Timestamp[1..10]='2015040623');\\n\\nFilteredResult;
\\n\\nAnd get:\\n\\nError: System error: 10004: Graph[1], SLAVE 10.0.1.14:20100: Graph[1], csvread[2]: No physical file part for logical file forensics::tagged::search, found at given locations: //xxx.xxx.xxx.xxx/var/lib/HPCCSystems/hpcc-data/thor/forensics/tagged/search._9_of_60, //10.0.1.15/var/lib/HPCCSystems/hpcc-mirror/thor/forensics/tagged/search._9_of_60 (Error = 115) (0, 0), 10004,\", \"post_time\": \"2015-05-15 17:17:00\" },\n\t{ \"post_id\": 7602, \"topic_id\": 1730, \"forum_id\": 8, \"post_subject\": \"Re: How to make sequential and when\", \"username\": \"rtaylor\", \"post_text\": \"Vic,\\n\\nYour problem is that SEQUENTIAL requires that all its parameters be actions, and this:Result := DATASET(\\n '~forensics::tagged::aggregate::20150406__search',\\n forensics.search,\\n CSV(\\n HEADING(1),\\n SEPARATOR(['\\\\t']),\\n TERMINATOR(['\\\\n']),\\n UNICODE\\n )\\n ),//
is a definition, not an action. \\n\\nYou need to move that definition outside of the SEQUENTIAL. You also do not need the nested SEQUENTIAL since it takes only one parameter. Also, why bother to define your filename constants and not use them?\\n\\nI've attached my edits of your code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-15 13:59:05\" },\n\t{ \"post_id\": 7601, \"topic_id\": 1730, \"forum_id\": 8, \"post_subject\": \"How to make sequential and when\", \"username\": \"kovacsbv\", \"post_text\": \"My question is really about how to make this code work,\\nbut mostly about when to use SEQUENTIAL and how.\\n\\nI'm attaching two ECL files. In the first, there is a sequence of processes:\\n1. Generate an empty Superfile, forensics::tagged::aggregate::20150406__search .\\n2. Use a master logical file (which may not exist), ~forensics::tagged::search, to hold all records.\\n3. Add the existing master logical file to the superfile.\\n4. Add additional files, as needed, to the superfile.\\n5. Aggregate all the above into a new master logical file with a file that has a random name (~tmp::2015-05-14_17-04-47_iqhpohepga).\\n6. Replace the old master logical file (~forensics::tagged::search) with the new one (~tmp::2015-05-14_17-04-47_iqhpohepga).\\n\\nWhen run non-sequentially, HPCC complains that:\\n\\nError: System error: -1: addSubFile: File forensics::tagged::search is already a subfile of forensics::tagged::aggregate::20150406__search\\n\\nBut since the first IF ensures that the file exists and is cleared out, this seems to be a sequencing issue. So I put a SEQUENTIAL() in and the error goes away, but now the Result:= instruction around line 64 complains that it is undeclared (which is probably an issue with putting the Result := in the SEQUENTIAL() statement.\\n\\nCan anybody show me the right way to do this? I do insist that the code create anything that's not already there and clean out anything that is already there\\nso it can be rerun if needed without errors. If you can show me how to get rid of the random filename, that would be good too.\\n\\nTIA,\\n\\nVic\", \"post_time\": \"2015-05-15 13:10:04\" },\n\t{ \"post_id\": 8802, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"janet.anderson\", \"post_text\": \"No, I had the string length specified as in your seconds example, but the output to CSV file with no separator still did not work.\", \"post_time\": \"2015-12-10 14:54:27\" },\n\t{ \"post_id\": 8800, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your OutRec is \\nOutRec := RECORD\\n STRING v;\\nEND
\\n\\nWouldn't your trim problem be solved it your OutRec was the following?\\n OutRec := RECORD\\n STRING10 v;\\nEND
\", \"post_time\": \"2015-12-10 14:49:48\" },\n\t{ \"post_id\": 8798, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"janet.anderson\", \"post_text\": \"Yes, this is great. Thank you!\", \"post_time\": \"2015-12-10 14:22:37\" },\n\t{ \"post_id\": 8796, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"bforeman\", \"post_text\": \"Hi Janet,\\n\\nTry this simple code example. When you despray it to the landing zone, you can verify in the Hex Previewer that there is a CR/LF sequence generated at the end of each line. Is this what you are trying to achieve?\\n\\nRegards,\\n\\nBob\\n\\nPtblRec := RECORD\\n STRING4 sequence;\\n STRING2 State;\\n STRING20 City;\\n STRING25 Lname;\\n STRING15 Fname;\\n STRING2 EOL := '\\\\r\\\\n'\\nEND;\\n\\nTemp := DATASET([{'3000','FL','BOCA RATON','LONDON','BILLY'},\\n\\t\\t {'35','FL','BOCA RATON','SMITH','FRANK'},\\n\\t\\t {'50','FL','BOCA RATON','SMITH','SUE'},\\n\\t\\t {'135','FL','BOCA RATON','SMITH','NANCY'},\\n\\t\\t {'235','FL','BOCA RATON','SMITH','FRED'},\\n\\t\\t {'335','FL','BOCA RATON','TAYLOR','FRANK'},\\n\\t\\t {'3500','FL','BOCA RATON','JONES','FRANK'},\\n\\t\\t {'30','FL','BOCA RATON','TAYLOR','RICHARD'}], PtblRec);\\n\\t\\t\\t\\t\\t\\t\\t\\t \\nOUTPUT(Temp,,'~AAA::TEST::FlatFile',THOR,OVERWRITE);
\", \"post_time\": \"2015-12-10 14:01:15\" },\n\t{ \"post_id\": 8794, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"janet.anderson\", \"post_text\": \"I am only familiar with fixed width files that have new line for each record, but I am not able to achieve this using either the CSV or the FLAT formats. With the CSV format using options SEPARATOR('') and DELIMITER('\\\\r\\\\n'), I get the same issue described by DSC where the fields are trimmed. When I use FLAT, I get a flat file with no new lines, which is an unfamiliar format to me and not what I think the vendor is requesting. How do I get a fixed width file with no field delimiters, but with row delimiters?\", \"post_time\": \"2015-12-10 03:18:59\" },\n\t{ \"post_id\": 7613, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"DSC\", \"post_text\": \"Ha. I thought I was over-thinking things. Thanks, Bob!\", \"post_time\": \"2015-05-20 13:40:02\" },\n\t{ \"post_id\": 7612, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Re: Creating fixed-width output files\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan!\\n\\nWhy not just output to a FLAT/THOR file? \\n\\nThis code generates a nice fixed length record:\\n\\n
DataRec := RECORD\\n STRING10 v;\\n END;\\n\\n d1 := DATASET(['fizz','buzz'], DataRec);\\n\\n OutRec := RECORD\\n STRING20 v;\\n END;\\n\\n d2 := AGGREGATE\\n (\\n d1,\\n OutRec,\\n TRANSFORM\\n (\\n OutRec,\\n SELF.v := LEFT.v + RIGHT.v\\n )\\n );\\n\\n OUTPUT(d2,,'~tmp::fixed_20',OVERWRITE);//,CSV(SEPARATOR(''),TERMINATOR(''),QUOTE('')),OVERWRITE);
\\n\\nThe ECL IDE clips the last field in the display, but the file size in the ECL watch is 20 as you wanted.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-20 13:32:16\" },\n\t{ \"post_id\": 7611, \"topic_id\": 1731, \"forum_id\": 8, \"post_subject\": \"Creating fixed-width output files\", \"username\": \"DSC\", \"post_text\": \"I'm writing a sample data generator with the goal of creating a fixed-width file. The file would then be desprayed and then used to test another process that consumes that fixed-width file's data. I've run into two issues:\\n\\n1) OUTPUT seems to right-trim its data when writing to Thor. Given this example:\\n\\nDataRec := RECORD\\n STRING10 v;\\nEND;\\n\\nd1 := DATASET(['fizz','buzz'], DataRec);\\n\\nOutRec := RECORD\\n STRING v;\\nEND;\\n\\nd2 := AGGREGATE\\n (\\n d1,\\n OutRec,\\n TRANSFORM\\n (\\n OutRec,\\n SELF.v := RIGHT.v + LEFT.v\\n )\\n );\\n\\nOUTPUT(d2,,'~tmp::fixed_20',CSV(SEPARATOR(''),TERMINATOR(''),QUOTE('')),OVERWRITE);
\\nThe resulting file is 14 bytes long, not 20, presumably due to the right-trim of the output string. A 'NOTRIM' flag would be helpful for the CSV options, but that does not seem to be available. How can I prevent that output from being trimmed?\\n\\n2) If #1 is solved there may be another problem with the sheer size of the output. If the data generator creates a lot of data, and since the output is fixed-width there is no delimiter, using the above scheme would in effect create a single very large string. That may cause memory issues. True?\\n\\nI'm currently getting around both issues by creating the file with a TERMINATOR('\\\\n') option, which puts each fixed-length record on a single line, despraying the file, then post-processing it with a Perl script to fill out each record and rewrite everything without the linefeed. I'd like to avoid that step if possible, though.\\n\\nI may be approaching this whole thing incorrectly. If there is a better way to create large fixed-width files I would love to hear about it.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2015-05-20 12:45:56\" },\n\t{ \"post_id\": 7635, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"rtaylor\", \"post_text\": \"Timothy,Error: Const-foldable string expression expected
That error basically means the code expects a constant and you're giving it the contents of a field in a record in a dataset (definitely not a constant).\\n\\nWhat are you trying to accomplish?\\n\\nRichard\", \"post_time\": \"2015-05-21 19:53:48\" },\n\t{ \"post_id\": 7632, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"I got the macro working, Thank you! Here is a copy of the test code I wrote to try it out. It gives me a RECORD structure that I can use in my ECL code.\\n\\nEXPORT M_Test(OutFile,XMLString) := MACRO\\n \\n\\tLOADXML(XMLString);\\n\\t#DECLARE(outString);\\n\\t#DECLARE(tmpString);\\n\\t\\t\\n\\t#SET(outString, '');\\n\\t#SET(tmpString, '');\\n\\t\\n\\t#FOR(item)\\n\\t\\t\\t#APPEND(tmpString, 'STRING ' + %'datavalue'% + '; \\\\n');\\n\\t#END\\n\\t\\n\\t#APPEND(outString, 'myRec := RECORD \\\\n' + %'tmpString'% + ' END;');\\n\\t\\n\\tOutFile := %'outString'%;\\n\\t%outString%\\n\\t\\nENDMACRO;\\n\\n ds1 := '<headerrow>' +\\n '<item>' +\\n\\t\\t\\t\\t'<datavalue>FirstName</datavalue>' +\\n\\t\\t\\t\\t'</item>' +\\n\\t\\t\\t\\t'<item>' + \\n\\t\\t\\t\\t'<datavalue>LastName</datavalue>' +\\n\\t\\t\\t\\t'</item>' +\\n\\t\\t\\t\\t'</headerrow>';\\nOUTPUT(ds1,NAMED('ds1'));\\n\\nM_Test(newRec,ds1);\\nOUTPUT(newRec);\\n\\nnewRec;
\\n\\n\\nOne more question, I have a record set that contains only 1 row and it's a string that looks like ds1 above. When I try to send just the one record to the macro I get the following error:\\n\\nError: Const-foldable string expression expected (5, 20), 999,
\\n\\nIs there a way to pull that one record out and pass it as a STRING similar to ds1 in the above code?\\nHere is what I tried but it give the same error.\\nSTRING XMLString := headerXML[1].result;\\n\\nM_Test(newRec,XMLString);
\\n\\nIs this just creating a new record set with the same record and not a STRING variable?\\n\\nThanks.\\nTimothy\", \"post_time\": \"2015-05-21 17:56:13\" },\n\t{ \"post_id\": 7630, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"Okay, Thank you.\\n\\nYes, my intention is to generate this RECORD structure as ECL code to be used by other definitions. I'll change over to a macro and see how that goes for me. \\n\\nI appreciate the help.\\n\\nThank you!\\nTimothy\", \"post_time\": \"2015-05-21 14:25:01\" },\n\t{ \"post_id\": 7628, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"rtaylor\", \"post_text\": \"Timothy.\\n\\nTry this: r := RECORD\\n UNSIGNED4 dg_parentid;\\n STRING10 dg_firstname;\\n STRING dg_lastname;\\n UNSIGNED1 dg_prange;\\n END;\\n \\n ds1 := DATASET('someBogusFilename', r, THOR);\\n \\nFM_Test(inrec, OutName) := FUNCTIONMACRO\\n \\n #DECLARE(outString);\\n #SET(outString, OutName + ' := RECORD\\\\n ');\\n #EXPORTXML(Fred,inrec);\\n \\n #FOR (Fred)\\n #FOR (Field) \\n #APPEND(outString,%'{@type}'%\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t #IF (%'{@size}'% <> '-15' AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t%'{@isRecord}'%='' AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t%'{@isDataset}'%='')\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t + %'{@size}'%\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t #END\\n + ' ' + %'{@label}'% + ';\\\\n ');\\n #END\\n #END\\n #APPEND(outString,'END;');\\n RETURN %'outString'% ;\\nENDMACRO;\\n\\nOUTPUT(FM_Test(RECORDOF(ds1),'MyRec'),NAMED('RECORD_Structure'));
I adapted the #EXPORTXML example code to produce a RECORD Structure. \\n\\nNote that the first parameter to the FUNCTIONMACRO uses the RECORDOF function.\\n\\nIf your intention is to generate this RECORD structure as ECL code that will be used by other defintions, then I think you'll have to change this to be a MACRO and not a FUNCTIONMACRO to accomplish that.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 13:58:11\" },\n\t{ \"post_id\": 7624, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"I think I know what I’m doing wrong but could use some guidance on what direction to go. I’m passing a dataset to the FUNCTIONMACRO but the #FOR needs an XML STRING to work. I tried using the TOXML but that only work on 1 record and it still didn't work for me. Is there a way in ECL to convert a dataset to a XML STRING? Here is my updated code:\\n\\nEXPORT FM_Test(infile) := FUNCTIONMACRO\\n \\n\\tOUTPUT(infile,NAMED('infile'));\\n\\t#DECLARE(myData);\\n\\t#EXPORT(myData,infile);\\n\\tOUTPUT(%'myData'%,NAMED('myData'));\\n\\t\\n\\tSTRING ds := (STRING) TOXML(infile[1]);\\n\\tOUTPUT(ds,NAMED('ds'));\\n\\t\\n\\tCnt := COUNT(infile);\\n\\tOUTPUT(Cnt,NAMED('myDataCnt'));\\n\\t\\n\\t#DECLARE(outString);\\n\\t#DECLARE(tmpString);\\n\\t\\t\\n\\t#SET(outString, '');\\n\\t#SET(tmpString, '');\\n\\t\\n\\t#FOR(ds)\\n\\t\\t#FOR(field1)\\n\\t\\t\\t#APPEND(tmpString, field1 + '; \\\\n');\\n\\t\\t\\tOUTPUT(%'tmpString'%, NAMED(tmpString));\\n\\t\\t#END\\n\\t#END\\n\\t\\n\\t#APPEND(outString, 'RECORD ' + %'tmpString'% + ' END;' );\\n\\t\\t\\n\\tRETURN %'outString'% ;\\n\\t\\nENDMACRO;\\n\\nLOADXML('');\\n\\nds1 := dataset([{'FirstName'},{'LastName'},{'MiddleName'}],{STRING field1});\\nOUTPUT(ds1,NAMED('ds1'));\\n\\nOUTPUT(FM_Test(ds1),NAMED('RECORDStructure'));\\n
\\n\\nThank you.\\nTimothy\", \"post_time\": \"2015-05-20 20:24:28\" },\n\t{ \"post_id\": 7618, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"I changed the #EXPORTXML(myData,RECORDOF(infile)); to just #EXPORT(myData,RECORDOF(infile)); and that gave me what I was expecting but come to find out that not what I need so I'm still working on it.\\n\\nThanks.\\nTimothy\", \"post_time\": \"2015-05-20 17:44:12\" },\n\t{ \"post_id\": 7617, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"Thank you that did help. Currently at work we have 5.0.12 but I was able to fix the syntax. Here is the new code that kind of works.\\n\\nEXPORT FM_Test(infile) := FUNCTIONMACRO\\n \\n\\t#DECLARE(myData);\\n\\t#EXPORTXML(myData,RECORDOF(infile));\\n\\tOUTPUT(%'myData'%,NAMED('myData'));\\n\\t\\n\\t#DECLARE(outString);\\n\\t#DECLARE(tmpString);\\n\\t\\n\\t#SET(outString, '');\\n\\t#SET(tmpString, '');\\n\\t\\n\\t#FOR(theXMLdata)\\n\\t\\t#FOR(Field)\\n\\t\\t\\t#APPEND(tmpString, %'{@label}'% + '; \\\\n');\\n\\t\\t#END\\n\\t#END\\n\\t\\n\\t#APPEND(outString, 'RECORD ' + %'tmpString'% + ' END;' );\\n\\t\\t\\n\\tRETURN %'outString'% ;\\n\\t\\nENDMACRO;\\n\\nLOADXML('');\\n\\nds1 := dataset([{'FirstName'},{'LastName'},{'MiddleName'}],{STRING field1});\\nOUTPUT(ds1,NAMED('ds1'));\\n\\nOUTPUT(FM_Test(ds1),NAMED('RECORDStructure'));
\\n\\nThe code OUTPUT(%'myData'%,NAMED('myData')); either isn't working as I expected or I've done something else incorrectly. It appears that no data is being passed to the macro. Any idea what I'm missing?\\n\\nThanks.\", \"post_time\": \"2015-05-20 15:50:48\" },\n\t{ \"post_id\": 7615, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"Re: No XML scope active\", \"username\": \"rtaylor\", \"post_text\": \"Timothy,\\n\\nThe "No XML scope active" error message indicates that you need to use the LOADXML function. This was often required in older builds, but the latest builds have removed the need for a "dummy" LOADXML function in order to use #EXPORTXML. \\n\\nI just tried syntax checking your code in 5.2.2 and I do not get the "No XML scope active" error. Your code does have other issues once that one is resolved, but your approach is one I would pursue, so you just need to update to 5.2.2 and then continue working at it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-20 14:43:56\" },\n\t{ \"post_id\": 7614, \"topic_id\": 1732, \"forum_id\": 8, \"post_subject\": \"No XML scope active\", \"username\": \"tspowell-AU\", \"post_text\": \"Is it possible in ECL using the Template Language, and FUNCTIONMACRO to generate ECL code that assigns a record structure? I have written code that spray’s a file to the cluster, then reads in the header row and transposes it to one record per variable name. What I’d like to be able to do now is take that record set and generate a RECORD Structure that could be used to read in the entire file. Here is what I've written so far to do the RECORD Structure but the syntax is wrong and I’m not sure what wrong.\\n\\nEXPORT FM_Test(infile,infield) := FUNCTIONMACRO\\n \\n\\t#EXPORTXML(theXMLdata, infile);\\n\\t\\n\\t#DECLARE(outString);\\n\\t#DECLARE(tmpString);\\n\\t\\n\\t#SET(outString, '');\\n\\t#SET(tmpString, '');\\n\\t\\n\\t#FOR(field1)\\n\\t\\t#APPEND(tmpString, field1 + '; \\\\n');\\n\\t#END\\n\\t\\n\\t#APPEND(outString, 'myRec := RECORD ' + %'tmpString'% + ' END;' );\\n\\t\\n\\tRETURN %'outString'% ;\\n\\t\\nENDMACRO;\\n\\nds1 := dataset([{'FirstName'},{'LastName'},{'MiddleName'}],{STRING1 field1});\\n\\nFM_Test(ds1,field1);\\n\\nds2 := DATASET('~test::data::test001',myRec,CSV(HEADING(2)));\\n\\nOUTPUT(ds2,NAMED('ds2'))
\\n\\nError: No XML scope active (3, 23), 2164, \\nError: While expanding macro fm_test (21, 20), 2164, \\nError: Unknown identifier "myRec" (23, 39), 2167,\\n\\nAm I heading in the right direction?\\n\\nThanks.\\nTimothy\", \"post_time\": \"2015-05-20 14:28:43\" },\n\t{ \"post_id\": 7652, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"ghalliday\", \"post_text\": \"Richard is right, but I'll try and and expand on what he says:\\n\\nIf you have some code:\\n\\n\\noutlayout txfm(inlayout le) := TRANSFORM\\n SELF.dtfirstseen := STD.Date.Today();\\n SELF.dtlastseen := SELF.dtfirstseen;\\n SELF := [];\\nEND;\\n
\\nThen that is translated at parse time (before the system starts generating any code, or processing the ECL) to the following form:\\n\\n\\noutlayout txfm(inlayout le) := TRANSFORM\\n TheDate := STD.Date.Today();\\n SELF.dtfirstseen := TheDate;\\n SELF.dtlastseen := TheDate;\\n SELF := [];\\nEND;\\n
\\ni.e., those two pieces of code will be processed identically.\\n\\nIf you use SELF.XXX on the right hand side of an assignment, then the transform must have already had an assignment to that field. Note: It doesn't matter which order the fields are in the target layout - dtfirstseen could follow dtlastseen.\\n\\nThe code generated to implement the transform may evaluate the expressions in any order (and will often evaluate them in a different order from the order in the transform).\\n\\nI would use whichever syntax seems most natural - but bear in the back of your mind that using SELF.x actually means "use the value that is assigned to SELF.x".\\n\\nA couple of random notes:\\n\\n\\n
\", \"post_time\": \"2015-05-26 10:17:46\" },\n\t{ \"post_id\": 7640, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,Are you suggesting we still have a possibility of getting amigious results if we use SELF on the right in a transform because the assignment sequence at run time need not be the coded sequence?
I'm suggesting the doc may be incorrect in regards to TRANSFORM (but not necessarily incorrect regarding its use in a RECORD structure). \\n\\nI have not had any problem with this previously in TRANSFORMs, and definitions inside a TRANSFORM are positional -- at least in respect to the shortcuts that have to come last. And you do note that my response to Rahul did specify that the definition to the output field has to come before its use in any expression for a subsequent output field, because ECL has a one-pass compiler.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 20:33:32\" },\n\t{ \"post_id\": 7638, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"vsramesh\", \"post_text\": \"Thanks a lot Richard,\\nAre you suggesting we still have a possibility of getting amigious results if we use SELF on the right in a transform because the assignment sequence at run time need not be the coded sequence? \\n\\nFor the example posted by Rahul since its a smaller example we have the luxury of defining locals as its a smaller transform but for bigger layouts with hundreds of fields do you still recommend using locals againt using SELF on the right.\\n\\nThanks\\nRamesh.\", \"post_time\": \"2015-05-21 20:16:59\" },\n\t{ \"post_id\": 7637, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,\\n\\nAnother way of handling this would be:outlayout txfm(inlayout le) := TRANSFORM\\n SELF.dtfirstseen := STD.Date.Today();\\n SELF.dtlastseen := STD.Date.Today();\\n SELF := [];\\nEND;
or this:outlayout txfm(inlayout le) := TRANSFORM\\n TheDate := STD.Date.Today();\\n SELF.dtfirstseen := TheDate;\\n SELF.dtlastseen := TheDate;\\n SELF := [];\\nEND;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 20:11:38\" },\n\t{ \"post_id\": 7633, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"vsramesh\", \"post_text\": \"Hi Richard,\\n\\nBased on a previous discussion I remember someone pointing out that when we use SELF in right side in a transform as an expression or an assignment it could give ambiguous results as we dont really know the sequence of assigments within a transform and it could not be in the same as the coding sequence. Even the documentation from the language reference seems to suggest that saying " SELF should not be used on the right hand side of any attribute definition". \\n\\nCould you please throw some more light on this.\\n\\nThanks\\nRamesh.\", \"post_time\": \"2015-05-21 18:24:47\" },\n\t{ \"post_id\": 7629, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Re: Using SELF on Right hand side.\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nYes, like this:outlayout txfm(inlayout le) := TRANSFORM\\n SELF.dtfirstseen := STD.Date.Today();\\n SELF.dtlastseen := SELF.dtfirstseen;\\n SELF := [];\\nEND;
Note that before you use it in any expression on the right it must have first been defined itself.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 14:01:47\" },\n\t{ \"post_id\": 7625, \"topic_id\": 1734, \"forum_id\": 8, \"post_subject\": \"Using SELF on Right hand side.\", \"username\": \"Rahul Jain\", \"post_text\": \"Hi,\\nIs it fine if we use SELF on right side? :\\n\\noutlayout txfm(inlayout le) := TRANSFORM\\n SELF.dtlastseen := SELF.dtfirstseen;\\n SELF := [];\\nEND;\\n\\nDoes it impact performance as we are going to have millions/billions of records?\\nLet me know if any further information required.\", \"post_time\": \"2015-05-21 07:52:23\" },\n\t{ \"post_id\": 7677, \"topic_id\": 1739, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Despray\", \"username\": \"rtaylor\", \"post_text\": \"Pius,In our projects the excel report is too large that we are unable to download in single go.
"Excel output" is obtained by clicking on the XLS button on the result page of ECL Watch. That is not the same thing as a despray. \\n\\nThis feature is meant to allow you to look at any data result in an Excel spreadsheet, therefore whatever kind of data the result is (CSV, flat, XML, JSON) this feature will only produce a CSV file for Excel to open. This is meant to be a testing/debugging feature and not a Production tool. \\n\\nDespray is accomplished by selecting a file on the Logical Files page of ECL Watch and then clicking the DESPRAY button. Despray can absolutely handle any size file, whether that file is a flat file, CSV, XML, or JSON file. What a despray operation does is "stitch together" all the separate file parts into a single physical file (flat, CSV, XML, or JSON) on the Landing Zone you specify in the despray options.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-28 13:38:00\" },\n\t{ \"post_id\": 7674, \"topic_id\": 1739, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Despray\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\nIn our projects the excel report is too large that we are unable to download in single go.So we do the following steps\\n1. use template language to split the excel in to smaller chunk files \\n2. And then use .NET tool to download all files automatically from HPCC system.\\n\\n If we are able to despray the output csv file , then there will be no need for the above steps.\\n\\nThanks,\\nPius\", \"post_time\": \"2015-05-28 10:13:51\" },\n\t{ \"post_id\": 7644, \"topic_id\": 1739, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Despray\", \"username\": \"FanFei\", \"post_text\": \"Thanks for the reply, Richard. It's alright, I was just curious. Because I need to despray a few tables, one of them has different field delimiter from the others. When they are loaded to MySQL, that load process expects the same header field delimiter to parse out the field names. But we can change that load code.\\n\\nThanks again!\\n-Fan\", \"post_time\": \"2015-05-22 14:13:03\" },\n\t{ \"post_id\": 7643, \"topic_id\": 1739, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Despray\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nShort answer: No.\\nLonger answer: You could manually construct the file to be that way if you really want to.\\nBigger question: That is a very non-standard way of constructing a field-delimited file, so why would you even want to do this?\\n\\nRichard\", \"post_time\": \"2015-05-22 13:43:20\" },\n\t{ \"post_id\": 7641, \"topic_id\": 1739, \"forum_id\": 8, \"post_subject\": \"Regarding Despray\", \"username\": \"FanFei\", \"post_text\": \"Hi, I need to despray a CSV file from HPCC to landingZone. I'm curious if it's possible to have different field delimiter for Header and data? It means the column headers use comma as the field delimiter, but the data maybe use | as field delimiter. \\n\\nThanks,\\n-Fan\", \"post_time\": \"2015-05-21 21:53:43\" },\n\t{ \"post_id\": 7735, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nYou'll first want a login to our GitHub -- links are here: https://github.com/hpcc-systems\\n\\nThen you'll need a login to our JIRA, here: http://track.hpccsystems.com\\n\\nWhile you're at it, have you tried just doing this:OUTPUT(WebinarDS,,'FileName.JSON', JSON);
\\nThat works quite well on my training datasets, producing what to my eyes looks lilke a perfectly good JSON file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-04 18:29:34\" },\n\t{ \"post_id\": 7734, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"jandleman\", \"post_text\": \"This actually worked in my case. It allowed me to add the outer curly brackets on the entire dataset while turning it into JSON:\\n\\nWebinarJsonDS := PROJECT(WebinarDS,TRANSFORM({UNICODE outstr},SELF.outstr := '{' + TOJSON(LEFT) + '}'));\\n\\nI would still like to enter this into JIRA as a bug or enhancement request since proper JSON needs these brackets. Can you tell direct me to the URL for the JIRA environment?\\n\\nThanks,\\nJohn\", \"post_time\": \"2015-06-04 17:43:28\" },\n\t{ \"post_id\": 7732, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"anthony.fishbeck\", \"post_text\": \"This was done primarily to be consistent with toxml, but it does give you a chance to extend the content of the row. \\n\\nBut it does always require that extra work of opening and closing the row object so as Richard said, feel free to open a JIRA to either discuss different behavior or request a new option to toJSON to open and close the row object automatically.\\n\\nTony\", \"post_time\": \"2015-06-04 16:01:10\" },\n\t{ \"post_id\": 7708, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nI would just do it this way:namesRec2 := RECORD \\n UNSIGNED2 EmployeeID; \\n STRING10 Firstname; \\n STRING10 Lastname; \\nEND; \\nrec2 := TOJSON(ROW({42,'Fred','Flintstone'},namesRec2)); \\nOUTPUT('{' + rec2 + '}');
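If the goal (as elsewhere in this thread) is one self-contained JSON document for the whole recordset rather than one object per row, the same wrapping idea can be rolled up across a projection. A small sketch, using throwaway example data rather than anything from the original posts:

ds   := DATASET([{1,'Kevin'},{2,'Liz'}], {UNSIGNED1 id; STRING10 name});
rows := PROJECT(ds, TRANSFORM({STRING line}, SELF.line := '{' + TOJSON(LEFT) + '}'));
// roll every row up into one comma-separated string, then add the array brackets
one  := ROLLUP(rows, TRUE, TRANSFORM({STRING line}, SELF.line := LEFT.line + ',' + RIGHT.line));
OUTPUT('[' + one[1].line + ']');   // e.g. [{"id": 1, "name": "Kevin"},{"id": 2, "name": "Liz"}]

For large result sets the OUTPUT(...,JSON) form mentioned earlier is the better tool; building one giant string in memory does not scale.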
But you are welcome to submit a report to JIRA for either a feature request to have an option added, or a bug report if you think it should always add the outside braces. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-02 13:07:51\" },\n\t{ \"post_id\": 7707, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"jandleman\", \"post_text\": \"Thanks Richard! The only problem I see is that TOJSON is not wrapping the whole thing in curly brackets "{}". If I call TOJSON on nested data structures, the inner data has the curly brackets. Example:\\n\\n"nameid": 1, "name": "Kevin", "numrows": 1, "children": {"Row": [{"nameid": 1, "addr": "10 Malt Lane"}]}\\n\\nWhat I should see is this:\\n\\n{"nameid": 1, "name": "Kevin", "numrows": 1, "children": {"Row": [{"nameid": 1, "addr": "10 Malt Lane"}]}}\\n\\nIs there an easy way to add the outer curly brackets?\\n\\nThanks,\\nJohn\", \"post_time\": \"2015-06-02 00:45:29\" },\n\t{ \"post_id\": 7657, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Re: Reading and Writing JSON files in THOR\", \"username\": \"rtaylor\", \"post_text\": \"jandleman,\\n\\nYou are in luck ... native JSON support was added to release 5.2 and the docs will be updated with the new JSON syntax for release 5.2.4.\\n\\nBasically, JSON files will be handled just like XML files, so before the official docs arrive you can experiment by reading the OUTPUT,XML docs and just replace XML with JSON in your experimental code. Same thing with DATASET,XML. Because of the structural similarities between XML and JSON, we're using XPATH in the RECORD structures for both.\\n\\nThere are also new TOJSON() and FROMJSON() functions that operate the same way the TOXML() and FROMXML() functions do. Here's an example:
namesRec := RECORD \\n UNSIGNED2 EmployeeID{xpath('EmpID')}; \\n STRING10 Firstname{xpath('FName')}; \\n STRING10 Lastname{xpath('LName')}; \\nEND; \\nx := '{"FName": "George" , "LName": "Jetson", "EmpID": 42}'; \\nrec := FROMJSON(namesRec,x); \\nOUTPUT(rec);\\n\\nnamesRec1 := RECORD \\n UNSIGNED2 EmployeeID{xpath('EmpID')}; \\n STRING10 Firstname{xpath('FName')}; \\n STRING10 Lastname{xpath('LName')}; \\nEND; \\nrec1 := TOJSON(ROW({42,'Fred','Flintstone'},namesRec1)); \\nOUTPUT(rec1); \\n//returns this string: \\n//'"EmpID": 42, "FName": "Fred", "LName": "Flintstone"' \\n\\nnamesRec2 := RECORD \\n UNSIGNED2 EmployeeID; \\n STRING10 Firstname; \\n STRING10 Lastname; \\nEND; \\nrec2 := TOJSON(ROW({42,'Fred','Flintstone'},namesRec2)); \\nOUTPUT(rec2); \\n//returns this string: \\n//'"employeeid": 42, "firstname": "Fred", "lastname": "Flintstone"'\\n\\n//**************************************\\nMyRec := RECORD\\n\\tSTRING1 Value1{xpath('Field1')};\\n\\tSTRING1 Value2{xpath('Field2')};\\nEND;\\nds := DATASET([{'C','G'},\\n {'C','C'},\\n {'A','X'},\\n {'B','G'},\\n {'A','B'}],MyRec);\\nPROJECT(ds,TRANSFORM({UNICODE outstr},SELF.outstr := TOXML(LEFT)));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nPROJECT(ds,TRANSFORM({UNICODE outstr},SELF.outstr := TOJSON(LEFT)));
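One more piece that rounds this out: a JSON file on disk can also be declared directly as a DATASET, mirroring the XML form. This is a sketch only - the filename and row path are assumptions, so check the DATASET(...,JSON(...)) documentation for the exact path your file needs:

jsonRec := RECORD
  UNSIGNED2 EmployeeID{xpath('EmpID')};
  STRING10  Firstname{xpath('FName')};
  STRING10  Lastname{xpath('LName')};
END;
// read back a file previously written with OUTPUT(...,JSON)
jds := DATASET('~test::names.json', jsonRec, JSON('/Row'));
OUTPUT(CHOOSEN(jds, 10));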
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-26 16:59:32\" },\n\t{ \"post_id\": 7651, \"topic_id\": 1743, \"forum_id\": 8, \"post_subject\": \"Reading and Writing JSON files in THOR\", \"username\": \"jandleman\", \"post_text\": \"My company deals with a lot of JSON data, and I would like to be able to both input and output this from ECL in THOR. My first priority is to produce a JSON output dataset. For this task, the input files will all be CSV, but I would like to output nested data records in JSON format that I can load into MongoDB. I can not find any instructions for doing this in the documentation.\\n\\nMy second need, will be to ingest files containing JSON data into an ECL program in THOR. We collect lots of telemetry data as JSON messages. Furthermore, much of this data comes in streams that contain multiple message types, each type having a different schema. These would need to be analyzed and split into multiple streams (data sets) for processing.\\n\\nThanks! \", \"post_time\": \"2015-05-26 07:51:32\" },\n\t{ \"post_id\": 7666, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,
If my layout has say 15 fields instead of just an id and score, is there an easier way to copy over all the field values into the table instead of having to pass all the 15 fields as the last parameters to the table call?
That happens in the JOIN, like this:Rec := RECORD\\n UNSIGNED ID;\\n UNSIGNED Score;\\n STRING10 Added;\\nEND;\\n\\n//this dataset generates some random example data\\nds := DATASET(40000,TRANSFORM(Rec, \\n SELF.ID := (RANDOM() % 100)+1,\\n SELF.Score := (RANDOM() % 100)+1 )\\n SELF.Added := INTFORMAT(COUNTER,10,1) )); \\n\\n//this TABLE groups by ID and Score, then filters out groups > 2\\nt := TABLE(ds,{ID,Score,Cnt := COUNT(GROUP)},ID,Score)(Cnt <= 2);\\n\\n//the sort preps the DEDUP to only keep the greatest Score\\ns := SORT(t,ID,-Score);\\nd := DEDUP(s,ID);\\n\\n//then we JOIN back to the original DATASET to get the complete records you want\\nJOIN(ds,d,LEFT.ID=RIGHT.ID and LEFT.Score=RIGHT.Score);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-27 13:19:00\" },\n\t{ \"post_id\": 7664, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"chanbchen\", \"post_text\": \"Thank you for the explanation.\\nI have one more question please.\\n\\nIf my layout has say 15 fields instead of just an id and score, is there an easier way to copy over all the field values into the table instead of having to pass all the 15 fields as the last parameters to the table call?\\n\\nThanks\", \"post_time\": \"2015-05-27 09:58:40\" },\n\t{ \"post_id\": 7659, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nThis code demonstrates what I think you want: Rec := RECORD\\n UNSIGNED ID;\\n UNSIGNED Score;\\nEND;\\n\\n//this dataset generates some random example data\\nds := DATASET(40000,TRANSFORM(Rec, \\n SELF.ID := (RANDOM() % 100)+1,\\n SELF.Score := (RANDOM() % 100)+1 ));\\t\\n\\n//this TABLE groups by ID and Score, then filters out groups > 2\\nt := TABLE(ds,{ID,Score,Cnt := COUNT(GROUP)},ID,Score)(Cnt <= 2);\\n\\n//the sort preps the DEDUP to only keep the greatest Score\\ns := SORT(t,ID,-Score);\\nd := DEDUP(s,ID);\\n\\n//then we JOIN back to the original DATASET to get the records you want\\nJOIN(ds,d,LEFT.ID=RIGHT.ID and LEFT.Score=RIGHT.Score);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-26 18:08:41\" },\n\t{ \"post_id\": 7658, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nAny tips please?\\n\\nThanks\", \"post_time\": \"2015-05-26 17:28:50\" },\n\t{ \"post_id\": 7656, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"chanbchen\", \"post_text\": \"Hi Bob,\\n\\nThanks for the reply.\\n\\nI would like to filter only the ids (but I do not want to denormalize the scores) which have either just 1 or 2 records having the maximum score for that id. (If an id has 3 records with the maximum score, I do not want it in my filtered data. And I would like to retain the records in the original layout and I do not want the scores as child datasets for each id.)\\n\\nPlease help me on how I can achieve this.\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-26 15:42:51\" },\n\t{ \"post_id\": 7655, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Re: Group Operations\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nI would like to fetch the records having the maximum score for each id.
\\n\\nIf you want to fetch the entire record, look at ROLLUP to create a recordset with records salvaged with the highest score. There are many examples in the Language Reference and a good one in the ECL Playground, but let me know if you have any questions.\\n\\nIf you were only interested in the actual score values for analytics, a cross-tab report might also be a good option for that.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-26 15:33:41\" },\n\t{ \"post_id\": 7654, \"topic_id\": 1744, \"forum_id\": 8, \"post_subject\": \"Group Operations\", \"username\": \"chanbchen\", \"post_text\": \"I have a few thousands of rows each having an id and a score (both UNSIGNED).\\n\\nEach id has several rows with a different score. The scores can be repetetive (there can be multiple records with the same id having the same score).\\n\\nI would like to fetch the records having the maximum score for each id.\\n\\nI am a bit lost with if I can and how to (if I can) use GROUP/HAVING.\\n\\nPlease help.\\n\\nThanks\\nChen\", \"post_time\": \"2015-05-26 14:03:20\" },\n\t{ \"post_id\": 7662, \"topic_id\": 1745, \"forum_id\": 8, \"post_subject\": \"Re: HASH64 - String size\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Richard,\\n\\nMy intention is to know the range. Since I am storing it as string, wrongly phrased the question but got the answer \", \"post_time\": \"2015-05-26 20:05:35\" },\n\t{ \"post_id\": 7661, \"topic_id\": 1745, \"forum_id\": 8, \"post_subject\": \"Re: HASH64 - String size\", \"username\": \"rtaylor\", \"post_text\": \"balajisampath,\\n\\nThe HASH64() function does not return a string, it returns a 64-bit integer -- an UNSIGNED8. \\nRange of values: 0 to 18,446,744,073,709,551,615\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-26 19:07:21\" },\n\t{ \"post_id\": 7660, \"topic_id\": 1745, \"forum_id\": 8, \"post_subject\": \"HASH64 - String size\", \"username\": \"balajisampath\", \"post_text\": \"What is the maximum number of characters that would be returned by HASH64 function\", \"post_time\": \"2015-05-26 19:01:42\" },\n\t{ \"post_id\": 7729, \"topic_id\": 1754, \"forum_id\": 8, \"post_subject\": \"Re: Exceeded disk write size limit of 10737418240 while writ\", \"username\": \"bforeman\", \"post_text\": \"I dug into our archives and this was actually reported a couple of years ago, and this was the developer's reply:\\n\\n
The query has run on hThor, because it has been deemed trivial by the code generator.\\nYou can override with #option('pickBestEngine', 0);\\n\\nAnd the disk limit is a hThor defence mechanism, to avoid queries filling up the single target disk it tends to use.\\nThe limit can be raised with:\\n\\n#option('hthorDiskWriteSizeLimit', <size>);\\n\\nWhere rather bizarrely <size> is in bytes (should have at least been in MB's)\\nThe default is 10GB.
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-04 15:30:52\" },\n\t{ \"post_id\": 7709, \"topic_id\": 1754, \"forum_id\": 8, \"post_subject\": \"Exceeded disk write size limit of 10737418240 while writing\", \"username\": \"elango_v\", \"post_text\": \"Hi, \\n\\nThere are two datasets with the common record layout. I could output them individually and that is working fine. I just need to combine both of them and need to output it. I have got the error "error: 0: Exceeded disk write size limit of 10737418240 while writing file ~spill::U3H__W20150602-161319 (0, 0), 0, ". below is the code snippet that I used.\\n\\nSo I tried to select only the 10 records, still I get this issue. Please let me know how to resolve this.\\n\\nt_ds:=project(ds,trans(left));\\nt_ds;// output is fine\\nt_ds2:=project(ds1,trans2(left));\\nt_ds2;//output is fine\\nhist_ds:=choosen(t_ds,10)+choosen(t_ds2,10);\\nhist_ds;// error\", \"post_time\": \"2015-06-02 16:36:10\" },\n\t{ \"post_id\": 7747, \"topic_id\": 1757, \"forum_id\": 8, \"post_subject\": \"Re: Impacts of MaxRecordSize when using STD.File.SprayVariab\", \"username\": \"bforeman\", \"post_text\": \"The development team is researching this at this time, sorry for the delay but we should have an answer soon.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-11 15:17:04\" },\n\t{ \"post_id\": 7717, \"topic_id\": 1757, \"forum_id\": 8, \"post_subject\": \"Impacts of MaxRecordSize when using STD.File.SprayVariable\", \"username\": \"troche\", \"post_text\": \"Does anyone know what the impact in terms of timing (i.e. how long the spray will take) or overall execution will be when using a much larger size than is required for maxRecordSize when calling STD.File.SprayVariable()? For example, let's say I have:\\n\\n myRecord := RECORD, MAXLENGTH(8192)\\n ...\\n END;\\n\\nbut call:\\n\\n STD.File.SprayVariable(srcIP, srcPath, 1000000, ...)\\n\\nusing a value of 1000000 for the maximum record length. How much overhead in terms of timing does this cause? Is the split point calculation going to be a problem using a number that is much larger than the actual record length? What algorithm is used within DFU that makes use of this value?\", \"post_time\": \"2015-06-02 20:15:39\" },\n\t{ \"post_id\": 7722, \"topic_id\": 1758, \"forum_id\": 8, \"post_subject\": \"Re: Macros\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nA FUNCTIONMACRO is an ECL code generator like a MACRO, but because your code is encapsulated in a structure, you can treat it like you can treat any standard ECL Function.\\n\\nUsed in an expression context, it can look something like this:\\n\\nmyresult := myfirstFUNCTIONMACRO(value1, value2);
\\n\\n...and what is RETURNed by the FUNCTIONMACRO will be moved to the myresult definition.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2015-06-03 12:06:35\" },\n\t{ \"post_id\": 7720, \"topic_id\": 1758, \"forum_id\": 8, \"post_subject\": \"Macros\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThe difference between functionmacro and macro is not very clear to me. The documentation says functionmacro can be used in an expression context; but I don't understand it well.\\n\\nPlease help.\\n\\nThanks\\nChen\", \"post_time\": \"2015-06-03 10:40:40\" },\n\t{ \"post_id\": 7743, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"JimD\", \"post_text\": \"Thanks for all this! I have added a Jira issue to add this to documentation.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-13696\\n\\nJim\", \"post_time\": \"2015-06-08 18:19:10\" },\n\t{ \"post_id\": 7742, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"ghalliday\", \"post_text\": \"Richard loves it when people use undocumented features!\\n\\nSo it can be documented...\\n\\ndataset1 - dataset2\\n\\nis translated into the equivalent code:\\n\\nJOIN(dataset1, dataset2, LEFT = RIGHT, TRANSFORM(LEFT), LEFT ONLY);\\n\\n\\nTo answer your question, D2 is likely to be evaluated more quickly than D1 if C is a simple filter condition. For complex conditions the D1 might be more efficient (although in the future see HPCC-8070 we should avoid recalculating calculating C which would likely make D2 more efficient).\", \"post_time\": \"2015-06-08 11:45:35\" },\n\t{ \"post_id\": 7733, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"chanbchen\", \"post_text\": \"I would also like to point out that C1 is a pretty complex condition in my case involving a bunch of REGEXFIND's etc. So I am hesitant to applying the C1 = TRUE filter thinking it would basically be a recalculation of the complex condition on the several million records.\", \"post_time\": \"2015-06-04 16:06:11\" },\n\t{ \"post_id\": 7731, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"chanbchen\", \"post_text\": \"\\nTestLayout := RECORD\\n\\tUNSIGNED Num;\\n\\tSTRING Number;\\nEND;\\n\\nA := DATASET([{1, 'ONE'}, {2, 'Two'}, {3, 'Three'}, {4, 'Four'}], TestLayout);\\nB := A(Num > 2);\\n\\nC := A-B;\\n\\nC;\\n\\n
\\n\\nThis is just a sample code.\\n\\nI find it very handy in scenarios as this:\\n\\n1. Say A is the output of operation 1 in my code.\\n2. Say I have a very complex filter condition.\\n3. I need to do another operation (operation 2) only on records of A that fail the complex condition (C1) (say the filtered dataset is B)\\n4. Say the output of operation 2 is C\\n5. I need to merge data from A that pass C1 and C. \\n6. So to get the records from A that pass C1, if I again apply the filter A(C1 = TRUE), that might cause an overhead if there are several million records. In that case A-B is handy.\\n\\n\\nA := operation 1;\\nB := A(C1 = FALSE);\\nC := operation 2 on B;\\n\\nD1 := A - B + C; // easy to use\\nD2 := A(C1 = TRUE) + C; // not sure if this would affect performance\\n
\\n\\nSo, my question is - would D2 be derived faster than D1? Or would D1 be faster?\\nWould you recommend using the - operator?\\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-06-04 16:01:01\" },\n\t{ \"post_id\": 7730, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"rtaylor\", \"post_text\": \"Chen,I have tried using it and it works consistently.
Please show me some example code, and describe what it does for you.\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2015-06-04 15:31:37\" },\n\t{ \"post_id\": 7728, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"chanbchen\", \"post_text\": \"I have tried using it and it works consistently. But I don't think there's any mention of it in the documentation. But it's very handy to use.\", \"post_time\": \"2015-06-04 14:34:08\" },\n\t{ \"post_id\": 7727, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Re: Dataset Operators\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nAFAIK there is no "-" operator for datasets. To remove records from a dataset you simply filter the dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-04 13:46:07\" },\n\t{ \"post_id\": 7726, \"topic_id\": 1760, \"forum_id\": 8, \"post_subject\": \"Dataset Operators\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThe ECL documentation talks about the + operator for concatenating 2 datasets. I have tried using the - operator likewise to remove a dataset from within another dataset.\\n\\nIs the - operator reliable? A join could be used to work around the - operator; but - is really simple to use and avoids errors while coding. \\n\\n\\nThanks\\nChen\", \"post_time\": \"2015-06-04 12:56:10\" },\n\t{ \"post_id\": 7853, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nI don't know what Gavin would say to this, but I expect it would be something along the lines of, "Each graph is exactly as complex as it needs to be to accomplish what it must."\\n\\nWith that said, you could certainly try to break the code up into multiple "serial" functions and see how that changes your graph. You could also think about changing the job to several workunits, PERSISTing what needs to be done once, and launching each subsequent job using NOTIFY. \\n\\nIt really depends more on what you're actually doing. Since you wrote a FUNCTION, I have to assume you're calling that FUNCTION multiple times within the same WU -- if not, why bother writing it as a FUNCTION (other than encapsulating code for a Roxie query)?\\n\\nIf you want to discuss it, give me a call this afternoon and we can talk over your exact scenario. You have my number \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-29 15:31:44\" },\n\t{ \"post_id\": 7847, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - You are right. I removed the persist and it did run it as a single graph. \\nThe graph, however, looks too complex to my liking. Is there a way I can keep the persist and fix this error or is this by design?\\nCheers\\nSrini\", \"post_time\": \"2015-06-29 08:51:11\" },\n\t{ \"post_id\": 7766, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"rtaylor\", \"post_text\": \"Srini,
the subsequent operations are also inside the function
Then I would expect PERSIST to be unnecessary. I would expect the compiler to ensure that it is calculated only once. Have you seen graphs that indicate otherwise?\\n\\nRichard\", \"post_time\": \"2015-06-15 17:58:13\" },\n\t{ \"post_id\": 7765, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - the subsequent operations are also inside the function.\", \"post_time\": \"2015-06-15 17:25:56\" },\n\t{ \"post_id\": 7763, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"rtaylor\", \"post_text\": \"Srini,My persisted definition is actually a recordset with a few million records, that I am using in subsequent operations multiple times.
How? Since your PERSISTed definition is inside a FUNCTION structure it should not be visible outside that FUNCTION.\\n\\nRichard\", \"post_time\": \"2015-06-15 15:11:03\" },\n\t{ \"post_id\": 7761, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"omnibuzz\", \"post_text\": \"Bob - Created a JIRA ticket as suggested. \\nhttps://track.hpccsystems.com/browse/HPCC-13738\\n\\n-Srini\", \"post_time\": \"2015-06-15 14:22:04\" },\n\t{ \"post_id\": 7760, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - Fair point. That is because the example was contrived. Here is another contrived example to answer your question. \\n\\ntestfn(STRING instr) := FUNCTION\\n outstr := instr + instr : PERSIST('costly::operation');\\n\\n RETURN Std.Str.ToUpperCase(outstr) + Std.Str.ToLowerCase(outstr);\\nEND;\\n\\nSEQUENTIAL(Testfn('first'),TestFn('second'));\\n
\\nMy persisted definition is actually a recordset with a few million records, that I am using in subsequent operations multiple times.\\nCheers\\nSrini\", \"post_time\": \"2015-06-15 13:53:51\" },\n\t{ \"post_id\": 7759, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nEven if it does work it will be recalculating the PERSIST every time the FUNCTION is called, so why bother?\\n\\nRichard\", \"post_time\": \"2015-06-15 13:31:08\" },\n\t{ \"post_id\": 7757, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"Re: error C4088: Duplicate definition of PERSIST\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nReading the documentation, this should work. Can you please open an issue in JIRA?\\nThis will ensure that development will get a look at it.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2015-06-15 13:06:48\" },\n\t{ \"post_id\": 7754, \"topic_id\": 1766, \"forum_id\": 8, \"post_subject\": \"error C4088: Duplicate definition of PERSIST\", \"username\": \"omnibuzz\", \"post_text\": \"When I try to call a function multiple times with different parameters (using sequential) and the function has a persist, it throws error: Warning: (0,0): error C4088: Duplicate definition of PERSIST.\\n\\nHere is a contrived example\\n\\ntestfn(STRING instr) := FUNCTION\\n\\toutstr := instr + instr : PERSIST('this::is::failing');\\n\\tRETURN outstr;\\nEND;\\n\\nSEQUENTIAL(Testfn('first'),TestFn('second'));\\n
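For anyone landing on this thread with the same C4088: the outcome reported in the replies above was simply to drop the PERSIST inside the function and let the compiler reuse the common result within the workunit, along these lines:

testfn(STRING instr) := FUNCTION
  outstr := instr + instr;   // no PERSIST; the compiler evaluates each call's result once
  RETURN outstr;
END;

SEQUENTIAL(OUTPUT(testfn('first')), OUTPUT(testfn('second')));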
\\n\\n-Srini\", \"post_time\": \"2015-06-15 10:47:00\" },\n\t{ \"post_id\": 7776, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN\", \"username\": \"chanbchen\", \"post_text\": \"Yes Rich. That's what I was looking for. Thanks for the tip.\", \"post_time\": \"2015-06-16 13:57:41\" },\n\t{ \"post_id\": 7775, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN\", \"username\": \"rtaylor\", \"post_text\": \"You mean like this?MyRec := RECORD\\n\\tUNSIGNED ID;\\n\\tSTRING10 F1;\\n\\tSTRING10 F2;\\nEND;\\n\\nds := DATASET([{1,'ABCDE','XYDEF'},{1,'DEFGH','ABCDE'},{1,'WXYZ','RSTUV'},\\n {2,'ABCDE','ABDEF'},\\n {3,'DECDE','ABDEF'},{3,'ABCDE','DEFGH'},\\n {4,'ABCDE','DEFGH'},\\n {4,'XYCDE','ABDEF'}],MyRec);\\n\\t\\t\\t\\t\\t\\t\\t \\nOutrec := RECORD\\t\\n MyRec;\\t\\t\\t\\t\\t\\t \\n STRING10 RF1;\\n STRING10 RF2;\\nEND;\\nOutrec XF(MyRec L, MyRec R) := TRANSFORM\\n SELF.RF1 := R.F1;\\n SELF.RF2 := R.F2;\\n SELF := L;\\nEND;\\n\\t\\t\\t\\t\\t\\t\\t \\nJOIN(ds,ds,LEFT.ID = RIGHT.ID AND LEFT.F1[1..2] = RIGHT.F2[1..2],XF(LEFT,RIGHT));\\t\\t\\t\\t\\t\\t\\t
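If only the original-layout records are wanted (keeping each left-hand record whose F2 prefix matches some F1 prefix for the same ID, rather than seeing both sides of the match), the same idea works with TRANSFORM(LEFT) plus a DEDUP. A sketch built on the ds above, not taken from the original reply:

matches := JOIN(ds, ds,
                LEFT.ID = RIGHT.ID AND LEFT.F2[1..2] = RIGHT.F1[1..2],
                TRANSFORM(LEFT));
result  := DEDUP(SORT(matches, ID, F1, F2), ID, F1, F2);  // a record can match several rows
OUTPUT(result);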
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-16 13:38:23\" },\n\t{ \"post_id\": 7773, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nShow us some sample input records, and your expected output from those records, and then we can go from there.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-16 13:34:52\" },\n\t{ \"post_id\": 7772, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nThanks for the reply.\\n\\nI think I did not explain the condition clearly.\\n\\nSay there are 5 records for id = 500. I want all records where 1st 2 chars in F2 match the 1st 2 chars of F1 from "any of the 5 records for id=500".\\n\\nThanks\\nChen\", \"post_time\": \"2015-06-16 13:26:42\" },\n\t{ \"post_id\": 7771, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN\", \"username\": \"bforeman\", \"post_text\": \"Hi Chen,\\n\\nDid you try a simple filter? \\n\\n
myrecs := myDS(F2[1..2] = F1[1..2]);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-16 13:21:12\" },\n\t{ \"post_id\": 7769, \"topic_id\": 1770, \"forum_id\": 8, \"post_subject\": \"SELF JOIN\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have a dataset ds with 3 fields Id, F1 and F2. \\n\\nI would like to filter the records in such a way that for each id, I would like to retain only those records where 1st 2 characters in F2 match the 1st 2 characters of any of the F1 values for that id.\\n\\nIs a self join suitable for this? How can I implement this?\\n\\nThanks\\nChen\", \"post_time\": \"2015-06-16 12:43:29\" },\n\t{ \"post_id\": 8072, \"topic_id\": 1774, \"forum_id\": 8, \"post_subject\": \"Re: Parallel Lightweight Joins don't execute in parallel (5.\", \"username\": \"BrianB644\", \"post_text\": \"An update ...\\n\\nI am running the same "certification suite" using 5.2.4 ... and the parallel lightweight join section is executing in parallel. My current slave layout has a lower cpu/slave ratio and is less able to take advantage of this but I'm glad the prior behavior is restored. I'm expecting a much reduced "long-tail effect" even with the current slave layout.\", \"post_time\": \"2015-09-03 12:54:09\" },\n\t{ \"post_id\": 7787, \"topic_id\": 1774, \"forum_id\": 8, \"post_subject\": \"Re: Parallel Lightweight Joins don't execute in parallel (5.\", \"username\": \"BrianB644\", \"post_text\": \"BTW ... generally parallel execution is much better in 5.0.X ... Check out following from regular everyday code ...\\n\\nfor ip in 172.31.31.{159..170}; do echo "----- $ip -----"; ssh hpcc@$ip "top -b -u hpcc -n 1 | grep thorslave_lcr"; done | gawk '$0 !~ /^-/ {print $9;}' | gawk '{sum += $1} END {print NR, sum, sum/NR/100, sum/100.0, 36*12, sum/(36*12)}'
\\n144 43175.9 2.99833 431.759 432 99.9442
\\n\\nIn "english" this says ... 144 thor slaves keeping 432 CPUs 99.9442% busy. I get a quite a bit of this every run.\", \"post_time\": \"2015-06-17 17:43:54\" },\n\t{ \"post_id\": 7785, \"topic_id\": 1774, \"forum_id\": 8, \"post_subject\": \"Parallel Lightweight Joins don't execute in parallel (5.0.X)\", \"username\": \"BrianB644\", \"post_text\": \"In 4.X releases ... ECL that produced graphs containing multiple "parallel" Lightweight Join elements seemed to execute in parallel and could consume a full CPU per lightweight join per slave. The same code running under 5.0.X releases produces similar graphs and, when executing, the lightweight joins progress evenly, but ... in total ... only consume 1 CPU/slave when executing. Is this an expected difference between 4.X and 5.0.X? If so, is there an option to restore the prior behavior?\\n\\nI'm running the same code and same data in a slightly different physical environment ... in particular ... AWS vs an Internal Cloud, single-slave/instance vs. multi-slaves/instance, and slightly more memory per slave (4.5GB vs 4GB) ... in case one of those factors might make a difference.\", \"post_time\": \"2015-06-17 15:35:16\" },\n\t{ \"post_id\": 7836, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2015-06-25 15:36:26\" },\n\t{ \"post_id\": 7833, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nGiven that it is a set, you can use the COUNT function to determine the number of elements in the passed set and call the separate functions that way. If you have one, you call the 1-read function, two calls the 2-read function... \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-25 14:38:39\" },\n\t{ \"post_id\": 7832, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Hi Richard\\n\\nThat makes perfect sense, however, I'm passing the parameters as a set of string so if there is one of three would that not be done in one read anyway? \\n\\nBasically this questionnaire structure has a row per respondent per question, so I might be looking at everyone who has answered Q1 and aggregate the data, or the parameter lets me aggregate the people who answered Q1 and Q2. I have to join the respondents from Q1 and Q2 and aggregate from there. Same for the third parameter. \\n\\nI hope that makes sense.\\n\\nI could really write both ways and compare \\n\\nDavid\", \"post_time\": \"2015-06-25 14:10:35\" },\n\t{ \"post_id\": 7831, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nJust to throw "a spanner in your works"
...\\n\\nSince this is an INDEX that you're reading, the reads themselves should be pretty efficient. And, since you said, "I may need to read from that index 3 times depending on the parameters passed in by the user application" (emphasis added by me), my response would be: "It depends."
\\n\\nIf your first read is all that's needed a significant portion of the time and you can avoid the other two completely in those circumstances, then the three separate reads scenario would be more appropriate.\\n\\nHowever, this is a case where you can both have and eat your cake.
\\n\\nThe way I would handle it would be to write two separate functions: one to handle three separate reads, and another to do it all in one read. Make sure both return exactly the same structured data. Then in your main Roxie query code make the decision which one to call to do the actual work based on the parameters passed each time the query is called. That way you get the best of both methods. \\n\\nIn fact, if there are actually three possibilities (one, two, or three reads) detectable based on the parameters passed, then I would write three FUNCTIONs to do the real work. \\n\\nIn our Roxie courses we structure the Roxie queries in our lab exercises with two FUNCTIONs, both returning the "same" thing, calling whichever is appropriate based on the parameters passed in each individual query.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-25 13:43:18\" },\n\t{ \"post_id\": 7829, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Excellent, thanks Bob.\\n\\nDavid\", \"post_time\": \"2015-06-25 09:02:58\" },\n\t{ \"post_id\": 7827, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Re: Optimising a query for Roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI would probably opt for B. One dataset would definitely be more efficient and the filter should also be optimized properly by the compiler.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-24 22:26:07\" },\n\t{ \"post_id\": 7825, \"topic_id\": 1784, \"forum_id\": 8, \"post_subject\": \"Optimising a query for Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Hello all\\n\\nI'm trying to optimise a query ready for Roxie and I'm struggling to find the best solution. \\n\\nI'm reading data from an index (Questionnaire data approx 26 million rows). I may need to read from that index 3 times for depending on the parameters passed in by the user application. I can either:-\\n\\na) Read the data into three different datasets as and when I need them.\\n\\nb) Read all data at once into one dataset and filter it as and when I need it.\\n\\nIs there a best practice or obvious performance benefit from either solution or an option I'm not currently considering?\\n\\nMany thanks\\n\\nDavid\", \"post_time\": \"2015-06-24 21:11:55\" },\n\t{ \"post_id\": 7846, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"ravishankar\", \"post_text\": \"Thanks Richard for the clarification.\", \"post_time\": \"2015-06-29 04:53:41\" },\n\t{ \"post_id\": 7841, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,\\n\\nMy opinion is that these are both useful for testing purposes as you're developing code for Roxie queries, but not terribly useful once you go into production.\\n\\n#STORED is useful while developing on Thor to eliminate having to change your code just to pass parameters while testing, allowing you to test the actual code that will eventually end up on Roxie. 
\\n\\n#CONSTANT is useful once you're ready to start testing on Roxie, allowing you to test without having to enter a bunch of parameter values every time you want to run the query.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-26 14:03:25\" },\n\t{ \"post_id\": 7840, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"ravishankar\", \"post_text\": \"Thanks a lot Richard and Jim for your response.\\n\\nAre these #STORED and #CONSTANT are used in any testing scenarios ? \\nIs there any specific usecase where we will use these #STORED and #CONSTANT? \\n\\nIt could be really great help, you throw few lights on the same. please\", \"post_time\": \"2015-06-26 06:10:57\" },\n\t{ \"post_id\": 7837, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"JimD\", \"post_text\": \"I have added a Jira issue to clarify this point in the documentation.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-13819\\n\\nThanks for your question, ravishankar, and thanks to Richard for answering.\\n\\nJim\", \"post_time\": \"2015-06-25 15:43:22\" },\n\t{ \"post_id\": 7835, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"rtaylor\", \"post_text\": \"BTW, replacing your #STORED with #CONSTANT has the effect of removing the STORED definition from the list of parameters that can be passed. You can test it yourself with this code:
#STORED('myname1',10);\\nuserVal := 100 : STORED('myname1');\\nuserVal;\\n\\n#CONSTANT('myname2',100);\\nPersonCount := 0 : STORED('myname2');\\nPersonCount;
\\nWhen run on Thor it produces 10 and 100 as the two results. When published and run as a query, the query takes only one parameter for the "myname1" runtime variable and produces that value as the first result and the second result will always be 100.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-25 15:35:30\" },\n\t{ \"post_id\": 7834, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"Re: #STORED\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,\\n\\nWhen running interactively in Thor, the #STORED takes effect (it's basically a compile-time tool) and you see its value as the result. \\n\\nBut publishing that code as a query makes the STORED definition (the "myname" runtime variable in the workunit) a receptacle for the passed value. So passing a value to the query at runtime will overwrite any default value already there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-25 15:25:23\" },\n\t{ \"post_id\": 7830, \"topic_id\": 1785, \"forum_id\": 8, \"post_subject\": \"#STORED\", \"username\": \"ravishankar\", \"post_text\": \"#STORED('myname',10);\\nuserVal := 100 : STORED('myname');\\nuserVal;\\n\\nDefinition of #STORED:\\n\\nThe #STORED statement assigns the value to the storedname, overwriting any previous value the stored attribute had contained.\\n\\nClarification:\\n\\n1) Published the above three lines of code as a query and hit that in ESP by giving a \\nvalue 200 and it returns 200. As per definition it should overwrite the value 200 by 10.\\n\\n2) The default value 100 got overwritten by 10 when it is ran in ECL IDE.\\n\\nPoint 1 and 2 is expected behaviour of #STORED, please clarifiy.\\n\\n3) In which usecase/scenario this #STORED will be used.\\n\\n4) In which usecase/scenario this #CONSTANT will be used.\", \"post_time\": \"2015-06-25 09:50:57\" },\n\t{ \"post_id\": 7852, \"topic_id\": 1787, \"forum_id\": 8, \"post_subject\": \"Re: iterate transform error?\", \"username\": \"rtaylor\", \"post_text\": \"KatyChow,\\n\\nI would suggest you first identify exactly which call is causing the syntax error by iteratively commenting out all but one call at a time and doing a syntax check on just that one. \\n\\nWhen you identify exactly which is causing the problem, then tell me and give me the exact syntax error that's occurring, please.\\n\\nRichard\", \"post_time\": \"2015-06-29 15:12:39\" },\n\t{ \"post_id\": 7851, \"topic_id\": 1787, \"forum_id\": 8, \"post_subject\": \"Re: iterate transform error?\", \"username\": \"KatyChow\", \"post_text\": \"Hi Richard,\\n\\nHere are several examples of how I am calling this. 
\\n\\n\\nAttsandPredShopResponse := PROJECT(AttsandPred, TRANSFORM({recordof(attsandpred), integer2 response}, SELF.response := LEFT.shopped2015flag;, self := left;));\\n\\nfrequency(AttsandPredShopResponse,zip,a1,'zip',response)\\nfrequency(AttsandPredShopResponse,zip4,a2,'zip4',response)\\nfrequency(AttsandPredShopResponse,addresscnt,a3,'addresscnt',response)\\nfrequency(AttsandPredShopResponse,lnamecounter,a4,'lnamecounter',response)\\nfrequency(AttsandPredShopResponse,cntshoppinghh,a5,'cntshoppinghh',response)\\nfrequency(AttsandPredShopResponse,avgshopping,a6,'avgshopping',response)\\nfrequency(AttsandPredShopResponse,sumshopping,a7,'sumshopping',response)\\nfrequency(AttsandPredShopResponse,avgjj2012shopping,a8,'avgjj2012shopping',response)\\nfrequency(AttsandPredShopResponse,sumjj2012shopping,a9,'sumjj2012shopping',response)\\nfrequency(AttsandPredShopResponse,avgjd2012shopping,a10,'avgjd2012shopping',response)\\nfrequency(AttsandPredShopResponse,sumjd2012shopping,a11,'sumjd2012shopping',response)\\nfrequency(AttsandPredShopResponse,avgjj2013shopping,a12,'avgjj2013shopping',response)\\nfrequency(AttsandPredShopResponse,sumjj2013shopping,a13,'sumjj2013shopping',response)\\nfrequency(AttsandPredShopResponse,avgjd2013shopping,a14,'avgjd2013shopping',response)\\nfrequency(AttsandPredShopResponse,sumjd2013shopping,a15,'sumjd2013shopping',response)\\nfrequency(AttsandPredShopResponse,avgjj2014shopping,a16,'avgjj2014shopping',response)\\n
\", \"post_time\": \"2015-06-29 15:03:17\" },\n\t{ \"post_id\": 7850, \"topic_id\": 1787, \"forum_id\": 8, \"post_subject\": \"Re: iterate transform error?\", \"username\": \"rtaylor\", \"post_text\": \"KatyChow,\\n\\nFirst, there is a vast difference between a MACRO and a FUNCTIONMACRO. This code is a MACRO structure, designed to generate ECL at the exact point in your code where it is called, and it is never truly syntax checked until it is used in your code.\\n\\nTherefore, to properly evaluate your MACRO I will need to see your code that calls it, please.\\n\\nRichard\", \"post_time\": \"2015-06-29 15:01:28\" },\n\t{ \"post_id\": 7849, \"topic_id\": 1787, \"forum_id\": 8, \"post_subject\": \"iterate transform error?\", \"username\": \"KatyChow\", \"post_text\": \"Hi I inherited this function macro and when I try to compile my code I get a syntax error at the iterate portion of it. Could someone tell me what is wrong?\\n\\n\\nfrequency(din,indep,dout,name='',analysis):= MACRO\\n#uniquename(tot_cnt);\\nunsigned8 %tot_cnt% :=count(din);\\n\\n#uniquename(tot_good);\\nunsigned8 %tot_good% :=sum(din,analysis);\\n\\n#uniquename(tot_bad);\\nunsigned8 %tot_bad% :=%tot_cnt%-%tot_good%;\\n\\n#uniquename(x);\\n%x% :=distribute(project(din,transform({value:=din.indep,response:=left.response},\\nself.value:=left.indep;self:=left)),hash32(value));\\n\\n#uniquename(y);\\n%y% :=sort(table(%x%,{var := (string100) name; \\n value; \\n\\t cnt := count(group);\\n cnt_pct := count(group)/%tot_cnt%;\\n dep_sum := sum(group,analysis);\\n dep_pct := sum(group,analysis)/%tot_good%;\\n bad_sum := count(group)-sum(group,analysis); \\n bad_pct := (count(group)-sum(group,analysis))/%tot_bad%;\\n },value,local),value);\\n\\n#uniquename(restype);\\n%restype% := record\\n recordof(%y%);\\n unsigned var_rank:=0;\\n real aggpct:=%y%.cnt_pct;\\n unsigned seqnum :=0;\\nend;\\n\\n#uniquename(xpct);\\n%xpct% := project(%y%,transform(%restype%,self.aggpct :=left.cnt_pct;self:=left;));\\n\\n#uniquename(it);\\n%restype% %it%(%restype% L,%restype% R) := transform\\n self.aggpct := l.aggpct+r.aggpct;\\n self.var_rank := truncate(200*self.aggpct);\\n self.seqnum := if(self.var_rank=l.var_rank,l.seqnum+1,1); \\n self :=r;\\nend;\\n\\noutds := sort(table(iterate(%xpct%,%it%(left,right)), \\n {var;var_rank; \\n min_value:=(string10) min(group,value);\\n max_value:=(string10) max(group,value);\\n port := sum(group,cnt);\\n portpct :=sum(group,cnt_pct);\\n goodtotal:=sum(group,dep_sum); \\n goodpct :=sum(group,dep_pct);\\n badtotal:=sum(group,bad_sum); \\n badpct:=sum(group,bad_pct);\\n indexv:=100*sum(group,dep_pct)/sum(group,bad_pct);\\n },var,var_rank),var,var_rank);\\n\\nENDMACRO;\\n
\\n\\nAny advice would help! Thank you!\", \"post_time\": \"2015-06-29 14:55:56\" },\n\t{ \"post_id\": 7856, \"topic_id\": 1788, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Certification\", \"username\": \"vyasshub\", \"post_text\": \"Hi Richard,\\nThanks for update.\", \"post_time\": \"2015-06-30 05:20:31\" },\n\t{ \"post_id\": 7855, \"topic_id\": 1788, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Certification\", \"username\": \"rtaylor\", \"post_text\": \"vyasshub,\\n\\nWe do have courses available, but do not yet have a formal certification program in place.\\n\\nOur courses are described here: http://hpccsystems.com/products-and-services/services/training\\n\\nAnd you can sign up for our scheduled courses here: http://hpccsystems.com/community/training-events/training\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-29 18:19:35\" },\n\t{ \"post_id\": 7854, \"topic_id\": 1788, \"forum_id\": 8, \"post_subject\": \"HPCC Certification\", \"username\": \"vyasshub\", \"post_text\": \"Hi All,\\nIs there any certified course available for ECL,THOR AND ROXIE ?\", \"post_time\": \"2015-06-29 18:05:56\" },\n\t{ \"post_id\": 7868, \"topic_id\": 1795, \"forum_id\": 8, \"post_subject\": \"Re: Date format issue\", \"username\": \"rtaylor\", \"post_text\": \"elango_v,\\n\\nTake a look at the STD.Date.FromStringToDate() function. It's not yet documented in the Standard Library Reference, but there are javadoc comments that explain it well. It will basically take any date string and format it into a date_t type (an integer type containing the date in YYYYMMDD format). \\n\\nThe file to look at is in the ecllibrary >> std >> Date file in your repository.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-01 13:30:40\" },\n\t{ \"post_id\": 7867, \"topic_id\": 1795, \"forum_id\": 8, \"post_subject\": \"Date format issue\", \"username\": \"elango_v\", \"post_text\": \"Hi Team,\\n\\nI am loading a csv file in HPCC. I am facing below date format issue. In CSV the date field has the values in the format of dd-mmm-yy (01-Jun-15). However I want to store it as yyyymmdd (20150601). When I just load the date from the Csv without any transformation is strangely loading as 01 June 15 (then whole month name is coming there, but in CSV only three letters indicates the month field).\\n\\nPlease advice how to format this date field?\", \"post_time\": \"2015-07-01 10:07:08\" },\n\t{ \"post_id\": 7889, \"topic_id\": 1798, \"forum_id\": 8, \"post_subject\": \"Re: #workunit\", \"username\": \"Anjali\", \"post_text\": \"Hi Jim,\\n\\nI have upgraded my IDE to 5.2.4 version. Now 'Protect' option is working for me.\\nI also got the proper way of using 'Cluster' option.\\n\\nThanks a lot for the help \\n\\nRegards,\\nAnjali\", \"post_time\": \"2015-07-08 06:01:00\" },\n\t{ \"post_id\": 7884, \"topic_id\": 1798, \"forum_id\": 8, \"post_subject\": \"Re: #workunit\", \"username\": \"JimD\", \"post_text\": \"I am using version 5.2.4 of the platform and the IDE\\n\\nAs for the cluster option:\\n\\nThe value to pas should be the Target Cluster name, not the process name. In your example, it should be \\n\\n
#WORKUNIT('cluster','hthor');
\\n\\nI submitted that code with the IDE with the target set to thor.\\nTo verify, I looked in the WU XML and it says: \\nclusterName="hthor"
\\n\\nI also verified that the reverse works, too. (trust but verify) \\n\\nIf you don't want to examine the WU XML, you can use this \\n\\nIMPORT STD;\\n#WORKUNIT('cluster','thor');\\nclust := STD.System.Job.Target();\\nOUTPUT(clust);\\n
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-07-07 13:16:54\" },\n\t{ \"post_id\": 7883, \"topic_id\": 1798, \"forum_id\": 8, \"post_subject\": \"Re: #workunit\", \"username\": \"Anjali\", \"post_text\": \"Hi Jim,\\n\\nThank you so much for your help.\\n\\nI could set priority to 10 as in your reply \\n\\nFor the 'Cluster' option, i changed the code as \\n
#WORKUNIT('cluster','mythor');
\\nNow the error disappeared.But the workunit won't run in 'thor' cluster, as the cluster option selected in eclide will override it.Do you have any suggestion for this?\\n\\n'Protect' option still not working for me. Can you share your version details that you used to run the code?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-07-07 05:07:42\" },\n\t{ \"post_id\": 7880, \"topic_id\": 1798, \"forum_id\": 8, \"post_subject\": \"Re: #workunit\", \"username\": \"JimD\", \"post_text\": \"I can help you with a couple of these:\\n\\nA Workunit Scope can only be set for an environment where LDAP security has been implemented. A Workunit scope has no other purpose. Therefore, I expect the error you are getting if submitting to an environment that is not LDAP-enabled. However, we could improve that error message so I opened an issue \\n(https://track.hpccsystems.com/browse/HPCC-13870).\\n\\nFor cluster, it is expecting a string value so, it should be:\\n\\n#WORKUNIT('cluster','mythor');
\\n\\nFor protect, your code works fine on my system. I submitted the code and then I am unable to delete the WU (unless I first un-protect it manually).\\n\\nFor priority, I used this code:\\n#WORKUNIT('priority',10);
\\n\\nI look at the WU XML and see:\\n<PriorityFlag>10</PriorityFlag>
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-07-06 18:15:17\" },\n\t{ \"post_id\": 7877, \"topic_id\": 1798, \"forum_id\": 8, \"post_subject\": \"#workunit\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nAs a part of my assignment,i have to explore #workunit statement and i am having troubles with many of it's options.\\n\\n#WORKUNIT( option, value );\\noption : cluster ,protect ,priority ,scope\\n\\nProtect\\nBelow is the code,\\n#WORKUNIT('protect',true); \\noutput('Hello');
\\n\\nHere the code gets submitted properly, but I could delete the workunit without any issues. I tried deleting the same workunit from another owner's account, and there were still no issues with deletion.\\n\\nPriority\\nBelow is the code,\\nimport std.system.Workunit as wu;\\n#workunit('priority', 1);\\nwu.workunitlist(Workunit)[1].priority;
\\n\\nHere I am setting the priority and trying to read it, but I am getting a null value returned.\\n\\nScope\\nBelow is the code,\\n#WORKUNIT('scope','NewVal'); \\noutput('Hello');
\\n\\nHere I am getting the error: error C3000: Trying to change workunit scope without security interfaces available
\\n\\nCluster\\nBelow is the code,\\n#WORKUNIT('cluster',mythor); \\noutput('Hello');
\\n\\nHere, for any cluster value input, I am getting an unknown identifier error
\\n\\n\\nI went through previous posts,and below are some related JIRA,which are already resolved.But i am facing the same issues(version: 4.2.0)\\n\\nhttps://track.hpccsystems.com/browse/HPCC-8585\\nhttps://track.hpccsystems.com/browse/HPCC-8901\\nhttps://track.hpccsystems.com/browse/HPCC-8902\\n\\nCan anyone help me to explore this options properly?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-07-06 12:19:10\" },\n\t{ \"post_id\": 7895, \"topic_id\": 1800, \"forum_id\": 8, \"post_subject\": \"Re: ROWDIFF\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nA FULL ONLY join will always pass a "live" record from one dataset and an empty record from the other. Since your example data has a non-blank/non-zero value in each field, that means ROWDIFF is comparing a real value to blank/zero in its empty record. That means none of the fields are the same. Therefore ROWDIFF is correctly listing all the fields as different.\\n\\nBottom line, using ROWDIFF makes no real sense on a FULL ONLY join. It is most useful on an inner JOIN, for comparison of different versions of the "same" file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-09 15:12:04\" },\n\t{ \"post_id\": 7894, \"topic_id\": 1800, \"forum_id\": 8, \"post_subject\": \"Re: ROWDIFF\", \"username\": \"ksviswa\", \"post_text\": \"Thanks Richard,\\n\\nI was in the assumption that even though the rows are completely different as in FULL JOIN , was expecting only the difference in the specific fields within the data.\\n\\nksviswa\", \"post_time\": \"2015-07-09 15:00:47\" },\n\t{ \"post_id\": 7892, \"topic_id\": 1800, \"forum_id\": 8, \"post_subject\": \"Re: ROWDIFF\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nIt is working properly in your code, too.\\n\\nIn a FULL ONLY join you are only getting records that have no match in the other dataset, so the entire row is different and you get all the field names from ROWDIFF.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-09 07:50:21\" },\n\t{ \"post_id\": 7891, \"topic_id\": 1800, \"forum_id\": 8, \"post_subject\": \"ROWDIFF\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow does ROWDIFF work for FULL ONLY joins ?\\n\\n\\nin1rec := {UNSIGNED1 id,String name1,String name2 };\\nin2rec := {UNSIGNED1 id,String name1,String name2 };\\n\\nin1 := DATASET([{1,'Kevin','Halligan'},\\n {2,'Liz','Halligan'},\\n {3,'Elizabeth','Windsor'},\\n {4,'Viswa','vks'}], in1rec);\\nin2 := DATASET([{1,'Kevin','Halligan'},\\n {2,'Liz','H'},\\n {3,'E','Windsor'},\\n {5,'Vis','vks'}], in2rec);\\noutrec := RECORD\\n STRING35 diff1;\\nEND;\\noutrec t1(in1 L, in2 R) := TRANSFORM\\n SELF.diff1 := ROWDIFF(L,R);\\nEND;\\nOUTPUT(JOIN(in1, in2, LEFT.id = RIGHT.id, t1(LEFT,RIGHT),FULL ONLY));\\n
\\n\\nI expected the result to contain only the differing field names, i.e. mainly id and name1 in this scenario, but I get all the field names as the difference in the result.\\n\\n\\nResult : \\n\\nid,name1,name2 \\nid,name1,name2 \\n
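\\n\\nFor reference, the inner-join form that ROWDIFF is intended for (a minimal sketch reusing the in1, in2 and t1 definitions above) looks like this:\\n\\n// Inner join: only the matching ids (1, 2, 3) are compared, so ROWDIFF\\n// reports just the fields that actually differ for each matched pair.\\nOUTPUT(JOIN(in1, in2, LEFT.id = RIGHT.id, t1(LEFT,RIGHT)));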
\\n\\nCorrect me if am missing something It works properly for the Inner Joins.\\n\\nksviswa\", \"post_time\": \"2015-07-08 21:22:04\" },\n\t{ \"post_id\": 7910, \"topic_id\": 1804, \"forum_id\": 8, \"post_subject\": \"Caching\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nCould you throw some light on the caching details in HPCC please? Are all index reads cached? Are they cached only within a WU? or is cache available across WU's as well? How long would the caching be active? Any details would be helpful.\\n\\nThanks\\nChen\", \"post_time\": \"2015-07-14 14:52:33\" },\n\t{ \"post_id\": 7915, \"topic_id\": 1805, \"forum_id\": 8, \"post_subject\": \"Re: Run time from eclplus dump\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nThis looks to me like you either targeted hThor with the query, or hThor "hijacked" it from Thor due to its simplicity. Try it again with a more complex query and see if you get the same thing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-15 09:06:08\" },\n\t{ \"post_id\": 7913, \"topic_id\": 1805, \"forum_id\": 8, \"post_subject\": \"Run time from eclplus dump\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm using the workunit dump from the command line too, eclplus, to get the thor execution time of the workunit. But, I'm not sure where to find it.\\n\\nA workunit dump has xml objects that look like the following:\\n <Statistic c="hthor"\\n count="1"\\n creator="myeclagent@10.0.0.78"\\n kind="TimeElapsed"\\n max="3151704393"\\n s="section"\\n scope="Process"\\n ts="1436890840309719"\\n unit="ns"\\n value="3151704393"/>\\n </Statistics>
\\nThis one I believe shows the execution time for the whole process, i.e. value="3151704393". But, I don't see one just for the THOR time. Is there one just for the THOR time and if so which one is it?\", \"post_time\": \"2015-07-14 18:11:06\" },\n\t{ \"post_id\": 7920, \"topic_id\": 1806, \"forum_id\": 8, \"post_subject\": \"Re: EncodeRfsQuery Not Working\", \"username\": \"SuganthSelvan\", \"post_text\": \"You saved my Time. Thanks Richard.\", \"post_time\": \"2015-07-15 12:49:14\" },\n\t{ \"post_id\": 7919, \"topic_id\": 1806, \"forum_id\": 8, \"post_subject\": \"Re: EncodeRfsQuery Not Working\", \"username\": \"rtaylor\", \"post_text\": \"The latest doc for the STD.File.EncodeRfsQuery() function says: This function is now considered deprecated in favor of using the EMBED mechanism. It will be removed in Version 6.0.
\\nTherefore, I suggest you not spend any appreciable amount of time on this and try doing what you need to using EMBED.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-15 12:45:50\" },\n\t{ \"post_id\": 7918, \"topic_id\": 1806, \"forum_id\": 8, \"post_subject\": \"EncodeRfsQuery Not Working\", \"username\": \"SuganthSelvan\", \"post_text\": \"I am trying to create a sample POC to demonstrate the functionality of EncodeRfsQuery.\\nSo, after refering to the definition of EncodeRfsQuery in ECL reference guide, i tried to use EncodeRfsQuery to connect to Oracle db to get the data. I tried this by installing Oracle db in my local machine and connect to the oracle db from HPCCSystemsVM-5.0.4-1 VMWare which is also running in my local machine (used bridged option).But i couldnt able to connect it. \\n\\nI gave my local machine's ip as the server ip since oracle db is installed in my machine.\\n\\nKindly let me know wat went wrong and also do let me know how exactly we can use EncodeRfsQuery and also which server's ip we need to provide? Is there any other additonal configurations that is required?\\n\\nNote: When i tried to execute the example provided in the ECL reference, i find that it is trying to connect to the dali server.\", \"post_time\": \"2015-07-15 10:31:28\" },\n\t{ \"post_id\": 7929, \"topic_id\": 1807, \"forum_id\": 8, \"post_subject\": \"Re: SetColumnMapping\", \"username\": \"SuganthSelvan\", \"post_text\": \"It was mentioned in the standard file library that "SetColumnMapping" defines how the data in the fields of the file must be transformed between the actual data storage format and the input format used to query that data. This is used by the user interface of the roxie browser.\\n\\nSo, i had set a column mapping (like for ex: "firstname{set(stringlib.StringToUpperCase)}") for a field and tried to query that data using the roxie browser but i find no difference between the normal behaviour ie.., for logical files that has no column mapping and for logical files that has column mappings set as mentioned above.\\n\\nCan you please help me as to how exactly we can apply/use "SetColumnMapping" construct on Roxie browser to query the data.\\n\\nThanks,\\nS.Suganth\", \"post_time\": \"2015-07-20 04:45:23\" },\n\t{ \"post_id\": 7921, \"topic_id\": 1807, \"forum_id\": 8, \"post_subject\": \"SetColumnMapping\", \"username\": \"SuganthSelvan\", \"post_text\": \"Can anyone kindly provide a scenario where SetColumnMapping can be used?\", \"post_time\": \"2015-07-15 14:21:37\" },\n\t{ \"post_id\": 7927, \"topic_id\": 1810, \"forum_id\": 8, \"post_subject\": \"Re: What is Roxie Browser or Data content browser?\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi Jim,\\nThanks for your response.\\n\\nIt was mentioned in the standard file library that "SetColumnMapping" defines how the data in the fields of the file must be transformed between the actual data storage format and the input format used to query that data. 
This is used by the user interface of the roxie browser.\\n\\nSo, i had set a column mapping (like for ex: "firstname{set(stringlib.StringToUpperCase)}") for a field and tried to query that data using the roxie browser but i find no difference between the normal behaviour ie.., for logical files that has no column mapping and for logical files that has column mappings set as mentioned above.\\n\\nCan you please help me as to how exactly we can apply/use "SetColumnMapping" construct on Roxie browser to query the data.\\n\\nThanks,\\nS.Suganth\", \"post_time\": \"2015-07-17 04:53:40\" },\n\t{ \"post_id\": 7926, \"topic_id\": 1810, \"forum_id\": 8, \"post_subject\": \"Re: What is Roxie Browser or Data content browser?\", \"username\": \"JimD\", \"post_text\": \"The path to get that functionality has changed with the EclWatch 5\\n\\n1) Locate your superfile in the list of logical files\\n2) Open the Logical File details page (double-click or select and press Open action button)\\n\\nThe sub-files display at the bottom of the details page\\n\\n3) Locate a sub-file and Open it (double-click or select, Open button)\\n4) Select the contents tab\\n5) Use the filter drop menu to enter values to query the data\\n\\nThe results display \\n\\nI also added a Jira issue to update the docs to reflect this information. \\nhttps://track.hpccsystems.com/browse/HPCC-13919\\n\\nThanks for your question!\\nJim\", \"post_time\": \"2015-07-16 13:45:32\" },\n\t{ \"post_id\": 7924, \"topic_id\": 1810, \"forum_id\": 8, \"post_subject\": \"What is Roxie Browser or Data content browser?\", \"username\": \"SuganthSelvan\", \"post_text\": \"Actually what is the Data content browser (Roxie Browser) referred in the "AddFileRelationship" in ECL reference guide?\\n\\nIs it the Roxie's active queries section displayed in the ESP?\", \"post_time\": \"2015-07-16 07:03:32\" },\n\t{ \"post_id\": 7931, \"topic_id\": 1814, \"forum_id\": 8, \"post_subject\": \"Enabling Proxy in HTTPCALL\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Hi,\\n\\nI want to hit google geocoding web-service. I am using HTTPCALL to achieve this. \\nI am using below code.ResLayout := { STRING Status {XPATH('status')}}; \\nurl := 'http://maps.googleapis.com/maps/api/geocode/xml?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=empty';\\nGetGeoCode:= HTTPCALL(Url,'GET','text/xml',ResLayout, XPATH('GeocodeResponse'));\\nGetGeoCode;
\\nWhen I execute this code, I am getting below error.<Error><text>connection failed 74.125.130.95:80</text><url>http://maps.googleapis.com:80/maps/api/geocode/xml?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=empty</url></Error> (in HTTP dataset G1 E2)
\\nI think it could be becuase my hpcc server is not using proxy. Kindly someone help me on how to instruct hpcc to use proxy for HTTPCALL. \\n\\nThanks in advance.\\n- Ramesh\", \"post_time\": \"2015-07-20 09:50:18\" },\n\t{ \"post_id\": 7941, \"topic_id\": 1816, \"forum_id\": 8, \"post_subject\": \"Re: Execution time of a Roxie query\", \"username\": \"kereno\", \"post_text\": \"Unlike for Thor (OLAP queries), Roxie (OLTP) is actually having a great performance advantage compared to other Big Data systems I am benchmarking. Just a side note.\", \"post_time\": \"2015-07-24 18:29:03\" },\n\t{ \"post_id\": 7939, \"topic_id\": 1816, \"forum_id\": 8, \"post_subject\": \"Re: Execution time of a Roxie query\", \"username\": \"kereno\", \"post_text\": \"Thank you Bob! \\nI've usually used Nagios or Ganglia to track the load of the cluster (such as cpu or network load). See this page for an example of our tracking of the clusters at UCI using ganglia:\\nhttps://ganglia.ics.uci.edu/\\n\\nWould that be the only way to time the queries? Is there not a Roxie time field somewhere? I am surprised there is one for Thor and there isn't one for Roxie.. \\n\\nThanks for your help,\\nKeren\", \"post_time\": \"2015-07-23 18:44:50\" },\n\t{ \"post_id\": 7936, \"topic_id\": 1816, \"forum_id\": 8, \"post_subject\": \"Re: Execution time of a Roxie query\", \"username\": \"bforeman\", \"post_text\": \"Hi Keren,\\n\\nI think that the new Ganglia and/or Nagios might give you the stats that you need. You can actually see them in action if you install the latest HPCC VM and click on the Plgd-ins menu option at the top of the screen. The installs for these add-ons is also on the HPCC portal.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-07-23 11:59:51\" },\n\t{ \"post_id\": 7934, \"topic_id\": 1816, \"forum_id\": 8, \"post_subject\": \"Execution time of a Roxie query\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am running queries on Roxie (a cluster of ten machines). I am getting my results as expected so all seems to work .\\nSince I am collecting performance measurements, I am interested in the execution time of the query. However I couldn't locate this information on the web interface (ECL watch on port 8010) nor on the web interface where I submit my Roxie queries (port 8002). I would think even a fast real-time query engine has an execution time.. Could you point me to where I can find this information?\\n\\nThank you,\\nKeren Ouaknine\", \"post_time\": \"2015-07-22 17:44:58\" },\n\t{ \"post_id\": 7951, \"topic_id\": 1818, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize form 2 documentation\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Thanks for the update Richard.\\n\\nRegards,\\nRamesh\", \"post_time\": \"2015-07-29 05:40:32\" },\n\t{ \"post_id\": 7948, \"topic_id\": 1818, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize form 2 documentation\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,\\n\\nIt works because, under the covers DENORMALIZE is really just a specialized form of JOIN designed to create nested child datasets. As such, it shares much of the JOIN codebase (also making many of the JOIN options available for use on DENORMALIZE, although undocumented as such). \\n\\nSo the DENORMALIZE docs define the recommended/common ways of using the function and not all the possible permutations. BTW, it is also possible to create a nested child dataset using ROLLUP. 
That is, of course, not ROLLUP's primary purpose, and therefore also not explicitly addressed in the docs for ROLLUP.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-28 14:28:47\" },\n\t{ \"post_id\": 7946, \"topic_id\": 1818, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize form 2 documentation\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Hi Bob,\\n\\nPlease find below code in which i am going against the requirements of denormalize form-2 as ECLLanguageReference states,\\n\\n
NormRec := RECORD\\nSTRING20 thename;\\nSTRING20 addr;\\nEND;\\nNamesRec := RECORD\\n UNSIGNED1 numRows;\\n STRING20 thename;\\nEND;\\nNamesTable := DATASET([ {0,'Kevin'},{0,'Liz'},\\n {0,'Mr Nobody'},{0,'Anywhere'}],\\n NamesRec); \\nNormAddrs := DATASET([{'Kevin','10 Malt Lane'},\\n{'Liz','10 Malt Lane'},\\n{'Liz','3 The cottages'},\\n{'Anywhere','Here'},\\n{'Anywhere','There'},\\n{'Anywhere','Near'},\\n{'Anywhere','Far'}],NormRec);\\nResultRec := RECORD\\n STRING20 thename;\\n UNSIGNED1 numRows;\\n DATASET({ STRING20 addr }) addresses;\\nEND;\\nResultRec DeNormThem(NamesRec L, DATASET(NormRec) R) := TRANSFORM\\n SELF.NumRows := COUNT(R);\\n SELF.addresses := TABLE( R , { addr } );\\n SELF := L;\\nEND;\\nDeNormedRecs := DENORMALIZE(NamesTable, NormAddrs,\\n LEFT.thename = RIGHT.thename,\\n GROUP,\\n DeNormThem(LEFT,ROWS(RIGHT))); \\nDeNormedRecs;
\\n\\nThe above code executes & gives denormlized result.\\n\\nKindly note,\\n1. In above code record layout left dataset of denormalize function is not as of combined parentrecset and childrecset \\n2. Record layout of the result is not same as of left dataset.\\n\\n\\n\\nRegards,\\nRamesh\", \"post_time\": \"2015-07-28 12:30:37\" },\n\t{ \"post_id\": 7938, \"topic_id\": 1818, \"forum_id\": 8, \"post_subject\": \"Re: Denormalize form 2 documentation\", \"username\": \"bforeman\", \"post_text\": \"It looks like the Form2 example in in step with what the documentation is saying:\\n\\nNamesRec := RECORD\\n UNSIGNED1 numRows;\\n STRING20 thename;\\n DATASET(NormRec) addresses;\\nEND;\\nNamesTable := DATASET([ {0,'Kevin',[]},{0,'Liz',[]},\\n {0,'Mr Nobody',[]},{0,'Anywhere',[]}],\\n NamesRec);\\n\\nNamesRec DeNormThem(NamesRec L, DATASET(NormRec) R) := TRANSFORM\\n SELF.NumRows := COUNT(R);\\n SELF.addresses := R;\\n SELF := L;\\nEND;\\nDeNormedRecs := DENORMALIZE(NamesTable, NormAddrs,\\n LEFT.thename = RIGHT.thename,\\n GROUP,\\n DeNormThem(LEFT,ROWS(RIGHT)));
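\\n\\nAs a side note on the ROLLUP alternative mentioned above, a nested child dataset like this can also be built with the GROUP form of ROLLUP. A minimal sketch reusing the NormRec/NormAddrs definitions from this thread (NestedRec, RollXF and RolledAddrs are hypothetical names):\\n\\nNestedRec := RECORD\\n STRING20 thename;\\n DATASET({STRING20 addr}) addresses;\\nEND;\\n// group the child records by name, then roll each group into one nested row\\nGroupedAddrs := GROUP(SORT(NormAddrs, thename), thename);\\nNestedRec RollXF(NormRec L, DATASET(NormRec) R) := TRANSFORM\\n SELF.thename := L.thename;\\n SELF.addresses := TABLE(R, {addr});\\nEND;\\nRolledAddrs := ROLLUP(GroupedAddrs, GROUP, RollXF(LEFT, ROWS(LEFT)));\\n// unlike DENORMALIZE, only names that occur in NormAddrs appear in the result\\nRolledAddrs;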
\\n\\nNamesTable is conforming to the requirements of Step 1 that you mention, and the NamesRec is accurate for what you are describing in Step 2. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-07-23 12:47:58\" },\n\t{ \"post_id\": 7937, \"topic_id\": 1818, \"forum_id\": 8, \"post_subject\": \"Denormalize form 2 documentation\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Hi,\\n\\nFor Denormalize form 2 ECLLaguageReference states that,\\n1. it is necessary for left record to be same format as the combined parentrecset and childrecset (the resulting de-normalized record structure)\\n2. Result of the transform function must be a record set of the same format as\\nthe LEFT record.\\n\\nBut I think, it is not necessary because in denormalize form-2 we are not using any rows of result in the transform function.\\n\\nShould we need to update ECLLaguageReference?\\n\\nThanks,\\nRamesh\", \"post_time\": \"2015-07-23 12:15:53\" },\n\t{ \"post_id\": 7981, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"Re: HPCC-JDBC Driver-Issue in using alias names for Outer Jo\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Sathish, I believe I recreated the issue, take a look:\\nhttps://track.hpccsystems.com/browse/WSSQL-115\", \"post_time\": \"2015-08-04 13:25:43\" },\n\t{ \"post_id\": 7975, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"Re: HPCC-JDBC Driver-Issue in using alias names for Outer Jo\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Sathish, I couldn't recreate it with the aliases. I wonder if this has something to do with the actual aliases you used, or with the field names of your file.\\n\\nLet's open an official bug report so we can track this on Jira: https://track.hpccsystems.com/browse/JDBC/\\n\\nIf possible please provide as much specific information so I can recreate. Thanks.\", \"post_time\": \"2015-07-31 19:28:47\" },\n\t{ \"post_id\": 7971, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"Re: HPCC-JDBC Driver-Issue in using alias names for Outer Jo\", \"username\": \"sathishsks\", \"post_text\": \"Hi Rodrigo,\\nThanks for the response.\\n\\nOuter join without alias names using 'ON' clause is working fine for me as well.\\nThe issue occurs only when I use alias names using 'AS' clause(while referring the fields).\\ni have reframed the SQL query you have shared with alias names.\\nCan you please execute this query in your end and let me know the results?\\n\\nSELECT T1.personid,T2.name\\nfrom progguide::exampledata::accounts AS T1 \\nouter join regress::multi::book AS T2\\non T1.personid = T2.id;\\n\\nThanks,\\nSathish.\", \"post_time\": \"2015-07-31 12:14:54\" },\n\t{ \"post_id\": 7963, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"Re: HPCC-JDBC Driver-Issue in using alias names for Outer Jo\", \"username\": \"sathishsks\", \"post_text\": \"Hi Rodrigo,\\nThanks for the response.\\nThe format of the query which you have provided is working fine for me also. So there is no problem with "ON" clause.\\nThe issue occurs only when I use alias names using "AS" clause in the OUTER JOIN query.\\nI have reframed your query with alias names. 
\\nCan you please execute the below query at your end and let me know your results?\\n\\nSELECT T1.personid,T2.name \\nfrom progguide::exampledata::accounts as T1 \\nouter join regress::multi::book as T2\\non T1.personid = T2.id;\\n\\nThanks,\\nSathish.\", \"post_time\": \"2015-07-30 13:17:15\" },\n\t{ \"post_id\": 7956, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"Re: HPCC-JDBC Driver-Issue in using alias names for Outer Jo\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Sathish, thanks for letting us know about this issue.\\nI attempted to recreate the issue on HPCC 5.4.0 and WsSQL 5.4.0 (not yet released), but was not able to. See the query below, let me know if you can spot a difference between my query and your query which caused the issue.\\nSELECT progguide::exampledata::accounts.personid,\\nregress::multi::book.name \\nfrom progguide::exampledata::accounts \\nouter join regress::multi::book \\non progguide::exampledata::accounts.personid = regress::multi::book.id;
\\n\\nThe Error reported indicates that the generated code used the "ALL" flag in the Outer Join. The ALL flag is only used when the ON clause is either missing, or it does not provide an "Equality condition".\\n\\nSo I'm interested to look at the exact "ON" clause you used, please send me so I can investigate why the system determined it did not contain an equality condition in the non-aliased case. Thanks.\", \"post_time\": \"2015-07-29 14:39:56\" },\n\t{ \"post_id\": 7952, \"topic_id\": 1826, \"forum_id\": 8, \"post_subject\": \"HPCC-JDBC Driver-Issue in using alias names for Outer Join\", \"username\": \"sathishsks\", \"post_text\": \"Hi All,\\nI am currently exploring HPCC-JDBC Driver 0.2.6 beta Version.I am using HPCC 5.2.4-1 Image and Squirrel SQL client 3.4.0\\n\\nI tried to use alias names for the logical filenames while using JOIN queries.It works fine for inner join whereas it throws error for outer join.\\nBut without alias names to refer fieldnames, both Inner & Outer joins work fine.\\n\\nSample Query for Inner join operation\\nWithout Aliasnames : working fine\\nSelect training::DS1.field2, training::DS2.field3 from training::DS1 inner join training::DS2 on training::DS1.field1=training::DS2.field1\\nWith aliasnames : working fine\\nSelect t1.field2,t2.field3 from training::DS1 as t1 inner join training::DS2 as t2 on t1.field1 = t2.field1\\n\\nSample Query for Outer join operation\\nWithout Aliasnames : working fine\\nSelect training::DS1.field2, training::DS2.field3 from training::DS1 outer join training::DS2 on training::DS1.field1=training::DS2.field1\\nWith aliasnames : Throws following error\\nSelect t1.field2,t2.field3 from training::DS1 as t1 outer join training::DS2 as t2 on t1.field1 = t2.field1\\n\\nError: java.lang.Exception: HPCCJDBC: Error in response: ' JOIN(ALL) only supports INNER, LEFT OUTER, and LEFT ONLY joins'\\nSQLState: null\\nErrorCode: 0\\n\\nIn the documentation, it has been mentioned that it supports inner,outer joins only. But it conflicts with the error message I received.\\nCan someone please help me to use alias names for outer joins?\\n\\nThanks,\\nSathish\", \"post_time\": \"2015-07-29 12:22:02\" },\n\t{ \"post_id\": 9632, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"Here's some example code to give you the general idea:IMPORT STD;\\n\\nMyRec := STD.File.FsLogicalFileInfoRecord; \\nNewFileListPattern := 'RTTEST::*file?';\\nTimedFileListName := 'RTTEST::TimedFileListTest_';\\nNow := (STRING8)STD.Date.Today() + '_' + (STRING6)STD.Date.CurrentTime();\\n\\nTimedFileList := NOTHOR(STD.File.LogicalFileList(TimedFileListName + '*',TRUE,FALSE));\\nLastRecNum := COUNT(TimedFileList);\\n\\nds_name := TimedFileList[LastRecNum].name;\\n\\nds := DATASET('~' + ds_name,MyRec,FLAT); //latest list of previously processed files\\n// ds;\\nNewFileList := NOTHOR(STD.File.LogicalFileList(NewFileListPattern,TRUE,FALSE));\\n// NewFileList;\\nOUTPUT(NewFileList,,'~' + TimedFileListName + Now); //Write current list to disk\\n\\nRecsToProcess := JOIN(NewFileList,ds,LEFT.name=RIGHT.name,LEFT ONLY);\\n\\n//Process the ones you need to:\\nRecsToProcess;\\n
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-11 15:29:03\" },\n\t{ \"post_id\": 9630, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"That's the thing. This environment doesn't use a landing zone. It is sprayed from a completely different technology. So the files only exist as a logical file within HPCC.
So, what IS doing the file spray? DFUplus.exe? Or is it one of the new streaming data services (like Kafka)? Or ... ? AND, is that tool just putting the data on the Thor disks, or is it also updating the DFU?\\n\\nIF the DFU knows about the new file, then I could envision trying something like creating a CRON job (using the WHEN workflow service) to use the STD.File.LogicalFileList() function to get the list of data files that match your pattern, then compare that list against a list of previously processed files and process the new ones, then write a new list of all the processed files for the next periodic instance to run against.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-11 14:54:39\" },\n\t{ \"post_id\": 9628, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"JimD\", \"post_text\": \"Have you tried using the IP of one of the Thor slaves? (and the location of the data file parts). You can see this information in ECL Watch. For example, /var/lib/HPCCSystems/hpcc-data/thor/progguide/exampledata \\n\\nThe MonitorFile function is monitoring disk files not logical file entries. This would trigger the event when one physical file part exists which is a good indication that the logical file exists.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-05-11 12:01:56\" },\n\t{ \"post_id\": 9626, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"That's the thing. This environment doesn't use a landing zone. It is sprayed from a completely different technology. So the files only exist as a logical file within HPCC.\", \"post_time\": \"2016-05-10 23:17:16\" },\n\t{ \"post_id\": 9624, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"OK, I just tried this code on 5.6.2-1:IMPORT STD;\\nSTD.File.MonitorFile('Found_A_File','10.173.248.7','RTTEST::*',TRUE);
That code passes syntax check AND successfully launches a DFU workunit.\\n\\nThe IP parameter must be the IP of the Landing Zone (DropZone) that you want to monitor and not the Dali IP. As the docs state about the IP parameter: "Optional. A null-terminated string containing the ip address for the file to monitor. This is typically a landing zone. This may be omitted only if the filename parameter contains a complete URL."\\n\\nHTH,\\n\\nRichard \", \"post_time\": \"2016-05-10 18:16:33\" },\n\t{ \"post_id\": 9620, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"So, I am trying to monitor the logical files being sprayed from an external process/service outside of the landing zone (HPCC infrastructure).\\n\\nI tried switching to STD.File.MonitorFile and:\\n\\n-> 5.6.0-1 says
System error: -1: Graph[1], Wildcards not allowed in filename (thor::test::*)
\\n\\nThis is the version that is in production and can't necessarily be rolled back.\\n\\n-> Tried on 5.4.2-1 and got System error: 0: DFUServer Error Failed: DFUWU: cannot determine file part directory for ~thor::test::*
\\n\\n-> Should I be using the external facing IP address or the internal Dali IP address? Both give the same errors.\", \"post_time\": \"2016-05-10 15:36:01\" },\n\t{ \"post_id\": 9616, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"The thing is. I want to process each new tblfield table that gets sprayed to the cluster. The name of the file is not know ahead of time. Just that the filename will end with tblfield. Though tblfield is one of a few hundred (or thousand) that filename ending that I want to process.\\n\\nEDIT: Just seen your above comment. Let me try that out.\", \"post_time\": \"2016-05-10 13:25:27\" },\n\t{ \"post_id\": 9614, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"What release are you using?\\n\\nThis code syntax checks for me on 5.4.2-1:IMPORT STD;\\nSTD.File.MonitorFile('Found_A_File','10.173.248.1','~RTTEST::*',TRUE);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-10 13:17:40\" },\n\t{ \"post_id\": 9612, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"I tried using the wildcards and get an error.\\n\\nSystem error: 0: DFUServer Error Failed: Wildcards not allowed in filename (thor::*tblfield)
\", \"post_time\": \"2016-05-10 12:20:28\" },\n\t{ \"post_id\": 9608, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nTry using MonitorFile instead -- its docs still have the wildcards listed.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-09 18:11:28\" },\n\t{ \"post_id\": 9604, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"So that's what I've been looking at. The docs and tutorials. There is no information as to how to handle to the situation. Here it is:\\n\\nI have hundreds of different table types. These tables are sprayed to the cluster with unique endings\\n\\nmicevepay::in::someid_tbl1\\nmicevepay::in::someid_tbl1\\nmicevepay::in::someid_tbl1\\nmicevepay::in::someid_tbl2\\nmicevepay::in::someid_tbl2\\nmicevepay::in::someid_tbl3
\\n\\nsomeid = a varying unique id.\\n\\nI want to be able to monitor all files coming in based on the table type (tbl1,tbl2,etc.) and perform an action based on that type of table. As the originator of this post stated, the docs (since removed) and video tutorial (not removed) stated at one time we could use the asterisk. Now that we know this is no longer true, how do I monitor these logical file types *tbl1, *tbl2, *tbl3, etc?\", \"post_time\": \"2016-05-09 15:24:17\" },\n\t{ \"post_id\": 9602, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nTake a look at the WHEN workflow service docs -- that's what you use to run a job that waits for an event to happen. The NOTIFY function triggers the event and can pass the information needed by the waiting job as to which file to work with *this time*\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-06 18:58:24\" },\n\t{ \"post_id\": 9600, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"micevepay\", \"post_text\": \"How does that work if you are waiting for a file to be sprayed? I'm confused as to how you would have a workunit execute the creation in such a case.\", \"post_time\": \"2016-05-06 18:48:18\" },\n\t{ \"post_id\": 7988, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nI would simply have the workunit that creates the files you're interested in trigger the event and pass the specific name of the file created as an EVENTEXTRA value, something like this:\\n\\nNOTIFY('MyEvent',\\n '<Event>' + \\n '<FileName>' + TRIM(TheFileName) + '</FileName>' + \\n '</Event>');
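\\nOn the receiving side, a minimal sketch of a waiting job that picks that name up via EVENTEXTRA (the MyRec layout and the THOR file format are assumptions for illustration):\\n\\nMyRec := {STRING line}; // hypothetical layout of the files being produced\\n// EVENTEXTRA('FileName') pulls the <FileName> tag out of the event XML;\\n// the value is assumed here to be the complete logical filename\\nNewFile := EVENTEXTRA('FileName');\\nOUTPUT(DATASET(NewFile, MyRec, THOR)) : WHEN(EVENT('MyEvent','*'), COUNT(1));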
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-05 10:07:38\" },\n\t{ \"post_id\": 7986, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Richard and Jim.\\n\\nHow do we handle such scenarios then? In case i need to trigger an event based on a wild card pattern for a list of logical file names once they are created.\\n\\nI may not know the exact file names as they are created dynamically.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2015-08-04 19:28:15\" },\n\t{ \"post_id\": 7977, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"JimD\", \"post_text\": \"The typo in documentation was fixed this issue:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-13579\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-08-03 13:21:55\" },\n\t{ \"post_id\": 7974, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Re: Wildcards in MonitorLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nYou're not just using that wildcarded name in the MonitorLogicalFileName function, but also the FileExists and DeleteLogicalFile functions, neithr of which is documented to allow wildcards in their filename parameters.\\n\\nAnd, in my simple test:IMPORT STD;\\nSTD.File.MonitorLogicalFileName('Found_A_File','~RTTEST::*');
I also get the "wildcards not allowed" error when I try to run the job, so I suspect that the docs are simply incorrect (probably a copy/paste error from the MonitorFile function doc which DOES allow wildcards in its filename parameter).\\n\\nTypo reported: https://track.hpccsystems.com/browse/HPCC-14009\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-31 15:57:29\" },\n\t{ \"post_id\": 7973, \"topic_id\": 1832, \"forum_id\": 8, \"post_subject\": \"Wildcards in MonitorLogicalFileName\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow do we use wildcards in MonitorLogicalFileName ?\\n\\nThe document says , wild cards are supported in the file name , get the following error :\\n\\n\\nError: System error: -1: Wildcards not allowed in filename (thor::viswa::*::data) (0, 0), -1, \\n
\\n\\nEx:\\n\\n\\nEventName1 := 'MyFileEvent';\\nFileName1 := '~thor::viswa::*::data';\\nIF (STD.File.FileExists(FileName1),\\n STD.File.DeleteLogicalFile(FileName1));\\nSTD.File.MonitorLogicalFileName(EventName1,FileName1);\\nOUTPUT('File Created') : WHEN(EVENT(EventName1,'*'),COUNT(1));\\n\\nFileName := '~thor::viswa::test::data';\\nrec := RECORD\\n STRING10 key;\\n STRING10 val;\\nEND;\\nafile := DATASET([{ 'A', '0'}], rec);\\nOUTPUT(afile,,FileName);\\n\\n
\\n\\nNot sure if its the right usage or am missing something.\\n\\nHPCC Version : 5.2.4-1\\n\\nRegards,\\nksviswa\", \"post_time\": \"2015-07-31 15:35:33\" },\n\t{ \"post_id\": 7984, \"topic_id\": 1833, \"forum_id\": 8, \"post_subject\": \"Re: Debug\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI need to print out datasets as part of my debug. Would you be able to suggest anything appropriate for this?\\n\\n- Chen\", \"post_time\": \"2015-08-04 15:48:54\" },\n\t{ \"post_id\": 7983, \"topic_id\": 1833, \"forum_id\": 8, \"post_subject\": \"Re: Debug\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nI suggest you take a look at ASSERT if it's just for debugging and you just need to put out a message (which could contain scalar values from your logic but not recordsets).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-04 15:43:48\" },\n\t{ \"post_id\": 7982, \"topic_id\": 1833, \"forum_id\": 8, \"post_subject\": \"Debug\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI need to run a bunch of OUTPUT statements only when a flag is true. I would need this for debug purpose. Was wondering what would be the best way to implement this?\\n\\nThanks\\nChen\", \"post_time\": \"2015-08-04 14:29:35\" },\n\t{ \"post_id\": 8246, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"kev77log\", \"post_text\": \"Chen,\\n\\nRichard's earlier example required the respective strings to not be present as substrings of the other strings that are being combined (as it used Str.Find = 0 as the JOIN criterion). \\n\\nHere is a slight adjustment to the earlier code -- it assigns unique sequence numbers to the original strings in the set to be combined, then collects the sequence numbers as it is building up the strings, and uses comparisons of the sequence numbers in the original set with the collected set to determine what has yet to be combined.\\n\\nKevin\\n\\n\\nIMPORT STD;\\nComboStr(SET OF STRING SetStrs) := FUNCTION\\n StrCnt := COUNT(SetStrs);\\n\\n Rec := {STRING line};\\n PreppedRec := {Rec, SET OF UNSIGNED2 seqs};\\n\\n StartDS := DATASET(SetStrs, Rec);\\n PreppedRec Prep(Rec L, UNSIGNED ctr) := TRANSFORM\\n SELF.seqs := [ctr];\\n SELF := L;\\n END;\\t\\n PreppedDS := PROJECT(StartDS, Prep(LEFT, COUNTER));\\n\\t\\n PreppedRec XF(PreppedRec L, PreppedRec R) := TRANSFORM\\n SELF.Line := L.Line + R.Line;\\n SELF.seqs := L.seqs + R.seqs;\\n END; \\n\\n Joins(DATASET(PreppedRec) LDS) := \\n JOIN(LDS,PreppedDS,\\n RIGHT.seqs[1] NOT IN LEFT.seqs, \\n XF(LEFT,RIGHT),ALL);\\n \\n result := LOOP(PreppedDS,StrCnt-1,Joins(ROWS(LEFT)));\\n\\t\\n RETURN PROJECT(result, Rec);\\nEND;\\n\\nds2 := ComboStr(['s1','s2','s3','s10']);\\nOUTPUT(ds2,NAMED('LoopJoinMethod'));\\n
\", \"post_time\": \"2015-10-06 00:14:30\" },\n\t{ \"post_id\": 8013, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nFinally, here is the definitive best way to accomplish this task with any number of strings:IMPORT STD;\\nComboStr(SET OF STRING SetStrs) := FUNCTION\\n StrCnt := COUNT(SetStrs);\\n Rec := {STRING line};\\n StartDS := DATASET(SetStrs,Rec);\\n Rec XF(Rec L, Rec R) := TRANSFORM\\n SELF.Line := L.Line + R.Line;\\n END;\\t\\n Joins(DATASET(Rec) LDS) := \\n JOIN(LDS,StartDS,\\n STD.Str.Find(LEFT.Line,RIGHT.Line,1)=0,\\n XF(LEFT,RIGHT),ALL);\\n RETURN LOOP(StartDS,StrCnt-1,Joins(ROWS(LEFT)));\\nEND;\\n\\nds2 := ComboStr(['s1','s2','s3','s4']);\\nOUTPUT(ds2,NAMED('LoopJoinMethod'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-12 15:43:27\" },\n\t{ \"post_id\": 8006, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nIt just occurred to me that the 24 combinations you were looking for were just the combinations of all four strings. If so, then this version does just that, for up to nine strings:IMPORT STD;\\n\\nComboStr(SET OF STRING SetStrs) := FUNCTION\\n StrCnt := COUNT(SetStrs);\\n Rec := {STRING line};\\n StartDS := DATASET(SetStrs,Rec);\\n Rec XF(Rec L, Rec R) := TRANSFORM\\n SELF.Line := L.Line + R.Line;\\n END;\\t\\n Joins(DATASET(Rec) LDS) := JOIN(LDS,StartDS,\\n STD.Str.Find(LEFT.Line,RIGHT.Line,1)=0,\\n XF(LEFT,RIGHT),ALL);\\n J1 := Joins(StartDS);\\n J2 := Joins(J1);\\n J3 := Joins(J2);\\n J4 := Joins(J3);\\n J5 := Joins(J4);\\n J6 := Joins(J5);\\n J7 := Joins(J6);\\n J8 := Joins(J7);\\n J9 := Joins(J8);\\n RETURN CHOOSE(StrCnt,StartDS,J1,J2,J3,J4,J5,J6,J7,J8,J9);\\nEND;\\n\\nds2 := ComboStr(['s1','s2','s3','s4']);\\nOUTPUT(ds2,NAMED('JoinMethod'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-11 15:47:40\" },\n\t{ \"post_id\": 8005, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nAnd here's a slightly more "elegant" method of doing it. It has still got a bit of "brute force" to it, so this function can just handle up to nine strings correctly. Running this, I discovered that my previous code was incomplete and the actual number of possible combinations for four strings is sixty, as shown in this code:IMPORT STD;\\n\\nComboStr(SET OF STRING SetStrs) := FUNCTION\\n StrCnt := COUNT(SetStrs);\\n Rec := {STRING line};\\n NullDS := DATASET([],Rec);\\n StartDS := DATASET(SetStrs,Rec);\\n Rec XF(Rec L, Rec R) := TRANSFORM\\n SELF.Line := L.Line + R.Line;\\n END;\\t\\n Joins(DATASET(Rec) LDS,UNSIGNED1 Iter) := \\n IF( Iter > StrCnt,\\n NullDS,\\n JOIN(LDS,StartDS,\\n STD.Str.Find(LEFT.Line,RIGHT.Line,1)=0,\\n XF(LEFT,RIGHT),ALL));\\n J1 := Joins(StartDS,1);\\n J2 := Joins(J1,2);\\n J3 := Joins(J2,3);\\n J4 := Joins(J3,4);\\n J5 := Joins(J4,5);\\n J6 := Joins(J5,6);\\n J7 := Joins(J6,7);\\n J8 := Joins(J7,7);\\n J9 := Joins(J8,8);\\n RETURN J1 + J2 + J3 + J4 + J5 + J6 + J7 + J8 + J9;\\nEND;\\n\\nds2 := ComboStr(['s1','s2','s3','s4']);\\nOUTPUT(ds2,NAMED('JoinMethod'));
As you can see, this produces sixty possible combinations of the four strings - and it appears to me that those sixty contains no duplicates, so I was incorrect with my previous code showing only forty combinations.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-11 15:22:19\" },\n\t{ \"post_id\": 8003, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nHere's the "brute force" approach in ECL (and I count 40 possible combinations):\\nSET OF STRING CatStr(STRING s1, STRING s2, STRING s3, STRING s4) := \\n [ s1+s2,\\n s1+s3,\\n s1+s4,\\n s2+s1,\\n s2+s3,\\n s2+s4,\\n s3+s1,\\n s3+s2,\\n s3+s4,\\n s4+s1,\\n s4+s2,\\n s4+s3,\\n s1+s2+s3,\\n s1+s2+s4,\\n s1+s3+s4,\\n s2+s1+s3,\\n s2+s1+s4,\\n s2+s3+s4,\\n s3+s1+s2,\\n s3+s1+s4,\\n s3+s2+s4,\\n s4+s1+s2,\\n s4+s1+s3,\\n s4+s2+s3,\\n s1+s2+s3+s4,\\n s1+s3+s4+s2,\\n s1+s4+s2+s3,\\n s1+s4+s3+s2,\\n s2+s3+s4+s1,\\n s2+s4+s1+s2,\\n s2+s1+s2+s4,\\n s2+s1+s3+s4,\\n s3+s4+s1+s2,\\n s3+s1+s2+s4,\\n s3+s2+s4+s1,\\n s3+s4+s2+s1,\\n s4+s1+s2+s3,\\n s4+s2+s3+s1,\\n s4+s3+s1+s2,\\n s4+s1+s3+s2 ];\\n\\nds := DATASET(CatStr('s1','s2','s3','s4'),{STRING line});\\nOUTPUT(ds);
Please let me know if this works for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-08 11:30:15\" },\n\t{ \"post_id\": 8002, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Re: Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chen,\\n\\nNot that I am aware of.\\n\\nSorry,\\n\\nRichard\", \"post_time\": \"2015-08-08 09:30:21\" },\n\t{ \"post_id\": 8001, \"topic_id\": 1836, \"forum_id\": 8, \"post_subject\": \"Combinations\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have 4 strings - s1, s2, s3 and s4.\\n\\nI would like to concatenate these 4 strings in all possible ways (24 possible combinations).\\n\\nIs there any function I could use for this?\\n\\nThanks\\nChen\", \"post_time\": \"2015-08-07 16:58:58\" },\n\t{ \"post_id\": 8016, \"topic_id\": 1839, \"forum_id\": 8, \"post_subject\": \"Re: What does light-weight join means in graphs\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Bob\", \"post_time\": \"2015-08-13 13:54:29\" },\n\t{ \"post_id\": 8015, \"topic_id\": 1839, \"forum_id\": 8, \"post_subject\": \"Re: What does light-weight join means in graphs\", \"username\": \"bforeman\", \"post_text\": \"Exactly!\\n\\nIf the JOIN is local and both left and right sides are sorted, a light-weight JOIN is generated.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-08-13 11:53:19\" },\n\t{ \"post_id\": 8014, \"topic_id\": 1839, \"forum_id\": 8, \"post_subject\": \"What does light-weight join means in graphs\", \"username\": \"balajisampath\", \"post_text\": \"ds := dataset( 'thor::filename',rec,thor);\\ndsdist := distribute(ds,hash(firstcol));\\ndssort := sort(dsdist,firstcol,local);\\nds1 := table(dssort,{firstcol,cnt := count(group)},firstcol)(cnt>1); \\nds2 := join(dssort,ds1, left.firstcol = right.firstcol);
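\\n\\nFor illustration, here is roughly what the same JOIN looks like when it meets the light-weight conditions described above (LOCAL, with both inputs already sorted); a sketch based on the code above, with hypothetical names:\\n\\nds1dist := DISTRIBUTE(ds1, HASH(firstcol));\\nds1sorted := SORT(ds1dist, firstcol, LOCAL);\\n// dssort is already distributed and locally sorted on firstcol, so joining the\\n// two sorted inputs with the LOCAL option lets a light-weight join be generated\\nds2local := JOIN(dssort, ds1sorted, LEFT.firstcol = RIGHT.firstcol, LOCAL);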
\\n\\ndoes light-weight join means LOCAL JOIN?\", \"post_time\": \"2015-08-12 19:29:33\" },\n\t{ \"post_id\": 8412, \"topic_id\": 1840, \"forum_id\": 8, \"post_subject\": \"Re: JSON and FETCH Error\", \"username\": \"househippo\", \"post_text\": \"Looks like the error\\n 2134 - First parameter of FETCH should be a disk file
\\nis not expected behavior. Here is the Jira https://track.hpccsystems.com/browse/HPCC-14417.\", \"post_time\": \"2015-11-03 09:19:40\" },\n\t{ \"post_id\": 8034, \"topic_id\": 1840, \"forum_id\": 8, \"post_subject\": \"Re: JSON and FETCH Error\", \"username\": \"bforeman\", \"post_text\": \"Hi Fujio,\\n\\nThis is expected behavior I believe, as JSON files do not have the VIRTUAL fileposition field built-in.\\nSimply output the JSON file to a THOR/FLAT file first, and then add the VIRTUAL fileposition field to the intermediate THOR/FLAT file, then declare your INDEX on the THOR file and FETCH accordingly.\\n\\nExample:\\nx1 := DATASET('~test::sample.json::sample.json',json_schema,JSON);\\na := OUTPUT(x1,,'~test::samplejson::FLATSample');\\nx1plus := DATASET('~test::samplejson::FLATSample',{json_schema, UNSIGNED8 RecPtr{virtual(fileposition)}},FLAT);\\ndatax := INDEX(x1plus,{email,RecPtr},'~test::sample.json::sample.json.index');\\nb := BUILD(datax);\\nfilterdata := FETCH(x1plus, datax(email = 'test@test.com'),RIGHT.RecPtr);\\nc := filterdata;\\nSEQUENTIAL(a,b,OUTPUT(c));
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-08-24 17:26:40\" },\n\t{ \"post_id\": 8033, \"topic_id\": 1840, \"forum_id\": 8, \"post_subject\": \"Re: JSON and FETCH Error\", \"username\": \"bforeman\", \"post_text\": \"Dumb question, but did you build the INDEX?\", \"post_time\": \"2015-08-24 17:00:44\" },\n\t{ \"post_id\": 8017, \"topic_id\": 1840, \"forum_id\": 8, \"post_subject\": \"JSON and FETCH Error\", \"username\": \"househippo\", \"post_text\": \"I'm getting the error: 2134 - First parameter of FETCH should be a disk file\\n\\nHere is the contents of: ~test::sample.json::sample.json all in a single row.\\n[{"email":"test@test.com"},{"email":"right@ttp.com"},{"name":"Cake Baker","email":"cake@lakes.com"}]\\n\\n\\njson_schema := RECORD\\n STRING30 email;\\n STRING100 name;\\nEND;\\n\\nx1 := DATASET('~test::sample.json::sample.json',{json_schema, UNSIGNED8 RecPtr{virtual(fileposition)}},JSON);\\ndatax := INDEX(x1,{email,RecPtr},'~test::sample.json::sample.json.index');\\nfilterdata := FETCH(x1, datax(email = 'test@test.com'),RIGHT.RecPtr);\\nfilterdata;\\n
\", \"post_time\": \"2015-08-16 10:16:30\" },\n\t{ \"post_id\": 8070, \"topic_id\": 1858, \"forum_id\": 8, \"post_subject\": \"Re: Using imported attributes on the command line\", \"username\": \"JimD\", \"post_text\": \"I plan clarify this in the docs (and add an example)\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14167\\n\\nThanks,\\nJim\", \"post_time\": \"2015-09-02 13:50:09\" },\n\t{ \"post_id\": 8068, \"topic_id\": 1858, \"forum_id\": 8, \"post_subject\": \"Re: Using imported attributes on the command line\", \"username\": \"bforeman\", \"post_text\": \"If you are using a local repository, use the ECL command line instead of ECLPlus. \\n\\nExample:\\n\\necl.exe run -Ic:\\\\myeclcode;d:\\\\mlrepository --target=thor --server=xxx.xxx.xxx.xxx --username <myuserid> --password <mypassword> FileOfECLCodeToRun.ecl\\n
\\nwhere the -I is an option for the ecl compiler that provides the paths to the 2 code repositories I need. By the way, I needed these because the IMPORTs in my ECL code refer to folders (modules) that are in these two directories. --username provides the userid I use to access the HPCC server (whose IP address is given by --server) (Note: Replace <myuserid> in the above with your userid). And --password gives the password I use to access the HPCC server (Note: Replace <mypassword> in the above with your password). \\n\\nIn a central or shared repository, use the --main switch as follows:\\n\\necl run --target=<cluster> --main=<repository path : folder.eclquery>
\", \"post_time\": \"2015-09-02 12:36:15\" },\n\t{ \"post_id\": 8064, \"topic_id\": 1858, \"forum_id\": 8, \"post_subject\": \"Using imported attributes on the command line\", \"username\": \"kovacsbv\", \"post_text\": \"Hello,\\n\\nI'm trying to run ECL at the command line with an attribute folder.\\nApparently the .ecl files in the attributes folder need some processing\\n(maybe so I can run ecl -I<something> -L<something>). How do I make this\\nhappen?\\n\\n\\n$ cat aggregate.ecl\\nIMPORT attributes;\\n\\nOUTPUT('Hello, World!');\\n\\n$ eclplus action=query ecl=@aggregate.ecl server=10.1.1.11 cluster=thor user=vkovacs password=secret\\nWorkunit W20150901-150024 submitted\\n<Error><source>eclserver</source><line>1</line><code>2081</code><message>Import names unknown module "attributes"</message></Error>\\n$ ls -l\\ntotal 76\\n-rw-rw-r-- 1 kovacs kovacs 49 Sep 1 14:25 aggregate.ecl\\ndrwxrwxr-x 6 kovacs kovacs 4096 Sep 1 14:56 attributes\\n-rwxrwxr-- 1 kovacs kovacs 2419 Aug 31 09:51 bulk_spray\\ndrwxrwxr-x 6 kovacs kovacs 36864 Aug 31 15:44 combine_incoming\\ndrwxrwxr-x 7 kovacs kovacs 4096 Aug 31 09:11 nohup\\n-rw------- 1 kovacs kovacs 9960 Aug 28 14:58 nohup.out\\n-rwxrwxr-- 1 kovacs kovacs 2419 Aug 31 15:49 pipeline_rezip\\n-rwxrwxr-- 1 kovacs kovacs 2419 Aug 31 15:49 pipeline_spray\\n-rwxrwxr-- 1 kovacs kovacs 2832 Aug 31 16:11 pipeline_unzip\\n\\n$ ls -l attributes\\ntotal 16\\ndrwxrwx--- 3 kovacs kovacs 4096 Sep 1 14:56 Aggregation\\ndrwxrwx--- 3 kovacs kovacs 4096 Sep 1 14:56 forensics\\ndrwxrwx--- 3 kovacs kovacs 4096 Sep 1 14:56 indexing\\ndrwxrwx--- 3 kovacs kovacs 4096 Sep 1 14:56 searches\\n
\", \"post_time\": \"2015-09-01 19:25:27\" },\n\t{ \"post_id\": 8084, \"topic_id\": 1860, \"forum_id\": 8, \"post_subject\": \"Re: Detecting grouped dataset\", \"username\": \"DSC\", \"post_text\": \"Well sure, but if you're writing code that will be used like a library -- primarily meaning that you don't know your callers or their requirements -- then undoing a GROUP may be a bad idea. The caller may be expecting the data to remain grouped after calling your code. Of course, I could always just document the fact that any grouping would be undone, but trusting people to read documentation is... unwise.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2015-09-04 12:34:14\" },\n\t{ \"post_id\": 8080, \"topic_id\": 1860, \"forum_id\": 8, \"post_subject\": \"Re: Detecting grouped dataset\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nOf course, the obvious workaround would be to use UNGROUP on every dataset passed to the MACRO and write the code optimized for ungrouped datasets. If the passed dataset is not GROUPed then UNGROUP would most likely be a no-op (you should test this first though ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-03 19:03:40\" },\n\t{ \"post_id\": 8078, \"topic_id\": 1860, \"forum_id\": 8, \"post_subject\": \"Re: Detecting grouped dataset\", \"username\": \"DSC\", \"post_text\": \"Thanks for verifying, Richard.\\n\\nI've created a ticket for the request: https://track.hpccsystems.com/browse/HPCC-14176.\\n\\nThanks again,\\n\\nDan\", \"post_time\": \"2015-09-03 18:58:22\" },\n\t{ \"post_id\": 8076, \"topic_id\": 1860, \"forum_id\": 8, \"post_subject\": \"Re: Detecting grouped dataset\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nNot that I am aware of. So IMO a JIRA ticket asking for a new ISGROUPED(ds) function would be the next step.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-03 18:51:45\" },\n\t{ \"post_id\": 8074, \"topic_id\": 1860, \"forum_id\": 8, \"post_subject\": \"Detecting grouped dataset\", \"username\": \"DSC\", \"post_text\": \"Is there a way to determine if a dataset has been grouped via the GROUP() command?\\n\\nA scenario in which that would be applicable is within a function macro that sequences records with unique IDs. Some optimizations are applicable to only non-grouped data, and those same optimizations will produce incorrect results if the data has been grouped. Being able to tell the difference at run time or compile time would be very useful.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2015-09-03 12:54:58\" },\n\t{ \"post_id\": 8106, \"topic_id\": 1868, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT and EXTEND\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nThe example code for OUTPUT with EXTEND works perfectly as is on 5.2.6-1.\\n\\nCan I see an example of your use of EXTEND that used to work and does no longer, please?\\n\\nRichard\", \"post_time\": \"2015-09-10 13:36:04\" },\n\t{ \"post_id\": 8104, \"topic_id\": 1868, \"forum_id\": 8, \"post_subject\": \"OUTPUT and EXTEND\", \"username\": \"micevepay\", \"post_text\": \"Upgraded my HPCC cluster and now I get the error
"Extend no longer supported by output to file"
\\n\\nWhy was this change made? The extend option made it easier to see how datasets where changing from transformation to transformation. Especially when LOOP is being used.\\n\\nNow, to see how something is changing, I have to run several different versions of the same code.\\n\\nFurthermore, why is it still included in the language reference if it is no longer applicable?\", \"post_time\": \"2015-09-10 01:50:30\" },\n\t{ \"post_id\": 8132, \"topic_id\": 1880, \"forum_id\": 8, \"post_subject\": \"Re: File.Copy not executing\", \"username\": \"alex\", \"post_text\": \"I fell victim to this problem. Seems fixed now, using the work-around in that thread. Thanks for the help.\", \"post_time\": \"2015-09-15 16:40:35\" },\n\t{ \"post_id\": 8130, \"topic_id\": 1880, \"forum_id\": 8, \"post_subject\": \"Re: File.Copy not executing\", \"username\": \"alex\", \"post_text\": \"Ok, that looks promising, but I can't find a way to make the syntax work.\\n\\nI had hopes for:\\n\\n\\n \\n APPLY(ds, sequential(\\n StartSuperFileTransaction(),\\n if(~Superfileexists(ds.sf), CreateSuperFile(ds.sf)),\\n if(ds.dest in set(Superfilecontents(ds.sf), name), RemoveSuperFile(ds.sf, ds.dest)),\\n Copy(ds.source, ds.clustername, ds.dest,....etc),\\n AddSuperFile(ds.sf, ds.dest)\\n FinishSuperfileTransaction()));\\n \\n
\\n\\nBut I get errors about superfile transactions in a non-global context. If I put the whole thing in a NOTHOR I get errors about expecting a parent/container context.\", \"post_time\": \"2015-09-15 15:30:37\" },\n\t{ \"post_id\": 8128, \"topic_id\": 1880, \"forum_id\": 8, \"post_subject\": \"Re: File.Copy not executing\", \"username\": \"rtaylor\", \"post_text\": \"alex,\\n\\nYou are mis-appropriating #LOOP. \\n\\n#LOOP is part of the ECL Template Language designed to generate ECL code, but you're not generating ECL. It looks to me like you're trying to force ECL into a procedural paradigm (which is never a good idea).\\n\\nYour better approach would be to use APPLY, which is designed to do the kind of thing you need to accomplish.\\n\\nTake a look at this thread, which should give you a good idea of how APPLY might be used for your issue: http://hpccsystems.com/bb/viewtopic.php?f=8&t=1497&hilit=+APPLY&sid=09fb6eb65abdc13f68e9d70e64ef8ebf\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-15 14:50:13\" },\n\t{ \"post_id\": 8126, \"topic_id\": 1880, \"forum_id\": 8, \"post_subject\": \"Re: File.Copy not executing\", \"username\": \"alex\", \"post_text\": \"So it's even weirder than I thought.\\n\\nBefore starting the #LOOP, I do some simple checking to see if I need to actually copy the file before sending it into the #LOOP:\\n\\nto_do := thin(to_do L) := TRANSFORM\\n SELF.sf := if( ~FileExists(L.dest), L.SFname, SKIP);\\n SELF := L;\\nEND;\\n\\nds := project(ds_list_full, thin(LEFT);\\n
\\n\\nThe routine accepts a boolean argument. If it's true, then I do the thinning on the file list, if false I don't. If I don't do the thinning, then the whole routine including the #LOOP work just fine. It's just if I run things through the transform first that I have problems.\\n\\nThis is the case even if the transform doesn't change the file list at all! If I'm starting from scratch and no files exist on the destination cluster, the input to PROJECT is identical to the output, but I still see the error the 2nd time through the #LOOP.\", \"post_time\": \"2015-09-15 14:49:04\" },\n\t{ \"post_id\": 8124, \"topic_id\": 1880, \"forum_id\": 8, \"post_subject\": \"File.Copy not executing\", \"username\": \"alex\", \"post_text\": \"I'm trying to write a routine to copy files from one HPCC to another, and add them to superfiles. The code looks more or less like this:\\n\\n\\nto_do := RECORD\\n string source;\\n string dest;\\n string sf;\\nEND;\\nds:=('~list_of_files',to_do,flat);\\n\\nsources := set(ds, source);\\ndests := set(ds, dest);\\nsfs := set(ds, sf);\\n\\n#SET(Ndx, 1)\\n#LOOP\\n #IF(%Ndx > 15)\\n #BREAK\\n #ELSE\\n sequential(\\n StartSuperFileTransaction(),\\n if(~SuperfileExists(sfs[%Ndx]), CreateSuperfile(sfs[%Ndx%])),\\n FinishSuperFileTransaction(),\\n File.Copy(sources[%Ndx%], clustername, dests[%Ndx%],,,,,true,,,,,), // overwrite ok\\n StartSuperFileTransaction(),\\n AddSuperFile(sfs[%Ndx%], dests[%Ndx%])\\n FinishSuperFileTransaction())\\n );\\n #END //if\\n #SET(Ndx, %Ndx% + 1)\\n#END//loop\\n
\\n\\nThis works the 1st time through the #LOOP. The second time, the AddSuperFile step fails because the Copy didn't happen. No errors or anything from Copy; it's like it's not there. I've even used ASSERT to verify that the source file exists, the superfile exists, and the destination file does NOT exist before the copy step.\\n\\nThe inputs are correct, so it's not like Copy can't find the source file or something. I can change the Ndx initialization to whatever I want (2,3,etc) and the 1st trip through the loop is ok, and the second one will fail.\\n\\nAny advice is appreciated. If there's a way to do this without #LOOP I'd love to hear that, too.\\n\\nThanks in advance.\", \"post_time\": \"2015-09-15 14:04:20\" },\n\t{ \"post_id\": 8236, \"topic_id\": 1890, \"forum_id\": 8, \"post_subject\": \"Re: ERROR: Esp server url not specified\", \"username\": \"JimD\", \"post_text\": \"The DFUPlus server= is referring to the ESP Server running ECL Watch. \\n\\nWhile the DFU Server is doing the work, the DFUPlus command is sending the instructions to a web service within ECL Watch (WsDFU).\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-10-05 20:17:06\" },\n\t{ \"post_id\": 8224, \"topic_id\": 1890, \"forum_id\": 8, \"post_subject\": \"Re: ERROR: Esp server url not specified\", \"username\": \"kovacsbv\", \"post_text\": \"Your server clause syntax is correct. The server option accepts a port which defaults to :8010 if not specified. I would look into what the line breaks are doing in this command. Note that this is completely different way of specifing the server than you would use in the ecl command.\\n\\nIf not using ssl:\\nserver=http:10.1.2.3:8010
\\nIf using SSL, you typically need an 's' and a different port number, commonly 18010:\\nserver=https://10.1.2.3:18010
\\n\\nSending non-ssl to an ssl port results in bizarre errors like "Cannot verify configuration."\", \"post_time\": \"2015-10-05 17:58:16\" },\n\t{ \"post_id\": 8214, \"topic_id\": 1890, \"forum_id\": 8, \"post_subject\": \"Re: ERROR: Esp server url not specified\", \"username\": \"bforeman\", \"post_text\": \"Just a thought, I believe the DFU Server does not need the 8010 port. Try your setting with just the base address only:\\n\\ndfuplus server=http://192.168.32.128\", \"post_time\": \"2015-09-28 21:56:07\" },\n\t{ \"post_id\": 8150, \"topic_id\": 1890, \"forum_id\": 8, \"post_subject\": \"ERROR: Esp server url not specified\", \"username\": \"angela\", \"post_text\": \"Hi All,\\nI just tried to use a dfuplus script to combine XML files into just one XML file in order to manipulate the data in ECL. \\nBut I met an error : 'ERROR: Esp server url not specified.' when I tried run the script on command line.\\n\\nMy code is as below:\\n"\\ndfuplus server=http://192.168.32.128:8010 \\nusername=hpccdemo \\npassword=hpccdemo \\noverwrite=1 \\nreplicate=1 \\naction=spray \\nsrcip=192.168.32.128 \\nsrcfile=/home/hpccdemo/Input/*.xml \\ndstcluster=mythor \\ndstname=online::aaron::hyperlink::origindocs \\nprefix=FILENAME,FILESIZE nosplit=1 \\n"\\n\\nThank you!\", \"post_time\": \"2015-09-19 03:42:50\" },\n\t{ \"post_id\": 8158, \"topic_id\": 1892, \"forum_id\": 8, \"post_subject\": \"Re: Machine Learning Mat Module\", \"username\": \"tlhumphrey2\", \"post_text\": \"Give me an example because all those functions work OK for me.\", \"post_time\": \"2015-09-20 13:44:25\" },\n\t{ \"post_id\": 8156, \"topic_id\": 1892, \"forum_id\": 8, \"post_subject\": \"Re: Machine Learning Mat Module\", \"username\": \"Abhishek_M04\", \"post_text\": \"Thank you.\\n\\nThat's fine. However , I am mainly concerned with the fact that 'Has' is having a recursive dependency with Add,Mul etc. That's why most of the Math related programs are not compiling, especially the Matrix Operations.\\n\\nCan you please throw some light on this?\\n\\n\\nRegards,\\nAbhishek\", \"post_time\": \"2015-09-20 05:41:02\" },\n\t{ \"post_id\": 8154, \"topic_id\": 1892, \"forum_id\": 8, \"post_subject\": \"Re: Machine Learning Mat Module\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm assuming you are talking about the sample ML ecl programs in https://github.com/hpcc-systems/ecl-samples/tree/master/MLUsageExamples/Mat. Yes, I'm looking at the sample here https://github.com/hpcc-systems/ecl-samples/blob/master/MLUsageExamples/Mat/Each/Use_ML.Mat.Each.Abs.ecl, and it has text in it that shouldn't be there. The following.\\n<?dbfo-need height="1.7in" ?>
.\\n\\nIf you remove this text, the example should run.\\n\\nThese examples were part of a document I was writing which I formatted with docbook xml. I used a program to extra the examples from the xml but obviously missed some xml.\\n\\nIf you aren't talking about these samples. Can you give me a specific case where you found errors.\", \"post_time\": \"2015-09-19 14:21:06\" },\n\t{ \"post_id\": 8152, \"topic_id\": 1892, \"forum_id\": 8, \"post_subject\": \"Machine Learning Mat Module\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nI was exploring Machine Learning Mat Module. Inside this , Add, Mul and Has all has Errors. Can you please help.\\n\\n\\nRegards,\\nAbhishek\", \"post_time\": \"2015-09-19 10:03:44\" },\n\t{ \"post_id\": 8212, \"topic_id\": 1906, \"forum_id\": 8, \"post_subject\": \"Re: Error in join\", \"username\": \"bforeman\", \"post_text\": \"After your transform call, simply add the ADD Join Flag, like this:\\nmatching_ds1 := JOIN(ds_base_temp,std_input_processed, (LEFT.surname = RIGHT.surname\\nAND LEFT.forename = RIGHT.forename\\nAND TRIM(left.surname,LEFT,RIGHT)<>''\\nAND TRIM(left.forename,LEFT,RIGHT)<>''\\nAND left._TYPE='INDIVIDUAL')or(\\nLEFT.NAME=RIGHT.NAME\\nAND RIGHT.NAME<>''\\nAND LEFT.NAME<>''\\nAND LEFT.SURNAME=''\\nAND LEFT.FORENAME=''\\nAND RIGHT._TYPE<>'INDIVIDUAL'\\nAND left._TYPE<>'INDIVIDUAL')\\n,fn_transform(left,right),ALL);\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-09-28 19:26:47\" },\n\t{ \"post_id\": 8210, \"topic_id\": 1906, \"forum_id\": 8, \"post_subject\": \"Error in join\", \"username\": \"elango_v\", \"post_text\": \"Hi,\\n\\nI am inner joining two data sets. Both the data set has the same layout. There are two sets of conditions I need to test.If the field "_type" value is "individual" then i need to check forename and surname, if the field"_type" value is not "individual" then i need to check name. Below is my code snippet. I am getting the error\\n \\nError: JOIN matching_ds contains no equality conditions - use ,ALL to allow \\n\\n\\ncode:\\n-------\\nmatching_ds1 := JOIN(ds_base_temp,std_input_processed, (LEFT.surname = RIGHT.surname \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAND LEFT.forename = RIGHT.forename \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAND TRIM(left.surname,LEFT,RIGHT)<>''\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAND TRIM(left.forename,LEFT,RIGHT)<>''\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAND left._TYPE='INDIVIDUAL')or(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t LEFT.NAME=RIGHT.NAME \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND RIGHT.NAME<>'' \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND LEFT.NAME<>''\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND LEFT.SURNAME=''\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND LEFT.FORENAME=''\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND RIGHT._TYPE<>'INDIVIDUAL'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t AND left._TYPE<>'INDIVIDUAL')\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,fn_transform(left,right));\\n\\ncan you please check once?\", \"post_time\": \"2015-09-28 11:09:01\" },\n\t{ \"post_id\": 8240, \"topic_id\": 1908, \"forum_id\": 8, \"post_subject\": \"Re: Same data loading logic using different layouts\", \"username\": \"lpezet\", \"post_text\": \"I saw the IFBLOCK but did not think of using it that way...interesting!\\n\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-10-05 20:35:40\" },\n\t{ \"post_id\": 8230, \"topic_id\": 1908, \"forum_id\": 8, \"post_subject\": \"Re: Same data loading logic using different layouts\", \"username\": \"bforeman\", \"post_text\": \"Hi Luc,\\n\\nThe RECORD structure has support for different layouts using IFBLOCK. So if it's Week 1, use this record block, Week 3, use this block etc.\\n\\nr := RECORD\\n UNSIGNED4 dg_parentid;\\n STRING10 dg_firstname;\\n STRING dg_lastname;\\n UNSIGNED1 dg_prange;\\n IFBLOCK(week = 1)\\n STRING20 extrafield1;\\n END;\\n IFBLOCK(week = 2)\\n STRING20 extrafield2;\\n END;\\n END;\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-10-05 19:53:35\" },\n\t{ \"post_id\": 8218, \"topic_id\": 1908, \"forum_id\": 8, \"post_subject\": \"Same data loading logic using different layouts\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI receive files weekly that I need to process.\\nI have some ETL logic to load those files from CSV to Thor.\\nThe problem now is that from time to time the layout of those files changes.\\nFor example, Week 1, the layout is\\n{ STRING a; INTEGER b; REAL c;}
\\nOn Week 3, the layout is now:\\n{ STRING a; INTEGER b; REAL c; STRING d;}
\\nAnd on week 5, the layout becomes:\\n{ STRING a; INTEGER b; STRING e; REAL c; STRING d;}
\\n\\nNow I can't just update my layout and stick with the latest version, as I need to be able to re-ETL old files too.\\n\\nMy thinking is that I will have this "Master Layout", being the latest version, and have some TRANSFORM that will go from Week1Layout to MasterLayout, and another one to go from Week3Layout to MasterLayout.\\n\\nBut I'd like to load those files the same exact way, like so:\\nETL(STRING pId, STRING pWeek, ??? pLayout, ...) := MODULE\\n SHARED mDS := DATASET(std.File.ExternalLogicalFilename(LandingZone_IP,'/data/' + pId + '/' + pWeek + '/file.csv', pLayout, CSV);\\n SHARED mDist := DISTRIBUTE(mDS, a);\\n EXPORT doIt() := OUTPUT(mDist,, 'data::' + pId + '::' + pWeek, OVERWRITE);\\nEND;\\n
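A minimal sketch of the week-to-master TRANSFORM described here, using the weekly layouts shown above; the logical file name and the default values are hypothetical:

Week1Layout  := RECORD STRING a; INTEGER b; REAL c; END;
MasterLayout := RECORD STRING a; INTEGER b; STRING e; REAL c; STRING d; END;

MasterLayout PromoteWeek1(Week1Layout L) := TRANSFORM
  SELF.e := '';   // fields added in later weeks default to empty
  SELF.d := '';
  SELF   := L;    // a, b and c copy straight across
END;

oldWeek  := DATASET('~data::someid::week1::file', Week1Layout, THOR);
promoted := PROJECT(oldWeek, PromoteWeek1(LEFT));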
\\n\\nI was thinking of having a DATASET (or XML) mapping weeks to layouts.\\nThe problem (as mentioned here)\\n\\n\\nThank you for your help!\\nLuc.\", \"post_time\": \"2015-10-02 14:48:28\" },\n\t{ \"post_id\": 8242, \"topic_id\": 1912, \"forum_id\": 8, \"post_subject\": \"Re: getting ecl cli values into a variable\", \"username\": \"rtaylor\", \"post_text\": \"Then try something like this:\\nEXPORT findperson() := FUNCTION\\n STRING20 ThisLname := '' : STORED('LName');\\n RETURN MyPersonDatset(Lname=ThisLname);\\nEND;
Obviously you will want to edit this for your file and field names.\\n\\nGood luck with it,\\n\\nRichard\", \"post_time\": \"2015-10-05 20:38:36\" },\n\t{ \"post_id\": 8238, \"topic_id\": 1912, \"forum_id\": 8, \"post_subject\": \"Re: getting ecl cli values into a variable\", \"username\": \"kovacsbv\", \"post_text\": \"I don't have one; we're looking for what findperson.ecl should contain. I don't have anything that works, so I need a simplistic findperson.ecl that puts 'JONES' into a variable called LName, given that I typed the "ecl run" shown above.\", \"post_time\": \"2015-10-05 20:20:25\" },\n\t{ \"post_id\": 8234, \"topic_id\": 1912, \"forum_id\": 8, \"post_subject\": \"Re: getting ecl cli values into a variable\", \"username\": \"rtaylor\", \"post_text\": \"kovacsbv ,\\n\\nMay we see exactly what code is in your findperson.ecl file, please?\\n\\nRichard\", \"post_time\": \"2015-10-05 20:13:17\" },\n\t{ \"post_id\": 8222, \"topic_id\": 1912, \"forum_id\": 8, \"post_subject\": \"getting ecl cli values into a variable\", \"username\": \"kovacsbv\", \"post_text\": \"In the HPCC client tools manual, the ECL CLI section, the "run" subcommand, we have the following example:\\n\\necl run --target=thor --input="<request><LName>JONES</LName></request>" findperson.ecl
\\n\\nIn findperson.ecl, I want a variable called LName that contains the value 'JONES' in it.\\n________________________________________________________________\\nEdit: add this paragraph:\\nThe client manual for the ecl CLI says:\\n-X<name> Sets the stored input value (stored('name'))
\\nSo, I was guided to the STORED() function.\\n________________________________________________________________\\n\\nThe STORED() documentation in the ECL language reference has this example code:\\n\\nIname := INTERFACE\\nEXPORT STRING20 Name;\\nEXPORT BOOLEAN KeepName := TRUE;\\nEND;\\nStoredName := STORED(Iname);\\n// is equivalent to:\\n// StoredName := MODULE(Iname)\\n// EXPORT STRING20 Name := '' : STORED('name');\\n// EXPORT BOOLEAN KeepName := TRUE : STORED('keepname');\\n// END;
\\n\\nI need to make the two link, because I have written a hello_world.ecl, but can't get a variable named LName to contain the string 'JONES' so I can use it in ECL. Can somebody provide the ecl code that would do this? Remeber, I want to get past an interface name to actually putting the value in a STRING50 LName\", \"post_time\": \"2015-10-05 16:58:23\" },\n\t{ \"post_id\": 8232, \"topic_id\": 1914, \"forum_id\": 8, \"post_subject\": \"Re: Group parameter in ECL Function\", \"username\": \"bforeman\", \"post_text\": \"Hi Brandon,\\n\\nThis might get you there:\\n\\nDATASET as a Parameter Type\\n[GROUPED] DATASET( struct )\\n\\nThis form is only used as a Value Type for passing parameters, specifying function return types, or defining a SET OF datasets. If GROUPED is present, the passed parameter must have been grouped using the GROUP function. \\n\\n
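A minimal sketch of a function declaring a GROUPED DATASET parameter (record layout and names are hypothetical):

PersonRec := RECORD
  STRING20 lastname;
  STRING20 firstname;
END;

// The GROUPED keyword requires callers to pass a dataset that has already been GROUPed
FirstPerGroup(GROUPED DATASET(PersonRec) gds) :=
  TOPN(gds, 1, firstname);   // TOPN operates within each group of a grouped input

people := DATASET([{'SMITH','ANN'},{'SMITH','BOB'},{'JONES','CAL'}], PersonRec);
byName := GROUP(SORT(people, lastname), lastname);
OUTPUT(FirstPerGroup(byName));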
\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2015-10-05 19:58:50\" },\n\t{ \"post_id\": 8228, \"topic_id\": 1914, \"forum_id\": 8, \"post_subject\": \"Group parameter in ECL Function\", \"username\": \"brandon.walker\", \"post_text\": \"How would one write an ECL function that would accept a group? Is that possible in ECL?\", \"post_time\": \"2015-10-05 19:38:41\" },\n\t{ \"post_id\": 8312, \"topic_id\": 1930, \"forum_id\": 8, \"post_subject\": \"Re: Despraying a Flat file\", \"username\": \"rlbars5\", \"post_text\": \"Thank you Richard\", \"post_time\": \"2015-10-15 17:58:31\" },\n\t{ \"post_id\": 8286, \"topic_id\": 1930, \"forum_id\": 8, \"post_subject\": \"Re: Despraying a Flat file\", \"username\": \"rtaylor\", \"post_text\": \"rlbars5,\\n\\nA fixed length flat file doesn't have (or need) record delimiters. That's why you're seeing what you're seeing.\\n\\nTo end up with record delimiters in the desprayed file, you will need to add the record delimiters to the file in HPCC first, because despray only takes the file parts as they already exist and "stitches" them together into a single physical file on the LZ. Despray does not change the data in any way.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-10-14 13:52:28\" },\n\t{ \"post_id\": 8284, \"topic_id\": 1930, \"forum_id\": 8, \"post_subject\": \"Re: Despraying a Flat file\", \"username\": \"rlbars5\", \"post_text\": \"I did a bit of an experiment, I had a logical fixed file with 100 records. I de-sprayed this file and the output file consisted of 1 big record string which consisted of 100 records (separated by its record length) \\n\\neg: logical file in HPCC\\n 1. A B C D E \\n 2. Q W E T Y\\n\\nAfter De-spray\\n 1. A B C D E Q W E T Y\\n\\nUpon spraying this De-sprayed file again in HPCC the records were read properly and separated as individual records. 
(I do understand the record length acts as the separator for fixed files)\\n\\nAlthough my question is can we have the de-sprayed file to have individual records rather than one big record(just curious if this is possible)\", \"post_time\": \"2015-10-14 12:29:20\" },\n\t{ \"post_id\": 8280, \"topic_id\": 1930, \"forum_id\": 8, \"post_subject\": \"Despraying a Flat file\", \"username\": \"rlbars5\", \"post_text\": \"Hello,\\n\\nWhen I try to De-spray a 'flat' file with 100 records, the file generated after despray contains only 1 big record(which contains all the 100 records)\\n(I do understand that the record separator here is the record length.)\\nMy question is can we split this single record into separate 100 records while de-spraying a flat file ?\", \"post_time\": \"2015-10-13 22:36:20\" },\n\t{ \"post_id\": 8310, \"topic_id\": 1938, \"forum_id\": 8, \"post_subject\": \"Re: Regarding Error: no specific rows in a Table.\", \"username\": \"rtaylor\", \"post_text\": \"Abhishek,\\n\\nIt would help if you could share your code that created this error.\\n\\nRichard\", \"post_time\": \"2015-10-15 17:33:32\" },\n\t{ \"post_id\": 8306, \"topic_id\": 1938, \"forum_id\": 8, \"post_subject\": \"Regarding Error: no specific rows in a Table.\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nCan anyone please let me know in which scenarios do we get the Error :\\n"No specific rows in Table <Table Name>"\\n\\n\\nThanks,\\nAbhishek\", \"post_time\": \"2015-10-15 14:17:50\" },\n\t{ \"post_id\": 8374, \"topic_id\": 1942, \"forum_id\": 8, \"post_subject\": \"Re: Function Macro Loop Append Rows\", \"username\": \"longly\", \"post_text\": \"iMikePayne, you may want to try Normalize.\\nblankDataSet := DATASET([{0, 0, 0, 0}], filelayout);\\nfileLayout createRow(fileLayout L, INTEGER C) := TRANSFORM\\n SELF := fm_stat(C)\\nEND;\\nresultDataSet := NORMALIZE(blankDataSet, 3, createRow(LEFT, COUNTER));\\n
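For readers new to this pattern, a self-contained sketch of NORMALIZE driven by COUNTER, independent of the fm_stat function macro discussed in this thread (layout and row count are hypothetical):

NumRec := RECORD
  UNSIGNED n;
END;

seed := DATASET([{0}], NumRec);                                   // one dummy row to normalize against
nums := NORMALIZE(seed, 5, TRANSFORM(NumRec, SELF.n := COUNTER));
OUTPUT(nums);                                                     // five rows, n = 1 through 5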
\", \"post_time\": \"2015-10-23 01:07:56\" },\n\t{ \"post_id\": 8328, \"topic_id\": 1942, \"forum_id\": 8, \"post_subject\": \"Re: Function Macro Loop Append Rows\", \"username\": \"iMikePayne\", \"post_text\": \"Correlation or any other function that will be written to operate in similar manner.\", \"post_time\": \"2015-10-16 16:47:06\" },\n\t{ \"post_id\": 8326, \"topic_id\": 1942, \"forum_id\": 8, \"post_subject\": \"Re: Function Macro Loop Append Rows\", \"username\": \"tlhumphrey2\", \"post_text\": \"What is "func"?\", \"post_time\": \"2015-10-16 16:43:23\" },\n\t{ \"post_id\": 8324, \"topic_id\": 1942, \"forum_id\": 8, \"post_subject\": \"Function Macro Loop Append Rows\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI am trying to figure out how to append rows to a dataset in the function macro.\\n\\n\\n\\n//real data goes to f5000\\nfilelayout := RECORD\\n UNSIGNED rcid;\\n REAL f1;\\n REAL f2;\\n REAL f3;\\nEND;\\n\\nsomeData := DATASET(~temp::somefile,filelayout,thor);\\n\\nfm_stat(cnt):=FUNCTIONMACRO\\n \\n RETURN DATASET([{cnt, \\n func(someData,#EXPAND('f'+cnt),f1)\\n func(someData,#EXPAND('f'+cnt),f2)\\n func(someData,#EXPAND('f'+cnt),f3)}],filelayout);\\n\\nEND;\\n\\n//Want a row for each fn and want to avoid\\nfm_stat(1)+fm_stat(2)+fm_stat(3);\\n
\\n\\nI know I cant use the normal LOOP and pass a counter to the macro but I don't understand how to use the #LOOP to #APPEND a row.\\n\\nAny assistance would be greatly appreciated.\\n\\n\\nEDIT: Sorry for putting this in the wrong part of the forum....\", \"post_time\": \"2015-10-16 16:01:31\" },\n\t{ \"post_id\": 8446, \"topic_id\": 1968, \"forum_id\": 8, \"post_subject\": \"Re: Graph Timing\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nThey all are timings for the particular activity you are examining.\\nIf you have a 1 slave Thor or are targeting hThor, then TimeMinLocalExecute, TimeMaxLocalExecute and TimeAvgLocalExecute will be identical.\\n( I believe timeMaxMs and timeMinMs are legacy actually, i.e. shouldn't be there any more )\\n\\nHowever, when your query targets a multi slave cluster, then you can see which slave took the least amount of time (TimeMinLocalExecute), the slave that took the max amount of time (TimeMaxLocalExecute) and the average of all the slave execution times (TimeAvgLocalExecute).\\nIn addition you will have:\\nNodeMaxLocalExecute, NodeMinLocalExecute , which indicate which slave was the slowest and fastes and:\\nSkewMaxLocalExecute, SkewMinLocalExecute, the percentage off the average the slowest and faster slaves were.\\n\\nThese figures are often useful in pinpointing which activity on which slave took significant time, which may hint at an e.g. expensive join matching process, or if there's a big skew, it may be due to data skew or potentially a hardware issue.\\n\\nHope that helps.\", \"post_time\": \"2015-11-06 16:25:44\" },\n\t{ \"post_id\": 8398, \"topic_id\": 1968, \"forum_id\": 8, \"post_subject\": \"Graph Timing\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI am viewing a graph in my WU. When I click on an activity in the graph, I see the following parameters regarding the timing:\\n\\n1. TimeAvgLocalExecute\\n2. TimeMaxLocalExecute\\n3. TimeMinLocalExecute\\n4. timeMaxMs\\n5. timeMinMs\\n\\nOut of the above 5, which is the time taken for that particular activity?\\n\\nThanks\\nChen\", \"post_time\": \"2015-10-30 13:21:17\" },\n\t{ \"post_id\": 8496, \"topic_id\": 1970, \"forum_id\": 8, \"post_subject\": \"Re: Working of OR in ECL\", \"username\": \"bforeman\", \"post_text\": \"Well first, the example code you have submitted will not compile at all, so I really don't know what you are trying to show here.\\n\\nThis example works better:\\n\\nsamp(string a) := FUNCTION\\nds := dataset([a],{String alph});\\no := output(ds,, '~sampleTest',overwrite, NAMED('A'));\\nreturn WHEN(ds,o);\\nEND;\\n\\nsamp1(string b) := FUNCTION\\nds2 := dataset([b],{String alph});\\no2 := output(ds2,, '~sampleTest',overwrite,NAMED('B'));\\nreturn WHEN(ds2,o2);\\nEND;\\n\\nsamp('input-b'); \\nsamp1('input-a');\\n
\\n\\nChange the input parameters and watch the effect.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-09 19:25:54\" },\n\t{ \"post_id\": 8472, \"topic_id\": 1970, \"forum_id\": 8, \"post_subject\": \"Re: Working of OR in ECL\", \"username\": \"hema_g02\", \"post_text\": \"Thanks for the response Bob.\\n\\nI have got another question while investigating this scenario. In the below code, as per your comments, since 'OUTPUT' is overriding the behavior of 'OR', the file '~sampleTest' logical file should contain the content as 'input-b' since i have used 'OVERWRITE' in my output statement.\\n\\nHere, I am getting two outputs (which is obviously the side effect of using OUTPUT statement as you said). But the logical file written with the content 'input-a'\\n\\nsamp(string a) := FUNCTION\\n\\toutput(dataset([a],{String alph}),, '~sampleTest', overwrite);\\n\\treturn true;\\nEND;\\n\\nsamp1(string b) := FUNCTION\\n\\toutput(dataset([b],{String alph}),, '~sampleTest', overwrite);\\n\\treturn true;\\nEND;\\nsamp('input-a') OR samp1('input-b');\", \"post_time\": \"2015-11-09 14:56:51\" },\n\t{ \"post_id\": 8416, \"topic_id\": 1970, \"forum_id\": 8, \"post_subject\": \"Re: Working of OR in ECL\", \"username\": \"bforeman\", \"post_text\": \"Confirmation from development:\\n\\nMost of the time the second condition will not be evaluated if the first condition is always true – but it isn’t guaranteed.\\n\\nIn that situation I suspect it is because the output is a side-effect that is associated with the Boolean value. The rules for when side-effects are evaluated are not easy to tie down, but generally it is evaluated at the same point as the action that contains the expression – which means it will not be short circuited.\\n
\\n\\nAlso, see:\\nhttps://track.hpccsystems.com/browse/HPCC-13652\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2015-11-03 14:47:34\" },\n\t{ \"post_id\": 8414, \"topic_id\": 1970, \"forum_id\": 8, \"post_subject\": \"Re: Working of OR in ECL\", \"username\": \"bforeman\", \"post_text\": \"Hi Vasa,\\n\\nMy educated guess is your introduction of side effects (OUTPUT) in your functions is causing both of them to stream a result to you, that seems to override the default behavior of the OR operator. I am checking with the compiler developer now to confirm this theory.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-03 13:07:45\" },\n\t{ \"post_id\": 8400, \"topic_id\": 1970, \"forum_id\": 8, \"post_subject\": \"Working of OR in ECL\", \"username\": \"hema_g02\", \"post_text\": \"Hi Folks,\\n I have a question in the working of conditional operator 'OR' in ECL. In most of the compilers in general, if we have two conditions used in OR, if the first condition is TRUE, it will the second condition will not get evaluated. Does this hold true for ECL also ?\\n\\nIn the example below,\\nsamp(STRING a) := FUNCTION\\n\\tOUTPUT(a, NAMED('FunctionA'));\\n\\tRETURN TRUE;\\nEND;\\n\\nsamp1(STRING b) := FUNCTION\\n\\tOUTPUT(b, NAMED('FunctionB'));\\n\\tRETURN TRUE;\\nEND;\\nsamp('input-a') OR samp1('input-b');\\n\\nsince the samp function returns TRUE, the samp1 function should not be called AFAIK. But both the function gets executed in this case and i am getting three outputs at the end. Could anyone pls throw light on this ?\\n\\nThanks and Regards,\\nVasa\", \"post_time\": \"2015-10-30 14:48:05\" },\n\t{ \"post_id\": 8410, \"topic_id\": 1972, \"forum_id\": 8, \"post_subject\": \"Re: JOIN Options\", \"username\": \"rtaylor\", \"post_text\": \"Kevin,\\n\\nI would use LOOKUP if the relationship is MANY-1, and MANY LOOKUP or ALL for any other relationship type.\\n\\n"Large" or "small" in relation to these options for the right dataset always comes down to "small enough that all records in the dataset can fit into memory on each node."\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-02 21:18:52\" },\n\t{ \"post_id\": 8404, \"topic_id\": 1972, \"forum_id\": 8, \"post_subject\": \"JOIN Options\", \"username\": \"kgarrity\", \"post_text\": \"Scenario:\\nI want to JOIN two record sets \\nI have a LeftRecSet of 2 Billion Records. I have a RightRecSet of 32 records. I'd like to understand what are the best JOIN options to use as some of them seem to do the same thing.\\n\\n dResult := JOIN(\\n SORT(DISTRIBUTE(dHugeLeft,HASH(x)),x,LOCAL), // 2 billion records\\n SORT(DISTRIBUTE(dSmallRight,HASH(x)),x,LOCAL), // 32 records\\n LEFT.x = RIGHT.x,\\n TRANSFORM(LEFT),\\n LOOKUP,\\n SMART,\\n FEW,\\n ALL,\\n LOCAL\\n );\\n\\nIt seems that LOOKUP, SMART, FEW and ALL apply in this situation. Which ones should I use? Do some options supersede other options? 
Do some options make other options irrelevant?\\n\\nAdditionally, when using the SMART option do I still DISTRIBUTE the record sets and let the compiler decide whether or not to ignore it or is the DISTRIBUTE command not needed at all when using SMART?\\n\\nLastly, in JOIN how large is "large" when referring to the right record set?\\n\\nThanks,\\nKevin\", \"post_time\": \"2015-11-02 15:15:16\" },\n\t{ \"post_id\": 8418, \"topic_id\": 1974, \"forum_id\": 8, \"post_subject\": \"Re: how to get last year\", \"username\": \"rtaylor\", \"post_text\": \"elango_v,\\n\\nI found this function in the Date.ecl Standard Library file:/**\\n * Adjusts a date by incrementing or decrementing year, month and/or day values.\\n * The date must be in the Gregorian calendar after the year 1600.\\n * If the new calculated date is invalid then it will be normalized according\\n * to mktime() rules. Example: 20140130 + 1 month = 20140302.\\n *\\n * @param date The date to adjust.\\n * @param year_delta The requested change to the year value;\\n * optional, defaults to zero.\\n * @param month_delta The requested change to the month value;\\n * optional, defaults to zero.\\n * @param day_delta The requested change to the day of month value;\\n * optional, defaults to zero.\\n * @return The adjusted Date_t value.\\n */\\n\\nEXPORT Date_t AdjustDate(Date_t date,\\n INTEGER2 year_delta = 0,\\n INTEGER4 month_delta = 0,\\n INTEGER4 day_delta = 0) :=\\n TimeLib.AdjustDate(date, year_delta, month_delta, day_delta);\\n
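Applied to the original question in this thread (the first day of the month 13 months back), a minimal sketch using that function; it assumes a platform version that ships STD.Date.Today() and STD.Date.AdjustDate():

IMPORT STD;
today  := STD.Date.Today();                       // Date_t, e.g. 20151102
back13 := STD.Date.AdjustDate(today, 0, -13, 0);  // 13 months earlier
result := (back13 DIV 100) * 100 + 1;             // force the day of month to 01
OUTPUT(result);                                   // e.g. 20141001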
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-03 22:03:55\" },\n\t{ \"post_id\": 8408, \"topic_id\": 1974, \"forum_id\": 8, \"post_subject\": \"Re: how to get last year\", \"username\": \"elango_v\", \"post_text\": \"I used the following code to get the last 13 month date. Please let me know any other better alternatives\\n\\ncurrent_date:=(integer)StringLib.GetDateYYYYMMDD();\\nlast_month2:=(integer)current_date[5..6]-1;\\nlast_month:=if(last_month2=0,12,last_month2);\\nlast_month;\\nlast_year2:=(integer)current_date[1..4]-1;\\nlast_year:=if(last_month2=0,last_year2-1,last_year2);\\nlast_year;\\nlastyear:=(string)last_year+(string)last_month+'01';\\nlastyear;\", \"post_time\": \"2015-11-02 16:19:55\" },\n\t{ \"post_id\": 8406, \"topic_id\": 1974, \"forum_id\": 8, \"post_subject\": \"how to get last year\", \"username\": \"elango_v\", \"post_text\": \"Hi all,\\n\\nI need to calculate the date that is 13 month previous to today's date. Day is always 01.\\nFor instance today's date is 2015/11/02. I would like to get the result as 2014/10/01. I could not find much date related predefined functions. Can you please check once?\\n\\nThanks,\\nElango\", \"post_time\": \"2015-11-02 16:07:27\" },\n\t{ \"post_id\": 8580, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"anthony.fishbeck\", \"post_text\": \"But taking a step way back, HTTPCALL does not yet support JSON.\\n\\nThere is an open issue requesting this:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-12196\\n\\nPlease feel free to comment to establish interest if nothing else.\", \"post_time\": \"2015-11-11 22:25:59\" },\n\t{ \"post_id\": 8578, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Btw, It's not ideal, but if you insert the username and password into the URL it will send basic auth without needing HTTPHEADER.\\n\\nhttp://username:password@ip:port/path\", \"post_time\": \"2015-11-11 22:13:58\" },\n\t{ \"post_id\": 8576, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"anthony.fishbeck\", \"post_text\": \"To log what is being sent, try setting:\\n\\n\\n#option('soapTraceLevel', 10); \\n
\", \"post_time\": \"2015-11-11 22:10:10\" },\n\t{ \"post_id\": 8574, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"JimD\", \"post_text\": \"The feature was introduced in 5.4.0 so it should work.\\n\\nThe lack of colorization is an issue, but not an indication of whether it should be supported.\\n\\nI will get someone with more experience than I have to to look at your code.\", \"post_time\": \"2015-11-11 21:54:45\" },\n\t{ \"post_id\": 8572, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"drealeed\", \"post_text\": \"Doesn't seem to be working for me.\\n\\ndshttp:=HTTPCALL( 'https://api.github.com/repos/hpcc-systems/DSP/issues?page=1&per_page=200&state=all&access_token=f6db7718bd732939578fe652aa93387ada31c4b5',\\n'GET','application/json' , l_rawissue, XPATH('/'), HTTPHEADER('Authorization','Basic ZHJlYWxlZWQ6emFxMVpBUSE='),HTTPHEADER('ETag','f6db7718bd732939578fe652aa93387ada31c4b5'));\\n\\nAn auth error is still coming back. (the same url & params sent via a java hpccconnection call works.)\\n\\nI’m using ecl ide & eclcc 5.4.2 against an OSS hpcc cluster running 5.4.4-1. \\n\\n Is HTTPHeader supported in that version? I don’t see the option being highlighted in the ECL IDE as a keyword, like XPATH and HEADER are.\\n\\nAny way to see what headers are being sent? I did a grep across all the logs on the four-way for “api.github”, but the auth error was the only thing that came back.\", \"post_time\": \"2015-11-11 21:47:35\" },\n\t{ \"post_id\": 8568, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"JimD\", \"post_text\": \"Good question! You can pass header info using this syntax:\\n\\nHTTPCALL(url, 'GET', 'application/json', respRec, httpheader('myheader1', 'value1'), httpheader('myheader2', 'myvalue2'));\\n\\nI have created a Jira issue to add this information to the docs. https://track.hpccsystems.com/browse/HPCC-14506\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-11 21:19:50\" },\n\t{ \"post_id\": 8566, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"drealeed\", \"post_text\": \"Why, yes it does! Thanks. I don't see a way to pass header info (such as basic auth headers, etc) in the documentation. Do you know if that exists?\", \"post_time\": \"2015-11-11 21:02:16\" },\n\t{ \"post_id\": 8564, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"Re: JSON equivalent of SOAPCALL?\", \"username\": \"JimD\", \"post_text\": \"Would HTTPCALL work ?\\n\\nhttps://hpccsystems.com/download/docume ... PCALL.html\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-11 20:54:32\" },\n\t{ \"post_id\": 8562, \"topic_id\": 1998, \"forum_id\": 8, \"post_subject\": \"JSON equivalent of SOAPCALL?\", \"username\": \"drealeed\", \"post_text\": \"I'm importing json files into ecl as logical files, and am able to read them in without a problem.\\n\\nWhat I'd really like to do is retrieve them from the original external json web service supplying them, from within HPCC, as one does with xml-based web services. 
Is there a json equivalent of the SOAPCALL command in the works at all?\", \"post_time\": \"2015-11-11 20:39:44\" },\n\t{ \"post_id\": 8596, \"topic_id\": 2008, \"forum_id\": 8, \"post_subject\": \"Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"My team wants to automatically log the use of our team's repository function/macro/modules. E.g. when my ECL code calls a module written by a colleague in the repository, one record will be added to the log file recording my user id, time, WUID, etc. The ideal solution is to minimally modify existing repository files so that the users do not need any extra coding, but the log will be automatically updated.\\n\\nOur current attempt uses a module to update the log (see below). The module will load the current log file, add a new line, output the new log to a temp dir, delete the current log and rename the temp dir to the current dir. This module will be appended to our repository module/functions via a SUCCESS clause. \\n\\nThere are 3 issues with this attempt:\\n 1. the output step generates an output tab in ECL IDE. Is there a way to suppress/hide this output? We want the log to be updated in the background so that the user will not notice it.\\n 2. If a module has 100 EXPORT's, then the developer needs 100 SUCCESS's to generate the \\nlog for each export. Is there a way to force execute a line in the module so that the developer only needs to add 1 line of code, but the log will be executed whenever the module is called?\\n 3. Also, this attempt becomes clumsy as the size of the log file becomes large. Is there a better alternative solution? say using system log file? \\n \\n\\n// Module to update log\\nEXPORT HPCC_Proc_Log(string proc_name) := MODULE\\n EXPORT AddNewLog := SEQUENTIAL(\\n OUTPUT(DATASET([{proc_name,\\n STD.System.Job.User(),\\n STD.System.Job.WUID(),\\n lib_timelib.timelib.CurrentDate(TRUE),\\n lib_timelib.timelib.CurrentTime(TRUE), \\n lib_timelib.timelib.CurrentTimestamp(TRUE)}],Proc_Log_Rec) +\\n DATASET(LogDir,Proc_Log_Rec,THOR),,TempLogDir,THOR,OVERWRITE),\\n STD.File.DeleteLogicalFile(LogDir),\\n STD.File.RenameLogicalFile(TempLogDir,LogDir));\\n \\n EXPORT Proc_Log_Rec := RECORD\\n string procedure;\\n string user;\\n string wuid;\\n unsigned4 date;\\n unsigned4 time;\\n unsigned8 stamp;\\n END; \\nEND;\\n\\n// Log function calls\\nEXPORT string Test(string input) := FUNCTION\\n out := input+'_to_output' \\n // add one line in log file using module HPCC_Proc_Log\\n :SUCCESS(HPCC_Proc_Log('Test').AddNewLog);\\n RETURN out;\\nEND;\\n
\\n\\nThank you! \", \"post_time\": \"2015-11-16 17:22:01\" },\n\t{ \"post_id\": 8624, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"Thank you Jim. This is really helpful. I will talk to the admin.
\", \"post_time\": \"2015-11-17 16:13:12\" },\n\t{ \"post_id\": 8622, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"JimD\", \"post_text\": \"The SSH credentials are outside of HPCC and are maintained by your system admin. Is there a system admin who can give you ssh credentials?\\n\\nIf your system is on a cloud, then you will need to make sure the ssh port (default is 22) is open and you will need to use the external IP, not the one listed in ECL Watch.\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-17 16:09:25\" },\n\t{ \"post_id\": 8620, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"I am sorry, my question was how to SSH into the ECL agent node? I tried to SSH with the ECL agent IP from ECL Watch and my normal login, but I am am getting "Invalid uername or password reported by server". Is there a special port number or do I need to request additional access?\", \"post_time\": \"2015-11-17 15:56:14\" },\n\t{ \"post_id\": 8616, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"JimD\", \"post_text\": \"SSH to your ECL Agent node, then in a terminal window:\\n\\ntail /var/log/syslog\\n\\nYou can also use these commands to see the log files:\\n\\nless \\nmore \\ncat \\ngrep\\n\\nOr you can combine them:\\n\\ncat /var/log/syslog |grep "My Audit Message"\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-11-17 14:59:58\" },\n\t{ \"post_id\": 8612, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"Thanks Jim. How can I access this log file? I believe it resides on the hthor that compiles my code (ECL agent)?\", \"post_time\": \"2015-11-17 14:29:58\" },\n\t{ \"post_id\": 8610, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"JimD\", \"post_text\": \"STD.Audit.Audit() writes to the system log (typically /var/log/syslog)\\n\\nYou can read about this function here:\\n\\nhttps://hpccsystems.com/download/docume ... Audit.html\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-17 13:57:28\" },\n\t{ \"post_id\": 8606, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"No, I was not aware of it. Can the audit.audit function output to a designated computer? We would much prefer to have all the log records written to a single common file so that it would be easier to run reports.\", \"post_time\": \"2015-11-16 21:09:50\" },\n\t{ \"post_id\": 8604, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Re: Automatically Log Function/Macro/Module Calls\", \"username\": \"rtaylor\", \"post_text\": \"Guoyes01,\\n\\nRegarding your issue #1, have you tried using the STD.Audit.Audit() function instead of OUTPUT?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-16 20:51:50\" },\n\t{ \"post_id\": 8602, \"topic_id\": 2010, \"forum_id\": 8, \"post_subject\": \"Automatically Log Function/Macro/Module Calls\", \"username\": \"Guoyes01\", \"post_text\": \"My team wants to monitor the use of our own repository function/macro/modules using a log file. 
For instance, when my ECL code calls a module written by another colleague in the repository, a record will be added to the log file recording my user id, time, WUID, etc. The ideal solution would be to minimally modify the existing repository files for automatic logging, so that the users calling the functions need no extra coding and will not notice any difference in the output.\\n\\nOur current attempt uses a module to update the log (see below). The module reads the current log file, adds a new record, OUTPUTs the updated log to a temporary logical file, deletes the current logical file, and renames the temporary file to the current name. This is used with a SUCCESS clause inside the repository functions/macros/modules to update the log.\\n\\nThere are 3 issues with this attempt:\\n 1. The output step creates a result tab in ECL IDE. Is there a way to suppress this output so that the user would not notice the log being created in the background?\\n 2. If a module has 100 EXPORT elements, then the developer needs 100 SUCCESS calls to update the log. Is there a way to force execute one line in a module so that the developer only needs to add 1 line and the log will still be updated regardless of which EXPORT element is called?\\n 3. This attempt also becomes clumsy as the log file becomes large. Is there a better alternative to achieve the same functionality, like using the system log files?\\n\\n
\\n// Module to update log\\nEXPORT HPCC_Proc_Log(string proc_name) := MODULE\\t\\n EXPORT AddNewLog := SEQUENTIAL(\\n OUTPUT(DATASET([{proc_name, \\n STD.System.Job.User(),\\n STD.System.Job.WUID(); \\n lib_timelib.timelib.CurrentDate(TRUE),\\n lib_timelib.timelib.CurrentTime(TRUE), \\n lib_timelib.timelib.CurrentTimestamp(TRUE)}],Proc_Log_Rec) +\\n DATASET(LogDir,Proc_Log_Rec,THOR),,TempLogDir,THOR,OVERWRITE),\\n STD.File.DeleteLogicalFile(LogDir),\\n STD.File.RenameLogicalFile(TempLogDir,LogDir));\\n \\n EXPORT Proc_Log_Rec := RECORD\\n string procedure;\\n string user;\\n string wuid;\\n unsigned4 date;\\n unsigned4 time;\\n unsigned8 stamp;\\n END;\\nEND;\\n\\n// Function that needs to be monitored\\nEXPORT string Test(string input) := FUNCTION\\n out := input+'2output' \\n // Create a new log record\\n :SUCCESS(HPCC_Proc_Log('Test').AddNewLog);\\n RETURN out;\\nEND;\\n
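As the replies in this thread note, STD.Audit.Audit() can replace the read/rewrite cycle by writing to the system log instead. A minimal sketch of that variant; the two-argument form (audit type, message) is assumed from the Std.Audit documentation linked above and should be verified against the Standard Library Reference:

IMPORT STD;
msg    := 'Test|' + STD.System.Job.User() + '|' + STD.System.Job.WUID();
logged := STD.Audit.Audit('REPO_USAGE', msg);    // assumed signature: (audit type, message)
OUTPUT(logged);                                  // the entry itself lands in the system log, e.g. /var/log/syslog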
\\n\\nThank you! \", \"post_time\": \"2015-11-16 19:19:56\" },\n\t{ \"post_id\": 8686, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"tlhumphrey2\", \"post_text\": \"Chan,\\n\\nIt may pay you to take some of our free ECL classes (see here, https://learn.lexisnexis.com/lexisnexis/user_training.aspx?Track=141).\\n\\nTim\", \"post_time\": \"2015-11-25 14:29:34\" },\n\t{ \"post_id\": 8684, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"tlhumphrey2\", \"post_text\": \"
IMPORT Std;\\nIMPORT * FROM Std;\\nrec := RECORD\\nSTRING w;\\nEND;\\ns:='w1 w2 w3 w4 w5 w6 w7 w8 w9 wA wB wC wD wE wF wG wH wI wJ wK wL wM wN wO wP wQ wR wS wT wU wV wW wX wY wZ';\\nw:=Std.Str.SplitWords(s,' ');\\nwDS:=PROJECT(DATASET([w],rec),TRANSFORM({UNSIGNED c, STRING w},SELF.c:=0,SELF.w:=LEFT.w));\\nOUTPUT(wDS,NAMED('wDS'));\\n\\nwDS_rec := RECORDOF(wDS);\\nwDS_rec catenate(wDS_rec L, wDS_rec R, UNSIGNED c) := TRANSFORM\\n SELF.w := IF(L.w='',R.w, L.w+' '+R.w);\\n\\t SELF.c := c;\\nEND;\\n\\nloopbody(DATASET(wDS_rec) ds, unsigned c) := FUNCTION\\n mx := IF(COUNT(wDS)-c = 4, COUNT(wDS)-c, 4);\\n\\tn := IF(mx < 4, mx, (mx-1));\\n newDS := ds + ITERATE(wDS[c..(c+n)],catenate(LEFT,RIGHT,c));\\nreturn newDS;\\nEND;\\n\\n\\noutDS:=LOOP(DATASET([],wDS_rec),COUNT(wDS),loopbody(ROWS(LEFT),COUNTER));\\nOUTPUT(outDS,NAMED('outDS'),ALL);\\n
\", \"post_time\": \"2015-11-25 14:01:43\" },\n\t{ \"post_id\": 8682, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"chanbchen\", \"post_text\": \"I will try that as well. Thank you.\\n\\nJust one more question please - If I need to assign a unique counter for each combination in the loop example above, how could I achieve that?\\n\\nThanks\\nChan\", \"post_time\": \"2015-11-25 09:55:28\" },\n\t{ \"post_id\": 8672, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"rtaylor\", \"post_text\": \"Chan,\\n\\nYou could probably also do this with PARSE. Try playing around with it and see. \\n\\nRichard\", \"post_time\": \"2015-11-24 17:24:19\" },\n\t{ \"post_id\": 8666, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"chanbchen\", \"post_text\": \"That worked. Thank you!\\n\\nBut I have a question/concern though. Doesn't LOOP take a lot of memory?\\n\\nSay I have a dataset of million (or billion) records and each row in that dataset has a string that needs to be split the way I had mentioned. How do I achieve this in that case?\\n\\nJust out of curiosity - also wanted to check if an embedded C++ code would handle this better?\\n\\nThanks\\nChan\", \"post_time\": \"2015-11-24 07:59:15\" },\n\t{ \"post_id\": 8662, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"tlhumphrey2\", \"post_text\": \"I missed up. Replace the ROLLUP with ITERATE and you get what you want. So your ITERATE would look like the following:\\n\\n
ITERATE(wDS[c..(c+n)],catenate(LEFT,RIGHT))
\", \"post_time\": \"2015-11-23 20:29:43\" },\n\t{ \"post_id\": 8658, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Re: Word Combinations\", \"username\": \"tlhumphrey2\", \"post_text\": \"IMPORT Std;\\nIMPORT * FROM Std;\\nrec := RECORD\\nSTRING w;\\nEND;\\ns:='w1 w2 w3 w4 w5 w6 w7 w8 w9 wA wB wC wD wE wF wG wH wI wJ wK wL wM wN wO wP';\\nw:=Std.Str.SplitWords(s,' ');\\nwDS:=PROJECT(DATASET([w],rec),TRANSFORM(rec,SELF.w:=REGEXREPLACE('[^\\\\\\\\w\\\\\\\\-]',LEFT.w,'')));\\nOUTPUT(wDS,NAMED('wDS'));\\n\\nwDS_rec := RECORDOF(wDS);\\nwDS_rec catenate(wDS_rec L, wDS_rec R) := TRANSFORM\\n SELF.w := IF(L.w='',R.w, L.w+' '+R.w);\\nEND;\\n\\nloopbody(DATASET(wDS_rec) ds, unsigned c) := FUNCTION\\n mx := IF(COUNT(wDS)-c = 4, COUNT(wDS)-c, 4);\\n\\tn := IF(mx < 4, mx, (mx-1));\\n newDS := ds + ROLLUP(wDS[c..(c+n)],1=1,catenate(LEFT,RIGHT));\\nreturn newDS;\\nEND;\\n\\n\\noutDS:=LOOP(DATASET([],wDS_rec),COUNT(wDS),loopbody(ROWS(LEFT),COUNTER));\\nOUTPUT(outDS,NAMED('outDS'),ALL);\\n\\n
\", \"post_time\": \"2015-11-23 19:58:12\" },\n\t{ \"post_id\": 8656, \"topic_id\": 2022, \"forum_id\": 8, \"post_subject\": \"Word Combinations\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay I have this layout:\\n\\nWordsLayout := RECORD\\nSTRING Line;\\nDATASET({STRING Words;}) Combinations;\\nEND;\\n\\nSay I have a record of the above layout with value for the field "Line" set as this string - 'W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15'.\\n\\nI would like to generate the following combinations of the words in the above line and set it to the child dataset field "Combinations".\\n\\n\\nW1\\nW1 W2\\nW1 W2 W3\\nW1 W2 W3 W4\\nW2 \\nW2 W3\\nW2 W3 W4\\nW2 W3 W4 W5\\nW3\\nW3 W4\\nW3 W4 W5\\nW3 W4 W5 W6\\n...\\n...\\nW14\\nW14 W15\\nW15\\n\\nStarting with each word in the line, I would like to generate strings with 1 word, 2 words, 3 words and 4 words combinations.\\n\\nI would like to do this irrespective of the number of words in the input line.\\n\\nPlease help me with an optimal approach to achieve this.\\n\\nThanks\", \"post_time\": \"2015-11-23 15:36:27\" },\n\t{ \"post_id\": 8704, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"Re: #IF( condition )\", \"username\": \"rtaylor\", \"post_text\": \"Error: syntax error near ")" : expected datarow, identifier, pattern-name, action, pattern (36, 46 - Bair.Orbit_Module_Builts)\\n\\nError: Action side effect is not associated with a definition (20, 49 - Bair.Orbit_Module_Builts)
The first error is on line 36 of the Bair.Orbit_Module_Builts code file, so you need to look at that code to see what it's doing. The error message indicates that you should be passing something and you're not.\\n\\nThe second error tells me you have a "side-effect" action, and the rules for those have changed between 702 and OSS. Look at the FUNCTION docs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-25 23:51:24\" },\n\t{ \"post_id\": 8702, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"Re: #IF( condition )\", \"username\": \"BarrOs01\", \"post_text\": \" \\t\\t\\t\\tBair.Files().Orbit_Agencies t2(Bair.Layouts.Orbit_Agencies_Layout L) := TRANSFORM\\n \\t\\t\\t\\t\\tProcess_Agency(L.AgencyName);\\n \\t\\t\\t\\tEND;\\n \\t\\t\\t\\tOrbit_Build_All := PROJECT(Bair.Files().Orbit_Agencies, t2(LEFT));
\\n\\nOn the code above I'm getting the following errors:\\n\\nError: syntax error near ")" : expected datarow, identifier, pattern-name, action, pattern (36, 46 - Bair.Orbit_Module_Builts)\\n\\nError: Action side effect is not associated with a definition (20, 49 - Bair.Orbit_Module_Builts)\\n\\n I have tried to convert the function into a module and vice-versa. What would be the right way to define the module or function and how should I call it from the transform structure ??\", \"post_time\": \"2015-11-25 19:39:40\" },\n\t{ \"post_id\": 8696, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"Re: #IF( condition )\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01, for each agency I need to execute a function (Process_Agency) that spray inputs, build logical files, create keys and deploy.
I would start by splitting out all those actions from a single function into separate ones. ECL really is best done with "K.I.S.S." programming. \\n\\nI would approach this by starting with the APPLY() action to spray the files. I would also look at having the spray workunit finish with the NOTIFY() action to kick off another workunit that's waiting for the files to be successfully sprayed so it can then do the work of building files and keys. That could then NOTIFY another workunit to do your deploy. I've written example code for this type of process before, and that code is now in production for fully automated spray/process jobs. I'd give you the code, but I'm on vacation this week and on my personal laptop, not my work machine.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-25 18:46:00\" },\n\t{ \"post_id\": 8692, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"Re: #IF( condition )\", \"username\": \"BarrOs01\", \"post_text\": \"I want to loop a list of agencies and for each agency I need to execute a function (Process_Agency) that spray inputs, build logical files, create keys and deploy. This function has many outputs and I believe is having a conflict when I use a transform structure.\\n\\nBy using a transform structure I get different errors, I was trying to implement an alternative of the transform in order to loop records and execute a function for each agency.\", \"post_time\": \"2015-11-25 17:39:02\" },\n\t{ \"post_id\": 8690, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"Re: #IF( condition )\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nYou're using ECL's Template Language, which is a code generation tool. As such, it is a procedural language designed to parse input XML and generate ECL code for you, but it is absolutely not a code execution tool. Wrong tool, wrong job.\\n\\nSo the real question then becomes, what are you actually trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-25 17:07:46\" },\n\t{ \"post_id\": 8680, \"topic_id\": 2028, \"forum_id\": 8, \"post_subject\": \"#IF( condition )\", \"username\": \"BarrOs01\", \"post_text\": \"I'm trying to create a conditional loop using a variable, but it seems I'm limited to use constants in the conditional expression.\\n\\n\\n#DECLARE (SetString);\\n #DECLARE (Ndx);\\n #SET (Ndx, 1);\\n #LOOP\\n #IF (%Ndx% > 9) // Here is where I need a variable instead of 9\\n #BREAK\\n #ELSE\\n DO_SOMETHING;\\n #SET (Ndx, %Ndx% + 1);\\n #END\\n #END\\n\\nIs there any work around for this ?\", \"post_time\": \"2015-11-24 22:07:48\" },\n\t{ \"post_id\": 8770, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"rtaylor\", \"post_text\": \"Changing the case of the bad words in the set does NOT make the function case insensitive, it only changes the comparison to a different specific case. Here's how to make it truly case insensitive:
SetBadWords := ['JUNK', 'GARBAGE', 'CRUD'];\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n Proj := PROJECT(ds,\\n TRANSFORM({BOOLEAN Fnd},\\n SELF.Fnd := STD.STR.ToUpperCase(LEFT.word) IN SetBadWords));\\n RETURN EXISTS(Proj(Fnd=TRUE)); \\nEND;
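For example, with the set values kept in upper case (as above, since each word is upper-cased before the comparison), the function can be used as a filter exactly as in the earlier posts in this thread:

OUTPUT(d(IsBadWordPresent(LEGAL_NAME, DBA_NAME, SetBadWords)));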
If this version doesn't filter out any records, then I would suggest that there are most likely no records with the words 'JUNK', 'GARBAGE', or 'CRUD' in them in the dataset you're using.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-08 20:16:28\" },\n\t{ \"post_id\": 8768, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"This works thank you bforeman!\\n\\nimport std;\\n\\nrecord_layout := RECORD\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Auth_Sign_Date; \\n string Received_Date;\\nEND;\\n\\nd := DATASET('~thor400::in::fincen_money_services_businesses', record_layout, csv(quote('"'), heading(single)) )(legal_name<>'' or dba_name<>'');\\n\\nOUTPUT(d);\\n\\nrecord_layout2 := record\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Received_Date;\\n boolean issuer_of_travelers_checks;\\n boolean seller_of_travelers_checks;\\n boolean redeemer_of_travelers_checks;\\n boolean issuer_of_money_orders;\\n boolean seller_of_money_orders;\\n boolean redeemer_of_money_orders;\\n boolean currency_dealer_or_exchanger;\\n boolean check_casher;\\n boolean money_transmitter;\\nend;\\n\\np := project(d, transform(record_layout2,\\n self.issuer_of_travelers_checks := std.str.find(left.Services_Provided, 'A', 1) > 0;\\n self.seller_of_travelers_checks := std.str.find(left.Services_Provided, 'B', 1) > 0;\\n self.redeemer_of_travelers_checks := std.str.find(left.Services_Provided, 'C', 1) > 0;\\n self.issuer_of_money_orders := std.str.find(left.Services_Provided, 'D', 1) > 0;\\n self.seller_of_money_orders := std.str.find(left.Services_Provided, 'E', 1) > 0;\\n self.redeemer_of_money_orders := std.str.find(left.Services_Provided, 'F', 1) > 0;\\n self.currency_dealer_or_exchanger := std.str.find(left.Services_Provided, 'G', 1) > 0;\\n self.check_casher := std.str.find(left.Services_Provided, 'H', 1) > 0;\\n self.money_transmitter := std.str.find(left.Services_Provided, 'I', 1) > 0;\\n self := left));\\n \\n//output(enth(p, 100), named('sample_FINCEN_MSB_listings')); // sample of 100 random records\\n\\nSetBadWords := ['Junk', 'Garbage', 'Crud'];\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n Proj := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF.Fnd := LEFT.word IN SetBadWords));\\n RETURN EXISTS(Proj(Fnd=TRUE)); \\nEND;\\n\\nOUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));\\n\\n
\", \"post_time\": \"2015-12-08 20:00:36\" },\n\t{ \"post_id\": 8766, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"bforeman\", \"post_text\": \"In the code you are posting, you are missing a crucial piece:\\n\\nOUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));
\\n\\nIn my testing the function works perfectly and when I tested with a sample inline dataset, only the records with 'Crud', 'Garbage', or 'Junk was returned.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-08 19:39:20\" },\n\t{ \"post_id\": 8764, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"It returns the entire list when case of words within 'SetBadWords' is changed.\\n\\nHow can this example[1] output just the records containing a set of words? (I've tried following the 'Records Containing Any of a Set of Words' example from the ECL Programmers Guide to no avail)\\n\\n[1] https://hpccsystems.com/sites/default/f ... s-List.ecl\\n\\nThis code returns all records:\\nimport std;\\n\\nrecord_layout := RECORD\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Auth_Sign_Date; \\n string Received_Date;\\nEND;\\n\\nd := DATASET('~thor400::in::fincen_money_services_businesses', record_layout, csv(quote('"'), heading(single)) )(legal_name<>'' or dba_name<>'');\\n\\nOUTPUT(d);\\n\\nrecord_layout2 := record\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Received_Date;\\n boolean issuer_of_travelers_checks;\\n boolean seller_of_travelers_checks;\\n boolean redeemer_of_travelers_checks;\\n boolean issuer_of_money_orders;\\n boolean seller_of_money_orders;\\n boolean redeemer_of_money_orders;\\n boolean currency_dealer_or_exchanger;\\n boolean check_casher;\\n boolean money_transmitter;\\nend;\\n\\np := project(d, transform(record_layout2,\\n self.issuer_of_travelers_checks := std.str.find(left.Services_Provided, 'A', 1) > 0;\\n self.seller_of_travelers_checks := std.str.find(left.Services_Provided, 'B', 1) > 0;\\n self.redeemer_of_travelers_checks := std.str.find(left.Services_Provided, 'C', 1) > 0;\\n self.issuer_of_money_orders := std.str.find(left.Services_Provided, 'D', 1) > 0;\\n self.seller_of_money_orders := std.str.find(left.Services_Provided, 'E', 1) > 0;\\n self.redeemer_of_money_orders := std.str.find(left.Services_Provided, 'F', 1) > 0;\\n self.currency_dealer_or_exchanger := std.str.find(left.Services_Provided, 'G', 1) > 0;\\n self.check_casher := std.str.find(left.Services_Provided, 'H', 1) > 0;\\n self.money_transmitter := std.str.find(left.Services_Provided, 'I', 1) > 0;\\n self := left));\\n \\n//output(enth(p, 100), named('sample_FINCEN_MSB_listings')); // sample of 100 random records\\n\\nSetBadWords := ['Junk', 'Garbage', 'Crud'];\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n Proj := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF.Fnd := LEFT.word IN SetBadWords));\\n RETURN EXISTS(Proj(Fnd=TRUE)); \\nEND;
\", \"post_time\": \"2015-12-08 17:17:34\" },\n\t{ \"post_id\": 8762, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"rtaylor\", \"post_text\": \"Probably a case sensitivity issue. My example function is case sensitive, so change it to be case insensitive and run it again -- you will probably see a different result.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-08 16:17:17\" },\n\t{ \"post_id\": 8760, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"How is the recordset containing bad words be written to an output? \\n\\nThis code did not output bad word records:\\nBadRecs := OUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));
\\n\\nCurrently the output lists all 37652 records\\n\", \"post_time\": \"2015-12-08 02:20:49\" },\n\t{ \"post_id\": 8758, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"rtaylor\", \"post_text\": \"linlux,\\n\\nYou have two issues, both corrected in this code:
IsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n Proj := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF.Fnd := LEFT.word IN SetBadWords));\\n RETURN EXISTS(Proj(Fnd=TRUE)); \\nEND;
The first was the comma that should have been a period (SELF.Fnd) and the second was the attempted re-definition of "P" -- you already used "P" as the name of your project, so that name cannot be re-used inside the FUNCTION (still within the scope of visibility of its first use).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-07 21:43:53\" },\n\t{ \"post_id\": 8756, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"Why does this code produce errors? \\n\\nThis forum post is taking the FINCEN-Money-Services-List.ecl HPCC example and outputting records containing a set of words in LEGAL_NAME and DBA_NAME similar to what was done on p86 of ECLProgrammersGuide-5.4.4-1.pdf\\n\\nimport std;\\n\\nrecord_layout := RECORD\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Auth_Sign_Date; \\n string Received_Date;\\nEND;\\n\\nd := DATASET('~thor400::in::fincen_money_services_businesses', record_layout, csv(quote('"'), heading(single)) )(legal_name<>'' or dba_name<>'');\\n\\nOUTPUT(d);\\n\\nrecord_layout2 := record\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Received_Date;\\n boolean issuer_of_travelers_checks;\\n boolean seller_of_travelers_checks;\\n boolean redeemer_of_travelers_checks;\\n boolean issuer_of_money_orders;\\n boolean seller_of_money_orders;\\n boolean redeemer_of_money_orders;\\n boolean currency_dealer_or_exchanger;\\n boolean check_casher;\\n boolean money_transmitter;\\nend;\\n\\np := project(d, transform(record_layout2,\\n self.issuer_of_travelers_checks := std.str.find(left.Services_Provided, 'A', 1) > 0;\\n self.seller_of_travelers_checks := std.str.find(left.Services_Provided, 'B', 1) > 0;\\n self.redeemer_of_travelers_checks := std.str.find(left.Services_Provided, 'C', 1) > 0;\\n self.issuer_of_money_orders := std.str.find(left.Services_Provided, 'D', 1) > 0;\\n self.seller_of_money_orders := std.str.find(left.Services_Provided, 'E', 1) > 0;\\n self.redeemer_of_money_orders := std.str.find(left.Services_Provided, 'F', 1) > 0;\\n self.currency_dealer_or_exchanger := std.str.find(left.Services_Provided, 'G', 1) > 0;\\n self.check_casher := std.str.find(left.Services_Provided, 'H', 1) > 0;\\n self.money_transmitter := std.str.find(left.Services_Provided, 'I', 1) > 0;\\n self := left));\\n \\noutput(enth(p, 100), named('sample_FINCEN_MSB_listings')); // sample of 100 random records\\n\\nSetBadWords := ['JUNK', 'GARBAGE', 'CRUD'];\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n P := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF,Fnd := word IN SetBadWords));\\n RETURN EXISTS(P(Fnd=TRUE)); \\nEND;\\n\\nBadRecs := OUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));
\\n\\nSeverity Source Code Message Col Line\\nError\\teclcc\\t3002\\tsyntax error near ":=" : expected ';'\\t6\\t60\\tstdin:\\nError\\teclcc\\t2167\\tUnknown identifier "Fnd"\\t19\\t61\\tstdin:\\nError\\teclcc\\t3002\\tsyntax error near "END" : \\t1\\t62\\tstdin:\\nError\\teclcc\\t2167\\tUnknown identifier "IsBadWordPresent"\\t21\\t64\\tstdin:
\", \"post_time\": \"2015-12-07 21:15:10\" },\n\t{ \"post_id\": 8748, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"JimD\", \"post_text\": \"There is also a superfluous single quote after the semi-colon in the \\nSetBadWords definition.\\n\\nJim\", \"post_time\": \"2015-12-07 20:07:00\" },\n\t{ \"post_id\": 8744, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"bforeman\", \"post_text\": \"Inside the function you are missing the semicolon at the end of the DATASET statement.\", \"post_time\": \"2015-12-07 19:18:24\" },\n\t{ \"post_id\": 8740, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"Why does this code:\\nimport std;\\n\\nrecord_layout := RECORD\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Auth_Sign_Date; \\n string Received_Date;\\nEND;\\n\\nd := DATASET('~thor400::in::fincen_money_services_businesses', record_layout, csv(quote('"'), heading(single)) )(legal_name<>'' or dba_name<>'');\\n\\nOUTPUT(d);\\n\\nrecord_layout2 := record\\n string LEGAL_NAME; \\n string DBA_NAME; \\n string STREET_ADDRESS;\\n string CITY; \\n string STATE; \\n string ZIP; \\n string Services_Provided;\\n string STATE_CODES_WHERE_SERVICES_OFFERED; \\n string NUMBER_OF_BRANCHES; \\n string Received_Date;\\n boolean issuer_of_travelers_checks;\\n boolean seller_of_travelers_checks;\\n boolean redeemer_of_travelers_checks;\\n boolean issuer_of_money_orders;\\n boolean seller_of_money_orders;\\n boolean redeemer_of_money_orders;\\n boolean currency_dealer_or_exchanger;\\n boolean check_casher;\\n boolean money_transmitter;\\nend;\\n\\np := project(d, transform(record_layout2,\\n self.issuer_of_travelers_checks := std.str.find(left.Services_Provided, 'A', 1) > 0;\\n self.seller_of_travelers_checks := std.str.find(left.Services_Provided, 'B', 1) > 0;\\n self.redeemer_of_travelers_checks := std.str.find(left.Services_Provided, 'C', 1) > 0;\\n self.issuer_of_money_orders := std.str.find(left.Services_Provided, 'D', 1) > 0;\\n self.seller_of_money_orders := std.str.find(left.Services_Provided, 'E', 1) > 0;\\n self.redeemer_of_money_orders := std.str.find(left.Services_Provided, 'F', 1) > 0;\\n self.currency_dealer_or_exchanger := std.str.find(left.Services_Provided, 'G', 1) > 0;\\n self.check_casher := std.str.find(left.Services_Provided, 'H', 1) > 0;\\n self.money_transmitter := std.str.find(left.Services_Provided, 'I', 1) > 0;\\n self := left));\\n \\noutput(enth(p, 100), named('sample_FINCEN_MSB_listings')); // sample of 100 random records\\n\\nSetBadWords := ['JUNK', 'GARBAGE', 'CRUD'];'\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word})\\n P := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF,Fnd := word IN SetBadWords));\\n RETURN EXISTS(P(Fnd=TRUE)); \\nEND;\\n\\nBadRecs := OUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));
\\n\\nReturn this Error Message?\\nSeverity\\tSource\\tCode\\tMessage\\tCol\\tLine\\nError\\teclcc\\t2195\\tString constant is not terminated: "'"\\t44\\t57\\tstdin:\\nError\\teclcc\\t2167\\tUnknown identifier "IsBadWordPresent"\\t1\\t58\\tstdin:\\nError\\teclcc\\t2167\\tUnknown identifier "Fnd"\\t19\\t61\\tstdin:\\nError\\teclcc\\t3002\\tsyntax error near "END"\\t1\\t62\\tstdin:\\nError\\teclcc\\t2167\\tUnknown identifier "IsBadWordPresent"\\t21\\t64\\tstdin:
\", \"post_time\": \"2015-12-07 16:14:52\" },\n\t{ \"post_id\": 8736, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Re: Finding Records Containing Any of a Set of Words\", \"username\": \"bforeman\", \"post_text\": \"Here is one way that you might approach this:\\n\\nIsBadWordPresent(STRING Legal, STRING DBA, SET OF STRING SetBadWords) := FUNCTION\\n ds := DATASET(STD.STR.SplitWords(Legal,' ') + STD.STR.SplitWords(DBA,' '),{STRING word});\\n P := PROJECT(ds,TRANSFORM({BOOLEAN Fnd},SELF,Fnd := word IN SetBadWords));\\n RETURN EXISTS(P(Fnd=TRUE)); \\nEND;\\n\\nBadRecs := OUTPUT(d(IsBadWordPresent(LEGAL_NAME,DBA_NAME,SetBadWords)));\\n
\\n\\nThanks to Richard Taylor who helped to expand my initial idea.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-07 15:23:11\" },\n\t{ \"post_id\": 8730, \"topic_id\": 2040, \"forum_id\": 8, \"post_subject\": \"Finding Records Containing Any of a Set of Words\", \"username\": \"linlux\", \"post_text\": \"How would you use Std.Str.Find() to find a set of three words ('junk', 'garbage' or 'crud') in the 'LEGAL_NAME' or 'DBA_NAME' datasets and output any listing containing these words?\\n\\n\\n\\nimport std;\\n\\nrecord_layout := RECORD\\n\\tstring LEGAL_NAME; \\n\\tstring DBA_NAME; \\n\\tstring STREET_ADDRESS;\\n\\tstring CITY; \\n\\tstring STATE; \\n\\tstring ZIP; \\n\\tstring Services_Provided;\\n\\tstring STATE_CODES_WHERE_SERVICES_OFFERED; \\n\\tstring NUMBER_OF_BRANCHES; \\n\\tstring Auth_Sign_Date; \\n\\tstring Received_Date;\\nEND;\\n\\nd := DATASET('~thor400::in::fincen_money_services_businesses', record_layout, csv(quote('"'), heading(single)) )(legal_name<>'' or dba_name<>'');\\n\\nOUTPUT(d);\\n\\nrecord_layout2 := record\\n\\tstring LEGAL_NAME; \\n\\tstring DBA_NAME; \\n\\tstring STREET_ADDRESS;\\n\\tstring CITY; \\n\\tstring STATE; \\n\\tstring ZIP; \\n\\tstring Services_Provided;\\n\\tstring STATE_CODES_WHERE_SERVICES_OFFERED; \\n\\tstring NUMBER_OF_BRANCHES; \\n\\tstring Received_Date;\\n\\tboolean issuer_of_travelers_checks;\\n\\tboolean seller_of_travelers_checks;\\n\\tboolean redeemer_of_travelers_checks;\\n\\tboolean issuer_of_money_orders;\\n\\tboolean seller_of_money_orders;\\n\\tboolean redeemer_of_money_orders;\\n\\tboolean currency_dealer_or_exchanger;\\n\\tboolean check_casher;\\n\\tboolean money_transmitter;\\nend;\\n\\np := project(d, transform(record_layout2,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.issuer_of_travelers_checks := std.str.find(left.Services_Provided, 'A', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.seller_of_travelers_checks := std.str.find(left.Services_Provided, 'B', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.redeemer_of_travelers_checks := std.str.find(left.Services_Provided, 'C', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.issuer_of_money_orders := std.str.find(left.Services_Provided, 'D', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.seller_of_money_orders := std.str.find(left.Services_Provided, 'E', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.redeemer_of_money_orders := std.str.find(left.Services_Provided, 'F', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.currency_dealer_or_exchanger := std.str.find(left.Services_Provided, 'G', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.check_casher := std.str.find(left.Services_Provided, 'H', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself.money_transmitter := std.str.find(left.Services_Provided, 'I', 1) > 0;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\noutput(enth(p, 100), named('sample_FINCEN_MSB_listings')); // sample of 100 random records\\n\\nSetBadWords := ['JUNK', 'GARBAGE', 'CRUD'];'
\", \"post_time\": \"2015-12-06 21:52:55\" },\n\t{ \"post_id\": 8754, \"topic_id\": 2044, \"forum_id\": 8, \"post_subject\": \"Re: Search multiple files with non-fpos payloaded indices\", \"username\": \"rtaylor\", \"post_text\": \"Vic,\\n\\nSince all your INDEXes have the same structure, putting them all into a superkey is precisely what you need to do -- that's what superkeys are for. A superkey will give your code a single INDEX to search, letting the middleware handle all the intricacies of working with all the separate INDEX files.\\n\\nI also note the use of QSTRING in your payload RECORD structure, which is NOT a good idea. INDEXes are automatically LZW compressed, and a QSTRING field would appear to be binary data to a compression algorithm. That means you are defeating the purpose of the LZW compression. Change that field to STRING and you will end up with better overall data compression in the INDEX.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-07 21:10:10\" },\n\t{ \"post_id\": 8750, \"topic_id\": 2044, \"forum_id\": 8, \"post_subject\": \"Search multiple files with non-fpos payloaded indices\", \"username\": \"kovacsbv\", \"post_text\": \"We use norm_ref as our index into some files, instead of fpos. What I am looking for is a way of taking the search query indices and searching them all at once without having to make up multiple INDEX() calls and then merge them. This is going to go into a published query and I don't want to create a new set of index commands each time.\\n\\nI could make a superkey, but that doesn't seem very elegant, and doesn't seem right within a published query.\\n\\nTIA,\\n\\nVic\\n\\n\\n// IMPORT\\n\\nSearchUID := [\\n\\t '197009369',\\n\\t'1058331805',\\n\\t '956379468',\\n\\t'2411500661'\\n];\\n\\nSearchQueryIndices := [\\n\\t'indexing::query::uid::uid__2014-01__mlk',\\n\\t'indexing::query::uid::uid__2014-02__mlk',\\n\\t'indexing::query::uid::uid__2014-03__mlk',\\n\\t'indexing::query::uid::uid__2014-04__mlk',\\n\\t'indexing::query::uid::uid__2014-05__mlk',\\n\\t'indexing::query::uid::uid__2014-06__mlk',\\n\\t'indexing::query::uid::uid__2014-07__mlk',\\n\\t'indexing::query::uid::uid__2014-08__mlk',\\n\\t'indexing::query::uid::uid__2014-09__mlk',\\n\\t'indexing::query::uid::uid__2014-10__mlk',\\n\\t'indexing::query::uid::uid__2014-11__mlk',\\n\\t'indexing::query::uid::uid__2014-12__mlk',\\n\\t'indexing::query::uid::uid__2015-01__mlk',\\n\\t'indexing::query::uid::uid__2015-02__mlk',\\n\\t'indexing::query::uid::uid__2015-03__mlk',\\n\\t'indexing::query::uid::uid__2015-04__mlk',\\n\\t'indexing::query::uid::uid__2015-05__mlk',\\n\\t'indexing::query::uid::uid__2015-06__mlk',\\n\\t'indexing::query::uid::uid__2015-07__mlk',\\n\\t'indexing::query::uid::uid__2015-08__mlk',\\n\\t'indexing::query::uid::uid__2015-09__mlk',\\n\\t'indexing::query::uid::uid__2015-10__mlk',\\n\\t'indexing::query::uid::uid__2015-11__mlk',\\n\\t'indexing::query::uid::uid__2015-12__mlk'\\n];\\n\\nIndexPayloadRecord := RECORD\\n// string60 uid; // Put this below\\n qstring50 norm_ref; // Payload\\n string2 row_typ; // Payload\\n// unsigned8 __internal_fpos__; // Get rid of this\\nEND;\\n\\nIndexKeyRecord := RECORD\\n string60 uid;\\nEND;\\n\\nIndex__2014_01 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-01__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_01 := Index__2014_01( uid IN SearchUID );\\n\\n\\nIndex__2014_02 := INDEX(\\n 
IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-02__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_02 := Index__2014_02( uid IN SearchUID );\\n\\n\\nIndex__2014_03 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-03__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_03 := Index__2014_03( uid IN SearchUID );\\n\\n\\nIndex__2014_04 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-04__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_04 := Index__2014_04( uid IN SearchUID );\\n\\n\\nIndex__2014_05 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-05__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_05 := Index__2014_05( uid IN SearchUID );\\n\\n\\nIndex__2014_06 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-06__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_06 := Index__2014_06( uid IN SearchUID );\\n\\n\\nIndex__2014_07 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-07__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_07 := Index__2014_07( uid IN SearchUID );\\n\\n\\nIndex__2014_08 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-08__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_08 := Index__2014_08( uid IN SearchUID );\\n\\n\\nIndex__2014_09 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-09__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_09 := Index__2014_09( uid IN SearchUID );\\n\\n\\nIndex__2014_10 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-10__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_10 := Index__2014_10( uid IN SearchUID );\\n\\n\\nIndex__2014_11 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-11__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_11 := Index__2014_11( uid IN SearchUID );\\n\\n\\nIndex__2014_12 := INDEX(\\n IndexKeyRecord,\\n\\tIndexPayloadRecord,\\n\\t'~indexing::query::uid::uid__2014-12__mlk',\\n\\tSORTED,\\n\\tDISTRIBUTED\\n);\\nResults__2014_12 := Index__2014_12( uid IN SearchUID );\\n\\nCompleteResults := MERGE(\\n\\tResults__2014_01, \\n\\tResults__2014_02, \\n\\tResults__2014_03, \\n\\tResults__2014_04, \\n\\tResults__2014_05, \\n\\tResults__2014_06, \\n\\tResults__2014_07, \\n\\tResults__2014_08, \\n\\tResults__2014_09, \\n\\tResults__2014_10, \\n\\tResults__2014_11, \\n\\tResults__2014_12, \\n\\tSORTED(uid), \\n\\tLOCAL\\n);\\n\\nCompleteResultsSorted := SORT(CompleteResults, norm_ref);\\n\\nOUTPUT(CompleteResultsSorted);\\n\\n
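A hedged sketch of the superkey approach Richard describes above -- the superkey name here is hypothetical, the subfiles are the monthly indexes already listed, and IndexKeyRecord / IndexPayloadRecord / SearchUID are the definitions from the post:

IMPORT STD;

SuperKeyName := '~indexing::query::uid::uid__superkey';   // hypothetical superkey name

// One-time maintenance job: create the superkey and add each monthly index as a subfile
SetupSuperKey := SEQUENTIAL(
    STD.File.CreateSuperFile(SuperKeyName),
    STD.File.StartSuperFileTransaction(),
    STD.File.AddSuperFile(SuperKeyName, '~indexing::query::uid::uid__2014-01__mlk'),
    STD.File.AddSuperFile(SuperKeyName, '~indexing::query::uid::uid__2014-02__mlk'),
    // ...one AddSuperFile call per remaining monthly index...
    STD.File.FinishSuperFileTransaction());

// The published query then only ever references the single superkey
UID_Key         := INDEX(IndexKeyRecord, IndexPayloadRecord, SuperKeyName);
CompleteResults := SORT(UID_Key(uid IN SearchUID), norm_ref);
OUTPUT(CompleteResults);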
\", \"post_time\": \"2015-12-07 20:20:59\" },\n\t{ \"post_id\": 8842, \"topic_id\": 2046, \"forum_id\": 8, \"post_subject\": \"Re: Dynamic ESDL Query\", \"username\": \"anthony.fishbeck\", \"post_text\": \"You are on the right track, in that the trick is to transform whatever internal formats you have to match the layout generated from the ESDL.\\n\\nMaybe someone with experience writing ESDL queries can chime in as to what is the most efficient way of doing those transformations.\", \"post_time\": \"2015-12-14 18:26:26\" },\n\t{ \"post_id\": 8836, \"topic_id\": 2046, \"forum_id\": 8, \"post_subject\": \"Re: Dynamic ESDL Query\", \"username\": \"MauricioNO\", \"post_text\": \"I don't know if there is any other better way to solve this(I hope there is)\\nBut the way I managed to do it is performing a transform in all the record set fetched from the library operation and making sure it is of the type t_ComplaintSearchRecord.\\n\\nI added this to the query file:\\n\\niesp.wsNAME.t_ComplaintSearch toESDL(lib3.matches L) := TRANSFORM\\n SELF := L;\\nEND;\\n\\nds_out := PROJECT(lib3.matches, toESDL(LEFT));\\n
\\n\\nBut this only works because the field names in the data itself and the ones I defined on the layout are the same; otherwise I would have to add a line like the following for each field.\\nSELF.FieldNameAsDefinedOnWsNAME.ecl := L.FieldNameOnTheData
\\nAnd on cases where the Response record set has other datasets inside it I would need to use nested transforms to make sure the other datasets are also as defined on wsNAME.ecl\", \"post_time\": \"2015-12-13 20:56:28\" },\n\t{ \"post_id\": 8782, \"topic_id\": 2046, \"forum_id\": 8, \"post_subject\": \"Dynamic ESDL Query\", \"username\": \"MauricioNO\", \"post_text\": \"Hello,\\n\\nI have created a new web service called wsNAME and I added two methods to it, AddThis(which was done by following this guide: https://hpccsystems.com/download/docume ... namic-esdl )\\nand a ComplaintSearch method, which should retrieve data from a logical file on my HPCC platform.\\n\\nHere is the ecm definition for my service\\n\\nESPservice wsNAME\\n{\\nESPmethod AddThis(AddThisRequest, AddThisResponse);\\nESPmethod ComplaintSearch(ComplaintSearchRequest, ComplaintSearchResponse);\\n};\\n\\n//AddThis method\\nESPrequest AddThisRequest\\n{\\nint FirstNumber;\\nint SecondNumber;\\n};\\n\\nESPresponse AddThisResponse\\n{\\nint Answer;\\n};\\n\\n//ComplaintSearch Method\\nESPrequest ComplaintSearchRequest\\n{\\n string zipCode;\\n string state;\\n};\\n\\nESPstruct ComplaintSearchRecord\\n{\\n string complaintid;\\n string product;\\n string subproduct;\\n string issue;\\n string subissue;\\n string state; \\n string zipcode; \\n string submittedvia;\\n string datereceived;\\n string datesent;\\n string company;\\n string response;\\n string timely;\\n string disputed;\\n};\\n\\nESPresponse ComplaintSearchResponse\\n{\\n ESParray<ESPstruct ComplaintSearchRecord, Complaint> Records;\\n};\\n
\\n\\nThis is the ECL generated by it:\\n\\n/*** Not to be hand edited (changes will be lost on re-generation) ***/\\n/*** ECL Interface generated by esdl2ecl version 1.0 from wsNAME.xml. ***/\\n/*===================================================*/\\n\\nexport wsNAME := MODULE\\n\\nexport t_ComplaintSearchRecord := record\\n string complaintid {xpath('complaintid')};\\n string product {xpath('product')};\\n string subproduct {xpath('subproduct')};\\n string issue {xpath('issue')};\\n string subissue {xpath('subissue')};\\n string state {xpath('state')};\\n string zipcode {xpath('zipcode')};\\n string submittedvia {xpath('submittedvia')};\\n string datereceived {xpath('datereceived')};\\n string datesent {xpath('datesent')};\\n string company {xpath('company')};\\n string response {xpath('response')};\\n string timely {xpath('timely')};\\n string disputed {xpath('disputed')};\\nend;\\n\\nexport t_AddThisRequest := record\\n integer FirstNumber {xpath('FirstNumber')};\\n integer SecondNumber {xpath('SecondNumber')};\\nend;\\n\\nexport t_ComplaintSearchRequest := record\\n string zipCode {xpath('zipCode')};\\n string state {xpath('state')};\\nend;\\n\\nexport t_AddThisResponse := record\\n integer Answer {xpath('Answer')};\\nend;\\n\\nexport t_ComplaintSearchResponse := record\\n dataset(t_ComplaintSearchRecord) Records {xpath('Records/Complaint'), MAXCOUNT(1)};\\nend;\\n\\n\\nend;\\n\\n/*** Not to be hand edited (changes will be lost on re-generation) ***/\\n/*** ECL Interface generated by esdl2ecl version 1.0 from wsNAME.xml. ***/\\n/*===================================================*/\\n\\n
\\n\\nAnd here is where the problem lies, on the query to retrieve the data.\\nIt takes to inputs by the user, zipCode and State, and shows every complaint entry that matches either one of the inputs.\\n\\n//INTERFACE definition\\nIMPORT ConsumerComplaints as CC;\\nIMPORT iesp;\\n\\nrec_in := iesp.wsNAME.t_ComplaintSearchRequest;\\nFirst_row := ROW([], rec_in) : STORED('ComplaintSearchRequest', FEW);\\n\\n\\nCSVRecord := CC.LayoutComplaints;\\nIFilterArgs := INTERFACE //defines passed parameters\\nEXPORT DATASET(CSVRecord) ds;\\nEXPORT STRING searchZip;\\nEXPORT STRING searchState;\\nEND;\\nFilterLibIface2(IFilterArgs args) := INTERFACE\\nEXPORT DATASET(CSVRecord) matches;\\n// EXPORT DATASET(CSVRecord) others;\\nEND;\\n\\n//MODULE Definition\\nFilterDsLib2(IFilterArgs args) := MODULE,LIBRARY(FilterLibIface2)\\nEXPORT matches := args.ds(zipcode = args.searchzip OR state = args.searchState);\\n//EXPORT others := args.ds(zipcode != args.search); //No need to fetch data that doesnt match input criteria in this case\\nEND;\\n\\n\\n//Using the library\\nComplaints := DATASET('~MN::ProcessedComplaints', CSVRecord, Thor);\\n\\nSearchArgs := MODULE(IFilterArgs)\\nEXPORT DATASET(CSVRecord) ds := Complaints;\\nEXPORT STRING searchzip := First_row.zipcode;\\nEXPORT STRING searchstate := First_row.state;\\nEND;\\nlib3 := LIBRARY(INTERNAL(FilterDsLib2),FilterLibIface2(SearchArgs));\\n//ds_out := DATASET(lib3.matches, iesp.wsNAME.t_ComplaintSearchResponse);\\n//ds_out := ROW({lib3.matches}, iesp.wsNAME.t_ComplaintSearchResponse);\\n\\nOUTPUT(lib3.matches, NAMED('ComplaintSearchResponse'));\\n
\\n\\nAs the output is just the regular lib3.matches it looks and works fine if I go straight to :8002 and test it. I can see all of the complaints on a State and the ones on a specific zipcode.\\n\\nBut that's not how the output should be in order for the query to work with Dynamic ESDL, so I tried using\\nds_out := DATASET(lib3.matches, iesp.wsNAME.t_ComplaintSearchResponse);
\\n\\nBut got the "Error: syntax error near "t_ComplaintSearchResponse" : expected datarow, identifier, macro-name (36, 48), 3002".\\n\\nAlso tried using \\nds_out := ROW({lib3.matches}, iesp.wsNAME.t_ComplaintSearchResponse);
\\nand I get the error "Initializer for field records has the wrong type".\\n\\nSeems like something is wrong between the data type of lib3.matches and what t_ComplaintSearchResponse expects.\\n\\nThis is my first actual query using Dynamic ESDL so any tips and directions are appreciated.\\nThank you!\", \"post_time\": \"2015-12-09 19:22:40\" },\n\t{ \"post_id\": 8862, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Re: Resampling?\", \"username\": \"rtaylor\", \"post_text\": \"Janet,As far as the compressed CSV file, I was told to compress everything and I took it rather literally. I'm able to read the desprayed file just fine.
OK, in re-looking at the docs, I see that COMPRESSED is not listed as an option for OUTPUT,CSV so I expect the compiler is simply ignoring it.\\n\\nGlad it's all working OK for you now,\\n\\nRichard\", \"post_time\": \"2015-12-15 21:21:24\" },\n\t{ \"post_id\": 8860, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Re: Resampling?\", \"username\": \"janet.anderson\", \"post_text\": \"Your code gave me what I wanted, i.e. both files contain the same data records. Thank you. \\n\\nAs far as the compressed CSV file, I was told to compress everything and I took it rather literally. I'm able to read the desprayed file just fine.\", \"post_time\": \"2015-12-15 21:14:47\" },\n\t{ \"post_id\": 8858, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Re: Resampling?\", \"username\": \"rtaylor\", \"post_text\": \"Janet,Is the SEQUENTIAL action causing some resampling between the creation of the thor file and the creation of the CSV file?
In a word -- Yes. The SEQUENTIAL action says "do the first action in my list and then do the next one ..." which means, since your Smpl definition is used in both your "out" OUTPUT and the separate flat file OUTPUT, the MACRO is generating twice.\\n\\nTry it this way and see if you get the same result:IMPORT ut, lib_date, zz_MktgAnalytics;\\n\\nIH := (zz_MktgAnalytics.Files.IH_Core_File)(did <> 0 AND ind = 'CORE'); \\n\\nzz_MktgAnalytics.Create_Sample(Smpl, IH, 0.000001);\\n\\nFinalSample := choosen(Smpl, 100) : INDEPENDENT;\\n \\nout1 := output(FinalSample,, '~ja::tmp::FinalSampleToDE_test_smpl', thor, compressed, overwrite);\\n\\nout2 := output(FinalSample,, '~ja::FinalSampleToDE_test_smpl.csv',\\n csv(heading(SINGLE), separator('|'), quote('"')), compressed, overwrite);\\n \\ndespray := FileServices.DeSpray('~ja::FinalSampleToDE_test_smpl.csv', '10.195.97.26',\\n '/ap/p/mk01/projects/andeja01/' + 'SampleToDateEnhancement_test.csv', -1,\\n , , TRUE); \\nsequential(PARALLEL(out1,out2), despray);
The INDEPENDENT workflow service will ensure FinalSample runs only once.\\n\\nBTW, are you sure you want to compress that CSV file and then despray it? The compressed file will look like binary data, not the standard ASCII text that I would expect a CSV file to contain, when you open the desprayed file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-15 20:27:10\" },\n\t{ \"post_id\": 8852, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Re: Resampling?\", \"username\": \"janet.anderson\", \"post_text\": \"Hi Richard,\\n\\nThe code in the Programmer's Guide is a little different, but relatively similar to what I am using. I think the crux of the question is after the sampling has been done and assigned to the attribute "Smpl", everything subsequent is built in the same workunit from this attribute. I expect the thor file and CSV file that are being output to contain the same data records, but they do not. Is the SEQUENTIAL action causing some resampling between the creation of the thor file and the creation of the CSV file?\\n\\nThanks.\", \"post_time\": \"2015-12-15 17:58:13\" },\n\t{ \"post_id\": 8850, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Re: Resampling?\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nHave you looked at the "Simple Random Sample" article in the Programmer's Guide? That article gives you code to produce statistically accurate "random" samples from any dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-15 16:21:37\" },\n\t{ \"post_id\": 8848, \"topic_id\": 2060, \"forum_id\": 8, \"post_subject\": \"Resampling?\", \"username\": \"janet.anderson\", \"post_text\": \"I am taking a sample and then outputting the result first to a thor file for future analysis and separately to temporary CSV file to be desprayed for delivery to another group. I don't understand why the files don't match. Both output files are built from the same Final Sample attribute, so how is it that they contain different records?\\n\\nA simplified version of my code that creates and outputs the sample is:\\nIMPORT ut, lib_date, zz_MktgAnalytics;\\n\\nIH\\t:=\\t(zz_MktgAnalytics.Files.IH_Core_File)(did <> 0 AND ind = 'CORE'); \\n\\nzz_MktgAnalytics.Create_Sample(Smpl, IH, 0.000001);\\n\\nFinalSample := choosen(Smpl, 100);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\noutput(FinalSample,, '~ja::tmp::FinalSampleToDE_test_smpl', thor, compressed, overwrite);\\n\\nout := output(FinalSample,, '~ja::FinalSampleToDE_test_smpl.csv',\\n csv(heading(SINGLE), separator('|'), quote('"')), compressed, overwrite);\\n\\t\\t\\t \\ndespray := FileServices.DeSpray('~ja::FinalSampleToDE_test_smpl.csv', '10.195.97.26',\\n '/ap/p/mk01/projects/andeja01/' + 'SampleToDateEnhancement_test.csv', -1,\\n , , TRUE);\\t\\nsequential(out, despray);
\\n\\nMy sampling code is:\\nEXPORT Create_Sample(OutFile, InFile, Pct) := MACRO\\n\\t\\t\\t#uniquename(singleton)\\n\\t\\t\\t%singleton% := DATASET([{0}], {real u});\\n\\t\\t\\tOutFile := join(InFile, %singleton%,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(real)((random() % 1000000)/1000000) >= 1-Pct, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTRANSFORM(recordof(InFile),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,all);\\nENDMACRO;\\t\\t
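Richard's point about INDEPENDENT can be reduced to a small sketch (illustrative only; the definition names are hypothetical): a definition whose value comes from RANDOM() may be re-evaluated each time it is used in a workunit unless it is marked INDEPENDENT.

r_each_use := RANDOM() % 100;                  // may be recomputed wherever it is used
r_once     := RANDOM() % 100 : INDEPENDENT;    // evaluated once, the result is reused

OUTPUT(r_each_use, NAMED('UseA'));
OUTPUT(r_each_use, NAMED('UseB'));             // not guaranteed to equal UseA
OUTPUT(r_once, NAMED('StableUseA'));
OUTPUT(r_once, NAMED('StableUseB'));           // same value as StableUseA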
\", \"post_time\": \"2015-12-15 15:47:07\" },\n\t{ \"post_id\": 9014, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"Re: JOINED Functionality in SORT function\", \"username\": \"bforeman\", \"post_text\": \"Quick follow up from the development team:\\n\\nIn general, it is probably better to use distribute to ensure both datasets are distributed the same and then locally sort. If you really want to do something like you are then you can build an index, and use a keyed distribute.\\n\\nAt some point in the future we plan to reimplement this area and make it much more flexible.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-01-06 14:30:13\" },\n\t{ \"post_id\": 8912, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"Re: JOINED Functionality in SORT function\", \"username\": \"vardha24\", \"post_text\": \"Thanks Bob
\", \"post_time\": \"2015-12-18 05:35:05\" },\n\t{ \"post_id\": 8900, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"Re: JOINED Functionality in SORT function\", \"username\": \"bforeman\", \"post_text\": \"So,\\nI searched the repositories for any example of the use of JOINED, and I could not find any.\\n\\nWith that in mind I have a message out to the HPCC development team, but in the meantime the following modification of your code will achieve the same desired result:\\n\\n
joinedset := SORT(Persons_ds,FIRSTNAME);\\njoinedset;\\nmysort := SORT(DS2,FIRSTNAME);\\n\\nout := JOIN(joinedset,mysort,LEFT.FIRSTNAME = RIGHT.FirstName,LOOKUP);\\nout;
\\n\\nThe LOOKUP flag in the JOIN loads the entire right recordset on to every node in your cluster, therefore achieving an implicit local operation.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-17 18:51:18\" },\n\t{ \"post_id\": 8883, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"Re: JOINED Functionality in SORT function\", \"username\": \"vardha24\", \"post_text\": \"Thanks Bob for the prompt help !\\n\\nI tried the suggested solution and it still errors out while executing.\\nHere is what I tried :\\n\\njoinedset := SORT(Persons_ds,Persons_ds.FIRSTNAME);\\njoinedset;\\nmysort := SORT(DS2,DS2.FIRSTNAME,JOINED(joinedset));\\n//mysort;\\n\\nJOIN(joinedset,mysort,lefT.FIRSTNAME = RIGHT.FIRstname);\\n\\nThis still ends up in an execution error "Warning: (52,1): error C4168: SORT supplied to COSORT needs to be executed at the same time (0, 0), 0, "\\n\\nCould You help\", \"post_time\": \"2015-12-17 13:46:31\" },\n\t{ \"post_id\": 8882, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"Re: JOINED Functionality in SORT function\", \"username\": \"bforeman\", \"post_text\": \"Try this instead:\\n\\njoinedset := SORT(Persons_ds,Persons_ds.FIRSTNAME);\\njoinedset;\\nmysort := SORT(DS2,DS2.FIRSTNAME,JOINED(joinedset));\\n\\nout := JOIN(joinedset,mysort,LEFT.FIRSTNAME = RIGHT.FIRSTNAME);
\\n\\nThis is what you want, right?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-17 13:19:01\" },\n\t{ \"post_id\": 8878, \"topic_id\": 2068, \"forum_id\": 8, \"post_subject\": \"JOINED Functionality in SORT function\", \"username\": \"vardha24\", \"post_text\": \"Hi All\\n\\nTrying to use the JOINED functionality in SORT Function \\n\\nRec1 := RECORD\\n\\tSTRING15 FirstName;\\n\\tSTRING Message := 'HAPPY BIRTHDAY';\\nEND;\\n\\nDS2 := DATASET([{'A','Happy Christmas'},{'A','Happy New Year'},{'A'}],REc1);\\n\\nPersons_Layout := Record\\n\\t\\t\\tINTEGER4 \\tRecid;\\n\\t\\t\\tString15 \\tFirstName; \\n\\t\\t\\tString25\\tLastName;\\n\\t\\t\\tString15 \\tMiddleName;\\n\\t\\t\\tString2\\t\\tNameSuffix;\\t\\n\\t\\t\\tString8\\t\\tFileDate;\\n\\t\\t\\tUnsigned2\\tBureauCode;\\n\\t\\t\\tString1\\t\\tMaritalStatus;\\n\\t\\t\\tString1\\t\\tGender;\\t\\n\\t\\t\\tUNSIGNED1\\tDependentCount;\\n\\t\\t\\tString8\\t\\tBirthDate;\\n\\t\\t\\tString42\\tStreetAddress;\\t\\n\\t\\t\\tString20\\tCity;\\n\\t\\t\\tString2\\t\\tState;\\n\\t\\t\\tString5\\t\\tZipcode;\\t\\nEnd;\\n\\nPersons_ds := DATASET('~online::persons::onlinelessonpersons',Persons_Layout,thor);\\n\\njoinedset := SORT(Persons_ds,Persons_ds.FIRSTNAME);\\njoinedset;\\nSORT(DS2,DS2.FIRSTNAME,JOINED(joinedset));\\n\\nJOIN(joinedset,DS2,lefT.FIRSTNAME = RIGHT.FIRstname);\\n\\n\\nAs seen above trying to sort Persons_ds dataset by firstname and trying to use the same radix point for sorting the ds2 dataset(a small dataset which happens to be the right dataset in the JOIN). \\n\\nGives a run time error : "Warning: (52,1): error C4168: SORT supplied to COSORT needs to be executed at the same time (0, 0), 0, "\\n\\nCan someone suggest what is going wrong here .. \\n\\nThanks in Advance!\\n\\nRegards\\nVarad\", \"post_time\": \"2015-12-17 06:25:46\" },\n\t{ \"post_id\": 8906, \"topic_id\": 2070, \"forum_id\": 8, \"post_subject\": \"Re: When does data "un/re-distribute"\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Bob. I will forward the info to my team.\", \"post_time\": \"2015-12-17 20:48:27\" },\n\t{ \"post_id\": 8904, \"topic_id\": 2070, \"forum_id\": 8, \"post_subject\": \"Re: When does data "un/re-distribute"\", \"username\": \"bforeman\", \"post_text\": \"Hi Janet,\\n\\nLet me answer your last question first. In the Standard Library we have the NODE function (this was actually presented in class). By adding a field to your recordset you can monitor which node the record exists on.\\n\\nThat said, the concept of distributing data of course is to have each node work its equal share to the job at hand. It is the skew that appears on the graph that indicates that a distribute (or re-distribute) may be necessary. Skew tells you that one node may be working a given percentage over its normal workload, and also identifies nodes that are underachievers.\\n\\nThe compiler and the cluster binaries do a pretty good job of automating and optimizing your big data jobs. The old adage that "if it's not broke, don't fix it" applies here. It is a good and best practice to monitor your graphs to look for skew and distribute when necessary, but try to avoid overkill and let the compiler do the work for you.\\n\\nMore specifically to answer your questions:\\n\\nIt is possibly that the projects are un-distributing/re-distributing the data?
\\n\\nNo, PROJECT and ITERATE do not re-distribute records. SORT and JOIN do re-distribute records.\\n\\nShould I add explicit distributes before each local operation, even if the data was distributed by the same fields above?
\\n\\nOnly if you want to re-distribute based on different criteria.\\n\\nWill this cause a hit to performance, or is it pretty trivial to distribute data that is already distributed by the same hash value?\\n
\\nThe compiler may be smart enough to optimize them out, but why chance it.\\n\\n\\n"Stay Calm and Love ECL" \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-17 19:52:57\" },\n\t{ \"post_id\": 8892, \"topic_id\": 2070, \"forum_id\": 8, \"post_subject\": \"When does data "un/re-distribute"\", \"username\": \"janet.anderson\", \"post_text\": \"I am reviewing some code. There is a distribute, a project, a local iterate on the distributed fields, another project, another local iterate on the distributed fields, etc. It is possibly that the projects are un-distributing/re-distributing the data? Should I add explicit distributes before each local operation, even if the data was distributed by the same fields above? Will this cause a hit to performance, or is it pretty trivial to distribute data that is already distributed by the same hash value?\\n\\nIf I wanted to take a small example dataset, and trace a record through the process, is there a function that tells me which node a record resides on so that I could better understand when it moves nodes?\", \"post_time\": \"2015-12-17 16:24:40\" },\n\t{ \"post_id\": 8920, \"topic_id\": 2072, \"forum_id\": 8, \"post_subject\": \"Re: #OPTION location within code\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI have a query out to the development team, but my educated guess is Yes, your #OPTION can be embedded in your function, and if there was a conflict in #OPTION settings, the last one encountered in the parsing flow would override the first. But let me get confirmation on this and I will repost if the information above is different.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-18 15:44:55\" },\n\t{ \"post_id\": 8914, \"topic_id\": 2072, \"forum_id\": 8, \"post_subject\": \"#OPTION location within code\", \"username\": \"DSC\", \"post_text\": \"I have several functions that manipulate a file that is specific to a Thor slave. That file always resides on the node on which the slave executes.\\n\\nOne of the functions appears to be 'simple' to the ECL compiler and, as a consequence, the compiler attempts to retarget the job from Thor to hthor. This causes problems because the hthor runs on only one node, so those files are not correctly located by the function. To prevent the compiler from retargeting the job you can add this to the code:\\n\\n
#OPTION('pickBestEngine', FALSE);
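Bob's reply above suggests the directive can also live inside the function that needs it; a hedged, untested sketch of what that embedding might look like (all names here are hypothetical):

ReadSlaveLocalFile(STRING fname) := FUNCTION
    #OPTION('pickBestEngine', FALSE);   // keep this job on Thor rather than letting it retarget to hthor
    ds := DATASET(fname, {STRING line}, CSV);
    RETURN COUNT(ds);
END;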
\\nTypically I've placed options like this at the top level of my code. My question is: Can this option be embedded within those functions where it is needed?\\n\\nRelated, and just for my own curiosity, what would happen if the compiler runs into multiple, conflicting #OPTION directives?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2015-12-18 11:51:59\" },\n\t{ \"post_id\": 8932, \"topic_id\": 2074, \"forum_id\": 8, \"post_subject\": \"Re: Question regarding xml parsing\", \"username\": \"elango_v\", \"post_text\": \"Thank you \", \"post_time\": \"2015-12-18 17:13:56\" },\n\t{ \"post_id\": 8926, \"topic_id\": 2074, \"forum_id\": 8, \"post_subject\": \"Re: Question regarding xml parsing\", \"username\": \"bforeman\", \"post_text\": \"Here is one way to get there:\\n\\n
in_file:=dataset([{'<library><libraryname>govt library</libraryname><book isbn="2707889"><author>john</author><title>The truth</title></book>'+\\n'<book isbn="5778"><author>vikram</author><title>My country</title></book>'+\\n'</library>'}],{string txt});\\n\\nextractedValueRec := RECORD\\n INTEGER isbn;\\n STRING author;\\n\\tSTRING title;\\nEND;\\n\\nextractedRec := RECORD\\n STRING libname;\\n DATASET(extractedValueRec) books;\\nEND;\\n\\nextractedRec t1 := TRANSFORM\\n SELF.libname := XMLTEXT('libraryname');\\n SELF.books := XMLPROJECT('book',\\n TRANSFORM(extractedValueRec,\\n SELF.isbn := (INTEGER)XMLTEXT('@isbn'),\\n SELF.author :=XMLTEXT('author'),\\n SELF.title :=XMLTEXT('title')));\\n END;\\np := PARSE(in_file, txt, t1, XML('library'));\\nOUTPUT(p);\\n
\\n\\nOf course, you can NORMALIZE the result after the PARSE if you need it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-18 16:45:12\" },\n\t{ \"post_id\": 8916, \"topic_id\": 2074, \"forum_id\": 8, \"post_subject\": \"Question regarding xml parsing\", \"username\": \"elango_v\", \"post_text\": \"Hi,\\n\\nBelow is the code snippet that I am using \\n\\nin_file:=dataset([{'<library><libraryname>govt library</libraryname><book isbn="2707889"><author>john</author><title>The truth</title></book>'+\\n '<book isbn="5778"><author>vikram</author><title>My country</title></book>'+\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t'</library>'}],{string txt});\\n\\t\\t\\t\\t\\t\\t\\nbook:=record\\nstring libname:='';\\ninteger id;\\nstring author;\\nstring title;\\nend;\\n\\nbook trans:=transform\\n//self.libname:=xmltext('libraryname');\\nself.id:=(integer)XMLTEXT('@isbn');\\nself.author:=XMLTEXT('author');\\nself.title:=XMLTEXT('title');\\nself:=[];\\nend;\\nout:=parse(in_file,txt,trans,XML('library/book'));\\n\\nbelow is the output\\nid author Title\\n-- ------- ------\\n2707889 John The truth\\n5778 Vikram My country\\n\\nI want to add the library name to each book details. How can I do that?\", \"post_time\": \"2015-12-18 13:38:44\" },\n\t{ \"post_id\": 8952, \"topic_id\": 2078, \"forum_id\": 8, \"post_subject\": \"Re: DEDUP -- more than 1 RIGHT KEEPER record\", \"username\": \"rtaylor\", \"post_text\": \"Varda,\\n\\nYour workaround is to SORT in descending order so that you can do a DEDUP,LEFT instead of DEDUP,RIGHT -- as in this example:\\nPersons_Layout1 := Record\\n\\tINTEGER4 Recid;\\n\\tString15 FirstName; \\n\\tString25\\tLastName;\\nEnd;\\n\\nPersons_ds1 := DATASET([\\n\\t{1,'Fred','Aa'},\\n\\t{2,'Fred','Jones'},\\n\\t{3,'Fred','Aa'},\\n\\t{4,'Fred','Aa'},\\n\\t{5,'Fred','Aa'},\\n\\t{6,'Freddie','Aa'}\\n],Persons_Layout1);\\nsrt1:=Sort(Persons_ds1(lastname = 'Aa'),firstname );\\nsrt2:=Sort(Persons_ds1(lastname = 'Aa'),-firstname );\\nsrt1;\\nsrt2;\\nDEDUP(srt1,firstname,Keep(2),LEFT ); \\nDEDUP(srt2,firstname,Keep(2),LEFT);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-21 14:41:37\" },\n\t{ \"post_id\": 8942, \"topic_id\": 2078, \"forum_id\": 8, \"post_subject\": \"Re: DEDUP -- more than 1 RIGHT KEEPER record\", \"username\": \"bforeman\", \"post_text\": \"Hi Varad,\\n\\nPlease open an issue on the Community Tracker. I can confirm your behavior. \\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nIf this is expected behavior, it certainly needs to be clarified in the documentation.\\n\\nThank you for your report!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-19 22:38:16\" },\n\t{ \"post_id\": 8940, \"topic_id\": 2078, \"forum_id\": 8, \"post_subject\": \"DEDUP -- more than 1 RIGHT KEEPER record\", \"username\": \"vardha24\", \"post_text\": \"Hi \\n\\nI was trying this code :\\nPersons_Layout1 := Record\\n\\t\\t\\tINTEGER4 \\tRecid;\\n\\t\\t\\tString15 \\tFirstName; \\n\\t\\t\\tString25\\tLastName;\\n\\t\\t\\tString15 \\tMiddleName;\\n\\t\\t\\tString2\\t\\tNameSuffix;\\t\\n\\t\\t\\tString8\\t\\tFileDate;\\n\\t\\t\\tUnsigned2\\tBureauCode;\\n\\t\\t\\tString1\\t\\tMaritalStatus;\\n\\t\\t\\tString1\\t\\tGender;\\t\\n\\t\\t\\tUNSIGNED1\\tDependentCount;\\n\\t\\t\\tString8\\t\\tBirthDate;\\n\\t\\t\\tString42\\tStreetAddress;\\t\\n\\t\\t\\tString20\\tCity;\\n\\t\\t\\tString2\\t\\tState;\\n\\t\\t\\tString5\\t\\tZipcode;\\t\\nEnd;\\n\\nPersons_ds1 \\t:= DATASET('~online::persons::onlinelessonpersons',Persons_Layout1,thor);\\nsrt:=Sort(Persons_ds1(firstname = 'Aa'),firstname );\\nsrt;\\nDEDUP(srt,firstname,Keep(4),LEFT ); // This executes fine\\nDEDUP(srt,firstname,Keep(1),RIGHT); // THIS executes fine\\nDEDUP(srt,firstname,Keep(2),RIGHT); // THIS errors out\\n\\nI tried keeping more than 1 Left records and it worked fine. But when trying to Keep the right survivors , I am not able to specify the number as greater than 1 . \\n\\nIs it that, while Dedup supports keeping more than 1 LEFT record, the same does not work for RIGHT option? Or has the construct to be used in a different way for RIGHT option.\\n\\nCould You suggest a workaround\\n\\nThanks in advance !\\n\\nRegards\\nVarad\", \"post_time\": \"2015-12-19 07:03:00\" },\n\t{ \"post_id\": 8974, \"topic_id\": 2080, \"forum_id\": 8, \"post_subject\": \"Re: How to get current time in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Naveen,IMPORT STD;\\n\\nSTD.Date.CurrentTime();\\nSTD.Date.CurrentTime(TRUE);
\\nThis works for me in 5.4.2-1\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-23 14:49:14\" },\n\t{ \"post_id\": 8960, \"topic_id\": 2080, \"forum_id\": 8, \"post_subject\": \"Re: How to get current time in ECL\", \"username\": \"Naveen\", \"post_text\": \"Thanks Richard... \", \"post_time\": \"2015-12-22 09:23:47\" },\n\t{ \"post_id\": 8948, \"topic_id\": 2080, \"forum_id\": 8, \"post_subject\": \"Re: How to get current time in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Naveen,\\n\\nIn my 5.4.2-1 repository I have a Date standard library. In that, I find the STD.Date.CurrentTimestamp() function that returns the current date and time as the number of microseconds since epoch (Jan 1, 1970). Most of the functions in the Date standard Library are not yet documented in the Standard Library Reference, but the file contains standard JavaDoc comments for each function, so that should help.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-21 14:27:15\" },\n\t{ \"post_id\": 8944, \"topic_id\": 2080, \"forum_id\": 8, \"post_subject\": \"How to get current time in ECL\", \"username\": \"Naveen\", \"post_text\": \"Dear Developers,\\n\\nLooking for inbuilt function to get current time.\\n\\nIf there is no such function, then suggest the way out.\", \"post_time\": \"2015-12-21 05:58:59\" },\n\t{ \"post_id\": 23563, \"topic_id\": 2082, \"forum_id\": 8, \"post_subject\": \"Re: Is there anything out there for datetime calculations?\", \"username\": \"Allan\", \"post_text\": \"All,\\n\\nSome STD functions are missing documentation, particularly 'STD.Date'.\\nOpened https://track.hpccsystems.com/browse/HPCC-20792 to get addressed.\\n\\nAlso a datetime difference function is being included in the STD.Date repertoire.\\nhttps://track.hpccsystems.com/browse/HPCC-20951\\n\\nYours\\nAllan\", \"post_time\": \"2018-11-14 19:10:52\" },\n\t{ \"post_id\": 8962, \"topic_id\": 2082, \"forum_id\": 8, \"post_subject\": \"Re: Is there anything out there for datetime calculations?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nDid you miss the CurrentSeconds() function? It returns the current date and time as the number of seconds since epoch (Jan 1, 1970).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-22 15:07:47\" },\n\t{ \"post_id\": 8958, \"topic_id\": 2082, \"forum_id\": 8, \"post_subject\": \"Re: Is there anything out there for datetime calculations?\", \"username\": \"Allan\", \"post_text\": \"Nothing there, so knocked up my own, for use by others I reproduce below:\\n
\\nEXPORT STRING10 DateTimeDifference(STRING20 sd1,STRING20 sd2) := FUNCTION\\n\\n // Input STRINGs 'YYYY-MM-DD HH:MM:SS'\\n // Output STRING 'HHHH:MM:SS' Note multiple days are just represetned as HHHH > 24\\n\\n UNSIGNED8 SecondsSince1900(STRING20 dt) := FUNCTION\\n\\n ASSERT(REGEXFIND('[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}',dt),'Passed Invalid datetime: '+dt,FAIL);\\n RETURN lib_date.dayssince1900((INTEGER2)dt[1..4],(INTEGER1)dt[6..7],(INTEGER1)dt[9..10])*86400\\n +((UNSIGNED8)dt[12..13])*3600\\n +((UNSIGNED8)dt[15..16])*60\\n + (UNSIGNED8)dt[18..19];\\n END;\\n \\n diff := ABS(SecondsSince1900(sd2)-SecondsSince1900(sd1));\\n \\n RETURN INTFORMAT(diff DIV 3600,4,1)+':'+INTFORMAT((diff DIV 60) % 60,2,1)+':'+INTFORMAT(diff % 60,2,1);\\n\\nEND;\\n
\", \"post_time\": \"2015-12-22 08:17:35\" },\n\t{ \"post_id\": 8950, \"topic_id\": 2082, \"forum_id\": 8, \"post_subject\": \"Re: Is there anything out there for datetime calculations?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nTime support has fairly recently been added to the Date standard library, so take a look in the Date.ecl file in your eclibrary and see if any of the new time functions are helpful to you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-21 14:29:26\" },\n\t{ \"post_id\": 8946, \"topic_id\": 2082, \"forum_id\": 8, \"post_subject\": \"Is there anything out there for datetime calculations?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have to find the difference between two datetimes (e.g. operands of the form YYYYMMDDHHMMSS)\\n\\nI see a lot of support for date calculations but nothing for datetimes.\\n\\nThis is a very common thing to want to do, I'm sure I've just missed a post somewhere, if someone could just point me to a link I'd be very grateful. (I don't want to re-invent the wheel)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-12-21 10:42:06\" },\n\t{ \"post_id\": 8966, \"topic_id\": 2084, \"forum_id\": 8, \"post_subject\": \"Re: reading dynamic xml values\", \"username\": \"rtaylor\", \"post_text\": \"Read them both then post-process to decide which to keep.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-22 15:22:57\" },\n\t{ \"post_id\": 8954, \"topic_id\": 2084, \"forum_id\": 8, \"post_subject\": \"reading dynamic xml values\", \"username\": \"elango_v\", \"post_text\": \"Hi Team,\\n\\nI have a xml , in which author xml tag names may changes as below.\\n\\n<library>\\n<book>\\n<author name></author name>\\n</book>\\n<book>\\n<blogger name></blogger name>\\n</book>\\n</library>\\n\\nso for every book if author name is available then I need to read the author name other wise I need to read blogger name. How can I read it?\", \"post_time\": \"2015-12-21 16:07:51\" },\n\t{ \"post_id\": 8976, \"topic_id\": 2088, \"forum_id\": 8, \"post_subject\": \"Re: Reading multiple tag names\", \"username\": \"elango_v\", \"post_text\": \"Thanks Bob for the assistance \", \"post_time\": \"2015-12-23 17:39:44\" },\n\t{ \"post_id\": 8972, \"topic_id\": 2088, \"forum_id\": 8, \"post_subject\": \"Re: Reading multiple tag names\", \"username\": \"bforeman\", \"post_text\": \"Elango,\\n\\nIn your previous post, the solution was to use XMLPROJECT. If you understand the data you are working with, all you need to do is to add another nested XMLPROJECT:\\n\\n
in_file := dataset([{'<library><libraryname>govt library</libraryname>'+\\n'<book isbn="2707889"><author>john</author><title_book>The truth</title_book><cat>a</cat><cat>b</cat><cat>c</cat></book>'+\\n'<book isbn="5778"><author>vikram</author><title_blog>My country</title_blog><cat>a</cat><cat></cat><cat></cat></book>'+\\n'</library>'}],{string txt});\\n\\ncatrec := RECORD\\nSTRING cat;\\nEND;\\n\\n\\nextractedValueRec := RECORD\\n INTEGER isbn;\\n STRING author;\\n STRING title_book;\\n STRING title_blog;\\n DATASET(catrec) cats;\\nEND;\\n\\nextractedRec := RECORD\\n STRING libname;\\n DATASET(extractedValueRec) books;\\nEND;\\n\\nextractedRec t1 := TRANSFORM\\n SELF.libname := XMLTEXT('libraryname');\\n SELF.books := XMLPROJECT('book',\\n TRANSFORM(extractedValueRec,\\n SELF.isbn := (INTEGER)XMLTEXT('@isbn'),\\n SELF.author := XMLTEXT('author'),\\n SELF.title_blog := XMLTEXT('title_blog'),\\n\\t\\t SELF.title_book := XMLTEXT('title_book'),\\n\\t\\t SELF.cats := XMLPROJECT('cat',\\n\\t\\t TRANSFORM(catrec,\\n\\t\\t\\t\\t\\t SELF.cat := XMLTEXT(''))))); \\n END;\\np := PARSE(in_file, txt, t1, XML('library'));\\nOUTPUT(p);
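The original question in this thread also asks for cat_a/cat_b/cat_c columns; a minimal sketch of one way to flatten the cats child dataset afterwards (field names are taken from the parse above, but the flattening approach itself is assumed rather than part of the original reply):

flatRec := RECORD
 INTEGER isbn;
 STRING cat_a;
 STRING cat_b;
 STRING cat_c;
END;
// Mark which cat values ('a','b','c') were present for each parsed book
flatBooks := PROJECT(p[1].books,
 TRANSFORM(flatRec,
  SELF.isbn := LEFT.isbn,
  SELF.cat_a := IF(EXISTS(LEFT.cats(cat = 'a')), 'a', ''),
  SELF.cat_b := IF(EXISTS(LEFT.cats(cat = 'b')), 'b', ''),
  SELF.cat_c := IF(EXISTS(LEFT.cats(cat = 'c')), 'c', '')));
OUTPUT(flatBooks);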
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-23 08:56:17\" },\n\t{ \"post_id\": 8970, \"topic_id\": 2088, \"forum_id\": 8, \"post_subject\": \"Reading multiple tag names\", \"username\": \"elango_v\", \"post_text\": \"Hi Team,\\nsample xml:\\n<library><libraryname>govt library</libraryname>\\n<book isbn="2707889"><author>john</author><title_book>The truth</title_book><cat>a</cat><cat>b</cat><cat>c</cat></book>'+\\n'<book isbn="5778"><author>vikram</author><title_blog>My country</title_blog><cat>a</cat><cat></cat><cat></cat></book>'+\\n'</library>'\\n\\nI need to have 3 columns in my output layout cat_a,cat_b,cat_c.\\nBut the tag name would be always cat and their values are (a/b/c). \\nIf 'a' is the value of the tag cat then I need to store 'a' to the cat_a value. Cat_b,cat_c columns values are populated like this.\\nI handled all other tags and stored all \\n except handling this cat tag. Can you please let me know how can I handle the multiple tags with the same name and store into different column?\\n\\nThanks,\\nElango\", \"post_time\": \"2015-12-22 17:56:52\" },\n\t{ \"post_id\": 9124, \"topic_id\": 2092, \"forum_id\": 8, \"post_subject\": \"Re: Generating Random dates\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,\\n\\nHere's a slightly more generic way to do it:IMPORT STD;\\n\\nChooseFromAndToDate_FirstClaims(UNSIGNED4 StartDate,UNSIGNED4 DateSpan,\\n UNSIGNED4 DaySpan,UNSIGNED4 NumRecs) := FUNCTION\\n GenDates(UNSIGNED4 Rval) := FUNCTION\\n Dates := MODULE\\n EXPORT From_Date := StartDate + (Rval % DateSpan);\\n EXPORT To_Date := From_date + (Rval % DaySpan) + 1;\\n END;\\n RETURN Dates;\\n END;\\n Rds := DATASET(NumRecs, TRANSFORM({UNSIGNED4 Rval},SELF.Rval := RANDOM()));\\n ds := DATASET(NumRecs, TRANSFORM({UNSIGNED FD, UNSIGNED TD},\\n date := GenDates(Rds[COUNTER].Rval);\\n SELF.FD := date.From_Date,\\n SELF.TD := date.To_Date));\\n RETURN ds;\\nEND;\\n\\nChooseFromAndToDate_FirstClaims(STD.Date.FromJulianYMD(2005, 1, 1),365,90,10);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-21 15:53:07\" },\n\t{ \"post_id\": 9118, \"topic_id\": 2092, \"forum_id\": 8, \"post_subject\": \"Re: Generating Random dates\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Thanks Bob, This is really informative.\\n\\nRamesh.\", \"post_time\": \"2016-01-21 05:09:23\" },\n\t{ \"post_id\": 8982, \"topic_id\": 2092, \"forum_id\": 8, \"post_subject\": \"Re: Generating Random dates\", \"username\": \"bforeman\", \"post_text\": \"Hi Ramesh,\\n\\nHere is how I would approach this:\\n\\n IMPORT STD;\\n FromDateChosen := STD.Date.FromJulianYMD(2005, 1, 1) + \\n RANDOM()%365)+1; // Choosing one day in 2005\\n FromAndToDateSpan := 90;\\n FromAndToDateSpanChosen := (RANDOM() % FromAndToDateSpan)+1;\\n\\n ds := DATASET(10, TRANSFORM({UNSIGNED FD, UNSIGNED TD,UNSIGNED Span},\\n SELF.FD := FromDateChosen,\\n SELF.SPAN := FromAndToDateSpanChosen,\\n SELF.TD := 0));\\n //ds;\\n\\t\\t\\n\\t\\tprojout := PROJECT(ds,TRANSFORM(RECORDOF(ds),\\n\\t\\t SELF.TD := LEFT.FD + LEFT.SPAN,\\n\\t\\t SELF := LEFT));\\n projout;
\\n\\nI have to break it down to two steps, but the end result is correct and the timings are still acceptable.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-29 21:41:55\" },\n\t{ \"post_id\": 8980, \"topic_id\": 2092, \"forum_id\": 8, \"post_subject\": \"Generating Random dates\", \"username\": \"rameshpachamuthu\", \"post_text\": \"I am trying to generate random from and to dates. \\nIMPORT STD;\\nChooseFromAndToDate_FirstClaims := FUNCTION\\n \\n Allowed_Date_min := STD.Date.FromJulianYMD(2005, 1, 1);\\n Allowed_span := 365;\\n ChoosenVal := (RANDOM()%Allowed_span)+1;\\n \\n FromAndToDateSpan := 90;\\n FromAndToDateSpanChoosen := (RANDOM()%FromAndToDateSpan+1);\\n \\n FromDateChoosen := Allowed_Date_min + ChoosenVal; // Choosing one day in 2005\\n ToDateChoosen := FromDateChoosen + FromAndToDateSpanChoosen; // Choosing one day in next 3 months\\n\\n\\n Dates := MODULE\\n EXPORT From_Date := FromDateChoosen;\\n EXPORT To_Date := ToDateChoosen;\\n END;\\n RETURN Dates;\\nEND;\\n\\nds := DATASET(10, TRANSFORM({UNSIGNED FD\\n , UNSIGNED TD},\\n date := ChooseFromAndToDate_FirstClaims;\\n SELF.FD := date.From_Date,\\n SELF.TD := date.To_Date));\\nds;
\\nResult:\\n\\n
\\n\\nWhat I believe is that, while finding ‘ToDateChoosen’, the system once again creates ‘FromDateChoosen’ with a new value and then finds ‘ToDateChoosen’. So the generated To_date is not the same as intended.\\n\\nI want From_Date to be different for each row, hence Global & Independent will not be the solution.\\n\\nKindly help.\\n\\nThanks,\\nRamesh\", \"post_time\": \"2015-12-29 13:27:43\" },\n\t{ \"post_id\": 9162, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot Richard & Dan for all your suggestions and clarifications throughout this entire thread \", \"post_time\": \"2016-01-28 21:15:49\" },\n\t{ \"post_id\": 9136, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nOK, since we're using the "brute force" approach, we can extend that to do the MACRO this way:
MAC_GenOutputs(cnt,prefix,ds) := MACRO\\n #DECLARE(ndx);\\n #DECLARE(outputstr);\\n #SET(outputstr,'FilePre := \\\\'' + prefix + '\\\\';\\\\n');\\n #SET (Ndx, 1); //initialize Ndx to 1\\n #LOOP\\n #IF (%Ndx% > cnt) \\n #BREAK // break out of the loop\\n #ELSE //otherwise\\n #APPEND(outputstr,'IF(' + #TEXT(ds) + '[' + \\n %'Ndx'% +'].filename!=\\\\'\\\\',OUTPUT(dataset([{' + \\n #TEXT(ds) + '[' + %'Ndx'% + \\n '].content}],{string txt}),,FilePre + ' + \\n #TEXT(ds) + '[' + %'Ndx'% +'].filename,overwrite));\\\\n')\\n #SET (Ndx, %Ndx% + 1)\\n #END\\n #END\\n // %'outputstr'% //shows the generated code\\n %outputstr% //runs the generated code \\nENDMACRO;
\\nThen call that MACRO by passing it a constant value for the cnt parameter that will always be greater than the actual number of records in the file, like this:// MyDS := dataset([{'John','File1'},{'Peter','File2'},\\n // {'Thomas','File3'},{'Lukas','File4'}],\\n // {string Content,string Filename});\\n\\nprefix :='~RTTEST::pius::applyCheck::';\\n\\n// OUTPUT(MyDS,,prefix + 'BaseFile');\\n\\nMyDS := DATASET(prefix + 'BaseFile',{string Content,string Filename},FLAT);\\nMAC_GenOutputs(6,prefix,MyDS);
You will always get the number of results specified by the cnt constant, but only those with text in the filename field will actually produce a dataset to disk.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-25 22:09:37\" },\n\t{ \"post_id\": 9134, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\n sorry for the late reply. I tried with your MACRO suggestion . The issue which i get is that i am not able to pass variable for index inside loop. The errors say Error: Constant expression expected \\n\\nLine of code : #IF (%Ndx% > cnt) Here i have defined 'cnt' as count(ds) . \\n\\nCan you please help me out on this?\", \"post_time\": \"2016-01-25 16:37:42\" },\n\t{ \"post_id\": 9112, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"DSC\", \"post_text\": \"HTTPCALL really is targeted to processing a "pure" XML response. If you would like to see that expanded or modified, I would encourage you to file a Jira ticket requesting those changes. The Jira system can be found at https://track.hpccsystems.com.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2016-01-20 17:04:11\" },\n\t{ \"post_id\": 9110, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Dan,\\n As we have a native HTTPCALL in ECL itself , we are trying to leverage it. Is it possible that the HTTPCALL be modified to accept the multipart xml as responses. We feel it will be an ideal solution. \\n\\n As of now we have JAVA code which does this HTTPCALL and returns it to ECL. But solving it with use ECL itself will be the best solution.\\n\\nThanks,\\nPius\", \"post_time\": \"2016-01-20 16:59:18\" },\n\t{ \"post_id\": 9108, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"DSC\", \"post_text\": \"HTTPCALL() expects well-formed XML as a response. The service you're calling is returning a multipart MIME block with embedded XML. The error you're seeing is the HTTPCALL() trying to parse the first MIME separator and failing.\\n\\nProbably the most straightforward solution would be to write a small script that performs this call for you and preprocesses the result before returning it to ECL. That script could accept all of the arguments you need to supply (authentication, URL, etc.), issue a command to /usr/bin/curl or something similar, extract the XML you need and repackage it for ECL's consumption. In this particular case, you could need to embed the two XML chunks within a single XML tag so the ECL code can ensure that the two chunks are parsed together.\\n\\nAll of this can be done with regex within the script; no actual XML parsing is required. The script can be executed from within ECL with the PIPE() command and the results (which would be simple text) parsed with the XML variant of PARSE(). Note that you'll need to install your script on each Thor node, as it can potentially be executed by every Thor slave.\\n\\nAll of that said, I noticed that one of your header values specifies "application/x-hub-multipart+xml". 
If the service can reply in a different reply format, such as strict XML, you can use HTTPCALL() as-is and this whole thing becomes much simpler.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2016-01-20 15:42:36\" },\n\t{ \"post_id\": 9104, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Dan,the HTTPCALL has to return the attached result.txt file. But i am getting the below mentioned issue.\\n\\n<Error><text>Error - syntax error "Expecting "<"" [line 2, file offset 3] -*ERROR*-yytet00pubSubBoundary00tetyy Content-Type: application/atom+xml; type=feed </text><url>http://comp-i-services.lexisnexis.com:80/shared/pubsubhub/file/4db3424ca16c5fa20565d1b5676b6d0569dc32abda08</url></Error> (in HTTP dataset G1 E2) (0, 0), 2, \\n\\n\\nCode : \\n\\ncontentTypeSubscription := 'application/x-hub-subscription+xml; version=1.0';\\n\\ncontentTypeSubscriber := 'application/x-hub-subscriber+xml; version=1.0';\\n\\ncontentTypeAtomFile := 'application/x-hub-multipart+xml; version=1.0; charset=UTF-8';\\t\\n\\nhubfile := '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><FileData><offset>0</offset><moreDataAvailable>false</moreDataAvailable><size>0</size><subscriptionGUID>3939a310-be3b-4882-81ad-dc1e66e39427</subscriptionGUID></FileData>';\\n\\nOutRec1 := RECORD\\n unicode file;\\nEND;\\n\\nHTTPCALL('http://comp-i-services.lexisnexis.com/shared/pubsubhub/file/4db3424ca16c5fa20565d1b5676b6d0569dc32abda08', 'GET','text/xml', OutRec1 \\n,HTTPHEADER('Authorization',BasicAuth)\\n,HTTPHEADER('X-LN-Request',lnReqHeader)\\n,HTTPHEADER('Content-Type',contentTypeAtomFile)\\n,HTTPHEADER('X-LN-ESBCredential',lnESB)\\n,HTTPHEADER( 'X-LN-HUB-File', hubfile));\\n\\nKindly help me regarding this.\", \"post_time\": \"2016-01-20 04:25:16\" },\n\t{ \"post_id\": 9096, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"DSC\", \"post_text\": \"Pius,\\n\\nYour original question involved obtaining results from HTTPCALL() where different URLs were supplied by different rows in a dataset. Below are a couple of alternative methods for obtaining this data, beyond what Richard has already outlined. These examples use a call to freegeoip.net to obtain geolocation information for an IP address. 
Only a single record is returned for each address, but a multiple-record response is fully supported.\\n\\nThe simpler alternative uses child datasets:\\n\\n// HTTPCALL result record\\nRawRec := RECORD\\n STRING ip {XPATH('/Response/IP')};\\n STRING countryCode {XPATH('/Response/CountryCode')};\\n STRING countryName {XPATH('/Response/CountryName')};\\n STRING regionCode {XPATH('/Response/RegionCode')};\\n STRING regionName {XPATH('/Response/RegionName')};\\n STRING city {XPATH('/Response/City')};\\n STRING zipCode {XPATH('/Response/ZipCode')};\\n STRING timeZone {XPATH('/Response/TimeZone')};\\n STRING latitude {XPATH('/Response/Latitude')};\\n STRING longitude {XPATH('/Response/Longitude')};\\n STRING metroCode {XPATH('/Response/MetroCode')};\\nEND;\\n\\n// Result record\\nDataRec := RECORD\\n STRING url;\\n DATASET(RawRec) theData;\\nEND;\\n\\n// Input record\\nURLRec := RECORD\\n STRING url;\\nEND;\\n\\nsites := DATASET\\n (\\n [\\n 'http://freegeoip.net/xml/4.59.90.221', // google.com\\n 'http://freegeoip.net/xml/17.178.96.59', // apple.com\\n 'http://freegeoip.net/xml/138.12.4.174' // lexisnexis.com\\n ],\\n URLRec\\n );\\n\\n// Make HTTPCALL for each site URL\\nds := PROJECT\\n (\\n sites,\\n TRANSFORM\\n (\\n DataRec,\\n SELF.url := LEFT.url,\\n SELF.theData := HTTPCALL(LEFT.url, 'GET', 'text/xml', RawRec)\\n )\\n );\\n\\nOUTPUT(ds);\\n
\\nThis works but you can run into memory issues if you are not careful. A child dataset within a record resides fully within that node (it doesn't span nodes). If the child dataset is too large, you could run out of memory trying to process it.\\n\\nAn alternate way of doing this is to use LOOP:\\n\\n// HTTPCALL result record\\nRawRec := RECORD\\n STRING ip {XPATH('/Response/IP')};\\n STRING countryCode {XPATH('/Response/CountryCode')};\\n STRING countryName {XPATH('/Response/CountryName')};\\n STRING regionCode {XPATH('/Response/RegionCode')};\\n STRING regionName {XPATH('/Response/RegionName')};\\n STRING city {XPATH('/Response/City')};\\n STRING zipCode {XPATH('/Response/ZipCode')};\\n STRING timeZone {XPATH('/Response/TimeZone')};\\n STRING latitude {XPATH('/Response/Latitude')};\\n STRING longitude {XPATH('/Response/Longitude')};\\n STRING metroCode {XPATH('/Response/MetroCode')};\\nEND;\\n\\n// Result record\\nDataRec := RECORD\\n STRING url;\\n RawRec;\\nEND;\\n\\n// Input record\\nURLRec := RECORD\\n STRING url;\\nEND;\\n\\nsites := DATASET\\n (\\n [\\n 'http://freegeoip.net/xml/4.59.90.221', // google.com\\n 'http://freegeoip.net/xml/17.178.96.59', // apple.com\\n 'http://freegeoip.net/xml/138.12.4.174' // lexisnexis.com\\n ],\\n URLRec\\n );\\n\\n// Append results\\nloopBody(DATASET(DataRec) ds, STRING inURL) := FUNCTION\\n newData := HTTPCALL(inURL, 'GET', 'text/xml', RawRec);\\n newDataWithURL := PROJECT\\n (\\n newData,\\n TRANSFORM\\n (\\n DataRec,\\n SELF.url := inURL,\\n SELF := LEFT\\n )\\n );\\n \\n RETURN ds + newDataWithURL;\\nEND;\\n\\nres := LOOP\\n (\\n DATASET([], DataRec),\\n COUNT(sites),\\n loopBody(ROWS(LEFT), sites[COUNTER].url)\\n );\\n\\nOUTPUT(res);\\n
\\nThis is less intuitive, but the result is a "flat" structure that can be more easily managed.\\n\\nAt any rate, the results of either example give you a single dataset that combines the results of all your HTTPCALLs. You can still write this dataset as a logical file and append it to a superfile, of course, so you can collect information over time. While the result is slightly different than the one-file-per-call you were looking for, a single dataset is probably just as useful, if not more.\\n\\nThe above code works with the current platform under both Roxie and hthor. In the course of creating this code I discovered a bug in the HTTPCALL code running under Thor (the result of the first HTTPCALL would be replicated for all subsequent records, regardless of the URL used). This has been fixed and will be available in an upcoming 5.6.0 release.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2016-01-15 15:12:20\" },\n\t{ \"post_id\": 9082, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nOK, then we can just do it with a MACRO to generate the "brute force" approach, like this:MAC_GenNotify(cnt,prefix,ds) := MACRO\\n #DECLARE(ndx);\\n #DECLARE(outputstr);\\n #SET(outputstr,'FilePre := \\\\'' + prefix + '\\\\';\\\\n');\\n #SET (Ndx, 1); //initialize Ndx to 1\\n #LOOP\\n #IF (%Ndx% > cnt) \\n #BREAK // break out of the loop\\n #ELSE //otherwise\\n #APPEND(outputstr,'OUTPUT(dataset([{' + \\n #TEXT(ds) + '[' + %'Ndx'% + \\n '].content}],{string txt}),,FilePre + ' + \\n #TEXT(ds) + '[' + %'Ndx'% +'].filename,overwrite);\\\\n')\\n #SET (Ndx, %Ndx% + 1)\\n #END\\n #END\\n // %'outputstr'% //shows the generated code\\n %outputstr% //runs the generated code \\nENDMACRO;\\n\\n\\nMyDS := dataset([{'John','File1'},{'Peter','File2'},\\n {'Thomas','File3'},{'Lukas','File4'}],\\n {string Content,string Filename});\\n\\nprefix :='~RTTEST::pius::applyCheck::';\\nMAC_GenNotify(COUNT(MyDS),prefix,MyDS);
I'd suggest reversing the comments on the final outputstr to look at the generated code once before running the actual test, just to see what's being run. It generates this code:FilePre := '~RTTEST::pius::applyCheck::';\\nOUTPUT(dataset([{MyDS[1].content}],{string txt}),,FilePre + MyDS[1].filename,overwrite);\\nOUTPUT(dataset([{MyDS[2].content}],{string txt}),,FilePre + MyDS[2].filename,overwrite);\\nOUTPUT(dataset([{MyDS[3].content}],{string txt}),,FilePre + MyDS[3].filename,overwrite);\\nOUTPUT(dataset([{MyDS[4].content}],{string txt}),,FilePre + MyDS[4].filename,overwrite);\\n
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-13 16:34:15\" },\n\t{ \"post_id\": 9080, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\n Thanks a lot for your response. I tried your suggestion but i got below mentioned error.\\n\\nError:Cannot call function donotify in a non-global context\\n\\n \\nI tried using GLOBAL for the dataset in apply, but still the issue persists.\\n\\nAPPLY(GLOBAL(epochParsedNormalized)......,\\n\\nThanks,\\nPius\", \"post_time\": \"2016-01-12 16:01:24\" },\n\t{ \"post_id\": 9078, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nI have handled similar issues in the past by using NOTIFY to launch a waiting process to do the work. The specific situation I coded was to automatically spray and process groups of datasets when they appeared in the Landing Zone by reading a "semaphore" file that contained the list of files to spray. When that semaphore file appeared in the LZ, then the code would read each filename in turn and use NOTIFY to cause an already-waiting workunit to do its work. The filename to process was passed through EVENTEXTRA.\\n\\nTherefore, you could write your process code as BWR code to read a single URL and write the new file to disk, with the OUTPUT action having the WHEN workflow service so it doesn't actually do anything but wait for that event to fire before it commences to do the work. That means your "kickoff" code would look something like this:APPLY(ds,\\n NOTIFY('MyEventToLaunch',\\n '<Event>' + \\n '<TheURL>' + TRIM(ds.url) + '</TheURL>' + \\n '<FileName>' + outputfilename + '</FileName>' + \\n '</Event>'));
and your process code would end with an OUTPUT with the WHEN workflow service, something like this:URLtoScrape := EVENTEXTRA('TheURL');\\nfilename := EVENTEXTRA('FileName');\\n\\nOutputDS := ScrapeIt(URLtoScrape); //some processing code here\\n\\nOUTPUT(OutputDS,,filename) : WHEN('MyEventToLaunch');
You simply need to launch the OUTPUT code's workunit before you launch the APPLY job so it is ready and waiting for the event to fire.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-12 14:38:43\" },\n\t{ \"post_id\": 9076, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Problem Statement :\\n I have an 'url' field in my dataset from which i have to download the data and save to logical file. If i am going to use PROJECT is it possible to solve the above purpose ? Don't we need to assign the result of the function to one of the fields while using PROJECT ?\", \"post_time\": \"2016-01-11 16:03:20\" },\n\t{ \"post_id\": 9074, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,I have to make iterative calls to a function for each record in a dataset.
Then why not just use PROJECT? What does the function do? If it's writing a new file to disk with one record in each file, as your APPLY example is, then my next question is, why? What problem are we trying to solve that requires a separate file for each record?\\n\\nRichard\", \"post_time\": \"2016-01-08 20:32:12\" },\n\t{ \"post_id\": 9072, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"I have to make iterative calls to a function for each record in a dataset.\", \"post_time\": \"2016-01-08 18:43:17\" },\n\t{ \"post_id\": 9070, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Richard , is it any other way to iterate through dataset and make a specific action on it?\", \"post_time\": \"2016-01-08 18:41:15\" },\n\t{ \"post_id\": 9068, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,Is there any other way to do iterative calls ?
What exactly are you trying to accomplish?\\n\\nRichard\", \"post_time\": \"2016-01-08 18:40:20\" },\n\t{ \"post_id\": 9066, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nIn my experience, APPLY only works in hthor.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-08 18:39:30\" },\n\t{ \"post_id\": 9064, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard ,\\n I tried to execute the above code in THOR cluster , i am getting the below error, i dnt get the error in HTHOR\\n\\nCode : \\n\\n\\tds := dataset([{'John','File1'},{'Peter','File2'},{'Thomas','File3'},{'Lukas','File4'}],{string Content,string Filename});\\n\\n\\tprefix :='~pius::applyCheck1::';\\n\\n\\twriteToFile(string Content) := function\\n\\n\\t\\treturn content;\\n\\n\\tend; \\n\\n\\tAPPLY(ds, OUTPUT(dataset([writeToFile(ds.content)],{string Content}),,\\n prefix + Filename));\\n\\nError Message :\\n\\nError: System error: 0: Graph[1], SLAVE 192.168.56.105:20100: Graph[1], apply[3]: Global child graph? : Global acts = Graph(5): [diskwrite(7)] (0, 0), 0,\", \"post_time\": \"2016-01-08 18:25:15\" },\n\t{ \"post_id\": 9062, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Richard, Is there any other way to do iterative calls ?\", \"post_time\": \"2016-01-08 17:54:12\" },\n\t{ \"post_id\": 9060, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nI have duplicated you r result and reported in in JIRA: https://track.hpccsystems.com/browse/HPCC-14754\\n\\nYou can add yourself as a watcher on that issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-08 17:51:53\" },\n\t{ \"post_id\": 9058, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\n The below example also gives the result as previous one.\\n\\n\\tds := dataset([{'John','File1'},{'Peter','File2'},{'Thomas','File3'},{'Lukas','File4'}],{string Content,string Filename});\\n\\n\\tprefix :='~pius::applyCheck::Reverse::';\\n\\n\\twriteToFile(string filename) := function\\n\\n\\t\\treturn filename;\\n\\n\\tend; \\n\\n\\tAPPLY(ds, OUTPUT(dataset([ds.content],{string Content}),,\\n prefix + writeToFile(filename)));\", \"post_time\": \"2016-01-08 17:35:07\" },\n\t{ \"post_id\": 9042, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Hi Richard,\\n I have also tried the suggestion you have provided , but it is not working.I am getting the same results as earlier. 
The function in apply returns the value of the last call in apply.\\n\\nI have given a simple example below , where you can find the last value in the call is written to all files\\n\\nCode :\\n\\n\\tds := dataset([{'John','File1'},{'Peter','File2'},{'Thomas','File3'},{'Lukas','File4'}],{string Content,string Filename});\\n\\n\\tprefix :='~pius::applyCheck::';\\n\\n\\twriteToFile(string Content) := function\\n\\n\\t\\treturn content;\\n\\n\\tend; \\n\\n\\tAPPLY(ds, OUTPUT(dataset([writeToFile(content)],{string Content}),,\\n prefix + Filename));\\n\\nThanks,\\nPius\", \"post_time\": \"2016-01-08 16:58:03\" },\n\t{ \"post_id\": 9024, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"pius_francis,\\n\\nTry it something like this:atomFileCreator (string link,string subsriptionGUID) := function\\t\\n controlInfoAtomFile := CSSM_Feed.ControlInfo('atom',subsriptionGUID,BasicAuth);\\t\\n headerNamesAtomFile := controlInfoAtomFile.headerNames;\\t\\n headerValuesAtomFile := controlInfoAtomFile.headerValues;\\n AtomFile := dataset([{HTTPMask(headerNamesAtomFile,\\n headerValuesAtomFile,\\n link\\t)}],\\n {string document});\\n return AtomFile;\\nend;\\t\\nAPPLY(epochParsedNormalized,\\n OUTPUT(atomFileCreator(link,subsriptionGUID),,\\n prefix + subsriptionGUID+'_'+epochstart+'_'+epochend+'_',compressed));\\n
APPLY itself is an action, so you don't need SEQUENTIAL surrounding it. And, if you make your FUNCTION just RETURN the dataset, then the action for APPLY becomes the OUTPUT that writes each logical file.\\n\\nI obviously can't test this, but it's something to try, anyway.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-07 22:29:39\" },\n\t{ \"post_id\": 9022, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"Code:\\n\\n atomFileCreator (string link,string subsriptionGUID,string epochend,string epochstart) := function\\t \\n\\t\\t\\t\\n\\t\\t\\t\\n controlInfoAtomFile := CSSM_Feed.ControlInfo('atom',subsriptionGUID,BasicAuth);\\t\\t\\t\\t\\t\\n headerNamesAtomFile := controlInfoAtomFile.headerNames;\\t\\n headerValuesAtomFile := controlInfoAtomFile.headerValues;\\n\\nsubfileName := prefix + subsriptionGUID+'_'+epochstart+'_'+epochend+'_';\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nAtomFile := \\tdataset([{HTTPMask(headerNamesAtomFile,headerValuesAtomFile,link\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t)}],{string document});\\n\\t\\t \\n\\t\\t\\treturn output(AtomFile,,subfileName,compressed);\\n\\t\\t\\n\\tend;\\t\\n\\t\\n\\tsequential(apply(epochParsedNormalized,evaluate(atomFileCreator(link,subsriptionGUID,epochend,epochstart))))\\n\\n\\nExplanation :\\n\\n For each call in apply, i am creating a logical file to store the output of individual HTTPCALLs . The logical files are getting created for each call , but the content of the file is all same. The content of the last call in apply is stored in all the files\", \"post_time\": \"2016-01-07 21:17:54\" },\n\t{ \"post_id\": 9018, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Re: Use of APPLY\", \"username\": \"rtaylor\", \"post_text\": \"pius_francis,\\n\\nI'm not sure I understand the problem. Your code was not included with the post.\\n\\nWhat is it not doing (or doing and shouldn't be)?\\n\\nRichard\", \"post_time\": \"2016-01-07 20:46:33\" },\n\t{ \"post_id\": 9016, \"topic_id\": 2104, \"forum_id\": 8, \"post_subject\": \"Use of APPLY\", \"username\": \"pius_francis\", \"post_text\": \"I am using APPLY() to call an HTTPCALL for each link in DS and right result of each call in logical file. \\n\\nLogical files are created properly but the dataset stored in file is always the result of last HTTPCALL. Kindly help me regarding this.\\n \\nCode:\", \"post_time\": \"2016-01-07 20:24:54\" },\n\t{ \"post_id\": 9086, \"topic_id\": 2120, \"forum_id\": 8, \"post_subject\": \"Re: SOAP Response Parsing\", \"username\": \"rtaylor\", \"post_text\": \"Brandon,\\n\\nI took your XML and wrote it to a file, then ran this code:HeaderRecsLayout := RECORD\\n\\tSTRING s_did {XPATH('s_did')};\\n\\tSTRING did {XPATH('did')};\\n\\tSTRING dt_first_seen {XPATH('dt_first_seen')};\\nEND;\\n\\nds := dataset('~rttest::parsesoap::testrespparse.xml',\\n HeaderRecsLayout,\\n XML('/soap:Envelope/soap:Body/riskwise.proddataResponse/Results/Result/Dataset[@name=\\\\'header_records_by_did\\\\']/Row'));\\nds;\\n
The result looks good to me.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-13 20:29:22\" },\n\t{ \"post_id\": 9084, \"topic_id\": 2120, \"forum_id\": 8, \"post_subject\": \"SOAP Response Parsing\", \"username\": \"brandon.walker\", \"post_text\": \"Hello,\\n\\nI'm having issues trying to parse a SOAP response. I have the output record defined below, along with the response structure. However, I'm unable to retrieve the output record appropriately. I notice that the first element inside the soap body, as well as the Dataset elements, are defined with the default namespace. Does this mean we are unable to parse those elements out via XPath?\\n\\nHeaderRecsLayout := RECORD\\n STRING s_did {XPATH('s_did')};\\n STRING did {XPATH('did')};\\n STRING dt_first_seen {XPATH('dt_first_seen')};\\nEND;\\n\\nOutput_Layout := Record \\n dataset(HeaderRecsLayout) header_records_by_did {xpath('/soap:Envelope/soap:Body/riskwise.proddataResponse/Results/Result/Dataset[@name=\\\\'header_records_by_did\\\\']/Row')}; \\nEND;\\n\\n<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">\\n <soap:Body>\\n <riskwise.proddataResponse xmlns="urn:hpccsystems:ecl:riskwise.proddata" sequence="0">\\n <Results>\\n <Result>\\n <Dataset xmlns="urn:hpccsystems:ecl:riskwise.proddata:result:indata" name="indata">\\n <Row>\\n <seq>1</seq>\\n <historydate>999999</historydate>\\n <did>0123456789</did>\\n <score>0</score>\\n </Row>\\n </Dataset>\\n <Dataset xmlns="urn:hpccsystems:ecl:riskwise.proddata:result:header_records_by_did" name="header_records_by_did">\\n <Row>\\n <s_did>0123456789</s_did>\\n <did>0123456789</did>\\n <dt_first_seen>201504</dt_first_seen>\\n </Row>\\n ... \\n </Dataset>\\n </Result>\\n </Results>\\n </riskwise.proddataResponse>\\n </soap:Body>\\n</soap:Envelope>\", \"post_time\": \"2016-01-13 19:18:54\" },\n\t{ \"post_id\": 9092, \"topic_id\": 2122, \"forum_id\": 8, \"post_subject\": \"Re: Convert a record structure to a dataset of record metada\", \"username\": \"gmarcan\", \"post_text\": \"Thank you very much. This works perfectly.\", \"post_time\": \"2016-01-14 19:38:40\" },\n\t{ \"post_id\": 9090, \"topic_id\": 2122, \"forum_id\": 8, \"post_subject\": \"Re: Convert a record structure to a dataset of record metada\", \"username\": \"rtaylor\", \"post_text\": \"gmarcan,\\n\\nHere's a MACRO that does exactly that, using #EXPORTXML from ECL's Template Language:MAC_Struct2DS(RecStruct,ResName) := MACRO\\n #EXPORTXML(out, RecStruct);\\n #DECLARE(Ndx);\\n #SET (Ndx, 0)\\n #DECLARE(OutStr);\\n #SET(OutStr,#TEXT(Resname) + ' := DATASET([')\\n #FOR (out)\\n #FOR (Field) \\n #SET (Ndx, %Ndx% + 1)\\n #IF (%Ndx% > 1)\\n #APPEND(OutStr,',') \\n #END\\n #APPEND(OutStr,'{\\\\'' + %'{@label}'% + '\\\\',\\\\'' + %'{@ecltype}'% + '\\\\'}') \\n #END\\n #END\\n #APPEND(OutStr,'],{string field_name, string field_type});')\\n // %'OutStr'% //show the generated code\\n %OutStr% //generate the code for use\\nENDMACRO;
This generates an inline DATASET declaration similar to this one:\\nResultDS := DATASET([{'first','string10'},{'last','string20'}],\\n {string field_name, string field_type});
when you call it like this:NamesRecord := RECORD\\n STRING10 first;\\n STRING20 last;\\nEND;\\nds := DATASET('~RTTEST::OUT::ds', NamesRecord, thor);\\n\\nMAC_Struct2DS(NamesRecord,ResultDS);\\nResultDS;
Or like this:NamesRecord := RECORD\\n STRING10 first;\\n STRING20 last;\\nEND;\\nds := DATASET('~RTTEST::OUT::ds', NamesRecord, thor);\\n\\nMAC_Struct2DS(RECORDOF(ds),ResultDS);\\nResultDS;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-14 19:08:26\" },\n\t{ \"post_id\": 9088, \"topic_id\": 2122, \"forum_id\": 8, \"post_subject\": \"Convert a record structure to a dataset of record metadata\", \"username\": \"gmarcan\", \"post_text\": \"Is there a simple way to convert the layout / record structure to a dataset which contains information about each field?\\n\\nIn other words, as a simple example, given a record structure:\\n\\nmy_record := {string field1, unsigned8 field2};\\n\\nI would like to get a dataset result identical to the following dataset definition:\\n\\ndataset([{'field1','string'},{'field2','unsiend8'}],{string field_name, string field_type});\\n\\nI can save the dataset to a file and then use a 'DFUGetDataColumns' soapcall to pull out the layout (using xpath('DFUDataNonKeyedColumns1/DFUDataColumn'), but that is a terrible and partial work-around.\", \"post_time\": \"2016-01-14 17:16:18\" },\n\t{ \"post_id\": 9098, \"topic_id\": 2124, \"forum_id\": 8, \"post_subject\": \"Re: Using other parts of ICU\", \"username\": \"ghalliday\", \"post_text\": \"Yes, we are using ICU to provide Unicode support with HPCC. There are no direct calls from the generated code to the ICU libraries to minimize the size of the include dependencies, and to avoid direct dependence on a particular ICU version.\\n\\nYou should be able to access any part of icu that you need to by including the appropriate icu headers, and linking the icu libraries.\", \"post_time\": \"2016-01-18 09:35:55\" },\n\t{ \"post_id\": 9094, \"topic_id\": 2124, \"forum_id\": 8, \"post_subject\": \"Using other parts of ICU\", \"username\": \"jwilt\", \"post_text\": \"What parts of the ICU (International Components for Unicode) are available in an HPCC build? ICU seems to be what's supporting parts of ECL, but is it possible to access other functionality, e.g., via BeginC++?\\nThanks.\", \"post_time\": \"2016-01-15 02:46:55\" },\n\t{ \"post_id\": 9168, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"mrumsey\", \"post_text\": \"Once we are running multiple jobs, the automation script is handling the work, so that isn't as much of a problem. I also think that 50 smaller jobs is easier to track errors on (one for each company) because we have one file that needs to be tracked per job, for 50 jobs. The alternative is 50 files that need to be tracked for each of 4-5 jobs. \\n\\nThe more we open, modify, and close an existing file the more we open up the process to errors, file corruption, or just company data being deposited in the wrong file. It would also be harder and more hindering to our deliverable timeline if we have to re-run or delay 50 reports instead of delivering 49 and delaying just one.\\n\\nWe did consider the stacked dataset option for quite a while, but determined it didn't deliver as many benefits as using HPCC to create curated reports.\\n\\nOn a side note, is there a way to make DISTRIBUTE() spread the load on more than one node? If I have 6 groups and need to distribute (medians of groups - single node operation), it is highly ineffective. If I could use an option to allow multiple nodes to work as one, it would be very useful.\\n\\nThanks for the help!\", \"post_time\": \"2016-01-28 21:28:40\" },\n\t{ \"post_id\": 9164, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"rtaylor\", \"post_text\": \"Matt, Quite a few share a layout, but not all of them.
In that case, then I would aggregate all of them that do, or could be made to, share the same layouts into a single job to create your nested child dataset that you can parse on your back end. That way your co-worker's script only has to handle the exceptions and you end up with fewer jobs to manage.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-28 21:17:36\" },\n\t{ \"post_id\": 9160, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"mrumsey\", \"post_text\": \"Mostly small datasets and distributions - less than 36 rows each, 6 or fewer columns, mostly numeric.\\n\\nWe are tracking company trends and profile distributions. There is one or two with company data (ambest names, company name, channel of operation). Quite a few share a layout, but not all of them. This keeps me from using a stacked dataset and parsing it on the back end.\", \"post_time\": \"2016-01-28 21:11:38\" },\n\t{ \"post_id\": 9158, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nIf they are all scalar values, then you could run the job to produce a single recordset with 50 rows and 80 columns and write that to disk as a CSV file, then despray it your landing zone and import it directly into Excel (if that's where your workbook is).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-28 21:06:57\" },\n\t{ \"post_id\": 9156, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nSo these 80 result tabs -- are they mostly scalar values or recordsets?\\n\\nRichard\", \"post_time\": \"2016-01-28 19:58:26\" },\n\t{ \"post_id\": 9154, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"mrumsey\", \"post_text\": \"Richard,\\n\\nEach workunit has 80 results. That would be a workunit with ~400 tabs. \\n\\nMy co-worker has developed a python script that scrapes the output from ECLPlus and dumps it into a workbook I have curated. This allows for us to generate 50 reports for sales with very little opportunity for human error.\\n\\nWith 400 tabs, this script would need to be more complex and have more moving parts. It would delay our delivery time and increase the chance that the script does something wonky and messes up a workbook, and possibly, every workbook after it. \\n\\nThis way ensures that any one error affects only one report, unless it is a systemic error.\\n\\nI hope this clarifies my need!\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2016-01-28 19:54:32\" },\n\t{ \"post_id\": 9152, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nI'm curious -- why does each report need to be a separate workunit? Could you not just run it all as a single workunit with 50 results?\\n\\nRichard\", \"post_time\": \"2016-01-28 19:48:47\" },\n\t{ \"post_id\": 9150, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"mrumsey\", \"post_text\": \"Jim,\\n\\nI am aware of auto-switching of clusters. My particular job is an analysis of 50 companies. That means I have 50 reports to generate in HPCC. 
If I submit 50 jobs at one time, even with switching, I am taking up a lot of queue space and putting my jobs in front of anyone else.\\n\\nIn the spirit of easing the flow of work for the whole company, I want to throttle my input to better control the queue loads. This is why I wanted to grab the active jobs from the clusters before submitting.\\n\\nRight now, we have decided to limit my jobs to 2 per queue, regardless of queue length. This means I will never have more than 2 jobs on any particular cluster, which will allow other jobs to get in and complete as my project processes.\\n\\nThanks for the suggestion though, it is a less well-known option in my area (Analytics).\", \"post_time\": \"2016-01-28 19:43:09\" },\n\t{ \"post_id\": 9148, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"Re: ECL PLus List Limit\", \"username\": \"JimD\", \"post_text\": \"I've confirmed that the results are limited to 100 from ECLPlus action=list.\\n\\nYou can submit a Jira issue to request an option to list all WUs, but I have another suggestion.\\n\\nIt sounds like you are trying to analyze activity on your clusters to balance out the submission of jobs. May I suggest you look into the Automatic Queue Switching capability?\\n\\nUsing #OPTION, you can specify:\\n\\nallowedClusters \\nSpecifies the comma-delimited list of cluster names (as\\na string constant) where the workunit may execute. This\\nallows the job to be switched between clusters, manually\\nor automatically, if the workunit is blocked on its\\nassigned cluster and another valid cluster is available\\nfor use.\\n\\nAllowAutoQueueSwitch \\nIf true, specifies that the workunit is automatically reassigned\\nto execute on another available cluster listed in\\nallowedClusters when blocked on its assigned cluster.\\n\\nFor example:\\n#OPTION('AllowAutoSwitchQueue', TRUE);\\n#OPTION('allowedClusters', 'thor50_dev02,thor50_dev');\\n
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-01-28 19:37:18\" },\n\t{ \"post_id\": 9146, \"topic_id\": 2138, \"forum_id\": 8, \"post_subject\": \"ECL PLus List Limit\", \"username\": \"mrumsey\", \"post_text\": \"I am using ECLPlus to try and automate reporting for a project. I want to look at active and queued jobs so I can submit to clusters that are not busy and to avoid blocking all of the queues at once.\\n\\nThe problem is that the following command seems to only return 100 rows:\\neclplus action=list
\\n\\nThe 100 rows includes blocked, waiting, aborted, compiling, paused, and completed jobs, which hits 100 jobs in about 1-2 hours of historical data. Even if I look for running or blocked jobs only, it is a filter of the first 100 rows.\\n\\nThis limit is only overcome if I look for a particular username, which is not useful for my purpose. \\n\\nIs there a way to change the output limit or apply a filter that would allow me to look at jobs that are running, blocked, or waiting that may have been submitted earlier in the day? I ran into this problem last night where half of the jobs in queue wouldn't show up because of long queue times all day.\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2016-01-28 16:23:07\" },\n\t{ \"post_id\": 9180, \"topic_id\": 2144, \"forum_id\": 8, \"post_subject\": \"Re: Can't remove file owned by deleted superfile\", \"username\": \"jsmith\", \"post_text\": \"There have been 1 or 2 bugs that could cause subfiles and superfiles to become out of sync. e.g. for a subfile to be marked as owned by a subfile when it was not. It is possible you are seeing a symptom of one of those bugs.\\nThis is one of them: https://track.hpccsystems.com/browse/HPCC-14727\\n\\nUnder those circumstances, it is necessary to manually delete the meta info in Dali - where the logical meta file info is stored.\\nTo do that, it is necessary to convert the logical filename to a xpath that points to the meta data, for example, if the file I wanted to delete is: "~myscope1::myscope2::myfile", the xpath would be:\\n/Scope[@name="myscope1"]/Scope[@name="myscope2"]/File[@name="myfile"]\\n\\nTo delete it, you must use the admin. tool daliadmin, e.g.:\\n\\n/opt/HPCCSystems/bin/daliadmin <dali-ip> delete '/Scope[@name="myscope1"]/Scope[@name="myscope2"]/File[@name="myfile"]'\\n\\nNB: Caution should be exercised when using daliadmin to manipulate the environments meta data directly like this. The wrong path could result in the loss of a large amount of file + workunit meta data.\\n\\nNote also, that in the forthcoming 5.6.0 build, this inconsistency will cause a warning to be issued to the workunit only and will not prevent the file being deleted.\", \"post_time\": \"2016-02-02 13:26:01\" },\n\t{ \"post_id\": 9174, \"topic_id\": 2144, \"forum_id\": 8, \"post_subject\": \"Can't remove file owned by deleted superfile\", \"username\": \"janet.anderson\", \"post_text\": \"I have a file that was part of a superfile (call it file201407), but I needed to re-run some of the data. I could not overwrite the file while it was attached to a superfile, so I detached it. When that still did not work, I deleted the superfile. However, I still cannot delete the original erroneous subfile (file201407). 
I get something like the error below:\\n\\nAction status:Could not delete file201407 on thor400_72: DFS Exception: 12: Failed to delete file: file201407 - cause: [ -1: Can't remove file201407: Cannot remove file file201407 as owned by SuperFile(s): fileall] \\n\\nWhy is this happening and how can I overwrite file201407 with corrected data?\", \"post_time\": \"2016-02-01 18:14:18\" },\n\t{ \"post_id\": 9208, \"topic_id\": 2152, \"forum_id\": 8, \"post_subject\": \"Re: Error Running embedded mysql\", \"username\": \"drealeed\", \"post_text\": \"Worked like a charm, thanks!\", \"post_time\": \"2016-02-02 21:21:00\" },\n\t{ \"post_id\": 9206, \"topic_id\": 2152, \"forum_id\": 8, \"post_subject\": \"Re: Error Running embedded mysql\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Drea, I think your embedded function 'testMySQL' is missing the empty input param list from its signature. Try 'testMySQL()', it should get you passed the no_embedbody.\\n\\nThis should provide a better error message, do you mind creating a Jira? Thanks.\", \"post_time\": \"2016-02-02 21:15:27\" },\n\t{ \"post_id\": 9202, \"topic_id\": 2152, \"forum_id\": 8, \"post_subject\": \"Error Running embedded mysql\", \"username\": \"drealeed\", \"post_text\": \"I'm trying to connect to an internal mysql server in the ln network and retrieve data from it. \\n \\nThe following code throws the exception\\nError: UNIMPLEMENTED Record count calculation for operator no_embedbody at /var/lib/jenkins/workspace/LN-Candidate-withplugins-5.4.4-1/LN/centos-6.4-x86_64/HPCC-Platform/ecl/hql/hqlattr.cpp(3004) (0, 0), -1, \\n\\n\\nHere's my code (password redacted):\\n\\nimport mysql;\\n\\nLayout := RECORD\\n STRING user_id; //mysql VARCHAR(100)\\n\\t STRING hpcc_id; //mysql VARCHAR(100)\\n\\t STRING workunit_id; //mysql varchar(16)\\n\\t STRING ddl; //mysql varchar(255)\\n\\t STRING layout; //mysql text\\n\\t INTEGER gcid; //mysql int(11) \\nEND;\\n\\ndataset(Layout) testMySQL := EMBED(mysql : server('dbdcorp-bct.risk.regn.net'),port('3306'), \\nuser('dspdev'), password('********'),database('dsp_dev'))\\nselect user_id,hpcc_id,workunit_id,ddl,layout,gcid from dashboard_layout;\\nENDEMBED;\\n\\nds := testMySQL();\\n\\noutput(ds);\\n
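For reference, a minimal sketch of the change suggested in the reply above, i.e. declaring the embedded function with an empty parameter list (connection details simply repeated from the post, not otherwise verified):

IMPORT mysql;
// Same EMBED as the code above, but declared as testMySQL() rather than testMySQL
dataset(Layout) testMySQL() := EMBED(mysql : server('dbdcorp-bct.risk.regn.net'), port('3306'),
 user('dspdev'), password('********'), database('dsp_dev'))
 select user_id,hpcc_id,workunit_id,ddl,layout,gcid from dashboard_layout;
ENDEMBED;
OUTPUT(testMySQL());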
\\nIs this ecl correct? I've verified my user, password, port and database and sql statement in Mysql workbench.\\n\\nI ssh'd to the thor master node of the cluster I'm trying to run on and pinged dbdcorp-bct.risk.regn.net, and the ping was able to connect.\\n\\nI tried running against hthor and the thor cluster; same error both times.\", \"post_time\": \"2016-02-02 20:46:44\" },\n\t{ \"post_id\": 9242, \"topic_id\": 2156, \"forum_id\": 8, \"post_subject\": \"Re: How to display Quote Symbol inside the XML as an element\", \"username\": \"rtaylor\", \"post_text\": \"Sathya,\\n\\nStandard XML does not allow double quotes, therefore it is automatically replaced with the &quot; XML entity that represents the " character. IOW, what you see is correct XML. \\n\\nThe following characters are reserved in XML and must be replaced:\\n' is replaced with &apos;\\n" is replaced with &quot;\\n& is replaced with &amp;\\n< is replaced with &lt;\\n> is replaced with &gt;
\\nTake a look at the XMLENCODE() and XMLDECODE() functions in the Language Reference.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-17 16:23:23\" },\n\t{ \"post_id\": 9240, \"topic_id\": 2156, \"forum_id\": 8, \"post_subject\": \"How to display Quote Symbol inside the XML as an element\", \"username\": \"sathya.akrec\", \"post_text\": \"All,\\n\\nI am trying to output the String in XML format. My String has a double quote in it. \\n\\nSample Input : '10"02'\\n\\nCurrent output : 10&quot;02\\n\\nExpected Output : 10"02\\n\\nCould you please advise?\\n\\nRegards,\\nSathya\\n\\n\\n\\n\\ncr := RECORD,MAXLENGTH(1024)\\n STRING phoneEx{XPATH('')};\\nEND;\\nr := RECORD,MAXLENGTH(4096)\\n STRING id{XPATH('COMP-ID')};\\n STRING phone{XPATH('PHONE-NUMBER')};\\n DATASET(cr) Fred{XPATH('PHONE-NUMBER-EXP')};\\nEND;\\n \\nDS := DATASET([{'10"02','1352,9493',['1352','9493']},\\n {'1003','4846,4582,0779',['4846','4582','0779']}],r);\\n\\nOUTPUT(ds,,'~RTTEST::XMLtest2',\\n XML('RECORD',\\n HEADING('<?xml version="1.0" encoding="UTF-8"?><RECORDS>',\\n '</RECORDS>')),overwrite);\\n
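A minimal sketch of the XMLENCODE()/XMLDECODE() behaviour described in the reply above (the sample value is taken from this thread; everything else is illustrative):

rawVal := '10"02';
// XMLENCODE replaces the reserved characters, so the quote becomes the &quot; entity
encodedVal := XMLENCODE(rawVal);
// XMLDECODE turns the entities back into the literal characters
decodedVal := XMLDECODE(encodedVal);
OUTPUT(encodedVal, NAMED('Encoded'));
OUTPUT(decodedVal, NAMED('Decoded'));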
\", \"post_time\": \"2016-02-17 14:20:16\" },\n\t{ \"post_id\": 9246, \"topic_id\": 2158, \"forum_id\": 8, \"post_subject\": \"Re: Error when running ECL IDE with -legacy flag\", \"username\": \"ghalliday\", \"post_text\": \"No it isn't a known problem. A long time ago there was a related issue - so I can imagine how it might cause problems. Please can you submit a JIRA, including details of the ECL that was within the workunit for that query.\\n\\nIf it is on an internal system, please include a link to the failing workunit, and mark the issue as internal.\", \"post_time\": \"2016-02-18 09:30:43\" },\n\t{ \"post_id\": 9244, \"topic_id\": 2158, \"forum_id\": 8, \"post_subject\": \"Error when running ECL IDE with -legacy flag\", \"username\": \"drealeed\", \"post_text\": \"I have my OSS ECL IDE set up to use the -legacy compile flag.\\n\\nIf I have a BWR that has an import that's not used with the -legacy compile flag set I get an error. For example, the following code\\nIMPORT Utilities;\\n//OUTPUT(Utilities.alpha_prod_ip);\\nOUTPUT('hard coded ip');
\\n\\nCompiles without errors, but when run, it returns the error "Import names unknown module Utilities".\\n\\nRemoving the -legacy flag eliminates this error.\\n\\nIs this a known bug?\", \"post_time\": \"2016-02-17 20:56:40\" },\n\t{ \"post_id\": 9275, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"Jerry\", \"post_text\": \"Hi Richard,\\n\\nThanks a lot for your suggestions.\\nThe problem was solved by dynamic code generation for each row of the project and then executing them using individual workunits.\\n\\nRegards,\\nJerry\", \"post_time\": \"2016-02-24 23:10:38\" },\n\t{ \"post_id\": 9274, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"rtaylor\", \"post_text\": \"Jerry,\\n\\nYour example code doesn't compile on my training cluster, because of the side-effect action you have (which only works for you now because you're using the legacy switch).\\n\\nCan you try this without the legacy switch?\\n\\nRichard\", \"post_time\": \"2016-02-23 19:31:59\" },\n\t{ \"post_id\": 9272, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"Jerry\", \"post_text\": \"Hi Richard,\\n\\nAre you saying that the Spray functions is yet to complete when the PROJECT function starts which is causing the DATASET not active error?\\nIn the file that I had attached, I was trying to do the same operation with out the dynamic spray(the files were sprayed before hand) and even then it was failing.\\nDoesn't this mean that even if I start a new workunit after my spray, I will encounter the same issue?\\n\\nOne thing that I have noticed is that If i hard code the values that I pass to the function(from the transform), the same code works fine.i.e. \\nlayout1 trans1(FilteredConfigDS L) := TRANSFORM\\n\\tSELF.line := func(L.ID, L.ML, L.DV, L.QueryName, L.Terr, L.QueryType, L.RoxieEnv);\\nEND;\\n\\ninstead of the above, if I use the below one, the entire code works fine\\n\\nlayout1 trans1(FilteredConfigDS L) := TRANSFORM\\n\\tSELF.line := func(FilteredConfigDS [1].ID, FilteredConfigDS [1].ML, FilteredConfigDS [1].DV, FilteredConfigDS [1].QueryName, FilteredConfigDS [1].Terr, FilteredConfigDS [1].QueryType, FilteredConfigDS [1].RoxieEnv);\\nNot sure why the difference in behavior.\\n\\n----------------------------------------------\\n\\nI have also come up with a sample piece of code that you can execute.\\nEven though the error is not the same, I believe both the errors are related and solving one will solve the other for me.\\nlayout1 := RECORD\\n\\tSTRING line:='';\\nEND;\\n\\nds1 := dataset([{'1'},{'2'},{'3'},{'4'}],layout1);\\noutput(ds1,,'~regression::allproducts::insurance::ds1',OVERWRITE);\\nds2 := dataset([{'a'},{'b'},{'c'},{'d'}],layout1);\\noutput(ds2,,'~regression::allproducts::insurance::ds2',OVERWRITE);\\n\\n\\nfunc(string line,UNSIGNED cnter) := function\\n\\tdataset2 := DATASET('~regression::allproducts::insurance::ds2',layout1,THOR);\\n\\tout:= output(dataset2,,'~regression::allproducts::insurance::test_'+cnter,OVERWRITE);\\n\\tSEQUENTIAL(out);\\n\\tRETURN 'done';\\nEND;\\n\\ndataset1 := DATASET('~regression::allproducts::insurance::ds1',layout1,THOR);\\n\\nlayout1 trans1(dataset1 L,UNSIGNED cnt) := TRANSFORM\\n\\tSELF.line := func(L.line,cnt);\\nEND;\\n\\n\\nproj := PROJECT(dataset1,trans1(left,COUNTER));\\nproj;
\\n\\nIn this case the error that I get is\\nError: OUTPUT to file is not supported inside NOTHOR() (25, 9), 4102, \", \"post_time\": \"2016-02-22 22:10:22\" },\n\t{ \"post_id\": 9270, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"rtaylor\", \"post_text\": \"Jerry,\\n\\nOK, I see what you're trying to do. \\n\\nAssuming your "func" function actually runs on your cluster, I have to assume you're using either a 702 build or the "legacy" switch so the compiler doesn't complain about your side-effect action (you will have to change that code eventually).1. Dynamically spray a configuration file and a data file\\n2. Read the config file\\na. Iterate through the rows using PROJECT and call a function\\nb. The function will read the data file and make a SOAP call and then write the \\nresult to the file system.
I have done this sort of thing before in current (Open Source Community Edition) builds. The only way I have found to make it work is to use event scheduling (as in the NOTIFY() action) to launch the processes so that each bit is accomplished as a separate workunit.\\n\\nI wrote code to automate file sprays and do preliminary processing on the sprayed files (code that is now in production). That code used the STD.File.MonitorFile() function to wait for a semaphore file (AKA your configuration file) to appear on the landing zone. That semaphore file contained the list of files to spray, so the only thing that workunit did was spray the files appropriately. \\n\\nAfter the spray was successful, that filespray workunit would issue the appropriate NOTIFY action to trigger a specific event and pass the needed information to another workunit that was waiting for that event (using the WHEN workflow service) to begin doing its work. The extra information needed to do that processing job was passed through to the next workunit using the second parameter of NOTIFY and read by the next job using the EVENTEXTRA() function. This allowed common processes to work on different files and handle each correctly.\\n\\nOnce that job was complete, another NOTIFY launched some subsequent processing that needed to happen. You can string this concept out as much as necessary to get each step done in turn...\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-22 21:16:14\" },\n\t{ \"post_id\": 9268, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"Jerry\", \"post_text\": \"Hi Richard,\\n\\nI apologize, after reading through my post again I understand that i doesn't make much sense. Let me try again.\\n\\nI have attached a sample of the code that I am trying to run. Unfortunately you will be able to run it.\\n[attachment=0:1r1lzmlm]Sample_Query.txt \\nWhen i run this code, I get the following error\\nError: INTERNAL: Dataset is not active: \\n\\nWhat should I do to fix this problem?\\n\\nRegards,\\nJerry\", \"post_time\": \"2016-02-22 19:47:42\" },\n\t{ \"post_id\": 9266, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Re: Dataset not active\", \"username\": \"rtaylor\", \"post_text\": \"Jerry,\\n\\nI'm having trouble understanding the link between your topic (Dataset not active) and your problem description (various NOTHOR issues). It would help if you could post your ECL code and explain why you are trying to use NOTHOR. What happens when you don't use NOTHOR at all?\\n\\nRichard\", \"post_time\": \"2016-02-22 18:34:20\" },\n\t{ \"post_id\": 9264, \"topic_id\": 2164, \"forum_id\": 8, \"post_subject\": \"Dataset not active\", \"username\": \"Jerry\", \"post_text\": \"Hi,\\n\\nUsing version : 5.4.6-1\\n\\nI am trying to achieve the following:\\n1. Dynamically spray a configuration file and a data file\\n2. Read the config file\\n a. Iterate through the rows using PROJECT and call a function\\n b. 
The function will read the data file and make a SOAP call and then write the \\n result to the file system.\\n\\nIssues that I am facing:\\nError: OUTPUT to file is not supported inside NOTHOR()\\n\\nI tried to use NOTHOR() on the OUTPUT statement which writes the result to file \\nsystem, but got the same error.\\n\\nI tried to apply the NOTHOR() on the function call inside the transform() of project,\\nbut got the following error\\nNOTHOR expression , in projtrans1 appears to access a parent dataset - this may cause a dataset not active error\\n\\nI then tried to apply NOTHOR() on the PROJECT call and got the following error\\nError: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR.\\n\\nWould someone please suggest a work around for this? \\n\\nRegards,\\nJerry\", \"post_time\": \"2016-02-22 16:22:04\" },\n\t{ \"post_id\": 9288, \"topic_id\": 2166, \"forum_id\": 8, \"post_subject\": \"Re: Issue while write large data in index\", \"username\": \"JimD\", \"post_text\": \"elango_v,\\n\\nThis behavior is expected. This Jira issue may shed some light on the subject:\\nhttps://track.hpccsystems.com/browse/HPCC-8975\\n\\nBasically, you need to identify the large field (in your case, the second column) and either move that field into a blob, or retrieve it from the original dataset when required via a FETCH or full keyed JOIN.\\n\\nHTH, \\nJim\", \"post_time\": \"2016-03-01 19:27:59\" },\n\t{ \"post_id\": 9282, \"topic_id\": 2166, \"forum_id\": 8, \"post_subject\": \"Issue while write large data in index\", \"username\": \"elango_v\", \"post_text\": \"Hi,\\nI am trying to create a payload index for a data source which contains two columns emailID and _data(social activities data of the emailId and this column size is huge)\\n\\nI can spray the file and do some transformation and write in HPCC disc.\\n\\nI am getting below error while creating payload index on email_id as main and _data field as payload.\\n\\nError: System error: 0: Graph[1], SLAVE #12 [10.193.65.17:8620]: Graph[1], indexwrite[4]: Key row too large to fit within a key node (uncompressed size=24080, variable=true, pos=2023474418), (0, 0), 0, \\n\\nI have changed the maxlength value in the dataset and in the layouts as well. Still I get the issue.\\n\\nCan some one please look into this issue?\", \"post_time\": \"2016-03-01 14:53:13\" },\n\t{ \"post_id\": 9286, \"topic_id\": 2168, \"forum_id\": 8, \"post_subject\": \"Re: UPDATE option on OUTPUT\", \"username\": \"JimD\", \"post_text\": \"Dustin, \\n\\nYou are correct in that UPDATE does imply allowing the file to be overwritten. However, UPDATE is not the same as OVERWRITE.\\n\\nUPDATE specifies that the file is rewritten ONLY if the underlying code or the input data has changed. If nothing has changed, a Warning is issued to the workunit for example:\\n\\nWarning: Graph[1], diskwrite[3]: output file = 'tutorial::yn::tutorialperson' - is up to date - it will not be rebuilt
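To make the difference concrete, here is a minimal sketch (the inline dataset and the logical filename are placeholders, not taken from Jim's example):

ds := DATASET([{1},{2}], {UNSIGNED1 n});

// Rewritten on every run, even when the result would be identical:
OUTPUT(ds,,'~thor::out::somefile',OVERWRITE);

// Rewritten only when the generating code or the input data has changed:
// OUTPUT(ds,,'~thor::out::somefile',UPDATE);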
\\n\\nUsing OVERWRITE instead of UPDATE would rewrite the file each time even if that would produce an identical copy overwriting the previous. UPDATE is more efficient in this case and avoids doing needless work.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-03-01 19:10:12\" },\n\t{ \"post_id\": 9284, \"topic_id\": 2168, \"forum_id\": 8, \"post_subject\": \"UPDATE option on OUTPUT\", \"username\": \"dustinskaggs\", \"post_text\": \"Since the UPDATE option on an OUTPUT will also allow the file to be overwritten, under what circumstances would the OVERWRITE option be preferable to UPDATE? Are there performance issues with UPDATE having to check if the output file needs updated?\", \"post_time\": \"2016-03-01 15:38:44\" },\n\t{ \"post_id\": 9308, \"topic_id\": 2174, \"forum_id\": 8, \"post_subject\": \"Re: Handling FROMJSON errors within PROJECT\", \"username\": \"JimD\", \"post_text\": \"Dan,\\n\\nFROMJSON has an ONFAIL parameter. The Jira to add this to documentation is here:\\nhttps://track.hpccsystems.com/browse/HPCC-13994\\n\\n\\nAs we dicsussed offline, you can use:\\n\\n SELF := FROMJSON(JSONDataRec, LEFT.s, ONFAIL(TRANSFORM(JSONDataRec, SELF := [])))\\n
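Putting that ONFAIL form into the PROJECT from the original question (quoted further down), a self-contained sketch would look something like the following; the final filter that drops the blanked-out rows is an extra step, not part of the reply above:

JSONDataRec := RECORD
    STRING someValue {XPATH('key')};
END;

ds := DATASET([{'{"key": "bar"}'},   // well-formed
               {'["key": "bar"}'},   // malformed
               {''}],                // empty
              {STRING s});

res := PROJECT(ds,
               TRANSFORM(JSONDataRec,
                         SELF := FROMJSON(JSONDataRec, LEFT.s,
                                          ONFAIL(TRANSFORM(JSONDataRec, SELF := [])))));

// Malformed rows come through with default (blank) values, so they can be filtered out afterwards:
OUTPUT(res(someValue != ''));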
\\n\\nwhich is basically that same as "skip".\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-03-07 20:19:43\" },\n\t{ \"post_id\": 9306, \"topic_id\": 2174, \"forum_id\": 8, \"post_subject\": \"Re: Handling FROMJSON errors within PROJECT\", \"username\": \"DSC\", \"post_text\": \"That does work, but it's a bit heavy-handed: The result of the entire PROJECT() is an empty recordset. What I'm aiming for is selectively skipping only the malformed records. Something like an ONFAIL(SKIP) option on the FROMJSON() statement would work, for instance. That, unfortunately, doesn't seem to be available.\\n\\nDan\", \"post_time\": \"2016-03-07 19:38:19\" },\n\t{ \"post_id\": 9304, \"topic_id\": 2174, \"forum_id\": 8, \"post_subject\": \"Re: Handling FROMJSON errors within PROJECT\", \"username\": \"putnik\", \"post_text\": \"You can try CATCH ( Project ..... ), SKIP) ;\", \"post_time\": \"2016-03-07 16:55:26\" },\n\t{ \"post_id\": 9296, \"topic_id\": 2174, \"forum_id\": 8, \"post_subject\": \"Handling FROMJSON errors within PROJECT\", \"username\": \"DSC\", \"post_text\": \"Here is an example of the code I'm using to transform a dataset of JSON-formatted strings to ECL data structures:\\n\\nJSONDataRec := RECORD\\n STRING someValue {XPATH('key')};\\nEND;\\n\\nsampleStr := '{"key": "bar"}';\\n// sampleStr := '["key": "bar"}';\\n// sampleStr := '';\\n\\nds := DATASET([sampleStr], {STRING s});\\n\\nres := PROJECT\\n (\\n ds,\\n TRANSFORM\\n (\\n JSONDataRec,\\n SELF := FROMJSON(JSONDataRec, LEFT.s)\\n )\\n );\\n\\nOUTPUT(res);
\\nThis code works as-is. However, if the incoming data cannot be guaranteed to be correctly-formatted JSON then errors will be produced by the FROMJSON statement. Both of the other commented-out sampleStr definitions produce (two different) errors.\\n\\nMy question, how can I protect the PROJECT? I want to basically just SKIP malformed JSON code.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2016-03-04 14:00:25\" },\n\t{ \"post_id\": 9496, \"topic_id\": 2198, \"forum_id\": 8, \"post_subject\": \"Re: KEL Lite Tutorial Error\", \"username\": \"wbeason\", \"post_text\": \"I was able to resolve the issue, and saving a KEL file now generates ECL files in my IDE. To do so I added my java (64bit) bin directory to my system path (Windows 10).\", \"post_time\": \"2016-04-07 17:25:58\" },\n\t{ \"post_id\": 9356, \"topic_id\": 2198, \"forum_id\": 8, \"post_subject\": \"Re: KEL Lite Tutorial Error\", \"username\": \"wbeason\", \"post_text\": \"Richard,\\n\\nI installed KEL with hpccsystems-kel-lite-5.4.2-1.Windows.exe, and left all of the configuration options as their default values.\\n\\nKEL.bat and KEL.jar are in the folder \\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.4.2\\\\KEL\\n
\\n\\nAnd the ide/clienttools folder are in \\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.4.2\\n
\\n\\nThanks,\\nWill\", \"post_time\": \"2016-03-17 14:36:58\" },\n\t{ \"post_id\": 9354, \"topic_id\": 2198, \"forum_id\": 8, \"post_subject\": \"Re: KEL Lite Tutorial Error\", \"username\": \"rtaylor\", \"post_text\": \"Will,\\n\\nHow did you install KEL itself? IOW, where are your KEL.bat and KEL.jar files? \\n\\nThey should be in a KEL directory under the installation version of your ECL IDE.\\n\\nFor example, if you are running the 5.4.2 version of the IDE (and it was installed to its default directory) they should be in:C:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.4.2\\\\KEL
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-17 14:23:50\" },\n\t{ \"post_id\": 9350, \"topic_id\": 2198, \"forum_id\": 8, \"post_subject\": \"KEL Lite Tutorial Error\", \"username\": \"wbeason\", \"post_text\": \"Hi, hope this is the right place. I've been attempting to get KEL Lite running using the tutorial in KEL-Reference.pdf that comes with KEL.\\n\\nI've run BWR_GenFiles to generate some sample data and I'm now running the sample code given:\\nPerson := ENTITY( FLAT(UID=id,fname,mname,lname,address1,address2,city,state) );\\nUSE KEL_tutorial.File_Person.Small.File( FLAT, Person );\\nQUERY: ShowAll <= Person;\\n
\\n\\nThe reference says that upon saving this file ECL code should be generated, in particular the files E_Person.ECL and Q_Show_ALL.ECL, but I see neither of these after saving in any of the tutorial folders.\\n\\nPressing submit gives me errors:\\n\\nWarning: (1,11): error C2167: Unknown identifier "ENTITY" (0, 0), 0, \\nWarning: (2,1): error C2324: Unknown type 'USE' (0, 0), 0, \\nWarning: (2,17): error C2167: Unknown identifier before "." (expected :=) (0, 0), 0, \\nWarning: (3,6): error C3002: syntax error near ":" : expected := (0, 0), 0, \\n
\\nI added a line \\n\\nimport KEL06a as KEL\\n
\\nBut that had no effect.\\n\\nI've run the KEL installation, and imported the mod file (KEL06a.mod) into the IDE. I have also tried reinstalling both the IDE and KEL. Here's a picture of what my directories look like:\\n\\n I found an example of someone having similar errors here: viewtopic.php?f=8&t=1508&p=6590&hilit=kel#p6590, but that was of little help since I am loading the mod file. I fear I am missing something simple. Any help running this example would be greatly appreciated, \\n\\nThanks!\\nWill\", \"post_time\": \"2016-03-16 23:02:37\" },\n\t{ \"post_id\": 9438, \"topic_id\": 2215, \"forum_id\": 8, \"post_subject\": \"Re: Can't delete old subfile after superfile packagemap upda\", \"username\": \"bforeman\", \"post_text\": \"Hi Drea,\\n\\nYou should probably open a JIRA issue on this, if you haven't already done so.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-03-30 18:47:06\" },\n\t{ \"post_id\": 9389, \"topic_id\": 2215, \"forum_id\": 8, \"post_subject\": \"Can't delete old subfile after superfile packagemap update\", \"username\": \"drealeed\", \"post_text\": \"I have a query on roxie that uses a superfile. I published the query, and also deployed a packagemap for the query defining the superfile & subfile it uses\\n\\nI generated a new index subfile and redeployed the packagemap to roxie with addpackagemap. The deploy worked. The service query now returns data from the new subfile. When I look at the query in eclwatch, it correctly lists the superfile and new subfile as its data source.\\n\\nBut when I try to delete the old subfile, which is no longer referenced by the superfile, I get an error in ECL Watch:\\n\\nhipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303\\n\\nCould not delete hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303 on roxie: DFS Exception: 12: Failed to delete file: hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303 - cause: [ -1: Can't remove hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303: Cannot remove file hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303 as owned by SuperFile(s): hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbq]\\n\\nIf I try it in ecl ide with the command fileservices.deleteLogicalFile('~hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbqw20160324-155303'), the same error occurs.\\n\\nWhy does hpcc think this superfile still has a handle on the old subfile? If I search for queries using the old subfile, none come back. If I look at the files/subfiles used for the query in ecl watch, the old file doesn't show up.\", \"post_time\": \"2016-03-24 20:07:09\" },\n\t{ \"post_id\": 9394, \"topic_id\": 2216, \"forum_id\": 8, \"post_subject\": \"Re: Maximum number of digits returned by HASH\", \"username\": \"balajisampath\", \"post_text\": \"Thank You Richard\", \"post_time\": \"2016-03-28 13:17:37\" },\n\t{ \"post_id\": 9392, \"topic_id\": 2216, \"forum_id\": 8, \"post_subject\": \"Re: Maximum number of digits returned by HASH\", \"username\": \"rtaylor\", \"post_text\": \"Balaji,\\n\\nBoth HASH and HASH32 are 32-bit functions, so both return UNSIGNED4 values. HASH64 is 64-bit so it returns UNSIGNED8. 
Max and min values for these are listed in the Language Reference docs for INTEGER (which includes UNSIGNED).\\n\\nFYI, no matter which hashing function is used, the DISTRIBUTE function (most common use for hashing) only uses the lower 32 bits for determining its radix points, so HASH or HASH32 are the recommendations there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-28 13:14:34\" },\n\t{ \"post_id\": 9390, \"topic_id\": 2216, \"forum_id\": 8, \"post_subject\": \"Maximum number of digits returned by HASH\", \"username\": \"balajisampath\", \"post_text\": \"What is the maximum value that the Hash would return\\n\\nHASH-?\\nHASH32-?\\nHASH64- UNSIGNED8\\n\\n\\nThanks,\\nBalaji\", \"post_time\": \"2016-03-25 13:49:20\" },\n\t{ \"post_id\": 9402, \"topic_id\": 2218, \"forum_id\": 8, \"post_subject\": \"Re: FileServices.deleteFiles fails in both thor & NOTHOR()\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nAnd your workaround would be to write a MACRO that generates a separate DeleteLogicalFile action for each individual filename to remove.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-28 20:19:59\" },\n\t{ \"post_id\": 9400, \"topic_id\": 2218, \"forum_id\": 8, \"post_subject\": \"Re: FileServices.deleteFiles fails in both thor & NOTHOR()\", \"username\": \"drealeed\", \"post_text\": \"Richard,\\n\\nThanks. What I actually have to do is filter the list, and then sort it by name descending, and grab everything /but/ the first line. (The TOPN was used in the example.) the SORT(dsSafeToDelete,-name)[2..] is failing with the same issue. I'll put in a ticket for it.\", \"post_time\": \"2016-03-28 20:15:02\" },\n\t{ \"post_id\": 9398, \"topic_id\": 2218, \"forum_id\": 8, \"post_subject\": \"Re: FileServices.deleteFiles fails in both thor & NOTHOR()\", \"username\": \"rtaylor\", \"post_text\": \"Drea,
If I do a simple filter on the list, perform a TOPN on the filter and then try to delete the TOPN'd list, the delete fails whether wrapped in a NOTHOR or not.
Since your TOPN only wants the first entry, you can try it like this:superfilename_notilde := ... ;\\nindexfilename_notilde := ... ;\\n\\ndsOldSubfiles := NOTHOR(fileservices.LogicalFileList( superfilename_notilde + '_subfile_*'));\\ndsSafeToDelete := dsOldSubfiles(Str.ToLowerCase(name) != \\n Str.ToLowerCase((indexfilename_notilde)));\\ndsOlder := SORT(dsSafeToDelete,name);\\n\\nNOTHOR(fileservices.DeleteLogicalFile('~' + dsOlder[1].name));
If this works, then you're GTG as long as you only want to remove the first file. If not, or you need to remove multiple files, then another JIRA ticket against APPLY would be my next move (I've found that APPLY can be more problematic than useful in some cases).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-28 20:04:09\" },\n\t{ \"post_id\": 9396, \"topic_id\": 2218, \"forum_id\": 8, \"post_subject\": \"FileServices.deleteFiles fails in both thor & NOTHOR()\", \"username\": \"drealeed\", \"post_text\": \"I have a list of filenames retrieved with FileServices.logicalFileList.\\n\\nIf I do a simple filter on the list of filenames and then run them through an APPLY to delete the filtered list, this works if the APPLY is wrapped in a NOTHOR.\\n\\nIf I do a simple filter on the list, perform a TOPN on the filter and then try to delete the TOPN'd list, the delete fails whether wrapped in a NOTHOR or not. Without the NOTHOR, the error says "Wrap this in a nothor". With a NOTHOR, the error says "nothor shouldn't be used.\\n\\nThis is in HPCC 5.6., on cluster http://10.173.147.1:8010. Any idea what's up?\\n\\nsuperfilename_notilde :='hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbq';\\nindexfilename_notilde :='hipie::keys::leeddx_issue_1306_superfile::ins002_dsoutputfrominput_1::view_tablereferencewithbq_subfile_w20160328-100345';\\n\\n\\ndsOldSubfiles :=NOTHOR(fileservices.LogicalFileList( superfilename_notilde + '_subfile_*'));\\ndsSafeToDelete:=dsOldSubfiles(Str.ToLowerCase(name) != Str.ToLowerCase((indexfilename_notilde)));\\ndsOlder:=TOPN(dsSafeToDelete,1,name);\\n\\n//APPLY(dsSafeToDelete,fileservices.DeleteLogicalFile('~' + name)); //error: cause: [ -1: No access to Dali - this normally means a plugin call is being called from a thorslave\\n//NOTHOR(APPLY(dsSafeToDelete,fileservices.DeleteLogicalFile('~' + name))); //works\\n\\n//NOTHOR(APPLY(dsOlder,fileservices.DeleteLogicalFile('~' + name))); // fails with Warning: (0,0): error C4818: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. (0, 0), 0, \\n\\nAPPLY(dsOlder,fileservices.DeleteLogicalFile('~' + name)); // //error: cause: [ -1: No access to Dali - this normally means a plugin call is being called from a thorslave\", \"post_time\": \"2016-03-28 14:29:42\" },\n\t{ \"post_id\": 9420, \"topic_id\": 2220, \"forum_id\": 8, \"post_subject\": \"Re: Question about recordset filtering example\", \"username\": \"rtaylor\", \"post_text\": \"sukhong,The filter condition is referring to dataset X within recordset Y.\\nHow can this be possible without a explicit parent-child relationship between X and Y?
There is no parent-child relationship here -- these are simply recordset definitions that build on previous recordset definitions.\\n\\nZ := Y(X.id != 100);
The purpose of the X in this code is simply to qualify which dataset the field originated from. \\n\\nThat code could just as correctly be coded like this:X := DATASET(...);\\nY := X(id != 0);\\nZ := Y(id != 100);
which does exactly the same thing. The id field in this version is implicitly assumed to be from the recordset being filtered. Full qualification of the id field, in this instance, is OK but unnecessary.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-29 20:28:58\" },\n\t{ \"post_id\": 9418, \"topic_id\": 2220, \"forum_id\": 8, \"post_subject\": \"Re: Question about recordset filtering example\", \"username\": \"sukhong\", \"post_text\": \"\\nZ := Y(X.id != 100);\\n
\\nThe filter condition is referring to dataset X within recordset Y.\\n\\nHow can this be possible without an explicit parent-child relationship between X and Y?\\n\\nDoes the ECL compiler implicitly "deduce" a parent-child relationship for all such filtering conditions (i.e. Y was derived from X using some filter conditions)?\", \"post_time\": \"2016-03-29 19:23:43\" },\n\t{ \"post_id\": 9416, \"topic_id\": 2220, \"forum_id\": 8, \"post_subject\": \"Re: Question about recordset filtering example\", \"username\": \"rtaylor\", \"post_text\": \"sukhong,\\n\\nThis code:X := DATASET(...);
declares a DATASET that will be referred to as "X"\\n\\nThis code:Y := X(X.id != 0);
defines a recordset (referred to as "Y") that applies a filter to the X dataset such that Y will contain only those records for which the filter condition is true.\\n\\nAnd this code:Z := Y(X.id != 100);
defines a recordset (referred to as "Z") that applies a further filter to the Y dataset such that Z will contain only those records for which the filter condition is true.\\n\\nSo, bottom line: X is all the records in the file on disk. Y is a subset of those X records where the id field is not equal to 0, and Z is a subset of those Y records (the non-zero records) where the id field is not equal to 100.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-29 19:04:33\" },\n\t{ \"post_id\": 9414, \"topic_id\": 2220, \"forum_id\": 8, \"post_subject\": \"Question about recordset filtering example\", \"username\": \"sukhong\", \"post_text\": \"Following ECL code is legal (taken from eclcc documentation):\\n\\nX := DATASET(...);\\nY := X(X.id != 0);\\nZ := Y(X.id != 100);\\n\\nWhat exactly is the meaning of Z := Y(X.id != 100); ?\\n\\nWe are referring to dataset X's fields within dataset Y, and it doesn't seem to make sense unless it implicitly creates parent-child relationship when recordset Y was created by filtering dataset X.\\n\\nI've tried to look up the syntax and semantics of above queries in ECLLanguageReference Document, but the only section I found close was "Implicit Dataset Relationality" (pg. 83) section. \\n\\nThis section doesn't mention anything about creating relationship implicitly using recordset filtering.\\n\\nMy questions are:\\n\\n1. Does recordset filtering implicitly creates a parent-child relationships between two (source and target) data/recordsets? \\n\\n2. When does eclcc implicitly creates relationships between fields? Is there any "rules" or "documentations" that I can refer to?\\n\\nThank you!\", \"post_time\": \"2016-03-29 18:22:50\" },\n\t{ \"post_id\": 9434, \"topic_id\": 2222, \"forum_id\": 8, \"post_subject\": \"Re: passing a dataset to wsWorkunit/WURun as a stored variab\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nI took your code (without #STORED) and published it to my hthor as testdatasetpassing (on my training cluster: http://10.173.248.1:8002/ then I tested it by adding a single record. It ran successfully, so I then changed "Output Tables" to "Output XML" in the dropllist, hit submit, and here's the XML I got:− <testdatasetpassingResponse xmlns="urn:hpccsystems:ecl:testdatasetpassing">\\n− <Result>\\n− <Dataset name="Result 1">\\n− <Row>\\n<dali>10.173.48.1</dali>\\n</Row>\\n</Dataset>\\n</Result>\\n</testdatasetpassingResponse>
\\nAnd here's the Sample SOAP Request for that query:− <testdatasetpassingRequest xmlns="urn:hpccsystems:ecl:testdatasetpassing">\\n− <roxies>\\n− <Row>\\n<dali>String</dali>\\n</Row>\\n− <Row>\\n<dali>String</dali>\\n</Row>\\n</roxies>\\n</testdatasetpassingRequest>
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-30 18:09:17\" },\n\t{ \"post_id\": 9432, \"topic_id\": 2222, \"forum_id\": 8, \"post_subject\": \"Re: passing a dataset to wsWorkunit/WURun as a stored variab\", \"username\": \"drealeed\", \"post_text\": \"Richard,\\n\\nThanks.\\n\\nSetting #STORED in the ecl works; what I need to be able to do is to pass a dataset in as a stored variable via the soap interface, and that's where I'm having trouble figuring out the structure of what to pass in soap.\\n\\n\\nDrea\", \"post_time\": \"2016-03-30 17:52:04\" },\n\t{ \"post_id\": 9430, \"topic_id\": 2222, \"forum_id\": 8, \"post_subject\": \"Re: passing a dataset to wsWorkunit/WURun as a stored variab\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nI made your example code work with #STORED like this:l_roxie:={ STRING dali};\\nDATASET(l_roxie) roxies:=DATASET([],l_roxie):STORED('roxies');\\n\\n#STORED('roxies',DATASET([{'10.173.147.1'}],l_roxie));\\n\\noutput(roxies);
Maybe this will give you an idea.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-30 17:46:31\" },\n\t{ \"post_id\": 9422, \"topic_id\": 2222, \"forum_id\": 8, \"post_subject\": \"passing a dataset to wsWorkunit/WURun as a stored variable?\", \"username\": \"drealeed\", \"post_text\": \"I have some ecl to be compiled into a workunit and run. I want to be able to pass a dataset in to the workunit as a stored variable and have it run. I've defined service inputs as datasets, but am having trouble with passing the same information in via SOAP to the WURun service.\\n\\nHere's the ecl:\\nl_roxie:={ STRING dali};\\nDATASET(l_roxie) roxies:=DATASET([],l_roxie):STORED('roxies');\\noutput(roxies);
\\n\\nI run this workunit via ECL IDE and get no output, as expected.\\n\\nThen I take the workunit ID and rerun it with WURun, providing the xml input to a dataset service input. But this doesn't work. What should I pass in for the dataset info? I've tried xml escaping the xml inside <Value> but I still get no output.\\n\\n\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsworkunits">\\n <soap:Body>\\n <WURunRequest>\\n <Wuid>W20160329-180243</Wuid>\\n <CloneWorkunit>1</CloneWorkunit>\\n <Variables>\\n <NamedValue>\\n <Name>roxies</Name>\\n <Value><roxies><Row><dali>10.173.147.1</dali></Row></roxies></Value>\\n </NamedValue>\\n </Variables>\\n <ExceptionSeverity>info</ExceptionSeverity>\\n </WURunRequest>\\n </soap:Body>\\n</soap:Envelope>
\", \"post_time\": \"2016-03-29 22:10:29\" },\n\t{ \"post_id\": 9484, \"topic_id\": 2234, \"forum_id\": 8, \"post_subject\": \"Re: Extracting all leaf nodes using xPath\", \"username\": \"Allan\", \"post_text\": \"Ok, this can be done but its not pretty.\\nUse a mixture of SplitWords and REGEXFIND.\\nIn the code below the outer TRANSFORM splits the XML at the start of TAG, producing the following dataset:\\n\\nBSB>\\nNottop>\\ndownAgan>\\nKK>123\\n/KK>\\nD4>K8765\\n/D4>\\nAU>765\\n/AU>\\n...\\n
\\nNote only the leaf nodes have text following the '>'.\\nWe can then filter for that condition producing:\\n\\nKK>123\\nD4>K8765\\nAU>765\\nJK>654\\nIUUT>B4536\\nYetAnother>SomeValue\\nTTR3>moredata\\n...\\n
\\n\\nThe inner transform then just splits the components into tag and its content. The entire code being:\\n\\nxm := DATASET([{'<BSB>'\\n+'<Nottop>'\\n+'<downAgan>'\\n+'<KK>123</KK>'\\n+'<D4>K8765</D4>'\\n+'<AU>765</AU>'\\n+'</downAgan>'\\n+'</Nottop>'\\n+'<Another>'\\n+'<JK>654</JK>'\\n+'<IUUT>B4536</IUUT>'\\n+'</Another>'\\n+'<YetAnother>SomeValue</YetAnother>'\\n+'<EvenMore>'\\n+'<down>'\\n+'<down2>'\\n+'<down3>'\\n+'<TTR3>moredata</TTR3>'\\n+'<TTR4>moredata</TTR4>'\\n+'<TTR5>moredata</TTR5>'\\n+'<TWQ55>moredata</TWQ55>'\\n+'<QQW4W>moredata</QQW4W>'\\n+'</down3>'\\n+'</down2>'\\n+'</down>'\\n+'</EvenMore>'\\n+'</BSB>'}],{STRING txt});\\n\\nRBSB := RECORD\\n STRING id;\\n STRING val;\\nEND;\\n\\nR5 := RECORD\\n DATASET(RBSB) BSB;\\nEND;\\n\\nR5 doit(RECORDOF(xm) L) := TRANSFORM\\n\\n d := DATASET(STD.Str.SplitWords(REGEXFIND('<BSB>(.+?)</BSB>',L.txt,1),'<'),{STRING line});\\n \\n RBSB MakeFields(RECORDOF(d) K) := TRANSFORM\\n itm := STD.Str.SplitWords(K.line,'>');\\n SELF.id := itm[1];\\n SELF.val := itm[2];\\n END;\\n\\n SELF.BSB := PROJECT(d(REGEXFIND('.*>.+',line)),MakeFields(LEFT)); // Only leaf nodes have any text after the ‘>’\\n\\nEND;\\nPROJECT(xm,doit(LEFT));\\n
\\nYours\\nAllan\", \"post_time\": \"2016-04-06 15:26:17\" },\n\t{ \"post_id\": 9474, \"topic_id\": 2234, \"forum_id\": 8, \"post_subject\": \"Re: Extracting all leaf nodes using xPath\", \"username\": \"Allan\", \"post_text\": \"Some dummy XML displaying the type of structure we're given.\\n\\n<BSB>\\n<Nottop>\\n<downAgan>\\n <KK>123</KK>\\n <D4>K8765</D4>\\n <AU>765</AU>\\n </downAgan>\\n </Nottop>\\n<Another>\\n <JK>654</JK>\\n <IUUT>B4536</IUUT>\\n </Another>\\n <YetAnother>SomeValue</YetAnother>\\n<EvenMore>\\n<down>\\n<down2>\\n<down3>\\n <TTR3>moredata</TTR3>\\n <TTR4>moredata</TTR4>\\n <TTR5>moredata</TTR5>\\n <TWQ55>moredata</TWQ55>\\n <QQW4W>moredata</QQW4W>\\n </down3>\\n </down2>\\n </down>\\n </EvenMore>\\n </BSB>\\n
\\nIt may be helpful to add that the keys, for one document, are unique.\\nYours\\nAllan\", \"post_time\": \"2016-04-04 19:22:43\" },\n\t{ \"post_id\": 9472, \"topic_id\": 2234, \"forum_id\": 8, \"post_subject\": \"Re: Extracting all leaf nodes using xPath\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nAny chance of seeing an example of the XML data you're needing to parse?\\n\\nRichard\", \"post_time\": \"2016-04-04 17:11:19\" },\n\t{ \"post_id\": 9468, \"topic_id\": 2234, \"forum_id\": 8, \"post_subject\": \"Extracting all leaf nodes using xPath\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have an XML construct where various nodes contain hundreds of key value pairs, the only distinguishing factor for these pairs is that they are leaf nodes (no children).\\n\\nI need to extract both the tag name (the key) and its value, I've tried:\\n\\nSELF.bsb := XMLPROJECT('BSB/*[child="0"]'\\n ,TRANSFORM(RBSB\\n ,SELF.id := ''; // <== Not sure how to get name of tag\\n SELF.val := XMLTEXT('');\\n )\\n );\\n
\\nWhich seemed the obvious choice but no luck.\\n\\nAny ideas?\\nYours\\n\\nAllan\", \"post_time\": \"2016-04-04 16:08:17\" },\n\t{ \"post_id\": 9480, \"topic_id\": 2236, \"forum_id\": 8, \"post_subject\": \"Re: JoinCondition Options\", \"username\": \"rtaylor\", \"post_text\": \"househippo,\\n\\nThe JOIN condition can use "fuzzy logic" but if the condition contains no equality portion (i.e. "non-fuzzy" logic) then you get the "join too complex" error from your syntax check, which can be overcome with the ALL option, like this:\\nbigJoin := JOIN(set_one,set_two, \\n ABS(LEFT.Value1 - RIGHT.Value1) <= 250, \\n JoinThem(LEFT,RIGHT),ALL);
Use that JOIN In your example and you will get the one match you wanted.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-05 14:51:28\" },\n\t{ \"post_id\": 9476, \"topic_id\": 2236, \"forum_id\": 8, \"post_subject\": \"JoinCondition Options\", \"username\": \"househippo\", \"post_text\": \"I'm wanting to join two datasets but when the join evaluation is not 100% exact.\\n\\nExample when Left side is 1000 , and right side is 1250.\\n\\nI want to join on a Range when LEFT side is 1000 when right side is between 1000 AND (1000 + 500),\\nThis way I could have a 1(Left) to many(Right) relationship.\\n\\nschema1 := RECORD\\n STRING20 name;\\n INTEGER4 Value1;\\nEND;\\nset_one := DATASET([{'bob',1000},{'kevin',2000}],schema1);\\n\\nschema2 := RECORD\\n STRING20 city;\\n INTEGER4 Value1;\\nEND;\\n\\nset_two := DATASET([{'Great Falls',1250},{'Hickory Oak',5000}],schema2);\\n\\nMyOutRec := RECORD\\n\\tINTEGER4 LeftValue2;\\n\\tINTEGER4 RightValue2;\\n STRING10 LeftName;\\n STRING10 RightCity; \\nEND;\\n\\nMyOutRec JoinThem(schema1 L, schema2 R) := TRANSFORM\\n SELF.LeftName := L.name;\\n SELF.RightCity := R.city;\\n\\tSELF.LeftValue2 := L.Value1;\\n\\tSELF.RightValue2 := R.Value1;\\nEND;\\n\\nbigJoin := JOIN(set_one,set_two, LEFT.Value1 = RIGHT.Value1, JoinThem(LEFT,RIGHT));\\nbigJoin;
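For the 1-to-many range described above (right side between LEFT.Value1 and LEFT.Value1 + 500), the same ALL technique might be written like this, reusing the definitions above:

rangeJoin := JOIN(set_one, set_two,
                  RIGHT.Value1 BETWEEN LEFT.Value1 AND LEFT.Value1 + 500,
                  JoinThem(LEFT, RIGHT),
                  ALL);   // required because the condition contains no equality portion
rangeJoin;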
\", \"post_time\": \"2016-04-05 07:46:01\" },\n\t{ \"post_id\": 9510, \"topic_id\": 2242, \"forum_id\": 8, \"post_subject\": \"Re: MACRO and #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nYour original code would not work, because you were referencing "RptData" (which I renamed as RptDataset) in the IMPORT_Text string, which would have resulted in a syntax error once #EXPAND did its thing, since RptData is not the name of the passsed dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-11 17:57:21\" },\n\t{ \"post_id\": 9508, \"topic_id\": 2242, \"forum_id\": 8, \"post_subject\": \"Re: MACRO and #EXPAND\", \"username\": \"mrumsey\", \"post_text\": \"Thanks Richard! \\n\\nI'll have to wait for my environment to come back up to test it.\\n\\nShould my original code work or is this a case where I am using #EXPAND outside of its intended use?\", \"post_time\": \"2016-04-11 17:54:08\" },\n\t{ \"post_id\": 9506, \"topic_id\": 2242, \"forum_id\": 8, \"post_subject\": \"Re: MACRO and #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nThis version works for me:CannedRpt(RptName, RptDataset, RptGroup1, RptGroup2,RecStructAdd) := MACRO\\n\\tGrp1 := TABLE(RptDataset,{RptGroup1,GrpCnt := COUNT(GROUP)},RptGroup1); \\n\\tRptName := TABLE(RptDataset,\\n\\t {STRING50 Att_Group := (STRING)RptGroup2,\\n #EXPAND(RecStructAdd)}, \\n RptGroup2, SKEW(1));\\nENDMACRO;\\n\\nIMPORT_Text := 'REAL4 Six_Mo := COUNT(GROUP, Profiling_Att = \\\\'6\\\\') / ' +\\n 'Grp1(Profiling_Att=\\\\'6\\\\')[1].GrpCnt,' + \\n 'REAL8 Twelve_Mo := COUNT(GROUP, Profiling_Att = \\\\'12\\\\') / ' +\\n 'Grp1(Profiling_Att=\\\\'12\\\\')[1].GrpCnt';\\n\\nProfile_Data := DATASET([{1,'6'},{2,'6'},{2,'12'},{1,'12'},{1,'6'}],{INTEGER MRI_Std,STRING2 Profiling_Att});\\n\\nCannedRpt(RptByMRIStd, Profile_Data, Profiling_Att, MRI_Std,IMPORT_Text);\\nRptByMRIStd;
The key here is to calculate the divisor within the MACRO so you don't have the problem of replacing the name of the dataset in the RecStruct string that should be passed to the MACRO and not simply referenced from within it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-11 17:46:39\" },\n\t{ \"post_id\": 9504, \"topic_id\": 2242, \"forum_id\": 8, \"post_subject\": \"MACRO and #EXPAND\", \"username\": \"mrumsey\", \"post_text\": \"Hello again,\\n\\nI am attempting to automate a report through HPCC. Given our current visualization processes, I need to do manual formatting to get the table output to be unstacked instead of stacked. \\n\\nIt seems as though if I were to able to pass a STRING into a MACRO with the column definitions I want, I would be able to #EXPAND them within the MACRO call and get the appropriate number of columns for my desired output. \\n\\nUnfortunately, it seems that the #EXPAND command is evaluated before the MACRO call (at compile time I'd guess) and does not contain the reference of the MACRO. Below is an example of the IMPORT_Text string (column definitions), the MACRO, and the MACRO call.\\n\\nIMPORT_Text Example:\\nIMPORT_Text\\t:= 'REAL4 Six_Mo := (Count(Group, Profiling_Att = \\\\'6\\\\')/Count(RptData(Profiling_Att=\\\\'6\\\\'))), REAL8\\tTwelve_Mo := (Count(Group, Profiling_Att = \\\\'12\\\\')/Count(RptData(Profiling_Att=\\\\'12\\\\')))';
\\nThis current version has 2 attributes, Six_mo and Twelve_mo. They are simple distributions for this example (% of group/group total). I am using REAL4/REAL8 for troubleshooting.\\n\\nMACRO:\\nCannedRpt(Rpt, RptData, RptGroup1, RptGroup2) := MACRO\\n\\tRpt := TABLE(RptData, \\n {\\n\\t\\t\\t\\t\\t\\t\\t\\tSTRING50\\t\\tAtt_Group := (STRING)RptGroup2,\\n\\t\\t\\t\\t\\t\\t\\t\\t#EXPAND(IMPORT_Text),\\n\\t\\t\\t\\t\\t\\t\\t }/*End Record Structure*/, \\n\\t\\t\\t\\t\\t\\t\\t\\tRptGroup2/*Group By*/, SKEW(1));\\nENDMACRO;
\\nRpt is the name of the RETURN equivalent (a table in this case).\\nRptData is the Input Dataset\\nRptGroup1 may be unnecessary as I am explicitly calling the attribute in this version of the MACRO (Profiling_Att is explicitly called in the IMPORT_Text string. RptGroup1 could be used here instead).\\nRptGroup2 is the Attribute on which to create a distribution table.\\n\\nMACRO call:\\nCannedRpt(RptByMRIStd, Profile_Data, Profiling_Att, MRI_Std);
\\n\\nAlso, I have tried passing the string through the MACRO call or calling directly (as I am in this iteration) and both return the same error:\\n\\nUnknown Identifier "RptData"\\n\\nThis seems to happen only if I use RptData in the #EXPAND call, not if I use other, static values on #EXPAND (i.e. 'REAL4 ThisAtt := 4.555632' works perfectly);\\n\\nI have also tried using:\\nLOCAL #EXPAND(IMPORT_Text)
\\nThis was to try and force the compiler to evaluate the #EXPAND statement inside the scope of the MACRO. It gives me an error that I need '(' near 'REAL4'. This shouldn't be necessary as the code works perfectly when typed explicitly (no #EXPAND) in the table. The only differences are the escape characters and the outside (').\\n\\nIf anyone has any ideas, let me know!\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2016-04-11 14:47:54\" },\n\t{ \"post_id\": 9528, \"topic_id\": 2250, \"forum_id\": 8, \"post_subject\": \"Re: Join two datasets based on Row Position\", \"username\": \"David Dasher\", \"post_text\": \"Hi Richard\\n\\nThanks, I'll take a look first thing tomorrow and let you know how I get on. \\n\\nTake care\\n\\nDavid\", \"post_time\": \"2016-04-12 20:46:09\" },\n\t{ \"post_id\": 9526, \"topic_id\": 2250, \"forum_id\": 8, \"post_subject\": \"Re: Join two datasets based on Row Position\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nThere's another way that doesn't require adding that extra row number field. It also doesn't require a JOIN, just a PROJECT, like this:\\nFile1 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n {STRING1 Letter});\\n\\nFile2 := DATASET([{'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'A'},{'B'},{'C'},{'D'},{'E'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n {STRING1 Letter});\\n\\nOutRec := RECORD\\n\\tSTRING1 LeftLetter;\\n\\tSTRING1 RightLetter;\\n\\tSTRING2 CatLetter;\\nEND;\\n\\nOutrec XF(File1 L, INTEGER C) := TRANSFORM\\n R := File2[C];\\n SELF.LeftLetter := L.Letter;\\n SELF.RightLetter := R.Letter;\\n SELF.CatLetter := L.Letter + R.Letter;\\nEND;\\n\\nPROJECT(File1,XF(LEFT,COUNTER));
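For comparison, the 'extra row number field' route mentioned at the start of this reply could look roughly like this, reusing File1, File2 and OutRec from above (RecNo is an assumed field name):

NumRec := RECORD
    UNSIGNED4 RecNo;
    STRING1   Letter;
END;

N1 := PROJECT(File1, TRANSFORM(NumRec, SELF.RecNo := COUNTER, SELF := LEFT));
N2 := PROJECT(File2, TRANSFORM(NumRec, SELF.RecNo := COUNTER, SELF := LEFT));

JOIN(N1, N2, LEFT.RecNo = RIGHT.RecNo,
     TRANSFORM(OutRec,
               SELF.LeftLetter  := LEFT.Letter,
               SELF.RightLetter := RIGHT.Letter,
               SELF.CatLetter   := LEFT.Letter + RIGHT.Letter));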
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-12 20:41:54\" },\n\t{ \"post_id\": 9520, \"topic_id\": 2250, \"forum_id\": 8, \"post_subject\": \"Re: Join two datasets based on Row Position\", \"username\": \"rtaylor\", \"post_text\": \"David,Is it possible to do a join based on the row position?
Sure. You just need to add a row number field to each dataset then do your JOIN on that field.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-12 14:37:07\" },\n\t{ \"post_id\": 9518, \"topic_id\": 2250, \"forum_id\": 8, \"post_subject\": \"Join two datasets based on Row Position\", \"username\": \"David Dasher\", \"post_text\": \"Hello all\\n\\nI'm doing some sales analysis for a customer based on a date range grouped by week, over say a two month period. We now have to compare the previous years data, however, as the key data is different (Not only the year but the day i.e. comparing Saturday to Saturday of the previous year). \\n\\nIs it possible to do a join based on the row position? Each of my datasets is sorted correctly and theoretically everything would match up.\\n\\nMany thanks\\n\\nDavid\", \"post_time\": \"2016-04-12 12:02:39\" },\n\t{ \"post_id\": 9562, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"vyasshub\", \"post_text\": \"@gsmith: Tried ,no change \", \"post_time\": \"2016-04-21 13:28:41\" },\n\t{ \"post_id\": 9560, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"gsmith\", \"post_text\": \"Try adding
_rawxml=true
to your request.\", \"post_time\": \"2016-04-21 10:41:45\" },\n\t{ \"post_id\": 9558, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"vyasshub\", \"post_text\": \"@gsmith:Thanks for your reply, i tried out the above solution, but its not working for me.Following are the issue\\n(1)INPUT Issue:I have very large xml input, unable to pass it to the above mentioned query.Also i tried with small xml input by publishing a simple service to join fname and lname.So the query i use is as follows:\\nwget http://**.**.**.**:8002/WsEcl/example/r ... lname=Vyas\\n\\nThe expected output is "ShubVyas", but all i am getting is the a file with name "Shub" in it ,lname parameter is missing.PFB the snapshot of output\\n[attachment=0:1skyobzq]13.PNG\\n\\n\\n(2)WORKUNIT Issue : It creates a new WorkUnit every time I execute it.I want to use the same workunit through which i published service,just needs to save the output of service.\\nFor Example:[attachment=2:1skyobzq]11.png\\n\\nNow when i click on submit after passing fname=Shub and lname=Vyas, i get following\\n[attachment=1:1skyobzq]12.PNG\\n\\nI just need to save ShubVyas to a text/xml file.\\nLet me know your thoughts on this.\\nThanks in advance\", \"post_time\": \"2016-04-21 10:32:58\" },\n\t{ \"post_id\": 9546, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"gsmith\", \"post_text\": \"If you click the "links" tab on the test page, you will get a bunch of example on how to submit your request.\\n\\nIn your case a wget from the command line will fetch the response as either JSON or XML depending on which URL you use. In my trivial example it would look like this:\\n\\nwget http://x.x.x.x:8002/WsEcl/submit/query/thor/def/xml?param1=xxx¶m2=yyy\\nwget http://x.x.x.x:8002/WsEcl/submit/query/thor/def/json?param1=xxx¶m2=yyy\\n
\\n\\nIn your case "thor" would be "roxie" and "def" would be the name of your query...\\n\\nAlso you would need to pass the authentication via command line to wget.\", \"post_time\": \"2016-04-18 10:55:10\" },\n\t{ \"post_id\": 9544, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"vyasshub\", \"post_text\": \"@tlhumphrey2: Thanks for your reply, but I am looking to save the output in a text file/xml file that we get after we give input in the FORM and then click on submit.Let me know your thoughts on that.\", \"post_time\": \"2016-04-18 05:58:46\" },\n\t{ \"post_id\": 9542, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"tlhumphrey2\", \"post_text\": \"You can use WsECL. In your browser address box, enter your ESP's IP (same IP used to get ECL Watch) followed by colon and port 8002. For example: 54.23.45.102:8002. In the left margin you will see a list of targets, one of which will be your roxie. Click on it and you will see a list of roxie services (queries). Click on the one you want and a form shows up on the right.\", \"post_time\": \"2016-04-17 13:45:58\" },\n\t{ \"post_id\": 9540, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve data from a publshed service\", \"username\": \"vyasshub\", \"post_text\": \"Any Update ?\", \"post_time\": \"2016-04-16 12:07:45\" },\n\t{ \"post_id\": 9530, \"topic_id\": 2256, \"forum_id\": 8, \"post_subject\": \"Retrieve data from a publshed service\", \"username\": \"vyasshub\", \"post_text\": \"Hi, \\nI want the response data from published service in xml file, is there any better way to do that apart from eclplus.I am using the following command in batch file:\\neclplus server=******** user=**** password=****** cluster=roxie_dev action=view wuid=W12345-12345 output=D:\\\\test.xml\\n\\nIn above command the wuid is for the published service.I am able to get some data in xml file, but most of the fields are empty specially the datasets which are nested.\\n\\nAny help is appreciated.\", \"post_time\": \"2016-04-13 14:11:23\" },\n\t{ \"post_id\": 9536, \"topic_id\": 2260, \"forum_id\": 8, \"post_subject\": \"Re: outputs not being cleared on WURun rerun of workunit?\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nIMO -- not a bug, yes it's because of the EXTEND, and try losing the EXTEND and see if it works the way you'd expect then.\\n\\n\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-14 21:11:49\" },\n\t{ \"post_id\": 9534, \"topic_id\": 2260, \"forum_id\": 8, \"post_subject\": \"outputs not being cleared on WURun rerun of workunit?\", \"username\": \"drealeed\", \"post_text\": \"I have a workunit that calls\\n\\ndsstats:=NOTHOR(FileServices.LogicalFileList(dsfilenotilde));\\nOUTPUT(dsstats,NAMED('UseDataset_files'),EXTEND);\\n\\nAnd it outputs a single record to a named output.\\n\\nIf I programatically clone that workunit and rerun it (using WURun + clone), the UseDataset_files output has another record of the same file added rather than the output being overwritten. is this a bug? Is it due to EXTEND? 
Is there a way to work around it?\", \"post_time\": \"2016-04-14 21:08:13\" },\n\t{ \"post_id\": 9746, \"topic_id\": 2280, \"forum_id\": 8, \"post_subject\": \"Re: Resolution for: Jbuff: Out of Memory Error.\", \"username\": \"ghalliday\", \"post_text\": \"The normal cause is when you have some code\\n\\n\\nmyDatasets(someField IN SET(anotherDataset, fieldX))\\n\\nwhere anotherDataset is a large dataset. Really you should be doing a join instead.\\n\\nSee https://track.hpccsystems.com/browse/HPCC-15575 for more details.\", \"post_time\": \"2016-06-13 17:05:05\" },\n\t{ \"post_id\": 9610, \"topic_id\": 2280, \"forum_id\": 8, \"post_subject\": \"Resolution for: Jbuff: Out of Memory Error.\", \"username\": \"Rahul Jain\", \"post_text\": \"Hi Team,\\n\\nI would like to get below information by end of post:\\n1. What is Jbuff Error?\\n2. Why do we have it? If there is straight known cause well and good. If not \\n3. How can we get rid of this error? What possible approach will be required to dig in and resolve error?\\n\\nHere is Summary:\\n\\nWe are scrubbing a file sized 12GB. Initially we had error - "DATASET too Large".\\nTo overcome this issue we increased output limit to 2000(Maximum allowed), using \\n
#OPTION('outputLimit',2000);
\\nWe broke 1 wall but encountered below error: \\nError: System error: -7: Graph[183], workunitwrite[185]: SDS: INTERNAL ERROR\\nSDS Reply Error : Jbuff: Out of Memory (2147483648,8), Master exception (0, 0), -7,\\n\\n\\nAs per graph (attached image) it seems HPCC/ECL itself is trying to spill to disk rather we doing it manually. \\n\\nPlease look for my email, for more information on the same.\\n\\nThanks,\\nRahul\", \"post_time\": \"2016-05-10 07:44:12\" },\n\t{ \"post_id\": 9638, \"topic_id\": 2284, \"forum_id\": 8, \"post_subject\": \"Re: RECORD - type stored variable?\", \"username\": \"drealeed\", \"post_text\": \"Richard,\\n\\nThanks for the response. I'll look into the xml option.\", \"post_time\": \"2016-05-12 16:40:33\" },\n\t{ \"post_id\": 9636, \"topic_id\": 2284, \"forum_id\": 8, \"post_subject\": \"Re: RECORD - type stored variable?\", \"username\": \"rtaylor\", \"post_text\": \"Drea,Is it possible to define a RECORD as a stored variable?
I wouldn't think so.\\n\\nI have an ecl workunit that I want to be able to pass a record structure and filename into as stored variables, and have it read and output that file.\\n
If you pass the RECORD structure in as an XML string, you may be able to use Template language to construct a RECORD structure. \\n\\nOR, if the filename you mentioned is in fact the name of a DATASET declaration, then you can just use the RECORDOF() function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-12 16:37:48\" },\n\t{ \"post_id\": 9634, \"topic_id\": 2284, \"forum_id\": 8, \"post_subject\": \"RECORD - type stored variable?\", \"username\": \"drealeed\", \"post_text\": \"I have an ecl workunit that I want to be able to pass a record structure and filename into as stored variables, and have it read and output that file.\\n\\nThe code works fine if I have a standard record layout; but as soon as I put the :STORED('RecordStructure') onto the end of the record attribute definition, running the workunit creates the following error:\\n\\nCannot return a result of this type from a workunit\\n\\nIs it possible to define a RECORD as a stored variable?\\n\\nDrea\", \"post_time\": \"2016-05-12 16:14:39\" },\n\t{ \"post_id\": 9642, \"topic_id\": 2286, \"forum_id\": 8, \"post_subject\": \"Re: Different Results for WUQueryDetails and WUListQueries\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nLooks like an imminent JIRA to me. \\n\\nRichard\", \"post_time\": \"2016-05-17 21:08:43\" },\n\t{ \"post_id\": 9640, \"topic_id\": 2286, \"forum_id\": 8, \"post_subject\": \"Different Results for WUQueryDetails and WUListQueries\", \"username\": \"drealeed\", \"post_text\": \"When I search for a roxie query using WUQueryDetails and passing in a queryset and query id, I get a result.\\n\\nWhen I pass in the same query id and queryset into WUListQueries, nothing returns.\\n\\nThis is on the http://10.173.147.1:8010 cluster (version HPCC 5.6)\\n\\nWorking WUQueryDetails soapcall:\\n
<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsworkunits">\\n <soap:Body>\\n <WUQueryDetailsRequest>\\n <QueryId>lrenn_delete.ins002_service_20160517_042800.1</QueryId>\\n <QuerySet>roxie</QuerySet>\\n <IncludeStateOnClusters>1</IncludeStateOnClusters>\\n <IncludeSuperFiles>1</IncludeSuperFiles>\\n </WUQueryDetailsRequest>\\n </soap:Body>\\n</soap:Envelope>\\n
\\n\\nFailing WUListQueries soapcall:\\n<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsworkunits">\\n <soap:Body>\\n <WUListQueriesRequest>\\n <QuerySetName>roxie</QuerySetName>\\n <ClusterName>roxie</ClusterName>\\n <QueryID>lrenn_delete.ins002_service_20160517_042800.1</QueryID>\\n </WUListQueriesRequest>\\n </soap:Body>\\n</soap:Envelope>\\n
\\n\\nAm I doing something wrong, or is this a bug?\", \"post_time\": \"2016-05-17 20:46:39\" },\n\t{ \"post_id\": 9694, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nThe only way I can think of to make your standalone operate on that data would be to copy it to your local PC for the EXE to work with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-02 12:27:54\" },\n\t{ \"post_id\": 9692, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"dsanchez\", \"post_text\": \"Hi Richard,\\n\\nWe do have memory-eating-feet-chomping-chainsawy inline c++ code around using some spatial libraries that are also memory-eating monsters. We are already in contact with the core hpcc team (Gavin Halliday and Richard Chapman are being very useful) and we already have some preliminary debug steps and ideas so we are having more stability and less problems derived from the inline c++.\\n\\nHowever, I wanted to make sure that the eclcc compiling and valgrind execution are as close as possible to the actual execution on Roxie. Therefore I wanted to have the valgrind execution of the code to be able to read the file or at least get the data in some way.\\n\\nCheers!\\nDaniel\", \"post_time\": \"2016-06-02 08:31:05\" },\n\t{ \"post_id\": 9688, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nUnless your ECL code has some inline C++ that eats memory, the problem is not likely to be specifically in your code but in the Roxie infrastructure. You should submit a JIRA issue describing the problem and as many details as you can possibly include so the developers can help you. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-01 19:06:40\" },\n\t{ \"post_id\": 9676, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"dsanchez\", \"post_text\": \"Hi Richard,\\n\\nWe are trying to recreate an actual query running in our Roxie in the executable. We are having some memory issues so we would like to pass the code through valgrind and get some feedback on how to solve this problems.\\n\\nIs there any alternative solution to how to have access to the data from a standalone exe?\\n\\nThanks!\", \"post_time\": \"2016-05-30 07:19:57\" },\n\t{ \"post_id\": 9672, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,Now the problem is that the executable created there doesn't have access to the files like it does when I submit the query to roxie.
Local compilation is there to create a standalone EXE to run on your PC against local data (on your PC). It's not meant for anything else. If you need the data on the Roxie then you should run the code on Thor or Roxie. \\n\\nSo what problem are you trying to solve with this local compile? What are you really trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-28 12:03:29\" },\n\t{ \"post_id\": 9671, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"dsanchez\", \"post_text\": \"Got it working finally! (yay)\\nNow the problem is that the executable created there doesn't have access to the files like it does when I submit the query to roxie.\\n\\nI am trying to do something like this:\\n\\nRTREE_IDX := INDEX(OrigDataset), {keys}, {payload}, '~mypath::myfile');\\n
\\nThen I compile the archive xml generated on submission with this:\\n\\neclcc -o output MyArchiveFile.xml\\n
\\nAnd then try to run output. This shows the error EXCEPTION: Could not resolve filename mypath::myfile (in Index Read X)
.\\n\\nIs there anyway to have this working or will I need to feed the data in some other way to have this working?\", \"post_time\": \"2016-05-26 15:30:08\" },\n\t{ \"post_id\": 9662, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"Re: ECL Standalone compilation\", \"username\": \"dsanchez\", \"post_text\": \"Ok this is fixed, seems like it couldn't find the c++ compiler so installed the visio tools and it is working now. It is stopping later because is missing a library but I guess this is a whole different thing, I thought that the archive eclxml file had all the needed dependencies but it seems not for the external libraries.\", \"post_time\": \"2016-05-25 13:36:07\" },\n\t{ \"post_id\": 9650, \"topic_id\": 2288, \"forum_id\": 8, \"post_subject\": \"ECL Standalone compilation\", \"username\": \"dsanchez\", \"post_text\": \"Hi everyone,\\nI am trying to compile on my local machine an archive.eclxml file to run valgrind and check for memory leak problems (we seem to be having some occasional segfaults in our query).\\nWhen I try to run the command:\\neclcc -I <the dependencies location> --logfile compile.log -v -o output myarchivefile.archive.eclxml
\\nI get the error: "Fatal Error: Unable to locate C++ compiler/linker".\\n\\nIt looks like I am doing something wrong here but since the eclcc command is actually working and I can see in the log file that some work is done I don't really know whats the root of the problem.\\n\\nThanks,\\nDaniel.\", \"post_time\": \"2016-05-24 16:16:23\" },\n\t{ \"post_id\": 9660, \"topic_id\": 2290, \"forum_id\": 8, \"post_subject\": \"Re: Convert from hexadecimal to string and blog article\", \"username\": \"rtaylor\", \"post_text\": \"Ignacio,\\n\\nHere's my complete code for that function:EXPORT String2HexString(STRING DataIn) := FUNCTION\\n\\n\\tSTRING2 Str2Hex(STRING1 StrIn) := FUNCTION\\n\\t\\tSTRING1 HexVal(UNSIGNED1 val) := \\n\\t\\t\\t\\t\\t\\tCHOOSE(val,'1','2','3','4','5','6','7','8',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t '9','A','B','C','D','E','F','0');\\n\\t\\tUNSIGNED1 Char1 := (>UNSIGNED1<)StrIn >> 4;\\n\\t\\tUNSIGNED1 Char2 := ((>UNSIGNED1<)StrIn & 00001111b);\\n\\t\\tRETURN HexVal(Char1) + HexVal(Char2);\\n\\tEND;\\n\\t\\n Rec := {STRING Hex{MAXLENGTH(1024)}}; \\n ds := DATASET(LENGTH(TRIM(DataIn)),\\n TRANSFORM(Rec,\\n SELF.Hex := Str2Hex(DataIn[COUNTER])));\\n HexOut := ROLLUP(ds,TRUE,TRANSFORM(OutRec,SELF.Hex := LEFT.Hex + RIGHT.Hex));\\n RETURN DATASET([{DataIn,HexOut[1].Hex}],\\n {STRING Txt{MAXLENGTH(1024)},\\n STRING Hex{MAXLENGTH(1024)}});\\nEND;
Which looks a whole lot like your reconstruction! \\n\\nAnd I have corrected the Blog post. Thanks for the heads-up!!\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-25 08:27:18\" },\n\t{ \"post_id\": 9658, \"topic_id\": 2290, \"forum_id\": 8, \"post_subject\": \"Convert from hexadecimal to string and blog article\", \"username\": \"Ignacio\", \"post_text\": \"I was looking for (and found) a way to convert a string of hexadecimal digits into the corresponding data object, that is, for each two hexa (base-16) characters convert them into one (base-256) character. I finally did that with the following function :\\nStd.Str.FromHexPairs. \\n\\nFrom standard library reference : \\n
FromHexPairs returns a data value with each byte created from a pair of hex digits.
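A small usage sketch (the expected output in the comment is my reading of that description, not taken from the documentation):

IMPORT Std;

DATA d := Std.Str.FromHexPairs('48656C6C6F');
OUTPUT((STRING)d);   // 'Hello'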
\\nIn the process of trying to solve this I came across Richard Taylor's post on the HPCC blog about bit fiddling which had been very useful for me once that I had to work with this kind of operations. It also contains a function which I realized is used for a different purpose : \\n\\nConvert to Hexadecimal : display text and the corresponding Hexadecimal values side by side -> String2HexString\\n\\nEXPORT String2HexString(STRING DataIn) := FUNCTION\\n\\n STRING2 Str2Hex(STRING1 StrIn) := FUNCTION\\n STRING1 HexVal(UNSIGNED1 val) := \\n\\t\\t CHOOSE(val,'1','2','3','4','5','6','7','8',\\n\\t\\t\\t\\t '9','A','B','C','D','E','F','0');\\n UNSIGNED1 Char1 := (>UNSIGNED1> 4;\\n UNSIGNED1 Char2 := ((>UNSIGNED1
\\n\\nMy point here is that the code was cropped at some stage, and it is incomplete now. I wonder whether somebody could review and fix it. I think the original one should be something like this : \\n\\nEXPORT String2HexString(STRING DataIn) := FUNCTION\\n\\n STRING2 Str2Hex(STRING1 StrIn) := FUNCTION\\n STRING1 HexVal(UNSIGNED1 val) := \\n\\t\\t CHOOSE(val,'1','2','3','4','5','6','7','8',\\n\\t\\t\\t\\t '9','A','B','C','D','E','F','0');\\n UNSIGNED1 Char1 := (>UNSIGNED1<)StrIn >> 4;\\n UNSIGNED1 Char2 := ((>UNSIGNED1<)StrIn & 00001111b);\\n RETURN HexVal(Char1) + HexVal(Char2);\\n END;\\n\\t\\n Rec := {STRING Hex{MAXLENGTH(1024)}}; \\n ds := DATASET(LENGTH(TRIM(DataIn)),\\n TRANSFORM(Rec,\\n \\t SELF.Hex := Str2Hex(DataIn[COUNTER])));\\n HexOut := ROLLUP(ds,TRUE,TRANSFORM(OutRec,SELF.Hex := LEFT.Hex + RIGHT.Hex));\\n RETURN DATASET([{DataIn,HexOut[1].Hex}],\\n {STRING Txt{MAXLENGTH(1024)},\\n STRING Hex{MAXLENGTH(1024)}});\\nEND;
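One small caveat about both versions as posted: the ROLLUP TRANSFORM references OutRec, which is not declared inside the FUNCTION (only Rec is), so unless OutRec happens to be an attribute defined elsewhere in the module it presumably needs to be Rec, or an equivalent {STRING Hex{MAXLENGTH(1024)}} layout. With that adjustment, a quick sanity check might be:

OUTPUT(String2HexString('ABC'));   // expected result: Txt = 'ABC', Hex = '414243'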
\", \"post_time\": \"2016-05-25 08:15:55\" },\n\t{ \"post_id\": 9667, \"topic_id\": 2291, \"forum_id\": 8, \"post_subject\": \"Re: Global side-effect ASSERT\", \"username\": \"james.wilson\", \"post_text\": \"Thanks for the quick response!\\n\\nNo, it's not a legacy system, there is however a lot of legacy code so I'm probably in that mindset. I'm not using WHEN, so that will be the problem. I've read the documentation on WHEN now, I'll have a play with it and if I'm still struggling I'll be back with another question.\", \"post_time\": \"2016-05-26 13:05:25\" },\n\t{ \"post_id\": 9665, \"topic_id\": 2291, \"forum_id\": 8, \"post_subject\": \"Re: Global side-effect ASSERT\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nIs this on a legacy system? \\n\\nASSERT is an action, and the change from legacy to OSS required that side-effect actions in a FUNCTION be called only from the WHEN function. Are you using WHEN?\\n\\nRichard\", \"post_time\": \"2016-05-26 12:50:10\" },\n\t{ \"post_id\": 9663, \"topic_id\": 2291, \"forum_id\": 8, \"post_subject\": \"Global side-effect ASSERT\", \"username\": \"james.wilson\", \"post_text\": \"I'm getting a warning from an ASSERT in a FUNCTION: Global side-effect ASSERT seems to be context dependent - it may not function as expected\\n\\nDoes anyone know what this means?\", \"post_time\": \"2016-05-26 12:28:19\" },\n\t{ \"post_id\": 9686, \"topic_id\": 2293, \"forum_id\": 8, \"post_subject\": \"Re: HPCC JDBC driver support\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hello Ashoka, the JDBC driver supports "simple SQL SELECT or CALL syntax" \\nPlease take a look at the JDBC documentation for full grammar supported here: http://cdn.hpccsystems.com/install/jdbc/stable/HPCC_JDBC_Driver-0.3.0-1Beta.pdf\", \"post_time\": \"2016-06-01 18:46:17\" },\n\t{ \"post_id\": 9669, \"topic_id\": 2293, \"forum_id\": 8, \"post_subject\": \"HPCC JDBC driver support\", \"username\": \"vyasshub\", \"post_text\": \"Hi,\\n\\nDoes the HPCC JDBC driver support an equivalent of the LEN() function i.e. one which provides the length of a string. We’ve tried LEN and LENGTH but neither work\\n\\ne.g.\\n\\nselect LEN(firstName) from person;\\n\\nOn trying this, we get the following error\\n\\nException in thread "main" java.sql.SQLException: Audience: user Source: CSoapResponseBinding Message: 2016-05-26 09:43:04 GMT: Error while parsing: ANTLR Error 3 : \\n Near [Index: 2 (Start: 366182151-Stop: 366182156) ='LEN', type<81> Line: 1 LinePos:6]\\n : cannot match to any predicted input...\\n\\n\\n at org.hpccsystems.ws.client.extended.HPCCWsSQLClient.handleExceptions(HPCCWsSQLClient.java:472)\\n at org.hpccsystems.ws.client.extended.HPCCWsSQLClient.executeSQLFullResponse(HPCCWsSQLClient.java:353)\\n at org.hpccsystems.jdbcdriver.HPCCConnection.executeSQL(HPCCConnection.java:539)\", \"post_time\": \"2016-05-26 13:19:13\" },\n\t{ \"post_id\": 9696, \"topic_id\": 2300, \"forum_id\": 8, \"post_subject\": \"KEL runtime error\", \"username\": \"vin\", \"post_text\": \"A simple KEL file generates the following when executed on Thor cluster.\\nSystem error: 0: Graph[143], SLAVE #1 [10.10.1.57:20100]: Graph[143], indexwrite[147]: Key row too large to fit within a key node (uncompressed size=40457, variable=true, pos=0),
\\n\\nThe KEL file is;\\nPlay := ENTITY(FLAT(UID=playid, seq, playNum,\\n type, possession, time, quarter,\\n down, yardnet, yardline, scoring, ptsHome, ptsAway,\\n toHome, toAway, description));\\n/*\\nPlay := ENTITY(FLAT(UID=playid, type, time);\\n*/\\n\\nUSE NFL.plays(FLAT, Play);\\n\\n/* every way that I tried to execute the query below returns error:\\n * Key row too large to fit within a key node (uncompressed size=40457, variable=true, pos=0)\\n */\\n \\n//QUERY: A <= Play;\\n//QUERY: B <= Play(type='KICKOFF');\\nQUERY: C <= Play(time=15);\\n
\\n\\nRegarding the code that is commented out: because of the "key row too large" part of the error, I tried using an entity with fewer fields. Same error. I tried several query versions and all failed with the above error.\\n\\nLast note: I am able to run other KEL queries. In fact, I stripped out 3 other entities and a few associations from the original KEL file before posting it above.\\n\\nThe ECL record corresponding to NFL.Plays is\\nexport PlayLayout := Record\\n\\tunsigned8 pid;\\n\\tunsigned1 seq;\\n\\tunsigned4 gameid;\\n\\tunsigned2 playid;\\n\\tstring playtype;\\n\\tstring1 possesion;\\n\\tunsigned2 time;\\n\\tunsigned1 quarter;\\n\\tunsigned1 down;\\n\\tunsigned1 yardline;\\n\\tboolean scoringplay;\\n\\tunsigned1 awayscore;\\n\\tunsigned1 homescore;\\n\\tunsigned1 awaytimeouts;\\n\\tunsigned1 hometimeouts;\\n\\tstring description;\\n end;\\n
\", \"post_time\": \"2016-06-02 16:50:03\" },\n\t{ \"post_id\": 9714, \"topic_id\": 2304, \"forum_id\": 8, \"post_subject\": \"Re: "Field cannot follow a variable length aggregate" error\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nThis code works:dsin := DATASET([\\n {'Boca','FL','33434','1','2'},\\n {'Boca','FL','33434','3','4'},\\n {'Boca','FL','33434','2','6'},\\n {'Boca','GA','33435','1','6'},\\n {'Boca','GA','33435','5','8'},\\n {'Boca','GA','33435','4','4'}\\n\\n],\\n {STRING city,STRING st,STRING zip,STRING clean_county,STRING clean_error});\\n\\nagg1:=TABLE(dsin,{ STRING100 agg_city:= city ,STRING100 agg_st:=st , STRING100 agg_zip:=zip ,\\nSTRING50 max_county :=MAX(GROUP,clean_county) , STRING50 min_error :=MIN(GROUP,clean_error) },city,st,zip );
It's just not happy with variable-length STRING on your output TABLE, since it's a group by. It wants defined lengths.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-08 15:42:14\" },\n\t{ \"post_id\": 9710, \"topic_id\": 2304, \"forum_id\": 8, \"post_subject\": \"Re: "Field cannot follow a variable length aggregate" error\", \"username\": \"bforeman\", \"post_text\": \"Hi Drea,\\n\\nDo you really want to extract the largest string, or just the length of the largest string? It might be better to do a cross-tab on the LENGTH, and that would give you the size of the STRING needed to bypass that compiler error.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-06-08 12:28:48\" },\n\t{ \"post_id\": 9708, \"topic_id\": 2304, \"forum_id\": 8, \"post_subject\": \""Field cannot follow a variable length aggregate" error\", \"username\": \"drealeed\", \"post_text\": \"I have a chunk of ecl code where a table is defined with min/max aggregates on string fields. \\n\\nWhen I try to validate this ecl, I get the error "Fields cannot follow a variable length aggregate in the field." Below is an example of the pared down ecl causing the error.\\n\\ndsin := DATASET('~qa::aggregate::aggregate::input',{STRING city,STRING st,STRING zip,STRING clean_county,STRING clean_error},THOR);\\n\\nagg1:=TABLE(dsin,{ STRING agg_city:= city ,STRING agg_st:=st , STRING agg_zip:=zip ,\\n STRING max_county :=MAX(GROUP,clean_county) , STRING min_error :=MIN(GROUP,clean_error) },city,st,zip );\\n
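[Editorial sketch of the LENGTH cross-tab Bob suggests above; dsin and the clean_* fields come from the failing example, the output names are assumed.]
lenStats := TABLE(dsin,{UNSIGNED4 maxCountyLen := MAX(GROUP,LENGTH(TRIM(clean_county))),
                        UNSIGNED4 maxErrorLen  := MAX(GROUP,LENGTH(TRIM(clean_error)))});
OUTPUT(lenStats); // use these maximums to size the STRINGn fields in the aggregate TABLE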
\\n\\nI have no control over the field types coming in; they'll remain STRING. Is there anything I can change to get this to compile?\", \"post_time\": \"2016-06-07 18:48:37\" },\n\t{ \"post_id\": 9804, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Re: Assigning a value to a range using a set\", \"username\": \"ghalliday\", \"post_text\": \"Another solution is to iterate through a dataset, and return a value from the first row that matches.\\n\\n\\nmapping := DATASET([{5,0},{10,1},{20,2},{50,3},{75,4}], { unsigned threshold, unsigned result{DEFAULT(99)} });\\ndoMapping(unsigned value) := mapping(value < threshold)[1].result;\\n\\ndoMapping(3);\\ndoMapping(8);\\ndoMapping(18);\\ndoMapping(99);\\n
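[Editorial sketch of the first alternative described just below: instead of DEFAULT(), add a sentinel entry that is guaranteed to match; the 32767 threshold is an assumption.]
mapping2 := DATASET([{5,0},{10,1},{20,2},{50,3},{75,4},{32767,99}], { unsigned threshold, unsigned result });
doMapping2(unsigned value) := mapping2(value < threshold)[1].result;
doMapping2(99); // falls through to the 32767 sentinel row and returns 99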
\\n\\nThere are two ways of specifying the out-of-range value. Either add a table entry that is known to always match, or use DEFAULT() for the field in the record structure.\\n\\nThis works well for small datasets, but wouldn't work so well for large ones.\", \"post_time\": \"2016-06-17 08:21:01\" },\n\t{ \"post_id\": 9800, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Re: Assigning a value to a range using a set\", \"username\": \"james.wilson\", \"post_text\": \"Tony Kirk came up with a pure ECL solution for Thor by taking a step back and looking at datasets and what Thor does well, rather than trying to do a function like this. It's a great example of thinking in an ECL way (I've still got a bit of my old procedural mindset hanging on). The way he described it is:\\n
So... get the MIN and MAX of the values you're going to be checking via TABLE of the dataset to scrub. Then, starting with the MIN value, NORMALIZE a single record (MAX-MIN)+1 times to produce a record for each discrete value that might be seen, doing a SKIP if the COUNTER value is in the SET you'll be range checking later. Then add the set to that dataset with markers letting you know they are the boundaries. Then, ITERATE through that dataset setting the range number, starting with zero, incrementing each time you hit one of your known boundary points (if you have to have a max, it would be different). Now you have a dataset that can be JOINed directly (LOOKUP, probably) on the value you are checking, pulling the range number from that dataset. Thor's good at that.
\\nAnd the code:\\n\\n// Scrub values\\nrScrubValues\\t:=\\nrecord\\n\\tinteger1\\tTheValue;\\nend;\\ndScrubValues\\t:=\\tdataset([\\t{ 7}, {42}, {26}, {81}, {12}, {68}, {65}, {33}, {95}, {72}, {66}, {24}, {19}, {52}, {24},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{61}, {14}, {57}, {90}, {80}, {5}, {43}, {51}, {66}, {53}, {2}, {14}, {49}, {18}, {7}, {57},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{44}, {25}, {11}, {70}, {46}, {30}, {71}, {98}, {49}, {87}, {69}, {56}, {35}, {60}, {93}, {55}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t],\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\trScrubValues\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t );\\n\\n// Range values\\nset of integer1\\tsRanges\\t:=\\t[5, 10, 20, 50, 75];\\ndRanges\\t:=\\tdataset(sRanges, {integer1 TheValue});\\n\\nrRangesForJoin\\t:=\\nrecord\\n\\tinteger1\\tTheValue;\\n\\tinteger1\\tTheRange\\t :=\\t0;\\n\\tboolean\\t\\tIsBoundary := false;\\nend;\\n\\nrRangesForJoin\\ttRangesForJoinPrep(dRanges pInput)\\t:=\\ntransform\\n\\tself.TheValue\\t\\t:=\\tpInput.TheValue;\\n\\tself.TheRange\\t\\t:=\\t0;\\n\\tself.IsBoundary\\t:=\\ttrue;\\nend;\\ndRangesForJoinPrep\\t:=\\tproject(dRanges, tRangesForJoinPrep(left));\\n\\n// Determine min and max values necessary for right side of JOIN\\nrScrubValuesRange\\t:=\\nrecord\\n\\tinteger1\\tLowValue\\t:=\\tmin(group, dScrubValues.TheValue);\\n\\tinteger1\\tHiValue\\t\\t:=\\tmax(group, dScrubValues.TheValue);\\nend;\\ndScrubValuesRange\\t:=\\ttable(dScrubValues, rScrubValuesRange, few);\\n\\n// Create right-side dataset for JOIN\\ndForNormalization\\t:=\\tdataset([{0, 0, 0}], rRangesForJoin);\\nrRangesForJoin\\ttNormalizeRanges(rRangesForJoin pLeft, unsigned2 pCounter) :=\\ntransform\\n\\tlTheValue\\t\\t\\t\\t:=\\t(dScrubValuesRange[1].LowValue - 1) + pCounter;\\n\\tself.TheValue\\t\\t:=\\tif(lTheValue in sRanges, skip, lTheValue);\\n\\tself.TheRange\\t\\t:=\\t0;\\n\\tself.IsBoundary\\t:=\\tfalse;\\nend;\\ndNormalizeRanges\\t:=\\tnormalize(dForNormalization, (dScrubValuesRange[1].HiValue - dScrubValuesRange[1].LowValue) + 1, tNormalizeRanges(left, counter));\\ndRangesForJoinAll\\t:=\\tsort(dNormalizeRanges + dRangesForJoinPrep, TheValue);\\n\\n// Set right-side boundary ranges\\nrRangesForJoin\\ttInitializeRangeValues(dRangesForJoinAll pLeft, dRangesForJoinAll pRight)\\t:=\\ntransform\\n\\tself.TheValue\\t\\t:=\\tpRight.TheValue;\\n\\tself.TheRange\\t\\t:=\\tif(pRight.IsBoundary, pLeft.TheRange + 1, pLeft.TheRange);\\n\\tself.IsBoundary\\t:=\\tpRight.IsBoundary;\\nend;\\ndRangesForJoinInitialized\\t:=\\titerate(dRangesForJoinAll, tInitializeRangeValues(left, right));\\n\\n// Join boundard ranges to scrub data\\nrScrubAndRange\\t:=\\nrecord\\n\\trScrubValues;\\n\\tinteger1\\t\\tTheRange;\\nend;\\nrScrubAndRange\\ttScrubAndRange(dScrubValues pScrub, dRangesForJoinInitialized pRanges)\\t:=\\ntransform\\n\\tself.TheValue\\t:=\\tpScrub.TheValue;\\n\\tself.TheRange\\t:=\\tpRanges.TheRange;\\nend;\\ndScrubAndRange\\t:=\\tjoin(dScrubValues, dRangesForJoinInitialized,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t left.TheValue = right.TheValue,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t tScrubAndRange(left, right),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t lookup\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\noutput(dScrubValuesRange, all, named('ScrubValuesRange'));\\noutput(dRanges, all, named('Ranges'));\\noutput(sort(dRangesForJoinAll, TheValue), all, named('RangesForJoinAll'));\\noutput(dRangesForJoinInitialized, all, named('RangesForJoinInitialized'));\\noutput(sort(dScrubAndRange, 
TheValue), all, named('ScrubAndRange'));\\n
\", \"post_time\": \"2016-06-17 07:39:54\" },\n\t{ \"post_id\": 9798, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Re: Assigning a value to a range using a set\", \"username\": \"james.wilson\", \"post_text\": \"Yes, for a single function I think the C++ is neater. Here's what I came up with (again this assumes the set is sorted, it may be better to add a wrapper that:\\n\\n\\n// Assign the value supplied in the first parameter to a bucket, where the bucket boundaries are defined by the second parameter.\\n// Boundary values are where the next boundary starts\\n// Return values start at 0\\n// So if the value in Days is less than the first value in Buckets then 0 will be returned, if it's equal to or greater than the first value but less than the second value then 1 will be returned, and so on.\\nSHARED INTEGER2 GetBucket(INTEGER2 Days, SET OF INTEGER2 Buckets) := \\n BEGINC++\\n signed short numBuckets = lenBuckets / sizeof(days);\\n for (size32_t i = 0; i < numBuckets; ++i)\\n if (days < *(signed short*)(buckets + i * sizeof(days)))\\n return i;\\n return numBuckets;\\n ENDC++\\n ;\\n
\\nQuick question (this is the first time I've embedded C++ in ECL): should I use #option pure
?\", \"post_time\": \"2016-06-17 07:32:10\" },\n\t{ \"post_id\": 9796, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Re: Assigning a value to a range using a set\", \"username\": \"richardkchapman\", \"post_text\": \"I suspect this is a case where embedded C++ will result in clearer and certainly faster code.\", \"post_time\": \"2016-06-17 06:17:05\" },\n\t{ \"post_id\": 9758, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Re: Assigning a value to a range using a set\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nHow about something like this:FindRange(SET pSet, val) := FUNCTION\\n DS := TABLE(DATASET(pSet,{INTEGER Element}),\\n {Element,UNSIGNED RangeID := 0,UNSIGNED1 Flag := 0});\\n NumElements := COUNT(DS);\\n RECORDOF(DS) XF(DS L, DS R, INTEGER C) := TRANSFORM\\n SELF.Flag := (UNSIGNED)(val BETWEEN L.Element AND R.Element-1);\\n SELF.RangeID := C;\\n SELF := R;\\n END;\\n Flags := ITERATE(DS,XF(LEFT,RIGHT,COUNTER));\\n RangeIDVal := Flags(Flag<>0)[1].RangeID;\\n RETURN IF(RangeIDVal<>0,RangeIDVal-1,IF(val<0,-1,NumElements));\\nEND;\\n\\nMySet1 := [5, 10, 20, 50, 75];\\n\\nFindRange(MySet1,3);\\nFindRange(MySet1,8);\\nFindRange(MySet1,18);\\nFindRange(MySet1,48);\\nFindRange(MySet1,68);\\nFindRange(MySet1,99); \\nFindRange(MySet1,-99); //returns -1 indicating no matching range
Obviously this will only work with integer sets and values, but it does meet your example requirements. I also have it returning -1 if the value < 0 but you can change that to 0 if you only care that the value is less than the first element value.\\n\\nThis code also expects the set to be pre-sorted. If it may not be, then you just need to SORT the DATASET before doing the TABLE, like this:\\n DS := TABLE(SORT(DATASET(pSet,{INTEGER Element}),Element),\\n {Element,UNSIGNED RangeID := 0,UNSIGNED1 Flag := 0});\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-14 14:39:12\" },\n\t{ \"post_id\": 9730, \"topic_id\": 2312, \"forum_id\": 8, \"post_subject\": \"Assigning a value to a range using a set\", \"username\": \"james.wilson\", \"post_text\": \"I want to create a function to return a number depending on how a value compares to the values in a (variable) set, e.g. if my set is [5, 10, 20, 50, 75] then I want to return 0 if the supplied value is < 5, 1 if it's between 5 and 9, 2 if it's between 10 and 19, 3 if it's between 20 and 49, 4 if it's between 50 and 74, and 5 if it's >=75. And I want the set to be variable, i.e. I don't want to hard-code it with MAP or CASE.\\n\\nI can do it with a C++ block, but is there a way to do it in ECL?\", \"post_time\": \"2016-06-10 15:55:10\" },\n\t{ \"post_id\": 9848, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nIt sounds like you're constructing HTML for web page viewing. Couldn't you just use XSLT templates to format your result for viewing in the web page, similar to the way ECL Watch does for its results display?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-27 08:19:13\" },\n\t{ \"post_id\": 9844, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"jwilt\", \"post_text\": \"Thanks, Gordon.\\n\\nI'm writing a single method (MACRO) to take "any ol'" recordset and generate an HTML table string.\\nThis means... getting the field names.\\nAnd wrapping with markup.\\n\\nI'm using #EXPORT to get the field names.\\nAnd I'm doing some terrible hack to convert rows to HTML (again, layout-agnostic). (If you're wondering... this involves PIPE'ing to CSV, replacing the inbound XML field markup with HTML... ugly...)\\nTOXML(...) would help alot, but it omits empty elements.\\n\\nAgain, open to ideas.\\nThanks!\", \"post_time\": \"2016-06-24 20:46:05\" },\n\t{ \"post_id\": 9786, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"gsmith\", \"post_text\": \"The cell formatter bundle has a "Table" option which you may get some inspiration from: https://github.com/hpcc-systems/ecl-bun ... lFormatter\\n\\nYou can think of this bundle as a string formatter, so the following example will create a HTML table as a string:\\n\\nIMPORT CellFormatter AS CF;\\nSTRING myTable := CF.HTML.Table(\\n CF.HTML.TableRow(CF.HTML.TableHeader('Column 1') + CF.HTML.TableHeader('Column 2')) +\\n CF.HTML.TableRow(CF.HTML.TableCell('Cell 1, 1') + CF.HTML.TableCell('cell 1, 2')) + \\n CF.HTML.TableRow(CF.HTML.TableCell('Cell 2, 1') + CF.HTML.TableCell(u'Unicode Text:非常によい'))\\n, TRUE);\\n
\", \"post_time\": \"2016-06-16 10:54:49\" },\n\t{ \"post_id\": 9780, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"jwilt\", \"post_text\": \"Another question, from this same project...\\n\\nIf I build said (tiny) HTML table from a recordset, on a Thor cluster - \\nAnd I send that out via HTTPCALL(...) - \\nIs that guaranteed to send only one message...\\nAnd is the entire recordset guaranteed to go into that single message?\\n\\nThanks again.\", \"post_time\": \"2016-06-15 22:20:36\" },\n\t{ \"post_id\": 9766, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"rtaylor\", \"post_text\": \"Not that I am aware of. You could submit a JIRA asking for that option to be added (or add it yourself to the C++ source code and submit a pull request ).\", \"post_time\": \"2016-06-15 08:21:57\" },\n\t{ \"post_id\": 9762, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"jwilt\", \"post_text\": \"Excellent. I don't know how I hadn't found that already.\\n\\nAlso - \\nIs there any way to get TOXML(...) to *NOT* exclude empty elements?\", \"post_time\": \"2016-06-14 20:43:33\" },\n\t{ \"post_id\": 9756, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Re: Generating a string of HTML table from a record set\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nYes, it can certainly be done that way. Look at #EXPORTXML to get the field names.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-14 08:18:33\" },\n\t{ \"post_id\": 9750, \"topic_id\": 2324, \"forum_id\": 8, \"post_subject\": \"Generating a string of HTML table from a record set\", \"username\": \"jwilt\", \"post_text\": \"Is there a way to take an unknown in-coming recordset (e.g., in a MACRO) and generate an HTML table string for output to a service?\\nThis HTML table would need a header row (<th>...</th>) -\\nSo the code needs to get the field names for the (unknown) in-coming recordset.\\n\\nOr, open to other approaches.\\n\\nThanks.\", \"post_time\": \"2016-06-14 02:19:34\" },\n\t{ \"post_id\": 9810, \"topic_id\": 2332, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve files based on the workunit\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot.\\n\\nLet me try the same.\\n\\nViswa\", \"post_time\": \"2016-06-21 13:36:56\" },\n\t{ \"post_id\": 9792, \"topic_id\": 2332, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve files based on the workunit\", \"username\": \"JimD\", \"post_text\": \"To restore programmatically, you can use the Sasha command line interface (found on your server in /opt/HPCCSystems/bin/)\\n\\n./sasha server=<sasha-server-ip> action=RESTORE <wu-specifier>\\n\\nexample:\\nsasha server=192.168.150.10 action=restore wuid=W20040514-123412\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-06-16 14:36:04\" },\n\t{ \"post_id\": 9790, \"topic_id\": 2332, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve files based on the workunit\", \"username\": \"ksviswa\", \"post_text\": \"Hi Richard,\\n\\nYeah that works but only if the workunit is not archived.\\n\\nWe could do a restore to the archived workunits and then use the same function.\\n\\nAny ways to restore archived workunits programatically ? 
\\n\\nViswa\", \"post_time\": \"2016-06-16 13:34:46\" },\n\t{ \"post_id\": 9782, \"topic_id\": 2332, \"forum_id\": 8, \"post_subject\": \"Re: Retrieve files based on the workunit\", \"username\": \"rtaylor\", \"post_text\": \"viswa,\\n\\nHave you tried using the STD.System.Workunit.WorkunitFilesWritten() function?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-16 09:12:21\" },\n\t{ \"post_id\": 9776, \"topic_id\": 2332, \"forum_id\": 8, \"post_subject\": \"Retrieve files based on the workunit\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs it possible to get the list of output files associated with a work unit even if the work unit is archived ?\\n\\nviswa\", \"post_time\": \"2016-06-15 15:41:27\" },\n\t{ \"post_id\": 9802, \"topic_id\": 2334, \"forum_id\": 8, \"post_subject\": \"Re: SELF JOIN will take some time\", \"username\": \"rtaylor\", \"post_text\": \"chucks,\\n\\nDepending on your JOIN condition, maybe the GROUP function would help? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-17 07:58:25\" },\n\t{ \"post_id\": 9794, \"topic_id\": 2334, \"forum_id\": 8, \"post_subject\": \"SELF JOIN will take some time\", \"username\": \"chucks\", \"post_text\": \"I am getting this warning:\\n\\nGraph[1], selfjoin[4]: SELFJOIN: Warning 67505 preliminary matches, join will take some time\\n\\nActually, 4 or 5 of them for the same join ... just with a different number of preliminary matches\\n\\nAny suggestions about optimizing the self join?\", \"post_time\": \"2016-06-16 17:30:16\" },\n\t{ \"post_id\": 9838, \"topic_id\": 2344, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Pagination\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nYou might try doing the COUNT (only) as a separate query from your GUI and then sending the real query a page at a time.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-24 16:00:28\" },\n\t{ \"post_id\": 9832, \"topic_id\": 2344, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Pagination\", \"username\": \"dsanchez\", \"post_text\": \"Hey Richard,\\n\\nSure that's exactly what I am doing. The problem is that I also want to return the total number of results (so the UI can show the pagination buttons correctly) and sort the results before the chooseN does his thing (so I know that we are getting the actual first N results according to the sorting field).\\n\\nCheers!\\nDaniel.\", \"post_time\": \"2016-06-24 15:19:53\" },\n\t{ \"post_id\": 9830, \"topic_id\": 2344, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Pagination\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nThe CHOOSEN function takes an optional third parameter for the start position. You can just add a parameter to your query for the start position and have that passed value managed by the end-user GUI. Each time the user requests another page, the GUI can increment/decrement the start position parameter passed to your query along with simply repeating the original user-entered search parameters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-24 14:56:20\" },\n\t{ \"post_id\": 9824, \"topic_id\": 2344, \"forum_id\": 8, \"post_subject\": \"HPCC Pagination\", \"username\": \"dsanchez\", \"post_text\": \"Hi all,\\n\\nWe are currently trying to add pagination to our roxie queries (the number of results could be too big for the client to handle). 
We also need to be able to sort the results, so we created a simple wrapper around our code that takes the full result set, gets the total count, sorts the data by the requested field, and applies a CHOOSEN according to the pagination parameters (offset and page size).\\n\\nThe problem I am finding doing this is that the count doesn't always work (and neither does the sort), because if the number of results is too big and it needs to spill to disk it just doesn't do it and avoids the sorting and the count.\\n\\nAny ideas from anyone who has faced this same problem?\\n\\nCheers!\\nDaniel\\n\\nPS: This other post is about the same thing but it's been dead for so long that I preferred to create a new one. viewtopic.php?f=8&t=320\\n\\nEDIT: Some code example of what I am doing\\n
\\nEXPORT getPaginatedStuff(STRING searchParams, INTEGER offset = 0, INTEGER nResults = 0, STRING orderBy = '', STRING ascOrder = '') := FUNCTION\\n\\tfullResult\\t\\t:=\\tgetTheSearchResults(searchParams);\\n\\tsortedResult\\t:=\\tsortByField(fullResult,orderBy,ascOrder);\\n\\n\\tcurrentPage\\t\\t:=\\tIFF (\\n\\t\\t\\t\\t\\t\\t\\tnResults = 0,\\n\\t\\t\\t\\t\\t\\t\\tCHOOSEN(sortedResult, Constants.GenericResultsLimit, offset+1),\\n\\t\\t\\t\\t\\t\\t\\tCHOOSEN(sortedResult, nResults, offset+1)\\n\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\tgroupedResults\\t:=\\tsomeProjectionsToGroupResults(currentPage); //We make a single row layour with all the results on one field and the total count on another.\\n\\tcountDs\\t\\t\\t:=\\tCOUNT(fullResult);// : INDEPENDENT; tried this and it works but too much over-doing on the graph\\n\\t\\n\\twithCount\\t\\t:=\\tPROJECT(\\n\\t\\t\\t\\t\\t\\t\\tgroupedResults,\\n\\t\\t\\t\\t\\t\\t\\tTRANSFORM(\\n\\t\\t\\t\\t\\t\\t\\t\\tGroupedResultsLayout,\\n\\t\\t\\t\\t\\t\\t\\t\\tSELF.TotalCount\\t:= countDs;\\n\\t\\t\\t\\t\\t\\t\\t\\tSELF := LEFT;\\n\\t\\t\\t\\t\\t\\t\\t)\\n\\t\\t\\t\\t\\t\\t);\\n\\tRETURN withCount;\\nEND;\\n
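[Editorial sketch of the separate-count query Richard suggests above; getTheSearchResults is the hypothetical helper already used in this example.]
EXPORT getStuffCount(STRING searchParams) := FUNCTION
    RETURN COUNT(getTheSearchResults(searchParams));
END;
// The GUI can call this lightweight query once to size its paging controls,
// then page through getPaginatedStuff with offset/nResults.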
\", \"post_time\": \"2016-06-24 14:09:09\" },\n\t{ \"post_id\": 9868, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nOK, then we've clearly identified that the FAILMESSAGE() function either isn't working in this instance, or is working and is returning nothing because the code didn't actually fail and the email being sent at all is the real bug. \\n\\nEither way, your next step is to submit a JIRA ticket (https://track.hpccsystems.com) to report the issue to the developers so they can investigate fix any bugs they find, and potentially offer you a workaround.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-30 08:47:41\" },\n\t{ \"post_id\": 9864, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"BarrOs01\", \"post_text\": \"No. im not defining a variable, I'm using the actual failmessage function as described here: [url]\\nhttps://hpccsystems.com/download/docume ... SSAGE.html[/url]\\n\\nIt should display the last failure message in the FAILURE workflow\\n\\nActually the example shown in that page, is exactly what I'm trying to implement. \\nThe difference is that im not doing a simple count/dedup. I'm spraying files, splitting data, transforming, doing many calcs in order to build our input files. That's why my code looks like:\\n\\nsequential( \\n spray_xml_files(),\\n build_inputs(), // read sprayed files, transform and create new inputs. \\n promote_files(),\\n ): failure(send_email(failmessage));\", \"post_time\": \"2016-06-29 12:20:25\" },\n\t{ \"post_id\": 9862, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nSo I ask again, how and where are you defining your "failmessage"? I'm asking for the actual definition, something like this:failmessage := 'My Fail Message';
The problem seems to be with that definition, since you're getting the email AND sending a string constant works.\\n\\nRichard\", \"post_time\": \"2016-06-29 07:18:32\" },\n\t{ \"post_id\": 9858, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"BarrOs01\", \"post_text\": \"I'm sending the email successfully, I'm getting it in my inbox.\\nBut the failmessage is empty. Using a string constant works fine.\\n\\nI've tried to : failure( output(failmessage) ); instead of sending an email, but still the output is empty.\", \"post_time\": \"2016-06-28 17:47:12\" },\n\t{ \"post_id\": 9856, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,as I've shown in the code above (original post)
Sorry, we must be miscommunicating.\\n\\nI'm interested in knowing what exact content you expected to receive in the email. I would also like to know if the email is being sent at all. \\n\\nSo these are the next questions:\\n
\\nI think you can see where I'm going with this.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-28 13:26:23\" },\n\t{ \"post_id\": 9852, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"BarrOs01\", \"post_text\": \"as I've shown in the code above (original post) \\n\\nbasically i place the failmessage inside a failure workflow. So in the event the sequential fails I would like to send an email notifying the fail message received.\", \"post_time\": \"2016-06-27 14:41:27\" },\n\t{ \"post_id\": 9846, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"Re: failmessage doesnt print nothing\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nHow and where have you defined your failmessage?\\n\\nRichard\", \"post_time\": \"2016-06-27 08:14:09\" },\n\t{ \"post_id\": 9840, \"topic_id\": 2346, \"forum_id\": 8, \"post_subject\": \"failmessage doesnt print nothing\", \"username\": \"BarrOs01\", \"post_text\": \"I have implemented a failure workflow to send an email when the process fails but failmessage is always empty, what could I be missing ? I don't have any other failure workflows inside the process. The process is triggered by a cron using wk_ut.CreateWuid \\n\\n\\nbuilt := sequential(\\nSpray_File(),\\nBuild_Input()\\n): failure( sendEmail(failmessage));\\n
\", \"post_time\": \"2016-06-24 16:28:48\" },\n\t{ \"post_id\": 9906, \"topic_id\": 2348, \"forum_id\": 8, \"post_subject\": \"Re: Distributed indexes\", \"username\": \"bforeman\", \"post_text\": \"Are there any good examples on building and using distributed indexes?
\\nThe Online ROXIE courses and the ROXIE and Programmer's Guide documentation have many good examples. And all of our indexes are distributed by default in HPCC.\\n \\nCan they be used on Thor, or only for Roxie?
\\nDistributed indexes are built on THOR, published (copied) to Roxie. They can be used on both, but indexes are optimized for ROXIE.\\n\\nCan they be successfully copied between Thor clusters, across Dali's?
\\nYes, this technique is known as publishing. It is a parallel copy from THOR to ROXIE.\\n\\nBob\", \"post_time\": \"2016-07-06 19:05:22\" },\n\t{ \"post_id\": 9860, \"topic_id\": 2348, \"forum_id\": 8, \"post_subject\": \"Distributed indexes\", \"username\": \"jwilt\", \"post_text\": \"Hi,\\nAre there any good examples on building and using distributed indexes?\\n\\nCan they be used on Thor, or only for Roxie?\\n\\nCan they be successfully copied between Thor clusters, across Dali's?\\n\\nThanks so much.\", \"post_time\": \"2016-06-28 21:02:54\" },\n\t{ \"post_id\": 9912, \"topic_id\": 2360, \"forum_id\": 8, \"post_subject\": \"Re: System error: 4: MP link closed\", \"username\": \"vivekaxl\", \"post_text\": \"Thank you.\", \"post_time\": \"2016-07-06 19:26:59\" },\n\t{ \"post_id\": 9908, \"topic_id\": 2360, \"forum_id\": 8, \"post_subject\": \"Re: System error: 4: MP link closed\", \"username\": \"sort\", \"post_text\": \"When 6.0.2 is released, this issue will get resolved. https://track.hpccsystems.com/browse/HPCC-15827\\n\\n(https://track.hpccsystems.com/browse/HPCC-15838 - which is marked as a duplicate of 15827)\", \"post_time\": \"2016-07-06 19:19:53\" },\n\t{ \"post_id\": 9898, \"topic_id\": 2360, \"forum_id\": 8, \"post_subject\": \"Re: System error: 4: MP link closed\", \"username\": \"vivekaxl\", \"post_text\": \"I was running the code on a VM of version (6.0.0-2). But when I tried it on version 5.6.4-1, it works well.\", \"post_time\": \"2016-07-06 13:18:15\" },\n\t{ \"post_id\": 9892, \"topic_id\": 2360, \"forum_id\": 8, \"post_subject\": \"System error: 4: MP link closed\", \"username\": \"vivekaxl\", \"post_text\": \"I am trying to run this piece of code which runs well in hthor throws an error in case of thor. Has anyone faced similar issues?\\n\\n\\nIMPORT * FROM ML;\\nIMPORT * FROM ML.Types;\\nIMPORT * FROM ParameterTuning;\\n\\nDE(DATASET(PLTypes.tuning_range_rec) t_ranges, REAL CF=0.75, REAL F=0.3) := MODULE \\n SHARED REAL real_random_between(INTEGER lower_limit, ANY upper_limit) := FUNCTION\\n RETURN lower_limit + ((RANDOM()%100)/100) * (upper_limit - lower_limit);\\n END;\\n\\n SHARED REAL ObjectiveFunction(PLTypes.indep_de_rec individual):= FUNCTION\\n RETURN individual.indep1 + individual.indep2 + individual.indep3 + individual.indep4 ;\\n END;\\n\\n SHARED PLTypes.de_rec evaluate_individual(PLTypes.indep_de_rec indi):= TRANSFORM\\n SELF.objective := ObjectiveFunction(indi);\\n SELF := indi;\\n END; \\n \\n EXPORT DATASET(PLTypes.de_rec) generate_population():= FUNCTION\\n \\n PLTypes.indep_de_rec generate_individual(INTEGER id=-1, INTEGER gen=0):= TRANSFORM\\n SELF.individual_id := id;\\n SELF.generation_id := gen;\\n SELF.indep1 := (INTEGER)real_random_between(t_ranges(parameter_id=1)[1].minimun_value, t_ranges(parameter_id=1)[1].maximum_value);\\n SELF.indep2 := (REAL)real_random_between(t_ranges(parameter_id=2)[1].minimun_value, t_ranges(parameter_id=2)[1].maximum_value);\\n SELF.indep3 := (REAL)real_random_between(t_ranges(parameter_id=3)[1].minimun_value, t_ranges(parameter_id=3)[1].maximum_value);\\n SELF.indep4 := (INTEGER)real_random_between(t_ranges(parameter_id=4)[1].minimun_value, t_ranges(parameter_id=4)[1].maximum_value);\\n END;\\n\\n independent_population := DATASET(20, generate_individual(COUNTER));\\n evaluated_population := PROJECT(independent_population, evaluate_individual(LEFT));\\n RETURN evaluated_population;\\n END;\\n\\n\\n EXPORT DATASET(PLTypes.de_rec) run_one_generation(DATASET(PLTypes.de_rec) population , INTEGER gen):= FUNCTION\\n 
REAL de_style_mutation(REAL a, REAL b, REAL c):= FUNCTION\\n RETURN a + F * (b - a);\\n END;\\n PLTypes.de_rec fetch_random_member():= FUNCTION\\n RETURN population[(INTEGER)real_random_between(1, COUNT(population))];\\n END;\\n PLTypes.indep_de_rec new_member(PLTypes.de_rec one, PLTypes.de_rec two, PLTypes.de_rec three, INTEGER pid):= TRANSFORM\\n SELF.individual_id := pid;\\n SELF.generation_id := gen;\\n SELF.indep1 := (INTEGER)IF(real_random_between(0, 1) < CF, de_style_mutation(one.indep1, two.indep1, three.indep1), one.indep1);\\n SELF.indep2 := (REAL)IF(real_random_between(0, 1) < CF, de_style_mutation(one.indep2, two.indep2, three.indep2), one.indep2);\\n SELF.indep3 := (REAL)IF(real_random_between(0, 1) < CF, de_style_mutation(one.indep3, two.indep3, three.indep3), one.indep3);\\n SELF.indep4 := (INTEGER)IF(real_random_between(0, 1) < CF, de_style_mutation(one.indep4, two.indep4, three.indep4), one.indep4); \\n END;\\n new_independent_population := DATASET(COUNT(population), new_member(population[COUNTER], fetch_random_member(),fetch_random_member(), COUNTER));\\n new_evaluated_population := PROJECT(new_independent_population, evaluate_individual(LEFT));\\n PLTypes.de_rec filter_it( PLTypes.de_rec L, PLTypes.de_rec R) := TRANSFORM\\n SELF := IF(L.objective > R.objective, L, R)\\n END;\\n filtered_population := JOIN(population, new_evaluated_population, LEFT.individual_id = RIGHT.individual_id, filter_it(LEFT, RIGHT));\\n RETURN filtered_population;\\n END;\\n \\n EXPORT DATASET(PLTypes.de_rec) run_multiple_generation(DATASET(PLTypes.de_rec) population , INTEGER number_of_generations):= FUNCTION\\n // final_population := LOOP(population, COUNTER<number_of_generations, run_one_generation(ROWS(LEFT),COUNTER+1));\\n final_population := LOOP(population, \\n COUNTER <= number_of_generations,\\n run_one_generation(ROWS(LEFT) , COUNTER+1)\\n );\\n RETURN final_population;\\n END;\\nEND; \\n\\n\\n\\ntuning_range := DATASET([\\n {1, 40, 80, 40},\\n {2, 3, 4, 1},\\n {3, 0.9, 1.0, 0.1},\\n {4, 28, 36, 1}],\\n PLTypes.tuning_range_rec);\\n\\nOUTPUT(tuning_range(parameter_id=1));\\nEA := DE(tuning_range);\\nzero_pop := EA.generate_population();\\nOUTPUT(zero_pop, NAMED('GEN0'));\\n// first_pop := EA.run_one_generation(zero_pop, 1);\\n// OUTPUT(first_pop, NAMED('GEN1'));\\nfinal_pop := EA.run_multiple_generation(zero_pop, 20);\\nOUTPUT(final_pop);\\n
\", \"post_time\": \"2016-07-05 20:47:15\" },\n\t{ \"post_id\": 9930, \"topic_id\": 2362, \"forum_id\": 8, \"post_subject\": \"Re: The code never terminates\", \"username\": \"vivekaxl\", \"post_text\": \"Reported as an issue. HPCC-15902 - ECL Code which doesn't terminate\", \"post_time\": \"2016-07-09 18:39:20\" },\n\t{ \"post_id\": 9894, \"topic_id\": 2362, \"forum_id\": 8, \"post_subject\": \"The code never terminates\", \"username\": \"vivekaxl\", \"post_text\": \"I am trying to include grid search for parameter tuning but the project doesn't seem to e working. \\n\\n\\nnumberFormat := RECORD\\n\\t\\tgrid;\\n\\t\\tREAL result;\\n\\tEND;\\nresult := PROJECT(grid, TRANSFORM(numberFormat, \\n SELF.result := PLhelper.RunRandomForestClassfier(trainIndepData, trainDepData, tuneIndepData, tuneDepData, LEFT.v1, 3, 1.0, 100);\\n SELF := LEFT;\\n ));\\nOUTPUT(result);\\n
\\n\\nThis code never finishes, but when I replace LEFT.v1 with Constants the code runs well. \\n\\nFind attached the ZAP. Any ideas?\", \"post_time\": \"2016-07-05 21:16:36\" },\n\t{ \"post_id\": 9904, \"topic_id\": 2364, \"forum_id\": 8, \"post_subject\": \"Re: Watchdog has lost contact with Thor slave\", \"username\": \"bforeman\", \"post_text\": \"You probably need to open up a JIRA report as soon as possible.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nFrom there, the development team can review what you have posted, and request additional logs as needed.\\n\\nIs this your first cluster assembly, or did this issue just surface when you upgraded to the RC version?\\n\\nBob\", \"post_time\": \"2016-07-06 18:59:26\" },\n\t{ \"post_id\": 9896, \"topic_id\": 2364, \"forum_id\": 8, \"post_subject\": \"Watchdog has lost contact with Thor slave\", \"username\": \"bbrown57\", \"post_text\": \"I have created a Thor Cluster with 5 slave nodes and it constantly loses connection and crashes. Users can't submit jobs to the cluster as they receive errors. I have included a snip of the log file for the Thor instance. There is more but it all repeats the same process as below:\\n\\n00000002 2016-07-06 07:37:56.723 95967 95967 "Opened log file //10.141.0.38/var/log/HPCCSystems/mythor/thormaster.2016_07_06.log"\\n00000003 2016-07-06 07:37:56.723 95967 95967 "Build community_6.0.0-rc4"\\n00000004 2016-07-06 07:37:56.723 95967 95967 "calling initClientProcess Port 20000"\\n00000005 2016-07-06 07:37:56.729 95967 95967 "Checking cluster replicate nodes"\\n00000006 2016-07-06 07:37:56.730 95967 95967 "Cluster replicate nodes check completed in 2ms"\\n00000007 2016-07-06 07:37:56.731 95967 95967 "Global memory size = 96829 MB"\\n00000008 2016-07-06 07:37:56.731 95967 95967 "RoxieMemMgr: Setting memory limit to 101532565504 bytes (387316 pages)"\\n00000009 2016-07-06 07:37:56.732 95967 95967 "Transparent huge pages are not supported on this kernel. 
Requires kernel version > 2.6.38."\\n0000000A 2016-07-06 07:37:56.732 95967 95967 "Memory released to OS on each 256k 'page'"\\n0000000B 2016-07-06 07:37:56.732 95967 95967 "RoxieMemMgr: 387328 Pages successfully allocated for the pool - memsize=101535711232 base=0x2aaacc200000 alignment=262144 bitmapSize=12104"\\n0000000C 2016-07-06 07:37:56.732 95967 95967 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 0 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSystems/mythor/temp = 4385 MB"\\n0000000D 2016-07-06 07:37:56.735 95967 95967 "Starting watchdog"\\n0000000F 2016-07-06 07:37:56.735 95967 95967 "ThorMaster version 4.1, Started on 10.141.0.38:20000"\\n0000000E 2016-07-06 07:37:56.735 95967 95983 "Started watchdog"\\n00000010 2016-07-06 07:37:56.735 95967 95967 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000011 2016-07-06 07:37:56.735 95967 95967 "Waiting for 5 slaves to register"\\n00000012 2016-07-06 07:37:56.735 95967 95967 "Verifying connection to slave 5"\\n00000013 2016-07-06 07:37:56.736 95967 95967 "verified connection with 10.141.0.39:20100"\\n00000014 2016-07-06 07:37:56.736 95967 95967 "Verifying connection to slave 3"\\n00000015 2016-07-06 07:37:56.737 95967 95967 "verified connection with 10.141.0.42:20100"\\n00000016 2016-07-06 07:37:56.737 95967 95967 "Verifying connection to slave 1"\\n00000017 2016-07-06 07:37:56.737 95967 95967 "verified connection with 10.141.0.40:20100"\\n00000018 2016-07-06 07:37:56.737 95967 95967 "Verifying connection to slave 4"\\n00000019 2016-07-06 07:37:56.738 95967 95967 "verified connection with 10.141.0.26:20100"\\n0000001A 2016-07-06 07:37:56.738 95967 95967 "Verifying connection to slave 2"\\n0000001B 2016-07-06 07:37:56.738 95967 95967 "verified connection with 10.141.0.41:20100"\\n0000001C 2016-07-06 07:37:56.738 95967 95967 "Slaves connected, initializing.."\\n0000001D 2016-07-06 07:37:56.739 95967 95967 "Initialization sent to slave group"\\n0000001E 2016-07-06 07:37:56.739 95967 95967 "Registration confirmation from 10.141.0.41:20100"\\n0000001F 2016-07-06 07:37:56.739 95967 95967 "Slave 2 (10.141.0.41:20100) registered"\\n00000020 2016-07-06 07:37:56.739 95967 95967 "Registration confirmation from 10.141.0.40:20100"\\n00000021 2016-07-06 07:37:56.739 95967 95967 "Slave 1 (10.141.0.40:20100) registered"\\n00000022 2016-07-06 07:37:56.739 95967 95967 "Registration confirmation from 10.141.0.39:20100"\\n00000023 2016-07-06 07:37:56.739 95967 95967 "Slave 5 (10.141.0.39:20100) registered"\\n00000024 2016-07-06 07:37:56.739 95967 95967 "Registration confirmation from 10.141.0.42:20100"\\n00000025 2016-07-06 07:37:56.739 95967 95967 "Slave 3 (10.141.0.42:20100) registered"\\n00000026 2016-07-06 07:37:56.739 95967 95967 "Registration confirmation from 10.141.0.26:20100"\\n00000027 2016-07-06 07:37:56.739 95967 95967 "Slave 4 (10.141.0.26:20100) registered"\\n00000028 2016-07-06 07:37:56.739 95967 95967 "Slaves initialized"\\n00000029 2016-07-06 07:37:56.740 95967 95967 "verifying mp connection to rest of cluster"\\n0000002A 2016-07-06 07:37:56.740 95967 95967 "verified mp connection to rest of cluster"\\n0000002B 2016-07-06 07:37:56.740 95967 95967 ",Progress,Thor,Startup,mythor,mythor,thor.thor,//10.141.0.38/var/log/HPCCSystems/mythor/thormaster.2016_07_06.log"\\n0000002C 2016-07-06 07:37:56.740 95967 95967 "Creating sentinel file thor.sentinel for rerun from script"\\n0000002D 2016-07-06 07:37:56.740 95967 95967 "Listening for graph"\\n0000002E 2016-07-06 07:37:56.741 95967 95967 "verifying mp 
connection to all slaves"\\n0000002F 2016-07-06 07:37:56.741 95967 95967 "verified mp connection to all slaves"\\n00000030 2016-07-06 07:37:56.741 95967 95967 "ThorLCR(10.141.0.38:20000) available, waiting on queue thor.thor"\\n00000031 2016-07-06 07:52:56.735 95967 95983 "Watchdog : Marking Machine as Down! [10.141.0.26:20100]"\\n00000032 2016-07-06 07:52:56.736 95967 95983 "Watchdog : Marking Machine as Down! [10.141.0.42:20100]"\\n00000033 2016-07-06 07:52:56.736 95967 95983 "Watchdog : Marking Machine as Down! [10.141.0.39:20100]"\\n00000034 2016-07-06 07:52:56.736 95967 95983 "Watchdog : Marking Machine as Down! [10.141.0.40:20100]"\\n00000035 2016-07-06 07:52:56.736 95967 95983 "Watchdog : Marking Machine as Down! [10.141.0.41:20100]"\\n00000036 2016-07-06 08:02:56.736 95967 95983 "ERROR: 10056: /var/lib/jenkins/workspace/CE-Candidate-6.0.0-rc4/CE/centos-6.4-x86_64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(958) : abortThor : Watchdog has lost contact with Thor slave: 10.141.0.26:20100 (Process terminated or node down?)"\\n00000037 2016-07-06 08:02:56.736 95967 95983 "abortThor called"\\n00000038 2016-07-06 08:02:56.736 95967 95983 "Stopping jobManager"\\n00000039 2016-07-06 08:02:56.736 95967 95983 "aborting any current active job"\\n0000003A 2016-07-06 08:02:56.736 95967 95967 "acceptConversation aborted - terminating"\\n0000003B 2016-07-06 08:02:56.739 95967 95967 ",Progress,Thor,Terminate,mythor,mythor,thor.thor"\\n0000003C 2016-07-06 08:02:56.739 95967 95967 "ThorMaster terminated OK"\\n0000003D 2016-07-06 08:02:57.740 95967 95967 "priority set id=46912652035840 policy=0 pri=0 PID=95967"\\n0000003E 2016-07-06 08:02:57.740 95967 95967 "Stopping watchdog"\\n0000003F 2016-07-06 08:02:57.740 95967 95967 "Stopped watchdog"\\n00000040 2016-07-06 08:03:17.740 95967 95967 "Timeout waiting for Shutdown reply from slave(s) (0 replied out of 5 total)"\\n00000041 2016-07-06 08:03:17.740 95967 95967 "Slaves that have not replied: 1,2,3,4,5"\\n00000042 2016-07-06 08:03:17.751 95967 95967 "Thor closing down 5"\\n00000043 2016-07-06 08:03:17.751 95967 95967 "Thor closing down 4"\\n00000044 2016-07-06 08:03:17.752 95967 95967 "Thor closing down 3"\\n00000045 2016-07-06 08:03:17.752 95967 95967 "Thor closing down 2"\\n00000046 2016-07-06 08:03:17.763 95967 95967 "Thor closing down 1"\", \"post_time\": \"2016-07-06 12:27:47\" },\n\t{ \"post_id\": 9928, \"topic_id\": 2368, \"forum_id\": 8, \"post_subject\": \"Re: DATASET(count, transform) did not work as expected\", \"username\": \"vivekaxl\", \"post_text\": \"\\nI would expect your definition to be OK when run on an HTHOR queue (single node) because the "set_data" dataset is available as if it were local to the node.\\n
\\n\\nI tried it on HTHOR, the results don't change. \\n\\nI went ahead and tried the method which you suggested and I was able to oversample and get the desired result. \\n\\nThe step 1 is not exactly how you had suggested (I couldn't get it to work). Here is my version\\n\\n\\nIMPORT STD;\\nLayout_People := RECORD\\nSTRING15 FirstName;\\nSTRING25 LastName;\\nSTRING15 MiddleName;\\nSTRING5 Zip;\\nSTRING42 Street;\\nSTRING20 City;\\nSTRING2 State;\\nEND; \\nset_data := DATASET('~tutorial::VN::OriginalPerson',Layout_People, THOR)[1..1000];\\nid_layout_people := RECORD(Layout_People)\\n INTEGER id; \\nEND;\\n\\n// Step 1\\nper_cluster := COUNT(set_data)/Std.system.ThorLib.Nodes();\\nid_set_data := PROJECT(set_data, TRANSFORM(id_layout_people, \\n SELF.id := Std.system.ThorLib.Node() * per_cluster + COUNTER;\\n SELF := LEFT;\\n ), LOCAL);\\n\\n//Step 2\\nid_record := RECORD \\n INTEGER id;\\n END;\\nid_ds := DATASET( COUNT(set_data) * 2, TRANSFORM(\\n id_record,\\n SELF.id := RANDOM() % COUNT(set_data);\\n ));\\n\\n\\n//Step 3\\nos_ds := JOIN(id_ds, id_set_data, LEFT.id = RIGHT.id, TRANSFORM(id_layout_people, SELF:= RIGHT), MANY);\\n\\nCOUNT(set_data);\\nCOUNT(os_ds);\\n\\n
\", \"post_time\": \"2016-07-08 22:58:15\" },\n\t{ \"post_id\": 9924, \"topic_id\": 2368, \"forum_id\": 8, \"post_subject\": \"Re: DATASET(count, transform) did not work as expected\", \"username\": \"john holt\", \"post_text\": \"The "set_data" dataset is distributed on the THOR nodes.\\n\\nI would expect your definition to be OK when run on an HTHOR queue (single node) because the "set_data" dataset is available as if it were local to the node.\\n\\nWhat I would do to over sample with your case is:\\n1) PROJECT the "set_data" records into the "layout_id_people" record layout, numbering each record. You can do this with COUNT inside of project, BUT, this is by nature slow as each node must work in series. You can assign the numbers independently and in parallel with a description that uses the node number and number of nodes, by running the PROJECT(..., LOCAL) and the assignment SELF.id := Std.ThorLib.Node() + ((COUNTER-1)*Std.ThorLib.Nodes()).\\n\\n2) Make a random dataset with just an ID field, SELF.id:=RANDOM() % COUNT(set_data);// (Node() runs 0 to Nodes()-1\\n\\n3) Perform a JOIN of the result of (1) and (2). You can use LOOKUP, MANY which will let the same record be selected more than once and will use your random list on every node in parallel.\", \"post_time\": \"2016-07-08 14:03:43\" },\n\t{ \"post_id\": 9920, \"topic_id\": 2368, \"forum_id\": 8, \"post_subject\": \"DATASET(count, transform) did not work as expected\", \"username\": \"vivekaxl\", \"post_text\": \"I am trying to oversample the original person dataset. The code given below works for inline datasets and doesn't work for THOR. \\n\\n\\nLayout_People := RECORD\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING5 Zip;\\n STRING42 Street;\\n STRING20 City;\\n STRING2 State;\\nEND; \\nset_data := DATASET('~tutorial::VN::OriginalPerson',Layout_People, THOR);\\nid_layout_people := RECORD(Layout_People)\\n INTEGER id; \\nEND;\\nOUTPUT(set_data, NAMED('original'));\\nrandom_dataset := DATASET(200, TRANSFORM(id_layout_people, \\n SELF.id := COUNTER;\\n SELF := set_data[(RANDOM()%COUNT(set_data))+1];\\n )\\n );\\nOUTPUT(random_dataset,NAMED('transform'),ALL);\\nOUTPUT(set_data[23], NAMED('twetythr'));\\n
\\n\\nDoes anyone see anything wrong with code? \\nPS: I have tried this code on 6.0.0-2 and 5.6.4-1.\", \"post_time\": \"2016-07-07 21:54:27\" },\n\t{ \"post_id\": 9950, \"topic_id\": 2370, \"forum_id\": 8, \"post_subject\": \"Re: ECL Error "value global('aJ7GR') in workunit is undefine\", \"username\": \"ghalliday\", \"post_text\": \"I think that is an example of something that has been fixed in 6.0.2.\\n\\nSee https://track.hpccsystems.com/browse/HPCC-15714\", \"post_time\": \"2016-07-12 15:33:18\" },\n\t{ \"post_id\": 9944, \"topic_id\": 2370, \"forum_id\": 8, \"post_subject\": \"Re: ECL Error "value global('aJ7GR') in workunit is undefine\", \"username\": \"drealeed\", \"post_text\": \"Gavin,\\n\\nI tried that. It fixed the error in the whittled-down BWR that I crafted to isolate the issue. But when I make that replacement in the actual ecl code and run it on http://10.173.147.1:8010, I now get this error (in W20160712-091945)\\n\\nWarning: Mismatch in major version number (6.0.0 v 5.6.4) (0, 0 - unknown)\\nError: ‘struct cAc46’ has no member named ‘colocal’ (1887, 0 - W20160712-091945_1.cpp)\\nError: ‘struct cAc46’ has no member named ‘colocal’ (1887, 0 - W20160712-091945_1.cpp)\\nError: Compile/Link failed for W20160712-091945 (see '//10.173.147.1/mnt/disk1/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0 - W20160712-091945)\\nWarning: \\nWarning: ---------- compiler output --------------\\nWarning: W20160712-091945_1.cpp: In member function ‘virtual void cAc92::toXML(const byte*, IXmlWriter&)’:\\nWarning: W20160712-091945_1.cpp:1887: error: ‘struct cAc46’ has no member named ‘colocal’\\nWarning: W20160712-091945_1.cpp:1887: error: ‘struct cAc46’ has no member named ‘colocal’\\nWarning: g++: W20160712-091945_1.cpp.o: No such file or directory\\nWarning: \\nWarning: --------- end compiler output -----------\\n\\nI'll work on recreating this error in a standalone BWR as well. In the meantime I've attached the zap report and the generated c++ for the erring workunit.\", \"post_time\": \"2016-07-12 13:38:11\" },\n\t{ \"post_id\": 9942, \"topic_id\": 2370, \"forum_id\": 8, \"post_subject\": \"Re: ECL Error "value global('aJ7GR') in workunit is undefine\", \"username\": \"ghalliday\", \"post_text\": \"Try using ORDERED inside FN_Debug instead of SEQUENTIAL.\", \"post_time\": \"2016-07-12 12:59:13\" },\n\t{ \"post_id\": 9926, \"topic_id\": 2370, \"forum_id\": 8, \"post_subject\": \"ECL Error "value global('aJ7GR') in workunit is undefined"\", \"username\": \"drealeed\", \"post_text\": \"I'm attempting to publish compiled service workunits and update the packagemaps for them within ECL, using SOAPCALL, to a collection of roxies. There are more than one services being published to each of several roxies.\\n\\nI have a function performing the logic to publish to a roxie/deploy a packagemap to that roxie, called with an APPLY against a dataset of roxie info. The function publishes the service via a soapcall and then adds/updates the packagemap via a soapcall. \\n\\nI've gotten a number of errors while trying to run this, including things like \\nError: System error: 10023: Graph[31], value global('aJ7GR') in workunit is undefined\\n\\nand \\n\\nError: ‘struct cAc46’ has no member named ‘colocal’ (1887, 0 - W20160708-110353_1.cpp.\\n\\nI've gotten the errors using both the 5.6.2 eclcc compiler and the 6.0.0 ecl compiler.\\n\\nI finally managed to aggregate the ecl into a single bwr that triggers the error. at the bottom of the included script is a sequential. 
If either item in the sequential is commented out the ecl runs fine; if both are there it throws the "value global (xxx) in workunit is undefined" error when running.\\n\\nAny idea what this could be?\\n\\n// Modified on 07 Jul 2016 16:53:12 GMT by HIPIE (version 1.7) on machine BCTLW7LEEDDX\\n IMPORT lib_thorlib;\\n IMPORT $;\\n IMPORT lib_fileservices;\\n IMPORT Std.Str;\\n\\t\\n\\t\\n\\tEXPORT l_status:=RECORD\\n\\t\\tSTRING Code {XPATH('Code')};\\n\\t\\tSTRING Description {XPATH('Description')};\\n\\tEND;\\n\\n\\tEXPORT l_flat_package:=RECORD\\n\\t\\tSTRING queryname:='';\\n\\t\\tSTRING superfilename:='';\\n\\t\\tSTRING subfilename:='';\\n\\t\\tBOOLEAN active:=false;\\n\\tEND;\\n\\n\\tEXPORT l_roxieconfig:=RECORD\\n\\t\\tSTRING RoxieLabel:='';\\n\\t\\tSTRING SourceDali:='';\\n \\t\\tSTRING RoxieDali:='';\\n\\t\\tSTRING RoxiePublishUrl:='';\\n\\t\\tSTRING RoxieServiceUrl:='';\\n\\t\\tSTRING RoxieInternalServiceUrl:='';\\n\\t\\tSTRING cluster:='roxie';\\n\\t\\tSTRING clustergroup:='roxie';\\n\\t\\tBOOLEAN selected:=true;\\n\\t\\tSTRING username:='';\\n\\t\\tSTRING password:='';\\n\\tEND;\\n\\n l_files := RECORD\\n\\t\\t\\tSTRING servicename:='';\\n\\t\\t\\tSTRING superfilename_tilde:='';\\n\\t\\t\\tSTRING superfilename_notilde:='';\\n\\t\\t\\tSTRING indexfilename_tilde:='';\\n\\t\\t\\tSTRING indexfilename_notilde:='';\\n\\t\\t\\tBOOLEAN indexStructureChanged:=false;\\n\\t\\t\\tBOOLEAN serviceexists:=false,\\n\\t\\t\\tSTRING servicewuid:='',\\n\\t\\t\\tBOOLEAN servicechanged:=true,\\n\\t\\t\\tDATASET({STRING name}) oldFiles:=DATASET([],{STRING name});\\n\\tEND;\\n\\t\\n\\tdefaultroxie:=DATASET([\\n\\t{'ramps_dev_detached_roxie','null','10.241.100.159','http://ramps_dev_svc:Lexis2016@10.241.100.159:8010/','http://10.241.100.159:8002/','http://10.241.100.159:8002/','roxie','null','true','ramps_dev_svc','Lexis2016'}\\n\\t],l_roxieconfig);\\n\\n files := DATASET([\\n{'leeddx_issue_1365_remoteroxie.Ins003_Service_1',\\n'~hipie::keys::leeddx_issue_1365_remoteroxie::Ins003_dsOutputFromInput_1::View_TableReferenceWithBQ',\\n'hipie::keys::leeddx_issue_1365_remoteroxie::Ins003_dsOutputFromInput_1::View_TableReferenceWithBQ',\\n'~hipie::keys::leeddx_issue_1365_remoteroxie::Ins003_dsOutputFromInput_1::View_TableReferenceWithBQ_subfile1',\\n'hipie::keys::leeddx_issue_1365_remoteroxie::Ins003_dsOutputFromInput_1::View_TableReferenceWithBQ_subfile1',\\ntrue,'','W20160707-132339'}\\n],l_files);\\n\\t\\n //composition_uuid:='1c2a-3e7f-44d6-8aa0-445372be';\\n\\t\\nFN_PublishWorkunit(STRING workunitid, STRING RoxiePublishURL, STRING RoxieCluster, STRING sourcedali) := FUNCTION\\n publishendpoint :=RoxiePublishURL + 'WsWorkunits';\\n layout_wupublish_in := RECORD\\n\\t\\t\\t\\t\\t\\tSTRING Wuid {XPATH('Wuid')} := workunitid;\\n\\t\\t\\t\\t\\t\\tSTRING Activate {XPATH('Activate')} := '1';\\n\\t\\t\\t\\t\\t\\tSTRING Cluster {XPATH('Cluster')} := roxiecluster;\\n\\t\\t\\t\\t\\t\\tSTRING RemoteDali {XPATH('RemoteDali')} := sourcedali;\\n\\t\\t\\t\\t\\t\\tSTRING Wait {XPATH('Wait')} := '10000';\\n\\t\\t\\t\\tEND;\\n\\t\\t\\t\\t\\n\\t\\t\\t\\tEXPORT l_wupublish_out := RECORD\\n\\t\\t\\t\\t\\t\\tSTRING Wuid {XPATH('Wuid')};\\n\\t\\t\\t\\t\\t\\tSTRING QuerySet {XPATH('QuerySet')};\\n\\t\\t\\t\\t\\t\\tSTRING QueryName {XPATH('QueryName')};\\n\\t\\t\\t\\t\\t\\tSTRING QueryId {XPATH('QueryId')};\\n\\t\\t\\t\\t\\t\\tSTRING ReloadFailed {XPATH('ReloadFailed')};\\n\\t\\t\\t\\t\\t\\tSTRING Suspended {XPATH('Suspended')};\\n\\t\\t\\t\\t\\t\\tSTRING ErrorMessage {XPATH('ErrorMessage')};\\n\\t\\t\\t\\tEND;\\n\\t\\n 
ds:=SOAPCALL(publishendpoint,'WUPublishWorkunit',layout_wupublish_in,l_wupublish_out,LITERAL,XPATH('WUPublishWorkunitResponse'));\\n return OUTPUT(DATASET(ds),NAMED('publishedworkunits'),EXTEND);\\n\\tEND;\\n\\n\\n\\n FN_Debug(STRING roxiename, STRING roxiedali, STRING roxieurl , STRING roxiecluster,\\n\\t\\t\\t\\t\\tDATASET(l_files) files):=FUNCTION\\n\\n\\t\\t\\t\\tdsnewpackages:=PROJECT(files,TRANSFORM(l_flat_package,\\n\\t\\t\\t\\t\\tSELF.queryname:=left.servicename;\\n\\t\\t\\t\\t\\tSELF.superfilename:=left.superfilename_notilde;\\n\\t\\t\\t\\t\\tSELF.subfilename:=LEFT.indexfilename_notilde;\\n\\t\\t\\t\\t\\tSELF.active:=true;\\n\\t\\t\\t\\t));\\n\\n\\t\\t\\t requestLayout := RECORD\\n\\t\\t\\t\\t\\tSTRING PackageMapId {XPATH('PackageMapId')} :='HIPIE';\\n\\t\\t\\t\\tEND;\\n\\n\\t\\t\\t\\tl_result:=RECORD\\n\\t\\t\\t\\t\\tDATASET(l_status) status {XPATH('status')} ;\\n\\t\\t\\t\\t\\tSTRING info {XPATH('Info')};\\n\\t\\t\\t\\tEND;\\n\\n\\t\\t\\tlist:=SOAPCALL\\n\\t\\t\\t(\\n\\t\\t\\t\\t\\tDATASET([{'HIPIE'}],requestlayout),\\n\\t\\t\\t\\t\\troxieurl + '/WsPackageProcess',\\n\\t\\t\\t\\t\\t'GetPackageMapById',\\n\\t\\t\\t\\t\\tRequestLayout,\\n\\t\\t\\t\\t\\tTRANSFORM(LEFT),\\n\\t\\t\\t\\t\\tDATASET(l_result),\\n\\t\\t\\t\\t\\tXPATH('GetPackageMapByIdResponse')\\n\\t\\t\\t);\\n\\n\\t\\t\\t\\tnewservices:=files(serviceexists=false and servicewuid != '');\\n\\t\\t\\t\\tpublishNewServices:= APPLY(newservices,FN_PublishWorkunit(servicewuid,roxieurl,roxiecluster,thorlib.daliServers()));\\n\\n//comment out either of the items in the sequential in the code works; with both it throws the error\\n// rror: System error: 10023: Graph[31], value global('aJ7GR') in workunit is undefined\\n\\t\\t\\t\\t\\tdeployroxieservice:=SEQUENTIAL(\\n\\t\\t\\t\\t\\t\\t\\t publishNewServices,\\n\\t\\t\\t\\t\\t\\t\\t OUTPUT(list)\\n\\t\\t\\t\\t\\t\\t\\t );\\n\\n\\t\\t\\t\\t\\tRETURN deployroxieservice;\\n\\nEND;\\n\\t\\n\\tAPPLY(defaultroxie, FN_Debug(RoxieLabel, RoxieDali,RoxiePublishUrl,cluster,files));\\n\\t\\t\\t\\t\\t\\t
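\n\nFor reference, the ORDERED variant ghalliday suggests above would only change the deployroxieservice definition inside FN_Debug (a minimal sketch, untested here; every other definition stays as written):\n\ndeployroxieservice := ORDERED(\n publishNewServices,\n OUTPUT(list)\n);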
\", \"post_time\": \"2016-07-08 15:40:36\" },\n\t{ \"post_id\": 9966, \"topic_id\": 2372, \"forum_id\": 8, \"post_subject\": \"Re: Parse Question '+'\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nWhile I don't have the definitive answer to your questions, I do have a suggestion. \\n\\nSince the ECL language is declarative, the actual executable code is the generated C++. Therefore, one way you can determine the difference would be to add this to your code:#OPTION('saveCppTempFiles',true)
then compile both methods, one at a time, and compare the generated C++ code. You'll find the links to that generated C++ code on the Helpers tab of the ECL Watch page for the workunit.\\n\\nIf they are exactly the same, then you'll know that the more efficient way is without the "+" (less typing ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-18 15:03:16\" },\n\t{ \"post_id\": 9932, \"topic_id\": 2372, \"forum_id\": 8, \"post_subject\": \"Parse Question '+'\", \"username\": \"wjblack\", \"post_text\": \"I'm trying to determine if there's a difference in using the '+' in the second 'pattern02' pattern verses the first 'pattern01'. I've run both and they appear at first glance to be working the same. If they are the same then is there any additional performance/work steps that happens when using the '+' verses not using?\\n\\npattern number := pattern('[0-9]');\\npattern numbers := number+;\\npattern letter := pattern('[A-Za-z]');\\npattern letters := letter+;\\npattern hyphen := '-';\\n\\npattern pattern01 := numbers hyphen numbers hyphen letters hyphen numbers;\\npattern pattern02 := numbers + hyphen + numbers + hyphen + letters + hyphen + numbers;\", \"post_time\": \"2016-07-11 12:51:02\" },\n\t{ \"post_id\": 10493, \"topic_id\": 2374, \"forum_id\": 8, \"post_subject\": \"Re: EMBED Insert Error - Expected a parent/container context\", \"username\": \"chuck.beam\", \"post_text\": \"Hi Bob,\\n\\nHope you are doing well.\\n\\nThe issue has been resolved with the release of the HPCC core 6.0.4.\\n\\nThanks\\nChuck\", \"post_time\": \"2016-08-11 10:54:20\" },\n\t{ \"post_id\": 10463, \"topic_id\": 2374, \"forum_id\": 8, \"post_subject\": \"Re: EMBED Insert Error - Expected a parent/container context\", \"username\": \"bforeman\", \"post_text\": \"Hi Chuck,\\n\\nIf you haven't already resolved this, you probably should open up a report in the Community Issue Tracker.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-08-10 20:31:45\" },\n\t{ \"post_id\": 9934, \"topic_id\": 2374, \"forum_id\": 8, \"post_subject\": \"EMBED Insert Error - Expected a parent/container context\", \"username\": \"chuck.beam\", \"post_text\": \"Hello again,\\n\\nI am trying to insert into a MySQL database using a simple EMBED.\\n\\nI generate the target dataset by reading 100 rows from the target table.\\n\\nI update the primary key and output the dataset.\\n\\nI then attempt to read in the dataset and call the EMBED insert.\\n\\nI am getting this error:\\n\\nError: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. (0, 0), 4818, \\n\\nI am not using NOTHOR anywhere in my code.\\n\\nAny help would be much appreciated!\\n\\nThanks\\nChuck\\n\\nCode to generate input dataset:\\n\\n
\\n\\nIMPORT mysql;\\n\\ncopy_Record :=\\n record\\n varstring transaction_id;\\n integer4 product_id;\\n varstring date_added;\\n varstring service_type;\\n varstring special_billing_id;\\n varstring report_code;\\n varstring report_usage;\\n varstring requestor;\\n varstring reference_number;\\n varstring account_base;\\n varstring account_suffix;\\n integer4 account_id;\\n integer4 customer_id;\\n varstring anchor_transaction_id;\\n integer4 anchor_product_id;\\n varstring full_quote_back;\\n varstring i_date_ordered;\\n varstring i_addr_house_num;\\n varstring i_addr_apt_num;\\n varstring i_addr_line;\\n varstring i_addr_state;\\n varstring i_addr_zip;\\n varstring i_addr_city;\\n varstring i_addr_county;\\n varstring i_addr_country;\\n integer4 i_addr_type;\\n varstring addr_household_sur_name;\\n integer4 listed_driver_count;\\n integer4 confirmed_count;\\n integer4 not_confirmed_count;\\n integer4 not_found_count;\\n integer4 discovered_surname_count;\\n integer4 discovered_surname_exception_count;\\n varstring result_format;\\n varstring record_version;\\n varstring processing_status;\\n varstring inquiry_status;\\n varstring inquiry_processing_status;\\n integer4 billing_type_id;\\n varstring price;\\n integer4 currency;\\n integer4 pricing_error_code;\\n integer4 free;\\n integer4 transaction_code;\\n varstring return_node_id;\\n varstring request_node_id;\\n integer4 order_status_code;\\n varstring product_line;\\n integer4 login_history_id;\\n varstring ip_address;\\n varstring response_time;\\n varstring esp_method;\\n integer4 batch_job_id;\\n integer4 batch_seq_number;\\n end;\\n\\n// Read x number of trasactions from the database\\nDATASET(copy_Record) getTransactions(INTEGER MaxRecords) := EMBED(mysql : user('username'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tserver('server'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tport('port'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdatabase('db'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpassword('password')) \\n\\t\\tSELECT transaction_id, product_id, date_added, service_type, special_billing_id, report_code, \\n\\t\\t\\t\\treport_usage, requestor, reference_number, account_base, account_suffix, account_id, customer_id, \\n\\t\\t\\t\\tanchor_transaction_id, anchor_product_id, full_quote_back, i_date_ordered, i_addr_house_num, \\n\\t\\t\\t\\ti_addr_apt_num, i_addr_line, i_addr_state, i_addr_zip, i_addr_city, i_addr_county, i_addr_country, \\n\\t\\t\\t\\ti_addr_type, addr_household_sur_name, listed_driver_count, confirmed_count, not_confirmed_count, \\n\\t\\t\\t\\tnot_found_count, discovered_surname_count, discovered_surname_exception_count, result_format, \\n\\t\\t\\t\\trecord_version, processing_status, inquiry_status, inquiry_processing_status, billing_type_id, \\n\\t\\t\\t\\tprice, currency, pricing_error_code, free, transaction_code, return_node_id, \\n\\t\\t\\t\\trequest_node_id, order_status_code, product_line, login_history_id, ip_address, response_time, \\n\\t\\t\\t\\tesp_method, batch_job_id, batch_seq_number \\n\\t\\t\\tFROM log_dd.transaction_log LIMIT ?;\\nENDEMBED;\\n\\nTransaction_DS := getTransactions(100);\\n\\nOUT1 := OUTPUT(Transaction_DS,NAMED('Transaction_DS'));\\n\\ncopy_Record SetTransactionID(copy_Record L, INTEGER C) := TRANSFORM\\n\\tSELF.transaction_id := 'WU06-' + C;\\n\\tSELF \\t\\t\\t\\t\\t\\t\\t\\t:= L;\\nEND;\\n\\nInsert_DS := PROJECT(Transaction_DS, 
SetTransactionID(LEFT, COUNTER));\\n\\nOUT2 := OUTPUT(Insert_DS, NAMED('Insert_DS'));\\n\\nOUT3 := OUTPUT(Insert_DS, , '~THOR::ACTIVEINSIGHTS::DEBUG::DDLOGGING::Insert_DS', THOR, OVERWRITE);\\n\\n
\\n\\nCode to do the EMBED insert:\\n\\n\\nimport mysql;\\n\\ncopy_Record :=\\n record\\n varstring transaction_id;\\n integer4 product_id;\\n varstring date_added;\\n varstring service_type;\\n varstring special_billing_id;\\n varstring report_code;\\n varstring report_usage;\\n varstring requestor;\\n varstring reference_number;\\n varstring account_base;\\n varstring account_suffix;\\n integer4 account_id;\\n integer4 customer_id;\\n varstring anchor_transaction_id;\\n integer4 anchor_product_id;\\n varstring full_quote_back;\\n varstring i_date_ordered;\\n varstring i_addr_house_num;\\n varstring i_addr_apt_num;\\n varstring i_addr_line;\\n varstring i_addr_state;\\n varstring i_addr_zip;\\n varstring i_addr_city;\\n varstring i_addr_county;\\n varstring i_addr_country;\\n integer4 i_addr_type;\\n varstring addr_household_sur_name;\\n integer4 listed_driver_count;\\n integer4 confirmed_count;\\n integer4 not_confirmed_count;\\n integer4 not_found_count;\\n integer4 discovered_surname_count;\\n integer4 discovered_surname_exception_count;\\n varstring result_format;\\n varstring record_version;\\n varstring processing_status;\\n varstring inquiry_status;\\n varstring inquiry_processing_status;\\n integer4 billing_type_id;\\n varstring price;\\n integer4 currency;\\n integer4 pricing_error_code;\\n integer4 free;\\n integer4 transaction_code;\\n varstring return_node_id;\\n varstring request_node_id;\\n integer4 order_status_code;\\n varstring product_line;\\n integer4 login_history_id;\\n varstring ip_address;\\n varstring response_time;\\n varstring esp_method;\\n integer4 batch_job_id;\\n integer4 batch_seq_number;\\n end;\\n\\n\\tInsert_DS := PULL(DATASET('~THOR::ACTIVEINSIGHTS::DEBUG::DDLOGGING::Insert_DS',copy_Record, THOR));\\n\\t\\n\\tinitialize(dataset(copy_Record) values) := EMBED(mysql : \\tuser('username'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tserver('server'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tport('port'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdatabase('db'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpassword('password')) \\n \\tINSERT INTO log_dd.transaction_log\\n \\t\\t\\t\\t(transaction_id, product_id, date_added, service_type, special_billing_id, report_code, \\n \\t\\t\\t\\treport_usage, requestor, reference_number, account_base, account_suffix, account_id, customer_id, \\n \\t\\t\\t\\tanchor_transaction_id, anchor_product_id, full_quote_back, i_date_ordered, i_addr_house_num, \\n \\t\\t\\t\\ti_addr_apt_num, i_addr_line, i_addr_state, i_addr_zip, i_addr_city, i_addr_county, i_addr_country, \\n \\t\\t\\t\\ti_addr_type, addr_household_sur_name, listed_driver_count, confirmed_count, not_confirmed_count, \\n \\t\\t\\t\\tnot_found_count, discovered_surname_count, discovered_surname_exception_count, result_format, \\n \\t\\t\\t\\trecord_version, processing_status, inquiry_status, inquiry_processing_status, billing_type_id, \\n \\t\\t\\t\\tprice, currency, pricing_error_code, free, transaction_code, return_node_id, \\n \\t\\t\\t\\trequest_node_id, order_status_code, product_line, login_history_id, ip_address, response_time, \\n \\t\\t\\t\\tesp_method, batch_job_id, batch_seq_number)\\n \\t\\t\\t\\tVALUES \\n \\t\\t\\t\\t(?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n \\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n \\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n \\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n \\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n \\t\\t\\t\\t?, ?, ?, 
?)\\n \\tENDEMBED;\\n\\t\\t\\n OUT1 := initialize(Insert_DS);\\n\\nSEQUENTIAL(OUT1);\\n
\", \"post_time\": \"2016-07-11 18:24:05\" },\n\t{ \"post_id\": 9974, \"topic_id\": 2384, \"forum_id\": 8, \"post_subject\": \"Re: Error while processing LOOP\", \"username\": \"vivekaxl\", \"post_text\": \"Thank Richard. I have raised the ticket HPCC-15950 assert(read <= limit) while executing loops.\", \"post_time\": \"2016-07-18 18:13:08\" },\n\t{ \"post_id\": 9972, \"topic_id\": 2384, \"forum_id\": 8, \"post_subject\": \"Re: Error while processing LOOP\", \"username\": \"rtaylor\", \"post_text\": \"vivekaxl,\\n\\nThis should be reported in a JIRA ticket.\\n\\nRichard\", \"post_time\": \"2016-07-18 15:55:06\" },\n\t{ \"post_id\": 9964, \"topic_id\": 2384, \"forum_id\": 8, \"post_subject\": \"Error while processing LOOP\", \"username\": \"vivekaxl\", \"post_text\": \"I am trying to implement a recursive splitting method and this requires me to use LOOP. When I try using LOOP, I keep getting error "Error: System error: 3000: Graph graph1[89], firstn[91]: assert(read <= limit) failed"\\n\\nLoop Code:\\n\\nEXPORT DATASET(ML.Types.NumericField) run_multiple_splits(DATASET(ML.Types.NumericField) population):= FUNCTION\\n //stopping_point := (INTEGER)(log(100)/log(2)/2);\\n final_population := LOOP(population, \\n COUNTER <= 3,\\n run_one_split(ROWS(LEFT))\\n );\\n RETURN final_population;\\n END;\\n
\\n\\nIt fails when run on THOR but runs well on HTHOR. \\n\\nWorks Well:\\n\\nafter_first_split := EA.run_one_split(nf_zero_pop);\\nafter_second_split := EA.run_one_split(after_first_split);\\nafter_third_split := EA.run_one_split(after_second_split);\\nOUTPUT(after_third_split);\\n
\\nworks well, which leads me to believe that the code is working well.\\n\\nI get the same error in both 5.6.0.4 as well as 6.0.0.2. \\nIs there something obvious that I am doing wrong?\\n\\nPFA: ZAP\", \"post_time\": \"2016-07-16 19:42:55\" },\n\t{ \"post_id\": 9978, \"topic_id\": 2385, \"forum_id\": 8, \"post_subject\": \"Re: Passing a STORED dataset value into a workunit with WURu\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nThis sounds like a good candidate for a JIRA to me.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-21 13:36:04\" },\n\t{ \"post_id\": 9977, \"topic_id\": 2385, \"forum_id\": 8, \"post_subject\": \"Passing a STORED dataset value into a workunit with WURun()\", \"username\": \"drealeed\", \"post_text\": \"I have a workunit that defines a dataset attribute from a STORED value. \\n\\nDATASET(l_roxieconfig) roxies:=DATASET([],l_roxieconfig) : STORED('roxies');\\n\\nI ran this workunit, and then called it again via WURun() with clone set to true and the roxies xml passed in as a variable. I set the xml to the same xml that would be set when passing a dataset into a service.\\n\\nXML for the dataset:\\n<Row>\\n<dali>10.173.147.1</dali>\\n<espUrl>http://10.173.147.1:8010/</espUrl>\\n<cluster>roxie</clusters>\\n<selected>true</selectedClusters>\\n<username>testuser</username>\\n<password>OB8_gQ7vxqTpAhheMIxhUA</password>\\n</Row>
\\n \\nWhen the workunit runs, however, this information isn't picked up. Am I formatting things correctly? the roxies variable is listed in the workunit variables but says it has -1 rows.\\n\\nHere's what I'm passing in on the WURun soap test page at\\nhttp://10.173.147.1:8010/WsWorkunits/WU ... p_builder_:\\n\\n\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsworkunits">\\n <soap:Body>\\n <WURunRequest>\\n <Wuid>W20160720-161218</Wuid>\\n <CloneWorkunit>1</CloneWorkunit>\\n <Wait>-1</Wait>\\n <NoRootTag>0</NoRootTag>\\n <Variables>\\n <NamedValue>\\n <Name>roxies</Name>\\n <Value><Row> <dali>10.173.147.1</dali> <espUrl>http://10.173.147.1:8010/</espUrl> <cluster>roxie</clusters> <selected>true</selectedClusters> <username>testuser</username> <password>OB8_gQ7vxqTpAhheMIxhUA</password> </Row></Value>\\n </NamedValue>\\n </Variables>\\n <ExceptionSeverity>info</ExceptionSeverity>\\n </WURunRequest>\\n </soap:Body>\\n</soap:Envelope>\\n
\\n\\nI've attached a zap report of the resulting workunit, containing the workunit xml. The xml does show there's a variable roxies with data in it.\\n\\nHere's the ECL for the workunit being rerun with WURun():\\n\\n l_roxieconfig:=RECORD\\n\\t\\tSTRING RoxieLabel:='';\\n\\t\\tSTRING SourceDali:='';\\n \\t\\tSTRING RoxieDali:='';\\n\\t\\tSTRING RoxiePublishUrl:='';\\n\\t\\tSTRING RoxieServiceUrl:='';\\n\\t\\tSTRING RoxieInternalServiceUrl:='';\\n\\t\\tSTRING cluster:='roxie';\\n\\t\\tSTRING clustergroup:='roxie';\\n\\t\\tBOOLEAN selected:=true;\\n\\t\\tSTRING username:='';\\n\\t\\tSTRING password:='';\\n\\tEND;\\n\\nDATASET(l_roxieconfig) roxies:=DATASET([],l_roxieconfig) : STORED('roxies');\\n\\noutput(roxies,named('roxies'));\\n
\", \"post_time\": \"2016-07-20 20:31:30\" },\n\t{ \"post_id\": 10453, \"topic_id\": 2473, \"forum_id\": 8, \"post_subject\": \"Re: Pinpointing roxie service deployment bottlenecks\", \"username\": \"bforeman\", \"post_text\": \"Drea, did you get an answer to this yet? You may want to send a message to RKC.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-08-10 20:26:19\" },\n\t{ \"post_id\": 10193, \"topic_id\": 2473, \"forum_id\": 8, \"post_subject\": \"Pinpointing roxie service deployment bottlenecks\", \"username\": \"drealeed\", \"post_text\": \"I'm helping a team improve the performance of their deployments to roxie. What is the best way to identify the speed/amount of time it takes to copy an index to roxie, from start to finish? Is there a particular log entry I can grep for?\", \"post_time\": \"2016-08-02 19:00:29\" },\n\t{ \"post_id\": 10323, \"topic_id\": 2503, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT - EXPIRE (Days) Question\", \"username\": \"chuck.beam\", \"post_text\": \"Hi Richard,\\n\\nI decide to take matters into my own hands and delete the files in my ECL, see below.\\n\\n// Finally delete any Work Unit Exclusion Files older than 7 days\\nFilesToDelete\\t\\t:= WorkUnitExclusionFiles((STRING)(std.date.DateFromParts((INTEGER)modified[1..4],\\n(INTEGER)modified[6..7],\\n(INTEGER)modified[8..9])) < \\nAI_Common.Common.RollBack_date((STRING)std.date.Today(),-7));\\n\\t\\t\\t\\nDeleteFiles := NOTHOR(APPLY(FilesToDelete, STD.File.DeleteLogicalFile('~' + name)));
\\n\\nThanks!\\nChuck\", \"post_time\": \"2016-08-03 16:51:45\" },\n\t{ \"post_id\": 10273, \"topic_id\": 2503, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT - EXPIRE (Days) Question\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nSo, once deleted do you want a new file to be created?\\n\\nIf so, then you should use PERSIST instead of OUTPUT.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-03 13:49:56\" },\n\t{ \"post_id\": 10263, \"topic_id\": 2503, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT - EXPIRE (Days) Question\", \"username\": \"chuck.beam\", \"post_text\": \"Hi Richard,\\n\\nThat is what I thought.\\n\\nNow an even better question . . .\\n\\nWhat if I want the file deleted after 7 days (even though it gets read daily)?\\n\\nHow can I force the file to be deleted after 7 days?\\n\\nThanks!\\nChuck\", \"post_time\": \"2016-08-03 13:41:09\" },\n\t{ \"post_id\": 10253, \"topic_id\": 2503, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT - EXPIRE (Days) Question\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,If my file is read everyday by a daily monitoring job, will the file ever get deleted?
I would expect not, so if you see any other behavior than that you should report it in JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-03 13:38:03\" },\n\t{ \"post_id\": 10243, \"topic_id\": 2503, \"forum_id\": 8, \"post_subject\": \"OUTPUT - EXPIRE (Days) Question\", \"username\": \"chuck.beam\", \"post_text\": \"I am working on outputting some files for future processing in Active Insights and I have a question regarding the EXPIRE and (Days) parameters.\\n\\nThe documentation state:\\n\\nEXPIRE 'Optional. Specifies the file is a temporary file that may be automatically deleted after the specified number of days since the file was read."\\n\\ndays "Optional. The number of days from last file read after which the file may be automatically deleted. If omitted, the default is seven (7)."\\n\\nI am curious about the "days from last file read" statement.\\n\\nIf my file is read everyday by a daily monitoring job, will the file ever get deleted?\\n\\nThanks!\\nChuck\", \"post_time\": \"2016-08-03 13:27:17\" },\n\t{ \"post_id\": 10683, \"topic_id\": 2513, \"forum_id\": 8, \"post_subject\": \"Re: DIfferences between THOR/FLAT and CSV files\", \"username\": \"ANTONIOCARLOSPINA\", \"post_text\": \"Just to give you a final feedback (sorry it took so long, I was out on vacation), now I can see the 4-bytes header and no terminators exactly as you said.\\n\\n0000 ••••01entretenim 17 00 00 00 30 31 65 6E 74 72 65 74 65 6E 69 6D\\n0010 ento.com.br••••1 65 6E 74 6F 2E 63 6F 6D 2E 62 72 0A 00 00 00 31\\n0020 2m.com.br••••2rs 32 6D 2E 63 6F 6D 2E 62 72 10 00 00 00 32 72 73\\n0030 ervice.com.br••• 65 72 76 69 63 65 2E 63 6F 6D 2E 62 72 0F 00 00\\n0040 •a2office.com.br 00 61 32 6F 66 66 69 63 65 2E 63 6F 6D 2E 62 72\\n\\nAnd I noticed the file ends with an extra \\\\r after the last record:\\n\\n0600 ••••anana.com.br 0C 00 00 00 61 6E 61 6E 61 2E 63 6F 6D 2E 62 72\\n0610 ••••andysuppercl 15 00 00 00 61 6E 64 79 73 75 70 70 65 72 63 6C\\n0620 ub.com.br••••ann 75 62 2E 63 6F 6D 2E 62 72 12 00 00 00 61 6E 6E\\n0630 azaharov.com.br• 61 7A 61 68 61 72 6F 76 2E 63 6F 6D 2E 62 72 0D\\n\\nThank you very much for your assistance, much appreciated.\\n\\nAntonio Pina\", \"post_time\": \"2016-08-18 13:16:58\" },\n\t{ \"post_id\": 10363, \"topic_id\": 2513, \"forum_id\": 8, \"post_subject\": \"Re: DIfferences between THOR/FLAT and CSV files\", \"username\": \"ANTONIOCARLOSPINA\", \"post_text\": \"Richard, that makes sense.\\n\\nI'm going to try your suggestion soon.\\n\\nThanks !\", \"post_time\": \"2016-08-04 00:48:49\" },\n\t{ \"post_id\": 10313, \"topic_id\": 2513, \"forum_id\": 8, \"post_subject\": \"Re: DIfferences between THOR/FLAT and CSV files\", \"username\": \"rtaylor\", \"post_text\": \"Antonio,So my question is: What is a FLAT/THOR file ? What did I get wrong about it ?
The problem with your FLAT file definition is that you specified a variable-length STRING field and when creating variable-length FLAT file records in HPCC the system automatically prepends a 4-byte integer to the beginning of the variable-length field. \\n\\nSince you had just sprayed the file (I assume you used the Delimited spray), that prepended length integer was not present, so the file was not able to be properly read as a FLAT file. Re-defining it as a CSV file was the correct thing to do. \\n\\nIn fact, the Delimited spray used to be called the CSV spray. However, Delimited is more correct since it is designed to allow you to spray any variable-length file that has record delimiters (which CSV files do have), whether there are field delimiters present or not. \\n\\nAs an experiment, you can take your CSV file definition and write the records to disk as a FLAT file (with the variable-length field), like this:R := RECORD\\n STRING line;\\nEND; \\nfile1 := DATASET('~.::dominios-tecla.txt',R,CSV(separator('')));\\nOUTPUT(file1,,'~.::dominios-tecla_FLAT.txt');
Then despray and download that new file and do a compare between the two files and you will see exactly what I'm talking about.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-03 15:31:50\" },\n\t{ \"post_id\": 10283, \"topic_id\": 2513, \"forum_id\": 8, \"post_subject\": \"DIfferences between THOR/FLAT and CSV files\", \"username\": \"ANTONIOCARLOSPINA\", \"post_text\": \"Hi,\\n\\nI'm starting to learn HPCC and although I think this is a very basic question, I couldn't find a good explanation to the behavior I've seen.\\n\\nI've downloaded the Virtualbox VM to run HPCC and sprayed an ordinary text file with ~300 domain names, no separators and terminated with \\\\r\\\\n. Example:\\n\\n01entretenimento.com.br\\n12m.com.br\\n2rservice.com.br\\na2office.com.br\\naaposta.com.br\\nabagrafica.com.br\\nabccat.org.br\\n...\\n\\nI've first tried to read the file as FLAT/THOR using this command:\\n\\nR := RECORD\\n STRING line;\\nEND;\\nfile1 := DATASET('~.::dominios-tecla.txt',R,THOR); \\nfile1;\\n\\nBut then eclagent stops complaining of "Memory Exhausted". I actually hacked into RoxieTopology.xml to increase the totalMemoryLimit (couldn't find the this configuration in ECLWatch) but then I've received another error message ("Datastream Read Error")\\n\\nInstead of the FLAT file, If I use a CSV file with no separators, voilá ! It works like a charm:\\n\\nR := RECORD\\n STRING line;\\nEND; \\nfile1 := DATASET('~.::dominios-tecla.txt',R,CSV(separator('')));\\nfile1;\\n\\nSo my question is: What is a FLAT/THOR file ? What did I get wrong about it ?\\n\\nThanks!\", \"post_time\": \"2016-08-03 14:43:53\" },\n\t{ \"post_id\": 10403, \"topic_id\": 2543, \"forum_id\": 8, \"post_subject\": \"Re: Using Ý as CSV HEADER separator\", \"username\": \"afarrell\", \"post_text\": \"Will do Richard,\\n\\nThanks,\\n\\n-A\", \"post_time\": \"2016-08-05 08:20:34\" },\n\t{ \"post_id\": 10383, \"topic_id\": 2543, \"forum_id\": 8, \"post_subject\": \"Re: Using Ý as CSV HEADER separator\", \"username\": \"rtaylor\", \"post_text\": \"afarrell,\\n\\nYou should submit this issue to JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-04 18:29:07\" },\n\t{ \"post_id\": 10373, \"topic_id\": 2543, \"forum_id\": 8, \"post_subject\": \"Using Ý as CSV HEADER separator\", \"username\": \"afarrell\", \"post_text\": \"Hi,\\n\\nWe have a requirement to output a CSV from THOR using 'Ý' as a separator, \\nCapital Y with Acute. 
Unfortunately I am unable to use this separator for the column headers, but works ok for every other row in the csv.\\n\\nAny ideas?\\n\\nThe following BWR illustrates my approach to get UPPERCASE headers and custom separator.\\n\\n\\nmyRec := record\\n\\tstring4 f1;\\n\\tinteger1 f2;\\n\\tstring1 f3;\\nend;\\n\\n\\nwriteCSV(_ds, _filename, _sep) := functionmacro\\n\\t// KLUDGE to output uppercase headers, and use specific separator\\n\\toutput(dataset([],recordof(_ds)),,_filename+'_headers',overwrite,csv(heading(SINGLE),separator(_sep),quote('"'),terminator('\\\\r\\\\n'),UNICODE));\\n\\theaderRow := dataset(_filename+'_headers',{String line},csv(heading(0),separator('|'),quote('"'),terminator('\\\\r\\\\n'),UNICODE));\\n\\n\\t// use UPPER case headers as headertext and write out UTF-8/16 CSV\\n\\treturn output(_ds,,_filename,overwrite,csv(heading(STD.STR.ToUpperCase(headerRow[1].line),single),separator(_sep),quote('"'),terminator('\\\\r\\\\n'),UNICODE));\\nendmacro;\\n\\n\\nds := dataset(\\n\\t[\\n\\t\\t{'aaaa',1,'A'},\\n\\t\\t{'bbbb',2,'B'},\\n\\t\\t{'cccc',3,'C'}\\n\\t],\\n\\tmyRec\\n);\\n\\t\\t\\nwriteCSV(ds, '~afarrell::20160803::csvtest',U'Ý');\\n\\n
\", \"post_time\": \"2016-08-04 08:59:13\" },\n\t{ \"post_id\": 10643, \"topic_id\": 2603, \"forum_id\": 8, \"post_subject\": \"Re: Passing stored input value via command line parameter\", \"username\": \"schen\", \"post_text\": \"Here is an example of running KEL generated ECL:\\nKEL query:\\nQUERY: Show(INTEGER page) <= Person(Age>page);
\\n\\nCommand line1:\\necl run thor -Xpage=40 C:\\\\test\\\\RS_Show.ecl -I C:\\\\test\\\\
\\nResult1:\\n<Result>\\n<Dataset name='Result'>\\n <Row><uid>1</uid><__uid_flags>0</__uid_flags><name_>DAVID</name_><__name__flags>0</__name__flags><age_>50</age_><__age__flags>0</__age__flags><__queryid>1</__queryid><__recordcount>0</__recordcount></Row>\\n <Row><uid>3</uid><__uid_flags>0</__uid_flags><name_>HELEN</name_><__name__flags>0</__name__flags><age_>46</age_><__age__flags>0</__age__flags><__queryid>1</__queryid><__recordcount>0</__recordcount></Row>\\n <Row><uid>7</uid><__uid_flags>0</__uid_flags><name_>FRED</name_><__name__flags>0</__name__flags><age_>70</age_><__age__flags>0</__age__flags><__queryid>1</__queryid><__recordcount>0</__recordcount></Row>\\n</Dataset>\\n</Result>
\\n\\nCommand line2:\\necl run thor -Xpage=60 C:\\\\test\\\\RS_Show.ecl -I C:\\\\test\\\\
\\nResult2:\\n<Result>\\n<Dataset name='Result'>\\n <Row><uid>7</uid><__uid_flags>0</__uid_flags><name_>FRED</name_><__name__flags>0</__name__flags><age_>70</age_><__age__flags>0</__age__flags><__queryid>1</__queryid><__recordcount>0</__recordcount></Row>\\n</Dataset>\\n</Result>
\\n\\nHTH,\\n\\nShawn\", \"post_time\": \"2016-08-16 15:12:25\" },\n\t{ \"post_id\": 10603, \"topic_id\": 2603, \"forum_id\": 8, \"post_subject\": \"Re: Passing stored input value via command line parameter\", \"username\": \"vin\", \"post_text\": \"Thanks, Richard. That is good to know.\\n\\nAfter I posted, I remembered that I used the -X parameter successfully before. So I continued to try. I now have it working. But I do not know what has changed.\\n\\nI am still learning HPCC and (too) many of the operations still appear as magic to me. \\n\\nThanks,\\n+vince\", \"post_time\": \"2016-08-15 16:51:25\" },\n\t{ \"post_id\": 10593, \"topic_id\": 2603, \"forum_id\": 8, \"post_subject\": \"Re: Passing stored input value via command line parameter\", \"username\": \"JimD\", \"post_text\": \"Here is an example:\\n\\nFor this ECL code (saved as runMe.ecl):\\n\\n\\na:= '42' :STORED('a');\\na;\\n
\\n\\nI run this on the command line:\\n\\necl run thor -Xa='hello world' runMe.ecl
\\n\\nthe result is:\\n\\n<Result>\\n<Dataset name='Result 1'>\\n <Row><Result_1>hello world</Result_1></Row>\\n</Dataset>\\n</Result>\\n
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-08-15 16:15:31\" },\n\t{ \"post_id\": 10583, \"topic_id\": 2603, \"forum_id\": 8, \"post_subject\": \"Re: Passing stored input value via command line parameter\", \"username\": \"rtaylor\", \"post_text\": \"vince,\\n\\nThe STORED workflow service is the mechanism by which SOAP calls can pass runtime parameter values to a workunit. STORED opens a named space in the workunit, and that named space can then receive the passed parameter value from SOAP. It is used in Roxie queries.\\n\\nThe doc you reference simply indicates that you can also pass those parameters via the ecl.exe command line instead of using SOAP. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-15 15:25:49\" },\n\t{ \"post_id\": 10573, \"topic_id\": 2603, \"forum_id\": 8, \"post_subject\": \"Passing stored input value via command line parameter\", \"username\": \"vin\", \"post_text\": \"According to "The ECL IDE and HPCC Client Tools" pg 60 and the ecl command help messages stored input values can be passed via the command line.\\n\\nHere is the relevant text from command help:\\n$ ecl help run\\n\\nUsage:\\n[...snip...]\\n -X<name>=<value> sets the stored input value (stored('name'))\\n[...snip...]\\n
\\n\\nI get the same results for the following command line invocations (examples are simplified and stylized from actual commands):\\n\\n$ ecl run thor RS_Query1.ecl\\n$ ecl run thor -X<valid_name>=<valid_value> RS_Query1.ecl\\n$ ecl run thor -X<valid_name>=<invalid_value> RS_Query1.ecl\\n$ ecl run thor -X<invalid_name>=<invalid_value> RS_Query1.ecl\\n
\\n\\nvalid_name is a name defined by STORED. ex: STORED('bucket').\\n\\nThe above make me think I do not understand how to use this option. Can anyone enlighten me?\\n\\nBTW, this is KEL-generated ECL. The stored name is from a KEL query, ie: "QUERY: Foo(name) <= ...;"\\n\\nThanks,\\n+vince\", \"post_time\": \"2016-08-15 13:56:25\" },\n\t{ \"post_id\": 10733, \"topic_id\": 2623, \"forum_id\": 8, \"post_subject\": \"Re: How to know the existing distribution of files\", \"username\": \"balajisampath\", \"post_text\": \"Thank You Richard,\\n\\nI was referring to HASH, RANDOM, SKEW distributions\", \"post_time\": \"2016-08-18 20:12:23\" },\n\t{ \"post_id\": 10713, \"topic_id\": 2623, \"forum_id\": 8, \"post_subject\": \"Re: How to know the existing distribution of files\", \"username\": \"rtaylor\", \"post_text\": \"balajisampath,Is there any way to find on which column(s) the file was distributed?
Not that I am aware of. But if you need to have the data in a particular distribution, just use DISTRIBUTE in your ECL code. If the data is already distributed in the manner you require, then it will essentially be a no-op (probably not quite, but it should do very little work at all given there's really nothing to do). And, if the data is NOT already distributed in the manner you require, then it will simply perform the required distribution for you.and also what kind of distribution?
I'm not sure what you're asking for here. Please elaborate.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-18 17:59:16\" },\n\t{ \"post_id\": 10693, \"topic_id\": 2623, \"forum_id\": 8, \"post_subject\": \"How to know the existing distribution of files\", \"username\": \"balajisampath\", \"post_text\": \"Is there any way to find on which column(s) the file was distributed? and also what kind of distribution?\", \"post_time\": \"2016-08-18 15:33:44\" },\n\t{ \"post_id\": 10723, \"topic_id\": 2633, \"forum_id\": 8, \"post_subject\": \"Re: Parsing a PDF file\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nWhile it may be possible to do this in ECL (you can look at the PARSE function to get an idea of how it might be accomplished), I would begin with this question: Is ECL as it exists today the right tool for extracting text data from PDF files in your circumstance?\\n\\nA quick Google search shows me that there are a very large number of APIs and SDKs already built to handle PDF documents. And they're all available at the click of a mouse (and maybe a credit card ). This page may be helpful to your research: http://okfnlabs.org/blog/2016/04/19/pdf-tools-extract-text-and-data-from-pdfs.html\\n\\nSo, there are really two ways to go with this:
Either way, you can automate the sprays by using either DFUplus.exe or our standard library spray/despray functions. That's exactly what they're designed to do.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-18 18:42:26\" },\n\t{ \"post_id\": 10703, \"topic_id\": 2633, \"forum_id\": 8, \"post_subject\": \"Parsing a PDF file\", \"username\": \"John Meier\", \"post_text\": \"I have a situation where the data I need to ingest into the system is in a PDF document. Currently, the file is (A) manually downloaded and then (B) opened in a program like Adobe Acrobat Pro, then (C) saved as a text file. The text file is then (D) manually manipulated and saved as a (E) pipe-delimited file, which can then (F) be sprayed to THOR and (G) processed.\\n\\nI would still have to (A) manually download the file, but I think I can (B) ingest the file into THOR as a binary BLOB. The issue then becomes one of parsing the binary data. I have searched the documentation but I haven't found anything on parsing binary data in a single file. Has anyone done this?\\n\\nI thought of running the entire field through a "translator"-type function where every 8-bits is translated into some ASCII character. Once I have that output, I could then look for any field "delimiters" so I could extract the data into the appropriate record layout to (F) write out (with a DISTRIBUTE).\\n\\nAm I thinking this out correctly? Can anyone point me to some example code / documentation that I could reference?\\n\\nThank You.\", \"post_time\": \"2016-08-18 17:38:00\" },\n\t{ \"post_id\": 10763, \"topic_id\": 2643, \"forum_id\": 8, \"post_subject\": \"Re: How to continue processing items in a sequential stateme\", \"username\": \"rtaylor\", \"post_text\": \"gouldbrfl ,\\n\\nThe SEQUENTIAL action is designed to run jobs where each successive action depends on the successful completion of each previous action. If your actions are truly independent of each other, then you can simply NOT use SEQUENTIAL, like this:attribute('state','date');\\nattritute('state','date');\\nattribute('state','date');
Each action will run separately and independently. Depending on exactly what they're doing, the compiler may even run them in parallel.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-19 14:48:50\" },\n\t{ \"post_id\": 10743, \"topic_id\": 2643, \"forum_id\": 8, \"post_subject\": \"How to continue processing items in a sequential statement\", \"username\": \"gouldbrfl\", \"post_text\": \"I have a situation where a ksh script builds a file to be submitted to thor based on the daily files that are landed. The statement looks something like\\nsequential(\\nattribute('state','date'),\\nattritute('state','date'),\\nattribute('state','date')\\n);\\n\\nEach one of these attribute statements are really standalone. If there is a failure on one or more of these, let the others process?\", \"post_time\": \"2016-08-19 12:14:51\" },\n\t{ \"post_id\": 10803, \"topic_id\": 2653, \"forum_id\": 8, \"post_subject\": \"Re: Dataset append syntax\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nPer the docs:+ Append all records from both files, independent of any order \\n& Append all records from both files, maintaining record order on each node
I don't see where either of these would be "more optimal" than the other.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-19 19:01:18\" },\n\t{ \"post_id\": 10753, \"topic_id\": 2653, \"forum_id\": 8, \"post_subject\": \"Dataset append syntax\", \"username\": \"wjblack\", \"post_text\": \"I'm fairly new to the ECL syntax and am not able to find the differences in using a '&' verses a '+' to append a dataset in the below fashion. I've been told that using '&' was more optimal but didn't receive information why it is.\\t\\n\\nexport Results := project(DataSetA & \\n DataSetB & \\n DataSetC &\\n DataSetD,\\n TRANSFORM(\\n NewLayout,\\n self:=left\\n ));\\n\\nexport Results := project(DataSetA +\\n DataSetB + \\n DataSetC +\\n DataSetD,\\n TRANSFORM(\\n NewLayout,\\n self:=left\\n ));\", \"post_time\": \"2016-08-19 12:18:21\" },\n\t{ \"post_id\": 11293, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Re: Regarding ECL syntax question\", \"username\": \"FanFei\", \"post_text\": \"Interesting!!! I tried a similar syntax before, but it was not working. Lots of thanks for providing the sample code!\", \"post_time\": \"2016-09-16 15:06:27\" },\n\t{ \"post_id\": 11243, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Re: Regarding ECL syntax question\", \"username\": \"omnibuzz\", \"post_text\": \"And then there is your approach that seems to work too which I didn't expect to be honest.\\n
\\nConstant1 := MODULE\\n EXPORT STRING a := 'a1';\\n EXPORT STRING b := 'b1';\\nEND;\\n\\nConstant2 := MODULE\\n EXPORT STRING a := 'a2';\\n EXPORT STRING b := 'b2';\\nEND;\\n\\n\\nConstant := IF(TRUE,Constant1,Constant2);\\nConstant.b;\\n
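\n\nApplied to the condition from the original question, the same pattern would read (a sketch only, with dataSource and X as defined there):\n\nConstants := IF(dataSource = X, Constants1, Constants2);\nConstants.recordLimit;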
\", \"post_time\": \"2016-09-15 16:09:08\" },\n\t{ \"post_id\": 11233, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Re: Regarding ECL syntax question\", \"username\": \"omnibuzz\", \"post_text\": \"There are many ways to accomplish it\\n1. You can use an interface structure\\nI1 := INTERFACE\\n\\tEXPORT STRING a;\\n\\tEXPORT STRING b;\\nEND;\\n\\nConstant1 := MODULE(I1)\\n\\tEXPORT STRING a := 'a1';\\n\\tEXPORT STRING b := 'b1';\\nEND;\\n\\nConstant2 := MODULE(I1)\\n\\tEXPORT STRING a := 'a2';\\n\\tEXPORT STRING b := 'b2';\\nEND;\\n\\n\\nGetImplementation(I1 C) := c;\\n\\nMyImp1 := GetImplementation(Constant1);\\nMyImp1.a;\\nMyImp2 := GetImplementation(Constant2);\\nMyImp2.a;\\n
\\n\\n2. Or you can use a function macro without the interface structure\\n\\nConstant1 := MODULE\\n\\tEXPORT STRING a := 'a1';\\n\\tEXPORT STRING b := 'b1';\\nEND;\\n\\nConstant2 := MODULE\\n\\tEXPORT STRING a := 'a2';\\n\\tEXPORT STRING b := 'b2';\\nEND;\\n\\n\\nGetImplementation(a) := FUNCTIONMACRO\\n\\tRETURN a;\\nENDMACRO;\\n\\nMyImp1 := GetImplementation(Constant1);\\nMyImp1.a;\\nMyImp2 := GetImplementation(Constant2);\\nMyImp2.a;\\n
\\n\\nPersonally, I would go with option 1 because, you know, macros..\\nBy the way, you can use the inheritence for record structure too, though I don't know why you should. \\nCheers\\nSrini\", \"post_time\": \"2016-09-15 15:28:24\" },\n\t{ \"post_id\": 11023, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Re: Regarding ECL syntax question\", \"username\": \"FanFei\", \"post_text\": \"I see your point. Thanks for the reply!\", \"post_time\": \"2016-08-25 17:59:42\" },\n\t{ \"post_id\": 10863, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Re: Regarding ECL syntax question\", \"username\": \"rtaylor\", \"post_text\": \"Fan,\\n\\nSince you want your separate MODULE structures to contain the same members, the better way of accomplishing this is to just pass a parameter to a single MODULE structure, so that the return values change based on the passed parameter. Something like this:Mod(UNSIGNED x) := MODULE\\n EXPORT STRING10 Val1 := CASE(x,42 => 'ABC',43 => 'GHI','XYZ');\\n EXPORT STRING10 Val2 := IF(x=42,'DEF','JKL');\\nEND;\\n\\nds := DATASET([{42},{43},{44},{22}],{UNSIGNED1 x});\\n\\n{STRING10 Val1,STRING10 Val2} XF(ds L) := TRANSFORM\\n SELF.Val1 := Mod(L.x).Val1;\\n SELF.Val2 := Mod(L.x).Val2;\\nEND;\\t\\nPROJECT(ds,XF(LEFT));
Note that I'm using both CASE and IF as examples of how you might choose to determine the return value of each. You could also use CHOOSE, MAP, etc.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-22 19:20:01\" },\n\t{ \"post_id\": 10823, \"topic_id\": 2683, \"forum_id\": 8, \"post_subject\": \"Regarding ECL syntax question\", \"username\": \"FanFei\", \"post_text\": \"Hey, I have a question about ECL syntax. Is it possible to refer different modules based on different conditions? Here is my situation:\\n\\nI have Constants1 module, Constants2 module. They contains exactly the same variable names, let's say recordLimit. Due to some reasons, we don't want to combine these two modules into one. In the program, we need some statement like - \\n if(dataSource=X, Constants1.recordLimit, Constants2.recordLimit);
\\n\\nSince we have hundreds of variables in Constants1 and 2, is it possible to simplify this script? Can we have something like the following so I don't have to write hundreds of if statements in the script? \\n Constants := if(dataSource=X, Constants1, Constants2);
\\n\\nSo I can directly use\\n Constants.recordLimit
\\n\\nI tried some macro and functionmacro, but they don't work as expected. Any suggestion?\\n\\nThanks,\\n-Fan\", \"post_time\": \"2016-08-19 21:19:21\" },\n\t{ \"post_id\": 10903, \"topic_id\": 2703, \"forum_id\": 8, \"post_subject\": \"Re: Problems with renaming fields for a subset\", \"username\": \"rtaylor\", \"post_text\": \"Geoff,\\n\\nThe problem is one we actually discussed in class. \\n\\nWhen you OUTPUT a new dataset, it always uses the field names for the fields in the recordset that you're writing, and those go into the DFU metadata for the new file. So, if you want to rename fields, you need to rename them in the recordset before you do the OUTPUT. Take a look at my code for Intro Thor, exercise 6a, the BWR_RollupCSZ code and you'll see how I did it in that one.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-23 14:17:54\" },\n\t{ \"post_id\": 10883, \"topic_id\": 2703, \"forum_id\": 8, \"post_subject\": \"Problems with renaming fields for a subset\", \"username\": \"maching\", \"post_text\": \"Hi there. I am very new to this and come from a SQL background so please be gentle
\\n\\nI am having problems outputting a subset of data. I have a table with a record structure for example:\\n \\nOrganisationRec :=RECORD\\n unsigned3 Organisationid; \\n string100 OrganisationName;\\n string50 country;\\n string50 organisationtype;\\n END; \\nOrganisation := DATASET('~Training::gam::in::Organisation.csv',OrganisationRec,CSV(HEADING(1)));\\nThis links to a data set of a load of different companies.\\n\\nI create a table called ACManufacturer that is a filter of this dataset by organisation type ='MyType'\\nACManufacturer :=Table(Organisation(OrganisationType in ['Airframe']));\\n\\nI output this to a file \\n\\nOUTPUT(ACManufacturer,,'~training::gam::ACManufacturer',OVERWRITE,COMPRESSED);\\nI then create a record structure\\n\\nACmanufacturersRec :=RECORD\\n unsigned3 Manufacturerid; \\n string100 Manufacturer;\\n string50 country;\\n string50 organisationtype;\\n END;\\n \\n Amanufacturers := DATASET('~Training::gam::ACManufacturer',ACmanufacturersRec,THOR);\\n\\noutput(Amanufacturers,Named ('tests') );\\n\\nI get this error\\n\\nError: System error: 10124: Graph[55], diskread[56]: diskread: Layout does not match published layout. File: training::gam::acmanufacturer (0, 0), 10124, \\n\\nIf the structure inherits the fields is there a way to alias the field name or something. I am essentially creating a subset but I want to use the names manufacturer later on as I have several of these and with a join organisationname from the very first will stay but any other will not.\\n\\nany pointers for something obvious, most appreciated.\", \"post_time\": \"2016-08-23 10:24:31\" },\n\t{ \"post_id\": 11173, \"topic_id\": 2773, \"forum_id\": 8, \"post_subject\": \"Re: Check if a Workunit is running by name\", \"username\": \"dsette\", \"post_text\": \"Bob,\\n\\nThanks for your prompt response, I will try that.\\nIf we run the command using the ECL command line tools, then presumably the same applies. i.e. we should use the same version of the client tools as the cluster.\", \"post_time\": \"2016-09-09 16:18:17\" },\n\t{ \"post_id\": 11163, \"topic_id\": 2773, \"forum_id\": 8, \"post_subject\": \"Re: Check if a Workunit is running by name\", \"username\": \"bforeman\", \"post_text\": \"David,\\n\\nI think your approach is sound, but the error indicates a mismatch in version for that function.\\n\\nI tested your code on a 6.0.0 training cluster using the 6.0.4 ECL IDE and did not receive that error. It is possible that you may need to drop back to an earlier compiler version that matches the cluster that you are targeting.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-09-09 16:08:42\" },\n\t{ \"post_id\": 11153, \"topic_id\": 2773, \"forum_id\": 8, \"post_subject\": \"Check if a Workunit is running by name\", \"username\": \"dsette\", \"post_text\": \"We have a process that takes a few minutes to run, and we'd like to ensure that the process cannot be run more than once simultaneously.\\n\\nOur initial thinking is to perform a check first, to ensure that the workunit is not already running.\\nIf it is, then return an error (possibly using ASSERT).\\n\\nWe wanted to use the WorkUnitList function from STD to do the initial check, thus:\\n
OUTPUT(STD.System.Workunit.WorkUnitList('', NAMED state:='running', NAMED jobname := 'workunitname'));
\\nThe above gives the error:\\nError: Too many parameters passed to function WorkunitList (expected 13) (89, 59), 2061, C:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.0.4\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\system\\\\Workunit.ecl
\\nIs this an error due to how we call the function, or something else?\\nIs our proposed approach a valid one, or is there a better way to achieve what we're trying to do?\\n\\nThanks,\\nDavid\", \"post_time\": \"2016-09-09 10:57:35\" },\n\t{ \"post_id\": 11213, \"topic_id\": 2793, \"forum_id\": 8, \"post_subject\": \"Re: DEDUP Fails\", \"username\": \"omnibuzz\", \"post_text\": \"Thanks for the correction, Richard. I agree with you. I will create a JIRA. We either fix the code or document the behavior and throw a syntax error.\\nCheers\\nSrini\", \"post_time\": \"2016-09-15 11:30:00\" },\n\t{ \"post_id\": 11203, \"topic_id\": 2793, \"forum_id\": 8, \"post_subject\": \"Re: DEDUP Fails\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nSince it's an ASSERT in the code that's causing the error and its argument is "keepLeft || numToKeep == 1" I would suspect it's expected, but undocumented behavior.\\n\\nDefinitely time for a JIRA. \\n\\nBTW, I changed your example code to add the necessary SORT and a record to discard:
Layout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\nEND;\\n\\nallPeople := DATASET([ {1,'Fred','Smith'},\\n {2,'Joe','Smith'},\\n {3,'Joe','Blow'},\\n {4,'Jane','Smith'}],Layout_Person);\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\nDEDUP(SORT(allPeople,LastName),LastName,KEEP 2, RIGHT);\\n//dies with this error:\\n//System error: 3000: Graph graph1[1], workunitwrite[5]: SLAVE #2 [10.173.248.3:20100]: assert(keepLeft || numToKeep == 1) failed ...
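\n\nFor comparison, the two variants your original post reports as working run against the same sorted data like this (a sketch):\n\nDEDUP(SORT(allPeople,LastName),LastName,KEEP 2,LEFT);\nDEDUP(SORT(allPeople,LastName),LastName,KEEP 1,RIGHT);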
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-14 14:19:55\" },\n\t{ \"post_id\": 11193, \"topic_id\": 2793, \"forum_id\": 8, \"post_subject\": \"DEDUP Fails\", \"username\": \"omnibuzz\", \"post_text\": \"Hi, \\n I am trying the following code and it fails. KEEP 2, LEFT works AND KEEP 1, RIGHT works too. \\n\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\nEND;\\n\\nallPeople := DATASET([ {1,'Fred','Smith'},\\n {2,'Joe','Blow'},\\n {3,'Jane','Smith'}],Layout_Person);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\nDEDUP(allPeople,LastName,KEEP 2, RIGHT);\\n
\\n\\nThis is the error that I get\\n\\nError: System error: 3000: Graph[1], SLAVE #1 [XXX.XXX.XXX.XXX:XXXXX]: assert(keepLeft || numToKeep == 1) failed - file: /var/lib/jenkins/workspace/CE-Candidate-5.4.6-1/CE/centos-7.0-x86_64/HPCC-Platform/thorlcr/activities/rollup/throllupslave.cpp, line 298\\n
\\n\\nFrom the error it looks like you are not expecting KEEP > 1 for RIGHT. Is this a bug or expected behavior?\", \"post_time\": \"2016-09-14 01:27:29\" },\n\t{ \"post_id\": 11323, \"topic_id\": 2803, \"forum_id\": 8, \"post_subject\": \"Re: INDEX sort in descending order\", \"username\": \"omnibuzz\", \"post_text\": \"Thanks, Richard. That was what I tried too. It was just not in the spirit of the ECL language to jump through hoops. I will create a JIRA and see what the team thinks.\\nRegards\\nSrini\", \"post_time\": \"2016-09-17 15:19:53\" },\n\t{ \"post_id\": 11283, \"topic_id\": 2803, \"forum_id\": 8, \"post_subject\": \"Re: INDEX sort in descending order\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nOK, here's how I would do that:
IMPORT TrainingYourName;\\n//first define and build the INDEX:\\nds := TrainingYourName.File_Persons.File(BirthDate <> '');\\nIDX := INDEX(ds,{INTEGER4 Bdate := -(INTEGER4)Birthdate },\\n {ds},'~RTTEST::DescendingIDX::IDX');\\n// BUILD(IDX);\\n\\n//then you can query the INDEX like this:\\nStartDate := '19000101'; //parameters passed to the query\\nEndDate := '20170101';\\nNegStart := -(INTEGER4)EndDate; //flip the parms to negatives\\nNegEnd := -(INTEGER4)StartDate; //and flip their order\\nCHOOSEN(IDX(Bdate BETWEEN NegStart AND NegEnd),100);
My code was written for my training data, but the principle is applicable to your situation.\\n\\nI first built my descending INDEX with negative numeric date values, so an ascending sort of the key field is actually a descending sort of the source date field records.\\n\\nThen you see I wrote my query code to simply take the supplied parameters (assuming they're coming in with the same string format as the data) and flip them both by sign and start/end specification.\\n\\nSo the CHOOSEN will just get the first 100 records for this test case, and should work perfectly well in every other case.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-16 14:47:24\" },\n\t{ \"post_id\": 11263, \"topic_id\": 2803, \"forum_id\": 8, \"post_subject\": \"Re: INDEX sort in descending order\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - Here is my requirement.\\nLet's say I have a billion records index where the key is a timestamp.\\nNow index defintion is {timestamp},{payload}.\\n\\nIn my roxie query, I accept a start date and end date and get the top 100 records starting from end date, backwards.\\n\\nNow, I can give the start date as 1900-01-01 and end date as 2017-01-01. In which case I would need to just take the latest 100 records. With the index in the ascending order, I will match the entire 1 billion records with the search and then I will do a CHOOSEN of 100. It seems to be an inefficient way. Do you have any idea on how I go about handling it. If you feel it's a problem, then I will raise a JIRA.\\nThanks for the help.\\nRegards\\nSrini\", \"post_time\": \"2016-09-15 20:19:20\" },\n\t{ \"post_id\": 11253, \"topic_id\": 2803, \"forum_id\": 8, \"post_subject\": \"Re: INDEX sort in descending order\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nI don't believe there is any descending sort option for INDEX key fields. The BUILD action is the sort order creator, and I see no options that would do that.\\n\\nAn INDEX always has a binary tree so it can get to the individual "leaf" node records containing either the record pointer or the payload data. So the binary tree is built on the sort order of the key fields so that it can be easily traversed to find the exact "leaf" nodes required. And I would expect that all that binary tree traversal code is written so that it expects the key fields to be in ascending order. \\n\\nSo, if you need the return results from your INDEX to be sorted in a descending order, then you can simply use SORT.\\n\\nAlternatively, you can submit a JIRA asking for descending order INDEXes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-15 17:16:47\" },\n\t{ \"post_id\": 11223, \"topic_id\": 2803, \"forum_id\": 8, \"post_subject\": \"INDEX sort in descending order\", \"username\": \"omnibuzz\", \"post_text\": \"Hi - I feel stupid that I don't know the answer to this till now. How do I sort the key field in descending order? I tried using -Keyfield in the index def. It works fine and compiles, but it essentially flipped the sign for numeric fields and automatically type-casted to integer8. It works in theory, but now, I need to flip the sign of input parameters and do my search and flip the sign of output. I am sure there is a simpler way that I am missing. 
\\nThanks for the help.\\nRegards\\nSrini\", \"post_time\": \"2016-09-15 11:50:18\" },\n\t{ \"post_id\": 11313, \"topic_id\": 2813, \"forum_id\": 8, \"post_subject\": \"Re: Dedup and retain the input order\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot\\n\\nRegards,\\nViswa\", \"post_time\": \"2016-09-16 21:30:28\" },\n\t{ \"post_id\": 11303, \"topic_id\": 2813, \"forum_id\": 8, \"post_subject\": \"Re: Dedup and retain the input order\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,\\n\\nHere's the way I would do it:IMPORT Std;\\nUpperIt(STRING s) := Std.Str.toUpperCase(s);\\nInDS := DATASET([{'Hello'},{'world'},{'HPCC'},{'Thor'},\\n {'Hello'},{'World'},{'HPCC'},{'THOR'},\\n {'Roxie'},{'Roxie'},{'ECL'},{'ECL'}],\\n {STRING10 word});\\n\\t\\t\\nDupRec := {UNSIGNED RecID,STRING10 word};\\nDupRecs := PROJECT(InDS,TRANSFORM(DupRec,SELF.RecID := COUNTER,SELF := LEFT));\\n\\nDistinct := DEDUP(SORT(DupRecs,UpperIt(word)),UpperIt(LEFT.word) = UpperIt(RIGHT.word));\\nSORT(Distinct,RecID);
Because DEDUP requires the SORT for most efficient operation, the way to do it is number the input records (PROJECT), SORT them for the DEDUP, then re-sort the results using the record identifier from the PROJECT.\\n\\nI added the UpperIt() function so your "World" and "world" records would match as you showed in your expected results.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-16 15:32:13\" },\n\t{ \"post_id\": 11273, \"topic_id\": 2813, \"forum_id\": 8, \"post_subject\": \"Dedup and retain the input order\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs there any way we could retain the input order after a dedup? Tried using STABLE and ORDERED but didn't get the result as expected.\\n\\nSay for example :\\n\\nInput dataset : \\n\\n\\nHello\\nworld\\nHPCC\\nThor\\nHello\\nWorld\\nHPCC\\nTHOR\\nRoxie\\nRoxie\\nECL\\nECL\\n
\\n\\nI want the output as :\\n\\n\\nHello\\nworld\\nHPCC\\nThor\\nRoxie\\nECL\\n
\\n\\nI could do a dedup with option all and get all the unique values but the output order is not the same as the input, also these are strings so no specific way to sort and then dedup.\\n\\nAny suggestions..?\\n\\nRegards,\\nViswa\", \"post_time\": \"2016-09-16 04:05:11\" },\n\t{ \"post_id\": 11483, \"topic_id\": 2833, \"forum_id\": 8, \"post_subject\": \"Re: Query regarding setting threshold for SALT search.\", \"username\": \"bforeman\", \"post_text\": \"Hi Akhilesh,\\n\\nIn the future, please post any SALT related questions to our SALT forum.\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2016-09-27 19:06:39\" },\n\t{ \"post_id\": 11343, \"topic_id\": 2833, \"forum_id\": 8, \"post_subject\": \"Query regarding setting threshold for SALT search.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello,\\n\\nI wish to understand, what would be a good way to set a threshold for SALT searching. I am using external linking in SALT searching and the generation docs has the following two values:\\n\\nRecommended matching threshold 34\\nSearch Threshold set at 13\\n\\nI am using the last name, first name and date of birth while searching for a person. The specificity of some last names is low. Because of this low specificity, the weight assigned to the search result is less and hence these results are excluded from the final SALT output.\\n\\nI have tried making this threshold to zero and then applying the threshold limits manually in a separate code and filtering the records for the final SALT output. But even here, I am not able to specify a suitable value for the threshold limits for scenarios where the specificity of some field values is less.\\n\\nCould you please help me with some tips to set a good threshold or by any chance could I use the total specificity file generated for the data to set the threshold ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2016-09-22 12:35:04\" },\n\t{ \"post_id\": 12163, \"topic_id\": 2991, \"forum_id\": 8, \"post_subject\": \"Re: Logical Operators - OR not short-circuiting\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nIt doesn't hurt to add a new JIRA. If it's a duplicate issue, that will get flagged in your new JIRA and you will then be able to start watching the original report's JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-19 13:16:52\" },\n\t{ \"post_id\": 12133, \"topic_id\": 2991, \"forum_id\": 8, \"post_subject\": \"Re: Logical Operators - OR not short-circuiting\", \"username\": \"dsette\", \"post_text\": \"Thanks for your response Richard.\\nIn this case, your getSubCount definition is an Action, and the compiler executing all actions in a condition is a known issue.
\\nIs there already a JIRA item for this, or should I raise one?\\n\\nI take on board your points regarding the deletion of superfiles. \\nThe example I provided was simply ensuring that the file was not present before demonstrating the issue of the IF statement.\\n\\nThanks for your help,\\nDavid\", \"post_time\": \"2016-10-19 10:11:57\" },\n\t{ \"post_id\": 12061, \"topic_id\": 2991, \"forum_id\": 8, \"post_subject\": \"Re: Logical Operators - OR not short-circuiting\", \"username\": \"rtaylor\", \"post_text\": \"dsette,\\n\\nIn this case, your getSubCount definition is an Action, and the compiler executing all actions in a condition is a known issue.\\n\\nThis example works perfectly as described:f(Tval, Fval) := IF(Tval = 1 OR Fval = 1,'True value','False value');\\n\\nf(1,0); //True value\\nf(0,1); //True value\\nf(0,0); //False value\\nf(1,1); //True value
\\nBTW, a superfile should never need to be deleted. It should be created once, then used and re-structured as needed from then on. There is no necessity to periodically delete and re-create them. \\n\\nIf you want to test if a superfile has been mysteriously deleted by someone, then I suggest using an ASSERT to handle that situation, something like this:ASSERT(STD.File.FileExists(superFileName),superFileName + ' does not exist',FAIL);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-17 17:15:19\" },\n\t{ \"post_id\": 11971, \"topic_id\": 2991, \"forum_id\": 8, \"post_subject\": \"Logical Operators - OR not short-circuiting\", \"username\": \"dsette\", \"post_text\": \"The documentation indicates that the OR logical operator should short-circuit:\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-6.0.6/docs/ECLLanguageReference-6.0.6-1.pdf#logical%20operatorsIf the probability of occurrence is known,\\nyou should order them from the most likely to occur to the least likely to occur, because once any part of a compound\\nOR condition evaluates to TRUE, the remainder of the expression is bypassed.
Unless I am mistaken, this is not behaving as expected. \\nWhen it has to evaluate an expression that returns TRUE, it seems to continue to evaluate the next expression after the OR.\\nIt seems that for a hard-coded value of TRUE, it works as expected.\\nAm I doing something wrong or misunderstanding the code/documentation?\\n\\nConsider the below code:\\nIMPORT STD;\\n\\nsuperFileName\\t:= 'temp::superFile';\\nfileName\\t\\t\\t:= 'temp::regularFile';\\n\\nreturnsTrue\\t\\t:= ~STD.File.FileExists(fileName, TRUE); // File does not exist, so will return true as expression is negated\\ngetSubCount\\t\\t:= NOTHOR(STD.File.GetSuperFileSubCount(superFileName)) > 0; // "Could not locate superfile: thor::nonExistent"\\n\\ndeleteFile\\t\\t\\t:= STD.File.DeleteLogicalFile(fileName);\\ndeleteSuperFile\\t:= STD.File.DeleteSuperFile(superFileName);\\n\\nSEQUENTIAL(\\n\\tdeleteFile,\\n\\tdeleteSuperFile,\\n\\tOUTPUT(returnsTrue), // true\\n\\tOUTPUT(IF ((TRUE OR getSubCount), 'true', 'false')), // 'true'\\n\\tOUTPUT(IF ((returnsTrue OR getSubCount), 'true', 'false')), // "Could not locate superfile: thor::temp::superFile"\\n);
\", \"post_time\": \"2016-10-14 12:01:08\" },\n\t{ \"post_id\": 12121, \"topic_id\": 3001, \"forum_id\": 8, \"post_subject\": \"Re: Breaking loops dependent on query results\", \"username\": \"rtaylor\", \"post_text\": \"ricardoleon & mansfield_bitter,\\n\\nYes, I have come up against this issue a number of times myself.\\n\\nFirst, please submit a JIRA ticket to add the feature to allow the Template Language to use "non-constant" input (by submitting yourself, you are automatically included in the developer's "conversation" about the issue).\\n\\nThe only way I have been able to work around this is to use the "drilldown" technique illustrated in the attached code. This code is a "garbage" data generation tool to create test data. \\n\\nThis .zip file contains a .mod file that you simply open in the ECL IDE to automatically install the .ecl files into your repository into "DataGen" and "drilldown" modules. The DataGen.__readme.ecl file contains all the information on how to use it (which also explains the technique to work around this Template Language limitation), and the drilldown.fldtype.ecl file contains the actual Template code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-18 14:19:45\" },\n\t{ \"post_id\": 12111, \"topic_id\": 3001, \"forum_id\": 8, \"post_subject\": \"Re: Breaking loops dependent on query results\", \"username\": \"ricardoleon\", \"post_text\": \"We are trying to generate code for a repetitive task we have. Basically we need to create multiple blocks of ECL code with different parameters, based on static configuration sets. We basically are trying to implement a 'batching' approach. For example:\\n\\ntaskName := ['a','b','c'];\\nnumberOfBatches := ['10', '20', '30'];\\nbatchSize := 10;\\n\\nSo for task 'a' we need to execute certain code 10 times, for 'b' 20 times and for 'c' 30 times. \\n\\nSo our expected output would be something like:\\n\\n\\n\\nDoSomething('a', batchSize);\\nDoSomething('a', batchSize);\\n.\\n.\\n.\\nDoSomething('a', batchSize); //-- 10 times\\nDoSomething('b', batchSize);\\nDoSomething('b', batchSize);\\n.\\n.\\n.\\nDoSomething('b', batchSize); //-- 20 times\\n\\nDoSomething('c', batchSize);\\nDoSomething('c', batchSize);\\n.\\n.\\n.\\nDoSomething('c', batchSize); //-- 30 times\\n\\n\\nThis code is generated by using the template language. We would like this to be a bit more flexible that having static declared configuration and we would like to use a proper dataset as input (as declared below) but we bump into the issue originally mentioned in this post.\\n\\nmyRecord := RECORD\\n STRING taskName,\\n INTEGER numberOfBatches,\\n INTEGER batchSize\\nEND;\", \"post_time\": \"2016-10-18 13:40:26\" },\n\t{ \"post_id\": 12091, \"topic_id\": 3001, \"forum_id\": 8, \"post_subject\": \"Re: Breaking loops dependent on query results\", \"username\": \"mansfield_bitter\", \"post_text\": \"It's come up a couple of times in the team. I was trying to do some SALT hacking and wanted it to cease iterating if certain conditions were met and my colleague was trying to create multiple blocks of ECL code with different parameters, based on static configuration sets. We basically are trying to implement a 'batching' approach. For example:\\n\\n\\ntaskName := ['a','b','c'];\\nnumberOfBatches := ['10', '20', '30'];\\nbatchSize := 10;\\n
\\n\\nSo for task 'a' we need to execute certain code 10 times, for 'b' 20 times and for 'c' 30 times. Our expected output would be something like:\\n\\n\\nDoSomething('a', batchSize);\\nDoSomething('a', batchSize);\\n.\\n.\\n.\\nDoSomething('a', batchSize); //-- 10 times\\nDoSomething('b', batchSize);\\nDoSomething('b', batchSize);\\n.\\n.\\n.\\nDoSomething('b', batchSize); //-- 20 times\\n\\nDoSomething('c', batchSize);\\nDoSomething('c', batchSize);\\n.\\n.\\n.\\nDoSomething('c', batchSize); //-- 30 times\\n
\\n\\nWe would prefer this to be a bit more flexible than having a static declared configuration and would like to use a proper dataset as input (as declared below) but we bump into the issue originally mentioned in this ticket.\\n\\n\\nmyRecord := RECORD\\n STRING taskName,\\n INTEGER numberOfBatches,\\n INTEGER batchSize\\nEND;\\n
\", \"post_time\": \"2016-10-18 08:07:52\" },\n\t{ \"post_id\": 12031, \"topic_id\": 3001, \"forum_id\": 8, \"post_subject\": \"Re: Breaking loops dependent on query results\", \"username\": \"rtaylor\", \"post_text\": \"mansfield_bitter,\\n\\nYour problem is that the Template Language is a meta-language designed to take in XML text and generate ECL code from that. Therefore, it expects the XML input string is a constant value.\\n\\nWhat you appear to be trying is to do "procedural coding" in ECL by using the Template Language. That will not work.\\n\\nSo, given that you must use a non-procedural approach in ECL, what exactly are you trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-17 16:16:47\" },\n\t{ \"post_id\": 12011, \"topic_id\": 3001, \"forum_id\": 8, \"post_subject\": \"Breaking loops dependent on query results\", \"username\": \"mansfield_bitter\", \"post_text\": \"We are trying to write a loop macro with a break function. Basically we would like to repeat a certain action until X (derived from a Dataset) is true. The compiler doesn't seem to like this approach and returns:\\n'Constant expression expected'
\\n\\nWe were wondering if there is a known workaround, please? \\n\\nAn entirely artificial example is provided below, called from a BWR window by:\\n LoopFunction(5);
\\n\\nEXPORT LoopFunction(NMax = 5) := MACRO\\n \\n Rec := RECORD\\n INTEGER i;\\n END;\\n\\n\\t#DECLARE(i);\\n \\n\\n OUTPUT(DATASET([1], REC), ,'~TEMP::MB::LOOPTEST' + %i%, COMPRESSED, OVERWRITE);\\n\\n\\t#SET(i, 2);\\n\\n shouldIbreak := DATASET('~TEMP::MB::LOOPTEST' + (%i% - 1), Rec, THOR);\\n\\t\\n #LOOP\\n\\n OUTPUT(shouldIbreak +DATASET([%i%], REC), ,'~TEMP::MB::LOOPTEST' + %i%, COMPRESSED, OVERWRITE);\\n\\n #SET (i, %i%+1);\\t\\n \\n\\t\\t#IF (COUNT(shouldIbreak) > Nmax);\\n #BREAK\\n\\t\\t#END\\n\\t#END\\n\\nENDMACRO;\\n
\", \"post_time\": \"2016-10-17 13:48:03\" },\n\t{ \"post_id\": 12303, \"topic_id\": 3043, \"forum_id\": 8, \"post_subject\": \"Re: stored value causing both conditional persists to build\", \"username\": \"gmarcan\", \"post_text\": \"Thanks Richard,\\n\\nI have noted this on Jira: https://track.hpccsystems.com/browse/HPCC-16515\\n\\nGabriel\", \"post_time\": \"2016-10-21 14:25:01\" },\n\t{ \"post_id\": 12293, \"topic_id\": 3043, \"forum_id\": 8, \"post_subject\": \"Re: stored value causing both conditional persists to build\", \"username\": \"rtaylor\", \"post_text\": \"gmarcan,Is this by design, or a bug?
Not a bug, per se, but a known issue in regard to actions. \\n\\nExtending that behavior to PERSISTed definitions when the boolean condition is STORED is new to me, though. It appears that your PERSIST is acting like an action, but only when STORED is present. \\n\\nYou should report this in JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-21 14:06:44\" },\n\t{ \"post_id\": 12283, \"topic_id\": 3043, \"forum_id\": 8, \"post_subject\": \"stored value causing both conditional persists to build\", \"username\": \"gmarcan\", \"post_text\": \"Hi,\\n\\nThe following code works as expected. The compiler chooses a single branch, and runs 2 graphs:\\n\\n#STORED('var1',true);\\nboolean test_var := false ;//:stored('var1');\\noutput(test_var);\\ndo_if_true := dataset([{'It is true..'}],{string s}): persist('persist::test::stored::is_true');\\ndo_if_false:= dataset([{'It is false..'}],{string s}): persist('persist::test::stored::is_false');\\nres1 := if(test_var, do_if_true, do_if_false) ;\\nres1;
\\n\\nHowever the following variaion, while the result is correct, causes both persists files to build which causes all 3 graphs to run:\\n\\n#STORED('var1',true);\\nboolean test_var := false :stored('var1');\\noutput(test_var);\\ndo_if_true := dataset([{'It is true.'}],{string s}): persist('persist::test::gmarcan::is_true');\\ndo_if_false:= dataset([{'It is false.'}],{string s}): persist('persist::test::gmarcan::is_false');\\nres1 := if(test_var, do_if_true, do_if_false) ;\\nres1;
\\n\\nIs this by design, or a bug? Is there a way around causing the second branch to build? My current workaround is using #IF #ELSE #END. (Using ECLAGENT build internal_5.6.8-1, Server 5.6.6-1)\", \"post_time\": \"2016-10-20 18:11:15\" },\n\t{ \"post_id\": 13053, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,So there is no native IsNumeric function in HPCC... yet.
No, but you're welcome to submit a JIRA to have that function added -- and I'm told it's more likely to get added if you attach the new function's C++ code to the JIRA ticket so all they have to do is approve the code and add it to the next build. HPCC/ECL is, after all, Open Source! \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-09 14:50:56\" },\n\t{ \"post_id\": 13023, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"oscar.foley\", \"post_text\": \"So there is no native IsNumeric function in HPCC... yet.\\n\\nI will use one of your ECL IsNumeric functions. \\n\\nThanks for those
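\n\nTo illustrate what such a C++ submission might look like (a sketch only, not an official implementation; the function name and the exact character rules it accepts are assumptions), an IsNumeric helper can be written inline with BEGINC++, where an ECL STRING parameter named s arrives in C++ as a length/pointer pair (lenS, s) and is not null-terminated:\n\n
BOOLEAN IsNumericCpp(STRING s) := BEGINC++\n  // accepts an optional leading sign, digits, and at most one decimal point\n  bool seenDigit = false;\n  bool seenDot = false;\n  for (size32_t i = 0; i < lenS; i++) {\n    char c = s[i];\n    if ((c == '+' || c == '-') && i == 0) continue;\n    if (c == '.') { if (seenDot) return false; seenDot = true; continue; }\n    if (c >= '0' && c <= '9') { seenDigit = true; continue; }\n    return false;\n  }\n  return seenDigit;\nENDC++;\n\nIsNumericCpp('41.005');   // true\nIsNumericCpp('41.0.05');  // false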
\", \"post_time\": \"2016-11-09 10:03:54\" },\n\t{ \"post_id\": 12643, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"rtaylor\", \"post_text\": \"Or even:
IsNumeric(STRING n) := n = (STRING)(DECIMAL)n;
\", \"post_time\": \"2016-10-31 12:17:59\" },\n\t{ \"post_id\": 12633, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"gsmith\", \"post_text\": \"How about:\\n
\\nIsNumeric(STRING n) := FUNCTION\\n RETURN IF(n = (STRING)(DECIMAL)n, TRUE, FALSE);\\nEND;\\n
\", \"post_time\": \"2016-10-28 16:28:09\" },\n\t{ \"post_id\": 12473, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nSimple fix:IsNumeric(STRING n) := FUNCTION\\n IsNotNumeric := Std.Str.Find(n,'.',2) <> 0;\\n NoPeriod := Std.Str.FindReplace(n,'.','');\\n RETURN IF(IsNotNumeric,FALSE,NoPeriod = (STRING)(INTEGER)NoPeriod);\\nEND;\\n\\nIsNumeric('41.0.05'); //false
\\nIf I knew of an existing function I would not have bothered writing this one. \\n\\nOf course, if you have a simple IsNumeric function written in C/C++ you could submit a JIRA ticket with that code asking for it to be added to the Standard Library.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-25 19:33:23\" },\n\t{ \"post_id\": 12453, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"oscar.foley\", \"post_text\": \"Thanks for your snippet. I am afraid it fails in cases like: \\n\\n
IsNumeric('41.0.05'); //true (it should be false)
\\n\\n\\nWhat I was wondering if is there any built-in function to avoid these bugs (probably my regex proposed function also fails in some edge cases).\\n\\nIt is like using TryParse in C# instead of programming my own IsNumeric function...\\n\\nThanks \\nOscar\", \"post_time\": \"2016-10-25 17:56:08\" },\n\t{ \"post_id\": 12423, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Re: Equivalent of IsNumeric in 2016?\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nHere's one alternative method:
IMPORT Std;\\nIsNumeric(STRING n) := FUNCTION\\n NoPeriod := Std.Str.FindReplace(n,'.','');\\n RETURN NoPeriod = (STRING)(INTEGER)NoPeriod;\\nEND;\\n\\nIsNumeric('42'); //true\\nIsNumeric('41.005'); //true\\nIsNumeric('-41.005'); //true\\nIsNumeric('41.005 FRED'); //false
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-25 17:25:17\" },\n\t{ \"post_id\": 12403, \"topic_id\": 3093, \"forum_id\": 8, \"post_subject\": \"Equivalent of IsNumeric in 2016?\", \"username\": \"oscar.foley\", \"post_text\": \"I want to check if some value (i.e: 42 or 41.0005) is a number in ECL.\\nI have in mind a function using Regular Expressions like this:\\n\\n EXPORT IsNumeric(STRING UniqueID) := FUNCTION\\n regex:='^\\\\\\\\d+$'; \\n RETURN REGEXFIND(regex, UniqueID); \\n END;
\\n\\nIs there a built-in isNumeric function?\\nI can see in this old question from 2011 that it is not. Many releases have happened since then...\\n\\nIf not... Is there any better way of doing it than using regex? (Thinking both in readability and performance) \\n\\n- Maybe an ECL command?\\n- STD function?\\n- Casting?\\n- #GetDataType? \\n- Str.Filter?
\\n\\nI have also posted this question (http://stackoverflow.com/questions/40242659/what-is-the-ecl-equivalent-of-nan-or-isnumeric) in StackOverflow.\", \"post_time\": \"2016-10-25 14:45:15\" },\n\t{ \"post_id\": 12523, \"topic_id\": 3123, \"forum_id\": 8, \"post_subject\": \"Re: How to update Logical Files in real-time\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nHPCC is not an RDBMS -- you never "update" data files. In fact, you are precluded from reading from and writing to the same file in the same workunit. \\n\\nThe HPCC tools you need to look at for continually adding new data are Superfiles. The creation, use, and maintenance of Superfiles are covered in our Advanced Thor class and discussed in their own section of articles the Programmer's Guide. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-26 13:34:42\" },\n\t{ \"post_id\": 12503, \"topic_id\": 3123, \"forum_id\": 8, \"post_subject\": \"How to update Logical Files in real-time\", \"username\": \"rqg0717\", \"post_text\": \"Dear All,\\n\\nI was wondering how to pull data from external web sites and update Logical Files in real-time. It would be great, if you could provide some demos or example codes. Please let me know. Thank you so much.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-25 21:51:07\" },\n\t{ \"post_id\": 12583, \"topic_id\": 3133, \"forum_id\": 8, \"post_subject\": \"Re: macros runnings simultaneously\", \"username\": \"andres5\", \"post_text\": \"Thank you Richard!\", \"post_time\": \"2016-10-27 16:51:26\" },\n\t{ \"post_id\": 12573, \"topic_id\": 3133, \"forum_id\": 8, \"post_subject\": \"Re: macros runnings simultaneously\", \"username\": \"rtaylor\", \"post_text\": \"Andres,\\n\\nHere's an alternative way:IMPORT Std;\\nNextNumFunc(STRING NameStub) := FUNCTION\\n StubLen := LENGTH(NameStub) + 1;\\n FileList := NOTHOR(STD.File.LogicalFileList(NameStub + '*'));\\n FileCnt := COUNT(FileList);\\n LastNum := (UNSIGNED)FileList[FileCnt].name[StubLen..];\\n NextNum := LastNum + 1;\\n NewFileName := '~' + NameStub + NextNum;\\n Layout := {UNSIGNED Nbr};\\n NewDS := DATASET([{NextNum}],Layout); \\n WriteFile := OUTPUT(NewDS,,NewFileName,EXPIRE(7));\\n RETURN WHEN(NewDS,WriteFile);\\nEND;\\t\\n\\nMyNewNbr := NextNumFunc('RTTEST::TestNbr')[1].Nbr;\\n\\nSEQUENTIAL(OUTPUT(MyNewNbr),\\n OUTPUT('Do Everthing else here'));
\\nHere are my assumptions:
\\nGiven those assumptions, this version should work for you. Note that I set the EXPIRE option on the numbering files to 7 days. If that still leaves you with too many old number files cluttering up your system then you should change it to whatever works best for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-27 15:13:21\" },\n\t{ \"post_id\": 12553, \"topic_id\": 3133, \"forum_id\": 8, \"post_subject\": \"macros runnings simultaneously\", \"username\": \"andres5\", \"post_text\": \"Hello All, I have a macro that assigns a sequential number based on the max number of a field in a file. The service works with no issues. However, if called by two users at the same time, it pulls the same max(number) creating duplicates on the sequence. \\n\\nis there any way to prevent a service, function, or macro from running multiple times simultaneously?\\n\\nI was able to detect if two instances were running by changing the workunit name when the macro is called and then check if there is more than one workunit with the same name running. I am trying to find a better solution. Thanks in advance for any comments!\\n\\n\\n#WORKUNIT('name','service_running')\\nisrunning := if(count(nothor(WorkunitServices.WorkunitList('',jobname:='service_running'))(state not in ['completed','failed']))>1,TRUE,FALSE); \\n
\", \"post_time\": \"2016-10-27 13:06:57\" },\n\t{ \"post_id\": 12711, \"topic_id\": 3201, \"forum_id\": 8, \"post_subject\": \"Re: Spraying multiple CSV files\", \"username\": \"John Meier\", \"post_text\": \"Hi Richard;\\n\\nYes: each file is a mix of fixed records.\\nFile01.txt TYPE01..rec01..(len 585 bytes)\\n TYPE02..rec02..(len 202 bytes)\\n TYPE01..rec03..(len 585 bytes)\\n TYPE02..rec04..(len 202 bytes)\\n TYPE02..rec05..(len 202 bytes)\\nThey are text files (no binary data). And there are multiple files with the same mixed content (File01.txt, File02.txt, etc.).\\n\\nI did get the wildcard (File*.txt) to work by spraying as a 'CSV'. Even though the individual records are "fixed" in length, 'CSV' allows me to spray as a "ragged-right". I just defined the TERMINATOR as a newline/carriage return. This helped to eliminate the empty record at the end of each file.\\n\\nI thought if I could get the ECL code to access the LandingZone files directly, I would have appended a block of spaces to bring the TYPE02 records up to the length of the TYPE01 records (making the spray a fixed FLAT one), but that proved unnecessary.\\n\\nSo - problem solved (albiet with much head scratching ) Thanks!\", \"post_time\": \"2016-10-31 20:53:13\" },\n\t{ \"post_id\": 12701, \"topic_id\": 3201, \"forum_id\": 8, \"post_subject\": \"Re: Spraying multiple CSV files\", \"username\": \"rtaylor\", \"post_text\": \"John,
I have 30 files. Each file contains two record types: \\nTYPE01 is a fixed layout of 585 bytes in length\\nTYPE02 is a fixed layout of 202 bytes in length\\nTYPE01 can have 1 to n number of TYPE02 records associated with it.
So, just to be sure I understand -- each file contains BOTH record types? And is it just one Type 01 followed by all its Type 02 recs in each file? Or does the file contain multiple Type 01 recs with their associated 02 recs?\\nBecause of the variability of the record lengths, I spray the files as a CSV where the TERMINATOR is '\\\\n\\\\r'. Where the file name is to be specified, I use a wildcard (FileRpt*.txt) which should ingest to the spray operation all the files in the directory path (FileRpt01.txt, FileRpt02.txt, etc).ECL Watch reports a successful spray and I have a target file. But when I count the number of records in the file, it only reports a count for the first file...it's like the spray says "Hey, I found 20 files with that wildcard in the name, but I'm only going to successfully spray the first file and ignore the rest."
Exactly how are you doing the spray -- DFUplus.exe? Std.File.SprayVariable()? ECL Watch page? And how many logical files does the spray result in?\\n\\nAlso, since each record type is a fixed length record, are all the fields just text, or are there binary fields mixed in?\\n\\nRichard\", \"post_time\": \"2016-10-31 20:17:41\" },\n\t{ \"post_id\": 12661, \"topic_id\": 3201, \"forum_id\": 8, \"post_subject\": \"Spraying multiple CSV files\", \"username\": \"John Meier\", \"post_text\": \"I'm having a bit of an issue:\\nI have 30 files. Each file contains two record types: \\n TYPE01 is a fixed layout of 585 bytes in length\\n TYPE02 is a fixed layout of 202 bytes in length\\nTYPE01 can have 1 to n number of TYPE02 records associated with it. Because of the variability of the record lengths, I spray the files as a CSV where the TERMINATOR is '\\\\n\\\\r'. Where the file name is to be specified, I use a wildcard (FileRpt*.txt) which should ingest to the spray operation all the files in the directory path (FileRpt01.txt, FileRpt02.txt, etc).\\n\\nECL Watch reports a successful spray and I have a target file. But when I count the number of records in the file, it only reports a count for the first file...it's like the spray says "Hey, I found 20 files with that wildcard in the name, but I'm only going to successfully spray the first file and ignore the rest." Is there some kind of parameter I should be using to force the spray of multiple files? In some languages, when working with variable-length files, it works only when the record lengths drop from largest to smallest. In this case, I MUST keep the records in their ordinal positions (there is a relationship between the TYPE01 and TYPE02 records which I will use to build linking keys - but that's after I spray the files).\\n\\nCan anyone point me where to find an answer? All of the documentation on spraying variable-length files show single file sprays - nothing dealing with multiple file sprays.\\n\\nThanks.\", \"post_time\": \"2016-10-31 17:58:43\" },\n\t{ \"post_id\": 12853, \"topic_id\": 3223, \"forum_id\": 8, \"post_subject\": \"Re: Cross join (Cartesian product) in ECL?\", \"username\": \"JimD\", \"post_text\": \"Here is a link to the article that Richard mentioned:\\n\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=84\\n\\nHTH,\\nJim\", \"post_time\": \"2016-11-07 18:25:21\" },\n\t{ \"post_id\": 12843, \"topic_id\": 3223, \"forum_id\": 8, \"post_subject\": \"Re: Cross join (Cartesian product) in ECL?\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nHere's an alternative:VehicleTypes := TABLE(TheData, {vehicle_type}, vehicle_type);\\nCustomers := TABLE(TheData, {customer}, customer);\\n\\nProduct := JOIN(Customers, VehicleTypes,TRUE,ALL);\\n\\nresult := JOIN(Product,TheData,\\n LEFT.Customer=RIGHT.Customer AND LEFT.vehicle_type=RIGHT.vehicle_type,\\n TRANSFORM(Layout,SELF := LEFT,SELF := RIGHT),SMART,\\n LEFT OUTER);\\n\\nSORT(result,customer,vehicle_type);
\\n\\nThere are also a couple of examples in the Programmer's Guide article titled: Cartesian Product of Two Datasets\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-07 17:45:40\" },\n\t{ \"post_id\": 12833, \"topic_id\": 3223, \"forum_id\": 8, \"post_subject\": \"Cross join (Cartesian product) in ECL?\", \"username\": \"james.wilson\", \"post_text\": \"Say for example I have a table of customers, vehicle types, and counts. I want a dataset containing all customers and vehicle types, with zero for the count where I don't have an entry in my table. I don't know what the full list of vehicle types are, so can't create a TABLE with a column for each.\\ne.g.\\n\\nLayout := RECORD\\n STRING9 customer;\\n STRING2 vehicle_type;\\n INTEGER num;\\nEND;\\n\\nTheData := DATASET([ {'000000001', 'ca', 10}\\n , {'000000001', 'vn', 10}\\n , {'000000002', 'ca', 10}\\n , {'000000003', 'mb', 10}\\n ], Layout);\\n
\\n\\nWell, I should ask if there's a nice/in-built way of doing it. I have found one way, but it seems very inelegant:\\n\\nVehicleTypes := DEDUP(SORT(TABLE(TheData, {vehicle_type}), vehicle_type), vehicle_type);\\n\\nDEDUP(\\n SORT(\\n JOIN(TheData\\n , VehicleTypes\\n , TRUE\\n , TRANSFORM(Layout, SELF.vehicle_type := RIGHT.vehicle_type\\n ; SELF.num := IF(LEFT.vehicle_type = RIGHT.vehicle_type, LEFT.num, 0)\\n ; SELF := LEFT\\n )\\n , ALL\\n )\\n , customer, vehicle_type, -num)\\n , customer, vehicle_type);\\n
\\n\\nAny improvements on that?\", \"post_time\": \"2016-11-07 15:58:24\" },\n\t{ \"post_id\": 13073, \"topic_id\": 3283, \"forum_id\": 8, \"post_subject\": \"Re: ECL deploy client tool returning wrong errorlevel. Bug?\", \"username\": \"JimD\", \"post_text\": \"also does it fail from command line when the task is NOT on run from Jenkins server?\\n\\nJim\", \"post_time\": \"2016-11-09 15:04:25\" },\n\t{ \"post_id\": 13063, \"topic_id\": 3283, \"forum_id\": 8, \"post_subject\": \"Re: ECL deploy client tool returning wrong errorlevel. Bug?\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nDoes it also fail if you just run it from the IDE, or is it only hapening with the command line ecl.exe?\\n\\nRichard\", \"post_time\": \"2016-11-09 15:03:12\" },\n\t{ \"post_id\": 13033, \"topic_id\": 3283, \"forum_id\": 8, \"post_subject\": \"ECL deploy client tool returning wrong errorlevel. Bug?\", \"username\": \"oscar.foley\", \"post_text\": \"My HPCC version is: 5.6.4-1\\nClient tools version is: 5.6.4\\n\\nI have a bash script that runs on Jenkins server. It executed this:\\n\\necl run thor MyEcl.ecl --server=$pServerIP --port=$pServerPORT -I $pBaseCodeDirectory
\\n\\nThe workunit generated failed with error: W20161109-081817 System error: -1: Failed to receive reply from thor 10.53.57.39:20000; (-1, Failed to receive reply from thor 10.53.57.39:20000)
\\n\\nError is originated by some problem in the code that we already fixed (An INDEPENDENT while reading a file that shouldn't fail... but that is another problem).\\n\\nWith that error I expect ecl run thor to return an error level >0 and my build to fail. But I got errorlevel 0 and my build thinks that everything was good so it returns a green light:\\n\\nExecuting MyEcl.ecl\\nW20161109-081817 failed\\nExecuting MyEcl.ecl FINISHED.\\nError status = 0 (0 = success, >0 error)\\nSTEP 03 - done.\\n[Pipeline] }\\n[Pipeline] // dir\\n[Pipeline] }\\n[Pipeline] // node\\n[Pipeline] End of Pipeline\\nFinished: SUCCESS
\\n\\nSo: \\n- Am I doing something wrong?\\n- Or is this a bug in ClientTools?\", \"post_time\": \"2016-11-09 13:39:31\" },\n\t{ \"post_id\": 13273, \"topic_id\": 3333, \"forum_id\": 8, \"post_subject\": \"Re: BOM bytes (FFFE), sprayed as UTF16LE Output Error\", \"username\": \"newportm\", \"post_text\": \"After further review STD.Str.SplitWords seems to drop empty sets. By replacing the delimiter with '[space](delim)[space]' I get the data I needed. \\n\\nThanks\", \"post_time\": \"2016-11-16 13:42:44\" },\n\t{ \"post_id\": 13253, \"topic_id\": 3333, \"forum_id\": 8, \"post_subject\": \"Re: BOM bytes (FFFE), sprayed as UTF16LE Output Error\", \"username\": \"newportm\", \"post_text\": \"Ok Bob,\\n\\nI created the ticket IDE-588\\n\\nDo you know what is going on with STD.Str.SplitWords. It seems to be skipping consecutive delimiters and not creating an item in the set for them.\\n\\nTim\", \"post_time\": \"2016-11-16 13:32:52\" },\n\t{ \"post_id\": 13213, \"topic_id\": 3333, \"forum_id\": 8, \"post_subject\": \"Re: BOM bytes (FFFE), sprayed as UTF16LE Output Error\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nSo it looks OK in ECL Watch through the Logical File Details page's Content tab, and it looks OK in the ECL Watch workunit results page when you run a query, and only looks NOT OK in the results tab of the IDE? If that is correct, then the IDE apparently has a problem that you should report through JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-15 18:50:52\" },\n\t{ \"post_id\": 13193, \"topic_id\": 3333, \"forum_id\": 8, \"post_subject\": \"BOM bytes (FFFE), sprayed as UTF16LE Output Error\", \"username\": \"newportm\", \"post_text\": \"Hey,\\n\\nI have some Tab delimited CSV files that are encoded in UTF16LE. They have a BOM byte of (FFFE). When using the FileServices.SprayVariable I set the encoding to utf16le. If I look at the file in ECL Watch the data seems to look as expected. However, If I define a dataset in a builder window, setting the encoding to UNICODE or UNICODE16 the results do not look good in ECL Workunit output. They do look good in the ECL Watch display for the workunit. \\n\\nI tried defining the columns as string as well as unicode and neither made a difference. \\n\\nI also tried to spray as Ascii and remove unprintables while each row was stored as a single string and then use STD.Str.SplitWords to create a SET and then transfer the set to the desired layout and when I output more than a few rows I get an error: System error: 1301: Memory pool exhausted.\\ndInputDs := dataset(pFilename, {STRING\\tEntireRecord},csv(terminator(pCSVTerminator), separator('!~!'),heading(0),UNICODE, quote([])));\\n\\nCan you please direct me as to what I am missing. \\n\\nThanks, Tim\", \"post_time\": \"2016-11-15 15:04:15\" },\n\t{ \"post_id\": 13263, \"topic_id\": 3343, \"forum_id\": 8, \"post_subject\": \"Re: Sequencing Some Records\", \"username\": \"John Meier\", \"post_text\": \"Hey Richard,\\n\\nAwesome! I though my INTEGER default was too small and the COUNTER was rolling over. I just removed the LOCAL and the issue disappeared. Thank you so much! \\n\\nJohn\", \"post_time\": \"2016-11-16 13:36:52\" },\n\t{ \"post_id\": 13243, \"topic_id\": 3343, \"forum_id\": 8, \"post_subject\": \"Re: Sequencing Some Records\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nYour PROJECT has the LOCAL option, which means each node starts from 1. 
Since you're getting 15 recs with the same number, I expect that you're running on a 15-node Thor, right?\\n\\nRemove the LOCAL option from your PROJECT to get global numbering.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-15 20:40:05\" },\n\t{ \"post_id\": 13233, \"topic_id\": 3343, \"forum_id\": 8, \"post_subject\": \"Sequencing Some Records\", \"username\": \"John Meier\", \"post_text\": \"Hello All;\\n\\nI have an issue with sequencing some records. I have sprayed several physical files (all the same layouts) into a single THOR file. The data consists to two different record types: 'PC1' and 'PC2' - they are related and are positioned in the incoming file such that a single 'PC1' leads its related 'PC2' record(s).\\n\\nSo I attempt to build a key - a sequence number for every 'PC1' record. After that is complete, I then ITERATE over the file again. For every 'PC2' record (current), I pull the previous key (reference record). \\n\\nHere is the code:\\nLayout_Combo := RECORD\\nSTRING616 COMBINED_PC1_PC2_REC;\\nEND;\\n\\n//----------------------------------------\\nLayout_Combo XFRM_PC1_PC2_SINGLEFILE(Layout_Combo LL, INTEGER Cnt) := TRANSFORM\\nSELF.COMBINED_PC1_PC2_REC := IF( LL.COMBINED_PC1_PC2_REC[1..3] = 'PC1'\\n , LL.COMBINED_PC1_PC2_REC[1..585] + 'XX' + INTFORMAT(Cnt,9,1);\\nself := [];\\nEND;\\nDS_PROJ_PC1_PC2 := PROJECT(File_RAW_FULL, XFRM_PC1_PC2_SINGLEFILE(LEFT, COUNTER), LOCAL);\\n\\nSeems easy enough, but when I filter the dataset DS_PROJ_PC1_PC2 , filtering for the first expected key 'XX000000001', I don't get the expected 1 record - I GET 15!\\n\\nCOUNTER is supposed to be sequential and I should only have 1 first record. Is COUNTER somehow resetting itself? why am I not getting a smooth increment?\\n\\nAny help would be appreciate.\\n\\nJohn\", \"post_time\": \"2016-11-15 20:20:41\" },\n\t{ \"post_id\": 13473, \"topic_id\": 3373, \"forum_id\": 8, \"post_subject\": \"Re: Conditional Compilation on datatype in MACRO\", \"username\": \"Allan\", \"post_text\": \"Thanks for the steer Richard, I'll give it a whirl.\", \"post_time\": \"2016-11-30 10:16:48\" },\n\t{ \"post_id\": 13433, \"topic_id\": 3373, \"forum_id\": 8, \"post_subject\": \"Re: Conditional Compilation on datatype in MACRO\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nInstead of #GETDATATYPE I think you want to look at #EXPORT and #EXPORTXML then have your FUNCTIONMACRO generate the correct code for the data type of the passed "fld" you want to work with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-29 14:29:04\" },\n\t{ \"post_id\": 13343, \"topic_id\": 3373, \"forum_id\": 8, \"post_subject\": \"Conditional Compilation on datatype in MACRO\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have to use a FUNCTIONMACRO in two contexts, one numeric, the other textual.\\ne.g.\\n
\\nSELF.Total := FMM(d,1,matched)+FMM(d,1,notmathced);\\nSELF.Name := FMM(d,1,Surname);\\n
\\n\\nThe FUNCTIONMACRO itself is just an array bound check on the dataset 'd' returning a default if the index is out of range. But the default is a different type depending upon the type of the 'field' being returned.\\n\\nFMM(ds,idx,fld) := FUNCTIONMACRO\\n RETURN IF(COUNT(ds) >= idx,ds[idx].fld,'');\\nENDMACRO;\\n
\\n\\nI read up on #GETDATATYPE but can't see how to use it in this context.\\nCurrently I just have two macros one for numeric fields the other for STRINGs, but it does not look nice and one has to know which macro to use.\\n\\nAny ideas?\\n\\nYours\\nAllan\", \"post_time\": \"2016-11-18 13:53:52\" },\n\t{ \"post_id\": 13393, \"topic_id\": 3393, \"forum_id\": 8, \"post_subject\": \"Image Processing Libraries\", \"username\": \"vchinta\", \"post_text\": \"Hi,\\n\\nI'm trying to run a few resource intensive image processing jobs on the cluster. Is there any way for me to use external libraries like LibJpeg or something similar. If not, any suggestions on how I can go about these tasks?\\n\\nThanks in advance\", \"post_time\": \"2016-11-27 18:37:01\" },\n\t{ \"post_id\": 13623, \"topic_id\": 3433, \"forum_id\": 8, \"post_subject\": \"Re: Simple MACRO expansion\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nJIRA raised. I'post something when I get an answer.\", \"post_time\": \"2016-12-05 13:38:19\" },\n\t{ \"post_id\": 13613, \"topic_id\": 3433, \"forum_id\": 8, \"post_subject\": \"Re: Simple MACRO expansion\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI suggest a JIRA ticket is in order. \\n\\nRichard\", \"post_time\": \"2016-12-05 11:45:04\" },\n\t{ \"post_id\": 13603, \"topic_id\": 3433, \"forum_id\": 8, \"post_subject\": \"Simple MACRO expansion\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have the most simple MACRO that I cannot get to work.\\n\\nThis compiles:\\n
\\nSTRING str := 'aaa';\\n\\nirow() := MACRO\\n {'str',str}\\nENDMACRO;\\n\\nd := DATASET([{'str',str}]\\n ,{STRING one,STRING two});\\n
\\nThis does not:\\n\\nSTRING str := 'aaa';\\n\\nirow() := MACRO\\n {'str',str}\\nENDMACRO;\\n\\nd := DATASET([irow()]\\n ,{STRING one,STRING two});\\n
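\n\nA workaround that does compile, sketched here for illustration only (it sidesteps the limitation rather than explains it): have the macro generate the complete inline DATASET rather than just the row initialiser:\n\n
STRING str := 'aaa';\n\nirows() := MACRO\n  DATASET([{'str',str}],{STRING one,STRING two})\nENDMACRO;\n\nd := irows();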
\\nThe ECL ref manual says 'A macro behaves as if you had typed the tokenstream into the exact position you use it, using lexical substitution'\\nI'm not even using any lexical substitution so why does this macro not work, and how can I get it to work?\\n\\nYours\\nAllan\", \"post_time\": \"2016-12-05 10:09:53\" },\n\t{ \"post_id\": 13691, \"topic_id\": 3453, \"forum_id\": 8, \"post_subject\": \"Re: FunctionMacro Error with Syntax free code\", \"username\": \"rtaylor\", \"post_text\": \"Mike,\\n\\nWhy not just do it this way:somefunction(STRING str1, STRING str2, STRING str3) := str1+str2+str3;\\n\\nUpdater(SET OF STRING queryList,STRING sfp,STRING ekp) := FUNCTION\\n rec := {STRING Str}; \\n ds := DATASET(queryList,rec);\\n P := PROJECT(ds,TRANSFORM(rec, SELF.Str := somefunction(LEFT.Str,sfp,ekp)));\\n RETURN SET(P,Str);\\nEND;\\t\\n\\nlabelset := ['test1','test2'];\\n\\nUpdater(labelset,'string123','string456');
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-07 09:29:35\" },\n\t{ \"post_id\": 13663, \"topic_id\": 3453, \"forum_id\": 8, \"post_subject\": \"FunctionMacro Error with Syntax free code\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI have an issue using FunctionMacro. The following code does not work\\n\\n\\nsomefunction(STRING str1, STRING str2, STRING str3) := FUNCTION\\n\\n\\tRETURN str1+str2+str3;\\n\\nEND;\\n\\nUpdater(queryList,sfp,ekp) := FUNCTIONMACRO\\n\\t\\n\\t#DECLARE(entryStr)\\n\\t#DECLARE(edx)\\n\\te:=COUNT(queryList);\\n\\t#SET(edx,1)\\n\\t#SET(entryStr,'SET of STRING results :=\\\\n[\\\\n')\\n\\t\\n\\t#LOOP\\n\\t\\t#IF(%edx%>e)\\n\\t\\t\\t#BREAK\\n\\t\\t#ELSEIF(%edx%=e)\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp)\\\\n];')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#ELSE\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp),\\\\n')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#END\\n\\t#END\\n\\t\\n\\t%entryStr%\\n\\t\\n\\tRETURN results;\\n\\nENDMACRO;\\n\\nlabelset := ['test1','test2'];\\n\\nUpdater(labelset,'string123','string456');\\n
\\n\\nI get this error\\n\\nError: syntax error near "[" : expected :=
\\n\\nNow if I output the generated code\\n\\n\\nsomefunction(STRING str1, STRING str2, STRING str3) := FUNCTION\\n\\n\\tRETURN str1+str2+str3;\\n\\nEND;\\n\\nUpdater(queryList,sfp,ekp) := FUNCTIONMACRO\\n\\t\\n\\t#DECLARE(entryStr)\\n\\t#DECLARE(edx)\\n\\te:=COUNT(queryList);\\n\\t#SET(edx,1)\\n\\t#SET(entryStr,'SET of STRING results :=\\\\n[\\\\n')\\n\\t\\n\\t#LOOP\\n\\t\\t#IF(%edx%>e)\\n\\t\\t\\t#BREAK\\n\\t\\t#ELSEIF(%edx%=e)\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp)\\\\n];')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#ELSE\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp),\\\\n')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#END\\n\\t#END\\n\\t\\n\\t// %entryStr%\\n\\t\\n\\tRETURN %'entryStr'%;//results;\\n\\nENDMACRO;\\n\\nlabelset := ['test1','test2'];\\n\\nUpdater(labelset,'string123','string456');\\n
\\n\\nI get \\nSET of STRING results :=\\n[\\n\\tsomefunction(queryList[1],sfp,ekp),\\n\\tsomefunction(queryList[2],sfp,ekp)\\n];\\n
\\n\\nNow if I paste it where the code is generated:\\n\\n\\nsomefunction(STRING str1, STRING str2, STRING str3) := FUNCTION\\n\\n\\tRETURN str1+str2+str3;\\n\\nEND;\\n\\nUpdater(queryList,sfp,ekp) := FUNCTIONMACRO\\n\\t\\n\\t#DECLARE(entryStr)\\n\\t#DECLARE(edx)\\n\\te:=COUNT(queryList);\\n\\t#SET(edx,1)\\n\\t#SET(entryStr,'SET of STRING results :=\\\\n[\\\\n')\\n\\t\\n\\t#LOOP\\n\\t\\t#IF(%edx%>e)\\n\\t\\t\\t#BREAK\\n\\t\\t#ELSEIF(%edx%=e)\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp)\\\\n];')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#ELSE\\n\\t\\t\\t#APPEND(entryStr,'\\\\tsomefunction(queryList['+%edx%+'],sfp,ekp),\\\\n')\\n\\t\\t\\t#SET(edx,%edx%+1)\\n\\t\\t#END\\n\\t#END\\n\\t\\n\\tSET of STRING results :=\\n[\\n\\tsomefunction(queryList[1],sfp,ekp),\\n\\tsomefunction(queryList[2],sfp,ekp)\\n];\\n\\t\\n\\tRETURN results;\\n\\nENDMACRO;\\n\\nlabelset := ['test1','test2'];\\n\\nUpdater(labelset,'string123','string456');\\n
\\n\\nThis works fine. I'm confused. Any ideas on why this occurs?\\n\\nHPCC 6.0.6-1\", \"post_time\": \"2016-12-06 17:26:30\" },\n\t{ \"post_id\": 13943, \"topic_id\": 3503, \"forum_id\": 8, \"post_subject\": \"Re: Convert NumericField to c structure\", \"username\": \"rtaylor\", \"post_text\": \"Maryam,\\n\\nThe ECL to C++ mapping is documented in the BEGINC++ structure docs in this section https://hpccsystems.com/download/documentation/ecl-language-reference/html/ECL_to_Cplus_Mapping.html where it says:A dataset is passed as a size/pointer pair. The length gives the size of the following dataset in bytes. The same naming convention is used:\\n\\nDATASET(r) ABC -> size32_t lenAbc, const void * abc\\n The rows are accessed as x+0, x + length(row1), x + length(row1) + length(row2)\\n\\nLINKCOUNTED DATASET(r) ABC -> size32_t countAbc, const byte * * abc\\n The rows are accessed as x[0], x[1], x[2]
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-03 16:10:34\" },\n\t{ \"post_id\": 13873, \"topic_id\": 3503, \"forum_id\": 8, \"post_subject\": \"Convert NumericField to c structure\", \"username\": \"maryamregister\", \"post_text\": \"I am passing a dataset of type ML.NumericField to a c++ function. \\nML.NumericField has the following definition:\\nNumericField := RECORD\\n UNSIGNED id; // 8 bytes\\n UNSIGNED4 number; // 4 bytes\\n REAL8 value; // 8 bytes\\nEND;\\nI am using the following typedef definition to translate the numericfield format to c in my c++ function:\\n\\ntypedef struct work1 { // copy of numericfield translated to C\\n uint64_t id;\\n uint32_t number;\\n double value;\\n };\\n\\nThis does not translate my dataset of type ML.NumericField to correct c format. Printing the results, I get dummy values which clearly says the mapping from ECL to c has not been done correctly.\\nAlso when I output sizeof(work1) in my c++ function it returns 24 which is strange. It should be 8 bytes (uint64_t) + 4 bytes (uint32_t ) + 8 bytes (double) = 20.\", \"post_time\": \"2016-12-20 17:24:10\" },\n\t{ \"post_id\": 15213, \"topic_id\": 3693, \"forum_id\": 8, \"post_subject\": \"Re: What is default TimeOut value on eclcc (client tools)?\", \"username\": \"g-pan\", \"post_text\": \"Oscar: \\nThe Client Tools manual is the appropriate documentation. \\nLike you stated the documentation says:\\nEvaluation of options follows this order of precedence:\\n• command line\\n• ini file\\n• environment variable\\n• default value\\n\\nI can verify that the default values you cited in the source file are in milliseconds, not seconds.\\nHowever I can see where you would deduce that it is in seconds as the eclplus.exe has a (query) timeout= option that would be in seconds. \\nYet without specifying to use that option on the command line, \\nI would be more likely to conclude that the source file that you are using for your system is the correct timeout value for your installation.\", \"post_time\": \"2017-02-10 14:16:09\" },\n\t{ \"post_id\": 14993, \"topic_id\": 3693, \"forum_id\": 8, \"post_subject\": \"What is default TimeOut value on eclcc (client tools)?\", \"username\": \"oscar.foley\", \"post_text\": \"I am running some queries in HPCC from command line:\\n- HPCC Cluster 5.6.4-1\\n- HPCCClientTools 5.6.4-1\\n\\nHPCCClientTools documentation says that options are evaluated in this order of precedence:\\n1- command line\\n2- ini file\\n3- environment variable\\n4- default value\\n\\nI am not passing any value for waitTimeout on command line, nor have a ini file nor have any environment variable so I use the default value.\\n\\nSo my questions are:\\n- What is that default value for waitTimeOut?\\n- Where can I find in general those values documented?
\\n\\nI think value is 2h (7200 secs) according to hints in https://hpccsystems.com/bb/viewtopic.php?f=10&t=3063 and https://track.hpccsystems.com/browse/HPCC-13971, but reading source code (https://searchcode.com/codesearch/view/67597342/) default value seems to be 100*1000 secs (27.7h)\\n\\n\\n// various options \\n#define CONNECT_TIMEOUT_REFUSED_WAIT 1000 // maximum to sleep on connect_timeout\\n#define TRACE_SLOW_BLOCK_TRANSFER \\n#define DEFAULT_CONNECT_TIME (100*1000) // for connect_wait\\n
\", \"post_time\": \"2017-02-06 12:23:35\" },\n\t{ \"post_id\": 15423, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"bforeman\", \"post_text\": \"Hi Oscar,\\n\\nOk, thanks for the update, you have it already reported in Jira and it might be a good idea to add a comment to that report with the updated information.\\n\\nBob\", \"post_time\": \"2017-02-22 13:35:06\" },\n\t{ \"post_id\": 15403, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"oscar.foley\", \"post_text\": \"Hello Bob\\n\\nAsier confirmed me that he can reproduce the bug in 6.2.4. You saw WU success but you didn't see the error in Asier computer: \\n"SOAP rpc error[errorCode = -6 message = timeout expired"
\\n\\n\\nAs you said WU finishes successfully but that success is not reported back to ECL Client Tools when the WU is over 60 mins...\", \"post_time\": \"2017-02-22 10:44:31\" },\n\t{ \"post_id\": 15383, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"bforeman\", \"post_text\": \"Hi Oscar,\\n\\nCheck with Asier. We verified that the bug was FIXED in the latest 6.2.4 server version.\\n\\nSee WU W20170215-025913 on the training cluster. Asier ran the test using the ECL command line and the correct result was returned.\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2017-02-21 14:23:03\" },\n\t{ \"post_id\": 15373, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"oscar.foley\", \"post_text\": \"There were some training happening here and a colleague managed to reproduce the bug in 6.2.4 version.\", \"post_time\": \"2017-02-20 11:20:42\" },\n\t{ \"post_id\": 15183, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"oscar.foley\", \"post_text\": \"One HPCC trainer that is on my company suggested to try ECL_WAIT_TIMEOUT\\nI tried. just in case. and it doesn't work.\\n\\nECL_WAIT_TIMEOUT affects to the 2h timeout... but timeout happens anyways.\\n\\nThe problem is the lack of communication between HPCC Cluster and my client tools, because either:\\n\\nA- HPCC doesn't send the response. (The most probable)\\nOR\\nB- ClientTools stopped listening.\", \"post_time\": \"2017-02-09 20:37:20\" },\n\t{ \"post_id\": 15143, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"oscar.foley\", \"post_text\": \"Anyone that can try to replicate this bug with latest version of HPCC?\", \"post_time\": \"2017-02-09 08:30:12\" },\n\t{ \"post_id\": 15113, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Re: Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nGood bug report! Thanks for creating the JIRA ticket for the developers. \\n\\nRichard\", \"post_time\": \"2017-02-08 16:20:12\" },\n\t{ \"post_id\": 15073, \"topic_id\": 3733, \"forum_id\": 8, \"post_subject\": \"Running ECL code over 60 mins fails from ECLClient tools\", \"username\": \"oscar.foley\", \"post_text\": \"My HPCC version is:\\n- HPCC Cluster 5.6.4-1\\n- HPCCClientTools 5.6.4-1\\n\\nI have this "advanced" ECL code:\\n
\\n IMPORT STD;\\n\\n WaitingTime:=65*60*1000;// 65 mins\\n OUTPUT ('ETL Start...');\\n STD.system.Debug.Sleep(WaitingTime);\\n OUTPUT ('ETL End...');\\n\\n
\\n\\nIf I run it from ECLIDE it works ok. If I run it from HPCCClientTools I find problems. Exact Command is:\\n\\nPS C:\\\\CODE\\\\odin\\\\HPCC> ecl run thor ".\\\\BWR\\\\OscarFoley\\\\BWR_FakeRunETL1h.ecl" --username="Oscar.Foley" --password=" " -legacy --server="HPCC.Server" --port="8010" -I "."\\n\\n
\\n\\nBehavior\\n1- Code starts to run correctly.\\n2- After 65 mins you can see in ECLWatch it finishes.\\n3- HPPCClientTools (ecl.exe) fails to detect that it finished.\\n4- After two hours, HPPCClientTools (ecl.exe) launches following error:\\n\\nSOAP rpc error[errorCode = -6 message = timeout expired\\nTarget: C!10.53.56.31, Raised in: D:\\\\jenkins2\\\\workspace\\\\CE-Candidate-Clienttools-Win32-5.6.4-1\\\\CE\\\\Windows_2k8_Servers\\\\HP\\nCC-Platform\\\\system\\\\jlib\\\\jsocket.cpp, line 1600
\\n\\nBug description: \\nSeems that in long ECL queries HPCC fails to report back successful execution.
\\n\\nImportant notes:\\n- Same code with less time (58 mins) works ok. Seems a time related problem in HPCC.\\n- Different long code also fails. This code is a simplification of my RunETL.ecl code. This proves is not related with STD.system.Debug.Sleep code. So it is not a code problem.\\n- It happens with two different environments so it is not an installation problem.\\n- Both environments have the same version and I don't have access to a newer HPCC server so it might be a bug in HPCC 5.6.4-1. Ideally someone with the latest version could try this...\\n- It happens both from my Windows7 laptop and from Jenkins Linux server, so it is not OS related.\\n- I created JIRA bug https://track.hpccsystems.com/browse/HPCC-17037\", \"post_time\": \"2017-02-08 13:54:01\" },\n\t{ \"post_id\": 15123, \"topic_id\": 3743, \"forum_id\": 8, \"post_subject\": \"Re: Creating a definition in a macro using a parameter\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nHere's how I would approach it:AddField(OldDS,FieldToAdd,AddFieldType,FieldToMod,AddTxt) := FUNCTIONMACRO\\n NewRec := {OldDS,AddFieldType FieldToAdd};\\n NewDS := PROJECT(OldDS,\\n TRANSFORM(NewRec,\\n SELF.FieldToAdd := LEFT.FieldToMod + AddTxt,\\n SELF := LEFT));\\n RETURN NewDS; \\nENDMACRO;
FUNCTIONMACRO seems to me to be better suited to the task than a MACRO because FUNCTIONMACRO removes the need to pass in a definition name to the MACRO (but either can do what you want). It also provides name definition scoping so you don't have name collision problems to contend with.\\n\\nds1 := DATASET([{1,'SMITH'},{2,'JONES'}],{UNSIGNED1 UID,STRING10 OldField});\\nds2 := DATASET([{'SMITH',1},{'JONES',2}],{STRING10 OldField,UNSIGNED1 UID});\\nAddField(ds1,NewField,STRING15,OldField,',FRED');\\nAddField(ds2,NewField,STRING15,OldField,',SAM');
MACRO and FUNCTIONMACRO are both ECL code generators, so their parameters are token names for lexical substitution in the generated code. Wherever the token appears in the code, it is replaced by the parameter passed in. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-08 17:30:55\" },\n\t{ \"post_id\": 15093, \"topic_id\": 3743, \"forum_id\": 8, \"post_subject\": \"Re: Creating a definition in a macro using a parameter\", \"username\": \"james.wilson\", \"post_text\": \"I should add that of course I can make my macro take the definition name as a parameter, and that's what I'm going to do now as I can't figure this out, but it seems like there should be a more elegant solution.\", \"post_time\": \"2017-02-08 15:58:13\" },\n\t{ \"post_id\": 15083, \"topic_id\": 3743, \"forum_id\": 8, \"post_subject\": \"Creating a definition in a macro using a parameter\", \"username\": \"james.wilson\", \"post_text\": \"I wanted to write some code to add a field to datasets in several different layouts. This seems perfect for a macro, however I wanted to use a parameter as part of the name of the definitions I'm creating and I can't work out how to do it.\\n\\nI want something like:\\n\\nMakeRecordTypeConverter(RecordType) := MACRO\\n NewModule.RecordType ConvertToNewXXXLayout(OldModule.RecordType L) := TRANSFORM\\n SELF := L;\\n SELF.super_new_field := L.boring_old_field + '_v2';\\n END;\\nENDMACRO;\\n
\\n\\nSo that will create a definition called ConvertToNewXXXLayout, I want to make it so the definition is called ConvertToNew<RecordType>Layout, so I can call it for my different record types. Can anyone help?\", \"post_time\": \"2017-02-08 15:57:06\" },\n\t{ \"post_id\": 15133, \"topic_id\": 3753, \"forum_id\": 8, \"post_subject\": \"Automated Unit Testing for ECL?\", \"username\": \"LaureFischer\", \"post_text\": \"Any discussions about creating a new tool for automated unit testing (like JUNIT or NUNIT) for ECL? I noticed the intern project list included using Jenkins to set up continuous integration and continuous development. \\n\\n It seems to me that "automated unit tests" are part of that process.\", \"post_time\": \"2017-02-08 19:49:34\" },\n\t{ \"post_id\": 15173, \"topic_id\": 3763, \"forum_id\": 8, \"post_subject\": \"Re: How to normalize data?\", \"username\": \"rtaylor\", \"post_text\": \"ome,I am already stuck at the very beginning, after going through the Data Tutorial.
You are correct that NORMALIZE would be a good way to accomplish what you want to do. But if all you have done is the Data Tutorial, then you have not yet begun to utilize all the training resources we provide. \\n\\nWe have a number of ECL Training classes (described here: https://hpccsystems.com/enterprise-services/professional-training) for you to avail yourself of. They are taught on-site and as remote classes. The same material is covered in our online eLearning courses, which are available here: https://learn.lexisnexis.com/hpcc\\n\\nBoth of our Introduction to ECL online courses are free to everybody in the world. These courses are the pre-requisites to all the other courses. As a graduate student you may qualify for discount codes to receive the other courses free of charge (send an email to training@hpccsystems.com to ask about that).\\n\\nNORMALIZE is covered in our Advanced ECL (part 1) course that deals with nested child datasets.\\n\\nBut here's how I would do this task. First, you have to get your data into a nested child dataset form for NORMALIZE to work with:IMPORT Std;\\nds := DATASET([{'2 12492,17184,21427,41824,67163,77807,105796,107572,113421,115485'},\\n {'3 16679,24460,27238,66550,90056,102065'},\\n {'4 20921,24460,33795,61061,171135,189017,189122'}],{STRING indata});\\nValRec := RECORD\\n UNSIGNED4 val;\\nEND;\\t\\nDNrec := RECORD\\n UNSIGNED4 RecID;\\n DATASET(ValRec) Values;\\nEND;\\n\\nDNrec XF(ds L) := TRANSFORM\\n SpacePos := Std.Str.Find(L.indata,' ',1);\\n SetStrVals := Std.Str.SplitWords(L.indata[SpacePos..],',');\\n ValuesDS := DATASET(SetStrVals,{STRING StrVal});\\n SELF.RecID := (UNSIGNED4)L.indata[1..SpacePos];\\n SELF.Values := PROJECT(ValuesDS,\\n TRANSFORM(ValRec,\\n SELF.val := (UNSIGNED4)LEFT.StrVal));\\nEND;\\nNestedDS := PROJECT(ds,XF(LEFT));\\t\\nNestedDS;
The use of inline DATASET (line 2) is taught in both Introduction to ECL classes. The various forms of RECORD structures (lines 5 & 8) are taught in all our classes. \\n\\nThe TRANSFORM function (line 13) works with the PROJECT (line 22) to produce the nested child dataset. These are taught in our Introduction to ECL (part 2) class, along with the availability of a large number of Standard Library functions available (I'm using the Std.Str.Find() and Std.Str.SplitWords() functions to create the child dataset).\\n\\nOnce you've got a nested child dataset to work with, THEN you can use NORMALIZE to parse the values out to separate records, like this:OutRec := RECORD\\n UNSIGNED4 RecID;\\n UNSIGNED4 val;\\nEND;\\n\\nNORMALIZE(NestedDS,COUNT(LEFT.Values),\\n TRANSFORM(OutRec,\\n SELF.val := LEFT.Values[COUNTER].val,\\n SELF := LEFT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-09 19:23:44\" },\n\t{ \"post_id\": 15153, \"topic_id\": 3763, \"forum_id\": 8, \"post_subject\": \"How to normalize data?\", \"username\": \"ome\", \"post_text\": \"Hello everyone,\\n\\nI am a graduate student and have to perform several tasks with HPCC. I am already stuck at the very beginning, after going through the Data Tutorial. \\n\\nWell, I have data in the following format:\\n\\n2 12492,17184,21427,41824,67163,77807,105796,107572,113421,115485\\n3 16679,24460,27238,66550,90056,102065\\n4 20921,24460,33795,61061,171135,189017,189122
\\n\\nAnd need the data in this format: \\n\\n2 12492\\n2 17184 \\n...\\n3 16679\\n3 24460 \\n..
\\nand so on. \\n\\nI have already to RECORD-Layouts and with DATASET I have the format UNSIGNED, STRING2000 and I think I need NORMALIZE to transform the data from the first format to the second. THe key thing is, I do not know how much commata are in my string so I can an unlimited number of entities there.\\n\\nBut reading the reference again and again I do not get the point how to achieve hat. Can somebody help me? Is there any further documenation I should check? I hate to ask a simple question like that already.\", \"post_time\": \"2017-02-09 16:24:13\" },\n\t{ \"post_id\": 15193, \"topic_id\": 3773, \"forum_id\": 8, \"post_subject\": \"Re: Submitting ECL via eclplus LDAP\", \"username\": \"iMikePayne\", \"post_text\": \"Also getting the similar error when use ecl.exe run file.ecl\", \"post_time\": \"2017-02-09 21:03:42\" },\n\t{ \"post_id\": 15163, \"topic_id\": 3773, \"forum_id\": 8, \"post_subject\": \"Submitting ECL via eclplus LDAP\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI am working on a cluster that has LDAP. I try to submit a workunit using eclplus and get this error:\\n\\nError: SOAP authentication error[HTTP Status 401 Unauthorized]\\n\\nI have an account on the cluster. I also include my password and username when the submitting the workunit. Is there anything internally I should to modify on the cluster?\", \"post_time\": \"2017-02-09 18:05:32\" },\n\t{ \"post_id\": 15303, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Re: Renaming a file\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nThis code works, but only when the target is hThor:IMPORT Std;\\nRenameFile(STRING CurrentFilename, STRING NewFilename) := FUNCTION\\n DATASET(STD.File.FsLogicalFileNameRecord) Superfiles := \\n STD.File.LogicalFileSuperOwners(CurrentFilename) : STORED('Superfiles');\\n RemoveFromSuperfiles := \\n SEQUENTIAL(Std.File.StartSuperfileTransaction(),\\n APPLY(SuperFiles, \\n STD.File.RemoveSuperfile('~' + name, CurrentFilename)),\\n Std.File.FinishSuperfileTransaction());\\n DoRename := STD.File.RenameLogicalFile(CurrentFilename, NewFilename);\\n ReplaceInSuperfiles := SEQUENTIAL(\\n Std.File.StartSuperfileTransaction(),\\n\\t APPLY(SuperFiles, \\n STD.File.AddSuperfile('~' + name, NewFilename)),\\n Std.File.FinishSuperfileTransaction());\\n DoItAll := SEQUENTIAL(OUTPUT(Superfiles,NAMED('SuperfileList')), \\n RemoveFromSuperfiles, \\n DoRename, \\n ReplaceInSuperfiles);\\n RETURN NOTHOR(DoItAll);\\nEND;
The OUTPUT(Superfiles) isn't strictly necessary, but it makes it easy to see exactly which superfiles were involved in the sub-file rename.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-15 16:32:39\" },\n\t{ \"post_id\": 15293, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Re: Renaming a file\", \"username\": \"james.wilson\", \"post_text\": \"Hi Richard\\n\\nGood point, I hadn't. \\n\\nHowever now I have, and got exactly the same result. Any other ideas? I guess I can save the list in a file and then read it, but that seems a very long-winded way to do it.\\n\\nThanks\\n\\nJames\", \"post_time\": \"2017-02-15 10:03:45\" },\n\t{ \"post_id\": 15263, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Re: Renaming a file\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nBut did you TRY using SEQUENTIAL? If so, what was the result? \\n\\nAND, since what you're doing is basically an all or nothing type of operation, why have you not used transaction framing around these operations (the StartSuperFileTransaction() and FinishSuperFileTransaction() functions)?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-14 20:01:32\" },\n\t{ \"post_id\": 15253, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Re: Renaming a file\", \"username\": \"james.wilson\", \"post_text\": \"Hi Richard\\n\\nThat's one of the reasons why I used ORDERED rather than SEQUENTIAL, I don't want Superfiles to be re-evaluated!\\n\\nThanks\\n\\nJames\", \"post_time\": \"2017-02-14 10:07:23\" },\n\t{ \"post_id\": 15243, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Re: Renaming a file\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nFrom the ORDERED docs:
If there is any chance of a shared value which may change meaning, you should use SEQUENTIAL.
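Applied to the RenameFile function quoted further down the thread, that is a one-line change to its final statement (just a sketch, reusing the same definition names from that code):

RETURN NOTHOR(SEQUENTIAL(RemoveFromSuperfiles, DoRename, ReplaceInSuperfiles));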
Therefore, I suggest you try replacing ORDERED with SEQUENTIAL.\\n\\nHTH\\n\\nRichard\", \"post_time\": \"2017-02-13 20:30:41\" },\n\t{ \"post_id\": 15223, \"topic_id\": 3783, \"forum_id\": 8, \"post_subject\": \"Renaming a file\", \"username\": \"james.wilson\", \"post_text\": \"I'm trying to rename logical files in code, coping with the fact that it may be in one or more superfiles. I thought I could do this:\\n\\n\\nRenameFile(STRING CurrentFilename, STRING NewFilename) := FUNCTION\\n DATASET(FsLogicalFileNameRecord) Superfiles := STD.File.LogicalFileSuperOwners(CurrentFilename);\\n RemoveFromSuperfiles := APPLY(SuperFiles, STD.File.RemoveSuperfile('~' + name, CurrentFilename));\\n DoRename := STD.File.RenameLogicalFile(CurrentFilename, NewFilename);\\n ReplaceInSuperfiles := APPLY(SuperFiles, STD.File.AddSuperfile('~' + name, NewFilename));\\n RETURN NOTHOR(ORDERED(RemoveFromSuperfiles, DoRename, ReplaceInSuperfiles));\\nEND;\\n
\\n\\nHowever when I try that I get an error that it couldn't find the superfiles for CurrentFilename. When I check it has already renamed the file, so my guess is that when it comes to putting the renamed file in to the superfiles it's attempting to re-evaluate the Superfiles value. I've tried adding the INDEPENDENT workflow service to the definition of Superfiles but then get an error about a context being expected. Is there a simple way round this?\", \"post_time\": \"2017-02-13 14:22:16\" },\n\t{ \"post_id\": 15363, \"topic_id\": 3813, \"forum_id\": 8, \"post_subject\": \"Baffling: Very long runtime on thor compared to hthor\", \"username\": \"anirudh\", \"post_text\": \"Greetings!\\n\\nI have a small piece of ECL code that is taking 1.7 seconds to run on hthor while it take 38 mins to run on thor cluster (10 node cluster).\\n\\nThe log files are available at: https://www.dropbox.com/sh/yvib442myp3yih1/AAAnYpE13fe9OXS9Vs_xer_ea?dl=0\\n\\nAny pointers would be very helpful.\\n\\n\\nIMPORT ML;\\n\\nvalue_record := RECORD\\n unsigned \\trid;\\n real \\t\\tage;\\n real \\t\\theight;\\n integer1 \\tsex; // 0 = female, 1 = male\\nEND;\\n \\nd := DATASET([{1,35,149,0},{2,11,138,0},{3,12,148,1},{4,16,156,0},\\n {5,32,152,0},{6,16,157,0},{7,14,165,0},{8,8,152,1},\\n\\t {9,35,177,0},{10,33,158,1},{11,40,166,0},{12,28,165,0},\\t\\n\\t {13,23,160,0},{14,52,178,1},{15,46,169,0},{16,29,173,1},\\n\\t {17,30,172,0},{18,21,163,0},{19,21,164,0},{20,20,189,1},\\n\\t {21,34,182,1},{22,43,184,1},{23,35,174,1},{24,39,177,1},\\n\\t {25,43,183,1},{26,37,175,1},{27,32,173,1},{28,24,173,1},\\n\\t {29,20,162,0},{30,25,180,1},{31,22,173,1},{32,25,171,1}]\\n ,value_record);\\n \\nML.ToField(d,flds0);\\nf4 := PROJECT(flds0(Number=3),TRANSFORM(ML.Types.NumericField,SELF.Number := 4,SELF.Value := 1-LEFT.Value,SELF := LEFT));\\nflds1 := flds0+f4;\\nflds := ML.Discretize.ByRounding(flds1);\\nLogisticModule := ML.Classify.Logistic();\\n\\nModel3 := LogisticModule.LearnCS(flds0(Number<=2),flds(Number=3));\\nModel3;\\nModel4 := LogisticModule.LearnCS(flds0(Number<=2),flds(Number=4));\\nModel4;\\n\\nTestModule := LogisticModule.TestD(flds(Number<=2),flds(Number>=3));\\nTestModule.CrossAssignments;\\nTestModule.PrecisionByClass;\\nTestModule.Accuracy;\\n\\nLogisticModule.ClassifyC(flds0(Number<=2),Model3);\\nLogisticModule.ClassifyC(flds0(Number<=2),Model4);\\n
\", \"post_time\": \"2017-02-17 18:15:48\" },\n\t{ \"post_id\": 19153, \"topic_id\": 3823, \"forum_id\": 8, \"post_subject\": \"Re: "Memory pool exhausted" error when running PIPE command\", \"username\": \"tlhumphrey2\", \"post_text\": \"Drea, \\n\\nDid you ever found out what was causing the "memory pool exhausted" error. I'm having the same problem.\\n\\nTim\", \"post_time\": \"2017-09-28 20:24:43\" },\n\t{ \"post_id\": 15393, \"topic_id\": 3823, \"forum_id\": 8, \"post_subject\": \""Memory pool exhausted" error when running PIPE command\", \"username\": \"drealeed\", \"post_text\": \"I'm experimenting with ways to identify whether a roxie service is really truly ready to run, with all files copied over to all roxie nodes.\\n\\nI tried calling testsocket via a PIPE command with the intention of parsing the resulting xml, but get the following error:\\n\\nError: System error: 1301: Memory pool exhausted: pool id 4194304 (1216 pages) exhausted, requested 7068 active(1) heap(1/1216) (0, 0), 1301, \\n\\nThe xml coming back from the testsocket command is quite short, just a couple of dozen lines. Here's the ecl:\\n\\n EXPORT RoxieFileCopyStatus(STRING roxienode) := PIPE('testsocket ' + roxienode + ' -lock \\\\'<control:numfilestoprocess/>\\\\'',{STRING hack});\\n\\nOUTPUT(RoxieFileCopyStatus('10.173.22.201'));\\n\\n\\nAnd here's what's returned when I run the same command ssh'd to that cluster:\\n\\n[leeddx@node010241100159 ~]$ testsocket 10.173.22.201 -lock '<control:numfilestoprocess/>'\\n<Control>\\n<Lock>5</Lock><NumServers>5</NumServers></Control>\\n<Control>\\n<Endpoint ep="10.173.22.201:9876">\\n <FilesToProcess value="0"/>\\n <Status>ok</Status>\\n</Endpoint>\\n<Endpoint ep="10.173.22.203:9876">\\n <FilesToProcess value="0"/>\\n <Status>ok</Status>\\n</Endpoint>\\n<Endpoint ep="10.173.22.202:9876">\\n <FilesToProcess value="0"/>\\n <Status>ok</Status>\\n</Endpoint>\\n<Endpoint ep="10.173.22.204:9876">\\n <FilesToProcess value="0"/>\\n <Status>ok</Status>\\n</Endpoint>\\n<Endpoint ep="10.173.22.205:9876">\\n <FilesToProcess value="0"/>\\n <Status>ok</Status>\\n</Endpoint>\\n</Control>\\n\\nAny ideas what's causing that error?\", \"post_time\": \"2017-02-21 17:11:11\" },\n\t{ \"post_id\": 15533, \"topic_id\": 3873, \"forum_id\": 8, \"post_subject\": \"Re: counterintuitive subtraction of unsigned\", \"username\": \"oleg\", \"post_text\": \"Got that.\\n\\nCreated issue 17154:\\nhttps://track.hpccsystems.com/browse/HPCC-17154\", \"post_time\": \"2017-03-01 13:50:55\" },\n\t{ \"post_id\": 15523, \"topic_id\": 3873, \"forum_id\": 8, \"post_subject\": \"Re: counterintuitive subtraction of unsigned\", \"username\": \"oleg\", \"post_text\": \"Thanks, Richard!\\n\\nWhich project I need to use for the JIRA?\", \"post_time\": \"2017-03-01 12:52:43\" },\n\t{ \"post_id\": 15513, \"topic_id\": 3873, \"forum_id\": 8, \"post_subject\": \"Re: counterintuitive subtraction of unsigned\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nInteresting. I expanded your testing a bit, like this:UNSIGNED8 T1 := 1;\\nUNSIGNED8 T2 := 2;\\nINTEGER8 dif := T1-T2;\\nINTEGER8 adif := ABS(T1-T2);\\nds := DATASET([{dif,'expecting -1'},\\n {adif,'expecting 1'},\\n {ABS(-1),'expecting 1'}],\\n {INTEGER RetVal,STRING Expecting});\\nds;\\nABS(T1-T2); //expecting 1\\nT1-T2; //expecting 18446744073709551615\\n
Bottom line is -- I suggest you submit a JIRA ticket and see what the developers have to say about it all.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-01 11:35:05\" },\n\t{ \"post_id\": 15503, \"topic_id\": 3873, \"forum_id\": 8, \"post_subject\": \"counterintuitive subtraction of unsigned\", \"username\": \"oleg\", \"post_text\": \"I noticed some counterintuitive result involving subtraction of unsigned values:\\n\\nUNSIGNED8 T1 := 1;\\nUNSIGNED8 T2 := 2;\\nINTEGER8 dif := T1-T2;\\nINTEGER8 adif := ABS(T1-T2);\\ndif; // expecting -1\\nadif; // expecting 1\\n
\\n\\nWhile the first printed result is indeed negative, the second, quite surprisingly, is negative as well.\\n\\nThe root of the problem is that the arithmetic operator assumes that the result of subtracting two unsigned values is unsigned as well.\", \"post_time\": \"2017-02-28 23:23:56\" },\n\t{ \"post_id\": 15563, \"topic_id\": 3883, \"forum_id\": 8, \"post_subject\": \"ECL Compiler issue on MacOS Sierra\", \"username\": \"ricardovinci\", \"post_text\": \"Hi everyone,\\n\\nI'm having some problems with the ECL compiler locally: basically, when I try to compile anything, it takes a really long time (1-2 minutes) to do so. I noticed this annoying problem when I was using Eclipse, so I decided to investigate more, and I figured out that this problem is not related to Eclipse or to the Eclipse plugin but may be something weird with the compiler on Mac. \\n\\nThis is my configuration:\\n\\nMac OS 10.12.2 \\nOpenSSL 0.9.8zh 14 Jan 2016\\neclcc --version 6.2.6 community_6.2.6-1
\\n\\nThis is how I'm testing it. I created a sample ECL in /tmp with just an output. So my ECL file looks like this:\\nOUTPUT('Hello world');
\\n\\nThen I go to /opt/HPCCSystems/6.2.6/clienttools/bin/ and run the command time ./eclcc /tmp/hello.ecl and it takes a long time to run. This is the output I get: \\n\\nLOGGING: could not open file '/opt/HPCCSystems/6.2.6/clienttools/bin/eclcc.log' for output\\nError: LOGGING: could not open file '/opt/HPCCSystems/6.2.6/clienttools/bin/eclcc.log' for output\\n\\nreal 1m0.582s\\nuser 0m0.010s\\nsys 0m0.009s\\n\\nSo it is taking 1 minute to compile just a single OUTPUT. \\n\\nI tried other client tools versions but I see the same thing; I tested these versions:\\n\\n
5.4.2 \\n5.4.4\\n5.6.8\\n6.0.12\\n6.2.6
\\n\\nA colleague at work is not using OS Sierra and he is not having any issues; this is his setup:\\n\\nOS El Capitan \\neclcc 5.4.2 \\nOpenSSL 1.0.2a 19 Mar 2015
\\n\\nDo you guys know what I'm missing? Maybe it is some sort of issue with the default OpenSSL version of MacOS 10.12.2?\", \"post_time\": \"2017-03-02 16:41:09\" },\n\t{ \"post_id\": 15623, \"topic_id\": 3893, \"forum_id\": 8, \"post_subject\": \"Re: ECL and ECL Plus returning different XML\", \"username\": \"oscar.foley\", \"post_text\": \"Raised \\n\\nhttps://track.hpccsystems.com/browse/HPCC-17177\", \"post_time\": \"2017-03-06 15:29:38\" },\n\t{ \"post_id\": 15613, \"topic_id\": 3893, \"forum_id\": 8, \"post_subject\": \"Re: ECL and ECL Plus returning different XML\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nThis should be reported in a JIRA ticket.\\n\\nRichard\", \"post_time\": \"2017-03-06 15:10:21\" },\n\t{ \"post_id\": 15603, \"topic_id\": 3893, \"forum_id\": 8, \"post_subject\": \"ECL and ECL Plus returning different XML\", \"username\": \"oscar.foley\", \"post_text\": \"I think I have found a minor bug.\\nI ran a command in client tools with ecl run and got this correct XML output:\\n
<Result>\\n<Dataset name='Result 1'>\\n <Row><Result_1>ETL Start...</Result_1></Row>\\n</Dataset>\\n<Dataset name='Result 2'>\\n <Row><Result_2>ETL End...</Result_2></Row>\\n</Dataset>\\n</Result>
\\n\\nWhen I later retrieve the workunit output in XML format I get only an XML fragment that I cannot parse easily.\\n\\n<Dataset name='Result 1'>\\n <Row><Result_1>ETL Start...</Result_1></Row>\\n</Dataset>\\n<Dataset name='Result 2'>\\n <Row><Result_2>ETL End...</Result_2></Row>\\n</Dataset>\\n
\\n\\nHere is the proof:\\n[attachment=0:3dd8z3pc]Capture.PNG\\n\\nIn my opinion both (ecl and eclplus) should return a valid xml (like the first one) so the bug is that xml output in eclplus fails to wrap results in <Result>/<Result>\", \"post_time\": \"2017-03-06 14:38:58\" },\n\t{ \"post_id\": 15643, \"topic_id\": 3903, \"forum_id\": 8, \"post_subject\": \"Import ECL file folder into Eclipse ECL project\", \"username\": \"lily\", \"post_text\": \"Hi everyone,\\n\\nI am trying to run ECL code in my Mac. I downloaded Eclipse and installed the all other required the applications according to the instruction document. \\n\\nBut the thing was I dont know how to import my ecl file folder into Eclipse and successfully run it. \\nI tried to import from Git repository or from File system and then convert them into ECL project. \\nBut neither of them worked out. \\n\\nI dont' know other IDE that can run ECL code in Mac. If anyone know how to solve my problem or know any other IDE to run ECL code in Mac. PLease let me know. Thank you so much.\\n\\n\\nLily\", \"post_time\": \"2017-03-06 19:17:25\" },\n\t{ \"post_id\": 15783, \"topic_id\": 3933, \"forum_id\": 8, \"post_subject\": \"Re: Grab the WorkUnit ID\", \"username\": \"fanglimian\", \"post_text\": \"Ah thanks!! I didn't realize there is a WORKUNIT function...I only used #WORKUNIT before.(silly me )\\n\\nThanks a lot!\", \"post_time\": \"2017-03-10 14:42:13\" },\n\t{ \"post_id\": 15773, \"topic_id\": 3933, \"forum_id\": 8, \"post_subject\": \"Re: Grab the WorkUnit ID\", \"username\": \"rtaylor\", \"post_text\": \"Why not just use ECL's WORKUNIT function? You can also try pressing F1 in the ECL IDE
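For example, a minimal sketch of the automation email (the recipient address and subject text here are placeholders, not from this thread):

IMPORT STD;
wuid := WORKUNIT;  // returns the current workunit ID as a string, e.g. 'W20170310-120245'
STD.System.Email.SendEmail('me@example.com',               // placeholder recipient
                           'Workunit finished: ' + wuid,   // placeholder subject
                           'Workunit ' + wuid + ' has completed.');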
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-10 12:02:45\" },\n\t{ \"post_id\": 15743, \"topic_id\": 3933, \"forum_id\": 8, \"post_subject\": \"Grab the WorkUnit ID\", \"username\": \"fanglimian\", \"post_text\": \"Hi,\\n\\nI wonder whether there is a function in ECL that can return the current workunit ID? (I want to send myself an email with workunit ID as part of the automation effort).\\n\\nThanks!\", \"post_time\": \"2017-03-09 21:28:37\" },\n\t{ \"post_id\": 15823, \"topic_id\": 3943, \"forum_id\": 8, \"post_subject\": \"Re: PARSE for finding multiple matches in a single string\", \"username\": \"slafavor\", \"post_text\": \"This is exactly what I was looking for...\\n\\nMost of the wordiness of my example was based on the example that I was working off of in the ECL Extended PARSE examples. Your version allows for me to make the modifications necessary to allow for the MULTIPOLYGON examples on that same WKT site to be parsed correctly.\\n\\nThe WKT POLYGON has two flavors where a set of points can represent either the polygon or 1 or more donut holes in a polygon. Parenthesis are used to determine the difference.\\n\\nThe MULTIPOLYGON statement is the same way in that it is a combination of POLYGON styles. Your example with the OPT(', ') part gave me the piece that I was missing to put it all together. I've posted my code below for anyone who may want to see it below, but now I just need to take the output results and parse them to get either the polygon points or the donut hole points.\\n
\\nPATTERN number := PATTERN('[0-9]')+;\\nPATTERN point := number ' ' number OPT(', ');\\nPATTERN points := point+;\\nPATTERN setOfPoints := '(' Points ')' OPT(', ');\\nPATTERN completePolygon := '(' setOfPoints+ ')';\\n\\npolyRec := {UNSIGNED1 RecID, STRING polygonRecord};\\n\\ninfile := DATASET([\\n\\t{1,'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'},\\n\\t{2,'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'},\\n\\t{3, 'MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))'},\\n\\t{4, 'MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))'}\\t\\t\\n], polyRec);\\n\\noutRec := RECORD\\n\\tpolyRec.RecID;\\n\\tstring recType;\\n\\tstring pointsStr;\\nEND;\\n\\noutRec XForm(polyRec L) := TRANSFORM\\n SELF.recType := STD.Str.GetNthWord(L.polygonRecord, 1);\\n SELF.pointsStr := MATCHTEXT(completePolygon);\\n SELF := L;\\nEND; \\n\\noutfile := PARSE(infile, polygonRecord, completePolygon, XForm(LEFT), ALL);\\n\\nOUTPUT (outfile);\\n
\\n\\nResults:\\n\\n1\\tPOLYGON\\t((30 10, 40 40, 20 40, 10 20, 30 10))\\n2\\tPOLYGON\\t((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))\\n3\\tMULTIPOLYGON\\t((30 20, 45 40, 10 40, 30 20))\\n3\\tMULTIPOLYGON\\t((15 5, 40 10, 10 20, 5 10, 15 5))\\n4\\tMULTIPOLYGON\\t((40 40, 20 45, 45 30, 40 40))\\n4\\tMULTIPOLYGON\\t((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20))\\n
\", \"post_time\": \"2017-03-14 16:10:42\" },\n\t{ \"post_id\": 15803, \"topic_id\": 3943, \"forum_id\": 8, \"post_subject\": \"Re: PARSE for finding multiple matches in a single string\", \"username\": \"rtaylor\", \"post_text\": \"slafavor,\\n\\nFor Programmer's, the temptation is always to over-complicate. But in ECL, simpler is usually better. \\n\\nHere's my take on your code:
PATTERN number := PATTERN('[0-9]')+;\\nPATTERN point := number ' ' number OPT(', ');\\nPATTERN points := Point+;\\nPATTERN setOfPoints := '(' Points ')';\\n\\npolyRec := {string polygonRecord};\\n\\ninfile := DATASET([\\n{'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'},\\n{'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'}\\n], polyRec);\\n\\noutfile := PARSE(infile, polygonRecord, setOfPoints, \\n {string Matches := MATCHTEXT(setOfPoints/Points)}, ALL);\\n\\nOUTPUT (outfile);
This produces these three records:\\n30 10, 40 40, 20 40, 10 20, 30 10\\n35 10, 45 45, 15 40, 10 20, 35 10\\n20 30, 35 35, 30 20, 20 30\\n
I think that's what you wanted, wasn't it?\\n\\nBut here's an extrapolation to keep track of which input record each set of points came from (this one uses a TRANSFORM):PATTERN number := PATTERN('[0-9]')+;\\nPATTERN point := number ' ' number OPT(', ');\\nPATTERN points := Point+;\\nPATTERN setOfPoints := '(' Points ')';\\n\\npolyRec := {UNSIGNED1 RecID,string polygonRecord};\\n\\ninfile := DATASET([\\n{1,'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'},\\n{2,'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'}\\n], polyRec);\\n\\npolyRec XF(infile L) := TRANSFORM\\n SELF.RecID := L. RecID;\\n SELF.polygonRecord := MATCHTEXT(setOfPoints/Points);\\nEND; \\n\\noutfile := PARSE(infile, polygonRecord, setOfPoints, \\n XF(LEFT), ALL);\\n\\nOUTPUT (outfile);
which produces these records:1\\t30 10, 40 40, 20 40, 10 20, 30 10\\n2\\t35 10, 45 45, 15 40, 10 20, 35 10\\n2\\t20 30, 35 35, 30 20, 20 30\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-14 06:24:31\" },\n\t{ \"post_id\": 15793, \"topic_id\": 3943, \"forum_id\": 8, \"post_subject\": \"PARSE for finding multiple matches in a single string\", \"username\": \"slafavor\", \"post_text\": \"I am trying to come up with an ECL way of parsing the POLYGON lines from WKT (definition found here:https://en.wikipedia.org/wiki/Well-known_text\\n\\n'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'\\n\\nSo I have developed the following code:\\n\\n\\nPATTERN number := PATTERN('[0-9]')+;\\nPATTERN ws := ' ';\\nPATTERN openParen := '(';\\nPATTERN closeParen := ')';\\nPATTERN comma := ',';\\nPATTERN commaSeparator := comma ws;\\nPATTERN point := number ws number;\\nPATTERN nextPoint := commaSeparator point;\\nPATTERN allPolyPoints := point nextPoint+;\\nPATTERN setOfPoints := openParen allPolyPoints closeParen;\\nPATTERN nextSetOfPoints := commaSeparator setOfPoints;\\nPATTERN polygon := 'POLYGON' ws openParen setOfPoints nextSetOfPoints* closeParen;\\n\\npolyRec := {string polygonRecord};\\n\\ninfile := DATASET([\\n{'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'},\\n{'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'}\\n], polyRec);\\n\\noutfile := PARSE(infile, polygonRecord, polygon, {string Matches := MATCHTEXT(allPolyPoints)}, ALL);\\n\\nOUTPUT (outfile);\\n
\\n\\nNote that the number of points in each grouping can be variable (but always at least more than one) as well as the number of groupings in each polygon statement (always at least one).\\n\\nI would expect to get one record for the first line, and two for the second, but I am only getting one record per line that contains the first grouping for each POLYGON line.\\n\\nThat would be my first question. I've tried TRANSFORMS (which does not support COUNTER so I don't know which one to read), but I think this would be a good start.\", \"post_time\": \"2017-03-10 21:38:24\" },\n\t{ \"post_id\": 15933, \"topic_id\": 3963, \"forum_id\": 8, \"post_subject\": \"Re: How do I write ecl for this roxie query?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I made an error in my ecl soapcall, i.e. I had the following as the server's url: 'http://http://52.14.86.34:9876/roxie
. But, what ran forever didn't have 'http://' in the url twice.\\n\\nI did get the ecl to work by using the private ip instead of the public ip.\", \"post_time\": \"2017-03-21 20:37:08\" },\n\t{ \"post_id\": 15923, \"topic_id\": 3963, \"forum_id\": 8, \"post_subject\": \"How do I write ecl for this roxie query?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I can put the following into my browser and get back valid responses from my roxie query: http://52.14.86.34:9876/roxie/SimpleRoxieQuery.\\nHere is what is returned:<SimpleRoxieQueryResponse xmlns="urn:hpccsystems:ecl:simpleroxiequery" sequence="0">\\n <Results>\\n <Result>\\n <Dataset xmlns="urn:hpccsystems:ecl:simpleroxiequery:result:result_1" name="Result 1">\\n <Row>\\n <ltr>T</ltr>\\n <recid>44298</recid>\\n <recptr>398673</recptr>\\n </Row>\\n </Dataset>\\n </Result>\\n </Results>\\n</SimpleRoxieQueryResponse>
\\n\\nBut, I can't seem to write an ecl soapcall that works. Here is my last try which runs forever:\\nrec := RECORD\\nSTRING ltr;\\nUNSIGNED recid;\\nUNSIGNED recptr;\\nEND;\\nip := 'http://http://52.14.86.34:9876/roxie'; \\nsvc:= 'SimpleRoxieQuery';\\ns:=SOAPCALL(ip, svc,{name:='t'},rec);\\nOUTPUT(s);
\\n\\nAny suggestions?\", \"post_time\": \"2017-03-21 20:05:53\" },\n\t{ \"post_id\": 15943, \"topic_id\": 3973, \"forum_id\": 8, \"post_subject\": \"GetDFUWorkunits SOAP call fails while calling from ECL\", \"username\": \"balajisampath\", \"post_text\": \"Can someone help me with this.I am getting error while calling from ECL but works fine while calling from R.\\n\\nCode,error message and version below:\\n\\n\\nSTRING URL := 'http://192.168.56.101:8010/FileSpray/';\\n\\n\\nrWUQuery\\t:= \\tRECORD\\n\\t\\n\\tSTRING \\tWuid{XPATH('Wuid'),MAXLENGTH(20)} \\t\\t:=\\t'';\\n\\tSTRING \\tOwner{XPATH('Owner'),MAXLENGTH(30)} \\t\\t:=\\t'';\\n\\tSTRING \\tCluster{XPATH('Cluster'),MAXLENGTH(30)} \\t:=\\t'';\\n\\tSTRING \\tStateReq{XPATH('StateReq'),MAXLENGTH(40)} \\t\\t:= \\t'';\\n\\tSTRING \\tType{XPATH('Type'),MAXLENGTH(40)} \\t\\t:= \\t'';\\n\\tSTRING \\tJobname{XPATH('Jobname'),MAXLENGTH(30)} \\t:=\\t'';\\n UNSIGNED PageSize{XPATH('PageSize')} := 2147483647;\\n\\tUNSIGNED CurrentPage{XPATH('CurrentPage')} := 32716;\\n\\tINTEGER PageStartFrom{XPATH('PageStartFrom')} := -1;\\n\\tSTRING \\tSortby{XPATH('Sortby'),MAXLENGTH(30)} \\t:=\\t'';\\n\\tBOOLEAN \\tDescending{XPATH('Descending'),MAXLENGTH(30)} \\t:=\\tFALSE;\\n\\tINTEGER CacheHint{XPATH('CacheHint')} := -1;\\n\\nEND;\\n\\nrEspException\\t:= \\tRECORD\\n\\t\\tSTRING\\t\\tCode{XPATH('Code'),maxlength(10)};\\n\\t\\tSTRING\\t\\tAudience{XPATH('Audience'),maxlength(50)};\\n\\t\\tSTRING\\t\\tSource{XPATH('Source'),maxlength(30)};\\n\\t\\tSTRING\\t\\tMessage{XPATH('Message'),maxlength(200)};\\nEND;\\n\\nrDFUWorkunit :=\\t\\tRECORD\\n\\tSTRING \\t\\tWorkunits{XPATH('Wuid'),maxlength(10)};\\n\\tSTRING \\t\\tOwner{XPATH('Owner'),maxlength(50)};\\n\\tSTRING \\t\\tJobname{XPATH('Jobname'),maxlength(30)};\\n\\tSTRING \\t\\tCluster{XPATH('Cluster'),maxlength(200)};\\nEND;\\n\\n\\nrWUQueryResponse\\t:= RECORD\\n\\tDATASET(rESPException)\\t\\tExceptions{XPATH('Exceptions/ESPException'),maxcount(110)};\\n\\tDATASET(rDFUWorkunit)\\t\\tDFUWorkunit{XPATH('results/DFUWorkunit'),maxcount(110)};\\nEND;\\n\\n\\n\\nDATASET(rWUQueryResponse) dWUSubmitResult\\t:=\\tSOAPCALL(URL,\\n\\t\\t\\t\\t 'GetDFUWorkunits',\\n\\t\\t\\t\\t rWUQuery,\\n\\t\\t\\t\\t DATASET(rWUQueryResponse),\\n\\t\\t\\t\\t XPATH('GetDFUWorkunitResponse')\\n\\t\\t\\t\\t);\\n\\n\\ndWUSubmitResult;\\n
\\n\\n\\nError: System error: -1: <Error><text>HTTP error (500) in processQuery</text><soapresponse><?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext"><soap:Body><soap:Fault><faultcode>400</faultcode><faultstring>[400: Bad Request [Method GetDFUWorkunitsRequest not available in service FileSpray]] </faultstring><faultactor>Esp</faultactor><detail><Exceptions xmlns="urn:hpccsystems:ws:filespray" xsi:schemaLocation="urn:hpccsystems:ws:filespray http://192.168.56.101:8010/FileSpray/?xsd"><Source>Esp</Source><Exception><Code>400</Code><Audience>user</Audience><Message>Bad Request [Method GetDFUWorkunitsRequest not available in service FileSpray]</Message></Exception></Exceptions></detail></soap:Fault></soap:Body></soap:Envelope></soapresponse><url>http://192.168.56.101:8010/FileSpray</url></Error> (in SOAP dataset G1 E2) (0, 0), -1, \\n
\", \"post_time\": \"2017-03-22 14:46:55\" },\n\t{ \"post_id\": 16173, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"Re: How to iterate a SET for multiple fetches and joins?\", \"username\": \"rtaylor\", \"post_text\": \"ome,\\n\\nTry it this way:IMPORT ome;\\nSetNeedleWords := [33795, 24460, 77807];\\nFetchByWords := FETCH(ome.DataDocWordList, \\n ome.IdxDocWordList(Word IN SetNeedleWords), \\n RIGHT.fpos);\\nOUTPUT(FetchByWords , NAMED('Docs with SetNeedleWords'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-09 23:28:43\" },\n\t{ \"post_id\": 16013, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"Re: How to iterate a SET for multiple fetches and joins?\", \"username\": \"ome\", \"post_text\": \"rtaylor,\\n\\nthank you very much for your solution. It worked like a charm and I learned a lot about ECL using it.\\n\\nNevertheless, is there to use the different FETCH's and JOIN's and really to loop/iterate through my SET? Maybe, that looks counter-intuitive to an real ECL-developer, and may not be really declarative, but I need it this way.\\n\\nSorry to bother you again and thank you so much in advance.\", \"post_time\": \"2017-03-27 05:43:24\" },\n\t{ \"post_id\": 15983, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"Re: How to iterate a SET for multiple fetches and joins?\", \"username\": \"rtaylor\", \"post_text\": \"ome,\\n\\nThis one uses and INDEX:rec := {UNSIGNED docid, UNSIGNED word};\\nds := DATASET([{1,1},{1,3},{1,2},{2,2},{2,3},{3,1},{3,3}],rec);\\nidx1 := INDEX(ds,{word},{docid},'~RTTEST::IDX::WordSerachKey');\\nBld := BUILD(idx1,overwrite);\\nSetWords1 := [1,2];\\nSetWords2 := [3,2];\\n\\nFindDocs(DATASET(RECORDOF(idx1)) idx,SET S) := FUNCTION\\n Candidates := idx(KEYED(word IN S));\\n Finalists := TABLE(Candidates,{Docid, cnt := COUNT(GROUP)},docid)(cnt = COUNT(S));\\n RETURN TABLE(Finalists,{docID});\\nEND; \\nSEQUENTIAL(Bld,OUTPUT(FindDocs(idx1,SetWords1)),OUTPUT(FindDocs(idx1,SetWords2)));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-23 23:55:50\" },\n\t{ \"post_id\": 15973, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"Re: How to iterate a SET for multiple fetches and joins?\", \"username\": \"ome\", \"post_text\": \"rtaylor,\\n\\nthank you very much for your answer. I will test it. But as I can see, the index structure is not used, I created (ome.IdxDocWordList). \\n\\nI need to use my index, how can I do that using your approach?\", \"post_time\": \"2017-03-23 19:28:14\" },\n\t{ \"post_id\": 15963, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"Re: How to iterate a SET for multiple fetches and joins?\", \"username\": \"rtaylor\", \"post_text\": \"ome,\\n\\nHere's how I would do it:rec := {UNSIGNED docid, UNSIGNED word};\\nds1 := DATASET([{1,1},{1,2},{2,2},{2,3},{3,1},{3,3}],rec);\\nds2 := DATASET([{1,3},{1,2},{2,2},{2,3},{3,1},{3,3}],rec);\\n\\nSetWords1 := [1,2];\\nSetWords2 := [3,2];\\n\\nFindDocs(DATASET(rec) DS,SET S) := FUNCTION\\n Candidates := DS(word IN S);\\n Finalists := TABLE(Candidates,{Docid, cnt := COUNT(GROUP)},docid)(cnt = COUNT(S));\\n RETURN TABLE(Finalists,{docID});\\nEND;\\t\\n\\t\\nFindDocs(ds1,SetWords1);\\t\\nFindDocs(ds2,SetWords2);
The key here is the FindDocs function that simply makes use of a crosstab report to get the count of results for each doc and only includes the ones that have all the words in the set as the Finalists, from which just the winning DocIDs are returned.\\n\\nWhen you run this code, the first call returns only the DocID 1, while the second returns both 1 and 2 (since both docs 1 & 2 have words 2 & 3 present).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-23 01:17:12\" },\n\t{ \"post_id\": 15953, \"topic_id\": 3983, \"forum_id\": 8, \"post_subject\": \"How to iterate a SET for multiple fetches and joins?\", \"username\": \"ome\", \"post_text\": \"My data-structure is {UNSIGNED docid, UNSIGNED word}. My problem is: which docid have the same given (!) words? \\n\\nExample: (d1, w1), (d1, w2), (d2, w2), (d2, w3). My input is: (w1, w2). The expected result is: (d1). d2 is not a part of the result set, because I have no tuple with (d2, w1) (but with w2).\\n\\nTHe following code works as expected:\\n\\n\\nIMPORT ome;\\nSetNeedleWords := [33795, 24460, 77807];\\nFetchByW1 := FETCH(ome.DataDocWordList, \\n ome.IdxDocWordList(Word=SetNeedleWords[1]), \\n RIGHT.fpos);\\n\\nFetchByW2 := FETCH(ome.DataDocWordList, \\n ome.IdxDocWordList(Word=SetNeedleWords[2]), \\n RIGHT.fpos);\\n\\nFetchByW3 := FETCH(ome.DataDocWordList, \\n ome.IdxDocWordList(Word=SetNeedleWords[3]), \\n RIGHT.fpos);\\n\\nstep1 := JOIN(FetchByW1, FetchByW2, LEFT.documentid=RIGHT.documentID);\\nstep2 := JOIN(step1, FetchByW3, LEFT.documentid=RIGHT.documentID);\\n\\nOUTPUT(step2, NAMED('Docs with SetNeedleWords'));\\n
.\\n\\nObviously, this code does not scale. Intuitively I would iterate the SetNeedleWords and perform on each iteration a JOIN (currently in step) so my result set will shrink and after the last element I would love to output the result. But I have no idea (a) how to iterate the SET and how to shrink the resultset using multiple fetches and joins.\\n\\nHow can I achieve that?\", \"post_time\": \"2017-03-22 16:22:24\" },\n\t{ \"post_id\": 16113, \"topic_id\": 4033, \"forum_id\": 8, \"post_subject\": \"Re: Cannot use an abstract MODULE in this context\", \"username\": \"bforeman\", \"post_text\": \"Hi Steve,\\n\\nDid you try:\\n\\nmod := if(true,OUTPUT(mod001),OUTPUT(mod002));\\nmod;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-04-05 13:21:04\" },\n\t{ \"post_id\": 16053, \"topic_id\": 4033, \"forum_id\": 8, \"post_subject\": \"Cannot use an abstract MODULE in this context\", \"username\": \"slafavor\", \"post_text\": \"I have an issue trying to modify elements of an interface inside of a MODULE structure that I believe I have boiled down to the snippet of code below.\\n\\n\\niFace := INTERFACE\\n\\tEXPORT string1 stringOne := 'a';\\n\\tEXPORT string1 stringTwo := 'b';\\n\\tEXPORT boolean checkThis := TRUE;\\n\\tEND;\\n\\nmod001 := MODULE(iFace ) END;\\n\\nmod002 := MODULE(iFace )\\n\\tEXPORT checkThis := FALSE;\\nEND;\\n\\nmod := if(true,mod001,mod002);\\n
\\n\\nIf I put an OUTPUT(mod001) line next, it works - I get three outputs: checkThis (true), stringOne, and stringTwo.\\n\\nIf I put an OUTPUT(mod002) line next, it works - I get three outputs: checkThis (false), stringOne, and stringTwo.\\n\\nIf I put an OUTPUT(mod) line next, I get the error of "Cannot use an abstract MODULE in this context"\\n\\nIt's definitely something in the IF statement. I've tried projecting mod onto iFace, but still get the same error.\\n\\nClient Version: community_6.2.0-1\\nServer Version: internal_6.2.6-1\\n\\nAny ideas?\\n\\n*Steve*\", \"post_time\": \"2017-03-27 19:03:21\" },\n\t{ \"post_id\": 16313, \"topic_id\": 4063, \"forum_id\": 8, \"post_subject\": \"Re: MAP gives different result from IF and CASE\", \"username\": \"NP\", \"post_text\": \"Richard, \\n\\n[quote="rtaylor":30qmr3l5]NP,I would've hoped that CASE would produce the same result as MAP, given the same input data. Doesn't seem to be the case.
I agree that MAP and CASE should operate the same in this case. Can you please submit a JIRA ticket for this?\\n\\nThanks,\\n\\nRichard\\n\\nWhile trying to create the JIRA ticket, I've given it some more thought and I've come to think that CASE is just smart enough to use Uni comparison based on the param value, while MAP only did what I told it to - which is to use "=". I still think there is a problem with MAP, cause "=" works fine in IF, and then I found some more inconsistencies with MAP. I've detailed them all in the ticket: https://track.hpccsystems.com/browse/HPCC-17390. \\n\\nThanks for your help,\\n\\nNemanja\", \"post_time\": \"2017-04-11 18:31:32\" },\n\t{ \"post_id\": 16183, \"topic_id\": 4063, \"forum_id\": 8, \"post_subject\": \"Re: MAP gives different result from IF and CASE\", \"username\": \"rtaylor\", \"post_text\": \"NP,I would've hoped that CASE would produce the same result as MAP, given the same input data. Doesn't seem to be the case.
I agree that MAP and CASE should operate the same in this case. Can you please submit a JIRA ticket for this?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2017-04-09 23:34:46\" },\n\t{ \"post_id\": 16143, \"topic_id\": 4063, \"forum_id\": 8, \"post_subject\": \"Re: MAP gives different result from IF and CASE\", \"username\": \"NP\", \"post_text\": \"Thanks Bob. You are right, it's something in the data, I just can't figure out what. Since I posted this, I've tried some other things and it seems that if I use Std.Uni.CompareAtStrength instead of "=" to compare the Unicode strings, it then works as expected. However, just for consistency, I would've hoped that CASE would produce the same result as MAP, given the same input data. Doesn't seem to be the case. \\n\\nThanks again.\", \"post_time\": \"2017-04-05 14:42:53\" },\n\t{ \"post_id\": 16123, \"topic_id\": 4063, \"forum_id\": 8, \"post_subject\": \"Re: MAP gives different result from IF and CASE\", \"username\": \"bforeman\", \"post_text\": \"It;s got to be something with the field information coming in. I modified your code and I am getting the expected result:\\n\\n // IMPORT Util.Files;\\n\\n FilePath := 'MyTestFile';\\n FileLayout := { utf8 MyValue };\\n // MyFile := Files.LoadCsvFile(FilePath, FileLayout);\\n\\n x := u'Permanent';\\n y := u'Permanent'; //MyFile[1].MyValue;\\n\\n MapUsingMap(UTF8 myValue) := FUNCTION\\n RETURN MAP(\\n myValue = U'Permanent' => 2,\\n myValue = U'Fixed term contract' => 3,\\n myValue = U'Fixed Term' => 3,\\n 1\\n );\\n END;\\n \\n MapUsingCase(UTF8 myValue) := FUNCTION\\n RETURN CASE(myValue,\\n U'Permanent' => 2,\\n U'Fixed term contract' => 3,\\n U'Fixed Term' => 3,\\n 1\\n );\\n END;\\n \\n x = y; // true\\n IF(x = U'Permanent', 2, 1); // 2\\n IF(y = U'Permanent', 2, 1); // 2\\n MapUsingCase(x); // 2\\n MapUsingCase(y); // 2\\n MapUsingMap(x); // 2\\n // so, by now I'm pretty sure that mapping y should produce 2 as the result\\n MapUsingMap(y); // 1 - how?\\n
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-04-05 13:29:41\" },\n\t{ \"post_id\": 16103, \"topic_id\": 4063, \"forum_id\": 8, \"post_subject\": \"MAP gives different result from IF and CASE\", \"username\": \"NP\", \"post_text\": \"Hi,\\n\\nI am trying to use Map and I am getting a strange result - please see the code\\n\\n\\nIMPORT Util.Files;\\n\\nFilePath := 'MyTestFile';\\nFileLayout := { utf8 MyValue };\\nMyFile := Files.LoadCsvFile(FilePath, FileLayout);\\n\\nx := u'Permanent';\\ny := MyFile[1].MyValue; \\n\\nMapUsingMap(UTF8 myValue) := FUNCTION\\n RETURN MAP(\\n myValue = U'Permanent' => 2,\\n myValue = U'Fixed term contract' => 3,\\n myValue = U'Fixed Term' => 3,\\n 1\\n );\\nEND;\\n \\nMapUsingCase(UTF8 myValue) := FUNCTION\\n RETURN CASE(myValue, \\n U'Permanent' => 2, \\n U'Fixed term contract' => 3, \\n U'Fixed Term' => 3, \\n 1\\n );\\nEND;\\n \\nx = y; // true \\nIF(x = U'Permanent', 2, 1); // 2\\nIF(y = U'Permanent', 2, 1); // 2\\nMapUsingCase(x); // 2\\nMapUsingCase(y); // 2\\nMapUsingMap(x); // 2\\n// so, by now I'm pretty sure that mapping y should produce 2 as the result\\nMapUsingMap(y); // 1 - how?\\n
\\n\\nWhy does the MAP function return a different value for x and y when IF and CASE return the same?\\n\\nThanks\", \"post_time\": \"2017-03-31 12:50:36\" },\n\t{ \"post_id\": 18963, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"bforeman\", \"post_text\": \"SALT versions as recent as 3.7.1 still depend on this compiler option, though.
\\n\\nFor now As Richard said, this too will probably be deprecated in the near future.\\n\\nWhen I get ready to do any SALT work, I have a custom SALT configuration in the ECL IDE that uses the -legacy flag, for all other work, the -legacy flag is removed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-09-22 14:23:18\" },\n\t{ \"post_id\": 18933, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"RoqScheer\", \"post_text\": \"SALT versions as recent as 3.7.1 still depend on this compiler option, though.\", \"post_time\": \"2017-09-22 14:11:09\" },\n\t{ \"post_id\": 18913, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"rtaylor\", \"post_text\": \"RoqScheer,
The compiler supports a "-legacy" parameter
That -legacy switch is meant only to support allowing pre-Open Source, central ECL Repository code to continue to function with newer Open Source builds. \\n\\nThe move to Open Source happened 6.5 years ago. That -legacy feature was added to allow a gradual migration from the pre-Open Source IMPORT code requirements (much looser due to the assumption of the presence of a central ECL code Repository) to the new Open Source requirements (where explicit IMPORT and full qualification is always required because there is no central Repository). \\n\\nPrior to going Open Source, the HPCC platform was primarily an internal LNRS tool. LNRS migration to the new Open Source style (no central Repository) is in progress, so you can expect that this "feature' will eventually be deprecated. IOW, this is NOT a "shortcut" that you should rely on always being there to allow you to not have to type a few extra characters. \\n\\nFor all new code on a non-central Repository system, you should write ECL that follows the Open Source rules. We have been teaching that style of coding in all our ECL classes since the first day we went Open Source. You should NOT be using that -legacy switch on any environment that doesn't still have a central ECL code Repository.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-22 14:01:31\" },\n\t{ \"post_id\": 18893, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"RoqScheer\", \"post_text\": \"The compiler supports a "-legacy" parameter on the command line. When using the ECL IDE, this parameter can be added in the "Arguments" field under the "Compiler" tab of the "Preferences" dialog box.\\n\\nThis parameter forces the compiler to look for definitions in the same folder without the need to explicitly IMPORT and fully qualify them. This helps to run legacy code unmodified.\", \"post_time\": \"2017-09-21 14:36:30\" },\n\t{ \"post_id\": 16353, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thanks a lot Bob. This information helped a lot.\", \"post_time\": \"2017-04-12 06:35:27\" },\n\t{ \"post_id\": 16303, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"bforeman\", \"post_text\": \"You may be referring to a feature in the repository before HPCC went open source that allowed this, but the Default folder in Open Source repositories is no longer valid and you MUST EXPLICITLY IMPORT a folder if you want to reference anything EXPORTed.\\n\\nThere is also this:\\n\\nIMPORT * FROM <foldername>;\\n
\\nWhich removes the need to reference the folder, but development has told us that this usage will soon be deprecated, so I do not recommend using it. \\n\\nBob\", \"post_time\": \"2017-04-11 17:19:53\" },\n\t{ \"post_id\": 16273, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thank you Bob.\\nBut in our current ECL repository, I do find most of the files being referenced in other files, without having the parent folder name prefixed to them. Is there some configuration with some environments / ECL IDE that helps in referencing files without being prefixed with their folder name ?\\n\\nThanks and regards,\\nAkhilesh.\", \"post_time\": \"2017-04-11 12:53:27\" },\n\t{ \"post_id\": 16263, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Re: Compiling ECL code without specifying it's Module name.\", \"username\": \"bforeman\", \"post_text\": \"Hi Akhilesh,\\n\\nIn short, yes!\\n\\nAll EXPORTed definitions need to be explicitly IMPORTed, even if they are in the same folder.\\n\\nYou can also use the $ shortcut when referencing EXPORTed definitions in the same folder:\\n\\nIMPORT $;\\n$.MyFile1;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-04-11 12:48:41\" },\n\t{ \"post_id\": 16193, \"topic_id\": 4093, \"forum_id\": 8, \"post_subject\": \"Compiling ECL code without specifying it's Module name.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello,\\n\\nI have the following structure of files in my ECL repository.\\nModuleName :- MyModule\\nFiles within the Module:-\\n1. MyFile1\\n2. MyFile2\\n\\nI am referencing MyFile1 within MyFile2. Since both these files belong to the same Module (folder within my ECL repository), I did not refer MyFile1 in MyFile2 like "MyModule.MyFile1". This approach results in error that MyFile1 could not be found.\\n\\nNow when I import MyModule within MyFile2 and then call MyFile1 like "MyModule.MyFile1", then the ECL code in MyFile2 gets compiled.\\n\\nIs it always necessary to Import the module name within files belonging to the same Module ?\\n\\nThanks and regards,\\nAkhilesh.\", \"post_time\": \"2017-04-10 11:55:46\" },\n\t{ \"post_id\": 16393, \"topic_id\": 4113, \"forum_id\": 8, \"post_subject\": \"Re: SendEmail does not work on AWS\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nThat error looks to me like you're getting to the email server but it wants username/password. Looking at the SendMail() docs I don't see a way to provide them. \\n\\nI suggest raising a JIRA for this issue to either add those parameters or improve the docs as to where/how to include them in the existing set.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 18:35:44\" },\n\t{ \"post_id\": 16253, \"topic_id\": 4113, \"forum_id\": 8, \"post_subject\": \"Re: SendEmail does not work on AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"I didn't finish my previous post.\\n\\nI get the above mentioned error message with a lot of different email servers -- NOT all but a lot. I'm running this in us-east-1 and therefore also tried the aws email server "email-smtp.us-east-1.amazonaws.com". Using this email server, I get the following error message: Negative reply from mail server at email-smtp.us-east-1.amazonaws.com:25 after writing MAIL FROM:<timothy.humphrey@lexisnexis.com> in SendEmail*: 530 Authentication required
\", \"post_time\": \"2017-04-10 18:58:32\" },\n\t{ \"post_id\": 16243, \"topic_id\": 4113, \"forum_id\": 8, \"post_subject\": \"SendEmail does not work on AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"Here is my ecl: IMPORT STD;\\nSTD.System.Email.SendEmail(\\n 'timothy.humphrey@lexisnexis.com',\\n 'Test subject line. 170410',\\n 'Test body line170410',\\n\\t'appmail.risk.regn.net',\\n\\t,\\n\\t'timothy.humphrey@lexisnexis.com'\\n);\\n
\\n\\nI get the following error:\\nSystem error: 0: Could not resolve mail server address appmail.risk.regn.net in SendEmail*
\", \"post_time\": \"2017-04-10 18:52:50\" },\n\t{ \"post_id\": 16293, \"topic_id\": 4123, \"forum_id\": 8, \"post_subject\": \"Re: HASHMD5 with Nested Dataset !!\", \"username\": \"bforeman\", \"post_text\": \"The documentation for HASHMD5 states:\\n\\nUnlike other hashing functions, trailing spaces are NOT trimmed before the value is calculated.\\n
\\n\\nSo since you are hashing an entire dataset, it could be that the algorithm is incorrect for the entire dataset.\\n\\nWhen I tried it on a single record:\\n\\nHASHMD5(personDataset[1]);\\nHASHMD5(personDataset_new[1]);
\\n\\nThe hash values are indeed different as expected.\\n\\nI would recommend that you please log this anomaly to the Issue Tracker System:\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-04-11 15:26:40\" },\n\t{ \"post_id\": 16283, \"topic_id\": 4123, \"forum_id\": 8, \"post_subject\": \"HASHMD5 with Nested Dataset !!\", \"username\": \"YASHAS RATTEHALLI\", \"post_text\": \"We have a use case where we need to generate HASHMD5 on a Nested Dataset. However, We noticed a weird behaviour when we tried to do this !\\n\\nI’m posting a sample code below to reproduce the behaviour. \\n\\n// ------------------------- CODE STARTS HERE ----------------------------------\\nnameRecord := {STRING20 lname,STRING10 fname,STRING1 initial := ''};\\n\\npersonRecord := RECORD\\n nameRecord primary;\\n nameRecord mother;\\n nameRecord father;\\nEND;\\n\\npersonDataset := DATASET([{{'James','Walters','C'},\\n {'Jessie','Blenger'},\\n {'Horatio','Walters'}},\\n {{'Anne','Winston'},\\n {'Sant','Aclause'},\\n {'Elfin','And'}}], personRecord);\\n \\npersonDataset_new := DATASET([{{'Jimmy','Walters','C'},\\n {'Jessie','Blenger'},\\n {'Horatio','Walters'}},\\n {{'Anne','Winston'},\\n {'Sant','Aclause'},\\n {'Elfin','And'}}], personRecord);\\n \\nHASHMD5(personDataset);\\nHASHMD5(personDataset_new);\\nHASH64(personDataset);\\nHASH64(personDataset_new);\\nHASH32(personDataset);\\nHASH32(personDataset_new);\\nHASH(personDataset);\\nHASH(personDataset_new);\\n\\n// ------------------------- CODE ENDS HERE ------------------------------------\\n\\nThough HASHMD5 does accept Nested Datasets, The value which is generated is same for both “personDataset” & “personDataset_new”. Why is this happening ?\\n\\nAlso, HASH64 does seem to work just fine ! 
So for now we can use HASH64 instead of HASHMD5, However the question is how reliable is HASH64 with Nested Datasets give the fact that HASHMD5 doesn’t seem to work well ?\", \"post_time\": \"2017-04-11 14:49:59\" },\n\t{ \"post_id\": 16433, \"topic_id\": 4143, \"forum_id\": 8, \"post_subject\": \"Re: foreign dataset from AWS to VMWare\", \"username\": \"rtaylor\", \"post_text\": \"Zahir,\\n\\nI suggest you submit a JIRA ticket for this issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 21:15:12\" },\n\t{ \"post_id\": 16363, \"topic_id\": 4143, \"forum_id\": 8, \"post_subject\": \"foreign dataset from AWS to VMWare\", \"username\": \"zhezouat\", \"post_text\": \"Hello,\\n\\nI have got this issue when I use foreign between aws HPCC cluster and vmware HPCC cluster.\\n\\nAWS HPCC Cluster\\nI have these file parts : \\n\\nPart\\nCopy\\nIP\\nCluster\\nSize\\nActual Size\\n1\\t1\\t10.52.96.188\\t__anon12\\t51,007\\t\\n2\\t1\\t10.52.96.191\\t__anon12\\t7,911\\t\\n3\\t1\\t10.52.96.190\\t__anon12\\t34,464\\t\\n4\\t1\\t10.52.96.48\\t__anon12\\t11,359\\t\\n5\\t1\\t10.52.96.51\\t__anon12\\t13,720\\t\\n6\\t1\\t10.52.96.185\\t__anon12\\t6,491\\t\\n7\\t1\\t10.52.96.184\\t__anon12\\t14,803\\t\\n8\\t1\\t0.0.0.0\\t__anon12\\t3,054\\t\\n9\\t1\\t0.0.0.191\\t__anon12\\t11,071\\t\\n10\\t1\\t0.0.0.190\\t__anon12\\t32,284\\t\\n11\\t1\\t0.0.0.48\\t__anon12\\t3,963\\t\\n12\\t1\\t0.0.0.51\\t__anon12\\t16,097\\t\\n13\\t1\\t0.0.0.185\\t__anon12\\t36,045\\t\\n14\\t1\\t0.0.0.184\\t__anon12\\t47,293\\t\\n15\\t1\\t0.0.0.187\\t__anon12\\t179,054\\t\\n16\\t1\\t10.52.96.188\\t__anon12\\t328,874\\t\\n\\nAlthough on VMware HPCC Cluster\\n\\nPart\\nCopy\\nIP\\nCluster\\nSize\\nActual Size\\n1\\t1\\t10.52.96.188\\tthor1\\t8,415\\t\\n2\\t1\\t10.52.96.191\\tthor1\\t8,188\\t\\n3\\t1\\t10.52.96.190\\tthor1\\t8,008\\t\\n4\\t1\\t10.52.96.48\\tthor1\\t8,752\\t\\n5\\t1\\t10.52.96.51\\tthor1\\t8,540\\t\\n6\\t1\\t10.52.96.185\\tthor1\\t9,164\\t\\n7\\t1\\t10.52.96.184\\tthor1\\t9,005\\t\\n8\\t1\\t10.52.96.187\\tthor1\\t9,149\\t\\n9\\t1\\t10.52.96.188\\tthor1\\t9,834\\t\\n10\\t1\\t10.52.96.191\\tthor1\\t9,017\\t\\n11\\t1\\t10.52.96.190\\tthor1\\t8,104\\t\\n12\\t1\\t10.52.96.48\\tthor1\\t8,295\\t\\n13\\t1\\t10.52.96.51\\tthor1\\t7,273\\t\\n14\\t1\\t10.52.96.185\\tthor1\\t9,367\\t\\n15\\t1\\t10.52.96.184\\tthor1\\t8,905\\t\\n16\\t1\\t10.52.96.187\\tthor1\\t9,397\\t\\n\\n\\nAs you can see, on Aws I have ip 0.0.0.0 in file parts instead of having correct ip. 
I think the daliserver on HPCC AWS can't retrieve all file parts into VMWare's daliserver.\\n\\nDoes foreign option work between aws and vmware ?\\n\\nBest regards,\\n\\nZahir,\", \"post_time\": \"2017-04-12 07:03:58\" },\n\t{ \"post_id\": 18133, \"topic_id\": 4153, \"forum_id\": 8, \"post_subject\": \"Re: Problem with #EXPAND\", \"username\": \"oscar.foley\", \"post_text\": \"Bug raised: https://track.hpccsystems.com/browse/HPCC-17950\\nSomehow screenshot was deleted \\nHere it is again...\\n\\n[attachment=0:3ac9pvdk]Capture.PNG\", \"post_time\": \"2017-07-05 16:46:56\" },\n\t{ \"post_id\": 16443, \"topic_id\": 4153, \"forum_id\": 8, \"post_subject\": \"Re: Problem with #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nI can't test this code since it is written for a legacy system and my training clusters are all Open Source (syntax check fails with this error: "Warning: (7,14): error C2325: WHEN must be used to associate an action with a definition" which is one of the fundamental syntax changes from Legacy systems to Open Source).\\n\\nBut either way, this is the type of issue that should be reported in JIRA so the developers are aware of it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 21:44:19\" },\n\t{ \"post_id\": 16373, \"topic_id\": 4153, \"forum_id\": 8, \"post_subject\": \"Problem with #EXPAND\", \"username\": \"oscar.foley\", \"post_text\": \"This simplified code contains a test to check if date is valid or not.\\n- V0.x passing test (just for reference)\\n- V1.x failing test with variable\\n- V2.x failing test with function as parameter\\n- V3.x same as V2.x with function as external definition.\\n\\nEach test has x.1 version that uses #TEXT, x.2 version that uses #EXPAND and x.3 version that uses nothing.\\n\\nThe problem:\\nThe expected behavior of AssertTrue when the assertion fails is outputting an error message that says "Expected: TRUE Actual: FALSE"\\nAll the three versions AssertTrueV1, AssertTrueV2, AssertTrueV3 fail as you can see in the output except in the case I pass a FALSE (First yellow highlight)\\n\\nHere is the code:\\n\\n
\\nIMPORT STD;\\n\\nEXPORT AssertUtils := MODULE\\n\\tEXPORT AssertTrueV1(testName, actual) := FUNCTIONMACRO\\n\\t\\tSTD.System.Log.addWorkunitInformation('TestName=' + testName, 1);\\n\\t\\tSTD.System.Log.addWorkunitInformation('Actual=' + actual, 1);\\n\\t\\tRETURN ASSERT(actual = TRUE, testName + ' Expected: TRUE Actual: ' + #TEXT(actual));\\n\\tENDMACRO;\\n\\tEXPORT AssertTrueV2(testName, actual) := FUNCTIONMACRO\\n\\t\\tSTD.System.Log.addWorkunitInformation('TestName=' + testName, 1);\\n\\t\\tSTD.System.Log.addWorkunitInformation('Actual=' + actual, 1);\\n\\t\\tRETURN ASSERT(actual = TRUE, testName + ' Expected: TRUE Actual: ' + #EXPAND(actual));\\n\\tENDMACRO;\\n\\tEXPORT AssertTrueV3(testName, actual) := FUNCTIONMACRO\\n\\t\\tSTD.System.Log.addWorkunitInformation('TestName=' + testName, 1);\\n\\t\\tSTD.System.Log.addWorkunitInformation('Actual=' + actual, 1);\\n\\t\\tRETURN ASSERT(actual = TRUE, testName + ' Expected: TRUE Actual: ' + actual);\\n\\tENDMACRO;\\nEND;\\n\\t\\nEXPORT IsDate(STRING dateToCheck) := FUNCTION\\n\\t\\tregex:='^(\\\\\\\\d{4})-(\\\\\\\\d{2})-(\\\\\\\\d{2})T(\\\\\\\\d{2}):(\\\\\\\\d{2}):(\\\\\\\\d{2})Z$';\\n\\t\\tRETURN REGEXFIND(regex, dateToCheck); \\nEND;\\n\\nCreatedDateTimeCorrect := '2016-05-12T00:00:00Z';\\nCreatedDateTimeIncorrect := '2016-05-12XXX00:00:00Z';\\n\\n// CASE 0: Valid test\\nAssertUtils.AssertTrueV1('Is CreatedDateTime a valid date V0.1?', IsDate(CreatedDateTimeCorrect));\\nAssertUtils.AssertTrueV2('Is CreatedDateTime a valid date V0.2?', IsDate(CreatedDateTimeCorrect));\\nAssertUtils.AssertTrueV3('Is CreatedDateTime a valid date V0.3?', IsDate(CreatedDateTimeCorrect));\\n\\n// CASE 1: Failing test with variable\\nAssertUtils.AssertTrueV1('Is CreatedDateTime a valid date V1.1?', FALSE);\\nAssertUtils.AssertTrueV2('Is CreatedDateTime a valid date V1.2?', FALSE);\\nAssertUtils.AssertTrueV3('Is CreatedDateTime a valid date V1.3?', FALSE);\\n\\n// CASE 2: Failing test with function\\nAssertUtils.AssertTrueV1('Is CreatedDateTime a valid date V2.1?', IsDate(CreatedDateTimeIncorrect));// THIS should work\\nAssertUtils.AssertTrueV2('Is CreatedDateTime a valid date V2.2?', IsDate(CreatedDateTimeIncorrect));\\nAssertUtils.AssertTrueV3('Is CreatedDateTime a valid date V2.3?', IsDate(CreatedDateTimeIncorrect));\\n\\n// CASE3: Failing test with function in definition\\nIsDateResult:=IsDate(CreatedDateTimeIncorrect);\\nAssertUtils.AssertTrueV1('Is CreatedDateTime a valid date V3.1?', IsDateResult);// THIS should work\\nAssertUtils.AssertTrueV2('Is CreatedDateTime a valid date V3.2?', IsDateResult);\\nAssertUtils.AssertTrueV3('Is CreatedDateTime a valid date V3.3?', IsDateResult);\\n
\\n\\nHere is the output:\\n[attachment=0:3g86uyfd]Capture.PNG\\n\\nThe tests marked as //This Should work... should work and give correct message in case of error?\\n- Why are they failing to expand value?\\n- Is there any workaround?\", \"post_time\": \"2017-04-12 10:52:40\" },\n\t{ \"post_id\": 16931, \"topic_id\": 4211, \"forum_id\": 8, \"post_subject\": \"Re: What is StrType\", \"username\": \"georgeb2d\", \"post_text\": \"Found it.\\nEXPORT StrType := \\n#if (UnicodeCfg.UseUnicode)\\nUNICODE\\n#else\\nSTRING\\n#end\\n;\\n\\nThanks\", \"post_time\": \"2017-05-04 14:21:21\" },\n\t{ \"post_id\": 16921, \"topic_id\": 4211, \"forum_id\": 8, \"post_subject\": \"Re: What is StrType\", \"username\": \"rtaylor\", \"post_text\": \"StrType is just the name of a definition (most likely a data type re-definition). Find that StrType definition and its expression will tell you exactly what it is.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-02 20:45:05\" },\n\t{ \"post_id\": 16911, \"topic_id\": 4211, \"forum_id\": 8, \"post_subject\": \"What is StrType\", \"username\": \"georgeb2d\", \"post_text\": \"I am looking at some SALT generated code and came across this in a layout:\\n\\nexport Layout_FieldValueList := RECORD,MAXLENGTH(2048)\\n StrType Val;\\n UNSIGNED4 Cnt := 1;\\nEND;\\n\\nI assuming StrType is the same as STRING. Or is it the same as TYPEOF(field)?\\n\\nI have been unable to find any documentation. Where is the documentation? \\n\\nIt is not highlighted in ECL IDE so is it a keyword? \\n\\nThanks.\", \"post_time\": \"2017-05-02 20:31:54\" },\n\t{ \"post_id\": 17233, \"topic_id\": 4273, \"forum_id\": 8, \"post_subject\": \"Re: Usage of JOIN within a FUNCTION\", \"username\": \"ome\", \"post_text\": \"rtaylor,\\n\\nthank you so much. That works like a charm!\", \"post_time\": \"2017-05-30 16:53:20\" },\n\t{ \"post_id\": 17223, \"topic_id\": 4273, \"forum_id\": 8, \"post_subject\": \"Re: Usage of JOIN within a FUNCTION\", \"username\": \"rtaylor\", \"post_text\": \"ome,\\n\\nChange your GetSim FUNCTION definition to this:\\nGetSim(DATASET(RecWords) ws, UNSIGNED r) := FUNCTION\\n
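\\n\\nFor illustration, a minimal self-contained sketch of the same pattern (names and sample data invented, not from this thread), showing a FUNCTION that takes DATASET parameters so the JOINs inside it compile:\\nRecWords := RECORD\\n UNSIGNED word;\\nEND;\\nWordOverlap(DATASET(RecWords) ws, DATASET(RecWords) wl) := FUNCTION\\n Both := JOIN(ws, wl, LEFT.word = RIGHT.word); // words present on both sides\\n Either := JOIN(ws, wl, LEFT.word = RIGHT.word, FULL OUTER); // all words from either side\\n RETURN COUNT(Both) / COUNT(Either);\\nEND;\\nwsA := DATASET([{1},{2},{3}], RecWords);\\nwsB := DATASET([{2},{3},{4}], RecWords);\\nOUTPUT(WordOverlap(wsA, wsB)); // 2 matches out of 4 rows -> 0.5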
In your version, the function expected to receive a single record but you are passing an entire dataset, so the data type for that parameter needs to specify a DATASET being passed and its structure.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-30 16:40:16\" },\n\t{ \"post_id\": 17193, \"topic_id\": 4273, \"forum_id\": 8, \"post_subject\": \"Usage of JOIN within a FUNCTION\", \"username\": \"ome\", \"post_text\": \"I have a list of document-ids of type UNSIGNED. For each entitiy I want to add my similarity. To calculate the similarity, i wrote a FUNCTION GetSim, which takes all words of my search document and an id of an other document.\\n\\nThese are my RECORD-structures:\\nRecWords := RECORD\\n UNSIGNED word;\\nEND;\\n\\nRecDocWithSim := RECORD\\n UNSIGNED INTEGER documentid;\\n\\tUNSIGNED DECIMAL9_8 sim := 0;\\nEND;
\\n\\nThis is my GetSim FUNCTION:\\nGetSim(RecWords ws, UNSIGNED r) := FUNCTION\\n\\twl :=\\tTABLE(gen.IdxMiniDoc(KEYED(documentid = r)), {word});\\n\\tIntersect := JOIN(ws, wl, LEFT.word = RIGHT.word, FULL OUTER);\\n\\tUnion := JOIN(ws, wl, LEFT.word = RIGHT.word);\\n\\tCntUnion :=\\tCOUNT(Union);\\n\\tCntIntersect := COUNT(Intersect);\\n RETURN CntUnion / CntIntersect;\\nEND;
\\n\\nThis is my TRANSFORM:\\nRecDocWithSim CheckSim(RecDocWithSim l, RecWords WordsOfS, UNSIGNED DECIMAL t) := TRANSFORM\\n\\tSELF.sim := GetSim(WordsOfS, l.documentid); //CntUnion / CntIntersect;\\n\\tSELF := l;\\nEND;
\\n\\nAnd this is my call:\\nResultSet := PROJECT(Candidates, CheckSim(LEFT, WordsOfSearchDocument, 0.7));
\\n\\nTrying to compile that yields the following messages:\\n\\nError: syntax error near "," : expected &&, '+', '&', '.', '(' (15, 22), 3002, \\nError: syntax error near "," : expected &&, '+', '&', '.', '(' (16, 18), 3002,
\\n\\nwhich happens to be the lines \\n\\tIntersect := JOIN(ws, wl, LEFT.word = RIGHT.word, FULL OUTER);\\n\\tUnion := JOIN(ws, wl, LEFT.word = RIGHT.word);
\\n\\nI assume, that's because of the usage of LEFT.word=RIGHT.word and this statements are not compatible within a FUNCTION. Is this true? Am I not able to JOIN datasets within a FUNCTION? \\n\\nAny help is really appreciated and thank you in advance.\", \"post_time\": \"2017-05-29 21:51:37\" },\n\t{ \"post_id\": 17293, \"topic_id\": 4293, \"forum_id\": 8, \"post_subject\": \"Re: Compare the results of two jobs\", \"username\": \"lily\", \"post_text\": \"Cheers! The answer is really helpful! \\nI used the JOIN function you wrote above to compare the results. It works well now.\", \"post_time\": \"2017-06-01 19:49:59\" },\n\t{ \"post_id\": 17283, \"topic_id\": 4293, \"forum_id\": 8, \"post_subject\": \"Re: Compare the results of two jobs\", \"username\": \"rtaylor\", \"post_text\": \"Lily,\\n\\nAssuming both workunits are meant to create exactly the same result file, then as long as you name the files differently you can do a simple file compare, something like this://Datasets:\\nrec := {UNSIGNED1 UID, STRING10 stuff};\\nDS1 := DATASET([{1,'aaa'},{2,'bbb'},{3,'ccc'}],rec); //my testing data\\nDS2 := DATASET([{1,'aaa'},{2,'bbb'},{3,'ccc'}],rec); //exactly the same\\n// DS2 := DATASET([{1,'aaa'},{2,'bbc'},{3,'ccc'}],rec); //1 changed\\n// DS2 := DATASET([{1,'aaa'},{2,'bbb'},{3,'ccc'},{4,'ddd'}],rec); //1 added rec\\n// DS2 := DATASET([{1,'aaa'},{2,'bbc'},{3,'ccc'},{4,'ddd'}],rec); //1 added, 1 changed\\n\\n//your DATASETs will look something like these:\\n// DS1 := DATASET('~test::file1',rec,FLAT); \\n// DS2 := DATASET('~test::file2',rec,FLAT);
Then here's the actual comparison code://Append the datasets, then SORT and DEDUP.\\nAppendRecs := DS1 + DS2;\\nSortRecs := SORT(AppendRecs,WHOLE RECORD);\\nDedupPersons := DEDUP(SortRecs,WHOLE RECORD);\\n\\nNumRecs_DS1 := COUNT(DS1);\\nNumRecs_DS2 := COUNT(DS2);\\nIsSameNumRecs := NumRecs_DS1=NumRecs_DS2;\\nDupCount := MAP(IsSameNumRecs => COUNT(DedupPersons)-NumRecs_DS1,\\n NumRecs_DS1 > NumRecs_DS2 => COUNT(DedupPersons)-NumRecs_DS2,\\n NumRecs_DS2 > NumRecs_DS1 => COUNT(DedupPersons)-NumRecs_DS1,\\n COUNT(DedupPersons));\\nDATASET([{NumRecs_DS1,'NumRecs_DS1'},{NumRecs_DS2,'NumRecs_DS2'},\\n {ABS(NumRecs_DS1-NumRecs_DS2),'DIFFERENT Input Record Counts'},\\n {DupCount,'# non-duplicate records'}],\\n {INTEGER Cnt,STRING txt});\\n\\n//you can look at the non-duplicate recs this way:\\nOUTPUT(JOIN(DS1,DS2,\\n LEFT = RIGHT,FULL ONLY),NAMED('Non_Duped_Records_JOIN'));\\n
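\\n\\nAs a hedged variation on the JOIN above (illustrative only), the whole-record comparison can be wrapped in a FUNCTIONMACRO so it is reusable for any pair of files sharing a layout:\\nCompareFiles(ds1, ds2) := FUNCTIONMACRO\\n // FULL ONLY keeps records with no exact match on the other side\\n RETURN COUNT(JOIN(ds1, ds2, LEFT = RIGHT, FULL ONLY));\\nENDMACRO;\\n// usage with the datasets above: 0 means the two results hold the same records\\nOUTPUT(CompareFiles(DS1, DS2), NAMED('DiffCount'));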
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-31 19:32:20\" },\n\t{ \"post_id\": 17273, \"topic_id\": 4293, \"forum_id\": 8, \"post_subject\": \"Compare the results of two jobs\", \"username\": \"lily\", \"post_text\": \"Hi everyone,\\n\\nDoes anybody know an easier way to compare the results of two work units?\\nI made a few improvement to my precious code, let's call this version 2 and the old version is version 1. \\n\\nI submitted version 1 and version2 and want to compare the results which are two tables. Currently I manually download the CSV file of the results to compare them. \\n\\nI am wondering is there an easier way to do this?\\n\\nThank you!\", \"post_time\": \"2017-05-31 18:27:10\" },\n\t{ \"post_id\": 17443, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"lily\", \"post_text\": \"Hi Jim,\\n\\nYes, it's necessary to upgrade my current system to the newest version to very that. \\nI will verify that after the upgrade.\\n\\nCheers!\\n\\nLily\\n\\n[quote="JimD":e1zirdoj]Lily,\\n\\nSince this issue was marked as resolved in 6.4.0, you can verify that the fix solves your particular issue by trying the release candidate for 6.4.0 which is now available on the portal.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-06-16 18:43:17\" },\n\t{ \"post_id\": 17433, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"lily\", \"post_text\": \"Yea I noticed that. Cheers!\\n\\n\\n[quote="bforeman":3u04cyiv]Cool, you also must have noticed but did not mention that it is marked fixed (resolved) in version 6.4\\n\\nBob\", \"post_time\": \"2017-06-16 18:40:45\" },\n\t{ \"post_id\": 17423, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"JimD\", \"post_text\": \"Lily,\\n\\nSince this issue was marked as resolved in 6.4.0, you can verify that the fix solves your particular issue by trying the release candidate for 6.4.0 which is now available on the portal.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-06-16 13:54:15\" },\n\t{ \"post_id\": 17413, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"bforeman\", \"post_text\": \"Cool, you also must have noticed but did not mention that it is marked fixed (resolved) in version 6.4\\n\\nBob\", \"post_time\": \"2017-06-15 21:28:07\" },\n\t{ \"post_id\": 17403, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"lily\", \"post_text\": \"Hi Bob,\\n\\nThere is an issue ticket that reflects the exact same issue as I described above.\\nThe link of the issue ticket is as blow:\\n\\nhttps://track.hpccsystems.com/browse/HP ... 
20~%202999\\n\\n\\nCheers,\\nLily\\n\\n[quote="bforeman":1ynklo1r]Hi Lily,\\n\\nIf it is consistent and reproducible it may be necessary to log it into the Issue Tracker system.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-06-15 20:29:40\" },\n\t{ \"post_id\": 17393, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"Re: system error: 3000\", \"username\": \"bforeman\", \"post_text\": \"Hi Lily,\\n\\nIf it is consistent and reproducible it may be necessary to log it into the Issue Tracker system.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-06-14 15:11:33\" },\n\t{ \"post_id\": 17303, \"topic_id\": 4303, \"forum_id\": 8, \"post_subject\": \"system error: 3000\", \"username\": \"lily\", \"post_text\": \"Hi all,\\n\\nHave anyone met the system error 3000 before?\\n\\nThe detail of the error is as blow:\\n\\n\\nError: System error: 3000: assert(owner == subgraph->owner) failed - file: eclgraph.cpp, line 533 (0, 0)\\n\\n\\nThank you,\\nLily\", \"post_time\": \"2017-06-01 19:53:20\" },\n\t{ \"post_id\": 17713, \"topic_id\": 4383, \"forum_id\": 8, \"post_subject\": \"Re: Covert decimal to binary\", \"username\": \"Gopala Rudraraju\", \"post_text\": \"I ended up creating something similar decimal2base instead of binary for future uses.\\nThanks Bob for the response.\", \"post_time\": \"2017-06-29 18:02:08\" },\n\t{ \"post_id\": 17683, \"topic_id\": 4383, \"forum_id\": 8, \"post_subject\": \"Re: Covert decimal to binary\", \"username\": \"bforeman\", \"post_text\": \"Hi Gopala,\\n\\nThe simple answer to your question is "No", or at least I could not find any standard ECL function, but you can write your own conversion using something like this:\\n\\nDec2Bin(INTEGER1 Number) := FUNCTION\\nNum1 := Number DIV 2; //12\\nBitNum := Number % 2; //0\\nNum2 := Num1 DIV 2; //6\\nBitNum2 := Num1 % 2; //0\\nNum3 := Num2 DIV 2; //3\\nBitNum3 := Num2 % 2; //1\\nNum4 := Num3 DIV 2; //2\\nBitNum4 := Num3 % 2; //0\\nNum5 := Num4 DIV 2; //1\\nBitNum5 := Num4 % 2; //1\\nNum6 := Num5 DIV 2; //6\\nBitNum6 := Num5 % 2; //0\\nNum7 := Num6 DIV 2; //6\\nBitNum7 := Num6 % 2; //0\\nNum8 := Num7 DIV 2; //6\\nBitNum8 := Num7 % 2; //0\\n\\nSTRING Result := (STRING)BitNum8 + (STRING)BitNum7 + (STRING)BitNum6 + \\n (STRING)BitNum5 + (STRING)BitNum4 + (STRING)BitNum3 + \\n\\t\\t\\t\\t\\t\\t\\t\\t (STRING)BitNum2 + (STRING)BitNum;\\n\\nRETURN Result;\\n\\nEND;
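\\n\\nA more compact hedged sketch of the same conversion, using ECL's bitshift and bitwise AND operators (assumes an unsigned 8-bit input; illustrative only):\\nDec2Bin8(UNSIGNED1 n) := FUNCTION\\n STRING1 Bit(UNSIGNED1 pos) := (STRING1)((n >> pos) & 1);\\n RETURN Bit(7) + Bit(6) + Bit(5) + Bit(4) + Bit(3) + Bit(2) + Bit(1) + Bit(0);\\nEND;\\nOUTPUT(Dec2Bin8(25)); // '00011001'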
\\n\\nI hesitated to post this, because there are probably 2 better ways to write this.\\n\\nThe first would be to use one of the many C++ examples that exist and drop it into your ECL using the BEGINC++ structure. The second would be to explore one of our recursive functions like LOOP or GRAPH to implement a more elegant solution. \\n\\nI will repost one or both of these when I can get them written. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-06-29 16:33:24\" },\n\t{ \"post_id\": 17553, \"topic_id\": 4383, \"forum_id\": 8, \"post_subject\": \"Covert decimal to binary\", \"username\": \"Gopala Rudraraju\", \"post_text\": \"I know we can convert B2D\\nbin_val := 010b;\\ndec_val := (integer)bin_val;\\ndec_val;\\n// output 2\\n\\n\\nDo we have a ECL fn that converts Decimal to Binary?\\n\\n0 -> 0\\t\\n1 -> 1\\t\\n2 -> 10\\t\\n3 -> 11\\t\\n4 -> 100\\t\\n5 -> 101\", \"post_time\": \"2017-06-20 15:34:13\" },\n\t{ \"post_id\": 17643, \"topic_id\": 4413, \"forum_id\": 8, \"post_subject\": \"Re: Question about Denormalze\", \"username\": \"David Dasher\", \"post_text\": \"Never mind, I found the answer\\n
output(globex.NextGenUserConfig.includerecs(includetype='Positions'));
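\\n\\nFor readers with a similar need, a generic hedged sketch (layouts and values invented, not the actual globex definitions) of pulling child rows out of a nested dataset with NORMALIZE so they can be filtered or JOINed to other data:\\nChildRec := RECORD\\n STRING20 includetype;\\nEND;\\nParentRec := RECORD\\n UNSIGNED4 gcid;\\n DATASET(ChildRec) includerecs;\\nEND;\\nparents := DATASET([{1,[{'Positions'},{'Multiid'}]},{2,[{'Positions'}]}], ParentRec);\\nflat := NORMALIZE(parents, LEFT.includerecs,\\n TRANSFORM({UNSIGNED4 gcid, STRING20 includetype},\\n SELF.gcid := LEFT.gcid; SELF.includetype := RIGHT.includetype));\\nOUTPUT(flat(includetype = 'Positions')); // one row per matching child record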
\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2017-06-27 15:36:24\" },\n\t{ \"post_id\": 17633, \"topic_id\": 4413, \"forum_id\": 8, \"post_subject\": \"Question about Denormalze\", \"username\": \"David Dasher\", \"post_text\": \"Hi\\n\\nThis is probably a really silly question and will confirm my breakdown \\n\\nI have a Denormalized dataset holding configuration rows for a reporting engine. I want to now read those rows in the child dataset and join them to another dataset based on a certain criteria.\\n\\nHow do I limit the rows in the child dataset or join the child dataset to my other data?\\n\\nAs an example I've tried the below which compiles \\n
\\noutput(globex.NextGenUserConfig(includerecs.includetype = 'Multiid')); \\n
\\n\\nhowever, I get this error\\nWarning: (0,0): error C2131: DENORMALIZE(excluderuleparentonly, SORT(dsjoinbaseexclude, gcid), LEFT.gcid = RIGHT.gcid, excluderulechildmove(LEFT, RIGHT, COUNTER), left outer) : PERSIST('~globex::special::NextGenUserConfig').includerecs.includetype - Table ... : PERSIST('~globex::special::NextGenUserConfig').includerecs is not related to denormexcluderules\\n
\\n\\n[attachment=0:16rh7btp]Screen Shot 2017-06-27 at 14.54.07.png\", \"post_time\": \"2017-06-27 14:03:27\" },\n\t{ \"post_id\": 18063, \"topic_id\": 4423, \"forum_id\": 8, \"post_subject\": \"Re: Set Questions\", \"username\": \"rtaylor\", \"post_text\": \"Patrick,\\n\\nHow about something like this:TypeCombos(SET OF STRING typeset,SET OF STRING subtypeset) := FUNCTION\\n typeDS := DATASET(typeset,{STRING types});\\n subDS := DATASET(subtypeset,{STRING subtypes});\\n retDS := JOIN(typeDS,subDS,TRUE,ALL);\\n RETURN retDS;\\nEND;\\ntypes1 := ['A','D','E'];\\nsubtypes1 := ['Z','Y']; \\ntypes2 := ['A','B','F'];\\nsubtypes2 := ['Z','X','W'];\\n\\nds1 := TypeCombos(types1,subtypes1);\\nds2 := TypeCombos(types2,subtypes2);\\nds1;ds2;\\nSEQUENTIAL(\\nAPPLY(ds1,\\n OUTPUT(DATASET([{'~thor::pml::results::'+types+'::'+subtypes}],\\n {STRING name}),\\n EXTEND,NAMED('fred'))),\\nAPPLY(ds2,\\n OUTPUT(DATASET([{'~thor::pml::results::'+types+'::'+subtypes}],\\n {STRING name}),\\n EXTEND,NAMED('george'))));\\n
\\nSince ECL does not have the kind of set operations you'd like to use, you simply need to convert those sets into datasets that can be processed. That's what the TypeCombos FUNCTION does. The APPLY just demonstrates how you might use that dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-06-30 19:32:02\" },\n\t{ \"post_id\": 17723, \"topic_id\": 4423, \"forum_id\": 8, \"post_subject\": \"Re: Set Questions\", \"username\": \"bforeman\", \"post_text\": \"The only way I know how to do it is to have an intermediate process that converts each set element into its own "record" in an inline dataset and then you can test and extract what you want using filtering. \\n\\nBut the question is out here on the forum and perhaps someone else may have a different perspective.\\n\\n...and your colleagues are correct in that what you want to do is contained in the ECL template language (there is a #FOR construct for example), but that was originally designed as an ECL code generator and do you really need that level of complexity to name files on the cluster? I would use the set to dataset approach and then any value or set of values from the dataset could be extracted using filtering. You might also look at the new DICTIONARY statement that was designed for inline lookup tables similar to what you have in mind.\\n\\nRegards,\\nBob\", \"post_time\": \"2017-06-29 18:26:37\" },\n\t{ \"post_id\": 17703, \"topic_id\": 4423, \"forum_id\": 8, \"post_subject\": \"Re: Set Questions\", \"username\": \"pml\", \"post_text\": \"Thanks for the reply, Bob! I may be misunderstanding, but it seems like the example you provided is more about applying actions to a set rather than taking actions based on set elements. I think I'm trying to find something similar to a foreach loop, so that I can parameterize a list of possibly-dynamic values to process instead of passing them in individually. Here's an example:\\n\\nLet's say I need to perform processing on specified types and subtypes of data, and output the results. The types and subtypes that are in scope could change between different executions of the job in question, and passing them in as single-value parameters could require ever-increasing manual intervention as the related production environment evolves.\\n\\nCurrent:\\ntype := 'A'; // Also need to process D & E this time, next time could be A,B,F\\nsubtype := 'Z'; // Also need Y this time, next time could be Z, X, W\\n\\nresults := doSomething(type, subtype);\\noutput(results,,'~thor::pml::results::'+type+'::'+subtype);
\\nI have to change the type and subtype manually after each execution to hold the next pair of values (A&Y, D&Z, D&Y, etc.) until all combinations are exhausted, and since the number of types and subtypes of interest could vary, I can't just use a fixed-size set and call each member by static index.\\n\\nWhat I'd like to do is:\\n\\n\\nTheoretical:\\ntypes := ['A','D','E']; // Next time A, B, F\\nsubtypes := ['Z','Y']; // Next time Z, X, W\\n\\nforeach type in types{\\n foreach subtype in subtypes{\\n output(doSomething(type, subtype),,'~thor::pml::results::'+type+'::'+subtype);\\n }\\n}
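\\n\\nFor reference, a hedged sketch of how that nested loop is usually expressed in ECL (it mirrors the TypeCombos approach in rtaylor's reply above): turn each set into a one-column dataset and take their cross-product, so there is one row per type/subtype pair to drive further processing:\\ntypeDS := DATASET(['A','D','E'], {STRING1 t});\\nsubDS := DATASET(['Z','Y'], {STRING1 s});\\ncombos := JOIN(typeDS, subDS, TRUE, ALL); // one row per (t,s) pair\\nOUTPUT(combos);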
\\nThis way I could just update the lists and execute the code once, and it processes all combinations instead of having to be manually updated for each combination.\\n\\nSyntax aside, is something similar to that possible in ECL? Can you act on a set to perform processing based on each member in that set? If I store the sets as datasets instead can I use loop or process to accomplish similar functionality? Is there some other approach that would allow me to use a dynamic array of values to direct the processing that needs to be done?\\n\\nThanks,\\n\\n - Patrick\", \"post_time\": \"2017-06-29 17:31:04\" },\n\t{ \"post_id\": 17693, \"topic_id\": 4423, \"forum_id\": 8, \"post_subject\": \"Re: Set Questions\", \"username\": \"bforeman\", \"post_text\": \"Hi Patrick,\\n\\nWe have indexing support into SETs as you already know:\\n\\nMySet := [1,2,3,4,5];\\nThirdElement := MySet[3];
\\n\\n...but what you might not know is that you can process and extract set elements into an inline dataset and then sort, filter, or transform those elements just like any other dataset, and then use the SET function to write those dataset entries back into a SET.\\n\\nHere is a simple example:\\n\\n\\nSetAllStates := ['AL','FL','GA','NY','CA','FL','AR','MS'];\\nSetDS := DATASET(SetAllStates,{STRING2 State});\\nSortedSet := SORT(SetDS,State);\\nDedupedSet := DEDUP(SortedSet,State);\\nCountUniqueElements := COUNT(DedupedSet);\\nNewSet := SET(Dedupedset,State);
\\n\\nHopefully this will give you some ideas \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-06-29 16:45:34\" },\n\t{ \"post_id\": 17653, \"topic_id\": 4423, \"forum_id\": 8, \"post_subject\": \"Set Questions\", \"username\": \"pml\", \"post_text\": \"Greetings,\\n\\nApologies if these are simple questions for the experts but I've asked 4 people on my team and no one yet has been sure of a good way to do these:\\n\\n\\n1. Access specific members within a set\\n\\nGiven setA := [2,4,6,8,10], how could I access the first, third and fifth members, like setA[1,3,5]?\\n\\nThe best answer received up to this point was to create an additional set like setB := [setA[1],setA[3],setA[5]], but it seems like there should be something cleaner.\\n\\n\\n2. Act on each member in a set\\n\\nGiven setSuffixes := ['afile','bfile','cfile'], how can I process the set to create output files using each suffix, like thor::pml::afile, thor::pml::bfile and thor::pml::cfile?\\n\\nI also tried converting the set to a recordset to try things like loop, iterate, process and apply (is it true that only works on hthor?), but since the goal is to create an output file with the results of the suffix-specific processing, those options haven't yet worked for me since I don't think you can nest an output statement within one of those.\\n\\nThe best answer received up to this point was to create a separate macro to generate the code needed to act on each suffix individually, which still isn't really processing the set as a set and also involves creating that extra macro code.\\n\\n\\nIt seems like these two tasks should be fairly straightforward (and are in many languages), but I can't seem to find how ECL best facilitates them.\\n\\nThanks!\\n\\n - Patrick\", \"post_time\": \"2017-06-28 17:00:54\" },\n\t{ \"post_id\": 18123, \"topic_id\": 4523, \"forum_id\": 8, \"post_subject\": \"Re: How Soapcall passes data into Roxie service?\", \"username\": \"rtaylor\", \"post_text\": \"yunchen,\\n\\nThe set of articles in the Working With Roxie section of the Programmer's Guide should answer these questions. \\n\\nYou can access the Programmer's Guide by pressing F1 in the ECL IDE -- the IDE's help file contains three complete books: the ECL Language Reference, the Standard Library Reference, and the Programmer's Guide.\\n\\nPlease come back with any additional questions you may still have if you don't find all the answers there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-05 13:23:46\" },\n\t{ \"post_id\": 18083, \"topic_id\": 4523, \"forum_id\": 8, \"post_subject\": \"How Soapcall passes data into Roxie service?\", \"username\": \"yunchen\", \"post_text\": \"In a typical SOAPCALL below:\\n\\nresult := SOAPCALL(ds, url, svc, inRecord, t(LEFT),DATASET(outRecord));\\n\\n'svc' is the Roxie service that uses "STORED" to retrieve 'inRecord' passed by SOAPCALL. Here are my questions that I can't find a clean answer from manuals:\\n\\n1. How SOAPCALL passes inRecord to "STORED"?\\nI assume after WsECL receives the soap call, then WsECL stores 'inRecord' somewhere so that Roxie can retrieve? If so, where is the data stored? On disk where Roxie workunit runs? Does WsECL write to the disk directly or go through another service? \\n\\n2. How does Roxie retrieve "STORED" data?\\nIs there a registry managing "STORED" variables for Roxie to look up? Does Roxie read from disk directly? \\n\\n3. 
How does Roxie service execute?\\nDoes Roxie service listen to a port in order to receive a call from WsECL or go through another kind of communication? When result is returned from SOAPCALL, is the result passed back by using the same "STORED" mechanism or something different?\", \"post_time\": \"2017-06-30 21:27:33\" },\n\t{ \"post_id\": 18173, \"topic_id\": 4533, \"forum_id\": 8, \"post_subject\": \"Re: Regarding viewing superfiles in published ROXIE query.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Anthony,\\n\\nYes, my application on ROXIE shows correct behaviour with respect to the data returned. I just wanted to verify the superfile hierarchy is correct.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2017-07-07 06:39:19\" },\n\t{ \"post_id\": 18163, \"topic_id\": 4533, \"forum_id\": 8, \"post_subject\": \"Re: Regarding viewing superfiles in published ROXIE query.\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Akhilesh,\\n\\nThe problem is purely in the user interface and doesn't affect the behaviour of the actual data or query.\\n\\nThe logical files tab should still be accurate. Are you just trying to verify that the superfile hierarchy is what you expect it to be?\\n\\nRegards,\\nAnthony\", \"post_time\": \"2017-07-06 14:02:02\" },\n\t{ \"post_id\": 18153, \"topic_id\": 4533, \"forum_id\": 8, \"post_subject\": \"Re: Regarding viewing superfiles in published ROXIE query.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Anthony,\\n\\nThanks for your help.\\n\\nYes your assumptions are correct. The environment where I am facing this problem uses packagemaps and the other environment where I do not face the problem does not use packagemaps. I will update the JIRA with some more observations.\\n\\nSo for time being, is there a way to correct this situation ?\\nLike adding subfiles manually to the superfiles can this be a solution ? And can I do this on ROXIE ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2017-07-06 07:14:42\" },\n\t{ \"post_id\": 18143, \"topic_id\": 4533, \"forum_id\": 8, \"post_subject\": \"Re: Regarding viewing superfiles in published ROXIE query.\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Akhilesh,\\n\\nWhen you say "I update the subfiles everyday for the published ROXIE query", is it correct to assume you are doing this using packagemaps?\\n\\nAnd when you say "Also I have one more environment of the same HPCC ROXIE version. The same query when published on this environment shows correct subfiles under superfiles in the "Superfiles Tab".\\n\\nIs that environment perhaps not using packagemaps, at least in the case where this works?\\n\\nIt does seem like the "Queries > Query > superfiles" tab does not work completely with packagemaps. When you try to expand the entries it doesn't show the contents, and when you open the entry it goes to the DFS superfile entry, instead of the packagemap entry.\\n\\nI have opened a JIRA ticket about the behavior I have seen: https://track.hpccsystems.com/browse/HPCC-17955\\n\\nFeel free to comment on that JIRA, but also please let me know if my assumptions were correct.\\n\\nRegards,\\nAnthony\", \"post_time\": \"2017-07-06 00:07:45\" },\n\t{ \"post_id\": 18093, \"topic_id\": 4533, \"forum_id\": 8, \"post_subject\": \"Regarding viewing superfiles in published ROXIE query.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello,\\n\\nThe HPCC ROXIE version I am using is 6.2.16-1. \\n\\n1. I have published an ECL query on ROXIE. 
When I see this query in the ECL WATCH, the "superfiles tab" for this query shows the superfile's name but I cannot see the subfiles's name when I click on the drop down button for a specific superfile. But on selecting any given superfile name and then clicking open button shows me the subfile. \\n\\n2. Also the subfile found after opening the superfile, is old. I update the subfiles everyday for the published ROXIE query. So new subfiles are generated which resides in the existing superfiles. Although the "logical files tab" for the published ROXIE query shows the latest subfiles, but the subfiles seen under the "superfiles tab" are old. \\n\\nMy query is, why do I see incorrect subfile under a superfile in the "Superfiles tab" of the published ROXIE query?\\nAlso I have one more environment of the same HPCC ROXIE version. The same query when published on this environment shows correct subfiles under superfiles in the "Superfiles Tab".\\n\\nRequest you assistance in understanding this discrepancy. \\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2017-07-04 08:23:56\" },\n\t{ \"post_id\": 18263, \"topic_id\": 4553, \"forum_id\": 8, \"post_subject\": \"Re: Reading files on THOR using foreign keyword and wildcard\", \"username\": \"bforeman\", \"post_text\": \"https://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nPlease open an issue in our Issue Tracking System, and a member of our HPCC team will review this.\\n\\nThank You,\\n\\nBob\", \"post_time\": \"2017-07-18 19:32:59\" },\n\t{ \"post_id\": 18253, \"topic_id\": 4553, \"forum_id\": 8, \"post_subject\": \"Reading files on THOR using foreign keyword and wildcards.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Team,\\n\\nThe HPCC THOR version I am using is 6.2.16-1.\\n\\nI am trying to accessing files from a different HPCC machine using the foreign keyword. I am using the following syntax to read those files. \\n\\nEXPORT ExpDS := DATASET('~foreign::xx.xxx.xx.x::sb::li::r*::c9_*_b*_li_*.txt_w201611??-??????_bd', Dataset_Layout,THOR);\\n\\nI am able to use the wildcard characters when I accessing the files from the HOST (HPCC server having these files). But I get the following error \\n"\\nSystem error: 0: Read: Logical file name 'foreign::xx.xxx.xx.x::sb::li::r*::c9_*_b*_li_*.txt_w201611??-??????_bd' could not be resolved\\n"\\nwhen trying to access the files using the foreign keyword and wildcards. Also I am able to access a single filename from a different HPCC server using the foreign keyword but the error raises when using both foreign keyword and wildcards.\\n\\nPlease help me in reading files with wildcards from a different HPCC server.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2017-07-14 08:35:58\" },\n\t{ \"post_id\": 18303, \"topic_id\": 4563, \"forum_id\": 8, \"post_subject\": \"Re: Superfile and logical files\", \"username\": \"andres5\", \"post_text\": \"Hi Richard, I implemented another solution. As always thanks for your help!\", \"post_time\": \"2017-07-24 12:57:33\" },\n\t{ \"post_id\": 18283, \"topic_id\": 4563, \"forum_id\": 8, \"post_subject\": \"Re: Superfile and logical files\", \"username\": \"rtaylor\", \"post_text\": \"Andres,
Is there any method that can tell me the logical file of a row within the superfile?
Not that I am aware of. If you need that, then you'll need to build it into all the sub-files. Why does it matter (IOW, what problem are you trying to solve)?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-20 19:00:30\" },\n\t{ \"post_id\": 18273, \"topic_id\": 4563, \"forum_id\": 8, \"post_subject\": \"Superfile and logical files\", \"username\": \"andres5\", \"post_text\": \"Hello All, I have a superfile that has multiple logical files. These files do not have any field that can tell me what is the source of the row (origin logical file). Is there any method that can tell me the logical file of a row within the superfile?\\n\\nThanks,\\n\\nAndres\", \"post_time\": \"2017-07-20 13:28:51\" },\n\t{ \"post_id\": 18453, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"vin\", \"post_text\": \"Never mind the last post.\\n\\nI registered and submitted a bug to HPCC project\", \"post_time\": \"2017-07-31 22:00:23\" },\n\t{ \"post_id\": 18443, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"vin\", \"post_text\": \"Bob,\\n\\nI'd be happy to. However, I don't have an account on this JIRA and I could not find a link for unregistered users.\\n\\nAlso, I do not see a project for eclcc. Should I use the HPCCHPCC project?\\n\\nThanks,\\n+vince\", \"post_time\": \"2017-07-31 21:30:30\" },\n\t{ \"post_id\": 18383, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"bforeman\", \"post_text\": \"Vin,\\n\\nWould you mind please moving this information into our issue tracking system and open up a new report? Our development team would like to review this further.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2017-07-25 13:26:08\" },\n\t{ \"post_id\": 18373, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"vin\", \"post_text\": \"Thanks for the response. It inspired me to look more deeply. Two notes. (1) I'm using a Mac. (2) It doesn't hang permanently. \\n\\nI'm attaching a transcript that I will now describe. I have a complicate networking stack. Present all interfaces, then use grep to show the active external. Compile takes 1.01s and executable finishes in 50ms. I show the eclcc.log for reference.\\n\\nNext, turn off network (in GUI). Compile takes >2 mins and a.out >1 min. Looking at the eclcc.log from this compile we see that gethostbyname fails. The first two lines in the log are out of order (might be error is written to stderr and others to stdout). Best I can tell by looking at the log is it takes a minute for gethostbyname to fail. Then another minute to get back to work.\\n\\nHere is the full transcript. 
Command prompt is "sh-3.2$"\\n\\nsh-3.2$ ifconfig\\nlo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> mtu 16384\\n options=1203<RXCSUM,TXCSUM,TXSTATUS,SW_TIMESTAMP>\\n inet 127.0.0.1 netmask 0xff000000\\n inet6 ::1 prefixlen 128\\n inet6 fe80::1%lo0 prefixlen 64 scopeid 0x1\\n nd6 options=201<PERFORMNUD,DAD>\\ngif0: flags=8010<POINTOPOINT,MULTICAST> mtu 1280\\nstf0: flags=0<> mtu 1280\\nen0: flags=8823<UP,BROADCAST,SMART,SIMPLEX,MULTICAST> mtu 1500\\n ether ac:bc:32:cf:7e:e7\\n nd6 options=201<PERFORMNUD,DAD>\\n media: autoselect (<unknown type>)\\n status: inactive\\nen1: flags=963<UP,BROADCAST,SMART,RUNNING,PROMISC,SIMPLEX> mtu 1500\\n options=60<TSO4,TSO6>\\n ether 6a:00:01:6d:2d:b0\\n media: autoselect <full-duplex>\\n status: inactive\\nen2: flags=963<UP,BROADCAST,SMART,RUNNING,PROMISC,SIMPLEX> mtu 1500\\n options=60<TSO4,TSO6>\\n ether 6a:00:01:6d:2d:b1\\n media: autoselect <full-duplex>\\n status: inactive\\nbridge0: flags=8863<UP,BROADCAST,SMART,RUNNING,SIMPLEX,MULTICAST> mtu 1500\\n options=63<RXCSUM,TXCSUM,TSO4,TSO6>\\n ether 6a:00:01:6d:2d:b0\\n Configuration:\\n id 0:0:0:0:0:0 priority 0 hellotime 0 fwddelay 0\\n maxage 0 holdcnt 0 proto stp maxaddr 100 timeout 1200\\n root id 0:0:0:0:0:0 priority 0 ifcost 0 port 0\\n ipfilter disabled flags 0x2\\n member: en1 flags=3<LEARNING,DISCOVER>\\n ifmaxaddr 0 port 6 priority 0 path cost 0\\n member: en2 flags=3<LEARNING,DISCOVER>\\n ifmaxaddr 0 port 7 priority 0 path cost 0\\n nd6 options=201<PERFORMNUD,DAD>\\n media: <unknown type>\\n status: inactive\\np2p0: flags=8802<BROADCAST,SIMPLEX,MULTICAST> mtu 2304\\n ether 0e:bc:32:cf:7e:e7\\n media: autoselect\\n status: inactive\\nawdl0: flags=8902<BROADCAST,PROMISC,SIMPLEX,MULTICAST> mtu 1484\\n ether ce:c5:f4:e7:54:d2\\n nd6 options=201<PERFORMNUD,DAD>\\n media: autoselect\\n status: inactive\\nutun0: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 2000\\n inet6 fe80::70a7:71bd:32d4:787%utun0 prefixlen 64 scopeid 0xc\\n nd6 options=201<PERFORMNUD,DAD>\\nfw0: flags=8863<UP,BROADCAST,SMART,RUNNING,SIMPLEX,MULTICAST> mtu 4078\\n lladdr 00:0a:27:02:00:53:1f:01\\n nd6 options=201<PERFORMNUD,DAD>\\n media: autoselect <full-duplex>\\n status: inactive\\nen5: flags=8863<UP,BROADCAST,SMART,RUNNING,SIMPLEX,MULTICAST> mtu 1500\\n options=10b<RXCSUM,TXCSUM,VLAN_HWTAGGING,AV>\\n ether 10:dd:b1:d8:91:66\\n inet6 fe80::1076:825c:b271:157e%en5 prefixlen 64 secured scopeid 0x4\\n inet 152.14.89.252 netmask 0xfffffe00 broadcast 152.14.89.255\\n nd6 options=201<PERFORMNUD,DAD>\\n media: autoselect (1000baseT <full-duplex>)\\n status: active\\nsh-3.2$ ifconfig | grep -w inet\\n inet 127.0.0.1 netmask 0xff000000\\n inet 152.14.89.252 netmask 0xfffffe00 broadcast 152.14.89.255\\nsh-3.2$ time eclcc hello.ecl\\n\\nreal 0m1.010s\\nuser 0m0.369s\\nsys 0m0.498s\\nsh-3.2$ time ./a.out\\nHello and Welcome!\\n\\nreal 0m0.050s\\nuser 0m0.023s\\nsys 0m0.016s\\nsh-3.2$ cat eclcc.log\\n00000000 2017-07-24 14:45:49 13030 -1 Warning: Could not load /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib: dlopen(/opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib, 2): Library not loaded: /opt/local/lib/libarchive.13.dylib\\n Referenced from: /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib\\n Reason: image not found\\n00000001 2017-07-24 14:45:49 13030 -1 File hook library /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib could not be loaded\\n00000002 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libauditlib.dylib[lib_auditlib] 
version = AUDITLIB 1.0.1\\n00000003 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libdebugservices.dylib[lib_debugservices] version = DEBUGSERVICES 1.0.1\\n00000004 2017-07-24 14:45:49 13030 -1 Plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libdmetaphone.dylib exports getECLPluginDefinition but does not export ECL - not loading\\n00000005 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libfileservices.dylib[lib_fileservices] version = FILESERVICES 2.1.3\\n00000006 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/liblogging.dylib[lib_logging] version = LOGGING 1.0.1\\n00000007 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libparselib.dylib[lib_parselib] version = PARSELIB 1.0.1\\n00000008 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libstringlib.dylib[lib_stringlib] version = STRINGLIB 1.1.14\\n00000009 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libtimelib.dylib[lib_timelib] version = TIMELIB 1.0.0\\n0000000A 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libunicodelib.dylib[lib_unicodelib] version = UNICODELIB 1.1.06\\n0000000B 2017-07-24 14:45:49 13030 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libworkunitservices.dylib[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.2\\n0000000C 2017-07-24 14:45:49 13030 -1 Adding library: eclrtl\\n0000000D 2017-07-24 14:45:49 13030 -1 Adding source file: a.out.res.s\\n0000000E 2017-07-24 14:45:49 13030 -1 addSourceFile a.out.cpp\\n0000000F 2017-07-24 14:45:49 13030 -1 addSourceFile a.out.res.s\\n00000010 2017-07-24 14:45:49 13030 -1 Compiling a.out\\n00000011 2017-07-24 14:45:50 13030 -1 Remove temporaries\\n00000012 2017-07-24 14:45:50 13030 -1 Remove a.out.res.s*\\n00000013 2017-07-24 14:45:50 13030 -1 Compiled a.out\\nsh-3.2$ echo turned off network\\nturned off network\\nsh-3.2$ ifconfig | grep -w inet\\n inet 127.0.0.1 netmask 0xff000000\\nsh-3.2$ time eclcc hello.ecl\\n\\nreal 2m2.246s\\nuser 0m0.387s\\nsys 0m0.516s\\nsh-3.2$ cat eclcc.log\\n00000001 2017-07-24 14:38:09 12970 -1 jsocket(1,2890) gethostbyname failed err = 1 : tardis-3.local\\n00000000 2017-07-24 14:37:08 12970 -1 Warning: Could not load /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib: dlopen(/opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib, 2): Library not loaded: /opt/local/lib/libarchive.13.dylib\\n Referenced from: /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib\\n Reason: image not found\\n00000002 2017-07-24 14:39:09 12970 -1 File hook library /opt/HPCCSystems/6.0.0/clienttools/filehooks/libarchivefile.dylib could not be loaded\\n00000003 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libauditlib.dylib[lib_auditlib] version = AUDITLIB 1.0.1\\n00000004 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libdebugservices.dylib[lib_debugservices] version = DEBUGSERVICES 1.0.1\\n00000005 2017-07-24 14:39:09 12970 -1 Plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libdmetaphone.dylib exports getECLPluginDefinition but does not export ECL - not loading\\n00000006 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libfileservices.dylib[lib_fileservices] version = FILESERVICES 2.1.3\\n00000007 2017-07-24 14:39:09 
12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/liblogging.dylib[lib_logging] version = LOGGING 1.0.1\\n00000008 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libparselib.dylib[lib_parselib] version = PARSELIB 1.0.1\\n00000009 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libstringlib.dylib[lib_stringlib] version = STRINGLIB 1.1.14\\n0000000A 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libtimelib.dylib[lib_timelib] version = TIMELIB 1.0.0\\n0000000B 2017-07-24 14:39:09 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libunicodelib.dylib[lib_unicodelib] version = UNICODELIB 1.1.06\\n0000000C 2017-07-24 14:39:10 12970 -1 Loading plugin /opt/HPCCSystems/6.0.0/clienttools/plugins/libworkunitservices.dylib[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.2\\n0000000D 2017-07-24 14:39:10 12970 -1 Adding library: eclrtl\\n0000000E 2017-07-24 14:39:10 12970 -1 Adding source file: a.out.res.s\\n0000000F 2017-07-24 14:39:10 12970 -1 addSourceFile a.out.cpp\\n00000010 2017-07-24 14:39:10 12970 -1 addSourceFile a.out.res.s\\n00000011 2017-07-24 14:39:10 12970 -1 Compiling a.out\\n00000012 2017-07-24 14:39:10 12970 -1 Remove temporaries\\n00000013 2017-07-24 14:39:10 12970 -1 Remove a.out.res.s*\\n00000014 2017-07-24 14:39:10 12970 -1 Compiled a.out\\nsh-3.2$ time ./a.out\\nHello and Welcome!\\n\\nreal 1m0.661s\\nuser 0m0.028s\\nsys 0m0.027s\\n
\", \"post_time\": \"2017-07-24 19:05:37\" },\n\t{ \"post_id\": 18353, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"JimD\", \"post_text\": \"I just tried to reproduce this on my Ubuntu 16.04 Virtual Machine running on Virtual Box. \\n\\nIn netowrk settings, I disabled both network card devices, then restarted the VM.\\n\\nI was still able to compile and link hello.ecl using\\n\\nsudo eclcc hello.ecl
\\n\\nThis created a.out, which I was able to run it using:\\n./a.out
\\n\\nAm I missing something? How are you disabling networking?\\n\\nJim\", \"post_time\": \"2017-07-24 17:39:33\" },\n\t{ \"post_id\": 18333, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Re: Stand alone eclcc\", \"username\": \"bforeman\", \"post_text\": \"Checking with development. In the past all clusters were connected via IP, so perhaps it's a legacy feature. Confirming.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-07-24 16:45:11\" },\n\t{ \"post_id\": 18313, \"topic_id\": 4573, \"forum_id\": 8, \"post_subject\": \"Stand alone eclcc\", \"username\": \"vin\", \"post_text\": \"It is great that eclcc can generate a native a.out file. I use this understand new operations quite often. I noticed the other day that when using eclcc to generate a native a.out file it hangs if there is no internet connection.\\n\\nQUESTION: Why does a local compile of eclcc required networking?\\n\\nSpecifics:\\n\\n% eclcc --version\\n6.0.0 community_6.0.0-1\\n% uname -v\\nDarwin Kernel Version 16.6.0: Fri Apr 14 16:21:16 PDT 2017; root:xnu-3789.60.24~6/RELEASE_X86_64\\n\\nWhen I execute eclcc from the command line, eg:\\n\\n% eclcc inc.ecl\\n\\nit generates an a.out if connected to the internet. However, if I disable networking, it hangs. (At least it took longer than I was willing to wait: >5 mins.)\\n\\nNote: I have an ecl.ini file that has info to access my HPCC cluster. eclcc behaves the same when remove ecl.ini\", \"post_time\": \"2017-07-24 13:34:10\" },\n\t{ \"post_id\": 18423, \"topic_id\": 4593, \"forum_id\": 8, \"post_subject\": \"Re: Constant expression expected -- Macro\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThen I would do it this way (keeping the Python but eliminating the Template Language):import std , python;\\n\\nSET OF STRING70 splitString(STRING p , Integer num_parts ) := EMBED(Python)\\n import textwrap\\n return textwrap.wrap(p, num_parts)\\nENDEMBED;\\n\\nstatements := 'Enterprise Control Language (ECL) has been designed specifically for huge data projects using the LexisNexis High Performance Computer Cluster (HPCC). ECL’s extreme scalability comes from a design that allows you to leverage every query you create for re-use in subsequent queries as needed. To do this, ECL takes a Dictionary approach to building queries wherein each ECL definition defines an expression. Each previous Definition can then be used in succeeding ECL definitions—the language extends itself as you use it.';\\n\\nds := DATASET( [ { statements } ] , { STRING statement_text } );\\n\\nAStatement_Info := RECORD\\n STRING70 STATEMENT_TEXT;\\nEND;\\n\\nres := PROJECT(ds,TRANSFORM({DATASET(AStatement_Info) Statement1 {MAXCOUNT(20)};\\n DATASET(AStatement_Info) Statement2 {MAXCOUNT(20)};},\\n settxt := splitString( LEFT.statement_text , 70);\\n tds := DATASET(settxt,{ STRING70 STATEMENT_TEXT })\\n ( STATEMENT_TEXT <> '');\\n NumParts := COUNT(temp_ds);\\n SELF.Statement1 := IF(NumParts < 21, \\n temp_ds,temp_ds[1..20]);\\n SELF.Statement2 := IF(NumParts > 20,\\n temp_ds[21..],[]);\\n ));\\n \\noutput(res);
This method also solves a problem that your splitting at 1400 idea might have had, by determining the number of parts AFTER the Python code splits it instead of assuming splitting at 1400 will not miss a part (because the Python code is actually splitting at < 70 characters each part so as not to split a word -- so if there were 1399 characters and Python left > 2 spaces at the end of even one part you would have ended up with 21 parts instead of 20 and would have missed putting that last part into your Statement2). \\n\\nOf course, you would eliminate all these problems by just allowing a larger MAXCOUNT on your child dataset and putting all the parts into one instead of two. Is there a particular reason for NOT doing that? \\n\\nTry it and see how it works.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-28 13:36:42\" },\n\t{ \"post_id\": 18413, \"topic_id\": 4593, \"forum_id\": 8, \"post_subject\": \"Re: Constant expression expected -- Macro\", \"username\": \"ksviswa\", \"post_text\": \"Thanks Richard..\\n\\nI did not have any problems with splitting the data , we could use your approach or the python one. The python one makes sure the word wrapping is done and we just do not split by 70 characters , so that the same word is not split in to multiple lines.\\n\\nI was stuck with the execution of multiple blocks based on the length of the statement. \\nstatement1 and statement2 are updated based on the condition given below.\\n\\nIF(length(IncomingString <= 1400 then\\nCall the wordwrap with 70 and put those into Statement1\\nStatement2 is all empty.\\nIF(length(IncomingString) > 1400 then\\nCall that word wrap code first with 1400 – that would split into two.\\nThen call it with 70 with the first half. Put those into Statement1\\nThen call it with 70 with the second half. Put those into Statement2\", \"post_time\": \"2017-07-26 20:20:07\" },\n\t{ \"post_id\": 18403, \"topic_id\": 4593, \"forum_id\": 8, \"post_subject\": \"Re: Constant expression expected -- Macro\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nHere's how I would do it (note that I had to change a couple of characters in your string):statements := 'Enterprise Control Language (ECL) has been designed specifically for huge data projects using the LexisNexis High Performance Computer Cluster (HPCC). ECL\\\\'s extreme scalability comes from a design that allows you to leverage every query you create for re-use in subsequent queries as needed. To do this, ECL takes a Dictionary approach to building queries wherein each ECL definition defines an expression. Each previous Definition can then be used in succeeding ECL definitions-the language extends itself as you use it.';\\n\\nds := DATASET( [ { statements } ] , { STRING statement_text } );\\n\\nAStatement_Info := RECORD\\n STRING70 STATEMENT_TEXT;\\nEND;\\n\\n{DATASET(AStatement_Info) Statement1 {MAXCOUNT(20)}} XF(ds L) := TRANSFORM\\n NumParts := ROUNDUP(LENGTH(TRIM(L.statement_text))/70);\\n SELF.Statement1 := DATASET(NumParts,\\n TRANSFORM(AStatement_Info,\\n StartPt := ((COUNTER-1)*70)+1;\\n SELF.STATEMENT_TEXT :=\\n L.statement_text[StartPt.. ]));\\nEND;\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\nres := PROJECT(ds,XF(LEFT));\\noutput(res);
This just splits it into 70-byte chunks, ignoring whether that split words or not, but I think that's what your code would also have done. No need for either Python or Template Language code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-26 19:19:15\" },\n\t{ \"post_id\": 18393, \"topic_id\": 4593, \"forum_id\": 8, \"post_subject\": \"Constant expression expected -- Macro\", \"username\": \"ksviswa\", \"post_text\": \"Hi , \\n\\nI am trying to split a text in to multiple parts based on some condition.\\n\\nThis is the condition am trying to achieve, trying to use a template language for the same to execute a block of code , but encounter this error as we cannot use dynamically computed values in the macro and it has to be a constant in #IF.\\n\\nPlease suggest me how i can improve the same or any other way i can achieve the same.\\n\\nIF(length(IncomingString <= 1400 then\\n\\tCall the wordwrap with 70 and put those into Statement1\\n\\tStatement2 is all empty.\\nIF(length(IncomingString) > 1400 then\\n\\tCall that word wrap code first with 1400 – that would split into two.\\n\\tThen call it with 70 with the first half. Put those into Statement1\\n\\tThen call it with 70 with the second half. Put those into Statement2\\n\\nHere is the sample code am using \\n\\n\\nimport std , python;\\n\\nSET OF STRING70 splitString(STRING p , Integer num_parts ) := EMBED(Python)\\n import textwrap\\n return textwrap.wrap(p, num_parts)\\nENDEMBED;\\n\\nstatements := 'Enterprise Control Language (ECL) has been designed specifically for huge data projects using the LexisNexis High Performance Computer Cluster (HPCC). ECL’s extreme scalability comes from a design that allows you to leverage every query you create for re-use in subsequent queries as needed. To do this, ECL takes a Dictionary approach to building queries wherein each ECL definition defines an expression. Each previous Definition can then be used in succeeding ECL definitions—the language extends itself as you use it.';\\n\\nds := DATASET( [ { statements } ] , { STRING statement_text } );\\n\\nAStatement_Info := RECORD\\n STRING70 STATEMENT_TEXT;\\nEND;\\n\\nres := PROJECT( ds , TRANSFORM ( \\n { \\n DATASET(AStatement_Info) Statement1 {MAXCOUNT(20)};\\n DATASET(AStatement_Info) Statement2 {MAXCOUNT(20)};\\n },\\n len_consumer_statements:=length(LEFT.statement_text);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t#IF( len_consumer_statements<= 1400)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\ttemp := splitString( LEFT.statement_text , 70);\\n temp_ds:=DATASET(temp,{ STRING70 STATEMENT_TEXT });\\n SELF.Statement1 := temp_ds( STATEMENT_TEXT <> '');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSELF := [];\\n #ELSE\\n temp1:= splitString( LEFT.statement_text , 1400);\\n temp2 := splitString ( temp1[1] , 70);\\n temp3 := splitString ( temp1[2] , 70);\\n temp_ds_1:=DATASET(temp2,{STRING70 STATEMENT_TEXT});\\n temp_ds_2:=DATASET(temp3,{STRING70 STATEMENT_TEXT});\\n SELF.Statement1:=temp_ds_1(STATEMENT_TEXT <> '');\\n SELF.Statement2:= temp_ds_2( STATEMENT_TEXT <> '');\\n #END \\n ));\\n\\t\\t\\t\\t\\t\\t\\noutput(res);\\n \\n
\", \"post_time\": \"2017-07-25 18:45:32\" },\n\t{ \"post_id\": 18483, \"topic_id\": 4603, \"forum_id\": 8, \"post_subject\": \"Re: Using HTTPCALL with text/plain webpage results?\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Currently only "application/json" and "text/xml" are supported. We should add more error checking to report if mime types are used that are not supported.\\n\\nThere is a general JIRA about supporting more HTTPCALL functionality that will probably be broken broken up into sub tasks.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-2917\\n\\nPlease comment on the JIRA describing what you are trying to do so we can prioritize that behavior and make sure we support your needs in the future.\\n\\nThanks,\\nTony\", \"post_time\": \"2017-08-02 13:52:56\" },\n\t{ \"post_id\": 18463, \"topic_id\": 4603, \"forum_id\": 8, \"post_subject\": \"Using HTTPCALL with text/plain webpage results?\", \"username\": \"drealeed\", \"post_text\": \"Hi there. I'm attempting to retrieve the content of a webpage that is simply text (a single string), not html. When I try, I get the error \\n\\nError - syntax error "Expecting "<""\\n\\nI'm not sure of the correct way to retrieve text/plain content via HTTP, and can't find it in the documentation. How would I need to change the below to make it work?\\n\\n#option('soapTraceLevel', 10); \\n\\nl_token := RECORD\\nSTRING token ; //capture the array contents\\nEND;\\n\\nraw2 := HTTPCALL('http://172.23.42.86:9080/token', 'GET', 'text/plain', \\n l_token, HTTPHEADER('Authorization','Basic bGVlZGR4OjMjZUVkRGND'));\\n\\nOUTPUT(raw2);
\", \"post_time\": \"2017-08-01 19:37:52\" },\n\t{ \"post_id\": 18553, \"topic_id\": 4643, \"forum_id\": 8, \"post_subject\": \"Re: Split data from one logical file to many logical files.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thanks a lot Richard.\\n\\nFollowing is the code I am trying to execute:\\n\\nnamesRec := RECORD \\nSTRING20 lname;\\nSTRING10 fname;\\n UNSIGNED2 age := 25;\\n UNSIGNED2 ctr := 0;\\nEND;\\nnamesTable2 := DATASET([{'Flintstone','Fred',35},\\n {'Flintstone','Wilma',33},\\n {'Mr. T','Z-man'},\\n {'Zetson','Georgie',10},\\t\\t\\n {'Flintstone','Wilma',33}], namesRec);\\n\\nSetDiffs := SET(TABLE(namesTable2,{lname},lname),lname);\\nCntDiffs := COUNT(SetDiffs);\\n\\nloopBody(DATASET(namesRec) ds, unsigned4 c) := FUNCTION\\n\\tThisDs := PROJECT(namesTable2(lname = SetDiffs[c]),TRANSFORM(namesRec,SELF.ctr := c; SELF:=LEFT;));\\n//OUTPUT(ThisDs,,'~thor::testing::output'+SetDiffs[c]);\\n\\tRETURN ThisDs;\\nEND;\\n \\nOUTPUT(LOOP(namesTable2,CntDiffs,loopBody(ROWS(LEFT),COUNTER)));\\n\\nThe output I get is always the last of all the iterations. I tried using OUTPUT within the function (commented line). The output file also has the record of the last iteration.\\n\\nIs there a way I can output records of all the iterations into different logical files.\", \"post_time\": \"2017-08-08 12:45:54\" },\n\t{ \"post_id\": 18543, \"topic_id\": 4643, \"forum_id\": 8, \"post_subject\": \"Re: Split data from one logical file to many logical files.\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,\\n\\nYou can try something like this:ds := DATASET(...);\\n\\nSetDiffs := SET(TABLE(ds,{RecDiff},RecDiff),RecDiff);\\nCntDiffs := COUNT(SetDiffs);\\n\\nLOOP(ds,CntDiffs, <loopbody> )
This is obviously pseudo code but the point is to split them out within the <loopbody> function by simple filtering like this:ThisDS := ds(RecDiff = SetDiffs[COUNTER]);
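\\n\\nA small self-contained hedged illustration of that filtering step (sample data invented):\\nrec := {STRING1 RecDiff, UNSIGNED1 n};\\nds := DATASET([{'A',1},{'B',2},{'A',3},{'C',4}], rec);\\nSetDiffs := SET(TABLE(ds, {RecDiff}, RecDiff), RecDiff); // the distinct values\\nCntDiffs := COUNT(SetDiffs); // 3\\nSubset(UNSIGNED4 c) := ds(RecDiff = SetDiffs[c]); // the c-th split, as used in the loop body\\nOUTPUT(Subset(1)); OUTPUT(Subset(2)); OUTPUT(Subset(3));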
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-08-07 13:17:42\" },\n\t{ \"post_id\": 18533, \"topic_id\": 4643, \"forum_id\": 8, \"post_subject\": \"Split data from one logical file to many logical files.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello,\\n\\nI have logical file on THOR of the following layout:\\n\\nLayoutOne := RECORD\\nUnsigned2 ID;\\nSTRING Field1;\\nSTRING Field2;\\nSTRING Field3;\\nSTRING Field4;\\nSTRING Field5;\\n..\\n..\\n..\\nSTRING RecordDifferentiater;\\nEND;\\n\\nThe field RecordDifferentiater differentiates the data in the logical file in terms of the source from where the data has been gathered. Depending upon the value of RecordDifferentiater,\\nI wish to split the data from the logical file into different logical files at runtime. So every logical file that gets created will have a data from the same RecordDifferentiater.\\n\\nFor example:\\nIf the base logical file has 10 records with 10 different RecordDifferentiater, I want to create 10 logical files at runtime.\\n\\nRequest your help in doing this. I tried using LOOP, but was not able to implemente the logic.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2017-08-07 11:47:47\" },\n\t{ \"post_id\": 18693, \"topic_id\": 4663, \"forum_id\": 8, \"post_subject\": \"Re: Setting stored variable in standalone executable\", \"username\": \"ghalliday\", \"post_text\": \"If the stand alone executable is compiled using the hthor engine (-platform=hthor), then you can use:\\n\\n\\n./a.out /day=28\\n
\\n\\nHowever that doesn't appear to be supported for the default (roxie) engine. I imagine it would be relatively simple to fix, so please open a JIRA and we can investigate further.\", \"post_time\": \"2017-09-06 16:23:18\" },\n\t{ \"post_id\": 18623, \"topic_id\": 4663, \"forum_id\": 8, \"post_subject\": \"Setting stored variable in standalone executable\", \"username\": \"vin\", \"post_text\": \"My ECL program has "stored" variables. Eg,\\ninteger day := 27 : stored('day');\\ninteger month := 7 : stored('month');\\ninteger year := 1996 : stored('year');\\n
\\n\\nI can set these parameters when using ecl run, via the -X parameter. Is there a way to set stored variables in a standalone program? Specifically, I compile as:\\n\\neclcc wrapper.ecl
\\n\\nHelp doesn't help.\\n\\n$ ./a.out --help\\nUsage: a.out [options]\\n\\nOptions:\\n --daliServers=[host1,...] : List of Dali servers to use\\n --tracelevel=[integer] : Amount of information to dump on logs\\n --stdlog=[boolean] : Standard log format (based on tracelevel)\\n --logfile : Outputs to logfile, rather than stdout\\n --help|-h : This message\\n
\\n\\nAppears to ignore -X flag. (Output for second call should be 1.)\\n\\n\\n$ ./a.out\\n0\\n$ ./a.out -Xday=28\\n0
\", \"post_time\": \"2017-08-25 20:38:04\" },\n\t{ \"post_id\": 18683, \"topic_id\": 4673, \"forum_id\": 8, \"post_subject\": \"Re: ECL syntax question\", \"username\": \"vin\", \"post_text\": \"That is it! Thanks for the help!\", \"post_time\": \"2017-09-06 10:47:15\" },\n\t{ \"post_id\": 18673, \"topic_id\": 4673, \"forum_id\": 8, \"post_subject\": \"Re: ECL syntax question\", \"username\": \"ghalliday\", \"post_text\": \"The problem is that SET is shorthand for "SET OF INTEGER".\\n\\nIf you change the parameter to SET OF STRING it should work.\\n\\nGavin\", \"post_time\": \"2017-09-06 09:42:11\" },\n\t{ \"post_id\": 18633, \"topic_id\": 4673, \"forum_id\": 8, \"post_subject\": \"ECL syntax question\", \"username\": \"vin\", \"post_text\": \"I am struggling to write valid ECL code. I have simplified the problem into just a few lines, which I will show below. First, note that the following compiles and produces the expected output.\\ni_rec := RECORD\\n INTEGER val;\\nEND;\\ni_set := [1, 2, 3];\\ni_ds := DATASET(i_set, i_rec);\\noutput(i_ds);\\n\\ns_rec := RECORD\\n STRING val;\\nEND;\\ns_set := ['a', 'b', 'c'];\\ns_ds := DATASET(s_set, s_rec);\\n\\noutput(s_ds);
\\nThe top and bottom halves differ only in that the top uses integers and the bottom strings.\\n\\nSo does the following.\\nEXPORT ints := MODULE\\n EXPORT rec := RECORD\\n\\tINTEGER val;\\n END;\\n\\n EXPORT ds(set theset) := DATASET(theset, rec);\\nEND;
\\n\\n Module "ints.ecl" above is imported in this program:\\nIMPORT ints;\\n\\nOUTPUT(ints.ds([1, 2, 3]));
\\nThis compiles and produces the expected output.\\n\\nBut when I change the above to use strings instead of integers it fails to compile. Here's the code. First the module "strs.ecl"\\nEXPORT strs := MODULE\\n EXPORT rec := RECORD\\n\\tSTRING val;\\n END;\\n\\n EXPORT ds(set theset) := DATASET(theset, rec);\\nEND;\\n
\\nand now the invoking program\\nIMPORT strs;\\n\\nOUTPUT(strs.ds(['a', 'b', 'c']));
\\n\\nThe compiler error is:\\n/Users/vin/Work/tecl/temp/./strs.ecl(6,46): error C2307: The field in the record does not match the type of the set elements
\\n\\nI have checked this several times and I believe the only difference between the latter two programs is the type INTEGER v. STRING. But this wasn't a problem in the first program.\\n\\nIt would be great if someone verified these findings. Therefore, attached is a zip file with all the source files and a script. If eclcc is in your path, you can run the script as `sh wtf.sh`, which will compile and run the programs to show the results.\", \"post_time\": \"2017-08-30 18:30:38\" },\n\t{ \"post_id\": 18743, \"topic_id\": 4703, \"forum_id\": 8, \"post_subject\": \"Re: WUID triggered by Cron Job Fails on GIT\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nYou should report this in JIRA: https://track.hpccsystems.com/\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-11 16:01:55\" },\n\t{ \"post_id\": 18703, \"topic_id\": 4703, \"forum_id\": 8, \"post_subject\": \"WUID triggered by Cron Job Fails on GIT\", \"username\": \"Rahul Jain\", \"post_text\": \"Background of process -\\n\\nWe have recently started integrating GITLAB & CI for HPCC-ECL. We work @ THOR end.\\nWe have in all 5 Cron Jobs running as of date today for Ingestion process.\\nAll of them are in wait state and running fine. \\nThey are automated to validate the customer authenticity, file movement & run time file spraying.
\\n\\nWhat’s the problem – \\n4/5 Cron Job do work as expected. \\nThe last Cron Job - triggers the scrub WUID which fails. It works fine if GITLAB not used.\\n\\nWhy it fails – (Just all based on so far findings which may be right or wrong)\\nWe have cron jobs in separate folder as say - indiaeclcommon\\nOther Cron Jobs work fine as they never reference another folder indiaeclhealth (which has health related code) from indiaeclcommon hence they work fine.\\nWe have IMPORTS added properly but when ran via CRON it never understands existence of indiaeclhealth.
\\n\\nIt works all good if ran by me on BWR window as it picks correct inputs in Argument (within preferences)\\n\\nI am not providing WUID which is first thing you would be looking for for certain reasons.\", \"post_time\": \"2017-09-08 08:36:34\" },\n\t{ \"post_id\": 19043, \"topic_id\": 4753, \"forum_id\": 8, \"post_subject\": \"Re: Locate ECL files in ECL code Generated at Run Time\", \"username\": \"abhayamishra\", \"post_text\": \"Jim,\\n\\nI tried doing this, but it failed.\\n\\n\\t<urn:DebugValues>\\n \\n <urn:DebugValue>\\n <urn:Name>eclcc-I</urn:Name> \\n <urn:Value>ecl_project1</urn:Value>\\n </urn:DebugValue>\\n \\n <urn:DebugValue>\\n <urn:Name>eclcc-I</urn:Name>\\n <urn:Value>ecl_common</urn:Value>\\n </urn:DebugValue>\\n <urn:DebugValue>\\n <urn:Name>eclcc-legacyimport</urn:Name>\\n <urn:Value>ecl_project1</urn:Value>\\n </urn:DebugValue>\\n \\n <urn:DebugValue>\\n <urn:Name>eclcc-legacyimport</urn:Name>\\n <urn:Value>ecl_common</urn:Value>\\n </urn:DebugValue>\\n\\n\\t</urn:DebugValues>\\n\\nFirst i called WUCreateAndUpdate method and generated workunit is passed to WUSubmit.\\nError received - \\n\\n<Debug>\\n <created_by>ws_workunits</created_by>\\n <created_for>amishra</created_for>\\n <eclcc-i>ecl_project</eclcc-i>\\n <eclcc-legacyimport>ecl_health</eclcc-legacyimport>\\n <target64bit>1</target64bit>\\n <targetclustertype>thorlcr</targetclustertype>\\n </Debug>\\n <Exceptions>\\n <Exception code="2167"\\n col="17"\\n row="1"\\n sequence="0"\\n severity="2"\\n source="eclserver">\\n Unknown identifier before "." (expected :=)\\n </Exception>\\n </Exceptions>\\n\\nThis Name-Value Debug values pair take only allowed values. It is not allowing like below one. It is getting failed while making soap call. \\n\\n <urn:DebugValues>\\n <urn:DebugValue>\\n <urn:Name>-I</urn:Name>\\n <urn:Value>ecl_project1</urn:Value>\\n </urn:DebugValue>\\n <urn:DebugValue>\\n <urn:Name>eclcc-I</urn:Name>\\n <urn:Value>ecl_common</urn:Value>\\n </urn:DebugValue>\\n </urn:DebugValues>\", \"post_time\": \"2017-09-26 07:10:21\" },\n\t{ \"post_id\": 19033, \"topic_id\": 4753, \"forum_id\": 8, \"post_subject\": \"Re: Locate ECL files in ECL code Generated at Run Time\", \"username\": \"JimD\", \"post_text\": \"In the WsWorkunits/WUCreateAndUpdate method, have you tried using \\n<DebugValues>\\n <DebugValue>\\n <Name>*****</Name>\\n <Value>****</Value>\\n </DebugValue>\\n</DebugValues>
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-09-25 15:14:43\" },\n\t{ \"post_id\": 19013, \"topic_id\": 4753, \"forum_id\": 8, \"post_subject\": \"Re: Locate ECL files in ECL code Generated at Run Time\", \"username\": \"abhayamishra\", \"post_text\": \"Thanks Jim For your reply.\\n\\nECL code is submitted as Query String onto ECL server by using SOAP call.\\nESP SOAP Url is\\n http://ESPAddress:Port/WsWorkunits?ver_=1.62\\nTwo methods has been called successively.\\nWUCreateAndUpdate\\nWUSubmit\\n\\nWe have GITlab VCS for ECL repository. And it has folder structure like below\\n-ecl_common\\n-ecl_project1\\n-ecl_project2\\n\\nThis is not working specially in this case because project1 ecl files are using some of ecl_common ecl files.\\nSimilar to -I <Import Directories>, what is similar option while making SOAP call?\\nWe have used -legacy and -I option in ECL IDE and ecl run/eclcc command lines and it worked without any issues. but here we stuck.\\nPlease suggest your views.\", \"post_time\": \"2017-09-25 07:45:03\" },\n\t{ \"post_id\": 18943, \"topic_id\": 4753, \"forum_id\": 8, \"post_subject\": \"Re: Locate ECL files in ECL code Generated at Run Time\", \"username\": \"JimD\", \"post_text\": \"abhayamishra,\\n\\nI am not sure we fully understand your question. \\n\\nHow is your ECL code generated at runtime? Did you write some type of code generator? If so, you should know the location where it is writing ECL files. \\n\\nIf you know the location, you can IMPORT definitions from the location specified in the -I<directorylocation> argument.\\n\\nJim\", \"post_time\": \"2017-09-22 14:16:35\" },\n\t{ \"post_id\": 18873, \"topic_id\": 4753, \"forum_id\": 8, \"post_subject\": \"Locate ECL files in ECL code Generated at Run Time\", \"username\": \"abhayamishra\", \"post_text\": \"Dear Sir,\\n\\nCan you please assist me solving this problem.\\n\\nHow can locate ECL files which is in different folder into ECL file which is generated at runtime?\\n\\nProblem Statement:\\nIn ECL command line, there are ways to set some options (compile/run time args)\\n ecl run -legacy -I directorylocation\\n\\nHow can this be achieved if ECL code is generated run time and soap call is made to submit generated Run time ecl code on ecl server.\\n\\nSample Code: \\n\\nString eclCode := 'india_ecl_common.Testing.Test();';\\n\\nsoapcall('http://' + ESPAddress + ':' + ESPPort + '/WsWorkunits',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'WUSubmit',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t Request,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t Response,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t XPATH('WUSubmitResponse/Exceptions/Exception')\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\", \"post_time\": \"2017-09-21 12:42:19\" },\n\t{ \"post_id\": 19003, \"topic_id\": 4773, \"forum_id\": 8, \"post_subject\": \"Re: Converting numpy array to ecl dataset (embed python)\", \"username\": \"richardkchapman\", \"post_text\": \"There are also some examples in https://hpccsystems.com/blog/embedding- ... 
ations-ecl\", \"post_time\": \"2017-09-23 09:55:02\" },\n\t{ \"post_id\": 18993, \"topic_id\": 4773, \"forum_id\": 8, \"post_subject\": \"Re: Converting numpy array to ecl dataset (embed python)\", \"username\": \"richardkchapman\", \"post_text\": \"Here's an example from the regression suite:\\n\\n\\nIMPORT Python;\\n\\nchildrec := RECORD\\n string name => unsigned value;\\nEND;\\n\\nnamesRecord := RECORD\\n STRING name1;\\n STRING10 name2;\\n LINKCOUNTED DATASET(childrec) childnames;\\n LINKCOUNTED DICTIONARY(childrec) childdict{linkcounted};\\n childrec r;\\n unsigned1 val1;\\n integer1 val2;\\n UTF8 u1;\\n UNICODE u2;\\n UNICODE8 u3;\\n BIG_ENDIAN unsigned4 val3;\\n DATA d;\\n BOOLEAN b;\\n SET OF STRING ss1;\\nEND;\\n\\ndataset(namesRecord) blockedNames(string prefix) := EMBED(Python)\\n return ["Gavin","John","Bart"]\\nENDEMBED;\\n\\n_linkcounted_ dataset(namesRecord) linkedNames(string prefix) := EMBED(Python)\\n return ["Gavin","John","Bart"]\\nENDEMBED;\\n\\ndataset(namesRecord) streamedNames(data d, utf8 u) := EMBED(Python)\\n return [ \\\\\\n ("Gavin", "Halliday", [("a", 1),("b", 2),("c", 3)], [("aa", 11)], ("aaa", 111), 250, -1, U'là', U'là', U'là', 1, d, False, ["1","2"]), \\\\\\n ("John", "Smith", [], [], ("c", 3), 250, -1, U'là', U'là', u, 2, d, True, set(["3"])) \\\\\\n ]\\nENDEMBED;\\n\\n// Test use of Python generator object for lazy evaluation...\\n\\ndataset(childrec) testGenerator(unsigned lim) := EMBED(Python:time)\\n num = 0\\n while num < lim:\\n yield ("Generate:", num)\\n num += 1\\nENDEMBED;\\n\\noutput(streamedNames(d'AA', u'là'));\\noutput (testGenerator(10));\\n\\n// Test what happens when two threads pull from a generator\\nc := testGenerator(1000);\\ncount(c(value < 500));\\ncount(c(value > 500));\\n\\n// Test Python code returning named tuples\\nchildrec tnamed(string s) := EMBED(Python)\\n import collections;\\n childrec = collections.namedtuple("childrec", "value,name")\\n return childrec(1,s)\\nENDEMBED;\\n\\noutput(tnamed('Yo').name);\\n\\n// Test passing records into Python\\n\\ndataset(namesRecord) streamInOut(dataset(namesRecord) recs) := EMBED(Python)\\n for rec in recs:\\n if rec.name1 == 'Gavin':\\n yield rec\\nENDEMBED;\\n\\noutput(streamInOut(streamedNames(d'AA', u'là')));\\n
\", \"post_time\": \"2017-09-22 20:53:12\" },\n\t{ \"post_id\": 18983, \"topic_id\": 4773, \"forum_id\": 8, \"post_subject\": \"Re: Converting numpy array to ecl dataset (embed python)\", \"username\": \"tlhumphrey2\", \"post_text\": \"Removed a couple of useless lines of code from that originally posted. Here is new code.\\n\\nimport python;\\n\\nrec := RECORD\\n set of REAL row;\\nEND;\\n\\nSTRING matrix(DATASET(rec) s) := embed(Python)\\n import numpy as np\\n import re\\n\\n def ECLDataset2NPArray(s):\\n zarray=np.empty([3,2],dtype=float)\\n i=0\\n for row in s:\\n z='%s' % (' '.join(map(str,row)))\\n z = re.sub('[\\\\[\\\\]]', '', z)\\n zlist=z.split(',')\\n zarray[i]=np.asarray(zlist)\\n i+=1\\n return zarray\\n\\n zarray=ECLDataset2NPArray(s)\\n zarray+=1000;\\n str_zarray=np.array_str(zarray)\\n\\n return str_zarray\\n\\nendembed;\\n\\ns := DATASET([{[1.1,2.2]},{[3.3,4.4]},{[5.5,6.6]}],rec);\\n\\nmatrix(s);\\n\\n
\", \"post_time\": \"2017-09-22 20:21:24\" },\n\t{ \"post_id\": 18973, \"topic_id\": 4773, \"forum_id\": 8, \"post_subject\": \"Converting numpy array to ecl dataset (embed python)\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have code that uses embed python to convert a numeric ecl dataset to a python numpy array. Then it does computation with the numpy array and returns a STRING that represents the result of the computation. Below is that code.\\n\\nBut, I want the embed python to return an ecl dataset of the result of the computation. Any ideas?\\n\\nimport python;\\n\\nrec := RECORD\\n set of REAL row;\\nEND;\\n\\nSTRING matrix(DATASET(rec) s) := embed(Python)\\n import numpy as np\\n import re\\n\\n #return s\\n\\n def ECLDataset2NPArray(s):\\n zarray=np.empty([3,2],dtype=float)\\n p=''\\n i=0 \\n for row in s:\\n z='%s' % (' '.join(map(str,row)))\\n p+=z;\\n z = re.sub('[\\\\[\\\\]]', '', z)\\n zlist=z.split(',')\\n zarray[i]=np.asarray(zlist)\\n i+=1\\n return zarray\\n \\n zarray=ECLDataset2NPArray(s)\\n zarray+=1000;\\n str_zarray=np.array_str(zarray)\\n str_zarray=np.array_str(zarray)\\n\\n return str_zarray\\n\\nendembed;\\n\\ns := DATASET([{[1.1,2.2]},{[3.3,4.4]},{[5.5,6.6]}],rec);\\n\\nmatrix(s);\\n
\", \"post_time\": \"2017-09-22 20:15:09\" },\n\t{ \"post_id\": 19723, \"topic_id\": 4783, \"forum_id\": 8, \"post_subject\": \"Re: How to pass parameters into ECL File\", \"username\": \"abhayamishra\", \"post_text\": \"Dear Richard, \\n\\nThankyou for help.\\n\\nI am able to do it using stored variable. Mainly my concern was to doing using command line.\\nI used -X option for same. ecl run -Xparam=value\\n\\nThankyou\\nAbhay\", \"post_time\": \"2017-10-25 06:46:10\" },\n\t{ \"post_id\": 19133, \"topic_id\": 4783, \"forum_id\": 8, \"post_subject\": \"Re: How to pass parameters into ECL File\", \"username\": \"rtaylor\", \"post_text\": \"abhayamishra,How an ECL file can read input parameters/file from outside(not within ECL/HPCC env) running on THOR cluster?\\nOr How can i pass parameters(generated dynamically) to ECL file from outside of ECL/HPCC env?
That question is also covered in our ECL online courses (the Roxie ECL courses). The short answer is: using SOAP and ECL's STORED workflow service. \\n\\nTheir use and interaction for this is also fully described in the section of Programmer's Guide articles titled Working With Roxie. The Programmer's Guide is available as a PDF download (https://hpccsystems.com/training/documentation/learning-ecl) and is also completely contained in the ECL IDE compiled Help file (press F1).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-27 14:10:36\" },\n\t{ \"post_id\": 19113, \"topic_id\": 4783, \"forum_id\": 8, \"post_subject\": \"Re: How to pass parameters into ECL File\", \"username\": \"abhayamishra\", \"post_text\": \"Hello Richard,\\n\\nLet me rephrase my question.\\nHow an ECL file can read input parameters/file from outside(not within ECL/HPCC env) running on THOR cluster?\\nOr How can i pass parameters(generated dynamically) to ECL file from outside of ECL/HPCC env?\\n\\nFor an Example, \\nI have to spray file to HPCC thor system. And that file path is not constant(directory is changing at runtime) or vary across all envs (dev, qc). \\nThen to spray file everytime from different filepaths, i have to pass file path at runtime.\\nHow could it be done?\", \"post_time\": \"2017-09-27 06:23:57\" },\n\t{ \"post_id\": 19093, \"topic_id\": 4783, \"forum_id\": 8, \"post_subject\": \"Re: How to pass parameters into ECL File\", \"username\": \"rtaylor\", \"post_text\": \"abhayamishra,\\n\\nThat's the kind of basic question that is already covered in our free ECL online courses https://hpccsystems.com/training#Classes -- have you gone through them yet?\\n\\nSo, to execute your "test" function, you can just add a new ECL file to the same directory (module) your function is written to, and in that file type this:IMPORT $;\\n$.test(2); //2 is my test value, you can make that whatever you want\\n //should return your second record {'b', 31}
then hit the Submit button and your function will be used as your query.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-26 17:31:26\" },\n\t{ \"post_id\": 19023, \"topic_id\": 4783, \"forum_id\": 8, \"post_subject\": \"How to pass parameters into ECL File\", \"username\": \"abhayamishra\", \"post_text\": \"Dear Team,\\n\\nCan you please assist me in finding way to pass parameters into ECL file.\\nSample Code:\\nEXPORT Test(Integer val) := FUNCTION\\n\\tshared layout := RECORD \\n \\t\\tString Name;\\n \\t\\tINTEGER AGE;\\n\\tEND;\\n\\t\\tOUTPUT(val)\\t;\\n\\t\\tds := DATASET([{'a', 33}, {'b', 31}, {'c', 29}, {'d', 40}], layout);\\n\\t\\treturn ds[val];\\t\\nEND;\\n\\nHow can i call it by passing some parameters?\", \"post_time\": \"2017-09-25 14:03:35\" },\n\t{ \"post_id\": 19073, \"topic_id\": 4793, \"forum_id\": 8, \"post_subject\": \"Re: Why can't I convert this set to a dataset?\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard,\\n\\nThanks so much. That works perfectly.\", \"post_time\": \"2017-09-26 16:43:28\" },\n\t{ \"post_id\": 19063, \"topic_id\": 4793, \"forum_id\": 8, \"post_subject\": \"Re: Why can't I convert this set to a dataset?\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nThis line is the problem:EXPORT Set10 := %'SetString'%; //generate the ECL code
With the single quotes, you're generating a string instead of the ECL set expression that you actually want. Change it to this:EXPORT Set10 := %SetString%; //generate the ECL code
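A tiny side-by-side sketch of the difference, with a hypothetical value, purely for illustration:\\n#DECLARE (SetString)\\n#SET (SetString, '[1,2,3]');\\nAsCode := %SetString%; //becomes: AsCode := [1,2,3];\\nAsText := %'SetString'%; //becomes: AsText := '[1,2,3]';\\nOUTPUT(AsText); //'[1,2,3]'\\nOUTPUT(AsCode[2]); //2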
Removing the single quotes generates the ECL code, not the string.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-26 15:53:45\" },\n\t{ \"post_id\": 19053, \"topic_id\": 4793, \"forum_id\": 8, \"post_subject\": \"Why can't I convert this set to a dataset?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I use the example given in the ECL Language Reference Manual under #LOOP to make a set of numbers. Then I attempt to convert that set into a dataset (see the next to the last line in the code below). By the way the code below is the example given in the language reference manual under #LOOP EXCEPT for the last 2 lines.\\n\\nThe error I get is "Expected a list".\\n\\n// This script creates a set attribute definition of the 1st 10\\n// natural numbers and defines an attribute named "Set10"\\n#DECLARE (SetString)\\n#DECLARE (Ndx)\\n#SET (SetString, '['); //initialize SetString to [\\n#SET (Ndx, 1); //initialize Ndx to 1\\n#LOOP\\n#IF (%Ndx% > 9) //if we've iterated 9 times\\n#BREAK // break out of the loop\\n#ELSE //otherwise\\n#APPEND (SetString, %'Ndx'% + ',');\\n//append Ndx and comma to SetString\\n#SET (Ndx, %Ndx% + 1)\\n//and increment the value of Ndx\\n#END\\n#END\\n#APPEND (SetString, %'Ndx'% + ']'); //add 10th element and closing ]\\nEXPORT Set10 := %'SetString'%; //generate the ECL code\\nds := DATASET(Set10,{UNSIGNED s});\\nOUTPUT(COUNT(ds));\\n
\", \"post_time\": \"2017-09-26 15:23:46\" },\n\t{ \"post_id\": 19143, \"topic_id\": 4803, \"forum_id\": 8, \"post_subject\": \"Re: MoveExternalFile\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nA few questions:\\n\\nIs "/data/test/APPLICATION/requests" your dropzone location ?\\nWhat version of HPCC are you using ?\\nWhat target (hthor/thor) are you submitting to ?\\nWho is the owner of the /data/test/APPLICATION/requests/IN/myfile.csv file ?\\nCan you provide the complete and exact error msg text ?\\n\\nthanks,\\nmark\", \"post_time\": \"2017-09-27 20:29:51\" },\n\t{ \"post_id\": 19083, \"topic_id\": 4803, \"forum_id\": 8, \"post_subject\": \"MoveExternalFile\", \"username\": \"John Meier\", \"post_text\": \"Greetings - \\n\\nI have a file on the Landing Zone, which is sprayed and processed with no problem. Now I want to take that file and move it to a different folder.\\n\\nI have looked at this forum and the ECL Standards Library Reference Manual and I'm trying the simplest code possible:\\nIMPORT STD;\\n\\nIP := '10.999.88.001'; // faked address;\\ninfile := '/data/test/APPLICATION/requests/IN/myfile.csv';\\noutfile := '/data/test/APPLICATION/requests/PROCESSED/myfile.csv';\\n\\nM1 := STD.File.MoveExternalFile(IP, infile, outfile); \\nO1 := OUTPUT(DATASET(STD.File.ExternalLogicalFileName(IP,outfile),{STRING1 Char},FLAT));\\nSEQUENTIAL(M1,O1);
\\n\\nFairly simple - move the code from one folder to another, then print it. Well, it fails. I get a RFSERR_MoveFailed(1) error telling me 'Operation not permitted'. The folder/file permissions (since this is a Linux system) is set to the most accessible: 7777 -(includes the sticky bits)- so that anyone can Read/Write/Execute/Delete. So is this an issue where the daliserve utility program is not running on the remote machine? Is there something missing in the latest release that's not in the manual?\\n There's not much in the way of diagnostics on this, so any help would be greatly appreciated.\\n\\nRegards,\\nJohn\", \"post_time\": \"2017-09-26 17:20:22\" },\n\t{ \"post_id\": 19523, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"richardkchapman\", \"post_text\": \"You also need to remove the line:\\n\\n#option('outputLimitMb',10000); \\n\\nIf you try to set the limit higher than 2000, you will get this error, regardless of how much is actually output (in this example a couple of very small spill values are written to the workunit).\", \"post_time\": \"2017-10-17 09:22:32\" },\n\t{ \"post_id\": 19513, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"ghalliday\", \"post_text\": \"If I change the last line of your query to:\\n\\noutput(MatrixMultiply(A,NRowsA,NColsA,B,NRowsB,NColsB),,'result');\\n\\nthen I don't see any outputs to dali in the generated query. That should avoid the error.\", \"post_time\": \"2017-10-17 08:57:01\" },\n\t{ \"post_id\": 19503, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard,\\n\\nI started getting the dali error, "Dali result outputs are restricted to a maximum of 2000 MB", after doing what you suggested to increase EclAgentProcess memory and output to the workunit limits (see above).\\n \\nI'm fairly sure that the dali error I'm getting is caused by the passing of the 2 large datasets to the embedded python routine. Why? Because I tried this 3 different ways and got the same error message: 1) the code as I have in my 1st post, above; 2) OUTPUT(COUNT(resultdataset)); and 3) OUTPUT(resultdataset,,'tlh::CMatrix',OVERWRITE).\", \"post_time\": \"2017-10-16 20:40:19\" },\n\t{ \"post_id\": 19403, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"richardkchapman\", \"post_text\": \"No. If you want larger outputs they have to be to a file.\", \"post_time\": \"2017-10-11 07:46:36\" },\n\t{ \"post_id\": 19233, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"tlhumphrey2\", \"post_text\": \"Is there a setting in the environment.xml file that would increase the Dali’s output from the current 10MB to its maximum?\", \"post_time\": \"2017-09-29 16:43:47\" },\n\t{ \"post_id\": 19223, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"tlhumphrey2\", \"post_text\": \"I set the following parameter in EclAgentProcess:defaultMemoryLimitMB="10000"
\\nAnd, I put the following 2 statements at the top of my ECL. #option('hthorMemoryLimit',10000);\\n#option('outputLimitMb',10000); \\n
\\nNote. I had to add outputLimitMb because the default for output to the workunit is 10MB. \\n\\nNow, I get this error.\\nError: System error: 0: Dali result outputs are restricted to a maximum of 2000 MB, the current limit is 10 MB. A huge dali result usually indicates the ECL needs altering.
\\nIs there anything I can do to eliminate this error?\", \"post_time\": \"2017-09-29 15:54:13\" },\n\t{ \"post_id\": 19213, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"richardkchapman\", \"post_text\": \"In eclagent the memory limit comes from 2 places:\\n\\ndefaultMemoryLimitMB value in the config\\n#option("hthorMemoryLimit") value in the workunit\\n\\nIn both cases the value is specified in megabytes.\", \"post_time\": \"2017-09-29 15:13:42\" },\n\t{ \"post_id\": 19193, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"richardkchapman\", \"post_text\": \"How big are the datasets?\", \"post_time\": \"2017-09-29 15:04:38\" },\n\t{ \"post_id\": 19183, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Re: Memory pool exhausted when passing 2 datasets embed\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nJust curious -- since your embedded Python "does nothing" what happens if you remove the Python and replace it with an ECL function that equally "does nothing"? Does it still fail with the same error?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-29 14:55:10\" },\n\t{ \"post_id\": 19163, \"topic_id\": 4813, \"forum_id\": 8, \"post_subject\": \"Memory pool exhausted when passing 2 datasets embed\", \"username\": \"tlhumphrey2\", \"post_text\": \"Below is the code. Currently, my embedded python doesn't do anything. The ECL reads in 2 large datasets and passes both to the embedded python, MatrixMultiply. The sizes of the 2 datasets are 1,600,040,000 and 1,600,080,000. I get the following error:Error: System error: 1301: Memory pool exhausted: pool id 4194314 (1216 pages) exhausted, requested 1 (in Disk Read G22 E23)
\\nimport python;\\n\\nrec0 := RECORD\\n REAL cell;\\nEND;\\n\\nrec := RECORD\\n DATASET(rec0) arow;\\nEND;\\n\\nDATASET(rec) MatrixMultiply(DATASET(rec) A, unsigned nrowsA, unsigned ncolsA,DATASET(rec) B, unsigned nrowsB, unsigned ncolsB) := embed(Python)\\n import numpy as np\\n import re\\n return A\\n\\nendembed;\\n\\nA:=DATASET('~hthor::tlh::AMatrix',rec,THOR);\\nB:=DATASET('~hthor::tlh::BMatrix',rec,THOR);\\nNRowsA:=COUNT(A);\\nNColsA:=COUNT(A[1].arow);\\nNRowsB:=COUNT(B);\\nNColsB:=COUNT(B[1].arow);\\n\\nMatrixMultiply(A,NRowsA,NColsA,B,NRowsB,NColsB);\\n
\\nI'm running this on hthor. I'm working on a machine that has 15GB of memory. So, I should have plenty of memory. Both datasets should fit entirely in memory. I want to make changes to my environment.xml file so more memory will be available to my workunit. I have added these 2 parameters to both EclAgentProcess and ThorCluster. But, I'm still getting the error.\\ndefaultMemoryLimit="10000000000"\\ntotalMemoryLimit="10000000000"\\n
\\nAny help would be most appreciated.\", \"post_time\": \"2017-09-29 14:48:18\" },\n\t{ \"post_id\": 19273, \"topic_id\": 4843, \"forum_id\": 8, \"post_subject\": \"Re: JSON Array DataSet\", \"username\": \"househippo\", \"post_text\": \"Tony, above solution works great thank you.\", \"post_time\": \"2017-10-02 17:43:29\" },\n\t{ \"post_id\": 19253, \"topic_id\": 4843, \"forum_id\": 8, \"post_subject\": \"Re: JSON Array DataSet\", \"username\": \"Tony Kirk\", \"post_text\": \"What about this?\\n\\n SET OF STRING days{XPATH('days')};
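\\n\\nDropped into the record structure from the question, that would look roughly like this (only the days line is new; the rest is unchanged from the original post):\\nschemaEmail := RECORD\\n STRING emails {XPATH('home')};\\nEND;\\nschemaRoot := RECORD\\n STRING season {XPATH('season')};\\n DATASET(schemaEmail) email{XPATH('email')};\\n SET OF STRING days{XPATH('days')};\\nEND;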
\", \"post_time\": \"2017-10-02 12:18:52\" },\n\t{ \"post_id\": 19243, \"topic_id\": 4843, \"forum_id\": 8, \"post_subject\": \"JSON Array DataSet\", \"username\": \"househippo\", \"post_text\": \"How do I read "days" below?\\n\\n{\\n"season":"winter",\\n"email":[{"home":"test@test.com"}],\\n"days":["monday","tuesday"]\\n}
\\n\\nschemaEmail := RECORD\\n STRING emails {XPATH('home')};\\nEND;\\nschemaRoot := RECORD\\n STRING season {XPATH('season')};\\n DATASET(schemaEmail) email{XPATH('email')};\\n ???????;\\nEND;\\n\\na := DATASET('~::data.json',schemaRoot);
\\n\\nWhat does the ECL look like for "days"?\", \"post_time\": \"2017-10-02 06:22:10\" },\n\t{ \"post_id\": 19303, \"topic_id\": 4863, \"forum_id\": 8, \"post_subject\": \"Re: Question about the nature of Actions\", \"username\": \"janet.anderson\", \"post_text\": \"Yes, that clarifies. Thank you.\", \"post_time\": \"2017-10-05 13:48:16\" },\n\t{ \"post_id\": 19293, \"topic_id\": 4863, \"forum_id\": 8, \"post_subject\": \"Re: Question about the nature of Actions\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nAn Action is simply an expression for which you want to see the result. In your code, the Action is "def". You could have also done this to achieve the same result:myVal := FALSE; \\nIF(myVal,OUTPUT('if'));
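\\n\\nA small variation (just flipping myVal to TRUE) may make the mechanics clearer -- the named definition is the Action, and listing its name on a line of its own is what executes it:\\nmyVal := TRUE;\\ndef := IF(myVal,OUTPUT('if')); //definition of an action\\ndef; //this line is the action that runs, so 'if' shows up in a result tab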
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-05 12:50:09\" },\n\t{ \"post_id\": 19283, \"topic_id\": 4863, \"forum_id\": 8, \"post_subject\": \"Question about the nature of Actions\", \"username\": \"janet.anderson\", \"post_text\": \"I'm confused about actions. Actions are required to instigate workunits, and each job must have at least one action. But the following code works, even though there isn't an action (the job runs, but there are no results tabs). How is this working?\\n\\n\\nmyVal := FALSE; \\ndef := IF(myVal,OUTPUT('if'));\\ndef;\\n
\", \"post_time\": \"2017-10-03 20:17:38\" },\n\t{ \"post_id\": 19573, \"topic_id\": 4873, \"forum_id\": 8, \"post_subject\": \"Re: Error: CEnvironmentClusterInfo\", \"username\": \"bforeman\", \"post_text\": \"Hi Janet,\\n\\nI asked our THOR developer and this was his reply:\\n\\nHm, I may have seen it before, but don't recollect the problem.\\nI think it's a configuration issue.\\n\\nI would ask her to contact the system administrators to restart the environment.\\n
\\n\\nI would also add that if you only changed the logical file name could there have been a problem with that specific file and could it possibly need a re-spray?\\n\\nIf the above doesn't help and you can consistently reproduce this I would open an issue report in our issue tracker.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-10-18 14:25:20\" },\n\t{ \"post_id\": 19313, \"topic_id\": 4873, \"forum_id\": 8, \"post_subject\": \"Error: CEnvironmentClusterInfo\", \"username\": \"janet.anderson\", \"post_text\": \"When I syntax check my code, I get the error message: "Error: CEnvironmentClusterInfo: Thor cluster can not have 0 slave processes (0, 0), 5008, ". What does this error mean? More confusingly, the code being syntax checked is the same as some code that ran perfectly fine except starting with a different logical file (in the same format, in the same location).\", \"post_time\": \"2017-10-06 14:01:56\" },\n\t{ \"post_id\": 19413, \"topic_id\": 4893, \"forum_id\": 8, \"post_subject\": \"Re: Rolling up data into a set field.\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,\\n\\nYou do not need the SORTs in your JOIN:\\nJoinedData := JOIN(names,phones, \\n (INTEGER)LEFT.DID = (INTEGER)RIGHT.UniqueID, \\n ItXForm(LEFT, RIGHT), LEFT OUTER);
because the internals of JOIN automatically handle that for you.\\n\\nAnd your DEDUP versus my ROLLUP -- both accomplish the same thing, but ROLLUP calls a TRANSFORM function on the duplicate records making it a much more flexible solution than DEDUP. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-11 17:36:29\" },\n\t{ \"post_id\": 19383, \"topic_id\": 4893, \"forum_id\": 8, \"post_subject\": \"Re: Rolling up data into a set field.\", \"username\": \"BGehalo\", \"post_text\": \"Playing around with stuff I also got it working by creating the set value during the join and then deduping the result.\\n\\n\\n{RECORDOF (names), SET OF STRING PhoneNums} ItXForm(names L, phones R) := TRANSFORM\\n\\tSELF.PhoneNums := SET(phones((INTEGER)UniqueID = (INTEGER)L.DID), Phone);\\n\\tSELF := L;\\nEND;\\n\\nJoinedData := JOIN(SORT(names, DID), SORT(phones, UniqueID), (INTEGER)LEFT.DID = (INTEGER)RIGHT.UniqueID, ItXForm(LEFT, RIGHT), LEFT OUTER);\\nSortedData := SORT(JoinedData, DID, FName, LName);\\nDEDUP(JoinedData, DID, FName, LName);\\n
\", \"post_time\": \"2017-10-10 20:38:58\" },\n\t{ \"post_id\": 19353, \"topic_id\": 4893, \"forum_id\": 8, \"post_subject\": \"Re: Rolling up data into a set field.\", \"username\": \"BGehalo\", \"post_text\": \"Thanks!\", \"post_time\": \"2017-10-10 19:40:37\" },\n\t{ \"post_id\": 19343, \"topic_id\": 4893, \"forum_id\": 8, \"post_subject\": \"Re: Rolling up data into a set field.\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,\\n\\nYes, the job can be done more simply. Here's the way I would approach this:J1Rec := RECORD\\n RECORDOF(names); \\n DATASET({phones.Phone}) PhoneNums;\\nEND;\\nJ1Rec J1XF(names L, phones R) := TRANSFORM\\n SELF := L;\\n SELF.PhoneNums := ROW({R.Phone},{phones.Phone});\\nEND;\\nJ1 := JOIN(names,phones,\\n (INTEGER)LEFT.did = (INTEGER)RIGHT.UniqueID,\\n J1XF(LEFT,RIGHT),LEFT OUTER);\\n\\nJ1Rec Roll1XF(J1Rec L, J1Rec R) := TRANSFORM\\n SELF.PhoneNums := L.PhoneNums + R.PhoneNums;\\n SELF := L;\\nEND;\\nROLLUP(SORT(J1,DID),\\n LEFT.DID=RIGHT.DID AND LEFT.Fname=RIGHT.Fname,\\n Roll1XF(LEFT,RIGHT));
Note that I'm creating a nested child dataset (covered in our Advanced ECL Part 1 course online) instead of the SET OF STRING that you had defined. This is a more "typical" ECL construct.\\n\\nBut if you really require a SET OF STRING, then this minor change (two lines, only) accomplishes that:J2Rec := RECORD\\n RECORDOF(names); \\n SET OF STRING PhoneNums; //this changed\\nEND;\\nJ2Rec J2XF(names L, phones R) := TRANSFORM\\n SELF := L;\\n SELF.PhoneNums := [R.Phone]; //and this changed\\nEND;\\nJ2 := JOIN(names,phones,\\n (INTEGER)LEFT.did = (INTEGER)RIGHT.UniqueID,\\n J2XF(LEFT,RIGHT),LEFT OUTER);\\nJ2Rec Roll2XF(J2Rec L, J2Rec R) := TRANSFORM\\n SELF.PhoneNums := L.PhoneNums + R.PhoneNums;\\n SELF := L;\\nEND;\\nROLLUP(SORT(J2,DID),\\n LEFT.DID=RIGHT.DID AND LEFT.Fname=RIGHT.Fname,\\n Roll2XF(LEFT,RIGHT));
\\nOne general note: I see you're defining all your string fields as variable-length STRING. It's better practice, when you know the maximum size of a string field, to explicitly define it at that size. That produces more efficient runtime code. So I'd suggest changing your inline DATASETs to this:\\nnames := DATASET([{'B','Smith','015553'},\\n {'E','Smith','00041254'},\\n {'Br','Smith','015553'},\\n {'rwrr','Smith','342342342342'}], \\n {STRING4 FName, STRING5 LName, STRING12 DID});\\n\\nphonesStruct := {STRING Phone, STRING12 UniqueID};\\nphones := DATASET([{'9541241', '15553'}, \\n {'954444241', '41254'}, \\n {'1111', '15553'},\\n {'2222', '15553'},\\n {'3333', '41254'},\\n {'4444', '41254'}], \\n phonesStruct);
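\\nAnd regarding your INTFORMAT question -- another option is to zero-pad both keys to the same width once, up front, so the JOIN condition becomes a plain string compare. A rough sketch (the width of 12 is just the hypothetical STRING12 size used above):\\nPadKey(STRING s) := INTFORMAT((INTEGER)s,12,1); //zero-pad to 12 digits\\nNormNames := PROJECT(names,TRANSFORM(RECORDOF(names),SELF.DID := PadKey(LEFT.DID),SELF := LEFT));\\nNormPhones := PROJECT(phones,TRANSFORM(phonesStruct,SELF.UniqueID := PadKey(LEFT.UniqueID),SELF := LEFT));\\n//then: JOIN(NormNames,NormPhones,LEFT.DID = RIGHT.UniqueID, ... );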
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-10 18:54:30\" },\n\t{ \"post_id\": 19333, \"topic_id\": 4893, \"forum_id\": 8, \"post_subject\": \"Rolling up data into a set field.\", \"username\": \"BGehalo\", \"post_text\": \"Hello, I'm a new ECL developer and I'm trying to get a good handle on writing ECL efficiently.\\n\\nI recently had something I was working on in which I had to create a set of data and then append that data into a new record structure. I figured out how to do it but I feel there's a better way to do so.\\n\\nRight now I'm projecting a record set of phone numbers into a new structure that combines the phone numbers into a set based on the UniqueID and then Deduping that table, then combining the deduped table with another record set using a join.\\n\\nHere's the proof of concept I came up with.\\n\\n//Some sample data\\nnames := DATASET([{'B','Smith','015553'},\\n{'E','Smith','00041254'},\\n{'Br','Smith','015553'},\\n{'rwrr','Smith','342342342342'}], {STRING FName, STRING LName, STRING DID});\\n\\n\\nphonesStruct := {STRING Phone, STRING UniqueID};\\nphones := DATASET([{'9541241', '15553'}, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'954444241', '41254'}, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'1111', '15553'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'2222', '15553'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'3333', '41254'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'4444', '41254'}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t], \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tphonesStruct);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n//Define a structure for a table with the set of phone numbers.\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nProjectedPhonesStruct := {\\n\\tSTRING UniqueID, \\n\\tSET OF STRING PhoneNums\\n};\\n\\n//Define a function to return a set of phone numbers\\nSET OF STRING PhonesSet(dataset(phonesStruct) InRecs) := FUNCTION\\n\\tRETURN SET(InRecs, phone);\\nEND;\\n\\n//Define a transform to project the phones record into the new structure\\nProjectedPhonesStruct PXForm(Phones L) := TRANSFORM\\n\\tSELF.PhoneNums := PhonesSet(Phones(UniqueID = L.UniqueID));\\n\\tSELF := L;\\nEND;\\n\\n//Project the phones\\nProjectedPhones := PROJECT(Phones, PXForm(LEFT));\\n\\n//Now remove duplicates, seems inefficient here?\\nDDPhonesAsSet := DEDUP(SORT(ProjectedPhones, (INTEGER)UniqueID), UniqueID);\\n\\n//Define a transform to join the ProjectedPhones and Names recsets while omitting the redundant UniqueID field.\\n{names, SET OF STRING PhoneNums} CombinePhoneSetName(names L, ProjectedPhones R) := TRANSFORM\\n\\tSELF.PhoneNums := R.PhoneNums;\\n\\tSELF := L;\\nEND;\\n\\n//And finally JOIN them\\nJOIN(SORT(names, (INTEGER)DID), DDPhonesAsSet, \\n\\t\\t\\t(INTEGER)LEFT.DID = (INTEGER)RIGHT.UniqueID, CombinePhoneSetName(LEFT, RIGHT), LEFT OUTER);\\n
\\n\\nI explicitly converted the UniqueId and DID fields to integers to emulate the real world layout in which they are both strings but one record set has leading zeros. I'm wondering if the INTFORMAT function is more efficient now that I wrote this up.\\n\\nThanks!\", \"post_time\": \"2017-10-10 15:39:30\" },\n\t{ \"post_id\": 19453, \"topic_id\": 4923, \"forum_id\": 8, \"post_subject\": \"Re: Loading a dataset from a Workunit output\", \"username\": \"rtaylor\", \"post_text\": \"Drea,\\n\\nYou might try Template Language to parse the xmlschema and generate the correct ECL. I have no idea if that will/might work, but it's the best thought I can come up with right now. Otherwise ... JIRA? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-12 20:40:20\" },\n\t{ \"post_id\": 19443, \"topic_id\": 4923, \"forum_id\": 8, \"post_subject\": \"Loading a dataset from a Workunit output\", \"username\": \"drealeed\", \"post_text\": \"I'm looking to load the output of a previous workunit into a dataset using the standard\\n\\nDATASET(WORKUNIT('W20171012-143110','test2'), {STRING1 field1});\\n\\nformat. However, I want to be able to load a workunit output into a dataset without necessarily having the record structure for it to hand, instead using the xmlschema that's part of the workunit result as the source for the dataset structure.\\n\\nIs there any standard way to do this?\\n\\nThanks,\\n\\nDrea\", \"post_time\": \"2017-10-12 18:58:28\" },\n\t{ \"post_id\": 19553, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Re: Dali result outputs are restricted to a maximum of 2000 \", \"username\": \"john holt\", \"post_text\": \"Tim,\\n You still have the huge SET OF REAL8 objects. These need to be removed.\\n\\n Start with your A and B datasets and do the rollups, BUT, only if all of the positions are present as records. If you are missing any records, you will need to use the PBblas function. The PBblas function does not need to sort the records before constructing the array.\", \"post_time\": \"2017-10-17 17:22:07\" },\n\t{ \"post_id\": 19543, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Re: Dali result outputs are restricted to a maximum of 2000 \", \"username\": \"richardkchapman\", \"post_text\": \"You need to get rid of the line\\n\\n#option('outputLimitMb',10000);\\n\\nOr nothing will work\", \"post_time\": \"2017-10-17 16:53:44\" },\n\t{ \"post_id\": 19533, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Re: Dali result outputs are restricted to a maximum of 2000 \", \"username\": \"tlhumphrey2\", \"post_text\": \"John,\\n\\nWill the following do the same thing as the group rollup followed by the combine?\\n\\n
#option('hthorMemoryLimit',10000);\\n#option('outputLimitMb',10000);\\nIMPORT STD;\\nIMPORT PBblas;\\nIMPORT PBblas.Types as Types;\\nLayout_Cell := Types.Layout_Cell;\\n\\nA := DATASET('~hthor::tlh::AMatrix_PBblas',Layout_Cell,THOR);\\nB := DATASET('~hthor::tlh::BMatrix_PBblas',Layout_Cell,THOR);\\n\\nSET OF REAL8 A_Set := SET(SORT(A,y,x), v);\\nSET OF REAL8 B_Set := SET(SORT(B,y,x), v);\\n\\naMatrixRec := RECORD\\n SET OF REAL8 mat;\\nEND;\\n\\nA_DS := DATASET([{A_Set}], aMatrixRec);\\nB_DS := DATASET([{B_Set}], aMatrixRec);\\n\\naMatrixRec matrixMultiply(aMatrixRec A, aMatrixRec B) := TRANSFORM\\n SELF.mat := STD.BLAS.dgemm(FALSE, FALSE, 11410, 11410, 40000, 1.0, A.mat, B.mat);\\nEND;\\n\\nC_DS := COMBINE(A_DS, B_DS,matrixMultiply(LEFT,RIGHT));\\n\\n//OUTPUT(COUNT(C_DS));\\nOUTPUT(C_DS,,'tlh::CMatrix_PBblas',OVERWRITE);\\n
\\n\\nTim\", \"post_time\": \"2017-10-17 16:48:46\" },\n\t{ \"post_id\": 19493, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Re: Dali result outputs are restricted to a maximum of 2000 \", \"username\": \"john holt\", \"post_text\": \"I would try a group rollup on each of the two datasets creating a singe record per dataset of {SET OF REAL8 mat} \\n\\nGiven the 2 new record sets, I would use COMBINE to bring the two records together in the same transform, and then call the BLAS routine in the transform for the COMBINE(...) function.\", \"post_time\": \"2017-10-16 12:11:59\" },\n\t{ \"post_id\": 19483, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Re: Dali result outputs are restricted to a maximum of 2000 \", \"username\": \"tlhumphrey2\", \"post_text\": \"I forgot to mention that I replaced the OUTPUT(COUNT with the OUTPUT to a file that I have commented out and got the same error message.\", \"post_time\": \"2017-10-13 19:58:57\" },\n\t{ \"post_id\": 19473, \"topic_id\": 4933, \"forum_id\": 8, \"post_subject\": \"Dali result outputs are restricted to a maximum of 2000 MB\", \"username\": \"tlhumphrey2\", \"post_text\": \"Below is my ecl code. Here is the full error message I'm getting. Error: System error: 0: Dali result outputs are restricted to a maximum of 2000 MB, the current limit is 10 MB. A huge dali result usually indicates the ECL needs altering.
\\n\\nThe 2 datasets, A3 and B3, are large. Basically these are 2 matrices. The dimensions of the first one is 11410 x 40000 and the second one's dimensions are 40000 x 11310. I'm calling STD.BLAS.dgemm to do a matrix multiply.\\n\\nAny one know how I can get around this error?\\n\\n#option('hthorMemoryLimit',10000);\\n#option('outputLimitMb',10000);\\nIMPORT STD;\\nIMPORT PBblas;\\nIMPORT PBblas.Types as Types;\\nLayout_Cell := Types.Layout_Cell;\\n\\nA3 := DATASET('~hthor::tlh::AMatrix_PBblas',Layout_Cell,THOR);\\nB3 := DATASET('~hthor::tlh::BMatrix_PBblas',Layout_Cell,THOR);\\n\\nA3_Set := SET(A3, v);\\nB3_Set := SET(B3, v);\\nC := STD.BLAS.dgemm(FALSE, FALSE, 11410, 11410, 40000, 1.0, A3_Set, B3_Set);\\nOUTPUT(COUNT(C));\\n//OUTPUT(DATASET([C],{REAL8 v}),,'tlh::CMatrix_PBblas',OVERWRITE);
\", \"post_time\": \"2017-10-13 19:07:31\" },\n\t{ \"post_id\": 19683, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Re: Appending datasets with other datasets while adding a fi\", \"username\": \"BGehalo\", \"post_text\": \"Thanks Richard,\\n\\nI assume it's this JIRA:\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nI just made a bug report:\\nhttps://track.hpccsystems.com/browse/HPCC-18579\", \"post_time\": \"2017-10-20 19:11:05\" },\n\t{ \"post_id\": 19673, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Re: Appending datasets with other datasets while adding a fi\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,\\n\\nYour first issue is, since your TestMac MACRO wants a string (and you're generating in just the characters), you need to get single quotes into your generated MACRO call surrounding the set value to pass, like this: \\n#APPEND(code,'TestMac(\\\\'' + vars[%idx%] + '\\\\');\\\\n');
And I always prefer generating code that could be readable, so I also added the \\\\n to the end.\\n\\nNext, you only need to do this to see the generated code:RETURN %'code'%;
Note the addition of the single quotes inside the percent signs. That changes it from generating the code itself, to a STRING showing the generated code.\\n\\nAnd finally, here's all my test code. I generated the STRING of code then ran them (commented out) to make sure the TestMac MACRO works as expected. It does.\\nTestMac(testStr) := MACRO\\n OUTPUT(testStr);\\nENDMACRO;\\n\\nCodeGeneration(Vars) := FUNCTIONMACRO\\n len := COUNT(vars);\\n #DECLARE(idx);\\n #DECLARE(code);\\n #SET(idx, 1);\\n #SET(code, '');\\n #LOOP\\n #IF (%idx% > len) #BREAK\\n #ELSE\\n #APPEND(code,'TestMac(\\\\'' + vars[%idx%] + '\\\\');\\\\n');\\n #SET(idx, %idx%+1);\\n #END\\n #END\\n // RETURN %'code'%;\\n RETURN %code%;\\nENDMACRO;\\n\\nCodeGeneration(['a','b','c']);\\n// TestMac('a');\\n// TestMac('b');\\n// TestMac('c');\\n
But using the FUNCTIONMACRO to generate the MACRO calls fails, making this a prime candidate for a JIRA ticket. Please submit that so the developers can have a look at this anomaly.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-20 18:31:32\" },\n\t{ \"post_id\": 19663, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Re: Appending datasets with other datasets while adding a fi\", \"username\": \"BGehalo\", \"post_text\": \"Okay I got it to work by changing it to a macro. If I understand how macros work it executes the string saved to %code% as ECL code when I call it at the bottom of the macro.\\n\\n\\nSHARED TestMac(outVar, idx) := MACRO\\n\\toutVar := idx;\\nENDMACRO;\\n\\nCodeGeneration(Vars) := MACRO\\n\\tlen := COUNT(vars);\\n\\t#DECLARE(idx);\\n\\t#DECLARE(code);\\n\\t#SET(idx, 1);\\n\\t#SET(code, '');\\n\\t#LOOP\\n\\t\\t#IF (%idx% > len) #BREAK\\n\\t\\t#ELSE\\n\\t\\t\\t#APPEND(code,'TestMac('+vars[%idx%]+',\\\\''+%idx%+'\\\\');');\\n\\t\\t\\t#SET(idx, %idx%+1);\\n\\t\\t #END\\n\\t#END\\n\\t%code%\\nENDMACRO;\\n\\nCodeGeneration(['a','b','c']);\\na; b; c;\\n
\", \"post_time\": \"2017-10-20 18:27:20\" },\n\t{ \"post_id\": 19653, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Re: Appending datasets with other datasets while adding a fi\", \"username\": \"BGehalo\", \"post_text\": \"Thanks, I went and revisited some of the videos but I'm still having a bit of trouble getting it to work.\\n\\nI've been playing around on a local vm trying to emulate what I need, I think something strange is happening where it's using double quotes when it's calling a macro from within a function macro.\\n\\n\\nSHARED TestMac(testStr, idx) := MACRO\\n\\ttestStr := idx;\\nENDMACRO;\\n\\nCodeGeneration(Vars) := FUNCTIONMACRO\\n\\tlen := COUNT(vars);\\n\\t#DECLARE(idx);\\n\\t#DECLARE(code);\\n\\t#SET(idx, 1);\\n\\t#SET(code, '');\\n\\t#LOOP\\n\\t\\t#IF (%idx% > len) #BREAK\\n\\t\\t#ELSE\\n\\t\\t\\t#APPEND(code,'TestMac('+vars[%idx%]+',\\\\''+%idx%+'\\\\');');\\n\\t\\t\\t#SET(idx, %idx%+1);\\n\\t\\t #END\\n\\t#END\\n\\tReturn %code%;\\nENDMACRO;\\n\\nCodeGeneration(['a','b', 'c']);\\na; b; c;\\n
\\n\\nThis gives me a syntax error which references the test macro above.\\nError: syntax error near " (1, 7), 3002, \\n\\nIt must be putting double quotes in there or something strange.\\n\\nIf I wrap the return %code% in a #TEXT declaration I can see my results as string:\\nTestMac( a, '1') ; TestMac( b, '2') ; TestMac( c, '3') ;\\n
\\n\\nStrange that it's adding spaces in there. If I run the above generated code it works, I can then output a;b;c; and it's as expected.\", \"post_time\": \"2017-10-20 15:38:08\" },\n\t{ \"post_id\": 19633, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Re: Appending datasets with other datasets while adding a fi\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,I'm wondering if it would be better to contain the code generation inside ECL and am not sure where to start.
ECL's Template Language would be my suggestion.\\n\\nSo a good place to start would be our online training course: Applied ECL - ECL Code Generation Tools (available here: https://hpccsystems.com/training). \\n\\nThis course covers all the ECL code generation tools: MACROs, FUNCTIONMACROs, and the Template Language.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-20 13:58:21\" },\n\t{ \"post_id\": 19613, \"topic_id\": 4943, \"forum_id\": 8, \"post_subject\": \"Appending datasets with other datasets while adding a field.\", \"username\": \"BGehalo\", \"post_text\": \"So I had a situation today in which I had 16 different datasets. They all have the same output record structure so I figured it would look nicer if they were output in a single table instead of separate ones. \\n\\nThe issue was I needed to differentiate them by the macros they came from, so I figured I'd add a new column for this. I got it to work but it's a very brute force method and I feel like there's probably some way to do it using code generation functions in ECL. \\n\\nHere's how I have it now:\\n\\nnewLayout := {\\n\\tSTRING ResultType,\\n\\tdoxie.layout_best\\n};\\n\\nnewLayout AddResultType(doxie.layout_best L, STRING ResultType) := TRANSFORM\\n\\tSELF.ResultType := ResultType;\\n\\tSELF := L;\\nEND;\\n\\n//These macros assign a dataset in doxie.layout_best structure to the first param.\\ndoxie.MAC_fetch_watchdog(results_glb,did_stream,LexID,Watchdog.Key_Watchdog_glb);\\ndoxie.MAC_fetch_watchdog(results_glb_nonutil,did_stream,LexID,Watchdog.Key_Watchdog_glb_nonutil);\\ndoxie.MAC_fetch_watchdog(results_glb_nonutil_nonblank,did_stream,LexID,Watchdog.Key_Watchdog_glb_nonutil_nonblank);\\n//etc...\\n\\n//Project the results to our new structure and add the correct resulttype string.\\n//And append them all to each other using the + operator.\\n\\tFormattedResults :=\\n\\tPROJECT(results_glb, AddResultType(LEFT, 'glb')) +\\n\\tPROJECT(results_glb_nonutil, AddResultType(LEFT, 'glb_nonutil')) +\\n\\tPROJECT(results_glb_nonutil_nonblank, AddResultType(LEFT, 'glb_nonutil_nonblank')) +\\n//etc...\\n
\\n\\nSo that works and it wasn't too hard to generate the code using regular expressions in another tool but I'm wondering if it would be better to contain the code generation inside ECL and am not sure where to start.\\n\\nMy gut says it's somewhere with a for loop iterating over a set of variable names and substituting them where appropriate but I could use a little nudge in the right direction.\", \"post_time\": \"2017-10-19 20:40:10\" },\n\t{ \"post_id\": 19713, \"topic_id\": 4953, \"forum_id\": 8, \"post_subject\": \"Re: Macro syntax check oddities\", \"username\": \"NP\", \"post_text\": \"Thanks Richard. Raised: https://track.hpccsystems.com/browse/HPCC-18600\", \"post_time\": \"2017-10-24 16:58:10\" },\n\t{ \"post_id\": 19643, \"topic_id\": 4953, \"forum_id\": 8, \"post_subject\": \"Re: Macro syntax check oddities\", \"username\": \"rtaylor\", \"post_text\": \"Nemanja,\\n\\nGood job!! This is a perfect bug report. Now you just need to copy/paste it all into a JIRA ticket for the developers to track and fix. \\n\\nRichard\", \"post_time\": \"2017-10-20 14:52:09\" },\n\t{ \"post_id\": 19623, \"topic_id\": 4953, \"forum_id\": 8, \"post_subject\": \"Macro syntax check oddities\", \"username\": \"NP\", \"post_text\": \"Hi,\\n\\nWe recently upgraded from 6.0.2. to 6.4.2 and noticed that one of the changes is that macros are now syntax checked. We realized because we have a script that runs a syntax check on every file in the folder, and it started failing when syntax checking macros. I am, however, unsure what the correct approach should be and also there is some odd behavior. Here is an example:\\n\\nOpen a new builder, write \\n
\\nEXPORT A := MACRO\\n x := Files.LoadFile();\\nENDMACRO;\\n
\\n\\nand hit F7 -> No Errors...\\n\\nNow save the file as A.ecl, hit F7 -> Error 'Unknown identifier “Files”'. I find this odd, because the fact that it is saved or not should not affect the syntax check?\\n\\nThen, change the code by adding a random parameter:\\n\\n\\nEXPORT A(b) := MACRO\\n x := Files.LoadFile();\\nENDMACRO;\\n
\\n\\nF7 -> No Errors... - This is also odd, as the parameter does not change anything, but causes it to pass syntax check.\\n\\nFinally, changing the code to \\n\\n\\nEXPORT A := MODULE\\n EXPORT A1 := MACRO\\n x := Files.LoadFile();\\n sdfgdsfdsaf THIS SHOULD REALLY NOT PASS!!!!\\n ENDMACRO;\\nEND;\\n
\\n\\nF7 -> No Errors... So, just wrapping it in a module makes the syntax check pass (even if I add some nonsense).\\n\\nSo, what is the recommended approach here? If I leave the macros as they were (with no parameters for instance), they fail the syntax check even though they are fine at the point of extraction and work fine. If I add any(!) parameter or just wrap them in a module, then they pass the syntax check even if they are completely wrong (which kind of beats the point of syntax check)? What am I missing here?\\n\\nThanks\\n\\nNemanja\", \"post_time\": \"2017-10-20 10:07:36\" },\n\t{ \"post_id\": 19693, \"topic_id\": 4963, \"forum_id\": 8, \"post_subject\": \"IMPORT Java\", \"username\": \"ravishankar\", \"post_text\": \"Team, \\n\\nI am Trying to register a new Java User defined function in /opt/HPCCSystems/classes \\nand access the same in the IMPORT java ECL Code. It couldn't run since the error 'Type mismatch on result'\\n\\nBelow is my code. Could you please let me know what am I missing here. \\n\\nIMPORT java;\\nSTRING jadd(integer a, integer b) := IMPORT(java, 'JavaAdd.add:(II)I'\\n: classpath('/opt/HPCCSystems/classes/'));\\njadd(1,2);\\n\\n\\nThis signature , I took from the below stuff\\n\\nhpccdemo@HPCCSystemsVM-amd64-6:/opt/HPCCSystems/classes$ javap -s JavaAdd\\nCompiled from "JavaAdd.java"\\npublic class JavaAdd {\\npublic JavaAdd();\\nSignature: ()V\\n\\npublic static int add(int, int);\\nSignature: (II)I\\n}\\n\\n\\nError: System error: 0: javaembed: Type mismatch on result (0, 0), 0,\", \"post_time\": \"2017-10-23 19:48:26\" },\n\t{ \"post_id\": 31753, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"RuchiM\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2020-08-10 14:00:21\" },\n\t{ \"post_id\": 31723, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"rtaylor\", \"post_text\": \"Ruchika,\\n\\nI have attached the code file I sent to Vannel to this response. This is some fairly complex code that I wrote in 2016 to generate "garbage" test data. It will not compile today without a massive re-write, because many changes have been made to the Machine Learning Library (which it uses extensively) since then, but it does demonstrate how I solved the problem you currently have.\\n\\nThe attached ZIP file contains a single text file: DataGen.mod\\n\\nYou just open that .mod file using the ECL IDE, then a new DataGen folder will be created in your local repository containing all the definition files that comprise this example (this will not work in VS Code). \\n\\nI suggest you start with the __ReadMe.ecl file \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-08-10 12:36:59\" },\n\t{ \"post_id\": 31683, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"RuchiM\", \"post_text\": \"Hi Vannel,\\n\\nCan you share the code which Richard sent to you as I am also looking for the solution for the same problem. 
\\n\\nThanks,\\nRuchika\", \"post_time\": \"2020-08-07 17:18:33\" },\n\t{ \"post_id\": 31093, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nExtensive example code sent in PM.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-08 11:49:38\" },\n\t{ \"post_id\": 31083, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"vzeufack\", \"post_text\": \"Hi @Rtaylor,\\n\\nCan you please share your code with me?\\n\\nI need to understand the proper way of handling this problem in ECL:\\n\\n
\\nBasically, I have a situation where I am in a function, have a string value, and have to get it into a macro as a field value.\\n
\\n\\nBest regards,\\n\\nVannel,\", \"post_time\": \"2020-06-05 21:38:40\" },\n\t{ \"post_id\": 19793, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Re: Converting a string into a field reference for a macro\", \"username\": \"rtaylor\", \"post_text\": \"Drea, Am I hosed?
I think so, yes. Or is there some clever trick to manage this?
The only way I've ever managed anything like this is to turn it into a two-step manual process:
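As a generic illustration only -- this is not the two-step code being referred to, and the names (FilterByField, people, rec) are made up -- the usual building block for turning a string into a field reference is a FUNCTIONMACRO with #EXPAND. It works because a functionmacro is expanded at the call site, where the string is still a compile-time constant; inside an ordinary FUNCTION the same #EXPAND fails with the "constant value required" error discussed in this thread. A rough sketch:

FilterByField(ds, fieldname, val) := FUNCTIONMACRO
  // fieldname must be a string constant at the point of call,
  // e.g. FilterByField(people, 'lname', 'Rubble')
  RETURN ds(#EXPAND(fieldname) = val);
ENDMACRO;

rec    := {STRING10 fname, STRING10 lname};
people := DATASET([{'Fred','Flintstone'},{'Barney','Rubble'}], rec);
OUTPUT(FilterByField(people, 'lname', 'Rubble'));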
PM me if you want to see the actual code that does all that. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-27 20:43:52\" },\n\t{ \"post_id\": 19783, \"topic_id\": 5003, \"forum_id\": 8, \"post_subject\": \"Converting a string into a field reference for a macro\", \"username\": \"drealeed\", \"post_text\": \"I need to create a function that can take input parameters, pass them to any given function or functionmacro, shape the output of the functionmacro and return it.\\n\\nI have no control over the functions & macros I'm passing data to, but do have full control over the content of the function.\\n\\nMy problem is this: if the macro I'm passing data to has a "field" type input, I need a way to convert the "string" parameter passed into the function by a user to a field to pass into a macro. Usually I could use #EXPAND() to do this, but because this is a function, a "constant value required" error is returned.\\n\\nI tried adding in a MAP to the function, e.g. fieldtouse:=MAP(val='xxx'=>dsin.field1, val='xx2'=>dsin.field2) to get a concrete field to pass into the macro. Unfortuntely if the fields coming back out of the map are of different datatypes this fails.\\n\\nBasically, I have a situation where I am in a function, have a string value, and have to get it into a macro as a field value. I can't change the fact that I'm in a function, that the input parameter is a string, or the content of the macro. Am I hosed? Or is there some clever trick to manage this?\", \"post_time\": \"2017-10-27 19:53:08\" },\n\t{ \"post_id\": 19823, \"topic_id\": 5013, \"forum_id\": 8, \"post_subject\": \"Re: ECLPlus not returning valid XML with Assert\", \"username\": \"oscar.foley\", \"post_text\": \"Bug submitted: https://track.hpccsystems.com/browse/HPCC-18646\\n\\nCheers,\\nOscar\", \"post_time\": \"2017-11-01 16:44:17\" },\n\t{ \"post_id\": 19813, \"topic_id\": 5013, \"forum_id\": 8, \"post_subject\": \"Re: ECLPlus not returning valid XML with Assert\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nExcellent bug report! Now you can easily copy/paste your post into a JIRA ticket (https://track.hpccsystems.com) so the developers are absolutely aware of the issue and can monitor/track its progress through the system.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-31 13:07:40\" },\n\t{ \"post_id\": 19803, \"topic_id\": 5013, \"forum_id\": 8, \"post_subject\": \"ECLPlus not returning valid XML with Assert\", \"username\": \"oscar.foley\", \"post_text\": \"ECLPlus not returning valid XML with Assert\\n\\nHPCC version 5.6.4, client tools 5.6.4... (Also with version 6.4.2)\\nI run this ECL:\\n
\\nOUTPUT('Test 1 <>&');\\n\\nASSERT(1 = 2, 'Test 2 <>&');
\\n\\nIf I run code with client tools ecl run, XML is ok.\\n/opt/HPCCSystems/5.6.4/clienttools/bin/ecl run --server 1.1.1.1 --port 8010 --username 'oscarfoley' --password ' ' -legacy thor testecl.ecl\\n\\n<Result>\\n <Exception><Code>100000</Code><Filename>testecl.ecl</Filename><Line>3</Line><Source>user</Source><Message>Test 2 <>&</Message></Exception>\\n<Dataset name='Result 1'>\\n <Row><Result_1>Test 1 <>&</Result_1></Row>\\n</Dataset>\\n</Result>\\n
\\n\\nBut if I get the very same work unit result with ecl plus, XML is invalid \\neclplus server=10.53.57.69 action=view wuid=W20171030-184629 format=xml\\n
\\n<Error><source>user</source><line>3</line><code>100000</code><message>Test 2 <>&</message></Error>\\n<Dataset name='Result 1'>\\n <Row><Result_1>Test 1 <>&</Result_1></Row>\\n</Dataset>\\n
\\n\\nXML retrieved by ECLPlus is invalid because:\\n1) ASSERT message <message>Test 2 <>&</message> does not escape XML special characters so XML is invalid. However OUTPUT Test1 is correctly escaped.\\n2) XML does not have root element. Everything should be surrounded by <Result></Result>\\n3) XML returned by ecl run and eclplus have different casing for ASSERT. XML is Case sensitive...\\n4) XML returned by ecl run and eclplus is different for ASSERT. They should be identical, shouldn't them? One uses <Exception> and the other <Error>\\n\\n<Exception><Code>100000</Code><Filename>testecl.ecl</Filename><Line>3</Line><Source>user</Source><Message>Test 2 <>&</Message></Exception>\\n\\nvs.\\n\\n<Error><source>user</source><line>3</line><code>100000</code><message>Test 2 <>&</message></Error>\\n
\", \"post_time\": \"2017-10-31 11:33:03\" },\n\t{ \"post_id\": 19853, \"topic_id\": 5023, \"forum_id\": 8, \"post_subject\": \"Re: Using relative import for multiple container levels\", \"username\": \"NP\", \"post_text\": \"Thanks Richard, will do.\", \"post_time\": \"2017-11-03 11:03:21\" },\n\t{ \"post_id\": 19843, \"topic_id\": 5023, \"forum_id\": 8, \"post_subject\": \"Re: Using relative import for multiple container levels\", \"username\": \"rtaylor\", \"post_text\": \"Nemanja,\\n\\nMy testing makes it look like a bug to me. \\n\\nReported: https://track.hpccsystems.com/browse/HPCC-18657\\n\\nAdd yourself as a Watcher on this issue to track its progress.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-02 20:23:21\" },\n\t{ \"post_id\": 19833, \"topic_id\": 5023, \"forum_id\": 8, \"post_subject\": \"Using relative import for multiple container levels\", \"username\": \"NP\", \"post_text\": \"Hi,\\n\\nShould \\n\\nIMPORT $.^.^.^ AS M1;
\\n\\nwork? It compiled in older versions, but it seems 6.4.2-rc3 does not allow this.\\n\\nThe error is\\n\\nError: Cannot access container for Object 'SOME_FOLDER_NAME' (1, 11), 2394
\\n\\nHas a limit on the depth been set, or anything like that?\\n\\nThanks,\\n\\nNemanja\", \"post_time\": \"2017-11-02 18:07:47\" },\n\t{ \"post_id\": 19973, \"topic_id\": 5063, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a txt file?\", \"username\": \"rtaylor\", \"post_text\": \"Katy,What if I wanted tab delimiters for this text file. Is it possible to still do that?
Yes, absolutely. Just change from the default to \\\\t for tab delimiters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-14 15:29:33\" },\n\t{ \"post_id\": 19963, \"topic_id\": 5063, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a txt file?\", \"username\": \"KatyChow\", \"post_text\": \"Hi Richard,\\n\\nWhat if I wanted tab delimiters for this text file. Is it possible to still do that?\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-11-14 14:42:34\" },\n\t{ \"post_id\": 19953, \"topic_id\": 5063, \"forum_id\": 8, \"post_subject\": \"Re: Spraying a txt file?\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nSpray Delimited can spray any variable-length file format that has a record delimiter. Most text files have either CR/LF or NewLine delimiters, and these are the default record delimiters for Spray Delimited. So a text file can be sprayed the same way as a CSV file, because in effect, a text file is just a CSV file that happens to only contain a single field per record. \\n\\nSo Spray Delimited the text file, making sure to check the Omit Separator box to make it absolutely clear that it's just a variable-length record file with record delimiters but no field separators. Then you define the sprayed file with a single variable-length STRING field and you're GTG.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-14 09:59:56\" },\n\t{ \"post_id\": 19943, \"topic_id\": 5063, \"forum_id\": 8, \"post_subject\": \"Spraying a txt file?\", \"username\": \"KatyChow\", \"post_text\": \"Hi there!\\n\\nI am trying to spray some text files and was wondering if that is something HPCC supports. I always have CSV's that I spray on the thors, so I am not sure how this works.\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-11-13 20:21:44\" },\n\t{ \"post_id\": 20183, \"topic_id\": 5093, \"forum_id\": 8, \"post_subject\": \"Re: System error: 28\", \"username\": \"RoqScheer\", \"post_text\": \"I am seeing this error occasionally, too. If this error code 28 is being returned by the operating system during a write call, it means "no space left on device".\", \"post_time\": \"2017-12-05 11:51:52\" },\n\t{ \"post_id\": 20143, \"topic_id\": 5093, \"forum_id\": 8, \"post_subject\": \"Re: System error: 28\", \"username\": \"bforeman\", \"post_text\": \"Hi Katy,\\n\\nDid you figure this out yet? Could it be that you did not have rights to access that cluster?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-12-01 16:08:56\" },\n\t{ \"post_id\": 20043, \"topic_id\": 5093, \"forum_id\": 8, \"post_subject\": \"System error: 28\", \"username\": \"KatyChow\", \"post_text\": \"Hi!\\n\\nI am trying to run a simple output and keep getting this error. 
\\nError: System error: 28: copyFile target=/var/lib/HPCCSystems/dllserver/temp/libW20171121-164857.so__CsFAD8cDAQD-WBRa7I4BAAAA.tmp source=//10.193.64.19:7100/mnt/disk1/var/lib/HPCCSystems/eclserver_dev_2/libW20171121-164857.so; read/write failure (28): (0, 0), 28, \\n\\nCode here hmlr_rec := RECORD\\n string2 src;\\n string4 src_sub;\\n string64 id;\\n unsigned8 _value;\\n string20 date;\\n string10 postcode_original;\\n string8 postcode;\\n string1 propertytype;\\n string1 newprop;\\n string1 tenure;\\n string64 address1;\\n string32 address2;\\n string64 address3;\\n string64 address4;\\n string64 address5;\\n string4 ppd_category_type;\\n string4 flag;\\n string10 row_status;\\n string25 last_updated;\\n unsigned4 file_created_date;\\n unsigned4 urn_new;\\n unsigned8 row_id;\\n unsigned8 unique_hash;\\n unsigned6 addr_id;\\n string128 org_name;\\n string64 building_name;\\n string64 building_number;\\n string64 dept_street;\\n string10 dept_street_type;\\n string64 street;\\n string10 street_type;\\n string64 dept_locality;\\n string64 locality;\\n string64 town;\\n string20 unit_desig;\\n string10 unit_number;\\n string3 post_area;\\n string2 post_district;\\n string2 post_sector;\\n string2 post_unit;\\n string150 unmatched_address;\\n string2 dps;\\n string5 pre_direction;\\n string5 post_direction;\\n string25 region;\\n real8 latitude;\\n real8 longitude;\\n string25 country;\\n string5 error_code;\\n END;\\n\\n\\nFileName := '~tracesmart::base::hmlr::hmlr::prod';\\nFile := dataset(filename,hmlr_rec,thor);\\nOUTPUT(File,named('file'));\\n
\\n\\n\\nWhat could be my problem?\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-11-21 16:51:13\" },\n\t{ \"post_id\": 22723, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"abittandan\", \"post_text\": \"After playing around with it, I finally got around the "trailing xml" error mentioned above and was able to do this via FROMXML with a batch as follows - \\n\\naddrRec := RECORD\\n string20 acctno{xpath('acctno')};\\n\\tstring100 addr{xpath('addr')};\\n\\tstring25 p_city_name{xpath('p_city_name')};\\nEND;\\n\\nfinalRec := RECORD\\n DATASET(addrRec) ds{xpath('/row')};\\nEND;\\n\\nx := '<Batch_in>'\\n\\t\\t\\t\\t+'<row>'\\n\\t\\t\\t\\t+'\\t<acctno>12</acctno>'\\n\\t\\t\\t\\t+'\\t<addr>123 PAPAI ST</addr>'\\n\\t\\t\\t\\t+'\\t<p_city_name>HONOLULU</p_city_name>'\\n\\t\\t\\t\\t+'</row>'\\n\\t\\t\\t\\t+'<row>'\\n\\t\\t\\t\\t+'\\t<acctno>26</acctno>'\\n\\t\\t\\t\\t+'\\t<addr>45 5TH ST</addr>'\\n\\t\\t\\t\\t+'\\t<p_city_name>Honewyll</p_city_name>'\\n\\t\\t\\t\\t+'</row>'\\n +'</Batch_in>';\\n\\t\\t\\nFROMXML(finalRec,x).ds;
\\n\\nthe only catch is that you need to extract the nested inner ds which is why it is more efficient to use PARSE for batches.\", \"post_time\": \"2018-08-22 14:07:15\" },\n\t{ \"post_id\": 22693, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"abittandan\", \"post_text\": \"The suggestion from R Taylor only solves it for a single xml record (I was not able to make it work for a batch). I kept getting the following error - \\nError: System error: 2: Error - syntax error "Trailing xml after close of root tag"\\n\\nI figured out a way to translate a batch xml file to a dataset simply via PARSE - \\nin_xml:=dataset([{\\n '<row>'\\n\\t\\t+'\\t<acctno>12</acctno>'\\n\\t\\t+'\\t<addr>123 PAPAI ST</addr>'\\n\\t\\t+'\\t<p_city_name>HONOLULU</p_city_name>'\\n\\t\\t+'</row>'\\n\\t\\t+'<row>'\\n\\t\\t+'\\t<acctno>26</acctno>'\\n\\t\\t+'\\t<addr>45 5TH ST</addr>'\\n\\t\\t+'\\t<p_city_name>Honewyll</p_city_name>'\\n\\t\\t+'</row>'\\n}],{string xml});\\n\\nout_rec := record\\n string20 acctno;\\n\\tstring100 addr;\\n\\tstring25 p_city_name;\\nend;\\n\\nout_rec tran := \\n\\tTRANSFORM\\n\\t\\t SELF.acctno := XMLTEXT('acctno');\\n\\t\\t SELF.addr := XMLTEXT('addr');\\n\\t\\t SELF.p_city_name := XMLTEXT('p_city_name');\\n\\tEND;\\n\\nPARSE(in_xml,xml,tran,XML('row'));
\", \"post_time\": \"2018-08-21 19:05:48\" },\n\t{ \"post_id\": 22043, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, your original code DOES work if I comment out this line:+'<names>'
\\n\\n\\n\\nRichard\", \"post_time\": \"2018-05-23 13:33:33\" },\n\t{ \"post_id\": 22033, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"Allan\", \"post_text\": \"Hum,\\n\\nI don't get any errors from my original code, it all runs ok and returns expected results..\\n I'm using client 6.2.0 and server is 6.2.26\\n\\nYours\\nAllan\", \"post_time\": \"2018-05-22 10:47:20\" },\n\t{ \"post_id\": 22013, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI tried your code and got some errors so I took the liberty of making a couple of minor (but important) modifications to your code
\\n
NameRec := RECORD\\n STRING Firstname{xpath('fname')};\\n STRING Lastname{xpath('surname')};\\nEND;\\n\\nnamesRec := RECORD\\n UNSIGNED4 EmployeeID{xpath('NI')};\\n DATASET(NameRec) names{xpath('names/name')}; //note the xpath\\nEND;\\n\\nPeopleRec := RECORD\\n DATASET(NamesRec) People{xpath('/People')};\\nEND;\\n\\nx := '<Row>'\\n+'<People>'\\n+'<NI>1234567</NI>'\\n+'<names>' //you had this one, so I added the ones below\\n+'<name><fname>Allan</fname><surname>Wrobel</surname></name>'\\n+'<name><fname>Anna</fname><surname>Smith</surname></name>'\\n+'<name><fname>Nina</fname><surname>Harrison</surname></name>'\\n+'</names>' //added\\n+'</People>'\\n+'<People>'\\n+'<NI>98765</NI>'\\n+'<names>' //added\\n+'<name><fname>Colin</fname><surname>Harison</surname></name>'\\n+'<name><fname>James</fname><surname>Wilson</surname></name>'\\n+'<name><fname>Nick</fname><surname>Pine</surname></name>'\\n+'</names>' //added\\n+'</People>'\\n+'</Row>';\\nrec := FROMXML(PeopleRec,x);\\n\\nOUTPUT(rec); //added\\n\\t/* <NI>1234567</NI>\\n\\t<names><name><fname>Allan</fname><surname>Wrobel</surname>\\n\\t</name><name><fname>Anna</fname><surname>Smith</surname></name>\\n\\t<name><fname>Nina</fname><surname>Harrison</surname></name></names>\\n\\t<NI>98765</NI>\\n\\t<names><name><fname>Colin</fname><surname>Harison</surname>\\n\\t</name><name><fname>James</fname><surname>Wilson</surname>\\n\\t</name><name><fname>Nick</fname><surname>Pine</surname></name></names> */\\n\\nOUTPUT(rec.People);\\n\\np2 := rec.people[2];\\np2.EmployeeID; //98765\\np2.names[3].Firstname; //Nick
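A side note that can be handy when juggling xpaths like this (not part of the original reply): you can round-trip a row back to XML with TOXML and eyeball the tags it actually produces, e.g.

OUTPUT(TOXML(rec.People[1])); // shows the XML generated for the first People row, so the names/name nesting can be confirmed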
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-05-21 17:22:11\" },\n\t{ \"post_id\": 21983, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nIf you just want to test something out, read up FROMXML in the ECL Ref manual.\\nhere is a bit of an example hack I did to get my ideas straight:\\n\\nNameRec := RECORD\\n STRING Firstname{xpath('fname')};\\n STRING Lastname{xpath('surname')};\\nEND;\\n\\nnamesRec := RECORD\\n UNSIGNED4 EmployeeID{xpath('NI')};\\n DATASET(NameRec) names{xpath('name')};\\nEND;\\n\\nPeopleRec := RECORD\\n DATASET(NamesRec) People{xpath('/People')};\\nEND;\\n\\nx := '<Row>'\\n+'<People>'\\n+'<NI>1234567</NI>'\\n+'<names>'\\n+'<name><fname>Allan</fname><surname>Wrobel</surname></name>'\\n+'<name><fname>Anna</fname><surname>Smith</surname></name>'\\n+'<name><fname>Nina</fname><surname>Harrison</surname></name>'\\n+'</People>'\\n+'<People>'\\n+'<NI>98765</NI>'\\n+'<name><fname>Colin</fname><surname>Harison</surname></name>'\\n+'<name><fname>James</fname><surname>Wilson</surname></name>'\\n+'<name><fname>Nick</fname><surname>Pine</surname></name>'\\n+'</People>'\\n+'</Row>';\\nrec := FROMXML(PeopleRec,x);\\nOUTPUT(rec.People);\\np2 := rec.people[2];\\n\\np2.EmployeeID;\\np2.names[3].Firstname;\\n
\\nYours\\nAllan\", \"post_time\": \"2018-05-21 14:43:51\" },\n\t{ \"post_id\": 20093, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Re: Load XML Inline into a dataset?\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,\\n\\nUnfortunately, inline DATASETs do not support the XML option. You can remove the XML option and then you will have a single variable length STRING field that you can use PARSE to extract your xml, but if what you want to do is develop code to operate on XML datasets, then you'll need to write your test XML data to a disk file that a standard XML DATASET can operate on.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-28 18:29:15\" },\n\t{ \"post_id\": 20063, \"topic_id\": 5103, \"forum_id\": 8, \"post_subject\": \"Load XML Inline into a dataset?\", \"username\": \"BGehalo\", \"post_text\": \"This might be a pretty basic question, I can't find any examples in the documentation though. I'm trying to load some XML data into a dataset like so:\\n\\n\\ntest := '<row><name>greg</name></row>';\\n\\nr := {\\n\\tstring name {xpath('name')};\\n};\\n\\nDS := DATASET(test, r, XML('row'));\\nDS;\\n
\\n\\nOf course it tries to open a logical file with the string assigned to test. I feel like there was a way to do this but can't figure it out.\", \"post_time\": \"2017-11-22 16:22:34\" },\n\t{ \"post_id\": 20133, \"topic_id\": 5123, \"forum_id\": 8, \"post_subject\": \"Re: Java Plugin\", \"username\": \"bforeman\", \"post_text\": \"Hi William,\\n\\nIf the STRING usage works, why not return the STRING and then cast it to a BOOLEAN definition after?\\n\\nIsValidationDOC := (BOOLEAN)ValidationDoc();\\n\\nSomething like that....\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-12-01 16:03:38\" },\n\t{ \"post_id\": 20103, \"topic_id\": 5123, \"forum_id\": 8, \"post_subject\": \"Java Plugin\", \"username\": \"wjblack\", \"post_text\": \"All,\\n\\nI have a Java plugin that works when I return a string as shown in example#1.\\n\\nstring validationDoc(String document,String xsdpath) := import(java, 'com.path.to.lib.myLib.validateDoc:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;');\\n\\nI want to use the same function signature except now I want to return a boolean.\\n\\nboolean validationDoc(String document,String xsdpath) := import(java, 'com.path.to.lib.myLib.validateDoc:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/boolean;');\\n\\nI've tried many different permutations of the boolean return type as it seems to be a primitive type. I've tried Lboolean, L/boolean, Ljava/boolean, etc and can't seem to get it to work. Any help would be appreciated.\", \"post_time\": \"2017-11-29 20:10:29\" },\n\t{ \"post_id\": 20173, \"topic_id\": 5133, \"forum_id\": 8, \"post_subject\": \"Re: Use of ROW\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Bob.\", \"post_time\": \"2017-12-01 17:31:37\" },\n\t{ \"post_id\": 20163, \"topic_id\": 5133, \"forum_id\": 8, \"post_subject\": \"Re: Use of ROW\", \"username\": \"bforeman\", \"post_text\": \"Janet,\\n\\nAny definition that passes parameters is an ECL function, and what is "return"ed is the result of the expression, but there is also a FUNCTION structure that allows you to encapsulate code and RETURN a specific value. \\n\\nRegarding the ROW usage, since it is inline with a recordset filter, the compiler knows in that case to read a single record. When you pull it out of the filter, ROW no longer knows which specific record you are referring to, and hence the error.\\n\\nBob\", \"post_time\": \"2017-12-01 17:29:15\" },\n\t{ \"post_id\": 20153, \"topic_id\": 5133, \"forum_id\": 8, \"post_subject\": \"Re: Use of ROW\", \"username\": \"janet.anderson\", \"post_text\": \"Why does an function like CanSearch not require a RETURN statement?\\n\\nSince the full statement\\n%HoldL_CNPNAME_zipcan% := %ToProcess%(~BizLinkFull.Key_BizHead_L_CNPNAME_ZIP.CanSearch(ROW(%ToProcess%)));
\\ndoes not give an error, and here ROW is also taking the dataset %ToProcess%, so why would it give me an error when I break it down and look at only\\n%HoldL_CNPNAME_ziprow% := ROW(%ToProcess%);&#13;
\\n\\nTo see an example, look at Dataland WU W20171201-114948: lines 367-372 are commented out and the code runs. W20171201-114953 the job fails when the line when I uncomment these lines.\", \"post_time\": \"2017-12-01 16:56:49\" },\n\t{ \"post_id\": 20123, \"topic_id\": 5133, \"forum_id\": 8, \"post_subject\": \"Re: Use of ROW\", \"username\": \"bforeman\", \"post_text\": \"What is causing the error?\\nThere are 3 forms listed in the language reference for ROW, and this doesn't appear to be following any of them. How is it even working before I try to break down the statement?\\nThe attribute CanSearch takes a parameter, but it's not a function or a macro. What is it?
\\n\\nCanSearch looks like a BOOLEAN function to me, based on the expression. \\n\\nROW is looking for a single record input, and it looks like you are providing a whole recordset. Hence the message DATASET[1], which would return a single record and satisfy the ROW requirement.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-12-01 15:54:25\" },\n\t{ \"post_id\": 20113, \"topic_id\": 5133, \"forum_id\": 8, \"post_subject\": \"Use of ROW\", \"username\": \"janet.anderson\", \"post_text\": \"I have a statement like \\n\\t%HoldL_CNPNAME_zipcan% := %ToProcess%(~BizLinkFull.Key_BizHead_L_CNPNAME_ZIP.CanSearch(ROW(%ToProcess%)));\\n;
\\n\\nwhere %ToProcess% is a dataset and CanSearch is an attribute in module defined as\\nEXPORT CanSearch(Process_Biz_Layouts.InputLayout le) := le.cnp_name <> (TYPEOF(le.cnp_name))'' AND Fields.InValid_cnp_name((SALT37.StrType)le.cnp_name)=0 AND EXISTS(le.zip_cases);
\\n\\nThis works fine. But I don't really understand what it's doing, so I try to break it down. \\n%HoldL_CNPNAME_ziprow% := ROW(%ToProcess%);
\\nThis gives me an error: Error: ROW(PROJECT(sf, sb(LEFT)...) - dataset argument is not in scope. Did you mean dataset[1]?\\n\\nWhat is causing the error?\\nThere are 3 forms listed in the language reference for ROW, and this doesn't appear to be following any of them. How is it even working before I try to break down the statement?\\nThe attribute CanSearch takes a parameter, but it's not a function or a macro. What is it?\", \"post_time\": \"2017-11-30 04:07:16\" },\n\t{ \"post_id\": 20333, \"topic_id\": 5143, \"forum_id\": 8, \"post_subject\": \"Re: Returning png data from embedded python\", \"username\": \"tlhumphrey2\", \"post_text\": \"With the help of Gordon Smith, I got this to work. But I had to change to platform 6.2.16-1. The following code displays the image as output to the workunit:\\nIMPORT Python;\\nSTRING ReadAndOutputPngFile() := EMBED(Python)\\n import base64\\n pngfile='Eigen_Silly_Professor_64x64.png';\\n encoded = base64.b64encode(open(pngfile, "rb").read())\\n return encoded\\nENDEMBED;\\n\\nimg:=ReadAndOutputPngFile();\\nOUTPUT(DATASET([{'<img src="data:image/png;base64,' + img + '" />'}], {STRING base64Image__html}));\\n
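The same trick generalises to any image in the working directory by parameterising the file name and MIME type -- a small sketch only (the helper names below are made up, and it assumes the file is readable from wherever the embedded Python runs, as in the snippet above):

IMPORT Python;
STRING Base64File(VARSTRING fname) := EMBED(Python)
  import base64
  return base64.b64encode(open(fname, "rb").read())
ENDEMBED;

// builds the same base64Image__html result column used above, but for any file/MIME type
ShowImage(STRING fname, STRING mime) :=
  OUTPUT(DATASET([{'<img src="data:' + mime + ';base64,' + Base64File(fname) + '" />'}],
                 {STRING base64Image__html}));

ShowImage('Eigen_Silly_Professor_64x64.png', 'image/png');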
\", \"post_time\": \"2017-12-19 16:31:18\" },\n\t{ \"post_id\": 20203, \"topic_id\": 5143, \"forum_id\": 8, \"post_subject\": \"Re: Returning png data from embedded python\", \"username\": \"bforeman\", \"post_text\": \"Hi Tim,\\n\\nI do not work with Python, but I found some interesting links:\\n\\nhttps://stackoverflow.com/questions/7380460/byte-array-in-python\\n\\nhttps://www.w3resource.com/python/python-bytes.php\\n\\nI guess the compiler doesn't like DATA as the return value, maybe try a SET OF UNSIGNED1 ? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-12-06 13:48:33\" },\n\t{ \"post_id\": 20193, \"topic_id\": 5143, \"forum_id\": 8, \"post_subject\": \"Returning png data from embedded python\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm trying to return an image from my embedded python. Here is my code:IMPORT Python;\\nDATA ReadAndOutputPngFile() := EMBED(Python)\\n from imageio import imread\\n import numpy as np\\n\\n output_index=65\\n png=imread('class%s-maximal-image.png' % (output_index))\\n return png\\nENDEMBED;\\n\\nOUTPUT(ReadAndOutputPngFile());
\\n\\nWhen I attempt to execute this, I get the following error message:\\nError: Error: 0: pyembed: type mismatch - bytearray expected
\\n\\nWhat must my return data type?\", \"post_time\": \"2017-12-05 14:24:56\" },\n\t{ \"post_id\": 20263, \"topic_id\": 5163, \"forum_id\": 8, \"post_subject\": \"Java plugin output location\", \"username\": \"wjblack\", \"post_text\": \"If I was writing standard out within the Java that the ECL Java plugin uses where would this output reside in the ECL client once run? I assumed the logs but can't find anything.\", \"post_time\": \"2017-12-15 12:26:14\" },\n\t{ \"post_id\": 20303, \"topic_id\": 5173, \"forum_id\": 8, \"post_subject\": \"Re: Ignore transform on LEFT OUTER join non match.\", \"username\": \"BGehalo\", \"post_text\": \"[quote="rtaylor":2qn01pcr]BGehalo,\\n\\nI'm assuming that your real task is taking a dataset of update records (DS2) and "overwriting" the base file (DS1) records with the relevant changes. \\n\\nI'm also assuming that your base file may contain a "bazillion" records and the number of (daily? weekly?) updates is a relatively small percentage of the base file records.\\n\\nSo, given those two assumptions, here's an even simpler way to accomplish it without using JOIN at all:rec := {string a, string b, string c};\\nDS1 := DATASET([{'1','',''}, {'2','Brian','Ge'}, \\n {'3','Kate','Wins'}, {'4','Greg',''}], rec);\\nDS2 := DATASET([{'1','Fred','Flintstone'}, \\n {'4','Jerry','Fring'},\\n {'5','Joe','Schmo'}], rec);\\n\\nSetUpdRecs := SET(DS2,a);\\nDS3 := DS1(a NOT IN SetUpdRecs) + DS2;\\nSORT(DS3,a);
This version also handles Added records along with the Updated records in your DS2 file. Strictly speaking, the SORT isn't necessary, I just added it to make the first four records result exactly the same as yours.\\n\\nHTH,\\n\\nRichard\\n\\nOh cool thanks, didn't think of doing it that way!\", \"post_time\": \"2017-12-18 16:08:20\" },\n\t{ \"post_id\": 20293, \"topic_id\": 5173, \"forum_id\": 8, \"post_subject\": \"Re: Ignore transform on LEFT OUTER join non match.\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo,\\n\\nI'm assuming that your real task is taking a dataset of update records (DS2) and "overwriting" the base file (DS1) records with the relevant changes. \\n\\nI'm also assuming that your base file may contain a "bazillion" records and the number of (daily? weekly?) updates is a relatively small percentage of the base file records.\\n\\nSo, given those two assumptions, here's an even simpler way to accomplish it without using JOIN at all:rec := {string a, string b, string c};\\nDS1 := DATASET([{'1','',''}, {'2','Brian','Ge'}, \\n {'3','Kate','Wins'}, {'4','Greg',''}], rec);\\nDS2 := DATASET([{'1','Fred','Flintstone'}, \\n {'4','Jerry','Fring'},\\n {'5','Joe','Schmo'}], rec);\\n\\nSetUpdRecs := SET(DS2,a);\\nDS3 := DS1(a NOT IN SetUpdRecs) + DS2;\\nSORT(DS3,a);
This version also handles Added records along with the Updated records in your DS2 file. Strictly speaking, the SORT isn't necessary, I just added it to make the first four records result exactly the same as yours.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-12-18 16:03:16\" },\n\t{ \"post_id\": 20273, \"topic_id\": 5173, \"forum_id\": 8, \"post_subject\": \"Ignore transform on LEFT OUTER join non match.\", \"username\": \"BGehalo\", \"post_text\": \"So I was wondering if there's a way to have a LEFT OUTER join not go through the transform function on non-matches.\\n\\nCurrently the following code will blank out records 2 and 3:\\n\\nrec := {string a, string b, string c};\\nDS1 := DATASET([{'1','',''}, {'2','Brian','Ge'}, {'3','Kate','Wins'}, {'4','Greg',''}], rec);\\nDS2 := DATASET([{'1','Fred','Flintstone'}, {'4','Jerry','Fring'}], rec);\\n\\nresult := JOIN(DS1, DS2,\\n\\tLEFT.a = RIGHT.a, TRANSFORM(rec,\\n\\tSELF := RIGHT\\n\\t), LEFT OUTER);\\nOUTPUT(result);
\\nThis makes sense because every record goes through the transform and those with no matches are getting set to an empty record.\\n\\nI can accomplish what I need like so:\\nrec := {string a, string b, string c};\\nDS1 := DATASET([{'1','',''}, {'2','Brian','Ge'}, {'3','Kate','Wins'}, {'4','Greg',''}], rec);\\nDS2 := DATASET([{'1','Fred','Flintstone'}, {'4','Jerry','Fring'}], rec);\\n\\nresult := JOIN(DS1, DS2,\\n\\tLEFT.a = RIGHT.a, TRANSFORM(rec,\\n\\tSELF.b := IF(RIGHT.a <> '', RIGHT.b, LEFT.b),\\n\\tSELF.c := IF(RIGHT.a <> '', RIGHT.c, LEFT.c),\\n\\tSELF := LEFT\\n\\t), LEFT OUTER);\\nOUTPUT(result);
\\n\\nHowever this method would require including every single field we want from the RIGHT record. I feel like there might be a better way to do this.\\n\\nEDIT - I figured out a way to do it nicely:\\nrec := {string a, string b, string c};\\nDS1 := DATASET([{'1','',''}, {'2','Brian','Ge'}, {'3','Kate','Wins'}, {'4','Greg',''}], rec);\\nDS2 := DATASET([{'1','Fred','Flintstone'}, {'4','Jerry','Fring'}], rec);\\n\\nresult := JOIN(DS1, DS2,\\n\\tLEFT.a = RIGHT.a, TRANSFORM(rec,\\n\\tSELF := IF(RIGHT.a <> '', RIGHT, LEFT)\\n\\t), LEFT OUTER);\\nOUTPUT(result);
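For what it's worth, another way to get the same result without testing RIGHT.a inside the TRANSFORM is to split matches and non-matches explicitly and append them. This is a sketch using the same rec/DS1/DS2 as above (the definition names are illustrative, and it is not from the original post); it also sidesteps any ambiguity if a key field could legitimately be blank:

updated   := JOIN(DS1, DS2, LEFT.a = RIGHT.a,
                  TRANSFORM(rec, SELF := RIGHT));             // inner join: keep the update record
untouched := JOIN(DS1, DS2, LEFT.a = RIGHT.a,
                  TRANSFORM(rec, SELF := LEFT), LEFT ONLY);   // base records with no matching update
OUTPUT(SORT(updated + untouched, a));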
\", \"post_time\": \"2017-12-15 20:08:42\" },\n\t{ \"post_id\": 20683, \"topic_id\": 5263, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL HTTPHEADER\", \"username\": \"jbanner\", \"post_text\": \"Thank you for you suggestions. I finally got it working. I had been trying to include the authentication parameters in the query string as described in https://docs.aws.amazon.com/general/lat ... mples.html in the section titled "Using GET with Authentication Information in the Query String". I couldn't get the Basic authentication to go away from the header. I've now switched to the method titled "Using GET with an Authorization Header" where the authorization strings are in the header instead. Doing it that way the default authentication header value was replaced appropriately. This is working well for the simple test requests I am currently making. Hopefully it will continue to function for more complex operations. Thanks again.\", \"post_time\": \"2018-02-01 19:06:01\" },\n\t{ \"post_id\": 20673, \"topic_id\": 5263, \"forum_id\": 8, \"post_subject\": \"Re: HTTPCALL HTTPHEADER\", \"username\": \"anthony.fishbeck\", \"post_text\": \"I've created JIRAs for two different solutions:\\n\\n1. Allow full control of HTTPHEADERs:\\nhttps://track.hpccsystems.com/browse/HPCC-19045\\n\\n2. Native support for AWS Signatures:\\nhttps://track.hpccsystems.com/browse/HPCC-19046\\n\\nIn the meantime if you are not able to use HTTPCALL to make these requests, you could try making the request through one of the embedded languages supported by the HPCC.\\n\\nEmbedding some Javascript, Python, or Java code to make the request and return the data to your ECL code may be a good workaround. \\n\\nYou can find examples of using embedded languages as part of the normal installation on any HPCC server in the following directory:\\n\\n/opt/HPCCSystems/examples/embed
\\nor in our source code repository at: \\n\\nhttps://github.com/hpcc-systems/HPCC-Platform/tree/master/initfiles/examples/embed\", \"post_time\": \"2018-02-01 17:58:38\" },\n\t{ \"post_id\": 20653, \"topic_id\": 5263, \"forum_id\": 8, \"post_subject\": \"HTTPCALL HTTPHEADER\", \"username\": \"jbanner\", \"post_text\": \"I'm trying to call Amazon Web Services from an HTTPCALL and I'm running into some issues. I have got functions that construct the request and sign it using the correct hashing methods. The resulting url works from a web browser or Fiddler and returns the requested response. However, when I send the request from ECL using HTTPCALL I get errors. One of the problems is that HTTPCALL seems to automatically add headers to my request even when they are not specified with the HTTPHEADER option. Using tcpdump on my HPCC system I can see the GET request go out and it includes "Authorization: Basic ##somehash##", "Accept: text/xml", and "Host: ec2.amazonaws.com". The host is fine, but the other two cause issues. The problem is that the request, including the headers, needs to be hashed into a signature and they have to be added in a very specific way in a specific order. The best solution would be if I could control the headers that are added and the order using HTTPHEADER. I even tried adding the Accept and Authorization headers manually thinking maybe it would overwrite the "built in" ones. Instead I got two Authorization lines in the request.\", \"post_time\": \"2018-01-31 20:52:10\" },\n\t{ \"post_id\": 20803, \"topic_id\": 5303, \"forum_id\": 8, \"post_subject\": \"Re: error message Error: System error: 4: MP link closed (1\", \"username\": \"fanglimian\", \"post_text\": \"I see.\\nThanks Richard!!\\n\\nBest,\\nLimian\", \"post_time\": \"2018-02-09 19:21:07\" },\n\t{ \"post_id\": 20793, \"topic_id\": 5303, \"forum_id\": 8, \"post_subject\": \"Re: error message Error: System error: 4: MP link closed (1\", \"username\": \"rtaylor\", \"post_text\": \"fanglimian,\\n\\nThe only way to find out would be to create the suggested ZAP report from the failed workunit and submit a JIRA ticket to the developers.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-02-09 19:02:37\" },\n\t{ \"post_id\": 20783, \"topic_id\": 5303, \"forum_id\": 8, \"post_subject\": \"Re: error message Error: System error: 4: MP link closed (1\", \"username\": \"fanglimian\", \"post_text\": \"Thanks Richard.\\nI am not sure if I fully understand what he means - so maybe my job has taken up too much memory so it crashed? Or the thor itself has space issue so that my job can no longer be processed?\", \"post_time\": \"2018-02-09 18:32:58\" },\n\t{ \"post_id\": 20773, \"topic_id\": 5303, \"forum_id\": 8, \"post_subject\": \"Re: error message Error: System error: 4: MP link closed (1\", \"username\": \"rtaylor\", \"post_text\": \"fanglimian ,\\n\\nI passed this question in to the developers, who said:An MP link closed message means a component has lost contact with a service running on the ip:port indicated in the error.\\nNormally that means it's crashed, been killed,\\nbut could also happen if e.g. the machine was switched off, there were network issues etc.\\n\\nSometimes the root cause can be that the OS killed the process, e.g. 
if the box running the service ran out of memory.\\n\\nThere's not a lot of detail in that report, but judging from the port number involved, it's probably a Thor slave at that ip:port.\\nI'd get some more detail in the form of:\\n+ build version\\n+ ZAP report, preferably including the Thor slaves within it\\nIf the slave logs can't be included in the ZAP, then we'll need at least the master log showing the reported error and the slave log\\nfrom 10.194.96.16:6600 at the same time as the error was first reported in the master log.&#13;
HTH,\\n\\nRichard\", \"post_time\": \"2018-02-09 14:27:14\" },\n\t{ \"post_id\": 20743, \"topic_id\": 5303, \"forum_id\": 8, \"post_subject\": \"error message Error: System error: 4: MP link closed (10.19\", \"username\": \"fanglimian\", \"post_text\": \"Hi\\n\\nI received this error message "\\nError: System error: 4: MP link closed (10.194.96.16:6600)"\\nanyone can help me with what it means? Thanks!\", \"post_time\": \"2018-02-08 17:09:31\" },\n\t{ \"post_id\": 27213, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"DSC\", \"post_text\": \"[quote="vzeufack":rgat35qm]Yeah that is my real issue. I do realize that the RANDOM() function hardly generates same numbers. But, how do I do if I want unique random numbers within a specific range (let say between 1 and 10)?\\n\\nAre you confusing "random" with "shuffled"? Random numbers can repeat. Dice have six numbers on them, and if you throw one die the likelihood of any number showing face-up is 1/6. If you threw one die twice and wound up with "five" both times, that is still random even though you got the same result.\\n\\nIn case you're looking for shuffled numbers, here is one way to do that:\\n\\nNUM_COUNT := 10;\\n\\nds := DATASET\\n (\\n NUM_COUNT,\\n TRANSFORM\\n (\\n {\\n UNSIGNED4 rand,\\n UNSIGNED4 num\\n },\\n SELF.rand := RANDOM(),\\n SELF.num := COUNTER\\n )\\n );\\n\\n// sort the dataset by the random numbers\\nsortedByRandom := SORT(ds, rand);\\n\\n// strip out the random numbers\\nonlyNums := TABLE(sortedByRandom, {num});\\n\\nOUTPUT(onlyNums);
\\n\\nThe result will be the unique numbers 1-10 in a random order.\", \"post_time\": \"2019-08-02 19:17:24\" },\n\t{ \"post_id\": 27203, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"vzeufack\", \"post_text\": \"And those X random numbers lying between [a - b] have to be unique.\", \"post_time\": \"2019-08-02 19:11:34\" },\n\t{ \"post_id\": 27193, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"vzeufack\", \"post_text\": \"Sorry if I may have not set my question clearly. I think this function generates as many unique integers as you specify right? \\nI am looking for a function which can generate 10 random numbers taken from the range [1 - 1000] for example. So, the general problem is to generate X random numbers taken from the range [a - b].\", \"post_time\": \"2019-08-02 19:09:42\" },\n\t{ \"post_id\": 27183, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nAnd I just heard about this function, that does it all:IMPORT Std;\\nds := DATASET(1000*1000, TRANSFORM({UNSIGNED4 r}, \\n SELF.r := STD.System.Util.GetUniqueInteger()));\\nCOUNT(ds); //a million \\nout := DEDUP(SORT(ds,r),r);\\nCOUNT(out); //still a million
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-02 18:55:29\" },\n\t{ \"post_id\": 27173, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"vzeufack\", \"post_text\": \"Yeah that is my real issue.\\n\\nI do realize that the RANDOM() function hardly generates same numbers. But, how do I do if I want unique random numbers within a specific range (let say between 1 and 10)?\\n\\nBest regards,\\nVannel,\", \"post_time\": \"2019-08-02 18:33:16\" },\n\t{ \"post_id\": 27163, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,This is the code:\\nCODE: SELECT ALL\\nds := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1},\\n SELF.r := RANDOM(),\\n SELF.r1 := 0));\\n \\nout := PROJECT(ds, TRANSFORM({INTEGER r,INTEGER r1},\\n SELF.r := LEFT.r,\\n SELF.r1 := LEFT.r % 10));\\nOUTPUT(out);\\n\\n\\nThis is the output:\\t\\nCODE: SELECT ALL\\n1 819659058 8\\n2 1733070309 9\\n3 535821437 7\\n4 3518949408 8\\n5 905533075 5
Not to put too fine a point on it, but none of those random numbers are duplicated. The modulus 10 numbers do have duplicates, but those are hardly "random" -- they are simply the remainders after division by 10 of the actual random numbers.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-02 18:27:30\" },\n\t{ \"post_id\": 27153, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nHere's an example of how to generate any number of guaranteed unique random numbers:GenerateUniqueRandoms(UNSIGNED4 U) := FUNCTION\\n //generate 10% extra\\n ds := DATASET(U*1.1, \\n TRANSFORM({UNSIGNED4 r}, \\n SELF.r := RANDOM()));\\n //then dedup the result \\n out := DEDUP(SORT(ds,r),r); \\n //and limit to the desired number\\n RETURN out[1..U];\\nEND;\\n\\nCOUNT(GenerateUniqueRandoms(1000000)); //I want a million uniques\\nCOUNT(GenerateUniqueRandoms(1000)); //now I want a thousand
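And if the values also have to fall inside a range [a..b], as asked earlier in the thread, one approach (a sketch along the same lines, not tested at scale; the function name is made up) is to build the whole range, shuffle it, and keep the first X -- uniqueness is then guaranteed by construction:

UniqueRandomsInRange(UNSIGNED4 X, UNSIGNED4 a, UNSIGNED4 b) := FUNCTION
  // one record per value in [a..b], each tagged with a random sort key
  ds := DATASET(b - a + 1,
                TRANSFORM({UNSIGNED4 rnd, UNSIGNED4 num},
                          SELF.rnd := RANDOM(),
                          SELF.num := a + COUNTER - 1));
  shuffled := SORT(ds, rnd);
  RETURN TABLE(shuffled, {num})[1..X];   // first X values of the shuffled range
END;

OUTPUT(UniqueRandomsInRange(10, 1, 1000)); // 10 unique values between 1 and 1000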
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-02 18:22:59\" },\n\t{ \"post_id\": 27143, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"vzeufack\", \"post_text\": \"This is the code:\\n\\nds := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1},\\n SELF.r := RANDOM(),\\n SELF.r1 := 0));\\n \\nout := PROJECT(ds, TRANSFORM({INTEGER r,INTEGER r1},\\n SELF.r := LEFT.r,\\n SELF.r1 := LEFT.r % 10));\\nOUTPUT(out);\\n
\\n\\nThis is the output:\\t\\n\\n1\\t819659058\\t 8\\n2\\t1733070309\\t9\\n3\\t535821437\\t 7\\n4\\t3518949408\\t8\\n5\\t905533075\\t 5\\n
\", \"post_time\": \"2019-08-02 18:13:26\" },\n\t{ \"post_id\": 27133, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nPlease post your code that produced duplicate values and the result showing the duplicates so I can try to recreate the problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-02 17:50:24\" },\n\t{ \"post_id\": 27123, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI tried the proposed solution to generate unique random numbers. However, I am still getting duplicates. Is there a way to get unique random numbers with 100% certainty using ECL?\\n\\nBest regards,\\nVannel,\", \"post_time\": \"2019-08-02 17:44:29\" },\n\t{ \"post_id\": 21093, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"lily\", \"post_text\": \"Thank you very much Taylor! It really helps!\\n\\nLily\\n\\n\\n[quote="rtaylor":12pyu3a1]Lily,\\n\\nThe appearance of duplication is due to your use of the modulus operator limiting your result to only 10 possibilities (the remainders of dividing the actual RANDOM() number by 10). \\n\\nThis example demonstrates that the RANDOM() function itself returns a very different value for each use:out := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := RANDOM(),\\n SELF.r1 := SELF.r % 10,\\n SELF.r2 := SELF.r % 100));\\nOUTPUT(out);
I just ran this code and got this result:\\n1911583916\\t0\\t56\\n3647224352\\t8\\t2\\n1636695419\\t8\\t51\\n3689116099\\t3\\t6\\n1012783233\\t9\\t79\\n
You will note in the first record that the first RANDOM() result is 1911583916, the modulus 10 result is 0, and the modulus 100 result is 56. This doesn't make sense if the first random value (1911583916) is used for the two modulus calculations. But it does, because the RANDOM() function is actually called again each time an expression is calculated using it, producing this (correct) result. \\n\\nSo, as you see, RANDOM() actually DOES return unique values each time. If you really did want the modulus values to use the first RANDOM() in each record you would have to do it this way:ds := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := RANDOM(),\\n SELF.r1 := 0,\\n SELF.r2 := 0));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nout := PROJECT(ds, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := LEFT.r,\\n SELF.r1 := LEFT.r % 10,\\n SELF.r2 := LEFT.r % 100));\\nOUTPUT(out);
to produce this result:\\n3040707218\\t8\\t18\\n1727978997\\t7\\t97\\n3116981210\\t0\\t10\\n2703444385\\t5\\t85\\n4157152711\\t1\\t11\\n
And now the modulus results definitely come from the generated RANDOM() values.\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-07 19:55:58\" },\n\t{ \"post_id\": 21083, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Lily,\\n\\nThe appearance of duplication is due to your use of the modulus operator limiting your result to only 10 possibilities (the remainders of dividing the actual RANDOM() number by 10). \\n\\nThis example demonstrates that the RANDOM() function itself returns a very different value for each use:out := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := RANDOM(),\\n SELF.r1 := SELF.r % 10,\\n SELF.r2 := SELF.r % 100));\\nOUTPUT(out);
I just ran this code and got this result:\\n1911583916\\t0\\t56\\n3647224352\\t8\\t2\\n1636695419\\t8\\t51\\n3689116099\\t3\\t6\\n1012783233\\t9\\t79\\n
You will note in the first record that the first RANDOM() result is 1911583916, the modulus 10 result is 0, and the modulus 100 result is 56. This doesn't make sense if the first random value (1911583916) is used for the two modulus calculations. But it does, because the RANDOM() function is actually called again each time an expression is calculated using it, producing this (correct) result. \\n\\nSo, as you see, RANDOM() actually DOES return unique values each time. If you really did want the modulus values to use the first RANDOM() in each record you would have to do it this way:ds := DATASET(5, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := RANDOM(),\\n SELF.r1 := 0,\\n SELF.r2 := 0));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nout := PROJECT(ds, TRANSFORM({INTEGER r,INTEGER r1,INTEGER r2}, \\n SELF.r := LEFT.r,\\n SELF.r1 := LEFT.r % 10,\\n SELF.r2 := LEFT.r % 100));\\nOUTPUT(out);
to produce this result:\\n3040707218\\t8\\t18\\n1727978997\\t7\\t97\\n3116981210\\t0\\t10\\n2703444385\\t5\\t85\\n4157152711\\t1\\t11\\n
And now the modulus results definitely come from the generated RANDOM() values.\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-07 19:39:39\" },\n\t{ \"post_id\": 21073, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"lily\", \"post_text\": \"Hi Taylor,\\n\\nThanks for help!\\n\\nThe code that generated the repeated random numbers is as shown below:\\n\\nout := DATASET(5, TRANSFORM({INTEGER r}, SELF.r:= RANDOM()%10));\\nOUTPUT(out);\\n\\nThe output result is as shown below:\\n\\n## r\\n1\\t8\\n2\\t0\\n3\\t8\\n4\\t7\\n5\\t1\\n\\nThanks,\\nLily\\n\\n[quote="rtaylor":kh52pyed]Lily,\\n\\nIn my experience, it can be difficult to make RANDOM() generate the same numbers when you want it to, so I'm wondering how you used RANDOM()? In what context? Can you show me the code that produces the "same" non-unique numbers, please?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-07 19:13:37\" },\n\t{ \"post_id\": 21063, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"Re: generate unique random numbers\", \"username\": \"rtaylor\", \"post_text\": \"Lily,\\n\\nIn my experience, it can be difficult to make RANDOM() generate the same numbers when you want it to, so I'm wondering how you used RANDOM()? In what context? Can you show me the code that produces the "same" non-unique numbers, please?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-07 19:08:29\" },\n\t{ \"post_id\": 21053, \"topic_id\": 5373, \"forum_id\": 8, \"post_subject\": \"generate unique random numbers\", \"username\": \"lily\", \"post_text\": \"Hi everyone,\\n\\nI am wondering is there a way to generate unique random numbers in ECL?\\nI tried to use RANDOM() but the result contains same random numbers.\\n\\nThanks,\\nLily\", \"post_time\": \"2018-03-07 19:04:13\" },\n\t{ \"post_id\": 21163, \"topic_id\": 5383, \"forum_id\": 8, \"post_subject\": \"Re: Catching layout mismatch without failing the WU\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nThat looks like your best option. Did you also find the Programmer's Guide article about their use?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-16 13:14:58\" },\n\t{ \"post_id\": 21133, \"topic_id\": 5383, \"forum_id\": 8, \"post_subject\": \"Re: Catching layout mismatch without failing the WU\", \"username\": \"jwilt\", \"post_text\": \"I've just seen the new "LOOKUP" option for DATASET and RECORDOF, for 6.4+. This should help with the scenario I outlined.\", \"post_time\": \"2018-03-15 05:41:50\" },\n\t{ \"post_id\": 21103, \"topic_id\": 5383, \"forum_id\": 8, \"post_subject\": \"Catching layout mismatch without failing the WU\", \"username\": \"jwilt\", \"post_text\": \"Scenario:\\nMany logical files are processed in the same workunit.\\nIf one of them has had a recent layout change, get "Layout does not match published layout", and workunit fails. I.e., none of the other (independent) results are generated.\\nNaturally, this can be broken into smaller independent parts.\\n\\nBut - \\nIs there a way to "catch" this layout mismatch failure (without failing the WU) and continue processing (with a 0-record result for the failed ds, or similar)?\\nThanks.\", \"post_time\": \"2018-03-14 00:11:21\" },\n\t{ \"post_id\": 21243, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nGlad that works for you! I think this would be helpful to put in the documentation for PIPE(): what exactly is expected if not CSV or XML.
I agree. And since you're new to the Forum, I expect you may also be new to our bug tracking/feature request system: JIRA - https://track.hpccsystems.com. \\n\\nYou can use JIRA to make any reports you need to and you'll automatically be updated as the ticket makes its way from initial report, through developer discussion, and on to completion.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-19 13:32:54\" },\n\t{ \"post_id\": 21233, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"rken\", \"post_text\": \"[quote="rtaylor":svedm6dh]rken,\\n\\nIt also occurs to me that you could try making your Python code return your variable-length STRING in the format that HPCC expects it -- a leading 4-byte integer value indicating the length of the text prepended to the text itself, something like this: //this is obviously ECL, but you should get the idea:\\n\\n //hex representation of 11, in Little Endian format:\\nDATA4 LeadingLength := x'0B000000'; \\n\\n //type transfer to STRING4 and concatenate to return string\\nReturnStr := ((>STRING4<)LeadingLength) + 'Hello World';
\\nLet me know if that works, please. \\n\\nHTH,\\n\\nRichard\\n\\n\\nYes, this works! Exactly what I was looking for. Adding a 4 byte little endian, signed integer to the front of my python scripts stdout works like a charm. \\n\\nECL Record being returned by PIPE():\\n
rec := RECORD\\n STRING output;\\nEND;
\\n\\nPython Code:\\n\\nlorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam condimentum tincidunt quam et vulputate metus."\\nsOut = struct.pack('<i', len(lorem))\\nsys.stdout.write(sOut + lorem)
\\n\\nThe above outputs a single record row with the lorem text. If one wants to output multiple rows in the record, making separate stdout calls accomplishes this (perhaps adding a sys.stdout.flush() call in between might increase stability)\\n\\nlorem = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam condimentum tincidunt quam et vulputate metus."\\nsOut = struct.pack('<i', len(lorem))\\n\\nfor _ in range(n):\\n sys.stdout.write(sOut + lorem)&#13;
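Putting the two halves together, the ECL side that reads this length-prefixed output is just the variable-length record plus the original PIPE call (the names recordInput and myPythonScript.py come from the posts above; recordInput is whatever dataset is being piped in):

rec := RECORD
  STRING output;
END;

// each record the script writes is a 4-byte little-endian length followed by that many bytes
myPipe := PIPE(recordInput, 'python myPythonScript.py', rec);
OUTPUT(myPipe);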
\\n\\nI think this would be helpful to put in the documentation for PIPE(): what exactly is expected if not CSV or XML.\", \"post_time\": \"2018-03-16 20:19:47\" },\n\t{ \"post_id\": 21223, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"rken\", \"post_text\": \"1: I tried using a large fixed length string(and other types too) and they work as expected. It also doesn't fail if i use a STRINGn where: n % string.length() == 0, except it just returns 100 rows, slicing off whatever is after.\\n\\n2 and 3: With EMBED(python), i can return, in python, a [bytearray] into a rec:= DATA d; END; with no problem, tested it up to a 7 mbit string. This indicates that there is something else besides the string that is communicated between Python and ECL via the plugin.\", \"post_time\": \"2018-03-16 20:13:13\" },\n\t{ \"post_id\": 21213, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nIt also occurs to me that you could try making your Python code return your variable-length STRING in the format that HPCC expects it -- a leading 4-byte integer value indicating the length of the text prepended to the text itself, something like this: //this is obviously ECL, but you should get the idea:\\n\\n //hex representation of 11, in Little Endian format:\\nDATA4 LeadingLength := x'0B000000'; \\n\\n //type transfer to STRING4 and concatenate to return string\\nReturnStr := ((>STRING4<)LeadingLength) + 'Hello World';
\\nLet me know if that works, please. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-16 14:06:14\" },\n\t{ \"post_id\": 21203, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"bforeman\", \"post_text\": \"Also, did you try approaching this using embedded Python?\\n\\nSee the following topic for more information:\\n\\nhttps://hpccsystems.com/training/documentation/ecl-language-reference/html/EMBED_Structure.html\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-03-16 13:57:38\" },\n\t{ \"post_id\": 21193, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\nI agree with Richard's comment, but I also found a similar post regarding the memory pool with someone using embedded python. \\nhttps://hpccsystems.com/bb/viewtopic.php?f=14&t=4823\\n\\nMay or may not be related, but it might be worth it to post this to our JIRA Issue Tracker.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nIf it is true that variable length strings are not permitted it may be something to add to our documentation.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-03-16 13:40:03\" },\n\t{ \"post_id\": 21183, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"Re: PIPE memory pool exhausted\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nI think your problem is that the internal respresentation of a variable-length STRING is different to that of a fixed-length STRINGn. \\n\\nI suggest you try using a large fixed-length string as your return type (maybe STRING200 in this case) and see if that works for you. If it does, then your workaround is to understand what your maximize size return value is likely to be and define your STRINGn to something a bit larger.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-16 13:35:45\" },\n\t{ \"post_id\": 21153, \"topic_id\": 5393, \"forum_id\": 8, \"post_subject\": \"PIPE memory pool exhausted\", \"username\": \"rken\", \"post_text\": \"I have a simple python script I wrote that just returns 'hello world' via stdout. I am calling this script with as follows:\\n\\n
rec := RECORD\\n STRING11 out;\\nEND;\\n\\nmyPipe := PIPE(recordInput, 'python myPythonScript.py ', rec);\\n
\\n\\nIf the record is "STRING11" it works fine. If i change it to "STRING", to try and accept a variable length string, it returns this error: "Error: System error: 1301: Pool memory exhausted: ...". An 11 character length string doesn't seem that large.\\n\\nI have also tried returning from python a C struct, which creates a different set of errors and only gets a memory exhaust error if the struct is over 100 long.\\n\\nAnyone have this issue or have a link to more documentation on what exactly PIPE() is expecting. My end goal is to simply return a variable length (string, int, data, etc) to a record...\\n\\nThanks in advance\", \"post_time\": \"2018-03-15 22:48:07\" },\n\t{ \"post_id\": 21433, \"topic_id\": 5423, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON and Abstract fields\", \"username\": \"rtaylor\", \"post_text\": \"Felipe,\\n\\nNot every XML or JSON file is going to be easily definable in ECL, and this is one of those. The primary mission of any XML or JSON file processing is to extract the relevant data from the file and get it into a format that is native to HPCC and can work efficiently. Here's my code that does that:IMPORT Std;\\n//start by getting the most data possible directly from the JSON file\\n// and defining the max sizes of all the fields it's possible to determine:\\nLayout := RECORD\\n STRING22 business_id{xpath('business_id')};\\n STRING40 name{xpath('name')};\\n STRING40 address{xpath('address')};\\n STRING1 neighborhood{xpath('neighborhood')};\\n STRING20 city{xpath('city')};\\n STRING2 state{xpath('state')};\\n STRING5 postal_code{xpath('postal_code')};\\n STRING12 latitude{xpath('latitude')};\\n STRING12 longitude{xpath('longitude')};\\n STRING4 stars{xpath('stars')};\\n STRING4 review_count{xpath('review_count')};\\n STRING1 is_open{xpath('is_open')};\\n STRING11 hours1{xpath('hours/Monday')};\\n STRING11 hours2{xpath('hours/Tuesday')};\\n STRING11 hours3{xpath('hours/Wednesday')};\\n STRING11 hours4{xpath('hours/Thursday')};\\n STRING11 hours5{xpath('hours/Friday')};\\n STRING11 hours6{xpath('hours/Saturday')};\\n STRING11 hours7{xpath('hours/Sunday')};\\n SET OF STRING SSCategory{xpath('categories')};\\t\\n STRING attributes;\\nEND;\\n\\nJSONds := DATASET('~rttest::json::yelp::business_sample-fixed.txt',Layout,JSON('/'));\\n\\n//re-define the same logical file as a CSV DATASET \\n// for simple parsing of the Atttributes tag contents of each record\\nrec := {STRING line};\\nCSVds := DATASET('~rttest::json::yelp::business_sample-fixed.txt',rec,\\n CSV(SEPARATOR('')));\\nJustData := CSVds(line[1] NOT IN ['[',']']); //remove "empty" recs\\n// JustData;\\n\\n//extract just the business_id and Attributes tag content:\\nPrjRec := RECORD\\n STRING business_id;\\n STRING Attributes;\\nEND;\\nAttrTbl := PROJECT(JustData,\\n TRANSFORM(PrjRec,\\n SELF.business_id := LEFT.line[18..39];\\n AttrPos := Std.Str.Find(LEFT.line,'"attributes"',1);\\n CatPos := Std.Str.Find(LEFT.line,'"categories"',1);\\n SELF.Attributes := LEFT.line[AttrPos+15 .. CatPos-1]));\\nAttrTbl; //business_id to join to JSON data, and Attributes to post-process\\n\\n//then JOIN to the JSONds to fill in the Attributes field\\nAttrDS := JOIN(JSONds,AttrTbl,LEFT.business_id=RIGHT.business_id,\\n TRANSFORM(Layout,SELF.Attributes := RIGHT.Attributes,SELF := LEFT));\\n//and write the result to disk file\\nOUTPUT(AttrDS,ALL);
Note that the Categories and Attributes data will require post-processing. The Categories, being a simple comma-delimited set of strings, is fairly straight-forward, as are the hours fields. \\n\\nThe Attributes data will require more complex parsing. It would be possible to define this all in the RECORD structure using XPATH, but that would mean first determining all the unique tag names within it for all the records, and any nested child datasets for any repeating structures (and there are some). That's why I just parsed out the content, and you can post-process that string any way you need to extract whatever data is relevant to your product.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-04 13:43:29\" },\n\t{ \"post_id\": 21393, \"topic_id\": 5423, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON and Abstract fields\", \"username\": \"fgbulsoni\", \"post_text\": \"Hey Richard, thank you for the reply!\\n\\nAnd sure, here's a file containing the first half dozen records of the dataset:\\n[attachment=0:1vma4lex]business_sample.txt\\n\\nThanks,\\nFelipe\", \"post_time\": \"2018-04-03 11:30:43\" },\n\t{ \"post_id\": 21383, \"topic_id\": 5423, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON and Abstract fields\", \"username\": \"rtaylor\", \"post_text\": \"Felipe,\\n\\nCan you please attach a file containing the first half dozen or so records from the actual JSON file? \\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2018-04-03 10:40:20\" },\n\t{ \"post_id\": 21363, \"topic_id\": 5423, \"forum_id\": 8, \"post_subject\": \"FROMJSON and Abstract fields\", \"username\": \"fgbulsoni\", \"post_text\": \"Hello, I'm sorry if this has been posted before, but when searching for answers I found no results which matched exactly what I want.\\n\\nI'm trying to load a json dataset which contains many nested structures inside it, more specifically the Yelp challenge dataset (https://www.yelp.com/dataset/challenge).\\n\\nA sample of a record would be something like the following:\\n\\n{\\n "business_id":"FYWN1wneV18bWNgQjJ2GNg",\\n "name":"Dental by Design",\\n "neighborhood":"",\\n "address":"4855 E Warner Rd, Ste B9",\\n "city":"Ahwatukee",\\n "state":"AZ",\\n "postal_code":"85044",\\n "latitude":33.3306902,\\n "longitude":-111.9785992,\\n "stars":4.0,\\n "review_count":22,\\n "is_open":1,\\n "attributes":{\\n "AcceptsInsurance":true,\\n "ByAppointmentOnly":true,\\n "BusinessAcceptsCreditCards":true\\n },\\n "categories":[\\n "Dentists",\\n "General Dentistry",\\n "Health & Medical",\\n "Oral Surgeons",\\n "Cosmetic Dentists",\\n "Orthodontists"\\n ],\\n "hours":{\\n "Friday":"7:30-17:00",\\n "Tuesday":"7:30-17:00",\\n "Thursday":"7:30-17:00",\\n "Wednesday":"7:30-17:00",\\n "Monday":"7:30-17:00"\\n }\\n}
\\n\\n\\nThe issue I'm currently facing is: I can't seem to be able to load the nested hours unless I specifically specify the 'day' of the week for them.\\n\\nI'd like something like this:\\n\\nhtest := RECORD\\n STRING weekday;\\n STRING hours;\\nEND;\\n\\nlcategory := RECORD\\n STRING category;\\nEND;\\n\\nnamesRec := RECORD \\n UNSIGNED2 EmployeeID{xpath('EmpID')}; \\n STRING10 Firstname{xpath('FName')}; \\n STRING10 Lastname{xpath('LName')}; \\n DATASET(htest) hHours{xpath('hours')}; \\n DATASET(lcategory) NCategory{xpath('categories')}; \\n SET OF STRING SSCategory{xpath('categories')}; \\n
\\n\\nAnd I'd expect the days such as 'monday', 'tuesday', etc to come under the field 'weekday', while the hours would come under the field 'hours'.\\n\\nBut, while this syntax does compile, I get empty fields as a result. \\n\\nIs it possible to load data like this/ what should be changed to achieve that effect? \\n\\nFor more info: I've verified that I can successfully retrieve the hour for a specific day by having a RECORD structure such as:\\n\\ntest := RECORD\\n STRING friday{xpath('Friday')};\\nEND;
\\n\\nBut that would render me with a field for each day of the week, instead of just a 'weekdays' field, which is something I'd not want. \\nThis issue would be even bigger when trying to map a field such as 'attributes', which basically contains a multitude of different possible keys.\\n\\n\\nThanks,\\nFelipe.\", \"post_time\": \"2018-04-02 13:00:34\" },\n\t{ \"post_id\": 21533, \"topic_id\": 5453, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch - Individual nodes on cluster\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa,\\n\\nOperations >> System Servers tells you the IPs of all the middleware components of your environment.\\n\\nOperations >> Cluster Processes >> <clustername> tells you the IPs of the Thor or Roxie nodes for that cluster.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-09 18:35:28\" },\n\t{ \"post_id\": 21493, \"topic_id\": 5453, \"forum_id\": 8, \"post_subject\": \"ECL Watch - Individual nodes on cluster\", \"username\": \"rsghatpa\", \"post_text\": \"How to check what process is running on what node in a cluster environment through ECL Watch?\", \"post_time\": \"2018-04-09 03:36:32\" },\n\t{ \"post_id\": 25163, \"topic_id\": 5503, \"forum_id\": 8, \"post_subject\": \"Re: Retrieving group name from Thorlib.group()\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nAssuming the semi-colons in your filename are just a posting typo, then I suggest you submit a JIRA bug report for the CLUSTER option error you're getting.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-12 15:16:36\" },\n\t{ \"post_id\": 25063, \"topic_id\": 5503, \"forum_id\": 8, \"post_subject\": \"Re: Retrieving group name from Thorlib.group()\", \"username\": \"micevepay\", \"post_text\": \"I am getting a similar error when I try to write a file to both thor components usingOutput(somedataset,,'~path::to;;file',CLUSTER( 'mythor','mythor_2' ))
\", \"post_time\": \"2019-03-12 03:21:13\" },\n\t{ \"post_id\": 24553, \"topic_id\": 5503, \"forum_id\": 8, \"post_subject\": \"Re: Retrieving group name from Thorlib.group()\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIf you have the WUID, something of the form W20190222-134355\\n\\nThere is a SOAP interface, see URL\\n\\nhttp://<ESP IP:port>/WsWorkunits/WUInfo
\\nSupply the WUID in the structure:\\n\\n<soap:Body>\\n <WUInfo>\\n <Wuid>W20190222-134355</Wuid>\\n <TruncateEclTo64k>1</TruncateEclTo64k>\\n <Type/>\\n <IncludeExceptions>1</IncludeExceptions>\\n <IncludeGraphs>1</IncludeGraphs>\\n <IncludeSourceFiles>1</IncludeSourceFiles>\\n <IncludeResults>1</IncludeResults>\\n <IncludeResultsViewNames>0</IncludeResultsViewNames>\\n <IncludeVariables>1</IncludeVariables>\\n <IncludeTimers>1</IncludeTimers>\\n <IncludeDebugValues>1</IncludeDebugValues>\\n <IncludeApplicationValues>1</IncludeApplicationValues>\\n <IncludeWorkflows>1</IncludeWorkflows>\\n <IncludeXmlSchemas>0</IncludeXmlSchemas>\\n <IncludeResourceURLs>0</IncludeResourceURLs>\\n <IncludeECL>1</IncludeECL>\\n <IncludeHelpers>1</IncludeHelpers>\\n <IncludeAllowedClusters>1</IncludeAllowedClusters>\\n <SuppressResultSchemas>0</SuppressResultSchemas>\\n <ThorSlaveIP/>\\n </WUInfo>\\n </soap:Body>\\n
\\nAnd the SOAP response is in XML tag\\n<WUInfoResponse><Workunit><Cluster>
\\nNote if you only want the 'cluster' information, set all the request 'Include' tags to 0.\\n\\nNote there is a shed load of other interfaces; just see the list in:\\n\\nhttp://<ESP IP:port>/WsWorkunits
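\\nFor what it's worth, the same lookup can also be done from inside ECL via the Standard Library -- a rough sketch of mine, not part of the original reply (the WUID is the example one above; check the Standard Library Reference for the exact fields returned):\\n\\nIMPORT Std;\\nwu := 'W20190222-134355';\\nOUTPUT(Std.System.Workunit.WorkunitList(wu, wu)); //one row per matching workunit, including the cluster it ran on\\n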
\\n\\nYours\\nAllan\", \"post_time\": \"2019-02-22 16:07:14\" },\n\t{ \"post_id\": 22603, \"topic_id\": 5503, \"forum_id\": 8, \"post_subject\": \"Re: Retrieving group name from Thorlib.group()\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team,\\nCan you please update is there a way to find on which Queue our workunit would have been ran.\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2018-08-14 05:28:55\" },\n\t{ \"post_id\": 21803, \"topic_id\": 5503, \"forum_id\": 8, \"post_subject\": \"Retrieving group name from Thorlib.group()\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello All,\\n\\nThe current topology of our THOR cluster on HPCC is as follows:-\\ncluster - thordev01\\n queue - thorXX_dev01\\n queue - thorXX_dev02\\n\\nWhen i execute the code "lib_thorlib.Thorlib.group();" on ECL IDE, I get an error \\n"Error: System error: -1: getGroupName(): ambiguous groups thorXX_dev01, thorXX_dev02".\\n\\nIs there a way to get a single queue / group name in such a scenario? ELSE\\nCan you help with some ECL code to get a single queue / group name?\\n\\nI need this queue name / group name for file spraying.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2018-05-02 09:13:22\" },\n\t{ \"post_id\": 22133, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"richardkchapman\", \"post_text\": \"Roxie doesn't log what it writes - but you'd probably be better off tracing what you receive anyway.\\n\\nWhat led you to think that the result from Roxie was corrupt (as opposed to some bug in your core library, for example)?\\n\\nRichard\", \"post_time\": \"2018-06-18 08:51:34\" },\n\t{ \"post_id\": 22123, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"daviddasher\", \"post_text\": \"Hello\\n\\nNo tracing currently, only in our core library which was in the original message. Is there anything we can turn on in the Roxie config that would help?\\n\\nThanks\\n\\nD\", \"post_time\": \"2018-06-15 13:21:12\" },\n\t{ \"post_id\": 22113, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"richardkchapman\", \"post_text\": \"Do you have any tracing that would tell you what the xml returned from Roxie is when you get these errors?\", \"post_time\": \"2018-06-15 12:47:12\" },\n\t{ \"post_id\": 22103, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"tlhumphrey2\", \"post_text\": \"The error message gives a line number and location. Can you include the code, please?\", \"post_time\": \"2018-06-15 12:10:30\" },\n\t{ \"post_id\": 22093, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"daviddasher\", \"post_text\": \"Morning Richard\\n\\nThe "HPCCGetDescStatsPaged" is our Roxie query name which is being called from our internal data access library.\\n\\nThis only happens when getting xml from Roxie. 
It's not linked to one roxie query as it can happen on any.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2018-06-15 08:01:06\" },\n\t{ \"post_id\": 22083, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Re: Occasional Roxie Error 6.2.4-1\", \"username\": \"richardkchapman\", \"post_text\": \"I'm not sure that this error is coming from our system - I searched the sources (and the brains of a few developers) for the string "HPCCGetDescStatsPaged" and it does not appear.\\n\\nYou say it's caused by a resultname being blank? Would that be in the XML from roxie, or from ESP ?\", \"post_time\": \"2018-06-15 07:56:43\" },\n\t{ \"post_id\": 22073, \"topic_id\": 5643, \"forum_id\": 8, \"post_subject\": \"Occasional Roxie Error 6.2.4-1\", \"username\": \"daviddasher\", \"post_text\": \"Hello \\n\\nOur operational team are reporting about 200 errors per day from our Roxie service where the result appears to be corrupt or as you can see from the error below a blank where we should get a resultname. When we try this directly in ESP or IDE we have never been able to replicate it. \\n\\nSent via DB Mail DateTime: 14/06/2018 13:22:57 SingleId: 0 MultiId: 106290 MultiUserId: 1413055 System.Xml.XmlException: Name cannot begin with the ' ' character, hexadecimal value 0x20. Line 18, position 40. at Cpl.Data.Hpcc.GetData(String MethodName, NameValueCollection Parameters, NameValueCollection ColumnMapping, String[] DataSetNames) at Cpl.Web.Pages.Base.PageAuthenticated.HPCCGetDescStatsPaged(Int32 MultiUserId, Int32 CurrentPageIndex, Int32 PageSize, String SortMode, String Filter)
\\n\\nAny help would be appreciated.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2018-06-14 13:48:03\" },\n\t{ \"post_id\": 22303, \"topic_id\": 5703, \"forum_id\": 8, \"post_subject\": \"Re: Searching for an Archived WUID via ECL/c++?\", \"username\": \"hellowangzhi\", \"post_text\": \"Thank you!\", \"post_time\": \"2018-07-04 02:02:52\" },\n\t{ \"post_id\": 22273, \"topic_id\": 5703, \"forum_id\": 8, \"post_subject\": \"Re: Searching for an Archived WUID via ECL/c++?\", \"username\": \"JimD\", \"post_text\": \"Archived workunits are stored on disk in the folder specified in your configuration. Each WU is a folder containing one or more files. One of these files is an ECL Archive named <wuid>.XML\\n\\nThe default is:\\n /var/lib/HPCCSystems/hpcc-data/sasha/Archive/WorkUnits\\n\\nYou could use find or grep or a combination of the two. The following example returns all WUIDs containing the word JOIN:\\n\\n egrep -lir /var/lib/HPCCSystems/hpcc-data/sasha/Archive/WorkUnits --include=*.{xml,ecl,cpp} "JOIN"
\\n\\nIf you want to run a linux command using ECL, you can try:\\nSTD.System.Util.CmdProcess
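\\nA rough usage sketch of mine (untested -- double-check the meaning of the second argument in the Standard Library Reference before relying on it):\\n\\nIMPORT Std;\\nOUTPUT(Std.System.Util.CmdProcess('echo HPCC', '')); //runs the command and returns its standard output as a string\\n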
\\n\\nHTH,\\nJim\", \"post_time\": \"2018-07-03 19:16:15\" },\n\t{ \"post_id\": 22243, \"topic_id\": 5703, \"forum_id\": 8, \"post_subject\": \"Searching for an Archived WUID via ECL/c++?\", \"username\": \"newportm\", \"post_text\": \"Morning,\\n\\nI was wondering if there is a way to search through archived work units and look for specific code that was executed. I know this can be done manually in ecl watch if you know the date that the query was executed on. I'm interested in programmatically adding my own filters for finding the work units and search criteria that will find archived ecl. Even something that I can return all the archived ecl in the xml format and then I can do a search after I have saved those search results to a dataset(s).\\n\\nIn essence I'm looking for bwr code that is not saved to my machine. \\n\\nThanks Tim\", \"post_time\": \"2018-07-03 12:52:57\" },\n\t{ \"post_id\": 22383, \"topic_id\": 5733, \"forum_id\": 8, \"post_subject\": \"Re: Where template language is used?\", \"username\": \"rtaylor\", \"post_text\": \"hellowangzhi,\\n\\nThe Template Language is covered in our online eLearning course: \\n Applied ECL - ECL Code Generation Tools \\n\\nAnd that is available here:\\n https://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-09 13:13:37\" },\n\t{ \"post_id\": 22323, \"topic_id\": 5733, \"forum_id\": 8, \"post_subject\": \"Where template language is used?\", \"username\": \"hellowangzhi\", \"post_text\": \"Where template language is used? I'm learning the ECLlanguage.I don't understand how to use it. Please help me. thank you!\", \"post_time\": \"2018-07-05 02:16:23\" },\n\t{ \"post_id\": 22333, \"topic_id\": 5743, \"forum_id\": 8, \"post_subject\": \"Where template language is used?\", \"username\": \"hellowangzhi\", \"post_text\": \"Where template language is used? I'm learning the ECLlanguage.I don't understand how to use it. Please help me. thank you!\", \"post_time\": \"2018-07-05 02:17:09\" },\n\t{ \"post_id\": 22413, \"topic_id\": 5753, \"forum_id\": 8, \"post_subject\": \"Re: What is the superfile?\", \"username\": \"rtaylor\", \"post_text\": \"hellowangzhi,\\n\\nSuperfiles are covered in our online eLearning course: \\nAdvanced ECL (Part 2)\\n\\nAnd that is available here:\\nhttps://learn.lexisnexis.com/hpcc\\n\\nThey are also discussed in the Programmer Guide, available for download here:\\nhttps://hpccsystems.com/training/documentation/learning-ecl\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-09 14:10:52\" },\n\t{ \"post_id\": 22343, \"topic_id\": 5753, \"forum_id\": 8, \"post_subject\": \"What is the superfile?\", \"username\": \"hellowangzhi\", \"post_text\": \"What is the superfile? What is the difference between it and the file?\", \"post_time\": \"2018-07-05 09:12:48\" },\n\t{ \"post_id\": 22513, \"topic_id\": 5773, \"forum_id\": 8, \"post_subject\": \"Re: using #CONSTANT value in template\", \"username\": \"rtaylor\", \"post_text\": \"KLB,\\n\\nThe short answer is no, the only way to do what you want is to define a constant and edit that constant immediately before you run the BWR. That definition can be separate from the template language code you want to run, but it must be a compile-time constant. 
\\n\\nHere's the way I would have written your test function to demonstrate this:IMPORT $;\\nEXPORT TestFunction() := function\\n\\n #DECLARE(ndx);\\n #SET(ndx, 1);\\n #DECLARE(rtn);\\n #SET(rtn, '0');\\n\\n #LOOP\\n #IF (%ndx% > $.max_times) \\n #BREAK\\n #ELSE\\n #APPEND(rtn,',' + %'ndx'% );\\n #END\\n #SET(ndx, %ndx% + 1);\\n #END \\n\\n return %'rtn'%;\\nend;
I added a separate rtn symbol to emulate the code generation that Template language is designed to do. Note that the max_times is now outside the FUNCTION structure and in a separate EXPORT definition:\\nEXPORT max_times := 10;
It could be anywhere else in your repository, but I put mine in the same folder as the function. Then the BWR code looks like this:\\nIMPORT Test;\\n//MODIFY test.max_times BEFORE RUNNING THIS\\noutput(test.TestFunction(), named('result'));
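\\nAs a quick sanity check (my own hand trace, not output from this post): with EXPORT max_times := 10; the #LOOP appends 1 through 10 to the initial '0', so the workunit result should be the string '0,1,2,3,4,5,6,7,8,9,10'. Something like\\n\\nASSERT(test.TestFunction() = '0,1,2,3,4,5,6,7,8,9,10', 'unexpected expansion', FAIL);\\n\\nin the BWR would confirm it.\\n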
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-12 14:48:02\" },\n\t{ \"post_id\": 22503, \"topic_id\": 5773, \"forum_id\": 8, \"post_subject\": \"Re: using #CONSTANT value in template\", \"username\": \"KLB\", \"post_text\": \"I meant to add, what I am really after is a way to include a constant in a BWR that will allow the code that it runs to compile conditionally based on that constant. We don't want to have to modify and sandbox the actual code each time, we'd like to just modify the BWR that runs it. The constant will determine how many times the #LOOP runs. Is that possible?\", \"post_time\": \"2018-07-12 10:02:51\" },\n\t{ \"post_id\": 22493, \"topic_id\": 5773, \"forum_id\": 8, \"post_subject\": \"Re: using #CONSTANT value in template\", \"username\": \"KLB\", \"post_text\": \"Thanks Richard. I think what you said in another thread just made me think it could be used at compile time in Thor...\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=1785&p=7841&hilit=%23constant+constant+stored#p7841\\n\\nWhen running interactively in Thor, the #STORED takes effect (it's basically a compile-time tool) and you see its value as the result.
\", \"post_time\": \"2018-07-11 20:04:10\" },\n\t{ \"post_id\": 22483, \"topic_id\": 5773, \"forum_id\": 8, \"post_subject\": \"Re: using #CONSTANT value in template\", \"username\": \"rtaylor\", \"post_text\": \"KLB,\\n\\nThat "Constant expression expected" error is asking for a compile-time constant, not a run-time constant. \\n\\nTemplate language generates ECL code, and that generated ECL is then parsed and translated into the appropriate C++ for the job, which is then compiled into the object code that executes on each node. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-11 18:03:54\" },\n\t{ \"post_id\": 22463, \"topic_id\": 5773, \"forum_id\": 8, \"post_subject\": \"using #CONSTANT value in template\", \"username\": \"KLB\", \"post_text\": \"I would think the following should work, but I get the "Constant expression expected" error. Why would max_times not be considered a constant here?\\n\\n\\n#CONSTANT('times_Count', 25)\\n\\nTestFunction() := function\\n\\n max_times := 0: stored('times_count'); \\n\\n #DECLARE(ndx);\\n #SET(ndx, 1);\\n \\n #LOOP\\n #SET(ndx, %ndx% + 1);\\n #IF (%ndx% > max_times) \\n #BREAK\\n #END\\n #END \\n\\n return %ndx%;\\n\\nend;\\n\\n \\noutput(TestFunction(), named('result'));\\n
\", \"post_time\": \"2018-07-10 18:19:34\" },\n\t{ \"post_id\": 22853, \"topic_id\": 5833, \"forum_id\": 8, \"post_subject\": \"Re: Joining two index with different keys\", \"username\": \"jtaylor306\", \"post_text\": \"[quote="rtaylor":367avwcr]jtaylor306, but of course ECL says I have to use the keys in join condition
I know of no such restriction, so what makes you think that's the case? Are you getting some error or warning? If so, exactly what does that say?\\n\\nI could speak to your specific problem much better if I could see the actual code. Can you post more details please? At the very least, the JOIN that you're having the problem with would help.\\n\\nHTH,\\n\\nRichard\\n\\nHey Richard, \\n\\nNever even noticed anyone responded to this post. Anyways, I'm not sure what I was doing wrong when I originally wrote this post(it was my 2nd day with ECL) because I have most it working now. Thank you!\", \"post_time\": \"2018-09-07 15:21:40\" },\n\t{ \"post_id\": 22653, \"topic_id\": 5833, \"forum_id\": 8, \"post_subject\": \"Re: Joining two index with different keys\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor306, but of course ECL says I have to use the keys in join condition
I know of no such restriction, so what makes you think that's the case? Are you getting some error or warning? If so, exactly what does that say?\\n\\nI could speak to your specific problem much better if I could see the actual code. Can you post more details please? At the very least, the JOIN that you're having the problem with would help.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-17 20:40:12\" },\n\t{ \"post_id\": 22643, \"topic_id\": 5833, \"forum_id\": 8, \"post_subject\": \"Joining two index with different keys\", \"username\": \"jtaylor306\", \"post_text\": \"I'm pulling some data that has a set of keys lets say titled: keyA and keyB. I have the second index with keys: keyZ, keyX. THe keys dont contain any of the same info so I don't want to have to use them to do my join conditional. Now suppose both data sets have a a bunch of fields, let one of those fields be titled "name." I want my join condition to be just index1.name=index2.name, but of course ECL says I have to use the keys in join condition, any way around this?\", \"post_time\": \"2018-08-17 15:02:20\" },\n\t{ \"post_id\": 22773, \"topic_id\": 5853, \"forum_id\": 8, \"post_subject\": \"Re: Is SHARED implicit in modules with only one export?\", \"username\": \"rtaylor\", \"post_text\": \"BGehalo, I'm assuming SHARED is implicitly defined in modules where there's only one export.
No, that is an incorrect assumption.\\n\\nSo are all variables in a module considered SHARED if there's only one exported attribute in that module?
No.\\n\\nFirst, ECL has NO "variables" -- they're definitions (also referred to as "attributes" by old-timers). The word "variable" implies that it can be reassigned different values at different logic points in your code, but in ECL a "definition" can only be defined once (within a given scope of visibility, and visibility scopes are specified by the keywords EXPORT and SHARED), and ECL does not have any assignment statement -- the operator := is not an assignment, but is read as "is defined as" to specify the expression the definition implements. So that this code:MyName := 42;
defines "MyName" as the value "42" and should be read as "MyName is defined as 42."\\n\\nYour real question is: Why is TestVal only visible to the share_export() function and not also to the share_export2() function? \\n\\nAnd the answer is: because it is neither SHARED nor EXPORTed, therefore it is local. The visibility scope of a local definition terminates at the end of the very next EXPORT or SHARED definition. \\n\\nTherefore, your code could be like this:SharedTest := MODULE\\n test_val := '1'; //local definition, terminates visibility at the "END;"\\n\\n EXPORT share_export() := FUNCTION\\n RETURN test_val; //always returns '1'\\n END; //the test_val above is now no longer visible, so its name may be re-used\\n\\n test_val := '42'; //local definition, terminates visibility at the "END;"\\n \\n EXPORT share_export2() := FUNCTION\\n RETURN test_val; //always returns '42'\\n END; \\n\\nEND;\\n\\nSharedTest.share_export(); //returns '1'\\nSharedTest.share_export2(); //returns '42'
\\nSo it's really all about scoping and visibility, a form of less formal "encapsulation" when compared to the FUNCTION and MODULE structures that are specifically designed for encapsulation purposes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-27 19:32:17\" },\n\t{ \"post_id\": 22763, \"topic_id\": 5853, \"forum_id\": 8, \"post_subject\": \"Is SHARED implicit in modules with only one export?\", \"username\": \"BGehalo\", \"post_text\": \"Came across this today and am trying to understand why. I'm assuming SHARED is implicitly defined in modules where there's only one export.\\n\\nThis works:\\nSharedTest := MODULE\\n\\ttest_val := '1';\\n\\n\\tEXPORT share_export() := FUNCTION\\n\\t\\tRETURN test_val;\\n\\tEND;\\n\\t\\nEND;\\n\\nSharedTest.share_export();
\\n\\nThis doesn't:\\nSharedTest := MODULE\\n\\ttest_val := '1';\\n\\n\\tEXPORT share_export() := FUNCTION\\n\\t\\tRETURN test_val;\\n\\tEND;\\n\\t\\n\\tEXPORT share_export2() := FUNCTION\\n\\t\\tRETURN test_val;\\n\\tEND;\\n\\nEND;\\n\\nSharedTest.share_export();\\n
\\n\\nOf course if I add "SHARED" to test_val it will work in both examples.\\nSo are all variables in a module considered SHARED if there's only one exported attribute in that module?\", \"post_time\": \"2018-08-27 19:01:59\" },\n\t{ \"post_id\": 33343, \"topic_id\": 5863, \"forum_id\": 8, \"post_subject\": \"Re: Pandas Module Not found\", \"username\": \"jwilt\", \"post_text\": \"You'll need help from someone who manages the servers your HPCC instance runs on, e.g., Linux admin.\\nThe easiest first step is to have them (standard) install the modules you need on the eclagent (hthor) server.\\nOnce this is done, referencing a Python module ***when running on hthor*** should resolve (will not work on Thor clusters).\\nHTH.\", \"post_time\": \"2021-04-09 01:15:59\" },\n\t{ \"post_id\": 28183, \"topic_id\": 5863, \"forum_id\": 8, \"post_subject\": \"Re: Pandas Module Not found\", \"username\": \"harshdesai\", \"post_text\": \"HI All,\\nDid we find any solution how to install Python modules.\\n\\n\\nRegards\\nHarsh\", \"post_time\": \"2019-11-19 11:22:16\" },\n\t{ \"post_id\": 22993, \"topic_id\": 5863, \"forum_id\": 8, \"post_subject\": \"Re: Pandas Module Not found\", \"username\": \"abhijeetnaib1993\", \"post_text\": \"Thank You Lilli, I have dropped you a email .\\nPlease check.\\n\\nThanks\\nAbhijeet\", \"post_time\": \"2018-09-13 06:39:33\" },\n\t{ \"post_id\": 22963, \"topic_id\": 5863, \"forum_id\": 8, \"post_subject\": \"Re: Pandas Module Not found\", \"username\": \"lily\", \"post_text\": \"Hi abhijeetnaib1993,\\n\\nI will be glad to help you. Can you please send me your contact info to training@hpccsystems.com so that we can get in touch with you directly?\\nPlease add attention to Lili. Thank you.\\n\\nBest,\\nLili\", \"post_time\": \"2018-09-11 20:40:45\" },\n\t{ \"post_id\": 22833, \"topic_id\": 5863, \"forum_id\": 8, \"post_subject\": \"Pandas Module Not found\", \"username\": \"abhijeetnaib1993\", \"post_text\": \"Hi All,\\n\\nWe are trying to run python logistic model through pickle file in our HPCC environment.\\nTo load the pickle file We need sklearn and to run rest of the code we need pandas.\\n\\nI am able to successfully import python , numpy etc. but the program fails in calling pandas and sklearn.\\nI have explicitly installed the libraries in /usr/bin/python which is the default python path and they are callable from there.\\n\\nI have even tried calling libraries from external file.\\n\\nbelow is the code for same.\\n\\nrec:= RECORD\\n STRING V_F_ALL_REL_SAS_CUST_REF_CODE;\\n STRING PRIORITY_TAG;\\n STRING SMART_PHONE_TAG;\\n STRING AGE;\\n STRING OCCUP_ALL_NEW;\\n STRING CASA_TAG;\\n STRING VINTAGE_N;\\n STRING TOTAL_EP_TAG;\\n STRING TOTAL_CP_TAG;\\n STRING DIGITAL_ACTIVE;\\n STRING RBI_CLASS;\\n STRING FINAL_WORTH;\\n STRING ENGAGEMENT_TAG;\\nend;\\nT1 := dataset('~test::bank_customer_jul18_dist_py.csv',rec,csv(heading(1),sepataor('|')));\\n\\n\\nstring tag(string a):= Import(Python,'/var/lib/HPCCSystems/mydropzone/model_testing.Preditctions');\\nchoosen(tag(T1),1000);\\n\\nbut this also fails with module not found pandas.\\n\\nPlease let me know how to resolved this issue\", \"post_time\": \"2018-08-31 12:03:44\" },\n\t{ \"post_id\": 22953, \"topic_id\": 5873, \"forum_id\": 8, \"post_subject\": \"Re: System error: 6003\", \"username\": \"rtaylor\", \"post_text\": \"Katy,Has anyone come across an error about unused variables?
More specific details, please. What exactly is the error message you're getting? What code of yours is producing the error?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-09-11 19:49:08\" },\n\t{ \"post_id\": 22843, \"topic_id\": 5873, \"forum_id\": 8, \"post_subject\": \"System error: 6003\", \"username\": \"KatyChow\", \"post_text\": \"Has anyone come across an error about unused variables? My code is currently calling into a function writen in C++.... Not sure what to do about this. \\n\\nThanks!!!\\n\\nKaty\", \"post_time\": \"2018-09-04 18:59:42\" },\n\t{ \"post_id\": 23023, \"topic_id\": 5893, \"forum_id\": 8, \"post_subject\": \"Re: Converting field types of records\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor306,So is there no way to alter the original field type of record?
That depends on whether the file was sprayed to your cluster or written to your cluster using ECL.
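\\nEither way, the usual conversion pattern is a one-off PROJECT with an explicit type cast -- a minimal sketch of mine with hypothetical names, not from the original reply:\\n\\noldRec := {INTEGER myfield};\\nnewRec := {STRING myfield};\\noldDS  := DATASET([{1},{2}], oldRec);\\nnewDS  := PROJECT(oldDS, TRANSFORM(newRec, SELF.myfield := (STRING)LEFT.myfield));\\nOUTPUT(newDS);\\n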
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-09-13 14:21:20\" },\n\t{ \"post_id\": 23003, \"topic_id\": 5893, \"forum_id\": 8, \"post_subject\": \"Re: Converting field types of records\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nReading:\\n\\nBasically, I need to change the way the field is defined within the record structure, not necessarily change each value 1 at a time.\\n\\nYou can run a one-off PROJECT (remember to use LOCAL qualifier) to convert to the new structure, OUTPUT it to a differently named logical file. Then use either ECLWatch or the file manipulation functions in the standard Library (STD) to replace the existing file with the one in the new format. Remember to check in your change to the RECORD structure, and there you go, one pristine new file ready to go.\\n\\nYours\\nAllan\", \"post_time\": \"2018-09-13 07:29:23\" },\n\t{ \"post_id\": 22973, \"topic_id\": 5893, \"forum_id\": 8, \"post_subject\": \"Re: Converting field types of records\", \"username\": \"jtaylor306\", \"post_text\": \"[quote="JimD":14qwalk3]There are two topics in the ECL Language Reference that may help:\\nType Casting\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=54\\n\\nThe TRANSFER function\\nhttp://cdn.hpccsystems.com/releases/CE- ... f#page=354\\n\\nHTH,\\n\\nJim\\nThanks. So is there no way to alter the original field type of record? Or would the best way be to copy the entire record structure, add "string myfield;" copy all values from "integer myfield" to the new string field that I've created.And then somehow delete the field "integer myfield". \\n\\nBasically, I need to change the way the field is defined within the record structure, not necessarily change each value 1 at a time.\", \"post_time\": \"2018-09-11 20:46:53\" },\n\t{ \"post_id\": 22943, \"topic_id\": 5893, \"forum_id\": 8, \"post_subject\": \"Re: Converting field types of records\", \"username\": \"JimD\", \"post_text\": \"There are two topics in the ECL Language Reference that may help:\\nType Casting\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=54\\n\\nThe TRANSFER function\\nhttp://cdn.hpccsystems.com/releases/CE- ... f#page=354\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2018-09-11 18:40:47\" },\n\t{ \"post_id\": 22873, \"topic_id\": 5893, \"forum_id\": 8, \"post_subject\": \"Converting field types of records\", \"username\": \"jtaylor306\", \"post_text\": \"Hey,\\n\\nI have two published layouts that I cannot change from the source. When ECL tries to join these two, it complains because they both have a field variable COLUMNX, however, in one of the record structures this variable is a string and in the other, it is an integer. I'm trying to find the proper project transform syntax to accomplish converting the entire field to a different datatype(say the integer to a string.) Is this possible? If so please point me in the direction of some good material to help, looking at the ECL manual didn't help much for me.\", \"post_time\": \"2018-09-10 13:19:53\" },\n\t{ \"post_id\": 22983, \"topic_id\": 5903, \"forum_id\": 8, \"post_subject\": \"Anyone written or know of a ECL 'Prettyfier'\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWe're tightening up our dept. standards for ECL. (standardized indention; no single letter attribute names etc)\\n\\nIs there out there in the HPCC community a 'Prettyfier' for ECL? Preferably with configurable settings to conform to. 
There is for every other language.\\n\\nNot bothered about the technology stack used, I just don't went to re-invent the wheel.\\n\\nYours\\nAllan\", \"post_time\": \"2018-09-12 14:10:15\" },\n\t{ \"post_id\": 23133, \"topic_id\": 5913, \"forum_id\": 8, \"post_subject\": \"Re: ECL OUTPUT\", \"username\": \"vzeufack\", \"post_text\": \"OK Thanks!\", \"post_time\": \"2018-09-28 01:41:00\" },\n\t{ \"post_id\": 23113, \"topic_id\": 5913, \"forum_id\": 8, \"post_subject\": \"Re: ECL OUTPUT\", \"username\": \"rtaylor\", \"post_text\": \"vzeufack,I still do not know why I was getting memory error when using THOR as parameter.
It's likely due to the fact that the THOR option on OUTPUT doesn't have any second "layout" parameter (https://hpccsystems.com/training/documentation/ecl-language-reference/html/OUTPUT_Workunit_Files.html). You "confused" the compiler. \\n\\nIt would probably have worked if you'd coded it like this:
OUTPUT ('~file_scope::name', THOR)
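\\n(A side note of my own, not part of the original reply: when the target is a named logical file, the recordset comes first and no layout parameter is involved -- e.g. OUTPUT(ds,,'~file_scope::name'); writes a flat THOR-format file, and OUTPUT(ds,,'~file_scope::name',CSV(HEADING(1))); writes a CSV file with a header row.)\\n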
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-09-27 13:32:57\" },\n\t{ \"post_id\": 23093, \"topic_id\": 5913, \"forum_id\": 8, \"post_subject\": \"Re: ECL OUTPUT\", \"username\": \"vzeufack\", \"post_text\": \"Hi rtaylor,\\n\\nI fortunately found the issue. The issue was about the way I outputed the dataset. I was using the following code:\\n\\n“OUTPUT (“~file_scope::name”, layout, THOR”);\\n\\nI just changed it to \\n\\nOUTPUT (“~file_scope::name”, layout, CSV(HEADING(1)));\\n\\nIt works fine now. I still do not know why I was getting memory error when using THOR as parameter.\", \"post_time\": \"2018-09-27 01:43:45\" },\n\t{ \"post_id\": 23083, \"topic_id\": 5913, \"forum_id\": 8, \"post_subject\": \"Re: ECL OUTPUT\", \"username\": \"rtaylor\", \"post_text\": \"vzeufack,\\n\\nIt would help if you could post the code that generates that error message so we could see the exact syntax you're using.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-09-26 19:38:22\" },\n\t{ \"post_id\": 23033, \"topic_id\": 5913, \"forum_id\": 8, \"post_subject\": \"ECL OUTPUT\", \"username\": \"vzeufack\", \"post_text\": \"I have an issue to print the count or the output of a dataset I just sprayed. The dataset is 174MB and is made of 46 fields. After uploading the file and spraying, I created a layout file for it. Then I tried to show either a count or the full output exactly like what is done in the HPCC Data Tutorial. But, I keep getting the following error:\\n\\nSystem error: 1300: Graph graph1[1], diskcount[2]: SLAVE #1 [192.168.159.128:20100]: Memory limit exceeded: current 1, requested 5552, limit 2992 active(1) heap(1/3008), Memory limit exceeded: current 1, requested 5552, limit 2992 active(1) heap(1/3008) - handling file: /var/lib/HPCCSystems/hpcc-data/thor/cs4322/project/voters_8.csv._1_of_1 - caused by (1300, Memory limit exceeded: current 1, requested 5552, limit 2992 active(1) heap(1/3008))\\n\\nHow can solve that?\", \"post_time\": \"2018-09-25 16:01:44\" },\n\t{ \"post_id\": 23333, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Re: Record Length in Data Sparying\", \"username\": \"rtaylor\", \"post_text\": \"Shayan,\\n\\nYour problem is your DATASET definition is telling the compiler that it's a "thor" file -- but it's not. It's a CSV file, so your DATASET should be:\\nEXPORT Hospitals := dataset('~online::ss::project::hospitals',Layout_Hospitals,CSV);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-12 15:12:45\" },\n\t{ \"post_id\": 23313, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Re: Record Length in Data Sparying\", \"username\": \"sh.shmss\", \"post_text\": \"Actually, I'm using the code that is introduced for defining data in that course:\\n\\n\\n\\nLayout_Hospitals := record\\nstring hospital_name;\\nstring provider_number;\\nstring state;\\nstring measure_name;\\nstring number_of_discharges;\\nstring footnote;\\nstring excess_readmission_ratio;\\nstring predicted_readmission_rate;\\nstring expected_readmission_rate;\\nstring number_of_readmissions;\\nstring start_date;\\nstring end_date;\\nend;\\n\\nEXPORT Hospitals := dataset('~online::ss::project::hospitals',Layout_Hospitals,thor); \\n\\n
\\n\\nAnd this is the dataset I've already sprayed:\\nhttps://data.medicare.gov/Hospital-Comp ... /9n3s-kdb3\\n\\nAs always, I appreciate your help.\\n\\nShayan\", \"post_time\": \"2018-10-11 18:50:08\" },\n\t{ \"post_id\": 23293, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Re: Record Length in Data Sparying\", \"username\": \"rtaylor\", \"post_text\": \"Shayan,\\n\\nHave you taken the online Intro to ECL (Part 1) course (https://learn.lexisnexis.com/hpcc)? Spraying and defining files is covered there.\\n\\nPlease include your code along with the error message you get so I can see what syntax you tried. Since you were spraying a variable-length record file, you need a RECORD structure that does not specify the length of the records. The error message is telling you that the record length you specified is incorrect.\\n\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-11 16:40:08\" },\n\t{ \"post_id\": 23273, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Re: Record Length in Data Sparying\", \"username\": \"sh.shmss\", \"post_text\": \"Thank you for your reply, Richard. I managed to spray the file. Now, I have problem defining and locating the data through dataset command. If I don't mention the record length in my layout, I'll get the following error:\\n\\nError: System error: 1301: Pool memory exhausted: pool id 4194304 exhausted, requested 6473 heap(1/4294967295) global(1/1216) (in Disk Read G1 E2) (0, 0), 1301, \\n\\nIf I define the record length for my records, I'll get the following error:\\n\\nError: System error: 1: File /var/lib/HPCCSystems/hpcc-data/thor/online/ss/project/hospitals._1_of_1 size is 313263 which is not a multiple of 177 (0, 0), 1, \\n\\nNo need to mention that I can't seem to find a total record size which is a factor of 313263 (1, 3, 9, 34807, 104421, 313263). \\n\\nI'd really appreciate if you could assist me with defining my dataset.\\n\\nThanks,\\nShayan\", \"post_time\": \"2018-10-10 19:03:45\" },\n\t{ \"post_id\": 23193, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Re: Record Length in Data Sparying\", \"username\": \"rtaylor\", \"post_text\": \"Shayan,\\n\\nJSON and CSV files are inherently variable-length. That means you don't need to know the record length, you only need to know the maximum length of the longest record. But if you don't know that and the spray fails, you can just increase that (the default max is 8K) to whatever value you want -- the largest max length I've seen successfully used was 10 million bytes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-05 18:41:00\" },\n\t{ \"post_id\": 23183, \"topic_id\": 5953, \"forum_id\": 8, \"post_subject\": \"Record Length in Data Sparying\", \"username\": \"sh.shmss\", \"post_text\": \"Hello,\\n\\nHow can we spray an external file (JSON or CSV) without knowing the record length of that file. 
Is there any way to find out the record length of such a file?\\n\\nThanks,\\nShayan\", \"post_time\": \"2018-10-05 16:15:12\" },\n\t{ \"post_id\": 23413, \"topic_id\": 6013, \"forum_id\": 8, \"post_subject\": \"Re: WsECL output\", \"username\": \"vzeufack\", \"post_text\": \"Thanks very much Jim!\", \"post_time\": \"2018-10-24 20:32:38\" },\n\t{ \"post_id\": 23403, \"topic_id\": 6013, \"forum_id\": 8, \"post_subject\": \"Re: WsECL output\", \"username\": \"JimD\", \"post_text\": \"It is probably better to handle case sensitivity in your ECL code.\\n \\nLet the user enter any case they want and convert their input to uppercase in the code using the Std.Str.toUpperCase method in the Standard Library.\\n\\nSee http://cdn.hpccsystems.com/releases/CE- ... f#page=106\\n\\nHTH,\\nJim\", \"post_time\": \"2018-10-24 19:29:12\" },\n\t{ \"post_id\": 23393, \"topic_id\": 6013, \"forum_id\": 8, \"post_subject\": \"Re: WsECL output\", \"username\": \"vzeufack\", \"post_text\": \"Thanks very much Jim!\\n\\nWe are going to dig deeper into that.\\nDo we still play with the FORMAT function if we want some fields to not be mandatory. We actually have many research filters but the user must fill them all and in capital letters. We instead would Iike some fields to not be mandatory and also not enforcing capital letters. Does the FORMAT function handles that as well?\\n\\nBest,\", \"post_time\": \"2018-10-24 18:31:55\" },\n\t{ \"post_id\": 23383, \"topic_id\": 6013, \"forum_id\": 8, \"post_subject\": \"Re: WsECL output\", \"username\": \"JimD\", \"post_text\": \"In ECL, you can use the FORMAT option of the STORED workflow service. The example below shows a couple of variations of droplists:\\n \\nSTRING ch1 := 'ban' : STORED('ch1', FORMAT(SELECT('apple=app,pear,*banana=ban')));\\n //banana is default\\n STRING ch2 := '' : STORED('ch2', FORMAT(SELECT(',apple=app,pear,banana=ban')));\\n //starts empty, no specified default
\\n\\nSee http://cdn.hpccsystems.com/releases/CE- ... f#page=380 for more details.\\n \\nHTH,\\n\\nJim\", \"post_time\": \"2018-10-24 13:41:08\" },\n\t{ \"post_id\": 23373, \"topic_id\": 6013, \"forum_id\": 8, \"post_subject\": \"WsECL output\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI, with group members working on a LexisNexis project, successfully used WsECL to have a GUI from which user can input data to search. Now, we would like to customize the GUI like having a drop down menu instead of an editText for example. How do we do that please? or How to customize WsECL forms? \\n\\nBest,\", \"post_time\": \"2018-10-24 12:35:05\" },\n\t{ \"post_id\": 23453, \"topic_id\": 6023, \"forum_id\": 8, \"post_subject\": \"Re: APPLY()\", \"username\": \"lpezet\", \"post_text\": \"Ticket created: https://track.hpccsystems.com/browse/HPCC-20894\\n(not sure how to add you as watcher...sorry)\\n\\nThanks Richard!\", \"post_time\": \"2018-11-02 02:25:33\" },\n\t{ \"post_id\": 23451, \"topic_id\": 6023, \"forum_id\": 8, \"post_subject\": \"Re: APPLY()\", \"username\": \"lpezet\", \"post_text\": \"JIRA seems to be down right now. But I'll check again later and create a ticket.\\n\\nThanks!\", \"post_time\": \"2018-11-01 21:41:34\" },\n\t{ \"post_id\": 23433, \"topic_id\": 6023, \"forum_id\": 8, \"post_subject\": \"Re: APPLY()\", \"username\": \"rtaylor\", \"post_text\": \"lpezet,\\n\\nI just tested your code on 6.4.12-1 (3-node Thor) and 6.4.28-1 (VM). \\n\\nIn both environments, on Thor, the syntax checks but gets a runtime error: \\nSystem error: 0: Graph graph1[1], apply[3]: SLAVE #1 [10.173.248.8:20100]: Global child graph? : Global acts = Graph(5): [diskwrite(7)]\\n \\nIn both environments, it does run on hThor, but the ECL IDE and ECL Watch both show only a single result for the WU, but the ECL Watch Logical Files page shows all three files written to disk, each containing the "C" data, only.\\n\\nThe APPLY docs clearly state (bold added by me): \\n"actionlist A comma-delimited list of the operations to perform on the dataset. Typically, this is an external service (see SERVICE Structure). This may not be an OUTPUT or any function that triggers a child query." \\n\\nSo, I have a couple of problems with this code:
\\nI'd suggest submitting a JIRA report referencing this thread (and make me a Watcher, too, please).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-30 18:59:19\" },\n\t{ \"post_id\": 23423, \"topic_id\": 6023, \"forum_id\": 8, \"post_subject\": \"APPLY()\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'd like to understand a behavior I'm seeing when using APPLY and OUTPUTing to file.\\nI wanted to see if I could use it to load data into logical files (like APPLY( ds, OUTPUT(std.File.ExternalLogicalFilename(...), ...) type thing). Apparently not, but I'd like to understand nonetheless.\\n\\nHere's the behavior I'm seeing in a snippet:\\n\\nlayout := { STRING letter; };\\nds := DATASET([{'A'},{'B'},{'C'}], layout);\\nAPPLY(ds, OUTPUT(DATASET([letter],layout),,'~temp::letters::'+letter, OVERWRITE));\\n
\\nBasically I'm trying to create 3 logical files ('~temp::letters::a', '~temp::letters::b', and '~temp::letters::c'), each with, well, its own letter. So '~temp::letters::a' should just contain "a" as "letter" and '~temp::letters::b' just "b", etc.\\n\\nSomehow, and that's what I'd like to understand, all logical files contain the letter "c". It's interesting that '~temp::letters::'+letter is well interpreted while DATASET([letter],layout) got somehow hijacked.\\nCould someone please shed some light on this?\\n\\n\\nThank you!\", \"post_time\": \"2018-10-30 02:20:58\" },\n\t{ \"post_id\": 23613, \"topic_id\": 6033, \"forum_id\": 8, \"post_subject\": \"Re: WsECL chart output\", \"username\": \"gsmith\", \"post_text\": \"Its not so much about the visualizer, but rather more about exposing ESP / ECLWatch outside of your firewall. The same is true for any roxie services you may have created.\\n\\nIf your target audience are in-house employees with access to the HPCC-Platform then all is good, but if you want to expose any of the above (ESP, Roxie, ECLWatch) outside of your firewall then care must be taken to ensure only authorized folks can access them.\", \"post_time\": \"2018-11-17 06:12:11\" },\n\t{ \"post_id\": 23603, \"topic_id\": 6033, \"forum_id\": 8, \"post_subject\": \"Re: WsECL chart output\", \"username\": \"vzeufack\", \"post_text\": \"So, if I understand your answers, and from the tutorial link you posted, \\n1) The Visualizer can only serve as a preview tool\\n2) To externalize our ECL work we should create a traditional website which will be linked to the ECL cluster through the JavaScript code.\\nIs that right?\", \"post_time\": \"2018-11-17 02:55:53\" },\n\t{ \"post_id\": 23463, \"topic_id\": 6033, \"forum_id\": 8, \"post_subject\": \"Re: WsECL chart output\", \"username\": \"gsmith\", \"post_text\": \"You will need to setup a traditional web stack to safely allow external users to (ultimately) make WsECL / Roxie calls. The key pieces relating to your questions are:\\n1. A proxy server that is available to the public and is responsible for ensuring that only "trusted" requests are serviced (internally it will then make the actual calls to WsECL).\\n2. A new web page which hosts the charts in question. Initially this page would be developed targeting the WsECL directly, but once the proxy is in place, the same requests would be redirected to the new proxy server (along with any additional authentication tokens etc.).\\n\\nFor #2 above, its actually a lot easier than it sounds (I like to think of the visualizer bundle as a prototyping tool for actual product) - there are some good examples in the tutorials here: https://github.com/hpcc-systems/Visuali ... /Tutorials \\n\\nPlease reach out if you have any questions...\", \"post_time\": \"2018-11-02 08:44:39\" },\n\t{ \"post_id\": 23443, \"topic_id\": 6033, \"forum_id\": 8, \"post_subject\": \"WsECL chart output\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nMe and my group members working on a LexisNexis Project have two issues with the user side of the application we are trying to build. Basically, it is going to be a search application. The user will fill the provided fields and get an output. So,\\n\\n1) How to make our output from WsECL available to use by external users on their computers?\\n\\n2) How do we display charts on WsECL. 
We are using the Visualizer to make charts but WsECL seems not to be able to display charts.\\n\\nBest regards,\", \"post_time\": \"2018-11-01 18:01:21\" },\n\t{ \"post_id\": 23533, \"topic_id\": 6073, \"forum_id\": 8, \"post_subject\": \"Re: You do not have permission to directly access datafiles\", \"username\": \"jwilt\", \"post_text\": \"Created a ticket:\\nhttps://track.hpccsystems.com/browse/HPCC-20927\", \"post_time\": \"2018-11-14 05:29:15\" },\n\t{ \"post_id\": 23523, \"topic_id\": 6073, \"forum_id\": 8, \"post_subject\": \"Re: You do not have permission to directly access datafiles\", \"username\": \"jwilt\", \"post_text\": \"Yes, of course that’s the obvious first thought. And it may be. \\nI’m missing in the HPCC documentation exactly where it explains how to set the proper permissions, what OUs are involved, etc. \\nThis very error message is verbatim found in the source code (easily found by a google search, not by a GitHub search). Apparently added in v6. Hard to track in the source code where it’s set. Seems like it may be an eclcc option, something about “datafile”.\\nI haven’t yet found reference to this elsewhere.\", \"post_time\": \"2018-11-09 15:36:14\" },\n\t{ \"post_id\": 23513, \"topic_id\": 6073, \"forum_id\": 8, \"post_subject\": \"Re: You do not have permission to directly access datafiles\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nSounds to me like LDAP permissions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-11-09 13:30:16\" },\n\t{ \"post_id\": 23503, \"topic_id\": 6073, \"forum_id\": 8, \"post_subject\": \"You do not have permission to directly access datafiles\", \"username\": \"jwilt\", \"post_text\": \"Getting this error when attempting to read a logical file:\\n"You do not have permission to directly access datafiles"\\nThis is HPCC 6.4.10-1.\\n\\nIs there a configuration change or #OPTION or some other way to get around this?\\nThanks.\", \"post_time\": \"2018-11-09 01:05:43\" },\n\t{ \"post_id\": 23633, \"topic_id\": 6083, \"forum_id\": 8, \"post_subject\": \"Re: Performing multiple aggregations with different grouping\", \"username\": \"rtaylor\", \"post_text\": \"Akilesh,\\n\\nI think this is the best way to get what you want:\\nfilteredDS := DATASET([ {'abc','002','001','20181114','20181119'},\\n {'abc','001','001','20181114','20181119'},\\n {'abc','001','001','20181115','20181119'}],\\n {string field_one,\\n string field_two,\\n string field_three,\\n string field_four,\\n string field_five});\\n\\nCount_rec := Record\\n filteredDS;\\n INTEGER v_count := 0;\\nEND;\\n\\nstbl := TABLE(filteredDS,\\n {field_one,field_two,field_three,v_count := COUNT(GROUP)},\\n field_one,field_two,field_three);\\n\\nfilteredDS_cnts := JOIN(filteredDS,stbl,\\n LEFT.field_one=RIGHT.field_one AND\\n LEFT.field_two=RIGHT.field_two AND \\n LEFT.field_three=RIGHT.field_three,\\n TRANSFORM(Count_rec,\\n SELF.v_count := RIGHT.v_count,\\n SELF := LEFT));\\nfilteredDS_cnts;
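\\nA quick worked check of my own (not output from this thread): on the three sample rows, the ('abc','002','001') group has one record and the ('abc','001','001') group has two, so filteredDS_cnts should match the following, up to row order:\\n\\nexpected := DATASET([{'abc','002','001','20181114','20181119',1},\\n                     {'abc','001','001','20181114','20181119',2},\\n                     {'abc','001','001','20181115','20181119',2}], Count_rec);\\n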
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-11-19 15:54:05\" },\n\t{ \"post_id\": 23593, \"topic_id\": 6083, \"forum_id\": 8, \"post_subject\": \"Re: Performing multiple aggregations with different grouping\", \"username\": \"bforeman\", \"post_text\": \"Hi Akilesh,\\n\\nThis should get you there:\\nfilteredDS := DATASET([ {'abc','002','001','20181114','20181119'},\\n {'abc','001','001','20181114','20181119'},\\n {'abc','001','001','20181115','20181119'}],\\n\\t\\t {string field_one,\\n string field_two,\\n string field_three,\\n string field_four,\\n string field_five});\\n\\nCount_rec := Record\\nINTEGER v_count := COUNT(GROUP);\\nEND;\\n\\nfilteredDS_cnt1 := TABLE(filteredDS,Count_rec,field_one);\\nfilteredDS_cnt2 := TABLE(filteredDS,Count_rec,field_two);\\nfilteredDS_cnt3 := TABLE(filteredDS,Count_rec,field_three);\\n\\nc1 := COUNT(filteredDS_cnt1);\\nc2 := COUNT(filteredDS_cnt2);\\nc3 := COUNT(filteredDS_cnt3);\\n\\nd := DATASET([{'Field One Count:',c1},\\n\\t\\t{'Field Two Count:',c2},\\n\\t\\t{'Field Three Count:',c3}],\\n\\t\\t{STRING20 valuetype,INTEGER val});\\n\\nOUTPUT(d);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-11-16 18:34:31\" },\n\t{ \"post_id\": 23543, \"topic_id\": 6083, \"forum_id\": 8, \"post_subject\": \"Performing multiple aggregations with different grouping.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello,\\n\\nI am trying to execute the following code:\\n\\nfilteredDS := DATASET([\\t{'abc','002','001','20181114','20181119'},\\n\\t\\t\\t\\t\\t\\t{'abc','001','001','20181114','20181119'},\\n\\t\\t\\t\\t\\t\\t{'abc','001','001','20181115','20181119'}],{string field_one,string field_two,string field_three, string field_four, string field_five});\\n\\nCount_rec := Record\\n\\tfilteredDS;\\n\\tINTEGER v_count \\t:= COUNT(GROUP(filteredDS,field_one,field_two,field_three));\\t\\t\\t\\nEND;\\n\\nfilteredDS_cnts := TABLE(filteredDS,Count_rec);\\nfilteredDS_cnts;\\n\\nUsing the above code I wish to get the count based on the grouping provided in the layout "Count_rec". So as per my requirement, for first record i should be getting v_count = 1 and for second and third records I should be getting v_count = 2. But I do not get such counts and I get v_count = 3 for all the records.\\n\\nRequest your help in achieving this. I wanted to try this approach so that the actual number of records does not get reduced in my output "filteredDS_cnts". If the number of records get reduced due to group by clause then I may have to join back "filteredDS_cnts" to my original dataset "filteredDS".\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2018-11-14 10:00:36\" },\n\t{ \"post_id\": 23583, \"topic_id\": 6093, \"forum_id\": 8, \"post_subject\": \"Re: Building Persist file error\", \"username\": \"bforeman\", \"post_text\": \"That is NOT an error, that is simply a message that notifies you that a PERSIST file was successfully built.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-11-16 17:46:37\" },\n\t{ \"post_id\": 23553, \"topic_id\": 6093, \"forum_id\": 8, \"post_subject\": \"Building Persist file error\", \"username\": \"mverma\", \"post_text\": \"I am running my code in ECL Watch and I get the following error:\\nBuilding PERSIST('ONLINE::MV::PERSIST::PeopleVehicles__p307348277'): It hasn't been calculated before\\n\\n/********the code**********/\\n/***** People file********/\\nPeopleLayout := RECORD\\n UNSIGNED8 id;\\n STRING15 firstname;\\n STRING25 lastname;\\n STRING15 middlename;\\n STRING2 namesuffix;\\n STRING8 filedate;\\n STRING1 gender;\\n STRING8 birthdate;\\n END;\\n\\nPeopleFile := DATASET('~ONLINE::MV::AdvECL::People',PeopleLayout,THOR);\\n\\n/******Vehicle*****/\\nVehicleLayout := RECORD\\n UNSIGNED8 personid;\\n STRING2 orig_state;\\n UNSIGNED2 year_make;\\n STRING5 make_code;\\n STRING4 vehicle_type;\\n STRING3 model;\\n STRING5 body_code;\\n STRING1 vehicle_use;\\n STRING3 major_color_code;\\n STRING3 minor_color_code;\\n UNSIGNED2 model_year;\\n STRING3 vina_series;\\n STRING3 vina_model;\\n STRING2 vina_body_style;\\n STRING36 make_description;\\n STRING36 model_description;\\n STRING25 body_style_description;\\n STRING2 number_of_cylinders;\\n STRING4 engine_size;\\n STRING1 fuel_code;\\n UNSIGNED4 vina_price;\\n STRING1 history;\\n STRING5 best_make_code;\\n STRING3 best_series_code;\\n STRING3 best_model_code;\\n STRING5 best_body_code;\\n UNSIGNED2 best_model_year;\\n STRING3 best_major_color_code;\\n STRING3 best_minor_color_code;\\n STRING8 purch_date;\\n END;\\n\\nVehicleFile := DATASET('~ONLINE::MV::AdvECL::Vehicle',VehicleLayout,THOR);\\n/************Property************/\\nPropertyLayout := RECORD\\n 
UNSIGNED8 personid;\\n INTEGER8 propertyid;\\n STRING10 house_number;\\n STRING10 house_number_suffix;\\n STRING2 predir;\\n STRING30 street;\\n STRING5 streettype;\\n STRING2 postdir;\\n STRING6 apt;\\n STRING40 city;\\n STRING2 state;\\n STRING5 zip;\\n UNSIGNED4 total_value;\\n UNSIGNED4 assessed_value;\\n UNSIGNED2 year_acquired;\\n UNSIGNED4 land_square_footage;\\n UNSIGNED4 living_square_feet;\\n UNSIGNED2 bedrooms;\\n UNSIGNED2 full_baths;\\n UNSIGNED2 half_baths;\\n UNSIGNED2 year_built;\\n END;\\n\\nPropertyFile := DATASET('~ONLINE::XXX::AdvECL::Property',PropertyLayout,THOR);\\n\\n/*********taxdata**************/\\nTaxdataLayout := RECORD\\n INTEGER8 propertyid;\\n STRING4 document_year;\\n UNSIGNED4 total_val_calc;\\n UNSIGNED4 land_val_calc;\\n UNSIGNED4 improvement_value_calc;\\n UNSIGNED4 assd_total_val;\\n UNSIGNED4 tax_amount;\\n UNSIGNED4 mkt_total_val;\\n UNSIGNED4 mkt_land_val;\\n UNSIGNED4 mkt_improvement_val;\\n UNSIGNED4 tax_year;\\n UNSIGNED4 land_square_footage;\\n UNSIGNED4 adjusted_gross_square_feet;\\n UNSIGNED4 living_square_feet;\\n UNSIGNED2 bedrooms;\\n UNSIGNED2 full_baths;\\n UNSIGNED2 half_baths;\\n UNSIGNED2 stories_number;\\n END;\\n\\t \\nTaxdataFile :=\\tDATASET('~ONLINE::MV::AdvECL::Taxdata',TaxdataLayout,THOR);\\n\\nCombPeopleVehicles := RECORD\\nPeopleLayout;\\nUNSIGNED1 ChildVCount;\\nDATASET(VehicleLayout) VehicleRecs{MAXCOUNT(20)};\\nEND;\\n\\nCombPeopleVehicles ParentMove(PeopleLayout L) := TRANSFORM\\n\\tSELF.ChildVCount := 0;\\n\\tSELF.VehicleRecs := [];\\n\\tSELF := L;\\n\\tEND;\\n\\t\\nParentOnly := PROJECT(PeopleFile, ParentMove(LEFT));\\n\\t\\nCombPeopleVehicles ChildMove(CombPeopleVehicles L,VehicleLayout R,INTEGER C) := TRANSFORM\\n\\t\\tSELF.ChildVCount := C;\\n\\t\\tSELF.VehicleRecs := L.VehicleRecs + R;\\n\\t\\tSELF := L;\\n\\t\\tEND;\\n\\t\\t\\n DenormPeopleVehicles := DENORMALIZE(ParentOnly, VehicleFile, LEFT.id = RIGHT.personid, ChildMove(LEFT,RIGHT,COUNTER))\\n\\t\\n\\t: PERSIST('ONLINE::MV::PERSIST::PeopleVehicles');\\n\\nOUTPUT(DenormPeopleVehicles);\", \"post_time\": \"2018-11-14 15:19:55\" },\n\t{ \"post_id\": 23573, \"topic_id\": 6101, \"forum_id\": 8, \"post_subject\": \"Re: WsELC Checkbox\", \"username\": \"bforeman\", \"post_text\": \"I'm afraid the closest thing you have is a three state checkbox. Make the definition a BOOLEAN and you can preset it to TRUE or FALSE.\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2018-11-16 17:44:03\" },\n\t{ \"post_id\": 23571, \"topic_id\": 6101, \"forum_id\": 8, \"post_subject\": \"WsELC Checkbox\", \"username\": \"vzeufack\", \"post_text\": \"Hello I’m working on putting the finishing touches on a group project and I have a question about using WsECL. How would I add checkboxes to the WsECL form? I know that using the following code produces a dropdown menu: \\n\\nSTRING3 FruitSelection := 'Fruit' :STORED('Fruit', FORMAT(SELECT('*ALL=ALL,Apple=Apple,Orange=Orange,Banana=Banana'))); \\n\\nBut what would I do if I want to create checkboxes instead?\", \"post_time\": \"2018-11-15 20:28:55\" },\n\t{ \"post_id\": 23713, \"topic_id\": 6113, \"forum_id\": 8, \"post_subject\": \"Re: standalong compiler ecl file error\", \"username\": \"ghalliday\", \"post_text\": \"There is a batch file cl.bat and link.bat that is used to set up the path to the windows compilers. It may be that isn't correctly detecting that version of visual studio.\\n\\nVS2017 changed the installation so that environment variables were not set up by default. 
If VS150COMNTOOLS is not defined that batch file checks for a file \\n%ProgramFiles(x86)%\\\\Microsoft Visual Studio\\\\2017\\\\Community\\\\Common7\\\\Tools\\\\VsDevCmd.bat\\nand if it is present it suggests you run it first. Can you locate that file (VsDevCmd.bat) on your system, and let me know the full path?\\n\\nIf you run that batch file before calling eclcc it should work.\", \"post_time\": \"2018-11-30 09:25:27\" },\n\t{ \"post_id\": 23693, \"topic_id\": 6113, \"forum_id\": 8, \"post_subject\": \"Re: standalong compiler ecl file error\", \"username\": \"jfeng@ln\", \"post_text\": \"Thank you, Richard. It seems Visual Studio issue.\\nI tried to install Visual Studio 2017 professional. Then, I ran the command:\\n eclcc hello2.ecl\\nI got the following error:\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\bin>eclcc hello2.ecl\\na.out(0,0): error C3000: Compile/Link failed for a.out (see '\\\\\\\\172.29.128.31\\\\c$\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\bin\\\\eclcc.log' for details)\\n\\n---------- compiler output --------------\\nError: Could not locate a supported version of visual studio.\\n\\n--------- end compiler output -----------\\n1 error, 0 warning\\n\\n\\n\\nthe information from the eclcc.log as following:\\n\\n00000000 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\auditlib.dll[lib_auditlib] version = AUDITLIB 1.0.1\\n00000001 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\debugservices.dll[lib_debugservices] version = DEBUGSERVICES 1.0.1\\n00000002 2018-11-28 06:33:34 2884 5544 Plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\dmetaphone.dll exports getECLPluginDefinition but does not export ECL - not loading\\n00000003 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\fileservices.dll[lib_fileservices] version = FILESERVICES 2.1.4\\n00000004 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\logging.dll[lib_logging] version = LOGGING 1.0.1\\n00000005 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\parselib.dll[lib_parselib] version = PARSELIB 1.0.1\\n00000006 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\stringlib.dll[lib_stringlib] version = STRINGLIB 1.1.14\\n00000007 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\timelib.dll[lib_timelib] version = TIMELIB 1.0.0\\n00000008 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\unicodelib.dll[lib_unicodelib] version = UNICODELIB 1.1.06\\n00000009 2018-11-28 06:33:34 2884 5544 Loading plugin C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\plugins\\\\workunitservices.dll[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.2\\n0000000A 2018-11-28 06:33:34 2884 5544 Adding library: eclrtl\\n0000000B 2018-11-28 06:33:34 2884 5544 Adding object file: a.out.res\\n0000000C 2018-11-28 06:33:34 2884 5544 addSourceFile a.out.cpp\\n0000000D 2018-11-28 06:33:34 2884 5544 Compiling a.out\\n0000000E 2018-11-28 06:33:34 2884 5544 Remove 
temporaries\\n0000000F 2018-11-28 06:33:34 2884 5544 Could not remove file 'a.out.exp'\\n00000010 2018-11-28 06:33:34 2884 5544 Could not remove file 'a.out.lib'\\n00000011 2018-11-28 06:33:34 2884 5544 Failed to compile a.out\\n00000012 2018-11-28 06:33:34 2884 5544 \\n---------- compiler output --------------\\nError: Could not locate a supported version of visual studio.\\n\\n\\n--------- end compiler output -----------\", \"post_time\": \"2018-11-28 12:09:12\" },\n\t{ \"post_id\": 23683, \"topic_id\": 6113, \"forum_id\": 8, \"post_subject\": \"Re: standalong compiler ecl file error\", \"username\": \"rtaylor\", \"post_text\": \"jfeng@ln,\\n\\nI just looked at that directory in my 7.0 installation and see that it's marked as "Read Only" (applying to all files within it) -- which would explain why you got that error. Try changing that setting.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-11-27 20:30:45\" },\n\t{ \"post_id\": 23673, \"topic_id\": 6113, \"forum_id\": 8, \"post_subject\": \"standalong compiler ecl file error\", \"username\": \"jfeng@ln\", \"post_text\": \"I installed the HPCC 7.0 on my laptop and tried to compiler a ecl file using command:\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\bin>eclcc C:\\\\Users\\\\NAMEJX\\\\Desktop\\\\test\\\\test1.ecl\\nI got the following error:\\nLOGGING: could not open file 'C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\bin\\\\eclcc.log' for output\\nError: LOGGING: could not open file 'C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\bin\\\\eclcc.log' for output\\n\\nany help? Thank you\", \"post_time\": \"2018-11-27 15:32:03\" },\n\t{ \"post_id\": 23793, \"topic_id\": 6153, \"forum_id\": 8, \"post_subject\": \"Re: client tools to compile multi ecl files\", \"username\": \"ghalliday\", \"post_text\": \"When creating an archive (or compiling a query) you only need to include the main file that is used for the query on the command line. eclcc automatically includes any other files that are referenced.\\n\\nSo:\\n eclcc callHW.ecl -E\\n\\nshould create the archive containing everything.\", \"post_time\": \"2018-12-12 09:31:30\" },\n\t{ \"post_id\": 23783, \"topic_id\": 6153, \"forum_id\": 8, \"post_subject\": \"client tools to compile multi ecl files\", \"username\": \"jfeng@ln\", \"post_text\": \"I have two ecl files:\\n\\n1. helloWorld.ecl\\nEXPORT helloWorld() := MODULE\\nexport add1(integer a1, integer a2) := function\\n a := a1 + a2;\\t \\n\\t return a;\\n\\t end;\\nEND;\\n\\n2. callHW.ecl\\na := helloWorld().add1(5,6);\\noutput(a);\\n\\nI used the command: eclcc helloWorld.ecl callHW.ecl -E\\nI got the archive:\\n<Archive build="community_7.0.2-1"\\n eclVersion="7.0.2"\\n legacyImport="0"\\n legacyWhen="0">\\n <Query attributePath="helloWorld"/>\\n <Module key="" name="">\\n <Attribute key="helloworld"\\n name="helloWorld"\\n sourcePath="/opt/HPCCSystems/7.0.2/clienttools/bin/helloWorld.ecl"\\n ts="1544543862000000">\\n EXPORT helloWorld() := MODULE\\n\\nexport add1(integer a1, integer a2) := function\\n a := a1 + a2;\\t \\n\\t return a;\\n\\t end;\\n\\t \\nEND;	  \\n </Attribute>\\n </Module>\\n</Archive>\\n\\nthis archive only includes helloWorld.ecl, but without callHW.ecl. I can't figure out the reason and how to correct it. Any help? thank you.\", \"post_time\": \"2018-12-11 16:05:04\" },\n\t{ \"post_id\": 24413, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"DSC\", \"post_text\": \"Ah! 
Reading comprehension fails me sometimes. Probably more often than "sometimes" but we won't get into that.\\n\\nDoes this resemble your code?\\n\\nConcatFieldValues(inFile, outFieldType, outField, inField, delim = '\\\\'\\\\'') := FUNCTIONMACRO\\n LOCAL onlyFieldData := PROJECT\\n (\\n inFile,\\n TRANSFORM\\n (\\n {\\n outFieldType outField\\n },\\n SELF.outField := (outFieldType)LEFT.inField\\n )\\n );\\n \\n LOCAL rolledUpData := ROLLUP\\n (\\n onlyFieldData,\\n TRUE,\\n TRANSFORM\\n (\\n RECORDOF(LEFT),\\n SELF.outField := LEFT.outField + (outFieldType)delim + RIGHT.outField\\n )\\n );\\n \\n RETURN rolledUpData;\\nENDMACRO;\\n\\n//------------------------------------------------------------------------------\\n\\nDataRec := RECORD\\n QSTRING fname;\\n STRING1 mname_initial;\\n STRING lname;\\n UNSIGNED1 age;\\nEND;\\n\\nds := DATASET\\n (\\n [\\n {'Dan', 'S', 'Camper', 53},\\n {'John', 'Q', 'Public', 21},\\n {'Jane', '', 'Doe', 23}\\n ],\\n DataRec\\n );\\n\\nConcatFieldValues(ds, STRING, myOut, fname, ',');\\nConcatFieldValues(ds, STRING, myOut, lname, ',');\\nConcatFieldValues(ds, QSTRING, myOut, mname_initial, ':');\\nConcatFieldValues(ds, UTF8, myOut, age, ';');\\nConcatFieldValues(ds, STRING, myOut, fname, 0);
\", \"post_time\": \"2019-01-31 20:40:57\" },\n\t{ \"post_id\": 24393, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThat's really nice, and I'm going to keep that ECL in my pocket. As to my particular issue, its different, in that I'm concatenating a single field from all records in a dataset into a single STRING (with optional delimiters)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-01-31 20:24:25\" },\n\t{ \"post_id\": 24353, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"DSC\", \"post_text\": \"I had a similar problem awhile back and still have the code, so I thought I'd share. I don't know if it addresses all of your concerns, but it perhaps it is close.\\n\\nConcatFields(inFile, outFieldType, outField, fieldListStr, delim = '\\\\'\\\\'') := FUNCTIONMACRO\\n #UNIQUENAME(myDelim);\\n #IF(delim != '')\\n #SET(myDelim, '\\\\'' + delim + '\\\\' + ')\\n #ELSE\\n #SET(myDelim, '')\\n #END\\n #UNIQUENAME(rhs);\\n #SET(rhs, REGEXREPLACE(',', REGEXREPLACE('(\\\\\\\\w+)', TRIM((STRING)fieldListStr, ALL), '(' + #TEXT(outFieldType) + ')LEFT.$1'), ' + ' + %'myDelim'%));\\n\\n // RETURN %'rhs'%;\\n RETURN PROJECT\\n (\\n inFile,\\n TRANSFORM\\n (\\n {\\n RECORDOF(inFile),\\n outFieldType outField\\n },\\n SELF.outField := %rhs%,\\n SELF := LEFT\\n )\\n );\\nENDMACRO;\\n\\n//------------------------------------------------------------------------------\\n\\nDataRec := RECORD\\n QSTRING fname;\\n STRING1 mname_initial;\\n STRING lname;\\n UNSIGNED1 age;\\nEND;\\n\\nds := DATASET\\n (\\n [\\n {'Dan', 'S', 'Camper', 53}\\n ],\\n DataRec\\n );\\n\\nConcatFields(ds, STRING, myOut, 'fname, mname_initial, lname', ',');\\nConcatFields(ds, QSTRING, myOut, 'fname, age', ':');\\nConcatFields(ds, UTF8, myOut, 'age, lname');\\n
\", \"post_time\": \"2019-01-30 15:30:55\" },\n\t{ \"post_id\": 24113, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nI've rushed and I'll have to do a lot more work on this to mak it production ready.\\nRaised a 'question' with the core team:\\nhttps://track.hpccsystems.com/browse/HPCC-21330\", \"post_time\": \"2019-01-16 17:27:08\" },\n\t{ \"post_id\": 24093, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's what I get when I run your code:ds := DATASET([{'1',1},{'2',2},{'3',3}],{STRING s, UNSIGNED l});\\n\\nConcatenateFields(ds,s,'-'); //works just fine: 1-2-3\\nConcatenateFields(ds,l,'-'); //Error: Can not assign String to Integer (field SELF.l)\\nConcatenateFields(ds,l,0); //Error: Type of 'Seperator' (integer8) must match the \\n //type of field being concaternated (unsigned8).
\\nSince concatenation is the mission here, you'll need to cast any non-STRING datatypes to STRING to achieve that.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-16 14:34:46\" },\n\t{ \"post_id\": 24083, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"Actually the test in the ASSERT, may need further work. No time at the moment.\", \"post_time\": \"2019-01-16 14:21:46\" },\n\t{ \"post_id\": 24073, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"for completeness, here it the working version, with type checking in place.\\n\\nConcatenateFields(_ds_,_fld_,_sep_) := FUNCTIONMACRO\\n\\n #DECLARE(Seperator);\\n #SET(Seperator, #TEXT(_sep_));\\n #DECLARE(SeperatorType);\\n #SET(SeperatorType, #GETDATATYPE(%Seperator%) ); \\n #DECLARE(field);\\n #SET(field, #TEXT(_ds_._fld_));\\n #DECLARE(fieldtype);\\n #SET(fieldtype, #GETDATATYPE(%field%) ); \\n\\t_ds1_ := PROJECT(_ds_,TRANSFORM({%fieldtype% _fld_},SELF._fld_ := LEFT._fld_));\\n\\n\\tBaseType(STRING ty) := REGEXREPLACE('[^[:alpha:]]',ty,'');\\n\\tchk := ASSERT(BaseType(%'fieldtype'%) = BaseType(%'SeperatorType'%),'Type of \\\\'Seperator\\\\' ('+%'SeperatorType'%+') must match the type of field being concaternated ('+%'fieldtype'%+').',FAIL);\\n RETURN WHEN(ROLLUP(_ds1_,TRUE,TRANSFORM({%fieldtype% _fld_},SELF._fld_ := LEFT._fld_ + _sep_ + RIGHT._fld_))[1]._fld_,chk);\\n\\nENDMACRO;\\n
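\\n\\nFor reference, a quick usage sketch (untested, simply reusing the inline test data Richard posted above) showing how this final version is called:\\n\\n
ds := DATASET([{'1',1},{'2',2},{'3',3}],{STRING s, UNSIGNED l});\\nOUTPUT(ConcatenateFields(ds,s,'-'));   // '1-2-3', as in Richard's example\\n
\\n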
\", \"post_time\": \"2019-01-16 14:08:50\" },\n\t{ \"post_id\": 24063, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"replacing\\n #SET(field, 'arr.chr');
\\nwith\\n #SET(field, _ds_._fld_);
\\n\\nfails with 'constant expression expected.\\n\\nAll's well with:\\n#SET(field, #TEXT(_ds_._fld_));
\\n\\nso got the whole thing working.\", \"post_time\": \"2019-01-16 13:52:14\" },\n\t{ \"post_id\": 24053, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"AH Well this is embarrassing, I recall nearly getting it working on Monday. (last thing late in day)\\nAn example where it flattens UNICODE field in a DATASET.\\nI had to hard code 'arr.chr' in the FUNCTIONMACRO. So getting over that hurdle would be great. I actually was not getting a problem using #GETDATATYPE. I expect I could sort it out, but had incorrectly remembered not getting it working at all.\\n\\nUNICODE arrowarray := U'\\\\u2190\\\\u2191\\\\u2192\\\\u2193\\\\u2196\\\\u2197\\\\u2198\\\\u2199';\\nLL := 79;\\t\\t\\t// Line length\\n\\nRDS := {UNICODE chr,BOOLEAN Changed};\\n\\nRDS Create({UNICODE a} L) := TRANSFORM\\n SELF.chr := arrowarray[(RANDOM() % 8)+1];\\n SELF.Changed := FALSE;\\nEND;\\n\\narr := NORMALIZE(DATASET([{U'\\\\u2190'}],{UNICODE a}),LL*60,Create(LEFT));\\n\\n\\nConcatenateFields(_ds_,_fld_,_sep_) := FUNCTIONMACRO\\n\\n #DECLARE(field);\\n #SET(field, 'arr.chr');\\n #DECLARE(fieldtype);\\n #SET(fieldtype, #GETDATATYPE(%field%) ); \\n\\t_ds1_ := PROJECT(_ds_,TRANSFORM({%fieldtype% _fld_},SELF._fld_ := LEFT._fld_));\\n\\n RETURN ROLLUP(_ds1_,TRUE,TRANSFORM({%fieldtype% _fld_},SELF._fld_ := LEFT._fld_ + _sep_ + RIGHT._fld_))[1]._fld_;\\n\\nENDMACRO;\\n\\nd := DATASET([{ConcatenateFields(arr,chr,U''),TRUE}],{UNICODE str,BOOLEAN Changed});\\nd;\\n
\\n\\nAs to using AGGREGATE, this macro was written years back by Rob, there was a long discussion then on the merits on AGGREGATE over ROLLUP, I think it was decided that ROLLUP was best.\\nApologies for wasting people time.\", \"post_time\": \"2019-01-16 13:45:04\" },\n\t{ \"post_id\": 24043, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Re: Request help using #GETDATATYPE in MACRO\", \"username\": \"ghalliday\", \"post_text\": \"You don't really say what the problem is you are hitting with #getdatatype. That it doesn't work, or generates unusable results?\\n\\nAs an aside you will find AGGREGATE is likely to be more efficient than ROLLUP.\", \"post_time\": \"2019-01-16 12:10:37\" },\n\t{ \"post_id\": 24033, \"topic_id\": 6243, \"forum_id\": 8, \"post_subject\": \"Request help using #GETDATATYPE in MACRO\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI've been tieing myself in knots trying to dynamically construct a record structure given the #GETDATATYPE of a parameter to a FUNCTIONMACRO.\\nOriginal code:\\n\\nEXPORT ConcatenateStringFields(_ds_,_fld_,_sep_) := FUNCTIONMACRO\\n\\n _ds1_ := PROJECT(_ds_,TRANSFORM({STRING _fld_},SELF._fld_ := LEFT._fld_));\\n\\n RETURN ROLLUP(_ds1_,TRUE,TRANSFORM({STRING _fld_},SELF._fld_ := LEFT._fld_ + _sep_ + RIGHT._fld_))[1]._fld_;\\n\\nENDMACRO;\\n\\nThis macro is already very useful, but, as you can tell from its name, its confined to be being used by STRINGs.\\nIf the record types in bold could be dictated by the '_fld_' or '_sep_' parameter the MACRO would be even more useful.\\n\\nA 'nice to have' would also be a check that the type of the '_fld_' matched the type of '_sep_' and ASSERT FAIL if not. (with useful message)\\n\\nThanks for your efforts in advance.\\nYours\\nAllan\", \"post_time\": \"2019-01-16 10:52:38\" },\n\t{ \"post_id\": 24223, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Re: Updating a DATASET as an action tied to a RETURN\", \"username\": \"Allan\", \"post_text\": \"A section of ECL, that takes the XML of a record structure and flattens it into another record structure. Details in the ECL comments. Note the code only flattens the 1st record of any child datasets, which is ok for our functionality, but limits this codes use in the general case.\\n \\n\\nEXPORT RunScoreStats(TYPEOF(LogLayouts.content_type) pExternalVendor\\n ,STRING xmlOfRecordStructure\\n ,STRING rootFldToScores\\n ,STRING ECLForStat) := FUNCTION\\n /*\\n Flatten vendor scores into a format that is usable by stat generating attributes,\\n and actually daisy-chain off a WU to generate the stat.\\n\\n INPUT PARAMETERS\\n pExternalVendor Filter for the specific vendor being analysed. \\n xmlOfRecordStructure The XML of the record structure of the data holding the scores to be processed.\\n rootFldToScores The scores can be nested quite deep inside the record structure, so a 'root' to that\\n structure needs to be supplied.\\n Example:\\n Vendor1 Scores are under:\\n <Row><XXX>scores are all here</XX></Row> // (Some tags removed for clarity of explanation)\\n So if the entire structure of the XML score structure is supplied in xmlOfRecordStructure\\n then the 'root' to the structure to flatten is 'XXX'.\\n If the <XXX> structure alone is supplied in xmlOfRecordStructure then this\\n rootFldToScores must be ''. \\n ECLForStat Text of ECL to generate the actual stat. 
Make any reference to the flattened input in the ECL as 'input'.\\n\\n OTHER REQUIRMENTS\\n An EXPORTed routine named 'pExternalVendor' must exist in this MODULE.\\n It must return a DATASET in the format as described by 'xmlOfRecordStructure'.\\n\\n OUTPUTs\\n 1. stat WU initiated\\n 2. WUid of said WU.\\n */\\n\\n ExternalVendor := TRIM(pExternalVendor,LEFT,RIGHT);\\n VendorList := [LOGCONST.Vendor1Response,LOGCONST.Vendor2Response,LOGCONST.Vendor3Response,LOGCONST.Vendor4Response];\\n\\n // Check inputs\\n\\n ValidExternalVendorID := ASSERT(ExternalVendor IN VendorList,'Unregognised External Vendor',FAIL);\\n\\n Checks := ORDERED(ValidDates,ValidExternalVendorID);\\n\\n //////////////////////////////////////////////////////////////////////////////////////\\n // Construct ECL to convert this hierarchical structure into a vertical slice record structure.\\n // This will flatten the entire response structure into <fldName>,<value> pairs.\\n //////////////////////////////////////////////////////////////////////////////////////\\n\\n#UNIQUENAME(datasetName);\\n\\nGetRecStructure := FUNCTIONMACRO\\n\\n /* Cope with duplicate field names.\\n As the flattened structure must give a unique name to each of its fields, the input hierarchical structure may well have the same\\n field name in different parts of its hierarchy.\\n To Solve this, retain a list of field names and generate a field name from the input field name but append the COUNT of that\\n field name already encountered in the input.\\n\\n LEFT constructs the string to be the right hand side of each flattened assignment, e.g.\\n string dateraised8 := __datasetName__11979__.Results.addresslinks[1].notices[1].dateraised;\\n\\n RIGHT side retains the current state of referencing down to the fields currently having assignments generated for them. e.g.\\n [addresslinks[1]\\n ,notices[1]]\\n\\n This is an improvement on just using COUNTER to the ITERATE to generate unique fieldnames as the original field names\\nare retained as far as possible.\\n */\\n\\n RRec := RECORD\\n STRING ecltype := XMLTEXT('@ecltype');\\n BOOLEAN isRecord := (BOOLEAN) XMLTEXT('@isRecord');\\n STRING label := XMLTEXT('@label');\\n STRING name := XMLTEXT('@name');\\n STRING position := XMLTEXT('@position');\\n STRING rawtype := XMLTEXT('@rawtype');\\n STRING size := XMLTEXT('@size');\\n STRING ttype := XMLTEXT('@type');\\n BOOLEAN isEnd := (BOOLEAN) XMLTEXT('@isEnd');\\n STRING FldEntry := '';\\n END;\\n\\n OnRec := {STRING FldEntry};\\n InputDSToFlattener := PARSE(DATASET([{xmlOfRecordStructure}],OnRec),FldEntry,RRec,XML('Data/Field'));\\n\\n rootToFldNormalised := REGEXREPLACE('(^\\\\\\\\.{1,}|\\\\\\\\.{1,}$)',rootFldToScores,''); // Normalise path to root of scores, i.e. .fred.charlie. => fred.charlie\\n StartStack := DATASET([%'datasetName'%+'.'+rootToFldNormalised+IF(rootToFldNormalised != '','.','')],OnRec);\\n\\n RDS := {DATASET(OnRec) namelist;\\n DATASET(OnRec) DirEntry};\\n\\n RRec RecordFlattener(RRec L,RDS R) := TRANSFORM\\n\\n // Note this only works if all input records root field type is 'string'. (i.e. 
still ok if field type is STRING12)\\n // If not the test L.ttype = 'string' will have to be enhanced to truly detect a 'terminal' field type.\\n STRING GetName := L.name + IF(COUNT(R.namelist(FldEntry = L.name)) > 0,(STRING) COUNT(R.namelist(FldEntry = L.name)),'');\\nSELF.FldEntry := IF(L.ttype = 'string','string '+GetName+' := '+ConcatenateStringFields(R.DirEntry,FldEntry,'')+L.name+';','');\\nSELF := L;\\n END;\\n\\n RDS RetainHierarchicalState(RRec L,RDS R) := TRANSFORM\\n\\nIsDataSet := REGEXFIND('table of',L.ttype);\\nSELF.Namelist := R.namelist(FldEntry != '') & ROW({L.name},OnRec);\\nSELF.DirEntry := MAP(IsDataSet => R.DirEntry & ROW({L.name+'[1].'},OnRec)\\n ,L.isRecord => R.DirEntry & ROW({L.name+'.'},OnRec)\\n ,L.isEnd => R.DirEntry[1..COUNT(R.DirEntry)-1]\\n , R.DirEntry);\\n END;\\n\\n irStruct := PROCESS(InputDSToFlattener\\n ,ROW({StartStack,StartStack},RDS)\\n ,RecordFlattener(LEFT,RIGHT)\\n ,RetainHierarchicalState(LEFT,RIGHT)\\n ,STABLE)(fldEntry != '');\\n\\n RETURN ConcatenateStringFields(irStruct,fldEntry,'\\\\n');\\n\\nENDMACRO;\\n #UNIQUENAME(recStructName);\\n\\n eclRun := '#WORKUNIT(\\\\'name\\\\',\\\\''+ExternalVendor+' Score Stats '+FromDate+' to '+ToDate+'\\\\');\\\\n'\\n + %'datasetName'%+' := Reports.DataGetterTransformer('+FromDate+','+ToDate+').'+ExternalVendor+';\\\\n'\\n + %'recStructName'% +' := RECORD ' + GetRecStructure() + 'END;\\\\ninput := TABLE('+%'datasetName'%+','+%'recStructName'%+',LOCAL);\\\\n'\\n + ECLForStat;\\n\\n Cluster := NOTHOR(STD.System.Workunit.WorkunitList(WORKUNIT,WORKUNIT)[1].cluster) : INDEPENDENT;\\n\\n RETURN WHEN(WorkUnitManagement.fSubmitNewWorkunit(eclRun,Cluster, Cluster+'.thor'),Checks);\\n\\nEND;\\n
\\nAnd to use:\\n\\nIMPORT Vendor1;\\nIMPORT * FROM Reports;\\n\\n#DECLARE(xmlOfRecordStructure)\\n#EXPORT(xmlOfRecordStructure,Vendor1.t_scores.score)\\n\\nDataGetterTransformer(20181210,20181210)\\n .RunScoreStats('Vendor1_Response'\\n ,%'xmlOfRecordStructure'%\\n ,'scores'\\n ,'OUTPUT(Profile_Everything(input, \\\\'STAT\\\\'),NAMED(\\\\'Profile\\\\'));');\\n
\\n\\nActually the function name 'RunScoreStats' is inappropriate, as the ECL using the flattened dataset, could be anything, nothing to do with 'Scores'. I requested a change of name but got overruled.\", \"post_time\": \"2019-01-23 09:49:47\" },\n\t{ \"post_id\": 24173, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Re: Updating a DATASET as an action tied to a RETURN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nGlad that PROCESS worked out for you. Can you post example code for what you're doing (so all the world can see)?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2019-01-22 14:09:21\" },\n\t{ \"post_id\": 24153, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Re: Updating a DATASET as an action tied to a RETURN\", \"username\": \"Allan\", \"post_text\": \"Actually for my specific requirement, the built in PROCESS works fine.\", \"post_time\": \"2019-01-21 08:42:49\" },\n\t{ \"post_id\": 24143, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Re: Updating a DATASET as an action tied to a RETURN\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nNo, does not work, doing:\\n\\nNamesUsed := DATASET([],{STRING namelist});\\nSTRING GetName(STRING nm) := FUNCTION\\n Exist := COUNT(NamesUsed(namelist = nm));\\n NamesUsed := NamesUsed & ROW({nm},{STRING namelist});\\n RETURN nm+IF(Exist > 0,(STRING)Exist,'');\\nEND;\\n
\\ngives error:\\nError: syntax error near ":=" : expected ';' (385, 18), 3002, Reports.DataGetterTransformer\\n
\\nwhereas:\\n\\nxx := NamesUsed & ROW({nm},{STRING namelist});\\n
\\nworks.\\nI don't want any dedup as I using the COUNT of the number of times the 'name' has been found to help generate a unique name in a record structure I'm creating in a FUNCTIONMACRO.\\nI need to record how many times a field name has been encountered so I can generate a 'fieldname<n>' where 'n' is a COUNT of the number of times 'fieldname' has already been processed. Consequently I need a side-effect action to update my list of fieldnames every time I process a fieldname.\\nI can't just use #UNIQUENAME or the COUNTER in the ITERATE as I want to retain the original fieldnames as closely as possible. So if 'fieldname' is only encountered once the fieldname will be unaltered from the input fieldlist to the FUNCTIONMACRO.\\n\\nYours\\nAllan\", \"post_time\": \"2019-01-17 09:34:20\" },\n\t{ \"post_id\": 24133, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Re: Updating a DATASET as an action tied to a RETURN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI would just do it like this:NamesUsed := DATASET([{'Fred'},{'Joe'}],{STRING namelist});\\n\\nNamesToTry := DATASET([{'Fred'},{'Sam'}],{STRING namelist});\\n\\nMergedNames := NamesUsed & NamesToTry(nameList NOT IN SET(NamesUsed,namelist));\\n\\nMergedNames;
\\nOr, if you have tons of records in each, then you could do it like this:AllNames := NamesUsed & NamesToTry;\\nUniqueNames := DEDUP(SORT(AllNames,namelist),namelist);\\nUniqueNames;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-16 18:49:45\" },\n\t{ \"post_id\": 24123, \"topic_id\": 6253, \"forum_id\": 8, \"post_subject\": \"Updating a DATASET as an action tied to a RETURN\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nSimple issue - I hope,\\n\\nI want to append to a DATASET a record on returning from a FUNCTION.\\nI have:\\n\\nNamesUsed := DATASET([],{STRING namelist});\\nSTRING GetName(STRING nm) := FUNCTION\\n\\n\\tExist := COUNT(NamesUsed(namelist = nm));\\n\\tRETURN WHEN(nm+IF(Exist > 0,(STRING)Exist,''),EVALUATE(NamesUsed := NamesUsed & ROW({nm},{STRING namelist})));\\nEND;\\n
\\nBut this is not right.\\nAny ideas?\\nYours\\nAllan\", \"post_time\": \"2019-01-16 17:30:48\" },\n\t{ \"post_id\": 24333, \"topic_id\": 6283, \"forum_id\": 8, \"post_subject\": \"Re: Native TOJSON usage\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nYour problem was with your initial RECORD Structure and inline DATASET. If you need a SET in the JSON, then it makes sense to start with a SET in ECL. Here's a way you can do what you want:rec := RECORD \\n STRING text;\\n STRING2 languageCode;\\n SET OF STRING fruits\\nEND;\\nds := dataset([{'My Basket', 'en', ['APPLES','ORANGES','BANANAS']}],rec);\\nds;\\nstr1 := TOJSON(ds[1]); \\nOUTPUT(str1);\\n//Produces: \\n//{"text": "My Basket", "languagecode": "en", "fruits": {"Item": ["APPLES", "ORANGES", "BANANAS"]}\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-28 16:32:57\" },\n\t{ \"post_id\": 24313, \"topic_id\": 6283, \"forum_id\": 8, \"post_subject\": \"Re: Native TOJSON usage\", \"username\": \"wjblack\", \"post_text\": \"While it's not the ideal way to represent a json mapping I was able to find a workaround by doing the following. It would be ideal if there was a way just using the TOJSON method.\\n\\n\\nrec := RECORD \\n STRING text;\\n STRING2 languageCode;\\n STRING fruits\\nEND;\\n\\nds := dataset([\\n {'My Basket', 'en', 'APPLES'},\\n\\t {'My Basket', 'en', 'ORANGES'},\\n\\t {'My Basket', 'en', 'BANANAS'}\\n],rec);\\n\\nds;\\n\\nrec2 := {\\n STRING text;\\n STRING2 languageCode;\\n set of string fruits {xpath('fruits')}\\n};\\n\\nds2 := project(ds,\\n transform(rec2,\\n self.fruits := [left.fruits],\\n self := left));\\n\\nds2;\\n\\nds3 := rollup(ds2,\\n left.text = right.text and \\n\\t\\t\\t\\t\\t\\t\\tleft.languageCode = right.languageCode,\\n transform(rec2,\\n self.fruits := left.fruits + right.fruits,\\n self := left));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nds3;\\n\\nrec3 := {\\n unicode jsonRow\\n}; \\n \\nds4 := project(ds3,\\n transform(rec3,\\n self.jsonRow := '{' + toJson(left) + '}'));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nds4;\\n
\\n\\nThe result is\\n\\n{"text": "My Basket", "languagecode": "en", "fruits": ["APPLES", "ORANGES", "BANANAS"]}\\n\\n
\", \"post_time\": \"2019-01-27 00:57:55\" },\n\t{ \"post_id\": 24303, \"topic_id\": 6283, \"forum_id\": 8, \"post_subject\": \"Native TOJSON usage\", \"username\": \"wjblack\", \"post_text\": \"I'm trying to determine how to format the below json using the TOJSON() call. Text and languageCode can easily be represented but how does one represent multiple vales for a key like with friuts?\\n\\n\\n{\\n\\t"text": "My Basket",\\n\\t"languageCode": "en",\\n\\t"fruits": [\\n\\t\\t "APPLES",\\n\\t\\t "ORANGES",\\n\\t\\t "BANANAS",\\n\\t ]\\n}\\n
\\n\\nFor example,\\n\\nnamesRec1 := RECORD \\n UNSIGNED2 EmployeeID{xpath('EmpID')}; \\n STRING10 Firstname{xpath('FName')}; \\n STRING10 Lastname{xpath('LName')};\\nEND; \\nstr1 := TOJSON(ROW({42,'Fred','Flintstone'},namesRec1)); \\nOUTPUT(str1);\\n
\\nbecomes \\n\\n"EmpID": 42, "FName": "Fred", "LName": "Flintstone"\\n
\\n\\nEssentially how do you represent a json array\\n{\\n"employees":[ "John", "Anna", "Peter" ]\\n}
\", \"post_time\": \"2019-01-25 18:15:46\" },\n\t{ \"post_id\": 24383, \"topic_id\": 6293, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Java Plugin\", \"username\": \"richardkchapman\", \"post_text\": \"That looks about right to me.\", \"post_time\": \"2019-01-31 10:42:29\" },\n\t{ \"post_id\": 24373, \"topic_id\": 6293, \"forum_id\": 8, \"post_subject\": \"HPCC Java Plugin\", \"username\": \"wjblack\", \"post_text\": \"All,\\n\\nI have a jar file named 'foobar.jar'. In foobar.jar is a package named 'com.b.a' that contains a class named 'MyClass' with a function named 'myFunc'. How will the signature look when referencing it from HPCC? Will it be \\n\\n\\nimport java;\\nmyFunc(string txt) := import(java,'com/b/a/MyClass.myFunc:(Ljava/lang/String)Ljava/lang/String;');\\n
\", \"post_time\": \"2019-01-30 19:38:43\" },\n\t{ \"post_id\": 24433, \"topic_id\": 6313, \"forum_id\": 8, \"post_subject\": \"STD.File.ProtectLogicalFile\", \"username\": \"sajish\", \"post_text\": \"Hi,\\nWhen I use the STD.File.ProtectLogicalFile to protect a logical file, it restricts that only the same user(me in this case) can toggle off the protection before the file could be deleted. Incase of superfile, the user has to explicitly toggle off the protection before any other user can add/promote files into this superfile. Is this the expected behavior in ECL? Is there any other option to protect logical files from being deleted as a common practice?\\nThanks in Advance!\", \"post_time\": \"2019-02-08 15:10:28\" },\n\t{ \"post_id\": 24473, \"topic_id\": 6323, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON & json array\", \"username\": \"wjblack\", \"post_text\": \"Thanks... The premise of the root question is to pull in a json file of this format. I'm hoping the recommendation will work for this as well. There are different tags under each entity that's returned back.\\n\\n\\n{\\n\\t"entities": [\\n\\t\\t{\\n\\t\\t\\t"attributes": {\\n\\t\\t\\t\\t"firstName": "John",\\n\\t\\t\\t\\t"gender": "M",\\n\\t\\t\\t\\t"lastName": "Smith"\\n\\t\\t\\t},\\n\\t\\t\\t"count": 1,\\n\\t\\t\\t"label": "John Smith",\\n\\t\\t\\t"mentions": [\\n\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t"extractedText": "John Smith",\\n\\t\\t\\t\\t\\t"position": [\\n\\t\\t\\t\\t\\t\\t0,\\n\\t\\t\\t\\t\\t\\t11\\n\\t\\t\\t\\t\\t]\\n\\t\\t\\t\\t}\\n\\t\\t\\t],\\n\\t\\t\\t"type": "PERSON"\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"some_other_attributes": {\\n\\t\\t\\t\\t"id": "12345",\\n\\t\\t\\t\\t"population": 200000\\n\\t\\t\\t},\\n\\t\\t\\t"count": 1,\\n\\t\\t\\t"label": "Iowa",\\n\\t\\t\\t"mentions": [\\n\\t\\t\\t\\t{\\n\\t\\t\\t\\t\\t"extractedText": "Iowa",\\n\\t\\t\\t\\t\\t"position": [\\n\\t\\t\\t\\t\\t\\t21,\\n\\t\\t\\t\\t\\t\\t25\\n\\t\\t\\t\\t\\t]\\n\\t\\t\\t\\t}\\n\\t\\t\\t],\\n\\t\\t\\t"type": "GEO"\\n\\t\\t}\\n\\t],\\n\\t"requestId": "8675309"\\n}\\n
\", \"post_time\": \"2019-02-09 16:33:46\" },\n\t{ \"post_id\": 24463, \"topic_id\": 6323, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON & json array\", \"username\": \"Tony Kirk\", \"post_text\": \"Or, to your original question that actually has a field in the primary record (still only one record, though), and after adding some missing commas:\\n\\njsonstring\\t:=\\n '{'\\n+ ' "gender": ['\\n+ ' {'\\n+ ' "type": "male",'\\n+ ' "age": 25,'\\n+ ' "name": "will"'\\n+ ' },'\\n+ ' {'\\n+ ' "type": "female",'\\n+ ' "age": 21,'\\n+ ' "name": "wilma"'\\n+ ' }'\\n + '],'\\n+ ' "requestId": "8675309"'\\n+ '}';\\n\\nrGender\\t:=\\nrecord\\n string type{xpath('type')};\\n unsigned age{xpath('age')};\\n string name{xpath('name')};\\nend;\\n\\nrMain\\t:=\\nrecord\\n dataset(rGender) Gender{xpath('gender')};\\n string ReqeuestId{xpath('requestId')};\\nend;\\n\\nfromjson(rMain, jsonstring);
\", \"post_time\": \"2019-02-08 20:16:49\" },\n\t{ \"post_id\": 24453, \"topic_id\": 6323, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON & json array\", \"username\": \"Tony Kirk\", \"post_text\": \"There is nothing else in the primary record, and there is only one, but how about this to treat "users" as a child dataset?\\n\\njsonstring\\t:=\\n '{'\\n+ ' "users": ['\\n+ ' {'\\n+ ' "username": "SammyShark",'\\n+ ' "location": "Indian Ocean"'\\n+ ' },'\\n+ ' {'\\n+ ' "username": "JesseOctopus",'\\n+ ' "location": "Pacific Ocean"'\\n+ ' },'\\n+ ' {'\\n+ ' "username": "DrewSquid",'\\n+ ' "location": "Atlantic Ocean"'\\n+ ' },'\\n+ ' {'\\n+ ' "username": "JamieMantisShrimp",'\\n+ ' "location": "Pacific Ocean"'\\n+ ' }'\\n+ ' ]'\\n+ '}';\\n\\nrUsers\\t:=\\nrecord\\n string username{xpath('username')};\\n string location{xpath('location')};\\nend;\\nrMain :=\\nrecord\\n dataset(rUsers) Users{xpath('users')};\\nend;\\nfromjson(rMain, jsonstring);
\", \"post_time\": \"2019-02-08 20:05:46\" },\n\t{ \"post_id\": 24443, \"topic_id\": 6323, \"forum_id\": 8, \"post_subject\": \"FROMJSON & json array\", \"username\": \"wjblack\", \"post_text\": \"How can I use the fromjson() command to read in a json array into ecl.\\n\\n\\n{\\n\\t"users": [\\n\\t\\t{\\n\\t\\t\\t"username": "SammyShark",\\n\\t\\t\\t"location": "Indian Ocean"\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"username": "JesseOctopus",\\n\\t\\t\\t"location": "Pacific Ocean"\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"username": "DrewSquid",\\n\\t\\t\\t"location": "Atlantic Ocean"\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"username": "JamieMantisShrimp",\\n\\t\\t\\t"location": "Pacific Ocean"\\n\\t\\t}\\n\\t]\\n}\\n
\", \"post_time\": \"2019-02-08 19:35:08\" },\n\t{ \"post_id\": 25373, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Re: Splitting of a string in a dataset.\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nThis way produces what you want in a nested child dataset format:IMPORT Std;\\nds := DATASET([{1,'PERSON|1$GEO|1'},{2,'PERSON|2$GEO|2'}],{UNSIGNED1 UID,STRING str});\\nSplitEm(STRING str) := DATASET(std.str.splitwords(str,'$'),{STRING s});\\ndsNew := PROJECT(ds,\\n TRANSFORM({UNSIGNED1 UID, DATASET({STRING s}) Splits},\\n SELF.UID := LEFT.UID;\\n SELF.Splits := SplitEm(LEFT.str)));\\ndsNew;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-19 13:30:46\" },\n\t{ \"post_id\": 25323, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Re: Splitting of a string in a dataset.\", \"username\": \"wjblack\", \"post_text\": \"What about if it was like this? This is what I have\\n\\n\\n'1','PERSON|1$GEO|1'\\n'2','PERSON|2$GEO|2'\\n
\\n\\nAnd this is what I want... Split on the '$' in the second field.\\n\\n'1','PERSON|1'\\n'1','GEO|1'\\n'2','PERSON|2'\\n'2','GEO|2'\\n
\", \"post_time\": \"2019-03-19 00:56:28\" },\n\t{ \"post_id\": 24513, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Re: Splitting of a string in a dataset.\", \"username\": \"rtaylor\", \"post_text\": \"Or, just a bit simpler:IMPORT Std;\\nds := DATASET([{'PERSON|1$GEO|1'},{'PERSON|2$GEO|2'}],{STRING str});\\nSplitEm(STRING str) := DATASET(std.str.splitwords(str,'$'),{STRING s});\\ndsNew := PROJECT(ds,\\n TRANSFORM({DATASET({STRING s}) Splits},\\n SELF.Splits := SplitEm(LEFT.str))).splits;\\ndsNew;
\\n \\n\\nRichard\", \"post_time\": \"2019-02-15 17:50:30\" },\n\t{ \"post_id\": 24503, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Re: Splitting of a string in a dataset.\", \"username\": \"wjblack\", \"post_text\": \"Thanks Richard. I threw this together once I was aware of std.str.splitwords\\n\\n
\\nLineRec := RECORD \\n STRING str;\\nEND; \\n\\nds := DATASET([{'PERSON|1$GEO|1'},{'PERSON|2$GEO|2'}],LineRec);\\n\\noutlay := {dataset({string split}) splits};\\n\\ndsNew := project(ds,transform(outlay,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.Splits := \\tdataset([std.str.splitwords((string)left.str,'$')],{string split})\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nnormalize(dsNew,left.splits,transform({string Split},self:=right))\\n
\", \"post_time\": \"2019-02-15 17:20:14\" },\n\t{ \"post_id\": 24493, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Re: Splitting of a string in a dataset.\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nTake a look at the Std.Str.SplitWords() standard library function. That should give you exactly what you want.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-02-14 10:52:01\" },\n\t{ \"post_id\": 24483, \"topic_id\": 6333, \"forum_id\": 8, \"post_subject\": \"Splitting of a string in a dataset.\", \"username\": \"wjblack\", \"post_text\": \"This is what I have\\n'PERSON|1$GEO|1'\\n'PERSON|2$GEO|2'\\n\\nAnd this is what I want... Split on the '$'\\n'PERSON|1'\\n'GEO|1'\\n'PERSON|2'\\n'GEO|2'\\n\\nLineRec := RECORD \\n STRING x;\\nEND; \\n\\nall_lines := DATASET([{'PERSON|1$GEO|1'},\\n {'PERSON|2$GEO|2'}],LineRec);\\n
\", \"post_time\": \"2019-02-13 18:38:31\" },\n\t{ \"post_id\": 24573, \"topic_id\": 6343, \"forum_id\": 8, \"post_subject\": \"Re: Dynamically create a record structure\", \"username\": \"Allan\", \"post_text\": \"Hi wjblack,\\n\\nThe problem with your code is that your unconditionally index tokens by 1,2,3,4. You have to batch the strings up into groups of 4 elements.\\nSee example code below. I've added a check to fail the WU if the input names are not multiples of 4 fields.\\n\\nI don't make any pretence that this is a definitive solution, as I've just knocked it up, but it gives you an idea on how to progress.\\n\\nIMPORT STD;\\n\\ncurrRec := RECORD\\n UNSIGNED id;\\n STRING person_list;\\nEND;\\n\\nds := DATASET([{12345,'Smith^SF^John^SF^100^SF^4.0^SF^Black^SF^Henry^SF^100^SF^3.8'}\\n ,{67890,'Jones^SF^Allan^SF^90^SF^5.0'}\\n ,{22334,'Cooper^SF^Fran^SF^30^SF^1.0^SF^Fletcher^SF^Ian^SF^70^SF^3.8^SF^Johnson^SF^Boris^SF^20^SF^0.8'}],currRec);\\n\\nChildRec := record \\n unsigned child_num;\\n unicode lastname;\\n unicode firstname;\\n unicode grade;\\n unicode gpa;\\nend;\\n\\nParentRec := RECORD\\n UNSIGNED id;\\n DATASET(ChildRec) Persons;\\nEND;\\n\\nParentRec ProcessOneInputRecord (currRec L) := TRANSFORM\\n\\n Children := STD.Str.SplitWords(L.person_List,'^SF^');\\n CntChildren := IF(COUNT(Children) % 4 != 0,ERROR('missing children fields: '+(STRING)L.id),COUNT(Children) DIV 4);\\n\\n ChildRec GetOneChild({UNSIGNED dummy} L,UNSIGNED Cnt) := TRANSFORM\\n UNSIGNED offset := Cnt*4-3;\\n SELF.child_num := Cnt;\\n SELF.lastname := Children[offset];\\n SELF.firstname := Children[offset+1];\\n SELF.grade := Children[offset+2];\\n SELF.gpa := Children[offset+3];\\n END;\\n\\n SELF.Id := L.id;\\n SELF.Persons := NORMALIZE(DATASET([{0}],{UNSIGNED dummy}),CntChildren,GetOneChild(LEFT,COUNTER));\\n\\nEND;\\n \\nPROJECT(ds,ProcessOneInputRecord(LEFT));\\n
\\nYours\\nAllan\", \"post_time\": \"2019-02-25 09:35:58\" },\n\t{ \"post_id\": 24563, \"topic_id\": 6343, \"forum_id\": 8, \"post_subject\": \"Re: Dynamically create a record structure\", \"username\": \"wjblack\", \"post_text\": \"Thanks Richard. I went with the child dataset idea although I'm facing one other problem to display the tokens of the second child programmatically. I'm using STD.STr.SplitWords to split the child attribute values based on the ^SF^ delimiter. As it stands the values lastname, firstname, grade, gpa are duplicated for both children.\\n\\n\\nIMPORT Std;\\n\\ncurrRec := RECORD\\n UNICODE id;\\n\\tUNICODE person_ct;\\n UNICODE person_list;\\nEND;\\n\\nds := DATASET([{'12345','2','Smith^SF^John^SF^100^SF^4.0^SF^Black^SF^Henry^SF^100^SF^3.8'}],currRec);\\n\\nChildRec := record \\n unsigned child_num;\\n unicode lastname;\\n unicode firstname;\\n unicode grade;\\n unicode gpa;\\nend;\\n\\nParentRec := RECORD\\n UNICODE id;\\n UNICODE person_ct;\\n DATASET(ChildRec) Persons;\\nEND;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nPROJECT(ds,TRANSFORM(ParentRec,\\n\\t\\t JustNames := LEFT.person_list[1.. LENGTH(LEFT.person_list)];\\n\\t\\t Tokens := STD.STr.SplitWords((string)JustNames,'^SF^');\\n\\t\\t Child_ct := (unsigned)LEFT.person_ct;\\n\\t\\t SELF.Persons := normalize(dataset([{JustNames}],{string str}),\\n\\t\\t Child_ct,\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n TRANSFORM(ChildRec,\\n self.child_num := counter;\\t\\t\\t\\t\\n\\t\\t\\t self.lastname := Tokens[1];\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t self.firstname := Tokens[2];\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t self.grade := Tokens[3];\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t self.gpa := Tokens[4];\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t SELF := LEFT)),\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n SELF := LEFT));\\n
\", \"post_time\": \"2019-02-23 16:40:22\" },\n\t{ \"post_id\": 24543, \"topic_id\": 6343, \"forum_id\": 8, \"post_subject\": \"Re: Dynamically create a record structure\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nRather than a dynamic RECORD structure, you might want to look at using a nested child dataset, like this:IMPORT Std;\\ncurrRec := RECORD\\n UNICODE id;\\n UNICODE name_list;\\nEND;\\n\\nds := DATASET([{'12345','[Smith|Will|J$Patel|Sandeep|R]'},\\n {'23456','[John|Joe|Sue$Smith|Deepak|Q]'}\\n ],currRec);\\nCrec := {UNICODE name};\\nOutRec := RECORD\\n UNICODE id;\\n DATASET(Crec) Names;\\nEND;\\n\\nPROJECT(ds,TRANSFORM(OutRec,\\n JustNames := LEFT.name_list[2.. LENGTH(LEFT.name_list)-1];\\n SELF.Names := DATASET(Std.Uni.SplitWords(JustNames,'|'),Crec),\\n SELF := LEFT));\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-02-22 14:11:54\" },\n\t{ \"post_id\": 24533, \"topic_id\": 6343, \"forum_id\": 8, \"post_subject\": \"Re: Dynamically create a record structure\", \"username\": \"Allan\", \"post_text\": \"Hi WjBlack,\\n\\nI'm sure this is possible, but can you clarify some things for progres to be made.\\n\\n1. Is the input known (i.e. the list of names) known at compile time or runtime?\\n2. There is some defined structure to the input, like the names always gets presented in triplets of firstname,middlename and surname, without gaps? (i.e. there are place holders where parts of the name do not exist)\\nYours\\nAllan\", \"post_time\": \"2019-02-22 13:53:47\" },\n\t{ \"post_id\": 24523, \"topic_id\": 6343, \"forum_id\": 8, \"post_subject\": \"Dynamically create a record structure\", \"username\": \"wjblack\", \"post_text\": \"Is there a way to dynamically create a record structure? For example if I have a dataset with a list of names I want to keep as one record. Something like:\\n\\n\\ncurrRec := record\\n unicode id;\\n unicode name_list;\\nend;\\n\\nds := dataset([{'12345','[Smith|Will|J$Patel|Sandeep|R]'}],currRec);\\n
\\n\\nI'd like to transform it to a single dataset like. The dataset could have 1-N of names in the list.\\n\\n\\nexport currRec := record\\n unicode id;\\n unicode last_1;\\n unicode first_1;\\n unicode middle_1;\\n unicode last_2;\\n unicode first_2;\\n unicode middle_2;\\nend;\\n\\nds := dataset([{'12345','Smith','Will','J','Patel','Sandeep','R']'}],currRec);\\n
\", \"post_time\": \"2019-02-21 21:15:26\" },\n\t{ \"post_id\": 24663, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"Re: data cleansing ,facing challenging\", \"username\": \"suleman Shreef\", \"post_text\": \"hi,\\nRichard i have done small project in hpcc can you check , if you send your email i will forward my hppc Project.\", \"post_time\": \"2019-02-28 15:22:31\" },\n\t{ \"post_id\": 24653, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"Re: data cleansing ,facing challenging\", \"username\": \"suleman Shreef\", \"post_text\": \"Thank you Richard superb thinking\", \"post_time\": \"2019-02-28 14:54:35\" },\n\t{ \"post_id\": 24643, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"Re: data cleansing ,facing challenging\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nOK, I wasn't happy with the LOOP solution, because it would involve PROJECTing through the same set of records multiple times, and so, could be relatively inefficient on large datasets.\\n\\nSo, here's an alternative that I would expect to be more efficient:
IMPORT STD;\\n\\nLayout:=record\\n UNSIGNED1 cid;\\n STRING Company_Name;\\nEND;\\n\\nCompRec:=DATASET([{1,'infoTech Private Ltd'},\\n {2,'infoTech Pvt Ltd'},\\n {3,'gate private'}],\\n Layout);\\n\\nlookRecs:=DATASET([{'Pvt','Private'},{'Ltd','Limited'},{'gate','fred'}],\\n {STRING findWord,STRING replaceWord});\\n\\n//DICTIONARY Solution\\nReplaceDCT := DICTIONARY(LookRecs,{findWord => replaceWord});\\n\\nReplaceFunc(STRING s) := FUNCTION\\n rec := {STRING w};\\n DSwords := DATASET(Std.Str.SplitWords(s,' '),rec);\\n P := PROJECT(DSWords,\\n TRANSFORM(rec,\\n SELF.w := IF(LEFT.w IN ReplaceDCT,\\n ReplaceDCT[LEFT.w].replaceWord,\\n LEFT.w)));\\n RETURN ROLLUP(P,TRUE,\\n TRANSFORM(rec,SELF.w := TRIM(LEFT.w + ' ' + RIGHT.w,LEFT)))[1].w;\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nEND;\\n\\nProjRecs := PROJECT(CompRec,\\n TRANSFORM(layout,\\n SELF.Company_Name := ReplaceFunc(LEFT.Company_Name),\\n SELF := LEFT));\\nOUTPUT(ProjRecs,NAMED('DICTIONARY_Solution'));
\\nThis example uses a DICTIONARY for the replacement words, allowing a single pass through the CompRec dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-02-28 13:00:16\" },\n\t{ \"post_id\": 24633, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"Re: data cleansing ,facing challenging\", \"username\": \"suleman Shreef\", \"post_text\": \"Thanks a lot Richard,\\nyour code is helped me to achieve my expectation \\n\\nRegards,\\nSuleman Shreef\", \"post_time\": \"2019-02-28 10:16:25\" },\n\t{ \"post_id\": 24603, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"Re: data cleansing ,facing challenging\", \"username\": \"rtaylor\", \"post_text\": \"suleman,\\n\\nHere's one way to approach the problem: \\nIMPORT STD;\\n\\nLayout:=record\\n UNSIGNED1 cid;\\n STRING Company_Name;\\nEND;\\n\\nCompRec:=DATASET([{1,'infoTech Private Ltd'},\\n {2,'infoTech Pvt Ltd'},\\n {3,'gate private'}],\\n Layout);\\n\\nlookRecs:=DATASET([{'Pvt','Private'},{'Ltd','Limited'},{'gate','fred'}],\\n {STRING findWord,STRING replaceWord});\\n\\nReplaceCnt := COUNT(LookRecs); //how many words to replace\\nLoopBody(DATASET(Layout) ds,INTEGER C) := \\n PROJECT(ds,\\n TRANSFORM(Layout,\\n Rec := lookRecs[C]; //which replace words to use this time\\n SELF.Company_Name := Std.Str.FindReplace(LEFT.Company_Name,\\n rec.findWord,\\n rec.replaceWord),\\n SELF := LEFT));\\nLOOP(CompRec,ReplaceCnt,LoopBody(ROWS(LEFT),COUNTER));
\\nThe LOOP will run the PROJECT as many times as you have words to replace, always working with the result of the previous LOOP iteration. So it will replace one word at a time in each record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-02-27 14:16:06\" },\n\t{ \"post_id\": 24593, \"topic_id\": 6353, \"forum_id\": 8, \"post_subject\": \"data cleansing ,facing challenging\", \"username\": \"suleman Shreef\", \"post_text\": \"hi all,\\n\\nit is very challenging one to make data standardization,\\ni have done find replace 'infoTech Pvt Ltd' to 'infoTech private Ltd'.in first transform\\ni thought that is not perfect,it can it possible to with single transform like,''infoTech private Limited',\\n\\nmy inline data like this.\\nIMPORT STD.Str;\\n\\nLayout:=record\\n UNSIGNED1 cid;\\n STRING Company_Name;\\nend;\\n\\nCompRec:=DATASET([{1,'infoTech private Ltd'},\\n\\t\\t {2,'infoTech Pvt Ltd'},\\n\\t\\t {3,'gate private'}],\\n\\t\\t Layout);\\n\\t\\t\\t\\t\\t\\t\\t\\n\\nlookRecSet:=DATASET([{'pvt','private'},\\n {'Ltd','Limited'}}],\\n\\t\\t {STRING findWord,\\n\\t\\t STRING replaceWord}\\n\\t\\t );\\n\\n\\n//THIS JOIN,will only do the FIND REPLACE FOR SINGLE WORD\\nJoinLookup := JOIN(CompRec,\\n lookRecSet,\\n\\t\\t//regexfind(RIGHT.findWord,LEFT.cname,nocase),\\n Str.FindWord(LEFT.Company_Name,RIGHT.findWord,TRUE),\\n TRANSFORM(RECORDOF(LEFT), \\n\\t\\t\\tSELF.Company_Name:=Str.FindReplace(LEFT.Company_Name,RIGHT.findWord,RIGHT.replaceWord);\\n\\t\\t\\tSELF:=LEFT;\\n\\t\\t ),\\n\\tALL);\\n\\n\\n JOIN(JoinLookup,\\n lookRecSet,\\n Str.FindWord(LEFT.Company_Name,RIGHT.findWord,TRUE),\\n TRANSFORM(RECORDOF(LEFT), \\n \\t SELF.Company_Name:=Str.FindReplace(LEFT.Company_Name,RIGHT.findWord,RIGHT.replaceWord);\\n \\t SELF:=LEFT;\\n \\t\\t),\\n \\t ALL\\t\\t\\t\\t\\t\\n \\t);\\n-- any can suggest, how it possible,thanks\", \"post_time\": \"2019-02-27 10:38:55\" },\n\t{ \"post_id\": 24623, \"topic_id\": 6363, \"forum_id\": 8, \"post_subject\": \"Re: HPCC Architecture Performance\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\nI will try to help. But can you send your ECL or some pseudo-code of your ECL to help me fully understand ?\\nIf you are running on Thor and really on Thor (not hThor) then I would say there\\nis a separate connection from each Thor slave. But I really wat to understand more before I feel I can answer accurately.\\nthanks,\\nmark\", \"post_time\": \"2019-02-27 18:34:00\" },\n\t{ \"post_id\": 24613, \"topic_id\": 6363, \"forum_id\": 8, \"post_subject\": \"HPCC Architecture Performance\", \"username\": \"wjblack\", \"post_text\": \"All,\\n\\nI'm trying to determine if the HPCC Architecture is playing a role in the performance degradation I'm seeing with one implementation. We have an ECL module that makes a POST call to a Java plugin that in turn makes a POST call a public facing end point. We are restricted from going out to a public network from the HPCC environment so from the Java plugin I've implemented it in such a way that it hops over to an AWS network that can reach the public network. While it is performing a double hop/POST it's able to securely reach the public facing end point and return the result. I'm seeing some degradation in performance and I'm trying to determine all steps in this setup that will be causing slowness. Regarding the HPCC Architecture and given the above mentioned setup how would network traffic flow out of the HPCC space and into the AWS realm? 
Would all the traffic go out through the master since it dishes out the slave and worker nodes? Some other way?\", \"post_time\": \"2019-02-27 16:56:20\" },\n\t{ \"post_id\": 24733, \"topic_id\": 6373, \"forum_id\": 8, \"post_subject\": \"Re: Join Options(Partition left and Partition Right)\", \"username\": \"Daniel_mani\", \"post_text\": \"Thanks for the Clarification Richard..\\n\\nRegards,\\nManikandan N\", \"post_time\": \"2019-03-05 12:54:51\" },\n\t{ \"post_id\": 24713, \"topic_id\": 6373, \"forum_id\": 8, \"post_subject\": \"Re: Join Options(Partition left and Partition Right)\", \"username\": \"rtaylor\", \"post_text\": \"Manikandan N,\\n\\nHere's the example: If you're joining by "lastname, firstname" then in order for the JOIN to work, all the "Tom Jones" records from both datasets have to be on the same node together. That means JOIN moves data around the nodes as it needs to in order to accomplish the task. \\n\\nPARTITION LEFT (the default behavior) says that the distribution of the data from both datasets is determined by the LEFT dataset, while PARTITION RIGHT says that the distribution of the data from both datasets is determined by the RIGHT dataset. \\n\\nSo, if you're JOINing a 10 Billion record dataset to a 20 Million record dataset, then the most even distribution of all data from both datasets would be determined by the larger dataset. Generally, you would make that one the LEFT dataset and go with the default partitioning, but if you have some particular need for that larger file to be the RIGHT dataset, then you should specify PARTITION RIGHT on the JOIN.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-04 14:42:11\" },\n\t{ \"post_id\": 24673, \"topic_id\": 6373, \"forum_id\": 8, \"post_subject\": \"Join Options(Partition left and Partition Right)\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Team,\\nCan anyone explain the functionality of Partition Left and Right with an example.\\n\\n\\nThanks & Regards,\\n\\nManikandan N\", \"post_time\": \"2019-03-04 08:06:33\" },\n\t{ \"post_id\": 24783, \"topic_id\": 6403, \"forum_id\": 8, \"post_subject\": \"Re: Difference Between IF and IFF\", \"username\": \"Daniel_mani\", \"post_text\": \"Thanks a lot Richard..\\n\\nRegards,\\nManikandan N\", \"post_time\": \"2019-03-07 10:07:48\" },\n\t{ \"post_id\": 24763, \"topic_id\": 6403, \"forum_id\": 8, \"post_subject\": \"Re: Difference Between IF and IFF\", \"username\": \"rtaylor\", \"post_text\": \"Manikandan N,\\n\\nI'm quoting Gavin Halliday from this post: https://hpccsystems.com/bb/viewtopic.php?f=8&t=1491&p=8422&hilit=IFF#p8422\\n... IF() only tries to ensure that only the correct branch of the IF() is executed if the type of the results is an action or a dataset. For scalars (and rows) both branches may be executed. \\n\\nTo ensure that the other branch isn't executed you can use IFF(). ...
\\nSo, the bottom line is that IFF is used where you need to ensure that only the True response is evaluated if the expression is TRUE and only the False response is evaluated if the expression is FALSE. \\n\\nTake a look at the referenced thread for the full discussion of that particular real-world problem that using IFF instead of IF solved.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-06 15:16:06\" },\n\t{ \"post_id\": 24753, \"topic_id\": 6403, \"forum_id\": 8, \"post_subject\": \"Difference Between IF and IFF\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Team,\\n\\n\\nCan say What is the Difference Between the IF and IFF. Where we can use IF and IFF With example?\\n\\nRegards,\\nManikandan N\", \"post_time\": \"2019-03-06 08:34:24\" },\n\t{ \"post_id\": 24883, \"topic_id\": 6433, \"forum_id\": 8, \"post_subject\": \"Re: failing the test case,using assert\", \"username\": \"suleman Shreef\", \"post_text\": \"thanks Richard\", \"post_time\": \"2019-03-08 04:41:38\" },\n\t{ \"post_id\": 24853, \"topic_id\": 6433, \"forum_id\": 8, \"post_subject\": \"Re: failing the test case,using assert\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nIt works on my environment, but it needs to be run on hThor (Thor will error out on it).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-07 19:24:47\" },\n\t{ \"post_id\": 24813, \"topic_id\": 6433, \"forum_id\": 8, \"post_subject\": \"Re: failing the test case,using assert\", \"username\": \"suleman Shreef\", \"post_text\": \"hi, richard can you suggest this is right or any other approach\\n\\nIMPORT std.File;\\n\\n\\nds2:=DATASET(['sulaiman::subfile::onlinenamephonesupd2',\\n'sulaiman::subfile::onlinenamephonesupd1',\\n'sulaiman1::subfile::nothing',\\n'sulaiman1::subfile::verything'],{varstring name1});\\n\\nJoinDs:=JOIN(File.LogicalfileList(namepattern := 'sulaiman*'),\\n\\t\\tds2,\\n\\t\\tleft.name=right.name1,\\n\\t\\tright outer);\\nassert(JoinDs,name=name1,'this files is missing=>'+name1);\\n\\n\\nthanks and regards,\\nSuleman\", \"post_time\": \"2019-03-07 13:01:14\" },\n\t{ \"post_id\": 24803, \"topic_id\": 6433, \"forum_id\": 8, \"post_subject\": \"failing the test case,using assert\", \"username\": \"suleman Shreef\", \"post_text\": \"Hi, team\\ni have logical file list out of that i didn't,remaining two file is available or not\\nand i need to return the those two file which is not available,using assert\\nthis list\\nsulaiman::contactinfo::contactinfo.csv\\nsulaiman::companyinfo::companyinfo1.csv\\nsulaiman::labexecise1::personsinfo\\nsulaiman::labexecise1::uniquestatecityrecords\\nsulaiman::stringmodi::fullname.csv\\nsulaiman::empoyeeinfo::empdetails.csv\\n\\nand missing file which is in doubt\\n\\nmovieslenstask2::sulaiman::incoming::movies.csv \\nmovieslenstask2::sulaiman::incoming::ratings.csv \\n\\necl code is\\nimport std.File;\\nIMPORT File.IncomingLogicalFiles;\\n//File.LogicalfileList(namepattern := 'sulaiman*');\\n// IncomingLogicalFiles.CompanyDay1LogicalName;\\nFileListRecSet:=TABLE(File.LogicalfileList(namepattern := 'sulaiman*'),{name});\\nFileListRecSet(name in [IncomingLogicalFiles.CompanyDay1LogicalName]);\", \"post_time\": \"2019-03-07 11:14:22\" },\n\t{ \"post_id\": 24943, \"topic_id\": 6443, \"forum_id\": 8, \"post_subject\": \"Re: accent character handling\", \"username\": \"suleman Shreef\", \"post_text\": \"Thank you so much Richard, I done same thing what you said and its working fine,\\nonce again thank you so much for your support.\", \"post_time\": \"2019-03-09 01:11:01\" },\n\t{ 
\"post_id\": 24923, \"topic_id\": 6443, \"forum_id\": 8, \"post_subject\": \"Re: accent character handling\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nDid you add the LOCALE option to your RECORD structure? \\n\\nI can try to duplicate your problem if you give me your code with inline data (not a reference to files that I can't get to).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-08 14:32:59\" },\n\t{ \"post_id\": 24903, \"topic_id\": 6443, \"forum_id\": 8, \"post_subject\": \"Re: accent character handling\", \"username\": \"suleman Shreef\", \"post_text\": \"But,Richard\\neven though it is not working.. \", \"post_time\": \"2019-03-08 05:44:27\" },\n\t{ \"post_id\": 24873, \"topic_id\": 6443, \"forum_id\": 8, \"post_subject\": \"Re: accent character handling\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nTry changing your UNICODE data type to UTF8 and set the LOCALE on the RECORD structure (as specified in ISO standards 639 and 3166) to the correct code to support your character set.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-07 19:30:59\" },\n\t{ \"post_id\": 24823, \"topic_id\": 6443, \"forum_id\": 8, \"post_subject\": \"accent character handling\", \"username\": \"suleman Shreef\", \"post_text\": \"hi team ,\\nmy sources file type is text in that character set is "ISO 8859-1"\\nwhen I open that file it shows all accent characters looks fine, \\nI spray that file using format "utf8" after that while reading logical file\\nunable to see the actual value.\\nNote:using the layout datatype is "utf8",\\nthis is my sources\\n\\n1,Agro Oils SRL - Buenos Aires\\n2,Agro Oils SRL - Buenos Aires\\n3,Ambiente-Recuperação de Materiais de Plásticos SA\\n4,欢迎\\n \\nthis is my ecl code\\n
Layout := RECORD\\nUNSIGNED4 UserId;\\nUNICODE NAME;\\nEND;\\n\\nUtf8File := DATASET('~sathish::file_utf8::file_utf8.txt', Layout, CSV);\\n// westernFile := DATASET('~sathish::file_western::file_westerneuro.txt', Layout, CSV);\\nWesternFile := DATASET('~sathish::file_westerneuro::file_westerneuro.txt', Layout, CSV);\\nAnsiFile := DATASET('~sathish::file_ansi::file_ansi.txt', Layout, CSV);\\n\\nOUTPUT(Utf8File,,NAMED('Utf8File'));\\nOUTPUT(WesternFile,,NAMED('WesternFile'));\\nOUTPUT(AnsiFile,,NAMED('AnsiFile'));
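For reference, a hedged sketch of the change suggested above (UTF8 data type plus a RECORD-level LOCALE) applied to this layout; the 'pt_BR' locale code is only an assumption to match the accented sample names:

// Illustrative only - choose a locale matching your data, per ISO 639 / 3166.
Layout2 := RECORD, LOCALE('pt_BR')
  UNSIGNED4 UserId;
  UTF8      NAME;    // UTF8 instead of UNICODE
END;
Utf8File2 := DATASET('~sathish::file_utf8::file_utf8.txt', Layout2, CSV);
OUTPUT(Utf8File2,,NAMED('Utf8File2'));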
\", \"post_time\": \"2019-03-07 13:50:47\" },\n\t{ \"post_id\": 25093, \"topic_id\": 6483, \"forum_id\": 8, \"post_subject\": \"Re: How to heading the header on sub file\", \"username\": \"suleman Shreef\", \"post_text\": \"Richard, \\n this normal filtration,I though they will be chance of having the build in function.\", \"post_time\": \"2019-03-12 05:20:39\" },\n\t{ \"post_id\": 24993, \"topic_id\": 6483, \"forum_id\": 8, \"post_subject\": \"Re: How to heading the header on sub file\", \"username\": \"rtaylor\", \"post_text\": \"suleman,\\n\\nYou could just use a simple filter, like this:\\nMySuperFileData := MySuperFile(SomeField <> 'HeaderText');
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-11 15:37:06\" },\n\t{ \"post_id\": 24953, \"topic_id\": 6483, \"forum_id\": 8, \"post_subject\": \"How to heading the header on sub file\", \"username\": \"suleman Shreef\", \"post_text\": \"Hi,\\nRichard, how to hide a first row in all "Sub file" in "Super file",\\nif I use CSV(HEADING(1)) while reading dataset , it only hide first row of sub file,\\nbut remaining row from other "sub file" does not reflect,if any best way to do this..?\\n\\n\\nthanks.\", \"post_time\": \"2019-03-09 01:29:27\" },\n\t{ \"post_id\": 25113, \"topic_id\": 6513, \"forum_id\": 8, \"post_subject\": \"Re: Fuzzy Match\", \"username\": \"JimD\", \"post_text\": \"You can also use the metaphone library support in the Standard Library to create "sounds like" fuzzy matching. \\n\\nSee http://cdn.hpccsystems.com/releases/CE- ... f#page=111\\n\\nHTH,\\n \\nJim\", \"post_time\": \"2019-03-12 14:39:21\" },\n\t{ \"post_id\": 25083, \"topic_id\": 6513, \"forum_id\": 8, \"post_subject\": \"Re: Fuzzy Match\", \"username\": \"suleman Shreef\", \"post_text\": \"Richard, I have done using string build in function, but based on matched score like 80 matched need to select.\", \"post_time\": \"2019-03-12 05:15:40\" },\n\t{ \"post_id\": 25003, \"topic_id\": 6513, \"forum_id\": 8, \"post_subject\": \"Re: Fuzzy Match\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nHere are a couple of ways:IMPORT Std;\\nCompanyRecSet:= DATASET([{'HPCC system','101 Sussex Street'},\\n {'HPCC','101 Sussex Street'},\\n {'HPCC Systems Inc','101 Sussex Street'},\\n {'HPCsystems Limited','201 Sussex Street'},\\n {'AET','1900 West Loop South, Suite 920'},\\n {'AET UK LIMITED','1900 West Loop South, Suite 920'}],\\n {STRING CompanyName,STRING Address});\\n\\nCompanyRecSet;\\nCompanyRecSet(Std.Str.StartsWith(CompanyName,'HPCC'));\\nCompanyRecSet(Std.Str.WildMatch(CompanyName,'*System*',TRUE));
The Standard Library Reference contains the documentation for these (and many other) string handling functions. Press F1 in the ECL IDE and you'll see that book is part of the online Help file.\\n\\nBTW, you will note that I changed your VARSTRINGs to STRINGs. In HPCC, STRING is the default string type. VARSTRING is only used for data coming in or going out that has/needs actual null terminators -- internally within HPCC there is no advantage to using VARSTRING over STRING.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-11 15:52:53\" },\n\t{ \"post_id\": 24983, \"topic_id\": 6513, \"forum_id\": 8, \"post_subject\": \"Fuzzy Match\", \"username\": \"suleman Shreef\", \"post_text\": \"Hi,\\nRichard, there is any way to do fuzzy matched and return the full company name.\\nreturn the full company name and address\\nall most all row are same. \\n\\n\\nCompanyRecSet:= dataset([{'HPCC system','101 Sussex Street'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'HPCC','101 Sussex Street'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'AET','1900 West Loop South, Suite 920'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'AET UK LIMITED','1900 West Loop South, Suite 920'}],\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{varstring ComapnyName,varstring Address});\\n\\nCompanyRecSet;
\\n\\n\\nthanks and regard \\nSuleman shreef\", \"post_time\": \"2019-03-11 12:13:45\" },\n\t{ \"post_id\": 25333, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Re: Install ECL bundle on server and IMPORT\", \"username\": \"Rekhagr@123\", \"post_text\": \"hi All,\\n\\nwhen i was trying to display graph in local am getting following issue :\\n\\neclcc 2081 Import names unknown module "Visualizer" 19 1 \\n\\nError eclcc 2081 Import names unknown module "Demo" 13 2 \\n\\nError eclcc 2167 Unknown identifier "Demo" 8 4 \\n\\nError eclcc 2167 Unknown identifier before "." (expected :=)\", \"post_time\": \"2019-03-19 05:29:54\" },\n\t{ \"post_id\": 25283, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Re: Install ECL bundle on server and IMPORT\", \"username\": \"JimD\", \"post_text\": \"This error is due to a denial of write access to the folder from which you are running it. You can run cmd prompt as an administrator. \\n\\nhttp://cdn.hpccsystems.com/releases/CE- ... pdf#page=7\\n\\nHTH, \\nJim\", \"post_time\": \"2019-03-18 14:14:22\" },\n\t{ \"post_id\": 25273, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Re: Install ECL bundle on server and IMPORT\", \"username\": \"Rekhagr@123\", \"post_text\": \"hi All,\\n\\nwhile am trying to install ecl bundle install https://github.com/hpcc-systems/Visualizer.git\\n getting error : Visualizer cannot be parsed as a bundle\\n\\nAccess is denied\\n\\nwhat is the issue?how can i resolve this.please update me on same.\", \"post_time\": \"2019-03-18 12:17:32\" },\n\t{ \"post_id\": 25203, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Re: Install ECL bundle on server and IMPORT\", \"username\": \"lpezet\", \"post_text\": \"Perfect!\\n\\nThanks a lot!\", \"post_time\": \"2019-03-13 16:25:44\" },\n\t{ \"post_id\": 25143, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Re: Install ECL bundle on server and IMPORT\", \"username\": \"JimD\", \"post_text\": \"To make a bundle available from the playground, you must install it to the ECLCCserver node. \\n\\nNote: It must be installed as the hpcc user.\\n\\n
\\n\\nHTH, \\nJim\", \"post_time\": \"2019-03-12 15:05:33\" },\n\t{ \"post_id\": 25073, \"topic_id\": 6533, \"forum_id\": 8, \"post_subject\": \"Install ECL bundle on server and IMPORT\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nLast conference I remember someone telling me it's possible to install an ECL Bundle on [forgot...but I'm thinking the node with the ECL Agent?] and being able to IMPORT that bundle in ECL Watch "Playground" (and consequently use it there).\\n\\nFor testing purposes, I'm using HPCC VM. I did install a bundle, for example:\\necl bundle install https://github.com/hpcc-systems/DataPatterns.git
\\nand it seemed to have succeeded. The following does show that bundle:\\necl bundle list
\\nbut I can't IMPORT it in ECL Watch Playground (error "Import names unknown module DataPatterns").\\n\\nNow my exact use case is a little different. I'm trying to run ECL code by calling eclplus but I'm thinking in the end it might be the same thing.\\n\\nIs it possible to install ECL Bundles server-side (i.e. on (specific) node(s)) and to leverage those via eclplus/Playground vs. installing those bundles client-side (i.e. locally)?\\n\\nThanks!\", \"post_time\": \"2019-03-12 05:06:13\" },\n\t{ \"post_id\": 25233, \"topic_id\": 6573, \"forum_id\": 8, \"post_subject\": \"Re: PRODUCT(GROUP,<field>) in the TABLE built-in\", \"username\": \"Allan\", \"post_text\": \"Salini gave me this:\\n\\nROUND(EXP(SUM(GROUP,LN(dds.value))));\\n
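A small self-contained illustration of that identity (the product is EXP of the SUM of the LNs, so it only holds for positive values); the group and value fields here are made up:

rec := {STRING1 grp, REAL8 value};
dds := DATASET([{'A',2},{'A',3},{'A',4},{'B',5},{'B',6}], rec);
ProdTbl := TABLE(dds,
                 {grp, REAL8 prod := ROUND(EXP(SUM(GROUP, LN(dds.value))))},
                 grp);
OUTPUT(ProdTbl);   // A -> 24, B -> 30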
\\n\\nIt's based on https://viralpatel.net/blogs/row-data-multiplication-in-oracle/\", \"post_time\": \"2019-03-14 17:07:04\" },\n\t{ \"post_id\": 25223, \"topic_id\": 6573, \"forum_id\": 8, \"post_subject\": \"PRODUCT(GROUP,<field>) in the TABLE built-in\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nSimple question:\\nThere is available to the TABLE built-in\\nSUM(GROUP,<field>)\\n
\\nthere is no:\\nPRODUCT(GROUP,<field>)\\n
\\nAny way to achieve that using TABLE cross-tab?\\nYours\\nAllan\", \"post_time\": \"2019-03-14 15:31:26\" },\n\t{ \"post_id\": 25253, \"topic_id\": 6583, \"forum_id\": 8, \"post_subject\": \"Re: HTTP Headers from Roxie Query\", \"username\": \"anthony.fishbeck\", \"post_text\": \"I don't think it's currently possible to access those headers via ECL. Open a JIRA at https://track.hpccsystems.com if you'd like to see that feature added.\", \"post_time\": \"2019-03-15 14:05:37\" },\n\t{ \"post_id\": 25243, \"topic_id\": 6583, \"forum_id\": 8, \"post_subject\": \"HTTP Headers from Roxie Query\", \"username\": \"francisco_escher\", \"post_text\": \"Hello,\\n\\nI am wondering if it is possible to read http headers from the request sent to a roxie query from the query itself.\\n\\nThanks!\", \"post_time\": \"2019-03-14 19:31:49\" },\n\t{ \"post_id\": 25513, \"topic_id\": 6603, \"forum_id\": 8, \"post_subject\": \"Re: FORMATXML\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,\\n\\nThe XPATHs in your RECORD structures are the problem. It works correctly this way:layout := RECORD\\n STRING arrString{XPATH('')}; // get self..\\nEND;\\nrLex := RECORD\\n STRING dappIDD{XPATH('test:request/dapp:IDD')}; \\n STRING AfterTimeStamp\\n {XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:AfterTimeStamp')};\\n DATASET(Layout) StatusCodes\\n {XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:StatusCodes/arr:string')};\\nEND;\\nreq := '<ROW>'+\\n '<test:request> '+\\n '<dapp:IDD>16212321321j321n3kj21j3kn213</dapp:IDD>'+\\n '<dapp:Request>'+\\n '<dapp:RecordReqGetRec>'+\\n '<dapp:AfterTimeStamp>2018-03-11</dapp:AfterTimeStamp>'+\\n '<dapp:StatusCodes>'+\\n '<arr:string>NFFF</arr:string>'+\\n '<arr:string>NPPP</arr:string>'+\\n '</dapp:StatusCodes>'+\\n '</dapp:RecordReqGetRec>'+\\n '</dapp:Request>'+\\n '</test:request>'+\\n '</ROW>'; \\n \\n FROMXML(rLex,req);\\n request1 := DATASET([FROMXML(rLex,req)],rLex);\\n request1;
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-26 20:18:34\" },\n\t{ \"post_id\": 25303, \"topic_id\": 6603, \"forum_id\": 8, \"post_subject\": \"FORMATXML\", \"username\": \"ravishankar\", \"post_text\": \"I am writing ECL SOAPCALL where I need to pass a array of string in the SOAP request. I am not getting appropriate response. However I tried FORMATXML to see the request XML is framed correctly from my layout definition. While parsing the node 'arr:string', I am not getting the XML node values in the FORMAT XML output. \\n\\nCould you please look in ?\\n\\n\\nlayout := RECORD\\n\\t\\tSTRING\\t\\t\\tarrString\\t\\t\\t \\t\\t\\t\\t\\t\\t{\\tXPATH('arr:string'\\t\\t\\t\\t\\t\\t\\t\\t)\\t}\\t; \\nEND ;\\n\\n\\tarrstring := RECORD\\n\\t\\tDATASET(layout) arrStatusstr {\\tXPATH('arr:string'\\t\\t\\t\\t\\t\\t\\t\\t)\\t}\\t ; \\n\\tEND ;\\t\\n\\n\\trStatusCodes\\t\\t:= \\tRECORD\\n\\t\\tSTRING\\t\\t\\tAfterTimeStamp\\t\\t\\t \\t{\\tXPATH('dapp:AfterTimeStamp'\\t\\t)\\t}\\t\\t\\t; \\n\\t\\tarrstring\\t\\tStatusCodes\\t\\t\\t\\t\\t\\t{ XPATH('dapp:StatusCodes'\\t\\t\\t\\t)\\t}\\t\\t\\t\\t\\t;\\n\\tEND\\t;\\n\\t\\n\\trRecordReqGetRec\\t\\t:= \\tRECORD\\n\\t\\trStatusCodes\\t\\t\\tRecordReqGetRec \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{\\tXPATH('dapp:RecordReqGetRec'\\t\\t)\\t}\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t;\\t\\t\\t\\t\\t\\t\\t\\t\\n\\tEND\\t;\\n\\t\\n\\trLex\\t\\t\\t:= \\tRECORD\\n\\t\\trRecordReqGetRec\\t\\t \\tRequest\\t\\t \\t\\t{\\tXPATH('dapp:Request'\\t\\t\\t\\t\\t\\t\\t\\t)\\t}\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t;\\n\\t\\tSTRING \\t\\t\\tdappIDD \\t\\t\\t\\t\\t\\t\\t\\t{\\tXPATH('dapp:IDD'\\t\\t\\t\\t\\t\\t\\t\\t\\t)\\t}\\t\\t\\t;\\t\\n\\tEND\\t;\\n\\n\\trRequest\\t:= \\tRECORD\\n\\t\\trLex\\t\\t\\t\\tRequest\\t\\t\\t\\t\\t\\t\\t\\t\\t{\\tXPATH('test:request'\\t\\t\\t\\t\\t\\t\\t\\t)\\t}\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t;\\n\\tEND\\t;\\n\\t\\n\\t\\nreq := '<ROW>'+\\n\\t\\t\\t'<test:request> '+\\n\\t\\t\\t\\t'<dapp:IDD>16212321321j321n3kj21j3kn213</dapp:IDD>'+\\n\\t\\t\\t\\t'<dapp:Request>'+\\n\\t\\t\\t\\t\\t'<dapp:RecordReqGetRec>'+\\n\\t\\t\\t\\t\\t'<dapp:AfterTimeStamp>2018-03-11</dapp:AfterTimeStamp>'+\\n\\t\\t\\t\\t\\t'<dapp:StatusCodes>'+\\n\\t\\t\\t\\t\\t\\t'<arr:string>NFFF</arr:string>'+\\n\\t\\t\\t\\t\\t\\t'<arr:string>NPPP</arr:string>'+\\n\\t\\t\\t\\t\\t'</dapp:StatusCodes>'+\\n\\t\\t\\t\\t'</dapp:RecordReqGetRec>'+\\n\\t\\t\\t'</dapp:Request>'+\\n\\t\\t'</test:request>'+\\n'</ROW>';\\t\\n\\t\\n\\trequest1 := DATASET([FROMXML(rRequest,req)],rRequest);\\n\\trequest1;
\", \"post_time\": \"2019-03-18 16:54:57\" },\n\t{ \"post_id\": 25463, \"topic_id\": 6613, \"forum_id\": 8, \"post_subject\": \"Re: visualizer issue\", \"username\": \"gsmith\", \"post_text\": \"Import names unknown module "Visualizer"
\\n\\nThis sounds like the Visualizer bundle did not install correctly on your machine. When installing you _may_ need to run the commands as "admin" (IOW in an elevated cmd prompt).\", \"post_time\": \"2019-03-25 15:02:28\" },\n\t{ \"post_id\": 25353, \"topic_id\": 6613, \"forum_id\": 8, \"post_subject\": \"visualizer issue\", \"username\": \"Rekhagr@123\", \"post_text\": \"hi,\\nwhen am trying to show graph generated from ecl file.am getting issue as follows:\\n\\nImport names unknown module "Visualizer" \\n\\nError eclcc 2081 Import names unknown module "Demo" \\n\\nError eclcc 2167 Unknown identifier "Demo" \\n\\nError eclcc 2167 Unknown identifier before "." (expected :=) \\n\\n"demo is file name "\\nplease give solution for this issue.\", \"post_time\": \"2019-03-19 07:24:39\" },\n\t{ \"post_id\": 25503, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"Re: error while using #EXPAND\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. I got the idea. What I gave was a contrived self-contained example to explain the issue. I have a more complex transformation to be done on a string constant.\\n\\nCheers\\nSrini\", \"post_time\": \"2019-03-25 20:28:11\" },\n\t{ \"post_id\": 25493, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"Re: error while using #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nSince you already have the fieldnames in a string constant, why not just add a set of field names to the code, like this:Rec := {STRING aaa,STRING bbb,STRING ccc};\\n\\nfieldList := 'aaa,bbb,ccc';\\nfieldSet := ['aaa','bbb','ccc'];\\n\\nds := DATASET([{ 'PORTLY', 'STUART' , '39'},\\n { 'PORTLY', 'STACIE' , '36'},\\n { 'PORTLY', 'DARA' , ' 1'},\\n { 'PORTLY', 'GARRETT', ' 4'}], Rec);\\n \\nmac_fn2(inds,infl) := FUNCTIONMACRO\\n RETURN TABLE(inds,{#expand(infl)});\\nENDMACRO;\\n\\nmac_fn2(ds,fieldSet[1]);\\nmac_fn2(ds,fieldSet[1]+','+fieldSet[3]);
\\nThat way you could have the best of both worlds.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-25 20:00:38\" },\n\t{ \"post_id\": 25483, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"Re: error while using #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Srini,is there a list of functions that are constant folded?
Not that I know of. \\n\\nIn this case I think it has to do with the context and not the function itself. \\n\\nPassing a string constant to the FUNCTIONMACRO is the key here. And in this case that constant is just truncated by the Find function's return value.\\n\\nIOW, these both work for me:mac_fn2(ds,fieldList[1..Str.Find(fieldList,',',1)-1]);\\nmac_fn2(ds,fieldList[1..Str.Find(fieldList,',',2)-1]);
So the constant value requirement for Template Language is met by passing in a string constant to the FUNCTIONMACRO and the compiler treats the calculated range as a constant expression (and probably does the calculation) at compile time.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-25 15:57:10\" },\n\t{ \"post_id\": 25473, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"Re: error while using #EXPAND\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. That is awesome. \\nMay be I missed it in the documentation, but is there a list of functions that are constant folded?\\n\\nCheers\\nSrini\", \"post_time\": \"2019-03-25 15:16:03\" },\n\t{ \"post_id\": 25453, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"Re: error while using #EXPAND\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nThis works for me:Rec := {STRING aaa,STRING bbb,STRING ccc};\\n\\nfieldList := 'aaa,bbb,ccc';\\n\\nds := DATASET([{ 'PORTLY', 'STUART' , '39'},\\n { 'PORTLY', 'STACIE' , '36'},\\n { 'PORTLY', 'DARA' , ' 1'},\\n { 'PORTLY', 'GARRETT', ' 4'}], Rec);\\n \\nmac_fn2(inds,infl) := FUNCTIONMACRO\\n RETURN TABLE(inds,{#expand(infl)});\\nENDMACRO;\\n\\nIMPORT Std.Str;\\n// firstfield := Str.SplitWords(fieldList,',')[1];\\n// mac_fn2(ds,firstfield);\\nmac_fn2(ds,fieldList[1..Str.Find(fieldList,',',1)-1]);
I think the difference is the Find can resolve to a string constant and the SplitWords to a dynamic set.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-25 14:24:28\" },\n\t{ \"post_id\": 25423, \"topic_id\": 6633, \"forum_id\": 8, \"post_subject\": \"error while using #EXPAND\", \"username\": \"omnibuzz\", \"post_text\": \"I am trying to use #EXPAND to evaluate a constant expression and it works fine.\\n\\nRec := {STRING a,STRING b,STRING c};\\n\\nfieldList := 'a,b,c';\\n\\nds := DATASET([{ 'PORTLY', 'STUART' , '39'},\\n { 'PORTLY', 'STACIE' , '36'},\\n { 'PORTLY', 'DARA' , ' 1'},\\n { 'PORTLY', 'GARRETT', ' 4'}], Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\n\\nmac_fn(inds,infl) := FUNCTIONMACRO\\n\\tRETURN TABLE(inds,{#expand(infl)});\\nENDMACRO;\\nmac_fn(ds,fieldlist);;\\n
\\n\\nWhen I try to call a deterministic function on a constant and pass that to #EXPAND, it complains. Is there a way to force the compiler to constant fold firstfield? \\n\\n\\nRec := {STRING a,STRING b,STRING c};\\n\\nfieldList := 'a,b,c';\\n\\nds := DATASET([{ 'PORTLY', 'STUART' , '39'},\\n { 'PORTLY', 'STACIE' , '36'},\\n { 'PORTLY', 'DARA' , ' 1'},\\n { 'PORTLY', 'GARRETT', ' 4'}], Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\nmac_fn2(inds,infl) := FUNCTIONMACRO\\n\\tRETURN TABLE(inds,{#expand(infl)});\\nENDMACRO;\\n\\nIMPORT Std.Str;\\nfirstfield := Str.SplitWords(fieldList,',')[1];\\nmac_fn2(ds,firstfield);\\n
\\n\\n-Srini\", \"post_time\": \"2019-03-24 13:09:52\" },\n\t{ \"post_id\": 25553, \"topic_id\": 6673, \"forum_id\": 8, \"post_subject\": \"Re: Error:CMemoryBufferSerialStream::get read past end of st\", \"username\": \"omnibuzz\", \"post_text\": \"Sure. Thanks, Richard.\\nRegards\\nSrini\", \"post_time\": \"2019-04-02 14:15:51\" },\n\t{ \"post_id\": 25543, \"topic_id\": 6673, \"forum_id\": 8, \"post_subject\": \"Re: Error:CMemoryBufferSerialStream::get read past end of st\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nSince it works in Roxie but not in Thor, I can only suggest a JIRA report requesting Thor support for DATASET parameters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-02 13:49:19\" },\n\t{ \"post_id\": 25533, \"topic_id\": 6673, \"forum_id\": 8, \"post_subject\": \"Error:CMemoryBufferSerialStream::get read past end of stream\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n I published the code given below in Thor \\n\\nInChildRec := RECORD\\n\\tSTRING20 Child1;\\nEND;\\n\\nInRec := RECORD\\n\\tSTRING20 Col1;\\n\\tDATASET(InChildRec) Children;\\nEND;\\n\\n\\nDATASET(InRec) InDs := DATASET([],InRec) :STORED('InputData');\\n\\nOUTPUT(InDs, NAMED('barf'));\\n
\\n\\nand it throws the exception:\\n\\n\\nException\\n Reported by: eclagent\\n Message: System error: -1: Graph[1], workunitread[2]: SLAVE #1 [XXX.XXX.XXX.XXX:PPPPP]: CMemoryBufferSerialStream::get read past end of stream (20,8), \\n
\\n\\nThe same code works fine in Roxie. I guess it is not allocating space for storing the dataset while compiling. Can you help?\\n\\nCheers\\nSrini\", \"post_time\": \"2019-04-02 12:56:59\" },\n\t{ \"post_id\": 26073, \"topic_id\": 6763, \"forum_id\": 8, \"post_subject\": \"Re: Divide the dataset into time windows\", \"username\": \"vzeufack\", \"post_text\": \"Thanks very much Allan!\", \"post_time\": \"2019-05-03 14:06:16\" },\n\t{ \"post_id\": 26033, \"topic_id\": 6763, \"forum_id\": 8, \"post_subject\": \"Re: Divide the dataset into time windows\", \"username\": \"Allan\", \"post_text\": \"Hi vzeufack,\\n\\nI saw your post and it reminded me of a very similar problem I had a long time back.\\n\\nOne that RICHARD also solved for me.\\n\\nSee post:\\nhttps://hpccsystems.com/bb/viewtopic.php?f=10&t=3383 \\n\\nIts a bit different in that I needed to group all records that were within some time period (a year I think). So one record could end up in two groups.\\n\\nNote quite what you want but an interesting problem and interesting solution.\\nYours\\nAllan\", \"post_time\": \"2019-05-03 11:11:08\" },\n\t{ \"post_id\": 25643, \"topic_id\": 6763, \"forum_id\": 8, \"post_subject\": \"Re: Divide the dataset into time windows\", \"username\": \"vzeufack\", \"post_text\": \"Thanks very much RTAYLOR!\\n\\nIt works perfectly although your proposition changes a bit my logic. Indeed I was trying to know how to implement the following logic in ECL, which would be done using a loop in languages like Java:\\n\\n- Get the time of the first record\\n- compute time difference with next records until the difference exceeds an 1h\\n- group\\n- Repeat starting with next record.\\n\\nI think the logic of you proposed is to group records according to their hours right?\\n\\nSo concretely, if the first record has timestamp 12:10:15, then I would like to group all records within an hour from 12:10:15. Then I would get the next record which would have timestamp 13:10:15 or greater and do the same process. How can I achieve that?\", \"post_time\": \"2019-04-10 21:43:41\" },\n\t{ \"post_id\": 25603, \"topic_id\": 6763, \"forum_id\": 8, \"post_subject\": \"Re: Divide the dataset into time windows\", \"username\": \"rtaylor\", \"post_text\": \"vzeufack,\\n\\nAssuming your time data is similar to our Time_t format, you could simply add a grouping field, like this:IMPORT Std;\\ndatalayout := RECORD\\n STRING lineId;\\n Std.Date.Date_t date;\\n Std.Date.Time_t time; //integer time in HHMMSS format\\n STRING eventId;\\n STRING eventTemplate;\\nEND;\\n\\nds := DATASET([{'1',20190101, 12300,'A','ABC'},\\n {'2',20190101, 15300,'B','ABC'},\\n {'3',20190101,123300,'C','ABC'},\\n {'4',20190101,125300,'D','ABC'},\\n {'5',20190101,172300,'E','ABC'},\\n {'6',20190101,175300,'F','ABC'}\\n ], datalayout);\\n\\nHrGrp(Std.Date.Time_t t) := TRUNCATE(t/10000);\\n\\nPROJECT(ds,\\n TRANSFORM({datalayout,UNSIGNED1 TimeGrp},\\n SELF.TimeGrp := HrGrp(LEFT.time),\\n SELF := LEFT));
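Separately, for the follow-up above about anchoring each window at the first record's own timestamp rather than the clock hour, here is a hedged ITERATE sketch; it assumes all records fall on one date, converts the HHMMSS time to seconds inline, and the 3600-second window and field names are illustrative:

ToSecs(UNSIGNED4 t) := (t DIV 10000) * 3600 + ((t DIV 100) % 100) * 60 + (t % 100);

WinRec := {datalayout, UNSIGNED4 ts, UNSIGNED4 winStart, UNSIGNED2 winId};

WinRec InitT(datalayout L) := TRANSFORM
  SELF.ts       := ToSecs(L.time);
  SELF.winStart := 0;
  SELF.winId    := 0;
  SELF := L;
END;

WinRec StepT(WinRec L, WinRec R) := TRANSFORM
  newWin := L.winId = 0 OR (R.ts - L.winStart) > 3600; // start a new window when out of range
  SELF.winId    := IF(newWin, L.winId + 1, L.winId);
  SELF.winStart := IF(newWin, R.ts, L.winStart);
  SELF := R;
END;

windowed := ITERATE(PROJECT(SORT(ds, date, time), InitT(LEFT)), StepT(LEFT, RIGHT));
windowed;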
Then use that grouping field however you need to, such as in the GROUP function, or the dedup condition for a ROLLUP or self-JOIN, or ... whatever you need.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-10 14:20:21\" },\n\t{ \"post_id\": 25593, \"topic_id\": 6763, \"forum_id\": 8, \"post_subject\": \"Divide the dataset into time windows\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI have a dataset containing dates and times in each record. Now, I would like to divide the dataset into one hour windows. So, I would like smaller sets, such that the difference in time between the last and first record is at most one hour. I can easily compute the difference between times and know whether the difference is less or equal than an hour but I do not know how to construct the smaller sets. \\n\\nMy record looks like this:\\n\\ndatalayout := RECORD\\n STRING lineId;\\n Std.Date.Date_t date;\\n INTEGER time;\\n STRING eventId;\\n STRING eventTemplate;\\nEND;\\n\\ndataset := DATASET(dataPath, datalayout, THOR);\", \"post_time\": \"2019-04-10 13:43:31\" },\n\t{ \"post_id\": 25713, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"Re: max ID\", \"username\": \"wjblack\", \"post_text\": \"Thanks Richard...\", \"post_time\": \"2019-04-15 14:03:30\" },\n\t{ \"post_id\": 25703, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"Re: max ID\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nYes, that is better than the ROLLUP solution. \\n\\nAnd, if your end result would be to JOIN my TABLE result back to your dsA dataset to get the rest of the fields in that max subID record, then this would also be better than that one, too, because it eliminates the JOIN and would get you straight to the result you wanted). Like this:LayoutA := RECORD\\n unsigned8 id;\\n integer8 subId;\\n\\tSTRING stuff;\\nEND;\\n\\ndsA := DATASET([{2,'100','A'},\\n {1,'111','B'},\\n {2,'120','C'},\\n {1,'221','D'},\\n {3,'100','E'},\\n {3,'325','F'}],LayoutA);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n// tbl := TABLE(dsA,{id,MaxSubID := MAX(GROUP,SubID)},id);\\n// dsB := JOIN(dsA,tbl,\\n// LEFT.ID = RIGHT.ID AND LEFT.subID = RIGHT.MaxsubID,\\n// TRANSFORM(LEFT) );\\ndsB := DEDUP(SORT(dsA,id,-subId),id); //eliminates the JOIN\\n \\ndsB;
\\n\\nHTH,\\nRichard\", \"post_time\": \"2019-04-15 13:43:22\" },\n\t{ \"post_id\": 25693, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"Re: max ID\", \"username\": \"wjblack\", \"post_text\": \"This is for my initial solution and not Richards. Better yet this works better and is less resource intensive.\\n\\ndsB := dedup(sort(dsA,id,-subId),id);
\", \"post_time\": \"2019-04-15 13:17:39\" },\n\t{ \"post_id\": 25683, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"Re: max ID\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nWhile your solution does work, it can be easier than that:TABLE(dsA,{id,MaxSubID := MAX(GROUP,SubID)},id);
A simple Crosstab report (covered in the Intro to ECL class, part 2 ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-15 13:05:40\" },\n\t{ \"post_id\": 25673, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"Re: max ID\", \"username\": \"wjblack\", \"post_text\": \"I figured it out.\\n\\n
\\ndsB := rollup(sort(dsA,id),\\n\\t\\t\\t\\t\\t\\t\\tleft.id = right.id,\\n\\t\\t\\t\\t\\t\\t\\ttransform(LayoutB,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.subId := if(left.id=right.id, max(left.subId,right.subId),right.subId);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself := left\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t),\\n\\t\\t\\t\\t\\t\\t local);\\n
\\n\\n1\\t221\\n2\\t120\\n3\\t325\", \"post_time\": \"2019-04-15 02:01:35\" },\n\t{ \"post_id\": 25663, \"topic_id\": 6793, \"forum_id\": 8, \"post_subject\": \"max ID\", \"username\": \"wjblack\", \"post_text\": \"How can I get the max 'subID' for each 'id' and create a second dataset dsB from it.\\n\\n\\nLayoutA := RECORD\\n unsigned8 id;\\n integer8 subId;\\nEND;\\n\\nLayoutB := RECORD\\n unsigned8 id;\\n integer8 subId;\\nEND;\\n\\ndsA := DATASET([ {2,'100'},\\n {1,'111'},\\n {2,'120'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{1,'221'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{3,'100'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{3,'325'}],LayoutA);\\n
\\n\\nI need \\n\\n\\ndsA := DATASET([{1,'221'},\\n {2,'120'},\\n\\t\\t {3,'325'}],LayoutB);\\n
\", \"post_time\": \"2019-04-14 22:10:48\" },\n\t{ \"post_id\": 26023, \"topic_id\": 6833, \"forum_id\": 8, \"post_subject\": \"Re: Combining rows\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThere are now YouTube Videos on ROLLUP:\\nhttps://www.youtube.com/watch?v=LE5HDegz5II&t=65s\\nhttps://www.youtube.com/watch?v=E-R138uuR3M\\n\\nThese might help.\\nAllan\", \"post_time\": \"2019-05-03 10:55:43\" },\n\t{ \"post_id\": 25793, \"topic_id\": 6833, \"forum_id\": 8, \"post_subject\": \"Re: Combining rows\", \"username\": \"vzeufack\", \"post_text\": \"Thanks very much Richard!\", \"post_time\": \"2019-04-18 18:36:17\" },\n\t{ \"post_id\": 25773, \"topic_id\": 6833, \"forum_id\": 8, \"post_subject\": \"Re: Combining rows\", \"username\": \"rtaylor\", \"post_text\": \"vzeufack,\\n\\nEither the TABLE or ROLLUP functions (both covered in the Intro to ECL Part 2 class) will do that:layout := RECORD\\n INTEGER id;\\n INTEGER value1;\\n INTEGER value2;\\nEND;\\nds := DATASET([ {1, 10, 5},\\n {1, 20, 0},\\n {2, 5, 10},\\n {2, 5, 0},\\n {2, 0, 15}\\n ],layout);\\nROLLUP(ds,LEFT.id=RIGHT.id,\\n TRANSFORM(layout,\\n SELF.value1 := LEFT.value1 + RIGHT.value1,\\n SELF.value2 := LEFT.value2 + RIGHT.value2,\\n SELF := LEFT));\\nTABLE(ds,\\n {id,\\n INTEGER value1 := SUM(GROUP,ds.value1),\\n INTEGER value2 := SUM(GROUP,ds.value2)},\\n id);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-18 15:36:59\" },\n\t{ \"post_id\": 25763, \"topic_id\": 6833, \"forum_id\": 8, \"post_subject\": \"Combining rows\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nLet's consider we have\\n\\nEXPORT layout := RECORD\\n INTEGER id;\\n INTEGER value1;\\n INTEGER value2;\\nEND;
\\n\\nHow to convert this:\\n\\nid value1 value2\\n 1 10 5\\n 1 20 0\\n 2 5 10\\n 2 5 0\\n 2 0 15
\\n\\nto:\\n\\nid value1 value2\\n 1 30 5\\n 2 10 25
\\n\\nBest,\", \"post_time\": \"2019-04-18 15:23:22\" },\n\t{ \"post_id\": 25903, \"topic_id\": 6873, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.FileExists with nothor()\", \"username\": \"rati\", \"post_text\": \"Adding few to global() helped. \\n\\nglobal_newSuperfileLink := global(newSuperfileLink,few);
\", \"post_time\": \"2019-04-23 15:13:51\" },\n\t{ \"post_id\": 25853, \"topic_id\": 6873, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.FileExists with nothor()\", \"username\": \"rtaylor\", \"post_text\": \"rati,\\n\\nThere is no inherent problem with using STD.File.FileExists inside a NOTHOR. I just tested this code on one of our training clusters and it ran fine:\\nimport Std;\\nLogicalFile := '~CLASS::RT::Intro::Persons';\\nNOTHOR(STD.File.FileExists(LogicalFile));
So your problem is not with that function per se. This code also works correctly:\\nimport Std;\\nds := DATASET([{'~CLASS::RT::Intro::Persons'},\\n {'~CLASS::RT::Intro::Accounts'},\\n {'~CLASS::RT::Intro::NoFileForThis'}\\n ],{STRING LogicalFile});\\n\\n nothor(PROJECT(ds,\\n TRANSFORM({BOOLEAN LogicalFileExists,STRING LogicalFile},\\n SELF.LogicalFileExists := STD.File.FileExists(left.LogicalFile),\\n SELF := left ))) : INDEPENDENT;\\n
\\nPerhaps it's an interaction with your use of the GLOBAL function?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-19 16:32:06\" },\n\t{ \"post_id\": 25823, \"topic_id\": 6873, \"forum_id\": 8, \"post_subject\": \"STD.File.FileExists with nothor()\", \"username\": \"rati\", \"post_text\": \"I am trying to use nothor() for a simple project() . \\n\\n\\nHere is my code :\\n----------------------SNIP---------------------------------------------------\\nglobal_newSuperfileLink := global(newSuperfileLink);\\n\\nnewSuperfileLinkCheckLogical := nothor(PROJECT(global_newSuperfileLink,\\n TRANSFORM(superfile_logicalfile_flat_layout,\\nSELF.LogicalFileExists := STD.File.FileExists(left.LogicalFile),\\nSELF := left ))) : INDEPENDENT;\\n\\nThen I use newSuperfileLinkCheckLogical for rest of the processing …..\\n----------------------SNIP---------------------------------------------------\\n\\n\\nThis gives me an error : Error: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. (0, 0), 4818,\\n\\nI just want to check and record if those files actually exist.\", \"post_time\": \"2019-04-18 22:38:05\" },\n\t{ \"post_id\": 26093, \"topic_id\": 6913, \"forum_id\": 8, \"post_subject\": \"Re: Error while defining INTERFACE with child dataset\", \"username\": \"balajisampath\", \"post_text\": \"It worked after adding LINKCOUNTED\\n\\nRec := RECORD\\n STRING15 col1;\\n\\t UNSIGNED2 col2; \\n\\t LINKCOUNTED DATASET (Text_Segment_Rec) TextLineSegments {MAXLENGTH(5000)}; \\nEND;
\", \"post_time\": \"2019-05-06 09:57:33\" },\n\t{ \"post_id\": 26003, \"topic_id\": 6913, \"forum_id\": 8, \"post_subject\": \"Error while defining INTERFACE with child dataset\", \"username\": \"balajisampath\", \"post_text\": \"I am trying to create external libraries. My layouts/interface includes child datasets\\n\\nIs child dataset not supported in INTERFACE/LIBRARY or am I making some mistake. Please help\\n\\nText_Segment_Rec := RECORD\\n\\t\\tINTEGER3 LineSeq; \\n\\t\\tINTEGER3 segmentSeq; \\n STRING200 descText; \\n STRING20 DataType := ''; \\n STRING10 Code := '';\\nEND;\\n\\nText_Segment_Rec_Plus := RECORD\\n STRING15 Id;\\n\\t\\tUNSIGNED2 SeqNum;\\n\\t\\tText_Segment_Rec;\\nEND;\\n\\nRec := RECORD\\n STRING15 col1;\\n\\t UNSIGNED2 col2; \\n\\t DATASET (Text_Segment_Rec) TextLineSegments {MAXLENGTH(5000)}; \\nEND;\\n\\niface1( \\tDATASET(Rec) recIn = DATASET([],Rec)\\t) := INTERFACE\\n\\texport boolean Response;\\nEND;\\n\\nlib1 ( DATASET(Rec) recIn\\t) := MODULE,LIBRARY(iface1)\\n\\texport Response := true;\\nEND;\\n\\t\\n#WORKUNIT('name','lib1');\\nBUILD(lib1);
\\n\\nError: assert(!recordRequiresLinkCount(record)) failed - file: hqltcppc2.cpp, line 274 (0, 0), 3000,
\\n\\nIt works fine if the child dataset is removed from "Rec"\", \"post_time\": \"2019-04-30 14:06:10\" },\n\t{ \"post_id\": 26303, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"ghalliday\", \"post_text\": \"ok, I understand the issue. It will only occur on Thor.\\n\\nThe problem is that & only preserves local ordering. \\n\\nSay you have a 3 way thor with a dataset A with parts a1,a2,a3 and a dataset B with parts b1, b2, b3. Then the global ordering of A & B will be a1, b1, a2, b2, a3, b3.\\n\\nThis is because the parts on node 1 will be appended in order, then the parts on node 2, followed by the parts on node 3.\\n\\nIn your example the TABLE() statements will cause the rows to be distributed to different nodes. The order of the results, and the node they live on coming out of the TABLE() will depend on the size of the thor cluster.\\n\\nIt may have changed between versions because the default implementation of TABLE() may have changed (I haven't checked). In general there are no guarantees about the order or distribution of rows coming out of a (non local) TABLE.\", \"post_time\": \"2019-05-15 11:05:39\" },\n\t{ \"post_id\": 26293, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"ghalliday\", \"post_text\": \"Please report a jira - including which platform you are running against (hthor/thor/roxie). I tried your example, and didn't get the results you had, so I must have been doing something different.\\n\\nI had:\\nBlow ,3\\nSmith ,2\\nJoe ,5\\nJoe ,5\\nBlow ,3\\nSmith ,2\", \"post_time\": \"2019-05-15 08:49:58\" },\n\t{ \"post_id\": 26283, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis issue has been introduced between:\\n\\ncommunity_6.4.14-1 server internal_6.4.38-1 compiler unknown <= works\\neclide_7.2.0-rc4 server internal_7.0.18-rc1 Compiler 7.2.0 community_7.2.0.rc4 <= bug
\", \"post_time\": \"2019-05-15 08:18:49\" },\n\t{ \"post_id\": 26273, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"rtaylor\", \"post_text\": \"hhuang,\\n\\nI duplicated this. You need to submit a JIRA to report the issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-14 20:58:29\" },\n\t{ \"post_id\": 26263, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"bforeman\", \"post_text\": \"Will, \\nI think you might have stumbled on a compiler issue. I will ask the development team to look at this.\\n\\nThank You!\\n\\nBob\", \"post_time\": \"2019-05-14 20:57:01\" },\n\t{ \"post_id\": 26173, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"hhuang\", \"post_text\": \"Thank you Bob.\\n\\nI tired to replace "+" with "&", but it is still the same.\\nI also tired to concat record sets outside a function and it had no problem. Don't know why this happened.\\n\\nHere is my code:\\n\\n\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\nEND;\\n\\nallPeople := DATASET([ {1,'Joe','Smith'},\\n {2,'Joe','Blow'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {3,'Joe','Blow'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {4,'Joe','Blow'},\\n {3,'Joe','Smith'}],Layout_Person);\\n\\n// Outputs ---\\nconcat_tb1(dataset(layout_person) dt) := function\\nt1 := table(dt,{string30 name:= dt.lastname, cnt:=count(group)},lastname);\\nt2 := table(dt,{string30 name:= dt.firstname, cnt:=count(group)},firstname);\\nreturn t1 & t2;\\nend;\\n\\nconcat_tb2(dataset(layout_person) dt) := function\\nt1 := table(dt,{string30 name:= dt.lastname, cnt:=count(group)},lastname);\\nt2 := table(dt,{string30 name:= dt.firstname, cnt:=count(group)},firstname);\\nreturn t2 & t1;\\nend;\\n\\n\\noutput(concat_tb1(allpeople));\\noutput(concat_tb2(allpeople));\\n\\n
\\n\\nI was expecting two different results:\\n\\nBlow \\t3\\nSmith \\t2\\nJoe \\t5\\n\\nand\\n\\nJoe \\t5\\nBlow \\t3\\nSmith \\t2\\n\\nBut it always generates the first one.\", \"post_time\": \"2019-05-09 18:08:29\" },\n\t{ \"post_id\": 26153, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"Re: lost order when concat dataset using "+"\", \"username\": \"bforeman\", \"post_text\": \"Use the & operator instead of the + operator.\", \"post_time\": \"2019-05-09 17:43:28\" },\n\t{ \"post_id\": 26143, \"topic_id\": 6943, \"forum_id\": 8, \"post_subject\": \"lost order when concat dataset using "+"\", \"username\": \"hhuang\", \"post_text\": \"Hello,\\n\\nI want to concat two datasts as the return of a function, however I found that the order of the return dataset is always the same. Is there a way I can preserve the order when doing concatenation?\\n\\nThank you,\\nWill\\n\\n\\n\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\nEND;\\n\\nallPeople := DATASET([ {1,'Fred','Smith'},\\n {2,'Joe','Blow'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {3,'Joe','Blow'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {4,'Joe','Blow'},\\n {3,'Jane','Smith'}],Layout_Person);\\n\\nsomePeople := allPeople(LastName = 'Smith');\\n\\n// Outputs ---\\nconcat_tb1(dataset(layout_person) dt) := function\\nt1 := table(dt,{string30 name:= dt.lastname, cnt:=count(group)},lastname);\\nt2 := table(dt,{string30 name:= dt.firstname, cnt:=count(group)},firstname);\\nreturn t1 + t2;\\nend;\\n\\nconcat_tb2(dataset(layout_person) dt) := function\\nt2 := table(dt,{string30 name:= dt.firstname, cnt:=count(group)},firstname);\\nt1 := table(dt,{string30 name:= dt.lastname, cnt:=count(group)},lastname);\\nreturn t2 + t1;\\nend;\\n\\noutput(concat_tb1(allpeople));\\noutput(concat_tb2(allpeople));\\n
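If a particular output order has to survive a non-local concatenation on Thor, one hedged workaround (it changes the returned layout) is to tag each branch with a sequence number and SORT the combined set, for example inside concat_tb1:

// seq is an added, illustrative field; t1 and t2 are the two TABLE results above.
t1s := PROJECT(t1, TRANSFORM({UNSIGNED1 seq, RECORDOF(t1)}, SELF.seq := 1, SELF := LEFT));
t2s := PROJECT(t2, TRANSFORM({UNSIGNED1 seq, RECORDOF(t2)}, SELF.seq := 2, SELF := LEFT));
RETURN SORT(t1s + t2s, seq);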
\", \"post_time\": \"2019-05-08 20:08:25\" },\n\t{ \"post_id\": 26243, \"topic_id\": 6953, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON to DataSet\", \"username\": \"wjblack\", \"post_text\": \"Thanks for your help. I always do start with a search of the forum.\", \"post_time\": \"2019-05-13 19:41:13\" },\n\t{ \"post_id\": 26223, \"topic_id\": 6953, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON to DataSet\", \"username\": \"Allan\", \"post_text\": \"In addition, I always find it worth my while to search the Forum on say 'JSON'.\\nThere are other posts that might have helped.\\n\\nYours\\nAllan\", \"post_time\": \"2019-05-13 15:41:22\" },\n\t{ \"post_id\": 26213, \"topic_id\": 6953, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON to DataSet\", \"username\": \"Allan\", \"post_text\": \"Hi wjblack,\\n\\nDivide and conquer:\\nx := '{"childNames": [{"FN" : "Sasha"},{"FN":"Malia"}]}';\\n\\nPERSONRec := RECORD\\n STRING FN{xpath('FN')};\\nEND;\\n\\nPeopleRec := RECORD\\n DATASET(PERSONRec) childNames {xpath('childNames')};\\nEND;\\n\\nFROMJSON(PeopleRec,x);\\n
\\n\\nYours\\nAllan\", \"post_time\": \"2019-05-13 15:19:54\" },\n\t{ \"post_id\": 26183, \"topic_id\": 6953, \"forum_id\": 8, \"post_subject\": \"FROMJSON to DataSet\", \"username\": \"wjblack\", \"post_text\": \"How can I turn the json array into a child dataset verses a SET?\\n\\n\\nx := '{"childNames": ["Sasha Obama","Malia Obama"]}';\\n\\nPersonRec := RECORD\\n\\tDATASET({UNICODE name}) childNames {xpath('childNames')};\\nEND;\\n\\nline := FROMJSON(PersonRec,x);\\n\\nline;\\n
\", \"post_time\": \"2019-05-09 19:02:36\" },\n\t{ \"post_id\": 26553, \"topic_id\": 7003, \"forum_id\": 8, \"post_subject\": \"Re: Loading XML into a DATASET using PARSE.\", \"username\": \"Allan\", \"post_text\": \"Yea : Thanks, Richard.\\n\\nActually, it would be quite unusual to NOT to want to do some processing on ingested data, so XMLPROJECT and a TRANSFORM would almost always be needed anyway.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-05-21 10:43:40\" },\n\t{ \"post_id\": 26533, \"topic_id\": 7003, \"forum_id\": 8, \"post_subject\": \"Re: Loading XML into a DATASET using PARSE.\", \"username\": \"ghalliday\", \"post_text\": \"I think FROMXML may be closer to what you want.\", \"post_time\": \"2019-05-20 14:09:01\" },\n\t{ \"post_id\": 26503, \"topic_id\": 7003, \"forum_id\": 8, \"post_subject\": \"Re: Loading XML into a DATASET using PARSE.\", \"username\": \"Allan\", \"post_text\": \"The original post from 2017:\\nNameRec := RECORD\\n STRING Firstname{xpath('fname')};\\n STRING Lastname{xpath('surname')};\\nEND;\\n\\nnamesRec := RECORD\\n UNSIGNED4 EmployeeID{xpath('NI')};\\n DATASET(NameRec) names{xpath('name')}; //note the xpath\\nEND;\\n\\nPeopleRec := RECORD\\n DATASET(NamesRec) People{xpath('/People')};\\nEND;\\n\\nx := '<Row>'\\n+'<People>'\\n+'<NI>1234567</NI>'\\n+'<name><fname>Allan</fname><surname>Wrobel</surname></name>'\\n+'<name><fname>Anna</fname><surname>Smith</surname></name>'\\n+'<name><fname>Nina</fname><surname>Harrison</surname></name>'\\n+'</People>'\\n+'<People>'\\n+'<NI>98765</NI>'\\n+'<name><fname>Colin</fname><surname>Harison</surname></name>'\\n+'<name><fname>James</fname><surname>Wilson</surname></name>'\\n+'<name><fname>Nick</fname><surname>Pine</surname></name>'\\n+'</People>'\\n+'</Row>';\\nrec := FROMXML(PeopleRec,x);\\nrec;\\n
\\nWorks fine using FROMXML.\\n\\nI'm trying the equivalent (nested dataset) using PARSE.\\ne.g.\\ndd := DATASET([{x}],{STRING line});\\nPARSE(dd,line,PeopleRec,XML('Row'));\\n
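For comparison, the XMLPROJECT route looks roughly like this (a hedged sketch against the same NameRec/NamesRec layouts, returning one row per People element; not verified against this exact XML):

NamesRec GetPeople(RECORDOF(dd) L) := TRANSFORM
  SELF.EmployeeID := (UNSIGNED4)XMLTEXT('NI');
  SELF.names := XMLPROJECT('name',
                  TRANSFORM(NameRec,
                            SELF.Firstname := XMLTEXT('fname'),
                            SELF.Lastname  := XMLTEXT('surname')));
END;
PeopleDS := PARSE(dd, line, GetPeople(LEFT), XML('Row/People'));
PeopleDS;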
\\n\\nI can use XMLPROJECT, but don't really need the flexibility or power of a TRANSFORM.\", \"post_time\": \"2019-05-20 13:39:28\" },\n\t{ \"post_id\": 26493, \"topic_id\": 7003, \"forum_id\": 8, \"post_subject\": \"Loading XML into a DATASET using PARSE.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nFollowing up on a 2017 post on loading nested XML into a dataset:\\noriginal post)\\nBut replacing FROMXML with:\\n\\ndd := DATASET([{x}],{STRING line});\\nPARSE(dd,line,PeopleRec,XML('Row'));\\n
\\nFails syntax check with:\\nError: Need to supply a value for field 'People' (37, 15), 2170,
\\n\\nSo how does one load XML into a dataset using PARSE? \\nYours\\nAllan\\n\\nHum, There is XMLPROJECT and this can be done with a transform, but I should not need to go down the 'transform' route.\\n\\nXMLPROJECT works fine, but I don't really need the TRANSFORM\\n as I'm just setting fields in the record for the content of the tags in the xml file.\\nin the link.\", \"post_time\": \"2019-05-20 10:25:44\" },\n\t{ \"post_id\": 26863, \"topic_id\": 7123, \"forum_id\": 8, \"post_subject\": \"Re: System error: 3000\", \"username\": \"vzeufack\", \"post_text\": \"Problem solved!!!\\n\\nThat was my bad. I sprayed the dataset using delimited and referenced it as follows: EXPORT rawDS := DATASET(rawFilePath, layout, THOR);
\\nI switched THOR to CSV and now it working fine!!! \", \"post_time\": \"2019-06-29 00:26:31\" },\n\t{ \"post_id\": 26853, \"topic_id\": 7123, \"forum_id\": 8, \"post_subject\": \"Re: System error: 3000\", \"username\": \"vzeufack\", \"post_text\": \"I updated both my VM and my ECL IDE to the latest and I am still getting almost the same error:\\n\\n"Error: System error: 1301: Pool memory exhausted: pool id 4194304 exhausted, requested 3085 heap(1/4294967295) global(1/1216) WM(0..38) (in Disk Read G1 E2) (0, 0), 1301,"\", \"post_time\": \"2019-06-28 19:26:12\" },\n\t{ \"post_id\": 26833, \"topic_id\": 7123, \"forum_id\": 8, \"post_subject\": \"System error: 3000\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI did the normal process to output a file: upload to cluster, reference it in ECL code, defined the dataset and then output the dataset. I never had an issue with that process until today. I am getting the following error:\\n\\n"System error: 3000: assert(required <= maxOffset) failed - file: rtlcommon.hpp, line 98 (in Disk Read G1 E2"\\n\\nHow to solve it?\\n\\nBest regards,\", \"post_time\": \"2019-06-28 15:42:29\" },\n\t{ \"post_id\": 26883, \"topic_id\": 7133, \"forum_id\": 8, \"post_subject\": \"Re: Cluster thor not listening for workunits\", \"username\": \"vzeufack\", \"post_text\": \"I downgraded to HPCCSystemsVM-amd64-7.2.22-1 and the error disappeared
\", \"post_time\": \"2019-07-04 21:49:54\" },\n\t{ \"post_id\": 26873, \"topic_id\": 7133, \"forum_id\": 8, \"post_subject\": \"Cluster thor not listening for workunits\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI am running HPCCSystemsVM-amd64-7.2.24-1 on VirtualBox with the proper network settings (NAT for Adapter 1 and Host Only Adapter for Adapter 2). However, when I launch ECL watch from Mozilla, I get the following errors:\\n- Cluster thor not listening for workunits; thor.thor: queue active; \\n- Cluster thor_roxie not listening for workunits; thor_roxie.thor: queue active;\\n\\nYou can visualize it on this image: download/file.php?mode=view&id=1163\\n\\nThose errors are preventing me from uploading files to the cluster. Please how to resolve them?\\n\\nThanks,\\nVannel\", \"post_time\": \"2019-07-04 18:11:28\" },\n\t{ \"post_id\": 27103, \"topic_id\": 7203, \"forum_id\": 8, \"post_subject\": \"Re: Default Value in RECORD\", \"username\": \"rtaylor\", \"post_text\": \"Artur,\\n\\nThe TRANSFORM must supply a value for every field in the result RECORD structure. That's why you get the error. The DEFAULT field modifier works like this:
// Deprecated Layout\\nEXPORT lMyLayout01 := RECORD\\n STRING message {XPATH('messagge')};\\n UNSIGNED code {XPATH('code')};\\nEND;\\n\\n// New Record Layout\\nEXPORT lMyLayout02 := RECORD\\n STRING message2 {XPATH('Message'),DEFAULT('default')};\\n UNSIGNED code2 {XPATH('Code'),DEFAULT(42)};\\nEND;\\n\\n// Main record structure that uses both layouts.\\nEXPORT lMyFinalLayout := RECORD\\n lMyLayout01 MyError;\\n lMyLayout02 MyError2;\\nEND;\\n\\nr01 := ROW({'abc', 123}, lMyLayout01);\\n\\nt01 := ROW(TRANSFORM(lMyFinalLayout,\\n SELF.MyError := r01;\\n SELF := []));\\nr01;\\nt01;
Notice that I added the DEFAULT modifier to the fields, then added the SELF := [] to the TRANSFORM to to handle the errors. The DEFAULT values are what show up as the result of the SELF := [].\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-07-25 19:33:50\" },\n\t{ \"post_id\": 27093, \"topic_id\": 7203, \"forum_id\": 8, \"post_subject\": \"Re: Default Value in RECORD\", \"username\": \"abaruchi\", \"post_text\": \"Hi Richard, thanks for your reply.\\n\\n\\n// Deprecated Layout\\nEXPORT lMyLayout01 := RECORD\\n STRING message {XPATH('messagge')};\\n UNSIGNED code {XPATH('code')};\\nEND;\\n\\n// New Record Layout\\nEXPORT lMyLayout02 := RECORD\\n STRING message2 {XPATH('Message')};\\n UNSIGNED code2 {XPATH('Code')};\\nEND;\\n\\n// Main record structure that uses both layouts.\\nEXPORT lMyFinalLayout := RECORD\\n lMyLayout01 MyError;\\n lMyLayout02 MyError2 := [];\\nEND;\\n\\nr01 := ROW({'abc', 123}, lMyLayout01);\\n\\nt01 := ROW(TRANSFORM(lMyFinalLayout,\\n SELF.MyError := r01;));\\n
\\n\\nWhen I run this code, I got an error saying that I didn't provide values to MyError2.message2 and MyError2.code2. As far as I understand, since I defined MyError2 in lMyFinalLayout as an empty row, t01 transform shouldn't care about it.\\n\\nThanks,\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-07-25 13:53:19\" },\n\t{ \"post_id\": 27083, \"topic_id\": 7203, \"forum_id\": 8, \"post_subject\": \"Re: Default Value in RECORD\", \"username\": \"rtaylor\", \"post_text\": \"Artur,\\n\\nThe default value for a field in a RECORD structure requires use of the definition operator (:=) so your code example should be:EXPORT myLayout := RECORD\\n STRING oldvalue1;\\n STRING oldvalue2;\\n ....\\n STRING newValue1 := '';\\nEND
You should also look at possibly using the {DEFAULT('')} field modifier, like this:EXPORT myLayout := RECORD\\n STRING oldvalue1;\\n STRING oldvalue2;\\n ....\\n STRING newValue1{DEFAULT('')};\\nEND
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-07-24 21:19:21\" },\n\t{ \"post_id\": 27073, \"topic_id\": 7203, \"forum_id\": 8, \"post_subject\": \"Default Value in RECORD\", \"username\": \"abaruchi\", \"post_text\": \"Hi,\\n\\nIs it possible to provide a Default Value for a given field in a RECORD statement? I need to add a new field in some layouts, which are already being used and I don't wanna to change old code, that already use this layout. So, my idea would be add a new field to this layout and set a default value for it and change it only when I use it in my new code.\\n\\nI tried this, but didn't worked:\\n\\nEXPORT myLayout := RECORD\\n STRING oldvalue1;\\n STRING oldvalue2;\\n ....\\n STRING newValue1 = '';\\nEND;\\n
\\n\\nBut every time try this, I see an error telling that I should provide a value to newValue1. \\n\\nThanks,\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-07-24 21:08:58\" },\n\t{ \"post_id\": 27243, \"topic_id\": 7233, \"forum_id\": 8, \"post_subject\": \"Re: Field Removal from Record Definition\", \"username\": \"rtaylor\", \"post_text\": \"Artur,Is it possible to remove one (or more) fields from a record definition that uses another record definition?
Yes. In the RECORD structure docs look at the section titled "Field Inheritance" and you'll see the exception list format, like this:\\nlMyLayout01 := RECORD\\n STRING field01;\\n STRING field02;\\n STRING fieldToRemove;\\nEND;\\n\\n\\n/*****\\n* Remove 'fieldToRemove' from lMyLayout01 here, so we would have, field01, field02 and field03.\\n*****/\\nlMyLayout02 := RECORD\\n lMyLayout01 AND NOT fieldToRemove;\\n STRING field03\\nEND;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-06 19:59:14\" },\n\t{ \"post_id\": 27233, \"topic_id\": 7233, \"forum_id\": 8, \"post_subject\": \"Field Removal from Record Definition\", \"username\": \"abaruchi\", \"post_text\": \"Hi Guys,\\n\\nIs it possible to remove one (or more) fields from a record definition that uses another record definition? \\n\\n\\nlMyLayout01 := RECORD\\n STRING field01;\\n STRING field02;\\n STRING fieldToRemove;\\nEND;\\n\\n\\n /*****\\n * Remove 'fieldToRemove' from lMyLayout01 here, so we would have, field01, field02 and field03.\\n *****/\\nlMyLayout02 := RECORD\\n lMyLayout01;\\n STRING field03\\nEND;\\n
\\n\\nIf Im not wrong, I saw something like this during the ECL course, but I wasn't able to find. \\n\\nThanks in advance,\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-08-06 19:37:14\" },\n\t{ \"post_id\": 27253, \"topic_id\": 7243, \"forum_id\": 8, \"post_subject\": \"Error: 0: pyembed: No module named 'numpy'\", \"username\": \"tlhumphrey2\", \"post_text\": \"The platform is 7.2.12 and I’m using python3.5 (I checked the version using an EMBED python3 function). My embedded python is the following:\\nIMPORT Python3;\\nstring packages_available() := EMBED(Python3)\\nimport numpy\\nreturn 'All packages are installed'\\nENDEMBED;\\n\\nOUTPUT(packages_available());
\\n\\nBut, I’m getting the error on the subject line. Does this mean that the shared object library for python3.5 needs to be updated? Or, am I missing plugins?\", \"post_time\": \"2019-08-06 21:06:22\" },\n\t{ \"post_id\": 27381, \"topic_id\": 7273, \"forum_id\": 8, \"post_subject\": \"Re: Interesting 'Daisy Chain' problem.\", \"username\": \"Allan\", \"post_text\": \"Thanks, Both of you.\\n\\nTony, Whatever they're paying you it's not enough.\\n\\nAllan\", \"post_time\": \"2019-08-20 08:06:50\" },\n\t{ \"post_id\": 27353, \"topic_id\": 7273, \"forum_id\": 8, \"post_subject\": \"Re: Interesting 'Daisy Chain' problem.\", \"username\": \"rtaylor\", \"post_text\": \"Tony,\\n\\nNice!! \\n\\nRichard\", \"post_time\": \"2019-08-19 15:14:15\" },\n\t{ \"post_id\": 27343, \"topic_id\": 7273, \"forum_id\": 8, \"post_subject\": \"Re: Interesting 'Daisy Chain' problem.\", \"username\": \"Tony Kirk\", \"post_text\": \"You're essentially looking for all those not referenced in next_state, right?\\n\\n
ds(state not in set(ds, next_state));
\\n\\nJOIN logic could be used, especially if ds is large.\", \"post_time\": \"2019-08-19 15:05:16\" },\n\t{ \"post_id\": 27333, \"topic_id\": 7273, \"forum_id\": 8, \"post_subject\": \"Re: Interesting 'Daisy Chain' problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's how I would do it:rec := {UNSIGNED1 state, UNSIGNED1 next_state, STRING4 Task};\\nds := DATASET([{ 1, 2, 'Job1'},\\n { 2, 3, 'Job2'},\\n { 3, 0, 'Job3'},\\n { 55, 77, 'Job4'},\\n { 77, 22, 'Job5'},\\n { 22, 23, 'Job6'},\\n { 23, 0, 'Job7'},\\n { 4, 0, 'Job8'}],rec);\\nStartTbl := TABLE(ds,{ds,BOOLEAN start := FALSE});\\nFlagged := ITERATE(StartTbl,\\n TRANSFORM({StartTbl},\\n SELF.start := LEFT.next_state = 0,\\n SELF := RIGHT));\\nStarters := PROJECT(Flagged(start = TRUE),rec);\\nStarters;
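For larger datasets, the JOIN form suggested above would be along these lines (a hedged sketch on the same ds):

// LEFT ONLY keeps only the states that never appear as anyone's next_state,
// i.e. the start of each chain.
StartersJ := JOIN(ds, ds,
                  LEFT.state = RIGHT.next_state,
                  TRANSFORM(LEFT),
                  LEFT ONLY);
StartersJ;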
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-08-19 14:15:05\" },\n\t{ \"post_id\": 27323, \"topic_id\": 7273, \"forum_id\": 8, \"post_subject\": \"Interesting 'Daisy Chain' problem.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI have a series of tasks whose run sequence is dictated by a 'state' number. Runs can also occur in parallel. Each sequence is terminated with a 'next state' of zero.\\ne.g.\\n\\n\\nstate next-state Task\\n 1 2 Job1\\n 2 3 Job2\\n 3 0 Job3\\n 55 77 Job4\\n 77 22 Job5\\n 22 23 Job6\\n 23 0 Job7\\n 4 0 Job8\\n
\\nThis would run these jobs in parallel:\\n\\nJob1,Job2,Job3\\nJob4,Job5,Job6,Job7\\nJob8\\n
\\nThe daisy-chaining from one state to another is easy enough, my question is, given just this dataset, what ECL returns a list of all the start states.\\ne.g., in this case, the result would be 1,4,55 (I've deliberately mixed the numbers up as the numbers given to states may well not be in sequence)\\n\\nThere is LOOP, but I wondered if there was something a bit clever?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-08-19 13:24:48\" },\n\t{ \"post_id\": 27463, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Excellent. Thank you for posting your example. It's very helpful.\", \"post_time\": \"2019-08-22 15:46:26\" },\n\t{ \"post_id\": 27453, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"brw\", \"post_text\": \"Thanks everyone.\\n\\nIt turns out to be pretty simple implementation in Python, I hope this helps someone else who is also struggling with the lack of built in Support for REST calls \\n\\nIn my case I am POSTing a semicolon delimited String containing muliple records and the Service returns a semicolon delimited String with a different layout to the Request.\\n\\nNOTE: Response_Layout has all fields as STRING, whereas JourneysIn_Layout has the correct types, so there is some subsequent code to project the result to a Typed Dataset, which I haven't shown here.\\n\\n
DATASET(Response_Layout) CallREST(STRING IP, UNSIGNED2 Port, DATASET(JourneysIn_Layout) JourneysIn) := EMBED(Python)\\n\\n\\timport httplib\\n\\timport base64\\n\\timport ssl\\n\\timport csv\\n\\timport StringIO\\n\\n\\tconn = httplib.HTTPConnection(IP, Port) \\n\\n\\theaders = {}\\n\\theaders['Content-Type'] = "text/csv"\\n\\t#headers['Authorization'] = \\\\\\n\\t# "Basic %s" % base64.standard_b64encode("admin:ibm")\\n\\n\\tStrWriter = StringIO.StringIO()\\n\\n\\tBodyWriter = csv.writer(StrWriter, delimiter=';')\\n\\n\\tfor Journey in JourneysIn:\\n\\t\\tBodyWriter.writerow(Journey)\\n\\n\\tBody = StrWriter.getvalue()\\n\\n\\treq = conn.request('POST', '/api/odin/enrich_journeys/hpcc', headers=headers, body=Body)\\n\\tres = conn.getresponse()\\n\\n\\tcsvLines = csv.reader(res.read().splitlines(), delimiter=';')\\n\\tfor csvLine in csvLines:\\n\\t\\t\\tyield tuple(csvLine)\\n\\nENDEMBED;
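\n\nCalled from ECL the embedded function then behaves like any other dataset source, something like this (the IP, port and input attribute name are placeholders rather than my real values):enriched := CallREST('10.0.0.1', 8080, journeysToSend); // journeysToSend is any DATASET(JourneysIn_Layout)\nOUTPUT(enriched);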
\", \"post_time\": \"2019-08-22 15:12:58\" },\n\t{ \"post_id\": 27443, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"You might consider using embedded JavaScript (v8). I haven't tried it myself, but it may have some pretty convenient HTTP related functions built in. \\n\\nYou'll have to figure out the HTTP call stuff elsewhere, but you can find some simple embedded Javascript test cases here:\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... mbedjs.ecl\\n\\nPython and JAVA are other options.\\n\\nAs mentioned, we do use our own internal C++ HTTP classes so you would need the headers and link libraries in place to use those. There are other c++ HTTP libraries you could use but would require installing dependencies.\", \"post_time\": \"2019-08-21 20:52:22\" },\n\t{ \"post_id\": 27423, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"DSC\", \"post_text\": \"Unfortunately, I believe the platform uses internal code (read: no external libraries) for TCP/IP communication. Leveraging that is possible, but it would actually require an even bigger change to your dev environment because you would have to install the platform source code instead of just the libcurl headers. Given the choice, libcurl would be the smaller installation by a long shot.\\n\\n(For clarity: You would need to install the dev libcurl stuff only on your HPCC Systems node that is running eclccserver. You can get away with installing the non-dev/regular libcurl on all other HPCC Systems nodes. libcurl's headers are required only for compilation of your workunit.)\\n\\nIf you cannot change the environment then you may need to use Std.System.Util.CmdProcess() or PIPE() and invoke the curl binary to do what you need. Hopefully your distribution has the curl binary already installed, and in a standard location.\", \"post_time\": \"2019-08-21 19:13:23\" },\n\t{ \"post_id\": 27413, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"brw\", \"post_text\": \"Thanks again Dan for the C++, but unfortunately it doesn't work on my environment presumably because I don't have libcurl dependencies installed and modifying our environment to add them is probably not an option.\\n\\nIs there a way to do this without the need to install any additional libraries - ie with just the libraries that come with HPCC as standard?\\n\\nWhich Libraries does the built in HPCC HTTPCall function use under the covers to call a REST GET, can I use the same libraries in a C++ HPCC function?\", \"post_time\": \"2019-08-21 16:55:20\" },\n\t{ \"post_id\": 27393, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"DSC\", \"post_text\": \"The embedded version is far more performant. ECL is transpiled to C++, so there are no external environments or context switching involved with using an embedded C++ function. The function is simply included in the generated C++ source code.\\n\\nStd.System.Util.CmdProcess() and PIPE() invoke external applications by forking the current process and executing the binary in place of the fork. Data I/O has to be serialized, parsed, etc. 
There is more work and more system resources involved for every invocation.\\n\\nIf you're processing a lot of records then it will definitely pay off in the long run to use the libcurl version.\\n\\nDan\", \"post_time\": \"2019-08-20 11:38:19\" },\n\t{ \"post_id\": 27383, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"brw\", \"post_text\": \"Thanks Dan,\\n\\nI managed to get a test working in my Local VM HPCC using the CmdProcess function (see ECL below).\\n\\nWhat are the Pros and Cons of this approach compared to the embedded C++ solution?\\n\\ncmd := 'curl -X POST http://10.243.5.1:8080/journey_lookup -H "Content-Type: application/json"';\\n\\nBody := '{"Journeys":[{"JourneyId":1206582,"Id":[1826],"XLongitude":[-84.22373],"YLatitude":[34.15089],"HorizontalSpeed":"OA==","ReverseGeoCodeRow":null,"PulseDateTimeUTCAsUnixEpoch":null,"RoadSpeed":null,"RoadSegId":null,"RoadTypeId":null,"Validated":null,"CountryId":null,"RoadCategory":[0],"BoundaryId":null,"NearestRoadDistance":null,"Proximity":null,"NearestRoadSegId":null}],"CalculateRailwayProximity":false,"RoadSpeedRule":"SmartPhone"}';\\n\\nSTD.System.Util.CmdProcess(cmd + ' -d \\\\'' + Body + '\\\\'', '');
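\n\nPIPE looks like another possible route, since it hands the command output back as a dataset rather than just running it (a rough sketch I have not tried; the quoting of the body is the same fiddle as above):resp := PIPE(cmd + ' -d \\\\'' + Body + '\\\\'', {STRING line}, CSV);\nOUTPUT(resp);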
\", \"post_time\": \"2019-08-20 10:54:31\" },\n\t{ \"post_id\": 27373, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Re: Call REST POST from ECL\", \"username\": \"DSC\", \"post_text\": \"Hi Ben,\\n\\nIf you're comfortable with C++ access to curl (technically, libcurl) then the following my give you a leg up on writing your own embedded function:\\n\\nIMPORT Std;\\n\\n#WORKUNIT('name', 'libcurl testing');\\n\\nSTRING CallCurl(CONST VARSTRING url) := EMBED(C++)\\n #option library curl\\n\\n #include <curl/curl.h>\\n\\n struct MemoryStruct\\n {\\n char* memory;\\n size_t size;\\n size_t capacity;\\n };\\n\\n static void InitMemoryStructCapacity(MemoryStruct& mem, size_t initCapacity)\\n {\\n mem.memory = static_cast<char*>(rtlMalloc(initCapacity));\\n mem.size = 0;\\n mem.capacity = initCapacity;\\n }\\n\\n static size_t CaptureIncomingReply(void* contents, size_t size, size_t nmemb, void* userp)\\n {\\n size_t incomingDataSize = size * nmemb;\\n MemoryStruct* mem = static_cast<struct MemoryStruct*>(userp);\\n\\n if (mem->size + incomingDataSize > mem->capacity)\\n {\\n size_t newCapacity = mem->capacity * 2;\\n\\n // Keep doubling capacity until it is greater than what we need\\n while (mem->size + incomingDataSize > newCapacity)\\n {\\n newCapacity *= 2;\\n }\\n\\n mem->memory = static_cast<char*>(rtlRealloc(mem->memory, newCapacity));\\n mem->capacity = newCapacity;\\n }\\n\\n memcpy(&(mem->memory[mem->size]), contents, incomingDataSize);\\n mem->size += incomingDataSize;\\n \\n return incomingDataSize;\\n }\\n\\n #body\\n\\n __lenResult = 0;\\n __result = NULL;\\n\\n CURL* curlHandle = curl_easy_init();\\n\\n if(curlHandle)\\n {\\n CURLcode curlResponseCode;\\n struct curl_slist* headers = NULL;\\n MemoryStruct captureBuffer;\\n\\n // Initialize our capture buffer to a reasonable size to avoid\\n // memory reallocation\\n InitMemoryStructCapacity(captureBuffer, 8196);\\n\\n // headers = curl_slist_append(headers, "Content-Type: application/json");\\n // curl_easy_setopt(curlHandle, CURLOPT_HTTPHEADER, headers);\\n\\n curl_easy_setopt(curlHandle, CURLOPT_URL, url);\\n curl_easy_setopt(curlHandle, CURLOPT_FOLLOWLOCATION, 1);\\n curl_easy_setopt(curlHandle, CURLOPT_NOPROGRESS, 1);\\n curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, CaptureIncomingReply);\\n curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, static_cast<void*>(&captureBuffer));\\n\\n curlResponseCode = curl_easy_perform(curlHandle);\\n\\n if (curlResponseCode == CURLE_OK)\\n {\\n long httpResponseCode;\\n\\n curl_easy_getinfo(curlHandle, CURLINFO_RESPONSE_CODE, &httpResponseCode);\\n\\n if (httpResponseCode == 200 && captureBuffer.size > 0)\\n {\\n __lenResult = captureBuffer.size;\\n __result = captureBuffer.memory;\\n }\\n }\\n\\n curl_easy_cleanup(curlHandle);\\n curl_slist_free_all(headers);\\n }\\nENDEMBED;\\n\\n//-------\\n\\nCallCurl('http://example.com');\\n
\\nThis was extracted from a running project that uses libcurl for REST access. As written, it uses GET but obviously you can make curl execute a POST instead. I've left a couple of commented-out around (populating headers) that illustrate where that goes as well.\\n\\nOh, it does assume that you have a developer's version of libcurl installed (meaning, both the library and header files).\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2019-08-19 17:50:16\" },\n\t{ \"post_id\": 27363, \"topic_id\": 7283, \"forum_id\": 8, \"post_subject\": \"Call REST POST from ECL\", \"username\": \"brw\", \"post_text\": \"I see from the help that HTTPCALL only supports GET and that there is an outstanding ticket to add the ability to call POST https://track.hpccsystems.com/browse/HPCC-9805.\\n\\nMy question is what is the workaround for this while we are waiting for this to be supported?\\n\\nCan a C++ ECL function be written to do this? Has anyone got an example of the C++ code I need to implement in an ECL function to do this?\\n\\nCan curl be called from the built in PIPE function or CmdProcess function?\\n\\nThanks\\n\\nBen\", \"post_time\": \"2019-08-19 17:04:36\" },\n\t{ \"post_id\": 27433, \"topic_id\": 7293, \"forum_id\": 8, \"post_subject\": \"Re: GMT to EST conversion in Version 7.2.28\", \"username\": \"DSC\", \"post_text\": \"Daylight savings is a very complicated issue, as its observance varies by both geography and local laws. Geography isn't part of the Std.Date.TimeZone module, so there is not a way of automatically determining if DST should be included or not.\\n\\nThere is a single function that can tell you if DST may be if effect: Std.Date.IsLocalDaylightSavingsInEffect(). However, its return value is only for the current date/time and is determined by the time zone set in the HPCC Systems cluster's operating system; it also does not take into account geography or local laws, nor does it allow you to determine DST for a random date/time.\\n\\nIf you can make a determination regarding DST prior to calling Std.Date.TimeZone.AdjustTimeTZ(), you can adjust your arguments to switch between 'EST' or 'EDT' for the toTimeZoneAbbrev parameter. Note that in some cases you need to supply a toLocation argument as well to differentiate between time zones with the same abbreviation (e.g. 'CST').\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2019-08-21 19:29:21\" },\n\t{ \"post_id\": 27403, \"topic_id\": 7293, \"forum_id\": 8, \"post_subject\": \"GMT to EST conversion in Version 7.2.28\", \"username\": \"Anustiya\", \"post_text\": \"Hi,\\n\\nWhat is the best way to convert time from GMT to EST in ECL version 7.2.28.1?\\n\\nThere is a built-in function, which converts the given input time(in Time_t format) to the required TimeZone as given below\\n\\nIMPORT STD;\\nSTD.Date.TimeZone.AdjustTimeTZ(155026,'GMT','EST','','');\\n\\n//output : 105026
\\n\\nBut how can we get the actual converted time wrt Daylight Savings as we are not giving any specific date as input to the function?\", \"post_time\": \"2019-08-21 12:43:56\" },\n\t{ \"post_id\": 27503, \"topic_id\": 7303, \"forum_id\": 8, \"post_subject\": \"Re: Missing Records reading a Sprayed File\", \"username\": \"rtaylor\", \"post_text\": \"SChatman85,\\n\\nI agree that it is probably the single quote characters in your data causing the missing 131 records. The most likely reason is that the "missing" records are between the two "pairs" of single quotes. \\n\\nUsing the QUOTE('') option is your workaround, but I suggest you report this issue in JIRA (http://track.hpccsystems.com) and attach your data file to the report (if you can legally do so) to make it easy for the developers to duplicate the problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-09-03 15:22:17\" },\n\t{ \"post_id\": 27493, \"topic_id\": 7303, \"forum_id\": 8, \"post_subject\": \"Re: Missing Records reading a Sprayed File\", \"username\": \"SChatman85\", \"post_text\": \"I have carried out some further testing on this, and found that if I put the separator as it should be, and override QUOTE then I can get the correct record count.\\n\\nds_in := DATASET(inputFilename, in_layout, CSV(MAXLENGTH(2000000), SEPARATOR(','),terminator(['\\\\r\\\\n']),quote('')),OPT);
\\n\\n(If you Omit QUOTE, as per the documentation it will default to what was used during Spray)\\n\\nThe adjacent records in the file when viewed outside of HPCC did not contain any quotes - so not 100% certain why the records were lost but assume it is somehow related to the 4 ' marks within the data file I was using.\", \"post_time\": \"2019-09-03 14:51:59\" },\n\t{ \"post_id\": 27483, \"topic_id\": 7303, \"forum_id\": 8, \"post_subject\": \"Missing Records reading a Sprayed File\", \"username\": \"SChatman85\", \"post_text\": \"Hi,\\n\\nI've sprayed a file and have tried to read it using a basic definition:\\n\\nds_in := DATASET(inputFilename, in_layout, CSV(MAXLENGTH(2000000), SEPARATOR(','),terminator(['\\\\r\\\\n']),quote('"')),OPT);
\\n\\nWhen I run a count on this it is short of 131 records compared to the file I started with.\\n\\nIf I change the Separator to something random (such as *) then it returns the correct record count. I've carried out a JOIN to look for records that are missed, when I check them in the raw file there is nothing peculiar about them, or the preceding records.\\n\\nThere are no 'random' characters, no extra-line breaks, no stray Quote's (single or double)\\n\\nThis seems to be a bug with the CSV definition? Does anyone have any further suggestions I could follow to pinpoint the root cause of these records being missed from the Dataset?\", \"post_time\": \"2019-09-02 14:17:36\" },\n\t{ \"post_id\": 27533, \"topic_id\": 7323, \"forum_id\": 8, \"post_subject\": \"Re: Create an Empty Key Inline\", \"username\": \"rtaylor\", \"post_text\": \"Artur,\\n\\nWe have inline DATASET but there is no inline INDEX. DICTIONARY acts like an INDEX and can be defined inline, so perhaps that will get you what you need. \\n\\nOr you can create an empty INDEX like this:ds := DATASET([{''}],{STRING10 key});\\nIDX := INDEX(ds,{key},'~RTTEST::EmptyKey');\\nBUILD(IDX);
although I cannot think of an instance where that would be useful. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-09-10 13:47:45\" },\n\t{ \"post_id\": 27523, \"topic_id\": 7323, \"forum_id\": 8, \"post_subject\": \"Create an Empty Key Inline\", \"username\": \"abaruchi\", \"post_text\": \"Hi Guys,\\n\\nIs it possible to create an empty key inline to use in my code? I did't generated it already, but need it to perform some tests in my code.\\n\\nThanks,\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-09-10 13:39:15\" },\n\t{ \"post_id\": 27993, \"topic_id\": 7353, \"forum_id\": 8, \"post_subject\": \"Re: Merge datasets without sorting issue\", \"username\": \"jtaylor178\", \"post_text\": \"Thank you that seemed to work\", \"post_time\": \"2019-10-31 16:13:20\" },\n\t{ \"post_id\": 27673, \"topic_id\": 7353, \"forum_id\": 8, \"post_subject\": \"Re: Merge datasets without sorting issue\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor178,\\n\\nDid you try the Append operator (&) or the Prefix Append Operator ( (+) ) (https://hpccsystems.com/training/documentation/ecl-language-reference/html/Record_SetRecord_Set_Operators.html)?\\n
\\n//Try this:\\nOrderedFlights := FormattedFlights1 & FormattedFlights2 & FormattedFlights3;\\n//Or this:\\nOrderedFlights := (+)(FormattedFlights1,FormattedFlights2,FormattedFlights3);\\n
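\nIf you want to confirm the appended result keeps the three sections in order, one quick check is to output the section counts alongside it (a small sketch reusing the definitions above):OUTPUT(COUNT(FormattedFlights1),NAMED('Directs'));\nOUTPUT(COUNT(FormattedFlights2),NAMED('Singles'));\nOUTPUT(COUNT(FormattedFlights3),NAMED('Doubles'));\nOUTPUT(OrderedFlights,NAMED('AllFlights')); // the first 'Directs' rows should all be direct flights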
\\nAnd if that doesn't get you there, you can just do the whole thing against the entire dataset, like this:\\n//add a flight type code\\nIsDirect := FormattedFlights.L2_departstationcode = ''; //Type 1\\nIsSingle := FormattedFlights.L2_departstationcode <> '' AND \\n FormattedFlights.L3_departstationcode = ''; //Type 2\\nIsDouble := FormattedFlights.L3_departstationcode <> ''; //Type 3\\nTypedFlights := PROJECT(FormattedFlights,\\n TRANSFORM({UNSIGNED1 TypeCode,FormattedFlights},\\n SELF.TypeCode := WHICH(IsDirect,IsSingle,IsDouble),\\n SELF := LEFT));\\n\\n//then SORT globally by TypeCode and the other sort fields\\nSortFlights := SORT(TypedFlights,TypeCode,L1_DepartStationCode,L1_DepartTimein12hrsfmt); \\n//then you can lose the TypeCode field, maintaining the sorted order\\nOrderedFlights := PROJECT(SortFlights,{FormattedFlights});
\\nThis uses WHICH to determine which flight type each record is, then you can just do a global SORT by that flight type and your two other sort fields. A simple PROJECT gets rid of the flight type field for the rest of your code.\\n\\nYou might want to detect if you end up with any TypeCode 0 records (which is possible if none of the WHICH expressions is TRUE) and handle that however you deem appropriate.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-08 12:28:21\" },\n\t{ \"post_id\": 27663, \"topic_id\": 7353, \"forum_id\": 8, \"post_subject\": \"Merge datasets without sorting issue\", \"username\": \"jtaylor178\", \"post_text\": \"I am trying to produce a single file that has the direct flights first, then singles and then double flights. with in each section there is a sort\\nI dont want the sort order changed and the each section can be very large.\\n\\nThe output should be these 3 files should be appended together in the same order.\\n\\nHere is what I tried\\n FormattedFlights := PROJECT(T4Seats,Schema.Layout);\\n\\n\\nAll three files use the same record schema\\n\\n// contains direct flights sorted\\nFormattedFlights1:= SORT(FormattedFlights(L2_departstationcode = ''),L1_DepartStationCode,L1_DepartTimein12hrsfmt);\\n// contains single connection flights sorted\\nFormattedFlights2:= SORT(FormattedFlights(L2_departstationcode <> '' and L3_departstationcode = ''),L1_DepartStationCode,L1_DepartTimein12hrsfmt);\\n// containds double connection flights sorted\\nFormattedFlights3:= SORT(FormattedFlights(L3_departstationcode <> '' ),L1_DepartStationCode,L1_DepartTimein12hrsfmt);\\n\\n\\n\\n\\n//Tried to just output but it looked like it merged per node\\n OutputFlights(FormattedFlights1+FormattedFlights2+ FormattedFlights1);\\n\\n//Tried this but it resorted my output so I dont have distict directs,single and double flights \\nOutputFlights := MERGE(FormattedFlights1,FormattedFlights2,FormattedFlights3, SORTED(L1_DepartStationCode,L1_DepartTimein12hrsfmt));\\n\\t\\t\\t\\t\\t\\n//I tried MERGE without sort and I get a Warning Merge without an explicit SORTED() attribute is deprecated. But this did not give me the correct order either\\nOutputFlights := MERGE(FormattedFlights1,FormattedFlights2,FormattedFlights3);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nI would appreciate any suggestions.\", \"post_time\": \"2019-10-07 18:30:23\" },\n\t{ \"post_id\": 27703, \"topic_id\": 7363, \"forum_id\": 8, \"post_subject\": \"Re: toJson\", \"username\": \"DSC\", \"post_text\": \"Also: If you think of an ECL record as a JSON object, then you will need to wrap the result of TOJSON() in braces in order to get valid JSON:\\n\\n\\nstr1 := TOJSON(...);\\nstr2 := U8'{' + str1 + U8'}';\\nOUTPUT(str2);\\n
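\nTo sanity-check the wrapped string you can feed it straight back in (a quick sketch, assuming rec is the RECORD structure that was passed to TOJSON):roundTrip := FROMJSON(rec, str2);\nOUTPUT(roundTrip);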
\\nAlso, note that TOJSON() returns UTF8, not STRING. That is why the "wrapping" is casting the brace characters to UTF8 in the above example.\\n\\nDan\", \"post_time\": \"2019-10-08 14:18:57\" },\n\t{ \"post_id\": 27693, \"topic_id\": 7363, \"forum_id\": 8, \"post_subject\": \"Re: toJson\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nLike this:\\nServiceRec := RECORD\\n UNICODE name{xpath('name')};\\n UNICODE body{xpath('body')};\\nEND;\\nRequestRec := record\\n UNICODE text{xpath('text')};\\n DATASET(ServiceRec) services {xpath('services')}\\nend;\\n\\nstr1 := TOJSON(ROW({U'Hello World',\\n [{U'ServiceA',U'default'},\\n {U'ServiceB',U'custom'},\\n {U'ServiceC',U'default'}]},RequestRec)); \\nOUTPUT(str1);
\\nRepeating elements in both XML and JSON are expressed as nested child datasets in ECL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-08 14:02:38\" },\n\t{ \"post_id\": 27683, \"topic_id\": 7363, \"forum_id\": 8, \"post_subject\": \"toJson\", \"username\": \"wjblack\", \"post_text\": \"I'm trying to use the tojson() command to form json in the following format. How can this be done?\\n\\n\\n{\\n "text": "Hello World",\\n\\t"services": [\\n\\t\\t{\\n\\t\\t\\t"name": "ServiceA",\\n\\t\\t\\t"body": "default",\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"name": "ServiceB",\\n\\t\\t\\t"body": "custom"\\n\\t\\t},\\n\\t\\t{\\n\\t\\t\\t"name": "ServiceC",\\n\\t\\t\\t"body": "default"\\n\\t\\t}\\n\\t]\\n}\\n
\\n\\n\\nRequestRec := record\\n\\tunicode text := (unicode)'';\\n\\tset of unicode services {xpath('services')}\\nend;\\n
\", \"post_time\": \"2019-10-08 13:34:05\" },\n\t{ \"post_id\": 28253, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"newportm\", \"post_text\": \"get_ThorFile_Info(STRING filename,STRING pcluster = '',STRING pesp = _Control.ThisEnvironment.ESP_IPAddress) := FUNCTION\\n\\nDFUDefFileRequest := RECORD, MAXLENGTH(100)\\n STRING Name {XPATH('Name' )} := filename;\\n STRING Format {XPATH('Format' )} := 'xml';\\n END;\\n \\n DFUDefFileRecord := RECORD, MAXLENGTH(100000)\\n STRING defFile {XPATH('defFile' )};\\n END;\\n \\n results := SOAPCALL('thor_esp.net:'+ port + '/WsDfu'\\n ,'DFUDefFile'\\n ,DFUDefFileRequest\\n ,DATASET(DFUDefFileRecord)\\n ,XPATH('DFUDefFileResponse')\\n );\\nRETURN results;\\nEND;\\n\\nfile1 := '~thor::base::cd::superfile::somefile';\\n\\nstring get_data := get_ThorFile_Info(file1)[1].defFile;\\n\\nData out := STD.Str.DecodeBase64(get_data);\\n(string)out;
\\n\\nThat did the trick to get the data from the soap call. Unfortunately, there is still to much of a difference between child datasets. Recordof(dataset) counts every column as its own in seq. The logical file treats each child dataset as its own. So I still need the change requested.\", \"post_time\": \"2019-11-20 14:37:02\" },\n\t{ \"post_id\": 28093, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Tim,\\n\\nIf all you really need is access to the result of "DFUDefFile", the value is Base64 encoded so all you have to do is decode via a standard API. \\n\\nIt's easily enough done in most languages, \\n\\nIn ECL you can call "STD.Str.DecodeBase64(value)".\\n\\nRegards,\\nTony\", \"post_time\": \"2019-11-13 22:08:38\" },\n\t{ \"post_id\": 27903, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"Allan\", \"post_text\": \"Hi Everyone,\\n\\nI noticed this post and realised that a series of YouTube Videos I've done that discuss manipulating / analyzing dataset structures at compile time, may be of help.\\n\\nhttps://www.youtube.com/playlist?list=PLONd-6DN_sz3QTzE5s_qbOSDJ8V-IEXUM \\n\\nYours\\nAllan\", \"post_time\": \"2019-10-21 16:27:20\" },\n\t{ \"post_id\": 27863, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nOK, then I suggest it's time for you to submit a feature request in JIRA. You can ask for an option to enable you to easily get the same return result from either your SOAPCALL or the GetLogicalFileAttribute function call (or both).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-15 13:34:14\" },\n\t{ \"post_id\": 27843, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"newportm\", \"post_text\": \"EXPORT get_ThorFile_Info(STRING filename,STRING pcluster = '',STRING pesp = _Control.ThisEnvironment.ESP_IPAddress) := FUNCTION\\n\\nDFUInfoRequest := RECORD, MAXLENGTH(100)\\n\\t\\tSTRING Name {XPATH('Name' )} := filename;\\n\\t\\tSTRING Cluster {XPATH('Cluster' )} := pcluster;\\n\\t\\tSTRING UpdateDescription {XPATH('UpdateDescription' )} := '0';\\n\\t\\tSTRING FileName {XPATH('FileName' )} := '';\\n\\t\\tSTRING FileDesc {XPATH('FileDesc' )} := '';\\nEND;\\n\\t\\nDFUInfoOutRecord := RECORD, MAXLENGTH(100000)\\n\\t\\tSTRING exception_code {XPATH('Exceptions/Exception/Code' )};\\n\\t\\tSTRING exception_source {XPATH('Exceptions/Exception/Source' )};\\n\\t\\tSTRING exception_msg {XPATH('Exceptions/Exception/Message')};\\n\\t\\tSTRING Name {XPATH('FileDetail/Name' )};\\n\\t\\tSTRING Filename {XPATH('FileDetail/Filename' )};\\n\\t\\tSTRING Description {XPATH('FileDetail/Description' )};\\n\\t\\tSTRING Dir {XPATH('FileDetail/Dir' )};\\n\\t\\tSTRING PathMask {XPATH('FileDetail/PathMask' )};\\n\\t\\tSTRING Filesize {XPATH('FileDetail/Filesize' )};\\n\\t\\tSTRING ActualSize {XPATH('FileDetail/ActualSize' )};\\n\\t\\tSTRING RecordSize {XPATH('FileDetail/RecordSize' )};\\n\\t\\tSTRING RecordCount {XPATH('FileDetail/RecordCount' )};\\n\\t\\tSTRING Wuid {XPATH('FileDetail/Wuid' )};\\n\\t\\tSTRING Owner {XPATH('FileDetail/Owner' )};\\n\\t\\tSTRING Cluster {XPATH('FileDetail/Cluster' )};\\n\\t\\tSTRING JobName {XPATH('FileDetail/JobName' )};\\n\\t\\tSTRING Persistent {XPATH('FileDetail/Persistent' )};\\n\\t\\tSTRING Format {XPATH('FileDetail/Format' 
)};\\n\\t\\tSTRING MaxRecordSize {XPATH('FileDetail/MaxRecordSize' )};\\n\\t\\tSTRING CsvSeparate {XPATH('FileDetail/CsvSeparate' )};\\n\\t\\tSTRING CsvQuote {XPATH('FileDetail/CsvQuote' )};\\n\\t\\tSTRING CsvTerminate {XPATH('FileDetail/CsvTerminate' )};\\n\\t\\tSTRING CsvEscape {XPATH('FileDetail/CsvEscape' )};\\n\\t\\tSTRING Modified {XPATH('FileDetail/Modified' )};\\n\\t\\tSTRING Ecl {XPATH('FileDetail/Ecl' )};\\n\\t\\tSTRING Eclxml {XPATH('FileDetail/Ecl/Format/xml' )};\\n STRING isSuper {XPATH('FileDetail/isSuperfile' )};\\n STRING subfiles {XPATH('FileDetail/subfiles/Item' )};\\nEND;\\n\\nesp\\t\\t\\t\\t:= pesp + ':8010';\\n\\nresults := SOAPCALL('http://' + esp + '/WsDfu'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,'DFUInfo'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,DFUInfoRequest\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,DATASET(DFUInfoOutRecord)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,XPATH('DFUInfoResponse')\\n\\nRETURN results;\\nEND;
\\n\\n rec := IF((boolean)(get_ThorFile_Info(file1)[1].isSuper) ,\\n get_ThorFile_Info('~' + \\n get_ThorFile_Info(file1)[1].subfiles)[1].ecl,\\n get_ThorFile_Info(file1)[1].ecl);\\n \\n rec;
\\n\\nYes, I already went down that road. However, the result of the soap call is identical to NOTHOR(STD.File.GetLogicalFileAttribute(file1,'ECL'));\\n\\nThat said, if we go out to the http://esp.net:8010/WsDfu/ we can access the DFUDefFile\\nThe only problem is the soap call returns a hash of the blob unlike the button on the GUI. \\n\\nDFUDefFileRequest := RECORD, MAXLENGTH(100)\\n \\t\\tSTRING Name {XPATH('Name' )} := filename;\\n \\t\\tSTRING Format {XPATH('Format' )} := 'xml';\\n END;\\n \\n DFUDefFileRecord := RECORD, MAXLENGTH(100000)\\n STRING defFile {XPATH('defFile' )};\\n END;\\n \\n results := SOAPCALL('thor_esp.net/WsDfu'\\t\\t\\t\\t\\t\\t\\t\\t\\t,'DFUDefFile'\\t\\t\\t\\t\\t\\t\\t\\t\\t,DFUDefFileRequest \\t\\t\\t\\t\\t\\t\\t\\t\\t,DATASET(DFUDefFileRecord)\\t\\t\\t\\t\\t\\t\\t\\t\\t,XPATH('DFUDefFileResponse')\\t\\t\\t\\t\\t\\t\\t\\t);\\n results;
\\n\\nthe xml embedded in the returned code is identical to that of Recordof(dataset) so it would enable me to do a like for like comparison with no other checks. But ... I can't access the actual result of the DFUDefFile \", \"post_time\": \"2019-10-14 20:33:27\" },\n\t{ \"post_id\": 27793, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nOK, upon further exploration, it appears that GetLogicalFileAttribute returns whatever text appears in the ECL tab in ECL Watch for that logical file.
Here is an option. I can pull the data from the DFU with a soapcall at runtime. and parse out the record struct. One caveat is this does not return any information if it is a superfile.
And that appears to be the same thing that GetLogicalFileAttribute is doing.\\n\\nFor small/simple files, that record structure is expressed as { unsigned4 recid, string10 homephone };
\\nFor larger record structures (including nested Child Datasets) that takes the form: \\nRECORD\\n unsigned4 recid;\\n string10 homephone;\\n string10 cellphone;\\n string20 fname;\\n string20 mname;\\n string20 lname;\\n string10 new_homephone;\\n string10 new_cellphone;\\n string20 new_fname;\\n string20 new_mname;\\n string20 new_lname;\\n END;
So I'll have to reconsider how to duplicate that structure.\\n\\nOtherwise, could you simply default to using GetLogicalFileAttribute on both sides of your comparison? That would make it much simpler. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-10 14:06:53\" },\n\t{ \"post_id\": 27783, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"rtaylor\", \"post_text\": \"Tim,
A note that the macro GetStructTxt only works for inline dataset definitions or data that has been transformed/referenced in some way other than an output.
This simple solution makes it work for me:IMPORT TrainingYourName;\\n\\nds1 := TrainingYourName.File_Persons_Slim.file[1..2];\\nds2 := TrainingYourName.Accounts[1..2];\\n\\nrecstruct1a:= (STRING)GetStructTxt(ds1); \\nrecstruct2a:= (STRING)GetStructTxt(ds2); \\nOUTPUT(recstruct1a,NAMED('recstruct1a_rawEXPORT'));//OUTPUT file\\nOUTPUT(recstruct2a,NAMED('recstruct2a_rawEXPORT'));//sprayed file
You just need to make the dataset you pass a subset (like the first 2 recs, as I did here) and then it works correctly.\\n\\nThe problem is, sometime in the last 20 years the #EXPORT and #EXPORTXML format was expanded to include file information. Unfortunately, that info was added as a set of enclosing tags (whose info is only in XML attributes) instead of a simple self-contained tag. The problem is, the tag name is different for each filetype, so I would need to write several separate versions to handle this. Here's what it looks like:<Data>\\n <CsvTable exported="false" name="csv^class::rt::intro::accounts">\\n <Field ecltype="unsigned8"\\n label="personid"\\n name="personid"\\n position="0"\\n rawtype="524545"\\n size="8"\\n type="unsigned"/>\\n </CsvTable>\\n</Data>\\n\\nand ...\\n\\n<Data>\\n <FlatTable exported="false" name="flat^class::rt::intro::persons" recordLength="155">\\n <Field ecltype="unsigned8"\\n label="id"\\n name="id"\\n position="0"\\n rawtype="524545"\\n size="8"\\n type="unsigned"/>\\n </FlatTable>\\n</Data>
The fact that GetLogicalFileAttrbute returns totally different text for nested child datasets means separate code to match that. \\n\\nI'll see what I can do with that, but I'm traveling now so ...
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-10 13:19:03\" },\n\t{ \"post_id\": 27773, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"newportm\", \"post_text\": \"Richard,\\n\\nI really appreciate you taking the time to put this together. I agree it has been a fun thing to work on. I don;t get to do much code problem solving these days. Your solution is pretty slick and gets around the one issue I was coming up with. A note that the macro GetStructTxt only works for inline dataset definitions or data that has been transformed/referenced in some way other than an output. If I instead use a dataset defined like this\\n\\n
DS := Dataset('~thor::base::test', TestFolder.layouts.sampLayout,thor);
\\n\\nassuming I am going to pass the dataset around and do stuff with it later, it returns { } as the layout.\\n\\nIn other news,\\nif I do the same thing but the file name is built as in my example above. \\n\\n#CONSTANT('myfileprefix','~thor::tmsn');\\nprefix := '~thor' : stored('myfileprefix');\\nfilename := prefix + '::testfile';\\n\\nTest1 := dataset(filename,TestFolder.layouts.sampLayout,thor);
\\n\\nGetStructTxt(Test1 ); the compiler creates a local workunit and says it completed but never actually submits the job. L20191009-123456\\n\\nDoing an output to read in a sequential does not change the behavior. Now if I take an altering action on the dataset say a project or sort the layout format actually changes for a file with a child dataset or 50...\\n\\nsimplified items in layout for here. \\n\\n/////RESULT OF NOTHOR(STD.File.GetLogicalFileAttribute(file2,'ECL'));\\ncoverage_info := RECORD\\n string4 child1;\\n END;\\n\\nfinance_company_info := RECORD\\n string15 child2;\\n END;\\n\\nRECORD\\n string6 rec1\\n string20 rec2\\n DATASET(coverage_info) coverages{maxcount(18)};\\n DATASET(finance_company_info) finance_info{maxcount(4)};\\n END;
\\n\\n////////REsult of GetStructTxt //////\\n{ string6 rec1, string20 rec2, table of <unnamed> coverages, string4 child1, coverages, table of <unnamed> finance_info, string15 child2, finance_info };\\n
\\nI guess I can write another wrapper to convert all layout with child datasets into the {} format.\", \"post_time\": \"2019-10-09 23:41:38\" },\n\t{ \"post_id\": 27763, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nNot a simple problem, but I managed to find a fairly simple way to do it! \\n\\nFirst I wrote a FUNCTIONMACRO that uses #EXPORTXML to get the structure information from any declared DATASET (inline or on disk) and used Template Language to format the result exactly the same as the GetLogicalFileAttribute function's return result:
GetStructTxt(ds) := FUNCTIONMACRO\\n #DECLARE(Ctr);\\n #SET(Ctr,0);\\n #DECLARE(OutString);\\n #SET(OutString,'{ ');\\n #EXPORTXML(Fred,ds);\\n #FOR (Fred)\\n #FOR (Field) \\n #IF(%Ctr%=0)\\n #APPEND(OutString,%'{@ecltype}'% + ' ' + %'{@name}'% ) \\n #SET(Ctr,1);\\n #ELSE\\t\\n #APPEND(OutString,', ' + %'{@ecltype}'% + ' ' + %'{@name}'% ) \\n #END\\n #END\\n #END\\n #APPEND(OutString,' };\\\\n'); //add \\\\n to duplicate GetLogicalFileAttribute() return\\n RETURN %'OutString'%;\\nENDMACRO;
\\nNow you can use the GetLogicalFileAttribute function to get the structure when you only have the filename. The "trick" to this function that I learned through hard effort is that it appends a newline character to the end of its return result, so I had to make sure the FUNCTIONMACRO duplicated that format exactly to allow a simple string compare between the two results.\\n\\nThen you can compare any two dataset structures, like this:\\n#CONSTANT('myfileprefix','~thor::test::RT');\\nprefix := '~thor' : stored('myfileprefix');\\nfilename := prefix + '::testfile';\\n\\nTest1 := dataset([{1,'one'},{2,'two'}],{integer id , string desc});\\nTest2 := dataset([{2,'two'},{3,'three'}],{integer id , string desc}); //disk file\\nTest3 := dataset([{1,'one'},{2,'two'}],{UNSIGNED id , string10 desc});\\n\\nIMPORT Std;\\n\\nrecstruct1 := GetStructTxt(Test1); \\nrecstruct2 := STD.File.GetLogicalFileAttribute(filename,'ECL');\\nrecstruct3 := GetStructTxt(Test3); \\n\\nOUTPUT(recstruct1,NAMED('recstruct1_raw'));\\nOUTPUT(recstruct2,NAMED('recstruct2_raw'));\\nOUTPUT(recstruct3,NAMED('recstruct3_raw'));\\nOUTPUT(recstruct1 = recstruct2,NAMED('Compare_1_2')); \\nOUTPUT(recstruct1 = recstruct3,NAMED('Compare_1_3')); \\nOUTPUT(recstruct2 = recstruct3,NAMED('Compare_2_3'));
Thanks for the interesting problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-09 19:07:55\" },\n\t{ \"post_id\": 27753, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Re: Check if two record structure match\", \"username\": \"newportm\", \"post_text\": \"Here is an option. I can pull the data from the DFU with a soapcall at runtime. and parse out the record struct. One caveat is this does not return any information if it is a superfile. \\n\\nDFUInfoRequest := RECORD, MAXLENGTH(100)\\n\\t\\tSTRING Name {XPATH('Name' )} := filename;\\n\\t\\tSTRING Cluster {XPATH('Cluster' )} := cluster;\\n\\t\\tSTRING UpdateDescription {XPATH('UpdateDescription' )} := '0';\\n\\t\\tSTRING FileName {XPATH('FileName' )} := '';\\n\\t\\tSTRING FileDesc {XPATH('FileDesc' )} := '';\\nEND;\\n\\t\\nDFUInfoOutRecord := RECORD, MAXLENGTH(100000)\\n\\t\\tSTRING Ecl {XPATH('FileDetail/Ecl' )};\\t\\nEND;\\n\\nesp\\t\\t\\t\\t:= pesp + ':8010';\\nresults := SOAPCALL('http://' + esp + '/WsDfu'\\n ,'DFUInfo'\\n ,DFUInfoRequest\\n ,DATASET(DFUInfoOutRecord)\\n ,XPATH('DFUInfoResponse')\\n );\\n \\nresults;
\", \"post_time\": \"2019-10-09 14:04:15\" },\n\t{ \"post_id\": 27723, \"topic_id\": 7373, \"forum_id\": 8, \"post_subject\": \"Check if two record structure match\", \"username\": \"newportm\", \"post_text\": \"I'm putting together a macro to check if two record structures match between two given datasets. If I have the dataset I pass that in so I can compare the xml value of the rec structure for the datasets. If I don't have a dataset defined and was just given a filename then I attempt to lookup the record structure. If the file name does not exist it throws a warning and shows as a mismatch and I'm good with that. My question is how do I get around file names that are not constant at runtime and I don't have a dataset definition. I need to get this working without having the dataset handy and without being able to create it because I don't know the the layout and with the file name built in this exact manner. Trying to integrate into existing common code that everyone uses without any code changes to the builds. \\n\\nThanks for your ideas. I'm so close I can taste it I just need a way to interrogate the file structure by name at runtime \\n\\nPS. I tried to use get column mapping and GetLogicalFileAttribute first. \\n\\nExample: macro defined below\\n\\n
#CONSTANT('myfileprefix','~thor::tmsn');\\n\\nprefix := '~thor' : stored('myfileprefix');\\n\\n\\nTest1 := dataset([{1,'one'},{2,'two'}],{integer id , string desc});\\nTest2 := dataset([{2,'two'},{3,'three'}],{integer id , string desc});\\n\\nfilename := prefix + '::testfile';\\n//run first wuid then comment out\\n// output(Test2,,filename,thor);\\n\\n//uncomment below and run second time after test file created. you can just syntex check it it will say\\n//Error: LOOKUP attribute requires a constant filename MAC_Check_Rec_Struct_Match.ecl\\n#IF(MAC_Check_Rec_Struct_Match(Test1,filename))\\noutput('They Match',named('match'));\\n#ELSE\\noutput('NO LUCK',named('NOPE'));\\n#END\\n\\n\\n//pass in two file names and this code will tell you if the record structure is //identical two datasets will also work as RECORDOF will use the dataset and extract //the known structure newly added functionality in 6.4 //https://hpccsystems.com/blog/file-layout-resolution-compile-time \\nEXPORT MAC_Check_Rec_Struct_Match(file1,file2) := functionmacro\\nimport std;\\n\\n #uniquename(typ1);\\n #uniquename(typ2);\\n #uniquename(r);\\n #uniquename(r2);\\n #uniquename(out);\\n #uniquename(out2);\\n \\n //check if the parms are datasets or strings\\n %typ1% := STD.Str.ToLowerCase(#GETDATATYPE(file1)[..6]) = 'string';\\n %typ2% := STD.Str.ToLowerCase(#GETDATATYPE(file2)[..6]) = 'string';\\n #IF(%typ1%)\\n %r% := RECORDOF(file1,LOOKUP); //if string look up record def assuming file exists\\n #ELSE \\n %r% := RECORDOF(file1); //assume it is a dataset and has been loaded into memory\\n #END\\n \\n #IF(%typ2%) %r2% := RECORDOF(file2,LOOKUP);\\n #ELSE %r2% := RECORDOF(file2); #END \\n #EXPORT(out, %r%);\\n #EXPORT(out2, %r2%);\\n\\n return %'out'% = %'out2'%;\\n\\nendmacro;
\\n\\nTim N\", \"post_time\": \"2019-10-08 23:33:36\" },\n\t{ \"post_id\": 27833, \"topic_id\": 7393, \"forum_id\": 8, \"post_subject\": \"Re: FROMJSON to DataSet produces empty dataset\", \"username\": \"DSC\", \"post_text\": \"ECL's FROMJSON() and FROMXML() functions don't create a record definition from the data, but rather populate a record definition by examining the data. It's a distinction that implies a couple of things:\\n\\n\\n
\\n\\nIn your example, the only difficulty I saw was with the 'flags' field. In the data, it is defined as a JSON object, but in the record definition it is defined as a dataset (an array of JSON objects). I think that actually parses correctly, but the display in ECL Watch may be a little confusing. Anyway, I would define rServices like this:\\n\\n\\nrServices := record\\n string name{xpath('name')};\\n string body{xpath('body')};\\n rFlags flags{xpath('flags')};\\n string count{xpath('count')};\\nend;
\\nOn the subject of displaying things in ECL Watch: By default, the output will label fields with the XPATH value rather than the attribute name. That can lead to all kinds of issues, especially with more complex XPATH values (e.g. "foo[1]/@bar"). To always show the ECL attribute names, add a NOXPATH to OUTPUT:\\n\\n\\nOUTPUT(ds, NOXPATH);
\\nThat wasn't really an issue with your example, but if you continue to play with JSON parsing then you will probably run into it at some point.\\n\\nLastly, one thing you might want to check into is the IFBLOCK() option within a RECORD. That option can be a useful way to maintain slightly different views of records within a dataset. It does need to be keyed off something though, so in your example you might need a field like "hasFlags" to indicate whether to show any of the 'flags' data. Also, be aware that IFBLOCK() is not like a C-style union, where the field contents are overlayed; all of the fields will physically exist in the record, end-on-end, but IFBLOCK() will govern which ones you have access to and which ones are shown.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2019-10-14 17:34:45\" },\n\t{ \"post_id\": 27813, \"topic_id\": 7393, \"forum_id\": 8, \"post_subject\": \"FROMJSON to DataSet produces empty dataset\", \"username\": \"wjblack\", \"post_text\": \"I'm seeing some strange behavior when there's an optional member that would become a dataset when reading it in using fromjson(). For the below json lets say we have a member for one service that's optional? For example, what if ServiceA has a member named 'flags' that has members (flaga, flagb, flagc) that's not always present for Service A. 'flags' is optional. When reading this in using the record structure it would produce a dataset when present and an empty dataset when not. How would the record structure look then? If we specify 'flags' in the record structure and it's not in the json it seems to produce an empty dataset named something other than 'flags'. In some instances it gives the empty dataset the name 'name' or whatever the first member of the structure is Is there a way in HPCC/ECL to read this in correctly using fromjson() for both scenarios mentioned?\\n\\n\\njsonstring2 :=\\n '{'\\n+ ' "text": "Hello World",'\\n+ ' "services": ['\\n+ ' {'\\n+ ' "name": "ServiceA",'\\n+ ' "body": "default",'\\n// + ' \\t "flags": {'\\n// + '\\t "flaga": "a",'\\n// + '\\t "flagb": "b",'\\n// + '\\t "flagc": "c"'\\n// + '\\t },'\\n+ ' "count": "1"'\\n+ ' },'\\n+ ' {'\\n+ ' "name": "ServiceB",'\\n+ ' "body": "custom",'\\n+ ' "count": "2"'\\n+ ' },'\\n+ ' {'\\n+ ' "name": "ServiceC",'\\n+ ' "body": "default",'\\n+ ' "count": "3"'\\n+ ' }'\\n+ ' ]'\\n+ '}';\\n\\nrFlags := record\\n string flaga{xpath('flaga')} := '';\\n string flagb{xpath('flagb')} := '';\\n string flagc{xpath('flagc')} := '';\\nend;\\n\\nrServices := record\\n string name{};\\n string body{};\\n dataset(rFlags) flags{xpath('flags')} := dataset([],rFlags);\\n string count{xpath('count')};\\nend;\\n\\nrMain := record\\n string text{} := '';\\n dataset(rServices) services{xpath('services')};\\nend;\\n\\nds := fromjson(rMain, jsonstring2);\\nds\\n
\", \"post_time\": \"2019-10-14 02:15:44\" },\n\t{ \"post_id\": 27953, \"topic_id\": 7423, \"forum_id\": 8, \"post_subject\": \"Re: Library Modules Examples\", \"username\": \"rtaylor\", \"post_text\": \"The Programmer's Guide also has an article about creating LIBRARY MODULEs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-22 12:31:17\" },\n\t{ \"post_id\": 27943, \"topic_id\": 7423, \"forum_id\": 8, \"post_subject\": \"Re: Library Modules Examples\", \"username\": \"JimD\", \"post_text\": \"Does the example in the LIBRARY topic of the docs help? \\n\\nhttps://hpccsystems.com/training/docume ... BRARY.html\\n\\nJim\", \"post_time\": \"2019-10-22 11:54:17\" },\n\t{ \"post_id\": 27923, \"topic_id\": 7423, \"forum_id\": 8, \"post_subject\": \"Library Modules Examples\", \"username\": \"abaruchi\", \"post_text\": \"Hi Guys,\\n\\nI need to create a Library Module in order to improve the compilation time of my Roxie Query. However, the Documentation do not provides useful examples (actually, there is no example of module Library usage). So, I would like to know if someone has two or three examples of code using this Library definition when creating a module.\\n\\nDoc Link: https://hpccsystems.com/training/docume ... dules.html\\n\\nThanks!!\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-10-21 22:14:30\" },\n\t{ \"post_id\": 28023, \"topic_id\": 7443, \"forum_id\": 8, \"post_subject\": \"Re: Writing a Null character to a flat file\", \"username\": \"jtaylor178\", \"post_text\": \"darn, simple. I should have thought of that. thanks\", \"post_time\": \"2019-10-31 18:42:33\" },\n\t{ \"post_id\": 28013, \"topic_id\": 7443, \"forum_id\": 8, \"post_subject\": \"Re: Writing a Null character to a flat file\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor178,\\n\\nYou can do it just like this:Nul := '\\\\000';\\nds := DATASET([{'ABC',Nul,'DEF'}],{STRING3 F1,STRING3 F2,STRING3 F3});\\nOUTPUT(ds,,'~RTTEST::CSV::NullTest',CSV(SEPARATOR('\\\\t')));
The Nul definition uses the octal constant form for string constants to specify the character is an ASCII zero.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-10-31 17:53:20\" },\n\t{ \"post_id\": 28003, \"topic_id\": 7443, \"forum_id\": 8, \"post_subject\": \"Writing a Null character to a flat file\", \"username\": \"jtaylor178\", \"post_text\": \"[attachment=0:248kajj4]NULL.PNGI am producing a tab delimited file but in some fields instead of an empty string I need to write out a null character. I am writing a process to replace an existing file from another system. Attached is a picture of the existing file shown in Notepad++ with all symbols shown\\n\\nHow do I do this?\", \"post_time\": \"2019-10-31 16:19:20\" },\n\t{ \"post_id\": 29383, \"topic_id\": 7533, \"forum_id\": 8, \"post_subject\": \"Re: Finding text that does not parse PARSE\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nFinally got round to trying\\n\\nPARSE(... NOT MATCHED);\\n\\nWorks a Treat!\\nAll the 'Matches' return false, therefor as long as you have something in your TRANSFORM that acts on the 'else' part of the MAP or CASE (whatever), you get the bad input.\\n\\nSimple\\n\\nThanks \\n\\nAllan\", \"post_time\": \"2020-02-07 16:39:57\" },\n\t{ \"post_id\": 28653, \"topic_id\": 7533, \"forum_id\": 8, \"post_subject\": \"Re: Finding text that does not parse PARSE\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nI'll give your suggestion a whirl.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-01-03 12:07:01\" },\n\t{ \"post_id\": 28553, \"topic_id\": 7533, \"forum_id\": 8, \"post_subject\": \"Re: Finding text that does not parse PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou should be able to get all the non-matches by using NOT MATCHED(pattern) as your search criteria, and returning MATCHROW() for those will allow you to see the actual non-matching records.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-12-23 08:17:05\" },\n\t{ \"post_id\": 28413, \"topic_id\": 7533, \"forum_id\": 8, \"post_subject\": \"Re: Finding text that does not parse PARSE\", \"username\": \"Allan\", \"post_text\": \"Adding to this,\\nPreferably I would not just have an indication there was incorrect text, but the incorrect text was also returned with perhaps a BOOLEAN Bad set.\\nso for:\\nITEM1=A,Item=,ITEM2=B,ITEM3=123456,,,,,,,,
\\nI would get back:\\n\\nBad Text Fragment\\nFALSE ITEM1=A\\nTRUE ITEM=\\nFALSE ITEM2=B\\nFALSE ITEM3=123456\\nTRUE ,,,,,,,,,,\\n
\\nYours\\nAllan\", \"post_time\": \"2019-12-11 12:18:31\" },\n\t{ \"post_id\": 28403, \"topic_id\": 7533, \"forum_id\": 8, \"post_subject\": \"Finding text that does not parse PARSE\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nCribbing from your excellent Teck Talk Episode 10 on PARSE, I've found it surprisingly easy to parse some simple text E.G.\\nITEM1=A,ITEM2=B,ITEM3=123456
\\nI retrieve all the distinct matches for '<item>=<value>', so from the example above I get 3 records back, all well and good.\\n\\nMy problem comes in detecting invalid input, I want to report if someone has input\\nITEM=
\\nwith no 'value', but PARSE, as it stands, correctly does not return the invalid value but I've no indication that there was text that did not match.\\n\\nYours\\nAllan\", \"post_time\": \"2019-12-11 12:08:05\" },\n\t{ \"post_id\": 28433, \"topic_id\": 7543, \"forum_id\": 8, \"post_subject\": \"Re: How to retrieve the 2nd matched text using PARSE\", \"username\": \"Allan\", \"post_text\": \"Ah answered my own question:\\n\\nMATCHTEXT(MinMax/IntBounds/Int[2])\\n
\", \"post_time\": \"2019-12-11 14:43:15\" },\n\t{ \"post_id\": 28423, \"topic_id\": 7543, \"forum_id\": 8, \"post_subject\": \"How to retrieve the 2nd matched text using PARSE\", \"username\": \"Allan\", \"post_text\": \"Hi\\n\\nI have a Patten:\\n\\n PATTERN IntBounds := Int OPT(Ws) Sep OPT(Ws) Int;\\n PATTERN ReelBounds := Reel OPT(Ws) Sep OPT(Ws) Reel;\\n PATTERN MinMax := OpenP OPT(Ws) (IntBounds | ReelBounds) OPT(Ws) CloseP;\\n
\\nI can retrieve the 1st of the numbers of the pairs of numbers using:\\n\\nMATCHTEXT(MinMax/IntBounds/Int)\\n
\\nor\\nMATCHTEXT(MinMax/ReelBounds/Reel)\\n
\\n\\nbut how does MATCHTEXT retrieve the 2nd of the 'Int' or 'Reel' components of MinMax?\\nI expect I can re-structure the patters to individually isolate the separate numbers which can then be referenced directly, but this seems a bit of a hack.\\n\\nYours\\nAllan\", \"post_time\": \"2019-12-11 14:39:08\" },\n\t{ \"post_id\": 29093, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Re: Finding the number of elements matched by PARSE\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nThis implementation is a definite improvement on mine.\\nJust correcting two typos in your ECL:\\n\\nds := DATASET([\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'}],{STRING s});\\n\\n//add a UID to each rec\\nUIDrec := {UNSIGNED UID,STRING s};\\nds_UID := PROJECT(ds,TRANSFORM(UIDrec,\\n SELF.UID := COUNTER,\\n SELF.s := LEFT.s));\\n//parsing patterns: \\nPATTERN nbr := PATTERN('[0-9]'); \\nPATTERN sep := ',';\\nPATTERN element := 'setelement' nbr; \\nPATTERN elements := element OPT(sep);\\n\\nUIDrec XF(ds_UID L) := TRANSFORM\\n SELF.UID := L.UID;\\n SELF.s := MATCHTEXT(element);\\nEND; \\nPARSE(ds_UID,s,elements,XF(LEFT));\\n
\\nYours\\nAllan\", \"post_time\": \"2020-01-17 10:27:09\" },\n\t{ \"post_id\": 28683, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Re: Finding the number of elements matched by PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI meant something like this:ds := DATASET()[\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'},\\n {'setelement1,setelement2,setelement3'}],{STRING s});\\n\\n//add a UID to each rec\\nUIDrec := {UNSIGNED UID,STRING s};\\nds_UID := PROJECT(ds,TRANSFORM(UIDrec,\\n SELF.UID := COUNTER,\\n SELF.s := LEFT.s));\\n//parsing patterns: \\nPATTERN nbr := PATTERN('[0-9]'); \\nPATTERN sep := ',';\\nPATTERN element := 'setelement' nbr; \\nPATTERN elements := element OPT(sep);\\n\\nUIDrec XF(ds L) := TRANSFORM\\n SELF.UID := L.UID;\\n SELF.s := MATCHTEXT(element);\\nEND; \\nPARSE(ds_UID,s,elements,XF(LEFT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-04 08:46:38\" },\n\t{ \"post_id\": 28633, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Re: Finding the number of elements matched by PARSE\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI don't understand your post.\\nCould you give an example when you have time.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2020-01-03 11:50:01\" },\n\t{ \"post_id\": 28563, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Re: Finding the number of elements matched by PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI would probably approach this by adding a UID to each record before doing the PARSE. \\n\\nThen I would let PARSE extract each set element (and keep the record number it came from in each result rec). \\n\\nThen I would do a simple crosstab on that result to determine how many set elements were in each unique input record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-12-23 08:24:50\" },\n\t{ \"post_id\": 28453, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Re: Finding the number of elements matched by PARSE\", \"username\": \"Allan\", \"post_text\": \"I have actually 'fixed' it myself, by MACROising 500 constant references to the instance of the pattern match:\\n\\nDATASET(Layouts.Boundary) GatherSetElements := FUNCTION\\nMAC_accessOperandAlpha (num) := MACRO\\n #DECLARE(eclfragment)\\n #SET(eclfragment,'')\\n #DECLARE(cnt)\\n #SET(cnt,0)\\n #DECLARE(sep)\\n #SET(sep,'')\\n\\n #LOOP \\n #IF (%cnt% = num)\\n\\t #BREAK\\n #ELSE\\n\\t#SET(cnt,%cnt% +1)\\n\\t#APPEND(eclfragment,%'sep'%+'{0,Constants.BoundaryTypes.SETELEMENT,MATCHTEXT(OneSet/OperandAlpha['+%'cnt'%+'])}')\\n\\t#SET(sep,',')\\n #END\\n #END\\n %'eclfragment'%\\nENDMACRO;\\n\\nd := DATASET([ #EXPAND(MAC_accessOperandAlpha(500)) ],Layouts.Boundary);\\nRETURN d(Value != '');\\nEND;\\n
\\n\\nNot pretty at all, so if anyone has a better idea please share it.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2019-12-12 14:45:40\" },\n\t{ \"post_id\": 28443, \"topic_id\": 7553, \"forum_id\": 8, \"post_subject\": \"Finding the number of elements matched by PARSE\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have textual input of the form:\\n\\n[setelement1,setelement2,setelement3]\\n
\\nI can supply patterns to PARSE to extract individual elements from the set, no problem.\\nAnd obviously there can be a variable number of set elements.\\nBut I need to load these elements into separate rows in a DATASET, but I can't see a way of processing a variable number of elements. There does not seem to be a equivalent of MATCHED or MATCHETEXT that gives you the number of elements (at that level) matched.\\nI know that if you use an 'out of range' index to a MATCHTEXT(pattern path) you get an empty string back so I've tried:\\n\\n\\nRETURN LOOP(DATASET([],Layouts.Boundary)\\n\\t ,MATCHTEXT(OneSet/OperandAlpha[COUNTER]) <> ''\\n\\t ,PROJECT(ROWS(LEFT),TRANSFORM(Layouts.Boundary;\\n\\t\\t\\t\\t\\tSELF.BoundaryID := 0;\\n\\t\\t\\t\\t\\tSELF.TypeBoundary := Constants.BoundaryTypes.SETELEMENT;\\n\\t\\t\\t\\t\\tSELF.Value := MATCHTEXT(OneSet/OperandAlpha[COUNTER])\\n\\t\\t\\t\\t\\t)\\n\\t\\t )\\n\\t );\\n
\\n\\nThis passes the syntax check but at runtime I get error:\\nExpression is not constant: COUNTER
\\nI've had this problem before and raised a ticket with the core team.\\nhttps://track.hpccsystems.com/browse/HPCC-22160\\nThis has been 'accepted' as an issue, but I need a workaround now.\\n\\nI'll attach my PATTERNS I'm using if it helps. (the set PATTERN is called 'oneset')\\nAny ideas?\\n\\nYours\\nAllan\", \"post_time\": \"2019-12-12 11:01:09\" },\n\t{ \"post_id\": 29083, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"Allan\", \"post_text\": \"Great Richard,\\n\\nSo I have the key now to controlling preference.\\nPerhaps this could be made clear in the REF manual?\\n\\nThanks very much\\n\\nAllan\", \"post_time\": \"2020-01-17 10:25:07\" },\n\t{ \"post_id\": 28843, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, here are THREE ways to do it (the first two you've already seen). You'll note that all three examples create exactly the same result:rec := {STRING n};\\nds := DATASET([{'12,13,-5.8,+22.732,6,1234567,-128.0'},\\n {'11,10,-9.6,+34.999,9,7654321,-459.0'}],rec);\\n\\n//SplitWords solution:\\t\\t\\t\\t\\t\\t\\t \\nIMPORT Std;\\nResRec := {DATASET(rec) Nbrs};\\nP := PROJECT(ds,TRANSFORM(ResRec,\\n SELF.Nbrs := DATASET(Std.Str.SplitWords(LEFT.n,','),rec)));\\nP.Nbrs;\\n\\n//PARSE solution:\\t\\t\\t\\t\\t\\t\\t \\nPATTERN nbr := PATTERN('[-+.0-9]')+;\\nPATTERN sep := ',';\\nRULE num := nbr OPT(sep);\\n\\nPrec := {STRING n := MATCHTEXT(nbr)};\\nPARSE(ds,n,num,Prec,FIRST);\\n\\n//Second PARSE solution:\\t\\t\\t\\t\\t\\t\\t \\nPATTERN int := PATTERN('[0-9]')+;\\nPATTERN dot := '.';\\nPATTERN sign := ['+','-'];\\nPATTERN real_nbr := int dot int;\\nPATTERN int_nbr := int;\\nPATTERN val := real_nbr | int_nbr;\\n\\nRULE the_val := OPT(sign) val OPT(sep); \\n\\nVrec := {STRING n := MATCHTEXT(sign) + MATCHTEXT(val)};\\nPARSE(ds,n,the_val,Vrec,FIRST);
The key to your pattern precedence issue is handled by the order of the alternative patterns in the val PATTERN definition.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-08 21:00:24\" },\n\t{ \"post_id\": 28733, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"Allan\", \"post_text\": \"Hum,\\n\\nThinking on about this, perhaps the only way is to do the work in the TRANSFORM.\\nBy that I mean use:\\n\\nWHICH(MATCHED(<long pattern reference>),MATCHED(<shorter pattern reference>),MATCHED(<shortest pattern reference>));\\n
\\n\\n?\\n\\nAllan\", \"post_time\": \"2020-01-07 09:41:00\" },\n\t{ \"post_id\": 28723, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes I am concentrating on PARSE. Still getting my head round it.\\nOK, your solution is just a clever, pretty version of my ,(.*), solution. As you say, it 'side steps' the issue. \\n\\nI'm aiming to get my head round the general way to preferentially match longer patterns using PARSE, but confining myself to this specific example, say I wanted additional information returned by PARSE, say it had to recognise that the element was an integer or a real and so returned a dataset like:\\nReal itm\\nFALSE 12\\nFALSE 13\\nTRUE -5.8\\nTRUE +22.732\\netc\\n
\\nThen the issue could not be sidestepped. (well could be by analysing the element within the transform, but I'm looking to the pattern matcher to do the work.)\\n\\nYours\\nAllan\", \"post_time\": \"2020-01-07 09:12:54\" },\n\t{ \"post_id\": 28713, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nSorry, I obviously read too quickly and replied too glibly. \\n\\nHere's my real solution (two ways):
rec := {STRING n};\\nds := DATASET([{'12,13,-5.8,+22.732,6,1234567,-128.0'},\\n {'11,10,-9.6,+34.999,9,7654321,-459.0'}],rec);\\n\\n//SplitWords solution:\\t\\t\\t\\t\\t\\t\\t \\nIMPORT Std;\\nResRec := {DATASET(rec) Nbrs};\\nP := PROJECT(ds,TRANSFORM(ResRec,\\n SELF.Nbrs := DATASET(Std.Str.SplitWords(LEFT.n,','),rec)));\\nP.Nbrs;\\n\\n//PARSE solution:\\t\\t\\t\\t\\t\\t\\t \\nPATTERN nbr := PATTERN('[-+.0-9]')+;\\nPATTERN sep := ',';\\nRULE num := nbr OPT(sep);\\n\\nPrec := {STRING n := MATCHTEXT(nbr)};\\nPARSE(ds,n,num,Prec,FIRST);\\n
The first solution just uses the SplitWords function from the Standard Library and a nested child dataset. \\n\\nThe second is the PARSE answer I think you're looking for. Notice that I'm using a single pattern for the numbers and not building from smaller patterns -- for this problem, that makes more sense to me. \\n\\nI think the general rule would be to try to create parsing patterns that encompass all the possible variants of a single entity type (in this case, matching both positive and negative ints and reals as just generic numeric entities). Doing that should sidestep the "shorter match vs longer match" issue you're asking about.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-06 15:26:55\" },\n\t{ \"post_id\": 28703, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes obviously I could just match ,(.*), which extracts the items that can then have post processing done on them, but it does not answer my question.\\n\\nThere may be many levels of PATTERNS that match on particular stretch of input but the question is how to inhibit matching on shorter patterns if a longer pattern matches, but DO match if any longer patterns do NOT match.\\n\\nCheers\\nAllan\", \"post_time\": \"2020-01-06 11:24:16\" },\n\t{ \"post_id\": 28693, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Re: Matching the 'longest' string preferentially using PARSE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOnce you've extracted the numeric values, just cast them all to STRING then use the LENGTH function to get the longest one. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-04 08:49:23\" },\n\t{ \"post_id\": 28643, \"topic_id\": 7593, \"forum_id\": 8, \"post_subject\": \"Matching the 'longest' string preferentially using PARSE.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI expect this is a very simple question to answer, but the following is bugging me.\\nI have a comma separated list of integers or reals, interspersed in any order, so for example:\\n\\n
12,13,-5.8,+22.732,6,1234567,-128.0
\\n\\nIf a number is a real PATTERN(int dot int) I want that selected over a possible match of just an 'Int'.\\nSo I would like to extract from the above:\\n\\n12\\n13\\n-5.8\\n+22.732\\n6\\n1234567\\n-128.0\\n
\\nMy problem is that my match for integers matches in preference to my pattern for reals and returns, say:\\n12\\n13\\n-5\\n
\\n\\nThis is a 2 min question for the likes of Richard.\\n\\nCheers\\nAllan\", \"post_time\": \"2020-01-03 12:00:30\" },\n\t{ \"post_id\": 29143, \"topic_id\": 7663, \"forum_id\": 8, \"post_subject\": \"Re: Return the difference between two strings\", \"username\": \"Allan\", \"post_text\": \"Not exactly what you're looking for, but there are, in the standard library:\\n\\nSTD.Str.EditDistance\\nSTD.Str.EditDistanceWithinRadius\\n
\\nWhich gives you a metric on how different two strings are (there are also uni code versions of these functions)\\n\\nAllan\", \"post_time\": \"2020-01-21 15:33:28\" },\n\t{ \"post_id\": 29073, \"topic_id\": 7663, \"forum_id\": 8, \"post_subject\": \"Re: Return the difference between two strings\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nOK, here's a simple example of the way I would start approaching the problem:\\nStringDiff(STRING S1, STRING S2) := FUNCTION\\n L1 := LENGTH(S1);\\n L2 := LENGTH(S2);\\n ds := DATASET(MAX(L1,L2),\\n TRANSFORM({STRING char},\\n SELF.char := IF(S1[COUNTER]=S2[COUNTER],' ',S2[COUNTER])));\\n Rs := ROLLUP(ds,TRUE,TRANSFORM({STRING char},\\n SELF.char := LEFT.char + RIGHT.char))[1].char;\\t\\n // RETURN Rs;\\n RETURN DATASET([{S1},{S2},{Rs}],{STRING char});\\nEND;\\n\\nStringDiff('ABC','ABS');\\n\\nC1 := 'ABC DEF';\\nC2 := 'Abc Def Ghi';\\nStringDiff(C1,C2);
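For the first call, the three output rows should come back as 'ABC', 'ABS' and '  S': a blank wherever the two strings agree, and the character from S2 wherever they differ.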
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-16 20:06:05\" },\n\t{ \"post_id\": 29063, \"topic_id\": 7663, \"forum_id\": 8, \"post_subject\": \"Re: Return the difference between two strings\", \"username\": \"newportm\", \"post_text\": \"Hey Richard,\\n\\nWhat I am looking to do is compare XML strings to each other and pull out anything that is different between the two. I was not being picky with word vs Character because I could implement either. I also do not see any tools available at this time in the STD library. I opened HPCC-23304 just to be sure / get that conversation started. \\n\\nThere are certainly ways to do it in ECL using Normalize and writing some helper functions to detect character/word shifts. SALT has a few tools available but they don't tell you what is different just the % difference. So I was more looking to see if someone went down this path while I wait for the platform team's consideration.\\n\\nTim\", \"post_time\": \"2020-01-16 15:55:46\" },\n\t{ \"post_id\": 29053, \"topic_id\": 7663, \"forum_id\": 8, \"post_subject\": \"Re: Return the difference between two strings\", \"username\": \"rtaylor\", \"post_text\": \"newportm,\\n\\nDo you have some specifics of what you would expect and the type of string data you're looking at comparing? \\n\\nIOW, what's the real scope of your problem?\\n
Or possibly something else?\\n\\nFWIW, I don't know of any ECL/HPCC functions that do any of these (although I use Beyond Compare all the time, so I know it has all been done before on other platforms). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-16 15:28:39\" },\n\t{ \"post_id\": 29043, \"topic_id\": 7663, \"forum_id\": 8, \"post_subject\": \"Return the difference between two strings\", \"username\": \"newportm\", \"post_text\": \"Has anyone come up with a good way to return the difference between two strings in ECL? I'm trying not to recreate the wheel here.\\n\\nThere are a lot of good tools to tell us the degree at which two strings do not match but nothing I can find to return the actual differences.\", \"post_time\": \"2020-01-16 00:45:46\" },\n\t{ \"post_id\": 29453, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"kumarip\", \"post_text\": \"Good Information.\", \"post_time\": \"2020-02-12 11:49:47\" },\n\t{ \"post_id\": 29273, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"ghalliday\", \"post_text\": \"(Copied from email response:)\\n\\nI think I have got to the bottom of the problem. The error may be correct, but the error location in the error message is very unhelpful.\\n\\nThe problem is in the following line in GetStatesFrom:\\n\\n currentStatus := TABLE(transitions, statesFromStatusRec);\\n\\nI think what you are trying to do is to create a data set with a single row that contains all the transitions as a child dataset. What that line is doing is creating a dataset with a row for each row in the transitions dataset. The complaint is coming because the record:\\n\\n DATASET(Model.DFATransition) t := transitions;\\n\\nwhich is called for each row in transitions, thinks it should be treated as \\n\\n DATASET(Model.DFATransition) t := ROW(transitions);\\n\\nI think what you want to achieve is probably:\\n\\n currentStatus := DATASET([{transitions}], statesFromStatusRec);\\n\\nIf you actually want a dataset with one row for each transition, with the transitions dataset duplicated into each row then I would suggest using PROJECT instead of TABLE.\\n\\nI have opened a jira to improve the error message. (HPCC-23437)\", \"post_time\": \"2020-02-03 10:26:06\" },\n\t{ \"post_id\": 29193, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"vzeufack\", \"post_text\": \"Hi @Ghalliday,\\n\\nI just sent again.\\n\\nI think It may be because I did not use the email with which I registered in this forum.\\n\\nBest regards,\\nVannel,\", \"post_time\": \"2020-01-27 15:07:25\" },\n\t{ \"post_id\": 29163, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"ghalliday\", \"post_text\": \"I haven't seen an email yet. 
It is possible that it was blocked, but I don't think it would be.\", \"post_time\": \"2020-01-25 08:06:53\" },\n\t{ \"post_id\": 29153, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"vzeufack\", \"post_text\": \"OK Sent!\", \"post_time\": \"2020-01-21 19:02:56\" },\n\t{ \"post_id\": 29133, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"ghalliday\", \"post_text\": \"When you submit the query it creates an xml archive - which contains all the code used by the query. If you look at the helpers tab in eclwatch you will see one of them is an archive. If you are happy for me to see the code you could email that to gavin <dot> halliday <at> lexisnexisrisk <dot> com and I can debug the issue.\", \"post_time\": \"2020-01-21 15:01:58\" },\n\t{ \"post_id\": 29123, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"vzeufack\", \"post_text\": \"Hi @ghalliday,\\n\\nI am using HPCCSystemsVM-amd64-7.2.20-1.\\nWhat do you mean by "archive of the query"?\\n\\nBest regards,\\nVannel,\", \"post_time\": \"2020-01-21 14:40:55\" },\n\t{ \"post_id\": 29113, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Re: Need to use active(dataset) to refer to the current row\", \"username\": \"ghalliday\", \"post_text\": \"Not immediately, from a quick look the code looks correct.\\n\\nWhat version of the platform are you using? (It might possibly be related to https://track.hpccsystems.com/browse/HPCC-20933).\\n\\nI would probably need a copy of the archive of the query to be able to debug much further.\", \"post_time\": \"2020-01-20 12:11:24\" },\n\t{ \"post_id\": 29103, \"topic_id\": 7673, \"forum_id\": 8, \"post_subject\": \"Need to use active(dataset) to refer to the current row\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI wrote the following ECL function:\\n\\n//transform a NFA into a DFA\\nEXPORT NFAtoDFA (Model.NFA nfa) := FUNCTION\\n DFAStatus := RECORD\\n DATASET (Model.DFAState) tobeDone;\\n DATASET (Model.DFATransition) transitions := DATASET([], Model.DFATransition);\\n END;\\n\\n dfaStartState := DATASET([{Controller.Closure(nfa, nfa.states[1].value)}], Model.DFAState);\\n\\n currentDFAStatus := DATASET([{dfaStartState}], DFAStatus);\\n\\n finalDFAStatus := LOOP(currentDFAStatus,\\n COUNT(LEFT.tobeDone) > 0,\\n PROJECT(ROWS(LEFT),\\n TRANSFORM(DFAStatus,\\n newTransitions := Controller.ComputeDFATransitions(nfa, LEFT.tobeDone[1]); \\n newStates := GetStatesFrom(newTransitions); \\n newTobeDone := LEFT.tobeDone[2..] + newStates;\\n SELF.transitions := LEFT.transitions + newTransitions; \\n SELF.tobeDone := DEDUP(newTobeDone, ALL)))); \\n \\n RETURN finalDFAStatus[1].transitions;\\nEND;\\n
\\n\\nHowever when running this, I get the following error:\\nComputeDFATransitions(nfa, LEFT.tobeDone[1]) - Need to use active(dataset) to refer to the current row of an active dataset
\\n\\nAny idea on how to solve the issue?\\n\\nBest regards,\\nVannel\", \"post_time\": \"2020-01-17 14:41:33\" },\n\t{ \"post_id\": 29253, \"topic_id\": 7693, \"forum_id\": 8, \"post_subject\": \"Re: leading zero's being stripped\", \"username\": \"jtaylor178\", \"post_text\": \"your are correct sorry. Found a macro some of my lower level code was calling and it did a trim. My BAD.\\n\\nthanks\", \"post_time\": \"2020-01-30 20:18:47\" },\n\t{ \"post_id\": 29243, \"topic_id\": 7693, \"forum_id\": 8, \"post_subject\": \"Re: leading zero's being stripped\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor178,\\n\\nThis appears to be a cosmetic issue only in the ECL Watch results page.\\n\\nIn my testing, the results of these two queries are correctly displayed in the ECL IDE, but leading zeroes are missing in the ECL Watch display. IOW, the actual result value is correct but the display seems to automatically strip the leading spaces. This is probably an artifact of the difference between the way web pages and Windows programs display data.\\n\\nPlease submit a JIRA ticket for this issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-30 19:10:48\" },\n\t{ \"post_id\": 29233, \"topic_id\": 7693, \"forum_id\": 8, \"post_subject\": \"leading zero's being stripped\", \"username\": \"jtaylor178\", \"post_text\": \"My string is being trimmed.\\n\\nI have tried STRING and STRING7 and in both the leading and end space are being removed. I need to keep them, any suggestions\\n\\nSTRING7 xx:=' '+'2'+' '+'3'+'4'+' ';\\n\\nOutput(xx);\\n\\nOutput(' 2 34 ');\\n\\nOUTPUT is :\\n2 34\\n2 34\\n\\n\\n\\nIn another process, I am creating a file and despraying it. Here I tried to add the NOTRIM but it is still trimmed.\\n\\nOutputJorudanLogical \\t:= IF(DoSort,OUTPUT(GSECSorted,,Outpath,CSV(NOTRIM,HEADING(Joru ...\", \"post_time\": \"2020-01-30 18:44:23\" },\n\t{ \"post_id\": 29723, \"topic_id\": 7793, \"forum_id\": 8, \"post_subject\": \"Re: ECL Watch Spray delimited -- how does it work?\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Vin,\\n\\nThe behavior you describe below seems to be correct. Here are my two cents...\\n \\nOverall, the goal of the spray operation is to partition the original data file you´d uploaded to the landing zone into as many "pieces" as there are Thor nodes in your target cluster and put each "piece" of the partitioned file into the disk of a respective Thor node. \\n\\nIn order to perform the delimited spray operation, the DFU needs to know, ideally, the size of your original data file and the character(s) for the line terminator. Based on these information, the original data file can then be partitioned more uniformly across the cluster nodes without "breaking" any records. You can also provide information about the separators at this point in time and this information can be used when you define your DATASET later in a ECL code.\\n\\nAfter the delimited spray operation is performed (with or without the information about separator), if you look at the logical file content in ECL Watch, you will still see the record contents into a single line, as you describe. However, once you define the DATASET and its respective RECORD structure in a ECL code and OUTPUT its content, you will be able to see the fields properly separated. 
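For example, a minimal sketch of such a definition (the logical filename and field names here are purely illustrative):

PersonRec := RECORD
  STRING firstname;
  STRING lastname;
  STRING city;
END;

// comma-separated file with one header line
Persons := DATASET('~tutorial::hmw::persons', PersonRec, CSV(HEADING(1), SEPARATOR(',')));

OUTPUT(Persons);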
At this point, if you don't provide a specification for the separator during the definition of your DATASET in the ECL code, the separator information you provided during the spray operation will be leveraged automatically for you.\\n\\nHTH\\nHugo W.\", \"post_time\": \"2020-03-07 20:19:12\" },\n\t{ \"post_id\": 29713, \"topic_id\": 7793, \"forum_id\": 8, \"post_subject\": \"ECL Watch Spray delimited -- how does it work?\", \"username\": \"vin\", \"post_text\": \"I am trying to upload and spray a CSV file.\\n\\nIn ECL Watch, in the \"files>>landing zone\" section, there is an option to spray delimited. In the drop-down associated with spray-delimited, there are several fields. One field is \"Separators\". Regardless of what separator I have specified in that field, my file always loads with one record per line. That is, it ignores the separator field.\\n\\nI have tried this with comma and tab (and even letter) delimiters, without any effect. I understand there is a CSV option available in ECL https://hpccsystems.com/training/documentation/ecl-language-reference/html/CSV_Files.html. I will be trying this next.\\n\\nHowever, my questions about spray-delimited still remain.\\n\\nWhat does it do?\\nHow do I use it?\\n
\", \"post_time\": \"2020-03-07 15:37:36\" },\n\t{ \"post_id\": 29843, \"topic_id\": 7833, \"forum_id\": 8, \"post_subject\": \"Re: Normal distribution\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,\\n\\nTake a look at the Machine Learning Library (https://hpccsystems.com/es/download/free-modules/machine-learning-library) which does contain some normal distributions functions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-03-20 20:28:46\" },\n\t{ \"post_id\": 29833, \"topic_id\": 7833, \"forum_id\": 8, \"post_subject\": \"Normal distribution\", \"username\": \"mo0926\", \"post_text\": \"Hello, \\n\\nHow would you run normal distribution algorithm across every dob and lexid in a file?\", \"post_time\": \"2020-03-20 12:15:57\" },\n\t{ \"post_id\": 29913, \"topic_id\": 7853, \"forum_id\": 8, \"post_subject\": \"Re: Protecting files from deletion\", \"username\": \"janet.anderson\", \"post_text\": \"Thank you!\", \"post_time\": \"2020-03-24 19:20:01\" },\n\t{ \"post_id\": 29873, \"topic_id\": 7853, \"forum_id\": 8, \"post_subject\": \"Re: Protecting files from deletion\", \"username\": \"Tony Kirk\", \"post_text\": \"Perhaps https://hpccsystems.com/training/docume ... lFile.html?\\n\\nWhat is not mentioned there is that only the user who protects a file can then unprotect it.\", \"post_time\": \"2020-03-24 10:36:01\" },\n\t{ \"post_id\": 29863, \"topic_id\": 7853, \"forum_id\": 8, \"post_subject\": \"Protecting files from deletion\", \"username\": \"janet.anderson\", \"post_text\": \"I am aware of the protect option using #WORKUNIT, but is there anything to protect logical files on the Thor from being deleted? I realize that in a Dev environment, this may not be generally advisable, but some co-workers are having issues with files being deleted without notification. \\n\\nI have very VAGUE memories of someone mentioning code signing in relation to protecting files from malicious actors, but nothing in the Programmers' Guide is hinting at that use.\", \"post_time\": \"2020-03-23 20:47:55\" },\n\t{ \"post_id\": 29903, \"topic_id\": 7863, \"forum_id\": 8, \"post_subject\": \"Re: Create a PERSIST file COMPRESSED\", \"username\": \"rtaylor\", \"post_text\": \"mateus.andrade,\\n\\nPERSIST files are always automatically LZW compressed.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-03-24 15:25:58\" },\n\t{ \"post_id\": 29883, \"topic_id\": 7863, \"forum_id\": 8, \"post_subject\": \"Create a PERSIST file COMPRESSED\", \"username\": \"mateus.andrade\", \"post_text\": \"Dear all, \\n\\nI'm facing the need to use either PERSIST or INDEPENDENT to ensure a graph will be only executed once, but the DATASET is too large and will create a huge file during the Workunit execution. \\n\\nI've notice also I can save some space in disk is this output is COMPRESSED, is there a way to combine those options PERSIST and COMPRESSED?\\n\\nHPCC Version: 6.44\", \"post_time\": \"2020-03-24 11:10:40\" },\n\t{ \"post_id\": 29961, \"topic_id\": 7903, \"forum_id\": 8, \"post_subject\": \"Re: Visualizer properties in code?\", \"username\": \"gsmith\", \"post_text\": \"Some samples in case you didn't see them already:\\nhttps://github.com/hpcc-systems/Visuali ... erties.ecl\\n\\nhttps://github.com/hpcc-systems/Visuali ... 
r/tutorial\", \"post_time\": \"2020-03-26 19:09:29\" },\n\t{ \"post_id\": 29953, \"topic_id\": 7903, \"forum_id\": 8, \"post_subject\": \"Visualizer properties in code?\", \"username\": \"jcl\", \"post_text\": \"Apologies if this is asked and answered; I tried searching and didn't see any relevant topics for "visualizer". But could anyone point me in the direction of some examples of how to define the "properties" for methods of the Visualizer module in ECL?\\n\\nThe documentation only ever references the "Dermatology Properties" section, which says "You can set properties in your ECL code or afterwards in ECL Watch:"... and then shows a few screenshots demonstrating how that can be done in ECL Watch. But there doesn't seem to be any info about what properties can be set in the "properties" argument of the various methods.\\n\\nIs it possible to modify any properties above the "widget.content.widget" level, eg could I set the "widget.content.title"?\\n\\nThanks!\", \"post_time\": \"2020-03-26 18:11:49\" },\n\t{ \"post_id\": 30013, \"topic_id\": 7923, \"forum_id\": 8, \"post_subject\": \"Re: DICTIONARY and lookup of DATASETS\", \"username\": \"Allan\", \"post_text\": \"Thanks for the confirmation.\\n\\nPerhaps a 'Documentation' ticket would be better. I don't 'need' it.\\nCreated: https://track.hpccsystems.com/browse/HPCC-23834\\nCheers\\n\\nAllan\", \"post_time\": \"2020-04-07 06:44:19\" },\n\t{ \"post_id\": 30003, \"topic_id\": 7923, \"forum_id\": 8, \"post_subject\": \"Re: DICTIONARY and lookup of DATASETS\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI duplicated all this. It appears as though, while a DICTIONARY may be a nested child dataset, it may not contain a nested child dataset. IOW, this works:RR := RECORD\\n UNSIGNED2 AttributeID;\\n UNSIGNED2 BoundaryType;\\n DATASET({STRING itm}) BoundaryTexts;\\nEND;\\n\\nd := DATASET([{1,55,DATASET([{'aaa'},{'bbb'}],{STRING itm})}],RR);\\n\\ndtype := DICTIONARY(d,{AttributeID => BoundaryType});\\n\\ndtype[1].BoundaryType;
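In the meantime, a possible workaround (assuming AttributeID is unique, as in this example) is to skip the DICTIONARY for the child dataset and simply filter the base dataset:

GetTexts(UNSIGNED2 id) := d(AttributeID = id)[1].BoundaryTexts;
GetTexts(1); // returns the child dataset containing 'aaa' and 'bbb'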
\\nIf this is something you need, then a JIRA feature request would be in order.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-04-06 20:26:38\" },\n\t{ \"post_id\": 29993, \"topic_id\": 7923, \"forum_id\": 8, \"post_subject\": \"DICTIONARY and lookup of DATASETS\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nThis may be documented but I see strange behaviour in DICTIONARY when the item looked up is a DATASET.\\nFirstly when syntax checking:\\n\\nRR := RECORD\\n UNSIGNED2 AttributeID;\\n UNSIGNED2 BoundaryType;\\n DATASET({STRING itm}) BoundaryTexts;\\nEND;\\n\\nd := DATASET([{1,55,DATASET([{'aaa'},{'bbb'}],{STRING itm})}],RR);\\n\\ndtype := DICTIONARY(d,{AttributeID => BoundaryType});\\ndText := DICTIONARY(d,{AttributeID => BoundaryTexts});\\n\\ndtype[1].BoundaryType;\\ndText[1].BoundaryTexts;\\n
\\nReturns error:\\nError: Unknown identifier "BoundaryTexts" (13, 10), 2167,
\\nNow I know the field qualifier is not needed in this instance, but it's still allowed for 'BoundaryType'.\\n\\nNow if I remove the unnecessary field qualifiers thus:\\ndtype[1];\\ndText[1];\\n
\\naccess of 'dtype' works, but the access of 'dtext' crashes the workunit with:\\nError: DATASET([{1,55,DATASET([{'aaa'},{'bbb'}], { string itm })}], rr).boundarytexts.itm - Table DATASET([{1,55,...}], rr).boundarytexts is not related to d (10, 10), 2131, \\n
\\n\\nSo is there a restriction on the types of items that can be looked up with DICTIONARY?\\n\\nI notice that SET OF STRING works ok:\\nRR := RECORD\\n UNSIGNED2 AttributeID;\\n UNSIGNED2 BoundaryType;\\n SET OF STRING BoundaryTexts;\\nEND;\\n\\nd := DATASET([{1,55,['aaa','bbb']}],RR);\\n\\ndtype := DICTIONARY(d,{AttributeID => BoundaryType});\\ndText := DICTIONARY(d,{AttributeID => BoundaryTexts});\\n\\n//dtype[1].BoundaryType;\\ndText[1];\\n
\", \"post_time\": \"2020-04-06 17:11:38\" },\n\t{ \"post_id\": 30073, \"topic_id\": 7953, \"forum_id\": 8, \"post_subject\": \"Re: SendEmail, Outlook and newline characters\", \"username\": \"SChatman85\", \"post_text\": \"Hi Richard,\\n\\nThanks for the response.\\n\\nInterestingly \\\\r\\\\n behaves the same way as \\\\n\\\\n and i get:\\n\\nLine 1\\n\\nLine 2
\\n\\nas opposed to the desired\\n\\nLine1\\nLine2
\", \"post_time\": \"2020-04-09 13:29:22\" },\n\t{ \"post_id\": 30063, \"topic_id\": 7953, \"forum_id\": 8, \"post_subject\": \"Re: SendEmail, Outlook and newline characters\", \"username\": \"rtaylor\", \"post_text\": \"SChatman85,\\n\\nHave you tried using "\\\\r\\\\n" (DOS-style instead of Unix-style) instead?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-04-09 13:23:02\" },\n\t{ \"post_id\": 30053, \"topic_id\": 7953, \"forum_id\": 8, \"post_subject\": \"SendEmail, Outlook and newline characters\", \"username\": \"SChatman85\", \"post_text\": \"Hi all,\\n\\nAs part of one of our builds we have a reporting email being sent.\\n\\nWe are using STD.System.Email.SendEmail to fire this off.\\n\\nWithin the string building we are using '\\\\n' to format the output to make it easier to digest. The email starts off ok but then after about 20 lines the new line characters are ignored for a few lines. Things then go back to normal, before again newlines are removed.\\n\\nI have used OUTPUT on the body variable and it looks ok within ECL IDE if I paste the string into a tool such as Notepad++\\n\\nIt seems that it is Outlook causing issues.\\n\\nI don't want to be going down the route of pushing this data as an attachment so was wondering if there are any other methods/means to try and work around this?\\n\\n\\nI have so far tried using alternative such as html tags but those are just printed as plain text.\", \"post_time\": \"2020-04-09 09:31:05\" },\n\t{ \"post_id\": 30903, \"topic_id\": 7983, \"forum_id\": 8, \"post_subject\": \"Re: EXPIRE not working\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe Sasha component is responsible for the deletion of expired files.\\n\\nCould you check it is running properly, it's process is running (saserver)?\\nIf it is running correctly, there should be evidence of the files it tried to delete. The logging should contain "FILEEXPIRY: Deleting" and "FILEEXPIRY: Deleted" together with the names of the logical filenames that were being handled.\\n\\nThanks.\", \"post_time\": \"2020-05-23 16:43:05\" },\n\t{ \"post_id\": 30893, \"topic_id\": 7983, \"forum_id\": 8, \"post_subject\": \"Re: EXPIRE not working\", \"username\": \"micevepay\", \"post_text\": \"Can you share the JIRA ticket for this?\\n\\n[quote="SChatman85":n16p56ci]Hi Richard,\\n\\nThanks for the response.\\n\\nThey do indeed come back as 90:\\n\\naccessed\\t expire\\n2020-01-10T17:05:08\\t90
\\n\\nI'll look to log a Bug on JIRA\", \"post_time\": \"2020-05-22 20:21:01\" },\n\t{ \"post_id\": 30183, \"topic_id\": 7983, \"forum_id\": 8, \"post_subject\": \"Re: EXPIRE not working\", \"username\": \"SChatman85\", \"post_text\": \"Hi Richard,\\n\\nThanks for the response.\\n\\nThey do indeed come back as 90:\\n\\naccessed\\t expire\\n2020-01-10T17:05:08\\t90
\\n\\nI'll look to log a Bug on JIRA\", \"post_time\": \"2020-04-15 08:13:09\" },\n\t{ \"post_id\": 30173, \"topic_id\": 7983, \"forum_id\": 8, \"post_subject\": \"Re: EXPIRE not working\", \"username\": \"rtaylor\", \"post_text\": \"SChatman85,\\n\\nWhat do you get when you run this for those files?STD.File.GetLogicalFileAttribute('~' + L.name, 'expireDays');
If this comes back blank, then EXPIRE was not set on the OUTPUT that created the file. \\n\\nBut, if they come back as "90" then you've identified a problem that needs to be reported in JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-04-14 18:14:08\" },\n\t{ \"post_id\": 30163, \"topic_id\": 7983, \"forum_id\": 8, \"post_subject\": \"EXPIRE not working\", \"username\": \"SChatman85\", \"post_text\": \"Hi,\\n\\nWe have EXPIRE(90) set on the creation of output logs from a job each day.\\n\\nI understand that this is from the last time the file was read.\\n\\nI have used:\\n\\nSTD.File.GetLogicalFileAttribute('~' + L.name, 'accessed');
\\n\\n84 of these files show as:\\n\\n2020-01-10T17:05:08\\n\\nThis is more than 90 days ago, so I don't understand why they have not been removed.\\n\\nCan anyone point me in the right direction to resolve this?\\n\\nThanks\", \"post_time\": \"2020-04-14 15:39:38\" },\n\t{ \"post_id\": 30343, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Re: Error: Object does not have a member named 'fSprayJson'\", \"username\": \"JimD\", \"post_text\": \"In general, you should use a version of Client Tools that matches the server to which you are submitting ECL. \\n\\nYou can have more than one version of Client Tools installed. The IDE will auto-detect and use the version that best matches the target (unless you specify Override Compiler options in your configuration preferences). \\n\\nThe ECL extension for VSCode also auto-detects, if you let it.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2020-04-24 16:00:12\" },\n\t{ \"post_id\": 30333, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Re: Error: Object does not have a member named 'fSprayJson'\", \"username\": \"janet.anderson\", \"post_text\": \"It looks to be running now that I am choosing a different target (eclcc) on same server.\", \"post_time\": \"2020-04-24 14:47:53\" },\n\t{ \"post_id\": 30323, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Re: Error: Object does not have a member named 'fSprayJson'\", \"username\": \"janet.anderson\", \"post_text\": \"I'm using 7.8.0 ECL IDE and Client Tools. The server is 7.6.40-rc1.\", \"post_time\": \"2020-04-24 14:28:13\" },\n\t{ \"post_id\": 30313, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Re: Error: Object does not have a member named 'fSprayJson'\", \"username\": \"rtaylor\", \"post_text\": \"janet,\\n\\nWe've tried it with a stock 7.8.0 install and it works fine. So, is there any possibility that you've edited that file? \\n\\nAlso, what version of Client Tools are you using and what platform version (are they both the same, or is there a mismatch)?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-04-24 12:57:46\" },\n\t{ \"post_id\": 30303, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Re: Error: Object does not have a member named 'fSprayJson'\", \"username\": \"JimD\", \"post_text\": \"What is the Platform version on the server you are submitting to?\\n\\nJim\", \"post_time\": \"2020-04-24 12:30:40\" },\n\t{ \"post_id\": 30293, \"topic_id\": 8033, \"forum_id\": 8, \"post_subject\": \"Error: Object does not have a member named 'fSprayJson'\", \"username\": \"janet.anderson\", \"post_text\": \"I get the following errors:\\n\\n[color=#000080:3miqvlnd]Error: Object does not have a member named 'fSprayJson' (558, 35 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\nError: Unknown identifier "fSprayJson" (558, 35 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\nError: Incompatible types: can not assign Integer to varstring (557, 1 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\nError: Object does not have a member named 'SprayJson' (568, 35 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\nError: Unknown identifier "SprayJson" (568, 35 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\nError: Too many errors (max = 5); Aborting... 
(568, 44 - C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl)\\n\\nI don't even seee where the code I'm calling would be calling fSprayJson. But I verified that C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl has fSprayJson at line 557. \\n\\nMy ECL IDE preferences show the compiler pointing to: \\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.0\\\\clienttools\\\\bin\\\\eclcc.exe\\n\\nWhat do I need to fix to get my code to run?\", \"post_time\": \"2020-04-23 19:49:46\" },\n\t{ \"post_id\": 30383, \"topic_id\": 8043, \"forum_id\": 8, \"post_subject\": \"Re: option export dependencies\", \"username\": \"ghalliday\", \"post_text\": \"I don't really use ECL IDE either. I think you probably could set it that way. I was meaning to click the debug button on the builder window and then there is an option to add debug options. That is more of a pain because you need to set it for each query.\\n\\nIf you want it set on all workunits, then you could configure it on the servers by add it as an option in the eclccserver settings - then it would automatically be applied to all workunits.\\n\\nexportDependencies with a value of 1\", \"post_time\": \"2020-04-28 13:37:22\" },\n\t{ \"post_id\": 30373, \"topic_id\": 8043, \"forum_id\": 8, \"post_subject\": \"Re: option export dependencies\", \"username\": \"amillar\", \"post_text\": \"Hi there,\\n\\nthanks for the quick reply, one more quick query,\\n\\nwhen you say "or set the option in the eclide debug settings."\\n\\ndo you mean within ECL Ide - Preferences - Compiler - WU Arguments\\n\\nand if so do you just enter -f exportDependencies\\n\\nI am asking for our dev team as I just setup the clusters for their use and don't really use the ECL IDE.\\n\\nIs this documented anywhere so I can read up on it?\\n\\nThanks in advance\\n\\nAntony\", \"post_time\": \"2020-04-27 14:50:02\" },\n\t{ \"post_id\": 30363, \"topic_id\": 8043, \"forum_id\": 8, \"post_subject\": \"Re: option export dependencies\", \"username\": \"ghalliday\", \"post_text\": \"I think using a #option is too late for enabling the details (because the information is gathered as the query is parsed). You need to submit the workunit with the option set. 
E.g.\\n\\necl run myquery.ecl -fexportDependencies\\n\\nor set the option in the eclide debug settings.\", \"post_time\": \"2020-04-27 13:02:08\" },\n\t{ \"post_id\": 30353, \"topic_id\": 8043, \"forum_id\": 8, \"post_subject\": \"option export dependencies\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nI have just come across this blog : https://hpccsystems.com/blog/definition-dependencies\\n\\nBy Gavin Halliday and I am trying to enable the “export dependencies” option for one of my work units so I can track dependencies.\\n\\nI have added : \\n\\n#Option('exportDependencies',true);\\n\\nTo my work unit, however when I submit the job, I do not see an extra XML in the helper’s tab as described.\\n\\nCan you let me know where I have gone wrong?\\n\\nI would also like to add this as a default for all work units, can you let me know how I do this, is it in the environment.xml or somewhere in the ECL server?\\n\\nThe blog goes on to say : \\n\\nThis information gives us the option to possibly add dependency graphs, and searches for all workunits that use a particular attribute to future versions of EclWatch.\\n\\nDo you know if this is still planned for a future version of ECL Watch?\\n\\nThanks in advance\\n\\nAntony\", \"post_time\": \"2020-04-27 10:52:45\" },\n\t{ \"post_id\": 30913, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Re: Depray into compressed arcive\", \"username\": \"oleg\", \"post_text\": \"Thank you very much, guys!\\nFYI: we decided that the simplest way is to go with the standard UNIX compressed drive feature. Hopefully, compression ratio on it will be good enough.\", \"post_time\": \"2020-05-28 15:24:38\" },\n\t{ \"post_id\": 30601, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Re: Depray into compressed arcive\", \"username\": \"Allan\", \"post_text\": \"But as you say hwatanuki,\\n\\n Its not 'on-the-fly'.\\n Oleg is attempting to avoid the despray of uncompressed data.\\n\\nOleg, \\n\\n There are compression attributes in the repo, you could compress every field before despraying, perhaps also use STD.Str.EncodeBase64 on top, if the output could not be binary?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-05-14 06:15:33\" },\n\t{ \"post_id\": 30541, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Re: Depray into compressed arcive\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Oleg, \\n\\nEven though it is not precisely "on-the-fly", the code below is an example with PIPE that can be run on the playground and may be useful to accomplish your end goal in a "semi-automated" way. \\n\\nHTH,\\nHugoW\\n\\n\\nIMPORT STD;\\n\\nrec := RECORD\\n string name;\\nEND;\\n\\n\\nDespray := Std.File.Despray('~test::hmw::despray_compress',\\n '10.0.0.90',\\t \\n '/var/lib/HPCCSystems/mydropzone/despray_file',\\n -1,\\n 'https://10.0.0.90:18010/FileSpray',\\n 1,\\n TRUE);\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nMyDropZone := '/var/lib/HPCCSystems/mydropzone/';\\t\\t\\t\\t\\t\\t\\t\\t \\nRawFilename := MyDropZone + 'despray_file'; \\n//ZipFilename := MyDropZone + 'compressed_file'; //Optional, to maintain the original file\\n \\nZipCmdRaw := 'ssh 10.0.0.90 "' +\\n // 'cp ' + RawFileName + ' ' + ZipFilename + ' && gzip -f ' + ZipFilename + '";'; //Optional, to maintain the original file\\n 'gzip -f ' + RawFilename + '";';\\n \\nZipCmd := 'bash -c \\\\'' + ZipCmdRaw + '\\\\'';\\nZippedDS := PIPE(ZipCmd,rec);\\n \\nORDERED(Despray,OUTPUT(ZippedDS));\\n
\", \"post_time\": \"2020-05-13 19:21:22\" },\n\t{ \"post_id\": 30531, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Re: Depray into compressed arcive\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe platform does not currently support despraying to compressed formats.\\n\\nThere are some Linux filing system types that support on-the-fly compression, such that you can configure a folder as compressed and it will compress all output to it as it is written.\\n\\nAlternatively, as Allan has suggested, you could use PIPE to use a command line tool that accepts data from stdin. If this is a distributed file, you would end up with N part outputs, but you could ensure the 1st had all the data, by using DISTRIBUTE(theds, 0);\\n\\nHope that helps.\", \"post_time\": \"2020-05-13 17:06:31\" },\n\t{ \"post_id\": 30501, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Re: Depray into compressed arcive\", \"username\": \"Allan\", \"post_text\": \"Hi Oleg,\\n\\nLong time no hear from you.\\n\\nHave you investigated PIPE, executing an external compress command (7za say) on each node and somehow its stdout is directed to your target box.\\n\\nJust a guess, I've not tried it myself.\\n\\nCheers, all the best.\\n\\nAllan\", \"post_time\": \"2020-05-13 15:49:29\" },\n\t{ \"post_id\": 30483, \"topic_id\": 8063, \"forum_id\": 8, \"post_subject\": \"Depray into compressed arcive\", \"username\": \"oleg\", \"post_text\": \"Hi everybody,\\n\\nI need to despray and archive a bunch of very large files (around 20 TB each) and looking for option to compress them on the fly to save both time and space.\\n\\nIs there any option to despray and compress files?\\n\\nAlternatively, maybe there is a Linux feature which can help? (I think the named pipes can solve this - but I didn't use this feature for ages)\", \"post_time\": \"2020-05-12 20:20:19\" },\n\t{ \"post_id\": 30551, \"topic_id\": 8071, \"forum_id\": 8, \"post_subject\": \"Re: Question behaviour of Local JOINs FULL ONLY\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe form of DISTRIBUTE you're using is the "random" version, which basically distributes the records based on a hash of the entire record. \\n\\nSince your JOIN is LOCAL, my guess/explanation would be that the "single" results you're seeing are those records where the left or right "matching" record ended up on a different node. \\n\\nSince your global JOIN version works correctly, I'm pretty sure that's the reason.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-13 19:24:40\" },\n\t{ \"post_id\": 30521, \"topic_id\": 8071, \"forum_id\": 8, \"post_subject\": \"Re: Question behaviour of Local JOINs FULL ONLY\", \"username\": \"Allan\", \"post_text\": \"Just to add, if I run non-local, i.e. pan node I always get 2 records back for each record difference.\\nBut would still like to understand the LOCAL behaviour.\\nYours\\nAllan\", \"post_time\": \"2020-05-13 16:17:48\" },\n\t{ \"post_id\": 30511, \"topic_id\": 8071, \"forum_id\": 8, \"post_subject\": \"Question behaviour of Local JOINs FULL ONLY\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm comparing two DATASETs and want to find differences (any differences).\\nI've started my implementation attempting to use:\\n\\ndBase := DISTRIBUTE(base); // On whole record\\ndcandidate := DISTRIBUTE(candidate);\\nJOIN(dbase,dcandidate,<equality comparison on every field>,FULL ONLY,LOCAL);\\n
\\n\\nI'm trying to justify the results I'm getting back.\\nSome different records return two results, one for the LEFT the other for the RIGHT, but other different records just return a single record, either from the LEFT or RIGHT.\\n\\nCan someone explain this behaviour?\\nSay there was a single letter case difference in a STRING field between the two DATASETS, now the DISTRIBUTE can do one of two things, either allocate them out to the same node or allocate them out to different nodes.\\nIs that the crux of the difference, for I can think of no other.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-05-13 16:03:51\" },\n\t{ \"post_id\": 30943, \"topic_id\": 8153, \"forum_id\": 8, \"post_subject\": \"Re: Internal output exceeds limit error\", \"username\": \"BalajiTS\", \"post_text\": \"Thank you Richard. \\nI reached out to Dan offline and came to know that moving MySQL plugin call processing to slave might solve this issue and it worked!!\", \"post_time\": \"2020-06-01 17:15:08\" },\n\t{ \"post_id\": 30933, \"topic_id\": 8153, \"forum_id\": 8, \"post_subject\": \"Re: IInternal output exceeds limit error\", \"username\": \"rtaylor\", \"post_text\": \"Balaji,graph28[2111], workunitwrite[2114]
This part of the error message is telling exactly which activity in the graph is causing the problem (activity 2114 in the 2111 subgraph, which is in graph 28). So look at that activity and relate it back you your ECL code to see which line of code it is. \\n\\nThe problem is that the intermediate dataset produced by that line of ECL (whatever it is) is more than 10 megabytes of data, and the limit is set to 10 (the default). You can try changing the limit using #OPTION like this:\\n#OPTION('outputLimit',20); //double the limit to 20 Mb
or you can just find another way of accomplishing the task.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-29 12:40:57\" },\n\t{ \"post_id\": 30923, \"topic_id\": 8153, \"forum_id\": 8, \"post_subject\": \"Internal output exceeds limit error\", \"username\": \"BalajiTS\", \"post_text\": \"Hi,\\nPlease help me understand more about the below error.\\n\\n9\\tSystem error: 10099: Graph graph28[2111], workunitwrite[2114]: Dataset too large to output to workunit (limit is set to 10) megabytes, in result (name=auto1L), Master exception - caused by (10099, Dataset too large to output to workunit (limit is set to 10) megabytes, in result (name=auto1L))
\\n\\nCode is huge to post also when I try to run only part of the code to simulate the issue it doesn't error out so, attached the portion of graph\\n\\nI am not using SET which I know could potentially exceed the limit.\\n\\nIt would be nice to know when does Internal OUTPUT happens and why not Store internal or spill.\\n\\nI also read one thread https://track.hpccsystems.com/browse/HPCC-15315 which says passing in a huge dataset to a function could cause this. In my case I do pass a huge dataset to function but its not a streamed dataset.\\n\\nPlease help.\\n\\nThanks,\\nBalaji\", \"post_time\": \"2020-05-29 03:16:29\" },\n\t{ \"post_id\": 31043, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Re: Accessing subfile names from ecl code\", \"username\": \"Gurman\", \"post_text\": \"Yes that works Richard,\\nAnd I think it is more efficient than LOOP.\\n\\nThank you for your help.\\n-Gurman\", \"post_time\": \"2020-06-03 20:19:53\" },\n\t{ \"post_id\": 31013, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Re: Accessing subfile names from ecl code\", \"username\": \"rtaylor\", \"post_text\": \"Gurman,\\n\\nOK, so your approach gave me an idea. Here's how I would do it (this code goes at the bottom of my previous example, and it works on my small test superfile)://if you don't have a UID to use, \\n// you can create a nested child dataset:\\nPrj := PROJECT(SubFiles,\\n TRANSFORM({STRING name,DATASET(rec) child},\\n ds := ThisDS('~' + LEFT.name);\\n SELF.child := ds; \\n SELF := LEFT));\\n\\n// then NORMALIZE it to do the "work" with the subfilename\\n// in this case, just adding it to each record\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nWrkRec := {STRING name,rec}; \\nWrkRec XF(rec L, STRING name) := TRANSFORM\\n SELF.name := name; \\n SELF := L;\\nEND;\\n\\nWrk := NORMALIZE(Prj,LEFT.child,XF(RIGHT,LEFT.name));\\n\\nOUTPUT(Wrk,ALL);
I'm creating a nested child dataset first to attach the file names to their proper subfile records, then using NORMALIZE to do the "work" you want to do. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-03 12:58:18\" },\n\t{ \"post_id\": 31003, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Re: Accessing subfile names from ecl code\", \"username\": \"Gurman\", \"post_text\": \"Dear Richard,\\n\\nThank you for the effort you put in, but unfortunately the data does not have any such values that can be mapped for indexing, like you mentioned.\\n\\nI am trying the following flow as a work around:\\n\\n
\\nLet me know your views on this.\\n\\nI'm facing a simple '2035: Output dataset must match the source dataset type' error at the following line for now while implementing the above approach(at location where fjoin is written), will update here once that works out.\\n\\nloop(namesTable, count(namesTable), fJoin(rows(left)));\\n
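(If I'm reading the 2035 error correctly, the LOOP body has to return rows in exactly the same layout as its input dataset, so the result of fJoin presumably needs a PROJECT back into the namesTable layout before it is returned.)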
\\n\\nThanks,\\nGurman\", \"post_time\": \"2020-06-03 01:32:24\" },\n\t{ \"post_id\": 30993, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Re: Accessing subfile names from ecl code\", \"username\": \"rtaylor\", \"post_text\": \"Gurman, \\n\\nI wanted to test this concept myself, so I created three subfiles (from my training data), like this:\\nIMPORT Training.IntroECL_P2 AS T;\\nds := T.UID_Persons;\\nOUTPUT(ds(RecID BETWEEN 1 AND 100),,'~rttest::SF::UID_Persons_1');\\nOUTPUT(ds(RecID BETWEEN 101 AND 200),,'~rttest::SF::UID_Persons_2');\\nOUTPUT(ds(RecID BETWEEN 201 AND 300),,'~rttest::SF::UID_Persons_3');\\n
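The same superfile can also be created and populated directly in ECL rather than through ECL Watch; a rough sketch using the Std.File superfile functions and the filenames above:

IMPORT Std;
SEQUENTIAL(
  Std.File.CreateSuperFile('~rttest::sf::superfile'),
  Std.File.StartSuperFileTransaction(),
  Std.File.AddSuperFile('~rttest::sf::superfile','~rttest::SF::UID_Persons_1'),
  Std.File.AddSuperFile('~rttest::sf::superfile','~rttest::SF::UID_Persons_2'),
  Std.File.AddSuperFile('~rttest::sf::superfile','~rttest::SF::UID_Persons_3'),
  Std.File.FinishSuperFileTransaction());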
Then I went into ECL Watch and added them to a new SuperFile.\\n\\nNext, I defined the SuperFile:SF_rec := RECORD\\n unsigned4 recid;\\n unsigned8 id;\\n string15 firstname;\\n string25 lastname;\\n string15 middlename;\\n string2 namesuffix;\\n string8 filedate;\\n unsigned2 bureaucode;\\n string1 maritalstatus;\\n string1 gender;\\n unsigned1 dependentcount;\\n string8 birthdate;\\n string42 streetaddress;\\n string20 city;\\n string2 state;\\n string5 zipcode;\\n END;\\n\\nSuperfile := DATASET('~rttest::sf::superfile',SF_rec,FLAT);\\n
and then I wrote the process I described above:IMPORT Std;\\nSubFiles := NOTHOR(Std.File.SuperFileContents('~rttest::sf::superfile'));\\nRec := RECORDOF(Superfile);\\n\\nThisDS(STRING name) := DATASET(name,rec,FLAT);\\nTbl := TABLE(SubFiles,\\n {name, \\n UNSIGNED lo := MIN(ThisDS('~' + name),RecID), \\n UNSIGNED hi := MAX(ThisDS('~' + name),RecID)});\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nGetSubName(UNSIGNED val) := Tbl(val BETWEEN lo AND hi)[1].name;\\n\\nP := PROJECT(SuperFile,\\n TRANSFORM({UNSIGNED RecID,STRING name},\\n SELF.name := GetSubName(LEFT.RecID),\\n SELF := LEFT));\\n\\nOUTPUT(P,ALL);\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n
I wrapped NOTHOR around the Std.File.SuperFileContents() function call because it only works with DFU metadata and doesn't need to be run from every node in your Thor. \\n\\nAnyway, this code works with my training data, so good luck with your project.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-02 20:07:34\" },\n\t{ \"post_id\": 30973, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Re: Accessing subfile names from ecl code\", \"username\": \"rtaylor\", \"post_text\": \"Gurman,\\n\\nIf each subfile record has an identifier field (UID) that is unique within the superfile, AND each subfile's UID field contains a single contiguous range of values, then that could be used to imply the file it came from. \\n\\nGiven that, you could code an inline DATASET that would reference those ranges back to the subfilename, like this: //the dataset of ranges\\nSubNameDS := DATASET([{'sub1',0,10},{'sub2',11,20},{'sub3',21,30}],\\n {STRING filename, UNSIGNED lo, UNSIGNED hi});\\n//and a function to get the subfilename, passing the UID value:\\nGetSubName(UNSIGNED val) := SubNameDS(val BETWEEN lo AND hi)[1].filename;\\nGetSubName(22); //returns "sub3"\\n
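From there, tagging every superfile record with the subfile it came from is just a PROJECT; a rough sketch, assuming the superfile DATASET is named Superfile and the identifier field is named UID:

TaggedRec := {STRING subfile, RECORDOF(Superfile)};
Tagged := PROJECT(Superfile,
                  TRANSFORM(TaggedRec,
                            SELF.subfile := GetSubName(LEFT.UID),
                            SELF := LEFT));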
\\nYou might be able to build that dataset automatically as a TABLE by using the STD.File.SuperFileContents() function to get the current list of subfiles, then use that to get the range of UID values for each, something like this://NOT TESTED, PSEUDOCODE ONLY\\nIMPORT Std;\\nSubFiles := Std.File.SuperFileContents('Superfilename');\\nRec := RECORDOF(Superfile);\\nThisDS(STRING name) := DATASET(name,rec,FLAT);\\nTbl := TABLE(SubFiles,\\n             {name, \\n              UNSIGNED lo := MIN(ThisDS(name),UID), \\n              UNSIGNED hi := MAX(ThisDS(name),UID)});\\nGetSubName(UNSIGNED val) := Tbl(val BETWEEN lo AND hi)[1].name;\\n
\\nOf course, if your data doesn't support this scheme, then I think the answer would have to be NO, I have no idea how you can do this in ECL. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-02 13:15:35\" },\n\t{ \"post_id\": 30953, \"topic_id\": 8163, \"forum_id\": 8, \"post_subject\": \"Accessing subfile names from ecl code\", \"username\": \"Gurman\", \"post_text\": \"The data transformation on the superfile that I am doing consists of using information from the name of the individual subfiles.\\n\\ni.e.,\\nIn order to transform each record, I need to extract information from the name of the subfile that the particular record belongs to.\\n\\nPlease let me know if this can be achieved, and a possible document if yes.\\n\\nThanks\", \"post_time\": \"2020-06-01 23:24:53\" },\n\t{ \"post_id\": 30983, \"topic_id\": 8173, \"forum_id\": 8, \"post_subject\": \"Re: Java plugin output location\", \"username\": \"richardkchapman\", \"post_text\": \"I don't think stdout is captured at present - but there is a function HpccUtils.log that you can use to write to the standard component log file\", \"post_time\": \"2020-06-02 13:55:55\" },\n\t{ \"post_id\": 30963, \"topic_id\": 8173, \"forum_id\": 8, \"post_subject\": \"Java plugin output location\", \"username\": \"kaiamacejkovic\", \"post_text\": \"If I was writing standard out within the Java that the ECL Java plugin uses where would this output reside in the ECL client once run? I assumed the logs but can't find anything.\", \"post_time\": \"2020-06-02 07:53:08\" },\n\t{ \"post_id\": 31073, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Re: Unable to expand constant expression when using cache\", \"username\": \"ghalliday\", \"post_text\": \"I think you are right, i didn't read the ecl carefully enough. The argument to #expand must be a compile time constant.\", \"post_time\": \"2020-06-05 07:04:47\" },\n\t{ \"post_id\": 31063, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Re: Unable to expand constant expression when using cache\", \"username\": \"vzeufack\", \"post_text\": \"I think this is related to this issue: viewtopic.php?f=8&t=650#.\\n\\nI may have to find another way to implement this module.\", \"post_time\": \"2020-06-04 22:17:48\" },\n\t{ \"post_id\": 31053, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Re: Unable to expand constant expression when using cache\", \"username\": \"vzeufack\", \"post_text\": \"Hi @ghalliday,\\n\\nI was using version 7.0.4 which I upgraged to 7.8.8 but the problem persists. \\nI looked up the commands but did not find any which allows to disable cache. \\n--metacache: specify directory to store distributed meta information\\n\\n\\nBest regards,\\n\\nVannel,\", \"post_time\": \"2020-06-04 21:44:37\" },\n\t{ \"post_id\": 31051, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Re: Unable to expand constant expression when using cache\", \"username\": \"ghalliday\", \"post_text\": \"Which version of the system are you using? The cache was an idea to speed up ECL compilation. We eventually decided it could not be implemented without too many limitations - so it has been removed/disabled from recent versions (e.g. 7.8).\\n\\nThere should be options to disable it (try eclcc -help -v for a full list of options). E.g. 
--metacache=\", \"post_time\": \"2020-06-04 07:03:18\" },\n\t{ \"post_id\": 31033, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Re: Unable to expand constant expression when using cache\", \"username\": \"vzeufack\", \"post_text\": \"The error is on "#EXPAND(featuresDS.value)". It says:\\n
\\n2071: Unable to expand constant expression when using cache. Try disabling cache.\\n
\", \"post_time\": \"2020-06-03 18:10:07\" },\n\t{ \"post_id\": 31023, \"topic_id\": 8183, \"forum_id\": 8, \"post_subject\": \"Unable to expand constant expression when using cache\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nCan you please help me resolve this code:\\n\\nIMPORT KMeans;\\n\\nFit (original_data, features) := FUNCTIONMACRO\\n\\t \\n StrRec := RECORD\\n STRING value;\\n END;\\n \\n //converts the set of features to Strings\\n featuresDS := DATASET(features, StrRec);\\n \\n EncoderRec := RECORD\\n STRING feature_name := featuresDS.value; \\n SET OF STRING categories := (SET OF STRING) SET(original_data, #EXPAND(featuresDS.value);; //here I want the identifer and not the string\\n END;\\n\\n result := TABLE(featuresDS, EncoderRec);\\nRETURN result;\\nENDMACRO;\\n\\n//Import raw data.\\nRaw_data := KMeans.Test.Datasets.DSIris.ds;\\nencoder := Fit(Raw_data, ['sepal_length', 'sepal_width']);\\nOUTPUT(encoder);\\n
\\n\\nI am facing this issue frequently when using #EXPAND. Can you provide a rule of thumb to avoid this error?\\n\\n\\nBest regards,\\n\\nVannel,\", \"post_time\": \"2020-06-03 15:57:58\" },\n\t{ \"post_id\": 31133, \"topic_id\": 8193, \"forum_id\": 8, \"post_subject\": \"Re: pyembed: No module named 'numpy'\", \"username\": \"bforeman\", \"post_text\": \"Hi Gurman,\\nIt's possible that the Tensor MODULE makes use of the Python embeds, so you need to have Python3 installed on your target cluster.\\n\\nYour HPCC Systems administrator with administrator’s rights can install Tensorflow and Python3, or do it yourself on a HPCC VM as shown here:\\n\\n1. On Ubuntu, first refresh the APT (Advanced Package Tool) repository:\\nsudo apt update
\\n2. Install Python3 if not already installed:\\nsudo apt install python3
\\n3. Install pip3 (Python3 package installer) – this will take a few minutes\\nsudo apt install python3-pip
\\n4. Install tensorflow for all users. This is the recommended approach, since it needs to be available to the hpcc user as well as the current user. The -H sudo option is necessary in order to have it installed globally:\\nsudo -H pip3 install tensorflow
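(Installing tensorflow this way should also pull in numpy as a dependency; if the pyembed error about numpy persists, numpy can be installed the same way with sudo -H pip3 install numpy.)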
\\n\\nFinally, the setuptest.ecl file found in the Test directory of the GNN bundle will verify that Python3 and Tensorflow are correctly installed on each Thor node. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-06-10 12:48:14\" },\n\t{ \"post_id\": 31123, \"topic_id\": 8193, \"forum_id\": 8, \"post_subject\": \"pyembed: No module named 'numpy'\", \"username\": \"Gurman\", \"post_text\": \"I am trying tutorials for GNN bundle, under that I tried to create a tensorData as per the below tutorial: https://hpccsystems.com/blog/gnn-bundle\\n\\nI am not using the EMBEDED(Python) feature right now, just trying to execute the following code:\\n\\nIMPORT GNN.Tensor;\\n\\ntensData1 := DATASET([{[1,2], 1},\\n {[2,1], 2},\\n {[2,2], 3}],\\n Tensor.R4.TensData);\\n\\nmyTensor := Tensor.R4.MakeTensor([2,2], tensData1);\\n\\ntensData2 := Tensor.R4.GetData(myTensor);\\n\\ntensData2;\\n
\\n\\nBut getting the following error: \\n<Result>\\n <Exception><Source>eclagent</Source><Message>System error: 0: Graph graph1[14], externalprocess[18]: SLAVE #1 [10.0.2.6:20100]: pyembed: No module named 'numpy', - caused by (0, pyembed: No module named 'numpy')</Message></Exception>\\n</Result>\\n
\\n\\nAm I missing something on my cluster?\", \"post_time\": \"2020-06-09 20:33:33\" },\n\t{ \"post_id\": 31193, \"topic_id\": 8213, \"forum_id\": 8, \"post_subject\": \"Re: Convert degrees to radian\", \"username\": \"jtaylor178\", \"post_text\": \"thanks.\\n\\nI started with DMS format instead of DD in your example.\\n\\nSo for my case I would convert DMS format 172109S to DD format -17.3525 and then \\nmultiple it by (3.14159265359/180)\", \"post_time\": \"2020-06-16 16:21:57\" },\n\t{ \"post_id\": 31183, \"topic_id\": 8213, \"forum_id\": 8, \"post_subject\": \"Re: Convert degrees to radian\", \"username\": \"rtaylor\", \"post_text\": \"jtaylor178,\\n\\nStrangely enough, a colleague and I were discusing exactly that question two weeks ago, so here's the code I wrote for that:Pi := 3.141592653589793;\\nDeg2Rad(REAL deg) := deg * (Pi/180); //degrees * number of radians in a degree
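Building on the Pi and Deg2Rad definitions above, a sketch of the DMS case mentioned earlier in the thread (hypothetical Dms2DD name; it assumes a fixed DDMMSS-plus-hemisphere format such as '172109S'):

// '172109S' = 17 deg 21 min 09 sec South -> -17.3525 decimal degrees
REAL8 Dms2DD(STRING7 dms) :=
    ((REAL8)dms[1..2] + (REAL8)dms[3..4]/60 + (REAL8)dms[5..6]/3600)
    * IF(dms[7] IN ['S', 'W'], -1, 1);

OUTPUT(Deg2Rad(Dms2DD('172109S')));   // approx -0.302858 radians
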
BTW, this Pi is exactly the same value as returned by the Python Math.Pi function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-16 15:44:50\" },\n\t{ \"post_id\": 31173, \"topic_id\": 8213, \"forum_id\": 8, \"post_subject\": \"Convert degrees to radian\", \"username\": \"jtaylor178\", \"post_text\": \"Is there an hpcc utility that converts degrees to radians?\", \"post_time\": \"2020-06-16 15:33:59\" },\n\t{ \"post_id\": 31433, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"Allan\", \"post_text\": \"Interestingly vzeufack,\\n\\nI was also getting confused about the context a functionmacro ran in, so created this 5 min video on what I found.\\nhttps://www.youtube.com/watch?v=rZM7VhnpCP8&list=PLONd-6DN_sz2aHrCyFbvIU6Q33r55YzWk&index=6&t=0s\\n\\nCheers\\nAllan\", \"post_time\": \"2020-07-09 15:13:41\" },\n\t{ \"post_id\": 31313, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"Thanks Richard!\\n\\nI was indeed suspecting that something of the like was happening. Thanks for your help.\\n\\n\\nBest regards\\n\\nVannel\", \"post_time\": \"2020-06-29 19:06:36\" },\n\t{ \"post_id\": 31303, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nThis code works for me:TestModule := MODULE\\n EXPORT helperFn(params) := FUNCTIONMACRO\\n Result := 'Helper function called: ' + params;\\n RETURN Result;\\n ENDMACRO;\\n\\n EXPORT testFn(params) := FUNCTIONMACRO\\n Result := TestModule.helperFn(params); //Unknown identifier helperFn\\n RETURN Result;\\n ENDMACRO;\\nEND;\\n\\nTestModule.TestFn('Success');
You'll note that I made the helperFn an EXPORT definition and called it with a fully qualified pathname. This is necessary because a FUNCTIONMACRO is a code generator that generates its code where it is called. That means the call is outside the MODULE structure, so the helperFn() call is out of scope at that point. But making helperFn() EXPORT and calling it fully qualified means it can be found wherever the first FUNCTIONMACRO is called.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-29 19:02:02\" },\n\t{ \"post_id\": 31293, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"Can you try to explain based on this example which does not use template language?\\n\\n\\nEXPORT TestModule := MODULE\\n helperFn(params) := FUNCTIONMACRO\\n Result := 'Helper function called: ' + params; \\n RETURN Result;\\n ENDMACRO;\\n\\n EXPORT testFn(params) := FUNCTIONMACRO\\n Result := helperFn(params); //Unknown identifier helperFn\\n RETURN Result;\\n ENDMACRO;\\nEND;\\n\\n//I called the TestFn function in another file as follows\\nResult := $.TestModule.TestFn('Success');\\n\\n// **** I am getting that same error 'Unknown identifier helperFn' *******\\n
\\n\\nWish you a restful PTO \", \"post_time\": \"2020-06-26 21:39:55\" },\n\t{ \"post_id\": 31283, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nOK, it appears on a quick first examination that you're using Template language to generate ECL code in your FUNCTIONMACROs. So the MODULE structure does syntax check (generated code only gets syntax checked on compile), but when you use it in a workunit that generated code looks to me like it's generating circular calls and that is the reason for your problem.\\n\\nI'll be on PTO all next week, so I hope this helps you figure out how to correct the issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-26 21:29:34\" },\n\t{ \"post_id\": 31273, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"For me it seems like, within a module, function macros should be self contained to avoid any problems. If a function macro wants to call another function macro, the called function must be embedded in the caller. So, I cannot write a helper function which will be called by many other function macros within the same module. I do not know if that is true but I am having trouble to achieve that.\\n\\nI made this simple code to validate the error:\\n\\n
\\nEXPORT TestModule := MODULE\\n helperFn(params) := FUNCTIONMACRO\\n Result := 'Helper function called: ' + params; \\n RETURN Result;\\n ENDMACRO;\\n\\n EXPORT testFn(params) := FUNCTIONMACRO\\n Result := helperFn(params); //Unknown identifier helperFn\\n RETURN Result;\\n ENDMACRO;\\nEND;\\n\\n//I called the TestFn function in another file as follows\\nResult := $.TestModule.TestFn('Success');\\n\\n// **** I am getting that same error 'Unknown identifier helperFn' *******\\n
\\n\\nNote that I am getting the error whatever the visibility of helperFn (default, SHARED or EXPORT).\\n\\nI will repeat code for now or embed the function.\\n\\nPlease let me know if you have any clue about what I am doing wrong.\\n\\n\\nBest regards,\\n\\nVannel\", \"post_time\": \"2020-06-26 21:24:43\" },\n\t{ \"post_id\": 31263, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"I have attached the module (LabelEncoder.ecl) and the file which calls the "problematic function" (Test1.ecl). Within Test1.ecl, it is the call to "UnwrapEncoderKey" which creates an issue.\\n\\nLet me know if you can use the uploaded files or if I should find another way to share the code (maybe through the udl cluster).\\n\\n\\nBest regards,\\n\\nVannel\", \"post_time\": \"2020-06-26 20:06:11\" },\n\t{ \"post_id\": 31253, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,I had to embed fn2 into fn1 for it work.\\nDo you have any idea of why this could happen?
I'd have to see your actual code to answer that \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-26 19:56:16\" },\n\t{ \"post_id\": 31243, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"Thanks Richard,\\n\\nBut now, I am getting the error which made me want to post this message 2 or 3 days ago. I inverted the position but I am still getting "Unknown identifier fn2" inside fn1.\\n\\n
\\nmyModule := MODULE\\n EXPORT fn2(params) := FUNCTIONMACRO\\n ...\\n END;\\n\\n EXPORT fn1(params) := FUNCTIONMACRO\\n fn2Result := fn2(params); //Unknown identifier fn2\\n ...\\n END; \\nEND;\\n
\\n\\nI had to embed fn2 into fn1 for it work. \\nDo you have any idea of why this could happen?\\n\\n\\nBest regads,\\n\\nVannel,\", \"post_time\": \"2020-06-26 19:50:04\" },\n\t{ \"post_id\": 31223, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"Re: How is scope (visibility) resolved for function macros?\", \"username\": \"rtaylor\", \"post_text\": \"Vannel,\\n\\nThe ECL compiler is a one-pass tool. That means, if you want Fn1 to be able to reference Fn2, you need to reverse their order within the MODULE. This is not specific to FUNCTIONMACROs, it is true for all ECL code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-26 19:14:19\" },\n\t{ \"post_id\": 31213, \"topic_id\": 8223, \"forum_id\": 8, \"post_subject\": \"How is scope (visibility) resolved for function macros?\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI came to realize that scope does not work the same when using FUNCTION and FUNCTIONMACRO. I have a specific issue regarding that. I currently have two function macros fn1 and fn2 which are within the same module as shown below.\\n\\n\\nmyModule := MODULE\\n EXPORT fn1(params) := FUNCTIONMACRO\\n fn2Result := fn2(params);\\n ...\\n END;\\n\\n EXPORT fn2(params) := FUNCTIONMACRO\\n ...\\n END;\\nEND;\\n
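Putting the two answers in this thread together, a compilable sketch (hypothetical bodies): fn2 is declared before fn1 because the compiler is one-pass, and it stays EXPORT with a fully qualified call, because the FUNCTIONMACRO text is generated at the caller's location, outside the MODULE, where a SHARED definition would not be visible.

myModule := MODULE
  EXPORT fn2(params) := FUNCTIONMACRO
    RETURN 'fn2 saw: ' + params;
  ENDMACRO;

  EXPORT fn1(params) := FUNCTIONMACRO
    // fully qualified, because this code expands where fn1 is called
    fn2Result := myModule.fn2(params);
    RETURN 'fn1 wraps [' + fn2Result + ']';
  ENDMACRO;
END;

OUTPUT(myModule.fn1('Success'));
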
\\n\\nI realize that fn1 can only call fn2 if fn2 is EXPORTed. However, I only want to make fn1 available to the public and fn2 only available inside the module as a utility function. However, when I set fn2 as SHARED, fn1 can't see it.\\n\\nCan you help me understand that issue?\\nAlso, can you let me know if there is some documentation regarding visibility rules when it comes to function macros?\\n\\n\\nBest regards,\\n\\nVannel\", \"post_time\": \"2020-06-26 18:58:07\" },\n\t{ \"post_id\": 31513, \"topic_id\": 8273, \"forum_id\": 8, \"post_subject\": \"Re: Purpose of "Export Main" in Module structure\", \"username\": \"gmarcan\", \"post_text\": \"Hi Richard,\\n\\nThis answers my question.\\n\\nThank you.\\n\\nGabriel\", \"post_time\": \"2020-07-24 18:52:12\" },\n\t{ \"post_id\": 31503, \"topic_id\": 8273, \"forum_id\": 8, \"post_subject\": \"Re: Purpose of "Export Main" in Module structure\", \"username\": \"rtaylor\", \"post_text\": \"Gabriel,Is it just a quirk that some how relates to the underlying compiler?
Pretty much. ECL defines what you want, the compiler generates the code for how to do it. And that generated code is C++,and all C/C++ programs need to have a main() to compile.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-07-24 18:33:28\" },\n\t{ \"post_id\": 31493, \"topic_id\": 8273, \"forum_id\": 8, \"post_subject\": \"Re: Purpose of "Export Main" in Module structure\", \"username\": \"gmarcan\", \"post_text\": \"Hi Richard,\\n\\nThank you. Do you know what is the intended purpose of "EXPORT MAIN"? Is it just a quirk that some how relates to the underlying compiler?\\n\\nI did manage to find another reference out there to the "EXPORT MAIN", but it does not provide much further insight:\\n\\nviewtopic.php?f=8&t=125\\n\\nNonetheless, I find your advise sensible and I will avoid using this construct for creating unit testing. The use of #IF along with a constant configuration attribute provides a much more consistent and elegant option.\\n\\nGabriel\", \"post_time\": \"2020-07-24 18:03:46\" },\n\t{ \"post_id\": 31483, \"topic_id\": 8273, \"forum_id\": 8, \"post_subject\": \"Re: Purpose of "Export Main" in Module structure\", \"username\": \"rtaylor\", \"post_text\": \"Gabriel,\\n\\nYou have discovered a kludge that allows you to exercise a "bad habit" to get into. \\n\\nIn every class I teach, I tell the students: "Don't get into the habit of just hitting Submit on definition files, because sometimes it will work and sometimes it won't! You should only hit the Submit button on BWR files (Builder Window Runnable -- ECL code files that contain actions, not just definitions)."\\n\\nA MODULE structure definition file is one of those where it won't work, and produces an error that says, "Module yourfilename does not EXPORT an attribute main()" because there is no action to execute in the file, just the MODULE definition.\\n\\nSo, by adding an "EXPORT Main" definition within the MODULE you can get away with just hitting Submit, because the compiler takes that definition as the missing "main()" that it's looking for and uses that as the action to execute. And that is a bad habit to get into! \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-07-23 12:16:46\" },\n\t{ \"post_id\": 31473, \"topic_id\": 8273, \"forum_id\": 8, \"post_subject\": \"Purpose of "Export Main" in Module structure\", \"username\": \"gmarcan\", \"post_text\": \"Hello,\\n\\nI am trying to find out more about the "EXPORT MAIN" feature in a MODULE structure. Looking through the ECL documentation, I cannot find information about this and I am wondering if anyone has more details on why it exists, and what is the intended purpose.\\n\\nAll I know is that it can be used to test, or run a default outcome on a module. For example:\\n\\n
EXPORT myModule := MODULE\\n EXPORT add(integer4 aVar, integer4 bVar):= aVar + bVar;\\n EXPORT MAIN := add(5,6);\\nEND;\\n\\n//myModule.add(1,3);\\n//myModule.MAIN;
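For comparison, a sketch of the #IF-plus-configuration-constant approach mentioned elsewhere in this thread (hypothetical RunUnitTests flag; it assumes the myModule definition above is saved as myModule.ecl in the same folder), which keeps the test actions in a separate BWR file instead of relying on EXPORT MAIN:

// BWR file: gate the test actions behind a compile-time constant.
IMPORT $;
RunUnitTests := TRUE;   // would normally live in a shared configuration attribute

#IF(RunUnitTests)
  OUTPUT($.myModule.add(5, 6), NAMED('add_test'));   // expect 11
#END
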
\\n\\nIf you save the module code as an independent attribute ( "myModele.ecl"), then submitting the "myModule" attribute directly to the HPCC cluster will result in the MAIN function being invoked. Meaning, that if you add code to test the module in the "MAIN" export then you can easily run a unit test on the module using some default values, or invoke some default behavior.\\n\\nI did see some comments that it relates to queries, but I am not understanding how this fits into the HPCC run/deployment life cycle.\\n\\nPlease can someone point me to the documentation on this, or help me understand the purpose and correct usage.\\n\\nThank you.\\n\\nGabriel\", \"post_time\": \"2020-07-22 21:27:27\" },\n\t{ \"post_id\": 31543, \"topic_id\": 8283, \"forum_id\": 8, \"post_subject\": \"Re: Query about ECL code quality.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thanks a lot Richard. Your reply and the links are very helpful.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2020-07-28 14:38:50\" },\n\t{ \"post_id\": 31533, \"topic_id\": 8283, \"forum_id\": 8, \"post_subject\": \"Re: Query about ECL code quality.\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,Is there a way to check the quality of an ECL code ? By checking the quality I mean -\\nAre some basic ECL coding guidelines have been followed ?\\nHighlighting code which can be improved in terms of performance improvement ?\\nSome security checks for code where calls have been made outside HPCC ?
AFAIK there are no tools for ECL to do that for you. Coding conventions/standards can vary between companies, and even between different teams within the same company. In our ECL classes we teach generic guidelines and best practices, but there are no "global" guidelines that everyone follows. Therefore, it would be extremely difficult to create a tool to do that.\\n\\nHowever, most coding teams have their own operating rules/conventions, and almost always that includes a "code review" step before checking code into version control. That is the step where a junior coder gets their code checked by a more senior member of the team. That is where you can implement adherence to your team's convention/standard.I know when we check the syntax of code OR compile a code in ECL IDE, the IDE does gives us information / warnings at the bottom. How does this happen ? What is the scope / variation of warnings we can get from ECL IDE ?
Since the ECL language itself does have "hard and fast" syntax rules, the compiler can run a syntax check to ensure the code meets all those rules, and that is what generates the information/warnings. However, syntax rules are NOT the same as your team's coding convention.\\n\\nThere is a new tool that will automatically provide more performance-based information about a workunit, called the Workunit Analyzer. This tool is new and currently limited to reporting just a couple of things it "sees" but it is actively under development and will gain more abilities as it progresses. \\n\\nHere are some helpful links:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-07-28 12:31:01\" },\n\t{ \"post_id\": 31523, \"topic_id\": 8283, \"forum_id\": 8, \"post_subject\": \"Query about ECL code quality.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Team,\\n\\nIs there a way to check the quality of an ECL code ? By checking the quality I mean - \\nAre some basic ECL coding guidelines have been followed ?\\nHighlighting code which can be improved in terms of performance improvement ?\\nSome security checks for code where calls have been made outside HPCC ?\\n
\\n\\nI know when we check the syntax of code OR compile a code in ECL IDE, the IDE does gives us information / warnings at the bottom. How does this happen ? What is the scope / variation of warnings we can get from ECL IDE ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2020-07-28 11:12:55\" },\n\t{ \"post_id\": 31573, \"topic_id\": 8293, \"forum_id\": 8, \"post_subject\": \"Re: DUPLIATE INDEX Build fails\", \"username\": \"BalajiTS\", \"post_text\": \"Thank you Richard for the clarification\", \"post_time\": \"2020-08-04 13:38:54\" },\n\t{ \"post_id\": 31563, \"topic_id\": 8293, \"forum_id\": 8, \"post_subject\": \"Re: DUPLIATE INDEX Build fails\", \"username\": \"rtaylor\", \"post_text\": \"BalajiTS,\\n\\nThe first way doesn't work because the structure you defined for the INDEX does not match the structure you specified in your BUILD.\\n\\nidx1:= BUILD(ds1,{ds1.id},{ds1},'~RTtest::file1::202007284_idx',OVERWRITE);\\nidx1;
This BUILD is creating a payload INDEX with the id field as the search term and the col1 field as the only payload (because it's the only other field in the RECORD structure for ds1 that isn't already specified as a search term).\\n\\nidx2:= INDEX(ds1,{UNSIGNED1 id,STRING10 col1},rec,'~RTtest::file1::202007284_idx');\\nidx2;
But this INDEX declaration is not defining the same structure as the BUILD, because you have both the id and col1 fields defined as search terms. The rec "payload" parameter is irrelevant because all the fields are already defined as search terms.\\n\\nTo duplicate the result of your BUILD you need to define it like this:idx2:= INDEX(ds1,{UNSIGNED1 id},{STRING10 col1},'~RTtest::file1::202007284_idx');\\nidx2;
That makes the id field the only search term, and the col1 field the only payload, duplicating the file structure built by the BUILD.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2020-08-04 13:18:00\" },\n\t{ \"post_id\": 31553, \"topic_id\": 8293, \"forum_id\": 8, \"post_subject\": \"DUPLIATE INDEX Build fails\", \"username\": \"BalajiTS\", \"post_text\": \"Please help me understand the difference on why it fails on method 1\\n\\n\\n\\t"message": "2305: The index record contains fields with no mappings - cannot build an index on it",\\n\\t"
\\n\\nRec := RECORD\\n\\tUNSIGNED1 id;\\n\\tSTRING10 col1;\\nEND;\\n\\nds1:= DATASET([{1,'a'}],Rec);\\n\\n/* Method 1 - NOT WORKING */\\n//idx1:= BUILD(ds1,{ds1.id},{ds1},'~thor::test::file1::202007284_idx',OVERWRITE);\\n//idx1;\\n//idx2:= INDEX(ds1,{UNSIGNED1 id,STRING10 col1},rec,'~thor::test::file1::202007284_idx');\\n//idx2;\\n//newidx := INDEX(idx2,'~thor::test::file1::202007285_idx_new'); //DUPLICATE INDEX DEFN\\n//BUILDINDEX(newidx ); //FAILS\\n\\n/* Method 2 WORKING */\\nidx_1:= INDEX(ds1,{id},{ds1},'~thor::test::file1::202007281_idx_1');\\n//build(idx_1);\\nnewidx_1:= INDEX(idx_1,'~thor::test::file1::202007281_idx_2'); //DUPLICATE INDEX DEFN\\nBUILDINDEX(newidx_1); //WORKS\\n
\", \"post_time\": \"2020-08-02 12:29:31\" },\n\t{ \"post_id\": 31813, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Re: Problems using CreateSuperFile function\", \"username\": \"jsmith\", \"post_text\": \"Thank you.\\n\\nI suspect the version of the server (7.4.52) you are submitting to has a different (older) version of lib_fileservices than the references in your query uses (from 7.10.0)\\n\\nThere might be a warning of the version mismatch in the workunit?\", \"post_time\": \"2020-08-17 09:49:25\" },\n\t{ \"post_id\": 31673, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Re: Problems using CreateSuperFile function\", \"username\": \"igorgallon\", \"post_text\": \"Here it is\", \"post_time\": \"2020-08-07 12:19:02\" },\n\t{ \"post_id\": 31663, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Re: Problems using CreateSuperFile function\", \"username\": \"jsmith\", \"post_text\": \"Do you still have the failed workunit?\\nCan you attach the archive (under Helpers) here?\", \"post_time\": \"2020-08-06 16:41:55\" },\n\t{ \"post_id\": 31653, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Re: Problems using CreateSuperFile function\", \"username\": \"igorgallon\", \"post_text\": \"Hi,\\n\\nI was trying to submit via ECL IDE v7.10.0-1 with the compiler version 7.10.0. The cluster version is 7.4.52-1. I regressed the compiler version to 6.4.22 and now it's working fine!\", \"post_time\": \"2020-08-06 15:47:43\" },\n\t{ \"post_id\": 31643, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Re: Problems using CreateSuperFile function\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nI have not seen that error before.\\nWhich version of the platform are you using?\\nAnd how are you submitting the query, e.g. via the IDE, via the CLI ecl command, or in the playground?\\n\\nThanks.\", \"post_time\": \"2020-08-06 12:00:36\" },\n\t{ \"post_id\": 31633, \"topic_id\": 8313, \"forum_id\": 8, \"post_subject\": \"Problems using CreateSuperFile function\", \"username\": \"igorgallon\", \"post_text\": \"I'm trying to simply create a super file:\\nIMPORT STD;\\n\\nSTD.File.CreateSuperFile('~EXERCISE::IFG::Yelp::SF::Review');
\\n\\nBut when I submit the BWR to cluster, the following message error is displayed:\\n\\n(code 2171) Object 'lib_fileservices' does not have a member named 'FsDropZone'\\n(code 2167) Unknown identifier "FsDropZone"\\n
\\n\\nI would like to understand this error and how can I properly create the super file.\", \"post_time\": \"2020-08-05 19:26:55\" },\n\t{ \"post_id\": 31883, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"Allan\", \"post_text\": \"Hi hwatanuki,\\n\\nI may not have your precise issue straight, but we have multiple repos, one for development, certification, production etc and we have different settings that are environment specific.\\nAll repos hold an identically named filename, e.g.\\n\\nControl/Environment.ecl\\n
\\nThis can contain a string that identifies the environment, e.g.\\nEXPORT ID := 'DEV';
\\nEach environment has 'ID' setup appropriately.\\nAll code that needs to use environment specific attributes uses:\\nIMPORT Control;
\\nthen uses the ID attribute from 'Environment' thus:\\n#IF(Control.Environment.ID = 'PROD')\\n EXPORT STRING TargetURL := 'someurl'; \\n#ELSEIF(Control.Environment.ID = 'QA') \\n EXPORT STRING TargetURL := 'some other url'; \\n#ELSE\\n EXPORT STRING TargetURL := 'Development url'; \\n#END\\n
\\nThe application then juse uses 'TargetURL' which is now defined in an environmentally specific way.\\nSOAPCALL(TargetURL,...);
\\n\\nAs I said I may not understand your issue, but hope this might help.\\nYours\\nAllan\", \"post_time\": \"2020-08-24 09:17:17\" },\n\t{ \"post_id\": 31793, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"rtaylor\", \"post_text\": \"Ruchika,\\n\\nPerhaps another approach might work better. \\n\\nTake a look at Allan Wrobel's "Universal WU Scheduler" for some ideas. He posted it in our Tips and Tricks forum in these two posts: https://hpccsystems.com/bb/viewtopic.php?f=41&t=7813 and https://hpccsystems.com/bb/viewtopic.php?f=41&t=7803\\n\\nI suggest this because I don't believe there is any native way to automatically get those directory names in ECL and Allan's approach may give you a way to manage the task without too much "manual labor" involved.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-08-11 11:49:55\" },\n\t{ \"post_id\": 31783, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"hwatanuki\", \"post_text\": \"No problem, feel free to share the piece that is not working and maybe we can find a way around it. \", \"post_time\": \"2020-08-11 04:08:19\" },\n\t{ \"post_id\": 31773, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"RuchiM\", \"post_text\": \"Thanks Hugo for the suggestion but thats not working in my use case. \\n\\nRegards,\\nRuchika\", \"post_time\": \"2020-08-10 21:48:10\" },\n\t{ \"post_id\": 31763, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Ruchika, \\n\\nThanks for the detailed example. It is now much clearer what you are trying to accomplish, but I am afraid that the precise functionality you are looking for is an enhancement still being explored on the roadmap: https://track.hpccsystems.com/browse/HPCC-24449 \\n\\nFor the time being, I believe that the best alternative for your current need would be to have your repositories "HC_DATA_NOIDS" and "HPC_DATA_DEA" defined as separate modules (sub directories) in the ECL IDE repository. Note that you can do this by simply referencing the target repository's parent folder (instead the repository folder itself) when including it as a "ECL folder" under the "Compiler" tab of your ECL IDE "preferences" menu.\\n\\nBy doing this, you will be able to work with IMPORT statements and fully qualification of your exported definitions with minimized risk of naming clashes between the different repositories.\\n\\nHTH,\\nHugo\", \"post_time\": \"2020-08-10 18:37:57\" },\n\t{ \"post_id\": 31743, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"RuchiM\", \"post_text\": \"HI Richard,\\n\\nSo I have a Repository called HC_Data_Statistics, I want to run Statistics on different files which are the results of different projects/Repositories (for eg: Here I am using two different Repos i.e. HC_DATA_NOIDS and HC_DATA_DEA). \\n\\nStep 1 :\\nI wrote a Macro to read different files like this . Here Files is an attribute in each of the repository. \\n\\tEXPORT GetCurrentFile(Name) := FUNCTIONMACRO \\n\\t\\tRETURN #EXPAND(Name).Files().Base.new; //Current File \\n\\tENDMACRO;\\n\\nStep 2: The above function macro is being called in GetBase Function which is reading the files. 
\\n EXPORT GetBase := FUNCTION\\n\\t//----READING NOIDS FILES---------------------------------\\n\\t\\tNOIDSPREVIOUSBASE := GetPreviousFile('HC_DATA_NOIDS');\\n\\t\\tNOIDSCURRENTBASE := GetCurrentFile('HC_DATA_NOIDS');\\n\\t//----READING DEA FILES ----------------------------------\\t\\n\\t\\tDEAPREVIOUSBASE := GetPreviousFile('HC_DATA_DEA');\\n\\t\\tDEACURRENTBASE := GetCurrentFile('HC_DATA_DEA');\\n RETURN MODULE\\n\\t\\tEXPORT NOIDSPDATASET \\t:= NOIDSPREVIOUSBASE;\\n\\t\\tEXPORT NOIDSCDATASET \\t:= NOIDSCURRENTBASE;\\n\\t\\tEXPORT DEAPDATASET \\t:= DEAPREVIOUSBASE;\\n\\t\\tEXPORT DEACDATASET \\t:= DEACURRENTBASE;\\n END; //END OF RETURN MODULE\\n END;//END OF GET BASE FUNCTION\\n\\nStep 3 : In another attribute (Get_Counts) of the Repository HC_DATA_STATISTICS, I want to call GetBase function based on the name of the Repository in which I will insert the Call of Get_Counts. So as you see below, cdataset and pdataset should have values according to the parameter NAME of Get_Counts. This Module will be called in all the Repositories/Builds Code and it should read the files according to what repository it is in. \\n\\nEXPORT Get_Counts(STRING NAME) := MODULE \\n EXPORT cdataset := if(Name = 'Noids',Utils.GetBase.NOIDSCDATASET);\\n EXPORT pdataset :=\\tif(Name = 'Noids', Utils.GetBase.NOIDSPDATASET);\\nEND; //end of Get_Counts Module\\n\\n\\nHope I am able to communicate what I want to do. Thanks for the help..\\n\\nRegards,\\nRuchika\", \"post_time\": \"2020-08-10 13:40:35\" },\n\t{ \"post_id\": 31733, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"rtaylor\", \"post_text\": \"RuchiM,\\n\\nExactly what are you trying to accomplish? What do you want to do that requires the name of your repository's directory structure?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-08-10 12:40:48\" },\n\t{ \"post_id\": 31713, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"RuchiM\", \"post_text\": \"Hi Hwatanuki,\\n\\nThanks for your reply, ok let me explain my question again.\\nSuppose there is a repository in ECL IDE called Statistics and there is a module called Utilities in it. I want to pass the repo name i.e. Statistics as a parameter to a function. I just need the name of that repo as a parameter or stored in any variable. Is that possible ?\\n\\nI know I can import the repo name and use it in any module of any other repository or any module of the same repository. 
\\n\\nThanks again for your help.\\n\\nRegards,\\nRuchiM\", \"post_time\": \"2020-08-09 20:34:52\" },\n\t{ \"post_id\": 31703, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Re: Reading a Repository Name in a Module/Function\", \"username\": \"hwatanuki\", \"post_text\": \"Hello RuchiM,\\n\\nI am not sure I fully understand the question (maybe you can elaborate a little bit more on what you are trying to achieve), but as long as you have structured your local repository(ies) by containing ECL definition files (.ecl) within modules (brown colored folders in your ECL IDE repository window), then referencing/calling an exported definition from another mapped repository/module would be just a matter of:\\n\\n1) using an IMPORT statement to the source module (no need for the repository name);\\n2) fully qualifying the imported definition whenever it is referenced in the code.\\n\\nHTH,\\nhwatanuki\", \"post_time\": \"2020-08-09 20:28:22\" },\n\t{ \"post_id\": 31693, \"topic_id\": 8323, \"forum_id\": 8, \"post_subject\": \"Reading a Repository Name in a Module/Function\", \"username\": \"RuchiM\", \"post_text\": \"How can I read the repo name in the code ?\", \"post_time\": \"2020-08-07 17:21:48\" },\n\t{ \"post_id\": 31873, \"topic_id\": 8353, \"forum_id\": 8, \"post_subject\": \"Re: assert(queryUnqualifiedType(left->queryType()) ... Faile\", \"username\": \"vzeufack\", \"post_text\": \"Problem solved!\\n\\nI am not sure about what caused the issue but since the error referred to "Types" I went into my code to verify that no weird implicit type casting was occuring. I indeed found some weird type casting uncaught by the compiler.\\n\\nBest regards,\\nVannel\", \"post_time\": \"2020-08-23 03:11:02\" },\n\t{ \"post_id\": 31863, \"topic_id\": 8353, \"forum_id\": 8, \"post_subject\": \"assert(queryUnqualifiedType(left->queryType()) ... Failed\", \"username\": \"vzeufack\", \"post_text\": \"Hi,\\n\\nI am getting the following unusual run time error:\\n\\n
\\nError code: 3000\\n\\nassert(queryUnqualifiedType(left->queryType()) == queryUnqualifiedType(right->queryType())) failed - file: hqlcpp.cpp, line 673\\n
\\n\\nIt is the second time it occurs to me. The first time, it referred to an ITERATE call and this time, it points at a PROJECT. Do you have any idea about the meaning of this error?\\n\\nI have attached the file containing the query which returned the error.\\n\\nBest regards,\\nVannel\", \"post_time\": \"2020-08-21 19:33:37\" },\n\t{ \"post_id\": 32603, \"topic_id\": 8503, \"forum_id\": 8, \"post_subject\": \"Re: Query regarding use of MAXCOUNT.\", \"username\": \"rtaylor\", \"post_text\": \"akhileshbadhri,\\n\\nI've played around with your code and get the same results, no matter what changes I make. \\n\\nYou should submit a JIRA ticket (https://track.hpccsystems.com) to report this to the developers so they can determine whether it's a bug or a "feature" .\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-12-02 19:51:58\" },\n\t{ \"post_id\": 32593, \"topic_id\": 8503, \"forum_id\": 8, \"post_subject\": \"Query regarding use of MAXCOUNT.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nIn the following example 1, I am using a maxcount of "1". So when I run the code of example 1, it fails saying "Error: Too many rows assigned to field childrens". I expected this behaviour.\\n\\nEXAMPLE 1\\n\\n
childPersonRecord := {STRING fname,UNSIGNED1 age};\\n\\npersonRecord := RECORD\\n STRING20 fname;\\n STRING20 lname;\\n UNSIGNED2 numChildren;\\n DATASET(childPersonRecord) childrens{xpath('childrens/children'),MAXCOUNT(1)};\\nEND;\\n\\n\\npersonDataset := DATASET([{'Kevin','Hall',2,[{'Abby',2},{'Nat',2}]},\\n {'Jon','Simms',3,[{'Jen',18},{'Ali',16},{'Andy',13}]}],\\n personRecord);\\npersonDataset;
\\n\\n\\nIn the example 2 below, I am using maxcount "1" and my input data is XML. When I run the code in example 2, it runs successfully. I was excepting example 2 to fail as I am passing more than 1 child dataset here.\\n\\nEXAMPLE 2\\n\\nroxierequest \\t:= '<Row><CommonProcessParameters><OrderReceivedTimestamp>2020-08-31 20:44:34</OrderReceivedTimestamp><Amplified>N</Amplified><SearchPeriods><SearchPeriod><Product>XXX</Product><Period>60</Period></SearchPeriod><SearchPeriod><Product>AAA</Product><Period>60</Period></SearchPeriod><SearchPeriod><Product>ZZZ</Product><Period>60</Period></SearchPeriod></SearchPeriods></CommonProcessParameters></Row>';\\n\\nt_InformXMLSearchPeriod := record\\n\\tstring3 Product {xpath('Product')};\\n\\tstring3 Period {xpath('Period')};\\nend;\\n\\nt_InformXMLCommonProcessParameters := record\\n\\tstring7 OrderReceivedTimestamp {xpath('OrderReceivedTimestamp')};\\n\\tstring1 Amplified {xpath('Amplified')};\\n\\tstring1 TypeMVR {xpath('TypeMVR')};\\n\\tstring1 SecurityFreeze {xpath('SecurityFreeze')};\\n\\tdataset(t_InformXMLSearchPeriod) SearchPeriods {xpath('SearchPeriods/SearchPeriod'), MAXCOUNT(1)};\\nend;\\n\\nt_InformXMLRequest := record\\n t_InformXMLCommonProcessParameters CommonProcessParameters {xpath('CommonProcessParameters')};\\nend;\\n\\nrequests\\t\\t\\t:= dataset(fromxml(t_InformXMLRequest,roxierequest));\\nproject(requests,transform(t_InformXMLRequest,self := left;));\\n
\\nRequest your assistance in understanding the behaviour here. I am doing something wrong here ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2020-11-27 11:09:18\" },\n\t{ \"post_id\": 32873, \"topic_id\": 8573, \"forum_id\": 8, \"post_subject\": \"Re: Std.File,Copy ForcePush := TRUE\", \"username\": \"jsmith\", \"post_text\": \"Pushing logical files to another environment isn't supported.\\nTo copy logical files between environments, the copy commands must be run on the target environment, and point to the source Dali where the source logical file resides.\\n\\nThe 'forcePush' is not related to logical file copies between environment, but rather how physical files are copied/sprayed/desprayed within an environment.\\nIt's really an implementation detail, and should not be used. It determines (within an environment) whether the processes involved in a copy, are run on the source nodes, or the target nodes, and therefore whether they are reading or writing(pushing).\\n\\nHope that helps.\", \"post_time\": \"2021-01-15 12:24:55\" },\n\t{ \"post_id\": 32863, \"topic_id\": 8573, \"forum_id\": 8, \"post_subject\": \"Re: Std.File,Copy ForcePush := TRUE\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nI spoke with the developer now responsible for this function, who said that this is either a bug or has never worked. \\n\\nPlease report this in JIRA, so you can follow the progress.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-01-14 19:13:19\" },\n\t{ \"post_id\": 32853, \"topic_id\": 8573, \"forum_id\": 8, \"post_subject\": \"Std.File,Copy ForcePush := TRUE\", \"username\": \"janet.anderson\", \"post_text\": \"I am trying to use Std.File.Copy to copy a file across different environments. I want to use ForcePush := TRUE, as opposed to pulling. I can't find an example of this that shows how the other parameters should change (like does sourceDali become targetDali?) and my tests have failed. Can someone provide an example or explanation, please?\", \"post_time\": \"2021-01-13 14:32:45\" },\n\t{ \"post_id\": 32903, \"topic_id\": 8583, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT module as a dataset\", \"username\": \"flyer\", \"post_text\": \"Hi Richard,\\n\\nThat is what I thought - to your short answer \\n\\nIn any case, thank you so much for sharing the way you would approach the problem. It is always nice to be exposed to others' ways of thinking.\", \"post_time\": \"2021-02-04 03:30:12\" },\n\t{ \"post_id\": 32893, \"topic_id\": 8583, \"forum_id\": 8, \"post_subject\": \"Re: OUTPUT module as a dataset\", \"username\": \"rtaylor\", \"post_text\": \"flyer,
Is there an easy way to do this without having to define a new layout and projecting everything from the module into a dataset?
Short answer: None that I know of (and I tried a couple of ideas before replying). \\n\\nIn fact, I'm surprised that your OUTPUT(myModule) code works at all. I think you're taking advantage of an undocumented "feature" with that one. \\n\\nSo here's how I would approach the problem:\\n
UNSIGNED firstSettingIn := 2;\\n\\nMY_LIBIN := INTERFACE\\n EXPORT UNSIGNED1 firstSetting := 1;\\n EXPORT UNSIGNED1 secondSetting := 5;\\n EXPORT BOOLEAN isInternalProduct := TRUE;\\nEND;\\n\\n\\nmyModule := MODULE (MY_LIBIN)\\n EXPORT UNSIGNED1 firstSetting := firstSettingIn;\\nEND;\\n \\n//my suggested solution: \\nmodToDataset(MY_LIBIN i) := FUNCTION\\n MY_LIBOUT := RECORD\\n i.firstSetting;\\n i.secondSetting;\\n i.isInternalProduct;\\n END;\\n ds := DATASET(1,TRANSFORM(MY_LIBOUT,SELF := []));\\n RETURN TABLE(ds,MY_LIBOUT);\\nEND;\\n\\nOUTPUT(modToDataset(myModule), NAMED('modToDataset'));
This is a FUNCTION that takes a parameter of your initial INTERFACE type. The MY_LIBOUT RECORD structure is designed for use by the TABLE function, and the DATASET is just to create a 1-record dataset for the TABLE to operate against, producing the values that are passed in by whatever MODULE instance you pass that implements your INTERFACE. For each INTERFACE you create, you just need one of these FUNCTIONs and it will operate against whatever MODULE instance you pass each time.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-02-03 18:55:34\" },\n\t{ \"post_id\": 32883, \"topic_id\": 8583, \"forum_id\": 8, \"post_subject\": \"OUTPUT module as a dataset\", \"username\": \"flyer\", \"post_text\": \"Hi,\\n\\nI am trying to understand a case where a module could easily be converted to a dataset. The reason why I am trying to do this is that some modules I am working with have 50+ fields, and I would like to be able to have a single output as opposed to 50+ in the ECL IDE or out on the ECL Watch page. \\n\\nFollowing is a piece of code I used to test what I am trying to accomplish:\\n\\n\\nUNSIGNED firstSettingIn := 2;\\n\\nMY_LIBIN := INTERFACE\\n EXPORT UNSIGNED1 firstSetting := 1;\\n EXPORT UNSIGNED1 secondSetting := 5;\\n EXPORT BOOLEAN isInternalProduct := TRUE;\\nEND;\\n\\n\\nmyModule := MODULE (MY_LIBIN) \\n EXPORT UNSIGNED1 firstSetting := firstSettingIn;\\nEND;\\n \\n \\nOUTPUT(myModule);\\n
\\nOUTPUT(myModule) produces three outputs - one for each variable, which is kind of undesired. See attachment (outputs).\\n\\nIs there an easy way to do this without having to define a new layout and projecting everything from the module into a dataset? I thought that doing something like the following would work, but it appears that it doesn't:\\n\\n\\nmodToDataset := DATASET([TRANSFORM(MY_LIBIN,\\n SELF.firstSetting := myModule.firstSetting;\\n SELF := [];)\\n ]);\\n \\nOUTPUT(modToDataset, NAMED('modToDataset'));\\n
\\nI get an error that is exactly at the end of MY_LIBIN, which makes sense and is kind of expected as MY_LIBIN is not a real layout. \\n\\nAny other insights on how this could easily be achieved would be appreciated.\\n\\n\\nThanks.\", \"post_time\": \"2021-02-03 00:44:38\" },\n\t{ \"post_id\": 33023, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Hi Tony\\n\\nThis statement from my previous message appears to be false, just after I posted all three boxes were still copying the previous hours data and none of them accepted the new package map and therefore I had to wait until the following hour for them to accept the next package map.\\n\\nOf the 3 roxies I'm testing with two of them have accepted the next package and are loading the next set which is great
\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2021-02-10 14:59:32\" },\n\t{ \"post_id\": 33013, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Hi Tony\\n\\nWe are setting Overwrite and Activate package. I have pasted the code below.\\n\\nRegarding the packages we have two variants inside the one package, one where indexes are only required to be built once a day and the other is the hourly data. I made a change last night where I moved 5 indexes into our daily build which would save 16gb being copied each hour. Of the 3 roxies I'm testing with two of them have accepted the next package and are loading the next set which is great (So hopefully a lot less data has helped), however, one remaining roxie started copying the indexes and suddenly stopped. In the logs for that machine I'm seeing this (Show in attachment and I have no idea why all lines are repeated), what is strange is the other roxies are reading from that Thor node without issue. The. roxie in question is showing as running, however, it won't start copying data again until I run\\n\\nsudo service hpcc-init -c myroxie restart\\n\\n\\nRequestLayout := RECORD\\n STRING packageMapData {XPATH('Info')};\\n BOOLEAN overwritePackage {XPATH('OverWrite')};\\n BOOLEAN activatePackage {XPATH('Activate')};\\n STRING targetCluster {XPATH('Target')};\\n STRING packageMapID {XPATH('PackageMap')};\\n\\t\\tSTRING\\t\\t\\tProcess\\t\\t\\t\\t\\t\\t\\t\\t\\t{XPATH('Process')};\\n\\t\\tSTRING\\t\\t\\tDaliIp\\t\\t\\t\\t\\t\\t\\t\\t\\t{XPATH('DaliIp')};\\nEND;\\n\\nrequest := DATASET\\n (\\n [\\n {\\n completePackage,\\n TRUE,\\n TRUE,\\n Roxie_Clustername, // roxie clustername.\\n Package_Name,\\n\\t\\t\\t\\t\\t\\t\\t\\t'*',\\n\\t\\t\\t\\t\\t\\t\\t\\tDali_IP\\n }\\n ],\\n RequestLayout\\n );\\n\\nResponseLayout := RECORD\\n STRING code {XPATH('Code')};\\n STRING description {XPATH('Description')};\\nEND;\\n\\nDeployPackage := SOAPCALL\\n (\\n request,\\n RoxieUrl_WsPackageProcess,\\n 'AddPackage',\\n RequestLayout,\\n TRANSFORM(LEFT),\\n DATASET(ResponseLayout),\\n XPATH('AddPackageResponse/status')\\n );\\n\\nsequential(output(LoadDate), output(DailyLoadDate), output(DeployPackage));\\n
\", \"post_time\": \"2021-02-10 11:47:12\" },\n\t{ \"post_id\": 33003, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"anthony.fishbeck\", \"post_text\": \"David,\\nWhen you call AddPackage for the new package you should be able to set an "Activate" flag. When the new package is activated it should automatically deactivate the old one. Deactivating is not the same a deleting and you eventually have to clean up those old deactivated packagemaps.\\n\\nCan you show what the request being sent to AddPackage looks like?\\n\\nAlso, can you describe the content of your packages? For example are you adding additional files to what was in the previous package, or completely replacing what was there?\\n\\nTony\", \"post_time\": \"2021-02-09 18:09:28\" },\n\t{ \"post_id\": 32993, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Thanks Richard, it's appreciated.\\n\\nDavid\", \"post_time\": \"2021-02-09 14:51:25\" },\n\t{ \"post_id\": 32983, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"rtaylor\", \"post_text\": \"David,Have you come across issues with frequent deploy's to a roxie?
Not that I am aware of, I was just offering a possible alternative solution. Tony is the expert in this area. \\n\\nRichard\", \"post_time\": \"2021-02-09 14:45:46\" },\n\t{ \"post_id\": 32973, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Hi Richard\\n\\nOur firewall does not have the option of dynamically configuring rules unfortunately. We have 6 roxies in this group attached to our end point. I appreciate we might have to invest in a newer firewall but it's a big job for our team and we have been hit with budget freezes as you can imagine. \\n\\nI was hoping we could find a solution with the deploy and deactivating the current package map.\\n\\nHave you come across issues with frequent deploy's to a roxie?\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2021-02-09 14:40:12\" },\n\t{ \"post_id\": 32963, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"rtaylor\", \"post_text\": \"David,
We are redeploying a new Package map each hour but the index copy 9 times out of 10 will take longer than the hour so the new package will not be accepted and generally waits for the next one.
Have you considered having online and offline ROXIEs and just switching the load balancer between the two. That way you could update the offline ROXIE then switch the load balancer's target when the update is complete.\\n\\nJAT,\\n\\nRichard\", \"post_time\": \"2021-02-09 14:23:23\" },\n\t{ \"post_id\": 32953, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Hi Tony\\n\\nI have my hardware team checking for bottlenecks on the problems discussed, however I have a question about my first problem if that's ok.\\n\\nWe are redeploying a new Package map each hour but the index copy 9 times out of 10 will take longer than the hour so the new package will not be accepted and generally waits for the next one. I have tried manually deactivating the package map and deploying the new one which is great and works, however, all queries become suspended from the time I deactivate to the new one being activated. I assume if I deactivated and activated in a split second it shouldn't be an issue. \\n\\nCan you please give me any pointers in how I should deactivate the old package? Should it be in the same work unit that deploys the new package?\\n\\n\\n\\nDeployPackage := SOAPCALL\\n (\\n request,\\n RoxieUrl_WsPackageProcess,\\n 'AddPackage',\\n RequestLayout,\\n TRANSFORM(LEFT),\\n DATASET(ResponseLayout),\\n XPATH('AddPackageResponse/status')\\n )\\n\\n// Call deactivate package ??\\nsequential(output(LoadDate), output(DailyLoadDate), output(DeployPackage));\\n
\\n\\nAlso, at least once a day the roxie will accept the new package (Which I can see on eco watch), however, it will not start copying data from the Dali. In the logs I can't even see it doing anything. I don't even see \\tpackagemaproxie::globex.pkg,activetrue. So it just sits there until I restart the myroxieservice. Obviously this isn't great for a production environment. So I'm just seeing if you've heard of this before.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2021-02-09 14:16:42\" },\n\t{ \"post_id\": 32943, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Roxie wasn't able to connect to dafilserv on that node (192.168.24.134). Either hardware/networking, or dafileserv is down. If you can't track it down let me know.\\n\\nRegards,\\nTony\", \"post_time\": \"2021-02-05 19:04:26\" },\n\t{ \"post_id\": 32933, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Thanks for the prompt response Tony.\\n\\nI can sort (1), that's great.\\n\\nRegarding (2,3) we have that setting set to true, I'll check with my hardware team and make sure we don't have any bottlenecks, we did some comprehensive speed tests when we originally setup the cluster and we certainly didn't see anything that should cause this. Are there any diagnostics or other things to look for within HPCC or does this become purely a hardware issue?\\n\\nHave a great weekend.\\n\\nDavid\", \"post_time\": \"2021-02-05 18:40:26\" },\n\t{ \"post_id\": 32923, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Re: Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi David,\\n\\n1. Roxie wouldn't stop loading the old packagemap unless you explicitly unload it. That get's a bit tricky because it continues to use prior packagemaps until the new one is loaded. Most likeley best to remove old after new one has loaded.\\n\\n2 and 3: Using the default and most common configuration of roxie (useRemoteResources="true") roxie would make use of the remote file until the local copy of the file was copied. Thus the query should work even before the file is copied. But the error you see in #3 seems to indicate that roxie is having problems connecting to the location of the original file. 192.168.24.134:7100.\\n\\nRegards,\\nTony\", \"post_time\": \"2021-02-05 18:29:49\" },\n\t{ \"post_id\": 32913, \"topic_id\": 8593, \"forum_id\": 8, \"post_subject\": \"Questions about PackageMaps & Roxie 7.6.52-1\", \"username\": \"daviddasher\", \"post_text\": \"Hi\\n\\nI have a couple of questions about PackageMaps and Some errors in the Roxie Logs.\\n\\n1: We have to deploy our packages each hour and sometimes the copy is still going when the new package arrives. Will Roxie stop with the old package and just defer to the new?\\n\\n2: Sometimes we are receiving exceptions like below, I assume this is because all the index parts have not copied over. 
Is there anyway to protect the query until the package has copied?\\n\\ndashboardgenericreportingconfigoutput Response\\nException\\nReported by: Roxie\\nMessage: Query dashboardgenericreportingconfigoutput.1 is suspended because Could not open file /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_multiid_nextgenreports_reportnames_202102051400._53_of_145 (in Index Read 5)\\n\\n3: We are seeing this message in the Roxie logs during the file copy process. I can't see any posts relating to it. Can anyone please advise?\\n\\n:Whilecreatingremotefilereference:Failedtoconnect(hostmarkeddown)todafilesrv/daliservixon192.168.24.134:7100"\\n\\nMany thanks\\n\\nDavid\", \"post_time\": \"2021-02-05 16:13:20\" },\n\t{ \"post_id\": 33383, \"topic_id\": 8603, \"forum_id\": 8, \"post_subject\": \"Re: Deploying Indexes via Roxie Package\", \"username\": \"daviddasher\", \"post_text\": \"Hi Bob\\n\\nNo worries at all.\\n\\nIt turns out we had some issues with a firewall which would terminate the connection between Dali and roxie after an hour. Initially we created a new set of roxies in the same subnet which eliminated the issue and then tracked it back to the firewall rule on the original roxies.\\n\\nI do need to report via tracker so I'll chase our firewall team on all the details.\\n\\nThanks for checking and I hope you are well.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2021-04-20 16:21:34\" },\n\t{ \"post_id\": 33363, \"topic_id\": 8603, \"forum_id\": 8, \"post_subject\": \"Re: Deploying Indexes via Roxie Package\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nSorry for the delay in reply! Did anyone reach out to yet with a resolution?\\nIf you haven't already done so, this looks like something that needs to be reported to our Issue Tracker.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2021-04-19 16:32:21\" },\n\t{ \"post_id\": 33033, \"topic_id\": 8603, \"forum_id\": 8, \"post_subject\": \"Deploying Indexes via Roxie Package\", \"username\": \"daviddasher\", \"post_text\": \"Hi\\n\\nI'm having several issues when deploying packagemaps to my roxies \\n\\nProblem 1\\n\\nWhen deploying each hour throughout the day I sometimes get the this message on a roxie Exception\\nReported by: Roxie\\nMessage: Query roxiewarmup.2 is suspended because Could not open file /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_multiuserid_multiusersandmultipremium_202102161000._37_of_37
. When I look at the logs also below you can see the "No more data files to copy message", however, the data usually takes about 40 minutes to copy to the roxies and the no more data to copy appears after about 15 minutes. The files in question appear in my Thor cluster and are accessable. Once I receive this message I have to sudo service hpcc-init -c myroxie restart
to get the files copying again.\\n\\n\\n0000599D PRG 2021-02-16 11:05:21.822 2846 2853 "Background copying //192.168.24.124:7100/var/lib/HPCCSystems/hpcc-data/thor/globex/key_airesourcescontentngrams_202102161000._4_of_145 to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._4_of_145"\\n0000599E PRG 2021-02-16 11:05:22.307 2846 2853 "Background copy to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._4_of_145 complete in 485 ms (32.7 MB/sec)"\\n0000599F PRG 2021-02-16 11:05:22.412 2846 2853 "Background copying //192.168.24.123:7100/var/lib/HPCCSystems/hpcc-data/thor/globex/key_airesourcescontentngrams_202102161000._3_of_145 to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._3_of_145"\\n000059A0 PRG 2021-02-16 11:05:22.688 2846 2853 "Background copy to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._3_of_145 complete in 276 ms (51.4 MB/sec)"\\n000059A1 PRG 2021-02-16 11:05:22.795 2846 2853 "Background copying //192.168.24.122:7100/var/lib/HPCCSystems/hpcc-data/thor/globex/key_airesourcescontentngrams_202102161000._2_of_145 to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._2_of_145"\\n000059A2 PRG 2021-02-16 11:05:23.436 2846 2853 "Background copy to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._2_of_145 complete in 642 ms (34.2 MB/sec)"\\n000059A3 PRG 2021-02-16 11:05:23.538 2846 2853 "Background copying //192.168.24.121:7100/var/lib/HPCCSystems/hpcc-data/thor/globex/key_airesourcescontentngrams_202102161000._1_of_145 to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._1_of_145"\\n000059A4 PRG 2021-02-16 11:05:24.277 2846 2853 "Background copy to /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_airesourcescontentngrams_202102161000._1_of_145 complete in 739 ms (31.7 MB/sec)"\\n000059A5 PRG 2021-02-16 11:05:24.328 2846 2853 "No more data files to copy"\\n000059A6 PRG 2021-02-16 11:05:32.803 2846 2852 "SYS: LPT=15862 APT=316692 PU= 2% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5312660K SWP=2528K RMU= 1% RMX=1023M"\\n000059A7 PRG 2021-02-16 11:05:32.804 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=164.3 kw/s=23817.0 bsy=54 NIC: [bond0] rxp/s=17978.0 rxk/s=25549.4 txp/s=1578.3 txk/s=110.4 rxerrs=0 rxdrps=166 txerrs=0 txdrps=0 CPU: usr=0 sys=1 iow=1 idle=97"\\n000059A8 PRG 2021-02-16 11:05:44.953 2846 8078 "PING: 1 replies received, average delay 781us"\\n000059A9 PRG 2021-02-16 11:06:32.825 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5312004K SWP=2528K RMU= 1% RMX=1023M"\\n000059AA PRG 2021-02-16 11:06:32.826 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.8 kw/s=8.9 bsy=0 NIC: [bond0] rxp/s=13.2 rxk/s=4.1 txp/s=1.7 txk/s=0.7 rxerrs=0 rxdrps=162 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059AB PRG 2021-02-16 11:06:44.954 2846 8078 "PING: 1 replies received, average delay 236us"\\n000059AC PRG 2021-02-16 11:07:32.847 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5314436K SWP=2528K RMU= 1% RMX=1023M"\\n000059AD PRG 2021-02-16 11:07:32.847 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.4 kw/s=2.7 bsy=0 NIC: [bond0] rxp/s=16.0 rxk/s=4.2 txp/s=2.1 txk/s=0.7 rxerrs=0 rxdrps=161 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059AE PRG 2021-02-16 11:07:44.954 2846 
8078 "PING: 1 replies received, average delay 235us"\\n000059AF PRG 2021-02-16 11:08:32.866 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5314436K SWP=2528K RMU= 1% RMX=1023M"\\n000059B0 PRG 2021-02-16 11:08:32.867 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=5.1 kw/s=28.0 bsy=1 NIC: [bond0] rxp/s=28.2 rxk/s=6.2 txp/s=12.8 txk/s=5.9 rxerrs=0 rxdrps=161 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059B1 PRG 2021-02-16 11:08:36.234 2846 9084 "[192.168.20.25:9876{2}] FAILED: "\\n000059B2 PRG 2021-02-16 11:08:36.234 2846 9084 "[192.168.20.25:9876{2}] EXCEPTION: Query roxiewarmup.2 is suspended because Could not open file /var/lib/HPCCSystems/hpcc-data/roxie/globex/key_multiuserid_multiusersandmultipremium_202102161000._37_of_37"\\n000059B3 PRG 2021-02-16 11:08:44.954 2846 8078 "PING: 1 replies received, average delay 160us"\\n000059B4 PRG 2021-02-16 11:09:32.889 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5254920K SWP=2528K RMU= 1% RMX=1023M"\\n000059B5 PRG 2021-02-16 11:09:32.889 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.8 kw/s=6.9 bsy=0 NIC: [bond0] rxp/s=17.4 rxk/s=4.9 txp/s=5.8 txk/s=2.3 rxerrs=0 rxdrps=160 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059B6 PRG 2021-02-16 11:09:44.955 2846 8078 "PING: 1 replies received, average delay 256us"\\n000059B7 PRG 2021-02-16 11:10:32.910 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5255156K SWP=2528K RMU= 1% RMX=1023M"\\n000059B8 PRG 2021-02-16 11:10:32.910 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.5 kw/s=3.1 bsy=0 NIC: [bond0] rxp/s=12.2 rxk/s=4.0 txp/s=1.4 txk/s=0.6 rxerrs=0 rxdrps=162 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059B9 PRG 2021-02-16 11:10:40.757 2846 9084 "connectChild connecting to 192.168.20.25:9876"\\n000059BA PRG 2021-02-16 11:10:40.757 2846 9084 "connectChild connected to 192.168.20.25:9876"\\n000059BB PRG 2021-02-16 11:10:40.758 2846 23600 "[192.168.20.25:9876{4}] doControlMessage - control:state"\\n000059BC PRG 2021-02-16 11:10:44.955 2846 8078 "PING: 1 replies received, average delay 232us"\\n000059BD PRG 2021-02-16 11:11:32.931 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2362552320 MMP=2048638976 SBK=313913344 TOT=2311136K RAM=5258284K SWP=2528K RMU= 1% RMX=1023M"\\n000059BE PRG 2021-02-16 11:11:32.931 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.3 kw/s=13.9 bsy=0 NIC: [bond0] rxp/s=13.0 rxk/s=4.0 txp/s=1.2 txk/s=0.3 rxerrs=0 rxdrps=167 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059BF PRG 2021-02-16 11:11:44.956 2846 8078 "PING: 1 replies received, average delay 246us"\\n000059C0 PRG 2021-02-16 11:11:47.464 2846 9084 "[192.168.20.25:9876{5}] doControlMessage - control:queries"\\n000059C1 PRG 2021-02-16 11:12:27.734 2846 9084 "RoxieMemMgr: Heap size 4096 pages, 4095 free, largest block 4095, heapLWM 0, heapHWM 128, dataBuffersActive=0, dataBufferPages=0"\\n000059C2 PRG 2021-02-16 11:12:32.952 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2363887616 MMP=2049974272 SBK=313913344 TOT=2312440K RAM=5258076K SWP=2528K RMU= 1% RMX=1023M"\\n000059C3 PRG 2021-02-16 11:12:32.953 2846 2852 "DSK: [sda] r/s=1.1 kr/s=11.1 w/s=0.4 kw/s=3.7 bsy=0 NIC: [bond0] rxp/s=15.2 rxk/s=4.3 txp/s=1.9 txk/s=0.8 rxerrs=0 rxdrps=168 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059C4 PRG 2021-02-16 11:12:44.956 2846 8078 "PING: 1 replies received, average delay 
265us"\\n000059C5 PRG 2021-02-16 11:13:32.974 2846 2852 "SYS: LPT=15862 APT=316692 PU= 0% MU= 10% MAL=2363887616 MMP=2049974272 SBK=313913344 TOT=2312440K RAM=5258880K SWP=2528K RMU= 1% RMX=1023M"\\n000059C6 PRG 2021-02-16 11:13:32.975 2846 2852 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.7 kw/s=4.7 bsy=0 NIC: [bond0] rxp/s=13.4 rxk/s=4.0 txp/s=0.9 txk/s=0.2 rxerrs=0 rxdrps=162 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000059C7 PRG 2021-02-16 11:13:44.957 2846 8078 "PING: 1 replies received, average delay 217us"
\\n\\n\\nProblem 2\\n\\nOur second issue is that when deploying our Roxie package to 3 Roxies, one in every 3 deploys fails and the Roxies fail to accept the SOAP request to replace the current package.\\n\\nWe managed to get some information from our logs:\\n\\n0000C6D4 PRG 2021-02-15 07:49:02.701 41665 42734 "MP: Possible clash between 192.168.24.120:7070->192.168.20.25:7339 0(0)"\\n0000DA3D PRG 2021-02-15 10:50:26.156 41665 42734 "MP: Possible clash between 192.168.24.120:7070->192.168.20.26:7166 0(0)"\\n0000D4A2 PRG 2021-02-15 10:49:09.333 41665 42734 "MP: Possible clash between 192.168.24.120:7070->192.168.20.27:7475 0(0)"\\n0000C5F3 PRG 2021-02-15 06:50:23.516 41665 42734 "MP: Possible clash between 192.168.24.120:7070->192.168.20.26:7156 0(0)"\\n0000C5F4 PRG 2021-02-15 06:50:23.516 41665 42734 "Message Passing - removing stale socket to 192.168.20.26:7156"
\\n\\nIf I clone the failed job to force the package in I start getting the issues in Problem 1.\\n\\nCan anyone please shed any light or push us in the right direction?\\n\\nWe are using version 7.8.46-1, however we are upgrading to 7.12.24.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2021-02-16 11:43:35\" },\n\t{ \"post_id\": 33233, \"topic_id\": 8663, \"forum_id\": 8, \"post_subject\": \"Re: Using WsECL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"If you use the stored type of BOOLEAN then it results in something similar to a check box.\\nBOOLEAN myoption := false : STORED('myoption');\\n
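If you want several independent yes/no options (like the fruit choices discussed elsewhere in this thread), one pattern is simply one STORED BOOLEAN per option -- a minimal, untested sketch with made-up names:

// each of these gets its own input on the generated WsECL form
BOOLEAN wantApple  := FALSE : STORED('wantApple');
BOOLEAN wantOrange := FALSE : STORED('wantOrange');
BOOLEAN wantBanana := FALSE : STORED('wantBanana');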
\\n\\nNormal HTML checkboxes don't allow for using the default value for the query... so WsECL generates a tri state input providing choices of "true/false/default".\\n\\nIf you really want you can completely provide your own html form that calls back into the query. The html and other resources are added by using a manifest file with the same name as your ECL file. Here's a really old example:\\n\\nhttps://github.com/hpcc-systems/ecl-sam ... olumnChart\", \"post_time\": \"2021-03-23 20:18:48\" },\n\t{ \"post_id\": 33223, \"topic_id\": 8663, \"forum_id\": 8, \"post_subject\": \"Using WsECL\", \"username\": \"khaldrogo\", \"post_text\": \"Hello I’m working on putting the finishing touches on a group project and I have a question about using WsECL. How would I add checkboxes to the WsECL form? I know that using the following code produces a dropdown menu:\\n\\nSTRING3 FruitSelection := 'Fruit' :STORED('Fruit', FORMAT(SELECT('*ALL=ALL,Apple=Apple,Orange=Orange,Banana=Banana')));\\n\\nBut what would I do if I want to create checkboxes instead?\", \"post_time\": \"2021-03-23 03:18:18\" },\n\t{ \"post_id\": 34343, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nGood alternative!!\\n\\nOne minor correction, this line: strFixed := STD.Str.FindReplace( strRec, '>-1<', '');
needs to change to this: strFixed := STD.Str.FindReplace( strRec, '>-1<', '><');
otherwise your XML becomes malformed and the FROMXML() won't work. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-11-15 11:22:31\" },\n\t{ \"post_id\": 34333, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"jwilt\", \"post_text\": \"An alternate approach that *may* (or may not) be more clear to some developers... which does similar string-handling (under the hood), I suspect, but uses TOXML() and FROMXML() to string-ify rows for brute-force string replacement:\\n\\n
lRando := {STRING f1; STRING f2; STRING f3;};\\ndsRando := DATASET([\\n {'1', '-1', '-1'}\\n, {'-1', '2', '-1'}\\n, {'-1', '-1', '3'}\\n, {'4', '4', '4'}\\n], lRando);\\nlRando xfFixIt(dsRando Lf) := TRANSFORM\\n strRec := '<Row>' + (STRING)TOXML(ROW(Lf, lRando)) + '</Row>';\\n strFixed := STD.Str.FindReplace( strRec, '>-1<', '');\\n self := FROMXML(lRando, strFixed);\\nEND;\\ndsFixed := PROJECT(dsRando, xfFixIt(LEFT));
\\nFor what that's worth. Not sure this completely replaces the logic in your examples, but you get the gist. Template language can be hard to write. And read. And understand.\", \"post_time\": \"2021-11-14 02:20:49\" },\n\t{ \"post_id\": 33333, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"Eric Graves\", \"post_text\": \"Thank you both. I think the piece I was missing in reading the documentation was the connection between the template #FOR working on XML and the #EXPORTXML to generate XML from a record layout. With your clear, self-contained examples I was able to make progress.\", \"post_time\": \"2021-04-05 20:10:51\" },\n\t{ \"post_id\": 33323, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"rtaylor\", \"post_text\": \"Eric,\\n\\nAnd just to finally "ice the cake" \\n\\nHere's an improved MACRO that handles any type field and any search/replace values:\\n
MAC_ReplaceVals(ds,thislayout,resultname,FldType,SrchVal,ReplVal) := MACRO\\n #UNIQUENAME(XF)\\n #EXPORTXML(Fred,thislayout);\\n #DECLARE(ProjStr)\\n #DECLARE (Ndx)\\n #SET (Ndx, 0); \\n #FOR (Fred)\\n #FOR (Field) \\n #SET (Ndx, %Ndx% + 1)\\n #IF ( %Ndx% = 1)\\n #SET(ProjStr,'RECORDOF(' + ds + ') ' + %'XF'% + '(' + ds + ' L) := TRANSFORM\\\\n')\\n #END\\n #IF (%'{@type}'% = FldType)\\n #IF (%'{@type}'% = 'string')\\n #APPEND(projstr,' SELF.' + %'{@label}'% + ' := IF(L.' + %'{@label}'% + '=\\\\'' + SrchVal + '\\\\',\\\\'\\\\',L.' + %'{@label}'% + ');\\\\n');\\n #ELSE\\n #APPEND(projstr,' SELF.' + %'{@label}'% + ' := IF(L.' + %'{@label}'% + '=' + SrchVal + ',' + ReplVal + ',L.' + %'{@label}'% + ');\\\\n');\\n #END\\n #END\\n #END\\n #END\\n #APPEND(projstr,' SELF := L;\\\\nEND;\\\\n' + resultname + ' := PROJECT(' + ds + ',' + %'XF'% + '(LEFT));\\\\n');\\n %projstr%;\\n\\t\\t \\n // OUTPUT(%'projstr'%);\\t\\nENDMACRO;\\t\\t \\n\\nMAC_ReplaceVals('inds',filelayout,'Res1','string','-1','');\\nMAC_ReplaceVals('inds',filelayout,'Res2','real',-1,-9999);\\n\\nOUTPUT(Res1);\\t//runs the generated code to produce the result\\nOUTPUT(Res2);\\t//runs the generated code to produce the result
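Just to make the output concrete: assuming the same test layout as the previous example (with a REAL f2 field), the second call above -- MAC_ReplaceVals('inds',filelayout,'Res2','real',-1,-9999) -- generates an assignment along these lines (my paraphrase, not captured compiler output):

 SELF.f2 := IF(L.f2=-1,-9999,L.f2);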
Of course, my demo uses the same test dataset as the previous.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-04-05 20:01:23\" },\n\t{ \"post_id\": 33313, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"rtaylor\", \"post_text\": \"Eric,\\n\\nYes, this is a common issue. The problem is, since ECL is a declarative and non-procedural language, it cannot have the type of looping syntax you're used to in imperative languages. That's why we have all the operations that use TRANSFORM functions (PROJECT, ITERATE, etc.).\\n\\nIf you have only a few fields to do this on, then using PROJECT and just writing the transformation definitions to handle it is how it's usually done. Like this (I'm presuming STRING fields, because that's the only field type that makes sense to replace -1 with blank):\\nfilelayout := RECORD\\n UNSIGNED myid;\\n STRING10 f1;\\n REAL f2;\\n STRING10 f3;\\n STRING10 f4;\\nEND;\\n\\ninds := DATASET([{1, '11.0', -1, '13.0', '99.0'},\\n {2, '-1', 22.0, '-1', '24.0'},\\n {3, '31.0', 32.0, '33.0', '-1'},\\n {4, '41.0', -1, '-1', '44.0'}\\n ],filelayout);\\nPROJECT(inds,\\n TRANSFORM(RECORDOF(inds),\\n SELF.F1 := IF(LEFT.F1='-1','',LEFT.F1),\\n SELF.F3 := IF(LEFT.F3='-1','',LEFT.F3),\\n SELF.F4 := IF(LEFT.F4='-1','',LEFT.F4),\\n SELF := LEFT));
In fact, I've written this type of one-off code many times with many more fields. I generally do it by using a text editor with good macro capabilities to write the "boiler plate" lines for me.\\n\\nBut for a more generic approach, ECL's Template Language is the best approach, because it's all about generating "boiler plate" code. \\n\\nSo here's a MACRO that uses Template Language to do just that:MAC_ReplaceNegOneWithBlanks(ds,thislayout,resultname) := MACRO\\n #EXPORTXML(Fred,thislayout);\\n #DECLARE(ProjStr)\\n #DECLARE (Ndx)\\n #SET (Ndx, 0); \\n #FOR (Fred)\\n #FOR (Field) \\n #SET (Ndx, %Ndx% + 1)\\n #IF ( %Ndx% = 1)\\n #SET(ProjStr,'RECORDOF(' + ds + ') XF(' + ds + ' L) := TRANSFORM\\\\n')\\n #END\\n #IF (%'{@type}'% = 'string')\\n #APPEND(projstr,' SELF.' + %'{@label}'% + ' := IF(L.' + %'{@label}'% + '=\\\\'-1\\\\',\\\\'\\\\',L.' + %'{@label}'% + ');\\\\n');\\n #END\\n #END\\n #END\\n #APPEND(projstr,' SELF := L;\\\\nEND;\\\\n' + resultname + ' := PROJECT(' + ds + ',XF(LEFT));\\\\n');\\n %projstr%;\\n //%projstr% generates the code, while this:\\t \\n // OUTPUT(%'projstr'%);\\t//shows us the generated code\\nENDMACRO;
This MACRO uses #EXPORTXML to generate an XML string from the RECORD structure of the dataset you want to use. Template Language is designed to parse XML and generate ECL code from that. In this case, for my test file, that generated code would be this:\\t\\t\\t\\tRECORDOF(inds) XF(inds L) := TRANSFORM\\n\\t\\t\\t\\t\\tSELF.f1 := IF(L.f1='-1','',L.f1);\\n\\t\\t\\t\\t\\tSELF.f3 := IF(L.f3='-1','',L.f3);\\n\\t\\t\\t\\t\\tSELF.f4 := IF(L.f4='-1','',L.f4);\\n\\t\\t\\t\\t\\tSELF := L;\\n\\t\\t\\t\\tEND;\\n\\t\\t\\t\\tRes := PROJECT(inds,XF(LEFT));\\n
So that, when I call the MACRO for my test dataset like this:MAC_ReplaceNegOneWithBlanks('inds',filelayout,'Res');\\n\\nOUTPUT(Res);\\t//runs the generated code to produce the result\\n
I get the result from that generated code that looks like this:\\n1\\t11.0 \\t-1.0\\t13.0 \\t99.0 \\n2\\t \\t22.0\\t \\t24.0 \\n3\\t31.0 \\t32.0\\t33.0 \\t \\n4\\t41.0 \\t-1.0\\t \\t44.0 \\n
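And in case it helps to picture what the #FOR loop is walking over: the XML that #EXPORTXML produces for this layout looks roughly like the following (reconstructed by hand to show just the label and type attributes the MACRO uses; the real output carries several more attributes per field):

<Data>
 <Field label="myid" type="unsigned" ... />
 <Field label="f1" type="string" ... />
 <Field label="f2" type="real" ... />
 <Field label="f3" type="string" ... />
 <Field label="f4" type="string" ... />
</Data>

which is why the #IF test on %'{@type}'% picks out only the string fields.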
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-04-05 15:27:11\" },\n\t{ \"post_id\": 33303, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"Re: iterate over list of fields and apply same macro logic\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Eric, \\n\\nIf I got your scenario description right, I think that this can be accomplished by using some of the ECL template language statements as exemplified in the code snippet below:\\n\\n\\nmyrec := RECORD\\n integer4 rec_id;\\n integer4 instance_id;\\n integer4 first_date_received;\\n integer4 last_date_received;\\nEND;\\n\\nmyds := DATASET([{100,-1,20190101,20191231},\\n {101,10002,-1,20201231},\\n {103,10003,20190101,-1},\\n {104,10004,20190303,20191231}],myrec);\\n\\n#EXPORTXML(myrecords,myrec);\\n#DECLARE(projStr);\\n#SET(projstr,'myrec mytransf(myds L) := TRANSFORM\\\\n');\\n#FOR (myrecords);\\n #FOR (field);\\n #APPEND(projstr,' SELF.' + %'{@label}'% + ' := IF(L.' + %'{@label}'% + '=-1,0,L.'+ %'{@label}'%+');\\\\n');\\n #END\\n#END;\\n#APPEND(projstr,'END;\\\\n myproj := PROJECT(myds,mytransf(LEFT));\\\\n');\\n\\n%projstr%;\\nGenCode := %'projstr'%;\\n\\nOUTPUT(GenCode);\\nOUTPUT(myproj);\\n
\\n\\nIn this example, the RECORD structure is being exported and loaded as a XML stream (#EXPORTXML) and then the field labels in this XML string are being parsed using a loop iteration (#FOR) to properly generate the PROJECT/TRANSFORM code containing the value replacement logic in every field.\\n\\nPlease let me know in case this is what you are looking for.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2021-04-05 15:02:24\" },\n\t{ \"post_id\": 33293, \"topic_id\": 8693, \"forum_id\": 8, \"post_subject\": \"iterate over list of fields and apply same macro logic\", \"username\": \"Eric Graves\", \"post_text\": \"I think `iterate` is an overloaded term here. I am referring to it in the general sense of looping over a collection. \\n\\nI have a list of 300 fields. For each field I need to recode any -1 values to an empty string. How can I do this programatically without having to write 300 definitions?\\n\\nI have searched the documentation looking for control-flow keywords in the base language as well as the macro and template languages but don't see any obvious path for what seems like a fairly common data munging task.\", \"post_time\": \"2021-04-02 20:45:01\" },\n\t{ \"post_id\": 33423, \"topic_id\": 8713, \"forum_id\": 8, \"post_subject\": \"Re: Sort on Enum Values\", \"username\": \"Allan\", \"post_text\": \"\\nds := DATASET([{'Payload1','ABA'}\\n ,{'Payload2','BBB'}\\n ,{'Payload3','AAA'}\\n ,{'Payload4','BBA'}\\n ],{STRING payload,STRING3 Itm});\\n \\n Codes := DATASET([{'ABA' ,3 },\\n {'BBB' ,2 },\\n {'AAA' ,1 },\\n {'BBA' ,0 }], {STRING code,UNSIGNED1 sortPosition});\\n \\n Lokup := DICTIONARY(Codes,{code => sortPosition});\\n \\n SORT(ds,Lokup[Itm].sortPosition);\\n
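One caveat to verify on your own data (this is my assumption about the default behaviour, not something tested here): a code that isn't present in Codes comes back from the DICTIONARY with the default sortPosition of 0, so unknown values would sort to the front. If you'd rather push unknowns to the end, something like this should work:

 SORT(ds, IF(Itm IN SET(Codes, code), Lokup[Itm].sortPosition, 255));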
\", \"post_time\": \"2021-04-21 14:40:07\" },\n\t{ \"post_id\": 33413, \"topic_id\": 8713, \"forum_id\": 8, \"post_subject\": \"Re: Sort on Enum Values\", \"username\": \"Allan\", \"post_text\": \"You could just PROJECT (or Vertical Slice TABLE) to add / replace the ENUM values with specific integers say, then, with output from this PROJECT sort on that integer?\\nYours\\nAllan\\n\\nActually the ref manual says:\\nA comma-delimited list of expressions or key fields.\\nSo 'expression' could be the return from a DICTIONARY lookup that converts ENUM to a required sort order?\", \"post_time\": \"2021-04-21 13:10:01\" },\n\t{ \"post_id\": 33393, \"topic_id\": 8713, \"forum_id\": 8, \"post_subject\": \"Sort on Enum Values\", \"username\": \"harshdesai\", \"post_text\": \"Hi ALL ,\\nIn case have to sort on enum values rather the column values as need is in specific order \\nand which has to be consistent and in control rather to be dependent on data.\\nIs there a way to sort \\n\\nMyRec := {STRING3 Value1,STRING3 Category1, STRING3 Category2};\\nSomeFile := DATASET([{'NVS','T','ABC'},{'NVS','C','ABC'},{'A','X','BAC'},\\n {'B','G','PQR'},{'A','B','AAC'},{'NVS','T','AAA'}],MyRec);\\n\\nsrtvalues := enum(unsigned1,AAA, ABC, BAC,AAC,PQR); \\n//need to sort on values of 3rd field which is static but can add latter\\nSrtd_values_ds := sort(SomeFile,srtvalues.AAA);\", \"post_time\": \"2021-04-21 08:49:15\" },\n\t{ \"post_id\": 33513, \"topic_id\": 8773, \"forum_id\": 8, \"post_subject\": \"Re: Import names unknown module "Visualizer"\", \"username\": \"gsmith\", \"post_text\": \"Sounds like your trying to submit samples from the Visualizer Bundle, without installing it.\\n\\nSee: https://github.com/hpcc-systems/Visualizer\", \"post_time\": \"2021-05-14 11:50:48\" },\n\t{ \"post_id\": 33503, \"topic_id\": 8773, \"forum_id\": 8, \"post_subject\": \"Import names unknown module "Visualizer"\", \"username\": \"kaizmoto1489\", \"post_text\": \"Hi everyone,\\nwhen am trying to show graph generated from ecl file.am getting issue as follows:\\n\\nImport names unknown module "Visualizer"\\n\\nError eclcc 2081 Import names unknown module "Demo"\\n\\nError eclcc 2167 Unknown identifier "Demo"\\n\\nError eclcc 2167 Unknown identifier before "."\\n\\nplease give solution for this issue.\", \"post_time\": \"2021-05-14 00:05:39\" },\n\t{ \"post_id\": 33693, \"topic_id\": 8873, \"forum_id\": 8, \"post_subject\": \"Re: Detecting a layout\", \"username\": \"Allan\", \"post_text\": \"Also read up section \\n'RECORD Structure' in the ECL reference manual.\\nIn there you'll see an\\nIFBLOCK construct you can use in a RECORD layout.\\nThis IFBLOCK is a run time action conditional on content of preceding fields in the same record.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2021-07-02 15:56:08\" },\n\t{ \"post_id\": 33683, \"topic_id\": 8873, \"forum_id\": 8, \"post_subject\": \"Re: Detecting a layout\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,\\n\\nIt's not the compiler, but the runtime that needs to know. \\n\\nYou write your code to handle either, then you could read the first 32K of the file with a RECORD structure like this:\\nrec := RECORD\\n DATA1 char;\\nEND;
and do some heuristics to detect which format you've got. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-06-30 14:31:20\" },\n\t{ \"post_id\": 33673, \"topic_id\": 8873, \"forum_id\": 8, \"post_subject\": \"Detecting a layout\", \"username\": \"mo0926\", \"post_text\": \"Hello, I hope someone can help with this. Any way we can detect an input file's layout before performing the next operation. Let's say the vendor sends a monthly file but sends it with two different layouts without warning. How can I ask the compiler to make a decision based on the input layout with ECL code?\", \"post_time\": \"2021-06-29 20:29:41\" },\n\t{ \"post_id\": 33733, \"topic_id\": 8893, \"forum_id\": 8, \"post_subject\": \"Re: Last date of the given month and year\", \"username\": \"ravishankar\", \"post_text\": \"Thanks a lot Richard , this helps.\", \"post_time\": \"2021-07-08 14:05:22\" },\n\t{ \"post_id\": 33723, \"topic_id\": 8893, \"forum_id\": 8, \"post_subject\": \"Re: Last date of the given month and year\", \"username\": \"rtaylor\", \"post_text\": \"ravishankar,\\n\\nYou can get the Days_t value for the first day of the next month and subtract 1, then translate that back to a Date value. Something like this: IMPORT STD;\\nD := STD.Date.FromGregorianYMD(2015,12 + 1,1) - 1;\\nX := STD.Date.ToGregorianYMD(D);\\nX.Day; //returns 31
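If you need this in more than one place, the same trick wraps up nicely as a function -- a minimal sketch (the function name is mine, not part of the Std library, and untested):

IMPORT STD;
LastDayOfMonth(UNSIGNED2 yr, UNSIGNED1 mo) := FUNCTION
 D := STD.Date.FromGregorianYMD(yr, mo + 1, 1) - 1; // first day of the next month, minus one day
 X := STD.Date.ToGregorianYMD(D);
 RETURN X.Day;
END;
// LastDayOfMonth(2016,2); //returns 29 -- leap years come out right automatically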
Note that you just add one to the month you want and the FromGregorianYMD() function correctly handles month 13.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-07-08 13:38:22\" },\n\t{ \"post_id\": 33713, \"topic_id\": 8893, \"forum_id\": 8, \"post_subject\": \"Last date of the given month and year\", \"username\": \"ravishankar\", \"post_text\": \"Team,\\n\\nI am finding it hard to calculate the last date for the given month and year. I searched Date library and couldn't figured it out from there. For instance if the input is year - 2015 and month - 12 the output should be 31. \\n\\nIs the only way to write UDF with hardcoded last date monthg values considering the leap year or any other ways within date lib ?\", \"post_time\": \"2021-07-08 12:55:48\" },\n\t{ \"post_id\": 33953, \"topic_id\": 8903, \"forum_id\": 8, \"post_subject\": \"Re: Query about zipping files on unix landing zone from HPCC\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thanks a lot jsmith.\\nI separated the gzip part from ECL and I am doing it in UNIX. I thought this to be a better approach.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-08-10 08:43:01\" },\n\t{ \"post_id\": 33773, \"topic_id\": 8903, \"forum_id\": 8, \"post_subject\": \"Re: Query about zipping files on unix landing zone from HPCC\", \"username\": \"jsmith\", \"post_text\": \"There's no clean way, as it's not something built into the platform.\\nFrom the error, it sounds like it's treating the whole command ("gzip -f /home/bxxxxx01/transfer/4xx_res_resp.xml") as the command (the same error would be issued if a quoted "gzip -f /home/bxxxxx01/transfer/4xx_res_resp.xml" was run from bash directly on the command line.\\n\\nI think it would get further if you remove the quotes and ran e.g.:\\nZipCmd := 'gzip -f ' + RawFilename;\\nZippedDS := PIPE(ZipCmd,rec);
\\n\\n\\nHowever, this approach won't work if the hthor node and the LZ are on different IPs, because gzip will have no direct access to the URL.\\nA possible solution to that is to use an OUTPUT statement to write the file out as 1 part in hthor, instead of the Despray. Then get the path to the [single] physical part belonging to the new file and zip it.\\nI haven't tried it, but something like may work:\\n\\nIMPORT STD.System.Thorlib;\\n\\ninRecDef := * define to record def. of in inupt file *\\ninLogicalFilename := '~thor::4xx_res_resp.xml';\\n\\n\\nMyDropZone := '/home/bxxxxx01/transfer/';\\nRawFilename := MyDropZone + '4xx_res_resp.tgz';\\n\\ninDs := DATASET(inLogicalFilename, inRecDef, FLAT);\\n\\noutputFilename := '~transfer::4xx_res_resp.xml';\\nwriteStep := OUTPUT(inDs, , outputFilename , OVERWRITE);\\n\\nZipCmd := 'tar cfz ' + RawFilename + ' ' + thorlib.logicalToPhysical(outputFilename);\\n\\ndummyRec := RECORD\\n string1 unused;\\nEND;\\nZippedDS := PIPE(ZipCmd, dummyRec);\\n\\nORDERED(\\n writeStep,\\n OUTPUT(ZippedDS) // NB: the created compressed tar file is going to created on the hthor node (which may not be same as LZ node)\\n);
\\n\\nIn general, since things like this aren't navitely supported withint ECL and it's related to extracting data out of the platform, it would be best if possible to run your workflow steps of which this is part as an external script, where the ECL job(s) are steps in that process. i.e. step 1 runs ECL script with 'ecl run' that performs task and desprays. step 2 then uses bash/ssh to perform gzip on output.\", \"post_time\": \"2021-07-12 13:56:03\" },\n\t{ \"post_id\": 33763, \"topic_id\": 8903, \"forum_id\": 8, \"post_subject\": \"Re: Query about zipping files on unix landing zone from HPCC\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Richard,\\n\\nI tried the following code - \\n\\nIMPORT STD;\\n\\nrec := RECORD\\n string name;\\nEND;\\n\\nIP := 'landing_zone_ip';\\n\\nDespray := Std.File.Despray('~thor::4xx_res_resp.xml',\\n IP, \\n '/home/bxxxxx01/transfer/4xx_res_resp.xml',,,,true);\\n \\nMyDropZone := '/home/bxxxxx01/transfer/'; \\nRawFilename := MyDropZone + '4xx_res_resp.xml';\\n \\nZipCmdRaw := '" gzip -f ' + RawFilename + '";';\\n \\nZipCmd := 'bash -c \\\\'' + ZipCmdRaw + '\\\\'';\\nZippedDS := PIPE(ZipCmd,rec);\\n \\nORDERED(Despray,OUTPUT(ZippedDS));\\n\\n\\nIt gives me the following error - \\n\\nSystem error: 2: Error piping from (bash -c '"gzip -f /home/bxxxxx01/transfer/4xx_res_resp.xml";'): process failed with code 127, stderr: 'bash: gzip -f /home/bxxxxx01/transfer/4xx_res_resp.xml: No such file or directory '\\n\\nI also tried the following command by giving the landing zone IP like - \\nZipCmdRaw := '" gzip -f landing_zone_IP:' + RawFilename + '";';\\n\\nBut still I get the same error.\\n\\nAm I missing something here? \\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-07-12 08:55:08\" },\n\t{ \"post_id\": 33753, \"topic_id\": 8903, \"forum_id\": 8, \"post_subject\": \"Re: Query about zipping files on unix landing zone from HPCC\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,\\n\\nI am unaware of any way to do that from ECL. You could try using the first form of the PIPE() function to call the Linux ZIP command. I have not tried it, nor do I know of anyone who has, so I have no example to provide. \\n\\nSince you wouldn't want this to run n times (where n is the number of Thor nodes you're running on), I'd suggest running it only on hThor (or possibly using the NOTHOR() action) to ensure it only runs once.\\n\\nIf you get it working, perhaps you could post an example of how you did it here.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-07-09 13:01:45\" },\n\t{ \"post_id\": 33743, \"topic_id\": 8903, \"forum_id\": 8, \"post_subject\": \"Query about zipping files on unix landing zone from HPCC.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nIs there a way to zip files on the unix landing zone from HPCC using ECL ? An example of how to do will help.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-07-09 08:52:20\" },\n\t{ \"post_id\": 33812, \"topic_id\": 8922, \"forum_id\": 8, \"post_subject\": \"Re: Conditional logic in join criteria\", \"username\": \"janet.anderson\", \"post_text\": \"Apparently I over-complicated it. The solution worked. Thanks!\", \"post_time\": \"2021-07-22 13:51:43\" },\n\t{ \"post_id\": 33822, \"topic_id\": 8922, \"forum_id\": 8, \"post_subject\": \"Re: Conditional logic in join criteria\", \"username\": \"janet.anderson\", \"post_text\": \"Apparently I over-complicated it. The solution worked. 
Thanks!\", \"post_time\": \"2021-07-22 13:51:43\" },\n\t{ \"post_id\": 33802, \"topic_id\": 8922, \"forum_id\": 8, \"post_subject\": \"Re: Conditional logic in join criteria\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nTry it like this: JOIN(lds,rds,\\n IF(RIGHT.statelist[1] = 'All', \\n LEFT.carrier = RIGHT.CarrierCode\\n AND LEFT.date BETWEEN RIGHT.BeginDate AND RIGHT.EndDate,\\n LEFT.carrier = RIGHT.CarrierCode\\n AND LEFT.state in RIGHT.StateList\\n AND LEFT.date BETWEEN RIGHT.BeginDate AND RIGHT.EndDate));
\\nStandard IF..THEN..ELSE with BOOLEAN expressions as the true/false returns.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-07-22 12:29:48\" },\n\t{ \"post_id\": 33792, \"topic_id\": 8922, \"forum_id\": 8, \"post_subject\": \"Conditional logic in join criteria\", \"username\": \"janet.anderson\", \"post_text\": \"My team has a table of criteria that we want excluded from a dataset. It has a layout like {STRING CarrierCode, SET OF STRING StateList, STRING8 BeginDate, STRING8 EndDate}. So I want to join my exclude table to the dataset with criterial like\\n\\nLEFT.carrier = RIGHT.CarrierCode \\t\\t\\t\\nAND LEFT.state in RIGHT.StateList\\nAND LEFT.date BETWEEN RIGHT.BeginDate AND RIGHT.EndDate\\n
\\n\\nThe catch is that the StateList can be something like ['All'] that implies there should be no join criteria for state. So what I would really like is more like\\n\\nLEFT.carrier = RIGHT.CarrierCode\\n#IF RIGHT.statelist[1] <> 'All'\\t\\t\\nAND LEFT.state in RIGHT.StateList\\n#END\\nAND LEFT.date BETWEEN RIGHT.BeginDate AND RIGHT.EndDate\\n
\\n\\nExcept that code doesn't work. Can you suggest a way to make conditional join criteria work?\", \"post_time\": \"2021-07-21 21:33:03\" },\n\t{ \"post_id\": 34013, \"topic_id\": 8963, \"forum_id\": 8, \"post_subject\": \"Re: Having the code that runs on Roxie on Thor\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,1. Use of keyed for indexes.
ROXIE is designed to use INDEXes, so the use of KEYED in the JOIN condition operates like using KEYED/WILD in an INDEX filter. This functionality is the same on both Thor and ROXIE.\\n2. Use of distribute for the left / right dataset.
If you're writing ROXIE query code, then it's best if you use INDEXes for both the LEFT and RIGHT "datasets" for your JOINs (usually payload INDEXes), since those will be most efficient. The DISTRIBUTE function is meant to re-distribute DATASET records (not INDEXes) on Thor, so it's not really applicable to most ROXIE code. Therefore, your ROXIE code (using INDEXes) should function the same way on Thor.\\n3. Use of keep and atmost.
These JOIN options were added specifically to limit the number of "matches" returned from the JOIN. ROXIE queries are targeted for "end-users" so you always want to limit the total number of results returned to avoid overloading them with "too many" results to be "meaningful" to them. Therefore, your ROXIE code (using KEEP and ATMOST) should function the same way on Thor.\\n4. Would sorting a dataset before a distribute or a join would be better ?
Once again, ROXIE queries should almost always be built using INDEXes, which are already sorted on the search terms of the INDEX, so SORT is unnecessary (and DISTRIBUTE is discussed above).\\n\\nRemember, Thor is a back office tool and the query runs on all nodes at once (usually using DATASETs) to produce all possible results. But ROXIE is an end-user-facing tool where your code runs only on the one ROXIE Server node that handles the individual query instance and pulls the data for that one query instance (usually in INDEXes) from multiple ROXIE Agent nodes to produce the result for that single query (unless you're using the ALLNODES() function to force ROXIE to operate like Thor).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-08-12 16:55:36\" },\n\t{ \"post_id\": 34003, \"topic_id\": 8963, \"forum_id\": 8, \"post_subject\": \"Re: Having the code that runs on Roxie on Thor\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Richard,\\n\\nThanks for the response. To be specific, I wish to know if there would be any difference of using Joins in Thor and Roxie like - \\n\\n1. Use of keyed for indexes.\\n2. Use of distribute for the left / right dataset.\\n3. Use of keep and atmost.\\n4. Would sorting a dataset before a distribute or a join would be better ?\\n\\nThanks and regards,\\nAkhilesh Badhri\", \"post_time\": \"2021-08-12 08:22:27\" },\n\t{ \"post_id\": 33973, \"topic_id\": 8963, \"forum_id\": 8, \"post_subject\": \"Re: Having the code that runs on Roxie on Thor\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,\\n\\nThe general rule is that anything that runs on Thor should also run on ROXIE. But the reverse is not true, because there are several functions that are ROXIE-only. Here's a (possibly incomplete) list of those I'm aware of:\\n
\\nCompiling code with any of these functions for a Thor cluster should just result in a noop for that part of the code, but that should not be a problem because most of them (except PRELOAD()) were designed to make ROXIE operate more like Thor does natively.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-08-10 13:12:13\" },\n\t{ \"post_id\": 33963, \"topic_id\": 8963, \"forum_id\": 8, \"post_subject\": \"Having the code that runs on Roxie on Thor\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nI am working on a project which involves running a ROXIE code / logic on Thor. This is the requirement.\\nI wish to ask, what things / code rules / principles should be considered while implementing the code that runs on Roxie, on Thor.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-08-10 08:53:57\" },\n\t{ \"post_id\": 34103, \"topic_id\": 8993, \"forum_id\": 8, \"post_subject\": \"Re: Please Help Clarify Misunderstanding PERSIST and INDEPEN\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nI would need to look at your code and the WU to even try to evaluate what might be happening. Contact me on Teams so you can screen share with me and I'll try to help.\\n\\nRichard\", \"post_time\": \"2021-09-23 14:59:11\" },\n\t{ \"post_id\": 34093, \"topic_id\": 8993, \"forum_id\": 8, \"post_subject\": \"Please Help Clarify Misunderstanding PERSIST and INDEPENDENT\", \"username\": \"janet.anderson\", \"post_text\": \"I have some inherited code that I am evaluating to find edge cases where our logic is not properly accounting for all scenarios. As such, I am running many tests with lots of small changes and looking at samples. I created a PERSIST in the code right before the steps where I am making my test changes, but when I look at the ECL Watch page it tells me that it is running sg89-graph2, which is a part of the code well before my PERSIST. Why is it re-running this step? Nothing changed before the PERSIST. My WU is Alpha Dev W20210923-094020.\\n\\nAdditionally, I'm getting a message to AutoWorkflow: Try adding ': INDEPENDENT' to JOIN to common up code between workflow items. I don't have any SEQUENTIAL in my code, so I don't understand what the INDEPENDENT would be doing.\", \"post_time\": \"2021-09-23 14:48:50\" },\n\t{ \"post_id\": 34123, \"topic_id\": 9003, \"forum_id\": 8, \"post_subject\": \"Re: How to receive a string parameter from stored() to a mac\", \"username\": \"rtaylor\", \"post_text\": \"andre,\\n\\nUnfortunately, I have no good answer for you. The Template Language (which #EXPAND is part of) was originally designed to receive an XML string from an external app (like a website), generate ECL code from that XML input, run it, and return the results. That means it is expecting a string constant and you're trying to pass in a variable-value parameter. \\n\\nI've been trying for years to figure out a good workaround to do all that within the HPCC Systems environment (you are not the first to ask). The best solution I've had so far has been to make it a two-step manual process: first step to generate the XML, then copy that and paste it into the code to run the actual job. 
Even if I use the "drilldown" mechanism in the ECL IDE, it's still a two-step manual process.\\n\\nContact me directly on Teams if you want to have a discussion about all the possible ways I've already tried (many) and your specific requirements for this task.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-08 19:30:53\" },\n\t{ \"post_id\": 34113, \"topic_id\": 9003, \"forum_id\": 8, \"post_subject\": \"How to receive a string parameter from stored() to a macro\", \"username\": \"andre.martins\", \"post_text\": \"I'm working on a query to calculate a frequency of a field inside a dataset. So first of all I coded a simple function that works for one hard coded field and then and made a function macro that should receive a string parameter containing the field name and calculate its frequency.\\n\\n\\nEXPORT test(Field) := FUNCTIONMACRO\\n Layout := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 FirstName;\\n STRING25 LastName;\\n END;\\n\\n students := DATASET(\\n [\\n {1,'Fred','Smith'},\\n {2,'Joe','Blow'},\\n {3,'Jane','Smith'}\\n ],\\n Layout);\\n\\n r := RECORD\\n students.#EXPAND(Field);\\n UNSIGNED Frequency := COUNT(GROUP);\\n END;\\n\\n final_result := TABLE(students, r, Field);\\n sort_result := SORT(final_result, Field);\\n RETURN OUTPUT(sort_result);\\nENDMACRO;\\n
\\n\\nFor running it on thor works normally calling test('LastName'). But I'm trying to receive this string value from the user using a STORED() call as the following example:\\n\\n\\nIMPORT ^.functions AS F;\\n\\nEXPORT SVC5_Test() := FUNCTION\\n STRING Field := 'LastName' : STORED('Field'); // Receive LastName, FirstName or PersonID\\n\\n result := F.test(Field);\\n\\n RETURN result;\\nEND;\\n
\\n\\nIn this way, I got stuck on the following error in the line with #EXPAND(): \\n2071: Constant expression expected
\\n\\nIn short, is there a way to receive this field as a string parameter and send it to a function macro (or any other way with template language, for example), so that the query can be performed and published based on the parameter field sent by the user?\\n\\nWhile developing this code I was thinking about the possibility of being able to perform a query only by adding a new field inside the record structure, without the need to change the roxie query logic, in order to work dynamically based on the parameter/field sent by the user. Keep in mind that the real RECORD has more than 50 fields available that I'd group by and calculate frequencies for.\", \"post_time\": \"2021-10-06 14:40:31\" },\n\t{ \"post_id\": 34193, \"topic_id\": 9023, \"forum_id\": 8, \"post_subject\": \"Re: An error while reading a key file on Thor.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Fine Richard.\\nThanks a lot for the input.\\n\\nI will try this approach.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-10-14 15:42:10\" },\n\t{ \"post_id\": 34183, \"topic_id\": 9023, \"forum_id\": 8, \"post_subject\": \"Re: An error while reading a key file on Thor.\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,I am getting the error while the key is distributed. The key is distributed for a join with another big dataset on Thor.
OK, so I'm assuming by "key" you mean INDEX. For a JOIN of two INDEXes, you simply need to use the INDEXes as the join files and use the KEYED keyword in the JOIN condition to ensure you get the performance advantage of INDEXes.\\n\\nUsing DISTRIBUTE treats the INDEX as a DATASET, eliminating the performance advantages of using the binary tree to get to the specific records you want. Not recommended.\\n\\nI'd start by just eliminating the DISTRIBUTEs and see how well the JOIN works.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-14 14:41:45\" },\n\t{ \"post_id\": 34173, \"topic_id\": 9023, \"forum_id\": 8, \"post_subject\": \"Re: An error while reading a key file on Thor.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thank you Richard. Following are the details about what I am doing -\\n\\nI am getting the error while the key is distributed. The key is distributed for a join with another big dataset on Thor. Following is the distribute I tried and both of them give error.\\n\\ndistribute(Key, hash64(field1+field2));\\ndistribute(pull(Key), hash64(field1+field2));\\n\\n\\nIf I simply read the key like "Key;" it works fine.\\nI am running the code on the same Thor environment where the logical file resides. I am not doing foreign.\\n\\nHope this helps.\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-10-14 14:26:19\" },\n\t{ \"post_id\": 34163, \"topic_id\": 9023, \"forum_id\": 8, \"post_subject\": \"Re: An error while reading a key file on Thor.\", \"username\": \"rtaylor\", \"post_text\": \"Akhilesh,Am I doing something wrong here ?
Without more details, it's impossible to say.\\n\\nRemote streaming failure, failing over to direct read for: <Logical file path>\\nERROR: cmd=RFCStreamRead, error=Internal Error (0, Failed to read key header: file too small, could not read 272 bytes)
This error message tells me:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-14 12:54:20\" },\n\t{ \"post_id\": 34153, \"topic_id\": 9023, \"forum_id\": 8, \"post_subject\": \"An error while reading a key file on Thor.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nI am reading a key file on THOR and distributing it. I get the following error while I do that - \\n\\nRemote streaming failure, failing over to direct read for: <Logical file path>\\nERROR: cmd=RFCStreamRead, error=Internal Error (0, Failed to read key header: file too small, could not read 272 bytes)\\n\\nAm I doing something wrong here ? Request your help.\\n\\nThanks and regards,\\nAkhilesh Badhri\", \"post_time\": \"2021-10-14 10:46:20\" },\n\t{ \"post_id\": 34263, \"topic_id\": 9043, \"forum_id\": 8, \"post_subject\": \"Re: Fails first, succeeds thereafter\", \"username\": \"lpezet\", \"post_text\": \"Looks like some seg fault on the slave(s).\\nI guess I need to re-install things (preflight certification went fine though the first time).\\nSigh...\", \"post_time\": \"2021-10-26 00:06:41\" },\n\t{ \"post_id\": 34253, \"topic_id\": 9043, \"forum_id\": 8, \"post_subject\": \"Fails first, succeeds thereafter\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to troubleshoot some weird behavior. I'm on 8.2.6-1 Server.\\n\\nI have some ECL code like the following:\\n\\nIMPORT STD;\\nlayout := RECORD\\n ...\\nEND;\\nds := DATASET(std.File.ExternalLogicalFilename('192.168.0.1', '/var/lib/HPCCSystems/mydropzone/somefile.csv'), layout, CSV(HEADING(1)));\\nOUTPUT(ds,,'~test::weird::somefile',OVERWRITE);\\n
\\nI run it like so:\\n\\necl run thor my.ecl [other options like -ssl, -I=, etc.]\\n
\\nNow I have like 6 or 8 files of this ECL code: the layout is different, csv file is different, and logical filename is different too in each file, but the logic is the same overall.\\n\\nNow here's what's odd.\\nI run one of the ECL files:\\n\\n$ ecl run thor my.ecl [other options like -ssl, -I=, etc.]\\nUsing eclcc path /opt/HPCCSystems/bin/eclcc\\nEXEC: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -E "-I/home/hpcc/git/master/" "/var/lib/HPCCSystems/.../my.ecl"' - hasinput=0, hasoutput=1 stderrbufsize=0\\nEXEC: Pipe: process 13422 complete 0\\nCould not find platform dependent libraries <exec_prefix>\\nConsider setting $PYTHONHOME to <prefix>[:<exec_prefix>]\\n\\n\\nDeploying ECL Archive /var/lib/HPCCSystems/.../my.ecl\\n\\nDeployed\\n wuid: W20211025-233226\\n state: compiled\\n\\nRunning deployed workunit W20211025-233226\\n<Result>\\n<Dataset name='Result 1'>\\n</Dataset>\\n</Result>\\n
\\nAll good.\\nI run a different one:\\n\\n$ ecl run thor myother.ecl [other options like -ssl, -I=, etc.]\\nUsing eclcc path /opt/HPCCSystems/bin/eclcc\\nEXEC: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -E "-I/home/hpcc/git/master/" "/var/lib/HPCCSystems/.../myother.ecl"' - hasinput=0, hasoutput=1 stderrbufsize=0\\nEXEC: Pipe: process 13545 complete 0\\nCould not find platform dependent libraries <exec_prefix>\\nConsider setting $PYTHONHOME to <prefix>[:<exec_prefix>]\\n\\n\\nDeploying ECL Archive /var/lib/HPCCSystems/.../myother.ecl\\n\\nDeployed\\n wuid: W20211025-233234\\n state: compiled\\n\\nRunning deployed workunit W20211025-233234\\nW20211025-233234 failed\\n<Result>\\n <Exception><Code>4294967295</Code><Source>eclagent</Source><Message>System error: -1: Failed to receive reply from thor 192.168.0.1:20000; (-1, Failed to receive reply from thor 192.168.0.1:20000)</Message></Exception>\\n</Result>\\n
\\n\\nIt fails. BUT, if I run it again right after:\\n\\n$ ecl run thor myother.ecl [other options like -ssl, -I=, etc.]\\nUsing eclcc path /opt/HPCCSystems/bin/eclcc\\nEXEC: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -E "-I/home/hpcc/git/master" "/var/lib/HPCCSystems/.../myother.ecl"' - hasinput=0, hasoutput=1 stderrbufsize=0\\nEXEC: Pipe: process 15504 complete 0\\nCould not find platform dependent libraries <exec_prefix>\\nConsider setting $PYTHONHOME to <prefix>[:<exec_prefix>]\\n\\n\\nDeploying ECL Archive /var/lib/HPCCSystems/.../myother.ecl\\n\\nDeployed\\n wuid: W20211025-233809\\n state: compiled\\n\\nRunning deployed workunit W20211025-233809\\n\\n<Result>\\n<Dataset name='Result 1'>\\n</Dataset>\\n</Result>\\n
\\nNow I know we can use other ways to accomplish this but I'm trying to understand why it seems to be failing here when running different ECL code, then run just fine, no matter how often I re-run what just failed.\\n\\nI did notice 2 distinct errors showing up (sometimes it's one, other times the other):\\n\\n<Result>\\n <Exception><Code>4</Code><Source>eclagent</Source><Message>System error: 4: Unexpected process termination (ep:192.168.0.1:20100)</Message></Exception>\\n</Result>\\n
\\n\\n<Result>\\n <Exception><Code>4294967295</Code><Source>eclagent</Source><Message>System error: -1: Failed to receive reply from thor 192.168.0.1:20000; (-1, Failed to receive reply from thor 192.168.0.1:20000)</Message></Exception>\\n</Result>\\n
\\n\\nOther pieces of ECL code run just fine.\\nI changed the code in each .ecl file to NOT OUTPUT to a logical file (simply "OUTPUT(ds);") and everything works.\\nAs soon as I want to (over)write a logical file, I get this weird behavior.\\n\\nAny deja vu?\\nAny idea how I can troubleshoot that further?\\n\\nThanks!\", \"post_time\": \"2021-10-25 23:49:47\" },\n\t{ \"post_id\": 34383, \"topic_id\": 9063, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.ReplaceSuperFile\", \"username\": \"janet.anderson\", \"post_text\": \"Ah, I'm sorry for my oversight. I see now that it stopped working after a week where there was legitimately no updates and the last_file_date was out of sync with what was actually used in the superfile. Thank you.\", \"post_time\": \"2021-11-15 18:15:14\" },\n\t{ \"post_id\": 34373, \"topic_id\": 9063, \"forum_id\": 8, \"post_subject\": \"Re: STD.File.ReplaceSuperFile\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nas far as I can see, W20211115-095712 didn't update the super file,\\nbecause it is coded with :\\nlast_file_date := '20211108'\\n\\nand so is expecting the superfile it is trying to manipulate to contain the subfile '~zwei::adi::mapping_ambest_20211108', but it does not.\\nInstead contains '~zwei::adi::mapping_ambest_20211025', so there was no action to perform.\", \"post_time\": \"2021-11-15 18:10:48\" },\n\t{ \"post_id\": 34353, \"topic_id\": 9063, \"forum_id\": 8, \"post_subject\": \"STD.File.ReplaceSuperFile\", \"username\": \"janet.anderson\", \"post_text\": \"I have a job I run every week that updates a dataset using STD.File.ReplaceSuperFile. It normally runs fine, but this week and last the superfile has not updated. The WU is W20211115-095712 on Alpha Dev. How can I troubleshoot what is going wrong?\", \"post_time\": \"2021-11-15 15:28:46\" },\n\t{ \"post_id\": 34433, \"topic_id\": 9083, \"forum_id\": 8, \"post_subject\": \"Re: Implicit SORT on index write\", \"username\": \"rtaylor\", \"post_text\": \"eduardo.I was analyzing some workunits and saw that one of the most intensive activity in the graph was the implicit sort done by the index build. My question is, this SORT is implicitly DISTRIBUTEd and LOCAL or not?
If you're building a standard INDEX, then it will be a global SORT, because that is how the data must be sorted to create the INDEX. You have hit on one of the primary reasons why the Thor cluster was created -- to build INDEXes on a massively parallel platform, so this operation has been highly optimized for many years now. If you need to have local operations (where each INDEX file part is a local index), then you need to use the DISTRIBUTED option on the BUILD and INDEX definitions.\\n\\nIf it isn't, can I SORT the dataset by the index key using DISTRIBUTED and LOCAL and then call the BUILD function with the SORTED option? If I do this, the resulting index will have the same performance as the current one?
You can try doing something like that, but I doubt that you'll find any performance difference in the BUILD. And, if you're not careful, you may end up with an INDEX that doesn't work. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-11-29 21:36:52\" },\n\t{ \"post_id\": 34423, \"topic_id\": 9083, \"forum_id\": 8, \"post_subject\": \"Implicit SORT on index write\", \"username\": \"eduardo.costa\", \"post_text\": \"Hello everyone,\\n\\nI was analyzing some workunits and saw that one of the most intensive activity in the graph was the implicit sort done by the index build. My question is, this SORT is implicitly DISTRIBUTEd and LOCAL or not? If it isn't, can I SORT the dataset by the index key using DISTRIBUTED and LOCAL and then call the BUILD function with the SORTED option? If I do this, the resulting index will have the same performance as the current one?\", \"post_time\": \"2021-11-22 17:59:08\" },\n\t{ \"post_id\": 34463, \"topic_id\": 9093, \"forum_id\": 8, \"post_subject\": \"Re: Query about an assert error on HPCC.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Thanks a lot Richard.\", \"post_time\": \"2021-12-06 11:41:45\" },\n\t{ \"post_id\": 34453, \"topic_id\": 9093, \"forum_id\": 8, \"post_subject\": \"Re: Query about an assert error on HPCC.\", \"username\": \"rtaylor\", \"post_text\": \"Akilesh,\\n\\nIt may be related to this issue: https://track.hpccsystems.com/browse/HPCC-25293\\n\\nThere is a workaround mentioned in the ticket.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-11-30 18:10:22\" },\n\t{ \"post_id\": 34443, \"topic_id\": 9093, \"forum_id\": 8, \"post_subject\": \"Query about an assert error on HPCC.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nI get the following error while I am trying to run an ECL code - \\n\\nassert(isGrouped(expr) == (meta.grouping != NULL)) failed - file: hqlmeta.cpp, line 2976\\n\\nI am getting this error in one HPCC environment. The same code on a different HPCC environment runs fine.\\n\\nI do not get any other detail about this error in the workunit. Can someone please help me, what could be the possible reason of getting this error ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-11-30 11:05:03\" },\n\t{ \"post_id\": 34483, \"topic_id\": 9103, \"forum_id\": 8, \"post_subject\": \"Re: Query about using translateDFSlayouts option.\", \"username\": \"ghalliday\", \"post_text\": \"The cause is likely to be only indirectly associated with that #option. It is most likely to be some code that precedes that #option or something related.\\n\\nIt may also depend on which version of the platform you are using (if a bug has been fixed). Probably the best approach is to create a bug report on https://track.hpccsystems.com/ and include an archive of the query that fails.\", \"post_time\": \"2021-12-08 09:02:24\" },\n\t{ \"post_id\": 34473, \"topic_id\": 9103, \"forum_id\": 8, \"post_subject\": \"Query about using translateDFSlayouts option.\", \"username\": \"akhileshbadhri\", \"post_text\": \"Hello Everyone,\\n\\nWhen I am using the following option in a BWR, I get an "OUTPUT() appears to be context dependent - this may cause a dataset not active error"\\n\\n#OPTION('translateDFSlayouts',TRUE);\\n\\nIn the output statement I am using the dataset which I am preparing in the BWR. 
I am writing this dataset to a CSV logical file.\\n\\nThe reason of me using this option is there will be scenarios when the logical files on HPCC would have an updated layout (logical file prepared by a different team) and the code would not have those changes (while I am running the code) while reading the logical file. So to avoid an error while reading logical files in case of layout mismatch, I am using this option.\\n\\nIs there a specific way of using this option ? Should I be considering something while using it ?\\n\\nThanks and regards,\\nAkhilesh Badhri.\", \"post_time\": \"2021-12-06 12:07:17\" },\n\t{ \"post_id\": 35285, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Re: Key condition does not have any comparisons against key \", \"username\": \"bforeman\", \"post_text\": \"Test, please ignore...\", \"post_time\": \"2022-03-22 12:49:10\" },\n\t{ \"post_id\": 34623, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Re: Key condition does not have any comparisons against key \", \"username\": \"janet.anderson\", \"post_text\": \"The LOOKUP seemed to work. Thanks, Richard.\", \"post_time\": \"2022-01-06 15:09:16\" },\n\t{ \"post_id\": 34613, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Re: Key condition does not have any comparisons against key \", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nI would first try adding the LOOKUP option to your JOIN. As it's written, the compiler sees a half-keyed JOIN, so it's not expecting to treat the rhs as a dataset but as an index. Adding LOOKUP may be enough to get around that.\\n\\nIf that doesn't work, I'd create a TABLE from the index of just the zips and their state field values then do the JOIN against the TABLE instead of the index.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-04 20:20:57\" },\n\t{ \"post_id\": 34603, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Re: Key condition does not have any comparisons against key \", \"username\": \"janet.anderson\", \"post_text\": \"Below is the index definition:\\n
\\nSHARED r_zipcityst := RECORD\\n STRING28 city;\\n STRING2 state\\n =>\\n STRING5 zip5;\\n STRING1 zipclass;\\n STRING25 county;\\n STRING28 prefctystname;\\n UNSIGNED8 __internal_fpos__;\\nEND;\\n\\nSHARED d_zipcityst := DATASET(prod_prefix + 'thor_data400::key::bipv2::qa::zipcityst', r_zipcityst, THOR);\\nEXPORT i_zipcityst := INDEX(d_zipcityst, {city, state}, {zip5, zipclass, county, prefctystname,__internal_fpos__}, prod_prefix + 'thor_data400::key::bipv2::qa::zipcityst');\\n
\\nAnd the join:\\n\\nd_trans7 := join(d_trans6, Monthly_Shop_Report_Datacube.modFiles().i_zipcityst,\\n\\tleft.order_zip = right.zip5,\\n\\ttransform(recordof(d_trans6),\\n\\tself.order_state := right.state;\\n\\tself := left;), left outer);\\n
\", \"post_time\": \"2022-01-04 19:48:32\" },\n\t{ \"post_id\": 34583, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Re: Key condition does not have any comparisons against key \", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nWhat does the actual JOIN code look like? Is the index the first or second parameter to your JOIN? Do you have the LOOKUP option on it?\\n\\nI don't see any reason why it shouldn't work, so I'd need to see the code to comment any further.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-04 18:38:53\" },\n\t{ \"post_id\": 34561, \"topic_id\": 9151, \"forum_id\": 8, \"post_subject\": \"Key condition does not have any comparisons against key fiel\", \"username\": \"janet.anderson\", \"post_text\": \"I am trying to use an index that has city, state, zip (this is a file maintained by another user, but I want to leverage it). City and state are keys, but I want to use this index like a normal dataset. I am trying to do a join on zip but get the following error:\\n\\nError: Key condition (LEFT.order_zip = RIGHT.zip5) does not have any comparisons against key fields (496, 13), 4099, \\n\\nHow can I do a join on a non-keyed field only?\", \"post_time\": \"2021-12-29 17:00:21\" },\n\t{ \"post_id\": 34661, \"topic_id\": 9171, \"forum_id\": 8, \"post_subject\": \"Re: Parse a JSON field where the name starts with a number?\", \"username\": \"james.wilson\", \"post_text\": \"Hi Richard\\n\\nUnfortunately it's not my JSON so I can't change the field names \\n\\nI've created https://track.hpccsystems.com/browse/HPCC-26969 to see if it can be fixed at some point.\\n\\nThanks\\n\\nJames\", \"post_time\": \"2022-01-14 11:31:38\" },\n\t{ \"post_id\": 34651, \"topic_id\": 9171, \"forum_id\": 8, \"post_subject\": \"Re: Parse a JSON field where the name starts with a number?\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nYep, looks like the XPATH parser doesn't like names that start with a number.
\\n\\nMy only suggestion is to edit the source JSON text and search/replace all those names by prepending an underscore (i.e. change "2field" to "_2field") and then the parser will be happy.\\n\\nOtherwise, if this is a common practice in JSON, you need to submit a JIRA to fix the "bug" in the parser.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-13 19:50:15\" },\n\t{ \"post_id\": 34641, \"topic_id\": 9171, \"forum_id\": 8, \"post_subject\": \"Parse a JSON field where the name starts with a number?\", \"username\": \"james.wilson\", \"post_text\": \"I have some JSON that looks like this:\\n\\n
{\\n "first_field": "a value",\\n "2nd_field": "another value"\\n}
\\nIs it possible to model this in ECL? I can't have a fieldname that starts with a number\\n\\nMyLayout := RECORD\\n STRING first_field;\\n STRING 2nd_field;\\nEND;
\\n\\nBut I don't appear to be able to do it with XPATH either, this produces a compilation error too:\\n\\nMyLayout := RECORD\\n STRING first_field;\\n STRING second_field {XPATH('2nd_field')};\\nEND;
\\n\\nAny ways round this?\", \"post_time\": \"2022-01-13 10:52:10\" },\n\t{ \"post_id\": 34681, \"topic_id\": 9181, \"forum_id\": 8, \"post_subject\": \"Re: JSON arrays\", \"username\": \"james.wilson\", \"post_text\": \"Aha! A colleague has helped me out with this, it works correctly if the field has an XPATH:\\nResultData := RECORD\\n SET OF UTF8 level1_field1 {XPATH('level1_field1')};\\nEND;
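\\nAs a quick, self-contained check of that record with FROMJSON (the variable names and sample JSON below are purely illustrative, and this sketch is untested):\\n
ResultData := RECORD\\n SET OF UTF8 level1_field1 {XPATH('level1_field1')};\\nEND;\\n\\njsonText := '{"level1_field1": ["level1_value1", "level2_value2"]}';\\nparsedRow := FROMJSON(ResultData, jsonText);\\nOUTPUT(parsedRow); // the set should now contain both array values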
\\n\\nSo TL;DR is always, always have XPATHs for all fields when parsing JSON (which will also get round annoying case issues as if there's no XPATH then HPCC will use a lowercased version of the field name).\", \"post_time\": \"2022-01-18 10:22:22\" },\n\t{ \"post_id\": 34671, \"topic_id\": 9181, \"forum_id\": 8, \"post_subject\": \"JSON arrays\", \"username\": \"james.wilson\", \"post_text\": \"I'm trying to read the following JSON:\\n{\\n "level1_field1":[\\n "level1_value1",\\n "level2_value2"\\n ]\\n}
\\n\\nThe ECL record structure I would think I need is:\\nResultData := RECORD\\n SET OF UTF8 level1_field1;\\nEND;
\\n\\nHowever if I try parsing that using FROMJSON I get my record contains a level1_field1 which in turn appears to contain a record named Item and which contains no data ([attachment=0:cqws188i]parsed_json_empty.png). Does anyone know a way round this or do I need to create a JIRA?\", \"post_time\": \"2022-01-17 17:16:04\" },\n\t{ \"post_id\": 34761, \"topic_id\": 9191, \"forum_id\": 8, \"post_subject\": \"Re: Error: assert(areTypesComparable(leftType,rightType)) fa\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nGlad you found a workaround, but you should definitely create a JIRA ticket for this issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-24 12:32:41\" },\n\t{ \"post_id\": 34741, \"topic_id\": 9191, \"forum_id\": 8, \"post_subject\": \"Re: Error: assert(areTypesComparable(leftType,rightType)) fa\", \"username\": \"janet.anderson\", \"post_text\": \"if(self.sum_val1>0,'Y','N') did not work (Error: SELF cannot be used to provide a value for field 'adpf_in' (605, 36), 2316, ), but if(max(group,d_shop4.shop_val1)>0,'Y','N') does. \\n\\nThere is no urgency around this since there is a workaround, I just can't seem to wrap my head around why there is an issue with the original code or your proposed code. \\n\\nFYI - this was a runtime error, not a compiler error.\", \"post_time\": \"2022-01-21 21:50:03\" },\n\t{ \"post_id\": 34731, \"topic_id\": 9191, \"forum_id\": 8, \"post_subject\": \"Re: Error: assert(areTypesComparable(leftType,rightType)) fa\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nCan you try this and see if it compiles/works?r_shop5 := record\\n d_shop4.id;\\n d_shop4.shopper_dob;\\n d_shop4.shop_date;\\n d_shop4.shopped_carrier;\\n d_shop4.shop_state;\\n d_shop4.shop_zip;\\n unsigned sum_val1 := sum(group,d_shop4.shop_val1);\\n unsigned sum_val2 := sum(group,d_shop4.shop_val2);\\n unsigned sum_val3 := sum(group,d_shop4.shop_val3);\\n unsigned sum_val4 := sum(group,d_shop4.shop_val4);\\n STRING1 val1_in := if(self.sum_val1>0,'Y','N');\\n STRING1 val2_in := if(self.sum_val2>0,'Y','N');\\n STRING1 val3_in := if(self.sum_val3>0,'Y','N');\\n STRING1 val4_in := if(self.sum_val4>0,'Y','N');\\nend;\\nd_shop5 := table(d_shop4,r_shop5,id,shopper_dob,shop_date,shopped_carrier,shop_state,shop_zip,local);
I haven't tested this, but it's the first thing I would try.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-21 19:06:49\" },\n\t{ \"post_id\": 34721, \"topic_id\": 9191, \"forum_id\": 8, \"post_subject\": \"Error: assert(areTypesComparable(leftType,rightType)) failed\", \"username\": \"janet.anderson\", \"post_text\": \"I get the following run time error: Error: assert(areTypesComparable(leftType,rightType)) failed - file: hqlcpp.cpp, line 4955 (605, 17)\\n\\nIt is directing me to the following code:\\n\\nr_shop5 := record\\n\\td_shop4.id;\\n\\td_shop4.shopper_dob;\\n\\td_shop4.shop_date;\\n\\td_shop4.shopped_carrier;\\n\\td_shop4.shop_state;\\n\\td_shop4.shop_zip;\\n\\tunsigned sum_val1 := sum(group,d_shop4.shop_val1);\\n\\tunsigned sum_val2 := sum(group,d_shop4.shop_val2);\\n\\tunsigned sum_val3 := sum(group,d_shop4.shop_val3);\\n\\tunsigned sum_val4 := sum(group,d_shop4.shop_val4);\\n\\tSTRING1 val1_in := if(sum(group,d_shop4.shop_val1)>0,'Y','N');\\n\\tSTRING1 val2_in := if(sum(group,d_shop4.shop_val2)>0,'Y','N');\\n\\tSTRING1 val3_in := if(sum(group,d_shop4.shop_val3)>0,'Y','N');\\n\\tSTRING1 val4_in := if(sum(group,d_shop4.shop_val4)>0,'Y','N');\\nend;\\nd_shop5 := table(d_shop4,r_shop5,id,shopper_dob,shop_date,shopped_carrier,shop_state,shop_zip,local);\\n
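\\nThe variant reported further up as actually working swaps the SELF references for a second aggregate inside the IF (MAX is used there; it only matches SUM(...)>0 while the values cannot be negative, which is an assumption). A sketch of that pattern, untested:\\n
r_shop5 := RECORD\\n\\td_shop4.id;\\n\\td_shop4.shopper_dob;\\n\\td_shop4.shop_date;\\n\\td_shop4.shopped_carrier;\\n\\td_shop4.shop_state;\\n\\td_shop4.shop_zip;\\n\\tUNSIGNED sum_val1 := SUM(GROUP, d_shop4.shop_val1);\\n\\t// sum_val2 .. sum_val4 as in the original\\n\\tSTRING1 val1_in := IF(MAX(GROUP, d_shop4.shop_val1) > 0, 'Y', 'N');\\n\\t// val2_in .. val4_in follow the same pattern\\nEND;\\nd_shop5 := TABLE(d_shop4, r_shop5, id, shopper_dob, shop_date, shopped_carrier, shop_state, shop_zip, LOCAL);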
\\n\\nI can remove either the block of sums or the if(sums) and it seems to work. It just doesn't like having both in the same table. I can work around this in my code, but I don't understand why it's a problem and the error message is not helping (other than the line number).\", \"post_time\": \"2022-01-21 18:22:28\" },\n\t{ \"post_id\": 35225, \"topic_id\": 9211, \"forum_id\": 8, \"post_subject\": \"Re: To fetch ECLContains , to get stored and constants passe\", \"username\": \"rtaylor\", \"post_text\": \"This workaround was posted in the JIRA ticket discussion (copying here for the world to easily see):Harsh after you got the relevant WUIDs using STD.System.Workunit.WorkunitList() using its eclcontains search parameter. You may get the ECL code from the found WUID using SOAPCall to WsWorkunits.WUInfo. The ECL code is in WUInfoResponse.Workunit.Query.Text.
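\\n\\nA rough sketch of how those two pieces could be wired together; the ESP URL, service version, XPATHs and the wuid field name are assumptions to be verified against your own environment, and none of this is tested:\\n
IMPORT STD;\\n\\n// Workunits whose ECL contains the search text (named parameter, per the eclcontains option)\\nwus := STD.System.Workunit.WorkunitList('', eclcontains := 'MySearchText');\\n\\n// Pull the ECL text back via WsWorkunits/WUInfo\\nOutRec := RECORD\\n STRING ecl {XPATH('Workunit/Query/Text')};\\nEND;\\nGetECL(STRING wu) := SOAPCALL('http://127.0.0.1:8010/WsWorkunits', 'WUInfo',\\n {STRING Wuid := wu},\\n DATASET(OutRec), XPATH('WUInfoResponse'));\\n\\nOUTPUT(GetECL(wus[1].wuid)); // assumes the WUID column is named wuid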
\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-02-28 12:38:28\" },\n\t{ \"post_id\": 34871, \"topic_id\": 9211, \"forum_id\": 8, \"post_subject\": \"Re: To fetch ECLContains , to get stored and constants passe\", \"username\": \"harshdesai\", \"post_text\": \"Raised :- https://track.hpccsystems.com/browse/HPCC-27089\", \"post_time\": \"2022-02-03 10:47:20\" },\n\t{ \"post_id\": 34801, \"topic_id\": 9211, \"forum_id\": 8, \"post_subject\": \"Re: To fetch ECLContains , to get stored and constants passe\", \"username\": \"rtaylor\", \"post_text\": \"Harsh,\\n\\nIf I read your post correctly, it looks to me like you want to use STD.System.Workunit.WorkunitList() using its eclcontains search parameter to search for relevant WUIDs, but then you want to actually get the ECL code from the found WUID. \\n\\nI don't see that we have a Standard Library function to do that even though ECL Watch clearly has that capability. So this looks like an opportunity for you to submit a JIRA request for a new Standard Library function to do for workunits what the STD.File.GetLogicalFileAttribute() function does for logical files.\\n\\nThe best workaround I can think of would be to get the list of WUIDs using STD.System.Workunit.WorkunitList() then just manually open each one in ECL Watch to see the ECL code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-01-27 14:07:30\" },\n\t{ \"post_id\": 34791, \"topic_id\": 9211, \"forum_id\": 8, \"post_subject\": \"To fetch ECLContains , to get stored and constants passed\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team\\nCan you please suggest how can we fetch ECLContains values to look at code which is executed \\nWas trying to look at https://alpha_dev_thor_esp.risk.regn.ne ... Workunits/ so can use service in this to fetch the ECL ran but could . \\nCan you please suggest and alternative . As in STD.System.Workunit.WorkunitList we can used eclcontains as search attribute , I need as output/Value to make some changes ahead.\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2022-01-27 13:21:38\" },\n\t{ \"post_id\": 35255, \"topic_id\": 9305, \"forum_id\": 8, \"post_subject\": \"Re: ECL Builder tool - what happened?\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Richard,\\n\\nI'm baffled yet happy. \\nYes, I can access the repository now, using the URL you wrote.\\nSo the odds are I made a typo. Also, now I search Github for "eclbuilder" (which I also did before asking the question, with zero results, and now it founds it just like that
)\\n\\nAnyway, thank you very much for helping me get to it!!\\n\\nWarm regards\\nRicardo\", \"post_time\": \"2022-03-07 14:52:15\" },\n\t{ \"post_id\": 35245, \"topic_id\": 9305, \"forum_id\": 8, \"post_subject\": \"Re: ECL Builder tool - what happened?\", \"username\": \"rtaylor\", \"post_text\": \"Ricardo,\\n\\nYou said the github link in the video was broken, but I just went to https://github.com/infosys-hpcc/eclbuilder and found a public repository. Can you get to that page?\\n\\nI was able to download a ZIP file and unzip it, so although it appears not to be under active development, it is still there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-03-07 14:31:06\" },\n\t{ \"post_id\": 35235, \"topic_id\": 9305, \"forum_id\": 8, \"post_subject\": \"ECL Builder tool - what happened?\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nWhat happened to this tool? \\nhttps://www.youtube.com/watch?v=fes1RGmOjOk around time mark 1:22:20\\nIt looked promising as a starting playground for non-technical people.\\n\\nGoogle doesn't know about it.\\nThe Github link referenced in the presentation is broken, and the repository seems to have disappeared.\\nIt doesn't appear in any search on the HPCCSystems websites.\\n\\nDoes anybody know about it?\\nThank you!\", \"post_time\": \"2022-03-06 04:44:05\" },\n\t{ \"post_id\": 35275, \"topic_id\": 9315, \"forum_id\": 8, \"post_subject\": \"Re: How to call ECL BWR from Winfrom C# code\", \"username\": \"rtaylor\", \"post_text\": \"Harsh,\\n\\nYou can have your C# program execute the ecl.exe command line program, using its "run" command to launch the BWR file. This is documented in The ECL IDE and HPCC Systems Client Tools PDF, available here: https://hpccsystems.com/training/documentation/all\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-03-15 12:47:13\" },\n\t{ \"post_id\": 35265, \"topic_id\": 9315, \"forum_id\": 8, \"post_subject\": \"How to call ECL BWR from Winfrom C# code\", \"username\": \"harshdesai\", \"post_text\": \"Hi ,\\nCan you please suggest how to call ecl bwr ,using c# as created a gui to do multiple task so it should run BWR on button click using C# code.\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2022-03-15 05:57:40\" },\n\t{ \"post_id\": 35335, \"topic_id\": 9325, \"forum_id\": 8, \"post_subject\": \"Re: Get a const set of field names from a layout\", \"username\": \"katzda01\", \"post_text\": \"Thank you all! I appreciate your responses and you definitely gave me some useful ideas. Below are my partial solutions. I imagine I'll be able to use macro v1 and v3 when appropriate as both have pros and cons. Not sure what benefit could v2 solution have over v1, maybe performance?\\n\\nThere is one last task which I still need to work on but posting this now to keep you updated and give more information. But I'm certainly in a much better position to solve the second task myself than I was before.\\n\\n
\\nGetFields_v1(layout) := FUNCTIONMACRO\\n /* N.B:\\n + Preserves case\\n + Preserves order\\n - Generates a NON-constant expression\\n - Won't preserve STRING fields if the layout hasn't defined a default value with {DEFAULT}, e.g {DEFAULT('-')}\\n */\\n xml_txt := (STRING)toxml(ROW([], layout));\\n\\n STRING attributes := (STRING)REGEXREPLACE('<(\\\\\\\\w+)>([a-z\\\\\\\\-0-9]+)?</\\\\\\\\w+>', xml_txt, '$1,');\\n list := STD.STr.SplitWords(attributes, ',');\\n RETURN list;\\nENDMACRO;\\n\\nGetFields_v2(layout) := FUNCTIONMACRO\\n /* N.B:\\n + Preserves case\\n - Doesn't preserves order\\n - Result is NOT constant\\n - Won't preserve STRING fields if the layout hasn't defined a default value with {DEFAULT}, e.g {DEFAULT('-')}\\n */\\n xml_txt := (STRING)toxml(ROW([], layout));\\n\\n PATTERN Tagname := PATTERN('[-_.A-Za-z0-9]')+;\\n PATTERN NameEnd := PATTERN('[ />]')+;\\n PATTERN Find := '<' Tagname NameEnd;\\n\\n ds := dataset([{xml_txt}],{STRING60 line});\\n \\n P := PARSE(ds,line,Find,{STRING Tag := MATCHTEXT(Tagname)},FIRST);\\n TagNames := DEDUP(SORT(P,Tag)); \\n SetTags := SET(TagNames,Tag);\\n RETURN SetTags;\\nENDMACRO;\\n\\nGetFields_v3(layout) := FUNCTIONMACRO\\n /*N.B\\n - Won't preserve case\\n - Won't preserve STRING fields if the layout hasn't defined a default value with {DEFAULT}, e.g {DEFAULT('-')}\\n + Generated SET IS constant!\\n */\\n rec := ROW([], layout);\\n\\n #UNIQUENAME(out)\\n #UNIQUENAME(sep)\\n \\n #SET(sep,'')\\n \\n #EXPORTXML(out, rec)\\n RETURN [ \\n #FOR (out)\\n #FOR (Field)\\n #IF(%'sep'% = '')\\n #SET(sep,',')\\n #ELSE\\n %sep%\\n #END\\n %'{@label}'%\\n #END\\n #END\\n ];\\nENDMACRO;\\n\\n/* TASK 1: Get fields from Attributes_l as a SET and update all fields which have the same name\\n Assume Updates_l will have the same fields as Attributes_l and more\\n\\n Assume all fields are integers for now.\\n Case sensitivity doesnt matter in this case\\n */\\nAttributes_l := RECORD\\n INTEGER a {xpath('a')};\\n INTEGER B {xpath('B')};\\n INTEGER E {xpath('E')};\\nEND;\\nUpdates_l := RECORD \\n INTEGER a {xpath('a')};\\n INTEGER B {xpath('B')};\\n INTEGER E {xpath('E')};\\n INTEGER f {xpath('f')};\\nEND;\\n\\nattributes := ROW({0,1,2}, Attributes_l);\\nupdates := ROW({1,2,3,4}, Updates_l);\\n\\n/* TESTING */\\nfieldNames_v1 := GetFields_v1(Attributes_l);\\nfieldNames_v2 := GetFields_v2(Attributes_l);\\nfieldNames_v3 := GetFields_v3(Attributes_l);\\nOUTPUT(fieldNames_v1, NAMED('fieldNames_v1')); \\nOUTPUT(fieldNames_v2, NAMED('fieldNames_v2')); \\nOUTPUT(fieldNames_v3, NAMED('fieldNames_v3')); \\n\\nupdatedAttributes := ROW(TRANSFORM(Attributes_l,\\n\\n fieldNames := GetFields_v3(Attributes_l);\\n #DECLARE (cnt)\\n #DECLARE (len)\\n #DECLARE (field)\\n\\n #SET (cnt, 1)\\n #SET (len, COUNT(fieldNames))\\n\\n #LOOP\\n #SET (field, fieldNames[%cnt%])\\n #IF (%cnt% <= %len%)\\n #SET (cnt, %cnt% + 1)\\n SELF.%field% := attributes.%field% + updates.%field%;\\n #ELSE\\n #BREAK\\n #END\\n #END\\n SELF := attributes;\\n));\\n\\nOUTPUT(updatedAttributes, NAMED('updatedAttributes'));\\n\\n/* TASK 2: Take the attributes instance of Attributes_l and convert into name/value pairs dataset.\\n Assume Attributes_l will contain STRING fields\\n We have to preserve case \\n*/
\", \"post_time\": \"2022-03-29 12:59:12\" },\n\t{ \"post_id\": 35325, \"topic_id\": 9325, \"forum_id\": 8, \"post_subject\": \"Re: Get a const set of field names from a layout\", \"username\": \"rtaylor\", \"post_text\": \"katzda,\\n\\nYou said:I would like to get a constant set of field names, same as if I had hard-coded them like this:
so you can accomplish that like this://Tag names can contain letters, digits, hyphens, underscores, and periods\\n// and the name ends with either a space, a slash, or an angle bracket:\\nPATTERN Tagname := PATTERN('[-_.A-Za-z0-9]')+;\\nPATTERN NameEnd := PATTERN('[ />]')+;\\nPATTERN Find := '<' Tagname NameEnd;\\n\\n\\nds := dataset([ {'<Row><Name><Fname>Fred</Fname><Lname>Jones</Lname></Name>'},\\n {'<Address CSZ="Anytown, FL 12345">223 Main Street</Address>'},\\n {'<EmptyTag/><More stuff="and nonsense"/></Row>'}, \\n {'<Row><Name><Fname>John</Fname><Lname>Smith</Lname></Name>'},\\n {'<Address CSZ="Anyville, GA 54321">145 High Street</Address>'},\\n {'<EmptyTag/><More stuff="and nonsense"/></Row>'}], \\n\\t\\t\\t\\t\\t\\t\\t\\t{STRING60 line});\\n\\t\\t\\t\\t\\t\\t\\t\\t\\nP := PARSE(ds,line,Find,{STRING Tag := MATCHTEXT(Tagname)},FIRST);\\nTagNames := DEDUP(SORT(P,Tag));\\t\\t\\t\\t\\t\\t\\t\\t\\nSetTags := SET(TagNames,Tag);\\nSetTags
This example works to produce a list of all the unique tag names in the XML and preserves the case of those names. \\n\\nIf the tag order is important, you can do it like this (after the PARSE):TagNames := PROJECT(P,\\n TRANSFORM({UNSIGNED C,STRING Tag},\\n SELF.C := COUNTER,\\n SELF.Tag := LEFT.Tag));\\nUniqueTags := SORT(DEDUP(SORT(Tagnames,Tag,C),tag),C);\\t\\t\\t\\t\\t\\n\\t\\t\\nSetTags := SET(UniqueTags,Tag);\\n
\\nLet me know if there's a next step to your problem that you'd like some help with. \\n\\nHTH\\n\\nRichard\", \"post_time\": \"2022-03-28 16:00:38\" },\n\t{ \"post_id\": 35315, \"topic_id\": 9325, \"forum_id\": 8, \"post_subject\": \"Re: Get a const set of field names from a layout\", \"username\": \"Allan\", \"post_text\": \"Ah, I've just read your note on preserving case. The exported XML does not preserve case. This is a right pain that I have brought up with the core team before. Curiously, case used to be preserved as the example in the ECL ref manual (last time I looked) did preserve case.\", \"post_time\": \"2022-03-28 15:00:16\" },\n\t{ \"post_id\": 35305, \"topic_id\": 9325, \"forum_id\": 8, \"post_subject\": \"Re: Get a const set of field names from a layout\", \"username\": \"Allan\", \"post_text\": \"HI,\\nYou can export the layout of the structure into XML which you can then loop through, in a MACRO, generating any ECL you would want. Then feed that constructed ECL into the compilers token stream.\\nI have MACRO below that takes a record structure as its input and constructs a field list.\\ne.g.\\n\\nR := RECORD\\n STRING fl1;\\n INTEGER itm2;\\nEND;\\nMAC_makeFieldListFromLayout(R);\\n
\\nGenerates\\n{fl1,itm2}\\n\\n\\nEXPORT MAC_makeFieldListFromLayout(lay) := MACRO\\n #UNIQUENAME(attrib)\\n #SET(attrib,'')\\n #UNIQUENAME(sep)\\n #SET(sep,'{')\\n #UNIQUENAME(out)\\n #EXPORTXML(out, lay)\\n #FOR (out)\\n #FOR (Field)\\n #APPEND(attrib,%'sep'%+%'{@label}'%)\\n #SET(sep,',')\\n #END\\n #END\\n %'attrib'%+'}'\\nENDMACRO;\\n
\\nYou change 'APPEND' to construct the ECL suitable for your case.\\nYours\\nAllan\", \"post_time\": \"2022-03-28 14:55:11\" },\n\t{ \"post_id\": 35295, \"topic_id\": 9325, \"forum_id\": 8, \"post_subject\": \"Get a const set of field names from a layout\", \"username\": \"katzda\", \"post_text\": \"Say we have a layout like this:\\nMyLayout := RECORD\\n INTEGER a;\\n INTEGER B;\\n STRING C;\\n STRING D;\\n INTEGER e;\\n //plus another 1000 attributes\\nEND;
\\n\\nI would like to get a constant set of field names, same as if I had hard-coded them like this:\\n\\nEXPORT SET OF STRING keys := [\\n 'a',\\n 'B',\\n 'C',\\n 'D',\\n 'e'\\n];
\\n\\n1. trouble: I need the field names to preserve case\\n\\nI could do something like this\\n\\nMyLayout := RECORD\\n INTEGER a {xpath('a')};\\n INTEGER B {xpath('B')};\\n STRING C {xpath('C')};\\n STRING D {xpath('D')};\\n INTEGER e {xpath('e')};\\nEND;
\\n\\n2. trouble\\nThis removes empty STRING fields and I need to keep all fields.\\nxml_txt := (STRING)toxml(ROW([], a_l));\\n\\nI could do this:\\nMyLayout := RECORD\\n INTEGER a {xpath('a')};\\n INTEGER B {xpath('B')};\\n STRING C {xpath('C'), DEFAULT( '-' )};\\n STRING D {xpath('D'), DEFAULT( '-' )};\\n INTEGER e {xpath('e')};\\nEND;
\\n\\n3. trouble\\nIf I parse the XML like this:\\n\\nattributes_l := RECORD\\n STRING name;\\nEND;\\n\\nattributes := REGEXREPLACE('<(\\\\\\\\w+)>([a-z\\\\\\\\-0-9]+)?</\\\\\\\\w+>', xml_txt, '$1,');\\nlist := DATASET(STD.STr.SplitWords(attributes, ','), attributes_l);
\\n\\nI get the correct result but its not a constant set anymore. So I can't use static code generation like this:\\n\\n\\nupdates := //get external data \\nROW(TRANSFORM(MyLayout,\\n #DECLARE (cnt)\\n #DECLARE (len)\\n #DECLARE (attribute)\\n\\n #SET (cnt, 1)\\n #SET (len, COUNT(attributeNames))\\n\\n #LOOP\\n #SET (attribute, list[%cnt%])\\n #IF (%cnt% <= %len%)\\n #SET (cnt, %cnt% + 1)\\n SELF.%attribute% := LEFT.%attribute% + updates.%attribute%;\\n #ELSE\\n #BREAK\\n #END\\n #END\\nSELF := LEFT;\\n));
\\n\\nI just want to avoid hardcoding field names multiple times in a file or subsets of the fields in different files, esp when there is over a thousand fields.\", \"post_time\": \"2022-03-25 19:23:03\" },\n\t{ \"post_id\": 219, \"topic_id\": 79, \"forum_id\": 9, \"post_subject\": \"Re: Regarding alert message in ECL IDE after logging\", \"username\": \"bforeman\", \"post_text\": \"Hi sarveshk,\\n\\nWhen you open the ECL IDE, you will see a login window. On that window is a button marked Preferences. Click on that button, and then navigate to the Compiler tab. The Compiler entry field must point to the folder that contains the ECL Compiler (eclcc.exe). On my machine this is located at:\\nC:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\eclcc.exe\\n\\nYou can press the Reset button on that tab and it should be able to locate the compiler automatically.\\n\\nBest Regards,\\n\\nBob Foreman\", \"post_time\": \"2011-08-02 12:13:04\" },\n\t{ \"post_id\": 218, \"topic_id\": 79, \"forum_id\": 9, \"post_subject\": \"Regarding alert message in ECL IDE after logging\", \"username\": \"sarveshk\", \"post_text\": \"Hi All,\\n\\nI am getting problem after logging in ECL IDE it's showing "compiler have invalid path" so what will be the solution.\", \"post_time\": \"2011-08-02 04:51:19\" },\n\t{ \"post_id\": 266, \"topic_id\": 80, \"forum_id\": 9, \"post_subject\": \"Re: File Rename Bug\", \"username\": \"gsmith\", \"post_text\": \"This has now been fixed in trunk.\\n\\nGordon.\", \"post_time\": \"2011-08-11 15:58:20\" },\n\t{ \"post_id\": 223, \"topic_id\": 80, \"forum_id\": 9, \"post_subject\": \"Re: File Rename Bug\", \"username\": \"richardkchapman\", \"post_text\": \"I'll raise a big for Gordon to look at...\", \"post_time\": \"2011-08-03 07:42:45\" },\n\t{ \"post_id\": 222, \"topic_id\": 80, \"forum_id\": 9, \"post_subject\": \"File Rename Bug\", \"username\": \"mglheureux\", \"post_text\": \"Just to inform other ECL programmers and the developers of the ECL IDE of an annoying bug:\\n\\nI was working in the ECL IDE today and I was getting an error associated with an EXPORT that was ultimately rooted in the fact that I had named a file wrong (so that the file was something like "foo.ecl" but I was trying to export "fo"). With the file open in the editor, I right-clicked the misnamed file in the repository and renamed it from the context menu.\\n\\nHowever, it appears that after I pressed enter to finalise my changes, the IDE copied the contents of the misnamed file to a new file with the corrected name I provided, leaving the original (and misnamed) file both extant and open, so that I continued to receive the error.\", \"post_time\": \"2011-08-02 21:19:42\" },\n\t{ \"post_id\": 1449, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"That is likly to be one of two things:\\n1. The IP address of the server is incorrect or the server is not accesible (Preferences/General)\\n2. 
The compiler settings are incorrect (Preferences/Compiler).\\n\\nIf you can send screenshots of those two screens that may help.\\n\\nGordon.\\n\\nPS If you introduce a deliberate syntax error, do get the expected error messages?\\nPPS You can get the Eclipse ECL Plugin at http://eclipse.hpccsystems.com/develop but at the moment you will need to build your own ecl compiler from the sources (not hard).\", \"post_time\": \"2012-04-07 06:46:54\" },\n\t{ \"post_id\": 1442, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"Ranga Swamy\", \"post_text\": \"when i compile code , no syntax errors and when i submit (run) the program i got error \\n\\n[color=#FF0000:1zm6mnx0]XML Parsing Error: no element found\\nLocation: wine:file://C:\\\\users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\\\L20120405-125904.xml\\nLine Number 1, Column 1:\\n^\\n\\nplease help me out \\n\\nThanks\\nRanga Swamy\", \"post_time\": \"2012-04-05 10:32:28\" },\n\t{ \"post_id\": 955, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"The crash was caused because the msxml3 was not present.\\n\\n(just after this call: pWriter.CoCreateInstance(CLSID_MXXMLWriter30, NULL);)\\n\\nI would suggest starting clean:\\nDelete your .wine folder.\\nInstall msxml3 (and follow the configuration steps again).\\nThen re-install the IDE.\", \"post_time\": \"2012-01-31 09:25:42\" },\n\t{ \"post_id\": 953, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"Ranga Swamy\", \"post_text\": \"i attached zip file , while opening a new file ECL IDE crashed\", \"post_time\": \"2012-01-31 07:40:13\" },\n\t{ \"post_id\": 934, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"I just retested (with success) with the following setup:\\nUbuntu 11.10 (32bit) in a VM.\\nWine 1.2 from ubuntu software center\\nmsxml3 from previous link (following steps 2+3)\\nIDE\\n\\nNote: It may be important to install msxml3 _before_ IDE\\n\\nGordon.\\n\\nPS to submit the crash report either click Save and send the zip file or press Submit and it will attempt to send the zip via email.\", \"post_time\": \"2012-01-30 14:28:18\" },\n\t{ \"post_id\": 932, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"Ranga Swamy\", \"post_text\": \"Hi \\n\\n same error no change and can you please tell, how to refer backup code\\nhttps://docs.google.com/document/d/1VmyS-jh2RJogNm2ic2wYB70JcvlL56AOIxIDIJ3CvYk/edit\", \"post_time\": \"2012-01-30 13:57:28\" },\n\t{ \"post_id\": 931, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"Looks like the link to msxml3 on the MS web site is dead. You can grab a copy from here:\\nhttp://download.cnet.com/Microsoft-XML- ... 31613.html\", \"post_time\": \"2012-01-30 13:18:25\" },\n\t{ \"post_id\": 930, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"The common crash in wine is when the user has not installed the msxml setup from steps 2 + 3.\\n\\nCan you double check your wine/msxml settings?\\n\\nAlso did you get the option to submit a crash report? 
If so either submit it directly or manually post the attached files and I will take a further look.\\n\\nGordon.\", \"post_time\": \"2012-01-30 10:19:13\" },\n\t{ \"post_id\": 929, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"ECL IDE Crashing while creating new file\", \"username\": \"Ranga Swamy\", \"post_text\": \"Hi\\n\\n[color=#FF0000:3snn2vbv]ECL IDE crashing Error:\\n\\ni connected with server \\nusename : hpccdemo\\npassword: hpccdemo\\n\\nok, now i connected ECL IDE with server\\n\\n[color=#FF0000:3snn2vbv]while opening a new file (ctrl+n), crashing the ECL IDE.\\nerror Description:\\n\\nECLIDE.exe caused ACCESS_VIOLATION in module "C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ECL IDE\\\\CLIB.dll" at 0023:100417D8\\n\\n\\nos : ubuntu\\n wine version : wine-1.3.28\\nECL IDE version : hpccsystems-clienttools-community_3.4.0-1\\n\\nany one give suggestion on that\", \"post_time\": \"2012-01-30 10:14:25\" },\n\t{ \"post_id\": 915, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"(ignoring the eclipse plugin for now as it won't help unless you are running the platform native on your machine).\\n\\nHave you followed the "wine" instructions from this thread yet?\\nIf so, just opening the SetupClientTools.msi should install the IDE on your machine.\\n\\n(the msi file is downloaded from the VM by clicking on the "ECL IDE Installer" link in the EclWatch page).\", \"post_time\": \"2012-01-27 11:14:15\" },\n\t{ \"post_id\": 914, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"Ranga Swamy\", \"post_text\": \"Hi GorDon\\n\\nsystem configuration:\\nos : ubuntu 11.10.\\nram size : 1.8 gb\\nprocessor: intel pentium.\\n\\nsoftwares :\\nvmware player 4.0.\\nHPCCSystemsVM-3.4.0.1 \\nhpccsystems-clienttools-community_3.4.0-1-noarch \\n\\n\\nhttp://hpccsystems.com/community/docs/running-hpcc-vm\\n\\ni am following above link, [color=#0000BF:2r94otty]up to ECL watch i am done but after that i am [color=#FF0000:2r94otty]struck while installing ECL IDE on ubuntu 11.10.\\n\\ncan you post another link which are related to ubuntu11.10 operating system.\\n\\ni searched ,but i didn't got [color=#000080:2r94otty]eclipse plugins for ECL compiler , can you please post the link.\\n\\nRanga Swamy\\nranga@trendwiseanalytics.com\\n9620720182\", \"post_time\": \"2012-01-27 10:03:34\" },\n\t{ \"post_id\": 892, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"Just to clarify. \\n\\nAre you running the hpcc servers inside a VM on an Ubuntu host?\\n\\nYou are trying to install IDE in the Ubuntu host?\\n\\nAssuming the above is correct I would suggest:\\n1. In Ubuntu host open FireFox and goto the ECL Watch page for the VM machine.\\n2. Locate "Resources" and download the EclIDE.msi from there\\n3. 
Follow the "wine" instructions above in this thread.\\n\\nThere is an eclipse plugin available, but it does not include the needed client compiler and the only way to get it on Linux is to install the entire hpcc-platform (assuming you have 64bit Linux).\\n\\nOption 2:\\nIf you are running Ubuntu 11.10 64bit on the host, just install and run the hpcc-platform natively, then the eclipse plugin will just use the native ECL compiler from that installation (and it runs faster etc.)...\\n\\nGordon.\", \"post_time\": \"2012-01-25 13:37:43\" },\n\t{ \"post_id\": 889, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"how to install ECL IDE in ubuntu 11.10 .but i got ECL watch\", \"username\": \"Ranga Swamy\", \"post_text\": \"i installed vmware player , hpcc image and i got ecl watch but i am struging for installation of ECL IDE. can any one give suggestion how to install ECL IDE in ubuntu 11.10. \\n\\ni got error:\\n\\n[color=#FF0000:3sybk3zo]Dependency is not satisfiable: hpccsystems-platform\\n\\nvmware player 4.0, hpcc3.4.0.1,and ECL IDE for Ubuntu11.10\\n\\n\\nBy \\nRanga Swamy\\ncrswamy929@gmail.com\\n9620720182\", \"post_time\": \"2012-01-25 12:49:23\" },\n\t{ \"post_id\": 240, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"gsmith\", \"post_text\": \"The last time I tested, I created these notes (let me know how you get on),\\n\\nThe crash is caused by the lack of the msxml install.\\n\\nGordon.\\n\\n1.\\tWithin the Synaptic Package Manager, select “wine1.2” (this corresponds to Wine version 1.1.31), install it and its dependencies.\\n2.\\tDownload msxml3.msi from Microsoft (latest at time of writing was Service Pack 7): \\nhttp://www.microsoft.com/downloads/deta ... laylang=en\\n3.\\tInstall msxml3.msi in Wine (just double click the msi file and Wine will install it).\\n4.\\tOpen “Configure Wine” (Applications/Wine/Configure Wine):\\na.\\tSelect the Libraries tab.\\nb.\\tIn the “New override for library” drop down select “msxml3”.\\nc.\\tPress the add button.\\nd.\\tEnsure msxml3 is selected in the “Existing overrides” list box and press Edit.\\ne.\\tSelect the “Native (Windows)” option and press OK.\\nf.\\tClick OK to close the Wine Configuration window.\\n5.\\tInstall the HPCC Client Tools (again just double click the SetupClientTools.msi file and Wine will install it).\", \"post_time\": \"2011-08-08 14:59:17\" },\n\t{ \"post_id\": 229, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Re: Has Anyone tried to run ECLIDE with wine?\", \"username\": \"richardkchapman\", \"post_text\": \"I did have success getting ECL IDE to run under Wine when I last tried it (several months ago), but I'm not a wine expert and I don't recall enough of the steps to give you step-by-step instructions. I don't remember there being any particular issues, though the ECL IDE setup has changed a bit since then.\\n\\nYou don't have to use ECL IDE if you prefer a different editor, though you'll lose the workunit and graph display integration. You can use eclcc to create archives and submit them for remote execution using eclplus. 
\\n\\nWe have an eclipse plugin at the proof-of-concept stage and will be working on polishing it up as soon as manpower is available.\", \"post_time\": \"2011-08-04 13:22:32\" },\n\t{ \"post_id\": 228, \"topic_id\": 81, \"forum_id\": 9, \"post_subject\": \"Has Anyone tried to run ECLIDE with wine?\", \"username\": \"cmastrange3\", \"post_text\": \"I'm trying to setup a dev environment on my Ubuntu 10.10 workstation. I downloaded the HPCC VM and it runs without any problems, but when I tried to download ECL IDE and run it I noticed it was only for Windows. I prefer to not have two separate dev machines so I tried installing ECL IDE with wine which worked. However, when I tried to actually run it with wine the clicking on any of the drop down menus brought up solid black pages, and trying to open up a new builder window crashed it.\\n\\nHas there been any luck getting this to work under Wine? Are there any alternatives to that I can use ECL IDE to program?\", \"post_time\": \"2011-08-04 12:55:50\" },\n\t{ \"post_id\": 298, \"topic_id\": 100, \"forum_id\": 9, \"post_subject\": \"Re: configuration and build management in ECL\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the info and the quick response.\", \"post_time\": \"2011-09-01 16:13:28\" },\n\t{ \"post_id\": 296, \"topic_id\": 100, \"forum_id\": 9, \"post_subject\": \"Re: configuration and build management in ECL\", \"username\": \"gsmith\", \"post_text\": \"1.\\tNo. While the IDE does not affiliate itself with any source control system, the files are stored simply on disk and the user is free to use whatever source control they like.\\n2.\\tThere are many ways of managing source control through the entire lifecycle and ECL is no different, the user is free to pick which paradigm + systems they prefer.\\n3.\\tMy understanding of this question is “how do I manage getting my local ECL (which is managed by traditional source control systems as per 1 + 2) deployed to any given HPCC cluster?”. The simple answer to this is “Use the IDE – as it uses a combination of local and remote compilers to ensure that your local ECL is gathered locally, but compiled and executed remotely. \\n\\nGordon.\", \"post_time\": \"2011-08-31 15:30:46\" },\n\t{ \"post_id\": 295, \"topic_id\": 100, \"forum_id\": 9, \"post_subject\": \"configuration and build management in ECL\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nI have a few SDLC type questions...\\n\\n1. Does the ECL IDE integrate with VSS/TFS or any other source control systems?\\n2. Can you offer any guidance for promoting ECL code from one environment to another, like the typical dev to QA to prod? Is this a manual effort or can it be automated?\\n3. Can ECL code be deployed to (or retrieved from) remote repositories? Say I have several remote deployments and I want to publish code updates using a software update service, has this kind problem been solved in HPCC? 
\\n\\nThanks\", \"post_time\": \"2011-08-31 15:14:32\" },\n\t{ \"post_id\": 622, \"topic_id\": 108, \"forum_id\": 9, \"post_subject\": \"Re: Login Issue\", \"username\": \"gsmith\", \"post_text\": \"First check that you can reach the server by opening a web browser and navigating to the Ecl Watch page:\\n"http://XXX.XXX.XXX.XXX:8010/"\\n\\nAssuming you can - double check the IP address entered in the configuration page (it should be the same IP that worked in the web browser (with no ports, http, spaces etc.)\\n\\nGordon.\", \"post_time\": \"2011-11-21 15:38:38\" },\n\t{ \"post_id\": 621, \"topic_id\": 108, \"forum_id\": 9, \"post_subject\": \"Re: Login Issue\", \"username\": \"sivaji\", \"post_text\": \"I am having trouble to login during first time ECLIDE configuration. After successfully downloading VM Image and installing ECLIDE, i was getting the below error message when i entered correct IP address and demo username and password.How can i resolve the login issue? Thanks for any help that could resolve my issue.\\n1003: Unable to communicate with server.\", \"post_time\": \"2011-11-21 15:30:39\" },\n\t{ \"post_id\": 324, \"topic_id\": 108, \"forum_id\": 9, \"post_subject\": \"Re: Login Issue\", \"username\": \"gsmith\", \"post_text\": \"It looks like you are either using a new OSS IDE with an old server or an old IDE with a new OSS server (if you provide version numbers I can verify).\\n\\nThe latest version of IDE (6.0.1.6) is able to target both.\\n\\nGordon\", \"post_time\": \"2011-09-08 17:22:32\" },\n\t{ \"post_id\": 323, \"topic_id\": 108, \"forum_id\": 9, \"post_subject\": \"Login Issue\", \"username\": \"Durai\", \"post_text\": \"Hi,\\n\\nI am one of admin for a HPCC cluster. I was able to login thourgh browser and see all the properties Enterprise Service Platform. But when I try log into ECL IDE the following error was returned "SOAP 1.1 Fault: SOAP-ENV: Client(no subcode) *Validation constraint violation: tag name or namespace". I have no clue what's going on. as mentioned above my ID/password is working in the browser session. \\n\\nCan you please help?\\n\\nThanks\\nDurai\", \"post_time\": \"2011-09-08 16:30:00\" },\n\t{ \"post_id\": 355, \"topic_id\": 117, \"forum_id\": 9, \"post_subject\": \"Online tutorial on ECL IDE\", \"username\": \"HPCC Staff\", \"post_text\": \"There is a great video now available about the ECL IDE. It includes an overview and some of its powerful features. Check it out!\\n\\nhttp://www.hpccsystems.com/community/tr ... os/ecl-ide\\n\\nThank you Bob Foreman for contributing this video.\", \"post_time\": \"2011-09-22 19:21:11\" },\n\t{ \"post_id\": 594, \"topic_id\": 141, \"forum_id\": 9, \"post_subject\": \"Re: Love the new context help!\", \"username\": \"Rob Pelley\", \"post_text\": \"Brilliant!\", \"post_time\": \"2011-11-11 17:19:25\" },\n\t{ \"post_id\": 545, \"topic_id\": 141, \"forum_id\": 9, \"post_subject\": \"Re: Love the new context help!\", \"username\": \"Allan\", \"post_text\": \"I agree Fantastic!\", \"post_time\": \"2011-10-26 18:48:15\" },\n\t{ \"post_id\": 507, \"topic_id\": 141, \"forum_id\": 9, \"post_subject\": \"Love the new context help!\", \"username\": \"bforeman\", \"post_text\": \"Just a comment, I love the new context sensitive help in the ECL IDE. \\n\\nJust place your cursor on any keyword, and press the F1 key. 
Great productivity tool, thanks!\", \"post_time\": \"2011-10-21 12:20:01\" },\n\t{ \"post_id\": 600, \"topic_id\": 158, \"forum_id\": 9, \"post_subject\": \"Re: 'INNER' keyword not highlighted in IDE\", \"username\": \"rtaylor\", \"post_text\": \"Sorry, after writing about and teaching this stuff for 10+ years I've developed the pedant's habit of over-explaining, particularly in public forums where you never know who's reading it and what they may or may not know. \", \"post_time\": \"2011-11-14 16:32:30\" },\n\t{ \"post_id\": 599, \"topic_id\": 158, \"forum_id\": 9, \"post_subject\": \"Re: 'INNER' keyword not highlighted in IDE\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI did know it is the default and did say it was a very small point.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-14 16:24:31\" },\n\t{ \"post_id\": 598, \"topic_id\": 158, \"forum_id\": 9, \"post_subject\": \"Re: 'INNER' keyword not highlighted in IDE\", \"username\": \"rtaylor\", \"post_text\": \"Probably because it is almost never used -- the default join type is inner, therefore not specifying INNER gets you there (and I'm a fan of doing as little typing as is absolutely necessary to get the job done).\\n\\nWith that said, I'll make sure it gets added to the highlight list.\\n\\nRichard\", \"post_time\": \"2011-11-14 16:17:10\" },\n\t{ \"post_id\": 596, \"topic_id\": 158, \"forum_id\": 9, \"post_subject\": \"'INNER' keyword not highlighted in IDE\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nVery small issue, but LN might as well know.\\nThe keyword 'INNER' is not highlighted in the ECL IDE. (It works ok)\\ne.g. in code:\\n
\\nOUTPUT(JOIN(V1,V2,LEFT.Value1=RIGHT.Value1,JoinIt(LEFT,RIGHT),INNER),NAMED('FullInner')); // The default type of Join\\nOUTPUT(JOIN(V1,V2,LEFT.Value1=RIGHT.Value1,JoinIt(LEFT,RIGHT),LEFT OUTER),NAMED('LeftOuter'));\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-12 20:42:38\" },\n\t{ \"post_id\": 716, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"rtaylor\", \"post_text\": \"Jacob,\\n\\nSorry for the inquiry over something so trivial.
\\nGlad to be of help, and it's not -that- trivial -- this "newer IDE to older environment" scenario is always possible with downloadable tools. It caused problems this time because 3.4 contained some newer standard library functions that were not present in the 3.2 version (hence the errors).\\n\\nRichard\", \"post_time\": \"2011-12-05 20:54:13\" },\n\t{ \"post_id\": 714, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"jkatzen\", \"post_text\": \"I got similar errors and the reason was I was using a 3.4 version IDE in a 3.2 version environment. Upgrading the environment solved the problem for me.
\\n\\nExcellent - this turned out to be my problem. I had downloaded the HPCC VM on 11/22 and the IDE on 12/2. Turns out updates were pushed for both on 11/29. Whoops.\\n\\nSorry for the inquiry over something so trivial.\\n\\n- Jacob\", \"post_time\": \"2011-12-05 19:57:42\" },\n\t{ \"post_id\": 713, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"rtaylor\", \"post_text\": \"Errors are as follows:\\nError: Unknown identifier "StringToTitleCase" (164, 66 - C:\\\\Program Files\\\\HPCC
\\n\\nI got similar errors and the reason was I was using a 3.4 version IDE in a 3.2 version environment. Upgrading the environment solved the problem for me.\", \"post_time\": \"2011-12-05 19:02:41\" },\n\t{ \"post_id\": 712, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"jkatzen\", \"post_text\": \"I have confirmed that downgrading to ECL IDE version 6.0.1.5.682.1 from 6.0.4.3.682 has resolved my issue.\\n\\nI had set and reset my path variable to have the C:\\\\...\\\\ver_3_0 included prior to posting, but not the plugins - I will check that again later and update.\\n\\nThanks,\\nJacob\", \"post_time\": \"2011-12-05 17:17:52\" },\n\t{ \"post_id\": 711, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"gsmith\", \"post_text\": \"Does your path include:\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\\\nand\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\plugins\", \"post_time\": \"2011-12-05 17:09:02\" },\n\t{ \"post_id\": 710, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"jkatzen\", \"post_text\": \"set HPCCBIN returns HPCCBIN=C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ as expected.\\n\\nGoing to try an older version of the IDE.\\n\\n- Jacob\", \"post_time\": \"2011-12-05 17:06:42\" },\n\t{ \"post_id\": 709, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"gsmith\", \"post_text\": \"Sounds like an environment variable didn't get set as part of the install, can you check that:\\n\\nHPCCBIN=C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\\\n\\n(cmd prompt + type "set HPCCBIN").\\n\\nIf not can you set it and restart the IDE and see if that helps?\\n\\n(Right click "Computer"->Properties->Advanced System Settings->Advanced->Environment Variables) \\n\\nThx,\\n\\nGordon.\", \"post_time\": \"2011-12-05 16:57:24\" },\n\t{ \"post_id\": 707, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"bforeman\", \"post_text\": \"Hi Jacob,\\n\\nI just fired up my HPCC VM and the following code executes just fine in a Builder window:\\n\\nIMPORT STD;\\nhello := 'hello world';\\nOUTPUT(std.str.touppercase(hello));
\\n\\nMy ECL IDE is 6.0.1.5.682.1\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-12-05 16:39:00\" },\n\t{ \"post_id\": 706, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"jkatzen\", \"post_text\": \"In a builder window with this code:\\nIMPORT Std;\\n\\nhello := 'Hello World';\\nOUTPUT(std.str.ToUpperCase(hello));
\\n\\nErrors are as follows:\\nError: Unknown identifier "StringToTitleCase" (164, 66 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to String (164, 26 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "CountWords" (248, 115 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "SplitWords" (259, 119 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to Set of String (259, 32 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "CombineWords" (270, 94 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to String (270, 27 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "str" (4, 12 - C:\\\\Users\\\\jkatzen\\\\AppData\\\\Local\\\\Temp\\\\TFR7B66.tmp)\\n\\nAlso tried aliasing it with the std.str as XXXStrXXX and it still threw me the same errors:\\nError: Unknown identifier "StringToTitleCase" (164, 66 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to String (164, 26 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "CountWords" (248, 115 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "SplitWords" (259, 119 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to Set of String (259, 32 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Unknown identifier "CombineWords" (270, 94 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Incompatible types: can not assign Integer to String (270, 27 - C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl)\\nError: Object 'std' does not have a field named 'str' (1, 8 - C:\\\\Users\\\\jkatzen\\\\AppData\\\\Local\\\\Temp\\\\TFR555C.tmp)\\nError: Unknown identifier "XXXStrXXX" (4, 8 - C:\\\\Users\\\\jkatzen\\\\AppData\\\\Local\\\\Temp\\\\TFR555C.tmp)\\n\\nNotes: I have checked the paths and confirmed that Str.ecl is indeed located at C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\std\\\\Str.ecl\", \"post_time\": \"2011-12-05 14:42:20\" },\n\t{ \"post_id\": 705, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"gsmith\", \"post_text\": \"[quote="jkatzen":42iopwji]Hello -\\n\\nI am having the same problems as the OP of this topic, however uninstalling and re-installing the IDE has not fixed my problem with importing from the std library (using string functions). Running version 6.0.4.3.682 of the IDE with compiler 3.0.0 community_3.4.0-1. 
I've already checked my compiler settings in the preferences tab as well.\\n\\nAlso tested this on another system and it still did not work.\\n\\nNote: I am running HPCC through VMware.\\n\\nCan you post some of the errors?\\n\\nAlso can you do a quick test with:\\n\\nimport std.str as XXXStrXXX and see if that makes any difference?\\n\\nThx,\\n\\nGordon.\", \"post_time\": \"2011-12-05 14:29:46\" },\n\t{ \"post_id\": 694, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"jkatzen\", \"post_text\": \"Hello -\\n\\nI am having the same problems as the OP of this topic, however uninstalling and re-installing the IDE has not fixed my problem with importing from the std library (using string functions). Running version 6.0.4.3.682 of the IDE with compiler 3.0.0 community_3.4.0-1. I've already checked my compiler settings in the preferences tab as well.\\n\\nAlso tested this on another system and it still did not work.\\n\\nNote: I am running HPCC through VMware.\", \"post_time\": \"2011-12-02 20:49:11\" },\n\t{ \"post_id\": 675, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"gsmith\", \"post_text\": \"[quote="chargil":13lnxefb]I uninstalled the IDE and re-installed and that seemed to have fixed my problems.\\n\\nIt may have been the default folders has not been set correctly, next time double check login->prefrences->compiler and see if the folders are correct (press reset if needed).\\n\\nGordon\", \"post_time\": \"2011-11-30 13:56:05\" },\n\t{ \"post_id\": 674, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"Re: ECL Library\", \"username\": \"chargil\", \"post_text\": \"I uninstalled the IDE and re-installed and that seemed to have fixed my problems.\", \"post_time\": \"2011-11-30 13:53:45\" },\n\t{ \"post_id\": 673, \"topic_id\": 178, \"forum_id\": 9, \"post_subject\": \"ECL Library\", \"username\": \"chargil\", \"post_text\": \"I just downloaded and installed the new version of the IDE and I'm having problems with importing from the ecllibrary. \\n\\n\\nA quick example: I try to import\\n\\nIMPORT Std.Str AS Str
\\n\\nChecking syntax brings up no errors. However, when submitting workunits a string of errors would be raised because "std" could not be found.\", \"post_time\": \"2011-11-30 13:34:12\" },\n\t{ \"post_id\": 769, \"topic_id\": 198, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE within Centos\", \"username\": \"gsmith\", \"post_text\": \"The only way to get the IDE to run in Linux is via wine (there are some instructions in the documentation, which I don't have to hand) - some instuctions can be found in this thread: viewtopic.php?f=9&t=81\\n\\nShould be enough to get you going.\\n\\nGordon.\\n\\nPS The Eclipse ECL Plugin is on the verge of being released which should help the Linux/Mac client users...\", \"post_time\": \"2011-12-29 20:14:49\" },\n\t{ \"post_id\": 768, \"topic_id\": 198, \"forum_id\": 9, \"post_subject\": \"ECL IDE within Centos\", \"username\": \"thildebrant\", \"post_text\": \"Hello,\\nI tried to install the IDE within centos, using http://hpccsystems.com/download/free-co ... ient-tools, but the rpm only unpacks \\n[root@localhost Desktop]# rpm -qlp hpccsystems-clienttools-community_3.4.0-1-noarch.rpm\\n/opt\\n/opt/HPCCSystems\\n/opt/HPCCSystems/componentfiles\\n/opt/HPCCSystems/componentfiles/files\\n/opt/HPCCSystems/componentfiles/files/downloads\\n/opt/HPCCSystems/componentfiles/files/downloads/clienttools\\n/opt/HPCCSystems/componentfiles/files/downloads/clienttools/SetupECLIDE.msi\\n/opt/HPCCSystems/componentfiles/files/downloads/clienttools/description.xml\\n\\nAm I missing some method to install the IDE under linux?\\n\\nThanks,\\nTodd\", \"post_time\": \"2011-12-29 18:52:40\" },\n\t{ \"post_id\": 797, \"topic_id\": 205, \"forum_id\": 9, \"post_subject\": \"Re: how to Use OUTPUT (for Appending)\", \"username\": \"rtaylor\", \"post_text\": \"Look at he section of the OUTPUT documentation that discusses the NAMED option, which also allows you to use the EXTEND option as in this example:\\n\\n//a NAMED, EXTEND example:\\nerrMsgRec := RECORD\\n UNSIGNED4 code;\\n STRING text;\\nEND;\\nmakeErrMsg(UNSIGNED4 _code,STRING _text) := DATASET([{_code, _text}], errMsgRec);\\n\\nrptErrMsg(UNSIGNED4 _code,STRING _text) := \\n OUTPUT(makeErrMsg(_code,_text),NAMED('ErrorResult'),EXTEND);\\n\\nOUTPUT(DATASET([{100, 'Failed'}],errMsgRec),NAMED('ErrorResult'),EXTEND);\\n //Explicit syntax.\\n\\n//Something else creates the dataset\\nOUTPUT(makeErrMsg(101, 'Failed again'),NAMED('ErrorResult'),EXTEND);
\", \"post_time\": \"2012-01-06 21:54:55\" },\n\t{ \"post_id\": 789, \"topic_id\": 205, \"forum_id\": 9, \"post_subject\": \"Re: how to Use OUTPUT (for Appending)\", \"username\": \"gsmith\", \"post_text\": \"This question will be better answered in the "ECL" Forum (this forum is more for IDE queries).\\n\\nWhile I don't think there is anyway to append data to an existing file on disk, you can use the "+" operator with tables + datasets:\\nd1 := dataset(...orig results...)\\n\\nd2 := dataset(...new results...)\\n\\nd3 := d1 + d2 //record struct will have to match!\\n\\noutput(d3);
\\n\\nHTH,\\n\\nGordon.\", \"post_time\": \"2012-01-06 14:21:07\" },\n\t{ \"post_id\": 788, \"topic_id\": 205, \"forum_id\": 9, \"post_subject\": \"how to Use OUTPUT (for Appending)\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi, \\nWe are facing problem while using OUTPUT, we want to use some keyword in OUTPUT so that we can append the new result in alresdy exsisting dataset.\\nIs there any Keyword for Appending in OUTPUT.\", \"post_time\": \"2012-01-06 13:44:20\" },\n\t{ \"post_id\": 870, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in \", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nFYI I've created a ecl.syn for Textpad.\\n\\nPosted it to the textpad support team, don't know when they will upload it to their site.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-22 19:49:18\" },\n\t{ \"post_id\": 819, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in \", \"username\": \"Allan\", \"post_text\": \"Thanks\\n\\nGordon\", \"post_time\": \"2012-01-12 19:46:02\" },\n\t{ \"post_id\": 817, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in\", \"username\": \"gsmith\", \"post_text\": \"You can grab a windows version of the ECL Notepad at:\\nhttps://skydrive.live.com/redir.aspx?ci ... KXp5YxZsdE\\n\\nSources and build instructions for Win/Linux will be forthcomming.\\n\\nGordon.\", \"post_time\": \"2012-01-12 14:09:14\" },\n\t{ \"post_id\": 806, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in \", \"username\": \"Allan\", \"post_text\": \"Gordon,\\nI don't know how to (P)rivate (M)essage you.\", \"post_time\": \"2012-01-11 13:50:49\" },\n\t{ \"post_id\": 805, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in \", \"username\": \"gsmith\", \"post_text\": \"I was unable to PM you (can you PM me?). 
I have contacted site admin to see what is up.\\n\\nGordon.\", \"post_time\": \"2012-01-11 13:42:02\" },\n\t{ \"post_id\": 804, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in\", \"username\": \"Allan\", \"post_text\": \"Hi Gordon,\\n\\nNice to know the problem is being addressed.\\n\\nIn the mean time I would very much like your ecl sensitive notepad.\\n\\nIt would be good if someone wrote a syntax definition file for 'TextPad'.\\nI would, but wife/family are higher priorities \", \"post_time\": \"2012-01-11 13:22:32\" },\n\t{ \"post_id\": 803, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Re: Problems using IDE as default target for *.ecl files in \", \"username\": \"gsmith\", \"post_text\": \"Thanks for the report.\\n \\nSome background may be helpful in understanding why this doesn't work today (I will open a GitHub issue specifically for this):\\n\\nUp until recently ECL files where never stored local, they lived on the platform server and the IDE would load and save them via SOAP calls.\\n\\nBecause of this, the paradigm was such that the user would have to nominate and log into a particular server prior to having access to the IDE features.\\n\\nThe behaviour you are seeing is a direct result of the above (still undesirable I know).\\n\\nI do have an “ECL” Notepad (based on SciTE) knocking around which may be of interest.\\n\\nGordon.\", \"post_time\": \"2012-01-11 12:47:05\" },\n\t{ \"post_id\": 802, \"topic_id\": 206, \"forum_id\": 9, \"post_subject\": \"Problems using IDE as default target for *.ecl files in W7\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm using Windows 7.\\nI have the ECL IDE setup as the default program to use for files with extension *.ecl\\n\\nThere are two problems I encounter when attempting to, say open an ecl file attached to some mail:\\n
\\n1. If the ECL IDE is already running it errors saying that only one instance of the ECL IDE is allowed to run at the same time.\\n2. If the ECL IDE is not already running, the IDE loads and runs but does not show the file I want to look at!!!!\\n
\\n\\nI regularly want to look at ecl attached to e-mails so this is a particular irritant!\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-11 12:01:34\" },\n\t{ \"post_id\": 3157, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"Allan\", \"post_text\": \"Sorted!\\n\\nFound the 32-bit version of Graph control dll on this HPCC site, installed, then run that regsvr32 and Graph now works at 3.10 (also on firefox)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-23 17:13:14\" },\n\t{ \"post_id\": 3156, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"Allan\", \"post_text\": \"I also cannot see WU graphs now I've upgraded to 3.10 IDE + client tools.\\n(I'm using Windows 7 64 bit)\\n\\nAll I have installed is:\\n\\n Directory of C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\n\\n22/01/2013 11:45 <DIR> .\\n22/01/2013 11:45 <DIR> ..\\n16/04/2012 13:30 <DIR> bin\\n22/01/2013 11:45 <DIR> ECL IDE\\n22/01/2013 11:45 <DIR> Graph Control x64\\n22/01/2013 11:45 <DIR> ver_3_6\\n
\\nAnd under 'Graph Control x64'\\n\\n Directory of C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\Graph Control x64\\n\\n22/01/2013 11:45 <DIR> .\\n22/01/2013 11:45 <DIR> ..\\n19/11/2012 12:01 4,105,728 npHPCCSystemsGraphViewControl.dll\\n
\\nI've attempted using the regsvr32 mentioned above to no avail. (I expected not)\\n\\nbeing a work machine I'm constrained in the browsers I can use, I have IE8 and Firefox ?\\n\\nAny help would be greatly appreciated.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-23 17:01:09\" },\n\t{ \"post_id\": 814, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"gsmith\", \"post_text\": \"I would guess version 6 has been partially uninstalled? Can you reinstall it?\", \"post_time\": \"2012-01-11 17:32:02\" },\n\t{ \"post_id\": 813, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"aintnomyth\", \"post_text\": \"expat.dll\\ngraphdb.dll\\ngraphlayout.dll\\ngraphrender.dll\\nnpHPCCSystemsGraphViewControl.dll\\n\\nWith version 5 installed I have all of the files listed. With version 6 I only have npHPCCSystemsGraphViewControl.dll. \\n\\nI think version 6 isn't finishing the install process. I double click the .msi and a progress bar appears but nothing after that. I didn't notice that until I installed 5 and 6 back to back.\", \"post_time\": \"2012-01-11 16:55:45\" },\n\t{ \"post_id\": 812, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"gsmith\", \"post_text\": \"You want the v6 one. \\n\\nCan you double check the content of the folder in question to see if the DLLs are actually there?\\nexpat.dll\\ngraphdb.dll\\ngraphlayout.dll\\ngraphrender.dll\\nnpHPCCSystemsGraphViewControl.dll\\n\\nGordon.\", \"post_time\": \"2012-01-11 16:26:56\" },\n\t{ \"post_id\": 811, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks again, I tried registering from the command prompt (launching cmd in run-as-administrator mode). \\n\\nThe regsvr32 command works when I have the prior graph control installed (v 5.x) but the graphs still crash after they briefly display.\\n\\nThe regsvr32 command fails when I have the current graph control installed (v 6.x), I get a windows popup that says:\\n "specified module could not be found"\\n\\n\\nI'm on windows 7 64 bit if it matters, but the graphs were displaying wonderfully until they suddenly started crashing. \", \"post_time\": \"2012-01-11 16:03:00\" },\n\t{ \"post_id\": 810, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"gsmith\", \"post_text\": \"On my machine the "manual" way to do the same is (from a command prompt with admin rights):\\n\\ncd "C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\Graph Control"\\nregsvr32 npHPCCSystemsGraphViewControl.dll\\n\\nGordon.\\n\\nPS Again, the above folder should be the latest IDE on your machine.\", \"post_time\": \"2012-01-11 15:08:12\" },\n\t{ \"post_id\": 809, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the reply. I uninstalled the current graph control from control panel, closed my browsers and IDE, but now the "Install Version" is grayed out so I can't click it.\", \"post_time\": \"2012-01-11 14:58:02\" },\n\t{ \"post_id\": 808, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"Re: graph control stopped working\", \"username\": \"gsmith\", \"post_text\": \"This sometimes happen if you install an older IDE (or QueryBuilder). In general it can be resolved by:\\n\\n1. 
Close all instances of IDE/QB\\n2. Open newest IDE\\n3. Goto Prefs/Other and click the "Install X.Y.Z" option (just noticed that they are missing a title/group "Graph Control").\\n\\nGordon.\", \"post_time\": \"2012-01-11 14:26:34\" },\n\t{ \"post_id\": 807, \"topic_id\": 207, \"forum_id\": 9, \"post_subject\": \"graph control stopped working\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nMy graph visualizer was working fine and suddenly stopped working. I've tried uninstalling and re-installing but it's still not working. \\n\\nIt actually loads for a split second and I can even see the graph but then my browser reports:\\n"The HPCCSystemsGraphViewControl plugin has crashed."\\n\\nFollowed by a dialog box:\\n"Graph Control Needs to be installed to visualize activity graphs."\\n\\nI've tried uninstalling and reinstalling, then I tried a repair, then I tried installing a previous version. Any ideas for how to get it back?\\n\\nThanks!\", \"post_time\": \"2012-01-11 14:19:35\" },\n\t{ \"post_id\": 833, \"topic_id\": 211, \"forum_id\": 9, \"post_subject\": \"Re: Problem with 'Save As'.\", \"username\": \"gsmith\", \"post_text\": \"Resolved for future build.\", \"post_time\": \"2012-01-16 13:11:16\" },\n\t{ \"post_id\": 832, \"topic_id\": 211, \"forum_id\": 9, \"post_subject\": \"Re: Problem with 'Save As'.\", \"username\": \"Allan\", \"post_text\": \"Hi Gordon,\\n\\nFYI\\n\\nDoes NOT seem to be a problem with the enterprise edition 5.8.2.8.682.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-16 11:41:00\" },\n\t{ \"post_id\": 831, \"topic_id\": 211, \"forum_id\": 9, \"post_subject\": \"Re: Problem with 'Save As'.\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the detailed steps, I will investigate ASAP.\\n\\nGordon.\\n\\nPS Issue raised: https://github.com/hpcc-systems/eclide/issues/54\", \"post_time\": \"2012-01-16 10:28:44\" },\n\t{ \"post_id\": 830, \"topic_id\": 211, \"forum_id\": 9, \"post_subject\": \"Problem with 'Save As'.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis may be related to the 'File Rename Bug' posted below by mglheureux.\\nI'm using IDE 6.0.2.4.682.1\\n\\nOn saving a file using The 'Save As' button the filename in the tab above the file changes to the new file. However the file you are editing is still the old file!\\n\\nSteps to reproduce (all done from within the ECL IDE):\\n
\\nInsert file a.ecl into a folder // Some default ecl is automatically inserted.\\n'Save As' b.ecl // Tab over file now refers to b.ecl\\nchange some text.\\n'Save' b.ecl\\nclose b.ecl\\nReopen a.ecl // Find it holds the changes you thought you made to b.ecl!\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-16 09:47:35\" },\n\t{ \"post_id\": 865, \"topic_id\": 218, \"forum_id\": 9, \"post_subject\": \"Re: List of keywords that are not highlighted.\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the info.\\n\\nThis list is generated (periodically) from the documentation, I will give them a heads up that we need a refresh! (The beginc++ endc++ needs a tweak to the editor lexer to get working).\\n\\nGordon.\", \"post_time\": \"2012-01-22 08:16:08\" },\n\t{ \"post_id\": 860, \"topic_id\": 218, \"forum_id\": 9, \"post_subject\": \"List of keywords that are not highlighted.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nBelow are a list of keywords that are not highlighted in my version of the IDE (6.0.2.4.682.1)\\nI created this list while producing a syntax definition file the the popular 'TextPad' editor.\\n\\nAFTER\\nAGGREGATE\\nBEGINC++\\nCLUSTER\\nCOMPRESSED\\nDYNAMIC\\nENUM \\nFROMXML\\nFUNCTIONMACRO\\nGROUPED\\nINDEPENDENT\\nINNER\\nINTERNAL\\nLITERAL\\nMATCHROW\\nMOFN\\nNAMESPACE\\nNOCASE\\nONFAIL \\nPERSIST\\nPRIORITY\\nRECOVERY\\nRETRY \\nROWS\\nSINGLE \\nSTABLE\\nSUCCESS\\nTIMEOUT \\nUNORDERED\\nUNSTABLE\\nWHEN\\nWIDTH\\nXMLPROJECT\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-21 15:27:34\" },\n\t{ \"post_id\": 1011, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"Allan\", \"post_text\": \"Thanks everyone for checking it.\\n\\nI'll get the corrections uploaded onto the TextPad site.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-05 19:52:52\" },\n\t{ \"post_id\": 900, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nTHANK YOU!!\\n\\nWorks great on my TextPad. \\n\\nI did add a few items that were missing, so I -think- I've now got it all current. If not, I'll add the missing stuff as I find it.\\n\\nRichard\", \"post_time\": \"2012-01-25 20:56:35\" },\n\t{ \"post_id\": 893, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Allan,\\n\\nIt appears the .syn file you have submitted is missing a few tokens. Specifically, the ones I have identified so far are listed below. I have changed mine, but I wanted to make sure you were aware so you could update your original if desired.\\n\\n#ELSE\\n#END\\n#ENDMACRO\\n\\n- David W.\", \"post_time\": \"2012-01-25 15:19:58\" },\n\t{ \"post_id\": 891, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nNot sure yet, but someone on our documentation team with review it and then we will make that decision. Thanks again for the contribution!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-25 13:22:03\" },\n\t{ \"post_id\": 887, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"Allan\", \"post_text\": \"TextPad support have put the file on their Web site.\\n\\nhttp://www.textpad.com/add-ons/syna2g.html\\n\\nOne question: I've written it, who's going to maintain it?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-25 10:10:56\" },\n\t{ \"post_id\": 883, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"Re: A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nThanks for the work on this. Can you simply zip and attach the syn file?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-24 21:10:38\" },\n\t{ \"post_id\": 861, \"topic_id\": 219, \"forum_id\": 9, \"post_subject\": \"A ECL syntax definition file for the 'TextPad' editor.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nFor the dedicated band of people (like me) who will only ever use the TextPad editor, I attach a syntax definition file.\\n\\nPlease inform me of any issues and if someone could tell me how to recognise\\nUNICODE[locale][n] I would be most grateful.\\n\\nYours\\n\\nAllan\\n\\nOh - the extension 'syn' is not allowed!\\nSo here it is inline:\\n\\nOh - Now that blows the max size allowed for this text box, you'll just have to wait till TextPad support upload it to their website.\", \"post_time\": \"2012-01-21 15:37:43\" },\n\t{ \"post_id\": 882, \"topic_id\": 223, \"forum_id\": 9, \"post_subject\": \"Re: Curious behaviour showing results From the ECLWatch pane\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nForwarded to our development team for review. 
Thank You!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-24 21:08:47\" },\n\t{ \"post_id\": 868, \"topic_id\": 223, \"forum_id\": 9, \"post_subject\": \"Curious behaviour showing results From the ECLWatch pane IDE\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nSay I have two result sets from one workunit, as in the code below.\\n\\nAkaRec := {STRING20 forename,STRING20 surname };\\noutputRec := RECORD\\n UNSIGNED id,\\n DATASET(AkaRec) kids;\\nEND;\\n\\ninputRec := {UNSIGNED id,STRING20 forename,STRING20 surname };\\nInPeople := DATASET ([ {1,'Allan','Wrobel'},\\n {1,'Allan','Smith'},\\n\\t\\t {2,'Anna','Smith'},\\n {2,'Keith','Harrison'}],inputRec);\\noutputRec makeFatRecord(inputRec L) := TRANSFORM\\n SELF.id := L.id;\\n SELF.kids := DATASET([{L.forename,L.surname}],AkaRec);\\nEND;\\nFatIn := PROJECT(InPeople,makeFatRecord(LEFT));\\noutputRec makeChildren(outputRec L,outputRec R) := TRANSFORM\\n SELF.id := L.id;\\n SELF.kids := L.kids + ROW({R.kids[1].forename,R.kids[1].surname},AkaRec);\\nEND;\\nOUTPUT(FatIn,NAMED('FatIn'));\\nOUTPUT(ROLLUP(FatIn,id,makeChildren(LEFT,RIGHT)),NAMED('Post_rollup'));\\n
\\n\\nNow on looking at the two results from clicking on the links in the ECLWatch pane the results are displayed in a separate window, however if an attempt is made to redisplay a result set, without closing the separate open result window , the existing displayed result is left shown. One cannot flip between displays, without closing these result windows.\\n\\nI attach some screen shots in an attempt to show what I mean.\\n\\nVersion of IDE 6.0.2.4.682.1\\n\\nI can only attach 3 files so You cannot see the screen shot that shows the error. A re-click of the 1st results does not display the 1st result again.\\nThe 2nd set of results are left displayed.\\n\\nTo reiterate, it all works fine if one closes the output display before one re-clicks a result.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-22 19:30:35\" },\n\t{ \"post_id\": 879, \"topic_id\": 224, \"forum_id\": 9, \"post_subject\": \"Re: A suggestion for the Results screen displayed from ECLWa\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI just could not see it, printed it off, read it over and over \\n\\nMust have had my 'boy eyes' in.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-24 20:07:15\" },\n\t{ \"post_id\": 878, \"topic_id\": 224, \"forum_id\": 9, \"post_subject\": \"Re: A suggestion for the Results screen displayed from ECLWa\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI see "Fat In" on my screen, AND in your screen shot. Are you looking for something else?\\n\\nRichard\", \"post_time\": \"2012-01-24 18:48:54\" },\n\t{ \"post_id\": 869, \"topic_id\": 224, \"forum_id\": 9, \"post_subject\": \"A suggestion for the Results screen displayed from ECLWatch\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI attach a sample result output from the ECLWatch pane of the IDE.\\nThe OUTPUT is 'NAMED' \\n
\\nOUTPUT(FatIn,NAMED('FatIn'));\\n
\\nand this name 'FatIn' is show in the result row on the ECLWatch pane.\\n\\nIt would be very nice if the output result screen also contained the name of the result. i.e. 'FatIn'.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-22 19:43:03\" },\n\t{ \"post_id\": 949, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"[quote="anthony.fishbeck@lexisnexis.com":1zzcyu59]Btw, there is a way to change the tag name for nested child Rows in ECL (by using the XPATH modifier), but unfortunately it is somewhat broken at the moment. A fix is being worked on now.\\n\\nExcellent. I'm already looking forward to that when it's fixed.\\n\\n[quote="anthony.fishbeck@lexisnexis.com":1zzcyu59]\\nBut for XJC you can customize the class names by adding XJC application info to the WSDL or xml schema before you generate your Java code. You can find information on how to add the annotations here:\\n\\nhttp://jaxb.java.net/tutorial/section_5 ... Overriding Names\\n\\nI did find that after our last exchange and forwarded it to my developer. He's checking into it now.\\n\\nThanks so much for your assistance!\\n\\nDan\", \"post_time\": \"2012-01-30 20:53:48\" },\n\t{ \"post_id\": 948, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Btw, there is a way to change the tag name for nested child Rows in ECL (by using the XPATH modifier), but unfortunately it is somewhat broken at the moment. A fix is being worked on now.\\n\\nBut for XJC you can customize the class names by adding XJC application info to the WSDL or xml schema before you generate your Java code. You can find information on how to add the annotations here:\\n\\nhttp://jaxb.java.net/tutorial/section_5 ... Overriding Names\", \"post_time\": \"2012-01-30 20:28:21\" },\n\t{ \"post_id\": 947, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"[quote="anthony.fishbeck@lexisnexis.com":sti8qcxp]The REST example I gave returns the result of your transformation view, but JSON is another option.\\n\\nWhoa. I need to read things more closely. Thanks for pointing that out!\", \"post_time\": \"2012-01-30 20:13:47\" },\n\t{ \"post_id\": 946, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"The REST example I gave returns the result of your transformation view, but JSON is another option.\", \"post_time\": \"2012-01-30 20:07:42\" },\n\t{ \"post_id\": 945, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"[quote="anthony.fishbeck@lexisnexis.com":lgwajtiz]Regarding the original problem, do you know what tool the Java developer is using to generate his code? XML tags are context sensitive and a Row tag in one place is not the same as a Row tag in another place. If he is using a WSDL or Xml Schema code generator it should take that into account. The different Row positions have different schema types. I suppose the tool may be generating duplicate (unmangled) class names even if it knows the content would be different.\\n\\nHe's using XJC, and I'm completely unfamiliar with the tool. 
Apparently all he did was point the tool at the SOAP endpoint and ask for class generation.\\n\\n\\nBtw, are the duplicate row tags coming from separate outputs, or a single complex output with child (nested) datasets?
\\n\\nOne OUTPUT of a dataset with one row, and that row is complex (nested children).\\n\\n\\nThere is currently no way of modifying the SOAP output. You could get to the modified output via REST, especially if your query inputs are fairly simple, for example... \\n\\nhttp://IP:8002/WsEcl/xslt/query/hthor/e ... ticsResult\\n
\\n\\nI'll explore that. In this case, the query is sufficiently simple to justify JSON.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-30 20:02:03\" },\n\t{ \"post_id\": 944, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Hi Dan,\\n\\nRegarding the original problem, do you know what tool the Java developer is using to generate his code? XML tags are context sensitive and a Row tag in one place is not the same as a Row tag in another place. If he is using a WSDL or Xml Schema code generator it should take that into account. The different Row positions have different schema types. I suppose the tool may be generating duplicate (unmangled) class names even if it knows the content would be different.\\n\\nBtw, are the duplicate row tags coming from separate outputs, or a single complex output with child (nested) datasets?\\n\\nThere is currently no way of modifying the SOAP output. You could get to the modified output via REST, especially if your query inputs are fairly simple, for example... \\n\\nhttp://IP:8002/WsEcl/xslt/query/hthor/example_query?input1=123&view=AnalyticsResult\\n
\\n\\nRegards,\\nTony\", \"post_time\": \"2012-01-30 19:56:18\" },\n\t{ \"post_id\": 941, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"Gotcha. I think I figured out my primary mistake as well: I completely overlooked the fact that you had to select the name of the output in the Results section of the job. I went into this thinking that I would be rewriting the standard results -- the ones that are naturally output by the ECL job. So, at this point, I am getting output that's running through the XSL processor if I click on the right thing in the IDE.\\n\\nIs there a way to tie this to the regular output? Or more specifically, perform an XML rewrite on a SOAP response?\\n\\nBTW, what I'm really trying to do is solve a different problem: The SOAP response contains a lot of repetitive <Row> tags embedded in various parts of the output. Another developer is looking at consuming this output into a Java application. The tool he's using to generate Java classes is complaining about redefining Row classes over and over. So, the *real* issue here is having finer-grained control over eventual SOAP output. If you have any insight there, that would be most helpful.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-30 18:39:24\" },\n\t{ \"post_id\": 940, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Sorry for using confusing language. I did not mean to imply that the XSLT file was invoking code -- what I meant to say is that the XSLT file contains conditional logic that indicates whether or not the script within it gets used during the presentation of information.\\n\\nFor example, you may have references to three separate XSLT files in your manifest: One for Pie, one for Line and one for Bar charts. In such a case you do not want to use all three to present your output. Therefore, you include logic in the piechart.xslt file that says "If the name of the output starts with 'PieChart' then use me", and similar conditionals in the other two looking for 'LineChart' and 'BarChart'.\\n\\nIf you have specified the name of your output to begin with one of these three strings, then the XSLT that contains that pattern will be used during the presentation of your data and the other two will be disregarded. If your output name doesn't fit any of them, then you will get the standard tabular output without any XSLT formatting.\\n\\n- David\", \"post_time\": \"2012-01-30 18:26:28\" },\n\t{ \"post_id\": 939, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"[quote="david.wheelock@lexisnexis.com":i552qgd6]The reason for using NAMED during your OUTPUT statement is so that the XSLT file can choose to invoke or not based on that specific string. For the Google Pie Chart example, for instance, you have code in the XSLT that looks like this:\\n\\n <xsl:template match="Dataset[starts-with(@name,'PieChart')]" mode="generate_body">\\n <h1><xsl:value-of select="translate(substring-after(@name, 'PieChart_'),'_',' ')"/></h1>\\n <div style="height: 400px; width: 600px;">\\n <xsl:attribute name="id"><xsl:value-of select="@name"/></xsl:attribute>\\n </div>\\n </xsl:template>
\\n\\nThis is saying "If the name starts with the string "PieChart", then invoke this XSLT code. So you would have a scenario like this:\\n\\nOUTPUT(dPieData,NAMED('PieChart_TEST')); // will invoke the XSLT and output a chart\\nOUTPUT(dNotPieData,NAMED('RandomStuff')); // will not invoke
\\n\\nDoes your output.xslt contain the logic to identify "AnalyticsResult" as a pattern that will invoke the code?\\n\\nMy ECL contains:\\n\\nOUTPUT(result,NAMED('AnalyticsResult'));
\\n\\nDoes that not tie the XSLT declarations in the manifest to the named output? Your last question makes it sound like XSLT is invoking ECL, not just processing the output.\", \"post_time\": \"2012-01-30 18:03:43\" },\n\t{ \"post_id\": 938, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"The reason for using NAMED during your OUTPUT statement is so that the XSLT file can choose to invoke or not based on that specific string. For the Google Pie Chart example, for instance, you have code in the XSLT that looks like this:\\n\\n <xsl:template match="Dataset[starts-with(@name,'PieChart')]" mode="generate_body">\\n <h1><xsl:value-of select="translate(substring-after(@name, 'PieChart_'),'_',' ')"/></h1>\\n <div style="height: 400px; width: 600px;">\\n <xsl:attribute name="id"><xsl:value-of select="@name"/></xsl:attribute>\\n </div>\\n </xsl:template>
\\n\\nThis is saying "If the name starts with the string "PieChart", then invoke this XSLT code. So you would have a scenario like this:\\n\\nOUTPUT(dPieData,NAMED('PieChart_TEST')); // will invoke the XSLT and output a chart\\nOUTPUT(dNotPieData,NAMED('RandomStuff')); // will not invoke
\\n\\nDoes your output.xslt contain the logic to identify "AnalyticsResult" as a pattern that will invoke the code?\\n\\n- David W.\", \"post_time\": \"2012-01-30 17:58:34\" },\n\t{ \"post_id\": 937, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"[quote="david.wheelock@lexisnexis.com":cvzcshq2]At the moment, when using the IDE you need to make a change to your preferences to accommodate the XSLT stuff. In the IDE's preferences, go to the "Compiler" tab and enter this into the "Arguments" box:\\n\\n -manifest "vl_parent\\\\VL\\\\XSLT\\\\manifest.xml"\\n\\nWhere vl_parent is the folder that the VL module is located. This instructs the IDE to load the manifest file that manages all of the visualization XSLT files.\\n\\nHi David,\\n\\nI did make a change to Preferences for the manifest file, but I'm probably doing something wrong. The "Visualizing ECL Results" PDF document doesn't indicate any special location for these things, so I probably put them into the wrong place. Here is a synopsis of my current environment:\\n\\n* All files (ECL, manifest, XSLT file) are located in the same subdirectory (C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\foo\\\\dev\\\\bar).\\n\\n* The compiler preference in the IDE contains this option: -manifest="C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\foo\\\\dev\\\\bar\\\\manifest.xml"\\n\\n* The manifest file contains only this:\\n\\n\\n<Manifest>\\n\\t/*\\n <Resource type="XSLT" filename="output.xslt" name="AnalyticsResult"/>\\n <Views>\\n <Results name="AnalyticsResult" resource="AnalyticsResult" type="XSLT"/>\\n </Views>\\n */\\n</Manifest>
\\n\\n* The output.xslt file is confirmed to work.\\n\\n* The piece of ECL code that generates the output was changed to:\\n\\n\\nresultAction := OUTPUT(result,NAMED('AnalyticsResult'));
\\n\\nNote: I am entirely unclear what the relationship precisely is between the NAMED parameter and the attributes within the manifest file. That's why I simply named everything the same, hoping to get lucky.\\n\\nIs it obvious what I'm doing wrong? At this point, I feel like I'm overlooking something incredibly obvious.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-30 17:23:08\" },\n\t{ \"post_id\": 936, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization in 3.4.0-1?\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Dan,\\n\\nAt the moment, when using the IDE you need to make a change to your preferences to accommodate the XSLT stuff. In the IDE's preferences, go to the "Compiler" tab and enter this into the "Arguments" box:\\n\\n -manifest "vl_parent\\\\VL\\\\XSLT\\\\manifest.xml"\\n\\nWhere vl_parent is the folder that the VL module is located. This instructs the IDE to load the manifest file that manages all of the visualization XSLT files.\\n\\n- David W.\", \"post_time\": \"2012-01-30 16:57:47\" },\n\t{ \"post_id\": 935, \"topic_id\": 234, \"forum_id\": 9, \"post_subject\": \"XSLT/Visualization in 3.4.0-1?\", \"username\": \"DSC\", \"post_text\": \"Is the XSLT/visualization hook available in the current community edition (3.4.0.1)? I can't seem to make an XSLT transformation change my output, and I'm wondering if this is perhaps a release candidate feature.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-01-30 16:44:49\" },\n\t{ \"post_id\": 1117, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nVersion now 6.0.4 with server at 3.4.2\\n\\nEverything now just works a treat \\n\\nThanks very much for your help.\\n\\nAllan\", \"post_time\": \"2012-02-15 20:47:06\" },\n\t{ \"post_id\": 1108, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"gsmith\", \"post_text\": \"Sorry I didn't spot the server version earlier. Basically you will need eclcc (client and server) to be at least 3.4 for this to work.\", \"post_time\": \"2012-02-14 13:46:14\" },\n\t{ \"post_id\": 1107, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Gordon,\\n\\nI was following your instructions in an earlier post to this issue:\\n\\nI suspect you need newer eclcc (comes with IDE):\\nhttp://hpccsystems.com/download/free-co ... 
ient-tools\\n\\n\\nCan you please tell me what to install now.\\nPlease supply a link.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-14 13:36:14\" },\n\t{ \"post_id\": 1105, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"gsmith\", \"post_text\": \"I _think_ you need eclcc (client and server) to be at least 3.4 (your server is still 3.2.2).\\n\\nGordon.\", \"post_time\": \"2012-02-14 12:52:44\" },\n\t{ \"post_id\": 1103, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi Anyone,\\n\\nAny update on this?\\nIts a bit worrying tha the version has the word 'internal' in it?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-14 12:04:50\" },\n\t{ \"post_id\": 1091, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nOK installed the suggested version. See version info in attachment.\\n\\nBut still not seeing what's expected in the ECLWatch page. See yet another attachment.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-10 20:40:29\" },\n\t{ \"post_id\": 1039, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"gsmith\", \"post_text\": \"I suspect you need newer eclcc (comes with IDE):\\nhttp://hpccsystems.com/download/free-co ... ient-tools\\n\\nGordon.\", \"post_time\": \"2012-02-06 18:55:15\" },\n\t{ \"post_id\": 1038, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI definitely have my target as 'Thor'.\\n\\nThe about box with version information is in another attachment.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-06 18:44:27\" },\n\t{ \"post_id\": 1028, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"gsmith\", \"post_text\": \"The target issue has been raised (and fixed for next release):\\nhttps://github.com/hpcc-systems/eclide/pull/76\", \"post_time\": \"2012-02-06 13:55:18\" },\n\t{ \"post_id\": 1027, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Click on the help button in the IDE, and select "About". Along with the IDE version you will also see the compiler version, which will be something like:\\n\\n"3.0.0 community_3.4.0-1"\\n\\n- David W.\", \"post_time\": \"2012-02-06 13:47:03\" },\n\t{ \"post_id\": 1026, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe version of the ECL IDE is Version of IDE 6.0.2.4.682.1 \\nI'm not sure how to find the version of Thor.\\n\\nAlso I definitely selected 'Thor' as my target. Though there might have been finger trouble on the instance I attached to this post.\\nI'll re-try in the evening, just to double check. I have found an annoying propensity for the IDE to reset the 'Target' at random times. Like when displaying 'Preferences'.\\nOne has to remember to re-set the Target. 
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-06 12:39:21\" },\n\t{ \"post_id\": 1025, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"gsmith\", \"post_text\": \"Also, this query was submitted local and not to the server, was this intentional?\\n\\nGordon.\", \"post_time\": \"2012-02-06 12:19:22\" },\n\t{ \"post_id\": 1024, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Re: Problems 'Visualizing' ECL Results.\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Allan,\\n\\nMy immediate thought is that you may be working with an older version of Thor. Can you confirm that you are on release 3.4.0 or newer?\\n\\n- David W.\", \"post_time\": \"2012-02-06 12:08:39\" },\n\t{ \"post_id\": 1012, \"topic_id\": 248, \"forum_id\": 9, \"post_subject\": \"Problems 'Visualizing' ECL Results.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've just gone though the 'Visualizing ECL Results' Document and attempted to reproduce the 'Pie' example in the document, with out much luck. See attached screen dump.\\n\\nI'm using the ECL IDE. The Argument I have placed in my compiler preferences is:\\n
\\n-manifest="C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\visualizations\\\\google_charts\\\\files\\\\manifest.xml"\\n
\\n\\nAny idea's what I've done wrong.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-05 20:01:21\" },\n\t{ \"post_id\": 1040, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Re: Exporting call graph?\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\nIs there a way to export or otherwise image a call graph from the IDE or a web browser? I need to put together a presentation about HPCC and showing how code is turned into a graph would definitely go over well.
I've always found that the best way to present this kind of stuff is to actually demo it. You can use the VMware version to demo anywhere without attaching to a real cluster, and depending on the example code you write, you can show some pretty complex graphs.\\n\\nAlso, if you want to be able to zoom in and draw arrows and stuff to point out features, Microsoft has a free tool (zoomit.exe) that I use when I'm teaching.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-06 19:29:33\" },\n\t{ \"post_id\": 1036, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Re: Exporting call graph?\", \"username\": \"DSC\", \"post_text\": \"[quote="gsmith":25o5km1e]The old one gave a bmp or jpeg/png (if libraries present). The new one does have the option to save as SVG which can be opened in "other" viewers... (but that method is probably not visible in the web page or IDE) - but could be invoked in the web page with some trickery?\\nI bet that someone with more skills than I could extract the image. I was just hoping that I could learn of a tool or setting hidden away somewhere that would provide that functionality. Maybe a special-built web page would be the way to go, though. Hmm.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-06 16:26:11\" },\n\t{ \"post_id\": 1033, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Re: Exporting call graph?\", \"username\": \"gsmith\", \"post_text\": \"[quote="DSC":1y7vqef7][quote="davidhof":1y7vqef7]As inelegant as it is, I have used SnagIt to crop screen captures.\\nYeah, I tried that and was unhappy with the results, both from a size and a resolution standpoint. I'm hoping for a high-res bitmap or, better yet, vector format (e.g., PDF).\\n\\nThe best would actually be a Visio or OmniGraffle export. One can dream, right?\\nThe old one gave a bmp or jpeg/png (if libraries present). The new one does have the option to save as SVG which can be opened in "other" viewers... (but that method is probably not visible in the web page or IDE) - but could be invoked in the web page with some trickery?\", \"post_time\": \"2012-02-06 15:28:44\" },\n\t{ \"post_id\": 1032, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Re: Exporting call graph?\", \"username\": \"DSC\", \"post_text\": \"[quote="davidhof":doiscz21]As inelegant as it is, I have used SnagIt to crop screen captures.\\nYeah, I tried that and was unhappy with the results, both from a size and a resolution standpoint. I'm hoping for a high-res bitmap or, better yet, vector format (e.g., PDF).\\n\\nThe best would actually be a Visio or OmniGraffle export. One can dream, right?\", \"post_time\": \"2012-02-06 15:23:37\" },\n\t{ \"post_id\": 1031, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Re: Exporting call graph?\", \"username\": \"davidhof\", \"post_text\": \"As inelegant as it is, I have used SnagIt to crop screen captures.\", \"post_time\": \"2012-02-06 14:45:21\" },\n\t{ \"post_id\": 1030, \"topic_id\": 249, \"forum_id\": 9, \"post_subject\": \"Exporting call graph?\", \"username\": \"DSC\", \"post_text\": \"Is there a way to export or otherwise image a call graph from the IDE or a web browser? I need to put together a presentation about HPCC and showing how code is turned into a graph would definitely go over well.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-06 14:26:40\" },\n\t{ \"post_id\": 1099, \"topic_id\": 263, \"forum_id\": 9, \"post_subject\": \"Re: source control hooks\", \"username\": \"aintnomyth\", \"post_text\": \"Very cool, good to hear. 
Thanks!\", \"post_time\": \"2012-02-13 13:34:47\" },\n\t{ \"post_id\": 1098, \"topic_id\": 263, \"forum_id\": 9, \"post_subject\": \"Re: source control hooks\", \"username\": \"gsmith\", \"post_text\": \"There is an ECL Plugin for Eclipse IDE in the works (and eclipse does have TLS support I believe).\\n\\nGordon.\", \"post_time\": \"2012-02-13 13:33:39\" },\n\t{ \"post_id\": 1097, \"topic_id\": 263, \"forum_id\": 9, \"post_subject\": \"source control hooks\", \"username\": \"aintnomyth\", \"post_text\": \"Is there a way to use the IDE to fire external source control commands or are there any plans for that? I'm using TFS, to check out a file I have to:\\n\\n1. Open the ECL file in the IDE (it has the locked icon)\\n2. Press F8 to locate the file in Windows Explorer\\n3. Right click the file and navigate through the TFS context menu to perform the check-out\\n4. Alt-Tab back to the IDE\\n5. Close the file in the IDE\\n6. Right-click the file, click Refresh (to remove the locked status)\\n7. Re-open the file in the IDE to edit the file\\n\\nSteps 2-7 could be eliminated if the IDE had some sort of add-in/scripting/macro mechanism...\\n\\nThanks!\", \"post_time\": \"2012-02-13 13:29:01\" },\n\t{ \"post_id\": 1116, \"topic_id\": 268, \"forum_id\": 9, \"post_subject\": \"Re: Repository Find\", \"username\": \"gsmith\", \"post_text\": \"That sounds like a bug (probably introduced when the ability to have more than one repository folder was added).\\n\\nhttps://github.com/hpcc-systems/eclide/issues/81\", \"post_time\": \"2012-02-15 13:55:44\" },\n\t{ \"post_id\": 1115, \"topic_id\": 268, \"forum_id\": 9, \"post_subject\": \"Repository Find\", \"username\": \"aintnomyth\", \"post_text\": \"When I use the Repository Find window to search for code it seems to only search the "ecllibrary" folder of the Repository but I actually have several blue "root" folders in the Repository. Is this a bug or user error?\", \"post_time\": \"2012-02-15 13:51:07\" },\n\t{ \"post_id\": 1239, \"topic_id\": 288, \"forum_id\": 9, \"post_subject\": \"Re: Debugger - generic question\", \"username\": \"bforeman\", \"post_text\": \"Trent, I received this feedback from Gordon:\\n\\nECL is compiled and produces an execution graph.\\nWithin the execution graph it injects source code file + line number information _when_possible_\\n\\nIt is this information that I use to list the active files and to show the possible breakpoints (as these known points relate back to places on the graph).\\n\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2012-03-04 16:18:45\" },\n\t{ \"post_id\": 1234, \"topic_id\": 288, \"forum_id\": 9, \"post_subject\": \"Re: Debugger - generic question\", \"username\": \"bforeman\", \"post_text\": \"Hi Trent...checking with documentation and development now. You would think that the active files option would show files current active in the ECL process, but I want to confirm.\", \"post_time\": \"2012-03-02 15:41:11\" },\n\t{ \"post_id\": 1230, \"topic_id\": 288, \"forum_id\": 9, \"post_subject\": \"Debugger - generic question\", \"username\": \"TrentB\", \"post_text\": \"While running the debugger, there is a window labeled Debug Information. There is a menu option called Active Files. What qualifies a file to be listed here? I noticed that All files that contain source does not appear. 
And I saw some files in the list that did not have the ability to set a break point in them so I wondered which files appeared in this list.\", \"post_time\": \"2012-03-01 15:29:25\" },\n\t{ \"post_id\": 1825, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"gsmith\", \"post_text\": \"[quote="gkrasnow":1k44hrnm]...\\nIs there a compiled version of the GraphControl plugin for Mac OS which could be used in Safari or Chrome?\\n\\n\\nNot right now, but it is getting closer to the top of my list.\\nFYI there is one for Ubuntu\\n\\nGordon.\\n\\nPS The Mac "client tools" are almost ready which will let you use the eclipse plugin on the mac...\", \"post_time\": \"2012-06-21 19:14:10\" },\n\t{ \"post_id\": 1824, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="gsmith":33uy8d2r][quote="DSC":33uy8d2r]I use a Mac, with the IDE running in a Windows host under Parallels. I use the IDE as little as possible...\\n\\nFor that scenario you may want to look at "ECL Command Line Interface" section in the Client Tools documentation. \\n\\nThat way you can submit + run ECL via the command line in Parallels and use Mac web browser to monitor and view the executing WUs. (Takes the IDE out of the loop).\\n\\nI will open an issue with regards to the IDE touching unmodified files (there is an option in preference to change the "Auto Save" frequency which may be the culprit).\\n\\nGordon.\\n\\nIs there a compiled version of the GraphControl plugin for Mac OS which could be used in Safari or Chrome?\", \"post_time\": \"2012-06-21 18:41:06\" },\n\t{ \"post_id\": 1351, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"ckaminski\", \"post_text\": \"[quote="botchagalupe":1s2uwdtf]If not are tere any examples on how to use the aws.hpccsystems.com/aws interface without an IDE?\\nHi John, Thanks for participating at the Cloud Computing Atlanta Meetup last night. I added an FAQ to the One-Click Thor site to answer your question.\\n\\nhttps://aws.hpccsystems.com/aws/faqs/\\n\\nCan I run ECL code without installing the IDE?\\n\\nYes. There is a primitive interface to run ECL code directly from the web browser as long as there are no external dependencies.\\n\\n1. Click on your ESP Page from the ‘View Clusters’ page. This will launch your ECL Watch page.\\n2. Click on ‘System Servers’ on the left\\n3. Click on ‘myesp’\\n4. Click on ‘myecldirect’\\n5. 
Click on ‘RunEcl’\\n\\nThere are a number of tools in the IDE not present in ECLDirect, but you will be able to run code.\", \"post_time\": \"2012-03-14 14:46:05\" },\n\t{ \"post_id\": 1350, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"gsmith\", \"post_text\": \"Also worth noting SciTE in the Mac App Store now supports ECL (since 3.04) the same is true in the Linux + Windows variants.\\n\\nIt is probably disabled by default - to enable it you have to remove it from the exclude list in SciTEGlobal.properties:\\n…snip…\\n# To keep menus short, these .properties files are not loaded by default.\\nimports.exclude=abaqus asn1 au3 avenue avs baan blitzbasic bullant \\\\\\ncobol cmake csound ecl escript flagship forth freebasic gap haskell inno \\\\\\nkix latex lot lout metapost mmixal modula3 nimrod nncrontab nsis \\\\\\nopal powerpro powershell purebasic r rebol \\\\\\nscriptol smalltalk sorcins spice specman \\\\\\ntacl tal txt2tags verilog vhdl\\n…snip…
\\n\\nGordon.\", \"post_time\": \"2012-03-14 13:43:13\" },\n\t{ \"post_id\": 1349, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"DSC\", \"post_text\": \"[quote="gsmith":12hz2ovt]For that scenario you may want to look at "ECL Command Line Interface" section in the Client Tools documentation. \\n\\nThat way you can submit + run ECL via the command line in Parallels and use Mac web browser to monitor and view the executing WUs. (Takes the IDE out of the loop).\\n\\nLet me rephrase my comment: I use the IDE for code creation as little as possible. Everything else is fine, though I do tend to use Chrome and the web-based ECL to watch the jobs. The IDE's use of panes and whatnot is what I find painful, and that's an entirely personal opinion. I avoid Eclipse for exactly the same reason.\\n\\nAll programmers have preferences (tabs! spaces! open-bracket-on-same-line! next-line! x_foo! xFoo!). Adaptability is the thing, no matter what.\\n\\n[quote="gsmith":12hz2ovt]I will open an issue with regards to the IDE touching unmodified files (there is an option in preference to change the "Auto Save" frequency which may be the culprit).\\n\\nI had forgotten about auto-save. I'll check that out forthwith.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-03-14 12:17:49\" },\n\t{ \"post_id\": 1348, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"gsmith\", \"post_text\": \"[quote="DSC":2zeh854k]I use a Mac, with the IDE running in a Windows host under Parallels. I use the IDE as little as possible...\\n\\nFor that scenario you may want to look at "ECL Command Line Interface" section in the Client Tools documentation. \\n\\nThat way you can submit + run ECL via the command line in Parallels and use Mac web browser to monitor and view the executing WUs. (Takes the IDE out of the loop).\\n\\nI will open an issue with regards to the IDE touching unmodified files (there is an option in preference to change the "Auto Save" frequency which may be the culprit).\\n\\nGordon.\", \"post_time\": \"2012-03-14 12:02:48\" },\n\t{ \"post_id\": 1347, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"DSC\", \"post_text\": \"I use a Mac, with the IDE running in a Windows host under Parallels. I use the IDE as little as possible.\\n\\nI use BBEdit for code editing, and the setup works well. The biggest problem is that the IDE 'touches' open files a lot and BBEdit notices this. It keeps popping up a warning that some external application has modified an open file, which in most cases is not true (other than the modification date). My workaround is to simply not keep the file opened in both BBEdit and in the IDE at the same time. For small test projects, that means having two files: One with the code, opened in BBEdit, and the other containing only a reference to the first file, open in the IDE. The second file is the one that is compiled/submitted/whatever.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2012-03-14 11:48:34\" },\n\t{ \"post_id\": 1345, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Re: Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"gsmith\", \"post_text\": \"The IDE will run with wine for Linux (see instructions below). 
I believe that it also works with "CrossOver" on the Mac.\\n\\nThere is also an Eclipse plugin under development but it will require that you manually build the eclcc compilers on Linux (not hard) or use the prebuilt ones from ECL IDE on windows (see instructions below).\\n\\nRun IDE in Wine:\\n1. Within the Synaptic Package Manager, select “wine1.2” (this corresponds to Wine version 1.1.31), install it and its dependencies.\\n2. Download msxml3.msi from Microsoft (latest at time of writing was Service Pack 7): \\nhttp://www.microsoft.com/downloads/deta ... laylang=en\\n3. Install msxml3.msi in Wine (just double click the msi file and Wine will install it).\\n4. Open “Configure Wine” (Applications/Wine/Configure Wine):\\na. Select the Libraries tab.\\nb. In the “New override for library” drop down select “msxml3”.\\nc. Press the add button.\\nd. Ensure msxml3 is selected in the “Existing overrides” list box and press Edit.\\ne. Select the “Native (Windows)” option and press OK.\\nf. Click OK to close the Wine Configuration window.\\n5. Install the HPCC Client Tools (again just double click the SetupClientTools.msi file and Wine will install it).\\n\\nInstall Eclipse Plugin:\\n1. Open Eclipse\\n2. Select "Help/Install New Software"\\n3. Click "Add" and enter: http://eclipse.hpccsystems.com/develop\\n4. Select ECL Language and continue to install.\", \"post_time\": \"2012-03-14 09:25:25\" },\n\t{ \"post_id\": 1343, \"topic_id\": 312, \"forum_id\": 9, \"post_subject\": \"Is there support for an ECL IDE on Mac OS or Ubuntu?\", \"username\": \"botchagalupe\", \"post_text\": \"If not are tere any examples on how to use the aws.hpccsystems.com/aws interface without an IDE?\", \"post_time\": \"2012-03-14 03:03:05\" },\n\t{ \"post_id\": 1370, \"topic_id\": 314, \"forum_id\": 9, \"post_subject\": \"Re: Published Job Name\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Actually #workunit('name', 'myname') is supposed to do what you wanted, and in fact the workunit very briefly has the job name you wanted. \\n\\nThen the ide or the services behind it actually change the name after.\\n\\nWe should consider changing that.\\n\\nI'll open a github issue for discussion.\\n\\nTony\", \"post_time\": \"2012-03-15 23:47:57\" },\n\t{ \"post_id\": 1368, \"topic_id\": 314, \"forum_id\": 9, \"post_subject\": \"Re: Published Job Name\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nWhat I meant to talk about was programmatically determining the string that would appear in that Job Name field. 'Programatically' should be 'determine at compile time' or something along those lines.
AH! In that case the answer is, "Yes, there is -- using an appropriate naming convention for the EXPORT definition names of the queries that you want to publish." \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-15 19:59:32\" },\n\t{ \"post_id\": 1367, \"topic_id\": 314, \"forum_id\": 9, \"post_subject\": \"Re: Published Job Name\", \"username\": \"DSC\", \"post_text\": \"Ah, well. My poor choice of phrasing earlier was undoubtedly due to low caffeine intake. That's my story, and I'm sticking to it.\\n\\nWhat I meant to talk about was programmatically determining the string that would appear in that Job Name field. 'Programatically' should be 'determine at compile time' or something along those lines. That #WORKUNIT directive seemed promising, as would a C-style #define or something similar.\", \"post_time\": \"2012-03-15 19:36:15\" },\n\t{ \"post_id\": 1365, \"topic_id\": 314, \"forum_id\": 9, \"post_subject\": \"Re: Published Job Name\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\n
Is there a way to dynamically set that Jobname value?
In the 3.6 environment I can was able to edit the Jobname content before I hit Publish. I was able to name my query "Fred" quite nicely. \\n\\nAs far as "dynamically" goes -- that sounds like you want to automate your query publishing somehow and specify how the default Jobname value is populated. I don't think that's possible (other than using a nice naming convention for the queries that you want to publish), and from what I've seen around here I can't envision a scenario where we would want to do that -- publishing queries is a very programmer-hands-and-eyes-on type of process. \\n\\nSo my question back to you is -- what are you trying to accomplish?\\n\\nRichard\", \"post_time\": \"2012-03-15 19:01:53\" },\n\t{ \"post_id\": 1362, \"topic_id\": 314, \"forum_id\": 9, \"post_subject\": \"Published Job Name\", \"username\": \"DSC\", \"post_text\": \"After compiling a query and viewing the Workunit Details in the IDE, you have the opportunity to publish the query with a button. The Jobname field is prepopulated and is based on the name of the file you just compiled. Is there a way to dynamically set that Jobname value? I've seen #WORKUNIT (with the 'name' option) but that doesn't do what I want.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-15 13:08:13\" },\n\t{ \"post_id\": 1384, \"topic_id\": 316, \"forum_id\": 9, \"post_subject\": \"Re: compiler error after install\", \"username\": \"gsmith\", \"post_text\": \"I would suspect a "CFG file" issue then:\\n\\nIf you are having continual issues logging in, you can try the following steps to create a "clean" startup (try 1 first, then 2 if needed):\\n\\n1. Goto preferences and press “New” and re-enter the server information under a new configuration name.\\n2. Open regedit and delete (or rename) the following key: HKEY_CURRENT_USER\\\\Software\\\\HPCC Systems\", \"post_time\": \"2012-03-18 08:17:35\" },\n\t{ \"post_id\": 1383, \"topic_id\": 316, \"forum_id\": 9, \"post_subject\": \"Re: compiler error after install\", \"username\": \"ckaminski\", \"post_text\": \"It's happening when the user logs into the cluster.\", \"post_time\": \"2012-03-18 02:03:20\" },\n\t{ \"post_id\": 1382, \"topic_id\": 316, \"forum_id\": 9, \"post_subject\": \"Re: compiler error after install\", \"username\": \"gsmith\", \"post_text\": \"Did they mention when the crash happens? At startup, login, syntax check or submit query?\\n\\nGordon.\", \"post_time\": \"2012-03-17 22:33:05\" },\n\t{ \"post_id\": 1381, \"topic_id\": 316, \"forum_id\": 9, \"post_subject\": \"compiler error after install\", \"username\": \"ckaminski\", \"post_text\": \"A user is getting the following error after installing the windows ide\\n===\\n\\n It gives me a compiler issue error. So I went to the compiler tab in the preferences window and set the compiler to link to\\n\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\eclcc.exe and C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\eclcc.exe\\n\\nboth give the following error: ECLIDE.exe caused UNKNOWN_ERROR in module "C:\\\\Windows\\\\syswow64\\\\KERNELBASE.dll" at 0023:756AB9BC, RaiseException()+88 byte(s)\\n\\nNot sure how to fix this and the bug reporting system won't work since it requires some sort of email client. 
The computer is up to date with all the patches, and its running windows 7 64 bit w/ i7.\", \"post_time\": \"2012-03-17 21:43:23\" },\n\t{ \"post_id\": 1474, \"topic_id\": 331, \"forum_id\": 9, \"post_subject\": \"Re: Newbie question: unable to communicate with server\", \"username\": \"eric.scott\", \"post_text\": \"Thanks. It looks like the problem comes from the fact that I was running on a VPN.\", \"post_time\": \"2012-04-10 17:11:49\" },\n\t{ \"post_id\": 1464, \"topic_id\": 331, \"forum_id\": 9, \"post_subject\": \"Re: Newbie question: unable to communicate with server\", \"username\": \"gsmith\", \"post_text\": \"
Going to a browser and entering <ip>:8010 gets me 'web page cannot be displayed'.
\\n\\nThis indicates it is a network issue (not an IDE issue), which probably means the network adapter used in VMWare is not playing nice.\\n\\nFWIW I have my virtual machine configured with "Host-only" network adapters (which is fine for my usage) and the HPCC-Platform configures itself in the 192.168.X.Y range. \\n\\nHTH,\\n\\nGordon.\", \"post_time\": \"2012-04-10 07:39:09\" },\n\t{ \"post_id\": 1460, \"topic_id\": 331, \"forum_id\": 9, \"post_subject\": \"Newbie question: unable to communicate with server\", \"username\": \"eric.scott\", \"post_text\": \"I'm trying to go through the tutorial for HPCC using the VM version of HPCC.\\n\\nI've downloaded the VM and gotten a pulse from it; it gives me an IP address. Along the way I've given logins and passwords for VMWARE and HPCC. When the VM comes up it gives a login and password as 'hpccdemo', which works to get me to a linux prompt.\\n\\nI add said IP address to the preferences in the ECL IDC, it prompts me for a login and password. I've added every one of the login/pw pairs I've had to enter up to this point, and I get an error: 'unable to communicate with server'.\\n\\nGoing to a browser and entering <ip>:8010 gets me 'web page cannot be displayed'.\\n\\nIs there a step here that I'm missing?\\n\\nAny help appreciated.\\n\\nThanks,\", \"post_time\": \"2012-04-09 22:32:46\" },\n\t{ \"post_id\": 1581, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"rtaylor\", \"post_text\": \"I've never actually seen it documented anywhere, but the max/min skew values are a percentage difference (+ for max, - for min) between the perfect distribution count of records and the actual count of records for the most extreme node in each category.
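\\n\\nPut as a quick ECL sketch (the numbers are purely illustrative and the attribute names are mine -- they are not anything the platform itself reports):\\n\\ntotalRecs := 800000; // total records in the recordset\\nnodes := 10; // number of Thor slave nodes\\nperfectPerNode := totalRecs / nodes; // 80,000 -- the perfect distribution count\\nmaxOnNode := 320000; // most records held by any single node\\nminOnNode := 20000; // fewest records held by any single node\\nmaxSkewPct := ((maxOnNode - perfectPerNode) / perfectPerNode) * 100; // +300\\nminSkewPct := -((1 - (minOnNode / perfectPerNode)) * 100); // -75\\nOUTPUT(maxSkewPct);\\nOUTPUT(minSkewPct);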
This is one of the things we talk about in our Intro classes when we go over what the graphs represent and how they can be used. My usual example is a 3-node Thor, 300 records in a dataset, and skew percentages of (+200%, -100%) -- how many records are on each node? And the answer is: two of the nodes have no records and the other has all 300.\", \"post_time\": \"2012-05-02 14:31:20\" },\n\t{ \"post_id\": 1580, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"DSC\", \"post_text\": \"So... skew numbers closer to zero are better during a running graph?\\n\\nWhat kinds of things should we look at -- or better yet, do -- if we see outrageous skew numbers?\", \"post_time\": \"2012-05-01 14:22:35\" },\n\t{ \"post_id\": 1576, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"joecella\", \"post_text\": \"First, let me say that "If you're not viewing your graphs, you're not developing in ECL." Part of maximizing the information provided in the graphs is understanding the skew metrics.\\n\\nI've never actually seen it documented anywhere, but the max/min skew values are a percentage difference (+ for max, - for min) between the perfect distribution count of records and the actual count of records for the most extreme node in each category. \\n\\nperfect distribution count per node = (total records / # of thor nodes)\\n\\nmaxskew = (highest record count for a single node - (total records / # of thor nodes)) / (total records / # of thor nodes)) * 100 and displayed as a %\\n\\nminskew = (1 - (lowest record count for a single node / (total records / # of thor nodes))) * 100 and displayed as a %\\n\\nSo, if there are 800,000 records in a recordset being processed by a 10 way thor, the perfect distribution would have 80,000 records on each node. If the maximum number of records on a single node is 320,000 and minimum is 20,000, the maxskew is +300% and the minskew is -75%.\\n\\nThe maximum maxskew is controlled by the number of nodes. A 10 way thor has a max maxskew of +900% if all the data (9x what is expected) is on a single node. A 100 way thor was a max maxskew of +9900%. A 400-way +39900%, etc. \\n\\nThe maximum (min?, most exteme) min skew is -100%. \\n\\nOther notes on skew:\\n\\nWhen watching the graph for a running workunit, the skew numbers will update as the data is processed through activities in the subgraphs (within the graph within the wokrunit). Something like a COUNT PROJECT that executes as a globally sequencial activity will show a -100% minskew until it begins work on the last thor node. Some activities that redistribute the data, like a global SORT, cannot report skew while executing because the skew is not known for an individual node until the activity completes.\\n\\nIf an activity has values of +0% and -0%, the skew values are not displayed. \\n\\nAs, you have seen, sometimes when watching a graph run, the last update to the graph sometimes is missed by the refresh logic, so the final values may display incorrectly.\", \"post_time\": \"2012-04-28 04:04:58\" },\n\t{ \"post_id\": 1527, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"gsmith\", \"post_text\": \"Sounds like an IDE issue. I suspect the IDE sees the “completed” message before getting the last refresh of graph data. 
I will open an issue on github.\\n\\nGordon.\", \"post_time\": \"2012-04-16 09:33:01\" },\n\t{ \"post_id\": 1517, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"jeremy\", \"post_text\": \"To further test:\\n1. In the IDE, I closed the local workunit and then opened it again from the workunit browser... same issue.\\n2. I shutdown the IDE, then started it up again, loaded the workunit... and now the results match ECL Watch...\\n\\nPerhaps some type of cache issue?\", \"post_time\": \"2012-04-13 14:10:06\" },\n\t{ \"post_id\": 1516, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"jeremy\", \"post_text\": \"So I checked that last night and the results were still different... I just repeated the same steps:\\n1. Run a workunit in the IDE, wait for completion.\\n2. Refresh the graph layout in the IDE.\\n3. Compare to the graph layout on ECL Watch.\\nHere are the two snapshots... both say "completed".\\n\\nECL IDE:\\n[attachment=1:2v1gx0qa]ecl-ide.png\\n\\nECL Watch:\\n[attachment=0:2v1gx0qa]ecl-watch.png\", \"post_time\": \"2012-04-13 14:05:22\" },\n\t{ \"post_id\": 1513, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"gsmith\", \"post_text\": \"Both the IDE and ECL Watch use the same data to visualize the graph so both should be the same. The only explanation for the difference is that one was for the completed graph and the other was for a running graph? If you open the completed WU in the IDE and look at the graph does it now match the ECL Watch one?\\n\\nGordon.\", \"post_time\": \"2012-04-13 08:55:20\" },\n\t{ \"post_id\": 1512, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"Re: min/max skews in Graphs view\", \"username\": \"jeremy\", \"post_text\": \"Interestingly enough, the graph view available via the ECL Watch for the same completed workunit shows different ( and much saner ) skews, so maybe the issue is with the IDE's representation?\", \"post_time\": \"2012-04-13 04:27:59\" },\n\t{ \"post_id\": 1511, \"topic_id\": 338, \"forum_id\": 9, \"post_subject\": \"min/max skews in Graphs view\", \"username\": \"jeremy\", \"post_text\": \"I noticed that when Thor runs workunits against relatively small datasets ( 100s of MB ), there are sometimes relatively large min/max skews displayed in the IDE graph... in particular I'm seeing a -100% min skew for a node that I know has relevant data on it. I can't find much in the way of documentation on what those skews mean. Are the skews similar to DISTRIBUTE, measuring the relative difference in data processed on each node for that particular step? 
And when I see a -100% for a given node, is that because Dali realized that it would be faster to just use the mirrored copies for some nodes so that fewer nodes had to be queried overall?\", \"post_time\": \"2012-04-13 04:18:04\" },\n\t{ \"post_id\": 1542, \"topic_id\": 342, \"forum_id\": 9, \"post_subject\": \"Re: 'Close All' Quick access command no longer available at \", \"username\": \"Allan\", \"post_text\": \"Thanks Gordon,\\n\\nWell yes - there it is - Don't know why I could not see it before.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-19 08:51:31\" },\n\t{ \"post_id\": 1539, \"topic_id\": 342, \"forum_id\": 9, \"post_subject\": \"Re: 'Close All' Quick access command no longer available at \", \"username\": \"gsmith\", \"post_text\": \"Its still there under "Home/Window/Windows (drop down)/Close All".\\n\\nTo add to the QAT (Quick Access Toolbar):\\nRight click on QAT and select Customize QAT\\nSelect "Home" in the drop down.\\nLocate "Close All" and press "Add>>"\\n\\nGordon.\", \"post_time\": \"2012-04-18 19:26:46\" },\n\t{ \"post_id\": 1538, \"topic_id\": 342, \"forum_id\": 9, \"post_subject\": \"'Close All' Quick access command no longer available at 3.6\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWith 3.0.2 of the IDE there was a 'close all' button available in the list of commands. (it was a green circle).\\nThis was very handy as I often had 10+ files open.\\n\\nAt 3.6 of the IDE this command does not seem to be available and has disappeared from my quick access bar.\\n\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-18 19:12:28\" },\n\t{ \"post_id\": 1579, \"topic_id\": 351, \"forum_id\": 9, \"post_subject\": \"Re: Compatibility\", \"username\": \"DSC\", \"post_text\": \"Excellent. Thanks, Gordon!\", \"post_time\": \"2012-04-30 20:48:02\" },\n\t{ \"post_id\": 1578, \"topic_id\": 351, \"forum_id\": 9, \"post_subject\": \"Re: Compatibility\", \"username\": \"gsmith\", \"post_text\": \"In general the latest version of the IDE will work with _all_ older versions (and will have a fair chance of working with newer versions).\\n\\nBUT - There is a tighter coupling between the eclcc compiler and the server version.\\n\\nYou may have noticed that 3.6.x IDE includes two sets of "Client Tools" 3.6 + 3.0. The user gets to choose which Client Tools to use on a per-configuration basis (at the moment they pick 3.0 for all server version <= 3.4.x and 3.6 for all versions > 3.6.0, thus allowing the one IDE to target any server version released (to-date).\\n\\nNote: If the user submits a query with the wrong version of eclcc they will receive a warning.\\n\\nFor the "point" version change (versioning: major.minor.point) there would never be any intentional breaking change from the client eclcc point of view. IOW the latest 3.6.X IDE (and client tools) should always be the "best" one to use for ALL 3.6.Y servers.\\n\\nFrom 3.8 onwards we plan on packaging the client tools as separate downloadable and installable entities (which will then get bundled with the IDE or Eclipse etc.)\\n\\nHTH, Gordon.\", \"post_time\": \"2012-04-30 20:08:38\" },\n\t{ \"post_id\": 1577, \"topic_id\": 351, \"forum_id\": 9, \"post_subject\": \"Compatibility\", \"username\": \"DSC\", \"post_text\": \"What kind of compatibility can we expect in regards to mixing versions of the IDE and the server? None? 
Will later versions of the IDE work with older versions of the server?\\n\\nI'm specifically wondering about 3.6.2 versus 3.6.0, but the general question sounds better.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-04-30 19:18:43\" },\n\t{ \"post_id\": 1647, \"topic_id\": 367, \"forum_id\": 9, \"post_subject\": \"Re: XSLT/Visualization\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Have you ensured that the IDE has been updated to reference the manifest.xml file? If not, please refer to the Visualization Library section of the ML Modules chapter in the Machine Learning documentation: http://cdn.hpccsystems.com/pdf/machinelearning.pdf\\n\\nIf the manifest reference is there, then you should have access to the charts from the IDE. Bear in mind that the charts do not appear directly in the IDE -- rather, the IDE will present links in the ECL Watch tab's Results section that will open the charts in a separate browser. These links will all contain the text "View Chart".\\n\\nThe VL is designed to act as an interface to publicly-available third party charting APIs, which for the most part use SVG or VML. As such, the extent to which a chart can be saved depends on whether those APIs enable that (for example, using the now-deprecated "image" chart types in Google Charts).\\n\\nI hope this helps.\", \"post_time\": \"2012-05-24 11:43:28\" },\n\t{ \"post_id\": 1645, \"topic_id\": 367, \"forum_id\": 9, \"post_subject\": \"XSLT/Visualization\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nWe want to show the output of our ecl code in the form of Line chart or Pie chart.\\nWhat all files or changes are required to achieve this result.\\nCan we view these charts in ECL IDE and can we save them.\\n\\nWrite now we tried running the sample code in the VL library but the output is coming in tabular format.\\nwe are using Community Edition 3.6.0-1.\\n\\nPlease guide us on this as we need this Visualization Charts for a Client Demo.\", \"post_time\": \"2012-05-24 05:57:37\" },\n\t{ \"post_id\": 1679, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Re: Interoperating with ECL Plus\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric,\\n\\nYes, I am testing with the latest OSS version 3.6\\n\\nI copied the ECLPLUS.EXE from the BIN folder to a folder I named ECLPLUS.\\nThe ECLPLUS INI is in the same folder as the ECLPLUS EXE\\n\\nHere is what I'm seeing when duplicating your test:\\n\\nC:\\\\ECLPLUS ECLPLUS action=view wuid=W20120531-083800 \\n[Result 1]\\nResult_1\\n5
\\n\\nI'll try the same with legacy a little later, but OSS looks OK. \\n\\nSorry I can't reproduce. I would guess that perhaps you may be using the wrong INI, but your list command works fine. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-31 12:48:09\" },\n\t{ \"post_id\": 1677, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Re: Interoperating with ECL Plus\", \"username\": \"eric.scott\", \"post_text\": \"OK, Here's a minimal case.\\n\\nIn a builder window, I enter\\n\\n----\\noutput (5);\\n---\\n And run it in the IDE, getting a result as '5' in a single cell, assigned to wid W20120530-165043\\n\\nOn the ECLPlus side ...\\n\\nc:\\\\HPCC\\\\code>eclplus action=list\\neclplus action=list\\nWUID OWNER JOBNAME STATUS\\nyadda\\nyadda\\nW20120530-165043 myUserName completed\\n\\nc:\\\\HPCC\\\\code>eclplus action=view wid=W20120530-165043\\neclplus action=view wid=W20120530-165043\\n\\nc:\\\\HPCC\\\\code>\\n(note the blank line)\\nI also get a blank line when I ask for a dump of the work unit.\\n\\nAm I making some subtle syntax error here?\\n\\nAlso, I should add that I'm using the 'legacy' version of HPCC here...\", \"post_time\": \"2012-05-30 21:09:32\" },\n\t{ \"post_id\": 1676, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Re: Interoperating with ECL Plus\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric,\\n\\nNo, I actually used ECLPlus and the original ECL file to create the workunit, but I also tested the code in the ECL IDE before applying it to ECLPlus, so we are talking about two different workunits.\\n\\n...and I'm sorry, I forgot the "W" in front of the number....typo.
\\n\\nShould be:\\n\\nC:\\\\ECLPlus eclplus action=view wuid=W20120524-150075 \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-30 20:21:34\" },\n\t{ \"post_id\": 1675, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Re: Interoperating with ECL Plus\", \"username\": \"eric.scott\", \"post_text\": \"Was wuid=20120524-150075 executed using the IDE?\", \"post_time\": \"2012-05-30 20:08:17\" },\n\t{ \"post_id\": 1674, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Re: Interoperating with ECL Plus\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric,\\n\\nI can't seem to reproduce what you are reporting. My INI looks like this:\\n\\nserver=xx.xxx.xxx.x\\ncluster=thor\\nowner=bforeman\\njobname=Exercise12Test\\n\\nExecuting ECLPlus as you say:\\n\\nC:\\\\ECLPLUS eclplus action=view wuid=20120524-150075\\n\\nWorks fine here....\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-30 18:45:41\" },\n\t{ \"post_id\": 1673, \"topic_id\": 372, \"forum_id\": 9, \"post_subject\": \"Interoperating with ECL Plus\", \"username\": \"eric.scott\", \"post_text\": \"I need to interact with THOR clusters programmatically, and I'm hoping to be able to do this with ECL Plus. I'm hitting a snag trying to share content between what is accessible through the IDE and what is accessible through ECL Plus.\\n\\nMy ECLPLUS.INI points to the same userid, cluster and queue as the IDE, using exactly the same IP address. \\n\\nSo let's say I have created a work unit with say wid W12345-6789 using the IDE, is there a way I could enter:\\n\\neclplus action=view wuid=W12345-6789 \\n\\nAnd recieve the results from that work unit?\\n\\nRight now when I do this I get no error message, but there is nothing displayed. I should add that I get the expected results in cases where the work unit is created within the ECL Plus environment. \\n\\nThanks,\", \"post_time\": \"2012-05-30 17:36:33\" },\n\t{ \"post_id\": 1773, \"topic_id\": 389, \"forum_id\": 9, \"post_subject\": \"Re: MERGE fails with set of dataset\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n
What I need exactly is to make a dataset from a set of datasets without using artificial records that complicate the code.
To my simple mind, this sounds like you just want to treat multiple datasets as a single entity and query that single entity. If that is correct then there are a couple of ways to go about it. The first is to simply append the datasets, using either the + or & operators, llike this:rec := RECORD\\n INTEGER i;\\nEND;\\n\\nds1 := DATASET([{1},{2},{3}],rec);\\nds2 := DATASET([{3},{2},{1}],rec);\\nds3 := DATASET([{5},{4},{6}],rec);\\n\\ndsA := ds1 + ds2 + ds3;\\ndsB := ds1 & ds2 & ds3;\\n\\nOUTPUT(dsA);\\nOUTPUT(dsB);
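\\n\\nFor what it is worth, the usual aggregates behave as you would expect on the appended result -- for example, extending the code above:\\n\\nOUTPUT(COUNT(dsA)); // 9 -- the total record count across ds1, ds2 and ds3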
The second way to go about it would be to use Superfiles, as described in the Programmer's Guide (or come to our ECL classes http://hpccsystems.com/community/traini ... s/training -- Superfiles are taught in the Advanced Thor class). I am surprised because the sds[0] should be accessible as a dataset according to GRAPH documentation example.
I think you may have mis-read the docs. You were trying to apply the GRAPH docs discussion of its third parameter and use that information in its first parameter -- which obviously does not work. \\n\\nThe ROWSET(LEFT) may take an index value of 0 as an argument to the processor call specified by the third parameter to GRAPH. AFAIK, this is the only use of a 0 index value anywhere in ECL. ECL is 1-based in all other cases. \\n\\n
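So, if the intent was simply to seed GRAPH with the first dataset in the set, the 1-based form of your example would be the following (an untested sketch that only changes the index -- rec, sds and f are your own definitions from the code you posted):\\n\\nDATASET(rec) ds := GRAPH(sds[1],3,f(ROWSET(LEFT),COUNTER));\\nOUTPUT(ds);\\n\\n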
I also have another question: is it possible to have an equivalent of the COUNT operation with a set of datasets?
If you want a COUNT of the number of datasets in the set, then yes. If you want a COUNT of the number of records across all the datasets in the set, then no. However, COUNT will function correctly on the appended datasets and superfiles I described above.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-13 14:10:53\" },\n\t{ \"post_id\": 1769, \"topic_id\": 389, \"forum_id\": 9, \"post_subject\": \"Re: MERGE fails with set of dataset\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\n\\nI am not sure now I need specifically a MERGE operation. \\nWhat I need exactly is to make a dataset from a set of datasets without using artificial records that complexify the code. \\n\\nNow I am facing a new choice : either I find a suitable workaround, or I recompile source code branch with the fix on the VM image.\\n\\nI would prefer the second solution as it is cleaner but I wonder if it is easy to do.\\n\\nI have found some workaround but I am facing a new problem concerning access to one dataset in a set of dataset, by its indice ("sds[i]").\\n\\nIn detail, here is a simple example to illustrate : \\n\\nrec := RECORD\\nINTEGER i;\\nEND;\\n\\nds1 := DATASET([{1},{2},{3}],rec);\\nds2 := DATASET([{1},{2},{3}],rec);\\nds3 := DATASET([{1},{2},{3}],rec);\\n\\nSET OF DATASET(rec) sds := [ds1,ds2,ds3];\\n\\n// f do nothing : return always the same dataset, just for test\\nDATASET(rec) f(SET OF DATASET(rec) fds,INTEGER c) := FUNCTION \\n RETURN DATASET([{1},{2},{3}],rec);\\nEND;\\n\\nDATASET(rec) ds := GRAPH(sds[0],3,f(ROWSET(LEFT),COUNTER));\\n\\nOUTPUT(ds);
\\n\\nI get an error :\\n\\nC:\\\\Users\\\\JEAN-M~1\\\\AppData\\\\Local\\\\Temp\\\\TFRCC4.tmp (19,29) : 3000: assert(false) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Ubuntu-10.04-i386/HPCC-Platform/common/deftype/deftype.ipp, line 553\\n\\nWhen I change the code with :\\n DATASET(rec) ds := GRAPH(ds1,3,f(ROWSET(LEFT),COUNTER));
there is no more error.\\n\\nI am surprised because the sds[0] should be accessible as a dataset according to GRAPH documentation example.\\n\\nI have also another question : is it possible to have an equivalent of COUNT operation with a set of dataset ?\\n\\nBest Regards,\\nJM\", \"post_time\": \"2012-06-13 11:07:02\" },\n\t{ \"post_id\": 1766, \"topic_id\": 389, \"forum_id\": 9, \"post_subject\": \"Re: MERGE fails with set of dataset\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nOK, I duplicated the problem and will report it.\\n\\nThe issue appears to me to be specific to the second form of MERGE (set of datasets) and the first form (comma-delimited list of files) does operate correctly in my testing, so using that first form would be your workaround.\\n\\nI have also updated the example code in the docs to add that missing SORT option.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-12 14:42:05\" },\n\t{ \"post_id\": 1765, \"topic_id\": 389, \"forum_id\": 9, \"post_subject\": \"MERGE fails with set of dataset\", \"username\": \"ideal\", \"post_text\": \"I am new to ecl and hpcc system.\\nTo sumarize my issue : I can merge two datasets with first syntax but I can't merge a set of dataset with second syntax. As I want to combine undetermined length datasets, I would prefer to use second form if possible.\\n\\nIn detail:\\nI am using the sample from documentation MERGE as below :\\n\\nSetDS := [ds1,ds2];\\nds4 := MERGE([ds1,ds2],letter,number);\\n\\nWith this syntax, I first receive a warning about deprecated use.\\n\\nI replace by \\nds4 := MERGE([ds1,ds2],letter,number,SORTED(letter,number));\\n\\nthen, I submit the query and fail with error message below : \\n\\n000001D7 2012-06-12 14:35:52 2092 2092 Processing graph - graph(graph1, 1)\\n000001D8 2012-06-12 14:35:52 2092 2881 3000: Graph[1], nwaymerge[4]: SLAVE 192.168.23.128:20100: assert(started()) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Ubuntu-10.04-i386/HPCC-Platform/thorlcr/activities/./../activities/thactivityutil.ipp, line 211\\n000001D9 2012-06-12 14:35:52 2092 2881 INFORM [EXCEPTION]\\n000001DA 2012-06-12 14:35:52 2092 2881 3000: Graph[1], nwaymerge[4]: SLAVE 192.168.23.128:20100: assert(started()) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Ubuntu-10.04-i386/HPCC-Platform/thorlcr/activities/./../activities/thactivityutil.ipp, line 211\\n000001DB 2012-06-12 14:35:52 2092 2881 Posting exception: Graph[1], nwaymerge[4]: SLAVE 192.168.23.128:20100: assert(started()) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.2/CE/Ubuntu-10.04-i386/HPCC-Platform/thorlcr/activities/./../activities/thactivityutil.ipp, line 211 to agent 192.168.23.128 for workunit(W20120612-143551)\\n000001DC 2012-06-12 14:35:52 2092 2881 INFORM [EXCEPTION]\\n\\nIt seems something goes wrong with graph but I don't know how to bypass this problem. \\nI would appreciate if you could help me. Thanks.\\nJM.\", \"post_time\": \"2012-06-12 13:30:37\" },\n\t{ \"post_id\": 1983, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"OK, it is not convenient to me, but at least, it is clear. I am looking for an ITERATE-like (GRAPH I think) function instead. 
It allows transforming input dataset "recursively" but it force to determine in advance the number of iterations.\\n\\nThis close the subject.\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-07-16 14:37:25\" },\n\t{ \"post_id\": 1980, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ghalliday\", \"post_text\": \"Unfortunately the current version of Thor doesn't support that particular variety of LOOP. It required some changes in the code generation - which are included in 3.8 - but didn't make it in time for Thor to be updated.\\n\\nThe plan is to include it in the next release - either 3.10 or possibly (depending on the scope of the change) 3.8.2.\", \"post_time\": \"2012-07-16 07:50:06\" },\n\t{ \"post_id\": 1977, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"If your LOOP code does not work on Thor, [..] Your workaround is to run that job on hThor
\\n\\nIn a distributed parallel computing environment, I don't think hthor can be considered as a "workaround".\\n\\nJM.\", \"post_time\": \"2012-07-14 18:20:45\" },\n\t{ \"post_id\": 1940, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"If your LOOP code does not work on Thor, then that should also be reported as a bug
\\n\\nAs I said before, the error message tells us it is not a bug : \\n
Error: System error: 0: Graph[1], Thor currently, does not support a dataset loop condition, activity id: 7 (0, 0), 0,
\\n\\nThat is the reason why I am asking you :
why is there an entry about LOOP function in documentation when it is not available in the software
. \\nI chose HPCC by reading the documentation and finding good things inside, some of which are essential to me, but it seems it does not work as written. In that sense, it is a bug. My partial workaround until now is an ITERATE function, but it is not perfectly accurate.\\n\\nRoughly speaking, what I need to do is start with a set of records and then add or remove records by applying transformations or tests involving all the records together. This process must run until there are no more new records to add. Then, this is typically a potentially infinite loop with a break test
.\\n\\nWhen you say it is available on hthor, it does not suit my requirements because what I need the most is scalability. \\n\\nIf you can solve this, it would be nice.\\n\\nJM\", \"post_time\": \"2012-07-11 18:32:29\" },\n\t{ \"post_id\": 1921, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\nwhy is there an entry about LOOP function in documentation when it is not available in the software
LOOP is a valid function in the ECL Language that performs recursive tasks. It was originally designed to operate only on hThor (a testbed platform) and Roxie clusters (a production environment) and not on Thor clusters (primarily dedicated to ETL work). It was documented that way when it was first introduced several years ago, and it still works correctly on those platforms. \\n\\nWhen we Open Sourced HPCC, both LOOP and GRAPH were meant to be updated to also operate on Thor. You are already aware of the GRAPH bug in 3.6 that is fixed in 3.8. If your LOOP code does not work on Thor, then that should also be reported as a bug. Your workaround is to run that job on hThor.\\n\\nIf LOOP is not available, how is it possible to replace it by a potentially infinite loop with break test ?
Rather than answer this question, let me ask this one:\\n \\nWhat are you trying to accomplish? \\n\\nI ask this because it is quite possible that there is another more "ECL-ish" way to accomplish what you want to do. ECL is not like other languages, and frequently requires a different mindset to approaching problems than that used in procedural languages -- take a look at the "Attribute Creation" Programmer's Guide article for a discussion of the "bottom-up" approach to problem solving that works best with ECL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-09 14:18:42\" },\n\t{ \"post_id\": 1917, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"If LOOP is not available, how is it possible to replace it by a potentially infinite loop with break test ?\", \"post_time\": \"2012-07-08 10:25:59\" },\n\t{ \"post_id\": 1916, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"Richard,\\n\\nLet me rephrase my question to clarify it : why is there an entry about LOOP function in documentation when it is not available in the software, as stated in previous post ? \\n\\nThanks,\\nJM.\", \"post_time\": \"2012-07-08 08:30:45\" },\n\t{ \"post_id\": 1911, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,Then, is it possible to do a real loop in ecl (not just simulated with iterate) ?
Doing a "real loop" is an inherently procedural task, and since ECL is a declarative, non-procedural language, the answer to that would be no. \\n\\nPROJECT, ITERATE, and all the rest of the functions that use TRANSFORMs are designed to handle all the "looping" type of situations. The LOOP and GRAPH functions aren't even real loops either, but more a form of recursion.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-06 21:15:17\" },\n\t{ \"post_id\": 1909, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"After changing my code to avoid above issues, I come back now to the loop command, hoping it can work with this new release as it would be very convenient to have it. \\nApparently we still have the same problem : \\n\\nError: System error: 0: Graph[1], Thor currently, does not support a dataset loop condition, activity id: 7 (0, 0), 0, \\n
\\n\\nI checked the hpcc system version by looking on EclWatch and get this : \\nEnterprise Services Platform\\ncommunity_3.8.0-4rc \\n\\nAt first try, there was a compiler mismatch but still the same loop error after configuring the right compiler.\\n\\nThen, is it possible to do a real loop in ecl (not just simulated with iterate) ?\\nJM.\", \"post_time\": \"2012-07-06 15:00:03\" },\n\t{ \"post_id\": 1903, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nIt solved the problem, at least on this sample above.\\n\\nThanks,\\nJM\", \"post_time\": \"2012-07-05 21:44:36\" },\n\t{ \"post_id\": 1890, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"jsmith\", \"post_text\": \"[quote="ideal":1nx9xd82]\\nI think there is a bug with GRAPH nested in ITERATE, ON THOR, which is very annoying for me as I expected to run queries to filter complex data on THOR.\\nMy last try is on VM image but I have the same behaviour on a AWS cluster.\\n\\nDon't try to find a meaning in the code I provide, as I reduced it to emphasize my problem. I want this (GRAPH nested in ITERATE) to work unless you tell me it is impossible, but in this case, I don't see it in documentation.\\n\\n\\n[quote="ideal":1nx9xd82]\\nSimply, you will find commented in function f3 different alternatives : one works and others don't on THOR but all work on HTHOR.\\n\\n\\nHi,\\n\\nare you using build 3.6.2 ? If so, there was a bug affecting GRAPH and LOOP there, that caused some child queries to spuriously throw an error when they were run : "Global child graph? : Global acts = ..".\\nI've run your example and reproduced that bug in 3.6.2\\n\\nIn 3.8 those issues are fixed, are you able to try the latest 3.8 release candidate that's available on the portal?\", \"post_time\": \"2012-07-05 09:10:54\" },\n\t{ \"post_id\": 1888, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\nI am not sure to understand perfectly the exact difference between THOR and ROXIE. \\nI tested some simple code on a THOR AWS cluster and saw it is scalable, gaining performances by adding servers (this is was I am looking for). This is not the case with (multi-node) ROXIE, as only one server was loaded during query run. I did not found much information on the web and my current knowledge is that Roxie is just adding some indexes to accelerate queries on Thor (and may eventually deals with multi user connexions). But performance tests show me the contrary.\\nNow, can you confirm or invalidate that THOR is the only platform (vs Roxie) that allow scalability in a distributed multi server architecture ?
This white paper (http://cdn.hpccsystems.com/whitepapers/wp_introduction_HPCC.pdf) goes into great detail about the HPCC platform and the differences between Thor and Roxie, but here's the brief description:\\n\\nThor is a massively parallel cluster designed to do ETL (Extract, Transform, and Load) work -- the kind of data ingest and transformation jobs that every big data shop needs to do to take raw data and transform it into useful information that can either be sold to external customers or used by internal customers. Data is distributed across the nodes and the same "work" is done in parallel by each node on the data that that node is given to work on. IOW, we distribute the work by how we distribute the data. Thor is a "back office" tool (running just one job at a time) that builds data products from raw, un-indexed data, and creates whatever the "saleable" format needs to be (datasets and/or indexes).\\n\\nRoxie is a customer interface tool designed to serve up data requests to end-users, handling thousands of concurrent requests at a time. Unlike Thor, a single Roxie query is mostly (with few exceptions) handled by a single Roxie node, with support from whichever nodes in the cluster contains the data needed for each particular query. Most Roxie queries use indexes for fast performance. Roxie is a "front counter" tool that serves up information to customers.\\n\\nThor is used to create the data that Roxie will use. ETL and query development work are all done on Thor. A query (and its required data) is only published to Roxie when the final data/query combination is complete. Once published to Roxie, the query is available for end-users to use.\\n\\nSo, as an ECL developer, 99%+ of all your work is done on Thor -- Roxie receives the end result of your work and allows customers to reap the benefits of all your work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-03 12:47:47\" },\n\t{ \"post_id\": 1887, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"Richard,\\n\\nThank you for your answer. I've lost one week trying to fix this issue (no debugger and code pretty complex). Everything is pretty clear now and I re-made GRAPH actions in something else with, I must confess, some benefit to code optimization.\\n\\nAfter fixing GRAPH's code, I found also that : \\nError: System error: 0: Graph[310], Global DEDUP,ALL is not supported\\nError: System error: 0: Graph[21], Thor currently, does not support a dataset loop condition, activity id: 23\\nThe documentation doesn't say anything on this. I am trying now to find an alternative to LOOP at least. \\n\\nI am not sure to understand perfectly the exact difference between THOR and ROXIE. \\nI tested some simple code on a THOR AWS cluster and saw it is scalable, gaining performances by adding servers (this is was I am looking for). This is not the case with (multi-node) ROXIE, as only one server was loaded during query run. I did not found much information on the web and my current knowledge is that Roxie is just adding some indexes to accelerate queries on Thor (and may eventually deals with multi user connexions). 
But performance tests show me the contrary.\\nNow, can you confirm or invalidate that THOR is the only platform (vs Roxie) that allow scalability in a distributed multi server architecture ?\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-07-02 22:19:55\" },\n\t{ \"post_id\": 1880, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\nSimply, you will find commented in function f3 different alternatives : one works and others don't on THOR but all work on HTHOR.
Before the Open Source releases, both the LOOP and GRAPH functions were documented to only operate on Roxie and hThor (not Thor). With the move to Open Source, both these functions are now meant to work on Thor too. Obviously, GRAPH is not. I will report the issue.\\nI would be very nice if you could help me on this. And if there is some fix available, how to install sources with the fix inside.
Help to/from the HPCC user community is precisely what this forum is about. Your previous questions have been answered, as your future questions will be. At some point you will be well enough informed to begin helping others with their issues. That's what Community is all about.
Debugger does not work in this case, I don't know why.
The Debugger only works on hThor and Roxie, because these both have a single node that the code runs on. Thor, however, has code running on multiple nodes simultaneously, making debugging extremely problematic.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-02 13:34:05\" },\n\t{ \"post_id\": 1878, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI think there is a bug with GRAPH nested in ITERATE, ON THOR, which is very annoying for me as I expected to run queries to filter complex data on THOR.\\nMy last try is on VM image but I have the same behaviour on a AWS cluster.\\n\\nDon't try to find a meaning in the code I provide, as I reduced it to emphasize my problem. I want this (GRAPH nested in ITERATE) to work unless you tell me it is impossible, but in this case, I don't see it in documentation.\\n\\nHere is my code :\\n\\n\\n TypeNoeud := ENUM(SYMBOL,VAR,NOEUD,VIDE,LIEN); \\n\\n Noeud := RECORD\\n\\tINTEGER id;\\t\\t\\t\\n\\tINTEGER idmax;\\t\\n\\tINTEGER symbol;\\n\\tSTRING chaine; \\n\\tTypeNoeud type;\\n\\tINTEGER gauche;\\t\\n\\tINTEGER droite;\\t\\nEND;\\n\\n Btree := RECORD\\n\\tDATASET(Noeud) dn;\\nEND;\\n\\n StatutContrainte := ENUM(ACTIVE,PASSIVE,DELETED);\\n\\n Contrainte := RECORD\\n\\tINTEGER id;\\n\\tBtree b;\\n\\tStatutContrainte statut;\\nEND;\\n\\n TypeModele := ENUM(FIXE,REMOVABLE);\\n\\n Modele := RECORD\\n\\tINTEGER id;\\n\\tBtree b;\\n\\tTypeModele type;\\nEND;\\n\\n StatutVariable := ENUM(CONT,STOP,INUTILE,VIDER);\\n\\n Variable := RECORD\\n\\tINTEGER symbol;\\n\\tBtree val;\\n\\tStatutVariable statut ;\\n\\t\\nEND;\\n\\n StatutTuple := ENUM(NONVIDE,VIDE);\\n\\n TupleVariable := RECORD\\n\\tDATASET(Variable) dv;\\n\\tINTEGER p; \\n\\tINTEGER c; \\n\\tSET OF INTEGER sc; \\n\\tStatutTuple statut;\\n\\tTypeModele typeModele;\\n\\tDATASET({INTEGER id}) fusionContraintesSupprimes;\\nEND;\\n\\n\\n StatutChoixTuple := ENUM(NONVIDE,VIDE);\\n\\n ChoixTuplesVariables := RECORD\\n\\tDATASET(TupleVariable) dt;\\n\\tStatutChoixTuple statut;\\nEND;\\n\\n ListeContraintes := RECORD\\n\\tDATASET(Contrainte) lc;\\n\\tDATASET({INTEGER id}) fusionContraintesSupprimes;\\nEND;\\n\\n\\n\\n\\n\\n\\n\\nNoeud f9(Noeud n,INTEGER offset) := TRANSFORM\\n\\tSELF.id:=offset;\\n\\tSELF := n;\\nEND;\\n\\nDATASET(Noeud) f8(DATASET(Noeud) dn,INTEGER offset) := FUNCTION\\n\\tRETURN PROJECT(dn,f9(LEFT,offset));\\nEND;\\n\\nDATASET(Noeud) f7(DATASET(Noeud) gauche) := FUNCTION\\n\\toffset := COUNT(gauche); \\n\\tdroite2 := f8(gauche,offset);\\n\\tRETURN droite2;\\nEND;\\n\\nDATASET(Noeud) f6(DATASET(Noeud) modele_cur) := FUNCTION\\n\\tRETURN f7(modele_cur);\\nEND;\\n\\n\\nDATASET(Noeud) f5(SET OF DATASET(Noeud) modele,INTEGER d) := FUNCTION\\n\\tmodele_cur := modele[d-1];\\n\\tRETURN \\tf6(modele_cur);\\nEND;\\n\\n\\n\\nDATASET(Noeud) f4(Btree modele) := FUNCTION\\n\\tlisteNoeudsContrainte := GRAPH(modele.dn,COUNT(modele.dn),f5(ROWSET(LEFT),COUNTER));\\n\\tRETURN listeNoeudsContrainte;\\nEND;\\n\\n\\nVariable f3(Variable modele,Variable var,INTEGER c) := TRANSFORM\\n//\\tSELF.val.dn := f4(IF(TRUE,var.val,modele.val)); // this works with var.val !\\n//\\tSELF.val.dn := f4(IF(c=10000,var.val,modele.val)); // this bug because of modele.val !\\n\\tSELF.val.dn := f4(IF(c=1,var.val,modele.val)); // this bug because of counter and modele.val !?!\\n\\tSELF := var;\\nEND;\\n\\n\\n\\n\\n\\n\\n\\nlisteTuplesLc := 
DATASET([{DATASET([{2844991738,{DATASET([{1,1,703514648,'a',1,-1,-1}],Noeud)},1}],Variable),1,1,[],1,1,[]}],TupleVariable);\\nlisteChoixTuples := DATASET([{listeTuplesLc,1}],ChoixTuplesVariables);\\nlChoixTuples := listeChoixTuples[1];\\nTupleVariable tuple := lChoixTuples.dt[1]; \\n\\nDATASET(Variable) lVar := ITERATE(tuple.dv,f3(LEFT,RIGHT,COUNTER));\\n\\nOUTPUT(lVar);\\n
\\n\\n\\nSimply, you will find commented in function f3 different alternatives : one works and others don't on THOR but all work on HTHOR.\\n\\nI would be very nice if you could help me on this. And if there is some fix available, how to install sources with the fix inside.\\nAgain, "error manual" is cruelly missing here. Debugger does not work in this case, I don't konw why.\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-07-02 10:32:19\" },\n\t{ \"post_id\": 1857, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nGlad you found a workaround for your issue.\\nI still don't understand the error message and why parallel filtering is not possible in this case.
Neither do I. The code snippet you included in your first post did not provide enough context to understand what you were trying to accomplish. For any future issues that come up, please try to include as much relevant code as possible so we can see the context. That would make it more likely we'll be able to see the problem and help you faster.\\n\\nRichard\", \"post_time\": \"2012-06-27 19:58:43\" },\n\t{ \"post_id\": 1850, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Re: Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"I've found a workaround better than the original.\\nPer chance, the resulting dataset is produced by ITERATE. I changed the way of doing it by creating a larger collector record of the result.\\nMy finding is by accumulating the final result inside the ITERATE operation, I don't need parallel tests on a unique dataset after which seems to be the cause of confusion. Anyway, this is not an explanation and I still don't understand the error message and why parallel filtering is not possible in this case.\\n\\nJM.\", \"post_time\": \"2012-06-27 16:58:03\" },\n\t{ \"post_id\": 1843, \"topic_id\": 408, \"forum_id\": 9, \"post_subject\": \"Error messages meaning ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nIs there some error messages manual in which would be explained for each error its meaning and some idea on how to handle with it ? \\n\\nI have some error in a THOR cluster (AWS platform with a 3 instances thor cluster): \\n\\nError: System error: -1: Graph[21], localresultwrite[29]: SLAVE 10.91.1.230:6600: Partial row written at end of file 23 of 24 (0, 0), -1, \\n\\nBy investigating my code and the graph, I can see it happens in this context, where I am looking for either one STOP record at least in the dataset to empty it, or only CONT(inue) records.\\n\\n\\tINTEGER ifstatut := IF(EXISTS(listeVariables(statut=StatutVariable.STOP)),StatutVariable.VIDER,StatutVariable.CONT);\\n\\tlisteVariables2 := listeVariables(statut=ifstatut);\\n
\\n\\nIt seems parallel filtering of listeVariables is clashing with error mentionned above because of an incomplete-row story but I don't understand much more about this.\\n\\nI am investigating on this error by trying something else but I would prefer to understand what HPCC is really doing to fail, when it does not happen on HTHOR, and more generally, I think it would be a good idea to have an error index to complete the documentation.\\n\\nCould you help me on this specific problem ?\\nThanks,\\nJM.\", \"post_time\": \"2012-06-26 21:27:48\" },\n\t{ \"post_id\": 1896, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"jsmith\", \"post_text\": \"Thor runs daliadmin per startup, so it could well be that, perhaps other things do.\\nDaliadmin really shouldn't be creating a log per run ( see github issue https://github.com/hpcc-systems/HPCC-Platform/issues/2779 )\", \"post_time\": \"2012-07-05 15:22:29\" },\n\t{ \"post_id\": 1895, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"jsmith\", \"post_text\": \"Glad your found the cause.\\nIt crossed my mind, whether # files per directory (another hard limit I think), or something similar.\\n\\ndaliadmin is a utility command, not a daemon, so I'm curious why there were 1000's of daliadmin log files still though...\", \"post_time\": \"2012-07-05 14:51:24\" },\n\t{ \"post_id\": 1894, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"michael-mason\", \"post_text\": \"Hello,\\n\\nThe problem turned out to be that our system that runs HPCC ran out of inodes. Even though we had disk space left (as reported by df), we didn't have any remaining inodes left as we reached our system limit.\\n\\nThe daemon process daliadmin created over 1,000,000 files in /var/log/daliadmin, adn I believe that was the culprit. I deleted all of the old log files, and everything is working great now.\\n\\nThanks,\\n-Mike\", \"post_time\": \"2012-07-05 14:42:03\" },\n\t{ \"post_id\": 1883, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"michael-mason\", \"post_text\": \"Hi,\\n\\nIt looks like the VM isn't allowing new files to be created on the system even though 'df' reports there is available space. I'm not a VM guy, but I'm guessing the host system isn't allowing the VM to expand its disk space usage or something and this is causing my issues.\\n\\nWhen I try to do something as simple as 'touch test.txt', it gives me the 'No space left on device' error. I have an email out to our sysadmin/VM guy, so hopefully he can resolve the issue. Thank you very much for your help! HPCC is a really cool system and I'm really enjoying working with it.\", \"post_time\": \"2012-07-02 16:51:25\" },\n\t{ \"post_id\": 1882, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"jsmith\", \"post_text\": \"That error certainly suggests that disk was full at the point of failure.\\nPerhaps space cleared by the time the space was inspected?\\n\\nThe error is originating from Dali creating a very small file (8 bytes).\\nIs Dali also on the same machine/vm ? Is /var/lib/HPCCSystems/hpcc-data/dali/ on the same partition as the rest?\\n\\nHas the VM host machine/partition got enough space? I think it's possible to get a guest 'disk full' error if the VDI is dynamic and the host is short of space. i.e. 
at the point the VM tries to expand the virtual disk.\", \"post_time\": \"2012-07-02 16:11:33\" },\n\t{ \"post_id\": 1865, \"topic_id\": 411, \"forum_id\": 9, \"post_subject\": \"Strange error in ECL IDE\", \"username\": \"michael-mason\", \"post_text\": \"Hello,\\n\\nI'm going through the tutorial, and when I try to submit the sample code:\\n\\nIMPORT TutorialMM;\\nCOUNT(TutorialMM.File_OriginalPerson);\\n\\nI get the following error in the ECL Error window at the bottom:\\n\\nWARNING: ESP Exception - CSoapResponseBinding: 2012-06-29 10:56:45 GMT: SDS: Dirty client cache members used\\nSDS Reply Error : CFile::open /var/lib/HPCCSystems/hpcc-data/dali/delta.progress, Disk full (D:\\\\hpccsystems\\\\src\\\\eclide\\\\comms\\\\Dali.cpp, 1281) SoapUtil.h(419)\\n\\nWe have HPCC running on a VM (single node) and I checked and we seem to have several gigabytes of free space.\\n\\nCan anyone help? Thanks,\", \"post_time\": \"2012-06-29 15:18:49\" },\n\t{ \"post_id\": 1923, \"topic_id\": 422, \"forum_id\": 9, \"post_subject\": \"Re: IDE crashes\", \"username\": \"eric.scott\", \"post_text\": \"Thanks for your reply.\\n\\nI re-installed from the VM version (3.6.2.3).\\n\\nAnd followed your instructions. It no longer crashes.\\n\\nIt looks like at least part of the problem stemmed from the fact that on my (old) system 'C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\' is known as 'C:\\\\Documents and Settings\\\\All Users\\\\Shared Documents\\\\'. \\n\\nThe seem to be some funky aliasing problems going on, but I finally got around the problem by creating a 'HPCCDocuments' folder, specifying 'C:\\\\Documents and Settings\\\\All Users\\\\HPCCDocuments\\\\HPCC Systems\\\\ECL\\\\wu' as my working folder, with ecl folders to match.\\n\\nThanks for your help!\", \"post_time\": \"2012-07-09 17:53:27\" },\n\t{ \"post_id\": 1918, \"topic_id\": 422, \"forum_id\": 9, \"post_subject\": \"Re: IDE crashes\", \"username\": \"bforeman\", \"post_text\": \"Hi Eric,\\n\\nI've seen this happen on occasion when switching from a Legacy IDE to the OSS IDE on the same machine. Often just restarting the IDE will correct the issue. If that is not the case in your situation, when you first login and view your preferences make sure that the settings on the Compiler tab are also correct. Press the Reset button to reset back to the defaults, and then verify that your path to the Compiler, Working Folder, and ECL Folders are correct. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-09 11:49:33\" },\n\t{ \"post_id\": 1915, \"topic_id\": 422, \"forum_id\": 9, \"post_subject\": \"IDE crashes\", \"username\": \"eric.scott\", \"post_text\": \"I've been using the legacy system and its associated IDE without a problem. The legacy system my team is using is being taken down to upgrade to the new version. Hoping to work on the VM version while the system was down, I uninstalled the old IDE, installed the latest VM demo version and re-installed the IDE from the resources on the associated ECL watch page. When I pointed the IDE at the proper IP, it crashed with this helpful message: 'A crash has been detected by BugTrap'.\\n\\nThinking this was a problem with the VM, uninstalled the IDE, started up an image on Amazon Web Services, re-installed the IDE from the ECL watch page provided with that image, and got the same result.\\n\\nI'm using a pretty old machine. Windows v. 5.1. 
With 3.5 gigs of available physical memory.\\n\\nAny help appreciated.\", \"post_time\": \"2012-07-07 14:06:17\" },\n\t{ \"post_id\": 1936, \"topic_id\": 426, \"forum_id\": 9, \"post_subject\": \"Re: Feature Request (is the correct place?)\", \"username\": \"bforeman\", \"post_text\": \"Hi Michael,\\n\\nThis works I will pass this on to the develoment team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-11 12:15:55\" },\n\t{ \"post_id\": 1928, \"topic_id\": 426, \"forum_id\": 9, \"post_subject\": \"Feature Request (is the correct place?)\", \"username\": \"michael-mason\", \"post_text\": \"(I'm not sure if this the correct place to post this)\\n\\nIn the ECL IDE, it would be very helpful to allow (at least a subset of) Emacs key bindings.\\n\\nThanks,\", \"post_time\": \"2012-07-10 21:48:09\" },\n\t{ \"post_id\": 1993, \"topic_id\": 438, \"forum_id\": 9, \"post_subject\": \"Re: Sudden Compile/Link failure\", \"username\": \"eric.scott\", \"post_text\": \"I think I found the problem. I must inadvertently have reset the cluster to 'Local'. When I changed it to 'thor' everything worked fine.\\n\\nSorry for the confusion.\", \"post_time\": \"2012-07-17 23:03:45\" },\n\t{ \"post_id\": 1992, \"topic_id\": 438, \"forum_id\": 9, \"post_subject\": \"Sudden Compile/Link failure\", \"username\": \"eric.scott\", \"post_text\": \"I was able to use the IDE using the VM just fine until today when I started getting this error:\\n\\nThe code I'm executing at this point is just 'output(5);' in a builder.\\n\\n[color=#008000:3vh0lltg]Error: Compile/Link failed for C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\L20120717-175737 (see '\\\\\\\\192.168.1.66\\\\c$\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\eclcc.log' for details) (0, 0), 3000, C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\L20120717-175737\\nI've included eclcc.log as an appendix to this message, also an excerpt from what I think are the pertinent parts of my configuration file, which have not changed since yesterday when things were peachy.\\n\\nThe eclcc.bin file is in the expected place, and 'check' executes fine.\\nCalling eclcc from a shell seems to work as well.\\n\\nRestarting the IDE, VM and my machine do not have an effect.\\n\\nThanks for any help.\\n\\n***APPENDIX *****\\n[color=#FF0000:3vh0lltg]----eclcc.log ----------\\n00000000 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\auditlib.dll[lib_auditlib] version = AUDITLIB 1.0.1\\n00000001 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\dmetaphone.dll[lib_metaphone] version = DMETAPHONE 1.1.05\\n00000002 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\fileservices.dll[lib_fileservices] version = FILESERVICES 2.1.3\\n00000003 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\logging.dll[lib_logging] version = LOGGING 1.0.1\\n00000004 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\parselib.dll[lib_parselib] version = PARSELIB 1.0.1\\n00000005 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\stringlib.dll[lib_stringlib] version = STRINGLIB 1.1.14\\n00000006 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC 
Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\unicodelib.dll[lib_unicodelib] version = UNICODELIB 1.1.05\\n00000007 2012-07-17 17:57:37 3452 5376 Loading plugin C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\plugins\\\\workunitservices.dll[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.1\\n00000008 2012-07-17 17:57:38 3452 5376 Adding library: eclrtl\\n00000009 2012-07-17 17:57:38 3452 5376 Adding library: C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\L20120717-175737.res\\n0000000A 2012-07-17 17:57:38 3452 5376 Compiling C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\L20120717-175737\\n0000000B 2012-07-17 17:57:39 3452 5376 Failed to compile C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\\\L20120717-175737\\n0000000C 2012-07-17 17:57:39 3452 5376 Fatal Error: Unable to locate C++ compiler/linker\\n0000000D 2012-07-17 17:57:39 3452 5376 \\n---------- compiler output --------------\\n'"vsvars32"' is not recognized as an internal or external command,\\n\\noperable program or batch file.\\n\\n'cl.exe' is not recognized as an internal or external command,\\n\\noperable program or batch file.\\n\\n--end eclcc.log -------------\\n[color=#0000BF:3vh0lltg]-- config file ----------------\\nConfig:\\n[Compiler]\\nLocation=C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\eclcc.exe\\nEclWorkingFolder=C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\wu\\nEclFolder00=C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\My Files\\nEclFolder01=C:\\\\HPCC\\\\ECLSource\\\\ECL\\\\Samples\\n...\\nVersion=6.6.2.1001.682.1\\n...\\n--- end config file -----------\", \"post_time\": \"2012-07-17 22:22:11\" },\n\t{ \"post_id\": 2085, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"prachi\", \"post_text\": \"
Thanks..\", \"post_time\": \"2012-07-26 06:49:35\" },\n\t{ \"post_id\": 2057, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"bforeman\", \"post_text\": \"
1. What is the exact flow of compilation when an ECL query is submitted? Is it that first an .ecl file is converted into a .cpp file and then to a .so file or something else?\\n
\\n\\nYou are exactly right with the sequence!\\n\\n2. Other than the .ecl file, what all files are generated and stored on the client machine(where ECL IDE) is installed ?\\n
\\n\\nThere is a Workunit folder that you set up in the Compiler preferences that stores some backup files and occasionally an XML and LOG file.\\n\\n3. I'm unclear about the significance of the .so file - is it the final machine executable that actually runs on the clusters or some intermediate representation(like the Java bytecode in a .class file)\\n
\\n\\nYes, you are correct again in your assumption. \\n\\n\\n
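(An editorial aside on inspecting that intermediate C++: the Debug-dialog value Bob gives further down in this thread, SaveCPPTempFiles=1, can also be set from the ECL source itself via #OPTION, assuming the debug name maps across the way debug options normally do. A minimal sketch:)

// Sketch: keep the generated .cpp files with the workunit so they can be
// viewed from the Helper link in the ECL Watch Workunit Details page.
#OPTION('saveCppTempFiles', TRUE);
OUTPUT('hello');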
\\n4. Following 3. - where do the final executable file(s) corresponding to an ECL query submitted via ECL IDE reside on the server?\\n
\\n\\nI did a quick search for the .SO files, look in the myeclccserver folder.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-24 15:41:45\" },\n\t{ \"post_id\": 2053, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"prachi\", \"post_text\": \"As per the definition of ECL server :\\n\\nIncludes the ECL compiler and executable code generator, and functions as the job\\nserver for Thor job execution in the HPCC system environment. The ECL compiler\\ntranslates the source ECL statements into executable C++ code in the form of dynamic\\nlink libraries (DLLs) that can be executed on Thor or Roxie clusters.\\n\\n\\nI used the find command on the HPCC single-node machine at the root folder and found many .so(shared object) files under /lib/xtables, /opt/HPCCSystems/lib and so on.\\n\\nI have following queries :\\n\\n1. What is the exact flow of compilation when an ECL query is submitted? Is it that first an .ecl file is converted into a .cpp file and then to a .so file or something else?\\n\\n2. Other than the .ecl file, what all files are generated and stored on the client machine(where ECL IDE) is installed ?\\n\\n3. I'm unclear about the significance of the .so file - is it the final machine executable that actually runs on the clusters or some intermediate representation(like the Java bytecode in a .class file)\\n\\n4. Following 3. - where do the final executable file(s) corresponding to an ECL query submitted via ECL IDE reside on the server?\\n\\nThanks and regards !\", \"post_time\": \"2012-07-24 08:34:08\" },\n\t{ \"post_id\": 2052, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"prachi\", \"post_text\": \"Thanks for reply..\\nI downloaded the file which is stored in server...the file type is 'SO file' . So is this the only cpp file which is generated after compiling? Will you plz help to find its content...\\nAlso what are the .bak files created in our workspace?\", \"post_time\": \"2012-07-24 05:42:59\" },\n\t{ \"post_id\": 2049, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"bforeman\", \"post_text\": \"I forgot to mention, you can view the CPP files directly from ECL Watch in the Workunit Details - look for the Helper link near the bottom.\\n\\nBob\", \"post_time\": \"2012-07-23 16:30:24\" },\n\t{ \"post_id\": 2047, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Re: Queries related to ECL scripts\", \"username\": \"bforeman\", \"post_text\": \"I believe that the CPP files are temporarily generated as they are then compiled to the shared object files. However, in the Debug option found in the More dialog of the ECL file, you can enter the following command to save the CPP files generated:\\n\\nSaveCPPTempFiles=1\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-23 15:14:15\" },\n\t{ \"post_id\": 2045, \"topic_id\": 447, \"forum_id\": 9, \"post_subject\": \"Queries related to ECL scripts\", \"username\": \"prachi\", \"post_text\": \"As we know, the ECL scripts we write ultimately are compiled in C++ code.\\nI have two queries :\\n1. When I do a check and/or a submit, what files are created(if any) on the local machine?\\n2. 
Where do the C++ files reside on the HPCC server i.e when u do a submit, in which directory are C++ files created on single hpcc node?\", \"post_time\": \"2012-07-23 11:58:49\" },\n\t{ \"post_id\": 2059, \"topic_id\": 452, \"forum_id\": 9, \"post_subject\": \"Re: Fault-Tolerance in HPCC\", \"username\": \"HPCC Staff\", \"post_text\": \"A response has been posted to this same question in the ECL forum:\\nviewtopic.php?t=451&p=2056#p2056\", \"post_time\": \"2012-07-24 15:54:54\" },\n\t{ \"post_id\": 2055, \"topic_id\": 452, \"forum_id\": 9, \"post_subject\": \"Fault-Tolerance in HPCC\", \"username\": \"Ankita Singla\", \"post_text\": \"hello\\nIts urgent...\\n\\nI am new to HPCC and not getting any idea abt\\nhow fault-tolerance is achieved especially for Thor cluster.\\nHow mirroring takes place?\\nwhat happens if a node goes down...\\nIs any manual intervention required?\\nDoes the fail-over happen automatically?\\n\\nplz Help....\", \"post_time\": \"2012-07-24 11:40:08\" },\n\t{ \"post_id\": 2359, \"topic_id\": 453, \"forum_id\": 9, \"post_subject\": \"Re: ECL mode for emacs\", \"username\": \"yil43\", \"post_text\": \"Just updated the attachment file to make the syntax highlighting case insensitive. Also you can add the -I option to invoke eclcc, e.g., \\n(list "eclcc" (list "-syntax" "-I.." local-file))\\nso that your import won't appear as errors in the syntax checking.\", \"post_time\": \"2012-09-15 18:12:40\" },\n\t{ \"post_id\": 2077, \"topic_id\": 453, \"forum_id\": 9, \"post_subject\": \"Re: ECL mode for emacs\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for sharing this! We will certainly check it out.\", \"post_time\": \"2012-07-25 14:24:39\" },\n\t{ \"post_id\": 2064, \"topic_id\": 453, \"forum_id\": 9, \"post_subject\": \"ECL mode for emacs\", \"username\": \"yil43\", \"post_text\": \"Hi, just feeling a command line alternative of the IDE might be useful when the need arises to edit the ecl source in the terminal. So I configured this ecl mode for emacs. I am neither an expert in emacs nor ecl, just followed the on-line tutorials and the ECL documentation, so there are likely to problems. Please feel free to report bugs, make improvement, and share with others.\\n \\nTo enable syntax highlighting, please untar the attached file [attachment=0:34rihp7o]ecl-mode.el.tar.gz and put it in your emacs load path, and add the following statement in your .emacs file.\\n(require 'ecl-mode)
\\n\\nYou can also configure syntax checking with flymake and the eclcc program. To do that, please make sure the program eclcc (available once you’ve installed HPCC) is on your system path and flymake is present (shipped with recent versions of emacs), and then add the following lines in you .emacs file. \\n\\n\\n(require 'flymake)\\n(add-hook 'find-file-hook 'flymake-find-file-hook)\\n\\n(defun flymake-ecl-init ()\\n (let* ((temp-file (flymake-init-create-temp-buffer-copy\\n 'flymake-create-temp-inplace))\\n (local-file (file-relative-name\\n temp-file\\n (file-name-directory buffer-file-name))))\\n (list "eclcc" (list "-syntax" "-I.." local-file))))\\n\\n(setq flymake-allowed-file-name-masks\\n (cons '(".+\\\\\\\\.ecl$"\\n flymake-ecl-init\\n flymake-simple-cleanup\\n flymake-get-real-file-name)\\n flymake-allowed-file-name-masks))\\n\\n(setq flymake-err-line-patterns\\n (cons '("\\\\\\\\(.*\\\\\\\\)(\\\\\\\\([0-9]+\\\\\\\\),[0-9]+):.*:\\\\\\\\(.*$\\\\\\\\)"\\n 1 2 nil 3)\\n flymake-err-line-patterns))\\n
\\n\\nHope it helps. Thanks.\\nYing\", \"post_time\": \"2012-07-24 18:18:42\" },\n\t{ \"post_id\": 2177, \"topic_id\": 474, \"forum_id\": 9, \"post_subject\": \"Re: How to delete sprayed file from HPCC System\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanks for the reply....\\nIt will help me.... \", \"post_time\": \"2012-08-08 11:46:48\" },\n\t{ \"post_id\": 2152, \"topic_id\": 474, \"forum_id\": 9, \"post_subject\": \"Re: How to delete sprayed file from HPCC System\", \"username\": \"rtaylor\", \"post_text\": \"Ankita,\\n\\nMy reply to your other post (http://hpccsystems.com/bb/viewtopic.php?f=8&t=478&sid=91a5b96a6eea55fbf263bf4f30a3b436) addresses how to manually delete files.\\n\\nThere is also a Standard Library function, STD.File.DeleteLogicalFile(), that allows you to delete files during a job.
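For reference, a minimal sketch of that Standard Library call; the logical file name below is only a placeholder, not one of the files discussed in this thread:

IMPORT STD;
// Remove a single logical file; this also frees its file parts on every node.
STD.File.DeleteLogicalFile('~thor::example::file_to_remove');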
the delete and despray options are not freeing up the space in the Thor cluster.
Despray is not meant to delete the source file, ever -- it is just a mechanism to get data out of the HPCC environment. But the Delete I described in my post referenced above does actually delete all the file parts from all the nodes, so my question is: how are you attempting to delete, and how are you determining that the space has not been freed up?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-03 13:47:04\" },\n\t{ \"post_id\": 2142, \"topic_id\": 474, \"forum_id\": 9, \"post_subject\": \"How to delete sprayed file from HPCC System\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi...\\n\\nhow to 'delete' a sprayed file from the HPCC system as the delete and despray options\\nare not freeing up the space in the Thor cluster.The files are still persist in parts..\\n\\nPlzz help..\", \"post_time\": \"2012-08-03 06:30:20\" },\n\t{ \"post_id\": 2166, \"topic_id\": 481, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"divyanshu\", \"post_text\": \"Thanks a lot!! Working fine now. \", \"post_time\": \"2012-08-07 06:44:23\" },\n\t{ \"post_id\": 2161, \"topic_id\": 481, \"forum_id\": 9, \"post_subject\": \"Re: Strange error in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"The strange error is a direct result of the invalid compiler options.\\n\\nStart the IDE and on the login window press "preferences". Select the compiler tab and press the "Reset" button.\\n\\nIf the Compiler/Working folder/ECL Folders don’t auto fill, then it sounds like the environment variables weren’t set during the install process. Try re-installing and rebooting prior to executing the IDE.\\n\\nFYI the environment folders should look something like:\\nHPCCBIN=C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\\\nHPCCECL=C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\\", \"post_time\": \"2012-08-06 14:36:30\" },\n\t{ \"post_id\": 2160, \"topic_id\": 481, \"forum_id\": 9, \"post_subject\": \"Strange error in ECL IDE\", \"username\": \"divyanshu\", \"post_text\": \"Hi,\\n\\nI get this error while running an output('hello world) in my ECL IDE\\n\\nWARNING: ESP Exception - CSoapResponseBinding: 2012-08-06 12:20:34 GMT: Cannot open workunit L20120806-174732. (D:\\\\hpccsystems\\\\src\\\\eclide\\\\comms\\\\Dali.cpp, 1014) SoapUtil.h(419). \\n\\nAlso: while i start my ECL IDE i get a pop up saying "compiler options specified are invalid". clicking ok on the pop up just opens the ECL IDE. \\n\\nDont have any clue on how to solve/debug it? Any kind of help is appreciated. \\n\\nThanks\\nDivyanshu\", \"post_time\": \"2012-08-06 12:33:05\" },\n\t{ \"post_id\": 2555, \"topic_id\": 559, \"forum_id\": 9, \"post_subject\": \"Re: Superkey and superfile\", \"username\": \"bforeman\", \"post_text\": \"There is an open thread on this topic already, have a look here:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=550&sid=4ad83782f62cce00cee6caa1e067140d\", \"post_time\": \"2012-10-19 12:05:12\" },\n\t{ \"post_id\": 2551, \"topic_id\": 559, \"forum_id\": 9, \"post_subject\": \"Superkey and superfile\", \"username\": \"wgsh\", \"post_text\": \"As document says,superfile can dynamic add subfiles . But after publish a query on roxie to use superkey,superfile in thor became static and add subfile report lock failed on 3.8.4. 
Is it the normal work mode or i miss something ?\\nIn my test case, inserting data need continue as roxie query service online .\\nAppreciate for your help.\\n\\nThanks!\", \"post_time\": \"2012-10-19 09:38:40\" },\n\t{ \"post_id\": 2697, \"topic_id\": 583, \"forum_id\": 9, \"post_subject\": \"Re: How does Schedule in ECL IDE works\", \"username\": \"ghalliday\", \"post_text\": \"That error seems to suggest that the workunit wasn't compiled properly.\\nI remember fixing a similar issue, but I'm not sure if that was since 3.8.2.\\n\\nI'll add a issue to Jira (https://track.hpccsystems.com/browse/HPCC-8206) so it can be tracked.\", \"post_time\": \"2012-11-05 15:29:51\" },\n\t{ \"post_id\": 2691, \"topic_id\": 583, \"forum_id\": 9, \"post_subject\": \"Re: How does Schedule in ECL IDE works\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Jim,\\n\\nWe have updated our servers long back, we are using Community Edition 3.8.2 \\nbut we are scheduling our jobs now, we didn't explored this feature in earlier version.\\n\\nCan you see this issue.\", \"post_time\": \"2012-11-05 04:47:53\" },\n\t{ \"post_id\": 2670, \"topic_id\": 583, \"forum_id\": 9, \"post_subject\": \"Re: How does Schedule in ECL IDE works\", \"username\": \"JimD\", \"post_text\": \"[quote="Apurv.Khare":1hrckzsh]Hi,\\nWe are using HPCC Community Edition.\\nWe are trying to use the schedule feature of ECL IDE, but as we schedule any workunit it doesn't get executed and shows this error:\\n\\nError: System error: 0: Workunit was compiled for eclagent interface version 0, this eclagent requires version 138..139 (0, 0), 0, \\n\\nCan you show us some insight on this issue, are we missing any Pre-requisite??\\nAnd do difference in time on server and our local machine can be a cause?\\n\\nIt appears that this query was compiled using an older version of the compiler and the ECL Agent server was subsequently updated. 
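(For context, a scheduled workunit is ordinary ECL with a workflow service attached, so it carries a compiled binary just like any other job. A minimal sketch, assuming the usual WHEN/CRON syntax:)

// Sketch: re-run this action every hour. The compiled workunit stays in the
// scheduler until it is descheduled, which is why it has to match the
// eclagent interface version of the platform it runs on.
OUTPUT('hourly run') : WHEN(CRON('0 * * * *'));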
\\n\\nWhen you update your server(s), scheduled queries may need to be recompiled and rescheduled.\", \"post_time\": \"2012-10-31 14:51:08\" },\n\t{ \"post_id\": 2665, \"topic_id\": 583, \"forum_id\": 9, \"post_subject\": \"How does Schedule in ECL IDE works\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\nWe are using HPCC Community Edition.\\nWe are trying to use the schedule feature of ECL IDE, but as we schedule any workunit it doesn't get executed and shows this error:\\n\\nError: System error: 0: Workunit was compiled for eclagent interface version 0, this eclagent requires version 138..139 (0, 0), 0, \\n\\nCan you show us some insight on this issue, are we missing any Pre-requisite??\\nAnd do difference in time on server and our local machine can be a cause?\", \"post_time\": \"2012-10-31 06:54:12\" },\n\t{ \"post_id\": 2836, \"topic_id\": 621, \"forum_id\": 9, \"post_subject\": \"Re: IDE Preferences\", \"username\": \"gsmith\", \"post_text\": \"That fix ended up in release 6.8.6.1001, you can grab that from our download section...\", \"post_time\": \"2012-11-21 20:33:26\" },\n\t{ \"post_id\": 2835, \"topic_id\": 621, \"forum_id\": 9, \"post_subject\": \"Re: IDE Preferences\", \"username\": \"tdelbecque\", \"post_text\": \"Sounds like related indeed.\\n\\nMy version is 6.8.0.7.682.1, under windows.\", \"post_time\": \"2012-11-21 15:24:13\" },\n\t{ \"post_id\": 2833, \"topic_id\": 621, \"forum_id\": 9, \"post_subject\": \"Re: IDE Preferences\", \"username\": \"gsmith\", \"post_text\": \"This issue sounds a little bit like: https://track.hpccsystems.com/browse/IDE-138\\n\\nWhat version IDE are you using?\\n\\nAlso can try logging out and then re-select the desired configuration and login again?\", \"post_time\": \"2012-11-21 14:31:01\" },\n\t{ \"post_id\": 2832, \"topic_id\": 621, \"forum_id\": 9, \"post_subject\": \"IDE Preferences\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI changed a parameter in the IDE preferences (the TAB as white space, but I think this info is not relevant), and when I saved the setting and got back to the editors, the 'target' combo was positioned on 'Local', which was not the setting before I entered the preferences dialog (it was thor).\\n\\nIt took some minutes for me to realize this fact. Also, coming back from the preference dialog makes all the WU links at the bottom of the editor to be lost. I did no succeed in getting back these links.\\n\\nIs it normal that the setting of the 'target' combo get changed after one modifies a preference option ?\\n\\nthanks, Thierry.\", \"post_time\": \"2012-11-21 11:11:51\" },\n\t{ \"post_id\": 3967, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Re: Weird Error in ECL IDE\", \"username\": \"karthikreddy\", \"post_text\": \"move your Target cluster from Local to THOR................\\n\\n\\nkarthik reddy\", \"post_time\": \"2013-04-24 20:30:00\" },\n\t{ \"post_id\": 2886, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Re: Weird Error in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"[quote="Leofei":1po3xfx9]It seems like it is looking for a file that does not exist. This behavior is very strange because the ECL IDE had been working fine all week last week and my environment has not changed. Could it be that this temp file is not being created?\\n\\nFYI The tmp file is only created when you have an ECL window which you have not saved yet. 
Had you of saved it with a file name that file would have been used (the tmp file lasts just long enough to submit).\", \"post_time\": \"2012-11-26 21:08:32\" },\n\t{ \"post_id\": 2882, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Re: Weird Error in ECL IDE\", \"username\": \"Leofei\", \"post_text\": \"
Thanks, it works now!
\", \"post_time\": \"2012-11-26 19:28:09\" },\n\t{ \"post_id\": 2881, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Re: Weird Error in ECL IDE\", \"username\": \"Leofei\", \"post_text\": \""C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\eclcc.exe" -q -fapplyInstantEclTransformations=1 -fapplyInstantEclTransformationsLimit=100 -o"L20121126-140504" -P"C:\\\\Users\\\\lfei\\\\Desktop\\\\workunits" -I"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files" -I"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu" -I"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\Training" "C:\\\\Users\\\\lfei\\\\AppData\\\\Local\\\\Temp\\\\TFR77AB.tmp"\\n\\nIt seems like it is looking for a file that does not exist. This behavior is very strange because the ECL IDE had been working fine all week last week and my environment has not changed. Could it be that this temp file is not being created?\", \"post_time\": \"2012-11-26 19:06:46\" },\n\t{ \"post_id\": 2879, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Re: Weird Error in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"This really does look like a compiler preferences issue.\\n\\nAs a sanity check try this:\\n1. Hold down the "shift" key when you press the submit button.\\n2. In the "Error Log" window you will see the actual command sent to the local eclcc compiler. On my machine they look something like this:\\n\\n\\ncd C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\n\\n"C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\eclcc.exe" -E -P"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu" -I"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files" -I"C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\Samples" "C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ECLTest\\\\Hello.ecl"\\n
\\n\\nCopy and paste those two lines into a command prompt and see what happens - this is what I got on my machine:\\n\\nunknown(0,0): warning C0010: Explicit source file C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\plugins not found\\nunknown(0,0): warning C0010: Explicit source file C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\share/ecllibrary/ not found\\n0 error, 2 warnings\\n<Archive build="community_3.10.0-5rc" eclVersion="3.6.1" legacyMode="0">\\n <Query attributePath="ECLTest.Hello"/>\\n <Module key="ecltest" name="ecltest">\\n <Attribute key="hello" name="hello" sourcePath="C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ECLTest\\\\Hello.ecl">\\n 'Hello'\\n </Attribute>\\n </Module>\\n</Archive>\\n
\\n\\n(You can ignore the warnings as I have a broken dev setup)\\n\\nGordon.\", \"post_time\": \"2012-11-26 17:59:50\" },\n\t{ \"post_id\": 2876, \"topic_id\": 635, \"forum_id\": 9, \"post_subject\": \"Weird Error in ECL IDE\", \"username\": \"Leofei\", \"post_text\": \"Hi, all,\\n\\nI got a really weird error when it's running any script, for example:\\noutput('hello');\\n\\nIt shows:\\nWARNING: ESP Exception - CSoapResponseBinding: 2012-11-26 17:13:41 GMT: Cannot open workunit L20121126-121405. (D:\\\\hpccsystems\\\\src\\\\eclide\\\\comms\\\\Dali.cpp, 1014) SoapUtil.h(419)\\n\\nThere is a post talking about a similar problem, but the solution doesn't apply for me. I checked the "preferences" and the compiler tab, they are right, and I reinstalled the IDE. But it's still not working.\\n\\nAny help is appreciated!\\n\\nLeo\", \"post_time\": \"2012-11-26 17:23:31\" },\n\t{ \"post_id\": 3054, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Re: Code versioning via command line\", \"username\": \"gsmith\", \"post_text\": \"[quote="tdelbecque":myy7sajm]Thanks for these points. I will look at the fourth.\\n\\nIf you are going down that route (and if you happen to be an elipse/java person), you may want to look at: https://github.com/hpcc-systems/EclipsePlugin\\n\\nIt deliberately does not officially support the remote repository (but does include the SOAP stubs for it and also contains a very simple "Import From Repository" Wizard).\", \"post_time\": \"2013-01-04 11:28:26\" },\n\t{ \"post_id\": 3053, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Re: Code versioning via command line\", \"username\": \"tdelbecque\", \"post_text\": \"Thanks for these points. I will look at the fourth. I tried ECLIDE with wine, but the experience was bad, and I gave up.\\n\\nThierry.\", \"post_time\": \"2013-01-04 10:42:03\" },\n\t{ \"post_id\": 3052, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Re: Code versioning via command line\", \"username\": \"gsmith\", \"post_text\": \"Ok, just to clarify (more for others who may be reading this):\\nYou are using a version of HPCC-Platform that still uses the integrated remote repository (which is not available in the OSS version, as it uses local ECL files and traditional source control systems).\\n\\n1. There used to be a command line tool (similar to eclplus) as part of the platform build which allowed import/export of attributes (but I am not sure if it supported the version feature).\\n2. You could run ECLIDE under wine (see viewtopic.php?f=9&t=81 make sure you read page 2 as well).\\n3. You could use AMT to import/export attributes (but again it is a windows application). \\n4. You can roll your own tools using the SOAP interface (Look at http://IP_OF_YOUR_ESP_SERVER:8145/WsAttributes/)\", \"post_time\": \"2013-01-04 10:20:56\" },\n\t{ \"post_id\": 3051, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Re: Code versioning via command line\", \"username\": \"tdelbecque\", \"post_text\": \"My daily working environment is unix and I submit my queries to my Thor cluster via eclplus. \\n\\nNow I need to integrate my development in a repository. I do that with ECLIDE, but then keeping my sources synchronized between my windows machine and my unix environment is not so seamless. 
This is why I have this question about code versioning with the command line, so that I would be able to use only the one working env.\\n\\nthanks, \\n\\nThierry.\", \"post_time\": \"2013-01-04 09:53:19\" },\n\t{ \"post_id\": 3050, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Re: Code versioning via command line\", \"username\": \"gsmith\", \"post_text\": \"What tools do you currently use to edit ECL within the repository?\", \"post_time\": \"2013-01-04 09:44:14\" },\n\t{ \"post_id\": 3049, \"topic_id\": 667, \"forum_id\": 9, \"post_subject\": \"Code versioning via command line\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nECL IDE allows for code versioning of queries in the repository, which is nice when working with MS Windows. Is it possible to get those same code versioning functionalities with command line tools (for those not working mainly on Windows) ?\\n\\nThanks, Thierry.\", \"post_time\": \"2013-01-04 08:56:42\" },\n\t{ \"post_id\": 3077, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"Re: The repository is locked for hpccdemo user\", \"username\": \"hungifi\", \"post_text\": \"That works fine, thanks for your help\", \"post_time\": \"2013-01-09 16:06:29\" },\n\t{ \"post_id\": 3074, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"Re: The repository is locked for hpccdemo user\", \"username\": \"gsmith\", \"post_text\": \"When I said "Make a Note" I meant the type you make with a pen and paper<g>.\\n\\nIs your ECL Folders list empty? If so can you press the "Reset" button and see if they get populated?\\n\\nIf they are not empty, "Look" at the folders they refer to and see if you have access to them in windows explorer.\\n\\nI have attached a screen shot of how a typical system looks.\", \"post_time\": \"2013-01-08 18:28:03\" },\n\t{ \"post_id\": 3070, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"Re: The repository is locked for hpccdemo user\", \"username\": \"hungifi\", \"post_text\": \"Thanks for your reply, I tried to make a note inside ECL Folder but that is disable (as show in the Attachment).\\n\\nI'am under W8 professional and I will try with the W7...\", \"post_time\": \"2013-01-08 14:22:08\" },\n\t{ \"post_id\": 3069, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"Re: The repository is locked for hpccdemo user\", \"username\": \"gsmith\", \"post_text\": \"From the Login window, press "Preferences..."\\nSelect the "Compiler" tab.\\nMake a note of the "ECL Folders" and try opening them in windows explorer and see if you can create and edit files there.\\n\\nIf these folders are read only, just copy the contents to somewhere that you have full access to (like your documents folder or desktop) and update the ECL Folders to include them from there.\", \"post_time\": \"2013-01-08 13:55:03\" },\n\t{ \"post_id\": 3067, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"Re: The repository is locked for hpccdemo user\", \"username\": \"bforeman\", \"post_text\": \"I’m guessing you are using Windows 7 and it might be a rights issue on your repository folder.\", \"post_time\": \"2013-01-08 12:45:28\" },\n\t{ \"post_id\": 3062, \"topic_id\": 669, \"forum_id\": 9, \"post_subject\": \"The repository is locked for hpccdemo user\", \"username\": \"hungifi\", \"post_text\": \"Hello everybody,\\n\\nI'am newbie in ECL/HPCC and I wish receive your help, please.\\n\\nAfter spraying the "OriginalPerson" data, i try to start with code in ECL but I can not "Insert Folder" because the Repository is locked for hpccdemo user. 
How can I solve that?\\n\\nThanks\", \"post_time\": \"2013-01-07 16:20:49\" },\n\t{ \"post_id\": 7671, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"bforeman\", \"post_text\": \"It's the Migrate option that moves the files. The fact that it looks disabled in your image suggests that you have not selected your target yet, or you may not have write access to the target you selected.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-27 16:47:55\" },\n\t{ \"post_id\": 7670, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI am also trying to move the ML library to the repository. I am in the AMT program and can't tell what I am supposed to click to actually move the files over. Other than select/clear/expand all, no other button does anything.\", \"post_time\": \"2015-05-27 16:35:56\" },\n\t{ \"post_id\": 3086, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"tdelbecque\", \"post_text\": \"well, I am not migrating, indeed ...\", \"post_time\": \"2013-01-10 15:20:27\" },\n\t{ \"post_id\": 3085, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"gsmith\", \"post_text\": \"The reason its "hidden" is that in the OSS world it doesn't really serve a purpose. But it is included for folks who are migrating _from_ legacy to OSS (but you appear to be going the other way, hmmmm.)\", \"post_time\": \"2013-01-10 15:16:30\" },\n\t{ \"post_id\": 3084, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"tdelbecque\", \"post_text\": \"This AMT tool is really useful; imho it would deserve some words in the "The ECL IDE and HPCC Client Tools" manual, even if using it is really obvious.\\n\\nBest regards, Thierry.\", \"post_time\": \"2013-01-10 15:08:34\" },\n\t{ \"post_id\": 3083, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"tdelbecque\", \"post_text\": \"I did not know these options, AMT in particular. Thanks for the advices, I am going to look at that.\\n\\nBest regards, Thierry.\", \"post_time\": \"2013-01-10 13:48:50\" },\n\t{ \"post_id\": 3082, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"bforeman\", \"post_text\": \"Thanks Gordon, I didn't realize that was still there! 
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-10 13:47:37\" },\n\t{ \"post_id\": 3081, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"gsmith\", \"post_text\": \"If you look in the ECLIDE folder (C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\ECL IDE) you will see an application called AMT.\\n\\nIt can assist with the moving of a local folder to a remote (integrated) repository.\\n\\nYou will need to check the various IMPORT statements once done I suspect.\\n\\nGordon.\", \"post_time\": \"2013-01-10 13:43:30\" },\n\t{ \"post_id\": 3080, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"Re: How to add a library in an integrated remote repository\", \"username\": \"bforeman\", \"post_text\": \"I would simply add a new folder to the integrated remote repository, and then manually copy the ML libraries from your local machine to that new folder. Or, if you have the local folder added to your preferences (see Compiler tab), then use the ECL IDE copy capability to copy the ECL from the local folder to your new one. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-10 13:38:46\" },\n\t{ \"post_id\": 3079, \"topic_id\": 673, \"forum_id\": 9, \"post_subject\": \"How to add a library in an integrated remote repository\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI need to put the ML librairy on our integrated remote repository. Is there an obvious way of doing that with ECLIDE ? Sorry for this maybe stupid question, I did not find it in the GUI (other than the brute manual commit of each files, which is rather clumsy ...) \\n\\nThanks, Thierry.\", \"post_time\": \"2013-01-10 12:22:44\" },\n\t{ \"post_id\": 3110, \"topic_id\": 680, \"forum_id\": 9, \"post_subject\": \"Re: Not able to check in the file\", \"username\": \"gsmith\", \"post_text\": \"Can you look in the "error window" to see if there is any additional information displayed?\\nYou may need to talk with Ops to get someone to check your WsAttibutes ESP service log to see what is happening.\", \"post_time\": \"2013-01-16 11:51:48\" },\n\t{ \"post_id\": 3107, \"topic_id\": 680, \"forum_id\": 9, \"post_subject\": \"Not able to check in the file\", \"username\": \"Srirama\", \"post_text\": \"I'm having a file with 4100 lines of code, for some reason I'm not able to check in this file.\\n\\nI tried to rename the existing file and inserting a new file and copying the content to it. I tried it in all the possible ways which I know. \\n\\nWhen I try to save the file it shows that it is trying to save but nothing is saved at the end. When I close to close the window it asks to save the file even If i click save it doesn't save. Even I can't close the file.\\n\\nI closed ECL Ide and re-opened even after I see the same behaviour. I tried the same scenario after rebooting the machine but no luck.\\n\\n\\nCan some help me in providing the solution for it?.\", \"post_time\": \"2013-01-15 15:03:29\" },\n\t{ \"post_id\": 3131, \"topic_id\": 688, \"forum_id\": 9, \"post_subject\": \"Re: Visualization Tools\", \"username\": \"Neha Singh\", \"post_text\": \"What all visualization tools can be integrated with HPCC? For eg.tableau\", \"post_time\": \"2013-01-21 11:44:02\" },\n\t{ \"post_id\": 3130, \"topic_id\": 688, \"forum_id\": 9, \"post_subject\": \"Re: Visualization Tools\", \"username\": \"gsmith\", \"post_text\": \"There are various different techniques, (and probably more than I list here):\\n1. 
Take a look at https://github.com/hpcc-systems/ecl-samples: It includes a vizualisation section.\\n2. You can download result sets as excel spreadsheets and use it to display charts.\\n3. Publish your queries to a Roxie and use SOAP calls to build an interactive visualzation.\", \"post_time\": \"2013-01-21 10:03:27\" },\n\t{ \"post_id\": 3129, \"topic_id\": 688, \"forum_id\": 9, \"post_subject\": \"Visualization Tools\", \"username\": \"Neha Singh\", \"post_text\": \"What are the different tools that support visualization in HPCC?\", \"post_time\": \"2013-01-21 09:37:54\" },\n\t{ \"post_id\": 11753, \"topic_id\": 715, \"forum_id\": 9, \"post_subject\": \"Re: Why workunit blocked\", \"username\": \"vin\", \"post_text\": \"My cluster has been blocked for over an hour. Is there a way to tell the cluster to reset now? (cluster version 6.0.2-1)\", \"post_time\": \"2016-10-04 15:38:53\" },\n\t{ \"post_id\": 3218, \"topic_id\": 715, \"forum_id\": 9, \"post_subject\": \"Re: Why workunit blocked\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nDepending on how the previous job failed, HPCC may need to reset itself. Therefore, while it is resetting all other jobs in the queue are "blocked." They should automatically unblock as soon as the reset is complete and HPCC is ready to go again.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-28 20:36:50\" },\n\t{ \"post_id\": 3214, \"topic_id\": 715, \"forum_id\": 9, \"post_subject\": \"Why workunit blocked\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nWhy workunit is blocked when previous one has failed ?\\nIt can work but then, why there is a delay before it is possible to run a new job after failure ?\\n\\nThanks,\\nJM\", \"post_time\": \"2013-01-28 16:27:13\" },\n\t{ \"post_id\": 3249, \"topic_id\": 720, \"forum_id\": 9, \"post_subject\": \"Re: Landing Zone File: Read and Write\", \"username\": \"prachi\", \"post_text\": \"Hello,\\nI have followed this syntax:\\nMyFile :=DATASET('~file::10.150.50.12::c$::training::import::myfile',RecStruct,FLAT);\\n\\nso instead of c$::training::import::myfile i wrote var::lib::HPCCSystems::dropzone::adcampaign_twitter_data.csv where adcampaign_twitter_data.csv is my file name uploaded via ECLWatch and now present on dropzone. And also this file is not sprayed yet.\\nCan you please guide me where i am going wrong in code??\\n\\nThanks and Regards!!\", \"post_time\": \"2013-01-30 16:37:37\" },\n\t{ \"post_id\": 3242, \"topic_id\": 720, \"forum_id\": 9, \"post_subject\": \"Re: Landing Zone File: Read and Write\", \"username\": \"DSC\", \"post_text\": \"Using FLAT as an option to DATASET means that you're looking for an HPCC logical file (i.e. something already sprayed into the cluster). I doubt you'll have something like that already sitting around on an external file system.\\n\\nUse the CSV or XML option instead. Also, you may want to check out the standard library's File.ExternalLogicalFileName() function. That allows you to define a path to the file in a more readable manner. 
That function's result would become the first argument to the DATASET() definition.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-01-30 12:59:15\" },\n\t{ \"post_id\": 3230, \"topic_id\": 720, \"forum_id\": 9, \"post_subject\": \"Landing Zone File: Read and Write\", \"username\": \"prachi\", \"post_text\": \"Hello\\nPlease find our problem http://hpccsystems.com/bb/viewtopic.php?f=34&t=719&sid=83432948e6bbd62fa32153026f090acc \\n\\nTo avoid spraying but to add file to HPCC DFS, we are doing the following : \\n\\nIn ECLLanguageReference pdf file, under Landing Zone Files, it says syntax should be:\\nThe syntax looks like this:\\n‘~file::<LZ-ip>::<path>::<filename>’\\nMyFile :=DATASET('~file::10.150.50.12::c$::training::import::myfile',RecStruct,FLAT);\\n\\n
\\nLayout_adcampaign_twitter_data := RECORD\\nVARSTRING User_ID;\\nVARSTRING Search_Keyword;\\nVARSTRING Tweet;\\nVARSTRING Sentiment;\\nVARSTRING date;\\nVARSTRING SearchID;\\nVARSTRING Domain;\\nVARSTRING TweetID;\\nVARSTRING Checked;\\nVARSTRING TwitterUserName;\\nVARSTRING No_Of_Followers;\\n\\nEND;\\n\\nFile_landingZoneFile :=DATASET('~file::10.101.2.171::var::lib::HPCCSystems::dropzone::adcampaign_twitter_data.csv',Layout_adcampaign_twitter_data,CSV);\\noutput(File_landingZoneFile);
\\n\\nBut facing error as though file is present in dropzone:\\nError: System error: 1: Could not open logical file ~file::10.101.2.171::var::lib::HPCCSystems::dropzone::adcampaign_twitter_data.csv in any of these locations:\\n//10.101.2.171:7100/var/lib/HPCCSystems/dropzone/adcampaign_twitter_data.csv (115)
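(For reference, a minimal sketch of the approach suggested in the reply above: building the external path with STD.File.ExternalLogicalFileName rather than hand-writing the '~file::...' string. It reuses the Layout_adcampaign_twitter_data RECORD defined above, and the IP and dropzone path are the same example values.)

IMPORT STD;
// The helper composes the '~file::<ip>::<path>' form for an external
// (landing-zone) file; the file is read in place, without spraying it first.
lzPath := STD.File.ExternalLogicalFileName('10.101.2.171',
              '/var/lib/HPCCSystems/dropzone/adcampaign_twitter_data.csv');
File_landingZoneFile := DATASET(lzPath, Layout_adcampaign_twitter_data, CSV);
OUTPUT(File_landingZoneFile);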
\\n\\nThanks and Regards!\", \"post_time\": \"2013-01-29 14:39:20\" },\n\t{ \"post_id\": 3243, \"topic_id\": 724, \"forum_id\": 9, \"post_subject\": \"Re: OUTPUT action:CSV parameters\", \"username\": \"rtaylor\", \"post_text\": \"The most likely problem would be on the input file -- how are the fields delimited? Comma? Tab? Bar (|)? IOW, the CSV option on the DATASET has all the defaults specified, but your OUTPUT CSV option has two non-default values specified (HEADING and QUOTE).\\n\\nOnce you get the DATASET defintion correct, your OUTPUT will work correctly. Something like this:Layout_adcampaign_twitter_data := RECORD\\nVARSTRING User_ID;\\nVARSTRING Search_Keyword;\\nVARSTRING Tweet;\\nVARSTRING Sentiment;\\nVARSTRING date;\\nVARSTRING SearchID;\\nVARSTRING Domain;\\nVARSTRING TweetID;\\nVARSTRING Checked;\\nVARSTRING TwitterUserName;\\nVARSTRING No_Of_Followers;\\n\\nEND;\\nFile_Layout_adcampaign_twitter_data_DataSet_CSV_delmiter :=\\nDATASET('~.::adcampaign_twitter_data',Layout_adcampaign_twitter_data,\\n CSV(HEADING(SINGLE), SEPARATOR(','), TERMINATOR('\\\\n'), QUOTE('')));\\nOUTPUT(File_Layout_adcampaign_twitter_data_DataSet_CSV_delmiter,, \\n '~.::hpcc::adcampaign_twitter_data::csv::delmiter',\\n CSV(HEADING(SINGLE), SEPARATOR(','), TERMINATOR('\\\\n'), QUOTE('')),OVERWRITE);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-30 13:50:50\" },\n\t{ \"post_id\": 3239, \"topic_id\": 724, \"forum_id\": 9, \"post_subject\": \"OUTPUT action:CSV parameters\", \"username\": \"prachi\", \"post_text\": \"Hello,\\nWe are trying to get CSV file in OUTPUT.Code for that is\\nLayout_adcampaign_twitter_data := RECORD\\nVARSTRING User_ID;\\nVARSTRING Search_Keyword;\\nVARSTRING Tweet;\\nVARSTRING Sentiment;\\nVARSTRING date;\\nVARSTRING SearchID;\\nVARSTRING Domain;\\nVARSTRING TweetID;\\nVARSTRING Checked;\\nVARSTRING TwitterUserName;\\nVARSTRING No_Of_Followers;\\n\\nEND;\\nFile_Layout_adcampaign_twitter_data_DataSet_CSV_delmiter :=\\nDATASET('~.::adcampaign_twitter_data',Layout_adcampaign_twitter_data,CSV);\\nOUTPUT(File_Layout_adcampaign_twitter_data_DataSet_CSV_delmiter,, '~.::hpcc::adcampaign_twitter_data::csv::delmiter',CSV(HEADING(SINGLE), SEPARATOR(','), TERMINATOR('\\\\n'), QUOTE('')),OVERWRITE);
\\n\\nHere we are specifying the seperator for CSV but the workunit shows only one column with all data.\\n\\nWhere code is going wrong?\\n\\nThanks and Regards !!\", \"post_time\": \"2013-01-30 09:37:39\" },\n\t{ \"post_id\": 3294, \"topic_id\": 725, \"forum_id\": 9, \"post_subject\": \"Re: ECLPlus Query\", \"username\": \"bforeman\", \"post_text\": \"I have a dumb question...have you considered using the DFUPlus command line to spray instead of the ECLPlus? ...and also the ECL Command line will soon supercede the ECLplus command line.\\n\\nDFUPlus was designed to spray and despray via the command line as needed.\\n\\nRegardless, I've asked development to look at your issue the way you are trying to implement it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-01 13:43:49\" },\n\t{ \"post_id\": 3240, \"topic_id\": 725, \"forum_id\": 9, \"post_subject\": \"ECLPlus Query\", \"username\": \"prachi\", \"post_text\": \"Hello\\nWe have ECL code to spray file like this:\\nIMPORT Std;\\n\\nSprayCSVFile_withContent :=STD.File.fSprayVariable('10.101.2.171','/var/lib/HPCCSystems/mydropzone/ClientWebCrawlData_withFileContent.csv',\\n,,,, \\n'mythor','~Sapphire::data::csvFile::withContent',,'http://10.101.2.170:8010/FileSpray', \\n,TRUE,TRUE,FALSE);\\n\\noutput(SprayCSVFile_withContent);
\\n\\nand ECLPlus code to invoke this file:\\neclplus owner=root password=newuser_123 cluster=thor server=10.101.2.170 @/usr/share/dumphere/hpcc/SprayCSVFile_withContent.ecl
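(A hedged side note on the DFUPlus route Bob suggests above: because DFUPlus takes the source file name as a command-line argument, it never has to be hard-coded in ECL at all, so a wrapper script can substitute any file name. A sketch with placeholder credentials, paths and target names, assuming the usual DFUPlus spray options:)

dfuplus action=spray server=http://10.101.2.170:8010 username=root password=newuser_123 srcip=10.101.2.171 srcfile=/var/lib/HPCCSystems/mydropzone/ClientWebCrawlData_withFileContent.csv dstcluster=mythor dstname=Sapphire::data::csvFile::withContent format=csv separator=';' overwrite=1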
\\n\\nfile is getting sprayed.\\nBut now i want remove the hardcoded name of file in ECL code and want to pass the name of file through ECLPlus code as parameter.\\n\\n1.How to achieve this??\\n2.What changes has to be done in the ECL code to substitute file name as variable name.\\n(currently code has file name: ClientWebCrawlData_withFileContent.csv) ??\\n3.Is there any way other than ECLPlus to achieve??\\n4.Multiple parameters as arguments can be passed through ECLPlus??\", \"post_time\": \"2013-01-30 12:01:34\" },\n\t{ \"post_id\": 3452, \"topic_id\": 763, \"forum_id\": 9, \"post_subject\": \"Re: Target Option in ECL IDE\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nLook at #WORKUNIT\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-13 18:08:28\" },\n\t{ \"post_id\": 3446, \"topic_id\": 763, \"forum_id\": 9, \"post_subject\": \"Target Option in ECL IDE\", \"username\": \"omnibuzz\", \"post_text\": \"When I am working on a ROXIE query and try to open an existing THOR code and run, it sets target as Roxie by default and I often forget to change the target. This sometimes brings down the cluster (it did). Is there a way by which we can allow to add a pre-processor directive to the IDE at the top of the ECL code, like\\n\\n@THOR // Run in THOR\\n(or) \\n!@ROXIE //Run in THOR or HTHOR\\n\\nwhich the IDE can look at and set the target accordingly... \\nMore often than not, when i am beginning to write a code I know for sure whether its intended for Roxie or THOR and I wouldn't be changing it later.. \\n\\nCheers\\nSrini\", \"post_time\": \"2013-02-13 15:28:29\" },\n\t{ \"post_id\": 4152, \"topic_id\": 767, \"forum_id\": 9, \"post_subject\": \"Re: Workunit running for long time!!\", \"username\": \"sort\", \"post_text\": \"If you want to update the contents of superkeys used by roxie without removing queries from roxie, here is what I recommend.\\n\\n. Have multiple dalis. 1 for Thor and 1 for roxie. Roxie will lock the files in the thor dali while roxie is accessing the file remotely -i.e. when copying the file. Once roxie has the files local, it should remove the remote lock. \\n\\n. Use packages to define the contents of superkeys (use latest 3.10.x release on the portal). This will cause roxie to lock the subkeys, but not the superkeys\\n\\n. Remember as you add more and more subkeys to a superkey, you may decrease performance. Consolidate data from time to time\\n\\nThe "hang" when roxie has a lock on it should now be fixed. Users should get a message indicating the action could not be completed, which is better than hanging with no information.\", \"post_time\": \"2013-05-28 13:19:18\" },\n\t{ \"post_id\": 3477, \"topic_id\": 767, \"forum_id\": 9, \"post_subject\": \"Re: Workunit running for long time!!\", \"username\": \"prachi\", \"post_text\": \"Hi Dan,\\n\\nThanks for your input.\\n\\nI need to build a workaround in which unpublish and publish of ROXIE query fits in.\\n\\nI also have the same doubt which u had.\\n1.As described above, web application users will query on ROXIE (say clientdata.1).If at that time i am unpublishing that query (clientdata.1) then user will face an error. 
How to handle this situation then??\\n\\n2.How to determine exact query name?\\n\\nThanks and Regards!\", \"post_time\": \"2013-02-17 07:12:25\" },\n\t{ \"post_id\": 3475, \"topic_id\": 767, \"forum_id\": 9, \"post_subject\": \"Re: Workunit running for long time!!\", \"username\": \"DSC\", \"post_text\": \"I think there are two general 'truths' to keep in mind that may help you craft a solution:\\n\\n* Published Roxie queries acquire locks on logical files, indexes, and superfiles/superkeys. This happens when the query is published, and released when the query is unpublished.\\n\\n* When any of those file structures are locked, you cannot modify them (e.g. add a subkey to a superkey). Attempting to modify them will result in the behavior you see (the update process waiting a long time to acquire its own lock, then eventually giving up and failing).\\n\\nTherefore, in order to update the data that is being used by a published Roxie query, you will need to first unpublish the query (actually, all queries that reference the data), perform the update, then republish the query/queries. That is the basis for performing an update. Exactly how you perform these steps depend on your external requirements, your HPCC environment (e.g. a multi-cluster configuration behind a load balancer offers more flexibility) and your query-to-data configurations.\\n\\nYou may want to search the forum more, as this topic has been discussed numerous times. One possible solution may be within http://hpccsystems.com/bb/viewtopic.php?f=8&t=550&p=2567 but I'm given to understand that it may not work in all situations/environments. It may give you some ideas, though.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2013-02-16 16:36:08\" },\n\t{ \"post_id\": 3474, \"topic_id\": 767, \"forum_id\": 9, \"post_subject\": \"Re: Workunit running for long time!!\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nAs described above i was working on Superfile.I also tried working with simple logical file.Steps were:\\nTHOR code:\\n1.Create one logical file record structure.\\n2.Build INDEX on it.\\n\\nROXIE code:\\n1.Publish code to access this logical file on ROXIE.\\n\\nAnd then again run THOR code.\\n\\nBUT when i run THOR code 2nd time, workunit goes in running state for indefinite time ! And my other workunit (other than this ECL code) goes in blocked state.\\n\\nMy assumption is: may be after publishing on ROXIE, if i try to overwrite those logical file with target cluster as THOR then this problem is getting created.\\n\\nSo i tried above scenario again but with little change in it.THIS time i deleted published query on ROXIE and then submitted THOR code for second time and it completed successfully!!\\n\\nBUT BUT after overwriting logical file when i tried to PUBLISH query on ROXIE (to access this logical file), it FAILED!\\n\\nQuestions:\\n1.So is it that we cant overwrite logical file with target cluster as THOR and which is copied on ROXIE cluster as well??? \\n\\n2.Why after overwriting logical file on THOR it is not getting PUBLISHED again???\\n\\nWaiting for reply/response.\\n\\nThanks and Regards!\", \"post_time\": \"2013-02-16 15:49:23\" },\n\t{ \"post_id\": 3457, \"topic_id\": 767, \"forum_id\": 9, \"post_subject\": \"Workunit running for long time!!\", \"username\": \"prachi\", \"post_text\": \"Hi,\\n\\nOur general usage scenario is :\\n\\nThe general requirement is that a sub-file will be added every n minutes to a superfile and I need an index to be built on this superfile. 
Multiple such super-files (and hence sub-files) exist
\\nA precreated superfile viz. ~prachi::superfile exists on Thor
\\nVia a web application ->ECL code on HPCC master, multiple users are adding sub-files to super-files other than ~prachi::superfile and also reading the super-files via the HPCC-JDBC driver
\\n\\nNow, I'm trying to execute the following ECL code via the ECL IDE, multiple times:\\n\\nTHOR_CODE:\\n\\nIMPORT Std;\\n\\nSuperFile_name := '~prachi::superfile';\\nindexfile_name := '~prachi::indexfile';\\n\\nVARSTRING fileName := 'clientwebcrawldata_2';\\nVARSTRING destinationlogicalname := '~sprayed::' +fileName;\\nVARSTRING sourceIP := '10.101.2.171';\\nVARSTRING sourcepath := '/var/lib/HPCCSystems/mydropzone/superfile_files/' +fileName +'.csv';\\nVARSTRING srcCSVseparator := ';';\\nVARSTRING destinationgroup := 'mythor';\\nVARSTRING espserverIPport := 'http://10.101.2.170:8010/FileSpray';\\nVARSTRING subFileDestinationLogicalname := '~prachi::subfile::' +fileName;\\n\\n/*Spray the csv file from the dropzone*/\\nSprayCSVFile :=STD.File.fSprayVariable(sourceIP,sourcepath,,srcCSVseparator,,,\\ndestinationgroup,destinationlogicalname,,espserverIPport,\\n,TRUE,TRUE,FALSE);\\n\\n\\nLayout_ClientWebCrawlData := RECORD\\nVARSTRING Controller_ID;\\nVARSTRING User_ID;\\nVARSTRING URL_Link;\\nVARSTRING URL_Content;\\nREAL URL_Sentiment;\\nVARSTRING URL_Date;\\nINTEGER8 Unique_Search_ID;\\nINTEGER8 InsertID;\\nVARSTRING StatusID;\\nVARSTRING Search_Pattern;\\nVARSTRING Word_Ignored;\\nVARSTRING Search_Date;\\nVARSTRING Detected_Language;\\nEND;\\n\\nFile_Layout_logical_file_Dataset := DATASET(destinationlogicalname,Layout_ClientWebCrawlData,CSV(SEPARATOR(';'))); \\n\\nsubfileCreation := OUTPUT(File_Layout_logical_file_Dataset,,subFileDestinationLogicalname,THOR,OVERWRITE);\\n\\ndeleteSprayedLogicalFile := STD.File.DeleteLogicalFile(destinationlogicalname);\\n\\nSuperFile_Dataset := DATASET(SuperFile_name,{Layout_ClientWebCrawlData,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fpos},indexfile_name);\\nbuild_superfile_index := BUILDINDEX(IDX_SuperFile,OVERWRITE);\\n\\nSEQUENTIAL(\\nSprayCSVFile,\\nsubfileCreation,\\ndeleteSprayedLogicalFile,\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(SuperFile_name,subFileDestinationLogicalname),\\nStd.File.FinishSuperFileTransaction(),\\nbuild_superfile_index\\n);\\n
\\n\\nThese steps are performed correctly when submitted for first time i.e the 1st subfile is added and the index also is created correctly.\\n\\nRespective to this code an ECL code to compile on ROXIE has been written which publishes successfully and is also giving results(via WsECL).\\n\\nROXIE_CODE :\\n\\n\\nIMPORT STD;\\n\\nExport QueryToRunOnRoxie():=Function\\nINTEGER insertid_value := 0 : STORED('insertid');\\n\\nLayout_ClientWebCrawlData := RECORD\\nVARSTRING Controller_ID;\\nVARSTRING User_ID;\\nVARSTRING URL_Link;\\nVARSTRING URL_Content;\\nREAL URL_Sentiment;\\nVARSTRING URL_Date;\\nINTEGER8 Unique_Search_ID;\\nINTEGER8 InsertID;\\nVARSTRING StatusID;\\nVARSTRING Search_Pattern;\\nVARSTRING Word_Ignored;\\nVARSTRING Search_Date;\\nVARSTRING Detected_Language;\\nEND;\\n\\t \\nSTRING SuperFile_name := '~prachi::superfile';\\nSTRING indexfile_name := '~prachi::indexfile';\\n\\n// create dataset of superfile\\nSuperFile_Dataset := DATASET(SuperFile_name,{Layout_ClientWebCrawlData,UNSIGNED8 fpos {virtual(fileposition)}},THOR);\\n\\n// create index on superfile\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fpos},indexfile_name);\\n\\n//fetch records \\nfetched_records := FETCH(SuperFile_Dataset, IDX_SuperFile(insertid=insertid_value), RIGHT.fpos);\\n\\n//store output\\ngetOutput := OUTPUT(fetched_records);\\n\\nRETURN getOutput;\\nEND;
\\n\\nBut when THOR_CODE runs again, say 2nd time, the workunit goes in the 'running' state for an indefinite period. I suspect that though I'm working on a super-file not used by anyone, some deadlock/blocking is occurring at Thor in 4 node cluster. But when tried on separate single-node cluster, with only above two codes (no web app is running.This cluster is totally separate), same issue takes place.\\nPlease evaluate my assumption.\\n\\nEclagent.log of single-node cluster is as follows:\\n\\n\\n00000000 2013-02-14 20:40:02 30752 30752 Logging to /var/log/HPCCSystems/myeclagent/eclagent.2013_02_14.log\\n00000001 2013-02-14 20:40:02 30752 30752 ECLAGENT build community_3.10.0-1\\n00000002 2013-02-14 20:40:02 30752 30752 Waiting for workunit lock\\n00000003 2013-02-14 20:40:02 30752 30752 Obtained workunit lock\\n00000004 2013-02-14 20:40:02 30752 30752 Loading dll (libW20130214-203956.so) from location /var/lib/HPCCSystems/myeclccserver/libW20130214-203956.so\\n00000005 2013-02-14 20:40:02 30752 30752 Starting process\\n00000006 2013-02-14 20:40:02 30752 30752 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000007 2013-02-14 20:40:02 30752 30752 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x7f521bf00000 alignment=1048576 bitmapSize=10\\n00000008 2013-02-14 20:40:02 30752 30752 Waiting for run lock\\n00000009 2013-02-14 20:40:02 30752 30752 Obtained run lock\\n0000000A 2013-02-14 20:40:02 30752 30752 Spray: ~sprayed::clientwebcrawldata_2\\n0000000B 2013-02-14 20:40:09 30752 30752 setResultString((null),0,'D20130214-204002')\\n0000000C 2013-02-14 20:40:09 30752 30752 Enqueuing on thor.thor to run wuid=W20130214-203956, graph=graph1, timelimit=600 seconds, priority=0\\n0000000D 2013-02-14 20:40:09 30752 30752 Thor on 172.25.37.135:20000 running W20130214-203956\\n0000000E 2013-02-14 20:40:10 30752 30752 Deleting NS logical file sprayed::clientwebcrawldata_2 for user 10602210\\n0000000F 2013-02-14 20:40:10 30752 30752 DeleteLogicalFile ('sprayed::clientwebcrawldata_2') done\\n00000010 2013-02-14 20:40:10 30752 30752 ,FileAccess,FileServices,DeleteLogicalFile,W20130214-203956,10602210,sprayed::clientwebcrawldata_2\\n00000011 2013-02-14 20:40:10 30752 30752 StartSuperFileTransaction\\n00000012 2013-02-14 20:40:10 30752 30752 AddSuperFile ('prachi::superfile', 'prachi::subfile::clientwebcrawldata_2') trans\\n00000013 2013-02-14 20:40:10 30752 30752 ,FileAccess,FileServices,AddSuperFile,W20130214-203956,10602210,prachi::superfile,prachi::subfile::clientwebcrawldata_2\\n00000014 2013-02-14 20:40:15 30752 30752 safeChangeModeWrite - temporarily releasing lock on prachi::superfile to avoid deadlock\\n00000015 2013-02-14 20:40:20 30752 30752 safeChangeModeWrite on prachi::superfile waiting for 10s\\n00000016 2013-02-14 20:40:20 30752 30752 Backtrace:\\n00000017 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x7f524483c4f6]\\n00000018 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_Z19safeChangeModeWriteP17IRemoteConnectionPKcRbj+0x1ff) [0x7f52437d229f]\\n00000019 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN20CDistributedFileBaseI21IDistributedSuperFileE14lockPropertiesEj+0x64) [0x7f52437680c4]\\n0000001A 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile17cAddSubFileAction7prepareEv+0xcb) [0x7f524376ff9b]\\n0000001B 2013-02-14 20:40:20 30752 30752 
/opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction6commitEv+0x4e) [0x7f5243776a3e]\\n0000001C 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/plugins/libfileservices.so(fslFinishSuperFileTransaction+0x4f) [0x7f5236510d7f]\\n0000001D 2013-02-14 20:40:20 30752 30752 /var/lib/HPCCSystems/myeclccserver/libW20130214-203956.so(+0xb22e) [0x7f5235f8e22e]\\n0000001E 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x7f5243044504]\\n0000001F 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7f5243045121]\\n00000020 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2b7) [0x7f5243044d17]\\n00000021 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x17c) [0x7f52430454cc]\\n00000022 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x147) [0x7f5244c6c977]\\n00000023 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x277) [0x7f5244c6ee97]\\n00000024 2013-02-14 20:40:20 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x7a0) [0x7f5244c719c0]\\n00000025 2013-02-14 20:40:20 30752 30752 eclagent(main+0x51) [0x400f41]\\n00000026 2013-02-14 20:40:20 30752 30752 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed) [0x7f5243ec576d]\\n00000027 2013-02-14 20:40:20 30752 30752 eclagent() [0x401019]\\n00000028 2013-02-14 20:40:20 30752 30752 CDFAction lock timed out on prachi::superfile\\n00000029 2013-02-14 20:40:20 30752 30752 CDistributedFileTransaction: Transaction pausing\\n0000002A 2013-02-14 20:41:53 30752 30752 safeChangeModeWrite - temporarily releasing lock on prachi::superfile to avoid deadlock\\n0000002B 2013-02-14 20:41:58 30752 30752 safeChangeModeWrite on prachi::superfile waiting for 10s\\n0000002C 2013-02-14 20:41:58 30752 30752 Backtrace:\\n0000002D 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x7f524483c4f6]\\n0000002E 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_Z19safeChangeModeWriteP17IRemoteConnectionPKcRbj+0x1ff) [0x7f52437d229f]\\n0000002F 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN20CDistributedFileBaseI21IDistributedSuperFileE14lockPropertiesEj+0x64) [0x7f52437680c4]\\n00000030 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile17cAddSubFileAction7prepareEv+0xcb) [0x7f524376ff9b]\\n00000031 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction6commitEv+0x4e) [0x7f5243776a3e]\\n00000032 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/plugins/libfileservices.so(fslFinishSuperFileTransaction+0x4f) [0x7f5236510d7f]\\n00000033 2013-02-14 20:41:58 30752 30752 /var/lib/HPCCSystems/myeclccserver/libW20130214-203956.so(+0xb22e) [0x7f5235f8e22e]\\n00000034 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x7f5243044504]\\n00000035 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7f5243045121]\\n00000036 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2b7) [0x7f5243044d17]\\n00000037 
2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x17c) [0x7f52430454cc]\\n00000038 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x147) [0x7f5244c6c977]\\n00000039 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x277) [0x7f5244c6ee97]\\n0000003A 2013-02-14 20:41:58 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x7a0) [0x7f5244c719c0]\\n0000003B 2013-02-14 20:41:58 30752 30752 eclagent(main+0x51) [0x400f41]\\n0000003C 2013-02-14 20:41:58 30752 30752 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed) [0x7f5243ec576d]\\n0000003D 2013-02-14 20:41:58 30752 30752 eclagent() [0x401019]\\n0000003E 2013-02-14 20:41:58 30752 30752 CDFAction lock timed out on prachi::superfile\\n0000003F 2013-02-14 20:41:58 30752 30752 CDistributedFileTransaction: Transaction pausing\\n00000040 2013-02-14 20:43:05 30752 30752 safeChangeModeWrite - temporarily releasing lock on prachi::superfile to avoid deadlock\\n00000041 2013-02-14 20:43:10 30752 30752 safeChangeModeWrite on prachi::superfile waiting for 10s\\n00000042 2013-02-14 20:43:10 30752 30752 Backtrace:\\n00000043 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x7f524483c4f6]\\n00000044 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_Z19safeChangeModeWriteP17IRemoteConnectionPKcRbj+0x1ff) [0x7f52437d229f]\\n00000045 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN20CDistributedFileBaseI21IDistributedSuperFileE14lockPropertiesEj+0x64) [0x7f52437680c4]\\n00000046 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile17cAddSubFileAction7prepareEv+0xcb) [0x7f524376ff9b]\\n00000047 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction6commitEv+0x4e) [0x7f5243776a3e]\\n00000048 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/plugins/libfileservices.so(fslFinishSuperFileTransaction+0x4f) [0x7f5236510d7f]\\n00000049 2013-02-14 20:43:10 30752 30752 /var/lib/HPCCSystems/myeclccserver/libW20130214-203956.so(+0xb22e) [0x7f5235f8e22e]\\n0000004A 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x7f5243044504]\\n0000004B 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7f5243045121]\\n0000004C 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2b7) [0x7f5243044d17]\\n0000004D 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x17c) [0x7f52430454cc]\\n0000004E 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x147) [0x7f5244c6c977]\\n0000004F 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x277) [0x7f5244c6ee97]\\n00000050 2013-02-14 20:43:10 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x7a0) [0x7f5244c719c0]\\n00000051 2013-02-14 20:43:10 30752 30752 eclagent(main+0x51) [0x400f41]\\n00000052 2013-02-14 20:43:10 30752 30752 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed) [0x7f5243ec576d]\\n00000053 2013-02-14 20:43:10 30752 30752 eclagent() 
[0x401019]\\n00000054 2013-02-14 20:43:10 30752 30752 CDFAction lock timed out on prachi::superfile\\n00000055 2013-02-14 20:43:10 30752 30752 CDistributedFileTransaction: Transaction pausing\\n[... the same backtrace, lock timeout and transaction pause repeat at roughly one-minute intervals until the final attempt at 21:02:45 below ...]\\n000001A0 2013-02-14 21:02:45 30752 30752 safeChangeModeWrite - temporarily releasing lock on prachi::superfile to avoid deadlock\\n000001A1 2013-02-14 21:02:50 30752 30752 safeChangeModeWrite on prachi::superfile waiting for 10s\\n000001A2 2013-02-14 21:02:50 30752 30752 Backtrace:\\n000001A3 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x7f524483c4f6]\\n000001A4 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_Z19safeChangeModeWriteP17IRemoteConnectionPKcRbj+0x1ff) [0x7f52437d229f]\\n000001A5 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN20CDistributedFileBaseI21IDistributedSuperFileE14lockPropertiesEj+0x64) [0x7f52437680c4]\\n000001A6 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile17cAddSubFileAction7prepareEv+0xcb) [0x7f524376ff9b]\\n000001A7 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction6commitEv+0x4e) 
[0x7f5243776a3e]\\n000001A8 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/plugins/libfileservices.so(fslFinishSuperFileTransaction+0x4f) [0x7f5236510d7f]\\n000001A9 2013-02-14 21:02:50 30752 30752 /var/lib/HPCCSystems/myeclccserver/libW20130214-203956.so(+0xb22e) [0x7f5235f8e22e]\\n000001AA 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x7f5243044504]\\n000001AB 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7f5243045121]\\n000001AC 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2b7) [0x7f5243044d17]\\n000001AD 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x17c) [0x7f52430454cc]\\n000001AE 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x147) [0x7f5244c6c977]\\n000001AF 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x277) [0x7f5244c6ee97]\\n000001B0 2013-02-14 21:02:50 30752 30752 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x7a0) [0x7f5244c719c0]\\n000001B1 2013-02-14 21:02:50 30752 30752 eclagent(main+0x51) [0x400f41]\\n000001B2 2013-02-14 21:02:50 30752 30752 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed) [0x7f5243ec576d]\\n000001B3 2013-02-14 21:02:50 30752 30752 eclagent() [0x401019]\\n000001B4 2013-02-14 21:02:50 30752 30752 CDFAction lock timed out on prachi::superfile\\n000001B5 2013-02-14 21:02:50 30752 30752 CDistributedFileTransaction: Transaction pausing\\n
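For context, the backtrace above is the commit path of a superfile transaction: fslFinishSuperFileTransaction -> CDistributedFileTransaction::commit -> cAddSubFileAction::prepare, which repeatedly times out while trying to take a write lock on prachi::superfile (typically because another workunit or Dali session still holds a lock on that superfile or one of its subfiles). Below is a minimal ECL sketch of the pattern that produces this call path; the file names are placeholders and this is not the original poster's code.

IMPORT STD;

// Sketch only: a superfile transaction whose commit corresponds to the
// fslFinishSuperFileTransaction frames in the log above. The write lock on
// the superfile is taken when the transaction commits, so a stale or
// competing lock shows up as the repeated "CDFAction lock timed out" lines.
SEQUENTIAL(
  STD.File.StartSuperFileTransaction(),
  STD.File.AddSuperFile('~prachi::superfile', '~prachi::somesubfile'),  // placeholder subfile name
  STD.File.FinishSuperFileTransaction()
);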
\\nI am attaching images of ECL Watch and ECL IDE.\\n\\nThanks and Regards!\", \"post_time\": \"2013-02-14 08:36:17\" },\n\t{ \"post_id\": 3816, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"hungifi\", \"post_text\": \"That fixed, thanks\", \"post_time\": \"2013-03-22 15:55:00\" },\n\t{ \"post_id\": 3815, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"gsmith\", \"post_text\": \"This was fixed in the 3.10.4 release.\\n\\nFYI - It was due to a change that IE10 introduced in wininet.dll\", \"post_time\": \"2013-03-22 15:46:52\" },\n\t{ \"post_id\": 3813, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"hungifi\", \"post_text\": \"I had the same problem few days ago. Have you any information about the next release?\\n\\nThanks for your reply\", \"post_time\": \"2013-03-22 14:51:41\" },\n\t{ \"post_id\": 3575, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"mohan\", \"post_text\": \"Hi\\n\\nI did try uninstalling IE10 completely and then rebooting and reinstalling ECL IDE. It still doesn't work.\\n\\nI will wait till the next fix is out. Hopefully everything should work then \\n\\nThanks\\nMohan\", \"post_time\": \"2013-02-27 20:02:51\" },\n\t{ \"post_id\": 3562, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"gsmith\", \"post_text\": \"I think it is related to the IE10 pre-release, but now it has been officially released I will need to investigate further ASAP.\\n\\nCan you uninstall IE10, reboot the machine and then reinstall the IDE and see if that works while I investigate more?\\n\\nIssue Report: https://track.hpccsystems.com/browse/IDE-217\\n\\nUpdate: A fix has been issued and pulled into upstream, will be included in next official release.\", \"post_time\": \"2013-02-27 09:10:23\" },\n\t{ \"post_id\": 3558, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"mohan\", \"post_text\": \"Hi \\n\\nI did have ie 10 pre-release installed. I uninstalled it, but it is still not working.\\nI tried with the latest ie as well and it didn't work.\\nI did not have any entry in the registry editor. So I guess that part is fine.\\n\\nAny other thoughts?
\\n\\nMohan\", \"post_time\": \"2013-02-26 21:35:28\" },\n\t{ \"post_id\": 3557, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"gsmith\", \"post_text\": \"Are you running Windows 7 and have you installed IE 10 Beta (or RC)?\\nDoes uninstalling IE 10 fix the issue?\\nDoes installing IE 10 Gold (released today) fix the issue?\\n\\nIf none of the above are applicable try:\\nOpen regedit and delete (or rename) the following key: HKEY_CURRENT_USER\\\\Software\\\\HPCC Systems:\\na. Press Win+R (opens run window)\\nb. Enter “regedit” and press OK\\nc. Navigate to HKEY_CURRENT_USER\\\\Software\\\\HPCC Systems\\nd. Right click on the HKEY_CURRENT_USER\\\\Software\\\\HPCC Systems and either delete or rename it.\", \"post_time\": \"2013-02-26 19:34:12\" },\n\t{ \"post_id\": 3556, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not showing up as a UI but loading as a process\", \"username\": \"bforeman\", \"post_text\": \"Mohan, if the ECL IDE works on other machines, what is different? \\nHow do the operating systems differ? Are your security rights the same on both machines? When you installed the ECL IDE did you simply accept defaults, or change any paths? Did you try to Run as Administrator, or in Compatibility Mode?\\n\\nForwarded to the ECL IDE developer for additional comment. \\n\\nHave to admit, my colleagues I have asked have never seen this happen. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-26 19:03:29\" },\n\t{ \"post_id\": 3555, \"topic_id\": 791, \"forum_id\": 9, \"post_subject\": \"ECL IDE not showing up as a UI but loading as a process\", \"username\": \"mohan\", \"post_text\": \"Hi \\n\\nI installed the ECL IDE and it just runs as a process (when I view the task manager), but the UI never shows up.\\n\\nI have tried to install it on other pcs and it works fine.\\nI have also tried uninstall, reboot, installed again… every combination I could think of, but nothing is working.\\n\\nAny advice would be greatly appreciated.\\nThanks\\nMohan\", \"post_time\": \"2013-02-26 18:45:50\" },\n\t{ \"post_id\": 3743, \"topic_id\": 836, \"forum_id\": 9, \"post_subject\": \"Re: how to run multiple ecl file using eclplus\", \"username\": \"bforeman\", \"post_text\": \"Hi sunxg,\\n\\nECLPlus is gradually getting replaced by the ECL Command Line. Have you tried using the ECL command line instead of ECLPlus?\\n\\nThe last time I chatted with the developer he recommended that I use the ECL Command line. Ecl submitted to Eclplus would need to be self-contained. The ECL Command line calls the ECL Compiler (eclcc) which compiles it locally and thus resolves the imports.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-14 15:25:39\" },\n\t{ \"post_id\": 3729, \"topic_id\": 836, \"forum_id\": 9, \"post_subject\": \"how to run multiple ecl file using eclplus\", \"username\": \"sunxg\", \"post_text\": \"I'm a beginner. \\n\\nmy question is:\\n\\nhow to run multiple ecl file using eclplus , or how to organize multiple ecl run in one workunit when use eclplus.\\n\\nin sunxg_mr.ecl:\\n
\\nEXPORT mac_sunxg_mr(RET_TYPE,in_file_name) := FUNCTIONMACRO\\n...\\nENDMACRO;\\n
\\n\\nin t1.ecl:\\n\\nimport $ as MRLib;\\n\\nMRLib.sunxg_mr.mac_sunxg_mr(MRLib.sunxg_mr.CELL_INTERF,'{~hpcctell::mr::20120614-000_*.csv}');\\n
\\n\\nIt works in the ECL IDE, but reports an error when run with eclplus; the following is the operation:\\n\\nD:\\\\sunxg\\\\ecl\\\\test&gt;eclplus @t1.ecl output=o.txt -ID:\\\\sunxg\\\\ecl\\\\test -LD:\\\\sunxg\\\\ecl\\\\test\\nWorkunit W20130313-161244 submitted\\n
\\n\\nThe errors reported in ECL Watch:\\n\\neclcc\\tstdin: (3,7) : 2171: Object does not have a member named 'sunxg_mr'\\neclcc\\tstdin: (3,7) : 2167: Unknown identifier &quot;sunxg_mr&quot;\\neclcc\\tstdin: (3,16) : 2171: Object does not have a member named 'mac_sunxg_mr'\\neclcc\\tstdin: (3,29) : 2171: Object does not have a member named 'mrlib'\\neclcc\\tstdin: (3,35) : 2171: Object does not have a member named 'sunxg_mr'\\neclcc\\tstdin: (3,44) : 2171: Object does not have a member named 'cell_interf'\\n
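The stdin: prefix in these eclcc messages shows why this fails: eclplus submits the query text as-is, so the IMPORT $ AS MRLib reference has no local repository folder to resolve against, which is exactly why Bob recommends the ecl command line above (it compiles locally with eclcc and so resolves the IMPORTs). If eclplus must be used, the submitted ECL has to be self-contained. The sketch below is a hypothetical self-contained rewrite; the record layout and macro body are illustrative placeholders, not the contents of the poster's sunxg_mr.ecl.

// Hypothetical self-contained version for eclplus: everything lives in one
// file instead of being IMPORTed from the repository folder.
CELL_INTERF := RECORD
  STRING line;   // placeholder field, not the real layout
END;

mac_sunxg_mr(RET_TYPE, in_file_name) := FUNCTIONMACRO
  // placeholder body; the real macro from sunxg_mr.ecl would go here
  RETURN DATASET(in_file_name, RET_TYPE, CSV);
ENDMACRO;

OUTPUT(mac_sunxg_mr(CELL_INTERF, '{~hpcctell::mr::20120614-000_*.csv}'));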
\", \"post_time\": \"2013-03-13 09:33:25\" },\n\t{ \"post_id\": 3749, \"topic_id\": 841, \"forum_id\": 9, \"post_subject\": \"Re: Count(group) exceed skew limit\", \"username\": \"bforeman\", \"post_text\": \"Please see Dan's reply here:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=842&sid=e77fdd8e25dd03b7221177cee77d59b6\\n\\nThanks Dan!\\n\\nBob\", \"post_time\": \"2013-03-14 19:10:10\" },\n\t{ \"post_id\": 3746, \"topic_id\": 841, \"forum_id\": 9, \"post_subject\": \"Count(group) exceed skew limit\", \"username\": \"rachel\", \"post_text\": \"Hi\\nMy code like this \\nr1:=record\\ndataset1.title;\\nn:=count(group);\\nEnd;\\no1=Table(dataset1,r1,number);\\n\\nwhen I do the same thing again count(group)of n from the above result. A system error show exceed skew limit. But o1 is much smaller than dataset1. Why this happened? How to solve it?\\nThank you\", \"post_time\": \"2013-03-14 16:27:14\" },\n\t{ \"post_id\": 3872, \"topic_id\": 867, \"forum_id\": 9, \"post_subject\": \"Re: 1003:unable to communicate with server\", \"username\": \"swapna\", \"post_text\": \"This issue got resolved, when the proxy is bypassed for the Virtual machine IP. \\n\\nThanks, \\nSwapna.p\", \"post_time\": \"2013-04-02 12:04:01\" },\n\t{ \"post_id\": 3871, \"topic_id\": 867, \"forum_id\": 9, \"post_subject\": \"Re: 1003:unable to communicate with server\", \"username\": \"gsmith\", \"post_text\": \"This will most likely be a Host/VM network configuration issue.\\n\\nFYI If the ECL Watch cannot be accessed then the IDE will not be able to connect.\", \"post_time\": \"2013-04-02 11:22:37\" },\n\t{ \"post_id\": 3870, \"topic_id\": 867, \"forum_id\": 9, \"post_subject\": \"1003:unable to communicate with server\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\ni downloaded and installed VM player, HPCC virtual machine and ECL IDE. when i try to acces the ECL watch page with the vitual ip generated http://192.168.220.128:8010\\ni am getting operation timedout error. But i am able to telnet 192.168.220.128 8010\\n\\nAlso after setting the virtual machine IP in the ECL IDE i am getting 1003:unable to communicate with server error. \\n\\nplease help.\\n\\nRegards, \\nSwapna.p\", \"post_time\": \"2013-04-02 09:17:34\" },\n\t{ \"post_id\": 3922, \"topic_id\": 875, \"forum_id\": 9, \"post_subject\": \"Re: eclipse IDE documentation file error 404\", \"username\": \"gsmith\", \"post_text\": \"It is also worth pointing out that there is now also built in contextual documentation - Just press F1 while in an ECL editor.\", \"post_time\": \"2013-04-15 08:00:47\" },\n\t{ \"post_id\": 3919, \"topic_id\": 875, \"forum_id\": 9, \"post_subject\": \"Re: eclipse IDE documentation file error 404\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for pointing this out. The link has been corrected and is now available at http://hpccsystems.com/products-and-ser ... clipse-ide\", \"post_time\": \"2013-04-12 19:19:08\" },\n\t{ \"post_id\": 3918, \"topic_id\": 875, \"forum_id\": 9, \"post_subject\": \"eclipse IDE documentation file error 404\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"getting 404 file not found error when I click on the documentation pdf link.\\nWeb page location I'm hitting:\\nhttp://hpccsystems.com/products-and-ser ... 
clipse-ide\", \"post_time\": \"2013-04-12 18:48:04\" },\n\t{ \"post_id\": 3968, \"topic_id\": 887, \"forum_id\": 9, \"post_subject\": \"Re: Getting error in lab #3 exercise: Intro to ECL course\", \"username\": \"karthikreddy\", \"post_text\": \"EXAMPLE:[/b] [b]Layout_People.ecl \\nEXPORT Layout_People := RECORD\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING5 Zip;\\n STRING42 Street;\\n STRING20 City;\\n STRING2 State;\\nEND;\\nFile_TutorialPerson.ecl\\nIMPORT $ AS Tutorial; //IMPORT TutorialYourName;\\nEXPORT File_TutorialPerson := \\n DATASET('~tutorial::yn::TutorialPerson', {Tutorial.Layout_People, UNSIGNED8 fpos {virtual(fileposition)}}, THOR);\\n\\n\\nkarthik reddy\", \"post_time\": \"2013-04-24 20:50:39\" },\n\t{ \"post_id\": 3960, \"topic_id\": 887, \"forum_id\": 9, \"post_subject\": \"Re: Getting error in lab #3 exercise: Intro to ECL course\", \"username\": \"bforeman\", \"post_text\": \"Everything Richard said, and in addition that warning you are seeing means that your preferences is pointing to an older compiler version. You should be pointing to the compiler in the ver_3_6 folder of the ECL IDE install.\\n\\nIn what folder did you create the Persons.ECL file? In your repository, where does Persons exist? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-04-22 20:23:46\" },\n\t{ \"post_id\": 3959, \"topic_id\": 887, \"forum_id\": 9, \"post_subject\": \"Re: Getting error in lab #3 exercise: Intro to ECL course\", \"username\": \"rtaylor\", \"post_text\": \"rhimbo,I'm following the videos for the Introduction to ECL course. Lesson 16, Defining Your Data, has lab exercise #3. It asks the user to define a RECORD and then run a DATASET command to read data previously sprayed in lesson #2.
First, let me point out that since ECL is a declarative, non-procedural language, you cannot "run a DATASET command" because DATASET is not a "command" nor can it be "run" -- it is a simple declaration (definition) of a file on disk. Because this code defines a dataset, the system will allow you to hit the Submit button and will show you records from that dataset (that works with dataset and set definitions, may work with some value and boolean definitions and not with others), but the more standard way of looking at records in a dataset is to open a separate builder window (click the New button on the Quick Access toolbar) and write code like this:IMPORT TrainingYourName;\\nTrainingYourName.persons;
The IMPORT makes EXPORT definitions from the TrainingYourName directory available for use, then the TrainingYourName.persons; line of code is the action that tells the compiler what result to produce (in this case, logically equivalent to calling the OUTPUT(TrainingYourName.persons) action).\\nMy ECL code is shown in the screen capture attached called "ecl-window.jpg". The code is the same as the instructor's in the video. It compiles with no errors and no warnings.
The code you wrote looks perfectly fine. \\n\\nHowever, when I hit "Submit" to run the code I get the errors shown in the screen capture shown in the attachment called "errors.jpg".
The error message says the system can't find your TrainingVartanPiroumian.Persons code file, which is strange since you're presumably hitting Submit on this window (or are you?) and it works just fine on my machine.\\n\\nLooking at your screenshot of the IDE, I see that you somehow have two "MyECLCode" directories. Does either of these have a space in the folder name (as in, "My ECL Code" or "MyECL Code")? \\n\\nAnd, exactly where does your "TrainingVartanPiroumian.persons.ecl" file live?\\n\\nIf it is under a directory tree with a space in the name (allowed by Windows, but not allowed by HPCC), then I would suspect that is the cause of the problem. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-22 19:46:41\" },\n\t{ \"post_id\": 3956, \"topic_id\": 887, \"forum_id\": 9, \"post_subject\": \"Re: Getting error in lab #3 exercise: Intro to ECL course\", \"username\": \"gsmith\", \"post_text\": \"Just looking at the ECL in the attached screen shot - there is no "action", you are exporting a dataset called "Persons".\\n\\nGenerally you would have a different window open with code something like:\\n\\n\\nimport folder.persons;\\npersons;\\n
\\nand then submit that.\", \"post_time\": \"2013-04-22 10:55:17\" },\n\t{ \"post_id\": 3954, \"topic_id\": 887, \"forum_id\": 9, \"post_subject\": \"Getting error in lab #3 exercise: Intro to ECL course\", \"username\": \"rhimbo\", \"post_text\": \"Hi folks,\\n\\nI'm following the videos for the Introduction to ECL course. Lesson 16, Defining Your Data, has lab exercise #3. It asks the user to define a RECORD and then run a DATASET command to read data previously sprayed in lesson #2. \\n\\nMy ECL code is shown in the screen capture attached called "ecl-window.jpg". The code is the same as the instructor's in the video. It compiles with no errors and no warnings. \\n\\nHowever, when I hit "Submit" to run the code I get the errors shown in the screen capture shown in the attachment called "errors.jpg".\\n\\nI'm wondering if I have a configuration or setup problem with the IDE?\\n\\nThanks in advance for any ideas....\", \"post_time\": \"2013-04-20 17:44:23\" },\n\t{ \"post_id\": 4045, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"clo\", \"post_text\": \"Hi Nick,\\n\\nI'm sorry you're having so many issues. What version of the IDE are you trying to install and run? Perhaps a newer version from the portal will help. In the meantime, I'm going to speak to some of my colleagues to see if they've seen this.\", \"post_time\": \"2013-05-03 11:27:52\" },\n\t{ \"post_id\": 4040, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"NickA\", \"post_text\": \"I have reinstalled the system entirely over ten times... including fully removing the program, all the application data folders, and the registry keys associated with the HPCC tools.\", \"post_time\": \"2013-05-01 21:33:52\" },\n\t{ \"post_id\": 4039, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"rtaylor\", \"post_text\": \"Nick,\\n\\nThis bit caught my ayyention:WARNING: ERROR: API = CreateProcess.\\n error code = 14001.\\n message = This application has failed to start because the application configuration is incorrect. Reinstalling the application may fix this problem.
Perhaps you should try re-installing as it suggests.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-01 18:39:40\" },\n\t{ \"post_id\": 4037, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"NickA\", \"post_text\": \"This system has never worked, so there's nothing that suddenly changed.\\n\\nThis is the code that I was attempting to run. It's just from one of the training exercises that is in the Introduction to ECL course.\\nIMPORT $;\\n\\nPreSortPersons := SORT($.Persons,LastName,FirstName,RecID);\\n\\nEXPORT DedupPersons := DEDUP(PreSortPersons,LastName,FirstName):PERSIST('~class::nda::persist::DedupPersons');
\\n\\nBelow is the error received when I Shift+Submitted the program above. \\nINFO: \\n\\ncd C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\n\\n"C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\bin\\\\eclcc.exe" -E -P"C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu" -I"C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files" -I"C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\Samples" "C:\\\\Documents and Settings\\\\All Users\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\trainingnick\\\\DedupPersons.ecl"\\n\\n cmdProcess.cpp(304)\\nWARNING: ERROR: API = CreateProcess.\\n error code = 14001.\\n message = This application has failed to start because the application configuration is incorrect. Reinstalling the application may fix this problem.\\n\\n.\\n cmdProcess.cpp(224)\\nWARNING: ERROR: API = CloseHandle.\\n error code = 6.\\n message = The handle is invalid.
\\n\\nAnd finally, here is a screenshot of the compiler tab on the preferences. \\n[attachment=0:1v8gitk4]Compilerstuff.JPG\\nAlso, as a quick side note, I get the two API errors even when I press the check button on the above code. And it doesn't seem to matter what code I've used, I get the same problems all the time. \\n\\nAlso, I'm not using a VM, but actually using a remote compiler which the training provider has let us use for this training seminar. \\n\\nThanks for your help.\", \"post_time\": \"2013-05-01 15:06:21\" },\n\t{ \"post_id\": 4036, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"rtaylor\", \"post_text\": \"Nick,Sorry to resurrect this issue, but I'm having the same exact problem, however my target is definitely set to Thor already.
OK, so could we see an example of the code that won't compile and the error message you're seeing? Also, have you ever been able to run, or has nothing ever run? If nothing, then a screenshot of your compiler tab on the Preferences dialog might help, too.\\n\\nRichard\", \"post_time\": \"2013-05-01 14:47:22\" },\n\t{ \"post_id\": 4031, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"NickA\", \"post_text\": \"Sorry to resurrect this issue, but I'm having the same exact problem, however my target is definitely set to Thor already. I'm not sure why it seems to be defaulting to a local compile (which obviously fails terribly), however it doesn't seem to be connecting to the thor server at all. \\n\\nI can use ECLWatch just fine, and there doesn't seem to be any issues there. \\n\\nI could open a new thread, since this has already been answered, but I wasn't sure if I should or not. \\n\\nThanks.\", \"post_time\": \"2013-04-30 14:20:00\" },\n\t{ \"post_id\": 3991, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"bohman\", \"post_text\": \"Thanks Richard - this helped resolve "the issue" captured in my other post: https://hpccsystems.com/bb/viewtopic.php?f=8&t=893\", \"post_time\": \"2013-04-26 12:12:52\" },\n\t{ \"post_id\": 3988, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"rtaylor\", \"post_text\": \"bohman,\\n\\nThe leading "L" in the workunit identifier indicates that you're doing a local compile. Try targeting your Thor (the Target droplist is at the top-right corner of the builder window).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-04-25 20:51:36\" },\n\t{ \"post_id\": 3986, \"topic_id\": 894, \"forum_id\": 9, \"post_subject\": \"WARNING: ESP Exception - CSoapResponseBinding:\", \"username\": \"bohman\", \"post_text\": \"I see the following in the Error Log window of the ECL IDE:\\n\\n[size=85:2esfues4]WARNING: ESP Exception - CSoapResponseBinding: 2013-04-25 19:51:21 GMT: Invalid Workunit ID: L20130425-155130 (D:\\\\hpccsystems\\\\src\\\\eclide\\\\comms\\\\Dali.cpp, 1014) SoapUtil.h(419)\\n\\nI seem to get an entry each time I submit. I'm not sure what to make of it or how to resolve it. Feedback is of interest.\\n\\nThanks!\", \"post_time\": \"2013-04-25 20:01:12\" },\n\t{ \"post_id\": 4494, \"topic_id\": 895, \"forum_id\": 9, \"post_subject\": \"Re: Result limit\", \"username\": \"Rahul Jain\", \"post_text\": \"You can also use Topn. That migh also help you fetching required rows.\", \"post_time\": \"2013-08-27 15:39:20\" },\n\t{ \"post_id\": 3994, \"topic_id\": 895, \"forum_id\": 9, \"post_subject\": \"Re: Result limit\", \"username\": \"sbagaria\", \"post_text\": \"Thanks Richard. Specifying the limit as 0 worked. I also set the output limit to 100 MB, so I will be good there too. \\n\\nI will create an issue for this in Jira if there is not one already.\", \"post_time\": \"2013-04-26 13:30:27\" },\n\t{ \"post_id\": 3993, \"topic_id\": 895, \"forum_id\": 9, \"post_subject\": \"Re: Result limit\", \"username\": \"rtaylor\", \"post_text\": \"Sid,\\n
Does ECL IDE put a hard upper limit on the number of rows in the result?
Apparently. I just tested it and found that specifying 10005 returns only 9999. I'd call this an IDE bug that it's maxing out at 9999. Please report this in JIRA. \\n\\nYou can, of course, specify 0 in that limit field and get all records returned (if they total < 10 Mb of data).\\n\\nRichard\", \"post_time\": \"2013-04-26 13:20:13\" },\n\t{ \"post_id\": 3990, \"topic_id\": 895, \"forum_id\": 9, \"post_subject\": \"Result limit\", \"username\": \"sbagaria\", \"post_text\": \"Does ECL IDE put a hard upper limit on the number of rows in the result?\\n\\nIt seems like the default is 100 and the hard limit is 9,999. So even if I specify something more than 10,000, my result stored in the workunit is always truncated to the first 9,999 rows.\", \"post_time\": \"2013-04-26 11:09:50\" },\n\t{ \"post_id\": 4487, \"topic_id\": 1003, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE integration with java\", \"username\": \"richardkchapman\", \"post_text\": \"The standard advice is "Install the java package file that represents the default java install for your distro"\\n\\nI would think that the best bet if you can't use apt-get is to retrieve the deb file manually, then install it using dpkg (though I suppose you would need to find all the deb files for dependencies too).\\n\\nOn my Ubuntu 12.04 system, running\\n\\n ldd /opt/HPCCSystems/plugins/libjavaembed.so \\n\\ntells me that the jdk library is being resolved to \\n\\n libjvm.so => /usr/lib/jvm/java-6-openjdk/jre/lib/amd64/server/libjvm.so\\n\\nOther distros may well use other versions (I think 13.04 uses java-7 for example) and/or install to different places.\", \"post_time\": \"2013-08-23 13:43:58\" },\n\t{ \"post_id\": 4486, \"topic_id\": 1003, \"forum_id\": 9, \"post_subject\": \"ECL IDE integration with java\", \"username\": \"sunilatCTS\", \"post_text\": \"This is with respect to the Open NLP integration with HPCC that I am currently working upon. I would need java installed in the HPCC environment to make these OpenNLP libraries work.\\n\\nI tried to follow the steps given in this link \\n\\nhttps://github.com/arjunachala/arjunach ... ntergation\\n\\nfor step 2, I downloaded the corresponding jdk file for a 32 bit os since I run on a Virtual Machine and followed the steps as given in this link \\n\\nhttp://www.wikihow.com/Install-Oracle-J ... untu-Linux\\n\\nDue to Proxy restrictions, I had to opt to install java manually only. (sudo apt-get install does not work due to proxy restrictions)\\n\\nI received the following error on compiling the sample program given at the end of first link\\n\\n“Error: System error: 0: Error loading /var/lib/HPCCSystems/myeclccserver/libW20130823-183501.so: libjvm.so: cannot open shared object file: No such file or directory (0, 0)”\\n\\nAny detailed document that could help on the installation of open jdk ? 
or am i doing something wrong here/ missing a few steps ?\\n\\nThanks,\\nSunil\", \"post_time\": \"2013-08-23 13:32:39\" },\n\t{ \"post_id\": 4531, \"topic_id\": 1019, \"forum_id\": 9, \"post_subject\": \"ECL IDE Installer blocked/removed by Norton EndPoint Protec\", \"username\": \"JimD\", \"post_text\": \"After downloading the ECL IDE installer, Norton EndPoint Protection removed the file and reported it as suspicious.\\n\\nIt says the suspected threat is Suspiscious.Cloud.7.EP.\\n\\nHas anyone else encountered this?\\n\\nI have added a JIRA issue: \\n\\nhttps://track.hpccsystems.com/browse/IDE-314\", \"post_time\": \"2013-09-04 14:11:29\" },\n\t{ \"post_id\": 4546, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Re: Compiler error when trying to use ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Is it still saying "build_0_0" or is it giving you an actual server version now?\\n\\nIf it is giving you the server version, you should download the windows "clienttools" that match it.\\n\\nThen as long as your on the latest IDE it will just locate the appropriate client tools for the server in question.\\n\\nFinally double check the "compiler" tab and ensure that "Override automatic compiler selection" is turned off.\", \"post_time\": \"2013-09-05 21:56:15\" },\n\t{ \"post_id\": 4543, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Re: Compiler error when trying to use ECL IDE\", \"username\": \"cliffcourt\", \"post_text\": \"I have tried selecting both thor and hthor but it doesn't make a difference\\n\\nThe initial error every time I run the program about the Compiler/Server Mismatch seems to be the main issue here.\", \"post_time\": \"2013-09-05 19:07:11\" },\n\t{ \"post_id\": 4538, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Re: Compiler error when trying to use ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"In the builder window can you select a "new" item in the Target (hthor or such like).\", \"post_time\": \"2013-09-05 13:55:23\" },\n\t{ \"post_id\": 4537, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Re: Compiler error when trying to use ECL IDE\", \"username\": \"cliffcourt\", \"post_text\": \"Thanks gsmith for the response.\\n\\nI can reach the server with the ipaddress:8010 \\n\\nI ran the IDE again and saw the preferences had no ipaddress loaded. 
So I entered the server IP address but it has not made much difference.\\n\\nI now get error reported in another post\\nviewtopic.php?f=9&t=894\\n\\nIt seems to be a SOAP related issue now as well.\\n\\nThanks\\nCliff\", \"post_time\": \"2013-09-05 13:46:40\" },\n\t{ \"post_id\": 4536, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Re: Compiler error when trying to use ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Cliff - \\n\\nI suspect it is unable to reach the server nominated in your preferences.\\n\\nFor sanity take the IP address you entered in the preferences and open a web browser and navigate to:\\nhttp://YOURIP:8010/\\n\\nDoes that work?\", \"post_time\": \"2013-09-05 13:13:29\" },\n\t{ \"post_id\": 4535, \"topic_id\": 1021, \"forum_id\": 9, \"post_subject\": \"Compiler error when trying to use ECL IDE\", \"username\": \"cliffcourt\", \"post_text\": \"Good Day\\n\\nI am trying to use the ECL IDE on a 32-bit Win 7 PC but when I load the ECL IDE application, it gives me and error on launch that says "Compiler/Server mismatch, Compiler 4.0.0 community_4.0.0-4, Server: build_0_0\\n\\nIf I then try to submit some basic lines of ECL, I get Compile/Link failed with some files paths attached. This seems clearly linked to the error at launch.\\n\\nI have tried an earlier ECL IDE version 3.0 install and it gives the same error.\\n\\nOn a separate 64-bit Win 7 PC, this all works OK. \\n\\nPlease can anyone make a suggestion as to how I overcome this problem?\\n\\nMany thanks\\nCliff Court\", \"post_time\": \"2013-09-05 13:06:09\" },\n\t{ \"post_id\": 4631, \"topic_id\": 1037, \"forum_id\": 9, \"post_subject\": \"Re: About ECL IDE Toolbar\", \"username\": \"Rahul Jain\", \"post_text\": \"Thanks. I will look into to the code and start constructing something relevant.\", \"post_time\": \"2013-09-23 15:29:22\" },\n\t{ \"post_id\": 4626, \"topic_id\": 1037, \"forum_id\": 9, \"post_subject\": \"Re: About ECL IDE Toolbar\", \"username\": \"gsmith\", \"post_text\": \"If you are interested in enhancing the ECL editor/IDE then like Richard said, I would suggest looking at the Open Surce ECL Language Plugin for Eclipse: https://github.com/hpcc-systems/EclipsePlugin\\n\\nThere is also a simple ECL Notepad project here:\\nhttps://github.com/GordonSmith/ECLNotepad\\n\\nTo answer some of the specific questions:\\n1. There is no scripting/macro facility in the standard ECLIDE\\n2. Alt+Select allows you to cut and paste columns of ECL\\n\\n---\\n1. No not really\\n2. No the ECLIDE is written in C++\\n3. For future development we are going to focus on the Eclipse ECL Language Plugin (see above).\", \"post_time\": \"2013-09-23 07:58:52\" },\n\t{ \"post_id\": 4610, \"topic_id\": 1037, \"forum_id\": 9, \"post_subject\": \"Re: About ECL IDE Toolbar\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nYou might want to look at the ECL plugin for Eclipse as the better area to spend your time contributing, since it will be the IDE-of-choice in the future.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-18 20:54:39\" },\n\t{ \"post_id\": 4609, \"topic_id\": 1037, \"forum_id\": 9, \"post_subject\": \"About ECL IDE Toolbar\", \"username\": \"Rahul Jain\", \"post_text\": \"I was looking for a way to add new functionality to ECL Editor like -\\n1. Convert all the keywords to upper case once passed to a Macro.Finally if works can be made like a button in editor.\\n2. Column mode and Row mode editing option as Ultra-edit also has.\\n\\nI wanted to know -\\n1. 
Is there any place where code behind editor tabs can be found. Ex currently - CTRL + SHIFT +U converts selected text to upper case. So is there a place where we can find code written behind this.\\n2. Is this code also written using ECL?\\n3. How can we fetch the ECL IDE code as text?\", \"post_time\": \"2013-09-18 19:22:51\" },\n\t{ \"post_id\": 4659, \"topic_id\": 1048, \"forum_id\": 9, \"post_subject\": \"Re: Error: Import names unknown module "std"\", \"username\": \"pauloakes\", \"post_text\": \"I've gotten past this error by:\\n\\n1. shutting down the HPCC VM.\\n2. Increasing HPCC VM memory to 4 GB, adding a Bridged Network adapter (not necessary but telling all)\\n3. Shutting down the ECL IDE\\n4. Starting HPCC VM (with new settings)\\n5. Starting ECL IDE.\\n\\nSo, increased memory and restart of both seemed to do the trick.\", \"post_time\": \"2013-09-27 23:31:58\" },\n\t{ \"post_id\": 4658, \"topic_id\": 1048, \"forum_id\": 9, \"post_subject\": \"Error: Import names unknown module "std"\", \"username\": \"pauloakes\", \"post_text\": \"I'm working through the HPCCDataTutorial. \\n\\nI'm stuck with an "Error: Import names unknown module "std" (1, 27), 2081,"\\n\\nI'm working on a Mac Book Pro, running the HPCC "community_4.0.2-2" VM in VirtualBox. \\n\\nI'm using Crossover for Mac to run the ECL IDE.\\n\\nI get the "unknown module std" error when I try to submit this code:\\n\\nimport TutorialPO, Std;\\n TutorialPO.Layout_People toUpperPlease(TutorialPO.Layout_People pInput) \\n\\t\\t := TRANSFORM\\n\\t\\tSELF.FirstName := Std.Str.ToUpperCase(pInput.FirstName);\\n...\\n\\nHas anyone seen this error before and successfully been able to resolve the problem?\\n\\nThanks!\", \"post_time\": \"2013-09-27 22:47:42\" },\n\t{ \"post_id\": 4831, \"topic_id\": 1091, \"forum_id\": 9, \"post_subject\": \"Cannot view complex key\", \"username\": \"abhisr\", \"post_text\": \"I am not able to view certain logical files in ESP using 'View Data File'. \\nThe message am getting is Cannot view complex key
.\\nThe only difference is that my file layout has a column of type blob .\\n\\nWhy is it not showing for files with type blob ?\", \"post_time\": \"2013-10-24 18:41:08\" },\n\t{ \"post_id\": 4836, \"topic_id\": 1092, \"forum_id\": 9, \"post_subject\": \"Re: Command Line automation with /usr/bin/ecl\", \"username\": \"BrianB644\", \"post_text\": \"I certainly tried it ... both with -legacy and --legacy ... in almost every conceivable portion of the command line I could. It certainly appears that it should be there based on the code to support it in the code base, but it isn't in the help message, it isn't in the documentation, and the string "legacy" isn't compiled into the executable ... at least not in the way that strings for most of the other arguments are present.\\n\\nConcerning modifying the code ... the code is generated, I don't maintain it, and updating the generator isn't in scope for my current project.\\n\\nAnother nit ... /usr/bin/ecl doesn't always return a non-zero error code when "there is a failure". I previously used eclplus, and it seemed to return codes that made sense from an automation perspective. Unfortunately, I couldn't seem to get eclplus to respect the -I option so had to switch the framework to use the "ecl command" instead.\\n\\nCheers,\\n\\nBrian B.\", \"post_time\": \"2013-10-25 15:27:21\" },\n\t{ \"post_id\": 4835, \"topic_id\": 1092, \"forum_id\": 9, \"post_subject\": \"Re: Command Line automation with /usr/bin/ecl\", \"username\": \"gsmith\", \"post_text\": \"Does ecl --legacy not work?\\n\\nAlso since this feature is being deprecated is there any reason why you need it (rather than updating the ecl files to use the new import semantics)?\", \"post_time\": \"2013-10-25 12:35:27\" },\n\t{ \"post_id\": 4834, \"topic_id\": 1092, \"forum_id\": 9, \"post_subject\": \"Re: Command Line automation with /usr/bin/ecl\", \"username\": \"BrianB644\", \"post_text\": \"So ... I peeked through the code for "eclcmd" and it appears that most of the work to support a -legacy option is present ... but the output of "strings /usr/bin/ecl" doesn't have "-legacy" in it ... though the other supported options do seem appear. \\n\\nThe code did call "eclcc" as a pipe command though and appeared to rely on eclcc to be in the path ... so ... I made my own version of "eclcc" to call /usr/bin/eclcc with the -legacy option enabled and that "worked around" my problem.\\n\\nWhat I have come to consider an idiom for something like this is as follows ... though I tend to think it is a bit too cute at the expense of readability:\\n\\n#!/bin/bash\\ntest $# -eq 0 && exec /usr/bin/ecl -legacy\\nexec /usr/bin/ecl -legacy "$*"\\n\\nThis worked around my problem ... though I'd be happy to have a way to do it from the command line.\\n\\nCheers,\\n\\nBrian\", \"post_time\": \"2013-10-25 12:02:29\" },\n\t{ \"post_id\": 4833, \"topic_id\": 1092, \"forum_id\": 9, \"post_subject\": \"Command Line automation with /usr/bin/ecl\", \"username\": \"BrianB644\", \"post_text\": \"I am doing some command line automation under Linux. For this project we are using the standard file system folders for our code under windows and replicating the repository to a Linux box where we expect to run our automation.\\n\\nAfter much gnashing-of-teeth things seem to be configured correctly to use "/usr/bin/ecl" to run the code and reference our filesystem-based code libraries ... except my work requires that I pass the "-legacy" flag to the compiler and I don't see how to do that for the ecl compiler. 
Any pointers?\\n\\nThanks in advance,\\n\\nBrian\", \"post_time\": \"2013-10-25 09:26:27\" },\n\t{ \"post_id\": 5049, \"topic_id\": 1141, \"forum_id\": 9, \"post_subject\": \"Re: Passing runtime parameters\", \"username\": \"sameermsc\", \"post_text\": \"Hi Neha,\\n\\nTo my Knowledge there is no such functionality "A popup displayed at runtime to enter an argument value" in ECL IDE\\nthe closest thing you can expect is to dynamically grab a "named" output and use it\\n\\nits long time since i have gone through the Documentations, so, Please correct me if i am wrong \\n\\nRegards,\\nSameer\", \"post_time\": \"2013-12-12 07:54:50\" },\n\t{ \"post_id\": 5048, \"topic_id\": 1141, \"forum_id\": 9, \"post_subject\": \"Re: Passing runtime parameters\", \"username\": \"Neha Singh\", \"post_text\": \"We want to pass variable at runtime.\\nLike we have many ETL tools where we declared file path=$Fp and when we execute the job ,there is a pop up window which ask us to fill the parameter, Is this kind of functionality available with ECL IDE.\", \"post_time\": \"2013-12-12 05:41:32\" },\n\t{ \"post_id\": 5038, \"topic_id\": 1141, \"forum_id\": 9, \"post_subject\": \"Re: Passing runtime parameters\", \"username\": \"bforeman\", \"post_text\": \"Passing parameters is a fundamental part of ECL:\\n\\nSee:\\n\\nhttp://hpccsystems.com/download/docs/ecl-language-reference/html/Function_Attributes__Parameter_Passing.html\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-10 12:57:08\" },\n\t{ \"post_id\": 5036, \"topic_id\": 1141, \"forum_id\": 9, \"post_subject\": \"Passing runtime parameters\", \"username\": \"Neha Singh\", \"post_text\": \"We want to pass parameter at run-time. For example:-We have mention the @filepath in my code,so we want to pass value to @filename at run time. Is this possible using ECL.\", \"post_time\": \"2013-12-10 10:54:12\" },\n\t{ \"post_id\": 5047, \"topic_id\": 1143, \"forum_id\": 9, \"post_subject\": \"Re: Graphs on Fedora in wine\", \"username\": \"gsmith\", \"post_text\": \"The BugTrap stack trace may actually help, so please do send it on. \\n\\nThe common issue when running in wine was either have the wrong version of msxml installed or none at all. There are some instructions for installing on wine (probably for an old ubunutu setup), did you see those?\", \"post_time\": \"2013-12-11 22:23:48\" },\n\t{ \"post_id\": 5040, \"topic_id\": 1143, \"forum_id\": 9, \"post_subject\": \"Graphs on Fedora in wine\", \"username\": \"GregM\", \"post_text\": \"Hi,\\n\\nI'm running the ECL IDE in wine on Fedora and whenever I click on a graphs it crashes:(\\n\\nWine version: wine-1.7.5-1.fc19.x86_64\\nECL IDE version: 4.0.2\\n\\nThe exception reason is:\\nECLIDE.exe caused UNKNOWN_ERROR in module "C:\\\\windows\\\\system32\\\\KERNEL32.dll" at 0023:7B83AD4F\\n\\nI can attach the errorlog.xml if required (bugtrap can't seem to submit it...)\\n\\nThanks!\", \"post_time\": \"2013-12-10 17:56:52\" },\n\t{ \"post_id\": 5145, \"topic_id\": 1177, \"forum_id\": 9, \"post_subject\": \"Re: API for HPCC\", \"username\": \"gsmith\", \"post_text\": \"Most of the client tools talk to the ESP layer via SOAP (and now JSON). There isn't an official API (well not documented as such), but its the same interface that can be called via ECL.\\n\\nTo get you you started take a look at:\\nhttp://X.X.X.X:8010/WsWorkunits/ \\nThis page will list the available Actions on the WsWorkunits Service. 
If you are using SOAP, you can get the WSDL from the top link on the page.\\n\\nTo open a test form for an Action simply open an URL similar to this:\\nhttp://X.X.X.X:8010/WsWorkunits/WUQuery?form\\n(WUQuery will list all the workunits). You can now at least "play" with the API.\\n\\nThe next thing to know is that ESP will accept queries in many different formats (XML, JSON (GET, POST)) and will return the data in HTML, XML or JSON based on the request.\\n\\nHTML Is basically the ECL Watch page associated with that Action.\\nXML can be requested by adding a "rawxml_=1" parameter to the call.\\nJSON can be requested by tweaking the target URL to: http://X.X.X.X:8010/WsWorkunits/WUQuery.json\\n\\nSo at a simple level if you want to get a list of all the WUs submittied to hthor as a JSON string you can just open this URL in your browser:\\nhttp://X.X.X.X:8010/WsWorkunits/WUQuery ... ster=hthor\\n\\nArmed with this knowledge you can see how things like the ECL Playground work by simply monitoring the network traffic in chrome/IE.\\n\\nI know this is only scratching the surface, but I hope it helps...\", \"post_time\": \"2014-01-21 06:54:43\" },\n\t{ \"post_id\": 5143, \"topic_id\": 1177, \"forum_id\": 9, \"post_subject\": \"API for HPCC\", \"username\": \"spmurphy\", \"post_text\": \"Do you have an API for HPCC (and documents)? We have tools that historically talk directly to the system and would like to continue to function this way. Preferably a REST API.\\n\\nthank you,\\nShawn\", \"post_time\": \"2014-01-20 18:34:08\" },\n\t{ \"post_id\": 5217, \"topic_id\": 1188, \"forum_id\": 9, \"post_subject\": \"Re: Version Control in non-legacy systems\", \"username\": \"BenJones\", \"post_text\": \"I've been experimenting with Eclipse. You seriously need to update the manual for setting it up. It shows you how to do Hello World but leaves it as an exercise for the reader how to do other important things such as bringing in an existing repository and setting up version control. These steps are fairly straightforward when you know what you are doing but are a nightmare if you don't.\\n\\nSo here is the trick for getting an existing repository into Eclipse:\\n\\n
\\n
\\n\\nOne thing I have noticed is that if you are trying out the Virtual Machine and Eclipse at the same time is that Eclipse and its "Run Configuration" window in particular run extremely sluggishly when the Virtual Machine is running on the same computer, whereas the ECL IDE is much more responsive. I suspect the reason for that is Eclipse is built with Java and ECL IDE was probably written in C/C++ and therefore runs much more efficiently.\", \"post_time\": \"2014-02-17 17:45:04\" },\n\t{ \"post_id\": 5167, \"topic_id\": 1188, \"forum_id\": 9, \"post_subject\": \"Re: Version Control in non-legacy systems\", \"username\": \"gsmith\", \"post_text\": \"This is one of the reasons why we are providing the ECL Plugin for Eclipse...\", \"post_time\": \"2014-02-03 16:47:08\" },\n\t{ \"post_id\": 5166, \"topic_id\": 1188, \"forum_id\": 9, \"post_subject\": \"Version Control in non-legacy systems\", \"username\": \"BenJones\", \"post_text\": \"Currently, you can configure ECL IDE to use a Repository or a hierarchical file system. When you use a Repository, you get a version control system that allows you to check out, check in, and view history.\\n\\nIf you use the hierarchical file system, those features disappear. It is true that you can enable Subversion or some other version control system in this mode. However, you then have to do your version control outside of the ECL IDE. At first glance, this seems like no big deal. In Windows, you can install Subversion and then if you view the folder hierarchy with Windows Explorer and get all the usual check in, check out, history, comparison, branching, etc.\\n\\nI explained this to my customer and was told that their people would be unlikely to want to deal with version control in this manner. Now that I think about it, here is why:\\n\\nIn the Repository mode, I have a Sandbox which is very well understood, which is that when other users check things in, they are automatically visible to me. I can edit a file to my heart's content and it will only be visible to me in my sandbox. As soon as I check it in, it becomes visible to everyone else. Furthermore, the Control-S key or Save menu function doesn't merely overwrite the existing file in Repository mode but keeps an instant backup of each time you save, making it more comfortable to edit, save, syntax check, and test without worrying that you may have accidentally trashed some work with no hope of getting it back.\\n\\nIn non-repository mode, this whole mindset has to change. You can't merely Control-S or Save but you have to flip over to Explorer to find the file(s) you just edited and check them into your branch, every time you save. You have to explicitly update your sandbox from the appropriate branch to use everyone else's latest and greatest.\\n\\nSo, what exactly is so hard about either providing the Repository-style capability in hierarchical mode or providing hooks for your favorite version control system, like most other respectable IDE's? 
I've heard some say that we should just switch over to Eclipse and do our ECL development there but again, ECL IDE is already a very nice system to use for ECL development because it is tailored precisely to our needs.\", \"post_time\": \"2014-02-03 16:11:52\" },\n\t{ \"post_id\": 8250, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Also you can use the 4.2.2 ClientTools with the 5.0.4 IDE (on a per config basis), which my help?\", \"post_time\": \"2015-10-06 09:54:10\" },\n\t{ \"post_id\": 8244, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"jwilt\", \"post_text\": \"Did more testing on this.\\nI confirmed on my own instance (my workstation, my installs, etc.) that your test does, in fact, generate the dropping of shortcut keys.\\n\\nI used inline data, generated several workunits, single BWR window, hit "everything" in WU results, even ECL Watch and graphs within, except the ECL IDE Graph tab - all with no problems.\\nOnly when I hit the ECL IDE Graph tab (not within ECL Watch) - it immediately killed shortcut keys.\\nThis was with 4.2.2 ECL IDE.\\n\\nI could *not* get the test to drop shortcut keys with ECL IDE v. 5.0.4-1.\\n\\nI'll see if others near me see similar behavior, with 4.2.2. If so, then we have a workaround, until we get to another version.\\n\\n(We're also seeing issues with graph controls... different story...)\\n\\nThanks for the help, Gordon.\", \"post_time\": \"2015-10-05 22:04:14\" },\n\t{ \"post_id\": 8012, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"The issue that was fixed had very specific steps to reproduce (switching tab to the Graph page did it if I remember correctly) - if you have similar steps please post them here and I will take look.\", \"post_time\": \"2015-08-12 11:26:23\" },\n\t{ \"post_id\": 8010, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"jwilt\", \"post_text\": \"Hmm... that's disappointing. 
I'm still seeing issues in 5.x versions.\\nI'm looking at Enterprise Edition - I assume that won't make a difference?\", \"post_time\": \"2015-08-11 20:30:54\" },\n\t{ \"post_id\": 8009, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"It was merged into candidate-4.2.6, so should be available in any build since then.\", \"post_time\": \"2015-08-11 20:24:44\" },\n\t{ \"post_id\": 8007, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"jwilt\", \"post_text\": \"Do we know yet when this fix should appear in 5.x releases?\", \"post_time\": \"2015-08-11 18:58:08\" },\n\t{ \"post_id\": 5776, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"jwilt\", \"post_text\": \"Excellent, thanks!\", \"post_time\": \"2014-05-29 20:58:13\" },\n\t{ \"post_id\": 5744, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"FYI - A fix has been submitted for the next build: https://track.hpccsystems.com/browse/IDE-336\\nhttps://track.hpccsystems.com/browse/IDE-340\", \"post_time\": \"2014-05-27 07:05:14\" },\n\t{ \"post_id\": 5419, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"Leofei\", \"post_text\": \"Same issue. When I try to use ctrl-q, a highlighted DC1 appears.\", \"post_time\": \"2014-03-24 22:31:32\" },\n\t{ \"post_id\": 5404, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the additional information. I have opened a Jira ticket: https://track.hpccsystems.com/browse/IDE-340 and I will see if I can reproduce.\", \"post_time\": \"2014-03-20 18:16:08\" },\n\t{ \"post_id\": 5403, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"perekl\", \"post_text\": \"I have the same issue and it is very annoying. I have found that as soon as I go into the graph of my workunit, that is when the shortcuts will stop working, but only for the current builder window. Other builders still allow me to use the shortcuts. I haven't figured out how to get them to work again, other than restarting.\", \"post_time\": \"2014-03-20 17:59:41\" },\n\t{ \"post_id\": 5229, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Re: Shortcut keys stop working in ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"I have not seen a report like this before... 
\\n\\nMy first hunch (like yours) would have been to limit the number of open Workunits and Editor windows, but it sounds like that didn't help - I would also suggest trying to increase the "auto save" duration to a few minutes?\", \"post_time\": \"2014-02-19 07:29:36\" },\n\t{ \"post_id\": 5228, \"topic_id\": 1209, \"forum_id\": 9, \"post_subject\": \"Shortcut keys stop working in ECL IDE\", \"username\": \"jwilt\", \"post_text\": \"In ECL IDE, Version community_4.0.2-1...\\nIn some cases, at some point, shortcut keys (like Ctrl-F, Ctrl-Enter, Ctrl-G) stop working - the editor accepts the literal value of the key and inserts it into the code.\\nOn a just-fresh load of ECL IDE, even with "busy" workspaces, the shortcut keys seem to work fine at first, but later drop off.\\nI'm wondering if this is related to a "heavy" environment - busy workspaces (lots of modules/BWR) or even several old workunits for a given module.\\nI have often switched to a new tab/window, and the keys work fine. Switch back, and they're gone again.\\nThanks.\", \"post_time\": \"2014-02-19 00:01:53\" },\n\t{ \"post_id\": 5380, \"topic_id\": 1238, \"forum_id\": 9, \"post_subject\": \"Re: Documentation downloads\", \"username\": \"jwilt\", \"post_text\": \"Right, understood.\\nAll that aside, the reality is that's our situation. It will change when it changes.\\nWelcome to reality time! \", \"post_time\": \"2014-03-12 04:06:51\" },\n\t{ \"post_id\": 5379, \"topic_id\": 1238, \"forum_id\": 9, \"post_subject\": \"Re: Documentation downloads\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nNo, the help files are not subtly different from the legacy 702 but very different, partly due to the fundamental differences between the legacy and Open Source versions of ECL, but also due to the fact that your 702 docs are now three years out of date, and many improvements have been made to the language in the interim. Welcome to present time!
\\n\\nRichard\", \"post_time\": \"2014-03-12 03:39:53\" },\n\t{ \"post_id\": 5378, \"topic_id\": 1238, \"forum_id\": 9, \"post_subject\": \"Re: Documentation downloads\", \"username\": \"jwilt\", \"post_text\": \"Thanks, Richard.\\nWe're aware of the F1 Help pages, and look forward to using them in the future. We'd like to use the old-style PDFs for a while, yet, as we migrate from legacy to HPCC - in part, because the Help files are (subtly?) different from legacy.\\nThis would be temporary, until we fully move to HPCC.\", \"post_time\": \"2014-03-12 01:41:00\" },\n\t{ \"post_id\": 5377, \"topic_id\": 1238, \"forum_id\": 9, \"post_subject\": \"Re: Documentation downloads\", \"username\": \"rtaylor\", \"post_text\": \"jwilt,\\n\\nIn the ECL IDE, all you need to do is place your insertion point cursor on the keyword you want to know about and press F1. This will automatically bring up the topic for that keyword from the IDE's compiled help file (.chm). That help file contains the entire ECL Language Reference, Programmer's Guide, and Standard Library Reference books. This help file is part of the IDE installation and each new updated IDE installation always contains a help file with the latest release of these docs.\\n\\nYou can download the PDFs separately from the website (for printing or reading on the computer), but the IDE's F1 key will only pull up topics from its help file. And for the Eclipse users, we also have these same docs in Eclipse help format.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-12 01:30:29\" },\n\t{ \"post_id\": 5375, \"topic_id\": 1238, \"forum_id\": 9, \"post_subject\": \"Documentation downloads\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nFor ECL IDE users that don't have access to the internet - \\nIs there a way to copy the ECL Language Reference (and other) pdf's so that ECL IDE can bring those up from the "Help" dropdown? (Maybe copying them to the same location that "Download..." would use?)\\nThanks.\", \"post_time\": \"2014-03-11 19:39:02\" },\n\t{ \"post_id\": 20413, \"topic_id\": 1257, \"forum_id\": 9, \"post_subject\": \"Re: Fatal Error: Unable to locate C++ compiler/linker\", \"username\": \"micevepay\", \"post_text\": \"What compilers and tools will I need to compile this locally via cmd to create an executable on Windows? I'm getting the same errors.\", \"post_time\": \"2018-01-09 22:07:22\" },\n\t{ \"post_id\": 13223, \"topic_id\": 1257, \"forum_id\": 9, \"post_subject\": \"Re: Fatal Error: Unable to locate C++ compiler/linker\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nAs Gordon said:
What the error means: \\nYou are trying to compile a WU on your local machine, but you are missing the required compilers and tools.\\n\\nTypically this happens if you select "Local" from the target dropdown on the builder windows. Sometimes it happens if you have some invalid settings on the preferences window.\\n\\nThings to try:\\n1. In the builder window select a different target in the drop down (even if it does not say local). Note: If local is your only option here then it's likely to be a preferences issue.\\n2. Double check your preferences, specifically the server settings and compiler tab. If in doubt, create a new preference profile and re-enter the server IP (leave the rest to their defaults).
I would only add that, if Local is your only choice then you need to close the IDE and re-open it (usually that brings back the list of target clusters that you should have).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-15 19:00:46\" },\n\t{ \"post_id\": 13203, \"topic_id\": 1257, \"forum_id\": 9, \"post_subject\": \"Re: Fatal Error: Unable to locate C++ compiler/linker\", \"username\": \"oscar.foley\", \"post_text\": \"Did you fix it @nileshdchavan? \\n\\nI have exactly the same problem...\", \"post_time\": \"2016-11-15 16:51:26\" },\n\t{ \"post_id\": 5438, \"topic_id\": 1257, \"forum_id\": 9, \"post_subject\": \"Re: Fatal Error: Unable to locate C++ compiler/linker\", \"username\": \"gsmith\", \"post_text\": \"What the error means: \\nYou are trying to compile a WU on your local machine, but you are missing the required compilers and tools.\\n\\nTypically this happens if you select "Local" from the target dropdown on the builder windows. Sometimes it happens if you have some invalid settings on the preferences window.\\n\\nThings to try:\\n1. In the builder window select a different target in the drop down (even if it does not say local). Note: If local is your only option here then it's likely to be a preferences issue.\\n2. Double check your preferences, specifically the server settings and compiler tab. If in doubt, create a new preference profile and re-enter the server IP (leave the rest to their defaults).\", \"post_time\": \"2014-03-28 08:28:30\" },\n\t{ \"post_id\": 5437, \"topic_id\": 1257, \"forum_id\": 9, \"post_subject\": \"Fatal Error: Unable to locate C++ compiler/linker\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello,\\nI'm running the HPCC cluster using VM on my windows 8 system. I have installed ECLIDE on my machine. 
When i submit the ECL code in ECLIDE, it gives me following error in eclcc.log file -\\n\\n00000008 2014-03-25 23:51:23 7592 9788 Adding library: eclrtl\\n00000009 2014-03-25 23:51:23 7592 9788 Adding library: C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\\\default\\\\L20140325-235122.res\\n0000000A 2014-03-25 23:51:23 7592 9788 Compiling C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\\\default\\\\L20140325-235122\\n0000000B 2014-03-25 23:51:23 7592 9788 Failed to compile C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\\\default\\\\L20140325-235122\\n0000000C 2014-03-25 23:51:23 7592 9788 Fatal Error: Unable to locate C++ compiler/linker\\n0000000D 2014-03-25 23:51:23 7592 9788\\n---------- compiler output --------------\\n'"vsvars32"' is not recognized as an internal or external command,\\noperable program or batch file.\\n'cl.exe' is not recognized as an internal or external command,\\noperable program or batch file.\\n'"vsvars32"' is not recognized as an internal or external command,\\noperable program or batch file.\\n'cl.exe' is not recognized as an internal or external command,\\noperable program or batch file.\\n--------- end compiler output -----------\\n\\nHere's the compiler setting in my ECL IDE -\\n\\nCompiler = C:\\\\Program Files (x86)\\\\HPCCSystems\\\\4.2.0\\\\clienttools\\\\bin\\\\eclcc.exe\\n\\nPlease let me know how to resolve this issue. Thank you.\", \"post_time\": \"2014-03-27 23:48:36\" },\n\t{ \"post_id\": 5758, \"topic_id\": 1326, \"forum_id\": 9, \"post_subject\": \"Re: ECL code review tool\", \"username\": \"bforeman\", \"post_text\": \"Hi Arjun,\\n\\nI don't see anything in the works, may I suggest that you post a suggestion in the Community Issue Tracker?\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nTry to be as specific as possible as to what kind of tool and features you are looking for.\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2014-05-28 13:54:06\" },\n\t{ \"post_id\": 5743, \"topic_id\": 1326, \"forum_id\": 9, \"post_subject\": \"ECL code review tool\", \"username\": \"ArjunKumar\", \"post_text\": \"Hi Team,\\n\\nCan we get any code review tool for ECL IDE like Sonar for Java. please let us know if any plugins are there to review the ECL code.\\n\\n\\nThanks,\\nArjun\", \"post_time\": \"2014-05-27 04:52:21\" },\n\t{ \"post_id\": 5767, \"topic_id\": 1330, \"forum_id\": 9, \"post_subject\": \"Re: Mismatch in major version number(4.2.4 v 3.6.1)\", \"username\": \"adidassler2011\", \"post_text\": \"Thank you, I confirmed that the server is 3.6.1.\", \"post_time\": \"2014-05-29 02:16:41\" },\n\t{ \"post_id\": 5752, \"topic_id\": 1330, \"forum_id\": 9, \"post_subject\": \"Re: Mismatch in major version number(4.2.4 v 3.6.1)\", \"username\": \"gsmith\", \"post_text\": \"Sounds like you are using 4.2.4 Client Tools and a 3.6.1 Server.\\n\\nThe simplest solution is to just install the 3.6.1 Client Tools (alongside the 4.2.4 ones).\\n\\nAnother option is to upgrade your server to 4.2.4.\\n\\nNote: I would be a little concerned about the 3.6.1 version, as official gold releases tend to be even numbered (3.6.2 for example).\", \"post_time\": \"2014-05-28 05:55:31\" },\n\t{ \"post_id\": 5750, \"topic_id\": 1330, \"forum_id\": 9, \"post_subject\": \"Mismatch in major version number(4.2.4 v 3.6.1)\", \"username\": \"adidassler2011\", \"post_text\": \"I have just installed the ECL plug-in for the eclipse IDE. 
Also, I installed the HPCC Client Tools version 4.2.4 for Mac OSX and the HPCC Graph Control for Mac OSX. I tried to output hello world ...OUTPUT('Hello, World');...and I received the following error: Mismatch in major version number(4.2.4 v 3.6.1). Does anyone know of a fix?\", \"post_time\": \"2014-05-27 18:11:05\" },\n\t{ \"post_id\": 5877, \"topic_id\": 1350, \"forum_id\": 9, \"post_subject\": \"Re: IDE formmatter/beautifier\", \"username\": \"Chang\", \"post_text\": \"[quote="gsmith":1p3a3t9r](Thats a "no" BTW <g>)\\nThank you for your prompt reply \", \"post_time\": \"2014-06-12 17:23:07\" },\n\t{ \"post_id\": 5875, \"topic_id\": 1350, \"forum_id\": 9, \"post_subject\": \"Re: IDE formmatter/beautifier\", \"username\": \"gsmith\", \"post_text\": \"(Thats a "no" BTW <g>)\", \"post_time\": \"2014-06-12 14:55:55\" },\n\t{ \"post_id\": 5874, \"topic_id\": 1350, \"forum_id\": 9, \"post_subject\": \"Re: IDE formmatter/beautifier\", \"username\": \"gsmith\", \"post_text\": \"It has been on a todo list for a long time (PrettyPrint), but keeps getting pushed back..\\n\\nBut now that you have mentioned it (and that we happen to be doing some work in a related area), I will open a fresh JIRA request for it.\", \"post_time\": \"2014-06-12 14:55:28\" },\n\t{ \"post_id\": 5873, \"topic_id\": 1350, \"forum_id\": 9, \"post_subject\": \"IDE formmatter/beautifier\", \"username\": \"Chang\", \"post_text\": \"Hi,\\n\\nI use ECL everyday and would love to know if there's any formmatter or beautifier tool in IDE so that I can have a better looking code with just a magical click.\", \"post_time\": \"2014-06-12 14:41:16\" },\n\t{ \"post_id\": 6311, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"tinebp\", \"post_text\": \"I'm having the same issue.\\nI did a single node install of HPCC Systems on a ubuntu system.\\nECL watch works fine of the local browser using localhost:8010\\non my remote windows machine, I can ping the ubuntu machine using the ip address but I cannot access ECL watch with th same ip, http://<ip>:8010\\nthe ECL IDE doesn't connect either.\", \"post_time\": \"2014-09-15 21:06:34\" },\n\t{ \"post_id\": 5993, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"fanglimian\", \"post_text\": \"I cannot open it in web browser..any idea why not..?\", \"post_time\": \"2014-06-30 14:03:49\" },\n\t{ \"post_id\": 5992, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Just to clarify - Are you able to open ECL Watch in a web browser? \\n\\nIf not then the IDE will not be able to connect either...\", \"post_time\": \"2014-06-30 14:01:20\" },\n\t{ \"post_id\": 5991, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"fanglimian\", \"post_text\": \"Hi gsmith: yes. everything is the same. 
I can open IDE watch on Thursday but not any more from Friday out of sudden..\", \"post_time\": \"2014-06-30 13:49:56\" },\n\t{ \"post_id\": 5990, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Is ECL Watch Working in a regular browser?\\n\\nIn your preferences dialog in the IP address, make sure you only have the IP (no http or port number etc.)\\n\\nFurther 1003 normally means that the IDE cannot communicate with the server...\", \"post_time\": \"2014-06-30 13:44:32\" },\n\t{ \"post_id\": 5989, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"fanglimian\", \"post_text\": \"Thank you!\\nA new configuration is not working too...IDE is not responding for a while then the error appears: 1003: unable to communicate with the server.\\n\\nThe internet is fine. I am using the office landline.\", \"post_time\": \"2014-06-30 13:36:59\" },\n\t{ \"post_id\": 5988, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"bforeman\", \"post_text\": \"Hi Limian,\\n\\nTry creating a brand new configuration. If that doesn't work, there is one more thing that we can try, you may have something locked in your registry. Let me check with development.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-30 13:28:48\" },\n\t{ \"post_id\": 5987, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"fanglimian\", \"post_text\": \"Hi Bob,\\n\\nthanks for the reply!\\nYes I tried uninstall and re-install IDE this morning, and it still does not work...\\nI don't think it is the cluster as my other colleague are all using that cluster and they are all able to log in. And I tried my user id on my colleagues computer and I can log in too. It must be something about the configuration on my computer. Is there anyone I can call or email to so they can take a look at my issue?\\n\\nThanks!\\n\\nLimian\", \"post_time\": \"2014-06-30 13:24:09\" },\n\t{ \"post_id\": 5985, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Re: Unable to log in to ECL IDE\", \"username\": \"bforeman\", \"post_text\": \"Hi Limian,\\n\\nThat's a new one to me. Try reinstalling the ECL IDE on your machine if you continue to have problems. It might have been a problem on your target cluster. Can you connect to other clusters, like our training cluster?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-30 07:54:47\" },\n\t{ \"post_id\": 5978, \"topic_id\": 1373, \"forum_id\": 9, \"post_subject\": \"Unable to log in to ECL IDE\", \"username\": \"fanglimian\", \"post_text\": \"Hi ,\\n\\nI have been not able to log in to ECL IDE today. I had the below errors after I entered my password (and it was not responding for a few minutes, then the error message appeared):\\n\\n-1 Error -1 fault : SOAP-ENV: Client [no subcode]\\n*End of file or no input: The system cannot find the file.\\n\\nAnyone knows what is going on and how I can fix it?\\n\\nThank you!!\", \"post_time\": \"2014-06-27 18:34:27\" },\n\t{ \"post_id\": 6063, \"topic_id\": 1391, \"forum_id\": 9, \"post_subject\": \"Re: ecl command line SOAP error\", \"username\": \"jeroenbaas\", \"post_text\": \"Thanks. 
Is there an old command line tool that can be used with this version?\", \"post_time\": \"2014-07-18 06:12:11\" },\n\t{ \"post_id\": 6060, \"topic_id\": 1391, \"forum_id\": 9, \"post_subject\": \"Re: ecl command line SOAP error\", \"username\": \"anthony.fishbeck\", \"post_text\": \"That's a really old pre-oss server, it's incompatible with the oss command line tools.\", \"post_time\": \"2014-07-17 15:14:26\" },\n\t{ \"post_id\": 6059, \"topic_id\": 1391, \"forum_id\": 9, \"post_subject\": \"Re: ecl command line SOAP error\", \"username\": \"jeroenbaas\", \"post_text\": \"Thanks; how can I find out what version is running?\\nAll I see is build_0702_79_gentoo64_linux\", \"post_time\": \"2014-07-17 14:59:13\" },\n\t{ \"post_id\": 6058, \"topic_id\": 1391, \"forum_id\": 9, \"post_subject\": \"Re: ecl command line SOAP error\", \"username\": \"anthony.fishbeck\", \"post_text\": \"That error is coming from the server. Which software version is running on the ESP/EclWatch?\", \"post_time\": \"2014-07-17 13:57:21\" },\n\t{ \"post_id\": 6057, \"topic_id\": 1391, \"forum_id\": 9, \"post_subject\": \"ecl command line SOAP error\", \"username\": \"jeroenbaas\", \"post_text\": \"Hi, sorry if I misplace my question, not sure if this is the right subsection, but here goes;\\n\\nI have the following test.ecl file:\\n
OUTPUT('hello world!');
\\n\\nI installed the ecl client on CentOS: \\nHPCC Client Tools for Centos5/RHEL5 Release Date: 07/14/2014 CentOS 64bit\\t15.047 MB \\t5.0.0-2\\n\\nAll fine and sweet. Tried to run the code using:\\n\\n$ ecl run -s=<IP> -u=<USER> -pw=<PASS> --target=<thor cluster name> --port=<ECL watch port> --name=mytestrun test.ecl\\n
\\n\\nand got:\\nSOAP server error[HTTP Status 500 Internal Server ErrorSOAP fault: string=[400: Bad Request [Method WUDeployWorkunit not available in service WsWorkunits]] . detail=<Exceptions xmlns="http://webservices.seisint.com/WsWorkunits" xsi:schemaLocation="http://webservices.seisint.com/WsWorkunits http://<IP>:<PORT>/WsWorkunits/?xsd">><Source>Esp</Source><Exception><Code>400</Code><Audience>user</Audience><Message>Bad Request [Method WUDeployWorkunit not available in service WsWorkunits]</Message></Exception></Exceptions>.]
\", \"post_time\": \"2014-07-17 12:15:18\" },\n\t{ \"post_id\": 6597, \"topic_id\": 1395, \"forum_id\": 9, \"post_subject\": \"Re: Recursive find in repository\", \"username\": \"maya11\", \"post_text\": \"The IDE crashes in the repository have been reported fixed in HPCC 5.0. If you still see them in 5.0, you should open an issue in the Community Issue Tracker and then post the crash logs.\\n\\n\\n\\n_________________\\nSign up with 9L0-412 ccna for getting Testking.net online testking courses artinstitutesmiami and www.tabor.edu We also provide best Florida Institute of Technology and Johnson University Florida with guaranteed success.\", \"post_time\": \"2014-11-17 04:43:20\" },\n\t{ \"post_id\": 6144, \"topic_id\": 1395, \"forum_id\": 9, \"post_subject\": \"Re: Recursive find in repository\", \"username\": \"bforeman\", \"post_text\": \"My tests show that recursive searches are working.\", \"post_time\": \"2014-07-31 12:40:13\" },\n\t{ \"post_id\": 6072, \"topic_id\": 1395, \"forum_id\": 9, \"post_subject\": \"Recursive find in repository\", \"username\": \"aintnomyth\", \"post_text\": \"Is it possible to make the "Find" feature perform a recursive search within sub directories?\", \"post_time\": \"2014-07-21 14:25:25\" },\n\t{ \"post_id\": 6115, \"topic_id\": 1398, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\", \"username\": \"gsmith\", \"post_text\": \"[quote="aintnomyth":1jaxgl2o]\\nAlso, and possibly related, the IDE crashes frequently when I expand a folder in the Repository window.\\n\\nAny ideas?\\n\\nI am getting a report that one person is getting the "Expand a folder" crash _only_ if they are running "Snagit" - I don't suppose you happen to be running Snagit (or some other screen capture/monitor type application)?\\n\\n(Unfortunately I have been unable to reproduce on my machine making it near impossible to diagnose)\", \"post_time\": \"2014-07-28 18:43:49\" },\n\t{ \"post_id\": 6114, \"topic_id\": 1398, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\", \"username\": \"gsmith\", \"post_text\": \"[quote="aintnomyth":2hx5q7re]\\nAlso, and possibly related, the IDE crashes frequently when I expand a folder in the Repository window.\\n\\nAny ideas?\", \"post_time\": \"2014-07-28 18:40:58\" },\n\t{ \"post_id\": 6096, \"topic_id\": 1398, \"forum_id\": 9, \"post_subject\": \"Re: WARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\", \"username\": \"bforeman\", \"post_text\": \"The IDE crashes in the repository have been reported fixed in HPCC 5.0. If you still see them in 5.0, you should open an issue in the Community Issue Tracker and then post the crash logs.\\n\\nI've been told that the SOAP Warning you mention is harmless. During training, I will have the students simply close the Error Log window as these warnings can sometimes be a distraction. It also leaves more room for the more important Syntax Errors \\n\\nAs in ECL, warnings are more informational and actual errors are more important.\\n\\nI have passed this on to the ECL IDE developer.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-22 15:36:31\" },\n\t{ \"post_id\": 6094, \"topic_id\": 1398, \"forum_id\": 9, \"post_subject\": \"WARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\", \"username\": \"aintnomyth\", \"post_text\": \"I'm seeing an odd warning in the Error Log as soon as I login. \\n\\n
WARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\\n"An HTTP processing error occurred"\\nDetail: [no detail]\\nRepository.cpp(345)
\\n\\nI'm currently using version community_5.0.0-1 with the 5.0 compiler and VM, but I had the same issue when I used version 4.2.2.\\n\\nIn the ECL IDE Preferences\\nServer: 192.168.88.129 (SSL and Adavanced are unchecked)\\nCompiler: Default compiler points to C:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.0.0\\\\clienttools\\\\bin\\\\eclcc.exe (tried clicking Reset a few times)\\n
\\nI'm using VMware Player 6.0.1 build-1379776. The host is Windows 7 Enterprise 64-bit, SP1. The VM settings are:\\nMemory: 4GB\\nProcessors: 2\\nHDD: 80 GB\\nCD/DVD: Auto Detect\\nNetwork Adapter: Host-only (it was NAT by default but I changed it to Host-only, didn't help)\\nNetwork Adapter 2: Host-only\\nDisplay: Auto Detect
\\n\\nAlso, and possibly related, the IDE crashes frequently when I expand a folder in the Repository window.\\n\\nAny ideas?\", \"post_time\": \"2014-07-22 13:23:03\" },\n\t{ \"post_id\": 6289, \"topic_id\": 1439, \"forum_id\": 9, \"post_subject\": \"Re: Issue connecting to VM Box version from ECL IDE\", \"username\": \"billwrightpwa\", \"post_text\": \"Thanks - I guess I'm going blind - \\nI had other issues including not having adapter2 set up and I guess in all my reconfirming and installing i dropped a digit.\\n\\nThanks again!\", \"post_time\": \"2014-09-10 14:45:40\" },\n\t{ \"post_id\": 6287, \"topic_id\": 1439, \"forum_id\": 9, \"post_subject\": \"Re: Issue connecting to VM Box version from ECL IDE\", \"username\": \"david.wheelock\", \"post_text\": \"Looks like your second octet is missing a "1" in front of the "68" in the IDE.\", \"post_time\": \"2014-09-10 14:05:01\" },\n\t{ \"post_id\": 6285, \"topic_id\": 1439, \"forum_id\": 9, \"post_subject\": \"Re: Issue connecting to VM Box version from ECL IDE\", \"username\": \"billwrightpwa\", \"post_text\": \"I have attached the screenshots of the issues.\\nIf you need anything else please let me know and thanks for your time.\\n[attachment=0:2fe6abq4]ecl-ide-issue.jpg\", \"post_time\": \"2014-09-10 13:21:57\" },\n\t{ \"post_id\": 6284, \"topic_id\": 1439, \"forum_id\": 9, \"post_subject\": \"Re: Issue connecting to VM Box version from ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"It really should be just a simple matter of entering the IP address into the preferences page.\\n\\nDouble check that you only entered the IP and no port number etc.\\n\\nIf you can send a screen shot of the working web page and of the not working preferences I will see if I can spot anything.\", \"post_time\": \"2014-09-10 06:04:55\" },\n\t{ \"post_id\": 6283, \"topic_id\": 1439, \"forum_id\": 9, \"post_subject\": \"Issue connecting to VM Box version from ECL IDE\", \"username\": \"billwrightpwa\", \"post_text\": \"Issue with IDE not connecting to host server (Oracle VM Box version).\\nI can get to the ECL watch page from a browser (192.168.56.101:8010), but the IDE cannot log in. I get a '1003: Unable to communicate with the server' error\\n\\nThe DHCP for the VM indicates that it is set for the lowest bound address (192.168.56.101) and it is attached to Host-only Adapter and name is 'VirtualBox Host-Only Etghernet Adapter'\\n\\nHPCC Vm version is amd64-5.0.0-3 and the IDE is community_5.0.0-1\\nThis is all running on Windows 7\\n\\nAny help would be appreciated.\", \"post_time\": \"2014-09-09 21:02:57\" },\n\t{ \"post_id\": 6310, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"Richard_Wyant\", \"post_text\": \"That answers my question. 
Thanks.\", \"post_time\": \"2014-09-15 20:00:46\" },\n\t{ \"post_id\": 6309, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"rtaylor\", \"post_text\": \"That is correct.\\n\\nYou could try the ECL plugin for Eclipse -- maybe Eclipse will do it the same way Toad does (or maybe not, I don't know).\\n\\nThe way I see it, having the ECL Watch page open at all times (for the environment you're working in) is simply "best practice" for all ECL programmers.\", \"post_time\": \"2014-09-15 19:46:00\" },\n\t{ \"post_id\": 6308, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"Richard_Wyant\", \"post_text\": \"So If I'm understanding this correctly, there's no way to do this in the IDE. I have to open up ECL Watch and do it there. Is that correct?\", \"post_time\": \"2014-09-15 19:29:01\" },\n\t{ \"post_id\": 6307, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"rtaylor\", \"post_text\": \"In your 4.2.8 ECL Watch page, click on the "ECL Watch" link under the Tech Preview section. That will open a new tab with the 5.0 version of ECL Watch. Then find your WU, open it and go to the Playground tab ...\", \"post_time\": \"2014-09-15 18:56:14\" },\n\t{ \"post_id\": 6306, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"Richard_Wyant\", \"post_text\": \"Is there a way to do this in the IDE? The version of ECL Watch we're on is 4.2.8-1.\", \"post_time\": \"2014-09-15 18:50:46\" },\n\t{ \"post_id\": 6305, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"Re: How do I view code and results side by side?\", \"username\": \"rtaylor\", \"post_text\": \"Richard,\\n\\nIn the 5.0 release, if you open the workunit in ECL Watch then clickon the "Playground" tab, you will see the code in the top half of the window and the resultsin the lower half.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-15 18:41:40\" },\n\t{ \"post_id\": 6304, \"topic_id\": 1445, \"forum_id\": 9, \"post_subject\": \"How do I view code and results side by side?\", \"username\": \"Richard_Wyant\", \"post_text\": \"In Toad, I can see the result of an SQL query and the query itself side by side. How can I do the same thing in ECL IDE?\", \"post_time\": \"2014-09-15 18:34:35\" },\n\t{ \"post_id\": 6326, \"topic_id\": 1448, \"forum_id\": 9, \"post_subject\": \"Re: How can I open ECL in stable builder windows?\", \"username\": \"rtaylor\", \"post_text\": \"Richard,\\n\\nWhat version of the IDE are you using and what version of the environment are you attaching to?\\n\\nRichard\", \"post_time\": \"2014-09-18 18:08:45\" },\n\t{ \"post_id\": 6322, \"topic_id\": 1448, \"forum_id\": 9, \"post_subject\": \"How can I open ECL in stable builder windows?\", \"username\": \"Richard_Wyant\", \"post_text\": \"Opening a file by double clicking opens it in an edit window and renames the file to builder_xxxx.ecl where xxxx is some number. I have to right click the file and use the context menu to open it in a builder.\\n\\nAt this point, I can edit and save just fine. But if I close the IDE or it crashes and I still have that builder window open, the builder window will lose its relationship with the original file I was editing. When I open ECL, edit it, and try to save again I get this a Save As dialog box. 
To get around this, I have to:\\n\\n•\\tre-open the original file in another builder or edit window\\n•\\tcopy the entire thing and paste it into the re-opened file\\n•\\tsave it\\n•\\tclose the original builder window\\n\\nIs there a way to do the following:\\n1. Have ecl files open in builder windows by default?\\n2. Preserve the relationship between a builder window and the file it has open between ECL sessions?\\n\\nI'd post a word doc or pictures if that helps, but I get a message saying they are not allowed on this forum. I hope I've provided enough info.\\n\\nThanks.\", \"post_time\": \"2014-09-18 16:01:33\" },\n\t{ \"post_id\": 6331, \"topic_id\": 1451, \"forum_id\": 9, \"post_subject\": \"Re: How can I *not* see everyone else's workunits?\", \"username\": \"gsmith\", \"post_text\": \"This was a regression in the 5.0 release and has been resolved for 5.0.2.\", \"post_time\": \"2014-09-18 21:02:50\" },\n\t{ \"post_id\": 6325, \"topic_id\": 1451, \"forum_id\": 9, \"post_subject\": \"How can I *not* see everyone else's workunits?\", \"username\": \"Richard_Wyant\", \"post_text\": \"Sometimes when I open a new builder window, I'll see workunits I didn't submit. I tried to shut them out by View tab > Workunit Find, checking User and putting in my name. I'm still seeing other people's workunits and I don't want to. Is there another place I should check for this option? What could I be doing wrong? Thanks.\", \"post_time\": \"2014-09-18 17:58:03\" },\n\t{ \"post_id\": 6495, \"topic_id\": 1466, \"forum_id\": 9, \"post_subject\": \"Re: Pause vs Abort in ESP\", \"username\": \"bforeman\", \"post_text\": \"It will allow other jobs to complete, but your job will not release any resources, it is just paused or suspended. Abort will release resources.\", \"post_time\": \"2014-10-24 12:00:25\" },\n\t{ \"post_id\": 6494, \"topic_id\": 1466, \"forum_id\": 9, \"post_subject\": \"Re: Pause vs Abort in ESP\", \"username\": \"anwaar ali\", \"post_text\": \"I have been running a query for quite some time on a thor. I see a lot of queries queued in that thor. I would like to know if pausing my query will release the resources that it was using in that thor so that the other queries could use those resources (I intend to resume my query later when the thor is free)\\n\\n\\n\\n\\n\\n\\n\\n________________\\n\\nanwaar ali\", \"post_time\": \"2014-10-24 05:21:01\" },\n\t{ \"post_id\": 6402, \"topic_id\": 1466, \"forum_id\": 9, \"post_subject\": \"Re: Pause vs Abort in ESP\", \"username\": \"vikram\", \"post_text\": \"Thanks Bob!\", \"post_time\": \"2014-10-03 13:55:30\" },\n\t{ \"post_id\": 6394, \"topic_id\": 1466, \"forum_id\": 9, \"post_subject\": \"Re: Pause vs Abort in ESP\", \"username\": \"bforeman\", \"post_text\": \"I don't think Pause will free Resources, it will simply stop your job and allow others to complete. \\n\\nA better practice would be to add PERSIST to key places in your job, and then if you have to Abort, restarting the job will pick up from the last valid PERSIST.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-02 12:42:22\" },\n\t{ \"post_id\": 6392, \"topic_id\": 1466, \"forum_id\": 9, \"post_subject\": \"Pause vs Abort in ESP\", \"username\": \"vikram\", \"post_text\": \"Hi,\\n\\n I have been running a query for quite some time on a thor. I see a lot of queries queued in that thor. 
I would like to know if pausing my query will release the resources that it was using in that thor so that the other queries could use those resources (I intend to resume my query later when the thor is free)\\n\\nThank you,\\nVikram\", \"post_time\": \"2014-10-01 21:21:17\" },\n\t{ \"post_id\": 6534, \"topic_id\": 1493, \"forum_id\": 9, \"post_subject\": \"Re: Excel Number format Issue while downloading result in EC\", \"username\": \"pius_francis\", \"post_text\": \"Smith , i have tried that part. The scenario i have shared is a sample one. But we have different number format for same column for the real Use Case.Only if we are able to avoid the formatting change when it goes into excel or csv it will be useful. \\n\\nThanks\\nPius\", \"post_time\": \"2014-10-30 10:44:46\" },\n\t{ \"post_id\": 6533, \"topic_id\": 1493, \"forum_id\": 9, \"post_subject\": \"Re: Excel Number format Issue while downloading result in EC\", \"username\": \"gsmith\", \"post_text\": \"In the IDE you can save a result as CSV - then in Excel, click on the DATA tab and import the CSV, during that process you can specify the data type for each column...\", \"post_time\": \"2014-10-30 10:33:16\" },\n\t{ \"post_id\": 6532, \"topic_id\": 1493, \"forum_id\": 9, \"post_subject\": \"Re: Excel Number format Issue while downloading result in EC\", \"username\": \"pius_francis\", \"post_text\": \"Thanks for the reply.\\n\\nYeah Smith , in xml we will get .15 only. Is this an issue with ECL download or can we have any work around like specifying the number format for excel while downloading ?\", \"post_time\": \"2014-10-30 10:07:13\" },\n\t{ \"post_id\": 6531, \"topic_id\": 1493, \"forum_id\": 9, \"post_subject\": \"Re: Excel Number format Issue while downloading result in EC\", \"username\": \"gsmith\", \"post_text\": \"How interesting...\\n\\nI just opened the raw xls file in a text editor and it looks like it is Excel that is adding the "0":\\n\\n<html xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema">\\n <head>\\n <META http-equiv="Content-Type" content="text/html; charset=UTF-8">\\n <meta http-equiv="Content-Type" content="text/html; charset=utf-8">\\n <title>Result </title>\\n </head>\\n <body class="yui-skin-sam">\\n <table xmlns:msxsl="urn:schemas-microsoft-com:xslt" class="results" cellspacing="0" frame="box" rules="all">\\n <thead>\\n <tr valign="bottom">\\n <th rowspan="1">text</th>\\n </tr>\\n </thead>\\n <tr>\\n <td>.15</td>\\n </tr>\\n </table>\\n </body>\\n</html>
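If it helps, here is a minimal ECL sketch of the CSV route suggested above (definition names and the logical file name are just placeholders): write the result out as a CSV file, then pull that CSV into Excel with the Text Import Wizard and set the column type to Text so the leading '.' is kept as-is.

layout := RECORD
    STRING text;
END;
ds := DATASET(['.15'], layout);
// Write a CSV copy of the result; download/despray it and import it into Excel
// with this column typed as Text rather than General/Number.
OUTPUT(ds,, '~demo::text_as_csv', CSV, OVERWRITE);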
\", \"post_time\": \"2014-10-30 09:56:20\" },\n\t{ \"post_id\": 6530, \"topic_id\": 1493, \"forum_id\": 9, \"post_subject\": \"Excel Number format Issue while downloading result in EC\", \"username\": \"pius_francis\", \"post_text\": \"Hi ,\\n I am encountering an issue with number format in excel for the results downloaded from ECL watch. PFB, the example.\\n//Start of code\\n layout := RECORD\\n STRING text;\\n END;\\n\\n output := DATASET(['.15'],layout); \\n output;\\n//End of code\\nWhile downlaoding the result from ECL as excel the value .15 is being stored as 0.15. Is there is workaroud to avoid it?\\n\\nPlease help me out regarding the same.\", \"post_time\": \"2014-10-30 09:39:44\" },\n\t{ \"post_id\": 6767, \"topic_id\": 1554, \"forum_id\": 9, \"post_subject\": \"Re: [Tut] fpos and __fileposition__ are different\", \"username\": \"NSD\", \"post_text\": \"thank you. ill work through more tutorials \\n\\nin my case i will only work with CSV's. as i remeber right, i hadn't set up a record length.\\n\\nfor anybody, who's interested how it works:\\n\\nhttp://www.unix.com/unix-for-dummies-qu ... -file.html\", \"post_time\": \"2015-01-08 15:51:45\" },\n\t{ \"post_id\": 6764, \"topic_id\": 1554, \"forum_id\": 9, \"post_subject\": \"Re: [Tut] fpos and __fileposition__ are different\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nYou're confusing the "Persons" file from our Introduction to ECL online eLearning course (155-byte records) with the "OriginalPerson" file from the Data Tutorial downloadable PDF (which has 124-byte records).
how do i know the exact Record Length of a File?
The answer to this is the same as in any other language/development platform -- you either are given that information from the data provider, or you need to empirically determine it for yourself. \\n\\nIn the case of our ECL teaching materials, we will always give you enough information up front to be able to work with the file. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-08 14:42:58\" },\n\t{ \"post_id\": 6761, \"topic_id\": 1554, \"forum_id\": 9, \"post_subject\": \"Re: [Tut] fpos and __fileposition__ are different\", \"username\": \"NSD\", \"post_text\": \"Ive fixed this:\\n\\nIn a previous tutorial, there was a Record Length of 155 of the OriginalPerson File, now it is 124. My question is now: how do i know the exact Record Length of a File?\", \"post_time\": \"2015-01-08 12:02:05\" },\n\t{ \"post_id\": 6759, \"topic_id\": 1554, \"forum_id\": 9, \"post_subject\": \"[Tut] fpos and __fileposition__ are different\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI worked through the Tutorial 'till "Index the Data", now i recognized, that fpos != __fileposition__ ? Are they not the same?\\n\\n\\n\\nDataset_PrepareIndex_Zip\\nImport Tutorial;\\n\\nEXPORT Dataset_PrepareIndex_Zip := \\n\\t\\tDATASET\\t('~tutorial::myname::upperperson',\\n\\t\\t\\t\\t\\t\\t{Tutorial.Layout_People, UNSIGNED8 fpos {virtual(fileposition)}},\\n\\t\\t\\t\\t\\t\\tTHOR);
\\n\\n\\nProcess_AllPeopleToUpper\\nIMPORT Tutorial, STD;\\t\\t//Std = Standard Library\\n\\n\\tTutorial.Layout_People AllPeopleToUpper(Tutorial.Layout_People rawInput) := \\n\\t\\tTRANSFORM\\n\\t\\t\\tSELF.FirstName := STD.Str.ToUpperCase(rawInput.FirstName);\\n\\t\\t\\tSELF.LastName := STD.Str.ToUpperCase(rawInput.LastName);\\n\\t\\t\\tSELF.MiddleName := STD.Str.ToUpperCase(rawInput.MiddleName);\\n\\t\\t\\tSELF.Zip := rawInput.Zip;\\n\\t\\t\\tSELF.Street := rawInput.Street;\\n\\t\\t\\tSELF.City := rawInput.City;\\n\\t\\t\\tSELF.State := rawInput.State;\\n\\t\\tEND;\\n\\t\\t\\nOrigDataset := Tutorial.Dataset_OriginalPerson;\\nNewDataset := PROJECT(OrigDataset,AllPeopleToUpper(RIGHT));\\nOUTPUT(NewDataset,,'~tutorial::myname::UpperPerson',OVERWRITE);
\", \"post_time\": \"2015-01-07 19:05:51\" },\n\t{ \"post_id\": 6766, \"topic_id\": 1555, \"forum_id\": 9, \"post_subject\": \"Re: Autocomplete stops at level-1\", \"username\": \"NSD\", \"post_text\": \"thank you, found the ECL-Code for those libs.\", \"post_time\": \"2015-01-08 15:49:33\" },\n\t{ \"post_id\": 6765, \"topic_id\": 1555, \"forum_id\": 9, \"post_subject\": \"Re: Autocomplete stops at level-1\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nNotice that if you choose System from your STD. droplist that the auto-complete DOES show you the next level below that. That's because System is a directory containing files (as you can see in the Repository tree -- STD is under "ecllibrary"), so it shows you the list of those files from which to choose. \\n\\nThe rest are all just .ECL files containing function prototypes for use in your ECL code. What the auto-complete feature does not yet do is parse through those files to present you with a list of the EXPORT definitions from the file. That's what the JIRA feature request ticket Bob mentioned is all about. \\n\\nIf you want to know what Standard Library functions are available, you can look in the Standard Library Reference (complete in the F1 help file, or downloadable as a separate PDF). You can also look in the code files themselves in the Repository tree. Most of the functions in there do have javadoc descriptions along with the code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-08 14:59:41\" },\n\t{ \"post_id\": 6762, \"topic_id\": 1555, \"forum_id\": 9, \"post_subject\": \"Re: Autocomplete stops at level-1\", \"username\": \"bforeman\", \"post_text\": \"This is a known issue and logged in the Community Issue Tracker:\\n\\nhttps://track.hpccsystems.com/browse/EPE-38\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-01-08 13:49:50\" },\n\t{ \"post_id\": 6760, \"topic_id\": 1555, \"forum_id\": 9, \"post_subject\": \"Autocomplete stops at level-1\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\ni worked through the Tutorial and recognized, that the IDE's autocomplete function stops at the first layer.\\n\\nI type\\nSTD.
\\nthe autocomplete function correctly displays now Audit, BundleBase, ...\\n if i choose one of them and type the typical 'dot' i wont get any further information, such as STD.Str. [...] (no autocomplete appears here). \\n\\nIs this a Bug or a configuration problem? thx!\", \"post_time\": \"2015-01-07 19:10:34\" },\n\t{ \"post_id\": 6788, \"topic_id\": 1562, \"forum_id\": 9, \"post_subject\": \"Folder is not showing up in IDE repositories\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm using the internal cloud and have a folder on my machine that I've added to ECL Folders under the compiler tab of the IDE. Its path is c:\\\\Users\\\\humphrtl\\\\Documents\\\\HPCCSystems\\\\ECL\\\\MyFiles
. I expect to see MyFiles as a blue colored folder in the list of repositories. But it isn't showing-up.\\n\\nThe version of HPCC is community_4.2.0-1.\", \"post_time\": \"2015-01-14 14:21:37\" },\n\t{ \"post_id\": 6866, \"topic_id\": 1582, \"forum_id\": 9, \"post_subject\": \"Re: STD vs lib_fileservices\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks for your quick and informative response.\\n\\nDon\", \"post_time\": \"2015-01-27 21:29:04\" },\n\t{ \"post_id\": 6865, \"topic_id\": 1582, \"forum_id\": 9, \"post_subject\": \"Re: STD vs lib_fileservices\", \"username\": \"rtaylor\", \"post_text\": \"Don,Question 1: Why don't I see a STD library?
The Standard Library is only in Open Source environments, not legacy pre-OSS environments. \n\nOur ECL classes are taught using Open Source environments because the legacy system is deprecated and will be discontinued at some point.\n\nQuestion 2. Is this the same thing?
Yes, it is the same function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-27 20:08:29\" },\n\t{ \"post_id\": 6864, \"topic_id\": 1582, \"forum_id\": 9, \"post_subject\": \"STD vs lib_fileservices\", \"username\": \"georgeb2d\", \"post_text\": \"Hello,\\n\\nThe code I am looking at calls FileServices.RemoteDirectory.\\n\\nIn class I was introduced to the STD libraries. Now on the Alpha_Dev_Thor repository I find no STD library. However, I have found lib_fileservices. It does have a FileServices.RemoteDirectory. \\n \\nQuestion 1: Why don't I see a STD library? Is this only with the Version 5?\\n\\nIn the documentation there is a STD.RemoteDirectory. It is like this:\\nSTD.File.RemoteDirectory( machineIP, directory [ , mask ][ , includesubs ] )\\n\\nIn lib_fileservices there is the following:\\ndataset(FsFilenameRecord) RemoteDirectory(const varstring machineIP,const varstring dir,const varstring mask='*',boolean sub=false) : c,entrypoint='fsRemoteDirectory';\\n\\nQuestion 2. Is this the same thing? It looks very similar, but appears to be calling a .so program if I am reading the documentation properly. If not, where is the STD library? \\n\\nThanks,\\nDon Baggett\", \"post_time\": \"2015-01-27 19:51:49\" },\n\t{ \"post_id\": 7028, \"topic_id\": 1611, \"forum_id\": 9, \"post_subject\": \"Re: How to SprayFixed in ECL IDE\", \"username\": \"rtaylor\", \"post_text\": \"vinothkumar,\\n\\nI go the same error yesterday using DFUplus.exe to do a spray. The problem was fixed in my case by making the full path to the file to spray completely explicit in stead of trying to use relative pathing (as I see you are doing, too).\\n\\nTry it something like this:STD.File.SprayFixed( 'http://192.168.83.135:8010' , 'c$/myfullpath/onlinepeople' , 82, 'mythor', '~VINO::MyFile' , -1, 'http://192.168.83.135:8010/FileSpray' );
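For reference, a sketch of how those arguments line up with the documented parameters (same placeholder values as above; per the Standard Library Reference the first argument is just the IP of the machine holding the file):

IMPORT STD;
STD.File.SprayFixed('192.168.83.135',                        // sourceIP - machine where the flat file lives
                    'c$/myfullpath/onlinepeople',            // sourcePath - full, explicit path to the file
                    82,                                      // recordSize - fixed record length in bytes
                    'mythor',                                // destinationGroup - target cluster
                    '~VINO::MyFile',                         // destinationLogicalName
                    -1,                                      // timeout (-1, i.e. no timeout)
                    'http://192.168.83.135:8010/FileSpray'); // espServerIPPort - FileSpray service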
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-26 15:14:45\" },\n\t{ \"post_id\": 7026, \"topic_id\": 1611, \"forum_id\": 9, \"post_subject\": \"Re: How to SprayFixed in ECL IDE\", \"username\": \"bforeman\", \"post_text\": \"Can you spray this file using the ECL Watch interface? The file is OnlinePeople, is it possible that you haven't extracted it from the zip download?\\n\\nThis code on my training cluster works:\\n\\nIMPORT STD;\\n\\nSrcIP := '10.173.248.1';\\nSrcPath := '//10.173.248.1/mnt/disk1/var/lib/HPCCSystems/dropzone/';\\nInitials := 'BMF'; \\n\\n\\t\\t\\t\\t\\n//************ Spray Advanced ECL Class Files *******************************\\n STD.File.SprayFixed(SrcIP,SrcPath + 'OnlinePeople',82,'mythor', \\n \\t\\t\\t\\t\\t\\t'~CLASS::' + Initials + '::AdvECL::People2',,,,true,true);
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-26 13:09:58\" },\n\t{ \"post_id\": 7025, \"topic_id\": 1611, \"forum_id\": 9, \"post_subject\": \"How to SprayFixed in ECL IDE\", \"username\": \"vinomra\", \"post_text\": \"Hi,\\n\\nI got a error during spray fixed in ECL IDE. Let me know if you have any soluions.\\nthis is my spray text \\n\\nSTD.File.SprayFixed( 'http://192.168.83.135:8010' , '/onlinepeople' , 82, 'mythor', '~VINO::MyFile' , -1, 'http://192.168.83.135:8010/FileSpray' );\\n\\nError: System error: 0: DFUServer Error Failed: DFUWU: cannot determine endpoint for part file (0, 0), 0, \\n\\n\\nThanks,\\nvinothkumar\", \"post_time\": \"2015-02-26 10:17:52\" },\n\t{ \"post_id\": 7068, \"topic_id\": 1614, \"forum_id\": 9, \"post_subject\": \"Re: Comparison between two repositories\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks.\", \"post_time\": \"2015-03-03 18:24:38\" },\n\t{ \"post_id\": 7061, \"topic_id\": 1614, \"forum_id\": 9, \"post_subject\": \"Re: Comparison between two repositories\", \"username\": \"rtaylor\", \"post_text\": \"That depends on whether you're using the Open Source Community Edition with its local repositories, or if you have a pre-OSS-style legacy system with a central repository.\\n\\nIf the first, then any file compare/directory compare utility should work. If the second, then look at the AMT.exe tool.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-03 15:50:40\" },\n\t{ \"post_id\": 7059, \"topic_id\": 1614, \"forum_id\": 9, \"post_subject\": \"Comparison between two repositories\", \"username\": \"georgeb2d\", \"post_text\": \"Is there an easy way to compare code in Production vs code in Development, i.e., two different repositories?\", \"post_time\": \"2015-03-03 13:57:04\" },\n\t{ \"post_id\": 7094, \"topic_id\": 1622, \"forum_id\": 9, \"post_subject\": \"Re: New to ECL,Please help!\", \"username\": \"tlhumphrey2\", \"post_text\": \"Would you attach your ECL code, please.\", \"post_time\": \"2015-03-06 18:44:19\" },\n\t{ \"post_id\": 7093, \"topic_id\": 1622, \"forum_id\": 9, \"post_subject\": \"New to ECL,Please help!\", \"username\": \"sayan6\", \"post_text\": \"Please see the attached file.\\nI am getting this error log on the bottom screen.\", \"post_time\": \"2015-03-06 17:24:27\" },\n\t{ \"post_id\": 7124, \"topic_id\": 1632, \"forum_id\": 9, \"post_subject\": \"Re: Graph Control\", \"username\": \"David Dasher\", \"post_text\": \"Thanks both for the speedy response. \\n\\nDavid\", \"post_time\": \"2015-03-10 19:08:17\" },\n\t{ \"post_id\": 7123, \"topic_id\": 1632, \"forum_id\": 9, \"post_subject\": \"Re: Graph Control\", \"username\": \"gsmith\", \"post_text\": \"Its "supposed" to be the number of processed rows. The issue you have spotted has been resolved.\", \"post_time\": \"2015-03-10 17:47:32\" },\n\t{ \"post_id\": 7122, \"topic_id\": 1632, \"forum_id\": 9, \"post_subject\": \"Re: Graph Control\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI'm checking with the developer, the 335215 looks to be the number of records processed by the JOIN. The number in brackets has to be either the index size of possibly the total records in the INDEX prior to filtering by the JOIN. Checking now!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-10 17:28:29\" },\n\t{ \"post_id\": 7121, \"topic_id\": 1632, \"forum_id\": 9, \"post_subject\": \"Graph Control\", \"username\": \"David Dasher\", \"post_text\": \"Hi\\n\\nI'm trying to speed up a Query, I am looking at the Timers and associated graphs and making some progress. 
I am, however slightly confused by the attached image. What does the number inside the square brackets mean?\\n\\nThanks David\", \"post_time\": \"2015-03-10 16:42:23\" },\n\t{ \"post_id\": 7299, \"topic_id\": 1672, \"forum_id\": 9, \"post_subject\": \"Re: How to spray multiple delimited files using DFU command?\", \"username\": \"Abhishek_M04\", \"post_text\": \"Thanks Bob and Jim. Yes , it worked.\", \"post_time\": \"2015-04-10 09:59:10\" },\n\t{ \"post_id\": 7296, \"topic_id\": 1672, \"forum_id\": 9, \"post_subject\": \"Re: How to spray multiple delimited files using DFU command?\", \"username\": \"JimD\", \"post_text\": \"You can use DFU Plus to spray multiple files (using either wildcards or a comma-delimited list). But, this sprays to a single destination file. To spray many to many, you need one DFUPlus command for each file.\\n\\nYou can easily write a bash script or bat file to do this.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-04-09 17:21:31\" },\n\t{ \"post_id\": 7294, \"topic_id\": 1672, \"forum_id\": 9, \"post_subject\": \"Re: How to spray multiple delimited files using DFU command?\", \"username\": \"bforeman\", \"post_text\": \"Hi Abishek,\\n\\nIn the ECL IDE, you can call the SprayVariable function successively to spray multiple times:\\nIMPORT STD;\\nSrcIP := '10.252.65.200';\\nSrcPath := '//10.252.65.200/var/lib/HPCCSystems/mydropzone/snap-d126c9ba/';\\nInitials := 'RT';\\n STD.File.SprayVariable(SrcIP,SrcPath + 'embeddedxmltimezones',,'','\\\\\\\\>','','mythor',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t'~CLASS::' + Initials + '::IN::EmbeddedXMLtimezones',,,,true,true);\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n STD.File.SprayVariable(SrcIP,SrcPath + 'imdb_movies',,'',,'','mythor',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t'~CLASS::' + Initials + '::IN::imdb_movies',,,,true,true);\\t
\\n\\nI'm pretty sure with DFUPlus that you can spray multiple files by setting up your parameters in an INI file. Check out the associated documentation for more details.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-09 12:02:47\" },\n\t{ \"post_id\": 7292, \"topic_id\": 1672, \"forum_id\": 9, \"post_subject\": \"How to spray multiple delimited files using DFU command?\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi,\\n\\nCan anyone please tell me how to spray multiple delimited files from the Landing Zone to the THOR Cluster using DFU command from DFU Plus or ECL IDE if possible. This should spray all the files with same spray delimited options together in one shot. A reply would help.\\n\\n\\nThanks,\\nAbhishek.\", \"post_time\": \"2015-04-09 07:12:25\" },\n\t{ \"post_id\": 7571, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Regarding MAXLENGTH in RECORD Structure.\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nI have a query while I run this below ECL code:\\n\\nMyRec := RECORD \\n STRING Name {MAXLENGTH(2)} ;\\nEND;\\n\\nds := DATASET( \\n [\\n\\t\\t {'Abhishek'},\\n\\t\\t {'Vishnu'}\\n\\t ],\\n\\t\\tMyRec\\n );\\n\\t\\t\\t\\t\\t\\t \\nds;\\t\\t\\n\\nI am getting the Output as :\\n\\nname\\n--------\\nAbhishek\\nVishnu\\n\\nSo, what does MAXLENGTH(2) does then. It can't stop after first displaying first 2 characters, then why should we use the same?\\n\\n\\nPlease provide a small example snippet so that I can understand what MAXLENGTH does.\\n\\nAlso, please give an example placing MAXLENGTH after RECORD , while defining.\\n\\nA reply early would help.\\n\\n\\n\\nRegards,\\nAbhi,\\n9962210131.\", \"post_time\": \"2015-05-11 10:09:19\" },\n\t{ \"post_id\": 7354, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Re: Spraying a file programatically into the Landing Zone\", \"username\": \"Abhishek_M04\", \"post_text\": \"Is there any update on this?\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-15 07:44:00\" },\n\t{ \"post_id\": 7323, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Re: Spraying a file programatically into the Landing Zone\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nCan anyone please let me know how this can be done using FTP? I need urgently.\\n\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-13 05:23:02\" },\n\t{ \"post_id\": 7304, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Re: Spraying a file programatically into the Landing Zone\", \"username\": \"Abhishek_M04\", \"post_text\": \"Thanks Richard.\\n\\nIf anyone can provide some inputs without FTP , simply through ECL code, will be helpful.\\n\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-10 13:45:37\" },\n\t{ \"post_id\": 7302, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Re: Spraying a file programatically into the Landing Zone\", \"username\": \"rtaylor\", \"post_text\": \"Abhishek,Can anyone tell me whether there is a way to upload a file programatically to the landing zone.
Not that I am aware of. Most "automated" uploads to an LZ are done using FTP and whatever automation is available through whatever FTP program you use.\\n\\nHowever, you can use DFUplus.exe to spray straight to your Thor cluster, making whatever box you're running DFUplus.exe on a "temporary" LZ. The one possible hitch to this scenario is that it is possible to have firewall problems that preclude it working. Testing will quickly show whether it works in your security environment or not.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-10 12:40:51\" },\n\t{ \"post_id\": 7298, \"topic_id\": 1675, \"forum_id\": 9, \"post_subject\": \"Spraying a file programatically into the Landing Zone\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nCan anyone tell me whether there is a way to upload a file programatically to the landing zone. That is I have a file in any of my local path which I want to Upload into the Landing Zone either through DFU Plus command or through ECL IDE Code.\\n\\nNote:- I don't want the ftp/scp option as I would need the login ID and password to \\n the machine.\\n\\nA reply would help.\\n\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-10 09:58:07\" },\n\t{ \"post_id\": 7426, \"topic_id\": 1692, \"forum_id\": 9, \"post_subject\": \"Re: How to make Regex stop at the first occurance of a strin\", \"username\": \"bforeman\", \"post_text\": \"Hi Abhishek,\\n\\nI am not exactly sure what you mean, can you attach some example code to demonstrate what you are trying to do?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-21 15:29:58\" },\n\t{ \"post_id\": 7421, \"topic_id\": 1692, \"forum_id\": 9, \"post_subject\": \"How to make Regex stop at the first occurance of a string.\", \"username\": \"Abhishek_M04\", \"post_text\": \"Hi Team,\\n\\nIn ECL , how can I make a regex stop at the first occurrence of a string to make it non-greedy?\\n\\nA reply soon would help.\\n\\n\\nRegards,\\nAbhishek.\", \"post_time\": \"2015-04-20 15:27:15\" },\n\t{ \"post_id\": 7649, \"topic_id\": 1740, \"forum_id\": 9, \"post_subject\": \"Re: Can't see Graph Control 5.2\", \"username\": \"David Dasher\", \"post_text\": \"Hi Bob\\n\\nMany thanks for the advice and the link to the video.\\n\\nI just tried on my home laptop and it's displaying so there must be something wrong with my office machine. I'll reinstall the IDE when I get back in.\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-05-24 19:30:01\" },\n\t{ \"post_id\": 7648, \"topic_id\": 1740, \"forum_id\": 9, \"post_subject\": \"Re: Can't see Graph Control 5.2\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nFrom the image, I see you are using the Graphs option from the ECL IDE. Try opening the ECL Watch tab and look at the Graph from there. You do have the ability to control how many processes and edges to display based on the complexity of the graph.\\n\\nThere is actually a pretty good video that we made a few months ago when the 5.x Graph Control was released. Here is the link to that video:\\n\\nhttp://hpccsystems.com/community/training-videos/ecl-watch-interface\\n\\nBest Regards,\\n\\nBob\", \"post_time\": \"2015-05-24 16:06:15\" },\n\t{ \"post_id\": 7642, \"topic_id\": 1740, \"forum_id\": 9, \"post_subject\": \"Can't see Graph Control 5.2\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nI'm having trouble with some ECL code so I'm trying to use the graph control to help me debug the problem. 
My issue is that for small scripts the graph control is fine, however, this particular code has several sub functions and is quite complex. The graph control just shows a grey box. Image attached.\\n\\nIs there a limit to how much the control can show?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-05-22 13:17:51\" },\n\t{ \"post_id\": 7784, \"topic_id\": 1771, \"forum_id\": 9, \"post_subject\": \"Re: Quote in dataset\", \"username\": \"elango_v\", \"post_text\": \"Yes. It is working now after providing the escape attribute. Thanks a lot for the info.\", \"post_time\": \"2015-06-17 13:27:54\" },\n\t{ \"post_id\": 7783, \"topic_id\": 1771, \"forum_id\": 9, \"post_subject\": \"Re: Quote in dataset\", \"username\": \"rtaylor\", \"post_text\": \"Try it this way:CSV( SEPARATOR(','),HEADING(1), QUOTE('"'), ESCAPE('\\\\'))
\\nThere is no default value for the ESCAPE character, so you need to explicitly specify it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-17 13:17:39\" },\n\t{ \"post_id\": 7782, \"topic_id\": 1771, \"forum_id\": 9, \"post_subject\": \"Re: Quote in dataset\", \"username\": \"elango_v\", \"post_text\": \"Thanks for the update.I am facing a truncation issue in a column . there are 10 columns in a csv file. \\nbelow is one column value in the source -\\n"dsfklsdfk dskaflskdf as Jafar Dochin, 136 Chous, mumbai, formerly of 20 St Leonard Court, bombay 330000, 9 Cunzie Neuk, santacruize, Fife and 1 View Court, formerly t/a \\\\"Sofia’s\\\\" Fish Bar was sequestrated by The Accountant in Bankruptcy on 26 February 2009 and ambany, Accountant in Bankruptcy, has been appointed to act as trustee on the sequestrated estate. Any creditor of the debtor named above is invited to submit his statement of claim in the prescribed form, with any supporting accounts or vouchers, to Eileen Blackburn CA, French Duncan, the agent acting on behalf of the Accountant in Bankruptcy in this sequestration. For the purpose of formulating claims, creditors should note that the date of sequestration is 26 February 2009. ambani, Accountant in Bankruptcy, Trustee Accountant in Bankruptcy, 1 mg Road, chennai, 622004. (2517/403)"\\n\\nI am using these attributes : csv( separator(','),heading(1), terminator(['\\\\n', '\\\\r\\\\n']), quote(['\\\\'','"']))\\n\\nAll other records are fine except the above column. it is truncated remaining string are overlapping other columns. \\nis that because there \\\\"\\\\" inside the column? how to handle this? all other records are working fine except the above.\\nPlease clarify.\", \"post_time\": \"2015-06-17 10:54:04\" },\n\t{ \"post_id\": 7778, \"topic_id\": 1771, \"forum_id\": 9, \"post_subject\": \"Re: Quote in dataset\", \"username\": \"rtaylor\", \"post_text\": \"Elango,\\n\\nStandard CSV-type files always have a record delimiter (usually CRLF) and a field delimiter (usually a comma). Therefore, if your data should happen to contain any of the specified delimiters for that file, then the strings that could contain them must also be delimited by "quote" characters so the system can correctly interpret the difference between them. \\n\\nFor example, say you have a 3-field standard CSV file:\\n\\nid, name, address
and the data looks like this:\\n\\n1, jones, fred, 123 main st, anytown, FL 12345
note that there are "too many" commas here and by default the three fields will contain this:\\n1, jones, fred
and you will have lost actual data. To make it come out "right" your data has to look like this:\\n\\n1, "jones, fred", "123 main st, anytown, FL 12345"
and that's what the QUOTE option on our CSV files is all about.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-16 17:26:21\" },\n\t{ \"post_id\": 7777, \"topic_id\": 1771, \"forum_id\": 9, \"post_subject\": \"Quote in dataset\", \"username\": \"elango_v\", \"post_text\": \"Hi Team,\\n\\nwhat is the use of Quote attribute while we read the csv files in the dataset? Is it indicating the columns having quotes in the source. Please clarify.\\n\\nThanks,\\nElango\", \"post_time\": \"2015-06-16 16:00:55\" },\n\t{ \"post_id\": 7935, \"topic_id\": 1817, \"forum_id\": 9, \"post_subject\": \"HPCC Visualization\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am trying to compare 2 files and display the mismatched results.\\n\\nIs there any plugins available within HPCC to highlight the differences or represent the mismatched data in a different way or Do we use some third party plugins for the same ?\\n\\nWhat would be the best approach to handle such scenarios ? \\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2015-07-22 18:18:10\" },\n\t{ \"post_id\": 8044, \"topic_id\": 1851, \"forum_id\": 9, \"post_subject\": \"Re: Cannot get Error Log to Display\", \"username\": \"georgeb2d\", \"post_text\": \"This was user error. It was on auto hide.\", \"post_time\": \"2015-08-27 14:17:22\" },\n\t{ \"post_id\": 8043, \"topic_id\": 1851, \"forum_id\": 9, \"post_subject\": \"Re: Cannot get Error Log to Display\", \"username\": \"gsmith\", \"post_text\": \"If you click on the "view" tab at the top of the ribbon there are a bunch of options - worst case is to select "Reset Right/Default" which should reset all the docked windows back to the factory default...\", \"post_time\": \"2015-08-27 12:40:25\" },\n\t{ \"post_id\": 8042, \"topic_id\": 1851, \"forum_id\": 9, \"post_subject\": \"Cannot get Error Log to Display\", \"username\": \"georgeb2d\", \"post_text\": \"I have the Community_5.2.2-1 version.\\n\\nYesterday the Error log got split off from the normal display. It was on different screen. Today I see the Error Log yellow highlighted box on the bottom of the screen. When I click on it I get the Warnings box although at the top of the Warnings Box it says Error log. Inside this box it says a bunch of WARNING: ESP Exception, etc. \\n\\nHow can I get my Error Log back?\", \"post_time\": \"2015-08-27 12:30:42\" },\n\t{ \"post_id\": 8134, \"topic_id\": 1876, \"forum_id\": 9, \"post_subject\": \"Re: Unable to Open ECL IDE\", \"username\": \"raghpr01\", \"post_text\": \"Thank You Smith. It worked.\", \"post_time\": \"2015-09-15 18:55:18\" },\n\t{ \"post_id\": 8122, \"topic_id\": 1876, \"forum_id\": 9, \"post_subject\": \"Re: Unable to Open ECL IDE\", \"username\": \"raghpr01\", \"post_text\": \"Thank you. Will do that now.\", \"post_time\": \"2015-09-15 13:31:05\" },\n\t{ \"post_id\": 8118, \"topic_id\": 1876, \"forum_id\": 9, \"post_subject\": \"Re: Unable to Open ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"If you are having continual issues logging in, you can try the following steps to create a "clean" startup (try 1 first, then 2 if needed):\\n\\n1. On the login window. Select preferences and press “New” and re-enter the server information under a new configuration name.\\n2. 
Open regedit and delete (or rename) the following key: HKEY_CURRENT_USER\\\\Software\\\\HPCC Systems.\\n\\nIf the above does not help, please let me know along with any further information that could help to track down the issue.\", \"post_time\": \"2015-09-15 05:11:56\" },\n\t{ \"post_id\": 8116, \"topic_id\": 1876, \"forum_id\": 9, \"post_subject\": \"Re: Unable to Open ECL IDE\", \"username\": \"raghpr01\", \"post_text\": \"I get a different error now when I uninstalled and reinstalled. Below is the error:\\nECLIDE.exe caused ACCESS_VIOLATION at 0023:00C2F718\\n\\nHowever, my ecl watch works fine with no issues.\\n\\nI tried my user id on my colleagues computer and I can log in too. It must be something about the configuration on my computer.\", \"post_time\": \"2015-09-14 18:07:05\" },\n\t{ \"post_id\": 8114, \"topic_id\": 1876, \"forum_id\": 9, \"post_subject\": \"Unable to Open ECL IDE\", \"username\": \"raghpr01\", \"post_text\": \"My ECL IDE got crashed when I re-submitted the code in a builder window.\\n\\nI get the below error when I try to re-open the ECL IDE.\\n\\nIt says: Exception Reason:\\nECLIDE.exe caused ACCESS_VIOLATION in module "C:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.2.4\\\\eclide\\\\bin\\\\mfc100u.dll" at 0023:50E665A8.\\n\\nI've tried to re-install the latest version also the old versions probably like 6 times. Re-booted thrice. Took the help desk support. I get the same error. It asks me to submit the bug, but when I do that, it says "can't send error report to the Server".\\n\\nThanks!\", \"post_time\": \"2015-09-14 17:14:11\" },\n\t{ \"post_id\": 8248, \"topic_id\": 1910, \"forum_id\": 9, \"post_subject\": \"Re: Where is flow?\", \"username\": \"Dimon\", \"post_text\": \"Hi Joe,\\n\\nThanks for getting back so quickly! But this doesn't really answer my question. After installing spoon plugins I can't locate "Frequency" element anywhere in the design entries in Spoon. I see in your video that upper folder from where you drag "Frequency" node into your design page (see attached file). I don't have that folder. Can you confirm this folder suppose to be presented in spoon-plugins or not? This is what I'm trying to figure out.\\n\\nThank you very much.\", \"post_time\": \"2015-10-06 02:22:55\" },\n\t{ \"post_id\": 8226, \"topic_id\": 1910, \"forum_id\": 9, \"post_subject\": \"Re: Where is flow?\", \"username\": \"joe.chambers\", \"post_text\": \"Flow is a custom build of Penthaho Spoon + HPCC Specific plugins. The plugins are here https://github.com/hpcc-systems/spoon-plugins and can be added to a local install of Spoon.\\n\\nThese plugins handle building ecl code as well as executing the workunit against the cluster. The plugins are open source and we welcome any contributions and use.\", \"post_time\": \"2015-10-05 18:45:03\" },\n\t{ \"post_id\": 8220, \"topic_id\": 1910, \"forum_id\": 9, \"post_subject\": \"Where is flow?\", \"username\": \"Dimon\", \"post_text\": \"Hi Everyone!\\n\\nI was watching this cool presentation about HPCC here:\\nhttps://www.youtube.com/watch?v=ex4J3SAAsp4&list=PL724E33CC5FBF8D78&index=18\\n\\nand was wondering if "flow" tool is available at all as a separate tool or opensource product. I couldn't find it on hpcc website and therefore the question. I'm using pentaho kettle and pentaho hpcc integration, but I don't see certain components in this integration framework (ex: frequency node which presenter mentioned in this video). 
This makes me think the pentaho hpcc integration doesn't have components which flow tool offers, and therefore this presentation is not repeatable by the rest of us. Can someone confirm if "flow" is availalbe somewhere? If yes, where? Can someone confirm that pentaho integration doesn't have certain components like "frequency"?\\n\\nAs a side note, I would like to propose to HPCC Systems to make always clear few things:\\n\\n1. Where to download all the tools, which you refer in your public presentations.\\n2. Make clear if tool is propitiatory or open source and if it is available for general public.\\n3. If your product (aka flow) is propitiatory and in you using it in your public presentation conveying among other things the idea that HPCC is easy to use via tools like flow, try to refrain from that paradigm, because it is confusing to find at the end that tool you are presenting is not available and it is impossible to repeat your presentation case. It would hurt and would save us a lot of time if you mention in this presentation that Flow tool is proprietary and some components are not available if one would try to repeat what was done in the presentation. This will help eliminate the confusion when using the HPCC product(s).\\n\\nThanks!\\nDimon.\", \"post_time\": \"2015-10-04 01:18:49\" },\n\t{ \"post_id\": 8264, \"topic_id\": 1916, \"forum_id\": 9, \"post_subject\": \"Re: ECLIDE crashes when trying to view graphs\", \"username\": \"alex\", \"post_text\": \"Does the graph control display first and then crash while loading the data do you think?
\\n\\nAfter going to the "Graphs" tab of a workunit in the IDE, the crash occurs immediately when I try to click any graph\\n\\nAlso if you click Send on the bug trap I can take a look at the stack trace.\\n
\\n\\nUnfortunately I'm not on an Internet connected network.\\n\\nIn the meantime go to the IDE / Preferences / Other and try uninstalling the graph control and reinstalling (uninstall, reboot, test, install, test, reboot etc.).\\n
\\n\\nThe Uninstall/Install buttons on Preferences->Other are grayed out. As an aside, the "Test GPF" button also causes the IDE to crash.\\n\\n\\nAlso is the Graph Control working in ECL Watch (FF or IE)?
\\nYes when I connect to an HPCC system (both v5.0.4 and v5.2.4), no when I connect to our 702 instance (where I get a missing-plugin error; I've always assumed that was ActiveX)\", \"post_time\": \"2015-10-07 17:36:54\" },\n\t{ \"post_id\": 8262, \"topic_id\": 1916, \"forum_id\": 9, \"post_subject\": \"Re: ECLIDE crashes when trying to view graphs\", \"username\": \"gsmith\", \"post_text\": \"That sounds like an invalid structure being passed to/from the graph control (which is an ActiveX in this case).\\n\\nDoes the graph control display first and then crash while loading the data do you think?\\n\\nAlso if you click Send on the bug trap I can take a look at the stack trace.\\n\\nIn the meantime go to the IDE / Preferences / Other and try uninstalling the graph control and reinstalling (uninstall, reboot, test, install, test, reboot etc.).\\n\\nAlso is the Graph Control working in ECL Watch (FF or IE)?\", \"post_time\": \"2015-10-07 11:46:20\" },\n\t{ \"post_id\": 8256, \"topic_id\": 1916, \"forum_id\": 9, \"post_subject\": \"Re: ECLIDE crashes when trying to view graphs\", \"username\": \"alex\", \"post_text\": \"Thanks Richard. I created https://track.hpccsystems.com/browse/IDE-447 to hopefully get some help.\", \"post_time\": \"2015-10-06 14:44:15\" },\n\t{ \"post_id\": 8254, \"topic_id\": 1916, \"forum_id\": 9, \"post_subject\": \"Re: ECLIDE crashes when trying to view graphs\", \"username\": \"rtaylor\", \"post_text\": \"Alex,\\n\\nGood information for a bug report in JIRA: https://track.hpccsystems.com\\n\\nYou should open a JIRA ticket and copy/paste that into the report (adding any additional information you may have that might help).\\n\\nReporting issues like this yourself in JIRA has several benefits:
\\nSo, if you do not already have GitHub and JIRA logins, I suggest you get them to fully participate in this Open Source Community.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-10-06 13:59:55\" },\n\t{ \"post_id\": 8252, \"topic_id\": 1916, \"forum_id\": 9, \"post_subject\": \"ECLIDE crashes when trying to view graphs\", \"username\": \"alex\", \"post_text\": \"The ECL IDE crashes whenever I try to view a graph in the graph tab of a work unit. BugTrap says the error is in Windows\\\\syswow64\\\\OLEAUT32.dll, at SafeArrayGetVartype().\\n\\nI have tried versions 4.2.2, 5.0.4, and 5.2.4 (IDE and GraphControl) have the same error in each of them.\\n\\nI am using Windows 7 64-bit.\", \"post_time\": \"2015-10-06 13:12:19\" },\n\t{ \"post_id\": 8260, \"topic_id\": 1918, \"forum_id\": 9, \"post_subject\": \"Re: Bizarre Events in ECL IDE\", \"username\": \"georgeb2d\", \"post_text\": \"This has been resolved. I found out that the queue was pointing to dev, I did not realize that the queues are set to look at certain repositories. I thought that was set by the ECL IDE. I should have been using one of the QC queues. \\n\\nI learned something new today.\\n\\nI will know better next time.\", \"post_time\": \"2015-10-06 15:55:06\" },\n\t{ \"post_id\": 8258, \"topic_id\": 1918, \"forum_id\": 9, \"post_subject\": \"Bizarre Events in ECL IDE\", \"username\": \"georgeb2d\", \"post_text\": \"I have the following code:\\n\\nOUTPUT(X.SprayProcess_FileNames().DATA_BASE_SprayDirPath);\\nOUTPUT(X.Constants.DatabaseExtracts_Spray_DirName);\\nDATA_BASE_SprayDirPath := X.Constants.DatabaseExtracts_Spray_DirName \\n\\t + X.Constants.FileSeparator \\n\\t + 'process' \\n\\t\\t\\t + X.Constants.FileSeparator \\n\\t\\t\\t\\t+ 'don1';\\nOUTPUT(DATA_BASE_SprayDirPath);\\n
\\nThe first output is :\\n/data/wunelli/qc/in/database/2015093016000\\nThe second output is:\\n/data/orbittesting/wunelli/database\\nThe third output is:\\n/data/orbittesting/wunelli/database/process/don1\\n\\nResult 1 and Result 3 should be the same. I have no clue as to why these are different. \\nThis is W20151006-112250 on Alpharetta_QC. \\n\\nThanks,\\nDon\", \"post_time\": \"2015-10-06 15:27:27\" },\n\t{ \"post_id\": 8492, \"topic_id\": 1980, \"forum_id\": 9, \"post_subject\": \"Re: How to make ECL IDE get more intellisenses?\", \"username\": \"bforeman\", \"post_text\": \"I always use the latest version of the ECL IDE that is available on the HPCCSystems portal.\\n\\nBob\", \"post_time\": \"2015-11-09 18:57:30\" },\n\t{ \"post_id\": 8454, \"topic_id\": 1980, \"forum_id\": 9, \"post_subject\": \"Re: How to make ECL IDE get more intellisenses?\", \"username\": \"WeiDong\", \"post_text\": \"So which ECL IDE is MOST suitable for us?\", \"post_time\": \"2015-11-09 03:30:36\" },\n\t{ \"post_id\": 8444, \"topic_id\": 1980, \"forum_id\": 9, \"post_subject\": \"Re: How to make ECL IDE get more intellisenses?\", \"username\": \"bforeman\", \"post_text\": \"This is a known issue and plans to improve the auto-completion (intellisense) is slated for HPCC 6.0\\n\\nSee:\\nhttps://track.hpccsystems.com/browse/IDE-423\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-06 16:04:54\" },\n\t{ \"post_id\": 8432, \"topic_id\": 1980, \"forum_id\": 9, \"post_subject\": \"How to make ECL IDE get more intellisenses?\", \"username\": \"WeiDong\", \"post_text\": \"Hello all:\\n\\n I find ECL IDE doesn't have a good intellisense like Visual Studio …… And when I say:\\n\\n \\nImport $; \\n$.\\n
\\n\\n It only gives me a rapid flash intellisense window but disappears. And then nothing gives me....\\n\\n Will ECL IDE enhance the IDE with auto complete? Or any plug-ins?\\n\\n Thanks!\", \"post_time\": \"2015-11-06 01:10:43\" },\n\t{ \"post_id\": 8550, \"topic_id\": 1994, \"forum_id\": 9, \"post_subject\": \"Re: ECl IDE\", \"username\": \"JimD\", \"post_text\": \"You can download the IDE from:\\n\\nhttps://hpccsystems.com/download/develo ... ls/ecl-ide\\n\\nYou can download the Graph control from:\\n\\nhttps://hpccsystems.com/download/develo ... ph-control\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-10 15:39:06\" },\n\t{ \"post_id\": 8544, \"topic_id\": 1994, \"forum_id\": 9, \"post_subject\": \"Re: ECl IDE\", \"username\": \"sipan4434\", \"post_text\": \"I have created Amazon account and I want to install HPCC systems there too. But I can not understand how can I install ECL IDE on ubuntu's HPCC or on Amazon AWS. \\n\\n\\nInstalling the ECL IDE\\n\\nThe ECL IDE installs to your Windows workstation. Once you install it, you can use it for any cluster.\\n\\n1. From the View Clusters page, click on the ESP link to launch the ECL Watch page for a cluster.\\n Take note of the IP address for the ESP server running ECL Watch, you will use it later. \\n2. Click on the Resources/Browse link in the left side menu. \\n3. Download and save (do not run from your browser) both ECL IDE Installer and Graph Control Installer. \\n Install both the ECL IDE and Graph Control. When installation is complete, launch the ECL IDE. \\n4. When you open the ECL IDE for the first time, supply the IP address of the ESP server. If this is not the first time, \\n you may edit the ESP server IP address by clicking the Preferences button on the Login dialog. \\n5. Login using HPCCDemo as the username and password.\\n\\nI am following this instruction. But I do not understand how to download ECL IDE installer and Graph Control installer.\", \"post_time\": \"2015-11-10 15:03:59\" },\n\t{ \"post_id\": 8538, \"topic_id\": 1994, \"forum_id\": 9, \"post_subject\": \"Re: ECl IDE\", \"username\": \"rtaylor\", \"post_text\": \"Sipan,\\n\\nYou don't install the IDE to the AWS cluster, you install it on your Windows computer and "point it" to the AWS cluster (the Server IP on the IDE's Preferences window).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-10 14:48:02\" },\n\t{ \"post_id\": 8530, \"topic_id\": 1994, \"forum_id\": 9, \"post_subject\": \"ECl IDE\", \"username\": \"sipan4434\", \"post_text\": \"How to install ECI IDE on Amazon AWS or on UBUNTU. I can not find Resources/Browse link as it is said in the site https://aws.hpccsystems.com/aws/getting_started/\\n\\nPlease help me.\", \"post_time\": \"2015-11-10 11:18:56\" },\n\t{ \"post_id\": 8706, \"topic_id\": 2030, \"forum_id\": 9, \"post_subject\": \"Re: ECL Watch: This page Can't be displayed\", \"username\": \"gsmith\", \"post_text\": \"Can you post some screenshots of:\\n1. The page not showing in the IDE\\n2. The page which opens when you click on the "Launch -> Force Internet Explorer"\\n\\nSpecifically I am looking for what version of IDE / IE you have installed. 
Also if ECL Watch is rendering in IE and that IE has compatibility mode turned off.\", \"post_time\": \"2015-11-26 06:04:24\" },\n\t{ \"post_id\": 8700, \"topic_id\": 2030, \"forum_id\": 9, \"post_subject\": \"Re: ECL Watch: This page Can't be displayed\", \"username\": \"tlhumphrey2\", \"post_text\": \"What version of ECL IDE do you have?\", \"post_time\": \"2015-11-25 18:52:21\" },\n\t{ \"post_id\": 8698, \"topic_id\": 2030, \"forum_id\": 9, \"post_subject\": \"Re: ECL Watch: This page Can't be displayed\", \"username\": \"rtaylor\", \"post_text\": \"Submit a JIRA ticket for this, please.\\n\\nRichard\", \"post_time\": \"2015-11-25 18:47:01\" },\n\t{ \"post_id\": 8694, \"topic_id\": 2030, \"forum_id\": 9, \"post_subject\": \"ECL Watch: This page Can't be displayed\", \"username\": \"georgeb2d\", \"post_text\": \"After I run a Workunit I click on ECL Watch and This page can't be displayed displays. Been like this for several months. Just go to browser and look. There must be an easier way. Used to work...\", \"post_time\": \"2015-11-25 18:15:38\" },\n\t{ \"post_id\": 8880, \"topic_id\": 2062, \"forum_id\": 9, \"post_subject\": \"Re: SOAP Error\", \"username\": \"bmonroe\", \"post_text\": \"Ok, thank you!\", \"post_time\": \"2015-12-17 13:14:33\" },\n\t{ \"post_id\": 8876, \"topic_id\": 2062, \"forum_id\": 9, \"post_subject\": \"Re: SOAP Error\", \"username\": \"gsmith\", \"post_text\": \"Assuming you have not manually installed some different client tools, your IDE will also be 5.4.2-1.\\n\\nFWIW Both those warnings are normal:\\nThe compiler warning is simply saying that your local compiler and the server side compiler are different version (which for a point release is fine 99.9% of the time).\\n\\nThe SOAP fault in this case happens when the IDE sends a request to the server, checking for the availability of the legacy remote repository, in the OSS releases you won't have one of those.\", \"post_time\": \"2015-12-17 05:15:09\" },\n\t{ \"post_id\": 8872, \"topic_id\": 2062, \"forum_id\": 9, \"post_subject\": \"Re: SOAP Error\", \"username\": \"bmonroe\", \"post_text\": \"I'm also getting this message below which suggests mismatch of compiler and server. So I'm assuming these versions.\", \"post_time\": \"2015-12-16 18:47:12\" },\n\t{ \"post_id\": 8868, \"topic_id\": 2062, \"forum_id\": 9, \"post_subject\": \"Re: SOAP Error\", \"username\": \"gsmith\", \"post_text\": \"Quick question - what IDE version and what Server Version?\", \"post_time\": \"2015-12-16 18:00:30\" },\n\t{ \"post_id\": 8856, \"topic_id\": 2062, \"forum_id\": 9, \"post_subject\": \"SOAP Error\", \"username\": \"bmonroe\", \"post_text\": \"I am currently receiving the error in the attachment. Searching the forum for a solution was futile. Anyone have any suggestions of what's going on? Additionally, I am unable to process any data in the compiler. 
\\n(I am currently on the Advanced ECL lessons for context)\\nThank you!\\nBrittney\", \"post_time\": \"2015-12-15 20:16:15\" },\n\t{ \"post_id\": 9436, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"vyasshub\", \"post_text\": \"gsmith\\nAny update ??\", \"post_time\": \"2016-03-30 18:33:20\" },\n\t{ \"post_id\": 9412, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"vyasshub\", \"post_text\": \"@gsmith: Thanks for quick reply.\\nLets work on developing Platform Plugins.\\n\\nFor Example, I want to develop a plugin for following scenario:\\n\\nIf a programmer editing a file in a module and after editing the file programmer check-in the file, now I want to send an email alert to module lead ,that a particular file in his module is edited and check-in by a particular programmer(lets say we are sending programmer userid in email alert).\", \"post_time\": \"2016-03-29 14:22:01\" },\n\t{ \"post_id\": 9410, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"The latter is currently support (Platform Plugins). The ECL IDE currently doesn't support plugins as such, but as part of the 6.0 release, we are hoping to release the IDE as open source, at which point anyone will be able to extend it.\", \"post_time\": \"2016-03-29 12:41:09\" },\n\t{ \"post_id\": 9408, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"vyasshub\", \"post_text\": \"@gsmith: Actually I am looking into both , but lets start with extending the IDE itself,\\nPlease let me know from where I can start (documents or links for any related posts)\\nThanks in advance for you help.\", \"post_time\": \"2016-03-29 12:38:40\" },\n\t{ \"post_id\": 9388, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"vyasshub,\\n\\nIs your post a response to Lorraine Chapman's post, https://hpccsystems.com/bb/viewtopic.php?t=2210&p=9378#p9378, about our summer intern projects -- one of which is "Adding to our list of supported embedded languages"?\\n\\nTim\", \"post_time\": \"2016-03-23 12:57:22\" },\n\t{ \"post_id\": 9382, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Re: Plugins development for ECL IDE\", \"username\": \"gsmith\", \"post_text\": \"Are you trying to extend the IDE itself or are you trying to write a HPCC Platform "plugin" (one which can be called from ECL)?\", \"post_time\": \"2016-03-23 08:05:50\" },\n\t{ \"post_id\": 9380, \"topic_id\": 2212, \"forum_id\": 9, \"post_subject\": \"Plugins development for ECL IDE\", \"username\": \"vyasshub\", \"post_text\": \"Hi,\\nI want to develop plugins for ECL IDE.\\nNeed some guidance on developing plugins(which language I can use C++,Python,C#) and \\nonce developed how to import Plugins in ECL IDE ?\", \"post_time\": \"2016-03-23 06:11:39\" },\n\t{ \"post_id\": 9648, \"topic_id\": 2276, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE fails to run on Window\", \"username\": \"ming\", \"post_text\": \"Glad to hear it works. 
ECLIDE 6.0.0 includes Clienttools build with improved required runtime libraries package.\", \"post_time\": \"2016-05-20 12:21:19\" },\n\t{ \"post_id\": 9646, \"topic_id\": 2276, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE fails to run on Window\", \"username\": \"vin\", \"post_text\": \"Update: ECLIDE is working with KEL.\\n\\nI do not know what change made it work but the configuration is:\\n\\n1. ECLIDE 6.0.0\\n2. KEL 0.7 (5.6.2)\\n3. Java JDK 8u92\\n\\nThanks,\\n+vince\", \"post_time\": \"2016-05-20 00:56:37\" },\n\t{ \"post_id\": 9606, \"topic_id\": 2276, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE fails to run on Window\", \"username\": \"ming\", \"post_text\": \"Have you tried to apply the update for Universal C Runtime in Windows: https://support.microsoft.com/en-us/kb/2999226?\\n\\nIt is a work-around. We will try to fix it in our next HPCC Clienttools relesae.\", \"post_time\": \"2016-05-09 17:07:35\" },\n\t{ \"post_id\": 9594, \"topic_id\": 2276, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE fails to run on Window\", \"username\": \"rtaylor\", \"post_text\": \"Vince,\\n\\nYour post has been added to this JIRA: https://track.hpccsystems.com/browse/HPCC-14436?jql=text%20~%20clienttools%20ORDER%20BY%20key%20DESC\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-04 14:11:30\" },\n\t{ \"post_id\": 9588, \"topic_id\": 2276, \"forum_id\": 9, \"post_subject\": \"ECL IDE fails to run on Window\", \"username\": \"vin\", \"post_text\": \"Tried to install ECLIDE on Window 8.1 Enterprise 64-bit. \\n\\n1. Downloaded hpccsystems-eclide-community_5.6.2-1Windows-i386.exe\\n2. Installed it.\\n3. Launched ECL IDE got error because api-ms-win-crt-runtime-l1-1-0.dll not found. See attached figure.\\n\\n[attachment=1:c0xp3goq]Screen Shot 2016-05-03 at 3.49.47 PM.png\\n\\nSolution:\\n\\nInstall Microsoft Visual C++ (help found at this link: http://forums.autodesk.com/t5/installat ... 17/td-p/62\\n31310)\\n\\n4. Re-launched ECL IDE: now get eclcc.exe error.\\n\\n[attachment=0:c0xp3goq]Screen Shot 2016-05-03 at 4.06.01 PM.png\\n\\n5. The cluster has a different version. From shell on cluster:\\n\\n$ eclcc --version\\n5.4.6 community_5.4.6-1\\n\\nAt this point I started all over with a fresh Windows instance.\\n\\n6. Installed HPCC Client Tools for Windows version 5.4.6-1 (the version that matches the cluster).\\n7. Installed ECL IDE 5.6.2-1 (because I could not find version 5.4.6-1).\\n8. Installed dll as discussed in #3 above.\\n9. Launched ECL IDE\\n10. In "Preferences > Compiler" overrode compiler to use 5.4.6 instead of 5.6.2.\\n11. Connected to cluster.\\n12. Alas, eclcc.exe crashes again. It actually crashes many times. At least I have to close the "eclcc.exe has stopped working" popup about 9 times. No window pops up with an error as in #4 above. However, after closing the "not working" popup 5 times the following error appears in the ECL IDE error log window.\\n\\nWARNING: SP Exception - CSoapResponseBinding: ... Client version build_3_08 (server community_5.4.6-1) is out of date. \\n\\nI am able to use the cluster from ECL IDE running on my Mac in Wine. That version is 5.4.2-1 as is the compiler.\\n\\n13. Therefore, I download the ECL IDE & Client Tools version 5.4.2-1, installed it on a fresh windows instance. Again, eclcc.exe stops working--complains about a compiler mismatch.\\n\\nI am out of ideas. We will consider upgrading the cluster to 5.6.2-1. But that is just a workaround.\\n\\nSummary:\\n
\\n
\", \"post_time\": \"2016-05-04 13:27:12\" },\n\t{ \"post_id\": 9760, \"topic_id\": 2316, \"forum_id\": 9, \"post_subject\": \"Re: Linux Install\", \"username\": \"rtaylor\", \"post_text\": \"bbrown57,\\n\\nUnfortunately, there is no Linux version of the ECL IDE, it is only a Windows program. Your two choices are to run it in a Windows emulator on your Linux box, or download our ECL Plugin for Eclipse (https://hpccsystems.com/download/third-party-integrations/eclipse-ide) and use Eclipse instead of the ECL IDE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-14 16:06:21\" },\n\t{ \"post_id\": 9740, \"topic_id\": 2316, \"forum_id\": 9, \"post_subject\": \"Linux Install\", \"username\": \"bbrown57\", \"post_text\": \"I am trying to located a linux RHEL install for ECL IDE. The downloads page only lists a Windows installer. Unfortunately My HPCC cluster is not accessible from the outside world and is only visible to a Linux login node where the ECL IDE needs to be installed. Any help is appreciated.\", \"post_time\": \"2016-06-13 14:53:58\" },\n\t{ \"post_id\": 9993, \"topic_id\": 2393, \"forum_id\": 9, \"post_subject\": \"Re: Sorting files in repository folders\", \"username\": \"jwilt\", \"post_text\": \"Check that. I now see this change should show up in 6.0, from my previous JIRA ticket.\\nSorry for the dup.\", \"post_time\": \"2016-07-24 00:43:06\" },\n\t{ \"post_id\": 9983, \"topic_id\": 2393, \"forum_id\": 9, \"post_subject\": \"Sorting files in repository folders\", \"username\": \"jwilt\", \"post_text\": \"We're currently using a storage product that does not sort files/folders listed in a folder in a mounted volume (non-legacy repository, not MySQL). \\nSo, in ECL IDE, the Repository browser shows folders unsorted - a royal pain for a sizable repository.\\nNote that in the Windows COMMAND, a "dir" shows the same (non-)sorting.\\n\\nDon't suppose it would be easy to tweak the ECL IDE Repository pane to explicitly sort the folders/files (as should normally be done by a filesystem)?\\n(Unfortunately, we have no control over the storage product that no longer sorts as it previously did.)\\n\\nThanks.\", \"post_time\": \"2016-07-24 00:37:15\" },\n\t{ \"post_id\": 13553, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"nawazkhan\", \"post_text\": \"Thanks Bob, \\n\\nyou are right, have installed IDE 6 and it works.\\n\\n\\nRegards Nawaz\", \"post_time\": \"2016-12-01 07:56:58\" },\n\t{ \"post_id\": 13523, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"bforeman\", \"post_text\": \"I think so, I think Gordon added the capability in the 6.x versions. You can have both 5.x and 6.x installed on your same machine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-11-30 13:50:40\" },\n\t{ \"post_id\": 13513, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"nawazkhan\", \"post_text\": \"thanks bob, \\n\\nI am using ECL 5.0.2. 
Will that be issue?\", \"post_time\": \"2016-11-30 13:46:38\" },\n\t{ \"post_id\": 13503, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"bforeman\", \"post_text\": \"For me, it was a four step process.\\n\\n1) Installed Tortoise GIT\\n2) Installed GIT for Windows (not sure if this was necessary, but it satisfied an issue in Tortoise GIT to help locate the git.exe)\\n3) Cloned the folder that I had planned to attach to the ECL IDE, using the Git Clone option on the Windows Explorer context menu.\\n4) Added the cloned folder via the ECL IDE Preferences ECL Folders option.\\n\\nI am using version 6.0.6-1 of the ECL IDE.\\n\\nBob\", \"post_time\": \"2016-11-30 13:44:27\" },\n\t{ \"post_id\": 13463, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"nawazkhan\", \"post_text\": \"Thanks smith and Bob.\\n\\nI have done the cloning and mapped the folder path under ECL Folders in Preference.\\n\\nBut still I can get the context menu (same as you attached) only with repository folder, not inside ECL IDE.\\n\\nDo i need to map the path anywhere in IDE?\", \"post_time\": \"2016-11-30 07:44:54\" },\n\t{ \"post_id\": 13453, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"bforeman\", \"post_text\": \"You also must CLONE a folder from the remote GIT repo to a local folder first, and then add that folder to the ECL IDE Preferences window. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-11-29 15:45:53\" },\n\t{ \"post_id\": 13423, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"gsmith\", \"post_text\": \"In the IDE you should now get the same context menu when you right click on a "root / blue" folder (see attached img)\", \"post_time\": \"2016-11-29 14:28:15\" },\n\t{ \"post_id\": 13413, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"nawazkhan\", \"post_text\": \"Hi Smith,\\n\\nI have installed tortoise git and can see the context menu only with repository folder instead of IDE. Can you help me to configure with ide?\", \"post_time\": \"2016-11-29 08:55:18\" },\n\t{ \"post_id\": 13383, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"nawazkhan\", \"post_text\": \"Hi Smith,\\n\\n I have installed TortoiseGit and context menu is displaying with repository not with IDE.\\nCan you help to configure TG with ECL IDE?\", \"post_time\": \"2016-11-25 11:22:44\" },\n\t{ \"post_id\": 11133, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"gsmith\", \"post_text\": \"Further - if you install TortoiseGIT (a windows shell extension) - you will get git context menu items on the root folders in the IDE.\", \"post_time\": \"2016-09-08 19:28:27\" },\n\t{ \"post_id\": 11123, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE and git repositories\", \"username\": \"bforeman\", \"post_text\": \"If you look in the ECL IDE Preferences, in the Compiler tab you will see the option to add additional ECL folders. \\n\\nSo with regards to Git Hub, you simply need to add your local repository folder to your list of ECL folders. You will then be able to see and work with the ECL code in your repository. 
The changes can then be sent back to the shared repository using a commit and then a pull request.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-09-08 15:26:43\" },\n\t{ \"post_id\": 11093, \"topic_id\": 2743, \"forum_id\": 9, \"post_subject\": \"ECL IDE and git repositories\", \"username\": \"clanderson\", \"post_text\": \"I am working to transfer our source code from MySQL to a GIT repository. I am looking at both the ECL IDE and Eclipse plugin. Our developer community is still very much using the ECL IDE - what I need information on is working with our source code in a git repository within the ECL IDE. \\n\\nIs that possible and who has information on how to set that up. I have the code our of MySQL (mysql2git) - and have created our repository on gitlab (intrasource.lexisnexis.com) - and have a local and remote repository. But - I am not seeing how to work with this source code within the ECL IDE (I have a similar but different concern with the Eclipse plugin - will be posting in that forum shortly).\", \"post_time\": \"2016-09-07 17:45:31\" },\n\t{ \"post_id\": 11833, \"topic_id\": 2903, \"forum_id\": 9, \"post_subject\": \"Re: Is there a list of keyboard shortcuts for ECL IDE?\", \"username\": \"JimD\", \"post_text\": \"I already entered a Jira and resolved it \\n\\nhttps://track.hpccsystems.com/browse/HPCC-16400 \\n\\nYou will find the list in the next release of documentation.\\n\\nAs for a shortcut to compile code, you will have to submit a feature request. I think it is a good suggestion.
\\n\\nJim\", \"post_time\": \"2016-10-05 18:09:58\" },\n\t{ \"post_id\": 11713, \"topic_id\": 2903, \"forum_id\": 9, \"post_subject\": \"Re: Is there a list of keyboard shortcuts for ECL IDE?\", \"username\": \"oscar.foley\", \"post_text\": \"Hello Jim\\n\\nThanks for your list!\\nHere is your ticket: https://track.hpccsystems.com/browse/IDE-570\\n\\nIs there any short cut to compile code?\\n\\nCheers,\\nOscar\", \"post_time\": \"2016-10-03 08:18:53\" },\n\t{ \"post_id\": 11693, \"topic_id\": 2903, \"forum_id\": 9, \"post_subject\": \"Re: Is there a list of keyboard shortcuts for ECL IDE?\", \"username\": \"JimD\", \"post_text\": \"If you open a Jira ticket, I can add a page to the manual for the IDE. https://track.hpccsystems.com/\\n\\nThis is what I have:\\n\\nF1\\tOnline Help for Syntax\\nCtrl+F\\tFind\\nF3\\tFind Next\\nShift+F3 \\tFind Previous\\nCtrl+H\\tReplace\\nF4\\tNext Error\\nShift+F4\\tPrevious Error\\nF5\\tRefresh Repository\\nF7\\tCheck Syntax\\nF8\\tLocate File in Explorer\\nF9\\tLocate Current FIle\\nF11\\tLocate Selected File\\nF12\\tOpen Selected File\\nCtrl+Enter\\tSubmit ECL\\nCtrl+Alt+Enter\\tSubmit Selected ECL\\nCtrl+X / Shift+Delete\\tCut\\nCtrl+C / Ctrl+Insert\\tCopy\\nCtrl+V / Shift+Insert\\tPaste\\nCtrl+Z / Alt+backspace\\tUndo\\nCtrl+Y\\tRedo\\nCtrl+F4\\tClose File/Builder Window\\nCtrl+Q\\tToggle Comment with //\\nCtrl+Shift+Q\\tToggle Block Comment /*\\nCtrl+L\\tToggle Line Numbers\\nCtrl+S\\tSave\\nCtrl+O\\tOpen\\nCtrl+P\\tPrint\\nCtrl+N New Builder Window\\nCtrl+G\\tGo to Line\\nCtrl+A\\tSelect All\\nCtrl+Space\\tComplete Word\\nCtrl+E\\tMatch Brace\\nCtrl+Shift+E\\tSelect to Brace\\nCtrl+W\\tWord Wrap\\nCtrl+Alt+C\\tCopy Filename\\nCtrl+Shift+U\\tUppercase\\nCtrl+U\\tLowercase\\nCtrl+Shift+R\\tRecord/Stop Macro\\nCtrl+Shift+P\\tPlay Macro\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-09-30 19:24:23\" },\n\t{ \"post_id\": 11663, \"topic_id\": 2903, \"forum_id\": 9, \"post_subject\": \"Re: Is there a list of keyboard shortcuts for ECL IDE?\", \"username\": \"bforeman\", \"post_text\": \"No, there is not a summary list, but the ECL IDE and Client Tools PDF has complete documentation of all IDE features with their respective hot key values. You can also use the ribbon and popup menus in the IDE to display these hot keys.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-09-30 14:26:01\" },\n\t{ \"post_id\": 11633, \"topic_id\": 2903, \"forum_id\": 9, \"post_subject\": \"Is there a list of keyboard shortcuts for ECL IDE?\", \"username\": \"oscar.foley\", \"post_text\": \"Is there a list of keyboard shortcuts for ECL IDE?\\n\\nIf such list doesn't exist... would you mind sharing the shortcuts you already know.\\nSo far I have some from the tutorials:\\n\\nSpecific\\nF7 = Check Syntax\\nF8 = Open file location in windows explorer\\nCtrl + Enter = Submit\\nCtrl + Q = Comment/uncomment block with //\\nCtrl + Shift + Q = comment block with /*\\nCtrl + L = Show/ hide line numbers\\n\\nStandard\\nCtrl + S = Save\\nCtrl + O = Open\\nCtrl + N = New File\\nCtrl + F = Find\\nCtrl + G = Go to line\\nCtrl + H = Replace\\nCtrl + C = Copy\\nCtrl + X = Cut\\nCtrl + V = Paste\", \"post_time\": \"2016-09-30 13:31:59\" },\n\t{ \"post_id\": 15683, \"topic_id\": 2913, \"forum_id\": 9, \"post_subject\": \"Re: Compiling ALL files in Repository\", \"username\": \"oscar.foley\", \"post_text\": \"Pre-compiling the code using eclcc was slow and not really feasible. \\n\\nBut making syntax check has proven to be extremely useful in my project. 
More info here: https://hpccsystems.com/bb/viewtopic.php?f=9&t=3913\", \"post_time\": \"2017-03-07 11:59:19\" },\n\t{ \"post_id\": 11803, \"topic_id\": 2913, \"forum_id\": 9, \"post_subject\": \"Re: Compiling ALL files in Repository\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,
Is there any way of compiling ALL of the ones I have in a repository?
ECL/HPCC is very different from "standard" programming languages. \\n\\nECL is a declarative, non-procedural language. Therefore, your ECL code is never "executable" but just defines what you want the result to be. And that result is specified by an ECL Action (usually OUTPUT, but any expression without a definition name constitutes an Action).\\n\\nAlso, the way Thor operates (which does 99% of all HPCC work) is that you Submit your job and compilation is the first step in that job submission. If you re-submit the same job, the ECL code is re-compiled every time. The only purpose of a separate Compile is to Publish that query (usually to Roxie).\\n\\nSo the short answer to your question is NO. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-05 16:16:08\" },\n\t{ \"post_id\": 11653, \"topic_id\": 2913, \"forum_id\": 9, \"post_subject\": \"Compiling ALL files in Repository\", \"username\": \"oscar.foley\", \"post_text\": \"Hello\\n\\nI can compile easily a single .ecl file in ECL IDE. Is there any way of compiling ALL of the ones I have in a repository? Something like CTRL + SHIFT + B in Visual Studio?\\n\\nIf $false... I will prepare a PowerShell script that iterates thru a repository and uses client tools to compile everything. \\nIs there any way of invoking that script from ECL IDE using a menu item or a combination of keys? \\n(i.e. CTRL + SHIFT + B
)\", \"post_time\": \"2016-09-30 14:14:03\" },\n\t{ \"post_id\": 13123, \"topic_id\": 3293, \"forum_id\": 9, \"post_subject\": \"Re: ECLIDE 6.0.6 Crashes using graphs and deleting workunit\", \"username\": \"rtaylor\", \"post_text\": \"Manny,\\n\\neMail it to me and I will pass it on.\\n\\nHTH,\\n\\nRichard\\nrichard.taylor@lexisnexis.com\", \"post_time\": \"2016-11-11 19:11:22\" },\n\t{ \"post_id\": 13103, \"topic_id\": 3293, \"forum_id\": 9, \"post_subject\": \"ECLIDE 6.0.6 Crashes using graphs and deleting workunit\", \"username\": \"handymanny35\", \"post_text\": \"Hi, the ECLIDE 6.0.6 crashes I try and use the Graphs or when I tried to delete a workunit, and it has even crashed when I was creating a Workspace.\\n\\nI have uninstalled the IDE and Graph Control, rebooted, over and over again with no luck.\\n\\nI cannot email the BugTrap info as the IDE configured default email app apparently cannot fulfil the request. I cannot attach it either as its too big. This is not going well!\\n\\nHere is the Exception Reason:\\nECLIDE.exe caused UNKNOWN_ERROR in module "C:\\\\Windows\\\\SYSTEM32\\\\KERNELBASE.dll" at 0023:74F43FC8, RaiseException()+72 byte(s)\\n\\nAny clues anyone? Anyone know who I can email the Crash Report to?\\n\\nHELP!\\n\\nCheers\\n\\nManny\", \"post_time\": \"2016-11-11 16:50:12\" },\n\t{ \"post_id\": 13133, \"topic_id\": 3303, \"forum_id\": 9, \"post_subject\": \"Re: 1004: Error: Invalid compiler configuration (eclcc)\", \"username\": \"rtaylor\", \"post_text\": \"Marco,\\n\\nSince it's saying your compiler configuration is invalid, that's the first place I would look. \\n\\nDo you have the compiler configured at all? Is it pointing to the right eclcc.exe version? ... Those are the kind of questions you should ask yourself first.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-11 19:17:47\" },\n\t{ \"post_id\": 13113, \"topic_id\": 3303, \"forum_id\": 9, \"post_subject\": \"1004: Error: Invalid compiler configuration (eclcc)\", \"username\": \"marcoalexpoppius\", \"post_text\": \"Hi,\\nI've installed the ECL plugin in Eclipse Neon and I've got the message above while writing any ECL script. I'te testes my ECL watch connectity and it is on. Any idea what is wrong with my settings?\\n\\nThanks,\\nMarco\", \"post_time\": \"2016-11-11 18:44:43\" },\n\t{ \"post_id\": 13543, \"topic_id\": 3413, \"forum_id\": 9, \"post_subject\": \"Re: Warnings removal\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nWithout looking at your specific code, I can only say that warnings are meant to alert you to possible issues in your code. Since the first warning indicates the problem lies in macro-generated code, then I suggest you look at the MACRO itself and/or the actual calls to the MACRO to ensure that the values you're passing will generate good ECL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-30 15:48:35\" },\n\t{ \"post_id\": 13533, \"topic_id\": 3413, \"forum_id\": 9, \"post_subject\": \"Warnings removal\", \"username\": \"BarrOs01\", \"post_text\": \"I would like to remove some warning from our code. 
Do you know How to fix the following:\\n\\nWarning: While expanding macro\\n\\nWarning: Field 'Priority' in TABLE does not appear to be properly defined by grouping conditions\\n\\nWarning: Identifier 'Strata' hides previous import\", \"post_time\": \"2016-11-30 15:38:15\" },\n\t{ \"post_id\": 14813, \"topic_id\": 3633, \"forum_id\": 9, \"post_subject\": \"Re: Format ECL Code\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nThe short answer is NO, you need to do that manually. Of course, the compiler doesn't care how unreadable your code is as long as it's syntactically correct.
\\n\\nThe longer answer is: HPCC/ECL is Open Source, so you are cordially invited to contribute to its development.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-20 21:06:06\" },\n\t{ \"post_id\": 14803, \"topic_id\": 3633, \"forum_id\": 9, \"post_subject\": \"Format ECL Code\", \"username\": \"wjblack\", \"post_text\": \"Is there a way to select and format ECL code within the IDE? Is there a keyboard shortcut? I've been sifting through documentation and haven't found anything. For example, let's assume that I've copied some ECL in from a notepad++ document.\", \"post_time\": \"2017-01-20 19:02:40\" },\n\t{ \"post_id\": 15033, \"topic_id\": 3703, \"forum_id\": 9, \"post_subject\": \"Re: Small error with workunit helpers\", \"username\": \"oscar.foley\", \"post_text\": \"[quote="rtaylor":342u6gab]Oscar,\\n\\nThat looks like a candidate for a JIRA ticket to me.
\\n\\nHTH,\\n\\nRichard\\nSure thing!\\n\\nI have created it... HPCC-17001: https://track.hpccsystems.com/browse/HPCC-17011\\n\\nCheers,\\nOscar\", \"post_time\": \"2017-02-07 08:06:14\" },\n\t{ \"post_id\": 15023, \"topic_id\": 3703, \"forum_id\": 9, \"post_subject\": \"Re: Small error with workunit helpers\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nThat looks like a candidate for a JIRA ticket to me.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-06 20:41:20\" },\n\t{ \"post_id\": 15013, \"topic_id\": 3703, \"forum_id\": 9, \"post_subject\": \"Log file ECLAgentLog2 missing from helpers page\", \"username\": \"oscar.foley\", \"post_text\": \"I am using following versions\\n- HPCC 5.6.4-1\\n- ECLIDE 6.2.0 (using client tools 5.6.4-1 and -legacy) \\n\\nWhen I run a long ecl (runETL.ecl) close to midnight I have found a problem with logs on helpers tab in ECLIDE (and ECLWatch).\\n\\nAs you can see in screenshot WU starts at 23:22:53 and tooks 1h 20 mins. (see attachment)\\n[attachment=1:2p3kjdph]Capture0.PNG\\n\\nIf I go to helpers tab (see attachment)\\n\\n[attachment=0:2p3kjdph]Capture1.PNG\\n\\nYou can see that correctly files ThorLog2 and ThorLog4 are created.\\nBut ECLAgentLog2 is not being created or not being shown...\\n\\nCheers,\\nOscar\", \"post_time\": \"2017-02-06 16:27:17\" },\n\t{ \"post_id\": 15673, \"topic_id\": 3913, \"forum_id\": 9, \"post_subject\": \"Re: Syntax Check ALL files in a repository\", \"username\": \"oscar.foley\", \"post_text\": \"Example of Linux Jenkins Pipeline log with 0 errors:\\n[attachment=0:2li9p2tl]Capture3.PNG\", \"post_time\": \"2017-03-07 11:52:40\" },\n\t{ \"post_id\": 15663, \"topic_id\": 3913, \"forum_id\": 9, \"post_subject\": \"Syntax Check ALL files in a repository\", \"username\": \"oscar.foley\", \"post_text\": \"Hi All\\n\\nIn my project (Odin Project, ProAgrica - Reed Business Information - http://www.reedbusiness.com/products-services/proagrica) we are developing several utilities that might be useful for other members of the community so we decided to release them with Open Source license.\\n\\nHere is the first one: ECL Syntax Checker\\n\\nGITHUB URL: https://github.com/OdinProAgrica/ECLSyntaxChecker\\n\\nECL Syntax Checker can be useful if you are developing a project in HPCC. It allows you to perform a Syntax Check in all files as ECLIDE can only make syntax check of a single file. Script makes a Syntax Check on all *.ecl files in a given directory and subdirectories. We have 2,741 ecl files that are checked in 55-59 secs on average.\\n\\nUseful in these situations:\\n\\n- Windows PowerShell Script: Moving or refactoring files in your project. You can use it in conjunction with ECLIDE.\\n- Linux BASH Script: Integrate it as one step of your CI build.\\n\\nExample of Windows usage with 0 errors:\\n[attachment=2:50hoh9g2]Capture.PNG\\n\\nExample of Windows usage with no errors:\\n[attachment=1:50hoh9g2]Capture1.PNG\\n\\nExample of Linux Jenkins Pipeline:\\n[attachment=0:50hoh9g2]Capture2.PNG\\n\\nContributions are more than welcome
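
A minimal sketch of the same idea, assuming eclcc is on the PATH and that "eclcc -syntax <file>" exits non-zero when a file fails its syntax check; the project above ships PowerShell and Bash scripts, so the Python below is only an illustrative equivalent and is not part of ECLSyntaxChecker:

# Illustrative only: syntax-check every *.ecl file under a directory tree.
import subprocess
import sys
from pathlib import Path

def check_all(root="."):
    """Run 'eclcc -syntax' on each .ecl file; return the number of failures."""
    failures = 0
    for ecl_file in sorted(Path(root).rglob("*.ecl")):
        result = subprocess.run(["eclcc", "-syntax", str(ecl_file)],
                                capture_output=True, text=True)
        if result.returncode != 0:
            failures += 1
            print("FAIL", ecl_file)
            print(result.stderr.strip())
    print("%d file(s) failed syntax check" % failures)
    return failures

if __name__ == "__main__":
    sys.exit(1 if check_all(sys.argv[1] if len(sys.argv) > 1 else ".") else 0)

In a CI pipeline the non-zero exit code is what fails the build, which mirrors the Jenkins usage shown above.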
\", \"post_time\": \"2017-03-07 11:50:59\" },\n\t{ \"post_id\": 21263, \"topic_id\": 4183, \"forum_id\": 9, \"post_subject\": \"Re: Copy Title\", \"username\": \"rtaylor\", \"post_text\": \"Steve,\\n\\nI suggest you report this in JIRA (https://track.hpccsystems.com). I see no indication that it has been reported already, and if it has then the developers will simply mark it as a duplicate (and you'll automatically be notified of changes to the ticket).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-19 17:58:40\" },\n\t{ \"post_id\": 21123, \"topic_id\": 4183, \"forum_id\": 9, \"post_subject\": \"Re: Copy Title\", \"username\": \"stephenj2585\", \"post_text\": \"for example, using the Risk Insurance as the 'git' repository, I can double click on ISS_MODELS and selected D104.ecl. The tabbed module has "Insurance.ISS_MODELS.D104" as the tab name. I then scroll down and double-click on MODELS and select A030.ecl - and a tab appears labeled "*".\\n\\nWhy the difference? And of course it'd be nice to see that complete path in each tab name.
\\n\\n\\nSteve\", \"post_time\": \"2018-03-14 18:39:12\" },\n\t{ \"post_id\": 21113, \"topic_id\": 4183, \"forum_id\": 9, \"post_subject\": \"Re: Copy Title\", \"username\": \"stephenj2585\", \"post_text\": \"Good afternoon everyone. Has this issue been addressed? I am also seeing a single "*" in the module tab.\\n\\nSteve in Alpharetta\", \"post_time\": \"2018-03-14 18:30:12\" },\n\t{ \"post_id\": 16743, \"topic_id\": 4183, \"forum_id\": 9, \"post_subject\": \"Re: Copy Title\", \"username\": \"rtaylor\", \"post_text\": \"Trent,\\n\\nI think a JIRA report would be appropriate for this issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-21 13:27:54\" },\n\t{ \"post_id\": 16723, \"topic_id\": 4183, \"forum_id\": 9, \"post_subject\": \"Copy Title\", \"username\": \"TrentB\", \"post_text\": \"When using a local repository vs using Mysql repository, why does copy title not include the Module.Attribute name? First of all the Copy title option is disabled altogether on the ribbon. but if you right mouse click to the menu, the copy option actually gives me attribute.ecl which is not nearly as useful as the module.attribute for coding.\", \"post_time\": \"2017-04-21 13:03:50\" },\n\t{ \"post_id\": 17543, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"JimD\", \"post_text\": \"Since I can NOW see the images, I think you were trying select All, then rt-click>Copy which works with a small result. But, for a result that is returned in pages, you would never know when the results have completed downloading to the clipboard. \\n\\nIn your scenario, I would use the rt-click > Save As method.\", \"post_time\": \"2017-06-19 20:00:54\" },\n\t{ \"post_id\": 17533, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"JimD\", \"post_text\": \"The earlier image is now displaying.\", \"post_time\": \"2017-06-19 19:54:03\" },\n\t{ \"post_id\": 17523, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Jim. I will try the method you suggested.\", \"post_time\": \"2017-06-19 19:51:36\" },\n\t{ \"post_id\": 17513, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"JimD\", \"post_text\": \"I was talking about the ECL Watch view inside the IDE. Unfortunately, the image I included did not display so you could not see what I meant.\\n\\nIn the IDE, under the builder window, when you open a successful workunit, you get at least 2 tabs, the first one on the left is an ECL Watch view.\\n\\nHow are you trying to download your results now? \\n\\nI was able to save using the method I previously mentioned and I also saved by rt-clicking on a result display in the IDE and selecting Save As. The Save As method prompts me that "Not all results have been downloaded..." but the results did save to CSV in a moment or two. My test with 1 million records took about a minute and 1,000 took about 10 or 15 seconds (only a guestimate).\", \"post_time\": \"2017-06-19 19:48:13\" },\n\t{ \"post_id\": 17503, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks for the response. I have found viewing results in ECL Watch to be a) an extra step to get to, b) fairly slow to refresh compared to the IDE (when the IDE works). 
So although I do appreciate having an alternative approach, is there something I can do to regain the functionality in the results tab that I used to have consistently?\", \"post_time\": \"2017-06-19 17:56:59\" },\n\t{ \"post_id\": 17493, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Re: Background save still in progress\", \"username\": \"JimD\", \"post_text\": \"Are you downloading the CSV of the result?\\n[attachment=0:2t6kvcvl]DownloadCSV.jpg\\n1. Select the ECL Watch view of the workunit\\n2. Select the Outputs tab\\n3. Open the result you want by clicking on the hyperlink\\n4. Press the CSV button\\n5. The browser instance will prompt you (depending on your browser settings)\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-06-19 17:40:19\" },\n\t{ \"post_id\": 17483, \"topic_id\": 4373, \"forum_id\": 9, \"post_subject\": \"Background save still in progress\", \"username\": \"janet.anderson\", \"post_text\": \"I am trying to copy the results of a WU to Excel, but I keep getting a message "Background save still in progress". There are only ~400 records, this is not a huge volume. I was using IDE version 6.0 , got this error a bunch of times (but not for every result), downloaded 6.2 and still got the error. What can I do to avoid this productivity killing demon that has started to haunt my days?\", \"post_time\": \"2017-06-19 17:09:36\" },\n\t{ \"post_id\": 18733, \"topic_id\": 4693, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE crashing when connecting to git repository\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the report. While there is one known (crashing) issue with that release, it is different to what your describing.\\n\\nFor sanity can you check if the previous 6.2.x release works or if it is also failing.\\n(Its ok to have both installed, as the IDE will auto select the correct Client Tools versions for the server your targeting).\\n\\nAlso does your git folder contain:\\n* Many files?\\n* Non ECL files?\\n\\nFinally if you get an option to forward a crash report, please do as it often contains useful information.\\n\\nThx.\", \"post_time\": \"2017-09-11 15:58:32\" },\n\t{ \"post_id\": 18653, \"topic_id\": 4693, \"forum_id\": 9, \"post_subject\": \"ECL IDE crashing when connecting to git repository\", \"username\": \"elouche\", \"post_text\": \"Hi there,\\n\\nI am attempting to connect to a Git repository through ECL IDE and the IDE keeps crashing. The exception reason is "ECLIDE.exe caused ACCESS_VIOLATION at 0023:006D0120". I have version 6.4.0.101 installed and am using the following compiler: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.4.0\\\\clienttools\\\\bin\\\\eclcc.exe. My ECL Folder is set to the location of my git repository. \\n\\nSometimes the repository loads and it crashes when I click on it, and other times it crashes before the repository even loads. 
I have the crash report if needed, but it is too big to attach here.\\n\\nAny assistance would be greatly appreciated.\\n\\nThanks in advance,\\nBeth\", \"post_time\": \"2017-08-31 13:43:39\" },\n\t{ \"post_id\": 18953, \"topic_id\": 4763, \"forum_id\": 9, \"post_subject\": \"Re: Possible bug with ECL IDE & clipboard\", \"username\": \"oscar.foley\", \"post_text\": \"Done https://track.hpccsystems.com/browse/IDE-696\", \"post_time\": \"2017-09-22 14:20:54\" },\n\t{ \"post_id\": 18923, \"topic_id\": 4763, \"forum_id\": 9, \"post_subject\": \"Re: Possible bug with ECL IDE & clipboard\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nA perfect example of the type of issue that should be reported in JIRA (https://track.hpccsystems.com) against the ECL IDE
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-22 14:05:00\" },\n\t{ \"post_id\": 18903, \"topic_id\": 4763, \"forum_id\": 9, \"post_subject\": \"Possible bug with ECL IDE & clipboard\", \"username\": \"oscar.foley\", \"post_text\": \"I am using \\n- ECL IDE 6.4.2-rc1\\n- Client Tools 5.6.4-1\\n- HPCC Server 5.6.4-1\\n\\nWhen I run some long workunits I usually keep programming in ECL IDE (or other apps). I have found that randomly (but pretty often) ECL IDE replaces the content of clipboard with this:\\n
cd C:\\\\Users\\\\foleyo\\\\AppData\\\\Local\\\\Temp\\\\\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.6.4\\\\clienttools\\\\bin\\\\eclcc.exe -showpaths\\n~
\\n\\nIt happens only when I am running a workunit and it happens several times during its execution. Other members of my team have the same problem. It is disruptive because it affects my copy&paste.\\n\\nAs a general good practice, Windows Applications should avoid affecting clipboard...\", \"post_time\": \"2017-09-22 10:42:58\" },\n\t{ \"post_id\": 20373, \"topic_id\": 5183, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE 6.4.4 - tab spacing issue\", \"username\": \"georgeb2d\", \"post_text\": \"Please add me to the pre-release party.\", \"post_time\": \"2017-12-21 20:24:35\" },\n\t{ \"post_id\": 20363, \"topic_id\": 5183, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE 6.4.4 - tab spacing issue\", \"username\": \"dehilster\", \"post_text\": \"We have a version that will be coming out that solves this problem. If you are interested, please let me know and I can send you a 'pre-release' version.\", \"post_time\": \"2017-12-20 18:34:05\" },\n\t{ \"post_id\": 20353, \"topic_id\": 5183, \"forum_id\": 9, \"post_subject\": \"ECL IDE 6.4.4 - tab spacing issue\", \"username\": \"somberi\", \"post_text\": \"Hi,\\n\\nI would like to know if there has been any changes to how the ecl ide editor handles tab spacing. \\n\\nAfter upgrading to the 6.4.4-1 version of the ecl ide, the code formatted (indented in 6.2 version of the ide) now looks all over the place.\", \"post_time\": \"2017-12-20 10:31:41\" },\n\t{ \"post_id\": 20393, \"topic_id\": 5193, \"forum_id\": 9, \"post_subject\": \"Re: Syntax Errors not attached\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,\\n\\nYou can always put the ECL IDE back into its default configuration then move things around again to the way you like them. The Ribbon bar has a View tab and at the right end of that tab are the reset buttons (both default left and default right).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-12-22 18:10:56\" },\n\t{ \"post_id\": 20383, \"topic_id\": 5193, \"forum_id\": 9, \"post_subject\": \"Syntax Errors not attached\", \"username\": \"georgeb2d\", \"post_text\": \"I run into this problem when I work from home. At the office my syntax errors are not attached anymore to the ECL IDE window. I have not been able to figure out how to attach it. Anyway, it is on the second monitor at work. When I work from home, I can't see it since I am just working from the laptop screen. It apparently is on the invisible other screen. So in order to see my errors I have to submit the job. Not good. How can I fix this.\\n\\nAnother Note: I just got it attached somehow. I don't know what I did. 
I moved it over the window and clicked on it.\", \"post_time\": \"2017-12-21 20:30:22\" },\n\t{ \"post_id\": 22473, \"topic_id\": 5763, \"forum_id\": 9, \"post_subject\": \"Re: Remember password in ECLIDE & ECLWatch feature request\", \"username\": \"oscar.foley\", \"post_text\": \"- https://track.hpccsystems.com/browse/HPCC-20081\\n- https://track.hpccsystems.com/browse/IDE-772\", \"post_time\": \"2018-07-11 16:35:39\" },\n\t{ \"post_id\": 22453, \"topic_id\": 5763, \"forum_id\": 9, \"post_subject\": \"Re: Remember password in ECLIDE & ECLWatch feature request\", \"username\": \"rtaylor\", \"post_text\": \"oscar.foley & mansfield_bitter,\\n\\nThe proper avenue to make feature requests and bug reports is JIRA (https://track.hpccsystems.com)\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-09 20:50:49\" },\n\t{ \"post_id\": 22443, \"topic_id\": 5763, \"forum_id\": 9, \"post_subject\": \"Re: Remember password in ECLIDE & ECLWatch feature request\", \"username\": \"mansfield_bitter\", \"post_text\": \"Massive plus one on this one. In fact:\\n\\n\\n[size=200:4t97mh5h]+1 \\n\\n\\nThere we are.\", \"post_time\": \"2018-07-09 19:47:26\" },\n\t{ \"post_id\": 22433, \"topic_id\": 5763, \"forum_id\": 9, \"post_subject\": \"Remember password in ECLIDE & ECLWatch feature request\", \"username\": \"oscar.foley\", \"post_text\": \"In my project I am using HPCC 6.4.2 with authentication. (ECL ID 6.4.16)\\nIs it possible to remember password somehow?\\nIf not, could this feature be added?\\n[attachment=1:ptc51sgb]Capture1.PNG\\n\\nSame for ECL Watch...\\n[attachment=0:ptc51sgb]Capture2.PNG\\n\\nThanks,\\nOscar Foley\", \"post_time\": \"2018-07-09 17:27:20\" },\n\t{ \"post_id\": 22823, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"JimD\", \"post_text\": \"The latest version of the IDE prior to 6.4.22 can be found here:\\n\\nhttps://hpccsystems.com/download/archiv ... v=6.4.16-1\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2018-08-29 15:20:47\" },\n\t{ \"post_id\": 22803, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"Neha merchant\", \"post_text\": \"Same problem with me - i downloaded new version 6.4.24 and cannot open new builder window or click open and open a query from hard drive.\\n\\nAlso how to download previous version of ECL idE?\", \"post_time\": \"2018-08-28 17:01:24\" },\n\t{ \"post_id\": 22623, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"Satheesh_T\", \"post_text\": \"Me too having the same issue. When i try to open new window first time it is opening. Later time it always pointing to first one. I am unable to open multiple new windows. Pleas help.\", \"post_time\": \"2018-08-14 19:04:30\" },\n\t{ \"post_id\": 22573, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"dehilster\", \"post_text\": \"I will look into this. This was most likely introduced when I fixed the opening of duplicate existing attributes. Am working on a fix right now.\", \"post_time\": \"2018-07-30 20:07:02\" },\n\t{ \"post_id\": 22563, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"dehilster\", \"post_text\": \"The question is how SHOULD this work? 
The bug I fixed was that if you searched for an attribute file and it was already open in a tab, then we don't open a new one but open the existing one.\\n\\nIf you try to open the same attribute file in the repository directory, should it not have the same behavior?\\n\\nMaybe we need to create a "duplicate" attribute option that adds "_1" or "_new" to the end?\", \"post_time\": \"2018-07-30 20:01:39\" },\n\t{ \"post_id\": 22553, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Re: Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI just duplicated the problem in 6.4.24-1 but it worked fine in my older 6.4.16-1 ECL IDE so it looks like a regression. Time for a JIRA ticket, and see if anyone has a slightly older ECL IDE install to give your colleague.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-07-30 18:45:00\" },\n\t{ \"post_id\": 22543, \"topic_id\": 5803, \"forum_id\": 9, \"post_subject\": \"Unable to open multiple builder windows in IDE (6.4.22)\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nA work colleague is training using a local VM setup.\\nShe is unable to open multiple builder windows. Clicking 'New Builder Window' once the 1st builder window is up does nothing.\\n\\nShe can open multiple ECL files from her repository.\\nShe can also create multiple workspaces, each one of which has a single binder window in it.\\n\\nErr -any ideas? I thought it might be a permissions issue with here users->public->documents->HPCC Systems windows direcotry but its not that.\\n\\nShe's waiting for approval for her forum user to be created, but in the mean time...\\n\\nYours\\nAllan\", \"post_time\": \"2018-07-30 15:08:08\" },\n\t{ \"post_id\": 22783, \"topic_id\": 5843, \"forum_id\": 9, \"post_subject\": \"Re: Module cannot be exported\", \"username\": \"rtaylor\", \"post_text\": \"Shayan,\\n\\nI presume when you said:The code works well if I perform it as an action. But as a module, I can't export it.
That what you meant was you could not just hit "Submit" and run a workunit. And the reason for that is implied by the error message: Module OnlineShayan.STD_Persons does not EXPORT an attribute main() (0, 0), 0,
because there is no action in the code. You DO have an EXPORT definition whose expression is the OUTPUT action, but by giving it a definition name you make it no longer a direct action but just an action definition to be invoked as an action elsewhere in your code.\\n\\nSo, the real lesson here is: never just hit "Submit" on a definition file (any file that contains an EXPORT or SHARED definition) -- because sometimes it will work and sometimes it won't (and in this case, it won't). If you want to run your code, the file you "Submit" should always be "BWR" code ("Builder Window Runnable" code as discussed in the ECL courses).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-27 20:04:12\" },\n\t{ \"post_id\": 22753, \"topic_id\": 5843, \"forum_id\": 9, \"post_subject\": \"Module cannot be exported\", \"username\": \"sh.shmss\", \"post_text\": \"Hi,\\n\\nI have problem running the following code (it's from Introduction to ECL 2 Course):\\n\\nIMPORT $,STD;\\n\\nEXPORT STD_Persons := MODULE\\nEXPORT Layout := RECORD\\n$.UID_Persons.UID;\\n$.UID_Persons.RecID;\\nSTRING15 FirstName := STD.Str.ToUpperCase($.UID_Persons.FirstName);\\nSTRING25 LastName := STD.Str.ToUpperCase($.UID_Persons.LastName);\\nSTRING1 MiddleName := STD.Str.ToUpperCase($.UID_Persons.MiddleName);\\nSTRING2 NameSuffix := STD.Str.ToUpperCase($.UID_Persons.NameSuffix);\\nUNSIGNED4 FileDate := (UNSIGNED4)$.UID_Persons.FileDate;\\n$.UID_Persons.BureauCode;\\n$.UID_Persons.Gender;\\n$.UID_Persons.DependentCount;\\nUNSIGNED4 BirthDate := (UNSIGNED4)$.UID_Persons.BirthDate;\\n$.UID_Persons.StreetAddress;\\n$.UID_Persons.City;\\n$.UID_Persons.State;\\nUNSIGNED3 ZipCode := (UNSIGNED3)$.UID_Persons.ZipCode;\\nEND;\\nEXPORT File :=output($.UID_Persons,Layout)\\nERSIST('~CLASS::SS::PERSIST::STD_Persons');\\nEND;\\n\\nI get error C2386: Module OnlineShayan.STD_Persons does not EXPORT an attribute main() (0, 0), 0,\\n \\nThe code works well if I perform it as an action. But as a module, I can't export it. What do you think is the problem?\\n\\nThanks,\\nShayan\", \"post_time\": \"2018-08-24 15:33:13\" },\n\t{ \"post_id\": 24863, \"topic_id\": 6453, \"forum_id\": 9, \"post_subject\": \"Re: Beautify my ECL\", \"username\": \"rtaylor\", \"post_text\": \"wjblack,\\n\\nNot automatically. You could make a feature request in JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-07 19:26:14\" },\n\t{ \"post_id\": 24843, \"topic_id\": 6453, \"forum_id\": 9, \"post_subject\": \"Beautify my ECL\", \"username\": \"wjblack\", \"post_text\": \"All,\\n\\nIs there a way to easily format (beautify) your code within the ECL IDE?\", \"post_time\": \"2019-03-07 19:11:33\" },\n\t{ \"post_id\": 26463, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"JimD\", \"post_text\": \"You can have multiple versions of Client Tools and the IDE will choose the best one to use.\\n\\nThere is no "guide" to which version to use because the rule of thumb is to use the matching versions. If you have the matching version installed, the IDE will sort it out automagically. \\n\\nIf you are accessing a 6.4.x platform deployment, you should install that version of client tools. Keep that version until you no longer need it (IOW, when you no longer need to connect to 6.4.x platforms). There is very little overhead in keeping older versions. 
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2019-05-17 15:07:38\" },\n\t{ \"post_id\": 26453, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"right. Thanks Jim!\\nI didn't know the differences among ECL client tool differences , now I do.\\nshould I always use this old version of client tool in the future? \\nI hope there is some guide or reference for us so we know which version to use.\", \"post_time\": \"2019-05-17 14:31:51\" },\n\t{ \"post_id\": 26443, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"JimD\", \"post_text\": \"You can (and should) always use the latest ECL IDE. \\n\\nHowever, you should also have versions of the HPCC Client Tools to match each environment you are going to use. When you connect to the server, the IDE will choose the version that best matches. \\n\\nIn your case, you could use the 7.2.10-1 version of the IDE and the 6.4.36 version of the Client Tools. \\n\\nThe IDE is designed to be backward compatible and supports multiple versions of Client tools.\\n\\nHTH,\\nJim\", \"post_time\": \"2019-05-17 13:21:15\" },\n\t{ \"post_id\": 26433, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"Hi Richard,\\n\\nI found out why. I uninstalled and reinstalled an older version of ECL. I switched back to 6.4.16 then the errors are gone.\\nThe problem is the new ECL installation package and the std library comes with it. somehow someone added more arguments into the file.ecl script in the std library inside the installation package.\\n\\nI dont know if anyone else has the issue with the newer version of ECL.\", \"post_time\": \"2019-05-16 20:18:53\" },\n\t{ \"post_id\": 26423, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"It is 7.2.10 the newest version - I just downloaded. \\nI am using GitLab.\", \"post_time\": \"2019-05-16 20:01:34\" },\n\t{ \"post_id\": 26413, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"rtaylor\", \"post_text\": \"LimianFang,\\n\\nWhat version is the environment on, and what version of the ECL IDE are you running? Is your environment using a central repository or GitLab?\\n\\nRichard\", \"post_time\": \"2019-05-16 19:55:22\" },\n\t{ \"post_id\": 26403, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"Hi Richard,\\n\\nThanks for your reply. however, I meant the errors remain no matter which file I output. not just that script I showed as an example, the errors still are there for ANY file on prod I try output.Other people have no such problem with same script.\\nI wonder whether there is some ECL setup issue with my new computer (my old computer doesnt have this error before). 
Do you know anyone that can troubleshoot with me?\\n\\nI have reached out for several colleagues but no one can figure out why the errors.\\n\\nThanks a lot!\", \"post_time\": \"2019-05-16 19:19:40\" },\n\t{ \"post_id\": 26393, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"rtaylor\", \"post_text\": \"LimianFang,\\n \\nThe problem is not in YOUR code, but the definition you're using (TelematicsConsolidation.files_database.DS_ALL_DRIVER_USER) contains calls to the functions that are mentioned in the error messages. Find those and look at the number of parameters they're passing and you should find there's at least one extra parameter that shouldn't be there. Then find the person that "owns" that code and ask them to help you fix it.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-16 18:14:20\" },\n\t{ \"post_id\": 26383, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"Hi Richard,\\nThe error remains for any other files I output.\", \"post_time\": \"2019-05-16 15:54:48\" },\n\t{ \"post_id\": 26373, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"rtaylor\", \"post_text\": \"LimianFang,\\n\\nYou need to examine the actual ECL code that's calling these functions (which your code does not show). \\n\\nThat would be somewhere inside the TelematicsConsolidation.files_database.DS_ALL_DRIVER_USER code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-16 15:20:17\" },\n\t{ \"post_id\": 26363, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"For example, this is my code. I am running it in Alpharetta Prod. THANK YOU!\\n\\nimport TelematicsConsolidation;\\noutput(TelematicsConsolidation.files_database.DS_ALL_DRIVER_USER);\\n\\n\\n\\nthe errors are :\\nError: Too many parameters passed to function RenameLogicalFile (expected 2) (141, 71), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many parameters passed to function fSprayFixed (expected 12) (420, 222), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many parameters passed to function SprayFixed (expected 12) (429, 221), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many parameters passed to function fSprayVariable (expected 18) (433, 348), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many parameters passed to function fSprayVariable (expected 18) (433, 358), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many errors (max = 5); Aborting... 
(433, 369), 3001, C:\\\\Program Files\\\\HPCCSystems\\\\7.2.10\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nWarning: Mismatch in major version number (7.2.10 v 6.4.36) (0, 0), 3118,\", \"post_time\": \"2019-05-16 15:08:10\" },\n\t{ \"post_id\": 26353, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"Hi,\\n\\nI am using the newest version. the errors remain if I use an older version.I didnt write any code myself. I am simplying outing a file on prod .\", \"post_time\": \"2019-05-16 15:03:21\" },\n\t{ \"post_id\": 26343, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"Re: unable to compile any job due to std file.ecl errors\", \"username\": \"rtaylor\", \"post_text\": \"LimianFang,\\n\\nThese errors are self-explanatory -- you're trying to pass more parameters than the version of HPCC you're using supports. What version are you on? And are you looking at the docs for newer version (where more parameters have been added to the functions)?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-16 14:59:12\" },\n\t{ \"post_id\": 26333, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"unable to compile any job due to std file.ecl errors\", \"username\": \"LimianFang\", \"post_text\": \"Hi,\\n\\nAfter I connected git repo and edited the compiler manually, i have been having these errors for any job , even a simple output.\\n\\nerrors are like below:\\nError: Too many parameters passed to function RenameLogicalFile (expected 2) (141, 71), 2061, \\nError: Too many parameters passed to function fSprayFixed (expected 12) (420, 222), 2061, \\n\\nit seems they are all about file.ecl in the std folder but i dont know what I can do since the file.ecl comes from the ecl installation.\\n\\nThank you!\", \"post_time\": \"2019-05-15 19:30:26\" },\n\t{ \"post_id\": 26323, \"topic_id\": 6983, \"forum_id\": 9, \"post_subject\": \"unable to compile with weird error about file.ecl\", \"username\": \"LimianFang\", \"post_text\": \"HI,\\n\\ni setup the complier and use the git repo as ECL folder.\\nI keep have errors about std file.ecl (like below) no matter what code I run, even a simple output.\\n\\nCan someone help me understand what I can do to solve this problem?\\nThank you so much!!\\n\\nError: Too many parameters passed to function RenameLogicalFile (expected 2) (141, 71), 2061, C:\\\\Program Files\\\\HPCCSystems\\\\7.0.8\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\n\\nError: Too many parameters passed to function fSprayFixed (expected 12) (420, 222), 2061,\", \"post_time\": \"2019-05-15 19:15:35\" },\n\t{ \"post_id\": 30611, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"lpezet\", \"post_text\": \"I believe I've figured it out.\\n\\nI used command line to run eclcc.exe manually. When doing so, it complained MSVCR120.DLL was missing. I had Microsoft Visual C++ 2015-2019 installed (and I probably uninstalled earlier version) but Microsoft Visual C++ 2013 is needed.\\n\\nOnce I installed Microsoft Visual C++ 2013 I was back in business.\", \"post_time\": \"2020-05-14 18:50:08\" },\n\t{ \"post_id\": 30581, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"lpezet\", \"post_text\": \"Hi HugoW!\\n\\nI just tried. 
It managed to connect to the cluster I believe (otherwise I'd get that "Unable to communicate with server" error).\\nI used the code from ECL Playground (the dataset with 3 people in it) and still no luck: it doesn't submit anything remotely, just stays "local".\\n\\nI'll look into uninstalling and installing different versions (again) then.\\n\\nThanks!\", \"post_time\": \"2020-05-13 21:12:07\" },\n\t{ \"post_id\": 30571, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"hwatanuki\", \"post_text\": \"Hello lpezet,\\n\\nI have seen cases before where this could be caused by a bad ECL IDE install or connectivity issues with the cluster. \\n\\nAt this point in time, and based on the details you have provided so far, I would suggest you to setup a configuration within your current ECL IDE to the playground (play.hpccsystems.com) and give it a try with a simple code. Note that the playground is currently setup to use SSL.\\n\\nLet us know how it goes.\\n\\nHugoW\", \"post_time\": \"2020-05-13 20:46:32\" },\n\t{ \"post_id\": 30561, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\nI know this is old but I'm getting the same exact behavior right now.\\nAnd yes, I selected "Thor" and I still get "Local" stuff.\\nI can select anything I want in the "Target" dropdown (roxie, thor, thor_roxie, hthor), but it doesn't seem to matter, it's stuck to "Local" somehow.\\n\\nI was working off 6.4 when I decided to upgrade. I installed 7.8.8-1 (server is 7.8.6-1) and since then I'm stuck in "Local" mode. I tried uninstalling it and going down couple versions, but still stuck.\\n\\nAny tips what I could be looking at? (logs?? some Microsoft C++ dependencies??)\\n\\n\\nThanks for the help!\", \"post_time\": \"2020-05-13 19:50:17\" },\n\t{ \"post_id\": 27573, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"swampsoda\", \"post_text\": \"Hey all, I got it fixed. \\nI downloaded Visual Studio C++ compiler and it started to work.\\nThanks for the responses!\", \"post_time\": \"2019-09-17 02:10:59\" },\n\t{ \"post_id\": 27563, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"JimD\", \"post_text\": \"Swampsoda,\\n\\nIn addition. if your submitted ECL has a syntax error, your Workunit ID may begin with an L (indicating Local) because that is where the error occurred. 
In other words, it failed to syntax check locally before ever reaching Thor.\\n\\nDo any syntax errors display in the IDE?\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2019-09-16 15:01:26\" },\n\t{ \"post_id\": 27553, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Re: Invalid Workunit\", \"username\": \"rtaylor\", \"post_text\": \"swampsoda,\\n\\nBecause your WUID begins with "L" that tells me you're compiling "Local" -- try selecting "Thor" from the target droplist and see if that corrects the issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-09-16 13:13:53\" },\n\t{ \"post_id\": 27543, \"topic_id\": 7333, \"forum_id\": 9, \"post_subject\": \"Invalid Workunit\", \"username\": \"swampsoda\", \"post_text\": \"I'm trying to follow the tutorial and run the Hello World program but keep getting this Invalid Workunit error message:\\n\\nWsWorkunits.WUInfo\\n2019-09-15 12:14:27 GMT: WUInfo: Invalid Workunit ID: L20190915-081424 \\n\\nI could not find someone with a similar problem and would appreciate any help I can get.\", \"post_time\": \"2019-09-15 12:15:43\" },\n\t{ \"post_id\": 29683, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"ming\", \"post_text\": \"By default HPCC Platform on VM use the second adapter: eth1 which probably is host-only adapter. The ip is 192.168.x.x. If you can ping this from your host you can use it in ECL IDE or VS Code. If you are on VPN you host may not see this private ip. You have other options, for example,\\n1) use bridge network instead of host-only for eth1\\n2) use NAT which is eth0 and "Port Forwarding" from VirtualBox Adapter1 Advanced. In this case you do need change eth1 to eth0 in /etc/HPCCSystems/environment.cfg inside VM and restart HPCC Platform: sudo /etc/init.d/hpcc-init restart\\n\\nThese two method also will help if your IDE/VS Code on a different system than VM\", \"post_time\": \"2020-02-25 13:52:27\" },\n\t{ \"post_id\": 29673, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"vedant_dulori\", \"post_text\": \"Yeah, the HPCC cluster is in a VM. Also, which IP does local system IP mean?\", \"post_time\": \"2020-02-25 01:34:09\" },\n\t{ \"post_id\": 29663, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"ming\", \"post_text\": \"Is the HPCC cluster in a VM?\", \"post_time\": \"2020-02-25 01:31:19\" },\n\t{ \"post_id\": 29653, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"ming\", \"post_text\": \"In the link Richard provide, here is an example: \\nssh -L 8010:<hpcc cluster ip>:8010 <your local system ip>\\n\\nAfter this you can use localhost:8010 for hpcc cluster eclwatch\", \"post_time\": \"2020-02-24 20:14:18\" },\n\t{ \"post_id\": 29643, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"vedant_dulori\", \"post_text\": \"Hey\\n\\nThanks for the reply. I am trying to tunnel ssh from your laptop through the VM then to the HPCC cluster and as I don't have much knowledge in networking I am facing a lot of difficulties. 
It would be great if you could give step by step instructions on how I can fix this issue\\n\\nThanks!\", \"post_time\": \"2020-02-24 18:10:12\" },\n\t{ \"post_id\": 29633, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"DSC\", \"post_text\": \"The ECL client tools do not support proxy connections the way browsers support them. The only proxy configuration that you could use would be more like tunneling, where you set up a proxy/tunnel on a system that forwards connections to port 8010 (or whatever port you're using) to the HPCC cluster, then configure your local system to connect to the proxy system on that port. This kind of setup is useful for getting past a firewall that does not allow outbound connections to 8010 but does on a different port, the proxy maps inbound connections to a different outbound port, provided the proxy system lies outside of the firewall. That "proxy system" can be your local laptop, but that probably doesn't help in this case.\\n\\nSome more information on port forwarding can be found at https://www.ssh.com/ssh/tunneling/example if you're interested in exploring that setup.\", \"post_time\": \"2020-02-21 14:19:56\" },\n\t{ \"post_id\": 29623, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi,\\n\\nThanks a lot for the instructions. However, the difficulties I am facing are connecting to the cluster and this is because currently, I am accessing ECL Watch via a proxy server. I am starting a proxy on my laptop (eg, `ssh -ND <pass-thru-proxy>`). Then by configuring a browser to use this proxy via SOCKS, I am able to access the ECL Watch. \\n\\nThe problem is I want to develop it on my laptop. I do not know how to use ECL client tools via the pass-through. I wanted to know ways to redirect through ECL as, I think without the ability to run ECL commands locally, the IDE is quite handicapped. \\n\\nI have come across SSH tunneling to do so but have minimal knowledge in doing so. It would be great if you can suggest ways to solve this problem.\", \"post_time\": \"2020-02-19 17:52:42\" },\n\t{ \"post_id\": 29593, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE for MacOS X\", \"username\": \"DSC\", \"post_text\": \"It is not clear how far you've gotten in your set up, so I will start at the beginning.\\n\\nA good set of instructions for installing VS Code and the HPCC client tools can be found at https://wiki.hpccsystems.com/display/hpcc/VS+Code+and+HPCC+Systems+Installation+Cheat+Sheet. You may have already done this part.\\n\\nThe only remaining bit is configuring a VS Code launch.json file for ECL. Given the way VS Code works, you will need one launch.json entry for every cluster you connect to. Here is an example:\\n\\n
{\\n "name": "My-HPCC-Cluster",\\n "type": "ecl",\\n "request": "launch",\\n "mode": "submit",\\n "workspace": "${workspaceRoot}",\\n "program": "${file}",\\n "protocol": "http",\\n "serverAddress": "example.com",\\n "port": 8010,\\n "rejectUnauthorized": false,\\n "targetCluster": "thor",\\n "eclccPath": "${config:ecl.eclccPath}",\\n "eclccArgs": [],\\n "includeFolders": "${config:ecl.includeFolders}",\\n "legacyMode": "${config:ecl.legacyMode}",\\n "resultLimit": 100,\\n "user": "DSC",\\n "password": ""\\n}
\\nThe really important entries in that JSON object are:\\n\\n\\n
\\n\\nOne important thing to remember is that VS Code selects valid launch.json entries based on filename extension. All of your code file names need to have a '.ecl' extension in order to be recognized as ECL.\\n\\nHope this helps!\\n\\nDSC\", \"post_time\": \"2020-02-18 12:47:29\" },\n\t{ \"post_id\": 29573, \"topic_id\": 7753, \"forum_id\": 9, \"post_subject\": \"ECL IDE for MacOS X\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi,\\n\\nI want to use an ECL IDE for MacOS X and hence tried to you the VSCode plugin but I am having difficulties configuring ECL on that. Hence, can anyone suggest how I can code ECL on MacOS and set up an environment to be able to run the ECL code on the cluster as well?\\n\\nThank you.\", \"post_time\": \"2020-02-16 02:35:01\" },\n\t{ \"post_id\": 31443, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"amillar\", \"post_text\": \"Hi ghalliday.\\n\\nthanks for your help, you are absolutely correct switching from systemctl to init.d has solved our problem.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2020-07-09 15:48:32\" },\n\t{ \"post_id\": 31423, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"ghalliday\", \"post_text\": \"See https://track.hpccsystems.com/browse/HPCC-24389\\n\\nThe problem only appears if you use systemctl to start the system. You can use the old scripts in /etc/init.d/hpcc-init to start as a work around. Fix should be in the next point release.\", \"post_time\": \"2020-07-08 15:36:54\" },\n\t{ \"post_id\": 31413, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"ghalliday\", \"post_text\": \"This looks like it is probably a platform regression. I think I know the fix, but I am confused why it wasn't caught much earlier in testing.\", \"post_time\": \"2020-07-08 15:14:15\" },\n\t{ \"post_id\": 31403, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"amillar\", \"post_text\": \"Thanks for the quick reply,\\n\\nagreed this seems to be a problem with the cluster, do those log files help?\\n\\nDo you need anymore information from me to help narrow down the problem?\\n\\nThanks\\n\\nAntony\", \"post_time\": \"2020-07-08 12:32:48\" },\n\t{ \"post_id\": 31393, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"gsmith\", \"post_text\": \"I have tried submitting a job from the ECL playground against all targets and unfortunately that has not been successful.
\\n\\nThis rules out any issue with the ECLIDE and the Info / Warning messages can be safely ignored.\", \"post_time\": \"2020-07-08 12:23:41\" },\n\t{ \"post_id\": 31383, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"amillar\", \"post_text\": \"Hi Hugo and G Smith,\\n\\nThanks for the quick reply,\\n\\nI will definitely check the preferences in the ECL IDE to see if I can solve those warnings.\\n\\nI have had a look at the operations - system services and I cannot see any issues with the ECL CC server.\\n\\nLooking at the logs, the only warning / errors I receive are : \\n\\n00000A5F PRG 2020-07-08 09:11:19.445 13998 49953 "Compile request processing for workunit W20200708-091117"\\n00000A60 USR 2020-07-08 09:11:19.452 13998 49953 "/opt/HPCCSystems/bin/eclcc: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -shared --daemon myeclccserver - --timings --xml --nostdinc --metacache= --logfile=W20200708-091117.eclcc.log -dfs=192.168.24.120:7070 -cluster=hthor -wuid=W20200708-091117 -token=HPCC[u=,w=W20200708-091117]; -oW20200708-091117 -platform=hthor --component=myeclccserver@192.168.24.120 -fcreated_by=ws_workunits' - hasinput=1, hasoutput=0 stderrbufsize=0"\\n00000A61 PRG 2020-07-08 09:11:19.478 13998 41355 "ERROR: Unrecognised error: Error: unrecognised option --daemon"\\n00000A62 PRG 2020-07-08 09:11:19.478 13998 49953 "ERROR: Unrecognised error: Pipe: write failed (size 368), Broken pipe"\\n00000A63 USR 2020-07-08 09:11:19.478 13998 49953 "/opt/HPCCSystems/bin/eclcc: Pipe: process 41354 complete 1"\\n00000A64 PRG 2020-07-08 09:11:19.538 13998 14211 "SYS: LPT=2 APT=96 PU= 0% MU= 0% MAL=108883968 MMP=108150784 SBK=733184 TOT=106512K RAM=2642524K SWP=0K"\\n00000A65 PRG 2020-07-08 09:11:19.538 13998 14211 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.7 kw/s=3.9 bsy=0 NIC: [bond0] rxp/s=38.3 rxk/s=5.4 txp/s=36.6 txk/s=12.1 rxerrs=0 rxdrps=53 txerrs=0 txdrps=0 CPU: usr=0 sys=0 iow=0 idle=99"\\n00000A66 PRG 2020-07-08 09:11:31.670 13998 49953 "Compile request processing for workunit W20200708-091130"\\n00000A67 USR 2020-07-08 09:11:31.679 13998 49953 "/opt/HPCCSystems/bin/eclcc: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -shared --daemon myeclccserver - --timings --xml --nostdinc --metacache= --logfile=W20200708-091130.eclcc.log -dfs=192.168.24.120:7070 -cluster=Thor-Large -wuid=W20200708-091130 -token=HPCC[u=,w=W20200708-091130]; -oW20200708-091130 -platform=thorlcr --component=myeclccserver@192.168.24.120 -fcreated_by=ws_workunits' - hasinput=1, hasoutput=0 stderrbufsize=0"\\n00000A68 PRG 2020-07-08 09:11:31.702 13998 41370 "ERROR: Unrecognised error: Error: unrecognised option --daemon"\\n00000A69 PRG 2020-07-08 09:11:31.702 13998 49953 "ERROR: Unrecognised error: Pipe: write failed (size 368), Broken pipe"\\n00000A6A USR 2020-07-08 09:11:31.702 13998 49953 "/opt/HPCCSystems/bin/eclcc: Pipe: process 41369 complete 1"\\n00000A6B PRG 2020-07-08 09:11:39.493 13998 49953 "Compile request processing for workunit W20200708-091139"\\n00000A6C USR 2020-07-08 09:11:39.500 13998 49953 "/opt/HPCCSystems/bin/eclcc: Creating PIPE program process : '/opt/HPCCSystems/bin/eclcc -shared --daemon myeclccserver - --timings --xml --nostdinc --metacache= --logfile=W20200708-091139.eclcc.log -dfs=192.168.24.120:7070 -cluster=Thor-Small -wuid=W20200708-091139 -token=HPCC[u=,w=W20200708-091139]; -oW20200708-091139 -platform=thorlcr --component=myeclccserver@192.168.24.120 -fcreated_by=ws_workunits' - hasinput=1, 
hasoutput=0 stderrbufsize=0"\\n00000A6D PRG 2020-07-08 09:11:39.525 13998 41383 "ERROR: Unrecognised error: Error: unrecognised option --daemon"\\n00000A6E PRG 2020-07-08 09:11:39.525 13998 49953 "ERROR: Unrecognised error: Pipe: write failed (size 368), Broken pipe"\\n00000A6F USR 2020-07-08 09:11:39.525 13998 49953 "/opt/HPCCSystems/bin/eclcc: Pipe: process 41382 complete 1"\\n\\n\\nI have tried submitting a job from the ECL playground against all targets and unfortunately that has not been successful.\\n\\nIf it helps we are running the platform on Ubuntu 18.04.\\n\\nHopefully the information in the log above will help narrow this down\\n\\nThanks in advance\\nAntony\", \"post_time\": \"2020-07-08 09:15:45\" },\n\t{ \"post_id\": 31373, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"gsmith\", \"post_text\": \"As @hwatanuki points out, the "Warning" and "Info" logs you are seeing are probably unrelated to the underlying issue.\\n\\nIn ECL Watch can you open the ECL Playground and submit a Job to each of the target clusters (hthor, thor etc.) and see if that works?\", \"post_time\": \"2020-07-08 05:19:32\" },\n\t{ \"post_id\": 31363, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"Re: ECL IDE not compiling / submitting work units\", \"username\": \"hwatanuki\", \"post_text\": \"Hello amillar,\\n\\nI have just installed a virtual cluster by using v7.8.26-1 of the platform and I do confirm that I am receiving the same informational warnings as you reported above in the ECL IDE logs panel. However, I don´t think they are causing the issues you are reporting with WU submission, as I am able to submit WU´s sucessfully despite the messages. The SOAP warning may actually be caused by a missing Attribute Server in your environment and, in this case, the warning can be eliminated by cleaning up/correcting the "Attribute Server" field in the ECL IDE preferences tab.\\n\\nBy the description of your scenario, I would suggest that you take a look at the "Operations>System Servers" tab in your ECL Watch and try to spot something wrong with the ECLCC Server status or in its log files. I was able to partially replicate the issue you describe by manually killing my eclcc daemon and trying to submit a job. Also, note that "cloning" a job from the ECL Watch doesn´t reproduce all the job submission workflow in its entirety, so it cannot be considered, in its essence, an "apples to apples" comparison to confirm that all components on the cluster side are working properly. 
Submitting a test job from the ECL playground panel in ECL Watch could give you a better data point in this specific case.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2020-07-07 18:24:57\" },\n\t{ \"post_id\": 31353, \"topic_id\": 8253, \"forum_id\": 9, \"post_subject\": \"ECL IDE not compiling / submitting work units\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\ntoday we upgraded our HPCC cluster from 7.6.22-1 to 7.8.26-1 and everything seemed to go without issues.\\n\\nHowever, we are having problems submitting Work Units from the ECL IDE.\\n\\nNo matter what we submit we always get the same error : \\n\\nWARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\\n"An HTTP processing error occurred"\\nDetail: [no detail]\\n Repository.cpp(350)\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.26\\\\clienttools\\\\bin\\\\eclcc.exe EclCC.cpp(791)\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.26\\\\clienttools\\\\bin\\\\eclcc.exe EclCC.cpp(791)\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.26\\\\clienttools\\\\bin\\\\eclcc.exe EclCC.cpp(791)\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.8.26\\\\clienttools\\\\bin\\\\eclcc.exe \\n\\n\\nI have installed ECL IDE V 7.8.26-1 to match the cluster version but that has not solved the problem.\\n\\nWe are able to "clone" exiting work units which tells me the cluster is working.\\n\\nI have attached a screen shot of the error, and you can see it doesn't seem to even compile.\\n\\nAny help would be greatly appreciated.\\n\\nThanks in advance\\n\\nAntony\", \"post_time\": \"2020-07-07 12:21:10\" },\n\t{ \"post_id\": 31993, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Hugo. I, unfortunately, don't have access to view the jira ticket. I am able to see the other JIRA tasks. \\n\\nIt would be great if you can actually prioritize support for VDI over 64 bit as that is a bigger issue.\\n\\nRegards,\\nSrini\", \"post_time\": \"2020-09-16 02:49:39\" },\n\t{ \"post_id\": 31983, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Srini,\\n\\nThanks for the additional testing. \\nI have submitted a JIRA ticket (https://track.hpccsystems.com/browse/HPCC-24728) for the syntax check issue with the 64bit version of the client tools on physical machines. \\nUnfortunately, I don´t have access to any Windows virtualized solutions at the moment to reproduce your testing, so I would kindly request you to either include your findings to the comments of my JIRA ticket above or submit a ticket separately.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2020-09-15 22:42:56\" },\n\t{ \"post_id\": 31973, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n I have also verified with other virtual machine instances running windows server 2016 and 2019 with windows 10 experience. 
There seems to be some problem with the filservices.dll file in any windows virtualized development environments.\\n Also, HPCC version 5.6.4 and earlier seems to be working fine in the virtualized environment, but any version above 6.0 seems to fail with the same error.\\nRegards,\\nSrini\", \"post_time\": \"2020-09-14 14:52:58\" },\n\t{ \"post_id\": 31963, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"omnibuzz\", \"post_text\": \"I tried it with the 32 bit again.. And it has the same error. I also tried with an older version (7.7.48) 32 bit and I am getting the same issue. I tried to install in a windows 10 machine that is not virtualized and it seems to work fine. The problem is just in installing ECL IDE in amazon workspace.\\nRegards,\\nSrini\", \"post_time\": \"2020-09-10 18:03:21\" },\n\t{ \"post_id\": 31953, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"hwatanuki\", \"post_text\": \"Hi Srini, got it now. I was able to reproduce your syntax check issue with the 64bit version of the clienttools, but the 32bit version seems to be working fine on my end. Would you mind give it another try with the 32 bit version? If the same happens to you then I guess I could submit a JIRA on this.\", \"post_time\": \"2020-09-10 16:49:57\" },\n\t{ \"post_id\": 31943, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you for the response, Hugo. The server is the same version. However, this is not even submitting the job to the server. Just compiling on the client side throws this error.\\nRegards,\\nSrini\", \"post_time\": \"2020-09-10 15:47:00\" },\n\t{ \"post_id\": 31933, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Re: Import names unknown module "lib_fileservices"\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Srini,\\n\\nI have seen a similar issue recently, where a different version of lib_fileservices on the server side was causing the problem (https://hpccsystems.com/bb/viewtopic.php?f=8&t=8313). Just so we can rule out that possibility, can you please confirm what version of the platform you are using on the server side? \\n\\nJust FYI, I have tried to create a SF using a v7.10.18-1 clienttools against the playground (currently a 7.8.38-1 server side) just for testing purposes and it works fine.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2020-09-10 14:59:58\" },\n\t{ \"post_id\": 31923, \"topic_id\": 8363, \"forum_id\": 9, \"post_subject\": \"Import names unknown module "lib_fileservices"\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n I am trying to install the latest IDE (Gold version: 7.10.18) in Amazon Workspace (windows server 2016 Data center edition). When I try to compile the code (F7 in ECL IDE), I get the following error:\\n[attachment=1:3heott9s]Error in7.10.18 build.png\\n\\nI see a similar issue tracked and fixed for MacOS here.\\n\\nI have tried to use both 32 bit and 64 bit compiler and the issue is the same. \\nI have done a verbose compilation and it gives the following error:\\n00000006 PRG 2020-09-10 09:17:29 132316 76788 "Error loading C:\\\\Program Files\\\\HPCCSystems\\\\7.10.18\\\\clienttools\\\\plugins\\\\fileservices.dll: 126 - The specified module could not be found."\\nFull log is attached for your perusal. 
\\n\\nI feel like I may be missing something simple but I am not able to figure it out.\\nAppreciate any help.\\nRegards,\\nSrini\", \"post_time\": \"2020-09-10 13:36:04\" },\n\t{ \"post_id\": 32303, \"topic_id\": 8413, \"forum_id\": 9, \"post_subject\": \"Re: Unable to use ECLIDE\", \"username\": \"lisa\", \"post_text\": \"Hi Richard,\\n\\nWith help from some other colleagues, I am able to log into DEV and PROD now.\\n\\nThanks.\\nLisa\", \"post_time\": \"2020-10-20 21:09:17\" },\n\t{ \"post_id\": 32293, \"topic_id\": 8413, \"forum_id\": 9, \"post_subject\": \"Re: Unable to use ECLIDE\", \"username\": \"lisa\", \"post_text\": \"I do use VPN everyday and login ECLIDE through VPN but it did not work.\\n\\nThanks,\\nLisa\", \"post_time\": \"2020-10-20 17:02:05\" },\n\t{ \"post_id\": 32283, \"topic_id\": 8413, \"forum_id\": 9, \"post_subject\": \"Re: Unable to use ECLIDE\", \"username\": \"rtaylor\", \"post_text\": \"Lisa,\\n\\nIf you're working from home then you need to be on VPN to get to the appropriate environment.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-20 16:52:32\" },\n\t{ \"post_id\": 32263, \"topic_id\": 8413, \"forum_id\": 9, \"post_subject\": \"Unable to use ECLIDE\", \"username\": \"lisa\", \"post_text\": \"Hi,\\n\\nI installed ECLIDE version 7.12.0 in my computer but I am not able to log into Dev and prod.It keeps showing : 1003: unable to communicate with server or a problem caused the program to stop working correctly. \\nI checked ECLIDE server and complier settings for DEV and PROD, they all seem correct. \\nI can use the same userid and password to log into DEV and PROD HPCC through browser .\\n\\nI will be very appreciate if someone can help me solve the issue.\\n\\nThanks.\\nLisa\", \"post_time\": \"2020-10-16 17:20:19\" },\n\t{ \"post_id\": 33213, \"topic_id\": 8633, \"forum_id\": 9, \"post_subject\": \"Re: issue with roundup\", \"username\": \"rtaylor\", \"post_text\": \"Michelle,\\n\\nTo expand on my previous reply, if what you need is a true "round up" that works on both positive and negative numbers, then here's my solution:DECIMAL5_2 Pos := 2.2;\\nDECIMAL5_2 Neg := -2.2;\\nROUNDUP(Pos); //3\\nROUNDUP(Neg); //-3\\n\\nMyRoundUp(REAL8 r) := IF(r < 0,TRUNCATE(r),ROUNDUP(r)); \\nMyRoundUp(Pos); //3\\nMyRoundUp(Neg); //-2
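A minimal sketch of using such a helper inside a PROJECT (the record layout and sample values below are invented for illustration, not taken from the workunit in question); keeping the intermediate arithmetic as REAL8 is what lets a result below 0.1 still round up to 1:

InRec := RECORD
  UNSIGNED id;
  UNSIGNED num;
  UNSIGNED den;
END;
ds := DATASET([{1,1,15},{2,22,10}], InRec);   // 1/15 = 0.0667, 22/10 = 2.2

MyRoundUp(REAL8 r) := IF(r < 0, TRUNCATE(r), ROUNDUP(r));

OutRec := RECORD
  UNSIGNED id;
  INTEGER  rounded;
END;
OutRec DoRound(InRec L) := TRANSFORM
  REAL8 ratio := L.num / L.den;      // "/" always returns a REAL in ECL
  SELF.rounded := MyRoundUp(ratio);  // 0.0667 -> 1, 2.2 -> 3
  SELF := L;
END;
OUTPUT(PROJECT(ds, DoRound(LEFT)));

One possible cause of the symptom described above is an intermediate declared with an integer type (or a DECIMAL with too few decimal places), which truncates a small positive fraction to 0 before ROUNDUP ever sees it.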
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-03-22 17:43:52\" },\n\t{ \"post_id\": 33203, \"topic_id\": 8633, \"forum_id\": 9, \"post_subject\": \"Re: issue with roundup\", \"username\": \"rtaylor\", \"post_text\": \"Michelle,\\n\\nPlease post your code that demonstrates this problem.\\n\\nRemember, this is a globally public forum, so internal LNRS workunit IDs are unavailable to anyone outside LNRS (and to many inside who do not have access to your work environment).\\n\\nAlso, the ROUNDUP() docs say: The ROUNDUP function returns the rounded integer of the realvalue by rounding any decimal portion to the next larger integer value, regardless of sign.
\\nSo that "regardless of sign" means that this code is all correct:\\nDECIMAL5_2 Pos := 2.2;\\nDECIMAL5_2 Neg := -2.2;\\nROUNDUP(Pos); //3\\nROUNDUP(Neg); //-3
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-03-22 11:51:15\" },\n\t{ \"post_id\": 33153, \"topic_id\": 8633, \"forum_id\": 9, \"post_subject\": \"issue with roundup\", \"username\": \"gogreen\", \"post_text\": \"I had a strange issue with Roundup function. The roundup is broken for numbers less than 0.1 (roundup to 0 instead of 1) when I calculate a decimal number and round it up to assign to a second attribute. However when I did a 2nd project using the same roundup statement and assign to a third attribute, it worked! I really want to understand the issues here since this problem is really hard to detect! It's causing unwanted result without us knowing for a while. Appreciate any input!\\n\\nMy WUID# W20210318-101352 (alpha dev).\\n\\nBest regards,\\nMichelle\", \"post_time\": \"2021-03-18 14:51:07\" },\n\t{ \"post_id\": 33473, \"topic_id\": 8743, \"forum_id\": 9, \"post_subject\": \"Re: No bookmarks file found\", \"username\": \"dehilster\", \"post_text\": \"Wolfkg:\\n\\nGreat to hear!\\n\\nWe really appreciated any feedback and if you need anything else, don't hesitate to contact us!\\n\\nDavid de Hilster\\nECL IDE\", \"post_time\": \"2021-05-12 13:40:00\" },\n\t{ \"post_id\": 33463, \"topic_id\": 8743, \"forum_id\": 9, \"post_subject\": \"Re: No bookmarks file found\", \"username\": \"wolfkg\", \"post_text\": \"dehilster:\\n\\nThank you for your response. I did have the latest version, but my link was taking me to a previous installed version as I listed above 7.10. I found the latest installed version (8.0.4-1) and started it up without getting that warning message. Thank you!\", \"post_time\": \"2021-05-12 13:23:23\" },\n\t{ \"post_id\": 33453, \"topic_id\": 8743, \"forum_id\": 9, \"post_subject\": \"Re: No bookmarks file found\", \"username\": \"dehilster\", \"post_text\": \"Wolfkg:\\n\\nThanks for the feedback. This was a known issue and resolved in August 2020. I know you said you were using the latest version, but could you verify the version of the ECL IDE you are using just to double check?\", \"post_time\": \"2021-05-12 13:10:03\" },\n\t{ \"post_id\": 33443, \"topic_id\": 8743, \"forum_id\": 9, \"post_subject\": \"No bookmarks file found\", \"username\": \"wolfkg\", \"post_text\": \"Recently, I downloaded the latest version of HPCC and ECL IDE. Each time I open up the IDE I get the warning "No bookmarks file found". I haven't been able to locate any documentation or information relating to the bookmarks file.\\n\\nVersion: community_7.10.0-1\\nServer: internal_7.8.80-1\\nCompiler: 7.10.64 community_7.10.64-rc1\\n\\nAny information on how to rectify the warning is appreciated. Thanks!\", \"post_time\": \"2021-05-11 13:36:31\" },\n\t{ \"post_id\": 34053, \"topic_id\": 8983, \"forum_id\": 9, \"post_subject\": \"Re: IMPORT $ and repository\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\nIn ECL, the repository you are using is local on your machine only. To allow others to see your repository folders there is usually a version control repository set up by your team, like GitHub. \\n\\nRegarding the use of $ and IMPORT. $ is a shortcut to the name of your current folder where your ECL file is created. For example, if your folder name is Training, your new ECL file created in the Training folder can reference other EXPORTed definitions in that same folder by using:\\n\\nIMPORT Training;
\\n\\nor\\n\\nIMPORT $;
\\n\\nThe error you are reporting is telling you that the Persons definition is either in a different folder, OR, you haven't EXPORTed the Persons definition. For example:\\n\\nEXPORT Persons := DATASET('~ONLINE::Intro::Persons',Layout,FLAT);
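A minimal sketch of how the two files might sit together in one repository folder (the folder name Training and the logical filename come from the examples above; the layout fields are assumed for illustration):

// File: Training/Persons.ecl -- a definition file; the file name must match the EXPORT
Layout := RECORD
  STRING15 FirstName;
  STRING25 LastName;
END;
EXPORT Persons := DATASET('~ONLINE::Intro::Persons', Layout, FLAT);

// File: Training/BWR_BasicQueries.ecl -- BWR code (actions only), submitted directly
IMPORT $;                  // $ resolves to the Training folder
OUTPUT($.Persons);
OUTPUT(COUNT($.Persons));

Once Persons is EXPORTed from the same folder, $.Persons resolves and the BWR file can be syntax checked and submitted.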
\\n\\nPlease feel free to reach out to me directly via email if you have any additional questions.\\nrobert.foreman@lexisnexisrisk.com\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2021-09-09 13:27:47\" },\n\t{ \"post_id\": 34033, \"topic_id\": 8983, \"forum_id\": 9, \"post_subject\": \"IMPORT $ and repository\", \"username\": \"215joec\", \"post_text\": \"I am taking the LN Intro to ECL and I’m having some issues with the repository. It seems that when I add folders/files to the repository, those items are not visible on anyone else’s instance of the repository. Additionally, using IMPORT $; does not make the other EXPORTed files in the folder available. This is the code:\\n\\n//EXPORT BWR_BasicQueries := 'todo';\\nimport $;\\n$.Persons;\\ncount($.Persons);\\n\\nErr msgs rcvd: \\nError: syntax error near ";" : expected '.' (3, 10), 3002, \\nError: syntax error near ")" : expected '.' (5, 17), 3002, \\n\\nAny help greatly appreciated.\", \"post_time\": \"2021-09-08 14:34:14\" },\n\t{ \"post_id\": 34781, \"topic_id\": 9201, \"forum_id\": 9, \"post_subject\": \"Re: ESP Exception - CSoapResponseBinding:Failed to get envir\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nThank you for clarifying the source of the error.\\nAfter further investigation, I found out that I had a mistake in my connection definition.\\n\\nThank you!\", \"post_time\": \"2022-01-24 16:36:26\" },\n\t{ \"post_id\": 34771, \"topic_id\": 9201, \"forum_id\": 9, \"post_subject\": \"Re: ESP Exception - CSoapResponseBinding:Failed to get envir\", \"username\": \"dehilster\", \"post_text\": \"The warning is from the ECL IDE not getting the environment information from the server. The IDE communicates with the server via SOAP calls and if the call comes back with an error, this error is displayed.\", \"post_time\": \"2022-01-24 15:44:21\" },\n\t{ \"post_id\": 34751, \"topic_id\": 9201, \"forum_id\": 9, \"post_subject\": \"ESP Exception - CSoapResponseBinding:Failed to get environme\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nI'm using the ECL IDE to connect to 2 different servers.\\nBoth the servers are up and running.\\nI've checked the status of the platform and all the services seem to be running OK.\\nWhen connecting from the ECL Watch I can see the full topology and perform regular operations.\\n\\nHowever when I connect from the ECL IDE one of them works well, and the other one throws some warning messages when connecting.\\nThen the IDE starts, but the target dropdown is empty, so I cannot submit anything to the cluster.\\n\\nWhat could be the problem?\\n\\nBelow I attach some information from the config files and logs.\\n\\nThank you!!\\n\\nThese are the errors:\\n\\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. (D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419)\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.2.6\\\\clienttools\\\\bin\\\\eclcc.exe \\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. (D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419)\\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. 
(D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419)\\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. (D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419)\\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. (D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419)\\nWARNING: ESP Exception - CSoapResponseBinding: 2022-01-22 04:31:24 GMT: Failed to get environment information. (D:\\\\J\\\\ws\\\\8.2.6-1\\\\CE\\\\W19-3-IDE\\\\ECLIDE\\\\comms\\\\Topology.cpp, 158) SoapUtil.h(419) EclCC.cpp(786)\\n
\\n\\n\\n\\nThe setting in the local environment are the same for both servers.\\nThis is the eclide.cfg file\\n\\n[Compiler]\\nLocation=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.2.6\\\\clienttools\\\\bin\\\\eclcc.exe\\nEclWorkingFolder=C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\wu\\\\ContaboThingsboard\\nEclFolder00=C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\nEclFolder01=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.2.6\\\\clienttools\\\\examples\\nOverrideDefaultSelection=0\\nArguments=\\nWUArguments=\\nEclFolder02=\\nEclFolder03=\\nEclFolder04=\\nEclFolder05=\\nEclFolder06=\\nEclFolder07=\\nEclFolder08=\\nEclFolder09=\\nDisableMeta=0\\n[Server]\\nTopology=http://serverIPaddress:8010/WsTopology\\nWorkunit=http://serverIPaddress:8010/WsWorkunits\\nAttribute=\\nAccount=http://serverIPaddress:8010/Ws_Account\\nSMC=http://serverIPaddress:8010/WsSMC\\nFileSpray=http://serverIPaddress:8010/FileSpray\\nDFU=http://serverIPaddress:8010/WsDfu\\nSSL=0\\nAdvanced=-1\\nServerIP=serverIPaddress\\n[Editor]\\nTabWidth=2\\nUseSpaces=0\\nMaintainIndent=1\\nLineNo=0\\nTree=1\\nTooltip=1\\nShowWS=0\\nTargetColor=0\\nSyncRepository=0\\nAutoCompleteOnDot=1\\nDoubleClickSelQualLabel=0\\n[General]\\nOpenWindowMax=0\\nAutoSaveFreq=30\\nAutoOpenResult=1\\nIgnoreServerVersion=0\\nHelpLocale=\\nUser=root\\nVersion=community_8.2.6-1\\n[Workunit]\\nResultLimit=100\\nFetchLimit=999\\nPersistLimit=3\\n[Results]\\nResultFont=Courier New\\nResultFontSize=10\\nLegacyIE=0\\n[Debug]\\nDisableInoke=0\\n[Compare]\\nIgnoreWhitespace=0\\nShowCRLF=0\\n[ActiveWorkunit]\\nRefresh=10\\n[Workunits]\\nAllUsers=0
\\n\\nThese are the processes in the server:\\n\\nroot@IoT01:/var/lib/HPCCSystems# ps -ef |grep hpcc\\nhpcc 2632464 1 0 Jan21 ? 00:00:00 /opt/HPCCSystems/bin/dafilesrv --logDir=/var/log/HPCCSystems --name=mydafilesrv --daemon\\nhpcc 2697220 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/eclccserver --daemon myeclccserver\\nhpcc 2697225 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/dfuserver --daemon mydfuserver\\nhpcc 2697226 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/daserver --daemon mydali\\nhpcc 2697254 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/agentexec --daemon myeclagent\\nhpcc 2697258 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/saserver --daemon mysasha\\nhpcc 2697263 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/toposerver --daemon mytoposerver\\nhpcc 2697264 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/eclscheduler --daemon myeclscheduler\\nhpcc 2697272 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/roxie --topology=RoxieTopology.xml --logfile --restarts=2 --stdlog=0 --daemon myroxie\\nhpcc 2697309 1 0 00:43 ? 00:00:01 /opt/HPCCSystems/bin/esp --daemon myesp\\nhpcc 2697960 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/thorslave_lcr --daemon thorslave_mythor_1 --master=10.220.115.2:20000 --slave=.:20100 --slavenum=1 --logDir=/var/log/HPCCSystems/mythor\\nhpcc 2697961 1 0 00:43 ? 00:00:00 /opt/HPCCSystems/bin/thormaster_lcr --daemon mythor MASTER=10.220.115.2:20000
\\n\\nAnd this are the last lines from the esp.log in the server:\\n\\n0000039D PRG 2022-01-22 01:20:14.615 2697309 2705043 "POST /FileSpray/GetDFUWorkunits.json, from 167.57.68.239"\\n0000039E USR 2022-01-22 01:20:14.615 2697309 2705043 "GetDFUWorkunits: getWorkUnitsSorted"\\n0000039F USR 2022-01-22 01:20:14.616 2697309 2705043 "GetDFUWorkunits: getWorkUnitsSorted done"\\n000003A0 PRG 2022-01-22 01:20:14.617 2697309 2705043 "TxSummary[activeReqs=1;auth=NA;contLen=40;rcv=1;handleHttp=2;user=@167.57.68.239;req=POST filespray.GETDFUWORKUNITS v1.22;total=2;]"\\n000003A1 PRG 2022-01-22 01:20:22.613 2697309 2705221 "HTTP First Line: POST /WsWorkunits/WUQuery.json HTTP/1.1"\\n000003A2 PRG 2022-01-22 01:20:22.613 2697309 2705221 "POST /WsWorkunits/WUQuery.json, from 167.57.68.239"\\n000003A3 USR 2022-01-22 01:20:22.613 2697309 2705221 "WUQuery: getWorkUnitsSorted"\\n000003A4 USR 2022-01-22 01:20:22.613 2697309 2705221 "WUQuery: getWorkUnitsSorted done"\\n000003A5 PRG 2022-01-22 01:20:22.614 2697309 2705221 "TxSummary[activeReqs=1;auth=NA;contLen=37;rcv=0;handleHttp=1;user=@167.57.68.239;req=POST wsworkunits.WUQUERY v1.84;total=1;]"\\n000003A6 PRG 2022-01-22 01:20:53.609 2697309 2705275 "HTTP First Line: POST /WsWorkunits/WUQuery.json HTTP/1.1"\\n000003A7 PRG 2022-01-22 01:20:53.609 2697309 2705275 "POST /WsWorkunits/WUQuery.json, from 167.57.68.239"\\n000003A8 USR 2022-01-22 01:20:53.609 2697309 2705275 "WUQuery: getWorkUnitsSorted"\\n000003A9 USR 2022-01-22 01:20:53.610 2697309 2705275 "WUQuery: getWorkUnitsSorted done"\\n000003AA PRG 2022-01-22 01:20:53.610 2697309 2705275 "TxSummary[activeReqs=1;auth=NA;contLen=37;rcv=0;handleHttp=1;user=@167.57.68.239;req=POST wsworkunits.WUQUERY v1.84;total=1;]"\\n000003AB PRG 2022-01-22 01:21:03.835 2697309 2697356 "SYS: LPT=274 APT=161 PU= 8% MU= 57% MAL=166014976 MMP=163069952 SBK=2945024 TOT=162916K RAM=5345492K SWP=499248K"\\n000003AC PRG 2022-01-22 01:21:03.835 2697309 2697356 "DSK: [xvdh] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 [xvdb] r/s=0.1 kr/s=2.4 w/s=0.0 kw/s=0.0 bsy=0 [xvdc] r/s=0.7 kr/s=13.8 w/s=12.5 kw/s=115.7 bsy=0 [xvda] r/s=0.5 kr/s=20.6 w/s=2.5 kw/s=3.5 bsy=0 NIC: [eth0] rxp/s=0.1 rxk/s=0.0 txp/s=0.1 txk/s=0.0 rxerrs=0 rxdrps=0 txerrs=0 txdrps=0 CPU: usr=5 sys=3 iow=0 idle=91"\\n000003AD USR 2022-01-22 01:21:03.967 2697309 2697371 "CInfoCacheReaderThread Activity Reader: InfoCache collected (0 seconds)."\\n000003AE PRG 2022-01-22 01:21:14.611 2697309 2705302 "HTTP First Line: POST /FileSpray/GetDFUWorkunits.json HTTP/1.1"\\n000003AF PRG 2022-01-22 01:21:14.611 2697309 2705302 "POST /FileSpray/GetDFUWorkunits.json, from 167.57.68.239"\\n000003B0 USR 2022-01-22 01:21:14.611 2697309 2705302 "GetDFUWorkunits: getWorkUnitsSorted"\\n000003B1 USR 2022-01-22 01:21:14.612 2697309 2705302 "GetDFUWorkunits: getWorkUnitsSorted done"\\n000003B2 PRG 2022-01-22 01:21:14.613 2697309 2705302 "TxSummary[activeReqs=1;auth=NA;contLen=40;rcv=0;handleHttp=2;user=@167.57.68.239;req=POST filespray.GETDFUWORKUNITS v1.22;total=2;]"\\n000003B3 PRG 2022-01-22 01:21:24.611 2697309 2705315 "HTTP First Line: POST /WsWorkunits/WUQuery.json HTTP/1.1"\\n000003B4 PRG 2022-01-22 01:21:24.611 2697309 2705315 "POST /WsWorkunits/WUQuery.json, from 167.57.68.239"\\n000003B5 USR 2022-01-22 01:21:24.611 2697309 2705315 "WUQuery: getWorkUnitsSorted"\\n000003B6 USR 2022-01-22 01:21:24.611 2697309 2705315 "WUQuery: getWorkUnitsSorted done"\\n000003B7 PRG 2022-01-22 01:21:24.612 2697309 2705315 "TxSummary[activeReqs=1;auth=NA;contLen=37;rcv=1;handleHttp=1;user=@167.57.68.239;req=POST 
wsworkunits.WUQUERY v1.84;total=1;]"
\", \"post_time\": \"2022-01-22 04:48:46\" },\n\t{ \"post_id\": 140, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Re: Creating numbers from unicode data\", \"username\": \"bforeman\", \"post_text\": \"Hi Todd,\\n\\nPlease submit a Feature Request via the Community Issue Tracker here on this web site:\\n\\nYou can get there from this link:\\n\\nhttp://hpccsystems.com/support\\n\\nBest regards,\\n\\nBob Foreman\", \"post_time\": \"2011-07-13 13:32:41\" },\n\t{ \"post_id\": 139, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Re: Creating numbers from unicode data\", \"username\": \"thildebrant\", \"post_text\": \"[quote="ghalliday":270nao4q]\\nAnd please feel free to submit a feature request for directly reading utf16... it should be possible to autodetect the format in most situations.\\n\\nWhat is the best method to submit a feature request?\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-07-13 12:37:13\" },\n\t{ \"post_id\": 76, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Re: Creating numbers from unicode data\", \"username\": \"ghalliday\", \"post_text\": \"Someone pointed out to me the correct syntax is\\n\\nFileTestData := DATASET('~testa', TestRow, CSV(UNICODE));\\n\\n(UTF8 is currently an undocumented synonym for UNICODE in this context.)\\n\\nI suspect we should support ,UTF8 as a synonym for ,CSV(UTF8). I'll investigate that..\", \"post_time\": \"2011-06-28 12:52:40\" },\n\t{ \"post_id\": 75, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Re: Creating numbers from unicode data\", \"username\": \"robert.foreman@lexisnexis.com\", \"post_text\": \"Hi Andrew,\\n\\nThe Language Reference also mentions this:\\n\\nCasting UNICODE to VARUNICODE, STRING, or DATA is allowed, while casting to any numeric type will first implicitly cast to an ASCII STRING and then cast to the target value type.\\n\\nSee the section on type casting on page 51 of the Language Reference Manual.\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-06-28 11:55:51\" },\n\t{ \"post_id\": 74, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Re: Creating numbers from unicode data\", \"username\": \"ghalliday\", \"post_text\": \"The problem is that ,CSV on the DATASET definition implies that the input file is encoded using latin1 8 bit encoding.\\n\\nIf you replace CSV with UTF8 it will read the input file as a UTF8 file. i.e., \\n\\nFileTestData := DATASET('~testa', TestRow, UTF8);\\n\\nThe system doesn't currently support direct reading of utf16be/le, utf32 files. However the file spray does allow you to convert to/from utf16 to utf8.\\n\\nAnd please feel free to submit a feature request for directly reading utf16... it should be possible to autodetect the format in most situations.\", \"post_time\": \"2011-06-28 09:23:49\" },\n\t{ \"post_id\": 73, \"topic_id\": 41, \"forum_id\": 10, \"post_subject\": \"Creating numbers from unicode data\", \"username\": \"andrew\", \"post_text\": \"I'm having trouble creating numeric types from unicode data. I'm running the 3.0.0.2 VM through VMware Player on Windows 7 Pro 64 bit. I sprayed a UTF-16BE CSV file to my cluster, and was able to output a record with DATA and UNICODE fields in the ECL IDE. When I tried to change the field types to INTEGER or REAL, only zeroes showed up in the results window. I tried uploading UTF-8 data with the same results. So I tried spraying an ASCII file to use with the same ECL code, and then the numbers show up correctly in the results window. How can I convert unicode strings to numbers? 
Any help would be appreciated.\\n\\nTestRow := RECORD\\nDATA field1;\\nUNICODE field2;\\nUNICODE field3;\\nUNICODE field4;\\nEND;\\nFileTestData := DATASET('~testa', TestRow, CSV);\\nOUTPUT(FileTestData);
\\n\\nTestRow := RECORD\\nINTEGER field1;\\nREAL field2;\\nREAL field3;\\nREAL field4;\\nEND;\\nFileTestData := DATASET('~asciitextnums', TestRow, CSV);\\nOUTPUT(FileTestData);
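Pulling the replies above together, a minimal sketch of the two approaches (the filename '~testa' and the numeric layout are taken from this thread; the literal in the second part is invented):

// Approach 1: declare the file as Unicode so the numeric fields parse directly --
// the only change from the plain CSV version is the CSV(UNICODE) option
TestRow := RECORD
  INTEGER field1;
  REAL    field2;
  REAL    field3;
  REAL    field4;
END;
FileTestData := DATASET('~testa', TestRow, CSV(UNICODE));
OUTPUT(FileTestData);

// Approach 2: keep the fields as UNICODE and cast afterwards; per the casting rule
// quoted above, the value goes UNICODE -> STRING -> numeric
u := U'3.14';
OUTPUT((REAL)(STRING)u);   // 3.14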
\", \"post_time\": \"2011-06-27 23:51:51\" },\n\t{ \"post_id\": 92, \"topic_id\": 42, \"forum_id\": 10, \"post_subject\": \"Re: A short list of problems\", \"username\": \"dabayliss\", \"post_text\": \"1. I have large matrices (on the order of millions of rows) and I need to find the Eigenvalues / Eigenvectors.
\\nWell - ECL does not have direct support for Matrix computation built in. I have done matrix math of sparse matrices using a record of the form: xpos, ypos, value. Things such as add, subtract and multiply are fairly straightforward. Determinants are a rather more interesting proposition - of course I last did that in college which was probably before you were born ...\\nIf you want to start a matrix library; especially if you want to share it; I would be happy to help with the ECL side - you may need to supply (or at least remind me of) the math!\\n\\n2. I have large Web server log files and I need to do tabulation and anomaly detection. I don't think the tabulation would be hard, but trying to chain together requests into a graph might be more difficult.
What do you mean by 'chain together requests' - we have done significant work internally on this - I did not encounter any significant issues\\n\\n3. I need to rank documents based on some natural language processing (mark it zero, positive, or negative) for a large number of documents. (I did take the Advanced ECL training class, but the pattern primitives seemed to be very similar to regular expressions.)
\\nNatural language processing is a highly abused term; right up there with 'Artificial Intelligence'. What exactly are you trying to do? The ECL 'NLP' capability has two different grammers within it one is similar to RE although with extensions (extremely similar to Snobol 4 if you are familiar with that). The other is a Tomita parser ...\\n\\nDavid\", \"post_time\": \"2011-06-30 16:29:07\" },\n\t{ \"post_id\": 77, \"topic_id\": 42, \"forum_id\": 10, \"post_subject\": \"A short list of problems\", \"username\": \"cmastrange3\", \"post_text\": \"Hey all,\\n\\nI am currently doing research to see if HPCC/ECL would be able to speed up some big data problems. Any thoughts would be appreciated.\\n\\n1. I have large matrices (on the order of millions of rows) and I need to find the Eigenvalues / Eigenvectors. \\n\\n2. I have large Web server log files and I need to do tabulation and anomaly detection. I don't think the tabulation would be hard, but trying to chain together requests into a graph might be more difficult.\\n\\n3. I need to rank documents based on some natural language processing (mark it zero, positive, or negative) for a large number of documents. (I did take the Advanced ECL training class, but the pattern primitives seemed to be very similar to regular expressions.)\\n\\nI would be interested to get any thoughts on these problems.\", \"post_time\": \"2011-06-28 21:01:22\" },\n\t{ \"post_id\": 320, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Re: Best tools for SOAP and WSDL development?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"FYI, There are two forms of URL you can post your JSON requests to. \\n\\nThe JSON Test page currently shows the format based on the query's WorkUnit id, but you can use a simpler format.\\n\\nhttp://localhost:8002/WsEcl/json/query/ ... son_demo.2.\\n\\n1. Replace "hthor" with the name of the queryset. \\n\\nThe queryset is usually the name of the cluster your query was deployed to, but you you can quickly verify in WsECL because it will also be the name of the node you expanded to find your query in the left hand navigation tree.\\n\\n2. Replace "json_demo.2" with the name of your query as it appears in WsECL.\", \"post_time\": \"2011-09-07 16:13:24\" },\n\t{ \"post_id\": 319, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Re: Best tools for SOAP and WSDL development?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"JSON requests should be sent as an HTTP Posts with the "Content-Type" \\nset to "application/json; charset=UTF-8".\\n\\nWsECL provides an interactive tool that demonstrates how to use JSON to call a query.\\n\\nFrom the main WsEcl page, navigate to the form for your query by selecting your query on the left hand navigation tree.\\n\\nFill in the parameter values you want set in your JSON object.\\n\\nAt the bottom of the form there is an action drop down that should say "OUTPUT TABLES", change that to "JSON TEST".\\n\\nClick Submit.\\n\\nYou should see a sample JSON request similar to:\\n\\n\\n{\\n "json_demo.1": {\\n "my_id": "001"\\n }\\n}\\n
\\n\\nNotice that it contains an object with the same name as your query. The contents of that object are your query parameters.\\n\\nClick "Send Request", and you should see a JSON response containing the output from your ECL code.\\n\\n\\n{\\n "json_demo.1Response": {\\n "Results": {\\n "Result_1": {\\n "Row": [ {\\n "Result_1": "001"\\n }\\n ]\\n } \\n }\\n }\\n}\\n
\\n\\nLike the request, the response also contains a JSON object based on the name of your query but with the string "Response" at the end. Under the query response object there is an object named "Results". Under that object, each direct output from your ECL will appear using the output name as its object name.\\n\\nTony\", \"post_time\": \"2011-09-07 15:36:16\" },\n\t{ \"post_id\": 318, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Re: Best tools for SOAP and WSDL development?\", \"username\": \"hli\", \"post_text\": \"Could you provide a piece of sample code about how to retrieve data from Roxie?\\nFor example:\\n\\nThe service link is: \\nhttp://localhost:8002/WsEcl/example/req ... infobyid.1\\n\\nThe only parameter is 'id' and we want to query the user with id=001.\\n\\nHow can we create and send a json request to the server?\", \"post_time\": \"2011-09-07 13:54:30\" },\n\t{ \"post_id\": 90, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Re: Best tools for SOAP and WSDL development?\", \"username\": \"bforeman\", \"post_text\": \"Anthony, this is exactly what I needed. I'm using a tool called Clarion.NET that allows easy import of any Web Reference. Looking forward to trying this over the Holiday weekend. Thanks very much for your detailed reply, very much appreciated!\", \"post_time\": \"2011-06-30 11:56:46\" },\n\t{ \"post_id\": 89, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Re: Best tools for SOAP and WSDL development?\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Right, that "Data Tutorial" document has sections named "Publishing your Query" and "Compile and Publish the Roxie Query" that walk you through easily turning any ECL query into a web service. \\n\\nOnce a query is published the WSDL is automatically available for use by client tools.\\n\\nA published query can be accessed through an HPCC component called WsECL ("Web Service ECL"). WsECL should be accessible at http://nnn.nnn.nnn.nnn:8002 (where nnn.nnn.nnn.nnn is your ESP Server’s IP address and 8002 is the default port for WsECL).\\n\\nAny input parameters that were declared using the STORED ECL keyword will be web service inputs, and direct (non file) outputs using the OUTPUT keyword will be web service outputs.\\n\\nA links tab provides easy access to a WSDL, a XML Schema, a Sample Request, a Sample Response, and more. The query can also be accessed through a browser using a generated form and test pages where you can interact with your query using SOAP or JSON right from your browser. (From the WsECL form, select "SOAP Test" or "JSON Test" before clicking on submit). \\n\\nClient applications can interact with the service using SOAP, JSON, Form Posts, and parameters directly embedded in a URL.\\n\\nOne common tool for developing clients is the Apache Axis library for Java, and support is tightly integrated into the .net development environment. But the idea is that these services should be inter operable with any standard web service tools. \\n\\nThere are many tools available that allow you to take the given WSDLs and XML schemas and generate client application code for just about any major development language/platform. 
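As a small, hedged illustration of the STORED/OUTPUT mechanism described above (the names here are hypothetical, not part of the tutorial), a self-contained query whose input becomes a WsECL form field might look like:

// 'my_id' becomes a web-service input because of the STORED workflow service
STRING10 my_id := '' : STORED('my_id');

rec := {STRING10 id};

// a direct (non-file) OUTPUT; this becomes the query's web-service result
OUTPUT(DATASET([{my_id}], rec), NAMED('Result_1'));

Once the query is compiled and published, the form, WSDL, and SOAP/JSON test pages for it appear in WsECL automatically.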
If you have questions about specific languages or environments I may be able to help.\", \"post_time\": \"2011-06-29 23:24:26\" },\n\t{ \"post_id\": 78, \"topic_id\": 43, \"forum_id\": 10, \"post_subject\": \"Best tools for SOAP and WSDL development?\", \"username\": \"robert.foreman@lexisnexis.com\", \"post_text\": \"Hi!\\n\\nI am loving the HPCC VM Edition, and have completed the Data Tutorial PDF. On the last page of the tutorial it says the following:\\n\\n• Write client applications to access your queries using SOAP.\\n• Publish the WSDL so others can write custom applications using SOAP.\\n\\nWhat development tools would you recommend to help me write these SOAP applications, and what guide would you recommend to help me create and publish the Web Service? (WSDL)\\n\\nThanks in advance!\", \"post_time\": \"2011-06-29 14:51:34\" },\n\t{ \"post_id\": 83, \"topic_id\": 47, \"forum_id\": 10, \"post_subject\": \"Re: SuperFiles\", \"username\": \"dabayliss\", \"post_text\": \"One simple example:\\n\\nI have an ECL file called 'weblogs' that I want to process all of the weblogs I have ever recieved. I upload all the logs I have so far in a logical file and create a superfile that 'points to' that logical file.\\n\\nNow in my code I use the superfile - it all works using my uploaded weblogs. Then tomorrow we have some more weblogs - operations uploads the days worth of weblogs to a new logical file - updates the superfile to point to the original file AND the daily file.\\n\\nMy code still executes as before with no changes. Note - if I had used PERSIST technology then any persists that needed to would rebuild.\", \"post_time\": \"2011-06-29 15:44:01\" },\n\t{ \"post_id\": 82, \"topic_id\": 47, \"forum_id\": 10, \"post_subject\": \"SuperFiles\", \"username\": \"champirs\", \"post_text\": \"SuperFiles seem to be an interesting concept and are supported by ECL. To gain a better understanding, does someone have a practical example of when these should be used?\", \"post_time\": \"2011-06-29 15:30:13\" },\n\t{ \"post_id\": 100, \"topic_id\": 51, \"forum_id\": 10, \"post_subject\": \"Re: Testing for uniqueness in a set of values\", \"username\": \"bforeman\", \"post_text\": \"VERY elegant AND efficient, thank you very much!\\n\\nBob\", \"post_time\": \"2011-07-06 14:03:54\" },\n\t{ \"post_id\": 99, \"topic_id\": 51, \"forum_id\": 10, \"post_subject\": \"Re: Testing for uniqueness in a set of values\", \"username\": \"ghalliday\", \"post_text\": \"You need to make use of the form of DATASET that converts a set to a dataset. Simplest, and relatively efficient for small lists would be:\\n\\nisUnique1(set of integer x) := function\\n\\tds1 := dataset(x, { integer value1; });\\n\\tds2 := dataset(x, { integer value2; });\\n\\treturn not exists(ds1(count(ds2(ds1.value1 = ds2.value2)) > 1));\\nEND;\\n\\ni.e. count the number of times each element occurs in the list, and not duplicates if it never exceeds 1. This will typically be O(N^2)\\n\\nFor longer lists you're better off sorting, deduping and comparing the counts, which is O(Nln(N)):\\n\\nisUnique2(set of integer x) := function\\n\\tds1 := dataset(x, { integer value; });\\n\\treturn count(x) = count(dedup(sort(ds1, value), value));\\nEND;\", \"post_time\": \"2011-07-06 13:58:10\" },\n\t{ \"post_id\": 98, \"topic_id\": 51, \"forum_id\": 10, \"post_subject\": \"Testing for uniqueness in a set of values\", \"username\": \"bforeman\", \"post_text\": \"Hi team,\\n\\nWhat is the best way to test for uniqueness in a set of values? 
For example, if I have the following set definition:\\n\\nSetNum := [1,2,3,4,5,6,3,2,7];\\n\\nWhat's the best way to test that each element is unique?\\n\\nI was creating a training exercise where a student needs to determine the highest count in a series of counts presented to them. This is a basic phase of the training course where TRANSFORM and the ETL processes have yet to be introduced. \\n\\nAfter returning a series of COUNT values, and then using the MAX function I was able to get the maximium count in that set, but then I realized, "How do I know it was a unique value?". I ended up using WHICH and checked for equality across all count values. If WHICH returned anything other than zero, I knew that there was a duplicate in my counts.\\n\\nThis worked because I was only comparing 5 values, and using WHICH this gave me only 15 combinations to test, but in a larger set of values the combinations would be too many and WHICH would not be the best solution.\\n\\nI know that we can use DEDUP and ROLLUP to remove duplicates and salvage data when needed, but what I was looking for is something like this:\\n\\nval := ISUNIQUE(set); //returns FALSE if duplicates are found in the set\\nval2 := ISUNIQUE(recordset,field); //returns FALSE if duplicate in field\\n\\nDo we have anything in ECL similar to this that I might have overlooked? Or is there a better or simpler approach to what I need to do?\\n \\nBest Regards,\\n\\nBob Foreman\", \"post_time\": \"2011-07-06 12:30:21\" },\n\t{ \"post_id\": 278, \"topic_id\": 53, \"forum_id\": 10, \"post_subject\": \"Re: New variations of IMPORT\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"Thanks for the explanation.\", \"post_time\": \"2011-08-16 14:52:58\" },\n\t{ \"post_id\": 105, \"topic_id\": 53, \"forum_id\": 10, \"post_subject\": \"New variations of IMPORT\", \"username\": \"bforeman\", \"post_text\": \"In case you missed it (and I did, which is why I'm posting here), there are some new forms of IMPORT in this latest release that I'm sure all of you will find very handy. Here is a summary:\\n\\nIMPORT $; //makes all definitions from the same folder available\\nIMPORT $, Std; //makes the standard library functions available, also\\nIMPORT MyModule; //makes available the definitions from MyModule folder\\nIMPORT SomeFolder.SomeFile; //make the specific file available\\nIMPORT SomeReallyLongFolderName AS SN; //alias the long name as SN\\nIMPORT * FROM Fred; //makes everything from Fred available, unqualified\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-07-07 12:00:33\" },\n\t{ \"post_id\": 130, \"topic_id\": 54, \"forum_id\": 10, \"post_subject\": \"Re: PROJECT vs. ROW\", \"username\": \"champirs\", \"post_text\": \"I see what you mean, Richard. I am not used to a compiler being that intuitive. So that does make me feel better from the code side. I just want to be as efficient as possible - you and David have certainly helped in that direction. \\n\\nThanks again!\\n\\nBecky\", \"post_time\": \"2011-07-12 19:28:54\" },\n\t{ \"post_id\": 128, \"topic_id\": 54, \"forum_id\": 10, \"post_subject\": \"Re: PROJECT vs. ROW\", \"username\": \"champirs\", \"post_text\": \"It does help - thanks so much. I just wasn't sure if there would be a negative impact in either case. I agree with your 'tell the truth' logic. That also makes it much easier on maintenance!\", \"post_time\": \"2011-07-12 19:26:05\" },\n\t{ \"post_id\": 110, \"topic_id\": 54, \"forum_id\": 10, \"post_subject\": \"Re: PROJECT vs. 
ROW\", \"username\": \"richardkchapman\", \"post_text\": \"To reinforce David's comments, so long as what you have coded is not so complicated that the code generator cannot recognise it, it will transform what you wrote into the most efficient alternative prior to generation. Thus you are usually better off writing simple ECL that the code-generator (and your fellow ECL coders) can understand more easily, than trying to second-guess the compiler by substituting a more obscure construct that you think might go faster. \\n\\nBecause the ECL expressions that the code generator is trying to optimize are side-effect free, the range of optimizations and transformations that it can (and does) apply without changing the result is pretty large.\\n\\nRichard\", \"post_time\": \"2011-07-08 06:12:50\" },\n\t{ \"post_id\": 109, \"topic_id\": 54, \"forum_id\": 10, \"post_subject\": \"Re: PROJECT vs. ROW\", \"username\": \"dabayliss\", \"post_text\": \"The simple answer is 'yes'. It is fairly each to convert a row into a dataset with a single record and to convert a dataset with a single record into a row. Therefore you nearly always have the option of one or the other.\\n\\nIn terms of 'which is better' - there is a simple rule of thumb for this (and most of ECL) - which is 'tell the truth'. If what you are trying to do is transform one dataset into another - use a project; if what you are trying to do is build a row - then use ROW.\\n\\nPossibly the easiest test is: "which one requires least code" - that is typically the method where you are telling the most truth.\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2011-07-08 00:36:09\" },\n\t{ \"post_id\": 108, \"topic_id\": 54, \"forum_id\": 10, \"post_subject\": \"PROJECT vs. ROW\", \"username\": \"champirs\", \"post_text\": \"The definitions for PROJECT and ROW are as follows:\\n\\n"The PROJECT function processes through all records in the recordset performing the transform function on each record in turn."\\n\\n"The ROW function creates a single data record and is valid for use in any expression where a single record is valid."\\n\\nSo PROJECT is used to transform recordsets and ROW is used to transform a single record, correct? Is there ever a condition when using either ROW or PROJECT is a viable option? If so, which is better?\", \"post_time\": \"2011-07-07 22:34:01\" },\n\t{ \"post_id\": 116, \"topic_id\": 55, \"forum_id\": 10, \"post_subject\": \"Re: Comparing files\", \"username\": \"John.Freibaum\", \"post_text\": \"Thank you to the two of you, this is very helpful and informative.\", \"post_time\": \"2011-07-08 19:17:38\" },\n\t{ \"post_id\": 115, \"topic_id\": 55, \"forum_id\": 10, \"post_subject\": \"Re: Comparing files\", \"username\": \"dabayliss\", \"post_text\": \"Well,\\n\\nYou do (probably) want a left only join; but you need to be careful regarding the join condition.\\n\\nIF your file has a unique record id, then you can do a simple left only join -\\n\\nNewRecs := JOIN(NewFile,OldFile,LEFT.UniqueID=RIGHT.UniqueID,TRANSFORM(LEFT),LEFT ONLY);\\n\\nIf your file does NOT have a unique ID - but each record is unique then you can do:\\n\\nNewRecs := JOIN(NewFile,OldFile,LEFT.Field1=RIGHT.Field1 AND LEFT.Field2=RIGHT.Field2 ... LEFT.FieldN=RIGHT.FieldN,TRANSFORM(LEFT),LEFT ONLY);\\n\\nIf either or both files might contain complete duplicates then you really want to dedup both sides first (or you will get a cross-product out of the join. 
Thus\\n\\nN1 := DEDUP(NewFile,WHOLE RECORD,ALL);\\nN2 := DEDUP(OldFile,WHOLE RECORD,ALL);\\n\\n// Perform the JOIN on these\\n\\nIf you only want to know if there is new data - but you don't care what it is - then actually you can cheat:\\n\\nIF ( COUNT(DEDUP(OldFile,WHOLE RECORD,ALL))<>COUNT(DEDUP(OldFile+NewFile,WHOLE RECORD,ALL)),'New Data','None');\\n\\nIncidentally, the whole business of turning a stream of database snapshots into date-denoted basefile is a moderately sticky business; and is one of the features of our SALT tool (that generates ECL)\\n\\nDavid\\n[quote="John.Freibaum":3hla5e52]What is the best way to determine that a new file contains new records when compared to the current file using ECL?\", \"post_time\": \"2011-07-08 19:10:23\" },\n\t{ \"post_id\": 112, \"topic_id\": 55, \"forum_id\": 10, \"post_subject\": \"Re: Comparing files\", \"username\": \"gmwitz\", \"post_text\": \"The best way would be to use a LEFT ONLY Join.\\nAn example of a left only join would be :\\n\\nj_new_recs := join(ds,\\t\\t\\t\\t\\t\\t\\t\\t\\t ds_father,\\n\\t\\t left.FIELD = right.FIELD,\\n\\t\\t transform(recordof(ds), self := left),\\n\\t left only,\\n\\t\\t local);\\n\\nds = Current Dataset\\nds_father = Father Dataset\\nFIELD = Any field that is in both the ds and ds_father layout.\", \"post_time\": \"2011-07-08 15:17:27\" },\n\t{ \"post_id\": 111, \"topic_id\": 55, \"forum_id\": 10, \"post_subject\": \"Comparing files\", \"username\": \"John.Freibaum\", \"post_text\": \"What is the best way to determine that a new file contains new records when compared to the current file using ECL?\", \"post_time\": \"2011-07-08 15:10:35\" },\n\t{ \"post_id\": 171, \"topic_id\": 57, \"forum_id\": 10, \"post_subject\": \"Re: How can I remove duplicate records from a file?\", \"username\": \"John.Freibaum\", \"post_text\": \"Thank you for your help.\", \"post_time\": \"2011-07-19 15:09:34\" },\n\t{ \"post_id\": 142, \"topic_id\": 57, \"forum_id\": 10, \"post_subject\": \"Re: How can I remove duplicate records from a file?\", \"username\": \"joecella\", \"post_text\": \"If you really want a count based on column1, TABLE is the way to go:\\n\\nECL:\\ntheRecord := {\\n string1 column1;\\n string1 column2;\\n string1 column3;\\n string1 column4;\\n string1 column5;\\n};\\n\\ninlineData := dataset([\\n {'A','B','C','D','E'},\\n {'A','C','D','E','F'},\\n {'A','D','E','F','G'},\\n {'B','B','C','D','E'},\\n {'B','C','D','E','F'}],theRecord);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\ntableRecord := {\\n inlineData.column1;\\n unsigned4 cnt := count(group);\\n};\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\ncolumn1Counts := table(inlineData,tableRecord,column1);\\noutput(column1Counts);\\n\\nResult:\\n\\ncolumn1\\tcnt\\nB 2\\nA 3\", \"post_time\": \"2011-07-13 15:31:36\" },\n\t{ \"post_id\": 125, \"topic_id\": 57, \"forum_id\": 10, \"post_subject\": \"Re: How can I remove duplicate records from a file?\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"That's what the DEDUP function does. 
You simply need to SORT the dataset by column 1, then DEDUP by column 1, like this:\\n\\n s := SORT(ds,Column1);\\n d := DEDUP(s,LEFT.Column1 = RIGHT.Column1);\\n OUTPUT(d);\\n\\nThis will keep the first record for each duplicate Column1 value.\", \"post_time\": \"2011-07-12 14:17:07\" },\n\t{ \"post_id\": 124, \"topic_id\": 57, \"forum_id\": 10, \"post_subject\": \"How can I remove duplicate records from a file?\", \"username\": \"John.Freibaum\", \"post_text\": \"I have a file that contains a number of duplicate records in the first column, the other 4 columns contain data about each record in the first column but I only want to see one record for each category in the first column to get an accurate count. For example: I have one category called “A” that consists of 3 “A” records but I only want to see 1 “A” record.\\n\\nBefore duplicate “A” records are filtered from column1 \\nColumn1 Column2 Column3 Column4 Column5\\nA \\nA \\t \\nA \\t \\n \\nAfter duplicate “A” records are filtered from Column2\\nColumn1 Column2 Column3 Column4 Column5\\nA \\t \\n\\nHow can this be accomplished using ECL?\", \"post_time\": \"2011-07-12 13:53:57\" },\n\t{ \"post_id\": 149, \"topic_id\": 61, \"forum_id\": 10, \"post_subject\": \"Re: time with milliseconds\", \"username\": \"ghalliday\", \"post_text\": \"Try\\nimport std.system.debug;\\n\\ndebug.msTick();\\n\\nBeware that definitions aren't assignments, and because the function isn't pure it will tend to be re-evaluated each time it is used. This can lead to some confusing results if you're not careful. E.g.,\\n\\nstartTime := debug.msTick();\\ndoSomethingComplex();\\noutput(debug.msTick()-startTime);\\nWill typically output 0. startTime is evaluated at the same time as debug.msTick()!\\n\\nAssigning startTime into a row, or using independent e.g.,\\n\\nstartTime := debug.msTick() : independent;\\n\\nare some ways of avoiding this.\\n\\n\\nYou need to be careful about when the expression is evaluated.\", \"post_time\": \"2011-07-14 09:59:31\" },\n\t{ \"post_id\": 144, \"topic_id\": 61, \"forum_id\": 10, \"post_subject\": \"time with milliseconds\", \"username\": \"champirs\", \"post_text\": \"Hello! I see there is an attribute (ut.getTime()) which returns hrs/mins/secs. Is there an existing attribute which will return milliseconds?\\nThanks!\", \"post_time\": \"2011-07-13 20:05:56\" },\n\t{ \"post_id\": 151, \"topic_id\": 63, \"forum_id\": 10, \"post_subject\": \"Re: Data extract to RDBMS\", \"username\": \"bforeman\", \"post_text\": \"Hi Nirupa,\\n\\nThis FAQ on our web site might be helpful to you:\\n \\nHow can HPCC help me with my existing RDBMS? \\n\\nWhether you are using an ISAM (Indexed Sequential Access Method) or SQL file system, the HPCC can be a great resource for analyzing and reporting on your existing data, particularly if your data is starting to get very large and hard to manage on your existing system. All that you need to do is export your data files to either a fixed length format, CSV (comma separated values) format, or XML format, and then copy them to the HPCC Landing (or Drop) Zone and spray them to the THOR Data Refinery in HPCC. After that, your related files can be quickly joined and transformed using one of many ECL transformation functions. Your results can be stored in new tables and later indexed for faster access on the Roxie Data Delivery Engine which is also built-in to your HPCC. \\n\\n*****\\nAlthough this FAQ describes getting data into HPCC from a RDBMS, the opposite process is very similar. 
From HPCC, you would simply despray your THOR, CSV, or XML file back to your landing zone, and then use the import tools of your RDBMS to merge or import the data back to your target system.\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-07-14 13:07:42\" },\n\t{ \"post_id\": 147, \"topic_id\": 63, \"forum_id\": 10, \"post_subject\": \"Data extract to RDBMS\", \"username\": \"NirupaRavilla\", \"post_text\": \"How can HPCC data (or a subset of data) be extracted to a relational database?\", \"post_time\": \"2011-07-13 23:28:18\" },\n\t{ \"post_id\": 177, \"topic_id\": 71, \"forum_id\": 10, \"post_subject\": \"Re: simple data profiling code\", \"username\": \"ghalliday\", \"post_text\": \"For your information, the efficiency of \\n\\noutput(m_Lengths); or output(m_maxLengths); \\n\\nwhen there is no output(m_Dataset)\\n\\nis improved (when coming from a csv file) in the next (3.1) release of the platform.\", \"post_time\": \"2011-07-20 08:44:33\" },\n\t{ \"post_id\": 175, \"topic_id\": 71, \"forum_id\": 10, \"post_subject\": \"Re: simple data profiling code\", \"username\": \"aintnomyth\", \"post_text\": \"Awesome, thanks for the help and the quick reply.\", \"post_time\": \"2011-07-19 20:18:35\" },\n\t{ \"post_id\": 174, \"topic_id\": 71, \"forum_id\": 10, \"post_subject\": \"Re: simple data profiling code\", \"username\": \"ghalliday\", \"post_text\": \"PROJECT and TABLE (in its normal form) generate an output row for each input row.\\n\\nA maximum for a single field could be calculated using\\n\\noutput(max(m_Lengths, f1));\\n\\nTo calculate them all at the same time you want to use the aggregating form of TABLE.\\n\\nm_maxLengths := TABLE(m_Lengths, { maxf1 := MAX(group, f1), maxf2 := MAX(group, f2), maxfn := MAX(group, fn) });\\n\\noutput(m_maxLengths);\", \"post_time\": \"2011-07-19 18:57:49\" },\n\t{ \"post_id\": 173, \"topic_id\": 71, \"forum_id\": 10, \"post_subject\": \"simple data profiling code\", \"username\": \"aintnomyth\", \"post_text\": \"I'm running into issues (likely user error) with a simple data profiling task.\\n\\nThe first file I'm working with is a 26 field tab delimited file. I sprayed it using CSV (\\\\t delimiter) and the output looks correct for the 100 record sample.\\n\\nI wanted to find the min/max field lengths so I used a PROJECT/TRANSFORM. The code looks approximately like this:\\n\\nm_Name := 'somepath::somefile';\\nm_Format := RECORD\\n\\tstring f1;\\n\\tstring f2;\\n\\tstring fn;\\nEND;\\n\\nm_Dataset := DATASET(m_Name, m_Format,\\tCSV(HEADING(1))\\t);\\n\\nm_FormatLength GetLengths(m_Format L) := TRANSFORM\\n\\tSELF.f1 := LENGTH(L.f1);\\n\\tSELF.f2 := LENGTH(L.f2);\\n\\tSELF.fn := LENGTH(L.fn);\\nEND;\\n\\nm_Lengths := PROJECT(m_Dataset,GetLengths(LEFT)); \\n\\noutput(m_Dataset);\\noutput(m_Lengths);\\n
\\nThis also looked correct for the 100 record sample. \\n\\nFor the next step I want to produce 1 row listing the max length for fields f1 through fn. I tried another PROJECT/TRANSFORM but that listed every row instead of just 1. Next I tried a RECORD/TABLE and here is the interesting part; it seems that forcing the iteration over the entire file causes "Result 1" (m_Dataset) to cram everything into field 1. Before I go off the deep end investigating the file format, is this a common user-error with a common fix?\\n\\nThanks in advance!\", \"post_time\": \"2011-07-19 18:34:01\" },\n\t{ \"post_id\": 20863, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nPersonally, I would go with your B solution. \\n\\nIt doesn't hurt anything to have a nested child dataset that contains no records, because nested child datasets are inherently variable-length records and my understanding is that an empty child dataset takes up no room in the record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-02-22 14:17:06\" },\n\t{ \"post_id\": 20853, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"jwilt\", \"post_text\": \"Possibly another alternative:\\n\\n// C) Contains child datasets with conditional maxcounts\\nlPar := RECORD\\n INTEGER id;\\n BOOLEAN hasChld1;\\n BOOLEAN hasChld2;\\n DATASET(lChd1) ds1 {maxcount(if(self.hasChld1, 10, 0))};\\n DATASET(lChd2) ds2 {maxcount(if(self.hasChld2, 20, 0))};\\nEND;\\n
\\nAny benefit here?\", \"post_time\": \"2018-02-21 20:07:40\" },\n\t{ \"post_id\": 20843, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"jwilt\", \"post_text\": \"[quote="ghalliday":27jx4ujb]Once you've read your data in, it is worth converting it (e.g., using PROJECT) to a single format that doesn't contain IFBLOCKs since the systems tends to process rows more efficiently if they don't contain conditional fields.\\n\\nSeverely belated follow-up question, if I may...\\nWhich of the following would be (generally) more efficient?\\n\\n// A) Contains child datasets only conditionally\\nlPar := RECORD\\n INTEGER id;\\n BOOLEAN hasChld1;\\n BOOLEAN hasChld2;\\n IFBLOCK(SELF.hasChld1)\\n DATASET(lChd1) ds1;\\n END;\\n IFBLOCK(SELF.hasChld2)\\n DATASET(lChd2) ds2;\\n END;\\nEND;\\n\\n// B) Always contains (possibly empty) child datasets\\nlPar := RECORD\\n INTEGER id;\\n DATASET(lChd1) ds1;\\n DATASET(lChd2) ds2;\\nEND;\\n
\\n\\nYour comment above, Gavin, seems to indicate that B) would be more efficient?\\nThanks.\", \"post_time\": \"2018-02-21 18:57:58\" },\n\t{ \"post_id\": 191, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"vfpeter\", \"post_text\": \"You guys rock! \\n\\nRichard, thanks for the example. \\n\\nI made some minor changes as I already had the layout defined previously.\\n\\n
MultiRec := RECORD\\n\\tSTRING3 RecSeq;\\n\\tSTRING1 RecType;\\n\\tIFBLOCK(SELF.RecType = 'A')\\n\\t\\tMyLayoutA AND NOT [RecordSequence, RecordType] NewRec_A;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = 'B')\\n\\t\\tMyLayoutB AND NOT [RecordSequence, RecordType] NewRec_B;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = 'C')\\n\\t\\tMyLayoutC AND NOT [RecordSequence, RecordType] NewRec_C;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = 'D')\\n\\t\\tMyLayoutD AND NOT [RecordSequence, RecordType] NewRec_D;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = 'E')\\n\\t\\tMyLayoutE AND NOT [RecordSequence, RecordType] NewRec_E;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = 'F')\\n\\t\\tMyLayoutF AND NOT [RecordSequence, RecordType] NewRec_F;\\n\\tEND;\\n\\tSTRING1 EOL;\\nEND;\\n\\nds1 := DATASET('~TEST::MULTILAYOUT::TestInputData', MultiRec, THOR);\\nOUTPUT(ds1,NAMED('FinalRead'));
\\n\\n[quote="ghalliday":2ztslr93]Once you've read your data in, it is worth converting it (e.g., using PROJECT) to a single format that doesn't contain IFBLOCKs since the systems tends to process rows more efficiently if they don't contain conditional fields.\\nThat is exactly what I plan to do, after I read the data. \\n\\nThanks for all the help.\\nPeter\", \"post_time\": \"2011-07-25 20:05:28\" },\n\t{ \"post_id\": 190, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"ghalliday\", \"post_text\": \"Once you've read your data in, it is worth converting it (e.g., using PROJECT) to a single format that doesn't contain IFBLOCKs since the systems tends to process rows more efficiently if they don't contain conditional fields.\", \"post_time\": \"2011-07-25 15:39:51\" },\n\t{ \"post_id\": 189, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"I created this file in a text editor:\\n\\n1AAAAABBBBBCCCCCDDDDD\\n2AABBCCDDEEFFGGHHIIJJ\\n1FFFFFGGGGGHHHHHIIIII\\n2FFGGHHIIJJKKLLMMNNOO\\n\\nThen I uploaded it to the VM and did a spray fixed with rec length 23 (CRLF added by text editor). This code reads the file and splits out the separate structures perfectly (you'll need to look at the result through the ECL Watch page, not the Results tab of the ECL IDE):\\n\\n\\nMultiRec := RECORD\\n STRING1 RecType;\\n\\tIFBLOCK(SELF.RecType = '1')\\n\\t STRING5 F1_1;\\n\\t STRING5 F2_1;\\n\\t STRING5 F3_1;\\n\\t STRING5 F4_1;\\n\\tEND;\\n\\tIFBLOCK(SELF.RecType = '2')\\n\\t STRING2 F1_2;\\n\\t STRING2 F2_2;\\n\\t STRING2 F3_2;\\n\\t STRING2 F4_2;\\n\\t STRING2 F5_2;\\n\\t STRING2 F6_2;\\n\\t STRING2 F7_2;\\n\\t STRING2 F8_2;\\n\\t STRING2 F9_2;\\n\\t STRING2 F10_2;\\n\\tEND;\\n\\tSTRING2 CRLF;\\nEND;\\nds := dataset('~TEST::MULTILAYOUT::InputData',MultiRec,flat);\\n\\nOUTPUT(ds);\\n
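Following Gavin's suggestion quoted above, a hedged sketch of the PROJECT step that flattens the conditional (IFBLOCK) rows into a single layout; FlatRec and MakeFlat are made-up names, and the fields assume the two-record-type MultiRec example just shown (only the first four type-2 fields are carried across in this sketch):

FlatRec := RECORD
    STRING1 RecType;
    STRING5 F1;
    STRING5 F2;
    STRING5 F3;
    STRING5 F4;
END;

FlatRec MakeFlat(MultiRec L) := TRANSFORM
    SELF.RecType := L.RecType;
    // fields inside an IFBLOCK that is not present for a row simply read as blank,
    // so the IF picks whichever variant applies to this record type
    SELF.F1 := IF(L.RecType = '1', L.F1_1, L.F1_2);
    SELF.F2 := IF(L.RecType = '1', L.F2_1, L.F2_2);
    SELF.F3 := IF(L.RecType = '1', L.F3_1, L.F3_2);
    SELF.F4 := IF(L.RecType = '1', L.F4_1, L.F4_2);
END;

flat := PROJECT(ds, MakeFlat(LEFT));
OUTPUT(flat, NAMED('FlattenedRead'));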
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-07-25 15:26:41\" },\n\t{ \"post_id\": 188, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"vfpeter\", \"post_text\": \"Thanks for the quick response David . I will try this out.\\n\\nPeter.\", \"post_time\": \"2011-07-25 14:57:50\" },\n\t{ \"post_id\": 187, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Re: Working with Multi-Layout Fixed Length File\", \"username\": \"dabayliss\", \"post_text\": \"Ahh ... the old Cobol Copybook ... looks nostalgic
\\n\\nThis is one of the first 'nasty' things ECL had to support; if you look in the language reference under record structure - there is a capability called IFBLOCK - it essentially allows you to introduce a section of a fixed length field record that only exists dependent upon an expression based upon prior fields ...\\n\\nIt is a while since I have actually used the feature myself - but if you have any questions please ask - I can blow out the cobwebs ...\\n\\nDavid\", \"post_time\": \"2011-07-25 14:22:59\" },\n\t{ \"post_id\": 186, \"topic_id\": 73, \"forum_id\": 10, \"post_subject\": \"Working with Multi-Layout Fixed Length File\", \"username\": \"vfpeter\", \"post_text\": \"I have sprayed a fixed length file. Although the length of each line is the same, the layout of each line may or may not be the same. Say I have 5 layouts (L1, L2, L3, L4 and L5). A transaction may consist of 1 to n of such layouts (say L1, L2, L2, L3, L3, L4, L5). Another transaction may look like L1, L2, L3, L4, L4, L4, L4, L5, L5.\\n\\nMy dilemma is that I cannot have a common layout to read all the lines. However, I can parse the 3rd character to determine the layout that the line is following.\\n\\nIs there an ECL design pattern that I can use to tackle this efficiently?
\", \"post_time\": \"2011-07-25 12:29:00\" },\n\t{ \"post_id\": 205, \"topic_id\": 74, \"forum_id\": 10, \"post_subject\": \"Re: Spray CSV for a tab delimited file\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the replies, I'll specify the delimiter in ECL for now.\", \"post_time\": \"2011-07-28 14:15:51\" },\n\t{ \"post_id\": 202, \"topic_id\": 74, \"forum_id\": 10, \"post_subject\": \"Re: Spray CSV for a tab delimited file\", \"username\": \"ghalliday\", \"post_text\": \"In a word, No (!)\\n\\nIf the separator/quote/terminator are not specified in the ECL and the file was sprayed, then it should pick up the settings from the spray.\\n\\nIt looks like it might be a spelling mistake in the tag name (seperate v separate) that may be causing the problem. I'll open a bug.\", \"post_time\": \"2011-07-28 08:21:38\" },\n\t{ \"post_id\": 193, \"topic_id\": 74, \"forum_id\": 10, \"post_subject\": \"Re: Spray CSV for a tab delimited file\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"
Must I explicitly define the dataset using CSV(SEPARATOR('\\\\t')) in ECL when the Spray CSV operation already has "\\\\t" in the Separator field?
\\n\\nIn a word, Yes.\\n\\nSpray is an operation that only gets the data file into the system so you may use it. It is part of the Distributed File Utility -- part of the infrastructure of the HPCC. The Spray CSV page can spray -any- variable-length data file (with a record delimiter), not just field-delimited files like CSV or tab-delimited.\\n\\nThe ECL definition of the file needs to define the file as it is on disk. That's why the DATASET must specify the SEPARATOR if it is anything other than the default comma.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2011-07-26 15:45:23\" },\n\t{ \"post_id\": 192, \"topic_id\": 74, \"forum_id\": 10, \"post_subject\": \"Spray CSV for a tab delimited file\", \"username\": \"aintnomyth\", \"post_text\": \"I'm spraying a tab delimited file. From the [Spray CSV] page I select "\\\\t" in the Separator field to indicate that the file is delimited by tabs. I use the file in ECL with this code:\\n\\nm_Dataset := DATASET(m_Name, m_Format,\\tCSV(HEADING(1))\\t);
\\n\\nMy test file has 26 fields and the above code splits fields properly (I see data in all 26 columns of the Result pane) until I perform an operation that iterates over the entire file, at which point it seems to revert to comma delimited splitting behavior (the entire line is dumped into columns 1 through 3 depending on how many commas were found). \\n\\nWhen I explicitly define the separator in ECL code it does split the fields as expected:\\nm_Dataset := DATASET(m_Name, m_Format,\\tCSV(SEPARATOR('\\\\t'))\\t );
\\n\\nIs this user error or a bug or just one of those things?\\n\\nThanks!\\n\\n[edit]\\nSorry, I read this again and realized I need to define my question a little better:\\n\\nMust I explicitly define the dataset using CSV(SEPARATOR('\\\\t')) in ECL when the Spray CSV operation already has "\\\\t" in the Separator field? \\n\\nJust curious, thanks again!\", \"post_time\": \"2011-07-25 21:08:35\" },\n\t{ \"post_id\": 647, \"topic_id\": 78, \"forum_id\": 10, \"post_subject\": \"Re: working with dates\", \"username\": \"ghalliday\", \"post_text\": \"Version 3.4 will contain the start of a date manipulation library. \\n\\nIn particular it provides\\n- a date type (stored as YYYYMMDD in a decimal representation)\\n- a days type.\\n- Functions to convert julian and gregorian calendar days to days since ...\\n- todays date\\n- Functions to convert dates to and from strings.\\n- Other utility functions (is leap year, months between).\\n\\nI believe it contains the core functionality you need to do most date processing, but I am sure there are capabilities it would be useful to include. Suggestions and implementation of enhancements gratefully received.\\n\\nThere should also be a time module, and probably support for imestamps including both elements. Those are simpler to implement, so the date was considered the priority.\", \"post_time\": \"2011-11-29 12:17:39\" },\n\t{ \"post_id\": 636, \"topic_id\": 78, \"forum_id\": 10, \"post_subject\": \"Re: working with dates\", \"username\": \"sasi\", \"post_text\": \"I wish ECL has DATE manipulation library. Everyone writing this same function is waste of time.\", \"post_time\": \"2011-11-27 23:24:40\" },\n\t{ \"post_id\": 221, \"topic_id\": 78, \"forum_id\": 10, \"post_subject\": \"Re: working with dates\", \"username\": \"aintnomyth\", \"post_text\": \"Exactly what I was looking for, thanks for the help.\", \"post_time\": \"2011-08-02 13:01:13\" },\n\t{ \"post_id\": 220, \"topic_id\": 78, \"forum_id\": 10, \"post_subject\": \"Re: working with dates\", \"username\": \"bforeman\", \"post_text\": \"We actually teach this one in our training classes. 
We start with a FUNCTION that converts an 8 character date to its Julian Date:\\n\\nEXPORT Z2JD(STRING8 Zdate) := FUNCTION\\n// adapted from an algorithm described here:\\n// http://quasar.as.utexas.edu/BillInfo/JulianDatesG.html\\n A(Y) := TRUNCATE(Y/100);\\n B(Aval) := TRUNCATE(Aval/4);\\n C(Y) := 2-A(Y)+B(A(Y));\\n\\n E(Y) := TRUNCATE(365.25 * (Y+4716));\\n F(M) := TRUNCATE(30.6001 * (M+1));\\n \\n Yval := IF( (INTEGER1)(Zdate[5..6]) < 3,(INTEGER2)(Zdate[1..4])-1,(INTEGER2)(Zdate[1..4]));\\n Mval := IF( (INTEGER1)(Zdate[5..6]) < 3,(INTEGER1)(Zdate[5..6])+12,(INTEGER1)(Zdate[5..6]));\\n Dval := (INTEGER1)(Zdate[7..8]);\\n\\n RETURN IF(Zdate='',0,TRUNCATE(C(Yval) + Dval + E(Yval) + F(Mval)- 1524.5));\\nEND;\\n\\nAfter that, it's a simple matter to write a function to compare two Julian dates:\\n\\nWithinDays(STRING8 ldate,\\n STRING8 rdate,\\n\\t INTEGER days) := ABS(Z2JD(ldate)-Z2JD(rdate)) <= days;\\n\\nOnce you get the date range, you can bang out months or years from this if needed.\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-08-02 12:20:52\" },\n\t{ \"post_id\": 217, \"topic_id\": 78, \"forum_id\": 10, \"post_subject\": \"working with dates\", \"username\": \"aintnomyth\", \"post_text\": \"Does anyone have any code snippets for computing the number of days/months/years between two dates?\\n\\nI could probably bang it out but I figured I would ask first...\\n\\nThanks!\", \"post_time\": \"2011-08-01 19:41:09\" },\n\t{ \"post_id\": 258, \"topic_id\": 86, \"forum_id\": 10, \"post_subject\": \"Re: CSV with headers and footers\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nAFAIK, there is no FOOTER option similar to the HEADER option provided with CSV files. I can think of many ways of handling footers, or any record for that matter that would not meet your criteria. One way would be to create a simply PROJECT with a TRANSFORM, and then in the TRANSFORM conditionally SKIP those records that you want to reject.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-08-11 13:35:24\" },\n\t{ \"post_id\": 253, \"topic_id\": 86, \"forum_id\": 10, \"post_subject\": \"CSV with headers and footers\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nDoes the DATASET function support removing footer records similarly to the CSV(HEADING(1)) syntax?\\n\\nThanks!\", \"post_time\": \"2011-08-11 12:11:02\" },\n\t{ \"post_id\": 265, \"topic_id\": 88, \"forum_id\": 10, \"post_subject\": \"Re: working with free form docs\", \"username\": \"Tony Kirk\", \"post_text\": \"I think you'll want to define the DATASET as CSV (instead of THOR) and match the options (separator, quote, terminator) you used when spraying.\\n\\nAs defined (THOR dataset, STRING field of variable length), the first four bytes (as an UNSIGNED4) of the string are expected to tell its length, and it is probably trying to allocate enough memory to load that field/record based upon that integer. If the DATASET is defined as CSV, it will parse to the separator/terminator, and that may solve it.\", \"post_time\": \"2011-08-11 14:54:56\" },\n\t{ \"post_id\": 262, \"topic_id\": 88, \"forum_id\": 10, \"post_subject\": \"working with free form docs\", \"username\": \"mjwalshe\", \"post_text\": \"Hi i am trying to understand how to manipulate free text file and am hitting a bit of a problem:\\n\\nTo start my aim is to count the number of words in a text file (before trying feature extract and clustering) I am using a text file from the Guttenberg project. 
I have sprayed the file using the csv option, clearing out the separator and quote values.\\n\\nThis succeeds and gives me a file wordcount::roundup \\nWorking from the fixed length example I think I know how it should go; I defined the layout of the file like this. \\n\\nEXPORT Layout_Book := RECORD\\n\\tSTRING Line;\\nEND;\\n
\\nI then Export the file like this\\n\\nIMPORT WordCount;\\nEXPORT File_Book := DATASET('~wordcount::roundup',WordCount.Layout_Book,THOR);
\\n\\nAnd then when I try to do something \\n\\nIMPORT WordCount;\\nCOUNT(WordCount.File_Book);\\n\\nI get an error:\\n\\nError: System error: 11: Graph[1], diskcount[2]: SLAVE 127.0.1.1:6600: JMalloc Heap error 11 ((nil)), JMalloc Heap error 11 ((nil)) - handling file: /var/lib/HPCCSystems/hpcc-data/thor/wordcount/roundup._1_of_1 (0, 0), 11,
\\n\\nAny Ideas This is running community_3.0.2-5 on unbuntu 64bit in a Windows XP host 32 Bit VM with 1024Mb and 40GB disk.\", \"post_time\": \"2011-08-11 14:27:20\" },\n\t{ \"post_id\": 273, \"topic_id\": 91, \"forum_id\": 10, \"post_subject\": \"Re: Commented out code?\", \"username\": \"kovacsbv\", \"post_text\": \"Yes, that's it. I'm still picking up the basics of ECL.\\n\\nThanks,\\n\\nVic\", \"post_time\": \"2011-08-12 15:11:42\" },\n\t{ \"post_id\": 272, \"topic_id\": 91, \"forum_id\": 10, \"post_subject\": \"Re: Commented out code?\", \"username\": \"richardkchapman\", \"post_text\": \"I suspect it's just used position 1,1 as the error location as it can't give you a better estimate than that.\\n\\nis your code missing an EXPORT first_query := XXX; at the end?\", \"post_time\": \"2011-08-12 15:09:21\" },\n\t{ \"post_id\": 271, \"topic_id\": 91, \"forum_id\": 10, \"post_subject\": \"Commented out code?\", \"username\": \"kovacsbv\", \"post_text\": \"I tried to comment out a line of code and it seems the compiler looked at it anyway.\\n\\nIs this a bug or a sign that I didn't do something else I should have?\\n\\nSorry for the compression artifacts in the screen shot,\\nbut I tried to make the file small as possible.\\n\\n[attachment=0:3hc0vao9]00--Error on commented out code.jpg\\n\\nVic\", \"post_time\": \"2011-08-12 15:06:40\" },\n\t{ \"post_id\": 277, \"topic_id\": 93, \"forum_id\": 10, \"post_subject\": \"Re: Casting of all Fields of a Dataset in single text\", \"username\": \"ghalliday\", \"post_text\": \"Try\\n\\nSELF.longLine := TRANSFER(L, STRING114);\", \"post_time\": \"2011-08-16 09:58:38\" },\n\t{ \"post_id\": 276, \"topic_id\": 93, \"forum_id\": 10, \"post_subject\": \"Casting of all Fields of a Dataset in single text\", \"username\": \"sanjaykumar_ln\", \"post_text\": \"rec := RECORD\\nSTRING10 field1;\\nSTRING60 field2;\\nSTRING14 field3;\\nSTRING30 field4;\\nEND;\\nlongRec := RECORD\\nSTRING longLine;\\nEND;\\nds := DATASET ([{'ROW1FLD1','ROW1FLD2','ROW1FLD3','ROW1FLD4'},\\n {'ROW2FLD1','ROW2FLD2','ROW2FLD3','ROW2FLD4'}], rec); \\n\\nlongRec fieldToStringXform (rec L) := TRANSFORM\\n SELF.longLine := L.field1 + L.field2 + L.field3 + L.field4;\\nEND;\\n\\nlongDS := PROJECT (ds, fieldToStringXform (LEFT));\\n\\nAbove code uses field1, field2, field3 and so during Transform.\\n\\nIs there other way to get big text string without referring all fields in dataset (ds) similar to TRANSFER ? TRANSFER works other way in above example.\", \"post_time\": \"2011-08-13 14:28:20\" },\n\t{ \"post_id\": 7484, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"Re: patial match on String fields\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Dan,\\n\\nThank you very much for your reply.\\nFinally, we decided to introduce a C++ function to count the number of matching characters between two Strings, just as you suggested.\\nWe tried to avoid this as an exercise of thought. We were also concerned with performance, but it's good to hear that using C++ is probably more efficient. \\n\\nThanks again for your answer,\\nBest regards,\\nSandra\", \"post_time\": \"2015-04-29 12:27:09\" },\n\t{ \"post_id\": 7444, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"Re: patial match on String fields\", \"username\": \"DSC\", \"post_text\": \"Hi Sandra,\\n\\nWhy would you avoid C++ for this task? This is one of those cases where C++ allows you to write a much more concise and probably more efficient solution. 
You may be looking for an ECL solution as a thought exercise, I guess, in which case I apologize for this post.\\n\\nHere is a version of your code that uses a simple C++ routine to calculate the length of a shared prefix between two strings:\\n\\nUNSIGNED4 PrefixMatchLength(STRING s1, STRING s2) := BEGINC++\\n unsigned int p = 0;\\n \\n while (p < lenS1 && p < lenS2 && s1[p] == s2[p])\\n ++p;\\n \\n return p;\\nENDC++;\\n\\nrec := {STRING100 line};\\nstrings:=DATASET([{'ch101'},{'ch110'},{'ch12'},{'ch15'},{'ch112'}] ,rec);\\nSTRING Given:='ch112';\\n\\nNewRecord := RECORD\\n strings;\\n INTEGER no:=0;\\nend;\\n\\nNewRecord CalculateMatch(rec l):= TRANSFORM\\n SELF.no:=PrefixMatchLength(l.line, Given); \\n SELF:=L;\\nEND;\\n\\noutput(PROJECT(strings,CalculateMatch(LEFT)));\\n
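For completeness, a hedged pure-ECL alternative (no embedded C++) that uses the counted DATASET/TRANSFORM form to compare the two strings position by position; PrefixLenECL is a made-up name:

PrefixLenECL(STRING s1, STRING s2) := FUNCTION
    limit := MIN(LENGTH(TRIM(s1)), LENGTH(TRIM(s2)));
    // one row per character position, flagging whether the strings agree there
    cmp := DATASET(limit,
                   TRANSFORM({UNSIGNED4 pos; BOOLEAN same},
                             SELF.pos  := COUNTER,
                             SELF.same := s1[COUNTER] = s2[COUNTER]));
    firstDiff := MIN(cmp(NOT same), pos);
    RETURN IF(EXISTS(cmp(NOT same)), firstDiff - 1, limit);
END;

OUTPUT(PrefixLenECL('ch112', 'ch110'));  // 4
OUTPUT(PrefixLenECL('PH123', 'PH145'));  // 3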
\\nHope this helps.\\n\\nDan\", \"post_time\": \"2015-04-24 13:14:55\" },\n\t{ \"post_id\": 7398, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"Re: patial match on String fields\", \"username\": \"SandraBuda\", \"post_text\": \"Hi,\\n\\nRelated to this question, I was wondering whether there is a solution for counting the number of matching characters between two strings. \\n\\nFor instance, given two strings str1:='PH123', and str2:='PH145', countMatch(str1,str2) should return 3. I would like to do this using the ECL language and not C++, if possible.\\n\\nI've tried using the MATCHLENGTH, but unsuccesfully. I also did not find any function from the str lib that could aid for this purpose. Below is a sample of my attempts:\\n\\nrec := {STRING100 line};\\nstrings:=DATASET([{'ch101'},{'ch110'},{'ch12'},{'ch15'},{'ch112'}] ,rec);\\nSTRING Given:='ch112';\\nRULE pRule :=PATTERN('ch112'); /* this rule does not accept the string Given, which is also unfortunate */\\n\\nNewRecord := RECORD\\n\\tstrings;\\n\\tINTEGER no:=0;\\nend;\\n\\nNewRecord CalculateMatch(rec l):= TRANSFORM\\n\\tSELF.no:=MATCHLENGTH(pRule); //self.no:=Std.Str.Find(l.line,Given);\\t\\n\\tSELF:=L;\\nEND;\\n\\noutput(PROJECT(strings,CalculateMatch(LEFT)));/* should ideally return each string with its number of matching characters; currently only returns one string (the one that matches)*/\\n\\n
\\n\\nAny help is much appreciated,\\nBest regards,\\nSandra\", \"post_time\": \"2015-04-17 15:04:55\" },\n\t{ \"post_id\": 306, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"Re: patial match on String fields\", \"username\": \"dabayliss\", \"post_text\": \"Hey Hongchao,\\n\\nThe full and complete answer to your question is really a 3 day course - so let me give you a few 'factoids' which will hopefully help:\\n\\n1) The ECL capability equivalent to SELECT is a superset of anything I have ever seen in SQL\\n2) Provided you are doing the 'fuzzy stuff' on a lower order component of your key the you can use our REGEX library (or any other ECL you right) to do any degree of fuzzing you require\\n3) As Richard points out there are many things that people use LIKE for in SQL that have more direct equivalents in ECL that the system can optimize (and which can then be used higher up the key)\\n4) In the pathogenic case of needing to support horrible REGEX in the leading component of a big key (*DF*12*) - then yes you will want to build some special keys - fortunately we have a technology (smart stepping) to make those keys go faster that one would expect\\n\\nDavid\", \"post_time\": \"2011-09-02 00:09:11\" },\n\t{ \"post_id\": 304, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"Re: patial match on String fields\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"The short answer is no -- filtering similar to SQL's LIKE does not take any special functions in ECL. You just need to use the "indexing into a string" functionality in most cases. I went to http://www.techonthenet.com/sql/like.php and got a few examples that I translated to ECL like this:\\n\\n//SELECT * FROM suppliers\\n//WHERE supplier_name like 'Hew%';\\n
ds := suppliers(supplier_name[1..3]='Hew');
\\n\\n//SELECT * FROM suppliers\\n//WHERE supplier_name like '%bob%';\\nIMPORT Std;\\nds := suppliers(Std.Str.Contains(supplier_name,'bob'));
\\n\\n//SELECT * FROM suppliers\\n//WHERE supplier_name not like 'T%';\\nds := suppliers(supplier_name[1]<>'T');
\\n\\n//SELECT * FROM suppliers\\n//WHERE supplier_name like 'Sm_th';\\nds := suppliers(supplier_name[1..2]='Sm',\\n supplier_name[4..5]='th',\\n LENGTH(TRIM(supplier_name))=5);
\\n\\n//SELECT * FROM suppliers\\n//WHERE account_number like '12317_';\\nds := suppliers(account_number[1..5]='12317',LENGTH(TRIM(account_number))=6);
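The snippets above assume a suppliers dataset is already defined; purely for illustration (the record layout and values are made up), a tiny inline dataset lets them run as-is:

SuppRec := RECORD
    STRING25 supplier_name;
    STRING6  account_number;
END;

suppliers := DATASET([{'Hewlett Packard', '123170'},
                      {'IBM',             '123456'},
                      {'Smith',           '123171'}], SuppRec);

// supplier_name LIKE 'Hew%'
OUTPUT(suppliers(supplier_name[1..3] = 'Hew'));

// supplier_name LIKE 'Sm_th' (exactly five characters)
OUTPUT(suppliers(supplier_name[1..2] = 'Sm' AND supplier_name[4..5] = 'th' AND
                 LENGTH(TRIM(supplier_name)) = 5));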
\", \"post_time\": \"2011-09-01 23:22:02\" },\n\t{ \"post_id\": 297, \"topic_id\": 101, \"forum_id\": 10, \"post_subject\": \"patial match on String fields\", \"username\": \"hli\", \"post_text\": \"Hi,\\n\\nDo we need build special index for string fields to support partial match (or substring match) function? In SQL, we can use 'LIKE' to perform this kind of filter. How can we realize this function in HPCC? \\n\\nAny help is appreciated. \\n\\n\\nThanks,\\n\\n-Hongchao\", \"post_time\": \"2011-09-01 14:35:57\" },\n\t{ \"post_id\": 307, \"topic_id\": 102, \"forum_id\": 10, \"post_subject\": \"Re: System Error(1451) when load a CSV file\", \"username\": \"richardkchapman\", \"post_text\": \"If there are no quote characters in your file at all then it shouldn't matter whether you have specified a QUOTE setting for the csv read activity or not.\\n\\nBut if there is a single ' with no closing ' on any line in the file it can cause chaos, as the CSV 'spec' - such as it exists - allows such quoted strings to span multiple lines, and thus the record can get arbitrarily large. It's possible that is what is happening here, though I wouldn't have expected you to get an out-of-memory exception as we do place an upper limit on how large a single CSV 'line' can be precisely to avoid that scenario.\\n\\nIf you don't need your string fields to potentially include embedded separator characters, and there are no quote characters in the input file that you want to have stripped, then it is safest to give an empty QUOTE set.\", \"post_time\": \"2011-09-02 08:11:30\" },\n\t{ \"post_id\": 305, \"topic_id\": 102, \"forum_id\": 10, \"post_subject\": \"Re: System Error(1451) when load a CSV file\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"Could we see your RECORD structure and DATASET declaration for the file along with an example data record?\", \"post_time\": \"2011-09-01 23:25:14\" },\n\t{ \"post_id\": 301, \"topic_id\": 102, \"forum_id\": 10, \"post_subject\": \"Re: System Error(1451) when load a CSV file\", \"username\": \"hli\", \"post_text\": \"I used the default setting: \\nseparator: \\\\,\\nline terminator: \\\\n, \\\\n\\\\r\\nQuote: '\\n\\nBut, my file DOES NOT have any quote at all. Should I add Quote to include string values?\\n\\nThanks,\", \"post_time\": \"2011-09-01 21:04:23\" },\n\t{ \"post_id\": 300, \"topic_id\": 102, \"forum_id\": 10, \"post_subject\": \"Re: System Error(1451) when load a CSV file\", \"username\": \"Tony Kirk\", \"post_text\": \"My guess is there's a mismatch between the CSV options (terminator, separator, quote) and the actual file. Any particulars you can provide about the file and your DATASET definiton (especially the CSV options provided) may help.\\n\\nDo keep in mind that the omission of a CSV option such as QUOTE does not instruct ECL that there is no quote, it tells it to use the default. I have seen this cause what you are seeing, as the system is looking for the closing apostrophe/quote before ending the field.\", \"post_time\": \"2011-09-01 20:58:25\" },\n\t{ \"post_id\": 299, \"topic_id\": 102, \"forum_id\": 10, \"post_subject\": \"System Error(1451) when load a CSV file\", \"username\": \"hli\", \"post_text\": \"hi,\\n\\nWhen I tried to OUTPUT the content of a CSV file, I got a system error:\\nError: System error: 1451: Memory pool exhausted (in Disk Read G1 E2) (0, 0), 1451, \\n\\nThe CSV file is only about 12Mbytes and my thor cluster has two nodes each having 4G memory. So, the memory should not be a problem. 
\\n\\nOne potential problem is that this file has 22 fields, 2 of which are float, 7 are strings and the rest 13 are integers. I do not know the length or range of each field, so I directly use REAL, STRING and INTEGER respectively in the record definition.\\n\\nThen, I load the file into a DATASET and then output the dataset. \\n\\nAnd, when I tried to Count the record number, I got:\\n\\nError: System error: 11: Graph[1], diskcount[2]: SLAVE 172.27.10.130:6600: JMalloc Heap error 11 ((nil)), JMalloc Heap error 11 ((nil)) - handling file: /var/lib/HPCCSystems/hpcc-data/thor/ipovw/20110825/000part._2_of_2\\n\\nAny help is appreciated. \\n\\n-Hongchao\", \"post_time\": \"2011-09-01 18:47:23\" },\n\t{ \"post_id\": 450, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"HPCC Staff\", \"post_text\": \"The beta version is still in development. We will have a better idea on ETA within the coming weeks and can contact you directly. Thank you for your interest!\", \"post_time\": \"2011-10-13 12:08:21\" },\n\t{ \"post_id\": 441, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"mayurchoubey\", \"post_text\": \"Hey\\n\\nI want to have this urgently. Can you suggest me a tentative date for the beta release of this.\\n\\nThanks.\", \"post_time\": \"2011-10-12 11:18:05\" },\n\t{ \"post_id\": 339, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"dabayliss\", \"post_text\": \"Hey,\\n\\nWe are verifying compatibility with the latest OSS release and finalizing the docs. We will reach out to you as soon as they are ready.\\nThank you for you interest\\n\\nDavid\", \"post_time\": \"2011-09-14 12:33:18\" },\n\t{ \"post_id\": 338, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"mayurchoubey\", \"post_text\": \"Any update on this? \\n\\nThanks,\\nMayur\", \"post_time\": \"2011-09-14 10:25:15\" },\n\t{ \"post_id\": 336, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"mayurchoubey\", \"post_text\": \"Surely we can help you in making it production ready. Let me know further details.\\n\\nThanks.\", \"post_time\": \"2011-09-13 13:35:42\" },\n\t{ \"post_id\": 335, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"Re: BACON Pig to ECL converter\", \"username\": \"dabayliss\", \"post_text\": \"Hey There,\\n\\nBACON is not a 'production ready' part of our distribution presently. However, we are actively looking for beta testers; so if you are interested in participating please let us know.\\n\\nDavid\", \"post_time\": \"2011-09-13 13:28:56\" },\n\t{ \"post_id\": 331, \"topic_id\": 111, \"forum_id\": 10, \"post_subject\": \"BACON Pig to ECL converter\", \"username\": \"mayurchoubey\", \"post_text\": \"Hi,\\n\\nI am new to HPCC. I need to convert my pig scripts to ECL. Currently using VM image and not be able to find or locate BACON command line tool.\\n\\nPlease suggest how this can be achieved.\\n\\nThanks in advance.\", \"post_time\": \"2011-09-13 07:48:49\" },\n\t{ \"post_id\": 342, \"topic_id\": 113, \"forum_id\": 10, \"post_subject\": \"Re: ECL implementation of compound boolean expression evalua\", \"username\": \"ghalliday\", \"post_text\": \"There are a several different ways of approaching this.\\n\\nThe simplest way to implement a stack would be to use a child dataset. 
In this case a child dataset for the operator stack, and one for the values.\\n\\nYou can use PARSE (or something simpler) to split the string into a dataset of tokens. You should be able to use either ROLLUP or AGGREGATE to process each token in turn, and generate a new output record with modified stacks.\\n\\nThe other alternative is to use the productions in the PARSE functionality. \\nIf you have access to the github sources, see ecl\\\\regress\\\\tpatcalc.ecl in the eclcc regression suite which contains a sample calulator. Unfortunately that varient of PARSE doesn't yet support unicode.\", \"post_time\": \"2011-09-15 08:04:15\" },\n\t{ \"post_id\": 340, \"topic_id\": 113, \"forum_id\": 10, \"post_subject\": \"ECL implementation of compound boolean expression evaluator\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"I am looking at the possibility of implementing a compound boolean expression evaluator in ECL, one that takes a postfix expression such as "A B C AND OR D NOT AND", and then return the boolean evaluation of the expression. 4 questions:\\n\\n1. Does anyone have an ECL implementation of a stack?\\n\\n2. Has anyone implemented this already?\\n\\n3. Is there anything regarding this subject that I'm not taking into consideration but need to do so?\\n\\n4. Am I "out to lunch" on this one?\\n\\nThanks,\\nMike\", \"post_time\": \"2011-09-14 18:59:05\" },\n\t{ \"post_id\": 354, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"Re: A bug on Std.Str.Contains() ?\", \"username\": \"ghalliday\", \"post_text\": \"I think the problem is with the name of the function. I would tend to expect that a function called contains would see if one string is a subset of the other.\\n\\nI am tempted to rename the function to ContainsAllCharacters() to make it much clearer. Would that help?\\n\\nThe most direct equivalent for LIKE is to use Std.Str.Find or REGEXFIND or substrings. Here are some examples, with some ECL equivalents:\\n\\nA) SELECT * FROM people WHERE title LIKE 'Mr%\\n 1) people(title[1..2]='Mr');\\n 2) people(Std.Str.Find(title, 'Mr') = 1);\\n 3) people(REGEXFIND('^Mr', title) != 0);\\n\\nB) SELECT * FROM people WHERE title LIKE '%Mr%\\n 1) people(Std.Str.Find(title, 'Mr') != 0);\\n 2) people(REGEXFIND('Mr', title) != 0);\\n 3) people(REGEXFIND('^.*Mr.*$', title) != 0);\\n\\nC) SELECT * FROM people WHERE extra LIKE 'G__h%PO%x[abc]'\\n 1) people(REGEXFIND('^G..h.*PO.*x[abc]$', extra) != 0)\\n\\n(Apologies if the syntax isn't quite right. I didn't verify either the SQL or the ECL.)\", \"post_time\": \"2011-09-21 11:23:52\" },\n\t{ \"post_id\": 351, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"Re: A bug on Std.Str.Contains() ?\", \"username\": \"bforeman\", \"post_text\": \"For a partial match, use string indexing, like this:\\n\\nzip[1...5] = '33024'\\n\\nSee the Language Reference for more info (search for string indexing)\", \"post_time\": \"2011-09-18 14:17:25\" },\n\t{ \"post_id\": 348, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"Re: A bug on Std.Str.Contains() ?\", \"username\": \"hli\", \"post_text\": \"Why it returns all the combination of the letters? Why not the first string contains the second one as substring? 
\\n\\nIf I want to do a partial match like SQL syntax : LIKE "%substring%", what should I do?\", \"post_time\": \"2011-09-16 13:12:58\" },\n\t{ \"post_id\": 347, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"Re: A bug on Std.Str.Contains() ?\", \"username\": \"bforeman\", \"post_text\": \"This code works as expected on my machine:\\n\\nIMPORT STD;\\n\\nA := std.str.Contains('33024','33024', true);\\nB:= std.str.Contains('33024','33025', true);\\n\\nOUTPUT(A); //true - a match\\nOUTPUT(B); //false - did not match\\n\\nand a filter test also worked as expected. I got ALL combinations of 30024.\", \"post_time\": \"2011-09-15 22:59:11\" },\n\t{ \"post_id\": 346, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"Re: A bug on Std.Str.Contains() ?\", \"username\": \"bforeman\", \"post_text\": \"Have you tried just a simple filter, like (ZIP = '30024')? Could it be that the string needs to be TRIMmed first?\\n\\nWe need to write a simple test, but we are unaware of a problem with Contains. \\nRemember, it is looking for any match, so it may return 03024, 02430, etc.\", \"post_time\": \"2011-09-15 21:09:25\" },\n\t{ \"post_id\": 345, \"topic_id\": 115, \"forum_id\": 10, \"post_subject\": \"A bug on Std.Str.Contains() ?\", \"username\": \"hli\", \"post_text\": \"Hi,\\n\\nI have tried multiple examples with Std.Str.Contains() on indexed field and found all them failed. \\n\\nFor example, in the tutorial 'People' example, after I built the index on zip, I query on zip with: \\n\\nFETCH(TutorialHL.File_TutorialPerson, TutorialHL.IDX_PeopleByZip(Std.Str.contains(zip, '30024', true)), RIGHT.fpos);\\nOUTPUT(resultSet);\\n\\nit seems to return every record in the table.\\n\\nIs it a potential bug? I am the centos5 version.\\n\\nthanks,\\n\\n-Hongchao\", \"post_time\": \"2011-09-15 20:29:33\" },\n\t{ \"post_id\": 353, \"topic_id\": 116, \"forum_id\": 10, \"post_subject\": \"Re: when to use NOTHOR\", \"username\": \"ghalliday\", \"post_text\": \"NOTHOR is ugly and the code generator should be intelligent enough to not need it... until I (or someone else) fixes it the approximate rules are as follows:\\n\\nNOTHOR needs to be used around operations that use the superfile transactions, and a few others that need to be executed in a global context.\\n\\nHowever you can only do very simple dataset operations within a NOTHOR - filter, project, and a couple of others - since they are implemented inline. If you have some complex data processing that needs to be done and fed into something that uses superfile transactions often the easiest solution is to use GLOBAL() around the dataset.\\n\\nAPPLY(GLOBAL(some-complex-dataset),...);\\n\\nIf that doesn't work try using\\n\\nmyComplexDataset := .... : independent;\\nAPPLY(myComplexDataset,...);\\n\\nwhich is similar, but splits the dataset code into a separate workflow item.\\n\\nMy only concern is you might hit problems with SEQUENTIAL and ordering - in which case you might then need to remove the SEQUENTIAL.\\n\\nPlease come back if you hit any problems.\", \"post_time\": \"2011-09-20 08:18:22\" },\n\t{ \"post_id\": 352, \"topic_id\": 116, \"forum_id\": 10, \"post_subject\": \"when to use NOTHOR\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nI'm bouncing between two different exceptions on a data ingestion process. The algorithm looks like this:\\n1. Scan a directory\\n2. For each file (using Apply)\\n (SEQUENTIAL here)\\n 2.A. Spray the file\\n 2.B. Move the file to a "sprayed" directory\\n 2.C. 
(SEQUENTIAL here too) Update the relevant superfile to include the newly sprayed logical file.\\n\\nThis causes the following exception:\\nError: Cannot call function startsuperfiletransaction in a non-global context (92, 4 - ...)
\\n\\nSo I modified the SEQUENTIAL... to NOTHOR(SEQUENTIAL...) but that causes this exception:\\n\\n INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR.
\\n\\nI'm pretty sure I have no idea when to use NOTHOR and that is probably causing my issue, any insight would be appreciated!\\n\\nThanks\", \"post_time\": \"2011-09-19 21:18:05\" },\n\t{ \"post_id\": 640, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"flavio\", \"post_text\": \"Sasi,\\n\\nThe ECL STD library should be part of the 3.4 release candidate which can be downloaded from: http://hpccsystems.com/download/free-community-edition/all/beta.\\n\\nThe final release for 3.4 should be very close (perhaps tomorrow), so you may want to wait for the final release instead...\\n\\nFlavio\", \"post_time\": \"2011-11-28 18:33:56\" },\n\t{ \"post_id\": 638, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"sasi\", \"post_text\": \"ECL version 6.0.1.5.682.1 does not have date module in STD library. Is it in the later version? \", \"post_time\": \"2011-11-28 05:32:33\" },\n\t{ \"post_id\": 637, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"sasi\", \"post_text\": \"How to know about ECL API? For example std api.\\n\\nIs there any document like Javadoc for java packages?\\n\\nThanks\\nSasi.\", \"post_time\": \"2011-11-27 23:27:22\" },\n\t{ \"post_id\": 583, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"dabayliss\", \"post_text\": \"There is a date module being added to the Std library rsn.\", \"post_time\": \"2011-11-05 16:47:39\" },\n\t{ \"post_id\": 581, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"ECLer2011\", \"post_text\": \"It'll be convinient if ECL makes current DATE & TIME as internal functions, if not yet.\", \"post_time\": \"2011-11-05 00:29:53\" },\n\t{ \"post_id\": 361, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"Re: current date/time\", \"username\": \"bforeman\", \"post_text\": \"Here's the function for gettime. I imagine that there is an equivalent for getdate a as well.\\n\\nRegards,\\n\\nBob Foreman\\n\\n// Function to get time in HHMMSS format\\n// Courtesy : Nigel/Gavin\\nEXPORT GetTime() := FUNCTION\\n//function to get time\\nstring6 getTime() := BEGINC++\\n// Declarations\\nstruct tm localt; // localtime in "tm" structure\\ntime_t timeinsecs; // variable to store time in secs\\n\\n// Get time in sec since Epoch\\ntime(&timeinsecs); \\n// Convert to local time\\nlocaltime_r(&timeinsecs,&localt);\\n// Format the local time value\\nstrftime(__result, 8, "%H%M%S", &localt); // Formats the localtime to HHMMSS\\n\\nENDC++;\\n\\nreturn getTime();\\nEND;\", \"post_time\": \"2011-09-26 18:14:30\" },\n\t{ \"post_id\": 358, \"topic_id\": 120, \"forum_id\": 10, \"post_subject\": \"current date/time\", \"username\": \"aintnomyth\", \"post_text\": \"Is there a function for retrieving the current system date & time?\", \"post_time\": \"2011-09-26 13:39:20\" },\n\t{ \"post_id\": 401, \"topic_id\": 127, \"forum_id\": 10, \"post_subject\": \"Re: Roxie Query\", \"username\": \"bforeman\", \"post_text\": \"I think in this case where the data types are different you would just need to handle 3 input parameters instaed of just 2, and then modify your condition to call the DS1 query or the DS2 query based on the input.\", \"post_time\": \"2011-10-05 12:41:46\" },\n\t{ \"post_id\": 400, \"topic_id\": 127, \"forum_id\": 10, \"post_subject\": \"Re: Roxie Query\", \"username\": \"sameermsc\", \"post_text\": \"Thanks for the reply Bob
\\n\\nIn the above situation, based on the inputs, i am trying to fetch data from the same dataset\\n\\nbased on the reply (as suggested), I have tried another case where the datasets are different (in number of fields and the datatypes (EX: Col2 is integer1 (ID) in DS1 and integer2 (EMPID) in DS2), say \\n\\nDS1 := \\nCol1 Col2 Col3 \\nFN1 N1 DATA1\\nFN1 N2 DATA2\\nFN2 N3 DATA3\\n\\nDS2 := \\nCol1 Col2 Col4 Col5\\nFN1 N1 ADDRESS1 EMAIL1\\nFN1 N2 ADDRESS2 EMAIL2\\nFN2 N3 ADDRESS3 EMAIL3\\n\\nDS1 is used for 1st question (ref above post for question)\\nDS2 is used for 2nd question (ref above post for question)\\n\\nHere i get an error saying:\\n1) Type mismatch for corresponding fields id(integer) vs empid (integer) - 2012\\n2) Name mismatch for corresponding fields id vs empid - 2012\\n\\nIs it possible to have a single roxie query for this case?\", \"post_time\": \"2011-10-05 12:28:39\" },\n\t{ \"post_id\": 399, \"topic_id\": 127, \"forum_id\": 10, \"post_subject\": \"Re: Roxie Query\", \"username\": \"ghalliday\", \"post_text\": \"If you want the result to contain different numbers of columns use a conditional action:\\n\\n
\\n Result := IF(Col2_value = '',\\n              output($.Fetch_Col123(Col1_value)),\\n              output($.Fetch_Col1245(Col1_value,Col2_value)));\\n RETURN Result;\\nEND;\\n
\\n\\nThe query will have two potential results, but only one will be output.\", \"post_time\": \"2011-10-05 12:21:43\" },\n\t{ \"post_id\": 397, \"topic_id\": 127, \"forum_id\": 10, \"post_subject\": \"Re: Roxie Query\", \"username\": \"bforeman\", \"post_text\": \"Hi sameermsc,\\n\\nThe trick is to wrap a FUNCTION around two other FUNCTIONs, and then conditionally call one or the other, based on the input parameters. Something like this:\\n\\nIMPORT $;\\nEXPORT YourSearchService() := FUNCTION\\nSTRING30 Col1_value := '' : STORED('Col1');\\nSTRING30 Col2_value := '' : STORED('Col2');\\nFetched := IF(Col2_value = '',\\n $.Fetch_Col123(Col1_value),\\n $.Fetch_Col1245(Col1_value,Col2_value));\\nRETURN OUTPUT(Fetched);\\nEND;\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-05 11:51:42\" },\n\t{ \"post_id\": 396, \"topic_id\": 127, \"forum_id\": 10, \"post_subject\": \"Roxie Query\", \"username\": \"sameermsc\", \"post_text\": \"I have a sample dataset with the following structure\\n\\nCol1\\tCol2\\tCol3\\tCol4\\t Col5\\nFN1\\tLN1\\tDATA1\\tADDRESS1\\tEMAIL1\\nFN1\\tLN2\\tDATA2\\tADDRESS2\\tEMAIL2\\nFN2\\tLN3\\tDATA3\\tADDRESS3\\tEMAIL3\\n\\nI have the below set of queries which return diferent outputs(columns)\\n1) If the input is a value from Col1 the query should return data of 3 columns\\tie., Col1, Col2, Col3\\n2) If the input is a value from both Col1 and Col2 the query should return data of 4 columns\\tie., Col1, Col2, Col4, Col5\\n \\nHow do i create a single roxie query for the above requirement\\n\\nAny help is appreciated\", \"post_time\": \"2011-10-05 10:12:14\" },\n\t{ \"post_id\": 487, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Re: Exception in Roxie Query\", \"username\": \"richardkchapman\", \"post_text\": \"Roxie keeps the files it uses locked while a query that uses them is loaded - this will block other queries that are trying to overwriting the file and probably explains why "The state of the work unit will not change from running to completed till the Roxie service is deleted". 
\\n\\nWe normally avoid ever overwriting files (because they may be in use by Roxie) but rather create a new file and then update a superfile so that the next time the query is deployed it uses the new file.\", \"post_time\": \"2011-10-19 17:49:49\" },\n\t{ \"post_id\": 486, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Re: Exception in Roxie Query\", \"username\": \"sameermsc\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply\\n\\nI have the following points to add\\n\\nOn a new VM Image I have not observed any issues with respect to Roxie\\n\\nEvery time i change the code (like sorting on different fields, changing sorting order etc.,) and regenerate the dataset and index files with overwrite option, I have observed inconsistent outputs and exception messages on Roxie\\n\\nBelow are my observations \\n1)\\t while accessing data from Roxie there is a possibility that the Indexes (If any used) might fetch some older data present on the disk (not the recently written/overwritten file)\\n2)\\tAfter deleting a Roxie services and publish a new one by compiling again, one might see strange Exceptions or even no exceptions – No consistency\\n3)\\tSystem hangs when I try to overwrite a file used by a Roxie query in the recent past\\n4)\\tThe state of the work unit will not change from running to completed till the Roxie service is deleted \\n\\nwill post any other observations made \", \"post_time\": \"2011-10-19 17:39:39\" },\n\t{ \"post_id\": 459, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Re: Exception in Roxie Query\", \"username\": \"richardkchapman\", \"post_text\": \"The assert error suggests that Roxie is trying to process some invalid XML when reading the query info, which is not a good thing. Any change you could send me an archive of the query and a copy of the roxie log from when you tried to do the publish to Roxie.\\n\\nrchapman@hpccsystems.com\", \"post_time\": \"2011-10-14 12:16:26\" },\n\t{ \"post_id\": 437, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Re: Exception in Roxie Query\", \"username\": \"ghalliday\", \"post_text\": \"My guess is it is something to do with the index definition. For example if you had\\n\\nindex({ col1, col2, col3, col4 }, 'indexname'); \\n\\nyou would get the error you describe - because in this form of the index statement the last numeric field is assumed to be a file pos field, and isn't keyed. Try adding an explicit empty payload:\\n\\nindex({ col1, col2, col3, col4 }, {}, 'indexname'); \\n\\nThese semantics are rather strange, and have roots in index formats from many many years ago. Ideally we would change it - but it would cause various compatibility issues, so we havent bitten the bullet - yet.\", \"post_time\": \"2011-10-11 16:02:57\" },\n\t{ \"post_id\": 434, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Re: Exception in Roxie Query\", \"username\": \"bforeman\", \"post_text\": \"It's difficult to determine what is the cause of your error without looking at the code, but the mismatch of 4 bytes suggests to me that perhaps Col4 is not defined correctly, and perhaps needs to be an 8-byte integer. 
Can you attach a sample of the code that you are using?\", \"post_time\": \"2011-10-11 12:50:00\" },\n\t{ \"post_id\": 433, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"New Exception in Roxie Query\", \"username\": \"sameermsc\", \"post_text\": \"I have regenerated the indexes and redeployed the query on Roxie\\nIt runs perfectly on Thor, but not on Roxie\\n\\nThis time i got a different Exception\\n\\nException \\nReported by: Roxie\\nMessage: Query Sampledetails_2.1 is suspended because assert(s) failed - file: /var/jenkins/workspace/Release-3.2.0/src/system/jlib/jptree.cpp, line 1302 \\n\\n\\nDoes this have something to do with the output data (output has around 70 columns and number of records vary (may be few hundreds) based on the inputs)\", \"post_time\": \"2011-10-11 08:22:20\" },\n\t{ \"post_id\": 430, \"topic_id\": 130, \"forum_id\": 10, \"post_subject\": \"Exception in Roxie Query\", \"username\": \"sameermsc\", \"post_text\": \"I have created an index with the below columns (Record structure of the index is given below), when browsed using ECL Watch, i see two files (parts) corresponding to the Index\\n\\nRECORD\\n string10 Col1;\\n string10 Col2;\\n string4 Col3;\\n integer4 Col4;\\n unsigned8 fpos;\\n END;\\n\\n\\nI have create a query to fetch data using index, it ran perfectly on Thor and displayed the respective output, after deploying the same on Roxie, i got the below exception\\n\\nException \\nReported by: Roxie\\nMessage: Key size mismatch on key /var/lib/HPCCSystems/hpcc-data/thor/thor/in/sample/sampleindex._2_of_2/2328461874 - size was 28, expected 24 (in Index Read 2) \\n\\nI am unable to figure out the problem?\", \"post_time\": \"2011-10-10 15:25:07\" },\n\t{ \"post_id\": 457, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"dabayliss\", \"post_text\": \"The N version should work - (it was actually the test-code for the thor/loop implementation)\\n\\nThe version that is conditioned upon a degree of convergence is not yet supported (but they are working on it - we need it for ML too)\\n\\nDavid\", \"post_time\": \"2011-10-13 22:53:08\" },\n\t{ \"post_id\": 456, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"dean\", \"post_text\": \"David, Richard,\\nThanks, this helps a lot. I was planning to tweak the SALT MOD file using java to insert the iteration parameter 'i' into the match attribute. I guess that wasn't as haired-brained as I thought it was since its more or less what the -p0 switch appears to do. \\n\\nIf I understand correctly, micro-mode linking will now work on thor as well as hthor and roxie because the LOOP command is now supported on thor?\", \"post_time\": \"2011-10-13 22:17:23\" },\n\t{ \"post_id\": 446, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"dabayliss\", \"post_text\": \"Dean,\\n\\nUse -p0 on the SALT command line; then proc_iterate gains a number of 'LoopN' functions to allow you to perform multiple SALT iterations. Look in the manual under 'micro-mode'.\\n\\nIn our slightly older internal systems Loop was not supported in Thor; under OSS it is. 
Once the SALT/OSS port is complete I will be able to unhook the LoopN logic from micro-mode.\\n\\nDavid\", \"post_time\": \"2011-10-12 15:18:40\" },\n\t{ \"post_id\": 443, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"You must be looking at an older version of the Language Reference (download the latest here: http://hpccsystems.com/community/docs/all ). LOOP and GRAPH now both operate on Thor in the Open Source releases.\", \"post_time\": \"2011-10-12 14:55:08\" },\n\t{ \"post_id\": 440, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"dean\", \"post_text\": \"Thanks for your response Richard. LOOP and GRAPH are pretty close, but there are two issues. 1) they only run on hthor and roxie, and 2) I can't define the loopcondition without knowing the iteration number.\\n\\nSince you asked for more explicit info ...\\n\\nI'm try to automate salt internal linking iterations on a dataset using ECL code. I've written two functions based on the salt generated matches attribute:\\n\\nThis returns the number of matches performed in iteration i:\\n
export Integer mp(dataset(test.Layout_Match) idata,String2 i) := function\\n\\treturn test.matches(idata,i).MatchesPerformed;\\nend;
\\n\\nThis returns the dataset after iteration i\\n\\nexport dataset(test.Layout_Match) it(dataset(test.Layout_Match) idata,String2 i) := function\\n\\treturn test.matches(idata,i).patched_infile;\\nend;\\n
\\n\\nThen I write iteration functions\\n\\n\\nit1:=it(test.In_Match,'1');\\nit2:=it(it1,'2');\\nit3:=it(it2,'3');\\n
\\n\\nThen I have a function that does iterations until the condition is met:\\n\\n\\nexport dataset(test.Layout_Match) match() := function\\n\\treturn if(mp(test.In_Match,'1')<2,f1, \\n\\t\\tif(mp(f1,'2')<2,f2,\\n\\t\\tif(mp(f2,'3')<2,f3);\\n
\\n\\nThe match() function actually returns the correct dataset, but the problem is that all of my iteration functions (f1,f2,f3) are invoked, even when I don't need them. \\n\\nI've got a hunch there is a really simple solution to this and I'm just being dense, but I've been stuck on this for a couple of days.\\n\\nAgain, thanks\", \"post_time\": \"2011-10-11 22:57:22\" },\n\t{ \"post_id\": 439, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"Re: short circuiting conditional statements\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"I think you need to take a look at the LOOP and GRAPH functions. They will probably provide what you need. If these don't help, please describe what you're trying to accomplish in more explicit terms.\", \"post_time\": \"2011-10-11 20:49:13\" },\n\t{ \"post_id\": 438, \"topic_id\": 132, \"forum_id\": 10, \"post_subject\": \"short circuiting conditional statements\", \"username\": \"dean\", \"post_text\": \"I'm doing a transformation that requires multiple iterations. The output of iteration i is the input to iteration i+1. After each iteration, I would like to test to see if a condition is met and break the iteration loop. I don't think ECL supports recursion (right?), so that's out. \\n\\nI cobbled together some code that more or less models what I've done. Each iteration is defined in a function (which obviously limits the possible number of iterations). The return value of function i+1 is dependent on function i, etc. Then I have an if statement that invokes function i and test to see if a condition is met. \\n\\nThe problem is that all three iter functions are invoked even though the first one meets the condition in the if statement. Perhaps there's a better way to do the iterations? Is there a way to short circuit the conditional statement, so I don't invoke every iteration function? \\n\\nthankyouthankyouthankyou\\n\\niter1:=function\\n\\toutput('iter1');\\n\\treturn 1;\\nend;\\niter2:=function\\n\\toutput('iter2');\\n\\treturn iter1+1;\\nend;\\niter3:=function\\n\\toutput('iter3');\\n\\treturn iter2+1;\\nend;\\noutput(if(iter1<2,1,\\n if(iter2<2,2,iter3)));
\", \"post_time\": \"2011-10-11 17:49:01\" },\n\t{ \"post_id\": 608, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"This is what I have, which works fine:\\n\\n\\nIMPORT * from KJV;\\nIMPORT * from Std.Str;\\n
\", \"post_time\": \"2011-11-15 20:40:12\" },\n\t{ \"post_id\": 607, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"ghalliday\", \"post_text\": \"Try \\nimport kjv;\\nimport * from Std.Str;\\n\\nYou probably just want to say \\n\\nimport kjv;\\nimport Std.Str;\\n\\n"* from" means that all of the definitions within Std.Str are added to the current scope. It is normally preferable to prefix them with Str, e.g., Str.toUpperCase\", \"post_time\": \"2011-11-15 20:11:23\" },\n\t{ \"post_id\": 604, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"moewin\", \"post_text\": \"Thank you Allan.Now I can run ESP using <ip>:8002.In part III I put searchservice,UT and MAC_BuildCase . I got error in searchservice .[code][/code]Error: IMPORT <module> AS * is deprecated, use IMPORT * FROM <module> (1, 11), 2304, \\nError: Import names unknown module "kjv" (1, 27), 2081, \\nError: Unknown identifier "KJV" (8, 6), 2167, \\nError: Unknown identifier "KJV" (22, 8), 2167, \\nError: syntax error near "." : expected ')' (25, 32), 3002, \\nError: Record must not be zero length (24, 6), 2318, \\nError: syntax error near "res" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (31, 7), 3002,
\\n\\nimport kjv,std.Str as *;\\nSTRING ToSearch := '' : STORED('SearchText');\\ns := KJV.Inversion.Search(ToSearch);\\n
\\nI was copy and paste from web site .\\n\\nYours ,\\nMoe Win\", \"post_time\": \"2011-11-15 17:02:43\" },\n\t{ \"post_id\": 602, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Hi moe,\\n\\nI had no trouble with Bible III.\\n\\nYou compile for Roxie (or hthor), Publish it from the ECL Watch pane in the IDE then\\nbring up your <ip>:8002 page from your browser. I think LN call it the 'WSecl' screen.\\nThen drill down to your query from the 'query sets' tree displayed on the right.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-15 14:47:54\" },\n\t{ \"post_id\": 601, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"moewin\", \"post_text\": \"Hello Allan ,\\n\\n Did you try bible search part III ? I confused about how to call form HTML page .. \\n\\nThank you,\\nMoe\", \"post_time\": \"2011-11-14 21:32:09\" },\n\t{ \"post_id\": 472, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Hi Moe,\\n\\nYes - great error message isn't it \\n\\nYou'll find there are other equally obscure error messages as you move on, you'll not home yet!\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-17 15:24:07\" },\n\t{ \"post_id\": 469, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"moewin\", \"post_text\": \"Hello Allan,\\n\\n You right Allan " I suspect your filename for module File_KJV is not the same. i.e. your file holding this code must be 'File_KJV.ecl'"\\n\\nThank you for you help
\", \"post_time\": \"2011-10-17 14:04:02\" },\n\t{ \"post_id\": 465, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Hi Moe,\\n\\nYou seem to be going though the same pain barrier as me.\\n\\nTo begin with you find yourself juggling these type of errors!\\n\\nI suspect your filename for module File_KJV is not the same. i.e. your file holding this code must be 'File_KJV.ecl'\\n\\nI cut a pasted your entire program into my File_KJV project and it passed the syntax checker.\\n\\nI then cut and pasted it into a file called 'File_KJVX' and I got errors:\\n
\\nError: Definition must contain EXPORT or SHARED value for file_kjvx (1, 0), 3,
\\nThen I changed your module name to:\\n\\nEXPORT File_KJVX := MODULE\\n
\\n\\nAnd hay presto - No Errors!\\n\\nIn earlier forum notes, people from LN have admitted a deficiency in the documentation and are correcting it.\\n\\nI hope this helps.\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-17 07:27:35\" },\n\t{ \"post_id\": 464, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"moewin\", \"post_text\": \"Hello Everyone,\\n@ Allan .. I'm happy for you finish the KJV example .I 'm facing with \\n\\nError: Definition must contain EXPORT or SHARED value for module_test (1, 0), 3, \\n\\nthis error .. \\nimport Std.Str AS *;\\n\\nEXPORT File_KJV := MODULE\\n\\nR := RECORD\\n \\tSTRING Txt;\\n END;\\n\\t\\n\\nd := DATASET('kjv_text',R,CSV(SEPARATOR('')));\\n\\nR TextOntoReference(R le, R ri) := TRANSFORM\\n SELF.Txt := le.Txt + ' ' + ri.Txt;\\n END;\\t\\n\\t\\n\\nRld := ROLLUP(d,LEFT.Txt[1]='$' AND RIGHT.Txt[1]<>'$',TextOntoReference(LEFT,RIGHT));\\n\\nUNSIGNED GetChapter(STRING S) := (UNSIGNED)S[1..Find(S,':',1)-1];\\nUNSIGNED GetVerse(STRING S) := (UNSIGNED)S[Find(S,':',1)+1..];\\n\\nR2 := RECORD\\n \\tSTRING5 Book := GetNthWord(Rld.Txt,2);\\n \\tUNSIGNED1 Booknum := 0;\\n\\tUNSIGNED1 Chapter := GetChapter(GetNthWord(Rld.Txt,3));\\n\\tUNSIGNED1 Verse := GetVerse(GetNthWord(Rld.Txt,3));\\n\\tSTRING Verse_Text := Rld.Txt[Find(Rld.Txt,' ',3)+1..];\\n END;\\n\\t\\n\\nt := TABLE(Rld,R2);\\n\\nR2 NoteBookNum(R2 le, R2 ri) := TRANSFORM\\n SELF.Booknum := IF( le.book=ri.book, le.booknum, le.booknum+1 );\\n SELF := ri;\\n END;\\n\\nEXPORT Txt := ITERATE(t,NoteBookNum(LEFT,RIGHT));\\nEXPORT Key := INDEX(Txt,{BookNum,Chapter,Verse},{Verse_Text},'~key::kjv_txt');\\nEXPORT Bld := BUILDINDEX(Key,OVERWRITE);\\n\\nEXPORT Layout_Reference := RECORD\\n UNSIGNED1 BookNum;\\n\\tUNSIGNED1 Chapter;\\n\\tUNSIGNED1 Verse;\\n END;\\n\\nEXPORT Get(UNSIGNED1 pBookNum,UNSIGNED1 pChapter, UNSIGNED1 pVerse) := FUNCTION\\n\\tRETURN Key(BookNum=pBookNum,pChapter=0 OR pChapter=Chapter,pVerse=0 OR pVerse=Verse);\\n END;\\n\\t\\n\\nEXPORT GetBatch(DATASET(Layout_Reference) refs) := FUNCTION\\n\\tRETURN JOIN(refs,Key,LEFT.BookNum=RIGHT.BookNum AND LEFT.Chapter=RIGHT.Chapter AND LEFT.Verse=RIGHT.Verse,TRANSFORM(RIGHT));\\n END;\\n\\t\\n\\n END;\\t
\\n\\nThank You ,\\nMoe\", \"post_time\": \"2011-10-16 15:58:45\" },\n\t{ \"post_id\": 463, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Hi Everyone,\\n\\nAfter a slight delay re-installing HPCC and VMWare after a forced shutdown. \\nI've finally got the KJV example to work.\\n\\nI created a file that actually actions the index build onto thor.\\nI also added the construction of the inverted index as I thought that would also be necessary but was not sure if the inverted file should reside with the search engine on roxie, not thor. So I now have:\\n
\\n$.File_KJV.Bld;\\n$.Inversion.Bld;\\n
\\n\\nSo Thank you very all very much for your help.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-15 19:54:17\" },\n\t{ \"post_id\": 452, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"dabayliss\", \"post_text\": \"@Joe: Thanks you beat me to the answer\\n\\n@Alan: Good catch on the ';' - unfortunately 'cutting and pasting' code from an editor into an html file so that is looks 'a bit like code' and is still 'cut & pasteable' - involves quite a bit of manual editing - a gremlin clearly crept into the process!\\n\\nI have tweaked the pages in question - hopefully everything is a little clearer now.\", \"post_time\": \"2011-10-13 13:06:29\" },\n\t{ \"post_id\": 451, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"joecella\", \"post_text\": \"You have the attribute that defines how to build the index, but you have not execucted the action to build the index. In a seperate editor window, you need to submit the following to thor:\\n\\nFile_KJV.Bld;\", \"post_time\": \"2011-10-13 13:01:28\" },\n\t{ \"post_id\": 448, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"The index is built.\\nA mote complete snippet from the same KJV example is:\\n\\n\\nEXPORT Key := INDEX(Records,{STRING20 Wrd := Word,BookNum,Chapter,Verse,Word_Pos},'~key::kjv_inversion');\\nEXPORT Bld := BUILDINDEX(key,OVERWRITE);\\n
\\n\\nSo I'm still not clear as to what's wrong.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-12 17:33:00\" },\n\t{ \"post_id\": 447, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Thanks again for the prompt reply.\\n\\n There is also a small typo in the complete example at the end of 'Bible Part 2'\\n\\nSHARED R := RECORD\\n UNSIGNED1 BookNum := 0\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n
\\n\\nThere is a missing ';' so I don't know how it got past your syntax checker given the accompanying text says it 'cut and pasteable'.\\n\\nThought you better know.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-12 15:58:23\" },\n\t{ \"post_id\": 445, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"dabayliss\", \"post_text\": \"Richard,\\n\\nYep, my bad - this is why I normally leave the docs to you!\\nI told them they had to write the function to build the index - I didn't actually tell them they had to call it!\\n\\nAlan - sorry - you need to invoke the Bld ... (I was so busy girding up to try to explain GRAPH that I skipped the 'obvious' <if you've been doing it for a decade>)\\n\\nDavid\", \"post_time\": \"2011-10-12 15:15:06\" },\n\t{ \"post_id\": 444, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Re: Runtime error using KJV example from Web.\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"You have the INDEX defintion, but did you create it (using the BUILD action)? \\n\\nThe INDEX declaration spoecifies that there IS a file on disk, but it has to have been created first to use it, and that's what BUILD is for.\", \"post_time\": \"2011-10-12 15:00:02\" },\n\t{ \"post_id\": 442, \"topic_id\": 133, \"forum_id\": 10, \"post_subject\": \"Runtime error using KJV example from Web.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIn an attempt to learn ECL I've implemented the example project from:\\n\\nhttp://www.dabhand.org/ECL/construct_a_ ... search.htm\\n\\nand\\n\\nhttp://www.dabhand.org/ECL/construct_a_ ... archII.htm\\n\\nThis I've got to compile and published both the construction of the Bibliographic data (1st link above) and the inverted file (from the 2nd link)\\nHowever when I submit the following query:\\n\\n\\ns := $.Inversion.Search('LOVE OF GOD');\\n$.File_KJV.GetBatch(s);\\n
\\n\\nif the 'target' is 'thor' I get error:\\nError: System error: 10001: Graph[24], Missing logical file key::kjv_inversion\\n (0, 0), 10001, \\n
\\nSubmitting and publishing to 'roxie' seems to succeed but then the error is just delayed till an attempt is made to run it, when:\\nQuery query.2 is suspended because Could not resolve filename key::kjv_inversion \\n
\\n\\nis generated. (It's actually bad English)\\n\\nkey::kjv_inversion is defined (I believe) in the construction of the inverted index.\\nEXPORT Key := INDEX(Records,{STRING20 Wrd := Word,BookNum,Chapter,Verse,Word_Pos},'~key::kjv_inversion');\\n
\\n\\nAny pointers as to what might be going wrong?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-12 12:47:22\" },\n\t{ \"post_id\": 500, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"Re: MACRO - already defined\", \"username\": \"bforeman\", \"post_text\": \"I posted a Code Snippet a couple of weeks ago that highlights this new structure.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-20 14:02:45\" },\n\t{ \"post_id\": 499, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"Re: MACRO - already defined\", \"username\": \"aintnomyth\", \"post_text\": \"Wow that's a nice piece of new functionality, great work!\", \"post_time\": \"2011-10-20 13:58:19\" },\n\t{ \"post_id\": 498, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"Re: MACRO - already defined\", \"username\": \"ghalliday\", \"post_text\": \"Another alternative is to use a relatively new feature: FUNCTIONMACRO (I'm not sure I like the keyword). It acts like defining a function, so you can you can defined local symbols that don't clash with symbols that already exist. It also has the benefit of behaving a lot more like a normal attribute.\\n\\nSo your example would become:\\n\\nGetPersonDetails(RecType, DS1, DS2, fieldName) := FUNCTIONMACRO\\nRecType tJoin(DS1 L, DS2 R) := transform\\n SELF.contact := R.fieldName;\\n SELF := L;\\nEND;\\n\\nRETURN JOIN(DS1, DS2, LEFT.id = RIGHT.id, tJoin(LEFT, RIGHT));\\n
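ENDMACRO;\\n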
\\n\\nWhich you can then use in a normal way:\\n\\n\\noutDs := GetPersonDetails(outRec, ds1, ds2, myField);\\n
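\\n\\nFor completeness, a fuller self-contained sketch (untested here; the record layouts and inline data are only hypothetical examples) shows that the same FUNCTIONMACRO can be invoked twice without any 'already defined' clash, because each expansion gets its own local scope:\\n\\npersonRec := RECORD\\n INTEGER2 id;\\n STRING10 firstname;\\nEND;\\ncontactRec := RECORD\\n INTEGER2 id;\\n STRING50 email;\\n STRING50 phone;\\nEND;\\noutRec := RECORD\\n INTEGER2 id;\\n STRING10 firstname;\\n STRING50 contact;\\nEND;\\n\\nGetPersonDetails(RecType, DS1, DS2, fieldName) := FUNCTIONMACRO\\n // the TRANSFORM name is local to each expansion, so repeated calls do not clash\\n RecType tJoin(RECORDOF(DS1) L, RECORDOF(DS2) R) := TRANSFORM\\n  SELF.contact := R.fieldName;\\n  SELF := L;\\n END;\\n RETURN JOIN(DS1, DS2, LEFT.id = RIGHT.id, tJoin(LEFT, RIGHT));\\nENDMACRO;\\n\\nds1 := DATASET([{1,'ravi'},{2,'shyam'}], personRec);\\nds2 := DATASET([{1,'ravi@abc.com','123456789'},{2,'shyam@abc.com','234567890'}], contactRec);\\n\\n// one call picks email, the other picks phone\\nOUTPUT(GetPersonDetails(outRec, ds1, ds2, email));\\nOUTPUT(GetPersonDetails(outRec, ds1, ds2, phone));\\n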
\", \"post_time\": \"2011-10-20 08:17:42\" },\n\t{ \"post_id\": 497, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"Re: MACRO - already defined\", \"username\": \"sameermsc\", \"post_text\": \"Bob, Thanks for pointing out that i need to use template symbol and #UNIQUENAME\\n\\nI was trying with %symbol% but without #UNIQUENAME template and got few errors like "No XML Scope Active etc., "\\nusing both %symbol% and #UNIQUENAME it worked fine\\n\\nbelow are the changes (in Bold) to the MACRO that worked without error\\n\\nGetPersonDetails(outDS, RecType, DS1, DS2, fieldName) := MACRO\\n\\t#UNIQUENAME(tJoin)\\n\\tRecType %tJoin%(DS1 L, DS2 R) := transform\\n\\t\\tSELF.contact := R.fieldName;\\n\\t\\tSELF := L;\\n\\tEND;\\n\\t\\n\\toutDS := JOIN(DS1, DS2, LEFT.id = RIGHT.id, %tJoin%(LEFT, RIGHT));\\n\\nENDMACRO;\", \"post_time\": \"2011-10-20 01:57:51\" },\n\t{ \"post_id\": 490, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"Re: MACRO - already defined\", \"username\": \"bforeman\", \"post_text\": \"See the doc on #UNIQUENAME and template symbols. This technique allow you to call multiple instances of the same MACRO in the same ECL File. For example:\\n\\nEXPORT MAC_Field_Cardinality(infile,infield) := MACRO\\n\\n#UNIQUENAME(t_macro)\\n%t_macro% := TABLE(infile,{infile.infield});\\n#UNIQUENAME(dt_macro)\\n%dt_macro% := DISTRIBUTE(%t_macro%,HASH32(infield));\\n#UNIQUENAME(sdt_macro)\\n%sdt_macro% := SORT(%dt_macro%,infield,LOCAL);\\n#UNIQUENAME(dsdt_macro)\\n%dsdt_macro% := DEDUP(%sdt_macro%,infield,LOCAL);\\n\\nOUTPUT(COUNT(%dsdt_macro%),NAMED('Cardinality_'+ #TEXT(infield)));\\n// COUNT(%dsdt_macro%);\\n\\nENDMACRO;\\n\\nSo I would guess that tjoin needs to be made %tjoin% and a #UNIQUENAME(tjoin) just above it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-19 18:18:30\" },\n\t{ \"post_id\": 488, \"topic_id\": 140, \"forum_id\": 10, \"post_subject\": \"MACRO - already defined\", \"username\": \"sameermsc\", \"post_text\": \"I have two datasets and i want to join them to create two new datasets with same record structure, with the exception that the first one will have a email while the second one will have phone as a value of contact field (output record structure)\\n\\nI have defined a macro to perform the join, to meet the above requirement i pass the field name to be assigned to contact as an argument of macro. macro works fine for the first time but when i invoke it for the second time with a different field name i get an Error\\n\\n\\nError: Identifier 'tjoin' is already defined (27, 11), 2143, \\nError: While expanding macro getpersondetails (39, 87), 2143, \\n\\nHow to avoid the error (already defined)? 
\\n\\nBelow is the sample code for reference\\n\\n\\nperson := Record\\n\\tinteger2 id;\\n\\tstring10 firstname;\\n\\tstring10 lastname;\\nEnd;\\n\\ncommunication := Record\\n\\tinteger2 id;\\n\\tstring50 email;\\n\\tstring50 phone;\\nEnd;\\n\\n\\npersondetails := Record\\n\\tinteger2 id;\\n\\tstring10 firstname;\\n\\tstring10 lastname;\\n\\tstring50 contact;\\nEnd;\\n\\n\\nds_person := DATASET([{1, 'ravi', 'kiran'}, {2, 'shyam', 'sundar'}], person);\\nds_communication := DATASET([{1, 'ravi@abc.com', '123456789'}, {2, 'shyam@abc.com', '234567890'}], communication);\\n\\n\\nGetPersonDetails(outDS, RecType, DS1, DS2, fieldName) := MACRO\\n\\tRecType tJoin(DS1 L, DS2 R) := transform\\n\\t\\tSELF.contact := R.fieldName;\\n\\t\\tSELF := L;\\n\\tEND;\\n\\t\\n\\toutDS := JOIN(DS1, DS2, LEFT.id = RIGHT.id, tJoin(LEFT, RIGHT));\\n\\nENDMACRO;\\n\\nGetPersonDetails(ds_personDetails1, persondetails, ds_person, ds_communication, email);\\nOUTPUT(ds_personDetails1);\\n\\nGetPersonDetails(ds_personDetails2, persondetails, ds_person, ds_communication, phone);\\nOUTPUT(ds_personDetails2);\", \"post_time\": \"2011-10-19 17:58:18\" },\n\t{ \"post_id\": 587, \"topic_id\": 156, \"forum_id\": 10, \"post_subject\": \"Re: deploy ECL query\", \"username\": \"clo\", \"post_text\": \"In order to check the status of a Roxie Cluster,\\n\\n1. Navigate on EclWatch to the Topology section of the left hand side navigation panel\\n2. Click on Target Clusters \\n3. From the list of clusters, select the check-box to the left of the section for your roxie cluster\\n-\\tEnsure that Action: lists ‘Machine Information’\\n o Leave all the options selected by default checked\\n-\\tClick on the [Submit] button and the system information will be displayed.\\n\\nRestarting a Roxie Cluster:\\n\\n-\\tOn a node where the required package was installed, run this on the command terminal:\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init -c roxie restart\\n\\n\\nI hope this helps.\", \"post_time\": \"2011-11-07 15:48:32\" },\n\t{ \"post_id\": 584, \"topic_id\": 156, \"forum_id\": 10, \"post_subject\": \"deploy ECL query\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nWhen i doing deploy ECL query in roxie_esp, i am getting the error message.\\nError : "Request Failed! All roxie nodes unable to process this request at this time. Roxie is busy - possibly in the middle of another deployment. Try again later, \\nif problem persists, make sure all nodes are running"\\n\\nIs there any option to reset the Roxie or to look which are the service deploying?\\n\\nPlease help me any one\", \"post_time\": \"2011-11-07 06:30:31\" },\n\t{ \"post_id\": 595, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Re: Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"Rob Pelley\", \"post_text\": \"Gavin, thanks for the explanation ... I'd been stuck on this one too.\", \"post_time\": \"2011-11-11 17:23:36\" },\n\t{ \"post_id\": 593, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Re: Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"ghalliday\", \"post_text\": \"The problem is to do with the way the results are displayed in the ECLIDE.\\n\\nThe result generates <n> rows, but <n>+3 lines of text to make up the xml (becuase of the header and footer). 
The IDE is only reading <n> lines, which means the last few lines are missing from the display.\\n\\nI'm not sure yet which piece of code needs to change, but at least the XML file will be generated correctly.\", \"post_time\": \"2011-11-11 15:16:52\" },\n\t{ \"post_id\": 590, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Re: Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"bforeman\", \"post_text\": \"Will do Gavin, thank you!\", \"post_time\": \"2011-11-10 14:17:49\" },\n\t{ \"post_id\": 589, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Re: Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"ghalliday\", \"post_text\": \"Bob, can you email me an archive of the query that goes wrong and I will investigate.\", \"post_time\": \"2011-11-10 14:15:59\" },\n\t{ \"post_id\": 588, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Re: Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nThe reason in the distored output lies in the XML Flag in the OUTPUT. As a test, I removed that section and just tested with:\\n\\nOUTPUT(InterestingRecs,{code,timezone});\\n\\n...and the results are spot on. So there is something wrong in the XML output satatement, haven't found it yet...\", \"post_time\": \"2011-11-07 20:12:52\" },\n\t{ \"post_id\": 585, \"topic_id\": 157, \"forum_id\": 10, \"post_subject\": \"Issue with XML example from 'ECL Programmers Guide'\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm working though the XML section of said guide, and the example:\\n\\nInterestingRecs := StrippedRecs((INTEGER)code BETWEEN 301 AND 303);\\nOUTPUT(InterestingRecs,{code,timezone},\\n\\t '~PROGGUIDE::EXAMPLEDATA::OUT::timezones300',\\n\\t\\t\\t XML('area',HEADING('<?xml version=1.0 ...?>\\\\n<timezones>\\\\n',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'</timezones>')),OVERWRITE);\\n
\\nis only returning two records 301 and 302.\\nIf I try other bounds I get 303 back plus other rows but you can see from the results below something weird is going on.\\n\\nBounds Rows returned\\n301-303 301-302\\n301-304 301-302\\n301-305 301-303\\n301-306 301-304\\n301-307 301-305\\n301-308 301-306\\n301-309 301-307\\n301-314 301-312\\n
\\n\\nCould someone please explain this?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-07 12:36:05\" },\n\t{ \"post_id\": 660, \"topic_id\": 163, \"forum_id\": 10, \"post_subject\": \"Re: MERGE applied to a grouped dataset\", \"username\": \"ghalliday\", \"post_text\": \"It is probably an example of a query which probably isn't necessary.\\n\\nIt is because a MERGE operation doesn't preserve any grouping in the incomming datasets - so feeding a grouped dataset in is a bit strange. However all the engines should ignore any grouping in the input datasets.\\n\\nI'll remove it since it cause more confusion than help.\", \"post_time\": \"2011-11-29 20:54:27\" },\n\t{ \"post_id\": 653, \"topic_id\": 163, \"forum_id\": 10, \"post_subject\": \"Re: MERGE applied to a grouped dataset\", \"username\": \"bforeman\", \"post_text\": \"I can't speak for the compiler guru, but generally a warning implies that MERGE probably wasn't intended for a grouped dataset, but it will still attempt to perform the operation, and there is a chance that it may run a bit slower than intended.\\n\\nBut warnings are usually harmless and can be safely ignored.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2011-11-29 19:38:47\" },\n\t{ \"post_id\": 623, \"topic_id\": 163, \"forum_id\": 10, \"post_subject\": \"MERGE applied to a grouped dataset\", \"username\": \"aintnomyth\", \"post_text\": \""Warning: MERGE applied to a grouped dataset - was this intended?"\\n\\nI get this warning when I use MERGE on a grouped dataset. Is this something I should worry about? What is the underlying intent of the warning?\", \"post_time\": \"2011-11-21 15:55:32\" },\n\t{ \"post_id\": 668, \"topic_id\": 169, \"forum_id\": 10, \"post_subject\": \"Re: Problem with 'Query Library' example from ECL Programmer\", \"username\": \"ghalliday\", \"post_text\": \"The easiest solution is to have the definition of the library in one file, the build statement in a second, and the code that uses the library in a third.\\n\\nIf you want to export multiple items from a single file/definition then that definition needs to return a module.\", \"post_time\": \"2011-11-30 09:34:37\" },\n\t{ \"post_id\": 664, \"topic_id\": 169, \"forum_id\": 10, \"post_subject\": \"Re: Problem with 'Query Library' example from ECL Programmer\", \"username\": \"Allan\", \"post_text\": \"Hi Everyone,\\n\\nI was attempting to use the library function from another module but the other module did not know about 'FilterDsLib'.\\n\\nAnd putting 'EXPORT' before the 'FilterDsLib' definition caused an error there.\\n\\nWarning: Definition exquerylibrary cannot define a public symbol filterdslib (21, 1), 2380, \\n
\\n\\nHowever I'm now on day 2 of the ECL course and the mists of scoping are beginning to clear, so I'll re-look at this example after the course.\\n\\nThanks for your help.\\n\\nAllan\", \"post_time\": \"2011-11-29 23:03:25\" },\n\t{ \"post_id\": 661, \"topic_id\": 169, \"forum_id\": 10, \"post_subject\": \"Re: Problem with 'Query Library' example from ECL Programmer\", \"username\": \"ghalliday\", \"post_text\": \"Yes, the BUILD() statement to create the library needs to be in a separate query from the query that uses it.\\n\\nIf you're using the form of LIBRARY that provides the implementation then you don't need the BUILD statement.\\n\\nI'll add an issue (1074) to git hub to improve the error handling.\", \"post_time\": \"2011-11-29 21:03:37\" },\n\t{ \"post_id\": 655, \"topic_id\": 169, \"forum_id\": 10, \"post_subject\": \"Re: Problem with 'Query Library' example from ECL Programmer\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nYes, I believe that you would need to move that OUTPUT action to a separate builder window. Are you getting an error that says something like "Action cannot follow an EXPORTed module" ?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-29 19:50:16\" },\n\t{ \"post_id\": 633, \"topic_id\": 169, \"forum_id\": 10, \"post_subject\": \"Problem with 'Query Library' example from ECL Programmers gu\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm just working though the 'Query Library' examples from the ECL Programmers guide and am triggering an assertion from the underlying C++.\\n\\n1st the code I'm trying to use.\\n\\nnamesRec := RECORD\\n INTEGER1 NameID;\\n STRING20 FName;\\n STRING20 LName;\\nEND;\\n\\n// Define the filter parameters\\nIFilterArgs :=INTERFACE\\n EXPORT DATASET(namesRec) ds;\\n EXPORT STRING search;\\nEND;\\n\\n// define the interface to the library.\\nFilterLibIFace(IFilterArgs args) := INTERFACE\\n EXPORT DATASET(namesRec) matches;\\n EXPORT DATASET(namesRec) others;\\nEND;\\n\\n// Thats the interface delt with, now for the implementation of the library.\\nFilterDsLib(IFilterArgs args) := MODULE,LIBRARY(FilterLibIFace)\\n EXPORT matches := args.ds(LName = args.search);\\n EXPORT others := args.ds(LName <>args.search);\\nEND;\\n\\n#WORKUNIT('Name','IPass.FilterDsLib');\\nBUILD(FilterDsLib);\\n\\nnamesTable := DATASET([ {1,'Doc','Holliday'},\\n {2,'Liz','Taylor'},\\n\\t\\t\\t{3,'Mr','Nobody'} ],namesRec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nSearchArgs := MODULE(IFilterArgs)\\n EXPORT DATASET(namesRec) ds := namesTable;\\n\\t EXPORT STRING search := 'Holliday';\\nEND;\\n\\nlib1 := LIBRARY(INTERNAL(FilterDsLib),FilterLibIFace(SearchArgs));\\nOUTPUT(lib1.matches);\\n
\\n\\nThis generates error:\\n\\nError: Object cannot be used as an action (27, 1), 2100, \\nError: assert(expr1->queryType()->getTypeCode() == type_void) failed - file: ..\\\\..\\\\..\\\\..\\\\HPCC-Platform\\\\ecl\\\\hql\\\\hqlexpr.cpp, line 11523 (0, 0), 3000, \\n
\\n\\nI can't see what I'm doing wrong, any idea's anyone?\\n\\nTwo things to note, I'm only doing a syntax check, I've not submitted this example. Secondly if I comment out the 'OUTPUT' at the end the syntax check goes though with no errors.\\n\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-26 19:43:30\" },\n\t{ \"post_id\": 652, \"topic_id\": 170, \"forum_id\": 10, \"post_subject\": \"Re: How can I retrieve a record from a table\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nHave you tried:\\n\\nAuto := TableType[1].Y2009 \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-29 19:32:04\" },\n\t{ \"post_id\": 634, \"topic_id\": 170, \"forum_id\": 10, \"post_subject\": \"How can I retrieve a record from a table\", \"username\": \"mbarrios78\", \"post_text\": \"I need to use a record from a table to use it in a calculation.\\n\\nThis is my code:\\nIMPORT $;\\n\\n\\tStateLayout := RECORD\\n\\t$.VehiclesByState.State;\\n\\t$.VehiclesByState.Y2009;\\n\\tEND;\\n\\t\\n TableState := TABLE($.VehiclesByState, StateLayout);\\n\\t\\n\\tTypeLayout := RECORD\\n\\t$.VehiclesTypePercentage.Y2009;\\n\\tEND;\\n\\n\\tTableType := TABLE ($.VehiclesTypePercentage, TypeLayout);\\n Auto := TableType[1];\\n\\tBus := TableType[2];\\n\\tTruck := TableType[3];\\n\\n\\n\\tVehiclesTypeLayout := RECORD\\n\\tTableState.State;\\n\\tDECIMAL10_7 Automobiles :=0;\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\tEND;\\n\\t\\n\\tVehiclesTypeLayout Layout1(StateLayout B) := TRANSFORM\\n\\tSELF.State := B.State;\\n\\tSELF.Automobiles := (B.Y2009*Auto)/(100);\\n\\tEND;\\nI am trying to use Auto := TableType[1]; but it is not just a record is a table with that single record. How do I get the actual data in that record.\\n\\nThank you very much.\", \"post_time\": \"2011-11-27 04:26:55\" },\n\t{ \"post_id\": 683, \"topic_id\": 172, \"forum_id\": 10, \"post_subject\": \"Re: Problem running Cartesian.ECL example from Prog Guide\", \"username\": \"bforeman\", \"post_text\": \"That did the trick Richard, thanks!\\n\\nBob\", \"post_time\": \"2011-11-30 16:30:54\" },\n\t{ \"post_id\": 681, \"topic_id\": 172, \"forum_id\": 10, \"post_subject\": \"Re: Problem running Cartesian.ECL example from Prog Guide\", \"username\": \"rtaylor\", \"post_text\": \"I think it is simply a matter of too much data for the size of cluster you are running the code one. Since this is just example code, try editing it so that this code:\\n\\n\\nInds1 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n\\t\\t{'F'},{'G'},{'H'},{'I'},{'J'},\\n\\t\\t{'K'},{'L'},{'M'},{'N'},{'O'},\\n\\t\\t{'P'},{'Q'},{'R'},{'S'},{'T'},\\n\\t\\t{'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t rec);\\nInds2 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n\\t\\t {'F'},{'G'},{'H'},{'I'},{'J'},\\n\\t\\t {'K'},{'L'},{'M'},{'N'},{'O'},\\n\\t \\t {'P'},{'Q'},{'R'},{'S'},{'T'},\\n\\t\\t {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t rec);\\n
\\n\\nbecomes this:\\n\\nInds1 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t rec);\\nInds2 := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'}],\\n\\t\\t rec);\\n
\\n\\nWith less data it should run fairly quickly.\", \"post_time\": \"2011-11-30 15:46:51\" },\n\t{ \"post_id\": 656, \"topic_id\": 172, \"forum_id\": 10, \"post_subject\": \"Re: Problem running Cartesian.ECL example from Prog Guide\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nI am seeing the same thing. Will investigate and determine if it is a code issue or a graph/system bug. Thanks for the report!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-29 19:58:34\" },\n\t{ \"post_id\": 639, \"topic_id\": 172, \"forum_id\": 10, \"post_subject\": \"Problem running Cartesian.ECL example from Prog Guide\", \"username\": \"Allan\", \"post_text\": \"Hi I've run the 'Cartesian.ECL' example code shipped under the 'ProgrammersGuide' directory.\\n\\nI've not altered the code yet the 3rd graph runs for 17 minutes then fails.\\nSee the eclagent.log below:\\n\\n00000000 2011-11-28 16:04:26 5592 5592 ECLAGENT build community_3.2.2-1\\n00000001 2011-11-28 16:04:26 5592 5592 Waiting for workunit lock\\n00000002 2011-11-28 16:04:26 5592 5592 Obtained workunit lock\\n00000003 2011-11-28 16:04:26 5592 5592 Loading dll (libW20111128-160424.so) from location /var/lib/HPCCSystems/myeclccserver/libW20111128-160424.so\\n00000004 2011-11-28 16:04:26 5592 5592 Starting process\\n00000005 2011-11-28 16:04:26 5592 5592 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000006 2011-11-28 16:04:26 5592 5592 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x9d900000 alignment=1048576 bitmapSize=10\\n00000007 2011-11-28 16:04:26 5592 5592 Waiting for run lock\\n00000008 2011-11-28 16:04:26 5592 5592 Obtained run lock\\n00000009 2011-11-28 16:04:26 5592 5592 Enqueuing on thor.thor to run wuid=W20111128-160424, graph=graph1, timelimit=600 seconds, priority=0\\n0000000A 2011-11-28 16:04:26 5592 5592 Thor on 192.168.147.128:6500 running W20111128-160424\\n0000000B 2011-11-28 16:04:27 5592 5592 Enqueuing on thor.thor to run wuid=W20111128-160424, graph=graph2, timelimit=600 seconds, priority=0\\n0000000C 2011-11-28 16:04:27 5592 5592 Thor on 192.168.147.128:6500 running W20111128-160424\\n0000000D 2011-11-28 16:04:27 5592 5592 Enqueuing on thor.thor to run wuid=W20111128-160424, graph=graph3, timelimit=600 seconds, priority=0\\n0000000E 2011-11-28 16:04:27 5592 5592 Thor on 192.168.147.128:6500 running W20111128-160424\\n0000000F 2011-11-28 16:21:39 5592 5592 ERROR: 3000: Graph[21], normalize[23]: SLAVE 192.168.147.128:6600: assert(started()) failed - file: /var/jenkins/workspace/Release-3.2.2/src/thorlcr/activities/./../activities/thactivityutil.ipp, line 212 (in item 1)\\n00000010 2011-11-28 16:21:39 5592 5592 Releasing run lock\\n00000011 2011-11-28 16:21:39 5592 5592 System error: 3000: Graph[21], normalize[23]: SLAVE 192.168.147.128:6600: assert(started()) failed - file: /var/jenkins/workspace/Release-3.2.2/src/thorlcr/activities/./../activities/thactivityutil.ipp, line 212\\n00000012 2011-11-28 16:21:39 5592 5592 3000: System error: 3000: Graph[21], normalize[23]: SLAVE 192.168.147.128:6600: assert(started()) failed - file: /var/jenkins/workspace/Release-3.2.2/src/thorlcr/activities/./../activities/thactivityutil.ipp, line 212\\n00000013 2011-11-28 16:21:39 5592 5592 Process complete\\n00000014 2011-11-28 16:21:39 5592 5592 Workunit written complete\\n
\\n\\nAny idea's what's wrong?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-11-28 16:47:55\" },\n\t{ \"post_id\": 650, \"topic_id\": 173, \"forum_id\": 10, \"post_subject\": \"Re: Join operation - pre dist/sort/local vs auto/hash\", \"username\": \"dabayliss\", \"post_text\": \"Well - first an observation - whenever I am called in to optimize a piece of code - my first step is usually to remove all the distribute and local statements - the code nearly always runs faster afterwards. The compiler has a global view of things - it can nearly always do a better job than you can.\\n\\nThere are two principle exceptions to this:\\n\\n1) You are someone numerically generating data and happen to 'know' that it will cause it to be distributed a particular way - even though no distributing is happening\\n\\n2) You have apriori knowledge that a SEQUENCE of operations will all benefit from having data distributed one particular way\\n\\nDavid\", \"post_time\": \"2011-11-29 16:24:39\" },\n\t{ \"post_id\": 641, \"topic_id\": 173, \"forum_id\": 10, \"post_subject\": \"Join operation - pre dist/sort/local vs auto/hash\", \"username\": \"aintnomyth\", \"post_text\": \"From a performance perspective, when is it better to feed the JOIN operation distributed and sorted datasets with the LOCAL keyword vs allowing the optimizer to distribute and or hash the data on its own?\", \"post_time\": \"2011-11-28 18:46:16\" },\n\t{ \"post_id\": 659, \"topic_id\": 174, \"forum_id\": 10, \"post_subject\": \"Re: DENORMALIZE with GROUP on THOR\", \"username\": \"bforeman\", \"post_text\": \"No, thank YOU for the report! \\nI will pass this on to the documentation team. \\n\\nBob\", \"post_time\": \"2011-11-29 20:25:14\" },\n\t{ \"post_id\": 658, \"topic_id\": 174, \"forum_id\": 10, \"post_subject\": \"Re: DENORMALIZE with GROUP on THOR\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks for the reply, it does work on THOR.\", \"post_time\": \"2011-11-29 20:19:01\" },\n\t{ \"post_id\": 657, \"topic_id\": 174, \"forum_id\": 10, \"post_subject\": \"Re: DENORMALIZE with GROUP on THOR\", \"username\": \"bforeman\", \"post_text\": \"It's possible that the compiler has been updated to run GROUP with DENORMALIZE on THOR, did you try it?\\n\\nIf so we will need to update the documentation. Thanks for your report.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-11-29 20:17:46\" },\n\t{ \"post_id\": 642, \"topic_id\": 174, \"forum_id\": 10, \"post_subject\": \"DENORMALIZE with GROUP on THOR\", \"username\": \"aintnomyth\", \"post_text\": \"I've been using the DENORMALIZE operation instead of a JOIN for certain 1:M scenarios where the output transform aggregates info in the RIGHT dataset producing 1 record instead of M records. \\n\\nEverything works great so far but I noticed this section in the documentation on DENORMALIZE so I'm wondering if the system was changed to support THOR or if I am setting myself up for trouble down the road?\\n\\n
GROUP Specifies grouping the childrecset records based on the join condition so all the related child records are passed as a dataset parameter to the transform. Valid for use only on hThor and Roxie.
\", \"post_time\": \"2011-11-28 18:58:18\" },\n\t{ \"post_id\": 719, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"ghalliday\", \"post_text\": \"It works because GLOBAL() forces the input expression to be evaluated in a global context. That is outside the NOTHOR, so it can contain any dataset operations.\\n\\nThe result from a global can be saved in one of two ways - to disk, or as a temporary workunit result. The default is to disk, but the ,FEW indicates that the result is small enough to save as a temporary result.\\n\\nThe inline dataset processing cannot read from a disk file (which is why the GLOBAL gave you the error), but it can read from a temporary result - which is why GLOBAL(,FEW) did work.\\n\\nI'm not sure if there is any documentation on this area. (In the past it has been a case of people asking me what to do to avoid the problem.)\", \"post_time\": \"2011-12-06 09:49:42\" },\n\t{ \"post_id\": 704, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"sameermsc\", \"post_text\": \"Thanks, it works\\nmy filter condition is simple (id = 1)\\n\\nis there any documentation i can refer to get more information on such kind of issues?\\n\\ncan you explain how it worked?\", \"post_time\": \"2011-12-05 14:09:48\" },\n\t{ \"post_id\": 703, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"ghalliday\", \"post_text\": \"A filter condition on its own should be supported - I wonder if it is something complicated inside the filter condition.\\n\\nI've just looked back at some examples. Try GLOBAL(ds_data, FEW).\", \"post_time\": \"2011-12-05 14:02:21\" },\n\t{ \"post_id\": 702, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"sameermsc\", \"post_text\": \"it gives the same warning and does not work even after using GLOBAL \", \"post_time\": \"2011-12-05 13:51:58\" },\n\t{ \"post_id\": 701, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"ghalliday\", \"post_text\": \"Yes - it is because not all dataset operations can be executed inline (which is what happens in the global context). Adding GLOBAL() around the parameter to the APPLY should ensure that gets evaluated separately - which will hopefully work as you expect. i.e.\\n\\nAPPLY(GLOBAL(ds_data), funct(superFile, subFile));\\n\\nNeedless to say this really should be done automatically...\", \"post_time\": \"2011-12-05 13:36:17\" },\n\t{ \"post_id\": 700, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"sameermsc\", \"post_text\": \"Instead of passing the entire dataset, if i filter few records based on a condition and pass the filtered dataset to APPLY function i get the below warning and the program terminates\\n\\n[color=#408040:2zl9a3e4]Warning: (0,0): error C4818: INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR. 
(0, 0), 0, \\n\\nWhen I pass in the complete dataset without filtering, etc., the code executes without errors.\\nDoes this have anything to do with the filtering?\", \"post_time\": \"2011-12-05 13:17:38\" },\n\t{ \"post_id\": 682, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"rtaylor\", \"post_text\": \"
To work around this you need to use the (undocumented) NOTHOR directive to force it to be executed inline.
\\n\\nActually, NOTHOR is documented in the Language Reference, and based on this discussion I will be improving those docs. \", \"post_time\": \"2011-11-30 15:58:54\" },\n\t{ \"post_id\": 671, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"sameermsc\", \"post_text\": \"Thanks
\\n\\nIt works with the suggested modifications\", \"post_time\": \"2011-11-30 13:30:43\" },\n\t{ \"post_id\": 667, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"Re: cannot call function in non-global context\", \"username\": \"ghalliday\", \"post_text\": \"The answer is in two parts...\\n\\nFirst of all, I think you want to be using APPLY instead of PROJECT/OUTPUT:\\n\\nAPPLY(ds_data, funct(superFile, subFile));\\n\\nwhere the funct just contains the SEQUENTIAL statement, not the WHEN.\\n\\nUnfortunately that is still likely to generate the global error. The problem is that an APPLY action is executed in a graph, and the superfile actions aren't currently allowed inside a graph. To work around this you need to use the (undocumented) NOTHOR directive to force it to be executed inline. That leaves you with\\n\\n
\\nimport std;\\n\\nSuperFileInfo := RECORD\\n integer2 id;\\n string250 subfile;\\n string250 superfile;\\nEND;\\n\\nds_data := DATASET([{1, '~thor::in::sample1.list', '~thor::in::super1'}, {2, '~thor::in::sample2.list', '~thor::in::super1'}, {3, '~thor::in::sample3.list', '~thor::in::super2'}], SuperFileInfo);\\n\\nfunct(string superFile, string subFile) := FUNCTION\\n RETURN SEQUENTIAL(\\n IF(~Std.File.SuperFileExists(SuperFile),\\n Std.File.CreateSuperFile(SuperFile)),\\n Std.File.StartSuperFileTransaction(),\\n IF(Std.File.FindSuperFileSubName(SuperFile, SubFile) = 0, \\n Std.File.AddSuperFile(SuperFile,SubFile)),\\n Std.File.FinishSuperFileTransaction());\\nEND;\\n\\nNOTHOR(APPLY(ds_data, funct(SubFile, SuperFile)));\\n
\\n\\nNeedless to say you shouldn't need to do this. We should either support the super file actions inside a graph (should be possible in hthor/roxie, probably not in thor), or automatically evaluate the code in the correct place.\", \"post_time\": \"2011-11-30 09:33:22\" },\n\t{ \"post_id\": 666, \"topic_id\": 177, \"forum_id\": 10, \"post_subject\": \"cannot call function in non-global context\", \"username\": \"sameermsc\", \"post_text\": \"I have defined a record with three fields ie., id, superfile and subfile paths; created a dataset with few records.\\nNow i want to iterate through the records and add the subfiles into the respective superfile(here both super file and sub files physically exists)\\n\\nimport std;\\n\\nSuperFileInfo := RECORD\\n\\tinteger2 id;\\n\\tstring250 subfile;\\n\\tstring250 superfile;\\nEND;\\n\\nds_data := DATASET([{1, '~thor::in::sample1.list', '~thor::in::super1'},\\t\\t\\t{2, '~thor::in::sample2.list', '~thor::in::super1'},\\t\\t\\t\\t{3, '~thor::in::sample3.list', '~thor::in::super2'}], SuperFileInfo);\\n\\nfunct(string superFile, string subFile) := FUNCTION\\n\\tRETURN WHEN(0 , SEQUENTIAL(\\n\\t\\t\\t\\tIF(~Std.File.SuperFileExists(SuperFile),\\n\\t\\t\\t\\tStd.File.CreateSuperFile(SuperFile)),\\n\\t\\t\\t\\tStd.File.StartSuperFileTransaction(),\\n\\t\\t\\t\\tIF(Std.File.FindSuperFileSubName(SuperFile, SubFile) = 0, \\n\\t\\t\\t\\t\\t\\tStd.File.AddSuperFile(SuperFile,SubFile)),\\n\\t\\t\\t\\tStd.File.FinishSuperFileTransaction()));\\nEND;\\n\\nSuperFileInfo tdata(SuperFileInfo L) := Transform\\t\\t\\t\\t\\n\\tSELF.id := funct(L.SuperFile, L.SubFile);\\n\\tSELF := L;\\nEnd;\\n\\nds := PROJECT(ds_data, tdata(LEFT));\\noutput(ds);
\\n\\non Execution i get an error \\n\\nError: Cannot call function startsuperfiletransaction in a non-global context (26, 1), 4055, Temp\\\\TFR1EE8.tmp\\nWarning: Global side-effect SEQUENTIAL seems to be context dependent - it may not function as expected (12, 18), 4536, Temp\\\\TFR1EE8.tmp\\nWarning: Global side-effect SEQUENTIAL seems to be context dependent - it may not function as expected (12, 18), 4536, Temp\\\\TFR1EE8.tmp\\n\\nI have also tried using GLOBAL function in combination with APPLY and EVALUATE but no luck\\nAny soultion for this?\", \"post_time\": \"2011-11-30 08:53:01\" },\n\t{ \"post_id\": 699, \"topic_id\": 183, \"forum_id\": 10, \"post_subject\": \"Re: Can someone please explain this PARSE result.\", \"username\": \"Allan\", \"post_text\": \"Thanks for this ghalliday,\\n\\nI think I'll use REGEXFIND and REGEXREPLACE.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-12-05 12:57:11\" },\n\t{ \"post_id\": 698, \"topic_id\": 183, \"forum_id\": 10, \"post_subject\": \"Re: Can someone please explain this PARSE result.\", \"username\": \"ghalliday\", \"post_text\": \"The pattern a-z+ first of all matches 'hen', but that fails the validate\\nit backtracks, and matches 'he', which passes the validate.\\n(It also matches 'h', but that is shorter than 'he' so BEST selects 'he' as the match.)\\n\\nThe parse then tries to find another match at the first unmatched position - matching 'n'.\\n\\nWhat were you hoping for it to generate?\", \"post_time\": \"2011-12-05 10:32:01\" },\n\t{ \"post_id\": 696, \"topic_id\": 183, \"forum_id\": 10, \"post_subject\": \"Can someone please explain this PARSE result.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've taken this example from the PARSE section in the ECL Reference and cut it down to its bare bones in an attempt to explain the results, which has eluded me.\\nThe code is:\\n\\nrs := RECORD\\n STRING line;\\nEND;\\nds := DATASET([{'hen'}], rs);\\n\\nPATTERN Alpha := PATTERN('[A-Za-z]');\\nPATTERN Word := Alpha+;\\nPATTERN notHen := VALIDATE(Word, MATCHTEXT != 'hen');\\n\\nRULE NounPhraseComponent2 := notHen;\\n\\nps2 := RECORD\\n out2 := MATCHTEXT(NounPhraseComponent2);\\nEND;\\np2 := PARSE(ds, line, NounPhraseComponent2, ps2, BEST, MANY, NOCASE);\\n\\nOUTPUT(p2);\\n
\\n\\nThis produced a record set with two records in it:\\n\\nhe\\nn\\n
\\n\\nCould someone please explain this result to me.\\n\\nMany thanks in advance.\\n\\nAllan\", \"post_time\": \"2011-12-02 21:51:36\" },\n\t{ \"post_id\": 734, \"topic_id\": 187, \"forum_id\": 10, \"post_subject\": \"Re: Underlying dataset record from index?\", \"username\": \"rtaylor\", \"post_text\": \"To answer your question regarding half-keyed JOINs, I cannot use them because the index record would become too large (there is simply too much data in the actual record that needs to be copied over). I tried that earlier.
\\n\\nI'm curious -- when you tried this previously, did you make all the fields in the file search keys, or did you include them as payload fields? \\n\\nA single index record must be defined as less than 32K and result in a less than 8K page after compression. Therefore, what is the SIZEOF the RECORD structure of the financials dataset? Are there any variable-length fields or nested child datasets?\", \"post_time\": \"2011-12-08 20:57:36\" },\n\t{ \"post_id\": 731, \"topic_id\": 187, \"forum_id\": 10, \"post_subject\": \"Re: Underlying dataset record from index?\", \"username\": \"DSC\", \"post_text\": \"You accurately predicted my code, Bob! Here is the snippetized version:\\n\\n\\nEXPORT CompanyFinancial := MODULE\\n\\n\\tETL(DATASET(RawLayout) ds) := FUNCTION\\n\\t\\t// Blah blah blah\\n\\t\\tRETURN DISTRIBUTE(fullRecords,HASH32(companyID));\\n\\tEND;\\n\\t\\n\\tRecords := OUTPUT(ETL(RawDS),,kDataFilePath,OVERWRITE,COMPRESSED);\\n\\t\\n\\tEXPORT DS := DATASET\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\tkDataFilePath,\\n\\t\\t\\t\\t\\t\\t\\t\\tLayoutWithPosition,\\n\\t\\t\\t\\t\\t\\t\\t\\tTHOR\\n\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\n\\tEXPORT\\tIDX_CompanyID := INDEX\\t\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDS,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{companyID,recordType,__pos},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{id,reportDate},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'~keys::CompanyFinancialByCompanyID'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\tEXPORT\\tIDX_FinancialID := INDEX\\t(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDS,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{id, __pos},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'~keys::CompanyFinancialByFinancialID'\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\nEND; // CompanyFinancial Module\\n
\\n\\n\\nEXPORT CompanyInfo := MODULE\\n\\t\\n\\tETL() := FUNCTION\\n\\t\\t// Blah blah blah\\n\\t\\t\\n\\t\\t// Fill in the latest financial record\\n\\t\\tLayout CopyLatestFinancial(Layout l, CompanyFinancial.IDX_CompanyID i) := TRANSFORM,SKIP(l.latestFinancial.id != '' AND l.latestFinancial.reportDate > i.reportDate)\\n\\t\\t\\ttempRecords := FETCH(CompanyFinancial.DS,CompanyFinancial.IDX_FinancialID(id = i.id),RIGHT.__pos,TRANSFORM(LEFT),LOCAL);\\n\\t\\t\\tSELF.latestFinancial := tempRecords[1];\\n\\t\\t\\tSELF := l;\\n\\t\\tEND;\\n\\t\\tfinalList := DENORMALIZE(listSoFar,CompanyFinancial.IDX_CompanyID,LEFT.companyID = RIGHT.companyID AND RIGHT.recordType = 'A',CopyLatestFinancial(LEFT,RIGHT),LOCAL);\\n\\t\\t\\n\\t\\tRETURN DISTRIBUTE(finalList,HASH32(companyID));\\n\\tEND;\\n\\t\\nEND; // CompanyInfo Module\\n
\\n\\nThis is part of an attempt to populate a CompanyInfo RECORD with a bunch of disparate pieces of information. The specific goal of the portion of the CompanyInfo.ETL function is to attach the latest annual financial record from a recordset containing zero or more financial records of various types. This code compiles but the FETCH fails at runtime:\\n\\n\\nError: System error: 0: Graph[209], keyeddenormalize[229]: SLAVE 10.210.150.80:6600: Global child graph? : Global acts = Graph(231): [fetch(47)] (0, 0), 0, \\n
\\n\\nThis is merely my latest attempt. I have several more variations on this theme that compile but fail in nearly the same way.\\n\\nOur sample datasets are explicitly DISTRIBUTED based on a common field (companyID) in order to group the closely-related information together.\\n\\nTo answer your question regarding half-keyed JOINs, I cannot use them because the index record would become too large (there is simply too much data in the actual record that needs to be copied over). I tried that earlier.\\n\\nThanks!\", \"post_time\": \"2011-12-08 18:17:07\" },\n\t{ \"post_id\": 730, \"topic_id\": 187, \"forum_id\": 10, \"post_subject\": \"Re: Underlying dataset record from index?\", \"username\": \"bforeman\", \"post_text\": \"Without seeing any code, did you try to simply DISTRIBUTE the base dataset first and then try FETCH with the LOCAL option?\\n\\nAlso, could you get what you need by using a half-keyed JOINs perhaps?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-12-08 17:20:09\" },\n\t{ \"post_id\": 727, \"topic_id\": 187, \"forum_id\": 10, \"post_subject\": \"Underlying dataset record from index?\", \"username\": \"DSC\", \"post_text\": \"Is it possible to access the base dataset record from an index record?\\n\\nI have an index that contains only keys (because including the rest of the fields in the payload would make the index record too large). The index is used on the righthand side of a DENORMALIZE. I would like the associated TRANSFORM function to conditionally roll all of the data from the base dataset referenced by the index into SELF, but that means dereferencing the index record and I don't really see how to do that. I did try using a FETCH but ran into global versus local execution problems.\\n\\nAny hints would be appreciated!\", \"post_time\": \"2011-12-08 15:03:08\" },\n\t{ \"post_id\": 751, \"topic_id\": 192, \"forum_id\": 10, \"post_subject\": \"Re: IsNumeric function\", \"username\": \"Rob Pelley\", \"post_text\": \"Here's an alternative using a regular expression ...\\n\\n\\nisNumeric(STRING str) := REGEXFIND('^-?[0-9]+[.]?[0-9]*$',str);\\n
\\n\\nHere are my tests ...\\n\\n\\ntest(STRING str) := str + ' ' + IF(isNumeric(str),'TRUE','FALSE');\\n\\ntest('0'); // True\\ntest('123.456'); // True\\ntest('01234'); // True\\ntest('-12.34'); // True\\ntest(''); // False\\ntest('01A34'); // False\\ntest('-12.-34'); // False\\ntest('-12.3.4'); // False\\ntest('+123.4'); // False\\ntest('12+3.4'); // False\\ntest('-12+3.4'); // False\\n
\\n\\nHope this helps ...\\n\\nRob.\", \"post_time\": \"2011-12-15 02:36:58\" },\n\t{ \"post_id\": 750, \"topic_id\": 192, \"forum_id\": 10, \"post_subject\": \"Re: IsNumeric function\", \"username\": \"rtaylor\", \"post_text\": \"IMPORT STD.STR;\\nNum1 := '01234';\\nNum2 := '01A34';\\nNum3 := '-12.34';\\n\\nIsNumeric(string srcArg) := FUNCTION\\n filterArg := '0123456789.-';\\n return ( srcArg = str.Filter(srcArg, filterArg));\\nEND;\\n\\nIsNumeric(Num1); //true\\nIsNumeric(Num2); //false\\nIsNumeric(Num3); //true
\\n\\nYour code works for me. I made one small change and added - to your set of valid numerics.\", \"post_time\": \"2011-12-13 18:01:18\" },\n\t{ \"post_id\": 749, \"topic_id\": 192, \"forum_id\": 10, \"post_subject\": \"IsNumeric function\", \"username\": \"aintnomyth\", \"post_text\": \"Is there a built-in "IsNumeric" function?\\n\\nI built my own but then I realized it doesn't handle leading 0's so well \\n\\n
EXPORT IsNumeric(string srcArg) := FUNCTION\\n\\tfilterArg := '0123456789.';\\n\\treturn ( srcArg = str.Filter(srcArg, filterArg));\\nEND;
\\n\\n[Edit] after a second look it seems like it should handle leading zeros but it returns false for everything, I'm thinking it is related to the issue I'm having reading data in UTF8 format instead of ASCII.\", \"post_time\": \"2011-12-13 15:41:52\" },\n\t{ \"post_id\": 853, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Re: Initialising multiple rows to constant values.\", \"username\": \"bforeman\", \"post_text\": \"Very nice Gavin, thanks!\", \"post_time\": \"2012-01-20 15:20:30\" },\n\t{ \"post_id\": 849, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Re: Initialising multiple rows to constant values.\", \"username\": \"ghalliday\", \"post_text\": \"FYI We are in the process of adding a new dataset operator which would make this easier:\\n\\n DATASET(count, transform(COUNTER));\\n\\nSo your code would become\\n\\n\\nBlankDS := DATASET(10,TRANSFORM($.DeclareData.InData, SELF := []));\\n
\\n\\nIt should also support a ,DISTRIBUTED flag to enable the data to be generation to be split over all the nodes instead of just node1.\\n\\nOnce it is available I would expect your code to be automatically optimized to the new format.\", \"post_time\": \"2012-01-19 09:22:34\" },\n\t{ \"post_id\": 848, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Re: Initialising multiple rows to constant values.\", \"username\": \"Allan\", \"post_text\": \"Hi dabayliss,\\n\\nDid not notice your reply.\\n\\nThanks for the information.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-18 20:13:23\" },\n\t{ \"post_id\": 759, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Re: Initialising multiple rows to constant values.\", \"username\": \"dabayliss\", \"post_text\": \"If you only want a small number of records then normalize is the way to go; however it will generate all the data on a single node.\\nIf you want a LOT of data (millions or billions of rows) - then you can still use NORMALIZE but you have to be a bit cleverer.\\nOur upcoming ML library demonstrates how to generate billions of rows in parallel:\\nhttps://github.com/hpcc-systems/ecl-ml/ ... at/Vec.ecl\\nThe 'from' definition does it for Vectors. It is also tackling the slightly harder problem of leaving them sequentially numbered ...\\nBeing lazy; I would probably call the Vec routines and then PROJECT or TABLE to the format I wanted ...\", \"post_time\": \"2011-12-24 02:22:13\" },\n\t{ \"post_id\": 757, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Re: Initialising multiple rows to constant values.\", \"username\": \"Allan\", \"post_text\": \"Actually this would do:\\n\\nBlank := DATASET([{'','','',0}],$.DeclareData.InData);\\n\\nBlankDS := NORMALIZE(Blank,10,TRANSFORM(LEFT)));\\n
\", \"post_time\": \"2011-12-23 19:05:46\" },\n\t{ \"post_id\": 756, \"topic_id\": 194, \"forum_id\": 10, \"post_subject\": \"Initialising multiple rows to constant values.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI would like to be able to initialise a DATASET to say 50 rows of constant values.\\n\\nThis is achievable, but in a convoluted way. Though a PROJECT passing a input dataset of the required number of rows, then in the transform doing something like:\\n\\nSELF.FirstName := '';\\nSELF.Age := 0;\\n
\\n\\nNote the input datset is not being used at all in the TRANSFORM.\\n\\nIs there an easier way to do this?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-12-23 11:38:00\" },\n\t{ \"post_id\": 2788, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nYour understanding is correct, as shown by this example:import STD;\\ns := ['a','b','c','d','e','f','g','h','i','j','k','l','m',\\n 'n','o','p','q','r','s','t','u','v','w','x','y','z'];\\nds := SORT(DISTRIBUTE(DATASET(s,{string infld})),infld);\\nROLLUP(ds,TRUE,TRANSFORM({string infld},\\n SELF.infld := LEFT.infld + RIGHT.infld + \\n (STD.system.Thorlib.Node()+1)));\\n //on my 3-node cluster this produces: \\n //ab1c1d1e1f1g1h1i2j2k2l2m2n2o2p2q3r3s3t3u3v3w3x3y3z3
David's comment that "rollup does not allow for a cross-node merge step" is referring to the differences between the way ROLLUP and AGGREGATE operate.\\n HTH,\\n\\nRichard\", \"post_time\": \"2012-11-15 18:26:19\" },\n\t{ \"post_id\": 2785, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"oleg\", \"post_text\": \"[quote="dabayliss":3kotpei9]Guys,\\n\\nYou are right; but remember that rollup does not allow for a cross-node merge step. Put another way; the rollup requires all the data on one node ...\\n\\nDavid\\n\\nSorry, I hit this comment while actually looked for AGGREGATE problems, but it struck me right away - I was always sure that ROLLUP(someDS, TRUE, someXform), as well as DEDUP(someDS, TRUE) will generate a single record dataset unless explicitly specified as a LOCAL or GROUP.\\n\\nDid I misunderstood something??\", \"post_time\": \"2012-11-15 16:41:35\" },\n\t{ \"post_id\": 779, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"dustinskaggs\", \"post_text\": \"Or you could keep the PROJECT and just use the simplified syntax:\\n\\n_ds1_ := PROJECT(_ds_, {STRING _fld_});
\", \"post_time\": \"2012-01-04 18:45:10\" },\n\t{ \"post_id\": 776, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"Allan\", \"post_text\": \"The Code can be made simpler still!\\nInstead of the PROJECT you only need\\n\\n _ds1_ := TABLE(_ds_,{STRING _fld_:=_fld_});\\n
\\n\\nDon't think it will make it faster, not tried with large dateset.\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-04 13:40:27\" },\n\t{ \"post_id\": 774, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"Rob Pelley\", \"post_text\": \"David,\\n\\nThanks for your advice. We would like to use AGGREGATE but have had to resort to the ROLLUP approach because AGGREGATE is not available in our legacy environment. This should not be a problem because the datasets that we will be using to cross concatenate the strings will be very small.\\n\\nHere is my most concise version using ROLLUP (given that the FUNCTIONMACRO needs to be generic and flexible enough to handle any string in any dataset) ...\\n\\n\\nEXPORT CrossConcatenate(_ds_,_fld_,_sep_) := FUNCTIONMACRO\\n\\n _ds1_ := PROJECT(_ds_,TRANSFORM({STRING _fld_},SELF._fld_ := LEFT._fld_));\\n\\n RETURN ROLLUP(_ds1_,TRUE,TRANSFORM({STRING _fld_},SELF._fld_ := LEFT._fld_ + _sep_ + RIGHT._fld_));\\n\\nENDMACRO;\\n
\\n\\nIt's been a good exercise in trying various approaches to achieve the desired result ... many thanks to all who have provided input \\n\\nRob.\", \"post_time\": \"2012-01-03 21:09:14\" },\n\t{ \"post_id\": 773, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"dabayliss\", \"post_text\": \"Guys,\\n\\nYou are right; but remember that rollup does not allow for a cross-node merge step. Put another way; the rollup requires all the data on one node ...\\n\\nDavid\", \"post_time\": \"2012-01-03 19:56:25\" },\n\t{ \"post_id\": 772, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"rtaylor\", \"post_text\": \"Rob,\\n\\nIt can get even simpler than that:\\n\\n
s := ['a','b','c','d','e'];\\nds := DATASET(s,{string infld});\\nROLLUP(ds,TRUE,TRANSFORM({string infld},SELF.infld := LEFT.infld + RIGHT.infld));\\n //produces 'abcde'
\", \"post_time\": \"2012-01-03 19:18:32\" },\n\t{ \"post_id\": 771, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"Rob Pelley\", \"post_text\": \"An alternative to the DENORMALIZE and AGGREGATE approaches to solving this problem is to use ROLLUP as follows ...\\n\\n\\nEXPORT CrossConcatenate(_ds_,_rec_,_fld_,_sep_) := FUNCTIONMACRO\\n\\n _R1_ := RECORD\\n UNSIGNED1 _id_;\\n STRING _fld_;\\n END;\\n\\n _R1_ _XF1_(_ds_ _L_) := TRANSFORM\\n SELF._id_ := 1;\\n SELF._fld_ := _L_._fld_;\\n END;\\n\\n _ds1_ := PROJECT(_ds_,_XF1_(LEFT));\\n\\n _R1_ _XF2_(_R1_ _L_,_R1_ _R_) := TRANSFORM\\n SELF._id_ := _L_._id_;\\n SELF._fld_ := _L_._fld_ + _sep_ + _R_._fld_;\\n END;\\n\\n RETURN ROLLUP(_ds1_,LEFT._id_=RIGHT._id_,_XF2_(LEFT,RIGHT));\\n\\nENDMACRO;\\n
\", \"post_time\": \"2012-01-03 16:38:41\" },\n\t{ \"post_id\": 765, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"Rob Pelley\", \"post_text\": \"Thanks David,\\n\\nHere is a revised version of the FUNCTIONMACRO using AGGREGATE ...\\n\\n\\nEXPORT CrossConcatenate(inDS,inRec,inFld,sep) := FUNCTIONMACRO\\n\\n outRec := RECORD\\n STRING result;\\n END;\\n\\n outRec T1(inRec L, outRec R) := TRANSFORM\\n SELF.result := R.result + IF(R.result <> '',sep,'') + L.inFld;\\n END;\\n\\n outRec T2(outRec R1, outRec R2) := TRANSFORM\\n SELF.result := R1.result + R2.result;\\n END;\\n\\n RETURN AGGREGATE(inDS,outRec,T1(LEFT,RIGHT),T2(RIGHT1,RIGHT2));\\n\\nENDMACRO;\\n
\", \"post_time\": \"2011-12-27 17:18:45\" },\n\t{ \"post_id\": 760, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Re: Cross Record String Concatenation\", \"username\": \"dabayliss\", \"post_text\": \"AGGREGATE was purpose built for just such a task ...\", \"post_time\": \"2011-12-24 02:23:50\" },\n\t{ \"post_id\": 758, \"topic_id\": 195, \"forum_id\": 10, \"post_subject\": \"Cross Record String Concatenation\", \"username\": \"Rob Pelley\", \"post_text\": \"I need to be able to concatenate the strings for a given field in a given dataset, across all of the records in the dataset.\\n\\nFor example :\\n\\nInput\\n\\nA\\nB\\nC\\nD\\n\\nOutput\\n\\nABCD\\n\\nI have written a FUNCTIONMACRO which accepts the following parameters ...\\n\\nds : the dataset\\nrec : the dataset record structure\\nfld : the string field that needs to be cross concatenated\\nsep : the separator\\n\\nHere is the code ...\\n\\n\\nEXPORT CrossConcatenate(ds,rec,fld,sep) := FUNCTIONMACRO\\n\\n STRING concat(STRING str1,STRING str2,STRING sep) := str1 + sep + str2; \\n\\n R1 := RECORD\\n UNSIGNED1 _id_;\\n rec;\\n END;\\n\\n R1 XF(ds L) := TRANSFORM\\n SELF._id_ := 1;\\n SELF := L;\\n END;\\n\\n ds1 := PROJECT(ds,XF(left));\\n\\t\\n R2 := RECORD\\n UNSIGNED1 _id_;\\n INTEGER strcnt;\\n STRING fld;\\n END;\\n\\n R2 Prepare(R1 L) := TRANSFORM\\n SELF._id_ := 1;\\n SELF.strcnt := 0;\\n SELF.fld := '';\\n END;\\n\\n ds2 := PROJECT(ds1,Prepare(LEFT));\\n\\n R2 Move(R2 L,R1 R,INTEGER C) := TRANSFORM\\n SELF.strcnt := C;\\n SELF.fld := IF(c=1,R.fld,concat(L.fld,R.fld,sep));\\n SELF := L;\\n END;\\n\\n ds3 := DEDUP(DENORMALIZE(ds2,ds1,LEFT._id_=RIGHT._id_,Move(LEFT,RIGHT,COUNTER)),_id_);\\n\\n RETURN ds3;\\n\\nENDMACRO;\\n
\\n\\nTo test the FUNCTIONMACRO I used the following ...\\n\\n\\nimport $;\\n\\n// Test the Macro\\n\\nr := record\\n unsigned1 v1;\\n unsigned1 v2;\\n string str;\\n unsigned1 v3;\\nend;\\n\\nds := dataset([{1,2,'abc',1},{3,4,'defg',2},{5,6,'hijkl',3},{7,8,'mnopqr',4},{9,10,'stuvwxyz',5}],r);\\n\\nstring sep := '-';\\n\\noutput($.CrossConcatenate(ds,r,str,sep));\\n
\\n\\nThis works but is there a better way to achieve the desired result?\\n\\nAny feedback would be appreciated.\\n\\nRob.\", \"post_time\": \"2011-12-23 21:18:24\" },\n\t{ \"post_id\": 770, \"topic_id\": 196, \"forum_id\": 10, \"post_subject\": \"Re: Regarding Local Path Description for OUTPUT\", \"username\": \"bforeman\", \"post_text\": \"Hi GK,\\n\\nWhen you say you could not make an example, what errors (if any) do you see with a syntax check? The docs for using TABLE are fairly straightforward, the important rule to remember is that the RECORD structure used by the TABLE statement needs to have default values assigned to each field in the structure. \\n\\nHere is a pretty cool example of TABLE that we use in our training classes:\\n\\nIMPORT STD;\\nds := DATASET([{'A','B','C'},{'D','E','F'},{'G','H','I'}],\\n {STRING1 Ltr1, STRING1 Ltr2, STRING1 Ltr3});\\n\\nTrec := RECORD\\n STRING1 Ltr1 := ds.Ltr1; //explicitly specifies type, name, and source\\n ds.Ltr2; //implicitly specifies type, name, and source\\n STRING1 Ltr3 := STD.Str.ToLowerCase(ds.Ltr3); //modify the data\\n STRING3 F1 := ds.Ltr1 + ds.Ltr2 + ds.Ltr3; //build totally new fields\\n STRING20 F4 := 'I am a work field'; //create a work field\\nEND; \\nt := TABLE(ds,Trec);\\nt;
\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2012-01-02 13:20:57\" },\n\t{ \"post_id\": 764, \"topic_id\": 196, \"forum_id\": 10, \"post_subject\": \"Required Example on TABLE\", \"username\": \"intas_mr\", \"post_text\": \"Hi,\\n\\nI required an sample example that should provide OUTPUT on DATASET(RecordSet) and New Record written in ECLIDE. and I should access through table. Even i have verified the topic TABLE . But i was not able to make an example. Can you please provide an sample example Where i can combined my DATASET and new record in the ECLIDE from to display the information using 'TABLE' .\\n\\nThanks,\\nGK.\", \"post_time\": \"2011-12-27 06:39:48\" },\n\t{ \"post_id\": 763, \"topic_id\": 196, \"forum_id\": 10, \"post_subject\": \"Re: Regarding Local Path Description for OUTPUT\", \"username\": \"intas_mr\", \"post_text\": \"Thank you dabayliss for your reply.\", \"post_time\": \"2011-12-27 06:34:22\" },\n\t{ \"post_id\": 762, \"topic_id\": 196, \"forum_id\": 10, \"post_subject\": \"Re: Regarding Local Path Description for OUTPUT\", \"username\": \"dabayliss\", \"post_text\": \"An output to disk stores the data locally to the thor (or hthor if you are using that). If the thor is 1x (or if you are using hthor) - then that data is stored as a 'normal' file - you can read it as you would any other linux file.\\nIf the thor is multi-way; then the data is still stored as 'normal' data on the disk - except it will be in multiple parts. The despray collects those multiple parts and stores them into a single concatenated file. Note that spray and despray does NOT require ECLWATCH; it can be done from within ECL too ...\\n\\nThe place that each thor node (or hthor) stores the data is configurable using the configuration system.\\n\\nThe above is a long way of saying: yes - you can control where the data goes - BUT if you have a multi-node thor you will probably want to despray (by ECLWATCH or from ECL code)\", \"post_time\": \"2011-12-25 22:27:12\" },\n\t{ \"post_id\": 761, \"topic_id\": 196, \"forum_id\": 10, \"post_subject\": \"Regarding Local Path Description for OUTPUT\", \"username\": \"intas_mr\", \"post_text\": \"Hi ,\\n\\nI have one query regarding the OUTPUT i.e., when i created .csv file and done spraying on ECL WATCH providing a path i.e., \\n \\n Syntax1:- attr1:=DATASET('~c::Foldername::Folder.csv'). \\n\\nWhen we write code in ECLIDE for and output(attr1);.The OUTPUT Action will provide result in ECLIDE. \\n\\nLets here the question :\\n\\n When i create the DATASET on ECLIDE with record definition and i want to Create Folder Path for that DATASET IN OUTPUT Action without giving scope Operator i.e.\\n\\n Syntax2:- attr1: OUTPUT('c::FolderName::Folder); Action with out providing the any scope '~'.\\n\\n So please can any one describe whether i can create file on particular folder using above syntax2: i mean i want to create file on those that with some name in my local drive. Is it possible without spraying on to ECL Watch. 
Because in ECL Watch we will spray and after executing the code results in thor to same data that is publish on Roxie as their will be publish the link in ECLIDE .So my question is without using my scope('~')operator can we can do action of OUTPUT(); \\n\\nThanks,\\nGK\", \"post_time\": \"2011-12-25 06:11:38\" },\n\t{ \"post_id\": 767, \"topic_id\": 197, \"forum_id\": 10, \"post_subject\": \"Re: Regarding INDEX AND BUILDINDEX\", \"username\": \"rtaylor\", \"post_text\": \"Your problem appears to be that you're trying to define two INDEXes in a single definition, and that is not possible -- a single definition can only have a single expression.\\n\\nYou need to make the two INDEXes separate definitions (lines of code). The typical way of doing this would be to use the MODULE structure, like this:\\n\\n//in the Employee.ecl file:\\nEXPORT Employee:= MODULE\\n EXPORT Layout := RECORD\\n --\\n --\\n END;\\n EXPORT File := DATASET('filename',Layout,THOR);\\n EXPORT ZipIDX := INDEX(File,{zip,fpos},'~c::name1::ZipIndex');\\n EXPORT ZIPANDCOUNTRY := INDEX(File,{zip,country,fpos},'~c::name1::ZipandCountryINDEX');\\nEND;
\\n\\nThen to build the indexes, execute BUILDINDEX like this:\\n\\n\\nIMPORT foldername;\\n\\nBUILDINDEX(foldername.Employee.ZipIDX,'~c::name1::ZipIndex');\\nBUILDINDEX(foldername.Employee.ZIPANDCOUNTRY ,'~c::name1::ZipandCountryINDEX');\\n
\", \"post_time\": \"2011-12-28 21:20:19\" },\n\t{ \"post_id\": 766, \"topic_id\": 197, \"forum_id\": 10, \"post_subject\": \"Regarding INDEX AND BUILDINDEX\", \"username\": \"intas_mr\", \"post_text\": \"Hi,\\n\\nI want to Create multiple INDEX and BUILDINDEX FOR MY EXPORT Definition in one file how it can possible for example\\nfile one--\\nEXPORT Employee:=REcord\\n--\\n--\\nEnd;\\nfile 2----\\nIMPORT foldername;\\nExport Name_File:=\\nINDEX(-------------{zip,fpos});-- WORKING FINE THIS ANDGETTING OUTPUT \\nINDEX(--------------{ZIPANDCOUNTRY,fpos});-- GETTING ERROR\\n\\nfile3------------\\nBUILDINDEX(foldername.Name_File,'~c::name1::ZipIndex);-- WORKING FINE THIS\\nBUILDINDEX(foldername.Name_File,'~c::name1::ZipandCountryINDEX);----GETTING ERROR\\n\\n\\nWhen i create two or more indexes and buildindex in file2 and file3 i was not able to build the data. Can i know what is the procedure to Create new files in particular location . As i have sprayed using 'CSV'. I required to create two or more index and buildindex with different names having file position to them. How it can possible. Can you please any explain me from above example\", \"post_time\": \"2011-12-28 18:26:04\" },\n\t{ \"post_id\": 800, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"Rob Pelley\", \"post_text\": \"David,\\n\\nI understand that from a Big Data perspective performance considerations must take precedence and that ECL has been designed as a compiled language for this reason. \\n\\nHaving worked with interpreted languages for many years, I was used to the flexibilty that these had to offer, albeit at the cost of performance. This poor performance however, was the main reason why I came to HPCC because it has been highly optimised for massive data processing.\\n\\nIn short, I'm not surprised to find that ECL does not allow me to do what I had wanted, I think I was just 'testing the water' ...\\n\\nThanks for your help.\\n\\nRob.\", \"post_time\": \"2012-01-07 11:30:25\" },\n\t{ \"post_id\": 799, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"dabayliss\", \"post_text\": \"The simplest, most direct and most general answer to your question is: no. ECL is a compiled language - there is no run-time interpreter - so you cannot in general do what you ask.\\n\\nThat said; with a certain animal cunning you make make it LOOK like that is what you are doing well enough that no-one will notice.\\n\\nIn Roxie it is particularly easy; GRAPH is designed to do just this - you can quite easily create your own languages and then 'interpret' them at run time.\\n\\nThor is a little harder; you essentially need to create a parameterised function that describes each 'option' of the things you might want to do. You then use LOOP with a MAP statement inside it to do the things one after another.\\n\\nHTH\\n\\nDavid\\n\\nIn case you are wondering: "but why ..." the answer is simple - performance.\", \"post_time\": \"2012-01-07 02:16:02\" },\n\t{ \"post_id\": 796, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"bforeman\", \"post_text\": \"If the DATASET is created at runtime, couldn't you just throw what you need into a memory TABLE prior to calling the MACRO? 
\\n\\nOr maybe just setting up a default value that will satisfy the compiler, and then update it after the DATASET is generated.\\n\\nLet me think a little more \", \"post_time\": \"2012-01-06 20:57:21\" },\n\t{ \"post_id\": 795, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"Rob Pelley\", \"post_text\": \"Thanks Bob
\\n\\nAgain, your MACRO works fine but the example you gave still has a list of actions that is known at compile time.\\n\\nHere is a (simplified) version of what I'm trying to do ...\\n\\n
\\nMAC_GenFile(C) := FUNCTIONMACRO\\n RETURN OUTPUT(#TEXT(C));\\nENDMACRO;\\n\\nMAC_Outputs(ROutput,ActionString) := MACRO\\n ROutput := SEQUENTIAL(#EXPAND(ActionString));\\nENDMACRO;\\n\\nMyActions := 'MAC_GenFile(1),MAC_GenFile(2),MAC_GenFile(3)';\\nMac_Outputs(RO,MyActions);\\nRO;\\n
\\n\\nThis works fine and produces three result sets (1,2 and 3) as expected.\\n\\nHowever, what I want to be able to do is to generate the action list at runtime, but when I try to do this the MACRO substitution does not work because the list is not known at compile time \\n\\nIf I were to substitute the following line ...\\n\\n
\\nMyActions := 'MAC_GenFile(1),MAC_GenFile(2),MAC_GenFile(3)';\\n
\\n\\nWith ...\\n\\n\\nMyActions := DS[1].ActionList;\\n
\\n\\nHere, the dataset DS is not produced until runtime so the compiler (quite rightly) complains.\\n\\nThanks for your help so far ... any other suggestions? \\n\\nRob.\", \"post_time\": \"2012-01-06 20:45:45\" },\n\t{ \"post_id\": 794, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"bforeman\", \"post_text\": \"Hi Rob, \\n\\nOK, how about this?
\\n\\n
EXPORT MAC_Outputs(ROutput,ActionString) := MACRO\\n\\tROutput := SEQUENTIAL(#EXPAND(ActionString));\\nENDMACRO;
\\n\\nIn a Builder window:\\n\\n/* IMPORT YourModule;\\n MyActions := 'OUTPUT('Hello'),OUTPUT('Hello Again')'; //As many actions as you need.\\n YourModule.Mac_Outputs(RO,MyActions);\\n RO;\\n*/
\", \"post_time\": \"2012-01-06 18:20:11\" },\n\t{ \"post_id\": 793, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"Rob Pelley\", \"post_text\": \"What I meant was, is it possible to generate code using dynamic inputs (where you do not know the inputs until runtime)?\\n\\nIn Bob's example the MACRO substitution is for static inputs whereby the actions A and B are predefined ... what if there were an indeterminate number of actions to execute sequentially?\\n\\nRob.\", \"post_time\": \"2012-01-06 18:06:12\" },\n\t{ \"post_id\": 792, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"ghalliday\", \"post_text\": \"What do you mean by\\n....I run into the problem that the values to #EXPAND are not known until runtime.\\n
\\nOtherwise #EXPAND(RO) should work.\", \"post_time\": \"2012-01-06 16:23:29\" },\n\t{ \"post_id\": 791, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"Rob Pelley\", \"post_text\": \"Hi Bob,\\n\\nThanks for the quick response \\n\\nYes, your MACRO will generate the text 'Test := SEQUENTIAL(A,B);' and deliver it as ECL code but my problem is how to get the compiler to execute the code?\\n\\nHave you tested that your example will run to produce the desired output?\\n\\nWhen I try to run my tests, all I can get is the text output 'Test := SEQUENTIAL(A,B);' ...\\n\\n
\\nMAC_Outputs(ROutput) := MACRO\\n A := OUTPUT('Hello');\\n B := OUTPUT('Hello Again');\\n ROutput := 'Test := SEQUENTIAL(' + #TEXT(A)+',' + #TEXT(B) + ');';\\nENDMACRO;\\n\\nMac_Outputs(RO);\\n\\nRO;\\n
\\n\\nProduces ...\\n\\n\\nTest := SEQUENTIAL(A,B);\\n
\\n\\nIf I then try to use the attribute 'Test' I get an error because it is unknown ...\\n\\nRegards,\\n\\nRob.\", \"post_time\": \"2012-01-06 15:57:04\" },\n\t{ \"post_id\": 790, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Re: Generating a SEQUENTIAL list of Actions\", \"username\": \"bforeman\", \"post_text\": \"Hi Rob,\\n\\nTry this:\\n\\nEXPORT MAC_Outputs(ROutput) := MACRO\\n\\tA := OUTPUT('Hello');\\n\\tB := OUTPUT('Hello Again');\\n\\tROutput := 'Test := SEQUENTIAL(' + #TEXT(A)+',' + #TEXT(B) + ');';\\nENDMACRO;
\\n\\nand then:\\n\\nIMPORT YourModule;\\nYourModele.Mac_Outputs(RO);\\nRO;
\\nproduces:\\n\\nTest := SEQUENTIAL(A,B);
\", \"post_time\": \"2012-01-06 14:27:12\" },\n\t{ \"post_id\": 786, \"topic_id\": 203, \"forum_id\": 10, \"post_subject\": \"Generating a SEQUENTIAL list of Actions\", \"username\": \"Rob Pelley\", \"post_text\": \"I would like to know if it is possible in ECL to generate a SEQUENTIAL list of actions. \\n\\nFor example, if I want the following ECL statement ...\\n\\nSEQUENTIAL(A,B,C,D);\\n\\nwhere A,B,C,D are valid ECL actions, can I somehow generate the SEQUENTIAL list of actions? \\n\\nI can easily generate a STRING of the required actions 'SEQUENTIAL(A,B,C,D)' but is it possible to translate this string into a valid ECL statement?\\n \\nI've been experimenting with MACRO and FUNCTIONMACRO and the use of #EXPAND but I run into the problem that the values to #EXPAND are not known until runtime.\\n\\nRob.\", \"post_time\": \"2012-01-06 11:47:54\" },\n\t{ \"post_id\": 843, \"topic_id\": 209, \"forum_id\": 10, \"post_subject\": \"Re: Encrypting text not just entire datasets.\", \"username\": \"Allan\", \"post_text\": \"Ok,\\n\\nI've written my own (Acknowledging Rob Pelley's input of FUNCTIOMACRO RecreateString.)\\n\\nIt is perhaps a bit specific to my needs but I attach anyway as an example.\\n\\nYours\\n\\nAllan\\n\\nActually don't seem to be able to attach files with extension 'ecl' ????\\nSo here it is inline:\\n\\nEXPORT Encryption(unsigned4 seed,STRING1 FieldSeperator = ',') := MODULE\\n/*\\n\\t\\tIt’s a textual encryptor only encrypting the printable ASCII characters. (32 – 127 inclusive). Characters 0 – 31 inclusive are discarded. \\n\\t\\t(Need some of those to do the work below)\\n\\t\\tThe encrypted text will not contain a double quote, single quote, reverse quote or space character (so anyone inadvertently trimming the string \\n\\t\\twont destroy the code). \\n\\t\\tIt will also never encode to a field delineator, which can be specified (defaulted to comma).\\n\\t\\tAll this to remove problems in transferring encrypted text.\\n\\t\\tThe code itself is a pseudo one time pad, ‘pseudo’ because the list of random numbers is finite. There are way to elaborate the coding \\n\\t\\tbut by far the easiest way to increase security is to increase the number of random numbers.\\n\\n\\t\\tThe 'seed' and 'FieldSeperator' are parameters as the MODULE level to the encrypt and decrypt sing from the\\n same hymn sheet.\\n\\t\\tIt won’t work for EBCDIC or UNICODE.\\n\\n\\t\\tExample of Use:\\n\\n\\t\\te := $.Encryption(111117); \\n Encrip := e.EncryptIt('It don\\\\'t mean a thing, if it ain\\\\'t got that swing.');\\n OUTPUT(Encrip,NAMED('Encrypt'));\\n OUTPUT(e.DecryptIt(Encrip),NAMED('Decrypt'));\\n*/\\n SHARED SET OF UNSIGNED1 mask :=[0,1,2,3,4,126 /* Put thousands of random numbers into this ranged 0 to 126 inclusive. */];\\n\\n\\t\\tSHARED unsigned off := seed % COUNT(mask);\\n\\t\\tSHARED INTEGER1 bias := 32;\\t\\t\\t// Strip out all control characters. 
0-1f inclusive\\n\\t\\tSHARED CommaIndicator := TRANSFER(x'1f',STRING1);\\n\\t\\tSHARED SpaceIndicator := TRANSFER(x'1e',STRING1);\\n\\t\\tSHARED DQuoteIndicator := TRANSFER(x'1d',STRING1);\\n\\t\\tSHARED QuoteIndicator := TRANSFER(x'1c',STRING1);\\n\\t\\tSHARED RQuoteIndicator := TRANSFER(x'1b',STRING1);\\n\\n\\t\\tSHARED StringAsDS := RECORD\\n\\t\\t\\t STRING1 chr;\\n\\t\\tEND;\\n\\t\\t\\t\\n\\t\\tSHARED STRING1 Convert(STRING1 txt,INTEGER C,INTEGER1 whichway) := FUNCTION\\n\\t\\t\\tINTEGER i := ((off+C) % COUNT(mask))+1;\\n\\t\\t\\tINTEGER amo := TRANSFER(txt,INTEGER1)-bias;\\t\\t\\t\\t// Perform calculation on char range 0 to ...\\n\\t\\t\\tUNSIGNED1 enc := (amo+(mask[i]*whichway)) & 127;\\n\\t\\t\\tRETURN TRANSFER(enc+bias,STRING1);\\t\\t\\t // Get characters back into printable range.\\n\\t\\tEND;\\n\\n\\t\\tSHARED RecreateString(_ds_,_fld_) := FUNCTIONMACRO\\n\\n\\t\\t\\t_ds1_ := PROJECT(_ds_,{STRING _fld_});\\n\\n\\t\\t\\tRETURN ROLLUP(_ds1_,TRUE,TRANSFORM({STRING _fld_},SELF._fld_ := LEFT._fld_ + RIGHT._fld_))[1]._fld_;\\n\\n\\t\\tENDMACRO;\\n\\t\\t\\n\\t EXPORT STRING EncryptIt(STRING plainText) := FUNCTION\\n\\n\\t\\t\\tStringAsDS ConvertToDataSet(STRING txt) := FUNCTION\\n\\n\\t\\t\\t\\tStringAsDS createDS(INTEGER C) := TRANSFORM\\n\\t\\t\\t\\t\\t// Unconditionally strip out any control characters. (Will need space for encoded comma x1f DQuote etc)\\n\\t\\t\\t\\t\\tSELF.chr := IF(txt[C] < ' ',SKIP,txt[C]);\\n\\t\\t\\t\\tEND;\\n\\t\\t\\t\\t\\n\\t\\t\\t\\tRETURN NORMALIZE(DATASET([' '],StringAsDS),LENGTH(txt),createDS(COUNTER));\\n\\t\\t\\tEND;\\n\\n\\t StringAsDS applymask(StringAsDS L,INTEGER C) := TRANSFORM\\n\\n\\t\\t\\t STRING1 chr := Convert(L.chr,C,1);\\n\\t\\t\\t\\tSELF.chr := MAP(chr = FieldSeperator => CommaIndicator,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t chr = ' ' => SpaceIndicator, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t chr = '"' => DQuoteIndicator,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tchr = '\\\\'' => QuoteIndicator,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tchr = '`' => RQuoteIndicator, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tchr);\\n\\t\\t\\tEND;\\n\\t\\t\\t\\n\\t\\t\\ta := PROJECT(ConvertToDataSet(plainText),applymask(LEFT,COUNTER));\\n\\t\\t\\tRETURN RecreateString(a,chr);\\n\\t\\tEND;\\n\\n\\t\\tEXPORT STRING DecryptIt(STRING coded) := FUNCTION\\n\\t\\t\\n\\t\\t\\tStringAsDS ConvertToDataSet(STRING txt) := FUNCTION\\n\\n\\t\\t\\t\\tStringAsDS createDS(INTEGER C) := TRANSFORM\\n\\t\\t\\t\\t\\tSELF.chr := txt[C];\\n\\t\\t\\t\\tEND;\\n\\t\\t\\t\\t\\n\\t\\t\\t\\tRETURN NORMALIZE(DATASET([' '],StringAsDS),LENGTH(txt),createDS(COUNTER));\\n\\t\\t\\tEND;\\n\\n\\t\\t\\tStringAsDS applymask(StringAsDS L,INTEGER C) := TRANSFORM\\n\\n\\t\\t\\t\\tSELF.chr := MAP(L.chr = CommaIndicator => Convert(FieldSeperator,C,-1),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t L.chr = SpaceIndicator => Convert(' ',C,-1),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t L.chr = DQuoteIndicator => Convert('"',C,-1),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tL.chr = QuoteIndicator => Convert('\\\\'',C,-1),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tL.chr = RQuoteIndicator => Convert('`',C,-1),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tConvert(L.chr,C,-1));\\n\\t\\t\\tEND;\\n\\t\\t\\t\\n\\t\\t\\ta := PROJECT(ConvertToDataSet(coded),applymask(LEFT,COUNTER));\\n\\t\\t\\tRETURN RecreateString(a,chr);\\n\\t\\tEND;\\n\\t\\t\\nEND;\\n
\", \"post_time\": \"2012-01-17 21:21:31\" },\n\t{ \"post_id\": 828, \"topic_id\": 209, \"forum_id\": 10, \"post_subject\": \"Re: Encrypting text not just entire datasets.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI have data where only particular fields are private the rest must be plain text. I would have thought this would be quite a common requirement.
\\n\\nFrom whom should they be private? If it is from the end-user, then you simply do not return those private fields to the user unless the user has rights to see them.\\n\\nHowever, if it is from individual ECL developers, some who have rights and some who do not, then the problem becomes a question of "how granular does the security need to be?" Adding encryption/viewing rights support on an individual field level would drastically increase the complexity of managing the LDAP security. Our current security is at the file level -- an individual will only see/work with a file if they have the proper rights in LDAP. Therefore, Bob's suggestion of putting those fields into a separate DATASET (encrypted or not) is the proper solution, along with establishing LDAP permissions on that file as to who can see/work with that dataset.\\n\\nOf course, nothing is preventing you from writing your own individual field encryption/decryption functions, too. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-13 19:20:05\" },\n\t{ \"post_id\": 825, \"topic_id\": 209, \"forum_id\": 10, \"post_subject\": \"Re: Encrypting text not just entire datasets.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nYes thought of that but - ugh
\\n\\nI have data where only particular fields are private the rest must be plain text. I would have thought this would be quite a common requirement. \\nOne option would be finer granularity on the 'output', indicating which fields special actions must be take on (perhaps not just encryption, but compression, goodness knows what.)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-13 18:59:29\" },\n\t{ \"post_id\": 823, \"topic_id\": 209, \"forum_id\": 10, \"post_subject\": \"Re: Encrypting text not just entire datasets.\", \"username\": \"bforeman\", \"post_text\": \"AFAIK there is not a function that individually encrypts a single field in a dataset. Perhaps one alternative would be to export those fields you wish to encrypt to a new DATASET and encrypt the entire record and then JOIN them back when needed.\", \"post_time\": \"2012-01-13 15:06:42\" },\n\t{ \"post_id\": 822, \"topic_id\": 209, \"forum_id\": 10, \"post_subject\": \"Encrypting text not just entire datasets.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI would like to be able to encrypt individual fields in a record set. \\nI See there is Encrypt for DATASETS but it would be nice to have a function that takes a plain text string (with seed) and returns a textual encrypted text (plus an inverse function provided). An elaboration would be an option to generate binary encryption or encrypted text.\\n\\nI can find nothing in the libraries supplied by LN, and don't want to re-invent the wheel.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-13 14:02:13\" },\n\t{ \"post_id\": 4317, \"topic_id\": 212, \"forum_id\": 10, \"post_subject\": \"Re: Question on str.SubstituteIncluded\", \"username\": \"Allan\", \"post_text\": \"A useful bit of ECL, that does not do exactly whats mentioned above, but can put things like escape character before a set of characters in one operation is:\\n
\\nEscape(STRING s) := REGEXREPLACE('("|,)',s,'\\\\\\\\\\\\\\\\$0');\\n
\", \"post_time\": \"2013-07-12 09:23:38\" },\n\t{ \"post_id\": 847, \"topic_id\": 212, \"forum_id\": 10, \"post_subject\": \"Re: Question on str.SubstituteIncluded\", \"username\": \"ghalliday\", \"post_text\": \"I agree with both points.\\n\\nI have added a pull request to change the paramater to a STRING1, and a separate pull request to add a new function Str.Translate().\\n\\nThey're likely to go in 3.6 - although they'll probably be in master soon.\", \"post_time\": \"2012-01-18 17:29:11\" },\n\t{ \"post_id\": 842, \"topic_id\": 212, \"forum_id\": 10, \"post_subject\": \"Re: Question on str.SubstituteIncluded\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\nThanks for the response.\\n\\nAllan\", \"post_time\": \"2012-01-17 21:13:10\" },\n\t{ \"post_id\": 840, \"topic_id\": 212, \"forum_id\": 10, \"post_subject\": \"Re: Question on str.SubstituteIncluded\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\nBut there are two things to note.\\n1. The type of 'replace_char' is STRING, to stop confusion it would have been better defined as STRING1.\\n2. Would not a more useful implementation have been to allow multiple replacments in a single command, with the ordinal position of characters in 'filter' and 'replace_char' defining the mapping between the two parameters, so that the code above would have generated:
\\n\\nRegarding #1, I will raise the issue with the developers.\\n\\nRegarding #2, I will raise the issue with the developers, but it would likely be a different function, since SubstituteIncluded and SubstituteExcluded are a "matched set" and your suggestion would only apply to SubstituteIncluded.\\n\\nRichard\", \"post_time\": \"2012-01-17 16:27:16\" },\n\t{ \"post_id\": 834, \"topic_id\": 212, \"forum_id\": 10, \"post_subject\": \"Question on str.SubstituteIncluded\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe header to this library says:\\n\\n/**\\n * Returns the source string with the replacement character substituted for all characters included in the\\n * filter string.\\n * MORE: Should this be a general string substitution?\\n * \\n * @param src The string that is being tested.\\n * @param filter The string containing the set of characters to be included.\\n * @param replace_char The character to be substituted into the result.\\n * @see Std.Str.SubstituteExcluded\\n */\\n\\nEXPORT STRING SubstituteIncluded(STRING src, STRING filter, STRING replace_char) :=\\n lib_stringlib.StringLib.StringSubstituteOut(src, filter, replace_char);\\n
\\n\\nNow it does say, for 'replace_char', 'The character' indicating a single character would be used in the replacment. This is, in fact, what happens:\\n\\n\\nresstr := Std.Str.SubstituteIncluded('abcabc,'ac','yz');\\nOUTPUT(resstr);\\n
\\ngenerates:\\n\\nybyyby\\n
\\n\\nBut there are two things to note.\\n1. The type of 'replace_char' is STRING, to stop confusion it would have been better defined as STRING1.\\n2. Would not a more useful implementation have been to allow multiple replacments in a single command, with the ordinal position of characters in 'filter' and 'replace_char' defining the mapping between the two parameters, so that the code above would have generated:\\nybzybz
\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-16 16:04:40\" },\n\t{ \"post_id\": 872, \"topic_id\": 221, \"forum_id\": 10, \"post_subject\": \"Re: Example ECL that crashes the eclcc.exe\", \"username\": \"ghalliday\", \"post_text\": \"Thanks for the example. \\n\\nI have uploaded a fix to github which I imagine will be merged in today.\", \"post_time\": \"2012-01-23 09:23:33\" },\n\t{ \"post_id\": 866, \"topic_id\": 221, \"forum_id\": 10, \"post_subject\": \"Re: Example ECL that crashes the eclcc.exe\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nYes your right, that's the problem with copying code \\nStill its found a wrinkle.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-22 09:04:54\" },\n\t{ \"post_id\": 864, \"topic_id\": 221, \"forum_id\": 10, \"post_subject\": \"Re: Example ECL that crashes the eclcc.exe\", \"username\": \"Tony Kirk\", \"post_text\": \"I suspect you intended to use the labels of the transform parameters you provided (L and R) instead of LEFT and RIGHT. Given the reserved nature of LEFT and RIGHT, it may be the cause of the crash. Still shouldn't happen, but...\\n\\n
\\noutrec DoJoin(inrec L,outrec R) := TRANSFORM\\n SELF := L;\\n SELF := R;\\nEND;\\n
\", \"post_time\": \"2012-01-21 22:09:44\" },\n\t{ \"post_id\": 863, \"topic_id\": 221, \"forum_id\": 10, \"post_subject\": \"Example ECL that crashes the eclcc.exe\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe code below crashes when I attempt to syntax check it.\\nAlso attached is the dialog box of the crash.\\nThe version I'm running is 6.0.2.4.682.1\\n\\n// ----------------------------------\\n// Exercise 'GROUPED ROLLUP'\\n// ----------------------------------\\ninrec := RECORD\\n UNSIGNED6 did;\\nEND;\\n\\noutrec := RECORD(inrec)\\n STRING20 name;\\n UNSIGNED score;\\nEND;\\n\\nnamerec := RECORD\\n STRING20 name;\\nEND;\\n\\nfinalrec := RECORD\\n DATASET(namerec) names;\\n STRING20 secondname;\\nEND;\\n\\nds := DATASET([1,2,3,4,5,6],inrec);\\ndsg := GROUP(ds,ROW);\\n\\ni1 := DATASET([ {1,'Kevin', 10 },\\n {2,'Richard', 5},\\n {5,'Nigel', 2},\\n {0,'', 0}],outrec);\\n\\ni2 := DATASET([ {1,'Kevin Halligan', 12 },\\n {2,'Richard Charles',15 },\\n {3,'Blake Smith', 20},\\n {5,'Nigel Hicks', 100},\\n {0,'', 0}],outrec);\\n\\ni3 := DATASET([ {1,'Halligan', 8 },\\n {2,'Richard',8 },\\n {6,'Pete', 4},\\n {6,'Peter', 8},\\n {6,'Petie', },\\n {0,'', 0}],outrec);\\n\\noutrec DoJoin(inrec L,outrec R) := TRANSFORM\\n SELF := LEFT;\\n SELF := RIGHT;\\nEND;\\n/*\\nj1 := JOIN (dsg,\\n i1,\\n LEFT.did = RIGHT.did,\\n DoJoin(LEFT,RIGHT),\\n LEFT OUTER,\\n MANY LOOKUP);\\n\\nj2 := JOIN (dsg,\\n i2,\\n LEFT.did = RIGHT.did,\\n DoJoin(LEFT,RIGHT),\\n LEFT OUTER,\\n MANY LOOKUP);\\n\\nj3 := JOIN (dsg,\\n i3,\\n LEFT.did = RIGHT.did,\\n DoJoin(LEFT,RIGHT),\\n LEFT OUTER,\\n MANY LOOKUP);\\n\\nEXPORT Combined := REGROUP(j1,j2,j3);\\n*/\\nEXPORT ExRollup2 := 'A';\\n
\\n\\nIf I start the comment before the 'DoJoin' transform function, I get a syntax error but at least it does not crash.\\n\\nI was attempting to understand the GROUPED rollup and was typing in the example from the ECL reference manual page 263.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-01-21 20:25:18\" },\n\t{ \"post_id\": 881, \"topic_id\": 227, \"forum_id\": 10, \"post_subject\": \"Re: publishing many Roxie queries\", \"username\": \"bforeman\", \"post_text\": \"Currently no, you have to use the ECL Watch interface, but stay tuned for updates on this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-24 20:18:38\" },\n\t{ \"post_id\": 875, \"topic_id\": 227, \"forum_id\": 10, \"post_subject\": \"publishing many Roxie queries\", \"username\": \"aintnomyth\", \"post_text\": \"Is there a command line tool for publishing Roxie queries? I was thinking about ECL plus but it doesn't seem to have the ECL Watch Publish mechanism.\", \"post_time\": \"2012-01-23 18:21:37\" },\n\t{ \"post_id\": 907, \"topic_id\": 231, \"forum_id\": 10, \"post_subject\": \"Re: non-deleting DEDUP routine\", \"username\": \"aintnomyth\", \"post_text\": \"Yep, I'm following the pattern from the programmer's guide for distribute/sort/group.\", \"post_time\": \"2012-01-26 17:28:36\" },\n\t{ \"post_id\": 905, \"topic_id\": 231, \"forum_id\": 10, \"post_subject\": \"Re: non-deleting DEDUP routine\", \"username\": \"rtaylor\", \"post_text\": \"Is your input ds to the GROUP function previously sorted by your pk_fields? If so, then this code all looks pretty cool to me. If not ... \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-26 16:48:13\" },\n\t{ \"post_id\": 903, \"topic_id\": 231, \"forum_id\": 10, \"post_subject\": \"non-deleting DEDUP routine\", \"username\": \"aintnomyth\", \"post_text\": \"For some of my dedup tasks I need to identify/flag the records to-be-removed for reports and analysis purposes and I also need to identify the "kept" record as surviving a PK collision.\\n\\nThe process I've been using is something like this:\\n\\n
\\ndedup_layout := RECORD\\n\\tds;\\n\\tunsigned1 pk_Dup_count := 0; //total number of records with same PK\\n\\tunsigned1 dupDiscard := 0; //set to 1 for all but 1 of the PK group\\nEND;\\npk_groups := GROUP(ds, pk_fields);\\npk_dups := HAVING(pk_groups, COUNT(ROWS(LEFT)) > 1);\\npk_non_dups := HAVING(pk_groups, COUNT(ROWS(LEFT)) = 1);\\n\\npk_flags := PROJECT(pk_dups, TRANSFORM(dedup_layout...set flags))\\npk_no_flags := PROJECT(pk_non_dups, TRANSFORM(dedup_layout...defaults))\\n\\npk_results := UNGROUP(pk_flags + pk_no_flags)
\\n\\nAre there any problems/gotchas with this approach, or is there a better method? I thought about using iterate but then I would need to count the dup records after the fact to populate the pk_dup_count field.\\n\\nThanks\", \"post_time\": \"2012-01-26 15:25:25\" },\n\t{ \"post_id\": 20433, \"topic_id\": 245, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Different version of file" error\", \"username\": \"francisco_escher\", \"post_text\": \"Put the index key inside a super file and tell the Roxie query to fetch data from it instead.\\n\\nWhen you want to update the key, clear the super file and add the new key, that usually solves the issue.\", \"post_time\": \"2018-01-11 19:30:18\" },\n\t{ \"post_id\": 2887, \"topic_id\": 245, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Different version of file" error\", \"username\": \"tmurphy\", \"post_text\": \"We are getting the same error described below. We delete a logical file, recreate it with the same name but a different record layout, and then get this error when we run Roxie queries on it. As the last reply suggests, restarting the server (in our case same box runs both Thor and Roxie) does indeed fix the problem. But, is there some other way to avoid this problem that does not involve a restart??? We're running community_3.8.2-1.\", \"post_time\": \"2012-11-26 22:28:36\" },\n\t{ \"post_id\": 981, \"topic_id\": 245, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Different version of file" error\", \"username\": \"sort\", \"post_text\": \"The "Could not resolve filename..." message indicates that roxie was not expecting this file to be used.\\n\\nThe "Different version" message indicates that roxie thinks it has a different version loaded.\\n\\nIf you deleted all the queries using the file and the logical file information is not in dali and you are still getting the "Different version" message, can you restart the roxie cluster and see if that stops the error from occurring. We will need to investigate further\", \"post_time\": \"2012-02-02 14:37:40\" },\n\t{ \"post_id\": 979, \"topic_id\": 245, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Different version of file" error\", \"username\": \"aintnomyth\", \"post_text\": \"One more thing...before I publish the query to Roxie I can run it fine in thor or hthor. I also tried publishing it to Roxie2 but ran into the same error.\", \"post_time\": \"2012-02-02 14:17:22\" },\n\t{ \"post_id\": 978, \"topic_id\": 245, \"forum_id\": 10, \"post_subject\": \"Roxie "Different version of file" error\", \"username\": \"aintnomyth\", \"post_text\": \"I'm getting an odd error running a Roxie query from the ESP page:\\nException \\nReported by: Roxie\\nMessage: Different version of thor::myfilename::idx_myIndexedFields already loaded: sizes = 57344 65536 Date = (in Index Read 2)
\\n\\nI rebuilt the indexes several times prior to this without any issues, then after another index rebuild the error started. To troubleshoot it I deleted the query/.so from the ECL watch page and then deleted the index files. \\n\\nAt that point I was getting the error below so I thought "good, there's no way the wrong index could be loaded now because it's missing"\\nException \\nReported by: Roxie\\nMessage: Could not resolve filename thor::myfilename::idx_myIndexedFields (in Index Read 2)
\\n\\nSo I deleted the Roxie query, rebuilt the indexes, recompiled/published the query and I'm still getting the "already loaded" exception running the web service. Any ideas?\", \"post_time\": \"2012-02-02 14:10:47\" },\n\t{ \"post_id\": 997, \"topic_id\": 246, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Failed to get response from slave(s)"\", \"username\": \"richardkchapman\", \"post_text\": \"See https://github.com/hpcc-systems/HPCC-Pl ... ssues/1415\", \"post_time\": \"2012-02-03 13:37:50\" },\n\t{ \"post_id\": 996, \"topic_id\": 246, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Failed to get response from slave(s)"\", \"username\": \"aintnomyth\", \"post_text\": \"[quote="richardkchapman":2uvbdvco]Are you saying that you get the error a few times (immediately after publishing a query), then it goes away? \\n\\nYes, I get the error immediately after publishing and then it goes away. Also, we're on version 3.4.0-1 according to the ECL Watch page.\", \"post_time\": \"2012-02-03 13:35:14\" },\n\t{ \"post_id\": 994, \"topic_id\": 246, \"forum_id\": 10, \"post_subject\": \"Re: Roxie "Failed to get response from slave(s)"\", \"username\": \"richardkchapman\", \"post_text\": \"Are you saying that you get the error a few times (immediately after publishing a query), then it goes away? Or are you saying sometimes you get this error, sometimes you don't (and if so is there a pattern to it)?\\n\\nIf it's immediately after deployment of a query, it may be that the query has simply not yet loaded on all the slaves (there is a way to force - and wait for - a roxie update, and the process that deploys a query to Roxie should be calling it, though I have heard suggestions that in the current version 3.4.0 it is not doing so).\", \"post_time\": \"2012-02-03 09:55:23\" },\n\t{ \"post_id\": 980, \"topic_id\": 246, \"forum_id\": 10, \"post_subject\": \"Roxie "Failed to get response from slave(s)"\", \"username\": \"aintnomyth\", \"post_text\": \"After publishing a query I often run into this error several times while testing the new query:\\n\\nException \\nReported by: Roxie\\nMessage: Failed to get response from slave(s) for uid=0x0000772e activityId=2 pri=LOW queryHash=7470a9ff641257ab ch=1 seq=0 cont=0 server=<myIp.7> retries=0003 in activity 2 (in Index Read 2)
\\n\\nAm I doing something wrong?\", \"post_time\": \"2012-02-02 14:23:41\" },\n\t{ \"post_id\": 1053, \"topic_id\": 254, \"forum_id\": 10, \"post_subject\": \"Re: securing roxie queries\", \"username\": \"aintnomyth\", \"post_text\": \"[quote="richardkchapman":1luqoine]Normal practice has been to handle credentials in the layer above Roxie (typically ESP)\\n\\nAs to why your query is crashing, I think I'd need to see a full ECL archive and get it into a debugger to be sure, but if it is crashing in eclagent then I can say fairly confidently that it was not submitted to Roxie...\\n\\nThanks Richard, can you point me towards any documentation on setting up security with ESP?\", \"post_time\": \"2012-02-08 01:26:55\" },\n\t{ \"post_id\": 1051, \"topic_id\": 254, \"forum_id\": 10, \"post_subject\": \"Re: securing roxie queries\", \"username\": \"richardkchapman\", \"post_text\": \"Normal practice has been to handle credentials in the layer above Roxie (typically ESP)\\n\\nAs to why your query is crashing, I think I'd need to see a full ECL archive and get it into a debugger to be sure, but if it is crashing in eclagent then I can say fairly confidently that it was not submitted to Roxie...\", \"post_time\": \"2012-02-07 17:12:09\" },\n\t{ \"post_id\": 1050, \"topic_id\": 254, \"forum_id\": 10, \"post_subject\": \"securing roxie queries\", \"username\": \"aintnomyth\", \"post_text\": \"I'm trying to formulate a plan for securing my roxie queries using parameterized credentials. I'm running into some hurdles and wondering if I'm going about it the wrong way.\\n\\nOur legacy app stores credentials in a table which I'm simulating with the secDatabase dataset.\\n\\nHere is An example Roxie query:\\n\\nEXPORT ICredentials := INTERFACE\\t\\n\\tEXPORT string20 pUserClient:= 'tst';\\t\\n\\tEXPORT data16 pUserName := HASHMD5('myname');\\t\\n\\tEXPORT data16 pUserPw:= HASHMD5('mypassword');\\t\\nEND;\\n\\nIParams := INTERFACE(ICredentials)\\n\\tEXPORT string20 pUserClient:= 'tst';\\t\\n\\tEXPORT data16 pUserName := HASHMD5('myname');\\t\\n\\tEXPORT data16 pUserPw:= HASHMD5('mypassword');\\t\\n\\tEXPORT string8 pOthereParam1 := 'p1';\\n\\tEXPORT string8 pOthereParam2 := 'p2';\\n\\tEXPORT Qstring20 pOthereParam3 := 'p3';\\nEND;\\n\\nparams := STORED(IParams);\\n\\n\\nsecLayout := RECORD\\n\\tstring clientCode;\\n\\tdata hashUser;\\n\\tdata hashPw;\\nEND;\\n\\nauthResultLayout := RECORD\\n\\tinteger4 authCode;\\n\\tqstring20 authText;\\n\\tqstring20 otherText;\\nEND;\\n\\nsecDatabase := DATASET([\\n\\t\\t\\t//test good record\\n\\t\\t\\t{'tst', hashmd5('myname'), hashmd5('mypassword')}],\\n\\t\\t\\t//test bad record\\n\\t\\t\\t//{'tst', hashmd5('myname1'), hashmd5('mypassword1')}],\\n\\t\\t\\t\\t\\tsecLayout);\\n\\nauthDs := DATASET([\\n\\t\\t\\t\\t\\t{0, 'authenticated', ''},\\n\\t\\t\\t\\t\\t{1, 'input not encrypted', ''},\\n\\t\\t\\t\\t\\t{2, 'bad credentials', ''}],\\n\\t\\t\\t\\t\\tauthResultLayout);\\t\\t\\n\\t\\t\\t\\t\\t\\nsecResult := EXISTS(secDatabase( hashUser = params.pUserName, hashPw = params.pUserPw));\\n\\nauthResult := IF(LENGTH(params.pUsername) < 16 or LENGTH(params.pUserPw) < 16, authDs(authCode = 1),\\n\\t\\t\\t\\t\\t\\t\\t\\tIF(~secResult, authDs(authCode = 2),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tauthDs(authCode = 0)));\\t\\t\\t\\t\\t\\n\\nOUTPUT(authResult);\\nOUTPUT( params.pUsername);\\nOUTPUT( params.pUserPw);
\\n\\nThis runs from the ECL IDE but then I compile and publish it, and launch the ESP tester form with these parameters:\\npUserName: ABB45C192F0818FF22B7DDDA8566DAC3\\npUserPw: 34819D7BEEABB9260A5C854BC85B3E44\\n\\nI get this exception:\\nException\\nReported by: ECLAGENT\\nMessage: SIG: Segmentation fault(11), accessing 0000000012A64000, IP=00002ADDBB5A2537
\\n\\nIs this because my parameters are DATA16 instead of STRING32? \\n\\nIf I define the query with STRING params instead of DATA, is there a built in function for converting an incoming hexadecimal in STRING format to DATA16 so I can compare to the stored HASHMD5 result in my security tables?? (I found the Data2String function in the service library reference v1.10 but it is not available in my install it seems)\", \"post_time\": \"2012-02-07 17:05:57\" },\n\t{ \"post_id\": 1058, \"topic_id\": 255, \"forum_id\": 10, \"post_subject\": \"Re: Roxie IPropertyTree: xpath parse error\", \"username\": \"aintnomyth\", \"post_text\": \"Finally traced this down, the query source file name is built based on parameters passed into the query. I reworked the query to output the file name instead of the dataset and here is what it looked like on Roxie:\\n~thor::temp::cli::cli2h ::linked::CLAIMS::summary
\\n\\nSo I'm thinking thor/hthor will automatically trim strings in different situations than Roxie. Is it possible that my Roxie nodes are different versions of the code base than the thor/hthor nodes?\", \"post_time\": \"2012-02-08 18:48:40\" },\n\t{ \"post_id\": 1056, \"topic_id\": 255, \"forum_id\": 10, \"post_subject\": \"Roxie IPropertyTree: xpath parse error\", \"username\": \"aintnomyth\", \"post_text\": \"I have a set of queries that ran fine on hthor (compiled, published, and tested in ESP) but when I run them on Roxie from the ESP test page I get this error:\\nException\\nReported by: Roxie\\nMessage: IPropertyTree: xpath parse error XPath Exception: Qualifier expected e.g. [..] in xpath = ::linked::claims::summary ^
\\n\\nAny ideas?\", \"post_time\": \"2012-02-08 15:21:03\" },\n\t{ \"post_id\": 1102, \"topic_id\": 264, \"forum_id\": 10, \"post_subject\": \"Re: scope of referenced namespaces\", \"username\": \"rtaylor\", \"post_text\": \"I would expect so, because IMPORT is really just a compiler directive that makes exported definitions available to use in your code. Since we have a one-pass compiler, the position of IMPORT in the code is irrelevant as long as it comes before any use of what you're importing.\\n\\nBoth versions of this code work for me:// IMPORT * from TrainingYourName;\\nSTRING MyFunc(string1 ltr, STRING val) := FUNCTION\\n IMPORT * from TrainingYourName;\\n RETURN Ltr + val;\\nEND;\\n\\nOUTPUT(people.file,{ID,MyFunc(middlename[1],firstname)});
HTH,\\n\\nRichard\", \"post_time\": \"2012-02-13 15:48:45\" },\n\t{ \"post_id\": 1101, \"topic_id\": 264, \"forum_id\": 10, \"post_subject\": \"scope of referenced namespaces\", \"username\": \"aintnomyth\", \"post_text\": \"Another of my random curiosities. I noticed (by accident) that I can embed an IMPORT within another function - are these two code blocks treated exactly the same way?\\n\\nIMPORT module1;\\nEXPORT function1 := FUNCTION\\n...\\nEND;
\\n\\nEXPORT function1 := FUNCTION\\nIMPORT module1;\\n...\\nEND;
\", \"post_time\": \"2012-02-13 13:58:42\" },\n\t{ \"post_id\": 1129, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"ghalliday\", \"post_text\": \"The fix has just squeezed into 3.6.0 which should be available soon.\", \"post_time\": \"2012-02-17 15:30:14\" },\n\t{ \"post_id\": 1128, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"ghalliday\", \"post_text\": \"It is a code generator bug - which I'm surprised no one has hit before.\\n\\nI'll test a fix.\\n\\nAs a (bizarre) work around try appending\\n\\n(string0)cnt; to your string.\\n\\ni.e.,\\n\\nSelf.A:=L.s+Msg+(string0)cnt; \\n\\nI'm slightly surprised that isn't optimized away!\", \"post_time\": \"2012-02-17 13:58:38\" },\n\t{ \"post_id\": 1127, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"ashishbhagasra\", \"post_text\": \"Updated the question .. have a look at it again ..\", \"post_time\": \"2012-02-17 13:12:47\" },\n\t{ \"post_id\": 1126, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"DSC\", \"post_text\": \"[quote="ashishbhagasra":13otz217]Suppose it is getting in the false code ... Then ???\\n\\nIf you mean you change your IF statement so that it is always false instead, then I would expect the compiler to warn you about the TheWords variable instead (since it would be optimized away). That's if my theory is right; it may not be. Changing 'TheWord' to 'L.s' may be what you really want, as it that would test the inbound string value rather than a constant (and therefore the compiler wouldn't optimize either IF clause away).\\n\\nAgain, I could be very wrong. I'm kind of new at ECL.\\n\\nDan\", \"post_time\": \"2012-02-17 12:46:41\" },\n\t{ \"post_id\": 1125, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"ashishbhagasra\", \"post_text\": \"[quote="DSC":vm6xikqi]Since you explicit define TheWord within the transform, the IF statement (TheWord != 'Ashish') will always be true and the compiler optimizes away the 'else' clause. Since the 'else' clause is the only place your counter is used, the compiler now warns you about it.\\n\\nThat's my interpretation, anyway. I'm eagerly awaiting correction. \\n\\nCheers,\\n\\nDan\\n Suppose it is getting in the false code ... Then ???\", \"post_time\": \"2012-02-17 12:38:40\" },\n\t{ \"post_id\": 1124, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"Re: COUNTER not used inside the transform\", \"username\": \"DSC\", \"post_text\": \"Since you explicit define TheWord within the transform, the IF statement (TheWord != 'Ashish') will always be true and the compiler optimizes away the 'else' clause. Since the 'else' clause is the only place your counter is used, the compiler now warns you about it.\\n\\nThat's my interpretation, anyway. I'm eagerly awaiting correction.
\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-17 12:35:39\" },\n\t{ \"post_id\": 1123, \"topic_id\": 270, \"forum_id\": 10, \"post_subject\": \"COUNTER not used inside the transform\", \"username\": \"ashishbhagasra\", \"post_text\": \"I have to use the COUNTER value of DS1 Dataset to retrieve value from DS2 dataset. \\n\\nI have used the below Code:\\n//TheSentence:= 'Displaying the hello World';\\nDS1:=Dataset([{'Displaying'},{'the'},{'hello'},{'World'}], {String s});\\nDS2:=Dataset([{'America'},{'Egypt'},{'Europe'},{'India'}], {String s});\\n\\n {String A} getMessage(DS1 L, unsigned cnt):= Transform \\n wrdChk:= 'hello'; \\n Msg := If (L.s = wrdChk, DS2[Cnt].s, ''); \\n Self.A:=L.s+Msg; \\n End; \\nDS3 := Project(DS1, getMessage(Left, counter));\\n\\n Output(DS3) ;\\n\\nWhen I Execute, I get the below Errors:\\nWarning: COUNTER not used inside the transform \\nWarning: OUTPUT() appears to be context dependent - this may cause a dataset not active error \\nError: COUNTER is not legal in this context\\n\\nCan someone tell me how to do it.\", \"post_time\": \"2012-02-17 12:05:18\" },\n\t{ \"post_id\": 1162, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"dabayliss\", \"post_text\": \"Leaving aside the question for a moment (
) - what are you actually trying to do?\\n\\nPerforming detailed textual analysis down at the regex level is rarely a good idea ...\\n\\nDavid\", \"post_time\": \"2012-02-21 20:56:55\" },\n\t{ \"post_id\": 1161, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"DSC\", \"post_text\": \"Ack. Yeah, I just realized that the apostrophe would be interpreted as a word break. And wrapping all of the pattern in another layer of parenthesis is exactly the right thing to do. Closer, but still not what you want.\\n\\nIf you don't need to keep the formatting of the original text, you can preprocess it to normalize word breaks and to handle cases like this with a simple substitution (e.g., replace \\\\' with something you can find and revert later, like '_apostrophe_').\\n\\nYou may be better off tokenizing the input string into a set of words via Str.SplitWords(), then running a match against the individual words within the set instead, using buffer boundaries instead of word boundaries.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-21 19:30:46\" },\n\t{ \"post_id\": 1160, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"sameermsc\", \"post_text\": \"Hi Dan,\\n\\nbelow are a few srchStr_1 variations and the corresponding outputs\\n\\n
srchStr_1:= '\\\\\\\\b(i)|(t)|(it)|(it\\\\'s)\\\\\\\\b';\\ndat:= 'If the word is unknown then it\\\\'s probably best to try the substitution';\\noutput(regexreplace(srchStr_1 , dat, ' ', nocase), named('srchStr_1'));
\\noutput \\n[color=#BF4000:2vet6knn] f he word s unknown hen 's probably bes o ry he subs u ion\\n\\nsrchStr_1:= '\\\\\\\\b(i)|(t)|(it)|(it\\\\'s)\\\\\\\\b';\\ndat:= 'If the word is unknown then it\\\\'s probably best to try the substitution';\\noutput(regexreplace('\\\\\\\\b(' + srchStr_1 + ')\\\\\\\\b' , dat, ' ', nocase), named('srchStr_1'));
\\noutput\\n[color=#BF4000:2vet6knn]If the word is unknown then 's probably best to try the substitution\\n\\nsrchStr_1:= '\\\\\\\\b((i)|(t)|(it)|(it\\\\'s))\\\\\\\\b';\\ndat:= 'If the word is unknown then it\\\\'s probably best to try the substitution';\\noutput(regexreplace(srchStr_1, dat, ' ', nocase), named('srchStr_1'));
\\noutput\\n[color=#BF4000:2vet6knn]If the word is unknown then 's probably best to try the substitution\\n\\n\\nThanks for the link, i have referred to this long back, will again revisit to check if have missed to notice something \\n\\nregards\\nSameer\", \"post_time\": \"2012-02-21 19:08:07\" },\n\t{ \"post_id\": 1158, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"DSC\", \"post_text\": \"I think the problem is that your earlier/shorter search patterns are consuming only a portion of your word when in fact you want to remove whole words. You can change your pattern to something like this:\\n\\n
srchStr_1:= '\\\\\\\\b(i)|(t)|(it)|(it\\\\'s)\\\\\\\\b';
\\n\\nThe parenthesis added around single characters are not required, but I added them for clarity. In essence, this says "replace the word "i" or the word "t" or the word "it" or the words "it's" but only if they are whole words and in lowercase" (you may have to add a flag for case-insensitive searching).\\n\\nThere are two parts to my recommendation:\\n\\n1) Wrap stop words in parenthesis to make sure they act as a single regex element.\\n\\n2) Put \\\\b (\\\\\\\\b in code) before and after the pattern to force matching on word breaks.\\n\\nThe second one is the important bit. As an example, a pattern of '\\\\bit\\\\b' will fail to match the string "it's" because 'it' is not a self-contained word.\\n\\nECL uses a Boost library under the covers and that library uses PCRE (Perl Compatible Regular Expressions). Complete documentation for the pattern syntax is at http://www.boost.org/doc/libs/1_39_0/li ... html.There are a boatload of examples to be found in Google as well; just search for "pcre examples".\\n\\nHope this helps!\\n\\nDan\", \"post_time\": \"2012-02-21 18:43:29\" },\n\t{ \"post_id\": 1157, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"sameermsc\", \"post_text\": \"I have a list of stop words, say [color=#BF4000:1jtzvter]i, [color=#BF4000:1jtzvter]t , [color=#BF4000:1jtzvter]it and [color=#BF4000:1jtzvter]it's are only present in the list in the said order\\n\\ni have a function which generates the srchStr_1 with the words in the stop word list, since the samplelist has the above said four words only the srchStr_1 will be as shown in the code below\\n\\nsrchStr_1:= 'i|t|it|it\\\\'s';\\ninputData := 'If the word is unknown then it\\\\'s probably best to try the substitution';\\noutput(regexreplace('\\\\\\\\b ('+ srchStr_1 + ')\\\\\\\\b', inputData , ' ', nocase), named('srchStr_1'));
\\n\\nif i do a regexreplace on the inputData, output is "If the word is unknown then [color=#FF0000:1jtzvter]'s probably best to try the substitution"\\n\\n[color=#FF0000:1jtzvter]'s is an additional text which i don't want\", \"post_time\": \"2012-02-21 18:28:05\" },\n\t{ \"post_id\": 1155, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"DSC\", \"post_text\": \"Hi Sameer,\\n\\nI don't quite understand what you are trying to do. If you want to do only whole-string searching, why search for individual characters at all? Can you provide some additional examples of input and desired output to illustrate what you're aiming for?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-21 15:39:49\" },\n\t{ \"post_id\": 1154, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"sameermsc\", \"post_text\": \"Hi Dan,\\n\\nThanks for the explanation "pipe (regex OR) is a single-element switch" \\n\\nThe requirement here is that i am generating a regex pattern dynamically, and i don't have any prior knowledge on the words/characters that will be present in the expression\\n
srchStr_2:= 'w|o|n|t|(won\\\\'t)';
\\nis one such pattern generated dynamically; since individual characters are present at the beginning of the search pattern, the result of regexreplace is just '\\n\\nin spite of me grouping individual search strings it won't work\\n\\ni don't want to use a parse function to perform a replace (with MAX and MANY options), i.e., capture all others except the search strings\\n\\nare there any other mechanisms you are aware of which handle such cases?\\n\\nThanks \\nSameer\", \"post_time\": \"2012-02-21 15:29:12\" },\n\t{ \"post_id\": 1140, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Re: Regexreplace\", \"username\": \"DSC\", \"post_text\": \"The pipe (regex OR) is a single-element switch. Those patterns look more like this pseudo-pattern:\\n\\n
\\n\\nIf you process your input string ("won't") with those patterns, you'll see why you get the results you do.\\n\\nIn traditional regex you solve the problem by wrapping the whole words in parenthesis, which makes that word the element (rather than just a single character):\\n\\n(won\\\\'t)|w|o|n|t
\\n\\nThere is also a switch for preventing those parenthesis from capturing the pattern results but I don't remember that off the top of my head. You may not need it, anyway, depending on what you're doing.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-21 13:02:27\" },\n\t{ \"post_id\": 1132, \"topic_id\": 271, \"forum_id\": 10, \"post_subject\": \"Regexreplace\", \"username\": \"sameermsc\", \"post_text\": \"I have two regexsearch patterns with a couple of strings but in different order\\n\\nsrchStr_1:= 'won\\\\'t|w|o|n|t';\\nsrchStr_2:= 'w|o|n|t|won\\\\'t';\\ndat:= 'won\\\\'t';\\n\\noutput(regexreplace(srchStr_1, dat, '', nocase), named('srchStr_1'));\\noutput(regexreplace(srchStr_2, dat, '', nocase), named('srchStr_2'));
\\n\\nOutput:\\nfor srchStr_1 its NULL\\nfor srchStr_2 its '\\n\\nPer my observation matching is done from left to right and the characters/text are replaced accordingly\\n\\nWhy is the output different for both the search strings inspite of won\\\\'t being present in both of them, is there any option/mechanism available using which i can direct the compiler to go for a full string replace rather the individual character replace \\n\\nI am looking at an option like [color=#0000FF:24w9usa9]MAX in [color=#0000FF:24w9usa9]PARSE\", \"post_time\": \"2012-02-20 09:06:52\" },\n\t{ \"post_id\": 1227, \"topic_id\": 278, \"forum_id\": 10, \"post_subject\": \"Re: enumerating fields in a record\", \"username\": \"dlingle\", \"post_text\": \"The example that bforeman posted in this thread did have this one line missing from the top of the example:\\n\\nLOADXML('<xml/>');\", \"post_time\": \"2012-02-29 16:58:39\" },\n\t{ \"post_id\": 1186, \"topic_id\": 278, \"forum_id\": 10, \"post_subject\": \"Re: enumerating fields in a record\", \"username\": \"aintnomyth\", \"post_text\": \"That looks like exactly what I need, thanks for the info.\", \"post_time\": \"2012-02-24 14:02:43\" },\n\t{ \"post_id\": 1185, \"topic_id\": 278, \"forum_id\": 10, \"post_subject\": \"Re: enumerating fields in a record\", \"username\": \"bforeman\", \"post_text\": \"The docs on #EXPORTXML have a great example that I think is doing what you want:\\n\\n NamesRecord := RECORD\\n STRING10 first;\\n STRING20 last;\\n END;\\n \\n r := RECORD\\n UNSIGNED4 dg_parentid;\\n STRING10 dg_firstname;\\n STRING dg_lastname;\\n UNSIGNED1 dg_prange;\\n IFBLOCK(SELF.dg_prange % 2 = 0)\\n STRING20 extrafield;\\n END;\\n NamesRecord namerec;\\n DATASET(NamesRecord) childNames;\\n END;\\n \\n ds := DATASET('~RTTEST::OUT::ds', r, THOR);\\n \\n #EXPORTXML(Fred,r);\\n \\n #FOR (Fred)\\n #FOR (Field) \\n #IF (%'{@isEnd}'% <> '')\\n OUTPUT('END');\\n #ELSE\\n OUTPUT(%'{@type}'%\\n #IF (%'{@size}'% <> '-15' AND\\n %'{@isRecord}'%='' AND\\n %'{@isDataset}'%='')\\n + %'{@size}'%\\n #END\\n + ' ' + %'{@label}'% + ';');\\n #END\\n #END\\n #END
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-24 13:59:18\" },\n\t{ \"post_id\": 1181, \"topic_id\": 278, \"forum_id\": 10, \"post_subject\": \"enumerating fields in a record\", \"username\": \"aintnomyth\", \"post_text\": \"In Oracle I can query the data dictionary views ALL_TABLES and ALL_TAB_COLUMNS to retrieve metadata about tables and columns, is there any way to do something similar in ECL?\", \"post_time\": \"2012-02-23 17:34:14\" },\n\t{ \"post_id\": 1273, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Re: Populating dataset with multiple files content using pro\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Richard\\nwhen run on hThor the results are correct\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-03-08 09:38:01\" },\n\t{ \"post_id\": 1268, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Re: Populating dataset with multiple files content using pro\", \"username\": \"rtaylor\", \"post_text\": \"OK, I've now run this code on three separate builds: 3.4 (rc1), 3.4.2 (1), and 3.6 (1)\\n\\nMy result has been consistent on all three -- when you run the code on Thor the result is incorrect, and when you run it on hThor it gets the correct result.\\n\\nI will be reporting this issue, but your workaround is to run the job in hThor.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-07 19:19:28\" },\n\t{ \"post_id\": 1264, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Re: Populating dataset with multiple files content using pro\", \"username\": \"rtaylor\", \"post_text\": \"Hmm -- same IDE, same Compiler, slightly different Server -- mine is 3.4 (1rc).\\n\\nI'll try it again on 3.4.2 (1)\", \"post_time\": \"2012-03-07 14:34:26\" },\n\t{ \"post_id\": 1262, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Re: Populating dataset with multiple files content using pro\", \"username\": \"sameermsc\", \"post_text\": \"I did a run on community_3.4.2-1\\nAttached is a screen shot with info of ECL IDE/compiler versions\", \"post_time\": \"2012-03-07 06:24:03\" },\n\t{ \"post_id\": 1258, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Re: Populating dataset with multiple files content using pro\", \"username\": \"rtaylor\", \"post_text\": \"I just ran your code and it worked just fine for me on the 3.4.2 build:\\n\\n[attachment=0:2rs74j9g]3-6-2012 10-40-06 AM.png\\n\\nWhat build did you run on?\\n\\nRichard\", \"post_time\": \"2012-03-06 15:46:56\" },\n\t{ \"post_id\": 1248, \"topic_id\": 295, \"forum_id\": 10, \"post_subject\": \"Populating dataset with multiple files content using project\", \"username\": \"sameermsc\", \"post_text\": \"I am trying to populate the contents of multiple logical files into a single dataset consisting of two fields where, first field should have the full file content (dataset) and the second field should have the file path\\n\\n(I am looking at a scenario where use of superfile to load all of the content will not be feasible, ie., select a specific column data based on a file name and populate it)\\n\\nBelow is the code\\nCreate sample data\\nds_dow := dataset(['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday'], {string dat});\\nds_month := dataset(['January', 'February', 'March', 'April'], {string dat});\\n\\noutput(ds_dow, ,'~test::dayofweek');\\noutput(ds_month, ,'~test::month');
\\n\\npopulate the data into result dataset\\n\\nds := dataset([{'~test::dayofweek'}, {'~test::month'}], {string path});\\n\\ndat1 := record\\n\\tstring dat;\\nend;\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\ndatRec := record\\n\\tdataset(dat1) datChild := dataset([], dat1);\\n\\tstring path;\\nend;\\t\\t\\t\\t\\t\\t\\t\\t\\ndatrec tReadData({string path} l) := transform\\n\\t\\tpaths := l.path;\\n\\t\\tself.datChild := dataset(l.path, {string dat}, thor, opt);\\n\\t\\tself.path := paths;\\nend;\\nds_dat := project(ds, tReadData(left));\\n\\noutput(ds_dat);
\\n\\nBut the Output is different as shown in the attached image\\n\\nunable to find out where is the problem\", \"post_time\": \"2012-03-06 12:37:39\" },\n\t{ \"post_id\": 1303, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"rtaylor\", \"post_text\": \"Gavin,\\n\\n<queue complaints from Richard why he doesn't know about that feature>
YES - I'M COMPLAINING!!! \\n\\nI only know about what I write in the Language Reference, and this is not in there (it will be in the next release)!!\\n\\nRichard\", \"post_time\": \"2012-03-09 14:35:46\" },\n\t{ \"post_id\": 1295, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"ghalliday\", \"post_text\": \"Have you tried using XMLDEFAULT?\\n\\n
\\nSourcePath:= 'thisIsMyPath';\\npersonRec:= record\\nstring name {xpath('')};\\nstring path{xmldefault(SourcePath)};\\nend;\\n\\nxmlLayout:=record\\ndataset(personRec) person{xpath('A/B/C/PERSON')};\\nend;\\n\\nDs:= dataset('~'+SourcePath, xmlLayout, xml('myXml'),opt);\\n
\\n\\n<queue complaints from Richard why he doesn't know about that feature> It only works for constant values.\", \"post_time\": \"2012-03-09 12:01:58\" },\n\t{ \"post_id\": 1287, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"Ghost\", \"post_text\": \"[quote="Ghost":2yar7htv][quote="rtaylor":2yar7htv]Ghost,\\n the short answer .....\\nRichard\\nTx.. Richard\", \"post_time\": \"2012-03-09 06:02:03\" },\n\t{ \"post_id\": 1283, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"rtaylor\", \"post_text\": \"Ghost,\\n\\nIs this related to the code in your other post (that I just answered: viewtopic.php?f=10&t=301&p=1282&sid=b79f8f6228bc265ca868c99569f70e70#p1282)? If so, it would appear that this issue is now moot (since you're not using this approach in that code).\\n\\nHowever, the short answer to why this doesn't work is that default values in a RECORD structure are used primarily by the TABLE function, and can also be used for an "inline" DATASET, but not one that references a real file on disk (like you're trying to do here).\\n\\nRichard\", \"post_time\": \"2012-03-08 17:02:31\" },\n\t{ \"post_id\": 1279, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"Ghost\", \"post_text\": \"[quote="bforeman":2q2axz2q]Can you please post a small snippet of the target XML?...\\nBob\\n \\n/* file1.Xml(of layout1):\\n <root>\\n <A>\\n <PERSON>SMITH</PERSON>\\n <PERSON>JOHN</PERSON>\\n <PERSON>BRET</PERSON>\\n </A>\\n </root>\\n Logical path for this file is 'search::file1' */\\n\\nSourcePath:= 'sample::file1';\\npersonRec:= record\\nstring name {xpath('')};\\nstring path:=SourcePath;\\nend;\\n\\nxmlLayout:=record\\ndataset(personRec) person{xpath('A/PERSON')};\\nend;\\n\\nDs:= dataset('~'+SourcePath, xmlLayout, xml('root'));\\nDs;\\n
\\nAttached is the output i am getting.\\nBut i want the 'SourcePath' value in the 'path' field(which is blank in the output attched). I am using this code inside a transform, so i cannot use another transform/project/iterate,etc.\", \"post_time\": \"2012-03-08 13:44:16\" },\n\t{ \"post_id\": 1276, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"bforeman\", \"post_text\": \"Can you please post a small snippet of the target XML?\\nI think that what you will need to do is to read the XML and then run a simple PROJECT to seed the output with your default value, but I'd like to test and confirm this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-03-08 13:14:42\" },\n\t{ \"post_id\": 1272, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"Ghost\", \"post_text\": \"[quote="bforeman":10gxl4qq]Have you tried:\\nstring path {xpath('')} := SourcePath;\\nRegards,\\nBob\\n\\nYes, i tried it, and now Output is:\\nname path\\n'Kaka' 'Kaka'\\n'Messi' 'Messi'\\n'Jessi' 'Jessi'\", \"post_time\": \"2012-03-08 08:58:40\" },\n\t{ \"post_id\": 1270, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Re: Assigning default value to records?\", \"username\": \"bforeman\", \"post_text\": \"Have you tried:\\n\\nstring path {xpath('')} := SourcePath;\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-03-07 20:24:57\" },\n\t{ \"post_id\": 1263, \"topic_id\": 298, \"forum_id\": 10, \"post_subject\": \"Assigning default value to records?\", \"username\": \"Ghost\", \"post_text\": \"As shown in code below, I am trying to assign the value of 'SourcePath' to the 'path' field of 'personRec'. \\n\\n\\nSourcePath:= 'thisIsMyPath';\\npersonRec:= record\\nstring name {xpath('')};\\nstring path:=SourcePath;\\nend;\\n\\nxmlLayout:=record\\ndataset(personRec) person{xpath('A/B/C/PERSON')};\\nend;\\n\\nDs:= dataset('~'+SourcePath, xmlLayout, xml('myXml'),opt);
\\n\\nI expected some output of the format:\\nname path\\n'Kaka' 'thisIsMyPath'\\n'Messi' 'thisIsMyPath'\\n'Jessi' 'thisIsMyPath'\\n\\nBut the Output i am getting is:\\nname path\\n'Kaka' ''\\n'Messi' ''\\n'Jessi' ''\\nHow can i get the expected output ?\", \"post_time\": \"2012-03-07 10:23:28\" },\n\t{ \"post_id\": 1298, \"topic_id\": 300, \"forum_id\": 10, \"post_subject\": \"Re: Nested Transforms - access intermediate output\", \"username\": \"sameermsc\", \"post_text\": \"Thanks, \\nI remember Loop was not supported in one of the earlier versions, so had to use iterate/process to achieve this\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-03-09 13:11:42\" },\n\t{ \"post_id\": 1296, \"topic_id\": 300, \"forum_id\": 10, \"post_subject\": \"Re: Nested Transforms - access intermediate output\", \"username\": \"ghalliday\", \"post_text\": \"As well as process there is also AGGREGATE which can be used in some situations.\\n\\nIn this case it is probably most naturally expressed using LOOP....\\n\\n\\nrec2_layout := record\\n integer2 sal;\\nend;\\n \\nrec1_layout := record\\n integer1 num;\\nend;\\n\\nds1 := dataset([{100}, {200}, {300}, {400}, {500}], rec2_layout);\\n\\nds2 := dataset([{1}, {2}, {3}], rec1_layout);\\n\\nxform1(dataset(rec2_layout) ds, unsigned cnt) := function\\n rec2_layout xform2(rec2_layout l) := transform\\n self.sal := l.sal * ds2[cnt].num;\\n end;\\n RETURN PROJECT(ds, xform2(LEFT));\\nend;\\n\\nds_final := loop(ds1, COUNT(ds2), xform1(rows(left), counter));\\n\\noutput(ds_final);\\n
\\n\\nWhich is the most efficient? I'm not sure!\", \"post_time\": \"2012-03-09 12:11:05\" },\n\t{ \"post_id\": 1281, \"topic_id\": 300, \"forum_id\": 10, \"post_subject\": \"Re: Nested Transforms - access intermediate output\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nOK, first you can express your TRANSFORM much more succintly, like this: \\nrec1_layout xform1(rec1_layout l, rec1_layout r, unsigned cnt) := transform\\n self.rec2 := IF(cnt = 1, \\n ds1,\\n project(l.rec2, \\n TRANSFORM(rec2_layout,SELF.sal := left.sal * r.num)));\\n self := r;\\nend;
Less code = less typing = less "work" \\n\\n
I am curious to know is there any option/mechanism available where the intermediate results (output dataset of inner transform) are not stored as a field of the record in the outer transform (i.e., self.rec2 field in xform1) but are still available for the next iteration in the inner transform, this way i can get the final output i want without storing the intermediate results in any field (self.rec2 in xform1) \\n\\nIs there any plan to introduce such function/provision in future, if it is not yet available?
No, and not that I am aware of. Your approach is perfectly appropriate for this case. \\n\\nBut if you need more of a "state machine" you should take a look at the PROCESS function (which is basically ITERATE "on steroids").\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-08 15:44:04\" },\n\t{ \"post_id\": 1274, \"topic_id\": 300, \"forum_id\": 10, \"post_subject\": \"Nested Transforms - access intermediate output\", \"username\": \"sameermsc\", \"post_text\": \"I have a scenario where i need to nest transforms\\nExcept for the first time, outer transform should operate on the result of inner transform to perform some task as shown in the below code -- it is mandatory for me to pass ds2 in iterate (for outer transform) and ds1 or intermediate result in project (for inner transform) and this order of calling or the data in the datasets cannot be changed\\n\\nrec2_layout := record\\n \\tinteger2 sal;\\n end;\\n \\n rec1_layout := record\\n\\tinteger1 num;\\n\\tdataset(rec2_layout) rec2;\\n end;\\n \\n ds1 := dataset([{100}, {200}, {300}, {400}, {500}], rec2_layout);\\n \\n ds2 := dataset([{1, []}, {2, []}, {3, []}], rec1_layout);\\n \\n rec1_layout xform1(rec1_layout l, rec1_layout r, unsigned cnt) := transform\\n\\tmulNum := r.num;\\n\\trec2_layout xform2(rec2_layout i) := transform\\n\\t\\tself.sal := i.sal * mulNum;\\n\\tend;\\n\\tself.rec2 := map(cnt = 1 => project(ds1, xform2(left)), \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tproject(l.rec2, xform2(left)));\\n\\tself := r;\\n end;\\n \\n ds_final := iterate(ds2, xform1(left, right, counter));\\n \\n output(ds_final);\\n output(ds_final[count(ds_final)].rec2);
\\n\\nOutput of the above code is as shown in the attachment below\\n\\nFinally what i want as the output is "600, 1200, 1800, 2400, 3000" (which is the rec2 column of 3rd record)\\n\\nWith the above code i need to capture the intermediate results in the left record (of the pair) and finally get the last record of the dataset. This is fine as long as there are few hundreds of records in ds1, if the number is in millions?\\n\\nI am curious to know is there any option/mechanism available where the intermediate results (output dataset of inner transform) are not stored as a field of the record in the outer transform (i.e., self.rec2 field in xform1) but are still available for the next iteration in the inner transform, this way i can get the final output i want without storing the intermediate results in any field (self.rec2 in xform1) \\n\\nIs there any plan to introduce such function/provision in future, if it is not yet available?\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-03-08 11:16:21\" },\n\t{ \"post_id\": 1944, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"ideal\", \"post_text\": \"Richard,\\n\\nyou can DISTRIBUTE your records across the nodes, but child datasets must be contained within the parent record to which they belong
\\nThen, how is it possible that when you comment choice 2.1 and uncomment choice 2.2, it works. It is doing the same thing.\\n\\n\\nStatutRec := ENUM(ACTIVE,PASSIVE,DELETED);\\n\\n Rec := RECORD\\n INTEGER id;\\n StatutRec statut;\\n END;\\n\\n\\n Rec1 := RECORD\\n DATASET(Rec) recs;\\n DATASET(Rec) new;\\n END;\\n\\n DATASET(Rec1) h(DATASET(Rec) recs) := FUNCTION\\n vide := DATASET([],Rec);\\n clone := DATASET([{vide,vide}],Rec1);\\n startds := NORMALIZE(clone,1,TRANSFORM(Rec1,SELF:=IF(COUNTER=1,ROW({recs,recs},Rec1),LEFT)));\\n RETURN startds;\\n END;\\n\\n Rec1 g(DATASET(Rec) recs) := FUNCTION\\n new := DATASET([],Rec);\\n RETURN ROW({recs,new},Rec1);\\n\\n END;\\n\\n Rec1 f(Rec1 cur,INTEGER c) := TRANSFORM\\n\\n // Choice 1.1 : works without distribute\\n // listeRecs := cur.recs;\\n // Choice 1.2 : does not work\\n listeRecs := DISTRIBUTE(cur.recs,id);\\n SELF := g(listeRecs);\\n\\n END;\\n\\n\\n DATASET(Rec) execute(DATASET(Rec) recs) := FUNCTION\\n\\n // Choice 2.1 : does not work with distribute (choice1.1)\\n // DATASET(Rec1) startds := h(recs);\\n\\n // Choice 2.2 : works with distribute (choice1.1)\\n vide := DATASET([],Rec);\\n clone := DATASET([{vide,vide}],Rec1);\\n startds := NORMALIZE(clone,1,TRANSFORM(Rec1,SELF:=ROW({recs,recs},Rec1)));\\n\\n DATASET(Rec1) result := PROJECT(startds,f(LEFT,COUNTER));\\n\\n DATASET(Rec) recsFinales := result[COUNT(result)].recs;\\n RETURN recsFinales;\\n END;\\n\\n\\n recs := DATASET([{1,1},{2,2},{3,3}],Rec);\\n\\n lc := execute(recs);\\n\\n OUTPUT(lc);\\n\\n
\\n\\nmaybe you're trying to think in a Hadoop fashion
\\nNo I am not, but I thought that hpcc was able to deal with data distribution automatically as it is the case in hadoop. Maybe ETL is not exactly what I need or maybe I did not understand, but I see no formal reason why nested data should not be distributed. \\n\\n\\nJM.\", \"post_time\": \"2012-07-12 07:41:03\" },\n\t{ \"post_id\": 1912, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nYour use of DISTRIBUTE in the context of choice 1.2 won't work since what you're trying to DISTRIBUTE is a nested child dataset -- which must always be contained within a single parent record. The distributed datasets in HPCC are old-fashioned ISAM files distributed on a parallel platform, and each top-level record in the dataset must be whole and complete on a single node. IOW, you can DISTRIBUTE your records across the nodes, but child datasets must be contained within the parent record to which they belong.\\n\\nHPCC's data handling is very different from the Hadoop key-value pair paradigm. From the example code you've posted it appears that maybe you're trying to think in a Hadoop fashion and duplicate the same Hadoop-style process on HPCC. Is that what's going on? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-06 23:00:22\" },\n\t{ \"post_id\": 1913, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nYour use of DISTRIBUTE in the context of choice 1.2 won't work since what you're trying to DISTRIBUTE is a nested child dataset -- which must always be contained within a single parent record. The distributed datasets in HPCC are old-fashioned ISAM files distributed on a parallel platform, and each top-level record in the dataset must be whole and complete on a single node. IOW, you can DISTRIBUTE your records across the nodes, but child datasets must be contained within the parent record to which they belong.\\n\\nHPCC's data handling is very different from the Hadoop key-value pair paradigm. From the example code you've posted it appears that maybe you're trying to think in a Hadoop fashion and duplicate the same Hadoop-style process on HPCC. Is that what's going on? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-06 23:00:22\" },\n\t{ \"post_id\": 1910, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"ideal\", \"post_text\": \"Maybe useful ..\\n\\nWhen I stop/start mythor component from the master, slaves complain they don't know it :\\n\\n\\nubuntu@ip-10-4-46-195:~$ sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init stop -c mythor\\n10.190.122.252: Host is alive.\\n10.190.122.252: Running sudo /etc/init.d/hpcc-init -c mythor stop\\nUnknown component: mythor\\n10.2.119.167: Host is alive.\\n10.2.119.167: Running sudo /etc/init.d/hpcc-init -c mythor stop\\nUnknown component: mythor\\n10.96.62.78: Host is alive.\\n10.96.62.78: Running sudo /etc/init.d/hpcc-init -c mythor stop\\nUnknown component: mythor\\n10.4.46.195: Host is alive.\\n10.4.46.195: Running sudo /etc/init.d/hpcc-init -c mythor stop\\nStopping mythor... [ OK ]
\", \"post_time\": \"2012-07-06 18:34:07\" },\n\t{ \"post_id\": 1908, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nAny news about this ? I don't see it in issue tracker but maybe I missed it.\\n\\nThe same annoying problem is happening to me, preventing to distribute dataset and then using parallelism with all slaves, which seems weird for a massively parallel platform.\\n\\nHere is my code with clear choices :\\n\\nIf your leave choice 1.1 and choice 2.1 uncommented as it is, it fails and one slave in my configuration is no more reachable in EclWatch console. Sometimes, process is killed, sometimes not, but never reachable until I restart mythor component.\\n\\nIf you choose not to use DISTRIBUTE, it succeeds.\\n\\n\\nStatutRec := ENUM(ACTIVE,PASSIVE,DELETED);\\n\\nRec := RECORD\\n INTEGER id;\\n StatutRec statut;\\nEND;\\n\\n\\nRec1 := RECORD\\n\\tDATASET(Rec) recs;\\n\\tDATASET(Rec) new;\\nEND;\\n\\nDATASET(Rec1) h(DATASET(Rec) recs) := FUNCTION\\n\\tvide := DATASET([],Rec);\\n\\tclone := DATASET([{vide,vide}],Rec1);\\n\\tstartds := NORMALIZE(clone,1,TRANSFORM(Rec1,SELF:=IF(COUNTER=1,ROW({recs,recs},Rec1),LEFT)));\\n\\tRETURN startds;\\nEND;\\n\\nRec1 g(DATASET(Rec) recs) := FUNCTION\\n\\tnew := DATASET([],Rec);\\n\\tRETURN ROW({recs,new},Rec1);\\n\\nEND;\\n\\nRec1 f(Rec1 cur,INTEGER c) := TRANSFORM\\n\\n// Choice 1.1 : works without distribute\\n//\\tlisteRecs := cur.recs;\\n// Choice 1.2 : does not work \\n\\tlisteRecs := DISTRIBUTE(cur.recs,id);\\n\\tSELF := g(listeRecs);\\n\\nEND;\\n\\n\\n DATASET(Rec) execute(DATASET(Rec) recs) := FUNCTION\\n\\n// Choice 2.1 : does not work with distribute (choice1.1)\\n\\tDATASET(Rec1) startds := h(recs);\\n\\n// Choice 2.2 : works with distribute (choice1.1)\\n//\\tvide := DATASET([],Rec);\\n//\\tclone := DATASET([{vide,vide}],Rec1);\\n//\\tstartds := NORMALIZE(clone,1,TRANSFORM(Rec1,SELF:=ROW({recs,recs},Rec1)));\\n\\n\\tDATASET(Rec1) result := PROJECT(startds,f(LEFT,COUNTER));\\n\\n\\tDATASET(Rec) recsFinales := result[COUNT(result)].recs;\\n\\tRETURN recsFinales;\\nEND;\\n\\n\\nrecs := DATASET([{1,1},{2,2},{3,3}],Rec);\\n\\nlc := execute(recs);\\n\\nOUTPUT(lc);\\n\\n
\\n\\nThormaster log is below (impossible to upload a file) :\\n\\n00000057 2012-07-06 10:55:59 17769 17769 Started wuid=W20120706-105558, user=hpccdemo, graph=graph1\\n\\n00000058 2012-07-06 10:55:59 17769 17769 Query /tmp/var/lib/HPCCSystems/queries/mythor/V3063736089_libW20120706-105558.so loaded\\n00000059 2012-07-06 10:55:59 17769 17769 CRC allocator OFF\\n0000005A 2012-07-06 10:55:59 17769 17769 Packed allocator OFF\\n0000005B 2012-07-06 10:55:59 17769 17769 Global memory size = 5587 MB, large mem size = 4190 MB\\n0000005C 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65538\\n0000005D 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65537\\n0000005E 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65542\\n0000005F 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65543\\n00000060 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65544\\n00000061 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65541\\n00000062 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65540\\n00000063 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65539\\n00000064 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65545\\n00000065 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65546\\n00000066 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65547\\n00000067 2012-07-06 10:55:59 17769 17769 Graph graph1 created\\n00000068 2012-07-06 10:55:59 17769 17769 Running graph=graph1\\n00000069 2012-07-06 10:55:59 17769 17769 temp directory cleared\\n0000006A 2012-07-06 10:55:59 17769 17769 Running graph [global] : <graph>\\n <node id="2" label="Inline Row">\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(20,2)"/>\\n <att name="name" value="clone"/>\\n <att name="_kind" value="169"/>\\n <att name="ecl" value="ROW(TRANSFORM(rec1,SELF.recs := vide;SELF.new := vide;)); "/>\\n <att name="recordSize" value="24"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="3" label="Normalize">\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(21,2)"/>\\n <att name="name" value="startds"/>\\n <att name="_kind" value="27"/>\\n <att name="ecl" value="NORMALIZE(1, f(LEFT, COUNTER)); "/>\\n <att name="recordSize" value="24"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="4">\\n <att name="_kind" value="1">\\n <graph>\\n <node id="5">\\n <att name="_kind" value="1">\\n <graph child="1">\\n <node id="6" label="Inline Row">\\n <att name="_kind" value="169"/>\\n <att name="_parentActivity" value="3"/>\\n <att name="coLocal" value="1"/>\\n <att name="ecl" value="IF(COUNTER = 1, ROW(TRANSFORM(rec1,SELF.recs := recs;SELF.new := recs;)), LEFT); "/>\\n <att name="recordSize" value="24"/>\\n <att name="recordCount" value="1..1[tiny]"/>\\n </node>\\n <node id="7" label="Normalize">\\n <att name="_kind" value="150"/>\\n <att name="_parentActivity" value="3"/>\\n <att name="coLocal" value="1"/>\\n <att name="ecl" value="<...>.recs; "/>\\n <att name="recordSize" value="12"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="8" label="Hash Distribute">\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(40,2)"/>\\n <att name="name" value="listerecs"/>\\n <att name="_kind" value="25"/>\\n <att name="_parentActivity" value="3"/>\\n <att name="coLocal" 
value="1"/>\\n <att name="ecl" value="DISTRIBUTE(id); "/>\\n <att name="recordSize" value="12"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="9" label="Local Result">\\n <att name="_kind" value="102"/>\\n <att name="_parentActivity" value="3"/>\\n <att name="ecl" value="SetGraphResult(0); "/>\\n <att name="recordSize" value="12"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="6_0" source="6" target="7"/>\\n <edge id="7_0" source="7" target="8"/>\\n <edge id="8_0" source="8" target="9"/>\\n </graph>\\n </att>\\n <att name="_parentActivity" value="3"/>\\n </node>\\n <att name="_numResults" value="1"/>\\n </graph>\\n </att>\\n <att name="_parentActivity" value="3"/>\\n </node>\\n <node id="10" label="Normalize">\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(58,2)"/>\\n <att name="name" value="recsfinales"/>\\n <att name="_kind" value="150"/>\\n <att name="ecl" value="<...>.recs; "/>\\n <att name="recordSize" value="12"/>\\n <att name="recordCount" value="0..?[few]"/>\\n </node>\\n <node id="11" label="Firstn">\\n <att name="_kind" value="12"/>\\n <att name="ecl" value="CHOOSEN(100); "/>\\n <att name="recordSize" value="12"/>\\n <att name="recordCount" value="0..100[group]"/>\\n </node>\\n <node id="12" label="Output Result #1">\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(1,1)"/>\\n <att name="name" value="bug_distribute3"/>\\n <att name="definition" value="C:\\\\Users\\\\Jean-Michel\\\\jeanmichel\\\\LN\\\\Interpreteur\\\\HPCC\\\\benchmark_tests\\\\bug_distribute3.ecl(67,1)"/>\\n <att name="_kind" value="21"/>\\n <att name="ecl" value="OUTPUT(..., workunit); "/>\\n <att name="recordSize" value="12"/>\\n </node>\\n <att name="rootGraph" value="1"/>\\n <edge id="2_0" source="2" target="3"/>\\n <edge id="3_0" source="3" target="10"/>\\n <edge id="10_0" source="10" target="11"/>\\n <edge id="11_0" source="11" target="12"/>\\n </graph>\\n - graph(graph1, 1)\\n0000006B 2012-07-06 10:55:59 17769 17769 CONNECTING (id=2, idx=0) to (id=3, idx=0) - activity(normalize, 3)\\n0000006C 2012-07-06 10:55:59 17769 17769 CONNECTING (id=3, idx=0) to (id=10, idx=0) - activity(normalizelinkedchild, 10)\\n0000006D 2012-07-06 10:55:59 17769 17769 CONNECTING (id=10, idx=0) to (id=11, idx=0) - activity(firstn, 11)\\n0000006E 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65548\\n0000006F 2012-07-06 10:55:59 17769 17769 CONNECTING (id=11, idx=0) to (id=12, idx=0) - activity(workunitwrite, 12)\\n00000070 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65549\\n00000071 2012-07-06 10:55:59 17769 17769 Query dll: /tmp/var/lib/HPCCSystems/queries/mythor/V3063736089_libW20120706-105558.so\\n00000072 2012-07-06 10:55:59 17769 17769 ,Progress,Thor,StartSubgraph,mythor,W20120706-105558,1,1,mythor,thor.thor\\n00000073 2012-07-06 10:55:59 17769 17769 allocateMPTag: tag = 65550\\n00000074 2012-07-06 10:55:59 17769 17769 sendGraph took 3 ms - graph(graph1, 1)\\n00000075 2012-07-06 10:55:59 17769 17769 Processing graph - graph(graph1, 1)\\n00000076 2012-07-06 10:55:59 17769 18394 Graph(5) - initializing master graph with parentExtract 16 bytes\\n00000077 2012-07-06 10:55:59 17769 18394 4: MP link closed (10.190.122.252:20100)\\n00000078 2012-07-06 10:55:59 17769 18397 activity(workunitwrite, 12) : Graph[1], workunitwrite[12]: MP link closed (10.190.122.252:20100), Master 
exception\\n0000007A 2012-07-06 10:55:59 17769 18397 4: Graph[1], workunitwrite[12]: MP link closed (10.190.122.252:20100), Master exception\\n0000007C 2012-07-06 10:55:59 17769 18397 INFORM [EXCEPTION]\\n0000007B 2012-07-06 10:55:59 17769 18398 activity(firstn, 11) : Graph[1], firstn[11]: MP link closed (10.190.122.252:20100), Master exception\\n00000079 2012-07-06 10:55:59 17769 18394 Posting exception: MP link closed (10.190.122.252:20100) to agent 10.4.46.195 for workunit(W20120706-105558)\\n0000007D 2012-07-06 10:55:59 17769 18394 INFORM [EXCEPTION]\\n0000007E 2012-07-06 10:56:00 17769 18394 Abort condition set - activity(workunitwrite, 12)\\n0000007F 2012-07-06 10:56:00 17769 18394 Abort condition set - activity(firstn, 11)\\n00000080 2012-07-06 10:56:00 17769 18394 Abort condition set - activity(normalizelinkedchild, 10)\\n00000081 2012-07-06 10:56:00 17769 18394 Abort condition set - activity(normalize, 3)\\n00000082 2012-07-06 10:56:02 17769 18394 Abort condition set - activity(inlinetable, 2)\\n00000083 2012-07-06 10:56:02 17769 18394 Aborting master graph - graph(graph1, 1) : MP link closed (10.190.122.252:20100)\\n00000084 2012-07-06 10:56:03 17769 18394 Aborting slave graph - graph(graph1, 1) : MP link closed (10.190.122.252:20100)\\n00000085 2012-07-06 10:56:03 17769 18394 4: Reporting exception to WU : 4, MP link closed (10.190.122.252:20100) : Error aborting job, will cause thor restart\\n00000086 2012-07-06 10:56:03 17769 18394 Stopping jobManager\\n00000087 2012-07-06 10:56:03 17769 18397 4: Graph[1], workunitwrite[12]: MP link closed (10.190.122.252:20100), Master exception\\n00000088 2012-07-06 10:56:03 17769 18397 INFORM [EXCEPTION]\\n00000089 2012-07-06 10:56:03 17769 18398 4: Graph[1], firstn[11]: MP link closed (10.190.122.252:20100), Master exception\\n0000008A 2012-07-06 10:56:03 17769 18398 INFORM [EXCEPTION]\\n0000008B 2012-07-06 10:56:03 17769 18398 4: Graph[1], firstn[11]: MP link closed (10.190.122.252:20100), Master exception\\n0000008C 2012-07-06 10:56:03 17769 18398 INFORM [EXCEPTION]\\n0000008D 2012-07-06 10:56:34 17769 17781 SYS: PU= 1% MU= 2% MAL=1578394304 MMP=1578110976 SBK=283328 TOT=1541892K RAM=223780K SWP=0K\\n0000008E 2012-07-06 10:56:34 17769 17781 NIC: rxp/s=444.9 rxk/s=0.0 txp/s=5659.5 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=98\\n0000008F 2012-07-06 10:56:59 17769 18396 4: /var/jenkins/workspace/CE-Candidate-3.8.0/CE/Ubuntu-12.04-amd64/HPCC-Platform/thorlcr/graph/thgraphmaster.cpp(75) : FAILED TO RECOVER FROM EXCEPTION, STOPPING THOR : Graph[1], workunitwrite[12]: MP link closed (10.190.122.252:20100), Master exception\\n00000090 2012-07-06 10:56:59 17769 18396 ,Timing,ThorGraph,mythor,W20120706-105558,1,1,1,60151,FAILED,mythor,thor.thor\\n00000091 2012-07-06 10:56:59 17769 18396 ,Progress,Thor,Terminate,mythor,mythor,thor.thor,exception\\n00000001 2012-07-06 10:57:01 18446 18446 Opened log file //10.4.46.195/var/log/HPCCSystems/mythor/thormaster.2012_07_06.log\\n00000002 2012-07-06 10:57:01 18446 18446 Build community_3.8.0-4rc\\n00000003 2012-07-06 10:57:01 18446 18446 calling initClientProcess Port 20000\\n00000004 2012-07-06 10:57:01 18446 18446 Checking cluster replicate nodes\\n00000005 2012-07-06 10:57:01 18446 18446 Cluster replicate nodes check completed in 2ms\\n00000006 2012-07-06 10:57:01 18446 18446 RoxieMemMgr: Setting memory limit to 5858394112 bytes (5587 pages)\\n00000007 2012-07-06 10:57:01 18446 18446 RoxieMemMgr: 5600 Pages successfully allocated for the pool - memsize=5872025600 base=0x7f31d9f00000 alignment=1048576 
bitmapSize=175\\n00000008 2012-07-06 10:57:01 18446 18446 Starting watchdog\\n00000009 2012-07-06 10:57:01 18446 18446 ThorMaster version 4.1, Started on 10.4.46.195:20000\\n0000000B 2012-07-06 10:57:01 18446 18446 Thor name = mythor, queue = thor.thor, nodeGroup = mythor\\n0000000A 2012-07-06 10:57:01 18446 18459 Started watchdog\\n0000000C 2012-07-06 10:57:01 18446 18446 Creating sentinel file thor.sentinel for rerun from script\\n0000000D 2012-07-06 10:57:01 18446 18446 Waiting for 2 slaves to register\\n0000000E 2012-07-06 10:57:01 18446 18446 Verifying connection to slave 1\\n0000000F 2012-07-06 10:57:01 18446 18446 verified connection with 10.190.122.252:20100\\n00000010 2012-07-06 10:57:01 18446 18446 Verifying connection to slave 2\\n00000011 2012-07-06 10:57:01 18446 18446 verified connection with 10.96.62.78:20100\\n00000012 2012-07-06 10:57:01 18446 18446 Slaves connected, initializing..\\n00000013 2012-07-06 10:57:01 18446 18446 Initialization sent to slave group\\n00000014 2012-07-06 10:57:01 18446 18446 Registration confirmation from 10.190.122.252:20100\\n00000015 2012-07-06 10:57:01 18446 18446 Slave 1 (10.190.122.252:20100) registered\\n00000016 2012-07-06 10:58:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=222796K SWP=0K\\n00000017 2012-07-06 10:59:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=222964K SWP=0K\\n00000018 2012-07-06 10:59:01 18446 18458 NIC: rxp/s=155.1 rxk/s=0.0 txp/s=157.6 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000019 2012-07-06 11:00:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223268K SWP=0K\\n0000001A 2012-07-06 11:00:01 18446 18458 NIC: rxp/s=155.1 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n0000001B 2012-07-06 11:01:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223316K SWP=0K\\n0000001C 2012-07-06 11:01:01 18446 18458 NIC: rxp/s=155.1 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n0000001D 2012-07-06 11:02:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223384K SWP=0K\\n0000001E 2012-07-06 11:02:01 18446 18458 NIC: rxp/s=154.4 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n0000001F 2012-07-06 11:03:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223432K SWP=0K\\n00000020 2012-07-06 11:03:01 18446 18458 NIC: rxp/s=154.4 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000021 2012-07-06 11:04:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223488K SWP=0K\\n00000022 2012-07-06 11:04:01 18446 18458 NIC: rxp/s=154.4 rxk/s=0.0 txp/s=157.6 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000023 2012-07-06 11:05:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223668K SWP=0K\\n00000024 2012-07-06 11:05:01 18446 18458 NIC: rxp/s=155.1 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000025 2012-07-06 11:06:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223716K SWP=0K\\n00000026 2012-07-06 11:06:01 18446 18458 NIC: rxp/s=154.5 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000027 2012-07-06 11:07:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223760K SWP=0K\\n00000028 2012-07-06 11:07:01 18446 18458 NIC: 
rxp/s=155.1 rxk/s=0.0 txp/s=157.6 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n00000029 2012-07-06 11:08:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223928K SWP=0K\\n0000002A 2012-07-06 11:08:01 18446 18458 NIC: rxp/s=494.4 rxk/s=0.0 txp/s=4667.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n0000002B 2012-07-06 11:09:01 18446 18458 SYS: PU= 0% MU= 2% MAL=1578302992 MMP=1578110976 SBK=192016 TOT=1541388K RAM=223980K SWP=0K\\n0000002C 2012-07-06 11:09:01 18446 18458 NIC: rxp/s=155.8 rxk/s=0.0 txp/s=157.7 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99\\n\\n
\\n\\nCould you help me on this problem ?\\nThanks\\nJM.\", \"post_time\": \"2012-07-06 11:16:50\" },\n\t{ \"post_id\": 1319, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"rtaylor\", \"post_text\": \"I duplicated this problem and it appears to be the same bug as described above. Your workaround is to use hthor until a new release is issued. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-12 13:24:10\" },\n\t{ \"post_id\": 1315, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"pprajapati22\", \"post_text\": \"I have same Error issue but in different scenario,\\nI trying to read multiple file using Transform by passing file names form Inline dataset:\\n1) When I am trying to read XML files(as shown in below code),In hTHOR I am getting correct output.But when I am running same code sample in THOR ,getting the \\nError: System error: 4: MP link closed (192.168.72.131:6600) (0, 0), 4, \\n\\n2) while reading CSV files ,it is working fine in both THOR and hTHOR. but in case of THOR I am getting output of First file for all the paths I am passing, and in hTHOR i am getting correct output. \\n\\nSample Input File "'~RXMLTest::NY::XMLtfile1"\\n\\n<Persons> \\n\\t\\t<PERSON >A</PERSON>\\n\\t\\t<PERSON >B</PERSON> \\n\\t\\t<PERSON >C</PERSON> \\n\\t\\t<PERSON >D</PERSON> \\n\\t\\t<PERSON >E</PERSON> \\n\\t\\t<PERSON >F</PERSON>\\n\\t\\t<PERSON >G</PERSON>\\n\\t\\t<PERSON >H</PERSON> \\n</Persons>\\n\\nthe other two files ('~RXMLTest::NY::XMLtfile2', '~RXMLTest::NY::XMLtfile3') are of the same structure with different names values\\n\\n\\n\\t\\t PersonRec:={ \\n\\t\\t String40 Person{XPATH('')}:='';\\n\\t\\t };\\n \\n\\t Persons:={ \\n\\t DATASET (PersonRec) Personds{XPATH('PERSON')} := DATASET([],\\tPersonRec);\\n String FileName:='';\\n };\\n\\t\\t\\t\\n\\n\\n Ds_files:= DATASET([{'~RXMLTest::NY::XMLtfile1'},\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {'~RXMLTest::NY::XMLtfile2'},\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {'~RXMLTest::NY::XMLtfile3'}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t],{ STRING LfileName:=''});\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n \\n \\t\\n \\n \\tPersons readfrm(Ds_files L):= TRANSFORM \\n \\t\\t\\t\\t self.Personds:=DATASET(L.LfileName,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tPersons,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tXML('Persons/')).Personds;\\n \\n \\t Self.Filename:=L.LfileName;\\n \\tEND ; \\n \\t\\t\\t\\t \\n \\tDs_5:=Project(Ds_files,readfrm(LEFT));\\n \\tOUTPUT(Ds_5);\\n
\", \"post_time\": \"2012-03-12 07:24:02\" },\n\t{ \"post_id\": 1302, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"rtaylor\", \"post_text\": \"Ghost,\\n\\neven After removing the THOR option as u mentioned above doesn't solve the problem. I am getting the same error.
It worked for me yesterday, but today it doesn't. The reason is that this is a bug one of our developers found as a result of your post (he said "all will fail without the fix, if reading xml in child query"), so thank you for pointing this out.\\n\\nMy 'getDs' attribute is a dataset of format 'person'
No, it is not. Your getDS is defined like this:\\tds_layout1 := dataset('~'+l.name, layout1, xml('root'));\\n\\tds_layout2 := dataset('~'+l.name, layout2, xml('root')); \\n\\n\\tgetDs:= map(exists(ds_layout1[1].person)=true => ds_layout1[1].person,\\n\\t\\t\\t\\t\\t\\t\\texists(ds_layout2[1].B[1].person)=true => ds_layout2[1].B[1].person);\\n
The two possible getDS results come from two separate datasets with two separate RECORD structures, which both happen to contain nested child datasets that have the same structure.\\n\\nI can see no difference in the formats of 'getDs' and 'nameNfile'. \\nIs the '{xpath('')}' in the 'person' record creating a problem?
Probably so.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-09 14:30:36\" },\n\t{ \"post_id\": 1291, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"Ghost\", \"post_text\": \"(Referring to the source code again)\\n My 'getDs' attribute is a dataset of format 'person'\\n\\nperson:= record\\nstring name {xpath('')};\\nend;
\\n\\nAnd as shown in Approach 1 (in code), I am assigning this 'getDs' dataset to the 'nameNfile' dataset, which is of the format:\\n\\nnameNfile:= record\\nstring name;\\nend;
\\nI can see no difference in the formats of 'getDs' and 'nameNfile'. \\nIs the '{xpath('')}' in the 'person' record creating a problem?\", \"post_time\": \"2012-03-09 06:07:26\" },\n\t{ \"post_id\": 1290, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"Ghost\", \"post_text\": \"output(outDs,, '~sampleResult::outDs'); // Problem 3
\\nrtaylor,\\nEven after removing the THOR option as you mentioned above, the problem isn't solved. I am still getting the same error.\", \"post_time\": \"2012-03-09 06:07:07\" },\n\t{ \"post_id\": 1282, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed\", \"username\": \"rtaylor\", \"post_text\": \"Ghost,\\n1) When i am using 'Approach 1'(as shown in code) , I am getting the below syntax error:\\nError: Can not assign Table of person to Table of namenfile (field SELF.namenfile).\\nBut when i am doing the same task in an indirect way, as in 'Approach 2'(as shown in code),there is no syntax error.Why? \\nAnd how can i get my 'Approach 1' right?
Answering the second question first -- you can't. Because in approach 1, the RECORD structure of GetDS will be either layout1 or layout2, and neither of these matches the RECORD structure of SELF.nameNfile (which has the nameNfile RECORD structure). \\n\\nAlso, you are using "nameNfile" twice in your code -- once as a RECORD structure definition name and a second time as a field name within another RECORD structure. Obviously, the compiler allows this (two different scopes), but best practice would be to try to avoid confusion possibilities like this.\\n\\nYour Approach 2 works because the result is a DATASET with the same structure as the target. You can "tighten up" your Approach 2 code this way:\\tself.nameNfile:= DATASET(SET(getDs,name),nameNfile);
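\\n\\nFor illustration, here is a minimal standalone sketch of that SET/DATASET pattern (hypothetical layouts and inline data, not the actual files from this thread):\\n\\nperson := RECORD\\n  STRING name {xpath('')};\\nEND;\\nnameNfileRec := RECORD\\n  STRING name;\\nEND;\\n// hypothetical inline data standing in for the child dataset that getDs returns\\npeople := DATASET([{'SMITH'},{'JOHN'},{'BRET'}], person);\\n// SET() extracts the single field as a set of values,\\n// and DATASET() rebuilds those values in the target layout\\nconverted := DATASET(SET(people, name), nameNfileRec);\\nOUTPUT(converted);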
2) When i am trying to get the Count of records of outDs by mentioning the target as THOR , I am getting the following error:\\nError: System error: 4: MP link closed (192.168.139.130:6600)\\nBut,when i am doing the same task on HTHOR, its working. Why it's not working on THOR?
I don't know, but I have duplicated the issue and will report it.\\n\\n3) When i am trying to write the outRec to thor by mentioning the target as THOR, I am getting the following error:\\nError: System error: 4: Graph[1], firstn[31]: MP link closed (192.168.139.130:6600), Master exception\\nBut when i am doing the same task on HTHOR, its working. Why it's not working on THOR?
This one I can answer. \\n\\nThe reason is that the THOR option on the OUTPUT action is valid only when you are not naming a file to write to. Take a look at the "OUTPUT Workunit Files" section of the OUTPUT docs (it's right at the end).\\n\\nRemove the THOR option to make your code like this and it will work:
output(outDs,, '~sampleResult::outDs'); // Problem 3
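\\n\\nTo illustrate the distinction with a minimal sketch (reusing your outDs name, but not your actual definitions), the THOR option only applies when no logical file name is given:\\n\\n// named logical file written to disk -- the THOR option is not valid here\\nOUTPUT(outDs,, '~sampleResult::outDs');\\n// unnamed workunit result kept as a file on Thor -- here the THOR option is valid\\nOUTPUT(outDs,, THOR);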
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-08 16:44:48\" },\n\t{ \"post_id\": 1277, \"topic_id\": 301, \"forum_id\": 10, \"post_subject\": \"MP link closed\", \"username\": \"Ghost\", \"post_text\": \"I am having the below concerns:\\n1) When i am using 'Approach 1'(as shown in code) , I am getting the below syntax error:\\nError: Can not assign Table of person to Table of namenfile (field SELF.namenfile).\\nBut when i am doing the same task in an indirect way, as in 'Approach 2'(as shown in code),there is no syntax error.Why? \\nAnd how can i get my 'Approach 1' right? \\n\\n2) When i am trying to get the Count of records of outDs by mentioning the target as THOR , I am getting the following error:\\nError: System error: 4: MP link closed (192.168.139.130:6600)\\nBut,when i am doing the same task on HTHOR, its working. Why it's not working on THOR?\\n\\n3) When i am trying to write the outRec to thor by mentioning the target as THOR, I am getting the following error:\\nError: System error: 4: Graph[1], firstn[31]: MP link closed (192.168.139.130:6600), Master exception\\nBut when i am doing the same task on HTHOR, its working. Why it's not working on THOR?\\n\\nBelow is the code:\\n\\n\\n\\nallfiles:= std.file.logicalfilelist();\\n\\nnameNfile:= record\\nstring name;\\nend;\\n\\n outRec := record\\n string file;\\n dataset(nameNfile) nameNfile:=dataset([],nameNfile);\\nend;\\n\\noutRec DoTransform(allfiles L):= transform, skip(not std.Str.startsWith(L.name, 'sample::'))\\n\\nperson:= record\\nstring name {xpath('')};\\nend;\\n\\n\\nlayout1:=record\\ndataset(person) person {xpath('A/PERSON')};\\nend;\\n\\n\\nB:= record\\ndataset(person) person {xpath('B/PERSON')}\\nend;\\n\\nlayout2:= record\\n dataset(B) B {xpath('A')};\\n end;\\n\\n\\nds_layout1:= dataset('~'+l.name, layout1, xml('root'));\\nds_layout2:= dataset('~'+l.name, layout2,xml('root')); \\n\\ngetDs:= map(exists(ds_layout1[1].person)=true=> ds_layout1[1].person,\\n exists(ds_layout2[1].B[1].person)=true=> ds_layout2[1].B[1].person);\\n \\n //Approach 1 (Below Line) : Problem 1\\n // self.nameNfile:= getDs;\\n \\n //Approach 2\\n setOf_getDs:= set (getDs, name);\\n\\nnewDs:= dataset(setOf_getDs,{string name});\\n\\nself.nameNfile:= newDs;\\n\\nself.file:=l.name;\\n\\nend;\\n\\noutDs:= project(allfiles, DoTransform(left));\\noutput(outDs);\\n//output(count(outDs)); // Problem 2\\n//output(outDs,, '~sampleResult::outDs',thor); // Problem 3\\n/* Input Files\\n file1.Xml(of layout1):\\n <root>\\n <A>\\n <PERSON>SMITH</PERSON>\\n <PERSON>JOHN</PERSON>\\n <PERSON>BRET</PERSON>\\n </A>\\n </root>\\n Logical path for this file is 'sample::file1'\\n \\n file2.Xml(of layout2):\\n <root>\\n <A>\\n <B><PERSON>SMITH</PERSON></B>\\n <B><PERSON>JOHN</PERSON></B>\\n <B><PERSON>BRET</PERSON></B>\\n </A>\\n </root>\\n Logical path for this file is 'sample::file2'\\n*/
\", \"post_time\": \"2012-03-08 13:26:56\" },\n\t{ \"post_id\": 1377, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"Allan\", \"post_text\": \"Thanks for this.\\n\\nYes taking note of the error messages produced by SALT helps a lot. \\nOnce errors addressed the ECL produced was fine.\\n\\nWith new to a product you don't know what to look for, I'll add -efilename to my command line form now on.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-16 15:56:45\" },\n\t{ \"post_id\": 1375, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"dabayliss\", \"post_text\": \"Ok,\\n\\nJust tried your code in the 2.3B1 that is upcoming - looking at the generated file (or the error file) I see:\\n+++Line:21:WORDS restrictions only make sense if you have SPACES specified:TCOUNTY\\n+++Line:22:WORDS restrictions only make sense if you have SPACES specified:TPOSTCODE\\n+++Line:23:WORDS restrictions only make sense if you have SPACES specified:TID\\n\\nLooking at where your 'bad' ECL is - that is the validation code for WORDS - so my guess is that either:\\na) 2.2 was silently getting itself confused by the missing space definitions\\nb) You were ignoring the poor beast that was trying to tell you it was unhappy\\n\\nTip: if you are 'command line' - always look at the generated file and search for +++ OR use the capability to redirect your error messages to an error file.\\n\\nIf you are using the IDE - it should scream at you if any errors are generated - if it is not that is a problem (which for now I will blame on the platform
)\", \"post_time\": \"2012-03-16 14:11:56\" },\n\t{ \"post_id\": 1374, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"dabayliss\", \"post_text\": \"Could be - but the parse/fieldtype capability was also brand-new code in 2.2 (in fact - even came late in the beta cycle if I remember correctly) - so I would not be complete astonished to find the occasional gremlin lurking ...\\n\\nDavid\", \"post_time\": \"2012-03-16 13:57:08\" },\n\t{ \"post_id\": 1373, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"Allan\", \"post_text\": \"Hi David,\\n\\nI'm using salt 2.2 Gold, so from what you say, the issue is just a plain one version miss-match.
Though the actual ECL created looks strange whatever the version, I mean 's', 'ss', then an 'sss'?\\n\\nI changed all field types to have a single LIKE, but it made no difference.\\nHowever I do have a 'cascade' of definitions that contain LIKEs, not sure if that's significant.\\n\\nAs a suggestion, it would be very handy to allow multiple LIKE's, not sure you would need a check for mutually exclusive LIKE's, just as long as the generated ECL syntax checked and ran.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-16 13:38:31\" },\n\t{ \"post_id\": 1372, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"dabayliss\", \"post_text\": \"Which version of SALT are you using? \\n\\n2.2 Gold is compatible with .702; \\n2.1 was compatible with .682 & .702.\\n\\n2.3B1 (appearing shortly) is compatible with OSS 3.6, & .702\\n\\nYou can only have one LIKE on a fieldtype (single inheritance)\\n\\nTony M is going to have a quick look at this for you today; if that fails I will chase it Monday morning.\\n\\nDavid\", \"post_time\": \"2012-03-16 12:41:22\" },\n\t{ \"post_id\": 1371, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nThe pre OSS version 5.8.2.8.682.\\n\\nI'm not completely clear on what versions SALT is compatible with?\\n\\nIn addition I was not sure if multiple 'LIKE's were allowed for one FIELDTYPE.\\nSo I changed:\\n
\\nFIELDTYPE:ALPHANUM:LIKE(NUMBER):LIKE(ALPHA):\\n
\\nto:\\n\\nFIELDTYPE:ALPHANUM:LIKE(NUMBER):ALLOW(ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz):\\n
\\n\\nBut it's made no difference, exactly the same ECL code generated.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-16 09:58:46\" },\n\t{ \"post_id\": 1366, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Re: Issue with code produced by SALT(hygiene)\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nAre you doing this on an Open Source build, or are you using the pre-OSS legacy system?\\n\\nRichard\", \"post_time\": \"2012-03-15 19:14:58\" },\n\t{ \"post_id\": 1363, \"topic_id\": 315, \"forum_id\": 10, \"post_subject\": \"Issue with code produced by SALT(hygiene)\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm learning SALT and have hit an issue attempting to generate a hygiene parser, where the generated ECL appears to be wrong.\\n\\nThe test data:\\n\\nFirst name|Surname|Address1|County|Postal Code|ID\\nHyatt|Dudley|P.O. Box 829 625 Vitae Rd.|Mayagz|AT7 8CR|1\\nGannon|Russell|P.O. Box 872 4387 Lorem Street|Laurel|TR89 4HE|2\\nReed|Taylor|7704 Luctus Rd.|El Monte|AJ5 1BZ|3\\n
\\n\\nMy ECL layout:\\n\\nexport ex1_layout_file := MODULE\\n export Layout := RECORD\\n\\tSTRING50 FirstName;\\n\\tSTRING50 Surname;\\n\\tSTRING100 Address1;\\n\\tSTRING20 County;\\n\\tSTRING20 PostCode;\\n\\tSTRING20 Id;\\n END;\\n EXPORT File := DATASET('~afw::salt::ex1_spray',Layout,CSV(HEADING(1),SEPARATOR('|')));\\n // Post Code acceptance pattern for hygeine parse.\\n EXPORT PATTERN PCATTRIB := PATTERN('[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}');\\nEND;\\n
\\nThe .spc file I've setup to check the input data:\\n\\nOPTIONS:-gh\\nMODULE:WROBEL\\nFILENAME:EX1_LAYOUT_FILE\\n//Uncomment up to NINES for internal or external adl\\n//IDFIELD:EXISTS:<NameOfIDField>\\n//RIDFIELD:<NameOfRidField>\\n//RECORDS:<NumberOfRecordsInDataFile>\\n//POPULATION:<ExpectedNumberOfEntitiesInDataFile>\\n//NINES:<Precision required 3 = 99.9%, 2 = 99% etc>\\n//Uncomment Process if doing external adl\\n//PROCESS:<ProcessName>\\n//PCATTRIB := PATTERN('[A-Z]{1,2}[0-9R][0-9A-Z]? [0-9][ABD-HJLNP-UW-Z]{2}')\\nFIELDTYPE:DEFAULT:LEFTTRIM:NOQUOTES("'):\\nFIELDTYPE:NUMBER:LIKE(DEFAULT):ALLOW(0123456789):\\nFIELDTYPE:ALPHA:LIKE(DEFAULT):ALLOW(ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz):\\nFIELDTYPE:ALPHANUM:LIKE(NUMBER):LIKE(ALPHA):\\nFIELDTYPE:NAME:LIKE(ALPHA):PARSE(*P):SPACES( -):ONFAIL(REJECT):\\nFIELDTYPE:FORENAME:LIKE(NAME):WORDS(1):ONFAIL(REJECT):\\nFIELDTYPE:LASTNAME:LIKE(NAME):WORDS(1..):ONFAIL(REJECT):\\nFIELDTYPE:ADDRESS:LIKE(ALPHANUM):SPACES( -#.):ONFAIL(REJECT):\\nFIELDTYPE:TCOUNTY:LIKE(ALPHANUM):PARSE(*P):WORDS(1..):ONFAIL(REJECT):\\nFIELDTYPE:TPOSTCODE:LIKE(ALPHANUM):PARSE(PCATTRIB):CAPS:WORDS(1,2):ONFAIL(REJECT):\\nFIELDTYPE:TID:LIKE(NUMBER):WORDS(1):ONFAIL(BLANK):\\nFIELD:FirstName:LIKE(FORENAME):0,0\\nFIELD:Surname:LIKE(LASTNAME):0,0\\nFIELD:Address1:LIKE(ADDRESS):0,0\\nFIELD:County:LIKE(TCOUNTY):0,0\\nFIELD:PostCode:LIKE(TPOSTCODE):0,0\\nFIELD:Id:LIKE(TID):0,0\\n
\\n\\nThe SALT command I use to generate the .mod file:\\n\\nsalt -gh ex1.spc >ex1.mod\\n
\\n\\nWith this setup the three ECL definitions for: TCOUNTY, TPOSTCODE and TID are wrong in Fields.ecl:\\n\\nEXPORT InValidFT_TCOUNTY(SALT22.StrType s) := WHICH(s[1]=' ' AND LENGTH(TRIM(s))>0,\\n stringlib.stringfind('"\\\\'',s[1],1)<>0 and stringlib.stringfind('"\\\\'',s[LENGTH(TRIM(s))],1)<>0,\\n LENGTH(TRIM(s))<>LENGTH(TRIM(stringlib.stringfilter(s,'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'))),\\n ~(SALT22.WordCount(stringlib.stringsubstituteout(s,' -',' '))s >= 1));\\n\\n//Wrong\\nEXPORT InValidFT_TPOSTCODE(SALT22.StrType s) := WHICH(s[1]=' ' AND LENGTH(TRIM(s))>0,\\n stringlib.stringfind('"\\\\'',s[1],1)<>0 and stringlib.stringfind('"\\\\'',s[LENGTH(TRIM(s))],1)<>0,\\n stringlib.stringtouppercase(s)<>s,\\n LENGTH(TRIM(s))<>LENGTH(TRIM(stringlib.stringfilter(s,'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'))),\\n ~(SALT22.WordCount(stringlib.stringsubstituteout(s,' -',' '))ss = 1 OR SALT22.WordCount(stringlib.stringsubstituteout(s,' -',' '))ss = 2));\\n\\n//Wrong\\nEXPORT InValidFT_TID(SALT22.StrType s) := WHICH(s[1]=' ' AND LENGTH(TRIM(s))>0,\\n stringlib.stringfind('"\\\\'',s[1],1)<>0 and stringlib.stringfind('"\\\\'',s[LENGTH(TRIM(s))],1)<>0,\\n LENGTH(TRIM(s))<>LENGTH(TRIM(stringlib.stringfilter(s,'0123456789'))),\\n ~(SALT22.WordCount(stringlib.stringsubstituteout(s,' -',' '))sss = 1));\\n
\\n\\nIf I remove the spurious 's' from the TCOUNTY definition, the 'ss's from the TPOSTCODE definition and the 'sss' from the TID definition all syntax checks and runs ok.\\n\\nAny idea's?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-03-15 14:57:25\" },\n\t{ \"post_id\": 1470, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"sort\", \"post_text\": \"3.6.2 rc4 beta is available on the hpccsystems portal if you would like to try it out\", \"post_time\": \"2012-04-10 13:02:30\" },\n\t{ \"post_id\": 1459, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"DSC\", \"post_text\": \"Another reason to anticipate 3.6.2!\\n\\nThanks for the follow-up.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-09 22:04:13\" },\n\t{ \"post_id\": 1458, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Dan,\\n\\nIt turns out this issue is probably due to a known regression in 3.6 that is fixed in 3.6.2 which should be released very soon.\\n\\nRegards,\\nTony\", \"post_time\": \"2012-04-09 20:37:13\" },\n\t{ \"post_id\": 1448, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"DSC\", \"post_text\": \"Duh. You said the esp log, and that was the one I was looking at earlier.\\n\\nHere is an excerpt from a form-base submission (extra stuff may be included, as I'm going by timestamps):\\n\\n00009F3E 2012-04-06 12:51:10 5779 16702 "HTTP First Line: POST /WsEcl/xslt/query/myroxie/audit_search HTTP/1.1"\\n00009F3F 2012-04-06 12:51:10 5779 16702 "POST /WsEcl/xslt/query/myroxie/audit_search, from unknown@66.179.38.245"\\n00009F40 2012-04-06 12:51:10 5779 16702 "parmtree: <__unnamed__>\\n <reportday>1</reportday>\\n <S1>Submit</S1>\\n <submit_type_>run_xslt</submit_type_>\\n <reportmonth>10</reportmonth>\\n <reportyear>2011</reportyear>\\n <messagetype>1</messagetype>\\n</__unnamed__>\\n"\\n00009F41 2012-04-06 12:51:10 5779 16702 "request schema: <xsd:schema elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:audit_search" xmlns:tns="urn:hpccsystems:ecl:audit_search" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ds1="urn:hpccsystems:ecl:audit_search:result:searchresult">\\n<xsd:import namespace="urn:hpccsystems:ecl:audit_search:result:searchresult" schemaLocation="../result/SearchResult.xsd"/><xsd:complexType name="EspException"><xsd:all><xsd:element name="Code" type="xsd:string" minOccurs="0"/><xsd:element name="Audience" type="xsd:string" minOccurs="0"/><xsd:element name="Source" type="xsd:string" minOccurs="0"/><xsd:element name="Message" type="xsd:string" minOccurs="0"/></xsd:all></xsd:complexType>\\n<xsd:complexType name="ArrayOfEspException"><xsd:sequence><xsd:element name="Source" type="xsd:string" minOccurs="0"/><xsd:element name="Exception" type="tns:EspException" minOccurs="0" maxOccurs="unbounded"/></xsd:sequence></xsd:complexType>\\n<xsd:element name="Exceptions" type="tns:ArrayOfEspException"/>\\n<xsd:complexType name="EspStringArray"><xsd:sequence><xsd:element name="Item" type="xsd:string" minOccurs="0" maxOccurs="unbounded"/></xsd:sequence></xsd:complexType><xsd:complexType name="EspIntArray"><xsd:sequence><xsd:element name="Item" type="xsd:int" minOccurs="0" maxOccurs="unbounded"/></xsd:sequence></xsd:complexType><xsd:simpleType 
name="XmlDataSet"><xsd:restriction base="xsd:string"/></xsd:simpleType><xsd:simpleType name="CsvDataFile"><xsd:restriction base="xsd:string"/></xsd:simpleType><xsd:simpleType name="RawDataFile"><xsd:restriction base="xsd:base64Binary"/></xsd:simpleType><xsd:element name="audit_searchRequest"><xsd:complexType><xsd:all><xsd:element minOccurs="0" maxOccurs="1" name="messagetype" type="xsd:integer"/><xsd:element minOccurs="0" maxOccurs="1" name="reportyear" type="xsd:integer"/><xsd:element minOccurs="0" maxOccurs="1" name="reportmonth" type="xsd:integer"/><xsd:element minOccurs="0" maxOccurs="1" name="reportday" type="xsd:integer"/><xsd:element minOccurs="0" maxOccurs="1" name="reporthour" type="xsd:integer"/></xsd:all></xsd:complexType></xsd:element><xsd:element name="audit_searchResponse"><xsd:complexType><xsd:all><xsd:element name="Exceptions" type="tns:ArrayOfEspException" minOccurs="0"/><xsd:element name="Results" minOccurs="0"><xsd:complexType><xsd:all><xsd:element name="Result"><xsd:complexType><xsd:all><xsd:element ref="ds1:Dataset" minOccurs="0"/></xsd:all></xsd:complexType></xsd:element></xsd:all></xsd:complexType></xsd:element></xsd:all><xsd:attribute name="sequence" type="xsd:int"/></xsd:complexType></xsd:element><xsd:element name="string" nillable="true" type="xsd:string" />\\n</xsd:schema>"\\n00009F42 2012-04-06 12:51:10 5779 16702 "submitQuery soap: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><audit_searchRequest xmlns="urn:hpccsystems:ecl:audit_search"><messagetype>1</messagetype><reportyear>2011</reportyear><reportmonth>10</reportmonth><reportday>1</reportday></audit_searchRequest></soap:Body></soap:Envelope>"\\n00009F43 2012-04-06 12:51:10 5779 16702 "Loading dll (libW20120405-124159.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120405-124159.so"\\n00009F44 2012-04-06 12:51:10 5779 16702 "TxSummary[activeReqs=3;user=@66.179.38.245;total=7304ms;]"\\n00009F45 2012-04-06 12:51:10 5779 16703 "HTTP First Line: GET /esp/files/default.css HTTP/1.1"\\n00009F46 2012-04-06 12:51:10 5779 16703 "GET /esp/files/default.css, from unknown@66.179.38.245"\\n00009F47 2012-04-06 12:51:10 5779 16703 "TxSummary[activeReqs=4;user=@66.179.38.245;total=6321ms;]"
\\n\\nHere is a SOAP call to the same service:\\n\\n0000969F 2012-04-06 06:59:44 5779 15706 "HTTP First Line: POST /WsEcl/soap/query/myroxie/audit_search HTTP/1.1"\\n000096A0 2012-04-06 06:59:45 5779 15706 "Loading dll (libW20120405-124159.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120405-124159.so"\\n000096A1 2012-04-06 06:59:45 5779 15706 "HandleSoapRequest response: "\\n000096A2 2012-04-06 06:59:45 5779 15706 "TxSummary[activeReqs=1;user=@66.179.38.245;total=687ms;]"\\n
\\n\\nNot much to go on, methinks.\\n\\nDan\", \"post_time\": \"2012-04-06 22:59:15\" },\n\t{ \"post_id\": 1447, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"DSC\", \"post_text\": \"[quote="anthony.fishbeck@lexisnexis.com":iyft27zi]SOAPUI usually works as is and from what I can tell your request looks good. I don't see anything missing.\\n\\nWsEcl should never return an empty 200 OK message, I'll have to investigate what might have caused that.. any chance you can send me the part of the esp log where this request was being processed?\\n\\nBtw, Have you tried running the query through the forms at http://10.210.150.78:8002? Expand myroxie on the left and select your query, fill in the form and click submit.\\n\\nNo problem sending log excerpts. Which log(s)?\\n\\nAll of the form-based stuffed works great. The log I was watching -- esp. log? -- did show wildly different output depending on whether I submitted via SOAP or from the forms, but then again the form endpoint was wildly different as well.\\n\\nDan\", \"post_time\": \"2012-04-06 22:53:30\" },\n\t{ \"post_id\": 1446, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Re: Exact requirements for SOAP call to Roxie\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"SOAPUI usually works as is and from what I can tell your request looks good. I don't see anything missing.\\n\\nWsEcl should never return an empty 200 OK message, I'll have to investigate what might have caused that.. any chance you can send me the part of the esp log where this request was being processed?\\n\\nBtw, Have you tried running the query through the forms at http://10.210.150.78:8002? Expand myroxie on the left and select your query, fill in the form and click submit.\", \"post_time\": \"2012-04-06 20:15:46\" },\n\t{ \"post_id\": 1445, \"topic_id\": 327, \"forum_id\": 10, \"post_subject\": \"Exact requirements for SOAP call to Roxie\", \"username\": \"DSC\", \"post_text\": \"I'm having trouble getting soapUI to talk to a Roxie query I have. The query seems to execute, but absolutely no data is being returned. 
My belief that the query executes lies in the time the query takes: Known-invalid queries return immediately while valid queries take longer, and both of these assertions are supported by log entries at /var/log/HPPCSystems/myesp/esp.log.\\n\\nWhat are the exact minimum requirements for a SOAP call, header- and body-wise?\\n\\nIf anyone cares to trawl through the WSDL:\\n\\n<definitions xmlns="http://schemas.xmlsoap.org/wsdl/" xmlns:soap="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:http="http://schemas.xmlsoap.org/wsdl/http/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:mime="http://schemas.xmlsoap.org/wsdl/mime/" xmlns:tns="urn:hpccsystems:ecl:audit_search" targetNamespace="urn:hpccsystems:ecl:audit_search">\\n\\t<types>\\n\\t\\t<xsd:schema xmlns:ds1="urn:hpccsystems:ecl:audit_search:result:searchresult" elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:audit_search">\\n\\t\\t\\t<xsd:import namespace="urn:hpccsystems:ecl:audit_search:result:searchresult" schemaLocation="../result/SearchResult.xsd" /> \\n\\t\\t\\t<xsd:complexType name="EspException">\\n\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t<xsd:element name="Code" type="xsd:string" minOccurs="0" /> <xsd:element name="Audience" type="xsd:string" minOccurs="0" /> <xsd:element name="Source" type="xsd:string" minOccurs="0" /> <xsd:element name="Message" type="xsd:string" minOccurs="0" /> \\n\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t<xsd:complexType name="ArrayOfEspException">\\n\\t\\t\\t\\t<xsd:sequence>\\n\\t\\t\\t\\t\\t<xsd:element name="Source" type="xsd:string" minOccurs="0" /> <xsd:element name="Exception" type="tns:EspException" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t\\t\\t</xsd:sequence>\\n\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t<xsd:element name="Exceptions" type="tns:ArrayOfEspException" /> \\n\\t\\t\\t<xsd:complexType name="EspStringArray">\\n\\t\\t\\t\\t<xsd:sequence>\\n\\t\\t\\t\\t\\t<xsd:element name="Item" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t\\t\\t</xsd:sequence>\\n\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t<xsd:complexType name="EspIntArray">\\n\\t\\t\\t\\t<xsd:sequence>\\n\\t\\t\\t\\t\\t<xsd:element name="Item" type="xsd:int" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t\\t\\t</xsd:sequence>\\n\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t<xsd:simpleType name="XmlDataSet">\\n\\t\\t\\t\\t<xsd:restriction base="xsd:string" /> \\n\\t\\t\\t</xsd:simpleType>\\n\\t\\t\\t<xsd:simpleType name="CsvDataFile">\\n\\t\\t\\t\\t<xsd:restriction base="xsd:string" /> \\n\\t\\t\\t</xsd:simpleType>\\n\\t\\t\\t<xsd:simpleType name="RawDataFile">\\n\\t\\t\\t\\t<xsd:restriction base="xsd:base64Binary" /> \\n\\t\\t\\t</xsd:simpleType>\\n\\t\\t\\t<xsd:element name="audit_searchRequest">\\n\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t<xsd:element minOccurs="0" maxOccurs="1" name="messagetype" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportyear" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportmonth" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportday" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reporthour" type="xsd:integer" /> \\n\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t</xsd:element>\\n\\t\\t\\t<xsd:element name="audit_searchResponse">\\n\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t<xsd:element name="Exceptions" type="tns:ArrayOfEspException" minOccurs="0" /> 
\\n\\t\\t\\t\\t\\t\\t<xsd:element name="Results" minOccurs="0">\\n\\t\\t\\t\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:element name="Result">\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:element ref="ds1:Dataset" minOccurs="0" /> \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t</xsd:element>\\n\\t\\t\\t\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t\\t\\t\\t</xsd:element>\\n\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t\\t<xsd:attribute name="sequence" type="xsd:int" /> \\n\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t</xsd:element>\\n\\t\\t\\t<xsd:element name="string" nillable="true" type="xsd:string" /> \\n\\t\\t</xsd:schema>\\n\\t</types>\\n\\t<message name="audit_searchSoapIn">\\n\\t\\t<part name="parameters" element="tns:audit_searchRequest" /> \\n\\t</message>\\n\\t<message name="audit_searchSoapOut">\\n\\t\\t<part name="parameters" element="tns:audit_searchResponse" /> \\n\\t</message>\\n\\t<portType name="myroxieServiceSoap">\\n\\t\\t<operation name="audit_search">\\n\\t\\t\\t<input message="tns:audit_searchSoapIn" /> <output message="tns:audit_searchSoapOut" /> \\n\\t\\t</operation>\\n\\t</portType>\\n\\t<binding name="myroxieServiceSoap" type="tns:myroxieServiceSoap">\\n\\t\\t<soap:binding transport="http://schemas.xmlsoap.org/soap/http" style="document" /> \\n\\t\\t<operation name="audit_search">\\n\\t\\t\\t<soap:operation soapAction="/myroxie/audit_search?ver_=1.0" style="document" /> \\n\\t\\t\\t<input>\\n\\t\\t\\t\\t<soap:body use="literal" /> \\n\\t\\t\\t</input>\\n\\t\\t\\t<output>\\n\\t\\t\\t\\t<soap:body use="literal" /> \\n\\t\\t\\t</output>\\n\\t\\t</operation>\\n\\t</binding>\\n\\t<service name="myroxie">\\n\\t\\t<port name="myroxieServiceSoap" binding="tns:myroxieServiceSoap">\\n\\t\\t\\t<soap:address location="http://10.210.150.78:8002/WsEcl/soap/query/myroxie/audit_search" /> \\n\\t\\t</port>\\n\\t</service>\\n</definitions>\\n
\\n\\nAnd XSD:\\n\\n<xsd:schema xmlns:tns="urn:hpccsystems:ecl:audit_search" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:ds1="urn:hpccsystems:ecl:audit_search:result:searchresult" elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:audit_search">\\n\\t<xsd:import namespace="urn:hpccsystems:ecl:audit_search:result:searchresult" schemaLocation="../result/SearchResult.xsd" /> \\n\\t<xsd:complexType name="EspException">\\n\\t\\t<xsd:all>\\n\\t\\t\\t<xsd:element name="Code" type="xsd:string" minOccurs="0" /> <xsd:element name="Audience" type="xsd:string" minOccurs="0" /> <xsd:element name="Source" type="xsd:string" minOccurs="0" /> <xsd:element name="Message" type="xsd:string" minOccurs="0" /> \\n\\t\\t</xsd:all>\\n\\t</xsd:complexType>\\n\\t<xsd:complexType name="ArrayOfEspException">\\n\\t\\t<xsd:sequence>\\n\\t\\t\\t<xsd:element name="Source" type="xsd:string" minOccurs="0" /> <xsd:element name="Exception" type="tns:EspException" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t</xsd:sequence>\\n\\t</xsd:complexType>\\n\\t<xsd:element name="Exceptions" type="tns:ArrayOfEspException" /> \\n\\t<xsd:complexType name="EspStringArray">\\n\\t\\t<xsd:sequence>\\n\\t\\t\\t<xsd:element name="Item" type="xsd:string" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t</xsd:sequence>\\n\\t</xsd:complexType>\\n\\t<xsd:complexType name="EspIntArray">\\n\\t\\t<xsd:sequence>\\n\\t\\t\\t<xsd:element name="Item" type="xsd:int" minOccurs="0" maxOccurs="unbounded" /> \\n\\t\\t</xsd:sequence>\\n\\t</xsd:complexType>\\n\\t<xsd:simpleType name="XmlDataSet">\\n\\t\\t<xsd:restriction base="xsd:string" /> \\n\\t</xsd:simpleType>\\n\\t<xsd:simpleType name="CsvDataFile">\\n\\t\\t<xsd:restriction base="xsd:string" /> \\n\\t</xsd:simpleType>\\n\\t<xsd:simpleType name="RawDataFile">\\n\\t\\t<xsd:restriction base="xsd:base64Binary" /> \\n\\t</xsd:simpleType>\\n\\t<xsd:element name="audit_searchRequest">\\n\\t\\t<xsd:complexType>\\n\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t<xsd:element minOccurs="0" maxOccurs="1" name="messagetype" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportyear" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportmonth" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reportday" type="xsd:integer" /> <xsd:element minOccurs="0" maxOccurs="1" name="reporthour" type="xsd:integer" /> \\n\\t\\t\\t</xsd:all>\\n\\t\\t</xsd:complexType>\\n\\t</xsd:element>\\n\\t<xsd:element name="audit_searchResponse">\\n\\t\\t<xsd:complexType>\\n\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t<xsd:element name="Exceptions" type="tns:ArrayOfEspException" minOccurs="0" /> \\n\\t\\t\\t\\t<xsd:element name="Results" minOccurs="0">\\n\\t\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t<xsd:element name="Result">\\n\\t\\t\\t\\t\\t\\t\\t\\t<xsd:complexType>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t<xsd:element ref="ds1:Dataset" minOccurs="0" /> \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t\\t\\t\\t\\t</xsd:element>\\n\\t\\t\\t\\t\\t\\t</xsd:all>\\n\\t\\t\\t\\t\\t</xsd:complexType>\\n\\t\\t\\t\\t</xsd:element>\\n\\t\\t\\t</xsd:all>\\n\\t\\t\\t<xsd:attribute name="sequence" type="xsd:int" /> \\n\\t\\t</xsd:complexType>\\n\\t</xsd:element>\\n\\t<xsd:element name="string" nillable="true" type="xsd:string" /> \\n</xsd:schema>\\n
\\n\\nThat XSD references an external schema:\\n\\n<xs:schema xmlns="urn:hpccsystems:ecl:audit_search:result:searchresult" xmlns:xs="http://www.w3.org/2001/XMLSchema" attributeFormDefault="unqualified" elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:audit_search:result:searchresult">\\n\\t<xs:element name="Dataset">\\n\\t\\t<xs:complexType>\\n\\t\\t\\t<xs:sequence maxOccurs="unbounded" minOccurs="0">\\n\\t\\t\\t\\t<xs:element minOccurs="0" name="Row">\\n\\t\\t\\t\\t\\t<xs:complexType>\\n\\t\\t\\t\\t\\t\\t<xs:sequence>\\n\\t\\t\\t\\t\\t\\t\\t<xs:element minOccurs="0" name="year" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="month" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="day" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="hour" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="eventtype" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="totalmessagecount" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="totalrecordcount" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="successfulrecordcount" type="xs:nonNegativeInteger" /> <xs:element minOccurs="0" name="failedrecordcount" type="xs:nonNegativeInteger" /> \\n\\t\\t\\t\\t\\t\\t</xs:sequence>\\n\\t\\t\\t\\t\\t</xs:complexType>\\n\\t\\t\\t\\t</xs:element>\\n\\t\\t\\t</xs:sequence>\\n\\t\\t\\t<xs:attribute name="name" type="xs:string" /> \\n\\t\\t</xs:complexType>\\n\\t</xs:element>\\n</xs:schema>\\n
\\n\\nThe soapUI log shows:\\n\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "POST /WsEcl/soap/query/myroxie/audit_search HTTP/1.1[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "Accept-Encoding: gzip,deflate[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "Content-Type: text/xml;charset=UTF-8[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "SOAPAction: "/myroxie/audit_search?ver_=1.0"[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "Content-Length: 687[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "Host: 10.210.150.78:8002[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "Connection: Keep-Alive[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "User-Agent: Apache-HttpClient/4.1.1 (java 1.5)[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "[\\\\r][\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:urn="urn:hpccsystems:ecl:audit_search">[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <soapenv:Header/>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <soapenv:Body>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:audit_searchRequest>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--You may enter the following 5 items in any order-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--Optional:-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:messagetype>1</urn:messagetype>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--Optional:-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:reportyear>2011</urn:reportyear>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--Optional:-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:reportmonth>-1</urn:reportmonth>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--Optional:-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:reportday>-1</urn:reportday>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <!--Optional:-->[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " <urn:reporthour>-1</urn:reporthour>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " </urn:audit_searchRequest>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> " </soapenv:Body>[\\\\n]"\\nFri Apr 06 09:37:15 CDT 2012:DEBUG:>> "</soapenv:Envelope>"\\nFri Apr 06 09:37:16 CDT 2012:DEBUG:<< "HTTP/1.1 200 OK[\\\\r][\\\\n]"\\nFri Apr 06 09:37:16 CDT 2012:DEBUG:<< "Content-Type: text/xml[\\\\r][\\\\n]"\\nFri Apr 06 09:37:16 CDT 2012:DEBUG:<< "Connection: close[\\\\r][\\\\n]"\\nFri Apr 06 09:37:16 CDT 2012:DEBUG:<< "[\\\\r][\\\\n]"
\\n\\nI suspect that I'm missing a header, or a header has a wrong value, but I could be wrong. Any pointers would be appreciated.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-04-06 14:38:04\" },\n\t{ \"post_id\": 1473, \"topic_id\": 328, \"forum_id\": 10, \"post_subject\": \"Re: Not seeing all 'STORED' Variables in my Roxie query.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHowever I don't understand how I was 'not using' the variables in my initial example.
I'd have to see your original Search function code to tell that, but in my experimentation it all comes down to exactly what is required to produce the final result output -- the compiler seems to work backwards from the final result and determine what it needs to produce it -- and that's what you get, nothing less and nothing more.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-10 14:35:32\" },\n\t{ \"post_id\": 1467, \"topic_id\": 328, \"forum_id\": 10, \"post_subject\": \"Re: Not seeing all 'STORED' Variables in my Roxie query.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nHumm, Ok I changed the code to:\\n\\nIMPORT * from $.Inversion;\\nIMPORT * from Std.Str;\\n\\nSTRING ToSearch := '' : STORED('SearchText');\\nBOOLEAN Near := FALSE : STORED('Near');\\nBOOLEAN Old := TRUE : STORED('Old_Testaments');\\nBOOLEAN New := TRUE : STORED('New_Testaments');\\n\\ns := Search(ToUpperCase(ToSearch),Near,Old,New);\\nCOUNT(s);\\nOUTPUT(s);\\n
\\nAnd put all the other code into 'Search' - Now works fine with 4 Variables.\\nHowever I don't understand how I was 'not using' the variables in my initial example.\\nAnd there is the converse example where I was not passing 'Old Testament' to 'Search' in the initil example yet that variable was being published?\\n\\nYours confused.\\n\\nAllan\", \"post_time\": \"2012-04-10 11:23:26\" },\n\t{ \"post_id\": 1455, \"topic_id\": 328, \"forum_id\": 10, \"post_subject\": \"Re: Not seeing all 'STORED' Variables in my Roxie query.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nIn trying to duplicate your issue I've come to the conclusion that the most probable reason you're not seeing the other two is because you're not actually USING them in the code that determines the result to return. \\n\\nI took your code, added my own "Search" function (which used only one of the STORED definitions), and when I published it I only saw the one that I was actually using. Hence my supposition...\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-09 19:09:05\" },\n\t{ \"post_id\": 1450, \"topic_id\": 328, \"forum_id\": 10, \"post_subject\": \"Not seeing all 'STORED' Variables in my Roxie query.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nThis may be documented somewhere, but I have 4 variables I want to publish in a Roxie query below:\\n\\nIMPORT * from $.Inversion;\\nIMPORT * from Std.Str;\\n\\nSTRING ToSearch := '' : STORED('SearchText');\\nBOOLEAN Near := FALSE : STORED('Near');\\nBOOLEAN Old := TRUE : STORED('Old_Testament');\\nBOOLEAN New := TRUE : STORED('New_Testament');\\n\\nUNSIGNED1 distance := IF(Near=TRUE,3,0);\\n\\nSET OF STRING1 t0 := IF(Old=TRUE,['O'],[]);\\n t1 := t0 + IF(New=TRUE,['N'],[]);\\n t2 := IF(t1=[],['O','N'],t1);\\ns := Search(ToUpperCase(ToSearch),t2,distance);\\nCOUNT(s);\\nOUTPUT(s);\\n
\\n\\nI'm only seeing two of them published, See attached screen dump from ECLWatch.\\n\\nI've tried many combinations of 'STRINGS' BOOLEANS etc on both 6.0.4 and 6.6.0 versions to no avail.\\n\\nAny ideas?\\n\\nYours\\nAllan\", \"post_time\": \"2012-04-07 18:40:24\" },\n\t{ \"post_id\": 1457, \"topic_id\": 330, \"forum_id\": 10, \"post_subject\": \"Re: LOOKUP JOIN size limits\", \"username\": \"rtaylor\", \"post_text\": \"Dustin,\\n\\nFor lookup joins, what is the limit on the size of the right dataset? Does it just have to be small enough to that it can fit into memory on a single node? Is a 1GB dataset still reasonable enough in size to use in a lookup join?
Yes, for both LOOKUP and ALL JOINs the right dataset must be small enough to fit completely in memory. \\n\\nA 1Gb file is "small enough" IFF each node has sufficient memory to accommodate the file and OS and still get your work done -- a file too large would probably incur OS spillage to disk to page memory in and out of RAM, so "caveat emptor."\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-09 19:33:02\" },\n\t{ \"post_id\": 1453, \"topic_id\": 330, \"forum_id\": 10, \"post_subject\": \"LOOKUP JOIN size limits\", \"username\": \"dustinskaggs\", \"post_text\": \"For lookup joins, what is the limit on the size of the right dataset? Does it just have to be small enough to that it can fit into memory on a single node? Is a 1GB dataset still reasonable enough in size to use in a lookup join?\\n\\n-Dustin\", \"post_time\": \"2012-04-09 16:54:02\" },\n\t{ \"post_id\": 1536, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nAny idea when it will be available? (I wont hold you to it)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-18 18:17:13\" },\n\t{ \"post_id\": 1533, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"richardkchapman\", \"post_text\": \"It will be a new version of the VM image (and platform RPMs if you want to install your own).\\n\\nThe latest stable release of the VM/Platform is 3.6.0\\n\\nI see I made a typo when I called it 3.4.2rc6 - I should have said 3.6.2rc6. Sorry for the confusion.\\n\\nRichard\", \"post_time\": \"2012-04-18 08:51:47\" },\n\t{ \"post_id\": 1530, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Pardon Richard,\\n\\nWhat is this a version of?\\n\\nAs the HPCC VM image is already at 3.6.4.2 and the IDE is at 6.6.0.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-16 18:57:49\" },\n\t{ \"post_id\": 1529, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Ta Richard,\\n\\nI will wait.\", \"post_time\": \"2012-04-16 11:13:08\" },\n\t{ \"post_id\": 1528, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"richardkchapman\", \"post_text\": \"You may need to delete the locally cached copies of the files in /var/lib/HPCCSystems/hpcc-data/roxie\\n\\nI would recommend trying with 3.4.2rc6 (should be available later today)\", \"post_time\": \"2012-04-16 11:12:01\" },\n\t{ \"post_id\": 1526, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"I've deleted all old queries, deleted and re-constructed the Base file and Key file - problem persists.\\n\\nBy the way. Ignoring the STORED constructs the following simple code works when run against Thor but fails with the same assertion when run on Roxie.\\n\\n\\nitm :=FETCH(Bible,Key(WORD='HYSSOP'),RIGHT.Basepos);\\nOUTPUT(itm);\\n
\\n\\nCan you please explain how I can proceed?\\nI didn't quite grasp the last couple of messages.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-13 19:21:41\" },\n\t{ \"post_id\": 1515, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"richardkchapman\", \"post_text\": \"Ok, I think we have a good idea what is going on (more than one issue, interacting with each other in confusing ways). See https://github.com/hpcc-systems/HPCC-Pl ... ssues/2096 for an ongoing discussion. I hope that these should be addressed or at least mitigated in the upcoming 3.6.2 release.\", \"post_time\": \"2012-04-13 13:28:45\" },\n\t{ \"post_id\": 1505, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"richardkchapman\", \"post_text\": \"I suspect the different versions of the key / the suspended query is probably related to the failure.\", \"post_time\": \"2012-04-12 13:54:20\" },\n\t{ \"post_id\": 1504, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"sort\", \"post_text\": \"The message about different versions of the bible::key is also something that needs to be addressed. Please make sure that you do not create keys and reuse file names. This will confuse roxie since roxie opens a file once and shares the this file open across all queries. This error can cause a roxie slave not to load a query\\n\\n000004D2 2012-04-10 17:58:55 2068 2230 "roxie: Dequeued workunit request 'W20120410-175852'"\\n000004D3 2012-04-10 17:58:55 2068 4660 "Loading dll (libW20120410-175852.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120410-175852.so"\\n000004D4 2012-04-10 17:58:55 2068 2174 "ERROR: Query W20120410-175852 suspended: Different version of bible::key already loaded: sizes = 5308416 3170304 Date = "\", \"post_time\": \"2012-04-12 13:49:47\" },\n\t{ \"post_id\": 1503, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"richardkchapman\", \"post_text\": \"You can email them to me at rchapman@hpccsystems.com\\n\\nCan you tell me a bit more about your system - how many nodes, is it using the VM image, etc? 
It looks like a 32-bit build which makes me think it probably is...\\n\\nThe error message is indicating that a Roxie slave did not manage to load the query from the workunit .so file successfully, which is a bit bizarre as in order to have got that far it must have successfully loaded the query on the Roxie server, and on the VM image the server and slave are the same process.\\n\\nIf you publish the query to roxie then run it via the QuerySets interface, does it run successfully?\", \"post_time\": \"2012-04-12 09:53:04\" },\n\t{ \"post_id\": 1502, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Cannot attach - Its not allowing any extension I try *.log *.txt even *.ecl\\n\\nPutting tail in-line\\n\\n\\n000004CA 2012-04-10 17:58:11 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004CB 2012-04-10 17:58:11 2068 4660 "[W20120410-175808] FAILED: W20120410-175808"\\n000004CC 2012-04-10 17:58:11 2068 4660 "[W20120410-175808] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004CD 2012-04-10 17:58:11 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004CE 2012-04-10 17:58:11 2068 2171 "[W20120410-175808] Unload received for channel 1"\\n000004CF 2012-04-10 17:58:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004D0 2012-04-10 17:58:17 2068 2072 "SYS: PU= 14% MU= 34% MAL=1101821728 MMP=1074794496 SBK=27027232 TOT=1076776K RAM=204236K SWP=0K"\\n000004D1 2012-04-10 17:58:17 2068 2072 "DSK: [sda] r/s=0.1 kr/s=1.8 w/s=3.3 kw/s=47.2 bsy=1 NIC: rxp/s=13.1 rxk/s=2.8 txp/s=17.1 txk/s=15.6 CPU: usr=1 sys=10 iow=0 idle=86"\\n000004D2 2012-04-10 17:58:55 2068 2230 "roxie: Dequeued workunit request 'W20120410-175852'"\\n000004D3 2012-04-10 17:58:55 2068 4660 "Loading dll (libW20120410-175852.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120410-175852.so"\\n000004D4 2012-04-10 17:58:55 2068 2174 "ERROR: Query W20120410-175852 suspended: Different version of bible::key already loaded: sizes = 5308416 3170304 Date = "\\n000004D5 2012-04-10 17:58:55 2068 2174 "Backtrace:"\\n000004D6 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0x1aee0b]"\\n000004D7 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x1c) [0x1b091c]"\\n000004D8 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker10doActivityEv+0x6b8) [0xd93d38]"\\n000004D9 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker4mainEv+0x19e) [0xd94a6e]"\\n000004DA 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libjlib.so(_ZN20CPooledThreadWrapper3runEv+0x7e) [0x25ac8e]"\\n000004DB 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x42) [0x2597c2]"\\n000004DC 2012-04-10 17:58:55 2068 2174 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x27) [0x2574f7]"\\n000004DD 2012-04-10 17:58:55 2068 2174 " /lib/tls/i686/cmov/libpthread.so.0(+0x596e) [0xf2d96e]"\\n000004DE 2012-04-10 17:58:55 2068 2174 " /lib/tls/i686/cmov/libc.so.6(clone+0x5e) 
[0x4b22a4e]"\\n000004DF 2012-04-10 17:58:55 2068 2174 "ERROR: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095"\\n000004E0 2012-04-10 17:58:55 2068 2174 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095: Exception in slave for packet uid=0x0000000d activityId=29 pri=LOW queryHash=5e9084106a8b5140 ch=1 seq=0 cont=0 server=192.168.217.128"\\n000004E1 2012-04-10 17:58:55 2068 18337 "[W20120410-175852@29] Multiple exceptions on query - aborting"\\n000004E2 2012-04-10 17:58:55 2068 18337 "[W20120410-175852@29] makeWrappedException - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004E3 2012-04-10 17:58:55 2068 4660 "[W20120410-175852] Exception thrown in query - cleaning up: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004E4 2012-04-10 17:58:55 2068 4660 "[W20120410-175852] Done cleaning up"\\n000004E5 2012-04-10 17:58:55 2068 4660 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (in item 2)"\\n000004E6 2012-04-10 17:58:55 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004E7 2012-04-10 17:58:55 2068 4660 "[W20120410-175852] FAILED: W20120410-175852"\\n000004E8 2012-04-10 17:58:55 2068 4660 "[W20120410-175852] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004E9 2012-04-10 17:58:55 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000004EA 2012-04-10 17:58:55 2068 2177 "[W20120410-175852] Unload received for channel 1"\\n000004EB 2012-04-10 17:59:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004EC 2012-04-10 17:59:17 2068 2072 "SYS: PU= 10% MU= 34% MAL=1101822832 MMP=1074794496 SBK=27028336 TOT=1076776K RAM=204356K SWP=0K"\\n000004ED 2012-04-10 17:59:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=3.0 kw/s=36.1 bsy=1 NIC: rxp/s=2.8 rxk/s=1.9 txp/s=2.2 txk/s=0.5 CPU: usr=0 sys=7 iow=0 idle=91"\\n000004EE 2012-04-10 18:00:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004EF 2012-04-10 18:00:17 2068 2072 "SYS: PU= 12% MU= 34% MAL=1101822832 MMP=1074794496 SBK=27028336 TOT=1076776K RAM=205024K SWP=0K"\\n000004F0 2012-04-10 18:00:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.2 kw/s=31.3 bsy=1 NIC: rxp/s=45.3 rxk/s=5.5 txp/s=70.7 txk/s=80.8 CPU: usr=0 sys=7 iow=0 idle=91"\\n000004F1 2012-04-10 18:01:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004F2 2012-04-10 18:01:17 2068 2072 "SYS: PU= 7% MU= 34% MAL=1101822832 MMP=1074794496 SBK=27028336 TOT=1076776K RAM=205952K SWP=0K"\\n000004F3 2012-04-10 18:01:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.1 kw/s=24.8 bsy=0 NIC: rxp/s=0.8 rxk/s=0.2 txp/s=0.8 txk/s=0.2 CPU: usr=0 sys=3 iow=0 idle=95"\\n000004F4 2012-04-10 
18:02:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004F5 2012-04-10 18:02:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101822832 MMP=1074794496 SBK=27028336 TOT=1076776K RAM=205968K SWP=0K"\\n000004F6 2012-04-10 18:02:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=17.8 bsy=1 NIC: rxp/s=0.7 rxk/s=0.1 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=2 iow=0 idle=96"\\n000004F7 2012-04-10 18:03:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004F8 2012-04-10 18:03:17 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101822824 MMP=1074794496 SBK=27028328 TOT=1076776K RAM=205976K SWP=0K"\\n000004F9 2012-04-10 18:03:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=18.7 bsy=1 NIC: rxp/s=1.0 rxk/s=0.2 txp/s=1.1 txk/s=0.9 CPU: usr=0 sys=2 iow=0 idle=96"\\n000004FA 2012-04-10 18:04:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004FB 2012-04-10 18:04:17 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101822824 MMP=1074794496 SBK=27028328 TOT=1076776K RAM=206456K SWP=0K"\\n000004FC 2012-04-10 18:04:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.0 kw/s=18.9 bsy=0 NIC: rxp/s=6.5 rxk/s=0.7 txp/s=9.3 txk/s=9.5 CPU: usr=0 sys=2 iow=0 idle=96"\\n000004FD 2012-04-10 18:05:17 2068 2231 "PING: 0 replies received, average delay 0"\\n000004FE 2012-04-10 18:05:17 2068 2072 "SYS: PU= 7% MU= 34% MAL=1101822824 MMP=1074794496 SBK=27028328 TOT=1076776K RAM=206280K SWP=0K"\\n000004FF 2012-04-10 18:05:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=18.4 bsy=1 NIC: rxp/s=25.6 rxk/s=2.7 txp/s=38.6 txk/s=41.5 CPU: usr=0 sys=3 iow=0 idle=95"\\n00000500 2012-04-10 18:06:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000501 2012-04-10 18:06:17 2068 2072 "SYS: PU= 34% MU= 34% MAL=1101822816 MMP=1074794496 SBK=27028320 TOT=1076776K RAM=207432K SWP=0K"\\n00000502 2012-04-10 18:06:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=5.6 kw/s=714.0 bsy=6 NIC: rxp/s=6.4 rxk/s=2.3 txp/s=7.3 txk/s=5.5 CPU: usr=11 sys=17 iow=3 idle=67"\\n00000503 2012-04-10 18:06:37 2068 2230 "roxie: Dequeued workunit request 'W20120410-180634'"\\n00000504 2012-04-10 18:06:37 2068 4660 "Loading dll (libW20120410-180634.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120410-180634.so"\\n00000505 2012-04-10 18:06:37 2068 18885 "RoxieMemMgr: Heap size 1024 pages, 1023 free, largest block 1023, heapLWM 0, dataBuffersActive=0, dataBufferPages=0"\\n00000506 2012-04-10 18:06:37 2068 2180 "ERROR: Query W20120410-180634 suspended: Different version of bible::key already loaded: sizes = 5423104 3170304 Date = "\\n00000507 2012-04-10 18:06:37 2068 2180 "Backtrace:"\\n00000508 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0x1aee0b]"\\n00000509 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x1c) [0x1b091c]"\\n0000050A 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker10doActivityEv+0x6b8) [0xd93d38]"\\n0000050B 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker4mainEv+0x19e) [0xd94a6e]"\\n0000050C 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libjlib.so(_ZN20CPooledThreadWrapper3runEv+0x7e) [0x25ac8e]"\\n0000050D 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x42) [0x2597c2]"\\n0000050E 2012-04-10 18:06:37 2068 2180 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x27) [0x2574f7]"\\n0000050F 2012-04-10 18:06:37 2068 2180 " /lib/tls/i686/cmov/libpthread.so.0(+0x596e) [0xf2d96e]"\\n00000510 2012-04-10 18:06:37 
2068 2180 " /lib/tls/i686/cmov/libc.so.6(clone+0x5e) [0x4b22a4e]"\\n00000511 2012-04-10 18:06:37 2068 2180 "ERROR: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095"\\n00000512 2012-04-10 18:06:37 2068 2180 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095: Exception in slave for packet uid=0x0000000f activityId=29 pri=LOW queryHash=e4970fa6658da9a0 ch=1 seq=0 cont=0 server=192.168.217.128"\\n00000513 2012-04-10 18:06:37 2068 18887 "[W20120410-180634@29] Multiple exceptions on query - aborting"\\n00000514 2012-04-10 18:06:37 2068 18887 "[W20120410-180634@29] makeWrappedException - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000515 2012-04-10 18:06:37 2068 4660 "[W20120410-180634] Exception thrown in query - cleaning up: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000516 2012-04-10 18:06:37 2068 4660 "[W20120410-180634] Done cleaning up"\\n00000517 2012-04-10 18:06:37 2068 4660 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (in item 2)"\\n00000518 2012-04-10 18:06:37 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000519 2012-04-10 18:06:37 2068 4660 "[W20120410-180634] FAILED: W20120410-180634"\\n0000051A 2012-04-10 18:06:37 2068 4660 "[W20120410-180634] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000051B 2012-04-10 18:06:37 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000051C 2012-04-10 18:06:37 2068 2183 "[W20120410-180634] Unload received for channel 1"\\n0000051D 2012-04-10 18:07:17 2068 2231 "PING: 0 replies received, average delay 0"\\n0000051E 2012-04-10 18:07:17 2068 2072 "SYS: PU= 10% MU= 34% MAL=1101824904 MMP=1074794496 SBK=27030408 TOT=1076776K RAM=207116K SWP=0K"\\n0000051F 2012-04-10 18:07:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.6 kw/s=38.4 bsy=1 NIC: rxp/s=1.9 rxk/s=1.7 txp/s=1.4 txk/s=0.3 CPU: usr=0 sys=6 iow=0 idle=92"\\n00000520 2012-04-10 18:07:33 2068 2230 "roxie: Dequeued workunit request 'W20120410-180731'"\\n00000521 2012-04-10 18:07:33 2068 4660 "Loading dll (libW20120410-180731.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120410-180731.so"\\n00000522 2012-04-10 18:07:33 2068 2186 "ERROR: Query W20120410-180731 suspended: Different version of bible::key already loaded: sizes = 5423104 3170304 Date = "\\n00000523 2012-04-10 18:07:33 2068 2186 "Backtrace:"\\n00000524 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0x1aee0b]"\\n00000525 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x1c) [0x1b091c]"\\n00000526 2012-04-10 18:07:33 2068 2186 " 
/opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker10doActivityEv+0x6b8) [0xd93d38]"\\n00000527 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker4mainEv+0x19e) [0xd94a6e]"\\n00000528 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libjlib.so(_ZN20CPooledThreadWrapper3runEv+0x7e) [0x25ac8e]"\\n00000529 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x42) [0x2597c2]"\\n0000052A 2012-04-10 18:07:33 2068 2186 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x27) [0x2574f7]"\\n0000052B 2012-04-10 18:07:33 2068 2186 " /lib/tls/i686/cmov/libpthread.so.0(+0x596e) [0xf2d96e]"\\n0000052C 2012-04-10 18:07:33 2068 2186 " /lib/tls/i686/cmov/libc.so.6(clone+0x5e) [0x4b22a4e]"\\n0000052D 2012-04-10 18:07:33 2068 2186 "ERROR: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095"\\n0000052E 2012-04-10 18:07:33 2068 2186 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095: Exception in slave for packet uid=0x00000011 activityId=29 pri=LOW queryHash=1845bf2bd9851688 ch=1 seq=0 cont=0 server=192.168.217.128"\\n0000052F 2012-04-10 18:07:33 2068 18905 "[W20120410-180731@29] Multiple exceptions on query - aborting"\\n00000530 2012-04-10 18:07:33 2068 18905 "[W20120410-180731@29] makeWrappedException - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000531 2012-04-10 18:07:34 2068 4660 "[W20120410-180731] Exception thrown in query - cleaning up: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000532 2012-04-10 18:07:34 2068 4660 "[W20120410-180731] Done cleaning up"\\n00000533 2012-04-10 18:07:34 2068 4660 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (in item 2)"\\n00000534 2012-04-10 18:07:34 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000535 2012-04-10 18:07:34 2068 4660 "[W20120410-180731] FAILED: W20120410-180731"\\n00000536 2012-04-10 18:07:34 2068 4660 "[W20120410-180731] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000537 2012-04-10 18:07:34 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000538 2012-04-10 18:07:34 2068 2189 "[W20120410-180731] Unload received for channel 1"\\n00000539 2012-04-10 18:08:17 2068 2231 "PING: 0 replies received, average delay 0"\\n0000053A 2012-04-10 18:08:17 2068 2072 "SYS: PU= 10% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=207544K SWP=0K"\\n0000053B 2012-04-10 18:08:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.6 kw/s=36.5 bsy=1 NIC: rxp/s=1.8 rxk/s=1.7 txp/s=1.3 txk/s=0.3 CPU: usr=0 sys=6 iow=0 idle=92"\\n0000053C 2012-04-10 18:09:17 2068 2231 "PING: 0 replies received, average 
delay 0"\\n0000053D 2012-04-10 18:09:17 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=207440K SWP=0K"\\n0000053E 2012-04-10 18:09:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=17.0 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97"\\n0000053F 2012-04-10 18:10:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000540 2012-04-10 18:10:17 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101825936 MMP=1074794496 SBK=27031440 TOT=1076776K RAM=207584K SWP=0K"\\n00000541 2012-04-10 18:10:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.8 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97"\\n00000542 2012-04-10 18:11:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000543 2012-04-10 18:11:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825936 MMP=1074794496 SBK=27031440 TOT=1076776K RAM=208104K SWP=0K"\\n00000544 2012-04-10 18:11:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.8 kw/s=19.3 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=97"\\n00000545 2012-04-10 18:12:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000546 2012-04-10 18:12:17 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208000K SWP=0K"\\n00000547 2012-04-10 18:12:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=17.6 bsy=1 NIC: rxp/s=0.1 rxk/s=0.0 txp/s=0.1 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97"\\n00000548 2012-04-10 18:13:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000549 2012-04-10 18:13:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208016K SWP=0K"\\n0000054A 2012-04-10 18:13:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=17.6 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=96"\\n0000054B 2012-04-10 18:14:17 2068 2231 "PING: 0 replies received, average delay 0"\\n0000054C 2012-04-10 18:14:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208032K SWP=0K"\\n0000054D 2012-04-10 18:14:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=15.7 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=97"\\n0000054E 2012-04-10 18:15:17 2068 2231 "PING: 0 replies received, average delay 0"\\n0000054F 2012-04-10 18:15:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=207928K SWP=0K"\\n00000550 2012-04-10 18:15:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.9 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=0 idle=97"\\n00000551 2012-04-10 18:16:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000552 2012-04-10 18:16:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208072K SWP=0K"\\n00000553 2012-04-10 18:16:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=17.3 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=97"\\n00000554 2012-04-10 18:17:17 2068 2231 "PING: 0 replies received, average delay 0"\\n00000555 2012-04-10 18:17:17 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208592K SWP=0K"\\n00000556 2012-04-10 18:17:17 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=18.0 bsy=1 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=2 iow=0 idle=96"\\n00000557 2012-04-11 18:41:11 2068 2231 "PING: 0 replies received, average delay 
0"\\n00000558 2012-04-11 18:41:12 2068 2072 "SYS: PU= 9% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=207944K SWP=0K"\\n00000559 2012-04-11 18:41:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.9 kw/s=32.1 bsy=2 NIC: rxp/s=0.8 rxk/s=0.1 txp/s=0.6 txk/s=0.1 CPU: usr=0 sys=4 iow=0 idle=94"\\n0000055A 2012-04-11 18:41:12 2068 2072 "KERN_DEBUG: [21709.738260] eth0: no IPv6 routers present"\\n0000055B 2012-04-11 18:42:11 2068 2231 "PING: 0 replies received, average delay 0"\\n0000055C 2012-04-11 18:42:12 2068 2072 "SYS: PU= 3% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=207536K SWP=0K"\\n0000055D 2012-04-11 18:42:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.0 kw/s=19.8 bsy=1 NIC: rxp/s=2.4 rxk/s=0.4 txp/s=1.6 txk/s=0.6 CPU: usr=0 sys=1 iow=1 idle=96"\\n0000055E 2012-04-11 18:43:11 2068 2231 "PING: 0 replies received, average delay 0"\\n0000055F 2012-04-11 18:43:12 2068 2072 "SYS: PU= 8% MU= 34% MAL=1101825944 MMP=1074794496 SBK=27031448 TOT=1076776K RAM=208412K SWP=0K"\\n00000560 2012-04-11 18:43:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.0 kw/s=21.7 bsy=4 NIC: rxp/s=10.2 rxk/s=1.1 txp/s=12.0 txk/s=9.4 CPU: usr=0 sys=2 iow=4 idle=92"\\n00000561 2012-04-11 18:44:11 2068 2231 "PING: 0 replies received, average delay 0"\\n00000562 2012-04-11 18:44:12 2068 2072 "SYS: PU= 34% MU= 34% MAL=1101825952 MMP=1074794496 SBK=27031456 TOT=1076776K RAM=219476K SWP=0K"\\n00000563 2012-04-11 18:44:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=3.2 kw/s=274.3 bsy=14 NIC: rxp/s=18.2 rxk/s=3.6 txp/s=22.1 txk/s=18.8 CPU: usr=3 sys=12 iow=10 idle=72"\\n00000564 2012-04-11 18:44:12 2068 2072 "KERN_INFO: [21877.976222] esp[19141]: segfault at 0 ip 026c4ea3 sp 019ce950 error 4 in libws_workunits.so[2114000+6b4000]"\\n00000565 2012-04-11 18:45:11 2068 2231 "PING: 0 replies received, average delay 0"\\n00000566 2012-04-11 18:45:12 2068 2072 "SYS: PU= 40% MU= 34% MAL=1101825960 MMP=1074794496 SBK=27031464 TOT=1076776K RAM=200132K SWP=0K"\\n00000567 2012-04-11 18:45:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=8.8 kw/s=1287.7 bsy=29 NIC: rxp/s=5.3 rxk/s=0.8 txp/s=4.1 txk/s=2.0 CPU: usr=11 sys=10 iow=14 idle=62"\\n00000568 2012-04-11 18:45:12 2068 2072 "KERN_INFO: [21973.137996] esp[19334]: segfault at 0 ip 01d1aea3 sp b4ee4950 error 4 in libws_workunits.so[176a000+6b4000]"\\n00000569 2012-04-11 18:45:29 2068 2230 "roxie: Dequeued workunit request 'W20120411-184525'"\\n0000056A 2012-04-11 18:45:29 2068 4660 "Loading dll (libW20120411-184525.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120411-184525.so"\\n0000056B 2012-04-11 18:45:29 2068 19377 "RoxieMemMgr: Heap size 1024 pages, 1023 free, largest block 1023, heapLWM 0, dataBuffersActive=0, dataBufferPages=0"\\n0000056C 2012-04-11 18:45:29 2068 2192 "ERROR: Query W20120411-184525 suspended: Different version of bible::key already loaded: sizes = 5128192 3170304 Date = "\\n0000056D 2012-04-11 18:45:29 2068 2192 "Backtrace:"\\n0000056E 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0x1aee0b]"\\n0000056F 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x1c) [0x1b091c]"\\n00000570 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker10doActivityEv+0x6b8) [0xd93d38]"\\n00000571 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker4mainEv+0x19e) [0xd94a6e]"\\n00000572 2012-04-11 18:45:29 2068 2192 " 
/opt/HPCCSystems/lib/libjlib.so(_ZN20CPooledThreadWrapper3runEv+0x7e) [0x25ac8e]"\\n00000573 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x42) [0x2597c2]"\\n00000574 2012-04-11 18:45:29 2068 2192 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x27) [0x2574f7]"\\n00000575 2012-04-11 18:45:29 2068 2192 " /lib/tls/i686/cmov/libpthread.so.0(+0x596e) [0xf2d96e]"\\n00000576 2012-04-11 18:45:29 2068 2192 " /lib/tls/i686/cmov/libc.so.6(clone+0x5e) [0x4b22a4e]"\\n00000577 2012-04-11 18:45:29 2068 2192 "ERROR: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095"\\n00000578 2012-04-11 18:45:29 2068 2192 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095: Exception in slave for packet uid=0x00000013 activityId=29 pri=LOW queryHash=521baa39b1f28f34 ch=1 seq=0 cont=0 server=192.168.217.128"\\n00000579 2012-04-11 18:45:29 2068 19379 "[W20120411-184525@29] Multiple exceptions on query - aborting"\\n0000057A 2012-04-11 18:45:29 2068 19379 "[W20120411-184525@29] makeWrappedException - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000057B 2012-04-11 18:45:29 2068 4660 "[W20120411-184525] Exception thrown in query - cleaning up: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000057C 2012-04-11 18:45:29 2068 4660 "[W20120411-184525] Done cleaning up"\\n0000057D 2012-04-11 18:45:29 2068 4660 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (in item 2)"\\n0000057E 2012-04-11 18:45:29 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000057F 2012-04-11 18:45:29 2068 4660 "[W20120411-184525] FAILED: W20120411-184525"\\n00000580 2012-04-11 18:45:29 2068 4660 "[W20120411-184525] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000581 2012-04-11 18:45:29 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n00000582 2012-04-11 18:45:29 2068 2195 "[W20120411-184525] Unload received for channel 1"\\n00000583 2012-04-11 18:46:11 2068 2231 "PING: 0 replies received, average delay 0"\\n00000584 2012-04-11 18:46:12 2068 2072 "SYS: PU= 14% MU= 34% MAL=1101828120 MMP=1074794496 SBK=27033624 TOT=1076776K RAM=198136K SWP=0K"\\n00000585 2012-04-11 18:46:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=3.9 kw/s=168.2 bsy=7 NIC: rxp/s=3.7 rxk/s=2.0 txp/s=2.6 txk/s=0.4 CPU: usr=2 sys=7 iow=4 idle=86"\\n00000586 2012-04-11 18:46:12 2068 2072 "KERN_INFO: [21980.994921] esp[19349]: segfault at 0 ip 026b1ea3 sp b5876950 error 4 in libws_workunits.so[2101000+6b4000]"\\n00000587 2012-04-11 18:47:11 2068 2231 "PING: 0 replies received, average delay 0"\\n00000588 2012-04-11 18:47:12 2068 2072 "SYS: PU= 9% MU= 34% 
MAL=1101829696 MMP=1074794496 SBK=27035200 TOT=1076776K RAM=198156K SWP=0K"\\n00000589 2012-04-11 18:47:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.9 bsy=6 NIC: rxp/s=2.8 rxk/s=0.5 txp/s=1.8 txk/s=0.4 CPU: usr=0 sys=2 iow=5 idle=92"\\n0000058A 2012-04-11 18:47:16 2068 2230 "roxie: Dequeued workunit request 'W20120411-184712'"\\n0000058B 2012-04-11 18:47:16 2068 4660 "Loading dll (libW20120411-184712.so) from location /var/lib/HPCCSystems/queries/myroxie//libW20120411-184712.so"\\n0000058C 2012-04-11 18:47:16 2068 19422 "RoxieMemMgr: Heap size 1024 pages, 1023 free, largest block 1023, heapLWM 0, dataBuffersActive=0, dataBufferPages=0"\\n0000058D 2012-04-11 18:47:16 2068 2198 "ERROR: Query W20120411-184712 suspended: Different version of bible::key already loaded: sizes = 5128192 3170304 Date = "\\n0000058E 2012-04-11 18:47:16 2068 2198 "Backtrace:"\\n0000058F 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0x1aee0b]"\\n00000590 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libjlib.so(_Z20RaiseAssertExceptionPKcS0_j+0x1c) [0x1b091c]"\\n00000591 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker10doActivityEv+0x6b8) [0xd93d38]"\\n00000592 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libccd.so(_ZN12CRoxieWorker4mainEv+0x19e) [0xd94a6e]"\\n00000593 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libjlib.so(_ZN20CPooledThreadWrapper3runEv+0x7e) [0x25ac8e]"\\n00000594 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x42) [0x2597c2]"\\n00000595 2012-04-11 18:47:16 2068 2198 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x27) [0x2574f7]"\\n00000596 2012-04-11 18:47:16 2068 2198 " /lib/tls/i686/cmov/libpthread.so.0(+0x596e) [0xf2d96e]"\\n00000597 2012-04-11 18:47:16 2068 2198 " /lib/tls/i686/cmov/libc.so.6(clone+0x5e) [0x4b22a4e]"\\n00000598 2012-04-11 18:47:16 2068 2198 "ERROR: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095"\\n00000599 2012-04-11 18:47:16 2068 2198 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095: Exception in slave for packet uid=0x00000015 activityId=29 pri=LOW queryHash=a5a45e58b1f42f98 ch=1 seq=0 cont=0 server=192.168.217.128"\\n0000059A 2012-04-11 18:47:16 2068 19424 "[W20120411-184712@29] Multiple exceptions on query - aborting"\\n0000059B 2012-04-11 18:47:16 2068 19424 "[W20120411-184712@29] makeWrappedException - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000059C 2012-04-11 18:47:16 2068 4660 "[W20120411-184712] Exception thrown in query - cleaning up: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n0000059D 2012-04-11 18:47:16 2068 4660 "[W20120411-184712] Done cleaning up"\\n0000059E 2012-04-11 18:47:16 2068 4660 "[(null)] ERROR: 3000: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (in item 2)"\\n0000059F 2012-04-11 18:47:16 2068 4660 "ERROR: 3000 - assert(factory) failed - file: 
/var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000005A0 2012-04-11 18:47:16 2068 4660 "[W20120411-184712] FAILED: W20120411-184712"\\n000005A1 2012-04-11 18:47:16 2068 4660 "[W20120411-184712] EXCEPTION: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000005A2 2012-04-11 18:47:16 2068 4660 "ERROR: 3000 - assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29)"\\n000005A3 2012-04-11 18:47:16 2068 2201 "[W20120411-184712] Unload received for channel 1"\\n000005A4 2012-04-11 18:48:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005A5 2012-04-11 18:48:12 2068 2072 "SYS: PU= 12% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=198324K SWP=0K"\\n000005A6 2012-04-11 18:48:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.4 kw/s=31.7 bsy=3 NIC: rxp/s=3.3 rxk/s=1.9 txp/s=2.0 txk/s=0.4 CPU: usr=1 sys=5 iow=3 idle=88"\\n000005A7 2012-04-11 18:49:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005A8 2012-04-11 18:49:12 2068 2072 "SYS: PU= 8% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=198472K SWP=0K"\\n000005A9 2012-04-11 18:49:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.8 kw/s=17.8 bsy=6 NIC: rxp/s=1.0 rxk/s=0.1 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=5 idle=93"\\n000005AA 2012-04-11 18:50:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005AB 2012-04-11 18:50:12 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=198484K SWP=0K"\\n000005AC 2012-04-11 18:50:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=15.6 bsy=2 NIC: rxp/s=0.4 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=1 idle=96"\\n000005AD 2012-04-11 18:51:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005AE 2012-04-11 18:51:12 2068 2072 "SYS: PU= 8% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=198628K SWP=0K"\\n000005AF 2012-04-11 18:51:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=17.1 bsy=5 NIC: rxp/s=0.8 rxk/s=0.1 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=1 iow=5 idle=92"\\n000005B0 2012-04-11 18:52:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005B1 2012-04-11 18:52:12 2068 2072 "SYS: PU= 8% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=200268K SWP=0K"\\n000005B2 2012-04-11 18:52:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=16.7 bsy=5 NIC: rxp/s=5.6 rxk/s=0.7 txp/s=6.1 txk/s=4.9 CPU: usr=0 sys=2 iow=4 idle=93"\\n000005B3 2012-04-11 18:53:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005B4 2012-04-11 18:53:12 2068 2072 "SYS: PU= 9% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=201612K SWP=0K"\\n000005B5 2012-04-11 18:53:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.6 kw/s=18.0 bsy=6 NIC: rxp/s=1.8 rxk/s=0.2 txp/s=0.9 txk/s=0.4 CPU: usr=0 sys=1 iow=6 idle=91"\\n000005B6 2012-04-11 18:54:12 2068 2231 "PING: 0 replies received, average delay 0"\\n000005B7 2012-04-11 18:54:12 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=201636K SWP=0K"\\n000005B8 2012-04-11 18:54:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=16.0 bsy=4 NIC: rxp/s=1.3 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=1 iow=4 idle=93"\\n000005B9 2012-04-11 18:55:12 
2068 2231 "PING: 0 replies received, average delay 0"\\n000005BA 2012-04-11 18:55:12 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=201768K SWP=0K"\\n000005BB 2012-04-11 18:55:12 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=16.9 bsy=3 NIC: rxp/s=1.3 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=1 iow=3 idle=94"\\n000005BC 2012-04-11 20:01:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005BD 2012-04-11 20:01:43 2068 2072 "SYS: PU= 26% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=146900K SWP=0K"\\n000005BE 2012-04-11 20:01:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.2 w/s=3.5 kw/s=35.1 bsy=7 NIC: rxp/s=7.9 rxk/s=1.0 txp/s=9.2 txk/s=6.9 CPU: usr=0 sys=5 iow=2 idle=90"\\n000005BF 2012-04-11 20:01:43 2068 2072 "KERN_DEBUG: [22608.359916] eth0: no IPv6 routers present"\\n000005C0 2012-04-11 20:02:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005C1 2012-04-11 20:02:43 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=146748K SWP=0K"\\n000005C2 2012-04-11 20:02:43 2068 2072 "DSK: [sda] r/s=0.2 kr/s=2.4 w/s=2.3 kw/s=21.4 bsy=1 NIC: rxp/s=2.3 rxk/s=0.3 txp/s=1.6 txk/s=0.5 CPU: usr=0 sys=2 iow=0 idle=96"\\n000005C3 2012-04-11 20:03:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005C4 2012-04-11 20:03:43 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=147640K SWP=0K"\\n000005C5 2012-04-11 20:03:43 2068 2072 "DSK: [sda] r/s=0.2 kr/s=2.1 w/s=2.1 kw/s=26.6 bsy=1 NIC: rxp/s=1.1 rxk/s=0.2 txp/s=0.7 txk/s=0.1 CPU: usr=0 sys=3 iow=0 idle=95"\\n000005C6 2012-04-11 20:04:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005C7 2012-04-11 20:04:43 2068 2072 "SYS: PU= 4% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=147648K SWP=0K"\\n000005C8 2012-04-11 20:04:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=17.9 bsy=0 NIC: rxp/s=1.3 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=3 iow=0 idle=95"\\n000005C9 2012-04-11 20:05:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005CA 2012-04-11 20:05:43 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=147560K SWP=0K"\\n000005CB 2012-04-11 20:05:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.8 kw/s=19.2 bsy=0 NIC: rxp/s=3.5 rxk/s=0.5 txp/s=3.5 txk/s=2.5 CPU: usr=0 sys=2 iow=0 idle=96"\\n000005CC 2012-04-11 20:06:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005CD 2012-04-11 20:06:43 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=147680K SWP=0K"\\n000005CE 2012-04-11 20:06:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.4 kw/s=17.0 bsy=0 NIC: rxp/s=1.5 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=2 iow=0 idle=96"\\n000005CF 2012-04-11 20:07:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005D0 2012-04-11 20:07:43 2068 2072 "SYS: PU= 6% MU= 34% MAL=1101829160 MMP=1074794496 SBK=27034664 TOT=1076776K RAM=147816K SWP=0K"\\n000005D1 2012-04-11 20:07:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=17.8 bsy=0 NIC: rxp/s=1.1 rxk/s=0.2 txp/s=0.6 txk/s=0.1 CPU: usr=0 sys=2 iow=0 idle=96"\\n000005D2 2012-04-11 20:08:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005D3 2012-04-11 20:08:43 2068 2072 "SYS: PU= 8% MU= 34% MAL=1101829168 MMP=1074794496 SBK=27034672 TOT=1076776K RAM=148064K SWP=0K"\\n000005D4 2012-04-11 20:08:43 2068 2072 "DSK: [sda] r/s=0.1 kr/s=0.3 w/s=1.5 kw/s=18.1 bsy=0 NIC: rxp/s=1.9 
rxk/s=0.3 txp/s=0.9 txk/s=0.4 CPU: usr=0 sys=4 iow=0 idle=95"\\n000005D5 2012-04-11 20:09:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005D6 2012-04-11 20:09:43 2068 2072 "SYS: PU= 28% MU= 34% MAL=1101829168 MMP=1074794496 SBK=27034672 TOT=1076776K RAM=149932K SWP=0K"\\n000005D7 2012-04-11 20:09:43 2068 2072 "DSK: [sda] r/s=26.3 kr/s=185.9 w/s=2.2 kw/s=46.1 bsy=17 NIC: rxp/s=1.1 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=10 iow=16 idle=73"\\n000005D8 2012-04-11 20:10:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005D9 2012-04-11 20:10:43 2068 2072 "SYS: PU= 13% MU= 34% MAL=1101829168 MMP=1074794496 SBK=27034672 TOT=1076776K RAM=150076K SWP=0K"\\n000005DA 2012-04-11 20:10:43 2068 2072 "DSK: [sda] r/s=6.9 kr/s=73.4 w/s=1.7 kw/s=47.8 bsy=5 NIC: rxp/s=1.3 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=11 iow=0 idle=88"\\n000005DB 2012-04-11 20:11:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005DC 2012-04-11 20:11:43 2068 2072 "SYS: PU= 5% MU= 34% MAL=1101829176 MMP=1074794496 SBK=27034680 TOT=1076776K RAM=150092K SWP=0K"\\n000005DD 2012-04-11 20:11:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.5 kw/s=17.9 bsy=0 NIC: rxp/s=1.2 rxk/s=0.2 txp/s=0.6 txk/s=0.1 CPU: usr=0 sys=2 iow=0 idle=96"\\n000005DE 2012-04-11 20:12:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005DF 2012-04-11 20:12:43 2068 2072 "SYS: PU= 4% MU= 34% MAL=1101829176 MMP=1074794496 SBK=27034680 TOT=1076776K RAM=151248K SWP=0K"\\n000005E0 2012-04-11 20:12:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.1 kw/s=20.8 bsy=0 NIC: rxp/s=1.5 rxk/s=0.2 txp/s=1.0 txk/s=0.2 CPU: usr=0 sys=2 iow=0 idle=97"\\n000005E1 2012-04-11 20:13:42 2068 2231 "PING: 0 replies received, average delay 0"\\n000005E2 2012-04-11 20:13:43 2068 2072 "SYS: PU= 7% MU= 34% MAL=1101829176 MMP=1074794496 SBK=27034680 TOT=1076776K RAM=151260K SWP=0K"\\n000005E3 2012-04-11 20:13:43 2068 2072 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=1.7 kw/s=18.7 bsy=0 NIC: rxp/s=1.2 rxk/s=0.2 txp/s=0.5 txk/s=0.1 CPU: usr=0 sys=3 iow=0 idle=96"\\n
\", \"post_time\": \"2012-04-11 20:17:38\" },\n\t{ \"post_id\": 1501, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"bforeman\", \"post_text\": \"I believe the logs that development is looking for is in:\\n\\n/var/log/HPCCSystems/myroxie\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-11 20:17:18\" },\n\t{ \"post_id\": 1500, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Err - Where are the Roxie logs?\\n\\nUnlike Thor builds I don't seem to get a list of files under 'Helpers' with the Roxie Workunit.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-11 20:07:24\" },\n\t{ \"post_id\": 1496, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"bforeman\", \"post_text\": \"The assertion from Roxie is a concern. Can you attach the roxie logs??\", \"post_time\": \"2012-04-11 18:54:57\" },\n\t{ \"post_id\": 1493, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nYes the wrong compiler was being referenced in the preferences.\\n\\nHowever I still get the same assertion. But the message about minor version numbers has changed.\\n\\nWarning: Mismatch in minor version number (3.6.0 v 3.6.0) (0, 0), \\n
\\n\\nI clicked the install version 6.6.0 button for good measure.\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-11 18:50:48\" },\n\t{ \"post_id\": 1490, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"Re: CPP Assertion on Roxie.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nCheck your preferences and the Compiler tab and verify that your compiler is pointing to the 3.6 folder.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-11 18:23:30\" },\n\t{ \"post_id\": 1475, \"topic_id\": 332, \"forum_id\": 10, \"post_subject\": \"CPP Assertion on Roxie.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've upgraded to 6.6.0.1001.682.1 and am now getting this assertion submitting a workunit to Roxie.\\n\\n\\nError: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (0, 0), 3000, \\nError: assert(factory) failed - file: /var/jenkins/workspace/CE-Candidate-3.6.0/CE/Ubuntu-10.04-i386/HPCC-Platform/roxie/ccd/ccdqueue.cpp, line 1095 (in Index Read 29) (0, 0), 3000, \\nWarning: Mismatch in minor version number (3.0.0 v 3.6.0) (0, 0), 3118, unknown\\nWarning: Neither LIMIT() nor CHOOSEN() supplied for index read on '~Bible::Key' (61, 41), 4523, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\nWarning: Implicit LIMIT(10000) added to keyed join '~Bible::Key' (62, 33), 4522, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\nWarning: keyed filter on word follows unkeyed component testament in the key '~Bible::Key' (62, 33), 4515, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\nWarning: keyed filter on basepos follows unkeyed component word_pos in the key '~Bible::Key' (62, 33), 4515, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\nWarning: keyed filter on basepos follows unkeyed component word_pos in the key '~Bible::Key' (72, 25), 4515, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\nWarning: Neither LIMIT() nor CHOOSEN() supplied for index read on '~Bible::Key' (72, 25), 4523, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\Bible\\\\Inversion.ecl\\n
\\n\\nThis does not assert if my target is hthor.\\n\\nAlso a bit concerned about the warning 'Mismatch in minor version number',\\nthat's also produced when I build the keys on Thor though the workunit itself succeeds.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-10 18:16:27\" },\n\t{ \"post_id\": 1495, \"topic_id\": 335, \"forum_id\": 10, \"post_subject\": \"Re: coping with different data layouts from one data source?\", \"username\": \"Allan\", \"post_text\": \"Thanks for the pointer - I will read the manual.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-11 18:54:01\" },\n\t{ \"post_id\": 1485, \"topic_id\": 335, \"forum_id\": 10, \"post_subject\": \"Re: coping with different data layouts from one data source?\", \"username\": \"dabayliss\", \"post_text\": \"The record structure allows for an IFBLOCK - for exactly this reason. Check out 'structures' in the docs.\", \"post_time\": \"2012-04-11 15:28:05\" },\n\t{ \"post_id\": 1480, \"topic_id\": 335, \"forum_id\": 10, \"post_subject\": \"coping with different data layouts from one data source?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nHow would one cope with different types of records presented in one logical CSV file?\\n \\nThe type of record would be identified by the content of one field common to all record types.\\n\\nI thought of using a MAP but that must return the same record type from all its cases.\\n\\nOne could pre-process the file with something like 'awk' to split into multiple files but that's working round the problem rather than confronting it.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-11 09:44:11\" },\n\t{ \"post_id\": 1499, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"bforeman\", \"post_text\": \"So if you re-SORT with the LOCAL attribute after you DISTRIBUTE, would that satisfy what you need to do?\", \"post_time\": \"2012-04-11 19:26:57\" },\n\t{ \"post_id\": 1498, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"dustinskaggs\", \"post_text\": \"In your example, the DISTRIBUTE reorders the records. My understanding of DISTRIBUTE is that for the form DISTRIBUTE(dataset, expression), it will NEVER make any guarantees as to maintaining the order of the input dataset.\", \"post_time\": \"2012-04-11 19:21:42\" },\n\t{ \"post_id\": 1497, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"bforeman\", \"post_text\": \"This seems to do the trick, but maybe I'm oversimplifying?\\n\\nMyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\nEND;\\n\\n SomeFile := DATASET([{'C','G'},\\n \\t {'C','C'},\\n \\t {'A','X'},\\n \\t {'B','G'},\\n \\t {'A','B'}],MyRec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nSDS := SORT(SomeFile,Value1);\\n\\nOUTPUT(SDS);\\n\\nDSDS := DISTRIBUTE(SDS,HASH(Value1));\\n\\nOUTPUT(DSDS);
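\\n\\n// Sketch only, reusing the SDS/DSDS definitions above: if the per-node row order matters\\n// after the DISTRIBUTE, a LOCAL re-sort restores it without another global SORT:\\nLocalSorted := SORT(DSDS,Value1,LOCAL);\\n\\nOUTPUT(LocalSorted);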
\", \"post_time\": \"2012-04-11 19:11:50\" },\n\t{ \"post_id\": 1494, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"bforeman\", \"post_text\": \"Have you tried DISTRIBUTE using the actual sort value as the expression? I think that will not affect the order.\", \"post_time\": \"2012-04-11 18:51:05\" },\n\t{ \"post_id\": 1492, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"dustinskaggs\", \"post_text\": \"I don't see how a DISTRIBUTE can be used without it reordering my records. I need the records in the dataset to stay in the same order, but with just the partition points shifted.\", \"post_time\": \"2012-04-11 18:38:25\" },\n\t{ \"post_id\": 1491, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re: Re-partitioning sorted data\", \"username\": \"bforeman\", \"post_text\": \"Have you looked at the DISTRIBUTE statement? Using DISTRIBUTE with HASH based on the sorted order should help to reduce your skew.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-11 18:25:36\" },\n\t{ \"post_id\": 1488, \"topic_id\": 336, \"forum_id\": 10, \"post_subject\": \"Re-partitioning sorted data\", \"username\": \"dustinskaggs\", \"post_text\": \"If I have a dataset that is in sort order but has a high amount of skew, what is the most efficient way to lessen the amount of skew while maintaining the sort order? For my current issue, calling SORT will produce the result I want but it seems inefficient to SORT something that is already in sort order.\", \"post_time\": \"2012-04-11 15:39:45\" },\n\t{ \"post_id\": 1650, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nSorry, I missed this:Finally, I am checking for a space in the first column, but what if there is a tab? Is there some magic whitespace test I can use instead?
You'll need to examine your data to see if there are any tabs or other whitespace characters besides blank in that first position and specifically code for them.\\n\\nOR, since it appears that the first rec in a multi-line block always starts with a current date, you could instead do the negative test (like: line[1] <> '2' -- which should work for the next ~1000 years ) to detect a continuation rec.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-24 13:16:49\" },\n\t{ \"post_id\": 1649, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nShort answer = yes.
\\n\\nLong answer = ROLLUP takes in records in the order they appear in the dataset, only comparing contiguous records for possible matches. Therefore, your ROLLUP code, as written, will always append the subsequent "block" records to the end of the existing "block" records. Run your code against the example dataset I used in my code and you'll quickly see exactly what it will do.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-24 13:10:25\" },\n\t{ \"post_id\": 1646, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"jandleman\", \"post_text\": \"I have managed to take what David and Richard taught me about ROLLUP, and I have created a procedure that will first roll up and then parse. I would still like to know if it is possible to ensure that rolled-up rows will be appended in the order they appear in the source file. Here is my procedure:\\n\\n
\\nLogFileLayout := record \\n string rawText;\\nend;\\n\\nFile_Log := dataset('~col::jsa::contin', LogFileLayout,CSV (NOTRIM) );\\n \\n\\t\\t\\t\\t\\t\\t\\t\\t \\nPATTERN ws := pattern('[ ]')+;\\nPATTERN txt := ANY+;\\nPATTERN nbr := PATTERN('[0-9]');\\nPATTERN date := REPEAT(nbr,4) '-' REPEAT(nbr,2) '-' REPEAT(nbr,2); \\nPATTERN time := REPEAT(nbr,2) ':' REPEAT(nbr,2) ':' REPEAT(nbr,2); \\nPATTERN match := OPT(date ws time) ws txt;\\n\\n\\nLogFileLayout XF2(LogFileLayout L, LogFileLayout R) := TRANSFORM\\n SELF.rawText := L.rawText + ' ' + R.rawText;\\n SELF := L; \\nEND;\\n\\nR := ROLLUP(File_Log,LEFT.rawText[1] != ' ' AND RIGHT.rawText[1] = ' ',XF2(LEFT,RIGHT));\\n\\nR;\\n\\nOutRec := record\\n string date := MATCHTEXT(date);\\n string time := MATCHTEXT(time);\\n string RestOfStuff := MATCHTEXT(txt);\\nEND;\\n\\nO := parse(R,R.rawText,match,OutRec,FIRST,SCAN);\\n\\nO ;\\n
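\\n// Summary of the above: the ROLLUP glues each continuation line (one that starts with a space)\\n// onto the preceding un-indented line, and the PARSE then extracts the date, time and remaining\\n// text from each combined record.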
\\n\\nFinally, I am checking for a space in the first column, but what if there is a tab? Is there some magic whitespace test I can use instead?\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-05-24 07:06:11\" },\n\t{ \"post_id\": 1643, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"jandleman\", \"post_text\": \"This works well, although ideally, I would like to merge all of the parts and then parse. I think in some cases the meanings of the child rows will get derive some their meaning from the context. I'm experimenting that now. I do have this question: Will the rolled-up records (right) always be appended to the left in the order they appear in the input dataset, or is that not guaranteed?\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-05-24 03:14:39\" },\n\t{ \"post_id\": 1627, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nDavid's ROLLUP suggestion is meant to be a post-process to the PARSE, like this:ds := dataset([\\n{'2012-05-14 08:09:05 &X=<Notification> Broker::authUser() - call succeeded ' },\\n{'2012-05-14 08:10:05 &X=<Notification> Broker::authUser() - call succeeded '},\\n{'2012-05-14 08:11:05 &X=<Notification> Broker::authUser() - call failed (2173)'},\\n{' (2173) "broker error code"'},\\n{' Personal Paid Plan Expired'},\\n{'2012-05-14 08:19:05 &X=<Notification> Broker::authUser() - call succeeded '}],{string line});\\n\\t\\t\\t\\nPATTERN ws := pattern('[ ]')+;\\nPATTERN txt := ANY+;\\nPATTERN nbr := PATTERN('[0-9]');\\nPATTERN date := REPEAT(nbr,4) '-' REPEAT(nbr,2) '-' REPEAT(nbr,2); \\t\\t\\t\\nPATTERN time := REPEAT(nbr,2) ':' REPEAT(nbr,2) ':' REPEAT(nbr,2); \\t\\t\\t\\nPATTERN match := OPT(date ws time) ws txt;\\n\\nOutrec := RECORD\\n UNSIGNED1 RollFactor; //this is what makes the ROLLUP work\\n STRING10 date;\\n STRING8 time;\\n STRING RestOfStuff;\\t\\nEND;\\n\\nOutRec XF1(ds L) := TRANSFORM\\n SELF.date := MATCHTEXT(date);\\n SELF.time := MATCHTEXT(time);\\n SELF.RestOfStuff := MATCHTEXT(txt);\\n SELF.RollFactor := IF(MATCHED(date),1,2); //1=start rec, 2=continuation rec\\nEND;\\n\\nP := PARSE(ds,line,match,XF1(LEFT),FIRST);\\n\\nP;\\n\\nOutRec XF2(OutRec L, OutRec R) := TRANSFORM\\n SELF.RestOfStuff := L.RestOfStuff + ' ' + R.RestOfStuff;\\n SELF := L; \\nEND;\\n\\nR := ROLLUP(P,LEFT.RollFactor = 1 AND RIGHT.RollFactor = 2,XF2(LEFT,RIGHT));\\n\\nR;
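\\n\\n// A sketch of David's step (d) as literally written (ROLLUP,LOCAL) -- same condition and\\n// TRANSFORM, but rolled up independently on each node; this assumes a start record and its\\n// continuation records have been distributed to the same node:\\nRlocal := ROLLUP(P,LEFT.RollFactor = 1 AND RIGHT.RollFactor = 2,XF2(LEFT,RIGHT),LOCAL);\\n\\nRlocal;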
HTH,\\n\\nRichard\", \"post_time\": \"2012-05-21 15:32:36\" },\n\t{ \"post_id\": 1623, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"jandleman\", \"post_text\": \"Hi David and Richard,\\n\\nActually, after looking further, I discovered a general rule. When a message is continued on subsequent lines, the continuation lines are indented 8 spaces. There may be 1, or 2, or more continuation lines. I took a look at the rollup functionality in the documentation, and I'm not sure quite how to start. I don't know when a line is going to be continued, but I know what a continuation line looks like. I am well versed in SQL, so this to me is like a 1:many outer join (within the same "table"). Can I do that with rollup? Do you have any examples? There are some rollup examples in the ECL manual, but they are different situations. Ideally, what I want is something that would append continuation lines onto the end of the parent line, hopefully in the same order. That would make parsing easier, and would work well in the logic flow I have already developed.\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-05-20 16:05:14\" },\n\t{ \"post_id\": 1622, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nWhat David said \\n\\nRichard\", \"post_time\": \"2012-05-20 14:52:29\" },\n\t{ \"post_id\": 1621, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"dabayliss\", \"post_text\": \"Provided the second and subsequent lines are 'unique' (ie they only exist as 'subsequent' lines then:\\n\\na) Give every output row a column 'rollup_number' usually assigned to 0\\nPARSE as normal - but \\nb) For any first line that has a subsequent - give in the number 1\\nc) For any subsequent give it the number 2\\nd) After the parsing have a ROLLUP,LOCAL with a condition of LEFT.rollup_number = 1 AND RIGHT.rollup_number = 2\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2012-05-20 01:23:59\" },\n\t{ \"post_id\": 1620, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"jandleman\", \"post_text\": \"Hi Richard,\\n\\nIn this example, the message from failed authentication spans 3 lines, \\n\\n2012-05-14 08:11:05 U=ABCDEFGHIJKL &X=<Notification> Broker::authUser() - call failed (2173)\\n\\t(2173) "broker error code"\\n\\tPersonal Paid Plan Expired\\n\\nThere are a few other messages types that span multiple lines; some encompass 2 lines, some 4, etc. Most messages fit on a single line, but in this case, the developers decided that they would issue multiple print lines rather than building the message once and then printing it.\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-05-19 20:32:21\" },\n\t{ \"post_id\": 1619, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a pattern that spans multiple lines\", \"username\": \"rtaylor\", \"post_text\": \"As a general rule, ECL's PARSE pattern matching is based on the matching pattern being contained within a single record (single row of data). It sounds like you have data wherein multiple physical records comprise a single logical record (sometimes). How best to handle that would have to depend on the exact format of the data. I could imagine in some scenarios that it would necessitate a pre-PARSE-processing step to make it all work out correctly. 
I could also imagine circumstances that could be handled during the PARSE. It would help if could you post an example of the content you're talking about.\\n\\nRichard\", \"post_time\": \"2012-05-19 16:07:19\" },\n\t{ \"post_id\": 1618, \"topic_id\": 362, \"forum_id\": 10, \"post_subject\": \"Parsing a pattern that spans multiple lines\", \"username\": \"jandleman\", \"post_text\": \"I am writing an ECL procedure to extract content from computer-generated log files. I am creating multiple complex patterns and this is working very well. Most message are on a single line, but there are a few formats that span several lines. Is there a way to define a pattern that spans multiple lines to extract this content? I can't find any examples of this in the documentation or the forums. It seems to me that this would be a common big data problem.\", \"post_time\": \"2012-05-19 03:00:34\" },\n\t{ \"post_id\": 1651, \"topic_id\": 366, \"forum_id\": 10, \"post_subject\": \"Re: Sentiment analysis\", \"username\": \"arjuna chala\", \"post_text\": \"Hi Saurabh,\\n\\nWe have a contributed demo at - http://hpccsystems.com/community/contributions/data-descriptors-and-simple-example-programs/see-save-skip-twitter-sentiment. This is a sentiment analysis example using Twitter data and the ECL-ML library. More details of the ML library can be found here - http://hpccsystems.com/ml.\\n\\nPlease feel free to post if you have any further questions or suggestions.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2012-05-24 13:49:22\" },\n\t{ \"post_id\": 1644, \"topic_id\": 366, \"forum_id\": 10, \"post_subject\": \"Sentiment analysis\", \"username\": \"saurabh.bhalerao\", \"post_text\": \"I am completely new to HPCC. I have to perform sentiment analysis using HPCC. How exactly can I do it. Is there any readymade code or library available. Is the algorithms for performing sentiment analysis available as any kind of API or library fuctions which would make it easy.\\n\\nIf there is any code already for sentiment analysis Kindly share.\\n\\nThanks\", \"post_time\": \"2012-05-24 05:12:44\" },\n\t{ \"post_id\": 1663, \"topic_id\": 370, \"forum_id\": 10, \"post_subject\": \"Re: Parsing Repeating Patterns & Extracting Content\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nI just noticed you said you're trying to put the result into an "array" (which, in ECL would be a "set") so that you can use that to produce a file. That is unnecessary, since the PARSE always produces a recordset as its result. You can either write that recordset to a file, or use the TABLE function to extract just the result fields to write to your file, like this:
OUTPUT(p,,'MyTestParseFile1');\\nOUTPUT(TABLE(p,{name}),,'MyTestParseFile2');
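Should an actual ECL set still be wanted (John's original aim) rather than a file, the SET function lifts one column of the parse result into a set; a small sketch reusing the p and name definitions from the code above:

NameSet := SET(p, name);   // set of all matched name values
OUTPUT(NameSet);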
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-05-29 14:03:32\" },\n\t{ \"post_id\": 1662, \"topic_id\": 370, \"forum_id\": 10, \"post_subject\": \"Re: Parsing Repeating Patterns & Extracting Content\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nHere's the way I would parse that text:ds := dataset([\\n {'LC: Snapshot (44/6); iexplore.exe 38/5; chrome.exe 5/1; Explorer.EXE 1/0; '},\\n {'LC: Snapshot (45/6); firefox.exe 38/5; MyExplorer.EXE 1/0; '},\\n {'LC: Snapshot (46/6); word.exe 38/5; HIS_chrome.exe 5/1; YOUR_Explorer.EXE 1/0; '}],\\n {string line});\\n\\t\\t\\t\\t\\t\\t\\t\\nPATTERN ws := ' ';\\nPATTERN delim := ';';\\nPATTERN alpha := PATTERN('[_A-Za-z]')+;\\nPATTERN num := PATTERN('[0-9]')+;\\n\\nPATTERN inst := num '/' num;\\nPATTERN snap := 'LC: Snapshot (' inst ')' delim;\\nPATTERN name := alpha '.' alpha;\\nPATTERN prog := name ws inst delim;\\n\\nPATTERN info := OPT(snap ws) prog;\\n\\noutrec := RECORD\\n string SnapInst := matchtext(snap/inst);\\n string name := matchtext(prog/name);\\n string inst := matchtext(prog/inst);\\nEND;\\n\\np := PARSE(ds,line,info,outrec);\\n\\np;
When I parse I always go for the simplest solution first, then I make it more complex if I need to. \\n\\nSometimes it's best to take a simple solution that gets you 80%+ of the way you want to go, then post-process the result to get the rest of the way. For example, assuming you want the snapshot instance to be in every record (instead of just the first one as the above code does), you can do a simple ITERATE and you're there, like this:outrec XF(outrec L, outrec R) := TRANSFORM\\n SELF.SnapInst := IF(R.SnapInst = '', L.SnapInst,R.SnapInst);\\n SELF := R;\\nEND;\\n\\ni := ITERATE(p,XF(LEFT,RIGHT));\\n\\ni;
HTH,\\n\\nRichard\", \"post_time\": \"2012-05-29 13:55:48\" },\n\t{ \"post_id\": 1661, \"topic_id\": 370, \"forum_id\": 10, \"post_subject\": \"Re: Parsing Repeating Patterns & Extracting Content\", \"username\": \"bforeman\", \"post_text\": \"Hi John,\\n\\nWhat does your PARSE statement look like? \\nAlso, can you post your complete PATTERN definitions? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-29 12:41:51\" },\n\t{ \"post_id\": 1660, \"topic_id\": 370, \"forum_id\": 10, \"post_subject\": \"Parsing Repeating Patterns & Extracting Content\", \"username\": \"jandleman\", \"post_text\": \"Help!\\n\\nI am parsing this message:\\n\\n\\nLC: Snapshot (44/6); iexplore.exe 38/5; chrome.exe 5/1; Explorer.EXE 1/0; \\n
\\n\\nUsing this pattern definition:\\n\\n\\npattern SnapShotFrontFmt := 'LC: Snapshot' ;\\npattern SnapShotFront3Fmt := ');' ; \\npattern SnapShotFront2fmt := AnyFmt BEFORE SnapShotFront3Fmt ;\\npattern SnapShotPgmNameFmt := NonSpacesFmt ;\\npattern SnapShotMem1Fmt := IntegerFmt ;\\npattern SnapShotSlashFmt := '/' ;\\npattern SnapShotMem2Fmt := IntegerFmt ;\\npattern SnapShotTailFmt := ';' ;\\npattern SnapShotPgmStatFmt := SpacesFmt SnapShotPgmNameFmt \\n SpacesFmt SnapShotMem1Fmt\\t \\n SnapShotSlashFmt SnapShotMem2Fmt ; \\npattern SnapShotFmt := SnapShotFrontFmt SnapShotFront2fmt \\n SnapShotFront3Fmt REPEAT( SnapShotPgmStatFmt )\\t \\n SpacesFmt ;\\n
\\n\\nThere can be multiple program occurrences captured in a snapshot, thus I am using the REPEAT option on the SnapShotPgmStatFmt pattern. This pattern seems to be parsed okay, but I am trying to capture all occurrences of SnapShotPgmNameFmt into an array for writing to an output file. This statement puts the first occurrence of SnapShotPgmNameFmt into the array, but I can't seem to get access to the other occurences:\\n\\n\\nset of string EventValues := map((matchtext(SnapShotFmt) <> '') =>\\n [ matchtext(SnapShotPgmNameFmt)] ,\\n
\\n\\nIf I specify SnapShotPgmNameFmt[1], I get the same result. If I specify SnapShotPgmNameFmt[2], I get a null string (''). How can I extract all occurrences of this repeating pattern and put them into an array for output?\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-05-29 08:53:18\" },\n\t{ \"post_id\": 3029, \"topic_id\": 383, \"forum_id\": 10, \"post_subject\": \"Re: Batch file rename in ECL\", \"username\": \"Allan\", \"post_text\": \"Gavin has given me a solution.\\n\\nOne has to get the construction of the dataset to be used by APPLY computed at a GLOBAL level (static in c/c++ parlance) for the code to compile.\\nThe code below works:\\n\\nSHARED CopyLF(INTEGER generation) := FUNCTION\\n myFiles := GLOBAL(FilesToCopy(generation), FEW);\\n RETURN NOTHOR(IF(EXISTS(myFiles),\\n APPLY(myFiles,fileservices.Copy('~'+name,'thor5_64_development',DestFile(generation+1,name)),\\n fileservices.AddSuperFile(SF(generation+1),DestFile(generation+1,name))\\n )\\n )\\n );\\nEND;\\n
\", \"post_time\": \"2012-12-21 09:24:02\" },\n\t{ \"post_id\": 3025, \"topic_id\": 383, \"forum_id\": 10, \"post_subject\": \"Re: Batch file rename in ECL\", \"username\": \"Allan\", \"post_text\": \"APPLY works for simple operations but their is a chain of correspondence, initiated by Oleg, where he has found limitations in what APPLY can be used with.\\n\\nI am hitting the same wall - I'm attempting:\\n\\nFOREACH logical file in a dataset of type STD.File.FsLogicalFileNameRecord DO\\n STD.File.Copy to a new logical file\\n Add new file to a Superfile\\nREP\\n
\\nIf I just use APPLY on the dataset I get error:\\n INTERNAL: Dataset is not active
\\nIf I wrap the APPLY within a NOTHOR I get error:\\n INTERNAL: Expected a parent/container context. Likely to be caused by executing something invalid inside a NOTHOR
\\n\\nIt seems to be the 'AddToSuperfile' component that's failing inside the APPLY.\\nWhen commented out, leaving just the STD.File.Copy The operation works.\\n\\nObviously, from the thread above, a PROJECT does not work, so any ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-12-19 16:22:32\" },\n\t{ \"post_id\": 1754, \"topic_id\": 383, \"forum_id\": 10, \"post_subject\": \"Re: Batch file rename in ECL\", \"username\": \"jeremy\", \"post_text\": \"Thanks, worked like a charm.\\nA few notes for anyone who runs into the same issue:\\n\\nIMPORT std;\\nFileList := Std.File.LogicalFileList( '<some pattern>' );\\nRenamedFile := RECORD\\n String oldName;\\n String newName;\\nEND;\\nRenamedFile rename( Std.File.FsLogicalFileInfoRecord L ) := TRANSFORM\\n SELF.oldName := '~' + L.name;\\n SELF.newName := '~' + <some new file name>;\\nEND;\\nRenamedFileList := PROJECT( FileList, rename( LEFT ));\\nAPPLY( RenamedFileList, STD.File.RenameLogicalFile( oldName, newName ));\\n
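For reference, the same steps can be written more compactly with an inline record and TRANSFORM; the file-name pattern and the '_bak' suffix below are made-up placeholders, not values from the thread:

IMPORT Std;
rec := {STRING oldName, STRING newName};
renames := PROJECT(Std.File.LogicalFileList('mydir::*'),
                   TRANSFORM(rec,
                             SELF.oldName := '~' + LEFT.name,
                             SELF.newName := '~' + LEFT.name + '_bak'));
APPLY(renames, Std.File.RenameLogicalFile(oldName, newName));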
\\nThe code can be condensed, but I left things broken out to show the steps I took... of note is that LogicalFileList returns the scope without a '~', but RenameLogicalFile will append the default scope name if you don't add a leading '~'. Anyway, Richard is right, APPLY is your friend here.\", \"post_time\": \"2012-06-08 19:14:40\" },\n\t{ \"post_id\": 1735, \"topic_id\": 383, \"forum_id\": 10, \"post_subject\": \"Re: Batch file rename in ECL\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nTake a look at APPLY -- it might get you where you want to be.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-08 13:48:17\" },\n\t{ \"post_id\": 1730, \"topic_id\": 383, \"forum_id\": 10, \"post_subject\": \"Batch file rename in ECL\", \"username\": \"jeremy\", \"post_text\": \"I'm using STD.File.LogicalFileList to return a list of files, which I can then PROJECT/TRANSFORM to create a new name for each file. I would then like to call STD.File.RenameLogicalFile for each new file name, but I can't seem to find a looping construct in ECL that allows this non-TRANSFORM-like functionality... they all seem to want a TRANSFORM or other set operator, rather then a single statement like dbglog, OUTPUT, or RenameLogicalFile.\\nany thoughts?\", \"post_time\": \"2012-06-08 03:10:13\" },\n\t{ \"post_id\": 1820, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Re: Strange behaviours with EXPORT\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nIf I take your analogy with c++, I think any member x of a class can access to its private member y.
Sorry, but they're only similar, not the same as. You must use SHARED on DeckOfCards for it to be visible to PinochleDeck.\\n\\nRichard\", \"post_time\": \"2012-06-21 14:16:11\" },\n\t{ \"post_id\": 1806, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Re: Strange behaviours with EXPORT\", \"username\": \"ideal\", \"post_text\": \"Hello Richard,\\n\\nFirst, let me thank you for your long explanation (tutorial) about EXPORT use. I accept it even if I don't still understand "why", it must definitly working as you (and documentation) say : \\n\\n
a local definition (neither EXPORT nor SHARED) ends its visibility at the end of the very next EXPORT or SHARED definition.
\\n\\nI thought that instead of :\\n\\nEXPORT CardMod := MODULE\\n Universe := 42;\\n SHARED DeckOfCards := Universe + 10;\\n EXPORT Suits := DeckOfCards / 4;\\n EXPORT PinochleDeck := DeckOfCards - 4;\\nEND;
\\n\\nwe could have : \\n\\nEXPORT CardMod := MODULE\\n Universe := 42;\\n DeckOfCards := Universe + 10;\\n EXPORT Suits := DeckOfCards / 4;\\n EXPORT PinochleDeck := DeckOfCards - 4;\\nEND;
\\n\\nwhere DeckOfCards would be visible from PinochleDeck as it is in the same module.\\n\\nIf I take your analogy with c++, I think any member x of a class can access to its private member y.\\n\\nthanks,\\nJM.\", \"post_time\": \"2012-06-20 07:52:36\" },\n\t{ \"post_id\": 1796, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Re: Strange behaviours with EXPORT\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nI see you re-wrote your original post and added another as I was forming my previous reply, so let me re-state my response in what may be more familiar terms.\\n\\nThe whole point of local vs. SHARED vs. EXPORT definitions is the ability to scope-limit code so that you may encapsulate definitions to preclude any naming collisions (a definition may only be defined once within any visibility scope). \\n\\n"Local" and SHARED definitions are the "supporting cast" to the stars of the show -- the EXPORTs. To put them in OOP terms, you can think of local as "private", SHARED as "protected", and EXPORT as "public".\\n\\nThere are two separate levels at which these operate -- the "Repository" level (your directory/file structure on disk), and within the MODULE structure in ECL code. \\n\\nThe semantics are the same at both levels:a Directory is equivalent to the MODULE structure
a code File in the Directory can contain exactly one EXPORT or SHARED definition
a SHARED definition is only visible within the Directory or Module it is contained in
a local definition precedes the EXPORT or SHARED definition that it supports, whether that EXPORT or SHARED is in a file or within the MODULE structure
\\nEXPORT CardMod := MODULE\\n Universe := 42;\\n\\tSHARED DeckOfCards := Universe + 10;\\n\\tEXPORT Suits := DeckOfCards / 4;\\n\\tEXPORT PinochleDeck := DeckOfCards - 4;\\nEND;
In this example, the CardMod MODULE is the one EXPORT in the file, CardMod.ECL. Universe is a local definition (no SHARED or EXPORT) because it is only needed by DeckOfCards. But DeckOfCards is SHARED because it is needed by both Suits and PinochleDeck. The EXPORTs are the only guys visible outside the MODULE.\\n\\nTo reference the EXPORTs from this MODULE, you would open another code file (or builder window) and your code would look like this:IMPORT MyDirectory;\\nOUTPUT(MyDirectory.CardMod.Suits);\\nOUTPUT(MyDirectory.CardMod.PinochleDeck);
The IMPORT is required to make the EXPORT definitions from MyDirectory available for use. Then you fully-qualify the names of the EXPORT definitions that you want to reference using standard object.property-type of syntax, drilling all the way down to get to the final definition you need to reference.\\n\\nIf this code were separated into files in your directory, there would be no CardMod file. Instead there would be three files: DeckOfCards.ECL (containing both Universe and DeckOfCards definitions), Suits.ECL (containing just the Suits definition), and PinochleDeck.ECL (containing just the PinochleDeck definition). In that case, the code to reference would look like this:IMPORT MyDirectory;\\nOUTPUT(MyDirectory.Suits);\\nOUTPUT(MyDirectory.PinochleDeck);
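To see the visibility rule fail in the simplest case, reusing the names above: a local definition stays usable only by the very next EXPORT or SHARED definition, so without SHARED the second EXPORT below cannot see it (this sketch is meant to illustrate the error, not to run):

EXPORT CardMod := MODULE
  Universe := 42;                      // local: usable only by the next EXPORT (Suits)
  EXPORT Suits := Universe / 4;        // OK
  EXPORT PinochleDeck := Universe - 4; // error: Unknown identifier "Universe"
END;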
HTH,\\n\\nRichard\", \"post_time\": \"2012-06-18 18:31:58\" },\n\t{ \"post_id\": 1795, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Re: Strange behaviours with EXPORT\", \"username\": \"rtaylor\", \"post_text\": \"ideal,\\n\\nIn the following code, the exported function fonc4, prevents compiler to "see" function fonc3. When it is moved before fonc3, then it compiles. I wonder why.
The code is operating exactly as it should. It all has to do with scoping and visibility of definitions, as described in this section of the Language Reference: http://hpccsystems.com/community/docs/e ... visibility\\n\\nThe reason it does not work is the presence of the EXPORT on "fonc4" -- a local definition (neither EXPORT nor SHARED) ends it's visibility at the end of the very next EXPORT or SHARED definition.\\n\\nInside a MODULE structure, the only definitions that need to be EXPORT are those that you will need to reference from outside the MODULE structure. All the rest of the "supporting code" in the MODULE should be local or SHARED.\\n\\nThere is also another behaviour I dont undertand : when I put OUTPUT at the end of this code, an error message is triggered.\\nError: Definition contains actions after the EXPORT has been defined (41, 1), 2325,
Again, exactly as it should be. A code file should either contain a single EXPORT (or SHARED) definition (in your case, that's the MODULE structure itself) and no actions, OR it should contain "Builder Window Runnable" code (actions in the code and no EXPORT definition).\\n\\nSo, to make this code run the way it looks you want it to, you need to remove the EXPORTs from both the MODULE structure AND your "fonc4" definition.\\n\\nAlso, the IMPORT is not needed at all -- the code is all in the same file. IMPORT is designed to make code in other files (in the same or other directories) available for use in the current file (see http://hpccsystems.com/community/docs/e ... tml/import ). You simply need to fully qualify your call the to the EXPORT definition with the MODULE structure's name.\\n\\nHere's your "corrected" code:test_export_import := MODULE\\n\\n\\tEXPORT rec := RECORD\\n\\t\\t STRING a;\\n\\t\\t STRING b;\\n\\tEND;\\n\\trec fonc3(STRING a,STRING b) := TRANSFORM\\n\\t\\t SELF.a := a;\\n\\t\\t SELF.b := b;\\n\\tEND;\\n\\trec fonc4(STRING a,STRING b) := TRANSFORM\\n\\t\\t SELF.a := a;\\n\\t\\t SELF.b := b;\\n\\tEND;\\n\\n\\tPATTERN titi := PATTERN('[a-z]+');\\n\\tPATTERN tutu := PATTERN('[a-z]+');\\n\\tRULE(rec) toto := titi ' ' tutu fonc3($1,$3);\\n\\n\\trec fonc2(rec x) := TRANSFORM(rec,SELF:=x);\\n\\n\\tEXPORT fonc (DATASET({STRING line}) source) := FUNCTION\\n\\t\\t RETURN PARSE(source, line, toto, fonc2($1), FIRST,WHOLE,PARSE);\\n\\tEND;\\n\\nEND; // Module\\n\\nsource := DATASET([{'titi tutu'}],{STRING line});\\ndr := test_export_import.fonc(source);\\nOUTPUT(source);
\\nBTW, these are precisely the kind of issues that we discuss in great detail in class: http://hpccsystems.com/community/traini ... s/training\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-18 15:05:19\" },\n\t{ \"post_id\": 1794, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Re: Strange behaviours with EXPORT\", \"username\": \"ideal\", \"post_text\": \"Ok, I got it but I still don't understand ..\\n\\nIn the documentation (EXPORT or SHARED), we find :\\n"Without either the SHARED or EXPORT keywords, an Attribute's scope is limited to the next SHARED or EXPORTed Attribute"\\n\\nAnd it is true, you cannot reference a attribute thru an export or shared keyword.\\nIn pseudo code, it means :\\n\\nA\\nEXPORT B\\nC(A)\\n\\n=> you get an error : Error: Unknown identifier "A"\\n\\nAnd if you put SHARED like this :\\n\\nSHARED A\\nEXPORT B\\nC(A)\\n\\nCompilation is OK.\\n\\nI really don't understand the reason why this choice has been made as it is counter intuitive (and loose time). Personnally, I would have simply allowed attributes to be visible in the entire module without SHARED keyword.\\n\\nMaybe someone has an answer ? \\n\\nThanks\\nJM.\", \"post_time\": \"2012-06-18 15:02:33\" },\n\t{ \"post_id\": 1791, \"topic_id\": 396, \"forum_id\": 10, \"post_subject\": \"Strange behaviours with EXPORT\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI am new to HPCC but I have found some strange behaviours.\\n\\nIn the following code, the exported function fonc4, prevents compiler to "see" function fonc3. When it is moved before fonc3, then it compiles. I wonder why.\\n\\nThere is also another behaviour I dont undertand : when I put OUTPUT at the end of this code, an error message is triggered.\\nError: Definition contains actions after the EXPORT has been defined (41, 1), 2325, \\nI don't know why either.\\n\\nI rewrote this post because the code section creates bugs at compilation (there must be some special characters inside).\\ndont forget to name your file "test_export_import" or change the module name.\\n\\nThis code generates first an error.\\nThen if you move fonc 4 before fonc 3, it should be ok now.\\n\\nAnother observation : when you remove all about parsing rules, then you can move fonc4 after fonc3. So there is a link between this behaviour and the presence of grammar rules. I really don't understand as there is no reason for this, to me.\\n\\nEXPORT test_export_import := MODULE\\n\\nEXPORT rec := RECORD\\nSTRING a;\\nSTRING b;\\nEND;\\n\\n\\n\\t\\t\\nrec fonc3(STRING a,STRING b) := TRANSFORM\\nSELF.a := a;\\nSELF.b := b;\\nEND;\\n\\n// Move before fonc3 to avoid error message at compilation.\\nEXPORT rec fonc4(STRING a,STRING b) := TRANSFORM\\n SELF.a := a;\\n SELF.b := b;\\nEND;\\n\\nPATTERN titi := PATTERN('[a-z]+');\\nPATTERN tutu := PATTERN('[a-z]+');\\nRULE(rec) toto := titi ' ' tutu fonc3($1,$3);\\n\\nrec fonc2(rec x) := TRANSFORM(rec,SELF:=x);\\n\\nEXPORT fonc (DATASET({STRING line}) source) := FUNCTION\\nRETURN PARSE(source, line, toto, fonc2($1), FIRST,WHOLE,PARSE);\\nEND;\\n\\nEND; // Module\", \"post_time\": \"2012-06-17 15:31:16\" },\n\t{ \"post_id\": 1807, \"topic_id\": 398, \"forum_id\": 10, \"post_subject\": \"Re: Segmentation fault in IF\", \"username\": \"ideal\", \"post_text\": \"Hello Bob,\\n\\nSorry, I dont have time to analyse further, anyway, it is coring in my configuration. I found a workaround. \\nI don't see version 3.6 in my current installation but I am glad to know it exist. 
I must have done it from administration panel after installing Vm image. I'll try to find it.\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-06-20 07:57:16\" },\n\t{ \"post_id\": 1804, \"topic_id\": 398, \"forum_id\": 10, \"post_subject\": \"Re: Segmentation fault in IF\", \"username\": \"bforeman\", \"post_text\": \"but I dont find any 3.6.0 compiler version in ECLIDE distribution.\\n\\n
\\n\\nIn the latest install, you sould see this folder:\\n\\nC:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_6\\\\eclcc.exe\\n\\nAlso, I'm curious, what exactly are you trying to do here? The use of the term "variable" makes me cringe , but what does the record structure look like in the DATASET? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-19 20:06:39\" },\n\t{ \"post_id\": 1798, \"topic_id\": 398, \"forum_id\": 10, \"post_subject\": \"Segmentation fault in IF\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI am trying this : \\n\\n\\t
listeVide := DATASET([],Variable);\\n\\tBoolean test := EXISTS(listeVariables(statut=StatutVariable.STOP));\\n\\tlisteVariables2 := IF(test,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tlisteVide,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tlisteVariables);
\\n\\nI get segfault : \\n\\nError: SIG: Segmentation fault(11), accessing (nil), IP=8
\\n\\nThe log eclagent.log is :\\n\\n00000000 2012-06-19 16:16:33 13324 13324 Logging to /var/log/HPCCSystems/myeclagent/eclagent.2012_06_19.log\\n00000001 2012-06-19 16:16:33 13324 13324 ECLAGENT build community_3.6.2-3\\n00000002 2012-06-19 16:16:33 13324 13324 Waiting for workunit lock\\n00000003 2012-06-19 16:16:33 13324 13324 Obtained workunit lock\\n00000004 2012-06-19 16:16:33 13324 13324 Loading dll (libW20120619-161613.so) from location /var/lib/HPCCSystems/myeclccserver/libW20120619-161613.so\\n00000005 2012-06-19 16:16:33 13324 13324 Starting process\\n00000006 2012-06-19 16:16:33 13324 13324 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000007 2012-06-19 16:16:33 13324 13324 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0xa1b00000 alignment=1048576 bitmapSize=10\\n00000008 2012-06-19 16:16:33 13324 13324 Waiting for run lock\\n00000009 2012-06-19 16:16:33 13324 13324 Obtained run lock\\n0000000A 2012-06-19 16:16:33 13324 13324 Executing hthor graph graph1\\n0000000B 2012-06-19 16:16:33 13324 13324 Executing subgraph 1\\n0000000C 2012-06-19 16:16:33 13324 13324 DISKWRITE: using temporary filename /var/lib/HPCCSystems/myeclagent/temp/W20120619-161613.~spill__scope__5__W20120619-161613\\n0000000D 2012-06-19 16:16:33 13324 13324 setResultRaw((null),0,(157 bytes))\\n0000000E 2012-06-19 16:16:33 13324 13324 Completed subgraph 1\\n0000000F 2012-06-19 16:16:33 13324 13324 Executing subgraph 12\\n00000010 2012-06-19 16:16:33 13324 13324 DISKWRITE: using temporary filename /var/lib/HPCCSystems/myeclagent/temp/W20120619-161613.~spill__scope__6__W20120619-161613\\n00000011 2012-06-19 16:16:33 13324 13324 setResultRaw((null),1,(157 bytes))\\n00000012 2012-06-19 16:16:33 13324 13324 Completed subgraph 12\\n00000013 2012-06-19 16:16:33 13324 13324 Executing subgraph 31\\n00000014 2012-06-19 16:16:33 13324 13324 Executing subgraph 21\\n00000015 2012-06-19 16:16:33 13324 13324 Reading file /var/lib/HPCCSystems/myeclagent/temp/W20120619-161613.~spill__scope__5__W20120619-161613\\n00000016 2012-06-19 16:16:33 13324 13324 setResultRaw(a1,-3,(157 bytes))\\n00000017 2012-06-19 16:16:33 13324 13324 Completed subgraph 21\\n00000018 2012-06-19 16:16:33 13324 13324 Executing subgraph 27\\n00000019 2012-06-19 16:16:33 13324 13324 Executing subgraph 24\\n0000001A 2012-06-19 16:16:33 13324 13324 Reading file /var/lib/HPCCSystems/myeclagent/temp/W20120619-161613.~spill__scope__6__W20120619-161613\\n0000001B 2012-06-19 16:16:33 13324 13324 setResultRaw(a2,-3,(157 bytes))\\n0000001C 2012-06-19 16:16:33 13324 13324 Completed subgraph 24\\n0000001D 2012-06-19 16:16:33 13324 13324 DISKWRITE: using temporary filename /var/lib/HPCCSystems/myeclagent/temp/W20120619-161613.~spill__scope__auto4__W20120619-161613\\n0000001E 2012-06-19 16:16:33 13324 13324 Completed subgraph 27\\n0000001F 2012-06-19 16:16:33 13324 13324 ================================================\\n00000020 2012-06-19 16:16:33 13324 13324 Signal: 11 Segmentation fault\\n00000021 2012-06-19 16:16:33 13324 13324 Fault IP: 00846891\\n00000022 2012-06-19 16:16:33 13324 13324 Accessing: 00000000\\n00000023 2012-06-19 16:16:33 13324 13324 Registers:\\n00000024 2012-06-19 16:16:33 13324 13324 EAX:00000000 EBX:00883FF4 ECX:0084CC34 EDX:0084C47B ESI:09FFE728 EDI:00000001\\n00000025 2012-06-19 16:16:33 13324 13324 CS:EIP:0073:00846891\\n00000026 2012-06-19 16:16:33 13324 13324 SS:ESP:007B:BFF12500 EBP:BFF12548\\n00000027 2012-06-19 16:16:33 13324 13324 Stack[BFF12500]: 09FFE728 0A024030 00000001 
00842632 09FFF870 0117CECD 00760228 0084C47B\\n00000028 2012-06-19 16:16:33 13324 13324 Stack[BFF12520]: 0084CC34 0084C47B 0084CC34 00000001 00000005 BFF12570 008E26B6 00883FF4\\n00000029 2012-06-19 16:16:33 13324 13324 Stack[BFF12540]: 00000001 09FFE630 BFF12588 00846B04 09FFE728 0A024030 00000001 00747000\\n0000002A 2012-06-19 16:16:33 13324 13324 Stack[BFF12560]: 00760228 0084C47B 0084CC34 00000001 BFF125B8 008E83C0 0084C47B 00883FF4\\n0000002B 2012-06-19 16:16:33 13324 13324 Stack[BFF12580]: 09FFE630 0A024030 BFF125B8 00846B7C 09FFE630 0A024030 09FFE630 0A02ADB8\\n0000002C 2012-06-19 16:16:33 13324 13324 Stack[BFF125A0]: BFF125F8 008E83C0 00000000 00883FF4 09FFE630 0A02ADB8 BFF125F8 00846C13\\n0000002D 2012-06-19 16:16:33 13324 13324 Stack[BFF125C0]: 09FFE630 0A024030 BFF125F8 00B4E9F0 00BB2FF4 09FFE5A8 BFF125F8 09FFE6CC\\n0000002E 2012-06-19 16:16:33 13324 13324 Stack[BFF125E0]: 0A02B1E8 0000000C 00AC0F88 00BB2FF4 09FFE640 00846B80 BFF126A8 00B87438\\n0000002F 2012-06-19 16:16:33 13324 13324 Frame:\\n00000030 2012-06-19 16:16:33 13324 13324 Backtrace:\\n00000031 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x3b) [0xff3edb]\\n00000032 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0xbf8) [0xff4dd8]\\n00000033 2012-06-19 16:16:33 13324 13324 [0x2d7410]\\n00000034 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph9doExecuteEPKhb+0x131) [0x846891]\\n00000035 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph16executeSubgraphsEPKh+0x94) [0x846b04]\\n00000036 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph14doExecuteChildEPKh+0x5c) [0x846b7c]\\n00000037 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph8evaluateEjPKh+0x83) [0x846c13]\\n00000038 2012-06-19 16:16:33 13324 13324 /var/lib/HPCCSystems/myeclccserver/libW20120619-161613.so(+0x4c438) [0xb87438]\\n00000039 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement7prepareER13IAgentContextPKhb+0x35d) [0x8463ed]\\n0000003A 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement7prepareER13IAgentContextPKhb+0x195) [0x846225]\\n0000003B 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph7prepareEPKhb+0x9b) [0x84673b]\\n0000003C 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph7executeEPKh+0x53) [0x845bf3]\\n0000003D 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclGraph7executeEPKh+0x9f) [0x846edf]\\n0000003E 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent12executeGraphEPKcbjPKv+0x164) [0x848724]\\n0000003F 2012-06-19 16:16:33 13324 13324 /var/lib/HPCCSystems/myeclccserver/libW20120619-161613.so(+0x4f27e) [0xb8a27e]\\n00000040 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x82) [0xaa5202]\\n00000041 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x3b) [0xaa622b]\\n00000042 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x2ed) [0xaa5c8d]\\n00000043 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x131) [0xaa6751]\\n00000044 2012-06-19 16:16:33 13324 13324 
/opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x1ff) [0x833fdf]\\n00000045 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x28d) [0x83441d]\\n00000046 2012-06-19 16:16:33 13324 13324 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0xaad) [0x8363ed]\\n00000047 2012-06-19 16:16:33 13324 13324 eclagent(main+0x87) [0x8049067]\\n00000048 2012-06-19 16:16:33 13324 13324 /lib/tls/i686/cmov/libc.so.6(__libc_start_main+0xe6) [0x1123bd6]\\n00000049 2012-06-19 16:16:33 13324 13324 eclagent() [0x8048f11]\\n0000004A 2012-06-19 16:16:33 13324 13324 ThreadList:\\n0000004B 2012-06-19 16:16:33 13324 13324 SIG: Segmentation fault(11), accessing (nil), IP=846891\\n
\\n\\nThere is also this message after segfault : \\n\\nWarning: Mismatch in minor version number (3.0.0 v 3.6.0) (0, 0), 3118, unknown
\\n\\n.. but I dont find any 3.6.0 compiler version in ECLIDE distribution.\\n\\nI know IF returns a single value. Despite, it should not core.\\n\\nJM\", \"post_time\": \"2012-06-19 14:24:51\" },\n\t{ \"post_id\": 1813, \"topic_id\": 399, \"forum_id\": 10, \"post_subject\": \"Re: Recursive NLP parsing - howto ?\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello - a similar question was answered in this forum:\\nviewtopic.php?t=395&p=1789#p1789\", \"post_time\": \"2012-06-20 11:57:43\" },\n\t{ \"post_id\": 1799, \"topic_id\": 399, \"forum_id\": 10, \"post_subject\": \"Recursive NLP parsing - howto ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI am trying to use a forward reference in a PARSE grammar. \\nIn the sample in the documentation, it is said necessary to define things as below :\\n\\nRULE a := USE('symbol');\\nRULE b := 'pattern';\\nRULE s := DEFINE(b,'symbol');
\\n\\nMy code is : \\n\\n....\\nRULE(Btree) predicat := foncteur '(' USE(Btree,'args') ')' conspred($1,$3);\\nRULE(Btree) arg := atome consarg($1) | variable consarg($1) | predicat transarg($1);\\nRULE(Btree) args := arg OPT(suiteargs) addarg($1,$2);\\nRULE(Modele) fonction := foncteur '(' args ')' consmodele($1,$3);\\n....
\\n\\nI dont know where to put DEFINE action (and everything else maybe) to refer to recursive symbol args in rules above. \\nCould you please help me ?\\n\\nThanks.\\nJM.\", \"post_time\": \"2012-06-19 14:36:18\" },\n\t{ \"post_id\": 1846, \"topic_id\": 405, \"forum_id\": 10, \"post_subject\": \"Re: project/transform transitions issue\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nAnd there is an article in the Programmer's Guide on doing Cartesian Products.\\n\\nRichard\", \"post_time\": \"2012-06-27 13:57:37\" },\n\t{ \"post_id\": 1844, \"topic_id\": 405, \"forum_id\": 10, \"post_subject\": \"Re: project/transform transitions issue\", \"username\": \"ideal\", \"post_text\": \"Richard,\\n\\nNow I see my description was not clear enough. My intention is to make a cartesian product with the two datasets. With (12,34,56) and (78,90), the result would be : (1278,1290,3478,3490,5678,5690). Thanks to the programming tutorial, I found a solution with normalize, very similar to the one you suggested :\\n\\nrec1 := RECORD\\n INTEGER x;\\nEND;\\n\\nrec := RECORD\\n DATASET(rec1) dv;\\nEND;\\n\\nlisteA := DATASET([{[{1},{2}]},{[{5},{6}]}],rec);\\nlisteB := DATASET([{[{3},{4}]},{[{7},{8}]}],rec);\\n\\nrec myXF(listeA L,UNSIGNED c) := TRANSFORM\\n SELF.DV := IF(c=c,L.dv + listeB[c].dv);\\nEND;\\n \\nmyds := NORMALIZE(listeA,COUNT(listeA),myXF(LEFT,COUNTER));\\n\\noutput(myds);
\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-06-26 21:53:34\" },\n\t{ \"post_id\": 1830, \"topic_id\": 405, \"forum_id\": 10, \"post_subject\": \"Re: project/transform transitions issue\", \"username\": \"rtaylor\", \"post_text\": \"JM,\\n\\nIf your intention is to append the nested child dataset records from one dataset to the like-numbered record in another, then this code will accomplish that a bit more simply:rec1 := RECORD\\n INTEGER x;\\nEND;\\n\\nrec := RECORD\\n DATASET(rec1) dv;\\nEND;\\n\\nlisteA := DATASET([{[{1},{2}]},{[{5},{6}]}],rec);\\nlisteB := DATASET([{[{3},{4}]},{[{7},{8}]}],rec);\\n\\nrec myXF(listeA L, integer C) := TRANSFORM\\n SELF.DV := L.dv + listeB[C].dv;\\nEND;\\n\\t\\nmyds := PROJECT(listeA,myXF(LEFT,COUNTER));\\n\\noutput(myds);
HTH,\\n\\nRichard\", \"post_time\": \"2012-06-25 18:53:56\" },\n\t{ \"post_id\": 1827, \"topic_id\": 405, \"forum_id\": 10, \"post_subject\": \"project/transform transitions issue\", \"username\": \"ideal\", \"post_text\": \"I'm stuck since two days on a problem without being able to find the beginning of a solution. I only localized the problem as described in the sample code below. \\n\\nWith this code, I expect, at the end of project/transform suite, a dataset similar to (1,2,3,4) and I get (3,4,3,4).\\n\\nI get exactly <Row><dv><Row><x>3</x></Row><Row><x>4</x></Row><Row><x>3</x></Row><Row><x>4</x></Row></dv></Row> in ds.dr field.\\n\\nCould you help me to correct my mistake ? \\n\\nrec1 := RECORD\\n\\tINTEGER x;\\nEND;\\n\\nrec := RECORD\\n\\tDATASET(rec1) dv;\\nEND;\\n\\nlisteA := DATASET([{DATASET([{1},{2}],rec1)}],rec);\\nlisteB := DATASET([{DATASET([{3},{4}],rec1)}],rec);\\n\\nrec g(rec elmtB,rec elmtLocalA) := TRANSFORM\\n\\tSELF.dv := elmtLocalA.dv+elmtB.dv;\\nEND;\\n\\n{DATASET(rec) dr} f(rec elmtA,DATASET(rec) listeLocalB) := TRANSFORM\\n\\tSELF.dr := PROJECT(listeLocalB,g(LEFT,elmtA));\\nEND;\\n\\nds := PROJECT(listeA,f(LEFT,listeB));\\n\\nOUTPUT(ds);
\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-06-22 22:48:04\" },\n\t{ \"post_id\": 1948, \"topic_id\": 429, \"forum_id\": 10, \"post_subject\": \"Re: xUnit style testing in ECL\", \"username\": \"bforeman\", \"post_text\": \"Hi Greg,\\n\\nI would start by creating a Builder Window Runnable file in my repository, let's call it BWR_TestSTD.\\n\\nIn that file, all of the calls to all of the tests would be in there. For example:\\n\\nIMPORT TestSTD;\\n\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test01;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test02;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test03;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test04;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test05;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test06;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test07;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test08;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test09;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test10;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test11;\\nTESTSTD.str.TestCleanSpaces.TestConstant.Test12;
\\n\\nI can run this in the ECL IDE, or on the command line via ECL Plus, or from the new ECL command line. (see the latest HPCCClientTools PDF for more information). I would run this everytime I upgraded to a new version.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-12 12:17:37\" },\n\t{ \"post_id\": 1939, \"topic_id\": 429, \"forum_id\": 10, \"post_subject\": \"xUnit style testing in ECL\", \"username\": \"gkrasnow\", \"post_text\": \"I have seen in the ECL Standard Library that there is a test folder with tests for many of the functions using the assert keyword. I was wondering if there is sample code for a test harness to be able to run through a test folder like this and report test failures. Thanks.\\n\\n- Greg\", \"post_time\": \"2012-07-11 16:46:27\" },\n\t{ \"post_id\": 1956, \"topic_id\": 431, \"forum_id\": 10, \"post_subject\": \"Re: Debugging a MODULE\", \"username\": \"rtaylor\", \"post_text\": \"Presuming the definition you want to OUTPUT is EXPORT, you should just open another window and do the OUTPUT there, something like this:\\nIMPORT MyFolder;\\nOUTPUT(MyFolder.MyModule.MyDef)
HTH,\\n\\nRichard\", \"post_time\": \"2012-07-12 18:25:42\" },\n\t{ \"post_id\": 1953, \"topic_id\": 431, \"forum_id\": 10, \"post_subject\": \"Debugging a MODULE\", \"username\": \"gkrasnow\", \"post_text\": \"I have been trying to move some code into modules so that they could be tested independently from the rest of the code. However, when I then want to try to debug the code I find that I cannot just place OUTPUT() statements in the module. I get an error about side effects not associated with an action. What would be the proper way to debug a module? Thanks.\", \"post_time\": \"2012-07-12 17:29:26\" },\n\t{ \"post_id\": 2825, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"DSC\", \"post_text\": \"OK, I figured out how to get this working the way it should. Here are some notes for interested readers that want to write plugins:\\n\\n* It helps to include a couple of header files from the HPCC source code. Specifically platform.h and hqlplugins.hpp (both within system/include/ in the source tree). There are some good #defines in there that will make it easier to use sample code, like 'size32_t' and 'PLUGIN_VERSION'.\\n\\n* You have to supply a getECLPluginDefinition() function within your plugin. Whatever you define for 'moduleName' will be what you cite in the IMPORT statement within the client ECL code (where you're calling the service from).\\n\\n* Again within getECLPluginDefinition(), you have to define a value for 'ECL'. That value is a char* that contains ECL code. That code should be everything that the client ECL code needs (EXPORTed RECORD definitions, SERVICE declarations, etc.). The compiler on the server needs this.\\n\\n* I was unable to find a good, automated way of installing newly-built plugins. My solution involved two steps: 1) Change the ownership of /opt/HPCCSystems/plugins/ on all nodes from root to hpcc (to contain the privileges and allow me to install new things there easily); and 2) leverage the script at /opt/HPCCSystems/sbin/hpcc-push.sh to copy the plugin (a shared library) from my source directory to all nodes. I made the call to hpcc_push.sh conditional on successful compilation. Note that you have to 'sudo -u hpcc' to run that script.\\n\\n* While the server's compiler can use the ECL code just mentioned, the Windows-based IDE has no idea that the plugin exists. You have to create, basically, an interface declaration and then IMPORT that into your client code. The interface declaration should be a module that exactly matches the 'moduleName' value from getECLPluginDefinition(), and the module should contain (at minimum) the stuff as defined for ECL within getECLPluginDefinition(). To leverage the example plugin listed in the Language Reference Manual: The moduleName value is 'lib_examplelib' and so you create an ECL file named 'lib_examplelib.ecl' containing a MODULE named 'lib_examplelib' and within that you put the value of the EclDefinition global variable (from the program listing). Anyway, this interface declaration allows the IDE to syntax check correctly. It obviously won't run locally, on that Windows system, but I wasn't aiming for that.\\n\\nThis is probably all clear as mud. 
Hopefully it will help someone, though.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-20 16:46:21\" },\n\t{ \"post_id\": 2817, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"DSC\", \"post_text\": \"I just discovered that I can at least begin to execute the plugin successfully if I submit the code using eclplus on the HPCC server itself. It segfaults, but that's my problem. The eclcc.log shows that the ECL code successfully compiled and linked against my plugin (within the SERVICE definition, BTW).\\n\\nSo the problem reduces to: How do you tell the Windows IDE about plugins installed in the cluster?\\n\\nDan\", \"post_time\": \"2012-11-19 16:51:14\" },\n\t{ \"post_id\": 2815, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"rtaylor\", \"post_text\": \"Dan,Would it perhaps be better to try to deploy this as a service rather than a plugin? What is the difference between the two, anyway? The documentation says only that it's possible to do both, not why you would choose one over the other.
And now you're beyond my area of expertise. Someone else will have to educate us both. \\n\\nRichard\", \"post_time\": \"2012-11-19 16:37:01\" },\n\t{ \"post_id\": 2814, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":1poqw4lj]Do you have a SERVICE structure containing the ECL prototypes for the functions you want to access?\\n\\nWell, that's part of the confusion. If I implement a SERVICE definition within my code I can get the syntax checker to accept it, but then the compiler on the server starts complaining (undefined symbol on the function entry point). Wrapping the SERVICE in a MODULE named the same what is defined in the C++ code doesn't help, either.\\n\\nAddendum: If I omit the 'library' parameter to the SERVICE declaration, I get the above behavior. If I include it, the server's linker complains that the library cannot be found (which makes some sense, as I don't believe the plugin directory is in the library path).\\n\\nBased on the source code of the plugins that ship with HPCC, it looks like the module name, service definition, exported record definitions, etc. are all defined in the C++ code, specifically in ECLPluginDefinitionBlock.ECL. None of the other plugins redefine that stuff elsewhere, as far as I can see. This may be due to the IDE having .dll instances of the libraries though; I'm just not sure.\\n\\nWould it perhaps be better to try to deploy this as a service rather than a plugin? What is the difference between the two, anyway? The documentation says only that it's possible to do both, not why you would choose one over the other.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-11-19 16:33:16\" },\n\t{ \"post_id\": 2812, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nDo you have a SERVICE structure containing the ECL prototypes for the functions you want to access?\\n\\nRichard\", \"post_time\": \"2012-11-19 16:13:46\" },\n\t{ \"post_id\": 2810, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"DSC\", \"post_text\": \"Plugins appear to be tricky.\\n\\nI'm trying to deploy a plugin. It compiles to a shared library (.so under RHEL5) and I've manually copied it to /opt/HPCCSystems/plugins/ on every node in my cluster. eclcc.log indicates that the plugin loads correctly. At this point I'd like to actually try calling the thing but I can't convince the IDE that the plugin exists.\\n\\nWithin getECLPluginDefinition() inside the plugin I've defined moduleName (and verified that it matches what is emitted within eclcc.log). When I try to IMPORT that module name in my ECL code, the IDE immediately complains that the symbol is unknown. 
I suspect that the server is not being consulted during this step.\\n\\nWhat steps am I missing to glue this deployed library to my ECL code?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-11-19 15:54:33\" },\n\t{ \"post_id\": 1994, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"sbagaria\", \"post_text\": \"[quote="gkrasnow":20lhgbmz]\\nI tried using the eclcc command locally and I am seeing the following error:\\n00000001 2012-07-17 15:35:42 21360 21360 Plugin /opt/HPCCSystems/plugins/pluginexample.so exports getECLPluginDefinition but fails consistency check - not loading\\n\\n\\nThis usually means that the magicVersion field in the ECLPluginDefinitionBlock was undefined or incorrectly defined. As of today, the compatible plugin version is 2. The most currrent ECL Language Reference would illustrate this to be 1 in an example, which is wrong and one should always consult the hqlplugins.hpp file for the correct version. A hack would be to just define the version as the PLUGIN_VERSION macro from the same hpp file to always be consistent.\", \"post_time\": \"2012-07-18 01:14:46\" },\n\t{ \"post_id\": 1991, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"gkrasnow\", \"post_text\": \"Is there a way to have the system place the full compile line into the log file? I am seeing an error that seems to indicate that it cannot find my .so file, so I would like to see what -L arguments are on the command line. Thanks.\\n\\nI tried using the eclcc command locally and I am seeing the following error:\\n00000001 2012-07-17 15:35:42 21360 21360 Plugin /opt/HPCCSystems/plugins/pluginexample.so exports getECLPluginDefinition but fails consistency check - not loading\\n\\nNot sure what gets checked for the "consistency check".\\n\\n- Greg\", \"post_time\": \"2012-07-17 16:54:31\" },\n\t{ \"post_id\": 1982, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"jsmith\", \"post_text\": \"The ECL language reference mentions has the following extract under ECL-Plugin deployment:\\n\\n
Deployment\\nExternal .SOs must be deployed to the /opt/HPCCSystems/plugins directory on each node of the target environment.\\nIf external data files are required, they should be either manually deployed to each node, or referenced from a network\\nnode (the latter requires hard-coding the address in the code for the .SO). Note that manually deployed files are not\\nbacked up with the standard SDS backup utilities.
\\n\\n\\nSo, basically you need to ensure your plugin exists in that directory on all thor nodes.\", \"post_time\": \"2012-07-16 12:28:34\" },\n\t{ \"post_id\": 1976, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"rtaylor\", \"post_text\": \"Greg,I do not see a document in the community documentation set labeled "operational manual".
Let me re-phrase my answer then: How to deploy a plugin should be documented in one of the operational manuals (sorry, I don't know which one). \\n\\n
I understand that each node will run the C++ function separately. I just wanted to understand how the type conversion works. I think that there may be some use-cases where it would make sense (though obviously not all use-cases).
As Dan pointed out, the information is in the BEGINC++ structure docs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-13 21:51:16\" },\n\t{ \"post_id\": 1975, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="rtaylor":3c8cta03]Greg,\\n\\nHow to deploy a plugin should be documented in the operational manuals.\\n\\nSomeone else will have to give the definitive answer about working with a recordset in a C++ function, but I expect that it would be problematic, since our datasets are distributed across the nodes and your function would be running separately on each node, in parallel, when it's called by your ECL code.\\n\\nHTH,\\n\\nRichard\\nI do not see a document in the community documentation set labeled "operational manual".\\n\\nI understand that each node will run the C++ function separately. I just wanted to understand how the type conversion works. I think that there may be some use-cases where it would make sense (though obviously not all use-cases).\\n\\nThanks.\\n\\n- Greg\", \"post_time\": \"2012-07-13 20:46:12\" },\n\t{ \"post_id\": 1974, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"nvasil\", \"post_text\": \"If you want an extensive example of integrating libraries through the BEGINC++ interface take a look at this example http://www.ismion.com/documentation/ecl-pb/index.html\\n\\nIn short the recordset and the DATASET are treated the same way as pointed in the previous post.\", \"post_time\": \"2012-07-13 19:20:37\" },\n\t{ \"post_id\": 1972, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"DSC\", \"post_text\": \"Passing a dataset is documented in the Language Reference, within the "BEGINC++ Structure" section (inside the Special Structures topic). In short, a dataset is passed as a size/pointer pair. The documentation is a little bare-bones, but it is there.\\n\\nAs Richard pointed out, you do have to be aware that your C++ code will have only local data. That is actually useful for a large number of use cases, but it won't be for others.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-07-13 18:42:15\" },\n\t{ \"post_id\": 1971, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"Re: C++ ECL Plugins\", \"username\": \"rtaylor\", \"post_text\": \"Greg,\\n\\nHow to deploy a plugin should be documented in the operational manuals.\\n\\nSomeone else will have to give the definitive answer about working with a recordset in a C++ function, but I expect that it would be problematic, since our datasets are distributed across the nodes and your function would be running separately on each node, in parallel, when it's called by your ECL code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-13 17:38:05\" },\n\t{ \"post_id\": 1965, \"topic_id\": 433, \"forum_id\": 10, \"post_subject\": \"C++ ECL Plugins\", \"username\": \"gkrasnow\", \"post_text\": \"There is some very limited documentation in the ECL Language Reference. One thing that is missing though is how to deploy a plugin. It mentions a utility called "ConfigEnv", but I don't see anything called "ConfigEnv" nor can I find any other reference to this utility. How does one deploy a plugin?\\n\\nAlso, on a slightly related topic... Is it possible to have a C++ function which operates on a recordset? 
I don't really see that mentioned in the documentation where the ECL <-> C++ type conversion is mentioned.\\n\\nThanks.\\n\\n- Greg\", \"post_time\": \"2012-07-13 00:06:25\" },\n\t{ \"post_id\": 2061, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"I could not get the\\n\\n#IF(itm in ['mmm','nnn'])\\n
\\nconstruct to work, however:\\n\\n #IF(REGEXFIND(%'@name'%,'RecordType,Eff_Date,Type_Of_UPDATE,POLICY_TYPE',NOCASE))\\n
\\n\\ndoes work fine.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-24 16:18:32\" },\n\t{ \"post_id\": 2032, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nWe have a dataset presented to us with about 4 different record types in it. (all fields of fixed length)\\nThe 1st 5 fields are common to all record types.\\n\\nI'm attempting to setup a 'Testharness' MODULE which holds test cases in a DATASET. And exported attributes that ensure every field of the test data is padded out to the correct width by a project TRANSFROM before being desprayed.\\n\\nThere quite a few fields and they can well change so its a perfect candidate for a MACRO.\\n\\nI've got it all working just fine apart from this issue with the MACRO.\\n\\nI'll send you the ECL module directly to you via e-mail tomorrow.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2012-07-19 18:11:21\" },\n\t{ \"post_id\": 2031, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, now I need to ask what it is you're trying to accomplish? What operation will use this TRANSFORM you're building?\\n\\nRichard\", \"post_time\": \"2012-07-19 17:56:17\" },\n\t{ \"post_id\": 2019, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nA point to note:\\nas this macro is being called multiple times I've found I've had to declare the 'FieldDef' macro variable at the same scope as the 'loadxml' statement otherwise the compiler complains that the variable is already declared.\\n\\nIn addition I'm not out of the woods yet.\\nI actually have multiple record structures that must have this\\nself.<field> := P(L.<field>);
\\ngenerated for all their fields, unfortunatly there is a small set of fields that are common to all the record types, so multiple uses of the macro in the same TRANSFORM will fail with 'value for self.field has already been specified'.\\nI thought it would be an easy matter to exclude this set of fields using a construct like:\\n\\nMAC_PrepTestRecord(pInputRecord,pGenerateCommonFields = FALSE) := MACRO\\n \\n #exportxml(rTheRecord, pInputRecord)\\n #for(rTheRecord)\\n #for(Field)\\n #SET(FieldDef,'\\tself.'+%'@name'%+' := P(L.'+ %'@name'%+')')\\n //#if(%'@name'% IN ['RecordType','POLICY_NUMBER','Eff_Date','Type_Of_UPDATE'])\\n #if(%'@name'% = 'RecordType')\\n #if(pGenerateCommonFields)\\n %FieldDef%;\\n #end\\n #else\\n %FieldDef%;\\n #end\\n #end\\n #end\\nendmacro;\\n
\\nThen call the macros in the TRANSFORM thus:\\n\\n MAC_PrepTestRecord (CS.POL01,TRUE);\\n MAC_PrepTestRecord (CS.SUB01);\\n MAC_PrepTestRecord (CS.VEH01);\\n
\\nHowever whatever I try I just get error:\\n\\nError: A value for "SELF.recordtype" has already been specified (19, 22), 2110, project_uk_policy.testharness\\n
\\n\\nThe '#if' seems to be being ignored.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-19 10:33:34\" },\n\t{ \"post_id\": 2017, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nThanks very much!\\n\\nI was scompletly stuck - would never have thought of that in a hundred years!\\n\\nAdding that to my collection of useful ECL examples.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-19 08:59:47\" },\n\t{ \"post_id\": 2016, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"bforeman\", \"post_text\": \"Good trick, thanks Richard Archived for prosterity.\", \"post_time\": \"2012-07-18 22:12:19\" },\n\t{ \"post_id\": 2015, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThis code works for me:
import STD;\\n\\nr := record\\n string1 One;\\n string1 Two;\\nend;\\n\\nds := dataset([{'a','b'},{'c','d'}],r);\\n\\nP(string1 L) := STD.Str.ToUpperCase(L);\\n loadxml('<xml/>');\\n\\nMAC_PrepTestRecord(pInputRecord) :=\\nmacro\\n #exportxml(rTheRecord, pInputRecord)\\n #declare(FieldDef)\\n #for(rTheRecord)\\n #for(Field)\\n #SET(FieldDef,' self.'+%'@name'%+' := P(L.'+ %'@name'%+')')\\n %FieldDef%;\\n #end\\n #end\\nendmacro;\\n\\nR XF(ds L) := TRANSFORM\\n MAC_PrepTestRecord(r)\\nEND;\\n\\nPr := PROJECT(ds,XF(LEFT));\\n\\nPr;\\t
The "trick" is to get the LOADXML function outside of the MACRO.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-18 22:04:17\" },\n\t{ \"post_id\": 2009, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nActually I just want to iterate over all fields in a record (non-nested very simple) within a TRANSFORM.\\n\\nI used the word 'drilldown' as in the examples on #EXPORTXML etc in the Ref manual it uses the word in the context of performing this type of operation.\\n\\nIt would be great to have an answer, as there are many fields (about 100 in all), and not stable at the moment, will change in the near and long term.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-18 17:35:32\" },\n\t{ \"post_id\": 2005, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Re: Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nYou mention "drilldown" in your message subject. Are you trying to use the drilldown feature in the ECL IDE? I believe there is an issue there that I have reported.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-18 16:03:47\" },\n\t{ \"post_id\": 1989, \"topic_id\": 437, \"forum_id\": 10, \"post_subject\": \"Using drilldown in MACRO's from within TRANSFORM's\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nUsing a MACRO(<record defintion>) I'm attempting to generate:\\n\\nSELF.<field in record> := P(L.<field in record>);\\n\\nFor all fields in the supplied record structure.\\n\\nCopying examples form the Ref manual I'm attempting stuff like:\\n\\nMAC_PrepTestRecord(pInputRecord)\\t:=\\nmacro\\n loadxml('<xml/>');\\n #exportxml(rTheRecord, pInputRecord)\\n #declare(FieldDef)\\n #for(rTheRecord)\\n #for(Field)\\n #SET(FieldDef,'\\tself.'+%'@name'%+' := P(L.'+ %'@name'%+')')\\n %FieldDef%;\\n #end\\n #end\\nendmacro;\\n
\nHowever, whatever I do I get this error:\nError: syntax error near "'<xml/>'" : expected ANY, _ARRAY_, ASCII, ASSERT, BIG_ENDIAN, CONST, DATASET, EBCDIC, EMBEDDED, GROUPED, _LINKCOUNTED_, LITTLE_ENDIAN, OPT, OUT, PACKED, PATTERN, RECORD, ROW, RULE, SET, <typename>, STREAMED, TOKEN, TYPEOF, UNSIGNED, VIRTUAL, <?>, <??>, dataset, identifier, type name, type name, identifier, datarow, function-name, function-name, action, pattern, event, transform-name, '^', '$' (20, 11), 3002, project_uk_policy.testharness\n
\\n\\nIf I just try:\\n\\nMAC_PrepTestRecord(pInputRecord)\\t:=\\nmacro\\n loadxml('<xml/>');\\nendmacro;\\n
\\nI get the same error.\\nIf I move the 2nd MACRO example outside the TRANSFORM it does not error, so the issue is the context in which I'm attmepting to use the MACRO.\\n\\nPlease help - as the error message is not very illuminating.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2012-07-17 08:44:11\" },\n\t{ \"post_id\": 2042, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"Re: External libraries in BEGINC++\", \"username\": \"sbagaria\", \"post_text\": \"This is now solved. So I went inside the source code and found that HPCC uses dlopen(3) to load the workunit dynamic library. I then consulted its man page which said that the library search path is determined by the -rpath arguments given at the time of linking, the LD_LIBRARY_PATH when the program was started, and the system default library folders.\\n\\nThe LD_LIBRARY_PATH variable may or not be set when the hpcc-init service was started, specially if the environment variables get reset when you use sudo. A workaround was to hardcode the value manually in /opt/HPCCSystems/sbin/hpcc_setenv but this is not a clean way.\\n\\nI ultimately added the -rpath argument to my linkOptions. And it worked.\", \"post_time\": \"2012-07-21 08:17:28\" },\n\t{ \"post_id\": 2041, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"Re: External libraries in BEGINC++\", \"username\": \"sbagaria\", \"post_text\": \"I had manually set my LD_LIBRARY_PATH to contain the folders where my libraries are and that is why the local executable was working. If I remove that environment variable, even the local executable can not load the external libraries.\\n\\nSo, the question is how do I configure the library search path while loading the compiled and linked workunit .so for thor.\\n\\nMore generally, where do I configure enviroment variables for thor and hthor?\\n\\nI think now it has become more of a configuration question so I will post in one of those forums.\\n\\nThanks.\", \"post_time\": \"2012-07-21 06:21:24\" },\n\t{ \"post_id\": 2036, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"Re: External libraries in BEGINC++\", \"username\": \"sbagaria\", \"post_text\": \"Sorry I was mistaken before. It does not run on hthor, just locally through eclcc.\", \"post_time\": \"2012-07-20 02:13:33\" },\n\t{ \"post_id\": 2033, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"Re: External libraries in BEGINC++\", \"username\": \"sbagaria\", \"post_text\": \"Yes, I can run them locally and on hthor but can not on my two physical node thor. I have made sure that all the shared objects are on both machines with the same names in the same locations.\", \"post_time\": \"2012-07-19 19:14:39\" },\n\t{ \"post_id\": 2018, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"Re: External libraries in BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"If it runs locally then it suggests the eclcc is correctly finding the libraries and linking to them. It appears the problem occurs when they are used in thor.\\n\\nAre the necessary shared objects deployed to all the thor nodes? They would need to be installed and available on each machine that needs to load the generated .so.\", \"post_time\": \"2012-07-19 09:03:28\" },\n\t{ \"post_id\": 2008, \"topic_id\": 441, \"forum_id\": 10, \"post_subject\": \"External libraries in BEGINC++\", \"username\": \"sbagaria\", \"post_text\": \"Hi,\\n\\nSo I am trying to use some external libraries in my BEGINC++ code segment. I can compile and run successfully locally and on hthor. 
But when I try to submit it to thor, the workunit gets compiled to a shared object (.so) file and links beautifully to all the linked libraries on my thor master, but during execution it complains of not being able to find the linked .so files for the external libraries.\\n\\nI am using the command line utility 'ecl' to submit my query to the cluster with my LD_LIBRARY_PATH environment variable making sure that all the library folders are in my search path. I also supply the -L parameter just so that eclcc does not find a reason to not be happy. I specify my libraries using #option('linkoption','<comma-separated arguments to the linker as in my g++ manual>'). This gives me a warning which I suppose is maybe because these options are only useful for eclcc.\\n\\neclcc: unknown(0,0): Warning C4534: #option ('linkOptions') will have no effect - it needs to be set in the submitted workunit.
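For readers hitting the same wall, a minimal, hedged sketch of the approach the thread converges on (linker options, including -rpath, supplied via #OPTION); the library name, path, and exact flag spelling below are illustrative assumptions, and as the warning above notes the option may need to accompany the submitted workunit (for example via the ecl command line) rather than live only in source:
// Hedged sketch only: 'foo' and /usr/local/lib are hypothetical placeholders.
// The comma-separated values are handed to the linker; -rpath is what the
// poster reports finally let the workunit .so locate its external libraries.
// The exact -rpath spelling may need adjusting for your toolchain.
#OPTION('linkOptions', '-L/usr/local/lib,-lfoo,-rpath=/usr/local/lib');

INTEGER4 addOne(INTEGER4 val) := BEGINC++
    return val + 1;   // stand-in body; a real query would call into libfoo here
ENDC++;

OUTPUT(addOne(41));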
\\n\\nMy libraries are in /usr/local/lib on all my nodes with the same names and I have also copied them to the /opt/HPCCSystem/plugins and /opt/HPCCSystems/lib directories with no success.\\n\\nI am now considering rewriting my entire BEGINC++ code as an external service. However, that is a major refactoring. But in any case, it is useful to know how to use external libraries within BEGINC++ as some of the boost libraries are very useful for some one-off use. I have gone through the External Services Implementation section and the BEGINC++ section in the ECL Language Reference (3.8.0.4rc) and I can not find any hints to my problem.\\n\\nAny help would be appreciated.\", \"post_time\": \"2012-07-18 17:20:46\" },\n\t{ \"post_id\": 2043, \"topic_id\": 442, \"forum_id\": 10, \"post_subject\": \"Re: Why debugger does not follow source code ?\", \"username\": \"ideal\", \"post_text\": \"Hello Gordon,\\n\\nAs stated by the title of this post, I think about a simple debugger that follows the code. What I mean by simple debugger is a set of features present in common debuggers like : \\n- display current code during execution\\n- display current data\\n- step inside\\n- step over\\n- goto\\n- break point\\n- watch point\\n.. and some I might forget (displays under the cursors). In one word, everything that can ease debugging and development process.\\n\\nThe layout used in ECL playground may be a good choice but ECLIDE is already a good platform.\\n\\nJM.\", \"post_time\": \"2012-07-22 15:40:55\" },\n\t{ \"post_id\": 2038, \"topic_id\": 442, \"forum_id\": 10, \"post_subject\": \"Re: Why debugger does not follow source code ?\", \"username\": \"gsmith\", \"post_text\": \"There were some additional tweaks to the layout settings to try and avoid the overlap issue you are seeing in the 3.8 IDE release, so if you haven't tried it yet, it _may_ help.\\n\\nI would be very interested in what your expectations for a debugger are, as there are plans to redo it as a 100% web hosted app. (If you look at ECL Playground and see the interaction between the graph and the source code you will see some of the needed building blocks in place).\\n\\nGordon.\", \"post_time\": \"2012-07-20 08:21:03\" },\n\t{ \"post_id\": 2035, \"topic_id\": 442, \"forum_id\": 10, \"post_subject\": \"Re: Why debugger does not follow source code ?\", \"username\": \"flavio\", \"post_text\": \"JM,\\n\\nI do agree with you that there may be areas to improve in the ECL code debugging/troubleshooting arena, and I'll forward this thread to some of our user tools developers, to find out which improvements they are currently working on.\\n\\nRegarding the graph, you're right on that it can get confusing and suffer from quirks if the workunit is too complex. I'll also forward this to the developers to see if they have any recommendations in this arena.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-07-19 22:44:55\" },\n\t{ \"post_id\": 2034, \"topic_id\": 442, \"forum_id\": 10, \"post_subject\": \"Re: Why debugger does not follow source code ?\", \"username\": \"ideal\", \"post_text\": \"I would like to add that debugger is also buggy. 
When the code is quite complex, it can happen that the graph does not display entirely, making the process difficult to follow.\nYou can see this in the attached file.\n\nJM.", "post_time": "2012-07-19 22:38:30" },
	{ "post_id": 2028, "topic_id": 442, "forum_id": 10, "post_subject": "Re: Why debugger does not follow source code ?", "username": "ideal", "post_text": "Hello Flavio,\n\nI think the debugging session could be improved.\n\nHPCC executes programs in parallel, not only in multiple threads of execution in each node, but also across a potentially large number of nodes
\nTo my knowledge (from a previous post), the debugger is only available in HTHOR, so parallel nodes should not be a concern here. \n\nyou can break apart your ECL workunit into smaller atoms (functions) and evaluate those independently, it's normally quite trivial to do unit testing to identify problems in the code
\\n\\nI agree it is sometimes the best option now but it is time consuming as test data have to be prepared first and some code added to proceed to test. \\nI see in property window of a debugging session, that (fortunately!) some pieces of code can be related to pieces of graphs. It is a nightmare to try to connect your code to error messages that are talking about graphs. Then, at least for those pieces, and in a HTHOR environment, it should be possible to create a more cumfortable debugging view of ECL process. It would not solve parallelism issues but at least it would allow not to have to prepare test data and additionnal code.\\n\\nPlease let me know if this answers your question
\nNot exactly, because it does not demonstrate it is impossible to improve. It could be rather simple, as ECLIDE already offers some features that I must use manually to trace my issues. It would be heaven if I could follow my code and data transformations in one click.\n\nThe only problem I see is that it cannot solve issues related to data and process distribution, but as a non-expert user, I am sure it would help me to understand at least basic mistakes.\n\nJM.", "post_time": "2012-07-19 14:59:28" },
	{ "post_id": 2027, "topic_id": 442, "forum_id": 10, "post_subject": "Re: Why debugger does not follow source code ?", "username": "flavio", "post_text": "JM,\n\nThere are certain aspects of ECL and HPCC that could make debugging code, in a conventional sense, more challenging:\n\n\nECL is a declarative language with lazy evaluation. As such, the user normally tells the system "What needs to get done", rather than "How to do it in detail". The compiler/optimizer will define the best execution plan (graph) to achieve the user's goals, which may or may not resemble the original ECL program/workunit that the user submitted. It is not a coincidence that conventional debuggers are not that popular in functional programming languages, either.\n\nHPCC executes programs in parallel, not only in multiple threads of execution in each node, but also across a potentially large number of nodes. Having input to a debugger from thousands of threads of execution could be very confusing.\n
\\n\\nHowever, since you can break apart your ECL workunit into smaller atoms (functions) and evaluate those independently, it's normally quite trivial to do unit testing to identify problems in the code. It is important to mention that ECL discourages side effects and mutable state, so once you have verified that a function provides the expected results, this would be true in any context.\\n\\nHaving said this, we do have an ECL debugger for Roxie, where the particular characteristics of the environment and query execution make it more desirable.\\n\\nPlease let me know if this answers your question.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-07-19 13:10:00\" },\n\t{ \"post_id\": 2020, \"topic_id\": 442, \"forum_id\": 10, \"post_subject\": \"Why debugger does not follow source code ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI would like to understand why the debugger is so complicated ? Why is it not possible to have a simple debugger that follows source code lines as does a normal debugger ? \\n\\nJM.\", \"post_time\": \"2012-07-19 10:42:13\" },\n\t{ \"post_id\": 3822, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"sbagaria\", \"post_text\": \"This is the XML schema I get from a run\\n<WUResultResponse>\\n<Wuid>W20130323-100911</Wuid>\\n<Sequence>0</Sequence>\\n<LogicalName/>\\n<Cluster/>\\n<Name>Result 1</Name>\\n<Start>0</Start>\\n<Requested>1</Requested>\\n<Count>1</Count>\\n<Total>1</Total>\\n<Result>\\n<XmlSchema name="myschema">\\n<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="unqualified">\\n<xs:element name="Dataset">\\n<xs:complexType>\\n<xs:sequence minOccurs="0" maxOccurs="unbounded">\\n<xs:element name="Row">\\n<xs:complexType>\\n<xs:sequence>\\n<xs:element name="p" minOccurs="0" maxOccurs="unbounded">\\n<xs:complexType>\\n<xs:sequence>\\n<xs:element name="" type="xs:string"/>\\n</xs:sequence>\\n</xs:complexType>\\n</xs:element>\\n</xs:sequence>\\n<xs:attribute name="uID" type="xs:integer" use="required"/>\\n<xs:attribute name="iID" type="xs:string" use="required"/>\\n</xs:complexType>\\n</xs:element>\\n</xs:sequence>\\n</xs:complexType>\\n</xs:element>\\n</xs:schema>\\n</XmlSchema>\\n<Dataset xmlSchema="myschema">\\n<Row uID="492729">\\n<p iID="55280">4.83653</p>\\n<p iID="84381">4.67588</p>\\n<p iID="82946">4.6493</p>\\n<p iID="36641">4.60334</p>\\n<p iID="90103">4.44718</p>\\n<p iID="7442">4.34309</p>\\n<p iID="44372">4.30049</p>\\n<p iID="93868">4.24493</p>\\n<p iID="83383">4.22387</p>\\n<p iID="97500">4.20033</p>\\n</Row>\\n</Dataset>\\n</Result>\\n</WUResultResponse>\\n
\\n\\nOn further trials, I managed to use the result but learned two things in the process. The result is not a dataset but a single record (hence the confusion which led me to believe that I am not being able to use the result). I can use the fields just fine as-is in subsequent ECL but I have to use a PROJECT or a TABLE function to transform the result so that it displays properly in the output.\\n\\nThanks a lot for your help. This was very useful.\", \"post_time\": \"2013-03-23 14:30:51\" },\n\t{ \"post_id\": 3819, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck\", \"post_text\": \"I'm not sure what the symptoms are that you are experiencing, but it may simply be that the results are fine, but not displaying properly.\\n\\nHTTPCALL may not pass the XPATH meta data along that generates the schema the display needs to process the output. You may be able to use the recordset, and view the xml just fine.\\n\\nHow did you get to the xml view of the result?\\n\\nIf you go to:\\n\\nhttp://IP:8010/WsWorkunits/WUResult.xml ... Sequence=0\\n\\nafter replacing the IP and the Wuid parameter with your WUID, you can see what the end result schema is as well... and it may not match the real format.\\n\\nIf that is the case we can open an issue about fixing the metadata being dropped from HTTPCALL results.\", \"post_time\": \"2013-03-22 22:52:27\" },\n\t{ \"post_id\": 3818, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"sbagaria\", \"post_text\": \"OK. So on checking the XML output of the workunit, I could see that the result being returned was the XML representation of the XPATH specification in the record definition. Like so...\\n<Result>\\n<Dataset name='Result 1'>\\n <Row uID="492729"><p iID="55280">4.83653</p><p iID="84381">4.67588</p><p iID="82946">4.6493</p><p iID="36641">4.60334</p><p iID="90103">4.44718</p><p iID="7442">4.34309</p><p iID="44372">4.30049</p><p iID="93868">4.24493</p><p iID="83383">4.22387</p><p iID="97500">4.20033</p></Row>\\n</Dataset>\\n</Result>
\\n\\nHow do I get the XML representation into something that ECL understands? I will keep trying.\", \"post_time\": \"2013-03-22 21:23:09\" },\n\t{ \"post_id\": 3817, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"sbagaria\", \"post_text\": \"Sorry for reviving an old thread. But I want to use HTTPCALL to parse a response in the following format. \\n\\n\\n<?xml version="1.0"?>\\n<predictions uID="492729">\\n <p iID="55280">4.83653</p>\\n <p iID="84381">4.67588</p>\\n <p iID="82946">4.6493</p>\\n <p iID="36641">4.60334</p>\\n....\\n
\\nHowever, I can't seem to get the XPATH definitions right. The only thing I am able to retrieve is the UserID, and the predictions column is just blank (but a huge cell). I checked, it is not returning an embedded dataset. My current code looks like this:\\n\\nScoreStruct := RECORD\\n\\tSTRING ITEM_ID {XPATH('@iID')};\\n\\tSTRING SCORE {XPATH('')};\\nEND;\\nPredictStruct := RECORD\\n\\tINTEGER UserID {XPATH('@uID')};\\n\\tDATASET(ScoreStruct) Scores {XPATH('/p')};\\nEND;\\n\\nHTTPCALL('http://somelocation','GET','text/xml',PredictStruct);
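A hedged sketch of the flattening step described elsewhere in this thread (using PROJECT so the HTTPCALL result displays as ordinary rows); it assumes the XPATHs above resolve as intended and names the single-row result raw:
raw := HTTPCALL('http://somelocation', 'GET', 'text/xml', PredictStruct);

// One output row per prediction, pulled out of the embedded Scores child dataset
FlatRec := RECORD
    INTEGER UserID;
    STRING  Item_ID;
    STRING  Score;
END;

flat := PROJECT(raw.Scores,
                TRANSFORM(FlatRec,
                          SELF.UserID  := raw.UserID,
                          SELF.Item_ID := LEFT.ITEM_ID,
                          SELF.Score   := LEFT.SCORE));
OUTPUT(flat);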
\", \"post_time\": \"2013-03-22 20:32:27\" },\n\t{ \"post_id\": 2129, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"mrudul\", \"post_text\": \"Thanks Anthony.\\nThis did not work. The ECL program gets executed but there is no result. There is no documentation on HTTPCALL hence it becomes a little difficult. Anyways I will update if I am able to invoke a sample jsp that returns a valid XML on local tomcat.\", \"post_time\": \"2012-08-01 00:42:08\" },\n\t{ \"post_id\": 2128, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Turns out I was wrong in this case about needing the namespace prefix in the xpaths. You may find you need that when dealing with xml files, but not httpcall.\\n\\nSo the following should work:\\n\\n\\nworldBankSource := RECORD\\nstring name {xpath('name')}\\nEND;\\n\\nOutRec1 := RECORD\\nDATASET(worldBankSource) Fred{XPATH('/source')};\\nEND;\\n\\nraw := HTTPCALL('http://api.worldbank.org/sources', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);\\n
\\n\\nThe main thing to notice is the xpath for the dataset "/source". Because we are at the root, and there is no further tag wrapping the "source" tags we need to tell the dataset not to look for one.\", \"post_time\": \"2012-07-31 17:13:14\" },\n\t{ \"post_id\": 2110, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You're quite welcome, I hope your POC goes well.\\n\\nI haven't managed to get http://api.worldbank.org/sources to work yet either so will have to look at it further and maybe ask around.\\n\\nBut I can give you two clues:\\n\\n1. ECL is not namespace aware so you have to include the prefix in your xpath, exactly as it will appear in the result. Unfortunately I realize that it can break if the chosen prefix changes.\\n\\n2. You are defining the dataset root tag as the root of the response, but the response is already being used as the root of the record definition.\\n\\nSo what should work is something like the following, but as I mentioned I still have to work out exactly what is going on:\\n\\n\\nworldBankSource := RECORD\\nstring name {xpath('wb:name')}\\nEND;\\n\\nOutRec1 := RECORD\\nDATASET(worldBankSource) Fred{XPATH('/wb:source')};\\nEND;\\n\\nraw := HTTPCALL('http://api.worldbank.org/sources', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);\\n
\\n\\nFor now, if you have control over what the jsp xml returned looks like, you can simplify this by putting the repetitive "dataset" element in a child tag.\\n\\nRegards,\\nTony\", \"post_time\": \"2012-07-30 14:55:12\" },\n\t{ \"post_id\": 2104, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"mrudul\", \"post_text\": \"Thanks a lot Anthony.\\n\\nBoth the examples worked and are displaying result.\\n\\nHowever I tried another url (http://api.worldbank.org/sources) which returns valid XML, but it is not showing the result. The XML has namespace and I am wondering if that's causing the problem. I am also going to have sample jsp on tomcat that returns valid XML. I will post my results. Thanks for replying my posts, this forum rocks as I have been able to head start my HPCC POC.\\n\\nI am looking in to the problem but in the mean time if you can give me a quick clue. Below is the ECL program.\\n\\nworldBankSource := RECORD\\n string name {xpath('name')}\\n END;\\n\\nOutRec1 := RECORD\\n DATASET(worldBankSource) Fred{XPATH('sources/source')};\\nEND;\\n\\nraw := HTTPCALL('http://api.worldbank.org/sources', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);\\n\\nOnce again thanks for replying.\", \"post_time\": \"2012-07-28 13:16:23\" },\n\t{ \"post_id\": 2100, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Here's an example without the dataset embedded in the record:\\n\\n\\nOutRec1 := RECORD\\n string build {xpath('Build')};\\nEND;\\n\\nraw := HTTPCALL('http://localhost:8010/WsSMC/Activity?rawxml_', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);\\n
\", \"post_time\": \"2012-07-27 16:10:02\" },\n\t{ \"post_id\": 2099, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"I think there may be two issues:\\n\\n1. TomCat HellowWorldExample returns HTML that is not XHTML complient.. which means it can't be parsed as xml.\\n\\n2. You should probably use the xpath form of xml record definition.\\n\\nTry this example, which calls into your local ESP.\\n\\n\\nThorCluster := RECORD\\n string name {xpath('ClusterName')}\\n END;\\n\\nOutRec1 := RECORD\\n DATASET(ThorCluster) Fred{XPATH('ThorClusters/ThorCluster')};\\nEND;\\n\\nraw := HTTPCALL('http://localhost:8010/WsSMC/Activity?rawxml_', 'GET', 'text/xml', OutRec1);\\n\\nOUTPUT(raw);\\n
\", \"post_time\": \"2012-07-27 16:03:18\" },\n\t{ \"post_id\": 2095, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"mrudul\", \"post_text\": \"I tried it on THOR too. There is no error but there is no result. The workunit display only ECL Watch and Graphs.\\n\\nAny clue why it is not displaying any result.\", \"post_time\": \"2012-07-27 00:35:23\" },\n\t{ \"post_id\": 2093, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Ah, perhaps that's why it hasn't been documented yet... seems like it might not be available on roxie.\\n\\nCan you try the same code using hthor?\\n\\nI'll open an issue on github about adding HTTPCALL to roxie.\", \"post_time\": \"2012-07-26 15:03:48\" },\n\t{ \"post_id\": 2083, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"mrudul\", \"post_text\": \"Thanks Anthony.\\n\\nI tried the HTTPCALL but its throwing an error. I used the function HTTPCALL with http url pointing to a web-application running on the TOMCAT (web server) on the same local machine. And secondly, I am running the HPCC VM Image to run this example.\\n\\nHere is the code:\\nOutRec1 := RECORD\\n STRING source := XMLTEXT('title');\\nEND;\\nraw := HTTPCALL('http://localhost:9090/examples/servlets/servlet/HelloWorldExample', 'GET', 'text/xml', OutRec1);\\nOUTPUT(raw);\\n\\n\\nAnd Error is:\\nError: Query W20120726-004332 is suspended because Unimplemented activity HTTP dataset required (0, 0), 1402, \\n\\nDO I have use a dataset in HTTPCALL. I am not able to understand the error. And whats the best way to analyze the errors. Is there any document that can help in understanding these errors.\\n\\nThank you once again for replying to the post.\", \"post_time\": \"2012-07-26 00:50:31\" },\n\t{ \"post_id\": 2080, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL function\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"1. Yes, you should be able to call any SOAP service. There hasn't been a lot of interoperabilty testing done, so if you run into any issues let us know. \\n\\n
\\n\\nSOAPCALL(ds, 'http://webservices.megacorp.com/MegaSearch', 'MegaSearchRequest', inRecord, t(LEFT),outRecord, LITERAL, SOAPACTION('megacorp_action'),NAMESPACE('urn:megacorp:namespace'), ONFAIL(genDefault2(LEFT)));\\n\\n2. There is another ECL function "HTTPCALL" for calling a rest service. It doesn't seem to be in the documentation.\\n\\nHTTPCALL('http://webservices.megacorp.com/MegaSearch?name=john', 'GET', 'text/xml', outRecord);\", \"post_time\": \"2012-07-25 15:34:08\" },\n\t{ \"post_id\": 2073, \"topic_id\": 456, \"forum_id\": 10, \"post_subject\": \"SOAPCALL function\", \"username\": \"mrudul\", \"post_text\": \"I have some questions on SOAPCALL function, appreciate some help on them.\\n\\n1. Can this function be used to invoke external services (SOAP services on internet)\\n\\n2. Can this be used for invoking a REST service or an HTTP url that returns XML data over\\n\\nI am trying to do a proof of concept which involves below tasks\\n\\n1. Load some data (file based data) on THOR server\\n2. Spray the data\\n3. Invoke an external web service or an HTTP url to fetch some XML data\\n4. Parse the XML data\\n5. Analyze the both datasets (data that has been loaded and data that has been fetched from internet) to get some meaningful results\\n\\nThank you for help\", \"post_time\": \"2012-07-25 11:40:48\" },\n\t{ \"post_id\": 2134, \"topic_id\": 466, \"forum_id\": 10, \"post_subject\": \"Re: Propose an enhancement to STD.*.CleanSpaces\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nOK - Will do when I get round to it.\\n\\nYours\", \"post_time\": \"2012-08-02 10:01:38\" },\n\t{ \"post_id\": 2121, \"topic_id\": 466, \"forum_id\": 10, \"post_subject\": \"Re: Propose an enhancement to STD.*.CleanSpaces\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI assume that the source code for the Standard Library functions are all in GitHub. Therefore, all you need do is start with the CleanSpaces source and modify it to create your CleanCharacters function the way you would like it to be, then submit that as a contribution to the HPCC codebase for the benefit of all. 
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-31 14:23:34\" },\n\t{ \"post_id\": 2116, \"topic_id\": 466, \"forum_id\": 10, \"post_subject\": \"Propose an enhancement to STD.*.CleanSpaces\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWe have in the Standard library Str.CleanSpaces along with its Unicode equivalent.\\n\\nIt would have been more useful to allow the character to be cleaned to be passed as a parameter, with a default of space.\\n\\n(Function would have been called 'CleanCharacters')\\n\\nAny hope of this being put in one day?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-31 09:17:45\" },\n\t{ \"post_id\": 2135, \"topic_id\": 467, \"forum_id\": 10, \"post_subject\": \"Re: Propose an enhancement to STD.*.Extract\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nOK - Will do when I get round to it.\\n\\nYours\", \"post_time\": \"2012-08-02 10:02:20\" },\n\t{ \"post_id\": 2123, \"topic_id\": 467, \"forum_id\": 10, \"post_subject\": \"Re: Propose an enhancement to STD.*.Extract\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nSee my reply to your other proposed enhancement here: http://hpccsystems.com/bb/viewtopic.php?f=10&t=466&p=2121&hilit=cleanspaces&sid=766a2b48bc93cd7ec7f5e1b329990a14&sid=766a2b48bc93cd7ec7f5e1b329990a14#p2121\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-31 14:25:59\" },\n\t{ \"post_id\": 2117, \"topic_id\": 467, \"forum_id\": 10, \"post_subject\": \"Propose an enhancement to STD.*.Extract\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWe have in the Standard library Str.Extract along with its Unicode equivalent.\\n\\nIt would have been more useful to allow the character separater to be passed as a parameter, with a default of comma.\\n\\nAny hope of this being put in one day?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-07-31 10:43:49\" },\n\t{ \"post_id\": 2248, \"topic_id\": 496, \"forum_id\": 10, \"post_subject\": \"Re: How to get field name dynamically\", \"username\": \"sameermsc\", \"post_text\": \"Thank you Bob and Richard for the information\\n\\n- Sameer\", \"post_time\": \"2012-08-22 11:35:07\" },\n\t{ \"post_id\": 2246, \"topic_id\": 496, \"forum_id\": 10, \"post_subject\": \"Re: How to get field name dynamically\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nYou can also take a look at the #EXPORTXML template language function.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-21 18:46:59\" },\n\t{ \"post_id\": 2244, \"topic_id\": 496, \"forum_id\": 10, \"post_subject\": \"Re: How to get field name dynamically\", \"username\": \"bforeman\", \"post_text\": \"Hi Sameer,\\n\\nThe closest thing like that I could find is this:\\n\\nresult := STD.File.GetColumnMapping( file );\\nfile A null-terminated string containing the logical filename.\\n\\nReturn: GetColumnMapping returns a null-terminated string containing the comma-delimited list of field mappings for the file.\\n\\nThe GetColumnMapping function returns the field mappings for the file, in the same format specified for the SetColumnMapping function.\\n\\nExample:\\nMaps := STD.File.GetColumnMapping('Thor::in::SomeFile');\\n\\n...and I know that we support Record Indexing as you mentioned, but I've never seen multi-level indexing used.\\n\\nFor example, MyRecord[1].FirstField, but MyRecord[1].[1] I don't think is supported.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-21 12:25:34\" },\n\t{ \"post_id\": 2240, \"topic_id\": 496, \"forum_id\": 10, \"post_subject\": \"How to get field name dynamically\", \"username\": 
\"sameermsc\", \"post_text\": \"Instead of specifying the field name, is there a way to know the field name by supplying the column index on a rows data of a dataset\\n\\nfor example something like "Sample[1][3].getFieldName"\\n\\nThis is not part of any requirement, just thought of checking if there is any such function/option in ECL\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-08-21 10:07:46\" },\n\t{ \"post_id\": 2257, \"topic_id\": 497, \"forum_id\": 10, \"post_subject\": \"Re: Execution time difference between [:alpha:]+ and [a-zA-Z\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Bob
\", \"post_time\": \"2012-08-28 13:14:38\" },\n\t{ \"post_id\": 2256, \"topic_id\": 497, \"forum_id\": 10, \"post_subject\": \"Re: Execution time difference between [:alpha:]+ and [a-zA-Z\", \"username\": \"bforeman\", \"post_text\": \"Hi Sameer,\\n\\nI am certainly not a REGEX expert, but there are some things I can recommend to perhaps track down the delay.\\n\\n1. Have a look at the underlying C++ code, and see if you can spot any differences. To do this, open the Debug dialog and enter SaveCPPTempFiles=1\\n\\n2. see if you can spot any differences in the graph and timings.\\n\\nAlso, I found in my Google search this statement when comparing techniques:\\n\\n
Note that there are portability differences between [[:alpha:]] and [A-Za-z]. [A-Za-z] works in more versions of grep, but [[:alpha:]] takes account of wide character environments and internationalization (accented characters for example when they are included in the locale).
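For reference, a hedged one-liner comparison of the two patterns being discussed (the sample string is made up):
s := 'Sample Text 123';
OUTPUT(REGEXFIND('[[:alpha:]]+', s, 0));  // POSIX class form - character-class/locale aware
OUTPUT(REGEXFIND('[a-zA-Z]+', s, 0));     // explicit ASCII letter-range form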
\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2012-08-28 12:06:12\" },\n\t{ \"post_id\": 2254, \"topic_id\": 497, \"forum_id\": 10, \"post_subject\": \"Execution time difference between [:alpha:]+ and [a-zA-Z]+\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\ni have a million records text dataset on which i am performing a regex search\\n\\n1) when i use [:alpha:]+ as part of a regex search pattern execution time on doxie is more than 10 min - and i get an exception "exceed 600 sec"\\n\\n2) when i use [a-zA-Z]+ as part of the regex search pattern doxie returns the results in less than a min\\n\\nwhy is there a a difference in execution time?\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-08-27 13:02:04\" },\n\t{ \"post_id\": 2261, \"topic_id\": 499, \"forum_id\": 10, \"post_subject\": \"Re: optimizing the Roxie service - Options\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,NOTE: inspite of using the above options the code returns output much faster on Doxie (~1 min) while on Roxie it either takes large time or sometimes never returns for certain search expressions
A couple of questions:\\n
\\nRichard\", \"post_time\": \"2012-08-28 18:58:08\" },\n\t{ \"post_id\": 2260, \"topic_id\": 499, \"forum_id\": 10, \"post_subject\": \"Re: optimizing the Roxie service - Options\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Bob \", \"post_time\": \"2012-08-28 14:45:06\" },\n\t{ \"post_id\": 2259, \"topic_id\": 499, \"forum_id\": 10, \"post_subject\": \"Re: optimizing the Roxie service - Options\", \"username\": \"bforeman\", \"post_text\": \"Hi Sameer,\\n\\nAs far as text search examples, have you seen David Bayliss' Bible Search example?\\n\\nHave a look here:\\n\\nhttp://www.dabhand.org/Technical%20Documents.htm\\n\\nThe whole article is great, but what will interest you the most is how the ROXIE query was created and deployed (published), and is pretty fast in my testing.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-28 14:38:14\" },\n\t{ \"post_id\": 2258, \"topic_id\": 499, \"forum_id\": 10, \"post_subject\": \"optimizing the Roxie service - Options\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\ni have a service (text search) that is to be deployed on Roxie, what are the options that can be used to optimize the service ie., run faster/parallel on roxie\\n\\ni know few options\\n- use distributed while creating indexes or datasets\\n- use allnodes and thisnode while performing transforms\\n\\nam i right on the above? are there any other options that can be used, any sample code would be appreciated\\n\\nNOTE: inspite of using the above options the code returns output much faster on Doxie (~1 min) while on Roxie it either takes large time or sometimes never returns for certain search expressions
\\n\\nRegards,\\nSameer\", \"post_time\": \"2012-08-28 13:42:19\" },\n\t{ \"post_id\": 2287, \"topic_id\": 505, \"forum_id\": 10, \"post_subject\": \"Re: Can HPCC handle other format files except with delimiter\", \"username\": \"dsun\", \"post_text\": \"Thanks for your explanation, it really help me a lot to understand how it works and how to use it.\", \"post_time\": \"2012-09-04 13:15:34\" },\n\t{ \"post_id\": 2286, \"topic_id\": 505, \"forum_id\": 10, \"post_subject\": \"Re: Can HPCC handle other format files except with delimiter\", \"username\": \"DSC\", \"post_text\": \"Your two questions are actually related. In reverse order:\\n
\\n2. Whether HPCC support other format? i.e. Some data files are based on length, 1-10 characters stand for the 1st column, 11-15 for the 2nd, etc.\\n
\\nThat style of data storage is known as fixed-width, as each field within a record is a fixed size. Summing the sizes of the individual fields gives you the overall record size, which is also fixed. HPCC supports fixed-width records easily. To give a couple of names to your example fields, here is one way to define your record:\\n\\n\\nSampleRec := RECORD\\n STRING10 firstField;\\n STRING5 secondField;\\nEND;\\n
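As a hedged follow-on (the logical filename is a made-up placeholder), a file sprayed with that fixed-width layout would then typically be read like this:
// '~thor::in::samplefile' is a hypothetical logical name for the sprayed file
sampleDS := DATASET('~thor::in::samplefile', SampleRec, FLAT);
OUTPUT(sampleDS);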
\\n1. What does Spray Fixed really mean?
\\n"Spray" is the terminology describing importing data into an HPCC cluster and distributing that data across the cluster's nodes. "Spray Fixed" indicates that the incoming data is comprised of fixed-width records (15 bytes, in your example). The other options are "Spray XML" -- meaning the incoming data is comprised of XML documents -- and "Spray CSV" -- meaning the data is in CSV or really any variable-width record format. HPCC needs to know how records are delimited so that it does not "break" a record across multiple nodes (each record always resides fully on one data node).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-04 12:28:41\" },\n\t{ \"post_id\": 2285, \"topic_id\": 505, \"forum_id\": 10, \"post_subject\": \"Can HPCC handle other format files except with delimiter?\", \"username\": \"dsun\", \"post_text\": \"Hi All,\\n\\nI'm a totally beginner in the ECL, I have some question about the Data Handling:\\n1. What does Spray Fixed really mean?\\n2. Whether HPCC support other format? i.e. Some data files are based on length, 1-10 characters stand for the 1st column, 11-15 for the 2nd, etc.\\n\\nCould anybody give a detail explanation?\\n\\nThanks a lot,\", \"post_time\": \"2012-09-04 06:49:21\" },\n\t{ \"post_id\": 2290, \"topic_id\": 506, \"forum_id\": 10, \"post_subject\": \"Re: Questions when Spray CSV on Ecl watch\", \"username\": \"dsun\", \"post_text\": \"Richard, thanks for you help.\\n\\nOne question is still there, in this case, I have 10 historical raw data files with the same structure, supposed that I spray them once, then I have to add one more filter in the next to remove the other 9 headers (if we use "ds := DATASET('MyFile',MyRec,CSV(SEPARATOR('|'),HEADING(1)));", but it only for the first header), is it correct?\\n\\nWe can not handle this case when Spray CSV, right?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-04 17:45:35\" },\n\t{ \"post_id\": 2289, \"topic_id\": 506, \"forum_id\": 10, \"post_subject\": \"Re: Questions when Spray CSV on Ecl watch\", \"username\": \"rtaylor\", \"post_text\": \"Dongliang,1. When set the configure info on Ecl watch, there is an option 'Separator:' and the default value is '\\\\,'. But if I changed it to any else ('|' or '\\\\|'), it does not work, it still display '\\\\,' in the 'View Data File' page, is this a bug or I made an incorrect config?
Yes, that's a bug. If you specify spraying with a | delimiter, then you just need to declare the file in your ECL code also with that | delimiter, like this:ds := DATASET('MyFile',MyRec,CSV(SEPARATOR('|')));
2. Does it support to skip the header in the Spray CSV stage? Otherwise, if I have a batch of csv files, I have to do a manually filter later.
No, spray is always going to just spray every record, no matter what. But you can specify skipping the header record(s) in the DATASET declaration (so they won't be included as data), like this:ds := DATASET('MyFile',MyRec,CSV(SEPARATOR('|'),HEADING(1)));
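For the follow-up case raised in this topic (several files sprayed together, each carrying its own header row), a hedged sketch of the extra filter mentioned; the header test below is an assumption, so substitute whatever value reliably identifies your header rows:
// Hypothetical: assumes MyRec's first field is called Field1 and that header
// rows literally contain the column title 'Field1' in that position.
ds     := DATASET('MyFile', MyRec, CSV(SEPARATOR('|'), HEADING(1)));
noHdrs := ds(Field1 != 'Field1');   // drop the remaining embedded header rows
OUTPUT(noHdrs);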
HTH,\\n\\nRichard\", \"post_time\": \"2012-09-04 17:35:51\" },\n\t{ \"post_id\": 2288, \"topic_id\": 506, \"forum_id\": 10, \"post_subject\": \"Questions when Spray CSV on Ecl watch\", \"username\": \"dsun\", \"post_text\": \"Hi All,\\n\\nI'm working on community_3.8.2-1, I have two questions about Spray CSV:\\n\\n1. When set the configure info on Ecl watch, there is an option 'Separator:' and the default value is '\\\\,'. But if I changed it to any else ('|' or '\\\\|'), it does not work, it still display '\\\\,' in the 'View Data File' page, is this a bug or I made an incorrect config?\\n2. Does it support to skip the header in the Spray CSV stage? Otherwise, if I have a batch of csv files, I have to do a manually filter later.\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-04 16:35:17\" },\n\t{ \"post_id\": 2397, \"topic_id\": 524, \"forum_id\": 10, \"post_subject\": \"Re: STD.Str.RemoveSuffix not stripping training spaces.\", \"username\": \"bforeman\", \"post_text\": \"That said, I see the example in the docs looks to be incorrect:\\n\\nIMPORT STD;\\nSTD.STr.RemoveSuffix('a word away','away'); //returns 'a word'\\nSTD.STr.RemoveSuffix('a word a way','away'); //returns 'a word a way'\\n\\n//should be:\\nSTD.STR.RemoveSuffix('a word away','away'); //returns 'a word '\\n
\\n\\nPassed to documentation and development. Thanks again Allan for your post!\\n\\nBob\", \"post_time\": \"2012-09-19 12:16:51\" },\n\t{ \"post_id\": 2396, \"topic_id\": 524, \"forum_id\": 10, \"post_subject\": \"Re: STD.Str.RemoveSuffix not stripping training spaces.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nThe RemoveSuffix function returns the source string with the ending text in the suffix parameter removed. If the source string does not end with the suffix, then the source string is returned unchanged.\\n
\\n\\nIt looks like it is working as documented, there is no mention of trimming trailing spaces, so I guess we have to handle that with TRIM.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-19 12:08:39\" },\n\t{ \"post_id\": 2393, \"topic_id\": 524, \"forum_id\": 10, \"post_subject\": \"STD.Str.RemoveSuffix not stripping training spaces.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe following code returns a length of 4.\\nI was rather expecting the TRIM to have been done for me.\\n\\n\\nIMPORT * FROM STD.Str;\\nOUTPUT(LENGTH(RemoveSuffix('AAA JR','JR'))); // Returns 4\\n
\\n\\n(using GOLD edition, not sure how to tell the version of library I'm using.)\\nYours\\n\\nAllan\", \"post_time\": \"2012-09-19 09:35:52\" },\n\t{ \"post_id\": 4514, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"I'm not sure of the general applicability of this little framework but here's what I have come up with so far:\\n\\nEXPORT ECLUnit := MODULE\\n\\n EXPORT assertRec := RECORD\\n BOOLEAN assertCond;\\n STRING failMsg;\\n END;\\n\\t\\n SHARED _testMsg(CONST STRING msg, CONST STRING errType, CONST INTEGER4 line) := msg + ': ' + errType + ' at line ' + line;\\n \\n EXPORT failMsg(CONST STRING msg, CONST INTEGER4 line) := _testMsg(msg, 'FAILURE', line);\\n \\n EXPORT errMsg(CONST STRING msg, CONST INTEGER4 line) := _testMsg(msg, 'ERROR', line);\\n \\n EXPORT runTestsFor(DATASET(assertRec) assertRec) := FUNCTION\\n testResults := OUTPUT(\\n ASSERT(assertRec, assertCond = TRUE, assertRec.failMsg, FAIL), \\n NAMED('RESULTS_DETAIL'), EXTEND);\\n\\t\\t\\t\\n\\t\\tRETURN testResults;\\n\\tEND;\\n\\nEND;\\n
\\n---- and here is a unit test that exercises the framework to test an RPN boolean expression evaluator (not shown):\\n\\nIMPORT Your_Folder.RpnBoolExprEvaluator AS TestSubject;\\nIMPORT * FROM Your_Folder.ECLUnit;\\n\\ndoEval(dsData) := FUNCTIONMACRO\\n RETURN TestSubject.Eval(DATASET(dsData, TestSubject.tokRec));\\nENDMACRO;\\n\\nsimpleUnaryConditions := \\n DATASET(\\n [\\n {doEval([{'TRUE'}]) = TRUE, failMsg('TRUE must be TRUE', __LINE__)} \\n , {doEval([{'FALSE'},{'NOT'}]) = TRUE, failMsg('FALSE/NOT must be TRUE', __LINE__)}\\n , {doEval([{'FALSE'}]) = FALSE, failMsg('FALSE must be FALSE', __LINE__)}\\n , {doEval([{'TRUE'},{'NOT'}]) = FALSE, failMsg('TRUE/NOT must be FALSE', __LINE__)}\\n ], \\n assertRec);\\n\\nrunTestsFor(simpleUnaryConditions);\\n
\\nIf the tests pass, the work unit compiles and runs successfully. \\nIf a test fails, the work unit fails to compile and issues an error message similar to this:\\nSystem error: 100000: TRUE must be TRUE: FAILURE at line 14(0,0)\\n\\nOne of the nice features is that test failures are captured at compile time. \\n\\nIt remains to be seen if this framework/pattern can be applied to more complex cases.\", \"post_time\": \"2013-08-30 13:31:23\" },\n\t{ \"post_id\": 3047, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"Durai\", \"post_text\": \"Preparing test dataset and writing macros to cross -validate the test data for expected results is one of the approach for unit testing. \\n\\nIMHO, It may not be feasible to build common framework to support unit testing. Would love to see others opinion on this.\", \"post_time\": \"2013-01-03 18:59:39\" },\n\t{ \"post_id\": 3037, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"kausar89\", \"post_text\": \"Hii Alan,\\n could u please elaborate your answer with help of any small example so that it would get more clear to us...\\nThanks.\", \"post_time\": \"2013-01-02 07:48:32\" },\n\t{ \"post_id\": 2622, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nFor unit testing I've just created inline datasets containing data that exercise the product I'm developing. All very easy.\\nIts also very easy, with filters, to be able to run a sub-set of the unit tests if your concentrating on a particular feature.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-10-25 16:38:19\" },\n\t{ \"post_id\": 2544, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"ksviswa\", \"post_text\": \"You could consider using ASSERT statements:\\n\\nASSERT( condition [ , message ] [ , FAIL ] [ , CONST ])ASSERT( recset, condition [ , message ] [ , FAIL ] [\\n, CONST ])
\\n\\nExample (From ECL Language reference, page 127):\\n\\nval1 := 1;\\nval2 := 1;\\nval3 := 2;\\nval4 := 2 : STORED('val4');\\nASSERT(val1 = val2);\\nASSERT(val1 = val2, 'Abc1');\\nASSERT(val1 = val3);\\nASSERT(val1 = val3, 'Abc2');\\nASSERT(val1 = val4);\\nASSERT(val1 = val4, 'Abc3');
\\n\\nRegards\\nViswa\", \"post_time\": \"2012-10-18 13:40:37\" },\n\t{ \"post_id\": 2516, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"Re: ECL unit testing\", \"username\": \"bforeman\", \"post_text\": \"There is no tool for ECL Unit Testing that I am aware of, checking with development to see if one is planned.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-12 14:32:51\" },\n\t{ \"post_id\": 2508, \"topic_id\": 551, \"forum_id\": 10, \"post_subject\": \"ECL unit testing\", \"username\": \"jeeves\", \"post_text\": \"Do we have any testing tools or frameworks for testing a ECL based code base? In the hadoop world there is PigUnit for Pig - do we have anything similar here?\", \"post_time\": \"2012-10-12 13:01:30\" },\n\t{ \"post_id\": 2616, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"jeeves\", \"post_text\": \"Hi Richard,\\n\\nThanks for your inputs. Will let you know how it goes..\\n\\nThanks,\", \"post_time\": \"2012-10-25 11:02:00\" },\n\t{ \"post_id\": 2606, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"rtaylor\", \"post_text\": \"jeeves,What would happen if another ECL script(say a reporting script) tries to read the base logical file exactly when the batch job is overwriting that file with the updated set of data?If we are doing the merges once an hour this may not be a rare occurence.
Ah HAH! \\n\\nYou just said "overwriting that file" -- and this is something that can never happen in HPCC, because when you are reading data from a file you are never allowed to write to that same file.
I am under the impression that by using super files I can get around this problem as the hourly batch job can lock the the super file.
But it is not a problem that can ever occur.I am considering using one super file which will always point to the current base data set. Every hour or so I would do the merges to create a new base data set, clear the super file and add the newly created base data set to the super file.
Yes, that will work. The advantage of this is your code is written to just read the superfile, whatever its content.The only con. I am seeing is the storage requirement. I would probably need another script to delete the old base dataset or go for a more complex approach which will involve making the merges less frequent and the queries more complex(looking at both the base and updated data sets).
And the more complex approach is what I was describing in my last post (the advantage being speed of data updates, since you would only rollup a new base file once a day instead of once an hour). The storage requirement just means bigger hard drives and periodic data rollup/purge/backup/backoff processes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-23 17:55:19\" },\n\t{ \"post_id\": 2604, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"jeeves\", \"post_text\": \"
What would happen if another ECL script(say a reporting script) tries to read the base logical file exactly when the batch job is overwriting that file with the updated set of data?If we are doing the merges once an hour this may not be a rare occurence.
\\n\\nI guess this will not be an issue since thor in general will not have concurrent jobs running.\", \"post_time\": \"2012-10-23 16:26:34\" },\n\t{ \"post_id\": 2603, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nWhat would happen if another ECL script(say a reporting script) tries to read the base logical file exactly when the batch job is overwriting that file with the updated set of data?If we are doing the merges once an hour this may not be a rare occurence.\\n\\nI am under the impression that by using super files I can get around this problem as the hourly batch job can lock the the super file. \\n\\nI am considering using one super file which will always point to the current base data set. Every hour or so I would do the merges to create a new base data set, clear the super file and add the newly created base data set to the super file.\\n\\nThe only con. I am seeing is the storage requirement. I would probably need another script to delete the old base dataset or go for a more complex approach which will involve making the merges less frequent and the queries more complex(looking at both the base and updated data sets).\\n\\nThanks,\\n-David\", \"post_time\": \"2012-10-23 16:20:18\" },\n\t{ \"post_id\": 2602, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"rtaylor\", \"post_text\": \"jeeves,I think and advantage with superfiles is that it provides some transaction support. Any thoughts on that?
Updating and maintaining the DFU metadata for a superfile is done within a transaction frame (see the Programmer's Guide section of articles on Working With Superfiles), but that's as far as "transaction support" goes in ECL.\\n\\nFor your situation, superfiles may or may not be the way to go, since use of superfiles is generally predicated on continuous adds of new records, while your requirement includes updates of existing records. \\n\\nUsing Superfiles would mean a completely different approach to your queries. If you're going to do that, then you would not bother to consolidate your data the way I suggested previously except maybe once a day to produce a new "base file." Your query would have to look for return data in the "base file" AND the "New data" file AND the "Updated Data" file, and then combine the three results while filtering out "base data" records that were also present in the "Updated Data" result set.\\n\\nIt can be done either way, so I would probably start with the simplest solution first and see how well that works. \\n\\nRichard\", \"post_time\": \"2012-10-23 15:24:29\" },\n\t{ \"post_id\": 2593, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nI found the information above really useful. Meanwhile someone suggested using superfiles. I think and advantage with superfiles is that it provides some transaction support. Any thoughts on that?\\n\\nThanks,\\n-David\", \"post_time\": \"2012-10-23 07:49:03\" },\n\t{ \"post_id\": 2545, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"rtaylor\", \"post_text\": \"jeeves,\\n\\nI would approach both insert and update like this:D_Entity := DATASET('~thor::alldata::entityfile',R_EntityData,CSV);\\nD_Updates := DATASET('~thor::staging::updates',R_EntityData,CSV);\\nD_New := DATASET('~thor::staging::inserts',R_EntityData,CSV);\\n\\nSetUpdates := SET(D_Updates,entityId);\\nD_Joined := D_Entity(entityId NOT IN SetUpdates) + D_Updates + D_New;\\n\\nOUTPUT(D_Joined,R_EntityData,'~thor::alldata::entityfile',CSV());
Filtering the D_Entity to eliminate all the entity records coming in from the D_Updates then appending that to the updates and adds gets you where you need to be. Given that you're doing this once an hour there should be only about 36K records in your updates file. Try it both ways and see which goes faster.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-18 13:54:41\" },\n\t{ \"post_id\": 2539, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Re: Update/Insert records in a logical file\", \"username\": \"bforeman\", \"post_text\": \"Another way to handle updates would be to first treat all updates as new records, in other words, add them as new records, then use ROLLUP to remove the duplicate records and salvage any data that you need from the incoming "updates" in the ROLLUP's TRANSFORM. ROLLUP is also ideal if there are multiple updates of the same record during a given period.\\n\\nThis is a great and important topic, hopefully others can contribute some input to this thread. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-18 12:51:43\" },\n\t{ \"post_id\": 2538, \"topic_id\": 557, \"forum_id\": 10, \"post_subject\": \"Update/Insert records in a logical file\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nWe have a requirement to continuously update/insert a huge logical file in HPCC with size of the order of 10 - 100 GB. The record is in the format described below(simplified for clarity).\\n\\nR_EntityData := RECORD\\nString entityId;\\nString entityInfoXml;\\nString entityDetailsXml;\\nend;\\n\\nwe will receive inserts/updates notification to this data at the rate of around 10 updates a second. We are planning to write an hourly Job that will consolidate the updates and then update/insert the data in HPCC. The unique identifier is the entityId.\\n\\nInsert appears to be simple(pseudo code below) but it is not clear what is the \\nbest way to handle updates to existing records. During an update a part or the whole record could get updated. Th entityId(unique key) will not be updated. I am assuming that a LEFT OUTER JOIN on the entityId attribute will do the job. If there is a more efficient way I would like to know it.\\n\\n\\nInsert pseudo code:\\n\\n
D_Entity := DATASET('~thor::alldata::entityfile',R_EntityData,CSV);\nD_New := DATASET('~thor::staging::inserts',R_EntityData,CSV);\n\nD_Merged := MERGE(D_Entity,D_New,SORTED(entityId));\n\nOUTPUT(D_Merged,R_EntityData,'~thor::alldata::entityfile',CSV());
\\n\\nJoin pseudo code:\\n\\nD_Entity := DATASET('~thor::alldata::entityfile',R_EntityData,CSV);\\nD_Updates := DATASET('~thor::staging::updates',R_EntityData,CSV);\\n\\nD_Joined := JOIN(D_Entity,D_Updates,LEFT.entityId = RIGHT.entityId,copyChanges(LEFT,RIGHT), LEFT OUTER);\\nOUTOUT(D_Joined,R_EntityData,'~thor::alldata::entityfile',CSV());
\", \"post_time\": \"2012-10-18 12:15:15\" },\n\t{ \"post_id\": 3378, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"Allan\", \"post_text\": \"Thanks for the quick reply.\\n\\nActually 'SprayXML' seems to be working fine now - not sure what I changed, if anything, to get it to work.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-02-07 15:37:49\" },\n\t{ \"post_id\": 3374, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe functions and the command line should operate similarly -- just try using the standard DOS wildcards (* ?) in the filename parameter to spray multiple files to a single logical HPCC dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-07 15:18:54\" },\n\t{ \"post_id\": 3372, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI'm having trouble spraying in a XML file, I'm attempting to use library function SprayXML. I know my XML is valid as I can access it using FROMXML. and it passes 'xmllint' on Unix. However, in production, I'll have to access from a file.\\n\\nI wont go into the details of the failure here as I've been reading the notes in this thread, thinking I could access the XML as a BLOB.\\n\\nI see you can use command line options to spray in a BLOB but what is the equivalent library function?\\nThe three documentation ones, SprayFixed,SprayVariable and SprayXML don't seem to fit.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-02-07 15:10:01\" },\n\t{ \"post_id\": 2671, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"rtaylor\", \"post_text\": \"Great to hear that my "theoretical" test case properly aligns with the "practical" implementation in the Real World! \", \"post_time\": \"2012-10-31 16:36:57\" },\n\t{ \"post_id\": 2664, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"jeeves\", \"post_text\": \"This worked. Thanks!\", \"post_time\": \"2012-10-31 06:18:45\" },\n\t{ \"post_id\": 2625, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Re: Spraying a large data set\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nTake a look at the "Working with BLOBs" article in the Programmer's guide. That will show you how to accomplish the first step. Here's the way I just tested it.\\n\\nFirst I needed to spray the multiple XML files to a single logical file using DFUplus.exe at the command line, like this:
\\ndfuplus action=spray server=http://10.173.248.7:8010/ username=rtaylor overwrite=1 replicate=1 srcip=10.173.248.7 srcfile=/mnt/disk1/var/lib/HPCCSystems/dropzone/*.xml dstcluster=mythor dstname=RTTEST::XML::xmldb PREFIX=FILENAME,FILESIZE nosplit=1
And then I needed to define the logical file and see if I could read the records:\n\nRec := RECORD\n STRING filename;\n STRING XMLtxt{MAXLENGTH(55000)};\nEND;\nXMLData := DATASET('~rttest::xml::xmldb',Rec,FLAT);\n\nXMLData;
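\n\nFor illustration only, a hypothetical XML-form PARSE over that blob field; the XPath 'document/section' and the tag names are assumptions, since the real XML layout is not shown in the thread:\n\n
SectionRec := RECORD\n STRING fileid := XMLData.filename; // carried over from the sprayed blob record\n STRING sectionName := XMLTEXT('title'); // assumed child tag\n STRING sectionXML := XMLTEXT('body'); // assumed child tag\nEND;\nSections := PARSE(XMLData, XMLtxt, SectionRec, XML('document/section')); // assumed XPath\nOUTPUT(Sections);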
That's as far as I went in testing it because your next step is simply to use the XML form of PARSE to parse your data any way you need to.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-25 18:14:28\" },\n\t{ \"post_id\": 2617, \"topic_id\": 575, \"forum_id\": 10, \"post_subject\": \"Spraying a large data set\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nWe have a requirement to spray around 3 - 30 million xml files. Each xml will be about a 100 KB in size. I would like to store all the xml files in one logical file in the following format.\\n\\ninitialFormat := RECORD\\n\\nSTRING fileId;\\nSTRING fileContentXML;\\nEND;
\nLater I would like to get to the following format:\n\nfinalFormat := RECORD\n\nSTRING fileid;\nSTRING sectionName;\nSTRING sectionContentXML;\n\nEND;
\\nIn the final format we have the file split into logical sections.\\n\\nIt is fine if we can directly get the "finalFormat" without first getting the data into the "initialFormat".\\n\\nAt this point I am not sure how to go about this..\\n\\nThanks,\\n-David\", \"post_time\": \"2012-10-25 11:32:58\" },\n\t{ \"post_id\": 2719, \"topic_id\": 590, \"forum_id\": 10, \"post_subject\": \"Re: string SplitWords function getting error\", \"username\": \"rtaylor\", \"post_text\": \"gopi,\\n\\nThis code works for me:IMPORT STD;\\nStr := 'a word a day keeps the grammarian away';\\nSTD.Str.SplitWords(Str, ' ');
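\n\nFor illustration, an assumed follow-on showing the returned SET being used:\n\n
Words := STD.Str.SplitWords(Str, ' '); // returns a SET OF STRING\nOUTPUT(COUNT(Words), NAMED('WordCount'));\nOUTPUT(Words[1], NAMED('FirstWord'));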
Does it also work on your machine?\\n\\nRichard\", \"post_time\": \"2012-11-07 14:15:57\" },\n\t{ \"post_id\": 2705, \"topic_id\": 590, \"forum_id\": 10, \"post_subject\": \"Re: string SplitWords function getting error\", \"username\": \"DSC\", \"post_text\": \"Well, the ECL compiler is finding the standard library directory (std) and the string library file (Str.ecl) but there is apparently something wrong with the contents of that file. Or you don't have permissions. Or it's a very old version, before that function was available (not that I know if such a thing existed).\\n\\nAre you using the IDE or Eclipse? If you're using a version of the IDE older than 3.8.6 then you might want to consider installing the newer version; that should fix any standard library problems as well as give you various bug fixes and upgrades. Otherwise, I'll have to punt and let the HPCC folks supply a better answer.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-06 16:10:02\" },\n\t{ \"post_id\": 2703, \"topic_id\": 590, \"forum_id\": 10, \"post_subject\": \"Re: string SplitWords function getting error\", \"username\": \"gopi\", \"post_text\": \"Error: Unknown identifier "SplitWords" (5, 9)\", \"post_time\": \"2012-11-06 15:56:05\" },\n\t{ \"post_id\": 2702, \"topic_id\": 590, \"forum_id\": 10, \"post_subject\": \"Re: string SplitWords function getting error\", \"username\": \"DSC\", \"post_text\": \"That code runs cleanly on my system under Thor. What error are you seeing?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-06 15:53:01\" },\n\t{ \"post_id\": 2701, \"topic_id\": 590, \"forum_id\": 10, \"post_subject\": \"string SplitWords function getting error\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\ni am using string Split function, getting the error for the function of SplitWords. can any one give the suggestion to solve. \\n \\nIMPORT Std;\\nstr1 := 'a word a day keeps the doctor away';\\na := STD.STr.SplitWords(str1,' ');\\noutput(a);\\n\\nThanks in advance.\", \"post_time\": \"2012-11-06 15:39:44\" },\n\t{ \"post_id\": 2721, \"topic_id\": 591, \"forum_id\": 10, \"post_subject\": \"Re: how to convert XMLTEXT to UNICODE\", \"username\": \"rtaylor\", \"post_text\": \"gopi,In above code last line getting error. \\nError: XMLTEXT is only legal in a PARSE transform (58, 2)
This error is telling you that the XMLTEXT function is only valid for use when you're using it in the RECORD structure or TRANSFORM function that is associated with a PARSE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 14:31:35\" },\n\t{ \"post_id\": 2706, \"topic_id\": 591, \"forum_id\": 10, \"post_subject\": \"Re: how to convert XMLTEXT to UNICODE\", \"username\": \"DSC\", \"post_text\": \"You might want to try using XMLUNICODE(xmltag) instead of XMLTEXT(xmltag) to parse the data. Or use the TOUNICODE built-in function.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-06 17:49:47\" },\n\t{ \"post_id\": 2704, \"topic_id\": 591, \"forum_id\": 10, \"post_subject\": \"how to convert XMLTEXT to UNICODE\", \"username\": \"gopi\", \"post_text\": \"Hi, \\n\\ni am doing xml parsing in transform function. In transform, i want to convert XMLTEXT to UNICODE.\\n\\nmy code :\\nline_to := DATASET([{U'the;nexis;lexis'}],{UNICODE to});\\n\\nthe above line execute well.\\n\\nstring str_to := xmltext('Tags/Tag[@TagName="#To"]/@TagValue')[1..2000];\\n\\tUNICODE temp_to := (UNICODE)str_to;\\n\\tline_to := DATASET([{temp_to}],{UNICODE mail_to});\\n\\nIn above code last line getting error. \\nError: XMLTEXT is only legal in a PARSE transform (58, 2)\\n\\nThanks in advance.\", \"post_time\": \"2012-11-06 16:05:02\" },\n\t{ \"post_id\": 3315, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nThis looks great! I will try to get this built and check it out.\\n\\n\\nThanks,\\n-David\", \"post_time\": \"2013-02-04 11:50:35\" },\n\t{ \"post_id\": 3309, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"richardkchapman\", \"post_text\": \"The master branch on github at https://github.com/hpcc-systems/HPCC-Platform contains the support for embedding calls to Java (and Javascript, Python and R).\\n\\nA trivial example would look something like this:\\n\\nJava:\\n\\npublic class JavaCat\\n{\\n public static String cat(String a, String b)\\n {\\n return a + b;\\n }\\n}\\n
\\n\\nECL:\\n\\nIMPORT java;\\n\\nstring cat(string s1, string s2) := IMPORT(java, 'JavaCat.cat:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;');\\n\\ncat('Hello', ' world');\\n
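\n\nOnce defined this way, the imported function can be used like any other ECL function; a small assumed usage example:\n\n
namesRec := {STRING first, STRING last};\npeople := DATASET([{'John ','Smith'},{'Jane ','Doe'}], namesRec);\n// cat() here is the Java method imported above\nOUTPUT(PROJECT(people, TRANSFORM({STRING full}, SELF.full := cat(LEFT.first, LEFT.last))));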
\", \"post_time\": \"2013-02-04 09:10:45\" },\n\t{ \"post_id\": 3304, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"flavio\", \"post_text\": \"[quote="jeeves":5ghxf3tt]Flavio,\\n\\nAre there any plans to standardize any of these plugins? I think a plugin that provides a database interface(like JDBC/ODBC) will be a good addition to ECL.\\n\\nAlternatively, is there any way I can code extensions to HPCC in Java. My team is skilled in Java and that is why I was looking at PIPE.\\n\\nThanks,\\n-David\\n\\nDavid,\\n\\nYou are absolutely right! As a matter of fact, I was just discussing this morning with Arjuna the possibility of releasing a standard Deltabase connection plugin (perhaps just for MySQL and MariaDB, which is what we normally use), but we haven't even started on this (keep in mind that we love contributions from the community!).\\n\\nRegarding embedding Java, Richard has started work to embed other programming languages, including Java, Javascript, Python and R (and he's expecting a pull request from me to add Haskell too ). This support will be "officially" available on 4.0 (expect it around April), but if you're courageous enough, and nice to Richard, he may give you the GitHub location of his development branch, where actual working code is
\\n\\nFlavio\", \"post_time\": \"2013-02-01 20:29:13\" },\n\t{ \"post_id\": 3293, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"jeeves\", \"post_text\": \"Flavio,\\n\\nAre there any plans to standardize any of these plugins? I think a plugin that provides a database interface(like JDBC/ODBC) will be a good addition to ECL.\\n\\nAlternatively, is there any way I can code extensions to HPCC in Java. My team is skilled in Java and that is why I was looking at PIPE.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-02-01 12:21:43\" },\n\t{ \"post_id\": 3288, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"flavio\", \"post_text\": \"I would try to avoid using PIPE from within Roxie, as the performance of the individual query will be depending on your external piece of code (OS scheduler, number of cores, etc.).\\n\\nI would say that it's better to create an ECL plugin to handle these types of remote connections from within a Roxie query (at least, this is what we do for our own systems using a delta-base).\\n\\nFlavio\", \"post_time\": \"2013-02-01 11:41:09\" },\n\t{ \"post_id\": 3284, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"jeeves\", \"post_text\": \"I am thinking that using PIPE to invoke a JDBC client is an option here. Any thoughts?\", \"post_time\": \"2013-02-01 10:55:36\" },\n\t{ \"post_id\": 3283, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"jeeves\", \"post_text\": \"Are there any best practices or patterns to implement a query that retrieves data from both HPCC and deltabase?\", \"post_time\": \"2013-02-01 10:51:26\" },\n\t{ \"post_id\": 2718, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nTo expand a bit on what Flavio said, the purpose of implementing a "deltabase" is to allow an HPCC system to closely emulate an OLTP system. If you direct new data to an SQL database, build your Roxie queries to search that SQL database in addition to your Roxie data, and create operational processes to periodically "flush" the SQL data and integrate it into your Roxie data (like once/day, once/hour, every 10 minutes -- whatever period suits your particular situation), then you will have effectively extended our "batch mode" HPCC technology to handle OLTP situations.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 14:09:53\" },\n\t{ \"post_id\": 2717, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"flavio\", \"post_text\": \"David,\\n\\nYou are correct. And you will probably also want to integrate the ability to query that SQL server from within your Roxie query, if your business logic requires it.\\n\\nFlavio\", \"post_time\": \"2012-11-07 13:43:56\" },\n\t{ \"post_id\": 2716, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"jeeves\", \"post_text\": \"Flavio,\\n\\nSo if I understand this right this is not an out-of-the-box kind of functionality. 
I probably have to set up an SQL server and then write a job which will periodically read data from the database and spray it into HPCC.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-11-07 13:34:41\" },\n\t{ \"post_id\": 2715, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"Re: delta-base\", \"username\": \"flavio\", \"post_text\": \"[quote="jeeves":3d3ln3ug]I have heard that HPCC has a tool called deltabase to handle realtime updates to data in a HPCC cluster. But I cannot find information about it online or in HPCC documentation. Can someone throw some light on this?\\n\\nIn the hadoop world the equivalent would be HBASE. So if there is some other HPCC equivalent to HBASE I would like to hear about that too.\\n"Deltabase" uses an external MySQL database to provide with real-time and transactional updates to otherwise read-only Roxie data. Moreover, there is nothing special about the external database used, and if ACID compliance is not required, you could resort to any distributed and scalable key/value store. \\n\\nIn addition to this, data updates stored in the deltabase are rolled into Roxie on a regular basis, by pushing them through the back-end Thor system, for merging and indexing. This process can be done as frequently as needed.\\n\\nPlease let me know if this helps.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-11-07 13:28:43\" },\n\t{ \"post_id\": 2710, \"topic_id\": 594, \"forum_id\": 10, \"post_subject\": \"delta-base\", \"username\": \"jeeves\", \"post_text\": \"I have heard that HPCC has a tool called deltabase to handle realtime updates to data in a HPCC cluster. But I cannot find information about it online or in HPCC documentation. Can someone throw some light on this?\\n\\nIn the hadoop world the equivalent would be HBASE. So if there is some other HPCC equivalent to HBASE I would like to hear about that too.\", \"post_time\": \"2012-11-07 08:19:15\" },\n\t{ \"post_id\": 2925, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"bforeman\", \"post_text\": \"I think Richard would like to see the slave logs.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-12-03 13:17:54\" },\n\t{ \"post_id\": 2923, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nI will post the logs when I get hold of them. Is there any specific log file you \\nwould like to see, if as I assume there are many types of logs.\\n\\nThanks,\\nDavid.\", \"post_time\": \"2012-12-03 09:27:28\" },\n\t{ \"post_id\": 2871, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"bforeman\", \"post_text\": \"Ok thanks, as Richard requested, we will probably have to look at your logs.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 15:50:37\" },\n\t{ \"post_id\": 2870, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"jeeves\", \"post_text\": \"I do not get this error with a 1 thor + 3 roxie setup. But with a 7 thor + 3 roxie setup I get this error.\\n\\n
Is this happening with all of your queries, or just a specific one?
\\nI think I get this with all roxie queries but I will have to experiment more to accurately answer this question.\\n\\nI have tried republishing. But it did not fix the issue.\", \"post_time\": \"2012-11-26 15:25:46\" },\n\t{ \"post_id\": 2867, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"richardkchapman\", \"post_text\": \"I don't think you would get this error if the query had not been deployed to all slaves.\\n\\nYou CAN get this if slaves fail to respond, either because they are dying while trying to process the query, or because of some network issues. Would need to look at the logs to tell much more.\\n\\nRichard\", \"post_time\": \"2012-11-26 15:01:15\" },\n\t{ \"post_id\": 2864, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Re: Roxie in one click thor - AWS\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nIs this happening with all of your queries, or just a specific one? Have you tried deleting it and re-publishing it? I have never seen this error but it almost looks like parts of the query did not get published to one of the nodes.\\\\\\n\\nChecking with development.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 14:07:30\" },\n\t{ \"post_id\": 2820, \"topic_id\": 615, \"forum_id\": 10, \"post_subject\": \"Roxie in one click thor - AWS\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nWhen trying to invoke a query published in Roxie(Amazon AWS - One Click Thor) we got the following error.\\n\\n− <online_retrieval_v1Response>\\n− <Result>\\n− <Exception>\\n<Source>Roxie</Source>\\n<Code>1406</Code>\\n<Message>Failed to get response from slave(s) for uid=0x00000003 activityId=2 pri=LOW queryHash=3cbd15d724d3e54c ch=1 seq=0 cont=0 server=10.244.152.25 retries=400f BROADCAST in activity 2</Message>\\n</Exception>\\n</Result>\\n</online_retrieval_v1Response>
\\n\\nIs anyone familiar with this kind of error.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-11-19 17:28:04\" },\n\t{ \"post_id\": 2875, \"topic_id\": 622, \"forum_id\": 10, \"post_subject\": \"Re: issue constructing ID's within MACROS\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nYou are concatenating right? So does the following work:\\n\\nx+#EXPAND(typ)(LEFT));
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 17:18:51\" },\n\t{ \"post_id\": 2838, \"topic_id\": 622, \"forum_id\": 10, \"post_subject\": \"issue constructing ID's within MACROS\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have an issue creating identifiers using the #EXPAND within a MACRO.\\nif parameter to a macro typ= 'def'\\n\\nabc#EXPAND(typ)\\n\\ndoes not syntax check. (and does not generate 'abcdef')\\n\\nThe specific example I have below works fine:\\n\\nMAC_Record_Convert(attribute,typ) := MACRO\\nLayouts.#EXPAND(typ) #EXPAND(typ)(R pInput) := TRANSFORM\\n self := pInput;\\nEND;\\nEXPORT #EXPAND(attribute)(STRING pfilter='') := PROJECT(FilteredTestData(pfilter)(RecordType = '#EXPAND(typ)'),#EXPAND(typ)(LEFT));\\nENDMACRO;\\n
\\nThe name of the constructed TRANSFORM function ending up as 'typ'.\\n\\nHowever the following macro does not compile, when I attempt to create the TRANSFORM with a name x'typ'\\n\\nMAC_Record_Convert(attribute,typ) := MACRO\\nLayouts.#EXPAND(typ) x#EXPAND(typ)(R pInput) := TRANSFORM\\n self := pInput;\\nEND;\\nEXPORT #EXPAND(attribute)(STRING pfilter='') := PROJECT(FilteredTestData(pfilter)(RecordType = '#EXPAND(typ)'),x#EXPAND(typ)(LEFT));\\nENDMACRO;\\n
\\n\\nAny idea's why this is?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-11-22 11:31:18\" },\n\t{ \"post_id\": 3310, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nThanks for continuing with this.\\n\\nSome points:\\nFor me the code at the end of page 2 syntax checks regardless if the '-legacy' qualifier is present or not.\\n\\nIf I start having to use 'WHEN' will this workaround be untenable, I just don't know.\\nIn the long run, the original problem with multiple events will be fixed and we can dispense with with this workaround.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-02-04 09:49:23\" },\n\t{ \"post_id\": 3298, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, using a 3.10 environment with a central repository (requiring the -legacy compiler switch) your code syntax checks. However, you should be aware that at some point that -legacy switch will probably go away and you'll have to re-write to use WHEN at that point.\\n\\nNow that I have an environment similar to yours, perhaps I can actually start to work the issue \\n\\nI'll see what I can find,\\n\\nRichard\", \"post_time\": \"2013-02-01 15:36:51\" },\n\t{ \"post_id\": 3297, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI'm using a central repository, though the BWR pane I'm using is not in the repository, its just a temporary BWR.\\n\\nInformation from 'About ECL IDE' is:\\n\\nVersion: 6.10.0.1001.3.8\\nServer: 3.8.2 (rc3)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-02-01 15:29:19\" },\n\t{ \"post_id\": 3295, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, on internal_3.10.0-1 I get this error:
Error: WHEN must be used to associated an action with a definition (26, 11), 2325
Are you using a central code repository or OSS-style local repository?\\n\\nRichard\", \"post_time\": \"2013-02-01 14:30:41\" },\n\t{ \"post_id\": 3281, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nYes types must agree, but I'm trying to call a function not return a string.\\n\\nIts running on version 'internal_3.8.2-3rc'\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-02-01 09:21:12\" },\n\t{ \"post_id\": 3273, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,The 'wierd' line of code is:\\n... \\nnote how on the 'true' side of the if statement I cannot put (), and a 'false' side has to be supplied for this code to pass the syntax check.
Not weird at all. Since jobname is a string and not a function, it doesn't need (or want) parentheses. The second and third parameters to IF must agree, so the third must also be a string, and you elected to create a DUMMY function to return an empty string. You always have to call functions with parentheses, whether you're passing parameters or not, so that's why you have to have them. \\n\\nYou could eliminate the DUMMY() function and your code would be:RETURN IF (count(msgs) = pcnt, jobname,'');
\\nRegarding the big issue, I can't get your code to pass syntax check -- it's asking for a WHEN on line 26. Are you running this on a legacy system?\\n\\nRichard\", \"post_time\": \"2013-01-31 15:35:25\" },\n\t{ \"post_id\": 3268, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nThe code below syntax checks and runs and processes events correctly, the only thing it does not do is run the dependent tasks.\\nThe 'wierd' line of code is:\\n\\nRETURN IF (count(msgs) = pcnt, jobname,DUMMY());\\n
\\nnote how on the 'true' side of the if statement I cannot put (), and a 'false' side has to be supplied for this code to pass the syntax check.\\nThe complete code:\\n\\n A1 () := FUNCTION\\n RETURN OUTPUT('Dependent job A1 Complete');\\n END;\\n \\n A2 () := FUNCTION\\n RETURN OUTPUT('Dependent job A2 Complete');\\n END;\\n\\n DUMMY () := FUNCTION\\n RETURN '';\\n END;\\n\\nnoteReceived(jobname,name) := FUNCTIONMACRO\\n logRecord := { string msg };\\n RETURN output(dataset([name], logRecord),NAMED('Received_'+jobname), extend);\\nENDMACRO;\\n\\ncheckComplete(jobname,pcnt) := FUNCTIONMACRO\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received_'+jobname),logRecord);\\n RETURN IF (count(msgs) = pcnt, jobname,DUMMY());\\nENDMACRO;\\n\\nprocessReceived(string jobname,string name,integer pcnt) := FUNCTION\\n noteReceived(jobname,name);\\n RETURN checkComplete(jobname,pcnt);\\nEND;\\n\\nMAC_EventActionSetup(InXmlText) := MACRO\\n LOADXML(InXmlText);\\n #DECLARE(cntEvents);\\n #DECLARE(OutputStr)\\n #SET(OutputStr, '' )\\n\\n #FOR(Job)\\n #SET(cntEvents,0);\\n #FOR(Events)\\n #SET(cntEvents,%cntEvents%+1);\\n #END\\n #FOR(Events)\\n #APPEND(OutputStr,'processReceived(\\\\''+%'Action'%+'\\\\',\\\\''+%'Name'%+'\\\\','+%cntEvents%+') : WHEN(\\\\''+%'Name'%+'\\\\',COUNT(1));\\\\n' )\\n #END\\n #END\\n //%'OutputStr'% //show me the generated code\\n %OutputStr% //run the generated code\\n \\nENDMACRO;\\n\\n//--------------------------------------------\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n \\n MAC_EventActionSetup(x);\\n
\\n\\nWe must be nearly there?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-31 12:54:53\" },\n\t{ \"post_id\": 3191, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYour code should be: A1 () := FUNCTION\\n return OUTPUT('Dependent job A1 Complete');\\n END;\\n \\n A2 () := FUNCTION\\n return OUTPUT('Dependent job A2 Complete');\\n END;\\n MAC_EventActionSetup(x); \\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-25 18:09:56\" },\n\t{ \"post_id\": 3189, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"I've upgraded to 3.10 of the IDE and client tools and now the code I posted on the Fri Jan 18, 2013 11:58 am above fails with:\\n\\nError: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (65, 5), 3002, \\n
\\n\\nAny ideas why?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-25 16:33:23\" },\n\t{ \"post_id\": 3186, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nThe initial 'Prerequisite' jobs are started by CRON during the night.\\nAll subsequent dependent jobs will be waiting on events named 'id<rundate>'.\\n\\nAt the moment I'm not getting time to work on this, I only get odd moments, will try to concentrate on this next week.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-25 16:01:35\" },\n\t{ \"post_id\": 3127, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThis code does not launch the prerequisite jobs, only the final job, so how are the prerequisite jobs being launched? \\n\\nWhat happens with this code when you simply run the prerequisite jobs? Do the proper events fire?\\n\\nRichard\", \"post_time\": \"2013-01-18 14:36:33\" },\n\t{ \"post_id\": 3126, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard\\n\\nWith a bit of tweaking, your code almost works.\\n\\nI've tweaked slightly, so the NOTIFY uses as a parameter 'Begin_A1', and the 'Received' datsets are distinguishable by 'jobname'.\\n\\nnoteReceived(jobname,name) := FUNCTIONMACRO\\n logRecord := { string msg };\\n RETURN output(dataset([name], logRecord),NAMED('Received_'+jobname), extend);\\nENDMACRO;\\n\\ncheckComplete(jobname,pcnt) := FUNCTIONMACRO\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received_'+jobname),logRecord);\\n // RETURN IF (count(msgs) = pcnt, jobname);\\n RETURN IF (count(msgs) = pcnt, NOTIFY('Begin_'+jobname,''));\\n //RETURN IF (count(msgs) = pcnt, EVALUATE(jobname));\\nENDMACRO;\\n\\nprocessReceived(string jobname,string name,integer pcnt) := FUNCTION\\n RETURN [noteReceived(jobname,name); checkComplete(jobname,pcnt)];\\nEND;\\n\\nMAC_EventActionSetup(InXmlText) := MACRO\\n LOADXML(InXmlText);\\n #DECLARE(cntEvents);\\n #DECLARE(OutputStr)\\n #SET(OutputStr, '' )\\n\\n #FOR(Job)\\n #SET(cntEvents,0);\\n #FOR(Events)\\n #SET(cntEvents,%cntEvents%+1);\\n #END\\n #FOR(Events)\\n #APPEND(OutputStr,'processReceived(\\\\''+%'Action'%+'\\\\',\\\\''+%'Name'%+'\\\\','+%cntEvents%+') : WHEN(\\\\''+%'Name'%+'\\\\',COUNT(1));\\\\n' )\\n #END\\n #END\\n //%'OutputStr'% //show me the generated code\\n %OutputStr% //run the generated code\\n \\nENDMACRO;\\n\\n//--------------------------------------------\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n \\n A1 () := FUNCTION\\n OUTPUT('Dependent job A1 Complete');\\n END;\\n \\n A2 () := FUNCTION\\n OUTPUT('Dependent job A2 Complete');\\n END;\\n MAC_EventActionSetup(x); \\n
\\n\\nI then use WHEN on the actual jobs:\\n\\nOUTPUT('Dependent job A1 Complete') : WHEN ('Begin_A1');\\nOUTPUT('Dependent job A2 Complete') : WHEN ('Begin_A2');\\n
\\n\\nUnfortunately the actual WU scheduling the jobs does not 'Complete' but remains in a 'Wait' state indefinitely.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-18 11:58:05\" },\n\t{ \"post_id\": 3113, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK. See how much further down the road this attempt gets us:\\nnoteReceived(jobname,name) := FUNCTIONMACRO\\n\\tlogRecord := { string msg };\\n\\tRETURN output(dataset([name], logRecord),NAMED('Received_'+#TEXT(jobname)), extend);\\nENDMACRO;\\n\\ncheckComplete(jobname,pcnt) := FUNCTIONMACRO\\n\\tlogRecord := { string msg };\\n\\tmsgs := DATASET(WORKUNIT('Received'+#TEXT(jobname)),logRecord);\\n\\t// RETURN IF (count(msgs) = pcnt, jobname);\\n\\tRETURN IF (count(msgs) = pcnt, NOTIFY('Begin',#TEXT(jobname))); \\nENDMACRO;\\n\\nprocessReceived(string jobname,string name,integer pcnt) := FUNCTION\\n\\tRETURN [noteReceived(jobname,name); checkComplete(jobname,pcnt)];\\nEND;\\n\\n\\nMAC_EventActionSetup(InXmlText) := MACRO\\n\\tLOADXML(InXmlText);\\n\\t#DECLARE(cntEvents);\\n\\t#DECLARE(OutputStr) \\n\\t#SET(OutputStr, '' ) \\n\\n\\t#FOR(Job)\\n\\t\\t#SET(cntEvents,0);\\n\\t\\t#FOR(Events)\\n\\t\\t\\t#SET(cntEvents,%cntEvents%+1);\\n\\t\\t#END\\n\\t\\t#FOR(Events)\\n #APPEND(OutputStr,'processReceived(\\\\''+%'Action'%+'\\\\',\\\\''+%'Name'%+'\\\\','+%cntEvents%+') : WHEN(\\\\''+%'Name'%+'\\\\',COUNT(1));\\\\n' )\\n\\t\\t#END\\n\\t#END\\n\\t// %'OutputStr'% //show me the generated code\\n\\t%OutputStr%\\t\\t\\t\\t//run the generated code\\n\\t\\nENDMACRO;\\n\\n// ***************************************************************\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_5</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n\\nPrerequisite_1 := OUTPUT('Prerequisite_1');\\t\\t \\nPrerequisite_2 := OUTPUT('Prerequisite_2');\\t\\t \\nPrerequisite_3 := OUTPUT('Prerequisite_3');\\t\\t \\nPrerequisite_4 := OUTPUT('Prerequisite_4');\\t\\t \\nPrerequisite_5 := OUTPUT('Prerequisite_5');\\t\\t \\n\\t\\t \\nA1 := OUTPUT('Dependent job A1 Complete');\\nA2 := OUTPUT('Dependent job A2 Complete');\\nMAC_EventActionSetup(x);
\\nNote that the big difference here in the use of FUNCTIONMACROs instead of FUNCTIONs (since your XML contains multiple jobs, they can't all be using the same named dataset to track the prerequisite jobs) and the NOTIFY action to kick off an event -- that means your dependent job needs to use the WHEN workflow service or the WAIT function to only launch the job when the prerequisites are complete. \\n\\nLet me know how it works out,\\n\\nRichard\", \"post_time\": \"2013-01-16 21:24:50\" },\n\t{ \"post_id\": 3111, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nUnfortunatly the simple minded approach does not work, due to the issue in the ECL compiler.\\n\\nWe have to use Gavin's workround ECL he supplied above.\", \"post_time\": \"2013-01-16 15:06:30\" },\n\t{ \"post_id\": 3101, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nBeing a simple-minded sort of programmer,I would approach your problem more like this: MAC_EventActionSetup(InXmlText) := MACRO\\n LOADXML(InXmlText);\\n #DECLARE(cntEvents);\\n\\t\\t\\t\\t#DECLARE(OutputStr) \\n\\t\\t\\t\\t#SET(OutputStr, '' ) \\n\\n #FOR(Job)\\n #APPEND(OutputStr,'SEQUENTIAL(PARALLEL(' )\\n #SET(cntEvents,0);\\n #FOR(Events)\\n #SET(cntEvents,%cntEvents%+1);\\n\\t\\t\\t\\t\\t #IF(%cntEvents%=1)\\n \\t\\t #APPEND(OutputStr,%'Name'% )\\n\\t\\t\\t\\t\\t\\t#ELSE\\t\\n \\t\\t #APPEND(OutputStr,',' + %'Name'% )\\n\\t\\t\\t\\t\\t\\t#END\\t\\n #END\\n \\t#APPEND(OutputStr,'),' + %'Action'% + ');\\\\n' )\\n #END\\n\\t\\t\\t\\t// %'OutputStr'%\\n\\t\\t\\t\\t%OutputStr%\\n\\t\\t\\t\\t\\n ENDMACRO;\\n\\t\\t \\n//***************************************************************\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_5</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n\\nPrerequisite_1 := OUTPUT('Prerequisite_1');\\t\\t \\nPrerequisite_2 := OUTPUT('Prerequisite_2');\\t\\t \\nPrerequisite_3 := OUTPUT('Prerequisite_3');\\t\\t \\nPrerequisite_4 := OUTPUT('Prerequisite_4');\\t\\t \\nPrerequisite_5 := OUTPUT('Prerequisite_5');\\t\\t \\n A1 := OUTPUT('Dependent job A1 Complete');\\n A2 := OUTPUT('Dependent job A2 Complete');\\n MAC_EventActionSetup(x);
Try it the same two ways as the previous example and see where you're at then.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-14 19:27:14\" },\n\t{ \"post_id\": 3097, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Thank you very much Richard for this.\\n\\nI actually thought I was generating code with the:\\n\\nprocessReceived(%Action%,%'Name'%) : WHEN(%'Name'%,COUNT(1));\\n
\\nbut obviously was not.\\n\\nThe code works fine (I've added 'cntEvents' to the call to 'processRecieved' as the number of prerequisite events varies with each action.)\\n\\nHowever I still have a problem in that the 'Action' (A1 and A2 in the example XML has to be passed into 'checkComplete' at compile time.\\nThat's why I made 'checkComplete' a FUNCTIONMACRO. But I cant get 'checkComplete' to compile.\\nI've tried various things like:\\n\\n checkComplete(cntEvents,Action) := FUNCTIONMACRO\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received'),logRecord)(msg[1..LENGTH(Action)] = Action);\\n RETURN IF (count(msgs) = cntEvents, #TEXT(Action));\\n ENDMACRO;\\n
\\n\\nAny ideas?\\n\\nYours\\nAllan\", \"post_time\": \"2013-01-14 09:31:56\" },\n\t{ \"post_id\": 3096, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYour problem is that you're using the template language and not generating code with it. Here's your code working properly (note the addition of the outputStr and the #APPEND):noteReceived(string text) := FUNCTION\\n\\tlogRecord := { string msg };\\n\\tRETURN output(dataset([text], logRecord),NAMED('Received'), extend);\\nEND;\\ncheckComplete(STRING filter) := FUNCTION\\n\\tlogRecord := { string msg };\\n\\tmsgs := DATASET(WORKUNIT('Received'),logRecord)(msg[1..LENGTH(filter)] = filter);\\n\\tRETURN IF (count(msgs) = 2, OUTPUT('Dependent Job \\\\''+filter+'\\\\' Completed'));\\nEND;\\n\\nprocessReceived(STRING fctn,string name) := FUNCTION\\n\\tRETURN [noteReceived(fctn+name); checkComplete(fctn)];\\nEND; \\nMAC_EventActionSetup(InXmlText) := MACRO\\n\\tLOADXML(InXmlText);\\n\\t#DECLARE(cntEvents);\\n\\t#DECLARE(OutputStr) \\n\\t#SET(OutputStr, '' ) \\n\\n\\t#FOR(Job)\\n\\t\\t#SET(cntEvents,0);\\n\\t\\t#FOR(Events)\\n\\t\\t\\t\\t#SET(cntEvents,%cntEvents%+1);\\n\\t\\t#END\\n\\t\\t#FOR(Events)\\n\\t\\t\\t#APPEND(OutputStr,'processReceived(\\\\''+%'Action'%+'\\\\',\\\\''+%'Name'%+'\\\\') : WHEN(\\\\''+%'Name'%+'\\\\',COUNT(1));\\\\n' )\\n\\t\\t\\t // processReceived(%Action%,%'Name'%) : WHEN(%'Name'%,COUNT(1));\\n\\t\\t#END\\n\\t#END\\n\\t// %'OutputStr'% //show me the generated code\\n\\t%OutputStr%\\t\\t\\t\\t//run the generated code\\n\\t\\nENDMACRO;\\n\\n//***************************************************************\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_5</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n\\t\\t \\n A1 := OUTPUT('Dependent job A1 Complete');\\n A2 := OUTPUT('Dependent job A2 Complete');\\n MAC_EventActionSetup(x);\\t\\t \\n\\n// Generates this code:\\n// processReceived('A1','Prerequisite_1') : WHEN('Prerequisite_1',COUNT(1));\\n// processReceived('A1','Prerequisite_2') : WHEN('Prerequisite_2',COUNT(1));\\n// processReceived('A2','Prerequisite_3') : WHEN('Prerequisite_3',COUNT(1));\\n// processReceived('A2','Prerequisite_4') : WHEN('Prerequisite_4',COUNT(1));\\n// processReceived('A2','Prerequisite_5') : WHEN('Prerequisite_5',COUNT(1));
Copy and paste this into a builder window and switch the comments on the two "outputStr" at the bottom of the MACRO then run it to see the code that's generated first, then switch them back and run the code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-11 22:17:21\" },\n\t{ \"post_id\": 3093, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"ps. I realise the above code would not work for multiple actions waiting on a combination of multiple events. But to accomidate this is a minor enhancment:\\n\\ncheckComplete(STRING filter) := FUNCTION\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received'),logRecord)(msg[1..LENGTH(filter)] = filter);\\n RETURN IF (count(msgs) = 2, OUTPUT('Dependent Job \\\\''+filter+'\\\\' Completed'));\\nEND;\\n\\nprocessReceived(STRING fctn,string name) := FUNCTION\\n RETURN [noteReceived(fctn+name); checkComplete(fctn)];\\nEND;\\n
\\nOne should then be able to do with a MACRO that has populated the 'Action' correctly for each job, the equivalent of:\\n\\nprocessReceived('A','Prerequisite_1') : WHEN('Prerequisite_1', COUNT(1));\\nprocessReceived('A','Prerequisite_2') : WHEN('Prerequisite_2', COUNT(1));\\n\\nprocessReceived('B','Prerequisite_1') : WHEN('Prerequisite_1', COUNT(1));\\nprocessReceived('B','Prerequisite_3') : WHEN('Prerequisite_3', COUNT(1));\\n
\", \"post_time\": \"2013-01-11 17:04:32\" },\n\t{ \"post_id\": 3090, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi, \\n\\nContinuing with this, it seems to have turned up an issue, so the straight forward approach above will not work. Gavin has very kindly sent me a workaround that I reproduce below.\\n\\n noteReceived(string text) := FUNCTION\\n logRecord := { string msg };\\n RETURN output(dataset([text], logRecord),NAMED('Received'), extend);\\n END;\\n\\n checkComplete() := FUNCTION\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received'),logRecord);\\n RETURN IF (count(msgs) = 2, OUTPUT('Dependent Job Completed'));\\n END;\\n\\n processReceived(string name) := FUNCTION\\n RETURN [noteReceived('Received '+name); checkComplete()];\\n END;\\n\\n processReceived('Prerequisite_1') : WHEN('Prerequisite_1', COUNT(1));\\n processReceived('Prerequisite_2') : WHEN('Prerequisite_2', COUNT(1));\\n
\\nThis works fine, but obviously to be of much use, it needs generalising to cope with any number of differenly named events and any multiple number of actions.\\nTo this end I've been attempting to create a MACRO round this that uses as its XML source driver:\\n\\nx := '<ROW>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_1</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_2</Name>'\\n +' </Events>'\\n +' <Action>A1</Action>'\\n +' </Job>'\\n +' <Job>'\\n +' <Events>'\\n +' <Name>Prerequisite_3</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_4</Name>'\\n +' </Events>'\\n +' <Events>'\\n +' <Name>Prerequisite_5</Name>'\\n +' </Events>'\\n +' <Action>A2</Action>'\\n +' </Job>'\\n +'</ROW>';\\n
\\nTo use the above all that needs to be done is:\\n\\n A1 := OUTPUT('Dependent job A1 Complete');\\n A2 := OUTPUT('Dependent job A2 Complete');\\n MAC_EventActionSetup(x);\\n
\\nHowever I cannot get the MACRO to work. I get error:\\nError: syntax error near "," : expected := (47, 63), 3002\\non the decleration of action 'processReceived' below. I can't see why. (Not the decleration of FUNCTIONMACRO 'processReceived')\\n\\n noteReceived(string text) := FUNCTION\\n logRecord := { string msg };\\n RETURN output(dataset([text], logRecord),NAMED('Received'), extend);\\n END;\\n checkComplete(cntEvents,Action) := FUNCTIONMACRO\\n logRecord := { string msg };\\n msgs := DATASET(WORKUNIT('Received'),logRecord);\\n RETURN IF (count(msgs) = cntEvents, Action);\\n ENDMACRO;\\n \\n processReceived(cntEvents,Action,Name) := FUNCTIONMACRO\\n RETURN [noteReceived('Received '+Name); checkComplete(cntEvents,Action)];\\n ENDMACRO;\\n \\n EXPORT MAC_EventActionSetup(InXmlText) := MACRO\\n LOADXML(InXmlText);\\n #DECLARE(cntEvents);\\n #FOR(Job)\\n #SET(cntEvents,0);\\n #FOR(Events)\\n #SET(cntEvents,%cntEvents%+1);\\n #END\\n #FOR(Events)\\n processReceived(%'cntEvents'%,%Action%,%'Name'%) : WHEN(%'Name'%,COUNT(1));\\n #END\\n #END\\n ENDMACRO;\\n
\\n\\nAny idea's as I've run out of ideas myself.\\n\\nThanks in advance\\n\\nAllan\", \"post_time\": \"2013-01-11 12:46:54\" },\n\t{ \"post_id\": 2849, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYour code looks like it should work to me (as should my suggestion). Report the issue in JIRA and see what the guys say.\\n\\nRichard\", \"post_time\": \"2012-11-24 15:28:43\" },\n\t{ \"post_id\": 2848, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nWould like to, but I have to work with current scripts/ ECL.\\n\\nI wanted to change our daily processing as little as possible. As it is, getting a NOTIFY into existing workunits is a whole cycle of promotions from DEV => CERT => PROD.\\n\\nAnd anyway, what I wrote should work, should it not?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-11-24 10:39:55\" },\n\t{ \"post_id\": 2845, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Re: Issue waiting on multiple events.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nWhy not simply wrap all three in a single job and use PARALLEL and SEQUENTIAL, something like this:SEQUENTIAL(PARALLEL(Pre1,Pre2),FinalJob);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-23 15:48:10\" },\n\t{ \"post_id\": 2842, \"topic_id\": 626, \"forum_id\": 10, \"post_subject\": \"Issue waiting on multiple events.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have a workunit that depends upon the completion of two other workunits. The prerequisite workunits can complete in either order.\\nWith the code I'm attempting to use below, the prerequisites have to complete in order 1,2 for the dependent worknit to run.\\n\\nI attach screen shots showing both cases, a 'good' run where prerequisite 1 completes before prerequisite 2 and a 'wrong' run where prerequisite 2 completes before prerequisite 1 and the dependent job does not run.\\n\\nFirst my dependent workunit code is:\\n\\n#workunit('name','Dependent_Job');\\nSEQUENTIAL(PARALLEL(WAIT('Prerequisite_1'),WAIT('Prerequisite_2')),OUTPUT('Dependent Job Completed'));\\n
\\n\\nand the two prerequisite workunits are:\\n\\n#workunit('name','Prerequisite_1');\\nNOTIFY('Prerequisite_1','1');\\n
\\n\\n#workunit('name','Prerequisite_2');\\nNOTIFY('Prerequisite_2','1');\\n
\\n\\nCan anyone help me out? Perhaps I'm not using the preferred method for waiting on multiple events.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-11-23 10:21:36\" },\n\t{ \"post_id\": 2892, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"dabayliss\", \"post_text\": \"@Flavio - you may update the bio - but let's keep with the old photo - modern ones have a greyish tinge. \\n\\nTo the question - for an RDF triple store to be genuinely performant the standard trick is to haul all or most of the store into memory. This is the yarc approach.\\n\\nOnce you are down at 'hitting the disks' the performance is going to limit you to fairly simple queries - 2-5x faster than 'painfully slow' is still fairly slow; the question therefore becomes - why RDF?\\n\\nThe answer is simple - the ability to access remote databases about which you know little or nothing; for that to work one needs to be standards complaint.\\n\\nTherefore, as Flavio notes, we are looking to moving HPCC under one of the compliant RDF front ends. This WILL nullify two of the advantages noted in that white paper; as such it is not clear what the performance footprint will be like - however we believe that the ability to standard up a store that is complaint is a useful feature\\n\\nDavid\", \"post_time\": \"2012-11-27 18:16:47\" },\n\t{ \"post_id\": 2884, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"flavio\", \"post_text\": \"Pat,\\n\\nWe are working on making this information available in the HPCC portal, but we'll also post entries in the forums and the mailing lists. We hope to have this roadmap available later this week.\\n\\nI personally like mailing lists, as I prefer "push" rather than "pull"
\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-11-26 19:35:07\" },\n\t{ \"post_id\": 2877, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"pmcb55\", \"post_text\": \"Thanks Flavio - that all sounds great, and I'm certainly interested in developments in the areas you detailed.\\n\\nSo how will I be able to track those developments? Is there any way I can register my interest and get automated notifications, or should I just check a forum periodically (like the 'Announcements' forum) or something...??!\\n\\nCheers,\\n\\nPat.\", \"post_time\": \"2012-11-26 17:34:13\" },\n\t{ \"post_id\": 2873, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"flavio\", \"post_text\": \"Pat,\\n\\nI guess we need to update David's bio page...
\\n\\nThe plan on KEL is to have an early release in 2013, and you should expect to see a few inter-dependent projects happening too: OWL/RDFS support, KEL, more Machine Learning related capabilities, etc.\\n\\nSince we'd like to see more community involvement, we are actively working on a roadmap page, indicating projects and estimated degree of effort, to see if there are any takers out there that can help speed up some of this development...\\n\\nFlavio\", \"post_time\": \"2012-11-26 16:36:08\" },\n\t{ \"post_id\": 2869, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"pmcb55\", \"post_text\": \"Hi Flavio,\\n\\nThanks for the reply. I've seen very few references to KEL, basically just the article I mentioned (from 2011) and the HPCC bio page for David Bayliss which worryingly states that David is only 'beginning to design a next generation Knowledge Engineering Language'. Searching your site for KEL gives no hits, and 'Knowledge Engineering Language' gives just 4 hits, none of which provide any further information.\\n\\nIt certainly looks like a very interesting development, but where can I find out more information - and fundamentally, when could I expect to see something actually released (not an actual release date, just a release year would be good!)??\\n\\nPat.\", \"post_time\": \"2012-11-26 15:16:42\" },\n\t{ \"post_id\": 2862, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"Re: HPCC as a performant RDF triple store??\", \"username\": \"flavio\", \"post_text\": \"Pat,\\n\\nThe HPCC Systems platform is a general purpose big data analytics platform and, as such, it could be used for triplets based semantic data representations (RDF, for example). But being a general purpose big data platform means that you get to define the data layout, analytics and query strategy, which does not come as an "out of the box" solution.\\n\\nHowever, there is a project planned to provide for OWL based RDFS querying on the HPCC Systems platform that will bring these "out of the box" capabilities. We are also working on a new semantic language called KEL (Knowledge Engineering Language) which will create the data transformations and queries for you.\\n\\nFlavio\", \"post_time\": \"2012-11-26 13:58:02\" },\n\t{ \"post_id\": 2846, \"topic_id\": 627, \"forum_id\": 10, \"post_subject\": \"HPCC as a performant RDF triple store??\", \"username\": \"pmcb55\", \"post_text\": \"This official HPCC link (http://cdn.hpccsystems.com/whitepapers/ ... g_data.pdf) makes some very strong claims for HPCC, particularly this claim:\\n\\n'If ECL is compared in a genuine ‘apples to apples’ comparison against any of the technologies here on their own data \\nmodel it tends to win by somewhere between a factor of 2 and 5.'\\n\\nSo I just have a very simple question - why isn't HPCC listed as a viable RDF triple store on Wikipedia: (http://en.wikipedia.org/wiki/Triplestore)? Is HPCC really a viable RDF triple store? For instance, would you HPCC guys claim that HPCC would outperform (by a factor of 2 to 5!) all the triplestores benchmarked here: http://wifo5-03.informatik.uni-mannheim ... comparison?\\n\\nAre there even any commercial implementations of HPCC as a pure RDF triple store?\\n\\nThanks,\\n\\nPat.\", \"post_time\": \"2012-11-23 17:14:23\" },\n\t{ \"post_id\": 2914, \"topic_id\": 640, \"forum_id\": 10, \"post_subject\": \"Re: child record split into new record\", \"username\": \"DSC\", \"post_text\": \"What about this?\\n\\n
ToAddress := {STRING ToEmailId};\\n\\nMailRecord := RECORD\\n\\tString ID;\\n\\tSTRING FromMailId;\\n\\tDATASET(ToAddress) ToAddressList;\\n\\tSTRING Subject;\\nEND;\\n\\nMailDataset := DATASET(\\n\\t[\\n\\t\\t{'1','from1@mail.com',[{'gopi'},{'balaji'},{'ravi'},{'arun'}],'1 mail'},\\n\\t\\t{'2','from2@mail.com',[{'allan'},{'Daniel'},{'William'}],'2 mail'}\\n\\t],\\n\\tMailRecord);\\n\\nOutRec := RECORD\\n\\tSTRING\\ts;\\nEND;\\n\\nOutRec DoXForm (MailRecord l, UNSIGNED c) := TRANSFORM\\n\\tSELF.s := l.ID + ' ' + l.FromMailId + ' ' + l.ToAddressList[c].ToEmailId;\\nEND;\\n\\nrs := NORMALIZE(MailDataset,LEFT.ToAddressList,DoXForm(LEFT,COUNTER));\\n\\nOUTPUT(rs);
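\n\nThe code above returns one concatenated string per row; for illustration, an assumed variant of the same NORMALIZE that instead keeps the three values as separate fields:\n\n
SplitRec := RECORD\n\tSTRING ID;\n\tSTRING FromMailId;\n\tSTRING ToEmailId;\nEND;\n\n// form 2 of NORMALIZE: each child row arrives as RIGHT\nSplitRec DoXForm2(MailRecord l, ToAddress r) := TRANSFORM\n\tSELF.ToEmailId := r.ToEmailId;\n\tSELF := l; // copies ID and FromMailId from the parent\nEND;\n\nrs2 := NORMALIZE(MailDataset, LEFT.ToAddressList, DoXForm2(LEFT, RIGHT));\n\nOUTPUT(rs2);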
\\n\\nThe output is just a list of strings, but it should show you how to get a handle on the data. In general, NORMALIZE is designed for this exact scenario. This example uses the second form of it.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-30 13:06:35\" },\n\t{ \"post_id\": 2913, \"topic_id\": 640, \"forum_id\": 10, \"post_subject\": \"child record split into new record\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nIs it possible child record split into new record with main fields ?\\n\\nMy ECL code \\nToAddress := {STRING ToEmailId};\\n\\nMailRecord := RECORD\\n\\tString ID;\\n STRING FromMailId;\\n\\tDATASET(ToAddress) ToAddressList;\\n STRING Subject;\\nEND;\\n\\nMailDataset := DATASET([{'1','from1@mail.com',[{'gopi'},{'balaji'},{'ravi'},{'arun'}],'1 mail'},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{'2','from2@mail.com',[{'allan'},{'Daniel'},{'William'}],'2 mail'}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t ],\\n MailRecord);\\n\\ni need below output\\n\\n1\\tfrom1@mail.com\\tgopi\\n1\\tfrom1@mail.com\\tbalaji\\n1\\tfrom1@mail.com\\travi\\n1\\tfrom1@mail.com\\tarun\\n2\\tfrom2@mail.com\\tallan\\n2\\tfrom2@mail.com\\tDaniel\\n2\\tfrom2@mail.com\\tWilliam\\n\\nplease give me the suggestion to do like this.\\nAdvance thanks\\n\\nBy\\nGopi\", \"post_time\": \"2012-11-30 12:28:44\" },\n\t{ \"post_id\": 2932, \"topic_id\": 645, \"forum_id\": 10, \"post_subject\": \"Re: cross tab with merge string\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nTry using rollup...it will work..\\n\\nRegards,\\nviswa\", \"post_time\": \"2012-12-03 19:43:41\" },\n\t{ \"post_id\": 2930, \"topic_id\": 645, \"forum_id\": 10, \"post_subject\": \"cross tab with merge string\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nis it possible to do the cross tab with string merge?\\n\\n\\nLay_ds := {string50 field1, string50 field2};\\n\\nds := dataset([\\n {'a','aa'},\\n {'a','ab'},\\n {'a','ac'},\\n\\t{'1','11'},\\n {'1','12'},\\n {'1','13'}\\n\\t],\\n Lay_ds);\\n\\noutput(ds);\\n\\nds1 := table(ds, {ds.field1}, field1);\\n\\noutput(ds1);\\t\\n
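\n\nFor illustration, a minimal ROLLUP-based sketch of the comma-merge asked about in this thread; the PROJECT, SORT and TRIM steps and the unbounded STRING output field are assumptions:\n\n
Lay_out := {STRING50 field1, STRING field2list};\n\nLay_out Prep(Lay_ds L) := TRANSFORM\n SELF.field1 := L.field1;\n SELF.field2list := TRIM(L.field2); // drop the STRING50 padding before concatenating\nEND;\n\nSortedDS := SORT(PROJECT(ds, Prep(LEFT)), field1); // ROLLUP needs the grouping field sorted\n\nLay_out Merge2(Lay_out L, Lay_out R) := TRANSFORM\n SELF.field1 := L.field1;\n SELF.field2list := L.field2list + ',' + R.field2list;\nEND;\n\noutput(ROLLUP(SortedDS, LEFT.field1 = RIGHT.field1, Merge2(LEFT,RIGHT))); // a -> aa,ab,ac and 1 -> 11,12,13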
\\t\\n\\nin above code dataset have 2 string fields, group by the field1 and get the field2 values merger with comma separator. is it possible to do it in group by function or any way to do this. \\n\\nRequired output\\na aa,ab,ac\\n1 11,12,13\\n\\nBy\\nGopi\", \"post_time\": \"2012-12-03 14:39:18\" },\n\t{ \"post_id\": 2954, \"topic_id\": 648, \"forum_id\": 10, \"post_subject\": \"-\", \"username\": \"naier1\", \"post_text\": \"-\", \"post_time\": \"2012-12-05 05:24:58\" },\n\t{ \"post_id\": 3912, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Re: Memory pool exhausted\", \"username\": \"jeeves\", \"post_text\": \"I have a roxie on Amazon AWS(M1 Large Instance, 7.5 GB RAM) launched using one click thor. \\n\\nI have a file with about 7 millions records(20 GB). It is indexed on a unique identifier.\\n\\n\\nI then run a half keyed join followed by a fetch which returns around 100,000 records(All on Roxie)\\n\\nAfter this when I try to sort or project over these 100,000 records I get a "Memory Pool Exhausted Error"\\n\\nAm I demanding too much from one Roxie node or should I look at changing totalMemoryLimit. If totalMemoryLimit has to be changed, instructions on how to do it will be greatly appreciated since I have no experience changing Roxie/Thor configurations.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-04-10 15:01:28\" },\n\t{ \"post_id\": 3726, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Re: Memory pool exhausted\", \"username\": \"battleman\", \"post_text\": \"[quote="sort":bypkxvc7]Try modifying the "totalMemoryLimit" option for roxie. This set the total amount of memory to be allocated for the memory pool used by all queries\\n\\nBy the way,where should I change this "totalMemoryLimit"?\", \"post_time\": \"2013-03-13 06:53:04\" },\n\t{ \"post_id\": 3299, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Re: Memory pool exhausted\", \"username\": \"sort\", \"post_text\": \"Try modifying the "totalMemoryLimit" option for roxie. This set the total amount of memory to be allocated for the memory pool used by all queries\", \"post_time\": \"2013-02-01 15:50:14\" },\n\t{ \"post_id\": 3059, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Re: Memory pool exhausted\", \"username\": \"sbagaria\", \"post_text\": \"Thanks.\\n\\nWhat is the option to change the memory limit? I tried resourceMaxMemory but it does not work. Says something like this, 'Cannot resource activity OUTPUT a cluster with 400 nodes'.\\n\\nIs it a bug? Or am I using the wrong option?\", \"post_time\": \"2013-01-04 21:27:30\" },\n\t{ \"post_id\": 3038, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Re: Memory pool exhausted\", \"username\": \"bforeman\", \"post_text\": \"Roxie always uses lookup joins where it can.\\nYou just need to change the memory limit in the config.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-02 12:30:40\" },\n\t{ \"post_id\": 3024, \"topic_id\": 661, \"forum_id\": 10, \"post_subject\": \"Memory pool exhausted\", \"username\": \"sbagaria\", \"post_text\": \"How do we get around the memory pool exhausted error? I have decent RAM on my machine so each slave can be allocated more RAM. This will enable us to use LOOKUP joins instead of the traditional joins in some cases.\\n\\nI could not find an option for this. I saw in the logs that roxie mem manager allocates about 300 pages - about 400 MB of RAM. 
What does this mean?\", \"post_time\": \"2012-12-18 20:41:47\" },\n\t{ \"post_id\": 3040, \"topic_id\": 664, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc Testing\", \"username\": \"rtaylor\", \"post_text\": \"MY BAD! I wrote the previous code "off the cuff" without testing (I was on vacation when I wrote that reply, so that's my excuse ).\\n\\nHere's code that works:
MyFunc(INTEGER InVal) := Inval + 2;\\n\\nMyRec := RECORD\\n INTEGER F1;\\nEND;\\nDS := DATASET([{1},{2},{3}], MyRec); //input dataset\\n\\nMyRec XF(DS L) := TRANSFORM\\n SELF.F1 := MyFunc(L.F1);\\nEND;\\nRes := PROJECT(DS,XF(LEFT));\\nOUTPUT(Res,NAMED('MyFunc_Test_Result'));
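\\nIf you would rather have the test check itself than eyeball the result, one minimal sketch (assuming the same DS, XF and Res definitions above) is to follow the OUTPUT with an ASSERT over the known expected values:\\n
// MyFunc adds 2, so the {1,2,3} inline DATASET above should come back as {3,4,5}\\nASSERT(Res[1].F1=3 AND Res[2].F1=4 AND Res[3].F1=5, 'MyFunc test failed', FAIL); // FAIL stops the workunit if the check does not hold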
NOTE that this uses the inline form of the DATASET declaration (not the TABLE function as I previously mis-stated).\\n\\nRoxie's role in HPCC is to deliver fast results to end-users, thousands of concurrent queries at a time, so no, it has no significant role in code testing that I can think of.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-02 15:26:03\" },\n\t{ \"post_id\": 3036, \"topic_id\": 664, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc Testing\", \"username\": \"kausar89\", \"post_text\": \"Hii Richard..\\n Thanks for ur help..wil surely try this out...Had one more vague thought..can Roxie be used in any way for testng..I can be wrong as i dnt knw much details about ECl..but just out of curiosity am asking..and also apart from this manual testing is der any other way out for testing ECL code?\\nAwaiting Your Reply..\", \"post_time\": \"2013-01-02 07:32:40\" },\n\t{ \"post_id\": 3034, \"topic_id\": 664, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc Testing\", \"username\": \"rtaylor\", \"post_text\": \"If you're referring to unit testing of your code, then you simply need to create a small query exercising that code with known values to see if it produces the result you expect. \\n\\nIn this regard, the inline form of the TABLE function can be extremely useful. \\n\\nLet's say you have written a function. And now you want to test that function on a set of known values. You can create those known values as an inline TABLE and then use that as the input to a process that will test your code, something like this:MyFunc(INTEGER InVal) := Inval + 2;\\n\\nMyRec := RECORD\\n INTEGER F1;\\nEND;\\nDS := TABLE({1},{2},{3}, MyRec); //input dataset\\n\\nMyRec XF(DS L) := TRANSFORM\\n SELF,F1 := MyFunc(L.F1);\\nEND;\\nRes := PROJECT(DS,XF(LEFT));\\nOUTPUT(Res,NAMED('MyFunc_Test_Result'));
If you have some other kind of testing in mind, then please specify what you're looking to accomplish.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-31 21:00:54\" },\n\t{ \"post_id\": 3032, \"topic_id\": 664, \"forum_id\": 10, \"post_subject\": \"Hpcc Testing\", \"username\": \"kausar89\", \"post_text\": \"As I am new to this Hpcc i.e. ECL Programming can anyone guide me as how to perform testing in ECL ...Is there ant tool for testing..Awaiting your reply..\", \"post_time\": \"2012-12-28 04:16:27\" },\n\t{ \"post_id\": 3060, \"topic_id\": 665, \"forum_id\": 10, \"post_subject\": \"Re: Multiple aggregates on the same dataset\", \"username\": \"sbagaria\", \"post_text\": \"Thanks Richard. I was using ROLLUP because TABLE can sometimes work in mysterious ways, and I thought a sort followed by a grouped aggregate is the best way. The way you used TABLE also generates the same graph every time as my ROLLUP, so we are doing the same thing and we are good. \\n\\nI have to be a little careful and design my dependencies such that I get the spill points at the right places. I think I now have a fairly optimized design which keeps the CPU always busy even on 24 core, 10 slave nodes.\\n\\nIn this design, my first subgraph computes all the filtered projects (each project is one column), counts the number of rows, computes the sum and spills the projected columns. Then the spilled column is taken in another subgraph, where the sort, the aggregate (now coded using the table function) and the sorted recordset subset operations (points 4 & 5 in requirements) are performed. This subgraph also spills the result of the table function. The next subgraph picks it up and performs a topn operation to get the top 5 elements.\\n\\nThis is the best I could do and I am happy with it. It is about 2.5x times faster than the first code I wrote which produced a very messy graph. And I can easily follow the new graph.\", \"post_time\": \"2013-01-04 21:49:55\" },\n\t{ \"post_id\": 3035, \"topic_id\": 665, \"forum_id\": 10, \"post_subject\": \"Re: Multiple aggregates on the same dataset\", \"username\": \"rtaylor\", \"post_text\": \"Sid,\\n\\nI find it interesting that you're using SORT and ROLLUP for your 1-3 results. I would have started with code like this for 1-4:
MyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\n\\tINTEGER1 Value3;\\nEND;\\nSomeFile := DATASET([{'C','G',1},\\n\\t\\t\\t\\t\\t {'C','C',2},\\n\\t\\t\\t\\t\\t {'A','X',3},\\n\\t\\t\\t\\t\\t {'C','C',2},\\n {'D','G',4},\\n\\t\\t\\t\\t\\t {'E','C',2},\\n {'D','G',4},\\n\\t\\t\\t\\t\\t {'C','C',2},\\n {'B','G',4},\\n\\t\\t\\t\\t\\t {'A','B',5}],MyRec);\\n\\nFMAC_Calc(Infile, Infield) := FUNCTIONMACRO\\n Tbl := SORT(TABLE(Infile,{Infield}),Infield);\\n Xtab := SORT(TABLE(Tbl,{Infield,GrpCnt := COUNT(GROUP)},InField),-GrpCnt);\\n OutCnt := COUNT(Tbl);\\n HalfCnt := OutCnt DIV 2;\\n OutMean := AVE(Tbl,(REAL)Infield);\\n OutMedian := IF(OutCnt%2=0,\\n ((REAL)Tbl[HalfCnt].Infield+(REAL)Tbl[HalfCnt+1].Infield)/2,\\n (REAL)Tbl[HalfCnt].Infield);\\n OutDS := DATASET([{'Distinct',COUNT(Xtab)},\\n {'TopVal1',Xtab[1].InField},\\n {'TopVal2',Xtab[2].InField},\\n {'TopVal3',Xtab[3].InField},\\n {'TopVal4',Xtab[4].InField},\\n {'TopVal5',Xtab[5].InField},\\n {'Mean',OutMean},\\n {'Median',OutMedian}\\n ],\\n {STRING Prompt,STRING Val});\\n RETURN OutDS;\\nENDMACRO;\\n\\nOUTPUT(FMAC_Calc(SomeFile,Value1));\\nOUTPUT(FMAC_Calc(SomeFile,Value2));\\nOUTPUT(FMAC_Calc(SomeFile,Value3));\\n
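\\nThe macro stops at the median, but quartiles (point 5 in the list) follow the same pattern. A rough sketch reusing the sorted Tbl and OutCnt already defined inside FMAC_Calc (positions are simply truncated, not interpolated, so treat the boundaries as approximate):\\n
// add inside FMAC_Calc, after OutMedian\\n Q1Pos := OutCnt DIV 4;\\n Q3Pos := (3 * OutCnt) DIV 4;\\n OutQ1 := (REAL)Tbl[IF(Q1Pos=0,1,Q1Pos)].Infield; // guard against very small inputs\\n OutQ3 := (REAL)Tbl[IF(Q3Pos=0,1,Q3Pos)].Infield;\\n // then append {'Q1',OutQ1} and {'Q3',OutQ3} rows to the OutDS inline DATASET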
Did you try TABLE first and go to SORT/ROLLUP due to some issue?\\n\\nRichard\", \"post_time\": \"2012-12-31 21:45:05\" },\n\t{ \"post_id\": 3033, \"topic_id\": 665, \"forum_id\": 10, \"post_subject\": \"Multiple aggregates on the same dataset\", \"username\": \"sbagaria\", \"post_text\": \"Hi! \\n\\nI am trying to do multiple aggregates on the same dataset. Essentially, for each column, I want to calculate: \\n1. Number of distinct entries\\n2. Top 5 most frequent entries\\n3. Mean (if numeric)\\n4. Median (if numeric)\\n5. Quartiles (if numeric)\\n\\nWhile this is easy enough to be done for a single column, I am not getting good performance when I do this for multiple columns; hthor and thor (30 slaves) give me identical timings (8 min for hthor vs 6 min for thor). I do this through a sort followed by rollup for the first 3, and record subset indexing for the last 2. I am still working on optimizing the code.\\n\\nI tried reading http://hpccsystems.com/Why-HPCC/HPCC-vs ... ix_ecl#L15 to get an idea of what the optimal code should look like, but I couldn't get much far.\", \"post_time\": \"2012-12-28 10:14:32\" },\n\t{ \"post_id\": 3061, \"topic_id\": 668, \"forum_id\": 10, \"post_subject\": \"Re: Calling Function parameters from FAILURE.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nAs you expected, the rename of the function made no difference.\\n\\nTried various things like APPLY filtering a datset, all to no avail.\\nThink I'll just have to sendEmail with an appropiate message.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-07 10:17:19\" },\n\t{ \"post_id\": 3058, \"topic_id\": 668, \"forum_id\": 10, \"post_subject\": \"Re: Calling Function parameters from FAILURE.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, I see a couple of things here:\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-04 18:57:50\" },\n\t{ \"post_id\": 3055, \"topic_id\": 668, \"forum_id\": 10, \"post_subject\": \"Calling Function parameters from FAILURE.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm getting this error:\\n\\nError: syntax error near "OnFail" : expected ALLNODES, APPLY, ASSERT, BUILD, CASE, DISTRIBUTION, EVALUATE, FAIL, GLOBAL, IF, IFF, KEYDIFF, KEYPATCH, LOADXML, MAP, NOFOLD, NOTHOR, NOTIFY, OUTPUT, PARALLEL, SEQUENTIAL, SOAPCALL, UPDATE, WAIT, WHEN, module-name, identifier, action, macro-name, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', '[', '@' (139, 16), 3002, wrobel.ru2\\n
\\n\\nWhen attempting to call a function parameter from a FAILURE clause.\\ne.g.\\n\\nEXPORT Ru2(BOOLEAN OnFail(STRING mess)) := MODULE\\n.\\n.\\n.\\nEND : FAILURE (OnFail('FAIL'));\\n
\\n\\nThis is a service module that can be used by many different processes.\\nIt cannot know what to do for errors and has to pass the job for failure processing back to the caller.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-04 11:31:53\" },\n\t{ \"post_id\": 3065, \"topic_id\": 670, \"forum_id\": 10, \"post_subject\": \"Re: Data fields in cross-tab TABLE outputs.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nThat's exactly what I was aiming for.\\n\\nThank you very much\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-08 09:25:20\" },\n\t{ \"post_id\": 3064, \"topic_id\": 670, \"forum_id\": 10, \"post_subject\": \"Re: Data fields in cross-tab TABLE outputs.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nA crosstab report ("group by" form of TABLE) produces exactly one record for each unique value if its "group by" field(s). Since you're grouping by the "typ" field you see the unique value for that field and the field a, b, and c values for the first record included in each group. \\n\\nObviously, this is not what you want. So, if your intention is to see the actual record of each "typ" that has the highest "a" field value (my best guess as to what you might be trying to accomplish), then you could do it this way:R := RECORD\\n STRING5 typ; \\n STRING8 a;\\n STRING2 b;\\n STRING2 c;\\nEND;\\nDR2:= DATASET([{'POL01','20120113','1b','70'},\\n {'POL01','20120121','2b','71'},\\n {'POL01','20120131','3b','72'},\\n {'POL01','20120201','4b','73'},\\n {'POL01','20120202','5b','74'},\\n {'POL01','20120301','6b','75'},\\n {'SUB01','20120302','7b','76'},\\n {'SUB01','20120303','8b','77'},\\n {'SUB01','20120330','9b','78'},\\n {'SUB01','20120401','ab','79'},\\n {'SUB01','20120402','bb','80'},\\n {'SUB01','20120403','cb','81'},\\n {'SUB01','20120404','db','82'},\\n {'SUB01','20120501','eb','83'},\\n {'VEH01','20120603','fb','84'}],R);\\ntr := RECORD\\n DR2.typ;\\n // a := MAX(GROUP,DR2.a);\\n DR2.a;\\n DR2.b;\\n DR2.c;\\nEND;\\n\\nOUTPUT(TABLE(SORT(DR2,typ,-a),tr,typ));
Ignoring the warnings, of course. Or you could do it without the warnings this way:R := RECORD\\n STRING5 typ; \\n STRING8 a;\\n STRING2 b;\\n STRING2 c;\\nEND;\\nDR2:= DATASET([{'POL01','20120113','1b','70'},\\n {'POL01','20120121','2b','71'},\\n {'POL01','20120131','3b','72'},\\n {'POL01','20120201','4b','73'},\\n {'POL01','20120202','5b','74'},\\n {'POL01','20120301','6b','75'},\\n {'SUB01','20120302','7b','76'},\\n {'SUB01','20120303','8b','77'},\\n {'SUB01','20120330','9b','78'},\\n {'SUB01','20120401','ab','79'},\\n {'SUB01','20120402','bb','80'},\\n {'SUB01','20120403','cb','81'},\\n {'SUB01','20120404','db','82'},\\n {'SUB01','20120501','eb','83'},\\n {'VEH01','20120603','fb','84'}],R);\\n\\nOUTPUT(DEDUP(SORT(DR2,typ,-a),typ));
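\\nEither way the idea is the same: order the records so the latest one in each policy set sorts first, then keep only that first record per typ, so the b and c fields travel with it. Against the inline data above, both versions should give one row per typ along these lines:\\n
POL01 20120301 6b 75\\nSUB01 20120501 eb 83\\nVEH01 20120603 fb 84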
\\nHowever, if you're trying to accomplish something else, please let me know and I'll try to help,\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-07 18:58:59\" },\n\t{ \"post_id\": 3063, \"topic_id\": 670, \"forum_id\": 10, \"post_subject\": \"Data fields in cross-tab TABLE outputs.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm having difficulty picking up the correct data fields from a cross-tab report.\\n\\nI have a dataset containing groups of policy sets, I must extract the 'latest' policy record from each policy.\\nExtracting the correct 'key' fields is ok - no problem.\\nHowever the data fields just seem to be from the 1st record encountered in any policy set. I need the data fields form the 'latest' record.\\nHere is example code:\\n\\nR := RECORD\\n STRING5 typ; \\n STRING8 a;\\n STRING2 b;\\n STRING2 c;\\nEND;\\nDR2:= DATASET([{'POL01','20120113','1b','70'},\\n {'POL01','20120121','2b','71'},\\n {'POL01','20120131','3b','72'},\\n {'POL01','20120201','4b','73'},\\n {'POL01','20120202','5b','74'},\\n {'POL01','20120301','6b','75'},\\n {'SUB01','20120302','7b','76'},\\n {'SUB01','20120303','8b','77'},\\n {'SUB01','20120330','9b','78'},\\n {'SUB01','20120401','ab','79'},\\n {'SUB01','20120402','bb','80'},\\n {'SUB01','20120403','cb','81'},\\n {'SUB01','20120404','db','82'},\\n {'SUB01','20120501','eb','83'},\\n {'VEH01','20120603','fb','84'}],R);\\ntr := RECORD\\n DR2.typ;\\n a := MAX(DR2.a);\\n DR2.b;\\n DR2.c;\\nEND;\\n\\nOUTPUT(TABLE(DR2,tr,typ));\\n
\\nThe output generated is:\\n\\nPOL01,20120301,1b,70\\nSUB01,20120501,7b,76\\nVEH01,20120603,fb,84\\n
\\n\\nOnly the VEH01 record is popultated with correct data fields 'b' and 'c' as there is only one VEH01 record.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-01-07 16:45:25\" },\n\t{ \"post_id\": 3095, \"topic_id\": 674, \"forum_id\": 10, \"post_subject\": \"Re: error using super file\", \"username\": \"tmurphy\", \"post_text\": \"Thanks Bob. Another guy here explained that I'm trying to hit a superfile from Roxie and that I can only hit it from Thor (which I don't want to do, meaning the answer for me is to do some more ETL in Thor to create a summary of the desired data in a regular file and then query that in Roxie).\", \"post_time\": \"2013-01-11 18:11:40\" },\n\t{ \"post_id\": 3094, \"topic_id\": 674, \"forum_id\": 10, \"post_subject\": \"Re: error using super file\", \"username\": \"bforeman\", \"post_text\": \"The error looks like it's telling you the there are no subfiles in your target superfile. \\n\\nTry removing all sub files (ClearSuperFile) and then add the subfiles again.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-11 18:02:16\" },\n\t{ \"post_id\": 3087, \"topic_id\": 674, \"forum_id\": 10, \"post_subject\": \"error using super file\", \"username\": \"tmurphy\", \"post_text\": \"I'm getting this error \\n\\nError: Query W20130110-161600 is suspended because assert(subFiles.length()==1) failed - file: /var/jenkins/workspace/CE-Candidate-3.8.2/CE/centos_5_x86_64/HPCC-Platform/roxie/ccd/ccdfile.cpp, line 1970 (0, 0), 1402, \\n\\nwhen I try to access a super file. Even the most basic ECL seems to fail, like:\\n\\nds := DATASET('mysuperfilename',{mylayout,UNSIGNED8 __fpos{VIRTUAL(FILEPOSITION)}},THOR);\\noutput(choosen(ds,100));\\n\\nBut if I change 'mysuperfilename' to the name of one of the files contained in the super file it works fine. I tried bouncing the server but still get error.\", \"post_time\": \"2013-01-10 22:46:31\" },\n\t{ \"post_id\": 3100, \"topic_id\": 678, \"forum_id\": 10, \"post_subject\": \"Re: Job Priority\", \"username\": \"bforeman\", \"post_text\": \"Thanks for your report, I am seeing the same behavior and will open an issue.\\nhttps://track.hpccsystems.com/browse/HPCC-8585\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-14 16:18:55\" },\n\t{ \"post_id\": 3099, \"topic_id\": 678, \"forum_id\": 10, \"post_subject\": \"Job Priority\", \"username\": \"jacob\", \"post_text\": \"I was trying to set and read the priority of my workunit as follows:\\n\\nimport std.system.Workunit as wu;\\n#workunit('priority', 1);\\nwu.workunitlist(Workunit)[1].priority;\\n
\\n...but it always comes up blank when reading the workunit record. The following produces the same result. \\n#workunit('priority', 'high');
\\nIs my code to set the priority not working? And why is the priority not being displayed as I would expect in the workunit details?\", \"post_time\": \"2013-01-14 15:26:27\" },\n\t{ \"post_id\": 3120, \"topic_id\": 682, \"forum_id\": 10, \"post_subject\": \"Re: Difference in Sort behavior on String and Unicode data\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,we have a situation where some data needs to be sorted based on certain fields and topn N rows after sorting have to be returned as the output.
Then you'll want to use the TOPN function instead of SORT and CHOOOSEN -- that;'s what it is designed for.\\nall was fine as long as we were using String (for English alphabet) as the data type, we got entirely different results when we changed the record layout from String to Unicode to support unicode data, as typecast from unicode to string may lead to information loss
The typecast I used was only on the SORT parameter (and applies equally to TOPN), so there's absolutely no data loss in the result data, just the possibility of some non-ASCII characters being ignored (treated as blanks) by the collating operation.\\n\\nThe major difference between your original two examples is the case sensitivity. So the next question that occurs to me is -- you've been used to case sensitive sort with STRING data, but is that actually appropriate for what you want to accomplish? Might it be that you should have been doing a case insensitive SORT all along? Theses are operational questions for you to answer for yourself (I don't need to know).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-17 16:32:26\" },\n\t{ \"post_id\": 3118, \"topic_id\": 682, \"forum_id\": 10, \"post_subject\": \"Re: Difference in Sort behavior on String and Unicode data\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Richard,\\n\\nwe have a situation where some data needs to be sorted based on certain fields and topn N rows after sorting have to be returned as the output. all was fine as long as we were using String (for English alphabet) as the data type, we got entirely different results when we changed the record layout from String to Unicode to support unicode data, as typecast from unicode to string may lead to information loss\\n\\n\\ndoes the sort algorithm makes use of certain levels (http://www.unicode.org/reports/tr10/). for Example, For the Latin script, these levels correspond roughly to:\\nalphabetic ordering\\ndiacritic ordering\\ncase ordering\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-01-17 15:31:31\" },\n\t{ \"post_id\": 3117, \"topic_id\": 682, \"forum_id\": 10, \"post_subject\": \"Re: Difference in Sort behavior on String and Unicode data\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,\\n\\nIt appeared to me that the UNICODE sort was probably done case insensitive while the ASCII text sort was case sensitive, so I added a couple more SORTs to test the hypothesis:ds_1 := dataset([{'Sameer'},{'anshish'},{'aNshish'}, {'abshish'}, {'Ashish'}, {'Samir'}, {'Avinash'}, {'aNOOp'}, {'preetam'}, {'Vishy'}], {string name});\\nds_2 := dataset([{u'Sameer'},{u'anshish'},{u'aNshish'}, {u'abshish'}, {u'Ashish'}, {u'Samir'}, {u'Avinash'}, {u'aNOOp'}, {u'preetam'}, {u'Vishy'}], {unicode name});\\n\\noutput(sort(ds_1, name));\\noutput(sort(ds_2, name));\\n\\nimport std;\\noutput(sort(ds_1, STD.str.touppercase(name)));\\noutput(sort(ds_1, (UNICODE)name));\\noutput(sort(ds_2, (STRING)name));
\\nWhen you run this you will note that the second, third, and fourth results are now all the same (case insensitive result), while the first and fifth are also identical (case sensitive result). \\n\\nThe key thing to understand here is that the "how to sort" parameters of the SORT function may be expressions and not just field names -- and how the sort is done does not affect the data that ends up in the result.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-17 14:57:21\" },\n\t{ \"post_id\": 3114, \"topic_id\": 682, \"forum_id\": 10, \"post_subject\": \"Difference in Sort behavior on String and Unicode data\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\ni have observed that the sort behavior is different on String and Unicode data sets, below is a sample example\\n\\nds_1 := dataset([{'Sameer'},{'anshish'},{'aNshish'}, {'abshish'}, {'Ashish'}, {'Samir'}, {'Avinash'}, {'aNOOp'}, {'preetam'}, {'Vishy'}], {string name});\\nds_2 := dataset([{u'Sameer'},{u'anshish'},{u'aNshish'}, {u'abshish'}, {u'Ashish'}, {u'Samir'}, {u'Avinash'}, {u'aNOOp'}, {u'preetam'}, {u'Vishy'}], {unicode name});\\n\\noutput(sort(ds_1, name));\\noutput(sort(ds_2, name));
\\n\\nBelow are the results\\nResult1 (on String data)\\nAshish\\nAvinash\\nSameer\\nSamir\\nVishy\\naNOOp\\naNshish\\nabshish\\nanshish\\npreetam\\n\\n\\nResult2 (on Unicode Data)\\nabshish\\naNOOp\\nanshish\\naNshish\\nAshish\\nAvinash\\npreetam\\nSameer\\nSamir\\nVishy\\n\\nI have following questions\\n1) Is there a way to force the sort on unicode data to produce same results as the one generated on string data (without type-casting unicode data to string )\\n2) Can someone explain the sort behavior (how it sorts) on String and Unicode data
\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-01-17 11:07:31\" },\n\t{ \"post_id\": 3123, \"topic_id\": 684, \"forum_id\": 10, \"post_subject\": \"Re: How to study step-by-step ECL\", \"username\": \"rtaylor\", \"post_text\": \"hungifi,\\n\\nRight now, your best bet is to begin with the free videos posted on our website, starting here:\\nhttp://hpccsystems.com/community/training-videos/ecl-concepts-1\\n\\nInstructor-led ECL classes are currently offered on a regular schedule at various LexisNexis locations. They can also be scheduled onsite at your location, given a minimum of six students. These courses are described here:http://hpccsystems.com/products-and-services/services/training and you can sign up for them here:http://hpccsystems.com/community/training-events/training\\n\\nWe are also in the process of developing:
We will be making announcements once the online and remote training courses become available (watch this space ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-17 19:17:14\" },\n\t{ \"post_id\": 3119, \"topic_id\": 684, \"forum_id\": 10, \"post_subject\": \"How to study step-by-step ECL\", \"username\": \"hungifi\", \"post_text\": \"Hello everybody,\\n\\nI am a newbie, I've started with HPCC/ECL since a month. By downloading/reading the document from Hpcc site, I've tried to study the ECL but It is not really easy for me:(.\\n\\nIs there any other document to step-by-step with this new language including exercise at the end of each session (liked other language tutorial such as C++, Java...)?\\n\\nThanks in advance for your help.\", \"post_time\": \"2013-01-17 15:50:12\" },\n\t{ \"post_id\": 3327, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"I guess it might not be a bug, because as we are in parallel environment, LOOP and GRAPH cannot close (or overwrite) a file until function ends, as data can be written or read from file at any step in the loop. Then, writing the same file inside a LOOP or GRAPH function might be not possible at all. Anyway, if I am right, error message should be more specific to explain it is not possible.\\nThen, if I am right, solution would be to create new file at each step.\\n\\n\\nJM.\", \"post_time\": \"2013-02-04 22:26:48\" },\n\t{ \"post_id\": 3247, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nCan someone tell me if it is useful to build an index in a loop or graph function ?\\n\\nI may be slow to understand but I suddenly understood that it is only useful when you read dataset on the disk, but as soon it is in memory, everything is linked and optimized already, and it is hopeless to try to improve performances this way.\\nAm I right ?\", \"post_time\": \"2013-01-30 15:29:14\" },\n\t{ \"post_id\": 3190, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"To answer more specifically about other points you raised in your previous post.\\n\\n
ECL is designed as a data processing language and therefore its encapsulation mechanisms (of which there are many) are focused towards encapsulating data logic; not process.
\\nI am sure you made the best state of the art code ever, dealing with large dataset, no doubt about this. My point was about my inhability to understand properly ECL and with the fact I encountered unexpected behaviours.\\n\\nNow, with regard to performance, you appear to be doing some kind of tree (or graph) walking exercise using disk to store your intermediates?
\\n\\nI would say hyper-graph or if you prefer rewriting rules.\\n\\nGraphs ARE painful and it is very easy to write code that is mis-balanced across a large cluster - have you checked the execution graphs to see if you have skew?
\\n\\nCan you briefly be more specific about this check ? Do you mean graph visualization in ECL Watch ? Or data skewing check over cluster (with distribution function for example) ? In the first option, visual graphs are very complex and uneasy to read and use. In the second option, I guess I did the best distribution I could regarding data. This is why I am thinking about indexing data at each step because all are involved at each computation step (rewriting rules condition test).\\n\\nAlso - if you are hitting disks for each walk - then you are limiting yourself to around 200 node-hits per disk per second (unless you have SSD).\\nThe ML libraries have some tree walking code in there which may help
\\n\\nI am not sure to understand, I read data from disk at the beginning of the loop and then, I manage datasets in the loop, in memory I guess : I don't know how exactly ECL deals with data. What I think is if I use indexes, I must refresh them on the disk, but I hope it will give better performance.\\nThe ML library contains a lot of algorithms but I didn't see anyone suiting me. Anyway, I can reread more carefully.\\n\\nJM.\", \"post_time\": \"2013-01-25 17:00:28\" },\n\t{ \"post_id\": 3187, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"bforeman\", \"post_text\": \"Sumbmitted to Issue Tracker:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-8675\", \"post_time\": \"2013-01-25 16:12:49\" },\n\t{ \"post_id\": 3184, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"Hello David,\\n\\nThanks for your answer. \\nIf the latter you need an OVERWRITE on the BUILDINDEX statement
\\nThis is the case but if you try my sample code on THOR, you should get the error message despite OVERWRITE option.\\n\\nJM.\", \"post_time\": \"2013-01-25 15:09:33\" },\n\t{ \"post_id\": 3183, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"dabayliss\", \"post_text\": \"JM,\\n\\nFortunately I'm too busy to rise to the bait regarding Prolog or C. Suffice to say I have programmed extensively in both and I would pick Prolog for a logic problem without question and C without question (well - perhaps C++) for a systems problem. Good luck tackling a large data problem with either.\\n\\nECL is designed as a data processing language and therefore its encapsulation mechanisms (of which there are many) are focused towards encapsulating data logic; not process.\\n\\nNow - to your main question - the run-time is warning you that you are using BUILDINDEX on a file you have already created (in the previous iteration of the loop) - and you are attempting to build the file again with the same name. ECL defaults to not trashing your data; generally a feature.\\n\\nThe question is - are you trying to build a different index upon each iteration? Or rebuild the same index upon each iteration? If the former you need some kind of 'uniqueness' about your index name - perhaps adding a loop counter as a suffix to the index name. If the latter you need an OVERWRITE on the BUILDINDEX statement\\n\\nNow, with regard to performance, you appear to be doing some kind of tree (or graph) walking exercise using disk to store your intermediates? Graphs ARE painful and it is very easy to write code that is mis-balanced across a large cluster - have you checked the execution graphs to see if you have skew? Also - if you are hitting disks for each walk - then you are limiting yourself to around 200 node-hits per disk per second (unless you have SSD).\\n\\nThe ML libraries have some tree walking code in there which may help\\n\\nDavid\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2013-01-25 14:18:52\" },\n\t{ \"post_id\": 3170, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"Hello Bob,\\nAs you can understand easily, my code is not public, so I can only illustrate my problems as simple as possible to help your diagnostic. \\nBut you are right, I must clarify the expression "a lot of limits". \\nI feel ECL is not as well structured as prolog or even C. I don't blame it because I guess it has been done in a pragmatical way, more or less as a list of recipes, to improve the only thing that matters : performance. \\nNevertheless, a good documentation about bests practices is cruelly missing, to understand for example, how in detail the optimizer simplifies the code, and generally speaking the philosophy behind ECL. It can be understood more or less but it would save a lot of time if I could have a clear picture on each command.\\nI don't say ECL is not good at performances (I hope) but it is very hard to use it efficiently, and right now, I need better performance.\\nI see execution time cannot be simply reduced by increasing size or quality of the cluster. There is some natural limits I illustrate by the curve below. \\n[attachment=0:256rb9so]performance issue 2.jpg\\nBut I hope it remains one factor I can improve : my ECL code. \\nAbout my example above, it is off course only a very simplified view of what I need. 
I don't say there no better way to do what I have to do but this is the way I choose with the information I have : a loop with an index rebuild at each step.\\n\\nNORMALIZE does not fit because, in my code, at each step, the entire dataset is revised, some data are removed, some are added, all are used. More, I need a condition to stop iterations. Even GRAPH I have used until now because LOOP condition were not available in version 3.6, is not convenient (this is an example of bad structure I was talking about). I really need functions LOOP and INDEX and BUILD working together, in THOR.\\n\\nThanks,\\nJM.\", \"post_time\": \"2013-01-24 18:43:19\" },\n\t{ \"post_id\": 3165, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"Re: index refresh during a loop\", \"username\": \"bforeman\", \"post_text\": \"Hi JM,\\n\\nWell, first, when putting my declarative hat on, I need to ask you:\\n\\nWhat it is that you are trying to achieve? What data do you need to return?\\nThinking declaratively, we need to approach this from the data that you need.\\n\\nCould you give me the correct way to rebuild an index during a loop.\\nI am running after performances and scalability, and found a lot of limits even when expanding cluster size. Then I am trying with indexes, expecting some improvement in join calls.
\\n\\n"A lot of limits" needs to be clarified. Certainly, repetitively building an index in a loop may not be taking advantage of any parallel cluster, and the keyed joins you mention are better suited on the ROXIE side. THOR is built to hammer the data so it should be able to handle any two datasets that you can throw at it. \\n\\nI've looked at your ECL code, and it seems that somehow the last WHEN function is never getting the trigger when targeting THOR, causing the workunit to never complete. Perhaps it's something simple and I am discussing this with our support team.\\n\\nI'm thinking that perhaps a NORMALIZE that calls a TRANSFORM a finite number of times may be an alternative approach.\\n\\nI will keep you posted, thanks for your post and example code.\\n\\nReagards,\\n\\nBob\", \"post_time\": \"2013-01-24 13:27:19\" },\n\t{ \"post_id\": 3134, \"topic_id\": 691, \"forum_id\": 10, \"post_subject\": \"index refresh during a loop\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI try to run a loop with index rebuilt at each step.\\n\\nThis is the way I do it :\\n
rec := RECORD\\nINTEGER id;\\nINTEGER v;\\nEND;\\n\\nrecloop := RECORD\\n\\tINTEGER id;\\n\\tINTEGER v;\\n\\tUNSIGNED8 RecPtr {virtual(fileposition)};\\nEND;\\n\\n\\ndsin := DATASET([{1,1},{2,2},{3,3},{4,4},{5,5}],rec);\\ndsin1:=DATASET('~thor::chr::dsin',{rec,UNSIGNED8 RecPtr {virtual(fileposition)}},FLAT);\\n\\n\\nfloop(DATASET(recloop) ds,INTEGER cpt) := FUNCTION\\n\\tds1:=PROJECT(ds,TRANSFORM(recloop,SELF.id:=cpt;SELF:=LEFT));\\n\\n\\tds_idx:=INDEX(ds1,{id,RecPtr},{v},'~hthor::chr::index');\\n\\tbuild_idx:=BUILDINDEX(ds_idx,OVERWRITE);\\n\\n\\tres := JOIN(ds,ds_idx,LEFT.id=RIGHT.id,TRANSFORM(recloop,SELF:=LEFT),KEYED,INNER);\\n\\n\\tRETURN WHEN(res,build_idx,BEFORE);\\nEND;\\n\\ndsout:=OUTPUT(dsin,,'chr::dsin',OVERWRITE);\\n\\n\\nloopResult := LOOP(dsin1,2,floop(ROWS(LEFT),COUNTER));\\nloopResult1 := WHEN(loopResult,dsout);\\n\\nOUTPUT(loopResult1);
\\n\\nDiagnostic : \\nWhen I run it on thor, it runs an infinite loop.\\nWhen I run it on hthor, it gives an error : "Error: System error: 1: DFS Exception: 1: logical name hthor::chr::index already exists (0, 0), 1, "\\nWhen I run only 1 loop, it ends correctly, whatever the result is.\\n\\nCould you give me the correct way to rebuild an index during a loop.\\nI am running after performances and scalability, and found a lot of limits even when expanding cluster size. Then I am trying with indexes, expecting some improvement in join calls.\\n\\nThanks,\\nJM.\", \"post_time\": \"2013-01-21 15:58:29\" },\n\t{ \"post_id\": 3269, \"topic_id\": 707, \"forum_id\": 10, \"post_subject\": \"Re: HPCC Source Compilation OSX 64bits Issue\", \"username\": \"pschwartz\", \"post_text\": \"Bo,\\n\\nCurrently we only support client tools builds for MacOSX. Any changes that you make in order to get the system (full build) building correctly on MacOSX would be appreciated. This would also require testing to verify that the platform works correctly also.\\n\\nAs part of this, the init system does not currently support MacOSX. This includes all bash scripts currently used to start and stop the platform located under the initfiles directory in our source tree.\\n\\nIf you have questions about how they work while you are attempting to make changes, please feel free to ask and I will provide any information you might need.\\n\\n- Philip\", \"post_time\": \"2013-01-31 13:57:58\" },\n\t{ \"post_id\": 3215, \"topic_id\": 707, \"forum_id\": 10, \"post_subject\": \"Re: HPCC Source Compilation OSX 64bits Issue\", \"username\": \"buptkang\", \"post_text\": \"Hey,\\n\\nThanks a lot for the help, it works for my OSX 64 machine now to compile and build the whole system. But it seems that during deploying process, there are something wrong with the hpcc-init.sh file with OSX file. \\n\\nIn other word, below shell cannot work on my installed path under /opt/HPCCSystems/\\n\\nsudo /etc/init.d/hpcc-init status\\nsudo /etc/init.d/hpcc-init start\\nsudo /etc/init.d/hpcc-init stop\\n\\nBo\", \"post_time\": \"2013-01-28 16:54:13\" },\n\t{ \"post_id\": 3198, \"topic_id\": 707, \"forum_id\": 10, \"post_subject\": \"Re: HPCC Source Compilation OSX 64bits Issue\", \"username\": \"gsmith\", \"post_text\": \"Disclaimer: The only experience I have building on OSX is 64bit + Client Tools Only (not the entire platform) on Snow Lepeord.\\n\\n1. There is a CMAKE flag USE_LIBARCHIVE which you can turn off (see 2 for better solution).\\n2. I have updated the "Building HPCC" wiki to include "libarchive" in the "prerequisites" Mac section.\\n3. I have also updated the wiki to show how to build client tools only (adding the -DCLIENTTOOLS_ONLY=1 option)\\n\\nI see you are attempting to build 32bit on 64bit OSX, if you have any success with that will you post back your steps?\\n\\nEdit: Adding libarchive to the OSX prerequisites does not fix the issue (as it is on version 2.8.5 which is too old), but adding -DUSE_LIBARCHIVE=0 should get you further.\", \"post_time\": \"2013-01-27 08:44:57\" },\n\t{ \"post_id\": 3196, \"topic_id\": 707, \"forum_id\": 10, \"post_subject\": \"HPCC Source Compilation OSX 64bits Issue\", \"username\": \"buptkang\", \"post_text\": \"Hello there,\\n\\nI am a newbie on HPCC, currently I am trying to compile the source code of HPCC in order to customize something for my own need. 
\\n\\nMy OS is OSX 64bits, and I downloaded the source code HPCC 3.10.0 package.\\n\\nI saw the installation hints from :https://github.com/hpcc-systems/HPCC-Platform/tree/release-3.2.2\\n\\nAlso I got some other information from its CMakeLists.txt as well, \\n\\nbut no matter I run it using 32bits compiler or 64 bits compiler, I cannot run make properly with different error message. \\n\\nFor cmake -DCMAKE_C_FLAGS:STRING="-m32 -march=i386" -DCMAKE_CXX_FLAGS:STRING="-m32 -march=i386" ~/hpcc\\n\\nI got lots of error like below:\\nld: warning: could not create compact unwind for ***function
\\n\\n\\n\\nFor cmake ~/hpcc,\\n\\nI got the error as:\\n\\n[ 38%] Building CXX object common/remote/hooks/libarchive/CMakeFiles/archivefile.dir/archive.cpp.o\\nLinking CXX shared library ../../../../Release/libs/libarchivefile.dylib\\nUndefined symbols for architecture x86_64:\\n "_archive_read_next_header2", referenced from:\\n ArchiveDirectoryIterator::first() in archive.cpp.o\\n ArchiveFileIO::ArchiveFileIO(char const*)in archive.cpp.o
\\n\\nCurrently, my assumption is that utilizing Ubuntu 12.04 under 32 bits machine can guarantee to compile the code without problems. Can somebody give me some advice on it?\\n\\nThanks\\nBo\", \"post_time\": \"2013-01-26 23:10:29\" },\n\t{ \"post_id\": 6758, \"topic_id\": 716, \"forum_id\": 10, \"post_subject\": \"Re: Machine Learning ECL into cloud\", \"username\": \"benhastings\", \"post_text\": \"I'm trying to get a new system setup using eclipse. the ecl-ml library is core to our work yet I haven't seen any instructions on the "right" way to get this installed and working with other projects/repositories.\\n\\nIs that forthcoming?\", \"post_time\": \"2015-01-07 18:35:47\" },\n\t{ \"post_id\": 3217, \"topic_id\": 716, \"forum_id\": 10, \"post_subject\": \"Re: Machine Learning ECL into cloud\", \"username\": \"arjuna chala\", \"post_text\": \"Bo,\\n\\nThis is a very good suggestion. In fact we have discussed this internally before. The fact that the build stream for the ML is different from the core server build is one of the reasons why we have not implemented this. \\n\\nHowever, we are working on a smart download mechanism which will install all the client side modules automatically on the client computer with minimal effort. This functionality will be built into the Eclipse and ECL IDEs.\\n\\nThank You \\n\\nArjuna\", \"post_time\": \"2013-01-28 19:58:31\" },\n\t{ \"post_id\": 3216, \"topic_id\": 716, \"forum_id\": 10, \"post_subject\": \"Machine Learning ECL into cloud\", \"username\": \"buptkang\", \"post_text\": \"Hey there,\\n\\nCurrently, I have built the HPCC system in my computer. Also I have practiced to use ECL-ML to do some data analysis, I am wondering why don't we put all the ECL-ML scripts into the cloud shared library under HPCC server side, instead of having to access it through client side?\\n\\nAny comments will be helpful.\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-01-28 17:18:23\" },\n\t{ \"post_id\": 3275, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"Re: using SOAPCALL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Dean,\\n\\nI got the following code to work calling your service:\\n\\nstring ip:='http://IP:PORT/shared/idgenerator/getlnis/soap/';\\n\\nstring svc := 'LNIGeneratorRequest';\\n\\noutrec1 := record\\n set of string myset {xpath('/LNI')};\\nend;\\n\\ninrec:=record\\n integer2 noOfStampers {xpath('numberofStampers')} :=11;\\n integer2 partitionId {xpath('partitionId')} :=10;\\n integer2 type {xpath('type')} :=3;\\n string clientInfo {xpath('clientInfo')} :='TEST';\\nend;\\n\\n\\nc:=soapcall(ip,svc,inrec, dataset(outrec1), LITERAL,\\n NAMESPACE('http://www.lexisnexis.org/CSA/LNIGeneratorRequest'), \\n SOAPACTION('http://www.lexisnexis.org/CSA/Services/LNIGeneratorService/generateIdentifiers'),\\n XPATH('LNIGeneratorResponse'));\\n\\noutput(c);\\n
\\n----------------------------------------\\nNote that the ‘servicename’ really tells soapcall what the request tag should be, and the XPATH in soapcall tells it what the response tag will be.\\n\\nI also struggled with getting the xpath for numberofStampers right. Kept uppercasing the ‘Of’ and didn’t catch the mistake for a while.\\n\\nThe xpath of the set of strings is the tricky part. By default sets have a root tag and then children tags. This xpath says that they are just the LNI repeating tag.\\n\\nLet me know whether you get this to work.. or need any more help.\", \"post_time\": \"2013-01-31 19:43:32\" },\n\t{ \"post_id\": 3261, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"Re: using SOAPCALL\", \"username\": \"dean\", \"post_text\": \"thanks again. XSDs are in the attached zip. \\n\\nI've got this code, with your suggestion, running now ... (taking a long time)\\n\\n\\nip:='http://tpc3632:8888/shared/idgenerator/getlnis/soap';\\nsvc:='LNIGeneratorRequest';\\n\\noutrec1:=record\\n\\tstring100 \\tOutData{xpath('LNI')};\\n\\tunsigned4 \\tLatency{xpath('_call_latency')};\\nend;\\n\\n\\ninrec:=record\\n integer2 noOfStampers:=11;\\n\\t integer2 partitionId:=10;\\n\\t integer2 type:=3;\\n\\t string clientInfo:='TEST';\\nend;\\n\\n\\nc:=soapcall(ip,svc,inrec,dataset(outrec1), \\n\\t\\t\\t\\t\\t\\ttimeout(10),\\n\\t\\t\\t\\t\\t\\tliteral, \\n\\t\\t\\t\\t\\t\\theading('<lnig:LNIGeneratorService>','</lnig:LNIGeneratorService>'),\\n\\t\\t\\t\\t\\t\\txpath('//LNI'), \\n\\t\\t\\t\\t\\t\\tnamespace('http://www.lexisnexis.org/CSA/LNIGeneratorRequest'),\\n\\t\\t\\t\\t\\t\\tsoapaction('"LNIGeneratorRequest"'));\\noutput(c);
\", \"post_time\": \"2013-01-30 23:27:26\" },\n\t{ \"post_id\": 3260, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"Re: using SOAPCALL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Right, the first error seems to be related to resolving the hostname. Using the IP may be getting around that.\\n\\nThanks, can you also attach LNIGeneratorRequest.xsd and LNIGeneratorResponse.xsd? That should tell me more.\\n\\nBut in any case, I think your first 2 lines should be:\\n\\nip:='http://IPADDRESS:8888/shared/idgenerator/getlnis/soap/';\\nsvc := 'LNIGeneratorRequest';\", \"post_time\": \"2013-01-30 22:54:41\" },\n\t{ \"post_id\": 3259, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"Re: using SOAPCALL\", \"username\": \"dean\", \"post_text\": \"wsdl attached (in zip). \\n\\nThanks for the help. I ran my code using the IP address of a soap service that is working in the same hpcc cluster and got back a very SOAPish error, which was expected since I didn't change any of the request data. \\n\\nThis was the error using the IP for the service I want:\\nSystem error: 3000: <Error><text>assert(hostname) failed - file: /mnt/disk2/buildspace/svn/build_0702_64/system/jlib/jsocket.cpp, line 764</text><url>http://tpc3632:8888/</url></Error> (in SOAP dataset G1 E3)
\\n\\n\\nthis is the error I get when I run the same request against a function soap service:\\nSystem error: -1: <Error><text>HTTP error (500) in processQuery</text><soapresponse><?xml version="1.0" ?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body><S:Fault xmlns:ns3="http://www.w3.org/2003/05/soap-envelope"><faultcode>S:Client</faultcode><faultstring>Cannot find dispatch method for {http://www.lexisnexis.org/CSA/LNIGeneratorRequest}updateFileProcessingStatus</faultstring></S:Fault></S:Body></S:Envelope></soapresponse><url>http://138.12.81.236:8080/erfepubsubhub/statusnotification</url></Error> (in SOAP dataset G1 E3)
\", \"post_time\": \"2013-01-30 22:34:35\" },\n\t{ \"post_id\": 3258, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"Re: using SOAPCALL\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Any chance this service exposes a WSDL I can look at?\\n\\nEven sending your sample request directly I'm getting back a soapfault validation error:\\n\\n<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">\\n <soapenv:Body>\\n <soap:Fault xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">\\n <faultcode xmlns:soap-env="http://schemas.xmlsoap.org/soap/envelope/">soap-env:Server</faultcode>\\n <faultstring>Validation Error</faultstring>\\n <faultactor>/shared/idgenerator/getlnis/soap</faultactor>\\n <detail>\\n <lnig:Errors xmlns:lnig="http://www.lexisnexis.org/CSA/LNIGeneratorErrorResponse">\\n <lnig:error>\\n <lnig:error_type/>\\n <lnig:error_code/>\\n <lnig:error_msg/>\\n <lnig:error_msg_dtls/>\\n </lnig:error>\\n </lnig:Errors>\\n </detail>\\n </soap:Fault>\\n </soapenv:Body>\\n</soapenv:Envelope>
\", \"post_time\": \"2013-01-30 22:21:00\" },\n\t{ \"post_id\": 3235, \"topic_id\": 721, \"forum_id\": 10, \"post_subject\": \"using SOAPCALL\", \"username\": \"dean\", \"post_text\": \"Hey folks,\\nI'm trying to connect to an external soap service at this URI:\\nhttp://tpc3632:8888/shared/idgenerator/getlnis/soap\\n\\nThe soap request body xml is:\\n<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:lnig="http://www.mycom.org/CSA/LNIGeneratorRequest">\\n <soapenv:Header/>\\n <soapenv:Body>\\n <lnig:LNIGeneratorRequest>\\n <lnig:noOfStampers>11</lnig:noOfStampers>\\n <lnig:partitionId>10</lnig:partitionId>\\n <lnig:type>3</lnig:type>\\n <lnig:clientInfo>TEST</lnig:clientInfo>\\n </lnig:LNIGeneratorRequest>\\n </soapenv:Body>\\n</soapenv:Envelope>
\\n\\nI've tried this ecl code:\\n\\nip:='http://tpc3632:8888/';\\nsvc:='shared/idgenerator/getlnis/soap/';\\n\\noutrec1:=record\\n\\tstring100 \\tOutData{xpath('LNI')};\\n\\tunsigned4 \\tLatency{xpath('_call_latency')};\\nend;\\n\\ninrec:=record\\n integer2 noOfStampers:=11;\\n integer2 partitionId:=10;\\n integer2 type:=3;\\n string clientInfo:='TEST';\\nend;\\n\\nc:=soapcall(ip,svc,inrec,dataset(outrec1), LITERAL,\\n NAMESPACE('http://www.lexisnexis.org/CSA/LNIGeneratorRequest'));\\noutput(c);\\n
\\n\\nI get this error:\\n[color=#FF0040:huw49suz]Error: System error: 3000: <Error><text>assert(hostname) failed - file: /mnt/disk2/buildspace/svn/build_0702_64/system/jlib/jsocket.cpp, line 764</text><url>http://tpc3632:8888/</url></Error> (in SOAP dataset G1 E2)\\n\\nI not clear on how to set up the namespace for the soap request. And I can't make heads or tails of that error message. \\n\\nAny help would be great. Thanks!\", \"post_time\": \"2013-01-29 22:54:05\" },\n\t{ \"post_id\": 3270, \"topic_id\": 722, \"forum_id\": 10, \"post_subject\": \"Re: Incorporating external programs\", \"username\": \"bforeman\", \"post_text\": \"Good news, bad news:\\n\\nGood news:\\nThere is a project currently underway that will allow you to push your Java algorithms directly inline with an ECL file. \\n\\nBad news:\\nIt hasn't been released yet, so if you had a time sensitive project you would have to convert your Java algorithms today to the ECL equivalent, and perhaps that's not as bad as it sounds. \\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-31 14:58:40\" },\n\t{ \"post_id\": 3262, \"topic_id\": 722, \"forum_id\": 10, \"post_subject\": \"Re: Incorporating external programs\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Bob,\\n\\nHPCC-JDBC driver is a good solution but only in the short-term. The algorithms written in Java are not actually getting executed on the HPCC cluster but on a single node - only few 'select' queries in those are passed to HPCC i.e HPCC is acting as a mere data-store here.\\n\\nThe real performance advantage will come when we have sprayed the data onto HPCC cluster AND our algorithms are, too, running on the HPCC cluster. So my question is straightforward - how can I push my 'Java algorithms' into HPCC(for parallel processing) without rewriting them in ECL?\\n\\nThanks and regards !\", \"post_time\": \"2013-01-31 04:02:12\" },\n\t{ \"post_id\": 3256, \"topic_id\": 722, \"forum_id\": 10, \"post_subject\": \"Re: Incorporating external programs\", \"username\": \"bforeman\", \"post_text\": \"I think you are on the right track.\\n\\nIf you spray your data to HPCC, and then access it via the JDBC driver, you would not have to rewrite your algorithms because the JDBC driver should do that.\\n\\nThe JDBC driver docs says:\\n\\n
Submitted SQL request generates ECL code which is submitted, compiled, and executed on your target cluster
\\n\\nSo I think that would be the extent of it, unless I'm missing something simple \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-30 22:04:52\" },\n\t{ \"post_id\": 3236, \"topic_id\": 722, \"forum_id\": 10, \"post_subject\": \"Incorporating external programs\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nWe have several algorithms for sentiment calculations that access multiple tables which have large no. of records. The processing time is high and we are looking to HPCC as a solution.\\n\\nThe algorithms are tried and tested but written in the Java programming language. Now, even if we import our database into HPCC and use the HPCC-JDBC driver, I don't think it will reduce the processing time - the algorithms will still run in a JVM on the machine where my application is deployed and we won't be able run these algorithms on HPCC nodes in-parallel.\\n\\nIn the 'Introduction' documentation of HPCC, I read the following :\\n\\nECL is compiled into optimized C++ code for execution on the HPCC system platform, and can be used for complex data processing and analysis jobs on a Thor cluster or for comprehensive query and report processing on a Roxie cluster.ECL allows inline C++ functions to be incorporated into ECL programs, and external programs in other languages can be incorporated and parallelized through a PIPE facility\\n\\nHow do we push our java code into HPCC for parallel processing? Is the mentioned 'PIPE' facility relevant and helpful in our scenario? Or is it that we will have to re-write our algorithms in ECL?\", \"post_time\": \"2013-01-30 03:45:57\" },\n\t{ \"post_id\": 3292, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Re: Consistency\", \"username\": \"jeeves\", \"post_text\": \"Thank you! This helps.\", \"post_time\": \"2013-02-01 12:19:16\" },\n\t{ \"post_id\": 3291, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Re: Consistency\", \"username\": \"jsmith\", \"post_text\": \"Yes, that's the default behaviour.\\nReplication is asynchronous and may well complete post job completion.\\n\\nHowever, it is configurable via the "replicateAsync" property in configmgr.\\n\\nHope that helps.\\n\\nJake.\", \"post_time\": \"2013-02-01 12:18:05\" },\n\t{ \"post_id\": 3290, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Re: Consistency\", \"username\": \"flavio\", \"post_text\": \"IIRC, replication is lazy, so it's not guaranteed to be completed at the time the workunit completes, but I may be wrong. I'd like to hear from Jake on this (I'll ping him to see if he can chime in).\\n\\nFlavio\", \"post_time\": \"2013-02-01 12:03:25\" },\n\t{ \"post_id\": 3289, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Re: Consistency\", \"username\": \"jeeves\", \"post_text\": \"Flavio,\\n\\nMy question is with respect to replication. Once a workunit execution is complete is replication of the data to backup nodes also guaranteed to be complete?\\n\\nThanks,\\n-David\", \"post_time\": \"2013-02-01 11:52:39\" },\n\t{ \"post_id\": 3287, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Re: Consistency\", \"username\": \"flavio\", \"post_text\": \"Neither (or both)
\\n\\nACID properties apply to transactional databases, which is not the case of HPCC. \\n\\nIn a Thor system, you read a dataset, apply certain transformations and eventually create a new dataset. As such, the source dataset is never altered (which is a condition required by the fact that the ECL language is -mostly- exempt of side effects). Moreover, as ECL activities are executed, the ECL programmer can safely assume that data will be consistent at all times, based on the dataflow oriented properties of the language (in your excel spreadsheet, if you change a value in a row which is part of a calculation, you can safely assume that every dependent row will be in a updated and in a consistent state at the time you perform your next operation).\\n\\nIn Roxie, data is loaded in bulk and never altered in the system.\\n\\nDoes this help? Or better, why did you ask? Is there any case that you want to ensure is handled properly within HPCC?\\n\\nFlavio\", \"post_time\": \"2013-02-01 11:36:47\" },\n\t{ \"post_id\": 3264, \"topic_id\": 729, \"forum_id\": 10, \"post_subject\": \"Consistency\", \"username\": \"jeeves\", \"post_text\": \"I have a question about consistency as in ACID properties. In other words is HPCC consistent or is it eventually consistant.\", \"post_time\": \"2013-01-31 11:27:18\" },\n\t{ \"post_id\": 3272, \"topic_id\": 731, \"forum_id\": 10, \"post_subject\": \"Re: ECL - some difficulties with the basics\", \"username\": \"bforeman\", \"post_text\": \"Yep, no need for duplicate posts of the same topic. We look at ALL unanswered posts every day
\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2013-01-31 15:07:39\" },\n\t{ \"post_id\": 3266, \"topic_id\": 731, \"forum_id\": 10, \"post_subject\": \"ECL - some difficulties with the basics\", \"username\": \"kaliyugantagonist\", \"post_text\": \"To avoid redundancy, posting the link :\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=730&sid=858e21f0e296ed96566eae8dbb939e03\", \"post_time\": \"2013-01-31 12:07:33\" },\n\t{ \"post_id\": 3303, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Re: Lookup joins\", \"username\": \"flavio\", \"post_text\": \"[quote="DSC":2o208tre][quote="jsmith":2o208tre]On systems with a lot of memory, the default 75% of physical memory is probably too conservative and configuring to [total physical] - 2GB, is probably sensible. If 'slavePerNode' is >1, then divide this figure by slavesPerNode.\\n\\nWhat amount is "a lot of memory"?\\n\\n\\n\\nMore than little memory, but a bunch less than excessive, naturally.
\\n\\nOn a serious note, if you have 48GB of RAM, reserving 12GB for the system is too much, so you may want to up this setting and only reserve 2-4GB (the setting would depend on the number of slaves per node, as Jake indicates above).\\n\\nFlavio\", \"post_time\": \"2013-02-01 20:24:53\" },\n\t{ \"post_id\": 3302, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Re: Lookup joins\", \"username\": \"DSC\", \"post_text\": \"[quote="jsmith":h19s9avn]On systems with a lot of memory, the default 75% of physical memory is probably too conservative and configuring to [total physical] - 2GB, is probably sensible. If 'slavePerNode' is >1, then divide this figure by slavesPerNode.\\n\\nWhat amount is "a lot of memory"?\\n\\n(Sorry. I've been reading business requirement documents all day and this kind of thing seems to stand out.)\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-02-01 20:10:03\" },\n\t{ \"post_id\": 3301, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Re: Lookup joins\", \"username\": \"sbagaria\", \"post_text\": \"This is perfect! One more mystery solved.\\n\\nThank you!\", \"post_time\": \"2013-02-01 17:09:42\" },\n\t{ \"post_id\": 3300, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Re: Lookup joins\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nThe Thor configuration option 'globalMemorySize' if set controls the amount of memory each slave gets.\\n\\nIf 'globlaMemorySize' is left unset, Thor[master] will detect total physical memory (*on the master*) and allocate 75% of it. If there are multiple slaves per node (slavesPerNode>1) it will divide the total amongst the slaves.\\n\\nOn systems with a lot of memory, the default 75% of physical memory is probably too conservative and configuring to [total physical] - 2GB, is probably sensible. If 'slavePerNode' is >1, then divide this figure by slavesPerNode.\\nYou may also want to set globalMemorySize manually, if your master node has a different amount of memory from your slaves.\\n\\nIn HPCC versions >= 3.8.6, there's a 'masterMemorySize' configuration option, which allows you to override the memory allocated to the master independently.\\n\\n
Also, in a lookup join, I believe each slave process (not each node) gets a copy of the lookup table, am I right?
\\nThat's correct.\\n\\n\\nHope that helps.\", \"post_time\": \"2013-02-01 17:06:31\" },\n\t{ \"post_id\": 3286, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Re: Lookup joins\", \"username\": \"sbagaria\", \"post_text\": \"Sorry for the cross-post from viewtopic.php?f=10&t=661&p=3059\", \"post_time\": \"2013-02-01 11:21:31\" },\n\t{ \"post_id\": 3282, \"topic_id\": 732, \"forum_id\": 10, \"post_subject\": \"Lookup joins\", \"username\": \"sbagaria\", \"post_text\": \"How do I increase the page limit (3629 in this case) to a bigger number? Which option controls the memory pool for a job. This job was run on Thor. Also, in a lookup join, I believe each slave process (not each node) gets a copy of the lookup table, am I right?\\n\\n\\n000156A7 2013-01-31 20:41:17 30213 17641 RoxieMemMgr: CChunkingRowManager::allocate(size 30719880) allocated new HugeHeaplet size 31457280 - addr=0x7f970bc00000 pages=30 pageLimit=3629 peakPages=3565 rowMgr=0x194df48\\n000156A8 2013-01-31 20:41:19 30213 17641 LOOKUPJOIN hash table created: 1546350 elements 0 duplicates - activity(lookupjoin, 7125)\\n000156A9 2013-01-31 20:41:21 30213 17664 RoxieMemMgr: Memory pool (3648 pages) exhausted requested 30\\n
\", \"post_time\": \"2013-02-01 10:49:51\" },\n\t{ \"post_id\": 35215, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"jhonedk\", \"post_text\": \"I am impressed with your idea so that we will get more information from this website in future, I am Nice working and good language from this developer of yours HTTP and s language. keep it more create a website in future. [size=1:1trf776c]Kinemaster Mod Apk\", \"post_time\": \"2022-02-23 03:54:03\" },\n\t{ \"post_id\": 33373, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"bforeman\", \"post_text\": \"Hi Shown,\\n\\n
Hi All, I'm new to CS. I've heard people who go to Hackathons talk about using the Google API, or the Twitter API, etc, and I don't know what they mean. Would someone care to explain to me? Thank you so much.
\\n\\nAPI is an Application Programming Interface. The way that I understand it is essentially a series of libraries or methods that you can call into from your source code to perform some specific task with another system. Examples are email, parsing, or machine learning. Usually an API is dedicated to performing a specific related group of tasks. \\n\\nYou can search the web for more specific information on the Google or Twitter APIs as you mentioned. \\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2021-04-19 16:46:55\" },\n\t{ \"post_id\": 3649, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Not sure what specific steps you're taking... but a few general points come to mind:\\n\\n1. You can't just publish the HTHOR or ROXIE workunit to THOR, you have to compile a workunit specifically for THOR and publish that.\\n\\n2. If your main query is running on the same THOR target that you will be calling the webservice on, there will be contention... that is, the soapcalls will block on the queue behind the job that is making the soapcall. Try running the main job on HTHOR, and the published query on THOR or vice versa.\\n\\nIf you're still stuck, maybe you could explain the steps you took.\", \"post_time\": \"2013-03-06 16:47:20\" },\n\t{ \"post_id\": 3648, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nI have successfully run that service call which is built on the Roxie server, but interestingly, if I publish the same query on the THOR server, then I cannot retrieve the result. Do you have any clue?\\n\\nRegards\\nBo\", \"post_time\": \"2013-03-06 14:29:25\" },\n\t{ \"post_id\": 3633, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"anthony.fishbeck\", \"post_text\": \"I think the main issue is that your URL references what we call the process cluster (physical roxie) rather than target cluster. 
Assuming you are using the default configuration, change 'myroxie' to 'roxie':\\n\\nRoxieIP = 'http://myIP:8002/WsEcl/soap/query/roxie/soapcall3.1';\\n\\nAnd try again.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-03-05 17:10:54\" },\n\t{ \"post_id\": 3620, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nCurrently I am trying to develop my ECL function and access the data through SOAP, but I faced up issues on retrieving data.\\n\\nFirst, I submitted and published the below function into THOR server(sample code):\\n\\n//\\n// Example code - use without restriction. \\n//\\nParseWords(STRING LineIn) := FUNCTION\\n\\tPATTERN Ltrs := PATTERN('[A-Za-z]');\\n\\tPATTERN Char := Ltrs | '-' | '\\\\'';\\n\\tTOKEN Word := Char+;\\n\\t\\t\\t\\t\\tds \\t := DATASET([{LineIn}],{STRING line});\\n\\tRETURN PARSE(ds,line,Word,{STRING Pword := MATCHTEXT(Word)});\\nEND;\\t\\n\\nEXPORT Soapcall3() := FUNCTION\\n STRING UID := '' : STORED('UIDstr');\\n STRING LeftIn := '' : STORED('LeftInStr');\\n STRING RightIn := '' : STORED('RightInStr');\\n BOOLEAN TokenMatch := FUNCTION\\n P1 := ParseWords(LeftIn);\\n P2 := ParseWords(RightIn);\\n SetSrch := SET(P1,Pword);\\n ProjRes := PROJECT(P2,\\n\\t\\t TRANSFORM({BOOLEAN Fnd},\\n\\t\\t\\t\\t\\t SELF.Fnd := LEFT.Pword IN SetSrch));\\n AllRes := DEDUP(SORT(ProjRes,Fnd));\\n RETURN COUNT(AllRes) = 1 AND AllRes[1].Fnd = TRUE;\\n END;\\n RETURN OUTPUT(DATASET([{UID,TokenMatch}],{STRING UID,BOOLEAN res}));\\nEND;
\\n\\n\\nThen I used another sample code to retrieve the data:\\n\\nInRec := RECORD\\n\\tSTRING UIDstr{XPATH('UIDstr')}; \\n\\tSTRING LeftInStr{XPATH('LeftInStr')};\\n\\tSTRING RightInStr{XPATH('RightInStr')};\\nEND;\\nInDS := DATASET([\\n\\t\\t\\t{'1','the quick brown fox jumped over the lazy red dog','quick fox red dog'},\\n\\t\\t\\t{'2','the quick brown fox jumped over the lazy red dog','quick fox black dog'},\\n\\t\\t\\t{'3','george of the jungle lives here','fox black dog'},\\n\\t\\t\\t{'4','fred and wilma flintstone','fred flintstone'},\\n\\t\\t\\t{'5','yomama comeonah','brake chill'}\\n\\t\\t\\t\\t\\t\\t\\t\\t],InRec);\\n\\nRS := SOAPCALL(InDS,\\n\\t\\t\\t\\t\\t\\t\\t RoxieIP,\\n svc,\\n\\t\\t\\t\\t\\t\\t\\t InRec,\\n\\t\\t\\t\\t\\t\\t\\t TRANSFORM(LEFT),\\n\\t\\t\\t\\t\\t\\t\\t DATASET({STRING UIDval{XPATH('uid')},\\n\\t\\t\\t\\t\\t\\t\\t BOOLEAN CompareResult{XPATH('res')}}));\\n\\nOUTPUT(RS);\\n
\\n\\n\\nIn the above code, I do not write the variable RoxieIP and svc variable. \\n\\nRoxieIP = 'http://myIP:8002/WsEcl/soap/query/myroxie/soapcall3.1';\\nsvc = 'soapcall3.1';\\n\\nBut when I was running the above code, it always run without finishing signal. \\n\\nDo you have any clue on this?\\n\\nThanks \\nBo\", \"post_time\": \"2013-03-04 20:53:05\" },\n\t{ \"post_id\": 3581, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nThanks, I am more clear toward my target now. \\n\\nBo\", \"post_time\": \"2013-02-27 23:05:51\" },\n\t{ \"post_id\": 3546, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Bo,\\n\\nThe generated WSDL/XMLSchema describe the SOAP/XML format for the query. The JSON format is structurally very similar, and you can use the WSDL as a form of documentation that is very close. But we don't currently have a schema specifically for JSON so you would have to hand code the JSON message processing.\\n\\nIf you have tools that generate code for handling SOAP/XML from a WSDL, it makes the question of whether you should use SOAP or JSON trickier. I would suggest playing with both and deciding which better meets your needs.\\n\\nAs for running your html+js from the HPCC ESP component or outside the HPCC, first of all if you have a web server set up I would probably use that rather than embedding in the ESP just to keep things cleen. If not I would base it on the scale of the system, a large or active system should probably have a stand alone web server. A small prototype system may run ok from ESP.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-02-25 17:52:42\" },\n\t{ \"post_id\": 3540, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nThanks again for your comment. \\n\\nIn general, I will create most ECL script to manipulate data, then publish the query into the server in order to wait for end-user's data request through web portal.\\n\\nFrom client web portal side, I should create HTML+Javascript(JQuery) to pull the data through WS using JSON data format or XML format. \\n\\nBut here, I have a question:you mentions that I can put my client files(html+js) into HPPC deployed source folder. If I want to use HPCC's internal ecm WSDL to represent data in order to reduce lots of web dev work, how could I use JSON to store the data in client side? Does it mean that I have to stick onto XML soap method?\\n\\nThere is another big design issue: Do you think that building another web portal inside of HPCC is a "good" option other than building outside HPCC context? Here my "good" definition is to try to reduce web development work as less as possible, I want to focus on data manipulation side. \\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-02-22 17:50:44\" },\n\t{ \"post_id\": 3526, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"anthony.fishbeck\", \"post_text\": \"This is a really difficult question because you have so many options available and much of it is based on your personal preferences and style.\\n\\nIt is possible to write ECL that uses the ECL "SOAPCALL" method to call HPCC system functions. 
That way you could publish ECL queries that did things like copying files onto the system, and then publish another ECL query that used the files. All of those queries could be accessed using forms from the WsEcl page.\\n\\nYou can btw call published queries using an HTTP GET url. So for example if you\\nhave a query with one parameter called "lastname" you could run the query using the\\nfollowing url: \\n\\nhttp://ip:port/WsEcl/submit/query/hthor/myquery?lastname=smith\\n
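As a rough aside, the same GET endpoint can also be exercised from ECL with HTTPCALL. This is only a sketch: the host, port, query name, result field and XPATH are placeholders, and the exact WsEcl URL variant that returns plain XML for your query may differ:

OutRec := RECORD
  STRING lastname {XPATH('lastname')}; // placeholder result field
END;
resp := HTTPCALL('http://ip:port/WsEcl/submit/query/hthor/myquery?lastname=smith',
                 'GET', 'text/xml', OutRec);
OUTPUT(resp);
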
\\n\\n(just replace hthor with the target cluster you are hitting, and myquery with your published query name).\\n\\nYou could also drop html files in /opt/HPCCSystems/componentfiles/files and they will be accessible through esp. For example, if you add myfile.html you can see it from ESP as http://ip:port/esp/files/myfile.html
That page could have convenient links to your published queries, or could directly contain Ajax calls out to the system functions.\\n\\nAs soon as you want to start doing really sophisticated web development though I would probably recommend using another front end and making calls from there to the HPCC.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-02-21 22:34:40\" },\n\t{ \"post_id\": 3513, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hi Tony,\\n\\nThere is a long time interval since the last message on this question. In general, I want to build a machine learning web portal based on ECL-ML toolbox. \\n\\nI will mostly create and export ECL code as the web service to be called later. Then my question is still here:\\n\\nMy goal is to do the web-development as less as possible, I mainly focus on data manipulation using ECL. So I assume that I need to create some HTML forms such as load file, manipulate file and so forth in my portal. \\n\\nIf so, which way do you recommend me to do now? Developing such portal inside of HPCC system or build my own client app to call? Like what I am saying, I want to minimize any effort on web development.\\n\\nUnder such circumstance, could you give me some ideas on the work flow I should continue to try?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-20 23:04:03\" },\n\t{ \"post_id\": 3325, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"Re: build web service inside of hpcc source code\", \"username\": \"anthony.fishbeck\", \"post_text\": \"It really depends on what you are trying to accomplish how much I could reccomened your service 1. Calling an ECL based service. 2. Calling into ESP (EclWatch) services via SOAP or REST calls. 3. Being implemented inside of ESP as part of a C++ service.\\n\\nYou can create ECL and publish it. You can then call it from your service. And that ECL can even use SOAPCALL or HTTPCALL to call other services in ESP or elsewhere.\\n\\nOr you can have your service call ESP services to manipulate the HPPC and / or run ECL.\\n\\nA basic workflow might help me understand what you are trying to do and where it might fit.\", \"post_time\": \"2013-02-04 21:29:57\" },\n\t{ \"post_id\": 3306, \"topic_id\": 733, \"forum_id\": 10, \"post_subject\": \"build web service inside of hpcc source code\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nCurrently, I am trying to build some web services and relevant http forms which are based on existing WSs in HPCC WsECL. From the internal source code perspective, I can add a new subfolder in ESP/services/ws_*** for my own need, and it could work if I can continue to develop C++ code on it. \\n\\nBut my intention is that finally we want to build a web service HTTP form, which is separate with the HPCC system, so in this way, could somebody tell me how could I do it more efficiently using ECL instead of using pure C++ code to create such service?\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-02-02 23:12:21\" },\n\t{ \"post_id\": 3402, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Re: Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"rtaylor\", \"post_text\": \"prachi,I assume that OUTPUT which is an action cannot be used on ROXIE!!
An INcorrect assumption. OUTPUT is used in virtually every Roxie query ever written.\\n\\nAbove code when submitted on THOR, gets executed and workunit is created with DATASET but when same is compiled on ROXIE generates Error message.
I see nothing inherently wrong with that code such that it could not run correctly on either Thor or Roxie. Without knowing the exact error message from Roxie, I have no way to provide any further input as to what the specific problem in this instance might be.\\n\\nI would like to know basic conceptual difference between THOR and ROXIE
I can only refer you to my previous post, the Programmer's Guide, which contains an entire section (seven articles) specifically about working with Roxie, and the ECL Language Reference, where the documentation for those few functions that are only available on Roxie (such as PRELOAD, ALLNODES, THISNODE, etc.) clearly state that information. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-09 21:13:41\" },\n\t{ \"post_id\": 3400, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Re: Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"prachi\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply !\\n\\nAs per your statement:\\nThe are a very few bits of ECL that only work in Roxie, other than those, the code for both Thor and Roxie is the same.
\\n\\nAnd the PDF referred to states that, apart from the target cluster (THOR/ROXIE), nothing has to be changed to execute a query on THOR/ROXIE.
\\n\\nCode:\\nTwitter_dataset := DATASET(logicalname,layout_twitter,CSV);\\nOUTPUT(Twitter_dataset);
\\n\\nAbove code when submitted on THOR, gets executed and workunit is created with DATASET but when same is compiled on ROXIE generates Error message.\\n\\nI assume that OUTPUT which is an action cannot be used on ROXIE!! \\n\\n\\nI would like to know the basic conceptual difference between THOR and ROXIE (e.g., whether an action can be used or not). Since I will be writing a single piece of code and, as per another person's demand, will be executing that code on THOR/ROXIE, it is necessary for me to know the code-level difference between them.\\n\\nThanks and regards !\", \"post_time\": \"2013-02-09 18:59:52\" },\n\t{ \"post_id\": 3398, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Re: Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"rtaylor\", \"post_text\": \"prachi,
1. Different ECL code/program has to written to execute it on THOR and ROXIE.
There are very few bits of ECL that only work in Roxie; other than those, the code for both Thor and Roxie is the same. However, since you're doing different jobs on the two tools, you may write your code for each from a different approach.\\n2. ECL code for ROXIE may or may not have INDEXes.
Correct. Roxie queries almost always use INDEXes, but that is not a requirement.\\nBUT:\\n1. If it mandatory to build an INDEX for query to process on ROXIE, how and when would be the INDEXes built on THOR would be transferred on ROXIE?Is it when you 'publish' the ECL code to ROXIE?
yes, Roxie gets both its code and data when you Publish the query.\\nthne my assumption that till User1's query is not completed, User2's query won't be executed/processed....is right??
Pretty much. That's why we have the capability of configuring multiple instances of Thor on the same hardware (multi-Thor) to allow multiple jobs to run "at once".\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-09 15:58:41\" },\n\t{ \"post_id\": 3397, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Re: Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"prachi\", \"post_text\": \"Hi Richard,\\n\\nWhat things i understood is (correct me if i am wrong):\\n\\n1. Different ECL code/program has to written to execute it on THOR and ROXIE.\\n2. ECL code for ROXIE may or may not have INDEXes.\\n\\nBUT:\\n1. If it mandatory to build an INDEX for query to process on ROXIE, how and when would be the INDEXes built on THOR would be transferred on ROXIE?Is it when you 'publish' the ECL code to ROXIE?\\n\\n2. Thor is a "back office" tool (running just one job at a time)
thne my assumption that till User1's query is not completed, User2's query won't be executed/processed....is right??\", \"post_time\": \"2013-02-09 10:06:39\" },\n\t{ \"post_id\": 3390, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Re: Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"rtaylor\", \"post_text\": \"Prachi,\\n\\nHere's something I wrote in a previous thread (http://hpccsystems.com/bb/viewtopic.php?f=9&t=408&hilit=+Roxie+thor+difference&sid=6c0b9b31262e017ea23d6d6675e33228) that should answer your questions:\\n*****************************************\\nThis white paper (http://cdn.hpccsystems.com/whitepapers/ ... n_HPCC.pdf) goes into great detail about the HPCC platform and the differences between Thor and Roxie, but here's the brief description:\\n\\nThor is a massively parallel cluster designed to do ETL (Extract, Transform, and Load) work -- the kind of data ingest and transformation jobs that every big data shop needs to do to take raw data and transform it into useful information that can either be sold to external customers or used by internal customers. Data is distributed across the nodes and the same "work" is done in parallel by each node on the data that that node is given to work on. IOW, we distribute the work by how we distribute the data. Thor is a "back office" tool (running just one job at a time) that builds data products from raw, un-indexed data, and creates whatever the "saleable" format needs to be (datasets and/or indexes).\\n\\nRoxie is a customer interface tool designed to serve up data requests to end-users, handling thousands of concurrent requests at a time. Unlike Thor, a single Roxie query is mostly (with few exceptions) handled by a single Roxie node, with support from whichever nodes in the cluster contains the data needed for each particular query. Most Roxie queries use indexes for fast performance. Roxie is a "front counter" tool that serves up information to customers.\\n\\nThor is used to create the data that Roxie will use. ETL and query development work are all done on Thor. A query (and its required data) is only published to Roxie when the final data/query combination is complete. Once published to Roxie, the query is available for end-users to use.\\n\\nSo, as an ECL developer, 99%+ of all your work is done on Thor -- Roxie receives the end result of your work and allows customers to reap the benefits of all your work.\\n*****************************************\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-08 18:50:56\" },\n\t{ \"post_id\": 3387, \"topic_id\": 749, \"forum_id\": 10, \"post_subject\": \"Queries to clear basic concepts of THOR and ROXIE\", \"username\": \"prachi\", \"post_text\": \"Note: Please move this post to appropriate forum's topic,if required\\n\\nHi,\\nRegarding THOR and ROXIE we have some assumptions/queries as stated below:\\n\\nWhile THOR is the data repository and ROXIE is the query processing component, the queries(with some limitations)can be executed on THOR only. If required, the same query can be executed on ROXIE. Will the same ECL code work seamlessly irrespective of the target cluster? The point is what exactly is the conceptual difference between the codes to be executed on THOR and ROXIE respectively?
\\nOne can have an HPCC cluster with just THOR(and not ROXIE). Suppose on such a cluster, a csv having a large no. of records is spread and several users execute (ECL) queries on the above csv's logical file simultaneously - will there be 'wait' for them i.e till User1's query isn't completed, User2's query won't be executed/processed?
\\nConsider a simple scenario - a large csv is sprayed onto THOR, the data was verified by a simple ECL code which is executed on THOR itself via ECL IDE. Now, suppose, a similar query needs to be executed on ROXIE - is it mandatory to build an INDEX for the same?If yes, how and when would be the INDEXes built on THOR would be transferred on ROXIE?Is it when you 'publish' the ECL code to ROXIE?
\\n\\nThanks and regards !\", \"post_time\": \"2013-02-08 11:30:03\" },\n\t{ \"post_id\": 3412, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"Re: File Upload in DFU\", \"username\": \"buptkang\", \"post_text\": \"Hi Richard,\\n\\nSorry, I think I still have a little bit issue on the data. \\n\\nMy current data in CSV can be sprayed properly, and my data looks like:\\n\\nAA BB 8.0\\n\\nBut when I query the data, I got only 1 column instead of my assumed 3 columns, so I got\\nFirst column AABB8.0, second and third column as empty. \\n\\nIn my data format, there is a empty space between each column, I am not sure in the spray CSV configuration page:\\n\\nwhat exactly the parameter I should set up in order to insert the data properly?\\n\\nMax Record Length: 8192\\nSeparator:\\t\\nNo Separator???\\nEscape:\\t ???\\nLine Terminator: \\\\n,\\\\r\\\\n \\t\\nQuote:\\t ???\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-10 19:24:40\" },\n\t{ \"post_id\": 3411, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"Re: File Upload in DFU\", \"username\": \"buptkang\", \"post_text\": \"Hi Richard,\\n\\nThanks for your kind help, I think right now I can upload my file using spray CSV properly, I am not sure why I cannot do it successfully during the weekday in this week?\\n\\nAnyway, thanks a lot.\\n\\nBo\", \"post_time\": \"2013-02-10 18:50:34\" },\n\t{ \"post_id\": 3410, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"Re: File Upload in DFU\", \"username\": \"rtaylor\", \"post_text\": \"Bo,\\n\\nIf there is a record delimiter between the variable length records, the you can use SPRAY CSV (which should actually be named "spray variable"). \\n\\nIf there is no record delimiter, then you'll need to pre-process the file, either to add delimiters, or to make the records fixed length.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-10 18:35:48\" },\n\t{ \"post_id\": 3409, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"Re: File Upload in DFU\", \"username\": \"buptkang\", \"post_text\": \"Hey Richard,\\n\\nThank for your tip on counting record length using HEX mode. I tried on my data file, it seems that each record has variable length instead of fix length. So under this condition, how could I pre-processed my data in order to spray it?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-10 18:32:37\" },\n\t{ \"post_id\": 3408, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"Re: File Upload in DFU\", \"username\": \"rtaylor\", \"post_text\": \"Bo,\\n\\nIf your file contains fixed length records with 100 bytes of data and spray fixed at 100 doesn't work, you are probably not taking into account record delimiters. \\n\\nI suggest you open the file and look at the actual data. I usually do this with a text editor that has a Hex mode (Visual Slickedit does this quite well). Most likely, you will find that in addition to your data there is also a record delimiter (either 0aH - Unix-style newline, or 0d0aH - DOS-style carriage return line feed) at the end of each record. Add the appropriate number of bytes and re-try the Spray Fixed.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-10 18:08:47\" },\n\t{ \"post_id\": 3407, \"topic_id\": 751, \"forum_id\": 10, \"post_subject\": \"File Upload in DFU\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nCurrently we have some legacy data which is not CSV format or XML format. So I assume the only way to spray my big data file is through Spray Fixed approach. 
When I was trying to spray the logic file which has been loaded into the HPCC server, I got the following error message:\\n\\nFailed: Source file //192.168.17.128/var/lib/HPCCSystems/mydropzone/Test1.TAB is not a valid multiple of the expected record size (100)\\n\\nOur data file contains billions of records, each record contains three variables, each variable is separated by a space, and each record is one line without any separator like a comma.\\n\\nAs there are millions of records, I cannot use Excel to convert the data to CSV or XML format. \\n\\nTherefore, could somebody give me some idea how I could spray my big data file, in my data format, into HPCC?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-10 17:56:02\" },\n\t{ \"post_id\": 3460, \"topic_id\": 758, \"forum_id\": 10, \"post_subject\": \"Re: GRAPH + JOIN runs forever in THOR\", \"username\": \"omnibuzz\", \"post_text\": \"Great, Thanks. Looking forward to the fix. \\nRegards\\nSrini\", \"post_time\": \"2013-02-14 14:08:08\" },\n\t{ \"post_id\": 3459, \"topic_id\": 758, \"forum_id\": 10, \"post_subject\": \"Re: GRAPH + JOIN runs forever in THOR\", \"username\": \"bforeman\", \"post_text\": \"Which build were you running on?\\nIf it's 3.10.2-1, then it's likely hanging because of the lookup/all join activity, see: https://track.hpccsystems.com/browse/HPCC-8758\\nThe fix will be in 3.10.4. I don't think 3.8.6 was affected.\\n\\nRegards,\\n\\nBob (for Jake)\", \"post_time\": \"2013-02-14 13:07:28\" },\n\t{ \"post_id\": 3454, \"topic_id\": 758, \"forum_id\": 10, \"post_subject\": \"Re: GRAPH + JOIN runs forever in THOR\", \"username\": \"bforeman\", \"post_text\": \"Confirmed here too, thanks for the sample code. Forwarded to development for review.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-13 21:25:06\" },\n\t{ \"post_id\": 3433, \"topic_id\": 758, \"forum_id\": 10, \"post_subject\": \"GRAPH + JOIN runs forever in THOR\", \"username\": \"omnibuzz\", \"post_text\": \"Given below is a self-contained snippet that illustrates the problem I have. \\nThis code generates positive integers up to 128. I am able to run this in HTHOR but it runs forever in THOR. Appreciate your help in advance.\\n\\n\\nRec := {UNSIGNED Val};\\n\\nDS1 := DATASET([1,2],Rec);\\n\\nbase := COUNT(DS1);\\n\\nExplode(SET OF DATASET(Rec) ds, UNSIGNED4 c) :=\\n JOIN(ds[c-1],DS1,1=1,TRANSFORM(rec,SELF.Val := POWER(base,C)*(RIGHT.Val - 1) + LEFT.val),ALL);\\n\\nGRAPH(DS1,6,Explode(ROWSET(LEFT),COUNTER));\\n
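As an illustrative aside (not part of the original report, and no claim is made that it avoids the hang described above), the same expansion can also be phrased with LOOP, reusing the Rec, DS1 and base definitions from the snippet:

ExplodeLoop(DATASET(Rec) prev, UNSIGNED4 c) :=
  JOIN(prev, DS1, 1=1,
       TRANSFORM(Rec, SELF.Val := POWER(base,c)*(RIGHT.Val - 1) + LEFT.Val),
       ALL);
OUTPUT(LOOP(DS1, 6, ExplodeLoop(ROWS(LEFT), COUNTER)));
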
\", \"post_time\": \"2013-02-12 22:39:41\" },\n\t{ \"post_id\": 3492, \"topic_id\": 777, \"forum_id\": 10, \"post_subject\": \"Re: Unchanged Roxie Query New data\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,\\n\\nFor a development/testbed system, simply un-publishing a query and re-publishing it manually is a perfectly appropriate solution.\\n\\nFor a Production system, however, you don't want any interruption of service to the end-users. That's why we typically configure our Production Roxies with an "extra" offline Roxie that is used to update queries, data, do QA work, etc. before we make the new version available to end-users. That makes the switch to new data/query versions a simple matter of telling the load balancer to start sending all new queries to the Roxie that was just updated, bringing it online and taking the previous version Roxie offline (ready to be updated with even newer stuff). That way, if there is a problem with the new release you can just as easily switch back to the previous version.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-19 15:21:25\" },\n\t{ \"post_id\": 3491, \"topic_id\": 777, \"forum_id\": 10, \"post_subject\": \"Re: Unchanged Roxie Query New data\", \"username\": \"bforeman\", \"post_text\": \"The standard technique is to create a new index, then update a superfile so that Roxie knows to load the new one. There is a great article in the Programmer's Guide that discussed SuperKeys and the use of payload indexes to store the data and eliminate the need to access the bases datasets. As a new file comes in, build a new payload index on the new data and update the SuperKey.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-19 15:20:19\" },\n\t{ \"post_id\": 3487, \"topic_id\": 777, \"forum_id\": 10, \"post_subject\": \"Unchanged Roxie Query New data\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hello,\\nI have requirement where new data keep on coming from sources and Roxie Query used to display the latest data available. (using Superfile to consolidate data under one file name)\\n\\nWe realised that data is locked once a Roxie query using it is published.\\nAnd due to this lock we are not able to add newly coming data. \\n\\nWe need to unpublish and then re-publish the roxie queries manualy. Whereas the application is such that this manual intervention is not possible. 
Please suggest a good way to perform all these operations.\", \"post_time\": \"2013-02-19 13:11:15\" },\n\t{ \"post_id\": 3593, \"topic_id\": 792, \"forum_id\": 10, \"post_subject\": \"Re: packagemap help required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Bob,\\n\\nActually, the issue was the incorrect .pkg file - I found it after line-by-line reading \\n\\nThanks and regards !!!\\n\\nP.S : I'm using payload indexes only\", \"post_time\": \"2013-02-28 16:49:57\" },\n\t{ \"post_id\": 3579, \"topic_id\": 792, \"forum_id\": 10, \"post_subject\": \"Re: packagemap help required\", \"username\": \"bforeman\", \"post_text\": \"I'm not a big expert on packages (yet), but the fact that you can verify that the index was published to ROXIE leaves me with one question: Are the indexes payload or non-payload, and if non-payload, can you verify that the updated base file was copied?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-27 21:57:59\" },\n\t{ \"post_id\": 3564, \"topic_id\": 792, \"forum_id\": 10, \"post_subject\": \"packagemap help required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Please find the background thread here :\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=776&sid=2237e8c8beb53b5da7c6987660312a60\\n\\nI played around with the packages but facing some issues
\\n\\nMy Pkg_CCWD.pkg is as follows :\\n\\n
<RoxiePackages>\\n<!-- Begin Queries -->\\n<Package id="Pkg_CCWD">\\n<Base id="firstpkg::ccwd"/>\\n </Package>\\n<!-- End Queries -->\\n<!-- Begin File references -->\\n<Package id="firstpkg::ccwd">\\n<SuperFile id="~hpcc::sk::cwcd">\\n <SubFile value="~hpcc::index::idxpayload_cwcd_two"/>\\n </SuperFile>\\n </Package>\\n <!--End File references -->\\n </RoxiePackages>\\n
\\n\\nHere, ~hpcc::sk::cwcd is a super-key which is being used in a published Roxie query and returning data for older files.\\n\\nNow I execute this :\\n\\nroot@cloudx-843-770:/usr/share/bigdata/hpcc/cmd/superkey_packages# ecl packagemap add --daliip=172.25.37.135 --overwrite --activate roxie Pkg_CCWD.pkg\\n\\n ... adding package map Pkg_CCWD.pkg now\\n\\nroot@cloudx-843-770:/usr/share/bigdata/hpcc/cmd/superkey_packages#\\n
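For comparison only, the subfile swap can also be expressed directly in ECL as a superfile transaction; this is a minimal sketch reusing the file names above (note that updating a superkey this way is the approach that can run into the locking issue packagemaps help avoid):

IMPORT Std;
SEQUENTIAL(
  Std.File.StartSuperFileTransaction(),
  Std.File.ClearSuperFile('~hpcc::sk::cwcd'),
  Std.File.AddSuperFile('~hpcc::sk::cwcd', '~hpcc::index::idxpayload_cwcd_two'),
  Std.File.FinishSuperFileTransaction()
);
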
\\n\\nThe ECL Watch shows that the new index viz. ~hpcc::index::idxpayload_cwcd_two is copied to Roxie, too. But when I use the published Roxie query, it doesn't reflect the new data.\\n\\nWhat could be the problem?\\n\\nThanks and regards !!!\", \"post_time\": \"2013-02-27 11:54:29\" },\n\t{ \"post_id\": 3617, \"topic_id\": 796, \"forum_id\": 10, \"post_subject\": \"Re: using beginc++ to connect to socket\", \"username\": \"ghalliday\", \"post_text\": \"The simplest way is to define a macro to clone a string and return it:\\n\\n\\n #define RETSTR(x) { size32_t len = strlen(x); __lenResult = len; __result = (char *)rtlMalloc(len); memcpy(__result, x, len); return; }\\n
\\n\\nSo the complete code for your example is now:\\n\\n\\nString getAddr(string host, integer2 port) := beginc++\\n #include<sys/types.h>\\n #include<sys/socket.h>\\n #include <stdlib.h>\\n #include <unistd.h>\\n #include <errno.h>\\n #include<fcntl.h>\\n #include<string.h>\\n #include<netinet/in.h>\\n #include<stdio.h>\\n #include<arpa/inet.h>\\n #include<string.h>\\n\\n#body\\n #define RETSTR(x) { size32_t len = strlen(x); __lenResult = len; __result = (char *)rtlMalloc(len); memcpy(__result, x, len); return; }\\n \\n int sfd =0;\\n struct sockaddr_in server;\\n\\n sfd= socket(AF_INET,SOCK_STREAM,0);\\n\\n char tempBuffer[256]; \\n if(sfd<0) {\\n// char* err=strerror_r(errno, tempBuffer, sizeof(tempBuffer));\\n sprintf(tempBuffer, "socket error %d", errno);\\n RETSTR(tempBuffer);\\n }\\n \\n bzero(&server,sizeof(struct sockaddr_in));\\n server.sin_family=AF_INET;\\n server.sin_port=htons(port);\\n inet_aton(host,&server.sin_addr);\\n int con=connect(sfd,(struct sockaddr *)&server,sizeof(server));\\n if(con<0) {\\n close(sfd);\\n char* err=strerror_r(errno, tempBuffer, sizeof(tempBuffer));\\n// sprintf(tempBuffer, "connect error %d", errno);\\n RETSTR(err);\\n }\\n\\n close(sfd);\\n RETSTR("success");\\nendc++;\\n\\nhostname:='cert-mw-addrdocpoc-mem.lexisnexis.com';\\nportnumber:=27917;\\n\\noutput(getAddr(hostname,portnumber));\\n
\\n\\n(One example using strerror_r, another using sprintf)\", \"post_time\": \"2013-03-04 15:40:37\" },\n\t{ \"post_id\": 3580, \"topic_id\": 796, \"forum_id\": 10, \"post_subject\": \"using beginc++ to connect to socket\", \"username\": \"dean\", \"post_text\": \"I am connecting to an external socket-based service from ECL. I have some C code which I have been assured works correctly outside of HPCC, but I'm not able to connect from within HPCC. \\n\\nThis is my first use of beginc++ and I've got a couple of questions ...\\n\\nDo I really need to use rtlMalloc to get memory for the return messages? I tried using char xxx[]="blah", but I get runtime issues.\\n\\nHow can I use strerror_r to fetch the string error message?\\n\\nWhen returning a string, I'm using the __lenResult, __result construction, but this doesn't force a return. So I've got lots of else statements. Is there a better way to emulate:\\n
if(error1) return "error1";\\nif(error2) return "error2";\\nreturn "success";\\n\\n\\nAs always, thanks for the help.\\n\\n\\nHere is my ecl beginc++ block\\n
\\nString getAddr(string host, integer2 port) := beginc++\\n #include<sys/types.h>\\n #include<sys/socket.h>\\n #include <stdlib.h>\\n #include <unistd.h>\\n #include <errno.h>\\n #include<fcntl.h>\\n #include<string.h>\\n #include<netinet/in.h>\\n #include<stdio.h>\\n #include<arpa/inet.h>\\n #include<string.h>\\n\\n#body\\n int sfd =0;\\n struct sockaddr_in server;\\n\\n sfd= socket(AF_INET,SOCK_STREAM,0);\\n \\n char* SUCCESS=(char*)rtlMalloc(8);\\n strcpy(SUCCESS,"success");\\n\\t\\t\\t\\n if(sfd<0) {\\n char* buff=(char*)rtlMalloc(256);\\n// char* err=strerror_r(errno, buff, 256);\\n sprintf(buff, "socket error %d", errno);\\n __lenResult = strlen(SOCKET_ERR);\\n __result = SOCKET_ERR;\\n } else {\\n bzero(&server,sizeof(struct sockaddr_in));\\n server.sin_family=AF_INET;\\n server.sin_port=htons(port);\\n inet_aton(host,&server.sin_addr);\\n int con=connect(sfd,(struct sockaddr *)&server,sizeof(server));\\n if(con<0) {\\n close(sfd);\\n char* buff=(char*)rtlMalloc(256);\\n// char* err=strerror_r(errno, buff, 256);\\n sprintf(buff, "connect error %d", errno);\\n __lenResult = strlen(buff);\\n __result=buff;\\n } else {\\n close(sfd);\\n char* buff=(char*)rtlMalloc(256);\\n sprintf(buff, "success");\\n __lenResult = strlen(SUCCESS);\\n __result = SUCCESS;\\n }\\n }\\nendc++;\\n\\nhostname:='cert-mw-addrdocpoc-mem.lexisnexis.com';\\nportnumber:=27917;\\n\\noutput(getAddr(hostname,portnumber));\\n\\n
\\n\\n\\n\\n\\nand, for reference, here's the C code that works outside of HPCC. \\n#include<sys/types.h>\\n#include<sys/socket.h>\\n#include <stdlib.h>\\n#include <unistd.h>\\n#include <errno.h>\\n#include<fcntl.h>\\n#include<string.h>\\n#include<netinet/in.h>\\n#include<stdio.h>\\n#include<arpa/inet.h>\\n#include<string.h>\\n\\nint getCA(const char *hostName, const long portNumber)\\n{\\n int sfd =0;\\n int n=0;\\n fd_set rset;\\n struct sockaddr_in server;\\n //const char closeMsg []= "</Result>";\\n const char closeMsg []= "</addrInfo:addressInfo>";\\n char cleansedAddrRecvbuff [64001];\\n\\n sfd= socket(AF_INET,SOCK_STREAM,0);\\n if(sfd<0)\\n {\\n //printf("\\\\n Error: Socket file descriptor is not created!\\\\n");\\n return -1;\\n }\\n bzero(&server,sizeof(struct sockaddr_in));\\n server.sin_family=AF_INET;\\n\\n bzero(&server,sizeof(struct sockaddr_in));\\n server.sin_family=AF_INET;\\n server.sin_port=htons(portNumber);\\n inet_aton(hostName,&server.sin_addr);\\n\\n if(connect(sfd,(struct sockaddr *)&server,sizeof(server)) <0)\\n {\\n //printf("\\\\n Error: Unable to connect to server!\\\\n");\\n return -2;\\n } else {\\n close(sfd);\\n return -3;\\n }\\n}
\", \"post_time\": \"2013-02-27 22:50:08\" },\n\t{ \"post_id\": 3606, \"topic_id\": 800, \"forum_id\": 10, \"post_subject\": \"Re: Search OR AND NOT NEAR\", \"username\": \"rtaylor\", \"post_text\": \"Mohan,Do I need to write a function to create AND & NOT?\\nDoes a concept of NEAR exist?
ECL supports the standard boolean logical operators: AND, OR, and NOT (~ is the shorthand version of NOT).\\n\\nThey are documented here: http://hpccsystems.com/community/docs/ecl-language-reference/html/logical-operators along with a discussion of how to accomplish XOR if you need that.\\n\\nWe do not support NEAR, so you could write a function to accomplish that with your data. I think Bob gave you a good starting point list of functions that may help you with that.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-01 15:41:16\" },\n\t{ \"post_id\": 3598, \"topic_id\": 800, \"forum_id\": 10, \"post_subject\": \"Re: Search OR AND NOT NEAR\", \"username\": \"mohan\", \"post_text\": \"Thanks Bob\\n\\nAre there any samples for AND and NOT?\\nI did see some those functions you mentioned, but it would mean that we need to implement functions to support AND and NOT. I haven't played around with the regex functions yet, but that is definitely something that I can look into.\\n\\nThanks,\\nMohan\", \"post_time\": \"2013-02-28 19:37:20\" },\n\t{ \"post_id\": 3597, \"topic_id\": 800, \"forum_id\": 10, \"post_subject\": \"Re: Search OR AND NOT NEAR\", \"username\": \"bforeman\", \"post_text\": \"Hi Mohan,\\n\\nI know that we support the native AND and NOT operators. We can also handle any REGEX expression using REGEXFIND and REGEXREPLACE. Finally, there is a rich STRING library in the Standard Function Library Reference PDF. Things like:\\n\\nCleanAccents\\nCleanSpaces \\nCompareAtStrength\\nCompareIgnoreCase\\nContains\\nCountWords\\nEditDistance\\nEditDistanceWithinRadius\\nEndsWith \\nEqualIgnoreCase\\nExtract\\nFilter \\nFilterOut\\nFind \\nFindCount\\nFindAtStrength\\nFindAtStrengthReplace\\nFindReplace\\nGetNthWord\\nRemoveSuffix\\nReverse\\nSplitWords\\nSubstituteExcluded \\nSubstituteIncluded \\nStartsWith \\nToLowerCase\\nToTitleCase\\nToUpperCase\\nWildMatch \\nWordCount \\n\\nNot sure about NEAR, but I'm sure we have some equivalent in the language. Maybe someone else can chime in here.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-28 18:58:13\" },\n\t{ \"post_id\": 3596, \"topic_id\": 800, \"forum_id\": 10, \"post_subject\": \"Search OR AND NOT NEAR\", \"username\": \"mohan\", \"post_text\": \"Hi\\n\\nI was looking to implement OR, AND, NOT and NEAR for STRING field search.\\nI have been looking around in the documentation and general search within the site and I could not find anything related to it except for simple OR search.\\n\\nIs this functionality already there by default? \\nI found something in the ECL Programmers Guide for single word search or a list of words, basically it would be a logical OR.\\n\\nDo I need to write a function to create AND & NOT?\\nDoes a concept of NEAR exist?\\n\\nAny help pointing me in the right direction would greatly appreciated.\\n\\n{BOOLEAN Found} FindWord(SearchWordDS L, STRING inword) := TRANSFORM\\nSELF.Found := Std.Str.Find(inword,TRIM(L.word),1)>0;\\nEND;\\n
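For illustration only, a rough NEAR-style check can be built from documented Std.Str helpers; WordsNear below is not a library function, just a sketch of the idea (TRUE when the two words occur within maxGap words of each other):

IMPORT Std;
BOOLEAN WordsNear(STRING txt, STRING w1, STRING w2, UNSIGNED maxGap) := FUNCTION
  Rec := {UNSIGNED pos, STRING word};
  words := DATASET(Std.Str.WordCount(txt),
                   TRANSFORM(Rec,
                             SELF.pos  := COUNTER,
                             SELF.word := Std.Str.ToLowerCase(Std.Str.GetNthWord(txt, COUNTER))));
  hits1 := words(word = Std.Str.ToLowerCase(w1));
  hits2 := words(word = Std.Str.ToLowerCase(w2));
  pairs := JOIN(hits1, hits2,
                ABS((INTEGER)LEFT.pos - (INTEGER)RIGHT.pos) <= maxGap,
                TRANSFORM(Rec, SELF := LEFT), ALL);
  RETURN EXISTS(pairs);
END;
OUTPUT(WordsNear('the quick brown fox jumped', 'quick', 'fox', 2));    // TRUE
OUTPUT(WordsNear('the quick brown fox jumped', 'quick', 'jumped', 2)); // FALSE
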
\", \"post_time\": \"2013-02-28 18:30:10\" },\n\t{ \"post_id\": 3605, \"topic_id\": 801, \"forum_id\": 10, \"post_subject\": \"Re: Add a virtual field to the RECORD structure\", \"username\": \"mohan\", \"post_text\": \"HI Bob\\n\\nThat works!!!\\nThanks \\nMohan\", \"post_time\": \"2013-03-01 13:27:44\" },\n\t{ \"post_id\": 3604, \"topic_id\": 801, \"forum_id\": 10, \"post_subject\": \"Re: Add the a virtual field to the RECORD structure\", \"username\": \"bforeman\", \"post_text\": \"Try adding the VIRTUAL{fileposition} directly to your DATASET statement used to define and build the INDEX. The example code below works fine for me! \\n\\nr := RECORD\\n INTEGER2 code;\\n STRING110 description;\\n STRING42 zone;\\nEND;\\nd := DATASET('~CLASS::BMF::IN::timezones',r,XML('Dataset/area'));\\nOUTPUT(d);\\n\\ndPLUS := DATASET('~CLASS::BMF::IN::timezones',\\n {r,UNSIGNED8 RecPos{virtual(fileposition)}},XML('Dataset/area'));\\n\\nIDX_CODE := INDEX(dPlus,{CODE,RecPos},'~CLASS::BMF::KEY::xmlCODE');\\n\\nBUILD(IDX_CODE,overwrite);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-01 13:18:02\" },\n\t{ \"post_id\": 3599, \"topic_id\": 801, \"forum_id\": 10, \"post_subject\": \"Add a virtual field to the RECORD structure\", \"username\": \"mohan\", \"post_text\": \"Hi\\n\\nI am trying to add a virtual field to the RECORD structure for the file position similar to how the tutorial for "HPCC Data Tutorial - Working with Data - Page 16 - Using our New Data" to create an Index on the State field.\\n\\nMy dataset has about 215,000 records. I am able to query the data successfully when trying this\\n \\nIMPORT TestProject;\\nEXPORT _02File_OriginalProject := DATASET('~testproject::YN::projectsample1',TestProject._01Layout_Project, XML('NewDataSet/DOCUMENT'));\\n
\\n \\nI am assuming that I am getting this error (shown below) because of the dataset size. Is that a correct assumption or do I need to tweak some settings on the cluster.\\n\\nError: System error: 3000: assert(required <= maxOffset) failed - file: /var/jenkins/workspace/LN-Candidate-3.10.2/LN/centos_6_x86_64/HPCC-Platform/common/thorhelper/thorcommon.ipp, line 789 (in Disk Read G1 E2) (0, 0), 3000,\", \"post_time\": \"2013-02-28 21:05:33\" },\n\t{ \"post_id\": 3608, \"topic_id\": 802, \"forum_id\": 10, \"post_subject\": \"Re: Xml version Tag\", \"username\": \"mohan\", \"post_text\": \"Thanks Richard.\", \"post_time\": \"2013-03-01 15:56:55\" },\n\t{ \"post_id\": 3607, \"topic_id\": 802, \"forum_id\": 10, \"post_subject\": \"Re: Xml version Tag\", \"username\": \"rtaylor\", \"post_text\": \"Mohan,\\n\\nThere should be no difference between Thor and Roxie in how they handle data files. \\n\\nWith that said, however, there is a difference between the purpose of the two cluster types. Thor is a back office tool, used to prepare data for end-users to access. Roxie is meant to deliver data to end-users.\\n\\nTherefore, it would never occur to me to put XML data on a Roxie. XML is perfectly good format for receiving data, but if you're querying you want a data format that can be efficiently queried, and that is not the XML format. What you want on Roxie are INDEXes (almost always payload INDEXes) so the data may be retrieved as quickly as possible.\\n\\nSo the "standard" process would be: Thor receives XML data, works with it to distill the "salable" information from it and puts it into payload INDEXes that your Roxie queries use to deliver results to end-users as fast as possible. \\n\\nI would suggest that coming to some of our classes might be beneficial to your overall understanding of the system.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-01 15:53:22\" },\n\t{ \"post_id\": 3602, \"topic_id\": 802, \"forum_id\": 10, \"post_subject\": \"Re: Xml version Tag\", \"username\": \"mohan\", \"post_text\": \"Hi\\n\\nI tried it by removing the xml version tag and I am still getting the same error. My next guess would be that Roxie is cutting the xml file short for some reason. 
But, that is just a guess.\\n\\nCan anyone chime in?\\n\\nThanks,\\nMohan\", \"post_time\": \"2013-03-01 10:46:20\" },\n\t{ \"post_id\": 3600, \"topic_id\": 802, \"forum_id\": 10, \"post_subject\": \"Xml version Tag\", \"username\": \"mohan\", \"post_text\": \"Hi \\n\\nI imported data and wrote a few ecl files and published it to thor and roxie.\\nMy web service works fine in thor, but when I try the same in roxie it gives me an error as shown below.\\n\\nDoes it mean that somehow roxie is recognizing the <xml version="1.0" standalone="yes"> as a root tag and thor doesn't mind it being there?\\n\\nThoughts?\\nI could test it, but my cluster is locked up right now trying to perform a DFU workunit.\\n\\nJust wanted to know the thoughts of those who have experienced this.\\n\\n06findingwordsmarcro3 passingwords.2 Response\\nException\\n Reported by: Roxie\\n Message: Error - syntax error "Trailing tag open after close of root tag" [line 1389668, file offset 66310286] xml version="1.0" standalone="yes"?> <N*ERROR*ewDataSet> <DOCUMENT> <ProjectID\", \"post_time\": \"2013-02-28 21:53:03\" },\n\t{ \"post_id\": 3619, \"topic_id\": 806, \"forum_id\": 10, \"post_subject\": \"Query Status Doubt\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nWhen I am learning HPCC/ECL distributed computing platform, I faced up a confusion dealing with ECL published query.\\n\\n[img]QueryActive[/img]\\n\\nAfter publishing my ECL query, I saw that my ECL might be in query status, then I need to activate it manually to change its status, can someone explain to me why do the system divide into such two states?\\n\\nWith Thanks and Regards\\nBo\", \"post_time\": \"2013-03-04 19:52:54\" },\n\t{ \"post_id\": 3644, \"topic_id\": 809, \"forum_id\": 10, \"post_subject\": \"Re: Incremental Index\", \"username\": \"bforeman\", \"post_text\": \"There was another thread with this same topic regarding the updating of published queries, check out the last post in the thread:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=777&sid=5b57b1d6bc740e18292732821fa8f02a\\n\\nI have one more thing in mind,We can delete published queries from roxie using ECL watch so can i call the same function in ECL code to delete the published query?
\\n\\nYes, you can use the ECL command line tool to do this, but I believe it is not a good practice to try to automate this with ECL Code. Normally there needs to be a Q & A process in between. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-06 13:02:13\" },\n\t{ \"post_id\": 3642, \"topic_id\": 809, \"forum_id\": 10, \"post_subject\": \"Re: Incremental Index\", \"username\": \"arun\", \"post_text\": \"Bob,\\n Thanks for reply and i can merge indexes using Merge command, this will help to solve my problem.\\n\\nAfter publishing the query to roxie,the query set is not updatable unless i UN-publish the same as roxie puts lock.\\nI have to do merging of old and new indexes again as i am getting new files on daily basis.\\n\\nSo,can you suggest me how to remove lock through ECL code or Command line options?\\n\\nI have one more thing in mind,We can delete published queries from roxie using ECL watch so can i call the same function in ECL code to delete the published query?\\n\\nThanks and Regards!!\", \"post_time\": \"2013-03-06 12:37:59\" },\n\t{ \"post_id\": 3632, \"topic_id\": 809, \"forum_id\": 10, \"post_subject\": \"Re: Incremental Index\", \"username\": \"bforeman\", \"post_text\": \"There is a great article in the Programmer's Guide that discusses Superfiles and Superkeys, intended for exactly what you want to achieve.\\n\\nTake a look starting on page 46. Here is the link to download the Programmer's Guide:\\n\\nhttp://hpccsystems.com/download/docs/programmers-guide\\n\\nAfter you look it over and try the examples come back here and post back if you have any questions.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-05 16:04:20\" },\n\t{ \"post_id\": 3624, \"topic_id\": 809, \"forum_id\": 10, \"post_subject\": \"Incremental Index\", \"username\": \"arun\", \"post_text\": \"Hi Friends,\\n I am new to ECL and i have to do incremental index as per below usecases.\\n 1.I will be getting daily new files.\\n 2.I need to add the new files to existing indexes, instead of re-indexing again completely.\\n Grateful for any suggestions! …\", \"post_time\": \"2013-03-05 12:41:38\" },\n\t{ \"post_id\": 3809, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Richard,\\n\\nI have added this as a Issue in JIRA. You can track the issue here\\n\\nhttps://track.hpccsystems.com/browse/HPCC-9009\", \"post_time\": \"2013-03-22 09:27:23\" },\n\t{ \"post_id\": 3778, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,\\n\\nOK, now I see what you're trying to say -- when you change the field name from the tag name in your ECL, when you display the result in either the IDE or ECL Watch, the column header displayed is always the tag name and NOT the field name from your ECL code. IOW, this code:
r := RECORD\\n\\tINTEGER2 code;\\n\\tSTRING110 fred{xpath('description')};\\n\\tSTRING42 zone;\\nEND;\\nd := DATASET('~CLASS::rt::IN::timezones',r,XML('Dataset/area'));\\nOUTPUT(d);
displays an output column named "description" where it should display a "fred" column header.\\n\\nI agree, this is an issue. Can you report it in a JIRA issue, please?\\n\\nRichard\", \"post_time\": \"2013-03-19 14:37:00\" },\n\t{ \"post_id\": 3777, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Richard,\\n\\nChanging the tag name of XML at source in not available as an option for us.\\n\\nThe ECL Language Reference, describes a way of redefining a field name while reading XML through DATASET keyword.\\n\\nPage NO.61 \\n\\n\\n/* an XML file called "MyFile" contains this XML data:\\n<library>\\n<book isbn="123456789X">\\n<author>Bayliss</author>\\n<title>A Way Too Far</title>\\n</book>\\n<book isbn="1234567801">\\n<author>Smith</author>\\n<title>A Way Too Short</title>\\n</book>\\n</library>\\n*/\\n\\nrform := RECORD\\nSTRING author; //data from author tag -- tag name is lowercase and matches field name\\n[b]STRING name {XPATH('title')}; //data from title tag, renaming the field[/b]\\nSTRING isbn {XPATH('@isbn')}; //isbn attribute data from book tag\\ntag\\nEND;\\nbooks := DATASET('MyFile',rform,XML('library/book'));\\n
\\n\\nIn this particular example, as they are changing the name of the tag "title" to "name", we want to achieve similar results.\\n\\nWe tried this with the earlier version and achieved the desired results, but now with the latest version of HPCC we are not able to achieve the same results.\\n\\nThe tag "title" is read as "title" instead of "name". There are no syntax or runtime errors; it's just that the field name doesn't get changed and it is read back as "title".\\n\\nCan you check this issue and enlighten me if I'm wrong on any part.\\n\\nThanks and Regards,\\nApurv Khare\", \"post_time\": \"2013-03-19 12:01:02\" },\n\t{ \"post_id\": 3773, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,Can you help me with how can I change the name of xml tag "message" under the "data" tag (data tag is our row tag) as "strMsg", when i am reading this XML with the help of DATASET keyword.
Is it the tag name in the XML that you want to change? If so, then the answer is -- that's the format of the data and you would have to change it at the source of the data.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-18 20:44:44\" },\n\t{ \"post_id\": 3761, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Richard,\\n\\nSorry if you got confused from my post. The code which i have intact in earlier post doesn't have any syntax or runtime error.\\n\\nThe problem i'm facing or rephrasing my Question as:\\n\\nCan you help me with how can I change the name of xml tag "message" under the "data" tag (data tag is our row tag) as "strMsg", when i am reading this XML with the help of DATASET keyword.\\n\\nPlease check the Comment i added in the code, against the field strMsg.\\n\\nwallComments := RECORD\\n STRING cmtFromName{XPATH('from/name')};\\n STRING cmtMsg{XPATH('message')};\\n STRING createdTime{XPATH('created_time')}; \\nEND;\\n\\nwallEntry := RECORD\\n STRING fName {XPATH('from/name')};\\n STRING toName {XPATH('to/data/name')};\\n STRING strMsg {XPATH('message')}; [b]// Need to change this field name as "strMsg", but when i read the XML the changes doesn't reflects and the field is read with the name "message".[/b] \\n STRING Type {XPATH('type')};\\n STRING createdTime{XPATH('created_time')};\\n DATASET(wallComments) Comments {XPATH('comments/data')};\\nEND;\\n\\nstrFileNam1 := 'allstate_fb_wall';\\n\\n\\nallWalls := DATASET('~file::172.20.104.223::home::user::hpcc::lz_data::web_analytics::' + strFileNam1 + '.xml',\\n wallEntry, XML('json/data'));\\n \\n \\nOUTPUT(allWalls);\\n\\n
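Purely as an illustrative sketch (an assumption, not something verified in this thread): projecting into a record whose field carries no XPATH attribute should make the ECL field name itself appear as the output column header:

DisplayRec := RECORD
  STRING strMsg;
END;
renamed := PROJECT(allWalls, TRANSFORM(DisplayRec, SELF.strMsg := LEFT.strMsg));
OUTPUT(renamed);
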
\", \"post_time\": \"2013-03-18 06:34:23\" },\n\t{ \"post_id\": 3722, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,I'm not facing any error regarding this the code runs fine. The problem is that as the field name is changed the existing code which were refering those field name needs to be changed.
OK, now I'm completely confused. What exactly is the problem?\\n\\nRichard\", \"post_time\": \"2013-03-13 01:18:01\" },\n\t{ \"post_id\": 3706, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi Richard,\\n\\nI'm not facing any error regarding this the code runs fine. The problem is that as the field name is changed the existing code which were refering those field name needs to be changed.\\n\\nI need to know why i cant change the name as i was able to do in the previous version.\\n\\nThanks,\\nApurv\", \"post_time\": \"2013-03-12 11:22:23\" },\n\t{ \"post_id\": 3677, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Re: Cannot rename a field of XML\", \"username\": \"rtaylor\", \"post_text\": \"Apurv,The tag "message" under the "data" tag (data tag is our row tag) was earlier renamed as "strMsg" through ECL code and it was working fine, but now i can't rename it.
What kind of error do you get when you to attempt to "rename" as "strMsg"? Is it a syntax error or a runtime error? And what does that error message say, exactly?\\n\\nRichard\", \"post_time\": \"2013-03-08 14:32:53\" },\n\t{ \"post_id\": 3664, \"topic_id\": 817, \"forum_id\": 10, \"post_subject\": \"Cannot rename a field of XML\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi, \\nMy existing code was reading a XML and renaming some Xml tags as per requirement.\\nBut now after shifting to the New version on HPCC platform 3.10.2-1, my same code is showing some unexpected behaviour:\\n\\nEalier i was able to change the name of xml tags as per ECL code, but now i'm not able to change the name of tags which are at the level after the row tags.\\n\\nFor Eg:\\nThe tag "message" under the "data" tag (data tag is our row tag) was earlier renamed as "strMsg" through ECL code and it was working fine, but now i can't rename it.\\n\\nBut i'm able to rename the tags which are at nested level\\nEg: data/from/name i can rename it to any attribute i wish to.\\n\\nIs this a bug in the new version or expected behavior from Xml.\\n\\nThanks & Regards,\\nApurv Khare\\n\\nXML<?xml version="1.0" encoding="UTF-8" ?>\\n<json>\\n<data>\\n<id>100001472292561_302092713183136</id>\\n<from>\\n<name>Brain Conflict</name>\\n<id>100001472292561</id>\\n</from>\\n<message>Just heard a commercial on Pandora from an Insurance company backed by All-State. At the end of the commercial, a notice says, "Not available in all states."\\n....</message>\\n<type>status</type>\\n<created_time>2012-02-23T03:19:47+0000</created_time>\\n<updated_time>2012-02-23T03:19:47+0000</updated_time>\\n<likes>\\n<data>\\n<name>Raila Staggs</name>\\n<id>1356593568</id>\\n</data>\\n<data>\\n<name>Unsane Asylum</name>\\n<id>100003312266812</id>\\n</data>\\n<data>\\n<name>Jarrod Osborn</name>\\n<id>100002972358360</id>\\n</data>\\n<data>\\n<name>Tyson Dale</name>\\n<id>581905565</id>\\n</data>\\n<count>5</count>\\n</likes>\\n<is_published>true</is_published>\\n</data>\\n<data>\\n<id>100003567798206_329277407108635</id>\\n<from>\\n<name>Sherese Carpenter State Farm</name>\\n<id>100003567798206</id>\\n</from>\\n<message>STATE FARM IS THE #1 INSURANCE COMPANY IN THE U.S....GET WITH THE BEST! \\nCompany History: \\nThe Allstate Corporation is the holding company for Allstate Insurance Company, the second largest property and casualty insurance company by premiums in the United States. Allstate controls about 12 percent of the U.S. home and auto insurance market, SECOND TO ONLY STATE FARM INSURANCE COMPANIES.\\nhttp://www.fundinguniverse.com/company-histories/The-Allstate-Corporation-Company-History.html</message>\\n<picture>http://external.ak.fbcdn.net/safe_image.php?d=AQAeIm_T2KZ7bvcp&w=90&h=90&url=http%3A%2F%2Fwww.fundinguniverse.com%2Fimages%2Flogo.png</picture>\\n<link>http://www.fundinguniverse.com/company-histories/The-Allstate-Corporation-Company-History.html</link>\\n<name>The Allstate Corporation -- Company History</name>\\n<caption>www.fundinguniverse.com</caption>\\n<description>The Allstate Corporation is the holding company for Allstate Insurance Company, the second largest property and casualty insurance company by premiums in the United States. Allstate controls about 12 percent of the U.S. home and auto insurance market, second only to State Farm Insurance Companies. 
I...</description>\\n<icon>http://static.ak.fbcdn.net/rsrc.php/v1/yD/r/aS8ecmYRys0.gif</icon>\\n<type>link</type>\\n<created_time>2012-02-23T03:12:05+0000</created_time>\\n<updated_time>2012-02-23T03:12:05+0000</updated_time>\\n<is_published>true</is_published>\\n</data>\\n</json>
\\n\\nCODEwallComments := RECORD\\n\\tSTRING cmtFromName{XPATH('from/name')};\\n\\tSTRING cmtMsg{XPATH('message')};\\n\\tSTRING createdTime{XPATH('created_time')}; \\nEND;\\n\\nwallEntry := RECORD\\n\\tSTRING fName {XPATH('from/name')};\\n\\tSTRING toName {XPATH('to/data/name')};\\n STRING strMsg {XPATH('message')};\\n\\tSTRING Type {XPATH('type')};\\n\\tSTRING createdTime{XPATH('created_time')};\\n\\tDATASET(wallComments) Comments {XPATH('comments/data')};\\nEND;\\n\\nstrFileNam1 := 'allstate_fb_wall';\\n\\n\\nallWalls := DATASET('~file::172.20.104.223::home::user::hpcc::lz_data::web_analytics::' + strFileNam1 + '.xml',\\n wallEntry, XML('json/data'));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\t\\t \\nOUTPUT(allWalls);\\n
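A minimal sketch of my own (assuming the allWalls declaration above): whatever header the result grid ends up showing, downstream ECL always keys on the field name declared in the RECORD, so the rename can be confirmed by filtering and outputting on the new identifier.

// hypothetical check, not part of the original post
msgsOnly := allWalls(TRIM(strMsg) <> '');
OUTPUT(msgsOnly, {msgsOnly.strMsg, msgsOnly.createdTime}, NAMED('RenamedFieldCheck'));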
\", \"post_time\": \"2013-03-08 06:17:43\" },\n\t{ \"post_id\": 3684, \"topic_id\": 818, \"forum_id\": 10, \"post_subject\": \"Re: Reading Indexed File using Dataset\", \"username\": \"arun\", \"post_text\": \"Thx a lot Richard,\\nActually i seen the manuals for ecl and they explained the function usages very well but i want to know,how that functions are internally working like you explained in previous post.\\n\\nIn Hadoop,implementation of functions are transparent and documentation are available.And i saw the ecl docs i got overview but i am looking more internally like data split, reading, merging, sorting, etc.\", \"post_time\": \"2013-03-09 02:38:28\" },\n\t{ \"post_id\": 3678, \"topic_id\": 818, \"forum_id\": 10, \"post_subject\": \"Re: Reading Indexed File using Dataset\", \"username\": \"rtaylor\", \"post_text\": \"arun,\\n\\nThe problem is that you're trying to re-define an INDEX file (presuming you did use BUILD to create that file) as a DATASET. \\n\\nThe reason there are two different file declaration keywords in ECL is that they have very different internal formats. \\n\\nA DATASET logical file:
\\n\\nAn INDEX logical file:
\\nOne additional piece of information -- in your ECL code, you can use an INDEX just like a DATASET, so there's no need to try to re-define the INDEX logical file as a DATASET, just use the INDEX.\\n\\nI also suggest that you take a look at the Programmer's Guide article "Using ECL Keys (INDEX files)" that discusses how to use INDEXes. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-08 15:02:52\" },\n\t{ \"post_id\": 3668, \"topic_id\": 818, \"forum_id\": 10, \"post_subject\": \"Reading Indexed File using Dataset\", \"username\": \"arun\", \"post_text\": \"Hi,\\nI have doubt regarding indexed file and dataset.\\n\\n 1.Record Structure:\\n rec1 := record\\n\\tunsigned4 id;\\n\\tSTRING30 name;\\n\\tinteger3 age;\\n\\tstring30 gen;\\n\\tUNSIGNED8 RecPtr {virtual(fileposition)};\\nend;
\\n \\n 2.Index: \\n indexds1 := index(inputds1,{id,name,RecPtr},'~MergeTest::IDX1');
\\n\\n 3.Reading the index file through dataset: \\n idx1 := DATASET('~MergeTest::IDX1',{unsigned id, string name, unsigned recptr},THOR);
\\n\\nBut I got an error like this: Error: System error: 10124: Graph[28], diskread[29]: diskread: Layout does not match published layout. File: mergetest::idx1_2 (0, 0), 10124,
\\nEven though the Index layout and the Dataset layout are the same.\\nAny idea why I am getting this error?
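Following the advice in the reply above (an INDEX can be used wherever a recordset is expected, so there is no need to re-declare the key as a THOR DATASET), a minimal sketch using the same layout; the base file name is hypothetical:

rec1 := RECORD
  UNSIGNED4 id;
  STRING30  name;
  INTEGER3  age;
  STRING30  gen;
  UNSIGNED8 RecPtr {VIRTUAL(fileposition)};
END;

inputds1 := DATASET('~MergeTest::Input', rec1, THOR);   // hypothetical base file

indexds1 := INDEX(inputds1, {id, name, RecPtr}, '~MergeTest::IDX1');

// read the key directly -- no DATASET(...,THOR) re-declaration of the INDEX file
OUTPUT(indexds1(id = 1));
OUTPUT(COUNT(indexds1));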
\", \"post_time\": \"2013-03-08 09:56:52\" },\n\t{ \"post_id\": 3704, \"topic_id\": 821, \"forum_id\": 10, \"post_subject\": \"Re: Super sub keys of different width\", \"username\": \"sameermsc\", \"post_text\": \"Thanks Richard,\\n\\nAs pointed out by you, indexes have been built on different sized clusters, rebuilding them on the same sized cluster has solved the problem\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-03-12 11:00:35\" },\n\t{ \"post_id\": 3685, \"topic_id\": 821, \"forum_id\": 10, \"post_subject\": \"Re: Super sub keys of different width\", \"username\": \"rtaylor\", \"post_text\": \"Sameer,
Error: System error: 0: Graph[1], keyedjoin[4]: Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin
This error sounds to me like the subkeys have different numbers of file parts -- IOW they were built on different sized clusters. Is that the case? If so, try re-building the keys on the same cluster and see if that cures the problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-09 14:21:01\" },\n\t{ \"post_id\": 3682, \"topic_id\": 821, \"forum_id\": 10, \"post_subject\": \"Super sub keys of different width\", \"username\": \"sameermsc\", \"post_text\": \"Hi\\n\\nI am trying to join two indexes, of which the first index is read from a single file and the second index is read from a super file (Master) containing a set of files and a (Child) super file (which has its own sub files) as its subfiles. This WU executes fine on hthor but fails on thor\\n\\nError: System error: 0: Graph[1], keyedjoin[4]: Super sub keys of different width cannot be mixed with distributed(tlk) keys in keyedjoin\\n\\nJust to experiment, i removed \\ni) the Child super file from the Master super file, Code runs fine\\nii) removed all sub files and retain only the Child super file in Master super file, code runs fine\\niii) retained all sub files, removed the child super file and added one of its sub file to the Master Super file, code fails\\n\\nI have to use the Master super file with none of its sub files removed for my work\\nany solution ?\\n\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-03-08 17:34:15\" },\n\t{ \"post_id\": 3688, \"topic_id\": 822, \"forum_id\": 10, \"post_subject\": \"Re: Using Side effects Within TRANSFORMS?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,What I'm trying to do is: with a dataset containing a list of filename of sprayed in files, and a list of their target filenames, call a PROJECT to load each target with transfomed data from the input sprayed files. (and at the same time generate a dataset which is the log of the process)
Are these files the same structure? If so, why not just put them all in a superfile, then run your TRANSFORM against that? Or is there some reason that each file has to be treated separately?\\n\\nAnd as afar as logging the process, I believe there are logging functions in the standard library that you could consider using, too.\\n\\nRichard\", \"post_time\": \"2013-03-10 15:08:53\" },\n\t{ \"post_id\": 3687, \"topic_id\": 822, \"forum_id\": 10, \"post_subject\": \"Re: Using Side effects Within TRANSFORMS?\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI just get syntax errors, whatever I try. The actual example above is wrong, it was just an example of the process I'm trying to do.\\n\\nWhat I'm trying to do is: with a dataset containing a list of filename of sprayed in files, and a list of their target filenames, call a PROJECT to load each target with transfomed data from the input sprayed files. (and at the same time generate a dataset which is the log of the process)\\n\\nI'm not trying to write to the same input file, agreed that would never work.\\n\\nI could leave the PROJECT to another pass and use APPLY on the dataset, but I was trying to avoid using APPLY as I've found it difficult to use (having to use NOTHOR and GLOBAL constructs for reasons I don't fully understand.)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-03-10 08:32:25\" },\n\t{ \"post_id\": 3686, \"topic_id\": 822, \"forum_id\": 10, \"post_subject\": \"Re: Using Side effects Within TRANSFORMS?\", \"username\": \"rtaylor\", \"post_text\": \"Allan, but I can't get it to work.
Exactly what is not working? \\nAre you getting a syntax error? \\nAre you getting a runtime error that you're trying to overwrite an existing file without the OVERWRITE option on OUTPUT?\\nOr is it some other problem?\\n\\nI could see any/all of these as problems, but I'm going to guess that it's a runtime issue because, if I read your code snippet correctly, it appears like you're trying to read from and write to the same file, which is not allowed, ever. If you change the name of the OUTPUT file, then maybe your code might work (or not, if there are any other problems).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-09 14:36:55\" },\n\t{ \"post_id\": 3683, \"topic_id\": 822, \"forum_id\": 10, \"post_subject\": \"Using Side effects Within TRANSFORMS?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have a TRANSFORM used on a PROJECT that is transforming a dataset of filenames.\\nAs a side effect of the TRANSFORM I want a DATASET, created in that TRANSFORM (not the original dataset), to be OUTPUT.\\n\\nReading the Forum, I see there are tricks one can use with the WHEN construct, but I can't get it to work.\\n\\nExample snippet of code:\\n\\n Layouts.LogFile DoOut(SprayedR L) := TRANSFORM\\n \\n SideEffect(DATASET(Layouts.SprayIngest) d,STRING ofn) := FUNCTION\\n result := WHEN(d,OUTPUT(d,,ofn));\\n RETURN result;\\n END;\\n \\n ds := DATASET(L.Sprayed_fname,Layouts.SprayIngest,CSV(SEPARATOR(c.Field_seperator),TERMINATOR(c.Record_terminator)));\\n SideEffect(ds,L.Sprayed_fname);\\n SELF := L.something;\\nEND;\\nDoLoad := PROJECT(DoSpray,DoOut(LEFT));\\n
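For reference, a rough sketch of the superfile alternative suggested in the reply above, assuming every sprayed file shares the Layouts.SprayIngest structure (the superfile name is hypothetical and the existing DoSpray dataset of filenames is reused):

IMPORT STD;

SuperName := '~thor::ingest::sprayed_all';   // hypothetical superfile name
files     := GLOBAL(DoSpray, FEW);

// one-off maintenance step: collect every sprayed file into the superfile
BuildSuper := SEQUENTIAL(
  STD.File.CreateSuperFile(SuperName),      // create once
  NOTHOR(APPLY(files, STD.File.AddSuperFile(SuperName, files.Sprayed_fname))));

// then a single PROJECT (or other TRANSFORM-based operation) over the combined data
allSprayed := DATASET(SuperName, Layouts.SprayIngest,
                      CSV(SEPARATOR(c.Field_seperator), TERMINATOR(c.Record_terminator)));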
\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-03-08 22:11:34\" },\n\t{ \"post_id\": 3709, \"topic_id\": 827, \"forum_id\": 10, \"post_subject\": \"Re: MERGE Function\", \"username\": \"arun\", \"post_text\": \"Thx Sammer,\\nI am doing the incremental index, so i don't want to reload the index completely. I saw the merge command will merge the index files but it is giving me as a dataset instead of indexed file.\\nI saw this Merge function desc in manual "The MERGE function returns a single dataset or index containing all the records ".\\n\\nEven i am facing problem in superfile with the record pointers but right now i concentrating in merge function only.\\n\\n\\nRegards,\\nArun Prakash\", \"post_time\": \"2013-03-12 11:54:58\" },\n\t{ \"post_id\": 3705, \"topic_id\": 827, \"forum_id\": 10, \"post_subject\": \"Re: MERGE Function\", \"username\": \"sameermsc\", \"post_text\": \"Arun,\\n\\nif the indexes you have are payload indexes with no virtual fpos field, you can combine them and write as a new index, else try adding those indexes to a superfile\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-03-12 11:18:58\" },\n\t{ \"post_id\": 3703, \"topic_id\": 827, \"forum_id\": 10, \"post_subject\": \"MERGE Function\", \"username\": \"arun\", \"post_text\": \"Hi,\\nI merged the 2 index files using Merge command.\\nAfter merging my index file became normal logical file, how to get as a indexed file instead of normal recordset.\\nAny suggestions!\\n\\nRegards,\\nArun Prakash.\", \"post_time\": \"2013-03-12 10:49:18\" },\n\t{ \"post_id\": 3724, \"topic_id\": 828, \"forum_id\": 10, \"post_subject\": \"Re: Index File Size\", \"username\": \"arun\", \"post_text\": \"Thx Bob & Richard\\n\\nRegards,\\nArun Prakash\", \"post_time\": \"2013-03-13 04:31:34\" },\n\t{ \"post_id\": 3723, \"topic_id\": 828, \"forum_id\": 10, \"post_subject\": \"Re: Index File Size\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nOne difference between an INDEX and a DATASET is the number of file parts -- INDEXes always have 1 extra part, which is always 32K in size, and it contains the binary tree for the INDEX.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-03-13 01:22:00\" },\n\t{ \"post_id\": 3713, \"topic_id\": 828, \"forum_id\": 10, \"post_subject\": \"Re: Index File Size\", \"username\": \"bforeman\", \"post_text\": \"Are you referring to the size of the INDEX? Are you using payload or non-payload indexes? There could be a little overhead involved in the INDEX, but try generating a sample dataset of several thousand records and compare again. My point is that HPCC and ECL loves BIG data \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-12 13:04:45\" },\n\t{ \"post_id\": 3707, \"topic_id\": 828, \"forum_id\": 10, \"post_subject\": \"Index File Size\", \"username\": \"arun\", \"post_text\": \"Hi,\\nI have dataset with 10 records and file size is 670. And i indexed the dataset, then my file size became 57,344.\\nI am really surprised because for indexing it is using some compression technique and some tree data structure for fast retrieval but how come file is getting increased.\\nCorrect me if am wrong.\", \"post_time\": \"2013-03-12 11:35:10\" },\n\t{ \"post_id\": 3712, \"topic_id\": 831, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc Internal Working\", \"username\": \"bforeman\", \"post_text\": \"Arun, the HPCC system is very transparent. As you run any ECL workunit, you have instant access to the workunit details via the ECL Watch tab. 
On that tab is everything you need to know about the internal process.\\n\\nStart with the Graph option, which gives you a visual display on the job that's getting done, and also identifies skew and local operations on the cluster. You have access to timings, and near the bottom there is a helper section, which shows log activity for the target clusters.\\n\\nIf you are interested in the C++ that is generated by ECL, you can set a Debug switch in the workunit to SaveCPPTempFiles.\\n\\nHopefully this is enough for you to explore and get started, and remember, being open source you also have access to the binaries and source code if needed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-12 12:52:45\" },\n\t{ \"post_id\": 3711, \"topic_id\": 831, \"forum_id\": 10, \"post_subject\": \"Hpcc Internal Working\", \"username\": \"arun\", \"post_text\": \"Hi,\\nActually i seen the manuals for ecl and they explained the function usages very well but i want to know,how that functions are internally working.\\n\\nIn Hadoop,implementation of functions are transparent and documentation are available.But i saw the ecl docs(ECLLanguageReference, etc),i got overview but i am looking more internally like data split, reading, merging, sorting, etc. \\nSo anyone can suggest me, what is best way to do.\\n\\nRegards,\\nArun Prakash\", \"post_time\": \"2013-03-12 12:06:46\" },\n\t{ \"post_id\": 3906, \"topic_id\": 854, \"forum_id\": 10, \"post_subject\": \"Re: Spraying lots of small files\", \"username\": \"jeeves\", \"post_text\": \"Chris,\\n\\nTo clarify this is how the command actually looks like.\\n\\n
dfuplus action=spray server=http://10.253.55.26:8010/ username=hpccdemo overwrite=1 replicate=1 srcip=10.253.45.142 srcfile=/mnt/prn/*.xml dstcluster=mythor dstname=in::allxml PREFIX=FILENAME,FILESIZE nosplit=1
\\n\\n/mnt/prn\", \"post_time\": \"2013-04-09 14:40:42\" },\n\t{ \"post_id\": 3905, \"topic_id\": 854, \"forum_id\": 10, \"post_subject\": \"Re: Spraying lots of small files\", \"username\": \"clo\", \"post_text\": \"Hi, I was wondering if you can provide more of your script so I can be sure of all the variables you're using. I noticed that your sourcefile is specifying * (srcfile=$file/*) while your destination filename is specified with a counter (dstname=$spray_base"_"$cnt). Was this intentional?\\n\\n- Chris\", \"post_time\": \"2013-04-09 12:45:21\" },\n\t{ \"post_id\": 3796, \"topic_id\": 854, \"forum_id\": 10, \"post_subject\": \"Spraying lots of small files\", \"username\": \"jeeves\", \"post_text\": \"I have to spray around 50000 xml files. The total size is around 250 MB.\\n\\nAll the 50000 files are in one folder and I use the following command to spray\\n\\ndfuplus action=spray server=$protocol://$ip:$port/ username=$user overwrite=1 replicate=1 srcip=$ip srcfile=$file/* dstcluster=mythor dstname=$spray_base"_"$cnt PREFIX=FILENAME,FILESIZE nosplit=1\\n\\nThis works fine except that only exactly 14980 files get sprayed. I tried with a different set of 50000 files and again exactly only 14980 files got sprayed.\\n\\nI have worked around this by only spraying around 14000 files at a time.\\nCan anyone throw any light on what is really happening here?\", \"post_time\": \"2013-03-21 14:36:15\" },\n\t{ \"post_id\": 3824, \"topic_id\": 855, \"forum_id\": 10, \"post_subject\": \"Re: Full keyed join\", \"username\": \"jeeves\", \"post_text\": \"Another update - Full keyed join works fine on roxie and fails only on thor.\", \"post_time\": \"2013-03-25 07:37:20\" },\n\t{ \"post_id\": 3800, \"topic_id\": 855, \"forum_id\": 10, \"post_subject\": \"Re: Full keyed join\", \"username\": \"jeeves\", \"post_text\": \"I repeated this experiment with a 3 node thor and 2 node Roxie on AWS and got a different error. The difference is probably because this is a different version of HPCC(community_3.8.2-2 )\\n\\nError: System error: -1: Graph[1], keyedjoin[4]: SLAVE 10.254.20.238:20100: CFileSerialStream::get read past end of stream (0, 0), -1,\", \"post_time\": \"2013-03-21 15:08:34\" },\n\t{ \"post_id\": 3798, \"topic_id\": 855, \"forum_id\": 10, \"post_subject\": \"Full keyed join\", \"username\": \"jeeves\", \"post_text\": \"I have a full keyed join which is working fine in a single node(only one thor - community_3.8.2-2) cluster. But it fails on a 5 thor 3 roxie cluster(enterprise_3.10.4-1)\\n\\nThe error is:\\n\\n22: System error: 22: Graph[1], keyedjoin[4]: SLAVE 10.0.1.7:20100: checked_pread, Invalid argument\\n\\nDoes this sound familiar to any one?\", \"post_time\": \"2013-03-21 14:44:03\" },\n\t{ \"post_id\": 3823, \"topic_id\": 856, \"forum_id\": 10, \"post_subject\": \"Re: Removing empty XML tags.\", \"username\": \"Allan\", \"post_text\": \"Thanks Dan,\\n\\nWill give it a go. Thought it was a 'greedy' issue but did not know the syntax to override.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-03-24 16:21:35\" },\n\t{ \"post_id\": 3821, \"topic_id\": 856, \"forum_id\": 10, \"post_subject\": \"Re: Removing empty XML tags.\", \"username\": \"DSC\", \"post_text\": \"You may have to try the non-greedy form of your regex:\\n\\ninp:='<tagA>abc</tagA><tB></tB>';\\nREGEXREPLACE('<.+?></.+?>',inp,'',NOCASE);
\\n(Add question marks after your '+' symbols.)\\n\\nI didn't test this, but I think it's the right pattern. Hope it helps.\\n\\nDan\", \"post_time\": \"2013-03-23 14:04:03\" },\n\t{ \"post_id\": 3803, \"topic_id\": 856, \"forum_id\": 10, \"post_subject\": \"Removing empty XML tags.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThere is an option on OUTPUT of XML, 'OPT' that removes empty tags while outputing.\\n\\nHowever I need to remove empty tags from an attribute within the ECL.\\n\\nI could try to use the PIPE option to OUTPUT but I'm not sure that can end up with another attribute.\\n\\nThe simplest option seems to be REGEXREPLACE, but here I get stuck.\\nThe simple approach below removes all tags, not just the empty ones.\\n\\ninp:='<tagA>abc</tagA><tB></tB>';\\nREGEXREPLACE('<.+></.+>',inp,'',NOCASE);\\n
\\n\\nI can get the above to work correctly if I replace the '.' wild character with a list of characters, e.g.:\\n\\ninp:='<tagA>abc</tagA><tB></tB>';\\nREGEXREPLACE('<[a-z]+></[a-z]+>',inp,'',NOCASE);\\n
\\nThis seems strange.\\nCould someone either explain this strange behaviour or better still give me a regex that will remove empty tags. (with white space as data being equivelent to an empty tag, but that's just an elaboration.) (Don't have to cope with tags that have attributes, one step at a time)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-03-21 16:33:27\" },\n\t{ \"post_id\": 3914, \"topic_id\": 874, \"forum_id\": 10, \"post_subject\": \"hthor Search Webservice Question\", \"username\": \"Greg\", \"post_text\": \"Hi,\\n\\nMy company is using an HTHOR database, and we have a section we can then query the database based on a number of values. When all of these values are blank though, and the user hits the submit button, the database returns all items it contains.\\n\\nNow, I'm currently using a Visual Basic add-on for Excel 2010, and trying to code a button to behave exactly the same as the submit button on the query page (port 8002), as mentioned before, also as if all input values were blank. In other words, when I select this button in Excel, I want the database to return all items it contains. \\n\\nThe code for the web services for the HTHOR database has already been generated by the add-on, and one of the web services I see is "personsfilesearchserviceff", that accepts a total of 5 input parameters (I realize this specific class could be specific to only me though).\\n\\nPublic Function wsm_personsfilesearchserviceff(ByVal str_val1 As String, ByVal str_val2 As String, ByVal str_val3 As String, ByVal str_val4 As String, ByRef any_Results As MSXML2.IXMLDOMNodeList) As struct_ArrayOfEspException
\\n\\nTo me, this seems like the method I would need to call when I press "Submit" on my excel form. Since I want all of the strings to be blank, I just set the associated values as an empty string. However, I am struggling to determine what exactly I would put for the value "any_results". I am unfamiliar with the type, but I'm assuming that this would allow me to return all results in the Database?\\n\\nMy current code for the button in Excel is below. Help is much appreciated!\\n\\nPrivate Sub get_data()\\nDim info As New clsws_hthor\\nDim result As struct_ArrayOfEspException\\n\\nresult = info.wsm_personsfilesearchserviceff("", "", "", "", ?) 'The first four values would be emprty string, but what would the fifth input be?\\n\\nSet TestRange = Range("B1")\\nTestRange.Value = result\\n\\nEnd Sub\\n
\\n\\nThanks\", \"post_time\": \"2013-04-11 03:58:30\" },\n\t{ \"post_id\": 3965, \"topic_id\": 889, \"forum_id\": 10, \"post_subject\": \"Re: Spraying an xml file\", \"username\": \"bforeman\", \"post_text\": \"Think of a row tag as a record identifier. The tag is used to parse XML streams as records using the row tag as a boundary.\\n\\nSo here is a simple row tag example:\\n\\n<Dataset>\\n <area>\\n <code>201</code>\\n <description>PA Pennsylvania</description>\\n <zone>Eastern Time Zone</zone>\\n </area>\\n <area>\\n <code>202</code>\\n <description>OH Ohio (Cleveland area)</description>\\n <zone>Eastern Time Zone</zone>\\n </area>\\n </Dataset>[code][/code]
\\n\\n In the above example, Dataset is the root document tag, and "area" is the row tag.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-04-24 13:01:08\" },\n\t{ \"post_id\": 3962, \"topic_id\": 889, \"forum_id\": 10, \"post_subject\": \"Spraying an xml file\", \"username\": \"nvasil\", \"post_text\": \"Can somebody explain what the row tag is ?\", \"post_time\": \"2013-04-23 19:04:22\" },\n\t{ \"post_id\": 4043, \"topic_id\": 901, \"forum_id\": 10, \"post_subject\": \"Re: How do I conditionally compile for Thor and Roxie.\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob I'll take a look.\", \"post_time\": \"2013-05-02 09:01:01\" },\n\t{ \"post_id\": 4038, \"topic_id\": 901, \"forum_id\": 10, \"post_subject\": \"Re: How do I conditionally compile for Thor and Roxie.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nDid you look at #OPTION? That's the only way I could think of doing that.\\n\\nTry using the targetClusterType and allowedclusters options. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-05-01 17:20:20\" },\n\t{ \"post_id\": 4035, \"topic_id\": 901, \"forum_id\": 10, \"post_subject\": \"How do I conditionally compile for Thor and Roxie.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWe have different versions of Thor and Roxie, We have ECl code that must run on both.\\n\\nHow do we conditionally compile for each environment?\\n\\ne.g.\\n\\n#IFDEF something\\n#ELSE\\n#END\\n
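ECL's template language offers #IF/#ELSE/#END, which can stand in for the #IFDEF idea above (see also the #OPTION suggestion in the reply above). A hedged sketch only: the flag below is not an HPCC-defined symbol, just a constant flipped per build target.

LOADXML('<xml/>');        // enables template statements at the outer level
#DECLARE(ForRoxie)
#SET(ForRoxie, 0)         // set to 1 when compiling the Roxie flavour

#IF(%ForRoxie% = 1)
  OUTPUT('Roxie-specific code path');
#ELSE
  OUTPUT('Thor-specific code path');
#END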
\\n\\nThanks\", \"post_time\": \"2013-05-01 13:31:45\" },\n\t{ \"post_id\": 4044, \"topic_id\": 902, \"forum_id\": 10, \"post_subject\": \"Re: Gendata.ecl?\", \"username\": \"Rsomayajula\", \"post_text\": \"Perfect, thanks a bunch!\", \"post_time\": \"2013-05-02 15:58:15\" },\n\t{ \"post_id\": 4042, \"topic_id\": 902, \"forum_id\": 10, \"post_subject\": \"Re: Gendata.ecl?\", \"username\": \"gsmith\", \"post_text\": \"It is a separate download which accompanies the programmers guide:\\nhttp://hpccsystems.com/download/docs/programmers-guide\\n\\nGordon.\", \"post_time\": \"2013-05-02 08:18:20\" },\n\t{ \"post_id\": 4041, \"topic_id\": 902, \"forum_id\": 10, \"post_subject\": \"Gendata.ecl?\", \"username\": \"Rsomayajula\", \"post_text\": \"Hello, \\nI installed the ECL IDE on my laptop, but cant seem to find the Gendata.ecl file, I looked all over the internet, can someone help me? \\n\\nThanks! \\nRach\", \"post_time\": \"2013-05-02 01:49:33\" },\n\t{ \"post_id\": 4170, \"topic_id\": 907, \"forum_id\": 10, \"post_subject\": \"Re: Returning char* from an API in external service (.SO) to\", \"username\": \"vsreedharan\", \"post_text\": \"Hi,\\n\\nThanks for the help, It resolved my issue. \\nSince my C++ function return type was void, I didn't specify any return value in the ECL definition.\\n\\nThanks,\\nSreedharan\", \"post_time\": \"2013-06-05 15:45:56\" },\n\t{ \"post_id\": 4167, \"topic_id\": 907, \"forum_id\": 10, \"post_subject\": \"Re: Returning char* from an API in external service (.SO) to\", \"username\": \"rtaylor\", \"post_text\": \"The BEGINC++ structure docs contain a table of all the data type mappings for parameters and return types: http://hpccsystems.com/community/docs/ecl-language-reference/html/beginc%20%20-structure\\n\\nYour function prototype in your SERVICE structure neglected to specify the return type of your echo() function. Try it this way:\\nHPCCPluginLib := SERVICE\\n STRING Echo(String a,String b) : library='simplehpccplugin',entrypoint='elEchoString';\\nEND;\\n\\n\\nString\\n a := 'Hello';\\nString b := 'World';\\nHPCCPluginLib.Echo(a,b);\\noutput(b);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-31 14:28:58\" },\n\t{ \"post_id\": 4165, \"topic_id\": 907, \"forum_id\": 10, \"post_subject\": \"Re: Returning char* from an API in external service (.SO) to\", \"username\": \"vsreedharan\", \"post_text\": \"Hi,\\nCan you please provide me a sample ECL code to invoke this API?\\n\\nI created a library file using examplelib.cpp & examplelib.hpp. While creating the library(.so) I linked it with eclrtl library.\\n\\nWhen I invoked elEchoString() function, It failed.\\n\\n\\nHPCCPluginLib := SERVICE\\n Echo( String a, String b) : library='simplehpccplugin',entrypoint='elEchoString';\\nEND;\\n\\n\\nString a := 'Hello';\\nString b := 'World';\\nHPCCPluginLib.Echo(a,b);\\noutput(b);\\n
\\n\\nError message reported by the ECL IDE:\\n\\nError: SIG: Segmentation fault(11), accessing 00007F5EDC383166, IP=00007F5EDC180829 (0, 0), 1000, \\n
\\n\\nPlease let me know what I am missing\", \"post_time\": \"2013-05-31 11:09:20\" },\n\t{ \"post_id\": 4075, \"topic_id\": 907, \"forum_id\": 10, \"post_subject\": \"Re: Returning char* from an API in external service (.SO) to\", \"username\": \"gsmith\", \"post_text\": \"From: https://github.com/hpcc-systems/HPCC-Pl ... examplelib\\n\\nEXAMPLELIB_API void EXAMPLELIB_CALL elEchoString(unsigned & tgtLen, char * & tgt, unsigned srcLen, const char * src)\\n{\\n tgt = (char *)CTXMALLOC(parentCtx, srcLen);\\n memcpy(tgt,src,srcLen);\\n tgtLen = srcLen;\\n}\", \"post_time\": \"2013-05-12 18:31:08\" },\n\t{ \"post_id\": 4074, \"topic_id\": 907, \"forum_id\": 10, \"post_subject\": \"Returning char* from an API in external service (.SO) to ECL\", \"username\": \"vsreedharan\", \"post_text\": \"Hi,\\n\\n1.\\tMy requirement is to pass a string from ECL to an API in external service (.SO file) and obtain a string(char*) output. Could someone please let me know the best way to do it?\\n2.\\tOne of the external service APIs require a callback function pointer as an argument. Is it possible to pass a call-back function pointer as an argument from ECL?\\n\\nFor the first requirement, Since the arguments can be passed as a pointer in C++, I thought of storing the result string in the argument itself. (Length of result string will be always less than the argument).\\n\\nI tried the following sample code and it resulted in a segmentation fault. In the following test code, I am just altering one of the characters in the input. \\nC++ function,\\nint test_api(char* str)\\n{\\n //If I test after commenting the following line, there is no segmentation fault\\n str[1] = str[0]; \\n return 0;\\n}\\n
\\n\\nECL code,\\n\\nMyLib := SERVICE\\n \\tinteger TestAPI( VARSTRING str) : library='mylib',entrypoint='test_api';\\n END;\\n\\nVARSTRING str := 'Hi!';\\nret := MyLib.TestAPI(str);\\noutput(str);\\n
\\n\\nError message,\\nError: SIG: Segmentation fault(11), accessing 00007FD7981E81E1, IP=00007FD792DF1783 (0, 0), 1000, \\n
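Putting the two replies above together, a hedged sketch of the string-in/string-out pattern (the library name assumes the stock examplelib sample; substitute your own .so): an ECL STRING return value maps to the leading (length, pointer) pair in the C++ entry point, and each CONST STRING parameter maps to its own (length, pointer) pair, which is exactly the elEchoString signature shown above.

EchoLib := SERVICE
  STRING Echo(CONST STRING src) : library='examplelib', entrypoint='elEchoString';
END;

OUTPUT(EchoLib.Echo('Hello world'));   // expect the input echoed back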
\", \"post_time\": \"2013-05-12 06:08:17\" },\n\t{ \"post_id\": 4104, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Re: Unknown identifier "AND"??\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks a ton!! That worked. \\n\\nNow I have certainly learnt the difference between a recordset filtering and a boolean definition, the hard way \\n\\nThanks once again.\", \"post_time\": \"2013-05-17 03:30:02\" },\n\t{ \"post_id\": 4101, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Re: Unknown identifier "AND"??\", \"username\": \"rtaylor\", \"post_text\": \"OK, as my friend Bob just pointed out to me, I did not read your code carefully enough.
\\n\\nHere's your problem: IsFloridian, IsMale, and IsBorn80 are all supposed to be BOOLEAN definitions. But you have coded them as recordset definitions, not BOOLEAN. That's why you're getting that strange error message about AND -- because IsFloridian is not a BOOLEAN definition, therefore the compiler is not expecting the boolean AND operator at that point in your code.\\n\\nThe correct code should be:
IMPORT $;\\n//Check if a person belongs to Florida\\n// IsFloridian := $.Persons(State = 'FL');\\nIsFloridian := $.Persons.State = 'FL';\\n//Check if a person is male\\n// IsMale := $.Persons(Gender = 'M');\\nIsMale := $.Persons.Gender = 'M';\\n//Check if a person has a DOB and was born after 1980\\n// IsBorn80 := $.Persons(birthdate <> '' AND birthdate[1..4] > '1979');\\nIsBorn80 := $.Persons.birthdate <> '' AND $.Persons.birthdate[1..4] > '1979';\\n\\nEXPORT isYoungFloridaMale := IsFloridian AND IsMale AND IsBorn80;
HTH,\\n\\nRichard\", \"post_time\": \"2013-05-16 15:04:30\" },\n\t{ \"post_id\": 4100, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Re: Unknown identifier "AND"??\", \"username\": \"rtaylor\", \"post_text\": \"I don't have any other (EXPORT) definitions for the Boolean variables used in this ECL. Any other thoughts?
Yes. First one: there is no such thing in ECL as a "variable" (these are more correctly referred to as "definitions"). Remember, ECL is a declarative, non-procedural language. \\n\\nNext thought: can you post a screen shot of your code, with the error message, and showing the Repository directory tree expanded out so I can see the filenames. Obviously, I am unable to duplicate the issue on my machine so i need to see what's on yours, please.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-16 12:53:50\" },\n\t{ \"post_id\": 4099, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Re: Unknown identifier "AND"??\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"The file name wasn't isYoungFloridaMale. Thanks for pointing it out. \\n\\nI renamed it. Still get the same error.\\n\\nI don't have any other (EXPORT) definitions for the Boolean variables used in this ECL. Any other thoughts?\", \"post_time\": \"2013-05-16 03:38:40\" },\n\t{ \"post_id\": 4090, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Re: Unknown identifier "AND"??\", \"username\": \"rtaylor\", \"post_text\": \"Here's a wild-guess question -- what is the name of the .ecl file that contains this code?\\n\\nIf it's not "isYoungFloridaMale.ecl" then that might be the problem, since the name of the EXPORT definition and the name of the file must always match.\\n\\nIf that's not the case, then do you already have an EXPORT definition in your repository called "isFloridian" or "isMale"?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-15 13:22:39\" },\n\t{ \"post_id\": 4089, \"topic_id\": 913, \"forum_id\": 10, \"post_subject\": \"Unknown identifier "AND"??\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I recently installed ECL IDE and am using HPCC VM to try some basic programs from the training portal. \\n\\nThe following code piece, picked from tutorials, fails with Unknown identifier "AND" error in the EXPORT line:\\n
\\nIMPORT $;\\n//Check if a person belongs to Florida\\nIsFloridian := $.Persons(State = 'FL');\\n//Check if a person is male\\nIsMale := $.Persons(Gender = 'M');\\n//Check if a person has a DOB and was born after 1980\\nIsBorn80 := $.Persons(birthdate <> '' AND birthdate[1..4] > '1979');\\n\\nEXPORT isYoungFloridaMale := IsFloridian AND IsMale AND IsBorn80;\\n
\\n\\nIn one of the forums, there was a mention of a possible mismatch between VM/IDE versions. Is that the case in my scenario? \\n\\nMy ECL details are:\\nIDE Version: 6.10.2.101.3.8\\nServer: 3.10.4 (1)\\nCompiler: 3.6.1 community_3.10.2-1\\n\\nAny help is greatly appreciated.\", \"post_time\": \"2013-05-15 08:19:32\" },\n\t{ \"post_id\": 4144, \"topic_id\": 921, \"forum_id\": 10, \"post_subject\": \"Re: How to replace a line feed character?\", \"username\": \"hpccctio\", \"post_text\": \"Just tried this on the latest ECL release and worked fine now. \", \"post_time\": \"2013-05-23 19:40:13\" },\n\t{ \"post_id\": 4143, \"topic_id\": 921, \"forum_id\": 10, \"post_subject\": \"How to replace a line feed character?\", \"username\": \"hpccctio\", \"post_text\": \"I tried to remove a line feed character in a string with the following ecl statement:\\n\\n addr_new := REGEXREPLACE(x'0A', address, '');\\n\\nECL IDE did not give me a syntax error, however it did not remove the line feed character either. Help! Thanks in advance.\", \"post_time\": \"2013-05-23 15:04:26\" },\n\t{ \"post_id\": 4166, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Re: Template language\", \"username\": \"rtaylor\", \"post_text\": \"You can take a look at the code in the ML.FromField definition for an example of its use. This is part of our Machine Learning library,which you can download here: http://hpccsystems.com/ml\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-31 14:18:58\" },\n\t{ \"post_id\": 4164, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Re: Template language\", \"username\": \"sapthashree\", \"post_text\": \"Hi Richard,\\n\\nNow i got some idea why template language is used in HPCC.\\n\\nAlso i have some questions on some keywords.Here in Template language we have #EXPORT and #EXPORTXML.Using these keywords the output will be generated with the following format\\n<Data>\\n<Field label="<label-of-field>"\\nname="<name-of-field>"\\nposition="<n>"\\nrawtype="<n>"\\nsize="<n>"\\ntype="<ecl-type-without-size>" />\\n...\\n</Data>\\n\\nWhat is the use of generating these outputs and where it is used?\", \"post_time\": \"2013-05-31 04:37:09\" },\n\t{ \"post_id\": 4162, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Re: Template language\", \"username\": \"rtaylor\", \"post_text\": \"sapthashree,
One more thing: in which situations would we go for the Template Language? In a previous post you said it is used as a code generation tool, but my question is why we need to generate ECL code at all. What is the use of generating ECL code, where is it used, and how is it used further (i.e. what is the benefit)?
The template language is mostly used to generate ECL code for situations where the ECL cannot be pre-written. The example I gave you does this because the filter condition is different for each separate OUTPUT, as well as the name of the file. Also it is said "Template language is designed to take an input XML datastream". \\nSuppose the input is not an XML dataset; how will this template language work for a normal dataset?
Template language only works with XML string input, not an XML dataset. See the LOADXML function docs (http://hpccsystems.com/community/docs/ecl-language-reference/html/loadxml).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-30 15:03:48\" },\n\t{ \"post_id\": 4161, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Re: Template language\", \"username\": \"sapthashree\", \"post_text\": \"Hi Richard,\\n\\nOne more thing, in which situation we'll go for Template Language.In previous post you have said it is used as code generation tool but my question is why we need to generate the ecl code. what is the use of generating ecl code,where it is used and how it is used further(benifit)?\\n\\nAlso it is said "Template language is designed to take an input XML datastream". \\nSuppose the input is not an xml dataset, then how this template language will work for naormal dataset.\", \"post_time\": \"2013-05-30 13:01:23\" },\n\t{ \"post_id\": 4157, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Re: Template language\", \"username\": \"rtaylor\", \"post_text\": \"sapthashree,\\n\\nWe have several code generation tools in ECL -- MACRO, FUNCTIONMACRO, and the Template Language. The Template language is designed to take an input XML datastream and parse it so that you can generate valid ECL code to run your job.\\n\\nIn this (fully functional) example, the XML string contains part numbers, dataset and field names, and specific values to parse out:MAC_GenCode(StrXML) := MACRO\\n\\tloadxml(StrXML); \\n\\n\\t#DECLARE(OutStr);\\n\\t#SET(OutStr,'');\\n\\t#FOR(rec)\\n\\t\\t#APPEND(OutStr,'OUTPUT(' + %'ds'% + '('+ %'fld'% +' IN ' + %'id'% + '),,\\\\'~TEST::outfile' + %part% + '\\\\');\\\\n');\\n\\t#END;\\n\\n\\t%'OutStr'% //this one just shows the ECL code \\n\\t// %OutStr% //this one runs the ECL code\\nENDMACRO;\\n\\nX := '<XML><rec><part>1</part><ds>SomeFile</ds><fld>recid</fld><id>[1,2,3,4,5]</id>' + \\n '</rec><rec><part>2</part><ds>SomeFile</ds><fld>recid</fld><id>[6,7,8,9,10]</id></rec></XML>';\\n\\nMAC_GenCode(X);
So that it can generate this code:\\nOUTPUT(SomeFile(recid IN [1,2,3,4,5]),,'~TEST::outfile1');\\nOUTPUT(SomeFile(recid IN [6,7,8,9,10]),,'~TEST::outfile2');
We have an Applied ECL: Code Generation class that teaches all these tools. It comes after all the other six classes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-05-29 19:13:36\" },\n\t{ \"post_id\": 4153, \"topic_id\": 925, \"forum_id\": 10, \"post_subject\": \"Template language\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nWhat template language and Why is used in HPCC?\\n\\nHere in HPCC Language Reference pdf given that \\n"The Template language is a Meta-language that takes standard XML input, typically generated from an end-user GUI application (thereby vastly simplifying the coding problem in the GUI) and in turn generating the appropriate ECL code to implement the user's choices."
\\nCan u plaese explain the abaove lines in detail if possible with an example.\", \"post_time\": \"2013-05-29 10:03:48\" },\n\t{ \"post_id\": 4172, \"topic_id\": 927, \"forum_id\": 10, \"post_subject\": \"Re: Call ECL from existing C++ application\", \"username\": \"rtaylor\", \"post_text\": \"Rayappan,\\n\\nFirst, you need to do the ETL work in Thor that will get your data ready for production (our free online Introductory ECL courses can help you get started with this bit: (http://learn.lexisnexis.com/hpcc). \\n\\nThen you will need to develop the Roxie queries that will return the data to whatever application requests it (we have Roxie classes available, the schedule is here: http://hpccsystems.com/community/training-events/training). \\n\\nAnd then you can send requests for data to those queries, using SOAP (or JSON) from your C++ application.\\n\\nIf you've already got the first two up and running, then you should take a look at the "Using Roxie" PDF available for download here:\\nhttp://hpccsystems.com/download/docs/installation-and-administration\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-05 20:24:47\" },\n\t{ \"post_id\": 4169, \"topic_id\": 927, \"forum_id\": 10, \"post_subject\": \"Call ECL from existing C++ application\", \"username\": \"arayappan_mthsense\", \"post_text\": \"Hi All,\\n\\nI need to integrate HPCC with our C++ application.\\n\\nMy requirement is to insert all incoming request into database and later we use this requests for decision making making and report generation.\\n\\nI am very new to this HPCC and planning to use HPCC for our requirement.\\n\\nPlease anyone tell me how to insert request data into HPCC from my C++ application.\\n\\nRegards,\\nRayappan A\", \"post_time\": \"2013-06-05 15:37:29\" },\n\t{ \"post_id\": 4742, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"rphpcc\", \"post_text\": \"You can set the classpath in environemnt.conf to multiple jars.\\n\\nExample:\\nclasspath=/dev/jars/servlet.jar:/dev/jars/common-lang.jar:/dev/jars/apache-common.jar\", \"post_time\": \"2013-10-09 15:21:51\" },\n\t{ \"post_id\": 4250, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"richardkchapman\", \"post_text\": \"In 3.x, the /etc/HPCCSystems/environment.conf file is not overwritten by updates, so any changes you make there will be preserved.\\n\\nIn 4.x, the environment.conf file will not be overwritten by an update IF it is different from the default environment.conf file. If the default environment for the new version is different from the default from the version you are upgrading from, but you have also made changes, then a manual merge is required and a warning will be issued at rpm install time.\", \"post_time\": \"2013-06-24 14:00:47\" },\n\t{ \"post_id\": 4249, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"rphpcc\", \"post_text\": \"It looks like HPCC uses the default JVM for the OS. Added a jar file into JAVA_HOME/lib/ext directory and was able to reference the classes from that jar in ECL code. Also tried copying the jar to /opt/HPCCSystems/classes directory but that did not work\", \"post_time\": \"2013-06-21 16:14:11\" },\n\t{ \"post_id\": 4248, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"DSC\", \"post_text\": \"[quote="arjuna chala":237whcc0]The JAR file itself can be physically located anywhere. You can add the JAR file to the classpath using:\\n\\n1. 
The environment.conf file located under /etc/HPCCSystems has a classpath variable\\n2. The Java global classpath environment variable\\n\\nHope This Helps!\\n\\nArjuna\\nThanks for the info. A couple of questions, for each of your enumerated items:\\n\\n1. Is the environment.conf file rewritten during updates? Specifically, RPM updates?\\n2. Would this environment variable be set within the hpcc user's .bashrc startup script? If so, does that script get rewritten during updates?\\n\\nI'm just trying to anticipate problems. Hopefully there are none!\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-06-21 16:05:06\" },\n\t{ \"post_id\": 4247, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"arjuna chala\", \"post_text\": \"The JAR file itself can be physically located anywhere. You can add the JAR file to the classpath using:\\n\\n1. The environment.conf file located under /etc/HPCCSystems has a classpath variable\\n2. The Java global classpath environment variable\\n\\nHope This Helps!\\n\\nArjuna\", \"post_time\": \"2013-06-21 15:50:13\" },\n\t{ \"post_id\": 4246, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Re: Embedded Java\", \"username\": \"DSC\", \"post_text\": \"Any information on this?\\n\\nThe scenario we're looking at is being able to drop .jar files, built elsewhere, and provide glue .class files to call them from within ECL. The basic question is, "where do those .jar files reside in a standard HPCC installation?" Or is such a thing even possible?\\n\\nA more concrete example of the scenario, which is what we're target, is calling .jar files created by Drools from within a TRANSFORM.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-06-21 14:56:28\" },\n\t{ \"post_id\": 4228, \"topic_id\": 941, \"forum_id\": 10, \"post_subject\": \"Embedded Java\", \"username\": \"rphpcc\", \"post_text\": \"I have been playing with the HPCC release candidate - 4.0.0-rc9 & import java code into ECL. The default location where HPCC looks for java classes is /opt/HPCCSystems/classes. What is the directory location for putting the JAR files? I have classes that are in a JAR file and I want to import those classes into ECL. Is there a directory that HPCC looks for JARS? Thanks.\", \"post_time\": \"2013-06-19 17:12:05\" },\n\t{ \"post_id\": 4230, \"topic_id\": 942, \"forum_id\": 10, \"post_subject\": \"Re: Spray and Read a Binary File?\", \"username\": \"bforeman\", \"post_text\": \"There is actually a good article in the ECL Programmer's Guide that addresses this topic:\\n\\nhttp://hpccsystems.com/download/docs/programmers-guide\\n\\nSee Page 36 - "Working with BLOBs" (Binary Large Objects).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-06-19 19:38:04\" },\n\t{ \"post_id\": 4229, \"topic_id\": 942, \"forum_id\": 10, \"post_subject\": \"Spray and Read a Binary File?\", \"username\": \"guytoncn\", \"post_text\": \"Spray and Read a Binary File? Has anyone done this and/or have some tips?\", \"post_time\": \"2013-06-19 19:30:40\" },\n\t{ \"post_id\": 4286, \"topic_id\": 945, \"forum_id\": 10, \"post_subject\": \"Re: System Error: 25: SDS: IPropertyTree exception\", \"username\": \"abhisr\", \"post_text\": \"Hi this is solved .\\nFilesRead := STD.System.Workunit.WorkunitFilesWritten(TRIM(L.WUID));
\\n\\nTRIM() worked.\\n\\nPlease refer :\\nhttps://track.hpccsystems.com/browse/HPCC-9607\\n\\nThanks\\nabhi\", \"post_time\": \"2013-07-03 19:50:31\" },\n\t{ \"post_id\": 4277, \"topic_id\": 945, \"forum_id\": 10, \"post_subject\": \"Re: System Error: 25: SDS: IPropertyTree exception\", \"username\": \"abhisr\", \"post_text\": \"I have a similar error\\nMy program is trying to get all the files written by work unit using STD.System.Workunit.WorkunitFilesWritten.
\\n\\nWhen i run my code the exception occurs \\nError: System error: 25: SDS: IPropertyTree exception \\nSDS Reply Error : SDS: IPropertyTree exception \\nIPropertyTree: xpath parse error\\nXPath Exception: Qualifier expected e.g. [/i\\n]and when i run it in thor the exception follows\\n[i]\\nError: System error: -1: Graph[14], csvread[15]: SLAVE 10.194.10.43:6600: No active dali server connection available (0, 0), -1, My piece of code goes here \\n\\nIMPORT STD;\\n\\nWorkunitRecord := RECORD\\n STRING24 wuid;\\n STRING owner{MAXLENGTH(64)};\\n STRING cluster{MAXLENGTH(64)};\\n STRING roxiecluster{MAXLENGTH(64)};\\n STRING job{MAXLENGTH(256)};\\n STRING10 state;\\n STRING7 priority;\\n STRING20 created;\\n STRING20 modified;\\n BOOLEAN online;\\n BOOLEAN protected;\\nEND;\\n\\nWuDetails := RECORD\\n STRING WUID;\\n SET OF STRING files;\\nEND;\\n\\n\\nWuDetails FileData_TRANS(WorkunitRecord L) := TRANSFORM\\n SELF.WUID := L.WUID;\\n\\t FilesRead := STD.System.Workunit.WorkunitFilesWritten(L.WUID);\\n SELF.files := SET(FilesRead,name);\\nEND; \\n\\n\\n\\nWunitList := STD.System.Workunit.WorkunitList('');\\n\\nWuDetails FilesWritten := PROJECT(WunitList,FileData_TRANS(LEFT)); \\nOUTPUT(FilesWritten);
\\n\\nI there any alternative way to get the files written ?\", \"post_time\": \"2013-07-01 16:35:17\" },\n\t{ \"post_id\": 4242, \"topic_id\": 945, \"forum_id\": 10, \"post_subject\": \"Re: System Error: 25: SDS: IPropertyTree exception\", \"username\": \"rtaylor\", \"post_text\": \"Strini,\\n\\nI'm getting the same error running your code. The STD.System.Workunit.WorkunitFilesRead() function works fine when given a string constant but breaks when given a field from a dataset, as demonstrated here:Inds := DATASET([{'W20130619-103850'}],{STRING24 WUID});\\n\\nSTD.System.Workunit.WorkunitFilesRead(InDS[1].WUID);
This code gets exactly the same error.\\n\\nPlease report this issue in JIRA.\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2013-06-21 08:22:44\" },\n\t{ \"post_id\": 4240, \"topic_id\": 945, \"forum_id\": 10, \"post_subject\": \"System Error: 25: SDS: IPropertyTree exception\", \"username\": \"omnibuzz\", \"post_text\": \"I am running this code to find out which all files have been read by all the work units. I get an XPath error. please help. The code below is self contained.\\n\\nIMPORT STD;\\n\\nWsFileRead := RECORD\\n STRING name{MAXLENGTH(256)};\\n STRING cluster{MAXLENGTH(64)};\\n BOOLEAN isSuper;\\n UNSIGNED4 usage;\\nEND;\\n\\n\\nds := STD.System.Workunit.WorkunitList('');\\n\\noutrec := RECORD\\n\\tSTRING WUID;\\n\\tDATASET(WsFileRead) files;\\nEND;\\n\\n\\noutrec XForm1(ds L) := TRANSFORM\\n\\tSELF.WUID := L.WUID;\\n\\tSELF.files := STD.System.Workunit.WorkunitFilesRead(L.WUID);\\nEND;\\t\\n\\nPROJECT(ds,XForm1(LEFT));
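The TRIM() workaround reported further up this thread for WorkunitFilesWritten applies to the same pattern here: the WUID coming back from WorkunitList is a fixed-length STRING24, so it carries trailing spaces; trim it before handing it to the service call. A sketch reusing the definitions above:

outrec XFormTrimmed(ds L) := TRANSFORM
  SELF.WUID  := L.WUID;
  SELF.files := STD.System.Workunit.WorkunitFilesRead(TRIM(L.WUID));  // trimmed WUID
END;

PROJECT(ds, XFormTrimmed(LEFT));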
\", \"post_time\": \"2013-06-20 19:32:15\" },\n\t{ \"post_id\": 4269, \"topic_id\": 955, \"forum_id\": 10, \"post_subject\": \"Re: How to Get Social Media data\", \"username\": \"bforeman\", \"post_text\": \"There are a couple of links to explore:\\n\\nhttp://hpccsystems.com/demos/twitter-sentiment\\nhttp://hpccsystems.com/Why-HPCC/case-studies/engauge-pinterest\\nhttps://dev.twitter.com/docs/api/1.1\\n\\nThe idea with Twitter is to call the appropriate API via REST protocol. \\n\\nThe HTTPCALL ECL statement can do this for you. Check out the Language Reference for details about its implementation.\\n\\nBob\", \"post_time\": \"2013-06-28 12:29:11\" },\n\t{ \"post_id\": 4268, \"topic_id\": 955, \"forum_id\": 10, \"post_subject\": \"How to Get Social Media data\", \"username\": \"swapna\", \"post_text\": \"Hi, \\n\\nIs there any plugin available to get data from social media such as Facebook,twitter etc...I want to use this data for analytics. \\n\\nplease detail the steps for getting the social media data feed and Spraying in Thor. \\n\\nRegards, \\nSwapna.P\", \"post_time\": \"2013-06-28 09:01:20\" },\n\t{ \"post_id\": 4282, \"topic_id\": 957, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTION result to DATASET\", \"username\": \"joe.chambers\", \"post_text\": \"You may want to consider using table to do the cross tab report\\n\\n\\nR1 := RECORD\\n String24 r1s1;\\nEND;\\nDS1R1 := Dataset ([{'ace'},{'base'},{'base'},{'case'},{'case'},{'case'},{'dase'},{'dase'}],R1);\\n\\nR2 := RECORD\\n DS1R1.r1s1;\\n Integer cnt := COUNT(GROUP);\\nEND;\\n\\nA := TABLE(DS1R1,R2,r1s1);\\noutput(A);\\n
\", \"post_time\": \"2013-07-02 20:32:26\" },\n\t{ \"post_id\": 4278, \"topic_id\": 957, \"forum_id\": 10, \"post_subject\": \"DISTRIBUTION result to DATASET\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\nI want to find the count of each columns in the data set so i used a DISTRIBUTION .\\nNow I have got the result in XML format after doing a DISTRIBUTION , how can I convert it into a DATASET.\\n\\nMy code goes here.\\n\\nR1 := RECORD \\n\\tString24 r1s1;\\nEND;\\nDS1R1 := Dataset ([{'ace'},{'base'},{'base'},{'case'},{'case'},{'case'},{'dase'},{'dase'}],R1);\\n DATASET x := DISTRIBUTION (DS1R1,NAMED('MATHIYAS'));\\nOUTPUT(x);\\n
\\nRESULT\\n\\n\\n<XML>\\n<Field name="r1s1" distinct="4">\\n <Value count="2">base</Value>\\n <Value count="3">case</Value>\\n <Value count="2">dase</Value>\\n <Value count="1">ace </Value>\\n</Field>\\n</XML>\\n\\n\\n
\\n\\nHow can I write it to a file ?\\nThanks\\nAbhi\", \"post_time\": \"2013-07-01 22:25:24\" },\n\t{ \"post_id\": 4319, \"topic_id\": 962, \"forum_id\": 10, \"post_subject\": \"Re: ECL generation dependent upon typeof Field.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe #EXPORTXML docs say. "The #EXPORTXML statement produces the same XML as #EXPORT ..." so the XML format produced is documented under #EXPORT (third normal form docs ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-12 14:37:36\" },\n\t{ \"post_id\": 4316, \"topic_id\": 962, \"forum_id\": 10, \"post_subject\": \"Re: ECL generation dependent upon typeof Field.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nThat works a treat.\\n\\nMy only quibble is in the documentation, I can't see these attributes of fields documented in the ECL reference manual.\\nThingks like @type and @size are used in examples but nowhere else. Correct me if I'm wrong.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-07-12 09:16:43\" },\n\t{ \"post_id\": 4314, \"topic_id\": 962, \"forum_id\": 10, \"post_subject\": \"Re: ECL generation dependent upon typeof Field.\", \"username\": \"rtaylor\", \"post_text\": \"Alan,\\n\\nOK, here's the way I would do it:
IMPORT STD;\\n\\nMAC_Escape(pInputRecord) := MACRO\\n loadxml('<xml/>');\\n #declare(FieldDef)\\n \\n #exportxml(rTheRecord, pInputRecord)\\n #for(rTheRecord)\\n #for(Field)\\n #IF(STD.Str.StartsWith(%'{@type}'%,'string') )// Field of type STRING\\n #SET(FieldDef,%'FieldDef'% + ' self.'+%'@name'%+' := Escape(L.'+ %'@name'%+');\\\\n');\\n #ELSE\\n #SET(FieldDef,%'FieldDef'% + ' self.'+%'@name'%+' := L.'+ %'@name'%+';\\\\n');\\n #END\\n #END\\n #END\\n %'FieldDef'%; //show me the generated code\\n // %FieldDef%; //use the generated code\\nENDMACRO;\\n\\nMyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\n\\tINTEGER1 Value3 := 0;\\nEND;\\n\\nMAC_Escape(MyRec);
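\\nFor the MyRec test case at the bottom, the %'FieldDef'% line should display generated assignments along these lines (Escape here is assumed to be your own function, defined elsewhere):\\n\\n self.value1 := Escape(L.value1);\\n self.value2 := Escape(L.value2);\\n self.value3 := L.value3;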
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-11 19:00:42\" },\n\t{ \"post_id\": 4309, \"topic_id\": 962, \"forum_id\": 10, \"post_subject\": \"ECL generation dependent upon typeof Field.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have a very straight forward requirement.\\n\\nWithin a MACRO perform some transformation (specifically escape some characters) on fields of type STRING (and string variants) but not on any other types.\\nSomething like:\\n\\nloadxml('<xml/>');\\n#declare(FieldDef)\\n\\nMAC_Escape(pInputRecord) := MACRO\\n \\n #exportxml(rTheRecord, pInputRecord)\\n #for(rTheRecord)\\n #for(Field)\\n #IF Field of type STRING\\n #SET(FieldDef,' self.'+%'@name'%+' := Escape(L.'+ %'@name'%+')');\\n #ELSE\\n #SET(FieldDef,' self.'+%'@name'%+' := L.'+ %'@name'%);\\n #END\\n %FieldDef%;\\n #END\\n #END\\nENDMACRO;\\n
\\nI just don't know how to construct the #IF expression.\\n\\nI could simplify by just bundling up all the 'pass through' fields in one expression at the end:\\nSELF := L;\\nBut it does not help with problem.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-07-11 12:59:31\" },\n\t{ \"post_id\": 4327, \"topic_id\": 966, \"forum_id\": 10, \"post_subject\": \"Re: Code failing in thor but success in hthor\", \"username\": \"abhisr\", \"post_text\": \"thanks rtaylor , issue reported.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-9706.\\n\\nRegards\\nAbhi\", \"post_time\": \"2013-07-16 13:06:52\" },\n\t{ \"post_id\": 4326, \"topic_id\": 966, \"forum_id\": 10, \"post_subject\": \"Re: Code failing in thor but success in hthor\", \"username\": \"rtaylor\", \"post_text\": \"abhisr,\\n\\nThe problem appears to be with the LogicalFileList function, which seems to work only in hThor. Please report this in JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-15 21:06:13\" },\n\t{ \"post_id\": 4322, \"topic_id\": 966, \"forum_id\": 10, \"post_subject\": \"Code failing in thor but success in hthor\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI am running a piece of code that reads a file and am using CHOOSESETS . When i run the code it runs successfully in single cluster hThor, but fails in thor.\\n\\nHere is my code\\n\\nLAY:= record \\n\\nINTEGER N;\\nsTRING NAME;\\n\\nend;\\n\\n Name_DS := DATASET([{1,'ace'},{2,'bace'},{3,'case'},{4,'dice'},\\n\\t\\t{5,'face'},{6,'lace'},{7,'mice'}\\n\\t],laY);\\n\\nOUTPUT(Name_DS,,'~test::name::names',THOR,OVERWRITE);\\n\\nmyFiles := FileServices.LogicalFileList('*::name::name*');\\nmyFiles;\\n cnt \\t\\t\\t:= COUNT(myFiles);\\n cnt;\\nSTRING ScrubFileName \\t := myFiles[cnt].name;\\nScrubFileName;\\nscrubData \\t:= DATASET('~'+ScrubFileName,LAY,THOR);\\nscrubData;\\nPos2ANames \\t:= CHOOSESETS(scrubData\\n\\t\\t\\t\\t, NAME[1..3] = 'ace' => 1\\n\\t\\t\\t\\t\\n\\t\\t\\t\\t, ENTH );\\nOUTPUT(Pos2ANames,named('Pos2ANames'));\\n\\n
\\n\\n\\nThanks\\nabhi\", \"post_time\": \"2013-07-12 22:31:08\" },\n\t{ \"post_id\": 4426, \"topic_id\": 967, \"forum_id\": 10, \"post_subject\": \"Re: Get Workunits from Remote box\", \"username\": \"abhisr\", \"post_text\": \"Thanks\\nkevin\", \"post_time\": \"2013-08-02 19:29:49\" },\n\t{ \"post_id\": 4332, \"topic_id\": 967, \"forum_id\": 10, \"post_subject\": \"Re: Get Workunits from Remote box\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"You may get the workunits by sending a SOAP call to your ESP server:\\n\\n<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsworkunits">\\n<soap:Body>\\n <WUQueryRequest/>\\n</soap:Body>\\n</soap:Envelope>\", \"post_time\": \"2013-07-17 16:00:10\" },\n\t{ \"post_id\": 4324, \"topic_id\": 967, \"forum_id\": 10, \"post_subject\": \"Get Workunits from Remote box\", \"username\": \"abhisr\", \"post_text\": \"Hi \\nhow can I get the work units from my production/remote box.\\nSTD.System.Workunit.WorkunitList(''));\\n
will return only workunits in which we are executing the code\\nRegards\\nabhi\", \"post_time\": \"2013-07-15 12:54:18\" },\n\t{ \"post_id\": 4333, \"topic_id\": 968, \"forum_id\": 10, \"post_subject\": \"Re: Question on File Label\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Richard, for explaining very clearly \\n\\nGayathri\", \"post_time\": \"2013-07-18 04:39:39\" },\n\t{ \"post_id\": 4329, \"topic_id\": 968, \"forum_id\": 10, \"post_subject\": \"Re: Question on File Label\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nLogical file names in HPCC use Xpath syntax, which means the "::" delimiter is the Xpath version of a directory delimiter (\\\\ in Windows, / in Linux). We use this because HPCC was originally put into production on Windows clusters before moving to Linux, and we wanted the DFU syntax to be OS-agnostic.\\n\\nThe "class" portion is the scope -- the major directory under which the files will be stored. The "bmf" and "temp" portions are subdirectories, and "deduppersons" is the actual file name.\\n\\nSo, assuming your data files are stored under c$/thordata, then this "deduppersons" file would actually be under c$/thordata/class/bmf/temp on each node.\\n\\nThe Mask display on the spray page tells you how each physical file part of the logical file will be named on each node. So on a 3-node system, the file on node 1 would have the extension ._1_of_3 and node 2's file would be ._2_of_3 ...\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-07-17 14:43:45\" },\n\t{ \"post_id\": 4328, \"topic_id\": 968, \"forum_id\": 10, \"post_subject\": \"Question on File Label\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I am an ECL newbie. \\n\\nWhile spraying a file, What do the individual parts given in a file 'label' mean? Can someone explain taking 'class::bmf::temp::deduppersons' as example? Also, do the individual parts necessarily need to be separated by ::?\", \"post_time\": \"2013-07-17 06:33:05\" },\n\t{ \"post_id\": 4378, \"topic_id\": 978, \"forum_id\": 10, \"post_subject\": \"Re: Compilation Time varies wih different way's of data load\", \"username\": \"ghalliday\", \"post_text\": \"If you are able to send me an archive of the query I can take a look at why it appears to be going slowly.\\n\\n(gavin.halliday@lexisnexis.com)\", \"post_time\": \"2013-07-30 16:08:47\" },\n\t{ \"post_id\": 4365, \"topic_id\": 978, \"forum_id\": 10, \"post_subject\": \"Compilation Time varies wih different way's of data loading\", \"username\": \"sameermsc\", \"post_text\": \"I have an interesting situation\\n\\nhere is a case where i am loading a dataset containing 1 record with the following layout\\n\\nRec := {\\n unsigned2 id,\\n unsigned1 sz,\\n boolean cf,\\n boolean sm,\\n boolean sw,\\n set of string csw {maxcount(500), maxlength(30)},\\n boolean \\tcs,\\n boolean \\tits, \\t\\t\\n};\\n\\n// Case 1:\\n// inline dataset \\nconfig := Module1.Attribute1().configdata;\\n\\n// Case 2:\\n// loading from a file\\n// config := dataset('~sample::config', Rec, thor);\\n\\noutput(config);\\n\\nwhen i execute the code for case 1 and case 2 separately, it takes similar compile and thor times\\n\\nnow comes the actual trouble\\n\\nwhen i use config as part of a another ECL code, the compilation times vary drastically. ie., when config is loaded as shown in case 1, compilation time is around 3.4 seconds, but, when config is loaded as shown in case 2, compilation time shoots up, now its 32 sec
\\neven the graphs look drastically different\\n\\nAny good reason for such behavior, and is there a way to bring the compilation times back to ~4 sec range even with Case 2?\\n\\ni am in favor of case 2, as i can load dynamically any data created even in future. we follow a specific path pattern (not shown in above example) to do this.\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-07-26 06:51:09\" },\n\t{ \"post_id\": 4412, \"topic_id\": 980, \"forum_id\": 10, \"post_subject\": \"Re: Getting a File layout using file name\", \"username\": \"DSC\", \"post_text\": \"I was poking around with this as it was new to me as well. Here are some concrete examples that you can leverage:\\n\\nGiven a logical Thor file cryptically named 'pitcob::index::instance_cluster_1':\\n\\n* Record layout according to Logical File Details in ECL Watch:\\n\\n
{ unsigned8 instanceid, unsigned8 clusterid };
\\n* WSDL for the service Kevin cited above (replace 'localhost'):\\n\\nhttp://localhost:8010/WsDfu/DFUGetDataColumns?wsdl
\\n* HTML form for playing with service:\\n\\nhttp://localhost:8010/WsDfu/DFUGetDataColumns?form
\\n* Sample SOAP query for the file:\\n\\n<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsdfu">\\n <soap:Body>\\n <DFUGetDataColumnsRequest>\\n <OpenLogicalName>pitcob::index::instance_cluster_1</OpenLogicalName>\\n <LogicalName>pitcob::index::instance_cluster_1</LogicalName>\\n <FilterBy/>\\n <ShowColumns/>\\n <ChooseFile>0</ChooseFile>\\n <Cluster/>\\n <ClusterType/>\\n <StartIndex>0</StartIndex>\\n <EndIndex>0</EndIndex>\\n </DFUGetDataColumnsRequest>\\n </soap:Body>\\n</soap:Envelope>
\\n* SOAP response:\\n\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext">\\n <soap:Body>\\n <DFUGetDataColumnsResponse xmlns="urn:hpccsystems:ws:wsdfu">\\n <LogicalName>pitcob::index::instance_cluster_1</LogicalName>\\n <StartIndex>1</StartIndex>\\n <EndIndex>100</EndIndex>\\n <DFUDataKeyedColumns1>\\n <DFUDataColumn>\\n <ColumnLabel>instanceid</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n </DFUDataKeyedColumns1>\\n <DFUDataNonKeyedColumns1>\\n <DFUDataColumn>\\n <ColumnLabel>clusterid</ColumnLabel>\\n <ColumnType>Integer</ColumnType>\\n <ColumnValue/>\\n <ColumnSize>20</ColumnSize>\\n <MaxSize>20</MaxSize>\\n </DFUDataColumn>\\n </DFUDataNonKeyedColumns1>\\n <RowCount>1087973</RowCount>\\n <ChooseFile>0</ChooseFile>\\n </DFUGetDataColumnsResponse>\\n </soap:Body>\\n</soap:Envelope>
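\\n* A rough ECL-side sketch of calling the same service with SOAPCALL (untested; the URL, field list and size hints are illustrative only):\\n\\nColRec := RECORD\\n STRING ColumnLabel {XPATH('ColumnLabel'), MAXLENGTH(256)};\\n STRING ColumnType {XPATH('ColumnType'), MAXLENGTH(64)};\\nEND;\\n\\nInRec := RECORD\\n STRING OpenLogicalName {XPATH('OpenLogicalName'), MAXLENGTH(256)} := 'pitcob::index::instance_cluster_1';\\nEND;\\n\\nOutRec := RECORD\\n DATASET(ColRec) KeyedCols {XPATH('DFUDataKeyedColumns1/DFUDataColumn'), MAXCOUNT(200)};\\n DATASET(ColRec) NonKeyedCols {XPATH('DFUDataNonKeyedColumns1/DFUDataColumn'), MAXCOUNT(200)};\\nEND;\\n\\ncols := SOAPCALL('http://localhost:8010/WsDfu', 'DFUGetDataColumns', InRec, DATASET(OutRec), XPATH('DFUGetDataColumnsResponse'));\\nOUTPUT(cols);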
\\nThat's pretty neat.\\n\\nDan\", \"post_time\": \"2013-08-01 13:09:47\" },\n\t{ \"post_id\": 4411, \"topic_id\": 980, \"forum_id\": 10, \"post_subject\": \"Re: Getting a File layout using file name\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"You may try: WsDfu DFUGetDataColumns.\\n\\n<soap:Body>\\n <DFUGetDataColumnsRequest>\\n <OpenLogicalName>full_name</OpenLogicalName>\\n </DFUGetDataColumnsRequest>\\n </soap:Body>\", \"post_time\": \"2013-08-01 12:58:31\" },\n\t{ \"post_id\": 4367, \"topic_id\": 980, \"forum_id\": 10, \"post_subject\": \"Getting a File layout using file name\", \"username\": \"nkprasad\", \"post_text\": \"Is there any way to get Layout of a file using a file name from remote location. Tried soap call to DFUInfo (which is returning blanks). DFUQuery works, but does not have Layout info.\\n\\nDFUInfoRequest\\t:= \\tRECORD\\n\\t\\n\\tSTRING FileName{XPATH('FileName'),MAXLENGTH(200)} \\t\\t\\n\\t\\t\\t\\t\\t:=\\t'File_name';\\n\\nEND;\\n\\nEspException\\t:= \\tRECORD\\n\\t\\tSTRING\\t\\tCode{XPATH('Code'),maxlength(10)};\\n\\t\\tSTRING\\t\\tAudience{XPATH('Audience'),maxlength(50)};\\n\\t\\tSTRING\\t\\tSource{XPATH('Source'),maxlength(30)};\\n\\t\\tSTRING\\t\\tMessage{XPATH('Message'),maxlength(200)};\\nEND;\\n\\nDFULogicalFile :=\\t\\tRECORD\\n\\tSTRING \\t\\tClusterName{XPATH('ClusterName'),maxlength(20)};\\n\\tSTRING \\t\\tName{XPATH('Name'),maxlength(100)};\\n\\tSTRING \\t\\tRecordCount{XPATH('RecordCount'),maxlength(30)};\\n\\t\\nEND;\\n\\nDFUFileDetail\\t:= RECORD\\n\\tSTRING\\tWuid{XPATH('Wuid'),maxlength(30)};\\n\\tSTRING\\tEcl{XPATH('Ecl'),maxlength(300)};\\n\\tSTRING Cluster{XPATH('Cluster'),maxlength(20)};\\n\\tSTRING Filename{XPATH('Filename'),maxlength(20)};\\n\\tDATASET(DFULogicalFile)\\tDFULogicalFile{XPATH('SuperFiles/DFULogicalFile'),maxcount(110)};\\nEND;\\n\\n\\nDFUInfoResponse\\t:= RECORD\\n\\tDATASET(ESPException)\\t\\tExceptions{XPATH('Exceptions/ESPException'),maxcount(110)};\\n\\tDFUFileDetail \\t\\t\\t\\t DFUFileDetail{XPATH('FileDetail/DFUFileDetail'),maxcount(110)};\\n \\nEND;\\n\\nDATASET(DFUInfoResponse) DFUInfoResult\\t:=\\tSOAPCALL(IpAddress,\\n\\t\\t\\t\\t 'DFUInfo',\\n\\t\\t\\t\\t DFUInfoRequest,\\n\\t\\t\\t\\t DATASET(DFUInfoResponse),\\n\\t\\t\\t\\t XPATH('DFUInfoResponse')\\n\\t\\t\\t\\t);\\n\\noutput(DFUInfoResult);
\\n\\nAbove query returns\\n<Wuid></Wuid><Ecl></Ecl><Cluster></Cluster><Filename></Filename><SuperFiles></SuperFiles>\", \"post_time\": \"2013-07-26 13:23:00\" },\n\t{ \"post_id\": 5871, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Richard,\\n\\nI had similar needs and wrote template language code to split my output and write to multiple files, with the number of files decided dynamically. However, mine gave a compilation error that said constant expression expected \\nat this location:\\n\\n#set(endIndex, numRecsPerFile);
\\n\\nMy numRecsPerFile is defined as:\\nnumRecsPerFile := count(mydataset)/10;
\\n\\nIf I replaced count(mydataset) with some whole number, my code compiles & executes fine - this verified that there is no problem in my template code. But then, I need my numRecsPerFile to be dynamic (eventually, I intend to make the denominator for this attribute definition as dynamic too). \\n\\nI browsed through the forums for similar errors and stumbled across this post and your 'one pass' code - I had written something similar too. I copied your code over to my IDE and it worked! \\n\\nI did some more analysis to find out where I was going wrong - I replaced my template code with yours and re-wired it to point to my datasets. This time, the compile-time issue was back! \\n\\nOn further analysis, I found that while the setter in the template code using a 'variable' works for inline datasets, it doesn't work if the dataset were to be created from a logical file.\\n\\nIs there a way out for this? \\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-12 06:31:53\" },\n\t{ \"post_id\": 4436, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"rtaylor\", \"post_text\": \"Amos,\\n\\nOK, here's another ONE PASS solution that doesn't use unique IDs:\\n//\\n// Example code - use without restriction. \\n// \\n\\n//Another ONE PASS Solution:\\n//Generate a separate OUTPUT for each file part\\n// by adding a part number to each record that it should be written to\\n// then using the Template Language to generate the OUTPUTs, filtering \\n// each record into its appropriate file part and using PROJECT to\\n// remove the extra PartNum field\\n\\nGenCode(ds, CntRecs, CntParts) := MACRO\\n\\n RunDSRec := RECORD\\n UNSIGNED4 PartNum := 0; \\n ds;\\n END;\\n t := TABLE(ds,RunDSrec);\\n\\n RunDSRec XF(RunDSRec L, RunDSRec R, INTEGER C) := TRANSFORM\\n \\t\\tSELF.PartNum := IF(C % CntRecs = 0,C DIV CntRecs,(C DIV CntRecs) + 1);\\n SELF := R;\\n END;\\n i := ITERATE(t,XF(LEFT,RIGHT,COUNTER));\\n\\n loadxml('<XML/>'); //open dummy XML scope just to make template language available \\n\\n #DECLARE(PartCnt);\\n #SET(PartCnt,0);\\n #DECLARE(OutStr);\\n #SET(OutStr,'');\\n #LOOP\\n #SET(PartCnt,%PartCnt%+1);\\n #IF(%PartCnt% > CntParts)\\n #BREAK\\n #ELSE \\n #APPEND(OutStr,'OUTPUT(PROJECT(i(PartNum='+%'PartCnt'%+'),{RECORDOF(i) AND NOT [PartNum]}),,\\\\'~RTTEST::FilePartTest' + %PartCnt% + '\\\\',overwrite);\\\\n');\\n #END;\\n #END;\\n\\n // %'OutStr'% //this one just shows the ECL code \\n %OutStr% //this one runs the ECL code\\nENDMACRO;\\n\\n//////////////////////////////////////////////////////////////////////////////////////////////////////\\nIMPORT TrainingYourName;\\n//Test the process with this file:\\nSomeFile := DATASET([\\n {1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'},\\n {6,'F'},{7,'G'},{8,'H'},{9,'I'},{10,'J'},\\n {11,'K'},{12,'L'},{13,'M'} ,{14,'N'},{15,'O'},\\n {16,'P'},{17,'Q'},{18,'R'},{19,'S'},{20,'T'},\\n {21,'U'},{22,'V'},{23,'W'},{24,'X'},{25,'Y'},\\n {31,'A'},{32,'B'},{33,'C'},{34,'D'},{35,'E'},\\n {36,'F'},{37,'G'},{38,'H'},{39,'I'},{40,'J'},\\n {41,'K'},{42,'L'},{43,'M'} ,{44,'N'},{45,'O'},\\n {46,'P'},{47,'Q'},{48,'R'},{49,'S'},{50,'T'},\\n {51,'U'},{52,'V'},{53,'W'},{54,'X'},{55,'Y'}\\n ],\\n {unsigned1 recid,STRING1 Letter});\\n\\nds := DISTRIBUTE(Somefile);\\n\\n//GenCode(ds, CntRecs, CntParts): \\n// ds = dataset to write to disk\\n// CntRecs = number of recs to write to each file\\n// CntParts = total number of parts to write\\nGenCode(ds, 5,(COUNT(SomeFile)DIV 5) + 1)\\n
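\\nFor reference, with the test file above the commented-out %'OutStr'% line should display generated actions roughly like these, one per part:\\n\\nOUTPUT(PROJECT(i(PartNum=1),{RECORDOF(i) AND NOT [PartNum]}),,'~RTTEST::FilePartTest1',overwrite);\\nOUTPUT(PROJECT(i(PartNum=2),{RECORDOF(i) AND NOT [PartNum]}),,'~RTTEST::FilePartTest2',overwrite);\\n// ... and so on, up to the part count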
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-06 19:27:14\" },\n\t{ \"post_id\": 4433, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi Richard,\\n\\nThanks for your solution, I have tried with your sample its working fine for the data with unique key, but my file is of only one column and with length 2304 can you suggest me how to add unique column to an dataset and use the function.\\n\\nAmos\", \"post_time\": \"2013-08-06 04:23:48\" },\n\t{ \"post_id\": 4421, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"sameermsc\", \"post_text\": \"Thanks a lot Richard for the solutions\\n\\ni would be interested to know if there is any HPCC function or so to perform a recursive operation that gets terminated based on a dynamically computed value. to my knowledge there is none available yet, i had to do some guess work to implement LOOP to handle such scenarios in the past\\n\\ni would love to see LOOP allowing a comparison of dynamically computed values in IF condition, something as shown below \\n\\n#IF(%PartCnt% > getCntParts())\\n\\nwhere getCntParts() is a function, which does some dynamic computation during each iteration and returns a value, its not a compile time constant.\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-08-02 06:52:23\" },\n\t{ \"post_id\": 4420, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"rtaylor\", \"post_text\": \"The solution to this is ECL code generation, which is what MACRO, FUNCTIONMACRO, and ECL's Template Language are all designed to do.\\n\\nHere is an example TWO PASS solution that I wrote for a client a couple of months ago:\\n//\\n// Example code - use without restriction. 
\\n// \\n\\n//TWO PASS Solution:\\n//Generate XML string to define the files to write to disk (STEP 1)\\nGenXMLstr(ds, fld, CntRecs) := FUNCTIONMACRO\\n\\n\\tRunDSRec := RECORD\\n\\t\\tds;\\n\\t\\tSTRING XMLstr{MAXLENGTH(100000)} := '';\\n\\tEND;\\n\\tt := TABLE(ds,RunDSrec);\\n\\tCntTbl := COUNT(t);\\n\\n\\tStartXML := '<XML>';\\n\\tEndXML := '</XML>';\\n\\tStartRec := '<rec>';\\n\\tEndRec := '</rec>';\\n\\tStartPart := '<part>';\\n\\tEndPart := '</part>';\\n\\n\\tRunDSRec XF(RunDSRec L, RunDSRec R, INTEGER C) := TRANSFORM\\n\\t\\tPartNum := (STRING)(C DIV CntRecs + 1);\\n\\t\\tThisRecXML := (STRING)R.RecID ;\\n\\t\\tAddXML := MAP(C = 1 => StartXML + StartRec + StartPart + PartNum + EndPart + '<id>[' + ThisRecXML,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tC = CntTbl => ',' + ThisRecXML + ']</id>' + EndRec + EndXML,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tC % CntRecs = 1 => ']</id>' + EndRec + StartRec + StartPart + PartNum + EndPart + '<id>[' + ThisRecXML ,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t',' + ThisRecXML );\\n\\t\\tSELF.XMLstr := L.XMLstr + AddXML;\\n\\t\\tSELF := R;\\n\\tEND;\\n\\ti := ITERATE(t,XF(LEFT,RIGHT,COUNTER));\\n RetStr := i[COUNT(i)].XMLstr;\\n RETURN RetStr;\\nENDMACRO;\\n\\n//////////////////////////////////////////////////////////////////////////////////////////////////////\\n//Generate the individual OUTPUT actions to write the files (STEP 3)\\nMAC_GenCode(ds, fld, StrXML) := MACRO\\n\\tloadxml(StrXML); \\n\\n\\t#DECLARE(OutStr);\\n\\t#SET(OutStr,'');\\n\\t#FOR(rec)\\n\\t\\t#APPEND(OutStr,'OUTPUT(' + #TEXT(ds) + '('+ #TEXT(fld) +' IN ' + %'id'% + '),,\\\\'~RTTEST::ApplyTest' + %part% + '\\\\');\\\\n');\\n\\t#END;\\n\\n\\t%'OutStr'% //this one just shows the ECL code \\n\\t// %OutStr% //this one runs the ECL code\\nENDMACRO;\\n\\n//////////////////////////////////////////////////////////////////////////////////////////////////////\\n\\n//Test the process with this file:\\nSomeFile := DATASET([{1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'},\\n {6,'F'},{7,'G'},{8,'H'},{9,'I'},{10,'J'},\\n {11,'K'},{12,'L'},{13,'M'} ,{14,'N'},{15,'O'},\\n {16,'P'},{17,'Q'},{18,'R'},{19,'S'},{20,'T'},\\n {21,'U'},{22,'V'},{23,'W'},{24,'X'},{25,'Y'}],\\n\\t\\t\\t\\t\\t{unsigned1 recid,STRING1 Letter});\\n\\t\\n//STEP 1: Generate the XML string\\tas a separate workunit\\n// this is required because the LOADXML function requires a constant-foldable string\\nGenXMLstr(Somefile, Somefile.recid, 5);\\n\\n//STEP 2:\\t\\tCopy and paste that XML string into a definition\\nX := '<XML><rec><part>1</part><id>[1,2,3,4,5]</id></rec><rec><part>2</part><id>[6,7,8,9,10]</id></rec><rec><part>3</part><id>[11,12,13,14,15]</id></rec><rec><part>4</part><id>[16,17,18,19,20]</id></rec><rec><part>5</part><id>[21,22,23,24,25]</id></rec></XML>';\\n\\n//STEP 3:\\t\\tUncomment this (comment out Step 1 code) and run the job:\\n// MAC_GenCode(Somefile, recid, X);\\n\\n
\\nI also wrote an alternative ONE PASS solution:\\n//\\n// Example code - use without restriction. \\n// \\n\\n//ONE PASS Solution:\\n//Generate a separate OUTPUT for each file part\\n// by creating a recordset of unique record IDs and the part number they should be written to\\n// then using the Template Language to generate the OUTPUTs, using the SET function to filter \\n// each record into its appropriate file part\\nGenCode(ds, fld, CntRecs, CntParts) := MACRO\\n\\n\\tRunDSRec := RECORD\\n\\t\\tds.fld;\\n\\t\\tUNSIGNED4 PartNum := 0; \\n\\tEND;\\n\\tt := TABLE(ds,RunDSrec);\\n\\n\\tRunDSRec XF(RunDSRec L, RunDSRec R, INTEGER C) := TRANSFORM\\n\\t\\tSELF.PartNum := C DIV CntRecs + 1;\\n\\t\\tSELF := R;\\n\\tEND;\\n\\ti := ITERATE(t,XF(LEFT,RIGHT,COUNTER));\\n\\n\\tloadxml('<XML/>'); //open dummy XML scope just to make template language available \\n\\n\\t#DECLARE(PartCnt);\\n\\t#SET(PartCnt,0);\\n\\t#DECLARE(OutStr);\\n\\t#SET(OutStr,'');\\n\\t#LOOP\\n \\t#SET(PartCnt,%PartCnt%+1);\\n\\t\\t#IF(%PartCnt% > CntParts)\\n\\t\\t #BREAK\\n\\t\\t#ELSE\\t\\n\\t\\t #APPEND(OutStr,'OUTPUT(' + #TEXT(ds) + '('+ #TEXT(fld) +' IN SET(i(PartNum='+%'PartCnt'%+'),'+#TEXT(fld)+')),,\\\\'~RTTEST::FilePartTest' + %PartCnt% + '\\\\',overwrite);\\\\n');\\n\\t #END;\\n\\t#END;\\n\\n\\t// %'OutStr'% //this one just shows the ECL code \\n\\t%OutStr% //this one runs the ECL code\\nENDMACRO;\\n\\n//////////////////////////////////////////////////////////////////////////////////////////////////////\\nIMPORT TrainingYourName;\\n//Test the process with this file:\\nSomeFile := DATASET([\\n {1,'A'},{2,'B'},{3,'C'},{4,'D'},{5,'E'},\\n {6,'F'},{7,'G'},{8,'H'},{9,'I'},{10,'J'},\\n {11,'K'},{12,'L'},{13,'M'} ,{14,'N'},{15,'O'},\\n {16,'P'},{17,'Q'},{18,'R'},{19,'S'},{20,'T'},\\n {21,'U'},{22,'V'},{23,'W'},{24,'X'},{25,'Y'},\\n {31,'A'},{32,'B'},{33,'C'},{34,'D'},{35,'E'},\\n {36,'F'},{37,'G'},{38,'H'},{39,'I'},{40,'J'},\\n {41,'K'},{42,'L'},{43,'M'} ,{44,'N'},{45,'O'},\\n {46,'P'},{47,'Q'},{48,'R'},{49,'S'},{50,'T'},\\n {51,'U'},{52,'V'},{53,'W'},{54,'X'},{55,'Y'}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t ],\\n\\t\\t\\t\\t\\t{unsigned1 recid,STRING1 Letter});\\n\\nds := DISTRIBUTE(Somefile);\\n\\n//GenCode(ds, fld, CntRecs, CntParts): \\n// ds = dataset to write to disk\\n// fld = unique record ID field in the dataset\\n// CntRecs = number of recs to write to each file\\n// CntParts = total number of parts to write\\nGenCode(Somefile, recid, 5, COUNT(SomeFile)/5);\\n\\n
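\\nOne caveat on that last call: with the PartNum formula above (C DIV CntRecs + 1), the final record can land in one part beyond COUNT(SomeFile)/5, so it is safer to round the part count up, for example:\\n\\nGenCode(Somefile, recid, 5, (COUNT(SomeFile) DIV 5) + 1);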
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-02 01:37:24\" },\n\t{ \"post_id\": 4393, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"sameermsc\", \"post_text\": \"here is a sample to give you an idea on how to do it, intentionally i have not used the LOOP part, want you to try it as its a good challenge and learning experience\\n\\nrec := {\\n\\tstring country;\\n};\\n\\nds := dataset(['INDIA', 'US', 'UK', 'AUSTRALIA', 'CHINA', 'RUSSIA', 'SINGAPORE'], rec);\\n\\n\\n\\nwrite_2_file(dataset(rec) dsv, integer recs, integer iter) := function\\n\\tds_tmp := dsv[(recs * (iter - 1)) + 1 .. (recs * iter)];\\n\\toutput(ds_tmp, , '~sameer::file_' + iter, overwrite);\\n\\treturn 'done';\\nend;\\n\\n\\n\\nwrite_2_file(ds, 2, 1);\\nwrite_2_file(ds, 2, 2);\\nwrite_2_file(ds, 2, 3);\\nwrite_2_file(ds, 2, 4);\\nwrite_2_file(ds, 2, 5);
\\n\\n\\nyou can execute above code and see the output\\n\\nlet me know how it goes\\n\\nregards,\\nSameer\", \"post_time\": \"2013-07-31 12:46:24\" },\n\t{ \"post_id\": 4392, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Sameer,\\n\\nI have tried Loop as well but it didnt help out.\\n\\nAmos\", \"post_time\": \"2013-07-31 12:13:44\" },\n\t{ \"post_id\": 4391, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi,\\n\\nI tried the code below\\n\\n//Function trying to call function inside a function\\n\\nIMPORT Mas;\\n export dataset multifile(dataset inputDataSet, integer recCount, integer fileCount, integer currentVal) := function\\n\\n newValue:=currentVal+1; \\n\\nfile:=if(fileCount>currentVal,\\n if(currentVal =1, \\n output(CHOOSEN(inputDataSet,recCount,currentVal)) ,\\n output(CHOOSEN(inputDataSet,recCount,(recCount*(currentVal-1))))\\n ) \\n );\\n \\nmultifile(inputDataSet, recCount, fileCount, newValue);\\n\\n return file ; \\n \\nEND;\\n------------------\\nGetting the error :\\nError: syntax error near "," : expected datarow, identifier, pattern-name, action, pattern (13, 23), 3002,\\n-------------\\n\\nimport Mas;\\nFile_FULL_GDM := DATASET('~FULL_GDM',MSFT.Output_layout.Layout_FULL_GDM,THOR);\\nInteger MyLimit1 := 10;\\nfileCount:=count(File_FULL_GDM)/MyLimit1;\\nInteger curval := 1;\\ninteger nextval:=10;\\n\\nds:= Mas.Common_Functions.multifile(File_FULL_GDM,MyLimit1,fileCount,curval) ;\\nds;\\n\\n\\nError: syntax error near "," : expected datarow, identifier, pattern-name, action, pattern (13, 23), 3002, D:\\\\Amos\\\\HPCC\\\\HPCC_System\\\\.\\\\MSFT\\\\Common_Functions\\\\Multifile.ecl\\nError: Object 'common_functions' does not have a member named 'multifile' (8, 28), 2171, \\nError: Unknown identifier "multifile" (8, 28), 2167,\\n\\n\\nAmos\", \"post_time\": \"2013-07-31 12:12:47\" },\n\t{ \"post_id\": 4390, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"sameermsc\", \"post_text\": \"Hi Amos,\\n\\nif you can guess the max number of records the dataset can have, then there is a way to achieve this using LOOP, you have to define some logic to control whether you want to write a portion of data to a new file or not.\\n\\nfor example, say we guess that the max possible record count in dataset is 100K, and if we want to split the data into 10 files containing 10K records each, then we can LOOP for 10 + 3 = 13 Times, where 3 is an additional buffer to ensure that we write all data\\n\\nbit trickier but achievable, of-course it works only when we can approximately guess the size \\n\\nHope this is helpful\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-07-31 10:59:55\" },\n\t{ \"post_id\": 4389, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi Richard,\\n\\nThanks for the update, Sample is used to fetch only one record from a selective record set.\\n\\nBut my requirement is that to split the file in to multiple file based on row count. 
\\n\\nI.e., I need to write all the file parts to disk with separate file names without losing any records.\\n\\nThanks,\\nAmos\", \"post_time\": \"2013-07-31 08:56:12\" },\n\t{ \"post_id\": 4387, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"rtaylor\", \"post_text\": \"gnanasinghamos,\\n\\nDo you simply need to split it into multiple recordsets, or do you need to write all the file parts to disk with separate filenames?\\n\\nIf it's the former, then you might look at the SAMPLE function.\\n\\n
Is there any function similar to "FOR" loop used in other languages.
No. ECL is a declarative, non-procedural language, so there is no loop (the LOOP function is more recursion than looping).\\n\\nRichard\", \"post_time\": \"2013-07-31 08:46:51\" },\n\t{ \"post_id\": 4384, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Bob,\\n\\nThanks, I will be knowing the file record count only during the run time, this code can be used for pre defined row count.\\n\\nIs there any function similar to "FOR" loop used in other languages.\", \"post_time\": \"2013-07-31 04:54:45\" },\n\t{ \"post_id\": 4380, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Re: Dataset to Multiple Output\", \"username\": \"bforeman\", \"post_text\": \"This works for me:\\n\\nRecs := 10;\\n\\nCHOOSEN($.STD_Persons.File,Recs);\\nCHOOSEN($.STD_Persons.File,Recs,Recs + 1);\\nCHOOSEN($.STD_Persons.File,Recs,(Recs * 2) + 1);\\nCHOOSEN($.STD_Persons.File,Recs,(Recs * 3) + 1);\\nCHOOSEN($.STD_Persons.File,Recs,(Recs * 4) + 1);
\\n\\netc...\\n\\n\\nBob\", \"post_time\": \"2013-07-30 19:17:46\" },\n\t{ \"post_id\": 4370, \"topic_id\": 981, \"forum_id\": 10, \"post_subject\": \"Dataset to Multiple Output\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi,\\n\\nI need to split the dataset into multiple output file based on the defined row count.\\n\\nEg: I have a file with 100 records and the defined row count is 10, for which i need to get 10 output.\\n\\nI have used Loop and Choosen to get the output but I couldn't get it.\\n\\nCould anyone help on this?\", \"post_time\": \"2013-07-30 10:25:53\" },\n\t{ \"post_id\": 4425, \"topic_id\": 991, \"forum_id\": 10, \"post_subject\": \"Re: Changing Layouts\", \"username\": \"omnibuzz\", \"post_text\": \"NORMALIZE(ConSt_DS,COUNT(LEFT.State),\\n TRANSFORM({STRING Country, STRING State},\\n\\t SELF.Country := LEFT.Country;\\n\\t SELF.State := LEFT.State[COUNTER];));\", \"post_time\": \"2013-08-02 17:32:43\" },\n\t{ \"post_id\": 4423, \"topic_id\": 991, \"forum_id\": 10, \"post_subject\": \"Changing Layouts\", \"username\": \"abhisr\", \"post_text\": \"How can I convert the dataset ConSt_DS of Layout R1 to a datset of Layout R2;\\n\\n\\nR1 := RECORD\\n\\tSTRING Country;\\n\\tSET OF STRING State;\\nEND;\\n\\nConSt_DS := DATASET([{'US',['AL','AK','AZ','AR','FL','GA']}],R1);\\nConSt_DS;\\n\\nR2 := RECORD\\n\\tSTRING Country;\\n\\tSTRING State;\\nEND;\\n\\nAllConSt_DS := DATASET([{'US','AL'},{'US','AK'},{'US','AZ'},{'US','AR'},{'US','FL'},{'US','GA'}],R2);\\nAllConSt_DS;\\n
\", \"post_time\": \"2013-08-02 15:03:50\" },\n\t{ \"post_id\": 9570, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"kevinLv\", \"post_text\": \"Hi, thanks for the quick answer. \\n\\nFor my Thor cluster, we have install HPCC-5.4.6 and java version is JDK1.8. \\nFor the classpath, I have set /opt/HPCCSystems/classes into /etc/profile and source the profile file. \\nMeanwhile I tried all the same as described to call java in below link: viewtopic.php?f=41&t=1509. But still now work. \\nIt still saied failed to resolve the class name.\", \"post_time\": \"2016-04-22 08:54:27\" },\n\t{ \"post_id\": 9568, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"richardkchapman\", \"post_text\": \"Two likely causes:\\n\\n1: Classpath issues - the jar file containing your classes is not being located\\n2: Java version issues - the jar file containing your classes was compiled with an incompatible version of Java from the one linked into the Java plugin\\n\\nWhat distro, and what version of Java, and what version of HPCC do you have installed?\", \"post_time\": \"2016-04-22 07:39:46\" },\n\t{ \"post_id\": 9566, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"kevinLv\", \"post_text\": \"hi rphpcc, please allow me to ask you about an issue while calling java with ECL. It always said: Failed to resolve class name ***. This happens when I called the simple class I created. However I can successfully call JavaCat class which you know was the example embeded java class in /opt/HPCCSystem/classes/. How could this happen? Please help to guide us to solve. Many thanks.\", \"post_time\": \"2016-04-22 03:45:25\" },\n\t{ \"post_id\": 4475, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"richardkchapman\", \"post_text\": \"I can't reproduce the JVM running out of memory - see https://track.hpccsystems.com/browse/HPCC-9871 for the test code I ran.\\n\\nCan you provide a more complete example of the code that caused the jvm to run out of memory for you? I had to guess some of the missing bits from your original example, and it's possible I guessed wrong. Also, were you running on thor, hthor, or roxie at the time?\", \"post_time\": \"2013-08-19 11:58:59\" },\n\t{ \"post_id\": 4471, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"richardkchapman\", \"post_text\": \"I opened https://track.hpccsystems.com/browse/HPCC-9870 to address point 1, and https://track.hpccsystems.com/browse/HPCC-9871 to investigate point 2\", \"post_time\": \"2013-08-19 10:33:45\" },\n\t{ \"post_id\": 4470, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"richardkchapman\", \"post_text\": \"I think there are two issues here:\\n\\n1. Easy access to the JNI option string in ECL - I think Jake's suggestion probably works (from reading the JNI docs) but we may want to provide a simpler method (or at least, document the use of JAVA_TOOL_OPTIONS)\\n2. Why is the jvm running out of memory> I would not expect there to be any correlation between the peak jvm memory requirements and the size of the input dataset in this example. 
Perhaps something is "leaking" so that the JVM garbage collector is not able to reclaim returned strings?\", \"post_time\": \"2013-08-19 10:10:47\" },\n\t{ \"post_id\": 4455, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"jsmith\", \"post_text\": \"You may be able to feed options to the JVM, by setting environment variable JAVA_TOOL_OPTIONS.\\nTry adding:\\n\\nexport JAVA_TOOL_OPTIONS="blah.."\\n\\n.. to /opt/HPCCSystems/sbin/hpcc_setenv\", \"post_time\": \"2013-08-14 10:46:46\" },\n\t{ \"post_id\": 4451, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Re: Hpcc - Java - JVM options\", \"username\": \"ghalliday\", \"post_text\": \"Unfortunately it isn't currently possible to do it from a user option. Can you could open an issue, (or we can) and we will make sure it is addressed.\\n\\nIf you want to modify the platform source code to explicitly add the options, then you need to modify the file plugins/javaembed/javaembed.cpp\\n\\naround line 95 you will see the code that initialises the options which are passed to initialize the jni. It should be relatively simple to modify the code to add the options you want there.\", \"post_time\": \"2013-08-13 21:04:09\" },\n\t{ \"post_id\": 4448, \"topic_id\": 994, \"forum_id\": 10, \"post_subject\": \"Hpcc - Java - JVM options\", \"username\": \"rphpcc\", \"post_text\": \"I am doing a POC to import java into HPCC. \\n\\nThe java code is very simple... takes a string input, split it by tab delimiter and return an array of Strings. \\n\\nThen in ECL code, I have referenced the java class and was able to execute the java method. It all looks & works great with small/ to medium dataset. \\n\\nWith bigger dataset, I am getting OutOFMemory Error.\\nError: Error: 0: javaembed: In method DataService.splitRecord:(Ljava/lang/String;)[Ljava/lang/String;: java.lang.OutOfMemoryError: Java heap space (0, 0), 0, \\n\\nHow can I tell HPCC to load JVM with specific options. i.e.. "-XX:PermSize", "-XX:MaxPermSize", -Xms and -Xmx. Please advise.\\n \\n\\nJava Code:\\nPublic Class DataService {\\n\\npublic String[] splitRecord(String input) {\\nreturn input.split("\\\\\\\\t");\\n}\\n\\n}\\n\\nECL Code:\\nEXPORT SET OF UNICODE splitRecord(Unicode a) := IMPORT(java,'DataService.splitRecord:(Ljava/lang/String;)[Ljava/lang/String;');\\n\\n\\nRowDataRecord createRowData(RawRecord l, Integer c) := TRANSFORM\\n\\t\\t\\tSELF.columns := splitRecord(l.rowValue);\\n\\t\\tEND;\\n\\n\\n\\n PROJECT(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t500k_info_raw, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tcreateRowData(LEFT,COUNTER)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t);\", \"post_time\": \"2013-08-12 18:30:47\" },\n\t{ \"post_id\": 4462, \"topic_id\": 998, \"forum_id\": 10, \"post_subject\": \"Re: Recursively spary files in the subfolders of a root fold\", \"username\": \"kevinLv\", \"post_text\": \"Thanks Richard.\\nI have spray all files in another way, embed all files in CSV and spray this CSV, then parse it in ECL, it works now.\", \"post_time\": \"2013-08-16 02:51:11\" },\n\t{ \"post_id\": 4461, \"topic_id\": 998, \"forum_id\": 10, \"post_subject\": \"Re: Recursively spary files in the subfolders of a root fold\", \"username\": \"rtaylor\", \"post_text\": \"Kevin,\\n\\nThe "srcfile=" parameter allows you to specify a comma-delimited list of the files to spray, and wildcards are allowed. However, there doesn't seem to be a provision for recursing sub-folders included in that syntax. 
\\n\\nTherefore, your current best way to do it would be to explicitly name all the directories you want to use. And you should submit the issue through JIRA to make the feature request for future builds.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-15 14:00:39\" },\n\t{ \"post_id\": 4460, \"topic_id\": 998, \"forum_id\": 10, \"post_subject\": \"Recursively spary files in the subfolders of a root folder\", \"username\": \"kevinLv\", \"post_text\": \"I have a project source code repository, now I want spray all source files(*.java, *.xml, *.cs ...) to thor, is it possible to do it using dfuplus?\\nI Have tried using "dfuplus action=spray ..." but only the file in specified folder are sprayed, all files in sub folders are missing, can anyone show light on this? thanks.\", \"post_time\": \"2013-08-15 03:28:08\" },\n\t{ \"post_id\": 4488, \"topic_id\": 1001, \"forum_id\": 10, \"post_subject\": \"Re: Calling LocaleFindAtStrength\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard.\\n-Srini\", \"post_time\": \"2013-08-25 03:04:42\" },\n\t{ \"post_id\": 4481, \"topic_id\": 1001, \"forum_id\": 10, \"post_subject\": \"Re: Calling LocaleFindAtStrength\", \"username\": \"richardkchapman\", \"post_text\": \"Hmmm - it's crashing somewhere inside the ICU unicode libraries, and it's not immediately clear why. The compiler tries to constant-fold the call, which is why it causes problems at compile time. \\n\\nLooks like it could be related to https://track.hpccsystems.com/browse/HPCC-9817 - though not sure it is exactly the same issue. I have created Jira issue https://track.hpccsystems.com/browse/HPCC-9879 to address this new problem.\", \"post_time\": \"2013-08-20 14:44:10\" },\n\t{ \"post_id\": 4480, \"topic_id\": 1001, \"forum_id\": 10, \"post_subject\": \"Calling LocaleFindAtStrength\", \"username\": \"omnibuzz\", \"post_text\": \"When I run the following:\\n\\n\\nIMPORT Std;\\nSTD.Uni.LocaleFindAtStrength(u'',u'abc',1,'',1);\\n
\\n\\nVersion 3.10.6.1 HTHOR - Fails with no error info in the work unit\\nversion 4.0.0.5 HTHOR - Compiles forever\\nVersion 4.0.0.6 HTHOR - Error: 1001: EclServer terminated unexpectedly\\nVersion 4.0.0.9 HTHOR - eclagent 0: System error: 0: Workunit was compiled for eclagent interface version 0, this eclagent requires version 149..149\", \"post_time\": \"2013-08-20 14:21:13\" },\n\t{ \"post_id\": 4485, \"topic_id\": 1002, \"forum_id\": 10, \"post_subject\": \"Re: LOADXML failing to load XML file\", \"username\": \"richardkchapman\", \"post_text\": \"LOADXML is a relic of a (very) old system which passed XML and ECL together in to a central server to allow ECL queries to be 'parameterized' and/or ECL code to be generated via the internal ECL template language. For testing purposes, the incoming XML could be supplied manually via LOADXML.\\n\\nI do not believe there is a mechanism for LOADXML to process anything but inline XML, nor to supply values to the ECL template language via external XML in any other way.\\n\\nIf you want to generate ECL from a template and an XML data file, I would do so using xslt before submitting the ECL. But if at all possible, I'd try to keep the ECL logic constant and 'parameterize' via stored variables and logic within the ECL code.\", \"post_time\": \"2013-08-23 08:37:10\" },\n\t{ \"post_id\": 4484, \"topic_id\": 1002, \"forum_id\": 10, \"post_subject\": \"LOADXML failing to load XML file\", \"username\": \"srbhkmr\", \"post_text\": \"My xml file is a valid xml file with no carriage returns or line feeds and it sits in the landing zone without being sprayed. I used FileServices.ExternalLogicalFileName() to deduce it's correct external logical file name. LOADXML(filename);
fails.\\n\\nNext I tried to read the whole file into a string handle 's' LOADXML(s);
gives me a 'Const-foldable string expression expected' error.\\n\\nIf I just inline the whole content of the file: LOADXML('<xml><tag>...</xml>');
it works fine.\\n\\nWhat am I missing here? Any pointers are appreciated.\\n\\nThanks,\", \"post_time\": \"2013-08-23 04:42:12\" },\n\t{ \"post_id\": 5452, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"micevepay\", \"post_text\": \"I am also interested in this question. I have the files in the dropzone already but how to perform a bulk spray of XML files. All the literature I have seen is for a single file which doesn't work 100k+.\", \"post_time\": \"2014-04-02 19:57:47\" },\n\t{ \"post_id\": 4585, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"DSC\", \"post_text\": \"sftp is a client-side utility. You would run it from a remote system, copying data from that system into the HPCC dropzone. You could also use scp, if you're using a Linux-like command line. Either one is designed to move files between systems, not between directories on the same system.\\n\\nHope this helps,\\n\\nDan\", \"post_time\": \"2013-09-17 11:40:20\" },\n\t{ \"post_id\": 4582, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"shank\", \"post_text\": \"Hi,\\n\\nWe tried running the sftp in VM. I have few questions:\\n1. Should we run the sftp inside the VM (or should i run it from my desktop)\\n2. I ran it in the VM. Both the remote and local directory is showing the dropzone path. When i tried changing the remote directory to my local, im getting an error "Could not find the path specified".\\n\\nHow do i resolve this.\\n\\nThanks,\\nShank\", \"post_time\": \"2013-09-17 06:45:00\" },\n\t{ \"post_id\": 4554, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"shank\", \"post_text\": \"Hi Richard,\\nThanks for the info , will try it out and get back if I need help.\\n\\nRegards,\\nShank.\", \"post_time\": \"2013-09-11 05:48:35\" },\n\t{ \"post_id\": 4553, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"DSC\", \"post_text\": \"Loading data into the drop zone is a process performed outside HPCC, right? You could use sftp, for example to copy them from one location to another. In this case, 'another' is the HPCC drop zone. You could use a different tool, such as ftp, but I think you would have to install extra software (e.g. an ftp server) on the drop zone's node to make that work.\\n\\nWe do this all the time, and our tool of choice is sftp. More precisely, it's a Java sftp library for automated transfers and an sftp client when we copy stuff manually. sftp is supported out-of-the-box with the standard HPCC installation.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-10 12:24:52\" },\n\t{ \"post_id\": 4552, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"shank\", \"post_text\": \"hi Richard,\\nI was able to spray the files when i changed the srcfile="" location from a random location to "Dropzone location".\\nBut i had to manually load the files in the dropzone. I just need to know is there a way to bulk load the files into the dropzone.\\n\\n\\nRegards,\\nShank.\", \"post_time\": \"2013-09-10 12:16:16\" },\n\t{ \"post_id\": 4517, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"rtaylor\", \"post_text\": \"shank,\\n\\nOK, I'm confused. 
\\n\\nYour first post said, &quot; it gives a success message, but the size of the sprayed record is found to be '0' in the ECL Watch.&quot; \\n\\nBut now you're saying, &quot;the file do not get sprayed at all&quot;\\n\\nSo, my questions are:\\n1. Does the spray operation say it succeeded? What exactly does it say?\\n2. Is there a listing in the Logical File Details page for the file that was just sprayed? \\n3. When you say, &quot;In the ECL Watch the logical file size shows as 0 bytes&quot; can you please post a screen shot of the page where you see this?\\nSo i would like to know what is wrong with the dfuplus command given by me.
I don't see anything wrong (unless you're spraying from a Windows box and got the slashes backwards on your srcfile path) -- hence my confusion.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-30 18:41:41\" },\n\t{ \"post_id\": 4513, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"shank\", \"post_text\": \"Richard,\\nJust to be clear , the file do not get sprayed at all . In the ECL Watch the logical file size shows as 0 bytes.\\nIn other words the logical file is empty.So i would like to know what is wrong with the dfuplus command given by me.\\n\\nThanks,\\nShank\", \"post_time\": \"2013-08-30 06:40:20\" },\n\t{ \"post_id\": 4509, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"Re: How to spray multiple XML files.\", \"username\": \"rtaylor\", \"post_text\": \"shank,but the size of the sprayed record is found to be '0' in the ECL Watch.
Do you mean the number of records column in the Logical Files list? If so, then this never shows any value from spraying any variable-length record format (CSV or XML). The more definitive test would be to run this code and see if you get a result:fn := 'myfilename';\\nds := DATASET(fn,{string1 char},FLAT);\\n\\nds;
This code will show you the first 100 characters in the file (this technique works with any just-sprayed file). If you see data, then all you need to do is define the correct RECORD structure and DATASET declaration for the file and start working with it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-08-29 13:42:28\" },\n\t{ \"post_id\": 4501, \"topic_id\": 1008, \"forum_id\": 10, \"post_subject\": \"How to spray multiple XML files.\", \"username\": \"shank\", \"post_text\": \"Got the syntax given below from one of the posts in the FORUM. But when I tried executing it on my machine it did not work. Actually, it gives a success message, but the size of the sprayed record is found to be '0' in the ECL Watch.\\n\\ndfuplus action=spray server=http://192.168.19.130:8010/ username=hpccdemo password=hpccdemo overwrite=1 replicate=1 nosplit=1 jobname=batchSpray srcip=10.87.175.176 srcfile=D:/HPCC/1.xml dstname=tutorial::sn::files dstcluster=mythor rowtag=doc
\\n\\nPlease let me know your suggestion/modifications(code).\", \"post_time\": \"2013-08-29 08:08:04\" },\n\t{ \"post_id\": 4505, \"topic_id\": 1011, \"forum_id\": 10, \"post_subject\": \"Sample WebService Client Code\", \"username\": \"shank\", \"post_text\": \"I have published an ECL query as a ROXIE WebService. I am able to invoke it from wsECL.But I need to invoke it from a C# Client and I am not sure how to do it. \\nIs there any documentation available to explain the same? or if someone can share a sample code , it would be good.\", \"post_time\": \"2013-08-29 11:22:52\" },\n\t{ \"post_id\": 4528, \"topic_id\": 1018, \"forum_id\": 10, \"post_subject\": \"Re: is their any ECL script or code for publishing query, in\", \"username\": \"bforeman\", \"post_text\": \"In ECL, Gordon Smith posted this a while back:\\n\\nOutRec1 := RECORD\\n string1 result;\\nEND;\\nraw := HTTPCALL('http://192.168.1.201:8010/WsWorkunits/WUPublishWorkunit?Wuid=W20130609-230229&JobName=yxy&Activate=1', 'GET', 'text/xml', OutRec1, onfail(skip));\\nraw;
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-04 13:54:31\" },\n\t{ \"post_id\": 4525, \"topic_id\": 1018, \"forum_id\": 10, \"post_subject\": \"is their any ECL script or code for publishing query, instea\", \"username\": \"karthikreddy\", \"post_text\": \"is their any ECL script or code for publishing query, instead of publishing manually??(for cron job results publishing)\", \"post_time\": \"2013-09-04 13:41:22\" },\n\t{ \"post_id\": 4576, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"ghalliday\", \"post_text\": \"Two suggestions:\\n\\n* You could define a transform to initialise the record, with defaults for all the parameters and then use that to initialise the dataset.\\n\\ne.g.,\\n\\ndataset([t(), t(1), t(,2)])\\n\\n* Add a jira issue to allow leading omitted parameters in an inline table.\", \"post_time\": \"2013-09-16 09:43:01\" },\n\t{ \"post_id\": 4550, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nYou should also look at the new-to-4.0 DEFAULT field modifier syntax (look at the Field Modifiers section of the RECORD structure documentation).\\n\\nHere's my version of your code:rec1 := RECORD\\n INTEGER x; \\n INTEGER y := 15;\\nEND;\\n\\nrec2 := RECORD\\n INTEGER x{DEFAULT(10)}; \\n INTEGER y{DEFAULT(15)};\\nEND;\\n\\nrec3 := RECORD\\n INTEGER x{DEFAULT(10)}; \\n INTEGER y{DEFAULT(15)};\\nEND;\\n\\nDATASET([{12}],Rec1); \\n\\nDATASET([],Rec2)[1]; // No curlies, reference rec 1\\n\\nDATASET(1,TRANSFORM(Rec3,SELF.y:=12,SELF := [])); \\n //using new DATASET(n,TRANSFORM) syntax
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-09 14:02:45\" },\n\t{ \"post_id\": 4548, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"bforeman\", \"post_text\": \"Hi guys,\\n\\nHere's one way to do it:\\n\\nrec1 := RECORD\\n INTEGER x; \\n INTEGER y;\\n END;\\n\\nxdef := 10;\\nydef := 15;\\t\\n\\nA:= DATASET([{12,ydef}],Rec1); // This works.. Apply given value for x while taking default for y\\nB:= DATASET([{xdef,ydef}],Rec1); // How do I make it take default for both?\\nC:= DATASET([{xdef,12}],Rec1); // How do I pass value for y alone and take default for x?\\n\\na;\\nb;\\nc;
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-06 15:12:11\" },\n\t{ \"post_id\": 4545, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"DSC\", \"post_text\": \"Duh. You're right, of course.\\n\\nYou have good questions. I look forward to someone providing good answers!\\n\\nDan\", \"post_time\": \"2013-09-05 20:47:20\" },\n\t{ \"post_id\": 4544, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"omnibuzz\", \"post_text\": \"For 2:\\nOmitting the braces would give me an empty record set. I need to have 1 record with default values. \\n\\nFor 3 Of course. But how would I solve for this?\\n
\\nrec1 := RECORD\\n INTEGER a; \\n INTEGER b:= 20;\\nEND;\\n\\nrec3 := RECORD(Rec1)\\n INTEGER x; \\n INTEGER y :=10;\\nEND;\\n\\nDATASET([{12,14}],Rec3);\\n
\", \"post_time\": \"2013-09-05 20:44:33\" },\n\t{ \"post_id\": 4541, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"Re: RECORD structure with default values\", \"username\": \"DSC\", \"post_text\": \"For your second example, omit the braces:\\n\\nDATASET([],Rec2);
\\nI don't know how to solve the third example, short of reordering the fields to be like Rec1. That's a good reason for either putting fields with default values at the end or giving default values to all fields.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-05 15:50:32\" },\n\t{ \"post_id\": 4539, \"topic_id\": 1022, \"forum_id\": 10, \"post_subject\": \"RECORD structure with default values\", \"username\": \"omnibuzz\", \"post_text\": \"First command works. How do I accomplish 2 and 3.\\n\\n\\nrec1\\t := RECORD\\n\\tINTEGER x;\\t\\n\\tINTEGER y := 15;\\nEND;\\n\\nrec2\\t\\t:= RECORD\\n\\tINTEGER x := 10;\\t\\n\\tINTEGER y := 15;\\nEND;\\n\\nrec3\\t\\t:= RECORD\\n\\tINTEGER x := 10;\\t\\n\\tINTEGER y;\\nEND;\\n\\nDATASET([{12}],Rec1); // This works.. Apply given value for x while taking default for y\\nDATASET([{}],Rec2); // How do I make it take default for both?\\nDATASET([{12}],Rec3); // How do I pass value for y alone and take default for x?\\n
\", \"post_time\": \"2013-09-05 15:05:15\" },\n\t{ \"post_id\": 4572, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Good point. Tried them all \", \"post_time\": \"2013-09-13 09:55:21\" },\n\t{ \"post_id\": 4570, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"DSC\", \"post_text\": \"I'm not familiar with the behavior you're describing with UNGROUP. I admit that I haven't used that particular feature extensively, though. Maybe someone with more knowledge can chime in on that.\\n\\nYour proposed approach to adding the counts seems entirely reasonable to me. I can think of a couple of other ways to do it, but I don't think they would be any better (and would likely just be worse, from a performance viewpoint). As long as you're performing a TABLE action, you might also want to add "total sales" into the mix. Maybe min/max as well. Just for grins, you know.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-12 12:31:06\" },\n\t{ \"post_id\": 4569, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Dan, I'll remember that..\\n\\nHowever, it seems only UNGROUP does the magic here. If I remove that statement, I get an across-the-groups incrementally ordered list. \\n\\nTo make the problem more interesting, against each region record, I wanted to add one more field containing count(sales folks) for that region. How can I do this optimally? \\n\\nI was thinking of creating a TABLE containing just city and count(city) and then JOINing that with my original dataset to populate the count value. Am I thinking in the right direction or is there a better way to do it?\\n\\nGayathri\", \"post_time\": \"2013-09-12 12:17:43\" },\n\t{ \"post_id\": 4567, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"DSC\", \"post_text\": \"
1. What exactly does an UNGROUP do? I can't seem to come up with a scenario where we would want to remove grouping (I know we used it in my example but I am unable to understand its purpose there).
\\n\\nUNGROUP removes the grouping set on a recordset. It may not be needed for this example, since we only output the result from that grouping, but you would definitely want to do this if you needed to perform a different recordset operation against all the records. To completely make something up, if you wanted to assign a sequential, unique integer to each record then you would UNGROUP and then run the recordset through another PROJECT, making that assignment.\\n\\n2. I did a step-by-step OUTPUT of the sales persons code and I found that the counter reset happened only after the UNGROUP statement!! Why is this so? I expected it to happen after the PROJECT itself. However, that TRANSFORM only gave incremental numbers for the entire list
\\n\\nRemember that ECL is a declarative language. You're basically defining what you want to do, not how to do it. There is only a loose correlation between your ECL code and the final execution. I honestly don't know why you saw what you saw, but I've seen similar things in the past. Personally, I eventually just decided that if the final result is correct then the compiler apparently knew what it was doing. During my ECL learning curve I spent weeks trying to force the compiler into (what I thought was) optimization paths, as if I was programming in C++ or Java. That turned out to be a Very Bad Idea. Honestly, within certain bounds, the more naive you approach a problem in ECL, the better the result, performance-wise. That was something of a revelation to me.\\n\\nHope this helps!\\n\\nDan\", \"post_time\": \"2013-09-12 11:18:27\" },\n\t{ \"post_id\": 4566, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Dan It sure worked like a charm.. \\n\\nA couple of questions:\\n1. What exactly does an UNGROUP do? I can't seem to come up with a scenario where we would want to remove grouping (I know we used it in my example but I am unable to understand its purpose there). \\n\\n2. I did a step-by-step OUTPUT of the sales persons code and I found that the counter reset happened only after the UNGROUP statement!! Why is this so? I expected it to happen after the PROJECT itself. However, that TRANSFORM only gave incremental numbers for the entire list\\n\\nGayathri\", \"post_time\": \"2013-09-12 03:40:41\" },\n\t{ \"post_id\": 4556, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"Re: How to assign a rank within a group?\", \"username\": \"DSC\", \"post_text\": \"In general, GROUP is the right function for this task. The Programmer's Guide has a section that explains GROUP and shows some other examples. The key to this is that operations against a grouped recordset operate on each subset independently, automatically. The PROJECT, below, has a COUNTER that resets every time the field(s) on which you've grouped changes.\\n\\nHere is one way to do what you're looking for:\\n\\n
// Structure to hold our data\\nSalesRec := RECORD\\n STRING1 salesRegion; // A-C\\n UNSIGNED1 salesRepID; // >100\\n UNSIGNED4 sales; // Positive large integer\\n UNSIGNED2 salesRank := 0; // 1=best; assigned later\\nEND;\\n\\n// Create sample data\\nSalesRec MakeSalesData(UNSIGNED1 c) := TRANSFORM\\n SELF.salesRegion := (>STRING1<)(RANDOM() % 3 + 65);\\n SELF.salesRepID := 100 + c;\\n SELF.sales := RANDOM();\\nEND;\\n\\nsalesData := DATASET(50,MakeSalesData(COUNTER));\\n\\n// Group records by region\\nd1 := SORT(salesData,salesRegion);\\nd2 := GROUP(d1,salesRegion);\\n\\n// Sort within region by sales, descending\\nd3 := SORT(d2,-sales);\\n\\n// Assign rank\\nd4 := PROJECT\\n (\\n d3,\\n TRANSFORM\\n (\\n SalesRec,\\n SELF.salesRank := COUNTER,\\n SELF := LEFT\\n )\\n );\\n\\n// Undo group and re-sort to show rank within region\\nd5 := UNGROUP(d4);\\nd6 := SORT(d5,salesRegion,salesRank);\\n\\n// Show result\\nOUTPUT(d6);
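\\nAnd, as a rough sketch of the UNGROUP-then-PROJECT idea mentioned above (the NumberedRec name is illustrative only), assigning a sequential, unique id across the whole recordset once the grouping has been removed:\\n
NumberedRec := RECORD(SalesRec)\\n UNSIGNED4 seq;\\nEND;\\n\\nallRecs := UNGROUP(d4);\\nnumbered := PROJECT(allRecs,\\n TRANSFORM(NumberedRec,\\n SELF.seq := COUNTER, // runs across all records, not per group\\n SELF := LEFT));\\nOUTPUT(numbered);\\n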
\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-11 12:35:47\" },\n\t{ \"post_id\": 4555, \"topic_id\": 1025, \"forum_id\": 10, \"post_subject\": \"How to assign a rank within a group?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Take this scenario - I have a list of sales persons and their sales data. \\n\\nFor each region, I need to group sales persons belonging to that region and assign them a rank based on their sales data. What function would I use here? \\n\\nI sorted/grouped the list and then at the point of assigning rank, I got stuck. I contemplated using NORMALIZE but how can I pass COUNT(GROUP) to it to specify the number of iterations post which the counter should reset? Also, how would I specify to it that the COUNT(GROUP) should operate on Region?\", \"post_time\": \"2013-09-11 11:50:26\" },\n\t{ \"post_id\": 4587, \"topic_id\": 1029, \"forum_id\": 10, \"post_subject\": \"Re: CRC Failure\", \"username\": \"bforeman\", \"post_text\": \"OK, well if you can share any more information to help us find the reason, that would be great.\\n\\nWhat are the datasets? What type of files?\\n\\nSample code?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-17 12:24:41\" },\n\t{ \"post_id\": 4586, \"topic_id\": 1029, \"forum_id\": 10, \"post_subject\": \"Re: CRC Failure\", \"username\": \"gopi\", \"post_text\": \"Yes, Join statement is local...\", \"post_time\": \"2013-09-17 12:00:01\" },\n\t{ \"post_id\": 4579, \"topic_id\": 1029, \"forum_id\": 10, \"post_subject\": \"Re: CRC Failure\", \"username\": \"bforeman\", \"post_text\": \"After you DISTRIBUTE, is your JOIN LOCAL?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-16 17:53:29\" },\n\t{ \"post_id\": 4568, \"topic_id\": 1029, \"forum_id\": 10, \"post_subject\": \"CRC Failure\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nwhile apply join from the distribute of two dataset, the below error noticed.\\nError: System error: 10115: Graph[1], diskread[2]: SLAVE IP Address: CRC Failure having read file: .....path......._43_of_100\\n\\nplease help me any one\\nAdvance Thanks\\n\\n--\\nJoy\", \"post_time\": \"2013-09-12 11:34:37\" },\n\t{ \"post_id\": 4574, \"topic_id\": 1030, \"forum_id\": 10, \"post_subject\": \"Re: Is there any function to fetch Current Date or System da\", \"username\": \"rtaylor\", \"post_text\": \"Try this:IMPORT STD;\\nSTD.Date.Today;
There is a Date standard library that contains a large number of date handling functions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-15 15:32:17\" },\n\t{ \"post_id\": 4571, \"topic_id\": 1030, \"forum_id\": 10, \"post_subject\": \"Is there any function to fetch Current Date or System date?\", \"username\": \"sprvs\", \"post_text\": \"[color=#000080:2a60jm46]Is there any available function to fetch the Current Date or the System date in ECL.\\nRight now we are getting the current date hard-coded and using it to make a computation between two dates. But to make it more dynamic we just need to fetch the current date without hard-coding, so that it will be more optimal.\\n\\nThanks,\\nSprvs\", \"post_time\": \"2013-09-13 04:45:53\" },\n\t{ \"post_id\": 4623, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"rtaylor\", \"post_text\": \"And my approach was using PROCESS (sort of an "ITERATE on steroids" function, which is IMO the kind of thing you need) with a nested child dataset in the one-record rhs dataset I was using, but I ran into some runtime problems that I will be reporting in JIRA. I will update this thread once I've submitted the issue.\\n\\nRichard\", \"post_time\": \"2013-09-20 18:49:25\" },\n\t{ \"post_id\": 4614, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"srbhkmr\", \"post_text\": \"I tried using LOOP construct also but using that I think I need to use ROWS(LEFT) in a sub-query of loopbody, which again is not supported!\\n\\n\\nFurther description of my problem:\\nI've a graph like following:\\n\\n\\nrepresented by a dataset of RECORD type:\\n\\n
rec := RECORD\\n INTEGER x; // nodeId\\n INTEGER z; // reference(directParentId)\\nEND;\\n
\\nx z\\n1 -1\\n2 -2\\n3 -3\\n4 1\\n5 1\\n6 2\\n7 3\\n8 3\\n9 4\\n10 5\\n11 6\\n12 7\\n13 9
\\n\\nFurther, I want to 'flatten' this up and assign each node the id of its topmost (root) parent, i.e. I want to process it into the following dataset:\\nx z\\n1 -1\\n2 -2\\n3 -3\\n4 -1\\n5 -1\\n6 -2\\n7 -3\\n8 -3\\n9 -1\\n10 -1\\n11 -2\\n12 -3\\n13 -1
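\\nOne way to sketch this flattening in ECL (an illustrative approach, not from the original thread; it assumes the rec layout above and that LOOP with a loopfilter is supported on the target platform, which may mean hThor on older versions) is to keep re-pointing z at the parent's z until a root (negative) value is reached:\\n
resolveRoots(DATASET(rec) d) := FUNCTION\\n // one pass: rows still pointing at a positive node id pick up that node's z\\n step(DATASET(rec) cur) := JOIN(cur, d,\\n LEFT.z = RIGHT.x,\\n TRANSFORM(rec, SELF.x := LEFT.x, SELF.z := RIGHT.z),\\n LOOKUP);\\n // rows stay in the loop until their z has become a negative root id\\n RETURN LOOP(d, LEFT.z > 0, step(ROWS(LEFT)));\\nEND;\\n\\n// flattened := resolveRoots(myGraphDS); // myGraphDS is whatever DATASET holds the rows above\\n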
\\n\\nHow can I achieve this from ECL code?\\n\\nFunctions I've tried:\\nProject- TRANSFORM\\nITERATE\\nLOOP\\nbut neither of above seem to have been designed to accomplish this, or am I missing something?\", \"post_time\": \"2013-09-20 04:55:08\" },\n\t{ \"post_id\": 4597, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"srbhkmr\", \"post_text\": \"Thanks Richard for your quick reply. \\nI'm afraid what I'm looking for is a bit different than this.\\n\\nHere's exactly what I'm trying to achieve:\\n\\nGiven a RECORD definition like:\\nrec := RECORD\\n INTEGER x; //RowId\\n INTEGER z; //referenceValue\\nEND;\\n
\\nand for example, corresponding dataset like:\\n\\nx z\\n1 -1\\n2 1\\n3 -1\\n4 2\\n5 4\\n6 -1\\n7 3\\n8 6\\n9 7\\n. .\\n\\n
\\nIn this RECORD definition each rowId x has a reference value z which refers to another row's Id, except for the rows with z = -1, which mark the start of a new Section.\\n\\nNote: the reference value z (except for z = -1) for any rowId x always refers back to a previously defined row, i.e. the condition z < x always holds true in my dataset.\\n\\nNow I want to assign each row the corresponding 'SectionId'. The new transformed RECORD definition looks like:\\n\\n//Transformed Definition\\nRec := RECORD\\n STRING x; //RowId\\n STRING z; //SectionId\\nEND;\\n
\\n\\nFor this, first I sort the initial dataset so that all 'SectionStarts' (z = -1) accumulate in the beginning.\\nThen, in the TRANSFORM I can assign them unique SectionId's starting from 1, 2, 3... with the help of COUNTER.\\n\\nIn the same go I also want to assign other entries to their corresponding SectionId's. So, that ultimately my dataset transforms into:\\n\\n\\nx z\\n1 1\\n3 2\\n6 3\\n2 1 \\n4 1\\n5 1\\n7 2\\n8 3\\n9 2\\n. .\\n
\\n\\nThis can happen easily only if I've access to results of all previous transformed rows in my TRANSFORM definition.\\n\\nThe ECL code that I've is as following:\\n\\nrec := RECORD\\n INTEGER x; //RowId\\n INTEGER z; //reference\\nEND;\\n\\nD := DATASET([{1, -1},{2, 1},{3, -1},{4, 2}, {5, 4}, {6, -1}, {7, 3}, {8, 6}, {9, 7}],rec);\\n\\nD1 := SORT(D, z);\\n\\nrec F1(rec L, INTEGER C) := TRANSFORM\\n SELF.x := L.x;\\n SELF.z := IF(L.z = -1, C, L.z);\\nEND;\\n\\nD2 := PROJECT(D1, F1(LEFT, COUNTER));\\nD2;\\n
\\n\\nThe line:\\n\\n\\nSELF.z := IF(L.z = -1, C, L.z);\\n\\n// Instead of L.z (in else clause) there has to be some map like code from where I can access the corresponding sectionId's as per the referenceValue.\\n
\\nneeds to provide a mapping which can only be obtained if I've access to already processed results of previous rows.\\n\\nThanks,\", \"post_time\": \"2013-09-17 19:42:19\" },\n\t{ \"post_id\": 4595, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"rtaylor\", \"post_text\": \"srbhkmr,\\n\\nUnless I misunderstood what you're trying to accomplish, what you describe is exactly what ITERATE does. Here's simple example:Rec := RECORD\\n STRING x;\\n STRING y;\\n STRING z;\\nEND;\\nD := DATASET([{'a','b',''},{'c','d',''},{'e','f',''},{'g','h',''}],rec);\\n\\nrec XF(rec L, rec R) := TRANSFORM\\n SELF.z := L.z + R.x + R.y;\\n SELF := R;\\nEND;\\n\\nres := ITERATE(D,XF(LEFT,RIGHT));\\nres;
The result of this code is this dataset:a\\tb\\tab\\nc\\td\\tabcd\\ne\\tf\\tabcdef\\ng\\th\\tabcdefgh\\n
So, is this anything like what you are trying to accomplish? Or is there a specific "map kind of structure" that you need?\\n\\nHTH,\\n\\nRichard\\nRichard\", \"post_time\": \"2013-09-17 15:37:35\" },\n\t{ \"post_id\": 4592, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"srbhkmr\", \"post_text\": \"Thanks for the reply, Dan. ITERATE, as far as I can see, can be useful only if we want to do simple aggregation kind of operations while maintaining some 'summary' in a previous row.\\nBut in my case I want to maintain a map kind of structure for all the predecessor rows. so, that for the current row I can just look up that map/table and determine the value to be filled in.\", \"post_time\": \"2013-09-17 14:24:10\" },\n\t{ \"post_id\": 4591, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"Re: TRANSFORM: Accessing result of previous rows\", \"username\": \"DSC\", \"post_text\": \"Have you looked at the ITERATE function? I don't know if that would satisfy your needs, but it may be worth a look.\\n\\nDan\", \"post_time\": \"2013-09-17 13:20:40\" },\n\t{ \"post_id\": 4583, \"topic_id\": 1033, \"forum_id\": 10, \"post_subject\": \"TRANSFORM: Accessing result of previous rows\", \"username\": \"srbhkmr\", \"post_text\": \"While defining a PROJECT-TRANSFORM definition my code logic expects the results of previous row transforms. How can I access results of previous rows within a TRANSFORM definition?\\n\\nPutting the problem more concretely:\\nSuppose dataset D has attributes x,y,z and for each row of D I'm writing a TRANSFORM to populate values of z. Value of z for a row actually is defined in terms of previously evaluated values of z's in preceding rows. How do I achieve this? \\n\\nIs TRANSFORM the right construct to use in ECL here? I was thinking of using ROWS() somehow in TRANSFORM definition but found out that it can't be used in a TRANSFORM.\\n\\nAny pointers are welcome.\\nThanks,\", \"post_time\": \"2013-09-17 08:21:18\" },\n\t{ \"post_id\": 4637, \"topic_id\": 1034, \"forum_id\": 10, \"post_subject\": \"Re: A very simple join fails\", \"username\": \"ghalliday\", \"post_text\": \"This looks like it is an instance of JIRA issue HPCC-9509. \\n\\nAs far as I can see it has been there a long time. 
The fix is released to the sources that will be used for the 4.2 build.\\n\\nThe work around is to add ,ALL to the join.\", \"post_time\": \"2013-09-25 12:48:37\" },\n\t{ \"post_id\": 4611, \"topic_id\": 1034, \"forum_id\": 10, \"post_subject\": \"Re: A very simple join fails\", \"username\": \"jeeves\", \"post_text\": \"Thank You!\", \"post_time\": \"2013-09-19 09:59:21\" },\n\t{ \"post_id\": 4593, \"topic_id\": 1034, \"forum_id\": 10, \"post_subject\": \"Re: A very simple join fails\", \"username\": \"rtaylor\", \"post_text\": \"jeeves,\\n\\nAnd the error also went away when I changed your code to this:TweetRec := RECORD \\n\\n STRING word;\\n INTEGER id;\\n \\nEND;\\n\\nDS := DATASET([\\n{'Coke',1},\\n{'Coke',1},\\n{'Coke',1},\\n{'Coke',1},\\n{'Coke',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'Pepsi',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'India',1},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'BMW',2},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3},\\n{'Ford',3}\\n],TweetRec);\\n\\nTweetsData := DISTRIBUTE(DS);\\n\\n\\nresult1 := DEDUP(TweetsData(word='Pepsi'),id,ALL);\\nresult2 := DEDUP(TweetsData(word='Coke'),id,ALL);\\n\\n\\nfinalResult := JOIN(result1,result2,LEFT.id=RIGHT.id);\\n\\nfinalResult;
Strange behavior indeed. I'll submit a JIRA issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-17 14:42:15\" },\n\t{ \"post_id\": 4589, \"topic_id\": 1034, \"forum_id\": 10, \"post_subject\": \"Re: A very simple join fails\", \"username\": \"jeeves\", \"post_text\": \"This error went away after I replaced the inline dataset with a CSV file backed dataset.\", \"post_time\": \"2013-09-17 12:48:46\" },\n\t{ \"post_id\": 4588, \"topic_id\": 1034, \"forum_id\": 10, \"post_subject\": \"A very simple join fails\", \"username\": \"jeeves\", \"post_text\": \"This code fails rather strangely. I tested this in enterprise_3.10.4-1 and community_4.0.0-rc14. Is this a bug or am i missing something?\\n\\nTweetRec := RECORD \\n\\n\\tSTRING word;\\n\\tINTEGER id;\\n\\t\\t\\nEND;\\n\\nTweetsData := DATASET([{'Coke',1},{'Pepsi',1},{'India',1},{'BMW',2},{'Ford',3}],TweetRec);\\n\\n\\n\\nresult1 := DEDUP(TweetsData(word='Pepsi'),id,ALL);\\nresult2 := DEDUP(TweetsData(word='Coke'),id,ALL);\\n\\n\\nfinalResult := JOIN(result1,result2,LEFT.id=RIGHT.id);\\n\\nfinalResult;\\n
\\n\\nThis fails with the following error\\n\\n\\nError: Compile/Link failed for W20130917-123043 (see '//10.254.93.188/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0), 3000, W20130917-123043\\nWarning: JOIN condition folded to constant, converting to an ALL join (16, 16), 4531, D:\\\\dev\\\\hpcc\\\\examples\\\\BugExample.ecl\\nWarning: (0, 0), 0, \\nWarning: ---------- compiler output -------------- (0, 0), 0, \\nWarning: W20130917-123043_1.cpp: In function ‘IHThorArg* fAc3()’: (0, 0), 0, \\nWarning: W20130917-123043_1.cpp:30:52: error: cannot allocate an object of abstract type ‘cAc3’ (0, 0), 0, \\nWarning: W20130917-123043_1.cpp:18:8: note: because the following virtual functions are pure within ‘cAc3’: (0, 0), 0, \\nWarning: /opt/HPCCSystems/componentfiles/cl/include/eclhelper.hpp:1584:24: note: \\tvirtual ICompare* IHThorJoinBaseArg::queryCompareRight() (0, 0), 0, \\nWarning: /opt/HPCCSystems/componentfiles/cl/include/eclhelper.hpp:1585:24: note: \\tvirtual ICompare* IHThorJoinBaseArg::queryCompareLeft() (0, 0), 0, \\nWarning: /opt/HPCCSystems/componentfiles/cl/include/eclhelper.hpp:1588:24: note: \\tvirtual ICompare* IHThorJoinBaseArg::queryCompareLeftRight() (0, 0), 0, \\nWarning: (0, 0), 0, \\nWarning: --------- end compiler output ----------- (0, 0), 0,
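\\nA sketch of the workaround mentioned earlier in the thread (explicitly adding the ALL option to the JOIN) against the same attributes:\\n
// explicitly request an ALL join instead of letting the folded condition trip the compiler\\nfinalResult := JOIN(result1,result2,LEFT.id=RIGHT.id,ALL);\\n\\nfinalResult;\\n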
\", \"post_time\": \"2013-09-17 12:39:42\" },\n\t{ \"post_id\": 4607, \"topic_id\": 1035, \"forum_id\": 10, \"post_subject\": \"Re: BEGINC++ and throwing exceptions.\", \"username\": \"flavio\", \"post_text\": \"Allan,\\n\\nIf you want to experiment to see how exceptions are handled for embedded C++ code, you could call rtlFail(), which is what the generated code does when it encounters a FAIL() construct.\\n\\nThere are also examples in the C++ code that handles the Embedded Java code (https://github.com/hpcc-systems/HPCC-Platform/blob/master/plugins/javaembed/javaembed.cpp), so you can see some code examples there.\\n\\nPlease let me know if this helps.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-09-18 13:11:19\" },\n\t{ \"post_id\": 4606, \"topic_id\": 1035, \"forum_id\": 10, \"post_subject\": \"Re: BEGINC++ and throwing exceptions.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nJust to be clear, you are embedding C++ in ECL, and in your C++ embedding error checking? \\n\\nDid you know that you can look at the entire C++ generated by the ECL? Just set in your Debug the following switch:\\n\\nSaveCPPTempFiles=1
\\n\\nMaking an educated guess, I would think that any error thrown by C++ will also be handled by the Dali messaging queue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-18 13:02:54\" },\n\t{ \"post_id\": 4590, \"topic_id\": 1035, \"forum_id\": 10, \"post_subject\": \"BEGINC++ and throwing exceptions.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis may be documented, if so please just point me to it, but I can't find a mapping between a 'throw' of a c++ exception and the way/where it's caught in ECL.\\nIn fact any information on this whole area would be very useful.\\n\\nmany thanks\\n\\nAllan\", \"post_time\": \"2013-09-17 13:17:33\" },\n\t{ \"post_id\": 4625, \"topic_id\": 1039, \"forum_id\": 10, \"post_subject\": \"Re: Fixed Width Layouts and Grouping Records Together.\", \"username\": \"rtaylor\", \"post_text\": \"Timothy,\\n\\nYou might also be able to use IFBLOCK in your RECORD structure (http://hpccsystems.com/community/docs/ecl-language-reference/html/record-structure).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-21 08:27:13\" },\n\t{ \"post_id\": 4622, \"topic_id\": 1039, \"forum_id\": 10, \"post_subject\": \"Re: Fixed Width Layouts and Grouping Records Together.\", \"username\": \"bforeman\", \"post_text\": \"Hi Timothy,\\n\\nTake a look at the DENORMALIZE function, it is designed to group related fields into a single record based on a matching condition.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-20 18:09:08\" },\n\t{ \"post_id\": 4613, \"topic_id\": 1039, \"forum_id\": 10, \"post_subject\": \"Fixed Width Layouts and Grouping Records Together.\", \"username\": \"tspowell-AU\", \"post_text\": \"I have a flat file that contains fixed width rows but the rows can be different layout based on an identifier at the beginning of the row. I’ve seen examples of reading in the file using IFBLOCK, but my question has to do with how can I take a group of records and make them one record?\\n\\nIf the identifiers are A1, B1, B2, C1, D1, A1, … where A1 contains a unique key and B1, B2, C1, D1 should be group with the A1 that precedes them but they do not contain the unique key how can I group them all together in one record?\\n\\nThanks.\\nTimothy\", \"post_time\": \"2013-09-19 20:51:26\" },\n\t{ \"post_id\": 4669, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"sprvs\", \"post_text\": \"Thanks Richard. That was helpful.\\n\\nSprvs\", \"post_time\": \"2013-09-30 10:09:56\" },\n\t{ \"post_id\": 4650, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"rtaylor\", \"post_text\": \"Sprvs,\\n\\nDICTIONARY is brand new to the language, while INDEX has been around a long time. An INDEX is a file on disk, just as a DATASET is a file on disk, and the two work together as an ISAM file system. A DICTIONARY is not a file at all, but more like an indexed recordset in memory. Beyond that, maybe someone who knows more can educate us both.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-26 15:25:55\" },\n\t{ \"post_id\": 4647, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"sprvs\", \"post_text\": \"Thanks Richard.\\nwe are using DICTIONARY instead of INDEX. So is it means that DICTIONARY is better than INDEX. \\nwhat is the difference between INDEX & DICTIONARY..???\\nIn which scenarios can then both be used. 
It would be helpful if you can throw some light on this.\\n\\nThanks,\\nSprvs\", \"post_time\": \"2013-09-26 08:31:42\" },\n\t{ \"post_id\": 4642, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"rtaylor\", \"post_text\": \"Sprvs,\\n\\nYou can have multiple fields in your DICTIONARY index, or you could just define multiple DICTIONARYs for the same recordset, with different index fields. Either way -- your choice.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-25 18:57:47\" },\n\t{ \"post_id\": 4627, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"sprvs\", \"post_text\": \"Thanks Richard. DICTIONARY looks good for this scenario.\\nOne more clarification. I can understand from the DICTIONARY syntax that its not possible to use two different fields for indexing. Is there any other way to handle indexing using two or more Variable length fields.\\n\\nRegards,\\nSprvs\", \"post_time\": \"2013-09-23 12:16:37\" },\n\t{ \"post_id\": 4624, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"Re: How to create an Index after making a Parse operation\", \"username\": \"rtaylor\", \"post_text\": \"Try looking at using a DICTIONARY (new on 4.0 release) instead of an INDEX.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-21 08:24:09\" },\n\t{ \"post_id\": 4615, \"topic_id\": 1040, \"forum_id\": 10, \"post_subject\": \"How to create an Index after making a Parse operation\", \"username\": \"sprvs\", \"post_text\": \"I make the below operations:\\n1.Creating a Rocord Structure to hold a paragraph\\n2.Creating a Dataset applying the above Record Structure.\\n3.Creating a PARSE operation using the above Dataset and the Record Structure.\\n\\nNow I want to apply INDEX to the PARSE output. But I'm not sure how to create an INDEX on a Recordset (all the variations of INDEX opration in the HPCC manual shows the use of a dataset and not a recordset. I'm referring the below manual : http://hpccsystems.com/community/docs/e ... html/index)\\n\\nAlso, the fpos pointer gets created on the dataset for all variations of INDEX operation in the HPCC manual. How do i create the pointer on a recordset instead of a dataset?\\n\\nI tried creating a dataset from my recordset; but i am not sure how to create a pointer and am not sure how to create an index on my dataset without using a logical file.\\n\\nPlease advise.\\n\\nThis is my code:\\n\\n\\n//record structre to hold the page contents\\nRecPara := RECORD \\nSTRING10000 Para {XPATh('paragraph')};\\nEND;\\n\\n//map the page contents to a dataset applying the record structure above\\nSetPara := DATASET ('~tutorial::VS::bjp_modi',{RecPara, UNSIGNED8 Recfps{virtual(fileposition)}},XML('feed/doc/info'));\\n\\n//tokenize the page into the individual words, capturing the fullstops and punctuations like commas separately\\nTOKEN Word := PATTERN('(\\\\\\\\.|[^ ]+?(?=; |, | |\\\\\\\\. 
|\\\\\\\\.))');\\n\\nWordStruct := RECORD\\n STRING WordsInPage := MATCHTEXT(Word);\\nEND;\\n\\n//calculate the total number of sentencs\\nWordSet := PARSE(SetPara, Para, Word, WordStruct, SCAN);\\n\\n/*IndexedWords := INDEX(SetPara, {WordsInPage,Recfps},'~tutorial::VS::bjp_modi'); \\nBuildwords := BUILDINDEX(IndexedWords);*/\\n\\nNumberOfSentences := count( WordSet(WordSet.WordsInPage='.'));\\nNumberOfSentences;\\n\\n//fetch all the words without any extra characters (thereby the puncutations will not add to the word count)\\nWords := WordSet(WordSet.WordsInPage<>'.', WordSet.WordsInPage<>',', WordSet.WordsInPage<>';');\\nWords;\\n\\n//calculate the Total number of words\\nNumberOfWords := count(Words);\\nNumberOfWords;\\n\\n//sort and group the words list\\nSortedWords := sort(Words,Words.WordsInPage);\\n\\nGroupedWordsStruct :=\\n{\\n\\tSortedWords.WordsInPage;\\n\\tCOUNT(GROUP);\\n};\\n\\n//frame the group count in a table\\t\\t\\nGroupedWords := TABLE(SortedWords, GroupedWordsStruct, SortedWords.WordsInPage);\\nGroupedWords;\\n\\n//pull the list of unique words\\nUniqueWords := dedup(Words, ALL);\\n\\n//calculate the number of unique words in the page\\ncount(UniqueWords);\\n
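\\nA minimal sketch of the DICTIONARY approach suggested earlier in the thread (WordCounts and WordDict are illustrative names, and the word counts are made-up sample values):\\n
WCRec := RECORD\\n STRING word;\\n UNSIGNED4 cnt;\\nEND;\\nWordCounts := DATASET([{'hpcc',3},{'data',5},{'platform',2}],WCRec);\\nWordDict := DICTIONARY(WordCounts,{word => cnt}); // keyed, in-memory lookup (4.0 and later)\\nOUTPUT(WordDict['data'].cnt); // 5\\nOUTPUT('cluster' IN WordDict); // FALSE\\n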
\", \"post_time\": \"2013-09-20 10:47:13\" },\n\t{ \"post_id\": 4638, \"topic_id\": 1045, \"forum_id\": 10, \"post_subject\": \"Re: Landing-zone on a NFS?\", \"username\": \"DSC\", \"post_text\": \"We use NAS storage for some of our projects. What we do is mount the NAS on the system that sponsors the landing zone, then create a symlink from within the landing zone, pointing to a directory on the NAS.\\n\\nThe only downside of that is if the mount point ever goes stale then you may run into problems with any scheduled monitor jobs (STD.File.MonitorFile). I'm actually investigating that issue right now.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-25 12:52:20\" },\n\t{ \"post_id\": 4636, \"topic_id\": 1045, \"forum_id\": 10, \"post_subject\": \"Landing-zone on a NFS?\", \"username\": \"srbhkmr\", \"post_text\": \"What all file-systems does Dali support? Can our landing zone be on a NFS?\\n\\nThanks,\", \"post_time\": \"2013-09-25 09:21:17\" },\n\t{ \"post_id\": 4683, \"topic_id\": 1050, \"forum_id\": 10, \"post_subject\": \"Re: R code fails with a standard library exception[Server 4.\", \"username\": \"omnibuzz\", \"post_text\": \"Bob- I just verified that it does not work in 4.0.0 version either.\\n-Srini\", \"post_time\": \"2013-09-30 17:21:02\" },\n\t{ \"post_id\": 4681, \"topic_id\": 1050, \"forum_id\": 10, \"post_subject\": \"Re: R code fails with a standard library exception[Server 4.\", \"username\": \"bforeman\", \"post_text\": \"Did this test code work in the last version? If so, you probably should open up an issue in the Issue Tracker.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-30 14:24:50\" },\n\t{ \"post_id\": 4664, \"topic_id\": 1050, \"forum_id\": 10, \"post_subject\": \"R code fails with a standard library exception[Server 4.0.2]\", \"username\": \"omnibuzz\", \"post_text\": \"When I run the code below, this is the error that I get:\\n\\nError: standard library exception (std::exception Error evaluating: read.csv(File);) (0, 0)\\n\\nIMPORT STD;\\nIMPORT R;\\n\\nds1 := DATASET(10000,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nstring read(varstring File) := EMBED(R)\\n\\tread.csv(File);\\nENDEMBED;\\n\\nfile := '~thor::number1.csv';\\noutFile := OUTPUT(ds1,,file, CSV(HEADING('',''), SEPARATOR(','), TERMINATOR('\\\\n')));\\nphyFile := STD.System.Thorlib.LogicalToPhysical(file);\\nFileText := read(phyFile);\\n\\nSEQUENTIAL(Outfile,phyfile,filetext);\\n
\", \"post_time\": \"2013-09-29 00:54:11\" },\n\t{ \"post_id\": 4679, \"topic_id\": 1054, \"forum_id\": 10, \"post_subject\": \"Re: How to use NOCASE to make a pattern match case insensiti\", \"username\": \"bforeman\", \"post_text\": \"The way I read the docs, the NOCASE is applied during the actual search, but does not affect the actual result. To do what you want, you can simply modify the search result in the PARSE record output, like this:\\n\\n WordStruct := RECORD\\n STRING WordsInPage :=STD.Str.ToUpperCase(MATCHTEXT(Word));\\n END;
\\n\\nAfter that, your cross-tab report will reflect the correct word count.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-30 14:21:07\" },\n\t{ \"post_id\": 4670, \"topic_id\": 1054, \"forum_id\": 10, \"post_subject\": \"How to use NOCASE to make a pattern match case insensitive\", \"username\": \"sprvs\", \"post_text\": \"Hi,\\n\\nMy code has the below sequences:\\nI'm getting a sentence of data as input and performing a PARSE operation on it. Using PARSE I'm fetching all the words in that sentence, displaying them without any special characters and getting the COUNT of them. To make the pattern match in PARSE case-insensitive, I'm using NOCASE. But still after using the NOCASE the grouped output shows a same word with different Cases as two different occurences. But NOCASE should take it as case-insensitive and should give the COUNT of that word as 2 after making the GROUP operation. Thereby NOCASE is not working here for some reason. Pls clarify.\\n\\nThanks,\\nSprvs\\n\\nRecPara := RECORD\\nSTRING Para; \\nEND;\\n\\nParagraph :='HPCC (High Performance Computing Cluster) is a massive parallel-processing computing platform that solves Big Data problems. The Platform is Open Source! ';\\n\\n//Map the page contents to a dataset applying the record structure above\\nSetPara := DATASET ([Paragraph],RecPara);\\n\\n//Tokenize the page into the individual words, capturing the fullstops and punctuations like commas separately\\nTOKEN Word := PATTERN('(\\\\\\\\.|[^ ]+?(?=; |, | |\\\\\\\\. |\\\\\\\\.))');\\n\\nWordStruct := RECORD\\n STRING WordsInPage := MATCHTEXT(Word);\\nEND;\\n\\n//Calculate the total number of sentencs\\nWordSet := (PARSE(SetPara, Para, Word, WordStruct, SCAN, NOCASE));\\n\\n//Fetch all the words without any extra characters (thereby the puncutations will not add to the word count)\\nWords := WordSet(WordSet.WordsInPage<>'.', WordSet.WordsInPage<>',', WordSet.WordsInPage<>';');\\nOUTPUT (Words,NAMED('Words'));\\n\\n//Sort and group the words list\\nSortedWords := sort(Words,Words.WordsInPage);\\n\\nGroupedWordsStruct :=\\n{\\n\\tSortedWords.WordsInPage;\\n\\tWordCount := COUNT(GROUP);\\n};\\n\\n//Frame the group count in a table\\t\\t\\nGroupedWords := TABLE(SortedWords, GroupedWordsStruct, SortedWords.WordsInPage);\\nOUTPUT(GroupedWords, NAMED('GroupedWords'));\\n
\", \"post_time\": \"2013-09-30 10:21:54\" },\n\t{ \"post_id\": 4685, \"topic_id\": 1058, \"forum_id\": 10, \"post_subject\": \"Re: STD.File.ExternalLogicalFileName\", \"username\": \"rtaylor\", \"post_text\": \"Shank,\\n\\nBefore you can spray a file to the VM it must first be in the VM's landing zone (AKA dropzone), and you get it there by using the Upload/Download file menu selection on the ECL Watch page (or using something like winscp if the file is > 2Gb). Once the file is there, then the spray functions will work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-30 18:28:23\" },\n\t{ \"post_id\": 4675, \"topic_id\": 1058, \"forum_id\": 10, \"post_subject\": \"Re: STD.File.ExternalLogicalFileName\", \"username\": \"shank\", \"post_text\": \"I also tried using STD.File.SprayFixed but i got the similar errors.\\n.\\nimport std;\\n\\nSTD.File.SprayFixed( '10.87.95.50' , 'C:\\\\\\\\Users\\\\\\\\saipadmarajen_v\\\\\\\\Desktop\\\\\\\\modi.xml' , 8192, 'DESTINE', 'IN::MyFile' , -1\\n, '_http://192.168.246.129:8010/FileSpray' );\\n\\n
\\n\\nError: Too many parameters passed to function sprayvariable (expected 14) (357, 268), 2061, D:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Too many parameters passed to function fsprayvariable (expected 14) (375, 269), 2061, D:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\nError: Object 'std' does not have a member named 'file' (3, 5), 2171, C:\\\\Users\\\\saipadmarajen_v\\\\AppData\\\\Local\\\\Temp\\\\TFRCEE5.tmp\\nError: Unknown identifier "File" (3, 5), 2167, C:\\\\Users\\\\saipadmarajen_v\\\\AppData\\\\Local\\\\Temp\\\\TFRCEE5.tmp\\nError: Object 'std' does not have a member named 'sprayfixed' (3, 10), 2171, C:\\\\Users\\\\saipadmarajen_v\\\\AppData\\\\Local\\\\Temp\\\\TFRCEE5.tmp\\n
\\n\\nEspecially this particular error raises my curiosity. What would this mean:\\n\\n\\nError: Object 'std' does not have a member named 'file' (3, 5), 2171, C:\\\\Users\\\\saipadmarajen_v\\\\AppData\\\\Local\\\\Temp\\\\TFRCEE5.tmp\\n
\\n\\nPls advise.\\nRegards,\\nShank\", \"post_time\": \"2013-09-30 11:54:59\" },\n\t{ \"post_id\": 4674, \"topic_id\": 1058, \"forum_id\": 10, \"post_subject\": \"STD.File.ExternalLogicalFileName\", \"username\": \"shank\", \"post_text\": \"I was recently looking at an ecl code which was used to spray files from ECL IDE itself in this linkhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=893. I have a question on the method STD.File.ExternalLogicalFileName.\\nDoes the above method take care of both uploading and spraying or it is just used to spray the files.\\n\\nI am using a HPCC VM on a WIndows 7 machine. I tried running this command in ECL:\\n\\n\\nIMPORT STD;\\nRecordStruct := RECORD\\n STRING nameofthefile;\\n STRING XMLtxt;\\nEND;\\n\\nsourceMachineIp := '10.87.175.176';\\nsourceMachineFile := '/D$/HPCC/ERFE/CleansedKeys/';\\nohWrkDs := DATASET(STD.File.ExternalLogicalFileName(sourceMachineIp, sourceMachineFile), RecordStruct, FLAT);\\nOUTPUT(ohWrkDs);\\n
\\n\\nBut the above code did not work on my machine. got the below errors\\n[b]Error[/b]: Too many parameters passed to function sprayvariable (expected 14) (357, 268), 2061, C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\n\\n[b]Error[/b]: Too many parameters passed to function fsprayvariable (expected 14) (375, 269), 2061, C:\\\\Program Files\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\n\\n[b]Error[/b]: Object 'std' does not have a member named 'file' (9, 24), 2171, C:\\\\Users\\\\shankar_narayanan01\\\\AppData\\\\Local\\\\Temp\\\\TFRFF67.tmp\\n
\\n\\nPlease advise why I am getting these errors.\\nRegards,\\nShank\", \"post_time\": \"2013-09-30 11:47:38\" },\n\t{ \"post_id\": 4736, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"omnibuzz\", \"post_text\": \"@Richard - I am trying to read the files locally within a Java program. The potential with having that ability is endless. I can explain better when I have a demo. \\n@Jake - That is amazing news. Thanks for the quick turn around.\\nCheers\\nSrini\", \"post_time\": \"2013-10-07 13:43:38\" },\n\t{ \"post_id\": 4733, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"jsmith\", \"post_text\": \"Looks like a LogicalToPhysical has been broken in Thor since OSS day1 (show how little it's used). For some unknown reason there was a single line missing, causing the reply from master to slave to go awol.\\n\\nWe'll target 4.0.4 for the fix.\", \"post_time\": \"2013-10-04 14:35:15\" },\n\t{ \"post_id\": 4713, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nGlad it will work for you, but why do you need it?\\n\\nRichard\", \"post_time\": \"2013-10-01 13:10:40\" },\n\t{ \"post_id\": 4690, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"omnibuzz\", \"post_text\": \"You rock!! I guess that should work for most of the default cases. Thanks for the code. I should be able to proceed for now. I have anyways created a bug in JIRA for it not running from THOR.\\nRegards\\nSrini\", \"post_time\": \"2013-10-01 01:24:47\" },\n\t{ \"post_id\": 4689, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\nWhat I am trying to accomplish is to get the list of all the file parts for a logical file as a dataset
\\nThen just run this code on hThor and you're done:\\nIMPORT STD;\\n\\nfile := '~RTTEST::temp::testfile';\\ndir := STD.File.GetLogicalFileAttribute(file,'directory');\\nparts := (integer)STD.File.GetLogicalFileAttribute(file,'numparts');\\nSetWords := STD.str.splitwords(file,'::');\\nnam := SetWords[COUNT(SetWords)];\\n\\nds2 := DATASET(parts,\\n TRANSFORM({STRING fil},\\n SELF.fil := dir + '/' + nam + '._' + COUNTER +'_of_' + parts));\\nds2;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-30 20:16:56\" },\n\t{ \"post_id\": 4688, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"omnibuzz\", \"post_text\": \"Thanks, Richard. I think you summed it up right. I am getting the same error when I split it as 2 workunits. \\n\\nWhat I am trying to accomplish is to get the list of all the file parts for a logical file as a dataset. I will raise the ticket in JIRA.\\nRegards\\nSrini\", \"post_time\": \"2013-09-30 19:28:15\" },\n\t{ \"post_id\": 4686, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Re: Get all the physical file parts of a logical file\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nThe problem to report in JIRA is that the LogicalToPhysical function appears to only work on hthor. \\n\\nI just ran your code to create the file (on a 3-node Thor) as a separate workunit, then tried just the LogicalToPhysical function and it failed telling me that "number of parts in file ~RTTEST::temp::testfile does not match cluster size 1" --that's the problem here.\\n\\nSo, what exactly are you trying to accomplish with this code?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-30 18:55:48\" },\n\t{ \"post_id\": 4684, \"topic_id\": 1059, \"forum_id\": 10, \"post_subject\": \"Get all the physical file parts of a logical file\", \"username\": \"omnibuzz\", \"post_text\": \"The code below works in HTHOR, but does not work in THOR. It runs forever. \\n\\nIMPORT STD;\\nds1 := DATASET(10000,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\nfile := '~thor::temp::testfile';\\noutFile := OUTPUT(ds1,,file, CSV(HEADING('',''), SEPARATOR(','), TERMINATOR('\\\\n')),OVERWRITE);\\nds2 := DATASET(STD.System.Thorlib.Nodes(),TRANSFORM({STRING file},SELF.file := STD.System.Thorlib.LogicalToPhysical(file)),DISTRIBUTED);\\nSEQUENTIAL(outFile,OUTPUT(ds2));\\n
\", \"post_time\": \"2013-09-30 17:25:28\" },\n\t{ \"post_id\": 4714, \"topic_id\": 1062, \"forum_id\": 10, \"post_subject\": \"Re: How to use DICTIONARY...?\", \"username\": \"rtaylor\", \"post_text\": \"sprvs,I'm trying to use DICTIONARY for indexing a set of records. \\n\\nI have a question on how to use it when compared to INDEX.
The two are completely different. As I said in this prior thread http://hpccsystems.com/bb/viewtopic.php?f=10&t=1040&sid=71fc53bcccb84d5db369273d63efc3bb:An INDEX is a file on disk, just as a DATASET is a file on disk, and the two work together as an ISAM file system. A DICTIONARY is not a file at all, but more like an indexed recordset in memory.
Perhaps you should just explain exactly what you're trying to accomplish and then we can discuss which form would be better for your specific use case.\\nIn INDEX, we just create the INDEX attribute and build it using BUILDINDEX; and all filters on the recordset (containing the indexed field(s) ) and GROUP BY would automatically use INDEX thereby improving performance.\\n\\n1. Is the same applicable for DICTIONARY too?
No. A DICTIONARY is simply defined and has no physical representation on disk, so it does not need to be built.\\n2. Will DICTIONARY indexes be automatically used when a filter or GROUP BY is used on the field(s) that were indexed using DICTIONARY? or should we explicitly use [] to use the dictionary attribute on the contrary?
You need to specify using the DICTIONARY wherever you want to use it.\\n3. Does DICTIONARY give a performance boost to queries (like INDEX does)?
I would expect so, but you could do some A/B testing to find out in your particular use case.\\n4. Overall, is DICTIONARY better than INDEX? or are they meant to meet slightly different requirements/scenarios?
They are different.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-01 13:36:56\" },\n\t{ \"post_id\": 4695, \"topic_id\": 1062, \"forum_id\": 10, \"post_subject\": \"How to use DICTIONARY...?\", \"username\": \"sprvs\", \"post_text\": \"Hi,\\n\\nI'm trying to use DICTIONARY for indexing a set of records. \\n\\nI have a question on how to use it when compared to INDEX. In INDEX, we just create the INDEX attribute and build it using BUILDINDEX; and all filters on the recordset (containing the indexed field(s) ) and GROUP BY would automatically use INDEX thereby improving performance.\\n\\n1. Is the same applicable for DICTIONARY too?\\n2. Will DICTIONARY indexes be automatically used when a filter or GROUP BY is used on the field(s) that were indexed using DICTIONARY? or should we explicitly use [] to use the dictionary attribute on the contrary?\\n3. Does DICTIONARY give a performance boost to queries (like INDEX does)?\\n4. Overall, is DICTIONARY better than INDEX? or are they meant to meet slightly different requirements/scenarios?\\n\\nPlease explain. The ECL language reference does not seem to contain details on these points.\\n\\nThanks\\nsprvs\", \"post_time\": \"2013-10-01 09:04:21\" },\n\t{ \"post_id\": 4755, \"topic_id\": 1066, \"forum_id\": 10, \"post_subject\": \"Re: ECL - Java - contextClassLoader is not set\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for sharing with us the error and workaround. Could you please also create a ticket regarding this issue here? https://track.hpccsystems.com\\n\\nMany thanks!\", \"post_time\": \"2013-10-11 14:49:08\" },\n\t{ \"post_id\": 4743, \"topic_id\": 1066, \"forum_id\": 10, \"post_subject\": \"ECL - Java - contextClassLoader is not set\", \"username\": \"rphpcc\", \"post_text\": \"We have been doing a POC to call java code from ECL code. \\n\\nOur java code uses multiple open source / third party java libraries. At some point during this integration, our ECL code stated throwing - java.lang.NoClassDefFoundError. We spent weeks trying to figure out what the heck is going on. It turns out that the contextClassLoader is null.\\n\\nThis error showed up while creating an instance of Jboss Drool's specific classes.\\n\\nThe following code fixed it. \\nif(Thread.currentThread().getContextClassLoader() == null) {\\n\\t\\t\\t\\tThread.currentThread().setContextClassLoader(String.class.getClassLoader());\\n\\t\\t\\t\\t\\n\\t\\t\\t}\", \"post_time\": \"2013-10-09 15:39:52\" },\n\t{ \"post_id\": 4772, \"topic_id\": 1075, \"forum_id\": 10, \"post_subject\": \"Re: WHEN and BEFORE\", \"username\": \"omnibuzz\", \"post_text\": \"Thanks Bob. I have created the JIRA ticket.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10243\\n\\nRegards\\nSrini\", \"post_time\": \"2013-10-16 18:20:29\" },\n\t{ \"post_id\": 4771, \"topic_id\": 1075, \"forum_id\": 10, \"post_subject\": \"Re: WHEN and BEFORE\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nI believe you have found a bug and will need to log this in the Issue Tracker. For the record, I searched our dataland cluster and could find no ECL code that uses BEFORE with WHEN, so that's probably why no one has discovered it until now. 
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-16 16:51:47\" },\n\t{ \"post_id\": 4770, \"topic_id\": 1075, \"forum_id\": 10, \"post_subject\": \"WHEN and BEFORE\", \"username\": \"omnibuzz\", \"post_text\": \"I want to use the following construct within a function\\nWHEN(trigger, action [ BEFORE | SUCCESS ] )\\n\\nI want the action to execute before the function returns value. I am not sure how the syntax goes.\\nI tried the following:\\n\\nRETURN WHEN(Return_value, MyAction BEFORE);// Does not work.. Unknown identifier\\nRETURN WHEN(Return_value, MyAction, BEFORE);// Does not work either.. \\n\\nThis works by the way.. So, there was no problem with the attributes I used to my knowledge.\\n \\nRETURN WHEN(Return_value,MyAction); \\n\\nRegards\\nSrini\", \"post_time\": \"2013-10-16 14:55:53\" },\n\t{ \"post_id\": 4788, \"topic_id\": 1076, \"forum_id\": 10, \"post_subject\": \"Re: loading a node with a list of child nodes\", \"username\": \"rtaylor\", \"post_text\": \"Shank,
Thanks Bob, we were looking for the revision details to be displayed in individual fields but the above code displays all the revision details in a single field. \\nIs there an option to segregate the details from the single revision details field into separate fields.
Bob's code should already be giving you the revision details as separate fields in the "childrev" nested child dataset. \\n\\nIs the "single field" you're seeing displaying xml text? If so, you just need to look at the result through ECL Watch -- the ECL IDE cannot properly display nested child datasets, so it just shows you the xml and gives you a link to the ECL Watch page to view the results properly formatted by xslt templtes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-18 15:46:16\" },\n\t{ \"post_id\": 4781, \"topic_id\": 1076, \"forum_id\": 10, \"post_subject\": \"Re: loading a node with a list of child nodes\", \"username\": \"shank\", \"post_text\": \"Hi,\\nThanks Bob, we were looking for the revision details to be displayed in individual fields but the above code displays all the revision details in a single field. \\nIs there an option to seggregate the details from the single revision details field into seperate fields.\\n\\nRegards,\\nShank\", \"post_time\": \"2013-10-18 07:19:07\" },\n\t{ \"post_id\": 4775, \"topic_id\": 1076, \"forum_id\": 10, \"post_subject\": \"Re: loading a node with a list of child nodes\", \"username\": \"bforeman\", \"post_text\": \"I just whipped this up:\\n\\nlayout_revision := RECORD\\n REAL id;\\n STRING revisiontext;\\nEND;\\n\\nlayout_page := RECORD\\n UNSIGNED8 id;\\n STRING15 title;\\n STRING25 author;\\n DATASET(layout_revision) childrev{xpath('revision'),maxCount(10)};\\nEND;\\n\\nds := DATASET('~CLASS::BMF::ForumPost',layout_page,XML('root/page'));\\nOUTPUT(ds);
\\n\\nThe only change I made to your sample XML is that I wrapped the whole thing around a "root" tag, like this:\\n\\n<root>\\n <page>\\n <id>1</id>\\n <title>Title 1</title>\\n <author>Author 1</author>\\n\\n <revision>\\n <id>1.1</id>\\n <revisiontext>this is revision 1 of page 1</revisiontext>\\n </revision>\\n\\n <revision>\\n <id>1.2</id>\\n <revisiontext>this is revision 2 of page 1</revisiontext>\\n </revision>\\n\\n </page>\\n\\n <page>\\n <id>2</id>\\n <title>Title 2</title>\\n <author>Author 2</author>\\n\\n <revision>\\n <id>2.1</id>\\n <revisiontext>this is revision 1 of page 2</revisiontext>\\n </revision>\\n\\n <revision>\\n <id>2.2</id>\\n <revisiontext>this is revision 2 of page 2</revisiontext>\\n </revision>\\n\\n </page>\\n</root>
\\n\\nJust spray the file using the XML option and "page" as the row tag and remember that the XPATH node contents are case sensitive.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-10-17 12:19:04\" },\n\t{ \"post_id\": 4774, \"topic_id\": 1076, \"forum_id\": 10, \"post_subject\": \"Re: loading a node with a list of child nodes\", \"username\": \"bforeman\", \"post_text\": \"Start by looking at using XPATH in the RECORD structure with a nested child DATASET. There are good examples in the Language Reference and also I think in the Programmer's Guide.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-17 11:58:34\" },\n\t{ \"post_id\": 4773, \"topic_id\": 1076, \"forum_id\": 10, \"post_subject\": \"loading a node with a list of child nodes\", \"username\": \"shank\", \"post_text\": \"I was looking at an XML that had a list of page elements and each page elements had elements like id ,title , author and multiple revisions. Each revision had an id and text. The sample xml is given below.\\n<page>\\n <id>1</id>\\n <title>Title 1</title>\\n <author>Author 1</author>\\n\\n <revision>\\n <id>1.1</id>\\n <revisiontext>this is revision 1 of page 1</revisiontext>\\n </revision>\\n\\n <revision>\\n <id>1.2</id>\\n <revisiontext>this is revision 2 of page 1</revisiontext>\\n </revision>\\n\\n</page>\\n\\n<page>\\n <id>2</id>\\n <title>Title 2</title>\\n <author>Author 2</author>\\n\\n <revision>\\n <id>2.1</id>\\n <revisiontext>this is revision 1 of page 2</revisiontext>\\n </revision>\\n\\n <revision>\\n <id>2.2</id>\\n <revisiontext>this is revision 2 of page 2</revisiontext>\\n </revision>\\n\\n</page>\\n
\\n\\nI want to load it in one of the two data models that is shown in the file attached.\\n\\n[attachment=0:14o1vst4]Data Models.PNG\\n\\n\\nRegards,\\nShank\", \"post_time\": \"2013-10-17 08:25:54\" },\n\t{ \"post_id\": 4790, \"topic_id\": 1077, \"forum_id\": 10, \"post_subject\": \"Re: INDEPENDENT within a function?\", \"username\": \"ghalliday\", \"post_text\": \"I have added a bit more of a comment to that issue.\\n\\nAlso, in 4.2 I have added a new keyword ORDERED. It is similar to SEQUENTIAL that it should ensure the order is preserved, but it doesn't have the extra semantic of preventing values from being reused. I need to finish testing it, and produce some documentation.\", \"post_time\": \"2013-10-21 08:58:31\" },\n\t{ \"post_id\": 4778, \"topic_id\": 1077, \"forum_id\": 10, \"post_subject\": \"Re: INDEPENDENT within a function?\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Bob. It's the exact same issue that I have. I see that it's not resolved yet. I will reach out to Gavin and get an update.\\n\\nCheers\\nSrini\", \"post_time\": \"2013-10-17 13:39:41\" },\n\t{ \"post_id\": 4777, \"topic_id\": 1077, \"forum_id\": 10, \"post_subject\": \"Re: INDEPENDENT within a function?\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nThere was a similar post a while back with Dan Camper, Richard Taylor, and Gavin. You might want to follow up with Gavin and post your feedback here.\\n\\nHere is the link:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=536&hilit=persistent&sid=224136529f1423496ab17ba79997ca28\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-17 12:41:02\" },\n\t{ \"post_id\": 4776, \"topic_id\": 1077, \"forum_id\": 10, \"post_subject\": \"INDEPENDENT within a function?\", \"username\": \"omnibuzz\", \"post_text\": \"Let me illustrate what I want to achieve using the code below:\\n\\nIMPORT STD;\\n\\nFn1 := FUNCTION\\n a:= STD.System.Debug.msTick();//:INDEPENDENT;\\n Act := SEQUENTIAL(OUTPUT(a),STD.System.Debug.Sleep(1000),OUTPUT(a));\\nRETURN WHEN(TRUE,Act);\\nEND;\\n\\nSEQUENTIAL(Fn1,Fn1,Fn1,Fn1,Fn1,Fn1,Fn1); \\n
\\n\\nIn the above function,I want the value of "a" to remain a constant within the function scope but to change with each function call. \\nIf I run the above function as is, it returns a different value for each output within the function.. \\nIf I run is with the "INDEPENDENT" uncommented, "a" has the same value for all the function calls. \\nWhat do I do?\\n-Srini\", \"post_time\": \"2013-10-17 12:25:29\" },\n\t{ \"post_id\": 4892, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"richardkchapman\", \"post_text\": \"Thanks for the report. It appears that the R interpreter does not like carriage-return characters in the code it is asked to evaluate, and ECL saved by / submitted by the ECL IDE tends to have them in (code submitted from Eclipse or the ECL playground tends not to).\\n\\nThis issue has been addressed in release 4.2.0, which will be released (hopefully) next week.\", \"post_time\": \"2013-11-07 09:59:12\" },\n\t{ \"post_id\": 4789, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"balajisampath\", \"post_text\": \"Thank you Joe,\\n\\nRaised JIRA ticket \\nhttps://track.hpccsystems.com/browse/HPCC-10260\", \"post_time\": \"2013-10-18 17:00:45\" },\n\t{ \"post_id\": 4787, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"joe.chambers\", \"post_text\": \"Interesting -- I just tried it and it appears there may be a bug. If you have an account for the bug tracker please post it if not let me know and I'll post it. Most of my R work has been external to the IDE. It appears the compiler for the IDE doesn't support the extended syntax.\\n\\nThis works\\n\\nIMPORT R;\\ninteger add() := EMBED(R,'10+10;');\\nadd();\\n
\\n\\nThis does not\\n\\nIMPORT R;\\ninteger add() := EMBED(R)\\n10 + 10;\\nENDEMBED;\\n\\nadd();\\n
\", \"post_time\": \"2013-10-18 14:48:01\" },\n\t{ \"post_id\": 4786, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"balajisampath\", \"post_text\": \"It works in ECL playground not in IDE. I had IMPORT R statement in my code earlier also.\\n\\nBelow is the version I am using:\\n\\nVersion:\\t\\tcommunity_4.0.2-1\\nServer:\\t\\tenterprise-with-plugins_4.0.0-9[tags/enterprise_4.0.0-9-0-g514941]\\nCompiler:\\t4.0.2 community_4.0.2-1\\n\\n\\nComplete error message in IDE:\\nError: standard library exception (std::exception Error evaluating: \\nval+1) (0, 0), 0,\", \"post_time\": \"2013-10-18 14:27:00\" },\n\t{ \"post_id\": 4785, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"joe.chambers\", \"post_text\": \"I was able to run your code (with adding the IMPORT R; to the top)\\n\\n\\nIMPORT R;\\ninteger add1(integer val) := EMBED(R)\\n1 + 1;\\nENDEMBED;\\n\\nadd1(10);\\n
\\n\\nThis also works:\\n\\n\\nIMPORT R;\\n\\ninteger add1(integer val) := EMBED(R)\\nval+1\\nENDEMBED;\\n\\nadd1(10);\\n
\\n\\nWhat server version are your running and can you try running the sample posted above and post the full error you get. Also try running the code through the eclplayground in eclwatch, as this is how I am testing it and it could just be an issue with the IDE.\", \"post_time\": \"2013-10-18 13:58:14\" },\n\t{ \"post_id\": 4784, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"balajisampath\", \"post_text\": \"Thanks Joe for looking into this.\\n\\nStill getting the same error while using all lower case variables.\\n\\nI also tried without variables and got same error\\n\\ninteger add1(integer val) := EMBED(R)\\n1 + 1;\\nENDEMBED;\\n\\nadd1(10);
\", \"post_time\": \"2013-10-18 13:41:02\" },\n\t{ \"post_id\": 4783, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"Re: EMBED(R) - std::exception Error evaluating...\", \"username\": \"joe.chambers\", \"post_text\": \"Try using all lower case for your variable VAL.\\n\\n\\ninteger add1(integer val) := EMBED(R)\\nval+1;\\nENDEMBED;\\n\\nadd1(10);\\n
\\n\\nThis is noted in the known limitations, when I first tried it I ran into the same problem.\\n\\nhttp://hpccsystems.com/download/free-co ... ns#4.0.0-1\\n\\nGive that a try and if it still doesn't work let me know and I'll dig a little deeper.\", \"post_time\": \"2013-10-18 13:29:12\" },\n\t{ \"post_id\": 4779, \"topic_id\": 1078, \"forum_id\": 10, \"post_subject\": \"EMBED(R) - std::exception Error evaluating...\", \"username\": \"balajisampath\", \"post_text\": \"I am trying to run simple R code using EMBED(R) and getting error \\n"Error: standard library exception (std::exception Error evaluating: VAL+1;) (0, 0), 0,"\\n\\nIDE version: 4.0.2\\nTried in both THOR and HTHOR\\n\\n\\ninteger add1(integer VAL) := EMBED(R)\\nVAL+1;\\nENDEMBED;\\n\\nadd1(10);
\\n\\nI am able to run the R code below, but I cannot pass parameters because it only allows static code:\\n\\nIMPORT R;\\ninteger add() := EMBED(R,'10+10;');\\nadd();\\n
\\n\\nIs there any other way I can run R code with parameters?\", \"post_time\": \"2013-10-17 20:01:07\" },\n\t{ \"post_id\": 4825, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Re: Understanding superfile transactions\", \"username\": \"omnibuzz\", \"post_text\": \"This is an interesting approach. I think we have a way now to do what I want.\\nThanks for the code!\\nCheers\\nSrini\", \"post_time\": \"2013-10-24 13:27:34\" },\n\t{ \"post_id\": 4820, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Re: Understanding superfile transactions\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nHere's some more test code:
IMPORT * from TrainingYourname;\\nIMPORT STD;\\nA1 := ASSERT(FALSE,'IT FAILED!!');\\nA2 := ASSERT(FALSE,'IT SUCCEEDED!!');\\nSTD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd3') : FAILURE(A1);
\\nWhen I ran this version, the workunit failed (because the superfile already contained a subfile with that name), and the FAILURE called my ASSERT, which added its "IT FAILED" message to the workunit.\\n\\nThen I ran this code:IMPORT * from TrainingYourname;\\nIMPORT STD;\\nA1 := ASSERT(FALSE,'IT FAILED!!');\\nA2 := ASSERT(FALSE,'IT SUCCEEDED!!');\\nSTD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd4') : SUCCESS(A2);
\\nThis one succeeded (since the superfile did not yet have this subfile) and the SUCCESS called my ASSERT, which added its "IT SUCCEEDED" message to the successful workunit.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-23 16:11:47\" },\n\t{ \"post_id\": 4816, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Re: Understanding superfile transactions\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nOK, here's a piece of code I just adapted from the Advanced Thor class (where we work with Superfiles):IMPORT * from TrainingYourname;\\nIMPORT STD;\\nInCnt := STD.File.GetSuperFileSubCount(SF.Daily) : STORED('InCnt');\\nSEQUENTIAL(STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd2'),\\n // STD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd3'),\\n STD.File.FinishSuperFileTransaction(),\\n ASSERT(InCnt <> STD.File.GetSuperFileSubCount(SF.Daily),'Counts are EQUAL'));
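The FAILURE hook shown above can also drive cleanup rather than just a message, which is what Srini asks for elsewhere in this thread (removing an orphaned subfile when the add fails). A minimal sketch, assuming the SF.Daily superfile and subfile names from the examples above; DeleteLogicalFile is the standard-library delete, used here as the cleanup action:

IMPORT * FROM TrainingYourname;
IMPORT STD;

// Sketch only: if AddSuperFile fails, remove the just-written logical file
// so no orphan is left behind.
DropOrphan := STD.File.DeleteLogicalFile('~ecltraining::in::namephonesupd4');
STD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd4') : FAILURE(DropOrphan);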
\\nI ran this code and the STORED InCnt value was 0 and it ran successfully. Remember that STORED calculates once, at a global scope, and shows you the value in the workunit. NB that the ASSERT is inside the SEQUENTIAL and I did put the AddSuperFile in a transaction frame.\\n\\nI then changed the code to this and re-ran it:IMPORT * from TrainingYourname;\\nIMPORT STD;\\nInCnt := STD.File.GetSuperFileSubCount(SF.Daily) : STORED('InCnt');\\nSEQUENTIAL(STD.File.StartSuperFileTransaction(),\\n // STD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd2'),\\n STD.File.AddSuperFile(SF.Daily,'~ecltraining::in::namephonesupd3'),\\n STD.File.FinishSuperFileTransaction(),\\n ASSERT(InCnt <> STD.File.GetSuperFileSubCount(SF.Daily),'Counts are EQUAL'));
\\nNow the STORED InCnt value was 1 and it again ran successfully. In neither run did the ASSERT "kick in" but if the AddSuperFile had failed, it would have.\\n\\nThe docs for AddSuperFile say, "This function may be included in a superfile transaction, but is not required to be." so I expect that simply doing a single AddSuperFile is transactionally safe, but wrapping it in a transaction frame doesn't hurt. Remember, all we're really doing here is updating metadata in the DFU, so either way the operation will be done quickly.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-23 14:07:15\" },\n\t{ \"post_id\": 4808, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Re: Understanding superfile transactions\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - The contrived example was to illustrate a problem that I have for a more complex scenario. I however would agree to what you propose for the scenario I gave. And thanks for confirming it \\n\\nLet me put in a different way the problem I am facing.\\n \\nI have a super file SF1, and I want to add a sub file to it. I output a dataset to a subfile and try to add to the super file and if AddSuperFile fails, then I want to delete the file that I outputted, because it's an orphan file for which I don't have a handle.\\n \\nNow, OUTPUT function does not participate in a super file transaction.\\n\\nWhat I am expecting is a callback mechanism to which I can register a function that will be called as part of the finishSuperfileTransaction akin to a destructor in OOP, where I can do the clean up of Non-superfile operations. \\n\\nI would also like to know if there is just one SF operation like AddSuperFile, do I need to enclose that in a transaction scope or is all SF atomic operaions by default transactionally safe.\\n\\nCheers\\nSrini\", \"post_time\": \"2013-10-22 22:53:27\" },\n\t{ \"post_id\": 4799, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Re: Understanding superfile transactions\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nThe two purposes of transaction framing are:\\n
The way I have always thought about working with superfiles (and HPCC in general) tells me that your fundamental premise is flawed, because you want to overwrite an existing dataset that's used in a superfile. This is not the way I would approach the problem. \\n\\nInstead of overwriting an existing dataset with newer data, what you should do is always write the new data to a new dataset (PRIMARY RULE #1 -- NEVER THROW ANYTHING AWAY). This gives you two distinct advantages:
Once you have determined that all is well with the new data rollout, you can then get rid of the old dataset if you need to free up space.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-22 15:07:36\" },\n\t{ \"post_id\": 4798, \"topic_id\": 1082, \"forum_id\": 10, \"post_subject\": \"Understanding superfile transactions\", \"username\": \"omnibuzz\", \"post_text\": \"Let me illustrate the problem with a contrived scenario.\\n\\nI have created a subfile and added it to a super file. Now, I want to overwrite the sub file, with a rollback if it fails. \\n\\nHere is my approach:\\n1. This creates the super file and adds the sub file to it, within a transaction and everything looks good. \\n\\n// Run 1\\nIMPORT STD;\\nds1 := DATASET(100000,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nSTD.File.CreateSuperFile('SF1');\\n\\nSEQUENTIAL(\\n OUTPUT(ds1,,'SUBF1'), \\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile('SF1','SUBF1'),\\n STD.File.FinishSuperFileTransaction()\\n);\\n
\\n\\n2. I am trying to overwrite the subfile.. and it fails, which is understandable.\\n\\n// Run 2\\nIMPORT STD;\\nds1 := DATASET(100020,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nOUTPUT(ds1,,'SUBF1',OVERWRITE);\\n
\\nERROR: -1: System error: -1: Graph[1], detach: Cannot remove file thor::subf1 as owned by SuperFile(s): thor::sf1\\n\\n3.I try to clear the super file and overwrite the sub file and reattach it to the super file\\n\\n// Run 3\\nIMPORT STD;\\nds1 := DATASET(100020,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nSEQUENTIAL(\\n STD.File.StartSuperFileTransaction(),\\n STD.File.ClearSuperFile('SF1'),\\n OUTPUT(ds1,,'SUBF1',OVERWRITE), \\n STD.File.AddSuperFile('SF1','SUBF1'),\\n STD.File.FinishSuperFileTransaction()\\n);\\n
\\n\\nThis goes into a deadlock. I guess the file is not removed from the super file till we commit the transaction and the output is trying to overwrite on a file that is in limbo. Anyways, this doesn't work!\\n\\n4. I think the only way to do it is to have each of them as a separate transaction. \\n\\n// Run 4\\nIMPORT STD;\\nds1 := DATASET(100020,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nSEQUENTIAL(\\n STD.File.StartSuperFileTransaction(),\\n STD.File.ClearSuperFile('SF1'),\\n STD.File.FinishSuperFileTransaction(),\\n OUTPUT(ds1,,'SUBF1',OVERWRITE), \\n STD.File.StartSuperFileTransaction(),\\n STD.File.AddSuperFile('SF1','SUBF1'),\\n STD.File.FinishSuperFileTransaction()\\n);\\n
\\nVoila!! It works fine.\\n\\n5. I feel it's equivalent to this:\\n\\n// Run 5\\nIMPORT STD;\\nds1 := DATASET(100020,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n\\nSEQUENTIAL(\\n STD.File.ClearSuperFile('SF1'),\\n OUTPUT(ds1,,'SUBF1',OVERWRITE), \\n STD.File.AddSuperFile('SF1','SUBF1') \\n);\\n
\\nAnd it works exactly the same.\\n\\nSo, now I have two questions:\\n1. Is there any need for Run 4 or can I go for Run 5\\n2. Is there a better way to do what I want to do within the transaction semantics?\\n\\nCheers\\nSrini\", \"post_time\": \"2013-10-22 14:44:40\" },\n\t{ \"post_id\": 4819, \"topic_id\": 1086, \"forum_id\": 10, \"post_subject\": \"Re: Unknown exception in Thread ThorLookaheadCache\", \"username\": \"bforeman\", \"post_text\": \"Awesome, great job Srini!\", \"post_time\": \"2013-10-23 14:46:53\" },\n\t{ \"post_id\": 4818, \"topic_id\": 1086, \"forum_id\": 10, \"post_subject\": \"Re: Unknown exception in Thread ThorLookaheadCache\", \"username\": \"omnibuzz\", \"post_text\": \"I found that my code had a bug (thanks to you I revisited the code ). The R function returns a "set of integer" but I specified an "integer" return value. I was going back and forth with the code and I forgot to change. And the error message threw me off track. \\n\\nBut, it works fine now. And, I am super excited!\\n \\nThanks \\nSrini\", \"post_time\": \"2013-10-23 14:44:29\" },\n\t{ \"post_id\": 4811, \"topic_id\": 1086, \"forum_id\": 10, \"post_subject\": \"Re: Unknown exception in Thread ThorLookaheadCache\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nTry running in hTHOR, and certainly you should log this in the Community Issue Tracker.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-23 13:03:20\" },\n\t{ \"post_id\": 4807, \"topic_id\": 1086, \"forum_id\": 10, \"post_subject\": \"Unknown exception in Thread ThorLookaheadCache\", \"username\": \"omnibuzz\", \"post_text\": \"This is a self contained code that fails with the following exception\\n\\nError: System error: 0: SLAVE 192.168.56.101:20100: Unknown exception in Thread ThorLookaheadCache\\n\\n
\\nIMPORT STD;\\nIMPORT R;\\n \\nds1 := DATASET(1000000,TRANSFORM({INTEGER num},SELF.Num := counter),DISTRIBUTED);\\n \\nfile := '~thor::number.csv';\\noutFile := OUTPUT(ds1,,file, CSV(HEADING('',''), SEPARATOR(','), TERMINATOR('\\\\n')),THOR,OVERWRITE);\\n\\ndir := STD.File.GetLogicalFileAttribute(file,'directory');\\nparts := (integer)STD.File.GetLogicalFileAttribute(file,'numparts');\\nSetWords := STD.str.splitwords(file,'::');\\nnam := SetWords[COUNT(SetWords)];\\n\\nds2 := NOTHOR(DATASET(parts,\\n TRANSFORM({INTEGER Node,STRING fil},\\n SELF.node := COUNTER;\\n SELF.fil := dir + '/' + nam + '._' + COUNTER +'_of_' + parts))): INDEPENDENT;\\n\\ninteger read(String lfilename) := EMBED(R)\\n x <- read.csv(lfilename)\\n x[[1]]^2\\nENDEMBED;\\n\\nds3 := DATASET(STD.System.Thorlib.Nodes(),TRANSFORM({INTEGER itemCount, INTEGER ItemSquaresSum},\\n outval := read(ds2[counter].Fil); \\n SELF.itemCount := COUNT(outVal); \\n SELF.ItemSquaresSum := SUM(OutVal)),DISTRIBUTED);\\n\\n\\nSEQUENTIAL(OutFile, OUTPUT(ds2),OUTPUT(ds3));\\n
\\n\\nCheers\\nSrini\", \"post_time\": \"2013-10-22 21:53:36\" },\n\t{ \"post_id\": 4822, \"topic_id\": 1087, \"forum_id\": 10, \"post_subject\": \"Re: 'Workunit compiled for a different version..' issue\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob! \\n\\nThe issue logged is with respect to scheduling but I can add in my case as well as the error message is the same.\", \"post_time\": \"2013-10-24 11:58:32\" },\n\t{ \"post_id\": 4810, \"topic_id\": 1087, \"forum_id\": 10, \"post_subject\": \"Re: 'Workunit compiled for a different version..' issue\", \"username\": \"bforeman\", \"post_text\": \"What exactly is the reason for this error?\\n
\\n\\nI am not exactly sure what could be causing this. It is possible that the ECL IDE is picking up the wrong compiler configuration. It might be a good idea to look and see if this has been reported in the Community issue tracker, and if not, submit a report with as much detail as you can.\\n\\nIt might be related to the following report:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-8206\\n\\nYou are welcome to add a comment to this report if you like.\\n\\nIs an ecl file compatible across IDE/VM versions?
\\n\\nYes, the language should be, but new language statements in 4.0 (like DICTIONARY) are not backward compatible with the 3.x compiler versions.\\n\\nIf not, shouldn't a file recompile in the new version be sufficient?
\\n\\nYes, absolutely!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-23 13:01:34\" },\n\t{ \"post_id\": 4809, \"topic_id\": 1087, \"forum_id\": 10, \"post_subject\": \"'Workunit compiled for a different version..' issue\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I've been getting this error on and off in my IDE while compiling. Sometimes, it goes away with a VM restart, sometimes with the ECL IDE restart. And, when it happens, it is just for 1 arbitrary (??) file while the rest compile fine.\\n\\nThis time though, it just won't go, no matter what I do. Finally, I created a new ecl file, copied over the contents and lo, the new one executes without any error. \\n\\nWhat exactly is the reason for this error? \\nIs an ecl file compatible across IDE/VM versions? \\nIf not, shouldn't a file recompile in the new version be sufficient?\\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-10-23 06:18:29\" },\n\t{ \"post_id\": 4856, \"topic_id\": 1097, \"forum_id\": 10, \"post_subject\": \"Re: Fuzzy matching in HPCC?\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nYes, fuzzy matching is a powerful feature in HPCC, and much of the ECL code is optimized internally to support it. For example, look at the "ALL" JOIN flag, which allows the compiler to ignore the lack of any "equality" portion to the condition (in other words, fuzzy matching on the join condition).\\n\\nThere is support for regular expressions in REGEXFIND and REGEXREPLACE, and finally our developers have written many custom functions to help facilitate the exact same thing with addresses that you mention.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-10-30 13:38:27\" },\n\t{ \"post_id\": 4855, \"topic_id\": 1097, \"forum_id\": 10, \"post_subject\": \"Re: Fuzzy matching in HPCC?\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nThe short answer is: Yes! You can certainly build your own fuzzy matching code in ECL and implement it any way you need to. And, this kind of fuzzy matching is also part of the code generated by our SALT tool.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-30 13:33:18\" },\n\t{ \"post_id\": 4851, \"topic_id\": 1097, \"forum_id\": 10, \"post_subject\": \"Fuzzy matching in HPCC?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Consider a scenario where we want to update entity address, if it has changed. We will use a fuzzy matching algorithm to accommodate for alternate representations of the same address (misspelt, shortened, incomplete etc.) in the incoming data.\\n\\nCan fuzzy matching be done in HPCC to achieve something like this? \\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-10-30 10:32:56\" },\n\t{ \"post_id\": 4873, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"Re: File monitoring in HPCC\", \"username\": \"bforeman\", \"post_text\": \"Yes, use something like this:\\n\\neveryminute := CRON('0-59/1 * * * *');\\n\\nContentCheck : WHEN(everyminute);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-31 11:17:53\" },\n\t{ \"post_id\": 4872, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"Re: File monitoring in HPCC\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nOk thanks.\\nIs it possible to run a CRON job every minute??\\n\\nThanks and Regards\\nChhaya Vishwakarma\", \"post_time\": \"2013-10-31 11:06:43\" },\n\t{ \"post_id\": 4871, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"Re: File monitoring in HPCC\", \"username\": \"bforeman\", \"post_text\": \"Hi Chhaya,\\n\\nMonitoring file content is something that we also do regularly, so much in fact that it is incorporated in our add-on SALT product and a part of the Delta File Comparison process. SALT is described here: http://hpccsystems.com/products-and-services/products/modules/SALT\\n\\nThat said, you can still write your own content comparison process by using a JOIN. The LEFT recordset could be your main file and the RIGHT recordset is the file you are comparing. Based on the fields in your TRANSFORM you can mark the output record as new, changed, or deleted (not found in the new file). Of course I am simplifying the process here but I think you get the gist of it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-31 10:52:45\" },\n\t{ \"post_id\": 4870, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"Re: File monitoring in HPCC\", \"username\": \"chhaya\", \"post_text\": \"hi,\\nThanks\\n\\nI want only updates as a sub file.As soon as the file is getting updated, updates should be sprayed on HPCC cluster.\\n\\nCan i put a monitoring on file content change ? I read about file monitorinf but no where its mentioned it can do monitoring on file content(only file name)\\n\\nCould you please share any example of file monitoring.\\n\\nThanks and Regards \\nChhaya Vishwakarma\", \"post_time\": \"2013-10-31 09:41:51\" },\n\t{ \"post_id\": 4857, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"Re: File monitoring in HPCC\", \"username\": \"bforeman\", \"post_text\": \"Hi Chhaya,\\n\\nIn the business of big data we are always updating files periodically as quickly as minutes and spanning Daily, weekly, and monthly updates as well. There are a few areas of documentation that you need to research.\\n\\nFirst, in the Language Reference and Standard Function Library PDFs, check out the topics and library support for superfiles and superkeys. This file structure makes it possible to update files with the same structure as needed. \\n\\nAlso, check out the ECL Scheduler PDF, which documents setting up ECL processes at a specific time.\\n\\nAnd finally, go back to the LRM and look at the docs on CRON.\\n\\nThis should be enough to get you started.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-30 13:46:58\" },\n\t{ \"post_id\": 4852, \"topic_id\": 1098, \"forum_id\": 10, \"post_subject\": \"File monitoring in HPCC\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n I have a file which gets updated periodically. I want to spray the content of the file as and when file is getting updated.\\n\\nIs it possible to do in HPCC?\\nis there concept of real time data transfer? 
\\n\\nThanks and regards\\nChhaya\", \"post_time\": \"2013-10-30 11:42:58\" },\n\t{ \"post_id\": 5006, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"rtaylor\", \"post_text\": \"shank,\\n\\nOK, I just added one field to the UCC_Rec and made this code do both at the same time:TransactionRec := RECORD\\n STRING ChildName;\\nEND;\\nUCC_Rec := RECORD\\n\\tSTRING filename;\\n\\tSTRING namespace;\\n\\tSTRING id;\\n\\tSTRING childId;\\n\\tSTRING childaddress;\\n\\tSTRING childnames;\\t//added this field\\n\\tDATASET(TransactionRec) Name;\\nEND;\\n\\nP1_Rec := RECORD\\n UCC_Rec;\\n\\tSTRING childtxt;\\nEND;\\n \\nP1_Rec XF1(RecSet L) := TRANSFORM\\n SELF.Filename := L.NameOfTheFile;\\n\\tSELF.namespace := XMLTEXT('@xmlns:ns');\\n\\tSELF.id := XMLTEXT('ns:id');\\n\\tSELF.childid := XMLTEXT('ns:child-element/ns:child-id');\\n\\tSELF.childaddress := XMLTEXT('ns:child-element/ns:child-address');\\n\\tSELF.childtxt := XMLTEXT('ns:child-element/<>');\\n\\t// SELF.name := [];\\n\\tSELF := [];\\nEND;\\t\\n\\n\\t\\nP1 := PARSE(RecSet,XMLtxt,XF1(LEFT),XML('doc/ns:parent-element'));\\n// P1;\\n\\nP2_Rec := RECORD\\n\\tSTRING id;\\n\\tSTRING childId;\\n STRING ChildName;\\nEND;\\n\\nP2_Rec XF2(P1_Rec L) := TRANSFORM\\n\\tSELF.id := L.id;\\n\\tSELF.childid := L.childid;\\n\\tSELF.childName := XMLTEXT('');\\nEND;\\t\\n\\nP2 := PARSE(P1,childtxt,XF2(LEFT),\\n XML('ns:child-element/ns:name/ns:child-name'));\\n// P2;\\nUCC := DENORMALIZE(P1,P2,\\n LEFT.id=RIGHT.id AND LEFT.ChildID=RIGHT.ChildID,\\n TRANSFORM(P1_Rec,\\n SELF.childnames := LEFT.childnames + \\n IF(COUNTER>1,',','') + \\n RIGHT.ChildName,\\n SELF.name := LEFT.Name + ROW(RIGHT,TransactionRec),\\n SELF := LEFT));\\n\\nPROJECT(UCC,UCC_Rec);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-12-02 19:01:15\" },\n\t{ \"post_id\": 4974, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"shank\", \"post_text\": \"Hi Richard,\\nThe ECL code just did what I wanted. I was able to work with ease after i got the the final DS. I stumbled at a place where i wanted to combine all the child-names and place it under a single field.Say that if i add a last name to all of child namesas part of some processing \\n<ns:child-name>Shank6 Smith</ns:child-name>\\n <ns:child-name>Hulk6 Smith</ns:child-name>\\n <ns:child-name>Dr.Jane6 Smith</ns:child-name>\\n <ns:child-name>Bill6 Smith</ns:child-name>
\\nand after that i need to concatenate all the childnames say with a ',' separator .\\n<ns:child-name>Shank6 Smith, Hulk6 Smith, Dr.Jane6 Smith, Bill6 Smith</ns:child-name>
\\n\\nI was able to achieve this using ITERATE but i couldnt process it in the same Dataset. I had to do the ITERATE function and place it in an attribute . \\nI would like you to help me out , I want the concatenated data in the same dataset\\nUCC .\", \"post_time\": \"2013-11-27 11:09:54\" },\n\t{ \"post_id\": 4891, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"rtaylor\", \"post_text\": \"Shank,\\n\\nOK, first I generated a file to emulate your problem. Here's that code:RecStruct := RECORD \\n STRING NameOfTheFile;\\n STRING XMLtxt; \\nEND;\\n\\nds := DATASET([\\n{'file1','<doc><ns:parent-element xmlns:ns="namespace1"><ns:id>11111111</ns:id><ns:child-element><ns:child-id>99999999</ns:child-id><ns:name><ns:child-name>John1</ns:child-name><ns:child-name>Smith1</ns:child-name><ns:child-name>will1</ns:child-name><ns:child-name>shane1</ns:child-name></ns:name><ns:child-address>1,ABC Towers</ns:child-address></ns:child-element></ns:parent-element><ns:parent-element xmlns:ns="namespace2"><ns:id>22222222</ns:id><ns:child-element><ns:child-id>66666666</ns:child-id><ns:name><ns:child-name>Shank2</ns:child-name><ns:child-name>Hulk2</ns:child-name><ns:child-name>Dr.Jane2</ns:child-name><ns:child-name>Bill2</ns:child-name></ns:name><ns:child-address>2,ABC Towers</ns:child-address></ns:child-element></ns:parent-element></doc>'},\\n{'file2','<doc><ns:parent-element xmlns:ns="namespace3"><ns:id>33333333</ns:id><ns:child-element><ns:child-id>88888888</ns:child-id><ns:name><ns:child-name>John3</ns:child-name><ns:child-name>Smith3</ns:child-name><ns:child-name>will3</ns:child-name><ns:child-name>shane3</ns:child-name></ns:name><ns:child-address>3,ABC Towers</ns:child-address></ns:child-element></ns:parent-element><ns:parent-element xmlns:ns="namespace4"><ns:id>44444444</ns:id><ns:child-element><ns:child-id>55555555</ns:child-id><ns:name><ns:child-name>Shank4</ns:child-name><ns:child-name>Hulk4</ns:child-name><ns:child-name>Dr.Jane4</ns:child-name><ns:child-name>Bill4</ns:child-name></ns:name><ns:child-address>4,ABC Towers</ns:child-address></ns:child-element></ns:parent-element></doc>'},\\n{'file3','<doc><ns:parent-element xmlns:ns="namespace5"><ns:id>55555555</ns:id><ns:child-element><ns:child-id>77777777</ns:child-id><ns:name><ns:child-name>John5</ns:child-name><ns:child-name>Smith5</ns:child-name><ns:child-name>will5</ns:child-name><ns:child-name>shane5</ns:child-name></ns:name><ns:child-address>5,ABC Towers</ns:child-address></ns:child-element></ns:parent-element><ns:parent-element xmlns:ns="namespace6"><ns:id>66666666</ns:id><ns:child-element><ns:child-id>44444444</ns:child-id><ns:name><ns:child-name>Shank6</ns:child-name><ns:child-name>Hulk6</ns:child-name><ns:child-name>Dr.Jane6</ns:child-name><ns:child-name>Bill6</ns:child-name></ns:name><ns:child-address>6,ABC Towers</ns:child-address></ns:child-element></ns:parent-element></doc>'}],\\nRecStruct);\\nOUTPUT(DISTRIBUTE(ds),,'~RTTEST::XML::ParseTest',overwrite);
\\nNext I declared the file to start solving the problem:RecSet := DATASET('~RTTEST::XML::ParseTest',RecStruct,FLAT);\\n// RecSet;
\\nThen I defined the RECORD structures for the final result:TransactionRec := RECORD\\n STRING ChildName;\\nEND;\\nUCC_Rec := RECORD\\n STRING filename;\\n STRING namespace;\\n STRING id;\\n STRING childId;\\n STRING childaddress;\\n DATASET(TransactionRec) Name;\\nEND;
\\nPlease note that I constructed these to contain every bit of extractable information from the dataset.\\n\\nBecause the XML is just contained in a STRING field of a flat file and is not a well-formed XML file by itself, the only approach to use is the PARSE function. And since each record in the dataset contains XML text with multiple "parents" and each "parent" has multiple "children" we're actually needing to work on three levels to extract everything. Therefore, I decided to take a two-pass approach. \\n\\nThe first pass, I extracted all the "parent" data, and and simply put all the "children" XML text into a STRING for the second pass to parse. Here's the first pass code:P1_Rec := RECORD\\n UCC_Rec;\\n STRING childtxt;\\nEND;\\nP1_Rec XF1(RecSet L) := TRANSFORM\\n SELF.Filename := L.NameOfTheFile;\\n SELF.namespace := XMLTEXT('@xmlns:ns');\\n SELF.id := XMLTEXT('ns:id');\\n SELF.childid := XMLTEXT('ns:child-element/ns:child-id');\\n SELF.childaddress := XMLTEXT('ns:child-element/ns:child-address');\\n SELF.childtxt := XMLTEXT('ns:child-element/<>');\\n SELF.name := [];\\nEND;\\t\\nP1 := PARSE(RecSet,XMLtxt,XF1(LEFT),XML('doc/ns:parent-element'));\\n// P1;
\\nThis starts with a RECORD structure containing all the final result fields, plus the STRING field to hold the "children" XML text.\\n\\nThe key to using PARSE with XML text is its XML option, which specifies the "xpath to the row tag" (in this case, the ns:parent-element is the row tag). PARSE will produce one result record for each instance of the specified row tag.\\n\\nI am using a TRANSFORM function for this PARSE because I need to extract data from other fields than just the XML text field I'm parsing. And, I need to initialize the nested child dataset to an empty set, which I can only do in a TRANSFORM.\\n\\nNote the use of "<>" at the end of the xpath for my childtxt field. This indicates the field will contain all the text that appears between the open and close ns:child-element tags -- the raw XML. This is the field that the second pass will PARSE, like this:P2_Rec := RECORD\\n STRING id;\\n STRING childId;\\n STRING ChildName;\\nEND;\\nP2_Rec XF2(P1_Rec L) := TRANSFORM\\n SELF.id := L.id;\\n SELF.childid := L.childid;\\n SELF.childName := XMLTEXT('');\\nEND;\\t\\nP2 := PARSE(P1,childtxt,XF2(LEFT),\\n XML('ns:child-element/ns:name/ns:child-name'));\\n// P2;
\\nThe RECORD structure to produce from this pass only needs to contain the linking fields and the child data itself. \\n\\nThis second PARSE is working with the result of the first pass, and parsing only the childtxt field, but since I need the two id fields, I once again need to use a TRANSFORM. Note the XML option on the PARSE (the "xpath to the row tag") starts with ns:child-element and takes us all the way down to the names we want to extract on this pass. That makes the xpath parameter to the XMLTEXT function in the TRANSFORM simply an empty string.\\n\\nNow we just need to join the two results together, and the DENORMALIZE function was built just for that purpose, because we want to put all the related child names into the nested child dataset for the final result, like this:UCC := DENORMALIZE(P1,P2,\\n LEFT.id=RIGHT.id AND LEFT.ChildID=RIGHT.ChildID,\\n TRANSFORM(P1_Rec,\\n SELF.name := LEFT.Name + ROW(RIGHT,TransactionRec),\\n SELF := LEFT));
\\nWe're joining the two PARSE results where the two id fields match, and using the ROW function to construct a record to append to the nested child dataset.\\n\\nNow the only thing left to do is produce the result, like this:PROJECT(UCC,UCC_Rec);
\\nI'm using PROJECT here because the result of the DENORMALIZE still contains the childtxt field from the first pass, and that's not needed for the final result.\\n\\nRun this code yourself and you'll see how it all works. Of course, you'll need to view the result in ECL Watch.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-06 21:14:02\" },\n\t{ \"post_id\": 4889, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"shank\", \"post_text\": \"Sorry again I missed the ns:name in that code uploaded before.\\n\\nRegards,\\nShank\", \"post_time\": \"2013-11-06 10:04:38\" },\n\t{ \"post_id\": 4888, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"shank\", \"post_text\": \"Ok i get it , But even if i dont use a TABLE and I directly load it as an XML and process it i dont get the output as desired.I tried the below piece of code too and it doesnt work. \\n\\nTransactionRec := RECORD\\nstring ChildName {XPATH('ns:child-name')};\\nEND;\\n\\nUCC_Rec := RECORD\\nSTRING id {XPATH('ns:parent-element/ns:id')};\\nSTRING childId {XPATH('ns:parent-element/ns:child-element/ns:child-id')};\\nDATASET(TransactionRec) Name {XPATH('ns:parent-element/ns:child-element')};\\nEND;\\n\\nUCC := DATASET('~tutorial::sn::abc',UCC_Rec,XML('doc'));\\nUCC;\\n\\n
\\n\\nI tried the below code and it works. its just when i have it nested it doesnt work.\\n\\nUCC_Rec := RECORD\\nSTRING id {XPATH('ns:parent-element/ns:id')};\\nSTRING childId {XPATH('ns:parent-element/ns:child-element/ns:child-id')};\\nEND;\\n\\nUCC := DATASET('~tutorial::sn::abc',UCC_Rec,XML('doc'));\\nUCC;\\n
\\n\\nPlease provide a solution to my issue . Please neglect the code written by me if it confuses.\\n\\nRegards,\\nShank\", \"post_time\": \"2013-11-06 09:37:10\" },\n\t{ \"post_id\": 4887, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"rtaylor\", \"post_text\": \"Shank,\\n\\nThis comment of yours:RecStruct := RECORD \\n STRING NameOfTheFile;\\n STRING XMLtxt; \\nEND;\\n\\n//dataset - an XML file in each record\\nRecSet := DATASET('~tutorial::sn::abc',RecStruct,FLAT);
tells me that you're approaching this problem incorrectly. \\n\\nYou are trying to work with the data as if you were working with an XML file, and your dataset is not an XML file. Your file is a FLAT text file with two STRING fields, one of which happens to contain "XML" text. You're trying to get around that by adding an XML option to TABLE (which does not syntax check, because TABLE does not have an XML option).\\n\\nWe deal with this kind of file in the Advanced Thor class (exercise 3b), and the solution in ECL is to use PARSE with the XML option to extract the data from the STRING field.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-05 14:52:12\" },\n\t{ \"post_id\": 4886, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"shank\", \"post_text\": \"Hi Bob,\\nI am sorry, That was a typo from my side , i dunno how that got missed.\\nThe ns:name was given but i am not able to get the result. \\n\\nI also want to know the does namespace("ns:") play a role in loading the data. Because the code doesn't return any output.\\n\\n\\nThanks ,\\nShank\", \"post_time\": \"2013-11-05 13:48:14\" },\n\t{ \"post_id\": 4885, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Re: Loading XML Data with namespace in element name\", \"username\": \"bforeman\", \"post_text\": \"Hi Shank,\\n\\nBased on your XML example, I think you are missing a tag.\\n\\nChange:\\n\\nDATASET(TransactionRec) Name {XPATH('ns:parent-element/ns:child-element')};\\n
\\n\\nto\\n\\nDATASET(TransactionRec) Name {XPATH('ns:parent-element/ns:child-element/ns:name')};
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-05 13:43:06\" },\n\t{ \"post_id\": 4883, \"topic_id\": 1101, \"forum_id\": 10, \"post_subject\": \"Loading XML Data with namespace in element name\", \"username\": \"shank\", \"post_text\": \"I am working on a similar kind of xml as below. I am not able to get the desired output as given below( after the code). Also I want to know if namespace plays any part in getting the data loaded in the dataset.Because if it is just the id , I am able to get the data but when I want to get the 4 child names for the same id, I dont get any output.\\n<doc>\\n <ns:parent-element xmlns:ns="the namespace of tag">\\n <ns:id>11111111</ns:id>\\n <ns:child-element>\\n <ns:child-id>99999999</ns:child-id>\\n <ns:name>\\n <ns:child-name>John</ns:child-name>\\n <ns:child-name>Smith</ns:child-name>\\n <ns:child-name>will</ns:child-name>\\n <ns:child-name>shane</ns:child-name>\\n </ns:name>\\n <ns:child-address>14,ABC Towers</ns:child-address>\\n </ns:child-element>\\n </ns:parent-element>\\n <ns:parent-element xmlns:ns="">\\n <ns:id>22222222</ns:id>\\n <ns:child-element>\\n <ns:child-id>88888888</ns:child-id>\\n <ns:name>\\n <ns:child-name>Shank</ns:child-name>\\n <ns:child-name>Hulk</ns:child-name>\\n <ns:child-name>Dr.Jane</ns:child-name>\\n <ns:child-name>Bill</ns:child-name>\\n </ns:name>\\n <ns:child-address>15,ABC Towers</ns:child-address>\\n </ns:child-element>\\n </ns:parent-element> \\n</doc>
\\n\\n\\nI want the output to look like the below:\\n[attachment=0:3ufcn54i]result.png\\n\\n\\nRecStruct := RECORD \\t\\n\\tSTRING NameOfTheFile;\\n\\tSTRING XMLtxt;\\t\\nEND;\\n\\n//dataset - an XML file in each record\\nRecSet := DATASET('~tutorial::sn::abc',RecStruct,FLAT);\\n\\n\\nTransactionRec := RECORD\\nstring ChildName {XPATH('ns:child-name')};\\nEND;\\n\\nUCC_Rec := RECORD\\nSTRING id {XPATH('ns:parent-element/ns:id')};\\nSTRING childId {XPATH('ns:parent-element/ns:child-element/ns:child-id')};\\nDATASET(TransactionRec) Name {XPATH('ns:parent-element/ns:child-element')};\\nEND;\\n\\nUCC := TABLE(RecSet,UCC_Rec,XML('doc'));\\nUCC;\\n
\\n\\nthanks,\\nShank\", \"post_time\": \"2013-11-05 07:30:45\" },\n\t{ \"post_id\": 4907, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nOK, so you have a set of base data to which you want to continually add additional data in an asynchronous manner, and also ensure that a given piece of data is added only once and is "instantly" available to any/all concurrent processes.\\n\\nI would look at implementing a "deltabase" for this. A deltabase is an external SQL-based database (MySQL, or Oracle, or ...) to which all new data is added. \\n\\nTherefore, if a given piece of data is not in your superfile and a SOAPCALL to your deltabase doesn't show it already there, then it gets added to the deltabase (the same stored procedure that checks for previous existence can also do the add).\\n\\nThe key to this mechanism is keeping the deltabase small so the queries to it don't bog down the process. That means you will need to periodically flush all the data and add it to your superfile. The period to use would be determined by the volume of data being added to it -- the higher the volume the shorter the period.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-08 19:33:39\" },\n\t{ \"post_id\": 4906, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"oleg\", \"post_text\": \"Richard,\\nWe were mostly interesting if the generic solution exists for the problems like that (i.e. guarantee the sequential access t the file).\\n\\nI understand that we can create a single separate process which will collect all the information and add it once to the superfile, in our case it will be just less efficient - but maybe still worth it of we will ensure deadlock-free code.\", \"post_time\": \"2013-11-08 18:49:13\" },\n\t{ \"post_id\": 4905, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"rtaylor\", \"post_text\": \"Oleg,\\n\\nOK, you have a superfile whose sub-file list may get updated by multiple, possibly concurrent workunits? \\n\\nMy only question is, why? What problem are you trying to solve with this design? \\n\\nAnd, please send me a private email if there are sensitivity considerations around your answer to that question appearing in a public forum.\\n\\nRichard\", \"post_time\": \"2013-11-08 18:01:45\" },\n\t{ \"post_id\": 4903, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"oleg\", \"post_text\": \"Richard,\\n\\nThank you for the reply,\\nThe problem here is that the each WU may add more data into the file, so if both of them will read the slice before new information will be added, whoever come second will add this data not to the file it thinks it is .\\n\\nI.e. 
we want to be sure that the superfile will not be physically changed from the moment it has been read to the moment we will add the new piece to it.\", \"post_time\": \"2013-11-08 16:20:37\" },\n\t{ \"post_id\": 4902, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"rtaylor\", \"post_text\": \"Allan and Oleg,\\n\\nOK, how about you start by creating a vertical-slice TABLE of the file you are working with (that would be in Graph 1) and then only work with that TABLE for the rest of the WU. That way, it doesn't matter if someone updates it out from under you -- you've already read the data you want into your job.\\n\\nIn fact, you could make it easy on yourself by putting that TABLE into the MODULE structure where you define the DATASET, something like this:
\\nEXPORT File_Mydata := MODULE\\n EXPORT Layout := RECORD\\n //fields here\\n END;\\n ds := DATASET('MySuperFile',Layout,FLAT);\\n EXPORT File := TABLE(ds,{ds});\\nEND;
\\nThen all of your code just refers to File_MyData.File just as if it were the DATASET.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-08 16:03:41\" },\n\t{ \"post_id\": 4898, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"oleg\", \"post_text\": \"Let me add some more details: \\n\\nThere is a superfile, which being updated after reading by every WU which reads it, i.e. each WU will (or, at least, may) add subfile(s) to it after processing. \\nSo, we need to make sure that after WU 'opens' it for reading no other one will be able to read it until the previous one finished.\\n\\nIdeally, we want to know from inside the WU before the reading the file if some other WU is reading it right now - and if so, wait until that one finished.\", \"post_time\": \"2013-11-08 09:00:14\" },\n\t{ \"post_id\": 4897, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"Allan\", \"post_text\": \"Thanks Both for this.\\n\\nI should of made clear, this question was about Thor only.\\n\\nYes - Thor is single threaded, but only at the level of a graph.\\n\\nConcurrently running WUs get their graphs run interleaved with each other.\\n\\nCan there be a scenario like:\\n\\nWu 1 - Graph 1 - Reads file A\\nWu 2 - Graph 1 - Amends file A\\nWu 1 - Graph 2 - Reads File A again (but its contents are now different from when its graph 1 ran)\\n\\nYours\", \"post_time\": \"2013-11-08 08:48:02\" },\n\t{ \"post_id\": 4894, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"richardkchapman\", \"post_text\": \"Files are locked automatically by the system, so you shouldn't need to worry about it.\", \"post_time\": \"2013-11-07 13:13:06\" },\n\t{ \"post_id\": 4893, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Re: Best practices for controlling exclusive access to files\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nI'm not sure that you really need it, remember that ROXIE is read-only and THOR is single threaded, so there is no danger of one job stepping on another. So the best practice is to never lock a file. I remember doing that with old RDBMS systems and it was always a headache.\\n\\nKind Regards,\\n\\nBob\", \"post_time\": \"2013-11-07 12:28:43\" },\n\t{ \"post_id\": 4890, \"topic_id\": 1103, \"forum_id\": 10, \"post_subject\": \"Best practices for controlling exclusive access to files.\", \"username\": \"Allan\", \"post_text\": \"What's the best way to lock access to a file such that disparate WUs can ensure they have exclusive access?\", \"post_time\": \"2013-11-06 16:16:39\" },\n\t{ \"post_id\": 4900, \"topic_id\": 1104, \"forum_id\": 10, \"post_subject\": \"Re: Deleting/Removing files from directory\", \"username\": \"bforeman\", \"post_text\": \"Try using the RemoteDirectory function. From the docs:\\nEXPORT FsFilenameRecord := RECORD \\n STRING name; //filename \\n UNSIGNED8 size; //filesize \\n STRING19 modified; //date-time stamp \\nEND;\\n
\\nThe RemoteDirectory function returns a list of files as a dataset in the format listed above from the specified machineIP and directory. If includesubdir is set to TRUE, then the name field contains the relative path to the file from the specified directory.\\n
\\nSo you could PROJECT the dataset and simply move or delete the files that you need in the TRANSFORM.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-08 14:37:36\" },\n\t{ \"post_id\": 4895, \"topic_id\": 1104, \"forum_id\": 10, \"post_subject\": \"Deleting/Removing files from directory\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nI want to delete/move files from a directory using code.\\nTried STD.File.MoveExternalFile
and STD.File.DeleteExternalFile
\\n\\nThese commands work only for single file. How can delete/move multiple files from directory?\\n\\nThanks and Regards \\nChhaya\", \"post_time\": \"2013-11-07 13:50:02\" },\n\t{ \"post_id\": 4933, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Re: Using STD.File.SprayFixed\", \"username\": \"bforeman\", \"post_text\": \"I left out espserveraddress. Still didn't work.
\\nWhat specific error did you receive? \\n\\nNo luck with DFU either - it errored out with 'Failed: DFUWU: cannot determine endpoint for part file' error
\\n\\nThat sounds like something completely different, like your command line parameters could be incorrect. Can you post an example of what you did?\\n\\nI was able to spray the same file through ECL Watch.
\\nThat's a start! Now, using the Spray Fixed option you should be able to do the exact same operation, you just need to match the parameters shown in the ECL Watch.\\n\\nSo using my training cluster, a spray of a "persons" file would look like this:\\n\\n
SrcIP := '10.173.248.1'; //address of training cluster \\nSrcPath := '//10.173.248.1/mnt/disk1/var/lib/HPCCSystems/dropzone/';\\nInitials := 'BF'; \\n\\n//************ Spray Intro ECL/THOR Class Files *******************************\\n\\nSTD.File.SprayFixed(SrcIP,SrcPath + 'persons',155, 'mythor', \\n '~CLASS::' + Initials + '::Intro::Persons',,,,true,true);
\\n\\nThe SrcPath is the same path shown on the Spray Fixed ECL Watch Page Network Path line.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-11-13 12:58:54\" },\n\t{ \"post_id\": 4930, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Re: Using STD.File.SprayFixed\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I left out espserveraddress. Still didn't work. \\n\\nNo luck with DFU either - it errored out with 'Failed: DFUWU: cannot determine endpoint for part file' error\\n\\nI was able to spray the same file through ECL Watch.\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-13 06:04:20\" },\n\t{ \"post_id\": 4921, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Re: Using STD.File.SprayFixed\", \"username\": \"bforeman\", \"post_text\": \"Error: System error: 0: DFUServer Error Failed: Failed to connect to dafilesrv/daliservix on <myIPAddress>:7100 (0, 0), 0, \\n
\\n\\nTry omitting the last parameter (espserveraddress), it looks like your address might be incorrect.\\n\\nquestion: can you spray this file directly from the ECL Watch? Or by using DFUPLUS on the command line?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-12 12:52:49\" },\n\t{ \"post_id\": 4915, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Re: Using STD.File.SprayFixed\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"This is the sample I lifted from ECL help file - I tried the same with my values. Here, the 2nd parameter is a file system path.\\n\\n\\nSTD.File.SprayFixed('10.150.50.14','c:\\\\\\\\InputData\\\\\\\\MyFile.txt',\\n 255,'400way','IN::MyFile',-1,\\n 'http://10.150.50.12:8010/FileSpray');
\", \"post_time\": \"2013-11-12 05:38:15\" },\n\t{ \"post_id\": 4912, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Re: Using STD.File.SprayFixed\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nThe way that I understand it, the file location has to be visible to the cluster, in other words, the address of a valid landing zone. For example, on one of my AWS clusters, I used this configuration:\\n\\nSrcIP := '10.252.64.82';\\nSrcPath := '//10.252.64.82/var/lib/HPCCSystems/mydropzone/';\\nInitials := 'RT'; \\n\\n//************ Spray Intro ECL/THOR Class Files *******************************\\n\\n STD.File.SprayFixed(SrcIP,SrcPath + 'persons',155, 'mythor', \\n \\t\\t\\t\\t\\t\\t'~CLASS::' + Initials + '::Intro::Persons',,,,true,true);
\\n\\nSrcIP is the address of the target cluster, and Srcpath points to the landing zone.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-11-11 13:30:14\" },\n\t{ \"post_id\": 4911, \"topic_id\": 1110, \"forum_id\": 10, \"post_subject\": \"Using STD.File.SprayFixed\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I am trying to programmatically spray a fixed file to the VM like this:\\n\\nSTD.File.SprayFixed(machineIPAddress,\\n \\t fileLocation,\\n \\t 82,'mythor',\\t \\n logicalFileName,\\n\\t -1,espServerAddress);
\\n\\nand I received this error:\\nError: System error: 0: DFUServer Error Failed: Failed to connect to dafilesrv/daliservix on <myIPAddress>:7100 (0, 0), 0, \\n\\nI doubt if these variables are passed correctly:\\nmachineIPAddress = my system's IP address \\nfilelocation = folder path in D drive - where my file exists (We can spray directly from file system, isn't it? The help file example sprays from C drive)\\nespServerAddress = ECLWatch IP address with /FileSpray appended\\n\\nAm I doing something wrong?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-11 11:43:08\" },\n\t{ \"post_id\": 4931, \"topic_id\": 1112, \"forum_id\": 10, \"post_subject\": \"Re: Ingesting files using sheduled Workunits.\", \"username\": \"Allan\", \"post_text\": \"Hum - Richard\\n\\nAn intriguing idea.\\n\\nI will go away and cogitate.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-11-13 09:18:13\" },\n\t{ \"post_id\": 4926, \"topic_id\": 1112, \"forum_id\": 10, \"post_subject\": \"Re: Ingesting files using sheduled Workunits.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou could implement a semaphore system that will flag the file as in process.\\n\\nA WU that "grabs" a specific file can simply create a new file with the same name and a ".sem" at the end, something like this:OUTPUT(DATASET([{WORKUNIT}],{STRING30 wuid}),,'TheFileName.sem');
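A companion check before grabbing a file might pair that semaphore with an existence test; a minimal sketch, assuming the same 'TheFileName.sem' logical name from the example above:

IMPORT STD;

// Skip the file if another workunit has already dropped a semaphore for it.
alreadyGrabbed := STD.File.FileExists('TheFileName.sem');

IF(NOT alreadyGrabbed,
   OUTPUT('grab and process the file here'),
   OUTPUT('skip: another workunit already grabbed this file'));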
\\nThat way, any additional jobs that come along that want to "grab" that same file can detect the semaphore file and leave it alone. It also creates a record of which workunit "grabbed" each file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-12 16:32:53\" },\n\t{ \"post_id\": 4924, \"topic_id\": 1112, \"forum_id\": 10, \"post_subject\": \"Re: Ingesting files using sheduled Workunits.\", \"username\": \"Allan\", \"post_text\": \"To add - there is an obvious solution:\\n\\nIf a sibling directory to the incoming directory was created (say incoming2) then the scheduler WU could move files from 'incoming' to 'incoming2' while it was scheduling a WU to process the file.\\nThe processing WU itself would look into 'incoming2' for its file to process.\\n\\nHowever I'm not at all sure if this is the 'standard' solution to this kind of problem.\\nI've not seen such a directory layout used in other projects.\", \"post_time\": \"2013-11-12 15:04:51\" },\n\t{ \"post_id\": 4922, \"topic_id\": 1112, \"forum_id\": 10, \"post_subject\": \"Ingesting files using sheduled Workunits.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWe have to ingest files 24/7.\\nWe have a scheduled task (running every 10 minutes) that fires off WUs to process individual files if there are any to process.\\n\\nWe have to ensure that once a WU has been fired off to process a particular file no other WU will subsequently be fired off to process the same file.\\n\\nI've attempted to do this by putting the filename in the name of the WU then using function STD.File.Workunit.WorkunitLists to find a WU that is or has been run for this file.\\n\\nThis approach does not work at all.\\nIts due to the fact that a WU can still be in the compilation queue when the scheduler kicks in 10 minutes later. The WU won't have its Job-name setup when the scheduler is running.\\n\\nThere is a workaround in that we can extend the schedule to run once every hour say, but this is just a workaround as there is no guarantee that 1 hour is long enough and this extended time can effect SLAs.\\n\\nThere is an added complication in that we must allow re-runs of the same ingest, so we can't just look at some saved dataset of filenames to inhibit runs.\\nAll we must inhibit is the symultainious multiple processing of the ingest.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-11-12 14:44:14\" },\n\t{ \"post_id\": 4935, \"topic_id\": 1114, \"forum_id\": 10, \"post_subject\": \"Re: Getting an error while spraying an XML File\", \"username\": \"rtaylor\", \"post_text\": \"deben18,\\n\\nI don't know why "node" doesn't work (that could possibly be a bug), but with your file structure I would spray it with "graph" as the row tag. \\n\\nMy reason is simple -- you always want to use the outermost "reasonable" container tag as the row tag for spraying, and in this case, that would be "graph". Remember, spraying is only about getting the data on the system so you can work with it. You can parse the information out of the XML a couple of different ways once it's on your Thor.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-11-13 14:10:18\" },\n\t{ \"post_id\": 4928, \"topic_id\": 1114, \"forum_id\": 10, \"post_subject\": \"Getting an error while spraying an XML File\", \"username\": \"deben18\", \"post_text\": \"We are trying to spray an xml file. The xml file perfectly opens in a browser and doesn't have any issue. 
Here is the basic structure of the xml file.\\n<graphml>\\n<key attr="" />\\n<graph id="G">\\n<node>\\n<data></data>......\\n</node>\\n<node>\\n<data></data>......\\n</node>............\\n<edge>\\n<data></data>......\\n</edge>\\n<edge>\\n<data></data>......\\n</edge>............\\n</graph>\\n</graphml>\\n\\nHere is a sample data file:\\n\\n<?xml version="1.0"?>\\n-<graphml xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.1/graphml.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://graphml.graphdrawing.org/xmlns">\\n<key attr.type="string" attr.name="cultures" for="node" id="cultures"/>\\n<graph id="G" edgedefault="directed">\\n<node id="100">\\n<data key="datasource_name">http://www.ssa.gov/OACT/babynames/</data>\\n<data key="type">datasource</data>\\n</node>\\n<edge id="194t-fAI-2F0LaTPQBE" label="term_to_term" target="11776" source="59936">\\n<data key="relation_score">1.0</data>\\n<data key="relation_type">HYPOCORISM</data>\\n</edge>\\n</graph>\\n</graphml>\\n\\nNow, when we try to spray the xml file with rowtag="edge", it sprays the file without any error but with rowtag="node", it gives the following error:\\n\\nError: System error: 0: DFUServer Error Failed: Could not find the end of the first record (0, 0), 0, \\n\\nI don't understand why the spray works with rowtag="edge" and not with rowtag="node". Please reply me if anyone came across with this type of issue of spraying xml file.\", \"post_time\": \"2013-11-12 22:12:06\" },\n\t{ \"post_id\": 4949, \"topic_id\": 1116, \"forum_id\": 10, \"post_subject\": \"Re: recursive filename dependency\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nThis is the big disadvantage of any MACRO or FUNCTIONMACRO, if it works it's fantastic but if it doesn't it is almost impossible to debug.\\n\\nI would try to deconstruct the code by temporarily removing the FUNCTIONMACRO and replace it with a FUNCTION without parameters, then just paste in the record structures you were passing in. This may help to pinpoint what line number in your masterpiece is failing \\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-11-15 15:23:04\" },\n\t{ \"post_id\": 4944, \"topic_id\": 1116, \"forum_id\": 10, \"post_subject\": \"recursive filename dependency\", \"username\": \"omnibuzz\", \"post_text\": \"When I run this code (self-contained), I get a recursive filename dependency. I would like to understand why and how to overcome it.\\n
\\nIMPORT STD;\\n\\nMyWrapper(InputRecordStructure,OutputRecordStructure) := FUNCTIONMACRO\\n RETURN MODULE \\n EXPORT MyInterface := MODULE, VIRTUAL \\n // You will do all the distribution and sorting as required here\\n EXPORT DATASET(InputRecordStructure) Projector(DATASET(InputRecordStructure) Input) := FUNCTION\\n RETURN Input;\\n END;\\n \\n EXPORT SET OF STRING LocalFn(SET OF STRING input) := FUNCTION\\n RETURN input;\\n END;\\n \\n EXPORT SET OF STRING GlobalFn(SET OF STRING input) := FUNCTION\\n RETURN input;\\n END;\\n END;\\n \\n EXPORT RExecute(MyInterface CustomImplementation, STRING WorkingScope = '') := MODULE\\n EXPORT DATASET(OutputRecordStructure) Run(DATASET(InputRecordStructure) Input) := FUNCTION\\n FileInput := workingScope + 'In::' + WORKUNIT;\\n FileOutput := workingScope + 'Out::' + WORKUNIT;\\n ProjectedInput := CustomImplementation.Projector(Input);\\n InFile := OUTPUT(ProjectedInput,,FileInput, CSV(HEADING, SEPARATOR('|'), TERMINATOR('\\\\n')),THOR,OVERWRITE);\\n dir := NOTHOR(STD.File.GetLogicalFileAttribute(FileInput,'directory')) : INDEPENDENT;\\n parts := (integer)NOTHOR(STD.File.GetLogicalFileAttribute(FileInput,'numparts')) : INDEPENDENT;\\n SetWords := STD.str.splitwords(FileInput,'::');\\n nam := SetWords[COUNT(SetWords)];\\n \\n MyRec := RECORD\\n STRING Result;\\n END;\\n \\n NodeResultRec := RECORD\\n UNSIGNED2 Node;\\n SET OF STRING Result;\\n END;\\n \\n IntermediateDS := DATASET(STD.System.Thorlib.Nodes(),\\n TRANSFORM(NodeResultRec,\\n SELF.node := COUNTER;\\n fileName := dir + '/' + nam + '._' + COUNTER +'_of_' + parts;\\n RInputDS := DATASET(fileName,MyRec,CSV(HEADING, SEPARATOR('|'), TERMINATOR('\\\\n')));\\n SELF.Result:= CustomImplementation.LocalFn(SET(RInputDS,Result))),DISTRIBUTED) : INDEPENDENT;\\n \\n \\n \\n AggregateDS := NORMALIZE(IntermediateDS,Dataset(LEFT.Result,MyRec),TRANSFORM(MyRec, SELF := RIGHT;));\\n \\n FinalDS := DATASET(CustomImplementation.GlobalFn(SET(AggregateDS,Result)),MyRec);\\n \\n OutFile := OUTPUT(FinalDS,,FileOutput, CSV(HEADING, SEPARATOR('|'), TERMINATOR('\\\\n')),THOR,OVERWRITE);\\n \\n OutputDS := DATASET(FileOutput,OutputRecordStructure,CSV(HEADING, SEPARATOR('|'), TERMINATOR('\\\\n')));\\n\\n ActionSeq := SEQUENTIAL(InFile,OutFile);\\n RETURN WHEN(OutputDS, ActionSeq,BEFORE);\\n END;\\n END;\\n END;\\nENDMACRO;\\n\\n\\nInputRec := {INTEGER num1,integer num2,integer num3,integer num4};\\n\\nInputDS := DATASET(10000,TRANSFORM(InputRec,\\n SELF.num1 := counter;\\n SELF.num2 := counter*2;\\n SELF.num3 := counter*3;\\n SELF.num4 := counter*4;),DISTRIBUTED);\\n\\nMyModule := MyWrapper(InputRec,InputRec);\\nMyImplement := MODULE(MyModule.MyInterface)\\nEND;\\nMyModule.RExecute(MyImplement).Run(InputDS);\\n
\\nThanks\\nSrini\", \"post_time\": \"2013-11-14 16:01:23\" },\n\t{ \"post_id\": 4951, \"topic_id\": 1119, \"forum_id\": 10, \"post_subject\": \"Re: Range of RANDOM()\", \"username\": \"BrianB644\", \"post_text\": \"The range on my 64-bit platform is 32-bits ... unsigned4.\\n\\nI peeked at the source code and also ran a quick workunit that filled in 20 unsigned8 values using RANDOM() ... bit-wise ORed them together and examined the bits. All 32 low-order bits were assigned ... none of the higher order bits were assigned.\\n\\nIn your example, your number "2645216828" exceeds "integer 32", but not "unsigned integer 32".\\n\\nCheers,\\n\\nBrian B\", \"post_time\": \"2013-11-18 01:50:19\" },\n\t{ \"post_id\": 4950, \"topic_id\": 1119, \"forum_id\": 10, \"post_subject\": \"Range of RANDOM()\", \"username\": \"srbhkmr\", \"post_text\": \"May I know what's the maximum limit of the RANDOM() call ?\\nAll I could find in documentation is:\\n\\nThe RANDOM function returns a pseudo-random positive integer value.
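As a hedged illustration (not from the thread) of the range check BrianB644 describes above, one can sample RANDOM() into UNSIGNED8 values and confirm that the result never exceeds the unsigned 32-bit maximum:

// Sample RANDOM() many times; the maximum observed stays below 2^32 (4294967296)
rec := {UNSIGNED8 r};
samples := DATASET(1000, TRANSFORM(rec, SELF.r := RANDOM()));
OUTPUT(MAX(samples, r), NAMED('MaxObserved'));
OUTPUT(MAX(samples, r) < 4294967296, NAMED('FitsIn32Bits'));
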
\\n\\nOne test run gave me 2645216828, which exceed the limit of UNSIGNED4!\\n\\nThanks,\", \"post_time\": \"2013-11-17 14:16:58\" },\n\t{ \"post_id\": 4962, \"topic_id\": 1120, \"forum_id\": 10, \"post_subject\": \"Re: Context dependent error in function call\", \"username\": \"bforeman\", \"post_text\": \"The problem apparently points to this line in TryMultiple:\\n\\noutputfilename:='~hppc::loganalysis::input::superfile::subfile'+superfilename+REGEXREPLACE(' ',REGEXREPLACE('/',REGEXREPLACE(':',p[1].date,'_'),'_'),'_'):independent;\\n\\nW:=output(x,,outputfilename,thor,overwrite);
\\n\\nSo it looks like your expression for outputfilename may be incorrect. \\n\\nYou will probably need to write some test code to see what outputfilename value is getting generated.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-20 13:28:06\" },\n\t{ \"post_id\": 4961, \"topic_id\": 1120, \"forum_id\": 10, \"post_subject\": \"Re: Context dependent error in function call\", \"username\": \"chhaya\", \"post_text\": \"hi Bob,\\n\\nI did not get the result.\\n\\nthis is my tryMultiple code\\n\\nimport std, std.File, std.Str,HPCCLogAnalysis;\\n\\nexport trymultiple(string dir,string path) := module\\n\\nrec := record\\nstring line;\\nend;\\n \\noutrec := record\\nstring stringdata;\\nend;\\n\\noutrec2 := record\\ndataset(outrec) child;\\nend;\\n\\n\\nfilelist:=STD.File.RemoteDirectory('ip',\\ndir,,true);\\n\\n// superfilename:='';\\n\\noutrec2 xform (filelist l) := transform\\n\\ninputds:=DATASET('~file::ip::home::user::hpcc::lz_data::^New folder::'+path+'::'+l.name,rec,csv(SEPARATOR(''), TERMINATOR('\\\\n')));\\n\\n\\noutrec xfrm2(inputds l):=transform\\n SELF.stringdata:=l.line;\\n end;\\n\\t\\n\\tds:= project(inputds,xfrm2(left));\\n\\tself.child:=ds;\\nEnd;\\n\\nx := project(filelist,xform(left));\\n \\nR:=Record\\nString date;\\nEnd;\\n\\np:=pipe('date +%D%T',R,csv);\\n\\n\\nsuperfilename:=path;\\n\\noutputfilename:='~hppc::loganalysis::input::superfile::subfile'+superfilename+REGEXREPLACE(' ',REGEXREPLACE('/',REGEXREPLACE(':',p[1].date,'_'),'_'),'_'):independent;\\n\\nW:=output(x,,outputfilename,thor,overwrite);\\n\\nbasefile :='~hppc::loganalysis::input::superfile::superfile'+superfilename;\\n\\nu:=SEQUENTIAL(W,Std.File.CreateSuperFile(basefile,,true),\\nStd.File.StartSuperFileTransaction(),\\nStd.File.AddSuperFile(basefile,outputfilename),\\nStd.File.FinishSuperFileTransaction());\\n\\nO:=if(exists(filelist),u,output('file does not exist'));\\n\\ntest(string filename) := function\\nIP:='ip';\\nfromloctn := '/home/user/hpcc/lz_data/New folder/'+path+'/';\\ntoloctn := '/home/user/hpcc/lz_data/FlumeHpcc/';\\ns:=STD.File.MoveExternalFile(IP,fromloctn+filename,toloctn+filename);\\nreturn when(true,s);\\nend;\\n\\ntestrec := record\\nboolean t;\\nend;\\n\\ntestrec xfrm(filelist l) := transform\\nself.t:=test(l.name);\\nend;\\n\\nremovedfile := project(filelist,xfrm(left));\\nl:=output(removedfile);\\n\\nEXPORT sup:=SEQUENTIAL(O,l);\\n\\nEND;\\n\\n
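As a hedged sketch of the test Bob suggests above, the generated logical file name can be inspected on its own before it is used in the OUTPUT. The prefix and date handling below mirror the snippet in question and are illustrative only:

// Stand-alone check of the generated logical file name
R := RECORD
  STRING date;
END;
p := PIPE('date +%D%T', R, CSV);
superfilename := 'test';   // placeholder for the real path value
outputfilename := '~hppc::loganalysis::input::superfile::subfile' + superfilename
                  + REGEXREPLACE(' ', REGEXREPLACE('/', REGEXREPLACE(':', p[1].date, '_'), '_'), '_');
OUTPUT(outputfilename, NAMED('GeneratedFileName'));
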
\", \"post_time\": \"2013-11-20 03:10:14\" },\n\t{ \"post_id\": 4956, \"topic_id\": 1120, \"forum_id\": 10, \"post_subject\": \"Re: Context dependent error in function call\", \"username\": \"bforeman\", \"post_text\": \"Warnings are not errors, did you eventually see a result?\\nWarnings are messages that the compiler generates for a variety of reasons, sometimes it's just to alert you to a line of code that it thinks might be suspect. Other times it will analyze the entire workunit and make a recommendation to one of your definitions.\\n\\nSo did you get a result when you ran the workunit?\\n\\nBut looking at your errors, you attached the wrong code All of the warnings point to your TryMultiple.ECL definition. Is it possible to attach that?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-19 12:51:32\" },\n\t{ \"post_id\": 4952, \"topic_id\": 1120, \"forum_id\": 10, \"post_subject\": \"Context dependent error in function call\", \"username\": \"chhaya\", \"post_text\": \"hi,\\n\\nThis is my code\\n\\n
import std;\\nimport HPCCLogAnalysis;\\n\\n\\nFsFilenameRecord := RECORD\\nSTRING name; \\nUNSIGNED8 size; \\nSTRING19 modified; \\nEND;\\n\\nfile:=STD.File.RemoteDirectory('IP',\\n'/home/user/hpcc/lz_data/New folder',,true);\\n\\noutrec:=Record\\nstring dir;\\nend;\\n\\noutrec get_folder(file L):=Transform\\nself.dir:=STD.STr.SplitWords(L.name,'/')[1];\\nend;\\n\\ndirectories:=Dedup(Project(file,get_folder(Left)));\\n\\ntest(string filename) := function\\na:=exists(directories);\\ns:=output(HPCCLogAnalysis.trymultiple('/home/user/hpcc/lz_data/New folder/'+filename,filename));\\nreturn when(a,s);\\nend;\\n\\ntestrec := record\\nboolean t;\\nend;\\n\\ntestrec xfrm(directories l) := transform\\nself.t:=test(l.dir);\\nend;\\n\\nout := project(directories,xfrm(left));\\nout;
\\n\\nafter running it i am getting these errors\\n\\nWarning: Workflow item 'outputfilename' seems to be context dependent (47, 1 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\HPCCLogAnalysis\\\\trymultiple.ecl)\\nWarning: Global side-effect 'o' seems to be context dependent - it may not function as expected (58, 11 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\HPCCLogAnalysis\\\\trymultiple.ecl)\\nWarning: OUTPUT() appears to be context dependent - this may cause a dataset not active error (49, 4 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\HPCCLogAnalysis\\\\trymultiple.ecl\\nWarning: (0,0): error C4153: INTERNAL: Dataset is not active: 'left')\\n\\nWhat can be the problem??\", \"post_time\": \"2013-11-18 10:52:39\" },\n\t{ \"post_id\": 4964, \"topic_id\": 1122, \"forum_id\": 10, \"post_subject\": \"Re: Performance of SprayFixed over SprayVariable.\", \"username\": \"DSC\", \"post_text\": \"Both variable and XML spraying will be slower because HPCC does not split a single record over multiple nodes.\\n\\nRecord positions must be determined during the spray, which involves scanning the contents, and that is complicated by such things as character escaping and quoting (i.e. making sure what appears to be an end-of-record indicator is not escaped or sitting within quoted data).\\n\\nWhile I don't know the exact algorithm used by fixed-width spraying, there are certainly a large number of optimizations you can apply to that function if you know the size of each record ahead of time.\\n\\nBottom line, variable-record-width spraying will be slower because of the processing overhead. It's nice to know how much slower it may be, though. Good research.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-11-21 14:05:39\" },\n\t{ \"post_id\": 4963, \"topic_id\": 1122, \"forum_id\": 10, \"post_subject\": \"Re: Performance of SprayFixed over SprayVariable.\", \"username\": \"Allan\", \"post_text\": \"ok as no one replied I've done my own investigation.\\n\\nSprayFixed is about 3 times faster than SprayVariable.\\n\\nWith a 12.5Gb file.\\n\\nSprayVariable 8 Minutes\\nSprayFixed 3 Minutes\\n
\\n\\nWith a 24.3Gb file.\\n\\nSprayVariable 17 Minutes\\nSprayFixed 6 Minutes\\n
\\n\\nI don't have an explanation for why SprayVariable is so much slower, given the process should be IO bound not CPU bound.\", \"post_time\": \"2013-11-21 12:18:12\" },\n\t{ \"post_id\": 4958, \"topic_id\": 1122, \"forum_id\": 10, \"post_subject\": \"Performance of SprayFixed over SprayVariable.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nDoes anyone have any figures of the relative performace of SprayFixed over SprayVariable?\\n\\nI'm assuming here SprayFixed is quicker than SprayVariable.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-11-19 14:19:39\" },\n\t{ \"post_id\": 4969, \"topic_id\": 1124, \"forum_id\": 10, \"post_subject\": \"Re: Lookup specified on an unfiltered keyed join\", \"username\": \"bforeman\", \"post_text\": \"Using LOOKUP, you are telling the compiler to load the entire index onto each node and therefore execute an implicit local JOIN. The compiler is just reminding you that you are doing that and if the index is large is could actually cause a slowdown in your job performance if the index contents spill to disk. If you know your index is relatively small you can safely ignore the warning.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-22 15:11:09\" },\n\t{ \"post_id\": 4966, \"topic_id\": 1124, \"forum_id\": 10, \"post_subject\": \"Lookup specified on an unfiltered keyed join\", \"username\": \"srbhkmr\", \"post_text\": \"What does this warning mean?\\n Lookup specified on an unfiltered keyed join - was this intended?
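For context on the behaviour Bob describes above, here is a hedged, self-contained sketch of what the LOOKUP option does, using small inline datasets purely for illustration; with an INDEX on the right-hand side the same copy-to-every-node behaviour applies:

// The entire right recordset is loaded onto every node and the join then runs locally
lrec := {UNSIGNED1 id, STRING1 v};
rrec := {UNSIGNED1 id, STRING1 w};
L := DATASET([{1,'X'},{2,'Y'},{3,'Z'}], lrec);
R := DATASET([{1,'A'},{3,'B'}], rrec);
jrec := {UNSIGNED1 id, STRING1 v, STRING1 w};
J := JOIN(L, R, LEFT.id = RIGHT.id,
          TRANSFORM(jrec, SELF.w := RIGHT.w, SELF := LEFT),
          LEFT OUTER, LOOKUP);
OUTPUT(J);
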
\\n\\nMy join is between a dataset A and an indexedFile BIdx\\n\\n J := JOIN(A, BIdx, RIGHT.field1 = LEFT.field2 or RIGHT.field1 = LEFT.field1\\n doJoin(LEFT, RIGHT),\\n LEFT OUTER, \\n LOOKUP\\n );\\n\\n\\nThanks,\", \"post_time\": \"2013-11-22 07:53:11\" },\n\t{ \"post_id\": 5134, \"topic_id\": 1126, \"forum_id\": 10, \"post_subject\": \"Re: FETCH() failing on Thor cluster\", \"username\": \"srbhkmr\", \"post_text\": \"Hi Bob,\\n\\nI discarded that approach for the problem I was trying to solve, couldn't post the exact code that I was using then. \\n\\nIt was most probably the same issue as here [ viewtopic.php?f=10&t=1174 ], though I can't be certain.\\n\\nThanks,\", \"post_time\": \"2014-01-16 04:42:40\" },\n\t{ \"post_id\": 4972, \"topic_id\": 1126, \"forum_id\": 10, \"post_subject\": \"Re: FETCH() failing on Thor cluster\", \"username\": \"bforeman\", \"post_text\": \"Sounds like it might be an issue/bug, but we can't be sure unless we can see your code. As a test, what happens if you switch your target to hTHOR?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-25 14:24:45\" },\n\t{ \"post_id\": 4971, \"topic_id\": 1126, \"forum_id\": 10, \"post_subject\": \"FETCH() failing on Thor cluster\", \"username\": \"srbhkmr\", \"post_text\": \"While trying to use FETCH() on an indexed Dataset, it works fine in with individual queries, but throws a runtime error, when called from within a PROJECT-TRANSFORM function saying:\\n\\n\\n<Result>\\n<Exception><Source>eclagent</Source><Message>System error: 0: Graph[9], csvread[10]: SLAVE 10.10.1.106:20900: Global child graph? : Global acts = Graph(12): [csvfetch(14)]</Message></Exception>\\n</Result>\\n
\\n\\nWhat might be causing such an error? Processing is being done on a Thor cluster.\\nAny hints are appreciated. \\n\\nThanks,\", \"post_time\": \"2013-11-23 18:39:27\" },\n\t{ \"post_id\": 4997, \"topic_id\": 1131, \"forum_id\": 10, \"post_subject\": \"Re: Remove Warning from non-grouped data in TABLE cross-tab.\", \"username\": \"bforeman\", \"post_text\": \"OK, well, I don't think the warning can be suppressed, unless there is something in #OPTIONS that I might be missing.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 17:32:01\" },\n\t{ \"post_id\": 4996, \"topic_id\": 1131, \"forum_id\": 10, \"post_subject\": \"Re: Remove Warning from non-grouped data in TABLE cross-tab.\", \"username\": \"Allan\", \"post_text\": \"Hi Bob,\\n\\nYes, I just get errors whatever I try.\\nThe child dataset is just a concaternation of ROW's. \\n\\nThe Ref Manual just has 'GROUPED' on DATASET (struct)\\nNot much help in this context, though I'm prepared to be corrected.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-12-02 17:30:24\" },\n\t{ \"post_id\": 4990, \"topic_id\": 1131, \"forum_id\": 10, \"post_subject\": \"Re: Remove Warning from non-grouped data in TABLE cross-tab.\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nHave you tried using the GROUPED keyword on the DATASET statement?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 14:52:37\" },\n\t{ \"post_id\": 4979, \"topic_id\": 1131, \"forum_id\": 10, \"post_subject\": \"Remove Warning from non-grouped data in TABLE cross-tab.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm getting this warning from a TABLE cross-tab.\\n\\nWarning: Field 'bucket_responsetimes' in TABLE does not appear to be properly defined by grouping conditions.\\n
\\n\\nThe compiler is telling the truth, my child dataset is not grouped but the content of the child dataset is grouped, see code below:\\n\\n REAL delta_a := 1.0;\\n BOOLEAN Bracket(REAL lb,REAL delta=delta_a) := tlcomplete.response_time >= lb and tlcomplete.response_time < (lb+delta);\\n\\n CommonGather := RECORD\\n STRING10 Day := tlbase.Day;\\n STRING2 Hour := tlbase.Hour;\\n UNSIGNED Complete := COUNT(GROUP,tlbase.order_status_code = '100');\\n UNSIGNED Timeouts := COUNT(GROUP,tlbase.order_status_code = '404');\\n UNSIGNED Errors := COUNT(GROUP,tlbase.order_status_code = '401');\\n UNSIGNED TotalCnt := COUNT(GROUP);\\n DATASET(LayoutStats.ResponseTimes) Bucket_ResponseTimes\\n\\t\\t\\t\\t\\t := ROW({'0SEC_1SEC',COUNT(GROUP,Bracket(0.0))},LayoutStats.ResponseTimes)&\\n ROW({'1SEC_2SEC',COUNT(GROUP,Bracket(1.0))},LayoutStats.ResponseTimes)&\\n ROW({'2SEC_3SEC',COUNT(GROUP,Bracket(2.0))},LayoutStats.ResponseTimes)&\\n ROW({'3SEC_4SEC',COUNT(GROUP,Bracket(3.0))},LayoutStats.ResponseTimes)&\\n ROW({'4SEC_5SEC',COUNT(GROUP,Bracket(4.0))},LayoutStats.ResponseTimes)&\\n ROW({'5SEC_HSEC',COUNT(GROUP,Bracket(5.0,99999.0))},LayoutStats.ResponseTimes);\\n STRING8 Avg_All_ResponseTimes\\n\\t\\t\\t\\t\\t := REALFORMAT(AVE(GROUP,tlbase.response_time),8,3);\\n END;\\n\\n Gather := RECORD\\n STRING11 Account := tlbase.Account;\\n CommonGather;\\n END;\\n\\n GatherNoAcc := RECORD\\n STRING11 Account := '0';\\n CommonGather;\\n END;\\n\\n t := IF(SmallResult\\n ,TABLE(tlbase,Gather,tlbase.Account,tlbase.day,tlbase.Hour,LOCAL,FEW)\\n ,TABLE(tlbase,Gather,tlbase.Account,tlbase.day,tlbase.Hour,LOCAL));\\n tNoAcc := TABLE(tlbase,GatherNoAcc,tlbase.day,tlbase.Hour,LOCAL,FEW);\\n
\\nThe code is running and generating correct results but I would like to remove the warning but don't know how.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-11-29 10:25:15\" },\n\t{ \"post_id\": 4998, \"topic_id\": 1133, \"forum_id\": 10, \"post_subject\": \"Re: Ignoring SKEW errors\", \"username\": \"Allan\", \"post_text\": \"Hi Dustin,\\n\\nFEW works a treat Good one to remember.\\n\\nI owe you a pint.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-12-02 17:37:48\" },\n\t{ \"post_id\": 4989, \"topic_id\": 1133, \"forum_id\": 10, \"post_subject\": \"Re: Ignoring SKEW errors\", \"username\": \"dustinskaggs\", \"post_text\": \"If you know that the TABLE will only produce just a few records, you can use the FEW option on TABLE. This will allow it to do its processing without the need of a full global sort and therefore should avoid the skew issue.\\n\\n-Dustin\", \"post_time\": \"2013-12-02 14:39:57\" },\n\t{ \"post_id\": 4981, \"topic_id\": 1133, \"forum_id\": 10, \"post_subject\": \"Ignoring SKEW errors\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have a very simple THOR query. I just need a list of distinct account_numbers from a large dataset.\\nI've tried various things, using SKEW(1.0,1.0) with various actions. The actual job is just:\\n
\\n IMPORT * FROM ProjectUK_Deltas;\\n#workunit('name','Find distinct accounts');\\n InTl := delta_files.DS_BASE_DELTA_TRANSLOG;\\n\\nr := RECORD \\n STRING11 Account_number := InTl.Account_number;\\n UNSIGNED Cnt := COUNT(GROUP);\\nEND;\\nTABLE(InTl,r,Account_number);\\n
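Applying the FEW suggestion from earlier in this thread to a job like this is, as a minimal self-contained sketch (the real job would use InTl from above):

// FEW lets TABLE aggregate a small number of groups without the full global sort
// that was tripping the skew limit (only ~20 distinct accounts are expected)
rec := {STRING11 account_number};
ds := DATASET([{'A1'},{'A1'},{'A2'},{'A3'},{'A2'},{'A1'}], rec);
r := RECORD
  STRING11 Account_number := ds.account_number;
  UNSIGNED  Cnt           := COUNT(GROUP);
END;
OUTPUT(TABLE(ds, r, account_number, FEW));
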
\\nThe 'Cnt' is not actually needed but I've been playing around a lot trying to get this to run.\\nI've also tried vertical slice, sorting and deduping.\\nWhatever I do I get 'exceeded skew limit' errors.\\nThere are only about 20 accounts, a very small result set.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-11-29 11:58:35\" },\n\t{ \"post_id\": 5042, \"topic_id\": 1136, \"forum_id\": 10, \"post_subject\": \"Re: Add multiple Datasets\", \"username\": \"Allan\", \"post_text\": \"I've also found creating temporary superfiles very useful.\\n\\nSee heading 'Temporary SuperFiles' in the ECL Reference manual.\\nFor example, when I was working through the KJV Bible exercise I had 40 or so seperate files to process, one for each book in the bible, yet I was able to refer to all of them as one (actually three) DATASET definition(s):\\n\\n SHARED SetBooks1 := 'genesis,exodus,levit,numbers,Deuteronomy';\\n SHARED SetBooks2 := 'Joshua,Judges,Ruth,1Samuel,2Samuel,1Kings,2Kings,1Chronicles,2Chronicles,ezra,nehemiah,esther,job,psalms,proverbs,'\\n +'eccl,song,isaiah,jeremiah,lament,ezekiel,daniel,hosea,joel,amos,'\\n +'obadiah,jonah,micah,nathum,habakkuk,zeph,haggai,zech,malachi';\\n SHARED SetBooks3 := 'matthew,mark,luke,john,acts,romans,1Corinthians,2Corinthians,galatian,ephesian,philipp,colossians,'\\n +'1thess,2thess,1timothy,2timothy,titus,philemon,hebrews,james,1peter,2peter,'\\n +'1john,2john,3john,jude,rev';\\n\\n Biblet1 := DATASET(C.Root+'{'+SetBooks1+'}',$.Layout.Layout_Raw_Book,CSV(HEADING(2),SEPARATOR('')));\\n Biblet2 := DATASET(C.Root+'{'+SetBooks2+'}',$.Layout.Layout_Raw_Book,CSV(SEPARATOR('')));\\n Biblet3 := DATASET(C.Root+'{'+SetBooks3+'}',$.Layout.Layout_Raw_Book,CSV(SEPARATOR('')));\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-12-11 09:57:40\" },\n\t{ \"post_id\": 5017, \"topic_id\": 1136, \"forum_id\": 10, \"post_subject\": \"Re: Add multiple Datasets\", \"username\": \"bforeman\", \"post_text\": \"You may be looking for a simple append. When you say "return the dataset" you are actually returning a recordset. You can easily do this:\\n\\nCombinedRecordset := RecSetFilename1 + RecSetFilename2 + RecSetFilenameN;
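(As a hedged, self-contained illustration of that append, with placeholder layouts and inline data rather than real file names:)

// Recordsets with the same layout appended into one combined recordset
rec := {UNSIGNED4 id, STRING20 name};
RecSet1 := DATASET([{1,'alpha'},{2,'beta'}], rec);
RecSet2 := DATASET([{3,'gamma'}], rec);
RecSet3 := DATASET([{4,'delta'},{5,'epsilon'}], rec);
CombinedRecordset := RecSet1 + RecSet2 + RecSet3;
OUTPUT(CombinedRecordset,, '~thor::out::combined_example', OVERWRITE);  // illustrative file name
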
\\nand then OUTPUT the CombinedRecordset to a new filename and then define it with a new DATASET statement.\\n\\nOf course, another avenue to explore would be superfiles, they were designed to merge multiple files of the same layout into a single logical file.\\n\\nI think either approach would be worthy of consideration.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-03 12:53:20\" },\n\t{ \"post_id\": 5011, \"topic_id\": 1136, \"forum_id\": 10, \"post_subject\": \"Add multiple Datasets\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nI have a set of (n) file names with same layout and I wrote a function that takes each file name, does some logic to extract particular rows and returns the dataset . \\n\\nNow I want to add all these datasets for my n file names into a single dataset for further processing .\\n\\nHow can I achieve this using ECL ?\", \"post_time\": \"2013-12-02 20:06:56\" },\n\t{ \"post_id\": 5033, \"topic_id\": 1139, \"forum_id\": 10, \"post_subject\": \"Re: Large records forcing inefficient use of memory buffers?\", \"username\": \"bforeman\", \"post_text\": \"Is there a probability of this turning into a future issue? \\n
\\n\\nI don't think so. As you start to clean and analyze the data, you may also discover that those fields can be reduced in size during the transformation process.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-09 12:45:17\" },\n\t{ \"post_id\": 5032, \"topic_id\": 1139, \"forum_id\": 10, \"post_subject\": \"Re: Large records forcing inefficient use of memory buffers?\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Hi Bob,\\n\\nToday, yet another program of mine gave the same warning. \\n\\nI did some troubleshooting and found that my layout has 3 description-like unicode fields of which one has been declared with a maxlength of 15000 bytes and this seems to be the culprit. If I leave this field out of my structure, my program runs fine. Unfortunately, I can't reduce the width of this field - the file was generated by someone else and I'm only using it. \\n\\nIs there a probability of this turning into a future issue? \\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-12-09 11:29:07\" },\n\t{ \"post_id\": 5031, \"topic_id\": 1139, \"forum_id\": 10, \"post_subject\": \"Re: Large records forcing inefficient use of memory buffers?\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nA warning is simply a warning, and it's telling you that the excessive use of the memory buffers could slow your process down. I'm curious, how exactly were you using OUTPUT in your code?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-06 20:56:35\" },\n\t{ \"post_id\": 5029, \"topic_id\": 1139, \"forum_id\": 10, \"post_subject\": \"Large records forcing inefficient use of memory buffers??\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"During execution of one of my ECL programs, I get a warning that says\\n\\nWarning: Graph[1], split[5]: SLAVE <some IP>: Large records forcing inefficient use of memory buffers in split\\n\\nIs this something I should be concerned about? \\n\\nIn my code, I just had a series of transforms and joins with some intermittent output statements. This warning stopped when I commented a couple of those output statements. \\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-12-06 09:56:15\" },\n\t{ \"post_id\": 5059, \"topic_id\": 1142, \"forum_id\": 10, \"post_subject\": \"Re: Inconsistent use of 'Constants Strings' in workflow serv\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nI don''t claim authorship of the idea, I just rummaged around the repository as others must have hit the same issue.\\n\\nThe prerequisite WU, has its ECL constructed dynamically, e.g:\\n\\nfLaunchBuild(BOOLEAN isCanned = FALSE)\\t:= function\\n bldType := IF(isCanned,'true','false');\\n rundate := stringlib.getDateYYYYMMDD(); \\t\\n ECLText\\t:=\\t'#workunit(\\\\'name\\\\', \\\\'Basic Build ' + rundate + '\\\\');\\\\n'\\n\\t\\t +\\t'#workunit(\\\\'priority\\\\',\\\\'high\\\\');\\\\n'\\n\\t\\t +\\t'#workunit(\\\\'priority\\\\',10);\\\\n'\\n\\t\\t +\\t'Build(nDate,true,true,'+bldType+') '\\n + ' : SUCCESS(NOTIFY(\\\\'Consolidated Build\\\\',\\\\''+bldType+rundate +'\\\\'));\\\\n',\\n return fSubmitNewWorkunit(ECLText,BuildWorkunitCluster,BuildWorkunitQueue);\\nend;\\n
\\n\\nThe handy 'fSubmitNewWorkunit' does the SOAPCALL to a workflow service.\\nNote the NOTIFY on successful completion is now specific to the days build and the type of bulid be being done.\\n\\nThe dependent WU is already in a WAIT state:\\n\\nEXPORT BatchGo(BOOLEAN isCanned = FALSE) := FUNCTION\\n\\n bldType := IF(isCanned,'true','false');\\n rundate := stringlib.getDateYYYYMMDD(); \\t\\n\\n ECLText:= '#workunit(\\\\'name\\\\', \\\\'NCD ('+IF(isCanned,'Canned','Live')+') Dependent Build ' + rundate + '\\\\');\\\\n'\\n\\t +\\t'#workunit(\\\\'priority\\\\',\\\\'high\\\\');\\\\n'\\n\\t +\\t'#workunit(\\\\'priority\\\\',10);\\\\n'\\n\\t +\\tIF(isCanned,\\n\\t 'ContributionLoadUKPD.Actions(\\\\'' + rundate + '\\\\',TRUE).BuildNCDBaseAndKey(ContributionLoadUKPD.KeyHelper(FALSE,TRUE).FilteredKey) ',\\n\\t\\t 'ContributionLoadUKPD.Actions(\\\\'' + rundate + '\\\\').BuildNCDBaseAndKey(ContributionLoadUKPD.KeyHelper().FilteredKey) '\\n\\t\\t )\\n + ' : WHEN(EVENT(\\\\'Consolidated Build\\\\',\\\\''+bldType+rundate+'\\\\',COUNT(1))) , SUCCESS(NOTIFY(\\\\'Yet Another Build\\\\',\\\\''+rundate+'\\\\'));\\\\n';\\n\\n RETURN fSubmitNewWorkunit(ECLText, BuildWorkunitCluster, BuildWorkunitQueue);\\nEND;\\n
\\n\\nThis is not only waiting on the first WU' EVENT to fire its also firing its own event to daisy chain a whole series of Builds.\\n\\nYours\", \"post_time\": \"2013-12-17 11:59:16\" },\n\t{ \"post_id\": 5053, \"topic_id\": 1142, \"forum_id\": 10, \"post_subject\": \"Re: Inconsistent use of 'Constants Strings' in workflow serv\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThat sounds like an interesting approach.\\n\\nCan you post some example code to show exactly how you've done it?\\n\\nRichard\", \"post_time\": \"2013-12-12 19:51:31\" },\n\t{ \"post_id\": 5052, \"topic_id\": 1142, \"forum_id\": 10, \"post_subject\": \"Re: Inconsistent use of 'Constants Strings' in workflow serv\", \"username\": \"Allan\", \"post_text\": \"I've implemented dynamic event names and SubTypes, by constructing the ECL to execute at run time, then using a SOAPCALL to schedule the WU.\", \"post_time\": \"2013-12-12 16:35:23\" },\n\t{ \"post_id\": 5039, \"topic_id\": 1142, \"forum_id\": 10, \"post_subject\": \"Inconsistent use of 'Constants Strings' in workflow services\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI have Multiple WU's to run that are dependent on a single WU. I want to ensure that the events are uniquly distinguished, so, for example, a build for the 10th Dec does not cause a dependent WU to start that should be processing 9th Dec data.\\n\\nIts documented that WU services take 'constant strings', which seems to me to be quite a restriction, anyway battling on I seem to be able to compile and run expressions that include NOTIFY, but not expressions including EVENT. Can someone explain this strange behaviour:\\nThe Event strings:\\n\\nEXPORT Evnt := MODULE\\n\\n EXPORT Id := ENUM(UNSIGNED2,CONSOLIDATED_BUILD_CANNED=1,CONSOLIDATED_BUILD_LIVE=2);\\n\\n EXPORT EventInformation := RECORD\\n STRING Name;\\n STRING SubType;\\n END;\\n\\n EXPORT EventInformation E(Id itm) := CASE(itm\\n , Id.CONSOLIDATED_BUILD_CANNED => ROW({'Consolidated Build','Canned'},EventInformation)\\n , Id.CONSOLIDATED_BUILD_LIVE => ROW({'Consolidated Build','Live'} ,EventInformation)\\n , ROW({'None','None'},EventInformation));\\nEND;\\n
\\nThe prerequisite code that dependent WU's depend upon. (this compiles and runs ok)\\n\\nEXPORT Prerequisite := MODULE\\n\\n EXPORT Run(STRING pDate, BOOLEAN isCannedData) := FUNCTION\\n IMPORT * FROM TESTAREA;\\n \\n e := Evnt.E(IF(isCannedData,\\n Evnt.Id.CONSOLIDATED_BUILD_CANNED,\\n Evnt.Id.CONSOLIDATED_BUILD_LIVE)\\n );\\n p := output('SomeText') : SUCCESS(NOTIFY(e.name,e.SubType+pDate));\\n RETURN p;\\n END;\\n\\nEND;\\n
\\nThe Dependent code, that fails to compile (expected constant event name and filter)\\nrd:='20131209';\\n#workunit('name','dependent Live '+rd);\\n\\ne := TESTAREA.Evnt.e(UKServices_Utilities.UKEvents.Id.CONSOLIDATED_BUILD_LIVE);\\nOUTPUT('test live'+rd+' DONE') : WHEN (EVENT(e.Name,e.SubType+rd),COUNT(1));\\n
\\nWhy does the NOTIFY not fail with the same error? In fact runs successfully and triggers dependent tasks setup thus:\\n\\nrt:='Live';\\nrd:='20131209';\\n#workunit('name','dependent '+rt+' '+rd);\\nOUTPUT('test '+rt+' '+rd+' DONE') : WHEN (EVENT('Consolidated Build',rt+rd),COUNT(1));\\n
\\n\\nA general question how does one make EVENT's fire for a particular day? (CRON EVENTS do not help here)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2013-12-10 15:21:51\" },\n\t{ \"post_id\": 5044, \"topic_id\": 1145, \"forum_id\": 10, \"post_subject\": \"Re: 'Joined' result sort order\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nYou're doing it correctly by sorting for the order you need after the JOIN. \\n\\nAs the JOIN docs state:JOIN dynamically sorts/distributes the leftrecset and rightrecset as needed to perform its operation based on the condition specified, therefore the output record set is not guaranteed to be in the same order as the input record sets. If JOIN does do a dynamic sort of its input record sets, that new sort order cannot be relied upon to exist past the execution of the JOIN.
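A minimal self-contained sketch of the approach described above (field names are illustrative): impose the order you need with an explicit SORT after the JOIN.

rec1 := {UNSIGNED1 id, STRING1 v};
rec2 := {UNSIGNED1 id, STRING1 w};
d1 := DATASET([{1,'A'},{2,'B'},{3,'C'}], rec1);
d2 := DATASET([{3,'X'},{1,'Y'},{2,'Z'}], rec2);
jrec := {UNSIGNED1 id, STRING1 v, STRING1 w};
j := JOIN(d1, d2, LEFT.id = RIGHT.id,
          TRANSFORM(jrec, SELF.w := RIGHT.w, SELF := LEFT));
OUTPUT(SORT(j, id));   // the explicit SORT is what guarantees the final order
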
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-12-11 18:12:30\" },\n\t{ \"post_id\": 5043, \"topic_id\": 1145, \"forum_id\": 10, \"post_subject\": \"'Joined' result sort order\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I have 2 datasets, each sorted on different fields. While joining them, I find the result set to be in no particular order. Is it possible for me to enforce an order upon the results? Presently, I'm explicitly sorting the result set after join. \\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-12-11 16:13:24\" },\n\t{ \"post_id\": 5063, \"topic_id\": 1146, \"forum_id\": 10, \"post_subject\": \"Re: Transform options\", \"username\": \"sameermsc\", \"post_text\": \"Hi Gayathri,\\n\\ncheck if this works for you\\n\\n\\nRec := RECORD\\n\\tUNSIGNED1 RecID;\\n\\tSTRING1 Val;\\nEND;\\n\\t \\nDS1 := DATASET([{1, 'A'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{2, 'A'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{3, 'B'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{4, 'A'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{5, 'E'}],Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nDS2 := DATASET([{1, 'A'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{2, 'B'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{3, 'B'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{4, 'A'},\\n\\t\\t\\t\\t\\t\\t\\t\\t{5, 'D'}],Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\nOutRec := RECORD\\n\\tUNSIGNED1 RecID_L;\\n\\tUNSIGNED1 RecID_R;\\n\\tSTRING1 Val;\\nEND;\\n\\nOutRec XF(Rec L, Rec R) := TRANSFORM\\n\\tSELF.RecID_L := if(L.RecID > r.recid, skip, l.recid);\\n\\tSELF.RecID_R := R.RecID;\\n\\tSELF := L;\\nEND;\\n\\nj1 := join(ds1, ds2, left.val = right.val, xf(left, right));\\n\\nj2 := dedup(j1, right.recid_l = left.recid_l and right.recid_r > left.recid_r);\\n\\nj3 := dedup(j2, right.recid_l <= left.recid_r and right.recid_r <= left.recid_r);\\nj2; \\nj3;\\n
\\n\\nLet me know if there are any other scenarios for which it is failing\\n\\nRegards,\\nSameer\", \"post_time\": \"2013-12-18 11:13:16\" },\n\t{ \"post_id\": 5062, \"topic_id\": 1146, \"forum_id\": 10, \"post_subject\": \"Re: Transform options\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"For the following example, it doesn't work (this is a possible scenario for my data):\\n\\nDS1 := DATASET([{1, 'A'},\\n {2, 'A'},\\n {3, 'B'},\\n\\t {4, 'A'},\\n {5, 'E'}],Rec);\\n \\nDS2 := DATASET([{1, 'A'},\\n {2, 'B'},\\n {3, 'B'},\\n {4, 'A'},\\n {5, 'D'}],Rec);
\\n\\nExpected output is \\nLId V RId\\n1 A 1\\n2 A 4
\\n\\nEach successive match on the right should start from the record after the last matched record till the end of the set. So, the 2nd A in left would match with 4th A in right. For 3rd B, we would start matching from record 5 and since there is no match, it would be ignored. The same is the case with 4th and 5th records in LHS.\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-12-18 10:00:16\" },\n\t{ \"post_id\": 5058, \"topic_id\": 1146, \"forum_id\": 10, \"post_subject\": \"Re: Transform options\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nNested transforms would be more complex than is actually required to accomplish this. \\nHere's some simpler code, just using SORT and JOIN and DEDUP, that accomplishes your purpose:Rec := RECORD\\n UNSIGNED1 RecID;\\n STRING1 Val;\\nEND;\\n\\t\\nDS1 := DATASET([{1, 'A'},\\n {3, 'B'},\\n {4, 'C'},\\n {2, 'A'},\\n {5, 'E'}],Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\nDS2 := DATASET([{1, 'A'},\\n {2, 'B'},\\n {4, 'C'},\\n {3, 'B'},\\n {5, 'D'}],Rec);\\nOutRec := RECORD\\n UNSIGNED1 RecID_L;\\n UNSIGNED1 RecID_R;\\n STRING1 Val;\\nEND;\\n\\nOutRec XF(Rec L, Rec R) := TRANSFORM\\n SELF.RecID_L := L.RecID;\\n SELF.RecID_R := R.RecID;\\n\\tSELF := L;\\nEND;\\n\\nsds1 := SORT(ds1,Val,RecID); //define the relevant sort order\\nsds2 := SORT(ds2,Val,RecID);\\nj1 := JOIN(sds1,sds2,LEFT.Val = RIGHT.Val,XF(LEFT,RIGHT));\\nj1;\\n\\nsj1 := SORT(j1,val,RecID_L,RecID_R); //ensure the sort order for DEDUP\\nd1 := DEDUP(sj1,LEFT.Val = RIGHT.Val AND \\n ((LEFT.RecID_L = RIGHT.RecID_L AND LEFT.RecID_R <> RIGHT.RecID_R) OR \\n (LEFT.RecID_L <> RIGHT.RecID_L AND LEFT.RecID_R = RIGHT.RecID_R)));\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\nd1;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-12-16 19:29:49\" },\n\t{ \"post_id\": 5055, \"topic_id\": 1146, \"forum_id\": 10, \"post_subject\": \"Transform options\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I have 2 datasets like this\\nDS1:\\nNo\\tVal\\n1\\tA\\n2\\tA\\n3\\tB\\n4\\tC\\n5\\tE
\\n\\nDS2:\\nNo\\tVal\\n1\\tA\\n2\\tB\\n3\\tB\\n4\\tC\\n5\\tD
\\n\\nMy requirement is to match datasets from both sides on 'Val', in exactly the same order in which they appear. \\n\\nThat is, the 1st A matches with first record A on RHS. For the 2nd LHS A, matching should start from the beginning of unmatched set ie, from 2nd record of RHS and since this is a B, there is no match. It would continue till the end of RHS set to see if there are more As. If there is none, the LHS A is considered unmatched and ignored. The iteration continues for each record of LHS.\\n\\nAfter matching, my output should be\\n\\nLNo\\tVal\\tRNo\\t\\n1\\t A\\t 1\\n3\\t B\\t 2\\n4\\t C\\t 4\\n
\\n\\nTo achieve this, I was thinking of using nested transforms (yet to try though! Is there a better way?) but to do this, I should be able to \\n i) skip looping over remaining records in RHS if a match is achieved (something like a 'break' construct available in some programming languages). Is this possible?
\\n\\n ii) skip records from the beginning till some index and then start the iteration (to ensure matches are done in the order of appearance). I know there is a SKIP option in transform but can it take an expression of the form 'SKIP C <= 5'?
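As a hedged aside (not from the thread), SKIP in a TRANSFORM can indeed be driven by an expression, for example via PROJECT with COUNTER:

// Drop the first five records by driving SKIP from the PROJECT counter
rec := {UNSIGNED1 no, STRING1 val};
inDS := DATASET([{1,'A'},{2,'A'},{3,'B'},{4,'C'},{5,'E'},{6,'F'},{7,'G'}], rec);
rec XF(rec L, UNSIGNED C) := TRANSFORM, SKIP(C <= 5)
  SELF := L;
END;
OUTPUT(PROJECT(inDS, XF(LEFT, COUNTER)));  // returns only records 6 and 7
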
\\nRegards,\\nGayathri\", \"post_time\": \"2013-12-16 14:48:59\" },\n\t{ \"post_id\": 5675, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"david.wheelock\", \"post_text\": \"The ToField macro requires a record-level UID. If one is not specified as the third parameter, it assumes the first numeric field it finds is the UID, and all subsequent fields are the data. You can read the comments at the top of the ToField macro for more information.\", \"post_time\": \"2014-05-09 17:24:42\" },\n\t{ \"post_id\": 5634, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"?\", \"post_time\": \"2014-05-05 13:40:24\" },\n\t{ \"post_id\": 5625, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"I tried to reduce the number of column to 3, then the output only has two columns of data. The first column is missing in both cases. Should I leave the first column blank?\", \"post_time\": \"2014-05-02 17:09:45\" },\n\t{ \"post_id\": 5624, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"Got some weird ouput here:\\n\\nIMPORT ML;\\nIMPORT csvtest;\\n//grade\\tsize\\tnodes\\tage\\n\\ntestSize := 16;\\n\\ncentroids := DATASET([{1,10,1,10}, {9,999,99,80}], {UNSIGNED4 grade, UNSIGNED4 size, UNSIGNED4 nodes, UNSIGNED4 age});\\nML.ToField(centroids, o2);\\nML.ToField(csvtest.File_factor,o1);\\nOUTPUT(csvtest.File_factor);\\nKMeans := ML.Cluster.KMeans(o1, o2, 10, .01, ML.Cluster.DF.QEuclidean);\\n\\nKmeans.AllResults();
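Following the UID explanation above, a hedged sketch (assuming the ML library is available) that keeps all four columns as data by appending an explicit record id before calling ToField:

IMPORT ML;
// Without an explicit UID, ToField treats the first numeric field (grade) as the record id,
// which is why one data column goes missing from the output
raw := DATASET([{1,10,1,10},{9,999,99,80}],
               {UNSIGNED4 grade, UNSIGNED4 size, UNSIGNED4 nodes, UNSIGNED4 age});
ML.AppendId(raw, id, rawWithId);   // adds a UID field called id
ML.ToField(rawWithId, asFields);   // now grade, size, nodes and age all become data fields
OUTPUT(asFields);
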
\\n\\nI have 4D data, but output is just 3D, why is that?\", \"post_time\": \"2014-05-02 17:00:04\" },\n\t{ \"post_id\": 5623, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"Got the answer: \\nThis will produce a table with a layout similar to NumericField, but instead of a single value field, we have a field\\nnamed “values” which is a set of values.\\nEach row will have the same number of values in this set, which is equal to the number of iterations + 1. Values[1]\\nis the initial value for the id/number combination, Values[2] is after the first iteration, etc.\", \"post_time\": \"2014-05-02 15:43:59\" },\n\t{ \"post_id\": 5605, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"If I have 3D data, is it going to output 4 columns (1: serial number, 2,3,4: coordinate)?\", \"post_time\": \"2014-05-01 15:45:12\" },\n\t{ \"post_id\": 5571, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"The 1st field in the output is a serial number. \\nThe 2nd and 3rd fields are final coordinates for your 10 centroids after 'K-means clustering algorithm' has been run on your test data. \\n\\nGayathri\", \"post_time\": \"2014-04-29 06:32:26\" },\n\t{ \"post_id\": 5568, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"I didn't change anything on that code except the size. I attached my output. Can you take a look?\", \"post_time\": \"2014-04-28 21:41:32\" },\n\t{ \"post_id\": 5566, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have no idea what this output might be. I've looked at the output of all the KMeans functions for a small dataset I created. But, none look like your output.\\n\\nIf you could attach your code and a few lines of your input file, that would help.\\n\\nHave your looked at the machine learning reference manual? It might help you. You can get it at http://hpccsystems.com/download/docs/machine-learning\", \"post_time\": \"2014-04-28 19:41:12\" },\n\t{ \"post_id\": 5563, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"1\\t1\\t0.5876287425149699\\n2\\t1\\t1.80389010989011\\n3\\t1\\t3.332957446808511\\n4\\t1\\t6.409630498533728\\n5\\t1\\t16.46696843434344\\n6\\t1\\t46.64989302325586\\n7\\t1\\t120.8805169386826\\n8\\t1\\t268.3272349837522\\n9\\t1\\t506.6706276243614\\n10\\t1\\t824.0742471520571\\n\\n\\nI think it is the result from Kmeans.result().\\n\\nWhat does it mean?\", \"post_time\": \"2014-04-28 18:13:57\" },\n\t{ \"post_id\": 5561, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is the output of which of these kmeans function:\\n
KMeans.Allegiance\nKMeans.Allegiances\nKMeans.AllResults\nKMeans.Convergence\nKMeans.Delta\nKMeans.DistanceDelta\nKMeans.Result\n
\", \"post_time\": \"2014-04-28 17:35:20\" },\n\t{ \"post_id\": 5557, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"sailorconan1\", \"post_text\": \"Can anyone explain the output?\\n\\n1\\t1\\t0.5876287425149699\\n2\\t1\\t1.80389010989011\\n3\\t1\\t3.332957446808511\\n4\\t1\\t6.409630498533728\\n5\\t1\\t16.46696843434344\\n6\\t1\\t46.64989302325586\\n7\\t1\\t120.8805169386826\\n8\\t1\\t268.3272349837522\\n9\\t1\\t506.6706276243614\\n10\\t1\\t824.0742471520571\", \"post_time\": \"2014-04-28 16:17:52\" },\n\t{ \"post_id\": 5092, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"lokesh\", \"post_text\": \"I ran the code on the 4.2.2 version and it runs fine.\\n\\nSo I believe that solves the issus.\\n\\nThanks a lot for your help.\", \"post_time\": \"2013-12-24 13:39:37\" },\n\t{ \"post_id\": 5083, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"lokesh\", \"post_text\": \"Can you just confirm that W20131220-121542 was the job this issue refers to ?\\n
\\nYes, it refers to the same workunit.\\n\\nBtw, is this the same cluster that srbhkmr and topic viewtopic.php?t=1152 uses ?
\\nYes, it is the same cluster configuration.\\n\\nAnyway, HPCC-10374 is fixed in 4.2.2, the current release candidate for which is available from http://hpccsystems.com/download/release-candidates\\n\\nLet me know if that solves the issue.
\\n\\nI'll install the new release and will update soon on the outcome.\\n\\nThanks a lot for your help.\", \"post_time\": \"2013-12-20 13:16:07\" },\n\t{ \"post_id\": 5082, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nfrom the ThorSlave-Failed.log - I see workunit W20131220-121542 failed as a result of bug HPCC-10374.\\n( for some reason the log continues and captures the next job too )\\n\\nCan you just confirm that W20131220-121542 was the job this issue refers to ?\\n\\nBtw, is this the same cluster that srbhkmr and topic viewtopic.php?t=1152 uses ?\\n\\nAnyway, HPCC-10374 is fixed in 4.2.2, the current release candidate for which is available from http://hpccsystems.com/download/release-candidates\\n\\nLet me know if that solves the issue.\", \"post_time\": \"2013-12-20 10:33:07\" },\n\t{ \"post_id\": 5080, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"lokesh\", \"post_text\": \"Thanks for the reply.\\n\\nthis is before the segfault and the 1st sign of error afaics.
\\nI looked at the log file and couldn't figure out anything as to why this is happening.\\nAny pointers on why does this error occur ?\\n\\nI am just using the facilities provided in the ECL/ML library.\\n\\nFor reference I am also attaching couple of log files (compressed).\\nOne is when the process is completed with number of iterations = 10.\\nSecond is when the process gives error with number of iterations = 20.\\n\\nThanks,\", \"post_time\": \"2013-12-20 09:06:01\" },\n\t{ \"post_id\": 5069, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"Re: K-Means clustering giving Seg Fault.\", \"username\": \"jsmith\", \"post_text\": \"At 1st I thought this was a symptom of HPCC-10374, but whilst I think the segfault in the log (reported as the MP link closed error) may well be because of HPCC-10374, it looks like this job was aborted 1st and then crashed trying to wind up.\\n\\nfrom the slave log :\\n00003D62 2013-12-19 16:13:13.857 26826 26826 "GraphAbort: W20131219-160024graph1"\\n
\\n.. this is before the segfault and the 1st sign of error afaics.\", \"post_time\": \"2013-12-19 15:45:59\" },\n\t{ \"post_id\": 5067, \"topic_id\": 1151, \"forum_id\": 10, \"post_subject\": \"K-Means clustering giving Seg Fault.\", \"username\": \"lokesh\", \"post_text\": \"[attachment=0:2jm8lhjt]ThorSlave.logHi everyone,\\n\\nI am using kmeans clustering and it shows very peculiar behavior.\\nHere is the code I used\\n\\nIMPORT ML;\\n\\ntestSize := 1500000;\\na1 := ML.Distribution.Uniform(0,1000,100000);\\n\\ntestData := ML.Distribution.GenData(testSize, a1, 1);\\n\\ncentroids := DATASET([{1,1}, {2,2}, {3,3}, {4,4}, {5,5}, {6,6}, {7,7}, {8,8}, {9,9},{10,10}], {UNSIGNED2 id, REAL4 value});\\n\\nML.ToField(centroids, o2);\\n\\nKMeans := ML.Cluster.KMeans(testData, o2, 10, .01,ML.Cluster.DF.QEuclidean);\\n\\nKmeans.convergence;\\nKmeans.result();
\\n\\nSo in the above code, if I change the number of iterations to 75, it gives MP link closed error but for 10 iterations, runs fine.\\n\\nI am using Enterprise Services Platform community_4.2.0-1 and twelve slave process on two nodes.\\n\\nAny pointers as to what is causing this ?\\n\\nPS: I have attached log file for single thor slave(trimmed), if that helps.\", \"post_time\": \"2013-12-19 12:24:31\" },\n\t{ \"post_id\": 5091, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"Yes, looks like it was the same issue as you quoted [HPCC-10374].\\nI reran it on v4.2.2 and it runs just fine.\\n\\nThanks a lot.\", \"post_time\": \"2013-12-24 12:59:13\" },\n\t{ \"post_id\": 5081, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"jsmith\", \"post_text\": \"Thanks, good that thorslave log from PID 20061 has proven that this is a symptom of HPCC-10374 which has been fixed and in the 4.2.2.\\n\\nYou can get the latest 4.2.2 release candidate from : http://hpccsystems.com/download/release-candidates\", \"post_time\": \"2013-12-20 10:09:57\" },\n\t{ \"post_id\": 5079, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"... and log files for slave processes 10 11 and 12.\\n\\nThanks,\", \"post_time\": \"2013-12-20 07:48:24\" },\n\t{ \"post_id\": 5078, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"logs of slave processes 7 8 and 9.\", \"post_time\": \"2013-12-20 07:47:01\" },\n\t{ \"post_id\": 5077, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"Please find other log files in here.\", \"post_time\": \"2013-12-20 07:45:02\" },\n\t{ \"post_id\": 5076, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"[attachment=0:3u5om3nz]ThorSlave-4.log.gz\\n\\nYes, I tried attaching all 12 log files but the size exceeded the upper limit.\\nPlease find the log file for PID = 20061 in the attachment which has the backtrace.\\n\\nThanks,\", \"post_time\": \"2013-12-20 07:08:46\" },\n\t{ \"post_id\": 5071, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"Re: ML Associate.ecl code failing\", \"username\": \"jsmith\", \"post_text\": \"I may need to see the other thorslave logs. Your zip contains 2 out of the 12 slave logs by the looks of it. (2 physical nodes, with 6 slaves per node (slavesPerNode=6))\\n\\nIn particular, I'd like to see the thor log that correspond to PID = 20061. \\n(1 of the thor logs captured that it crashed)\\n\\n000006D9 2013-12-17 17:24:05.009 15339 15483 "KERN_INFO: [1899393.846883] thorslave_lcr[20061]: segfault at 0 ip 00007f7cf63c1da0 sp 00007f7bf17f9af0 error 4 in libthorsort_lcr.so[7f7cf63a3000+2d000]"\\n
\\n\\nPlease attach all thorslave logs if possible though.\\nThanks.\", \"post_time\": \"2013-12-19 18:21:18\" },\n\t{ \"post_id\": 5068, \"topic_id\": 1152, \"forum_id\": 10, \"post_subject\": \"ML Associate.ecl code failing\", \"username\": \"srbhkmr\", \"post_text\": \"One of the problem we have is while doing the Frequent Itemset mining using the EclatN ECL routine the workunit fails with an error message saying -\\n'System error: 10056: Watchdog has lost contact with Thor slave: 10.10.1.106:20900 (Process terminated or node down?)'
\\nThe log files record a segfault happening somewhere in libthorsort_lcr.so library.\\n\\nThe ECL code is simply:\\n\\n\\nIMPORT * FROM ML;\\nIMPORT * FROM recDefinitions;\\n\\nA := DATASET('~datasetFile', recDefinitions.eclatInput_layout, CSV(HEADING(1)));\\n\\nML.AppendId(A, myid, A_withId);\\nML.ToField(A_withId, o4);\\nForAssoc := PROJECT(o4, ML.Types.ItemElement);\\n\\nAsso := ML.Associate(ForAssoc, 125000);\\n\\nFqItems := ASSO.EclatN(9, 2);\\nCHOOSEN(FqItems, 1000);\\nOUTPUT(COUNT(FqItems), NAMED('FqItemsets_Count'));\\n//OUTPUT(FqItems,,'~EclatN::freqItemsets_125KSupp', CSV(HEADING(SINGLE)));\\n\\nRules := ASSO.Rules(FqItems);\\nCHOOSEN(Rules, 1000);\\n//OUTPUT(Rules,,'~EclatN::Rules_10KSupp', CSV(HEADING(SINGLE)));\\n
\\n\\nThe input dataset has around 4.5 million rows and 9 columns.\\nProgram works fine upto a minimum support of 150K but as I lower it further to 125K the workunit fails.\\nI understand that output result size is exponential in nature but the workunit shouldn't fail as long as there is available space on nodes.\\n\\n\\nOther technical details are as following:\\n\\nHPCC Platform: community_4.2.0-1\\nconfig: 12slave processes on 2 slave nodes.\\ncode Associate.ecl comes from : https://github.com/hpcc-systems/ecl-ml/ ... ociate.ecl\\n\\nAttached are the log files of master and slave processes.\\n\\nAny help is appreciated\\nThanks,\", \"post_time\": \"2013-12-19 12:29:10\" },\n\t{ \"post_id\": 5105, \"topic_id\": 1164, \"forum_id\": 10, \"post_subject\": \"Re: How to merge column of two different DATSETS\", \"username\": \"shank\", \"post_text\": \"Thanks sameer, That worked the way i wanted it to. \\n\\n\\nregards,\\nShank\", \"post_time\": \"2014-01-07 11:47:33\" },\n\t{ \"post_id\": 5104, \"topic_id\": 1164, \"forum_id\": 10, \"post_subject\": \"Re: Howw to merge column of two different DATSETS\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nHope this is what you are looking for\\n\\nrecset := RECORD\\n\\tstring f0;\\n\\tstring f1;\\n\\tstring f2;\\nEND;\\n\\n\\n\\nds := DATASET([{'1','1','1'},\\n\\t\\t{'1','0','1'},\\n\\t\\t{'1','',''},\\n\\t\\t{'1','2','2'},\\n\\t\\t{'2','1','1'},\\n\\t\\t{'2', '0', '1'}], recset);\\n\\noutrecset := {\\n\\tstring f0 := ds.f0;\\n\\tunsigned c1 := SUM(GROUP,if(ds.f1=ds.f2, 1, 0));\\n\\tunsigned c2 := SUM(GROUP,if(ds.f1='', 1, 0));\\n\\tunsigned c3 := SUM(GROUP,if(ds.f2='', 1, 0));\\n};\\n\\nds_t := table(ds, outrecset, f0);\\n\\nds_t;\\n\\t\\t
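A hedged variant of the same report, using COUNT with a filter condition instead of SUM over IF; the data mirrors the question and either form should produce the requested counts:

recset := RECORD
  STRING f0;
  STRING f1;
  STRING f2;
END;
ds := DATASET([{'1','1','1'},{'1','0','1'},{'1','',''},{'1','2','2'},
               {'2','1','1'},{'2','0','1'},{'2','0','1'}], recset);
repRec := RECORD
  STRING   f0 := ds.f0;
  UNSIGNED c1 := COUNT(GROUP, ds.f1 = ds.f2);   // f1 = f2
  UNSIGNED c2 := COUNT(GROUP, ds.f1 = '');      // f1 empty
  UNSIGNED c3 := COUNT(GROUP, ds.f2 = '');      // f2 empty
END;
OUTPUT(TABLE(ds, repRec, f0));
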
\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-01-07 07:15:20\" },\n\t{ \"post_id\": 5103, \"topic_id\": 1164, \"forum_id\": 10, \"post_subject\": \"Howw to merge column of two different DATSETS\", \"username\": \"shank\", \"post_text\": \"I have 3 field in a dataset named f0,f1,f2.\\nI need perform the following checks for each f0 and create a report:\\n 1.\\tThe count of f0’s having f1=f2 (c1)\\n 2.\\tThe count of f0’s having f1=’’ (c2)\\n 3.\\tThe count of f0’s having f2=’’ (c3)\\n\\nI executed the following piece of code in ecl.\\n\\nrecset := RECORD\\nstring f0;\\nstring f1;\\nstring f2;\\nEND;\\n\\nds := DATASET([{'1','1','1'},{'1','0','1'},{'1','',''},{'1','2','2'},\\n {'2','1','1'},{'2','0','1'},{'2','0','1'}],recset);\\n\\nds1 := ds(f1=f2);\\nds2 := ds(f1='');\\nds3 := ds(f2='');\\n\\nrecset1 := RECORD\\nds.slno;\\nf := count(group);\\nEND;\\n\\ndsc1 := TABLE(ds1, recset1, ds1.f0);\\ndsc1;\\n\\ndsc2 := TABLE(ds2, recset1, ds2.f0);\\ndsc2;\\n\\ndsc3 := TABLE(ds3, recset1, ds3.f0);\\ndsc3;\\n
\\n\\n 1.\\tIS there a way to merge dsc1, dsc2 and dsc3 to achieve my goal?\\n 2.\\tIs there a totally different way to generate the report in one go instead of having to create 3 different TABLE’s like I’ve shown above?\\n\\n\\nThe report should look like\\n\\n-------------------\\nf0 | c1 c2 c3\\n-------------------\\n1 | 3 1 1\\n2 | 1 0 0\\n-------------------\", \"post_time\": \"2014-01-07 04:51:14\" },\n\t{ \"post_id\": 5115, \"topic_id\": 1165, \"forum_id\": 10, \"post_subject\": \"Re: Alternate Roxie Queries failing on community_4.2.2-rc2\", \"username\": \"sort\", \"post_text\": \"At the moment I am not sure about the multicast error. I will have to follow up on it. As far as roxie running every other query... that is because wsecl knows about both roxie nodes. When queries get submitted through wsecl it picks nodes to send the request. In a 2 node system where only 1 nodes is running, 1 node will handle the request the other will fail since the server (farmer) will not actually receive the request. It is true that if an agent is down, its buddy can handle the work.\", \"post_time\": \"2014-01-10 13:34:44\" },\n\t{ \"post_id\": 5112, \"topic_id\": 1165, \"forum_id\": 10, \"post_subject\": \"Re: Alternate Roxie Queries failing on community_4.2.2-rc2\", \"username\": \"srbhkmr\", \"post_text\": \"Yes, 'ThorMaster' is the name of the node which hosts esp services and roxie cluster is called 'roxie' itself.\\n\\n\\nI checked the roxie.log on 10.10.1.106 and indeed the roxie service is not running on that node. It "fails to join multicast channel 0".\\n\\nThe log records:\\n\\n00000001 2014-01-09 14:05:05.727 9774 9774 "Roxie starting, build = community_4.2.2-rc2"\\n00000002 2014-01-09 14:05:05.728 9774 9774 "RoxieMemMgr: Setting memory limit to 1073741824 bytes (1024 pages)"\\n00000003 2014-01-09 14:05:05.728 9774 9774 "RoxieMemMgr: 1024 Pages successfully allocated for the pool - memsize=1073741824 base=0x7f57e3900000 alignment=1048576 bitmapSize=32"\\n00000004 2014-01-09 14:05:05.728 9774 9774 "Current Hardware Info: CPUs=8, speed=3401 MHz, Mem=15936 MB , primDisk=0 GB, primFree=0 GB, secDisk=0 GB, secFree=0 GB, NIC=0"\\n00000005 2014-01-09 14:05:05.728 9774 9778 "Background copy thread 0xe293b0 starting"\\n00000006 2014-01-09 14:05:05.728 9774 9779 "HandleCloser thread 0xe293b0 starting"\\n00000007 2014-01-09 14:05:05.728 9774 9774 "Roxie: multicast socket created port=8887 sockbuffsize=131071 actual 262142"\\n00000008 2014-01-09 14:05:05.729 9774 9774 "Joined multicast channel 2 (239.1.1.3:8887)"\\n00000009 2014-01-09 14:05:05.729 9774 9774 "EXCEPTION: (1406): Failed to join multicast channel 0 (239.1.1.1:8887)"\\n0000000A 2014-01-09 14:05:05.730 9774 9779 "Handle closer thread 0xe293b0 exiting"\\n0000000B 2014-01-09 14:05:05.730 9774 9778 "Background copy thread 0xe293b0 exiting"\\n
\\n\\n\\nWhat might be causing such a problem?\\n\\n\\nBut even in this scenario I would like all my roxie queries to be answered even if one of the nodes is down due to some reason.\\n\\nThe documentation states that typically two slaves will receive "each request" over multicast channel. If any slave is not responding, requests on that channel are handled by the other peer slaves responsible for that channel.\\n\\nright now my cluster has two roxie nodes and apparently on one of the node(106) the roxie service fails to start. The other roxie node(101) answers only alternate queries. i.e. every other query fails. \\n\\n\\n\\nThanks,\", \"post_time\": \"2014-01-10 07:42:25\" },\n\t{ \"post_id\": 5107, \"topic_id\": 1165, \"forum_id\": 10, \"post_subject\": \"Re: Alternate Roxie Queries failing on community_4.2.2-rc2\", \"username\": \"sort\", \"post_text\": \"just to make sure... is your esp named ThorMaster? --server is the esp and not thor. also make sure roxie is the target name of the roxie cluster.\\n\\nCan you send the roxie log from 10.10.1.106? Communication error is usually due to roxie not being running either because of configuration, node(s) not running, or an error. It would be interesting to see if roxie was running at the time of the query\", \"post_time\": \"2014-01-07 21:22:27\" },\n\t{ \"post_id\": 5106, \"topic_id\": 1165, \"forum_id\": 10, \"post_subject\": \"Alternate Roxie Queries failing on community_4.2.2-rc2\", \"username\": \"srbhkmr\", \"post_text\": \"I tried publishing a simple query on Roxie clulster. Following is the ECL code:\\n\\nEXPORT SOAPEnabling() := FUNCTION\\n STRING20 name := '' : STORED('v1');\\n RETURN OUTPUT(IF(name = '', 'Hello UNKNOWN', 'Hello ' + name));\\nEND;\\n
\n\nThe query was published using the following command:\necl publish --target=roxie --server=ThorMaster SOAPEnabling.ecl
\\n\\n* On HPCC edition 'community_4.0.2-2', it works just fine.\\n\\n* On HPCC edition 'community_4.2.2-rc2', the same published query exhibits a weird behaviour. For every alternate request the response we get is:\\n\\nException \\nReported by: WsEcl\\nMessage: Roxie cluster communication error: roxie
\\n\\nThe relevant logs I guess is this:\\n\\n000000B2 2014-01-07 19:50:15.822 6893 7442 "submitQuery soap: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><soapenablingRequest xmlns="urn:hpccsystems:ecl:soapenabling"><v1>Giraffe</v1></soapenablingRequest></soap:Body></soap:Envelope>"\\n000000B3 2014-01-07 19:50:18.823 6893 7442 "ERROR: Error connecting to 10.10.1.106:9876"\\n000000B4 2014-01-07 19:50:18.823 6893 7442 "-3: connection failed\\nTarget: T>10.10.1.106, Raised in: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.2-rc2/CE/ubuntu-12.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1243"\\n000000B5 2014-01-07 19:50:18.827 6893 7442 "Loading dll (libW20140107-194356.so) from location /mnt/HPCCSystems/myeclccserver/libW20140107-194356.so"\\n\\n
\\n\\n\\nAny help is appreciated.\\nThanks,\", \"post_time\": \"2014-01-07 14:51:28\" },\n\t{ \"post_id\": 5139, \"topic_id\": 1166, \"forum_id\": 10, \"post_subject\": \"Re: Function call within PROJECT-TRANSFORM segfaulting on th\", \"username\": \"srbhkmr\", \"post_text\": \"Posted a JIRA issue[10643] on the same.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10643\\n\\nThanks,\", \"post_time\": \"2014-01-17 08:57:36\" },\n\t{ \"post_id\": 5120, \"topic_id\": 1166, \"forum_id\": 10, \"post_subject\": \"Re: Function call within PROJECT-TRANSFORM segfaulting on th\", \"username\": \"bforeman\", \"post_text\": \"This looks like something that should be logged in the Community Issue Tracker\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2014-01-13 13:50:57\" },\n\t{ \"post_id\": 5113, \"topic_id\": 1166, \"forum_id\": 10, \"post_subject\": \"Function call within PROJECT-TRANSFORM segfaulting on thor\", \"username\": \"srbhkmr\", \"post_text\": \"Following code runs just fine on hthor but it fails in distributed mode(thor).\\nThe error says - System error: 4: MP link closed
\\n\\nIt's a simple code that processes a huge dataset repeatedly with different parameter and collects the results in a 'result' dataset. \\nThe code repeatedly calls a function(which process a huge dataset) from a PROJECT-TRANSFORM and assigns the result.\\n\\nIMPORT ML;\\n\\n//////////////////////////////////////////////////////////////////\\n//////////////////////////////////////////////////////////////////\\n //Generate D:\\n rec := RECORD\\n UNSIGNED4 x;\\n END;\\n\\n D := DATASET(100, TRANSFORM(rec, SELF.x := RANDOM() % 100;));\\n D;\\n ML.AppendId(D,id, D_id);\\n\\n inMatrix := D_id;\\n lMatrix:= RECORDOF(inMatrix);\\n\\n ML.ToField(inMatrix,dDataset);\\n\\n //////////////////////////////////////////////////////////////////\\n////////////////////////////////////////////////////////////////////\\n\\nREAL4 fillin(UNSIGNED4 K) := FUNCTION\\n //Generate centroids:\\n centroids := DATASET(K, TRANSFORM(lMatrix, SELF.id := COUNTER, SELF.x := RANDOM() % 100; ));\\n\\n ML.ToField(centroids,dCentroids);\\n KMeans:=ML.Cluster.KMeans(dDataset, dCentroids, 30, 0.1);\\n \\n R := KMeans.Result(KMeans.Convergence);\\n ML.FromField(R, lMatrix, Res);\\n\\n //////////////////////////////\\n\\n distanceMatrix_layout := RECORD\\n UNSIGNED4 id1;\\n UNSIGNED4 id2;\\n REAL4 distance;\\n END;\\n\\n distanceMatrix_layout doJoin(lMatrix L, lMatrix R) := TRANSFORM\\n SELF.id1 := L.id;\\n SELF.id2 := R.id;\\n SELF.distance := ABS((L.x - R.x));\\n END;\\n\\n J := JOIN(Res, Res, LEFT.id < RIGHT.id, doJOIN(LEFT, RIGHT), ALL);\\n avg_interClusterDistance := AVE(J, distance);\\n Nr := avg_interClusterDistance;\\n\\n ////////////////////////////\\n //Dr:\\n A := GROUP(KMeans.Allegiances(), y, ALL);\\n meanCentroidDist_layout := RECORD\\n A.y;\\n REAL4 mean_distance := AVE(GROUP, A.value);\\n END;\\n intraClusterAverageDistance := TABLE(A, meanCentroidDist_layout);\\n Dr := MIN(intraClusterAverageDistance, mean_distance);\\n\\n RETURN Nr / Dr;\\nEND;\\n\\n/////////////////////////////////////////////////////////////////\\n/////////////////////////////////////////////////////////////////\\ndunnChart := RECORD\\n UNSIGNED4 K;\\n Real4 DunnsIndex := 0.0;\\nEND;\\n\\nRi := DATASET(20, TRANSFORM(dunnChart, SELF.K := COUNTER; ));\\nRi;\\n\\ndunnChart populateDunnsIndices(dunnChart Reco) := TRANSFORM \\n SELF.K := Reco.K;\\n SELF.DunnsIndex := fillin(Reco.K);\\nEND;\\n\\nR := Project(Ri, populateDunnsIndices(Left));\\nOUTPUT(R, NAMED('DunnsChart'));
\\n\\n\\nAttached log file records a segfault.\\nWe are using HPCC version 'community_4.2.2-rc2'\\n\\nThanks,\", \"post_time\": \"2014-01-10 09:53:05\" },\n\t{ \"post_id\": 5118, \"topic_id\": 1167, \"forum_id\": 10, \"post_subject\": \"Re: Only simple wildcard matching for STD.file.LogicalFileLi\", \"username\": \"bforeman\", \"post_text\": \"Cool, no problem!\\n\\nCheers,\\n\\nBob\", \"post_time\": \"2014-01-10 16:33:05\" },\n\t{ \"post_id\": 5117, \"topic_id\": 1167, \"forum_id\": 10, \"post_subject\": \"Re: Only simple wildcard matching for STD.file.LogicalFileLi\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nI was going to raise as a JIRA but wanted to touch base with community 1st.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-01-10 16:31:56\" },\n\t{ \"post_id\": 5116, \"topic_id\": 1167, \"forum_id\": 10, \"post_subject\": \"Re: Only simple wildcard matching for STD.file.LogicalFileLi\", \"username\": \"bforeman\", \"post_text\": \"Good morning Allan,\\n\\nThis would probably be better logged as an issue for a feature request, if you haven't already done so.\\n\\nBest regards,\\n\\nBob\", \"post_time\": \"2014-01-10 14:34:36\" },\n\t{ \"post_id\": 5114, \"topic_id\": 1167, \"forum_id\": 10, \"post_subject\": \"Only simple wildcard matching for STD.file.LogicalFileList\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIt would be very nice if the full regex capability of ECL was available to library functions where wildcarding is pertinent.\\n\\nI seem to have to do the following:\\n\\n lfl := STD.File.LogicalFileList('scrub::c*::i*_b*_eukpd*_w*_stats')(REGEXFIND('^.*_eukpd..[^Tt][0-9]{6}.*$',name));\\n
\\nrather than pass the REGEXFIND pattern directly to LogicalFileList.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-01-10 12:16:03\" },\n\t{ \"post_id\": 5127, \"topic_id\": 1168, \"forum_id\": 10, \"post_subject\": \"Re: Roxie gives error when using WSDL\", \"username\": \"snawaz\", \"post_text\": \"[quote="gsmith":2lhl7hkq]Depending on the URL you pass to the browser you _might_ be getting different info back. For one of my queries if open this URL:\\nhttp://X.X.X.X:8002/WsEcl/definitions/q ... dl?display\\nI get the full WSDL. \\n\\nHowever if you request this URL:\\nhttp://X.X.X.X:8002/WsEcl/definitions/q ... sd?display\\nYou only get the schema.\\n\\nBut I use the same url everytime. I use url of this form\\n\\nhttp://X.X.X.X:8002/WsEcl/definitions/query/roxie/myquery/main/myquery.wsdl
\\n\\nstill I get schema occasionally. I believe there is something wrong which generates the wsdl definition, possibly undefined-behavior due to memory corruption or so.\", \"post_time\": \"2014-01-15 06:47:37\" },\n\t{ \"post_id\": 5121, \"topic_id\": 1168, \"forum_id\": 10, \"post_subject\": \"Re: Roxie gives error when using WSDL\", \"username\": \"gsmith\", \"post_text\": \"Depending on the URL you pass to the browser you _might_ be getting different info back. For one of my queries if open this URL:\\nhttp://X.X.X.X:8002/WsEcl/definitions/q ... dl?display\\nI get the full WSDL. \\n\\nHowever if you request this URL:\\nhttp://X.X.X.X:8002/WsEcl/definitions/q ... sd?display\\nYou only get the schema.\", \"post_time\": \"2014-01-13 15:49:11\" },\n\t{ \"post_id\": 5119, \"topic_id\": 1168, \"forum_id\": 10, \"post_subject\": \"Roxie gives error when using WSDL\", \"username\": \"snawaz\", \"post_text\": \"I've written few hello-world kind of queries in Roxie (version community_4.0.2-2). It works great from ESP. But when I call it programmatically using WSDL definitions of the queries, it sometime doesn't work. So far what I've figured out myself is that Roxie doesn't return the complete WSDL definition everytime. \\n\\nWhen it works, WSDl url returns this (just look at the beginning):\\n\\n<definitions xmlns="http://schemas.xmlsoap.org/wsdl/" xmlns:soap="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:http="http://schemas.xmlsoap.org/wsdl/http/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:mime="http://schemas.xmlsoap.org/wsdl/mime/" xmlns:tns="urn:hpccsystems:ecl:getcsv" targetNamespace="urn:hpccsystems:ecl:getcsv">\\n<types>\\n<xsd:schema xmlns:ds1="urn:hpccsystems:ecl:getcsv:result:result_1" elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:getcsv">\\n<xsd:import namespace="urn:hpccsystems:ecl:getcsv:result:result_1" schemaLocation="../result/Result_1.xsd"/>\\n<xsd:complexType name="EspException">\\n<xsd:all>\\n<xsd:element name="Code" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Audience" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Source" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Message" type="xsd:string" minOccurs="0"/>\\n</xsd:all>\\n</xsd:complexType>\\n....
\\n\\nWhen it doesn't work, it returns this (look at the beginining):\\n\\n<xsd:schema xmlns:ds1="urn:hpccsystems:ecl:getcsv:result:result_1" elementFormDefault="qualified" targetNamespace="urn:hpccsystems:ecl:getcsv">\\n<xsd:import namespace="urn:hpccsystems:ecl:getcsv:result:result_1" schemaLocation="../result/Result_1.xsd"/>\\n<xsd:complexType name="EspException">\\n<xsd:all>\\n<xsd:element name="Code" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Audience" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Source" type="xsd:string" minOccurs="0"/>\\n<xsd:element name="Message" type="xsd:string" minOccurs="0"/>\\n</xsd:all>\\n</xsd:complexType>\\n....
\\n\\nIn other words, when it doesn't work, the `<definitions>` tag is missing from the WSDL, only `<xsd:schema>` is there.\\n\\nIs this a known bug? Or I'm doing something incorrectly?\\n\\nI'm using Python and SOAPpy module.\\n\\nThen I wrote a small script to only see what WSDL-url returns (and I'm NOT doing anything else):\\n\\nimport requests\\nimport time\\n\\nurl = 'http://37.139.15.183:8002/WsEcl/definitions/query/roxie/Greet/main/Greet.wsdl'\\n\\nfor i in range(1000):\\n r = requests.get(url)\\n if r.ok:\\n if "definitions" in r.content:\\n print("OK")\\n else:\\n print (r.content)\\n else:\\n print(r)\\n time.sleep(1)\\n
\\n\\nInterestingly, it prints `OK` few times, then an html code consisting of the following lines:\\n\\n<body onload="nof5();onLoad()" class="yui-skin-sam">\\n<h3>Exception(s) occurred:</h3>\\n<h4>Reporter: WsEcl</h4>\\n<table border="0">\\n<tbody>\\n<tr>\\n<th>Code</th><th align="left">Message</th>\\n</tr>\\n<tr>\\n<td>-1</td><td align="left">StringBuffer::_realloc: Request for -1 bytes oldMax = 0</td>\\n</tr>\\n</tbody>\\n</table>\\n<br>\\n<input id="backBtn" type="button" value="Go Back" onclick="history.go(-index)" style="display:none">\\n</body>\\n
\\n\\nCould anybody explain the behaviour?\", \"post_time\": \"2014-01-13 08:57:03\" },\n\t{ \"post_id\": 5125, \"topic_id\": 1169, \"forum_id\": 10, \"post_subject\": \"Re: Trying to use notify in roxie query to call waiting job.\", \"username\": \"rtaylor\", \"post_text\": \"This is now JIRA issue HPCC-10629\\n\\nYou can track its progress through JIRA: \\nhttps://track.hpccsystems.com/browse/HPCC-10629\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-01-14 20:31:11\" },\n\t{ \"post_id\": 5124, \"topic_id\": 1169, \"forum_id\": 10, \"post_subject\": \"Re: Trying to use notify in roxie query to call waiting job.\", \"username\": \"jgostylo\", \"post_text\": \"Yes, we actually tried running the shortquery with thor and saw that it did work after I made that post. One other thing we were thinking of doing was making it a two part process. Part 1 was query through Roxie to get the information that we needed and part 2 was to push the event through SOAP. We are accessing everything through a server application so we can use that application to handle data transfer between the query and the long running job.\\n\\nIf you feel that this should work with a Roxie query firing a Notify then I still hope your submitted ticket is addressed and fixed.\\n\\nThanks!\", \"post_time\": \"2014-01-14 20:06:04\" },\n\t{ \"post_id\": 5123, \"topic_id\": 1169, \"forum_id\": 10, \"post_subject\": \"Re: Trying to use notify in roxie query to call waiting job.\", \"username\": \"rtaylor\", \"post_text\": \"I took your code (modified a little) and ran it on my training cluster, which is on the 4.2.0-2 build. Here's my version of your scheduled "long" job:IMPORT Std;\\n\\n#WORKUNIT('name','AsyncTest');\\n\\nRunLongEvent(STRING myId) := SEQUENTIAL(Std.system.Debug.Sleep(300000),\\n OUTPUT ('I am done with instance: ' + myId));\\n\\nRunLongEvent(EVENTEXTRA('passedId')) : WHEN('RunMyLongEvent');
I ran this on my Thor cluster, which put it into the "wait" state.\\n\\nI then created this "roxie-style" query, based on your second piece of code:EXPORT PushEventQuery() := FUNCTION\\n\\n STRING this_id := '' : STORED('Event_ID');\\n\\n runMyLongEvent := EVENT('RunMyLongEvent', '<Event><passedId>'+this_id+'</passedId></Event>');\\n\\n RETURN SEQUENTIAL(NOTIFY(runMyLongEvent),OUTPUT(this_id,NAMED('RunId')));\\nEND;
I first selected "hThor" as the target for this, hit "compile" under the Submit button, then in the ECL Watch page clicked on "Publish" to deploy the query.\\n\\nI then went to the WSecl ESP service (port 8002) and selected my PushEventQuery under hThor, entered an Event ID value and hit Submit. The hThor job ran correctly, and so did the scheduled job.\\n\\nI then re-compiled and published for Roxie and attempted to run it the same way. However, the Roxie query failed with the same UNIMPLEMENTED error message that you got.\\n\\nI have submitted a JIRA issue for this.\\n\\nYour workaround is to publish the query to hThor until the Roxie issue is resolved.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-01-14 19:20:41\" },\n\t{ \"post_id\": 5122, \"topic_id\": 1169, \"forum_id\": 10, \"post_subject\": \"Trying to use notify in roxie query to call waiting job.\", \"username\": \"jgostylo\", \"post_text\": \"I have a use case where I want to run a job that waits to listen to an event. That event will be triggered by a roxie query. The point is that the full job will take a long time to run but we want to return a response to the user right away and let that long job run asynchronously.\\n\\nTo test this I set up the following code for the long running job:\\n\\nIMPORT * FROM Std;\\n\\n#WORKUNIT('name','AsyncTest');\\n\\nRunLongEvent(STRING myId) := FUNCTION\\n\\tactions := SEQUENTIAL (\\n\\t\\tStd.system.Debug.Sleep(300000)\\n\\t);\\n\\tRETURN actions;\\nEND;\\n\\nRunLongEvent(EVENTEXTRA('passedId')) : WHEN('RunMyLongEvent');\\nOUTPUT ('I am done.');\\n
\\n\\nContrary to all the examples in the help documentation I discovered that the function triggered by the WHEN may not execute any actions (compile error) which is why I return a sequential. All the examples fail to compile because the triggered function contains actions (NOTIFY). This is the error that results:\\nError: WHEN must be used to associated an action with a definition\\n\\nThe query I am trying to use to trigger the WHEN is this:\\n\\n#WORKUNIT('name','ShortQuery');\\n\\nthis_id := '9re32Fh21';\\n\\nrunMyLongEvent := EVENT('RunMyLongEvent', '<Event><passedId>'+this_id+'</passedId></Event>');\\n\\nNOTIFY(runMyLongEvent);\\n\\nOUTPUT(this_id,ALL,NAMED('RunId'));\\n
\\n\\nEverything compiles fine but when I try to run the query I am getting this error:\\nMessage: UNIMPLEMENTED at /home/hpccuser/Projects/HPCC-Platform/roxie/ccd/ccdcontext.cpp:2888\\nThe server is running community_4.1.0.\\n\\nI can use ECLWatch to run the long running job with the Push Event tool. How do I duplicate that behavior with a Roxie query? My code is not working.\", \"post_time\": \"2014-01-14 16:41:13\" },\n\t{ \"post_id\": 5153, \"topic_id\": 1174, \"forum_id\": 10, \"post_subject\": \"Re: 'Global child graph?' Exception\", \"username\": \"jsmith\", \"post_text\": \"I've opened a JIRA issue : https://track.hpccsystems.com/browse/HPCC-10693\\n\\nIt's specific to CSV with a HEADING. If it was a FLAT or XML file or a CSV without HEADING it would have been ok (or outside of a child query).\", \"post_time\": \"2014-01-24 18:49:53\" },\n\t{ \"post_id\": 5135, \"topic_id\": 1174, \"forum_id\": 10, \"post_subject\": \"Re: 'Global child graph?' Exception\", \"username\": \"srbhkmr\", \"post_text\": \"Interestingly enough if I spill the file D on disk again, by doing:\\n\\nD_ := DATASET('~.::tiledPattern', rec, CSV(HEADING(1)));\\nD := CHOOSEN(D_, COUNT(D_));
\\n\\nthe code runs just fine! without complaining about any 'Global Child Graph'.\", \"post_time\": \"2014-01-16 11:46:23\" },\n\t{ \"post_id\": 5133, \"topic_id\": 1174, \"forum_id\": 10, \"post_subject\": \"'Global child graph?' Exception\", \"username\": \"srbhkmr\", \"post_text\": \"I have following code:\\n\\n\\nrec := RECORD\\n real4 x;\\n real4 y;\\n real4 z;\\nEND;\\n\\nrec2 := RECORD\\n STRING w;\\nEND;\\n\\n//D as a randomly generated dataset\\n//D := DATASET(50000, TRANSFORM(rec, SELF.x := RANDOM() % 100; SELF.y := RANDOM() % 100; SELF.z := RANDOM() % 100;));\\n\\n//D as a disk file\\nD := DATASET('~.::tiledPattern', rec, CSV(HEADING(1)));\\n\\nR := DATASET(50000, TRANSFORM(rec2, SELF.w := (STRING)(RANDOM() % 100);));\\n\\n//Some Helper function\\nIMPORT Python;\\nINTEGER IsPresent(UNSIGNED4 colid, STRING pat) := EMBED(Python)\\n list = str.split(pat, ' ');\\n for item in list:\\n if int(item) / 100000 == colid:\\n return int(item) % 100000;\\n return 0;\\nENDEMBED;\\n\\n//Some processing\\nUNSIGNED4 getPointsCount(STRING pat) := FUNCTION\\n D1 := IF(IsPresent(1, pat) > 0, D(D.x = IsPresent(1, pat)), D );\\n D2 := IF(IsPresent(2, pat) > 0, D1(D1.y = IsPresent(2, pat)), D1 );\\n D3 := IF(IsPresent(3, pat) > 0, D2(D2.z = IsPresent(3, pat)), D2 );\\n RETURN COUNT(D3);\\nEND;\\n\\n\\n\\n{RECORDOF(R) OR {UNSIGNED4 total_points}} fillIn(RECORDOF(R) Reco) := TRANSFORM\\n SELF.total_points := getPointsCount(Reco.w);\\n SELF := Reco;\\nEND;\\n\\nresult := PROJECT(R, fillIn(LEFT));\\nCHOOSEN(result, 100);\\n
\\n\\nWith D as a randomly generated dataset:\\nCode works fine on thor and hthor and gives results as expected.\\n\\nWith D as a disk file:\\nCode works fine on hthor, but in distributed mode(thor) it says -\\nSystem error: 0: Graph[1], project[4]: SLAVE 10.10.1.106:20100: Global child graph? : Global acts = Graph(6): [csvread(7)]
\\n\\n\\nCould someone please explain the error message? What is it that it's complaining about? \\nIs it that I'm trying to do something that is conceptually wrong here, like trying to access another global file while processing a given file? (Dataset R is also going to be a disk file finally.)\\n\\nAlso, why are the two D's being treated differently here?\\n\\n\\nI'm using community edition 4.2.2-rc2.\\nThanks,\", \"post_time\": \"2014-01-16 04:35:13\" },\n\t{ \"post_id\": 5144, \"topic_id\": 1175, \"forum_id\": 10, \"post_subject\": \"Re: Option to preserve 'compressed' state of logicalfile on \", \"username\": \"Allan\", \"post_text\": \"Thanks\\n\\nJake\", \"post_time\": \"2014-01-20 18:54:39\" },\n\t{ \"post_id\": 5141, \"topic_id\": 1175, \"forum_id\": 10, \"post_subject\": \"Re: Option to preserve 'compressed' state of logicalfile on \", \"username\": \"jsmith\", \"post_text\": \"In fact, there is a compress option to File.Copy but it seems it's undocumented.\\n\\nIn fact there are 3 trailing parameters that are available:\\n\\nboolean compress (optional, default false)\\nboolean forcePush (optional, default false)\\ninteger4 transferBufferSize (optional)\\n\\nI'll bring this to the documentation team's attention.\", \"post_time\": \"2014-01-20 15:57:46\" },\n\t{ \"post_id\": 5140, \"topic_id\": 1175, \"forum_id\": 10, \"post_subject\": \"Option to preserve 'compressed' state of logicalfile on Copy\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI’ve noticed my STD.File.Copy’s are not preserving the compressed state of a logical file.\\n\\nIMPORT * FROM STD.File;\\nCopy('~thor::base::deltauk::20140120::delta_key.txt','thordev10_2','~thor::base::deltauk::20140120::delta_key.new');\\n
\\n \\nSee attachment for the resultant listing. (sorry cannot attach an image - just believe me)\\n\\nCuriously the Documentation indicates that there is a ‘compress’ flag form ‘RemotePull’ but not for ‘Copy’. These is also existing questions on this subject in the forum, but they have not enlightened me.\\n\\nCan one copy a compressed logical file with the target remaining compressed?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-01-20 15:22:01\" },\n\t{ \"post_id\": 5149, \"topic_id\": 1178, \"forum_id\": 10, \"post_subject\": \"Re: Output\", \"username\": \"kumar2k14\", \"post_text\": \"THanks Sameer\", \"post_time\": \"2014-01-23 14:58:42\" },\n\t{ \"post_id\": 5148, \"topic_id\": 1178, \"forum_id\": 10, \"post_subject\": \"Re: Output\", \"username\": \"sameermsc\", \"post_text\": \"There is no direct mechanism to do this, you can try writing another program (Java or any other) to grab the ECL output and write desired data to Excel\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-01-23 08:25:40\" },\n\t{ \"post_id\": 5147, \"topic_id\": 1178, \"forum_id\": 10, \"post_subject\": \"Output\", \"username\": \"kumar2k14\", \"post_text\": \"I am new to ECL and wanted to see if it is possible in ECL to write the output of the data counts of tables to excel on each tab.\\n\\nEx:tableA data to Tab1\\n tableB data to Tab2\\n tableB data to Tab3\\n\\nThanks for your help...\", \"post_time\": \"2014-01-22 22:33:29\" },\n\t{ \"post_id\": 5158, \"topic_id\": 1183, \"forum_id\": 10, \"post_subject\": \"Re: Using record definitions that take parameters as a param\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nActually fixed by nesting MODULE's where the inner MODULE takes parameters.\\n\\nThe situation I had was one function was returning a dataset defined within itself.\\n I had a section Function 'Despray' that gives the option to despray said dataset.\\nThe parameter to the 'Despray' function being the dataset returned from 1st function.\\nIts just that the RECORD definition for the dataset takes a parameter.\\n\\nI count not see an example in the ref manual, nor was the case explicitly disallowed in same.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-01-28 10:56:48\" },\n\t{ \"post_id\": 5157, \"topic_id\": 1183, \"forum_id\": 10, \"post_subject\": \"Re: Using record definitions that take parameters as a param\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nWhat exactly are you trying to accomplish? \\n\\nHave you tried using a FUNCTIONMACRO instead of a FUNCTION?\\n\\nRichard\", \"post_time\": \"2014-01-27 21:11:42\" },\n\t{ \"post_id\": 5156, \"topic_id\": 1183, \"forum_id\": 10, \"post_subject\": \"Using record definitions that take parameters as a parameter\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm stumbling on what should be a very simple issue.\\n\\nI have a RECORD definition that takes a parameter, e.g.\\n\\nSHARED RVolumns(DATASET(Transaction_log) d) := RECORD\\n\\tSTRING30 user_added := d.user_added;\\n\\tSTRING10 date_added := d.date_added[1..10];\\n\\tUNSIGNED cnt := COUNT(GROUP);\\nEND;\\n
\\n\\nI then want to use that record definition in a parameter list to a function, e.g.\\n\\nEXPORT DesprayUserVolumnsByDay(DATASET(RVolumns(???)) inp) := FUNCTION\\n
\\n\\nWhatever I try DATASET(... or RECORDOF, fails the syntax check.\\nI've read appropriate sections in the ECL ref manual but am not much wiser.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-01-27 14:19:44\" },\n\t{ \"post_id\": 5184, \"topic_id\": 1190, \"forum_id\": 10, \"post_subject\": \"Re: Count of distinct within a group\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Richard!\", \"post_time\": \"2014-02-07 11:47:25\" },\n\t{ \"post_id\": 5180, \"topic_id\": 1190, \"forum_id\": 10, \"post_subject\": \"Re: Count of distinct within a group\", \"username\": \"rtaylor\", \"post_text\": \"This code gets you there:rec := RECORD\\n STRING10 F1;\\n UNSIGNED1 F2;\\nEND;\\n\\nds := DATASET([{'test ',1},\\n {'test ',1},\\n {'test ',2},\\n {'test1 ',1},\\n {'test ',2},\\n {'test1 ',1},\\n {'test ',2},\\n {'test1 ',1},\\n {'test ',2},\\n {'test1 ',1},\\n {'test1 ',3},\\n {'test1 ',4}], rec);\\n\\nt1 := TABLE(ds,{F1,F2},F1,F2);\\n\\nt2 := TABLE(t1,{F1,COUNT(GROUP)},F1);\\n\\nt2;
It is possible that this method might execute faster, since it eliminates the need for the SORT that your DEDUP would require. You should test both methods against a large amount of data to determine which will give better performance.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-06 15:09:21\" },\n\t{ \"post_id\": 5179, \"topic_id\": 1190, \"forum_id\": 10, \"post_subject\": \"Re: Count of distinct within a group\", \"username\": \"Tony Kirk\", \"post_text\": \"How about TABLE() documentation, specifically the "CrossTab Report" section?\\n\\nPost again if more questions or details needed.\", \"post_time\": \"2014-02-06 12:38:43\" },\n\t{ \"post_id\": 5178, \"topic_id\": 1190, \"forum_id\": 10, \"post_subject\": \"Count of distinct within a group\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I have a dataset like this:\\n\\nA\\t B\\n----------\\ntest\\t1\\ntest\\t1\\ntest\\t2\\ntest1\\t1\\ntest1\\t3\\ntest1\\t4
\\n\\nAs output, I want each value from A to be displayed along with how many distinct Bs are mapped to it. \\ni.e. the output should look like this:\\n\\nA num_B\\n--------\\ntest 2\\ntest1 3
\\n\\nCurrently, I am DEDUPing on (A,B) followed by a GROUP by A and a COUNT(GROUP). Is there a better way to do this?\\n\\nThanks,\\nGayathri\", \"post_time\": \"2014-02-06 11:42:27\" },\n\t{ \"post_id\": 5292, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Got it! Thanks.\", \"post_time\": \"2014-02-26 13:11:31\" },\n\t{ \"post_id\": 5290, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nAssuming that both Field1 and Field2 are components of the rightIndex, then this should help:\\noutVar:= JOIN(leftDataSet, \\n rightIndex,\\n KEYED(LEFT.field1 = RIGHT.field1 AND LEFT.field2 = RIGHT.field2),\\n TRANSFORM(RECORDOF(LEFT),\\n SELF.field3 := RIGHT.field3,\\n SELF := LEFT),\\n FULL OUTER);
Because a JOIN condition is implicitly a filter on both the left and right datasets, you should use KEYED to indicate that the filter should use right dataset as an INDEX and not as a dataset. That is what the "Filtered RIGHT prevents a keyed join" warning is trying to tell you.\\n\\nIf only Field1 is a component of the rightIndex, then this is the way to do it:\\noutVar:= JOIN(leftDataSet, \\n rightIndex,\\n KEYED(LEFT.field1 = RIGHT.field1) AND LEFT.field2 = RIGHT.field2,\\n TRANSFORM(RECORDOF(LEFT),\\n SELF.field3 := RIGHT.field3,\\n SELF := LEFT),\\n FULL OUTER);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-26 10:42:42\" },\n\t{ \"post_id\": 5286, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Apologize for the delayed response! I am travelling and don't have access to code base.\\n\\nHere is the join statement that produces the warning:\\n\\noutVar:= join(leftDataSet, \\n\\t rightIndex,\\n\\t left.field1 = right.field1\\n and left.field2 = right.field2,\\n transform(recordof(left),\\n self.field3 = right.field3;\\n self := left;),\\n full outer);\\n
\", \"post_time\": \"2014-02-25 18:48:08\" },\n\t{ \"post_id\": 5251, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nCan I see an example of the code that generates the warning?\\n\\nRichard\", \"post_time\": \"2014-02-20 14:47:01\" },\n\t{ \"post_id\": 5209, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Yes, Richard. It is an index.\", \"post_time\": \"2014-02-14 09:45:15\" },\n\t{ \"post_id\": 5200, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \"Re: "Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nIs that RIGHT recordset an INDEX?\\n\\nRichard\", \"post_time\": \"2014-02-12 15:27:16\" },\n\t{ \"post_id\": 5197, \"topic_id\": 1197, \"forum_id\": 10, \"post_subject\": \""Filtered RIGHT prevents a keyed join .." warning\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"While executing one of my ECL programs, I get the warning \\nFiltered RIGHT prevents a keyed join being used. Consider including the filter in the join condition.\\n\\nHow can I get rid of this? \\n\\nOne of the datasets used in the join is an already filtered recordset. I tried re-writing it by providing the filter expression in the join condition but that didn't stop the warning from coming up again.\\n\\nThanks,\\nGayathri\", \"post_time\": \"2014-02-12 10:34:25\" },\n\t{ \"post_id\": 5294, \"topic_id\": 1200, \"forum_id\": 10, \"post_subject\": \"Re: JOIN - ATMOST vs LIMIT vs KEEP\", \"username\": \"Rahul Jain\", \"post_text\": \"Thanks, Example was very helpful. Yes I want no change in the the output I get as of date today. Hence I have to use J1 which has keep,limit,as other 2 changes the output. 
\\nBut yes for safer execution I can add skip to limit - KEEP(2), LIMIT(0,SKIP).\", \"post_time\": \"2014-02-26 15:39:31\" },\n\t{ \"post_id\": 5277, \"topic_id\": 1200, \"forum_id\": 10, \"post_subject\": \"Re: JOIN - ATMOST vs LIMIT vs KEEP\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nMy point was, if you want the same result you're currently getting with the KEEP(3000), LIMIT(0), then neither one of the other options does that.\\n\\nHere's a piece of example code that demonstrates the differences between the three:PtblRec := RECORD\\n INTEGER8 sequence;\\n STRING2 State;\\n STRING20 City;\\n STRING25 Lname;\\n STRING15 Fname;\\nEND;\\n\\nTemp := DATASET([{3000,'FL','BOCA RATON','LONDON','BRIDGE'},\\n\\t\\t\\t\\t {35,'FL','BOCA RATON','SMITH','FRANK'},\\n\\t\\t\\t\\t {50,'FL','BOCA RATON','SMITH','SUE'},\\n\\t\\t\\t\\t {135,'FL','BOCA RATON','SMITH','NANCY'},\\n\\t\\t\\t\\t {235,'FL','BOCA RATON','SMITH','FRED'},\\n\\t\\t\\t\\t {335,'FL','BOCA RATON','TAYLOR','FRANK'},\\n\\t\\t\\t\\t {3500,'FL','BOCA RATON','JONES','FRANK'},\\n\\t\\t\\t\\t {30,'FL','BOCA RATON','TAYLOR','RICHARD'}], PtblRec);\\n\\nProj := sort(temp,sequence);\\n\\nDataFile := '~RTTEMP::TestKeyedJoin';\\nKeyFile := '~RTTEMP::lname.fnameKeyPay';\\nPtbl := DATASET(DataFile,PtblRec,FLAT);\\nPtblOut := OUTPUT(Proj,,DataFile,OVERWRITE);\\n\\nAlphaPay := INDEX(Ptbl,{lname,fname},{Ptbl},KeyFile);\\nBld := BUILD(AlphaPay,OVERWRITE);\\n\\npeopleRecord := RECORD\\n INTEGER8 id;\\n STRING20 lastname;\\nEND;\\n\\npeopleDataset := DATASET([{3000,'LONDON'},\\n\\t\\t\\t\\t\\t\\t {3500,'SMITH'},\\n\\t\\t\\t\\t\\t\\t {30,'TAYLOR'}], peopleRecord);\\n\\njoinedRecord := RECORD\\n PtblRec;\\n peopleRecord AND NOT [id];\\nEND;\\n\\nJ1 := JOIN(peopleDataset, AlphaPay, \\n KEYED(LEFT.lastname=RIGHT.Lname),\\n TRANSFORM(joinedRecord,SELF := LEFT,SELF := RIGHT),KEEP(2),KEEP(2),LIMIT(0));\\nJ2 := JOIN(peopleDataset, AlphaPay, \\n KEYED(LEFT.lastname=RIGHT.Lname),\\n TRANSFORM(joinedRecord,SELF := LEFT,SELF := RIGHT),LIMIT(2,SKIP));\\nJ3 := JOIN(peopleDataset, AlphaPay, \\n KEYED(LEFT.lastname=RIGHT.Lname),\\n TRANSFORM(joinedRecord,SELF := LEFT,SELF := RIGHT),ATMOST(2));\\n\\nSEQUENTIAL(PtblOut,Bld,\\n PARALLEL(OUTPUT(J1,NAMED('KEEP2_LIMIT0')),\\n OUTPUT(J2,NAMED('LIMIT2_SKIP')),\\n OUTPUT(J3,NAMED('ATMOST2'))));
When you run this code, you will find that the first result does NOT match the second and third. Therefore, if what you want is the first result, only the KEEP(3000),LIMIT(0) will accomplish that. \\n\\nHowever, if the second and third results meet your needs, then which one you choose to use is entirely dependent on your performance tests as to which goes faster with your data on your hardware.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-24 11:17:52\" },\n\t{ \"post_id\": 5275, \"topic_id\": 1200, \"forum_id\": 10, \"post_subject\": \"Re: JOIN - ATMOST vs LIMIT vs KEEP\", \"username\": \"Rahul Jain\", \"post_text\": \"Thanks Richard,\\n\\nThis is what I understand. I should go for either of 2 options to reduce scans :\\na) Use LIMIT(3000,SKIP)\\nb) Use ATMOST(3000)\\n\\nand I believe a) LIMIT(3000,SKIP) is better option than ATMOST(3000)\\nSo I will any way get at least 436501 scans. Is it not possible to have scans lesser than 436501?\\n\\n1 more question - \\nIs No. of scans = No. of times a key is Hit. Is it right ?\", \"post_time\": \"2014-02-21 20:37:07\" },\n\t{ \"post_id\": 5247, \"topic_id\": 1200, \"forum_id\": 10, \"post_subject\": \"Re: JOIN - ATMOST vs LIMIT vs KEEP\", \"username\": \"rtaylor\", \"post_text\": \"Rahul,\\n\\nYour KEEP(3000),LIMIT(0) is probably going to find all the matches (that's the LIMIT(0) in action) before it decides to keep just the first 3000. That's the most likely cause of the scans you're seeing.\\n\\nThe ATMOST(3000) form will simply eliminate all the matching records where there are > 3000 matches. The LIMIT(3000,SKIP) will also eliminate all the matches where there are > 3000 matches.\\n\\nTherefore, your options B and C will NOT produce the same result set as A.\\n\\nIs that what you want?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-02-20 13:06:07\" },\n\t{ \"post_id\": 5206, \"topic_id\": 1200, \"forum_id\": 10, \"post_subject\": \"JOIN - ATMOST vs LIMIT vs KEEP\", \"username\": \"Rahul Jain\", \"post_text\": \"[size=85:39mptjnq]Which of the joins are better on performance scale if my join is a keyed INNER join :-\\n\\nA)\\tUsing KEEP with limit:\\nrecs_by_phone := join(Dataset1, IndexKey, \\nkeyed(left.phoneno = right.phone),\\ntransform(Lay_out, self.acctno:=left.acctno;self :=LEFT),\\nkeep(3000), limit(0));\\n\\nB)\\tUsing ATMOST :\\nrecs_by_phone := join(Dataset1, IndexKey, \\nkeyed(left.phoneno = right.phone),\\ntransform(Lay_out, self.acctno:=left.acctno;self :=LEFT),\\nATMOST(3000));\\n\\nC)\\tUsing LIMIT +SKIP :\\nrecs_by_phone := join(Dataset1, IndexKey, \\nkeyed(left.phoneno = right.phone),\\ntransform(Lay_out, self.acctno:=left.acctno;self :=LEFT),\\nlimit(3000, SKIP));\\n\\n\\n\\nI am getting a SCANS = 1101370 due a join which is as option a) above and want to reduce those SCANS as much as possible.Using ATMOST reduced the SCANS to 436501. But is there anything else you can sugesst ?\", \"post_time\": \"2014-02-13 15:37:59\" },\n\t{ \"post_id\": 5269, \"topic_id\": 1207, \"forum_id\": 10, \"post_subject\": \"Re: ESP Get request querystring\", \"username\": \"dbang\", \"post_text\": \"Thank you. 
Used mod_rewrite on apache2 instead.\", \"post_time\": \"2014-02-21 12:28:12\" },\n\t{ \"post_id\": 5257, \"topic_id\": 1207, \"forum_id\": 10, \"post_subject\": \"Re: ESP Get request querystring\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Right, WsEcl does currently validate the url inputs against the input shema generated by ECL.\\n\\nFYI, I see you were using the URL to access XLST generated HTML pages as your response. If the application did want an xml response it can bypass the validation by using "proxy" mode. The url for proxy mode would be: \\n\\nhttp://192.168.56.101:8002/WsEcl/proxy/ ... &REQUEST=1\\n\\nI've opened the following issue to consider making validation case insensitive for xslt (html) and other non-proxy mode requests.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10832\\n\\nRegards,\\nTony\", \"post_time\": \"2014-02-20 17:43:05\" },\n\t{ \"post_id\": 5226, \"topic_id\": 1207, \"forum_id\": 10, \"post_subject\": \"ESP Get request querystring\", \"username\": \"dbang\", \"post_text\": \"When using stored parameters like\\nSTRING REQUEST := 'GetCapabilities' : STORED('REQUEST');\\nit is turns into a lower case get parameter like\\nhttp://192.168.56.101:8002/WsEcl/xslt/q ... &request=1\\n\\nHowever some 3rd party programs uses upper case parameters and ESP does not pick on uppercase params like \\nhttp://192.168.56.101:8002/WsEcl/xslt/q ... &REQUEST=1\\n\\nIs there anyway around this? \\nMaybe either by grabbing the calling url and make it lower case via the ECL code or some config parameter.\\n\\nKind regards\", \"post_time\": \"2014-02-18 16:38:02\" },\n\t{ \"post_id\": 5306, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"jsmith\", \"post_text\": \"Have been running tests with Tim's job that OOM'd today and it seems very likely that Transparent Huge Pages are involved in the memory panic and oom death.\\n\\nTurning THP off on Tim's test cluster, resolved the problem.\\nYou can switch THP's off with:\\n\\necho never > /sys/kernel/mm/transparent_hugepage/enabled\\n
\\n\\nThe system had plenty of memory available to it at the time, but I think THP is conspiring to starve the processes (the slave in this case) of the page sizes it wanted at the time it wanted them.\\n\\nWe've had other reports of problems with THP before, causing huge cpu loads in system time and slow queries.\\n\\nOthers (Oracle, Hadoop setups) report problems with THP and recommend switching it off.\\n\\nPlease disable it on all HPCC nodes and retry your query.\\nThanks.\", \"post_time\": \"2014-02-28 17:26:57\" },\n\t{ \"post_id\": 5305, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"jsmith\", \"post_text\": \"Specifically, can you run this command on one of your slave nodes (as root):\\n\\n\\ncat /sys/kernel/mm/transparent_hugepage/enabled\\n
\\n\\n.. and tell me what the output is?\\nThanks.\", \"post_time\": \"2014-02-28 14:12:58\" },\n\t{ \"post_id\": 5304, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"tlhumphrey2\", \"post_text\": \"srbhkmr,\\n\\nAre you running on a linux box? Or maybe a better question is what is your HPCC environment: linux, VM, windows, etc?\\n\\nWhy am I asking? We are trying to track down what is causing the problem and currently we believe it might be a linux configuation problem.\\n\\nTim\", \"post_time\": \"2014-02-28 14:09:17\" },\n\t{ \"post_id\": 5296, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"tlhumphrey2\", \"post_text\": \"I believe what is causing the Out of memory error is the fact that your input file, churndata, has a lot of fields with the value 5. But, ML.Associate should handle this. So, I’m writing a problem report so it will be fixed. Appreciate you finding this problem.\\n\\nOn another point, the test code you attached, test.ecl, uses churndata to create eclatNInput. But, after creating it, you don’t use it. I thought you were going to use it as input to EclatN and expected \\n\\n ML.ToField(churnData, o2);\\n\\nto be\\n\\n ML.ToField(eclatNInput, o2);\\n\\nWith the test data you attached, I did use eclatNInput as input to EclatN and it completed with no errors. But, when I ran the code with churndata as input to EclatN, I, too, got out of memory errors.\", \"post_time\": \"2014-02-26 18:52:52\" },\n\t{ \"post_id\": 5282, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"jsmith\", \"post_text\": \"Could you attach an archive of all the thorslave logs from this node for this query?\\nThanks.\", \"post_time\": \"2014-02-25 15:00:21\" },\n\t{ \"post_id\": 5279, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"srbhkmr\", \"post_text\": \"As I mentioned, attached inut is input for the Rules mining module. In preprocessing I'm actually ignoring remaining 2 columns. You can always add 2 arbitrary columns in the input dataset, if you want to use the same ecl code.\\n\\nThanks,\", \"post_time\": \"2014-02-25 06:10:43\" },\n\t{ \"post_id\": 5271, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your record format, rec, has 22 fields. But, the input you attached only has 20. Can you attach a sample of the input dataset with all 22 fields, please.\", \"post_time\": \"2014-02-21 14:34:03\" },\n\t{ \"post_id\": 5267, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"srbhkmr\", \"post_text\": \"The ECL code is simple, it just reads a dataset, does some preprocessing and feeds it to RuleMining code of Associate.ecl in ML library.\\n\\nAttached is the ECL code and a sample input that goes into the Associate.ecl routines after the pre-processing phase.\\n\\nMemory settings in configuration are same as default. 
I haven't tried changing them much.\\n\\nFollowing are the memory settings for ThorCluster in configurationManager:\\n\\n- globalMemorySize is empty defaulting to 75% of physical memory/slaves per node.\\n\\n- masterMemorySize defaults to globalMemorySize\\n\\n- memorySpillAt is empty.\\n\\n- multiThorMemoryThreshold is empty.\\n\\n\\nShould I try tweaking the 'memorySpillAt' attribute for the Thor cluster?\", \"post_time\": \"2014-02-21 11:03:11\" },\n\t{ \"post_id\": 5258, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"flavio\", \"post_text\": \"In addition to that information, would you mind checking your memory settings in the HPCC configuration and posting them here too? Perhaps it's trying to allocate too much (or too little) memory to Thor.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2014-02-20 19:10:07\" },\n\t{ \"post_id\": 5239, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Re: Out of memory Error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Would you please post your code, if it isn't too large. Plus, can you post a few lines of your input dataset, please.\", \"post_time\": \"2014-02-19 19:16:24\" },\n\t{ \"post_id\": 5236, \"topic_id\": 1214, \"forum_id\": 10, \"post_subject\": \"Out of memory Error\", \"username\": \"srbhkmr\", \"post_text\": \"I tried running code from Associate.ecl from ML library, for a dataset of 500K rows and 19 columns. The workunit failed giving "MP link closed" error.\\n\\nOne of the log file says - "Out of memory: Kill process 31544 (thorslave_lcr) score 229 or sacrifice child"
\\n\\nAs HPCC spills the data being processed onto disk, I wasn't expecting any error complaining about "Out of memory" like this. \\nIs this expected behaviour? \\n\\nCould some one throw some light on what are the limitations wrt. memory requirements for running a job.\\n\\nThe slave node has 16GB of main memory that was processing the data.\\nAttached is a snapshot of a slave's log file recording the error.\\n\\nThanks,\", \"post_time\": \"2014-02-19 16:41:39\" },\n\t{ \"post_id\": 5249, \"topic_id\": 1216, \"forum_id\": 10, \"post_subject\": \"Re: Significance of SELF keyword\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot\", \"post_time\": \"2014-02-20 13:58:58\" },\n\t{ \"post_id\": 5248, \"topic_id\": 1216, \"forum_id\": 10, \"post_subject\": \"Re: Significance of SELF keyword\", \"username\": \"tlhumphrey2\", \"post_text\": \"Within a TRANSFORM SELF.fieldname designates a field of the dataset produced by the TRANSFORM. SELF by itself (no .fieldname to the the right of it) refers to all other fields of the dataset produced by the TRANSFORM. For example, often you might see SELF:=LEFT, which says all fields of the TRANSFORM's output dataset will be filled by corresponding fields of the LEFT inputted dataset EXCEPT those fields filled by by statements like,SELF.fieldname:=, of the TRANSFORM.\", \"post_time\": \"2014-02-20 13:57:00\" },\n\t{ \"post_id\": 5246, \"topic_id\": 1216, \"forum_id\": 10, \"post_subject\": \"Significance of SELF keyword\", \"username\": \"pius_francis\", \"post_text\": \"Hi all,\\nPlease let me know the significance of SELF keyword used along with TRANSFORM\\nThanks,\\nPius\", \"post_time\": \"2014-02-20 10:33:25\" },\n\t{ \"post_id\": 5293, \"topic_id\": 1219, \"forum_id\": 10, \"post_subject\": \"Re: Record\", \"username\": \"kumar2k14\", \"post_text\": \"Thanks Sameer !\", \"post_time\": \"2014-02-26 13:48:35\" },\n\t{ \"post_id\": 5288, \"topic_id\": 1219, \"forum_id\": 10, \"post_subject\": \"Re: Record\", \"username\": \"sameermsc\", \"post_text\": \"we cannot delete the records from any existing file, All that we can do is to create a new file with all the records that one wants to retain and write it back to disk\\n\\nHere is how you do\\n\\n1) Read the data from file\\n\\n2) \\nMethod 1:\\nProject the data to the same or desired layout, in the transform skip the records you want to delete, if record id is an existing field, you can specify the id's to skip (delete), works well when you have a couple of id's to remove\\n\\nMethod 2:\\ncreate a data set containing all the id's you want to delete. 
perform a join and retain only the records you want, use an outer join for this\\n\\n3) write the resultant data to a new file\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-02-26 07:24:41\" },\n\t{ \"post_id\": 5287, \"topic_id\": 1219, \"forum_id\": 10, \"post_subject\": \"Record\", \"username\": \"kumar2k14\", \"post_text\": \"How to delete a record from base file based on record number..\", \"post_time\": \"2014-02-25 19:01:47\" },\n\t{ \"post_id\": 5298, \"topic_id\": 1221, \"forum_id\": 10, \"post_subject\": \"Re: Dataset as email attachment\", \"username\": \"abhisr\", \"post_text\": \"Thanks Sameer, \\n\\nBut adding '\\\\t' won't work as all the text will be written in the first column.\\n\\nI made a tweak in your code , i changed the '\\\\t' to comma (,) which worked perfectly.\\n\\nds2 := project(ds1, transform(rec1, self.txt := left.player + ',' + left.score + ',' + left.CenturyAgainst + ',' + left.StrikeRate + ',' + left.Year;));\\n\\nrec1 xform(ds2 l, ds2 r) := transform\\n\\nself.txt := l.txt + '\\\\n' + r.txt;\\nself := l;\\nend;\\n
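[Editor's illustration for the record-deletion question above (the "Record" thread): Sameer's Method 1 is described there without code, so here is a minimal, hypothetical ECL sketch of it, reading the file, SKIPping the unwanted records inside a PROJECT, and writing the survivors to a new file. The file name, layout and id values below are invented for illustration only.]

// Sketch of "Method 1" (PROJECT with SKIP); all names are hypothetical
Layout := RECORD
    UNSIGNED4 id;
    STRING20  name;
END;

base      := DATASET('~thor::example::basefile', Layout, THOR);
idsToDrop := [3, 7];                     // record ids to remove

Layout keepWanted(Layout L) := TRANSFORM, SKIP(L.id IN idsToDrop)
    SELF := L;
END;

kept := PROJECT(base, keepWanted(LEFT));
OUTPUT(kept, , '~thor::example::basefile_new', OVERWRITE);   // step 3: write the result to a new file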
\\nRegards\\nAbhi\", \"post_time\": \"2014-02-27 14:28:28\" },\n\t{ \"post_id\": 5297, \"topic_id\": 1221, \"forum_id\": 10, \"post_subject\": \"Re: Dataset as email attachment\", \"username\": \"sameermsc\", \"post_text\": \"Hi Abhi,\\n\\nTry this,\\n\\nREC := {\\n\\tstring player;\\n\\tinteger8 Score;\\n\\tstring CenturyAgainst;\\n\\tinteger8 StrikeRate;\\n\\tinteger8 Year;\\n};\\n\\nDS1 := DATASET\\n([{'Sehwag',219,'West Indies',157,2011} ,{'Sachin',200,'South Africa',189,2010},{'Coventry',219,'Bangladesh',157,2009}\\n],REC);\\n\\nrec1 := {\\n\\tinteger id := 0;\\n\\tstring txt := '';\\n};\\n\\nds2 := project(ds1, transform(rec1, self.txt := left.player + '\\\\t' + left.score + '\\\\t' + left.CenturyAgainst + '\\\\t' + left.StrikeRate + '\\\\t' + left.Year;));\\n\\nrec1 xform(ds2 l, ds2 r) := transform\\n\\nself.txt := l.txt + '\\\\n' + r.txt;\\nself := l;\\nend;\\n\\nds_roll := rollup(ds2, left.id = right.id, xform(left, right));\\n\\n\\nemailID := 'abhilash.nair@lexisnexis.com';\\nsubject := 'DailyReport';\\nbody := 'Please review the attachemt for .....';\\nattachmentName := 'ds1.csv';\\n\\nSTD.System.Email.SendEmailAttachData(emailID, subject, body, (DATA)(ds_roll[1].txt),\\n'text/csv', attachmentName);\\n\\n
\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-02-27 08:52:22\" },\n\t{ \"post_id\": 5295, \"topic_id\": 1221, \"forum_id\": 10, \"post_subject\": \"Dataset as email attachment\", \"username\": \"abhisr\", \"post_text\": \"Hi ,\\nI have a job that runs daily collecting the statistics of other jobs ran and their outputs. The job needs to send out an email with the stats as an attachemnt.\\n\\nSo I used the STD.System.Email.SendEmailAttachData , but I am only able to attach one row of the result as data , how can I attach a whole dataset as result.\\n\\nThe syntax is STD.System.Email.SendEmailAttachData ( sendto, subject, body, attachment, mimietype, filename, server, port, sender )\\nattachment A DATA value containing the binary data to attach.
\\n\\nSample code\\n\\nExport REC := record\\n string player;\\n integer8 Score;\\n string CenturyAgainst;\\n integer8 StrikeRate;\\n integer8 Year;\\nend;\\nDS1 := DATASET\\n([{'Sehwag',219,'West Indies',157,2011} ,{'Sachin',200,'South Africa',189,2010},{'Coventry',219,'Bangladesh',157,2009}\\n],REC);\\n\\nfileName := '~thor::test::ar::attachemnt::ds1'; \\nwrite := OUTPUT(DS1,,fileName,THOR,OVERWRITE,EXPIRE(30));\\n\\ndespr := STD.File.DeSpray(fileName,'10.194.72.226', '/data/test/ds1',,,,TRUE); \\ndespary := SEQUENTIAL(write, despr);\\n\\nemailID := 'abhilash.nair@lexisnexis.com';\\nsubject := 'DailyReport';\\nbody := 'Please review the attachemt for .....';\\nattachmentName := 'ds1.csv';\\n\\nSTD.System.Email.SendEmailAttachData(emailID, subject, body, (DATA)(DS1[1].player), \\n'text/csv', attachmentName);\\n
\\nWhich return 'Sehwag' .\", \"post_time\": \"2014-02-26 16:21:08\" },\n\t{ \"post_id\": 5390, \"topic_id\": 1236, \"forum_id\": 10, \"post_subject\": \"Re: Access Child Dataset in BEGINC++\", \"username\": \"tlhumphrey2\", \"post_text\": \"gthompson\\n\\nInstead of passing the record, test, to GetCid, it might be better to pass just the child dataset. Why? Because the ECL Language Reference tells you how to handle datasets that are passed to a BEGINC++ function. \\n\\nSo your function definition would look like the following:\\n\\nREAL8 GetCid( clayout test_childDS) := BEGINC++\", \"post_time\": \"2014-03-18 15:50:05\" },\n\t{ \"post_id\": 5389, \"topic_id\": 1236, \"forum_id\": 10, \"post_subject\": \"Re: Access Child Dataset in BEGINC++\", \"username\": \"bforeman\", \"post_text\": \"I am not a C++ programmer, but I might want to reverse engineer this if I absolutely had to use C++ to do this.\\n\\nThe ECL code supports nested child datasets. Using DENORMALIZE, (or using the example in the ECL playground), you could set your debug switch to SaveCPPTempFiles=1 and then look at the C++ code that is generated by ECL in the Helper section of the Workunit details.\\n\\nFrom looking at the ECL generated C++, you can probably work out what you need to do in your own embed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-17 11:08:06\" },\n\t{ \"post_id\": 5367, \"topic_id\": 1236, \"forum_id\": 10, \"post_subject\": \"Access Child Dataset in BEGINC++\", \"username\": \"gthompson\", \"post_text\": \"Hi.\\n\\nIs there any sample code or documentation for accessing child datasets in BEGINC++? In the following code, I would like to know how many child records and access their values in the c code.\\n\\n\\nclayout := record\\n unsigned2 cid;\\n unsigned2 did;\\n unsigned2 eid;\\nend;\\n\\nlayout := record\\n unsigned2 id;\\n dataset(clayout) c;\\nend;\\n\\ncrecds := DATASET([{3, 4, 5}, {6, 7, 8}], clayout);\\nrec := ROW({2, crecds}, layout);\\n\\n// extern double user1(const byte * test) {\\nREAL8 GetCid(layout test) := BEGINC++\\n struct layout {\\n unsigned short id;\\n //????\\n };\\n \\n layout theTest = *(layout *) test;\\n\\n return theTest.id;\\nENDC++;\\n\\nOUTPUT(rec);\\nOUTPUT(GetCid(rec));\\n
\\n\\nThanks.\", \"post_time\": \"2014-03-10 16:49:01\" },\n\t{ \"post_id\": 5370, \"topic_id\": 1237, \"forum_id\": 10, \"post_subject\": \"Re: Record distribute\", \"username\": \"rtaylor\", \"post_text\": \"Kumar,\\n\\nYou can just create a TABLE to view the result of the DISTRIBUTE, something like this:DistDS := DISTRIBUTE(MyDS,ID);\\nOUTPUT(TABLE(DistDS,{ID,UNSIGNED2 NodeID := STD.System.Thorlib.Node()+1},LOCAL));
This code assumes your ID field is an integer type (DISTRIBUTE requires its second parameter to be an integer type, usually done with the HASH32 function) and simply shows you, for each ID field from each record, which node the data is on.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-03-11 01:57:19\" },\n\t{ \"post_id\": 5368, \"topic_id\": 1237, \"forum_id\": 10, \"post_subject\": \"Record distribute\", \"username\": \"kumar2k14\", \"post_text\": \"I would like to know how to identify the files and records are being distributed after the process is complete with distribute function..\", \"post_time\": \"2014-03-10 20:38:56\" },\n\t{ \"post_id\": 5412, \"topic_id\": 1240, \"forum_id\": 10, \"post_subject\": \"Re: PENALTY keyword\", \"username\": \"tlhumphrey2\", \"post_text\": \"Below is a little example of PENALTY in a parse. I started with the PENALTY example in the language reference and changed a few things.\\n\\nIn my grammar, one of three things can follow an Article: Word or Animal or WildAnimal. WildAnimal is a subset of Animal. I used PENALTY on Word, Animal, and WildAnimal. The more specific pattern got the least penalty (i.e. WildAnimal got the least penalty).\\n\\nrs := RECORD\\n STRING100 line;\\nEND;\\n\\nds := DATASET([{'the bear at the dog'}], rs);\\n\\nPATTERN ws := PATTERN('[ \\\\r\\\\n]');\\nPATTERN alpha := PATTERN('[A-Za-z]');\\nPATTERN word := Alpha+ PENALTY(3);\\nPATTERN article := ['the', 'a'];\\nPATTERN wildanimal := ('bear' | 'lion') PENALTY(1);\\nPATTERN animal := ('bear' | 'lion' | 'cat' | 'dog' ) PENALTY(2);\\n\\nRULE NounPhraseComponent:= word | article ws (Word | wildanimal | animal);\\n\\nps := RECORD\\n word_type := MAP( \\n matchtext(word)<>'' => 'word',\\n matchtext(wildanimal)<>'' => 'wildanimal',\\n matchtext(animal)<>'' => 'animal',\\n 'BOGUS'\\n );\\n out1 := MATCHTEXT(NounPhraseComponent);\\nEND;\\n\\np := PARSE(ds, line, NounPhraseComponent, ps, BEST, MANY, NOCASE);\\nOUTPUT(p,NAMED('p'));\", \"post_time\": \"2014-03-21 19:51:28\" },\n\t{ \"post_id\": 5397, \"topic_id\": 1240, \"forum_id\": 10, \"post_subject\": \"Re: PENALTY keyword\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot. It was insightful.\", \"post_time\": \"2014-03-19 09:58:13\" },\n\t{ \"post_id\": 5392, \"topic_id\": 1240, \"forum_id\": 10, \"post_subject\": \"Re: PENALTY keyword\", \"username\": \"dabayliss\", \"post_text\": \"As with most of ECL - my general advice is: "if it doesn't make sense, you don't need it"\\n\\nMost parsing systems are design to parse unambiguous grammers - for a given input there is one and only one correct parsing. As you head from proper languages to natural languages (spot the geek) you will hit situations where there are MULTIPLE CORRECT PARSINGS of a given input.\\n\\nIn those situations you have to 'pick one'. For natural language often the best way to pick one is to use the grammatical construct 'most likely' to occur. A way to encode that is to attach penalties to your rule productions - unusual constructs have higher penalties. Of course picking particular constructs early in a sentence might force you into rarer constructs elsewhere in the sentence.\\n\\nECL thus has PARSE,BEST as a mechanism. It will sum all the penalties across a parsing (or across a parsing of a particular length) and pick the one that accumulates the lowest number of penalties.\\n\\nAgain though - I would consider our penalty/best logic to be EXTREMELY cool but EXTREMELY marginal. 
If you are using it to port code that you previously did with a regular expression - then something is horribly wrong .....\", \"post_time\": \"2014-03-18 19:21:57\" },\n\t{ \"post_id\": 5383, \"topic_id\": 1240, \"forum_id\": 10, \"post_subject\": \"PENALTY keyword\", \"username\": \"pius_francis\", \"post_text\": \"Hi all,\\n What is the significance of PENALTY keyword used in PATTERN? Please clarify on this. TIA\\n\\nRegards,\\nPius\", \"post_time\": \"2014-03-13 12:12:47\" },\n\t{ \"post_id\": 5415, \"topic_id\": 1247, \"forum_id\": 10, \"post_subject\": \"Re: Log Analytics\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jerry,\\n\\nI believe you have access to SALT. If you do, there is a field attribute that can be placed in your specification file called WHEEL. It can be placed on any field. For example, if you place it on the field containing IP addresses then SALT creates a ROXIE service. If you deploy that service, then anytime you enter something in the IP address field, it will start giving you suggestions. It works much like google’s auto-completion capability.\\n\\nThere is an example of this in the repository of the boca dataland. Module tlh2_SALT_Wheel_after_update. I believe I have WHEEL on 4 fields: city, state, company_name, and address. My specification file is tlh2_SALT_Wheel_after_update.SALT_Wheel_SPC.\\n\\nIt returns suggestions fairly fast.\\n\\nTim\", \"post_time\": \"2014-03-24 13:46:32\" },\n\t{ \"post_id\": 5413, \"topic_id\": 1247, \"forum_id\": 10, \"post_subject\": \"Log Analytics\", \"username\": \"jjacob\", \"post_text\": \"Hi,\\n\\nI am looking for some ideas on processing log files containing IP addresses and making them searchable (wild card) using HPCC. One day's log could contain millions of records. There are around 6 fields in the logs that we would like to be searchable. \\n\\nOur current approach is to use an inverted index with the searchable fields. But with the large volume of data, inverted index size is growing tremendously and slowing down the query performance. Is there a different approach to process this kind of data and make them wildcard searchable. A sample of the data is given below. \\n\\n\\nfield1=2012-02-27|field2=XXXX|field3=xxx.yyy.zzz.aaa|field4=xxxx|field5=xxx|field6=aaaaa|field7=a long text|field8=xxx.yyy.zzz.aaa|field9=12345|field10=xx.xx.xx.xx|field11=123|field11=aaa|field12=23
\\n\\nThanks,\\nJerry\", \"post_time\": \"2014-03-21 20:29:16\" },\n\t{ \"post_id\": 6353, \"topic_id\": 1255, \"forum_id\": 10, \"post_subject\": \"Re: Restore workunit - SOAP call or ECL library\", \"username\": \"abhisr\", \"post_text\": \"Restoring an archived workunit.\", \"post_time\": \"2014-09-19 18:30:09\" },\n\t{ \"post_id\": 6350, \"topic_id\": 1255, \"forum_id\": 10, \"post_subject\": \"Re: Restore workunit - SOAP call or ECL library\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nWhat do you mean "restore"? Restore from crash, fail, delete, or?\\n\\nAttila\", \"post_time\": \"2014-09-19 17:55:22\" },\n\t{ \"post_id\": 5429, \"topic_id\": 1255, \"forum_id\": 10, \"post_subject\": \"Restore workunit - SOAP call or ECL library\", \"username\": \"abhisr\", \"post_text\": \"Hi,\\n\\nIs there any library or SOAP call available to RESTORE workunit ?\\n\\nRegards\\nAbhi\", \"post_time\": \"2014-03-26 20:24:44\" },\n\t{ \"post_id\": 5464, \"topic_id\": 1261, \"forum_id\": 10, \"post_subject\": \"Re: Watchdog has lost contact with Thor slave\", \"username\": \"jsmith\", \"post_text\": \"This looks like an incident of https://track.hpccsystems.com/browse/HPCC-10374\\nIt would make sense to only see with larger inputs (that spill on sort) and it was inconsistent.\\nIt was fixed in 4.2.2.\", \"post_time\": \"2014-04-03 13:08:28\" },\n\t{ \"post_id\": 5461, \"topic_id\": 1261, \"forum_id\": 10, \"post_subject\": \"Re: Watchdog has lost contact with Thor slave\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nI dont have a correct answer for this, but sharing my experience with this Error. For us this error was due of the data we are processing. Our fix was to run the process on smaller data and identify the data (based on the id's we have) causing this Error. Excluding such data from processing allowed us a successful Execution.\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-03 12:06:36\" },\n\t{ \"post_id\": 5449, \"topic_id\": 1261, \"forum_id\": 10, \"post_subject\": \"Watchdog has lost contact with Thor slave\", \"username\": \"DSC\", \"post_text\": \"We're having intermittent problems with a job. It fails with larger inputs, though there doesn't appear to be a direct correlation between the input size and the rate of failure. 
The node's thor slave log has a segfault in it; here is the snippet:\\n\\n000020A4 2014-04-01 09:31:42.283 20988 37002 "CONNECTING (id=458, idx=0) to (id=460, idx=0) - activity(join, 460)"\\n000020A5 2014-04-01 09:31:42.283 20988 37002 "CONNECTING (id=459, idx=0) to (id=460, idx=1) - activity(join, 460)"\\n000020A6 2014-04-01 09:31:42.283 20988 37002 "CONNECTING (id=460, idx=0) to (id=461, idx=0) - activity(diskwrite, 461)"\\n000020A7 2014-04-01 09:31:42.290 20988 37002 "deserializeMPTag: tag = 65788"\\n000020A8 2014-04-01 09:31:42.290 20988 37002 "deserializeMPTag: tag = 65552"\\n000020A9 2014-04-01 09:31:42.290 20988 37002 "SortJoinSlaveActivity::init portbase = 20102, mpTagRPC=65788 - activity(join, 460)"\\n000020AA 2014-04-01 09:31:42.290 20988 37020 "Creating SortSlaveServer on tag 65788 MP - activity(join, 460)"\\n000020AB 2014-04-01 09:31:42.295 20988 37002 "Watchdog: Start Job 457"\\n000020AC 2014-04-01 09:31:42.295 20988 37003 "handling fname : /var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf81__w20140401-091803._4_of_54 - activity(diskwrite, 461)"\\n000020AD 2014-04-01 09:31:42.295 20988 37003 "Starting input - activity(diskwrite, 461)"\\n000020AE 2014-04-01 09:31:42.295 20988 37003 "JOIN: Starting R then L - activity(join, 460)"\\n000020AF 2014-04-01 09:31:42.295 20988 37003 "Starting input - activity(join, 460)"\\n000020B0 2014-04-01 09:31:42.295 20988 37003 "Starting input - activity(join, 460)"\\n000020B2 2014-04-01 09:31:42.295 20988 37003 "diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf21__w20140401-091803._4_of_54' (logical file = ~spill::wf21) - activity(diskread, 458)"\\n000020B1 2014-04-01 09:31:42.295 20988 37029 "diskread[part=3]: reading physical file '/var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf61__w20140401-091803._4_of_54' (logical file = ~spill::wf61) - activity(diskread, 459)"\\n000020B3 2014-04-01 09:31:42.295 20988 37003 "diskread[part=3]: Base offset to 1369553272 - activity(diskread, 458)"\\n000020B4 2014-04-01 09:31:42.295 20988 37029 "diskread[part=3]: Base offset to 14869864 - activity(diskread, 459)"\\n000020B5 2014-04-01 09:31:42.295 20988 37003 "Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf21__w20140401-091803._4_of_54 - activity(diskread, 458)"\\n000020B6 2014-04-01 09:31:42.295 20988 37029 "Reading block compressed file: /var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf61__w20140401-091803._4_of_54 - activity(diskread, 459)"\\n000020B7 2014-04-01 09:31:42.295 20988 37029 "diskread[part=3]: variable (/var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf61__w20140401-091803._4_of_54) - activity(diskread, 459)"\\n000020B8 2014-04-01 09:31:42.295 20988 37003 "diskread[part=3]: variable (/var/lib/HPCCSystems/hpcc-data/thor/hpccinternal/temporary/spill/wf21__w20140401-091803._4_of_54) - activity(diskread, 458)"\\n000020B9 2014-04-01 09:31:42.295 20988 37029 "ITDL starting for output 0 - activity(diskread, 459)"\\n000020BA 2014-04-01 09:31:42.295 20988 37003 "ITDL starting for output 0 - activity(diskread, 458)"\\n000020BB 2014-04-01 09:31:42.295 20988 37003 "ITDL starting for output 0 - activity(join, 460)"\\n000020BC 2014-04-01 09:31:42.295 20988 37003 "Gather in - activity(join, 460)"\\n000020BD 2014-04-01 09:31:42.300 20988 37020 "Connected to slave 3 of 54 - activity(join, 460)"\\n000020BE 2014-04-01 09:31:42.300 20988 37038 "CSortTransferServerThread started port 
20103"\\n000020BF 2014-04-01 09:31:42.301 20988 37029 "Record size (min) = 22 - activity(diskread, 459)"\\n000020C0 2014-04-01 09:31:42.314 20988 37020 "Start Gather - activity(join, 460)"\\n000020C1 2014-04-01 09:31:42.314 20988 37003 "SORT: Gather - activity(join, 460)"\\n000020C2 2014-04-01 09:31:42.320 20988 37003 "Record size (min) = 82 - activity(diskread, 458)"\\n000020C3 2014-04-01 09:31:44.502 20988 36975 "Sorting 2847680 rows - activity(join, 460)"\\n000020C4 2014-04-01 09:32:13.784 20988 36975 "Sort took: 29.285000 - activity(join, 460)"\\n000020C5 2014-04-01 09:32:13.784 20988 36975 "CThorSpillableRowArray::save 2847680 rows - activity(join, 460)"\\n000020C6 2014-04-01 09:32:23.047 20988 36975 "CThorSpillableRowArray::save done, bytes = 309860662 - activity(join, 460)"\\n000020C7 2014-04-01 09:32:24.333 20988 37003 "Sorting 1414956 rows - activity(join, 460)"\\n000020C8 2014-04-01 09:32:42.309 20988 36991 "SYS: PU= 96% MU= 59% MAL=804209440 MMP=798666752 SBK=5542688 TOT=785788K RAM=89188896K SWP=2432K"\\n000020C9 2014-04-01 09:32:43.790 20988 37003 "Sort took: 19.458000 - activity(join, 460)"\\n000020CA 2014-04-01 09:32:43.790 20988 37003 "CThorSpillableRowArray::save 1414956 rows - activity(join, 460)"\\n000020CB 2014-04-01 09:32:47.224 20988 37003 "CThorSpillableRowArray::save done, bytes = 153902832 - activity(join, 460)"\\n000020CC 2014-04-01 09:32:47.225 20988 37003 "Local run sort(s) done - activity(join, 460)"\\n000020CD 2014-04-01 09:32:47.225 20988 37003 "Local Overflow Merge start - activity(join, 460)"\\n000020CE 2014-04-01 09:32:47.225 20988 37003 "================================================"\\n000020CF 2014-04-01 09:32:47.225 20988 37003 "Signal: 11 Segmentation fault"\\n000020D0 2014-04-01 09:32:47.225 20988 37003 "Fault IP: 00007FC57BCBFD98"\\n000020D1 2014-04-01 09:32:47.225 20988 37003 "Accessing: 0000000000000000"\\n000020D2 2014-04-01 09:32:47.225 20988 37003 "Registers:"\\n000020D3 2014-04-01 09:32:47.225 20988 37003 "EAX:00007FC50B1F0900 EBX:00007FC3E4004300 ECX:00007FC50B1F0908 EDX:00007FC50B1F08D0 ESI:0000000000000000 EDI:0000000000000000"\\n000020D4 2014-04-01 09:32:47.225 20988 37003 "CS:EIP:0033:00007FC57BCBFD98"\\n000020D5 2014-04-01 09:32:47.225 20988 37003 " ESP:00007FC50B1F0890 EBP:00007FC3E40076A8"\\n000020D6 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F0890]: 0000000000000002 0000000700000000 0000000000000007 0B1F090800000000 00007FC50B1F0908 0B1F090000007FC5 00007FC50B1F0900 0B1F08D000007FC5"\\n000020D7 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F08B0]: 00007FC50B1F08D0 E400439000007FC5 00007FC3E4004390 0000005B00007FC3 0000007C0000005B E40076A80000007C 00007FC3E40076A8 E4002AD800007FC3"\\n000020D8 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F08D0]: 00007FC3E4002AD8 B0003C7000007FC3 00007FC3B0003C70 E4002BB800007FC3 00007FC3E4002BB8 B000131C00007FC3 00007FC3B000131C E400473000007FC3"\\n000020D9 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F08F0]: 00007FC3E4004730 0000004100007FC3 0000008000000041 E400485000000080 00007FC3E4004850 0000000000007FC3 0000000000000000 0000000100000000"\\n000020DA 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F0910]: 0000000000000001 B0003C7000000000 00007FC3B0003C70 E4002BB800007FC3 00007FC3E4002BB8 B000131C00007FC3 00007FC3B000131C E400430000007FC3"\\n000020DB 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F0930]: 00007FC3E4004300 0000000200007FC3 0000000000000002 0000000700000000 0000000000000007 7BCC09F800000000 00007FC57BCC09F8 
0000000000007FC5"\\n000020DC 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F0950]: 00007FC300000000 E400480000007FC3 00007FC3E4004800 0000002800007FC3 0000000000000028 0000000000000000 0000000000000000 B0003CD800000000"\\n000020DD 2014-04-01 09:32:47.225 20988 37003 "Stack[00007FC50B1F0970]: 00007FC3B0003CD8 E400437800007FC3 00007FC3E4004378 E400430000007FC3 00007FC3E4004300 E400431800007FC3 00007FC3E4004318 B000091800007FC3"\\n000020DE 2014-04-01 09:32:47.225 20988 37003 "Backtrace:"\\n000020DF 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x28) [0x7fc57e0f2ad8]"\\n000020E0 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x9da) [0x7fc57e0f377a]"\\n000020E1 2014-04-01 09:32:47.226 20988 37003 " /usr/lib/jvm/jre/lib/amd64/server/libjvm.so(+0x7b5594) [0x7fc5393af594]"\\n000020E2 2014-04-01 09:32:47.226 20988 37003 " /usr/lib/jvm/jre/lib/amd64/server/libjvm.so(JVM_handle_linux_signal+0x95) [0x7fc5393b5645]"\\n000020E3 2014-04-01 09:32:47.226 20988 37003 " /lib64/libpthread.so.0() [0x303ec0f500]"\\n000020E4 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libthorsort_lcr.so(_ZN15CWriteIntercept5writeEP10IRowStream+0xd8) [0x7fc57bcbfd98]"\\n000020E5 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libthorsort_lcr.so(_ZN11CThorSorter6GatherEP14IRowInterfacesP10IRowStreamP8ICompareS5_S5_P18ISortKeySerializerPKvbbRbS1_+0x568) [0x7fc57bcc09f8]"\\n000020E6 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN17JoinSlaveActivity12doglobaljoinEv+0x56f) [0x7fc57b9d51df]"\\n000020E7 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN17JoinSlaveActivity5startEv+0x62b) [0x7fc57b9d636b]"\\n000020E8 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libgraphslave_lcr.so(_ZN14CSlaveActivity10startInputEP13IThorDataLinkPKc+0x75) [0x7fc57bee6015]"\\n000020E9 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN27CDiskWriteSlaveActivityBase4openEv+0x7f) [0x7fc57ba3431f]"\\n000020EA 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN27CDiskWriteSlaveActivityBase7processEv+0x95) [0x7fc57ba35715]"\\n000020EB 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libactivityslaves_lcr.so(_ZN20ProcessSlaveActivity4mainEv+0xb2) [0x7fc57b97e402]"\\n000020EC 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent4mainEv+0x65) [0x7fc57e191335]"\\n000020ED 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_ZN19CThreadedPersistent8CAThread3runEv+0x10) [0x7fc57e195690]"\\n000020EE 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x2f) [0x7fc57e1914af]"\\n000020EF 2014-04-01 09:32:47.226 20988 37003 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1c) [0x7fc57e18fd9c]"\\n000020F0 2014-04-01 09:32:47.226 20988 37003 " /lib64/libpthread.so.0() [0x303ec07851]"\\n000020F1 2014-04-01 09:32:47.226 20988 37003 " /lib64/libc.so.6(clone+0x6d) [0x303e8e894d]"\\n000020F2 2014-04-01 09:32:47.226 20988 37003 "ThreadList:\\n7FC57AEB7700 140486147536640 21008: CMPNotifyClosedThread\\n7FC57A4B6700 140486137046784 21009: CSocketBaseThread\\n7FC579AB5700 140486126556928 21011: MP Connection Thread\\n7FC5790B4700 140486116067072 21040: CBackupHandler\\n7FC541EFE700 140485191526144 21049: CGraphProgressHandler\\n7FC540CFC700 140485172643584 36975: 
BackgroundReleaseBufferThread\\n7FC503FFF700 140484152391424 36991: CMemoryUsageReporter\\n7FC5093EE700 140484240402176 37002: CGraphExecutor pool\\n7FC50B1F1700 140484271871744 37003: ProcessSlaveActivity\\n7FC50A7F0700 140484261381888 37020: CThorSorter\\n7FC509DEF700 140484250892032 37029: ThorLookaheadCache\\n7FC5035FE700 140484141901568 37038: SortTransferServer\\n"
\\nIs there anything that can be gleaned from this that could point me in the right direction for troubleshooting?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2014-04-01 14:40:52\" },\n\t{ \"post_id\": 5463, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"rtaylor\", \"post_text\": \"Brian,Everything worked after I made suggested changes
Great to hear that. Any other issues pop up, just start another thread on the forum.\\n\\nRichard\", \"post_time\": \"2014-04-03 12:57:33\" },\n\t{ \"post_id\": 5457, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"SyntaxGeek\", \"post_text\": \"Everything worked after I made suggested changes.\\n\\nThanks,\\nBrian\", \"post_time\": \"2014-04-03 01:26:00\" },\n\t{ \"post_id\": 5456, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"SyntaxGeek\", \"post_text\": \"Fantastic, I will indeed get into those this evening, appreciate the information on the data type differences.\", \"post_time\": \"2014-04-02 20:33:23\" },\n\t{ \"post_id\": 5455, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"rtaylor\", \"post_text\": \"Brian,\\n\\nYes, QSTRING does give you ~25% data compression when written to a disk file. But in this case, your RECORD structure should define what the data is, and your CSV file only contains strings and numbers. Additionally, if you ever intend to put these fields into an INDEX (which are always LZW compressed), QSTRINGs will look like binary data to the compression algorithm, because each character is stored in 6 bits instead of 8 and you'll lose all the advantage of the LZW compression.\\n\\nHave you taken a look at our free online self-paced ECL courses? You can find the link to them here: http://hpccsystems.com/community/training-events/training\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-02 20:30:41\" },\n\t{ \"post_id\": 5454, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"SyntaxGeek\", \"post_text\": \"Richard thanks for the timely response, I copied that RECORD file from the example pdf which had THOR plugged in that position, I will have to go read up more on that particular declaration and the parameter meanings.\\n\\nI will test against my environment and confirm that I see similar results.\\n\\nOn a less related pointed, QSTRINGS was listed in the documentation as proper when casing wasn't needed which in my case casing isn't important and also I believe the documentation said there was some space savings and performance gains by using QSTRINGS.\\n\\nThanks,\\nBrian\", \"post_time\": \"2014-04-02 20:19:41\" },\n\t{ \"post_id\": 5453, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Re: Instant Memory Pool Exhausted Error 1301\", \"username\": \"rtaylor\", \"post_text\": \"Brian,\\n\\nOK, I downloaded the file and ran your code and got the same error. The cause of the error is the type of file you defined in your DATASET declaration. This is a classic CSV file, but you defined it as a THOR file (a flat file). You also were using QSTRING in your RECORDS structure where STRING is more appropriate (but this did not cause the problem).\\n\\nHere's my revision of your code:LayoutHash := RECORD\\n STRING64 SHA1;\\n STRING32 MD5;\\n STRING8 CRC32;\\n STRING FileName;\\n INTEGER3 FileSize;\\n INTEGER3 ProductCode;\\n INTEGER3 OpSystemCode;\\n INTEGER3 SpecialCode;\\nEND;\\n\\nFile_OriginalHash := DATASET('~rttest::in::nsrlfile',\\n LayoutHash,\\n CSV(HEADING(1),QUOTE('"')));\\n\\nFile_OriginalHash;
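A minimal follow-up sketch, assuming the LayoutHash and File_OriginalHash definitions just above (illustrative, not from the original reply): with roughly 114 million rows in the sprayed file, a record count and a small sample are safer first checks than returning the whole dataset to the workunit.

// Illustrative only -- reuses the definitions from the revised code above.
OUTPUT(COUNT(File_OriginalHash), NAMED('RecCount'));       // total record count
OUTPUT(CHOOSEN(File_OriginalHash, 100), NAMED('Sample'));  // first 100 rows only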
This code runs correctly on my 3-node training cluster and I would expect it to do the same on your VM.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-02 20:14:12\" },\n\t{ \"post_id\": 5450, \"topic_id\": 1262, \"forum_id\": 10, \"post_subject\": \"Instant Memory Pool Exhausted Error 1301\", \"username\": \"SyntaxGeek\", \"post_text\": \"First post and new to HPCC, ECL but have been programming for years.\\n\\nI'm looking into Hadoop, Mongo and HPCC as possible solutions for manage large data assets.\\n\\nAs a test I've gone and downloaded the presetup VM image (thanks for this!) and extended the VM memory (12GB) and CPU (8 threads).\\n\\nI used the spray delimited method for the NIST NSRL file which can be downloaded here:\\nhttp://www.nsrl.nist.gov/RDS/rds_2.43/RDS_243.iso\\n\\nThere is a NSRL txt file embedded in the iso containing ~114 million csv rows.\\n\\nThe spray only took ~12 minutes which was extremely impressive and I created what I think is the correct necessary record and dataset files:\\n\\nRecord File:\\n\\nEXPORT LayoutHash := RECORD\\n\\tQSTRING64 SHA1;\\n\\tQSTRING32 MD5;\\n\\tQSTRING8 CRC32;\\n\\tSTRING FileName;\\n\\tINTEGER3 FileSize;\\n\\tINTEGER3 ProductCode;\\n\\tINTEGER3 OpSystemCode;\\n\\tINTEGER3 SpecialCode;\\nEND;\\n
\\n\\nDataset File:\\n\\nIMPORT NIST;\\n\\nEXPORT File_OriginalHash := DATASET(\\n\\t'~nist::BM::NSRL',\\n\\tNIST.LayoutHash,\\n\\tTHOR\\n);\\n
\\n\\nQuery Test:\\n\\nIMPORT NIST;\\n\\nOUTPUT(NIST.File_OriginalHash);\\n
\\n\\nWhat's odd is I've logged into the HPCC vm and am running top to see free mem and watch process utilization and when I submit the query test in ECL IDE I see it create a work unit but instantly I have the following error:\\n\\nError: System error: 1301: Memory pool exhausted (in Disk Read G1 E2) (0, 0), 1301,\\n
\\n\\nSo I've searched around but most users are having a similar issue when using incorrect target which I've verified I'm using "thor" which elsewhere has mentioned is the correct target.\\n\\n11787208k mem free\\n44% disk free\\n\\nHopefully I've posted this in the correct forum and someone could point me in the correct direction, my only thought is that the VM is preconfigured to the VMs originally configured RAM and CPU designations and my increases through VMWare Player aren't being properly utilized.\\n\\nThanks,\\nBrian\", \"post_time\": \"2014-04-01 22:31:52\" },\n\t{ \"post_id\": 5470, \"topic_id\": 1265, \"forum_id\": 10, \"post_subject\": \"Re: How to access parameters returned by a function\", \"username\": \"sameermsc\", \"post_text\": \"Hi Shank,\\n\\nper my understanding of your requirement, there is a function which returns multiple outputs in parallel and you want to capture 3rd output and use it for further processing\\n\\nhere is a sample code \\nmyfun2(string inp) := function\\nreturn parallel(output(inp + '_result1'),\\n\\t\\toutput(inp + '_result2'),\\n\\t\\toutput(inp + '_result3'),\\n\\t\\toutput(inp + '_result4'),\\n\\t\\toutput(inp + '_result5'));\\nend;\\n\\na := myfun2('hello');\\na;\\ns := workunit('Result 3', string); // see my explanation below \\noutput('captured output :' + s);\\n
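A hedged aside on the same idea: when the function's outputs are datasets rather than single string values, the documented DATASET(WORKUNIT(namedoutput), layout) form can read a named result back as a recordset. The result name and layout below are illustrative assumptions, not taken from the original post.

// Illustrative sketch only -- 'Result 3' and OutRec are assumed names.
OutRec := {STRING txt};
capturedDS := DATASET(WORKUNIT('Result 3'), OutRec);
OUTPUT(capturedDS, NAMED('CapturedResult'));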
\\n\\nExplanation:\\nupon inspection 3rd output is displayed under the result name "Result 3" in the workunit (note i am not using the named option in output). so "Result 3" result name is used for selecting the specific workunit result data\\n\\nHope this helps you \\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-04 08:34:46\" },\n\t{ \"post_id\": 5469, \"topic_id\": 1265, \"forum_id\": 10, \"post_subject\": \"Re: How to access parameters returned by a function\", \"username\": \"shank\", \"post_text\": \"Hi,\\nThanks for the reply , but this is not what i am looking for . Here my limitation is i cannot change anything in the function. I just can make a function call, and the outputs are returned in parallel. I have to access a particular output from the place where i call the function.\\nSo, if you have any idea how to do this please let me know.\\n\\n\\nRegards,\\nShank\", \"post_time\": \"2014-04-04 05:09:46\" },\n\t{ \"post_id\": 5462, \"topic_id\": 1265, \"forum_id\": 10, \"post_subject\": \"Re: How to access parameters returned by a function\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\ncreate a module to hold those 5 outputs and return it from function\\nthis way you can select the output you want for further processing\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-03 12:09:46\" },\n\t{ \"post_id\": 5460, \"topic_id\": 1265, \"forum_id\": 10, \"post_subject\": \"How to access parameters returned by a function\", \"username\": \"shank\", \"post_text\": \"Hi,\\nI have a function that returns multiple outputs in parallel.\\nSay the number of outputs are 5 and I need to write the output 3 to a dataset . How do I go about doing it . \\nI need the dataset(output 3) for comparing it with another dataset.\\n\\n\\nRegards,\\nShank\", \"post_time\": \"2014-04-03 12:05:29\" },\n\t{ \"post_id\": 5505, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"mrumsey\", \"post_text\": \"Thanks! The I got the code working well. \\n\\nPassing (Large.Dataset[1].order_dt[1..4] + Large.Dataset[1].order_dt[6..7]) still gives me the relation error, but I can just pass a hard-coded value and it works fine. It also runs much faster!\\n\\nThanks again.\", \"post_time\": \"2014-04-11 15:39:31\" },\n\t{ \"post_id\": 5503, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nOK, here's the way i would write this MACRO:
MAC_NewShoppers(LargeDS, HugeDS, InsDS, DateStr, OutDef) := MACRO\\n #UNIQUENAME(LDS_tbl);\\n #UNIQUENAME(pols);\\n #UNIQUENAME(pols_tbl);\\n #UNIQUENAME(am_d);\\n #UNIQUENAME(pol_d);\\n #UNIQUENAME(rec1);\\n #UNIQUENAME(pol2);\\n #UNIQUENAME(pol2_d);\\n #UNIQUENAME(IV_d);\\n #UNIQUENAME(shoppers1);\\n #UNIQUENAME(shoppers);\\n #UNIQUENAME(newrec);\\n\\t\\n\\t%LDS_tbl% := TABLE(LargeDS,{ambest,customer_nbr});\\n\\n\\t%pols% := HugeDS(idl<>0 AND \\n start_date <= DateStr + '01' AND\\n end_date >= DateStr + '31' AND\\n (holder_cancel_date >= DateStr + '31' OR \\n holder_cancel_date = 0));\\n\\t \\n\\t%pols_tbl% := TABLE(%pols%,{idl,ambest});\\n\\n\\t%am_d% := sort(distribute(%LDS_tbl%, hash(ambest)), ambest, local);\\n\\t%pol_d% := sort(distribute(%pols_tbl%, hash(ambest)), ambest, local);\\n\\t\\n\\t%rec1% := record\\n\\t\\trecordof(%pol_d%);\\n\\t\\t%am_d%.customer_nbr;\\n\\tend;\\n\\n\\t%pol2% := join( %pol_d%, %am_d%,\\n left.ambest=right.ambest,\\n transform(%rec1%, self:=left; self:=right;),\\n left outer,\\n local);\\n\\n\\t%pol2_d% := sort(distribute(%pol2%, hash(idl, customer_nbr)), \\n idl, customer_nbr, local);\\n\\t%IV_d% := sort(distribute(InsDS, hash(ih_person_id, customer_nbr)), \\n ih_person_id, customer_nbr, local); \\n\\n\\t%shoppers1% := join( %pol2_d%, %iv_d%,\\n left.idl=right.ih_person_id and left.customer_nbr=right.customer_nbr,\\n transform(recordof(iv_d), self:=right;),\\n right only,\\n local);\\n\\n\\t%shoppers% := dedup(sort(%shoppers1%, ih_person_id, customer_nbr), ih_person_id);\\n\\n\\t%newrec% := record\\n\\t\\t%shoppers%.ih_person_id;\\n\\t\\tunsigned1 shopper;\\n\\tend;\\n\\n\\tOutDef := project(%shoppers%, \\n transform(%newrec%, \\n self.shopper:=1;\\n self:=left)) \\n : persist('~mr::persist::shoppers_'+DateStr);\\n\\nENDMACRO;
And then you call it like this:\\n\\nMAC_NewShoppers(Large.Dataset, \\n Huge.dataset, \\n Insurview.dataset, \\n Large.Dataset[1].order_dt[1..4] + Large.Dataset[1].order_dt[6..7],\\n ThisResultDefName);
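A possible variation, sketched here only as an illustration of the FUNCTIONMACRO route mentioned elsewhere in this thread: if only the month changes between runs, the date-driven filter can be isolated in a FUNCTIONMACRO so the rest of the process stays ordinary ECL. The field names are taken from the code above; everything else is an assumption.

// Hedged sketch -- idl, start_date, end_date and holder_cancel_date
// come from the HugeDS filter shown above; the macro name is illustrative.
GetMonthPols(HugeDS, DateStr) := FUNCTIONMACRO
  LOCAL firstDay := (UNSIGNED8)(DateStr + '01');
  LOCAL lastDay  := (UNSIGNED8)(DateStr + '31');
  RETURN HugeDS(idl <> 0 AND
                start_date <= firstDay AND
                end_date >= lastDay AND
                (holder_cancel_date >= lastDay OR holder_cancel_date = 0));
ENDMACRO;

// usage: pols := GetMonthPols(Huge.dataset, '201308');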
And then your following code just uses ThisResultDefName to work with the result set of person ids. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-10 21:05:51\" },\n\t{ \"post_id\": 5502, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"rtaylor\", \"post_text\": \"OK, just a few more questions:\\n\\n1. Your two SORT(DISTRIBUTE()) functions both use HASH(ambest)am_d := sort(distribute(ambest, hash(ambest)), ambest, local);\\npol_d := sort(distribute(pols, hash(ambest)), ambest, local);
so what is the relationship between these two files?\\n\\n2. Do your 12 sets of "paired" datasets all use the same RECORD structures?\\n\\n3. This part of your pols filter: start_date <= 20130801 AND\\n end_date >= 20130831
implies you want all the records outside the date range. Is that correct? \\n\\n3. It appears as if the intention here is to identify the set of unique shoppers.ih_person_id values from iv_d that do NOT appear in the reuslt of your first JOIN.\\n\\nSo, is the overall intention to identify new shoppers from the fact that they are "shoppers" and have not yet bought something?\\n\\nMatt, can you give me a call at my desk phone? We might get further faster if we discuss it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-10 19:51:59\" },\n\t{ \"post_id\": 5501, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"mrumsey\", \"post_text\": \"Below is the code that I ran for the stand-alone one month run. It ran fine and gave me expected results.\\n\\n\\nambest := Large.DataSet;\\n\\npols := Huge.DataSet(idl<>0 AND\\t\\n start_date <= 20130801 AND\\n end_date >= 20130831 AND\\n (holder_cancel_date >= 20130831 OR \\n holder_cancel_date = 0));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nam_d:=sort(distribute(ambest, hash(ambest)), ambest, local);\\npol_d:=sort(distribute(pols, hash(ambest)), ambest, local);\\n\\nrec1:=record\\nrecordof(pol_d);\\nam_d.customer_nbr;\\nend;\\n\\npol2:=join(\\tpol_d, am_d,\\n left.ambest=right.ambest,\\n transform(rec1, self:=left; self:=right;),\\n left outer,\\n local);\\n\\npol2_d:=sort(distribute(pol2, hash(idl, customer_nbr)), idl, customer_nbr, local);\\nIV_d:=sort(distribute(insurview, hash(ih_person_id, customer_nbr)), ih_person_id, customer_nbr, local);\\t\\t\\t\\t\\t\\t\\n\\n\\n\\nshoppers1:=join( pol2_d, iv_d,\\n left.idl=right.ih_person_id and left.customer_nbr=right.customer_nbr,\\n transform(recordof(iv_d), self:=right;),\\n right only,\\n local);\\n\\nshoppers := dedup(sort(shoppers1, ih_person_id, customer_nbr), ih_person_id);\\n\\nnewrec:=record\\nshoppers.ih_person_id;\\nunsigned1 shopper;\\nend;\\n\\nshop_list:=project(shoppers, transform(newrec, self.shopper:=1; self:=left)):persist('~mr::persist::shoppers_201308');\\n
\", \"post_time\": \"2014-04-10 18:27:35\" },\n\t{ \"post_id\": 5499, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nOK, looking at the whole MACRO I can see that you seem to be over-complicating the whole process by trying to write a MACRO before you have a fully-functional single instance of the standard process. This is one guaranteed way to drive yourself crazy! \\n\\nTherefore, let's start with just the first instance and get its code right before we then turn it into a MACRO (which is exactly the way a MACRO should be created). Can you please post the instance code you have? \\n\\nRichard\", \"post_time\": \"2014-04-10 15:17:04\" },\n\t{ \"post_id\": 5498, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"mrumsey\", \"post_text\": \"
\\ngetShoppers(din, dout) := MACRO\\n\\n#uniquename (ds_date)\\nstring4 %ds_date% := din.order_dt[1..4] + din.order_dt[6..7];\\n\\n#uniquename (first_day)\\nunsigned8 %first_day% := (integer)(%ds_date%+'01');\\n\\n#uniquename (last_day)\\nunsigned8 %last_day% := (integer)(%ds_date%+'31');\\n\\n#uniquename (policy)\\n%policy% := Huge.Data_Set ( uniqueID <> 0 AND\\n start_date <= %first_day% AND\\n end_date >= %last_day% AND\\n (holder_cancel_date >= %last_day% OR\\n holder_cancel_date = 0)\\n );\\n#uniquename (ambest_d)\\n%ambest_d% :=\\tsort(distribute(ambest, hash(ambest)), ambest, local);\\n\\n#uniquename (policy_d)\\n%policy_d% := sort(distribute(%policy%, hash(ambest)), ambest, local);\\n\\n#uniquename (cust_nbr_rec)\\n%cust_nbr_rec% := record\\n\\trecordof(%policy_d%);\\n\\t%ambest_d%.customer_nbr;\\nend;\\n\\n#uniquename (policy2)\\n%policy2% := join( %policy_d%, %ambest_d%,\\n left.ambest=right.ambest,\\n transform(%cust_nbr_rec%, self:=left; self:=right;),\\n left outer,\\n local\\n );\\n\\n#uniquename (policy2_d)\\n%policy2_d% := sort(distribute(%policy2%, hash(idl, customer_nbr)), idl, customer_nbr, local);\\n\\n#uniquename (ds_d)\\n%ds_d% := sort(distribute(din, hash(ih_person_id, customer_nbr)), ih_person_id, customer_nbr, local);\\n\\n#uniquename (ggg)\\n%ggg% := record\\nrecordof(%ds_d%);\\nunsigned8 deat;\\nend;\\n\\n#uniquename (shoppers_join)\\n%shoppers_join% := join( %policy2_d%, %ds_d%,\\n left.idl = right.ih_person_id AND\\n left.customer_nbr = right.customer_nbr,\\n transform(%ggg%, self.deat:=%first_date%; self:=right;),\\n right only,\\n local\\n );\\ndout:=\\tdedup(sort(%shoppers_join%, ih_person_id, household_id), ih_person_id, household_id);\\nENDMACRO;
\\n\\nThis is the whole MACRO that I created. I also have a very similar set of code that works for a single month (I just hard code the start_date and end_date values).\\nI have also attempted passing a string value in the MACRO call (din, ds_date, dout)\\nand doing the following:\\n#uniquename (first_day)\\nunsigned8 %first_day% := (integer)(ds_date+'01');
\\n\\nIt may work, but I can't get verification of it working. I tried adding the %first_date% value into a dataset and looking at the output, but the value of %first_date% resolves to 0 when it is removed. \\n\\nI assume a lot of this can be credited to simple mistakes from not understanding MACROs, so I thank everyone for their time in helping me understand!\", \"post_time\": \"2014-04-10 14:39:16\" },\n\t{ \"post_id\": 5497, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"rtaylor\", \"post_text\": \"Matt,I need to process and compare 12 sets of 2 separate datasets (time related) in order to validate orders as 'new business.' Is it possible to use information from the first dataset to filter the second?
The answer is, of course, Yes. The real question is: How?\\n\\nYou're trying to create a MACRO to do this. Sounds like a logical choice, since a MACRO is designed to generate ECL code to define a standard process that may be used with "any" dataset(s).\\n\\nSo, the way to create a MACRO in ECL is to start by NOT writing a MACRO, but instead writing a specific instance of the standard process you want to generate. \\n\\nThe MACRO in your first post is incomplete (there's no ENDMACRO there) and the code you just posted:pols := HUGE.Data_Set( idl<>0 AND\\n start_date <= 20130801 AND \\n end_date >= 20130831 AND \\n (holder_cancel_date >= 20130831 OR \\n holder_cancel_date = 0)\\n );
is not a two-dataset process but just a single filtered dataset definition. You said that you need to use two datasets, so can you please post the complete process you need for a specific instance, or at least describe exactly what is needed from the "non-huge" dataset?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-10 14:26:16\" },\n\t{ \"post_id\": 5496, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"bforeman\", \"post_text\": \"Hi Matt,\\n\\nI know that you are scheduled to be in class next week, so maybe we can look at this further, but if it is the dates that are changing maybe all you need is a function or possibly a FUNCTIONMACRO that simply passes the dynamic dates as needed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-10 13:39:54\" },\n\t{ \"post_id\": 5495, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"mrumsey\", \"post_text\": \"pols := HUGE.Data_Set( idl<>0 AND\\n start_date <= 20130801 AND\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n end_date >= 20130831 AND\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n (holder_cancel_date >= 20130831 OR \\n holder_cancel_date = 0)\\n );
\\n\\nI just need to grab a subset of the account between the first and end of each of the 12 months. Since this is different for each month, I wanted to be able to dynamically select them. I also wanted it to be easily reproduced if I needed to change months on subsequent runs. I may just have to hard-code an attribute with an IF statement.\", \"post_time\": \"2014-04-10 13:33:45\" },\n\t{ \"post_id\": 5494, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Re: Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"bforeman\", \"post_text\": \"Hi Matt,\\n\\nThe error looks to be pretty specific:\\n\\niv1.order_dt - Table iv1 is not related to Huge.Data_Set
\\n\\nSo you are trying to filter your Huge.Data_Set based on a field from a different record set, and that will not work.\\n\\nWhat does your original code look like prior to attempting the MACRO conversion?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-10 12:00:26\" },\n\t{ \"post_id\": 5493, \"topic_id\": 1272, \"forum_id\": 10, \"post_subject\": \"Referencing Multiple Datasets in an HPCC Macro\", \"username\": \"mrumsey\", \"post_text\": \"I need to process and compare 12 sets of 2 separate datasets (time related) in order to validate orders as 'new business.' Is it possible to use information from the first dataset to filter the second? \\n\\ngetShoppers(din, dout) := MACRO\\n\\n#uniquename (ds_date)\\nstring4 %ds_date% := din.order_dt[1..4] + din.order_dt[6..7];\\n\\n#uniquename (first_day)\\nunsigned8 %first_day% := (integer)(%ds_date%+'01');\\n\\n#uniquename (last_day)\\nunsigned8 %last_day% := (integer)(%ds_date%+'31');\\n\\n#uniquename (policy)\\n%policy% := Huge.Data_Set\\t( uniqueID <> 0 AND\\n start_date <= %first_day% AND\\n end_date >= %last_day% AND\\n (holder_cancel_date >= %last_day% OR\\n holder_cancel_date = 0)\\n );\\n
\\n\\nThe first 3 attributes are formatting and manipulating date/time variables into a more useful format (ds_date, first_day, last_day).\\n\\n%Policy% is using those reformatted dates to filter a much larger dataset for a pertinent subset of accounts.\\n\\nI get error 2131 referencing the ds_date code:\\niv1.order_dt - Table iv1 is not related to Huge.Data_Set\\n\\n'iv1' is 'din' in the macro.\\n\\nI have tried using a function to pull the dataset but got the same results. My Huge.Data_Set is located on a Production server, so I am unable to write a separate module to run this code for me. I really don't want to re-write the same code 12 times, but will if there is no way in HPCC to accomplish my goal.\\n\\nThanks in advance for any help!\", \"post_time\": \"2014-04-09 20:53:27\" },\n\t{ \"post_id\": 5509, \"topic_id\": 1275, \"forum_id\": 10, \"post_subject\": \"Re: Errors spraying XML and XML as CSV\", \"username\": \"bforeman\", \"post_text\": \"Hi Michael,\\n\\nA number of people have been replying to you via your private email, but essentially we think that this issue is related to the record terminator. Since you are treating the entire document as a single record, you need the appropriate tag that marks the end of the document in your spray options. So in the Spray Delimited option, make sure that you have the correct end of document identified.\\n\\nIt's a shame that you could not attend the Advanced ECL sessions this week, but in the Advanced THOR book that I gave you Exercise 3A shows a great example of exactly what you are trying to do.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-15 12:12:12\" },\n\t{ \"post_id\": 5506, \"topic_id\": 1275, \"forum_id\": 10, \"post_subject\": \"Errors spraying XML and XML as CSV\", \"username\": \"micevepay\", \"post_text\": \"I have been trying to spray about 1000000 or so XML files. Each containing a single record I followed the forum and came across http://hpccsystems.com/bb/viewtopic.php?f=8&t=483 where they sprayed the XML as a CSV but when I tried that method I got the error "...contained a line of length greater than 10485760 bytes."\\n\\nI then took a look at http://hpccsystems.com/bb/viewtopic.php?t=136&p=477 and looked at the solutions provided by leaving the QUOTE empty did not help at all.\\n\\nThen there is what I believe is the initial problem. Each file contains a single record but the tag for the record contains a colon (:). In my very first attempt to spray the data using the row tag xocs:doc the spraying process never started after a day or so of waiting. \\n\\nI've read the reference guide and the forums. Is there a legitimate way to spray multiple xml files, that contain special characters in the row tag, that are at least 21MB per file, and line lengths are longer than 10485760 bytes?\", \"post_time\": \"2014-04-13 03:43:30\" },\n\t{ \"post_id\": 5513, \"topic_id\": 1277, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic query\", \"username\": \"dbang\", \"post_text\": \"Thanks for testing Bob. 
I have created a ticket in Jira.\\n\\nKind regards\\nDaniel\", \"post_time\": \"2014-04-17 08:47:45\" },\n\t{ \"post_id\": 5512, \"topic_id\": 1277, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic query\", \"username\": \"bforeman\", \"post_text\": \"Just to add to this, the example "fred" code works when THOR is targeted, but only fails when Roxie is targeted.\\n\\nBob\", \"post_time\": \"2014-04-16 12:47:31\" },\n\t{ \"post_id\": 5511, \"topic_id\": 1277, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic query\", \"username\": \"bforeman\", \"post_text\": \"Hi Daniel,\\n\\nI can confirm the issue running the "fred" example referenced in the DATASET Named Output DATASETs section. I wonder if both workunits need to be published to Roxie? Nevertheless, I see the same "UNIMPLEMENTED" message as you reported.\\n\\nReported by: Roxie\\nMessage: UNIMPLEMENTED at /var/lib/jenkins/workspace/LN-Candidate-4.2.2-rc1/LN/centos-5.7-x86_64/HPCC-Platform/roxie/ccd/ccdserver.cpp:5234
\\n\\nCan you take a few seconds and log this into the Community Issue Tracker?\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThanks for the report Daniel!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-16 12:41:27\" },\n\t{ \"post_id\": 5510, \"topic_id\": 1277, \"forum_id\": 10, \"post_subject\": \"Dynamic query\", \"username\": \"dbang\", \"post_text\": \"Hi \\n\\nI am trying out to figure out the best way to query an "unknown dataset" where a the dataset is determined by parameter. So far I think the solution is to call a workunit which are deployed to roxie. This means the wuid can be passed as a string.\\n\\nI used the fred example from the help doc\\nattr := DATASET( WORKUNIT( [ wuid , ] namedoutput ), struct );\\n\\nhowever when I try and run it I get the following error.\\n\\nError: UNIMPLEMENTED at /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.2-1/CE/ubuntu-12.04-amd64/HPCC-Platform/roxie/ccd/ccdserver.cpp:5236 (0, 0), 1417, \\n\\nI use version 4.2.2.1 64-bit vm image and IDE.\\n\\nBtw are they any way to pass parameters to the called workunit.\", \"post_time\": \"2014-04-15 12:46:33\" },\n\t{ \"post_id\": 5986, \"topic_id\": 1278, \"forum_id\": 10, \"post_subject\": \"Re: Error: System error: 4\", \"username\": \"jsmith\", \"post_text\": \"Looking at the tail of the slave log on the indicated IP (10.239.40.6) for that workunit:\\n0000072F 2014-06-27 12:50:13.362 13844 15097 "ERROR: 28: /var/lib/jenkins/workspace/EE-Candidate-withplugins-5.0.0-rc3/LN/centos-6.4-x86_64/HPCC-Platform/comm\\non/thorhelper/thorcommon.cpp(1374) : flushBuffer : CFileIO::write, [color=#FF0000:2erlz792]Disk full"\\n00000730 2014-06-27 12:50:13.588 13844 15273 "ERROR: HDIST: recvloop - activity(hashdistribute, 129) : CFileIO::write, [color=#FF0000:2erlz792]No space left on device"\\n00000731 2014-06-27 12:50:13.588 13844 15274 "Distribute send finishing - activity(hashdistribute, 129)"\\n00000732 2014-06-27 12:50:13.588 13844 15274 "HDIST: waiting for threads - activity(hashdistribute, 129)"\\n00000733 2014-06-27 12:50:13.589 13844 15279 "CDistributorBase::sendBlock stopped slave 12 - activity(hash00000000 2014-06-27 12:50:33.959 16765 16765 "Opened\\n log file //10.239.40.6/mnt/disk1/var/log/HPCCSystems/mythor/thorslave.1.2014_06_27.log"\\n\\n\\nSo it ran out of disk space...\\nAnd, it looks like the process died, because there wasn't even enough room to write the logs detailing the error, before it fed the info back to the master.\\nReally, the data / temp files, should not be on the same partition as the system/logs, which would have at least meant that the disk space error would have got reported.\", \"post_time\": \"2014-06-30 11:40:33\" },\n\t{ \"post_id\": 5979, \"topic_id\": 1278, \"forum_id\": 10, \"post_subject\": \"Re: Error: System error: 4\", \"username\": \"micevepay\", \"post_text\": \"This has happened to me twice in 24 hours on the Machine Learning Dev.\\n\\nSystem error: 4: MP link closed (10.239.40.6:20100)\\nMP link closed (10.239.40.6:20100) : Error aborting job, will cause thor restart\\n\\nW20140626-172610\", \"post_time\": \"2014-06-27 19:04:31\" },\n\t{ \"post_id\": 5529, \"topic_id\": 1278, \"forum_id\": 10, \"post_subject\": \"Re: Error: System error: 4\", \"username\": \"jsmith\", \"post_text\": \"In general it means the master has lost contact with slave indicated in the error. 
What caused the slave to 'disappear' can be various, but most often it is an indication that the slave crashed, was killed or less frequently, there was a network connectivity issue.\", \"post_time\": \"2014-04-22 14:29:39\" },\n\t{ \"post_id\": 5514, \"topic_id\": 1278, \"forum_id\": 10, \"post_subject\": \"Error: System error: 4\", \"username\": \"tlhumphrey2\", \"post_text\": \"While running on the Machine Learning Dev cluster, I got the following error message. Does anyone know what it means?\\n\\nError: System error: 4: Graph[148], countproject[489]: SLAVE 10.239.40.19:20100: MP link closed (10.239.40.18:20100), \\n\\nW20140417-144402\", \"post_time\": \"2014-04-17 19:48:51\" },\n\t{ \"post_id\": 5527, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,\\n\\nI have two comments:\\n\\n1. When you are posting formatted text, please use the "Code" tags in the editor so that this:\\n\\nMatchId\\tLHSId\\tLHSName\\tRHSId\\tRHSName\\tType\\n1 1\\tA1\\t11\\tA\\tWeekly\\n2 1 A1 12\\tA1\\tWeekly\\n\\nbecomes readable after posting, like this:\\n\\nMatchId LHSId LHSName RHSId RHSName Type\\n1 1 A1 11 A Weekly\\n2 1 A1 12 A1 Weekly
You should also note that the posting process "eats" tabs, so you need to only use blank spaces in the text to create the formatting -- use the Preview button to see how it will appear before hitting Submit.\\n\\n2. Description of your code process is helpful, but looking at the actual code is more helpful for us to understand exactly what you're doing. Mainly because your description will always say what you "think" you're doing, whereas the code shows what you're "actually" doing. The two can be different. \\n\\nAlso, posting your actual code makes it possible for us to try to duplicate the issue and test various possible solutions before posting our reply. So please, help us to help you and post your code.\\n\\nRichard\", \"post_time\": \"2014-04-22 13:45:04\" },\n\t{ \"post_id\": 5526, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"bforeman\", \"post_text\": \"
ii) Use Normalize to take out the child records and have them copied as parent records
\\n\\nWithout looking at your code, wouldn't a simple SORT/ROLLUP be better to do this task?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-22 12:10:33\" },\n\t{ \"post_id\": 5524, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"biswanath_c\", \"post_text\": \"I also tried using \\n\\n#OPTION('outputlimit', 100);\\n\\nbut when i used this i got a different error:\\n\\n"Error: System error: 10023: result global('spillOBROB1') in workunit contains an invalid raw value [Jbuff: Out of Memory (134217728k)] (0, 0), 10023, "\\n\\nPlease advise.\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-04-22 10:21:12\" },\n\t{ \"post_id\": 5523, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nAm afraid the issue is not in the join but with a call to a normalize function.\\n\\nI have explained my scenario in detail below. Please advise the solution for this scenario of mine:\\n\\n\\nI have 2 datasets that need to be compared using a specific logic – a simplified version of the same is as below: \\n\\nFirst, the 2 datasets are joined to output ‘joinedSet’. In subsequent steps, invalid rows of joined set are identified and removed. \\n\\nConsider my joined set looks like this:\\nMatchId\\tLHSId\\tLHSName\\tRHSId\\tRHSName\\tType\\n1\\t1\\tA1\\t11\\tA\\tWeekly\\n2\\t1\\tA1\\t12\\tA1\\tWeekly\\n3\\t2\\tA\\t11\\tA\\tWeekly\\n4\\t2\\tA\\t12\\tA1\\tWeekly\\n5\\t3\\tB\\t \\t \\tWeekly\\n6\\t4\\tC\\t13\\tC\\tDaily\\n7\\t5\\tD\\t14\\tD2\\tWeekly\\n8\\t6\\tE\\t15\\tE1\\tWeekly\\n9\\t7\\tF\\t16\\tF1\\tWeekly\\n10\\t7\\tF\\t17\\tF\\tWeekly\\n11\\t8\\tG\\t18\\tG\\tDaily\\n\\nStep 1 : Pick type = ‘daily’ records – these are always considered valid\\nStep 2: Between successive records from Step 1, return best matches (computed by a logic using weightage for name matches), in ‘order of appearance in original dataset’ (marked by their respective Ids) as valid records. \\nMy target dataset of valid matches should look like this:\\nMatchId\\tLHSId\\tLHSName\\tRHSId\\tRHSName\\n2\\t1\\tA1\\t12\\tA1\\n \\t2\\tA\\t \\t \\n \\t3\\tB\\t \\t \\n6\\t4\\tC\\t13\\tC\\n7\\t5\\tD\\t14\\tD2\\n8\\t6\\tE\\t15\\tE1\\n10\\t7\\tF\\t17\\tF\\n11\\t8\\tG\\t18\\tG\\n\\nI have written Step2 logic like this:\\ni)\\tIterate over Step1 records\\n For all intermediate records, identify best matches and return MatchIds from joinedSet for valid records\\n Store returned MatchIds as child dataset of current Step1 record\\nii)\\tUse Normalize to take out the child records and have them copied as parent records\\n\\nIn the (ii) step, while using ‘normalize’, I get the following error:\\nError: System error: 10099: Graph[34], workunitwrite[47]: Dataset too large to output to workunit (limit 10) megabytes, in result (name=spillISJPA1), Master exception\\n\\nIt worked when the total number of records in the joined set were in the order of several 100s. Now, the joined set contains several 1000s of records when this exception is thrown – the logic works though. 
\\n\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-04-22 10:18:31\" },\n\t{ \"post_id\": 5522, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"tlhumphrey2\", \"post_text\": \"Try placing the following at the top of your attribute:\\n\\n#option('outputLimit',100);\\n\\nThis changes the default (10 mb) max size that can output to a workunit to 100 mb.\", \"post_time\": \"2014-04-21 15:23:49\" },\n\t{ \"post_id\": 5521, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large to output to WorkUnit\", \"username\": \"bforeman\", \"post_text\": \"Try switching to a SMART JOIN and see if the compiler can optimize the output for you. For more information on SMART JOIN, see the latest blog by Gavin Halliday:\\n\\nhttp://hpccsystems.com/blog/ghalliday\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-21 13:43:28\" },\n\t{ \"post_id\": 5518, \"topic_id\": 1280, \"forum_id\": 10, \"post_subject\": \"Dataset too large to output to WorkUnit\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nAm getting this error while running a code :\\n\\nError: System error: 10099: Graph[33], workunitwrite[46]: Dataset too large to output to workunit (limit 10) megabytes, in result (name=spillRP1F11), Master exception (0, 0), 10099, \\n\\nThe same code worked all right with a different set of input data. This error seems to be from the output of a join in my code.\\n\\nIs there a way to increase the 10 mb limit? What should one be doing on such an error?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-04-21 10:26:04\" },\n\t{ \"post_id\": 6174, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"rtaylor\", \"post_text\": \"micevepay,\\n\\nIf I understand correctly what you're trying to accomplish, then I believe you can do that by simply treating the nested child dataset as a separate dataset and using PROJECT to produce your C result set. This technique is shown in the Advanced ECL (working with Relational data) course.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-05 13:37:05\" },\n\t{ \"post_id\": 6163, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"micevepay\", \"post_text\": \"Hello, I am trying to do something similar with appending after a loop. Except in my case, A is dataset with a set of children. Each child, B1 - Bn, B has a different number of records (B1max, B2max,....Bnmax). For each B I am performing some mathematical calculations and returning solutions. I would like for that solution, C, to have n number or record results. \\n\\n\\n\\nC += Loop(AB1 to ABn).\", \"post_time\": \"2014-08-04 19:41:51\" },\n\t{ \"post_id\": 5540, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Sure. 
Will mail it your id after getting checking with the client?\", \"post_time\": \"2014-04-23 14:13:01\" },\n\t{ \"post_id\": 5539, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Can you post the code for function that produces the child dataset that triggers the issue?\", \"post_time\": \"2014-04-23 14:03:23\" },\n\t{ \"post_id\": 5538, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I tried increasing the limit too, but it didn't work. \\n\\nSo, I'm suspecting there is something in that particular piece of code that triggers an immediate write to work unit (there was something about spills.. in the error message)?\\n\\nGayathri\", \"post_time\": \"2014-04-23 14:01:19\" },\n\t{ \"post_id\": 5536, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"tlhumphrey2\", \"post_text\": \"There is a limit to how large of a dataset you can output to a workunit, 10 Mb. But, you can increase this limit with the #OPTION command. For example:\\n\\n#OPTION('outputLimit',100)\\n\\nWould increase the limit to 100 Mb.\", \"post_time\": \"2014-04-23 12:48:40\" },\n\t{ \"post_id\": 5535, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Richard! That worked. \\n\\nOn an (un)related note, is there a memory limitation on datasets returned by a 'function'? \\n\\nIn my scenario, within ITERATE, a function call is made which computes and returns child dataset records and for some scenarios, it fails with a 'dataset too large to output to workunit' error. \\n\\nIf I comment out the function call (and associated functionality), the code executes fine. When and why would the code attempt to 'output' to workunit?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-04-23 10:18:09\" },\n\t{ \"post_id\": 5534, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nYou simply reference the nested child dataset as if it were a separate dataset. Like this:ParentRec := RECORD\\n INTEGER1 NameID;\\n STRING20 Name;\\nEND;\\nChildRec := RECORD\\n INTEGER1 NameID;\\n STRING20 Addr;\\nEND;\\nDenormedRec := RECORD\\n ParentRec;\\n DATASET(ChildRec) Children{MAXCOUNT(5)};\\nEND;\\n\\nDS := DATASET([ {1,'Gavin',[{1,'10 Malt Lane'}]},\\n {2,'Liz', [{2,'10 Malt Lane'},\\t\\n {2,'3 The cottages'}]},\\n {3,'Mr Nobody',[]},\\n {4,'Anywhere',[{4,'Here'},\\t\\n {4,'There'},\\t\\n {4,'Near'},\\t\\n {4,'Far'}]}], \\n DenormedRec); \\nChildDS := DS.Children;\\n\\nChildDS;
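For comparison, a short sketch (not part of the original reply) of the NORMALIZE route from option iii, which the direct DS.Children reference above makes unnecessary; it reuses the DS and ChildRec definitions from the example.

// Hedged sketch -- flattening the nested Children field explicitly.
flatKids := NORMALIZE(DS, LEFT.Children,
                      TRANSFORM(ChildRec, SELF := RIGHT));
OUTPUT(flatKids);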
You recall from the Advanced ECL class that each child dataset can be referenced as if it were a separate dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-22 19:23:30\" },\n\t{ \"post_id\": 5533, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"tlhumphrey2\", \"post_text\": \"The way I would handle this problem would be to start by making a new dataset, from A, that contains only the essential fields (why? So, each record is small and therefore takes little time to distribute). I believe the essentials are: 1) a unique identifier for each record in a successive record pair of A, and 2) fields of A that are used to filter B records. Let us call this new dataset, Apairs.\\n\\nThen JOIN Apairs and B where the JOIN condition does the filtering of B. The record layout of the JOIN’s resulting dataset would contain 1) the unique identifiers for each record in a successive record pair of A and a single B record. Lets call the JOIN’s resulting dataset, Apairs_and_B.\\n\\nThen, you would use a ROLLUP to make a dataset containing: 1) a unique identifier for each record in a successive record pair of A and 2) a child dataset that contains all B records of Apairs_and_B associated with the pair of A records.\\n\\nAt this point, I’m not sure where you want to go with this because having a B child dataset that is part of a single record of A doesn’t seem right since it takes two successive A records to identify the B records.\", \"post_time\": \"2014-04-22 18:24:58\" },\n\t{ \"post_id\": 5532, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Richard,\\nHow can I extract the child dataset directly without using NORMALIZE?\\n\\nAt the end of it, I need a recordset C where\\nC = A + cumulative(child dataset populated in each field of A)\\n\\nIs there another way to achieve this?\\n\\n@tlhumphrey2,\\nThere are some dependent fields between A and B based on which, dynamically I would decide which rows of B should be fetched for a particular set of rows of A.\", \"post_time\": \"2014-04-22 17:10:44\" },\n\t{ \"post_id\": 5530, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nI would go for option iii first -- it's the simplest solution. \\n\\nAnd you don't need to use NORMALIZE to extract the resulting child dataset records, because you can just treat the nested child dataset as a separate dataset for subsequent operations.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-22 14:36:05\" },\n\t{ \"post_id\": 5528, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Re: Append rows dynamically to a dataset\", \"username\": \"tlhumphrey2\", \"post_text\": \"Would you provide more details? Here is what I got:\\n\\nFor each pair of successive records in A you plan to filter (what?) using B. And, how are you using B?\", \"post_time\": \"2014-04-22 14:18:38\" },\n\t{ \"post_id\": 5525, \"topic_id\": 1282, \"forum_id\": 10, \"post_subject\": \"Append rows dynamically to a dataset\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I have 2 datasets - A and B. I need to iterate over A and for every successive pair of records in A, I need to do process & filter on B to return a subset of records. \\n\\nMy target is to accumulate all such filtered records, into a result set C. 
\\n\\nI thought of the following options to achieve this, but which of these would be optimal? Is there any other alternative? I am looking at doing this for 10K records of A, to begin with. \\ni) LOOP - each iteration's output will contain the records appended during that iteration \\nii) GRAPH - output keeps building with successive iterations. Final iteration's output dataset will contain all appended records\\niii) A child dataset field in A where I push all related records from B after processing. At the end of iterate, use NORMALIZE to pull out those records
\\n\\nThanks,\\nGayathri\", \"post_time\": \"2014-04-22 10:54:02\" },\n\t{ \"post_id\": 5674, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"If your input dataset contains the XML data in the structure, then all you need is the RECORD structure and DATASET declaration with the correct xpaths specified, like this://if your Sprayed file:'student::test' contains: \\n//<dataset>\\n//<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>\\n//<Row><FName>Fred</FName><LName>Flintstone</LName><EmpID>88</EmpID></Row>\\n//<Row><FName>Evel</FName><LName>Knievel</LName><EmpID>666</EmpID></Row>\\n//</dataset>\\n\\nnamesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\nB := DATASET('~student::test', namesRec, XML('dataset/Row'));\\nOUTPUT(B);
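An optional round-trip check, assuming the definitions above (the output filename is illustrative and not from the original post): the same layout can be written back out as XML to confirm the xpaths resolve as expected.

// Illustrative only -- writes B back out using an explicit row tag.
OUTPUT(B, , '~student::test_copy', XML('Row'), OVERWRITE);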
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-09 13:25:28\" },\n\t{ \"post_id\": 5673, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"I don't want to do manually and update the values each time for the new incoming file to the below defined x. \\nx:='<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>'; \\nHow I can form the above structure of x:= form dynamically.\\n\\nOr can I do the below \\n\\n\\nSprayfile:'student::test' contains \\n<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>\\n\\nnamesRec := RECORD\\nUNSIGNED2 EmployeeID{xpath('EmpID')};\\nSTRING10 Firstname{xpath('FName')};\\nSTRING10 Lastname{xpath('LName')};\\nEND;\\nB := DATASET('~student::test', namesRec, XML('Row'));\\nrec := FROMXML(namesRec,B);\\nOUTPUT(rec);\\n
\", \"post_time\": \"2014-05-09 12:15:41\" },\n\t{ \"post_id\": 5672, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"what code should i use for x := '<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>';
I don't understand the question. \\n\\nThis example code is self-contained and fully functional -- it runs correctly on my machine. Does it not produce a result on yours?\", \"post_time\": \"2014-05-08 20:29:42\" },\n\t{ \"post_id\": 5671, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"what code should i use for x := '<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>';\\n\\nin the below example..\\n\\nnamesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\n\\nx := '<Row><FName>George</FName><LName>Jetson</LName><EmpID>42</EmpID></Row>'; \\n\\nrec := FROMXML(namesRec,x);\\nOUTPUT(rec);\", \"post_time\": \"2014-05-08 20:14:24\" },\n\t{ \"post_id\": 5667, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"And is it possible to create it dynamically "creating the of xml datasets" which was shown in solution instead...
I don't understand the question.\\n\\nIn my previous example, I start by creating the XML dataset because the rest of the code needs it to work, and I wanted to make sure I had duplicated the XML structure you had previously shown. For YOUR code, you should already have that XML dataset provided by whomever is supplying that file to you.\\n\\nRichard\", \"post_time\": \"2014-05-08 13:31:29\" },\n\t{ \"post_id\": 5665, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"I will share the example soon...\\nAnd is it possible to create it dynamically "creating the of xml datasets" which was shown in solution instead...\", \"post_time\": \"2014-05-07 20:59:42\" },\n\t{ \"post_id\": 5662, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"If the incoming XML dataset does not use consistent tags you fire the developers that created it! \\n\\nSeriously, can you show me an example of what you mean?\\n\\nRichard\", \"post_time\": \"2014-05-07 19:48:14\" },\n\t{ \"post_id\": 5660, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"Thanks Richard\\n\\nHow to define the xml dataset(the first step in solution provided) when the incoming xml file is not consistent for child data.\", \"post_time\": \"2014-05-07 19:24:04\" },\n\t{ \"post_id\": 5657, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"So I started by creating the XML dataset, like this:
ds := DATASET([\\n{' <School>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Admission>SA</Admission> \\t\\t'},\\n{' <Students>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Student>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Id>S</Id> \\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Persons>\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Person>Gav</Person> \\t\\t'},\\n{' <Person>Rik</Person> \\t\\t'},\\n{' <Person>Pow</Person> \\t\\t'},\\n{' </Persons>\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' </Student>\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Student>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Id>S</Id> \\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Persons>\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' <Person>Gavin</Person> \\t'},\\n{' <Person>Ricardo</Person>'}, \\n{' <Person>Powell</Person> '},\\n{' </Persons>\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' </Student>\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' </Students>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'},\\n{' </School>\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'}\\n],{STRING txt});\\n\\nOUTPUT(ds,,'~RTTEST::XML::ReadChildAsSTRING',CSV(SEPARATOR(''),QUOTE('')),OVERWRITE);
And then I took your code and re-wrote it like this:PersonRec := RECORD\\n STRING Person{xpath('')};\\nEND;\\n\\nStudentRec := RECORD\\n STRING Id{xpath('Id')};\\n DATASET(PersonRec) Names{XPATH('Persons/Person')};\\nEND;\\n \\nSchoolRec := RECORD\\n String Admission{xpath('Admission')}; \\n DATASET(StudentRec) Students{XPATH('Students/Student')};\\nEND;\\nB := DATASET('~RTTEST::XML::ReadChildAsSTRING', SchoolRec, XML('School'));\\nB;
This will split out each tagged value into its own field in nested child datasets (the way I would approach the problem).\\n\\nThen, since you wanted to show the child datasets as a STRING containing XML text, I re-did it this way:MyStudentRec := RECORD\\n STRING Id{xpath('Id')};\\n STRING Names{XPATH('Persons/<>')};\\nEND;\\nMySchoolRec := RECORD\\n STRING Admission{xpath('Admission')}; \\n DATASET(MyStudentRec) Students{XPATH('Students/Student')};\\nEND;\\nC := DATASET('~RTTEST::XML::ReadChildAsSTRING', MySchoolRec, XML('School'));\\nC;
And now result shows the set of "Person" tags in its own STRING field as XML text.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-07 15:18:15\" },\n\t{ \"post_id\": 5655, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"Thanks !\\n\\nData is showing empty results for the xml elements which has multiple child records and not sure what is missing...\\n\\nHere is the extract of the sample xml.\\n\\n- <School>\\n <Admission>SA</Admission> \\n- <Students>\\n- <Student>\\n <Id>S</Id> \\n- <Persons>\\n <Person>Gav</Person> \\n <Person>Rik</Person> \\n <Person>Pow</Person> \\n </Persons>\\n </Student>\\n- <Student>\\n <Id>S</Id> \\n- <Persons>\\n <Person>Gavin</Person> \\n <Person>Ricardo</Person> \\n <Person>Powell</Person> \\n </Persons>\\n </Student>\\n </Students>\\n </School>\\n\\nCode which i tried\\n\\n\\nPersonRec:=\\n STRING Person{xpath('Person')};\\n End;\\n\\n StudentRec := RECORD\\n STRING Id{xpath('Id')};\\n Dataset(PersonRec) NamesRec{XPATH('Persons/Person')};\\n END;\\n\\t\\t\\t\\t\\t\\nSchoolRec := RECORD\\n String Admission{xpath('Admission')}; \\n\\tRECORDOF(PersonsRec) StudentRec {XPATH('Students/Student')};\\nEND;\\n\\t B := DATASET('~student::test', SchoolRec, XML('School'));\\n\\t b;\\n\\n
\\n\\nThanks for your help in advance..\", \"post_time\": \"2014-05-07 14:18:21\" },\n\t{ \"post_id\": 5654, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"I have a child dataset which has multiple records and i want to show all child records in single row.\\n\\nis it possible using SET of STRING.\\n\\nI want to show all the xml data in single row this is what i want to achieve.
If you just want to show the raw XML, then you can simply put all the child records into a single STRING field and that's what you'll get (no need to use SET OF STRING). If you then need to split out the individual child records from that STRING field, you can use PARSE to extract them all.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-07 14:03:59\" },\n\t{ \"post_id\": 5651, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"kumar2k14\", \"post_text\": \"Thanks for the responses.\\n\\nI have a child dataset which has multiple records and i want to show all child records in single row.\\n\\nis it possible using SET of STRING.\\n\\nI want to show all the xml data in single row this is what i want to achieve.\", \"post_time\": \"2014-05-07 12:58:54\" },\n\t{ \"post_id\": 5562, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"rtaylor\", \"post_text\": \"This RECORD structure should work, assuming your file has "dataset" as its file tag and "Student" is your record tag.\\nRec := RECORD\\n\\tSTRING1 TypeOfUpdate{XPATH('TypeOfUpdate')}; \\n\\tUNSIGNED1 RegDateMonth{XPATH('RegDate/Month')}; \\n\\tUNSIGNED1 RegDateDay{XPATH('RegDate/Day')}; \\n\\tUNSIGNED2 RegDateYear{XPATH('RegDate/Year')}; \\n\\tUNSIGNED1 DateOfJoinedeMonth{XPATH('DateOfJoined/Month')}; \\n\\tUNSIGNED1 DateOfJoinedDay{XPATH('DateOfJoined/Day')}; \\n\\tUNSIGNED2 DateOfJoinedYear{XPATH('DateOfJoined/Year')}; \\n\\tUNSIGNED1 DateReportedMonth{XPATH('DateReported/Month')}; \\n\\tUNSIGNED1 DateReportedDay{XPATH('DateReported/Day')}; \\n\\tUNSIGNED2 DateReportedYear{XPATH('DateReported/Year')}; \\n\\tUNSIGNED1 TypeCategory{XPATH('TypeCategory')};\\nEND;\\n\\nds := DATASET('MyXMLfilename',Rec,XML('dataset/Student'));
The key to working with XML data is in defining the XPATH to the actual data.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-28 17:59:52\" },\n\t{ \"post_id\": 5554, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"tlhumphrey2\", \"post_text\": \"The following record structure will work with the xml you show:\\n\\nDate := RECORD\\n UNSIGNED month;\\n UNSIGNED day;\\n UNSIGNED year;\\nEND;\\n\\nStudent := RECORD\\n STRING1 typeofupdate;\\n Date RegDate;\\n Date DateOfJoined;\\n Date DateReported;\\n UNSIGNED typecategory;\\nEND;\\n
\\n\\nBut, you might be wondering how to get an inputted xml file into a dataset having the Student record structure as its record layout. The following is an example:\\n\\nstudents:= DATASET('~sameer::students_xml',Student,XML('students/student'));\\n\\nThe above DATASET statement will bring in the dataset that was sprayed into sameer::students_xml' under the following conditions:\\n\\n1. All xml tags are in lower case and have the same names as the field names in the above record structures (Student and Date).\\n2. Linefeeds after '>' have been removed.\\n\\nIf your tag names don't match the field names or are not in lower case then you need an XPATH statement after the field definition in the record structures above.\\n\\nFor example, one of the tags in your xml is TypeOfUpdate. So in the Student record structure, the field typeofupdate should look like the following:\\n\\n STRING1 typeofupdate {XPATH('TypeOfUpdate')};\\n
\", \"post_time\": \"2014-04-28 14:34:28\" },\n\t{ \"post_id\": 5552, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Re: Layout\", \"username\": \"sameermsc\", \"post_text\": \"Hi Kumar,\\n\\nwhat do you mean by SET of STRING, what do you want to achieve ?\\nyour question is not clear to me\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-28 07:03:05\" },\n\t{ \"post_id\": 5551, \"topic_id\": 1285, \"forum_id\": 10, \"post_subject\": \"Layout\", \"username\": \"kumar2k14\", \"post_text\": \"How to define a record layout for the incoming xml file using the SET OF STRING.\\n\\n<Student>\\n <TypeOfUpdate>A</TypeOfUpdate> \\n- <RegDate>\\n <Month>1</Month> \\n <Day>2</Day> \\n <Year>2012</Year> \\n </RegDate>\\n- <DateOfJoined>\\n <Month>2</Month> \\n <Day>2</Day> \\n <Year>2012</Year> \\n </DateOfJoined>\\n- <DateReported>\\n <Month>3</Month> \\n <Day>3</Day> \\n <Year>2012</Year> \\n /DateReported>\\n <TypeCategory>22</TypeCategory> \\n </Student>\\n\\nThanks for your help in advance !\", \"post_time\": \"2014-04-28 01:48:29\" },\n\t{ \"post_id\": 5622, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"rtaylor\", \"post_text\": \"Sailorconan1,\\n\\nGlad to be of help. \\n\\nYes, we have a tremendous amount of information available about ECL, so I can only re-iterate my suggestion that you go through the free online ECL courses available at http://learn.lexisnexis.com/hpcc -- they are not a waste of time (whereas, this kind of simple mistake can be ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-02 15:21:47\" },\n\t{ \"post_id\": 5618, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"Richard,Thanks! As you said, I got more information from http://hpccsystems.com/download/docs/ec ... TASET.html\", \"post_time\": \"2014-05-02 15:07:48\" },\n\t{ \"post_id\": 5616, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"rtaylor\", \"post_text\": \"Sailorconan1,
Why the size of file has to be multiple of 16???\\n\\nDo I need to adjust the size of file??? Thats weird!
You're using a CSV file, but you're defining it as a FLAT (THOR) file (with a fixed-length 16-byte RECORD structure). \\n\\nChanging your DATASET definition to this should get you a little further:\\n\\nIMPORT csvtest;\\nEXPORT File_factor := DATASET('~kmeans::QR::test',csvtest.Layout_factor,CSV);
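A slightly fuller CSV definition along the same lines is sketched below -- the HEADING and SEPARATOR values are assumptions based on the tab-separated, header-first sample posted later in this thread, not something stated in this reply:

// Sketch only: options assumed from the sample data (tab-delimited, one header line)
Layout_factor := RECORD
  UNSIGNED4 grade;
  UNSIGNED4 size;
  UNSIGNED4 nodes;
  UNSIGNED4 age;
END;
File_factor := DATASET('~kmeans::QR::test', Layout_factor,
                       CSV(HEADING(1), SEPARATOR('\t')));  // skip the header row, split fields on tabs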
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-02 14:40:35\" },\n\t{ \"post_id\": 5615, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"\", \"post_time\": \"2014-05-02 14:32:17\" },\n\t{ \"post_id\": 5607, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"This is my data file. \\n\\n\\ngrade\\tsize\\tnodes\\tage\\n3\\t999\\t1\\t64\\n3\\t50\\t98\\t70\\n9\\t999\\t99\\t39\\n9\\t40\\t98\\t81\\n3\\t30\\t98\\t60\\n3\\t35\\t98\\t81\\n9\\t20\\t98\\t52\\n2\\t22\\t0\\t61\\n4\\t15\\t98\\t40\\n9\\t20\\t98\\t61\\n2\\t35\\t99\\t68\\n1\\t30\\t98\\t72\\n3\\t999\\t98\\t59\\n3\\t999\\t98\\t56\\n3\\t45\\t98\\t64\\n3\\t999\\t98\\t76\", \"post_time\": \"2014-05-01 15:56:17\" },\n\t{ \"post_id\": 5606, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"My codes\", \"post_time\": \"2014-05-01 15:55:21\" },\n\t{ \"post_id\": 5602, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"Why the size of file has to be multiple of 16???\\n\\nDo I need to adjust the size of file??? Thats weird!\", \"post_time\": \"2014-05-01 15:34:47\" },\n\t{ \"post_id\": 5601, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"I uploaded a csv file, and spray it. Follow the tutorial http://cdn.hpccsystems.com/releases/CE- ... .2.2-1.pdf\\n\\nBut Got the following error:\\n\\nError: System error: 1: File /var/lib/HPCCSystems/hpcc-data/thor/kmeans/qr/test._1_of_1 size is 215 which is not a multiple of 16 (0, 0), 1, \\n\\n\\nThis is my file:\\ngrade\\tsize\\tnodes\\tage\\n3\\t999\\t1\\t64\\n3\\t50\\t98\\t70\\n9\\t999\\t99\\t39\\n9\\t40\\t98\\t81\\n3\\t30\\t98\\t60\\n3\\t35\\t98\\t81\\n9\\t20\\t98\\t52\\n2\\t22\\t0\\t61\\n4\\t15\\t98\\t40\", \"post_time\": \"2014-05-01 15:31:52\" },\n\t{ \"post_id\": 5589, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"rtaylor\", \"post_text\": \"
I put the test.csv under the same directory with the code.
Your code repository is on your local hard drive, whereas the data files should be sprayed onto your cluster. That's why you're getting that error -- the file is not on the cluster.\\n\\nHave you gone through our free online introductory courses to ECL? Spraying is one of the first things covered. You can sign up here: http://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-29 18:28:34\" },\n\t{ \"post_id\": 5588, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"Can anyone help me?\", \"post_time\": \"2014-04-29 17:03:09\" },\n\t{ \"post_id\": 5569, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"\\nIMPORT ML;\\n//grade\\tsize\\tnodes\\tage\\n\\nCSVRecord := RECORD\\n\\tUNSIGNED4 grade;\\n\\tUNSIGNED4 size;\\n\\tUNSIGNED4 nodes;\\n\\tUNSIGNED4 age;\\nEND;\\n\\ntestData := DATASET('test.csv',CSVRecord, CSV(HEADING(1)));\\n\\ntestSize := 16;\\n\\ncentroids := DATASET([{1,1,1,1}, {2,2,2,2}], {UNSIGNED4 grade, UNSIGNED4 size, UNSIGNED4 nodes, UNSIGNED4 age});\\nML.ToField(centroids, o2);\\nML.ToField(testData,o1);\\nKMeans := ML.Cluster.KMeans(o1, o2, 10, .01, ML.Cluster.DF.QEuclidean);\\n\\nKmeans.convergence;\\nKmeans.result();
\\n\\nIs it correct? It doesn't show any output \\nI put the test.csv under the same directory with the code. \\n\\nError: System error: 10001: Graph[1], Missing logical file thor::test.csv\\n (0, 0), 10001,\", \"post_time\": \"2014-04-28 21:49:56\" },\n\t{ \"post_id\": 5567, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"tlhumphrey2\", \"post_text\": \"You always need to provide the starting values for the cluster centroids.\", \"post_time\": \"2014-04-28 19:43:38\" },\n\t{ \"post_id\": 5564, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"Thank you! It works!\\n\\nI have another question:\\nDo I need to specify the centroids? In R, you don't have to specify the centroids.\\n\\nIf my dataset has 4-dimension data, how can I set two centroids as (1,1,1,1) and (2,2,2,2) in the program?\\n
centroids := DATASET([{1,1,1,1}, {2,2,2,2}], {UNSIGNED2 id, REAL4 value});
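One possible way to express a pair of four-dimensional centroids -- a sketch only, mirroring the four-field record layout that appears later in this thread before ML.ToField is applied; the names centroids4D and centroidsNF are illustrative:

IMPORT ML;
// Sketch: one field per dimension, then convert to the NumericField layout KMeans expects
centroids4D := DATASET([{1,1,1,1}, {2,2,2,2}],
                       {UNSIGNED4 grade, UNSIGNED4 size, UNSIGNED4 nodes, UNSIGNED4 age});
ML.ToField(centroids4D, centroidsNF);   // centroidsNF becomes the second parameter to ML.Cluster.KMeans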
\", \"post_time\": \"2014-04-28 18:16:37\" },\n\t{ \"post_id\": 5559, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"Re: read csv in Kmeans\", \"username\": \"tlhumphrey2\", \"post_text\": \"The only thing you are missing is the conversion of your testData. Like you did for centroids, you must convert testData using ToField. Something like the following:\\n\\nToField(testData,NumericField_testData);
\\n\\nThen NumericField_testData becomes the first parameter of KMeans instead of testData.\", \"post_time\": \"2014-04-28 17:19:54\" },\n\t{ \"post_id\": 5556, \"topic_id\": 1286, \"forum_id\": 10, \"post_subject\": \"read csv in Kmeans\", \"username\": \"sailorconan1\", \"post_text\": \"I would like to read a csv file as the input of Kmeans. I use the the code in http://hpccsystems.com/bb/viewtopic.php?f=10&t=1151\\n\\nIMPORT ML;\\nCSVRecord := RECORD\\n\\tUNSIGNED4 grade;\\n\\tUNSIGNED4 size;\\n\\tUNSIGNED4 nodes;\\n\\tUNSIGNED4 age;\\nEND;\\n\\ntestData := DATASET('test.csv',CSVRecord, CSV(HEADING(1)));\\n\\ntestSize := 16;\\n\\n//Do I need to specify the centroids?\\n// How can I set the 2 centroids as (1,1,1,1) and (2,2,2,2)?\\ncentroids := DATASET([{1,1}, {2,2}], {UNSIGNED2 id, REAL4 value});\\n\\nML.ToField(centroids, o2);\\nKMeans := ML.Cluster.KMeans(testData, o2, 10, .01,ML.Cluster.DF.QEuclidean);\\n\\nKmeans.convergence;\\nKmeans.result();
\\n\\n\\nError: Parameter d01 type mismatch - expected Table of numericfield, given Table of csvrecord (18, 29), 2064,\", \"post_time\": \"2014-04-28 16:12:24\" },\n\t{ \"post_id\": 5633, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"That makes a lot of sense. Thanks, Richard!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-05-05 13:33:51\" },\n\t{ \"post_id\": 5632, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI make a habit of always HASHing all the equality fields in the JOIN condition simply because that makes sense to me, but HASHing on one of the fields, if the JOIN condition is all AND logic would also work. \\n\\nHowever, the result of the DISTRIBUTE is always that all records with the same hash value end up on the same node. So on a small cluster, hashing on just lastname (instead of last, first, middle) will put all the SMITHs on a single node. And with a large enough dataset there could be enough SMITH records to not be able to fit them all in memory (depending on the record size), which would create disk spill and slow things down.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-05 13:30:44\" },\n\t{ \"post_id\": 5626, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"DSC\", \"post_text\": \"Exploring this a little bit to help my understanding.\\n\\nI thought that it wasn't necessary to hash all of the fields used in a JOIN condition, that using only one field used in an equality test would be sufficient. For something like this:\\n\\nLEFT.a = RIGHT.a AND LEFT.b = RIGHT.b AND LEFT.c = RIGHT.c
\\nMy thinking was that distributing both recordsets on 'a' would be enough. Logically, matching records could only be local after such a distribute, never non-local, so a LOCAL JOIN could then be used.\\n\\nWhy would it be necessary to use all three fields in this example for the DISTRIBUTE? Is there a more complex example that shows the behavior, perhaps?\\n\\nThanks, Richard!\\n\\nDan\", \"post_time\": \"2014-05-03 13:42:52\" },\n\t{ \"post_id\": 5612, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nIf you want to do a LOCAL JOIN, then you first need to DISTRIBUTE the two datasets by a hash of all the fields used in the equality portion of the JOIN condition. At that point a LOCAL JOIN and a global JOIN should both return the same result. If they don't, then you need to report the issue in JIRA, preferably with example code and data that demonstrates the problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-02 13:21:14\" },\n\t{ \"post_id\": 5610, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"The basis for my question is this:\\nI have 2 datasets A and B. I explicitly distribute both on a common field myId. If I do a 'local' join, my resultant set has fewer records than if I did a regular join. The results returned by 'regular join' were correct, though. How is this possible? \\n\\nhash32(myId) would hash same Id records to same node? And, a 'local' join should have all relevant records from both A and B in same node? Why would it return fewer records then? \\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-05-02 11:01:58\" },\n\t{ \"post_id\": 5595, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"tlhumphrey2\", \"post_text\": \"As Richard said, the sole purpose of the DISTRIBUTE is to spread your dataset across the nodes of a THOR in such a way so an ECL operation(JOIN, SORT, ROLLUP, ITERATE, etc.) can be done locally (using LOCAL). And thereby speed up the execution.\\n\\nSo, it is very important that you DISTRIBUTE so all data needed for an ECL operation is available on a single node. For example, if you SORT on the field lastname and you want the SORT to be LOCAL, you must make sure your DISTRIBUTE places all records having the same lastname on the same node. A DISTRIBUTE like the following would do that:\\n\\nDISTRIBUTE(mydataset, HASH32(lastname));
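For illustration, a minimal sketch of that pattern -- dsA and dsB are placeholder dataset names (not from this thread), myId is the shared key field mentioned above, and with a multi-field equality condition you would hash all of the equality fields:

// Sketch only: co-locate matching keys, then join locally
dA := DISTRIBUTE(dsA, HASH32(myId));
dB := DISTRIBUTE(dsB, HASH32(myId));
// Every record with the same myId hash now sits on the same node, so LOCAL is safe:
J := JOIN(dA, dB, LEFT.myId = RIGHT.myId, LOCAL);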
\", \"post_time\": \"2014-04-30 12:54:55\" },\n\t{ \"post_id\": 5579, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,Assume that all through, the distribution criteria remains the same.
If it's the same distribution AND all the subsequent operations use the LOCAL option (which is the primary purpose of using DISTRIBUTE in the first place), then no re-DISTRIBUTE is necessary. \\n\\nThe LOCAL option specifies that the operation is accomplished separately and independently on each node on whatever data is already on each node (as the result of the DISTRIBUTE). Therefore, the records will all stay on the same nodes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-29 13:37:46\" },\n\t{ \"post_id\": 5574, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"Re: ECL constructs affecting DISTRIBUTE\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nMultiple DISTRIBUTEs in any process are not uncommon. The important thing is to monitor your Graph after each operation and monitor for skew. If you see something like "+200%, -100%" in the middle of a process should should probably re-DISTRIBUTE at that point.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-29 11:43:14\" },\n\t{ \"post_id\": 5572, \"topic_id\": 1289, \"forum_id\": 10, \"post_subject\": \"ECL constructs affecting DISTRIBUTE\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"If a series of operations, say, JOIN, SORT, ROLLUP, ITERATE etc. need to be performed on a DISTRIBUTEd dataset, is it sufficient to DISTRIBUTE just once? Or, can any of these keywords alter the record's location necessitating a re-distribution?\\n\\nAssume that all through, the distribution criteria remains the same. \\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-04-29 08:54:06\" },\n\t{ \"post_id\": 5575, \"topic_id\": 1290, \"forum_id\": 10, \"post_subject\": \"Re: Function call or inline expression - which is better in \", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nThe proof is in the timings, and the compiler does such a great job in optimization that you might not see a big difference, but writing a FUNCTION call makes your ECL more atomic and that is generally a best practice.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-29 11:48:33\" },\n\t{ \"post_id\": 5573, \"topic_id\": 1290, \"forum_id\": 10, \"post_subject\": \"Function call or inline expression - which is better in JOIN\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"While joining 2 'large' datasets - wordsInSentence and dictionaryWords, I need to strip off special characters from LHS before comparing with RHS. \\n\\nWhich of the following is the better approach for doing this?\\n\\ni) use inline JOIN expression like this\\njoin(wordsInSentence \\n , dictionaryWords\\n , STD.ToUpperCase(STD.Str.FilterOut(left.word,'().,')) = STD.ToUpperCase(right.dictWord)\\n , inner)
\\n\\nii) declare a function and call the same from JOIN expression\\n\\ncleanString (STRING inString) := FUNCTION \\n\\toutString := STD.Str.FilterOut(inString,'().,');\\t\\n\\tRETURN STD.ToUpperCase(outString);\\nEND;\\n\\njoin(wordsInSentence \\n , dictionaryWords\\n , cleanString(left.word) = STD.ToUpperCase(right.dictWord)\\n , inner)
\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-04-29 10:00:33\" },\n\t{ \"post_id\": 5594, \"topic_id\": 1292, \"forum_id\": 10, \"post_subject\": \"Re: WLAM\", \"username\": \"jchangav\", \"post_text\": \"Thanks a lot. That worked. Was under the assumption that HPCC would accept zip files.\", \"post_time\": \"2014-04-29 22:15:40\" },\n\t{ \"post_id\": 5591, \"topic_id\": 1292, \"forum_id\": 10, \"post_subject\": \"Re: WLAM\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nIt looks like you did not extract the log file from the zip file, and instead you sprayed the zip file itself. What you need to do is EXTRACT the all.xlog file from the "Weblog example.zip" file and then spray the all.xlog file as instructed in the README.\\n\\nYou should see a file size of 5,417,327,024 bytes after the spray. \\n\\nI ran the Stats and ContentSummary from a 3 node THOR cluster and the results were beautiful!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-04-29 19:44:35\" },\n\t{ \"post_id\": 5590, \"topic_id\": 1292, \"forum_id\": 10, \"post_subject\": \"WLAM\", \"username\": \"jchangav\", \"post_text\": \"Completely new to HPCC so I may be doing something stupid.\\n\\nI just deployed an HPCC cluster on AWS and install the WLAM module from github. I followed the instructions there to install WLAM, upload and spray the sample log data file (from the zip file). I then executed the following from the IDE (as given in the instructions):\\n\\nIMPORT * FROM WLAM.WebLogs;\\nAnalysis(File_WebLogs.Logs).Stats;\\nAnalysis(File_WebLogs.Logs).ContentSummary;\\n\\nI get no results (I only see the header). I then went to EclWatch and selected the ‘Browse Logical Files’ option. There I see parsedweblogs file is 0 size. Not sure what I am doing wrong or how to debug this.\", \"post_time\": \"2014-04-29 18:29:26\" },\n\t{ \"post_id\": 5631, \"topic_id\": 1297, \"forum_id\": 10, \"post_subject\": \"Re: ConvertToDLL Failed\", \"username\": \"bforeman\", \"post_text\": \"Hi Biswanath,\\n\\nI have never seen this error, and certainly this is not typical. I would suggest that you open a report in the Community Issue Tracker, and include the logs with your post. (Have you seen the ZAP option in the Workunit Details?)\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nTwo things come to mind with this type of error, possibly a connection issue with your cluster or some kind of resource issue, but the logs will provide a better clue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-05 12:35:41\" },\n\t{ \"post_id\": 5629, \"topic_id\": 1297, \"forum_id\": 10, \"post_subject\": \"ConvertToDLL Failed\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nFor some of the workunits i get an error "ConvertToDLL Failed". 
I have no clue how to find which line or what is causing this.\\n\\nAny pointers on what might cause this error commonly?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-05-05 07:51:51\" },\n\t{ \"post_id\": 5653, \"topic_id\": 1299, \"forum_id\": 10, \"post_subject\": \"Re: Filtering a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,\\n\\nOnce again, I would define the vertical slice TABLE and then DISTRIBUTE that table, since the vertical slice effect will eliminate some fields from the need to be moved across the network (and the only reason to do it the other way is if you need to work with both).\\n\\nRichard\", \"post_time\": \"2014-05-07 13:58:10\" },\n\t{ \"post_id\": 5649, \"topic_id\": 1299, \"forum_id\": 10, \"post_subject\": \"Re: Filtering a dataset\", \"username\": \"biswanath_c\", \"post_text\": \"Richard,\\n\\nI have a related question.\\n\\nIf I have a distributed dataset. It's distributed on field1 and contains fields 1 to be 10.\\n\\nIf I create a table from the dataset containing only fields 1 & 2, will the resultant table also be distributed on field1?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-05-07 04:22:39\" },\n\t{ \"post_id\": 5648, \"topic_id\": 1299, \"forum_id\": 10, \"post_subject\": \"Re: Filtering a dataset\", \"username\": \"biswanath_c\", \"post_text\": \"Got it. Thank you Richard.\", \"post_time\": \"2014-05-07 04:14:28\" },\n\t{ \"post_id\": 5647, \"topic_id\": 1299, \"forum_id\": 10, \"post_subject\": \"Re: Filtering a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,\\n\\nIf the intention is to end up with a dataset filtered on one field and distributed by another, I would simply DISTRIBUTE the filtered dataset, since the filter will eliminate some records from the need to be moved. IOW, I would write it like this:filteredDS := dsUndistributed(field2 = 'Yes');\\nds := DISTRIBUTE(filteredDS , hash32(field1));
\\nFiltering after the DISTRIBUTE should maintain the distribution, but is only useful if subsequent operations would need both the filtered and unfiltered dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-06 18:29:03\" },\n\t{ \"post_id\": 5642, \"topic_id\": 1299, \"forum_id\": 10, \"post_subject\": \"Filtering a dataset\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nSay, I have a dataset ds that has been distributed on a field across the nodes using a DISTRIBUTE function call. Now, if i apply a filter on ds and assign the output to filteredDS, will filteredDS also be automatically distributed (i.e., will filtering happen locally on the individual nodes)? or should i explicitly call DISTRIBUTE on filteredDS to distribute it?\\n\\n\\nds := DISTRIBUTE(dsUndistributed, hash32(dsUndistributed.field1));\\nfilteredDS := ds(field2 = 'Yes');\\n
\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-05-06 11:35:03\" },\n\t{ \"post_id\": 5677, \"topic_id\": 1300, \"forum_id\": 10, \"post_subject\": \"Re: Roxie - Blank Columns missing from Schema\", \"username\": \"tlhumphrey2\", \"post_text\": \"When you say, “those columns are not being returned in the dataset”, I have to ask what form are you looking at the “returned results”. Are you seeing the results thru a client’s web page? Or, are you looking at the results in xml form? How are you seeing the results?\", \"post_time\": \"2014-05-12 14:26:14\" },\n\t{ \"post_id\": 5644, \"topic_id\": 1300, \"forum_id\": 10, \"post_subject\": \"Roxie - Blank Columns missing from Schema\", \"username\": \"David Dasher\", \"post_text\": \"Hello all\\n\\nI'm having a few issues returning some results back from Roxie, if a certain result set does not have any data in certain columns those columns are not being returned in the dataset. \\n\\nIs there a way of forcing all column headers back regardless? I'm sure I could do a map statement inside the query and check there, however, I just thought I'd ask.\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-05-06 12:45:44\" },\n\t{ \"post_id\": 5656, \"topic_id\": 1302, \"forum_id\": 10, \"post_subject\": \"Re: COUNTERs and DISTRIBUTEs\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,Say I have a dataset containing two fields - chapter number and line of text. I have distributed the dataset on chapter number. Now, I'd like to iterate through the dataset and assign a line number to each line using a counter.
If you need to make the line numbers restart with each new chapter, then you'll need to use GROUP. like this:IMPORT STD;\\nds := DATASET([{1,'Line 1 Chapter 1'},\\n {1,'Line 2 Chapter 1'},\\n {2,'Line 1 Chapter 2'},\\n {2,'Line 2 Chapter 2'},\\n {3,'Line 1 Chapter 3'},\\n {3,'Line 2 Chapter 3'},\\n {3,'Line 3 Chapter 3'},\\n {4,'Line 1 Chapter 4'},\\n {4,'Line 2 Chapter 4'},\\n {5,'Line 1 Chapter 5'},\\n {5,'Line 2 Chapter 5'},\\n {6,'Line 1 Chapter 6'},\\n {6,'Line 2 Chapter 6'},\\n {6,'Line 3 Chapter 6'}],{unsigned1 Chap, STRING20 txt});\\n\\ndist := DISTRIBUTE(ds,chap);\\ns_dist := SORT(dist,chap,LOCAL);\\ng_dist := GROUP(s_dist,chap,LOCAL);\\nRec := RECORD\\n unsigned1 Chap;\\n unsigned1 LineNum;\\n STRING20 txt;\\n unsigned1 NodeNum;\\nEND;\\nRec XF(g_dist L, INTEGER C) := TRANSFORM\\n SELF.LineNum := C;\\n SELF.NodeNum := STD.system.Thorlib.NODE()+1;\\n SELF := L;\\nEND;\\nP := PROJECT(g_dist,XF(LEFT,COUNTER),LOCAL);\\nP;\\nSORT(UNGROUP(P),chap,LineNum);\\n
This will sequentially number the lines within each chapter, separately and independently on each node (I threw in the Node numbers just to see where the work was done on each GROUP).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-07 14:31:36\" },\n\t{ \"post_id\": 5652, \"topic_id\": 1302, \"forum_id\": 10, \"post_subject\": \"Re: COUNTERs and DISTRIBUTEs\", \"username\": \"bforeman\", \"post_text\": \"Hi Biswanath,\\n\\nSince you have distributed the recordset, this means that you will be using LOCAL iteration, and yes, you need to handle the sequencing in your TRANSFORM.\\n\\nThe secret is knowing what node is being processed and how many nodes are in your target cluster.\\n\\nUsing ITERATE, we can sequence locally using the following formula:\\n\\n SELF.LineNumbers := IF(L.LineNumbers=0,std.system.thorlib.node()+1,L.LineNumbers+CLUSTERSIZE);
\\n\\nYou can also do the same with a LOCAL PROJECT:\\n\\n\\tnode := STD.System.Thorlib.node();\\n\\tnodes := CLUSTERSIZE;\\n\\tSELF.LineNumbers := ((c-1) * (nodes-1)) + node + c;\\n\\tSELF := L;\\n//c = the PROJECT COUNTER\\n
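A runnable sketch of that LOCAL PROJECT approach, with a small inline dataset standing in for an already-DISTRIBUTEd recordset (the record layout and values here are illustrative only):

IMPORT STD;
// Sketch only: any distributed recordset would do in place of dist
InRec  := {UNSIGNED1 chap, STRING20 txt};
ds     := DATASET([{1,'Line A'},{1,'Line B'},{2,'Line C'},{2,'Line D'}], InRec);
dist   := DISTRIBUTE(ds, chap);
OutRec := RECORD
  UNSIGNED8 LineNumbers;
  InRec;
END;
OutRec XF(InRec L, INTEGER C) := TRANSFORM
  node  := STD.System.Thorlib.node();                    // 0-based node this record lives on
  nodes := CLUSTERSIZE;                                  // number of slave nodes
  SELF.LineNumbers := ((C-1) * (nodes-1)) + node + C;    // the interleaved numbering formula quoted above
  SELF := L;
END;
numbered := PROJECT(dist, XF(LEFT, COUNTER), LOCAL);
OUTPUT(SORT(numbered, LineNumbers));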
\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-05-07 13:57:50\" },\n\t{ \"post_id\": 5650, \"topic_id\": 1302, \"forum_id\": 10, \"post_subject\": \"COUNTERs and DISTRIBUTEs\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nSay I have a dataset containing two fields - chapter number and line of text. I have distributed the dataset on chapter number. Now, I'd like to iterate through the dataset and assign a line number to each line using a counter.\\n\\nIs it possible to do this using a COUNTER in iterate? I mean, will counter generate line numbers that keep incrementing across chapters and that should not restart from 1 for every chapter?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-05-07 04:28:20\" },\n\t{ \"post_id\": 5669, \"topic_id\": 1304, \"forum_id\": 10, \"post_subject\": \"Re: Measure execution time\", \"username\": \"rtaylor\", \"post_text\": \"Have you looked at the Timings section of the workunit in ECL Watch? You can also look at the WorkunitTimings function in the Standard Library (STD.System.Workunit.WorkunitTimings ( wuid )). Documentation for the Standard Library is in the F1 Help file from the IDE, or the Standard Library Reference PDF available here: http://hpccsystems.com/download/docs/learning-ecl\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-08 14:06:30\" },\n\t{ \"post_id\": 5663, \"topic_id\": 1304, \"forum_id\": 10, \"post_subject\": \"Measure execution time\", \"username\": \"sailorconan1\", \"post_text\": \"I would like to measure the execution time of a program, how to code it?\", \"post_time\": \"2014-05-07 19:50:57\" },\n\t{ \"post_id\": 5668, \"topic_id\": 1305, \"forum_id\": 10, \"post_subject\": \"Re: Error in JOIN condition ...\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,How do I do a join after removing the embedded whitespaces on field1?
This is a case for a specialized INDEX. \\n\\nYou're getting the error because the result of the removeSpaces function cannot exactly match the key field values in your INDEX. Therefore, you need to build your INDEX with the spaces already removed from your key field, then use that specialized INDEX in your JOIN, so that your code looks like this:KEYED(RIGHT.field1 = removeSpaces(LEFT.field1))
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-08 14:02:05\" },\n\t{ \"post_id\": 5666, \"topic_id\": 1305, \"forum_id\": 10, \"post_subject\": \"Error in JOIN condition ...\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nI am performing an inner join on 2 datasets. I have a key field on the right dataset and am using a keyed condition on that field. But i am getting an error that says:\\n\\nError: Key condition (KEYED(removeSpaces(RIGHT.field1) = removeSpaces(LEFT.field1))) does not have any comparisons against key fields
\\n\\nBasically field1 can have whitespaces in the middle; i need to be able to remove the whitespaces and then do a compare in the join.\\n\\nSo i am using this condition in the join:\\n KEYED(removeSpaces(RIGHT.field1) = removeSpaces(LEFT.field1))
\\n\\nremoveSpaces is a local function that i use to remove embedded whitespaces. But this gives the error that I had mentioned above.\\n\\nBut when i use the join condition without calling the removeSpaces function, i do NOT get the error. This works:\\n\\nKEYED(RIGHT.field1 = LEFT.field1)
\\n\\nHow do I do a join after removing the embedded whitespaces on field1?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-05-08 12:08:24\" },\n\t{ \"post_id\": 5723, \"topic_id\": 1315, \"forum_id\": 10, \"post_subject\": \"Re: Invalid Pointer (During Smart Join)\", \"username\": \"jsmith\", \"post_text\": \"It looks like may have been fixed by HPCC-10896, which is in 4.2.2-rc13 and this is rc12.\\n\\nCan you try and rerun on a rc13 or 4.2.4 build?\", \"post_time\": \"2014-05-19 17:42:59\" },\n\t{ \"post_id\": 5712, \"topic_id\": 1315, \"forum_id\": 10, \"post_subject\": \"Invalid Pointer (During Smart Join)\", \"username\": \"lokesh\", \"post_text\": \"Hi,\\n\\nI am facing this issue for couple of days now.\\n\\nWhen I run the code I get this error and workunit fails.\\n\\n1303: System error: 1303: Graph[114], diskwrite[118]: SLAVE 192.168.139.3:20500: Invalid pointer,
\\n\\nNow I have checked the logs, forums, documentation for this error but with no luck.\\n\\nInitially my thoughts were that somehow it is overstepping the memory bounds on which it can spill (it occurs only while spilling), so I tried changing the number of nodes. It worked (at least once). But now I start to get same error again.\\n\\nI am using linode to set up my hpcc cluster. There are 31 slave nodes in the cluster each with 8GB ram, 200GB HDD, and 6 cores. I have setup 11 slaves per node.\\nMy raw data file that goes as input are - 450GB/350GB and 400MB. \\n\\nAny pointers. I can not reveal the source code, and I don't know any other way to replicate this. I will try and post the source code by obscuing the sensitive stuff.\\n\\nAttached is the log file:\", \"post_time\": \"2014-05-19 12:34:19\" },\n\t{ \"post_id\": 5828, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Re: Row too large\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob, Gavin. \\n\\nThat explains it!\", \"post_time\": \"2014-06-04 10:40:25\" },\n\t{ \"post_id\": 5827, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Re: Row too large\", \"username\": \"ghalliday\", \"post_text\": \"There is unfortunately still a restriction legacy requirement to know the maximum record size when viewing records from within the IDE.\\n\\n(See https://track.hpccsystems.com/browse/HPCC-9470 for the relevant issue).\\n\\nYou can work around it by specifying maxcount on datasets, and maxlengths on fields (as you used to be required to do in the pre open source versions). Hopefully this will be fixed in 5.2.\", \"post_time\": \"2014-06-04 08:39:49\" },\n\t{ \"post_id\": 5825, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Re: Row too large\", \"username\": \"bforeman\", \"post_text\": \"Yes, the docs are very clear here \\nUnder RECORD and MAXLENGTH:\\n\\n
The default maximum size of a record containing variable-length fields is 4096 bytes (this may be overridden by using #OPTION(maxLength,####) to change the default).
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-03 17:44:18\" },\n\t{ \"post_id\": 5823, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Re: Row too large\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I did some more analysis to identify the cause of error and in the process, fixed the issue (though only temporarily, I am afraid!) \\n\\nMy Record structure fields cumulatively take up around 2050 bytes. Apart from this, it has a child dataset field - each record of which has 3 fields amounting to ~20bytes. \\n\\nI have a DENORMALIZE statement that would identify and mount child records into the said field in the parent row. And, this dataset, when viewed in ECL watch, gives the 'Row too large' message - this is the exact message. No additional details are provided. \\n\\nWhen I debugged, I discovered that my DENORMALIZE statement was generating lot of child rows (1000+) that had to be embedded in a single parent row. In my case, it shouldn't have generated such a large number but did because of a logic issue. So, when I fixed that issue, the number of child records was brought down (in most cases, less than 20) and the error stopped and I was able to view the output. So, I believe it has to do something with the no. of bytes allowed for a row. \\n\\nIf needed, I can replicate this issue and attach a screenshot of the message. \\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-03 16:34:48\" },\n\t{ \"post_id\": 5755, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Re: Row too large\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nWhat is the size of your RECORD structure for the output?\\nThere is no limitations set by the ECL IDE, so we suspect this is an ESP based error.\\nIf you can send us the exact error message, that will verify it…\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-28 13:14:32\" },\n\t{ \"post_id\": 5737, \"topic_id\": 1322, \"forum_id\": 10, \"post_subject\": \"Row too large\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"One of my ECL modules runs successfully to completion. However, I am unable to view the output in the IDE or in ECL Watch. \\n\\nIDE shows a blank result page. ECL Watch displays the number of rows for the result and the zip/gz/xls options. But, when I try to open the result set, it displays a 'row too large' message. \\n\\nIs there a limit for row size? \\n\\nGayathri\", \"post_time\": \"2014-05-23 10:05:32\" },\n\t{ \"post_id\": 5829, \"topic_id\": 1341, \"forum_id\": 10, \"post_subject\": \"Re: Roxie: Different version of file error\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nWell, let's take the simple solution first, and then talk about a possible better strategy down the road.\\n\\nIn your current set up, every time you update the data you will need to delete the current active query and then simply republish it with the new data.\\n\\nLooking ahead, the general best practice when THOR and ROXIE are sharing a single Dali is to use superfiles and package maps. Superfiles are documented very well in the Programmer's Guide, and Package Maps are a part of the ECL Command Line documentation. So with a superfile, updates to the superfile are processed through sub-files, and the Package Map is simply an XML file that contains the super and subfiles used in your queries. 
When you deactivate and activate a package map, the new data become part of the query without the need to bring the query down.\\n\\nThere is a good online lesson in the Advanced ROXIE course that details this process.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-06-04 14:02:59\" },\n\t{ \"post_id\": 5821, \"topic_id\": 1341, \"forum_id\": 10, \"post_subject\": \"Roxie: Different version of file error\", \"username\": \"jgostylo\", \"post_text\": \"As a reference I am using HPCC version "enterprise_4.2.2-1".\\n\\nI must be doing something wrong but I am not sure what it is. I have a Thor process that outputs some data stats to a logical file and I am publishing a Roxie query to read that data file so I can view it on a web page. This is the Thor code:\\n\\nOUTPUT(m.indiciaOverallStats,, '~pitcob::' + env.currentSelectorName + '::prfstats::indicia', OVERWRITE);\\nOUTPUT(m.instanceToClusterStats,, '~pitcob::' + env.currentSelectorName + '::prfstats::instances', OVERWRITE);\\nOUTPUT(m.clusterToInstanceStats,, '~pitcob::' + env.currentSelectorName + '::prfstats::clusters', OVERWRITE);
\\n\\nI look at the logical file contents in ECLWatch and everything looks fine so I am pretty certain there is nothing wrong with this part of the code. When I run the Roxie query I get this error:\\n\\nException\\nReported by: Roxie\\nMessage: Different version of pitcob::prod::prfstats::clusters already loaded: sizes = 0 0 Date = 2014-06-03T15:01:13 2014-05-30T21:36:40\\n\\nThis is the Roxie code:\\n\\nIndiciaOverallStatsRec := RECORD\\n\\tUNSIGNED8\\t\\tindicia_count;\\n\\tSTRING20\\t\\tearliest_timestamp;\\n\\tSTRING20\\t\\tlatest_timestamp;\\nEND;\\n\\nInstanceToClusterStatsRec := RECORD\\n\\tUNSIGNED8\\t\\tinstance_count;\\n\\tSTRING20\\t\\tearliest_timestamp;\\n\\tSTRING20\\t\\tlatest_timestamp\\nEND;\\n\\nClusterToInstanceStatsRec := RECORD\\n\\tUNSIGNED8\\t\\tcluster_count;\\n\\tREAL4\\t\\t\\taverage_instance_count;\\n\\tUNSIGNED8\\t\\tmax_instance_count;\\n\\tSTRING20\\t\\tearliest_timestamp;\\n\\tSTRING20\\t\\tlatest_timestamp;\\nEND;\\n\\nIndiciaDS := DATASET\\n(\\n\\t'~pitcob::prod::prfstats::indicia',\\n\\tIndiciaOverallStatsRec,\\n\\tTHOR\\n);\\nInstanceDS := DATASET\\n(\\n\\t'~pitcob::prod::prfstats::instances',\\n\\tInstanceToClusterStatsRec,\\n\\tTHOR\\n);\\nClusterDS := DATASET\\n(\\n\\t'~pitcob::prod::prfstats::clusters',\\n\\tClusterToInstanceStatsRec,\\n\\tTHOR\\n);\\n\\nindiciaRecordSet := IndiciaDS(indicia_count > 0);\\ninstanceRecordSet := InstanceDS(instance_count > 0);\\nclusterRecordSet := ClusterDS(cluster_count > 0);\\n\\nOUTPUT(indiciaRecordSet);\\nOUTPUT(instanceRecordSet);\\nOUTPUT(clusterRecordSet);
\\n\\nWhen I add OPT to the DATASET I notice that duplicate logical files are created.\\n\\nWhat is causing this error?\", \"post_time\": \"2014-06-03 15:16:13\" },\n\t{ \"post_id\": 5895, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Hi Bob,\\n\\nI tested on without HEADING, but it is still not working on cluster thor.\\nW20140613-134225 is the WUID\\nIt's been at least 30min since I submit the job.\\n\\nI think the connection of Dev environment is not good today for a lot of jobs submitted today got error "could not open logical file".\\n\\nThanks,\\nChang\\n\\n[quote="bforeman":j8uycqa0]See my previous message to you. If you still have a problem, I would submit a report to the Community Issue page with the ZAP reports attached from each workunit and a detailed description to the development team.\\n\\nI am also concerned with the number of sub-files in your super file. The rule of thumb is to keep your sub files under 100, and you are at 172. You can improve the performance of your filtering if you consolidate the sub files.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 18:31:42\" },\n\t{ \"post_id\": 5894, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Hi Richard,\\n\\nThe files are on Thor400_72.\\nI tried to do the same thing on 400_72, but didn't get the expected result.\\n\\nThanks,\\nChang\\n\\n[quote="rtaylor":3ay2i1av]What cluster is the dataset on? Is it the hThor cluster? If so, that might explain why hThor gives a result and the 40 node cluster is maybe trying to read the same file 40 times (hence the slowness)??\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-13 17:45:44\" },\n\t{ \"post_id\": 5893, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"rtaylor\", \"post_text\": \"What cluster is the dataset on? Is it the hThor cluster? If so, that might explain why hThor gives a result and the 40 node cluster is maybe trying to read the same file 40 times (hence the slowness)??\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-13 17:35:58\" },\n\t{ \"post_id\": 5892, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"bforeman\", \"post_text\": \"See my previous message to you. If you still have a problem, I would submit a report to the Community Issue page with the ZAP reports attached from each workunit and a detailed description to the development team.\\n\\nI am also concerned with the number of sub-files in your super file. The rule of thumb is to keep your sub files under 100, and you are at 172. You can improve the performance of your filtering if you consolidate the sub files.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 15:47:55\" },\n\t{ \"post_id\": 5891, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Sorry, it turned out to be an illusion after staring at the ECL for 50min... thor11 didn't give me result, but gave me 171 lines of warning. 
W20140613-101112\\n\\n\\n[quote="bforeman":3ymxkjy3]Ok, that's encouraging, I still think your DATASET might be defined incorrectly for the superfile, try removing the HEADING attribute.\\n\\nBob\", \"post_time\": \"2014-06-13 15:38:45\" },\n\t{ \"post_id\": 5890, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"bforeman\", \"post_text\": \"Ok, that's encouraging, I still think your DATASET might be defined incorrectly for the superfile, try removing the HEADING attribute.\\n\\nBob\", \"post_time\": \"2014-06-13 14:53:48\" },\n\t{ \"post_id\": 5889, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"just got thor11 result (W20140613-101422). it looked that filter on '2011-03-28' worked on thor11.\", \"post_time\": \"2014-06-13 14:36:56\" },\n\t{ \"post_id\": 5888, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Hi Bob,\\n\\nyes. My intention is to filter the header.\\n\\nI tried HEADING(1) and tested on the date '2011-03-28' on both hthor and thor11.\\nhthor gave me the result with just 1min.\\nthor11 still haven't give me any result after almost 40mins.\\n\\nHow can I test whether it is a valid DATASET? W20140613-101422\\nSo hthor can process invalid DATASET? W20140613-101112\\n\\nI will try no HEADING attribute after I get result from thor11\\n\\n\\nthanks,\\nChang\\n[quote="bforeman":1o4j6lqd]Hi Chang,\\n\\nI've reached out to the developers for more info, but I have questions for you.\\n\\nYour filter looks to be extracting the first HEADER record from each of the superfile sub-files, is this intended?\\n\\nWhat if you try to filter on an actual trip date?\\n\\nFurthermore, I don't know if your DATASET statement is valid (correct):\\n\\nEXPORT pA_trip := DATASET('~thor400_72::thor400_72::telematics::wunelli::partnera_tripdata', SuperFileLayout.pA_Trip_R,CSV([b]HEADING(SINGLE)[/b]));
\\n\\nIf I read the docs correctly, SINGLE is only valid in an OUTPUT statement. And since this DATASET references multiple subfiles, each with a HEADER, I'm not sure your superfile definition is correct. What if you leave the CSV without the HEADING attribute, or change it to HEADING(1) as documented?\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 14:34:15\" },\n\t{ \"post_id\": 5887, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"bforeman\", \"post_text\": \"Hi Chang,\\n\\nI've reached out to the developers for more info, but I have questions for you.\\n\\nYour filter looks to be extracting the first HEADER record from each of the superfile sub-files, is this intended?\\n\\nWhat if you try to filter on an actual trip date?\\n\\nFurthermore, I don't know if your DATASET statement is valid (correct):\\n\\nEXPORT pA_trip := DATASET('~thor400_72::thor400_72::telematics::wunelli::partnera_tripdata', SuperFileLayout.pA_Trip_R,CSV([b]HEADING(SINGLE)[/b]));
\\n\\nIf I read the docs correctly, SINGLE is only valid in an OUTPUT statement. And since this DATASET references multiple subfiles, each with a HEADER, I'm not sure your superfile definition is correct. What if you leave the CSV without the HEADING attribute, or change it to HEADING(1) as documented?\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 13:59:25\" },\n\t{ \"post_id\": 5885, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"here's the cluster ip\\nhttp://10.194.10.2:8010/\\n\\nThanks,\\nChang\\n[quote="bforeman":orgv5acf]What cluster IP? Can you post the entire URL of the workunit?\", \"post_time\": \"2014-06-13 13:10:39\" },\n\t{ \"post_id\": 5884, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"bforeman\", \"post_text\": \"What cluster IP? Can you post the entire URL of the workunit?\", \"post_time\": \"2014-06-13 13:06:43\" },\n\t{ \"post_id\": 5883, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Hi Bob,\\n\\nThank you for your prompt reply!\\n\\nI got blank from other clusters. \\nW20140613-075642 is the result I got from hthor. Which is what I expected.\\nW20140613-075011 is the result I got from 400_72. Which is not right.\\n\\nI didn't get any warning or error.\\n\\nAnd based on my logical file name search, I didn't see any different data on the thor clusters.\\n\\nThanks,\\nChang\\n[quote="bforeman":3a0exbk9]Hi Chang,\\n\\nWell, hthor is a single node and the other clusters should be multiple nodes. What results are you seeing on these other clusters? Incorrect or blank? Are you getting any warnings or errors? Version information? Could there be different data on the THOR clusters?\\n\\nBottom line is that if you sprayed the data correctly, your results should be consistent.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 12:58:47\" },\n\t{ \"post_id\": 5882, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"Re: different thor different result?\", \"username\": \"bforeman\", \"post_text\": \"Hi Chang,\\n\\nWell, hthor is a single node and the other clusters should be multiple nodes. What results are you seeing on these other clusters? Incorrect or blank? Are you getting any warnings or errors? Version information? Could there be different data on the THOR clusters?\\n\\nBottom line is that if you sprayed the data correctly, your results should be consistent.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-13 12:52:54\" },\n\t{ \"post_id\": 5881, \"topic_id\": 1353, \"forum_id\": 10, \"post_subject\": \"different thor different result?\", \"username\": \"Chang\", \"post_text\": \"Hi all,\\n\\nI submitted exactly the same code on hthor, thor400 and thor50 in the dev environment.\\nHowever, only hthor gave me the result.\\n\\nThe code is extremely simple:\\n\\nIMPORT TELEMATICS_WUNELLI AS TW;\\n\\n\\nOUTPUT(tw.files.pa_trip(TripDate='TripDate'));
\\n\\nbut only hthor gave me the result.\\nAny idea about why this happen?\", \"post_time\": \"2014-06-13 12:02:13\" },\n\t{ \"post_id\": 5904, \"topic_id\": 1356, \"forum_id\": 10, \"post_subject\": \"Re: Generating Random TimeStamps\", \"username\": \"bforeman\", \"post_text\": \"Here is a function that was contributed by one of our developers:\\n\\nexport getTimeStamp () := function\\n\\n// Function to get time in HHMMSS Milliseconds format\\n// Courtesy : Sanjay\\n\\nstring14 getTime() := BEGINC++\\n struct timeval tv;\\n struct timezone tz;\\n struct tm *tm;\\n gettimeofday(&tv, &tz);\\n tm=localtime(&tv.tv_sec);\\n sprintf(__result, "%02d%02d%02d%06d%02d", tm->tm_hour, tm->tm_min,\\n tm->tm_sec, tv.tv_usec);\\nENDC++;\\n\\nreturn getTime();\\n\\nend;
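One caution about the snippet as transcribed above: the sprintf format string has five conversion specifiers but only four arguments, and it writes straight into the fixed-length return buffer. A hedged, corrected sketch of the same idea (current time as HHMMSS plus microseconds -- note this is a timestamp, not a random value), assuming the same C headers are available to the generated code as in the original:

EXPORT getTimeStamp() := FUNCTION
  // Returns the current time as HHMMSS followed by six microsecond digits.
  STRING12 getTime() := BEGINC++
    struct timeval tv;
    struct timezone tz;
    struct tm *tm;
    char buf[16];
    gettimeofday(&tv, &tz);
    tm = localtime(&tv.tv_sec);
    // Four specifiers, four arguments; build into a scratch buffer first,
    // then copy exactly 12 characters into the fixed-length ECL result.
    sprintf(buf, "%02d%02d%02d%06d", tm->tm_hour, tm->tm_min, tm->tm_sec, (int)tv.tv_usec);
    memcpy(__result, buf, 12);
  ENDC++;
  RETURN getTime();
END;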
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-18 12:31:38\" },\n\t{ \"post_id\": 5902, \"topic_id\": 1356, \"forum_id\": 10, \"post_subject\": \"Generating Random TimeStamps\", \"username\": \"Sruthi\", \"post_text\": \"Hi,\\n\\nCould anyone please tell me how to generate random timestamps in ecl?\", \"post_time\": \"2014-06-17 12:04:39\" },\n\t{ \"post_id\": 5927, \"topic_id\": 1360, \"forum_id\": 10, \"post_subject\": \"Re: Short hand in transform with Skip\", \"username\": \"rlbars5\", \"post_text\": \"Works Great.. \\nJust an example I tested and it SKIPS the record in the IF condition.\\nThanks alot for the help\\n\\nrec:= record\\nString10 str1;\\nString10 str2;\\nend;\\n\\nds:=dataset([{'test','1'},\\n\\t {'test','2'},\\n\\t {'test','3'},\\n\\t {'test',''}],rec);\\n\\t\\t\\t\\t\\t\\t \\npro:=project(ds,transform(rec,\\t\\t\\tself.str2:=if(LEFT.str2='',SKIP,LEFT.str2),self:=LEFT));\\n\\t\\t\\t\\t\\t\\t\\t\\t\\noutput(pro);\", \"post_time\": \"2014-06-19 17:20:33\" },\n\t{ \"post_id\": 5920, \"topic_id\": 1360, \"forum_id\": 10, \"post_subject\": \"Re: Short hand in transform with Skip\", \"username\": \"tlhumphrey2\", \"post_text\": \"You can do this:\\nPro := PROJECT(dataset, \\n TRANSFORM(resulttype,\\n SELF.field:=IF(LEFT.fieldx=9999,SKIP,LEFT.fieldx)\\n SELF := LEFT\\n )\\n );\\n
\\n\\nIn the above, I have an IF statement on the right hand side of SELF.field assignment where if the value of some field of resulttype has a particular value we SKIP.\\n\\nThe IF statement cannot be on the right hand side of "SELF:=LEFT".\", \"post_time\": \"2014-06-19 15:06:59\" },\n\t{ \"post_id\": 5918, \"topic_id\": 1360, \"forum_id\": 10, \"post_subject\": \"Short hand in transform with Skip\", \"username\": \"rlbars5\", \"post_text\": \"Is it Possible to write a short hand of transform with a SKIP condition\\neg:A normal transform with a SKIP condition can be written as \\n\\nresulttype funcname ( parameterlist ) := TRANSFORM [, SKIP( condition )]\\nSELF.outfield := transformation;\\nEND;\\n\\nIs it possible to write a SKIP condition for the below code ?\\n\\nPro := PROJECT(dataset, TRANSFORM(resulttype,SELF := LEFT));\", \"post_time\": \"2014-06-19 14:33:01\" },\n\t{ \"post_id\": 10533, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"jwilt\", \"post_text\": \"For those interested, see also\\nhttps://track.hpccsystems.com/browse/HPCC-16051\", \"post_time\": \"2016-08-11 23:57:46\" },\n\t{ \"post_id\": 10393, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"jwilt\", \"post_text\": \"Does anyone know if the LOOP issues mentioned above, in "earlier HPCC versions", have been fixed?\\nI have a case where a LOOP query runs fine on Roxie and Hthor, but fails on Thor, with this same "Failed to receive reply from thor" error.\\nThis is HPCC 5.2.4-1.\\nThanks.\", \"post_time\": \"2016-08-04 20:28:13\" },\n\t{ \"post_id\": 5977, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Richard! 'Dataset from Transform' was new to me..\", \"post_time\": \"2014-06-27 10:50:32\" },\n\t{ \"post_id\": 5975, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,I can think of an alternative method too - insert records first using TRANSFORM/NORMALIZE and then, do an ITERATE to update newSum field. Will that be preferred over a LOOP?
Yes, that would be more -straight-forward than LOOP. As I said previously, LOOP is meant for recursion, not simple iteration.\\n\\nHowever, I prefer the new DATASET from TRANSFORM syntax for new record generation like this (http://hpccsystems.com/download/docs/ecl-language-reference/html/DATASET_from_TRANSFORM.html), \\nso I would do it this way:numStruct := record\\n integer4 SNo;\\n integer4 Num;\\n integer4 NumSum;\\nend;\\n\\ninitialDS := DATASET([{1,0,0}, {2,1,1}], numStruct);\\n\\ngenRow(DATASET(numStruct) ds, INTEGER NumToAdd) := FUNCTION\\n ctr := COUNT(ds);\\n newNum := RANDOM() % 10;\\n\\n NewDS := DATASET(NumToAdd,\\n TRANSFORM(numStruct,\\n\\t SELF.Sno := ctr + COUNTER;\\n\\t SELF.Num := newNum;\\n\\t SELF.NumSum := IF(COUNTER = 1, \\n\\t ds[ctr].NumSum,0)));\\n NewDSi := ITERATE(NewDS,\\n TRANSFORM(numStruct,\\n\\t SELF.NumSum := IF(COUNTER = 1,\\n RIGHT.NumSum + RIGHT.Num,\\n LEFT.NumSum + RIGHT.Num),\\n\\t SELF := RIGHT));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n RETURN (ds & newDSi);\\nEND; \\n\\nnewDS := genRow(initialDS,5);\\nOUTPUT(newDS);
Note that the ITERATE is only over the new records to build the NumSum. Also, I removed the "RANDOM() problem" by calling it only once for each new record and using the ITERATE to build the correct running totals.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-26 13:53:34\" },\n\t{ \"post_id\": 5973, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Richard,\\n\\nIn both your examples, newSum is based on the 'initial' dataset's last record. In my case, newSum should be based on the last-added record. Wouldn't I need a LOOP to achieve this? \\n\\nI can think of an alternative method too - insert records first using TRANSFORM/NORMALIZE and then, do an ITERATE to update newSum field. Will that be preferred over a LOOP?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-26 09:24:30\" },\n\t{ \"post_id\": 5965, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nHere's another way to do what you want using NORMALIZE to generate the new records instead of trying to do it with LOOP (which was really designed more for recursion, not this type of simple iteration):numStruct := record\\n integer4 SNo;\\n integer4 Num;\\n integer4 NumSum;\\nend;\\n\\ninitialDS := DATASET([{1,0,0}, {2,1,1}], numStruct);\\n\\ngenRow(DATASET(numStruct) ds, INTEGER NumToAdd) := FUNCTION\\n\\tctr := COUNT(ds);\\n\\n\\tEmptyDS := DATASET([{1,1,1}],numStruct);\\n\\n\\tnumStruct XF(numStruct L, INTEGER C) := TRANSFORM\\n\\t SELF.Sno := ctr + C;\\n\\t newNum := random() % 10 ;\\n\\t SELF.Num := newNum;\\n\\t SELF.NumSum := ds[ctr].NumSum + newNum;\\n\\tEND;\\t \\n\\tnewRecs := NORMALIZE(EmptyDS,NumToAdd,XF(LEFT,COUNTER));\\n\\t\\n\\tRETURN (ds & newRecs);\\nEND; \\n\\nnewDS := genRow(initialDS,5);\\nOUTPUT(newDS);
\\nAnd here's yet another way that uses a DATASET from TRANSFORM to add the additional records (even simpler than NORMALIZE):numStruct := record\\n integer4 SNo;\\n integer4 Num;\\n integer4 NumSum;\\nend;\\n\\ninitialDS := DATASET([{1,0,0}, {2,1,1}], numStruct);\\n\\ngenRow(DATASET(numStruct) ds, INTEGER NumToAdd) := FUNCTION\\n ctr := COUNT(ds);\\n NewDS := DATASET(NumToAdd,\\n TRANSFORM(numStruct,\\n SELF.Sno := ctr + COUNTER;\\n newNum := random() % 10;\\n SELF.Num := newNum;\\n SELF.NumSum := ds[ctr].NumSum + newNum));\\t \\n RETURN (ds & newDS);\\nEND; \\n\\nnewDS := genRow(initialDS,5);\\nOUTPUT(newDS);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-25 14:30:19\" },\n\t{ \"post_id\": 5964, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"tlhumphrey2\", \"post_text\": \"There are some problems with LOOP with earlier versions of the HPCC platform. First, this probably works on hthor.\\n\\nWhat version of HPCC Platform do you have?\", \"post_time\": \"2014-06-25 13:20:36\" },\n\t{ \"post_id\": 5961, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Bob,\\n\\nI put this in a loop since I want to generate multiple such rows dynamically; loopcounter specifies this number.\\n\\nI tried executing the code in hThor like you mentioned, and it worked (even with the loop)! But, it fails in thor. What could be the reason for this behavior?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-25 06:24:59\" },\n\t{ \"post_id\": 5953, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nYour code works fine for me on hthor. \\n\\nBut, is there any reason you can't just do it this way?numStruct := record\\n integer4 SNo;\\n integer4 Num;\\n integer4 NumSum;\\nend;\\n\\ninitialDS := dataset([{1,0,0}, {2,1,1}], numStruct);\\n\\nctr := count(initialDS);\\nnewNum := random() % 10;\\nnewSum := initialDS[ctr].NumSum + newNum;\\nnewDS := initialDS + ROW({ctr+1,newNum,newSum},numStruct);\\noutput(newDS);
\\nUsually, in ECL, the simple approach is best.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-24 15:47:59\" },\n\t{ \"post_id\": 5950, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Bob,\\n\\nI modeled mine after the 'BuildAnagrams' sample shared with ECL IDE installation - this uses a function too, as loopbody. \\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-24 10:49:49\" },\n\t{ \"post_id\": 5949, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Re: Failed to receive reply from thor\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nI guess you need to find a different approach. I can verify your behavior, but I'm not sure the your function is the correct form of a "loopbody". The docs clearly state:\\n\\nThe operation to iteratively perform. This may be a PROJECT, JOIN, or other such operation. ROWS(LEFT) is always used as the operation's first parameter, indicating the specified dataset is the input parameter.
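For reference, a minimal self-contained sketch of that documented form (hypothetical record and values), with the loopbody written as a PROJECT over ROWS(LEFT):

rec := RECORD
  UNSIGNED4 n;
END;
ds := DATASET([{1},{2},{3}], rec);

// Run the loopbody three times; each pass adds 10 to every value,
// and the result of one pass feeds the next via ROWS(LEFT).
bumped := LOOP(ds, 3,
               PROJECT(ROWS(LEFT),
                       TRANSFORM(rec, SELF.n := LEFT.n + 10)));
OUTPUT(bumped);   // 31, 32, 33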
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-24 10:35:34\" },\n\t{ \"post_id\": 5939, \"topic_id\": 1365, \"forum_id\": 10, \"post_subject\": \"Failed to receive reply from thor\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Is there a problem with the following function I wrote to generate a new row on the fly? I have intentionally kept the loop counter as 1.\\n\\n\\nnumStruct := record\\n\\tinteger4 SNo;\\n\\tinteger4 Num;\\n\\tinteger4 NumSum;\\nend;\\n\\n\\tinitialDS := dataset([{1,0,0}\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t, {2,1,1}]\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t, numStruct);\\n\\n\\nnumStruct genRow(dataset(numStruct) ds) := function\\n\\tctr := count(ds);\\n\\t\\n\\tnewNum := random() % 10;\\n\\tnewSum := ds[ctr].NumSum + newNum;\\n\\n\\tnewRec := dataset([{ctr+1,newNum,newSum}]\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,numStruct);\\n\\n\\treturn (ds + newRec);\\n\\t\\nend;\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\nnewDS := loop(initialDS, 1, genRow(rows(left)));\\noutput(newDS);\\n
\\n\\nWhen I try to execute this, I get the following error:\\n\\nError: System error: -1: Failed to receive reply from thor...; (-1, Failed to receive reply from thor ...) (0, 0), -1, \\n\\nIf I pull the function code outside and execute it with loop commented, it works fine. What could be causing this problem?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-06-23 10:48:44\" },\n\t{ \"post_id\": 6002, \"topic_id\": 1375, \"forum_id\": 10, \"post_subject\": \"Re: Grouping\", \"username\": \"KatyChow\", \"post_text\": \"Thank you. I tried the SUM(Group,If(x,y,z)); method and it worked wonderfully!\", \"post_time\": \"2014-07-01 13:15:26\" },\n\t{ \"post_id\": 5999, \"topic_id\": 1375, \"forum_id\": 10, \"post_subject\": \"Re: Grouping\", \"username\": \"ghalliday\", \"post_text\": \"With a similar change to the definition of H.\\n\\nThe SUM/COUNT etc. values are only fully evaluated when the aggregate is complete - so they can't be enclosed in an expression which includes fields from the input file - unless those fields are part of the grouping expressions.\", \"post_time\": \"2014-07-01 09:31:11\" },\n\t{ \"post_id\": 5998, \"topic_id\": 1375, \"forum_id\": 10, \"post_subject\": \"Re: Grouping\", \"username\": \"ghalliday\", \"post_text\": \"I think by\\n\\nDECIMAL5_2 N := IF(h= '00' OR h= '01', SUM(GROUP,e),0);\\n\\nYou probably mean \\n\\nDECIMAL5_2 N := SUM(GROUP,IF(h= '00' OR h= '01', e, 0)));\\n\\nor even\\n\\nDECIMAL5_2 N := SUM(GROUP,e, (h= '00' OR h= '01'))); // filtered sum syntax\\n\\ni.e. Add up a set of values which are either e if a condition is true, or 0 if it is false, where the condition is based on some fields in your input dataset.\", \"post_time\": \"2014-07-01 09:27:08\" },\n\t{ \"post_id\": 5997, \"topic_id\": 1375, \"forum_id\": 10, \"post_subject\": \"Re: Grouping\", \"username\": \"bforeman\", \"post_text\": \"Hi Katy,\\n\\nWithout seeing what line is posting the error, it's tough to analyze based on your example, but I would try to break out your grouping for each individual field, and then possibly PROJECT the TABLE result and then conditionally filter out what you are trying to do in the PROJECT TRANSFORM. The compiler is telling you that the GROUPING cannot be done on your target field based on your expression. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-01 07:24:02\" },\n\t{ \"post_id\": 5994, \"topic_id\": 1375, \"forum_id\": 10, \"post_subject\": \"Grouping\", \"username\": \"KatyChow\", \"post_text\": \"Hello HPCC community,\\n\\nI am trying to create a table that needs to group by 3 different criteria. I also need to create a lot of different new columns within this table. I keep on getting a "Field [specific field name here] in TABLE does not appear to be properly defined by grouping conditions. Could someone clarify how I should be writing my code so that it can work?\\n\\nex. 
\\n\\nAttrTable := TABLE(Attr0(a<> 0), {a; b; c;\\nUNSIGNED6 T := SecondsApart(MAX(GROUP,d),MIN(GROUP,d));\\nDECIMAL5_2 M := SUM(GROUP,e);\\nDECIMAL5_2 N := IF(h= '00' OR h= '01', SUM(GROUP,e),0);\\nDECIMAL5_2 H := IF(x >= 55, SUM(GROUP,y),0) + IF(z*3600 >= 55, SUM(GROUP,w),0);},\\na, b, c);\\n\\nWhat is the correct way to do this?\", \"post_time\": \"2014-06-30 20:38:57\" },\n\t{ \"post_id\": 6009, \"topic_id\": 1377, \"forum_id\": 10, \"post_subject\": \"Re: Good method to find specific letters in strings?\", \"username\": \"KatyChow\", \"post_text\": \"Thanks Bob!\", \"post_time\": \"2014-07-02 13:02:41\" },\n\t{ \"post_id\": 6008, \"topic_id\": 1377, \"forum_id\": 10, \"post_subject\": \"Re: Good method to find specific letters in strings?\", \"username\": \"bforeman\", \"post_text\": \"Hi Katy,\\n\\nCheck out the String Libraries in the Standard Library Reference PDF. There are quite a few functions that you can use, like Find and StartsWith and EndsWith.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-02 07:39:12\" },\n\t{ \"post_id\": 6004, \"topic_id\": 1377, \"forum_id\": 10, \"post_subject\": \"Good method to find specific letters in strings?\", \"username\": \"KatyChow\", \"post_text\": \"Hello,\\n\\nI am trying to filter out bad data that has a format which either starts with 0- or ends with FVD. The only issue with looking for the values with FVD is that they are not in the same spot for every string. \\nex.\\n0-100582642FVD \\n0-100583049FVD \\n0-100585097FVD \\n0-1006009514FVD\\n0-100615860FVD \\n0-1006664170FVD\\n0-100696925FVD \\n0-100697966FVD \\n\\nIf I want to use a filter that looks for the FVD on my data set but the string length is different what should I do? Is there a general command I can just call or do I need to make several different filters for the string length issue?\", \"post_time\": \"2014-07-01 15:00:02\" },\n\t{ \"post_id\": 6140, \"topic_id\": 1401, \"forum_id\": 10, \"post_subject\": \"Re: An odd thing about REGEXREPLACE\", \"username\": \"xiaolou\", \"post_text\": \"[quote="rtaylor":cbybhncr]xiaolou,\\n\\nOK, I don't understand what the problem is here. When I run your code I get \\n<1><2><3><4> \\nas the first result and \\n1234 \\nas the second result, which is what I thought you wanted from the way I read your post.\\n\\nSo, what's the issue?\\n\\nRichard\\nRichard, \\nThe second expected result should be '1234', the half-width characters.\\n\\nxiaolou.\", \"post_time\": \"2014-07-30 00:51:21\" },\n\t{ \"post_id\": 6136, \"topic_id\": 1401, \"forum_id\": 10, \"post_subject\": \"Re: An odd thing about REGEXREPLACE\", \"username\": \"rtaylor\", \"post_text\": \"xiaolou,\\n\\nOK, I don't understand what the problem is here. When I run your code I get \\n<1><2><3><4> \\nas the first result and \\n1234 \\nas the second result, which is what I thought you wanted from the way I read your post.\\n\\nSo, what's the issue?\\n\\nRichard\", \"post_time\": \"2014-07-29 19:41:18\" },\n\t{ \"post_id\": 6133, \"topic_id\": 1401, \"forum_id\": 10, \"post_subject\": \"An odd thing about REGEXREPLACE\", \"username\": \"xiaolou\", \"post_text\": \"Hi, \\nI try to use REGEXREPLACE to process some unicode text, and find an odd thing.\\nFollowing code works fine, it gives me expected result '<1><2><3><4>'.\\nUNICODE Fn_Test1(UNICODE input) :=FUNCTION\\n result:=u'<'+input+u'>';\\n\\t\\tRETURN result;\\nEND;\\n\\noutput(REGEXREPLACE(u'[1234567890]',u'1234',Fn_Test1(u'$0')));
\\n\\nHowever, following code can not give me the expected result '1234'.\\nUNICODE Fn_Test2(UNICODE input) :=FUNCTION\\n result:=CASE(input,u'0'=>u'0',\\n\\t\\t u'1'=>u'1',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'2'=>u'2',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'3'=>u'3',\\n\\t\\t\\t\\t\\t\\t\\t u'4'=>u'4',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'5'=>u'5',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'6'=>u'6',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'7'=>u'7',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'8'=>u'8',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t u'9'=>u'9',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t input);\\n\\t\\tRETURN result;\\nEND;\\n\\noutput(REGEXREPLACE(u'[1234567890]',u'1234',Fn_Test2(u'$0')));
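A likely explanation, offered here as a hedged aside: the third argument of REGEXREPLACE is an ordinary UNICODE expression, not a per-match callback, so Fn_Test2(u'$0') is evaluated once, before any matching, and only ever sees the two-character literal u'$0'; the $0 back-reference is substituted afterwards, which is why the full-width digits come back unchanged. One workaround in that spirit is to map each full-width digit with its own replacement (toHalfWidth is a hypothetical helper name):

// One REGEXREPLACE per full-width digit, applied innermost-first.
toHalfWidth(UNICODE s) :=
    REGEXREPLACE(u'０', REGEXREPLACE(u'１', REGEXREPLACE(u'２', REGEXREPLACE(u'３', REGEXREPLACE(u'４',
    REGEXREPLACE(u'５', REGEXREPLACE(u'６', REGEXREPLACE(u'７', REGEXREPLACE(u'８', REGEXREPLACE(u'９',
    s, u'9'), u'8'), u'7'), u'6'), u'5'), u'4'), u'3'), u'2'), u'1'), u'0');

OUTPUT(toHalfWidth(u'１２３４'));   // expected: 1234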
\\n\\nWhy is this? And how to fix this?\", \"post_time\": \"2014-07-29 15:33:06\" },\n\t{ \"post_id\": 6185, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"gmarcan\", \"post_text\": \"Hi Richard,\\n\\nThank you for the tip. "Apply" is exactly what I was looking for and it works perfectly. I still have to use NOTHOR, but the code runs on the cluster now with no Dali errors. Here is my updated code:\\n\\n\\ntagSubFiles(string superFile,string newTag) := FUNCTION \\n\\t\\t\\t\\tsubFileNames := fileservices.SuperFileContents(superFile);\\n\\t\\t\\t\\treturn apply(subFileNames,fileservices.SetFileDescription( '~'+Name,newTag));\\n\\t\\t\\t\\tEND;\\n\\nNOTHOR(tagSubFiles(stringFileName,stringNewTag));\\n
\\n\\nThanks again.\\n\\nRegards,\\n\\nGabriel\", \"post_time\": \"2014-08-06 14:26:26\" },\n\t{ \"post_id\": 6149, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"rtaylor\", \"post_text\": \"Gabriel,\\n\\nHave you tried using APPLY, something like this?ds := STD.file.SuperFileContents(archiveSuperFile);\\nAPPLY(ds(STD.file.GetFileDescription( '~'+ds.name)=''),\\n STD.file.SetFileDescription( '~'+ds.name , newTag));
That approach may work for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-31 20:33:11\" },\n\t{ \"post_id\": 6148, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"gmarcan\", \"post_text\": \"Hi Richard,\\n\\nWhen I leave out the when, I get a check error:\\n\\nError: syntax error near "assignNewTag" : expected < (12, 69), 3002, \\n\\nI assumed this is because the function has no return value and is hence treated as an action? That is why I used the "when" function, which is the only way I found to push the action through while preforming the record level transform.\\n\\nRegards,\\n\\n- Gabriel\", \"post_time\": \"2014-07-31 20:14:53\" },\n\t{ \"post_id\": 6147, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"rtaylor\", \"post_text\": \"Gabriel,\\n\\nI'm frankly surprised that your code syntax checks, since your use of WHEN is not what WHEN was intended for.\\n\\nWhat happens if you simply remove the WHEN and make the line this: SELF.description := if (tempDescription != '',tempDescription,assignNewTag);
That's the way I would start writing the code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-31 20:04:36\" },\n\t{ \"post_id\": 6146, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"gmarcan\", \"post_text\": \"Hi Bob. Thanks for replying. I get the same error if I leave out the NOTHOR\\n\\n- Gabriel\", \"post_time\": \"2014-07-31 19:52:02\" },\n\t{ \"post_id\": 6145, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"Re: No access to Dali despite NOTHOR and actions in transfor\", \"username\": \"bforeman\", \"post_text\": \"What happens if you remove the NOTHOR directives? Why were you using them in the first place?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-31 19:37:17\" },\n\t{ \"post_id\": 6141, \"topic_id\": 1403, \"forum_id\": 10, \"post_subject\": \"No access to Dali despite NOTHOR and actions in transforms\", \"username\": \"gmarcan\", \"post_text\": \"Hi,\\n\\nPlease see code below. What I am trying to do is change the logical description of files in a superfile if a description does not already exist\\n\\n\\nprepDate := '20140730';\\nnewTag := 'process_date '+prepDate;\\ntagUntaggedArchivedFile (string rawSuperFile) := FUNCTION \\n\\t\\t\\t archiveSuperFile := rawSuperFile+'_archive';\\n\\t\\t\\t\\tsubFileNames := nothor(fileservices.SuperFileContents(archiveSuperFile));\\n\\t\\t\\t\\t{subFileNames, string description :=''} tUpdateDescription(subFileNames L) := TRANSFORM\\n\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\ttempDescription := trim(fileservices.GetFileDescription( '~'+L.name));\\n\\t\\t\\t\\t\\t\\tassignNewTag := nothor(fileservices.SetFileDescription( '~'+L.name , newTag));\\n\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\tSELF.description := if (tempDescription != '',tempDescription,when('',assignNewTag));\\n\\t\\t\\t\\t\\t\\tSELF := L;\\n\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\tEND;\\n\\t\\t\\t\\treturn output(nothor(project(subFileNames,tUpdateDescription(LEFT))));\\n\\t\\t\\tEND;
\\n\\nThe code works when I use hthor, but when I try to run it on a cluster - I get a "No access to Dali" error. As you can see, my code already uses the NOTHOR directive. I suspect the issue is related to the "when" function, but I don't know.\\n\\nIn addition, although the "when" function does the job of changing the file description of each filename in the recordset, I get a sense this might not be the best solution. In that respect, how do I preform an action on each record (or field within a record)? (in a transform or otherwise)?\\n\\nThank you.\", \"post_time\": \"2014-07-30 15:59:36\" },\n\t{ \"post_id\": 6161, \"topic_id\": 1407, \"forum_id\": 10, \"post_subject\": \"Re: Moving mass files to another cluster\", \"username\": \"gouldbrfl\", \"post_text\": \"Thanks\", \"post_time\": \"2014-08-04 19:07:47\" },\n\t{ \"post_id\": 6160, \"topic_id\": 1407, \"forum_id\": 10, \"post_subject\": \"Re: Moving mass files to another cluster\", \"username\": \"rtaylor\", \"post_text\": \"Mike,\\n\\nNone that I know of. You could put in a JIRA request for a logical file function that does the same rename and delete sequence your ECL code is doing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-04 18:40:52\" },\n\t{ \"post_id\": 6159, \"topic_id\": 1407, \"forum_id\": 10, \"post_subject\": \"Re: Moving mass files to another cluster\", \"username\": \"gouldbrfl\", \"post_text\": \"Thanks,\\n\\nIs there any way to handle something like a transaction for logical files? If either of the renames don't work, then the I do not want to delete.\\n\\nBest Regards\\n\\nMichael Gould\", \"post_time\": \"2014-08-04 18:00:38\" },\n\t{ \"post_id\": 6158, \"topic_id\": 1407, \"forum_id\": 10, \"post_subject\": \"Re: Moving mass files to another cluster\", \"username\": \"rtaylor\", \"post_text\": \"Mike,\\n\\nYou can get rid of that error by just removing the calls to StartSuperfileTransaction() and FinishSuperfileTransaction() because all you're doing in that "transaction frame" is calling the RenameLogicalFile() function, which has nothing to do with superfiles.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-04 17:57:40\" },\n\t{ \"post_id\": 6156, \"topic_id\": 1407, \"forum_id\": 10, \"post_subject\": \"Moving mass files to another cluster\", \"username\": \"gouldbrfl\", \"post_text\": \"CopytoCluster(string filename) := Function\\t\\t\\t\\t\\t \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t serv := 'server=http:IP address goes here';\\n nsplit := ' nosplit=1 ';\\n dstcluster := 'dstcluster=clustername goes here';\\n over := 'overwrite=1 ';\\n repl := 'replicate=1 ';\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tcomp :='compress=1 ';\\n action := 'action=copy ';\\n wrap := 'wrap=1 ';\\n \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tsrcname := 'srcname=~'+filename + ' ';\\n dstname := 'dstname=~'+filename + '_copy ';\\n srcdali := 'srcdali=daili IP goes here';\\n copyfilecmd := serv + over + repl + action + dstcluster + dstname + srcname + nsplit + wrap + comp + srcdali;\\n \\n output(copyfilecmd);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\treturn Sequential(\\n STD.File.DfuPlusExec(copyfilecmd)\\n ,nothor(STD.File.StartSuperFileTransaction())\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,nothor(STD.File.RenameLogicalFile(filename, filename + '_old'))\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,nothor(STD.File.RenameLogicalFile(filename + '_copy', 
filename))\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,nothor(STD.File.DeleteLogicalFile(filename + '_old'))\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,nothor(STD.File.FinishSuperFileTransaction())\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t );\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n END;\\n\\t\\t\\t\\t\\t\\t \\n InRec := RECORD\\n BOOLEAN superfile;\\n UNSIGNED8 size;\\n UNSIGNED8 rowcount;\\n STRING19 modified;\\n STRING owner;\\n STRING cluster;\\nEND;\\n\\n OutRec := Record\\nSTRING name;\\nEND;\\n\\n\\nds := STD.File.LogicalFileList('*::in::*',TRUE,FALSE);\\n\\nds1 := ds(cluster <> 'cluster name goes here');\\n\\napply(ds1,CopytoCluster(name));\\n\\nHere is the error that I get\\n\\nError: Cannot call function startsuperfiletransaction in a non-global context (47, 1), 4055, \\n\\nI need to rename the files once they are copied to the new cluster. How would I go about that. Do I put the apply in a Sequential with the STD.File commands?\\n\\nBest Regards\\n\\nMichael Gould\", \"post_time\": \"2014-08-04 13:38:45\" },\n\t{ \"post_id\": 6196, \"topic_id\": 1413, \"forum_id\": 10, \"post_subject\": \"Re: Define a PATTERN by an UNICODE parameter\", \"username\": \"xiaolou\", \"post_text\": \"[quote="bforeman":inzernjr]Your code looks good to me, I would suggest that you open an issue in the Issue Tracker so that the development team can get a more detailed look at the Syntax Error.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\\n\\nOk, I have created a issue . https://track.hpccsystems.com/browse/IDE-376\\n\\nThanks, Bob.\", \"post_time\": \"2014-08-08 01:37:35\" },\n\t{ \"post_id\": 6191, \"topic_id\": 1413, \"forum_id\": 10, \"post_subject\": \"Re: Define a PATTERN by an UNICODE parameter\", \"username\": \"bforeman\", \"post_text\": \"Your code looks good to me, I would suggest that you open an issue in the Issue Tracker so that the development team can get a more detailed look at the Syntax Error.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-07 12:05:25\" },\n\t{ \"post_id\": 6184, \"topic_id\": 1413, \"forum_id\": 10, \"post_subject\": \"Re: Define a PATTERN by an UNICODE parameter\", \"username\": \"xiaolou\", \"post_text\": \"[quote="bforeman":yby43rpb]However, when I passed the "regex" parameter by a UNICODE definition name as following code , I got a syntax error.
\\n\\nAaron, did you post the syntax error? Is using the "u" constant valid in a PATTERN?\\n\\nI found this in our documentation:\\n\\nEscape sequences can be used to define UNICODE Character ranges. The encoding is UTF-16 Big Endian. \\nFor example:\\nPATTERN AnyChar := PATTERN(U'[\\\\u0001-\\\\u7fff]');\\n
\\n\\nRegards,\\n\\nBob\\n\\nBob, thanks for your reply.\\nI think using 'u' is okay in a PATTERN, because following code works fine.\\n\\nfn_test(input,regex) :=FUNCTIONMACRO\\n PATTERN p := PATTERN( regex ); \\n RETURN PARSE(DATASET([{input}],{unicode c}),c,p,{INTEGER4 match_position := MATCHPOSITION(p),UNICODE match_text :=MATCHUNICODE(p)}, MAX ,MANY ,BEST);\\nENDMACRO;\\n\\ncontent:=u'健康増進法に規定する健康増進事業実施者として、「健康増進事業実施者に対する健康診査の実施等に関する指針」(平成16年厚生労働省告示第242号)や、「健康保険法に基づく保健事業の実施等に関する指針」(平成16年厚生労働省告示第308号)に基づき、被保険者等の健康の保持増進のための健康教育・健康相談・健康診査等の事業を積極的に実施するとともに、専門スタッフを活用した保健指導や健康づくりに取り組むこと。';\\noutput(fn_test(content,u'(健康増進法|健康保険法|保険法)'));\\n
\\n\\nThe problem is I don't know how to define a PATTERN by an existing STRING or UNICODE definition.\\n\\nBy the way, here is the syntax error.\\n\\nError: syntax error near "names" : expected string, unicode-string \\n
\", \"post_time\": \"2014-08-06 13:33:31\" },\n\t{ \"post_id\": 6183, \"topic_id\": 1413, \"forum_id\": 10, \"post_subject\": \"Re: Define a PATTERN by an UNICODE parameter\", \"username\": \"bforeman\", \"post_text\": \"However, when I passed the "regex" parameter by a UNICODE definition name as following code , I got a syntax error.
\\n\\nAaron, did you post the syntax error? Is using the "u" constant valid in a PATTERN?\\n\\nI found this in our documentation:\\n\\nEscape sequences can be used to define UNICODE Character ranges. The encoding is UTF-16 Big Endian. \\nFor example:\\nPATTERN AnyChar := PATTERN(U'[\\\\u0001-\\\\u7fff]');\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-06 12:47:23\" },\n\t{ \"post_id\": 6173, \"topic_id\": 1413, \"forum_id\": 10, \"post_subject\": \"Define a PATTERN by an UNICODE parameter\", \"username\": \"xiaolou\", \"post_text\": \"Hi,\\n\\nI defined a FUNCTIONMACRO to get all matched position and text of an UNICODE content for a specific regular expression . \\nfn_test(input,regex) :=FUNCTIONMACRO\\n PATTERN p := PATTERN( regex );\\t\\n\\t\\tRETURN PARSE(DATASET([{input}],{unicode c}),c,p,{INTEGER4 match_position := MATCHPOSITION(p),UNICODE match_text :=MATCHUNICODE(p)}, MAX ,MANY ,BEST);\\nENDMACRO;
\\nFollowing code works fine.\\ncontent:=u'健康増進法に規定する健康増進事業実施者として、「健康増進事業実施者に対する健康診査の実施等に関する指針」(平成16年厚生労働省告示第242号)や、「健康保険法に基づく保健事業の実施等に関する指針」(平成16年厚生労働省告示第308号)に基づき、被保険者等の健康の保持増進のための健康教育・健康相談・健康診査等の事業を積極的に実施するとともに、専門スタッフを活用した保健指導や健康づくりに取り組むこと。';\\noutput(fn_test(content,u'(健康増進法|健康保険法|保険法)'));
\\n\\nHowever, when I passed the "regex" parameter by a UNICODE definition name as following code , I got a syntax error.\\nUNICODE names :=u'(健康増進法|健康保険法|保険法)';\\noutput(fn_test(content,names));
\\n\\nHow to correct this?\\n\\nAaron.\", \"post_time\": \"2014-08-05 13:21:30\" },\n\t{ \"post_id\": 6192, \"topic_id\": 1415, \"forum_id\": 10, \"post_subject\": \"Accessing WS-ECL Services from .Net\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"In the past, I have successfully called a WS-ECL service from .Net (I used the VM, of course, and didn't have to give my credentials while connecting to WS-ECL through browser or from .Net). \\n\\nBut now, when I try the same in a client setup (where I give credentials to connect to ECL Watch or WS-ECL through browser), while adding the service reference in .Net, the authentication window repeatedly pops up - once with 'http://.... servicename' and the next time with 'http://... servicename/_vti_bin/..$metadata'. It doesn't get past this even after giving the same credentials I use to connect to WS-ECL in browser. \\n\\nIs this due to some security configuration in the installation? Or, could I be doing something wrong?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2014-08-07 12:35:25\" },\n\t{ \"post_id\": 6244, \"topic_id\": 1426, \"forum_id\": 10, \"post_subject\": \"Re: Compile/Link failed for embedded java program\", \"username\": \"JimD\", \"post_text\": \"It sounds like you installed an RPM version without plugin support.\\n\\nFor RPM based systems, there are two different installation packages available. One package includes the optional plug-ins to support embedded code from other languages, such as JAVA, JavaScript, R, or Python.\\n\\nIf you want support for other languages, choose the package for your distro that begins with:\\n\\nhpccsystems-platform_community-with-plugins-\\n\\nYou must install the packages that have the plug-ins using the --nodeps option. \\n\\nThen you will have to install the dependencies for each language you wish to support. The dependencies to support each language are installed separately. \\n\\nHope this helps,\\n\\nJim\", \"post_time\": \"2014-08-20 20:05:50\" },\n\t{ \"post_id\": 6242, \"topic_id\": 1426, \"forum_id\": 10, \"post_subject\": \"Re: Compile/Link failed for embedded java program\", \"username\": \"bforeman\", \"post_text\": \"I remember seeing this checklist on the web site:\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/Java+Integration\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-20 18:01:06\" },\n\t{ \"post_id\": 6240, \"topic_id\": 1426, \"forum_id\": 10, \"post_subject\": \"Compile/Link failed for embedded java program\", \"username\": \"littlewhite\", \"post_text\": \"Hi,\\n\\nWhen I tried to compile a java embedded ecl code, it reports the following error.\\n\\neclcc\\tunknown: 3118: Mismatch in subminor version number (4.2.2 v 4.2.4)\\neclcc\\t0:\\neclcc\\t0: ---------- compiler output --------------\\neclcc\\t0: /usr/bin/ld: cannot find -ljavaembed\\neclcc\\t0: collect2: ld returned 1 exit status\\neclcc\\t0:\\neclcc\\t0: --------- end compiler output -----------\\n\\nDoes it mean the library of java embed is missing? 
Where is the directory I can check for it?\\n\\nThank you in advance.\", \"post_time\": \"2014-08-19 19:20:23\" },\n\t{ \"post_id\": 6248, \"topic_id\": 1427, \"forum_id\": 10, \"post_subject\": \"Re: Index Error\", \"username\": \"David Dasher\", \"post_text\": \"Hi Bob\\n\\nMany thanks for the reply, I'll take a look.\\n\\nRegards\\n\\nDavid\", \"post_time\": \"2014-08-22 14:11:29\" },\n\t{ \"post_id\": 6247, \"topic_id\": 1427, \"forum_id\": 10, \"post_subject\": \"Re: Index Error\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nYou will need to build a standard index that contains a document id and byte pointer to the document record, and then use FETCH to retrieve the actual document information. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-22 12:57:18\" },\n\t{ \"post_id\": 6246, \"topic_id\": 1427, \"forum_id\": 10, \"post_subject\": \"Index Error\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nWe have some text columns that are pretty big that I need in an index. \\n\\nI am getting the following error building \\nError: System error: 0: Graph[1609], SLAVE 10.10.0.9:20900: Graph[1609], indexwrite[1612]: key+payload (5107) exceeds max length (4096), (0, 0), 0, \\n\\nWhat is the best strategy for building indexes with large text fields from documents?\\n\\nRegards\\n\\nDavid\", \"post_time\": \"2014-08-21 13:01:36\" },\n\t{ \"post_id\": 6266, \"topic_id\": 1430, \"forum_id\": 10, \"post_subject\": \"Re: Spraying Mulitple Xml from Flat file\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot \", \"post_time\": \"2014-09-02 13:02:14\" },\n\t{ \"post_id\": 6265, \"topic_id\": 1430, \"forum_id\": 10, \"post_subject\": \"Re: Spraying Mulitple Xml from Flat file\", \"username\": \"bforeman\", \"post_text\": \"Hi Pius,\\n\\nI think that the best course would be to spray the flat file as a single Delimited file to the cluster, and then extract the XML streams using PARSE and simply OUTPUT each record to new XML files as needed. In other words, use ECL to get the XML that you need.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-02 12:18:36\" },\n\t{ \"post_id\": 6264, \"topic_id\": 1430, \"forum_id\": 10, \"post_subject\": \"Spraying Mulitple Xml from Flat file\", \"username\": \"pius_francis\", \"post_text\": \"Hi,\\n I have a flat file which has 'n' number of xml files in side it. Can we spray that flat file into individual xml's? 
Please help me out regarding the same.\\n\\nThanks,\\nPius\", \"post_time\": \"2014-09-01 08:39:54\" },\n\t{ \"post_id\": 6294, \"topic_id\": 1441, \"forum_id\": 10, \"post_subject\": \"Re: Dataset not Active Error\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nCan you post your code, please?\\n\\nRichard\", \"post_time\": \"2014-09-11 20:11:32\" },\n\t{ \"post_id\": 6292, \"topic_id\": 1441, \"forum_id\": 10, \"post_subject\": \"Dataset not Active Error\", \"username\": \"KatyChow\", \"post_text\": \"Hi,\\n\\nDoes anyone know why I will write a join and it will output, but when I call it in a table it says my dataset is not active?\\n\\nThanks!\\n\\nKaty Chow\", \"post_time\": \"2014-09-11 16:14:29\" },\n\t{ \"post_id\": 6425, \"topic_id\": 1442, \"forum_id\": 10, \"post_subject\": \"Re: JOIN LHS not in sorted order\", \"username\": \"srbhkmr\", \"post_text\": \"In case it helps I've added ecl compiler logfiles for the two scenarios:\\n\\nWhen LINE:2 is uncommented, the log file shows that PROJECT [actualtable] has been spotted as an item to be hoisted whereas, in other case there is no such decision being made.\", \"post_time\": \"2014-10-13 11:41:46\" },\n\t{ \"post_id\": 6411, \"topic_id\": 1442, \"forum_id\": 10, \"post_subject\": \"Re: JOIN LHS not in sorted order\", \"username\": \"srbhkmr\", \"post_text\": \"Hi Bob, Thanks for the reply. That issue on tracker that you mentioned is a little different. the problem I have is as following:\\nFor the undermentioned ECL code I get a kind of weird behaviour and finally an ERROR saying: "JOIN LHS not in sorted order".\\n\\n\\n
\\ng := DATASET(somefile1, somefile1_layout,CSV(HEADING(1), QUOTE('"')));\\n\\nM24_test_ := DATASET(masterTable24x7_testSet, masterTable_24x7_layout, CSV(HEADING(1)));\\nM24_test := M24_test_(col1 <> 0);\\n\\nJ := JOIN(M24_test, g, LEFT.col1 = RIGHT.col1, TRANSFORM(RECORDOF(LEFT) OR RECORDOF(RIGHT), SELF := LEFT; SELF := RIGHT;), SMART);\\n\\nactualTable_ := TABLE(J, {subs_id, catId}, subs_id, catId, UNSORTED, MERGE);\\nactualTable := PROJECT(actualTable_, TRANSFORM(mapping_layout, \\nSELF.id := LEFT.subs_id; SELF.label := LEFT.catId));\\n\\nOUTPUT(actualTable, NAMED('actualTable')); //LINE: 1\\n\\nrecommTable := DATASET(recommTableFile, mapping_layout, CSV(HEADING(1)));\\n\\nUNSIGNED4 total_population := COUNT(TABLE(M24_test, {subs_id}, subs_id, UNSORTED, MERGE));\\n\\nrecomm := recommTable;\\nactual := actualTable;\\nN := total_population;\\n \\nlabels := TABLE(recomm, {label}, label, UNSORTED, MERGE);\\n \\nR := PROJECT(labels, TRANSFORM(contMatrix_layout,\\n SELF.label := LEFT.label;\\n SELF.total_recommendations := COUNT( recomm(label=LEFT.label) );\\n SELF.tp := COUNT( JOIN(recomm(label=LEFT.label), actual, LEFT.id = RIGHT.id and LEFT.label = RIGHT.label, TRANSFORM(LEFT)));\\n SELF.fp := COUNT(recomm(label=LEFT.label)) - SELF.tp ;\\n SELF.fn := COUNT( (actual - recomm)(label=LEFT.label) ); //LINE: 2\\n SELF.tn := N - SELF.tp - SELF.fp - SELF.fn;\\n )\\n );\\nPROJECT(R, TRANSFORM(RECORDOF(R),\\n SELF.prec := LEFT.tp/(LEFT.tp + LEFT.fp);\\n SELF.rec := LEFT.tp/(LEFT.tp + LEFT.fn);\\n SELF.mcc := (LEFT.tp * LEFT.tn - LEFT.fp * LEFT.fn) / sqrt(LEFT.tp+LEFT.fn) / sqrt(LEFT.fp + LEFT.tn) / sqrt(LEFT.tp + LEFT.fp) / sqrt(LEFT.fn + LEFT.tn);\\n SELF.segment_lift := (LEFT.tp / COUNT(recomm(label=LEFT.label))) / ((COUNT(actual(label=LEFT.label))) / N);\\n SELF := LEFT;\\n )\\n );\\n
\\n\\nIt took me a while to narrow it down to a minimal code and reproduce it. This code is just a representation of the flow of things happening, but I can also share the exact code and datasets to reproduce this problem.\\n\\nIf I keep the OUTPUT statement on LINE:1 commented then the Workunit works fine and generates proper results, but if I keep it uncommented it complains that: JOIN LHS not in sorted order for the JOIN operation in LINE:2\\n\\nFollowing is the observation on the graphs being generated for two scenarios:\\n\\nCase1: LINE:1 is commented: JOIN at LINE:2 is being instantiated as a global JOIN and works fine.\\nCase2: LINE1 is not commented: JOIN at LINE:2 is being instantiated as a HASH_DISTRIBUTE followed by a LOCAL_JOIN.\\n\\nIt looks quite weird why the engine would take two different approaches to do the JOIN at LINE:2 just because of some OUTPUT statement of one of the participant dataset is on.\\nand even if it decides to do a HashDistribute-LocalJoin I think the environment is not being set up properly as it complains that the LHS is not yet sorted for this operation.\\n\\nCould someone please throw some light on this. I'm using community_5.0.0-3 built on ubuntu 14.04. \\n\\nPFA the logs of MasterNode for the two scenarios.\\n\\n\\nThanks,\", \"post_time\": \"2014-10-09 07:53:04\" },\n\t{ \"post_id\": 6297, \"topic_id\": 1442, \"forum_id\": 10, \"post_subject\": \"Re: JOIN LHS not in sorted order\", \"username\": \"bforeman\", \"post_text\": \"I've seen another report on this ECL file that is still open in the issue tracker, but not sure if it could be related:\\nhttps://track.hpccsystems.com/browse/ML-223\\n\\nIf you change versions back down to 4.X, do you still get the same error?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-12 17:58:50\" },\n\t{ \"post_id\": 6296, \"topic_id\": 1442, \"forum_id\": 10, \"post_subject\": \"JOIN LHS not in sorted order\", \"username\": \"srbhkmr\", \"post_text\": \"A Worunit shows me following error:\\n\\neclagent\\t-1: System error: -1: Graph[82], join[86]: SLAVE 192.168.136.205:20500: JOIN LHS not in sorted order,
\\n\\nThe referred JOIN operation is from Associate.ecl (line: 237) from ML library, where I don't see why that particular operation would require the LHS to be in sorted order.\\nInterestingly the same code used to work for an input of smaller dataset for Rule Mining but the same code fails for a bigger dataset. \\n\\nCould anyone throw some light about thsi error message, as why this happens?\\nI'm using community_5.0.0-3 version of HPCC-platform on ubuntu 14.04\\n\\nThanks,\", \"post_time\": \"2014-09-12 11:22:25\" },\n\t{ \"post_id\": 6303, \"topic_id\": 1443, \"forum_id\": 10, \"post_subject\": \"Re: Vertical Slice : TABLE vs PROJECT\", \"username\": \"lpezet\", \"post_text\": \"Thanks a lot!\", \"post_time\": \"2014-09-15 14:16:10\" },\n\t{ \"post_id\": 6302, \"topic_id\": 1443, \"forum_id\": 10, \"post_subject\": \"Re: Vertical Slice : TABLE vs PROJECT\", \"username\": \"rtaylor\", \"post_text\": \"Luc,\\n\\nThe primary difference between the two is age -- that TABLE syntax has been around since day one (about 15 years), while that form of PROJECT was added to the language a couple of years ago.\\n\\nAlso, there are fundamental differences to how the RECORD structure must be defined for these two alternatives, demonstrated by this example code:IMPORT STD;\\n\\nds := DATASET([{'a',1},{'b',2},{'c',3}],{STRING1 Ltr,UNSIGNED1 Nbr});\\n\\nds;\\n\\nNewRec1 := RECORD\\n UNSIGNED1 Nbr;\\n STRING1 Ltr;\\nEND;\\nNewRec2 := RECORD\\n UNSIGNED1 Nbr := ds.Nbr+ 10;\\n STRING1 Ltr := STD.Str.ToUpperCase(ds.Ltr);\\nEND;\\n\\n//TABLE(ds,NewRec1); //sybtax errors\\nPROJECT(ds,NewRec1);\\nTABLE(ds,NewRec2);\\nPROJECT(ds,NewRec2);
\\nNote carefully the difference in result for the last two.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-15 13:31:15\" },\n\t{ \"post_id\": 6299, \"topic_id\": 1443, \"forum_id\": 10, \"post_subject\": \"Vertical Slice : TABLE vs PROJECT\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nIs there more to it in the "Vertical Slice" definition for TABLE?\\nBy that I mean, what's the different between the following 2 snippets?\\n\\nTABLE(People, SomeNewLayout);\\n
\\nand\\n\\nPROJECT(People, SomeNewLayout);\\n
\\n\\nThanks!\\nLuc.\", \"post_time\": \"2014-09-12 22:02:03\" },\n\t{ \"post_id\": 6372, \"topic_id\": 1453, \"forum_id\": 10, \"post_subject\": \"Re: PIPE\", \"username\": \"lpezet\", \"post_text\": \"Perfect! It works!\\n\\nThank you very much.\", \"post_time\": \"2014-09-22 21:34:42\" },\n\t{ \"post_id\": 6371, \"topic_id\": 1453, \"forum_id\": 10, \"post_subject\": \"Re: PIPE\", \"username\": \"bforeman\", \"post_text\": \"Hi Luc,\\n\\nTry adding the relative path to your PIPE command, something like this:\\n\\nPIPE('/var/lib/HPCCSystems/mythor/hpccwhoami', whoami_layout, CSV);
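For completeness, a hedged sketch of the same call with the result captured so the per-node rows can be inspected (path as in the suggestion above; the layout mirrors the one from the question):

whoami_layout := RECORD
  STRING hostname;
  STRING ip;
END;

// PIPE here is used as a dataset source: each node runs the command and
// its stdout is parsed as CSV into whoami_layout rows.
nodes := PIPE('/var/lib/HPCCSystems/mythor/hpccwhoami', whoami_layout, CSV);
OUTPUT(nodes);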
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-22 19:27:22\" },\n\t{ \"post_id\": 6334, \"topic_id\": 1453, \"forum_id\": 10, \"post_subject\": \"PIPE\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to run a command from ECL using PIPE.\\nI created a very simple script called "hpccwhoami":\\n#!/bin/sh\\nhostname="something"\\nip=99.99.99.99\\necho "$hostname,$ip"\\n
\\nI can run it just fine locally (i.e. it has execution mode and for everybody).\\nI deployed it in /var/lib/HPCCSystems/mythor on all my nodes.\\nI then wrote the following ECL code:\\n\\nwhoami_layout := RECORD\\n STRING hostname;\\n STRING ip;\\nEND;\\nPIPE('hpccwhoami', whoami_layout, CSV);\\n
\\nIt compiles just fine, but when submitting it I get the following error:\\n\\nError: System error: 2: Could not run pipe process hpccwhoami (0, 0), 2, \\n
\\n\\nWhat am I doing wrong?\\n\\n\\nThanks!\\nLuc.\", \"post_time\": \"2014-09-18 21:23:08\" },\n\t{ \"post_id\": 6367, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Re: Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nI've checked with the developer consuming this output and there happy with a trailing 'Z' at the end of every line.\\n\\nBit of a hack, 'But at my back I always hear time's winged chariot hurrying near;'\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2014-09-22 15:22:59\" },\n\t{ \"post_id\": 6366, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Re: Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, then if you really really need to keep those trailing spaces AND produce the result as a CSV file, then you need to construct it yourself as a flat file, something like this:rd := RECORD\\n STRING12 one;\\nEND;\\n\\nd := DATASET ([{'a c ,a c '}],rd);\\n\\nOUTPUT(d,,'~RTTEST::TEST::CSVoutput',OVERWRITE);
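A hedged variation on the same idea, for data that is not already a single fixed-width field: pad each value to a fixed width, build the whole line yourself (separator and terminator included), and write it as a flat file so nothing gets trimmed. The file name and widths below are illustrative only:

inRec := RECORD
  STRING f1;
  STRING f2;
END;
inDS := DATASET([{'a c ','a c '}], inRec);

lineRec := RECORD
  STRING14 line;   // 6 chars + ',' + 6 chars + '\n'
END;

lineRec makeLine(inRec L) := TRANSFORM
  // Casting to STRING6 pads (or truncates) each value to exactly six characters,
  // so the trailing spaces survive; the newline keeps one record per line.
  SELF.line := (STRING6)L.f1 + ',' + (STRING6)L.f2 + '\n';
END;

OUTPUT(PROJECT(inDS, makeLine(LEFT)), , '~RTTEST::TEST::FixedWidthCSV', OVERWRITE);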
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-22 14:09:18\" },\n\t{ \"post_id\": 6365, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Re: Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"Allan\", \"post_text\": \"in addition the logical files on THOR only has size '4'. So the problem is not with the despray.\\n\\n\\nLogical Name\\tDescription\\tSize\\tRecords\\tModified (UTC/GMT)\\tOwner\\tCluster\\tParts\\nafw::output 4\\t 1\\t2014-09-22 13:16:58\\tallan\\tthordev10_2\\t50\\n
\", \"post_time\": \"2014-09-22 13:42:39\" },\n\t{ \"post_id\": 6364, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Re: Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nYes the despray is wrong.\\n\\n$ cat -vet afw.output\\na c$\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2014-09-22 13:38:35\" },\n\t{ \"post_id\": 6363, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Re: Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHave you examined the file with a text editor? IOW, are you sure the training spaces are NOT there, or is it possible it's an ECL Watch/ECL IDE display bug?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-22 12:58:35\" },\n\t{ \"post_id\": 6362, \"topic_id\": 1458, \"forum_id\": 10, \"post_subject\": \"Preserving trailing spaces on OUTPUT(CSV...\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIs there any way to preserve trailing spaces on CSV output?\\nThe following code shows the issue, the output is 4 characters in length, not the 6 characters I require:\\n\\nrd := RECORD\\n STRING one;\\nEND;\\n\\nd := DATASET ([{'a c '}],rd);\\n\\nOUTPUT(d,,'~afw::output',CSV(quote(''),terminator('\\\\n'),separator(',')),OVERWRITE);
\\n\\nUsing a STRINGn instead of STRING makes no difference.\", \"post_time\": \"2014-09-22 11:28:57\" },\n\t{ \"post_id\": 6401, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"rtaylor\", \"post_text\": \"Chang,\\n\\nIf you run the code I posted previously, you will see that the floating point result of (5.2 - 3.1) / 0.3
is not 7, but actually 7.000000000000001 which correctly rounds up to 8, producing 9 when you add 1 to that. This is an artifact of the way decimal fractions are represented in the binary IEEE floating point format. That's why floating point math on computers can be problematic.\\n\\nHowever, if you use DECIMAL data types for all terms in your calculations, then instead of doing the work on the math co-processor chip you will invoke our Binary Coded Decimal (BCD) math library that will do the math in Base-10 instead of Base-2. IOW, the BCD library will calculate exactly the same way as if you were doing it with paper and pencil. The downside is that, being software instead of a hardware solution, it won't go as fast. But the upside is that DECIMAL data types give you up to 32 digits of precision (you're limited to 15 with floating point), so it can be much more precise.\\n\\nHere's my previous example, re-coded to use DECIMAL types:RetMod(DECIMAL20_18 val) := MODULE\\n\\tEXPORT x0 := val;\\n\\tEXPORT x1 := val-3.1;\\n\\tEXPORT x2 := x1/0.3;\\n\\tEXPORT x3 := ROUNDUP(x2);\\n\\tEXPORT x4 := x3+1;\\nEND;\\n\\nR1 := RetMod((DECIMAL20_18)4.1);\\nR2 := RetMod((DECIMAL20_18)5.2);\\nR3 := RetMod((DECIMAL20_18)10.3);\\n\\nds1 := DATASET([{R1.x0,R1.x1,R1.x2,R1.x3,R1.x4},\\n {R2.x0,R2.x1,R2.x2,R2.x3,R2.x4},\\n {R3.x0,R3.x1,R3.x2,R3.x3,R3.x4}],\\n {DECIMAL20_18 Input_Value,DECIMAL20_18 Step1,DECIMAL20_18 Step2,INTEGER Step3,INTEGER Step4});\\nds1;
Ans now the result look like this:\\n4.1 1 3.333333333333333333 4\\t5\\n5.2 2.1 7 7\\t8\\n10.3 7.2 24 24\\t25\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-02 21:09:35\" },\n\t{ \"post_id\": 6400, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"Chang\", \"post_text\": \"Hi Richard,\\n\\nThank you for the reply.\\n abc:=ROUNDUP((5.2-3.1)/0.3)+1;\\noutput(abc);\\n\\nthe output I got for ABC is 9.\\nHowever, I'm expecting it to be 8.\\nfor (5.2-3.1)/0.3=2.1/0.3=7 and the roundup(7)=7 so the result is 7+1=8.\\n\\nI don't know why it return 9.\\n\\nbest,\\nChang\", \"post_time\": \"2014-10-02 20:11:20\" },\n\t{ \"post_id\": 6399, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"rtaylor\", \"post_text\": \"Chang,\\n\\nThis is a public forum, so W20141002-153451 could be on any HPCC cluster, anywhere in the world.\\n\\nAssuming you're a Lexis employee or contractor, then you should use the internal support methods we have in place to explore why this particular workunit had problems. Ask your manager how you should pursue the issue.\\n\\nIf you want to provide me with the example input records that I previously asked for to make your first posted example code run, then I will be glad to help.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-02 19:50:08\" },\n\t{ \"post_id\": 6398, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"Chang\", \"post_text\": \"Hi Rtaylor,\\n\\nThank you for the reply.\\nwould you please check wid W20141002-153451\\nit seems that ECL is not outputting a lot of tables I asked.\", \"post_time\": \"2014-10-02 19:41:43\" },\n\t{ \"post_id\": 6397, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"rtaylor\", \"post_text\": \"Chang,\\n\\nThe numbers returned by your simple example are 5, 9, and 26. These are all correct for the calculation you've specified.\\n\\nHere is the key calculation in your example code:SELF.Finalnumber:= ROUNDUP((LEFT.number-3.1)/0.3)+1)
\\nand in this example, I have broken that down into its component operations:RetMod(REAL val) := MODULE\\n\\tEXPORT X0 := val;\\n\\tEXPORT X1 := val-3.1;\\n\\tEXPORT X2 := x1/0.3;\\n\\tEXPORT X3 := ROUNDUP(x2);\\n\\tEXPORT X4 := x3+1;\\nEND;\\n\\nR1 := RetMod(4.1);\\nR2 := RetMod(5.2);\\nR3 := RetMod(10.3);\\n\\nds1 := DATASET([{R1.x0,R1.x1,R1.x2,R1.x3,R1.x4},\\n {R2.x0,R2.x1,R2.x2,R2.x3,R2.x4},\\n {R3.x0,R3.x1,R3.x2,R3.x3,R3.x4}],\\n {REAL Input_Value,REAL Step1,REAL Step2,INTEGER Step3,INTEGER Step4});\\nds1;
The MODULE structure shows exactly how the result is calculated, and enables us to see the result from each component step: \\n
I used an inline DATASET to produce the step-by-step results in a single table so you can see exactly how each value is calculated. The results look like this:\\n4.1 0.9999999999999996 3.333333333333332 4 5\\n5.2 2.1 7.000000000000001 8 9\\n10.3 7.200000000000001 24.0\\t 25 26
There are a couple of interesting items here. \\n\\nThe first is that 4.1-3.1=0.9999999999999996 -- this is an artifact of the IEEE floating point format and the fact that you're limited to 15 significant digits. Binary representation of decimal numbers introduces some imprecision, because there are some decimal values that cannot be represented in the IEEE format. This is shown nicely on this page: http://en.wikipedia.org/wiki/Double-precision_floating-point_format where it has this example:Hex bit pattern = Decimal value \\n3ff0 0000 0000 0000 = 1\\n3ff0 0000 0000 0001 ≈ 1.0000000000000002, the smallest number > 1\\n
That kind of imprecision also accounts for the fact that what appears to be 24.0 gets rounded UP to 25. Notice that the last decimal place in the result of the X1 calculation (7.200000000000001) has a 1 in it. That accounts for the rounding -- the actual value is not 24.0 because some small fractional portion is present, therefore the rounding occurs.\\n\\nAs to the more complex example in your first post, if you could provide a small example of your ILRawScore_DS input dataset, then I would be happy to perform the same type of analysis on that code. Since that code uses DECIMAL data types, the reasons for the anomalies you're seeing could be different. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-02 18:47:58\" },\n\t{ \"post_id\": 6396, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"Re: RoundUP is not working properly\", \"username\": \"Chang\", \"post_text\": \"Another simple and straight forward deomo is\\nmyrec := {REAL number};\\nbtable := DATASET([{4.1},{5.2},{10.3}], myrec);\\n \\nfinalrec:={REAL Finalnumber}; \\nRounding_up:=project(btable,TRANSFORM(finalrec,SELF.Finalnumber:= ROUNDUP((LEFT.number-3.1)/0.3)+1;));\\noutput(Rounding_up);\\n\\nthis will give you the wrong result also.\", \"post_time\": \"2014-10-02 15:30:01\" },\n\t{ \"post_id\": 6395, \"topic_id\": 1467, \"forum_id\": 10, \"post_subject\": \"RoundUP is not working properly\", \"username\": \"Chang\", \"post_text\": \"WID W20141002-101130\\n\\nI have a transform which project ILRawScore_DS to ILIntTemp_DS.\\n\\nILIntTemp_R := RECORD\\n\\tILRawScore_DS;\\n\\tUNSIGNED1 HailT10;\\n\\tUNSIGNED1 HailT100;\\n\\tUNSIGNED1 WindT10;\\n\\tUNSIGNED1 WindT100;\\n\\tUNSIGNED1 WdHlT10;\\n\\tUNSIGNED1 WdHlT100;\\nEND;\\n\\nILIntTemp_R ILIntTemp_T(ILRawScore_DS L):= TRANSFORM\\n\\tSELF.WindT10 := ROUNDUP((L.WDLossExpScore-(DECIMAL8_6)4.608829)/(DECIMAL8_6)0.193555)+1;\\n\\tSELF.WindT100:= ROUNDUP((L.WDLossExpScore-(DECIMAL8_6)4.591028)/(DECIMAL8_6)0.019476)+9;\\t\\n\\tSELF.HailT10 := ROUNDUP((L.HLLossExpScore-(DECIMAL8_6)3.121304)/(DECIMAL8_6)0.378162)+1;\\n\\tSELF.HailT100:= ROUNDUP((L.HLLossExpScore-(DECIMAL8_6)3.065935)/(DECIMAL8_6)0.038200)+9;\\n\\tSELF.WdHlT10 := ROUNDUP((L.WHLossExpScore-(DECIMAL8_6)7.967634)/(DECIMAL8_6)0.524486)+1;\\n\\tSELF.WdHlT100:= ROUNDUP((L.WHLossExpScore-(DECIMAL8_6)7.904754)/(DECIMAL8_6)0.052939)+9;\\n\\tSELF:=L;\\nEND;\\n\\nILIntTemp_DS :=\\tPROJECT(ILRawScore_DS,ILIntTemp_T(LEFT));\\nOUTPUT(ILIntTemp_DS,NAMED('TempScore'));\\nOUTPUT(ILINtTemp_DS(LN_KeyChar in ['78986477','79003338']),NAMED('TempCheck')); \\n\\nHowever, if you look at the result TempCheck, you'll found the two records supposed to have WindT10=2, but they all got 1.\\nI'm wondering why ECL give wrong result at such simple math?\", \"post_time\": \"2014-10-02 15:01:33\" },\n\t{ \"post_id\": 6493, \"topic_id\": 1471, \"forum_id\": 10, \"post_subject\": \"Re: Trouble with MonitorFile\", \"username\": \"JimD\", \"post_text\": \"Yes, this is/was an issue: \\n\\nhttps://track.hpccsystems.com/browse/HPCC-12401\\n\\n...and it seems to have been resolved for a future release.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2014-10-23 14:26:11\" },\n\t{ \"post_id\": 6492, \"topic_id\": 1471, \"forum_id\": 10, \"post_subject\": \"Re: Trouble with MonitorFile\", \"username\": \"bforeman\", \"post_text\": \"It would probably be a good idea to log this immediately in the Community Issue Tracker.\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-22 20:49:09\" },\n\t{ \"post_id\": 
6406, \"topic_id\": 1471, \"forum_id\": 10, \"post_subject\": \"Trouble with MonitorFile\", \"username\": \"jgostylo\", \"post_text\": \"We recently did an upgrade to 5.0.0-3 Enterprise and after recompiling and republishing our ECL we are seeing an issue where the MonitorFile tasks are not working. I have compressed this down to a small reproducible.\\n\\n\\nIMPORT * FROM Std;\\n\\nMyEventName := 'MyFileEvent';\\nFileName := '/var/lib/HPCCSystems/mydropzone/analytics_incoming/*';\\nLZ := 'a.correct.ip.address';\\nStd.File.MonitorFile(MyEventName,LZ,FileName);\\nOUTPUT('File Found') : WHEN(EVENT(MyEventName,'*'),COUNT(1));\\n
\\n\\nLooking at the logs, MonitorFile is broadcasting the event "DfuFileMonitor" and not "MyFileEvent". What can I do about this. I have several file monitors running on an instance so I definitely need separate events. Also, as far as I know this is incorrect behavior.\", \"post_time\": \"2014-10-08 14:47:52\" },\n\t{ \"post_id\": 6528, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nThank you Richard.\\n\\nSwetha\", \"post_time\": \"2014-10-29 19:00:12\" },\n\t{ \"post_id\": 6519, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nAs we discuss in the Introduction to ECL class (free and available online at http://learn.lexisnexis.com/hpcc), the IDE defaults to showing you only the first 100 results when you do only a simple OUTPUT to see the result of your query. \\n\\nTo see more result records than that, you can either:
Like this:\nOUTPUT(wordCountTable); //first 100 recs (unless you use the More button)\nCHOOSEN(wordCountTable, 105); //first 105 recs\nOUTPUT(wordCountTable,ALL); //either all recs, or an error if result > 10Mb\nOUTPUT(wordCountTable,,'MyNewFileName'); //all recs written to disk
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-28 17:50:02\" },\n\t{ \"post_id\": 6517, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"swethareddy01\", \"post_text\": \"Hai, \\n\\nFor word count code. ecl is displaying only first 100 records as output. But i want to see all the records in the output.\\n\\nHelp me in achieving the same.\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2014-10-28 17:14:19\" },\n\t{ \"post_id\": 6516, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nSure.. Thank you so much.\\n\\nSwetha.\", \"post_time\": \"2014-10-28 17:11:23\" },\n\t{ \"post_id\": 6510, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,How to write the same word count code using java in ecl.?\\n\\nPlease help me with the code.
Sorry, but you'll have to ask a Java programmer to write that code for you. Java is not one of the languages I code in.\\n\\nRichard\", \"post_time\": \"2014-10-28 13:24:30\" },\n\t{ \"post_id\": 6508, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nHow to write the same word count code using java in ecl.?\\n\\nPlease help me with the code.\\n\\nThank you.\", \"post_time\": \"2014-10-28 01:18:36\" },\n\t{ \"post_id\": 6499, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"cheapfut15coins\", \"post_text\": \"Distinct food and also a preferred for the children additionally.\\nbuy fifa 15 coins\\ncheap fut 15 coins\", \"post_time\": \"2014-10-25 05:36:17\" },\n\t{ \"post_id\": 6428, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"rtaylor\", \"post_text\": \"swethareddy01,\\n\\nIt sounds like your data probably looks like this:wordsDS := DATASET([\\n {'The demo example for word count to count word'}, \\n {'The example demo for count word to count'}, \\n {'The quick brown fox jumped over the lazy red dog'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);
\\nIf so, then you need to first break out the individual words before the TABLE function can do a crosstab on them to produce your count of the number of discrete instances of each word.\\n\\nHere's how I would do it:IMPORT STD;\\n\\nWordLayout := RECORD\\n STRING word;\\nEND;\\n\\nwordsDS := DATASET([\\n {'The demo example for word count to count word'}, \\n {'The example demo for count word to count'}, \\n {'The quick brown fox jumped over the lazy red dog'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);\\n\\nWordLayout XF(WordLayout L, INTEGER C, INTEGER Cnt) := TRANSFORM\\n WordStart := IF(C=1,1,STD.str.Find(L.word,' ',C-1)+1); \\n WordEnd := IF(C=Cnt,LENGTH(L.word),STD.str.Find(L.word,' ',C)-1); \\n SELF.word := L.word[WordStart .. WordEnd];\\nEND;\\t\\t\\t\\t\\t\\t\\nEachWord := NORMALIZE(wordsDS,\\n STD.str.WordCount(LEFT.word),\\n XF(LEFT,COUNTER,STD.str.WordCount(LEFT.word)));\\n\\nWordCountLayout := RECORD\\n EachWord.word;\\n wordCount := COUNT(GROUP);\\nEND;\\n\\nwordCountTable := TABLE(EachWord, WordCountLayout, word);\\n\\nOUTPUT(wordCountTable);
This produces the result you want. \\n\\nNotice that I'm using NORMALIZE to extract each word out into its own record in the EachWord record set. Also, I'm passing an additional parameter tot eh TRANSFORM function so it can know the maximum number of words in each record. \\n\\nOnce all the words have been extracted into separate records in EachWord, then the TABLE function can operate on EachWord just the way you were previously trying to do it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-13 19:47:22\" },\n\t{ \"post_id\": 6427, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count for text file\", \"username\": \"DSC\", \"post_text\": \"It looks like your datafile contains lines of text with each line containing comma-delimited words. If that is right, then what you need to do is read the text line-by-line (instead of by word) and parse the words out before trying to count them. Something like this should work:\\n\\nIMPORT Std;\\n\\nLineLayout := RECORD\\n\\tSTRING line;\\nEND;\\n\\nlinesDS := DATASET\\n\\t(\\n\\t\\t'~thor::word_list_csv',\\n\\t\\tLineLayout, \\n\\t\\tCSV(heading(1),separator(''),quote(''))\\n\\t);\\n\\nWordLayout := RECORD\\n\\tSTRING word;\\nEND;\\n\\nLineWordsLayout := RECORD\\n\\tDATASET(WordLayout)\\twords;\\nEND;\\n\\nwordsTemp := PROJECT\\n\\t(\\n\\t\\tlinesDS,\\n\\t\\tTRANSFORM\\n\\t\\t\\t(\\n\\t\\t\\t\\tLineWordsLayout,\\n\\t\\t\\t\\tSELF.words := DATASET(Std.Str.SplitWords(LEFT.line, ','), WordLayout)\\n\\t\\t\\t)\\n\\t);\\n\\nwordsDS := wordsTemp.words;\\n\\nWordCountLayout := RECORD\\n\\twordsDS.word;\\n\\twordCount := COUNT(GROUP);\\nEND;\\n\\nwordCountTable := TABLE(wordsDS, WordCountLayout, word);\\n\\nOUTPUT(wordCountTable);
\\nNotice that I changed your SEPARATOR argument in the DATASET command. By making that an empty string, the entire line of text will be parsed into the single STRING field in your record.\\n\\nHope this helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-10-13 13:14:08\" },\n\t{ \"post_id\": 6421, \"topic_id\": 1477, \"forum_id\": 10, \"post_subject\": \"Reg: Word Count for text file\", \"username\": \"swethareddy01\", \"post_text\": \"Hai..\\n\\nI am trying to count the frequency of each word of a text file i sprayed. But I am getting word count for only first word of each row but i want my output to be the word count for entire row of whole document.\\n\\nMy code is as follows:\\nWordLayout := RECORD\\n STRING word;\\nend;\\n\\nwordsDS := DATASET('~thor::word_list_csv', WordLayout, \\nCSV(heading(1),separator(','),quote('')));\\n\\nWordCountLayout := record\\n wordsDS.word;\\n wordCount := COUNT(GROUP);\\nend;\\n\\nwordCountTable := TABLE(wordsDS, WordCountLayout, word);\\n\\nOUTPUT(wordCountTable);\\n\\nThe output is:\\nword count\\nThe demo example for word count to count word 1\\n\\nBut i want my output as follows:\\nword\\nThe 1\\ndemo 1\\nexample 1\\nfor 1\\nword 2\\ncount 2\\nto 1\\n\\nPlease help me in this issue.\\n\\nThanks in advance.\", \"post_time\": \"2014-10-09 23:40:33\" },\n\t{ \"post_id\": 6434, \"topic_id\": 1479, \"forum_id\": 10, \"post_subject\": \"Re: dataset has header from CSV\", \"username\": \"Chang\", \"post_text\": \"hi srbhkmr,\\n\\nThank you for the prompt reply. I tried heading(1) but it was not giving me the expected result. \\nI guess I may just modify my sas code and output the csv file without header.\\n\\nAgain, thank you for the quick reply!
\", \"post_time\": \"2014-10-14 12:57:42\" },\n\t{ \"post_id\": 6433, \"topic_id\": 1479, \"forum_id\": 10, \"post_subject\": \"Re: dataset has header from CSV\", \"username\": \"srbhkmr\", \"post_text\": \"I think what you require there is:\\n
...',csv(heading(1))
\\ninstead of:\\n'...,csv(heading(single))
\\n'single' is used while writing a csv and making sure we get only a single header written from multiple processes.\\n\\nThanks,\", \"post_time\": \"2014-10-14 12:54:29\" },\n\t{ \"post_id\": 6432, \"topic_id\": 1479, \"forum_id\": 10, \"post_subject\": \"dataset has header from CSV\", \"username\": \"Chang\", \"post_text\": \"Hi,\\n\\nI came across a problem, I output a csv file from SAS and sprayed it onto hpcc.\\nI defined a:= dataset('<file logical name>',csv(heading(single)));\\nhowever, it showed that the header of the csv file is still in a.\\n\\nThis is not the first time I came across this problem. Anyone has any idea about how to solve this issue?\\n\\nThanks,\\nChang\", \"post_time\": \"2014-10-14 12:47:24\" },\n\t{ \"post_id\": 6471, \"topic_id\": 1481, \"forum_id\": 10, \"post_subject\": \"Re: Levenshtein Distance\", \"username\": \"gsmith\", \"post_text\": \"Also there is a bunch of Edit Distance implementations in the StringMatch Bundle:\\nhttps://github.com/hpcc-systems/ecl-bun ... tringMatch\", \"post_time\": \"2014-10-16 18:06:25\" },\n\t{ \"post_id\": 6463, \"topic_id\": 1481, \"forum_id\": 10, \"post_subject\": \"Re: Levenshtein Distance\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,\\n\\nFrom the String standard library function docs for STD.Str.EditDistance()The EditDistance function returns a standard Levenshtein distance algorithm score for the edit distance between string1 and string2. This score i\\\\reflects the minimum number of operations needed to transform string1 into string2.
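For example, a quick check you can run:\nIMPORT STD;\nOUTPUT(STD.Str.EditDistance('kitten','sitting')); // returns 3\n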
HTH,\\n\\nRichard\", \"post_time\": \"2014-10-16 15:46:22\" },\n\t{ \"post_id\": 6462, \"topic_id\": 1481, \"forum_id\": 10, \"post_subject\": \"Levenshtein Distance\", \"username\": \"biswanath_c\", \"post_text\": \"Is there an ECL function available to calculate the Levenshtein distance between 2 strings?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-10-16 15:31:31\" },\n\t{ \"post_id\": 6748, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have a similar requirement too. Is using independent the best way to do this?\\n\\nI need to measure time taken by every step in my algorithm. If any step takes more than 2 seconds, i need to just skip the remaining steps; that's my requirement.\\n\\nThanks\", \"post_time\": \"2015-01-02 17:08:41\" },\n\t{ \"post_id\": 6663, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"nawaz2525\", \"post_text\": \"Srini,\\n\\nI don't know why it should work on Roxie and not on Thor, so please report it in JIRA.\\n\\n\\n\\n\\n___________________\\nhttp://pass4-sure.info\", \"post_time\": \"2014-12-04 06:39:22\" },\n\t{ \"post_id\": 6476, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n Actually I have a requirement to check the time taken for processing the records which I give as an input through spray file using algorithms like decision trees, kmeans etc...That means, total time taken to process the whole data has to be printed. when I use msTick function before and after my function call, its just giving 2 or 3 seconds, however large my dataset is....so what is suggested, can I use msTick function at all for this purpose?\\n I need to do it programmatically instead of checking workunit timings.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2014-10-20 04:33:23\" },\n\t{ \"post_id\": 6474, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"chennapooja\", \"post_text\": \"Thanks for the responses, I understood where I am wrong.\", \"post_time\": \"2014-10-16 19:04:30\" },\n\t{ \"post_id\": 6473, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"chennapooja\", \"post_text\": \"Thanks for the responses, I understood where I am wrong.\", \"post_time\": \"2014-10-16 19:04:04\" },\n\t{ \"post_id\": 6469, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"rtaylor\", \"post_text\": \"chennapooja,\\n\\nYou are making the mistake of thinking procedurally -- IOW, your question presumes that your code is going to execute in the order that you wrote it.\\n\\nECL is a declarative, non-procedural language. When you write ECL code you are NOT writing "executable code" but just defining the expressions whose result you want to see. You write code that defines WHAT you want then the compiler decides HOW to get you that result. That means that the order you write your code is not necessarily the order in which the compiler chooses to execute your job.\\n Later I am trying to find the difference between two variables, but I am getting the value as zero.
ECL has no "variables" -- only definitions. That's why your output of t3 is always 0 -- because the t1 and t2 definitions are re-calculated to produce the t3 result. Since both values end up the same that way, the result is always zero.\\n\\nThe timing for single function call can be tested in ECL by running a query in which only that one function is called, then just look at the workunit timings in ECL Watch. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-16 17:49:30\" },\n\t{ \"post_id\": 6467, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"Re: How to find time taken for function to execute?\", \"username\": \"DSC\", \"post_text\": \"What you're seeing is a normal optimization of the ECL compiler. t1 in your example isn't actually evaluated until it is needed (this is why the ECL instructors stress that something like 'foo := '123' means "is defined as" not "is assigned a value of."\\n\\nAnyway, t1 isn't actually used until you try to compute the difference between t2 (which is also a definition) and t1. Because both are evaluated at the same time, the difference is zero.\\n\\nA workaround for this particular case is to force t1 to be evaluated in a different context. IIRC, you can use something like this to force it to be evaluated before everything else:\\n\\n unsigned4 t1:=STD.System.Debug.msTick() : INDEPENDENT;
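Put together, an untested sketch of the whole pattern might look like this (the inline DATASET is just a stand-in for whatever work you want to time):\nIMPORT STD;\nt1 := STD.System.Debug.msTick() : INDEPENDENT; // forced to evaluate up front\nwork := COUNT(DATASET([{1},{2},{3}],{UNSIGNED n}));\nt2 := STD.System.Debug.msTick();\nSEQUENTIAL(\n OUTPUT(work, NAMED('work_result')),\n OUTPUT(t2 - t1, NAMED('elapsed_ms'))\n);\n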
\\nYou may have to use GLOBAL instead of INDEPENDENT. Also, I believe that you could use the STORED() workflow service instead.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-10-16 16:49:03\" },\n\t{ \"post_id\": 6466, \"topic_id\": 1482, \"forum_id\": 10, \"post_subject\": \"How to find time taken for function to execute?\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\nI am facing an issue with standard function STD.System.Debug.msTick which I am using before and after my function call and storing the result in separate variables. Later I am trying to find the difference between two variables, but I am getting the value as zero.\\n\\nimport std;\\n\\nunsigned4 t1:=STD.System.Debug.msTick();\\n\\nOutput(t1);\\nOutput('HelloWorld');\\nOutput('HelloWorld');\\nOutput('HelloWorld');\\nunsigned4 t2:=STD.System.Debug.msTick();\\nOutput(t2);\\ninteger t3:=SELF.t2 - SELF.t1;\\nOutput(t3);\\n\\nAbove is a sample program but my actual requirement is to find the time taken for one function call to give its output.\\n\\nOutput for t3 in above program is 0 where as I am getting values for t1 and t2.\\n\\nPlease provide some inputs, thanks in advance.\", \"post_time\": \"2014-10-16 16:14:37\" },\n\t{ \"post_id\": 6588, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nThe workunit shows timings for each graph and sub-graph in the job.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-10 16:10:38\" },\n\t{ \"post_id\": 6587, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nMy advice would be to run several samples and then compare the timings on each one. There are too many factors to give you an exact time per word. Factors include cluster size, bandwidth, size of dataset to process and more.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-11-10 15:49:38\" },\n\t{ \"post_id\": 6584, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nHow to determine the time taken to count each word or entire file in word count program?\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2014-11-09 03:15:29\" },\n\t{ \"post_id\": 6498, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"cheapfut15coins\", \"post_text\": \"There exists only a minimal make pertaining to.\", \"post_time\": \"2014-10-25 05:35:40\" },\n\t{ \"post_id\": 6488, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"One minor change in the TRANSFORM takes care of that:IMPORT STD;\\n\\nWordLayout := RECORD\\nSTRING word;\\nend;\\n\\nwordsDS1 := DATASET([\\n {'The demo example. for word count to count word'}, \\n {'The example demo for. count word to count'}, \\n {'The quick brown fox jumped. over the lazy red dog'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);\\nwordsDS2 := DATASET([\\n {'The real. example for word count to count word'}, \\n {'The second example. demo for count word to count'}, \\n {'The quick. brown fox jumped over the lazy red dog.'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);\\n\\nWordLayout XF(WordLayout L, INTEGER C, INTEGER Cnt) := TRANSFORM\\n WordStart := IF(C=1,1,STD.str.Find(L.word,' ',C-1)+1); \\n WordEnd := IF(C=Cnt,LENGTH(L.word),STD.str.Find(L.word,' ',C)-1); \\n TheWord := L.word[WordStart .. 
WordEnd];\\n SELF.word := IF(TheWord[LENGTH(TheWord)]='.',TheWord[..LENGTH(TheWord)-1],TheWord);\\nEND;\\t\\t\\t\\nAllFiles := wordsDS1+wordsDS2;\\t\\t\\t\\nEachWord := NORMALIZE(AllFiles,\\n STD.str.WordCount(LEFT.word),\\n XF(LEFT,COUNTER,STD.str.WordCount(LEFT.word)));\\n\\nWordCountLayout := record\\n EachWord.word;\\n wordCount := COUNT(GROUP);\\nend;\\n\\nwordCountTable := TABLE(EachWord, WordCountLayout, word);\\n\\nOUTPUT(wordCountTable);
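The change is confined to the TRANSFORM: the word is captured into TheWord and any trailing full stop is stripped before it is counted. If other punctuation (commas, question marks, and so on) can also turn up, a more general variant (an untested sketch) is to keep only the characters you care about:\n SELF.word := STD.Str.Filter(TheWord,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789');\n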
\", \"post_time\": \"2014-10-20 19:57:10\" },\n\t{ \"post_id\": 6487, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nThank you so much for helping me in getting the result. But, this code is not getting the output correctly if the text file contains ',' in it. It is not reading the word next to ','. Also in dataset syntax in place of record of text i am giving name of the thor file.\\n\\nOnce again thank you.\\n\\nSwetha\", \"post_time\": \"2014-10-20 19:45:24\" },\n\t{ \"post_id\": 6486, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nSince all the text files have the same structure, you only need to concatenate them all together into a single recordset to count words using exactly the same code I previously posted, like this:IMPORT STD;\\n\\nWordLayout := RECORD\\nSTRING word;\\nend;\\n\\nwordsDS1 := DATASET([\\n {'The demo example for word count to count word'}, \\n {'The example demo for count word to count'}, \\n {'The quick brown fox jumped over the lazy red dog'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);\\nwordsDS2 := DATASET([\\n {'The real example for word count to count word'}, \\n {'The second example demo for count word to count'}, \\n {'The quick brown fox jumped over the lazy red dog'} \\n\\t\\t\\t\\t\\t\\t],WordLayout);\\n\\nWordLayout XF(WordLayout L, INTEGER C, INTEGER Cnt) := TRANSFORM\\n WordStart := IF(C=1,1,STD.str.Find(L.word,' ',C-1)+1); \\n WordEnd := IF(C=Cnt,LENGTH(L.word),STD.str.Find(L.word,' ',C)-1); \\n SELF.word := L.word[WordStart .. WordEnd];\\nEND;\\t\\t\\t\\nAllFiles := wordsDS1+wordsDS2;\\t\\t\\t\\nEachWord := NORMALIZE(AllFiles,\\n STD.str.WordCount(LEFT.word),\\n XF(LEFT,COUNTER,STD.str.WordCount(LEFT.word)));\\n\\nWordCountLayout := record\\n EachWord.word;\\n wordCount := COUNT(GROUP);\\nend;\\n\\nwordCountTable := TABLE(EachWord, WordCountLayout, word);\\n\\nOUTPUT(wordCountTable);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-20 18:04:25\" },\n\t{ \"post_id\": 6485, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nDo we need to write same word count code for each text file we are using or is there any other way to do it.? like we can store all text files in a single file, zip the file, spray it, and use in the code (tried, not worked) .\\n\\nOr can we use any loop for it?\\n\\nHere is my sample code\\n\\n
WordLayout := RECORD\\n STRING word;\\t\\nend;\\nWordLayout1 := RECORD \\n\\tSTRING word1;\\nend;\\n\\nwordsDS := DATASET('~thor::1.txt', WordLayout, \\nCSV(heading(0),terminator(' '),quote('')));\\nwordsDS1 := DATASET('~thor::2.txt', WordLayout1, \\nCSV(heading(0),terminator(' '),quote('')));\\n\\nWordCountLayout := record\\n wordsDS.word;\\t\\n wordCount := COUNT(GROUP);\\nend;\\n\\nWordCountLayout1 := record\\n wordsDS1.word1;\\t\\n wordCount := COUNT(GROUP);\\nend;\\n\\nwordCountTable := TABLE(wordsDS, WordCountLayout, word);\\n\\nwordCountTable1 := TABLE(wordsDS1, WordCountLayout1, word1);\\n\\nOUTPUT(wordCountTable);\\n\\nOUTPUT(wordCountTable1);
\\n\\nThank You.\\n\\nSwetha\", \"post_time\": \"2014-10-20 16:37:38\" },\n\t{ \"post_id\": 6484, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nIf the text files are stored in a single file, then I have to assume each file is a separate record in that single file. Therefore, the same code applies.\\n\\nHowever, if that is not the case, please provide a small sample of the type of data you're working with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-20 13:48:27\" },\n\t{ \"post_id\": 6483, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nYou are right, but the link you are pointing is for a single text file. Now my question is for a group of text files stored in a single file.\\n\\nThank you for the response.\\n\\nSwetha\", \"post_time\": \"2014-10-20 13:40:37\" },\n\t{ \"post_id\": 6481, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"This question was answered in this thread: http://hpccsystems.com/bb/viewtopic.php?f=10&t=1477&sid=9c2a396296e37f3502a36183b9978c28\", \"post_time\": \"2014-10-20 13:03:44\" },\n\t{ \"post_id\": 6478, \"topic_id\": 1484, \"forum_id\": 10, \"post_subject\": \"Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai all\\n\\nIs it possible to count each word in a file system which consist of more than one text files (like hadoop) simultaneously?\\n\\nPlease help me with the code (if it is possible).\\n\\nSwetha\", \"post_time\": \"2014-10-20 12:57:03\" },\n\t{ \"post_id\": 6480, \"topic_id\": 1485, \"forum_id\": 10, \"post_subject\": \"Re: Reg: Word Count\", \"username\": \"rtaylor\", \"post_text\": \"This question was answered in this thread: http://hpccsystems.com/bb/viewtopic.php?f=10&t=1477&sid=9c2a396296e37f3502a36183b9978c28\", \"post_time\": \"2014-10-20 13:03:19\" },\n\t{ \"post_id\": 6479, \"topic_id\": 1485, \"forum_id\": 10, \"post_subject\": \"Reg: Word Count\", \"username\": \"swethareddy01\", \"post_text\": \"Hai all\\n\\nIs it possible to count each word in a file system which consist of more than one text files (like hadoop) simultaneously?\\n\\nPlease help me with the code (if it is possible).\\n\\nSwetha\", \"post_time\": \"2014-10-20 12:57:33\" },\n\t{ \"post_id\": 6553, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"bforeman\", \"post_text\": \"Here is another variation. (thanks Gavin!)\\n\\n/* A timeval has two components, both ints. One (called tv_sec) is exactly the value that would be returned by time, the time in seconds since 1/1/1970.\\nThe other (called tv_usec) is the number of microseconds into that\\nsecond. Don't be fooled: although the units are microseconds, the value\\nis nothing like that accurate.\\n*/\\n\\nGetTimeDateMS_Ex() := function\\n\\nUNSIGNED getTimeDatems_c() := BEGINC++\\n #include <sys/time.h> \\n #body \\n struct timeval tv;\\n gettimeofday(&tv,NULL);\\n return tv.tv_sec*(uint64_t)1000000+tv.tv_usec;\\n ENDC++;\\n return getTimeDatems_c();\\n end;\\n\\n\\n val := GetTimeDateMS_Ex();\\n val;
\", \"post_time\": \"2014-11-03 14:58:52\" },\n\t{ \"post_id\": 6522, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"DSC\", \"post_text\": \"Does this do what you want?\\n\\nREAL8 CurrentUTCTimeInSecondsWithPrecision() := BEGINC++\\n #option pure\\n #option action\\n #include <time.h>\\n #body\\n\\n struct timeval tv;\\n double result = 0.0;\\n\\n if (gettimeofday(&tv,NULL) == 0)\\n {\\n result = tv.tv_sec + (tv.tv_usec / 1000000.0); \\n }\\n\\n return result;\\nENDC++;\\n\\nCurrentUTCTimeInSecondsWithPrecision();
\\n\\nDan\", \"post_time\": \"2014-10-28 20:38:32\" },\n\t{ \"post_id\": 6518, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"bforeman\", \"post_text\": \"It gives you elapsed time, which is much more useful than just the current time. \\n\\nLook at the example in the msTick documentation, which shows the elapsed time from record to record.\\n\\nLet me check with development to see if there are other alternatives.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-28 17:35:43\" },\n\t{ \"post_id\": 6515, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n But does it give current time in milliseconds?\", \"post_time\": \"2014-10-28 16:57:43\" },\n\t{ \"post_id\": 6514, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"bforeman\", \"post_text\": \"Again, look at the debug.mstick() function.\", \"post_time\": \"2014-10-28 16:32:55\" },\n\t{ \"post_id\": 6513, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n Thanks for the replies. But I need to display milliseconds. The format given is stopped at seconds.\", \"post_time\": \"2014-10-28 16:27:10\" },\n\t{ \"post_id\": 6509, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"bforeman\", \"post_text\": \"Also, see:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=61&hilit=milliseconds&sid=a90dfd61d92ccbca9e905e23850d055d\\n\\nBob\", \"post_time\": \"2014-10-28 11:58:51\" },\n\t{ \"post_id\": 6504, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"Re: how to display milliseconds\", \"username\": \"bforeman\", \"post_text\": \"Here is what I use to display date and time. It goes down to the second.\\n\\nGetTimeDate_Ex() := function\\n// Function to get time in HHMMSS format\\n// Courtesy : Development team\\nstring17 getTimeDate_c() := BEGINC++\\n#ifdef _WIN32\\n#include <time.h>\\n#endif\\t\\n#body\\n\\t// Declarations\\n\\tstruct tm localt; // localtime in 'tm' structure\\n\\ttime_t timeinsecs; // variable to store time in secs\\n\\tchar temp[18];\\n\\t\\n\\t// Get time in sec since Epoch\\n\\ttime(&timeinsecs); \\n\\t// Convert to local time\\n#ifdef _WIN32\\n\\tlocaltime_s(&localt,&timeinsecs);\\n\\tstrftime(temp, sizeof(temp), "%Y-%m-%d%H%M%S%w", &localt); // Formats the localtime to YYYY-MM-DDHHMMSSW where W is the weekday\\n\\tif (temp[16]=='0')\\t\\t// convert %w to %u\\n\\t\\ttemp[16]='7';\\n#else\\n\\tlocaltime_r(&timeinsecs,&localt);\\n\\tstrftime(temp, sizeof(temp), "%F%H%M%S%u", &localt); // Formats the localtime to YYYY-MM-DDHHMMSSW where W is the weekday\\n#endif\\n\\tmemcpy(__result, &temp, 17);\\nENDC++;\\nreturn getTimeDate_c();\\nend;\\n\\n\\nval := GetTimeDate_Ex();\\nval;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-27 12:22:00\" },\n\t{ \"post_id\": 6497, \"topic_id\": 1488, \"forum_id\": 10, \"post_subject\": \"how to display milliseconds\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\nDoes anyone tell me how can I display milli seconds also in current time using ECL?\\n\\nThanks in advance.\", \"post_time\": \"2014-10-24 22:10:48\" },\n\t{ \"post_id\": 6520, \"topic_id\": 1490, \"forum_id\": 10, \"post_subject\": \"Re: Recursive Dependency\", \"username\": \"rtaylor\", \"post_text\": \"Can you post the code that produces this error, please?\", \"post_time\": \"2014-10-28 18:02:53\" },\n\t{ \"post_id\": 6511, \"topic_id\": 1490, \"forum_id\": 10, \"post_subject\": \"Recursive Dependency\", \"username\": \"biswanath_c\", \"post_text\": \"I have a module with a few functions. When i compile that module, i get an error 'Definition of <module> contains a recursive dependency'. \\n\\nBut i checked and don't see any recursive function calls.\\n\\nIs there any other reason which might cause this error?\", \"post_time\": \"2014-10-28 14:56:53\" },\n\t{ \"post_id\": 6554, \"topic_id\": 1498, \"forum_id\": 10, \"post_subject\": \"Re: Full Text Index\", \"username\": \"rtaylor\", \"post_text\": \"biswanath_c,\\n\\nThis is a topic that could be expounded upon for days, so let me first point you to the general answer: an "inverted word index" is what you'll need to implement in your HPCC environment. There are several variations that you can use, depending in your exact needs. The theory and concepts of the subject are discussed here: http://en.wikipedia.org/wiki/Inverted_index\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-03 15:08:52\" },\n\t{ \"post_id\": 6549, \"topic_id\": 1498, \"forum_id\": 10, \"post_subject\": \"Full Text Index\", \"username\": \"biswanath_c\", \"post_text\": \"I have a MySQL code implementing a full text index that looks like this:\\n\\n\\nWHERE MATCH (field1, field2) AGAINST (\\n CONCAT('+(', input1, ')', input2\\n\\n\\nI am looking at moving this to HPCC. I was wondering if there's any special construct that I should be aware of to implement full text index in ECL.\\n\\nSo, say if input1 contains 2 words - word1 and word2, i need to pick all records with field1 containing word1 and all also records containing word2 as well.\\n\\nWhat's the best way to implement this in ECL?\\n\\nThanks\", \"post_time\": \"2014-11-03 11:40:12\" },\n\t{ \"post_id\": 6555, \"topic_id\": 1499, \"forum_id\": 10, \"post_subject\": \"Re: keyed\", \"username\": \"rtaylor\", \"post_text\": \"biswanath_c,\\n\\nShort answer: no.\\n\\nKEYED is only needed in the index filter when you are not providing a value for the leading element(s) of the index (in this case, kf1). In that case, you would also need WILD, like this: i(keyed(kf2 = 'sample'), WILD(kf1);
which will get all results where kf2='sample' no matter what values are in kf1.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-03 15:14:29\" },\n\t{ \"post_id\": 6551, \"topic_id\": 1499, \"forum_id\": 10, \"post_subject\": \"keyed\", \"username\": \"biswanath_c\", \"post_text\": \"Say I have an index i with 2 key fields kf1 & kf2.\\n\\nIs there any difference between these 2 filters?:\\n\\n1. i(keyed(kf1='sample' and kf2 = 'sample');\\n2. i(kf1='sample' and kf2 = 'sample');\\n\\nThanks\", \"post_time\": \"2014-11-03 14:14:00\" },\n\t{ \"post_id\": 6576, \"topic_id\": 1505, \"forum_id\": 10, \"post_subject\": \"Re: Request for Defining an Image Dataset\", \"username\": \"bforeman\", \"post_text\": \"Hi Tarik,\\n\\nI believe that what you will need to do is extract the images first from the compressed file on to your landing zone, and then use the BLOB spray to move all of the images into a new file on the HPCC cluster, then define that file as described in the Programmer's Guide section "Working With BLOBs".\\n\\nI will try to find other examples for you, but it is just a busy week as I am presenting training.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-11-07 13:42:31\" },\n\t{ \"post_id\": 6568, \"topic_id\": 1505, \"forum_id\": 10, \"post_subject\": \"Request for Defining an Image Dataset\", \"username\": \"Tarik Alafif\", \"post_text\": \"I'm an ECL new programmer using your HPCC system. I'm trying learn ECL programming language. I uploaded a dataset of images files in a zipped folder (images.tar.gz) using the landing zone in ECL Watch. I have an issue of how to define this dataset and read images and image file names using ECL prog. language in ECL Watch Playground? The documentation gives only one example of defining unstructured datasets. Please help me with some examples. Thanks.\\nimageRecord := RECORD\\nSTRING filename;\\nDATA image;\\n//first 4 bytes contain the length of the image data\\n//UNSIGNED8 RecPos{virtual(fileposition)};\\nEND;\\nimageData := DATASET('images.tar.gz',imageRecord,FLAT);\\n//imageKey := INDEX(imageData,{filename,fpos},'LE::imageKey');\\n//BUILDINDEX(imageKey);\\n\\noutput (filename);
\", \"post_time\": \"2014-11-05 22:36:12\" },\n\t{ \"post_id\": 6591, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Re: Accept a dataset as input in Roxie Query\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Richard. JIRA issue created. Link provided below for reference.\\nhttps://track.hpccsystems.com/browse/HPCC-12568\\n\\nSrini\", \"post_time\": \"2014-11-11 20:01:41\" },\n\t{ \"post_id\": 6581, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Re: Accept a dataset as input in Roxie Query\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nI don't know why it should work on Roxie and not on Thor, so please report it in JIRA.\\n\\nRichard\", \"post_time\": \"2014-11-07 19:03:52\" },\n\t{ \"post_id\": 6580, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Re: Accept a dataset as input in Roxie Query\", \"username\": \"omnibuzz\", \"post_text\": \"Richard - I see that the same code doesn't work when I publish it in Thor. Is it by design? Or am I doing something wrong?\\n-Srini\", \"post_time\": \"2014-11-07 18:41:36\" },\n\t{ \"post_id\": 6573, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Re: Accept a dataset as input in Roxie Query\", \"username\": \"omnibuzz\", \"post_text\": \"That worked fine. I think I tried all variations except that one \\nThanks, Richard. \\n-Srini\", \"post_time\": \"2014-11-06 17:05:05\" },\n\t{ \"post_id\": 6570, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Re: Accept a dataset as input in Roxie Query\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nTry this:
EXPORT DataSetInputForRoxieQuery := FUNCTION\\nChildRec := RECORD\\n STRING Col1;\\n STRING Col2;\\n STRING Col3;\\nEND;\\n\\nParentRec := RECORD\\n STRING PCol1;\\n STRING PCol2;\\n STRING PCol3;\\n DATASET(ChildRec) ChildDS;\\nEND;\\n\\ndsInput := DATASET([],ParentRec) :STORED('dsInput')\\n.\\n.\\n.\\nEND\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-06 13:37:43\" },\n\t{ \"post_id\": 6569, \"topic_id\": 1506, \"forum_id\": 10, \"post_subject\": \"Accept a dataset as input in Roxie Query\", \"username\": \"omnibuzz\", \"post_text\": \"The examples we have about Roxie programming in ECL reference shows accepting a few parameters as input (using the stored workflow service).\\n\\nIs there an example that will show how to accept a complex Dataset as input.. Here is the code that I started to write and where I am stuck..\\n\\n\\nEXPORT DataSetInputForRoxieQuery := FUNCTION\\n ChildRec := RECORD\\n STRING Col1;\\n STRING Col2;\\n STRING Col3;\\n END;\\n \\n ParentRec := RECORD\\n STRING PCol1;\\n STRING PCol2;\\n STRING PCol3;\\n DATASET(ChildRec) ChildDS;\\n END;\\n \\n DATASET(ParentRec) dsInput := ???? :STORED('dsInput')\\n.\\n.\\n.\\nEND\\n
\", \"post_time\": \"2014-11-06 04:21:09\" },\n\t{ \"post_id\": 6606, \"topic_id\": 1514, \"forum_id\": 10, \"post_subject\": \"Re: Nested iterations\", \"username\": \"DSC\", \"post_text\": \"One way to do this is is to create a record structure containing the combined values, then iterate through those values. You didn't mention what the results would be, so I'll go with a simple string concatenation result in this example (warning: I'm typing it straight into this page, so there may be errors):\\n\\nStringLayout := RECORD\\n STRING s;\\nEND;\\n\\nstringData := DATASET([s1, s2, s3, s4, s5], StringLayout);\\n\\nIntegerLayout := RECORD\\n INTEGER i;\\nEND;\\n\\nintegerData := DATASET([i1, i2, i3, i4, i5], IntegerLayout);\\n\\nDataLayout := RECORD\\n StringLayout;\\n IntegerLayout;\\nEND;\\n\\njoinedData := JOIN\\n (\\n stringData,\\n integerData,\\n TRUE, // Every record combination matches\\n TRANSFORM\\n (\\n DataLayout,\\n SELF:=LEFT,\\n SELF:=RIGHT\\n ),\\n ALL);\\n\\nresult := PROJECT\\n (\\n joinedData,\\n TRANSFORM\\n (\\n StringLayout,\\n SELF.s := LEFT.s + ' = ' + (STRING)RIGHT.i\\n )\\n );\\n\\nOUTPUT(result);
\\nHope this helps.\\n\\nDan\", \"post_time\": \"2014-11-18 12:11:03\" },\n\t{ \"post_id\": 6604, \"topic_id\": 1514, \"forum_id\": 10, \"post_subject\": \"Nested iterations\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nI have 5 integer attributes i1, i2, i3, i4, i5 and 5 string attributes s1, s2, s3, s4 and s5.\\n\\nI need to invoke a function f by passing all combinations of the above mentioned attributes. So, that'd be invoking f(i1, s1), f(i1, s2) and so on till f(i1, s5). And again f(i2, s1), f(i2, s2) and so on till all 25 combinations are covered.\\n\\nAm curious as to what's the best way to implement this in ECL?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-11-18 11:21:10\" },\n\t{ \"post_id\": 6616, \"topic_id\": 1519, \"forum_id\": 10, \"post_subject\": \"Re: creating layouts dynamically\", \"username\": \"rtaylor\", \"post_text\": \"You can use #EXPORT or #EXPORTXML to generate an ECL RECORD structure.\", \"post_time\": \"2014-11-20 13:46:10\" },\n\t{ \"post_id\": 6615, \"topic_id\": 1519, \"forum_id\": 10, \"post_subject\": \"creating layouts dynamically\", \"username\": \"ceejac\", \"post_text\": \"Hi,\\n\\nI just wanted to know if there is any way to dynamically get the layout of a thor file \\n\\nRegards,\\nCeejac\", \"post_time\": \"2014-11-20 11:19:41\" },\n\t{ \"post_id\": 6691, \"topic_id\": 1525, \"forum_id\": 10, \"post_subject\": \"Re: Passing datasets to function calls\", \"username\": \"rtaylor\", \"post_text\": \"Thank you! \\n\\nOur mission here is to try to provide help to everyone in the community.\\n\\nRichard\", \"post_time\": \"2014-12-15 14:40:20\" },\n\t{ \"post_id\": 6634, \"topic_id\": 1525, \"forum_id\": 10, \"post_subject\": \"Re: Passing datasets to function calls\", \"username\": \"rtaylor\", \"post_text\": \"Biswanath,\\n\\nYes, you can pass dataset patameters to functions in ECL, as discussed here:http://hpccsystems.com/download/docs/ecl-language-reference/html/Passing_DATASET_Parameters.html\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-11-25 16:28:05\" },\n\t{ \"post_id\": 6629, \"topic_id\": 1525, \"forum_id\": 10, \"post_subject\": \"Passing datasets to function calls\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nIs it okay to pass datasets as input parameters to invoke a function call? or will it cause a significant performance overhead?\\n\\n ds := <some processing that generates the dataset>\\nthisisafunctioncall(ds);
\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-11-24 12:45:02\" },\n\t{ \"post_id\": 6644, \"topic_id\": 1528, \"forum_id\": 10, \"post_subject\": \"Re: Character Search\", \"username\": \"DSC\", \"post_text\": \"Egad. I somehow missed that, and I looked before posting. I cannot let my wife hear about this.\\n\\nStd.Str.Filter() is definitely the way to go.\", \"post_time\": \"2014-12-01 02:13:07\" },\n\t{ \"post_id\": 6643, \"topic_id\": 1528, \"forum_id\": 10, \"post_subject\": \"Re: Character Search\", \"username\": \"dustinskaggs\", \"post_text\": \"It looks like Std.Str.Filter does exactly what you are looking for.\", \"post_time\": \"2014-12-01 01:33:26\" },\n\t{ \"post_id\": 6640, \"topic_id\": 1528, \"forum_id\": 10, \"post_subject\": \"Re: Character Search\", \"username\": \"DSC\", \"post_text\": \"Manipulating strings is one of the places where it makes a lot of sense to use C++ code rather than ECL. Here is one way to do it, with an example call included:\\n\\nSTRING FindContainingChars(STRING source, VARSTRING search) := BEGINC++\\n if (source && source[0] && search && search[0])\\n {\\n __result = reinterpret_cast<char*>(rtlMalloc(lenSource));\\n memset(__result, 0, lenSource);\\n \\n for (unsigned int x = 0; x < lenSource; x++)\\n {\\n if (strchr(search, source[x]) != NULL)\\n {\\n __result[__lenResult++] = source[x];\\n }\\n }\\n }\\nENDC++;\\n\\nsearch := 'aeiou';\\nsource := 'This is a source string';\\n\\nresult := FindContainingChars(source, search);\\n\\nOUTPUT(result);
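As noted further up the thread, the standard library already covers this case, so the embedded C++ is only needed if you want to avoid the library call:\nIMPORT Std;\nOUTPUT(Std.Str.Filter('This is a source string', 'aeiou')); // 'iiaouei'\n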
\\nCheers,\\n\\nDan\", \"post_time\": \"2014-11-29 14:26:44\" },\n\t{ \"post_id\": 6638, \"topic_id\": 1528, \"forum_id\": 10, \"post_subject\": \"Character Search\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nI have a requirement where i need to find all occurrences of the characters in a search string that are contained in a target string.\\n\\nsearch := 'aeiou';\\nsource := 'This is a source string';\\n\\nI need a function that'd return all vowels from the string - in this case, 'iiaouei'.\\n\\nIs there a function available for this?\\n\\nTHanks\", \"post_time\": \"2014-11-28 13:42:19\" },\n\t{ \"post_id\": 6641, \"topic_id\": 1529, \"forum_id\": 10, \"post_subject\": \"Re: Unicode functions\", \"username\": \"rtaylor\", \"post_text\": \"ECL's UNICODE support is documented in the ECL Language Reference. What do you want to know about?\", \"post_time\": \"2014-11-30 15:31:21\" },\n\t{ \"post_id\": 6639, \"topic_id\": 1529, \"forum_id\": 10, \"post_subject\": \"Unicode functions\", \"username\": \"chanbchen\", \"post_text\": \"Is there a separate documentation available for the unicode library API in ECL?\\nThanks\", \"post_time\": \"2014-11-28 14:49:23\" },\n\t{ \"post_id\": 6845, \"topic_id\": 1537, \"forum_id\": 10, \"post_subject\": \"Re: How to pass a dataset to embedded java?\", \"username\": \"ohhteriiii\", \"post_text\": \"I'm trying to pass a dataset to an embedded java function. The java function takes an array of objects as parameter. Besides, the names to the fields of ECL are mapped with the names to fields of java object. ????\\n70-341\", \"post_time\": \"2015-01-24 05:03:27\" },\n\t{ \"post_id\": 6683, \"topic_id\": 1537, \"forum_id\": 10, \"post_subject\": \"Re: How to pass a dataset to embedded java?\", \"username\": \"bforeman\", \"post_text\": \"I believe the EMBED Java capability was first introduced in version 4.2, but there were additions and bug fixes that were released in Version 5.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-10 12:43:54\" },\n\t{ \"post_id\": 6680, \"topic_id\": 1537, \"forum_id\": 10, \"post_subject\": \"Re: How to pass a dataset to embedded java?\", \"username\": \"nshi\", \"post_text\": \"Thanks for your reply. I just want to make sure one more thing, is this feature only supported by Hpcc System 5.0 or upper?\", \"post_time\": \"2014-12-10 03:14:12\" },\n\t{ \"post_id\": 6678, \"topic_id\": 1537, \"forum_id\": 10, \"post_subject\": \"Re: How to pass a dataset to embedded java?\", \"username\": \"bforeman\", \"post_text\": \"The information on how to pass a dataset to embedded java can be found in this new post:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=41&t=1509&sid=1aff038c85a3d0513695899acc1997d9\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-09 14:20:32\" },\n\t{ \"post_id\": 6675, \"topic_id\": 1537, \"forum_id\": 10, \"post_subject\": \"How to pass a dataset to embedded java?\", \"username\": \"nshi\", \"post_text\": \"Hi HPCC team,\\n\\nI'm trying to pass a dataset to an embedded java function. The java function takes an array of objects as parameter. Besides, the names to the fields of ECL are mapped with the names to fields of java object. \\n\\nHowever, it continues to report an error "error C4187: Type Table not supported for embedded/external scripts".\\n\\nThen I try to execute some code I found in HPCC github.\\nhttps://github.com/hpcc-systems/HPCC-Pl ... 
stream.ecl\\n\\nI got the same kind of error again.\\n"error C4187: Type row of <unnamed> not supported for embedded/external scripts" \\nDid I miss something? Thanks in advance for help.\", \"post_time\": \"2014-12-08 17:08:51\" },\n\t{ \"post_id\": 6732, \"topic_id\": 1549, \"forum_id\": 10, \"post_subject\": \"Re: Unused attributes\", \"username\": \"chanbchen\", \"post_text\": \"That was helpful. Thanks!\", \"post_time\": \"2014-12-30 15:30:15\" },\n\t{ \"post_id\": 6731, \"topic_id\": 1549, \"forum_id\": 10, \"post_subject\": \"Re: Unused attributes\", \"username\": \"rtaylor\", \"post_text\": \"chanbchen,\\n\\nSince those functions obviously return record sets, then the a2 function should not be used at all if the IF expression is true. \\n\\nBTW, your code would be more efficient this way:\\na1 := function1(p1, p2);\\na2 := function2(p1, p2);\\n\\na3 := if( EXISTS(a1), a1, a2);
The EXISTS function stops counting at "1" while the COUNT function counts all the records before the logical expression can be evaluated.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-30 14:04:04\" },\n\t{ \"post_id\": 6727, \"topic_id\": 1549, \"forum_id\": 10, \"post_subject\": \"Unused attributes\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have a question regarding unused attributes in ECL.\\n\\nsay I have this code:\\n\\n\\n\\na1 := function1(p1, p2);\\na2 := function2(p1, p2);\\n\\na3 := if( count(a1) > 0, a1, a2);\\n\\n
\\n\\nIn the above code, if a1 happens have a few rcords, then a2 would not be used at all (and hence a call to function2 won't be necssary at all); so given this context, will function2 be called at all (if a1 is not empty)? I was wondering if the ECL compiler is designed this way.\\n\\nThanks\", \"post_time\": \"2014-12-29 13:18:41\" },\n\t{ \"post_id\": 6746, \"topic_id\": 1551, \"forum_id\": 10, \"post_subject\": \"Re: Logical files overwrite ...\", \"username\": \"chanbchen\", \"post_text\": \"Thanks Richard!\", \"post_time\": \"2015-01-02 14:23:15\" },\n\t{ \"post_id\": 6745, \"topic_id\": 1551, \"forum_id\": 10, \"post_subject\": \"Re: Logical files overwrite ...\", \"username\": \"rtaylor\", \"post_text\": \"chanbchen,\\n\\nSince you're talking about writing logical files, then you're talking about jobs running on Thor. Thor only runs one workunit at a time, so concurrence should not be a problem. Yes, the OVERWRITE option will cause the second job to overwrite the result of the first. \\n\\nThe only way to have concurrently running jobs on Thor is to have them running in separate instances of Thor (usually on another cluster). If two concurrently running workunits both write to the same logical filename, my guess is that the first would have a lock on that file and the second will either error out or wait. An interesting scenario to test.\\n\\nOf course, the easy way out of this situation is to always use unique logical filenames in your code, and only run a single piece of code on one cluster in the environment and not simultaneously on several.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-31 20:19:59\" },\n\t{ \"post_id\": 6737, \"topic_id\": 1551, \"forum_id\": 10, \"post_subject\": \"Logical files overwrite ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nCan 2 WUs write into the same logical file at the same time if overwrite option is used in the output statement? or will it cause an error?\\n\\nThanks\", \"post_time\": \"2014-12-31 14:22:08\" },\n\t{ \"post_id\": 6747, \"topic_id\": 1552, \"forum_id\": 10, \"post_subject\": \"Resource Temporarily Unavailable\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have seen this error quite a few times now:\\n\\n\\neclagent 11: System error: 11: Graph[504], SLAVE <ip address>:7800: Resource temporarily unavailable
\\n\\nThe WU that i used handled logical files - it reads from logical files as well as writes to some logical files. The error log in ECL Watch showed only the above line.\\n\\nI have a quesiton here - what "resource" does this error refer to exactly? WU memory? or logical files?\\n\\nThanks\", \"post_time\": \"2015-01-02 14:27:11\" },\n\t{ \"post_id\": 6912, \"topic_id\": 1553, \"forum_id\": 10, \"post_subject\": \"Re: Understanding this behavior\", \"username\": \"john holt\", \"post_text\": \"I think that I have replicated the behavior on my 5.0.2 system. We have a segment fault, and the stack trace says:\\n/usr/lib/python2.7/dist-packages/numpy/core/multiarray.so(+0xca9f8) [0x7f90006959f8]\\n\\nI’ll look some more to see why. Since the point is probably to integrate the ML with the Python Scientific Numeric package, it will prove challenging to find a work around.\\n\\nAre there specific features of the ML package that you want to use in this test?\", \"post_time\": \"2015-02-06 14:38:55\" },\n\t{ \"post_id\": 6756, \"topic_id\": 1553, \"forum_id\": 10, \"post_subject\": \"Understanding this behavior\", \"username\": \"lokesh\", \"post_text\": \"Hi all,\\n\\nI am facing an unusual problem.\\n\\nWhen I run the following code:\\nIMPORT Python;\\nIMPORT ML;\\n\\nMatrixLayout := {unsigned x, unsigned y, real value};\\n\\nINTEGER GetNumberOfProminentFeatures(unsigned numCols) := EMBED(Python)\\n import numpy\\n return numCols + numCols\\nENDEMBED;\\n\\nSigma := DATASET([{1, 1, 0}, { 1, 2, 1}, {1, 3, 4}, {1, 4, 9},\\n {2, 1, 1}, { 2, 2, 2}, {2, 3, 5}, {2, 4, 10},\\n\\t\\t\\t\\t {3, 1, 4}, { 3, 2, 5}, {3, 3, 8}, {3, 4, 13},\\n {4, 1, 9}, { 4, 2, 10}, {4, 3, 13}, {4, 4, 18}],\\n MatrixLayout);\\n\\nMatrixLayout transformdata(MatrixLayout inp) := TRANSFORM\\n SELF.x := inp.x;\\n SELF.y := inp.y;\\n SELF.value := inp.value;\\nEND;\\n\\nUNSIGNED GetPriorityIndex(DATASET(MatrixLayout) Sigma) := FUNCTION\\n Sigma_mat := PROJECT(Sigma, transformdata(LEFT)); \\n numRows := COUNT(Sigma_mat);\\n RETURN GetNumberOfProminentFeatures(numRows); \\nEND; \\n\\n\\nGetPriorityIndex(Sigma);
\\n\\nI get expected results.\\n\\nWhen I make the small change in the above code \\n\\nIMPORT Python;\\nIMPORT ML;\\n\\nMatrixLayout := {unsigned x, unsigned y, real value};\\n\\nINTEGER GetNumberOfProminentFeatures(unsigned numCols) := EMBED(Python)\\n import numpy\\n return numCols + numCols\\nENDEMBED;\\n\\nSigma := DATASET([{1, 1, 0}, { 1, 2, 1}, {1, 3, 4}, {1, 4, 9},\\n {2, 1, 1}, { 2, 2, 2}, {2, 3, 5}, {2, 4, 10},\\n\\t\\t\\t\\t {3, 1, 4}, { 3, 2, 5}, {3, 3, 8}, {3, 4, 13},\\n {4, 1, 9}, { 4, 2, 10}, {4, 3, 13}, {4, 4, 18}],\\n ML.Types.numericfield);\\n\\nMatrixLayout transformdata(ML.Types.numericfield inp) := TRANSFORM\\n SELF.x := inp.id;\\n SELF.y := inp.number;\\n SELF.value := inp.value;\\nEND;\\n\\nUNSIGNED GetPriorityIndex(DATASET(ML.Types.numericfield) Sigma) := FUNCTION\\n Sigma_mat := PROJECT(Sigma, transformdata(LEFT)); \\n numRows := COUNT(Sigma_mat);\\n RETURN GetNumberOfProminentFeatures(numRows); \\nEND; \\n\\n\\nGetPriorityIndex(Sigma);\\n
\n\nit works only on alternate runs.\n\nThe problem is also resolved if I comment out the import numpy
statement in the second code snippet.\n\nThe error is the following:\n-1: System error: -1: Failed to receive reply from thor 10.10.1.114:20000; (-1, Failed to receive reply from thor 10.10.1.114:20000)
\\n\\nThe only changes in the above two snippets is the record definition from MatrixLayout
to ML.Types.NumericField
at three places.\\n\\nI am using HPCC community_5.0.4-1 on Ubuntu 14.04 over ECL Playground.\\n\\nAny thoughts, logs that can help in resolving this.\", \"post_time\": \"2015-01-07 14:49:55\" },\n\t{ \"post_id\": 6777, \"topic_id\": 1557, \"forum_id\": 10, \"post_subject\": \"Re: CSV-Spray\", \"username\": \"NSD\", \"post_text\": \"the funny thing is, ive configured the vm by myself with comm-edition. the tutorials work fine, my code not so fine.. hmm whatever, then i will code onto 1 device. maybe i'll get it to work if it really starts to annoying me.\\n\\ni'll report then.\\n\\nthx for your support!\", \"post_time\": \"2015-01-09 19:22:36\" },\n\t{ \"post_id\": 6776, \"topic_id\": 1557, \"forum_id\": 10, \"post_subject\": \"Re: CSV-Spray\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nIdeas? Yes. \\n\\nThe VM is designed to be a POC/R&D/"ECL playtoy" type of tool. Therefore, my experience with it has always been with the VM and IDE on the same physical box. It is designed to provide an easy way to teach the syntax and concepts of ECL/HPCC so that you can learn before you spend $$ to create real multi-node clusters to do real-world work.\\n\\nTherefore, I have no idea why you have the problem you do, nor how to solve it.
\\n\\nIf you really want to play with multiple workstations querying a single HPCC environment, then my suggestion is to download the Community Edition here:http://hpccsystems.com/download/free-community-edition/server-platform and configure a 1-node cluster that all your multiple workstations can connect to. Once you have that up and running, it will be relatively simple to expand to multiple-node clusters for real "massively parallel processing" power.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-09 18:59:50\" },\n\t{ \"post_id\": 6775, \"topic_id\": 1557, \"forum_id\": 10, \"post_subject\": \"Re: CSV-Spray\", \"username\": \"NSD\", \"post_text\": \"thank you, it worked when i do the OUTPUT of that dataset in another file. this confuses me, because\\n\\n
IMPORT Tutorial;\\nEXPORT Dataset_OriginalPerson :=\\nDATASET('~tutorial::MT::OriginalPerson',Tutorial.Class_People,THOR);
\\n\\nworked.\\n\\nanyway, there was another problem. heres my peripherie:\\n\\n- Computer 1 with Windows7 (physical)\\n- Computer 2 with Windows7 (physical)\\n- Computer 3 with Lubuntu (virtual) HPCC-SERVER\\n\\nComputer3 is a VM on Computer2. If i send the discussed ECL-Query from Computer1 to 3, i get the error\\nSystem error: 0: Workunit was compiled for eclagent interface version 0, this eclagent requires version 155..155
\\n\\nBut, if i send the Query from Computer2, all works. C1 and C2 are able to communicate with each other. I've to connect C2-->C3 in the same way as C1-->C3 and both got the same ECL IDE Version.\\n\\nIdeas? \", \"post_time\": \"2015-01-09 17:03:50\" },\n\t{ \"post_id\": 6772, \"topic_id\": 1557, \"forum_id\": 10, \"post_subject\": \"Re: CSV-Spray\", \"username\": \"rtaylor\", \"post_text\": \"NSD,
it was sprayed to the Cluster, but with 0 Records (Record field is empty).
The empty Records field in ECL Watch is expected on a CSV file after spraying. The reason is that "spray" is a "dumb" operation -- its mission is to get the data onto the cluster as fast as possible, and its only requirement is to ensure that a single record never spans multiple nodes. Therefore, for CSV (a variable-length record format), spray does not bother to count how many records there are. A fixed-length spray does populate that Records field only because it simply does the math (it does NOT count the records).\\n\\nOnce you've done the spray, your code just needs one more thing:IMPORT StammDaten;\\n\\nCSVRecord := StammDaten.Class_Patient;\\nfile := DATASET('StammDaten.csv', \\n CSVrecord,\\n CSV(HEADING(1),\\n SEPARATOR(','),\\n TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])));\\nOUTPUT(file);
OUTPUT is the action that produces a "query" and eliminates your warning "Query is empty"\\n\\nThese basics are all covered in the free online eLearning courses available here: http://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-09 15:04:38\" },\n\t{ \"post_id\": 6771, \"topic_id\": 1557, \"forum_id\": 10, \"post_subject\": \"CSV-Spray\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nso, I've go a new Problem, which I couldn't fix. Now it's spraying a CSV via EVL Watch or ECL IDE. I've tried two methods:\\n\\n1. Spray via ECL Watch:\\n===========================\\n - Files > Landing Zones > Checked the Box of a single CSV-File > Spay: Delimited\\n - Group: mythor\\n - name Prefix: mt::csv\\n - Target Name: StammDaten\\n - Format: ASCII (tried UTF-8 also)\\n - Max Record Length: 8192 \\n - Seperators: ; (tried \\\\; also)\\n - Line-Terminators: \\\\n, \\\\r\\\\n (in all combinations)\\n - Checked [Overwrite] and [Record Structure Present] (got 1 Headline)\\n\\nit seems, that the Software wont take my Options during the spay, all Options were set to default (e.g. the delimiter/seperator). How to handle this? I've read, that this is a Bug.\\n\\nAnyway, it was sprayed to the Cluster, but with 0 Records (Record field is empty). So there was no chance to get the Data in a Dataset with ECL or anything else (size 372385 is not a multiple of 123 after a system restart i got this error: System error: 0: Workunit was compiled for eclagent interface version 0, this eclagent requires version 155..155).\\n\\n\\n2. Read vie ECL IDE:\\n===========================\\nClass_Patient\\nEXPORT Class_Patient := \\n\\tRECORD\\n\\t\\tUNSIGNED1 \\tVar1;\\n\\t\\tUNSIGNED1 \\tVar2;\\n\\t\\tSTRING1\\t\\tVar3;\\n\\t\\tSTRING21\\tVar4;\\n//.... more vars\\n\\n\\tEND;
\\n\\nSTRG+N\\nIMPORT StammDaten;\\n\\nCSVRecord := StammDaten.Class_Patient;\\nfile := DATASET('StammDaten.csv', \\n CSVrecord,\\n CSV(HEADING(1),\\n SEPARATOR(','),\\n TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])));\\n // tried //192.168.178.20/var/lib/HPCCSystems/mydropzone/StammDaten.csv also in various combinations\\n
\\nMy Warning is now: Warning: (1,0): error C0003: Query is empty\\n\\nIf you need CSV-sample-data please let me know. I really don't know what to do more..\\n\\nThx!\", \"post_time\": \"2015-01-09 12:31:14\" },\n\t{ \"post_id\": 6774, \"topic_id\": 1558, \"forum_id\": 10, \"post_subject\": \"Re: Some large ROXIE query params not interpreted correctly\", \"username\": \"rtaylor\", \"post_text\": \"kpolicano,\\n\\nOK, I have duplicated this issue -- on both hThor and Roxie. If the query is published and then your test data is submitted, then in both cases the second number is changed. Also, in both cluster types, if you simply run the job from the IDE then the second number is NOT changed. Therefore, the issue has something to do with the publishing process and/or the SOAP/JSON interface to the query. This may or may not be a bug -- please report it in JIRA if you feel that it is.\\n\\nHOWEVER, you said:The dataset contains large numbers that are sent as strings but need to be interpreted as unsigned integers.
but your code defines the bigNum field as an UNSIGNED8 and not a STRING. \\n\\nModifying your code like this makes it all work correctly in all circumstances:input_ds := dataset([{'733743134735141420', 0}, \\n {'9976372036854875806', 1}], \\n {STRING20 bigNum, unsigned1 smallNum}) \\n : stored('input_ds');\\n\\t\\t\\t\\t\\nRECORDOF(input_ds) XF(input_ds L) := TRANSFORM\\n UNSIGNED8 InVal := (UNSIGNED8)L.bigNum; //cast to UNSIGNED8\\n UNSIGNED8 OutVal := InVal-1; //do something with it\\n SELF.bigNum := (STRING20)OutVal; //cast back to string\\n SELF := L;\\nEND;\\t\\t\\t\\t\\nout_ds := PROJECT(input_ds,XF(LEFT));\\t\\t\\t\\t \\nOUTPUT(out_ds);
Note that the DATASET now defines the bigNum field as a STRING20 -- which is what the data actually is. The PROJECT transformation I added demonstrates how that STRING20 data needs to be handled to work with it as an UNSIGNED8 within the rest of your code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-09 16:38:24\" },\n\t{ \"post_id\": 6773, \"topic_id\": 1558, \"forum_id\": 10, \"post_subject\": \"Some large ROXIE query params not interpreted correctly\", \"username\": \"kpolicano\", \"post_text\": \"I have a requirement to send a dataset as input to a ROXIE query. The dataset contains large numbers that are sent as strings but need to be interpreted as unsigned integers.\\n\\nHere's an example query which simply spits out the input dataset:\\n\\n\\ninput_ds := dataset([{'733743134735141420', 0}, \\n {'9976372036854875806', 1}], \\n {unsigned8 bigNum, unsigned1 smallNum}) \\n : stored('input_ds');\\n\\noutput(input_ds);\\n
\\n\\nWhen I run this on HTHOR, it works correctly:\\n\\n\\nbignum smallnum\\n733743134735141420 0\\n9976372036854875806 1\\n
\\n\\n\\nHowever, when I publish this query to ROXIE, there are cases where the outputs do not match the inputs. Here is an example JSON request:\\n\\nRequest:\\n\\n{\\n "testquery": {\\n "input_ds": {\\n "Row": [\\n {\\n "bignum": "10863752816033224386",\\n "smallnum": 1\\n }\\n ]\\n }\\n }\\n}\\n
\\n\\nResponse:\\n\\n{\\n "testqueryResponse": {\\n "sequence": 0,\\n "Results": {\\n "result_1": {\\n "Row": [\\n {\\n "bignum": 9223372036854776000,\\n "smallnum": 1\\n }\\n ]\\n }\\n }\\n }\\n}\\n
\\n\\nIt seems like this happens with all "bignum" inputs larger than 2^63, even though "unsigned8" should be able to hold up to 2^64 - 1. Is there any way to avoid this?\", \"post_time\": \"2015-01-09 15:32:18\" },\n\t{ \"post_id\": 6797, \"topic_id\": 1563, \"forum_id\": 10, \"post_subject\": \"Re: Aggregates on sub-sets of data in a cross-tab report\", \"username\": \"Allan\", \"post_text\": \"Thanks very much Richard\\n\\nAll the best\\n\\nAllan\", \"post_time\": \"2015-01-16 07:15:48\" },\n\t{ \"post_id\": 6792, \"topic_id\": 1563, \"forum_id\": 10, \"post_subject\": \"Re: Aggregates on sub-sets of data in a cross-tab report\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nFrankly, I'm surprised it syntax checks. \\n
REAL AverageTRUE := AVE(GROUP,d(condition).Amo); // <== WRONG\\n
Your filter is being ignored (as you discovered), because you're putting it in a field qualification syntax, which, even if it worked, would only change the sum of the values to average and not the count. To properly do the average, that filter would somehow have to apply to both the field values and the count of records where that condition is TRUE. The COUNT function has an optional parameter to do this, but the AVE function does not.\\n\\nSo you need to break it down into the component parts and do the calculation yourself, like this:REAL AverageTRUE := SUM(GROUP,IF(d.condition=TRUE,d.Amo,0)) /\\n COUNT(GROUP,d.condition);
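\\nOne small caveat, offered as an untested sketch rather than something I have run against your data: if a group happens to contain no records where condition is TRUE, the divisor COUNT(GROUP,d.condition) is zero, and depending on how your platform is configured to handle division by zero you may prefer to make the intent explicit, e.g.:\\n\\nREAL AverageTRUE := IF(COUNT(GROUP,d.condition)=0, 0,\\n                      SUM(GROUP,IF(d.condition=TRUE,d.Amo,0)) /\\n                      COUNT(GROUP,d.condition));\\n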
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-15 16:07:58\" },\n\t{ \"post_id\": 6791, \"topic_id\": 1563, \"forum_id\": 10, \"post_subject\": \"Aggregates on sub-sets of data in a cross-tab report\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nA report, grouped by an account number, needs various stats out of said group filtered on some condition within the group. e.g. average transaction times for transactions < 1 second being distinct from those transactions taking longer than 1 sec.\\nIn the code extract below the item high-lighted is not the required result, any ideas to achieve what's required?\\n\\nRec := RECORD\\n STRING1 Acc;\\n BOOLEAN Condition;\\n UNSIGNED Amo;\\nEND;\\n\\nd := DATASET([{'A',TRUE,1},\\n {'A',TRUE,2},\\n {'A',TRUE,3},\\n {'A',FALSE,4},\\n {'A',FALSE,5},\\n {'A',FALSE,6},\\n {'B',TRUE,8},\\n {'B',TRUE,8},\\n {'B',TRUE,8},\\n {'B',TRUE,8},\\n {'B',FALSE,1},\\n {'B',FALSE,1}],Rec);\\nTRec := RECORD\\n STRING1 Account := d.Acc;\\n UNSIGNED TotalAll := COUNT(GROUP);\\n UNSIGNED TotalTRUE := COUNT(GROUP,d.condition);\\n REAL AverageAll := AVE(GROUP,d.Amo);\\n REAL AverageTRUE := AVE(GROUP,d(condition).Amo); // <== WRONG\\nEND;\\n\\nTABLE(d,TRec,Acc);\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-01-15 09:59:32\" },\n\t{ \"post_id\": 6813, \"topic_id\": 1565, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE and LOCAL\", \"username\": \"rtaylor\", \"post_text\": \"BTDT \", \"post_time\": \"2015-01-20 16:31:13\" },\n\t{ \"post_id\": 6810, \"topic_id\": 1565, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE and LOCAL\", \"username\": \"janet.anderson\", \"post_text\": \"Wow, I feel exceptionally embarrassed. Thank you for your help.\", \"post_time\": \"2015-01-20 14:24:31\" },\n\t{ \"post_id\": 6801, \"topic_id\": 1565, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE and LOCAL\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nYou are getting different results because the DISTRIBUTE is not being used by your TABLE functions.\\n\\nTry it this way:
IMPORT stepna.Lytx as L;\\n\\nt := distribute(L.trips, hash32(TelematicsDeviceId));\\n\\n/* unique trips w trackpoints */\\ntpd := table(t, {TelematicsDeviceId, tripid}, TelematicsDeviceId, tripid, local);\\ntpd2 := table(t, {TelematicsDeviceId, tripid}, TelematicsDeviceId, tripid);\\n\\ntpd;\\ntpd2;\\n\\ncount(tpd);\\ncount(tpd2);
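\\nThe LOCAL form only groups rows that are already co-located on a node, which is why both TABLE calls need to read from the DISTRIBUTEd definition t rather than from the undistributed file. As a quick sanity check (a sketch only, not something I have run on your data), you could compare the two results directly:\\n\\nOUTPUT(COUNT(tpd) = COUNT(tpd2)); //should now return TRUE\\n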
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-16 18:52:45\" },\n\t{ \"post_id\": 6800, \"topic_id\": 1565, \"forum_id\": 10, \"post_subject\": \"DISTRIBUTE and LOCAL\", \"username\": \"janet.anderson\", \"post_text\": \"If I DISTRIBUTE on HASH32(field A), and then do a TABLE that groups by fields A and B, why do I get different counts depending on whether I use LOCAL or not? I found another post that implies this should not be the case (i.e., I should not have to DISTRIBUTE on all join conditions, so why all group conditions?):\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=1289&p=5633&hilit=distribute&sid=d66114161b9ac42b7abe5ddfeb67394e&sid=0c0618083fcf0a68df4cba8cb9d864dd#p5633\\n\\n\\nIMPORT stepna.Lytx as L;\\n\\nt := distribute(L.trips, hash32(TelematicsDeviceId));\\n\\n/* unique trips w trackpoints */\\ntpd := table(L.trackpoints, {TelematicsDeviceId, tripid}, TelematicsDeviceId, tripid, local);\\ntpd2 := table(L.trackpoints, {TelematicsDeviceId, tripid}, TelematicsDeviceId, tripid);\\n\\ntpd;\\ntpd2;\\n\\ncount(tpd);\\ncount(tpd2);\\n
\", \"post_time\": \"2015-01-16 17:16:18\" },\n\t{ \"post_id\": 6947, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"itnasasta\", \"post_text\": \"It will probably work with the prefixes, but may lead to ambiguity problems. Since the DATASET specifies /student_rec/student parsing already, the references in the record set should be relative to that. I was discussing this with Bob and others yesterday and afaics it is actually working.. The problem is not with the query or the execution, but with the result display in the IDE or EclWatch.???\\n\\n___________\\nGet mcse books for test king and braindumps.com - ccna course fees success guaranteed. Our high qualitycallutheran.edu you well before Stanford University of selftestengine gmat.\", \"post_time\": \"2015-02-12 06:53:40\" },\n\t{ \"post_id\": 6834, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"joecella\", \"post_text\": \"It may also help the "view-ability" to output as:\\n\\noutput(studentXml,noxpath);\", \"post_time\": \"2015-01-22 20:34:21\" },\n\t{ \"post_id\": 6832, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"bforeman\", \"post_text\": \"Jerry and Jake,\\n\\nI just discovered that the results display just fine in the Legacy ECL Watch.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-01-22 17:43:23\" },\n\t{ \"post_id\": 6831, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"Jerry\", \"post_text\": \"Hi Jake,\\n\\nThanks a lot.\\nAs you have mentioned, the correct output is present in the downloaded file.\\nI will also keep in mind your suggestion about prefixes.\\n\\nRegards,\\nJerry\", \"post_time\": \"2015-01-22 14:21:16\" },\n\t{ \"post_id\": 6830, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\none thing I notice, you should not need the '/school_rec/student/' in your LayoutStudent record definition, i.e.\\n\\nLayoutStudent := RECORD\\n STRING name {XPATH('name')};\\n DATASET (LayoutManyID) cluster {XPATH('Cluster')};\\nEND;\\n
\\n\\n.. should suffice.\\n\\nIt will probably work with the prefixes, but may lead to ambiguity problems. Since the DATASET specifies /student_rec/student parsing already, the references in the record set should be relative to that.\\n\\nI was discussing this with Bob and others yesterday and afaics it is actually working.. The problem is not with the query or the execution, but with the result display in the IDE or EclWatch.\\n\\nIf in EclWatch you look at the Output/Result and download the zipped raw(xml) formatted version, you should be able to see the correct result.\\n\\nI will add some comments to the JIRA.\\n\\nHope that helps.\", \"post_time\": \"2015-01-22 13:12:04\" },\n\t{ \"post_id\": 6829, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"Jerry\", \"post_text\": \"Thank you Bob.\\nI have raised https://track.hpccsystems.com/browse/HPCC-12894 to track this issue.\", \"post_time\": \"2015-01-22 08:05:15\" },\n\t{ \"post_id\": 6822, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Re: Problem with XML Parsing\", \"username\": \"bforeman\", \"post_text\": \"Hi Jerry,\\n\\nI can see no flaws in your logic or code. Can you please open a JIRA issue and include a sample of your code and XML file?\\n\\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nAlso, be sure to mention what HPCC version you are using!\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2015-01-21 18:29:16\" },\n\t{ \"post_id\": 6818, \"topic_id\": 1570, \"forum_id\": 10, \"post_subject\": \"Problem with XML Parsing\", \"username\": \"Jerry\", \"post_text\": \"Hello Folks,\\n\\nI am facing a small problem with XML Parsing.\\nI am trying to parse the following record.\\n<school_rec>\\n <student std="6" gender="M">\\n <name>John</name>\\n <Cluster>\\n <id id_type="roll_number">\\n <mark>1</mark>\\n </id>\\n </Cluster>\\n </student>\\n <student std="9" gender="F">\\n <name>Rose</name>\\n <Cluster>\\n <id id_type="roll_number">\\n <mark>3</mark>\\n <mark>4</mark>\\n </id>\\n <id id_type="class_number">\\n <mark>3</mark>\\n <mark>4</mark>\\n </id>\\n </Cluster>\\n </student>\\n <student std="6" gender="M">\\n <name>Kennedy</name>\\n <Cluster>\\n <id id_type="roll_number">\\n <mark>5</mark>\\n <mark>6</mark>\\n <mark>7</mark>\\n </id>\\n <id id_type="class_number">\\n <mark>5</mark>\\n <mark>6</mark>\\n <mark>7</mark>\\n </id>\\n </Cluster>\\n </student>\\n</school_rec>
\\n\\nI have sprayed the file as XML with student as the Row Tag.\\nFollowing is how I am trying to read the file.\\nLayoutMark := RECORD\\n String mark {XPATH('')};\\nEND;\\n\\nLayoutID := RECORD\\n String id_type {XPATH('@id_type')};\\n\\tDATASET (LayoutMark) marks {XPATH('mark')};\\nEND;\\n\\nLayoutManyID := RECORD\\n DATASET (LayoutID) id {XPATH('id')};\\nEND;\\n\\nLayoutStudent := RECORD\\n STRING name {XPATH('/school_rec/student/name')};\\n DATASET (LayoutManyID) cluster {XPATH('/school_rec/student/Cluster')};\\nEND;\\n\\nstudentXml := DATASET('~hpcc::xml::xmlTest',LayoutStudent,XML('school_rec/student'));\\nstudentXml;
\\nThe problem I am facing is that I am unable to read /school_rec/student/Cluster/id/@id_type. What is confusing me is that I am able to read /school_rec/student/Cluster/id/mark but not the attribute.\\nWould some one please tell what I am doing wrong while reading the attribute?\\nThanks in advance.\\n\\nRegards,\\nJerry\", \"post_time\": \"2015-01-21 07:51:39\" },\n\t{ \"post_id\": 6858, \"topic_id\": 1575, \"forum_id\": 10, \"post_subject\": \"Re: How to keep small index and files in memory in roxie\", \"username\": \"rtaylor\", \"post_text\": \"Kevin,\\n\\nOn Roxie, PRELOAD is only going to work the way you expect it to on published queries. That means that using the ECL IDE, selecting Roxie as the target and clicking the Submit button will give you the behavior you have already seen. \\n\\nAlso, PRELOAD only operates on datasets, not indexes. Index Btrees are always cached in memory on Roxie. There is a global RoxieTopology option to preload indexes, but it’s not a good idea to use it unless you have a tremendous amount of RAM. Our documentation will be updated to reflect this (new to me) information. \\n\\nInstead, you should select Roxie as the target, click on the down arrow portion of the Submit button and then click on Compile. Once the workunit is compiled, open its ECL Watch page and click on the Publish button. Once the query is published, then you can run it from the WsECL page (same IP as your ECL Watch page but port 8002 instead of 8010). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-26 18:46:52\" },\n\t{ \"post_id\": 6853, \"topic_id\": 1575, \"forum_id\": 10, \"post_subject\": \"Re: How to keep small index and files in memory in roxie\", \"username\": \"kevinLv\", \"post_text\": \"Hi Richard,\\n I have tried that function and option, when I submit workunit to roxie in ECL IDE, all preload dataset and index are loaded to memory; but once this workunit finished, all dataset and index are released from memory automatically.\\n public this workunit as roxie query doesn't preload any file either. It seems preload option just works when we submit query work unit in ECL IDE.\\nBR,\\nKevin Lv\", \"post_time\": \"2015-01-26 05:16:06\" },\n\t{ \"post_id\": 6844, \"topic_id\": 1575, \"forum_id\": 10, \"post_subject\": \"Re: How to keep small index and files in memory in roxie\", \"username\": \"rtaylor\", \"post_text\": \"Kevin,\\n\\nTake a look at the PRELOAD function docs for an example if how to use that function. Note that it also references the PRELOAD option that you can put on a DATASET or INDEX definition to accomplish the same purpose.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-23 17:47:00\" },\n\t{ \"post_id\": 6840, \"topic_id\": 1575, \"forum_id\": 10, \"post_subject\": \"How to keep small index and files in memory in roxie\", \"username\": \"kevinLv\", \"post_text\": \"Hi, \\n I have a complex query which include 16 index or dataset reading, this causes to many random disk seeking, so this query's performance is very poor. 
After check the file size, I found most of these files are small or middle size, I think preload all of them into memory can avoid unnecessary disk seeking, but I have no ideal about how to do it, could you please give a guide or some code block, thanks.\\nBR,\\nKevin Lv\", \"post_time\": \"2015-01-23 06:43:34\" },\n\t{ \"post_id\": 6873, \"topic_id\": 1584, \"forum_id\": 10, \"post_subject\": \"Re: Conditional Recordset for CrossTab/Table report\", \"username\": \"rtaylor\", \"post_text\": \"Ben,\\n\\nSince you're using #EXPAND, I have to assume that this is not a FUNCTION but a FUNCTIONMACRO. Therefore you should be looking at #IF.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-28 19:59:57\" },\n\t{ \"post_id\": 6871, \"topic_id\": 1584, \"forum_id\": 10, \"post_subject\": \"Conditional Recordset for CrossTab/Table report\", \"username\": \"benhastings\", \"post_text\": \"I’m trying to get a count of entries based on a variable that is passed in upon execution. The table with the main structure might be called ‘timers’. I want to get a count that will either include or exclude the fields for region and/or country. I am, however, getting an error using the following code (ifblocks not supported in grouped aggregates). Any ideas on how to proceed?\\n\\n\\n\\ngeoLevel='global'; // or 'region' or 'country'\\n\\n summaryList:=MAP(\\n geoLevel='region'=>'CPC, page,zone,obshr',\\n geoLevel='country'=>'CPC, page,zone,country,obshr',\\n 'CPC, page,obshr'\\n );\\n\\nmainRec:=RECORD\\n\\tstring cpc;\\n\\tstring page;\\n\\tstring region;\\n\\tstring country;\\n\\tinteger hour;\\n\\treal metricVal;\\nEND;\\n\\ntimers := DATASET(<my input file>,mainRec);\\n\\nvariableRec := RECORD\\n\\ttimers.CPC;\\n\\ttimers.page;\\n\\tSTRING timer :='vol';\\n\\tIFBLOCK(geoLevel='region' OR geoLevel='country')\\n\\t timers.zone;\\n\\tEND;\\n\\tIFBLOCK(geoLevel='country')\\n\\t timers.country;\\n\\tEND;\\n\\ttimers.OBSHR;\\n\\treal metricVal := COUNT(GROUP);\\t\\t\\nEND;\\n\\nsummaryTable := TABLE(timers, variableRec, #EXPAND(summaryList)); \\n
\\n\\nHowever, ECL complains that “IFBLOCKS are not supported inside grouped aggregates.”\", \"post_time\": \"2015-01-28 19:19:19\" },\n\t{ \"post_id\": 7013, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"This error is clear.\\n\\nYou simply have not built the index that the query requires.\\n\\nkjv.File_KJV.Bld
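\\n\\nIf you want to build everything the search needs in one pass, a one-off BWR along these lines should do it (a sketch based on the definitions already posted in this thread, not something I have re-run here):\\n\\nIMPORT kjv;\\nSEQUENTIAL(kjv.File_KJV.Bld, kjv.Inversion.Bld);\\n\\nOnce both keys exist, the GetBatch call should no longer report the missing key::kjv_txt file.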
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-24 02:32:05\" },\n\t{ \"post_id\": 7012, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nThank you for your help.\\nI created new BWR and executing queries, while trying to execute below query,\\n\\nIMPORT kjv;\\n\\nk := kjv.Inversion.key;\\n\\ns := kjv.Inversion.Search('LOVE OF GOD');\\nkjv.File_KJV.GetBatch(s);
\\n\\nI got the error\\nError: System error: 10001: Graph[7], Missing logical file key::kjv_txt\\n\\nKindly explain why we are using these key files and how to resolve this error.\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2015-02-24 01:02:57\" },\n\t{ \"post_id\": 6977, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Create a new BWR file in the KJV folder. I named mine BWR_TestInversion.\\n\\nThen, two lines of code:\\n\\nIMPORT KJV;\\nKJV.Inversion.Records;
\\n\\nYes, you are probably adding your code in a spot that is causing recursion. If the goal of your code is to just view records, you can call it externally as it is exported from the Inversion module.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-18 13:29:40\" },\n\t{ \"post_id\": 6974, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hi Bob,\\n\\nNow I am able to execute without any errors, but if I add any statement in the middle of the code, it throws some errors.\\n\\nI added \\nrec := kjv.Inversion.records;\\nrec
\\n\\nerrors are \\n\\n\\nError: Definition of Inversion contains a recursive dependency (0, 0 - kjv.Inversion)\\nError: syntax error near "." : expected ';' (25, 21 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "Key" (29, 26 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "Key" (39, 50 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (47, 2 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "GraphBody" (49, 35 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error near "G" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (50, 28 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (53, 2 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (54, 3 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (55, 2 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\", \"post_time\": \"2015-02-17 22:49:51\" },\n\t{ \"post_id\": 6973, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Where is the END for the MODULE structure? 
\\n\\nIt looks like you have a bit of code missing from the tutorial.\\n\\nBob\\n\\nIMPORT * FROM Std.Str;\\n\\nEXPORT File_KJV := MODULE\\n// Note - this will NOT work well in a distributed system as it requires iterates and rollups\\n// that stream from one node to the next.\\n// Of course - this file processes in almost no-time on a laptop - so multi-node should not be an issue\\nR := RECORD\\n STRING Txt;\\n END;\\n\\t\\nd := DATASET('~tutorialkjv::bmf',R,CSV(SEPARATOR('')));\\n\\nR TextOntoReference(R le, R ri) := TRANSFORM\\n SELF.Txt := le.Txt + ' ' + ri.Txt;\\n END;\\t\\n\\nRld := ROLLUP(d,LEFT.Txt[1]='$' AND RIGHT.Txt[1]<>'$',TextOntoReference(LEFT,RIGHT))\\n :PERSIST('~KJV::BMF::ROLLUP');\\n\\nUNSIGNED GetChapter(STRING S) := (UNSIGNED)S[1..Find(S,':',1)-1];\\nUNSIGNED GetVerse(STRING S) := (UNSIGNED)S[Find(S,':',1)+1..];\\n\\nR2 := RECORD\\n \\tSTRING5 Book := GetNthWord(Rld.Txt,2);\\n\\tUNSIGNED1 Booknum := 0;\\n\\tUNSIGNED1 Chapter := GetChapter(GetNthWord(Rld.Txt,3));\\n\\tUNSIGNED1 Verse := GetVerse(GetNthWord(Rld.Txt,3));\\n\\tSTRING Verse_Text := Rld.Txt[Find(Rld.Txt,' ',3)+1..];\\n END;\\n\\t\\nt := TABLE(Rld,R2);\\nR2 NoteBookNum(R2 le, R2 ri) := TRANSFORM\\n SELF.Booknum := IF( le.book=ri.book, le.booknum, le.booknum+1 );\\n SELF := ri;\\n END;\\n\\nEXPORT Txt := ITERATE(t,NoteBookNum(LEFT,RIGHT));\\nEXPORT Key := INDEX(Txt,{BookNum,Chapter,Verse},{Verse_Text},'~key::kjv_txt');\\nEXPORT Bld := BUILDINDEX(Key,OVERWRITE);\\n\\nEXPORT Layout_Reference := RECORD\\n UNSIGNED1 BookNum;\\n\\tUNSIGNED1 Chapter;\\n\\tUNSIGNED1 Verse;\\n END;\\n\\nEXPORT Get(UNSIGNED1 pBookNum,UNSIGNED1 pChapter, UNSIGNED1 pVerse) := FUNCTION\\n\\tRETURN Key(BookNum=pBookNum,pChapter=0 OR pChapter=Chapter,pVerse=0 OR pVerse=Verse);\\n END;\\n\\t\\nEXPORT GetBatch(DATASET(Layout_Reference) refs) := FUNCTION\\n\\tRETURN JOIN(refs,Key,LEFT.BookNum=RIGHT.BookNum AND LEFT.Chapter=RIGHT.Chapter AND LEFT.Verse=RIGHT.Verse,TRANSFORM(RIGHT));\\n END;\\n\\t\\nEND;\\n
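\\n\\nOnce the MODULE syntax-checks, a quick sanity test from a separate BWR (an untested sketch) would be something like:\\n\\nIMPORT kjv;\\nOUTPUT(CHOOSEN(kjv.File_KJV.Txt,10));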
\", \"post_time\": \"2015-02-17 18:31:36\" },\n\t{ \"post_id\": 6972, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nHere is my file structure,\\n\\nI created a folder named "kjv" on "MyFiles" repository in which i have created ecl programs "File_KJV.ecl and Inversion.ecl"\\n\\nwhen i compile File_KJV.ecl, I got the error saying \\nError: syntax error : expected ';' (19,4)\\nimport * from Std.Str;\\n\\nEXPORT File_KJV := MODULE\\n// Note - this will NOT work well in a distributed system as it requires iterates and rollups\\n// that stream from one node to the next.\\n// Of course - this file processes in almost no-time on a laptop - so multi-node should not be an issue\\nR := RECORD\\n \\tSTRING Txt;\\n END;\\n\\t\\nd := DATASET('kjv_text',R,CSV(SEPARATOR('')));\\n\\nR TextOntoReference(R le, R ri) := TRANSFORM\\n SELF.Txt := le.Txt + ' ' + ri.Txt;\\n END;\\t\\n\\t\\nRld := ROLLUP(d,LEFT.Txt[1]='$' AND RIGHT.Txt[1]<>'$',TextOntoReference(LEFT,RIGHT));\\n\\nRld
\\n\\nwhen I compile Inversion.ecl, I got the following errors\\nError: syntax error : expected ';' (19, 4 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\File_KJV.ecl)\\nError: Object 'kjv' does not have a member named 'File_KJV' (7, 15 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "File_KJV" (7, 15 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "le" (17, 15 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: SELF not legal here (19, 17 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "Word" (19, 22 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: syntax error near ":=" : expected '.' (20, 22 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Unknown identifier "I" (22, 16 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Definition of 'Inversion' has a trailing public definition 'Records' (24, 1 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\kjv\\\\Inversion.ecl)\\nError: Definition of Inversion contains a recursive dependency (0, 0 - kjv.Inversion)\\nError: syntax error near "." : expected ';' (26, 21 - \\n\\nCode for Inversion.ecl\\nIMPORT kjv;\\nIMPORT * from Std.Str;\\n\\nEXPORT Inversion := MODULE\\n\\nSHARED I := $.File_KJV.Txt;\\nSHARED R := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n\\nClean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n R TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\n\\nEXPORT Records := N;\\n\\nrec := kjv.Inversion.records;\\nrec
\", \"post_time\": \"2015-02-17 18:22:57\" },\n\t{ \"post_id\": 6971, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Did you create a folder named "kjv"? \\nWhere is your Inversion definition located?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-17 17:35:29\" },\n\t{ \"post_id\": 6970, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Richard,\\n\\nStill facing the same error,\\n\\nError: Import names unknown module "kjv"(1,15)\\nError: Unknown identifier "kjv" (25, 8)\\nIMPORT kjv;\\nIMPORT * from Std.Str;
\\n\\nThank you,\\n\\nSwetha\", \"post_time\": \"2015-02-17 16:57:14\" },\n\t{ \"post_id\": 6969, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"rtaylor\", \"post_text\": \"Swetha,\\n\\nTry changing this line:IMPORT * from kjv;;
to this:IMPORT kjv;
FWIW, I always teach that using the "IMPORT * from" syntax is a bad habit to get into, because it can cause you problems down the road.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-17 16:49:20\" },\n\t{ \"post_id\": 6968, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nWhen i tried to compile the following code,\\n\\nIMPORT \\t* from kjv;;\\nIMPORT * from Std.Str;\\n\\nEXPORT Inversion := MODULE\\n\\nSHARED I := $.File_KJV.Txt;\\nSHARED R := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n\\nClean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n R TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\n\\nEXPORT Records := N;\\n\\nrec := kjv.Inversion.records;\\nrec
\\n\\nI got this error: Error: Unknown identifier "kjv" (25, 8)\\n\\nKindly assist me to resolve this error.\\n\\nThank you,\\n\\nSwetha\", \"post_time\": \"2015-02-17 16:40:58\" },\n\t{ \"post_id\": 6894, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Since your CLEAN definition is local and inline with the Inversion module, you don't need to qualify it in your code. \\n\\nInstead of:\\n\\nN := NORMALIZE(I,WordCount($.Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));
\\n\\nJust do:\\n\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));
\\n\\nand you do not need to EXPORT Clean, just move it inside the MODULE structure:\\n\\n Clean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n R TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\n \\n N := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));
\\n\\nThis compiles fine for me, using your code.\\n\\nBob\", \"post_time\": \"2015-02-03 17:15:24\" },\n\t{ \"post_id\": 6893, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nI tried with the export suggested, but unable to resolve the errors (got the same errors).\\n\\nFollowing is the ecl program, I am using the same code given in the tutorial.\\n\\nIMPORT * from Std.Str;\\nEXPORT Clean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\nEXPORT Inversion := MODULE\\nSHARED I := $.File_KJV.Txt;\\nSHARED R := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord($.Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(I,WordCount($.Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\nEXPORT Records := N;\\nEXPORT Key := INDEX(Records,{STRING20 Wrd := Word,BookNum,Chapter,Verse,Word_Pos},'~key::kjv_inversion');\\nEXPORT Bld := BUILDINDEX(Key,OVERWRITE);\\nEXPORT Search(STRING S) := FUNCTION\\nD := DATASET([{s}],{ STRING T });\\nR SearchBlock(D le,UNSIGNED C) := TRANSFORM\\n SELF.Word := GetNthWord(le.T,C);\\n END;\\nN := NORMALIZE(D,WordCount(S),SearchBlock(LEFT,COUNTER));\\nR GraphBody(SET OF DATASET(R) I,UNSIGNED C) := FUNCTION\\n RETURN DEDUP( \\n MAP ( C = 1 => PROJECT(Key(Wrd=I[0][1].Word),TRANSFORM(R,SELF := LEFT)),\\n JOIN(I[C-1],Key,RIGHT.Wrd=I[0][C].Word\\n AND LEFT.BookNum=RIGHT.BookNum\\n AND LEFT.Chapter=RIGHT.Chapter\\n AND LEFT.Verse=RIGHT.Verse,TRANSFORM(R,SELF := LEFT))\\n )\\n ,BookNum,Chapter,Verse);\\n END;\\n G := GRAPH(N,COUNT(N),GraphBody(ROWSET(LEFT),COUNTER));\\n RETURN PROJECT(G,TRANSFORM($.File_KJV.Layout_Reference,SELF := LEFT));\\n END; \\n END;
\", \"post_time\": \"2015-02-03 17:03:43\" },\n\t{ \"post_id\": 6892, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"The first error will probably clear everything else. You are missing the "Clean" file definition in your repository folder. Based on the tutorial, it should look like this:\\n\\nIMPORT * FROM STD.STR;\\nEXPORT Clean(STRING s) := \\n Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');
\\n\\nAdd this to your repository folder and recompile.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-03 16:39:16\" },\n\t{ \"post_id\": 6891, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nI have two programs on my ecl ide.\\n1)File_KJV.ecl which has code from the link http://www.dabhand.org/ECL/construct_a_simple_bible_search.htm, I can able to execute this program without any errors.\\n\\n2)File_KJV1.ecl which has code from the link http://www.dabhand.org/ECL/construct_a_simple_bible_search.htm, which is giving me the following errors.\\n\\nError: Object '_local_directory_' does not have a member named 'Clean' (13, 51 - \\nError: Unknown identifier "Clean" (13, 51 - \\nError: Object '_local_directory_' does not have a member named 'le' (13, 57 - \\nError: Object '_local_directory_' does not have a member named 'verse_Text' (13, 60 - \\nError: Object '_local_directory_' does not have a member named 'c' (13, 72 - \\nError: Object '_local_directory_' does not have a member named 'SELF' (14, 13 - \\nError: Incompatible types: can not assign Integer to Record r (11, 11 - \\nError: Object '_local_directory_' does not have a member named 'Clean' (16, 30 - \\nError: Unknown identifier "Clean" (16, 30 - \\nError: syntax error near "Records" : expected ',' (18, 21 - \\nError: syntax error near "Key" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (19, 26 - \\nError: syntax error near "Key" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (28, 50 - \\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (35, 3 - \\nError: Unknown identifier "GraphBody" (36, 35 - \\nError: syntax error near "G" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (37, 28 - \\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (38, 3 - \\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (39, 3 - \\nError: syntax error : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, 
EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$' (39, 7 - [/list]
\", \"post_time\": \"2015-02-03 16:31:27\" },\n\t{ \"post_id\": 6890, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nWhat errors are you seeing with the tutorial? I think if we can get you through the compile and submit process, we can help you in understanding the code.\\n\\nThere are many ways to search for a word in a text document. Free-form text parsing immediately comes to mind. But before we go any deeper, the first thing to determine is exactly what you need to do.\\n\\nLet's look at your errors first. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-03 14:21:02\" },\n\t{ \"post_id\": 6887, \"topic_id\": 1585, \"forum_id\": 10, \"post_subject\": \"Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hai,\\n\\nI am working on inverted index, I would like to search for a word in a given text file. Please help me with the code or resources.\\n\\nI tried to execute the code from the link http://www.dabhand.org/ECL/construct_a_simple_bible_searchII.htm, but stuck with errors. Also I din't understand the code, like text file used and the word given to search.\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2015-02-02 23:30:52\" },\n\t{ \"post_id\": 6916, \"topic_id\": 1588, \"forum_id\": 10, \"post_subject\": \"Re: Unable to use SKIP in a TRANSFORM\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard for this.\\n\\nYes - will open JIRA. Your work-around will work, but I was trying to do all the work in the TRANSFORM.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2015-02-06 19:21:29\" },\n\t{ \"post_id\": 6913, \"topic_id\": 1588, \"forum_id\": 10, \"post_subject\": \"Re: Unable to use SKIP in a TRANSFORM\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou should report this issue in JIRA. I think it may be due to the nested TRANSFORM "confusing" things.\\n\\nBut if you just eliminate the problematic SKIP and add a simple filter to the end of the JOIN, then you get what I believe you want, like this:RTL := RECORD\\n STRING6 Acc;\\n STRING50 ABIList;\\nEND;\\n\\nRABI := RECORD\\n STRING5 ABI;\\nEND;\\n\\nRDrvFile := RECORD\\n STRING6 Acc;\\n DATASET(RABI) ABIs;\\nEND;\\n\\nTL := DATASET([{'1','C0001|A0001|B0001'},\\n {'2','C0002'},\\n {'3','A0008|B0008'}],RTL);\\n \\nDrvFile := DATASET([{'1',DATASET([{'A0001'},{'B0007'}],RABI)},\\n {'3',DATASET([{'D0008'},{'E0008'}],RABI)}],RDrvFile);\\n \\nRRes := RECORD\\n RTL AND NOT ABIList tl;\\n DATASET(RABI) ABIs;\\nEND;\\n\\nRRes MakeBind(RTL T,RDrvFile R) := TRANSFORM\\n\\n RABI matchOnABI (RABI L) := TRANSFORM\\n SELF.ABI := IF (REGEXFIND('.*'+L.ABI+'.*',T.ABIList),L.ABI,SKIP);\\n END;\\n SELF.ABIs := PROJECT(R.ABIs,matchOnABI(LEFT));\\n SELF.tl := T;\\nEND;\\n\\nJOIN(TL,DrvFile,LEFT.Acc = RIGHT.Acc,MakeBind(LEFT,RIGHT),LOCAL,ALL)(EXISTS(ABIs));
\\nHTH, \\n\\nRichard\", \"post_time\": \"2015-02-06 16:39:41\" },\n\t{ \"post_id\": 6910, \"topic_id\": 1588, \"forum_id\": 10, \"post_subject\": \"Re: Unable to use SKIP in a TRANSFORM\", \"username\": \"Allan\", \"post_text\": \"By the way, I know I could add a condition to the JOIN to filter out records that won't generate a row, but then I have to re-do the same kind of filter inside the transform to get the actual ABI list.\\n\\nAnd it does not address the question. Its just a work-around.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-02-06 10:35:37\" },\n\t{ \"post_id\": 6909, \"topic_id\": 1588, \"forum_id\": 10, \"post_subject\": \"Unable to use SKIP in a TRANSFORM\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nThe following code fails to syntax check at line marked. I can't see why, and yes I have re-read the reference manual.\\nI need to be able to SKIP a record where a transaction does not contain any ABI's referenced in the report drive file.\\n\\nRTL := RECORD\\n STRING6 Acc;\\n STRING50 ABIList;\\nEND;\\n\\nRABI := RECORD\\n STRING5 ABI;\\nEND;\\n\\nRDrvFile := RECORD\\n STRING6 Acc;\\n DATASET(RABI) ABIs;\\nEND;\\n\\nTL := DATASET([{'1','C0001|A0001|B0001'},\\n {'2','C0002'},\\n\\t\\t\\t\\t\\t\\t\\t {'3','A0008|B0008'}],RTL);\\n\\t\\t\\t\\t\\t\\t\\t \\nDrvFile := DATASET([{'1',DATASET([{'A0001'},{'B0007'}],RABI)},\\n {'3',DATASET([{'D0008'},{'E0008'}],RABI)}],RDrvFile);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nRRes := RECORD\\n RTL AND NOT ABIList tl;\\n DATASET(RABI) ABIs;\\nEND;\\n\\nRRes MakeBind(RTL T,RDrvFile R) := TRANSFORM\\n\\n RABI matchOnABI (RABI L) := TRANSFORM\\n SELF.ABI := IF (REGEXFIND('.*'+L.ABI+'.*',T.ABIList),L.ABI,SKIP);\\n END;\\n x := PROJECT(R.ABIs,matchOnABI(LEFT));\\n SELF.tl := IF(EXISTS(x),T,SKIP); // <== **** FAILS ****\\n SELF.ABIs := x;\\nEND;\\n\\nJOIN(TL,DrvFile,LEFT.Acc = RIGHT.Acc,MakeBind(LEFT,RIGHT),LOCAL,ALL);\\n
\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-02-06 10:27:23\" },\n\t{ \"post_id\": 6922, \"topic_id\": 1589, \"forum_id\": 10, \"post_subject\": \"Re: Space padding in indexes\", \"username\": \"rtaylor\", \"post_text\": \"chanbchen,\\n\\nA STRING10 field will always be space padded to 10 characters.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-09 17:53:48\" },\n\t{ \"post_id\": 6920, \"topic_id\": 1589, \"forum_id\": 10, \"post_subject\": \"Re: Space padding in indexes\", \"username\": \"chanbchen\", \"post_text\": \"Thank you Rich. I have 1 more question on this.\\n\\nWhile fetching from an index, I understand that the key field would be trimmed during comparison; but the dataset resulting from the index fetch - will the fields in it have padded spaces?\\n\\n\\nds := index1(keyfield = 'ABC');\\nprocessedData := processDS(ds);\\n
\\n\\nSo the index fetch keyfield='ABC' would be accurate as the key field would be trimmed during index read. But when i pass ds into a function as shown above, will the fields in it have padded spaces? Should I do a trim explicitly?\\n\\nThanks\", \"post_time\": \"2015-02-09 13:55:27\" },\n\t{ \"post_id\": 6914, \"topic_id\": 1589, \"forum_id\": 10, \"post_subject\": \"Re: Space padding in indexes\", \"username\": \"rtaylor\", \"post_text\": \"chanbchen,\\n\\nThe STRING data type is space padded, therefore it would also be space padded in an INDEX. Those spaces don't hurt anything because INDEXes are always LZW compressed and trailing spaces are always trimmed when strings are compared, so that 'ABC ' always equals 'ABC'\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-06 16:44:25\" },\n\t{ \"post_id\": 6911, \"topic_id\": 1589, \"forum_id\": 10, \"post_subject\": \"Space padding in indexes\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have a dataset containing a string field. I want to create an index with that string as the key. So I declare a string10 field in the index's layout. But after calling buildindex, i see that the key field has extra spaces padded to fill the 10 character length specified in the layout even if the actual data is only 4 character long.\\n\\nIs this normal for indexes to pad spaces?\\n\\nThanks\", \"post_time\": \"2015-02-06 14:12:01\" },\n\t{ \"post_id\": 6955, \"topic_id\": 1598, \"forum_id\": 10, \"post_subject\": \"Re: What is meant by "Failed to create process in ..."\", \"username\": \"tlhumphrey2\", \"post_text\": \"Created JIRA HPCC-13048.\", \"post_time\": \"2015-02-13 14:15:27\" },\n\t{ \"post_id\": 6953, \"topic_id\": 1598, \"forum_id\": 10, \"post_subject\": \"Re: What is meant by "Failed to create process in ..."\", \"username\": \"rtaylor\", \"post_text\": \"Tim,If I don't do the OUTPUT (last statement) and remove 'drefrexfull:=' just left of PIPE, I get the expected output to the workunit.
Sounds to me like time to submit it to JIRA \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-12 20:04:40\" },\n\t{ \"post_id\": 6951, \"topic_id\": 1598, \"forum_id\": 10, \"post_subject\": \"What is meant by "Failed to create process in ..."\", \"username\": \"tlhumphrey2\", \"post_text\": \"I get the error message: Error: System error: 10003: Graph[1], SLAVE 10.0.0.88:20700: Graph[1], piperead[2]: Failed to create process in /var/lib/HPCCSystems/mythor/ for : /var/lib/HPCCSystems/mythor/bz2_unzip_drefrexfull.sh\\n\\nMy ecl code is the following:\\n
layout_drefrexfull := RECORD\\nUNSIGNED5 CitingID;\\nUNSIGNED5 CitedID;\\nEND;\\ndrefrexfull:=PIPE('/var/lib/HPCCSystems/mythor/bz2_unzip_drefrexfull.sh', layout_drefrexfull, CSV);\\nOUTPUT(drefrexfull,,'drefrexfull',OVERWRITE);
\\n\\nThe bash script that the PIPE uses is:\\n#!/bin/bash -e\\ntar -O -jxvf /var/lib/HPCCSystems/mydropzone/drefrexfull.tar.bz2|head|sed "s/[ \\\\t]/,/"\\n
\\n\\nIf I don't do the OUTPUT (last statement) and remove 'drefrexfull:=' just left of PIPE, I get the expected output to the workunit.\", \"post_time\": \"2015-02-12 19:01:15\" },\n\t{ \"post_id\": 7017, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,In a loop body, can I issue break kind of statement to terminate the loop? \\nwhat are the different ways to terminate loop...other than template language?\\n
Template language is NOT applicable to the LOOP function. As to the various ways to terminate the LOOP, take a look at the LOOP docs here: http://hpccsystems.com/download/docs/ecl-language-reference/html/LOOP.html, especially the fourth and fifth forms of LOOP. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-24 14:21:37\" },\n\t{ \"post_id\": 7014, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\nIn a loop body, can I issue break kind of statement to terminate the loop? \\nwhat are the different ways to terminate loop...other than template language?\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-02-24 02:51:17\" },\n\t{ \"post_id\": 7002, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Dear Richard,\\n\\n Thanks a lot and sorry for posting even before checking code properly. I did not see last part of the code - looping. In the function declaration, I did not give Dataset for parameter , for which I got mismatch parameters error and I got stuck there. \\n The code provided is working perfect and I tested with 20 nodes graph, performance is good. Infact, I am getting thor time as '0.00', I need to check with more large dataset.\\n Thanks a lot for clear explanations and I would post next time ECL code in <code></code> for any doubts \\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-02-19 22:42:02\" },\n\t{ \"post_id\": 7001, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,
But my query here is, the example code I have sent contains only 3 iterations, but I want the same code to run for n number of iterations, in such a case I need to call "ProcessIteration" function multiple times, I want to avoid that also
That is exactly what the LOOP function is designed to do. The number of iterations is determined by the value of the second parameter to the LOOP. I am checking whether I can produce final iteration result only without giving the user previous iteration results.
Notice that my code only produces the final result and not the interim results. There is one more problem here, input for first iteration differs from all other iterations
You pass in the values to start with to the FUNCTION.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-19 21:48:45\" },\n\t{ \"post_id\": 6998, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Richard.\\n\\nI actually did similar thing but not completely successful yet. I will try the below code. But my query here is, the example code I have sent contains only 3 iterations, but I want the same code to run for n number of iterations, in such a case I need to call "ProcessIteration" function multiple times, I want to avoid that also. I am checking whether I can produce final iteration result only without giving the user previous iteration results. There is one more problem here, input for first iteration differs from all other iterations, so checking for alternatives so that a single function runs for all iterations and also instead of calling it multiple times, it has to repeat on itself if I specify number of iterations. Any inputs will be greatful and thanks for the help provided.\", \"post_time\": \"2015-02-19 20:58:13\" },\n\t{ \"post_id\": 6997, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nFirst, please use the "[ code ]your code goes here[ /code ]" tags to surround your example code so that you don't lose your code formatting (indention for readability is important), and so it is easy to for me copy/paste your code. Just press the Code button at the top of the comment editing window to put these in automatically.\\n\\nHere is how I would re-write your code:\\nLayout_Node := RECORD\\n UNSIGNED1 NodeID;\\n REAL CurrentRank;\\n UNSIGNED1 NbCount;\\nEND;\\n\\nNode := DATASET([{1,0.2,2},\\n {2,0.2,2}, \\n {3,0.2,1},\\n {4,0.2,1},\\n {5,0.2,3}],Layout_Node);\\n// output(Node,NAMED('Node'));\\n\\nLayout_Neighbors := RECORD\\n UNSIGNED1 NeighborID;\\n REAL IncomingValue;\\nEND;\\n\\nLayout_Nodes_Link := RECORD\\n UNSIGNED1 NodeID;\\n Layout_Neighbors; \\nEND;\\n\\nNeighbors := DATASET([{1,2,0.0},\\n {1,4,0.0},\\n {2,3,0.0},\\n {2,5,0.0},\\n {3,4,0.0},\\n {4,5,0.0},\\n {5,1,0.0},\\n {5,2,0.0},\\n {5,3,0.0}],Layout_Nodes_Link);\\n\\nLayout_Combined := RECORD\\n Layout_Node;\\n DATASET(Layout_Neighbors) Neighbors; \\nEND;\\nP_recs := PROJECT(Node, TRANSFORM(Layout_Combined,SELF := LEFT; SELF := []));\\n// output(p_recs,NAMED('p_recs'));\\n\\n//up to here, it's all your previous code\\n//following are my changes \\n//*******************************************\\n//the loopbody FUNCTION\\nProcessIteration(DATASET(Layout_Combined) InDS) := FUNCTION\\n P := PROJECT(InDS,TRANSFORM(Layout_Combined,SELF.Neighbors := [],SELF := LEFT));\\n\\n Layout_Combined XF1(Layout_Combined L, Layout_Nodes_Link R) := TRANSFORM\\n SELF.Neighbors := L.Neighbors + \\n ROW({R.NeighborID,L.CurrentRank/L.Nbcount}, Layout_Neighbors);\\n SELF := L;\\n END; \\n I := DENORMALIZE(P, Neighbors, LEFT.nodeid=RIGHT.nodeid, XF1(LEFT,RIGHT));\\n\\n Layout_Combined XF2(Layout_Combined L, Layout_Nodes_Link R) := TRANSFORM\\n SELF.CurrentRank := SUM((I.Neighbors(NeighborId=L.NodeID)), incomingvalue);\\n SELF.Neighbors := L.Neighbors + \\n ROW({R.NeighborID,R.IncomingValue}, Layout_Neighbors);\\n SELF := L;\\n END; \\n RETURN DENORMALIZE(P, Neighbors, LEFT.nodeid=RIGHT.nodeid, XF2(LEFT,RIGHT)); \\nEND;\\n\\n//Just calling the function 3 times \\nP1 := ProcessIteration(P_Recs);\\nP2 := ProcessIteration(P1);\\nP3 := 
ProcessIteration(P2);\\nOUTPUT(P3,NAMED('MYiteration31'));\\n\\n//or using a LOOP for 3 iterations, calling the function as the loopbody parameter\\nL1 := LOOP(P_Recs,3,ProcessIteration(ROWS(LEFT)));\\nOUTPUT(L1,NAMED('LOOPiteration31'))
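\\n\\nIf you later want the iteration count to come from a parameter instead of being hard-coded, one possibility (just a sketch, I have not run this variant) is to feed the LOOP count from a STORED value:\\n\\nUNSIGNED IterCount := 3 : STORED('IterCount');\\nLN := LOOP(P_Recs,IterCount,ProcessIteration(ROWS(LEFT)));\\nOUTPUT(LN,NAMED('LOOPiterationN'));\\n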
The first thing I did was look at your nested PROJECT(PROJECT(...)) code to try to understand what you were doing with it. I then replaced it with a single PROJECT to accomplish the same purpose. \\n\\nNext, I worked at understanding what your code does and found that you have three instances coded of the "same" three-step process. So I encapsulated the three-step process into a single FUNCTION structure that takes an input DATASET parameter as its starting point. That makes your process standard for each iteration. \\n\\nThen I embedded the TRANSFORM functions for the two DENORMALIZE operations into the FUNCTION, which allowed me to eliminate the third parameter to the second TRANSFORM in favor of simply using the result of the first DENORMALIZE.\\n\\nWhen you run your code then run mine, you'll find they produce exactly the same result from your inline datasets. Please test it both ways on your large datasets (as separate workunits so you can see if there is any significant performance difference) and verify that the results are in fact duplicated.\\n\\nLet me know how it works out,\\n\\nRichard\", \"post_time\": \"2015-02-19 20:48:47\" },\n\t{ \"post_id\": 6983, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Hello Richard,\\n\\n I am trying to modify below code such that repeated code for each iteration can be removed so that piece of code can be looped or iterated for giving results.\\n\\nIMPORT TrainingPooja.PageRank as PageRank;\\n \\nLayout_Node := RECORD\\n UNSIGNED1 NodeID;\\n REAL CurrentRank;\\n UNSIGNED1 NbCount;\\nEND;\\n\\n//Node := DATASET([\\n// {1,0.2,2},\\n// {2,0.2,2}, \\n// {3,0.2,1},\\n// {4,0.2,1},\\n// {5,0.2,3}],Layout_Node);\\n \\n //output(Node);\\n \\nOUTPUT(PageRank.Raw_File_Import);\\nOUTPUT(PageRank.Raw_File_Import_Neighbors);\\n\\nNode := PageRank.Raw_File_Import;\\n \\n Layout_Neighbors := RECORD\\n UNSIGNED1 NeighborID;\\n REAL IncomingValue;\\nEND;\\n\\nLayout_Nodes_Link := RECORD\\n UNSIGNED1 NodeID;\\n Layout_Neighbors; //nested RECORD structure\\nEND;\\n\\n//Neighbors := DATASET([\\n// {1,2,0.0},\\n// {1,4,0.0},\\n// {2,3,0.0},\\n// {2,5,0.0},\\n// {3,4,0.0},\\n// {4,5,0.0},\\n// {5,1,0.0},\\n// {5,2,0.0},\\n// {5,3,0.0}],Layout_Nodes_Link);\\n\\nNeighbors := PageRank.Raw_File_Import_Neighbors;\\n \\nLayout_Combined := RECORD\\n Layout_Node;\\n DATASET(Layout_Neighbors) Neighbors; //nested child DATASET \\nEND;\\n\\n\\nP_recs := PROJECT(Node, TRANSFORM(Layout_Combined,SELF := LEFT; SELF := []));\\noutput(p_recs);\\nLayout_Combined CombineRecs(Layout_Combined L, Layout_Nodes_Link R) := TRANSFORM\\n SELF.Neighbors := L.Neighbors + ROW({R.NeighborID,R.IncomingValue}, Layout_Neighbors);\\n SELF := L;\\nEND; //input and output types\\n\\nNestedNodeNeighbors := DENORMALIZE(P_recs, Neighbors,\\n LEFT.nodeid=RIGHT.nodeid,\\n CombineRecs(LEFT,RIGHT));\\n \\n//Result 1 \\nOUTPUT(NestedNodeNeighbors);\\n\\n// Calculate neighbor outgoing values - transformation\\nLayout_Combined IterateNodes(Layout_Combined L, Layout_Nodes_Link R) := TRANSFORM\\n SELF.Neighbors := L.Neighbors + ROW({R.NeighborID,L.CurrentRank/L.Nbcount}, Layout_Neighbors);\\n SELF := L;\\nEND; //input and output types\\n\\n// Calculate current rank - transformation\\nLayout_Combined IterateNodes1(Layout_Combined L, Layout_Nodes_Link R, Layout_Combined temp) := TRANSFORM\\n SELF.CurrentRank := SUM((temp.Neighbors(NeighborId=L.NodeID)), incomingvalue);\\n SELF.Neighbors := L.Neighbors + ROW({R.NeighborID,R.IncomingValue}, 
Layout_Neighbors);\\n SELF := L;\\nEND; //input and output types\\n\\n \\n// Iteratoin 1 \\nIteration1 := DENORMALIZE(P_recs, Neighbors,\\n LEFT.nodeid=RIGHT.nodeid,\\n IterateNodes(LEFT,RIGHT));\\n\\n// Result 2\\nOUTPUT(Iteration1);\\n\\nIteration11 := DENORMALIZE(P_recs, Neighbors,\\n LEFT.nodeid=RIGHT.nodeid,\\n IterateNodes1(LEFT,RIGHT, Iteration1));\\n\\n//Result 3 \\nOUTPUT(Iteration11);\\n\\n\\n\\n// Iteratoin 2 \\n//Node2 := PROJECT(Iteration11, TRANSFORM(Layout_Node,SELF := LEFT; SELF := []));\\n\\n \\nP_recs2 := PROJECT(PROJECT(Iteration11, TRANSFORM(Layout_Node,SELF := LEFT; SELF := [])), \\n\\t\\t\\t\\t TRANSFORM(Layout_Combined,SELF := LEFT; SELF := []));\\n\\nIteration2 := DENORMALIZE(P_recs2, Neighbors, LEFT.nodeid=RIGHT.nodeid, IterateNodes(LEFT,RIGHT));\\nOUTPUT(Iteration2);\\n\\nIteration21 := DENORMALIZE(P_recs2, Neighbors, LEFT.nodeid=RIGHT.nodeid, IterateNodes1(LEFT,RIGHT, Iteration2)); \\nOUTPUT(Iteration21);\\n\\n// Iteratoin 3 \\nP_recs3 := PROJECT(PROJECT(Iteration21, TRANSFORM(Layout_Node,SELF := LEFT; SELF := [])), \\n\\t\\t\\t\\t TRANSFORM(Layout_Combined,SELF := LEFT; SELF := []));\\n\\nIteration3 := DENORMALIZE(P_recs3, Neighbors, LEFT.nodeid=RIGHT.nodeid, IterateNodes(LEFT,RIGHT));\\nOUTPUT(Iteration2);\\n\\nIteration31 := DENORMALIZE(P_recs3, Neighbors, LEFT.nodeid=RIGHT.nodeid, IterateNodes1(LEFT,RIGHT, Iteration3)); \\nOUTPUT(Iteration31);\\n\\n\\nAbove code calculates page rank.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-02-18 21:56:11\" },\n\t{ \"post_id\": 6979, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nPerhaps you should just describe exactly what you're trying to accomplish, with example input data and what your expected results look like. Then I might be able to show you some example code of how to do it.\\n\\nRichard\", \"post_time\": \"2015-02-18 17:59:07\" },\n\t{ \"post_id\": 6978, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Richard.\\n\\nIteration what I meant actually is I have transformed two datasets and got two tables, now these two tables as a whole using join should go as input for performing another iteration. Is this possible? In LOOP, ITERATE, we are using output of first row to second row or repeating loop only for a single row multiple number of times. Is my understanding correct? Please provide inputs.\\n\\nThanks in advance.\", \"post_time\": \"2015-02-18 17:20:28\" },\n\t{ \"post_id\": 6964, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"Re: LOOP query\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nShort answer: yes, LOOP can do that (either of the first two forms loops a specified number of times).\\n\\nThe longer answer is, depending on your exact circumstances and requirements you might also be able to use ITERATE or PROCESS to achieve what you need.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-17 15:11:43\" },\n\t{ \"post_id\": 6954, \"topic_id\": 1600, \"forum_id\": 10, \"post_subject\": \"LOOP query\", \"username\": \"chennapooja\", \"post_text\": \"Hi,\\n\\n I have one scenario where result from one iteration should be used in next iteration and iterations should be some fixed number. 
Can LOOP be used to solve this?\\n\\n Thanks in advance.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-02-12 23:24:34\" },\n\t{ \"post_id\": 7007, \"topic_id\": 1602, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large\", \"username\": \"NSD\", \"post_text\": \"It was a JOIN over 5 Tables. 2 of them were really big and I just limited the result to the COUNT(*) amount in the ECL IDE.\", \"post_time\": \"2015-02-22 15:01:44\" },\n\t{ \"post_id\": 6987, \"topic_id\": 1602, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large\", \"username\": \"bforeman\", \"post_text\": \"Any chance on sending us the before and after code? It might be something that we want to add to our training to emphasize best practices.\\n\\nPlease email me at robert.foreman@lexisnexis.com\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2015-02-19 12:55:21\" },\n\t{ \"post_id\": 6985, \"topic_id\": 1602, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI've fixed it. It was bad ECL Code.\\n\\nThx anyway \", \"post_time\": \"2015-02-19 10:04:56\" },\n\t{ \"post_id\": 6982, \"topic_id\": 1602, \"forum_id\": 10, \"post_subject\": \"Re: Dataset too large\", \"username\": \"bforeman\", \"post_text\": \"Is it possible to attach the actual ECL? The HPCC team would like a look at it.\\n\\nThank You,\\n\\nBob\", \"post_time\": \"2015-02-18 21:08:53\" },\n\t{ \"post_id\": 6963, \"topic_id\": 1602, \"forum_id\": 10, \"post_subject\": \"Dataset too large\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI've 3 JOIN() overall, 2 Datasets of them are large. Now I'm getting following Error:\\n\\n
Error: System error: 10099: Graph[1], workunitwrite[7]: Dataset too large to output to workunit (limit is set to 2000) megabytes, in result (sequence=0), Master exception
\\n\\n\\nI've tried to max this limit to 3000MB with\\n\\n#OPTION('outputlimit',3000);
\\n\\nBut it seems, that the internal limit is 2000. Is there a Chance to maximize that?\\n\\nThx!\", \"post_time\": \"2015-02-17 15:05:15\" },\n\t{ \"post_id\": 7010, \"topic_id\": 1605, \"forum_id\": 10, \"post_subject\": \"Re: Keyed Components\", \"username\": \"NSD\", \"post_text\": \"thx, worked.\", \"post_time\": \"2015-02-22 15:10:32\" },\n\t{ \"post_id\": 6990, \"topic_id\": 1605, \"forum_id\": 10, \"post_subject\": \"Re: Keyed Components\", \"username\": \"rtaylor\", \"post_text\": \"NSD,\\n\\nIn addition to what Bob said, I noticed that your INDEX uses all the fields in the DATASET as search terms. That means you do NOT need any payload at all. If your intention is to build a payload INDEX with just PID as the search term (which would also eliminate the error you were getting), then simply define your INDEX like this: EXPORT IDX :=\\n INDEX(\\n CSVDataset, //RecordSet\\n {PID}, //Indexed Keys\\n {CSVDataset}, // <--- PAYLOAD \\n '~mt::idx::AerzteDaten'); //Index File\\n \\n
Using the name of the DATASET as the single entry in your payload will automatically payload all the non-search-term fields from that dataset (the compiler is smart enough to include each field only once, as a search term or a payload field, when given this form of syntax).\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-19 14:55:57\" },\n\t{ \"post_id\": 6988, \"topic_id\": 1605, \"forum_id\": 10, \"post_subject\": \"Re: Keyed Components\", \"username\": \"bforeman\", \"post_text\": \"The warning is actually harmless, as your code should still run, is that true?\\n\\nLook at your index search fields:\\n\\n{ArztID, FG_NR, PID, Jahr, Q1Q4, fpos}
\\n\\n...and then your JOIN condition:\\n\\nLEFT.PID = RIGHT.PID
\\n\\nYou are filtering on PID, but there are two key components (ArztID and FG_NR) listed ahead of it in the INDEX, so your keyed filter on PID follows unkeyed components.\\n\\nThis code should remove the warning:\\n\\nKEYED(LEFT.PID = RIGHT.PID) AND WILD(ArztID) AND WILD(FG_NR)
\\n\\nSee the LRM for more info on KEYED and WILD.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-19 13:03:44\" },\n\t{ \"post_id\": 6986, \"topic_id\": 1605, \"forum_id\": 10, \"post_subject\": \"Keyed Components\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI've created a payload Index, but if I do a JOIN i get the following Warning\\n\\nWarning: keyed filter on pid follows unkeyed component fg_nr in the key '~mt::idx::AerzteDaten')
\\n\\nHere's the Module, that creates the Index (only in this case all fields are declared as an index):\\n\\nEXPORT AerzteDaten := MODULE\\n\\n\\tEXPORT Layout :=\\n\\t\\tRECORD\\n\\t\\t\\tSTRING12\\t\\tArztID;\\n\\t\\t\\tUNSIGNED1\\t\\tFG_NR;\\n\\t\\t\\tSTRING12\\t\\tPID;\\n\\t\\t\\tUNSIGNED2\\t\\tJahr;\\n\\t\\t\\tUNSIGNED1\\t\\tQ1Q4;\\n\\tEND;\\n\\t\\n\\t\\n\\t\\n\\tEXPORT CSVDataset := \\n\\t\\tDATASET\\n\\t\\t(\\n\\t\\t\\t\\t'~mt::csv::AerzteDaten',\\n\\t\\t\\t\\t{Layout, UNSIGNED8 fpos {virtual(fileposition)}},\\n\\t\\t\\t\\tCSV\\n\\t\\t\\t\\t(\\n\\t\\t\\t\\t\\t\\tHEADING(1),\\n\\t\\t\\t\\t\\t\\tSEPARATOR(';'),\\n\\t\\t\\t\\t\\t\\tTERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])\\n\\t\\t\\t\\t)\\n\\t\\t );\\n\\t\\t \\n\\t\\t \\n\\t\\t \\n\\t\\n\\tEXPORT IDX :=\\n\\t\\tINDEX(\\n\\t\\t\\t\\t\\tCSVDataset,\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t//RecordSet\\n\\t\\t\\t\\t\\t{ArztID, FG_NR, PID, Jahr, Q1Q4, fpos},\\t\\t//Indexed Keys\\n\\t\\t\\t\\t\\t{ArztID, FG_NR, PID, Jahr, Q1Q4},\\t\\t// <--- PAYLOAD\\n\\t\\t\\t\\t\\t'~mt::idx::AerzteDaten');\\t\\t\\t\\t\\t\\t//Index File\\n\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\nEND;
\\n\\n\\nThe Index is built, simply and standard, with this code:\\n\\n#OPTION('outputlimit',1000);\\t\\t//Needed, due to DataSet too large\\nIMPORT Masterthesis;\\n\\nBUILDINDEX(Masterthesis.AerzteDaten.IDX, OVERWRITE);
\\n\\n\\n\\nAnd heres my join:\\n\\n#OPTION('outputlimit',2000);\\nIMPORT $, Masterthesis.FacharztCode, Masterthesis.Krankheiten, Masterthesis.KrankheitsDaten, Masterthesis.AerzteDaten, Masterthesis.StammDaten;\\n\\nr1 := RECORD StammDaten.Layout; \\n KrankheitsDaten.Layout; END;\\n\\nr1 Xform1(StammDaten.CSVDataset SD, KrankheitsDaten.IDX KD) := \\n TRANSFORM SELF := SD; SELF := KD; END;\\n\\nJ1 := JOIN( StammDaten.CSVDataset, KrankheitsDaten.IDX,\\n LEFT.PID = RIGHT.PID,\\n Xform1(LEFT,RIGHT) );\\n\\n\\n\\n\\nr2 := RECORD StammDaten.Layout; \\n KrankheitsDaten.Layout;\\n Krankheiten.Layout; END;\\n\\nr2 Xform2(J1 J1, Krankheiten.IDX K) := \\n TRANSFORM SELF := J1; SELF := K; END;\\n\\nJ2 := JOIN( J1,\\n Krankheiten.IDX,\\n LEFT.MXG = RIGHT.MXG,\\n Xform2(LEFT,RIGHT) );\\n\\n\\n\\n\\nr3 := RECORD StammDaten.Layout;\\n KrankheitsDaten.Layout;\\n Krankheiten.Layout;\\n AerzteDaten.Layout; END;\\n\\nr3 Xform3(J2 J2, AerzteDaten.IDX AeD) := \\n TRANSFORM SELF := J2; SELF := AeD; END;\\n\\nJ3 := JOIN( J2,\\n AerzteDaten.IDX,\\n LEFT.PID = RIGHT.PID AND LEFT.Jahr = RIGHT.Jahr AND \\n LEFT.Q1Q4 = RIGHT.Q1Q4,\\n Xform3(LEFT,RIGHT) );\\n\\n\\n\\nr4 := RECORD StammDaten.Layout;\\n KrankheitsDaten.Layout;\\n Krankheiten.Layout;\\n AerzteDaten.Layout;\\n FacharztCode.Layout; END;\\n\\nr4 Xform4(J3 J3, FacharztCode.IDX FC) := \\n TRANSFORM SELF := J3; SELF := FC; END;\\n\\nJ4 := JOIN( J3,\\n FacharztCode.IDX,\\n LEFT.FG_NR = RIGHT.FacharztCode,\\n Xform4(LEFT,RIGHT) );\\n\\n\\n\\nCOUNT(J4);\\nAVE(J4, J4.Alter_TN);\\nAVE(J4, J4.Alter_PSM);
\\n\\n\\nThank you for your help.\", \"post_time\": \"2015-02-19 10:10:53\" },\n\t{ \"post_id\": 7004, \"topic_id\": 1608, \"forum_id\": 10, \"post_subject\": \"Re: case insensitive comparison\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nLike this:IMPORT STD;\\nset of string s1 := ['check','case','sensitive'];\\nset of string s2 := ['CHECK','case','sensitive'];\\nds := PROJECT(DATASET(s2,{STRING s}),\\n TRANSFORM({STRING s},\\n SELF.s := STD.Str.ToLowerCase(LEFT.s)));\\nset of string s3 := SET(ds,s);\\n\\nStr1 := 'CASE'; \\nStr2 := 'Check'; \\n\\nStr1 IN s1; //False\\nSTD.Str.ToLowerCase(Str1) IN s1; //True\\nStr2 IN s3; //False\\nSTD.Str.ToLowerCase(Str2) IN s3; //True
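\\n\\nThe same normalize-both-sides idea carries over to the JOIN case raised in the question. A minimal sketch, using made-up datasets and a made-up name field rather than anything from this thread:\\n\\nIMPORT STD;\\nLRec := {STRING20 name, UNSIGNED4 lid};\\nRRec := {STRING20 name, UNSIGNED4 rid};\\nORec := {STRING20 name, UNSIGNED4 lid, UNSIGNED4 rid};\\ndsL := DATASET([{'Check',1},{'CASE',2}],LRec);\\ndsR := DATASET([{'check',10},{'case',20}],RRec);\\nJ := JOIN(dsL,dsR,\\n          STD.Str.ToLowerCase(LEFT.name) = STD.Str.ToLowerCase(RIGHT.name),\\n          TRANSFORM(ORec,SELF := LEFT,SELF := RIGHT));\\nOUTPUT(J); //both rows pair up despite the differing case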
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-20 19:08:15\" },\n\t{ \"post_id\": 7003, \"topic_id\": 1608, \"forum_id\": 10, \"post_subject\": \"case insensitive comparison\", \"username\": \"omnibuzz\", \"post_text\": \"I have a basic question. How do I do a case-insensitive comparison during join between string columns or when filtering etc. Is there some type of a collation that I can set at a dataset level or a #OPTION or something in the environment.xml. It looks like too much of boiler plate code (not to mention processing) to convert everything to a lower or an upper case.\\n\\nIn a nutshell, I want the following to return me true.\\n\\nset of string s := ['check','case','sensitive'];\\n'CASE' in s;
\", \"post_time\": \"2015-02-20 17:07:48\" },\n\t{ \"post_id\": 7058, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"Anjali\", \"post_text\": \"Thank you Bob..\\n\\n I will surely try this out and let you know..\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-03-03 10:27:02\" },\n\t{ \"post_id\": 7054, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"After discussion with members of development, we have opened a JIRA related to this post:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-13134\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-02 20:42:31\" },\n\t{ \"post_id\": 7050, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"To add one more thing, using NOXPATH on the OUTPUT will eliminate the bad tag generation, but you will need to rewrite the RECORD to a simplified layout to read the new file again.\\n\\nBob\", \"post_time\": \"2015-03-02 19:18:34\" },\n\t{ \"post_id\": 7045, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"Hi Anjali,\\n\\nI am checking with the development team for alternatives, but for now it looks like you may have to modify the user's RECORD before output of the new XML. You could parse his input file and using template language generate the correct record structure that you need to output. There are a lot of good examples in the Language Reference, starting with the #FOR statement.\\n\\nThe only way to have the xml files identical for input and output is to use embedded records rather than complex xpaths. Also, Richard Taylor was right in his design comment saying intermediary files shouldn’t really be XML in the first place.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-02 13:18:02\" },\n\t{ \"post_id\": 7043, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"Anjali\", \"post_text\": \"Hi Richard,\\n\\nThank you so much for your reply..\\n\\nAccording to my requirement,the file should be written as an XML file.~xmlOut will be generated by a different user where I will have no control over them.\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-03-02 09:09:54\" },\n\t{ \"post_id\": 7042, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"Anjali\", \"post_text\": \"Thank you Bob..,\\n\\nEmbedded data set way of approach helps me to write the file properly.But unfortunately, according to the requirements xmlOut will be generated by a different user and I will have no control over how it is written. 
\\n\\nIs there any way to resolve this issue?\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-03-02 09:00:15\" },\n\t{ \"post_id\": 7036, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"Anjali,\\n\\nIf you have to write this file as an XML output, you will need to use embedded datasets to accomplish this:\\n\\nDescRec := RECORD\\n string1000 content {xpath('content')};\\n string40 language {xpath('@language')};\\n string40 author {xpath('author')};\\n END;\\n\\t\\t \\nlocrec := RECORD\\n string40 country {xpath('country')};\\n string40 Region {xpath('region')};\\n\\tEND;\\n \\nLayout := record\\n string20 article_id {xpath('@id')};\\n descrec Description {xpath('description')};\\n string40 publisher := xmltext('publisher');\\n string40 source := xmltext('source');\\n string40 source_category := xmltext('source_category');\\n locrec location {xpath('location')};\\n string40 date {xpath('article_date')};\\n string40 Genre :=xmltext('genre');\\n end;\\n \\n ds:=dataset('~forum::xmlread',layout,xml('Row/article'));\\n output(ds,,'~xmlOut',xml('article'),overwrite);\\n \\n newds := dataset('~xmlOut',layout,xml('Dataset/article'));\\n OUTPUT(newds);
\\n\\nThe newds output now uses the original record structure, and you are able to read it correctly with no errors.\\n\\nI believe that the XML OUTPUT option will only work with single XPATH references, but I need to open a JIRA report to confirm this with development.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-27 16:44:41\" },\n\t{ \"post_id\": 7034, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"rtaylor\", \"post_text\": \"Anjali,Assume the previous user had used this layout to read the file, do some processing and write the output as the logical file ‘~xmlOut’.
My question is this: If this file is being generated within the HPCC environment, and its purpose is to provide input to further HPCC operations and it is not a file meant to be sent out to some end-user outside of the HPCC environment, then why is it in an XML format at all? \\n\\nXML is a perfectly fine data interchange format between diverse computing systems, but within an HPCC environment there is little reason to be writing "intermediate" data in any format other than binary flat files. Within HPCC, flat files are the most efficient data storage format we support, both for read/write performance and disk storage requirements. \\n\\nXML files in HPCC are most useful as external input files (over which you have little/no control) and as output files going to external non-HPCC systems. But they are vastly less efficient for use within and between HPCC environments than flat files. \\n\\nYes, certainly use XML as the final output format to end-users or non-HPCC systems that require data to be in XML, but not for feeding the next step in a multi-step HPCC processing operation. \\n\\nI see this kind of problem as a fundamental design flaw. It's the type of flaw most likely created by someone with a background in systems where XML is the standard format for everything and who is not yet intimately familiar with HPCC. My suggestion for correction would be to change the design so that your input file is a flat file and not XML, which should eliminate the problem completely by getting a RECORD structure from your call to std.file.getlogicalfileattribute(<fileName>, ‘ECL’) without all the xpath issues.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-27 15:14:55\" },\n\t{ \"post_id\": 7033, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"Hi Anjali,\\n\\nThank you for the sample XML, and I can confirm the same issue as you have reported. Using the XML OUTPUT option looks to be generating invalid tags when there is a complex XPATH involved. I want to research this a little more today, as I think the CSV OUTPUT option may be an alternative using XMLENCODE. Let me get back to you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-27 14:40:35\" },\n\t{ \"post_id\": 7030, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"Anjali\", \"post_text\": \"Hi Bob,\\n\\nThank you so much for your reply..\\n\\nAttaching the sample xml file, and given below the dataset declaration for the same,\\n\\n Layout := record\\n string20 article_id {xpath('/Row/article/@id')} ;\\n string1000 content {xpath('description/content')};\\n string40 language {xpath('description/@language')};\\n string40 author {xpath('description/author')};\\n string40 publisher := xmltext('publisher');\\n string40 source := xmltext('source');\\n string40 source_category := xmltext('source_category');\\n string40 country {xpath('location/country')};\\n string40 date {xpath('article_date')};\\n string40 Genre :=xmltext('genre');\\n string40 Region {xpath('location/region')}; \\n end;\\n \\n ds:=dataset('~forum::xmlread',layout,xml('Row/article'));\\n output(ds,,'~xmlOut',xml,overwrite);
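\\n\\nA minimal sketch of the flat-file route suggested earlier in this thread, with an illustrative file name and layout rather than the thread's actual data: write the intermediate result as a THOR (flat) file and read it back with the identical RECORD, so no xpath round-trip is involved.\\n\\nLayout := RECORD\\n STRING20 article_id;\\n STRING1000 content;\\nEND;\\nds := DATASET([{'1','sample text'}],Layout);\\nOUTPUT(ds,,'~tmp::intermediate_flat',OVERWRITE); //binary flat file on disk\\nback := DATASET('~tmp::intermediate_flat',Layout,THOR); //read back with the same layout\\nOUTPUT(back);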
\\n\\nRegards,\\nAnjali\", \"post_time\": \"2015-02-27 06:32:29\" },\n\t{ \"post_id\": 7021, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Re: Problem while reading xml files\", \"username\": \"bforeman\", \"post_text\": \"When you OUTPUT the file to XML, what is shown in the ECL field in the ECL Watch? Does it match the original layout?\\n\\nI'm thinking that it might be the field above that could be generating the error:\\n\\nstring20 article_id {xpath('/Row/article/@id')} ;\\nstring1000 content {xpath('description/content')};
\\n\\nTry removing the "/" in front of Row:\\n\\nstring20 article_id {xpath('Row/article/@id')} ;\\nstring1000 content {xpath('description/content')};
\\n\\nBut in order for us to try to reproduce your error, can you attach a sample of the actual XML with your DATASET declaration? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-25 14:18:18\" },\n\t{ \"post_id\": 7020, \"topic_id\": 1610, \"forum_id\": 10, \"post_subject\": \"Problem while reading xml files\", \"username\": \"Anjali\", \"post_text\": \"I have a requirement to infer the layout of an XML logical file and use the same to read the file and do some processing. I am presently using \\nstd.file.getlogicalfileattribute(<fileName>, ‘ECL’) to read the layout of the file. It worked well until I encountered the following scenario… \\n\\nAssume the previous user had used this layout to read the file, do some processing and write the output as the logical file ‘~xmlOut’. \\n\\nLayout := record\\n string20 article_id {xpath('/Row/article/@id')} ;\\n string1000 content {xpath('description/content')};\\n string40 language {xpath('description/@language')};\\n string40 author {xpath('description/author')};\\n string40 publisher := xmltext('publisher');\\n string40 source := xmltext('source');\\n string40 source_category := xmltext('source_category');\\n string40 country {xpath('location/country')};\\n string40 date {xpath('article_date')};\\n string40 Genre :=xmltext('genre');\\n string40 Region {xpath('location/region')}; \\n end;\\n\\nIn this case, I am unable to read xmlOut again using the layout I get from the ECL attribute even though it is same as the structure above. The error I get is as follows:\\n\\nError: System error: 2: Error - syntax error "Expecting ">"" [line 2, file offset 81]\\nd>_18/article_id><description/c*ERROR*ontent>This is a sample description (/var/lib/HPCCSystems/hpcc-data/thor/hpccuser/xmlwrite._1_of_1) (in Xml Read G1 E2) (0, 0), 2,\\n\\nAfter analyzing this further, I have found that the field Names of the file are ‘description/content’, ‘description/author’ etc. instead of content and author and because of this, while trying to read the file, I get the error. If I, however, after reading the dataset, transform the dataset to the following structure (removing any xpaths involving ‘/’) before writing it as xmlOut, I am able to read it without any problems.\\n\\nXformLayout:=RECORD \\n string20 article_id;\\n string1000 content;\\n string40 language;\\n string40 author;\\n string40 publisher;\\n string40 source;\\n string40 source_category;\\n string40 country;\\n string40 date;\\n string40 Genre;\\n string40 Region; \\nend; \\n\\nUnfortunately, xmlOut will be generated by a different user and I will have no control over how it is written. \\nIs there another way for me to read this file without errors?\", \"post_time\": \"2015-02-25 11:57:46\" },\n\t{ \"post_id\": 7038, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Re: Logical File Read Error\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nThe fact that the error occurs on line 101 tells me there's a lot of code there that I haven't seen, and that the error you're seeing probably has nothing to do with the original reason for this thread.\\n\\nIf you put the tilde in the DATASET, can you do a simple OUTPUT of that DATASET? 
If so, then show me all of the code (at least up to line 101), please.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-27 18:37:43\" },\n\t{ \"post_id\": 7037, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Re: Logical File Read Error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Richard,\\n\\n Now if I use tilde at both places, I am getting below error:\\n "Error: INTERNAL: Dataset is not active: 'left' (101, 5), 4153, ".\\n I saw in other posts that there is manual for error handling or error codes, but I did not get it from downloads section. Can you please share that if it is available.\\n Also, for this particular coding and testing, I am not using training cluster because its common to everyone, I am doing it using VMWare. We are using training cluster for checking performance because its much faster than our own cluster and VMWare.\\n \\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-02-27 16:49:32\" },\n\t{ \"post_id\": 7035, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Re: Logical File Read Error\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nOK, so just add the tilde to the DATASET and you should be fine. \\n\\nBTW, is there a particular reason you're naming your file ".::oldXML" without any directory? Since you're using one of my training clusters, I would prefer you to put all your files in a single subdirectory so it's easy to spot your files and they won't be mistakenly deleted by someone else (like me or Bob) as we do cleanup between classes. I would suggest something like: "~KSU::Pooja::oldXML"\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-27 16:17:40\" },\n\t{ \"post_id\": 7032, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Re: Logical File Read Error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Richard,\\n\\n If I remove tilde while writing to cluster, I am getting "Error: SIG: Segmentation fault(11), accessing 0000000000000030, IP=00007F1047DCA58B (0, 0), 1000," this error and there is no file oldTbl created at all.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-02-27 14:16:33\" },\n\t{ \"post_id\": 7031, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Re: Logical File Read Error\", \"username\": \"rtaylor\", \"post_text\": \"Pooja,\\n\\nThe problem is your filename constants. You have a leading tilde (~) in the OUTPUT where you don't need one, and you don't have one in your DATASET, where you do.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-27 14:01:58\" },\n\t{ \"post_id\": 7029, \"topic_id\": 1612, \"forum_id\": 10, \"post_subject\": \"Logical File Read Error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\n I am trying to save the output of a table to thor and then read it again. but getting "Error: System error: 0: Read: Logical file name 'hthor::.::oldTbl' could not be resolved (0, 0), 0, " - this particular error. \\n\\n I used below code for writing:\\n O := OUTPUT(P,,'~.::oldTbl',OVERWRITE);\\n\\n
where P is a transformed table and\\n\\n I used below code for reading:\\n \\n print := DATASET('.::oldTbl',\\n{layout_combined,UNSIGNED8 __fpos {virtual(fileposition)}},\\nFLAT);\\noutput(print);\\n\\n
\\n Below are the details of oldTbl logical file I got when searched in ECL watch:\\n \\nCluster Name: \\nhthor__myeclagent\\n\\nDescription: \\nJob Name: \\nloop_pagerank\\n\\nContent Type: \\nflat\\n\\nFile Size: \\n70\\n\\nActual Size: \\n\\nModified: \\n2015-02-25 06:50:47\\n\\nDirectory: \\n/var/lib/HPCCSystems/hpcc-data/eclagent\\n\\nPath Mask: \\noldtbl._1_of_1\\n\\nRecord Size: \\n0\\n\\nRecord Count: \\n5\\n\\n\\nPlease help me in identifying the error, thanks in advance. \\n \\nRegards,\\nPooja.\", \"post_time\": \"2015-02-27 04:47:37\" },\n\t{ \"post_id\": 7073, \"topic_id\": 1617, \"forum_id\": 10, \"post_subject\": \"Re: Language Based Operations ...\", \"username\": \"DSC\", \"post_text\": \"There is no built-in dictionary for this purpose, but it is not difficult to build your own. The biggest problem is obtaining a dictionary that is sufficiently complete for your use. I did something like this once and obtained the words from http://wordnet.princeton.edu. If you distill that data such that you wind up with one word per line then it becomes fairly easy to load it into HPCC and use it from ECL.\\n\\nYou didn't mention what you're goal is, but be aware that many words are imported from one language to another. If you're trying to determine whether some text is in English or not, you should probably test more than a single word. It would be better to test a statistically-significant portion of the text and see if the vast majority of the words are found in the English language.\\n\\nHope this helps.\\n\\nDan\", \"post_time\": \"2015-03-04 15:50:02\" },\n\t{ \"post_id\": 7072, \"topic_id\": 1617, \"forum_id\": 10, \"post_subject\": \"Language Based Operations ...\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nSay if I want to find out if a word in a text file is from the English language or not. Is there some kind of a built in dictionary available in HPCC to do this? or what would be the best way to achieve this?\\n\\nThanks\", \"post_time\": \"2015-03-04 15:23:23\" },\n\t{ \"post_id\": 7597, \"topic_id\": 1629, \"forum_id\": 10, \"post_subject\": \"Re: A checksum of a Logical/Super File\", \"username\": \"zeeshan\", \"post_text\": \"Some way of sampling, knowing the same rows are in the sample would be great. The attribute should not be tied to a particular data structure. (may require use of Macros)..\\n\\n\\n\\nPrep4sure ITIL Foundation\", \"post_time\": \"2015-05-14 10:17:27\" },\n\t{ \"post_id\": 7118, \"topic_id\": 1629, \"forum_id\": 10, \"post_subject\": \"A checksum of a Logical/Super File\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWhen migrating THOR's, or for other reasons (e.g. running mirror systems) it would be handy to be able to run a quick attribute against a logical file (or superfile) that returns say a MD5 checksum. Such an attribute would be a component of any validation process.\\nThere is STD.File.VerifyFile but that would not check the content of a file.\\nOne could use the 'size' and 'rowcounts' information returned from a call to LogicalFileList, but again the content could differ on files exactly the same size.\\n\\nThe other constraint is that this has to run quickly, we have some very big files!\\n\\nSome way of sampling, knowing the same rows are in the sample would be great. The attribute should not be tied to a particular data structure. 
(may require use of Macros)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2015-03-10 15:01:06\" },\n\t{ \"post_id\": 7365, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Re: Sort not working\", \"username\": \"bforeman\", \"post_text\": \"Cool! Thanks for the feedback Don!\", \"post_time\": \"2015-04-15 16:36:47\" },\n\t{ \"post_id\": 7364, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Re: Sort not working\", \"username\": \"georgeb2d\", \"post_text\": \"I finally got back to this. The series of Join(FULL OUTER) statements works. \\n\\nI also had to add some SORT statements with SKEW(1.0) to the records as they were building. Otherwise I was getting the following:\\n//System error: 10083: Graph[16], join[19]: JOIN failed, \\n// skewed LHS. Graph[16], join[19]: Exceeded skew limit: 0.033333, estimated skew: 1.000000 (0, 0), 10083, \\n\\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-15 16:27:49\" },\n\t{ \"post_id\": 7240, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Re: Sort not working\", \"username\": \"bforeman\", \"post_text\": \"The code worked fine as soon as I just was using RhodeIsland. Rhode Island only has about 30,000 rows, Missouri - 355,000, Nebraska - 170,000, and Georgia - 360,000.
\\n\\nRows are one thing, what is the record size in each DATASET? Are they exactly alike? If you recall, we appended 841,400 records to another 841,400 in our training class, which only used a 3-node cluster. \\n\\nWhy not try a series of JOIN statements. Add Rhode Island to Missouri, use that result and JOIN to Nebraska, etc. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-01 19:08:46\" },\n\t{ \"post_id\": 7239, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Re: Sort not working\", \"username\": \"georgeb2d\", \"post_text\": \"I am using Geospatial data in the datasets. In the workunit I was attempting to add the RoadSegment datasets of Missouri + Nebraska + Georgia + RhodeIsland. I eventually discovered this was the part that was causing HPCC to crash. The code worked fine as soon as I just was using RhodeIsland. Rhode Island only has about 30,000 rows, Missouri - 355,000, Nebraska - 170,000, and Georgia - 360,000. Why is this small number of rows causing the crash? That is my real question.\", \"post_time\": \"2015-04-01 19:00:34\" },\n\t{ \"post_id\": 7238, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Re: Sort not working\", \"username\": \"bforeman\", \"post_text\": \"Hi Don, \\n\\nA few things:\\n\\n1. Why are you using PARALLEL near Line 78? The OUTPUTs should all be implicitly parallel anyway.\\n2. I would incrementally test your code. Start with the first couple of results and than incrementally add on from there to see what is crashing the workunit.\\n3. You may need to ZAP the workunit and send development the logs to review.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-01 18:37:24\" },\n\t{ \"post_id\": 7233, \"topic_id\": 1660, \"forum_id\": 10, \"post_subject\": \"Sort not working\", \"username\": \"georgeb2d\", \"post_text\": \"From the thormaster log:\\n\\n"ERROR: WARNING: exception during sort - activity(sort, 19) : MP link closed (10.194.10.49:6600)"\\n0000703D 2015-03-31 11:52:31.379 18184 11142 "ERROR: In CMasterActivity::main - activity(sort, 19) : Graph[1], sort[19]: MP link closed (10.194.10.49:6600), Master exception"\\n0000703E 2015-03-31 11:52:31.379 18184 11139 "ERROR: In CMasterActivity::main - activity(remoteresult, 26) : Graph[1], remoteresult[26]: MP link closed (10.194.10.49:6600), Master exception"\\n\\nThis is for WorkUnit W20150331-114657 on Alpha-Dev-Thor\\n\\nAny assistance would be helpful.\\n\\nDon\", \"post_time\": \"2015-03-31 16:22:17\" },\n\t{ \"post_id\": 7251, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"georgeb2d\", \"post_text\": \"That works well. \\n\\nMany thanks,\\nDon\", \"post_time\": \"2015-04-03 13:32:16\" },\n\t{ \"post_id\": 7250, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nTry this code against your data:\\nPATTERN Number := PATTERN ('[0-9]');\\nPATTERN Startpoly := '((' | '(((';\\nPATTERN Endpoly := '))' | ')))';\\nPATTERN PointSep := ', ';\\nPATTERN Lat := OPT('-') REPEAT(Number,1,2) OPT('.' Number+);\\nPATTERN Long := OPT('-') REPEAT(Number,1,3) OPT('.' Number+);\\nPATTERN Point := Long ' ' Lat OPT(PointSep);\\nPATTERN Poly := StartPoly REPEAT(Point) EndPoly;\\n\\nRec := RECORD\\n STRING Point1 := MATCHTEXT(Point[1]);\\n STRING Lat := MATCHTEXT(Point[1]/Lat);\\n STRING Long := MATCHTEXT(Point[1]/Long);\\n STRING POLY{MAXLENGTH(30000)} := 'POLYGON '+ MATCHTEXT(Poly);\\nEND;\\nPARSE(ds,line,Poly,Rec,MAXLENGTH(20000),FIRST);
Note that your long last line will not display in the IDE but you can see it in ECL Watch. \\n\\nThe key change here (besides reversing the order of Lat and Long in the Point definition) is the addition of MAXLENGTH to both the PARSE and the output RECORD structure POLY field. Without MAXLENGTH, both of these default to 4096 (4K). That one line is almost 20,000 characters, which is why neither you nor I were getting anything from it. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-03 13:23:32\" },\n\t{ \"post_id\": 7248, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"georgeb2d\", \"post_text\": \"The longitude is first in this particular file. \\n\\nI had counted incorrectly. There are 138 polygons in this particular multipolygon. \\nThe first 137 polygons appear to be showing up.\\nThis is polygon 138, which is now partially showing up:\\n((-180 -90, 180 -90, 180 -84.30224609375, 171.905029296875 -83.80169677734375, 172.318603515625 -83.59722900390625, 169.25082397460937 -83.33001708984375, 167.4697265625 -83.439178466796875, 168.69247436523437 -83.146408081054687, 163.44613647460937 -82.265029907226562, 163.85946655273437 -82.174179077148438, 162.27584838867187 -81.6622314453125, 160.39111328125 -81.521408081054688, 162.19390869140625 -81.297805786132813, 160.62612915039062 -81.20501708984375, 160.83944702148437 -81.078079223632812, 160.50308227539062 -80.952789306640625, 160.84390258789062 -80.89306640625, 160.01556396484375 -80.780303955078125, 161.17446899414062 -80.63555908203125, 159.7569580078125 -80.569747924804687, 160.8919677734375 -80.37750244140625, 158.0572509765625 -80.280563354492188, 160.52444458007812 -80.041397094726563, 159.09280395507812 -79.971115112304688, 160.73806762695312 -79.453079223632813, 160.04055786132813 -79.155838012695312, 160.49417114257812 -79.020858764648438, 164.49444580078125 -78.569747924804688, 167.26943969726562 -78.655838012695312, 165.68359375 -78.391693115234375, 165.54000854492187 -78.00335693359375, 164.20669555664062 -78.143890380859375, 164.56500244140625 -77.73638916015625, 163.61083984375 -77.696121215820313, 163.89697265625 -77.466690063476562, 163.23834228515625 -77.041397094726563, 162.33279418945313 -76.951416015625, 163.06973266601562 -76.730850219726563, 162.62362670898437 -76.619461059570313, 162.9322509765625 -76.583084106445313, 162.87557983398437 -76.247787475585938, 162.33779907226562 -76.167236328125, 163.136962890625 -75.936676025390625, 162.53973388671875 -75.108917236328125, 163.87167358398437 -74.953353881835938, 163.65667724609375 -74.774459838867187, 164.14251708984375 -74.616119384765625, 165.33331298828125 -74.672805786132812, 164.78085327148437 -74.136962890625, 166.129150390625 -74.127243041992188, 165.548583984375 -73.932510375976563, 167.56167602539062 -73.403350830078125, 169.07916259765625 -73.52752685546875, 169.26641845703125 -73.083908081054687, 170.32223510742187 -72.580307006835938, 170.30416870117187 -72.30084228515625, 169.87164306640625 -72.379196166992188, 170.12997436523437 -72.051406860351563, 170.99444580078125 -71.86419677734375, 170.45303344726562 -71.351669311523438, 170.21527099609375 -71.282791137695312, 170.27224731445312 -71.660018920898438, 167.76751708984375 -70.780838012695313, 166.46304321289062 -70.701950073242188, 166.77166748046875 -70.611679077148437, 163.55389404296875 -70.465286254882812, 163.7852783203125 -70.628067016601563, 163.52084350585938 -70.675003051757812, 162.75390625 -70.280303955078125, 160.96890258789063 
-70.255569458007812, 157.23638916015625 -68.942520141601563, 156.33612060546875 -69.240570068359375, 154.856689453125 -69.102523803710938, 154.28668212890625 -68.863082885742188, 154.68585205078125 -68.61500358581543, 153.77835083007812 -68.3436279296875, 153.76861572265625 -68.9222412109375, 151.18502807617187 -68.980026245117188, 150.99252319335938 -68.390853881835937, 148.8133544921875 -68.335281372070312, 148.21722412109375 -68.127243041992188, 148.6480712890625 -67.98919677734375, 148.02944946289062 -67.844467163085938, 146.95333862304687 -68.141693115234375, 147.15084838867187 -67.990859985351563, 146.64279174804687 -67.709457397460937, 145.31500244140625 -67.61334228515625, 145.87530517578125 -67.19586181640625, 145.40444946289062 -67.017227172851563, 144.5855712890625 -67.245285034179688, 143.4000244140625 -66.85113525390625, 142.56640625 -66.994171142578125, 134.94778442382812 -66.093063354492188, 135.27224731445312 -65.4364013671875, 134.43783569335937 -64.926132202148438, 134.10061645507812 -65.123062133789063, 134.4364013671875 -66.001419067382812, 134.24530029296875 -66.201126098632812, 130.35198974609375 -66.225845336914062, 128.83477783203125 -67.14251708984375, 126.92864990234375 -66.834457397460937, 126.98835754394531 -66.453582763671875, 126.35616302490234 -66.27972412109375, 125.19001770019531 -66.73419189453125, 122.18335723876953 -66.547805786132812, 117.76699829101562 -66.98974609375, 116.18003845214844 -66.366683959960937, 114.51835632324219 -66.4727783203125, 114.43142700195312 -66.179473876953125, 113.31533813476562 -65.713348388671875, 110.8917236328125 -66.063629150390625, 110.6297607421875 -66.486679077148438, 108.82421875 -66.831130981445312, 107.80477905273437 -66.398361206054687, 102.62615966796875 -65.901123046875, 100.95391845703125 -66.080841064453125, 99.2830810546875 -66.880844116210937, 98.261962890625 -66.51611328125, 97.564453125 -66.740570068359375, 92.00531005859375 -66.533905029296875, 88.96722412109375 -66.761398315429688, 88.22723388671875 -66.036392211914063, 88.1094970703125 -66.65252685546875, 87.502517700195313 -66.894744873046875, 85.79168701171875 -67.177780151367187, 83.403396606445313 -67.15667724609375, 82.6650390625 -67.393890380859375, 82.014480590820312 -67.251678466796875, 81.469741821289062 -67.505294799804687, 82.057266235351563 -67.6722412109375, 78.116409301757813 -68.459732055664063, 77.747268676757812 -69.116973876953125, 72.901138305664063 -70.021957397460938, 72.615615844726563 -70.20361328125, 72.863113403320312 -70.432785034179687, 71.504470825195313 -70.953903198242188, 71.248123168945313 -71.389450073242188, 71.449783325195313 -71.546112060546875, 70.864944458007813 -71.930145263671875, 68.936416625976563 -72.425003051757812, 67.337554931640625 -72.063339233398438, 67.901138305664062 -71.64447021484375, 67.605300903320313 -71.583084106445312, 69.243911743164062 -70.670578002929688, 69.144790649414063 -70.331390380859375, 68.661697387695312 -70.3677978515625, 68.850296020507813 -70.532241821289062, 68.593948364257813 -70.770584106445312, 67.646133422851562 -70.386398315429688, 68.094741821289062 -69.879470825195313, 69.205337524414063 -69.863906860351562, 69.340042114257812 -69.638626098632813, 68.871170043945313 -69.379470825195313, 69.743118286132813 -69.363082885742188, 69.292007446289063 -69.102783203125, 70.102005004882813 -68.523895263671875, 69.644790649414063 -67.75390625, 59.110618591308594 -67.4122314453125, 56.729522705078125 -66.901123046875, 57.311973571777344 -66.563339233398437, 55.619232177734375 
-66.0069580078125, 53.778961181640625 -65.84002685546875, 50.413116455078125 -66.441970825195313, 50.169769287109375 -66.743057250976563, 50.689483642578125 -67.181394577026367, 49.151702880859375 -67.088623046875, 49.163970947265625 -66.862808227539062, 48.262855529785156 -67.167526245117188, 49.167266845703125 -67.38336181640625, 48.217010498046875 -67.634445190429688, 47.395606994628906 -67.722518920898437, 47.009498596191406 -67.550308227539062, 47.448951721191406 -67.417800903320313, 46.551429748535156 -67.277252197265625, 46.246429443359375 -67.357513427734375, 46.308929443359375 -67.641693115234375, 41.103126525878906 -68.523895263671875, 39.760353088378906 -68.9544677734375, 39.701759338378906 -69.648635864257813, 38.650367736816406 -69.988632202148438, 38.564491271972656 -69.500579833984375, 38.239234924316406 -69.9927978515625, 37.145057678222656 -69.664749145507813, 37.850929260253906 -69.533905029296875, 37.916175842285156 -69.251419067382813, 36.722572326660156 -69.728073120117188, 36.126441955566406 -69.533615112304688, 36.414527893066406 -69.314453125, 34.140357971191406 -68.4827880859375, 33.424232482910156 -68.651412963867187, 33.445045471191406 -68.957244873046875, 32.902320861816406 -69.276947021484375, 31.083419799804688 -69.773895263671875, 23.246757507324219 -70.546417236328125, 18.036491394042969 -69.96917724609375, 15.705924987792969 -70.278623580932617, 14.165641784667969 -70.158905029296875, 12.740348815917969 -70.2811279296875, 12.046745300292969 -70.717514038085938, 11.270622253417969 -70.710281372070312, 8.6828536987304687 -70.074462890625, 9.0903854370117188 -70.318344116210937, 8.4806442260742187 -70.477249145507813, 7.55731201171875 -70.168350219726563, 6.69952392578125 -70.586395263671875, 2.80316162109375 -70.846954345703125, -0.2975006103515625 -71.658889770507813, -0.92305755615234375 -71.58917236328125, -0.7772216796875 -71.389175415039063, -1.0533294677734375 -71.27667236328125, -2.0938873291015625 -71.486114501953125, -2.2616653442382812 -71.17083740234375, -6.0091629028320313 -71.418899536132813, -6.1038894653320312 -71.144454956054688, -5.4411087036132812 -70.88250732421875, -6.0211105346679687 -70.699722290039063, -7.7233352661132812 -71.429168701171875, -7.3377761840820313 -71.69195556640625, -8.366943359375 -71.825836181640625, -8.9308319091796875 -71.233612060546875, -9.8787155151367187 -70.900970458984375, -10.430000305175781 -70.986953735351562, -10.048614501953125 -71.11138916015625, -11.019447326660156 -71.6572265625, -11.592781066894531 -71.56195068359375, -11.556503295898438 -71.278823852539063, -12.276947021484375 -71.37750244140625, -11.325836181640625 -71.965011596679688, -11.65777587890625 -72.326950073242187, -13.406112670898437 -72.82305908203125, -14.469169616699219 -72.796676635742187, -13.731109619140625 -73.020843505859375, -16.023612976074219 -73.320281982421875, -16.898887634277344 -73.773056030273438, -16.069168090820313 -73.747512817382813, -16.919166564941406 -73.976119995117188, -16.32611083984375 -74.06390380859375, -13.704719543457031 -73.94195556640625, -17.147224426269531 -74.78472900390625, -18.256385803222656 -75.495834350585937, -17.758056640625 -75.729736328125, -28.305274963378906 -76.565292358398438, -33.886116027832031 -77.660568237304687, -36.302780151367188 -78.7791748046875, -33.001113891601563 -79.453903198242188, -27.085556030273438 -79.013336181640625, -30.210556030273438 -79.666671752929688, -28.0625 -80.004730224609375, -28.319442749023438 -80.276947021484375, -41.4888916015625 -81.383346557617188, 
-43.084449768066406 -81.851394653320312, -53.725837707519531 -82.150283813476562, -58.210838317871094 -83.033340454101563, -59.079727172851563 -82.965835571289063, -58.783058166503906 -82.645004272460938, -59.648056030273437 -82.43389892578125, -74.693893432617188 -80.697235107421875, -76.089447021484375 -80.197235107421875, -79.849456787109375 -79.954452514648437, -77.321945190429687 -80.011398315429687, -76.0836181640625 -79.645843505859375, -77.029449462890625 -79.279449462890625, -80.202789306640625 -79.228897094726563, -80.63555908203125 -79.619171142578125, -84.1077880859375 -78.36083984375, -78.957504272460938 -78.813339233398438, -77.487503051757813 -78.52667236328125, -81.470001220703125 -77.897232055664063, -80.605560302734375 -77.886398315429687, -81.4969482421875 -77.657791137695313, -74.853347778320312 -78.14111328125, -72.839736938476563 -77.6400146484375, -75.632781982421875 -77.508621215820313, -78.476669311523438 -76.4072265625, -77.77166748046875 -75.918899536132813, -76.380569458007812 -76.06500244140625, -75.4444580078125 -76.546401977539062, -70.476394653320313 -76.707778930664063, -69.385284423828125 -76.296112060546875, -63.429168701171875 -75.4505615234375, -63.0947265625 -75.3255615234375, -64.447784423828125 -75.29779052734375, -63.095840454101563 -75.130844116210938, -63.989448547363281 -75.008056640625, -63.0513916015625 -74.895278930664063, -63.245834350585938 -74.604171752929688, -62.531394958496094 -74.982513427734375, -61.887504577636719 -74.832229614257813, -61.755279541015625 -74.306394577026367, -61.036392211914063 -74.098342895507813, -61.762504577636719 -73.913894653320312, -60.587783813476563 -73.70611572265625, -61.609169006347656 -73.546112060546875, -61.836669921875 -73.36639404296875, -61.436393737792969 -73.343612670898437, -61.919448852539063 -73.14306640625, -60.599723815917969 -73.36029052734375, -59.849449157714844 -73.233062744140625, -59.774169921875 -72.900283813476563, -60.617225646972656 -73.028623580932617, -60.616111755371094 -72.643341064453125, -61.270835876464844 -72.700286865234375, -61.549728393554688 -72.42279052734375, -60.852500915527344 -72.388900756835938, -61.074722290039063 -72.163619995117187, -60.865837097167969 -72.003067016601563, -62.552505493164063 -72.047500610351563, -60.904167175292969 -71.73638916015625, -61.929725646972656 -71.656402587890625, -60.9425048828125 -71.158340454101563, -61.380279541015625 -70.815292358398437, -62.128890991210938 -70.865570068359375, -61.484451293945313 -70.520278930664063, -62.490837097167969 -70.380569458007813, -61.940834045410156 -70.229171752929687, -62.635002136230469 -69.853347778320312, -62.475837707519531 -69.45306396484375, -63.638893127441406 -69.22723388671875, -63.368057250976562 -69.050003051757813, -63.706672668457031 -68.742507934570313, -63.210556030273438 -68.786956787109375, -63.974166870117188 -68.536392211914062, -62.728340148925781 -68.410003662109375, -64.3900146484375 -68.513336181640625, -63.857780456542969 -68.842788696289063, -65.313339233398438 -68.711395263671875, -65.0916748046875 -68.44195556640625, -65.5966796875 -68.347503662109375, -64.768341064453125 -68.122787475585937, -65.723617553710937 -68.148056030273437, -65.329177856445312 -67.9747314453125, -65.617782592773438 -67.8800048828125, -65.290283203125 -67.67333984375, -65.6138916015625 -67.559173583984375, -64.77056884765625 -67.317230224609375, -64.837783813476562 -66.951400756835938, -63.743614196777344 -66.891677856445312, -64.196670532226563 -66.720291137695313, -63.675559997558594 
-66.218338012695313, -62.608894348144531 -66.728347778320312, -62.443611145019531 -66.430282592773438, -62.885284423828125 -66.262786865234375, -62.181396484375 -66.180282592773438, -61.713890075683594 -66.46917724609375, -61.416671752929688 -66.125289916992188, -60.93695068359375 -66.260009765625, -60.557502746582031 -65.945281982421875, -61.872779846191406 -66.171951293945313, -62.459724426269531 -65.90472412109375, -61.683059692382813 -65.538619995117187, -62.089729309082031 -65.440292358398437, -61.959449768066406 -65.180282592773438, -59.918891906738281 -64.4122314453125, -59.505279541015625 -64.5372314453125, -59.493057250976563 -64.316116333007812, -58.778640747070313 -64.535812377929687, -58.600837707519531 -63.948890686035156, -57.3800048828125 -63.460556030273437, -56.721389770507813 -63.5947265625, -56.946395874023438 -63.4505615234375, -57.205001831054688 -63.205558776855469, -58.910835266113281 -63.533058166503906, -59.445838928222656 -63.893058776855469, -60.991668701171875 -64.035842895507813, -60.941947937011719 -64.277511596679688, -61.935279846191406 -64.690567016601563, -62.45361328125 -64.58917236328125, -62.617782592773437 -64.7550048828125, -62.319450378417969 -64.858901977539063, -62.938056945800781 -64.797500610351562, -63.090560913085938 -65.133895874023438, -63.879447937011719 -65.018341064453125, -64.053070068359375 -65.421951293945313, -63.712783813476563 -65.50250244140625, -64.65972900390625 -65.740280151367188, -64.453338623046875 -65.981674194335938, -65.689727783203125 -66.125, -66.497222900390625 -66.622512817382813, -66.40472412109375 -66.88555908203125, -67.48583984375 -67.077224731445313, -67.6239013671875 -67.553619384765625, -66.427230834960938 -67.533340454101563, -67.1572265625 -68.01055908203125, -66.589736938476563 -68.238067626953125, -67.165557861328125 -68.2933349609375, -66.932235717773438 -68.773895263671875, -67.49444580078125 -68.815567016601563, -66.657791137695313 -69.017791748046875, -68.838058471679688 -69.417236328125, -67.703903198242188 -70.593063354492188, -67.400283813476562 -71.040008544921875, -67.5372314453125 -71.454727172851562, -66.859176635742187 -71.89556884765625, -66.797225952148438 -72.407791137695313, -69.4283447265625 -73.197235107421875, -76.962783813476562 -73.873062133789063, -76.61834716796875 -73.576675415039063, -78.78973388671875 -73.693069458007813, -78.958892822265625 -73.392501831054687, -80.695556640625 -73.050567626953125, -80.519180297851563 -73.446395874023437, -81.275283813476562 -73.36944580078125, -81.041397094726563 -73.715560913085938, -82.134170532226562 -73.943344116210937, -85.5997314453125 -73.558334350585937, -85.470840454101563 -73.3497314453125, -85.9666748046875 -73.041397094726563, -86.840835571289062 -73.336395263671875, -88.70916748046875 -73.179458618164062, -88.335845947265625 -72.816116333007812, -89.261123657226563 -72.639450073242188, -89.5372314453125 -72.633621215820313, -89.321121215820313 -73.054168701171875, -90.861114501953125 -73.326675415039063, -102.09916687011719 -73.084732055664063, -103.17611694335937 -72.73333740234375, -103.60334777832031 -72.891952514648438, -103.03611755371094 -73.326400756835937, -99.175567626953125 -73.619171142578125, -103.01334381103516 -73.629180908203125, -102.90139770507812 -73.875839233398438, -101.65834045410156 -73.9989013671875, -101.33194732666016 -74.480560302734375, -100.24806213378906 -74.4908447265625, -100.51555633544922 -74.671112060546875, -100.151123046875 -74.760833740234375, -100.85166931152344 -74.814727783203125, 
-99.510284423828125 -75.092514038085937, -110.383056640625 -75.306121826171875, -110.961669921875 -75.157791137695313, -109.92888641357422 -74.761398315429687, -110.15471649169922 -74.2841796875, -111.50389099121094 -74.191680908203125, -111.38583374023437 -74.46223258972168, -111.72445678710937 -74.58723258972168, -111.34306335449219 -74.759445190429688, -112.65416717529297 -74.858612060546875, -113.55555725097656 -74.634445190429688, -112.93028259277344 -74.455001831054687, -113.43361663818359 -74.47222900390625, -113.1864013671875 -74.168899536132812, -114.00361633300781 -73.889175415039062, -114.81001281738281 -74.104736328125, -114.69777679443359 -74.46917724609375, -117.39695739746094 -74.530838012695313, -117.74305725097656 -74.308624267578125, -118.53167724609375 -74.613616943359375, -121.47834777832031 -74.742507934570313, -133.26251220703125 -74.84722900390625, -134.30389404296875 -74.532791137695313, -146.2952880859375 -76.03973388671875, -145.47946166992187 -76.443344116210938, -146.49166870117187 -76.367507934570313, -148.10128784179687 -76.095657348632813, -149.50613403320312 -76.38751220703125, -146.9322509765625 -76.451950073242188, -145.45639038085937 -76.760284423828125, -146.10223388671875 -76.840011596679688, -145.29888916015625 -77.0291748046875, -146.3013916015625 -76.999176025390625, -145.84280395507812 -77.114181518554688, -146.22750854492187 -77.165008544921875, -145.87890625 -77.306394577026367, -146.26861572265625 -77.466400146484375, -147.0322265625 -77.220840454101563, -147.07168579101562 -77.372787475585937, -147.49307250976562 -77.296951293945313, -147.59140014648437 -77.422225952148438, -148.58111572265625 -77.50445556640625, -148.5855712890625 -77.594451904296875, -148.7327880859375 -77.625564575195312, -149.4141845703125 -77.570846557617188, -149.08724975585937 -77.696121215820313, -149.6622314453125 -77.761123657226563, -152.06112670898437 -77.325286865234375, -153.10641479492187 -77.497222900390625, -153.0836181640625 -77.286666870117188, -153.790283203125 -77.174728393554688, -155.86138916015625 -77.084457397460937, -156.48333740234375 -77.358612060546875, -157.75308227539062 -77.108062744140625, -157.69696044921875 -77.571395874023437, -158.17584228515625 -77.859725952148438, -157.9102783203125 -78.001953125, -153.75030517578125 -78.308334350585937, -155.90640258789062 -78.7197265625, -146.81195068359375 -79.88751220703125, -145.524169921875 -80.460556030273437, -148.41336059570312 -81.357513427734375, -154.92169189453125 -81.001953125, -156.96002197265625 -81.251678466796875, -154.20501708984375 -81.552230834960937, -153.98919677734375 -81.629180908203125, -154.89141845703125 -81.905014038085938, -151.794189453125 -82.577789306640625, -153.00750732421875 -83.087509155273438, -153.05307006835937 -84.012786865234375, -149.29473876953125 -84.56195068359375, -138.58944702148437 -84.984725952148438, -139.66140747070312 -85.245010375976563, -148.13833618164062 -85.090835571289062, -150.23947143554687 -85.463623046875, -157.48333740234375 -85.448623657226563, -180 -84.305343627929688, -180 -90)))\\n\\n\\n\\n You can see it ends with the triple ))), but that should not really matter. My code is now partially pulling in this line:\\nIt starts with: 25, -81.041397094726563 -73.715560913085938, -82.134170532226562 -73.943344116210937, -85.5997314453125 -73.558334350585937, -85.470840454101563 and goes to the end of the polygon. It just does not start at the beginning of the polygon. It is at the end of this message.\\n\\nYours does not even reference this line. 
\\n\\nMy revised code and results are in:\\nW20150402-155205 on Alpha-Dev-Thor\\n\\nHere it is:\\nimport ROADLINK AS X;\\n\\nOUTPUT(X.Files.WorldBordersRawDset);\\nOUTPUT(X.Files.WorldBordersRawDset[3].wkt_geog);\\n\\ndon1 := X.Files.WorldBordersRawDset[3].wkt_geog;\\n\\n PATTERN Numbers := PATTERN ('(([0-9]))');\\nPATTERN Comma := [','];\\nPATTERN ws := [' '];\\nPATTERN minus := ['-'];\\nPATTERN Period := ['.'];\\nPATTERN FirstParen := ['((('];\\nPATTERN TwoRParens := ['))'];\\nPATTERN TwoLParens := ['(('];\\nPATTERN ThreeRParens := [')))'];\\nPATTERN BeginFirstPoly := FirstParen ;\\nPATTERN EndPoly := TwoRParens ;\\n\\nPATTERN BeginAnyPoly := TwoLParens ;\\n\\nPATTERN EndLastPoly := ThreeRParens;\\nPATTERN LatLong := PATTERN('[ ,-[0-9] ');\\nPATTERN InPoly := OPT(ws) OPT(Minus) Numbers+ OPT(Period) OPT(Numbers+) OPT(ws) OPT(Minus) OPT(Numbers+) OPT(Period) OPT(Numbers+) OPT(ws) OPT(NUMBERS+) OPT(ws) OPT(NUMBERS+) OPT(ws) OPT(COMMA)\\n ;\\nPATTERN FirstPoly := BeginFirstPoly REPEAT(InPoly) EndPoly;\\nPATTERN MiddlePoly := BeginAnyPoly REPEAT(InPoly) EndPoly;\\nPATTERN LastPoly := BeginAnyPoly REPEAT(InPoly) EndLastPoly;\\nPattern StartLastPoly := ['((-180 -90,'];\\nPATTERN AfterMulti := REPEAT(MiddlePoly,1,500) | REPEAT(InPoly) EndLastPoly;\\t\\t\\t\\nps1 := RECORD\\n STRING ISO2 := X.Files.WorldBordersRawDset.ISO2;\\n STRING POLY := 'POLYGON '+ MATCHTEXT(AfterMulti);\\nEND;\\np1 := PARSE( X.Files.WorldBordersRawDset ,wkt_geog ,AfterMulti , ps1\\n\\t\\t, NOCASE ,KEEP(600) );\\nOUTPUT(p1,NAMED('p1'));\\t\\nOUTPUT(COUNT(p1),NAMED('COUNT_p1'));\\t\\nOUTPUT(Count(p1(ISO2='AQ' )),NAMED('COUNT_AQ'));\\t\\t\\t\\t\\t\\t \\t\\t\\nOUTPUT(p1[Count(p1(ISO2='AQ' ))+1],NAMED('NextLast_AQ_p1'));\\t\\t\\t\\t\\t \\nOUTPUT(p1[Count(p1(ISO2='AQ' ))+2],NAMED('Last_AQ_p1'));\\nOUTPUT(LENGTH(X.Files.WorldBordersRawDset[3].wkt_geog),NAMED('LENGTH_AQ_MULTI'));\\n NewDS := p1(ISO2='AQ');\\n OUTPUT(NewDs,,'~thor::wunelli::roadlink::Ant_1',OVERWRITE);
\\n\\n138th row now being output: (Note I added POLYGON to the start of the line\\nPOLYGON 25, -81.041397094726563 -73.715560913085938, -82.134170532226562 -73.943344116210937, -85.5997314453125 -73.558334350585937, -85.470840454101563 -73.3497314453125, -85.9666748046875 -73.041397094726563, -86.840835571289062 -73.336395263671875, -88.70916748046875 -73.179458618164062, -88.335845947265625 -72.816116333007812, -89.261123657226563 -72.639450073242188, -89.5372314453125 -72.633621215820313, -89.321121215820313 -73.054168701171875, -90.861114501953125 -73.326675415039063, -102.09916687011719 -73.084732055664063, -103.17611694335937 -72.73333740234375, -103.60334777832031 -72.891952514648438, -103.03611755371094 -73.326400756835937, -99.175567626953125 -73.619171142578125, -103.01334381103516 -73.629180908203125, -102.90139770507812 -73.875839233398438, -101.65834045410156 -73.9989013671875, -101.33194732666016 -74.480560302734375, -100.24806213378906 -74.4908447265625, -100.51555633544922 -74.671112060546875, -100.151123046875 -74.760833740234375, -100.85166931152344 -74.814727783203125, -99.510284423828125 -75.092514038085937, -110.383056640625 -75.306121826171875, -110.961669921875 -75.157791137695313, -109.92888641357422 -74.761398315429687, -110.15471649169922 -74.2841796875, -111.50389099121094 -74.191680908203125, -111.38583374023437 -74.46223258972168, -111.72445678710937 -74.58723258972168, -111.34306335449219 -74.759445190429688, -112.65416717529297 -74.858612060546875, -113.55555725097656 -74.634445190429688, -112.93028259277344 -74.455001831054687, -113.43361663818359 -74.47222900390625, -113.1864013671875 -74.168899536132812, -114.00361633300781 -73.889175415039062, -114.81001281738281 -74.104736328125, -114.69777679443359 -74.46917724609375, -117.39695739746094 -74.530838012695313, -117.74305725097656 -74.308624267578125, -118.53167724609375 -74.613616943359375, -121.47834777832031 -74.742507934570313, -133.26251220703125 -74.84722900390625, -134.30389404296875 -74.532791137695313, -146.2952880859375 -76.03973388671875, -145.47946166992187 -76.443344116210938, -146.49166870117187 -76.367507934570313, -148.10128784179687 -76.095657348632813, -149.50613403320312 -76.38751220703125, -146.9322509765625 -76.451950073242188, -145.45639038085937 -76.760284423828125, -146.10223388671875 -76.840011596679688, -145.29888916015625 -77.0291748046875, -146.3013916015625 -76.999176025390625, -145.84280395507812 -77.114181518554688, -146.22750854492187 -77.165008544921875, -145.87890625 -77.306394577026367, -146.26861572265625 -77.466400146484375, -147.0322265625 -77.220840454101563, -147.07168579101562 -77.372787475585937, -147.49307250976562 -77.296951293945313, -147.59140014648437 -77.422225952148438, -148.58111572265625 -77.50445556640625, -148.5855712890625 -77.594451904296875, -148.7327880859375 -77.625564575195312, -149.4141845703125 -77.570846557617188, -149.08724975585937 -77.696121215820313, -149.6622314453125 -77.761123657226563, -152.06112670898437 -77.325286865234375, -153.10641479492187 -77.497222900390625, -153.0836181640625 -77.286666870117188, -153.790283203125 -77.174728393554688, -155.86138916015625 -77.084457397460937, -156.48333740234375 -77.358612060546875, -157.75308227539062 -77.108062744140625, -157.69696044921875 -77.571395874023437, -158.17584228515625 -77.859725952148438, -157.9102783203125 -78.001953125, -153.75030517578125 -78.308334350585937, -155.90640258789062 -78.7197265625, -146.81195068359375 -79.88751220703125, -145.524169921875 -80.460556030273437, 
-148.41336059570312 -81.357513427734375, -154.92169189453125 -81.001953125, -156.96002197265625 -81.251678466796875, -154.20501708984375 -81.552230834960937, -153.98919677734375 -81.629180908203125, -154.89141845703125 -81.905014038085938, -151.794189453125 -82.577789306640625, -153.00750732421875 -83.087509155273438, -153.05307006835937 -84.012786865234375, -149.29473876953125 -84.56195068359375, -138.58944702148437 -84.984725952148438, -139.66140747070312 -85.245010375976563, -148.13833618164062 -85.090835571289062, -150.23947143554687 -85.463623046875, -157.48333740234375 -85.448623657226563, -180 -84.305343627929688, -180 -90)))\\n\\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-02 20:31:44\" },\n\t{ \"post_id\": 7247, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nThe reason that Lat was defined with REPEAT(Number,1,2) while Long was defined with REPEAT(Number,1,3) is that the maximum Latitude is +/- 90 degrees, while the maximum Longitude is +/- 180 degrees. I am, of course, assuming that the number pairs are a Latitude followed by a Longitude (the standard way of representing points on the globe), so if I'm incorrect in this please tell me.\\n\\nRichard\", \"post_time\": \"2015-04-02 16:38:10\" },\n\t{ \"post_id\": 7246, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nIt looks like the parsing is correct based on your pattern. In the 46136 byte STRING, I counted 137 pairs of double parenthesis. Where are you getting 274?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-02 16:30:03\" },\n\t{ \"post_id\": 7245, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"georgeb2d\", \"post_text\": \"I worked with the second suggestion. \\n\\nI made a minor change:\\nPATTERN Lat := OPT('-') REPEAT(Number,1,3) OPT('.' Number+);\\nfrom\\nPATTERN Lat := OPT('-') REPEAT(Number,1,2) OPT('.' Number+);\\n\\nStill got the first 137 rows. \\nThe length of the multipolygon is 46136. I am wondering if I am hitting some sort of length limitation or number of different rows limitation.\\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-02 15:19:09\" },\n\t{ \"post_id\": 7244, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"georgeb2d\", \"post_text\": \"I will try the second suggestion. \\nI will need to figure how to implement the first suggestion since the first 138 are matching. The documentation implies NOT Matching seems to only work when there are no matches. (Generate a row if there were no matches on the input row. All calls to the MATCHED()\\nfunction return false inside the resultstructure.) There are 138 matches. There just needs to be 274 or so.\\n\\nThanks for your assistance,\\nDon\", \"post_time\": \"2015-04-02 14:30:37\" },\n\t{ \"post_id\": 7243, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nLooking at your code, I think your pattern definitions might be unnecessarily complex. 
Try this code, then run it against your data and see if you get close to what you want:str :=\\n'MULTIPOLYGON ( ' + \\n'((-45.005279541015625 -60.72528076171875, -45.025840759277344 -60.641944885253906, -45.071113586425781 -60.631111145019531, -45.005279541015625 -60.72528076171875)), ' + \\n'((-44.78472900390625 -60.734451293945313, -44.428062438964844 -60.723335266113281, -44.531394958496094 -60.675559997558594, -44.78472900390625 -60.734451293945313)), ' + \\n'((-45.145278930664062 -60.76611328125, -45.550834655761719 -60.547225952148438, -46.023612976074219 -60.61083984375, -45.145278930664062 -60.76611328125)), ' + \\n'((-55.49444580078125 -61.126670837402344, -54.646392822265625 -61.092781066894531, -55.09222412109375 -61.098060607910156, -55.49444580078125 -61.126670837402344)), ' + \\n'((-54.046951293945313 -61.269447326660156, -54.0322265625 -61.088058471679688, -54.2005615234375 -61.236114501953125, -54.046951293945313 -61.269447326660156)), ' + \\n'((-58.98638916015625 -62.214447021484375, -57.6219482421875 -61.90972900390625, -58.400283813476562 -61.938613891601563, -58.98638916015625 -62.214447021484375)), ' + \\n'((-58.992500305175781 -62.3477783203125, -58.816390991210938 -62.298057556152344, -59.208892822265625 -62.285560607910156, -58.992500305175781 -62.3477783203125)), ' + \\n'((-59.439170837402344 -62.447225570678711, -59.327507019042969 -62.371391296386719, -59.678337097167969 -62.3638916015625, -59.439170837402344 -62.447225570678711)))' ;\\n\\nds := DATASET([{str}],{STRING line});\\n\\nPATTERN Number := PATTERN ('[0-9]');\\nPATTERN Startpoly := '((';\\nPATTERN Endpoly := '))';\\nPATTERN PointSep := ', ';\\nPATTERN Lat := OPT('-') REPEAT(Number,1,2) OPT('.' Number+);\\nPATTERN Long := OPT('-') REPEAT(Number,1,3) OPT('.' Number+);\\nPATTERN Point := Lat ' ' Long OPT(PointSep);\\nPATTERN Poly := StartPoly REPEAT(Point) EndPoly;\\n\\nRec := RECORD\\n STRING Point := MATCHTEXT(Point[1]);\\n STRING Lat := MATCHTEXT(Point[1]/Lat);\\n STRING Long := MATCHTEXT(Point[1]/Long);\\n STRING txt := MATCHTEXT;\\nEND;\\nPARSE(ds,line,Poly,Rec);
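A minimal follow-on sketch (not from the original post) that reuses the ds, line and Poly definitions from the example above and combines them with the 'POLYGON ' + MATCHTEXT idea used elsewhere in this thread, so each matched ring comes back as its own WKT POLYGON string; the record and attribute names here are illustrative assumptions:

PolyRec := RECORD
  STRING poly := 'POLYGON ' + MATCHTEXT;   // e.g. 'POLYGON ((-45.005... -60.725..., ...))'
END;
polys := PARSE(ds, line, Poly, PolyRec);
OUTPUT(polys, NAMED('Polys'));
OUTPUT(COUNT(polys), NAMED('PolyCount'));  // should equal the number of '((' groups in the source string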
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-02 14:25:39\" },\n\t{ \"post_id\": 7242, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Re: Parsing a very long line\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nYou might need to modify the PARSE and use NOT MATCHED ONLY to examine what patterns are being rejected.\\n\\nFor example, is your InPoly PATTERN accounting for numbers that do not use a decimal?\\n\\n((-180 -90, 180 -90, 180 -84.30224609375, 171.905029296875 -83.80169677734375,
\\n\\nYou may need to tweak it a little bit. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-02 13:31:44\" },\n\t{ \"post_id\": 7241, \"topic_id\": 1661, \"forum_id\": 10, \"post_subject\": \"Parsing a very long line\", \"username\": \"georgeb2d\", \"post_text\": \"I am working with geospatial data, in particular a multipolygon. The particular multipolygon I am working with is extremely long. \\n\\nIt is in the form of:\\nMULTIPOLYGON (((-45.005279541015625 -60.72528076171875, -45.025840759277344 -60.641944885253906, -45.071113586425781 -60.631111145019531, -45.005279541015625 -60.72528076171875)), ((-44.78472900390625 -60.734451293945313, -44.428062438964844 -60.723335266113281, -44.531394958496094 -60.675559997558594, -44.78472900390625 -60.734451293945313)), ((-45.145278930664062 -60.76611328125, -45.550834655761719 -60.547225952148438, -46.023612976074219 -60.61083984375, -45.145278930664062 -60.76611328125)), ((-55.49444580078125 -61.126670837402344, -54.646392822265625 -61.092781066894531, -55.09222412109375 -61.098060607910156, -55.49444580078125 -61.126670837402344)), ((-54.046951293945313 -61.269447326660156, -54.0322265625 -61.088058471679688, -54.2005615234375 -61.236114501953125, -54.046951293945313 -61.269447326660156)), ((-58.98638916015625 -62.214447021484375, -57.6219482421875 -61.90972900390625, -58.400283813476562 -61.938613891601563, -58.98638916015625 -62.214447021484375)), ((-58.992500305175781 -62.3477783203125, -58.816390991210938 -62.298057556152344, -59.208892822265625 -62.285560607910156, -58.992500305175781 -62.3477783203125)), ((-59.439170837402344 -62.447225570678711, -59.327507019042969 -62.371391296386719, -59.678337097167969 -62.3638916015625, -59.439170837402344 -62.447225570678711)), and so on, with the last (( numbers ending in ))).\\n\\nI have been able to parse the line partially successful. Out of the MULTIPOLYGON above it creates 137 rows. However, it should be producing 278 rows. It produces the first 137 polygons in the multipolygon. 
\\n\\nHere are the relevant code snippets:\\n PATTERN Numbers := PATTERN ('(([0-9]))');\\nPATTERN Comma := [','];\\nPATTERN ws := [' '];\\nPATTERN minus := ['-'];\\nPATTERN Period := ['.'];\\nPATTERN FirstParen := ['((('];\\nPATTERN TwoRParens := ['))'];\\nPATTERN TwoLParens := ['(('];\\nPATTERN ThreeRParens := [')))'];\\nPATTERN BeginFirstPoly := FirstParen ;\\nPATTERN EndPoly := TwoRParens ;\\n\\nPATTERN BeginAnyPoly := TwoLParens ;\\n\\nPATTERN EndLastPoly := ThreeRParens;\\nPATTERN LatLong := PATTERN('[ ,-[0-9] ');\\nPATTERN InPoly := OPT(ws) OPT(Minus) Numbers+ Period Numbers+ ws OPT(Minus) Numbers+ Period Numbers+ OPT(ws) OPT(NUMBERS+) OPT(ws) OPT(NUMBERS+) OPT(ws) OPT(COMMA)\\n\\t\\t\\t\\t\\t\\t\\t ;\\nPATTERN FirstPoly := BeginFirstPoly REPEAT(InPoly) EndPoly;\\nPATTERN MiddlePoly := BeginAnyPoly REPEAT(InPoly) EndPoly;\\nPATTERN LastPoly := BeginAnyPoly REPEAT(InPoly) EndLastPoly;\\n\\t\\t\\n PATTERN AfterMulti := FirstPoly| MiddlePoly ;\\n\\nps1 := RECORD\\n STRING ISO2 := X.Files.WorldBordersRawDset.ISO2;\\n STRING POLY := MATCHTEXT(AfterMulti);\\nEND;\\np1 := PARSE( \\n X.Files.WorldBordersRawDset\\n\\t\\t\\t\\t ,wkt_geog\\n\\t\\t\\t\\t\\t\\t\\t ,AfterMulti\\n\\t\\t\\t\\t , ps1\\n\\t\\t\\t\\t\\t\\t\\t ,MAXLENGTH(1000000)\\n\\t\\t\\t\\t\\t\\t\\t\\t , NOCASE );\\nOUTPUT(p1);\\t\\t\\nOUTPUT(Count(p1(ISO2='AQ' )));\\t\\t\\t\\t\\t\\t \\t\\t\\t\\t\\t\\t\\t \\nOUTPUT(p1[Count(p1(ISO2='AQ' ))]);\\nOUTPUT(LENGTH(X.Files.WorldBordersRawDset[3].wkt_geog));\\n\\nThis is Workunit W201501401-150733-1.\\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-01 19:31:31\" },\n\t{ \"post_id\": 7280, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Re: Building Significant Numbers\", \"username\": \"georgeb2d\", \"post_text\": \"I am following in Andrew Farrell's footsteps. He has already built an RTree functionality in HPCC. I am just trying to understand it and get it to work.\", \"post_time\": \"2015-04-07 15:30:53\" },\n\t{ \"post_id\": 7279, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Re: Building Significant Numbers\", \"username\": \"rtaylor\", \"post_text\": \"HPCC INDEX files use a B-tree, and I have not heard about an R-tree option for them. I have to assume you're going to build the R-tree functionality yourself somehow.\\n\\nGood luck!
\\n\\nRichard\", \"post_time\": \"2015-04-07 15:29:31\" },\n\t{ \"post_id\": 7277, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Re: Building Significant Numbers\", \"username\": \"georgeb2d\", \"post_text\": \"As I have contemplated this more, These latitude and longitude are going into an RTREE index, with the points signifying the rectangles involved. For the RTREE can these be strings?\", \"post_time\": \"2015-04-07 15:22:22\" },\n\t{ \"post_id\": 7276, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Re: Building Significant Numbers\", \"username\": \"georgeb2d\", \"post_text\": \"That sure simplifies everything.\\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-07 15:11:40\" },\n\t{ \"post_id\": 7271, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Re: Building Significant Numbers\", \"username\": \"rtaylor\", \"post_text\": \"
The data is in REAL, so needs to be cast to INTEGER in order for it to be used in an index.
Instead of casting to INTEGER, just cast them to STRING, Since an INDEX is always LZW compressed, the extra bytes are irrelevant and using STRING should eliminate your other problems as well.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-07 14:37:05\" },\n\t{ \"post_id\": 7270, \"topic_id\": 1668, \"forum_id\": 10, \"post_subject\": \"Building Significant Numbers\", \"username\": \"georgeb2d\", \"post_text\": \"I am working on a project that needs to use longitude and latitude for an index. \\n\\nThe data is in REAL, so needs to be cast to INTEGER in order for it to be used in an index. Four points, for example: [-1.866770029067993 ,-1.842769980430603 ,55.62379837036133 ,55.65330123901367] \\n\\nI multiplied these each by 1000000000000000. This worked fine for the numbers between 10 and -10, but gives problems beyond those ranges. For example:\\n55.62379837036133 becomes 55623798370361328. So what it is doing is adding a significant digit that is not correct. \\n\\nI fixed this in a transformation:\\nLE.miny is the input of 55.62379837036133. Miny is the output of 55623798370361330.\\n\\tINTEGER8 MINY:= IF (LE.miny >= 10 , (INTEGER8)( LE.miny * 100000000000000),\\n\\t\\t\\t\\tIF (LE.miny <= -10 , \\n (INTEGER8)( LE.miny * 100000000000000),\\n LE.miny * 1000000000000000) ) ;\\n\\t\\n SELF.BBOXminy := IF (LE.miny >= 10 , MINy * 10,\\n\\t\\t\\t IF (LE.miny <= -10 , MINy * 10,\\n MINy) ) ;
\\n\\nUsing this gives an output of:\\n[-1866770029067993,\\t-18427699804306032, 55623798370361330, 55653301239013670]\\n\\nI also need to do a similar fix if the longitude is greater than 100 or less than -100. I can add more IF statements but I sense there must be a better way.\\n\\nPlease tell me a better way.\\nThanks for your assistance.\", \"post_time\": \"2015-04-07 14:18:53\" },\n\t{ \"post_id\": 10013, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"john holt\", \"post_text\": \"I would be very surprised if there was a version dependency. Any recent version of BLAS/ATLAS should be sufficient. What specific problem are you encountering?\", \"post_time\": \"2016-07-25 12:35:08\" },\n\t{ \"post_id\": 10003, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"jwilt\", \"post_text\": \"Could someone post which Atlas/Cblas we need, versions, etc., to work with ML's PBBlas?\", \"post_time\": \"2016-07-24 01:01:27\" },\n\t{ \"post_id\": 7429, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\n Thanks for the help. Its working perfectly now.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-21 20:21:52\" },\n\t{ \"post_id\": 7428, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nYou should be good to go now. My test was successful, see W20150421-150447 \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-21 19:01:38\" },\n\t{ \"post_id\": 7427, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\n Thanks. I did not get any other issues except that compiler/link error.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-21 15:56:53\" },\n\t{ \"post_id\": 7425, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nAs far as I know, the NeuralNetworks ECL module is still under development on GitHub, and has not been officially released on the HPCC portal. I am seeing compiler errors and missing Types (Types.l_result) that need to be corrected. Did you fix the code on GitHub before attempting the compile?\\n\\nAfter making some corrections, I still see what you are reporting, contacting the system administrator to resolve. 
Will keep you posted.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-21 15:23:32\" },\n\t{ \"post_id\": 7424, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\n Its same from github repository.IMPORT ML;\\nIMPORT * FROM $;\\nIMPORT $.Mat;\\nIMPORT * FROM ML.Types;\\nIMPORT PBblas;\\nLayout_Cell := PBblas.Types.Layout_Cell;\\nLayout_Part := PBblas.Types.Layout_Part;\\n\\nEXPORT NeuralNetworks (DATASET(Types.DiscreteField) net,UNSIGNED4 prows=0, UNSIGNED4 pcols=0, UNSIGNED4 Maxrows=0, UNSIGNED4 Maxcols=0) := MODULE\\n\\n//initialize bias values in the neural network\\n//each bias matrix is a vector\\n//bias with no=L means the bias that goes to the layer L+1 so its size is equal to number of nodes in layer L+1\\n EXPORT IntBias := FUNCTION\\n //New Randome Matrix Generator\\n Mat.Types.Element RandGen(UNSIGNED4 c, UNSIGNED4 NumRows) := TRANSFORM\\n SELF.x := ((c-1) % NumRows) + 1;\\n SELF.y := ((c-1) DIV NumRows) + 1;\\n SELF.value := 1;\\n END;\\n //Creat the first weight matrix with no=1 (weight matrix between layer 1 and layer 2)\\n b1rows := net(id=(2))[1].value;\\n b1cols := 1;\\n b1size := b1rows*b1cols;\\n b1 := DATASET(b1size, RandGen(COUNTER, b1rows),DISTRIBUTED);\\n b1no := Mat.MU.To(b1, 1);\\n //step function for initialize the rest of the weight matrices\\n Step(DATASET(Mat.Types.MUElement) InputBias, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight between layers L and L+1\\n brows := net(id=(L+1))[1].value;\\n bcols := 1;\\n bsize := brows*bcols;\\n b := DATASET(bsize, RandGen(COUNTER, brows),DISTRIBUTED);\\n bno := Mat.MU.To(b, L);\\n RETURN InputBias+bno;\\n END;\\n LoopNum := MAX(net,id)-2;\\n initialized_Bias := LOOP(b1no, COUNTER <= LoopNum, Step(ROWS(LEFT),COUNTER));\\n RETURN initialized_Bias;\\n END;\\n//initialize the weights in a neural network\\n//the output is in ML.Mat.Types.MUElement format that each wight matrix has its own id, "no" value assigned to each wight marix represents the wight matrix belongs to the weight between layer no and layer no+1\\n//the size of the wight matrix with "no" value is i*j which i is the number of nodes in layer no+1 and j is the number of nodes in layer no\\n//the structure of the neural network is shown in the net record set\\n//in the net record set each id shows the layer numebr and the corresponding value shows number of nodes in that layer\\n//for example net:=DATASET ([{1,1,4},{2,1,2},{3,1,5}],Types.NumericField) shows a network that layer 1 has 4 nodes, layer 2 has 2 nodes and layer 3 has 5\\n EXPORT IntWeights := FUNCTION\\n //Generate a random number\\n Produce_Random () := FUNCTION\\n G := 1000000;\\n R := (RANDOM()%G) / (REAL8)G;\\n RETURN R;\\n END;\\n //New Randome Matrix Generator\\n Mat.Types.Element RandGen(UNSIGNED4 c, UNSIGNED4 NumRows) := TRANSFORM\\n SELF.x := ((c-1) % NumRows) + 1;\\n SELF.y := ((c-1) DIV NumRows) + 1;\\n SELF.value := Produce_Random();\\n END;\\n //Creat the first weight matrix with no=1 (weight matrix between layer 1 and layer 2)\\n w1rows := net(id=2)[1].value;\\n w1cols := net(id=1)[1].value;\\n w1size := w1rows*w1cols;\\n w1 := DATASET(w1size, RandGen(COUNTER, w1rows),DISTRIBUTED);\\n w1no := Mat.MU.To(w1, 1);\\n //step function for initialize the rest of the weight matrices\\n Step(DATASET(Mat.Types.MUElement) InputWeight, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight between layers L and L+1\\n wrows := net(id=(L+1))[1].value;\\n wcols := net(id=L)[1].value;\\n wsize 
:= wrows*wcols;\\n w := DATASET(wsize, RandGen(COUNTER, wrows),DISTRIBUTED);\\n wno := Mat.MU.To(w, L);\\n RETURN InputWeight+wno;\\n END;\\n LoopNum := MAX(net,id)-2;\\n initialized_weights := LOOP(w1no, COUNTER <= LoopNum, Step(ROWS(LEFT),COUNTER));\\n RETURN initialized_weights;\\n END;\\n //in the built model the no={1,2,..,NL-1} are the weight indexes\\n //no={NL+1,NL+2,..,NL+NL} are bias indexes that go to the second, third, ..,NL)'s layer respectively\\n EXPORT Model(DATASET(Types.NumericField) mod) := FUNCTION\\n modelD_Map :=\\tDATASET([{'id','ID'},{'x','1'},{'y','2'},{'value','3'},{'no','4'}], {STRING orig_name; STRING assigned_name;});\\n FromField(mod,Mat.Types.MUElement,dOut,modelD_Map);\\n RETURN dOut;\\n END;\\n EXPORT ExtractWeights (DATASET(Types.NumericField) mod) := FUNCTION\\n NNmod := Model (mod);\\n NL := MAX (net, id);\\n RETURN NNmod (no<NL);\\n END;\\n EXPORT ExtractBias (DATASET(Types.NumericField) mod) := FUNCTION\\n NNmod := Model (mod);\\n NL := MAX (net, id);\\n B := NNmod (no>NL);\\n Mat.Types.MUElement Sno (Mat.Types.MUElement l) := TRANSFORM\\n SELF.no := l.no-NL;\\n SELF := l;\\n END;\\n RETURN PROJECT (B,Sno(LEFT));\\n END;\\n /*\\n implementation based on stanford deep learning toturi al (http://ufldl.stanford.edu/wiki/index.php/Neural_Networks)\\n X is input data\\n w and b represent the structure of neural network\\n w represnts weight matrices : matrix with id=L means thw weight matrix between layer L and layer L+1\\n w(i,j) with id=L represents the weight between unit i of layer L+1 and unit j of layer L\\n b represent bias matrices\\n b with id = L shows the bias value for the layer L+1\\n b(i) with id= L show sthe bias value goes to uni i of layer L\\n */\\n\\n // back propagation algorithm\\n BP(DATASET(Types.NumericField) X,DATASET(Types.NumericField) Y,DATASET(Mat.Types.MUElement) IntW, DATASET(Mat.Types.MUElement) Intb, REAL8 LAMBDA=0.001, REAL8 ALPHA=0.1, UNSIGNED2 MaxIter=100) := MODULE\\n dt := Types.ToMatrix (X);\\n //SHARED dTmp := Mat.InsertColumn(dt,1,1.0); // add the intercept column\\n dTmp := dt;\\n SHARED d := Mat.Trans(dTmp); //in the entire of the calculations we work with the d matrix that each sample is presented in one column\\n SHARED m := MAX (d, d.y); //number of samples\\n SHARED m_1 := 1/m;\\n yt := Types.ToMatrix (Y);\\n SHARED Ytmp := Mat.Trans(yt);\\n SHARED sizeRec := RECORD\\n PBblas.Types.dimension_t m_rows;\\n PBblas.Types.dimension_t m_cols;\\n PBblas.Types.dimension_t f_b_rows;\\n PBblas.Types.dimension_t f_b_cols;\\n END;\\n //Map for Matrix d.\\n SHARED havemaxrow := maxrows > 0;\\n SHARED havemaxcol := maxcols > 0;\\n SHARED havemaxrowcol := havemaxrow and havemaxcol;\\n SHARED dstats := Mat.Has(d).Stats;\\n SHARED d_n := dstats.XMax;\\n SHARED d_m := dstats.YMax;\\n SHARED Ystats := Mat.Has(Ytmp).Stats;\\n SHARED output_num := Ystats.XMax;\\n derivemap := IF(havemaxrowcol, PBblas.AutoBVMap(d_n, d_m,prows,pcols,maxrows, maxcols),\\n IF(havemaxrow, PBblas.AutoBVMap(d_n, d_m,prows,pcols,maxrows),\\n IF(havemaxcol, PBblas.AutoBVMap(d_n, d_m,prows,pcols,,maxcols),\\n PBblas.AutoBVMap(d_n, d_m,prows,pcols))));\\n SHARED sizeTable := DATASET([{derivemap.matrix_rows,derivemap.matrix_cols,derivemap.part_rows(1),derivemap.part_cols(1)}], sizeRec);\\n //Create block matrix d\\n dmap := PBblas.Matrix_Map(sizeTable[1].m_rows,sizeTable[1].m_cols,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n ddist := DMAT.Converted.FromElement(d,dmap);\\n //Create block matrix Ytmp\\n Ymap := 
PBblas.Matrix_Map(output_num,sizeTable[1].m_cols,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n Ydist := DMAT.Converted.FromElement(Ytmp,Ymap);\\n //Creat block matrices for weights\\n w1_mat := Mat.MU.From(IntW,1);\\n w1_mat_x := Mat.Has(w1_mat).Stats.Xmax;\\n w1_mat_y := Mat.Has(w1_mat).Stats.Ymax;\\n w1map := PBblas.Matrix_Map(w1_mat_x, w1_mat_y, sizeTable[1].f_b_rows, sizeTable[1].f_b_rows);\\n w1dist := DMAT.Converted.FromElement(w1_mat,w1map);\\n w1no := PBblas.MU.TO(w1dist,1);\\n //loopbody to creat the rest of weight blocks\\n CreatWeightBlock(DATASET(PBblas.Types.MUElement) inputWno, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight block for weight between layers L and L+1\\n w_mat := Mat.MU.From(IntW,L);\\n w_mat_x := Mat.Has(w_mat).Stats.Xmax;\\n w_mat_y := Mat.Has(w_mat).Stats.Ymax;\\n wmap := PBblas.Matrix_Map(w_mat_x, w_mat_y, sizeTable[1].f_b_rows , sizeTable[1].f_b_rows);\\n wdist := DMAT.Converted.FromElement(w_mat,wmap);\\n wno := PBblas.MU.TO(wdist,L);\\n RETURN inputWno+wno;\\n END;\\n iterations := MAX(IntW,no)-1;\\n weightsdistno := LOOP(w1no, COUNTER <= iterations, CreatWeightBlock(ROWS(LEFT),COUNTER));\\n //two kind of Bias blocks are calculated\\n //1- each bias vector is converted to block format\\n //2-each Bias vector is repeated first to m columns, then the final repreated bias matrix is converted to block format\\n //the second kind of bias is calculated to make the next calculations easier, the first vector bias format is used just when we\\n //want to update the bias vectors\\n //Creat block vectors for Bias (above case 1)\\n b1vec := Mat.MU.From(Intb,1);\\n b1vec_x := Mat.Has(b1vec).Stats.Xmax;\\n b1vecmap := PBblas.Matrix_Map(b1vec_x, 1, sizeTable[1].f_b_rows, 1);\\n b1vecdist := DMAT.Converted.FromElement(b1vec,b1vecmap);\\n b1vecno := PBblas.MU.TO(b1vecdist,1);\\n //loopbody to creat the rest of bias vector blocks\\n CreatBiasVecBlock(DATASET(PBblas.Types.MUElement) inputb, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight block for weight between layers L and L+1\\n b_mat := Mat.MU.From(Intb,L);\\n b_mat_x := Mat.Has(b_mat).Stats.Xmax;\\n bmap := PBblas.Matrix_Map(b_mat_x, 1, sizeTable[1].f_b_rows, 1);\\n bdist := DMAT.Converted.FromElement(b_mat,bmap);\\n bno := PBblas.MU.TO(bdist,L);\\n RETURN inputb+bno;\\n END;\\n biasVecdistno := LOOP(b1vecno, COUNTER <= iterations, CreatBiasVecBlock(ROWS(LEFT),COUNTER));\\n //Creat block matrices for Bias (repeat each bias vector to a matrix with m columns) (above case 2)\\n b1_mat := Mat.MU.From(Intb,1);\\n b1_mat_x := Mat.Has(b1_mat).Stats.Xmax;\\n b1_mat_rep := Mat.Repmat(b1_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n b1map := PBblas.Matrix_Map(b1_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n b1dist := DMAT.Converted.FromElement(b1_mat_rep,b1map);\\n b1no := PBblas.MU.TO(b1dist,1);\\n //loopbody to creat the rest of bias blocks\\n CreatBiasBlock(DATASET(PBblas.Types.MUElement) inputb, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight block for weight between layers L and L+1\\n b_mat := Mat.MU.From(Intb,L);\\n b_mat_x := Mat.Has(b_mat).Stats.Xmax;\\n b_mat_rep := Mat.Repmat(b_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n bmap := PBblas.Matrix_Map(b_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n bdist := DMAT.Converted.FromElement(b_mat_rep,bmap);\\n bno := PBblas.MU.TO(bdist,L);\\n RETURN inputb+bno;\\n END;\\n biasMatdistno := LOOP(b1no, COUNTER <= 
iterations, CreatBiasBlock(ROWS(LEFT),COUNTER));\\n // creat ones vector for calculating bias gradients\\n Layout_Cell gen(UNSIGNED4 c, UNSIGNED4 NumRows, REAL8 v) := TRANSFORM\\n SELF.x := ((c-1) % NumRows) + 1;\\n SELF.y := ((c-1) DIV NumRows) + 1;\\n SELF.v := v;\\n END;\\n onesmap := PBblas.Matrix_Map(m, 1, sizeTable[1].f_b_cols, 1);\\n ones := DATASET(m, gen(COUNTER, m, 1.0),DISTRIBUTED);\\n onesdist := DMAT.Converted.FromCells(onesmap, ones);\\n //functions used\\n PBblas.Types.value_t sigmoid(PBblas.Types.value_t v, PBblas.Types.dimension_t r, PBblas.Types.dimension_t c) := 1/(1+exp(-1*v));\\n //make parameters\\n NumLayers := MAX (net, id);\\n //define the Trasnfroms to add and decrease the Numlayers\\n PBblas.Types.MUElement Addno (PBblas.Types.MUElement l) := TRANSFORM\\n SELF.no := l.no+NumLayers;\\n SELF := l;\\n END;\\n PBblas.Types.MUElement Subno (PBblas.Types.MUElement l) := TRANSFORM\\n SELF.no := l.no-NumLayers;\\n SELF := l;\\n END;\\n //creat the parameters to be passed to the main gradient descent loop\\n biasVecdistno_added := PROJECT (biasVecdistno,Addno(LEFT));\\n param_tobe_passed := weightsdistno + biasVecdistno_added;\\n FF(DATASET(PBblas.Types.MUElement) w, DATASET(PBblas.Types.MUElement) b ):= FUNCTION\\n w1 := PBblas.MU.From(W, 1); // weight matrix between layer 1 and layer 2 of the neural network\\n b1 := PBblas.MU.From(b, 1); //bias entered to the layer 2 of the neural network\\n //z2 = w1*X+b1;\\n z2 := PBblas.PB_dgemm(FALSE, FALSE,1.0,w1map, W1, dmap, ddist, b1map,b1, 1.0 );\\n //a2 = sigmoid (z2);\\n a2 := PBblas.Apply2Elements(b1map, z2, sigmoid);\\n a2no := PBblas.MU.To(a2,2);\\n\\n FF_Step(DATASET(PBblas.Types.MUElement) InputA, INTEGER coun) := FUNCTION\\n L := coun+1;\\n wL := PBblas.MU.From(w, L); // weight matrix between layer L and layer L+1 of the neural network\\n wL_x := net(id=(L+1))[1].value;\\n wL_y := net(id=(L))[1].value;;\\n bL := PBblas.MU.From(b, L); //bias entered to the layer L+1 of the neural network\\n bL_x := net(id=(L+1))[1].value;\\n aL := PBblas.MU.From(InputA, L); //output of layer L\\n aL_x := net(id=(L))[1].value;;\\n wLmap := PBblas.Matrix_Map(wL_x, wL_y, sizeTable[1].f_b_rows, sizeTable[1].f_b_rows);\\n bLmap := PBblas.Matrix_Map(bL_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n aLmap := PBblas.Matrix_Map(aL_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n //z(L+1) = wL*aL+bL;\\n zL_1 := PBblas.PB_dgemm(FALSE, FALSE,1.0,wLmap, wL, aLmap, aL, bLmap,bL, 1.0 );\\n //aL_1 = sigmoid (zL_1);\\n aL_1 := PBblas.Apply2Elements(bLmap, zL_1, sigmoid);\\n aL_1no := PBblas.MU.To(aL_1,L+1);\\n RETURN InputA+aL_1no;\\n END;//end FF_step\\n final_A := LOOP(a2no, COUNTER <= iterations, FF_Step(ROWS(LEFT),COUNTER));\\n return final_A;\\n END;//end FF\\n Delta(DATASET(PBblas.Types.MUElement) w, DATASET(PBblas.Types.MUElement) b, DATASET(PBblas.Types.MUElement) A ):= FUNCTION\\n PBblas.Types.value_t siggrad(PBblas.Types.value_t v, PBblas.Types.dimension_t r, PBblas.Types.dimension_t c) := v*(1-v);\\n A_end := PBblas.MU.From(A,NumLayers);\\n siggrad_A_end := PBblas.Apply2Elements(Ymap, A_end, siggrad);\\n a_y := PBblas.PB_daxpy(-1, Ydist, A_end);//-1 * (y-a) = a-y\\n Delta_End := PBblas.HadamardProduct(Ymap, a_y, siggrad_A_end);\\n Delta_End_no := PBblas.MU.To(Delta_End,NumLayers);\\n Delta_Step(DATASET(PBblas.Types.MUElement) InputD, INTEGER coun) := FUNCTION\\n L := NumLayers - coun ;\\n DL_1 := PBblas.MU.From(InputD, L+1);//Delta for layer L+1:DL_1\\n DL_1_x := net(id=(L+1))[1].value;\\n DL_1_y := m;\\n wL := PBblas.MU.From(w, 
L); // weight matrix between layer L and layer L+1 of the neural network\\n wL_x := net(id=(L+1))[1].value;\\n wL_y := net(id=(L))[1].value;\\n aL := PBblas.MU.From(A, L);//output of layer L\\n aL_x := net(id=(L))[1].value;\\n aL_y := m;\\n DL_1map := PBblas.Matrix_Map(DL_1_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n wLmap := PBblas.Matrix_Map(wL_x, wL_y, sizeTable[1].f_b_rows, sizeTable[1].f_b_rows);\\n aLmap := PBblas.Matrix_Map(aL_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n siggrad_aL := PBblas.Apply2Elements(aLmap, aL, siggrad);\\n //wLtDL_1=wL(transpose)*DL_1\\n wLtDL_1 := PBblas.PB_dgemm (TRUE, FALSE, 1.0, wLmap, wL, DL_1map, DL_1, aLmap);\\n //calculated delta = delta_L = wLtDL_1 .* siggrad_aL\\n Delta_L := PBblas.HadamardProduct(aLmap, wLtDL_1, siggrad_aL);\\n Delta_L_no := PBblas.MU.To(Delta_L,L);\\n RETURN InputD+Delta_L_no;\\n END;//END Delta_Step\\n final_Delta := LOOP(Delta_End_no, COUNTER <= iterations, Delta_Step(ROWS(LEFT),COUNTER));\\n RETURN final_Delta;\\n END;//END Delta\\n WeightGrad(DATASET(PBblas.Types.MUElement) w, DATASET(PBblas.Types.MUElement) A, DATASET(PBblas.Types.MUElement) Del ):= FUNCTION\\n //calculate update term for wights (1/m*(DELTAw) + LAMBDA*w)\\n //w1_g1=d2*a1'\\n D2 := PBblas.MU.From(Del, 2);\\n D2_x := net(id=(2))[1].value;\\n D2_y := m;\\n D2_map := PBblas.Matrix_Map(D2_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n w1_g1_map := PBblas.Matrix_Map(net(id=(2))[1].value,net(id=(1))[1].value,sizeTable[1].f_b_rows,sizeTable[1].f_b_rows);\\n w1_g1 := PBblas.PB_dgemm(FALSE, TRUE,1.0,D2_map, D2, dmap, ddist, w1_g1_map );\\n //wight decay term :lambda* w1;\\n w1 := PBblas.MU.From(w, 1);\\n w1_g2 := PBblas.PB_dscal(LAMBDA, w1);\\n //w1_g := 1/m*w1_g1 + w1_g2\\n w1_g := PBblas.PB_daxpy(m_1, w1_g1, w1_g2);\\n w1_g_no := PBblas.MU.To(w1_g,1);\\n WeightGrad_Step(DATASET(PBblas.Types.MUElement) InputWG, INTEGER coun) := FUNCTION\\n L := coun+1;\\n //calculate update term for wights (1/m*(DELTAw) + LAMBDA*w)\\n //w1_g1=d2*a1'\\n DL_1 := PBblas.MU.From(Del, L+1);\\n DL_1_x := net(id=(L+1))[1].value;\\n DL_1_y := m;\\n DL_1_map := PBblas.Matrix_Map(DL_1_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n aL := PBblas.MU.From(A, L);//output of layer L\\n aL_x := net(id=(L))[1].value;\\n aL_y := m;\\n aLmap := PBblas.Matrix_Map(aL_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n wL_g1_map := PBblas.Matrix_Map(net(id=(L+1))[1].value,net(id=(L))[1].value,sizeTable[1].f_b_rows,sizeTable[1].f_b_rows);\\n wL_g1 := PBblas.PB_dgemm(FALSE, TRUE,1.0,DL_1_map, DL_1, aLmap, aL, wL_g1_map );\\n //wight decay term :lambda* w1;\\n wL := PBblas.MU.From(w, L);\\n wL_g2 := PBblas.PB_dscal(LAMBDA, wL);\\n //w1_g := 1/m*w1_g1 + w1_g2\\n wL_g := PBblas.PB_daxpy(m_1, wL_g1, wL_g2);\\n wL_g_no := PBblas.MU.To(wL_g,L);\\n RETURN InputWG+wL_g_no;\\n END;//WeightGrad_Step\\n final_WG := LOOP(w1_g_no, COUNTER <= iterations, WeightGrad_Step(ROWS(LEFT),COUNTER));\\n RETURN final_WG;\\n END;//END WeightGrad\\n BiasGrad (DATASET(PBblas.Types.MUElement) Del ):= FUNCTION\\n D2 := PBblas.MU.From(Del, 2);\\n D2_x := net(id=(2))[1].value;\\n D2_y := m;\\n D2_map := PBblas.Matrix_Map(D2_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n b1_g_map := PBblas.Matrix_Map(D2_x,1,sizeTable[1].f_b_rows,1);\\n b1_g_tmp := PBblas.PB_dgemm(FALSE, FALSE,1.0,D2_map, D2, onesmap, onesdist, b1_g_map);\\n b1_g := PBblas.PB_dscal(m_1, b1_g_tmp);\\n b1_g_no := PBblas.MU.To(b1_g,1);\\n BiasGrad_Step(DATASET(PBblas.Types.MUElement) InputBG, INTEGER coun) := FUNCTION\\n L := coun +1 ;\\n 
DL_1 := PBblas.MU.From(Del, L+1);\\n DL_1_x := net(id=(L+1))[1].value;\\n DL_1_y := m;\\n DL_1_map := PBblas.Matrix_Map(DL_1_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n bL_g_map := PBblas.Matrix_Map(DL_1_x,1,sizeTable[1].f_b_rows,1);\\n bL_g_tmp := PBblas.PB_dgemm(FALSE, FALSE,1.0,DL_1_map, DL_1, onesmap, onesdist, bL_g_map);\\n bL_g := PBblas.PB_dscal(m_1, bL_g_tmp);\\n bL_g_no := PBblas.MU.To(bL_g,L);\\n RETURN InputBG+bL_g_no;\\n END;//END BiasGrad_Step\\n final_bg := LOOP(b1_g_no, COUNTER <= iterations, BiasGrad_Step(ROWS(LEFT),COUNTER));\\n RETURN final_bg;\\n END;//End BiasGrad\\n GradDesUpdate (DATASET(PBblas.Types.MUElement) tobeUpdated, DATASET(PBblas.Types.MUElement) GradDesTerm ):= FUNCTION\\n tmp1 := PBblas.MU.From(tobeUpdated, 1);\\n gterm1 := PBblas.MU.From(GradDesTerm, 1);\\n tmp1_updated := PBblas.PB_daxpy(-1, PBblas.PB_dscal(ALPHA, gterm1), tmp1);\\n tmp1_updated_no := PBblas.MU.To(tmp1_updated,1);\\n GradDesUpdate_Step(DATASET(PBblas.Types.MUElement) Inputtmp, INTEGER coun) := FUNCTION\\n L := coun + 1;\\n tmpL := PBblas.MU.From(tobeUpdated, L);\\n gtermL := PBblas.MU.From(GradDesTerm, L);\\n tmpL_updated := PBblas.PB_daxpy(-1, PBblas.PB_dscal(ALPHA, gtermL), tmpL);\\n tmpL_updated_no := PBblas.MU.To(tmpL_updated,L);\\n RETURN Inputtmp+tmpL_updated_no;\\n END;//End GradDesUpdate_Step\\n final_updated := LOOP(tmp1_updated_no, iterations, GradDesUpdate_Step(ROWS(LEFT),COUNTER));\\n RETURN final_updated;\\n END;//End GradDesUpdate\\n //main Loop ieteration in back propagation algorithm that does the gradient descent and weight and bias updates\\n GradDesLoop (DATASET(PBblas.Types.MUElement) Intparams ):= FUNCTION\\n GradDesLoop_Step (DATASET(PBblas.Types.MUElement) Inputparams) := FUNCTION\\n w_in := Inputparams (no<NumLayers);//input weight parameter in PBblas.Types.MUElement format\\n b_in_tmp := Inputparams (no>NumLayers);\\n b_in := PROJECT (b_in_tmp,Subno(LEFT));//input bias parameter in PBblas.Types.MUElement format\\n //creat matrix of each bias vector by repeating each bias vector in m columns (to make the following calculations easier)\\n b_in1 := PBblas.MU.From(b_in,1);\\n b_in1_mat := ML.DMat.Converted.FromPart2Elm (b_in1);\\n b_in1_mat_x := Mat.Has(b_in1_mat).Stats.Xmax;\\n b_in1_mat_rep := Mat.Repmat(b_in1_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n b_in1map := PBblas.Matrix_Map(b_in1_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n b_in1dist := DMAT.Converted.FromElement(b_in1_mat_rep,b_in1map);\\n b_in1no := PBblas.MU.TO(b_in1dist,1);//first bias vector is converted to a matrix, now convert the rest of bias vectors into teh matrix\\n //loopbody to creat the rest of bias matrix blocks\\n Creat_BiasBlock(DATASET(PBblas.Types.MUElement) inputb, INTEGER coun) := FUNCTION\\n L := coun+1; //creat the weight block for weight between layers L and L+1\\n b_inL := PBblas.MU.From(b_in,L);\\n b_inL_mat := ML.DMat.Converted.FromPart2Elm (b_inL);\\n b_inL_mat_x := Mat.Has(b_inL_mat).Stats.Xmax;\\n b_inL_mat_rep := Mat.Repmat(b_inL_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n b_inLmap := PBblas.Matrix_Map(b_inL_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n b_inLdist := DMAT.Converted.FromElement(b_inL_mat_rep, b_inLmap);\\n b_inLno := PBblas.MU.TO(b_inLdist,L);\\n //RETURN inputb+bno;\\n RETURN b_inLno+inputb;\\n END;\\n //b_in_rep := LOOP(b_in1no, COUNTER <= iterations, Creat_BiasBlock(ROWS(LEFT),COUNTER));//matrices of converted bias 
vectors\\n b_in_rep := LOOP(b_in1no, iterations, Creat_BiasBlock(ROWS(LEFT),COUNTER));//matrices of converted bias vectors\\n //w_in , b_in and b_in_repno are three block matrices we are going to work with\\n //w_in : weight matrices\\n //b_in : bias matrices\\n //b_in_rep : each bias vector is repeated m columns to make the calculations easier\\n //in all the calculations and defined functions (FF, DELTA) the repeated bias matrices are used, the only time\\n //that the bias vector is used is when we update the bias in "GradDesUpdate".\\n //1- apply the Feed Forward pass\\n A_ffpass := FF (w_in,b_in_rep);\\n //2-apply the back propagation step to update the parameters\\n D_delta := DELTA (w_in, b_in_rep, A_ffpass);\\n Weight_GD := WeightGrad(w_in, A_ffpass, D_delta);\\n Bias_GD := BiasGrad (D_delta);\\n NewWeight := GradDesUpdate (w_in, Weight_GD);\\n NewBias := GradDesUpdate (b_in, Bias_GD);\\n NewBias_added := PROJECT (NewBias,Addno(LEFT));\\n Updated_Params := NewWeight + NewBias_added;\\n RETURN Updated_Params;\\n END;//END GradDesLoop_Step\\n Final_Updated_Params := LOOP(Intparams, COUNTER <= MaxIter, GradDesLoop_Step(ROWS(LEFT)));\\n RETURN Final_Updated_Params;\\n END;//END GradDesLoop\\n NNparams := GradDesLoop (param_tobe_passed);// NNparams is in PBblas.Types.MUElement format\\n //convert NNparams to Numeric Field format\\n nnparam1 := PBblas.MU.From(NNparams,1);\\n nnparam1_mat := DMat.Converted.FromPart2Elm (nnparam1);\\n nnparam1_mat_no := Mat.MU.TO(nnparam1_mat,1);\\n NL := MAX (net, id);\\n Mu_convert(DATASET(Mat.Types.MUElement) inputMU, INTEGER coun) := FUNCTION\\n L := IF(coun < NL-1, coun+1, coun+2);\\n nnparamL := PBblas.MU.From(NNparams,L);\\n nnparamL_mat := DMat.Converted.FromPart2Elm (nnparamL);\\n nnparamL_mat_no := Mat.MU.TO(nnparamL_mat,L);\\n RETURN inputMU+nnparamL_mat_no;\\n END;\\n NNparams_MUE := LOOP(nnparam1_mat_no, 2*NL-3, Mu_convert(ROWS(LEFT),COUNTER));\\n AppendID(NNparams_MUE, id, NNparams_MUE_id);\\n ToField (NNparams_MUE_id, NNparams_MUE_out, id, 'x,y,value,no');\\n EXPORT Mod := NNparams_MUE_out;//mod is in NumericField format\\n //EXPORT alaki := biasVecdistno_added;\\n END;// END BP\\n EXPORT NNLearn(DATASET(Types.NumericField) Indep, DATASET(Types.NumericField) Dep,DATASET(Mat.Types.MUElement) IntW, DATASET(Mat.Types.MUElement) Intb, REAL8 LAMBDA=0.001, REAL8 ALPHA=0.1, UNSIGNED2 MaxIter=100) := BP(Indep,Dep, IntW, Intb, LAMBDA, ALPHA, MaxIter).mod;\\n //this function applies the feed forward pass to the input dataset (Indep) based on the input neural network model (Learntmod)\\n EXPORT NNOutput(DATASET(Types.NumericField) Indep,DATASET(Types.NumericField) Learntmod) :=FUNCTION\\n //used fucntion\\n PBblas.Types.value_t sigmoid(PBblas.Types.value_t v, PBblas.Types.dimension_t r, PBblas.Types.dimension_t c) := 1/(1+exp(-1*v));\\n dt := Types.ToMatrix (Indep);\\n //dTmp := Mat.InsertColumn(dt,1,1.0); // add the intercept column\\n dTmp := dt;\\n d := Mat.Trans(dTmp); //in the entire of the calculations we work with the d matrix that each sample is presented in one column\\n m := MAX (d, d.y); //number of samples\\n m_1 := 1/m;\\n sizeRec := RECORD\\n PBblas.Types.dimension_t m_rows;\\n PBblas.Types.dimension_t m_cols;\\n PBblas.Types.dimension_t f_b_rows;\\n PBblas.Types.dimension_t f_b_cols;\\n END;\\n //Map for Matrix d.\\n havemaxrow := maxrows > 0;\\n havemaxcol := maxcols > 0;\\n havemaxrowcol := havemaxrow and havemaxcol;\\n dstats := Mat.Has(d).Stats;\\n d_n := dstats.XMax;\\n d_m := dstats.YMax;\\n NL := MAX(net,id);\\n iterations := 
NL-2;\\n output_num := net(id=NL)[1].value;\\n derivemap := IF(havemaxrowcol, PBblas.AutoBVMap(d_n, d_m,prows,pcols,maxrows, maxcols),\\n IF(havemaxrow, PBblas.AutoBVMap(d_n, d_m,prows,pcols,maxrows),\\n IF(havemaxcol, PBblas.AutoBVMap(d_n, d_m,prows,pcols,,maxcols),\\n PBblas.AutoBVMap(d_n, d_m,prows,pcols))));\\n SHARED sizeTable := DATASET([{derivemap.matrix_rows,derivemap.matrix_cols,derivemap.part_rows(1),derivemap.part_cols(1)}], sizeRec);\\n //Create block matrix d\\n dmap := PBblas.Matrix_Map(sizeTable[1].m_rows,sizeTable[1].m_cols,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n ddist := DMAT.Converted.FromElement(d,dmap);\\n //Extract Weights and Bias\\n W_mat := ExtractWeights (Learntmod);\\n B_mat := ExtractBias (Learntmod);\\n //creat w1 partion block matrix\\n w1_mat := Mat.MU.From(W_mat,1);\\n w1_mat_x := Mat.Has(w1_mat).Stats.Xmax;\\n w1_mat_y := Mat.Has(w1_mat).Stats.Ymax;\\n w1map := PBblas.Matrix_Map(w1_mat_x, w1_mat_y, sizeTable[1].f_b_rows, sizeTable[1].f_b_rows);\\n w1dist := DMAT.Converted.FromElement(w1_mat,w1map);\\n //repeat b1 vector in m columsn and the creat the partion block matrix\\n b1_mat := Mat.MU.From(B_mat,1);\\n b1_mat_x := Mat.Has(b1_mat).Stats.Xmax;\\n b1_mat_rep := Mat.Repmat(b1_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n b1map := PBblas.Matrix_Map(b1_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n b1dist := DMAT.Converted.FromElement(b1_mat_rep,b1map);\\n //calculate a2 (output from layer 2)\\n //z2 = w1*X+b1;\\n z2 := PBblas.PB_dgemm(FALSE, FALSE,1.0,w1map, w1dist, dmap, ddist, b1map, b1dist, 1.0);\\n //a2 = sigmoid (z2);\\n a2 := PBblas.Apply2Elements(b1map, z2, sigmoid);\\n FF_Step(DATASET(Layout_Part) A, INTEGER coun) := FUNCTION\\n L := coun + 1;\\n aL := A; //output of layer L\\n aL_x := net(id=L)[1].value;;\\n aLmap := PBblas.Matrix_Map(aL_x,m,sizeTable[1].f_b_rows,sizeTable[1].f_b_cols);\\n //creat wL partion block matrix\\n wL_mat := Mat.MU.From(W_mat,L);\\n wL_mat_x := Mat.Has(wL_mat).Stats.Xmax;\\n wL_mat_y := Mat.Has(wL_mat).Stats.Ymax;\\n wLmap := PBblas.Matrix_Map(wL_mat_x, wL_mat_y, sizeTable[1].f_b_rows, sizeTable[1].f_b_rows);\\n wLdist := DMAT.Converted.FromElement(wL_mat,wLmap);\\n //repeat b1 vector in m columsn and the creat the partion block matrix\\n bL_mat := Mat.MU.From(B_mat,L);\\n bL_mat_x := Mat.Has(bL_mat).Stats.Xmax;\\n bL_mat_rep := Mat.Repmat(bL_mat, 1, m); // Bias vector is repeated in m columns to make the future calculations easier\\n bLmap := PBblas.Matrix_Map(bL_mat_x, m, sizeTable[1].f_b_rows, sizeTable[1].f_b_cols);\\n bLdist := DMAT.Converted.FromElement(bL_mat_rep,bLmap);\\n //calculate a(L+1) (output from layer L)\\n //z(L+1) = wL*X+bL;\\n zL_1 := PBblas.PB_dgemm(FALSE, FALSE,1.0, wLmap, wLdist, aLmap, aL, bLmap, bLdist, 1.0);\\n //aL_1 = sigmoid (zL_1);\\n aL_1 := PBblas.Apply2Elements(bLmap, zL_1, sigmoid);\\n RETURN aL_1;\\n END;\\n final_A := LOOP(a2, COUNTER <= iterations, FF_Step(ROWS(LEFT),COUNTER));\\n final_A_mat := DMat.Converted.FromPart2Elm(final_A);\\n Types.l_result tr(Mat.Types.Element le) := TRANSFORM\\n SELF.value := le.x;\\n SELF.id := le.y;\\n SELF.number := 1; //number of class\\n SELF.conf := le.value;\\n END;\\n RETURN PROJECT (Final_A_mat, tr(LEFT));\\n END;// END NNOutput\\n EXPORT NNClassify(DATASET(Types.NumericField) Indep,DATASET(Types.NumericField) Learntmod) := FUNCTION\\n Dist := NNOutput(Indep, Learntmod);\\n numrow := MAX (Dist,Dist.value);//number of nodes in the last layer of the neural network\\n // d_Dist := 
DISTRIBUTE (Dist, id);\\n // S:= SORT(d_Dist,id,conf,LOCAL);\\n d_grpd := GROUP(Dist, id, ALL);\\n GS := SORT(d_grpd, conf);\\n S := GROUP(GS); // Ungrouped GS\\n SeqRec := RECORD\\n l_result;\\n INTEGER8 Sequence := 0;\\n END;\\n //add seq field to S\\n SeqRec AddS (S l, INTEGER c) := TRANSFORM\\n SELF.Sequence := c%numrow;\\n SELF := l;\\n END;\\n Sseq := PROJECT(S, AddS(LEFT,COUNTER),LOCAL);\\n classified := Sseq (Sseq.Sequence=0);\\n RETURN PROJECT(classified,l_result,LOCAL);\\n END; // END NNClassify\\n \\nEND;//END NeuralNetworks
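A minimal usage sketch (not from the original post), assuming the module interface shown in the code above; IndepData and DepData are hypothetical DATASET(ML.Types.NumericField) training inputs, and the layer sizes simply follow the example given in the module's own comments:

IMPORT ML;
// Topology per the comments above: layer 1 has 4 nodes, layer 2 has 2, layer 3 has 5
net  := DATASET([{1,1,4},{2,1,2},{3,1,5}], ML.Types.DiscreteField);
NN   := ML.NeuralNetworks(net);
IntW := NN.IntWeights;   // randomly initialised weight matrices
Intb := NN.IntBias;      // initial bias vectors
// IndepData / DepData : hypothetical NumericField training data (not defined in this thread)
LearntMod := NN.NNLearn(IndepData, DepData, IntW, Intb, 0.001, 0.1, 100);
Classes   := NN.NNClassify(IndepData, LearntMod);
OUTPUT(Classes);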
\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-20 20:52:54\" },\n\t{ \"post_id\": 7423, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nCan you please attach the NeuralNetworks.ECL file?\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2015-04-20 20:49:45\" },\n\t{ \"post_id\": 7422, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\n I have tested it now and getting below errors. Its same issue which I am facing when I execute using VMWare.\\n\\n \\n Error eclcc 3000 Compile/Link failed for W20150420-163227 (see '//10.0.1.0/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) 0 0 W20150420-163227 \\n\\nWarning eclcc 1048 EXPORT/SHARED qualifiers are ignored in this context 7 478 C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\MyFiles\\\\ML\\\\NeuralNetworks.ecl \\n\\nWarning eclcc 0 (0,0): error C6003: C++ link error: cannot find -lcblas 0 0 \\n\\nWarning eclcc 0 0 0 \\n\\nWarning eclcc 0 ---------- compiler output -------------- 0 0 \\n\\nWarning eclcc 0 /usr/bin/ld: cannot find -lcblas 0 0 \\n\\nWarning eclcc 0 collect2: ld returned 1 exit status 0 0 \\n\\nWarning eclcc 0 0 0 \\n\\nWarning eclcc 0 --------- end compiler output ----------- 0 0 \\n\\nInfo eclcc 3118 Mismatch in minor version number (5.0.0 v 5.2.0) 0 0 unknown \\n\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-20 20:33:32\" },\n\t{ \"post_id\": 7419, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nThe cluster has been upgraded. Please retest and let us know if its working for you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-20 12:59:30\" },\n\t{ \"post_id\": 7405, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\n Thanks...I will check it after updating the libraries in the cluster.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-17 23:25:38\" },\n\t{ \"post_id\": 7404, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nI am sorry for the late reply, as I was traveling for most of the day. I will have our HPCC Support Team upgrade the cluster to support the blas libraries on Monday.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-17 23:23:31\" },\n\t{ \"post_id\": 7401, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Edin,\\n\\n I got access to this cluster on December 22, 2014. I do not know exactly who created it but I have got these credentials from my professor. \\n\\nHow can I resolve it in VMWare?\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-17 16:21:45\" },\n\t{ \"post_id\": 7399, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"edin.muharemagic\", \"post_text\": \"Hello Pooja,\\n\\nCould you please provide more information on the https://216.19.105.7:8010 cluster? 
When were you granted an account on that cluster, and who helped you with it?\\n\\nI am trying to figure out who owns that cluster so that I can request an appropriate update.\\n\\nRegards,\\nEdin\", \"post_time\": \"2015-04-17 16:13:13\" },\n\t{ \"post_id\": 7397, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob/Edin,\\n\\n Thanks for the replies.\\n \\n Training cluster I am using is https://216.19.105.7:8010 and though I have putty and WINSCP, I am not able to login as connection is being refused. Also I guess I do not have access to do anything inside this cluster.\\n\\n For VMWare, I could not find any eclcc.log in System Servers section but found this information in the workunit and in SystemServers/eclccserver.log:\\n\\n Error\\teclcc\\t3000\\tCompile/Link failed for W20150417-142917 (see '//192.168.56.129/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details)\\t0\\t0\\tW20150417-142917\\n\\nWarning\\teclcc\\t1048\\tEXPORT/SHARED qualifiers are ignored in this context\\t7\\t478\\tC:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\MyFiles\\\\ML\\\\NeuralNetworks.ecl\\n\\nWarning\\teclcc\\t0\\t\\t0\\t0\\t\\n\\nWarning\\teclcc\\t0\\t---------- compiler output --------------\\t0\\t0\\t\\n\\nWarning\\teclcc\\t0\\t/usr/bin/ld: cannot find -lcblas\\t0\\t0\\t\\n\\nWarning\\teclcc\\t0\\tcollect2: ld returned 1 exit status\\t0\\t0\\t\\n\\nWarning\\teclcc\\t0\\t\\t0\\t0\\t\\n\\nWarning\\teclcc\\t0\\t--------- end compiler output -----------\\t0\\t0\\t\\n\\nInfo\\teclcc\\t3118\\tMismatch in subminor version number (5.0.0 v 5.0.2)\\t0\\t0\\tunknown\\n\\n Please help me in this aspect.\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-04-17 14:47:14\" },\n\t{ \"post_id\": 7396, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"edin.muharemagic\", \"post_text\": \"Hi Pooja, \\n\\nThe ECL-ML library includes implementations for the following Deep Learning algorithms:\\n\\nSoftMax (Classify.SoftMax) \\nThe implementation of SoftMax regression classifier is done based on Stanford Deep Learning tutorial available at http://ufldl.stanford.edu/wiki/index.ph ... Regression.\\nSoftmax regression model generalizes logistic regression to classification problems where the class label can take on more than two possible values. It is a supervised learning algorithm which can be used in conjunction with deep learning/unsupervised feature learning methods.
\\nNeuralNetworks (ML.NeuralNetworks)\\nThis is an implementation of a multi-layer feed-forward Neural Network. The implementation is based on the Stanford Deep Learning tutorial available at: http://ufldl.stanford.edu/wiki/index.ph ... _Algorithm
\\nDeepLearning (ML.DeepLearning)\\nUse this module to perform deep learning algorithms and build deep learning networks. The implementation is based on the Stanford Deep Learning tutorial available at: http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial\\n
\\nSparse Autoencoder (DeepLearning.Sparse_Autoencoder)\\nThe implementation of a Sparse Autoencoder. A Sparse Autoencoder is actually a 3-layer neural network in which the input and output layers receive the input data. So the number of nodes in the first and third (output) layers of the network is the same, and it equals the number of input features. The number of hidden-layer nodes (second layer) is a user-defined parameter. \\n
\\n\\nThe error you are getting could mean that the BLAS libraries are not installed on your cluster. Here's what you need to check:\\nThe BLAS/ATLAS development packages need to be installed on every node of the cluster. The packages may already be present; if this is a CentOS system, note that this distribution does not add a soft link in a location used by the default linker.\\n\\nThe cblas.h header file was installed on at least the eclccserver node, because the code compiles.\\n\\nIf the packages have been installed, you will see them by running whereis libcblas on each node. The path /usr/lib is the correct answer. If they show up somewhere else, like lib64, you will need to add a soft link.\\n
\\n\\nRegards,\\nEdin\", \"post_time\": \"2015-04-17 14:09:54\" },\n\t{ \"post_id\": 7395, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nWhat training cluster are you using? It may need to be upgraded with the appropriate library or perhaps the configuration needs to add a path to that location, or the file copied to another location. If you have permission to access the cluster, you can use WINSCP or Putty to search for that file on the cluster. \\n\\nNOTE: I will be traveling in a few minutes, so there might be a delay in my reply until this later this afternoon.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-17 13:45:25\" },\n\t{ \"post_id\": 7394, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Bob,\\n\\nThanks for the reply. My target is set to thor only. Also if I am using vmware, then I find no errors but when I use training cluster, it says that cblas.h header file is missing apart from that compile/link failed error.Its a header file included in embedded c++ code in one related file. Now how to find, why its not able to find that included file?\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-04-17 13:37:21\" },\n\t{ \"post_id\": 7392, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Re: Deep Learning\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nCompile/Link failed for W20150322-004815
\\n\\nIn most cases, the error that you see usually means that there was a problem finding the compiler. In my classes, when a student sees that error it usually means that the target of their ECL is set to "Local" instead of "Thor". Check your target first and make sure it is set to Thor.\\n\\nAs to the log file for the ECLCC Server, you can find and browse that log in the Systems Servers section of your ECL Watch. Look for ECL CC Servers and myeclccserver.\\n\\nI would like to know which kind of deep learning is implemented, Is it like Deep Belief Network or Boltzmann machine or convolutional networks etc...Are there any further proposals in the deep learning.\\nPlease share if there is any documentation related to deep learning algorithms implemented.
\\n\\nLet me check with the Machine Learning developers. My educated guess is that outside of the PDF documentation provided, there may be additional info in the GitHub Repository. I will check and let you know.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-17 11:47:31\" },\n\t{ \"post_id\": 7311, \"topic_id\": 1677, \"forum_id\": 10, \"post_subject\": \"Deep Learning\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\nI would like to know which kind of deep learning is implemented, Is it like Deep Belief Network or Boltzmann machine or convolutional networks etc...Are there any further proposals in the deep learning.\\nPlease share if there is any documentation related to deep learning algorithms implemented. Also, please help me with how to check or ssh through eclcc.log as I am getting an error while running Neural networks code and it has to be checked in eclcc.log. Error is something like "Error: Compile/Link failed for W20150322-004815 (see '//192.168.56.129/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0), 3000, W20150322-004815"\\n\\nThanks in advance,\\nPooja.\", \"post_time\": \"2015-04-10 17:38:27\" },\n\t{ \"post_id\": 7420, \"topic_id\": 1680, \"forum_id\": 10, \"post_subject\": \"Re: Error in reading a line more than 10MB\", \"username\": \"bforeman\", \"post_text\": \"This issue has been fixed. Please try the following in your code:\\n\\n#option('maxCsvRowSizeMb', '20');
;\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-20 13:06:31\" },\n\t{ \"post_id\": 7403, \"topic_id\": 1680, \"forum_id\": 10, \"post_subject\": \"Re: Error in reading a line more than 10MB\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot Bob and Hall\", \"post_time\": \"2015-04-17 19:06:26\" },\n\t{ \"post_id\": 7331, \"topic_id\": 1680, \"forum_id\": 10, \"post_subject\": \"Re: Error in reading a line more than 10MB\", \"username\": \"bforeman\", \"post_text\": \"See #OPTION and outputLimit (default: 10) -- it sets the maximum size (in MB) of a result stored in the workunit.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-13 20:12:37\" },\n\t{ \"post_id\": 7316, \"topic_id\": 1680, \"forum_id\": 10, \"post_subject\": \"Error in reading a line more than 10MB\", \"username\": \"pius_francis\", \"post_text\": \"I have a dataset where each record corresponds to an XML file. I am getting an error while processing the dataset if the size of the record or XML is greater than 10 MB. Is there a way to rectify it?\", \"post_time\": \"2015-04-11 08:47:39\" },\n\t{ \"post_id\": 7469, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you very much Bob \", \"post_time\": \"2015-04-27 15:36:21\" },\n\t{ \"post_id\": 7468, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nAll you need to do is to perform a SELF-JOIN just after the ITERATE and before the last PROJECT. This will get the result that you need.\\n\\nLet me give you a hint
\\n\\n
NewR SlimIt(SimOut Le,SimOut Ri) := TRANSFORM\\n SELF.lSentenceid := Le.LSentenceID;\\n SELF.rSentenceid := Ri.RSentenceID;\\n SELF.LeftTxt := Le.LeftTxt;\\n SELF.RightTxt := Ri.RightTxt;\\n SELF := Le;\\nEND;\\n\\nSentCompare := JOIN(SimOut,SimOut,SimilarWords(LEFT.LeftTxt,RIGHT.RightTxt) > 0,SlimIt(LEFT,RIGHT),ALL);\\t \\n \\nCountAlike := PROJECT(SentCompare,TRANSFORM(FinalRec,SELF.WordsAlike := SimilarWords(LEFT.LeftTxt,LEFT.RightTxt),SELF := LEFT));\\n \\nCountAlike;
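\\n\\nIf only one row per unordered pair of sentences is wanted (and the self-matches dropped), the JOIN result can simply be filtered before the final PROJECT. A minimal sketch reusing the definitions above -- DistinctPairs and CountAlikeOnce are just new names introduced here:\\n\\nDistinctPairs := SentCompare(lSentenceid < rSentenceid); // drops (n,n) and keeps each pair once\\nCountAlikeOnce := PROJECT(DistinctPairs,\\n TRANSFORM(FinalRec,\\n SELF.WordsAlike := SimilarWords(LEFT.LeftTxt,LEFT.RightTxt),\\n SELF := LEFT));\\nCountAlikeOnce;\\n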
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-27 15:24:04\" },\n\t{ \"post_id\": 7462, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you Bob,\\n\\nWith the above code, output is as below.\\n\\nlsentenceid rsentenceid wordsalike\\n1 1 11\\n2 1 3\\n3 2 3\\n4 3 2\\n\\nBut I want my output to be\\n\\nlsentenceid rsentenceid wordsalike\\n1 2 3\\n1 3 1\\n1 4 2\\n2 3 3\\n2 4 3\\n3 4 2\\n\\nI have made changes to the SimCount function but unable to get the output.\\n\\nPlease help me to get the above output.\\n\\nThanks,\\nSwetha\", \"post_time\": \"2015-04-27 00:25:48\" },\n\t{ \"post_id\": 7447, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nHere is the code you are looking for:\\n\\nIMPORT $,STD;\\nRraw := RECORD\\n UNSIGNED Sentenceid; \\n\\tSTRING Txt;\\n END;\\n \\nd := DATASET([{1,'Now is the time for all good developers to use ECL.'},\\n {2,'ECL is intuitive, easy to use, and is also extensible.'},\\n\\t\\t\\t\\t\\t\\t\\t{3,'Scalabilty is also a key feature.'},\\n {4,'ECL is also non-procedural and self optimizing.'}],RRaw);\\n\\nNewR := RECORD\\n UNSIGNED lSentenceid;\\n UNSIGNED rSentenceid;\\n UNSIGNED1 WordsAlike;\\n STRING LeftTxt;\\n STRING RightTxt;\\n END;\\n\\nSimilarWords(STRING Ltxt,STRING Rtxt) := FUNCTION\\n ParseWords(STRING txt ) := FUNCTION\\n PATTERN Ltrs := PATTERN('[A-Za-z0-9()/.,]');\\n PATTERN Char := Ltrs | '-' | '\\\\'';\\n TOKEN Word := Char+;\\n ds := DATASET([{txt}],{STRING line});\\n RETURN PARSE(ds,line,Word,{STRING word := MATCHTEXT(Word),UNSIGNED wordPos := MATCHPOSITION(Word)},ALL);\\n END;\\n\\nLWordSet := ParseWords(Ltxt);\\nRWordSet := ParseWords(RTxt);\\n\\nRETURN COUNT(JOIN(LWordSet,RWordset, Left.Word = RIGHT.WORD));\\nEND;\\n\\n//Build a PROJECT for the ITERATE\\nNewR PrepIterate(Rraw Le) := TRANSFORM\\n SELF.LSentenceID := Le.SentenceID;\\n SELF.RSentenceID := 0; \\n SELF.WordsAlike := 0;\\n SELF.RightTxt := '';\\n SELF.LeftTxt := Le.Txt;\\n END;\\n\\nPrepRecs := PROJECT(d,PrepIterate(LEFT));\\n// PrepRecs;\\n\\n//ITERATE here\\nNewR SimCount(PrepRecs Le,PrepRecs Ri) := TRANSFORM\\n SELF.RSentenceID := IF(Le.LSentenceID = 0,Ri.LSentenceID,Ri.LSentenceID-1);\\n SELF.LSentenceID := Ri.LSentenceID;\\n SELF.LeftTxt := Ri.LeftTxt;\\n SELF.RightTxt := IF(Le.LeftTxt = '',Ri.LeftTxt,Le.LeftTxt);\\n SELF.WordsAlike := 0;\\n END;\\n \\n \\n SimOut := ITERATE(PrepRecs,SimCount(LEFT,RIGHT));\\n \\n // SimOut;\\nFinalRec := RECORD\\n UNSIGNED lSentenceid;\\n UNSIGNED rSentenceid;\\n UNSIGNED1 WordsAlike;\\n END;\\n \\n CountAlike := PROJECT(SimOut,TRANSFORM(FinalRec,SELF.WordsAlike := SimilarWords(LEFT.LeftTxt,LEFT.RightTxt),SELF := LEFT));\\n \\n CountAlike;\\n
\\n\\n\\nThe only requirement is that the SentenceIDs need to be sequential. Resequencing or sorting may be necessary with actual real data.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-24 16:02:00\" },\n\t{ \"post_id\": 7446, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nYou are trying to apply the SentenceIDs to the ParseWords function, which will not work. ParseWords reads words, not numbers.\\n\\nYou need to start with a dataset that identifies each line or sentence, and also assigns a unique id to each line. Then as you process each sentence to look for similar words, you can simply replace the output with the Ids.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-24 15:26:54\" },\n\t{ \"post_id\": 7445, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nI working with the similar words example and stuck with errors, kindly help me on this.\\n\\nNewRec := RECORD\\nUNSIGNED1 WordsAlike;\\nUNSIGNED id1;\\nUNSIGNED id2;\\nEND;\\n\\nSimilarWords(STRING Ltxt,STRING Rtxt) := FUNCTION\\n ParseWords(UNSIGNED id ) := FUNCTION\\n PATTERN Ltrs := PATTERN('[A-Za-z0-9()/.,]');\\n PATTERN Char := Ltrs | '-' | '\\\\'';\\n TOKEN Word := Char+;\\n ds := DATASET([{id}]);\\n RETURN PARSE(ds,id1,id2,Word,{STRING word := MATCHTEXT(Word),UNSIGNED wordPos := MATCHPOSITION(Word)},ALL);\\n END;\\n\\t\\nLWordSet := ParseWords(Lid);\\nRWordSet := ParseWords(Rid);\\n\\nRETURN COUNT(JOIN(LWordSet,RWordset, Left.Word = RIGHT.WORD));\\nEND;\\n\\n//Build a PROJECT for the ITERATE\\nNewR PrepIterate(Rraw Le) := TRANSFORM\\nSELF.WordsAlike := 0;\\nSELF.id1 := '';\\nSELF.id2 := Le.id;\\nEND;\\n\\nPrepRecs := PROJECT(d,PrepIterate(LEFT));\\n//PrepRecs;\\n\\n//ITERATE here\\nNewR SimCount(PrepRecs Le,PrepRecs Ri) := TRANSFORM\\nSELF.id1 := Ri.id2;\\nSELF.id2 := IF(Le.id1 = '',Ri.id1,Le.id1);\\nSELF.WordsAlike := 0;\\nEND;\\n\\n\\nSimOut := ITERATE(PrepRecs,SimCount(LEFT,RIGHT));\\n\\n//SimOut;\\n\\nCountAlike := PROJECT(SimOut,TRANSFORM(NewRec,SELF.WordsAlike := SimilarWords(LEFT.id1,LEFT.id2),SELF := LEFT));\\n\\nCountAlike;
\\n\\nThanks,\\nSwetha\", \"post_time\": \"2015-04-24 15:16:35\" },\n\t{ \"post_id\": 7442, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nMy SimilarWords example I posted earlier should do that, or you may need to modify it slightly.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-24 12:28:50\" },\n\t{ \"post_id\": 7441, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Hai Bob,\\n\\nThank you for your help!\\n\\nWhat I am looking for is,\\n\\nResult 3:\\nid1 id2 count of words\\n2 3 5\\n3 2 4\\n1 2 4\\n1 3 2\\n2 1 5\\n3 1 2\\n\\nKindly help me to achieve the same.\\n\\nThank you.\\nSwetha\", \"post_time\": \"2015-04-24 02:30:09\" },\n\t{ \"post_id\": 7436, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nI think you are almost there. In your inverted index, you have a word, word position, and a docid. I think the docid should really be a sentenceid. Just by sorting the NORMALIZE result by word, you can get what you need for Result1:\\n\\ndocid pos word\\n1\\t6\\tALL\\n2\\t9\\tALSO\\n3\\t3\\tALSO\\n2\\t7\\tAND\\n3\\t5\\tAND\\n1\\t8\\tDEVELOPERS\\n\\nand from here each sentence can be commoned up using a FULL OUTER SELF JOIN with a DEDUP to achieve what you are trying to do in Result2:\\n\\ndocidsort := SORT(Records,Word);\\ndocidsort;\\n\\ncommonrec := RECORD\\n UNSIGNED id1;\\n UNSIGNED id2;\\n STRING word;\\n END;\\n\\t\\n\\tcommonrec commonup(docidsort Le,docidsort Ri) := TRANSFORM\\n SELF.id1 := Le.docid;\\n\\t SELF.id2 := Ri.docid;\\n\\t SELF := Le;\\n\\t END;\\n\\t\\n\\tSJOIN := JOIN(docidsort,docidsort,LEFT.word=RIGHT.Word,commonup(LEFT,RIGHT),FULL OUTER);\\n\\t\\n\\tnodups := DEDUP(sJOIN(id1 <> id2),LEFT=RIGHT);\\n\\t\\n\\tnodups;
\\n\\nMy Result:\\nsentenceid1 sentenceid2 word\\n2\\t3\\tALSO\\n3\\t2\\tALSO\\n2\\t3\\tAND\\n3\\t2\\tAND\\n1\\t2\\tECL\\n1\\t3\\tECL\\n2\\t1\\tECL\\n2\\t3\\tECL\\n3\\t1\\tECL\\n\\nBut note we have not done anything with the INDEX. I guess I should have asked this in the beginning of the thread: What is it you are eventually trying to do?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-23 08:21:48\" },\n\t{ \"post_id\": 7430, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Hi Bob,\\n\\nI am working on some extensions to the inverted index code and the result I am working is as below.\\n\\nResult1:\\n\\nsentence1 word1\\nsentence2 word1\\nsentence5 word1\\nsentence6 word1\\nsentence2 word2\\nsentence5 word2\\nsentence6 word2 .... and so on\\n\\nResult2:\\n (number of words appeared\\n in the sentences)\\nsentence1 sentence2 1\\nsentence1 sentence5 1\\nsentence1 sentence6 1\\nsentence2 sentence5 2\\nsentence2 sentence6 2\\nsentence5 sentence6 2.... and so on\\n\\nkindly help me to achieve the same.\\n\\nThank you!\\nSwetha\", \"post_time\": \"2015-04-22 04:07:00\" },\n\t{ \"post_id\": 7415, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you Bob. \", \"post_time\": \"2015-04-20 01:03:53\" },\n\t{ \"post_id\": 7414, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nHere is one way to approach your problem:\\n\\n
IMPORT $,STD;\\nRraw := RECORD\\n STRING Txt;\\n END;\\n \\nd := DATASET([{'Now is the time for all good developers to use ECL.'},\\n {'ECL is intuitive, easy to use, and is also extensible.'},\\n\\t {'Scalabilty is also a key feature.'},\\n {'ECL is also non-procedural and self optimizing.'}],RRaw);\\n\\nNewR := RECORD\\n UNSIGNED1 WordsAlike;\\n STRING LeftTxt;\\n STRING RightTxt;\\n END;\\n\\nSimilarWords(STRING Ltxt,STRING Rtxt) := FUNCTION\\n ParseWords(STRING txt ) := FUNCTION\\n PATTERN Ltrs := PATTERN('[A-Za-z0-9()/.,]');\\n PATTERN Char := Ltrs | '-' | '\\\\'';\\n TOKEN Word := Char+;\\n ds := DATASET([{txt}],{STRING line});\\n RETURN PARSE(ds,line,Word,{STRING word := MATCHTEXT(Word),UNSIGNED wordPos := MATCHPOSITION(Word)},ALL);\\n END;\\n\\nLWordSet := ParseWords(Ltxt);\\nRWordSet := ParseWords(RTxt);\\n\\nRETURN COUNT(JOIN(LWordSet,RWordset, Left.Word = RIGHT.WORD));\\nEND;\\n\\n//Build a PROJECT for the ITERATE\\nNewR PrepIterate(Rraw Le) := TRANSFORM\\n SELF.WordsAlike := 0;\\n SELF.RightTxt := '';\\n SELF.LeftTxt := Le.Txt;\\n END;\\n\\nPrepRecs := PROJECT(d,PrepIterate(LEFT));\\n//PrepRecs;\\n\\n//ITERATE here\\nNewR SimCount(PrepRecs Le,PrepRecs Ri) := TRANSFORM\\n SELF.LeftTxt := Ri.LeftTxt;\\n SELF.RightTxt := IF(Le.LeftTxt = '',Ri.LeftTxt,Le.LeftTxt);\\n SELF.WordsAlike := 0;\\nEND;\\n\\n\\nSimOut := ITERATE(PrepRecs,SimCount(LEFT,RIGHT));\\n\\n//SimOut;\\n\\nCountAlike := PROJECT(SimOut,TRANSFORM(NewR,SELF.WordsAlike := SimilarWords(LEFT.LeftTxt,LEFT.RightTxt),SELF := LEFT));\\n\\nCountAlike;\\n
\\n\\nOnce you have your DATASET organized by sentences, you can ITERATE to combine sentence pairs and then PROJECT each pair and call a FUNCTION that converts each sentence into a word table, and then looks for matching words in the word tables via a JOIN.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2015-04-19 17:01:03\" },\n\t{ \"post_id\": 7409, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nOnce you have your text document ready, I think this is how I would approach it.\\n\\nI would use ITERATE to process the line(sentence) pairs. Inside the ITERATE, I would call a FUNCTION that takes each LEFT and RIGHT line, and converts each to a word table. I would then RETURN the COUNT of the JOIN of the matching words in the LEFT and RIGHT.\\n\\nI am reaching out to other colleagues for their opinion, but since this is the weekend I would not expect a follow up until Monday.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-18 14:49:16\" },\n\t{ \"post_id\": 7408, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you Bob,\\n\\nI would like to find the similarity of sentences with respect to the words in each sentence of a text file.\\n\\nCould you please give me an example of FindCount?\\n\\nI want my output to be as follows:\\nsentence1 sentence2 similarity_count\\n\\nKindly help me to achieve the same.\\n\\nThanks \\nSwetha\", \"post_time\": \"2015-04-18 03:27:36\" },\n\t{ \"post_id\": 7333, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Re: Number of occurrences of the string\", \"username\": \"bforeman\", \"post_text\": \"I am still not 100% certain what you are trying to do, but what about using the FindCount String function in our standard library?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-13 20:20:11\" },\n\t{ \"post_id\": 7330, \"topic_id\": 1683, \"forum_id\": 10, \"post_subject\": \"Number of occurrences of the string\", \"username\": \"swethareddy01\", \"post_text\": \"Hello All,\\n\\nI would like to find the number of occurrences of the second string in the first string with respect to each and every word.\\n\\nHow can I find this for a text file? 
Please help me to find the same.\\n\\nThank you.\\n\\nSwetha\", \"post_time\": \"2015-04-13 16:08:12\" },\n\t{ \"post_id\": 7540, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you John,\\n\\nI am trying with the join, and got below errors:\\n\\nError: Parameter le type mismatch - expected row of rec, given row of <unnamed> (45, 55) \\nError: Omitted parameter ri has no default value (45, 47)\\n\\nCould you please help me to resolve the errors.\\nIMPORT kjv;\\nIMPORT * from Std.Str;\\n\\n//EXPORT Inversion := MODULE\\n\\n//SHARED \\nI := $.File_KJV.Txt;\\n//SHARED \\nR := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n\\t\\t\\t\\t\\t\\t//STRING verse_Text := '';\\n STRING Word := '';\\n END;\\n\\t\\t\\t\\t\\t\\n\\tsRec := RECORD\\n\\t UNSIGNED1 Verse1 := 0;\\n\\t\\t\\tUNSIGNED1 Verse2 := 0;\\n\\t\\t\\tUNSIGNED1 count := 0;\\n\\tEND;\\n\\nClean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n R TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\nRec := RECORD\\n \\tSTRING stop;\\n END;\\n\\t\\n\\tds := DATASET('~.::stopwords.txt',Rec,CSV(SEPARATOR('')));\\n\\t\\nN compare(Rec le, Rec ri) := TRANSFORM\\nSELF.Word := IF(le.stop = ri.stop, SKIP, '') ;\\n//SELF.word := ri.stop;\\n//SELF.stop1 := le.stop - ri.stop\\nSELF := le;\\nend;\\n\\nnewrec := join(N, ds, left.word = right.stop, compare(left, right));\\nnewrec;\\n\\nRecords := N;\\n\\nKey := INDEX(Records,{STRING20 Wrd := Word,BookNum,Chapter,Verse,Word_Pos},'~key::kjv_inversion');\\nBld := BUILD(Key,OVERWRITE);
\", \"post_time\": \"2015-05-05 16:03:00\" },\n\t{ \"post_id\": 7534, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"john holt\", \"post_text\": \"There are also other choices available as well. You could create a dictionary of the noise words and do a lookup and use SKIP if the word is present. You could also use a set and test membership.\", \"post_time\": \"2015-05-04 20:41:49\" },\n\t{ \"post_id\": 7533, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"john holt\", \"post_text\": \"You would remove the noise words after the NORMALIZE(...) function. Use a LEFT ONLY join against your stop word list.\", \"post_time\": \"2015-05-04 20:39:14\" },\n\t{ \"post_id\": 7532, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hi All,\\n\\nI want to remove stop words from the dataset before using it for inverted index.\\n\\nHow do I achieve the same?\\n\\nIMPORT kjv;\\nIMPORT * from Std.Str;\\n\\n//EXPORT Inversion := MODULE\\n\\n//SHARED \\nI := $.File_KJV.Txt;\\n//SHARED \\nR := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n\\t\\t\\t\\t\\t\\t//STRING verse_Text := '';\\n STRING Word := '';\\n END;\\n\\t\\t\\t\\t\\t\\n\\tsRec := RECORD\\n\\t UNSIGNED1 Verse1 := 0;\\n\\t\\t\\tUNSIGNED1 Verse2 := 0;\\n\\t\\t\\tUNSIGNED1 count := 0;\\n\\tEND;\\n\\nClean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n R TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\n\\nRecords := N;
\\n\\nThanks,\\nSwetha\", \"post_time\": \"2015-05-04 18:37:17\" },\n\t{ \"post_id\": 7407, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Here is a quick example to get you started:\\n\\nIMPORT $,STD;\\nRraw := RECORD\\n STRING Txt;\\n END;\\n \\nd := DATASET([{'Now is the time for all good developers to use ECL.'}],RRaw);\\nd1 := DATASET([{'ECL is intuitive, easy to use, and is also extensible.'}],Rraw);\\nd2 := DATASET([{'ECL is also non-procedural and self optimizing.'}],RRaw);\\n\\nNewR := RECORD\\n UNSIGNED1 DocID;\\n STRING Txt;\\n END;\\n\\nnewd := PROJECT(d,TRANSFORM(NewR,SELF.DocID := 1,SELF := LEFT));\\nnewd1 := PROJECT(d1,TRANSFORM(NewR,SELF.DocID := 2,SELF := LEFT));\\nnewd2 := PROJECT(d2,TRANSFORM(NewR,SELF.DocID := 3,SELF := LEFT));\\n\\nwds := newd+newd1+newd2;\\n\\nI := wds;\\nClean(STRING s) := STD.STR.Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n\\nR := RECORD\\n UNSIGNED1 DocID := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := STD.Str.ToUpperCase(STD.STR.GetNthWord(Clean(le.txt),c));\\n SELF := le;\\n END;\\n\\nN := NORMALIZE(I,STD.STR.WordCount(Clean(LEFT.Txt)),TakeWord(LEFT,COUNTER));\\n\\nRecords := N;\\nKey := INDEX(Records,{STRING20 Wrd := Word,Word_Pos,DocID},'~key::your_inversion');\\nBld := BUILD(Key,OVERWRITE);\\n\\nBld;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-18 00:00:25\" },\n\t{ \"post_id\": 7406, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"You are almost there. It looks like your code just need to add a document id.\\n\\nI would use a PROJECT to add the additional field to your 3 datasets, and then append after that.\\n\\nWhen you NORMALIZE, the SELF := Le in the TRANSFORM should move the Document ID over accordingly. Then you can INDEX on the WORD, Word Position, and Document ID to complete your inverted index.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-17 23:35:08\" },\n\t{ \"post_id\": 7402, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hi,\\n\\nI am working on Inverted Index but unable to figure out code to find the document name in the output.\\n\\nI want output as word, word_pos and document ID/name.\\n\\nBelow is the code\\nimport * from Std.Str;\\n\\nR := RECORD\\n \\tSTRING Txt;\\n END;\\n\\t\\nd := DATASET('~thor::doc1.txt',R,CSV(SEPARATOR('')));\\nd1 := DATASET('~thor::doc2.txt',R,CSV(SEPARATOR('')));\\nd2 := DATASET('~thor::doc3.txt',R,CSV(SEPARATOR('')));\\n\\n\\nwds := d+d1+d2;\\n\\nWordLayout := RECORD \\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n\\nClean(STRING s) := Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\n WordLayout TakeWord(wds le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := ToUpperCase(GetNthWord(Clean(le.Txt),c));\\n SELF := le;\\n END;\\nN := NORMALIZE(wds,WordCount(Clean(LEFT.Txt)),TakeWord(LEFT,COUNTER));\\n\\nRecords := N;\\n\\nRecords;\\n\\n
\\n\\nPlease help me to find, How to search for a word using document id/name and position in ecl?\\n\\nThanks,\\nSwetha\", \"post_time\": \"2015-04-17 17:07:22\" },\n\t{ \"post_id\": 7400, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Thank you Bob and John,\\n\\nI am testing the KJV example, and I got error for the below code\\nSearch(STRING S) := FUNCTION\\nD := DATASET([{s}],{ STRING T });\\nR SearchBlock(D le,UNSIGNED C) := TRANSFORM\\n SELF.Word := GetNthWord(le.T,C);\\n END;\\nN := NORMALIZE(D,WordCount(S),SearchBlock(LEFT,COUNTER));\\nR GraphBody(SET OF DATASET(R) I,UNSIGNED C) := FUNCTION\\n RETURN DEDUP( \\n MAP ( C = 1 => PROJECT(Key(Wrd=I[0][1].Word),TRANSFORM(R,SELF := LEFT)),\\n JOIN(I[C-1],Key,RIGHT.Wrd=I[0][C].Word\\n AND LEFT.BookNum=RIGHT.BookNum\\n AND LEFT.Chapter=RIGHT.Chapter\\n AND LEFT.Verse=RIGHT.Verse,TRANSFORM(R,SELF := LEFT))\\n )\\n ,BookNum,Chapter,Verse);\\n\\n END;\\n\\n G := GRAPH(N,COUNT(N),GraphBody(ROWSET(LEFT),COUNTER));\\n RETURN PROJECT(G,TRANSFORM($.File_KJV.Layout_Reference,SELF := LEFT));\\n\\t\\n \\nEND;\\ns := Search('LOVE OF GOD');\\nkjv.File_KJV.GetBatch(s);
\\n\\nand the error is \\nSyntax error near NORMALIZE definition like near ":=" : expected ';'\\n\\nPlease help me to resolve the error and get the output.\\n\\nThanks,\\nSwetha\", \"post_time\": \"2015-04-17 16:18:36\" },\n\t{ \"post_id\": 7388, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"john holt\", \"post_text\": \"Swetha,\\n Have you thought much about the kinds of search expressions that you want to resolve with this index? This index will support simple Boolean search expressions including adjacency (like a phrase).\\n\\n\\n The index layout suggested in the KJV example uses a compound document identifier, Book, Chapter, and Verse. Your needs may not require this level of detail. For instance, you could just have a single level, the document, and replace the 3 fields with a single document identifier field.\\n\\n\\n Finally, the KJV example has a relatively simple view of a "term" as an alphanumeric(plus an apostrophe) string delimited by spaces. This may or may not be a suitable definition of term for your intended application.\", \"post_time\": \"2015-04-16 20:08:22\" },\n\t{ \"post_id\": 7387, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Re: Inverted Index\", \"username\": \"bforeman\", \"post_text\": \"Hi Swetha,\\n\\nIn a nutshell, here is what you essentially need to build an inverted index:\\n\\n\\nIMPORT $,STD;\\nClean(STRING s) := STD.STR.Filter(S,'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \\\\'');\\nI := $.File_KJV.Txt;\\nR := RECORD\\n UNSIGNED1 BookNum := 0;\\n UNSIGNED1 Chapter := 0;\\n UNSIGNED1 Verse := 0;\\n UNSIGNED1 Word_Pos := 0;\\n STRING Word := '';\\n END;\\n\\nR TakeWord(I le,UNSIGNED1 C) := TRANSFORM\\n SELF.Word_Pos := C;\\n SELF.Word := STD.Str.ToUpperCase(STD.STR.GetNthWord(Clean(le.verse_Text),c));\\n SELF := le;\\n END;\\n\\nN := NORMALIZE(I,WordCount(Clean(LEFT.verse_Text)),TakeWord(LEFT,COUNTER));\\n\\nRecords := N;\\nKey := INDEX(Records,{STRING20 Wrd := Word,BookNum,Chapter,Verse,Word_Pos},'~key::your_inversion');\\nBld := BUILD(Key,OVERWRITE);\\n
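\\nOnce the BUILD has run, the inverted index can be read back like any other dataset by filtering on its leading key field; a small sketch using the Key definition above, with an arbitrary search word:\\n\\nOUTPUT(Key(Wrd = 'GOD'));\\n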
\\nNow of course, you will need to change the $.File_KJV.Txt to your file, and also change the RECORD R to match your file's field format.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2015-04-16 16:59:44\" },\n\t{ \"post_id\": 7369, \"topic_id\": 1689, \"forum_id\": 10, \"post_subject\": \"Inverted Index\", \"username\": \"swethareddy01\", \"post_text\": \"Hi All,\\n\\nPlease help me with the inverted index code, how to build inverted index for text file?\\n\\nI tried the KJV example, but I want simple inverted index code.\\n\\nThank you\\n\\nSwetha\", \"post_time\": \"2015-04-15 23:02:15\" },\n\t{ \"post_id\": 7385, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"georgeb2d\", \"post_text\": \"I looked at the data and the rows are not duplicates. So I added the datasets and it seems to be running fine.\", \"post_time\": \"2015-04-16 15:23:12\" },\n\t{ \"post_id\": 7384, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"bforeman\", \"post_text\": \"Ok, so I guess a DEDUP is called for after the JOIN? \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 15:21:26\" },\n\t{ \"post_id\": 7383, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"georgeb2d\", \"post_text\": \"I think i discovered the problem. In RoadlinkPipe1 there are 1588 records that have a boundaryid of 372. So I suspect that somehow that is causing the problem.\", \"post_time\": \"2015-04-16 15:06:25\" },\n\t{ \"post_id\": 7382, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"georgeb2d\", \"post_text\": \"From the graph of the WU above \\nThe PipelineIrelandwBoundaryID_DS has a count of 153,214.\\nThe Join has a count of 245,489,771.\", \"post_time\": \"2015-04-16 14:35:12\" },\n\t{ \"post_id\": 7381, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"georgeb2d\", \"post_text\": \"I did a Crosstab on the results: \\nBoundaryID Count\\n36 718,064\\n372 243,303,803\\n554 67,795\\n826 1,400,080\\n\\nAll of these make sense except the 372.\", \"post_time\": \"2015-04-16 14:29:11\" },\n\t{ \"post_id\": 7380, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Re: Full Outer Join Giving Many More Rows than Exist\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nWhat is your COUNT of PipelineIrelandwBoundaryID_DS and RoadSegment1Irelandu ?\\nYou may have to extract a sample to see why the bloating after the JOIN is occurring.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-16 14:23:35\" },\n\t{ \"post_id\": 7379, \"topic_id\": 1690, \"forum_id\": 10, \"post_subject\": \"Full Outer Join Giving Many More Rows than Exist\", \"username\": \"georgeb2d\", \"post_text\": \"I am doing an outer join on two datasets. One dataset(Pipeline1) has 2.1 million records. The other dataset(Pipeline Ireland) has 153,000 records. After the Join there are 24 million records. Please tell me what I have done wrong. \\n\\nIreland Pipeline is a dataset that has no boundary id. I do a project to add the boundary ID. Then it has the same Layout the other dataset has. That seems to be the problem but I do not know why. \\n\\nThere should be no boundaryIds that match between Pipeline1 and Ireland Pipeline.\\n\\nHere is the code:\\n
// Have to define pipes as Datasets and then \\n// do Outer Join\\n// Layout is Roadlink.Layouts.RoadSegmentsLayout\\n// Try doing Ireland first\\nIreland_Logical_Filename := '~thor::wunelli::roadlink::roadsegmentspipe9Ireland';\\nPipelineIreland_DSo := DATASET(Ireland_Logical_Filename, RECORDOF(X.Layouts.RoadSegmentsLayout) AND NOT [BoundaryID] , CSV(SEPARATOR('|' )));\\nPipelineIreland_DS := PipelineIreland_DSo(metres>0);\\nOUTPUT(count(PipelineIreland_DS));\\n\\n// BoundaryID comes from WorldBorders for Ireland\\nRoadlink.Layouts.RoadSegmentsLayout AddBoundaryID(PipelineIreland_DS Le):= TRANSFORM\\n SELF.BoundaryID := 372;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t SELF :=Le;\\nEND; \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nPipelineIrelandwBoundaryID_DS := PROJECT(PipelineIreland_DS,\\n AddBoundaryID(LEFT));\\n\\n\\n\\nPipeline1_Logical_Filename := '~thor::wunelli::roadlink::roadsegmentspipe1';\\nPipeline1_DSo := DATASET(Pipeline1_Logical_Filename, X.Layouts.RoadSegmentsLayout,CSV(SEPARATOR('|' )));\\nPipeline1_DS := Pipeline1_DSo(metres>0);\\nOUTPUT(Count(PipeLine1_DS), NAMED('Count_1')); \\n\\nRoadlink.Layouts.RoadSegmentsLayout JoinemIreland(PipelineIrelandwBoundaryID_DS LE, Pipeline1_DS R):= TRANSFORM\\n SELF := IF (LE.roadsegid='', R, LE ) ;\\n\\t\\t\\t\\t\\t // SELF := R; \\n\\tEND;\\t\\t\\t\\t\\t\\t \\n\\n\\n// Outer Join on Pipeline1 and Ireland\\nRoadSegment1Irelandu := JOIN (PipelineIrelandwBoundaryID_DS,Pipeline1_DS, \\n LEFT.BoundaryId = RIGHT.BoundaryID, \\n JOINEMIreland(LEFT,RIGHT),FULL OUTER); \\nRoadSegment1Ireland := SORT(RoadSegment1Irelandu, BoundaryID, SKEW(1.0));\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nOUTPUT(Count(RoadSegment1Ireland), NAMED('Count_1Ireland'));\\n\\n
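\\nFor reference, the row explosion discussed in this thread is what a many-to-many match key produces: every Pipeline1 record with BoundaryID 372 pairs with every Ireland record (all of which were projected to BoundaryID 372), so the JOIN output grows multiplicatively for that key. Because the two files are intended to be disjoint, the FULL OUTER JOIN can simply be replaced with an append -- the approach that was eventually used; a minimal sketch, assuming the two PROJECTed layouts match as the code above implies:\\n\\nRoadSegment1Irelandu := PipelineIrelandwBoundaryID_DS + Pipeline1_DS; // append instead of JOIN\\nRoadSegment1Ireland := SORT(RoadSegment1Irelandu, BoundaryID, SKEW(1.0));\\n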
On Alpha_Dev_Thor this was W20150416-095648. I do this join with all the other Pipelines and everything appears fine. It is only on this join that I get these absurd results. \\n\\nThanks for any assistance.\", \"post_time\": \"2015-04-16 14:12:07\" },\n\t{ \"post_id\": 7565, \"topic_id\": 1707, \"forum_id\": 10, \"post_subject\": \"Re: Producing fewer rows from a Normalize\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nSo, about twice as fast, eh? \\n\\nRichard\", \"post_time\": \"2015-05-08 15:09:45\" },\n\t{ \"post_id\": 7564, \"topic_id\": 1707, \"forum_id\": 10, \"post_subject\": \"Re: Producing fewer rows from a Normalize\", \"username\": \"georgeb2d\", \"post_text\": \"When I got back to this the time to perform this for NY changed from 7 minutes to under 4 minutes. \\n\\nThanks for your solution.\\n\\nDon\", \"post_time\": \"2015-05-08 14:41:23\" },\n\t{ \"post_id\": 7498, \"topic_id\": 1707, \"forum_id\": 10, \"post_subject\": \"Re: Producing fewer rows from a Normalize\", \"username\": \"georgeb2d\", \"post_text\": \"You were correct in your assumptions. I will try it. \\n\\nThanks,\\nDon\", \"post_time\": \"2015-04-30 15:55:41\" },\n\t{ \"post_id\": 7496, \"topic_id\": 1707, \"forum_id\": 10, \"post_subject\": \"Re: Producing fewer rows from a Normalize\", \"username\": \"rtaylor\", \"post_text\": \"Don,
I normalize this with a transformation into each set of longitude, latitude, with the associated road segment id, and calculate a grid number based on the longitude and latitude.
\\nYour code looks like the only reason you need the lat/long at this point is to calculate the grid number, correct?\\nSo the resultant row in the file has a latitude, longitude, road Segment Id and Grid number. This has around 140 million rows.
\\nSo if my previous assumption is correct, then the purpose of this is to associate a road segment ID with each grid number, correct? And it sounds like each set of lat/longs will produce a number of duplicate grid numbers, but the whole set will produce one or more grid numbers per road segment ID, correct?\\nThen the next step of the process sorts by grid number and road segment id. \\n\\nThen I dedup to get only one road segment id per grid number. I do not need the latitude and longitude. This produces around 13 million rows.
So you're just keeping one of the lat/longs that produce a single grid number? Does it matter which one? So the reason you start with 11 million road segment ID records and it expands to \\n13 million is the fact that you can calculate multiple grid numbers from the set of lat/longs associated with a single road segment.\\n\\nSo I would approach this by just calculating all the grid numbers for each lat/long, keeping the road segment ID, to produce your 140 million row result with this structure:GridRec := RECORD\\n\\tINTEGER Grid_number := 0;\\n\\tReal Longitude;\\n\\tReal Latitude;\\n\\tX.Layouts.RoadSegmentsCategoryLayout.RoadSegId;\\nEND;
That should be faster than carrying around all those other fields. Then sort/dedup that to get the ones to keep, then just do an inner JOIN against the initial file to produce your FinalResult records.\\n \\nHTH,\\n\\nRichard\", \"post_time\": \"2015-04-30 15:41:03\" },\n\t{ \"post_id\": 7493, \"topic_id\": 1707, \"forum_id\": 10, \"post_subject\": \"Producing fewer rows from a Normalize\", \"username\": \"georgeb2d\", \"post_text\": \"I have a dataset that has a string with a long list of longitude, latitude points, then some other fields with an associated road segment id. This has around 11 million rows. \\n\\nThe string looks like Longitude Latitude, Longitude Latitude, Longitude latitude, etc.\\n \\nI normalize this with a transformation into each set of longitude, latitude, with the associated road segment id, and calculate a grid number based on the longitude and latitude. \\n\\n\\nSo the resultant row in the file has a latitude, longitude, road Segment Id and Grid number. This has around 140 million rows. \\n\\nThen the next step of the process sorts by grid number and road segment id. \\n\\nThen I dedup to get only one road segment id per grid number. I do not need the latitude and longitude. THis produces around 13 million rows. \\n\\n\\nIs there a way that I could: \\n\\nIn the normalize produce just one row per grid number per road segment id.\\n\\n Or at the very least, \\nwhen there are two rows side by side with the same roadsegment id and grid number, \\nJust write out one of them? \\n \\nHere is a code snippet:\\nSHARED FinalRec := RECORD\\n INTEGER Grid_number := 0;\\n Real Longitude;\\n Real Latitude;\\n X.Layouts.RoadSegmentsCategoryLayout.RoadSegId;\\n X.Layouts.RoadSegmentsCategoryLayout.BoundaryId;\\n X.Layouts.RoadSegmentsCategoryLayout.RoadSpeedlimit;\\n INTEGER RoadCategory := 0;\\n X.Layouts.RoadSegmentsCategoryLayout.RoadTypeId;\\n X.Layouts.RoadSegmentsCategoryLayout.Validated;\\t\\n X.Layouts.RoadSegmentsCategoryLayout.WKT;\\n // STRING GEOM_EPSG ; \\nEND; \\n\\n// Note these were for NY\\n// REAL MIN_Longitude := -79.8227;\\n// REAL MIN_latitude := 40.4984;\\n\\n// For Continental USA\\nREAL MIN_Longitude := -124.7581; // -124.758018 \\nREAL MIN_latitude := 24.523; //24.524334\\nINTEGER2 Number_Grids_per_Longitude := 237; // From EXCEL ROUNDUP((Max_lat - Min_Lat)/0.105) \\n\\n// Produce the next record with Latitude and Longitude\\nFinalRec XF1(ContinentalUSARoadCategoryXYSets_DS Le, INTEGER C) := TRANSFORM\\n \\n lineSet := lib_stringlib.stringlib.splitwords(Le.XYSets,',',TRUE);\\n\\t\\tSplit_Lat_Long := lib_stringlib.stringlib.splitwords(lineset[C],' ',TRUE);\\n\\t\\tSELF.Longitude := IF (Split_lat_long[1] = '',(REAL)Split_lat_long[2],(REAL)Split_lat_long[1]);\\n SELF.Latitude := IF (Split_lat_long[1] = '',(REAL)Split_lat_long[3],(REAL)Split_lat_long[2]);\\n\\t\\tSELF.Grid_Number := (INTEGER)((SELF.Longitude-min_longitude)/.025)*(Number_grids_per_longitude)\\n\\t\\t\\t +1 +(INTEGER)((SELF.Latitude-min_latitude)/.105 ) ;\\n\\t\\tSELF := LE;\\nEND;\\n\\nSHARED ContinentalUSARoadCategoryGrid_DS:= NORMALIZE(\\tContinentalUSARoadCategoryXYSets_DS\\n , COUNT(\\tlib_stringlib.stringlib.splitwords(LEFT.XYSets,',',TRUE)), XF1(LEFT,COUNTER));\\n
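\\nA minimal sketch of the slim-then-rejoin flow suggested in the reply above -- GridRec comes from that reply, the dataset and field names are reused from the snippet above, and XF1Slim is a hypothetical slimmed-down version of XF1 that outputs only the GridRec fields; GridPairs can then be joined back to the original file (an inner JOIN on RoadSegId) to rebuild the FinalRec fields:\\n\\nSlimGrid_DS := NORMALIZE(ContinentalUSARoadCategoryXYSets_DS,\\n COUNT(lib_stringlib.stringlib.splitwords(LEFT.XYSets,',',TRUE)),\\n XF1Slim(LEFT,COUNTER));\\n\\n// one row per (grid number, road segment) pair -- lat/long are no longer needed here\\nGridPairs := DEDUP(SORT(SlimGrid_DS,Grid_number,RoadSegId),Grid_number,RoadSegId);\\n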
\\nThanks,\\nDon\", \"post_time\": \"2015-04-30 13:32:34\" },\n\t{ \"post_id\": 7567, \"topic_id\": 1723, \"forum_id\": 10, \"post_subject\": \"Re: Transforming and Adding Rows in a Dataset\", \"username\": \"rtaylor\", \"post_text\": \"Don,I guess I need to have a transformation call the function Macro?
Pretty much. That's the first thing I would try.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-08 18:50:33\" },\n\t{ \"post_id\": 7566, \"topic_id\": 1723, \"forum_id\": 10, \"post_subject\": \"Re: Transforming and Adding Rows in a Dataset\", \"username\": \"georgeb2d\", \"post_text\": \"I first attempted a FunctionMacro. The problem is that it handles only one record in the dataset.\\n\\nSo I did a Normalize instead, and that worked.\\n\\n
\", \"post_time\": \"2015-05-08 18:28:56\" },\n\t{ \"post_id\": 7563, \"topic_id\": 1723, \"forum_id\": 10, \"post_subject\": \"Transforming and Adding Rows in a Dataset\", \"username\": \"georgeb2d\", \"post_text\": \"Suppose we have a set of x,y co-ordinates that describe a road. If the points are connected we have a line string. \\nThen I superimpose a grid over the points. I produce a grid number based on the x and y co-ordinates.\\n\\nSo my dataset consists of a row_number, grid_number, the road_id, longitude, latitude, difference_previous.\\n\\nThe difference_previous is the difference between the current grid_number for that row_number \\nand the pervious grid_number at the previous row_number.\\n\\nHere is the problem: I want to maintain the same grid size, but do not want to skip a grid. In other words, the line between two \\nx,y co-ordinates may skip a grid. If it does so I want to create at least one new row based on that. \\n\\nFor illustration here are two rows:\\n\\nGRID_MORE_LAYOUT := RECORD\\n INTEGER8 Row_number;\\n INTEGER8 Grid_number;\\n INTEGER8 Road_ID;\\n INTEGER8 Boundary_id;\\n REAL longitude;\\n REAL latitude;\\n INTEGER8 Difference_Previous ;\\nEND; \\n
\\n30\\t23085203\\t265542\\t902\\t-73.762041\\t41.777891\\t0\\n31\\t23078730\\t265542\\t902\\t-73.762466\\t41.778733\\t-6473\\n\\nAssuming there are 6475 grids per column.\\n\\nTo picture this Grid:\\n\\nx1\\tx4\\tx7\\nx2\\tx5\\tx8\\nx3\\tx6\\tx9\\n\\nAssuming x5 is our target grid the Grid numbers for these would be:\\n\\nx5-6476\\tx5-1\\tx5+6474\\nx5-6475\\tx5\\tx5+6475\\nx5-6474\\tx5+1\\tx5+6476\\n\\nSo point 30 is in Grid x5 and point 31 is in Grid x5-6473. \\n\\nOr using the grid numbers from above, points 30 and 31 would be illustrated by this grid:\\n23078727\\t23085202\\t23091677\\n23078728\\t23085203\\t23091678\\n23078729\\t23085204\\t23091679\\n23078730\\t \\t\\n\\n\\nAssuming the point to point is always a straight line that meant the line between these two points either went\\nthrough x5-6475(2307828) and x5-6474(2307829) to get to Grid x5-6473(2307830) OR went through x5+1(23085204) \\nand x5+2(23085204) to get to Grid x5-6473, or a combination of these depending upon where in the grids the point 30 and point 31 are.\\n\\nMy goal is to change the dataset so it would be:\\n30\\t23085203\\t265542\\t902\\t-73.762041\\t41.777891\\t0\\n3001\\t23085204\\t265542\\t902\\t 0\\n3002\\t23085205\\t265542\\t902\\t 0\\n3003\\t23078728\\t265542\\t902\\t 0\\n3004\\t23078729\\t265542\\t902\\t 0\\n31\\t23078730\\t265542\\t902\\t-73.762466\\t41.778733\\t1\\nNote: I could not get the format to put the 0s in 3001 to 3004 in the last column. The previous two columns would be blank.\\n\\nI know that whenever the grid_difference is -6473 I need to do something like this. The question is, how do I do this? \\nAnother code snippet:\\nGRID_MORE_LAYOUT AddThem(NYSmallerGrid_DS L, NYSmallerGrid_DS R) := TRANSFORM\\n Diff := R.Grid_number - L.Grid_number;\\t\\n\\tDiff2 := IF(L.Row_number = 0,0,Diff);\\n\\tSELF.Difference_Previous := Diff2;\\n\\tSELF := R;\\nEND;\\n\\nAddedRecs := ITERATE(NYSmallerGrid_DS,AddThem(LEFT,RIGHT));\\n
\\nOn Alpha_DEV_Thor the workunit is W20150508-084530.\", \"post_time\": \"2015-05-08 14:22:44\" },\n\t{ \"post_id\": 7594, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"Re: iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nI guess since this is your local HPCC VM you could simply delete the image and reinstall the 5.2 VM. The only inconvenience is that you would need to respray some files, but since your source code is local you would not lose any work.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-13 12:58:48\" },\n\t{ \"post_id\": 7591, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"Re: iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I would delete if I could \\n\\nNothing is listed when I get to the ECL Watch page for DFU Work units. All I see is this ambiguous xpath error. I guess this error is being thrown when it is trying to fetch the list of work units. \\n\\nGayathri\", \"post_time\": \"2015-05-12 19:47:59\" },\n\t{ \"post_id\": 7590, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"Re: iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"bforeman\", \"post_text\": \"Try deleting the bad DFU workunit(s), and then explicitly STOP, and then START the VM again. Or for that matter, you can delete them all
It won't affect what you jave already sprayed.\\n\\nBob\", \"post_time\": \"2015-05-12 19:30:13\" },\n\t{ \"post_id\": 7589, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"Re: iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Bob,\\n\\nI get this error even after I restart the VM. I am unable to list all DFU work units. Error comes up as soon as I get to that page. \\n\\nI am able to spray files through ECL watch. I was able to spray another file through dfuplus as well. Problem seems to be with this file - I'll check on that. But, how can I get rid of this error from my ECL Watch? \\n\\nGayathri\", \"post_time\": \"2015-05-12 19:22:53\" },\n\t{ \"post_id\": 7588, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"Re: iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"bforeman\", \"post_text\": \"Can you just stop and restart your VM?\\nYou can also try setting the DFU Workunit to failed, and then try spraying again.\\n\\nCan you spray other files, or is this just isolated to this single file?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-12 17:54:38\" },\n\t{ \"post_id\": 7586, \"topic_id\": 1727, \"forum_id\": 10, \"post_subject\": \"iPropertyTree:Ambiguous xpath used getProp error\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I am using a 5.2 VM and I was trying to spray a fixed length file from command line. It is a 17MB file but seemed to take a very long time to spray. So, after giving it more than 5 min, I aborted the DFU workunit from ECL Watch and submitted the command again. This time too, I kept getting the 0% done again and again for sometime. \\n\\nIf I go to ECL watch to check the DFU workunit status, I get the following error:\\n\\nFileSpray.GetDFUWorkunits iPropertyTree: Ambiguous xpath used getProp: ambiguous xpath "Progress"\\n\\nThis comes repeatedly but nothing happens. If I try submitting the file spray command again, it goes into queue mode. I restarted the VM too but it didnt help. \\n\\nIs there a way to get out of this error and resume normal processing?\\n\\nGayathri\", \"post_time\": \"2015-05-12 17:29:38\" },\n\t{ \"post_id\": 7636, \"topic_id\": 1737, \"forum_id\": 10, \"post_subject\": \"Re: Embedded MySQL\", \"username\": \"rtaylor\", \"post_text\": \"Alex,
The table in question has about 900 million records in it
Given that much data I would not try to stream it. I would do a data dump from MySql and then just spray that dump into HPCC to start working with it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 19:56:18\" },\n\t{ \"post_id\": 7631, \"topic_id\": 1737, \"forum_id\": 10, \"post_subject\": \"Embedded MySQL\", \"username\": \"alex\", \"post_text\": \"I am trying to read a table from a MySQL database into an ECL dataset:\\n\\n\\nimport mysql;\\n\\nLayout := RECORD\\n //fields\\nEND;\\n\\ndataset(Layout) testMySQL := EMBED(mysql : server(myServer), user(myUser), database(myDb))\\n select * from sqlTable;\\nENDEMBED;\\n\\nds := testMySQL();\\nds_dist := DISTRIBUTE(ds, HASH64(idfield));\\noutput(ds_dist, , '~path::to::file', thor);\\n
\\n\\nIf I run this from hthor, the workunit goes for several minutes but eventually fails, with the unhelpful message:\\nError: Error: 0: mysql: (0,0) 0\\n\\nIf I run this from the thor cluster, the workunit eventually fails with a different message:\\nError: System error: 0: Graph[1], streamediterator[2]: SLAVE #1May be omitted if all return values are actions (the default would then be no action), or all return values are record sets (the default would then be an empty record set).
Therefore, I presume the compiler is treating your indexes as datasets, because of that and giving you the error. Try adding a default elsevalue INDEX and see if that cures the error.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-21 20:24:35\" },\n\t{ \"post_id\": 7634, \"topic_id\": 1738, \"forum_id\": 10, \"post_subject\": \"Choosing an IDX on basis of Map\", \"username\": \"georgeb2d\", \"post_text\": \"Hello,\\n\\nI am choosing between six indexes based on a string that tells me which index to use. \\n\\nFor example if I am calculating something in the US I want to calculate based on a US Index. If I am in the UK I want something based on a UK index. I am trying to use MAP for this but when I get to the join it says:\\n\\tRight dataset (IF) for a keyed join is not a key\\n\\nHere are code snippets:\\n\\nFrom Files:\\n\\tEXPORT ROAD_CONTINENTAL_USA_IDX := INDEX({Layouts.BLOB_GRID_ROAD_SEGMENTS_CATEGORY_IDX_LAYOUT.Grid_number},\\nRECORDOF(Layouts.BLOB_GRID_ROAD_SEGMENTS_CATEGORY_IDX_LAYOUT)\\n,FILE_KEY_GRID_ROAD_CONTINENTAL_USA_SF) ;\\t\\t\\t\\t\\t\\t\\t\\t \\n
\\n\\nFrom ECl Attribute:\\nGrid_IDX := MAP(Which_Grid = 'USA' => X.FILES.ROAD_CONTINENTAL_USA_IDX ,\\n\\t\\tWhich_Grid = 'UK_IRELAND' => X.FILES.ROAD_UK_IRELAND_IDX ,\\n\\t\\tWhich_Grid = 'HAWAII' => X.FILES.ROAD_HAWAII_IDX \\n\\t\\t\\t\\t\\t\\t\\t\\t);\\n...\\n DistanceToRoad := JOIN( Pulse_DS, Grid_IDX, \\n\\tKEYED(LEFT.Grid_Number = RIGHT.Grid_Number, ), CalcDistance(LEFT,RIGHT));\\t \\n \\n
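\\nFollowing the suggestion above, a minimal sketch of the revised definition gives the MAP a default (ELSE) value that is also an index, so that every branch of the expression is a key rather than a dataset (the index names are reused from the snippet above):\\n\\nGrid_IDX := MAP(Which_Grid = 'USA' => X.FILES.ROAD_CONTINENTAL_USA_IDX,\\n Which_Grid = 'UK_IRELAND' => X.FILES.ROAD_UK_IRELAND_IDX,\\n Which_Grid = 'HAWAII' => X.FILES.ROAD_HAWAII_IDX,\\n X.FILES.ROAD_CONTINENTAL_USA_IDX); // default ELSE value, also an INDEX\\n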
\\n\\nInstead of writing six different attributes I want to be able to just write the one attribute. I am not stuck on MAP but need a way to choose the six different indexes. Thanks for any assistance.\\n\\nON Alpha_DEV_Thor this is W20150521-135612.\", \"post_time\": \"2015-05-21 18:27:38\" },\n\t{ \"post_id\": 7675, \"topic_id\": 1742, \"forum_id\": 10, \"post_subject\": \"Re: Best way of using an Index\", \"username\": \"bforeman\", \"post_text\": \"Hi Jerry,\\n\\nAbsolutely no difference! \\n\\nROXIE is only better, as it is designed to read INDEX files, and the IMPORT and EXPORT you mention is irrelevant, since all ROXIE queries are pre-compiled. That's what makes this platform great IMO.\\n\\nIf you haven't had a chance yet, check out the online ROXIE classes. They demonstrate a lot of good techniques and best practices as well.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-28 11:50:11\" },\n\t{ \"post_id\": 7672, \"topic_id\": 1742, \"forum_id\": 10, \"post_subject\": \"Re: Best way of using an Index\", \"username\": \"Jerry\", \"post_text\": \"Hi Bob,\\n\\nFrom your example it was clear how to reuse the Index in THOR.\\n\\nI would also assume that in ROXIE there is no difference in performance between the two approaches,i.e IMPORT & EXPORT and declaring a previously defined Index. \\n\\nThanks a lot for your help.\\n\\nRegards,\\nJerry\", \"post_time\": \"2015-05-28 06:28:08\" },\n\t{ \"post_id\": 7653, \"topic_id\": 1742, \"forum_id\": 10, \"post_subject\": \"Re: Best way of using an Index\", \"username\": \"bforeman\", \"post_text\": \"Hi Jerry,\\n\\nIf you are talking about reusing the INDEX in other workunits in THOR, I would define the INDEX first using the INDEX statement, EXPORT that definition, and then BUILD the EXPORTed INDEX definition. \\n\\nHere is one technique and best practice that we show in class:\\n\\nIMPORT STD;\\nUpperIt(STRING txt) := Std.Str.ToUpperCase(txt);\\n\\nEXPORT File_Persons := MODULE\\n EXPORT Layout := RECORD\\n UNSIGNED8 ID;\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING2 NameSuffix;\\n STRING8 FileDate;\\n UNSIGNED2 BureauCode;\\n STRING1 MaritalStatus;\\n STRING1 Gender;\\n UNSIGNED1 DependentCount;\\n STRING8 BirthDate;\\n STRING42 StreetAddress;\\n STRING20 City;\\n STRING2 State;\\n STRING5 ZipCode;\\n END;\\n\\n EXPORT File := DATASET('~CLASS::BMF::Intro::Persons',Layout,FLAT); \\n EXPORT IDX_IDpay := INDEX(File,{ID},{File},'~CLASS::BMF::KEY::Persons::IDpay'); \\n EXPORT IDX_LFname := INDEX(File,{Lastname,Firstname},{ID},'~CLASS::BMF::KEY::Persons::LFname');\\n EXPORT IDX_LFnameU := INDEX(File,{STRING25 Lastname := UpperIt(Lastname),STRING15 Firstname := UpperIt(Firstname)},{ID},'~CLASS::BMF::KEY::Persons::LFnameU');\\n \\n EXPORT BLD_IDX_IDpay := BUILD(IDX_IDpay,OVERWRITE);\\n EXPORT BLD_IDX_LFname := BUILD(IDX_LFname,OVERWRITE);\\n EXPORT BLD_IDX_LFnameU := BUILD(IDX_LFnameU,OVERWRITE);\\n EXPORT BLD_ALL := PARALLEL(BLD_IDX_IDpay,BLD_IDX_LFname,BLD_IDX_LFnameU);\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nEND;
\\n\\nWe wrap all of the logic inside of a MODULE, which allows multiple EXPORT statements in one file. There are 3 payload INDEXes defined, and then EXPORTed actions that can build one or all of them as needed.\\n\\nFor example, in another file:\\n\\nIMPORT Myfoldername;\\nMyfoldername.File_Persons.BLD_ALL;
\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2015-05-26 12:27:55\" },\n\t{ \"post_id\": 7650, \"topic_id\": 1742, \"forum_id\": 10, \"post_subject\": \"Best way of using an Index\", \"username\": \"Jerry\", \"post_text\": \"Hi,\\n\\nOut of the two options given below, which is the better way to access an Index attribute in Roxie and why?\\n\\n1. Create the Index in THOR;EXPORT the Index attribute; Use the Index after IMPORTing\\n2. Create the Index in THOR;Use INDEX() to declare and access the previously created Index.\\n\\nRegards,\\nJerry\", \"post_time\": \"2015-05-25 10:56:54\" },\n\t{ \"post_id\": 7669, \"topic_id\": 1747, \"forum_id\": 10, \"post_subject\": \"Re: Memory Pool Exhausted\", \"username\": \"David Dasher\", \"post_text\": \"My hero, thanks.\\n\\nDavid\", \"post_time\": \"2015-05-27 14:46:06\" },\n\t{ \"post_id\": 7668, \"topic_id\": 1747, \"forum_id\": 10, \"post_subject\": \"Re: Memory Pool Exhausted\", \"username\": \"rtaylor\", \"post_text\": \"David, Looking at the graphs I can't see an id for 188
When you're looking at the graphs you'll see a number in the upper-left corner of each subgraph -- that's the activity number of the sub-graph -- let's say it's 10. If you then click on the first activity in that subgraph then look at the Debug toolbox you should then see a _GlobalID field that will have the number 11. That's where you should find your Lookup Join 188. \\n\\nJust look for the sub-graph with the largest number < 188 and then click the Lookup Join activity in that sub-graph and it should be 188.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-27 14:32:19\" },\n\t{ \"post_id\": 7667, \"topic_id\": 1747, \"forum_id\": 10, \"post_subject\": \"Memory Pool Exhausted\", \"username\": \"David Dasher\", \"post_text\": \"Hi\\n\\nI'm having an issue with a Roxie query where I'm getting Memory Pool Exhausted. I'm pretty sure it's caused by something I have done or am doing inefficiently. The error (Below) refers to a specific join in this case 188. How do I find the specific join in Thor/ HThor? Looking at the graphs I can't see an id for 188......\\n\\n<queryengine2Response xmlns="urn:hpccsystems:ecl:queryengine2">\\n<Result>\\n<Exception>\\n<Source>Roxie</Source>\\n<Code>1301</Code>\\n<Message>\\nMemory pool exhausted: pool (4096 pages) exhausted, requested 1 (in Lookup Join 188)\\n</Message>\\n</Exception>\\n</Result>\\n</queryengine2Response>
\\n\\nAny help would be appreciated.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2015-05-27 14:22:45\" },\n\t{ \"post_id\": 7685, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"rtaylor\", \"post_text\": \"Log into your environment from a different machine. If it works there, then it's a Help Desk problem with your box.\", \"post_time\": \"2015-05-28 15:35:50\" },\n\t{ \"post_id\": 7684, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"georgeb2d\", \"post_text\": \"The file listing worked fine. Thanks.\", \"post_time\": \"2015-05-28 15:34:06\" },\n\t{ \"post_id\": 7683, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"georgeb2d\", \"post_text\": \"I have been talking with Operations since 6 PM yesterday. They are stumped.\", \"post_time\": \"2015-05-28 15:33:00\" },\n\t{ \"post_id\": 7682, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"rtaylor\", \"post_text\": \" If my ECL watch was working, I would just use that.
You need to talk to Operations to figure out what your ECL Watch problem is.\", \"post_time\": \"2015-05-28 15:31:30\" },\n\t{ \"post_id\": 7681, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"georgeb2d\", \"post_text\": \"I tried this:\\n#EXPORTXML(Fred,'~thor::key::roadlink::continental_usa');\\n\\n Error: DATASET or TABLE expression expected (3, 16), 2076, \\n\\nI do not have a dataset, only a logical file name. If my ECL watch was working, I would just use that.\", \"post_time\": \"2015-05-28 15:19:11\" },\n\t{ \"post_id\": 7680, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Re: Getting a layout of a logical file or listing of logical\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,So I am wondering if there is a way in ECL to get a layout of a file.
Yes. #EXPORTXML will do that, assuming the DFU has the structure in its metadata about the file (IOW, if the file was created on HPCC and not a file that was just sprayed).\\nAnother related question is there a way to get a listing of logical files?
The STD.File.LogicalFileList() standard library function does that.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-28 15:08:27\" },\n\t{ \"post_id\": 7679, \"topic_id\": 1749, \"forum_id\": 10, \"post_subject\": \"Getting a layout of a logical file or listing of logical fil\", \"username\": \"georgeb2d\", \"post_text\": \"My ECL watch is not working with my logical files. \\n\\nSo I am wondering if there is a way in ECL to get a layout of a file. \\n\\nAnother related question is there a way to get a listing of logical files?\", \"post_time\": \"2015-05-28 15:04:04\" },\n\t{ \"post_id\": 7687, \"topic_id\": 1750, \"forum_id\": 10, \"post_subject\": \"Re: Parallel and Sequential\", \"username\": \"rtaylor\", \"post_text\": \"First off, your code becomes much more readable structured this way:S1 := SEQUENTIAL(build_Hawaii_idx,Hawaii_IDX_To_Built,mv_Key_Hawaii);\\nS2 := SEQUENTIAL(build_UK_IRELAND_idx,UK_IRELAND_IDX_To_Built,mv_Key_UK_IRELAND);\\nP1 := PARALLEL(S1,S2,build_Continental_USA_IDX )\\nbuildKeys := SEQUENTIAL(P1,Continental_USA_IDX_to_built, mv_Key_Continental_USA);\\n
My understanding of this is that it should do the two Sequential builds at the same time as it does build_Continental_USA_IDX
You are misunderstanding the meaning of PARALLEL, which does not force parallel execution but only allows it -- it is still up to the compiler whether parallel execution actually happens.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-28 17:49:02\" },\n\t{ \"post_id\": 7686, \"topic_id\": 1750, \"forum_id\": 10, \"post_subject\": \"Parallel and Sequential\", \"username\": \"georgeb2d\", \"post_text\": \"Possibly I am not understanding the definitions of these two commands.\\n\\nIn my code I have:\\nbuildKeys := SEQUENTIAL(PARALLEL(SEQUENTIAL(build_Hawaii_idx,Hawaii_IDX_To_Built,mv_Key_Hawaii)\\n\\t ,SEQUENTIAL( build_UK_IRELAND_idx,UK_IRELAND_IDX_To_Built,mv_Key_UK_IRELAND)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t, build_Continental_USA_IDX )\\n\\t , Continental_USA_IDX_to_built\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t, mv_Key_Continental_USA); \\n
\\nMy understanding of this is that it should do the two Sequential builds at the same time as it does build_Continental_USA_IDX.\\nHowever, in watching the graphs it appeared to do the Hawaii set, then the UK set, and then began the build_Continental_USA_IDX. (This is the order of the size of the indexes, by the way)\\nOn Alpha_DEV_THOR the WorkUnit was W20150527-115409.\\n\\nThree graphs were created by the Work UNIT. Graph 1 is the Hawaii build, Graph 2 is the UK build, and Graph 3 is the Continental USA build. \\nGraph 1 started at 12:00 and completed at 12:02:35.\\nGraph 2 started at 12:02:37 and completed at 12:19:06.\\nGraph 3 started at 12:22:03 and completed at 03:51.\\n\\nI knew the Continental USA build would take the longest amount of time so I wanted that one to run while the other two did as well. This does not appear parallel at all. The indexes pull from the same dataset to start with, but then run very separately, i.e., only Hawaii data, only UK data, only Continental USA data. At a later point in development I will be building 12 indexes, and want them to run at the same time, rather than one after another. \\n\\nWhat is the best option for me to do this the quickest possible?\", \"post_time\": \"2015-05-28 17:35:40\" },\n\t{ \"post_id\": 7694, \"topic_id\": 1751, \"forum_id\": 10, \"post_subject\": \"Re: Returning an index and outputting a dataset to a file\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks for clarifying that. I will return the dataset. I did not realize the entire function is called every time the Index is used. That is not good. That means the entire dataset would be rebuilt -- which for the Continental USA is 3 hours worth. \\n\\nHere is a code snippet:\\n\\n\\tHawaii_Complete_IDX := X.fn_key_Road_Segments(X.Constants().isHawaiiGrid, X.build_Hawaii);\\n\\tRoxieKeybuild.Mac_SK_BuildProcess_v2_Local(Hawaii_Complete_IDX , X.Files.FILE_KEY_GRID_ROAD_HAWAII , X.FILES.KEY_ROADLINK_PREFIX_NAME + '::' + sbuild_date + '::' + X.FILES.SUFFIX_NAME_GRID_HAWAII , build_Hawaii_IDX);\\n RoxieKeyBuild.Mac_SK_Move_to_Built_v2(X.Files.FILE_KEY_GRID_ROAD_HAWAII , X.FILES.KEY_ROADLINK_PREFIX_NAME + '::' + sbuild_date + '::' + X.FILES.SUFFIX_NAME_GRID_HAWAII, Hawaii_IDX_To_Built);\\n RoxieKeybuild.MAC_SK_Move_V2(X.Files.FILE_KEY_GRID_ROAD_HAWAII , 'Q', mv_Key_Hawaii);\\n buildKeys := SEQUENTIAL(build_Hawaii_idx); ,Hawaii_IDX_To_Built,mv_Key_Hawaii);\\n
\\n\\nThanks.\", \"post_time\": \"2015-05-29 15:13:30\" },\n\t{ \"post_id\": 7693, \"topic_id\": 1751, \"forum_id\": 10, \"post_subject\": \"Re: Returning an index and outputting a dataset to a file\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nWhere is the BUILD for that INDEX? You don't actually have an index until you have built it, so yes you can obviously return the INDEX definition but it does no good without the BUILD. \\n\\nAlso, once you have built it, every time you want to use that INDEX your FUNCTION returns, the rest of the code in the FUNCTION will be called and all the work will be re-done -- unless you have another separate INDEX declaration somewhere that duplicates the INDEX you're returning (which is what you should have anyway and is the reason why it makes no sense to me to return the INDEX from this FUNCTION).\\n\\nBottom line -- although you can make it work I doubt the designers of the language would highlight this as "best practice" code. There are more straight-forward ways of doing what you're trying to do. The "best practice" approach is the principle we try to teach with every forum post we reply to. Therefore you might want to take another look at the code example in my previous reply.\\n\\nRichard\", \"post_time\": \"2015-05-29 13:10:19\" },\n\t{ \"post_id\": 7691, \"topic_id\": 1751, \"forum_id\": 10, \"post_subject\": \"Re: Returning an index and outputting a dataset to a file\", \"username\": \"georgeb2d\", \"post_text\": \"An interesting answer since I figured out how to do it. \\n\\nThere was a successful run on Alpha_DEV_Thor. WU W20150528-174055.\\n\\nHere is the snippet of code in the function:\\n Final_DS := JOIN(ds, dsFinalRecs_DS,\\n LEFT.RoadSegID = RIGHT.RoadSegID AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t LEFT.BoundaryID = RIGHT.BoundaryID,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t JoinEm(LEFT,RIGHT));\\n\\n OUTPUT(FINAL_DS,,\\tGRID_SETUP[1].GRID_BASE_FILE, THOR, OVERWRITE);\\n\\t\\t\\n\\t\\tDS_Grid := DATASET(GRID_SETUP[1].GRID_BASE_FILE, X.Layouts.BLOB_GRID_ROAD_SEGMENTS_CATEGORY_LAYOUT, THOR);\\n\\t\\t \\n RETURN INDEX(DS_Grid,\\n\\t\\t {Grid_number},{WKT,RoadSegId,BoundaryId,RoadSpeedlimit,RoadCategory,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tRoadTypeId,Validated}\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t ,Grid_Setup[1].GRID_KEY ) ;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nend;
\", \"post_time\": \"2015-05-28 21:58:37\" },\n\t{ \"post_id\": 7690, \"topic_id\": 1751, \"forum_id\": 10, \"post_subject\": \"Re: Returning an index and outputting a dataset to a file\", \"username\": \"rtaylor\", \"post_text\": \"Don,I am building a function that returns an index.
An INDEX definition is a declaration of a file on disk. It is not a recordset. Therefore, you cannot RETURN an INDEX from a function.\n There is elaborate code to build the dataset.
And that is the code that properly belongs in a FUNCTION, like this:fn_dons := FUNCTION\\n//... \\n RETURN JOIN(ds, dsFinalRecs_DS,\\n LEFT.RoadSegID = RIGHT.RoadSegID AND\\n LEFT.BoundaryID = RIGHT.BoundaryID,\\n JoinEm(LEFT,RIGHT));\\nEND;
Then I want to write the data set to a file, and then return the INDEX.
There are several problems with this:\n1. The OUTPUT inside the FUNCTION is an action, so as written it simply gets skipped -- the file is never written.\n2. There is no BUILD, so you never actually have an index on disk, only an INDEX declaration.\n3. Every time you use the INDEX the FUNCTION returns, the rest of the code in the FUNCTION is called again and all the work of building the dataset is re-done.\n
\\nTherefore, your code should end up more like this:fn_dons := FUNCTION\\n//... \\n RETURN JOIN(ds, dsFinalRecs_DS,\\n LEFT.RoadSegID = RIGHT.RoadSegID AND\\n LEFT.BoundaryID = RIGHT.BoundaryID,\\n JoinEm(LEFT,RIGHT));\\nEND;\\n\\nWriteDS := OUTPUT(fn_dons,,GRID_SETUP.GRID_BASE_FILE, OVERWRITE);\\n \\nDS := DATASET(GRID_SETUP.GRID_BASE_FILE,RECORDOF(fn_dons),FLAT); \\n \\nIDX_Final_DS := INDEX(DS,{DS.Grid_number},{DS},Grid_Setup[1].GRID_KEY ) ;\\nBldIDX := BUILD(IDX_Final_DS,OVERWRITE);\\n\\nSEQUENTIAL(WriteDS,BldIDX);
Note the use of SEQUENTIAL here. You have to have that so the OUTPUT completes writing the file to disk before you BUILD the INDEX from that file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-05-28 20:58:21\" },\n\t{ \"post_id\": 7688, \"topic_id\": 1751, \"forum_id\": 10, \"post_subject\": \"Returning an index and outputting a dataset to a file\", \"username\": \"georgeb2d\", \"post_text\": \"I am building a function that returns an index. There is elaborate code to build the dataset. Then I want to write the data set to a file, and then return the INDEX. As I look at the code, it will skip writing the output. How do I make it write the output and return the INDEX. \\n\\nHere is a code snippet:\\n\\nfn_dons := FUNCTION\\n...\\n Final_DS := JOIN(ds, dsFinalRecs_DS,\\n LEFT.RoadSegID = RIGHT.RoadSegID AND\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t LEFT.BoundaryID = RIGHT.BoundaryID,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t JoinEm(LEFT,RIGHT));\\n\\n Final_File := OUTPUT(FINAL_DS,,\\tGRID_SETUP.GRID_BASE_FILE, THOR, OVERWRITE);\\n\\t\\t\\n\\t\\t\\n\\t\\t \\n RETURN INDEX(Final_DS,\\n {X.Layouts.BLOB_GRID_ROAD_SEGMENTS_CATEGORY_LAYOUT.Grid_number},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tRECORDOF(X.Layouts.BLOB_GRID_ROAD_SEGMENTS_CATEGORY_LAYOUT)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t ,Grid_Setup[1].GRID_KEY ) ;\\n\\tEND;
\\n\\nI really appreciate the help given on this forum.I hope I am not abusing it.\\nDon\", \"post_time\": \"2015-05-28 19:37:33\" },\n\t{ \"post_id\": 7724, \"topic_id\": 1759, \"forum_id\": 10, \"post_subject\": \"Re: Dynamically declare a column in Table function\", \"username\": \"David Dasher\", \"post_text\": \"Excellent, thanks Bob\\n\\nDavid\", \"post_time\": \"2015-06-03 12:27:00\" },\n\t{ \"post_id\": 7723, \"topic_id\": 1759, \"forum_id\": 10, \"post_subject\": \"Re: Dynamically declare a column in Table function\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI think you are on the right track.\\n\\nUsing a MAP to calculate a dynamic value which is then included in a generic column in your table is definitely achievable with ECL. For example, in one of our training queries we do something similar:\\n\\nHighValue := IF($.IsValidAmount($.DN.Property.Total_value) AND\\n $.IsValidAmount($.DN.Property.Assessed_value),\\n IF($.DN.Property.Total_value > $.DN.Property.Assessed_value,\\n $.DN.Property.Total_value,\\n $.DN.Property.Assessed_value),\\n IF($.IsValidAmount($.DN.Property.Total_value),\\n $.DN.Property.Total_value,\\n $.DN.Property.Assessed_value));
\\n\\nThis example uses some nested IF functions, but a MAP could also be used in this context. Then the value of HighValue could be added to any TABLE as needed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-03 12:21:10\" },\n\t{ \"post_id\": 7721, \"topic_id\": 1759, \"forum_id\": 10, \"post_subject\": \"Dynamically declare a column in Table function\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nI have a table structure being generated where a column could change. Ideally I'd like to pass the column name as a parameter in a Roxie Query. \\n\\nIn the code below instead of using fa_agecl2 that would be my Roxie Parameter which I could define on the fly. \\n\\nSubLevelAgg := TABLE(RespondentsDescriptionAgg, \\n{s_SurveyDescription, \\nq_Questionl1description,\\nfa_Agecl2,\\nAgecl2_Respondent_count := COUNT(group),\\nAgecl2_PBT_Weighting_SUM := SUM(group, FA_PBT_Weighting)},\\ns_SurveyDescription,q_Questionl1description,fa_agecl2, MERGE);
\\n\\nI could prepare the data initially with a map statement into a generic column. I was just curious if this could work?\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2015-06-03 11:53:48\" },\n\t{ \"post_id\": 7801, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Publish query with imports in shared repo using the command \", \"username\": \"bforeman\", \"post_text\": \"Fantastic! Thanks for the feedback Sandra! I will pass this on to the documentation team so the next person who goes down this road will have an easier path \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-18 15:08:17\" },\n\t{ \"post_id\": 7800, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Bob,\\n\\nThank you very much for all your replies. I've managed to publish a query with imports. \\nThese are the steps followed:\\n1. Eclplus command to retrieve a workunit id.\\n
\\neclplus server=<yourip> cluster=<cluster> queue=<queue> user=<yourusername> password=<yourpassword> @roxie_test.ecl\\n
\\n2. Using the workunit id retrieved:\\n\\necl publish --name=test_commandline_imports <workunitid>\\n
\\n\\nAll in one for automation:\\n ecl publish --name=test_commandline_imports $(eclplus server=<yourip> cluster=<cluster> queue=<queue> user=<youtusername> password=<yourpassword> @roxie_test.ecl | grep -o "W[0-9]*-[0-9]*")\\n
\\n\\n(I've set the parameters eclWatchIp, eclUserName, eclPassword and eclWatchPort in ecl.ini in the local folder accordingly for ecl publish. Same can be done for eclplus in eclplus.ini)\\n\\nThanks once again.\\nBest regards,\\nSandra\", \"post_time\": \"2015-06-18 14:56:25\" },\n\t{ \"post_id\": 7795, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"bforeman\", \"post_text\": \"Sandra,\\n\\nDid you also try:\\n\\necl publish --target=roxie_dev -I=http://10.193.129.1/ --main=roxie_test --activate roxie_test.ecl
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-18 13:05:50\" },\n\t{ \"post_id\": 7792, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"bforeman\", \"post_text\": \"Hi Sandra,\\n\\nI forgot to mention, before you PUBLISH, you need to compile the query.\\n\\nOn the command line, use the following syntax to do this:\\n\\nEclcc –I <root directory-of-sources> <fullpath-of-main-file>
\\n\\nAfter that, the publish option should just be a parallel copy from THOR to your target ROXIE.\\n\\nAre you able to publish this query from the ECL Watch? There may be information there that helps you with the command line publish.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-18 12:36:50\" },\n\t{ \"post_id\": 7791, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Bob,\\n\\nThank you for your reply. Adapting your query to:\\n ecl publish --target=roxie_dev -I=http://10.193.129.1/ --activate -main roxie_test.ecl
\n\nreturned the following error for me:\neclserver: roxie_test.ecl(0,0): Error C3: Could not resolve attribute roxie_test.ecl\n1 error(s), 0 warning(s)\n\n\nThe ip used in -I is the Roxie IP pointing to a Linux based system. I'm not sure where to find the repository there.\n\nI also tried:\necl publish --target=roxie_dev -I=http://10.193.129.1/ --main=Test --activate roxie_test.ecl\n
\\nbut received the following error:\\n[size=85:zul3krgv]roxie_test.ecl(1,29): error C2081: Import names unknown module "Test"\\nroxie_test.ecl(3,13): error C2167: Unknown identifier before "." (expected :=)\\n2 errors, 0 warning\\nError creating archive\\n\\n\\nroxie_test.ecl contains IMPORT Test.Roxie_import; Roxie_import.output_hello_world;\\nand Roxie_import.ecl contains output_hello_world which is a function that outputs hello world.\\n\\nRegards,\\nSandra\", \"post_time\": \"2015-06-18 10:23:19\" },\n\t{ \"post_id\": 7779, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"bforeman\", \"post_text\": \"Hi Sandra,\\n\\nHere is some more information from a chat I had with the development team:\\n\\nIt depends on the query. If it is self contained I would probably change to the directory. If something more complex I would use –main:\\n\\necl publish --target=roxie --name=TEST_COMMANDLINE --activate –I C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MyFolder –main roxie_test
\\n\\n(syntax may vary a bit)\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-16 17:35:56\" },\n\t{ \"post_id\": 7774, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Richard,\\n\\nThank you for your clarification. It sounds like a potential solution. Should I use roxie ip? Ssh-ing to that ip reveals a linux based system. Where would I find the repository?\\n\\nIs there no other way to publish a query (that also has imports) on a roxie server from the command line?\\n\\nBest regards,\\nSandra\", \"post_time\": \"2015-06-16 13:35:45\" },\n\t{ \"post_id\": 7770, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"rtaylor\", \"post_text\": \"Sandra,\\n\\nIf you can map a drive to that box then Bob's first example should work by naming the explicit path to the file in the code repository you want to run.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-16 13:13:21\" },\n\t{ \"post_id\": 7768, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Richard,\\n\\nThank you for your reply. I can ssh to that ip...\\nWould that be for finding the repository on the server? It is running linux.\\n\\nRegards,\\nSandra\", \"post_time\": \"2015-06-16 09:30:34\" },\n\t{ \"post_id\": 7762, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"rtaylor\", \"post_text\": \"Sandra,\\n\\nCan you map a drive to that IP?\\n\\nRichard\", \"post_time\": \"2015-06-15 15:07:53\" },\n\t{ \"post_id\": 7758, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi Bob,\\n\\nThank you very much for your prompt response.\\nI've added clienttools/bin to the PATH and I am running ecl publish in the local directory of the roxie_test.ecl file.\\n\\nHowever, the repository is located on a server and not on my local machine, so unfortunately specifying the relative path from my local machine didn't solve the issue for me.\\n\\nI've set the parameters eclWatchIp, eclUserName, eclPassword and eclWatchPort in ecl.ini in the local folder accordingly, and publishing a hello world did work successfully; however, the imports that aren't being picked up from the repository on that ip...\\n\\nBest regards,\\nSandra\", \"post_time\": \"2015-06-15 13:21:19\" },\n\t{ \"post_id\": 7756, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Re: Publish a query with imports using the command line\", \"username\": \"bforeman\", \"post_text\": \"Hi Sandra,\\n\\nDid you try modifying your script to include the relative path?\\n\\nExample:\\necl publish --target=roxie --name=TEST_COMMANDLINE --activate C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MyFolder\\\\roxie_test.ecl
\\n\\nOr even the other way around might work. Navigate to your actual repository folder and then path back to the ECL executable:\\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.2.2\\\\clienttools\\\\bin\\\\ecl publish --target=roxie --name=TEST_COMMANDLINE --activate roxie_test.ecl
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-15 12:37:15\" },\n\t{ \"post_id\": 7755, \"topic_id\": 1767, \"forum_id\": 10, \"post_subject\": \"Publish a query with imports using the command line\", \"username\": \"SandraBuda\", \"post_text\": \"Hi,\\n\\nI would like to know how to solve adding imports when publishing a query using the command line ecl publish.\\nPublishing a query using the command line completed successfully for me for a hello world example with no imports using the following command:\\n\\n ecl publish --target=roxie --name=TEST_COMMANDLINE --activate roxie_test.ecl
\\n\\nwhere roxie_test.ecl contains only the following code: OUTPUT('Hello World!'); and where ecl.ini contains the configuration parameters, i.e., eclWatchIp, eclUserName, eclPassword and eclWatchPort. \\n\\nHowever, when the ecl code contains imports, I did not manage to publish the query running the same command from above due to the following error: error C2081: Import names unknown module
\\n\\nAny help would be very much appreciated.\\nThank you in advance,\\nSandra\", \"post_time\": \"2015-06-15 12:00:41\" },\n\t{ \"post_id\": 7828, \"topic_id\": 1779, \"forum_id\": 10, \"post_subject\": \"Re: Skew limit is not enforced\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Thanks Richard !!\\n\\nRegards,\\nRamesh\", \"post_time\": \"2015-06-25 05:48:19\" },\n\t{ \"post_id\": 7812, \"topic_id\": 1779, \"forum_id\": 10, \"post_subject\": \"Re: Skew limit is not enforced\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,\\n\\nI just ran this variant of your code (the only real change is I'm generating the dataset):
some_layout := RECORD\\n UNSIGNED1 some_field; \\nEND;\\n\\n//generate a million records on each node with half having the same value (1)\\nin_ds := DATASET(1000000000,\\n TRANSFORM(some_Layout,\\n SELF.some_field := IF(COUNTER % 2 = 0,1,COUNTER % 255 + 1)),\\n LOCAL ) ; \\n\\nout_ds := SORT ( in_ds , some_field , SKEW ( 0.15 ) ) ; \\nOUTPUT(out_ds , , '~RTTEST::OUT::some_out_file' ,overwrite);
\\nI ran this code on a 3-node training cluster using release 5.2.0-1 and my job failed with this error:System error: 10084: Graph[1], sort[3]: SORT failed. Graph[1], sort[3]: Exceeded skew limit: 0.150000, estimated skew: 0.497059
\\nSo your solution is to upgrade to the 5.2 release where this skew problem has already been solved.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-22 14:35:08\" },\n\t{ \"post_id\": 7810, \"topic_id\": 1779, \"forum_id\": 10, \"post_subject\": \"Skew limit is not enforced\", \"username\": \"Ramesh Pachamuthu\", \"post_text\": \"Hi,\\n\\nI am trying to sort a dataset of ~1 million rows. During SORT operation data is skewed about +393%,-100%. Using SKEW option with SORT was not enforcing the skew limit. \\nMy question is, why the given skew-limit is not enforced ? \\nWhether HPCC has any internal logic that decides to ignore given skew-limit ?\\n\\nThis is my ECL code:\\n\\nin_ds := DATASET ( '~some_in_file' , some_layout , thor ) ; // data is evenly distributed\\nout_ds := SORT ( in_ds , some_field , SKEW ( 0.15 ) ) ; // actual skew is +393%,-100%\\noutput(out_ds , , '~some_out_file' );\\n
\\nI am using 4.0.2-2 HPCC version.\\n\\nAny help would be very much appreciated. Thanks in advance.\\n\\nRegards,\\nRamesh\", \"post_time\": \"2015-06-22 13:15:32\" },\n\t{ \"post_id\": 7822, \"topic_id\": 1783, \"forum_id\": 10, \"post_subject\": \"Re: Can you write a function that takes in a dataset?\", \"username\": \"rtaylor\", \"post_text\": \"KatyChow,\\n\\nSure you can do that, just like this:IMPORT STD;\\nMyRec := RECORD\\n STRING1 Value1;\\n STRING1 Value2;\\nEND;\\nds1 := DATASET([{'C','G'},{'A','B'}],MyRec);\\nds2 := DATASET([{'D','G'},{'E','B'}],MyRec);\\nds3 := DATASET([{'G','G'},{'J','B'}],MyRec);\\n\\nMyFunc(DATASET(MyRec) D) := FUNCTION\\n MyRec XF(MyRec L) := TRANSFORM\\n SELF.Value1 := STD.Str.ToLowercase(L.Value1);\\n SELF.Value2 := STD.Str.ToLowercase(L.Value2);\\n END;\\n OutDS := PROJECT(D,XF(LEFT));\\n RETURN OutDS;\\nEND;\\n\\nres := MyFunc(ds1) + MyFunc(ds2) + MyFunc(ds3);\\nOUTPUT(res);
\\nPassing a DATASET as a parameter is discussed here: http://hpccsystems.com/download/docs/ecl-language-reference/html/DATASET_as_a_Parameter_Type.html\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-24 14:51:54\" },\n\t{ \"post_id\": 7820, \"topic_id\": 1783, \"forum_id\": 10, \"post_subject\": \"Can you write a function that takes in a dataset?\", \"username\": \"KatyChow\", \"post_text\": \"I am trying to do some tabling with 3 datasets that have the same record structure. I want the same results from all three of the tables I planned on creating. I know I can easily copy and paste and get what I want, but I am wondering if I could just write a function that takes in a dataset, tables it, and then outputs the file?\\n\\nThanks!\", \"post_time\": \"2015-06-24 14:21:01\" },\n\t{ \"post_id\": 7869, \"topic_id\": 1794, \"forum_id\": 10, \"post_subject\": \"Re: How to write optimized code in ECL ?\", \"username\": \"rtaylor\", \"post_text\": \"Shub,\\n\\nThe subject of optimizing ECL code is extremely broad and usually problem-specific. Here are some resources you already have to answer this: \\n
\\nThe bottom line here is, the HPCC Systems web portal makes a very large amount of information about HPCC and ECL freely available to you, 24/7, along with providing this forum for asking specific questions and getting help from the entire HPCC community.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-01 13:55:58\" },\n\t{ \"post_id\": 7866, \"topic_id\": 1794, \"forum_id\": 10, \"post_subject\": \"How to write optimized code in ECL ?\", \"username\": \"vyasshub\", \"post_text\": \"Hi All,\\nPlease share some steps for writing optimized code in ECL.\\nFor Example :\\nIt is always better to use UNSIGNED4 as data type for "date" in comparision to STRING8.\\nPlease add more.\\n\\nThanks,\\nShub\", \"post_time\": \"2015-07-01 04:21:40\" },\n\t{ \"post_id\": 7876, \"topic_id\": 1796, \"forum_id\": 10, \"post_subject\": \"Re: SuperFile transacation not completing\", \"username\": \"Jerry\", \"post_text\": \"Update :- \\nI ran the same program in HPCC version 5.2.0 and it was executed correctly.\\nHence closing this thread.\", \"post_time\": \"2015-07-06 09:52:09\" },\n\t{ \"post_id\": 7871, \"topic_id\": 1796, \"forum_id\": 10, \"post_subject\": \"SuperFile transacation not completing\", \"username\": \"Jerry\", \"post_text\": \"Hi,\\n\\nI am trying my hand in automated incremental data processing and I am using 4.0.2-2 HPCC version.\\n\\n I have created a SuperFile and a SuperIndex. When the first batch of data came in, my program created a subfile and a corresponding subindex and added them to SuperFile and SuperIndex. I then published my Roxie query and found out that it was working fine. I also created a packagemap and added my subindex and SuperIndex. \\n When the second batch of data came in I expected the same set of events to happen, but found out that \\n a. subfile was created and got added to SuperFile\\n b. subindex was created, but the transaction to add subindex to SuperIndex is not completing\\nI checked the logs and it seems like the transaction has locked the superindex, but is unable to add the new subindex to SuperIndex and then the lock times out. These set of events are then repeated.\\n\\n I then took down my Roxie query and tried to process my data again. This time my data got processed successfully. My aim is to process data without bringing down my published Roxie query. Would someone please tell me what is that I am doing wrong?\\n\\nThanks in advance.\\n\\nJerry\", \"post_time\": \"2015-07-03 15:02:35\" },\n\t{ \"post_id\": 7917, \"topic_id\": 1802, \"forum_id\": 10, \"post_subject\": \"Re: Default packagemap\", \"username\": \"Jerry\", \"post_text\": \"Thanks for the information Bob.\\nDeleting the packagemap instead of deactivating it solved the issue.\\n\\nJerry\", \"post_time\": \"2015-07-15 10:16:38\" },\n\t{ \"post_id\": 7911, \"topic_id\": 1802, \"forum_id\": 10, \"post_subject\": \"Re: Default packagemap\", \"username\": \"bforeman\", \"post_text\": \"AFAIK the package map addresses all queries on a cluster, once you implement a package map, it has to be tied to all active queries on your cluster.\", \"post_time\": \"2015-07-14 17:37:57\" },\n\t{ \"post_id\": 7898, \"topic_id\": 1802, \"forum_id\": 10, \"post_subject\": \"Default packagemap\", \"username\": \"Jerry\", \"post_text\": \"HPCC version - 4.0.2-2 \\n\\nFor implementing automated query publishing, I had created a packagemap.\\nAfter finishing with my project I deactivated the packagemap as I was no longer planning to use that project again. 
The problem this caused was that I could no longer run other published queries in Roxie. I could see the published queries, but if i try to run, an error 'Unknown query - no package active' is thrown. This doesnt make any sense to me as these queries are not at all related to the packagemap. As a temporary solution I activated my packagemap again. This helped me to run all the simple published queries but not the ones that refer to data that resides on the cluster.\\nHence\\n1. I would like to know how to load the default packagemap instead of the one that i created.\\n\\nThanks in advance\\n\\nJerry\", \"post_time\": \"2015-07-10 14:15:43\" },\n\t{ \"post_id\": 7914, \"topic_id\": 1803, \"forum_id\": 10, \"post_subject\": \"Re: "See the thor graph of a workunit" in eclplus\", \"username\": \"Shilpa\", \"post_text\": \"I gave a graph name to the action and it worked.\\n\\nThanks Jim\\nCheers \", \"post_time\": \"2015-07-15 06:18:19\" },\n\t{ \"post_id\": 7912, \"topic_id\": 1803, \"forum_id\": 10, \"post_subject\": \"Re: "See the thor graph of a workunit" in eclplus\", \"username\": \"JimD\", \"post_text\": \"Upon further investigation, it seems that the action required a graph name to be specified:\\n\\nFor example:\\n{code}\\neclplus server=192.168.56.120 action=graph graph=graph1 wuid=W20150610-113741\\n{code}\\n\\n1) I will be adding this detail to documentation. \\n https://track.hpccsystems.com/browse/HPCC-13904\\n2) The issue which caused the backend process to crash when a graph name was not sent is being fixed so that if graph name is omitted, it will return data for all graphs. . https://track.hpccsystems.com/browse/HPCC-13903\\n\\nI want to thank you again for your question. It helped us improve the software and the documentation. This is how open-source community works. \\n\\nJim\", \"post_time\": \"2015-07-14 17:56:14\" },\n\t{ \"post_id\": 7909, \"topic_id\": 1803, \"forum_id\": 10, \"post_subject\": \"Re: "See the thor graph of a workunit" in eclplus\", \"username\": \"JimD\", \"post_text\": \"I was able to reproduce this, so I entered the issue in Jira:\\nhttps://track.hpccsystems.com/browse/HPCC-13903\\n\\nIn the meantime, you can use ECL Watch to view the graph. (Select the Graph tab from the Workunit details page)\", \"post_time\": \"2015-07-14 13:51:35\" },\n\t{ \"post_id\": 7906, \"topic_id\": 1803, \"forum_id\": 10, \"post_subject\": \""See the thor graph of a workunit" in eclplus\", \"username\": \"Shilpa\", \"post_text\": \"I am exploring eclplus client tool (Version 5.2.0-1).I gave server and cluster details in eclplus.ini. All the options available in action are working fine except "graph".\\n\\nI am running a ECL script like this:-\\neclplus action=graph wuid=W20150714-113228\\nand the command fails with the error "SOAP rpc error".\\n\\nHelp me to resolve this issue?\", \"post_time\": \"2015-07-14 06:55:18\" },\n\t{ \"post_id\": 7978, \"topic_id\": 1828, \"forum_id\": 10, \"post_subject\": \"Re: Latin 9 to Unicode conversion\", \"username\": \"john holt\", \"post_text\": \"The LOCALE on a record definition is not going to help. A locale and a code-page don't really equate.\\n\\nSince you know that your strings are encoded as 8859-15 (Latin-9), you want to say that when you use the TOUNICODE(...) function.\\n\\nI have attached an example below. In this example, I used the TRANSFER(...) function to explicitly treat the string as a sequence of bytes (DATA) because the TOUNICODE(...) function expects a DATA string.\\n\\n
// Selected code points from latin-9 (8859-15)\\n// The 8859-15 set is the same as 8859-1 except for 8 points.\\n// A4, A6, A8, B4, B8, BC, BD, BE, which are : Euro currency, Capital S w/caron\\n// small S w/caron, Capital Z w/caron, small Z w/caron, Capital OE ligature,\\n// small OE ligature, and capital Y w/diaeresis respectively.\\n// Characters replaced are: currency, broken bar, diaeresis, acute accent,\\n// cedilla, 1/4, 1/2, 3/4\\n\\nSTRING test_string := 'Characters of interest are: ' \\n + (STRING8) x'A4A6A8B4B8BCBDBE'\\n + '.';\\nOUTPUT(test_string, NAMED('String_as_default'));\\nUNICODE from_8859_1 := TOUNICODE(TRANSFER(test_string, DATA), '8859-1');\\nOUTPUT(from_8859_1, NAMED('As_unicode_from_8859_1'));\\nUNICODE from_8859_15 := TOUNICODE(TRANSFER(test_string, DATA), '8859-15');\\nOUTPUT(from_8859_15, NAMED('As_unicode_from_8859_15'));
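
If you need the same conversion applied to every record of a CSV file, a PROJECT along these lines should work -- this is just an untested sketch, and the file name and field names are placeholders:
// placeholder layout for the Latin-9 (8859-15) input as read from the CSV
InRec := RECORD
    STRING city;
END;
// target layout with the field held as UNICODE
UniRec := RECORD
    UNICODE city;
END;
inDS := DATASET('~myscope::latin9_input', InRec, CSV);
UniRec ToUni(InRec L) := TRANSFORM
    // treat the bytes as 8859-15 and convert them to Unicode
    SELF.city := TOUNICODE(TRANSFER(L.city, DATA), '8859-15');
END;
uniDS := PROJECT(inDS, ToUni(LEFT));
OUTPUT(uniDS);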
\", \"post_time\": \"2015-08-03 16:04:13\" },\n\t{ \"post_id\": 7964, \"topic_id\": 1828, \"forum_id\": 10, \"post_subject\": \"Re: Latin 9 to Unicode conversion\", \"username\": \"Cologero\", \"post_text\": \"I couldn't get that to work. Defining the field as unicode doesn't change anything. Also, I don't see where Latin 9 is related to any locale. ISO-8859-15 is not a unicode encoding.\", \"post_time\": \"2015-07-30 13:27:44\" },\n\t{ \"post_id\": 7958, \"topic_id\": 1828, \"forum_id\": 10, \"post_subject\": \"Re: Latin 9 to Unicode conversion\", \"username\": \"Cologero\", \"post_text\": \"That is not helpful, since the encoding of the field is ISO-8859-15, not unicode. The locale has no effect.\", \"post_time\": \"2015-07-29 15:04:14\" },\n\t{ \"post_id\": 7957, \"topic_id\": 1828, \"forum_id\": 10, \"post_subject\": \"Re: Latin 9 to Unicode conversion\", \"username\": \"rtaylor\", \"post_text\": \"Colgero,\\n\\nWhy not just use the LOCALE option on the RECORD structure for the file and define the string fields as UNICODE?\\n\\nThe LOCALE option docs says to specify the locale as: A string constant containing a valid locale code, as specified in ISO standards 639 and 3166.
So you should be able to use any locale code where Latin-9 is used.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-29 14:44:13\" },\n\t{ \"post_id\": 7955, \"topic_id\": 1828, \"forum_id\": 10, \"post_subject\": \"Latin 9 to Unicode conversion\", \"username\": \"Cologero\", \"post_text\": \"We have an incoming CSV file in Latin 9 (ISO-8859-15) encoding that we need to convert to unicode. Most of the characters convert fine. There are a few that do not: e.g., 'ž'\\n\\nI tried a BEGINC++ structure to convert the string to UTF-8. However, converting the 'ž' (0xb8 in Latin 9) to a two byte wide UTF-8 character doesn't work. I don't understand the internal representation of unicode strings. If I convert 'ž' to DATA, it displays as 0x1A which can't be correct.\\n\\nAny suggestions?\", \"post_time\": \"2015-07-29 14:28:29\" },\n\t{ \"post_id\": 8000, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"mrumsey\", \"post_text\": \"I did a small update to the code (my first reply) and was already using the #OPTION, so that change may have fixed my problems and I didn't notice it. \\n\\nI will implement your changes and test them later today. I doubt I will see much on ~1.5 million records and 6 groups, but we have some larger datasets this may be used with. There should be great gains there.\\n\\nThanks again!\\n\\nMatt Rumsey\", \"post_time\": \"2015-08-06 14:42:41\" },\n\t{ \"post_id\": 7999, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"rtaylor\", \"post_text\": \"Matt,Do you know why I would get the spill size error one day and not the next?\\nIf I didn't change much or anything, why would I get this issue?
There's a big difference here between "didn't change much" and "didn't change anything." If it's "much" then you could have found the one little thing to make it right. If it's "anything" then ???\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-06 14:39:19\" },\n\t{ \"post_id\": 7998, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"mrumsey\", \"post_text\": \"I will definitely check that out when I get a chance!\\n\\nDo you know why I would get the spill size error one day and not the next?\\n\\nIf I didn't change much or anything, why would I get this issue? \\n\\nThanks again!\\n\\nMatt Rumsey\", \"post_time\": \"2015-08-06 14:22:29\" },\n\t{ \"post_id\": 7997, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nI wasn't expecting those changes to make much difference, if any. The larger the amount of data the bigger difference I would expect to see (if there is any appreciable difference at all, that is). \\n\\nI was hoping that defining the DISTRIBUTE(infile...) separately might get the compiler to optimize it and show a difference, but ... \\n
I also noticed that both sets of code take twice as long on a 400 thor than on a 50 thor. I assume that this is due to the overhead in processing distributes and sorts.
That tells me you're currently working with too little data to be bothering with the 400-node cluster. Stay with the 50-node cluster until you have more data than the 50 can handle.\\n\\nHere's a couple more minor changes. I changed all the HASH functions to HASH32. They're both 32-bit algorithms, but HASH32 is a bit better than HASH. I also changed the base_calculation JOIN to use a base_calc_tbl vertical slice TABLE instead of the full infile records. That should help a little with memory requirements for that JOIN.\\nIMPORT ML,STD;\\n\\nEXPORT Add_Medians(InFile, GroupBy, Compute, OutFile) := MACRO \\n\\n #uniquename(unique_list)\\n %unique_list% := SORT(PROJECT(InFile, \\n TRANSFORM({STRING25 GroupBy},\\n SELF.GroupBy := (STRING)LEFT.GroupBy)),\\n GroupBy);\\n\\n //This replaces the slim_rec, slim_in, and base_unique_list definitions\\n //since in a MACRO there is little need to break out definitions separately \\n //if they're only used once\\n\\n #uniquename(infile_dist)\\n %infile_dist% := DISTRIBUTE(InFile, HASH32((STRING)GroupBy));\\n\\n //this DISTRIBUTE code was in two separate JOINs below\\n //(base_calculation and OutFile)\\n //so it's better done as a separate definition\\n //so the compiler might optimize \\n \\n #uniquename(num_rec)\\n %num_rec% := RECORD \\n UNSIGNED8 Number;\\n STRING25 GroupBy;\\n END;\\n\\n #uniquename(add_num)\\n %num_rec% %add_num%(%Unique_List% Le, INTEGER C) := TRANSFORM\\n SELF.Number := C;\\n SELF.GroupBy := Le.GroupBy;\\n END;\\n\\n //%unique_list% was already sorted by GroupBy so no need to re-SORT\\n #uniquename(dds_numeric_group)\\n %dds_numeric_group% := DEDUP(PROJECT(%unique_list%, %add_num%(LEFT, COUNTER)),Groupby);\\n\\n #uniquename(base_calc_tbl)\\n %base_calc_tbl% := TABLE(%infile_dist%,{GroupBy,Compute},LOCAL); \\n //added this TABLE to lessen the size of thr records required by the \\n //base_calculation JOIN\\n\\n #uniquename(XF) \\n ML.Types.NumericField %XF%(%base_calc_tbl% Le, %dds_numeric_group% Ri, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n SELF.Number := Ri.Number;\\n SELF.Value := Le.Compute;\\n END;\\n\\n #uniquename(base_calculation)\\n %base_calculation% := JOIN(%base_calc_tbl%, \\n DISTRIBUTE(%dds_numeric_group%, HASH32((STRING)GroupBy)),\\n LEFT.GroupBy = RIGHT.GroupBy,\\n %XF%(LEFT, RIGHT, COUNTER),\\n LOCAL);\\n\\n #uniquename(median_table)\\n %median_table% := ML.FieldAggregates(%base_calculation%).Medians;\\n\\n #uniquename(prep_rec)\\n %prep_rec% := RECORD\\n STRING25 GroupBy;\\n DECIMAL9_1 Median;\\nEND;\\n\\n #uniquename(matchback)\\n %prep_rec% %matchback%(%dds_numeric_group% Le, %median_table% Ri) := transform\\n SELF.GroupBy := Le.GroupBy;\\n SELF.Median := Ri.Median;\\n END;\\n#uniquename(return_group)\\n %Return_Group% := JOIN( DISTRIBUTE(%dds_numeric_group%, Number), \\n DISTRIBUTE(%median_table%, Number),\\n LEFT.Number = RIGHT.Number,\\n %matchback%(LEFT, RIGHT),\\n LOCAL);\\n #uniquename(outrec)\\n %outrec% := RECORD\\n RECORDOF(InFile);\\n DECIMAL9_1 Median;\\n END;\\n\\n #uniquename(add_med)\\n %outrec% %add_med%(InFile Le, %Return_Group% Ri) := TRANSFORM\\n SELF.median := Ri.Median;\\n SELF := Le;\\n END;\\n\\n OutFile := JOIN(%infile_dist%,\\n DISTRIBUTE(%Return_Group%, HASH32((STRING)GroupBy)),\\n (STRING)LEFT.GroupBy = (STRING)RIGHT.GroupBy,\\n %add_med%(LEFT, RIGHT),\\n LOCAL); \\n\\nENDMACRO;
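For reference, the call syntax is unchanged from your version -- something like this, where the dataset and field names are just placeholders and the fourth parameter becomes the definition holding the result:
// MyDS has a grouping field and a numeric field to take medians over
Add_Medians(MyDS, state_code, claim_amount, MyDSWithMedians);
OUTPUT(MyDSWithMedians);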
\\nLet me know if that makes any improvement for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-06 14:17:41\" },\n\t{ \"post_id\": 7996, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"mrumsey\", \"post_text\": \"Thanks for the response!\\n\\nYour adjustments make sense, but didn't positively adjust my run-time. This is probably more due to the limited number groups in my dataset (6). I expect that your code is probably going to make headway the more groups we have, since the sort functions would take up more time with more groups. \\n\\nSince this is a very real possibility, and since the run-time difference is ~10 seconds, I will probably stick with your code for more overall efficiency (unless I am completely wrong).\\n\\nI also noticed that both sets of code take twice as long on a 400 thor than on a 50 thor. I assume that this is due to the overhead in processing distributes and sorts.\\n\\nIf any my thinking is wrong, let me know. \\n\\nThanks again for taking the time to review my code!\", \"post_time\": \"2015-08-06 13:29:18\" },\n\t{ \"post_id\": 7994, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nI took the liberty of going through your MACRO code and here are a couple of suggested changes that may or may not help:\\nIMPORT ML,STD;\\n\\nEXPORT Add_Medians(InFile, GroupBy, Compute, OutFile) := MACRO \\n\\n #uniquename(unique_list)\\n %unique_list% := SORT(PROJECT(InFile, \\n TRANSFORM({STRING25 GroupBy},\\n SELF.GroupBy := (STRING)LEFT.GroupBy)),\\n GroupBy);\\n\\n //This replaces the slim_rec, slim_in, and base_unique_list definitions\\n //since in a MACRO there is little need to break out definitions separately \\n //if they're only used once\\n\\n #uniquename(infile_dist)\\n %infile_dist% := DISTRIBUTE(InFile, HASH((STRING)GroupBy));\\n\\n //this DISTRIBUTE code was in two separate JOINs below\\n //(base_calculation and OutFile)\\n //so it's better done as a separate definition\\n //so the compiler might optimize \\n \\n #uniquename(num_rec)\\n %num_rec% := RECORD \\n UNSIGNED8 Number;\\n STRING25 GroupBy;\\n END;\\n\\n #uniquename(add_num)\\n %num_rec% %add_num%(%Unique_List% Le, INTEGER C) := TRANSFORM\\n SELF.Number := C;\\n SELF.GroupBy := Le.GroupBy;\\n END;\\n\\n //%unique_list% was already sorted by GroupBy so no need to re-SORT\\n #uniquename(dds_numeric_group)\\n %dds_numeric_group% := DEDUP(PROJECT(%unique_list%, %add_num%(LEFT, COUNTER)),Groupby);\\n\\n #uniquename(XF) \\n ML.Types.NumericField %XF%(InFile Le, %dds_numeric_group% Ri, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n SELF.Number := Ri.Number;\\n SELF.Value := Le.Compute;\\n END;\\n\\n #uniquename(base_calculation)\\n %base_calculation% := JOIN(%infile_dist%, \\n DISTRIBUTE(%dds_numeric_group%, HASH((STRING)GroupBy)),\\n LEFT.GroupBy = RIGHT.GroupBy,\\n %XF%(LEFT, RIGHT, COUNTER),\\n LOCAL);\\n\\n #uniquename(median_table)\\n %median_table% := ML.FieldAggregates(%base_calculation%).Medians;\\n\\n #uniquename(prep_rec)\\n %prep_rec% := RECORD\\n STRING25 GroupBy;\\n DECIMAL9_1 Median;\\nEND;\\n\\n #uniquename(matchback)\\n %prep_rec% %matchback%(%dds_numeric_group% Le, %median_table% Ri) := transform\\n SELF.GroupBy := Le.GroupBy;\\n SELF.Median := Ri.Median;\\n END;\\n#uniquename(return_group)\\n %Return_Group% := JOIN( DISTRIBUTE(%dds_numeric_group%, Number), \\n DISTRIBUTE(%median_table%, Number),\\n LEFT.Number 
= RIGHT.Number,\\n %matchback%(LEFT, RIGHT),\\n LOCAL);\\n #uniquename(outrec)\\n %outrec% := RECORD\\n RECORDOF(InFile);\\n DECIMAL9_1 Median;\\n END;\\n\\n #uniquename(add_med)\\n %outrec% %add_med%(InFile Le, %Return_Group% Ri) := TRANSFORM\\n SELF.median := Ri.Median;\\n SELF := Le;\\n END;\\n\\n OutFile := JOIN(%infile_dist%,\\n DISTRIBUTE(%Return_Group%, HASH((STRING)GroupBy)),\\n (STRING)LEFT.GroupBy = (STRING)RIGHT.GroupBy,\\n %add_med%(LEFT, RIGHT),\\n LOCAL); \\n\\nENDMACRO;
\\nPlease let me know if my suggestions help or not. I don't think I changed your overall logic, but I'd like to hear about it if I did. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-06 10:54:14\" },\n\t{ \"post_id\": 7987, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Re: Error 10099 occurring during MACRO call.\", \"username\": \"mrumsey\", \"post_text\": \"I updated the code to not be crappy. It works pretty quickly on 1.5million records assuming I have:\\n
#OPTION('outputLimit', 100);
\\n\\nI guess I don't want to assume that others will remember to put this option at the top of their code and still would like to know if I can prevent the massive spill writes from occurring. \\n\\n\\nIMPORT ML,STD;\\n\\nEXPORT Add_Medians(InFile, GroupBy, Compute, OutFile) := MACRO\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n#uniquename(slim_rec)\\n%slim_rec% := RECORD\\n\\tSTRING25 GroupBy;\\nEND;\\n\\n#uniquename(slim_in)\\n%slim_rec% %slim_in%(InFile Le) := TRANSFORM\\n\\tSELF.GroupBy\\t:= (STRING)Le.GroupBy;\\nEND;\\n\\n#uniquename(base_unique_list)\\n%base_unique_list% := PROJECT(InFile, %slim_in%(LEFT)); \\n\\n#uniquename(unique_list)\\n%unique_list% := SORT(%base_unique_list%, GroupBy);\\n\\n#uniquename(num_rec)\\n%num_rec% := RECORD\\t\\n\\tUNSIGNED8 Number;\\n\\tSTRING25\\tGroupBy;\\nEND;\\n\\n#uniquename(add_num)\\n%num_rec% %add_num%(%Unique_List% Le, INTEGER C) := TRANSFORM\\n\\tSELF.Number \\t:= \\tC;\\n\\tSELF.GroupBy\\t:=\\tLe.GroupBy;\\nEND;\\n\\n#uniquename(numeric_group)\\n%numeric_group% := PROJECT(%unique_list%, %add_num%(LEFT, COUNTER));\\n\\n#uniquename(dds_numeric_group)\\n%dds_numeric_group% := DEDUP(SORT(%numeric_group%, GroupBy), GroupBy);\\n\\n#uniquename(XF) \\nML.Types.NumericField %XF%(InFile Le, %dds_numeric_group% Ri, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n SELF.Number := Ri.Number;\\n SELF.Value := Le.Compute;\\nEND;\\n\\n#uniquename(base_calculation)\\n%base_calculation% := JOIN(\\tDISTRIBUTE(InFile, HASH((STRING)GroupBy)), \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDISTRIBUTE(%dds_numeric_group%, HASH((STRING)GroupBy)),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLEFT.GroupBy = RIGHT.GroupBy,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t%XF%(LEFT, RIGHT, COUNTER),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLOCAL);\\n\\n#uniquename(median_table)\\n%median_table% := ML.FieldAggregates(%base_calculation%).Medians;\\n\\n#uniquename(prep_rec)\\n%prep_rec% := RECORD\\n\\tSTRING25 GroupBy;\\n\\tDECIMAL9_1 Median;\\nEND;\\n\\n#uniquename(matchback)\\n%prep_rec% %matchback%(%dds_numeric_group% Le, %median_table% Ri) := transform\\n\\tSELF.GroupBy := Le.GroupBy;\\n\\tSELF.Median := Ri.Median;\\nEND;\\n\\n#uniquename(return_group)\\n%Return_Group%\\t:=\\tJOIN(\\tDISTRIBUTE(%dds_numeric_group%, Number), \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDISTRIBUTE(%median_table%, Number),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLEFT.Number = RIGHT.Number,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t%matchback%(LEFT, RIGHT),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLOCAL);\\n#uniquename(outrec)\\n%outrec% := RECORD\\n\\tRECORDOF(InFile);\\n\\tDECIMAL9_1 Median;\\nEND;\\n\\n#uniquename(add_med)\\n%outrec% %add_med%(InFile Le, %Return_Group% Ri) := TRANSFORM\\n\\tSELF.median := Ri.Median;\\n\\tSELF \\t\\t\\t\\t:= Le;\\nEND;\\n\\nOutFile := JOIN(DISTRIBUTE(InFile, HASH((STRING)GroupBy)),\\n\\t\\t\\t\\t\\t\\t\\t\\tDISTRIBUTE(%Return_Group%, HASH((STRING)GroupBy)),\\n\\t\\t\\t\\t\\t\\t\\t\\t(STRING)LEFT.GroupBy = (STRING)RIGHT.GroupBy,\\n\\t\\t\\t\\t\\t\\t\\t\\t%add_med%(LEFT, RIGHT),\\n\\t\\t\\t\\t\\t\\t\\t\\tLOCAL);\\t\\t\\t\\t\\t\\t\\t \\n\\nENDMACRO;
\", \"post_time\": \"2015-08-04 21:17:11\" },\n\t{ \"post_id\": 7985, \"topic_id\": 1834, \"forum_id\": 10, \"post_subject\": \"Error 10099 occurring during MACRO call.\", \"username\": \"mrumsey\", \"post_text\": \"I am attempting to write a MACRO that will allow my team to input any dataset and calculate the Median value of groups. The code works on small test datasets, but not large test datasets (~1million rows, many input colomns). \\n\\nBelow is my MACRO, which takes 4 parameters:\\nInput file\\nGrouping attribute name\\nAttribute on which to calculate medians\\nOutput filename
\\n\\nI am getting the following error when I run on large datasets:\\nError: System error: 10099: Graph[1], workunitwrite[7]: Dataset too large to output to workunit (limit is set to 10) megabytes, in result (name=spill1), Master exception (0, 0), 10099,
\\n\\nThis output is an internal output between graphs (spill1) that happens around the call for the function %add_num% which literally just adds an incremental integer via a PROJECT function.\\n\\nDoes anyone know why this output/error is being generated?\\n\\n\\nIMPORT ML,STD;\\n\\nEXPORT Add_Medians(InFile, GroupBy, Compute, OutFile) := MACRO\\t\\t\\t\\t\\t\\t\\t\\t\\n\\n#uniquename(slim_rec)\\n%slim_rec% := RECORD\\n\\tSTRING25 GroupBy;\\nEND;\\n\\n#uniquename(slim_in)\\n%slim_rec% %slim_in%(InFile Le) := TRANSFORM\\n\\tSELF.GroupBy\\t:= (STRING)Le.GroupBy;\\nEND;\\n\\n#uniquename(base_unique_list)\\n%base_unique_list% := PROJECT(InFile, %slim_in%(LEFT)); \\n\\n#uniquename(unique_list)\\n%unique_list% := SORT(%base_unique_list%, GroupBy);\\n\\n#uniquename(num_rec)\\n%num_rec% := RECORD\\t\\n\\tUNSIGNED8 Number;\\n\\tSTRING25\\tGroupBy;\\nEND;\\n\\n#uniquename(add_num)\\n%num_rec% %add_num%(%Unique_List% Le, INTEGER C) := TRANSFORM\\n\\tSELF.Number \\t:= \\tC;\\n\\tSELF.Groupby\\t:=\\tLe.GroupBy;\\nEND;\\n\\n#uniquename(numeric_group)\\n%numeric_group% := PROJECT(%unique_list%, %add_num%(LEFT, COUNTER));\\n\\n#uniquename(XF) \\nML.Types.NumericField %XF%(InFile Le, INTEGER C) := TRANSFORM\\n SELF.ID := C;\\n SELF.Number := %numeric_group%(GroupBy = Le.GroupBy)[1].Number;\\n SELF.Value := Le.Compute;\\nEND;\\n\\n#uniquename(base_calculation)\\n%base_calculation% := PROJECT(InFile, %XF%(LEFT,COUNTER));\\n\\n#uniquename(median_table)\\n%median_table% := ML.FieldAggregates(%base_calculation%).Medians;\\n\\n#uniquename(prep_rec)\\n%prep_rec% := RECORD\\n\\tSTRING25 GroupBy;\\n\\tDECIMAL9_1 Median;\\nEND;\\n\\n#uniquename(matchback)\\n%prep_rec% %matchback%(%numeric_group% Le, %median_table% Ri) := transform\\n\\tSELF.GroupBy := Le.GroupBy;\\n\\tSELF.Median := Ri.Median;\\nEND;\\n\\n#uniquename(return_group)\\n%Return_Group%\\t:=\\tJOIN(\\tDISTRIBUTE(%numeric_group%, Number), \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tDISTRIBUTE(%median_table%, Number),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLEFT.Number = RIGHT.Number,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t%matchback%(LEFT, RIGHT),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLOCAL);\\n#uniquename(outrec)\\n%outrec% := RECORD\\n\\tRECORDOF(InFile);\\n\\tDECIMAL9_1 Median;\\nEND;\\n\\n#uniquename(add_med)\\n%outrec% %add_med%(InFile Le, %Return_Group% Ri) := TRANSFORM\\n\\tSELF.median := Ri.Median;\\n\\tSELF \\t\\t\\t\\t:= Le;\\nEND;\\n\\nOutFile := JOIN(DISTRIBUTE(InFile, HASH((STRING)GroupBy)),\\n\\t\\t\\t\\t\\t\\t\\t\\tDISTRIBUTE(%Return_Group%, HASH((STRING)GroupBy)),\\n\\t\\t\\t\\t\\t\\t\\t\\t(STRING)LEFT.GroupBy = (STRING)RIGHT.GroupBy,\\n\\t\\t\\t\\t\\t\\t\\t\\t%add_med%(LEFT, RIGHT),\\n\\t\\t\\t\\t\\t\\t\\t\\tLOCAL);\\t\\t\\t\\t\\t\\t\\t \\n\\nENDMACRO;
\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2015-08-04 16:59:52\" },\n\t{ \"post_id\": 7995, \"topic_id\": 1835, \"forum_id\": 10, \"post_subject\": \"Re: DeNormalize Rows error\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks for your help. It is working now. \\n\\nThe AggActiveBadgeSearch_DS was already in the layout of CalcType8_REC. \\n\\nSo I simply deleted the 'Group' and changed ROWS(RIGHT) to RIGHT and the code worked. \\n\\nI also used your SetNames to make the code more readable.\\n\\nMany thanks again,\\nDon\", \"post_time\": \"2015-08-06 12:26:18\" },\n\t{ \"post_id\": 7992, \"topic_id\": 1835, \"forum_id\": 10, \"post_subject\": \"Re: DeNormalize Rows error\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nYou have two issues with this code. \\n\\nFirst, for both forms of DENORMALIZE, the left recordset and the resulting recordset format must be the same. That means the parent data has to already be in the result format (usually done by either TABLE or PROJECT).\\n\\nSecond, you do not need the GROUP form of DENORMALIZE to accomplish this because you're not creating a nested child dataset, you're just filling in three "slots" with the values from three separate child records. \\n\\nTry it this way:\\nSetNames := ['UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD',\\n 'JOURNEY_COUNT_THRESHOLD',\\n 'DISTANCE_THRESHOLD'];\\n\\nCalcType8_REC GetClientSettings(CalcType8_REC Le, \\n X.Files.DS_BASE_CLIENT_SETTING Ri):=TRANSFORM\\n SELF.UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD := \\n IF(RI.SettingName = SetNames[1], \\n Ri.SettingValue, \\n LE.UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD);\\n SELF.JOURNEY_COUNT_THRESHOLD := \\n IF(RI.SettingName = SetNames[2], \\n Ri.SettingValue, \\n LE.JOURNEY_COUNT_THRESHOLD); \\n SELF.DISTANCE_THRESHOLD := \\n IF(RI.SettingName = SetNames[3], \\n Ri.SettingValue , \\n LE.DISTANCE_THRESHOLD); \\n SELF := Le;\\nEND;\\n\\nAllThresholdsAggActiveBadgeSearch_DS := \\n DENORMALIZE(TABLE(AggActiveBadgeSearch_DS,CalcType8_REC), \\n X.Files.DS_BASE_CLIENT_SETTING(SettingName IN SetNames), \\n LEFT.ClientSrcRID = RIGHT.ClientSrcRID,\\n GetClientSettings(LEFT,RIGHT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-06 08:39:29\" },\n\t{ \"post_id\": 7990, \"topic_id\": 1835, \"forum_id\": 10, \"post_subject\": \"DeNormalize Rows error\", \"username\": \"georgeb2d\", \"post_text\": \"This is what I am trying to do:\\n\\nI have a Dataset with three rows I want for each row of the Parent Dataset.\\n\\nThe Parent Record is: \\n CalcType8_REC := RECORD\\n\\tX.Layouts.Telematics_ID;\\n\\tX.Layouts.CLIENT_SRC_RID;\\n\\tX.Layouts.AGGREGATED_USER_ATTRIBUTES.TotalDistanceMeters;\\n\\tX.Layouts.AGGREGATED_USER_ATTRIBUTES.TotalValidJourneysCount;\\n\\tX.Layouts.AGGREGATED_USER_ATTRIBUTES.DaysWithValidJourneyCount;\\n\\tSTRING DISTANCE_THRESHOLD { MAXLENGTH(255) } := ''; \\n\\tSTRING JOURNEY_COUNT_THRESHOLD { MAXLENGTH(255) } := ''; \\n\\tSTRING UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD { MAXLENGTH(255) } := ''; \\t\\t\\n\\tX.Layouts.client_badge.SingleLevelTarget ;\\n END;
\\nThe child Records are enclosed in the Dataset:\\nDS_BASE_CLIENT_SETTING. I need three rows per each CLIENT_SRC_RID. The three rows are where SettingName IN ['UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD','JOURNEY_COUNT_THRESHOLD','DISTANCE_THRESHOLD']. \\nI want to put the Thresholds in their matching names. \\n\\nHere is the code:\\nCalcType8_REC GetClientSettings(AggActiveBadgeSearch_DS Le, X.Files.DS_BASE_CLIENT_SETTING Ri):=TRANSFORM\\n\\t\\tSELF.UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD := IF(RI.SettingName = 'UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD', Ri.SettingValue, LE.UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD);\\n\\t\\tSELF.JOURNEY_COUNT_THRESHOLD := IF(RI.SettingName = 'JOURNEY_COUNT_THRESHOLD', Ri.SettingValue, LE.JOURNEY_COUNT_THRESHOLD); \\n\\t\\tSELF.DISTANCE_THRESHOLD := IF(RI.SettingName = 'DISTANCE_THRESHOLD', Ri.SettingValue , LE.DISTANCE_THRESHOLD); \\n\\t\\tSELF := Le;\\n\\tEND;\\n\\n\\n\\tAllThresholdsAggActiveBadgeSearch_DS := DENORMALIZE(AggActiveBadgeSearch_DS, X.Files.DS_BASE_CLIENT_SETTING(SettingName IN ['UNIQUE_JOURNEY_DAYS_COUNT_THRESHOLD','JOURNEY_COUNT_THRESHOLD','DISTANCE_THRESHOLD']), LEFT.ClientSrcRID = RIGHT.ClientSrcRID,\\n\\t\\tGROUP, GetClientSettings(LEFT,ROWS(RIGHT)));\\n\\n
\\n\\nI can write the code to do this by running a Join three times but I am trying to simplify the matter. When I run the above code I get the error:\\nError: ROWS.settingname - no active row for Table ROWS inside transform (use LEFT?) \\n\\nOn Alpha_DEV_Thor this is WU W20150805-152914.\\n\\nThanks for any assistance.\\n\\nDon\", \"post_time\": \"2015-08-05 20:03:54\" },\n\t{ \"post_id\": 8026, \"topic_id\": 1843, \"forum_id\": 10, \"post_subject\": \"Re: Characters Count\", \"username\": \"BarrOs01\", \"post_text\": \"thanks, it works great!\", \"post_time\": \"2015-08-20 14:32:44\" },\n\t{ \"post_id\": 8025, \"topic_id\": 1843, \"forum_id\": 10, \"post_subject\": \"Re: Characters Count\", \"username\": \"rtaylor\", \"post_text\": \"BarrOs01,\\n\\nJust like this:IMPORT STD;\\nCharCount(STRING Str) := FUNCTION\\n Chars := TRIM(Str,ALL);\\n Len := LENGTH(Chars);\\n ds := DATASET(Len,\\n TRANSFORM({STRING1 Char},\\n SELF.Char := STD.Str.ToUpperCase(Chars[COUNTER])));\\t\\n RETURN TABLE(ds,{Char,Cnt := COUNT(GROUP)},Char);\\nEND;\\n\\nCharCount('HPCC Systems');\\n\\n//and the result is:\\n// M\\t1\\n// Y\\t1\\n// S\\t3\\n// P\\t1\\n// C\\t2\\n// H\\t1\\n// E\\t1\\n// T\\t1
The ALL option on TRIM removes all spaces (leading, trailing, and embedded), so the resulting LENGTH is the number of characters in the string. Then the DATASET calls its TRANSFORM that number of times, allowing you to extract each individual character so that the TABLE can do a simple crosstab on the resulting dataset to give you your result.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-20 14:08:36\" },\n\t{ \"post_id\": 8022, \"topic_id\": 1843, \"forum_id\": 10, \"post_subject\": \"Characters Count\", \"username\": \"BarrOs01\", \"post_text\": \"Can someone share a script of how to count unique characters in a given string.\\n\\nExample: "HPCC Systems"\\nH=1\\nP=1\\nC=2\\nS=3\\nY=1\\nT=1\\nE=1\\nM=1\", \"post_time\": \"2015-08-19 19:10:58\" },\n\t{ \"post_id\": 8032, \"topic_id\": 1844, \"forum_id\": 10, \"post_subject\": \"Re: Convert String Fields of a Dataset to Uppercase\", \"username\": \"somberi\", \"post_text\": \"Thanks for the Info. I would like to take up this task. I'll start and let you know of the progress that I make. Thanks again!\", \"post_time\": \"2015-08-24 15:19:09\" },\n\t{ \"post_id\": 8031, \"topic_id\": 1844, \"forum_id\": 10, \"post_subject\": \"Re: Convert String Fields of a Dataset to Uppercase\", \"username\": \"rtaylor\", \"post_text\": \"Somberi,\\n\\nThere is no "UpperCaseAllStringsInAllRecords" function currently in ECL. \\n\\nIf this is a continuing requirement of yours (as in, you need this all the time and not just once), then I would suggest you could write a new C++ function to do that and contribute it to the community. Remember, ECL is Open Source and you are encouraged to participate in exactly this manner.\\n\\nIf you are interested in taking on this task, I would suggest looking at the source code for the ASCII and EBCDIC functions. These functions both operate on all string fields of all records of a dataset, translating to/from ASCII/EBCDIC, so they would provide a good template for how to accomplish a simple uppercase of all string fields in all records.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-24 14:33:51\" },\n\t{ \"post_id\": 8023, \"topic_id\": 1844, \"forum_id\": 10, \"post_subject\": \"Convert String Fields of a Dataset to Uppercase\", \"username\": \"somberi\", \"post_text\": \"Hi\\n\\nI'm looking to convert the string contents of a dataset to uppercase. Is there any other way other than calling the toUpperCase() on each of the individual fields?\\n\\nThanks\\nSomberi\", \"post_time\": \"2015-08-20 09:11:44\" },\n\t{ \"post_id\": 8046, \"topic_id\": 1852, \"forum_id\": 10, \"post_subject\": \"Re: FileServices Documentation\", \"username\": \"JimD\", \"post_text\": \"The Standard Library Reference has documentation for all the STD.File (file services) functions. \\n\\nYou can download the PDF (see link below) or access it in help file format by pressing F1 in the IDE Editor. \\n\\nhttp://hpccsystems.com/download/docs/st ... -reference\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-08-27 19:28:44\" },\n\t{ \"post_id\": 8045, \"topic_id\": 1852, \"forum_id\": 10, \"post_subject\": \"FileServices Documentation\", \"username\": \"mrumsey\", \"post_text\": \"I am looking for documentation on FileServices functions. Does anyone know where they are?\\n\\nI am tired of production datasets having module record structures change before the actual data change goes into effect. 
I want to see if I can find a function that will allow me to evaluate the dataset call or dataset structure and change the definition should there be an error.\\n\\nThanks,\\n\\nMatt\", \"post_time\": \"2015-08-27 14:58:56\" },\n\t{ \"post_id\": 8060, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"rtaylor\", \"post_text\": \"Don,I see it now in the documentation. I don't think it is real obvious
A LOOKUP JOIN has always been, in every tool I've ever used (not just HPCC), a join of a dataset to a lookup table. And a lookup table always has a 1-M relationship with the file it's being joined to. It's standard third-normal-form database terminology/technology that was covered in the Intro to Thor class.\\n\\nGlad you have a solution,\\n\\nRichard\", \"post_time\": \"2015-09-01 17:10:47\" },\n\t{ \"post_id\": 8058, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"georgeb2d\", \"post_text\": \"That was it. I see it now in the documentation. I don't think it is real obvious. \\n\\nThanks again.\", \"post_time\": \"2015-09-01 15:52:25\" },\n\t{ \"post_id\": 8056, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"rtaylor\", \"post_text\": \"The LOOKUP option implies a MANY-ONE relationship, with the ONE side the right dataset. Is it possible you have the files reversed?\\n\\nTry making it a MANY LOOKUP and see what difference that makes.\\n\\nBTW, you can re-use the same TRANSFORM function for all three JOINs you're doing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-01 15:46:35\" },\n\t{ \"post_id\": 8054, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"georgeb2d\", \"post_text\": \"Here is the data:\\nAUSlim_Rec := RECORD\\n\\tSTRING36 TID;\\n\\tUNSIGNED8 CID;\\nEND;
\\nTID4 \\t100\\nTID5 \\t100\\nTID6 \\t100\\nTID7 \\t100\\nTID1 \\t100\\nTID2 \\t100\\nTID3 \\t100\\nThis is distributed by HASH32(TID);\\n\\nCBSlim_Rec := RECORD\\n\\t UNSIGNED8 CB_SRID;\\n\\t STRING BLevel ;\\n INTEGER Type;\\n\\t UNSIGNED8 CID;\\t\\n\\tEND;\\n
\\nData for CBSlim:\\n1\\t11\\t1\\t100\\n11\\t100\\t2\\t100\\n21\\t100\\t3\\t100\\n31\\t100\\t5\\t100\\n51\\t100\\t7\\t100\\n61\\t18\\t8\\t100\\n71\\t19\\t9\\t100\\n81\\t21\\t11\\t100\\n91\\t22\\t12\\t100\\n101\\t23\\t13\\t100\\n121\\t25\\t15\\t100\\n131\\t26\\t16\\t100\\n161\\t30\\t20\\t100\\n171\\t31\\t21\\t100\\n181\\t32\\t22\\t100\\n191\\t34\\t24\\t100\\n201\\t35\\t25\\t100\\n\\n AllBadgesForActiveUsers_REC := RECORD\\n\\tSTRING36 TID;\\n\\tUNSIGNED8 CB_SRID;\\n\\tSTRING BLevel ;\\n\\tINTEGER Type;\\n\\tUNSIGNED8 CID;\\t\\n END;\\n AllBadgesForActiveUsers_REC AttachClientBadges(dtAUSlim_DS LE,CBSlim_DS RI):= TRANSFORM\\n \\tSELF.TID := LE.TID;\\n \\tSELF := RI;\\n END;\\n dtAllBadgesForActiveUsers := JOIN(dtAUSlim_DS, CBSlim_DS, LEFT.CID = RIGHT.CID, AttachClientBadges(LEFT,RIGHT) //, LOOKUP\\n \\t\\t);\\t \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n OUTPUT(dtAllBadgesForActiveUsers , NAMED('No_Lookup'));\\n OUTPUT(COUNT(dtAllBadgesForActiveUsers) , NAMED('CT_No_Lookup'));\\n \\t\\n AllBadgesForActiveUsers_REC AttachClientBadges2(dtAUSlim_DS LE, CBSlim_DS RI):= TRANSFORM\\n \\tSELF.TID := LE.TID;\\n \\tSELF := RI;\\n END;\\n dtAllBadgesForActiveUsers2 := JOIN(dtAUSlim_DS, CBSlim_DS, LEFT.CID = RIGHT.CID, AttachClientBadges2(LEFT,RIGHT) , LOOKUP\\n \\t\\t);\\t \\n \\nOUTPUT(dtAllBadgesForActiveUsers2 , NAMED('Lookup'));\\nOUTPUT(COUNT(dtAllBadgesForActiveUsers2) , NAMED('CT_Lookup'));\\n
\\n\\nNo Lookup Output:\\nTID4 \\t1\\t11\\t1\\t100\\nTID4 \\t11\\t100\\t2\\t100\\nTID4 \\t21\\t100\\t3\\t100\\nTID4 \\t31\\t100\\t5\\t100\\nTID4 \\t51\\t100\\t7\\t100\\nTID4 \\t61\\t18\\t8\\t100\\nTID4 \\t71\\t19\\t9\\t100\\nTID4 \\t81\\t21\\t11\\t100\\nTID4 \\t91\\t22\\t12\\t100\\nTID4 \\t101\\t23\\t13\\t100\\nTID4 \\t121\\t25\\t15\\t100\\nTID4 \\t131\\t26\\t16\\t100\\nTID4 \\t161\\t30\\t20\\t100\\nTID4 \\t171\\t31\\t21\\t100\\nTID4 \\t181\\t32\\t22\\t100\\nTID4 \\t191\\t34\\t24\\t100\\nTID4 \\t201\\t35\\t25\\t100\\nTID5 \\t1\\t11\\t1\\t100\\nTID5 \\t11\\t100\\t2\\t100\\nTID5 \\t21\\t100\\t3\\t100\\nTID5 \\t31\\t100\\t5\\t100\\nTID5 \\t51\\t100\\t7\\t100\\nTID5 \\t61\\t18\\t8\\t100\\nTID5 \\t71\\t19\\t9\\t100\\netc. \\n119 rows\\n\\nLookup Output:\\nTID4 \\t1\\t11\\t1\\t100\\nTID5 \\t1\\t11\\t1\\t100\\nTID6 \\t1\\t11\\t1\\t100\\nTID7 \\t1\\t11\\t1\\t100\\nTID1 \\t1\\t11\\t1\\t100\\nTID2 \\t1\\t11\\t1\\t100\\nTID3 \\t1\\t11\\t1\\t100\\n\\nSo my question is, why only 7 rows with a LOOKUP? 119 rows with no Lookup? What am I missing?\\n\\nThanks.\", \"post_time\": \"2015-09-01 15:38:06\" },\n\t{ \"post_id\": 8052, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"georgeb2d\", \"post_text\": \"Those are two different fields. This is WorkUnit W20150901-093232 on Alpha_Dev_Thor. I will see if I can build a subset of the data and recreate the scenario above.\", \"post_time\": \"2015-09-01 14:47:09\" },\n\t{ \"post_id\": 8050, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Re: Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"rtaylor\", \"post_text\": \"What's the difference between TID and CID?\\n\\nIf you can produce a self-contained example that reproduces the problem, that would help a lot.\\n\\nRichard\", \"post_time\": \"2015-09-01 14:41:36\" },\n\t{ \"post_id\": 8048, \"topic_id\": 1854, \"forum_id\": 10, \"post_subject\": \"Lookup Mystery --- Records Missing when do Lookup\", \"username\": \"georgeb2d\", \"post_text\": \"\\tAllBadgesForActiveUsers_REC AttachClientBadges(dtActiveUsers LE,ClientBadgesDefined_DS RI):= TRANSFORM\\n\\t\\tSELF.TID := LE.TID;\\n\\t SELF.Progress := 0;\\n\\t\\tSELF.CreatedDtm := Build_DateTime; \\n\\t\\tSELF := RI;\\n\\tEND;\\n\\tdtAllBadgesForActiveUsers := JOIN(dtActiveUsers, ClientBadgesDefined_DS, LEFT.CID = RIGHT.CID, AttachClientBadges(LEFT,RIGHT) \\n//, LOOKUP\\n);\\t
\\n\\nWhen I run this with Lookup I only get 7 rows. When I run this as above I get 119 rows. 119 rows is the correct answer. dtActiveUsers is distributed by TID. ClientBadgesDefined_DS is not sorted or distributed. \\n\\nI thought LOOKUP simply put the Dataset on the Right on all the nodes and then processed the data on every node. So I am confused as to why I get different results. More must be occuring...\", \"post_time\": \"2015-09-01 14:30:19\" },\n\t{ \"post_id\": 8098, \"topic_id\": 1866, \"forum_id\": 10, \"post_subject\": \"Re: Right Outer and Local\", \"username\": \"georgeb2d\", \"post_text\": \"Oops! Due to the newness of this code I am creating my own data. I thought I had Distributed it by TID but I had not. Once I did that everything is working fine.\\n\\nIs there any way to tell how a Dataset is Distributed without searching the code?\", \"post_time\": \"2015-09-08 15:14:07\" },\n\t{ \"post_id\": 8096, \"topic_id\": 1866, \"forum_id\": 10, \"post_subject\": \"Re: Right Outer and Local\", \"username\": \"bforeman\", \"post_text\": \"If you say that even after the DISTRIBUTE you are not getting the correct results,\\nhave you tried using the new SMART Join type? The compiler will work out the best way to accomplish the JOIN. \\n\\nAnd why are you using INDEPENDENT for this JOIN? Have you tried your JOIN without it?\\n\\nAnd finally, what does your DISTRIBUTE look like? Can you post the code please?\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2015-09-08 15:08:23\" },\n\t{ \"post_id\": 8094, \"topic_id\": 1866, \"forum_id\": 10, \"post_subject\": \"Re: Right Outer and Local\", \"username\": \"georgeb2d\", \"post_text\": \"So is there a way I can Distribute so the Right Outer will work with the Local? Or do I have to test this every time I do a local? Should I be concerned it works in testing but then in production it does not work? \\n\\nBoth datasets are distributed by HASH32(TID) so to me they should both be on the same node. I also tried Distributing by TID and TypeID with no different result. This makes me hesitant to use LOCAL.\\n\\nOr is there something special about RIGHT OUTER joins and LOCAL?\", \"post_time\": \"2015-09-08 14:32:17\" },\n\t{ \"post_id\": 8092, \"topic_id\": 1866, \"forum_id\": 10, \"post_subject\": \"Re: Right Outer and Local\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nIf you are getting correct results without the use of LOCAL, that implies that you may not be distributing your data correctly, in other words, there are records that need to be accessed on other nodes that are not available when you specify LOCAL, because LOCAL only works on the records that exist on that particular node.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-09-08 14:19:41\" },\n\t{ \"post_id\": 8090, \"topic_id\": 1866, \"forum_id\": 10, \"post_subject\": \"Right Outer and Local\", \"username\": \"georgeb2d\", \"post_text\": \"I have two sets of records distributed by field TID.\\n\\nThis is what I want to do:\\nI want to Join these two datasets by TID And TypeID. \\nFor the records where an Inner Join occurs I want to check a condition on the left side, and if the condition is met, I want to ignore that record. \\nOtherwise I want to keep that record. \\n\\nIf there are only records on the right dataset I want to keep those records. \\n\\nIf I do a right outer join with a Local I get all the records in the right dataset, including those I do not want. 
\\n\\nIf I do a right outer join without a Local the join works as I want it to.\\n\\nHere is the TRANSFORM:\\n\\tAllBadgesForActiveUsers_REC \\tActiveBadges(dtOld_Badges_DS LE, dtAllBadgesForActiveUsers RI):= TRANSFORM\\n\\t\\t\\t// If LastLevelJourneyID has a value, Badge has already been calculated\\n\\t\\tCompletedBadge := IF(LE.LastLevelJourneyID > 0 AND LE.TypeID > 0 , TRUE, FALSE);\\t\\n\\t\\t\\t// Might be new TID in RI so want to capture all of these \\n\\t\\t\\t// Three Cases: 1.Completed Badge, so Skip\\n\\t\\t\\t//\\t\\t2. Only TelematicsId on Right because new TelematicsID\\n\\t\\t\\t//\\t\\t3. Partial Progress so want to keep\\n\\t\\tSELF.TID := IF (CompletedBadge, SKIP, RI.TID);\\n\\t\\t\\t// Keep track of BadgeProgress if any has happenned, otherwise 0\\n\\t\\tSELF.BadgeProgress := IF(LE.BadgeProgress > 0, LE.BadgeProgress, 0);\\n\\t\\tSELF.CreatedDtm := IF(LE.CreatedDtm > 0, LE.CreatedDtm, RI.CreatedDtm);\\n\\t\\tSELF.UTCStartDateYDM := IF(LE.UTCStartDateYDM > 0, LE.UTCStartDateYDM, 0);\\n\\t\\tSELF := RI;\\n\\tEND;\\n\\tdtBadgesAreComputing:= JOIN(dtOld_Badges_DS, dtAllBadgesForActiveUsers, LEFT.TID = RIGHT.TID AND LEFT.TypeID = RIGHT.TypeID, ActiveBadges(LEFT,RIGHT), RIGHT OUTER \\n\\t\\t\\t\\t\\t//\\t, LOCAL\\n\\t\\t\\t\\t\\t\\t):INDEPENDENT;
\\n\\nWhat am I missing? \\n\\nThanks,\\nDon\", \"post_time\": \"2015-09-08 14:09:10\" },\n\t{ \"post_id\": 8138, \"topic_id\": 1882, \"forum_id\": 10, \"post_subject\": \"Re: iterate each row compare with all other rows\", \"username\": \"rtaylor\", \"post_text\": \"gopi,\\n\\nThis code does what you want:IMPORT STD;\\nLay_Word := Record\\n\\tInteger ID;\\n\\tString Word;\\n\\tBoolean Is_SubSet_Word := false;\\nEnd;\\n\\nds_word := dataset([\\n\\t\\t{1, 'united states of america'},\\n\\t\\t{2, 'united states'},\\n\\t\\t{3, 'america'},\\n\\t\\t{4, 'united states president'},\\n\\t\\t{5, 'birth certificate'},\\n\\t\\t{6, 'certificate'},\\n\\t\\t{7, 'birth'},\\n\\t\\t{8, 'certificate format'},\\n\\t\\t{9, 'states of america'}\\n\\t\\t],Lay_Word);\\n\\nDEDUP(ds_word,STD.Str.Find(LEFT.Word,RIGHT.word)<>0,all,LOCAL);
The problem with DEDUP,ALL is that it is not available for global operations (remove the LOCAL option and you'll see what I mean) and should not be used on 10K+ records, because it evaluates all possible commutative pairs. That makes it an "expensive" operation.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-16 14:00:59\" },\n\t{ \"post_id\": 8136, \"topic_id\": 1882, \"forum_id\": 10, \"post_subject\": \"iterate each row compare with all other rows\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nHow to iterate each row and compare with all other rows. \\n\\nSample Input :\\n\\nLay_Word := Record\\n Integer ID;\\n String Word;\\n Boolean Is_SubSet_Word := false;\\nEnd;\\n\\nds_word := dataset([\\n{1, 'united states of america'},\\n{2, 'united states'},\\n{3, 'america'},\\n{4, 'united states president'},\\n{5, 'birth certificate'},\\n{6, 'certificate'},\\n{7, 'birth'},\\n{8, 'certificate format'},\\n{9, 'states of america'}\\n],Lay_Word);\\n\\nCompare each record with all other records and eliminate the record that has repeating word/words that are found when comparing. So the final output should be having the dataset in which there are no repeating words in same order when compared to other records.\\n\\nOutput :\\n\\nID\\tWord\\n1\\tunited states of america\\n4\\tunited states president\\n5\\tbirth certificate\\n8\\tcertificate format\\n\\nHow can i achieve the same in ECL?\\n\\nTried using the project and normalize, but it didnt help.. \\n\\nLay_Word := Record\\n\\tInteger ID;\\n\\tString Word;\\n\\tBoolean Is_SubSet_Word := false;\\nEnd;\\n\\nds_word := dataset([\\n{1, 'united states of america'},\\n{2, 'united states'},\\n{3, 'america'},\\n{4, 'united states president'},\\n{5, 'birth certificate'},\\n{6, 'certificate'},\\n{7, 'birth'},\\n{8, 'certificate format'}\\n],Lay_Word);\\n\\nLay_Word_Output := Record\\n\\tDataset(recordof(ds_word)) child;\\nEnd;\\n\\nrecordof(ds_word) Trans2(ds_word L, integer C, STRING Str, integer N) := Transform\\n\\tself.Is_SubSet_Word := regexfind(L.Word, Str, nocase);\\n\\tself := L;\\nEnd;\\n\\nLay_Word_Output Trans1(Lay_Word L) := Transform\\n\\tds_input := ds_word;\\n\\tLen := count(ds_input);\\n\\tself.child := Normalize(ds_word, Len, Trans2(left, counter, L.word, Len));\\nEnd;\\n\\nds_out := Project(ds_word, Trans1(left));\\n\\nds_out.child(Is_SubSet_Word = false);\\n
\\nPlease help..\\n\\nThanks a lot in advance.\\n\\nRegards,\", \"post_time\": \"2015-09-16 07:32:05\" },\n\t{ \"post_id\": 8142, \"topic_id\": 1884, \"forum_id\": 10, \"post_subject\": \"Re: Function that operates like unpivot in SQL\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nThe NORMALIZE function is what you want, like this:\\n\\nRec := RECORD\\n INTEGER4 StudentID ,\\n DECIMAL2_1 Marks1;\\n DECIMAL2_1 Marks2;\\n DECIMAL2_1 Marks3;\\nEND;\\t\\n\\nStudent := DATASET([{1, 5.6, 7.3, 4.2},\\n {2, 4.8, 7.9, 6.5},\\n {3, 6.8, 6.6, 8.9},\\n {4, 8.2, 9.3, 9.1},\\n {5, 6.2, 5.4, 4.4}],Rec);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nStudent;\\n\\n//UNPIVOT like this:\\noutRec := RECORD\\n INTEGER4 StudentID;\\n STRING6 MarksNo;\\n DECIMAL2_1 MarksRecd;\\nEND;\\t\\n\\noutRec XF(Rec L, INTEGER C) := TRANSFORM\\n SELF.MarksNo := 'Marks' + (STRING1)C;\\n SELF.MarksRecd := CHOOSE(C,L.Marks1,L.Marks2,L.Marks3);\\n SELF := L;\\nEND; \\nUnpivot := NORMALIZE(Student,3,XF(LEFT,COUNTER));\\nUnpivot;
This code duplicates the example SQL code on the page you referenced.\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-17 19:18:18\" },\n\t{ \"post_id\": 8140, \"topic_id\": 1884, \"forum_id\": 10, \"post_subject\": \"Function that operates like unpivot in SQL\", \"username\": \"KatyChow\", \"post_text\": \"Hello,\\n\\nIs there a built in function in HPCC that operates like unpivot in SQL? If so where can I find it and is there an example I can reference?\\n\\nLink to example: http://www.sqlservercurry.com/2011/01/u ... erver.html\\n\\nThank you!\", \"post_time\": \"2015-09-17 16:54:39\" },\n\t{ \"post_id\": 8206, \"topic_id\": 1894, \"forum_id\": 10, \"post_subject\": \"Re: Integrate Visualizations\", \"username\": \"iemem15\", \"post_text\": \"Yes I'v made the examples\\n\\nWhat I'm trying to do is to use the framework from github. I have trouble understanding how can I run the demos or examples to get the data. Do I need to create the XML templates and the manifest.\\n\\nI have looked when installing from source I get HPCC-Platform/esp/src/Visualization, and there's the objects from github.\\n\\nI'v looked that there's an example of a dashboard, but I don't know how to publish it.\\n\\nThanks in advance.\", \"post_time\": \"2015-09-24 19:39:46\" },\n\t{ \"post_id\": 8168, \"topic_id\": 1894, \"forum_id\": 10, \"post_subject\": \"Re: Integrate Visualizations\", \"username\": \"rtaylor\", \"post_text\": \"Have you looked at this doc? http://cdn.hpccsystems.com/install/docs/3_4_2_1/visualizing_ecl_results.pdf\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-22 17:22:42\" },\n\t{ \"post_id\": 8160, \"topic_id\": 1894, \"forum_id\": 10, \"post_subject\": \"Integrate Visualizations\", \"username\": \"iemem15\", \"post_text\": \"Is there any example for using https://github.com/hpcc-systems/Visualization\\n\\nI have used the XSLT an do some templates, Do I need to put the html code or is there aother way to use it?\\n\\nAre there any exmples, I couldn't figure out how to add the results from thor or roxie or even manual dataset.\\n\\nThanks in advance...\", \"post_time\": \"2015-09-21 22:23:35\" },\n\t{ \"post_id\": 8198, \"topic_id\": 1900, \"forum_id\": 10, \"post_subject\": \"Re: Specifying remote dali using ecl publish\", \"username\": \"JimD\", \"post_text\": \"I updated the PDF yesterday after I discovered it was missing from the documentation.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14275\\n\\nThanks for your question.\\n\\nJim\", \"post_time\": \"2015-09-24 12:12:27\" },\n\t{ \"post_id\": 8176, \"topic_id\": 1900, \"forum_id\": 10, \"post_subject\": \"Re: Specifying remote dali using ecl publish\", \"username\": \"xili\", \"post_text\": \"Thanks! That works perfectly. I also just found that is listed in the options using command "ecl help publish", just not listed in the pdf.\", \"post_time\": \"2015-09-23 16:41:13\" },\n\t{ \"post_id\": 8174, \"topic_id\": 1900, \"forum_id\": 10, \"post_subject\": \"Re: Specifying remote dali using ecl publish\", \"username\": \"JimD\", \"post_text\": \"You can specify that using the --daliip= parameter. \\n\\nfor example:\\n\\necl publish --target=roxie --daliip=127.0.0.1 --name=FindPersonService -a findperson.ecl
\", \"post_time\": \"2015-09-23 16:12:21\" },\n\t{ \"post_id\": 8172, \"topic_id\": 1900, \"forum_id\": 10, \"post_subject\": \"Specifying remote dali using ecl publish\", \"username\": \"xili\", \"post_text\": \"Hello,\\n\\nI would like to publish a Roxie query using the command line utility - ecl publish, but with a remote dali. I checked the HPCC Client Tools document, but couldn't find a way to specify a remote dali in the ecl publish command. I was wondering if it is possible to do that or is there any alternative?\\n\\nMany thanks,\\nXi\", \"post_time\": \"2015-09-23 09:50:22\" },\n\t{ \"post_id\": 8194, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"mrumsey\", \"post_text\": \"I used:\\n\\nSTD.File.FileExists('~foreign::10.194.12.1::insurview::correlator::quarterly::2016Q1')
\\n\\nAnd still got the error. I pulled that IP address from the _Control.IPAddress location that I found under the ut.foreign_prod function. \\n\\nI will report this in JIRA and I'll have to manually adjust the code for now.\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2015-09-23 19:18:51\" },\n\t{ \"post_id\": 8192, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"rtaylor\", \"post_text\": \"Should work then. Report the issue in JIRA, please\", \"post_time\": \"2015-09-23 19:12:12\" },\n\t{ \"post_id\": 8190, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"mrumsey\", \"post_text\": \"I am currently using:\\n\\nut.foreign_prod+'File_Location'
\\n\\nWill this not work with the STD.File functions? Will I need to bypass the foreign_prod function?\", \"post_time\": \"2015-09-23 19:11:03\" },\n\t{ \"post_id\": 8188, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nYou need to get your filename syntax correct. Look at this doc: http://hpccsystems.com/download/docs/ecl-language-reference/html/Foreign_Files.html\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-23 19:09:04\" },\n\t{ \"post_id\": 8186, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"mrumsey\", \"post_text\": \"If I think about it for a second...I am on the Development server and I am checking a file (Superfile - Insurview) from the Production server. Could it be that my dev thor isn't given permissions to query the Prod DFU?\", \"post_time\": \"2015-09-23 18:58:51\" },\n\t{ \"post_id\": 8184, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nIs your IMPORT STD; missing?\\n\\nOr perhaps the parameter you're passing is not a filename in the format DFU requires?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-23 18:57:10\" },\n\t{ \"post_id\": 8182, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"mrumsey\", \"post_text\": \"Thanks Richard!\\n\\nI am getting the following error when I use STD.File.FileExists:\\n\\nError: System error: -1: Graph[1], if[4]: SLAVE #75 [10.194.72.75:16600]: No access to Dali - this normally means a plugin call is being called from a thorslave, (0, 0), -1,
\\n\\nAny idea what is going on here? Is this function blocked?\", \"post_time\": \"2015-09-23 18:54:47\" },\n\t{ \"post_id\": 8180, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Re: Detecting if a DATASET Exists\", \"username\": \"rtaylor\", \"post_text\": \"Matt,\\n\\nYou're trying to use the ECL EXISTS() function (which checks for the existence of any records in its recordset parameter) to see if there's a file on disk. That will never work.\\n\\nWhat you need is the STD.File.FileExists() function from the Standard Library.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-09-23 18:13:34\" },\n\t{ \"post_id\": 8178, \"topic_id\": 1902, \"forum_id\": 10, \"post_subject\": \"Detecting if a DATASET Exists\", \"username\": \"mrumsey\", \"post_text\": \"Hello HPCC Forums,\\n\\nI am trying to check if a dataset exists before accessing it. I am working with a superfile across multiple years. At some points, sub-files may not exist yet. I need to see if the sub-file exists before accessing it; however, the compiler seems to be too smart. \\n\\nUsing CATCH or IF(EXISTS()) logic returns an error if the dataset doesn't exist.\\n\\n\\nMyFile := SuperFile.Yearly.YearFile + //works\\n SuperFile.Quarterly.(Year-1)+'Q4_File' + //works\\n SuperFile.Quarterly.(Year+1)+'Q1_File'; //Works if file sub-file exists.\\n
\\n\\nI've replaced SuperFile.Quarterly.(Year+1)+'Q1_File'
with both\\n\\nIF(EXISTS(SuperFile.Quarterly.(Year+1)+'Q1_File'), \\n SuperFile.Quarterly.(Year+1)+'Q1_File', \\n DATASET([], SuperFile.RecordStructure)\\n );\\n\\nAND \\n\\nCATCH(SuperFile.Quarterly.(Year+1)+'Q1_File', SKIP);\\n
\\n\\nNeither replacement has worked. Does anyone have any suggestions? \\n\\nThanks in advance!\\n\\nMatt Rumsey\", \"post_time\": \"2015-09-23 17:53:20\" },\n\t{ \"post_id\": 8270, \"topic_id\": 1924, \"forum_id\": 10, \"post_subject\": \"hpcc-streaming-kafka - Dali SDS client transactions error\", \"username\": \"iemem15\", \"post_text\": \"I'm running the hpcc-streaming-kafka, when I schedule the BuildIndex_Scheduler.ecl, I started receiving \\n"WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."
\\nAfter 3 retires I received form mythor log\\n000007BE 2015-10-10 13:35:04.367 20100 26066 "ERROR: -6: Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007BF 2015-10-10 13:35:04.367 20100 26066 "INFORM [EXCEPTION]"\\n000007C0 2015-10-10 13:35:04.367 20100 26066 "ERROR: -6: Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007C1 2015-10-10 13:35:04.367 20100 26066 "Posting exception: Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, to agent 192.168.198.1 for workunit(W20151010-130405)"\\n000007C2 2015-10-10 13:35:04.373 20100 26066 "INFORM [EXCEPTION]"\\n000007C3 2015-10-10 13:35:04.373 20100 26066 "Abort condition set - activity(workunitwrite, 83)"\\n000007C4 2015-10-10 13:35:04.373 20100 26066 "Abort condition set - activity(firstn, 82)"\\n000007C5 2015-10-10 13:35:04.373 20100 26066 "Abort condition set - activity(SOAP_datasetdataset, 81)"\\n000007C6 2015-10-10 13:35:04.373 20100 26066 "Abort condition set - activity(inlinetable, 80)"\\n000007C7 2015-10-10 13:35:04.376 20100 20100 " - graph(graph9, 79) : Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007C8 2015-10-10 13:35:04.386 20100 26066 ",Timing,ThorGraph,mythor,W20151010-130405,9,79,1,1500502,FAILED,mythor,thor.thor,thor_roxie.thor"\\n000007C9 2015-10-10 13:35:04.387 20100 20100 "ERROR: -6: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/graph/thgraphmaster.cpp(2113) : Graph[79], Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007CA 2015-10-10 13:35:04.387 20100 20100 "ERROR: -6: Graph[79], Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007CB 2015-10-10 13:35:04.387 20100 20100 "INFORM [EXCEPTION]"\\n000007CC 2015-10-10 13:35:04.387 20100 20100 "ERROR: -6: Graph[79], Graph[79], SOAP_datasetdataset[81]: SLAVE #1 [192.168.198.1:20100]: <Error><text>timeout expired Target: C!127.0.0.1, Raised in: 
/var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.8-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 1524</text><url>http://127.0.0.1:8010/WsPackageProcess</url></Error>, "\\n000007CD 2015-10-10 13:35:04.387 20100 20100 "INFORM [EXCEPTION]"\\n
\\n\\nThis is Code is from the DeployPackage.ecl\\n\\nIMPORT $, STD;\\n\\nListAllQueries := {\\n STRING QuerySetName { XPATH('QuerySetName') },\\n STRING filter { XPATH('Filter') },\\n STRING ClusterName { XPATH('ClusterName') },\\n STRING FilterType { XPATH('FilterType') }\\n};\\n\\nQueryNameLayout := {\\n STRING query_name {maxlength(1024), xpath('Name')}\\n};\\n\\n// List of All Queries\\nDeployedQueries := FUNCTION \\n requestDataset := DATASET([{'roxie', '', '', 'All'}], ListAllQueries);\\n response := \\n SOAPCALL(\\n requestDataset, \\n $.constants.roxieUrl_WsWorkunits, \\n 'WUQuerysetDetails', \\n ListAllQueries,\\n TRANSFORM(LEFT),\\n DATASET(QueryNameLayout),\\n literal,\\n XPATH('WUQuerySetDetailsResponse/QuerysetQueries/QuerySetQuery')\\n );\\n \\n // remove data queries and dedup\\n RETURN DEDUP(response(query_name[1..9] != '_roxiepkg'), query_name, ALL);\\nEND;\\n\\n// Filter out the queries for telematics project\\ntelematicsqueries := DeployedQueries(query_name[1..10] = 'telematics');\\n\\nRecWithSuperFile := RECORD\\n STRING queryname;\\n STRING superfile;\\nEND;\\n\\n// Right now the ROXIE Query names are hardcoded. This will change with newer version to get all queries deployed on ROXIE and build the package against all queries.\\ngetSuperFile(STRING queryname) := FUNCTION\\n mapOfQueries := \\n MAP((queryname = 'telematics_service_accdec') => $.files.SUPERKEY_ACCDEC,\\n (queryname = 'telematics_service_km_by_speed') => $.files.SUPERKEY_SPEED,\\n '');\\n RETURN mapOfQueries; \\nEND;\\n\\nRecWithSuperFile Xform(DeployedQueries dq) := TRANSFORM\\n SELF.queryname := dq.query_name;\\n SELF.superfile := getSuperFile(dq.query_name); \\nEND;\\n\\nds_results := PROJECT(telematicsqueries, Xform(LEFT));\\n// ds_results;\\n\\n// Form Package for Each Query\\npack_for_query1 := \\n $.CreatePackageMapString(ds_results[1].queryName, \\n ds_results[1].superfile, \\n STD.Str.FindReplace(ds_results[1].superfile, '~', ''));\\n \\npack_for_query2 := \\n $.CreatePackageMapString(ds_results[2].queryName, \\n ds_results[2].superfile, \\n STD.Str.FindReplace(ds_results[2].superfile, '~', ''));\\n\\ncompletePackage := '<RoxiePackages>' + pack_for_query1 + pack_for_query2 +'</RoxiePackages>'; \\n// completePackage;\\n\\nRequestLayout := RECORD\\n STRING packageMapData {XPATH('Info')};\\n BOOLEAN overwritePackage {XPATH('OverWrite')};\\n BOOLEAN activatePackage {XPATH('Activate')};\\n STRING targetCluster {XPATH('Target')};\\n STRING packageMapID {XPATH('PackageMap')};\\n STRING Process {XPATH('Process')};\\n STRING DaliIp {XPATH('DaliIp')};\\nEND;\\n\\nrequest := \\n DATASET(\\n [{completePackage,\\n TRUE,\\n TRUE,\\n $.constants.Roxie_Clustername,\\n $.constants.Package_Name,\\n '*',\\n $.constants.Dali_IP\\n }],\\n RequestLayout\\n );\\n\\nResponseLayout := RECORD\\n STRING code {XPATH('Code')};\\n STRING description {XPATH('Description')};\\nEND;\\n\\nEXPORT DeployPackage := \\n SOAPCALL(\\n request,\\n $.constants.RoxieUrl_WsPackageProcess,\\n 'AddPackage',\\n RequestLayout,\\n TRANSFORM(LEFT),\\n DATASET(ResponseLayout),\\n XPATH('AddPackageResponse/status')\\n );
\\n\\nDo you have any ideas on why is giving this exceptions, also I can't query the ECL watch.\\n\\nThanks,\", \"post_time\": \"2015-10-10 19:16:42\" },\n\t{ \"post_id\": 8318, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"Re: External Language Support- R plugin\", \"username\": \"Anjali\", \"post_text\": \"I have one more question,\\n\\nwhile installing HPCC server platform with plugins,i was asked to install the dependencies libRcpp.so and libRinside.so. I search for a suitable download link for long,but couldn't able to find one.Instead of libRcpp.so i got R-Rcpp package download link.Is both are same?\\n\\nElse can anyone provide a direction?\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-16 04:21:34\" },\n\t{ \"post_id\": 8316, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"Re: External Language Support- R plugin\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nThank you Tlhumphrey..\\n\\nLet me try it using 5.4.0-1.\\nAre you using local environment or 5.4.0-1-with plugins version?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-16 04:12:51\" },\n\t{ \"post_id\": 8314, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"Re: External Language Support- R plugin\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nThank you Richard..\\nI will surely try it out using BEGINC++,but my intention here is to explore external language support for R language.\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-16 04:05:37\" },\n\t{ \"post_id\": 8308, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"Re: External Language Support- R plugin\", \"username\": \"tlhumphrey2\", \"post_text\": \"I tried your code on the HPCC Platform 5.4.0-1 and I got no errors. Plus, the testSet results were output correctly, i.e. [2,1,3].\", \"post_time\": \"2015-10-15 17:06:29\" },\n\t{ \"post_id\": 8304, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"Re: External Language Support- R plugin\", \"username\": \"rtaylor\", \"post_text\": \"Anjali,\\n\\nYes, passing sets to functions written in other embedded languages can be problematic. Take a look at the BEGINC++ docs to see how they have to be handled in C++. Hopefully that will help.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-10-15 14:15:45\" },\n\t{ \"post_id\": 8302, \"topic_id\": 1936, \"forum_id\": 10, \"post_subject\": \"External Language Support- R plugin\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nI am trying to explore external language support feature for R.\\nI am using my local environment (by running HPCCSystemVM 5.2.4-1 in VMware player) for running the code.\\n\\nIn order to test R integration, i have submitted the below code in thor.\\nIMPORT R;\\n integer add1(integer val) := EMBED(R)\\n val+1\\n ENDEMBED;\\n\\n string cat(varstring what, string who) := EMBED(R)\\n paste(what,who)\\n ENDEMBED;\\n\\n data testData(data val) := EMBED(R)\\n val[1] = val[2];\\n val;\\n ENDEMBED;\\n\\n set of integer testSet(set of integer val) := EMBED(R)\\n t = val[1];\\n val[1] = val[2];\\n val[2] = t;\\n val;\\n ENDEMBED;\\n\\n add1(10); \\n cat('Hello', 'World');\\n testData(D'ab');\\n testSet([1,2,3]);
\\n\\nThe syntax check was successful, but on submitting, all of the function calls return proper results except the last one (testSet()). That call throws an error saying Error: 0: Rembed: Unsupported parameter type (0, 0), 0,
\\n\\nFrom further analysis i realized that,i will get the same error for any function call having sets (set of strings,set of integer...)as parameter type.\\n\\nCan any one help me to figure it out?\\n\\nAlso it would be great if anyone can give a guideline for R language support in ECL,as i am struggling with limited documentations.\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-15 12:43:32\" },\n\t{ \"post_id\": 22733, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nHere's a single FUNCTIONMACRO that does what you need -- produces a single string containing all fields and records from a dataset in CSV format (including newline characters between CSV records):\\nEXPORT DatasetAsCsvString(InDS, IncludeHeader = TRUE) := FUNCTIONMACRO\\n\\n StringRec := {STRING Str};\\n\\n #DECLARE(HdrStr)\\n #DECLARE(RecStr)\\n #DECLARE(comma)\\n #SET(comma,0)\\n #EXPORTXML(out, InDS)\\n #FOR (out)\\n #FOR (Field)\\n #IF (%comma% = 0)\\n #APPEND(HdrStr, %'{@label}'% )\\n #IF (%'{@type}'% = 'string')\\n #APPEND(RecStr, 'TRIM(LEFT.' + %'{@label}'% + ')')\\n #ELSE \\n #APPEND(RecStr, '(STRING)LEFT.' + %'{@label}'% )\\n #END \\n #SET(comma,1)\\n #ELSE\\n #APPEND(HdrStr, ',' + %'{@label}'% )\\n #IF (%'{@type}'% = 'string')\\n #APPEND(RecStr, ' + \\\\',\\\\' + TRIM(LEFT.' + %'{@label}'% + ')' )\\n #ELSE \\n #APPEND(RecStr, ' + \\\\',\\\\' + (STRING)LEFT.' + %'{@label}'% )\\n #END\\n #END\\n #END\\n #END\\n\\n LayoutStr := %'HdrStr'% ;\\n\\n Recs := PROJECT(InDS,TRANSFORM(StringRec,SELF.Str := #EXPAND(%'RecStr'%)));\\n RecsOut := ROLLUP(Recs,1=1,TRANSFORM(StringRec,\\n SELF.Str := LEFT.Str + '\\\\n' + RIGHT.Str));\\n\\n RETURN IF(IncludeHeader,\\n LayoutStr + '\\\\n' + RecsOut[1].Str, \\n RecsOut[1].Str);\\nENDMACRO;
I wouldn't recommend using this with terribly large datasets, but it should work to allow you to attach the result string to an email.\\n\\nI encapsulated the Template Language code in the FUNCTIONMACRO to build both the header record and the PROJECT's TRANSFORM expression in a single pass. The ROLLUP is required to put all the record strings into the single string result you need.\\n\\nI did note that, in your previous MACROS you were using #UNIQUENAME when you meant to use #DECLARE, and that you were using #TEXT where it wasn't strictly necessary (the %'symbol'% text form being sufficient). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-22 18:36:19\" },\n\t{ \"post_id\": 22713, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"james.wilson\", \"post_text\": \"Hi Somberi\\n\\nSorry, no, I never did figure it out. I haven't looked at it recently, when I get the chance I'll have a play and see if I can come up with anything.\", \"post_time\": \"2018-08-22 13:22:32\" },\n\t{ \"post_id\": 22703, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"somberi\", \"post_text\": \"Hi James,\\n\\nWere you able to get this resolved? Just having the two macros with the template within the functionmacro would solve i guess. \\n\\nThanks\\nRS\", \"post_time\": \"2018-08-22 13:13:55\" },\n\t{ \"post_id\": 18433, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"james.wilson\", \"post_text\": \"After a (very!) long delay I'm looking at this again. I've come up with a collection of macros to convert a dataset to a string containing the CSV data to use in SendEmailAttachText. Here it is:\\n\\n\\nSHARED MAC_MakeCsvConcatValue(lay) := MACRO\\n #UNIQUENAME(TheList)\\n #SET(TheList,'')\\n #UNIQUENAME(sep)\\n #SET(sep,'')\\n #UNIQUENAME(out)\\n #EXPORTXML(out, lay)\\n #FOR (out)\\n #FOR (Field)\\n #APPEND(TheList, %'sep'%)\\n #APPEND(TheList, 'L.')\\n #APPEND(TheList, #TEXT(%{@label}%))\\n #SET(sep,' + \\\\',\\\\' + ')\\n #END\\n #END\\n %'TheList'%\\nENDMACRO;\\n\\nSHARED MAC_MakeCsvHeaderList(lay) := MACRO\\n #UNIQUENAME(TheList)\\n #SET(TheList,'')\\n #UNIQUENAME(sep)\\n #SET(sep,'')\\n #UNIQUENAME(out)\\n #EXPORTXML(out, lay)\\n #FOR (out)\\n #FOR (Field)\\n #APPEND(TheList, %'sep'%)\\n #APPEND(TheList, #TEXT(%{@label}%))\\n #SET(sep,',')\\n #END\\n #END\\n %'TheList'%\\nENDMACRO;\\n\\nEXPORT DatasetAsCsvString(InputDataset, IncludeHeader = TRUE) := FUNCTIONMACRO\\n SingleStringLayout := {STRING single_string_field};\\n InputLayout := RECORDOF(InputDataset);\\n SingleStringLayout MakeSingleString(InputLayout L) := TRANSFORM\\n SELF.single_string_field := #EXPAND(MAC_MakeCsvConcatValue(InputLayout));\\n END;\\n RETURN IF(IncludeHeader, MAC_MakeCsvHeaderList(InputLayout) + '\\\\n', '')\\n + UKServices_Utilities.ConcatenateStringFields(PROJECT(InputDataset\\n , MakeSingleString(LEFT)\\n )\\n , single_string_field, '\\\\n');\\nENDMACRO;\\n
\\n\\nThis works. Woo hoo! However when I tried to put it in to a repository and call it I get the error:\\nUnknown identifier "MAC_MakeCsvConcatValue"\\n\\nHere's my test code for calling it:\\n\\nLayout := RECORD\\n STRING string_field1;\\n STRING string_field2;\\n INTEGER integer_field;\\nEND;\\n\\nDs := DATASET([{'a', 'z', 1}\\n , {'b', 'y', 2}\\n , {'c', 'x', 3}\\n ], Layout);\\n\\n\\n// x := DatasetAsCsvString(Ds);\\n// y := DatasetAsCsvString(Ds, FALSE);\\nx := UKServices_Utilities.DatasetAsCsvString(Ds);\\ny := UKServices_Utilities.DatasetAsCsvString(Ds, FALSE);\\n\\nx;\\ny;\\n
\\n\\nPresumably this is because those aren't visible now they're in the repository. Presumably I could split the two MACROs out in to their own files in the repository but I don't really want to do that as they're so specific to DatasetAsCsvString. I tried putting them in to the FUNCTIONMACRO but couldn't get that to work, can anyone cleverer than me work out how to do this?\", \"post_time\": \"2017-07-31 15:29:58\" },\n\t{ \"post_id\": 8338, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"rtaylor\", \"post_text\": \"James, Do I have to write separate code for each dataset to get the results in HTML or CSV format?
If you're going to send the data as CSV, then you should just use the SendEmailAttachText() function, since CSV files are just ASCII text anyway. \\n\\nWhichever function you use, you will need to write a function to get each simple dataset result into the format that can attach to the email. The functions should end up with the entire result dataset formatted into a single string for transmission.\\n\\nSince you have several different result files to send with different formats for each, I would suggest emailing each separately.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-10-19 17:31:13\" },\n\t{ \"post_id\": 8336, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"james.wilson\", \"post_text\": \"Hi Jim\\n\\nThanks, yes, that's the function I'm using to send the email, it's getting the contents for the attachment parameter that's the issue. Do I have to write separate code for each dataset to get the results in HTML or CSV format?\", \"post_time\": \"2015-10-19 13:24:52\" },\n\t{ \"post_id\": 8334, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Re: Emailing dataset\", \"username\": \"JimD\", \"post_text\": \"I would use:\\n\\n\\nSTD.System.Email.SendEmailAttachData \\n
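\\nA minimal sketch of a call (argument order as I recall it -- the page linked below has the authoritative signature; the CSV text is just a stand-in for whatever string you build from your result dataset):\\nIMPORT STD;\\ncsvText := 'name,total' + '\\\\n' + 'alpha,1' + '\\\\n' + 'beta,2'; // hypothetical content\\nSTD.System.Email.SendEmailAttachData('someone@example.com',\\n                                     'Workunit stats',\\n                                     'Stats attached as CSV.',\\n                                     (DATA)csvText,  // the attachment is passed as DATA\\n                                     'text/csv',\\n                                     'stats.csv');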
\\n\\nSee https://hpccsystems.com/download/docume ... hData.html\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-10-19 13:08:21\" },\n\t{ \"post_id\": 8330, \"topic_id\": 1944, \"forum_id\": 10, \"post_subject\": \"Emailing dataset\", \"username\": \"james.wilson\", \"post_text\": \"I'm looking to send the results of a workunit (gathering some stats) in an email, as the recipients don't have access to Thor. It's a couple of simple datasets with similar but not identical layouts. The best I can some up with at the moment is to use STD.SendEmailAttachData and to write a function to convert the each of the datasets to CSV or HTML. Is there no way HPCC can do this for me? The code must be there for ECL Watch! If I do have to convert them myself do I have to write a function for each layout, or is there some way I can write a macro function that will do any dataset?\", \"post_time\": \"2015-10-19 09:23:00\" },\n\t{ \"post_id\": 8354, \"topic_id\": 1948, \"forum_id\": 10, \"post_subject\": \"Re: Understanding through pipe\", \"username\": \"omnibuzz\", \"post_text\": \"Thank you, Bob. That was helpful.\\nRegards\\nSrini\", \"post_time\": \"2015-10-21 16:13:50\" },\n\t{ \"post_id\": 8350, \"topic_id\": 1948, \"forum_id\": 10, \"post_subject\": \"Re: Understanding through pipe\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nUse of grep is ok in itself, but that ECL will give the pipe (and grep) a stream of records of { STRING } format.\\nWhich means our internal variable string format, prefixed 4 byte length, followed by data.\\nIt won't see plain text lines, which is what grep is expecting.\\n\\nWhat you probably want is: \\n\\nRec := {STRING Text};\\nds:= DATASET([{'Alpha 1'},{'Alpha 2'},{'Alpha 3'},{'Beta 1'},{ 'Beta 2'},{'Beta 3'}],Rec);\\nds;\\n \\nPIPE(ds,'grep Alpha',CSV,OUTPUT(CSV));\\n
\\nThat tells PIPE that input and output is CSV plain text and will format the input records as such.\\n\\nThere's another problem though, that you won't see on hthor or 1 slave Thor, but will on a N-slave Thor.\\ngrep doesn't like to be run then closed without having seen any input.\\nSo in the above example, slave 1 will see all the data, slave 2->N will see none.\\nAll will run grep, 1 will be fine, 2->N will open/then close the input and grep complains (returns an error code).\\n\\nHTH,\\n\\nBob\\n\\n\\nPS - A big thanks to Jake for providing this insight!\", \"post_time\": \"2015-10-21 15:15:10\" },\n\t{ \"post_id\": 8344, \"topic_id\": 1948, \"forum_id\": 10, \"post_subject\": \"Understanding through pipe\", \"username\": \"omnibuzz\", \"post_text\": \"I am trying to understand how through pipe works. Can you help make this contrived example work?\\nRec := {STRING Text};\\nds:= DATASET([{'Alpha 1'},{'Alpha 2'},{'Alpha 3'},{'Beta 1'},{ 'Beta 2'},{'Beta 3'}],Rec);\\nds;\\n\\nPIPE(ds,'grep Alpha');
\\n\\nI am expecting the result to be a dataset with the first 3 records.\\n\\nAlso, can an example be shown using REPEAT? Here a requirement, If each row contains a folder path in the input dataset and I want to do an "ls" on every row and give it as one single result with all folder contents.\\n\\nThanks\\nSrini\", \"post_time\": \"2015-10-20 12:44:27\" },\n\t{ \"post_id\": 8384, \"topic_id\": 1956, \"forum_id\": 10, \"post_subject\": \"Re: Dealing with word documents\", \"username\": \"rtaylor\", \"post_text\": \"ceejac,\\n\\nLook at the "Working with BLOBs" article in the Porgrammer's Guide.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-10-28 15:29:23\" },\n\t{ \"post_id\": 8378, \"topic_id\": 1956, \"forum_id\": 10, \"post_subject\": \"Dealing with word documents\", \"username\": \"ceejac\", \"post_text\": \"Hai,\\n\\nI am working on this PoC which deals with a bunch of word documents as input. I want to merge all these documents into a single text file with each record as each document and store it in hpcc for further processing.Is there any way to incorporate the document conversion also in hpcc?\\n\\nRegards,\\nceejac\", \"post_time\": \"2015-10-27 07:16:26\" },\n\t{ \"post_id\": 8380, \"topic_id\": 1958, \"forum_id\": 10, \"post_subject\": \"Graph Edge NodeMaxRowsProcessed\", \"username\": \"georgeb2d\", \"post_text\": \"I am trying to understand what this column means in ECL graphs. Also its twin, NodeMinRowsProcessed. I thought what it meant was the Max Node that a row was processed on. I had a process where the NodeMaxRowsProcessed was 317 and the NodeMinRowsProcessed was 2. I thought that meant there was no data on nodes 318 to 400, and no data on node 1. I am noticing on some graphs that the number on NodeMaxRowsProcessed is less than NodeMinRowsProcessed, so it probably means the node where the max rows were processed. \\n\\nIf so, does the graph give the max node a row was processed on, as well as the minimum row? So I can see if the data needs to be distributed? Or do I realize that from a different analysis? \\n\\nIs there any documentation or a video on reading the graphs in ECL?\\n\\nThanks,\\nDon\", \"post_time\": \"2015-10-27 15:00:40\" },\n\t{ \"post_id\": 8424, \"topic_id\": 1960, \"forum_id\": 10, \"post_subject\": \"Re: How many times JOIN Get executed in following code ?\", \"username\": \"vyasshub\", \"post_text\": \"Yes, from Graph it looks like its executing once only.\\nThanks for confirming.\", \"post_time\": \"2015-11-05 04:00:05\" },\n\t{ \"post_id\": 8394, \"topic_id\": 1960, \"forum_id\": 10, \"post_subject\": \"Re: How many times JOIN Get executed in following code ?\", \"username\": \"bforeman\", \"post_text\": \"If you look at the graph of your code sample, yes, it looks like the JOIN is only executed once per record.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-10-29 13:20:04\" },\n\t{ \"post_id\": 8382, \"topic_id\": 1960, \"forum_id\": 10, \"post_subject\": \"How many times JOIN Get executed in following code ?\", \"username\": \"vyasshub\", \"post_text\": \"Please let me know how many times does the Join gets executed. 
My assumption is that the JOIN would be executed only once and its output will be used inside the Transform function 'Trans3()' 4 times.\\n\\n\\nRec1 := RECORD\\n\\tinteger1 sno;\\n\\tSTRING Name;\\nEND;\\n\\nRec2 := RECORD\\n\\tinteger1 sno;\\n\\tSTRING addr;\\nEND;\\n\\nDS1 := DATASET([{1, 'a'},{2, 'b'},{3, 'c'}], Rec1);\\nDS2 := DATASET([{1, 'addr1'},{2, 'addr2'},{3, 'addr3'}], Rec2);\\n\\nSHARED joinDS := JOIN(DS1, DS2, LEFT.Sno = RIGHT.Sno);\\n\\nJoinDS;\\n\\nRec3 := RECORD\\n\\tinteger1 no;\\nEND;\\n\\nDS3 := DATASET([{1},{2},{3},{4}],Rec3);\\n\\nRec3 Trans3(Rec3 L) := TRANSFORM\\n\\tSELF.no := joinDS(Sno = L.no)[1].Sno;\\nEND;\\n\\nDS4 := PROJECT(DS3, Trans3(LEFT));\\nDS4;\", \"post_time\": \"2015-10-28 13:41:56\" },\n\t{ \"post_id\": 8448, \"topic_id\": 1982, \"forum_id\": 10, \"post_subject\": \"Re: How to do multiple match by RegexFind?\", \"username\": \"bforeman\", \"post_text\": \"Richard's example is the best and easiest, but just to show you the flexibility of ECL, here is another approach:\\n\\nstring8 s :='a ab abc';\\n\\nParseWords(STRING LineIn) := FUNCTION\\n PATTERN Ltrs := PATTERN('[A-Za-z]');\\n PATTERN Char := Ltrs | '-' | '\\\\'';\\n TOKEN Word := Char+;\\n ds \\t := DATASET([{LineIn}],{STRING line});\\n RETURN PARSE(ds,line,Word,{STRING10 Pword := MATCHTEXT(Word)},ALL);\\n END;\\n\\t\\t\\nParseWords(s);\\t\\t
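\\nAnd if you specifically want that result back as a SET (the nearest ECL equivalent of an array), you could wrap the call -- a small, untested addition:\\nWordSet := SET(ParseWords(s), Pword);\\nWordSet; // ['a', 'ab', 'abc'] padded to STRING10, so TRIM if the trailing spaces matter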
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-06 17:13:01\" },\n\t{ \"post_id\": 8440, \"topic_id\": 1982, \"forum_id\": 10, \"post_subject\": \"Re: How to do multiple match by RegexFind?\", \"username\": \"rtaylor\", \"post_text\": \"WeiDong,I wanna make it output like an array of string, including "a","ab","abc".
You don't need Regular expressions to accomplish that. Just use our STD.Str.SplitWords function from the Standard library, like this:\\nIMPORT STD;\\nstring8 s :='a ab abc';\\nSTD.Str.SplitWords(s, ' ');
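\\nSplitWords returns a SET OF STRING, so you can treat the result much like the array you are used to -- index it or count it directly, for example (untested):\\nwords := STD.Str.SplitWords(s, ' ');\\nwords[2];     // 'ab'\\nCOUNT(words); // 3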
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-06 15:50:00\" },\n\t{ \"post_id\": 8436, \"topic_id\": 1982, \"forum_id\": 10, \"post_subject\": \"How to do multiple match by RegexFind?\", \"username\": \"WeiDong\", \"post_text\": \"Hello:\\n\\nFirst paste my codes below:\\n\\n\\nstring regstr := '(?<=\\\\\\\\b)\\\\\\\\w+?(?=\\\\\\\\b)';\\nstring8 s :='a ab abc';\\nExport MainEntry := regexfind(regstr,s);\\n
\\n\\nQuestion:\\nI wanna make it output like an array of string, including "a","ab","abc". This works well with "Regex" class using the "Matches" method in C#, but how to do in ECL?\\n\\nThanks!\", \"post_time\": \"2015-11-06 07:12:14\" },\n\t{ \"post_id\": 8542, \"topic_id\": 1990, \"forum_id\": 10, \"post_subject\": \"Re: How to re-assign a new value to a recordSet array?\", \"username\": \"rtaylor\", \"post_text\": \"WeiDong, How to update certain rows' column values and write them back to the original data source file on the landing zone or cluster?
That is simply not possible to do in HPCC. One abiding foundation principle of HPCC is "never throw anything away" so if you are reading data from a file in a particular workunit you are NOT ALLOWED to write back to that same file in the same workunit. This prevents you from ever mistakenly overwriting data that should not have been overwritten.\\n\\nYou CAN write the updated data to a new file on your cluster. You could then run another workunit to read that new file and write it to disk overwriting the original file, if you really really want to, but that defeats the purpose of not allowing writes to files you're reading from.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-10 15:01:02\" },\n\t{ \"post_id\": 8532, \"topic_id\": 1990, \"forum_id\": 10, \"post_subject\": \"Re: How to re-assign a new value to a recordSet array?\", \"username\": \"bforeman\", \"post_text\": \"The simplest way to do this is to use a PROJECT, and then update your field values in the TRANSFORM. If it is a static update, the statement in the TRANSFORM would be simple, but if your field value is dynamic, you could possibly synchronize a set of values to update with the PROJECT COUNTER value used as an index into the SET.\\n\\nExample code:\\n\\nMyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\nEND;\\n\\nSomeFile := DATASET([{'C',''},{'C',''},{'A',''},{'B',''},{'A',''}],MyRec);\\n\\nUpdateSet := ['A','B','C','D','E'];\\n\\nMyOutRec := RECORD\\n\\tmyRec.Value1;\\n\\tmyRec.Value2;\\nEND;\\n\\nMyOutRec CatThem(SomeFile Le, INTEGER Cnt) := TRANSFORM\\n \\tSELF.value2 := UpdateSet[Cnt];\\n \\tSELF.value1 := Le.value1;\\nEND;\\n\\nCatRecs := PROJECT(SomeFile,CatThem(LEFT,COUNTER));\\nOUTPUT(CatRecs);
\\n\\n\\n...and then of course just use OUTPUT to update your file on the cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-10 13:16:06\" },\n\t{ \"post_id\": 8524, \"topic_id\": 1990, \"forum_id\": 10, \"post_subject\": \"Re: How to re-assign a new value to a recordSet array?\", \"username\": \"WeiDong\", \"post_text\": \"Maybe you misunderstand what I mean:\\n\\nMy codes is ONLY a sample, what I really want to ask is:\\n\\n1) How to update certain rows' column values and write them back to the original data source file on the landing zone or cluster? e.g: If I wanna update from 1st row to 10nd row, and the row has the column named "FirstName". I wanna update them as:\\n\\n1st row's FirstName to "Name1".\\n2nd row's FirstName to "Name2".\\n……\\n10th row's Firstname to "Name10".\\n\\n2) How to update for a certain condition? You know that in the traditional SQL we can use "update xxx set somecolumn1 = new value where……". But how to do that in ECL?\\n\\nThanks!\", \"post_time\": \"2015-11-10 01:11:20\" },\n\t{ \"post_id\": 8466, \"topic_id\": 1990, \"forum_id\": 10, \"post_subject\": \"Re: How to re-assign a new value to a recordSet array?\", \"username\": \"bforeman\", \"post_text\": \"Hi WeiDong,\\n\\nThere are two simple ways to solve this.\\n\\n1. Remove the EXPORT from your OUTPUT action, and then submit the workunit.\\n\\nOR\\n\\n2. Create a Builder Window Runnable file that IMPORTs your EXPORTed DATASET definition, and then simply OUTPUT what you need.\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-09 14:30:31\" },\n\t{ \"post_id\": 8456, \"topic_id\": 1990, \"forum_id\": 10, \"post_subject\": \"How to re-assign a new value to a recordSet array?\", \"username\": \"WeiDong\", \"post_text\": \"Hello all:\\n\\nI wrote codes like this as a beginner:\\n\\nLayout_Persons := {\\ninteger4 RecId,\\nstring15 FirstName,\\nstring25 LastName,\\nstring15 MiddleName,\\nstring2 NameSuffix,\\nstring8 FileDate,\\nunsigned2 BureauCode,\\nstring1 MaritalStatus,\\nstring1 Gender,\\nunsigned1 DependentCourt,\\nstring8 Birthday,\\nstring42 StreetAddress,\\nstring20 City,\\nstring2 Status,\\nstring5 ZipCode\\n};\\n\\ntempDBSource := dataset('~Online::william::intro:Persons',Layout_Persons,thor);\\ntempDBSource[1].LastName ='dbf';\\ntempDBSource[2].LastName :='efg';\\nExport MainEntry := Output(tempDBSource,{FirstName,LastName},'~Online::william::intro:PersonsTest',overwrite,update);
\\n\\nHowever the compiler tells me that "WHEN must be used to associate an action with a definition……". So how to solve this?\\n\\nThanks!\", \"post_time\": \"2015-11-09 06:02:07\" },\n\t{ \"post_id\": 22253, \"topic_id\": 2002, \"forum_id\": 10, \"post_subject\": \"Re: How to do multiple "FindReplace" for a single string val\", \"username\": \"Allan\", \"post_text\": \"Just noticed this post.\\n\\nYou dont have to use a library function. The language itself has this functionality:\\n\\ns := 'ABCDE';\\nREGEXREPLACE('[ABD]',s,'Z');\\n
\\n\\nI always use a language feature over a library function if there is a choice as the compiler can be clever with a language construct and might fold the whole expression away to nothing. With a library function it's forced to perform a call (which is not cheap) and all register tracking/ optimizations are lost. \\n\\nYours\\nAllan\", \"post_time\": \"2018-07-03 16:47:10\" },\n\t{ \"post_id\": 8586, \"topic_id\": 2002, \"forum_id\": 10, \"post_subject\": \"Re: How to do multiple "FindReplace" for a single string val\", \"username\": \"rtaylor\", \"post_text\": \"WeiDong,\\n\\nYou can use the STD.Str.SubstituteIncluded() function to do exactly that, just like this:IMPORT STD;\\nSTD.Str.SubstituteIncluded('ABCDE', 'ABD', 'Z');
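Because the second argument is just a STRING of characters to match, a dynamic list of characters can be supplied as a single string; a minimal sketch (the helper name and values are made up):

IMPORT STD;
// replace every character of src that appears in badChars with the single character subst
ReplaceAnyOf(STRING src, STRING badChars, STRING1 subst) :=
    STD.Str.SubstituteIncluded(src, badChars, subst);
OUTPUT(ReplaceAnyOf('ABCDE', 'ABD', 'Z'));   // expected result: 'ZZCZE'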
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-12 15:08:06\" },\n\t{ \"post_id\": 8582, \"topic_id\": 2002, \"forum_id\": 10, \"post_subject\": \"How to do multiple "FindReplace" for a single string value?\", \"username\": \"WeiDong\", \"post_text\": \"Hello:\\n\\n I have a string "ABCDE"\\n\\n with another string array:\\n ['A','B','D']\\n\\n I wanna replace all the letters in Group B From Group A to the letter "Z", so the result is:\\n\\n ZZCZE\\n\\n FindReplace can only accept one string, in the other languages we have foreach /for……loop. But how to implement that in ECL? Please don't use FindReplace three times, because we don't know how many replacement chars there are in the string array. So this solution should be general enough:)\", \"post_time\": \"2015-11-12 02:08:18\" },\n\t{ \"post_id\": 8590, \"topic_id\": 2004, \"forum_id\": 10, \"post_subject\": \"Re: Some General Questions about Collection And Variable\", \"username\": \"rtaylor\", \"post_text\": \"WeiDong,1) In the other programming languages we can controller with array by "Add","Remove" or edit some specific value through the index. But how to cope with them in ECL?
First off (and this is important) -- HPCC is not an RDBMS, which means that it is not an Oracle or MySQL type of tool where a lot is done for you. HPCC uses ISAM files, with the caveat that files read from in a workunit cannot be overwritten in that same workunit. This prevents you from accidentally destroying valuable data, because HPCC is also not an OLTP system -- you are always working with your data in batch mode, not real-time. Questions:\\n1) How to add a new row?
You could use the ROW() function. 2) Delete an existing row?
Simply filter that record out before you write the new file to disk. 3) Modify a specific row's "Value" field's content?
The TRANSFORM function accomplishes that, and its simplest implementation is with PROJECT (although you can do it with any function that uses TRANSFORMs). 2) Can we re-assign a variable in ECL?
NO. ECL is a declarative language, so there is no such thing in ECL as a "variable" -- they are definitions. And just like a dictionary, each "word" (ECL definition name) has exactly one entry defining what it is (within a given visibility scope). 3) If I have a string value ("ABABC") and I want to find the index of the first letter A, which function should I use?
STD.STR.Find() is the function I would use. If we cannot, and we want to re-use the variable, how can we reuse the same variable for this special usage? And if I have a Module where there's a shared variable, and there's also a function there with the same name variable, is that a local variable belonging to the function itself, or the module-based global variable?
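Putting the three row-level answers above together, a small self-contained sketch (the layout and values are invented for illustration):

MyStru  := {STRING Value};
base    := DATASET([{'A'},{'B'},{'C'}], MyStru);
added   := base + DATASET([{'D'}], MyStru);      // 1) append a new row (ROW() can also build a single row)
removed := added(Value <> 'B');                  // 2) "delete" a row by filtering it out of the new file
updated := PROJECT(removed,
             TRANSFORM(MyStru,
               SELF.Value := IF(LEFT.Value = 'A', 'Z', LEFT.Value)));   // 3) modify a field's content
OUTPUT(updated);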
\\nAll of these questions are the kind of fundamental questions that are already addressed in the online ECL training courses available free to everybody in the world here (in English): https://learn.lexisnexis.com/hpcc and also offered in Mandarin here: https://hpccsystems.com/community/onlinetraining/intros\\n\\nI strongly suggest every new ECL user take the ECL courses first, and THEN start trying to figure out how to apply HPCC to your specific problem space.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-12 16:23:25\" },\n\t{ \"post_id\": 8584, \"topic_id\": 2004, \"forum_id\": 10, \"post_subject\": \"Some General Questions about Collection And Variable\", \"username\": \"WeiDong\", \"post_text\": \"Hello all:\\n\\n1) In the other programming languages we can controller with array by "Add","Remove" or edit some specific value through the index. But how to cope with them in ECL?\\n\\nExample:\\n\\n MyStru :=\\n {\\n String Value;\\n };\\n\\n resultCollection := dataset([{'A'},{'B'}],MyStru);\\n
\\n\\nQuestions:\\n1) How to add a new row? \\n2) Delete an existing row? \\n3) Modify a specific row's "Value" field's content?\\n\\n2) Can we re-assign a variable in ECL? This means whether we can do something like this following?\\n\\n\\nstring a:= 'a';\\na := a+'b';\\n
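For contrast, the declarative pattern described in the answer above gives each new value its own definition instead of re-assigning; a minimal sketch:

a1 := 'a';
a2 := a1 + 'b';   // a new definition rather than a re-assignment of a1
OUTPUT(a2);       // 'ab'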
\\n\\nIf we cannot and if we wanna re-use the varaible, how can we reuse the same varible for the special usage? And if I have a Module, where there's a shared variable, and there's also a function there, with the same name variable, is that a local variable belonging to the function itself or the module-based global variable?\\n\\n3) If I have a string value ("ABABC"), and I wanna find the index of the first letter A, which function should I use?\", \"post_time\": \"2015-11-12 05:04:54\" },\n\t{ \"post_id\": 9564, \"topic_id\": 2012, \"forum_id\": 10, \"post_subject\": \"Re: Cannot find java class when embeded with java code\", \"username\": \"kevinLv\", \"post_text\": \"I met with the same issue. In my thor cluster, I could easily call example Java called JavaCat. But those classes I created by myself can not be resolved. Please exchange once you got any solution. Thanks.\", \"post_time\": \"2016-04-22 03:17:29\" },\n\t{ \"post_id\": 8608, \"topic_id\": 2012, \"forum_id\": 10, \"post_subject\": \"Cannot find java class when embeded with java code\", \"username\": \"WeiDong\", \"post_text\": \"Hello:\\n\\nI have a java project code compiled successfully with Eclipse:\\n\\n【This is model】\\npackage common;\\npublic class Student {\\n\\tpublic int getId() {\\n\\t\\treturn id;\\n\\t}\\n\\n\\tpublic void setId(int id) {\\n\\t\\tthis.id = id;\\n\\t}\\n\\n\\tpublic String getName() {\\n\\t\\treturn name;\\n\\t}\\n\\n\\tpublic void setName(String name) {\\n\\t\\tthis.name = name;\\n\\t}\\n\\n\\tpublic int id = 0;\\n\\tpublic String name = "";\\n}
\\n\\n【This is the main static method】\\npackage common;\\n\\nimport java.util.ArrayList;\\n\\nimport common.Student;\\n\\npublic class CommonFuncs {\\n\\tpublic static ArrayList<Student> GetStudents(ArrayList<Student> students) {\\n\\t\\tStudent s = null;\\n\\t\\tfor (int i = 0; i < students.size(); ++i) {\\n\\t\\t\\ts = students.get(i);\\n\\t\\t\\ts.setId(i+1);\\n\\t\\t\\ts.setName("Name" + (i+1));\\n\\t\\t}\\n\\t\\treturn students;\\n\\t}\\n}
\\n\\nNow I tried to use java execution below:\\n\\nImport java;\\n\\nExport TestJava := Module\\n\\tExport Student:={\\n\\tinteger id,\\n\\tstring name\\n};\\n\\tExport getStudents(LINKCOUNTED dataset(Student)stus):=\\n\\t IMPORT(java,\\n 'common/CommonFuncs.GetStudents:(Ljava/util/ArrayList;)Ljava/util/ArrayList;' : classpath('/eclipse/workspace/JavaFuncs/bin/common'));\\nEnd;
\\n\\nHere're my calling codes:\\n\\nImport Funcs.testjava;\\n\\ntestData := dataset([{0,'William Dong Testing'}],testjava.Student);\\nExport MainEntry :=testjava.getStudents(testData);
\\n\\nBoth my java class files are created in c:/eclipse/workspace/JavaFuncs/bin/common/, and my ECL project is also in C:\\\\eclipse\\\\workspace\\\\Test. \\n\\nQuestions:\\n1)\\nIn running, it tells me "System error: 0: javaembed: Failed to resolve class name common.CommonFuncs", so how to solve that problem? Must I copy class file and create the path folders onto the Server's site (I mean I must create the path c:/eclipse/workspace/JavaFuncs/bin/common/, with the two class files copied there)? Is there any way for local debugging or testing?\\n2) \\nJava's class fields can be equalled to ECL's recordSet's fields. Must they be public or just private with public "Getter/Setter"? Is there anything I should change with my codes in Java or ECL?\\n\\nThanks!\", \"post_time\": \"2015-11-17 08:33:41\" },\n\t{ \"post_id\": 8648, \"topic_id\": 2014, \"forum_id\": 10, \"post_subject\": \"Re: Checking headers(column name) of a file before spraying\", \"username\": \"sunilatCTS\", \"post_text\": \"Thanks Richard, I am planning to post this issue in JIRA.\\n\\n\\nRegards,\\nSunil\", \"post_time\": \"2015-11-19 16:56:27\" },\n\t{ \"post_id\": 8646, \"topic_id\": 2014, \"forum_id\": 10, \"post_subject\": \"Re: Checking headers(column name) of a file before spraying\", \"username\": \"rtaylor\", \"post_text\": \"Sunil,Chances are high that your cluster can go down.
Then this is the kind of issue that should go into a JIRA ticket.\\n\\nHTH\\n\\nRichard\", \"post_time\": \"2015-11-19 16:23:53\" },\n\t{ \"post_id\": 8642, \"topic_id\": 2014, \"forum_id\": 10, \"post_subject\": \"Re: Checking headers(column name) of a file before spraying\", \"username\": \"sunilatCTS\", \"post_text\": \"Hi Jim,\\n\\nThanks for the illustration, i have already tried this and i was able to get the output for the Extenallogicalfile name. \\n\\nPlease find the way i want this to be experimented.\\n\\n****WARNING !WARNING !WARNING !WARNING !WARNING !WARNING !WARNING !WARNING ! : Executing the following steps can result in the cluster going down. I have experienced this and the cluster i am using has HPCC version 5.2.4-1.\\n\\n********************************************************************************\\nStep1 : Create inline dataset.Execute the following code.\\n\\nREC := RECORD\\n\\n\\tSTRING Filename;\\n\\tSTRING FileExtension;\\n\\tSTRING FieldHeaders;\\n\\nEND;\\n\\nDS := DATASET([{'employee','csv','employeeid,employeefirstname,employeelastname'},\\n\\t\\t\\t\\t\\t\\t\\t {'project','csv','Projectid, Projectname, Projectlocation'}],REC);\\nDS;\\n\\nOutput(DS,,'~experiment::sample::filelist',OVERWRITE);\\n\\n*********************************************************************************\\nSTEP 2: \\n\\nUnzip the attached .zip file to extract a folder named expfolder and upload this folder to the landing zone path that reflects '/var/lib/HPCCSystems/mydropzone/'\\n\\n*********************************************************************************\\n\\nSTEP 3:\\n\\nSAVE the extracted ECL function expValidateHeaders under your code repository.\\n\\nSTEP 4:\\n\\nCall the above function from a builder window with the folder name 'expfolder' as paramter\\n\\nlet us assume you had saved this code under the folder validator in your repository,\\n\\nIMPORT Validator;\\nValidator.expValidateHeaders('expfolder');\\n\\nChances are hight that your cluster can go down.\", \"post_time\": \"2015-11-18 20:37:39\" },\n\t{ \"post_id\": 8628, \"topic_id\": 2014, \"forum_id\": 10, \"post_subject\": \"Re: Checking headers(column name) of a file before spraying\", \"username\": \"JimD\", \"post_text\": \"Here is some simple code I have that uses STD.File.ExternalLogicalFileName. Remember this function only returns the Logical Filename for a file on a LZ. \\n\\n\\nIMPORT STD;\\nIP := '127.0.0.1'; //Use YOUR IP here\\nfile := '/var/lib/HPCCSystems/mydropzone/OriginalPerson';\\nLayout_Person := RECORD\\n STRING15 FirstName;\\n STRING25 LastName;\\n STRING15 MiddleName;\\n STRING5 Zip;\\n STRING42 Street;\\n STRING20 City;\\n STRING2 State;\\nEND;\\nDS1 := DATASET(STD.File.ExternalLogicalFileName(IP,file),\\n Layout_Person, FLAT);\\nOUTPUT(DS1);\\n\\n
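A rough, untested sketch of checking just the header line of a landing-zone file against an expected column list (the IP, file name, and header string below are placeholders, and the Dali-access restriction discussed further down in this thread may still apply when this is run from Thor slaves):

IMPORT STD;
IP       := '127.0.0.1';                                      // your landing-zone IP
infile   := '/var/lib/HPCCSystems/mydropzone/employee.csv';   // hypothetical file to validate
RawRec   := {STRING line};
// empty SEPARATOR so each physical line lands in the single 'line' field
raw      := DATASET(STD.File.ExternalLogicalFileName(IP, infile), RawRec,
                    CSV(SEPARATOR(''), TERMINATOR(['\n','\r\n'])));
expected := 'employeeid,employeefirstname,employeelastname';
norm(STRING s) := STD.Str.ToLowerCase(TRIM(s, ALL));          // ignore case and spacing
OUTPUT(norm(raw[1].line) = norm(expected), NAMED('HeaderOK'));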
\\n\\nPlease try something like to make sure you can access your Landing zone files, then we can dig deeper into your code.\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-17 18:50:09\" },\n\t{ \"post_id\": 8626, \"topic_id\": 2014, \"forum_id\": 10, \"post_subject\": \"Checking headers(column name) of a file before spraying\", \"username\": \"sunilatCTS\", \"post_text\": \"I have a requirement by which i need to validate a set of files (in a folder) for their column names to see if they match against an expected value.\\n\\nI have an inline dataset that i have saved as a logical file that contains the list of files that folder needs to have. Each row that represents a file has a field that has the expected headernames separated by comma.\\n\\nthe logic is to read this dataset and query the file that is available in the landing zone(under the designated folder). This i guess could be done by the library function "ExternalLogicalFileName" that could be used to directly read the file from landing zone.I do not want to spray this file and do the validation. the validation needs to happen prior to spraying.\\n\\nHowever upon executing this logic, i get the following error.\\n\\n"Error: System error: -1: Graph[1], diskcount[2]: SLAVE #1 [10.52.40.204:20100]: No access to Dali - this normally means a plugin call is being called from a thorslave, (0, 0), -1, ".\\n\\nI tried using NOTHOR against the statement "ExternalLogicalName" but i got the following error instead.\\n\\nError: NOTHOR expression DATASET in filterout appears to access a parent dataset - this may cause a dataset not active error (49, 84), 4544,\", \"post_time\": \"2015-11-17 17:45:35\" },\n\t{ \"post_id\": 8688, \"topic_id\": 2026, \"forum_id\": 10, \"post_subject\": \"Re: IF always producing TRUE results\", \"username\": \"georgeb2d\", \"post_text\": \"An output is produced. The code I put was not the entire ecl. That is in the workUnit referenced above. I will add more code here:\\n\\nDesprayReport := FileServices.Despray(ReportFileNames.LogicalFile + pseq, ReportFileNames.RemoteServer, RemoteFile, , , , true);\\n\\t\\n\\tSEQUENTIAL(WriteReport, DesprayReport ,dogs);\\n
\\n\\nAfter talking with a more experienced developer, he told me that we make choices at the output level, not the layout level as I had first attempted. He suggested the optimizer might be changing the code to produce outputs I do not want from the layouts.\\n\\nI changed the code to do the following:\\n\\n WriteReportmiles := OUTPUT(MilesSortedOut, \\n , \\n ReportFileNames.LogicalFile, \\n CSV(HEADING(SINGLE)),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t compressed, OVERWRITE); \\n\\t WriteReportKM := OUTPUT(KMSortedOut, \\n , \\n ReportFileNames.LogicalFile, \\n CSV(HEADING(SINGLE)),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t compressed, OVERWRITE); \\n\\t DesprayReport := FileServices.Despray(ReportFileNames.LogicalFile, ReportFileNames.RemoteServer, RemoteFile, , , , true);\\n\\tdoit := IF(Miles_Kilometers IN ['K', 'k', 'KM', 'km'], WriteReportKM, WriteReportmiles);\\n\\tSEQUENTIAL(doit, DesprayReport );\\n
\\n\\nThis works fine. I still do not know why the first attempt did not work as I intended.\", \"post_time\": \"2015-11-25 16:25:43\" },\n\t{ \"post_id\": 8678, \"topic_id\": 2026, \"forum_id\": 10, \"post_subject\": \"Re: IF always producing TRUE results\", \"username\": \"tlhumphrey2\", \"post_text\": \"The fact that you gave your OUTPUT a name, WriteReport, means it isn't executed. So, what ever you see in this file wasn't placed there by this OUTPUT statement.\", \"post_time\": \"2015-11-24 19:07:21\" },\n\t{ \"post_id\": 8676, \"topic_id\": 2026, \"forum_id\": 10, \"post_subject\": \"Re: IF always producing TRUE results\", \"username\": \"georgeb2d\", \"post_text\": \"Curiouser and curiouser. What I want is a format with KM or miles. What I am getting is a format with the layout always in km. However, the calculations are correct for either miles or km. This is very confusing. \\n\\nWU W20151124-131607 on Alpha Dev Thor.\", \"post_time\": \"2015-11-24 18:47:28\" },\n\t{ \"post_id\": 8668, \"topic_id\": 2026, \"forum_id\": 10, \"post_subject\": \"IF always producing TRUE results\", \"username\": \"georgeb2d\", \"post_text\": \"If a variable in the dataset says 'k' I want the output to do a layout in Km. Otherwise I want the layout in miles, as it is today. \\n\\nI get the value of the variable. If it is 'k' it is in the set Constants.CR_KILOMETERS.\\n\\nConstants.CR_KILOMETERS := ['K', 'k', 'KM', 'km'];\\n If it is in that I set I want the dataset projected onto the KM layout from the miles layout. The default is the miles layout. I attempting to use an IF statement to point the direction. \\n\\nHowever, no matter what the value of the variable is, the IF statement always produces the trueresult. \\n\\nA Code snippet:\\n MilesSortedOut := SORT(ReportOut, PolicyNumber, MobileNumber);\\n X.ReportLayouts.KmReportLayout KMForm(MilesSortedOut LE ):= TRANSFORM\\n\\tSELF.EstAnnualRecordedKm := LE.EstAnnualRecordedMiles * X.Constants.SPEED_KPH_CONVERTER;\\n\\tSELF.accumulatedKm := LE.accumulatedmiles * X.Constants.SPEED_KPH_CONVERTER;\\n\\tSELF := LE;\\n END;\\t\\n KmSortedOut := PROJECT(MilesSortedOut, KMForm(LEFT)); \\n\\t\\t\\n Config := PROJECT(Report, X.ReportLayouts.rReportConfigDataLayout)[1];\\n Miles_kilometers := 'm'; //Config.MilesKilometers;\\n SortedOut := IF(Miles_Kilometers IN X.Constants.CR_KILOMETERS, KmSortedOut, MilesSortedOut);\\n ReportFileNames := X.CarrierReportFileNames(Config, 'D');\\n\\t\\tRemoteFile := ReportFileNames.RemoteFile;\\n\\n WriteReport := OUTPUT(SortedOut, \\n , \\n ReportFileNames.LogicalFile, \\n CSV(HEADING(SINGLE)),\\n compressed, OVERWRITE);
\\n\\nAbove I intentionally put in 'm' for Miles_kilometers. It produced Kilometers. No matter what I put the IF statement always puts KmSortedOut as the result. IF I switch the IF statement to have MilesSortedOut as first or the trueresult, it always produces miles.\\n\\nIf I redo the IF statement like:\\nIF(Miles_Kilometers IN X.Constants.CR_KILOMETERS, TRUE, FALSE); I get the correct TRUE or FALSE. I assume I am trying to misuse the IF statement somehow. If so, what should I use?\\n\\nThis is WU W20151124-101651 on Alpha_Dev_Thor.\\n\\nThanks as always,\\nDon\", \"post_time\": \"2015-11-24 15:36:41\" },\n\t{ \"post_id\": 8738, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI have to agree with Tim that a full description of the problem you're trying to solve would be most helpful, because it is entirely possible that the NAMED OUTPUT approach to solving it is not the best way to handle the situation.\\n\\nIn reading back to your initial post it appears to me the issue is that you're trying to get a web page to read customer-specific results from a Thor workunit. IOW, you're trying to use Thor as part of a customer-facing solution (rarely a good idea in HPCC ) instead of simply publishing the customer query and its related data to a Roxie that the website can hit. Is that correct?\\n\\nRichard\", \"post_time\": \"2015-12-07 16:08:36\" },\n\t{ \"post_id\": 8734, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"tlhumphrey2\", \"post_text\": \"It would help me if you provided more details of the problem you are trying to solve: statement of the problem, inputs (with examples), outputs (with examples).\", \"post_time\": \"2015-12-07 14:29:33\" },\n\t{ \"post_id\": 8732, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"Allan\", \"post_text\": \"Thanks for the reply,\\n\\nUnfortunately I don't know the list of customers as compile time. Only at run-time.\\nI was hoping to isolate customer specific data to a particular named result that is identifiable by processes external to HPCC.\\n\\nHo Hum.\", \"post_time\": \"2015-12-07 10:37:36\" },\n\t{ \"post_id\": 8716, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"tlhumphrey2\", \"post_text\": \"Would outputting each to a file instead of to the workunit work? For example:\\n
OUTPUT(x[1],,'Customer1_NUM_Contributions',OVERWRITE);\\nOUTPUT(x[2],,'Customer2_NUM_Contributions',OVERWRITE);\\nOUTPUT(x[3],,'Customer3_NUM_Contributions',OVERWRITE);\\n
\", \"post_time\": \"2015-12-02 14:11:14\" },\n\t{ \"post_id\": 8714, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"Allan\", \"post_text\": \"OK I'm thinking of constructing ECL, for each customer, that I then run as a child WU.\\n\\nThe SOAPCALL to run WU's returns the WUid which the parent can collate into URL's passed to web services.Each customer having its own WU with results in it.\\n\\nHum\", \"post_time\": \"2015-12-02 13:22:50\" },\n\t{ \"post_id\": 8712, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Re: Generating NAMED 'Outputs' from the contents of a DATASE\", \"username\": \"Allan\", \"post_text\": \"Just thought, I could despray each record as its own physical file then spray back in identifying the customer in the name of the logical file created!\\n\\nErr - not nice.\", \"post_time\": \"2015-12-02 12:10:02\" },\n\t{ \"post_id\": 8710, \"topic_id\": 2034, \"forum_id\": 10, \"post_subject\": \"Generating NAMED 'Outputs' from the contents of a DATASET\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nMy input is a DATASET of the form:\\n\\nx := DATASET([{'Customer1',45},\\n {'Customer2',77},\\n {'Customer3',28}],{STRING Target,INTEGER NumContributions});\\n
\\nWhat ECL I need to generate from this is:\\n\\na := OUTPUT(x[1],NAMED('Customer1_NUM_Contributions'));\\nb := OUTPUT(x[2],NAMED('Customer2_NUM_Contributions'));\\nc := OUTPUT(x[3],NAMED('Customer3_NUM_Contributions'));\\nPARALLEL(a,b,c);\\n
\\n\\nI'm doing this so that subsequent Web processes can pick up data specific to a customer from the resultant WU.\\n\\nAny ideas how to implement this?\\n\\nI Could just output the entire dataset, the web processes then filtering for a specific customer but this exposes data for all customers to web services outside HPCC.\\n\\nYours\\nAllan\", \"post_time\": \"2015-12-02 10:26:19\" },\n\t{ \"post_id\": 8816, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"rtaylor\", \"post_text\": \"Yijie,Is it possible to let the function return the exact number of unique values instead of giving an estimate for a huge data?
DISTRIBUTION is not a function, it is an action. There is a huge difference between the two. A function may be used in an expression context while an action cannot.\\n\\nYou can get the exact count by either running crosstab report or counting the unique values, something like this:\\nCOUNT(DEDUP(SORT(TABLE(ds,{myfield}),myfield)));\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-10 18:20:09\" },\n\t{ \"post_id\": 8814, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"xueyij01\", \"post_text\": \"[quote="JimD":1fd98qq8]Richard, \\n\\nConsider us notified \\n\\nI have entered a Jira issue:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14652\\n\\nHTH,\\nJim\\n\\nJim, \\n\\nIs it possible to let the function return the exact number of unique values instead of giving an estimate for a huge data? Thanks.\\n\\nYijie\", \"post_time\": \"2015-12-10 18:05:55\" },\n\t{ \"post_id\": 8812, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"JimD\", \"post_text\": \"Richard, \\n\\nConsider us notified
\\n\\nI have entered a Jira issue:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14652\\n\\nHTH,\\nJim\", \"post_time\": \"2015-12-10 18:02:06\" },\n\t{ \"post_id\": 8808, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"rtaylor\", \"post_text\": \"Yijie,
when I tried to run the 'dataset' command to use the results of 'distribution', it (W20151210-103808) complained again.
DISTRIBUTION is not a function, it is an action -- meaning it produces a result in the workunit. You can use its NAMED option to name that result so it may be used in a different workunit -- which means you need to use your 'dataset' code in a separate workunit to have it work.. For the second issue, ... I had the following result from the 'distribution' function.\\n\\n<XML><Field name="ln_seqnum" estimate="4000000"/></XML>\\n\\nThe ln_seqnum has more than 28 million unique values.
Thank you, I'll let the doc guys know they need to update the DISTRIBUTION doc with this information.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-10 16:56:29\" },\n\t{ \"post_id\": 8804, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"xueyij01\", \"post_text\": \"Richard, \\n\\nThank you for your quick reply. \\n\\n1. Thank you for pointing out the error. I changed the name and it (WUID:W20151210-100458) works. Thanks!\\n\\nHowever, when I commented the 'distribute' command in line 1150, it complained with the same error \\nSystem error: 10023: Graph[1], value Category_9_d in workunit is undefined,\\n
\\nas in W20151210-100530. I run more tests without distributing the dataset. W20151210-102708 showed that 'distribution' worked. However, when I tried to run the 'dataset' command to use the results of 'distribution', it (W20151210-103808) complained again. \\n\\nWould you please check that? Thanks.\\n\\n\\n2. For the second issue, please refer to the job (WUID: W20151210-100000). I had the following result from the 'distribution' function.\\n\\n<XML><Field name="ln_seqnum" estimate="4000000"/></XML>
\\n\\nThe ln_seqnum has more than 28 million unique values.\\n\\n\\nThank you so much!\\n\\nYijie\", \"post_time\": \"2015-12-10 16:26:19\" },\n\t{ \"post_id\": 8792, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Re: Issue about 'distribution' function\", \"username\": \"rtaylor\", \"post_text\": \"xueyij01,1. When I specified a attributes name and use the code in the document of 'distribution' function to parse out the XML output.
The problem here is the optional second parameter to DISTRIBUTION (https://hpccsystems.com/download/documentation/ecl-language-reference/html/DISTRIBUTION.html) must be a field in the dataset named as its first parameter. Your syntax looks like you're trying to do a forward reference (never allowed in ECL) to your next definition, and that simply won't work.distribution(ds2,Category_9,named('Category_9')); \\nCategory_9 := dataset(row(transform({string line},self.line := workunit('Category_9',string))));
2. When the dataset is huge, the function just give an estimated value of the number of distinct value. How could I get the exact number?
You can write a simple crosstab report (taught in the beginning of our Intro to Thor classes) to do that. But I've never heard of DISTRIBUTION returning estimated values (and the docs don't say that), so how have you determined this is the case?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-09 22:05:06\" },\n\t{ \"post_id\": 8788, \"topic_id\": 2048, \"forum_id\": 10, \"post_subject\": \"Issue about 'distribution' function\", \"username\": \"xueyij01\", \"post_text\": \"Hello \\n\\nI am using the 'distribution' function to find the number of unique values in one attribute. I met a couple of issues. \\n\\n1. When I specified a attributes name and use the code in the document of 'distribution' function to parse out the XML output. \\n\\n\\ndistribution(ds2,Category_9,named('Category_9')); \\nCategory_9 := dataset(row(transform({string line},self.line := workunit('Category_9',string)))); \\n\\nres := RECORD\\n STRING Fieldname := XMLTEXT('@name');\\n STRING Cnt := XMLTEXT('@distinct');\\nEND;\\n\\ndist_Category_9 := PARSE(Category_9,line,res,XML('XML/Field')); \\noutput(dist_Category_9);\\n
\\n\\nIt returns an error message:\\n\\nSystem error: 10023: Graph[1], value Category_9 in workunit is undefined\\n\\nCould someone help me about this error? Thanks.\\n\\n2. When the dataset is huge, the function just give an estimated value of the number of distinct value. How could I get the exact number? Thanks.\", \"post_time\": \"2015-12-09 19:42:37\" },\n\t{ \"post_id\": 8938, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"JimD\", \"post_text\": \"Great! I am glad it works for you. \\n\\nI used " as the QUOTE option when spraying the file. Double quote is the default when you spray from ECL Watch.\\n\\nJim\", \"post_time\": \"2015-12-18 19:55:52\" },\n\t{ \"post_id\": 8936, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"sathya.akrec\", \"post_text\": \"Small correction. Not while Despraying while Spraying i made the change of quote to be double quote instead of single quote\", \"post_time\": \"2015-12-18 19:31:26\" },\n\t{ \"post_id\": 8934, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"sathya.akrec\", \"post_text\": \"Thanks JIM. This one worked after i changed the sourceQuote option while despraying the file to Double quote from Single quote.\", \"post_time\": \"2015-12-18 19:01:54\" },\n\t{ \"post_id\": 8930, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"JimD\", \"post_text\": \"Here is my code:\\n\\n\\nInputFile2 := '~gopi::gopi.csv';\\n\\nCSVRecord := RECORD\\n string field1;\\n string field2;\\n string field3;\\n string field4;\\nEND;\\n\\nds_data3 := DATASET(InputFile2,\\n CSVrecord,\\n CSV(HEADING(1),\\n SEPARATOR([',','\\\\t']), \\n TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])));\\n\\nOutput(ds_data3, Named('ds_data3'));\\n
\", \"post_time\": \"2015-12-18 17:09:46\" },\n\t{ \"post_id\": 8924, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"sathya.akrec\", \"post_text\": \"Jim,\\n\\nCould you please paste the code that you tried. When i tried removing the Escape definition i am still getting the output as \\n\\nField1 Field2 Field3 Field4\\n1\\tgopi\\t123\\tFALSE\\n2\\t"sat\\thya"\\t456\\n\\nThanks,\\nSathya\", \"post_time\": \"2015-12-18 16:43:49\" },\n\t{ \"post_id\": 8922, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"Re: How to handle comma with in the double quote\", \"username\": \"JimD\", \"post_text\": \"I created a similar csv file and sprayed it:\\n\\nHere are my file contents:\\n\\nField1,Field2,Field3,Field4\\n1,gopi,123,FALSE\\n2,"sat,hya",456,FALSE\\n
\\n\\nI removed the Escape('"') portion of your DATASET definition and ran your code.\\nThe results were:\\n\\n\\n\\t1\\tgopi 123 FALSE\\n\\t2\\tsat,hya 456 FALSE\\n
\\nHTH,\\n\\nJim\", \"post_time\": \"2015-12-18 16:20:23\" },\n\t{ \"post_id\": 8918, \"topic_id\": 2076, \"forum_id\": 10, \"post_subject\": \"How to handle comma with in the double quote\", \"username\": \"gopi\", \"post_text\": \"Hi,\\n\\nHow to handle comma with in the double quote while reading CSV file?\\n\\nFor example input have file field values have comma with in double quote. \\n\\n\\n/* CSV File Data\\nfield1,field2,field3,field4\\n1,gopi,9884382626,false\\n2,"sat,hya",123456789,false\\n*/\\n\\nInputFile1 := '~innovation::test_gopi::Sample1.spray';\\nInputFile2 := '~innovation::test_gopi::Sample2_double_quote.spray';\\n\\nCSVRecord := RECORD\\n string field1;\\n string field2;\\n string field3;\\n\\tstring field4;\\nEND;\\n\\nds_data3 := DATASET(InputFile2,\\n CSVrecord,\\n CSV(HEADING(1),\\n SEPARATOR([',','\\\\t']),\\n\\t\\t Escape('"'),\\n TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])));\\n\\nOutput(ds_data3, Named('ds_data3'));\\n
\\n\\nExpected Output \\n\\nfield1,field2,field3,field4\\n1,gopi,9884382626,false\\n2,"sathya",123456789,false\", \"post_time\": \"2015-12-18 15:18:51\" },\n\t{ \"post_id\": 8968, \"topic_id\": 2086, \"forum_id\": 10, \"post_subject\": \"Re: XML Output Question\", \"username\": \"chuck.beam\", \"post_text\": \"Thanks Richard! That worked!\", \"post_time\": \"2015-12-22 16:42:50\" },\n\t{ \"post_id\": 8964, \"topic_id\": 2086, \"forum_id\": 10, \"post_subject\": \"Re: XML Output Question\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nTry it this way:SHARED Primary_Holder_Addr := AI_Common.Layouts.Primary_Holder_Addr;\\n\\nEXPORT ID_Address := RECORD\\n // dppaReason="INSURANCE" referenceId="000000000"\\n\\tSTRING11 dppaReason{XPATH('@dppaReason')};\\n\\tSTRING9 referenceId{XPATH('@referenceId')};//="000000000"\\n\\tSTRING42 STREET;\\n\\tSTRING28 CITY;\\n\\tSTRING2 STATE;\\n\\tSTRING5 ZIPCODE;\\nEND;\\n\\nEXPORT Build_XML(\\tDATASET(Primary_Holder_Addr) Addresses):= FUNCTION\\n\\nID_FILE_NAME := '~THOR::EXTRACT::AI::YDM::ID::' + (STRING)std.date.Today();\\n\\nID_Address CreateID_Address(Primary_Holder_Addr L) := TRANSFORM\\n\\tSELF.dppaReason := '"INSURANCE"';\\n\\tSELF.referenceId := '"000000000"';\\n\\tSELF.Street := IF(TRIM(L.HOUSE_NUM) <> '', TRIM(L.HOUSE_NUM) + ' ', '') + TRIM(L.STREET_NAME) + IF(TRIM(L.APT_NUM) <> '',' # ' + TRIM(L.APT_NUM), '');\\n\\tSELF.City\\t:=\\tL.RCITY;\\n\\tSELF.State\\t:=\\tL.RST;\\n\\tSELF.ZipCode\\t:=\\tL.RZIP;\\nEND;\\n\\nID_AddressRows\\t:= PROJECT(Addresses, CreateID_Address(LEFT));\\n\\nHEADING_TEXT := '<?xml version="1.0" encoding="UTF-8"?>\\\\r\\\\n' +\\n\\t\\t'<AddressRequests xmlns:xsi="http://www.w3.org/2001/XMLSchemainstance" ' +\\n\\t\\t'xsi:noNamespaceSchemaLocation="file:YouthfulDriverMonitoringRequest.xsd" ' + \\n\\t\\t'requestDate="' + (STRING)std.date.Today() + '">\\\\r\\\\n';\\n\\nFOOTER_TEXT\\t:= '</AddressRequests>';\\n\\n\\nBuildFile\\t:= OUTPUT(ID_AddressRows,,ID_FILE_NAME, XML('AddressRequest',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\tHEADING(HEADING_TEXT,FOOTER_TEXT), TRIM), OVERWRITE, EXPIRE(90));\\n\\nTODAY := (STRING)std.date.Today();\\n\\nbProdEnv\\t:= _Control.ThisEnvironment.name = 'Prod';\\nfolder\\t:= '/data/ActiveInsights_DEVCAB/YDM_Files/';\\nlandingzone := IF(bProdEnv, _Control.IPAddress.prodlz, _Control.IPAddress.unixland);\\n\\nDesprayFile := STD.File.DeSpray(ID_FILE_NAME,\\n\\t\\tlandingzone,\\n\\t\\tfolder + 'ID_' + TODAY + '.xml',\\n\\t\\t,,,TRUE);\\n\\nSEQUENTIAL(BuildFile, DesprayFile);\\n\\nRETURN TRUE;\\n\\nEND;
You may need to adjust this code for the " characters -- I can't run it so I'm not sure if they're automatically added to XML attribute text or not.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-22 15:21:36\" },\n\t{ \"post_id\": 8956, \"topic_id\": 2086, \"forum_id\": 10, \"post_subject\": \"XML Output Question\", \"username\": \"chuck.beam\", \"post_text\": \"I need to output XML files for youthful driver monitoring for the NIC states. \\n\\nI am have an issue with the files.\\n\\nThe XML format include a parent tag with two attributes - dppaReason and referenceId.\\n\\nThe dppaReason is always "INSURANCE" but the referecenID is a unique sequence reference number, see example below.\\n\\n<?xml version="1.0" encoding="UTF-8"?>\\n<AddressRequests xmlns:xsi="http://www.w3.org/2001/XMLSchemainstance" xsi:noNamespaceSchemaLocation="file:YouthfulDriverMonitoringRequest.xsd" requestDate="2015-11-24">\\n\\t<AddressRequest dppaReason="INSURANCE" referenceId="54098465">\\n\\t\\t<Street>1008 GRANT ST</Street>\\n\\t\\t<City>MONTPELIER</City>\\n\\t\\t<State>ID</State>\\n\\t\\t<ZipCode>83254</ZipCode>\\n\\t</AddressRequest>\\n</AddressRequests>\\n\\n\\nI have attempted to use the ROWTAG to create the tag, but the result is not correct, see example below.\\n\\n<?xml version="1.0" encoding="UTF-8"?>\\n<AddressRequests xmlns:xsi="http://www.w3.org/2001/XMLSchemainstance" xsi:noNamespaceSchemaLocation="file:YouthfulDriverMonitoringRequest.xsd" requestDate="20151221">\\n\\t<AddressRequest dppaReason="INSURANCE" referenceId="000000000"><street>7040 ASH CT # A</street><city>COMMERCE CITY</city><state>CO</state><zipcode>80022</zipcode></AddressRequest dppaReason="INSURANCE" referenceId="000000000">\\n</AddressRequests>\\n\\nIs there a way to define attribute on the ROWTAG?\\n\\nAlso, is there a way to format the resulting XML with line breaks?\\n\\nHere is my code:\\n\\nSHARED Primary_Holder_Addr \\t\\t:= AI_Common.Layouts.Primary_Holder_Addr;\\n\\t\\t\\n\\tEXPORT ID_Address := RECORD\\n\\t\\tSTRING42 \\tSTREET;\\n\\t\\tSTRING28 \\tCITY;\\n\\t\\tSTRING2 \\tSTATE;\\n\\t\\tSTRING5 \\tZIPCODE;\\n\\tEND;\\n\\t\\n\\tEXPORT Build_XML(\\tDATASET(Primary_Holder_Addr) Addresses):= FUNCTION\\n\\t\\t\\n\\t\\tID_FILE_NAME \\t\\t:= '~THOR::EXTRACT::AI::YDM::ID::' + (STRING)std.date.Today();\\n\\t\\t\\n\\t\\tID_Address CreateID_Address(Primary_Holder_Addr L) := TRANSFORM\\n\\t\\t\\tSELF.Street \\t:= \\tIF(TRIM(L.HOUSE_NUM) <> '', TRIM(L.HOUSE_NUM) + ' ', '') + TRIM(L.STREET_NAME) + IF(TRIM(L.APT_NUM) <> '',' # ' + TRIM(L.APT_NUM), '');\\n\\t\\t\\tSELF.City\\t\\t\\t:=\\tL.RCITY;\\n\\t\\t\\tSELF.State\\t\\t:=\\tL.RST;\\n\\t\\t\\tSELF.ZipCode\\t:=\\tL.RZIP;\\n\\t\\tEND;\\n\\t\\t\\n\\t\\tID_AddressRows\\t:= PROJECT(Addresses, CreateID_Address(LEFT));\\n\\t\\t\\n\\t\\tHEADING_TEXT \\t:= \\t'<?xml version="1.0" encoding="UTF-8"?>\\\\r\\\\n' +\\n\\t\\t\\t\\t\\t'<AddressRequests xmlns:xsi="http://www.w3.org/2001/XMLSchemainstance" ' +\\n\\t\\t\\t\\t\\t'xsi:noNamespaceSchemaLocation="file:YouthfulDriverMonitoringRequest.xsd" ' + \\n\\t\\t\\t\\t\\t'requestDate="' + (STRING)std.date.Today() + '">\\\\r\\\\n';\\n\\t\\t\\n\\t\\tFOOTER_TEXT\\t:= '</AddressRequests>';\\n\\t\\t\\n\\t\\tBuildFile\\t\\t:= OUTPUT(ID_AddressRows,,ID_FILE_NAME, XML('AddressRequest dppaReason="INSURANCE" referenceId="000000000"',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tHEADING(HEADING_TEXT,FOOTER_TEXT), TRIM), OVERWRITE, EXPIRE(90));\\n\\t\\t\\n\\t\\tTODAY \\t\\t\\t:= (STRING)std.date.Today();\\n\\t\\t\\n\\t\\tbProdEnv\\t\\t\\t:= _Control.ThisEnvironment.name = 
'Prod';\\n\\t\\tfolder\\t\\t\\t\\t:= '/data/ActiveInsights_DEVCAB/YDM_Files/';\\n\\t\\tlandingzone \\t:= IF(bProdEnv, _Control.IPAddress.prodlz, _Control.IPAddress.unixland);\\n\\t\\t\\n\\t\\tDesprayFile := STD.File.DeSpray(ID_FILE_NAME,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tlandingzone,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tfolder + 'ID_' + TODAY + '.xml',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,,,TRUE);\\n\\t\\t\\n\\t\\tSEQUENTIAL(BuildFile, DesprayFile);\\n\\t\\t\\n\\t\\tRETURN TRUE;\\n\\t\\t\\n\\tEND;\\n\\nThanks!\\nChuck\", \"post_time\": \"2015-12-21 18:09:47\" },\n\t{ \"post_id\": 8992, \"topic_id\": 2094, \"forum_id\": 10, \"post_subject\": \"Re: "How to freeze a definition"\", \"username\": \"tdelbecque\", \"post_text\": \"Hi Bob,\\n\\nIt works with INDEPENDENT \\n\\nThanks very much.\\n\\nBests, Thierry.\", \"post_time\": \"2015-12-30 16:37:59\" },\n\t{ \"post_id\": 8990, \"topic_id\": 2094, \"forum_id\": 10, \"post_subject\": \"Re: "How to freeze a definition"\", \"username\": \"bforeman\", \"post_text\": \"Hmmm, that worked for me. \\n\\nTry INDEPENDENT instead of GLOBAL.\\n\\nBob\", \"post_time\": \"2015-12-30 16:33:00\" },\n\t{ \"post_id\": 8988, \"topic_id\": 2094, \"forum_id\": 10, \"post_subject\": \"Re: "How to freeze a definition"\", \"username\": \"tdelbecque\", \"post_text\": \"Hello Bob, thanks for your quick answer. \\n\\nUnfortunatelly it does not seem to solve the problem.\\n\\nBests, Thierry\", \"post_time\": \"2015-12-30 16:29:40\" },\n\t{ \"post_id\": 8986, \"topic_id\": 2094, \"forum_id\": 10, \"post_subject\": \"Re: "How to freeze a definition"\", \"username\": \"bforeman\", \"post_text\": \"Try using GLOBAL in the function call:\\n\\nTIMESTAMP := getTimeStamp ():GLOBAL;\\n\\nI think this should work for you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-30 16:21:04\" },\n\t{ \"post_id\": 8984, \"topic_id\": 2094, \"forum_id\": 10, \"post_subject\": \""How to freeze a definition"\", \"username\": \"tdelbecque\", \"post_text\": \"Hello, I have a very stupid question, sorry for that, I believe I knew the solution in the past but my memory is weak these days ...\\n\\nLet getTimeStamp () be a function that computes the timestamp when called. Then, in the following pseudo code:\\n\\n// BEGIN\\nTIMESTAMP := getTimeStamp (); // PT0\\n\\nOUTPUT (TIMESTAMP); // PT 1\\n\\n... SOME TIME CONSUMING OPERATION\\n\\nOUTPUT (TIMESTAMP); // PT2\\n// END\\n\\nthe results of the OUTPUTS are different between PT1 and PT2; I know why it is so, but for now I need the value of TIMESTAMP to be "frozen" at what it was at PT0, rather than being re-evaluated: if getTimeStamp () computes 9 at PT0, I need to see 9 at pt2. \\n\\nI hope this is not confusing.\\n\\nCould someone be helpful with the old guy and remind him the trick ?\\n\\nThanks in advance.\\n\\nBests.\", \"post_time\": \"2015-12-30 15:49:59\" },\n\t{ \"post_id\": 9002, \"topic_id\": 2098, \"forum_id\": 10, \"post_subject\": \"Re: exportXML - ignore child dataset fields\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nTry it this way:
\\nNamesRecord := RECORD\\n STRING10 first;\\n STRING20 last;\\nEND;\\n \\nr := RECORD\\n UNSIGNED4 dg_parentid;\\n STRING10 dg_firstname;\\n STRING dg_lastname;\\n UNSIGNED1 dg_prange;\\n DATASET(NamesRecord) childNames;\\nEND;\\n \\nds := DATASET('~RTTEST::OUT::ds', r, THOR);\\n\\n#exportxml(allRows,r);\\n#declare(childDS)\\n#for(allRows)\\n #set(childDS,0)\\n #for(Field)\\n //if beginning of child ds structure, set symbol\\n #if(%'{@isDataset}'% <> '')\\n #set(childDS,1)\\n #end\\n //print if symbol is not set\\n #if(%childDS% = 0 )\\n output(%'{@label}'%);\\n #else\\n //reset symbol if child ds looping has ended\\n #if(%'{@isEnd}'% <> '')\\n #set(childDS,0)\\n #end\\n #end\\n #end\\n#end
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-04 15:09:23\" },\n\t{ \"post_id\": 8998, \"topic_id\": 2098, \"forum_id\": 10, \"post_subject\": \"exportXML - ignore child dataset fields\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"A very happy new year to everyone. \\n\\nA simplified form of my requirement goes like this - I want to programmatically iterate through a dataset and print out only parent-level field names. \\n\\nI am currently doing the same using exportXML and template language. I use #for loop and print out %'{@label}'% - this works well for regular fields. \\n\\n#for(allRows)\\n #for(Field)\\n output(%'{@label}'%);\\n #end\\n#end\\n
\\n\\nHowever, for child datasets, the #for loop iterates through all fields of child dataset as well which is not required in my case. Is there a way to distinguish between the two and print only the parent-level fields? \\n\\nI am currently handling this in an unclean manner like this. Is there a better way?\\n\\n#for(allRows)\\n #set(childDS,0)\\n #for(Field)\\n //if beginning of child ds structure, set symbol\\n #if(%'{@isDataset}'% <> '')\\n #set(childDS,1)\\n #end\\n //print if symbol is not set\\n #if(childDS = 0 )\\n output(%'{@label}'%);\\n #else\\n //reset symbol if child ds looping has ended\\n #if(%'{@isEnd}'% <> '')\\n #set(isChild,0)\\n #end\\n #end\\n #end\\n#end\\n
\\n\\n- Gayathri\", \"post_time\": \"2016-01-04 11:10:33\" },\n\t{ \"post_id\": 9006, \"topic_id\": 2100, \"forum_id\": 10, \"post_subject\": \"Re: Macro to call other macro on all fields in a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nThis code works on my 5.4.2-1 cluster:TestMacro(InDS, X) := macro\\n output(table(InDS, \\n {cnt_recs := count(group), \\n sum_recs := sum(group, InDS.X), \\n avg_recs := ave(group, InDS.X)}), \\n named(#TEXT(x)));\\nendmacro;\\n\\nTestMacroToCallMacros(DatasetIn, MacroToCall):=MACRO\\n // LOADXML('<xml/>');\\n #DECLARE(CallToMacro) #SET(CallToMacro,'')\\n #EXPORTXML(fields,RECORDOF(DatasetIn))\\n #FOR(fields)\\n #FOR(Field)\\n #IF(%'{@type}'% IN ['unsigned','integer','real','decimal','udecimal']) \\n // current field is number type\\n #APPEND(CallToMacro, MacroToCall+'('+#TEXT(DatasetIn)+','+%'{@label}'%+');\\\\n')\\n #END // end IF\\n #END // end FOR(field)\\n #END // end FOR(fields)\\n %'CallToMacro'%; //show me the generated code \\n %CallToMacro%; //run the generated code\\nENDMACRO;\\n\\npeople := DATASET([ {1, 'Anderson', 'Aaron', 20, 3},\\n {2, 'Barker', 'Bob', 21, 5},\\n {3, 'Chow', 'Cathy', 22, 7},\\n {4, 'Darling', 'Denise', 23, 3},\\n {5, 'Early', 'Erin', 24, 6},\\n {6, 'Fabbri', 'Felipe', 25, 4},\\n {7, 'Gadson', 'Gabriel', 26, 2},\\n {8, 'Hadden', 'Heather', 27, 1},\\n {9, 'Ikram', 'Ian', 28, 7},\\n {10, 'Johnson', 'Jerry', 29, 0}], \\n { unsigned2 ID, \\n string20 lastname, \\n string20 firstname, \\n unsigned2 age, \\n unsigned2 psuedorandom});\\n\\nTestMacroToCallMacros(people, 'TestMacro');
Note that the REGEXREPLACE() function was unnecessary, as was the LOADXML() (if you're on an older build you may still need the LOADXML() function).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-04 21:56:50\" },\n\t{ \"post_id\": 9004, \"topic_id\": 2100, \"forum_id\": 10, \"post_subject\": \"Macro to call other macro on all fields in a dataset\", \"username\": \"janet.anderson\", \"post_text\": \"I want to be able to take a arbitrary dataset and for every field in the dataset call another macro. So for example, I want to take a dataset and do some relevant EDA on each field to help create modeling attributes. Currently I have a generic placeholder for the EDA macro; it will be replaced later with something more complex: \\n\\nEXPORT TestMacro(InDS, X) := macro\\noutput(table(InDS, {cnt_recs := count(group), sum_recs := sum(group, InDS.X), avg_recs := ave(group, InDS.X)}), named(#TEXT(x)));\\nendmacro;\\n
\\n\\nThe macro to loop through each field in a dataset and call TestMacro is where I am running into errors:\\n\\nEXPORT TestMacroToCallMacros(DatasetIn, MacroToCall):=MACRO\\n LOADXML('<xml/>');\\n #DECLARE(CallToMacro) #SET(CallToMacro,'')\\n #EXPORTXML(fields,RECORDOF(DatasetIn))\\n #FOR(fields)\\n #FOR(Field)\\n #IF(REGEXREPLACE('[^a-z]',%'{@type}'%,'') IN ['unsigned','integer','real','decimal','udecimal'] // current field is number type\\n #APPEND(CallToMacro, MacroToCall+'('+#TEXT(DatasetIn)+','+%'{@label}'%+');')\\n #END // end IF\\n #END // end FOR(field)\\n #END // end FOR(fields)\\n#EXPAND(CallToMacro);\\nENDMACRO;\\n
\\n\\nHere is an example of my call to TestMacroToCallMacros:\\n\\npeople := DATASET([{1, 'Anderson', 'Aaron', 20, 3},\\n{2, 'Barker', 'Bob', 21, 5},\\n{3, 'Chow', 'Cathy', 22, 7},\\n{4, 'Darling', 'Denise', 23, 3},\\n{5, 'Early', 'Erin', 24, 6},\\n{6, 'Fabbri', 'Felipe', 25, 4},\\n{7, 'Gadson', 'Gabriel', 26, 2},\\n{8, 'Hadden', 'Heather', 27, 1},\\n{9, 'Ikram', 'Ian', 28, 7},\\n{10, 'Johnson', 'Jerry', 29, 0}], {unsigned2 ID, string20 lastname, string20 firstname, unsigned2 age, unsigned2 psuedorandom});\\n\\nimport zz_JanetAnderson;\\nzz_JanetAnderson.TestMacroToCallMacros(people, 'zz_JanetAnderson.TestMacro');\\n
\\n\\nCan you please recommend a fix to the code, or an alternate way of achieving the result?\", \"post_time\": \"2016-01-04 21:24:21\" },\n\t{ \"post_id\": 9232, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Re: Assign schema for logical file In HPCC\", \"username\": \"rtaylor\", \"post_text\": \"prathmesh,\\n\\nSimple answer: no.\\n\\nLonger answer: You can set up security constraints in LDAP as to who can see/read/write any given dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-05 20:00:30\" },\n\t{ \"post_id\": 9226, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Re: Assign schema for logical file In HPCC\", \"username\": \"prathmesh.datye\", \"post_text\": \"Thanks Richards,\\n\\nBut there having any concepts like users or owner and our created files belongs to them.So can we use that in our SQL query or any other attribute use in SQL query to define particular file.\\nlike anyAttribute.tablename in SQL query.\", \"post_time\": \"2016-02-04 03:41:12\" },\n\t{ \"post_id\": 9216, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Re: Assign schema for logical file In HPCC\", \"username\": \"rtaylor\", \"post_text\": \"prathmesh,\\n\\nOK, now I understand your question.\\nNormally if we considered databases like ORACLE,MySQl in those databases they have schema and that schema contain no. of tables
HPCC is not an RDBMS.Similarly in HPCC I want to put our created logical files in schema or assign schema during creation of logical file.
HPCC does not use schemas. Each table/dataset/file is simply an ISAM file and you define each file in your ECL code with a RECORD structure and DATASET declaration.I had checked in HPCC it contain 1 schema named information_schema.I think it is system schema.Similarly I want to create my own schema and insert my logical file inside it.
Again, in ECL we do not use schemas. That "information_schema" is most likely part of the HPCC internal infrastructure. \\n\\nHave you gone through our free online ECL training courses, or are you trying to learn on your own? The courses are described here: https://hpccsystems.com/enterprise-services/professional-training and links to the courses are available here: https://hpccsystems.com/getting-started/training-classes\\n\\nThe first course (Introduction to ECL) contains an overview of HPCC and explains just how different HPCC is to other data manipulation systems you may have used.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-03 15:08:46\" },\n\t{ \"post_id\": 9212, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Re: Assign schema for logical file In HPCC\", \"username\": \"prathmesh.datye\", \"post_text\": \"Thanks Richard\\n\\nNormally if we considered databases like ORACLE,MySQl in those databases they have schema and that schema contain no. of tables.\\nSimilarly in HPCC I want to put our created logical files in schema or assign schema during creation of logical file.\\nI had checked in HPCC it contain 1 schema named information_schema.I think it is system schema.Similarly I want to create my own schema and insert my logical file inside it.\\nSo when I access that table using SQL I want to fire query like \\n[color=#FF0000:17j83j2q]Select * from schema_name.table_name;\", \"post_time\": \"2016-02-03 04:22:56\" },\n\t{ \"post_id\": 9182, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Re: Assign schema for logical file In HPCC\", \"username\": \"rtaylor\", \"post_text\": \"prathmesh,I have created logical file using OUTPUT command which is considered as table in HPCC.\\nI am able to access that tables data.But If I see metadata for that table then schema is null for them.
I'm not quite sure I fully understand your question. \\n\\nWhenever I OUTPUT a dataset in ECL code, then the file's RECORD structure and other metadata about that file are always fully populated in the Logical File Details page for that file. \\n\\nSo exactly what do you mean by "schema" in your context and how are you looking for it (and not finding it)?\\n\\nRichard\", \"post_time\": \"2016-02-02 13:57:56\" },\n\t{ \"post_id\": 9176, \"topic_id\": 2146, \"forum_id\": 10, \"post_subject\": \"Assign schema for logical file In HPCC\", \"username\": \"prathmesh.datye\", \"post_text\": \"I have created logical file using OUTPUT command which is considered as table in HPCC.\\nI am able to access that tables data.But If I see metadata for that table then schema is null for them.\\nPlease tell me how I can assign schema to those tables during creation of logical files.I need to put all logical files in a schema.\\nPlease reply.\", \"post_time\": \"2016-02-02 03:32:42\" },\n\t{ \"post_id\": 22793, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Re: Using SuperKeys without payload.\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nBased on the quick experiment I just tried, it looks to me like FETCH does "work" (no syntax errors and code actually runs) but using a Superfile and Superkey returns "garbage" records from code like this:FETCH(superfile,SuperKey(filter),RIGHT.RecPos);
\\nMy suggestion (assuming each record has a unique identifier) is to use a payload SuperKey (with UID as the only search term and all other fields as the payload) instead of a Superfile of datasets. Make all your "real" search term keys (super or otherwise) only have the UID as their payloads. \\n\\nThat way, instead of using FETCH you can simply do a JOIN between your payload field values from your "search keys" matching to the UID search term of your payload SuperKey (dataset replacement). Or, you could eliminate the JOIN and just do a simple filter on the payload SuperKey of the set of records IN the set of UID values from your search keys.\\n\\nIf the payload fields create INDEX records too big, then you could try splitting the payload fields across two UID indexes and you'd just have to do two JOINs to get the final result (or a filter on each 1/2 payload then a JOIN between those results).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-27 20:31:54\" },\n\t{ \"post_id\": 22743, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Re: Using SuperKeys without payload.\", \"username\": \"jwilt\", \"post_text\": \"So... what is the state of running FETCH() against a superkey?\\nIs it possible?\\n(Version-specific?)\\n\\nIf we use:\\n { VIRTUAL( logicalfilename ) }\\nto build a key...\\n\\nHow do we take advantage of that with FETCH()?\\n\\nI may be missing something. Have checked documentation, google'd, etc.\\nThanks!\", \"post_time\": \"2018-08-24 01:07:45\" },\n\t{ \"post_id\": 9260, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Re: Using SuperKeys without payload.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,Our BASE files are HUGE. We cannot duplicate this amount of data in a payload.
INDEXes are always compressed, so if most of your data is text, it should compress very well. Is this data being queried on Roxie or Thor? If Roxie, then you really do want to only have that data in INDEXes (for the compression). If Thor... \\n\\nDo you have a globally unique identifier (UID) on each record (highly recommended)? If so, then you only really need one payload INDEX with that UID as the only search term. Building your search keys then just means the "payload" of each search INDEX is the UID that you can then use to filter the one payload INDEX to get the data. Each INDEX file would be part of a SuperKey of that INDEX, so that all your queries just use the SuperKeys. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-19 16:14:55\" },\n\t{ \"post_id\": 9256, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Re: Using SuperKeys without payload.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nOur BASE files are HUGE. We cannot duplicate this amount of data in a payload.\\nWe'll accept the extra IO as a compromise, it will still be many orders of magnitude faster that a trawl through a BASE file.\\n\\nThere is one logical BASE file for a day, (these are rolled up into a month file on the last day of every month but that is a detail that does not effect this discussion)\\n\\nAll these logical files are, of course, in a superfile.\\n\\nSo for fast lookup of the entire dataset I was investigating a SuperKey where the INDEX files in said superkey are in a one-to-one correspondence with a BASE file. \\n\\nThe question I was investigating was the implementation of a FETCH where the 'fileposition' alone will not uniquely identify the record in the BASE file.\\nThe documentation on FETCH 'position' just says:\\nAn expression that provides the means of locating the correct record in the basedataset (usually the field within the index containing the fileposition value).\\n\\nbut in this case a single field in an INDEX is not enough.\\nI hope this explains my question.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-02-19 09:49:41\" },\n\t{ \"post_id\": 9250, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Re: Using SuperKeys without payload.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,So if your BASE file does not change, is a payload truly necessary?
If your base dataset hasn't changed, you also have no need to change your INDEXes. \\n\\nThe purpose of payload INDEXes is to eliminate one disk read by having the data you need "instantly" available at the time that the INDEX has found the correct record to pull it from, without having to read that data from a separate DATASET. Disk I/O being the slowest aspect to what we do, eliminating as much disk I/O as possible is what makes things go faster. That's why payload INDEXes were created.If KEY file(s) were generated for each BASE file every day that contained the above 'VIRTUAL' information could a SuperKey approach be used to access data?
I'm not sure I understand what you're suggesting here. Can you please elaborate?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-18 16:04:13\" },\n\t{ \"post_id\": 9248, \"topic_id\": 2160, \"forum_id\": 10, \"post_subject\": \"Using SuperKeys without payload.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nIn reading up this forum and the HPCC documentation, SuperKeys seem to work because they have payload and need not make a reference to an external BASE file.\\n\\nHowever within HPCC there is support for:\\n\\n{ VIRTUAL( logicalfilename ) }\\nand\\n{ VIRTUAL( fileposition ) }\\n\\nSo if your BASE file does not change, is a payload truly necessary?\\nIn our set-up, new daily data is built into its own BASE file.\\nIf KEY file(s) were generated for each BASE file every day that contained the above 'VIRTUAL' information could a SuperKey approach be used to access data?\\n\\nI'm going to play around with this, but someone might have already done this and know the answers and any pitfalls.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-02-18 14:43:42\" },\n\t{ \"post_id\": 9262, \"topic_id\": 2162, \"forum_id\": 10, \"post_subject\": \"Re: Convert very Low number to String Gets us scientificNota\", \"username\": \"raja\", \"post_text\": \"got the alternate approach from DSC as to use REALFormat(expression / lowervalue, width, decimals), this will keep the result of expression or lower numeric value as it is without changing the value to scientific notation and converts to String form.\\n\\nThanks to DSC and Rtaylor.\\n\\nRegards,\\nRaja\", \"post_time\": \"2016-02-19 20:39:30\" },\n\t{ \"post_id\": 9258, \"topic_id\": 2162, \"forum_id\": 10, \"post_subject\": \"Re: Convert very Low number to String Gets us scientificNota\", \"username\": \"raja\", \"post_text\": \"hi Rtaylor,\\n\\nGot the info from DSC that rather converting the result value directly to String by typecast, if we try RealFormat(), we can eliminate this data issue.\\n\\n s := realformat(0.5 / 158694, 12, 6);\\n// S := (STRING) I;\\nT := (INTEGER) S;\\nOUTPUT(T);\\n\\nRegards,\\nRaja\", \"post_time\": \"2016-02-19 15:35:29\" },\n\t{ \"post_id\": 9254, \"topic_id\": 2162, \"forum_id\": 10, \"post_subject\": \"Re: Convert very Low number to String Gets us scientificNota\", \"username\": \"rtaylor\", \"post_text\": \"Raja,\\n\\nInteresting -- I duplicated your results and I have no better workaround to suggest. \\n\\nPlease report the issue in JIRA (https://track.hpccsystems.com)\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-18 21:58:16\" },\n\t{ \"post_id\": 9252, \"topic_id\": 2162, \"forum_id\": 10, \"post_subject\": \"Convert very Low number to String Gets us scientificNotation\", \"username\": \"raja\", \"post_text\": \"hi All,\\n There is a problem when I treat the numbers in String type, in the case I have very low number as a results dividing a small number by very large number.\\n\\nEx : the result of Divide operation is : 0.000000000415\\n\\nwhen I convert the result directly to STring, the string will be 4.15e-10\\n\\nif I use this string to compare with other number by typecasting it will be taken as 4.15. hence I am ended into wrong value.\\n\\nthe one workaround I did is, typecasted the result (of divide operation) to Decimal (ex.Decimal32_16) then I converted to String, now the string will be as '0.000000000415'. 
so this works fine.\\n\\nI would like to know do we have any other solution rather than this workaround.\\n\\nThanks,\\nRaja\", \"post_time\": \"2016-02-18 19:57:48\" },\n\t{ \"post_id\": 9294, \"topic_id\": 2172, \"forum_id\": 10, \"post_subject\": \"Python and JSON\", \"username\": \"putnik\", \"post_text\": \"Did anybody use Python to read Python with embedded Python?\", \"post_time\": \"2016-03-03 20:10:04\" },\n\t{ \"post_id\": 9298, \"topic_id\": 2176, \"forum_id\": 10, \"post_subject\": \"Python and JSON\", \"username\": \"putnik\", \"post_text\": \"Did anybody use Embedded Python call with json lib .\\nLike loads or dumps ?\", \"post_time\": \"2016-03-04 15:10:53\" },\n\t{ \"post_id\": 9348, \"topic_id\": 2196, \"forum_id\": 10, \"post_subject\": \"Re: Using EVENTS to target specific workunits.\", \"username\": \"Allan\", \"post_text\": \"Hum,\\n\\nEVENTEXTRA actually works in a simple scenario, - great one to remember for the future.\\nUnfortunately the actual functionality I require is more complex.\\n\\nThe user can indicate to the Doxie query that they have started a long (big) search. (i.e. indicate to the code to unconditionally despray results rather than return results)\\n\\nWhen an unconditional despray is required the WU should start the query and return immediately to the user, leaving the query to complete entirely in the background.\\nBut, of course, with:\\n\\n doit := FUNCTION\\n RETURN IF(EVENTEXTRA('returnTo') = WORKUNIT AND ValidEcl AND DeSprayOnSize != 0,LaterReturn);\\n END;\\ndoit : WHEN(EVENT('ID','*'),COUNT(1));\\n
\\n\\nthe initiating WU is still left running though, with the conditions in the FUNCTION, will not actually do anything when it fires up.\\n\\nI'm still investigating WUWaitOnComplete SOAPCALL, it's cleaner and wont suffer from this hiccup.\\n\\nThanks Richard\", \"post_time\": \"2016-03-16 20:36:13\" },\n\t{ \"post_id\": 9346, \"topic_id\": 2196, \"forum_id\": 10, \"post_subject\": \"Re: Using EVENTS to target specific workunits.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nI saw EVENTEXTRA but could not see how to incorporate it into what I'm doing.\\n\\nI'll give it a whirl.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2016-03-16 17:51:31\" },\n\t{ \"post_id\": 9342, \"topic_id\": 2196, \"forum_id\": 10, \"post_subject\": \"Re: Using EVENTS to target specific workunits.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI'd suggest adding the EVENTEXTRA function into the mix. \\n\\nUse the EVENT function to get notification of the event's firing, and wildcard the EVENT subtype parameter as "tight" as you can, to prevent too many possibilities. Then use the EVENTEXTRA function to get the actual passed WU value and compare it to the WORKUNIT value -- you instigate the result return from the current WU only if the two match.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-16 13:20:42\" },\n\t{ \"post_id\": 9340, \"topic_id\": 2196, \"forum_id\": 10, \"post_subject\": \"Re: Using EVENTS to target specific workunits.\", \"username\": \"Allan\", \"post_text\": \"Hum,\\n\\nI notice in the available WU services there is a 'WUWaitComplete' service.\\nIs there documentation for this? Does someone know how to drive this? Will it do what I need?\", \"post_time\": \"2016-03-16 10:33:44\" },\n\t{ \"post_id\": 9338, \"topic_id\": 2196, \"forum_id\": 10, \"post_subject\": \"Using EVENTS to target specific workunits.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've had a similar issue to this before:\\nhttps://hpccsystems.com/bb/viewtopic.php?f=10&t=1142&p=5059&hilit=NOTIFY#p5059\\n\\nBut my current problem cannot be resolved in the same way.\\nI have some dynamically created ECL that I can execute via SOAPCALL, no problem.\\nThe initiating WU must return results from this called ECL to the user (its a Doxie Query). Again no problem as I can use:\\n\\nDATASET(WORKUNIT(RunQueryWUid,'RES'),Results);\\n
\\nMy problem comes in signaling back to the initiating WU that the Query is complete and results are ready to be picked up.\\n\\nThe NOTIFY/WAIT approach only works for constant strings so the following will work:\\n\\nExecuting WU: NOTIFY('ID',<WORKUNIT id of caller>);\\nWaiting WU: WAIT(EVENT('ID','W2016*'));\\n
\\nBut the following fails, in so much as the caller never comes out of its 'wait' state:\\n\\nExecuting WU: NOTIFY('ID',<WORKUNIT id of caller>);\\nWaiting WU: WAIT(EVENT('ID',WORKUNIT));\\n
\\n\\nAs this is a Doxie Query many of these can be running simultaneously, the called WU must only signal the WU that initiated it, not any others. So I cannot use a constant string as the discriminator. Its interesting that the 1st case works, it implies the NOTIFY is generating the signal, but its the WAIT that is not catching it.\\n\\nThe ECL Reference manual does have examples of signaling a service that is already running, but we already have enough services running and its yet another service to maintain.\\n\\nI enclose some example code, I've left out the SOAPCALL code as it just clouds the issue.\\n\\n#workunit('name','Call and Wait');\\necl := '#workunit(\\\\'name\\\\',\\\\'Execute\\\\');\\\\n'\\n +'NOTIFY(\\\\'ID\\\\',\\\\''+WORKUNIT+'\\\\');';\\n\\nRunQueryWUid := SubmitNewWorkunit(ecl);\\n\\nSEQUENTIAL(RunQueryWUid\\n// ,WAIT(EVENT('ID','W2016*'))\\n ,WAIT(EVENT('ID',WORKUNIT))\\n ,OUTPUT('POST CALL'));\\n
\\n\\nAny help would be greatly appreciated.\\nYours\\nAllan\", \"post_time\": \"2016-03-16 09:50:13\" },\n\t{ \"post_id\": 9386, \"topic_id\": 2204, \"forum_id\": 10, \"post_subject\": \"Re: Control over the ESP Query screen.\", \"username\": \"Allan\", \"post_text\": \"Thanks Jim,\\n\\nNo never heard of DESDL, but thanks to you I have now.\\n\\nWill investigate your suggested approach.\\n\\nThanks very much\\n\\nAllan\", \"post_time\": \"2016-03-23 09:36:41\" },\n\t{ \"post_id\": 9372, \"topic_id\": 2204, \"forum_id\": 10, \"post_subject\": \"Re: Control over the ESP Query screen.\", \"username\": \"JimD\", \"post_text\": \"Have you looked at DESDL (Dynamic ESDL) as an alternative to using Java or C#, etc to create an application to consume the service?\\n\\nDynamic ESDL (Enterprise Service Description Language) is a methodology that helps you develop and manage web-based query interfaces quickly and consistently.\\nDynamic ESDL takes an interface-first development approach. It leverages the ESDL Language to create a common interface “contract” that both Roxie Query and Web interface developers will adhere to. It is intended to allow developers to create production web services, with clean interfaces that can evolve and grow over time.\\n\\nDESDL supports adding text to its default interface form. See http://cdn.hpccsystems.com/releases/CE- ... df#page=28\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-03-22 11:49:52\" },\n\t{ \"post_id\": 9370, \"topic_id\": 2204, \"forum_id\": 10, \"post_subject\": \"Re: Control over the ESP Query screen.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nYes port 8002. This is an internal product, only every available via 8002.\\nYours\\nAllan\", \"post_time\": \"2016-03-22 09:30:29\" },\n\t{ \"post_id\": 9368, \"topic_id\": 2204, \"forum_id\": 10, \"post_subject\": \"Re: Control over the ESP Query screen.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,As part of my published query I would like to output some explanatory text on the query screen, is this possible?
Do you mean the default query page that's created on port 8002 (default) for you to test it? If so, then the "normal" modus operandi is for the ECL programmer to use that default query page for testing, but give end-users a GUI written in Java or C# or ... That way, you can provide them with any UI enhancements you feel will help them.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-21 20:35:58\" },\n\t{ \"post_id\": 9366, \"topic_id\": 2204, \"forum_id\": 10, \"post_subject\": \"Control over the ESP Query screen.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nAs part of my published query I would like to output some explanatory text on the query screen, is this possible?\\n\\nAlso, allowed queries can be quite long. I would like to display a larger text box than is currently displaying. I've tried using STRING2048 instead of STRING but this has had no effect on the size of the displayed text box.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-03-21 11:08:13\" },\n\t{ \"post_id\": 9444, \"topic_id\": 2228, \"forum_id\": 10, \"post_subject\": \"Re: Calling macro with Variable\", \"username\": \"rtaylor\", \"post_text\": \"A MACRO generates ECL code by doing lexical substitution of the tokens defined as the MACRO's parameter list, wherever they appear in the code to produce, with the actual values passed to the MACRO when it is called. \\n\\nThat means that, whatever you pass to the MACRO must be in a form that will generate syntactically correct ECL code when the MACRO is called.\\n\\nFor example, given this MACRO:mac_ex( defname, parm1 ) := MACRO\\n defname := parm1;\\n OUTPUT(defname);\\nENDMACRO;
\\n\\nWhen you call the MACRO like this, it runs correctly:\\nmac_ex(fred,'john'); \\n//generates this ECL:\\n // fred := 'john';\\n // OUTPUT(fred);\\n
\\n\\nBut when you call the MACRO like this, it does NOT even syntax check:\\nmac_ex(fred rick,'john'); \\n//generates this ECL, which :\\n // fred rick := 'john';\\n // OUTPUT(fred rick);
because the defname token is replaced with the "fred rick" value being passed, and definition names cannot contain spaces.\\n\\nSo your problem is, the expression you are passing to your MACRO is not appropriate for the position within the generated code that it is being placed.\\n\\nBTW, ECL does not have "variables" -- what you're calling a "variable" is just an expression.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-30 20:02:08\" },\n\t{ \"post_id\": 9442, \"topic_id\": 2228, \"forum_id\": 10, \"post_subject\": \"Re: Calling macro with Variable\", \"username\": \"georgeb2d\", \"post_text\": \"I just got it to work. \\n.mac_buildArray(NumberGridsPerLongitude,\\n(UNSIGNED1)(ArrayAcross_10-9)\\n,ArrayHeight_10,DScolumn1_10);
\\n\\nApparently the type has to be explicit?\", \"post_time\": \"2016-03-30 19:58:01\" },\n\t{ \"post_id\": 9440, \"topic_id\": 2228, \"forum_id\": 10, \"post_subject\": \"Calling macro with Variable\", \"username\": \"georgeb2d\", \"post_text\": \"I have built a macro to build a dataset with different numbers to build an array a column at a time. It returns a dataset. However, I cannot seem to get the macro call to swallow a variable. For example:\\n\\n SHARED UNSIGNED3 NumberGridsPerLongitude := 366667;\\n\\t\\t\\n UNSIGNED1 ArrayAcross_10 := 10;\\n UNSIGNED1 ArrayHeight_10 := 10;\\n mac_buildArray(NumberGridsPerLongitude,ArrayAcross_10-9,ArrayHeight_10,DScolumn1_10);\\n
\\n\\ngives widely different results than:\\n\\n SHARED UNSIGNED3 NumberGridsPerLongitude := 366667;\\n\\t\\t\\n UNSIGNED1 ArrayHeight_10 := 10;\\nmac_buildArray(NumberGridsPerLongitude,1,ArrayHeight_10,DScolumn1_11);\\n
\\n\\nThe only difference in the two calls is the second variable. It is a constant in the second call (1) and is computed on the first call (ArrayAcross_10-9). The second call gives the correct answer (or the response I am looking for). The first call does not.\\n\\nI would like to be able to use a variable rather than hard coding every line. Is there a way I can do that?\\n\\nThis is W20160330-152930 on Alpha Dev Thor.\", \"post_time\": \"2016-03-30 19:36:04\" },\n\t{ \"post_id\": 9460, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro with a Dataset Element\", \"username\": \"rtaylor\", \"post_text\": \"The key to working with MACROs is thoroughly understanding the code that the MACRO is designed to generate.\\n\\nA simple example:mac_ex1( defname, ds ) := MACRO\\n defname := ds;\\n OUTPUT(defname);\\nENDMACRO;
This MACRO expects two parameters, the name of the definition to create, and the name of a dataset to OUTPUT. \\n\\nWhen you call it like this:MyDS := DATASET([{'A'},{'B'},{'C'}],{STRING5 F1});\\n\\nmac_ex1(ex1,MyDS);\\n//it generates this code:\\n// ex1 := MyDS;\\n// OUTPUT(ex1);
it produces a result of the records in the dataset.\\n\\nA slightly more complex example:mac_ex2( defname, ds, filter ) := MACRO\\n defname := ds(filter);\\n OUTPUT(defname);\\nENDMACRO;
Notice that the third parameter ("filter" ) is used in the code as a recordset filter, so that parameter is expected to be a BOOLEAN expression, which also implicitly limits the ds parameter to being a recordset. \\n\\nThe mac_ex1 MACRO was simple enough that you could get away with passing almost anything as its ds parameter, but not in mac_ex2 -- that syntax requires that whatever is passed as the ds and filter parameters, they must be syntactically correct when combined as "ds(filter)". \\n\\nI'm calling it like this:mac_ex2(ex2, ex1, F1 <> 'B' );\\n//it generates this code:\\n// ex2 := ex1(F1 <> 'B');\\n// OUTPUT(ex2);
and you will note that I'm passing "ex2" as the name of this new generated definition, and passing the "ex1" recordset (produced by the first macro call) as its dataset parameter. The BOOLEAN expression "F1 <> 'B'" is the third parameter, which will be used as a filter. When you run both of these macro calls together, result 2 produces just the A and C records.\\n\\nOnce again, understanding what code the MACRO is designed to generate is the only way to know exactly what possible parameters can be passed to the MACRO that will generate syntactically correct ECL. Remember, a MACRO is an ECL code generator and not a function.\\n\\nOne more example:mac_ex2(ex3, COUNT, ex2 );\\n//it generates this code:\\n// ex3 := COUNT(ex2);\\n// OUTPUT(ex3);
and here I'm able to pass COUNT and ex2 and it works because I understand the code the MACRO generates well enough to know that COUNT(ex2) is just as syntactically correct in this context as ex1(F1 <> 'B') was in the previous.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-31 20:57:39\" },\n\t{ \"post_id\": 9458, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro with a Dataset Element\", \"username\": \"georgeb2d\", \"post_text\": \"Let me restate that. Apparently I do not know how to pass a dataset to a macro. Could you point me to an example?\", \"post_time\": \"2016-03-31 20:08:33\" },\n\t{ \"post_id\": 9454, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro with a Dataset Element\", \"username\": \"rtaylor\", \"post_text\": \"Apparently Datasets are too complicated for the macro to handle.
I have written many MACROs that take datasets as parameters. \\n\\nThe MACRO must be passed whatever will generate syntax-correct ECL in order to work. That does include anything in ECL.\", \"post_time\": \"2016-03-31 13:10:30\" },\n\t{ \"post_id\": 9450, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro with a Dataset Element\", \"username\": \"georgeb2d\", \"post_text\": \"Apparently Datasets are too complicated for the macro to handle. When I changed to sets I was able to get it to work.\\n\\n SHARED UNSIGNED3 NumberGridsPerLongitude := 366667;\\n\\tColumnNumber := [1,2];\\n\\tArrayHeight := [10,10];\\n\\t\\tmac_buildArray(NumberGridsPerLongitude,ColumnNumber[1],ArrayHeight[1],DScolumn1_10);\\n\\t\\tmac_buildArray(NumberGridsPerLongitude,ColumnNumber[2],ArrayHeight[2],DScolumn2_10);\\n
\\n\\nThanks for your assistance.\", \"post_time\": \"2016-03-30 22:09:05\" },\n\t{ \"post_id\": 9448, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro with a Dataset Element\", \"username\": \"rtaylor\", \"post_text\": \"My last post to your previous thread https://hpccsystems.com/bb/viewtopic.php?f=10&t=2228 explains this, also. The problem is the same.\", \"post_time\": \"2016-03-30 20:25:23\" },\n\t{ \"post_id\": 9446, \"topic_id\": 2230, \"forum_id\": 10, \"post_subject\": \"Calling Macro with a Dataset Element\", \"username\": \"georgeb2d\", \"post_text\": \"I would like to define a dataset, then push elements of the dataset into a macro. It is most resistant.\\nThis works:\\n\\n SHARED UNSIGNED3 NumberGridsPerLongitude := 366667;\\n ArraySearch_Layout := RECORD\\n\\t\\t UNSIGNED1 ColumnNumber;\\n\\t\\t\\tUNSIGNED1 ArrayHeight;\\n\\t\\tEND;\\n\\t\\tArraySearchDS := DATASET([{1,10},{2,10},{3,10},{4,10},{5,10},{6,10},{7,9},{8,8},{9,6},{10,5}],ArraySearch_Layout);\\n RoadlinkOpt.mac_buildArray(NumberGridsPerLongitude,\\tArraySearchDS[1].ColumnNumber,ArrayHeight_10,DScolumn1_10);\\n\\t\\t\\n
\\nWhen I change the last line to call a second element in the dataset:\\n\\nmac_buildArray(NumberGridsPerLongitude,\\tArraySearchDS[1].ColumnNumber,ArraySearchDS[1].ArrayHeight,DScolumn1_10); \\n
\\nError Messages: \\nError: Constant expression expected (54, 18), 2071, RoadlinkOpt.mac_buildArray\\nError: While expanding macro mac_buildarray (20, 140), 2071, \\nError: Unknown identifier "DScolumn2_10" (45, 11), 2167, \\nError: syntax error near "SearchArrayDS_10" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (51, 16), 3002, \\nError: Constant expression expected (54, 18), 2071, RoadlinkOpt.mac_buildArray\\nError: While expanding macro mac_buildarray (20, 140), 2071, \\nError: Unknown identifier "DScolumn2_10" (45, 11), 2167, \\nError: syntax error near "SearchArrayDS_10" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (51, 16), 3002, \\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.mac_buildArrayAcrossUpDown\\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.Layouts\\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.mac_buildArray\\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.mac_buildArrayAcrossUpDown\\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.Layouts\\nInformation: Definition is sandboxed, 1033, RoadlinkOpt.mac_buildArray\\n\\nCan this be done? \\nI will try another approach with pushing both elements of the dataset to the macro and see if that works.\", \"post_time\": \"2016-03-30 20:19:49\" },\n\t{ \"post_id\": 9470, \"topic_id\": 2232, \"forum_id\": 10, \"post_subject\": \"Re: converting string with list of names into an array\", \"username\": \"rtaylor\", \"post_text\": \"nileshdchavan,I'm curious to know if the same can be achieved using the PARSE construct. Can you explain?
Yes, it can also be done with PARSE, but the code is a bit more complex:IMPORT STD;\\nnameList:='Jeff, Matt, Robin, Tom, Jen, Joe Schmoe, Fred';\\nnamesRecord := {STRING name};\\ndatafile := DATASET([nameList],namesRecord );\\n\\nPATTERN name := PATTERN('[A-Z][ A-Za-z]+');\\nTOKEN names := name OPT(', ');\\n\\nParseRecord := RECORD\\n STRING name := MATCHTEXT(name);\\nEND;\\nNamesarrayList := PARSE(datafile, name, names, ParseRecord);\\nNamesarrayList;
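If the end goal is still a membership test with IN (as in the original post), the parsed recordset can be collapsed back into a set with the SET function -- a minimal sketch, reusing the NamesarrayList definition above (the new names are just illustrative):

SetParsedNames := SET(NamesarrayList, name);  // collapse the parsed rows into a SET
'Matt' IN SetParsedNames;                     // TRUE
'Bob'  IN SetParsedNames;                     // FALSE

PARSE gives you the per-name records; SET then gives you something you can use directly with IN.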
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-04 17:09:16\" },\n\t{ \"post_id\": 9466, \"topic_id\": 2232, \"forum_id\": 10, \"post_subject\": \"Re: converting string with list of names into an array\", \"username\": \"nileshdchavan\", \"post_text\": \"Thanks Richard for quick response. This worked.\\n\\nI'm curious to know if the same can be achieved using the PARSE construct. Can you explain?\", \"post_time\": \"2016-04-01 21:16:53\" },\n\t{ \"post_id\": 9464, \"topic_id\": 2232, \"forum_id\": 10, \"post_subject\": \"Re: converting string with list of names into an array\", \"username\": \"rtaylor\", \"post_text\": \"nileshdchavan ,\\n\\nTake a look at the STD.Str.SplitWords() function in the Standard Library Reference:IMPORT STD;\\nnameList:='Jeff, Matt, Robin, Tom, Jen';\\nSetNames := STD.Str.SplitWords(nameList,', ');\\ndataset(SetNames,{STRING name});\\n\\n'Matt' IN SetNames;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-01 20:55:49\" },\n\t{ \"post_id\": 9462, \"topic_id\": 2232, \"forum_id\": 10, \"post_subject\": \"converting string with list of names into an array\", \"username\": \"nileshdchavan\", \"post_text\": \"I have a string -\\n\\nstring nameList:='Jeff, Matt, Robin, Tom, Jen';\\n\\nI want to parse and create an array with individual names so that i can use it with IN operator. How this can be done? I tried below, but it is not working.\\n\\nnamesRecord := RECORD\\n\\t\\tstring28 name;\\n\\tEND;\\n\\t\\n\\tdatafile := DATASET([nameList],namesRecord );\\n\\t\\n\\tpattern comma1 := [','];\\n\\t\\n NamesarrayList := PARSE(datafile, name, comma1, namesRecord , namesRecord );\\n\\nThe ECL language reference does not give enough idea about. Can you please help?\\n\\nAlso, is there a better way to do this other than PARSE?\", \"post_time\": \"2016-04-01 20:26:56\" },\n\t{ \"post_id\": 9498, \"topic_id\": 2238, \"forum_id\": 10, \"post_subject\": \"Re: Merging or splitting string(s) to a maximum length.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nI think your method might be better, the AGGREGATE approach does not account for the ',' separator in the 2nd transform, which can then take any one single output string over the 'n' limit.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-04-07 17:39:22\" },\n\t{ \"post_id\": 9494, \"topic_id\": 2238, \"forum_id\": 10, \"post_subject\": \"Re: Merging or splitting string(s) to a maximum length.\", \"username\": \"rtaylor\", \"post_text\": \"Allan, \\n\\nHere's my take on it:IMPORT STD;\\ns := 'k=v,j=444,p=q,r=ss2,z=123456789';\\nSetExpr := STD.Str.SplitWords(s,',');\\nds := DATASET(SetExpr,{STRING expr});\\nassert(ds,LENGTH(TRIM(expr))<=100,'Max Length Exceeded on Input',FAIL);\\n\\nROLLUP(ds,\\n LENGTH(TRIM(LEFT.expr)) + LENGTH(TRIM(RIGHT.expr)) < 10,\\n TRANSFORM({STRING expr},\\n\\t\\t\\t SELF.expr := LEFT.expr + ',' + RIGHT.expr));
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-07 15:47:46\" },\n\t{ \"post_id\": 9490, \"topic_id\": 2238, \"forum_id\": 10, \"post_subject\": \"Re: Merging or splitting string(s) to a maximum length.\", \"username\": \"Allan\", \"post_text\": \"Hooray the method above works:\\n\\nSlightly adapting an example for AGGREGATE from the ECL Ref manual I get:\\n\\ninRecord := RECORD\\n UNSIGNED box;\\n STRING text{MAXLENGTH(10)};\\nEND;\\ninTable := DATASET([{4,'Fred'},{10,'Freddy'},\\n {16,'Freddi'},{23,'Fredrik'},{30,'FredJon'}\\n,{35,'Allan'}\\n,{39,'Anna'}\\n,{44,'colin'}\\n,{60,'Something longer'}\\n,{64,'Bill'}\\n,{70,'Graham'}\\n\\t\\t], inRecord);\\n\\n//Example 1: Produce a list of box contents by concatenating a string:\\n\\noutRecord1 := RECORD\\n UNSIGNED box;\\n STRING contents{MAXLENGTH(200)};\\nEND;\\noutRecord1 t1(inRecord l, outRecord1 r) := TRANSFORM\\n SELF.box := l.box;\\n SELF.contents := r.contents + IF(r.contents <> '', ',', '') + l.text;\\nEND;\\n\\noutRecord1 t2(outRecord1 r1, outRecord1 r2) := TRANSFORM\\n SELF.box := r1.box;\\n SELF.contents := r1.contents + ',' + r2.contents;\\nEND;\\nOUTPUT(AGGREGATE(inTable, outRecord1, t1(LEFT, RIGHT), t2(RIGHT1, RIGHT2), LEFT.box DIV 25));\\n
\\nProducing\\n\\n4\\tFred,Freddy,Freddi,Fredrik\\n30\\tFredJon,Allan,Anna,colin\\n60\\tSomething longer,Bill,Graham\\n
\", \"post_time\": \"2016-04-07 15:12:43\" },\n\t{ \"post_id\": 9488, \"topic_id\": 2238, \"forum_id\": 10, \"post_subject\": \"Re: Merging or splitting string(s) to a maximum length.\", \"username\": \"Allan\", \"post_text\": \"Hum,\\nITERATE over the input dataset accumulating the length of each key to produce:\\n\\n11 LINKTYPE=0\\n15 B=0\\n19 K=P\\n...\\n
\\nThen use the grouping condition in AGGREGATE to group all those records whose lengths < 'n', followed by those those lengths are in the next bucket of 'n' etc.\\n \\nEasier said than done.\", \"post_time\": \"2016-04-07 14:41:53\" },\n\t{ \"post_id\": 9486, \"topic_id\": 2238, \"forum_id\": 10, \"post_subject\": \"Merging or splitting string(s) to a maximum length.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nOne problem that can be expressed two ways.\\nI have individual small components of a key of the form 'k=v' each in its own row in a dataset.\\nI would like to concatenate them together (comma separator) in chunks up to 'n' characters wide, but breaking on a separator. i.e. no partial 'K=v' in any one chunk.\\n\\nAlternatively take one long string of 'k=v,j=444,p=q,r=ss2...' and break that into chunks of up to 'n' characters, again ensuring no partial 'k=v' is generated. \\n\\n(In my case n = 100)\\nIf any one 'k=v' is > 'n' characters then the WU should ASSERT.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-04-07 13:47:15\" },\n\t{ \"post_id\": 9584, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Re: Atomic operations on HPCC.\", \"username\": \"rtaylor\", \"post_text\": \"Great! See you then then. \\n\\nRichard\", \"post_time\": \"2016-04-29 08:34:58\" },\n\t{ \"post_id\": 9582, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Re: Atomic operations on HPCC.\", \"username\": \"Allan\", \"post_text\": \"I Look forward to catching up with you.\\n\\nThis can wait till June. I'll try and remember to bore you with this issue then.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2016-04-29 08:32:38\" },\n\t{ \"post_id\": 9556, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Re: Atomic operations on HPCC.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nIt sounds like you're renaming the files after they've been sprayed. Is that correct?\\n\\nIf you're doing your spray of these customer files in ECL code, then why not simply spray to the new name you want it to be, making the spray do the rename for you? You might also use the STD.System.Util.GetUniqueInteger function to produce the unique ids for you.\\n\\nBTW, I will teaching be in your office in June, so we can face-to-face then if the issue hangs on that long.
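In case it helps in the meantime, the call itself is about as simple as it gets -- a minimal sketch, assuming the function is available in your version of the Standard Library:

IMPORT STD;
// each call returns a new unique integer, so concurrently running
// workunits should each receive a different ID
NewFileID := STD.System.Util.GetUniqueInteger();
OUTPUT(NewFileID);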
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-21 10:09:24\" },\n\t{ \"post_id\": 9554, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Re: Atomic operations on HPCC.\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nEvery file presented for processing by a customer (be it a contribution or batch enquiry)\\nneeds to be allocated an ID, that persists over time and is unique over time.\\n\\nOur current solution is summarised above, this works as long a multiple WU's are not concurrently attempting to get a unique ID. We then hit problems with two or more reads of the file in its initial state, leading to WU's using the same ID.\\n\\nAs I said we have a work around in that we never let such WU's run together, but I'm looking for the correct fix for this, that may or may not involve files. If not addressed this issue will become more of a problem as our product range and customer base grows.\\n\\nYours\\nAllan\", \"post_time\": \"2016-04-21 09:10:45\" },\n\t{ \"post_id\": 9550, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Re: Atomic operations on HPCC.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI'm not quite sure what you'r trying to accomplish here. Can you explain, please?\\n\\nRichard\", \"post_time\": \"2016-04-19 12:35:08\" },\n\t{ \"post_id\": 9548, \"topic_id\": 2264, \"forum_id\": 10, \"post_subject\": \"Atomic operations on HPCC.\", \"username\": \"Allan\", \"post_text\": \"We have to allocated a unique ID to each file presented to an ECL attribute for processing.\\nThis has been implemented as a 'get/set' operating on the name of a specific logical file.\\n\\ne.g. a call to this get/set operation renames file:\\n\\n
~maintainid::123456789
\\n\\nto\\n\\n~maintainid::123456790
\\n\\nand returns 123456789 as the unique id.\\n\\nThis works, just, but suffers from the fact that the internals of this get/set function (the rename) do not perform atomically; consequently we have to avoid concurrent use of this function. (we do this with our harness Linux scripts making sure only one file is presented at a time)\\n\\nIs there a way, in HPCC, to rename with locking, or failing that, use some other method of persisting unique numbers that can be safely updated even when accessed by concurrently running WU's?\", \"post_time\": \"2016-04-18 14:21:30\" },\n\t{ \"post_id\": 9580, \"topic_id\": 2266, \"forum_id\": 10, \"post_subject\": \"Re: Error on Roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI have never seen this error personally, but it has been reported a few times in the Community Issue Tracker. Some of these reports might provide a clue, for example:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10769\\n\\nEssentially you may need to examine parts of your ECL code to see if you can improve its efficiency. You may also want to open a report with detailed information (version, code sample, etc.) for the development team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-04-28 18:02:05\" },\n\t{ \"post_id\": 9552, \"topic_id\": 2266, \"forum_id\": 10, \"post_subject\": \"Error on Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Hi All\\n\\nWe are just testing a new multimode Roxie setup and are receiving the following error with a query. On a single node setup the query works fine. \\n\\nCan somebody please point me in the right direction.\\n\\n\\nException\\n Reported by: Roxie\\n Message: Packet length 3432953 exceeded maximum sending packet uid=0x00000033 activityId=617 pri=LOW queryHash=41813f142f5e5f8 ch=1\\n
\\nMany thanks\\n\\nDavid\", \"post_time\": \"2016-04-20 13:40:13\" },\n\t{ \"post_id\": 9574, \"topic_id\": 2268, \"forum_id\": 10, \"post_subject\": \"Re: cosine similarity between the document\", \"username\": \"rtaylor\", \"post_text\": \"gopi,\\n\\nOK, I have re-written your code, trying to optimize it so that it might run correctly against your 8 million records. My version produces exactly the same final result as yours from the example data provided.\\n\\nHere's my version:Lay_words := RECORD\\n UNSIGNED Id;\\n STRING word;\\nEND;\\n\\nds_words := DATASET([\\n{1,'Julie'},{1,'loves'},{1,'me'},{1,'more'},{1,'than'},{1,'Linda'},{1,'loves'},{1,'me'},\\n{2,'Jane'},{2,'likes'},{2,'me'},{2,'more'},{2,'than'},{2,'Julie'},{2,'loves'},{2,'me'}\\n], Lay_words);\\n\\n//Section 1 **********************************************************\\nLay_words_cnt := RECORD\\n ds_words.Id;\\n UNSIGNED8 word_hash := HASH64(ds_words.word);\\n UNSIGNED2 frequency := COUNT(GROUP);\\nEND;\\n\\nds_words_frq := SORT(TABLE(ds_words, {Lay_words_cnt}, Id, Word),id);\\nOUTPUT(ds_words_frq, Named('ds_words_frq')); \\n\\n//Section 2 **********************************************************\\nLay_Word_Child := RECORD\\n UNSIGNED8 word_hash;\\n UNSIGNED2 frequency;\\nEND;\\n\\nLay_Doc_Word := RECORD\\n UNSIGNED Id;\\n DATASET(Lay_Word_Child) Word_child;\\nEND;\\n\\nIDtbl := TABLE(ds_words_frq,{id},id);\\nPID := PROJECT(IDtbl,TRANSFORM(Lay_Doc_Word,SELF := LEFT,SELF := []));\\n\\nLay_Doc_Word BaseXF(PID L,ds_words_frq R) := TRANSFORM\\n SELF.Word_child := L.Word_child + ROW({R.word_hash,R.frequency},Lay_Word_Child);\\n SELF := L;\\nEND;\\t\\nds_doc_Base := DENORMALIZE(PID,ds_words_frq,LEFT.id=RIGHT.id,BaseXF(LEFT,RIGHT));\\nOUTPUT(ds_doc_Base, Named('ds_doc_Base')); \\n\\n//Section 3 **********************************************************\\nLay_Doc_Combination := RECORD\\n UNSIGNED8 source_id;\\n UNSIGNED8 target_id;\\nEND;\\n\\n// Join the base dataset with itself to go over all of the possible matches \\nds_Doc_Combination := JOIN(IDtbl, IDtbl, LEFT.Id != RIGHT.Id, \\n TRANSFORM(Lay_Doc_Combination,\\n SELF.source_id := LEFT.id,\\n SELF.target_id := RIGHT.id),ALL);\\nOUTPUT(ds_Doc_Combination, NAMED('ds_Doc_Combination')); \\n\\n//Section 4 **********************************************************\\nLay_Doc_Similarity_Base := RECORD\\n Lay_Doc_Combination;\\n REAL cosineSimilarity;\\nEND; \\n\\nLay_Doc_Similarity_Base Trans_Similarity(Lay_Doc_Combination L) := TRANSFORM\\n ds1 := ds_doc_Base(id = L.source_id)[1].Word_child;\\n ds2 := ds_doc_Base(id = L.target_id)[1].Word_child;\\n dot_join := JOIN(ds1, ds2,LEFT.word_hash = RIGHT.word_hash,\\n TRANSFORM({UNSIGNED8 word_hash, INTEGER product}, \\n SELF.word_hash := RIGHT.word_hash, \\n SELF.product := LEFT.frequency*RIGHT.frequency));\\n dot_product := SUM( dot_join, product);\\n \\n str1_squares := SUM(TABLE(ds1, {INTEGER squares := frequency*frequency} ), squares);\\n str2_squares := SUM(TABLE(ds2, {INTEGER squares := frequency*frequency} ), squares);\\n magnitudes_product := SQRT(str1_squares) * SQRT(str2_squares);\\n \\n SELF.cosineSimilarity := dot_product/magnitudes_product; \\n SELF := L;\\nEND;\\n\\nds_Doc_Similarity_Base := PROJECT(ds_Doc_Combination, Trans_Similarity(LEFT));\\nOUTPUT(ds_Doc_Similarity_Base, Named('ds_Doc_Similarity_Base'));
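As a quick sanity check before pointing it at the 8 million records: with the two sample documents above, both your original and this version should come out with a cosineSimilarity of roughly 0.82 for the (1,2) and (2,1) pairs -- the shared-word frequency products sum to 9, and the magnitudes are SQRT(12) and SQRT(10), so the ratio is 9/SQRT(120), about 0.8215.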
Let's go over the diferences, section by section.\\n\\nSection 1: I consolidated your separate PROJECT and TABLE functions into a single TABLE function to do both the grouping and the HASH64 of the individual words. I moved the SORT to act on the final result (the group-by form of TABLE never needs its input to be SORTed, it handles that automatically). I also removed the Word field since it was never used subsequently.\\n\\nYou may want to consider using HASH32 instead of HASH64. That will cut the data size for that field in half (more data can fit in memory). Since you're only hashing individual words you are unlikely to ever get duplicate hash values from different words. In my testing I found that HASH32 does produce the same end result from your test data.\\n\\nSection 2: I changed the creation of the nested child dataset to use DENORMALIZE instead of your ROLLUP. The primary advantage to this method is the IDtbl definition that comes into use in Section 3. \\n\\nSection 3: Here are the big changes. All this section really needs to do is create all the pairs of source and target ids, so I completely removed the nested child datasets from this section. That eliminates a LOT of duplicated data that is still available from the ds_doc_Base just created in Section 2. And, by using the IDtbl to do the self-JOIN instead of the ds_doc_Base, you can fit more records into memory at the same time.\\n\\nBTW, do the pairs of records need to be commutative? Your code results in both the 1,2 and 2,1 pairs, and both combinations show the same cosineSimilarity numbers, so you're doing double the work if those numbers will always be the same. If you only need unique and not commutative pairs, then the id pairs generation logic will need to change to this:ds_Doc_Combination := JOIN(IDtbl, IDtbl, LEFT.Id < RIGHT.Id , \\n TRANSFORM(Lay_Doc_Combination,\\n SELF.source_id := LEFT.id,\\n SELF.target_id := RIGHT.id),ALL);
\\nSection 4: The ds1 and ds2 definitions now just reference the appropriate nested child datasets from ds_doc_Base -- that eliminates a lot of unnecessary "carry-around" of that data. I split the SUM from the JOIN just to make the code more readable, and did the same thing with the squares/magnitude computations. \\n\\nAll in all, this version should be more efficient and hopefully will work with your 8 million record dataset.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-04-27 13:55:49\" },\n\t{ \"post_id\": 9572, \"topic_id\": 2268, \"forum_id\": 10, \"post_subject\": \"cosine similarity between the document\", \"username\": \"gopi\", \"post_text\": \"I have to find cosine similarity between the document for my 8 million records.\\n\\nI have sample code, but I am not able to run this code for my 8 million record, \\n\\nIs there any way to do self-join to get possible combination ( I mean dataset join with same dataset ).\\n\\nMy Code\\n\\nLay_words := Record\\n\\tUnsigned Id;\\n\\tString word;\\nEnd;\\n\\nds_words := Dataset([\\n{1, 'Julie'}, {1, 'loves'}, {1, 'me'}, {1, 'more'}, {1, 'than'}, {1, 'Linda'}, {1, 'loves'}, {1, 'me'},\\n{2, 'Jane'}, {2, 'likes'}, {2, 'me'}, {2, 'more'}, {2, 'than'}, {2, 'Julie'}, {2, 'loves'}, {2, 'me'}\\n], Lay_words);\\n\\nLay_words_hash := Record\\n\\tUnsigned Id;\\n\\tString word;\\n\\tUnsigned8 word_hash;\\nEnd;\\n\\nds_word_hash := Project(ds_words, Transform(Lay_words_hash, self.word_hash := hash64(left.word), self := left));\\n\\nLay_words_cnt := Record\\n\\tds_word_hash.Id;\\n\\tds_word_hash.word_hash;\\n\\tUnsigned2 frequency := count(group);\\nEnd;\\n\\nds_words_frq := Table(Sort(ds_word_hash, Id, word_hash), {Lay_words_cnt}, Id, Word_hash);\\nds_words_frq;\\n\\nLay_Word_Child := Record\\n\\tUnsigned8 word_hash;\\n\\tUnsigned2 frequency;\\nEnd;\\n\\nLay_Doc_Word := Record\\n\\tUnsigned Id;\\n\\tDataset(Lay_Word_Child) Word_child;\\nEnd;\\n\\nLay_Doc_Word Tran_Child(Lay_words_cnt L, Dataset(Lay_words_cnt) R) := Transform\\n\\tself.Id := L.Id;\\n\\tself.Word_child := Project(R, Lay_Word_Child);\\nEnd;\\n\\nds_words_group := Group(Sort(ds_words_frq, Id), Id);\\nds_doc_Base := Rollup(ds_words_group, Group, Tran_Child(left, rows(left))); \\nds_doc_Base;\\t\\t\\t\\n\\n\\nLay_Doc_Combination := REcord\\n\\tUnsigned8 source_id;\\n\\tUnsigned8 target_id;\\n\\tDataset(Lay_Word_Child) source_word_list;\\n\\tDataset(Lay_Word_Child) target_word_list;\\nEnd;\\n\\n// Join the base dataset with itself to go over all of the possible matches \\nds_Doc_Combination := Join(ds_doc_Base, ds_doc_Base, left.Id != right.Id, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTransform(Lay_Doc_Combination,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.source_id := left.id,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.target_id := right.id,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.source_word_list := left.Word_child,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.target_word_list := right.Word_child,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t),all);\\nds_Doc_Combination;\\t\\t\\t\\t\\t\\n\\n\\nLay_Doc_Similarity_Base := Record\\n\\tLay_Doc_Combination and not [source_word_list, target_word_list];\\n\\tReal cosineSimilarity;\\nEnd;\\t\\n\\nLay_Doc_Similarity_Base Trans_Similarity(Lay_Doc_Combination L) := Transform\\n\\t\\n\\tds1\\t:= L.source_word_list;\\n\\tds2\\t:= L.target_word_list;\\n\\t\\n\\tdot_product := SUM( JOIN(ds1, ds2, LEFT.word_hash = RIGHT.word_hash, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t TRANSFORM({Unsigned8 word_hash, INTEGER product}, SELF.word_hash := RIGHT.word_hash, 
\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t SELF.product := LEFT.frequency*RIGHT.frequency)) ,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t product);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\t\\n\\t\\n\\tstr1_magnitude := SQRT( SUM(TABLE(ds1, {INTEGER squares := frequency*frequency} ), squares) );\\n\\tstr2_magnitude := SQRT( SUM(TABLE(ds2, {INTEGER squares := frequency*frequency} ), squares) );\\n\\t\\n\\tmagnitudes_product := str1_magnitude * str2_magnitude;\\n\\t\\n\\tself.cosineSimilarity := dot_product/magnitudes_product;\\t\\n\\t\\n\\tself := L;\\nEnd;\\n\\nds_Doc_Similarity_Base := Project(ds_Doc_Combination, Trans_Similarity(left));\\noutput(ds_Doc_Similarity_Base, Named('ds_Doc_Similarity_Base'));\\t\\n
\\n\\nThanks\", \"post_time\": \"2016-04-27 08:21:27\" },\n\t{ \"post_id\": 9854, \"topic_id\": 2278, \"forum_id\": 10, \"post_subject\": \"Re: SET OF parameters to BEGINC++\", \"username\": \"Allan\", \"post_text\": \"Thanks Gavin,\\n\\nFirst apologies for the delay in replying, I've been on other jobs.\\n\\nI must admit I don't understand your reply in the context of the question.\\n\\nok one can do:\\n\\nset of integer mySet := ALL;\\n
\\nSo what would 'mySet' look like to a C++ function if passed as a parameter?\\nObviously one can't pass the entire set of integers.\\n\\nYours\\nAllan\", \"post_time\": \"2016-06-28 09:12:39\" },\n\t{ \"post_id\": 9816, \"topic_id\": 2278, \"forum_id\": 10, \"post_subject\": \"Re: SET OF parameters to BEGINC++\", \"username\": \"ghalliday\", \"post_text\": \"isAllXXX is used to represent a set that contains everything.\\n\\nE.g., \\nset of unicode20 mySet := ALL;\\n\\nalwaysTrue := someValue IN mySet;\\n\\nIt is useful for allowing a filter that is conditionally applied. E.g,\\n\\nvalidNames := IF(checkName, namesToCheck, ALL);\\n\\nvalidMatches := myDataset(name in validNames);\", \"post_time\": \"2016-06-23 11:43:11\" },\n\t{ \"post_id\": 9592, \"topic_id\": 2278, \"forum_id\": 10, \"post_subject\": \"SET OF parameters to BEGINC++\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nCould someone please explain the 'SET OF ...' parameters to embedded C++.\\n\\nThe ECL Ref Manual (page 112 on v5.4.8-1) just has:\\n\\nSets are passed as a set of parameters (all, size, pointer):\\nSET OF UNSIGNED4 ABC -> bool isAllAbc, size32_t lenAbc, const void * abc\\n
\\n\\nSo I don't understand what the 'isAll' BOOLEAN is telling me, and how does this all work with variable length elements? e.g. SET OF UNICODE\\n\\nThere is an example using 'SET OF' but, in my opinion, its not very illuminating\", \"post_time\": \"2016-05-04 14:05:18\" },\n\t{ \"post_id\": 9684, \"topic_id\": 2294, \"forum_id\": 10, \"post_subject\": \"Re: HPCC Visualization Framework - github version\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi Smith,\\n\\nThanks for your response.\\n\\nI could imitate your examples and those are working fine.\\n\\nIn addition, I like to use 'HPCC Visualization Framework' available in Github with its all features. I do not want to modify a lot in the JavaScript files to get it work with my HPCC server. So I have replaced urls for Roxie & WsWorkunits in https://github.com/hpcc-systems/Visualization/blob/master/test/marshallerFactory.js file. Now I am getting JSON parsing error in https://github.com/hpcc-systems/Visualization/blob/master/src/marshaller/HipieDDL.js file.\\n\\nURLs for Workunits & Roxie are giving a valid JSON response. Hence I suspect that there could be mismatch between JSON that is expected and received.\\n\\nPlease clarify, whether there is any specific format of JSON response that is expected in 'Visualization\\\\src\\\\marshaller\\\\HipieDDL.js' from the Workunit/Roxie url,or I am missing something?\\n\\nRegards, \\nRamesh\", \"post_time\": \"2016-05-31 15:05:54\" },\n\t{ \"post_id\": 9680, \"topic_id\": 2294, \"forum_id\": 10, \"post_subject\": \"Re: HPCC Visualization Framework - github version\", \"username\": \"gsmith\", \"post_text\": \"I have a couple of examples on bl.ocks.org: http://bl.ocks.org/GordonSmith including two which fetch data from a workunit and a roxie service respectively:\\n* Workunit: http://bl.ocks.org/GordonSmith/71e28d9345fe6c3e64cf\\n* Roxie: http://bl.ocks.org/GordonSmith/e861d41a234c35cfceb6\\nNote: Neither links above will work for you, as the server they are attempting to fetch data from no longer exists, but if you execute the ECL on your own server and change the URL inside the JavaScript to match your servers IP address you should be up and running in no time.\\n\\nSome additional links:\\n* GitHub Project: https://github.com/hpcc-systems/Visualization\\n* Wiki: https://github.com/hpcc-systems/Visualization/wiki\\n* API Demo/Test Page: https://rawgit.com/hpcc-systems/Visualization/master/demos/dermatology.html\", \"post_time\": \"2016-05-30 15:14:55\" },\n\t{ \"post_id\": 9678, \"topic_id\": 2294, \"forum_id\": 10, \"post_subject\": \"HPCC Visualization Framework - github version\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi,\\n\\nI am trying to visualize Workunit and Roxie query results using HPCC-Visualization framework available @ https://github.com/hpcc-systems/Visualization. I was trying to find some 'config or property file' where I can define my HPCC Server and Roxie details in order to visualize the result using this framework. But I could not find anything.\\n\\nI have below questions,\\n1. I like to know, whether my requirement is feasible with HPCC-Visualization framework available in github or I should wait till HPCC-framework-6.0.0-beta-2 release?\\n2. Is there any document or reference material to HPCC-Visualization framework that could help me. 
\\n\\nThanks in advance.\\n\\nRegards,\\nRamesh\", \"post_time\": \"2016-05-30 12:40:43\" },\n\t{ \"post_id\": 9808, \"topic_id\": 2302, \"forum_id\": 10, \"post_subject\": \"Re: Blocked Workunits and ECL Watch\", \"username\": \"Celia\", \"post_text\": \"Hello,\\n\\nStill don't know why all the system crashed, but I fulfill to compile again my code by re-starting HPCC : \\n\\nsudo service hpcc-init restart\\n\\nHope it can helps if someone has the same issue !\\n\\nRegards,\\n\\nCélia\", \"post_time\": \"2016-06-21 12:05:25\" },\n\t{ \"post_id\": 9706, \"topic_id\": 2302, \"forum_id\": 10, \"post_subject\": \"Blocked Workunits and ECL Watch\", \"username\": \"Celia\", \"post_text\": \"Hi,\\n\\nI've already read some posts on this subject but I haven't found an answer for my problem. Here it is :\\nI'm new on HPCC and tried to load data from a sprayed file but the WU failed because of a XPATH problem. Once corrected, I've submitted again the WU but it's blocked now. I wanted to delete the WU through ECL Watch but the action button doesn't do anything (ECL/Workunits/Delete). I just fulfilled to delete it in ECL IDE but it's still blocked.\\n\\nWhat can I try to unblock my HPCC please ?\\n\\nThanks in advance,\", \"post_time\": \"2016-06-07 14:35:17\" },\n\t{ \"post_id\": 9732, \"topic_id\": 2310, \"forum_id\": 10, \"post_subject\": \"Re: Macro expansion not working in all contexts\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nYes, just investigating IFBLOCKS may well be the way to go, but I would still like to understand this issue with MACROs.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-06-10 17:36:54\" },\n\t{ \"post_id\": 9728, \"topic_id\": 2310, \"forum_id\": 10, \"post_subject\": \"Re: Macro expansion not working in all contexts\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHave you thought about using IFBLOCK in your RECORD Structure for the file?\\n\\nOtherwise, I'll be in your office for three weeks starting Monday, so we can talk about it then.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-10 13:38:06\" },\n\t{ \"post_id\": 9726, \"topic_id\": 2310, \"forum_id\": 10, \"post_subject\": \"Macro expansion not working in all contexts\", \"username\": \"Allan\", \"post_text\": \"We have a scenario where one logical file (DATASET) fields are being used to hold different data for different customers. Consequently we need a mapping from the name the customer identifies a field by and the actual field name in the DATASET.\\nI have this prototype:\\n\\n#workunit('name','test field re-id');\\n\\nTGTCUSTOMER:='B';\\n\\nRD := RECORD\\n STRING F1;\\n STRING F2;\\nEND;\\n\\nD := DATASET([{'43,000','LAT:45.3,LONG:-1.9'}],RD);\\n\\nremap(cfield) := MACRO\\n\\n #DECLARE(f);\\n #IF (TGTCUSTOMER = 'A')\\n #IF(#TEXT(cfield) = 'BuildingTIS')\\n #SET(f,'F1')\\n #ELSIF(#TEXT(cfield) = 'GeoCode')\\n #SET(f,'F2')\\n #ELSE\\n #ERROR('Unknown field name for customer A')\\n #END\\n #ELSEIF (TGTCUSTOMER = 'B')\\n #IF(#TEXT(cfield) = 'GeoCode')\\n #SET(f,'F1')\\n #ELSIF(#TEXT(cfield) = 'Address')\\n #SET(f,'F2')\\n #ELSE\\n #ERROR('Unknown field name for customer B')\\n #END\\n #ELSE\\n #ERROR('Unknown customer')\\n #END\\n %f%\\nENDMACRO;\\n\\nremap(GeoCode) := 'ABC';\\nF1;\\nremap(GeoCode);\\n//D(remap(GeoCode) = 'ABC');\\nD(F1 = 'ABC');\\n
\\nThe example setup in the WU (using customer ‘B’) has the macro generating ‘F1’ but I can’t use the result of the MACRO in all context?\\nSo \\n\\nremap(GeoCode) := 'ABC';\\nF1;\\nremap(GeoCode);\\n
\\nworks fine but\\n\\nD(remap(GeoCode) = 'ABC');\\n
\\nFails, when the MACRO should just be generating:\\n\\nD(F1 = 'ABC');\\n
\\nWhich is a valid ECL construct.\\nCould someone clarify what is going on?\\n\\nAs an aside I initially attempted to use an INLINE dataset and access a particular mapping using a key of 'customer'. This failed because of 'non-constant expression', but everything in the construct was constant at compile time. The 'customer', the inline dataset all could be worked out by the macro processor.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-06-10 13:30:04\" },\n\t{ \"post_id\": 26123, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"wjblack\", \"post_text\": \"Added SET OF STRING familyNames{xpath('familyNames')};\\n\\n\\nnamesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\n\\nEmpRec := RECORD\\n DATASET(namesRec) AccountInfo{xpath('EmpRec')};\\n SET OF STRING familyNames{xpath('familyNames')};\\nEND; \\nx := '{"EmpRec": {"EmpID": 42,"FName": "George" , "LName": "Jetson" }}';\\ny := '{"EmpRec": {"EmpID": 42,"FName": "George" , "LName": "Jetson" }, "familyNames": ["Oh George","Jetson My Boy" ]}';\\n\\nrec := FROMJSON(EmpRec,y);\\nOUTPUT(rec);\\n
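For anyone finding this later: the same structure round-trips back out through TOJSON -- a quick sketch using the rec row from the code above:

jsonText := TOJSON(rec);   // produces a JSON string from the row, using the xpath names
OUTPUT(jsonText);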
\", \"post_time\": \"2019-05-08 16:11:08\" },\n\t{ \"post_id\": 26113, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"wjblack\", \"post_text\": \"How would you handle a list of items like in dataset 'y' for 'familyNames'?\\n\\n\\nnamesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\n\\nEmpRec := RECORD\\n DATASET(namesRec) AccountInfo{xpath('EmpRec')};\\nEND; \\nx := '{"EmpRec": {"EmpID": 42,"FName": "George" , "LName": "Jetson" }}';\\ny := '{"EmpRec": {"EmpID": 42,"FName": "George" , "LName": "Jetson" }, "familyNames": ["Oh George","Jetson My Boy" ]}';\\n\\nrec := FROMJSON(EmpRec,x);\\nOUTPUT(rec);\\n
\", \"post_time\": \"2019-05-08 16:01:11\" },\n\t{ \"post_id\": 9778, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. Works great.\", \"post_time\": \"2016-06-15 17:38:24\" },\n\t{ \"post_id\": 9774, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"rtaylor\", \"post_text\": \"Hold on, I spoke too soon. \\n\\nI made two changes to your code and now it works:
namesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\n\\nEmpRec := RECORD\\n DATASET(namesRec) AccountInfo{xpath('EmpRec')};\\nEND; \\nx := '{"EmpRec": {"EmpID": 42,"FName": "George" , "LName": "Jetson" } }'; \\n\\nrec := FROMJSON(EmpRec,x);\\nOUTPUT(rec);
I deleted the double quotes you had around your nested curly braces, and I changed the RECORD structure name in the FROMJSON function to EmpRec. It works correctly now -- no JIRA needed.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-15 13:49:50\" },\n\t{ \"post_id\": 9772, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"Gleb Aronsky\", \"post_text\": \"You can submit a ticket here http://track.hpccsystems.com\", \"post_time\": \"2016-06-15 13:23:58\" },\n\t{ \"post_id\": 9770, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. How do I submit a JIRA ticket?\", \"post_time\": \"2016-06-15 12:20:29\" },\n\t{ \"post_id\": 9768, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Re: Reading a MultiLevel JSON into Thor\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,\\n\\nI have also been unable to make this work. It appears that TOJSON and FROMJSON don't currently handle nested child datasets. You should submit a JIRA ticket for this.\\n\\nThe only workaround I can think of would be using PARSE to extract the data values from the JSON string.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-15 10:07:07\" },\n\t{ \"post_id\": 9764, \"topic_id\": 2330, \"forum_id\": 10, \"post_subject\": \"Reading a MultiLevel JSON into Thor\", \"username\": \"georgeb2d\", \"post_text\": \"I have a JSON string like this:\\n{\\naccount : \\n {\\n name : “string”,\\n Id : int,\\n legacy : { base : “string”, suffix : “string”},\\n status : “string”,\\n },\\nproduct : “Don1”,\\nproductConfig : \\n\\t {\\n generalConfig : \\n\\t\\t {\\n ConfigId : int,\\n Id2 : int,\\n ProdStatus : “string”,\\n },\\n email_notification: \\n {\\n EmailAddress : “string”,\\n sendFromEmailAddress : “string”\\n }\\n }\\n}\\nIn order to work toward the above I put together a simpler string to try to get it to work:\\n\\nnamesRec := RECORD\\n UNSIGNED2 EmployeeID{xpath('EmpID')};\\n STRING10 Firstname{xpath('FName')};\\n STRING10 Lastname{xpath('LName')};\\nEND;\\n\\nEmpRec := RECORD\\n DATASET(namesRec) AccountInfo{xpath('EmpRec')};\\nEND;\\t\\t\\nx := '{"EmpRec":"{""EmpID": 42,"FName": "George" , "LName": "Jetson" "}" }'; \\n\\nrec := FROMJSON(namesRec,x);\\nOUTPUT(rec);
\\n\\nThe error I get is this:\\nError: System error: 2: Error - syntax error "expected ',' or '}'" [file offset 14]\\n{"EmpRec":"{""*ERROR*EmpID": 42,"FName": "George" , "LName": "Jetson" "}" }\\n\\nI have tried all sorts of ways to get this to work but I am stumped. Maybe I need to Parse the string first? Please assist.\", \"post_time\": \"2016-06-14 22:27:15\" },\n\t{ \"post_id\": 9842, \"topic_id\": 2336, \"forum_id\": 10, \"post_subject\": \"Re: How to read child element text value\", \"username\": \"bforeman\", \"post_text\": \"I am assuming this is part of a larger file, so first you want to spray the file to your cluster using the XML option and "student" as your row tag.\\n\\nNext, create your RECORD and DATASET, something like this:\\n\\nrec := RECORD\\n STRING name;\\n STRING text{XPATH('data/text')};\\n STRING result{XPATH('data/text/result')};\\n STRING grade{XPATH('data/text/grade')};\\n STRING mark{XPATH('data/text/grade/mark')};\\n STRING street{XPATH('data/text/street')};\\n STRING city{XPATH('data/text/city')};\\nEND;\\n\\nds := DATASET('~TEST::StudentXML',rec,XML('studentlist/student'));\\n\\nds;
\\n\\nThis will extract your data, and then you can use a simple PROJECT and TRIM all of the data into one or two fields as needed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-06-24 18:51:04\" },\n\t{ \"post_id\": 9806, \"topic_id\": 2336, \"forum_id\": 10, \"post_subject\": \"How to read child element text value\", \"username\": \"gopi\", \"post_text\": \"How to read the child element text value by using xpath for the below sample XML,\\n\\n<studentlist>\\n<student>\\n <name>david</name>\\n <data>\\n <text>\\n good student\\n <result>pass</result>\\n <grade> A \\n <mark>86</mark>\\n </grade>\\n over all he is very good\\n </text>\\n </data>\\n</student>\\n<student>\\n <name>smith</name>\\n <data>\\n <text>\\n address\\n <street>second main road</street>\\n <city>new york</city>\\n </text>\\n </data>\\n</student>\\n</studentlist>\\n\\nExpected output\\n\\nName\\ttext\\ndavid\\tgood student pass A 86 over all he is very good\\nsmith\\tAddress second main road new york\\n\\nThanks\", \"post_time\": \"2016-06-20 14:18:50\" },\n\t{ \"post_id\": 29543, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"IMPORT mysql;\\ninteger ExtractIt(String Query) := EMBED(mysql : user('rchapman'),database('test'),server('127.0.0.1'), port('3306'))\\n?\\nENDEMBED;\\n\\nQuery := 'select count(1) from transaction_log' ;\\nExtractIt(Query);\\n\\n\\n\\n\\nEven if there is query ,How to negotiate quote with escape sequence .\\n\\nSELECT * FROM transaction_log WHERE account_number NOT IN ('10000001','10000024') AND batch_job_id IS NOT NULL AND SUBSTR(date_added, 1, 10) = SUBDATE(CURDATE(),1) ORDER BY date_added DESC;\\n\\nAlso can you please let us know if there is any extract limit or I can extract all at once.\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2020-02-14 04:21:30\" },\n\t{ \"post_id\": 28503, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team,\\nAny luck how i can pass query runtime .\\n\\nRegards\\nHarsh\", \"post_time\": \"2019-12-17 09:37:15\" },\n\t{ \"post_id\": 28483, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team,\\nCan you please suggest how can i pass query runtime\\nTrying to pass query runtime to extract as this mainly for count respectives.\\n\\nIMPORT mysql;\\ninteger ExtractIt(String Query) := EMBED(mysql : user('rchapman'),database('test'),server('127.0.0.1'), port('3306'))\\n?\\nENDEMBED;\\n\\nQuery := 'select count(1) from transaction_log' ;\\nExtractIt(Query);\\n\\n\\n\\n\\nEven if there is query ,How to negotiate quote with escape sequence .\\n\\nSELECT * FROM transaction_log WHERE account_number NOT IN ('10000001','10000024') AND batch_job_id IS NOT NULL AND SUBSTR(date_added, 1, 10) = SUBDATE(CURDATE(),1) ORDER BY date_added DESC;\", \"post_time\": \"2019-12-16 09:24:21\" },\n\t{ \"post_id\": 27653, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"DSC\", \"post_text\": \"A file containing examples of ECL interacting with MySQL can be found within the HPCC Systems source code: https://github.com/hpcc-systems/HPCC-Platform/blob/master/initfiles/examples/embed/mysql-simple.ecl. 
From that file:\\n\\n\\ninitialize(dataset(childrec) values) := EMBED(mysql : user('rchapman'),database('test'),server('127.0.0.1'), port('3306'))\\n INSERT INTO tbl1 values (?, ?, ?, ?, ?, ?, ?, ?, ?);\\nENDEMBED;\\n
\\nThere are a few things to note:\\n\\n* The function does not specify a data type for a returned value. Therefore, the function is defined as an action. This affects how you invoke it. It will be similar to how you would use OUTPUT() within your ECL code.\\n\\n* A dataset is passed into the function as an argument. The binding of each record's values is positional: The first field in each record is bound to the first '?' in the SQL statement, the second field to the second '?' and so on.\\n\\n* The MySQL plugin accepts a number of options for connecting to the database. The general form is EMBED(mysql : option(value) [, option(value)]*). The options let you define things like the IP address of the MySQL server, the user and password for connecting, the database to connect to, etc. The example file uses most of those options in one function or another.\\n\\nHope this helps!\\n\\nDan\", \"post_time\": \"2019-09-27 12:15:25\" },\n\t{ \"post_id\": 27643, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"Hi Richard,\\nLogging was just was an example , Once I can dump data to SQL Database , Dashboard tools can be connected run time .\\nHence I am looking to pass dataset to sqlembbed and insert same data in tables .That respective tables can be views in BI Tools.\", \"post_time\": \"2019-09-27 11:56:26\" },\n\t{ \"post_id\": 27633, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"rtaylor\", \"post_text\": \"Harsh Desai,\\n\\nHave you considered just using the logging functions in our Standard Library?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-09-27 11:47:22\" },\n\t{ \"post_id\": 27623, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"Using Embedded Sql ,I intend to pass a dataset and insert same values in SQL tables for Logging perspective.\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2019-09-27 04:00:54\" },\n\t{ \"post_id\": 27613, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"DSC\", \"post_text\": \"Hi harshdesai,\\n\\nAre you asking about reading data from a MySQL database, or writing data to it?\", \"post_time\": \"2019-09-26 19:22:24\" },\n\t{ \"post_id\": 27603, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"harshdesai\", \"post_text\": \"Hi Team,\\nHow can read dataset in hpcc sql embedded , so rather to create file i can prepare Dynamic insert in SQL embed and upload data \\n\\nCan you please suggest how to read a dataset in sql and insert in table.\", \"post_time\": \"2019-09-26 06:11:07\" },\n\t{ \"post_id\": 9850, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"chuck.beam\", \"post_text\": \"Hi Richard,\\n\\nI am trying to do as you suggested, but I have a couple of questions.\\n\\nFirst, how do I wrap the STRING fields in single quotes in the OUTPUT and DESPRAYed file?\\n\\nSecond, how do I call the LOAD DATA FILE function from ECL?\\n\\nMy code below fails with the message:\\nError: System error: 0: Graph[9], SLAVE #1 [10.194.10.41:6600]: Graph[9], workunitwrite[12]: mysql: This command is not supported in the prepared statement protocol yet, (0, 0), 0, \\n\\n\\n\\nDATASET(Layout_DD_TransactionLogOut) LoadData() := 
EMBED(mysql : user('username'),\\nserver('server'),\\nport('port'),\\ndatabase('db'),\\npassword('password')) \\n\\t\\tLOAD DATA INFILE '/data/ActiveInsights_DEVCAB/DD_LOG/TransactionOut.csv' \\n\\t\\tINTO TABLE log_dd.transaction_log \\n\\t\\tFIELDS TERMINATED BY ',' \\n\\t\\tLINES TERMINATED BY '\\\\n';\\nENDEMBED;\\n\\nOUTPUT(LoadData(), NAMED('OUT4'));
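The error above is the MySQL client reporting that LOAD DATA INFILE is not available through the prepared-statement protocol the plugin uses, so driving the bulk load from inside the EMBED is unlikely to work. Below is a minimal sketch of the despray half of Richard's earlier suggestion; the dataset name, landing-zone IP and paths are placeholders, and the LOAD DATA step itself would then be run outside ECL (for example from the mysql command-line client):

IMPORT STD;

// 1. Write the recordset (name assumed) to a CSV logical file on the cluster
WriteCSV := OUTPUT(Transaction_DS,, '~thor::out::transactionout',
                   CSV(SEPARATOR(','), TERMINATOR('\n')), OVERWRITE);

// 2. Despray it to a landing-zone path the MySQL server can reach
Despray := STD.File.DeSpray('~thor::out::transactionout',
                            '10.0.0.1',                      // landing-zone IP (placeholder)
                            '/data/TransactionOut.csv',      // destination path (placeholder)
                            -1,                              // timeout: no limit
                            , , TRUE);                       // defaults; allow overwrite

SEQUENTIAL(WriteCSV, Despray);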
\", \"post_time\": \"2016-06-27 12:33:27\" },\n\t{ \"post_id\": 9836, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"chuck.beam\", \"post_text\": \"OK, I will go back to them and discuss your suggestion for despray and bulk insert.\\n\\nThanks\\nChuck\", \"post_time\": \"2016-06-24 15:58:21\" },\n\t{ \"post_id\": 9834, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nGiven that you said, "millions of records" I wouldn't even consider doing it directly from ECL. If it were onesy-twosey ... \\n\\nRichard\", \"post_time\": \"2016-06-24 15:56:57\" },\n\t{ \"post_id\": 9828, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"chuck.beam\", \"post_text\": \"Hi Richard,\\n\\nThe despray and bulk load is an option, however they wanted me to explorer updating the MySQL database directly from the ECL code.\\n\\nI have been able to insert records, but not using my dataset.\\n\\nDo you think this is possible?\\n\\nThanks\\nChuck\", \"post_time\": \"2016-06-24 14:50:18\" },\n\t{ \"post_id\": 9826, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"Re: EMBED MySQL Insert using dataset\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nWhy not just write the data to a CSV file, despray it, then do a bulk import to the SQL database?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-24 14:47:11\" },\n\t{ \"post_id\": 9822, \"topic_id\": 2342, \"forum_id\": 10, \"post_subject\": \"EMBED MySQL Insert using dataset\", \"username\": \"chuck.beam\", \"post_text\": \"I have been tasked with writing a dataset to a MySQL database.\\n\\nThe dataset will be quite large millions of records.\\n\\nI would like to pass the dataset as a parameter to my EMBEDed code, see code below.\\n\\nI am getting a syntax error.\\n\\nCan someone help with this syntax?\\n\\nThanks!\\nChuck\\n\\n\\n
IMPORT mysql, AI_TransactionLogging;\\n\\nLayout_DD_TransactionLog := RECORD\\n\\t STRING \\t\\t\\ttransaction_id;\\n\\t\\tINTEGER \\t\\tproduct_id := 22;\\n\\t\\tSTRING \\t\\t\\tdate_added;\\n\\t\\tSTRING \\t\\t\\tservice_type;\\n\\t\\tSTRING \\t\\t\\tspecial_billing_id;\\n\\t\\tSTRING \\t\\t\\treport_code;\\n\\t\\tSTRING \\t\\t\\treport_usage;\\n\\t\\tSTRING \\t\\t\\trequestor;\\n\\t\\tSTRING \\t\\t\\treference_number;\\n\\t\\tSTRING \\t\\t\\taccount_base;\\n\\t\\tSTRING \\t\\t\\taccount_suffix;\\n\\t\\tINTEGER \\t\\taccount_id;\\n\\t\\tINTEGER \\t\\tcustomer_id;\\n\\t\\tSTRING \\t\\t\\tanchor_transaction_id;\\n\\t\\tINTEGER \\t\\tanchor_product_id;\\n\\t\\tSTRING \\t\\t\\tfull_quote_back;\\n\\t\\tSTRING \\t\\t\\ti_date_ordered;\\n\\t\\tSTRING \\t\\t\\ti_addr_house_num;\\n\\t\\tSTRING \\t\\t\\ti_addr_apt_num;\\n\\t\\tSTRING \\t\\t\\ti_addr_line;\\n\\t\\tSTRING \\t\\t\\ti_addr_state;\\n\\t\\tSTRING \\t\\t\\ti_addr_zip;\\n\\t\\tSTRING \\t\\t\\ti_addr_city;\\n\\t\\tSTRING \\t\\t\\ti_addr_county;\\n\\t\\tSTRING \\t\\t\\ti_addr_country;\\n\\t\\tINTEGER \\t\\ti_addr_type;\\n\\t\\tSTRING \\t\\t\\taddr_household_sur_name;\\n\\t\\tINTEGER \\t\\tlisted_driver_count;\\n\\t\\tINTEGER \\t\\tconfirmed_count;\\n\\t\\tINTEGER \\t\\tnot_confirmed_count;\\n\\t\\tINTEGER \\t\\tnot_found_count;\\n\\t\\tINTEGER \\t\\tdiscovered_surname_count;\\n\\t\\tINTEGER \\t\\tdiscovered_surname_exception_count;\\n\\t\\tSTRING9 \\t\\tresult_format;\\n\\t\\tDECIMAL4_1 \\trecord_version;\\n\\t\\tSTRING \\t\\t\\tprocessing_status;\\n\\t\\tSTRING \\t\\t\\tinquiry_status;\\n\\t\\tSTRING \\t\\t\\tinquiry_processing_status;\\n\\t\\tINTEGER \\t\\tbilling_type_id;\\n\\t\\tDECIMAL18_9 price;\\n\\t\\tINTEGER \\t\\tcurrency;\\n\\t\\tINTEGER \\t\\tpricing_error_code;\\n\\t\\tINTEGER \\t\\tfree;\\n\\t\\tSTRING \\t\\t\\treport_options;\\n\\t\\tINTEGER \\t\\ttransaction_code;\\n\\t\\tSTRING \\t\\t\\treturn_node_id;\\n\\t\\tSTRING \\t\\t\\trequest_node_id;\\n\\t\\tINTEGER \\t\\torder_status_code;\\n\\t\\tSTRING \\t\\t\\tproduct_line;\\n\\t\\tINTEGER \\t\\tlogin_history_id;\\n\\t\\tSTRING \\t\\t\\tip_address;\\n\\t\\tDECIMAL8_4 \\tresponse_time;\\n\\t\\tSTRING \\t\\t\\tesp_method;\\n\\t\\tINTEGER \\t\\tbatch_job_id;\\n\\t\\tINTEGER \\t\\tbatch_seq_number;\\n\\t\\tSTRING \\t\\t\\tis_active;\\n\\t\\tSTRING \\t\\t\\tuser_added;\\n\\t\\tSTRING \\t\\t\\tuser_changed;\\n\\t\\tSTRING \\t\\t\\tdate_changed;\\n\\tEND;\\n\\t\\n\\tTestTransaction_DS := DATASET([{'116122R65_CAB', 22, '6/15/2016 12:08:29 PM', 'Z', '', '4293', 'PA', 'HORACE MANN', \\n\\t\\t\\t\\t\\t'16167121100007', '990470', 'DDR', 8011322, 99900, '116122R65', 31, \\n\\t\\t\\t\\t\\t'17742STSM99 1MQ3 JXVVBA', '6/10/2016 12:00:00 AM', '318', \\n\\t\\t\\t\\t\\t'', 'W 30TH ST', 'MD', '21211', 'BALTIMORE', '', 'USA', 1, 'FUCILE', 1, 0, 1, 0, 0, 0, \\n\\t\\t\\t\\t\\t'EDITS', 2.0, 'Z', 'R', 'N', 0, 0.000000000, 0, 0, 0, 0, 0, '', 'B22222000', 100, '', 0, \\n\\t\\t\\t\\t'1\\t0.173.217.9', 0.3570, 'DriverDiscovery', 0, 0, 1, 'vin_tss', '', ''},\\n\\t\\t\\t\\t{'116122R66_CAB', 22, '6/15/2016 12:08:51 PM', 'Z', '', '4293', 'PA', 'HORACE MANN', \\n\\t\\t\\t\\t'16167121100008', '990300', 'MXC', 6024501, 10057, '116122R66', 31, \\n\\t\\t\\t\\t'17742STSM99 1MQ3 JXVVBA', '6/10/2016 12:00:00 AM', '318', \\n\\t\\t\\t\\t'', 'W 30TH ST', 'MD', '21211', 'BALTIMORE', '', 'USA', 1, 'FUCILE', 1, 0, 1, 0, 0, 2, \\n\\t\\t\\t\\t'EDITS', 2.0, 'Z', 'C', 'Y', 0, 0.000000000, 0, 0, 0, 0, 0, '', 'B22222000', 100, '', 0, \\n\\t\\t\\t\\t'10.173.217.9', 0.3900, 'DriverDiscovery', 0, 0, 1, 'vin_tss', '', 
''},\\n\\t\\t\\t\\t{'116122R67_CAB', 22, '6/15/2016 12:20:14 PM', 'Z', '', '0069', 'PA', 'USAA-YDD', '16167121100009', \\n\\t\\t\\t\\t'502473', '', 5721721, 10087, '116122R67', 24, '1300 D47YFA PRH A5FTEJ', \\n\\t\\t\\t\\t'6/15/2016 12:00:00 AM', '1003', '', 'BROOKE RD', 'MD', '20743', 'CAPITOL HEIGHTS', '', 'USA', 1, \\n\\t\\t\\t\\t'STANLEY', 1, 0, 0, 1, 0, 0, 'EDITS', 2.0, 'Z', 'R', 'N', 0, 0.000000000, 0, 0, 0, 0, 0, '', 'B22222000', \\n\\t\\t\\t\\t100, '', 0, '10.173.217.9', 0.6460, 'DriverDiscovery', 0, 0, 1, 'vin_tss', '', ''}], Layout_DD_TransactionLog);\\n\\t\\n\\tDATASET(Layout_DD_TransactionLog) addTransactions(DATASET(Layout_DD_TransactionLog) Transaction_DS) := EMBED(\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tmysql : user('username'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tserver('server'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tport('port'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdatabase('db'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpassword('password')) \\n\\t\\tINSERT INTO log_dd.transaction_log\\n\\t\\t\\t\\t(transaction_id, product_id, date_added, service_type, special_billing_id, report_code, \\n\\t\\t\\t\\treport_usage, requestor, reference_number, account_base, account_suffix, account_id, customer_id, \\n\\t\\t\\t\\tanchor_transaction_id, anchor_product_id, full_quote_back, i_date_ordered, i_addr_house_num, \\n\\t\\t\\t\\ti_addr_apt_num, i_addr_line, i_addr_state, i_addr_zip, i_addr_city, i_addr_county, i_addr_country, \\n\\t\\t\\t\\ti_addr_type, addr_household_sur_name, listed_driver_count, confirmed_count, not_confirmed_count, \\n\\t\\t\\t\\tnot_found_count, discovered_surname_count, discovered_surname_exception_count, result_format, \\n\\t\\t\\t\\trecord_version, processing_status, inquiry_status, inquiry_processing_status, billing_type_id, \\n\\t\\t\\t\\tprice, currency, pricing_error_code, free, transaction_code, return_node_id, \\n\\t\\t\\t\\trequest_node_id, order_status_code, product_line, login_history_id, ip_address, response_time, \\n\\t\\t\\t\\tesp_method, batch_job_id, batch_seq_number, is_active)\\n\\t\\t\\t\\tVALUES \\n\\t\\t\\t\\t?;\\nENDEMBED;\\n\\nNew_Transaction_DS := addTransactions(TestTransaction_DS);\\n\\nOUTPUT(New_Transaction_DS, NAMED('New_Transaction_DS'));
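Relating the syntax error back to Dan's notes earlier in this thread: the usual pattern is to declare the embedded function with no return type (so it becomes an action) and to supply one '?' placeholder per column rather than a single '?'. A cut-down sketch with placeholder connection details and only three of the columns, just to show the shape:

IMPORT mysql;

ShortRec := RECORD
    STRING  transaction_id;
    INTEGER product_id;
    STRING  date_added;
END;

// No return type, so this EMBED is an action and is invoked like OUTPUT()
addShortTransactions(DATASET(ShortRec) values) := EMBED(mysql : user('username'), server('server'),
                                                         port('port'), database('db'), password('password'))
    INSERT INTO log_dd.transaction_log (transaction_id, product_id, date_added)
    VALUES (?, ?, ?);
ENDEMBED;

addShortTransactions(DATASET([{'116122R65_CAB', 22, '2016-06-15'}], ShortRec));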
\", \"post_time\": \"2016-06-24 12:09:05\" },\n\t{ \"post_id\": 9910, \"topic_id\": 2350, \"forum_id\": 10, \"post_subject\": \"Re: System error: 1301: Memory pool exhausted: pool\", \"username\": \"sort\", \"post_text\": \"See if your issue is the same or related to \\nhttps://track.hpccsystems.com/browse/HPCC-15822\", \"post_time\": \"2016-07-06 19:24:27\" },\n\t{ \"post_id\": 9870, \"topic_id\": 2350, \"forum_id\": 10, \"post_subject\": \"Re: System error: 1301: Memory pool exhausted: pool\", \"username\": \"bforeman\", \"post_text\": \"Hi Chuck,\\n\\nI guess the first question is what is your target cluster size?\\nCan you possibly run/test this on a larger cluster?\\n\\nPerhaps you can reduce the job into smaller outputs, and then append the recordsets together at the end.\\n\\nAlso, check the JIRA to see if this is not an active issue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-06-30 18:18:59\" },\n\t{ \"post_id\": 9866, \"topic_id\": 2350, \"forum_id\": 10, \"post_subject\": \"System error: 1301: Memory pool exhausted: pool\", \"username\": \"chuck.beam\", \"post_text\": \"I am getting the error below when attempting to insert records into a MySQL database using EMBED.\\n\\nError: System error: 1301: Memory pool exhausted: pool (1216 pages) exhausted, requested 1 (0, 0), 1301, \\n\\nThe query runs fine for 100 records but I get this error when I attempt to load 1,000,000 records.\\n\\nAny suggestions?\\n\\nHere is the code:\\n\\n\\nIMPORT mysql;\\n\\ncopy_Record :=\\n record\\n varstring transaction_id;\\n integer4 product_id;\\n varstring date_added;\\n varstring service_type;\\n varstring special_billing_id;\\n varstring report_code;\\n varstring report_usage;\\n varstring requestor;\\n varstring reference_number;\\n varstring account_base;\\n varstring account_suffix;\\n integer4 account_id;\\n integer4 customer_id;\\n varstring anchor_transaction_id;\\n integer4 anchor_product_id;\\n varstring full_quote_back;\\n varstring i_date_ordered;\\n varstring i_addr_house_num;\\n varstring i_addr_apt_num;\\n varstring i_addr_line;\\n varstring i_addr_state;\\n varstring i_addr_zip;\\n varstring i_addr_city;\\n varstring i_addr_county;\\n varstring i_addr_country;\\n integer4 i_addr_type;\\n varstring addr_household_sur_name;\\n integer4 listed_driver_count;\\n integer4 confirmed_count;\\n integer4 not_confirmed_count;\\n integer4 not_found_count;\\n integer4 discovered_surname_count;\\n integer4 discovered_surname_exception_count;\\n varstring result_format;\\n varstring record_version;\\n varstring processing_status;\\n varstring inquiry_status;\\n varstring inquiry_processing_status;\\n integer4 billing_type_id;\\n varstring price;\\n integer4 currency;\\n integer4 pricing_error_code;\\n integer4 free;\\n integer4 transaction_code;\\n varstring return_node_id;\\n varstring request_node_id;\\n integer4 order_status_code;\\n varstring product_line;\\n integer4 login_history_id;\\n varstring ip_address;\\n varstring response_time;\\n varstring esp_method;\\n integer4 batch_job_id;\\n integer4 batch_seq_number;\\n end;\\n\\n// Read x number of trasactions from the database\\nDATASET(copy_Record) getTransactions(INTEGER MaxRecords) := EMBED(mysql : 
user('username'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tserver('server'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tport('port'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdatabase('db'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpassword('password')) \\n\\t\\tSELECT transaction_id, product_id, date_added, service_type, special_billing_id, report_code, \\n\\t\\t\\t\\treport_usage, requestor, reference_number, account_base, account_suffix, account_id, customer_id, \\n\\t\\t\\t\\tanchor_transaction_id, anchor_product_id, full_quote_back, i_date_ordered, i_addr_house_num, \\n\\t\\t\\t\\ti_addr_apt_num, i_addr_line, i_addr_state, i_addr_zip, i_addr_city, i_addr_county, i_addr_country, \\n\\t\\t\\t\\ti_addr_type, addr_household_sur_name, listed_driver_count, confirmed_count, not_confirmed_count, \\n\\t\\t\\t\\tnot_found_count, discovered_surname_count, discovered_surname_exception_count, result_format, \\n\\t\\t\\t\\trecord_version, processing_status, inquiry_status, inquiry_processing_status, billing_type_id, \\n\\t\\t\\t\\tprice, currency, pricing_error_code, free, transaction_code, return_node_id, \\n\\t\\t\\t\\trequest_node_id, order_status_code, product_line, login_history_id, ip_address, response_time, \\n\\t\\t\\t\\tesp_method, batch_job_id, batch_seq_number \\n\\t\\t\\tFROM log_dd.transaction_log LIMIT ?;\\nENDEMBED;\\n\\nTransaction_DS := getTransactions(1000000);\\n\\nOUT1 := OUTPUT(Transaction_DS,NAMED('Transaction_DS'));\\n\\ncopy_Record SetTransactionID(copy_Record L, INTEGER C) := TRANSFORM\\n\\tSELF.transaction_id := 'CHUCK2-ECL-' + C;\\n\\tSELF \\t\\t\\t\\t\\t\\t\\t\\t:= L;\\nEND;\\n\\nInsert_DS := PROJECT(Transaction_DS, SetTransactionID(LEFT, COUNTER));\\n\\nOUT2 := OUTPUT(Insert_DS,NAMED('Insert_DS'));\\n\\ninitialize(dataset(copy_Record) values) := EMBED(mysql : \\tuser('username'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tserver('server'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tport('port'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tdatabase('db'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tpassword('password')) \\n\\tINSERT INTO log_dd.transaction_log\\n\\t\\t\\t\\t(transaction_id, product_id, date_added, service_type, special_billing_id, report_code, \\n\\t\\t\\t\\treport_usage, requestor, reference_number, account_base, account_suffix, account_id, customer_id, \\n\\t\\t\\t\\tanchor_transaction_id, anchor_product_id, full_quote_back, i_date_ordered, i_addr_house_num, \\n\\t\\t\\t\\ti_addr_apt_num, i_addr_line, i_addr_state, i_addr_zip, i_addr_city, i_addr_county, i_addr_country, \\n\\t\\t\\t\\ti_addr_type, addr_household_sur_name, listed_driver_count, confirmed_count, not_confirmed_count, \\n\\t\\t\\t\\tnot_found_count, discovered_surname_count, discovered_surname_exception_count, result_format, \\n\\t\\t\\t\\trecord_version, processing_status, inquiry_status, inquiry_processing_status, billing_type_id, \\n\\t\\t\\t\\tprice, currency, pricing_error_code, free, transaction_code, return_node_id, \\n\\t\\t\\t\\trequest_node_id, order_status_code, product_line, login_history_id, ip_address, response_time, \\n\\t\\t\\t\\tesp_method, batch_job_id, batch_seq_number)\\n\\t\\t\\t\\tVALUES \\n\\t\\t\\t\\t(?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n\\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n\\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, 
?,\\n\\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n\\t\\t\\t\\t?, ?, ?, ?, ?, ?, ?, ?, ?, ?,\\n\\t\\t\\t\\t?, ?, ?, ?)\\n\\tENDEMBED;\\n\\nOUT3 := initialize(Insert_DS);\\n\\nSEQUENTIAL(OUT1, OUT2, OUT3);\\n\\n\\n\\n
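One way to act on Bob's suggestion of breaking the job into smaller pieces, sketched here with an illustrative batch size and reusing the initialize() action defined above, is to feed the INSERT a slice of the projected dataset at a time with CHOOSEN instead of the single million-row OUT3:

BatchSize := 100000;   // illustrative; tune to what the cluster handles comfortably

// Returns an action that inserts the n-th batch (1-based)
InsertBatch(UNSIGNED n) := initialize(CHOOSEN(Insert_DS, BatchSize, ((n - 1) * BatchSize) + 1));

// Run the batches one after another rather than one huge insert
SEQUENTIAL(OUT1, OUT2, InsertBatch(1), InsertBatch(2), InsertBatch(3));   // ...and so on up to batch 10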
\", \"post_time\": \"2016-06-29 13:16:24\" },\n\t{ \"post_id\": 9880, \"topic_id\": 2352, \"forum_id\": 10, \"post_subject\": \"Re: Casting string to dataset handle\", \"username\": \"srbhkmr\", \"post_text\": \"Okay. I just realized probably this can't be done as MACROs need to expand the code and make a compiled unit for runtime.\", \"post_time\": \"2016-07-01 12:34:52\" },\n\t{ \"post_id\": 9878, \"topic_id\": 2352, \"forum_id\": 10, \"post_subject\": \"Re: Casting string to dataset handle\", \"username\": \"srbhkmr\", \"post_text\": \"Thanks for the reply Richard.\\nYes, MACROs can act on Datasets of varying layouts. I think I have already achieved that.\\n\\nI will illustrate my problm with following example:\\n\\nI have a datasets.ecl with all datasets defined in it.\\n\\nEXPORT datasets := MODULE\\n EXPORT D1_layout := {...};\\n ...\\n\\n EXPORT D1 := DATASET(...);\\n EXPORT D2 := DATASET(...);\\n ...\\n EXPORT D100 := DATASET(...);\\nEND;\\n
\\n\\nThen I want to run the following piece of MACRO on a selected dataset only and I want to defer that decision to run-time.\\n\\nIMPORT * FROM $.datasets;\\n\\nsummarize(inDataset, inlayout, cut_off) := MACRO\\n ...\\nENDMACRO;\\n\\n//########################################################\\n\\nsummarize(D1, D1_layout, 50); // Works fine \\nsummarize(D12, D12_layout, 50); // Works fine \\n\\n//########################################################\\n\\nSTRING dataset_to_use := '' : STORED('datast_to_use');\\n\\nsummarize((DATASET)dataset_to_use, (RECORD OF)(DATASET(datast_to_use)), 50); //?\\n// How can I cast a string as DATASET Or RECORD layout definition?\\n\\n//########################################################\\n\\n
\\n\\nAt runtime I want to make the decision on what Dataset should the MACRO code run. STORED variables are one way to achieve that but the value I get in STORED variable is a STRING and typecasting it to a DATASET or RECORD won't work directly as I attempted.\\n\\nso, is there any way I can make this choice at run-time i.e. on which dataset D1,D2,..Dn should my summarize.ecl run?\\n\\nThanks,\", \"post_time\": \"2016-07-01 12:30:09\" },\n\t{ \"post_id\": 9874, \"topic_id\": 2352, \"forum_id\": 10, \"post_subject\": \"Re: Casting string to dataset handle\", \"username\": \"rtaylor\", \"post_text\": \"srbhkmr,\\n\\nThe concept of performing a standard process on any dataset with any record structure is exactly what the MACRO and FUNCTIONMACRO structures are for.\\n\\nThese topics are covered in our online eLearning course "Applied ECL – ECL Code Generation Tools" and you may read up on them here: https://hpccsystems.com/download/documentation/ecl-language-reference/html/MACRO_Structure.html and here: https://hpccsystems.com/download/documentation/ecl-language-reference/html/FUNCTIONMACRO_Structure.html\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-01 12:08:26\" },\n\t{ \"post_id\": 9872, \"topic_id\": 2352, \"forum_id\": 10, \"post_subject\": \"Casting string to dataset handle\", \"username\": \"srbhkmr\", \"post_text\": \"I have 100's of datasets and their record definitions defined in a module and EXPORTed.\\nIn another ECL-macro script I want to run some summarization code on these datasets(which are already IMPORTed).\\nI want to run this summarization one at a time with only on a selected dataset.\\n\\nAt runtime what I can provide is name of dataset as STRING through a STORED variable. so, how does one typecast a STRING variable into a Dataset handle, if it's possible?\\n\\nOr any other neat way to do this...\\n\\nThanks,\", \"post_time\": \"2016-07-01 11:51:12\" },\n\t{ \"post_id\": 9982, \"topic_id\": 2376, \"forum_id\": 10, \"post_subject\": \"Re: Defining a SET OF DATASETS from child datasets.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard\\n\\nDone: HPCC-15924\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-07-22 09:15:41\" },\n\t{ \"post_id\": 9968, \"topic_id\": 2376, \"forum_id\": 10, \"post_subject\": \"Re: Defining a SET OF DATASETS from child datasets.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou should add a JIRA ticket asking for the SET function to be expanded to include generating a set of Datasets so that your first code attempt would actually work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-18 15:40:34\" },\n\t{ \"post_id\": 9954, \"topic_id\": 2376, \"forum_id\": 10, \"post_subject\": \"Re: Defining a SET OF DATASETS from child datasets.\", \"username\": \"Allan\", \"post_text\": \"\\n[paddr.keys]\\n
\\nCompiles and runs but does not return correct results.\", \"post_time\": \"2016-07-13 09:23:35\" },\n\t{ \"post_id\": 9938, \"topic_id\": 2376, \"forum_id\": 10, \"post_subject\": \"Defining a SET OF DATASETS from child datasets.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis should be simple but this has me stumped.\\n\\nI have a set of records whose single field is a DATASET.\\ne.g.\\n\\nAddressDS := RECORD\\n DATASET(RECORDOF(AddressKey)) keys;\\nEND;\\n\\nAddressDS GetAddressRecs(RECORDOF(QueryAddressWords) L) := TRANSFORM\\n SELF.keys := AddressKey(SomeFilterOnWords(L.word));\\nEND;\\n\\npaddr := PROJECT(QueryAddressWords,GetAddressRecs(LEFT));\\n
\\nAll these datasets are obviously the same type so I now want to MERGEJOIN or do a STEPPED JOIN on a SET OF DATASETS. But how do I convert these child datasets into a set of datasets that can be used as input to either MERGEJOIN or JOIN?\\n\\nThe obvious:\\n\\nSET(paddr,keys);\\n
\\nfails syntax check with:\\nError: syntax error near ")" : expected &&, =, <>, '+', '-', '&', '.', '[' (48, 29), 3002, Filter_Query\\n
\\nAny ideas?\\nYours\\nAllan\\nBy the way, I have alternative methods (ROLLUP) to achieve my result, but I would still like to know how to do this.\", \"post_time\": \"2016-07-12 11:23:34\" },\n\t{ \"post_id\": 9970, \"topic_id\": 2380, \"forum_id\": 10, \"post_subject\": \"Re: OUTPUT dataset with QUOTE\", \"username\": \"rtaylor\", \"post_text\": \"This might be a little cleaner:AddQuotes(STRING s) := '"' + TRIM(s) + '"';\\nquotedDataset := PROJECT(\\n rawDataset,\\n TRANSFORM(\\n RECORDOF(rawDataset),\\n SELF.firstname := AddQuotes(LEFT.firstname);\\n SELF.lastName := AddQuotes(LEFT.lastname);\\n SELF := LEFT;\\n )\\n);
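A short usage sketch against the allPeople dataset from the original post further down this thread; the widened STRING17/STRING27 layout is an assumption, added so the two extra quote characters fit:

QuotedLayout := RECORD
    UNSIGNED1 PersonID;
    STRING17  name;      // two characters wider than the original STRING15
    STRING27  Address;   // two characters wider than the original STRING25
END;

quotedPeople := PROJECT(allPeople,
                        TRANSFORM(QuotedLayout,
                                  SELF.name    := AddQuotes(LEFT.name);
                                  SELF.Address := AddQuotes(LEFT.Address);
                                  SELF := LEFT));

OUTPUT(quotedPeople,,'test_quoted.csv',CSV);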
You just need to ensure that your string fields have enough length to accommodate the extra two characters in addition to the actual data.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-18 15:53:51\" },\n\t{ \"post_id\": 9962, \"topic_id\": 2380, \"forum_id\": 10, \"post_subject\": \"Re: OUTPUT dataset with QUOTE\", \"username\": \"dsanchez\", \"post_text\": \"I can think of two workarounds for this (since that option is not available I think).\\n1- You could add the space (or other characters that you know are always present) to the delimiters or terminators character list so it understands those values as problematic and adds the quotes itself.\\n2- Less "hacky" but more work. You can do a project before the output and remove the quote option from the output. Something like this:\\n\\nquotedDataset := PROJECT(\\n rawDataset,\\n TRANSFORM(\\n RECORDOF(rawDataset),\\n SELF.name := '"' + LEFT.name + '"';\\n SELF.lastName := '"' + LEFT.name + '"'\\n SELF := LEFT;\\n )\\n);\\n
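The second workaround above has two small slips: the lastName assignment is built from LEFT.name and is missing its terminating semicolon. A corrected sketch of the same idea, keeping the field names used in the snippet:

quotedDataset := PROJECT(
    rawDataset,
    TRANSFORM(
        RECORDOF(rawDataset),
        SELF.name     := '"' + LEFT.name + '"';
        SELF.lastName := '"' + LEFT.lastName + '"';
        SELF := LEFT;
    )
);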
\", \"post_time\": \"2016-07-14 13:37:51\" },\n\t{ \"post_id\": 9958, \"topic_id\": 2380, \"forum_id\": 10, \"post_subject\": \"OUTPUT dataset with QUOTE\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n When I try to output a dataset, like this:\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING15 name;\\n STRING25 Address;\\nEND;\\n\\nallPeople := DATASET([ {1,'Fred Smith','1234 Avenue 1'},\\n {2,'Blow,Joe','2345 Avenue 2'},\\n {3,'Smith, Jane','#12, Avenue 4'}],Layout_Person);\\n\\nOUTPUT(AllPeople,,'test.csv',CSV(QUOTE('"')));\\n
\\n\\nI get the output as:\\n\\n1,Fred Smith,1234 Avenue 1\\n2,"Blow,Joe",2345 Avenue 2\\n3,"Smith, Jane","#12, Avenue 4"\\n\\nIt is smart that it is enclosing only the problematic values with quotes. \\nIs there any way I can get all string values enclosed with quotes, like this?\\n\\n1,"Fred Smith","1234 Avenue 1"\\n2,"Blow,Joe","2345 Avenue 2"\\n3,"Smith, Jane","#12, Avenue 4"\\n\\nCheers\\nSrini\", \"post_time\": \"2016-07-14 00:14:16\" },\n\t{ \"post_id\": 9980, \"topic_id\": 2382, \"forum_id\": 10, \"post_subject\": \"Re: Best way to index and search text\", \"username\": \"dsanchez\", \"post_text\": \"That was really really helpful! Thank you. Very interesting tutorial (but a little outdated I think, definitely worth keeping it in bookmarks)\", \"post_time\": \"2016-07-22 08:26:48\" },\n\t{ \"post_id\": 9975, \"topic_id\": 2382, \"forum_id\": 10, \"post_subject\": \"Re: Best way to index and search text\", \"username\": \"bforeman\", \"post_text\": \"Hi Daniel,\\n\\nTo give you some ideas, check out David Bayliss' tutorial at:\\nhttp://www.dabhand.org/ECL/construct_a_simple_bible_search.htm\\n\\nIn it, he discusses text search options, and demonstrates how to build an inverted index to use for text searching. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-07-20 17:39:49\" },\n\t{ \"post_id\": 9960, \"topic_id\": 2382, \"forum_id\": 10, \"post_subject\": \"Best way to index and search text\", \"username\": \"dsanchez\", \"post_text\": \"Hi all,\\n\\nI am trying to search a text fields (addresses actually). In other systems I would be tokenizing the text and creating a token index with some search tree engine and the queries would be reasonably fast.\\n\\nTo do the same thing on HPCC I was thinking about child datasets with the tokens for each record, then normalize that and index the token dataset. Searching would be done by tokenizing the search term, looking at that index, assigning scores on how many tokens each record had matched and returning the highest one.\\n\\nI don't want to reinvent the wheel so... Is there anything there to make this job easier?\\n\\nCheers!\\nDaniel\", \"post_time\": \"2016-07-14 11:00:39\" },\n\t{ \"post_id\": 10151, \"topic_id\": 2441, \"forum_id\": 10, \"post_subject\": \"Re: Is there a way to make a workunit read-only?\", \"username\": \"james.wilson\", \"post_text\": \"Created: https://track.hpccsystems.com/browse/HPCC-16024\", \"post_time\": \"2016-07-29 07:45:15\" },\n\t{ \"post_id\": 10121, \"topic_id\": 2441, \"forum_id\": 10, \"post_subject\": \"Re: Is there a way to make a workunit read-only?\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nNo, I think you ARE the first! \\n\\nSubmit a JIRA ticket asking for the feature (http://track.hpccsystems.com).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-28 17:28:34\" },\n\t{ \"post_id\": 10111, \"topic_id\": 2441, \"forum_id\": 10, \"post_subject\": \"Is there a way to make a workunit read-only?\", \"username\": \"james.wilson\", \"post_text\": \"I'm doing some debugging where I'm calling various bits of code. I'm being careful to read through it all first to make sure there aren't any OUTPUTs to files but there's always the danger of missing one. It would be great to have an option that aborted if the workunit attempted to modify (write/delete/rename) any files, and I can't be the first person to have thought this, but I can't find anything in the documentation for #OPTION or #WORKUNIT. 
Does anyone know if there is such a thing?\", \"post_time\": \"2016-07-28 16:10:11\" },\n\t{ \"post_id\": 12353, \"topic_id\": 2563, \"forum_id\": 10, \"post_subject\": \"Re: WSECL - Json not returning\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nSounds to me like it's time for a JIRA
\\n\\nRichard\", \"post_time\": \"2016-10-24 15:42:03\" },\n\t{ \"post_id\": 12333, \"topic_id\": 2563, \"forum_id\": 10, \"post_subject\": \"Re: WSECL - Json not returning\", \"username\": \"rqg0717\", \"post_text\": \"Hi David,\\n\\nI wonder if you have figured it out. I am getting the same issue. Please let me know. Thank you.\\n\\nJames\", \"post_time\": \"2016-10-21 22:08:11\" },\n\t{ \"post_id\": 10503, \"topic_id\": 2563, \"forum_id\": 10, \"post_subject\": \"WSECL - Json not returning\", \"username\": \"David Dasher\", \"post_text\": \"Hi all\\n\\nWe have one query in Roxie which is returning data via most available types Table, XML and SOAP, however nothing will return via JSON. I have other queries which are very similar which return Json with no problems at all. \\n\\nI created a separate query which returns the same dataset only aggregated by month instead of day and it works fine. \\n\\nAll I get is\\n\\n
{"dashboardmultiuserengagementbysiteResponse": {"sequence": 0, "Results": {"result_1": {"Row": []}}}}
\\n\\nCan anyone point me in the right direction?\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2016-08-11 11:28:10\" },\n\t{ \"post_id\": 10813, \"topic_id\": 2673, \"forum_id\": 10, \"post_subject\": \"Re: Building a custom audit report\", \"username\": \"John Meier\", \"post_text\": \"Nice! It's only been about 1 week after the training and I still feel like a nube! The trick is learning how to take what I know (about 35yrs of programming) and re-think how I get ECL to do it. Thanks for the quick response!\", \"post_time\": \"2016-08-19 19:39:53\" },\n\t{ \"post_id\": 10793, \"topic_id\": 2673, \"forum_id\": 10, \"post_subject\": \"Re: Building a custom audit report\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nSince you appear to have several records for each you could just assemble an inline dataset for each then append them all together, something like this:AuditRptRec := RECORD\\n STRING20 Prompt;\\n STRING20 Cnt;\\nEND;\\nStartDS := DATASET([],AuditRptRec);\\nAuditRecs(DATASET(AuditRptRec) InDS,UNSIGNED4 C1,UNSIGNED4 C2,UNSIGNED4 C3) := FUNCTION \\n ThisdS := DATASET([ {'Total records.........:', C1},\\n\\t\\t\\t{'Header records........:', C2},\\n\\t\\t\\t{'Data Records..........:', C3}],\\n\\t\\t\\tAuditRptRec);\\n RETURN InDS + ThisDS;\\nEND;\\n\\t\\nds1 := AuditRecs(StartDS,120,3,117); \\t\\t\\t\\t\\t\\t\\t\\t\\nds2 := AuditRecs(ds1,150,1,149); \\t\\t\\t\\t\\t\\t\\t\\t\\n\\nds2;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-19 17:38:10\" },\n\t{ \"post_id\": 10783, \"topic_id\": 2673, \"forum_id\": 10, \"post_subject\": \"Building a custom audit report\", \"username\": \"John Meier\", \"post_text\": \"I'm trying to develop a custom audit report for files being ingested into HPCC. It would contain basically two columns: description and count. For example:\\nTotal records.........: 120\\nHeader records......: 003\\nData Records........: 117\\nIt would be much more detailed but that's the idea. The problem is I cannot find an example anywhere in the documentation that would lend to how to accomplish this. I thought of creating a table, but how would you add a single "record" at a time? Then, when everything is done, I could write the output to a dataset. Is there a better way in ECL to accomplish this? Is there an example of how to do this? \\n\\nI appreciate the assistance.\", \"post_time\": \"2016-08-19 16:07:09\" },\n\t{ \"post_id\": 10853, \"topic_id\": 2693, \"forum_id\": 10, \"post_subject\": \"Re: LOOP with DICTIONARY fails\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nI verified the same errors on my system, and tried a few variations that were not successful. I would strongly recommend that you submit this report to the Community Issue Tracker (JIRA)\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-08-22 14:18:55\" },\n\t{ \"post_id\": 10833, \"topic_id\": 2693, \"forum_id\": 10, \"post_subject\": \"LOOP with DICTIONARY fails\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n Below is a self-contained code which fails in THOR when I try to lookup through a rather large dictionary. (it works in HTHOR though)\\n\\n#OPTION('outputLimit',2000);\\nRec1 := RECORD \\n\\tUNSIGNED4 Id1;\\n\\tUNSIGNED4 Id2;\\nEND;\\n\\nDS1 := DATASET([{1,2},{10000001,10000002},{20000001,20000002},{30000001,30000002}],Rec1);\\nTenMil := DATASET(10000000,TRANSFORM({UNSIGNED Val},SELF.Val := COUNTER - 1));\\nDS1Exploded := JOIN(DS1,TenMil,TRUE,TRANSFORM(Rec1,SELF.Id1 := LEFT.Id1 + RIGHT.Val; SELF.Id2 := LEFT.Id2 + RIGHT.Val),ALL);\\nDict1 := DICTIONARY(DS1Exploded,{Id1=>Id2}); // : INDEPENDENT;\\n\\nRec2 := RECORD\\n\\tUNSIGNED4 Id;\\n\\tSTRING1\\tGrpId;\\n\\tBOOLEAN IsNextIter;\\nEND;\\n\\nDS2 := DATASET([{1,'A',TRUE},{10000001,'B',TRUE},{20000001,'C',TRUE},{30000001,'D',TRUE}],Rec2);\\n\\n\\nloopBody(DATASET(Rec2) ds) :=\\n\\t\\tPROJECT(ds,TRANSFORM(Rec2,\\n SELF.IsNextIter := FALSE;SELF := LEFT)) + \\n\\tPROJECT(ds,TRANSFORM(Rec2,\\n SELF.Id := Dict1[LEFT.Id].Id2;\\n SELF.GrpId := LEFT.GrpId;\\n\\t\\tSELF.IsNextIter := TRUE;))(Id >0);\\n\\nOUTPUT(LOOP(DS2,LEFT.IsNextIter,loopBody(ROWS(LEFT))));\\n
\\n\\nThis is the error that I get\\nWarning: assert(!rawFilename) failed - file: /var/lib/jenkins/workspace/CE-Candidate-5.4.6-1/CE/centos-7.0-x86_64/HPCC-Platform/ecl/hqlcpp/hqlhtcpp.cpp, line 10366\\nInfo: Mismatch in subminor version number (5.4.2 v 5.4.6) (0, 0 - unknown)\\nWarning: (0,0): error C3000: assert(!rawFilename) failed - file: /var/lib/jenkins/workspace/CE-Candidate-5.4.6-1/CE/centos-7.0-x86_64/HPCC-Platform/ecl/hqlcpp/hqlhtcpp.cpp, line 10366
\\n\\nAnd when I create the DICTIONARY as an INDEPENDENT workflow item, it doesn't fail immediately, but waits for some time and then throws the following error:\\nError: System error: 4: MP link closed (XXX.XXX.XXX.XXX:XXXXX)
\\n\\nLet me know how to run this code efficiently in thor. I am okay for alternate approaches too. \\n\\nCheers\\nSrini\", \"post_time\": \"2016-08-22 02:19:49\" },\n\t{ \"post_id\": 11403, \"topic_id\": 2863, \"forum_id\": 10, \"post_subject\": \"Re: SuperFileExists and Dataset Issues\", \"username\": \"chuck.beam\", \"post_text\": \"Thanks Richard!\\n\\nThat seems to have solved my issue!\\n\\nChuck\", \"post_time\": \"2016-09-27 13:48:30\" },\n\t{ \"post_id\": 11393, \"topic_id\": 2863, \"forum_id\": 10, \"post_subject\": \"Re: SuperFileExists and Dataset Issues\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nThe SuperFileExists() function is only interrogating the DFU metadata, so it shouldn't be run from every node of your Thor cluster to get you the answer. It probably works on hThor because that's a 1-node "cluster." \\n\\nSo if you need to run this code on Thor, try adding NOTHOR around that function call, like this:FILE_EXISTS := NOTHOR(STD.File.SuperFileExists('~' + AI_HomeListing.Files.ATTR_LOG_FILE));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-27 13:33:54\" },\n\t{ \"post_id\": 11383, \"topic_id\": 2863, \"forum_id\": 10, \"post_subject\": \"Re: SuperFileExists and Dataset Issues\", \"username\": \"chuck.beam\", \"post_text\": \"New Information!\\n\\nIf I run the code on hthor_dev, it works.\\n\\nNot sure why?\", \"post_time\": \"2016-09-27 13:14:56\" },\n\t{ \"post_id\": 11373, \"topic_id\": 2863, \"forum_id\": 10, \"post_subject\": \"SuperFileExists and Dataset Issues\", \"username\": \"chuck.beam\", \"post_text\": \"I have code to check if a SuperFile exists and combine the existing data with a new record. When the code executes, the workunit hangs and eventually times out.\\n\\nHere is the error message:\\nGraph graph6[45], Timeout receiving from slaves after graph sent (in item 5)\\n\\nMy code is below.\\n\\nMy work unit is on Alpha DEV - W20160927-085828\\n\\nCan someone tell me what I am doing wrong?\\n\\nThanks!\\n\\nEXPORT fn_LogAttributeBuild(INTEGER RUN_DATE, DATASET(RAW_MLS_REC) RAW_MLS_DS, DATASET(ATT_REC) FINAL_ATTR_DS) := FUNCTION\\n\\n // Set Load Date and Work Unit\\n TOTAL_MLS_RECORDS := COUNT(RAW_MLS_DS);\\n TOTAL_ATTR_RECORDS := COUNT(FINAL_ATTR_DS); \\n TOTAL_ACTIVE_RECORDS := COUNT(FINAL_ATTR_DS(ACTIVE_FLAG = 'Y')); \\n BLANK_STATUS_RECORDS := COUNT(FINAL_ATTR_DS(MLS_LIST_STATUS = ''));\\n INSERTED_RECORDS := COUNT(FINAL_ATTR_DS(INSERT_REC = TRUE));\\n EXPIRED_RECORDS := COUNT(FINAL_ATTR_DS(EXPIRE_REC = TRUE));\\n UPDATED_RECORDS := COUNT(FINAL_ATTR_DS(UPDATE_REC = TRUE));\\n \\n ATTR_LOG_DS := DATASET([{RUN_DATE, \\n TOTAL_MLS_RECORDS,\\n TOTAL_ATTR_RECORDS,\\n TOTAL_ACTIVE_RECORDS,\\n BLANK_STATUS_RECORDS,\\n INSERTED_RECORDS,\\n EXPIRED_RECORDS,\\n UPDATED_RECORDS,\\n WORKUNIT}\\n ],ATTR_LOG_REC);\\n \\n FILE_EXISTS := STD.File.SuperFileExists('~' + AI_HomeListing.Files.ATTR_LOG_FILE);\\n UPDATE_ATTR_LOG_DS := IF(FILE_EXISTS, \\n DATASET('~' + AI_HomeListing.Files.ATTR_LOG_FILE, AI_HomeListing.Layouts.ATTR_LOG_REC, THOR) + ATTR_LOG_DS, \\n ATTR_LOG_DS);\\n\\n \\n\\n OUT1 := OUTPUT(UPDATE_ATTR_LOG_DS, NAMED('UPDATE_ATTR_LOG_DS'));\\n \\n // Now promote the master file\\n/* PromoteMasterFile\\t:= AI_Common.FileUtil.FN_OutputAndPromoteFile( UPDATE_ATTR_LOG_DS,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'~' + AI_HomeListing.Files.ATTR_LOG_FILE_PREFIX, \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAI_HomeListing.Files.ATTR_LOG_FILE_SUFFIX,\\n WORKUNIT);\\n \\n*/\\n SEQUENTIAL(OUT1);\\n \\n RETURN TRUE;\\nEND;
\", \"post_time\": \"2016-09-27 13:04:42\" },\n\t{ \"post_id\": 11611, \"topic_id\": 2873, \"forum_id\": 10, \"post_subject\": \"Re: Error - Failed to read key header: file too small, could\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello,\\n\\nI think it was related to layout mismatch. I figured that out and resolved it. Thanks for checking.\\n\\n-Nilesh\", \"post_time\": \"2016-09-29 17:59:50\" },\n\t{ \"post_id\": 11493, \"topic_id\": 2873, \"forum_id\": 10, \"post_subject\": \"Re: Error - Failed to read key header: file too small, could\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\ncan you provide a sample of your code that produced this error? I have not seen that one before.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-09-27 19:17:03\" },\n\t{ \"post_id\": 11433, \"topic_id\": 2873, \"forum_id\": 10, \"post_subject\": \"Error - Failed to read key header: file too small, could not\", \"username\": \"nileshdchavan\", \"post_text\": \"Meaning of following Error -\\n\\nError: System error: 0: Graph[1], SLAVE #12 [10.144.106.12:6600]: Graph[1], workunitwrite[5]: Failed to read key header: file too small, could not read 256 bytes, (0, 0), 0,\", \"post_time\": \"2016-09-27 16:07:03\" },\n\t{ \"post_id\": 11621, \"topic_id\": 2891, \"forum_id\": 10, \"post_subject\": \"MP Link Closed Erro\", \"username\": \"nileshdchavan\", \"post_text\": \"Any idea about this error -\\n\\n\\nError: System error: 4: MP link closed (10.XXX.XXX.5:23200) // masked the IP address\\n\\nIs this a system related issue? What does this signifies?\", \"post_time\": \"2016-09-29 18:01:33\" },\n\t{ \"post_id\": 11813, \"topic_id\": 2901, \"forum_id\": 10, \"post_subject\": \"Re: REAL4 Display\", \"username\": \"rtaylor\", \"post_text\": \"I don't understand why that makes a difference. Please elucidate.
Because an 8-byte IEEE float holds a much closer approximation of 0.4 than a 4-byte float can, so the REAL8 value displays as 0.4 while the REAL4 value shows the rounding error.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-05 16:18:01\" },\n\t{ \"post_id\": 11703, \"topic_id\": 2901, \"forum_id\": 10, \"post_subject\": \"Re: REAL4 Display\", \"username\": \"georgeb2d\", \"post_text\": \"I just did this\\n\\nREAL4 nighttimedrvcap := 0.4;\\nOUTPUT (nighttimedrvcap, NAMED('nighttimedrvcap'));\\n\\n\\nREAL8 nighttimedrvcap2 := 0.4;\\nOUTPUT (nighttimedrvcap2, NAMED('nighttimedrvcap2'));
\\n\\nFor nighttimedrvcap I got:\\n0.4000000059604645\\n\\nFor nighttimedrvcap2 I got:\\n0.4\\n\\nI don't understand why that makes a difference. Please elucidate.\\n\\nAnother note:\\nI changed a layout from REAL4 to REAL8, then produced the results with HPCC code. Then I went to contents in ECL watch and the same result had occurred. \\n\\nThis makes no sense to me but I am sure there is a good answer.\\n\\nThanks.\", \"post_time\": \"2016-09-30 20:23:18\" },\n\t{ \"post_id\": 11683, \"topic_id\": 2901, \"forum_id\": 10, \"post_subject\": \"Re: REAL4 Display\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. That is what I needed.\", \"post_time\": \"2016-09-30 15:02:40\" },\n\t{ \"post_id\": 11643, \"topic_id\": 2901, \"forum_id\": 10, \"post_subject\": \"Re: REAL4 Display\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,\\n\\nThis URL pretty much explains the issue: http://stackoverflow.com/questions/2100490/floating-point-inaccuracy-examples\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-09-30 13:58:19\" },\n\t{ \"post_id\": 11631, \"topic_id\": 2901, \"forum_id\": 10, \"post_subject\": \"REAL4 Display\", \"username\": \"georgeb2d\", \"post_text\": \"Why does this code:\\n\\nREAL4 nighttimedrvcap := 0.4;\\nOUTPUT (nighttimedrvcap, NAMED('nighttimedrvcap'));\\n
\\nDisplay: \\n0.4000000059604645\\n\\nI guess this has to do with precision and machine language, but a user is asking me this question so I want to give a good answer. \\n\\nPlease assist. \\n\\nThanks.\", \"post_time\": \"2016-09-30 12:21:18\" },\n\t{ \"post_id\": 12021, \"topic_id\": 2953, \"forum_id\": 10, \"post_subject\": \"Re: SOAPCALL record set IN and record set OUT\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nTry the Programmer's Guide article: "SOAPCALL from Thor to Roxie" -- the second example is recordset in, recordset out.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-17 15:51:05\" },\n\t{ \"post_id\": 11913, \"topic_id\": 2953, \"forum_id\": 10, \"post_subject\": \"SOAPCALL record set IN and record set OUT\", \"username\": \"omnibuzz\", \"post_text\": \"Hi,\\n I am trying to follow the documentation for the SOAPCALL. When I try the following:\\n\\n//recordset in, recordset out\\nManyRec2 := SOAPCALL(InputDataset,ipspw,svc,{STRING500 InData := 'Some Input Data'},DATASET(OutRec1));\\n
\\n\\nThe SOAPCALL is made with 'Some Input Data' as the input (for each row in the InputDataset) and not the actual values from the InputDataset. \\n\\nIf I try to remove the default value and I try something like this:\\nInputDataset := DATASET([{'sdsd'},{'rtytyt'}],{STRING500 InData});\\nSOAPCALL(InputDataset,'https://127.0.0.1:8022/','MyModule.SomeService',{STRING500 InData});\\n
\\n\\nIt gives the following error: \\nError: Need to supply a value for field 'InData' (2, 72)
\\n\\nCan you help.\\nThanks\\nSrini\", \"post_time\": \"2016-10-09 15:54:32\" },\n\t{ \"post_id\": 12071, \"topic_id\": 2963, \"forum_id\": 10, \"post_subject\": \"Re: DENORMALIZE condition\", \"username\": \"rqg0717\", \"post_text\": \"Thank you for your reply, Mr. Taylor. I think I have to trim the original data in order to match the IDs.\", \"post_time\": \"2016-10-17 19:15:00\" },\n\t{ \"post_id\": 12041, \"topic_id\": 2963, \"forum_id\": 10, \"post_subject\": \"Re: DENORMALIZE condition\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nThe LEFT.ID=RIGHT.ID expression is a Boolean expression that specifies how to link the parent records to the child records. In this example, "ID" is the name of the linking fields in the parent and child recs. \\n\\nYour example code didn't show the RECORD structures of your patient and medication files, so all I can say is that you need to make that expression refer to the specific fields that establish the link between the records. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-17 16:44:31\" },\n\t{ \"post_id\": 11923, \"topic_id\": 2963, \"forum_id\": 10, \"post_subject\": \"DENORMALIZE condition\", \"username\": \"rqg0717\", \"post_text\": \"Dear all,\\n\\nThis is James. I am new to ECL. I am trying to merge two sets of data using DENORMALIZE. in DENORMALIZE(parentrecset, childrecset, condition, transform [,LOCAL] [,NOSORT]), I was wondering how to set condition to something like LEFT.ID contains RIGHT.ID instead of using LEFT.ID = RIGHT.ID. I have tried to use STD.Str.Contains but it does not work since LEFT.ID and RIGHT.ID are record sets. \\n\\n\\nIMPORT $, STD;\\np := $.Patient;\\nm := $.Medication;\\n\\ncombPM := Record\\n\\tp.Layout_patient;\\n\\tunsigned1 mCount;\\n\\tdataset(m.Layout_medication) mRecs{maxcount(100)};\\nend;\\n\\ncombPM parentMove(p.Layout_patient L) := transform\\n\\tself.mCount := 0;\\n\\tself.mRecs := [];\\n\\tself := L;\\nend;\\n\\nparentOnly := project(p.File, parentMove(left));\\n\\ncombPatMO childMove(combPM L, m.Layout_medication R, INTEGER C):= transform\\n\\tself.mCount := C;\\n\\tself.mRecs := L.mRecs + R;\\n\\tself := L;\\nend;\\n\\nEXPORT LinkPatientMO := DENORMALIZE(parentOnly, m.File,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t STD.Str.Contains(RIGHT.ID, LEFT.ID, false), //STD.Str.Contains('Medication/Patient/Patient-1111','Patient-1111', false);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t childMove(LEFT, RIGHT, COUNTER))\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t :PERSIST('~demo::PERSIST::PatientMedication');\\n\\n
\\n\\nPlease advise. Thanks a lot.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-10 15:41:35\" },\n\t{ \"post_id\": 12081, \"topic_id\": 2971, \"forum_id\": 10, \"post_subject\": \"Re: how to get the layout/record structure of logical file\", \"username\": \"JimD\", \"post_text\": \"Also...\\n\\nFor CSV files:\\n\\nIf you spray a CSV file and the first row has column header information, you can use the Record Structure Present checkbox in ECL watch when you spray. \\n\\nIf you are using DFUPlus to spray, specify recordstructurepresent=1 for that feature.\\n\\nIf you are using the Standard Library method to spray (STD.File.SprayVariable) use the recordStructurePresent parameter. \\n\\nThis ensures the ECL tab is populated with the RECORD layout in ECL.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-10-17 19:54:49\" },\n\t{ \"post_id\": 12051, \"topic_id\": 2971, \"forum_id\": 10, \"post_subject\": \"Re: how to get the layout/record structure of logical file\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nIf the file was created in HPCC, then you can simply open the Logical Files Detail page for that file and the RECORD structure will be on the ECL tab.\\n\\nIf the file was just sprayed, then you need to either get the file's provider to tell you the structure, or explore it yourself and determine what you can about it from the data itself, or find whoever has already worked with the file and get the RECORD structure from them.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-17 16:49:57\" },\n\t{ \"post_id\": 11951, \"topic_id\": 2971, \"forum_id\": 10, \"post_subject\": \"how to get the layout/record structure of logical file\", \"username\": \"nileshdchavan\", \"post_text\": \"I have a logical file which i need to read.\\n\\nLogical File - '~chavannd::Master::Analysis::outputNodes'\\n\\nHow do i get the layout of this logical file? Is there a way in ECL to do that? I have no idea about the layout that was used to save the data. \\n\\nPlease advise. Thank you.\\n\\n-Nilesh\", \"post_time\": \"2016-10-13 19:10:36\" },\n\t{ \"post_id\": 11991, \"topic_id\": 2981, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed Error\", \"username\": \"chuck.beam\", \"post_text\": \"I have discovered, this error only occurs on platforms running 6.0.x. Alpha-DEV THOR 21\\n\\nThe code runs fine on platforms running 5.6.x. 
Alpha-DEV THOR 11\\n\\nAny help would be much appreciated.\\n\\nChuck\", \"post_time\": \"2016-10-17 13:16:07\" },\n\t{ \"post_id\": 11981, \"topic_id\": 2981, \"forum_id\": 10, \"post_subject\": \"Re: MP link closed Error\", \"username\": \"chuck.beam\", \"post_text\": \"Here is what I am seeing in the logs:\\n\\n00000905 2016-10-14 08:24:58.563 16533 16533 "Processing graph - graph(graph11, 82)"\\n00000906 2016-10-14 08:24:58.563 16533 16533 "preStart - activity(ch=0, denormalizegroup, 87)"\\n00000907 2016-10-14 08:24:58.573 16533 16533 ",Progress,Thor,StartSubgraph,thor50_42b,W20161014-082019,11,82,thor50_42,thor50_42.thor,thor50_42_ctqa.thor"\\n00000908 2016-10-14 08:24:58.573 16533 31009 "process - activity(ch=0, denormalizegroup, 87)"\\n00000909 2016-10-14 08:24:58.574 16533 31009 "Sort setup cosort=false, needconnect=true has key serializer - activity(ch=0, denormalizegroup, 87)"\\n0000090A 2016-10-14 08:24:58.574 16533 31009 "CSortMaster::ConnectSlaves - activity(ch=0, denormalizegroup, 87)"\\n0000090B 2016-10-14 08:24:58.574 16533 31014 "Connect to 10.194.42.1:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000090C 2016-10-14 08:24:58.574 16533 31015 "Connect to 10.194.42.2:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000090D 2016-10-14 08:24:58.574 16533 31016 "Connect to 10.194.42.3:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000090E 2016-10-14 08:24:58.574 16533 31017 "Connect to 10.194.42.4:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000090F 2016-10-14 08:24:58.574 16533 31018 "Connect to 10.194.42.5:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000910 2016-10-14 08:24:58.574 16533 31019 "Connect to 10.194.42.6:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000911 2016-10-14 08:24:58.574 16533 31021 "Connect to 10.194.42.8:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000912 2016-10-14 08:24:58.574 16533 31020 "Connect to 10.194.42.7:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000913 2016-10-14 08:24:58.574 16533 31023 "Connect to 10.194.42.10:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000914 2016-10-14 08:24:58.574 16533 31022 "Connect to 10.194.42.9:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000915 2016-10-14 08:24:58.574 16533 31024 "Connect to 10.194.42.11:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000916 2016-10-14 08:24:58.574 16533 31025 "Connect to 10.194.42.12:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000918 2016-10-14 08:24:58.574 16533 31027 "Connect to 10.194.42.14:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000917 2016-10-14 08:24:58.574 16533 31026 "Connect to 10.194.42.13:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000919 2016-10-14 08:24:58.574 16533 31028 "Connect to 10.194.42.15:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091B 2016-10-14 08:24:58.574 16533 31030 "Connect to 10.194.42.17:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091A 2016-10-14 08:24:58.574 16533 31029 "Connect to 10.194.42.16:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091C 2016-10-14 08:24:58.574 16533 31031 "Connect to 10.194.42.18:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091E 2016-10-14 08:24:58.575 16533 31033 "Connect to 10.194.42.20:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091D 2016-10-14 08:24:58.575 16533 31032 "Connect to 10.194.42.19:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000091F 2016-10-14 08:24:58.575 16533 31034 "Connect to 10.194.42.21:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000920 2016-10-14 08:24:58.575 16533 31035 "Connect to 
10.194.42.22:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000921 2016-10-14 08:24:58.575 16533 31036 "Connect to 10.194.42.23:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000922 2016-10-14 08:24:58.575 16533 31037 "Connect to 10.194.42.24:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000923 2016-10-14 08:24:58.575 16533 31038 "Connect to 10.194.42.25:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000925 2016-10-14 08:24:58.575 16533 31040 "Connect to 10.194.42.27:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000926 2016-10-14 08:24:58.575 16533 31041 "Connect to 10.194.42.28:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000924 2016-10-14 08:24:58.575 16533 31039 "Connect to 10.194.42.26:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000927 2016-10-14 08:24:58.575 16533 31042 "Connect to 10.194.42.29:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000928 2016-10-14 08:24:58.575 16533 31043 "Connect to 10.194.42.30:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000929 2016-10-14 08:24:58.575 16533 31044 "Connect to 10.194.42.31:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092A 2016-10-14 08:24:58.575 16533 31045 "Connect to 10.194.42.32:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092B 2016-10-14 08:24:58.575 16533 31046 "Connect to 10.194.42.33:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092C 2016-10-14 08:24:58.575 16533 31047 "Connect to 10.194.42.34:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092D 2016-10-14 08:24:58.575 16533 31048 "Connect to 10.194.42.35:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092E 2016-10-14 08:24:58.575 16533 31050 "Connect to 10.194.42.37:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000092F 2016-10-14 08:24:58.575 16533 31049 "Connect to 10.194.42.36:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000930 2016-10-14 08:24:58.575 16533 31051 "Connect to 10.194.42.38:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000931 2016-10-14 08:24:58.575 16533 31052 "Connect to 10.194.42.39:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000932 2016-10-14 08:24:58.575 16533 31053 "Connect to 10.194.42.40:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000933 2016-10-14 08:24:58.576 16533 31054 "Connect to 10.194.42.41:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000934 2016-10-14 08:24:58.576 16533 31055 "Connect to 10.194.42.42:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000935 2016-10-14 08:24:58.576 16533 31056 "Connect to 10.194.42.43:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000936 2016-10-14 08:24:58.576 16533 31057 "Connect to 10.194.42.44:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000937 2016-10-14 08:24:58.576 16533 31058 "Connect to 10.194.42.45:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000938 2016-10-14 08:24:58.576 16533 31059 "Connect to 10.194.42.46:16600 - activity(ch=0, denormalizegroup, 87)"\\n00000939 2016-10-14 08:24:58.576 16533 31060 "Connect to 10.194.42.47:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000093A 2016-10-14 08:24:58.576 16533 31061 "Connect to 10.194.42.48:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000093B 2016-10-14 08:24:58.576 16533 31062 "Connect to 10.194.42.49:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000093C 2016-10-14 08:24:58.576 16533 31063 "Connect to 10.194.42.50:16600 - activity(ch=0, denormalizegroup, 87)"\\n0000093D 2016-10-14 08:24:58.593 16533 31009 "Sort Setup Complete - activity(ch=0, denormalizegroup, 87)"\\n0000093E 2016-10-14 08:24:58.593 16533 31009 "JOIN waiting for barrier.1 - 
activity(ch=0, denormalizegroup, 87)"\\n0000093F 2016-10-14 08:24:58.770 16533 31009 "JOIN barrier.1 raised - activity(ch=0, denormalizegroup, 87)"\\n00000940 2016-10-14 08:24:58.770 16533 31009 "Sort: canoptimizenullcolumns=false, usepartitionrow=false, betweensort=false skewWarning=0.000000 skewError=0.000000 minisortthreshold=0 - activity(ch=0, denormalizegroup, 87)"\\n00000941 2016-10-14 08:24:58.782 16533 31009 "Total recs in mem = 0 scaled recs= 0 size = 0 bytes, minrecsonnode = 0, maxrecsonnode = 0 - activity(ch=0, denormalizegroup, 87)"\\n00000942 2016-10-14 08:24:58.782 16533 31009 "Tot = 0 - activity(ch=0, denormalizegroup, 87)"\\n00000943 2016-10-14 08:24:58.782 16533 31009 " 0.00 : Calculating split map"\\n00000944 2016-10-14 08:24:58.782 16533 31009 "--------------------------------------"\\n00000945 2016-10-14 08:24:58.782 16533 31009 "Gather - no nodes spilt to disk - activity(ch=0, denormalizegroup, 87)"\\n00000946 2016-10-14 08:24:58.783 16533 31009 "Split point 0: 0 rows on 10.194.42.1:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000947 2016-10-14 08:24:58.783 16533 31009 "Split point 1: 0 rows on 10.194.42.2:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000948 2016-10-14 08:24:58.783 16533 31009 "Split point 2: 0 rows on 10.194.42.3:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000949 2016-10-14 08:24:58.783 16533 31009 "Split point 3: 0 rows on 10.194.42.4:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094A 2016-10-14 08:24:58.783 16533 31009 "Split point 4: 0 rows on 10.194.42.5:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094B 2016-10-14 08:24:58.783 16533 31009 "Split point 5: 0 rows on 10.194.42.6:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094C 2016-10-14 08:24:58.783 16533 31009 "Split point 6: 0 rows on 10.194.42.7:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094D 2016-10-14 08:24:58.783 16533 31009 "Split point 7: 0 rows on 10.194.42.8:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094E 2016-10-14 08:24:58.783 16533 31009 "Split point 8: 0 rows on 10.194.42.9:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000094F 2016-10-14 08:24:58.783 16533 31009 "Split point 9: 0 rows on 10.194.42.10:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000950 2016-10-14 08:24:58.783 16533 31009 "Split point 10: 0 rows on 10.194.42.11:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000951 2016-10-14 08:24:58.783 16533 31009 "Split point 11: 0 rows on 10.194.42.12:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000952 2016-10-14 08:24:58.783 16533 31009 "Split point 12: 0 rows on 10.194.42.13:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000953 2016-10-14 08:24:58.783 16533 31009 "Split point 13: 0 rows on 10.194.42.14:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000954 2016-10-14 08:24:58.783 16533 31009 "Split point 14: 0 rows on 10.194.42.15:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000955 2016-10-14 08:24:58.783 16533 31009 "Split point 15: 0 rows on 10.194.42.16:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000956 2016-10-14 08:24:58.783 16533 31009 "Split point 16: 0 rows on 10.194.42.17:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000957 2016-10-14 08:24:58.783 16533 31009 "Split point 17: 0 rows on 10.194.42.18:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000958 2016-10-14 08:24:58.783 16533 31009 "Split point 18: 0 rows on 10.194.42.19:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000959 2016-10-14 08:24:58.783 16533 31009 "Split point 19: 0 rows on 10.194.42.20:16602 - activity(ch=0, 
denormalizegroup, 87)"\\n0000095A 2016-10-14 08:24:58.783 16533 31009 "Split point 20: 0 rows on 10.194.42.21:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000095B 2016-10-14 08:24:58.783 16533 31009 "Split point 21: 0 rows on 10.194.42.22:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000095C 2016-10-14 08:24:58.783 16533 31009 "Split point 22: 0 rows on 10.194.42.23:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000095D 2016-10-14 08:24:58.783 16533 31009 "Split point 23: 0 rows on 10.194.42.24:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000095E 2016-10-14 08:24:58.783 16533 31009 "Split point 24: 0 rows on 10.194.42.25:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000095F 2016-10-14 08:24:58.783 16533 31009 "Split point 25: 0 rows on 10.194.42.26:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000960 2016-10-14 08:24:58.783 16533 31009 "Split point 26: 0 rows on 10.194.42.27:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000961 2016-10-14 08:24:58.783 16533 31009 "Split point 27: 0 rows on 10.194.42.28:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000962 2016-10-14 08:24:58.783 16533 31009 "Split point 28: 0 rows on 10.194.42.29:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000963 2016-10-14 08:24:58.783 16533 31009 "Split point 29: 0 rows on 10.194.42.30:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000964 2016-10-14 08:24:58.783 16533 31009 "Split point 30: 0 rows on 10.194.42.31:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000965 2016-10-14 08:24:58.783 16533 31009 "Split point 31: 0 rows on 10.194.42.32:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000966 2016-10-14 08:24:58.783 16533 31009 "Split point 32: 0 rows on 10.194.42.33:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000967 2016-10-14 08:24:58.783 16533 31009 "Split point 33: 0 rows on 10.194.42.34:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000968 2016-10-14 08:24:58.783 16533 31009 "Split point 34: 0 rows on 10.194.42.35:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000969 2016-10-14 08:24:58.783 16533 31009 "Split point 35: 0 rows on 10.194.42.36:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096A 2016-10-14 08:24:58.783 16533 31009 "Split point 36: 0 rows on 10.194.42.37:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096B 2016-10-14 08:24:58.783 16533 31009 "Split point 37: 0 rows on 10.194.42.38:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096C 2016-10-14 08:24:58.783 16533 31009 "Split point 38: 0 rows on 10.194.42.39:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096D 2016-10-14 08:24:58.783 16533 31009 "Split point 39: 0 rows on 10.194.42.40:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096E 2016-10-14 08:24:58.783 16533 31009 "Split point 40: 0 rows on 10.194.42.41:16602 - activity(ch=0, denormalizegroup, 87)"\\n0000096F 2016-10-14 08:24:58.783 16533 31009 "Split point 41: 0 rows on 10.194.42.42:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000970 2016-10-14 08:24:58.783 16533 31009 "Split point 42: 0 rows on 10.194.42.43:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000971 2016-10-14 08:24:58.783 16533 31009 "Split point 43: 0 rows on 10.194.42.44:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000972 2016-10-14 08:24:58.783 16533 31009 "Split point 44: 0 rows on 10.194.42.45:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000973 2016-10-14 08:24:58.783 16533 31009 "Split point 45: 0 rows on 10.194.42.46:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000974 2016-10-14 08:24:58.783 16533 31009 "Split point 46: 0 rows on 
10.194.42.47:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000975 2016-10-14 08:24:58.783 16533 31009 "Split point 47: 0 rows on 10.194.42.48:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000976 2016-10-14 08:24:58.783 16533 31009 "Split point 48: 0 rows on 10.194.42.49:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000977 2016-10-14 08:24:58.783 16533 31009 "Split point 49: 0 rows on 10.194.42.50:16602 - activity(ch=0, denormalizegroup, 87)"\\n00000978 2016-10-14 08:24:58.783 16533 31009 "Starting Merge of 0 records - activity(ch=0, denormalizegroup, 87)"\\n00000979 2016-10-14 08:24:58.811 16533 31009 "JOIN waiting for barrier.2 - activity(ch=0, denormalizegroup, 87)"\\n0000097A 2016-10-14 08:24:58.812 16533 31009 "JOIN barrier.2 raised - activity(ch=0, denormalizegroup, 87)"\\n0000097B 2016-10-14 08:24:58.812 16533 31009 "Sort Done in - activity(ch=0, denormalizegroup, 87)"\\n0000097C 2016-10-14 08:24:58.816 16533 31009 "Sort Done - activity(ch=0, denormalizegroup, 87)"\\n0000097D 2016-10-14 08:24:58.816 16533 31009 "Sort setup cosort=true, needconnect=false has key serializer - activity(ch=0, denormalizegroup, 87)"\\n0000097E 2016-10-14 08:24:58.829 16533 31009 "Sort Setup Complete - activity(ch=0, denormalizegroup, 87)"\\n0000097F 2016-10-14 08:24:58.829 16533 31009 "JOIN waiting for barrier.3 - activity(ch=0, denormalizegroup, 87)"\\n00000980 2016-10-14 08:24:58.830 16533 31009 "JOIN barrier.3 raised - activity(ch=0, denormalizegroup, 87)"\\n00000981 2016-10-14 08:24:58.830 16533 31009 "Sort: canoptimizenullcolumns=false, usepartitionrow=false, betweensort=false skewWarning=0.000000 skewError=0.000000 minisortthreshold=0 - activity(ch=0, denormalizegroup, 87)"\\n00000982 2016-10-14 08:24:58.841 16533 31009 "Total recs in mem = 13408 scaled recs= 13408 size = 2997216 bytes, minrecsonnode = 0, maxrecsonnode = 487 - activity(ch=0, denormalizegroup, 87)"\\n00000983 2016-10-14 08:24:58.942 16533 16593 "ERROR: 10056: /var/lib/jenkins2/workspace/LN-Candidate-with-Plugins-6.0.6-rc4/LN/centos-6.0-x86_64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(958) : abortThor : Watchdog has lost connectivity with Thor slave: 10.194.42.1:16600 (Process terminated or node down?)"\\n00000984 2016-10-14 08:24:58.942 16533 29131 "ERROR: 4: MP link closed (10.194.42.1:16600)"\\n00000985 2016-10-14 08:24:58.942 16533 29131 "Posting exception: MP link closed (10.194.42.1:16600) to agent 10.194.73.204 for workunit(W20161014-082019)"\\n00000986 2016-10-14 08:24:58.942 16533 16593 "abortThor called"\\n00000987 2016-10-14 08:24:58.942 16533 16593 "Stopping jobManager"\\n00000988 2016-10-14 08:24:58.942 16533 16593 "aborting any current active job"\\n00000989 2016-10-14 08:24:58.942 16533 31009 "WARNING: WARNING: MPERR_link_closed in SortDone - activity(ch=0, denormalizegroup, 87)"\\n0000098A 2016-10-14 08:24:58.955 16533 31009 "ERROR: 4: /var/lib/jenkins2/workspace/LN-Candidate-with-Plugins-6.0.6-rc4/LN/centos-6.0-x86_64/HPCC-Platform/thorlcr/msort/tsortm.cpp(119) : Disconnecting sort node : MP link closed (10.194.42.1:16600)"\\n0000098B 2016-10-14 08:24:58.955 16533 31009 "process exit - activity(ch=0, denormalizegroup, 87)"\\n0000098C 2016-10-14 08:24:58.997 16533 29131 "INFORM [EXCEPTION]"\\n0000098D 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, diskwrite, 90)"\\n0000098E 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, filter, 89)"\\n0000098F 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, project, 88)"\\n00000990 
2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, denormalizegroup, 87)"\\n00000991 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, normalizechild, 84)"\\n00000992 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, diskread, 83)"\\n00000993 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, if, 86)"\\n00000994 2016-10-14 08:24:58.997 16533 29131 "Abort condition set - activity(ch=0, diskread, 85)"\\n00000995 2016-10-14 08:24:58.998 16533 29131 "Aborting master graph - graph(graph11, 82) : MP link closed (10.194.42.1:16600)"\\n00000996 2016-10-14 08:24:58.998 16533 16533 " - graph(graph11, 82) : MP link closed (10.194.42.1:16600)"\\n00000997 2016-10-14 08:24:58.998 16533 16533 "during abort() - graph(graph11, 82) : MP link closed (10.194.42.1:16600)"\\n00000998 2016-10-14 08:24:59.000 16533 29131 "ERROR: 4: /var/lib/jenkins2/workspace/LN-Candidate-with-Plugins-6.0.6-rc4/LN/centos-6.0-x86_64/HPCC-Platform/thorlcr/graph/thgraphmaster.cpp(1416) : broadcastSendAsync : MP link closed (10.194.42.1:16600)"\\n00000999 2016-10-14 08:25:47.649 16533 29132 "SYS: PU= 4% MU= 6% MAL=2710147072 MMP=2657128448 SBK=53018624 TOT=3307584K RAM=1087476K SWP=0K"\\n0000099A 2016-10-14 08:25:47.650 16533 29132 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.7 kw/s=23.4 bsy=0 [sdb] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=4379161.5 rxk/s=0.0 txp/s=272688.2 txk/s=0.0 CPU: usr=3 sys=0 iow=0 idle=95"\\n0000099B 2016-10-14 08:25:58.997 16533 29129 "WARNING: 4: /var/lib/jenkins2/workspace/LN-Candidate-with-Plugins-6.0.6-rc4/LN/centos-6.0-x86_64/HPCC-Platform/thorlcr/graph/thgraphmaster.cpp(77) : FAILED TO RECOVER FROM EXCEPTION, STOPPING THOR : MP link closed (10.194.42.1:16600)"
\", \"post_time\": \"2016-10-14 12:29:24\" },\n\t{ \"post_id\": 11961, \"topic_id\": 2981, \"forum_id\": 10, \"post_subject\": \"MP link closed Error\", \"username\": \"chuck.beam\", \"post_text\": \"I am getting the error below:\\n\\nSystem error: 4: MP link closed\\n\\nI am filtering a dataset and then attempting to DENORMALIZE.\\n\\nIf I comment out the fn_FilterPropertiesSuppressionPeriod and DENORMALIZE the (unfiltered) PROPERTIES, it works fine.\\n\\nPlease help.\\n\\n\\n\\nSUPP_FILTERED_PROP_DS := fn_FilterPropertiesSuppressionPeriod(PROPERTIES, ExtractRequest);\\n\\n// Now the Event JOIN\\n\\tATTR_DS_Dist := DISTRIBUTE(STATUS_FILTERED_ATTR_DS, HASH32(PRIM_RANGE, PRIM_NAME, ADDR_SUFFIX, SEC_RANGE, STATE, ZIP));\\n\\tSUPP_FILTERED_PROP_DS_DIST := DISTRIBUTE(SUPP_FILTERED_PROP_DS, HASH32(PRIM_RANGE, PRIM_NAME, ADDR_SUFFIX, SEC_RANGE, ST, ZIP));\\n\\t\\n\\tPROPERTIES_MLS_REC xform2(PROPERTIES_MLS_REC L, DATASET(ATTR_REC) R) := TRANSFORM\\n\\t\\tSELF.MLS_Recs\\t:= CHOOSEN(R, AI_Common.Constants.MLS_RECORD_COUNT);\\n\\t\\tSELF := L;\\n\\tEND;\\n\\tjoinedBookXEvent \\t\\t:= \\tDENORMALIZE(SUPP_FILTERED_PROP_DS_DIST, ATTR_DS_Dist, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tLEFT.prim_range = RIGHT.prim_range AND \\nLEFT.prim_name = RIGHT.prim_name AND\\nLEFT.addr_suffix = RIGHT.addr_suffix AND\\nLEFT.sec_range = RIGHT.sec_range AND\\nLEFT.zip = RIGHT.zip \\tAND\\nLEFT.st = RIGHT.state AND\\nLEFT.INCEPTION_DATE < RIGHT.MLS_LIST_DT,\\nGROUP,\\nxform2(LEFT, ROWS(RIGHT)), LOCAL);\\n
\\n\\n\\nHere is the filter code in the fn_FilterPropertiesSuppressionPeriod function:\\n\\nEXPORT fn_FilterPropertiesSuppressionPeriod(DATASET(AI_Common.Layouts.PROPERTIES_MLS_REC) PROPERTIES, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tAI_Common.Layouts.AI_MR_List ExtractRequest) := FUNCTION\\n\\t\\t\\nSCHEDULED_RUN_DATE := EXTRACTREQUEST.SCHEDULEDRUNDATE;\\nRUN_DATE \\t\\t := IF(LENGTH((STRING)SCHEDULED_RUN_DATE) = 10, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tSCHEDULED_RUN_DATE[1..4] + SCHEDULED_RUN_DATE[6..7] + SCHEDULED_RUN_DATE[9..10], \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t(STRING8)STD.Date.Today());\\n\\t\\t\\n\\t\\tEXCLUSION_DS := AI_HomeListing_Exclusion.Files.DS_HomeListingExclusion;\\n\\n\\t\\tSUPPRESSION_PERIOD := ExtractRequest.Product_HML.Suppression_Period;\\n\\t\\tSUPPRESSION_DATE := AI_Common.Common.RollBack_date(RUN_DATE, (INTEGER)SUPPRESSION_PERIOD * -1);\\n\\t\\t\\n\\t\\tSUPPRESSION_DS \\t:= EXCLUSION_DS( DATE_REPORTED > (INTEGER)SUPPRESSION_DATE AND \\nACCOUNT_NUMBER = ExtractRequest.ACCOUNT_NUMBER AND\\nACCOUNT_SUFFIX = ExtractRequest.ACCOUNT_SUFFIX );\\n\\t\\n\\t\\tPROPERTIES_DIST\\t := DISTRIBUTE(PROPERTIES, HASH32(PRIM_RANGE, PRIM_NAME, ADDR_SUFFIX, SEC_RANGE, ST, ZIP));\\n\\t\\tSUPPRESSION_DS_DIST := DISTRIBUTE(SUPPRESSION_DS, HASH32(PRIM_RANGE, PRIM_NAME, ADDR_SUFFIX, SEC_RANGE, STATE, ZIP));\\n\\t\\n\\t\\tFILTERED_PROPERTY_DS := JOIN(PROPERTIES_DIST, SUPPRESSION_DS_DIST,\\n LEFT.POLICY_NUMBER = RIGHT.POLICY_NUMBER AND\\n LEFT.AMBEST = RIGHT.AMBEST AND\\n LEFT.INSURANCE_TYPE = RIGHT.INSURANCE_TYPE AND\\n LEFT.PRIM_RANGE = RIGHT.PRIM_RANGE AND\\n LEFT.PRIM_NAME = RIGHT.PRIM_NAME AND\\n LEFT.ADDR_SUFFIX = RIGHT.ADDR_SUFFIX AND\\n LEFT.SEC_RANGE = RIGHT.SEC_RANGE AND\\n LEFT.ST = RIGHT.STATE AND\\n LEFT.ZIP = RIGHT.ZIP,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tTRANSFORM(LEFT), LEFT ONLY, LOCAL);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\tRETURN FILTERED_PROPERTY_DS;\\n\\nEND;
\", \"post_time\": \"2016-10-14 11:31:37\" },\n\t{ \"post_id\": 12243, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nNo, the GROUP function is not the same as the "group by" form of TABLE (which is what I changed your code to use).\\n\\nRichard\", \"post_time\": \"2016-10-19 21:02:12\" },\n\t{ \"post_id\": 12233, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rqg0717\", \"post_text\": \"Dear Mr. Taylor,\\n\\nI got it. Thanks a lot for your reply. \\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-19 20:43:29\" },\n\t{ \"post_id\": 12213, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nVery good - you were really close. Here's my version of your code:Layout_claims := RECORD\\n STRING8 name;\\nEND;\\nlayout := RECORD\\n UNSIGNED1 id;\\n DATASET(Layout_claims) Claims;\\nEND;\\n\\ntemp := DATASET([{1,DATASET([{'testtest'}],Layout_claims)},\\n {1,DATASET([{'1234test'}],Layout_claims)},\\n {1,DATASET([{'test5678'}],Layout_claims)},\\n {2,DATASET([{'4321test'}],Layout_claims)},\\n {2,DATASET([{'8765test'}],Layout_claims)},\\n {2,DATASET([{'test4321'}],Layout_claims)}],layout);\\n\\nparent := table(temp, {id}, id);\\n\\nlayout parentMove(parent L) := transform\\n self.Claims := [];\\n self := L;\\nend;\\n\\nparentOnly := project(parent, parentMove(left));\\n\\nlayout childMove(layout L, layout R):= transform\\n self.Claims := L.Claims + R.Claims;\\n self := L;\\nend;\\n \\nresults := DENORMALIZE(parentOnly, temp, LEFT.id = RIGHT.id, childMove(LEFT, RIGHT));\\n\\noutput(results);
The "group by" form of TABLE eliminates the DEDUP, and you were not using the passed COUNTER value so it's unnecessary.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-19 20:06:45\" },\n\t{ \"post_id\": 12203, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rqg0717\", \"post_text\": \"Okay, I think I figure it out eventually.\\n\\n\\nLayout_claims := RECORD\\n STRING8 name;\\nEND;\\nlayout := RECORD\\n UNSIGNED1 id;\\n DATASET(Layout_claims) Claims;\\nEND;\\n\\ntemp := DATASET([{1,DATASET([{'testtest'}],Layout_claims)},\\n {1,DATASET([{'1234test'}],Layout_claims)},\\n {1,DATASET([{'test5678'}],Layout_claims)},\\n {2,DATASET([{'4321test'}],Layout_claims)},\\n {2,DATASET([{'8765test'}],Layout_claims)},\\n {2,DATASET([{'test4321'}],Layout_claims)}],layout);\\n\\nlayout_parent := RECORD\\n UNSIGNED1 id;\\nEND;\\n\\nparent := dedup(table(temp, {id}), id);\\n\\nlayout parentMove(layout_parent L) := transform\\n\\tself.Claims := [];\\n\\tself := L;\\nend;\\n\\nparentOnly := project(parent, parentMove(left));\\n\\nlayout childMove(layout L, layout R, INTEGER C):= transform\\n\\tself.Claims := L.Claims + R.Claims;\\n\\tself := L;\\nend;\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\nresults := DENORMALIZE(parentOnly, temp, LEFT.id = RIGHT.id, childMove(LEFT, RIGHT, COUNTER));\\n\\noutput(results);\\n
\\n\\nPlease let me know if there is a easier or simpler way to do it. Thank you.\\n\\nBest,\\nJames\", \"post_time\": \"2016-10-19 19:19:41\" },\n\t{ \"post_id\": 12193, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rqg0717\", \"post_text\": \"Dear Mr. Taylor,\\n\\nThank you for your reply.\\n\\nI have taken the Advanced ECL class online already. I am taking HPCC Introduction to ROXIE now. I think I should use DENORMALIZE and TRANSFORM but I do not know how to do that within the same dataset. Could you please give me a hint? Thank you.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-19 18:50:20\" },\n\t{ \"post_id\": 12183, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"Re: how to merge records\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nThat specific scenario is covered in the Advanced ECL class, where we deal with construction/deconstruction of "nested child datasets" and how ECL is designed to make querying them really simple. Which of our online (or live instructor) ECL classes have you taken so far?\\n\\nRichard\", \"post_time\": \"2016-10-19 18:19:55\" },\n\t{ \"post_id\": 12173, \"topic_id\": 3023, \"forum_id\": 10, \"post_subject\": \"how to merge records\", \"username\": \"rqg0717\", \"post_text\": \"dear all,\\n\\nI have a quick question on how to merge records, e.g.\\n\\n\\nLayout_claims := RECORD\\n STRING8 name;\\nEND;\\nlayout := RECORD\\n UNSIGNED1 id;\\n DATASET(Layout_claims) Claims;\\nEND;\\n\\nresults := DATASET([{1,DATASET([{'testtest'}],Layout_claims)},\\n {1,DATASET([{'1234test'}],Layout_claims)},\\n {1,DATASET([{'test5678'}],Layout_claims)},\\n {2,DATASET([{'4321test'}],Layout_claims)},\\n {2,DATASET([{'8765test'}],Layout_claims)},\\n {2,DATASET([{'test4321'}],Layout_claims)}],layout);\\n
\\noutput of results would be like this:\\n\\nid\\tclaims\\n1\\t<Row><name>testtest</name></Row>\\n1\\t<Row><name>1234test</name></Row>\\n1\\t<Row><name>test5678</name></Row>\\n2\\t<Row><name>4321test</name></Row>\\n2\\t<Row><name>8765test</name></Row>\\n2\\t<Row><name>test4321</name></Row>\\n
\\nso my question is how to merge the data by the id in order to get the results like this:\\n\\nid\\tclaims\\n1\\t<Row><name>testtest</name></Row><Row><name>1234test</name></Row><Row><name>test5678</name></Row>\\n2\\t<Row><name>4321test</name></Row><Row><name>8765test</name></Row><Row><name>test4321</name></Row>\\n
\\nPlease let me know. Thanks a lot.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-19 16:18:48\" },\n\t{ \"post_id\": 12313, \"topic_id\": 3053, \"forum_id\": 10, \"post_subject\": \"How to debug a segmentation fault?\", \"username\": \"kpolicano\", \"post_text\": \"I don't really know where to go from here:\\n\\n00000000 2016-10-21 13:17:59.627 28921 28921 "Logging to /mnt/disk1/var/log/HPCCSystems/eclagent/eclagent.2016_10_21.log"\\n00000001 2016-10-21 13:17:59.964 28921 28921 "ECLAGENT build internal_4.2.10-rc2"\\n00000002 2016-10-21 13:17:59.969 28921 28921 "Waiting for workunit lock"\\n00000003 2016-10-21 13:17:59.970 28921 28921 "Obtained workunit lock"\\n00000004 2016-10-21 13:18:01.736 28921 28921 "Loading dll (libW20161021-131756.so) from location /var/lib/HPCCSystems/dllserver/temp/libW20161021-131756.so"\\n00000005 2016-10-21 13:18:01.738 28921 28921 "Starting process"\\n00000006 2016-10-21 13:18:01.738 28921 28921 "RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)"\\n00000007 2016-10-21 13:18:01.738 28921 28921 "RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x7f0c07f00000 alignment=1048576 bitmapSize=10"\\n00000008 2016-10-21 13:18:01.738 28921 28921 "Waiting for run lock"\\n00000009 2016-10-21 13:18:01.802 28921 28921 "Obtained run lock"\\n0000000A 2016-10-21 13:18:01.803 28921 28921 "setResultString(search_str,-1,'Picture Houses v Wednesbury')"\\n0000000B 2016-10-21 13:18:01.803 28921 28921 "setResultUInt(num_results,-1,10)"\\n0000000C 2016-10-21 13:18:01.804 28921 28921 "setResultUInt(start_rec,-1,1)"\\n0000000D 2016-10-21 13:18:01.812 28921 28921 "setResultString(glG83M5,-3,'picture houses v wednesbury')"\\n0000000E 2016-10-21 13:18:01.813 28921 28921 "setResultInt(glI83M5,-3,20)"\\n0000000F 2016-10-21 13:18:01.814 28921 28921 "Executing hthor graph graph1"\\n00000010 2016-10-21 13:18:01.821 28921 28921 "Executing subgraph 34"\\n00000011 2016-10-21 13:18:01.821 28921 28921 "Executing subgraph 18"\\n00000012 2016-10-21 13:18:01.821 28921 28921 "Executing subgraph 1"\\n00000013 2016-10-21 13:18:01.822 28921 28921 "Completed subgraph 1"\\n00000014 2016-10-21 13:18:01.833 28921 28921 ",FileAccess,HThor,READ,hthor,policaka,relengine::vertlink::dev::v16::superkeys::doc_titles_by_sub_keyword,W20161021-131756,graph1"\\n00000015 2016-10-21 13:18:01.838 28921 28921 "DISKWRITE: using temporary filename /var/lib/HPCCSystems/eclagent/temp/W20161021-131756.~spill__scope__B93M5__W20161021-131756"\\n00000016 2016-10-21 13:18:02.889 28921 28921 "setResultInt(aK83M5,-3,174325)"\\n00000017 2016-10-21 13:18:02.986 28921 28921 "Completed subgraph 18"\\n00000018 2016-10-21 13:18:04.388 28921 28921 "setResultUInt(total_doc_count,0,174325)"\\n00000019 2016-10-21 13:18:04.678 28921 28921 "Completed subgraph 34"\\n0000001A 2016-10-21 13:18:06.112 28921 28921 "Executing subgraph 44"\\n0000001B 2016-10-21 13:18:06.112 28921 28921 "Executing subgraph 36"\\n0000001C 2016-10-21 13:18:06.112 28921 28921 "Executing subgraph 27"\\n0000001D 2016-10-21 13:18:06.112 28921 28921 "DISKWRITE: using temporary filename /var/lib/HPCCSystems/eclagent/temp/W20161021-131756.~spill__scope__autoM83M5__W20161021-131756"\\n0000001E 2016-10-21 13:18:06.409 28921 28921 ",FileAccess,HThor,READ,hthor,policaka,relengine::vertlink::dev::v16::superkeys::docs_by_pguid,W20161021-131756,graph1"\\n0000001F 2016-10-21 13:18:06.409 28921 28930 "Reading file /var/lib/HPCCSystems/eclagent/temp/W20161021-131756.~spill__scope__B93M5__W20161021-131756"\\n00000020 2016-10-21 
13:18:06.492 28921 28921 "Completed subgraph 27"\\n00000021 2016-10-21 13:18:07.570 28921 28921 "DISKWRITE: using temporary filename /var/lib/HPCCSystems/eclagent/temp/W20161021-131756.~spill__scope__C93M5__W20161021-131756"\\n00000022 2016-10-21 13:18:07.570 28921 28921 "Reading file /var/lib/HPCCSystems/eclagent/temp/W20161021-131756.~spill__scope__autoM83M5__W20161021-131756"\\n00000023 2016-10-21 13:18:07.570 28921 28921 "Completed subgraph 36"\\n00000024 2016-10-21 13:18:07.573 28921 28921 "================================================"\\n00000025 2016-10-21 13:18:07.573 28921 28921 "Signal: 11 Segmentation fault"\\n00000026 2016-10-21 13:18:07.573 28921 28921 "Fault IP: 00007F0C36A0F19A"\\n00000027 2016-10-21 13:18:07.573 28921 28921 "Accessing: 0000000000000000"\\n00000028 2016-10-21 13:18:07.573 28921 28921 "Registers:"\\n00000029 2016-10-21 13:18:07.573 28921 28921 "EAX:000000000075AE10 EBX:000000000075AB90 ECX:0000000000000001 EDX:0000000000000000 ESI:0000000000755A00 EDI:0000000000000000"\\n0000002A 2016-10-21 13:18:07.573 28921 28921 "CS:EIP:0033:00007F0C36A0F19A"\\n0000002B 2016-10-21 13:18:07.573 28921 28921 " ESP:00007FFF5F2C8EE0 EBP:0000000000000001"\\n0000002C 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8EE0]: 0000000000000001 0000000100000000 0000000000000001 0000000800000000 0000000000000008 0000000100000000 0000000000000001 0000000800000000"\\n0000002D 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8F00]: 0000000000000008 0075A9F000000000 000000000075A9F0 0000000100000000 0000000000000001 0000000000000000 0000000000000000 269B876A00000000"\\n0000002E 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8F20]: 00007F0C269B876A 36A0F3A000007F0C 00007F0C36A0F3A0 0075A9F000007F0C 000000000075A9F0 00795C0000000000 0000000000795C00 0075AAF800000000"\\n0000002F 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8F40]: 000000000075AAF8 0000000000000000 0000000000000000 00755A0000000000 0000000000755A00 36A0F4A900000000 00007F0C36A0F4A9 0075AA1000007F0C"\\n00000030 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8F60]: 000000000075AA10 0075AA1000000000 000000000075AA10 5F2C903000000000 00007FFF5F2C9030 36A0F41000007FFF 00007F0C36A0F410 0000000800007F0C"\\n00000031 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8F80]: 0000000000000008 269B88D300000000 00007F0C269B88D3 0000000000007F0C 0000000000000000 0000000000000000 0000000000000000 00755A1800000000"\\n00000032 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8FA0]: 0000000000755A18 0079946000000000 0000000000799460 3443281000000000 00007F0C34432810 09C522A800007F0C 00007F0C09C522A8 3443285800007F0C"\\n00000033 2016-10-21 13:18:07.573 28921 28921 "Stack[00007FFF5F2C8FC0]: 00007F0C34432858 0000000100007F0C 0000000000000001 00799DF000000000 0000000000799DF0 0000001000000000 00007F0C00000010 0079948000007F0C"\\n00000034 2016-10-21 13:18:07.573 28921 28921 "Backtrace:"\\n00000035 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x28) [0x7f0c33a469c8]"\\n00000036 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x9da) [0x7f0c33a4766a]"\\n00000037 2016-10-21 13:18:07.576 28921 28921 " /lib64/libpthread.so.0(+0xf7e0) [0x7f0c30e5c7e0]"\\n00000038 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph9doExecuteEPKhb+0x11a) [0x7f0c36a0f19a]"\\n00000039 2016-10-21 13:18:07.576 28921 28921 " 
/opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph16executeSubgraphsEPKh+0x90) [0x7f0c36a0f3a0]"\\n0000003A 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph8evaluateEjPKh+0x89) [0x7f0c36a0f4a9]"\\n0000003B 2016-10-21 13:18:07.576 28921 28921 " /var/lib/HPCCSystems/dllserver/temp/libW20161021-131756.so(+0x458d3) [0x7f0c269b88d3]"\\n0000003C 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement12callOnCreateER9IHThorArgR13IAgentContext+0x82) [0x7f0c36a0cbd2]"\\n0000003D 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement12createHelperER13IAgentContextP11EclSubGraph+0x5a) [0x7f0c36a0ccda]"\\n0000003E 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x5f) [0x7f0c36a0ce6f]"\\n0000003F 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000040 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000041 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000042 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000043 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000044 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000045 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000046 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN15EclGraphElement14createActivityER13IAgentContextP11EclSubGraph+0x4cb) [0x7f0c36a0d2db]"\\n00000047 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph16createActivitiesEv+0x86) [0x7f0c36a0d406]"\\n00000048 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN11EclSubGraph7executeEPKh+0xa0) [0x7f0c36a0e3d0]"\\n00000049 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN8EclGraph7executeEPKh+0x8e) [0x7f0c36a0e64e]"\\n0000004A 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent12executeGraphEPKcbjPKv+0x16e) [0x7f0c36a1210e]"\\n0000004B 2016-10-21 13:18:07.576 28921 28921 " /var/lib/HPCCSystems/dllserver/temp/libW20161021-131756.so(+0x374e9) [0x7f0c269aa4e9]"\\n0000004C 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libworkunit.so(_ZN15WorkflowMachine11performItemEjj+0x7d) [0x7f0c3568fc9d]"\\n0000004D 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libworkunit.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x41) [0x7f0c35690cc1]"\\n0000004E 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libworkunit.so(_ZN15WorkflowMachine11executeItemEjj+0x2af) [0x7f0c3569076f]"\\n0000004F 2016-10-21 13:18:07.576 28921 28921 " 
/opt/HPCCSystems/lib/libworkunit.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x100) [0x7f0c35691180]"\\n00000050 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x1b6) [0x7f0c36a006b6]"\\n00000051 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x2fc) [0x7f0c36a00b3c]"\\n00000052 2016-10-21 13:18:07.576 28921 28921 " /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x8b9) [0x7f0c36a01e89]"\\n00000053 2016-10-21 13:18:07.576 28921 28921 " eclagent(main+0x5b) [0x40122b]"\\n00000054 2016-10-21 13:18:07.576 28921 28921 " /lib64/libc.so.6(__libc_start_main+0xfd) [0x7f0c30ad7d1d]"\\n00000055 2016-10-21 13:18:07.576 28921 28921 " eclagent() [0x4010f9]"\\n00000056 2016-10-21 13:18:07.576 28921 28921 "ThreadList:\\n00000057 2016-10-21 13:18:07.576 28921 28921 "SIG: Segmentation fault(11), accessing 0000000000000000, IP=00007F0C36A0F19A"
\", \"post_time\": \"2016-10-21 17:24:15\" },\n\t{ \"post_id\": 18863, \"topic_id\": 3063, \"forum_id\": 10, \"post_subject\": \"Re: SOAP rpc error when using "ecl run" command in Linux\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nNo resolution that I'm aware of.(I see the Jira ticket https://track.hpccsystems.com/browse/HPCC-13971 mentioning this issue, but that ticket is currently unresolved.)
I'd suggest you add a comment to the above-named JIRA ticket.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-19 18:39:49\" },\n\t{ \"post_id\": 18853, \"topic_id\": 3063, \"forum_id\": 10, \"post_subject\": \"Re: SOAP rpc error when using "ecl run" command in Linux\", \"username\": \"Rohit\", \"post_text\": \"Hi,\\n\\nSince this issue still persists in the 'ecl run' procedure, the workaround that you may consider is the following:\\n\\n1. Immediately after starting the ECL job using 'ecl run' command, write bash script to find out the Workunit ID of your job by using the 'ecl getwuid' command.\\n\\n2. Remember, this next command 'ecl getwuid' will execute only after either the 'ecl run' command has successfully completed (or errored out) within 2 hours or the 'ecl run' command has exited due to the SOAP rpc timeout error after 2 hours. In either case, you will get the Workunit ID for your job.\\n\\n3. Then use a bash 'do while true' loop to keep checking the status of this ECL workunit every 'n' seconds (using the 'ecl status' command). This will allow you to control the next sequence of events based on the various statuses of the job (for example: running / failed / aborted / completed / compiling).\\n\\nHope this helps.\", \"post_time\": \"2017-09-19 18:24:52\" },\n\t{ \"post_id\": 18843, \"topic_id\": 3063, \"forum_id\": 10, \"post_subject\": \"Re: SOAP rpc error when using "ecl run" command in Linux\", \"username\": \"davidefanchini\", \"post_text\": \"Hi,\\n\\nis there a workaround for this error for computations that take more than 2 hours?\\n\\nThank you very much\", \"post_time\": \"2017-09-18 11:37:30\" },\n\t{ \"post_id\": 12323, \"topic_id\": 3063, \"forum_id\": 10, \"post_subject\": \"SOAP rpc error when using "ecl run" command in Linux\", \"username\": \"Rohit\", \"post_text\": \"Hi,\\n\\nI use "ecl run" command to execute ECL jobs in AWS Linux environment. I also use the option "--wait=86400000" with the "ecl run" command (to specify a 24-hour or 86,400,000 ms wait time).\\n\\nWhenever any ECL job (executed using the "ecl run" command) runs over 2 hours, the "ecl run" bash command returns the following error (however, the ECL job continues running on the cluster):\\nSOAP rpc error[errorCode = -6\\t message = timeout expired\\nTarget: C!111.11.11.111, Raised in: /var/lib/jenkins/workspace/CE-Candidate-5.4.6-1/CE/centos-7.0-x86_64/HPCC-Platform/system/jlib/jsocket.cpp, line 1600 ]
\\n\\nAny suggestions to avoid this error in this procedure call?\\n\\n(I see the Jira ticket https://track.hpccsystems.com/browse/HPCC-13971 mentioning this issue, but that ticket is currently unresolved.)\\n\\nThanks\", \"post_time\": \"2016-10-21 17:46:16\" },\n\t{ \"post_id\": 12533, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"Re: LEFT/RIGHT ONLY Joins\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nOK, if count is a search term in the INDEX and not a payload field, then I have to presume it is NOT the first element in the key. I also have to assume that the count field does contain some negative values (which I would not normally expect in a field named "count").\\n\\nBased on those assumptions, try your examples this way:JOIN(\\n DS,\\n IDX,\\n KEYED(LEFT.id=RIGHT.id AND RIGHT.count < 0),\\n TRANSFORM(...),\\n RIGHT ONLY\\n)
\\nand this://using KEYED and WILD to make sure the INDEX isn't treated as a DATASET\\nDS2 := IDX(KEYED(count>=0),WILD(id)); //including 0 here to create exact opposites\\nJOIN(\\n DS,\\n DS2,\\n LEFT.id=RIGHT.id,\\n TRANSFORM(...),\\n RIGHT ONLY\\n)
\\nIf these changes still produce different results, then let's go back to the beginning and tell me exactly what you're trying to accomplish and exactly what the structure of your INDEX declaration is. Also tell me exactly how the results differ between the two examples.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-26 14:01:18\" },\n\t{ \"post_id\": 12513, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"Re: LEFT/RIGHT ONLY Joins\", \"username\": \"dsanchez\", \"post_text\": \"Count is part of the key and yeah I know they are not exact opposites, is just and example of the minimum amount of code needed to reproduce the case.\", \"post_time\": \"2016-10-26 08:34:00\" },\n\t{ \"post_id\": 12463, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"Re: LEFT/RIGHT ONLY Joins\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nIs IDX.count a payload field or a search term in the index?\\n\\nAlso, count < 0 and count > 0 are not exact opposites -- what about count = 0?\\n\\nRichard\", \"post_time\": \"2016-10-25 18:26:54\" },\n\t{ \"post_id\": 12443, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"Re: LEFT/RIGHT ONLY Joins\", \"username\": \"dsanchez\", \"post_text\": \"I "negated" the condition since I expect the RIGHT ONLY Join to return what DOESN'T match the expression.\", \"post_time\": \"2016-10-25 17:41:27\" },\n\t{ \"post_id\": 12433, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"Re: LEFT/RIGHT ONLY Joins\", \"username\": \"rtaylor\", \"post_text\": \"Daniel,\\n\\nYour example code and description are not in agreement.\\n\\nYou code says: RIGHT.count < 0\\n\\nBut your text says: ignoring the condition of "count > 0"\\n\\nIs this a typo, or the problem? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-25 17:29:59\" },\n\t{ \"post_id\": 12413, \"topic_id\": 3103, \"forum_id\": 10, \"post_subject\": \"LEFT/RIGHT ONLY Joins\", \"username\": \"dsanchez\", \"post_text\": \"I wanted to ask a question and possible bug in the ONLY joins.\\n\\nI am trying to do a JOIN between a dataset and an index. I am trying to make it a keyed join obviously and for that I need the index on the right side but I am finding the following problem.\\n\\nThis code:\\n
\\nJOIN(\\n\\tDS,\\n\\tIDX,\\n\\tLEFT.id=RIGHT.id AND RIGHT.count < 0,\\n\\tTRANSFORM(...),\\n\\tRIGHT ONLY\\n)\\n
\\nAnd this code:\\n\\nDS2 := IDX(count>0);\\nJOIN(\\n\\tDS,\\n\\tDS2,\\n\\tLEFT.id=RIGHT.id,\\n\\tTRANSFORM(...),\\n\\tRIGHT ONLY\\n)\\n
\\n\\nThe first one is the "optimal" code since it uses the index for the join and at the same time \\nIs not returning the same results. It looks like the first one is returning everything that doesn't match left on "id" but is ignoring the condition of "count > 0". I don't know if this is the expected behavior since the documentation kind of says that (every record from RIGHT with no match on LEFT) but I was expecting that it was "every record on RIGHT that doesn't match the expression".\\n\\nCould I get some clarification on this so I can work around it or log a ticket on Jira?\", \"post_time\": \"2016-10-25 15:57:19\" },\n\t{ \"post_id\": 13373, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"Allan\", \"post_text\": \"I find some checks are more easily done outside HPCC.\\nIf you have Linux then you have 'awk', a very useful (and fast) sub-system, where counting and identifying records with the wrong number of fields is a one liner.\\n(validating checksums on injested feeds is also easier with Shell scripts.)\\nYours\\n\\nAllan\", \"post_time\": \"2016-11-21 09:04:48\" },\n\t{ \"post_id\": 13093, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,how does HPCC deal with missing fields in records?
If you mean in a CSV file, then there will always be a field delimiter that indicates the value is missing if the field is between fields that do have values. Missing fields at the end of a CSV record are ignored. \\n\\nBut how they are treated depends on how you define the field in your RECORD structure -- missing STRING fields will be blank, and missing numeric (INTEGER, UNSIGNED, REAL, etc.) fields will be zero.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-09 17:34:41\" },\n\t{ \"post_id\": 13083, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nThanks for the help, really appreciate it. Doing the training videos now.\\n\\nI had another question though, how does HPCC deal with missing fields in records?\", \"post_time\": \"2016-11-09 17:08:05\" },\n\t{ \"post_id\": 12883, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nOK, your problem is that you sprayed the file as a CSV file but you're declaring it as a THOR/FLAT file. You need to change the THOR option on your DATASET to CSV (ensuring you specify the correct delimiters).\\n\\nAnd, once again, this is the exact issue covered in the first Intro to ECL class. Taking our online courses is NOT a waste of time.
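A hedged sketch of that change, reusing the IMPORT, logical filename and layout from the declaration posted elsewhere in this thread; the tab separator is an assumption based on how the file was described as being sprayed:

IMPORT imagefeatures;
EXPORT File_TestImages :=
    DATASET('~vc::sampleimages::sample-hpcc',
            imagefeatures.test_images,
            CSV(SEPARATOR('\t')));  // declare it as CSV (matching the spray), not THOR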
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-07 19:40:35\" },\n\t{ \"post_id\": 12863, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\n\\nIMPORT imagefeatures;\\nEXPORT File_TestImages := \\nDATASET('~vc::sampleimages::sample-hpcc',imagefeatures.test_images,THOR);\\n\\nWhen I run this, I get the following error\\nError: System error: 1: File //10.149.0.39/var/lib/HPCCSystems/hpcc-data/thor/vc/sampleimages/sample-hpcc._6_of_10 size is 134 which is not a multiple of 138 (in Disk Read G1 E2) (0, 0), 1, \\n\\nVishnu\", \"post_time\": \"2016-11-07 19:08:00\" },\n\t{ \"post_id\": 12791, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThose tutorial steps are specific for the fixed-length record flat file it's designed to work with. So you sprayed a CSV file, but did you make your DATASET declaration define it as a CSV file, or did you use THOR as the file type? Exactly what error are you getting?\\n\\nI'd suggest before going too much further that you go through our free online ECL courses here: https://learn.lexisnexis.com/hpcc. They will explain the fundamentals of ECL programming so you can then more easily extrapolate from our examples to your real-world problems.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-03 14:06:30\" },\n\t{ \"post_id\": 12781, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nI'm not sure what you man. I followed the tutorial on this link http://cdn.hpccsystems.com/releases/CE- ... .0.6-1.pdf\\n\\nSprayed the file onto cluster as a delimited file then followed steps on pages 11-14\\n\\nVishnu\", \"post_time\": \"2016-11-03 00:30:06\" },\n\t{ \"post_id\": 12771, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\nI'm not sure what you mean, I followed this tutorial,\\nhttp://cdn.hpccsystems.com/releases/CE- ... .0.6-1.pdf\\n\\nI sprayed the file onto the cluster as a delimited file and followed steps on pages 11-14\\n\\nVishnu\", \"post_time\": \"2016-11-02 20:05:11\" },\n\t{ \"post_id\": 12691, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu, OK
I tried to count the number of records but the file doesn't seem to match the structure I have created
Exactly how does it not seem to match? You show the RECORD structure but not the DATASET declaration for the file -- that would help.\\n\\nRichard\", \"post_time\": \"2016-10-31 19:14:39\" },\n\t{ \"post_id\": 12681, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nApologies, I wasn't clear. The links dont contain the data, they're part of the data, the first two lines(from 008... till nature) is a sample record of the dataset.\\n\\nVishnu\", \"post_time\": \"2016-10-31 18:44:40\" },\n\t{ \"post_id\": 12671, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Re: Text File Processing\", \"username\": \"rtaylor\", \"post_text\": \"vchinta,\\n\\nYour links just show me pictures, not a CSV-type dataset. Please post some example records.\\n\\nRichard\", \"post_time\": \"2016-10-31 18:37:49\" },\n\t{ \"post_id\": 12651, \"topic_id\": 3191, \"forum_id\": 10, \"post_subject\": \"Text File Processing\", \"username\": \"vchinta\", \"post_text\": \"0000089f-90c4-35fa-a9fc-bdba845b2677\\thttp://d.yimg.com/sr/flcr/2/0000089f-90 ... ba845b2677\\thttp://www.flickr.com/photos/kiwinz/3527600132/\\tkiwinz\\t2009-05-13T08:48:19Z\\tFlown the coop\\tAttribution (CC BY 2.0) http://creativecommons.org/licenses/by/2.0/\\tnature\\n\\nThis is a sample record of a text file that I am trying to process. I sprayed it as a delimited file with \\\\t as the seperator.\\n\\nI tried to count number of records but the file doesnt seem to match the structure I have created. \\nEXPORT test_images := RECORD\\n\\tSTRING36 FileName;\\n\\tSTRING64 Url1;\\n STRING64 Url2;\\n\\tSTRING50 Username;\\n\\tSTRING22 Time;\\n STRING40 Caption;\\n STRING30 CC;\\n STRING10 Tag;\\nEnd;\\nThis might seem like a very simple question but I'd appreciate the help.\", \"post_time\": \"2016-10-31 16:20:50\" },\n\t{ \"post_id\": 12811, \"topic_id\": 3211, \"forum_id\": 10, \"post_subject\": \"Re: Spraying Multiple Files\", \"username\": \"rtaylor\", \"post_text\": \"John,I have an ECL job that can spray multiple .CSV files (by using a wildcard) to a single target file. Having not found any documentation on this, but I'm guessing that the order of files returned from the wildcard is the order of the data in the target file.
Since, AFAIK only DFUplus.exe allows wildcard use, I have to assume you're using DFUplus.exe to do your spray. Here's the PDF on that tool: http://cdn.hpccsystems.com/releases/CE-Candidate-6.0.6/docs/TheECLIDEandHPCCClientTools-6.0.6-1.pdfHere's my question: is there a way from within ECL to determine the file names and order in which they were sprayed to the target? It would help to be able to back-track a failed record to it's source file.
Pages 103 & 104 have the information you're looking for (filename{:length}).\\n\\nAll our HPCC/ECL docs are available for download here: https://hpccsystems.com/download/documentation\\n\\nYou should also look at the ECL Programmer's Guide article "Working with BLOBs" which deals directly with this wildcard spray scenario. The ECL Programmer's Guide is available in the .chm file from within the IDE (F1 is the "magic" key for this) and also downloadable in PDF format from the above-mentioned URL.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-03 17:35:00\" },\n\t{ \"post_id\": 12801, \"topic_id\": 3211, \"forum_id\": 10, \"post_subject\": \"Spraying Multiple Files\", \"username\": \"John Meier\", \"post_text\": \"I have an ECL job that can spray multiple .CSV files (by using a wildcard) to a single target file. Having not found any documentation on this, but I'm guessing that the order of files returned from the wildcard is the order of the data in the target file.\\nSo if I spray File1*.csv and on the LandingZone I have File01.csv, File10.csv, File11.csv, I'll only spray 2 files (File10 and File11).\\nIf the file contents are:\\nFile10\\n------\\nrecnbr5...Type1..person data\\nrecnbr6...Type2..person data\\n\\nFile11\\n------ \\nrecnbr1...Type1..person data\\nrecnbr2...Type2..person data\\n\\nOn the spray target , I'll get:\\nPersons\\n-------\\nrecnbr5...Type1..person data\\nrecnbr6...Type2..person data\\nrecnbr1...Type1..person data\\nrecnbr2...Type2..person data\\n\\nSo, within a different ECL job, I am now going to process "Persons". Type1 data are primary records, Type2 are secondary records. I ITERATE over the file appending keys that link the Type1 to it's corresponding Type2. As part of the key I include a file number (I can tell when the records change so I create a number 01, 02, etc.) and the record sequence number within that particular file.\\n\\nHere's my question: is there a way from within ECL to determine the file names and order in which they were sprayed to the target? It would help to be able to back-track a failed record to it's source file. Things I know when I execute the code would include the target file name and the date it was sprayed. I've looked in the Standard Library Reference and nothing stands out.\\n\\nThanks for the assist!\\nJohn\", \"post_time\": \"2016-11-03 15:18:06\" },\n\t{ \"post_id\": 13853, \"topic_id\": 3233, \"forum_id\": 10, \"post_subject\": \"Re: Do we have inbuilt telephone number parser in HPCC?\", \"username\": \"Allan\", \"post_text\": \"I used to use RegexBuddy https://www.regexbuddy.com/ to get my regular expressions straight, now I just use HPCC's REGEXFIND directly.\\n\\nThere is detailed information out there on parsing telephone numbers, try this http://blog.stevenlevithan.com/archives/validate-phone-number/comment-page-2 for a start.\", \"post_time\": \"2016-12-19 14:51:44\" },\n\t{ \"post_id\": 12913, \"topic_id\": 3233, \"forum_id\": 10, \"post_subject\": \"Re: Do we have inbuilt telephone number parser in HPCC?\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nAFAIK, there is no such standard parser included in the HPCC/ECL platform. You can use ECL's PARSE function to create one. 
\\n\\nBut if you're working within the RELX Group, then you should ask your HPCC/ECL colleagues if they've already written one that you can use.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-08 15:28:47\" },\n\t{ \"post_id\": 12873, \"topic_id\": 3233, \"forum_id\": 10, \"post_subject\": \"Do we have inbuilt telephone number parser in HPCC?\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello,\\n\\nI'm working on parsing a telephone number in the data to format the numbers and represent them in one common standardized format for a particular country/locale.\\n\\nJust curious to know if there is any inbuilt package available to parse these telephone number and convert them to standardized format?\\n\\nYour help is appreciated. \\n\\nThank you.\\n-Nilesh Chavan.\", \"post_time\": \"2016-11-07 19:36:02\" },\n\t{ \"post_id\": 13283, \"topic_id\": 3243, \"forum_id\": 10, \"post_subject\": \"Re: Can we use SALT on VM?\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nThe simple way to answer the question would be to try it and see.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-16 17:05:22\" },\n\t{ \"post_id\": 12893, \"topic_id\": 3243, \"forum_id\": 10, \"post_subject\": \"Can we use SALT on VM?\", \"username\": \"nileshdchavan\", \"post_text\": \"Just wanted to know if the SALT can be installed and run on VM?\", \"post_time\": \"2016-11-07 21:19:43\" },\n\t{ \"post_id\": 13003, \"topic_id\": 3253, \"forum_id\": 10, \"post_subject\": \"Re: Do we have fuzzy matching readily available in HPCC?\", \"username\": \"nileshdchavan\", \"post_text\": \"Thanks Jim and Richard. This helps.\", \"post_time\": \"2016-11-08 19:08:03\" },\n\t{ \"post_id\": 12983, \"topic_id\": 3253, \"forum_id\": 10, \"post_subject\": \"Re: Do we have fuzzy matching readily available in HPCC?\", \"username\": \"JimD\", \"post_text\": \"If you are looking for a fuzzy match based upon phonetics, you should look at the Metaphone functions in the Standard Library.\\n\\nhttp://cdn.hpccsystems.com/releases/CE- ... f#page=104\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-11-08 19:02:23\" },\n\t{ \"post_id\": 12923, \"topic_id\": 3253, \"forum_id\": 10, \"post_subject\": \"Re: Do we have fuzzy matching readily available in HPCC?\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nAFAIK, there is no standard fuzzy name matching function included in the HPCC/ECL platform. \\n\\nBut if you're working within the RELX Group, then you should ask your HPCC/ECL colleagues if they've already written one that you can use.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-08 15:31:41\" },\n\t{ \"post_id\": 12903, \"topic_id\": 3253, \"forum_id\": 10, \"post_subject\": \"Do we have fuzzy matching readily available in HPCC?\", \"username\": \"nileshdchavan\", \"post_text\": \"Any specific functions or packages readily available in HPCC?\\n\\ni'm looking for the name matching.\", \"post_time\": \"2016-11-07 21:35:46\" },\n\t{ \"post_id\": 12973, \"topic_id\": 3263, \"forum_id\": 10, \"post_subject\": \"Re: Error uploading a file to landing zone\", \"username\": \"nileshdchavan\", \"post_text\": \"Gotcha. I just wanted to make sure i'm not doing anything wrong before i reach out to Admin . Thank you for your quick help!\", \"post_time\": \"2016-11-08 16:38:39\" },\n\t{ \"post_id\": 12963, \"topic_id\": 3263, \"forum_id\": 10, \"post_subject\": \"Re: Error uploading a file to landing zone\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,
[8007: c:/data//input_oc.csv.part[10.XXX.0.XX:7100] ERROR: RFSERR_RenameFailed(13) 'CFile::rename(c:/data//input_oc.csv.part, c:/data//input_oc.csv), Permission denied']
The error says: permission denied. Looks like it's probably a rights issue that you need to bring up to the system administrator.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-08 16:35:45\" },\n\t{ \"post_id\": 12953, \"topic_id\": 3263, \"forum_id\": 10, \"post_subject\": \"Re: Error uploading a file to landing zone\", \"username\": \"nileshdchavan\", \"post_text\": \"I'm not sure, i'm not editing the path and hence i dont know where that came from.\\n\\nhere are my settings before loading the files. See attached snapshot.\\n\\nWhen i click on the Upload Now button, i get the error i mentioned above.\", \"post_time\": \"2016-11-08 16:21:47\" },\n\t{ \"post_id\": 12943, \"topic_id\": 3263, \"forum_id\": 10, \"post_subject\": \"Re: Error uploading a file to landing zone\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,c:/data//input_oc.csv
Where did the double slash come from?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-08 16:04:31\" },\n\t{ \"post_id\": 12933, \"topic_id\": 3263, \"forum_id\": 10, \"post_subject\": \"Error uploading a file to landing zone\", \"username\": \"nileshdchavan\", \"post_text\": \"I'm trying to upload the input csv data file to landing zone to spray it later but getting following error -\\n\\n[8007: c:/data//input_oc.csv.part[10.XXX.0.XX:7100] ERROR: RFSERR_RenameFailed(13) 'CFile::rename(c:/data//input_oc.csv.part, c:/data//input_oc.csv), Permission denied']\\n\\nFile Name - input_oc.csv\\n\\nAm i missing anything ?\", \"post_time\": \"2016-11-08 15:57:30\" },\n\t{ \"post_id\": 13043, \"topic_id\": 3273, \"forum_id\": 10, \"post_subject\": \"Re: How do we despray logical file to landing zone in csv/ex\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,not sure how to despray the file as .csv file
Despray doesn't need any options for that. It desprays whatever kind of file it is AS whatever kind of file it is.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-09 14:34:28\" },\n\t{ \"post_id\": 13013, \"topic_id\": 3273, \"forum_id\": 10, \"post_subject\": \"How do we despray logical file to landing zone in csv/excl?\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello,\\n\\nI want to save my logical file to spreadsheet (excel file) but my file has more than 50K records hence when i try to save that to .xls, it gives me following error -\\n\\n20064 \\t2016-11-08 21:32:03 GMT: The data set is too big to be converted to an Excel file. Please use the gzip link to download a compressed XML data file.\\n\\ni thought of using despray, and it was successfull but not sure how to despray the file as .csv file. The online video tutorials explains basic despray without any option.\\n\\nCan you please help with this? Thank you.\", \"post_time\": \"2016-11-08 21:34:10\" },\n\t{ \"post_id\": 13323, \"topic_id\": 3353, \"forum_id\": 10, \"post_subject\": \"Re: NodeMinRowsProcessed in the Graphs\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,1. What does NodeMinRowsProcessed mean in 6.0?
This is similar to the way the skew numbers are evaluated -- the number of records that were processed on the one node that processed the fewest (for NodeMinRowsProcessed) or most (for NodeMaxRowsProcessed) rows of data. They definitely do NOT specify which node that is. I don't have a 5.0 system readily available to verify it, but my assumption is that this has not changed for 6.0 and that is also the way it was in 5.0.\\n2. Is the best way to determine if the distribution is not working well would be to compare the the number of average rows per node vs the maximum rows processed on a node vs the minimum rows processed? Also to look at the maximum and minimum skew?
Yes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-17 14:45:33\" },\n\t{ \"post_id\": 13313, \"topic_id\": 3353, \"forum_id\": 10, \"post_subject\": \"NodeMinRowsProcessed in the Graphs\", \"username\": \"georgeb2d\", \"post_text\": \"I am confused about what this means in the graphs. \\n\\nAt one point I was told this is the minimum node that had data and NodeMaxRowsProcessed in the Graphs was the maximum node that had data in the graphs. So if NodeMinRowsProcessed had 2 in it and NodeMaxRowsProcessed had 317 in it for a 400 node cluster nodes 1, and nodes 318 to 400 had no data. That seemed to be the case for the 5.0 graphs. \\n\\nHowever, looking at graphing in 6.0 I find that NodeMinRowsProcessed and NodeMaxRowsProcessed seems to mean something different. NodeMinRowsProcessed seems to mean the node where the minimum rows were processed. NodeMasRowsProcessed seems to mean the node where the maximum rows were processed.\\n\\nI have graphs in 6.0 where the number for NodeMinRowsProcessed = 324 and the NodeMaxRowsProcessed = 106.\\n\\nSo two questions:\\n1. What does NodeMinRowsProcessed mean in 6.0? \\n\\n2. Is the best way to determine if the distribution is not working well would be to compare the the number of average rows per node vs the maximum rows processed on a node vs the minimum rows processed? Also to look at the maximum and minimum skew? \\n\\nI am open to suggestions.\", \"post_time\": \"2016-11-17 14:23:40\" },\n\t{ \"post_id\": 13773, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nInteresting. I'll have a think on it.\\n\\nThe key elements of this solution are:
Simple \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-14 04:37:55\" },\n\t{ \"post_id\": 13763, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"Allan\", \"post_text\": \"Once again, Thanks Richard.\\n\\nThese will take a bit of time to digest. Just wondered if this would be a good class exercise, as it's simple to state, yet tricky to implement. It would at least sort the men from the boys, and you may get back some interesting implementations. (I know which category I'd end up in) \\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-12-13 20:05:59\" },\n\t{ \"post_id\": 13753, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nAnd a slightly more terse version that also numbers each group sequentially:
IMPORT Std;\\nd_rec := {unsigned1 id,UNSIGNED4 date};\\nd := DATASET([{1,20150101}\\n ,{2,20150201}\\n ,{3,20150301}\\n ,{4,20150401}\\n ,{5,20160229}\\n ,{6,20170101}\\n ,{7,20170102}\\n ,{8,20170103}\\n ,{9,20180601}],d_rec);\\nOutRec := RECORD\\n unsigned1 id;\\n DATASET(d_rec) Grp;\\nEND;\\n\\nRawGrps := PROJECT(d,TRANSFORM(OutRec,\\n SELF.Grp := d(Date BETWEEN LEFT.Date AND\\n Std.Date.AdjustCalendar(LEFT.Date,1)),\\n SELF := LEFT));\\nOutRec IterXF(OutRec L, OutRec R) := TRANSFORM\\n ds := PROJECT(R.Grp,TRANSFORM(d_rec,\\n SKIP(LEFT.id IN SET(L.Grp,id)),\\n SELF := LEFT));\\n SELF.ID := IF(EXISTS(ds),L.ID+1, SKIP);\\n SELF := R;\\nEND;\\n\\nJustGrps := ITERATE(RawGrps,IterXF(LEFT,RIGHT));\\nJustGrps;
\\nAnd, of course, you could turn it into a FUNCTION, like this:IMPORT Std;\\nd_rec := {UNSIGNED4 id,UNSIGNED4 date};\\nGroupWithinYear(DATASET(d_rec) d) := FUNCTION\\n OutRec := RECORD\\n UNSIGNED4 id;\\n DATASET(d_rec) Grp;\\n END;\\n RawGrps := PROJECT(d,TRANSFORM(OutRec,\\n SELF.Grp := d(Date BETWEEN LEFT.Date AND\\n Std.Date.AdjustCalendar(LEFT.Date,1)),\\n SELF := LEFT));\\n OutRec IterXF(OutRec L, OutRec R) := TRANSFORM\\n ds := PROJECT(R.Grp,TRANSFORM(d_rec,\\n SKIP(LEFT.id IN SET(L.Grp,id)),\\n SELF := LEFT));\\n SELF.ID := IF(EXISTS(ds),L.ID+1, SKIP);\\n SELF := R;\\n END;\\n JustGrps := ITERATE(RawGrps,IterXF(LEFT,RIGHT));\\n RETURN JustGrps;\\nEND;\\t\\n\\nds := DATASET([{1,20150101}\\n ,{2,20150201}\\n ,{3,20150301}\\n ,{4,20150401}\\n ,{5,20160229}\\n ,{6,20170101}\\n ,{7,20170102}\\n ,{8,20170103}\\n ,{9,20180601}],d_rec);\\n\\nGroupWithinYear(ds);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-13 07:55:46\" },\n\t{ \"post_id\": 13733, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"Allan\", \"post_text\": \"Brilliant Richard!\\n\\nThanks very much.\\nYours\\nAllan\", \"post_time\": \"2016-12-12 07:34:33\" },\n\t{ \"post_id\": 13701, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, here's my take on getting to where you want to be:IMPORT Std;\\nd := DATASET([{1,20150101}\\n ,{2,20150201}\\n ,{3,20150301}\\n ,{4,20150401}\\n ,{5,20160229}\\n ,{6,20170101}\\n ,{7,20170102}\\n ,{8,20170103}\\n ,{9,20180601}],{unsigned1 id,UNSIGNED4 date});\\nd_rec := RECORDOF(d);\\nOutRec := RECORD\\n unsigned1 id;\\n DATASET(d_rec) Grp;\\nEND;\\n\\nOutRec XF(d L) := TRANSFORM\\n SELF.Grp := d(Date BETWEEN L.Date AND Std.Date.AdjustCalendar(L.Date,1));\\n SELF := L;\\nEND;\\nRawGrps := PROJECT(d,XF(LEFT));\\n\\nOutRec IterXF(OutRec L, OutRec R) := TRANSFORM\\n FullSet := SET(L.Grp,id);\\n d_rec ProjXF(d_rec Chld) := TRANSFORM,SKIP(Chld.id IN FullSet)\\n SELF := Chld;\\n END;\\t\\t\\n SELF.ID := IF(EXISTS(PROJECT(R.Grp,ProjXF(LEFT))),R.ID, SKIP);\\n SELF := R;\\nEND;\\n\\nJustGrps := ITERATE(RawGrps,IterXF(LEFT,RIGHT));\\nJustGrps;
I'm nesting a PROJECT inside an ITERATE to SKIP those RawGrps whose child datasets are simply subsets of the previous Group. The result is exactly what your example suggested it should be. Hopefully this technique will be applicable to your real problem. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-07 11:19:01\" },\n\t{ \"post_id\": 13671, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"Allan\", \"post_text\": \"Well,\\n\\nItems 1,2,3 and 4 are obviously within one group as they are all within 1 year of each other.\\nItems 3,4 and 5 are also within 1 year of each other but items 1 and 2 are not within that group as they are too early.\\nItems 5,6,7 and 8 are within 1 year of each other, but all previous items are too early to be in this group.\\nItem 9 is a soul member of a group as no other dates are within 1 year of it.\", \"post_time\": \"2016-12-07 08:42:01\" },\n\t{ \"post_id\": 13493, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,
one has to take this result and somehow detect that group 3 contains an entry that cannot belong with an entry in group 1.
What exactly are the criteria for determining that? I didn't detect a pattern to your example result groups.\\n\\nRichard\", \"post_time\": \"2016-11-30 13:08:43\" },\n\t{ \"post_id\": 13483, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"Allan\", \"post_text\": \"Thanks for this Richard,\\n\\nI was, in fact, generating something like this but it does not generate the groups as set out above. In the results from our example one has to take this result and somehow detect that group 3 contains an entry that cannot belong with an entry in group 1.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2016-11-30 10:25:27\" },\n\t{ \"post_id\": 13443, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Re: Interesting grouping problem.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI would approach it something like this:IMPORT Std;\\nd := DATASET([{1,20150101}\\n              ,{2,20150201}\\n              ,{3,20150301}\\n              ,{4,20150401}\\n              ,{5,20160229}\\n              ,{6,20170101}\\n              ,{7,20170102}\\n              ,{8,20170103}\\n              ,{9,20180601}],{unsigned1 id,UNSIGNED4 date});\\n\\nOutRec := RECORD\\n  unsigned1 id;\\n  DATASET(RECORDOF(d)) Grp;\\nEND;\\n\\nOutRec XF(d L) := TRANSFORM\\n  SELF.Grp := d(Date BETWEEN L.Date AND Std.Date.AdjustCalendar(L.Date,1));\\n  SELF := L;\\nEND;\\nPROJECT(d,XF(LEFT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-29 14:49:18\" },\n\t{ \"post_id\": 13353, \"topic_id\": 3383, \"forum_id\": 10, \"post_subject\": \"Interesting grouping problem.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm been working on this for a couple of days now and, to be frank, its got me beat.\\nI need to group a set of records that are all within 1 year of each other, so in the example below:\\n\\nd := DATASET([{1,20150101}\\n ,{2,20150201}\\n ,{3,20150301}\\n ,{4,20150401}\\n ,{5,20160229}\\n ,{6,20170101}\\n ,{7,20170102}\\n ,{8,20170103}\\n ,{9,20180601}],{unsigned1 id,UNSIGNED4 date});\\n
\\nThe records should end up grouped thus:\\n\\n1,2,3,4\\n3,4,5\\n5,6,7,8\\n9\\n
\\nNote that records 3,4 and 5 end up on more than 1 group.\\nWith JOINS I can match records within 1 year and those that are definitely in separate groups, but that does not get me any further into the grouping process.\\n\\nSo simplify a bit, the problem can be stated as just numbers where records within a numeric range are in a group.\\nI'm wondering if the dreaded GRAPH or LOOP functions get involved?\\n\\nAny road up, any help/pointers would be gratefully received.\\nCheers\\nAllan\", \"post_time\": \"2016-11-20 08:56:25\" },\n\t{ \"post_id\": 13593, \"topic_id\": 3423, \"forum_id\": 10, \"post_subject\": \"Re: Finding Documentation for Available Services and Librari\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nInstalling the latest ECL IDE always gets you the latest help file (press F1 in any editor window) and that help file contains the latest ECL Language Reference, Programmer's Guide, and Service Library Reference. \\n\\nThe Service Library Reference documents most of the library functions. Any not already documented there have JavaDoc information in their respective code files (look under the "ecllibrary" folder in your Repository).\\n\\nAnd, of course, all the latest documentation is always available for download here: https://hpccsystems.com/download/documentation\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-03 10:07:47\" },\n\t{ \"post_id\": 13583, \"topic_id\": 3423, \"forum_id\": 10, \"post_subject\": \"Finding Documentation for Available Services and Libraries\", \"username\": \"John Meier\", \"post_text\": \"I've been looking at some of the libraries included with our HPCC installation and I find it amazing at what's available. Only trouble is, I cannot find any documentation on this stuff (what it does / how to use it...). I could have used some help with node() in lib_thorlib.thorlib or SprayVariable in lib_fileservices.fileservices. Where is the (or should I ask "Is there any") documentation on this stuff?\\n\\nJohn\", \"post_time\": \"2016-12-02 18:39:01\" },\n\t{ \"post_id\": 13653, \"topic_id\": 3443, \"forum_id\": 10, \"post_subject\": \"Re: Post Join Sort Keep(2)\", \"username\": \"newportm\", \"post_text\": \"Hey Richard,\\n\\nThat is what I ended up doing, I was just looking for a way to utilize a post join sort and the keep option of the join. Without having to do the extra sort and dedup.\\n\\nThanks for your confirmation. \\n\\nTN\", \"post_time\": \"2016-12-06 12:46:13\" },\n\t{ \"post_id\": 13643, \"topic_id\": 3443, \"forum_id\": 10, \"post_subject\": \"Re: Post Join Sort Keep(2)\", \"username\": \"rtaylor\", \"post_text\": \"newportm,\\n\\nHere's how I would do that:Titles := DATASET([{'Dr',1},{'Esq',2},{'Mr',3},\\n {'Mrs',4},{'Ms',5}],{STRING3 Title,UNSIGNED1 Ranking});\\nContacts := DATASET([{1,'Fred','Mr'},{1,'Sue','Dr'},{1,'Jo','Mrs'},\\n {2,'Fred','Ms'},{2,'Sue','Dr'},{2,'Jo','Mrs'}],\\n {UNSIGNED1 CmpID, STRING5 Name, STRING3 Title});\\n\\nJ1 := JOIN(Contacts,Titles,LEFT.Title=RIGHT.Title);\\nDEDUP(SORT(J1,CmpID,Ranking),LEFT.CmpID=RIGHT.CmpID,KEEP(2));
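A possible alternative, not from the thread itself: the same "keep the two best-ranked titles per company" can also be expressed with GROUP and TOPN, assuming the same Titles, Contacts, and J1 definitions as in the example just above.

  // Sort and group by company, then take the two lowest Ranking values per group.
  Top2 := TOPN(GROUP(SORT(J1,CmpID),CmpID),2,Ranking);
  OUTPUT(UNGROUP(Top2));

This trades the explicit SORT/DEDUP/KEEP(2) for a per-group TOPN; the result set is the same.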
\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-12-06 05:43:53\" },\n\t{ \"post_id\": 13633, \"topic_id\": 3443, \"forum_id\": 10, \"post_subject\": \"Post Join Sort Keep(2)\", \"username\": \"newportm\", \"post_text\": \"Table A has a list of contacts including Name and Title\\n\\nTable B is a lookup table with contact title ranked.\\n\\nI need to join contact name table with the title table and keep only the top 2 ranked contacts by title with as few steps as possible.\\n\\nI was looking at using an Inner JOIN , Ordered TRUE , Keep(2)\\n\\nHowever, I'm not sure how to sort the result for the keep2 to select only the highest ranked contacts. Note They are not always 1 and 2 on the ranked value.\\n\\nThanks\", \"post_time\": \"2016-12-05 16:02:13\" },\n\t{ \"post_id\": 13953, \"topic_id\": 3493, \"forum_id\": 10, \"post_subject\": \"Re: Fully Qualified String Field Definitions\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,\\n\\nYes, absolutely. If the string field is sparsely populated but possibly long when it does have data, then STRING is the way I would go.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-03 16:13:07\" },\n\t{ \"post_id\": 13933, \"topic_id\": 3493, \"forum_id\": 10, \"post_subject\": \"Re: Fully Qualified String Field Definitions\", \"username\": \"chuck.beam\", \"post_text\": \"Thanks Richard,\\n\\nBased on your comments, I should use the unqualified STRING for my current code.\\n\\nThe fields are defined in the raw data as STRING255, but they rarely contains any data at all (most records are blank).\\n\\nThis would require the least amount of space on disk and in memory, correct?\\n\\nChuck\", \"post_time\": \"2017-01-03 16:08:30\" },\n\t{ \"post_id\": 13923, \"topic_id\": 3493, \"forum_id\": 10, \"post_subject\": \"Re: Fully Qualified String Field Definitions\", \"username\": \"rtaylor\", \"post_text\": \"Chuck,Am I better off to fully qualify my STRING fields or allow them to be variable length?
The standard answer to this question is: With regards to my question, does either option affect the size of my dataset in memory or on disk?
Yes, both affect the storage and memory size:
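In short (standard ECL storage behaviour, not anything specific to this thread): a variable-length STRING field is stored as a leading 4-byte length count followed by its data, while a fixed-length STRINGn field stores exactly n bytes with no prefix. A minimal sketch, with made-up record names:

  FixedRec := RECORD
    STRING2 code;              // always exactly 2 bytes on disk and in memory
  END;
  OUTPUT(SIZEOF(FixedRec));    // 2
  // A plain STRING field has no constant size: a 2-character value
  // occupies 4 (length count) + 2 (data) = 6 bytes.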
Therefore, if you define a string field that always contains 2 characters as a STRING2 it occupies two bytes of storage, but define it as a STRING and it will occupy six.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-03 16:02:52\" },\n\t{ \"post_id\": 13863, \"topic_id\": 3493, \"forum_id\": 10, \"post_subject\": \"Fully Qualified String Field Definitions\", \"username\": \"chuck.beam\", \"post_text\": \"I have a question regarding STRING field definitions.\\n\\nAm I better off to fully qualify my STRING fields or allow them to be variable length?\\n\\nFor example I am working with a data file called BuildFax which contains multiple string data elements which can be up to 1000 characters in length.\\n\\nWhen I define the ECL fields as STRING1000 the strings are padded and difficult to view in ECL Watch.\\n\\nIf I define the ECL fields simply as STRING, the string fields are adjusted to the lengthof the field value and much easier to read in ECL Watch.\\n\\nWith regards to my question, does either option affect the size of my dataset in memory or on disk?\\n\\nWhat is the best practice I should follow?\\n\\nThanks\\nChuck\", \"post_time\": \"2016-12-19 16:24:24\" },\n\t{ \"post_id\": 13963, \"topic_id\": 3513, \"forum_id\": 10, \"post_subject\": \"Re: No physical file part for logical file\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nIt just worked fine on my 3-node training cluster on 6.0.4-1 so my first question is: since you're running this test on a 400-node cluster, are there at less than 400 records in that file? If so, try it again with a much larger file and see if that fixes it. Either way, a JIRA report is in order.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-03 16:27:54\" },\n\t{ \"post_id\": 13893, \"topic_id\": 3513, \"forum_id\": 10, \"post_subject\": \"No physical file part for logical file\", \"username\": \"janet.anderson\", \"post_text\": \"Hello,\\n\\nI'm helping a team member with a piece of code that gives the following error: "Error: System error: 10004: Graph[1], SLAVE #9 [...]: Graph[1], csvread[2]: No physical file part for logical file in::myproject::insourcetest, found at given locations: //.../var/lib/HPCCSystems/hpcc-data/thor/in/myproject/insourcetest._321_of_400 (Error = 115) (0, 0)"\\n\\nThe code is below (with IPs masked):\\n\\nrec := record\\nstring x;\\nend;\\n\\nfile := dataset('~in::myproject::insourcetest',rec,csv);\\n\\nfile;\\n\\nout := output(file,, '~thor::temp',\\n csv(heading(0), separator(','), quote([]), terminator(['\\\\n', '\\\\r\\\\n']), maxlength(418)), compressed, overwrite); \\n\\t\\t\\t\\ndespray := FileServices.DeSpray('~thor::temp', '99.999.99.99',\\n '/home/mypath/insourcetest', -1,\\n 'http://11.111.11.111:8010/FileSpray', , TRUE); \\n \\nsequential(out, despray);
\\n\\nThe first thing I notice is that they are taking a csv file, creating a dataset, then creating a new csv file from the dataset. But although it's not optimal, I don't see why it should throw an error. When I run the code piecemeal, I get the error at the point of the output. Also, I tried modifying the output statement to be csv with no options and a flat file and still got errors. Can you help my team understand why we get this error and how to resolve it?\", \"post_time\": \"2016-12-22 17:41:12\" },\n\t{ \"post_id\": 14513, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Hi Attila,\\n\\nThank you for your help, based on your example I was able to find what was at fault with my despray strategy:\\n* I should project & transform my dataset to the simple 2 field layout ("filename" and "geotiff") as you have in "imageRecord"\\n* I should despray to the landingzone drop folder and not despray to my home folder on the same server as the landing zone, this is probably due to permissions of the hpcc user\\n* I should use dstip to give the IP Address of the landingzone I am despraying to\\n* I should have asterix "*" at the end of my dstfile argument /data/HPCCSystems/dropzone/imageout/*\\n\\nI am despraying the image files as expected now.\\n\\nMy final despray command was like this (with some details redacted):\\n\\n\\ndfuplus username=USER password=PASSWORD action=despray server=http://HOST:9010 dstfile=/data/HPCCSystems/dropzone/imageout/* dstip=LANDINGZONE_IP_ADDRESS srcname=ati::test_images_with_prefix SPLITPREFIX=filename,filesize overwrite=1\\n
\\n\\nBest regards,\\n\\n-Andrew\", \"post_time\": \"2017-01-12 14:10:35\" },\n\t{ \"post_id\": 14503, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nAny progress in this issue?\\n\\nAttila\", \"post_time\": \"2017-01-12 13:27:44\" },\n\t{ \"post_id\": 14423, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nSo, I built a 5.6.8-1Debug version on my dev environment and played with images, blobs and DFUPlus. Here are some result:\\n\\nTo spray a directory with images (in my case there were 9 png files) I used this command:\\n\\ndfuplus action=spray server=. srcip=. srcfile='/var/lib/HPCCSystems/mydropzone/images/*' dstname=ati::test_images_with_prefix dstcluster=mythor PREFIX=filename,filesize overwrite=1 nosplit=1
\\n\\n(the '.' in server and srcip has the same effect as any real IP address, and is similar to http://localhost:8010 in my system)\\n\\n\\nTo read the file in ECL code (in ECL Playground):\\n/*\\n  Example code - use without restriction. \\n*/\\nimageRecord := RECORD\\n  string filename;\\n  data geotiff;\\nEND;\\nimageData := DATASET('~ati::test_images_with_prefix',imageRecord,FLAT);\\nimageData;\\n
\\nAnd finally despray images from the logical file:\\n\\ndfuplus action=despray server=http://localhost:8010 dstfile=despray-images-prefix-test srcname=ati::test_images_with_prefix SPLITPREFIX=filename,filesize overwrite=1
\\n\\nIMPORTANT: Spray without filesize prefix generates wrong file structure, which is not readable and desprayable.\\n\\nI hope this helps. If you have more question or problem please don't hesitate.\", \"post_time\": \"2017-01-10 15:50:50\" },\n\t{ \"post_id\": 14413, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"hi,\\n\\nthe filename was changed when in pasting into the forum, but the same error is seen when both spray-in and despray-out logical filenames match. I get the same error also when changing the SPLITPREFIX to SPLTPREFIX=FILENAME.\\n\\nDo you have an example of spraying a folder of JPEG images stored on the landingzone into a dataset (one row per image and each stored in a DATA BLOB field) and then despraying the same dataset of JPEG images back to another folder of the landing zone?\\n\\nI am happy that the spray action is working for me but I would really benefit from a worked example for the despray action.\", \"post_time\": \"2017-01-10 14:47:19\" },\n\t{ \"post_id\": 14223, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nIn the first post the despray source (logical) file name was: '~myusername::despray::tiles'. \\nIn the spray which creates a logical file of images blobs uses 'custname::tiles::geodataset' as target.\\n\\nIt looks like to me the target of spray and source of despray isn't the same. Some processing happened between the spray and despray? I suspect this because the spray uses 'PREFIX=FILENAME,FILESIZE', but the file dump, which I received contains only file names as prefixes.\\n\\nAttila\", \"post_time\": \"2017-01-09 17:28:45\" },\n\t{ \"post_id\": 14123, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Hi,\\n\\nOk, two more questions:\\n\\n1. May I get the command used to spray those images into HPCC?\\n\\ndfuplus action=spray server=http://host:9010 username=myusername overwrite=1 replicate=1 srcip=mysrcIP srcfile=/data/HPCCSystems/dropzone/image_tiles/*.tif dstcluster=thor03_20way dstname=custname::tiles::geodataset PREFIX=FILENAME,FILESIZE nosplit=1
\\n\\n2. May I get the dump at the beginning (1~2kb) of the first file part (I presume it is called something like this /var/lib/HPCCSYStems/hpcc-data/<cluster_and_or_path>/myusername/despray/tiles._1_of_X - where X is the number of parts generated.)\\n\\nI will email this to you.\\n\\nThanks\", \"post_time\": \"2017-01-09 15:32:49\" },\n\t{ \"post_id\": 14113, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nOk, two more questions:\\n\\n1. May I get the command used to spray those images into HPCC?\\n\\n2. May I get the dump at the beginning (1~2kb) of the first file part (I presume it is called something like this /var/lib/HPCCSYStems/hpcc-data/<cluster_and_or_path>/myusername/despray/tiles._1_of_X - where X is the number of parts generated.)\\n\\nThanks\", \"post_time\": \"2017-01-09 15:24:05\" },\n\t{ \"post_id\": 14093, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Same problem unfortunately\\n\\n\\n#!/bin/bash\\nif [ $# -eq 0 ]\\n then\\n echo "usage: despray_raster.sh [IP] [OUT_FOLDER] [LOGICAL_FILE] [USERNAME]"\\n else\\n read -s -p "Enter Password: " mypassword\\n echo ""\\n echo "Running dfuplus"\\n dfuplus password=$mypassword username="$4" action=despray server="$1" dstfile="$2" srcname="$3" SPLITPREFIX=FILENAME,FILESIZE \\nfi\\n
\\n\\nResults in \\n\\n\\nSubmitted WUID D20170109-150717\\nD20170109-150717 status: queued\\nFailed: DFUWU: cannot construct part file name\\n
\", \"post_time\": \"2017-01-09 15:10:15\" },\n\t{ \"post_id\": 14083, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi, \\nMay I ask you to try despray with 'SPLITPREFIX=FILENAME,[color=#FF0000:2ye2f9c6]FILESIZE' in your DFUPlus command?\\n\\nThanks\", \"post_time\": \"2017-01-09 15:04:59\" },\n\t{ \"post_id\": 14073, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Thanks Attila\\n\\n-A\", \"post_time\": \"2017-01-09 14:40:21\" },\n\t{ \"post_id\": 14063, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nThank you for your reply. \\nI will try to reproduce the situation (spray some images into my dev environment and then despray them) later today and I will come back my findings.\\n\\nAttila\", \"post_time\": \"2017-01-09 14:33:55\" },\n\t{ \"post_id\": 14053, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Hi Attila,\\n\\nQ. What is your aim with that desprays? \\nA. to extract the image data stored in the BLOB fields and reconstitute each BLOB as an individual image file on the landing zone. \\ne.g. \\ndataset:\\ndataset([{'myimage1.tif',234234,49492A004A05000012000001030001000000000100000101030001.....}], { string filename, integer8 filesize, data geotiff{blob} }); \\ngets extracted as :\\n/data/HPCCSystems/dropzone/my_raster_despray/myimage1.tif\\n\\nQ. What kind of target file structure do you expect/aim? \\nA. Image file >> https://en.wikipedia.org/wiki/GeoTIFF\\n\\nQ. How do you want to use the generated target?\\nA. We will be viewing the images in a GIS tool we use\\n\\n...\\n\\nSome background:\\nWe spray > 0.5 million geospatial raster image tiles into HPCC, each pixel in these images is associated a specific numerical value correlating to risk severity in a model (e.g. flood risk). We perform spatial filtering in HPCC and retrieve pixel values for locations (longitude,latitude) that match our criteria e.g. a point on the map. This is in production and actively used by customers via ROXIE and by data engineers in THOR.\\n\\nThe reason I want to despray is that I may want to filter the dataset and retrieve 4 images out of 0.5 millon and view the images as layers on a map to debug issues and perform data quality analysis.\", \"post_time\": \"2017-01-09 14:26:21\" },\n\t{ \"post_id\": 14033, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nWhat is your aim with that desprays? What kind of target file structure do you expect/aim? 
How do you want to use the generated target?\\n\\nThanks\\n\\nAttila\", \"post_time\": \"2017-01-09 13:56:40\" },\n\t{ \"post_id\": 14003, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Thanks Bob,\\n\\nwe are currently using 5.6.8-1\\n\\n-A\", \"post_time\": \"2017-01-06 10:26:36\" },\n\t{ \"post_id\": 13993, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Re: Error trying to despray images contained in BLOB\", \"username\": \"bforeman\", \"post_text\": \"Hi Andy,\\n\\nYour syntax looks good here, I forwarded your post to the development team for review. If you don't hear anything soon, I would create a new issue in the Issue Tracking System. What is the server version?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-01-05 18:58:26\" },\n\t{ \"post_id\": 13983, \"topic_id\": 3553, \"forum_id\": 10, \"post_subject\": \"Error trying to despray images contained in BLOB\", \"username\": \"afarrell\", \"post_text\": \"Hi,\\n\\nI am trying to despray image files contained in a blob field, each blob field (geotiff) contains one TIFF image. Any suggestions? I want to populate a directory with 1 image per row in the source dataset.\\n\\nI get the following error when I try to despray:\\nFailed: DFUWU: cannot construct part file name\\n\\nthis is my dfuplus shell script\\n\\nUsage:\\n./despray_raster.sh "http://host:9010" "/data/HPCCSystems/dropzone/my_raster_despray/" "~myusername::despray::tiles" MyUsername
\\n\\n\\n#!/bin/bash\\nif [ $# -eq 0 ]\\n then\\n echo "usage: despray_raster.sh [IP] [OUT_FOLDER] [LOGICAL_FILE] [USERNAME]"\\n else\\n read -s -p "Enter Password: " mypassword\\n echo ""\\n echo "Running dfuplus"\\n dfuplus password=$mypassword username="$4" action=despray server="$1" dstfile="$2" srcname="$3" SPLITPREFIX=FILENAME \\nfi
\\n\\nThe dataset layout\\n{ string filename, integer8 filesize, data geotiff{blob} };
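As a hedged sketch of the "pull out a handful of tiles" use case described elsewhere in this thread (the logical file name comes from the despray example above, but the tile names are made up; the despray itself would then use the same SPLITPREFIX options shown earlier):

  tileRec := RECORD
    STRING   filename;
    INTEGER8 filesize;
    DATA     geotiff{blob};
  END;
  tiles := DATASET('~myusername::despray::tiles', tileRec, FLAT);

  // Keep only the tiles of interest and write them to a small logical file;
  // that file can then be desprayed back to individual images on the dropzone.
  wanted := tiles(filename IN ['tile_0001.tif', 'tile_0002.tif']);
  OUTPUT(wanted,, '~myusername::despray::tiles_subset', OVERWRITE);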
\", \"post_time\": \"2017-01-04 11:39:19\" },\n\t{ \"post_id\": 17041, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,... filesize, which defaults to a 4-byte integer
Try it this way:imageRecord := RECORD\\n string filename;\\n INTEGER4 RecPos;\\n DATA image; \\nEND;\\nimageData := DATASET('~vchinta::testimages2',imageRecord,FLAT);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-11 14:01:51\" },\n\t{ \"post_id\": 17031, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"So line 515 column 0 indicates that the problem is introduced somewhere before that line.
\\n\\nFixed the errors, one of the function names was the issue. \\nPage 63 of the ClientTools.PDF (http://cdn.hpccsystems.com/releases/CE- ... 2.10-1.pdf) documents the DFUplus spray options and the one you're interested in is filesize, which defaults to a 4-byte integer. You can define that in your ECL just as you did the filename.\\n
\\n\\nI tried this but filesize does not work. \\n\\nThis is the code I'm using to spraydfuplus action=spray srcip=10.149.0.39 srcfile=/var/lib/HPCCSystems/mydropzone/*.jpeg server=**** username=vchinta password=**** dstname=vchinta::testimages2 dstcluster=mythor PREFIX=filesize,filename overwrite=1 nosplit=1
\\nand to readimageRecord := RECORD\\n\\tstring filename;\\n\\tUNSIGNED8 RecPos{virtual(filesize)};\\n\\tDATA image; \\nEND;\\nimageData := DATASET('~vchinta::testimages2',imageRecord,FLAT);
\\n\\nDid I make any mistake?\\n\\nThanks,\\nVishnu\", \"post_time\": \"2017-05-11 13:48:33\" },\n\t{ \"post_id\": 16971, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu, I'm also trying to get this done using the external service implementation in HPCC. But it won't let me pass DATA as a parameter and it won't let me typecast DATA to STRING or UNICODE which according to the documentation should be possible. Is there a solution to this problem?
I suggest that you submit a JIRA ticket for this problem and see what the developers have to say.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-08 17:38:41\" },\n\t{ \"post_id\": 16961, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"So line 515 column 0 indicates that the problem is introduced somewhere before that line.
\\n\\nOh thanks, I'll try to fix that. I'm also trying to get this done using the external service implementation in HPCC. But it won't let me pass DATA as a parameter and it won't let me typecast DATA to STRING or UNICODE which according to the documentation should be possible. Is there a solution to this problem? Somehow I can pass DATA as a parameter to the external service .SO\\n\\nVishnu\", \"post_time\": \"2017-05-08 16:13:15\" },\n\t{ \"post_id\": 16951, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nIt's a one-pass compiler, so the first error is the only one you need to worry about (everything else could be "domino effect"):Error: expected unqualified-id before string constant (515, 0), 6003, jpeg_decoder.h
So line 515 column 0 indicates that the problem is introduced somewhere before that line.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-08 14:49:19\" },\n\t{ \"post_id\": 16941, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard, I saved it in the same directory where HPCC saves the generated cpp files and that worked. But now it gives me these erros\\nError: expected unqualified-id before string constant (515, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (528, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (529, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (530, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (532, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (533, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (537, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (538, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (539, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (556, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (557, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (558, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (561, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (562, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (566, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (567, 0), 6003, jpeg_decoder.h\\nError: ‘"ll"’ cannot be used as a function (568, 0), 6003, jpeg_decoder.h\\nError: expected primary-expression before ‘inline’ (653, 0), 6003, jpeg_decoder.h\\nError: expected ‘;’ before ‘inline’ (653, 0), 6003, jpeg_decoder.h\\nError: expected ‘}’ before end of line (90, 0), 6003, W20170502-205953_1.cpp\\nError: expected declaration before end of line (90, 0), 6003, W20170502-205953_1.cpp\\nError: Compile/Link failed for W20170502-205953 (see '//10.149.0.38/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0), 3000, W20170502-205953\\nWarning: (0, 0), 0, \\nWarning: ---------- compiler output -------------- (0, 0), 0, \\nWarning: In file included from W20170502-205953_1.cpp:65: (0, 0), 0, \\nWarning: jpeg_decoder.h: In function ‘long long int user1(ICodeContext*, size32_t, const void*)’: (0, 0), 0, \\nWarning: jpeg_decoder.h:515: error: expected unqualified-id before string constant (0, 0), 0, \\nWarning: jpeg_decoder.h: In member function ‘void user1(ICodeContext*, size32_t, const void*)::Decoder::_UpsampleH(user1(ICodeContext*, size32_t, const void*)::Decoder::Component*)’: (0, 0), 0, \\nWarning: jpeg_decoder.h:528: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:529: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:530: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:532: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:533: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:537: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:538: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:539: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h: In member function ‘void user1(ICodeContext*, 
size32_t, const void*)::Decoder::_UpsampleV(user1(ICodeContext*, size32_t, const void*)::Decoder::Component*)’: (0, 0), 0, \\nWarning: jpeg_decoder.h:556: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:557: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:558: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:561: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:562: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:566: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:567: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h:568: error: ‘"ll"’ cannot be used as a function (0, 0), 0, \\nWarning: jpeg_decoder.h: In function ‘long long int user1(ICodeContext*, size32_t, const void*)’: (0, 0), 0, \\nWarning: jpeg_decoder.h:653: error: expected primary-expression before ‘inline’ (0, 0), 0, \\nWarning: jpeg_decoder.h:653: error: expected ‘;’ before ‘inline’ (0, 0), 0, \\nWarning: W20170502-205953_1.cpp:90: error: expected ‘}’ before end of line (0, 0), 0, \\nWarning: W20170502-205953_1.cpp: At global scope: (0, 0), 0, \\nWarning: W20170502-205953_1.cpp:90: error: expected declaration before end of line (0, 0), 0, \\nWarning: g++: W20170502-205953_1.cpp.o: No such file or directory (0, 0), 0, \\nWarning: (0, 0), 0, \\nWarning: --------- end compiler output ----------- (0, 0), 0,
\\nThe lines its referring to don't have any OR operators, any idea why these errors are occurring?\\n\\nThanks,\\nVishnu\", \"post_time\": \"2017-05-07 19:34:24\" },\n\t{ \"post_id\": 16891, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThe compiler has to be able to find it, so I'd start there.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-27 16:03:55\" },\n\t{ \"post_id\": 16881, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nWhere would I store the header file though?\\n\\nVishnu\", \"post_time\": \"2017-04-27 15:59:53\" },\n\t{ \"post_id\": 16871, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nI don't know. Try it simple test once and see.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-27 15:58:46\" },\n\t{ \"post_id\": 16861, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nAh, found my mistake. I tried that without including the spray options. I have another question regarding the BeginC++ structiure. I wanted to use this image decoder http://h4ck3r.net/2009/12/02/mini-jpeg-decoder/ which has a header file and a cpp file. Is this possible to implement in ECL?\\n\\nVishnu\", \"post_time\": \"2017-04-26 17:51:48\" },\n\t{ \"post_id\": 16831, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nPage 63 of the ClientTools.PDF (http://cdn.hpccsystems.com/releases/CE-Candidate-6.2.10/docs/HPCCClientTools-6.2.10-1.pdf) documents the DFUplus spray options and the one you're interested in is filesize, which defaults to a 4-byte integer. You can define that in your ECL just as you did the filename.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-25 14:41:37\" },\n\t{ \"post_id\": 16811, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nIs it possible to generate filesize like we do for fileposition? Couldn't find anything in the guide.\\n\\nVishnu\", \"post_time\": \"2017-04-23 17:52:32\" },\n\t{ \"post_id\": 16703, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"richardkchapman\", \"post_text\": \"You _could_ though I don't think you'd want to - there is no (documented) way to have the C++ code read a record at a time, and it would end up having to pull the whole dataset into memory before passing a pointer to the start of it to your embedded C++. Better to have the C++ take a single record, and call it from within a transform (or use it in place of a transform) - see the bloom bundle for an example of that.\", \"post_time\": \"2017-04-21 06:45:42\" },\n\t{ \"post_id\": 16693, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nimageRecord := RECORD\\n STRING filename;\\n DATA image; \\n UNSIGNED8 RecPos{virtual(fileposition)};\\nEND;\\nimageData := DATASET('LE::imagedb',imageRecord,FLAT);
\\n\\nIs it possible to use the imageData Dataset as a parameter for my BeginC++ structure?\\n\\nVishnu\", \"post_time\": \"2017-04-21 06:00:29\" },\n\t{ \"post_id\": 16613, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"I can't re-write the C code for you
\\n\\nRichard,\\n\\nThat wasn't my intention, I was just putting it out there. It's called the nano jpeg decoder in case anyone is interested.\\n\\nVishnu\", \"post_time\": \"2017-04-18 19:46:08\" },\n\t{ \"post_id\": 16603, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,
Yes, that is exactly what I'm trying to do. This is the decoder I'm using : http://svn.emphy.de/nanojpeg/trunk/nanojpeg/nanojpeg.c
That link just gets me a web filter violation ("gambling") but either way I can't re-write the C code for you -- I read/understand C but I'm not a C programmer. Sorry.\\n\\nRichard\", \"post_time\": \"2017-04-18 19:44:15\" },\n\t{ \"post_id\": 16593, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Another possibility, if your library is Open Source, would be to re-write the library functions that expect a file parameter to take the content as a data stream instead. Then you could just compile it as a plugin library and call the functions from your ECL code.
\\n\\nYes, that is exactly what I'm trying to do. This is the decoder I'm using : http://svn.emphy.de/nanojpeg/trunk/nanojpeg/nanojpeg.c\", \"post_time\": \"2017-04-18 19:33:23\" },\n\t{ \"post_id\": 16583, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,is there any way for me to pass the entire jpeg file
The spray to HPCC places the data content of each file in a single DATA field (aka BLOB field) of a single record. So you can pass the binary data from the JPG file, but the file itself is not on HPCC at that point (it's still on the Landing Zone). \\n\\nIf all the functions in your library expect to be working with a disk file and not just the data from the file, then one workaround suggestion would be to write a "wrapper" program -- a command-line Linux program that takes that data content as a parameter and writes it to a temporary file then uses the library function to do "whatever" then returns your result. You would call that program using the PIPE() function. \\n\\nAnother possibility, if your library is Open Source, would be to re-write the library functions that expect a file parameter to take the content as a data stream instead. Then you could just compile it as a plugin library and call the functions from your ECL code.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-18 19:30:48\" },\n\t{ \"post_id\": 16573, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"[quote="rtaylor":2j9d8ww5]Vishnu,Any help on how I go about linking this with my image processing code using openCV(C++ or JAVA)?
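To make the PIPE() suggestion above a little more concrete, here is a hedged sketch; the wrapper executable name (/usr/local/bin/imgwrap) and its one-line-per-image output are assumptions, not an existing tool, and the logical file name follows the earlier BLOB example:

  InRec := RECORD
    STRING filename;
    DATA   image;
  END;
  OutRec := RECORD
    STRING line;               // whatever the wrapper writes per image
  END;
  imgs := DATASET('~LE::imagedb', InRec, FLAT);

  // Each record is streamed to the wrapper's stdin and its stdout is parsed
  // back into OutRec rows; format options (CSV, XML, OUTPUT(...)) can be added
  // to match how the wrapper actually reads and writes.
  decoded := PIPE(imgs, '/usr/local/bin/imgwrap', OutRec);
  OUTPUT(decoded);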
AFAIK, no one has yet implemented any image processing library in HPCC. You are welcome to be the first. \\n\\nRichard\\nRichard,\\n\\nI am trying things using the BeginC++ structure and using existing JPEG Decoders but is there any way for me to pass the entire jpeg file as a parameter to the BeginC++ structure or is passing the records and using the Binary Data the only way to proceed(much more complicated to do)?\\n\\nThanks\\nVishnu\", \"post_time\": \"2017-04-18 18:02:19\" },\n\t{ \"post_id\": 16563, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,
Any help on how I go about linking this with my image processing code using openCV(C++ or JAVA)?
AFAIK, no one has yet implemented any image processing library in HPCC. You are welcome to be the first. \\n\\nRichard\", \"post_time\": \"2017-04-18 17:57:41\" },\n\t{ \"post_id\": 16543, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nI managed to spray using dfuplus and read the image data using ECL, each record looks something like this\\n
\\n# File Binarydata\\n1\\t5.jpeg\\tFFD8FFE000104A46494600010100000100010000FFDB008400090607131......
\\nAny help on how I go about linking this with my image processing code using openCV(C++ or JAVA)? Can I pass 5.jpeg as an input or is the binary Data the only data that I can use to analyze the images?\\n\\nThanks in advance,\\nVishnu\", \"post_time\": \"2017-04-17 03:24:12\" },\n\t{ \"post_id\": 16471, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nPlease check this comment:\\nviewtopic.php?f=10&t=3553&start=10#p14423\\n\\nIf you have any more question I will happy to help.\\n\\nThanks\\n\\nAttila\", \"post_time\": \"2017-04-13 13:40:17\" },\n\t{ \"post_id\": 16453, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"vchinta,Went through this: https://github.com/hpcc-systems/HPCC-Pl ... de%20Files but didn't find anything on BLOB spray,
The Programmer's Guide Working with BLOBs article discusses the issue (press F1 in the ECL IDE and the entire Programmer's Guide is in the Help file). There is no downloadable code file for that article. The examples are all in the text.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 21:50:17\" },\n\t{ \"post_id\": 16383, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nI tried the spray using the dfuplus commandline tool\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.2.6\\\\clienttools\\\\bin>dfuplus action=spray sr\\ncip=127.0.0.1 srcfile='C:\\\\Users\\\\vchinta\\\\Desktop\\\\Images\\\\*' server=131.91.163.33 u\\nsername=****** password=******* dstname=vchinta::testimages dstcluster=myth\\nor PREFIX=filename,filesize overwrite=1 nosplit=1\\n/
\\n\\nGot the following error\\nChecking for local Dali File Server\\n00000000 2017-04-12 10:57:52 9128 7340 "Throttler(stdCmdThrotlter): Increasing\\n limit from 0 to 80"\\n00000001 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): Increasin\\ng limit from 0 to 20"\\n00000002 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): New delay\\nMs=5000, previous: 1000"\\n00000003 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): New cpuTh\\nreshold=75, previous: 85"\\n00000004 2017-04-12 10:57:52 9128 7340 "CRemoteFileServer: maxThreads = 100, m\\naxThreadsDelayMs = 60000, maxAsyncCopy = 10"\\n00000005 2017-04-12 10:57:52 9128 7340 "Throttler(stdCmdThrotlter): disabled,\\nprevious limit: 80"\\n00000006 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): disabled,\\n previous limit: 20"\\n00000007 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): New delay\\nMs=1000, previous: 5000"\\n00000008 2017-04-12 10:57:52 9128 7340 "Throttler(slowCmdThrotlter): New cpuTh\\nreshold=85, previous: 75"\\nStarted local Dali file server on 127.0.0.1:7100\\n\\nFixed spraying from 'C:\\\\Users\\\\vchinta\\\\Desktop\\\\Images\\\\*' on 127.0.0.1:7100 to vch\\ninta::testimages\\nSubmitted WUID D20170412-105752\\n0% Done\\nFailed: Localhost address used in remote file name: '/var/lib/HPCCSystems/myesp/\\n'C:/Users/vchinta/Desktop/Images/*''\\n00000009 2017-04-12 10:58:02 9128 3748 "ERROR: 0: D:\\\\jenkins2\\\\workspace\\\\CE-Can\\ndidate-clienttools-win-32bit-6.2.6-1\\\\CE\\\\Windows_2k8_Servers\\\\HPCC-Platform\\\\system\\n\\\\jlib\\\\jthread.cpp(301) : unknown : Unknown exception in Thread unknown"\\n
\", \"post_time\": \"2017-04-12 16:02:54\" },\n\t{ \"post_id\": 15993, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nForgot to do that, my bad. I was trying to do the spray from the IDE. Went through this: https://github.com/hpcc-systems/HPCC-Pl ... de%20Files but didn't find anything on BLOB spray, might've missed it. Could you point me to some resource where I can figure out how to do it while your team works on the JIRA ticket?\\n\\nThanks,\\nVishnu\", \"post_time\": \"2017-03-24 04:32:04\" },\n\t{ \"post_id\": 15913, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThanks! And whenever you submit a JIRA ticket that comes from a Forum discussion, you should always reference the Forum thread in the JIRA ticket so the developers can get all the background information already discussed (I added that link already ).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-20 23:48:44\" },\n\t{ \"post_id\": 15903, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nHere's the JIRA issue I created : https://track.hpccsystems.com/browse/HPCC-17221\\n\\nVishnu\", \"post_time\": \"2017-03-20 23:39:57\" },\n\t{ \"post_id\": 15763, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nHere's our JIRA for HPCC: https://track.hpccsystems.com\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-10 11:57:05\" },\n\t{ \"post_id\": 15733, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nWas going to create the ticket if it didn't work this time, should I go ahead and do it? Where do I do it?\\n\\nUsing ECL watch: Check the five jpgs from my landing zone and blob spray\\n\\nVIshnu\", \"post_time\": \"2017-03-09 15:09:48\" },\n\t{ \"post_id\": 15713, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nTwo questions:\\n1) Did you submit these issues in JIRA as I suggested? 
If so, what's the ticket number?\\n2) Which type of spray did you try and how (ECL Watch, ECL code, or DFUplus)?\\n\\nRichard\", \"post_time\": \"2017-03-09 11:03:08\" },\n\t{ \"post_id\": 15703, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nI tried this again but when I tried to spray five images, I get this error\\n\\nID:\\nD20170308-143640\\nCluster Name:\\nthor\\nJob Name:\\n\\nfiveimagesdata\\nDFU Server Name:\\nmydfuserver\\nQueue:\\ndfuserver_queue\\nUser:\\nvchinta\\nProtected:\\nCommand:\\nSpray (Import)\\nState: failed\\t\\n▼ \\n \\nTime Started:\\n2017-03-08 19:36:40\\nTime Stopped:\\n2017-03-08 19:36:54\\nPercent Done:\\n0%\\nProgress Message:\\n0% Done\\nSummary Message:\\nFailed: Remote Filename: Cannot resolve single part from wild/multi filename\\n\\nDo you know what I did wrong?\\n\\nVishnu\", \"post_time\": \"2017-03-09 00:16:19\" },\n\t{ \"post_id\": 14923, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThen I can only suggest it's time to create a JIRA ticket for the issue, referencing this discussion thread.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-31 14:34:37\" },\n\t{ \"post_id\": 14893, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nThe first one.\\n\\nVishnu\", \"post_time\": \"2017-01-30 19:17:53\" },\n\t{ \"post_id\": 14873, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nWhich did you do, this one?
imageData[1].image
Or this one?\\nPROJECT(ImageData,TRANSFORM(outrec,SELF.outfld := ParseOneImageFunc(LEFT.image)));
\\n\\nRichard\", \"post_time\": \"2017-01-30 16:30:50\" },\n\t{ \"post_id\": 14853, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nTried it, still get same error.\\n\\nVishnu\", \"post_time\": \"2017-01-27 00:38:00\" },\n\t{ \"post_id\": 14773, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu, \\n\\nUse the same code I did:imageData[1].image
You'll only be accessing one record at a time anyway, probably something like this:\\nPROJECT(ImageData,TRANSFORM(outrec,SELF.outfld := ParseOneImageFunc(LEFT.image)));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-19 20:35:55\" },\n\t{ \"post_id\": 14763, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nSame version as you 6.2.0-1. I get the error while trying to extract the image data using the code from the guide, not while spraying the images. Spray worked fine.\\n\\nMy logical files is images::fiveimages\\n\\nand code:\\nimageRecord := RECORD\\n STRING filename;\\n DATA image; \\n UNSIGNED8 RecPos{virtual(fileposition)};\\nEND;\\nimageData := DATASET('~images::fiveimages',imageRecord,FLAT);\\n\\noutput(imageData);\\nMy IDE is an older version though, 6.0.1\\n\\n\\nVishnu\", \"post_time\": \"2017-01-19 20:12:49\" },\n\t{ \"post_id\": 14703, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nWhat version of HPCC are you running?\\n\\nI tried it on 6.2.0-1, did the spray through ECL Watch (a dozen JPGs), and it worked fine for me.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-19 14:07:33\" },\n\t{ \"post_id\": 14683, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nTried this after spraying a single image as a BLOB, I get the following error. Did the spray and upload from ECL watch, not the dfuplus command.\\n\\nError: System error: 3000: assert(required <= maxOffset) failed - file: /mnt/disk1/jenkins/workspace/CE-Candidate-6.2.0-1/CE/centos-6.0-x86_64/HPCC-Platform/common/thorhelper/thorcommon.ipp, line 820 (in Disk Read G1 E2) (0, 0), 3000, \\n\\nVishnu\", \"post_time\": \"2017-01-19 00:54:24\" },\n\t{ \"post_id\": 14653, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,So I've found few relatively easy to use image processing packages for R and I also have to the rHPCC package installed. So if I can figure out a way to extract the image data from the blob, it might not be too difficult to run the tasks. Do you think this would work?
Extracting the image data should be as simple as this://this image dataset code is from the "Working with BLOBs" Programmer's Guide article\\nimageRecord := RECORD\\n STRING filename;\\n DATA image; \\n UNSIGNED8 RecPos{virtual(fileposition)};\\nEND;\\nimageData := DATASET('LE::imagedb',imageRecord,FLAT);\\n\\nimageData[1].image; //get image data from first record\\n //you can just pass this to your image parsing code
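\\n\\nJust as an illustrative (untested) sanity check before wiring in any parsing code, you could also confirm the sprayed BLOBs look right with something like:\\nOUTPUT(COUNT(imageData));\\nOUTPUT(TABLE(imageData,{filename,UNSIGNED8 bytes := LENGTH(image)}));\\n//the bytes column name here is just a placeholder -- it simply reports each BLOB's size in bytes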
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-18 16:30:01\" },\n\t{ \"post_id\": 14643, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"RIchard, \\n\\nI mean there's also OpenCV and LibJPEG in C++, which are pretty easy as well but I'm not sure how I'd use the C++ code, some help there would be great.\\n\\nVishnu\", \"post_time\": \"2017-01-17 22:24:44\" },\n\t{ \"post_id\": 14633, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nSo I've found few relatively easy to use image processing packages for R and I also have to the rHPCC package installed. So if I can figure out a way to extract the image data from the blob, it might not be too difficult to run the tasks. Do you think this would work?\\nVishnu\", \"post_time\": \"2017-01-17 22:00:15\" },\n\t{ \"post_id\": 14623, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,I'm more interested in measuring the data I/O and resources utilized while doing these tasks and working with image data.
OK, THAT is a different problem. Based on your original post I had assumed that you just needed to accomplish the task but if you just want to use that task as a benchmark for HPCC -- totally different issue.\\n\\nSo, if you want to benchmark working with image data in HPCC then I would suggest that using a shell script that's designed to parse an image file on disk and produce a text file might not be the best approach. That is most definitely NOT a native-to-HPCC way of doing things.\\n\\nA more HPCC-centric way of doing it would be to figure out how to accomplish the same result with the BLOB data as the input (an image BLOB is just a variable-length DATA field containing the image data) and either a text STRING or record set as the output result. Once you've written the code to do the job natively in HPCC, then you can benchmark getting it done in parallel on an HPCC cluster versus using your shell script on whatever platform it is native to.\\n\\nSo, your real job would first be to duplicate your shell script's image information extraction logic in ECL (or in C++ or any other language that HPCC supports as embedded code) so it can run directly against the image data in the BLOB. \\n \\nFWIW and AFAIK, doing this would be blazing new trails in ECL/HPCC capabilities and the entire community would benefit from it. If I can help in any way, please just ask.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-17 21:42:47\" },\n\t{ \"post_id\": 14613, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Hi Richard,\\n\\nWouldn't doing the task manually on multiple computers defeat the purpose of using HPCC? I don't necessarily need to append the text output to the BLOB records. I'm more interested in measuring the data I/O and resources utilized while doing these tasks and working with image data.\\n\\nVishnu\", \"post_time\": \"2017-01-17 19:58:12\" },\n\t{ \"post_id\": 14593, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,Approx 1,000,000 images in a single batch.
So a single box won't do.\\n\\nSo according to the second method, how would HPCC know how to execute the script
\\nHPCC wouldn't. What I was describing was a pre-processing step that would happen before spraying the files as BLOBs.\\n\\non files present only on that node(since they haven't been sprayed and all files are in the landing zone, assuming files on landing zone are not distributed across nodes as well?).
\\nA Landing Zone is a single box (not a node in a cluster).\\n\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-13 20:50:24\" },\n\t{ \"post_id\": 14583, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nthanks a lot, that clears up quite a few questions I had.\\n\\nSo, the real question is: how many JPGs do you have to process and how are they coming to you -- in a single batch or steadily streaming in?\\nIf they're coming in one at a time (or small batches), then you could run your script as they appear, as a pre-process step before putting them on the LZ.\\nIf they're coming in a single batch with a large number of files in the batch and processing them all on a single box would be too slow, then you could just manually distribute the files onto several machines and setup your script to operate on the files you have put on each machine (old-school manual parallel processing
\\n\\n Approx 1,000,000 images in a single batch. So according to the second method, how would HPCC know how to execute the script on files present only on that node(since they haven't been sprayed and all files are in the landing zone, assuming files on landing zone are not distributed across nodes as well?).\\n\\n\\nVishnu\", \"post_time\": \"2017-01-13 20:13:08\" },\n\t{ \"post_id\": 14573, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nIf you have 100 JPGs to run then it will take 100 iterations of the script, no matter what. Massive parallelism just makes the entire process go faster by running separate instances of the process on each node at the same time. So 100 files on a 10-node cluster will go about 10X faster than processing all 100 on a single box, since each box only has to process 10 files. That's the advantage of parallel processing.\\n\\nHowever, if you don't have the JPGs as separate files for your script to operate on (which you don't once they've been sprayed to Thor as BLOBs), then you'll have the overhead of extracting each JPG from the BLOB to a file for the script to run against (and I don't know how you would do that) and then the overhead of cleaning up after each run of the script -- thus my suggestion of using a wrapper program around the script.\\n\\nSo, the real question is: how many JPGs do you have to process and how are they coming to you -- in a single batch or steadily streaming in? ) -- once again, this would be a pre-process step before spray.
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-13 19:37:50\" },\n\t{ \"post_id\": 14563, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nWouldn't running the script with files in the landing zone increase data I/O? I'm assuming when you execute it on THOR, HPCC checks data locality and executes the script with files local to the nodes, therefore minimizing data I/O. This might seem like a trivial question, but I'm still trying to completely understand how storage and data I/O works in HPCC.\\n\\nVishnu\", \"post_time\": \"2017-01-13 18:44:53\" },\n\t{ \"post_id\": 14553, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThat would be the most efficient way to store those JPGs in Thor. So your problem then would be getting each JPG out of the BLOB and into a disk file that your script can reference. And I have no answer for that one.\\n\\nGiven that you already have all the JPGs as separate disk files in your Landing Zone before spraying them, it may be better to run that script as a pre-process step before the spray and aggregate the script's output text files into a separate dataset to spray to Thor. Then you could just JOIN the sprayed output text dataset to the BLOB file and append the relevant data to each matching record.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-13 18:24:57\" },\n\t{ \"post_id\": 14543, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nYes, script does take input through stdin. Follow up question: The images have to sprayed onto THOR in the BLOB format right? \\n\\nVishnu\", \"post_time\": \"2017-01-13 18:04:53\" },\n\t{ \"post_id\": 14533, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Re: Shell Scripts in PIPE\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nThe requirement for PIPE is that it takes data in through stdin and produces output to stdout. Does the script meet those requirements? If not, then you would need to write a wrapper program that does.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-13 18:00:33\" },\n\t{ \"post_id\": 14523, \"topic_id\": 3593, \"forum_id\": 10, \"post_subject\": \"Shell Scripts in PIPE\", \"username\": \"vchinta\", \"post_text\": \"Hello,\\n\\nI have a shell script that accepts .jpg as input and outputs a text file with some stats after performing basic feature extraction(dominant color etc) tasks. Is it possible to use the PIPE command to run this script with a list of .jpgs on HPCC.\\n\\nThanks in advance for your help.\\n\\nVishnu\", \"post_time\": \"2017-01-13 16:55:52\" },\n\t{ \"post_id\": 14753, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"iMikePayne\", \"post_text\": \"The ips are different. Ending in .1-.5. 
1 being master.\", \"post_time\": \"2017-01-19 15:00:50\" },\n\t{ \"post_id\": 14743, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"tlhumphrey2\", \"post_text\": \"You can see thor slave nodes' IPs from ecl watch: Operations -> Cluster Processes -> mythor\", \"post_time\": \"2017-01-19 14:59:45\" },\n\t{ \"post_id\": 14733, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"iMikePayne\", \"post_text\": \"Not sure as it is an internal cluster. What prompted the 4 node test is was that the AWS instance where master and slave are on separate nodes behaved the same way.\", \"post_time\": \"2017-01-19 14:48:23\" },\n\t{ \"post_id\": 14723, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your are working on a 4 node thor. Do you know if the 4 nodes are on the same computer and therefore have the same IP address? (note: you can build a thor cluster which has more than one node per computer (IP)).\", \"post_time\": \"2017-01-19 14:27:53\" },\n\t{ \"post_id\": 14713, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"iMikePayne\", \"post_text\": \"I've created a JIRA for anyone interested in following:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-16907\", \"post_time\": \"2017-01-19 14:22:10\" },\n\t{ \"post_id\": 14693, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"iMikePayne\", \"post_text\": \"I am executing on thor and it is running on the master and not the slave.\\n\\n ) -- once again, this would be a pre-process step before spray.
IMPORT Python;\\n\\nnamerec := RECORD\\n string name;\\nEND;\\n\\nnames := DATASET([{'Rec1:'}, {'Rec2:'}, {'Rec3'},{'Rec4:'}], namerec,distributed);\\n\\nstring getips(dataset(namerec) input) := EMBED(Python) \\nimport socket;\\n \\ns = ''\\nfor n in input:\\n s = s+n.name + ' ' + socket.gethostname()+'|'\\nreturn s;\\nENDEMBED;\\n\\noutput(getips(names));
\\n\\nAll return the same ip address on a four node cluster.\", \"post_time\": \"2017-01-19 13:08:59\" },\n\t{ \"post_id\": 14673, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"Re: EMBED Python execution\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you are executing on thor, the embed python should execute on all thor slaves (and maybe the master -- I'm not sure about the master). If you are running on hthor, it will run ONLY on the master.\", \"post_time\": \"2017-01-18 20:39:18\" },\n\t{ \"post_id\": 14663, \"topic_id\": 3613, \"forum_id\": 10, \"post_subject\": \"EMBED Python execution\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI am working with the embed function for python in ECL. I see when I use the function it is executing on the thor master instead of the thor slaves. Is this behavior expected? Do all the embed functions work like this?\\n\\nRight now I have thor master and slave on different nodes. Currently using one slave.\", \"post_time\": \"2017-01-18 20:03:40\" },\n\t{ \"post_id\": 14793, \"topic_id\": 3623, \"forum_id\": 10, \"post_subject\": \"Re: Create SetOF from a STRING variable\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nFirst -- you really must stop referring to ECL definitions as "variables" -- there is no such thing in ECL. An ECL definition cannot vary (you only get to define it once within a given visibility scope), so it is inappropriate to call them "variables". I make a point of this in class because you need to think differently about how to program ECL. One simple way to reinforce the fundamental difference between ECL and other languages (declarative and non-procedural versus imperative and procedural) is by changing the way you talk about it. Therefore, the semantics actually ARE important. \\n\\nSo, here's one way to do what you want:
ds := DATASET([ {'CLIENT_02', 'CACOTX'}], Layout_CUSTOMER_Filter_STATE);\\n\\nSTRING ReformatStateStr(STRING s) := FUNCTION\\n rec := {STRING st};\\n Tmp := DATASET(LENGTH(s),TRANSFORM(rec,SKIP(COUNTER % 2 = 0),\\n SELF.st := s[COUNTER..COUNTER+1]));\\n RetDs := ITERATE(tmp,TRANSFORM(rec,SELF.st := LEFT.st + '\\\\'' + RIGHT.st + '\\\\', '));\\n RETURN RetDs[COUNT(RetDs)].st[1 .. LENGTH(TRIM(RetDs[COUNT(RetDs)].st))-1];\\nEND;\\n\\nReformatStateStr(ds[1].states);
I'm using the TRANSFORM form of DATASET to extract the states (note the SKIP on the TRANSFORM that does the real work here). Then using ITERATE to reconstruct the state string with the quotes and commas. The RETURN just sends back the last record's string, minus the last comma.\\n\\nBut if the real task is to get the states into the SET so you can use the IN operator, then you can shortcut the whole thing like this:SET OF STRING2 SetFromStateStr(STRING s) := FUNCTION\\n rec := {STRING2 st};\\n Tmp := DATASET(LENGTH(s),TRANSFORM(rec,SKIP(COUNTER % 2 = 0),\\n SELF.st := s[COUNTER..COUNTER+1]));\\n RETURN SET(Tmp,st);\\nEND;\\n\\nSetFromStateStr('CACOTX');
\\nIn fact, you could make this one function handle either form of input data, like this:SET OF STRING2 SetFromStateStr(STRING s) := FUNCTION\\n rec := {STRING2 st};\\n str := STD.Str.FilterOut(TRIM(s,ALL),'\\\\',');\\n Tmp := DATASET(LENGTH(str),TRANSFORM(rec,SKIP(COUNTER % 2 = 0),\\n SELF.st := str[COUNTER..COUNTER+1]));\\n RETURN SET(Tmp,st);\\nEND;\\n\\nSetFromStateStr('CACOTX');\\nSetFromStateStr('\\\\'CA\\\\', \\\\'CO\\\\', \\\\'TX\\\\'');
By using TRIM and FilterOut on the input string this function can handle either input form, eliminating the need for your REGEXREPLACE and SplitWords functions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-20 16:24:54\" },\n\t{ \"post_id\": 14783, \"topic_id\": 3623, \"forum_id\": 10, \"post_subject\": \"Create SetOF from a STRING variable\", \"username\": \"John Meier\", \"post_text\": \"I have a parameter file which contains a column called STATES. It contains any number of state codes which will be used to filter a source file. I take the field, create a list by using SplitWords, which is then cast a SET OF STRING02, which allows to test against another variable using the “IN”.\\n\\nLayout_CUSTOMER_Filter_STATE := RECORD\\n STRING CLIENT_NAME;\\n STRING STATES;\\nEND;\\n\\n// define the file\\nDS_CUST_FILTER_ST := DATASET([ {'CLIENT_01', '\\\\'' + 'CA' + '\\\\'' + ', ' + \\n '\\\\'' + 'CO' + '\\\\'' + ', ' + \\n '\\\\'' + 'TX' + '\\\\''}\\n ]\\n , Layout_CUSTOMER_Filter_STATE\\n );\\n// client_name\\tstates\\n// ------------- -----------------\\n// CLIENT_01\\t'CA', 'CO', 'TX'\\n\\n// read the single parameter record STATES field into the variable PARM_STATES\\n// then remove all single quotes\\nPARM_STATES := DS_CUST_FILTER_ST[1].STATES;\\nTMP_PARM_STATES := STD.Str.FindReplace(PARM_STATES, '\\\\'','');\\n// PARM_STATES\\t TMP_ PARM_STATES\\n// ------------------ -----------------\\n// 'CA', 'CO', 'TX' CA, CO, TX\\n\\nSET OF STRING02 SetOfStates := STD.STr.SplitWords(TMP_PARM_STATES, ', ');\\nSTRING02 TEST_ST01 := 'CA';\\nSTRING02 TEST_ST02 := 'NJ';\\nTEST01 := TEST_ST01 IN SetOfStates; // results in "TRUE"\\nTEST02 := TEST_ST02 IN SetOfStates; // results in "FALSE"\\n\\nSo here is my question: Is there a way to loop through a STRING variable in order to format it so that it looks like TMP_PARM_STATES? Say the incoming data looks like CACOTX. I can determine LENGTH / 02 that there are three states, but there could be any number (up to 50). How can I make multiple passes so that the final results yield CA, CO, TX? ITERATE, LOOP , GRAPH processes datasets. Any thoughts?\\n\\nThanks,\\nJohn Meier\", \"post_time\": \"2017-01-19 23:37:09\" },\n\t{ \"post_id\": 15283, \"topic_id\": 3653, \"forum_id\": 10, \"post_subject\": \"Re: Post a message response if data is not found through sea\", \"username\": \"rtaylor\", \"post_text\": \"Poorna,\\n\\nI answered that in my previous post: You can't make the OUTPUT return either a recordset or a string.
Add this to the code you already have:\\nOUTPUT(IF(EXISTS(policyJoin),'','No Result'));
Your code will then always be returning two results -- a string and a recordset. If there is a recordset to return, the string will be empty; otherwise the string will contain "no result".
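\\n\\nIn other words, the action side of the query would end up looking something like this (policyJoin is your existing definition; the NAMED labels are only illustrative):\\nOUTPUT(policyJoin, NAMED('Key_Policy'));\\nOUTPUT(IF(EXISTS(policyJoin),'','No Result'), NAMED('Message'));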
\\n\\nRichard\", \"post_time\": \"2017-02-14 21:20:50\" },\n\t{ \"post_id\": 15233, \"topic_id\": 3653, \"forum_id\": 10, \"post_subject\": \"Re: Post a message response if data is not found through sea\", \"username\": \"Poorna\", \"post_text\": \"Richard, \\nOUTPUT(IF(EXISTS(policyJoin),'','No Result'));
\\n\\nFor the output statement above, if there was a recordset to return the string will be empty, and what if there was a recordset to return and i wanted to return that as the output instead of the empty string? How can this statement be altered to achieve that? Also I would still want to respond with the message 'No results' if there was no matched records.\\n\\nThank you,\\nPoorna\", \"post_time\": \"2017-02-13 16:23:17\" },\n\t{ \"post_id\": 14901, \"topic_id\": 3653, \"forum_id\": 10, \"post_subject\": \"Re: Post a message response if data is not found through sea\", \"username\": \"Poorna\", \"post_text\": \"Thanks Richard,for the explanation, also i was able to get the intended result after adding the code you had suggested. Now i am able to get the response \\n\\nThanks,\\nPoorna\", \"post_time\": \"2017-01-30 21:38:46\" },\n\t{ \"post_id\": 14863, \"topic_id\": 3653, \"forum_id\": 10, \"post_subject\": \"Re: Post a message response if data is not found through sea\", \"username\": \"rtaylor\", \"post_text\": \"Poorna,\\n\\nThe problem is you're meant to be returning a recordset (your JOIN result), and if there's no records in the result it will simply return an empty recordset. You can't make the OUTPUT return either a recordset or a string. \\n\\nTry adding this code to the code you already have:
OUTPUT(IF(EXISTS(policyJoin),'','No Result'));
Your code wil then always be returning two results -- a string and a recordset. If there is a recordset to return the string will be empty, otherwise the string will contain "no result"\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-27 19:16:51\" },\n\t{ \"post_id\": 14843, \"topic_id\": 3653, \"forum_id\": 10, \"post_subject\": \"Post a message response if data is not found through search\", \"username\": \"Poorna\", \"post_text\": \"Hello,\\n\\nI am trying to post a message response "No results" if the data that is searched for is not found. Not able to figure out how this could be done in the service. I have attached my code.\\n\\n\\n\\t \\nDS_Request := DATASET([],CCL_layout) : STORED('Policy_Request',few);\\n \\n\\t\\nKeyPolicy2 := CurrentCarrierLife.KEY_POLICY ;\\n\\t Layout_Keys.POLICY_INFO xPolicy2 ({KeyPolicy2} L) := TRANSFORM\\n\\t SELF := L;\\nEND;\\n\\t\\npolicyJoin := JOIN(DS_Request, KeyPolicy2,\\nkeyed(IF (LEFT.ApplicationNumber <> '', RIGHT.ApplicationNumber = LEFT.ApplicationNumber, TRUE)) AND \\t \\nkeyed(IF (LEFT.PolicyNumber <> '', RIGHT.PolicyNumber = LEFT.PolicyNumber, TRUE)) AND\\t\\nRIGHT.AMBestNumber = LEFT.AMBestNumber,\\nxPolicy2(Right), LIMIT(joinMax), LEFT OUTER);\\n\\nOUTPUT (policyJoin, named('Key_Policy'));\\n\\n
\\n\\nThe search is based on AMBest number, so if the given AMBestNumber is not found, a "No results" message should be returned in the service. Any thoughts on how this could be done?\\n\\nThanks,\\nPoorna\", \"post_time\": \"2017-01-26 17:00:45\" },\n\t{ \"post_id\": 14973, \"topic_id\": 3663, \"forum_id\": 10, \"post_subject\": \"Re: Roxie TOPN for sort field chosen at query time\", \"username\": \"afarrell\", \"post_text\": \"Hi Richard,\\n\\nRT > How large are the records? IOW, can > 1 million records all fit into memory on a single node? \\n\\nAF > Depending on what needs to be sorted, upwards of 40GB might need to be retrieved from slave nodes to be processed\\non the worker, I am in favour of a strategy that keeps records in place and applies divide and conquer thinking, with just\\nthe result or a substatially reduced part-result being transmitted accross the network.\\n\\nRT > If the answer is no, then your approach seems to me to be pretty reasonable, but I would still do some testing of both against real datasets (if that's at all possible).\\n\\nAF > I suppose no, I think we struggle with the large volumes transporting records on to a single node.\\n\\nRT > The obvious trade-off with using ALLNODES is overall Roxie performance, since each query would then involve all the nodes. Given this solution you might need to dedicate that Roxie to servicing only this single query.\\n\\nAF > That is a fair point, as it stands we engage all slave nodes to retrieve data in their respective index parts,\\nI think with the use of ALLNODES we will engage all slave nodes to greater effect and hopefully see reduced\\nlatency with regard to responding to end-user requests as a whole. \\n\\nthanks,\\n\\n-Andrew\", \"post_time\": \"2017-02-01 10:39:05\" },\n\t{ \"post_id\": 14943, \"topic_id\": 3663, \"forum_id\": 10, \"post_subject\": \"Re: Roxie TOPN for sort field chosen at query time\", \"username\": \"rtaylor\", \"post_text\": \"afarrell,\\n\\nHow large are the records? IOW, can > 1 million records all fit into memory on a single node? If the answer is no, then your approach seems to me to be pretty reasonable, but I would still do some testing of both against real datasets (if that's at all possible).\\n\\nThe obvious trade-off with using ALLNODES is overall Roxie performance, since each query would then involve all the nodes. Given this solution you might need to dedicate that Roxie to servicing only this single query.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-31 16:00:24\" },\n\t{ \"post_id\": 14903, \"topic_id\": 3663, \"forum_id\": 10, \"post_subject\": \"Roxie TOPN for sort field chosen at query time\", \"username\": \"afarrell\", \"post_text\": \"Hi what is the best approach for doing TOPN in Roxie with a result set > 1 million and where a user can can choose a sort field at run time?\\n\\nOur current thinking is to use ALLNODES\\n\\n\\nCHOOSEN( // global choosen\\n SORT( // global sort\\n ALLNODES(\\n LOCAL(\\n CHOOSEN( // local choosen\\n SORT( // local sort\\n inx,\\n sortField\\n ),\\n 50\\n ) //end choosen\\n ) //end local\\n ), // end allnodes\\n sortfield\\n ), // end global sort\\n 50\\n) // end global choosen\\n
\\n\\nor is there a smarter way of doing this large sort at query time... the average case will always be > 1 million records to sort\", \"post_time\": \"2017-01-30 23:29:02\" },\n\t{ \"post_id\": 15653, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,Is there a way to use Find() without case sensitivity?
Just use either the Std.Str.ToUpperCase() or Std.Str.ToLowerCase() functions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-07 11:38:03\" },\n\t{ \"post_id\": 15633, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nYou should also take a closer look at the docs for that Std.Str.Contains() function -- it doesn't do what I think you want it to do. You'll probably need to use Std.Std.Find() instead.
\\n\\nYep, I realized that once I saw the results of my query with Contains, thanks.\\n\\nIs there a way to use Find() without case sensitivity? I found nothing in the documentation\\n\\nVishnu\", \"post_time\": \"2017-03-06 18:49:14\" },\n\t{ \"post_id\": 15583, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,I get this error: Warning: (32,13): error C2167: Unknown identifier "STD" (0, 0), 0, \\nAny idea what I'm doing wrong.
You need to IMPORT Std;
to use anything from the Standard Library.\\n\\nYou should also take a closer look at the docs for that Std.Str.Contains() function -- it doesn't do what I think you want it to do. You'll probably need to use Std.Std.Find() instead.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-06 09:58:51\" },\n\t{ \"post_id\": 15573, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nI tried running code similar to this but using STD.Str.Contains to get records that contain a certain string in one of the columns of the data\\nCountImages := RECORD\\n STRING field1;\\n STRING field2;\\n STRING field3;\\n STRING field4;\\n STRING field5;\\n STRING field6;\\n STRING field7;\\n STRING field8;\\n STRING field9;\\n STRING field10;\\n STRING field11;\\n STRING field12;\\n STRING field13;\\n STRING field14;\\n STRING field15;\\n STRING field16;\\n STRING field17;\\n STRING field18;\\n STRING field19;\\n STRING field20;\\n STRING field21;\\n STRING field22;\\n STRING field23;\\n STRING field24;\\n STRING field25;\\nEnd;\\nFile_TestImages := \\nDATASET('~vchinta::100mdata::yfcc100m_dataset',CountImages,CSV);\\nImagesTable := TABLE(File_TestImages);\\n//COUNT(TABLE(ImagesTable,{field8},field8));\\nImagesTable(STD.Str.Contains(field8,'Canon', false));\\noutput(ImagesTable);
\\n\\nI get this error: Warning: (32,13): error C2167: Unknown identifier "STD" (0, 0), 0, \\nAny idea what I'm doing wrong.\\n\\nVishnu\", \"post_time\": \"2017-03-03 23:41:10\" },\n\t{ \"post_id\": 14963, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"nileshdchavan\", \"post_text\": \"Thanks Richard. \\n\\nYou are correct, the first solution i tried already and as i mentioned, it works only when i have one string/surname for exclude. But the problem was for the multiple string/surname exclusion hence was trying a join, but that did not work as expected.\\n\\nI tried your approach and yes it is working as desired. I did not know the loop can be used for such operation. Thanks much Richard for the simple solution. \\n\\n-Nilesh\", \"post_time\": \"2017-01-31 17:09:29\" },\n\t{ \"post_id\": 14953, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"Re: How to exclude records with certain words at the end\", \"username\": \"rtaylor\", \"post_text\": \"Nilesh,\\n\\nThe simple solution looks like this:IMPORT Std;\\nrec:= RECORD\\n STRING name;\\n UNSIGNED Age;\\n STRING country;\\nEND;\\n\\npeople := DATASET([{'Nilesh Chavan', 30, 'India'},\\n {'Akshay Mittal', 28, 'USA'},\\n {'Yash Mittal', 28, 'USA'}], rec);\\nPeople(NOT Std.Str.EndsWith(name,'Chavan'));
This is just a recordset filter using the Std.Str.EndsWith() function to filter out the relevant records.\\n\\nHowever, your inclusion of the excludeSurnames DATASET in your example indicates to me that you probably want to exclude ALL the records with ANY of the surnames specified in that DATASET. That makes it a very different problem.\\n\\nHere's how I would approach that version of your problem:IMPORT Std;\\nrec:= RECORD\\n STRING name;\\n UNSIGNED Age;\\n STRING country;\\nEND;\\npeople := DATASET([{'Nilesh Chavan', 30, 'India'},\\n {'Akshay Smith', 28, 'USA'},\\n {'Akshay Jones', 28, 'USA'},\\n {'Akshay Mittal', 28, 'USA'},\\n {'Yash Mittal', 28, 'USA'}], rec);\\nsurnamerec := {STRING surname};\\nexcludeSurnames := DATASET([{'Chavan'},{'Smith'},{'Jones'}], surnamerec);\\nExclusionCnt := COUNT(excludeSurnames);\\n\\nFilterRecs(DATASET(rec) ds, STRING exclude) := ds(NOT Std.Str.EndsWith(name,exclude));\\n\\nLOOP(people,ExclusionCnt,FilterRecs(ROWS(LEFT),excludeSurnames[COUNTER].surname));
This is a fairly straight-forward example of how you can use LOOP to define running the same operation against each previous iteration's result set. \\n\\nThis example uses the simplest form of LOOP, where it iterates a fixed number of times, calling the FilterRecs() function on each iteration, passing it the previous result set (that's the ROWS(LEFT) parameter) along with the exclusion name for the next iteration.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-31 16:49:24\" },\n\t{ \"post_id\": 14913, \"topic_id\": 3673, \"forum_id\": 10, \"post_subject\": \"How to exclude records with certain words at the end\", \"username\": \"nileshdchavan\", \"post_text\": \"Hello,\\n\\nI have dataset which has Full Name, Age, Country information for persons. From this dataset, i want to filter the names which does not end with specific strings (surnames). How can i achieve this. I tried using the Str.EndsWith function, but this works only on one string and not on the set of strings.\\n\\nrec:= RECORD\\n string name;\\n unsigned Age;\\nEND;\\n\\nsurname := RECORD\\n string surname;\\nEND;\\npeoples := DATASET([{'Nilesh Chavan', 30, 'India'},\\n {'Akshay Mittal', 28, 'USA'},\\n {'Yash Mittal', 28, 'USA'}], rec);\\n\\nexcludeSurnames:= DATASET([{'Chavan'}], surname);\\n\\nI want to exclude the records having last string as 'Chavan'. I tried the str.endswith but that did not work. Could you please advise, how this can be achieved?\\n\\nI want my resultant dataset to have following records only -\\n\\n{'Akshay Mittal', 28, 'USA'},\\n{'Yash Mittal', 28, 'USA'}\\n\\nPlease advise. Thank you.\\n-Nilesh\", \"post_time\": \"2017-01-31 03:02:09\" },\n\t{ \"post_id\": 15103, \"topic_id\": 3683, \"forum_id\": 10, \"post_subject\": \"Re: eclplus Client Tool limitation of 100 rows in result set\", \"username\": \"rtaylor\", \"post_text\": \"Rohit,\\n\\nI see that your JIRA ticket was updated saying that eclplus.exe has been deprecated in favor of ecl.exe. In my perusal of the docs I don't see that capability yet in ecl.exe. \\n\\nTherefore, you need to submit another JIRA asking for the feature to be ported to ecl.exe and implemented with a descending sort order instead of ascending.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-08 15:58:27\" },\n\t{ \"post_id\": 15043, \"topic_id\": 3683, \"forum_id\": 10, \"post_subject\": \"Re: eclplus Client Tool limitation of 100 rows in result set\", \"username\": \"Rohit\", \"post_text\": \"Thanks Richard.\\n\\nAs suggested, I have created a Jira requesting for this change: https://track.hpccsystems.com/browse/HPCC-17020\", \"post_time\": \"2017-02-07 21:29:03\" },\n\t{ \"post_id\": 15003, \"topic_id\": 3683, \"forum_id\": 10, \"post_subject\": \"Re: eclplus Client Tool limitation of 100 rows in result set\", \"username\": \"rtaylor\", \"post_text\": \"Rohit,Is there any way to get around this limitation?
None that I am aware of. I suggest you submit a JIRA ticket to have the order changed from ascending to descending (which would make more sense for workunit IDs anyway).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-06 15:22:01\" },\n\t{ \"post_id\": 14983, \"topic_id\": 3683, \"forum_id\": 10, \"post_subject\": \"eclplus Client Tool limitation of 100 rows in result set\", \"username\": \"Rohit\", \"post_text\": \"Hi,\\n\\nI need to find if there is currently any Workunit in 'running' status on the cluster.\\n\\nFor this, I am using the command "eclplus action=list" in Linux to list the Workunits with their respective statuses.\\n\\nHowever, I see the following 2 limitations in my approach:\\n1. eclplus lists the Workunits in ascending order of Workunit ID, which means that the latest Workunit is listed at the last.\\n2. eclplus caps the number of results to 100.\\n\\nWhen there are more than 100 Workunit logs on the cluster, then both the above-mentioned limitations prevent me from finding any currently running Workunit (because the currently running Workunit will appear after the 100 rows in the result-set.\\n\\nIs there any way to get around this limitation?\\n\\nThanks\", \"post_time\": \"2017-02-05 02:39:52\" },\n\t{ \"post_id\": 15553, \"topic_id\": 3871, \"forum_id\": 10, \"post_subject\": \"Re: Larger Dataset on Left in Join\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Richard. But to generalize the problem: in a real world situation where you know that user input could wildly shift the size of the filtered dataset, would you put in conditional checks to see which dataset was larger and dynamically change the join for each situation? What would be the strategy outside of the contrived lab exercise? Also, how badly would things break if I made the wrong choice (less efficient vs hanging for hours vs potentially wrong results)?\", \"post_time\": \"2017-03-01 14:47:33\" },\n\t{ \"post_id\": 15543, \"topic_id\": 3871, \"forum_id\": 10, \"post_subject\": \"Re: Larger Dataset on Left in Join\", \"username\": \"rtaylor\", \"post_text\": \"Janet,So what is the potential impact of having "RETURN JOIN(cszfile,Fetch_Persons," vs "RETURN JOIN(Fetch_Persons,cszfile," in the code below
A more important question would be, what is the impact of the ALL option if the Fetch_Persons is the rhs dataset in the JOIN and is a huge number of records, since the ALL option means you're telling the system the rhs is a "small" dataset that can fit fully in memory on each node?\\n\\nIn this instance, either way would be appropriate because there will never be a problem with the number of records (that class exercise code was written with a very good understanding of the training data involved). \\n\\nHowever, in any real-world situation you would need to thoroughly understand the data you're actually working with to make the determination of which would be better.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-01 14:19:55\" },\n\t{ \"post_id\": 15501, \"topic_id\": 3871, \"forum_id\": 10, \"post_subject\": \"Larger Dataset on Left in Join\", \"username\": \"janet.anderson\", \"post_text\": \"In the Intro to Roxie training, Lab Excercise 3, we are encapsulating a join within a function, and one of the datasets being joined is filtered based on the parameters passed to the function. When we do a join, the larger dataset should be on the left, but in this instance you won't know which dataset is larger (it depends on the parameters being passed). So what is the potential impact of having "RETURN JOIN(cszfile,Fetch_Persons," vs "RETURN JOIN(Fetch_Persons,cszfile," in the code below (assuming the basefile was huge, so there really could be a substantial swing in the size of the dataset based on the parameters entered)?\\n\\nIMPORT $;\\nbasefile := $.File_Persons_Slim.FilePlus;\\nbasekey := $.File_Persons_Slim.IDX_lname_fname;\\ncszfile := $.File_LookupCSZ.FilePlus;\\nEXPORT Fetch_Persons_LFname(STRING25 l_key,STRING15 f_key) := FUNCTION\\nFilteredKey := IF(f_key = '',\\nbasekey(LastName=l_key),\\nbasekey(LastName=l_key,FirstName=f_key));\\nFetch_Persons := FETCH(basefile, FilteredKey, RIGHT.RecPos);\\nOutRec := RECORD\\nRECORDOF(basefile) AND NOT [RecPos, CSZ_ID];\\nRECORDOF(cszfile) AND NOT [RecPos, CSZ_ID];\\nEND;\\nOutRec JoinEm(cszfile R,basefile L) := TRANSFORM\\nSELF := L;\\nSELF := R;\\nEND;\\nRETURN JOIN(cszfile,Fetch_Persons,\\nLEFT.CSZ_ID = RIGHT.CSZ_ID,\\nJoinEm(LEFT,RIGHT),ALL);\\nEND;
\", \"post_time\": \"2017-02-28 22:34:14\" },\n\t{ \"post_id\": 15753, \"topic_id\": 3923, \"forum_id\": 10, \"post_subject\": \"Re: COUNT query performance\", \"username\": \"rtaylor\", \"post_text\": \"enolan,\\n\\nWithout actually looking over your code I have no good answers for you, but I do have some observations that may be helpful.I have a query that returns a dataset of 1,100,000 records (paginated in pages of 100) and the total count of the overall dataset.\\n...\\nThe query is carried out on a 20 node roxie.
It is extremely atypical to have a Roxie query return so many records. Given that your total query processing time must be > 13 seconds (an egregiously long response time for most Roxie queries) I would suggest you try running this code on a similar sized Thor and compare the run times. You may be surprised.\\n\\nAssuming a Thor solution is unacceptable, I can also suggest that your 20-node Roxie may just be too small. \\n\\nYou also mention having tried ALLNODES, which I can strongly suggest may be needed for more than just the COUNT. Without using ALLNODES, only a single server process on one Roxie node will aggregate all the data required by the query and process it for return. Given a final result set of 1.1 million records, I would expect that processing would massively overload that one node. \\n\\nRefactoring the code to encompass as much work as possible within ALLNODES would seem to me to be the only way to handle this size of query on a Roxie. That would "divide and conquer" the workload amongst all the nodes instead of just one.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-03-10 11:37:33\" },\n\t{ \"post_id\": 15723, \"topic_id\": 3923, \"forum_id\": 10, \"post_subject\": \"COUNT query performance\", \"username\": \"enolan\", \"post_text\": \"I have a query that returns a dataset of 1,100,000 records (paginated in pages of 100) and the total count of the overall dataset. From the graph, I see that counting the dataset adds 13 seconds to the overall query time. Is there any faster way to perform this count? My current approach is simply:\\n\\ncnt := count(ds);\\n\\nThe query is carried out on a 20 node roxie. I tried out a couple of different methods to do a count per node (using combinations of local and allnodes) but none have solved the issue. \\n\\nAny pointers would be appreciated!\", \"post_time\": \"2017-03-09 14:31:04\" },\n\t{ \"post_id\": 16083, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"georgeb2d\", \"post_text\": \"Postnote. I was working on this table again today. I added a row to the table and got the same error.\\n\\nError: assert(left->queryType() == right->queryType()) failed - file: /var/lib/jenkins/workspace/LN-Candidate-with-Plugins-6.2.10-rc3/LN/centos-6.0-x86_64/HPCC-Platform/ecl/hqlcpp/hqlcpp.cpp, line 7370 (27, 35 - TelematicsConsolidation_DNAv2.fn_computeDNAv2Generic)\\n\\nThe table was like this:\\n\\n Intermediate_DNAv2_Rec := RECORD\\n\\t JEP_RSL.JourneyID;\\n\\tREAL4 RSLGT0Duration := SUM(GROUP, IF(JEP_RSL.RSL > 0, JEP_RSL.Duration, 0)); \\n\\tREAL4 RSLPercent := SUM(GROUP, IF(JEP_RSL.RSL > 0, JEP_RSL.Duration, 0)) / SUM(GROUP, JEP_RSL.Duration);\\n\\tREAL4 V2MetersDistanceHighway := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Distance, [code][/code]0));\\n\\tREAL4 V2SecondsDurationHighway := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Duration, 0));\\n\\tREAL4 V2AvgSpeedHighway := 0;\\n\\t END;
\\n\\nThe above gave the same error as before. I had added the RSLGT0Duration to it. Out of curiosity I changed the order since the RSLGT0Duration is the same as the denominator in RSLPercent. I moved RSLGT0Duration to the end. To my surprise the code worked. \\n Intermediate_DNAv2_Rec := RECORD\\n\\t JEP_RSL.JourneyID;\\n\\tREAL4 RSLPercent := SUM(GROUP, IF(JEP_RSL.RSL > 0, JEP_RSL.Duration, 0)) / SUM(GROUP, JEP_RSL.Duration);\\n\\tREAL4 V2MetersDistanceHighway := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Distance, [code][/code]0));\\n\\tREAL4 V2SecondsDurationHighway := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Duration, 0));\\n\\tREAL4 V2AvgSpeedHighway := 0;\\n REAL4 RSLGT0Duration := SUM(GROUP, IF(JEP_RSL.RSL > 0, JEP_RSL.Duration, 0));\\n\\t END;
\\n\\nSo I am wondering if the problem in ECL code has anything to with the order of the rows in the table. It did not work, and then it did. \\n\\nI have also put these comments in the Jira ticket: HPCCHPCC-17213 \\nTable is not able to Perform Ratio\", \"post_time\": \"2017-03-28 19:00:40\" },\n\t{ \"post_id\": 15873, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"tlhumphrey2\", \"post_text\": \"thanks you\", \"post_time\": \"2017-03-14 20:25:28\" },\n\t{ \"post_id\": 15863, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"georgeb2d\", \"post_text\": \"Jira ticket created.\", \"post_time\": \"2017-03-14 19:44:11\" },\n\t{ \"post_id\": 15853, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"tlhumphrey2\", \"post_text\": \"Go to http://track.hpccsystems.com. Create a user's account. Click the "Create issue" button and follow instructions.\", \"post_time\": \"2017-03-14 18:50:59\" },\n\t{ \"post_id\": 15843, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. I am not quite sure how to submit a Jira report. Are there instructions?\", \"post_time\": \"2017-03-14 16:55:12\" },\n\t{ \"post_id\": 15833, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Re: Table Failure\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your work-around is probably the best thing to do. You are getting an internal error that you shouldn't be getting, i.e. there is a bug in platform code.\\n\\nWould you write a jira report on this so someone from our platform team can fix the problem.\", \"post_time\": \"2017-03-14 16:53:26\" },\n\t{ \"post_id\": 15813, \"topic_id\": 3953, \"forum_id\": 10, \"post_subject\": \"Table Failure\", \"username\": \"georgeb2d\", \"post_text\": \"I am attempting to build a crosstab report table and must have something wrong. \\nI am getting this error:\\nError: assert(left->queryType() == right->queryType()) failed - file: /mnt/disk1/jenkins/workspace/LN-Candidate-with-Plugins-6.2.10-rc1/LN/centos-6.0-x86_64/HPCC-Platform/ecl/hqlcpp/hqlcpp.cpp, line 7370 (19, 29), 3000, \\n\\nHere a code snippet:\\n\\n Intermediate_DNAv2_Rec := RECORD\\n\\t JEP_RSL.JourneyID;\\n\\t\\t\\t\\t REAL4 RSLPercent := SUM(GROUP, IF(JEP_RSL.RSL > 0, JEP_RSL.Duration, 0))/ SUM(GROUP, JEP_RSL.Duration);\\n\\t\\t\\t\\t REAL4 DistanceHighwayRoadsCleanMeter := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Distance, 0));\\n\\t\\t\\t\\t REAL4 AvgSpeedHighwayRoads := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Distance, 0))/ SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Duration, 0));\\n\\t\\t\\t\\t REAL4 AvgSpeedHighwayRoadsTop := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Distance, 0));\\n\\t\\t\\t\\t REAL4 AvgSpeedHighwayRoadsBottom := SUM(GROUP, IF(JEP_RSL.isValidInfo AND JEP_RSL.RSL > Constants_Score.DNAv2HighwaySpeedThreshold, JEP_RSL.Duration, 0));\\n\\t END;\\n\\t Intermediate_DNAv2_Tab := TABLE(JEP_RSL, Intermediate_DNAv2_Rec, JourneyID, LOCAL);\\n
\\n\\nThe compile gives no errors. If I comment out the line with AvgSpeedHighwayRoads the code runs. Otherwise I get the error above. AvgSpeedHighwayRoads is the ratio AvgSpeedHighwayRoadsTop/ AvgSpeedHighwayRoadsBottom. So I am assuming I am violating some rule in ECL. \\nHowever a similar ratio is RSLPercent and that is working. The only change I can see is that it does not have an IF statement in the numerator. I guess I can build the table first without AvgSpeedHighwayRoads and then do a PROJECT to get the final results but my preference is to get this working. \\n\\nThanks for your assistance.\", \"post_time\": \"2017-03-14 15:20:29\" },\n\t{ \"post_id\": 16713, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"Re: MySql connections staying open but not reused\", \"username\": \"NP\", \"post_text\": \"Hi Richard,\\n\\nThanks for looking into this. \\n\\nSorry, a bit late, but - \\n\\n[quote="richardkchapman":2a578ubm]Are you running against Thor? Roxie? Eclagent? I was running this on THOR when I spotted the problem.\\n\\n[quote="richardkchapman":2a578ubm]What version of the HPCC platform are you running? 6.2.2\\n\\n[quote="richardkchapman":2a578ubm]and that thor is leaking MySQL connections when a workunit is unloaded. I thought it might be something like that.\\n\\nYou should be able to work around the issue by requesting that the MySQL connection is not cached by adding CACHE(0) to the options list on the EMBED statement.
thanks for the workaround! I tried a few things myself, but could not find one that worked. Our workaround was to get mysql to kill off connections after 5 minutes \\n\\n[quote="richardkchapman":2a578ubm](and thanks for the report - I wish all problem reports were as precise as this, with simple steps to reproduce!) Glad it was helpful\", \"post_time\": \"2017-04-21 09:17:29\" },\n\t{ \"post_id\": 16673, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"Re: MySql connections staying open but not reused\", \"username\": \"richardkchapman\", \"post_text\": \"(and thanks for the report - I wish all problem reports were as precise as this, with simple steps to reproduce!)\", \"post_time\": \"2017-04-20 14:58:08\" },\n\t{ \"post_id\": 16663, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"Re: MySql connections staying open but not reused\", \"username\": \"richardkchapman\", \"post_text\": \"A little experimentation confirms that the difference between your scenarios is that one will hit thor while the other does not - and that thor is leaking MySQL connections when a workunit is unloaded.\\n\\nI have raised a ticket at https://track.hpccsystems.com/browse/HPCC-17450\\n\\nYou should be able to work around the issue by requesting that the MySQL connection is not cached by adding CACHE(0) to the options list on the EMBED statement.\", \"post_time\": \"2017-04-20 14:57:07\" },\n\t{ \"post_id\": 16653, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"Re: MySql connections staying open but not reused\", \"username\": \"richardkchapman\", \"post_text\": \"My guess would be that the addition of the extra DATASET statement pushes the query from being simple enough to run purely on eclagent (which will terminate at the end of each run and free the connections) to running on thor (which stays loaded and should try to cache the connections for reuse). But I'd have to try to reproduce it to be sure (and the information I requested would be useful for that)\", \"post_time\": \"2017-04-20 14:14:55\" },\n\t{ \"post_id\": 16643, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"Re: MySql connections staying open but not reused\", \"username\": \"richardkchapman\", \"post_text\": \"Can you provide a few details:\\n\\nWhat version of the HPCC platform are you running?\\n\\nAre you running against Thor? Roxie? Eclagent?\\n\\nThe MySQL plugin will attempt to cache connections to be reused by future queries, and it may be that what you are observing is related to that - though if the number is continuing to rise then it suggests something is not working as intended.\", \"post_time\": \"2017-04-20 14:12:28\" },\n\t{ \"post_id\": 16003, \"topic_id\": 3993, \"forum_id\": 10, \"post_subject\": \"MySql connections staying open but not reused\", \"username\": \"NP\", \"post_text\": \"Given a simple table\\n\\n
CREATE TABLE TestTable (id int);
\\nwith 10 records, if I run \\n\\n\\nIMPORT MySQL;\\n\\nTestLayout := RECORD\\n INTEGER Id;\\nEND;\\n\\nDATASET(TestLayout) LoadTestTable(INTEGER minLimit = 0) := EMBED(mySql : server('myserver'), database('mydb'), user('myuser'), password('mypass'), port('myport'))\\n SELECT ID FROM TestTable WHERE ID > ?;\\nENDEMBED;\\n\\nLoadTestTable();\\nLoadTestTable(5);
\\nthe db connection gets closed (SHOW PROCESSLIST in MySql does not show any connections in sleep state).\\n\\nIf I just append \\n\\n\\nDATASET(\\n [\\n {'A', 1}, \\n {'B', 2}, \\n {'A', 3}, \\n {'C', 4}, \\n {'B', 5} \\n ], \\n {STRING S, INTEGER I}\\n);
\\nat the end of that code, and run it all, the connection stays in the sleep state. Every time I run the code, a new connection is opened and remains open until the timeout (default 8h on MySql). After a while "mysql: Failed to connect (Too many connections)" happens. \\n\\nI would expect the connection to be closed every time when the query in the embed is completed, or at least to be reused if it is kept open. As a workaround we've reduced the wait-timeout on MySql to 5min, but I would like to understand this behaviour better. \\n\\nSo, am I doing something wrong here, is there a bug causing connections to sometimes stay open, or is this expected behaviour (and if so, what are the rules)?\", \"post_time\": \"2017-03-24 13:56:15\" },\n\t{ \"post_id\": 16093, \"topic_id\": 4053, \"forum_id\": 10, \"post_subject\": \"Re: Publish Roxie Query from local GIT using local ECL Impor\", \"username\": \"afarrell\", \"post_text\": \"I may have resolved this... with the use of "-legacy" and "-L" and "-I"\\n\\nthis command will run one dir up from the project root (hence the ../) \\n\\necl publish roxie_cluster_name myservice.ecl -L../ -I../ -v --name="services.myservice" -s $server -u $user -pw $password -legacy
\", \"post_time\": \"2017-03-29 12:40:02\" },\n\t{ \"post_id\": 16073, \"topic_id\": 4053, \"forum_id\": 10, \"post_subject\": \"Publish Roxie Query from local GIT using local ECL Imports\", \"username\": \"afarrell\", \"post_text\": \"Hi,\\n\\nWe have transitioned from using the legacy MySQL based ECL Attribute repository to using GIT. This has been working fine for all our THOR work provided that we have added the “--legacy” argument as configuration to our compiler. In this case all our local ECL imports are working for each THOR workunit we run…. That the good.\\n\\nBut the bad…\\n\\nWe are not having the same luck with roxie. Within the ECL IDE, we have been unable to publish our roxie services while pointed to our local GIT repository. The compiler cannot find and resolve and ECL imports, the “--legacy” option is rejected by the compiler when added as an a compiler argument.\\n\\nWe have not had any joy using the “ecl publish” command at the command line while pointed to our local GIT repository. Is there any way to send an ecl file to be published from the local directory that also resolves all ECL imports?\\n\\nThanks,\\n\\n-A\", \"post_time\": \"2017-03-28 16:05:44\" },\n\t{ \"post_id\": 16513, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nUse OUTPUT to write it to a new dataset. Alternatively, you can use the ALL option on OUTPUT to view all the result records (if its < 10Mb of data total).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-13 23:14:40\" },\n\t{ \"post_id\": 16503, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nLastly, how do I get all the resultant rows as I get only 100 rows in the ECL IDE results?\\n\\nVishnu\", \"post_time\": \"2017-04-13 22:27:40\" },\n\t{ \"post_id\": 16483, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nI'm still not clear on exactly what your result should be, so here are my two guesses (choose whichever):IMPORT Std;\\ns := '56558566:Duval+Street:Suburb,2432044:Key+West:Town,12587846:Monroe:County,2347568:Florida:State,12772085:33040:Zip,23424977:United+States:Country,56043648:America%2FNew_York:Timezone';\\nrec := {STRING field1,STRING field2};\\nds := DATASET([{'',s}],rec);\\nds;\\n\\nPROJECT(ds,TRANSFORM(rec,\\n ZipPos := Std.Str.Find(LEFT.Field2,'Zip');\\n SELF.Field1 := '';\\n SELF.Field2 := LEFT.Field2[ZipPos-6..ZipPos-2])); \\n\\nPROJECT(ds,TRANSFORM(rec,\\n ZipPos := Std.Str.Find(LEFT.Field2,'Zip');\\n SELF.Field2 := LEFT.Field2;\\n SELF.Field1 := LEFT.Field2[ZipPos-6..ZipPos-2]));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-13 17:15:13\" },\n\t{ \"post_id\": 16473, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nIf your intent is just to parse out the zip value
\\nYes, this is what I'm trying to do but then I want to replace the data in the field with the zip for all records.\\n\\nMy dataset is defined as\\n\\nExport USImages:= RECORD\\n STRING field1;\\n STRING field2;\\nEND;\\n
\\nwhere field2 has the data I'm interested in and this is what I tried\\n\\nResultsTable := USImages(STD.Str.FindReplace(field2,field2,field2[USImages(STD.Str.Find(USImages.field2,'Zip',1)-6)..(STD.Str.Find(USImages.field2,'Zip',1)-2)]));\\n
\\nWould be really thankful if you could fix this piece of code.\\n\\nThanks,\\nVishnu\", \"post_time\": \"2017-04-13 15:33:14\" },\n\t{ \"post_id\": 16461, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nIf your intent is just to parse out the zip value, you can do it like this:IMPORT Std;\\ns := '56558566:Duval+Street:Suburb,2432044:Key+West:Town,12587846:Monroe:County,2347568:Florida:State,12772085:33040:Zip,23424977:United+States:Country,56043648:America%2FNew_York:Timezone';\\n\\nZipPos := Std.Str.Find(s,'Zip');\\ns[ZipPos-6..ZipPos-2]; //only extracting zip
You can just use the Std.Str.Find() function to find where the "zip" position is, then use ECL's string slicing syntax to "slice out" just the value you're looking for. In this case, the 5-digit zip code. \\n\\nBut if what you really need is to parse out all the key/value pairs in the string along with their data ids (that's what the data in the string looks like to me), then you can do it like this:IMPORT Std;\\ns := '56558566:Duval+Street:Suburb,2432044:Key+West:Town,12587846:Monroe:County,2347568:Florida:State,12772085:33040:Zip,23424977:United+States:Country,56043648:America%2FNew_York:Timezone';\\n\\nSlen := LENGTH(s);\\nds := DATASET([{s}],{STRING line}); //define the string as a 1-record dataset\\n\\n//then find all the commas and their positions, using Form 1 of NORMALIZE\\n//and filtering for only the resulting comma records\\nN := NORMALIZE(ds,Slen,\\n TRANSFORM({UNSIGNED LastPos,UNSIGNED Pos,BOOLEAN IsComma}, \\n SELF.LastPos := 0,\\n SELF.Pos := COUNTER,\\n SELF.IsComma := s[COUNTER] = ','))(IsComma=TRUE);\\n\\n//use ITERATE to fill in the previous element ending positions\\nElements := ITERATE(N,TRANSFORM(RECORDOF(N),\\n SELF.LastPos := LEFT.Pos,\\n SELF := RIGHT));\\n\\n//we don't want to forget about the last element in the string\\nLastRow := ROW({s[Elements[COUNT(Elements)].Pos+1..]},RECORDOF(ds));\\n\\n//put each element into separate records in a single recordset\\nElementRecs := PROJECT(Elements,\\n TRANSFORM(RECORDOF(ds),\\n SELF.line := s[LEFT.LastPos+1 .. LEFT.Pos-1])) \\n + LastRow; //appending the last row to the PROJECT result \\n\\n//then parse out the data values for each, just using string slicing again\\nOutRec := RECORD\\n STRING ElementID;\\n STRING ElementKey;\\n STRING ElementVal;\\nEND;\\n\\nOutRec XF(ElementRecs L) := TRANSFORM\\n\\tPos1 := Std.Str.Find(L.line,':',1);\\n\\tPos2 := Std.Str.Find(L.line,':',2);\\n\\tSELF.ElementID := L.line[1..Pos1-1];\\n\\tSELF.ElementKey := L.line[Pos2+1..];\\n\\tSELF.ElementVal := L.line[Pos1+1..Pos2-1];\\nEND;\\t\\nP1 := PROJECT(ElementRecs,XF(LEFT));\\nP1;
This is another exercise in "string slicing" using NORMALIZE, ITERATE, and PROJECT to produce the desired result.\\n\\nAnd, of course, if you need that to be a function to handle any like-formatted string, then the code can look like this:IMPORT Std;\\nSplitFormattedString(STRING s) := FUNCTION\\n rec := {STRING line};\\n N := NORMALIZE(DATASET([{s}],rec),LENGTH(s),\\n TRANSFORM({UNSIGNED Lpos,UNSIGNED Pos,BOOLEAN IsComma}, \\n SELF.Lpos := 0,\\n SELF.Pos := COUNTER,\\n SELF.IsComma := s[COUNTER] = ','))(IsComma=TRUE);\\n Elements := ITERATE(N,TRANSFORM(RECORDOF(N),\\n SELF.Lpos := LEFT.Pos,\\n SELF := RIGHT));\\n LastRow := ROW({s[Elements[COUNT(Elements)].Pos+1..]},rec); \\n ElementRecs := PROJECT(Elements,\\n TRANSFORM(rec,\\n SELF.line := s[LEFT.Lpos+1 .. LEFT.Pos-1])) \\n + LastRow;\\n OutRec := RECORD\\n STRING ElementID;\\n STRING ElementKey;\\n STRING ElementVal;\\n END;\\n OutRec XF(ElementRecs L) := TRANSFORM\\n Pos1 := Std.Str.Find(L.line,':',1);\\n Pos2 := Std.Str.Find(L.line,':',2);\\n SELF.ElementID := L.line[1..Pos1-1];\\n SELF.ElementKey := L.line[Pos2+1..];\\n SELF.ElementVal := L.line[Pos1+1..Pos2-1];\\n END; \\n RETURN PROJECT(ElementRecs,XF(LEFT));\\nEND;\\n\\nMyStr := '56558566:Duval+Street:Suburb,2432044:Key+West:Town,12587846:Monroe:County,2347568:Florida:State,12772085:33040:Zip,23424977:United+States:Country,56043648:America%2FNew_York:Timezone';\\nSplitFormattedString(MyStr);
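\\nIf splitting on the commas is all that is needed, a shorter sketch of the same idea using Std.Str.SplitWords (the sample string is shortened and the names are only illustrative; the slicing TRANSFORM is the same as above):
IMPORT Std;\\ns := '12587846:Monroe:County,12772085:33040:Zip'; //shortened stand-in for the full string\\n//one record per comma-separated element\\nElementRecs := DATASET(Std.Str.SplitWords(s,','),{STRING line});\\nOutRec := RECORD\\n  STRING ElementID;\\n  STRING ElementKey;\\n  STRING ElementVal;\\nEND;\\nOutRec XF(RECORDOF(ElementRecs) L) := TRANSFORM\\n  Pos1 := Std.Str.Find(L.line,':',1);\\n  Pos2 := Std.Str.Find(L.line,':',2);\\n  SELF.ElementID  := L.line[1..Pos1-1];\\n  SELF.ElementVal := L.line[Pos1+1..Pos2-1];\\n  SELF.ElementKey := L.line[Pos2+1..];\\nEND;\\nOUTPUT(PROJECT(ElementRecs,XF(LEFT)));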
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-13 00:22:46\" },\n\t{ \"post_id\": 16413, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"vchinta\", \"post_text\": \"Richard,\\n\\nEdited the original message.\\n\\nVishnu\", \"post_time\": \"2017-04-12 18:43:08\" },\n\t{ \"post_id\": 16403, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"Re: String Replace\", \"username\": \"rtaylor\", \"post_text\": \"Vishnu,\\n\\nYour example string got truncated. \\n\\nPlease re-post (it would be best to put it inside a code section using the "Code" button on the editing window).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 18:41:40\" },\n\t{ \"post_id\": 16323, \"topic_id\": 4133, \"forum_id\": 10, \"post_subject\": \"String Replace\", \"username\": \"vchinta\", \"post_text\": \"I have a Dataset with string Field which looks like this:\\n56558566:Duval+Street:Suburb,2432044:Key+West:Town,12587846:Monroe:County,2347568:Florida:State,12772085:33040:Zip,23424977:United+States:Country,56043648:America%2FNew_York:Timezone
\\n\\nHow do I replace the contents of this field with 5 characters before ":Zip"(33040 in this case)? I tried to find the index of the occurrence of 'Zip' and subtract 6 from it to get to 33040 and then use FindReplace but ran into some syntax errors.\\n\\nThanks in Advance.\\n\\nVishnu\", \"post_time\": \"2017-04-11 22:58:16\" },\n\t{ \"post_id\": 16633, \"topic_id\": 4163, \"forum_id\": 10, \"post_subject\": \"Re: can an OUTPUT of a CSV file be compressed?\", \"username\": \"rtaylor\", \"post_text\": \"Steve,\\n\\nThe OUTPUT action does have a COMPRESSED option when producing flat files; https://hpccsystems.com/download/documentation/ecl-language-reference/html/OUTPUT_Thor-Flat_Files.html \\n\\nAnd although COMPRESSED is not documented for CSV files it is valid for use. https://hpccsystems.com/download/documentation/ecl-language-reference/html/OUTPUT_CSV_Files.html\\n\\nThis example shows how:SomeFile1 := DATASET([{1,'A'},{1,'B'},{1,'C'},{1,'D'},{1,'E'},\\n {1,'F'},{1,'G'},{1,'H'},{1,'I'},{1,'J'}],\\n\\t\\t\\t\\t\\t{INTEGER1 number,STRING1 Letter});\\n\\nWriteFile := OUTPUT(SomeFile1,,'~RTTEST::csv::compresstest1',CSV,COMPRESSED);\\n\\nCfile := DATASET('~RTTEST::csv::compresstest1',\\n {INTEGER1 number,STRING1 Letter},CSV,__COMPRESSED__);\\nSEQUENTIAL(WriteFile,OUTPUT(Cfile));
The __COMPRESSED__ option on a CSV DATASET is also undocumented. These doc issues have been reported in JIRA: https://track.hpccsystems.com/browse/HPCC-17445\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-19 20:00:26\" },\n\t{ \"post_id\": 16623, \"topic_id\": 4163, \"forum_id\": 10, \"post_subject\": \"can an OUTPUT of a CSV file be compressed?\", \"username\": \"stephenj2585\", \"post_text\": \"I have an OUTPUT statement writing a CSV file. Can that also include a __COMPRESSED__ STATEMENT?\\n\\nThanks!\\n\\nSteve Jones\", \"post_time\": \"2017-04-19 17:32:59\" },\n\t{ \"post_id\": 16763, \"topic_id\": 4193, \"forum_id\": 10, \"post_subject\": \"Re: Trying to use STD.File.DeSpray\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nAt first glance your problem appears to me to be that you've inappropriately included data types for each parameter in your call to your DeSpray_LZ() function. Therefore, the compiler thinks you're trying to do something other than just call your function with those parameters. You've "confused" it. \\n\\nTry removing those, making your function call look like this:
DeSpray_LZ( LogicalFileName, '/data', outputFileName, 'barbara.norman@lexisnexisrisk.com', 'http://landingzone.telematics.lexisnexis.eu');
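\\nFor reference, a minimal sketch of calling STD.File.DeSpray directly, using the same argument order as the DeSpray_LZ function above (the IPs, paths and file name here are only placeholders): the first argument is simply the logical file name that the earlier OUTPUT wrote.
IMPORT STD;\\nSTD.File.DeSpray('~telematics::bn::example::file.csv', //logical file to despray (as written by OUTPUT)\\n                 '10.0.0.1',                            //landing-zone IP (placeholder)\\n                 '/data/example_file.csv',              //destination path on the landing zone\\n                 -1,                                     //timeout: -1 = no timeout\\n                 'http://10.0.0.2:8010/FileSpray',       //ESP FileSpray service URL (placeholder)\\n                 ,                                       //maxConnections: use the default\\n                 TRUE);                                  //allow overwrite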
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-21 14:09:30\" },\n\t{ \"post_id\": 16753, \"topic_id\": 4193, \"forum_id\": 10, \"post_subject\": \"Trying to use STD.File.DeSpray\", \"username\": \"KatyChow\", \"post_text\": \"Hi there!\\n\\nI'm trying to us the STD.File.Despray call, but somehow I keep getting the following error.\\n\\nError: syntax error near "LogicalFileName" : expected :=\\n\\nHere is some pseudo code that I've snipped and put together. \\n/* Importing Libraries */\\nImport TelematicsConsolidation;\\nIMPORT lib_stringlib;\\nIMPORT ut;\\nIMPORT Std;\\nIMPORT TelematicsV2_Services;\\n\\n/* Define Today's Date & Other Constants*/\\nTODAY := (integer)StringLib.GetDateYYYYMMDD();\\nclientsrcrid := 10411; \\nstartdate :='2016-08-01'; \\nenddate := '2017-03-01';\\nyourinitials := 'BN';\\nclientname := 'MapfreSpain';\\n\\noutputFileName := 'telematics_' + clientname + '_EnhancedPulses_' + (string) TODAY + '.csv';\\nLogicalFileName := '~telematics::' + yourinitials +'::'+ clientname + '::EnhancedPulses::' + (string) TODAY + '.csv';\\n// LogicalFileName;\\n\\nJourneys := TelematicsConsolidation.Files_Journey.DS_ALL_JOURNEY;\\n\\noutput(journeys,,LogicalFileName,CSV(Heading(single),separator('|')),overwrite);\\n\\nDeSpray_LZ(string inFileName, string outDir, string outFileName, string email, string ip) := Function\\n\\noutName := outDir + outFileName;\\nDesprayOutFL := STD.File.DeSpray(inFileName,\\n ip,\\n outName, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t -1,\\n 'http://10.245.36.43:8010/FileSpray', \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t , \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t TRUE);\\t\\n\\t\\t\\nEmailText := inFileName + ' has been desprayed to the LZ.' + '\\\\n\\\\n Label: ' + outName;\\t\\t\\t\\t\\t\\t\\t\\t\\nSendEmail := STD.System.Email.SendEmail(email, //email to\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t'Despray completed',\\t\\t\\t\\t //subject \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tEmailText);\\t\\t\\t\\t //email body\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nRETURN sequential(DesprayOutFL, SendEmail);\\nEND;\\n// missing outDirectory & ip address for LZ\\nDeSpray_LZ(string LogicalFileName, string '/data', string outputFileName, string 'barbara.norman@lexisnexisrisk.com', string 'http://landingzone.telematics.lexisnexis.eu');
\\n\\nI don't really understand what that first input for the despray is asking for if it is not what I have named my logical file? \\n\\nAny advice would be greatly appreciated!\\n\\nThank you!\\n\\nKaty\", \"post_time\": \"2017-04-21 13:59:11\" },\n\t{ \"post_id\": 16851, \"topic_id\": 4201, \"forum_id\": 10, \"post_subject\": \"Re: STORED causing every path in CASE to evaluate\", \"username\": \"NP\", \"post_text\": \"Thanks Richard.\\n\\n[quote="rtaylor":3pp37g5t]Perfect candidate for a JIRA issue.\\n\\nI wanted to check here first if there was something I messed up in the code (for instance, "well of course you shouldn't use STORED on THOR" ) \\n\\nI will create a JIRA issue.\\n\\nThanks\\n\\nNP\", \"post_time\": \"2017-04-25 15:09:18\" },\n\t{ \"post_id\": 16841, \"topic_id\": 4201, \"forum_id\": 10, \"post_subject\": \"Re: STORED causing every path in CASE to evaluate\", \"username\": \"rtaylor\", \"post_text\": \"NP,\\n\\nPerfect candidate for a JIRA issue.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-25 14:43:08\" },\n\t{ \"post_id\": 16821, \"topic_id\": 4201, \"forum_id\": 10, \"post_subject\": \"STORED causing every path in CASE to evaluate\", \"username\": \"NP\", \"post_text\": \"If I run the following code, on thor or hthor, on v6.2.2-1, \\n\\n
\\nCaseVar := 'A' /*: STORED('CaseVar')*/;\\n\\nMyLayout := RECORD\\n STRING col1;\\nEND;\\n\\nds1 := DATASET([{'DS1'}], MyLayout);\\nds2 := DATASET([{'DS2'}], MyLayout);\\n\\nLOCAL Result := CASE(CaseVar,\\n 'A' => ds1,\\n 'B' => ds2,\\n ds1\\n );\\n\\nResult; \\n
\\n\\nwhen I check the graph, I can see it will evaluate just one branch in CASE, the one for 'A' (see Graph1.jpg).\\n\\n[attachment=1:3byllssi]Graph1.JPG\\n\\nOn the other hand, if I uncomment the \\n\\n: STORED('CaseVar')
\\nin line 1, and run it again, it will evaluate all branches of the CASE (see Graph2.jpg).\\n\\n[attachment=0:3byllssi]Graph2.jpg\\n\\nIn a realistic scenario, when I execute a file like this form jenkins and pass in a value for CaseVar, it will still evaluate the right hand side of every branch in the case. For a CASE with 10 branches (each calling a different function, manipulating lots of data to produce the result dataset), the whole query becomes quite slow since it is doing 10x more work than it should.\\n\\nIs there something wrong with this code, is there something wrong with CASE, or is this expected behaviour (and if so, what is a better approach, so that I don't execute all the branches unnecessarily)? \\n\\nMany thanks,\\n\\nNP\", \"post_time\": \"2017-04-24 15:37:02\" },\n\t{ \"post_id\": 17061, \"topic_id\": 4221, \"forum_id\": 10, \"post_subject\": \"Re: UNSORTED AND ORDERED in Table\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks.\", \"post_time\": \"2017-05-11 17:44:59\" },\n\t{ \"post_id\": 17051, \"topic_id\": 4221, \"forum_id\": 10, \"post_subject\": \"Re: UNSORTED AND ORDERED in Table\", \"username\": \"bforeman\", \"post_text\": \"https://track.hpccsystems.com/browse/HPCC-17613\", \"post_time\": \"2017-05-11 17:26:14\" },\n\t{ \"post_id\": 16981, \"topic_id\": 4221, \"forum_id\": 10, \"post_subject\": \"UNSORTED AND ORDERED in Table\", \"username\": \"georgeb2d\", \"post_text\": \"I am not quite sure what these do. Would I be concerned more if I had sorted the dataset before running the table? If I have no sort should I always run UNSORTED for better performance? \\n\\nUNSORTED says that I don't care about the order of the groups. If I leave it out does that mean the GROUPS are sorted? How are they sorted? If I doing the table based on a field, does it sort it alphabetically? \\n\\nORDERED/UNORDERED What order am I concerned about? Usually I am running a table on a dataset with a grouping based on a field. Should I always run it UNORDERED? \\n\\nSimilar questions about STABLE/UNSTABLE. \\n\\nIs there more documentation on TABLE besides the Language reference? Having come from a C/C++ background, I am also curious about numthreads but have not really seen any helpful documentation.\", \"post_time\": \"2017-05-08 17:56:20\" },\n\t{ \"post_id\": 17133, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE, JOIN & Co.\", \"username\": \"rtaylor\", \"post_text\": \"Luke, \\n\\nSo you can just replace the ROLLUP with TABLE and be GTG.\\n\\nRichard\", \"post_time\": \"2017-05-19 18:22:33\" },\n\t{ \"post_id\": 17131, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE, JOIN & Co.\", \"username\": \"lpezet\", \"post_text\": \"Hi Richard!\\n\\nIn my actual code the JOIN will do some tests (test the child is within some parameters of the parent).\\nThe TABLE would work thereafter but my JOIN now returns the RIGHT record (products here) so the ROLLUP kinda does an "update" (filling in fields like amounts, counts, etc. while keeping the rest unchanged). 
\\nIf I were to use a TABLE, I'd end up JOINing again to get the rest of the fields, either here or later on when creating reports.\\n\\nNot sure if that makes sense...\", \"post_time\": \"2017-05-19 14:54:03\" },\n\t{ \"post_id\": 17121, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE, JOIN & Co.\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nYou are correct -- that line of your ROLLUP transform code was more appropriate for an ITERATE.\\n\\nBut my question still remains -- why not just do the two TABLE functions I used?\\n\\nRichard\", \"post_time\": \"2017-05-19 11:35:02\" },\n\t{ \"post_id\": 17111, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE, JOIN & Co.\", \"username\": \"lpezet\", \"post_text\": \"[quote="rtaylor":8iqhq8a3] but I did narrow it down to the fact that the Process_all() function is generating different data values into the paid_amount field of the SaleSummaryDS with each pass.\\n\\n\\nThat's exactly my problem indeed. I simplified a lot of the processing to provide this code (like more logic in the JOIN based on pair of records, then OR-ing some booleans in the ROLLUP, etc.) but it does replicate the problem I'm having.\\n\\nI think I figured it out. The ROLLUP in this code would not yield the expected result because of this:\\n\\nSELF.product_paid_amount := LEFT.product_paid_amount + RIGHT.paid_amount;\\n
\\nReason being (correct me if I'm wrong) is that it would miss the paid_amount from the LEFT record of the very first pair in the series.\\nAnd this is what makes it all random in the end. I sort by episode_id but those paid_amount might come in different order between runs.\\nAn easy test is to store $1.0 for paid_amount for all sales and realize the total_product_paid_amount in the end is $5,000,000 (first sale of each product) short of $50,000,000 (5M products * $1.0 * 10 sales).\\n\\n\\nSorry for the trouble...\", \"post_time\": \"2017-05-19 01:37:19\" },\n\t{ \"post_id\": 17101, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"Re: DISTRIBUTE, JOIN & Co.\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nNot sure why it's getting different results each time, but I did narrow it down to the fact that the Process_all() function is generating different data values into the paid_amount field of the SaleSummaryDS with each pass.\\n\\nBut, since your SaleDS file contains all the data you need, why not just do it this way:product_sale_layout := RECORD\\n STRING product_id;\\n STRING sale_id;\\n STRING seller_id;\\n DECIMAL14_2 paid_amount;\\nEND;\\n\\nSaleDS := DATASET( 'test::sales', product_sale_layout, THOR );\\nT1 := TABLE( SaleDS,\\n {product_id,DECIMAL20_2 product_paid_amount := SUM(GROUP, paid_amount)},\\n product_id);\\t\\t\\nTABLE( T1,{ \\n UNSIGNED total_products := COUNT(GROUP);\\n DECIMAL20_2 total_product_paid_amount := SUM(GROUP, product_paid_amount);\\n DECIMAL20_2 avg_product_paid_amount := AVE(GROUP, product_paid_amount);\\n DECIMAL20_2 min_product_paid_amount := MIN(GROUP, product_paid_amount);\\n DECIMAL20_2 max_product_paid_amount := MAX(GROUP, product_paid_amount);\\n DECIMAL20_2 sd_product_paid_amount := SQRT(VARIANCE(GROUP, product_paid_amount));\\n DECIMAL12_8 cv_product_paid_amount := SQRT(VARIANCE(GROUP, product_paid_amount))\\n / AVE(GROUP, product_paid_amount);\\n }, FEW);
This eliminates the "hoops" you're jumping through to produce the SaleSummaryDS file at all.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-18 19:36:39\" },\n\t{ \"post_id\": 17091, \"topic_id\": 4261, \"forum_id\": 10, \"post_subject\": \"DISTRIBUTE, JOIN & Co.\", \"username\": \"lpezet\", \"post_text\": \"I'm bumping into quite the quadratic conundrum here.\\nI'm on a quest to avoid SKEW LIMIT error messages of death and to that end I'm trying to use LOCAL as much as possible in my code. Which means I gotta DISTRIBUTE my data appropriately (correct me if I'm wrong). \\nI also very much like 100% utilization of my cluster as much as possible (there's nothing more frustrating than a 10-node/40 way cluster working only 2 nodes at a time over long periods of time).\\n\\nI attached an ECL script to demonstrate my problem.\\nInstructions are in the code but basically what it does is:\\n1. (generate_data()) generate 2 pieces of data (parent/child kinda thing)\\n2. (process_data()) join them, roll them up (to sum up values in children and report at parent level) and finally store the results\\n3. (report()) I then run simple TABLE on the results (pretty much a copy/paste of ML.FieldAggregates.Simple).\\n
\\nThe problem is that, as is, running process_all() (which is process_data()+report()) multiple times yields different results (I tried on 2 different clusters).\\nRunning report() only multiple times yields same results though.\\n\\nRight now it's hard for me to pinpoint where the problem is (or are?) but here are some tinkering I did to make it yield consistent results:\\n\\nA. Save products and sales (parents and children) with a DISTRIBUTE using the same field I'm using later on in the JOIN in process_all() (this is Step 7. in the instructions).\\nB. Give up DISTRIBUTE and LOCAL in process_data(). I can hear the SKEW-LIMIT monster crawling his way back to me already...\\n
\\n\\nNow the questions:\\nIf trick A. works, does that mean that the A_0 and A_1 DISTRIBUTE are not doing their job properly? Or is it something else down the (code) line?\\nWhat am I doing wrong and/or misunderstanding here?\\n\\nThank you for your help,\\nLuke.\\n\\n\\nPS: I ran Preflight Certification on the cluster I'm using (6.2.4-1) and everything checked out.\", \"post_time\": \"2017-05-17 16:38:05\" },\n\t{ \"post_id\": 17253, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2017-05-30 21:41:08\" },\n\t{ \"post_id\": 17243, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nNot quite sure just how computationally intensive this solution might be, but here's another way to get it done:#WORKUNIT('name','Perserve trailing zeros');\\nIMPORT Std;\\nd := DATASET([{-3.00}\\n ,{-2.99}\\n ,{-1.40}\\n ,{-0.09}\\n ,{ 0.99}\\n ,{ 1.40}\\n ,{+2.98}\\n ,{+3.00}],{DECIMAL5_2 a});\\nTrailZero(STRING s, UNSIGNED1 decs) := FUNCTION\\n Zeroes := '000000000000000000000000000000000000000000';\\n InLen := LENGTH(s);\\n DotPos := Std.Str.Find(s,'.',1);\\n STRING RetStr := s + IF(DotPos=0,'.','') + Zeroes;\\n RetLen := IF(DotPos=0,InLen + Decs + 1,DotPos+Decs);\\n RETURN RetStr[1..RetLen];\\nEND;\\n\\nPROJECT(d,TRANSFORM({STRING a},SELF.a := TrailZero((STRING)LEFT.a,2)));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-30 17:41:58\" },\n\t{ \"post_id\": 17203, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"Allan\", \"post_text\": \"HI Richard,\\n\\nThanks for this, yes there is REALFORMAT, but my original question asked:\\nWhat's the least computational effort\\nUnless the compiler is doing something clever REALFORMAT involves setting up a call frame on the stack, passing parameters, doing a call, doing the work, cleaning up the call frame and returning the result. In addition the underlying C Compiler can't do any optimisation over a call.\\nFor such a simple job I was trying to avoid all that.\\n\\nYours\\nAllan\", \"post_time\": \"2017-05-30 12:26:20\" },\n\t{ \"post_id\": 17183, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHow about using REALFORMAT(), like this:d := DATASET([{-3.00}\\n ,{-2.99}\\n ,{-1.40}\\n ,{-0.09}\\n ,{ 0.99}\\n ,{ 1.40}\\n ,{+2.98}\\n ,{+3.00}],{DECIMAL5_2 a});\\n\\nPROJECT(d,TRANSFORM({STRING a},SELF.a := TRIM(REALFORMAT(LEFT.a,20,2),LEFT)));\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-05-25 20:27:53\" },\n\t{ \"post_id\": 17173, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"Allan\", \"post_text\": \"Perhaps using FUNCTIONMACROs one can generalise for any precision?\", \"post_time\": \"2017-05-25 15:19:12\" },\n\t{ \"post_id\": 17163, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"Allan\", \"post_text\": \"Can someone/anyone improve on:\\n\\n#WORKUNIT('name','Perserve trailing zeros');\\nd := DATASET([{-3.00}\\n ,{-2.99}\\n ,{-1.40}\\n ,{-0.09}\\n ,{ 0.99}\\n ,{ 1.40}\\n ,{+2.98}\\n ,{+3.00}],{DECIMAL5_2 a});\\n \\n \\n{STRING b} Conv(RECORDOF(d) L) := TRANSFORM\\n neg := L.a < 0;\\n n := IF(neg,L.a-0.001,L.a+0.001);\\n SELF.b := IF(neg,(STRING5)n,(STRING4)n);\\nEND;\\n\\ne := PROJECT(d,Conv(LEFT));\\ne;\\n\\n{STRING b;\\n UNSIGNED len} GetLen(RECORDOF(e) L) := TRANSFORM\\n SELF.len := LENGTH(L.b);\\n SELF := L;\\nEND;\\n\\nPROJECT(e,GetLen(LEFT));\\n
\", \"post_time\": \"2017-05-25 15:10:42\" },\n\t{ \"post_id\": 17153, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Re: Preserving insignificant zeros in DECIMALn_m to STRING c\", \"username\": \"Allan\", \"post_text\": \"Actually if you know the sign and you know the target type, you can do:\\n\\nDECIMAL5_2 a := 1.40;\\nb := a+0.001;\\n(STRING4) b;\\n
\", \"post_time\": \"2017-05-25 14:01:26\" },\n\t{ \"post_id\": 17143, \"topic_id\": 4263, \"forum_id\": 10, \"post_subject\": \"Preserving insignificant zeros in DECIMALn_m to STRING conve\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWhat's the least computational effort in converting a DECIMALn_m to STRING while preserving trailing zeros?\\n\\nDECIMAL5_3 a := -1.400;\\n(STRING) a;\\n
\\nfor the result to be -1.400\", \"post_time\": \"2017-05-25 13:57:05\" },\n\t{ \"post_id\": 17343, \"topic_id\": 4283, \"forum_id\": 10, \"post_subject\": \"Re: Picking up IDE Preferences Programmaticlly\", \"username\": \"Allan\", \"post_text\": \"Thanks Bob,\\n\\nI may open a ticket with the core team.\\n\\nYours\\nAllan\", \"post_time\": \"2017-06-08 11:51:17\" },\n\t{ \"post_id\": 17263, \"topic_id\": 4283, \"forum_id\": 10, \"post_subject\": \"Re: Picking up IDE Preferences Programmaticlly\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nInteresting question. If you treat the ECL IDE configuration file like an INI file you can extract the information you need from there I think. Checking with the developers for additional thoughts.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-05-31 14:07:44\" },\n\t{ \"post_id\": 17213, \"topic_id\": 4283, \"forum_id\": 10, \"post_subject\": \"Picking up IDE Preferences Programmaticlly\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI'm doing quite a few SOAP calls to WsWorkunits and WsAttributes, these require the appropriate URL to the HPCC service. The IDE has 'preferences' where all those lovely URL's for my session are held.\\n\\nIs it possible to access the 'preferences' from within ECL (or a pragma), so, when working from the IDE, I don't have to keep defining and passing URL's around the place. (for one thing it would save the 10 minutes I always seem to take in finding my typo in the URL I've used!)\\n\\nYours\\nAllan\", \"post_time\": \"2017-05-30 12:40:51\" },\n\t{ \"post_id\": 17333, \"topic_id\": 4313, \"forum_id\": 10, \"post_subject\": \"Re: Getting Error: Output( ) appears to be conext dependent\", \"username\": \"KatyChow\", \"post_text\": \"Thank you so much for explaining Richard!\", \"post_time\": \"2017-06-05 19:43:27\" },\n\t{ \"post_id\": 17323, \"topic_id\": 4313, \"forum_id\": 10, \"post_subject\": \"Re: Getting Error: Output( ) appears to be conext dependent\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nYour problem here is that you're using the GROUP form of ROLLUP (form 3) but your TRANSFORM function is written for the non-GROUP forms (forms 1 & 2). As the ROLLUP docs state:For form 3 of ROLLUP, the transform function must take at least two parameters: a LEFT record which must be in the same format as the recordset, and a ROWS(LEFT) whose format must be a DATASET(RECORDOF(recordset)) parameter. The format of the resulting record set may be different from the inputs.
Here's a simple example showing the difference (both ROLLUP forms shown in this example produce the same result):MyRec := RECORD\\n\\tSTRING1 Value1;\\n\\tSTRING1 Value2;\\n\\tUNSIGNED1 Value3;\\nEND;\\n\\nSomeFile := DATASET([{'C','G',1},\\n {'C','C',2},\\n {'C','A',2},\\n {'A','X',3},\\n {'B','G',4},\\t \\t\\t\\t\\t\\t \\n {'A','B',5}],MyRec);\\n\\nSortedTable := SORT(SomeFile,Value1);\\nOUTPUT(SortedTable);\\n\\n//The "usual" form of ROLLUP (form 1):\\nMyRec RollThem(MyRec L, MyRec R) := TRANSFORM\\n SELF.Value2 := MIN(L.Value2,R.Value2);\\n SELF.Value3 := MIN(L.Value3,R.Value3);\\n SELF := L;\\nEND;\\n\\nRolledUpRecs := ROLLUP(SortedTable,\\n LEFT.Value1 = RIGHT.Value1,\\n RollThem(LEFT,RIGHT));\\n\\nOUTPUT(RolledUpRecs );\\n\\n//GROUP form of ROLLUP (form 3):\\nGrecs := GROUP(SortedTable,Value1); //note this uses the SORTed recordset\\n\\nMyRec RollThem2(MyRec L, DATASET(MyRec) R) := TRANSFORM\\n SELF.Value1 := L.Value1;\\n SELF.Value2 := MIN(R,R.Value2);\\n SELF.Value3 := MIN(R,R.Value3);\\nEND;\\n\\nROLLUP(Grecs,GROUP,RollThem2(LEFT,ROWS(LEFT)));
Note the second parameter to the second TRANSFORM is different, and that the MIN functions are also using a different form in the second TRANSFORM. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-06-05 19:25:22\" },\n\t{ \"post_id\": 17313, \"topic_id\": 4313, \"forum_id\": 10, \"post_subject\": \"Getting Error: Output( ) appears to be conext dependent\", \"username\": \"KatyChow\", \"post_text\": \"Hi There!\\n\\nI keep getting this error when I am trying to look at a dataset I'm creating.. \\n\\nError: OUTPUT() appears to be context dependent - this may cause a dataset not active error (445, 1), 4533, \\n\\nCan someone help me not get this error? I can run it without the last output but I cannot run when I try to output my dataset after my rollup. \\ng_JoiningAllCalendarDates := group(JoiningAllCalendarDates, telematicsid,int_date);\\n\\n// this section is to roll up on the driver level by telematicsid and reportdate descending\\n\\nStandardReportScoresRec := RECORD\\n\\tRECORDOF(g_JoiningAllCalendarDates);\\nEND;\\n\\n// roll up transform\\nStandardReportScoresRec ReportDateRollUp(StandardReportScoresRec Le, StandardReportScoresRec Ri) := TRANSFORM\\n\\tSELF.totaldistance:= Le.totaldistance+Ri.totaldistance;\\n\\tSELF.totalhardbrakes:= LE.totalhardbrakes+RI.totalhardbrakes;\\n\\tSELF.totalhardbrakes_secbysecacc:= LE.totalhardbrakes_secbysecacc+RI.totalhardbrakes_secbysecacc;\\n\\tSELF.weekdaynightdrvmiles:= LE.weekdaynightdrvmiles+RI.weekdaynightdrvmiles;\\n\\tSELF.weekendnightdrvmiles:= LE.weekendnightdrvmiles+RI.weekendnightdrvmiles;\\n\\tSELF.weekdayeveningdrvmiles:= LE.weekdayeveningdrvmiles+RI.weekdayeveningdrvmiles;\\n\\tSELF.weekendeveningdrvmiles:= LE.weekendeveningdrvmiles+RI.weekendeveningdrvmiles;\\n\\tSELF.saturdaymorningdrvmiles:= LE.saturdaymorningdrvmiles+RI.saturdaymorningdrvmiles;\\n\\tSELF.sundaymorningdrvmiles:= LE.sundaymorningdrvmiles+RI.sundaymorningdrvmiles;\\n\\tSELF.hwy15spdingmiles:= LE.hwy15spdingmiles+RI.hwy15spdingmiles;\\n\\tSELF.hwyspdoppmiles:= LE.hwyspdoppmiles+RI.hwyspdoppmiles;\\n\\tSELF.lcl10spdingmiles:= LE.lcl10spdingmiles+RI.lcl10spdingmiles;\\n\\tSELF.lclspdoppmiles:= LE.lclspdoppmiles+RI.lclspdoppmiles;\\n\\tSELF.totalturndegrees:= LE.totalturndegrees+RI.totalturndegrees;\\n\\tSELF.tripcnts:= LE.tripcnts+RI.tripcnts;\\n\\tSELF := LE;\\nEND;\\n\\nStandardReportScores00 := ROLLUP(g_JoiningAllCalendarDates,GROUP, ReportDateRollUp(LEFT,RIGHT));\\nOUTPUT(StandardReportScores00);
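\\nFor reference, a cut-down sketch of what a GROUP-form transform looks like for a rolling sum like this one, using the LEFT plus ROWS(LEFT) signature (the field names and values are made up for illustration):
SumRec := {UNSIGNED6 telematicsid, UNSIGNED4 int_date, REAL8 totaldistance, UNSIGNED4 tripcnts};\\nds := DATASET([{1,20170601,10.5,2},{1,20170601,4.5,1},{2,20170602,7.0,3}],SumRec);\\ng  := GROUP(SORT(ds,telematicsid,int_date),telematicsid,int_date);\\n\\nSumRec RollGrp(SumRec L, DATASET(SumRec) R) := TRANSFORM\\n  SELF.totaldistance := SUM(R,totaldistance); //aggregate over the whole group\\n  SELF.tripcnts      := SUM(R,tripcnts);\\n  SELF := L;                                  //keep the grouping fields from LEFT\\nEND;\\n\\nOUTPUT(ROLLUP(g,GROUP,RollGrp(LEFT,ROWS(LEFT))));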
\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-06-05 18:36:16\" },\n\t{ \"post_id\": 17373, \"topic_id\": 4333, \"forum_id\": 10, \"post_subject\": \"Re: Creating new Dataset Records For Gaps\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,\\n\\nHere's how I would do that:IMPORT Std;\\nRec := RECORD\\n Std.Date.Days_t MyDate; //a "days since" date field\\n UNSIGNED6 jid;\\n STRING5 zip;\\nEND;\\n\\nds1 := DATASET([{Std.Date.FromJulianYMD(2017,1,1),1,30301},\\n {Std.Date.FromJulianYMD(2017,1,4),2,30304},\\n {Std.Date.FromJulianYMD(2017,1,6),3,30306}],Rec);\\nMinDate := MIN(ds1,MyDate);\\nDateRange := MAX(ds1,MyDate) - MinDate + 1;\\n\\n//first create a new dataset with all the dates in the range:\\nds2 := DATASET(DateRange,\\n TRANSFORM(Rec,SELF.MyDate := MinDate + COUNTER - 1, SELF := []));\\n\\n//then LEFT OUTER JOIN that to ds1 to pick up the data from ds1:\\nds3 := JOIN(ds2,ds1,LEFT.MyDate=RIGHT.MyDate,\\n TRANSFORM(rec,SELF.MyDate := LEFT.MyDate, SELF := RIGHT),\\n\\t\\t\\t\\t\\t\\tLEFT OUTER);\\n\\n//then ITERATE through that to do the "fill in" phase:\\nITERATE(SORT(ds3,MyDate),\\n TRANSFORM(Rec,\\n SELF.jid := IF(RIGHT.jid=0,LEFT.jid,RIGHT.jid),\\n SELF.zip := IF(RIGHT.zip='',LEFT.zip,RIGHT.zip),\\n SELF := RIGHT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-06-09 18:33:14\" },\n\t{ \"post_id\": 17363, \"topic_id\": 4333, \"forum_id\": 10, \"post_subject\": \"Creating new Dataset Records For Gaps\", \"username\": \"georgeb2d\", \"post_text\": \"Hello,\\n\\nSuppose I have 3 records, dated 1/1, 1/4 and 1/6. Records are missing for 1/2, 1/3 and 1/5\\nSo the dataset begins as:\\n1/1 jid=1 zip=30301\\n1/4 jid=2 zip= 30304\\n1/6 jid=3 zip=30306\\n\\nI want to duplicate the record twice for 1/1 and put a date upon it as 1/2 and 1/3. I also want to duplicate the record for 1/4 and put a date of 1/5 on it. I want to be able to do this dynamically. I want to somehow compare the two adjacent records and create the appropriate number of records so the final dataset will look like:\\n1/1 jid=1 zip=30301\\n1/2 jid=1 zip=30301\\n1/3 jid=1 zip=30301\\n1/4 jid=2 zip=30304\\n1/5 jid=2 zip=30304\\n1/6 jid=3 zip=30306\\n\\nI could go through the first time and subtract the dates from 1/1 and 1/4 and get 3 and subtract 1 from it. Same with 1/4 and 1/6. Then I would have:\\n1/1 2\\n1/4 1\\n\\nThen use this number to do a normalize for that number the next time through with an appropriate transform but that seems very clunky. I am sure you have a better suggestion. \\n\\nThanks.\", \"post_time\": \"2017-06-09 16:16:15\" },\n\t{ \"post_id\": 17673, \"topic_id\": 4393, \"forum_id\": 10, \"post_subject\": \"Re: Compressing a record and Getting its Size\", \"username\": \"georgeb2d\", \"post_text\": \"Two replies:\\nI had tried LENGTH of field and it was not helpful. We split the original record into STRING10000 size segments and then compress these records. one of those segments does not compress properly. Of course we could use STRING9000 (or 8000 or smaller) size segments to finally get it to work but that would just hide the issue and the bad record, which is not what is desired. \\n\\nI submitted the jira ticket. AFAIK appears far more sinister than it is..LOL.\", \"post_time\": \"2017-06-28 19:48:08\" },\n\t{ \"post_id\": 17623, \"topic_id\": 4393, \"forum_id\": 10, \"post_subject\": \"Re: Compressing a record and Getting its Size\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nAFAIK, there is no way to determine the post-compression size of a field. I suggest you submit a JIRA ticket asking for that feature.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-06-26 12:55:30\" },\n\t{ \"post_id\": 17613, \"topic_id\": 4393, \"forum_id\": 10, \"post_subject\": \"Re: Compressing a record and Getting its Size\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nHave you tried using LENGTH on that field?\\n\\nBob\", \"post_time\": \"2017-06-26 12:50:09\" },\n\t{ \"post_id\": 17563, \"topic_id\": 4393, \"forum_id\": 10, \"post_subject\": \"Compressing a record and Getting its Size\", \"username\": \"georgeb2d\", \"post_text\": \"I hoping for some debugging tips. \\n\\nCurrently we have an occasional problem that causes the ECL BuildIndex not to work. In QC last night the following error occurred:\\n\\nGraph graph83[2691], indexwrite[2695]: SLAVE #6 []: Key row too large to fit within a key node (uncompressed size=12140, variable=true, pos=0), - caused by (0, Key row too large to fit within a key node (uncompressed size=12140, variable=true, pos=0)) (in item 286)\\n\\nThe dataset has a certain field and occasionally the extract on that field from the database does not work properly. So when that row in the dataset is built into an index to be compressed the key row is too large. 
\\n\\nThe only way I know to debug this is to get the dataset and attempt to build an index on it. Then I keep splitting the dataset and trying to build indexes on these splits until I find the culprit. This morning it was line 610 out of 808 records, and I am grateful that 808 was not a large number. \\n\\nThere must be an easier way. As I understand it, if I look at the size of each row, that is not the problem. It is the size of the row once it has been compressed. So if somehow I could see the size of each row once it is compressed then I could quickly find the row that is causing the problem. The SIZEOF function returns the total number of bytes defined for storage of the specified data structure or field, so I do not think that would even help. \\n\\nI looked at all the documentation I have and could find nothing to compress the rows, and further any way to get the size of it if I somehow compressed it. \\n\\nI am open to suggestions on how to discover the aberrant record(s) more easily.\", \"post_time\": \"2017-06-20 17:02:40\" },\n\t{ \"post_id\": 18243, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"rtaylor\", \"post_text\": \"And more about this issue, per the developers:\\n**********************************************************************************\\nThere are problems with having Unicode in indexes – especially our current implementation of indexes. There are generally two solutions:\\n\\n1) If you only need exact matches:\\n\\nIn this case the simplest method is to store a HASH64() of the string in the keyed portion, and the original string in the payload. Your search then becomes\\nBUILD(myDataset, {hashText = HASH64(text)}, {payloadText = text },'indexfile');\\n\\nmyIndex(KEYED(hashText = HASH64(searchText)) AND (payloadText = searchText))\\n
\\n2) If you need ordering (e.g., for range searches) then you need to use KEYUNICODE.\\n\\nUnfortunately there is no upper limit to the length that KEYUNICODE returns (even for a single character string!). You have a similar problem that you cannot have variable length strings in the keyed portion. So you need to store the first N bytes in the keyed portion, and the rest in the payload:\\n\\nBUILD(myDataset, { data20 keyedText := KEYUNICODE(text)[1..20]}, {payloadText := KEYUNICODE(text)[21..] });\\n\\nmyIndex(KEYED(keyedText = KEYUNICODE(searchText)[1..20]) AND (payloadText = KEYUNICODE(searchText)[21..]))
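\\nPutting the hash-key approach from option 1 together as a self-contained sketch (the index file name and sample rows are only illustrative): the keyed portion is a fixed-size hash and the original text rides in the payload.
rec := {UNSIGNED4 uid, UTF8 txt};\\nds  := DATASET([{1,U'abcdë'},{2,U'plain text'}],rec);\\n//add a fixed-size hash column to key on, keeping the variable-length text as payload\\nWithHash := TABLE(ds,{ds, UNSIGNED8 hashText := HASH64(txt)});\\nidx := INDEX(WithHash,{hashText},{uid,txt},'~thor::key::utf8_hash_demo');\\nUTF8 searchText := U'abcdë';\\nSEQUENTIAL(\\n  BUILD(idx,OVERWRITE),\\n  OUTPUT(idx(KEYED(hashText = HASH64(searchText)) AND txt = searchText))\\n);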
\\nAs far as Unicode vs utf8 go they are different representations of the same information and can contain the same characters. Unicode is implemented as UTR-16LE\\n**********************************************************************************\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-10 13:11:26\" },\n\t{ \"post_id\": 18233, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"rtaylor\", \"post_text\": \"OK, now I see the problem. That's because UTF8 is an inherently variable-length data type where each character can be expressed as anything from 1 to 6 bytes, depending on the character itself.\\n\\nBut either way, such a long key field is not going to be efficient, so I suggest you think about shortening it for the index.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-07 21:02:37\" },\n\t{ \"post_id\": 18223, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"georgeb2d\", \"post_text\": \"UTF8_255 Fieldname; is still giving the error message:\\n\\nError: Variable size fields (Fieldname) are not supported inside indexes \\n\\nUNICODE255 Fieldname; does work.\", \"post_time\": \"2017-07-07 20:56:33\" },\n\t{ \"post_id\": 18213, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"rtaylor\", \"post_text\": \"Yes, or maybe like this: UTF8_266 fieldname;
But a 266-character Unicode string is not going to be a terribly efficient key. You might want to limit it to the first 20 or 30 characters, something like this:\\nIDX := INDEX(ds,{UTF8_20 First20 := ds.LongUTF8_field},{ds.UID},'keyfilename');
Caveat: this code is "off the top of my head" so it may need tweaking. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-07 20:41:20\" },\n\t{ \"post_id\": 18203, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"georgeb2d\", \"post_text\": \"Something like this?\\n UTF8 FieldName { MAXLENGTH (266) };\\n\\nThat did not work..but this did:\\n UNICODE266 FieldName ;\", \"post_time\": \"2017-07-07 20:11:22\" },\n\t{ \"post_id\": 18193, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Re: Keys usting UTF8 fields\", \"username\": \"rtaylor\", \"post_text\": \"Don,\\n\\nDiscover the max length of the UTF data you want to use as the key field and define it as that (or larger).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-07-07 20:02:19\" },\n\t{ \"post_id\": 18183, \"topic_id\": 4543, \"forum_id\": 10, \"post_subject\": \"Keys usting UTF8 fields\", \"username\": \"georgeb2d\", \"post_text\": \"AS our ECL code is used in other countries, we are experiencing the problem of needing UTF8 fields in the keys. How can that be done? When I try to compile I get the error message that I cannot use variable length fields. \\n\\nAny suggestions?\", \"post_time\": \"2017-07-07 19:52:55\" },\n\t{ \"post_id\": 18363, \"topic_id\": 4583, \"forum_id\": 10, \"post_subject\": \"Re: UNICODE beginning a Field Name\", \"username\": \"JimD\", \"post_text\": \"It looks like the compiler thinks you are using the form UNICODE_locale.\\n\\nfor example:\\n
UNICODE_de5 MyUnicodeString := U'abcd\\\\353';\\n // becomes 'abcdë' with a German locale\\n
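\\nAs a quick illustration, the same definition compiles once the identifier no longer begins with the UNICODE_ prefix that the parser reserves for locale forms (the new name is arbitrary):
UNSIGNED2 My_UNICODE_FIXED_LENGTH := 10; //no longer parsed as a UNICODE_locale type name\\nOUTPUT(My_UNICODE_FIXED_LENGTH);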
\\n\\nNaming a field UTF8_anything produces a similar syntax error. \\n\\nFor now, I suggest either removing the underscore or adding something before it like MyUNICODE_\\n\\nI am looking into this further and have opened this Jira issue to discuss either changing the behavior or documenting it. https://track.hpccsystems.com/browse/HPCC-18071 I suspect documenting it will be the choice. \\n\\nHTH, \\nJim\", \"post_time\": \"2017-07-24 17:53:43\" },\n\t{ \"post_id\": 18343, \"topic_id\": 4583, \"forum_id\": 10, \"post_subject\": \"Re: UNICODE beginning a Field Name\", \"username\": \"bforeman\", \"post_text\": \"Hi Don,\\n\\nPerhaps it is not intentional. Please enter this as an issue and the compiler team can investigate.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-07-24 16:47:32\" },\n\t{ \"post_id\": 18323, \"topic_id\": 4583, \"forum_id\": 10, \"post_subject\": \"UNICODE beginning a Field Name\", \"username\": \"georgeb2d\", \"post_text\": \"When I do this:\\n\\nUNSIGNED2 UNICODE_FIXED_LENGTH := 10;\\nOUTPUT (UNICODE_FIXED_LENGTH);\\n
\\nIt does not compile.\\nError: syntax error near "UNICODE_FIXED_LENGTH" : expected datarow, identifier, pattern-name, action, pattern (1, 11), 3002, \\nError: syntax error near "UNICODE_FIXED_LENGTH" : expected < (2, 9), 3002, \\n\\nWhen I do this:\\n\\nUNSIGNED2 FIXED_UNICODE_LENGTH := 10;\\nOUTPUT (FIXED_UNICODE_LENGTH);\\n
\\nIt does compile.\\n\\nWhen I do this:\\n\\nUNSIGNED2 String_FIXED_LENGTH := 10;\\nOUTPUT (String_FIXED_LENGTH);\\n
\\nIt does compile.\\n\\nSo I guess there is some rule you can't start a field name with UNICODE?\", \"post_time\": \"2017-07-24 15:10:32\" },\n\t{ \"post_id\": 18503, \"topic_id\": 4623, \"forum_id\": 10, \"post_subject\": \"Re: Bit Array\", \"username\": \"bforeman\", \"post_text\": \"Hi Allan,\\n\\nHave you seen Richard Taylor's blog on Bit Fiddling? There may be something there that you can use.\\n\\nhttps://hpccsystems.com/blog/tips-and-tricks-ecl-part-1-bit-fiddling\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-08-03 15:36:03\" },\n\t{ \"post_id\": 18493, \"topic_id\": 4623, \"forum_id\": 10, \"post_subject\": \"Bit Array\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nAnyone out there have a function suite to set/clear/test bits in something like a DATA item?\\n\\nI'm currently implementing a bitmap as a dataset but hitting performance issues.\\n\\nYours\\nAllan\", \"post_time\": \"2017-08-03 09:22:49\" },\n\t{ \"post_id\": 21601, \"topic_id\": 4633, \"forum_id\": 10, \"post_subject\": \"Re: Processing a Zipped File\", \"username\": \"jwilt\", \"post_text\": \"John, if you're watching...\\nDid you ever get this working?\", \"post_time\": \"2018-04-10 23:29:47\" },\n\t{ \"post_id\": 18523, \"topic_id\": 4633, \"forum_id\": 10, \"post_subject\": \"Re: Processing a Zipped File\", \"username\": \"bforeman\", \"post_text\": \"Hi John,\\n\\nTry spraying the file as a CSV (Delimited) and then remove the THOR reference from your PIPE command.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-08-04 19:23:22\" },\n\t{ \"post_id\": 18513, \"topic_id\": 4633, \"forum_id\": 10, \"post_subject\": \"Processing a Zipped File\", \"username\": \"John Meier\", \"post_text\": \"I have a file that is sent to me zipped (datafile.zip) the contents are records with a total length of 1742 bytes.\\n\\nI have sprayed the file as a BLOB (since the data is compressed). I then attempt to read the file and process the records. I am using the PIPE option of the DATASET and pass it 'gunzip' (which exists on the server and when tested on the server, it decompresses the file).\\n\\n\\nlayout_zipfile := RECORD\\n string1742 fullrec;\\nEND;\\n\\nds_zipfile := dataset( '~spray::target::20170803::p701650.compressed_data'\\n , layout_zipfile\\n , PIPE('gunzip', THOR)\\n );\\n
\\nds_zipfile;\\n\\nBut I keep getting the message: System error: -1: CFileSerialStream::get read past end of stream\\n\\nI'm not sure if I'm spraying the data correctly (should I spray as a CSV even though uncompressed, the data is fixed?). Any insight would be greatly appreciated.\\nP.S. I tried using an EMBED for python, but it wouldn't accept the embedded code (a simple "HELLO WORLD") and we have no code in production I could reference.\\n\\nThank You\", \"post_time\": \"2017-08-04 14:32:42\" },\n\t{ \"post_id\": 18833, \"topic_id\": 4713, \"forum_id\": 10, \"post_subject\": \"Re: Resource limit spill: Heavyweight (2>1)\", \"username\": \"rtaylor\", \"post_text\": \"I am wondering if there is a way to limit these disk spills to cases where these are really needed.
I would expect the developers to answer this with, "They are limited to only those cases where they are really needed." IOW, this is a situation to report through JIRA so the developers can have a direct look at what you're doing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-14 21:15:39\" },\n\t{ \"post_id\": 18823, \"topic_id\": 4713, \"forum_id\": 10, \"post_subject\": \"Re: Resource limit spill: Heavyweight (2>1)\", \"username\": \"georgeb2d\", \"post_text\": \"I am looking at a graph as it runs. Before a large part of it has run it already has the disk spills in parts that have not run as yet, and Resource limit spill: Heavyweight (2>1). This is before the program even knows how many records result from a join, etc. \\n\\nThat makes me think there is something else going on. It looks like the compiler is assuming there will be too much data, whether there is or is not. \\n\\nI am working with a system where the CPU is the bottleneck when it does a disk spill, etc. Currently it is cost prohibitive to upgrade the CPU so I am wondering if there is a way to limit these disk spills to cases where these are really needed.\", \"post_time\": \"2017-09-14 18:45:54\" },\n\t{ \"post_id\": 18803, \"topic_id\": 4713, \"forum_id\": 10, \"post_subject\": \"Re: Resource limit spill: Heavyweight (2>1)\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. That is what I suspected but could not find any documentation to support my thesis.\", \"post_time\": \"2017-09-12 20:51:46\" },\n\t{ \"post_id\": 18753, \"topic_id\": 4713, \"forum_id\": 10, \"post_subject\": \"Re: Resource limit spill: Heavyweight (2>1)\", \"username\": \"rtaylor\", \"post_text\": \"georgeb2d,\\n\\nThe term "spill" indicates that there's too much data at that point in the process to maintain it all in memory, so a "spill to disk" is happening. Disk I/O being the slowest part of computing, this is guaranteed to slow things down.\\n\\nThe general rule to follow to avoid as much of this as possible is to always make sure you're only working with just the data the process actually needs (using vertical slice TABLEs can help with that) at every step in your process. Another possible option would be to increase the number of nodes in your Thor cluster. And a third possibility, if this is caused by heavily skewed data, would be to change your process logic so the skew doesn't affect the processing.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-11 16:08:40\" },\n\t{ \"post_id\": 18713, \"topic_id\": 4713, \"forum_id\": 10, \"post_subject\": \"Resource limit spill: Heavyweight (2>1)\", \"username\": \"georgeb2d\", \"post_text\": \"Hello,\\n\\nLooking through a workunit that is taking a long time to run. It is dealing with large files and I keep seeing this message. \\n\\nWhat does it mean?\\n\\nThanks.\", \"post_time\": \"2017-09-08 14:37:08\" },\n\t{ \"post_id\": 19603, \"topic_id\": 4853, \"forum_id\": 10, \"post_subject\": \"Re: ECL Compilation Times\", \"username\": \"ghalliday\", \"post_text\": \"It doesn't compile it twice, but it does currently parse the query twice - once on the client machine to work out which source files are required, and once on the server to process the query.\\n\\nIt sounds like something in your template processing is taking a long time. I'm not sure why it would be excessive without looking at the query in more detail.\\n\\nIf it is possible to email me an archive I can investigate exactly what is going on. 
(Or create a jira and attach the example if you are happy for it to be public.)\\n\\ngavin.halliday at lexisnexisrisk.com\", \"post_time\": \"2017-10-19 15:24:23\" },\n\t{ \"post_id\": 19563, \"topic_id\": 4853, \"forum_id\": 10, \"post_subject\": \"Re: ECL Compilation Times\", \"username\": \"bforeman\", \"post_text\": \"I have forwarded your question to the developers. The compiler only compiles once locally, but in my understanding links on the server side. 30 minutes seems to me to be a little long, they may ask you to submit the workunit in question.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-10-18 13:57:17\" },\n\t{ \"post_id\": 19263, \"topic_id\": 4853, \"forum_id\": 10, \"post_subject\": \"ECL Compilation Times\", \"username\": \"micevepay\", \"post_text\": \"Can anyone explain the effects of template language, macros, and function macros on compile time? I am seeing situations where my compile time goes from a few seconds to almost 30 minutes depending of the possible number times a function macro is used. \\n\\nWhat I find interesting is the first half of the time the ECL IDE only shows the workunit as "submitted" and doesn't show in ECL Watch. The other showing "compiling" in ECL Watch when it finally shows up. Is ECL being compiled twice (client and server side)?\", \"post_time\": \"2017-10-02 15:04:18\" },\n\t{ \"post_id\": 19773, \"topic_id\": 4993, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro Twice\", \"username\": \"georgeb2d\", \"post_text\": \"Thanks. That did it.\", \"post_time\": \"2017-10-27 18:01:12\" },\n\t{ \"post_id\": 19763, \"topic_id\": 4993, \"forum_id\": 10, \"post_subject\": \"Re: Calling Macro Twice\", \"username\": \"rtaylor\", \"post_text\": \"If I just do this for Place_One I have no problems. \\nIt is when I try to do this for Place_One and Place_Two I get the error.
Generating the same code twice is exactly the problem. Each time you call the MACRO it is generating a definition for GridBaseFile, GridKey, and GridKeySF. IOW, this MACRO is designed to run exactly once. \\n\\nTo correct that you need to use #UNIQUENAME on these three definitions so that they aren't named the same for every instance of the MACRO (the same way you're doing for GridConstants).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-10-27 17:52:42\" },\n\t{ \"post_id\": 19753, \"topic_id\": 4993, \"forum_id\": 10, \"post_subject\": \"Calling Macro Twice\", \"username\": \"georgeb2d\", \"post_text\": \"I am trying to call the same macro twice with two different inputs but keep getting the same error. I have tried many variations but have not gotten anywhere. \\nHere is the calling code:\\nIMPORT DirOne;\\nDirOne.mac_Build_GridNames('Place_One', 'Place_One', 'Place_One', 'OLD', QC2Place_OneResult);\\nOUTPUT(QC2Place_OneResult);\\nDirOne.mac_Build_GridNames('Place_Two', 'Place_Two', 'Place_Two', 'OLD', QC2Place_TwoResult);\\nOUTPUT(QC2Place_TwoResult);\\n
\\nHere is the macro:\\n\\nIMPORT * FROM DirOne;\\nIMPORT STD;\\n\\nEXPORT mac_Build_GridNames(SharedConstants_Prefix, Constants_Prefix, Files_Suffix, Old_or_New, Result) := MACRO\\n #UNIQUENAME(GridConstantsTemplate)\\n\\tLOCAL %GridConstantsTemplate% := #TEXT(\\n\\t #IF (Old_or_New = 'OLD')\\n \\t\\tGridBaseFile := DirOne_UTILITIES.FILES.BASE_FILE_FILES_SUFFIX;\\n\\t\\t GridKey := DirOne_UTILITIES.FILES.FILE_KEY_GRID_ROAD_FILES_SUFFIX; \\n\\t\\t GridKeySF := DirOne_UTILITIES.FILES.FILE_KEY_GRID_ROAD_FILES_SUFFIX_SF;\\n\\t\\t#ELSE\\n\\t\\t GridBaseFile := DirOne.FILES.BASE_FILE_FILES_SUFFIX;\\n\\t\\t GridKey := DirOne.FILES.FILE_KEY_GRID_ROAD_FILES_SUFFIX; \\n\\t\\t GridKeySF := DirOne.FILES.FILE_KEY_GRID_ROAD_FILES_SUFFIX_SF;\\n\\t\\t#END\\n\\t);\\n #UNIQUENAME(GridConstants)\\n\\t%GridConstants% := STD.Str.FindReplace(\\n\\t\\tSTD.Str.FindReplace(\\n\\t\\t\\tSTD.Str.FindReplace(\\n\\t\\t\\t\\t%GridConstantsTemplate%,\\n\\t\\t\\t\\t'SHAREDCONSTANTS_PREFIX',\\n\\t\\t\\t\\tSharedConstants_Prefix\\n\\t\\t\\t),\\n\\t\\t\\t'FILES_SUFFIX',\\n\\t\\t\\tFiles_Suffix\\n\\t\\t),\\n\\t\\t'CONSTANTS_PREFIX', \\n\\t\\tConstants_Prefix\\n\\t);\\n\\n\\t#EXPAND(%GridConstants%);\\n\\n #UNIQUENAME(Dogbreath)\\n\\t %Dogbreath% := DATASET(\\n\\t\\t[\\n\\t\\t\\t{\\n\\t\\t\\t\\tGridKey, \\n\\t\\t\\t\\tGridKeySF, \\n\\t\\t\\t\\tGridBaseFile, \\n\\n\\t\\t\\t}\\n\\t\\t], \\n\\t\\tDirOne_Utilities.Layouts.SETUP_FILE_NAMES\\n\\t);\\n\\tRESULT := %Dogbreath%;\\nENDMACRO;\\n
\\n\\nHere is the error:\\nError: syntax error near ":=" (35, 20 - \\nDirOne.mac_Build_GridNames)\\nError: While expanding macro <param> (34, 26 - DirOne.mac_Build_GridNames)\\nError: While expanding macro mac_build_gridnames (6, 95)\\nError: syntax error near ":=" (36, 14 - DirOne.mac_Build_GridNames)\\nError: While expanding macro <param> (34, 26 - DirOne.mac_Build_GridNames)\\nError: While expanding macro mac_build_gridnames (6, 95)\\nError: Too many errors (max = 5); Aborting... (36, 16 - DirOne.mac_Build_GridNames)\\n\\nIf I just do this for Place_One I have no problems. \\nIt is when I try to do this for Place_One and Place_Two I get the error. \\nI think it has to do with Result being the same in both but I do not know how to get around this. I have tried a lot of variations but have not succeeded, even trying LOCAL. \\n\\nAny help is appreciated.\", \"post_time\": \"2017-10-27 17:28:39\" },\n\t{ \"post_id\": 19933, \"topic_id\": 5053, \"forum_id\": 10, \"post_subject\": \"Re: Open and load xml file\", \"username\": \"rtaylor\", \"post_text\": \"Francisco,I was wondering if there was a way of opening and loading the file once during compilation time...
AFAIK, only through the "drilldown" technique I mentioned previously.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-13 13:41:43\" },\n\t{ \"post_id\": 19923, \"topic_id\": 5053, \"forum_id\": 10, \"post_subject\": \"Re: Open and load xml file\", \"username\": \"francisco_escher\", \"post_text\": \"Thank you, Richard!\\n\\nI know that the template language is such a powerful tool, and I had gone through the possibilities that you mentioned, but is not really what I was looking for.\\n\\nMy desire was to have xml files as 'config' files so I can load them as strings to automate ECL according to the xml I was loading (using template language as well).\\n\\nI know I can build the string in ECL but the solution is not so elegant.\\n\\nI have also tried to open the file with embeded c++ but this won't work as well.\\n\\nI was wondering if there was a way of opening and loading the file once during compilation time...\\n\\nThanks again,\\nFrancisco Escher.\", \"post_time\": \"2017-11-13 11:27:46\" },\n\t{ \"post_id\": 19913, \"topic_id\": 5053, \"forum_id\": 10, \"post_subject\": \"Re: Open and load xml file\", \"username\": \"rtaylor\", \"post_text\": \"Francisco,\\n\\nIf you look at the LOADXML docs, you'll see that it is not designed to read XML from a file. LOADXML was originally designed to make text from a SOAP-style XML stream available for use in ECL MACROs to generate ECL code. It was used to accomplish tasks such as taking user-chosen parameters from a website and generating ECL to fetch just those records meeting that set of parameters from data that contains too many possible parameter combinations to efficiently pre-write code for all the possibilities.\\n\\nOne way to get around this would be to use the ECL IDE's "drilldown" capability to get the XML from the file and output that as a workunit result, then use the "drilldown" technique to pass that result as a constant string to another workunit that uses LOADXML to load it and Template Language to parse it to generate ECL code to run your job. That technique will be discussed by Bob Foreman in the upcoming Tech Talk this Thursday, November 16, 2017 (https://hpccsystems.com/community/events/download-tech-talks-hpcc-systems-community-episode-9). \\n\\nOf course, if you just want to parse the XML to extract data, then you don't need LOADXML at all. You can either define the file as an XML DATASET and create the proper RECORD structure using XPATH on each field definition (if it contains "well-formed" XML), or you can define it as a CSV or FLAT DATASET with a single variable-length STRING field, then use the PARSE function to extract your data from the XML text.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-13 10:16:04\" },\n\t{ \"post_id\": 19903, \"topic_id\": 5053, \"forum_id\": 10, \"post_subject\": \"Open and load xml file\", \"username\": \"francisco_escher\", \"post_text\": \"Hello,\\n\\nI am looking for a way of opening a xml file and using it inside a LOADXML statement. 
Has anyone ever tries this?\", \"post_time\": \"2017-11-12 23:42:04\" },\n\t{ \"post_id\": 20053, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Re: Yet another grouping problem\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nGot a solution of my own, will get round to comparing performance sometime.\\n\\nThanks very much\\n\\nAllan\", \"post_time\": \"2017-11-22 08:15:02\" },\n\t{ \"post_id\": 20033, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Re: Yet another grouping problem\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, here's the same process but with LOCAL added://NOTICE that I started by adding a UID field to each record\\nr := {UNSIGNED1 UID, UNSIGNED1 addr, UNSIGNED4 date, \\n STRING10 name, STRING1 Some_data_on_person};\\nds := DATASET([{1,1,19951015,'Allan','X'},\\n {2,1,19961111,'Nina','A'},\\n {3,1,19970213,'Nina','B'},\\n {4,1,19980314,'Allan','Y'},\\n {5,2,19930101,'Allan','P'}],r);\\n\\n//DISTRIBUTE sets up for the LOCAL Operations\\ndds := DISTRIBUTE(ds,HASH32(addr));\\n\\n//extract just the unique Addr/date combinations\\nAddrDate := SORT(TABLE(dds,{addr,date},addr,date,LOCAL),addr,-date,LOCAL);\\n\\n//organize the child records\\nChildren := SORT(TABLE(dds,{addr,date,name,UID},LOCAL),addr,name,-UID,LOCAL);\\n//the descending UID sort allows DEDUP to just get the latest record for each name\\n\\n//vertical slice for more efficient operation\\nChildRecs := TABLE(dds,{UID,name,Some_data_on_person},LOCAL);\\n\\n//PROJECT just the parent data into the Nested Child dataset \\nChildRec := {STRING10 name, STRING1 Some_data_on_person};\\nOutRec := RECORD\\n ds.addr; \\n ds.date;\\n DATASET(ChildRec) NameData;\\nEND;\\t \\nParentData := PROJECT(AddrDate,TRANSFORM(OutRec, \\n Kids := DEDUP(Children(addr = LEFT.addr,\\n date <= LEFT.Date),\\n name); \\n SetKids := SET(Kids,UID);\\n SELF.NameData := PROJECT(ChildRecs(UID IN SetKids),\\n ChildRec);\\n SELF := LEFT),LOCAL);\\nSORT(ParentData,addr,date);\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-20 19:16:41\" },\n\t{ \"post_id\": 20023, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Re: Yet another grouping problem\", \"username\": \"Allan\", \"post_text\": \"Hum Richard,\\n\\nLooking at your ECL, for every major key (addrid, date), and we've a lot of them, we're PROJECting the entire name information, admittedly filtered but still filtering the same child dataset a LOT of times. is there any way this entire process could all be made LOCAL? that would help.\\n\\nYours\\nAllan\", \"post_time\": \"2017-11-17 20:48:26\" },\n\t{ \"post_id\": 20013, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Re: Yet another grouping problem\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard for this very prompt reply.\\n\\nI'll try it out, since posting the question I've worked out a solution using two iterates but Your's looks neater, though I'm not sure how long the SET's will take to run.\\n\\nI'll give your solution a try, it will take me a bit of time to alter it for my actual code plus its late Friday, so don't hold your breadth.\\n\\nOnce again, thanks very much.\\n\\nAllan\", \"post_time\": \"2017-11-17 20:28:38\" },\n\t{ \"post_id\": 20003, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Re: Yet another grouping problem\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, here's a solution that creates your nested child DATASET using two PROJECTs://NOTICE that I started by adding a UID field to each record\\nr := {UNSIGNED1 UID, UNSIGNED1 addr, UNSIGNED4 date, \\n STRING10 name, STRING1 Some_data_on_person};\\nds := DATASET([{1,1,19951015,'Allan','X'},\\n {2,1,19961111,'Nina','A'},\\n {3,1,19970213,'Nina','B'},\\n {4,1,19980314,'Allan','Y'},\\n {5,2,19930101,'Allan','P'}],r);\\n\\n//extract just the unique Addr/date combinations\\nAddrDate := TABLE(ds,{addr,date},addr,date);\\n\\n//organize the child records\\nChildren := SORT(TABLE(ds,{addr,date,name,UID}),addr,name,-UID);\\n//the descending UID sort allows DEDUP to just get the latest record for each name\\n\\n//vertical slice for more efficient operation\\nChildRecs := TABLE(ds,{UID,name,Some_data_on_person});\\n\\n//a normal PROJECT to get parent data into the Nested Child dataset \\n// and a nested PROJECT to get the kids\\nChildRec := {STRING10 name, STRING1 Some_data_on_person};\\nOutRec := RECORD\\n ds.addr; \\n ds.date;\\n DATASET(ChildRec) NameData;\\nEND;\\t \\nParentData := PROJECT(AddrDate,TRANSFORM(OutRec, \\n Kids := DEDUP(Children(addr = LEFT.addr,\\n date <= LEFT.Date),\\n name); \\n SetKids := SET(Kids,UID);\\n SELF.NameData := PROJECT(ChildRecs(UID IN SetKids),\\n ChildRec);\\n SELF := LEFT));\\nParentData;
\\nI used the ChildRecs TABLE this way to easily allow for your "Some_data_on_person" to be extrapolated to multiple fields, not just a single one-character string. \\n\\nHopefully, this solution won't "slug" your Thor. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-17 15:23:05\" },\n\t{ \"post_id\": 19993, \"topic_id\": 5083, \"forum_id\": 10, \"post_subject\": \"Yet another grouping problem\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have a flat dataset of dated address IDs tied to a person. e.g.\\n
\\naddr date name Some_data_on_person\\n1 19951015 Allan X\\n1 19961111 Nina A\\n1 19970213 Nina B\\n1 19980314 Allan Y\\n2 19930101 Allan P\\n
\\nFor every major key (that is addr and date) I need to pull forward any information on a person that does NOT exist at that date. If data for a person does exist at that date it is taken as an update and any historic information, for that person, must not be pulled forward.\\nSo the result I require from the input above is:\\n\\n\\n1 19951015 Allan X\\n1 19961111 Allan X\\n Nina A\\n1 19970213 Allan X\\n Nina B\\n1 19980314 Allan Y\\n Nina B\\n2 19930101 Allan P\\n
\\nNow I can produce this but I'm using child datasets in the computation, which completely slugs at some versions of HPCC (fortunately v6 of hpcc works fine, but this ECL must run on older versions)\\nIs there a way to generate this result without the use of child datasets?\\ne.g. generating some common sequence number that can then be used in a subsequent self JOIN?\\n\\nJust to clarify: The final result is a dataset with child people dataset, I just need the computation to be flat, i.e. sweep through the input cleanly so THOR does not slug.\\nYours\\nAllan\", \"post_time\": \"2017-11-17 11:21:02\" },\n\t{ \"post_id\": 20083, \"topic_id\": 5113, \"forum_id\": 10, \"post_subject\": \"Re: Distinct Count during a table?\", \"username\": \"JimD\", \"post_text\": \"Would DISTRIBUTION work for you? \\n\\nhttps://hpccsystems.com/training/docume ... UTION.html\\n\\nHTH,\\nJim\", \"post_time\": \"2017-11-27 20:12:17\" },\n\t{ \"post_id\": 20073, \"topic_id\": 5113, \"forum_id\": 10, \"post_subject\": \"Distinct Count during a table?\", \"username\": \"KatyChow\", \"post_text\": \"Hi there,\\n\\nIs there something similar to the SQL distinct count function? Right now I am creating two different tables one with the groupings to get the distinct counts and one that does my regular aggregations then joining on a match key. \\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-11-27 11:20:56\" },\n\t{ \"post_id\": 20583, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"KatyChow\", \"post_text\": \"Hi Richard!\\n\\nAh okay! I get it now!\\n\\nThanks!!\\n\\nKaty\", \"post_time\": \"2018-01-22 21:32:58\" },\n\t{ \"post_id\": 20573, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"rtaylor\", \"post_text\": \"Katy,why this would run when I didn't output into a file?
Because in order to do a simple OUTPUT (IOW, "show me the first 100 records") the system didn't have to read enough records to encounter the data problem. But when you asked it to write all the result data to a file, it had to process all the data, and that's when it found the problem.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-22 21:28:54\" },\n\t{ \"post_id\": 20563, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"KatyChow\", \"post_text\": \"Hi Richard,\\n\\nI guess I'm still confused about why this would run when I didn't output into a file?\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2018-01-22 21:09:02\" },\n\t{ \"post_id\": 20553, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"rtaylor\", \"post_text\": \"Katy,"End of stream encountered whilst parsing" [file offset 68601]
This error basically says the code ran out of file before it found what it was looking for. IOW, this is most likely a data error. \\n\\nI'd suggest taking a very close look at your input file(s) right around the 68601 byte position, or prior to. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-22 21:04:27\" },\n\t{ \"post_id\": 20543, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"KatyChow\", \"post_text\": \"Oh! Oops! oKay! \\n\\nHere it is\\nError: System error: 1: Graph graph1[1], project[6]: SLAVE #91 [10.194.198.91:22000]: Error - end of stream "End of stream encountered whilst parsing" [file offset 68601]\\n>0.3272036910057068</Value></Attribute>G*ERROR*, - caused by (1, Error - end of stream "End of stream encountered whilst parsing" [file offset 68601]\\n>0.3272036910057068</Value></Attribute>G*ERROR*) (0, 0), 1, \\nWarning: Activity 6 created a complex helper class (21714) (2710, 22), 4538,\", \"post_time\": \"2018-01-22 20:51:11\" },\n\t{ \"post_id\": 20533, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nRemember, this is a public forum, so many non-LN people will not be able to see your workunit. Therefore, please post the specific error you are now getting.\\n\\nRichard\", \"post_time\": \"2018-01-22 20:48:25\" },\n\t{ \"post_id\": 20523, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"KatyChow\", \"post_text\": \"Hi Richard,\\n\\nNo such luck. I'm getting a different error now... .\\n\\nHere's the WUID \\nAttaching trucated code:\\nO(x, num = 100) := MACRO\\n OUTPUT(CHOOSEN(x, num), NAMED(#TEXT(x)));\\nENDMACRO;\\n\\nOC(x, num = 100) := MACRO\\n OUTPUT(CHOOSEN(x, num), NAMED(#TEXT(x)));\\n OUTPUT(COUNT(x), NAMED('Num_' + #TEXT(x)));\\nENDMACRO;\\n\\nOA(x) := MACRO\\n OUTPUT(x, NAMED(#TEXT(x)), ALL);\\nENDMACRO;\\n\\nC(x) := MACRO\\n OUTPUT(COUNT(x), NAMED('Num_' + #TEXT(x)));\\nENDMACRO;\\n\\n rec1 := RECORD\\n string transaction_id;\\n string11 product_id;\\n string19 date_added;\\n string4 process_type;\\n string8 processing_time;\\n string10 vendor_code;\\n string20 request_type;\\n string20 product_version;\\n string15 reference_number;\\n string content_data{blob, maxlength(2000000)};\\n string6 process_status;\\n END;\\n \\n logs := dataset('~base::fcra_mbsi::inquiry_history::qa::id',rec1,thor)(date_added[1..10] >= '2017-12-01' AND date_added[1..10] <= '2017-12-31');\\n OC(logs);\\n s_logs := sort(logs,transaction_id);\\n OC(s_logs);\\n \\n model_attr := logs(request_type = 'MODEL_ATTRIBUTES');\\n OC(model_attr);\\n \\n pub_recs_boca0 := logs(request_type = 'FCRA_PUB_REC_RESP');\\n OC(pub_recs_boca0);\\n \\n // need to string-a-fy the blob....\\n pub_recs_boca := project(pub_recs_boca0, TRANSFORM({recordof(pub_recs_boca0); string content_str;},\\n \\n\\n \\n True_Scores := model_attr(REGEXFIND('<status_message>No Score</status_message>',content_data)=FALSE);\\n OC(True_Scores);\\n \\n MV36 := True_Scores(REGEXFIND('<name>MV36</name>',content_data));\\n MX36 := True_Scores(REGEXFIND('<name>MX36</name>',content_data));\\n MNC2 := True_Scores(REGEXFIND('<name>MNC2</name>',content_data));\\n \\n Models_ds := MV36+MX36+MNC2;\\n OC(Models_ds);\\n \\n boca_attr_layout := record\\n string16 transaction_id;\\n\\tSTRING fcra;\\n\\tSTRING cb_allowed;\\n\\tSTRING 
account;\\n\\tSTRING seq;\\n ...\\nEND;\\n\\n\\n boca_attr_layout parse_boca_attr_blob (rec1 l) := transform\\n self.transaction_id := l.transaction_id;\\n content := l.content_data;\\t \\n\\t str_start := std.str.find(content,'<Attributes>'); \\n\\t str_end := std.str.find(content,'</Attributes>');\\n\\t str_start2_find := content[str_end+13..]; \\n\\t str_start2 := std.str.find(str_start2_find,'<Attributes>');\\n\\t str_end2 := std.str.find(str_start2_find,'</Attributes>');\\n attr_string1 := content[str_start .. str_end-1];\\n\\t attr_string2 := str_start2_find[str_start2+12 .. str_end2+12];\\n\\t attr_string := attr_string1 + attr_string2;\\n wrapped_attr_string := '<s>'+attr_string+'</s>'; \\n\\t\\n nv_pair := RECORD\\n STRING50 Name {xpath('Name')};\\n STRING Value {xpath('Value'), maxlength(256)};\\n END;\\n temp_layout := record\\n\\t dataset(nv_pair) attr{xpath('Attributes/Attribute')};\\n\\t END;\\n attr_table := fromxml(temp_layout,attr_string);\\n\\t \\n\\t SELF.fcra := attr_table.attr(name = 'fcra')[1].value;\\n\\t SELF.cb_allowed := attr_table.attr(name = 'cb_allowed')[1].value;\\n\\t SELF.account := attr_table.attr(name = 'account')[1].value;\\n\\t SELF.seq := attr_table.attr(name = 'seq')[1].value;\\n\\t ...\\n\\tEND;\\n\\t\\n\\tboca_attr_parsed := project(pub_recs_boca,parse_boca_attr_blob(left));\\n\\t// OUTPUT(boca_attr_parsed,named('boca_atts_parsed'));\\nOUTPUT(boca_attr_parsed,,'~MVR::KC::Boca_parsedXML', EXPIRE(5));
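For illustration, here is a minimal, self-contained sketch of the same FROMXML name/value extraction technique used in parse_boca_attr_blob above; the inline XML string is made up purely for the example:

nv_pair := RECORD
  STRING50 Name {XPATH('Name')};
  STRING Value {XPATH('Value')};
END;
temp_layout := RECORD
  DATASET(nv_pair) attr {XPATH('Attributes/Attribute')};
END;
xmlText := '<s><Attributes>' +
           '<Attribute><Name>fcra</Name><Value>Y</Value></Attribute>' +
           '<Attribute><Name>seq</Name><Value>1</Value></Attribute>' +
           '</Attributes></s>';   // hypothetical wrapped attribute string
attr_table := FROMXML(temp_layout, xmlText);
OUTPUT(attr_table.attr(Name = 'fcra')[1].Value);   // expected: 'Y'

The same pattern handles any number of Attribute elements, which is why the blob only needs to be wrapped in a single root tag before calling FROMXML.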
\", \"post_time\": \"2018-01-22 20:42:44\" },\n\t{ \"post_id\": 20513, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Re: Why is it that I can output a table but not write into f\", \"username\": \"rtaylor\", \"post_text\": \"Katy,\\n\\nTry it without the THOR option on your OUTPUT, like this:OUTPUT(filename,,'~filename');
\\nThe default output file type is already "Thor" (AKA: FLAT files) so the THOR option on OUTPUT specifies to "OUTPUT Workunit Files" which are documented here:https://hpccsystems.com/training/documentation/ecl-language-reference/html/OUTPUT.html%23OUTPUT_Workunit_Files\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-22 20:14:17\" },\n\t{ \"post_id\": 20503, \"topic_id\": 5233, \"forum_id\": 10, \"post_subject\": \"Why is it that I can output a table but not write into file?\", \"username\": \"KatyChow\", \"post_text\": \"Hi there!\\n\\nI've been trying to parse out some XML information and had to brut force a string merge so that I can get all the fields I want into 1 table. The issue I am having is when I write OUTPUT(filename); it is fine. When I write OUTPUT(filename,,'~filename',THOR); The code will brake and tell me I have a parsing error. This also happens when I am trying to join that dataset onto another dataset. Why is this happening?\\n\\nI am so stuck! Help! Please!!\\n\\nKaty\\n\\nAttaching workunits (code is too long) & I ran this in Alpha Prod environment!\\n\\nHere is a working WUID \\nHere is a non working WUID\", \"post_time\": \"2018-01-22 19:19:34\" },\n\t{ \"post_id\": 20603, \"topic_id\": 5253, \"forum_id\": 10, \"post_subject\": \"Matrix Multiply with embedded python and GPU\", \"username\": \"tlhumphrey2\", \"post_text\": \"Does anyone know a better data structure for matrices being passed to embedded python where a GPU will multiply them? By better I mean the conversion to a GPU format will take the least amount of time.\\n\\nMy ecl and embedded python code and more details are in the following post: https://hpccsystems.com/bb/viewtopic.php?f=23&t=5243\", \"post_time\": \"2018-01-29 13:05:09\" },\n\t{ \"post_id\": 20733, \"topic_id\": 5293, \"forum_id\": 10, \"post_subject\": \"Re: SoapCall Create WUID from Local Git Rep\", \"username\": \"newportm\", \"post_text\": \"Hey Richard,\\n\\nSo in the "old way" the soapCall would build the BWR code and execute the code called from the bwr from the WsAttributes server. Well, now there is local and Thor target level configuration that points to different repositories. In the Compiler Tab of preferences, I have added my local ECL Repositories. When I submit a job, because my local repository is defined it is used as my codebase. When the job is submitted from another job it seems not to know that I submitted it and none of my local configurations are used. Instead, it acts as if ECL Folder under compiler tab is empty and uses the Git Lib that was configured for that specific Thor Target. \\n\\nWhat I need, is an open to tell the compiler during the build of the new WUID that I need to overwrite the Target Repository and use my local code base. \\n\\nMore clear? \\n\\nTim N\", \"post_time\": \"2018-02-07 18:55:24\" },\n\t{ \"post_id\": 20723, \"topic_id\": 5293, \"forum_id\": 10, \"post_subject\": \"Re: SoapCall Create WUID from Local Git Rep\", \"username\": \"rtaylor\", \"post_text\": \"newportm,\\n\\nI'm wondering why any ECL code has to change at all simply because you've changed its storage location? Yes, the syntax rules change around IMPORT and full qualification, but that affects all ECL code, not just SOAPCALL,\\n\\nAFAIK, SOAPCALL doesn't care where the code is stored, so why does it need to change? What happens when you try running your SOAPCALL the old same way?\\n\\nUnless, of course, I'm misunderstanding the problem. 
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-02-07 18:44:20\" },\n\t{ \"post_id\": 20713, \"topic_id\": 5293, \"forum_id\": 10, \"post_subject\": \"SoapCall Create WUID from Local Git Rep\", \"username\": \"newportm\", \"post_text\": \"Hello,\\n\\nI have recently switched over to Git source management and ran into an issue submitting WUID from another workunit. \\n\\nCurrently using MYSQL Source management we submit the code like this: \\n
dWUSubmitResult := soapcall('http://' + TargetESPAddress + ':' + TargetESPPort + '/WsWorkunits',\n'WUSubmit',\nrWUSubmitRequest,\nrWUSubmitResponse, //dataset(rWUSubmitResponse),\nXPATH('WUSubmitResponse/Exceptions/Exception')\n);
\n\nIn doing so, we pass in a set of XPath parameters to tell the SoapCall how and where to submit the WUID. In GIT, we have a default repository that each target cluster submits on. My question is: what is the option for me to use my local repository when making this call?\n \nrWUSubmitRequest := record\nstring WUID{XPATH('Wuid'),maxlength(20)} := pWUID;\nstring Cluster{XPATH('Cluster'),maxlength(30)} := pCluster;\nstring Queue{XPATH('Queue'),maxlength(30)} := pQueue;\nstring Snapshot{XPATH('Snapshot'),maxlength(10)} := '';\nstring MaxRunTime{XPATH('MaxRunTime'),maxlength(10)} := '0';\nstring Block{XPATH('BlockTillFinishTimer'),maxlength(10)} := '0';\nend;
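As an aside, here is a sketch of how the request record and SOAPCALL above could be wrapped in a FUNCTION so the target ESP, cluster and queue are passed in at the point of use. The rWUSubmitResponse layout below is an assumption (Code/Message are the usual ESP exception fields), not something taken from this thread:

SubmitWU(STRING espIP, STRING espPort, STRING pWUID,
         STRING pCluster, STRING pQueue) := FUNCTION
  rWUSubmitRequest := RECORD
    STRING Wuid{XPATH('Wuid')} := pWUID;
    STRING Cluster{XPATH('Cluster')} := pCluster;
    STRING Queue{XPATH('Queue')} := pQueue;
    STRING Snapshot{XPATH('Snapshot')} := '';
    STRING MaxRunTime{XPATH('MaxRunTime')} := '0';
    STRING Block{XPATH('BlockTillFinishTimer')} := '0';
  END;
  // assumed exception layout - check the actual WsWorkunits WUSubmit response
  rWUSubmitResponse := RECORD
    STRING Code{XPATH('Code')};
    STRING Message{XPATH('Message')};
  END;
  RETURN SOAPCALL('http://' + espIP + ':' + espPort + '/WsWorkunits',
                  'WUSubmit',
                  rWUSubmitRequest,
                  DATASET(rWUSubmitResponse),
                  XPATH('WUSubmitResponse/Exceptions/Exception'));
END;
// example use (placeholder values): SubmitWU('10.0.0.1', '8010', 'W20180207-000000', 'thor', 'thor');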
\", \"post_time\": \"2018-02-07 17:17:51\" },\n\t{ \"post_id\": 21423, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nOK, your problem is limiting the set of records you're putting into the letters field in your PROJECT TRANSFORM function. The LOCAL option doesn't change the fact that you're asking for all the records in the recordset. So to do what you want you need to add a filter to limit to just the recs on the same node. That means adding a node number to the letters, also. Here's how I do it:IMPORT Std;\\n\\ndsData := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'},{'Z'}],\\n {STRING1 Letter});\\nddsData := DISTRIBUTE(dsData); //distributes the letters to the nodes\\n\\nNodeLtrRec := {STRING1 Letter,UNSIGNED1 node};\\nndsData := PROJECT(ddsData,TRANSFORM(NodeLtrRec,\\n SELF.node := Std.system.Thorlib.Node()+1, \\n SELF := LEFT));\\nndsData; //node-numbered distributed letters\\n\\nModelRec := {UNSIGNED1 node, INTEGER i};\\ndsModel := DATASET(1,TRANSFORM(ModelRec,\\n SELF.node := Std.system.Thorlib.Node()+1, \\n SELF.i := 9),LOCAL);\\ndsModel; //one rec per node\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n\\nResultRec := {ModelRec, DATASET(ModelRec) model, DATASET(NodeLtrRec) letters};\\n\\nResultRec XF(dsmodel L) := TRANSFORM\\n SELF.model := L;\\n SELF.letters := ndsData(node=L.node); //note the filter here\\t\\n SELF := L;\\nEND;\\npds := PROJECT(dsmodel,XF(LEFT),LOCAL);\\n\\nOUTPUT(pds,NAMED('Node_Numbered_DS'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-04 09:34:30\" },\n\t{ \"post_id\": 21413, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rken\", \"post_text\": \"I removed the python bit to try and simplify, but I am still struggling to get the expected output... I tried with and without "LOCAL" on the PROJECT() without any difference, which seems like LOCAL is not doing anything and/or the dsData is not being distributed correctly.\\n\\nThis is what I think the code is doing:\\n-Create a dataset on each node that has node id and an arbitrary integer\\n-Distribute the alphabet to all the nodes resulting in 26/4 letters per node\\n-Locally transform the two, now distributed, datasets to have a node id, integer, and child records of 26/4 letters.\\n\\nThe result is 4 rows with node id, integer, and 26 letters.\\n\\ndsModel := DATASET(1,TRANSFORM({UNSIGNED1 node, INTEGER i},\\n SELF.node := Std.system.Thorlib.Node()+1, SELF.i := 9, SELF := []),LOCAL);\\ndsData := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'},{'Z'}],\\n {STRING1 Letter});\\nddsData := DISTRIBUTE(dsData); //distributes the letters to the nodes\\npds := PROJECT(dsModel,TRANSFORM({UNSIGNED1 node, TYPEOF(dsModel) model, TYPEOF(dsData) letters},\\n SELF.node := Std.system.Thorlib.Node()+1, SELF.model := dsModel, SELF.letters := ddsData, SELF := LEFT), LOCAL);\\nOUTPUT(pds,NAMED('Node_Numbered_DS'));
\", \"post_time\": \"2018-04-03 22:07:04\" },\n\t{ \"post_id\": 21283, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nIt looks to me like your Python code is still just running in node 1 and then you're distributing that to the other nodes to get the node numbers. BTW, your CHOOSEN will get you the first 80 recs from the first node that reports back (usually node 1 ). That may explain it.\\n\\nSo try it something like this instead:
//create a nested child dataset with 1 rec on each node\\nds1 := DATASET(1,TRANSFORM({UNSIGNED1 node, DATASET(paramOutput) macs},\\n SELF.node := Std.system.Thorlib.Node()+1, SELF := []),LOCAL);\\nsubset := DISTRIBUTE(CHOOSEN($.my_data, 80)); //distribute the input recs\\n //then do a PROJECT LOCAL to call your Python code\\npds := PROJECT(ds1,TRANSFORM(RECORDOF(ds1), \\n SELF.macs := pyscript(subset), SELF := LEFT),LOCAL);\\nOUTPUT(pds);\\n
I would expect this to end up with one record on each node with ~20 child recs in each. Let me know what you get. \n\nHTH,\n\nRichard", "post_time": "2018-03-20 13:23:00" },\n\t{ "post_id": 21273, "topic_id": 5313, "forum_id": 10, "post_subject": "Re: Parallel Embed - R, Python, etc..", "username": "rken", "post_text": "I tried the below code and get a somewhat confusing result. I get the dataset value along with the node.id for each row. The id goes from 1-4 for my 4 nodes, equally distributed as expected. However my python code is returning a mac address of just the first node (on each row).\n\nRecord input into pyembed\n
paramOutput := RECORD\\n\\tINTEGER1 imageLabel;\\n\\t//other values...\\nEND;
\\n\\nRecord returned from pyembed\\nparamOutput := RECORD\\n\\tINTEGER imageLabel;\\n\\tINTEGER mac;\\nEND;
\\n\\nECL Code:\\nDATASET(paramOutput) pyscript(DATASET($.my_data_type) mydata) := EMBED(Python)\\nfrom time import strftime, gmtime\\nfrom uuid import getnode as get_mac\\n\\nout = []\\naddress = get_mac()\\n\\nfor i, image in enumerate(mydata):\\n\\tout.append((image[0], get_mac()))\\nreturn out\\nENDEMBED;\\n\\nsubset := CHOOSEN($.my_data, 80);\\nds := pyscript(subset);\\ndds := DISTRIBUTE(ds);\\npds := PROJECT(dds,TRANSFORM({INTEGER imageLabel, INTEGER mac, UNSIGNED1 node}, SELF.node := Std.system.Thorlib.Node()+1, SELF := LEFT));\\nOUTPUT(pds);
\\n\\nReturns 80 rows, each with a different imageLabel, mac address(all the same), and node number.\\n\\nI'm essentially trying to distribute the 80 records to my nodes, have the pyembed process it, and return the results. I feel I am close, but can't seem to see it...\", \"post_time\": \"2018-03-19 23:32:02\" },\n\t{ \"post_id\": 21253, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nThat sounds like the job is so simple that hThor is "hijacking it." You can ensure it runs in Thor by adding in some code something like this: IMPORT Std;\\nds := DATASET([{'A'},{'B'},{'C'},{'D'},{'E'},\\n {'F'},{'G'},{'H'},{'I'},{'J'},\\n {'K'},{'L'},{'M'} ,{'N'},{'O'},\\n {'P'},{'Q'},{'R'},{'S'},{'T'},\\n {'U'},{'V'},{'W'},{'X'},{'Y'},{'Z'}],\\n {STRING1 Letter});\\ndsd := DISTRIBUTE(ds);\\npds := PROJECT(dsd,TRANSFORM({STRING1 Letter,UNSIGNED1 node},\\n SELF.node := Std.system.Thorlib.Node()+1, SELF := LEFT));\\nOUTPUT(pds,NAMED('Node_Numbered_DS'));
I put in the node numbering to absolutely demonstrate that this code is running on each separate node.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-19 16:56:01\" },\n\t{ \"post_id\": 21143, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rken\", \"post_text\": \"I ran a simple test where the python embeded code just writes to a file it's mac address, it only writes it to the file on node 1, not on each file on each node in my THOR. Perhaps this isn't the best test case?\", \"post_time\": \"2018-03-15 22:37:08\" },\n\t{ \"post_id\": 20763, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Re: Parallel Embed - R, Python, etc..\", \"username\": \"rtaylor\", \"post_text\": \"rken,\\n\\nSince every Thor node runs exactly the same .so file, and each node simply operates on whatever data is on that node (for operations that don't need to swap data between the nodes for correct execution), I would assume that your embedded Python code would do the same. IOW, parallel operation is the default mode on a multi-node Thor. \\n\\nWhat happens when you try running a test job?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-02-08 19:42:37\" },\n\t{ \"post_id\": 20753, \"topic_id\": 5313, \"forum_id\": 10, \"post_subject\": \"Parallel Embed - R, Python, etc..\", \"username\": \"rken\", \"post_text\": \"Is there a way, and how would one go about coding, to have an embed structure parallelized, assuming the plugins are on each and every node? Specifically, I am interested in have python embeds to be parallelized. i.e. have my python code run separately on each node.\\n\\nhttps://wiki.hpccsystems.com/display/hp ... ntegration\\nhttps://hpccsystems.com/training/docume ... cture.html\\n\\nI know PIPE can be used to parallelize some tasks, but I was hoping to use embed. I have tried using LOCAL on the dataset that is being passed into my embed to no avail.\\n\\nThanks in advance\", \"post_time\": \"2018-02-08 18:35:10\" },\n\t{ \"post_id\": 22053, \"topic_id\": 5333, \"forum_id\": 10, \"post_subject\": \"Re: Passing a Value from Unix to ECL script\", \"username\": \"Allan\", \"post_text\": \"Hi Harsh,\\n\\nAs Jo said there are a number of ways to accomplish this.\\n\\nI've just constructed ECL in a ksh script, then executed it using Perl CPAN libraries (LWP::UserAgent). Once you have a nice Perl script written, running ECL WU from Unix is trivial.\\n\\nYou don't have to publish any queries this way.\\nYours\\nAllan\", \"post_time\": \"2018-05-25 07:48:30\" },\n\t{ \"post_id\": 20953, \"topic_id\": 5333, \"forum_id\": 10, \"post_subject\": \"Re: Passing a Value from Unix to ECL script\", \"username\": \"harshdesai\", \"post_text\": \"Thanks Richard\", \"post_time\": \"2018-03-02 18:35:51\" },\n\t{ \"post_id\": 20893, \"topic_id\": 5333, \"forum_id\": 10, \"post_subject\": \"Re: Passing a Value from Unix to ECL script\", \"username\": \"rtaylor\", \"post_text\": \"Harsh,\\n\\nJust to expand a bit on Jo's reply - the process of pre-compiling and publishing queries is covered in both of our online Roxie courses, available here: https://hpccsystems.com/training#Classes\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-02-23 13:02:35\" },\n\t{ \"post_id\": 20883, \"topic_id\": 5333, \"forum_id\": 10, \"post_subject\": \"Re: Passing a Value from Unix to ECL script\", \"username\": \"jprichard\", \"post_text\": \"I am sure there are a couple of ways to tackle this but a simple approach might be the following.\\n\\n1. 
Your code needs to be compiled and published as a query on either thor or hthor with input query parameters.\\n\\nIMPORT STD;\\n\\nSTRING IP :='http://10.224.84.2';\\nSTRING port_ := '8010';\\nSTRING FileName := '~thorde::future::Testpolicy' : STORED('InFile');\\nSTRING Cluster := '' : STORED('InCluster');\\nSTRING include_fpos := 'N';\\n\\n// Build the soap URL\\nSTRING URl := IP+':'+(STRING)port_+'/WsDfu/';\\n// Do soap call and other stuff here...\\n\\n2. Test that out by manually loading the form on WSECL (eclwatch ip but port 8002) and make sure it works like you want. \\n\\n3. Once your query is working, call it from a UNIX script using curl and the URL of the query that you have published with the parameter in the URL.\\nhttp://<ipaddress>:8002/WsEcl/xslt/query/hthor/myeclquery?infile=~thorde::future::Testpolicy&InCluster=mythor\\n\\nTake it one step at a time and ask questions if you need more info on publishing queries etc..\\n\\nHTH\\n\\nJo\", \"post_time\": \"2018-02-23 12:56:14\" },\n\t{ \"post_id\": 20873, \"topic_id\": 5333, \"forum_id\": 10, \"post_subject\": \"Passing a Value from Unix to ECL script\", \"username\": \"harshdesai\", \"post_text\": \"HI ALL,\\nI have one sample ECL script as below\\n[hpcc@hcdevlz Recordtest]$ head renderRecordString.ecl\\nIMPORT STD;\\n\\nSTRING IP :='http://10.224.84.2';\\nSTRING port_ := '8010';\\nSTRING FileName := '~thorde::future::Testpolicy';\\nSTRING Cluster := '';\\nSTRING include_fpos := 'N';\\n\\n// Build the soap URL\\nSTRING URl := IP+':'+(STRING)port_+'/WsDfu/';\\n---------------------------------------------------------\\nIn above ECL script i want this ECL to accept parameter FileName from Unix which can be passed as while running shell Script .\\n\\nExample \\n./Unixwrapper.sh thorde::future::Testpolicy\\n\\n\\nIs it possible to pass parameter used by ECL to be sent by UNIX.Kindly help me with the same.\\n\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2018-02-23 08:35:33\" },\n\t{ \"post_id\": 21023, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"bforeman\", \"post_text\": \"Hi Harsh,\\nAble to fetch details by changing the DFUInfoRequest code tagging to Name instead of FileName
\\n\\nYes, this is what I had in your example:\\n\\nDFUInfoRequest := RECORD\\n STRING Name{XPATH('Name')} := fName;\\n STRING FileName{XPATH('FileName')} := fName;\\nEND;\\n
\\n\\nBut you are right, Name is the important element!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-03-05 13:58:28\" },\n\t{ \"post_id\": 21013, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"harshdesai\", \"post_text\": \"Hi BOB,\\nAble to fetch details by changing the DFUInfoRequest code tagging to Name instead of FileName\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2018-03-05 13:21:27\" },\n\t{ \"post_id\": 21003, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"harshdesai\", \"post_text\": \"HI BOB,\\nPlease find the output for same\\n\\n<Ecl></Ecl><Filename></Filename>\\n\\nCan you please guide on the same\\n\\n// SOAP request data structure\\nDFUInfoRequest := \\n RECORD\\n STRING FileName{XPATH('FileName')} := fName;\\n END;\\n\\t\\t\\t\\n\\n// SOAP result data structure\\n\\nDFUFileDetail := RECORD\\n STRING Ecl{XPATH('Ecl')};\\n STRING Filename{XPATH('Filename')};\\nEND;\\n\\n//Soap Response Structure\\nDFUInfoResponse := RECORD\\nDFUFileDetail DFUFileDetail {XPATH('FileDetail')};\\nEnd;\\n\\n\\n//Soap Call \\n\\nDFUInfoRequestSoapCall := SOAPCALL(URl\\n,'DFUInfo'\\n,DFUInfoRequest\\n,DFUInfoResponse\\n,XPATH('DFUInfoResponse')\\n);\", \"post_time\": \"2018-03-05 04:55:36\" },\n\t{ \"post_id\": 20993, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"bforeman\", \"post_text\": \"Got it!\\n\\nIn your SOAPCALL, change the XPATH to:\\n\\nDFUInfoRequestSoapCall := SOAPCALL(URl\\n,'DFUInfo'\\n,DFUInfoRequest\\n,DFUInfoResponse\\n,XPATH('DFUInfoResponse')\\n);
\\n\\nand\\n\\nDFUInfoResponse := RECORD\\n DFUFileDetail DFUFileDetail{XPATH('FileDetail')};\\nEND;
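Putting the two changes together with the Name-based request record that resolved the issue elsewhere in this thread, the assembled call would look roughly like this (URl and fName as defined in the original code):

DFUInfoRequest := RECORD
  STRING Name{XPATH('Name')} := fName;
END;
DFUFileDetail := RECORD
  STRING Ecl{XPATH('Ecl')};
  STRING Filename{XPATH('Filename')};
END;
DFUInfoResponse := RECORD
  DFUFileDetail DFUFileDetail{XPATH('FileDetail')};
END;
DFUInfoRequestSoapCall := SOAPCALL(URl, 'DFUInfo',
                                   DFUInfoRequest,
                                   DFUInfoResponse,
                                   XPATH('DFUInfoResponse'));
OUTPUT(DFUInfoRequestSoapCall.DFUFileDetail.Ecl);   // the returned record layout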
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-03-02 20:28:39\" },\n\t{ \"post_id\": 20983, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"harshdesai\", \"post_text\": \"HI BOB ,\\nGreeting\\nIf I do SOAP test it works but not via ECL I don't get output in ECL tag \\n\\nI mean, SOAP Request \\n<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:wsdfu">\\n <soap:Body>\\n <DFUInfoRequest>\\n <Name>thor::DetailedStats1.csv</Name>\\n <Cluster/>\\n <UpdateDescription>0</UpdateDescription>\\n <FileName/>\\n <FileDesc/>\\n </DFUInfoRequest>\\n </soap:Body>\\n</soap:Envelope>\\n\\n\\nResponse \\n\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext">\\n <soap:Body>\\n <DFUInfoResponse xmlns="urn:hpccsystems:ws:wsdfu">\\n <FileDetail>\\n <Name>thor::detailedstats1.csv</Name>\\n <Filename>detailedstats1.csv</Filename>\\n <Prefix>thor</Prefix>\\n <NodeGroup>thor20_dev02</NodeGroup>\\n <NumParts>20</NumParts>\\n <Description/>\\n <Dir>/var/lib/HPCCSystems/hpcc-data/thor/thor</Dir>\\n <PathMask>detailedstats1.csv._$P$_of_20</PathMask>\\n <Filesize>25,742</Filesize>\\n <RecordSize>0</RecordSize>\\n <RecordCount>480</RecordCount>\\n <Wuid>W20180228-050818</Wuid>\\n <Owner>NamdPo01</Owner>\\n <JobName/>\\n <Format>utf8n</Format>\\n <CsvSeparate>\\\\,</CsvSeparate>\\n <CsvTerminate></CsvTerminate>\\n <Modified>2018-02-28 10:38:01</Modified>\\n <Ecl>RECORD\\n string groupname;\\n string field;\\n integer8 valuecount;\\n integer8 grouptotal;\\n END;\\n</Ecl>\\n\\n\\nSo ideally I think we should have got value in ECL tag. in ECL script\\n\\nRegards\", \"post_time\": \"2018-03-02 19:15:20\" },\n\t{ \"post_id\": 20973, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Re: Layout Creation using DFUInfo\", \"username\": \"bforeman\", \"post_text\": \"same works in SOAP test call
\\nHi Harsh,\\n\\nWhat test are you referring to?\\n\\nIf you don't need to use SOAPCALL, this works perfectly:\\nOUTPUT(STD.File.GetLogicalFileAttribute(fname,'ECL'));
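The same Standard Library call can pull back other metadata for a logical file in one line each; a quick sketch (the filename is just a placeholder, and attribute names such as 'recordCount' should be available per the Std.File documentation):

IMPORT STD;
fname := '~thor::detailedstats1.csv';   // placeholder logical filename
OUTPUT(STD.File.GetLogicalFileAttribute(fname, 'ECL'), NAMED('Layout'));
OUTPUT(STD.File.GetLogicalFileAttribute(fname, 'recordCount'), NAMED('RecordCount'));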
\\n\\nBob\", \"post_time\": \"2018-03-02 19:10:34\" },\n\t{ \"post_id\": 20963, \"topic_id\": 5353, \"forum_id\": 10, \"post_subject\": \"Layout Creation using DFUInfo\", \"username\": \"harshdesai\", \"post_text\": \"I am trying to get the ECL layout for Csv File using DfuInfo soap call\\n\\nIMPORT STD;\\n\\nSTRING IP :='http://10.224.84.2';\\nSTRING port_ := '8010';\\n// STRING FileNameInput := '':STORED('Input_file_name'); //'~thorde::future::scrubsgrouped_input1_w20171213-092727_dinputinlay_policy'; \\nSTRING FileNameInput := '~thor::DetailedStats1.csv'; \\nSTRING FileName := IF(STD.Str.Contains(FileNameInput,'~',true),FileNameInput,'~'+FileNameInput);\\nSTRING Cluster := '';\\nSTRING include_fpos := 'N';\\n\\n// Build the soap URL\\nSTRING URl := IP+':'+(STRING)port_+'/WsDfu/?ver_=1.36';\\nSTRING fName := FileName;\\nfname;\\n\\n// SOAP request data structure\\nDFUInfoRequest := \\n RECORD\\n STRING FileName{XPATH('FileName')} := fName;\\n END;\\n\\t\\t\\t\\n\\n// SOAP result exception data structure\\n\\nDFUFileDetail := RECORD\\n STRING Ecl{XPATH('Ecl')};\\n STRING Filename{XPATH('Filename')};\\nEND;\\n\\nDFUInfoResponse := RECORD\\n DFUFileDetail DFUFileDetail{XPATH('FileDetail/DFUFileDetail')};\\n \\nEND;\\n\\n\\nDFUInfoRequestSoapCall := SOAPCALL(URl\\n ,'DFUInfo'\\n ,DFUInfoRequest\\n ,DFUInfoResponse\\n ,XPATH('DFUInfoRequest')\\n );\\nDFUInfoRequestSoapCall;\\n\\n\\nIt generates below Output ,with out anything assigned but same works in SOAP test call \\n\\n<Ecl></Ecl><Filename></Filename>\\n\\n\\nRegards\\nHarsh Desai\", \"post_time\": \"2018-03-02 18:38:46\" },\n\t{ \"post_id\": 21043, \"topic_id\": 5363, \"forum_id\": 10, \"post_subject\": \"Re: 15-second timeouts in Activity requests?\", \"username\": \"jwilt\", \"post_text\": \"Posted a new internal ticket for this, https://track.hpccsystems.com/browse/HPCC-19249 \\n\\nFYI.\", \"post_time\": \"2018-03-07 02:00:26\" },\n\t{ \"post_id\": 21033, \"topic_id\": 5363, \"forum_id\": 10, \"post_subject\": \"15-second timeouts in Activity requests?\", \"username\": \"jwilt\", \"post_text\": \"In ecl.log, I'm seeing logs of requests that seem to have a TxSummary record of about 15 seconds. \\nThese aren't consecutive in the log file, but they look something like:\\n\\n\\n...111111 2222 "SOAP method <Activity> from XXXXX@NN.NN.NN.NN."\\n... (other lines from different threads, ...)\\n...111111 2222 "TxSummary[activeReqs=N;user=XXXXX@NN.NN.NN.NN;total=15013ms"\\n
\\n\\nOf such pairs taking >10000ms (10sec) to return, about 3/4 of these are very nearly 15 seconds (+ a few ms). So, there seems to be a timeout of 15 seconds someplace.\\nI've checked our configuration, don't see it there.\\nIs this fixed? Or, where would I find/tweak it?\\nAny help would be appreciated.\\nThanks.\", \"post_time\": \"2018-03-06 23:38:18\" },\n\t{ \"post_id\": 21903, \"topic_id\": 5513, \"forum_id\": 10, \"post_subject\": \"Re: Working with a flat text file\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa,\\n\\nI tried your code like this:ds := DATASET([{'1:1488844_3,822109_5,885013_4,30878_4,823519_3,893988_3,124105_4,1248029_3,1842128_4,2238063_3,1503895_4,2207774_5,2590061_3,2442_3,543865_4,1209119_4,804919_4,1086807_3,1711859_4,372233_5,1080361_3,1245640_3,558634_4,2165002_4,1181550_3,1227322_4,427928_4,814701_5,808731_4,662870_5,337541_5,786312_3,1133214_4,1537427_4,1209954_5,2381599_3,525356_2,1910569_4,2263586_4,2421815_2,1009622_1,1481961_2,401047_4,2179073_3,1434636_3,93986_5,1308744_5,2647871_4,1905581_5,2508819_3,1578279_1,1159695_4,2588432_3,2423091_3,470232_4,2148699_2,1342007_3,466135_4,2472440_3,1283744_3,1927580_4,716874_5,4326_4,1546549_5,1493697_1,880166_5,535396_2,494609_4,1961619_5,883478_4,793564_4,1567202_2,573537_4,1972040_4,1838912_3,411705_4,2244518_5,584542_5,667730_5,2488120_5,1926776_1,38052_3,1196100_4,314933_3,1792741_2,769643_1,2477242_5,1421006_3,729846_4,1719610_2,1696031_4,1817215_4,406057_4,636262_1,1245406_4,1834590_3,593225_3,1011918_4,1665054_4,2630337_5,1155747_3,2439493_1,479924_5,530789_5,765860_4,231001_3,1493615_5,1850615_2,68959_3,147386_5,624035_5,782308_5,1116080_5,421374_5,1158759_4,1025798_3,1215397_5,2475251_4,321111_2,2162676_3,2635437_4,2389367_1,485622_5,235553_4,831869_4,99400_5,684876_4,1871179_3,1107678_5,642036_3,700890_5,2289956_5,2040859_1,1524964_3,121318_4,317050_5,2287003_5,59052_2,893742_4,1346257_3,55016_3,30245_5,743633_4,1596531_5,1125499_5,706832_4,2465337_3,2291422_1,1777406_3,1904905_4,2450433_3,1348967_2,638020_3,2217779_4,194280_1,493009_4,1567167_4,850327_5,520386_3,320540_2,1188228_2,57961_4,1113230_3,1374216_2,595778_3,209573_4,2354601_5,2563596_4,835265_4,1819474_3,1447104_3,1100940_1,143274_3,2329565_4,181592_4,936396_2,1125797_3,2283366_3,514495_4,1772176_3,1877347_4,1287892_4,255443_2,890669_4,1989766_4,2315073_4,14756_4,907623_3,991423_4,1604238_4,1027056_3,2025883_5,732936_5,563962_5,799442_4,352635_5,2537543_5,1564395_4,1655178_4,573434_4,1141189_4,383247_5,1763921_5,1943970_5,322009_3,2333817_3,2095681_2,1149588_4,2354740_5,2421360_5,496087_2,2191781_1,1694083_4,818416_3,701960_5,2090477_4,1664010_5,2583822_5,369646_5,2234063_4,259799_4,1077982_4,2631796_4,1122383_3,1508526_3,1600207_5,1283117_5,1727869_5,1522799_4,1394012_5,1558286_3,1155602_3,361066_3,1743210_5,1148389_4,2268101_4,519684_5,767518_5,122197_1,2112162_4,1073367_3,400162_5,1524343_5,741245_4,2563768_3,1406595_4,1137010_4,60343_5,225765_4,2530404_3,437881_3,1935793_1,134001_4,2607300_3,1008986_4,94565_4,828410_4,1805202_4,1922925_4,1435717_5,2277395_4,2305014_5,166041_4,2413320_4,87113_2,722591_5,2291306_1,2010770_4,255383_5,1873429_4,1647618_4,608234_5,42930_3,1462072_5,685565_5,3321_3,2554942_4,1874547_4,2269844_5,34907_3,1779903_4,2576424_4,230112_3,508727_3,1603525_3,172264_4,1182185_4,2275470_2,491531_5,1346432_4,1554712_5,1450941_5,1714116_3,2016488_4,1782762_4,1343170_5,2565752_4,435841_3,2242821_5,638824_5,2256485_1,101597_5,623036_5,1559445_5,1723381_5,1824586_4,2233105_4,682963_3,2529547_5,504620_2,1682104_4,162
72_4,2491785_5,978412_5,2054145_3,2444240_3,547732_3,811790_5,31913_4,437111_4,640588_4,2625019_3,2605190_5,915_5,1430587_4,2544219_5,2603381_5,305344_1,2569099_1,2430356_4,885165_4,2380806_5,1512406_1,1774623_4,2226525_4,2537076_4,2060858_4,498469_5,68033_4,1819146_5,2088415_4,473070_5,1823641_5,1839976_2,14924_5,1852606_4,453694_5,921487_2,1022254_5,2464081_4,1228324_4,1563530_4,1181170_3,1357013_3,21722_4,288420_5,1739170_5,2584676_3,2013504_4,1245176_4,269524_3,661344_3,652324_3,2239213_3,863302_4,758850_4,1884755_2,544833_3,1562707_1,810700_5,837756_5,155164_4,493945_5,1565175_5,2005193_4,1605780_4,1294335_2,608576_4,659505_4,1604707_4,2630797_5,402266_5,752642_3,1906145_4,389872_2,1462866_2,1952116_4,54774_4,1776980_5,1494196_5,253794_5,1569513_3,596728_2,1107588_1,1133763_3,1398076_4,1178171_4,984369_3,2618594_4,1653834_4,2322840_3,2207647_4,1994111_4,1824044_4,2255037_3,2056022_3,1458179_4,1508350_4,1168571_5,766489_3,1424199_5,2054180_3,448902_5,1547173_3,1751103_4,121073_5,2609436_4,1398626_2,1311231_3,2279000_3,236921_5,2566259_5,758937_4,2260684_4,1190829_4,136106_3,344753_3,568930_5,206115_4,2390644_3,2078679_5,1682651_4,386915_4,972136_3,1806515_3,11589_3,2118461_5,444411_3,691108_4,332401_3,1278488_4,358776_4,387418_1,872408_4,646098_4,396595_5,1366860_4,1046882_3,470861_5,1455257_4,1274780_3,379184_4,1273630_4,492291_3,145873_3,1388284_5,712610_4,1116065_1,660499_1,1918987_4,1357894_3,190418_3,1060658_3,1443203_4,1772839_5,2385774_3,1059319_3,831775_4,881346_5,1066317_4,13651_3,208920_4,308753_5,2564257_3,565041_4,1602153_4,173930_4,202811_3,353369_3,1201176_4,2047577_3,685113_4,1686060_5,151004_5,2126192_3,1981464_4,1862581_4,1255780_5,1962300_3,1515355_3,1001779_4,2093105_3,1123959_3,1876297_5,1364481_4,998236_5,328415_3,1347129_4,1117062_4,1033930_3,45117_5,1005769_5,712609_4,740495_4,2497991_4,1017324_4,120491_5,1645794_4,1658790_3,2451020_4,1878798_4,1790903_4,1254683_1,874943_5,121456_4,1140108_4,515436_1,272689_5,1247177_3,263240_3,2539549_3,2565654_5,334701_3,42921_3,2011399_5,433945_5,2151149_4,1415954_2,1086360_3,2419258_4,2380848_5,1550216_1,596533_5,287901_5,188613_4,1654508_3,1313126_5,51334_4,2374451_4,2031093_4,548064_5,946102_5,1790158_4,1403184_3,1535440_4,1426604_4,1815755_5'},\\n{'2:2059652_4,1666394_3,1759415_4,1959936_5,998862_4,2625420_2,573975_3,392722_4,1401650_4,988104_3,977632_4,2557870_4,1793899_5,1340535_5,1888322_5,1283598_3,1784150_4,2271251_5,65932_3,1828884_5,1878728_4,1922778_3,1176404_4,2265116_3,1078701_4,1832577_4,748922_5,1013802_1,1131325_2,2244378_4,494639_2,636262_1,1903158_4,220427_4,2439493_1,2225116_4,1445632_5,2592823_4,1288603_5,2556926_3,1190070_4,1312846_3,2226229_3,1563935_1,69809_5,1349753_3,785768_3,426476_5,810636_4,468713_5,222290_4,349407_5,311232_2,2596999_4,1025601_5,1743759_4,2385553_5,1374216_1,526466_4,2648861_3,1210631_3,2314531_4,618272_1,2532807_3,412535_4,1315005_4,1358911_5,507603_1,1507649_5,845529_5,1479907_5,236271_2,2422676_3,1636093_5,995594_5,1664010_4,2431481_3,1980668_5,402321_4,1344564_3,1632603_3,2567280_3,1623166_3,521932_4,105086_5,2072554_5,2231529_3,2103439_2,261764_1,193476_5,1576540_4,1783594_5,503334_4,183903_5,2606799_1,1236127_4,2375962_3,2212071_3,1252841_3,247898_5,970975_3,305344_1,1581186_4,1129620_3,584750_3,11409_5,1875495_2,1403217_2,2147527_1,2418486_4,1476323_5,2345723_4,2640085_5,1803154_2,1251170_2,527491_4,391517_4,1398626_3,828919_5,196494_5,715897_5,268917_2,41422_4,1806515_3,2118461_4,387418_1,2019055_5,348960_1,1167731_4,2468831_5,219925_4,1025193_5,630887_5,1461435_1,1838586_1,
1515430_3,1807053_5,1172326_5,1785842_3,803752_3,1581265_3,515436_1,1824543_4,1283204_3,1272122_5'}],\\n{STRING field1});\\n\\nhistogram_dataset2 := ds;\\n\\nPATTERN histogramRule1 := PATTERN('(?<=_)[1]');\\nRULE userRatingRule1 := histogramRule1;\\nmatchText1 := {STRING100 out3 := MATCHTEXT(userRatingRule1)}; \\nuserRatingAnswerOne := PARSE(histogram_dataset2, field1, userRatingRule1, matchText1, BEST, MANY, NOCASE);\\nuserRatingAnswerOne;
and it worked -- no errors.\\n\\nTherefore, I suggest that the problem may be that you sprayed the file as a CSV but defined it as a THOR file (flat file). Try changing it to CSV and you may get past that error.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-05-11 14:52:43\" },\n\t{ \"post_id\": 21823, \"topic_id\": 5513, \"forum_id\": 10, \"post_subject\": \"Re: Working with a flat text file\", \"username\": \"rsghatpa\", \"post_text\": \"Here are 2 lines of the same :\\n\\n1:1488844_3,822109_5,885013_4,30878_4,823519_3,893988_3,124105_4,1248029_3,1842128_4,2238063_3,1503895_4,2207774_5,2590061_3,2442_3,543865_4,1209119_4,804919_4,1086807_3,1711859_4,372233_5,1080361_3,1245640_3,558634_4,2165002_4,1181550_3,1227322_4,427928_4,814701_5,808731_4,662870_5,337541_5,786312_3,1133214_4,1537427_4,1209954_5,2381599_3,525356_2,1910569_4,2263586_4,2421815_2,1009622_1,1481961_2,401047_4,2179073_3,1434636_3,93986_5,1308744_5,2647871_4,1905581_5,2508819_3,1578279_1,1159695_4,2588432_3,2423091_3,470232_4,2148699_2,1342007_3,466135_4,2472440_3,1283744_3,1927580_4,716874_5,4326_4,1546549_5,1493697_1,880166_5,535396_2,494609_4,1961619_5,883478_4,793564_4,1567202_2,573537_4,1972040_4,1838912_3,411705_4,2244518_5,584542_5,667730_5,2488120_5,1926776_1,38052_3,1196100_4,314933_3,1792741_2,769643_1,2477242_5,1421006_3,729846_4,1719610_2,1696031_4,1817215_4,406057_4,636262_1,1245406_4,1834590_3,593225_3,1011918_4,1665054_4,2630337_5,1155747_3,2439493_1,479924_5,530789_5,765860_4,231001_3,1493615_5,1850615_2,68959_3,147386_5,624035_5,782308_5,1116080_5,421374_5,1158759_4,1025798_3,1215397_5,2475251_4,321111_2,2162676_3,2635437_4,2389367_1,485622_5,235553_4,831869_4,99400_5,684876_4,1871179_3,1107678_5,642036_3,700890_5,2289956_5,2040859_1,1524964_3,121318_4,317050_5,2287003_5,59052_2,893742_4,1346257_3,55016_3,30245_5,743633_4,1596531_5,1125499_5,706832_4,2465337_3,2291422_1,1777406_3,1904905_4,2450433_3,1348967_2,638020_3,2217779_4,194280_1,493009_4,1567167_4,850327_5,520386_3,320540_2,1188228_2,57961_4,1113230_3,1374216_2,595778_3,209573_4,2354601_5,2563596_4,835265_4,1819474_3,1447104_3,1100940_1,143274_3,2329565_4,181592_4,936396_2,1125797_3,2283366_3,514495_4,1772176_3,1877347_4,1287892_4,255443_2,890669_4,1989766_4,2315073_4,14756_4,907623_3,991423_4,1604238_4,1027056_3,2025883_5,732936_5,563962_5,799442_4,352635_5,2537543_5,1564395_4,1655178_4,573434_4,1141189_4,383247_5,1763921_5,1943970_5,322009_3,2333817_3,2095681_2,1149588_4,2354740_5,2421360_5,496087_2,2191781_1,1694083_4,818416_3,701960_5,2090477_4,1664010_5,2583822_5,369646_5,2234063_4,259799_4,1077982_4,2631796_4,1122383_3,1508526_3,1600207_5,1283117_5,1727869_5,1522799_4,1394012_5,1558286_3,1155602_3,361066_3,1743210_5,1148389_4,2268101_4,519684_5,767518_5,122197_1,2112162_4,1073367_3,400162_5,1524343_5,741245_4,2563768_3,1406595_4,1137010_4,60343_5,225765_4,2530404_3,437881_3,1935793_1,134001_4,2607300_3,1008986_4,94565_4,828410_4,1805202_4,1922925_4,1435717_5,2277395_4,2305014_5,166041_4,2413320_4,87113_2,722591_5,2291306_1,2010770_4,255383_5,1873429_4,1647618_4,608234_5,42930_3,1462072_5,685565_5,3321_3,2554942_4,1874547_4,2269844_5,34907_3,1779903_4,2576424_4,230112_3,508727_3,1603525_3,172264_4,1182185_4,2275470_2,491531_5,1346432_4,1554712_5,1450941_5,1714116_3,2016488_4,1782762_4,1343170_5,2565752_4,435841_3,2242821_5,638824_5,2256485_1,101597_5,623036_5,1559445_5,1723381_5,1824586_4,2233105_4,682963_3,2529547_5,504620_2,1682104_4,16272_4,2491785_5,978412_5,2054145_3,2444240_3,547732_3,811790_5,31913_4,437111_4,640588_4,2625019_3,2605190_5
,915_5,1430587_4,2544219_5,2603381_5,305344_1,2569099_1,2430356_4,885165_4,2380806_5,1512406_1,1774623_4,2226525_4,2537076_4,2060858_4,498469_5,68033_4,1819146_5,2088415_4,473070_5,1823641_5,1839976_2,14924_5,1852606_4,453694_5,921487_2,1022254_5,2464081_4,1228324_4,1563530_4,1181170_3,1357013_3,21722_4,288420_5,1739170_5,2584676_3,2013504_4,1245176_4,269524_3,661344_3,652324_3,2239213_3,863302_4,758850_4,1884755_2,544833_3,1562707_1,810700_5,837756_5,155164_4,493945_5,1565175_5,2005193_4,1605780_4,1294335_2,608576_4,659505_4,1604707_4,2630797_5,402266_5,752642_3,1906145_4,389872_2,1462866_2,1952116_4,54774_4,1776980_5,1494196_5,253794_5,1569513_3,596728_2,1107588_1,1133763_3,1398076_4,1178171_4,984369_3,2618594_4,1653834_4,2322840_3,2207647_4,1994111_4,1824044_4,2255037_3,2056022_3,1458179_4,1508350_4,1168571_5,766489_3,1424199_5,2054180_3,448902_5,1547173_3,1751103_4,121073_5,2609436_4,1398626_2,1311231_3,2279000_3,236921_5,2566259_5,758937_4,2260684_4,1190829_4,136106_3,344753_3,568930_5,206115_4,2390644_3,2078679_5,1682651_4,386915_4,972136_3,1806515_3,11589_3,2118461_5,444411_3,691108_4,332401_3,1278488_4,358776_4,387418_1,872408_4,646098_4,396595_5,1366860_4,1046882_3,470861_5,1455257_4,1274780_3,379184_4,1273630_4,492291_3,145873_3,1388284_5,712610_4,1116065_1,660499_1,1918987_4,1357894_3,190418_3,1060658_3,1443203_4,1772839_5,2385774_3,1059319_3,831775_4,881346_5,1066317_4,13651_3,208920_4,308753_5,2564257_3,565041_4,1602153_4,173930_4,202811_3,353369_3,1201176_4,2047577_3,685113_4,1686060_5,151004_5,2126192_3,1981464_4,1862581_4,1255780_5,1962300_3,1515355_3,1001779_4,2093105_3,1123959_3,1876297_5,1364481_4,998236_5,328415_3,1347129_4,1117062_4,1033930_3,45117_5,1005769_5,712609_4,740495_4,2497991_4,1017324_4,120491_5,1645794_4,1658790_3,2451020_4,1878798_4,1790903_4,1254683_1,874943_5,121456_4,1140108_4,515436_1,272689_5,1247177_3,263240_3,2539549_3,2565654_5,334701_3,42921_3,2011399_5,433945_5,2151149_4,1415954_2,1086360_3,2419258_4,2380848_5,1550216_1,596533_5,287901_5,188613_4,1654508_3,1313126_5,51334_4,2374451_4,2031093_4,548064_5,946102_5,1790158_4,1403184_3,1535440_4,1426604_4,1815755_5\\n2:2059652_4,1666394_3,1759415_4,1959936_5,998862_4,2625420_2,573975_3,392722_4,1401650_4,988104_3,977632_4,2557870_4,1793899_5,1340535_5,1888322_5,1283598_3,1784150_4,2271251_5,65932_3,1828884_5,1878728_4,1922778_3,1176404_4,2265116_3,1078701_4,1832577_4,748922_5,1013802_1,1131325_2,2244378_4,494639_2,636262_1,1903158_4,220427_4,2439493_1,2225116_4,1445632_5,2592823_4,1288603_5,2556926_3,1190070_4,1312846_3,2226229_3,1563935_1,69809_5,1349753_3,785768_3,426476_5,810636_4,468713_5,222290_4,349407_5,311232_2,2596999_4,1025601_5,1743759_4,2385553_5,1374216_1,526466_4,2648861_3,1210631_3,2314531_4,618272_1,2532807_3,412535_4,1315005_4,1358911_5,507603_1,1507649_5,845529_5,1479907_5,236271_2,2422676_3,1636093_5,995594_5,1664010_4,2431481_3,1980668_5,402321_4,1344564_3,1632603_3,2567280_3,1623166_3,521932_4,105086_5,2072554_5,2231529_3,2103439_2,261764_1,193476_5,1576540_4,1783594_5,503334_4,183903_5,2606799_1,1236127_4,2375962_3,2212071_3,1252841_3,247898_5,970975_3,305344_1,1581186_4,1129620_3,584750_3,11409_5,1875495_2,1403217_2,2147527_1,2418486_4,1476323_5,2345723_4,2640085_5,1803154_2,1251170_2,527491_4,391517_4,1398626_3,828919_5,196494_5,715897_5,268917_2,41422_4,1806515_3,2118461_4,387418_1,2019055_5,348960_1,1167731_4,2468831_5,219925_4,1025193_5,630887_5,1461435_1,1838586_1,1515430_3,1807053_5,1172326_5,1785842_3,803752_3,1581265_3,515436_1,1824543_4,1283204_3,1272122_5\", 
\"post_time\": \"2018-05-02 15:12:33\" },\n\t{ \"post_id\": 21813, \"topic_id\": 5513, \"forum_id\": 10, \"post_subject\": \"Working with a flat text file\", \"username\": \"rsghatpa\", \"post_text\": \"I sprayed a file as a csv with no delimiters and no quotes. The first 2 lines would be posted in the 1st reply.\\n\\nI also passed the maxRecordSize as 2147483646 (1 less than INT.MAX) because of the large record size / line.\\n\\nThe ECL Record structure is :\\nRECORD\\n STRING field1;\\nEND;\\n\\nI had the following questions :\\n1. Def : UNKNOWN char field1\\nWhy UNKNOWN?\\n2.XML :\\n<?xml version="1.0"?>\\n<Table>\\n <Field ecltype="string" label="field1" name="field1" position="0" rawtype="-983036" size="-15" type="string"></Field>\\n <filename>file1</filename>\\n</Table>\\n\\nWhy is the rawtype negative?\\n\\nMy corresponding ECL code is as follows:\\n\\ndataset_name := '~benchmark::file1';\\nrs := RECORD\\n STRING field1;\\nEND;\\n\\nhistogram_dataset2 := DATASET(dataset_name, rs, THOR);\\n\\nPATTERN histogramRule1 := PATTERN('(?<=_)[1]');\\nRULE userRatingRule1 := histogramRule1;\\nmatchText1 := {STRING100 out3 := MATCHTEXT(userRatingRule1)}; \\nuserRatingAnswerOne := PARSE(histogram_dataset2, field1, userRatingRule1, matchText1, BEST, MANY, NOCASE);
\\n\\necl run HistogramRating.ecl --target=thor --server=master:8010\\n\\nW20180502-145535 failed\\n<Result>\\n <Exception><Code>4294967295</Code><Source>eclagent</Source><Message>System error: -1: Graph graph1[1], diskread[2]: SLAVE #1 [172.31.15.1:20100]: CFileSerialStream::get read past end of stream, CFileSerialStream::get read past end of stream - handling file: /var/lib/HPCCSystems/hpcc-data/thor/benchmark/file1._1_of_2 - caused by (-1, CFileSerialStream::get read past end of stream)</Message></Exception>\\n</Result>\\n\\nHow to resolve this error?\", \"post_time\": \"2018-05-02 15:12:02\" },\n\t{ \"post_id\": 21963, \"topic_id\": 5603, \"forum_id\": 10, \"post_subject\": \"Re: Memory limit exceeded error when trying a rollup group\", \"username\": \"rtaylor\", \"post_text\": \"Manish,\\n\\nHere's the important part of the error message:Out of memory whilst loading group for rollup group, group/set size = 207904, Fixed rows, size = 8252
If that doesn't help you with the problem, please post the code that created the error along with a description of the hardware you're running the cluster on.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-05-18 13:44:06\" },\n\t{ \"post_id\": 21953, \"topic_id\": 5603, \"forum_id\": 10, \"post_subject\": \"Memory limit exceeded error when trying a rollup group\", \"username\": \"ManishJaychand\", \"post_text\": \"Hi ,\\n\\n I am trying to run aggregate function using the rollup group. I am getting a memory limit exceeded error. \\n\\n Error: System error: 1300: Graph graph1[1], rollupgroup[5]: SLAVE #1 [10.224.89.21:20100]: Graph graph1[1], sort[3]: Memory limit exceeded: current 40960, requested 1, limit 40960 active(1) heap(40960/40960), - caused by (1300, Memory limit exceeded: current 40960, requested 1, limit 40960 active(1) heap(40960/40960)), Out of memory whilst loading group for rollup group, group/set size = 207904, Fixed rows, size = 8252\\n\\nI am unable to understand the issue. Any inputs on this? what can be the possible reason? When does these kinds of issues occur?\", \"post_time\": \"2018-05-18 10:23:15\" },\n\t{ \"post_id\": 22023, \"topic_id\": 5623, \"forum_id\": 10, \"post_subject\": \"Re: Use of TYPE structure 'LOAD'\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes you're right. As I said I have no problem actually constructing the result I need, I was just investigating making use of LOAD callback. I'd not used this feature before, and there's nothing like a real life project for learning.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2018-05-22 08:04:02\" },\n\t{ \"post_id\": 22003, \"topic_id\": 5623, \"forum_id\": 10, \"post_subject\": \"Re: Use of TYPE structure 'LOAD'\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe TYPE structure is designed for "Alien" data types -- data types included in external files that are not directly supported by HPCC. The TYPE structure allows you to write your own callback functions (LOAD, STORE, etc.) to translate that "alien" data into a format that is natively supported.\\n\\nSince your date ranges are implied by the ordinal record position and not directly specified, the TYPE structure would be inappropriate for this, IMO.\\n\\nI'd suggest just using the Date library's Julian data functions to create an expression that would calculate the range for each record based on the COUNTER value in a PROJECT and populate the date range fields that way.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-05-21 15:14:47\" },\n\t{ \"post_id\": 21993, \"topic_id\": 5623, \"forum_id\": 10, \"post_subject\": \"Use of TYPE structure 'LOAD'\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have presented to me a XML file with nested data sets. Each record in this dataset is tied to a date range though the actual date range is NOT in the XML. The ordinal position of the record in the child dataset specifies the date range.\\ne.g.\\nRecord 1 29th April - 5th May\\nRecord 2 6th May - 12th May\\nRecord 3 13th May - 19th May\\n\\nI know the dates to apply to each record, it would be nice to have the dates in the resultant dataset once the load is complete.\\n\\nNow I can do an XMLPROJECT or just a straight PROJECT once the load is complete, but I thought I could use the Type structure LOAD qualifier to a record definition to do the whole load and transform in one operation. 
But I can't see how.\\nThere is no indication given to the function used by the LOAD of its ordinal position so I'm a bit stumped.\\nHowever I also know there are some very clever people out there on the forum who might have ideas.\\n\\nYours intrigued\\nAllan\", \"post_time\": \"2018-05-21 15:00:05\" },\n\t{ \"post_id\": 22313, \"topic_id\": 5713, \"forum_id\": 10, \"post_subject\": \"Re: Grouping by criteria supplied at runtime.\", \"username\": \"Allan\", \"post_text\": \"Actually found the solution, as the DATASET of criteria (dataset 2) is small I can do a JOIN ALL where the 2nd dataset can be on the RIGHT, copied to each node and compared against every record in the data DATASET (dataset 1), skipping those records that don't match that particular filter presented to the TRANSFORM.\\nSimplified the example down to 2 criteria, but holds just as well for my production code.\\nRBucket := RECORD\\n INTEGER id;\\n INTEGER Account;\\n STRING1 Brand;\\nEND;\\n\\nBucket := DATASET([{1,0,'A'},{2,0,'C'},{3,10005,''},{4,10005,'B'}],RBucket);\\n\\nRData := RECORD\\n INTEGER id;\\n INTEGER Tid;\\n INTEGER Account;\\n STRING1 Brand;\\n REAL Somedata;\\nEND;\\n\\nTL := DATASET([{0,10,10005,'A',1.7},{0,20,10007,'A',1.6},{0,30,10005,'B',2.0},{0,40,10008,'C',4.0},{0,50,10005,'C',1.8},{0,60,10007,'C',0.4},{0,70,10007,'A',0.1}],RData);\\n\\nRData Doit(RData L,RBucket R) := TRANSFORM,SKIP( (R.Account != 0 and R.Account != L.Account)\\n OR (R.Brand != '' AND R.Brand != L.Brand))\\n SELF.id := R.id;\\n SELF := L;\\nEND;\\n\\ngrp := SORT(JOIN(TL,Bucket,TRUE,DoIt(LEFT,RIGHT),ALL),id,tid);\\ngrp;\\n\\nRTABLE := RECORD\\n INTEGER id := grp.id;\\n INTEGER Cnt := COUNT(GROUP);\\n REAL totalResponseTime := SUM(GROUP,grp.somedata);\\nEND;\\n\\nTABLE(grp,RTABLE,id);\\n
\\n\\nYours\\nAllan\", \"post_time\": \"2018-07-04 08:53:37\" },\n\t{ \"post_id\": 22263, \"topic_id\": 5713, \"forum_id\": 10, \"post_subject\": \"Grouping by criteria supplied at runtime.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have to count the number of records in a DATASET that match up to 5 criteria, the criteria themselves are held in another DATASET. So a Count is generated for every row in the 2nd DATASET. All the fields in the 2nd DATASET are individually nullable the only stipulation being that at least 1 criteria be supplied. By constructing a bitmap of the criteria present in every row of the 2nd DATASET one could do:\\n\\nJOIN(DATASET_1,DATASET_2,\\n (RIGHT.bitmap = 1 AND LEFT.criteria_1 = RIGHT.criteria_1)\\n OR (RIGHT.bitmap = 2 AND LEFT.criteria_2 = RIGHT.criteria_2)\\n OR (RIGHT.bitmap = 3 AND LEFT.criteria_1 = RIGHT.criteria_1 AND LEFT.criteria_2 = RIGHT.criteria_2)\\n OR (RIGHT.bitmap = 4 AND LEFT.criteria_3 = RIGHT.criteria_3)\\n .\\n .\\n .\\n OR (RIGHT.bitmap = 32....\\n
\\nBut this seems VERY messy and there must be a neater way to do this.\\n\\nAny ideas anyone?\\nYours\\nAllan\", \"post_time\": \"2018-07-03 17:01:14\" },\n\t{ \"post_id\": 22593, \"topic_id\": 5813, \"forum_id\": 10, \"post_subject\": \"Re: ecl from a windows docker\", \"username\": \"rtaylor\", \"post_text\": \"mansfield_bitter,\\n\\nThis is a perfect bug report, which needs to be added to JIRA (https://track.hpccsystems.com) so the developers will absolutely see it (they're not guaranteed to see this post) and can look at the issue. If you add it, you are automatically sent updates tracking the bug fix process. If I add it, you will not be automatically "in the loop" on it, so I suggest you add the report. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-13 13:20:29\" },\n\t{ \"post_id\": 22583, \"topic_id\": 5813, \"forum_id\": 10, \"post_subject\": \"ecl from a windows docker\", \"username\": \"mansfield_bitter\", \"post_text\": \"May be a bit niche this! \\n\\nSo we are able to compile and send jobs using the command line in the following circumstances (Note we are using the base HPCC container available from Docker):\\n\\n
ecl.exe in Windows
\necl command from Linux
\\necl command from a Docker container running on Linux
\\necl command from a Docker container running on Windows without importing our, admittedly rather large, repo
\\necl command from a Docker container running on Windows, importing a couple of ecl files as a test repo
\\n\\nThe natural extension to the final case is to run ecl with -I=[our repo location] under Windows which is actually what we want to do. Unfortunately there seems to be an RCP error while communicating with SOAP in this instance. What happens is the code is compiled and sent to the server for deployment but the Workunit is not run, instead hanging at compiled (but will run successfully if you force a resubmit). \\n\\nInvestigation of stack traces indicates that the server returns 200 following deployment but then gives:\\n\\npoll([{fd=3, events=POLLIN}], 1, 7200000) = 1 ([{fd=3, revents=POLLIN}])\\nrecvfrom(3, "HTTP/1.1 200 OK\\\\r\\\\nContent-Type: t"..., 1024, 0, NULL, NULL) = 728\\nclock_gettime(CLOCK_MONOTONIC, {66390, 470470300}) = 0\\nshutdown(3, SHUT_RDWR) = 0\\nclose(3) = 0\\nbrk(0x10cd000) = 0x10cd000\\nmunmap(0x7f3459e0a000, 675840) = 0\\nwrite(1, "\\\\n", 1) = 1\\nwrite(1, "Deploying ECL Archive ./big_test"..., 37) = 37\\nbrk(0x10ef000) = 0x10ef000\\nbrk(0x112d000) = 0x112d000\\nbrk(0x118d000) = 0x118d000\\nbrk(0x11ed000) = 0x11ed000\\nclock_gettime(CLOCK_MONOTONIC, {66390, 478728700}) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66390, 478945500}) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66390, 479077100}) = 0\\nsocket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3\\nfcntl(3, F_GETFL) = 0x2 (flags O_RDWR)\\nfcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0\\nconnect(3, {sa_family=AF_INET, sin_port=htons(8010), sin_addr=inet_addr("10.53.57.31")}, 16) = -1 EINPROGRESS (Operation now in progress)\\npoll([{fd=3, events=POLLOUT}], 1, 2999) = 1 ([{fd=3, revents=POLLOUT}])\\ngetsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0\\nfcntl(3, F_GETFL) = 0x802 (flags O_RDWR|O_NONBLOCK)\\nfcntl(3, F_SETFL, O_RDWR) = 0\\ngetsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0\\nsetsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0\\ngetsockname(3, {sa_family=AF_INET, sin_port=htons(55116), sin_addr=inet_addr("172.17.0.2")}, [16]) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66390, 499252700}) = 0\\nbrk(0x122d000) = 0x122d000\\nclock_gettime(CLOCK_MONOTONIC, {66390, 499474500}) = 0\\nsendto(3, "POST /WsWorkunits?upload_ HTTP/1"..., 208, MSG_NOSIGNAL, NULL, 0) = 208\\nclock_gettime(CLOCK_MONOTONIC, {66390, 499701000}) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66390, 499989200}) = 0\\nsendto(3, "<?xml version=\\\\"1.0\\\\" encoding=\\\\"ut"..., 131765, MSG_NOSIGNAL, NULL, 0) = 131765\\nclock_gettime(CLOCK_MONOTONIC, {66390, 502524000}) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66390, 502868000}) = 0\\npoll([{fd=3, events=POLLIN}], 1, 7200000) = 1 ([{fd=3, revents=POLLIN}])\\nrecvfrom(3, "", 1024, 0, NULL, NULL) = 0\\nclock_gettime(CLOCK_MONOTONIC, {66401, 576057600}) = 0\\ngettimeofday({1533806883, 187820}, NULL) = 0\\ngettid() = 49\\nwrite(2, "HTTP Status \\\\n", 13) = 13\\ngettimeofday({1533806883, 190495}, NULL) = 0\\ngettid() = 49\\nwrite(2, "SOAP_RPC_ERROR = \\\\n", 18) = 18\\nfutex(0x7f34590af680, FUTEX_WAKE_PRIVATE, 2147483647) = 0\\nclose(3) = 0\\nbrk(0x10ee000) = 0x10ee000\\nwrite(2, "\\\\nSOAP rpc error\\\\n", 16) = 16\\nexit_group(2) = ?\\n+++ exited with 2 +++\\n
\\n\\nWe have tried to alter Docker's network mode to host but to no avail so are assuming something to do with the size of the repo is allowing the connection to drop? I'm wondering if anyone has encountered similar issues and/or knows of a solution, please?\", \"post_time\": \"2018-08-10 10:16:50\" },\n\t{ \"post_id\": 30813, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"bforeman\", \"post_text\": \"If you could please create a JIRA report that documents your current issue with ML.Analysis.Classification.Accuracy with the ZAP Analysis report attached, I will certainly look at it, and this will give other developers a chance to look at it. Unfortunately I am wrapping up training today so I will have to revisit this after class.\\n\\nThank you!\\n\\nBob\", \"post_time\": \"2020-05-21 12:10:16\" },\n\t{ \"post_id\": 30803, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"tpay\", \"post_text\": \"I do not think that there is a JIRA report. We have been exchanging emails. If you follow the link on my previous post you will see the new issue. (By the way, I am just using ML.Analysis.Classification.Accuracy at the moment. I do not have time to dive into that other issue. Ok.)\", \"post_time\": \"2020-05-20 20:45:29\" },\n\t{ \"post_id\": 30793, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"bforeman\", \"post_text\": \"What is the number of the Jira report? I will look at it.\\n\\nBob\", \"post_time\": \"2020-05-20 20:42:04\" },\n\t{ \"post_id\": 30783, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"tpay\", \"post_text\": \"Hi Bob, Is it possible for you to take a look at this as well? viewtopic.php?uid=4453&f=10&t=8093&start=0 I can email you the Zap reports as well as well forward my email exchanges with Roger. Thanks Tayfun\", \"post_time\": \"2020-05-20 20:24:38\" },\n\t{ \"post_id\": 30443, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"bforeman\", \"post_text\": \"When I use distribute ML.Analysis.Classification.Accuracy works, but ML.Analysis.Classification.AccuracyByClass still produces the same error.\\n
\\n\\nThank you Tayfun! Would you please open a Jira issue on this, with sample code if possible and steps to reproduce contained in the report? \\nhttps://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nThere is a category there for Machine Learning.\\n\\nThank You!\\n\\nBob\", \"post_time\": \"2020-05-07 13:18:04\" },\n\t{ \"post_id\": 30433, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"tpay\", \"post_text\": \"Hi Bob, \\n\\nWhen I use distribute ML.Analysis.Classification.Accuracy works, but ML.Analysis.Classification.AccuracyByClass still produces the same error. \\n\\nThanks\\nTayfun Pay\", \"post_time\": \"2020-05-06 16:34:23\" },\n\t{ \"post_id\": 30423, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"bforeman\", \"post_text\": \"Tpay,\\n\\nDid you try the DISTRIBUTE as Roger suggested? If you did and you are still seeing the error, a JIRA may be needed so we can investigate further.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-05-06 13:41:59\" },\n\t{ \"post_id\": 30413, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"tpay\", \"post_text\": \"I am currently having the same exact issue when I try to run LogisticRegression. \\n\\nSystem error: 10084: Graph graph1[431], sort[433]: SORT failed. Graph graph1[431], sort[433]: Exceeded skew limit: 0.250000, estimated skew: 1.000000\\n\\nThe call to the Sort() function in ML_Core > Analysis.ecl is producing this error.\", \"post_time\": \"2020-05-05 22:30:32\" },\n\t{ \"post_id\": 22683, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Did you randomly distribute your data across all the nodes of your cluster before executing kmeans? If not that is probably why the sort exceeded the skew limit.\", \"post_time\": \"2018-08-21 16:34:41\" },\n\t{ \"post_id\": 22673, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"Roger Dev\", \"post_text\": \"I would try making sure the records are distributed before calling Kmeans.\\nFor example:\\ndresult2 := DISTRIBUTE(result, id);\\ndc2 := DISTRIBUTE(result, id);\\n\\nThen use dresult2 and dc2 as the input to Kmeans.\\n\\nIf that fails, then it would be helpful if you could look into the graph (using ECLWatch) and try to identify the line of code represented by the sort (i.e. graph2, subgraph 11, activity 19).\", \"post_time\": \"2018-08-21 16:10:46\" },\n\t{ \"post_id\": 22663, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"maniblitz\", \"post_text\": \"The content of the masked area contains two datasets that do not have any transformation operated. There is no sort, transform or distribute function involved in that part. Nevertheless, after checking the function kmeans in the Github repository, I found out that the there is a SORT function involved in the kmeans algorithm. \\n\\nThe code masked has no problem as it was tested before and provided the anticipated results. 
I wish to know if there is a way to modify the skew limit for the kmeans function without having to completely modify the ECL ML source code.\", \"post_time\": \"2018-08-20 14:33:43\" },\n\t{ \"post_id\": 22633, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"Re: ECL ML Kmeans - SKEW error\", \"username\": \"rtaylor\", \"post_text\": \"maniblitz,SORT failed. Graph graph2[11], sort[19]: Exceeded skew limit: 0.250000, estimated skew: 1.000000
Since I don't see a SORT in your posted code, I have to assume it's in that code "### for privacy reasons, part of the code cannot be presented ###" section.\\n\\nSo, just addressing the error message, it's telling you the skew is 1.0 and the max skew should be 0.25 -- which tells me that all the data is being SORTed on a single node for some reason. Is it possible that every record has the same value in the SORT expression(s)?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-08-15 19:10:58\" },\n\t{ \"post_id\": 22613, \"topic_id\": 5823, \"forum_id\": 10, \"post_subject\": \"ECL ML Kmeans - SKEW error\", \"username\": \"maniblitz\", \"post_text\": \"System error: 10084: Graph graph2[11], sort[19]: SORT failed. Graph graph2[11], sort[19]: Exceeded skew limit: 0.250000, estimated skew: 1.000000
\\n\\nAfter trying to cluster 800k elements on a 4 nodes cluster using kmeans, I ended up with the error listed above.\\n\\n\\n\\nIMPORT STD;\\nIMPORT $;\\nIMPORT * FROM ML;\\nIMPORT * FROM ML.Cluster;\\nIMPORT * FROM ML.Types;\\n\\n### for privacy reasons, part of the code cannot be presented ###\\n\\n// result is a set of 800k 300 dimensions vectors\\n// c is a set of 3 300 dimension vectors set as centroids\\n\\nToField(result,dresult);\\nToField(c,dc);\\n \\ntarget := KMeans(dresult,dc,30,0.3,fDist:= DF.Cosine);\\n\\noutput(target);\\n\\n
\", \"post_time\": \"2018-08-14 17:45:13\" },\n\t{ \"post_id\": 23203, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYour example is exactly the same as the one I posted on Sat Oct 01 (EXPORT/SHARED difference) \\nI think we've bottomed out this subject, and I have raised the issue with the core team.\\nI'm just confused as to why an example in a builder window would behave differently to an example held in an ECL file.\\n\\nBut no big beef over it.\\nYours\\nAllan\", \"post_time\": \"2018-10-06 12:46:35\" },\n\t{ \"post_id\": 23173, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, here's my version that actually compiles and works. The TopLevel MODULE is in a folder named "Test":EXPORT Toplevel := MODULE\\n EXPORT Attrib1 := OUTPUT('pass');\\n \\n EXPORT f(param) := FUNCTIONMACRO\\n RETURN WHEN(OUTPUT(param,NAMED('Parameter')),Test.Toplevel.Attrib1);\\n ENDMACRO;\\nEND;
\\nThen run it like this from any builder window:IMPORT Test; //have to import to call the FUNCTIONMACRO\\nTest.Toplevel.f(DATASET([{'abc'},{'def'}],{string txt}));
\\nYou'll note that I had to change your SHARED to an EXPORT, because it has to compile at the location of the generated code. You'll also note that the additional IMPORT is not needed, because the IMPORT for the call must be present anyway, so you just need to fully qualify and EXPORT the definition you want to use from the same MODULE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-05 15:13:40\" },\n\t{ \"post_id\": 23163, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"Allan\", \"post_text\": \"Hum,\\nI made minor change, Attrib1 changed to EXPORT from SHARED.\\nIf I leave Attrib1 as SHARED I get error:\\n\\nError: Cannot access SHARED symbol 'Attrib1' in another module (6, 61), 2390, \\n
\\n \\nBut even with the change to export I can't get your example to compile, I get:\\nError: Import names unknown module "Toplevel" (5, 23), 2081, \\nError: While expanding macro f (10, 52), 2081, \\n
\\nRunning with and without '-legacy' command line qualifier.\\n\\n---------------------------------------------\\nFurther hum\\nI was running the above in a builder window. If I put the 'TopLevel' in its own ECL file, then referenced 'f' in that file the WU compiled and ran as expected.\\nExperiments.Toplevel.f(DATASET([{'abc'},{'def'}],{string txt}));\\n
\\n\\nI'm now confused as to what the differences are between builder windows and ECL files.\\n\\nYours\\nAllan\", \"post_time\": \"2018-10-05 14:40:55\" },\n\t{ \"post_id\": 23153, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI think you misunderstood me when I said: "Adding an IMPORT to the code the FUNCTIONMACRO generates" -- I meant that the IMPORT should go inside the FUNCTIONMACRO (making it part of the code it generates), like this:EXPORT Toplevel := MODULE\\n SHARED Attrib1 := OUTPUT('pass');\\n \\n EXPORT f(param) := FunctionMACRO\\n IMPORT Toplevel;\\n RETURN WHEN(OUTPUT(param,NAMED('Parameter')),Toplevel.Attrib1);\\n ENDMACRO;\\nEND;
Then you should be able to call it like this:Toplevel.f(DATASET([{'abc'},{'def'}],{string txt}));
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-04 17:31:15\" },\n\t{ \"post_id\": 23143, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\nI may be missing something, perhaps we're talking at cross purposes.\\nYou idea compiles and works, I reproduce my example for the benefit of anyone following this forum item.\\n\\nEXPORT Toplevel := MODULE\\n SHARED Attrib1 := OUTPUT('pass');\\n\\t \\n EXPORT f(param) := FunctionMACRO\\n\\tRETURN WHEN(OUTPUT(param,NAMED('Parameter')),Toplevel.Attrib1);\\n ENDMACRO;\\nEND;\\n
\nThen to use:\nIMPORT Toplevel;\nToplevel.f(DATASET([{'abc'},{'def'}],{string txt}));\n
\\n\\nBut this is a very simple example.\\nI may have multiple attributes, all different instances of the same MODULE. (A MODULE that takes parameters thus its instance being in a state distinct from other instances of the same MODULE and contains an EXPORTED FUNCTIONMACRO)\\n\\nI then want to call that FUCNTIONMACRO for a particular instance, how can the FUNCTIONMACRO know what shared/exported attributes in the same MODULE it can use?\\nAs it stands the only way I can see to do that is my example at the start of this discussion.\\n\\nIn my original reply I imagined an IMPORT of the application having to be included in the code of the service it was using. That way madness lies.\\nBut I miss-understood your original comment.\\n\\nBy the way I've opened ticket https://track.hpccsystems.com/browse/HPCC-20655 about this issue with the core team.\\n\\nYours\\nAllan\", \"post_time\": \"2018-10-03 21:28:22\" },\n\t{ \"post_id\": 23123, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"rtaylor\", \"post_text\": \"Allan,Your suggestion ties a application, the caller, to a service, the callee.
I don't see that. Adding an IMPORT to the code the FUNCTIONMACRO generates and fully qualifying the functions from that IMPORT simply ensures the FUNCTIONMACRO will generate working code from wherever it's called.\\n\\nBTW, did you try it? Did it work for you?\\n\\nRichard\", \"post_time\": \"2018-09-27 13:39:36\" },\n\t{ \"post_id\": 23103, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes FUNCTIONMACRO's are definitely compiled in the context of the caller, they could not be understood in any other way. (a note for trainers)\\nThat being said other languages have the same problem and solve it various ways, as I mentioned C++, JavaScript etc has 'this' to refer to an instance of a class.\\nGiven ECL does not have such a construct yet (I'll be raising a ticket with the core team) I think my solution is fine as it stands.\\n\\nYour suggestion ties a application, the caller, to a service, the callee.\\n \\nYours\\nAllan\", \"post_time\": \"2018-09-27 07:11:34\" },\n\t{ \"post_id\": 23073, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Re: Accessing MODULE level attribs from a FUC..MACRO in said\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nInteresting. I recently ran across a similar issue with a FUNCTIONMACRO within a MODULE. My issue was a simple IMPORT that I needed to include within the FUNCTIONMACRO to ensure that the standard library functions I wanted to use were visible. This led me to think about scoping in the context of a FUNCTIONMACRO, leading me to conclude that the scoping/visibility of the code generated by the FUNCTIONMACRO is that of the calling code, not the location of the FUNCTIONMACRO code itself. A fine point to understand if you're going to be using them a lot. \\n\\nTherefore, I suggest that you try adding an IMPORT TopLevel as the first line within the FUNCTIONMACRO and then just fully qualify your VadidateSomething call and see how that works.\\n\\nLet me know -- HTH,\\n\\nRichard\", \"post_time\": \"2018-09-26 19:34:56\" },\n\t{ \"post_id\": 23063, \"topic_id\": 5943, \"forum_id\": 10, \"post_subject\": \"Accessing MODULE level attribs from a FUC..MACRO in said MOD\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nA FUCNTIONMACRO inside a MODULE may well want to access EXPORTED attributes in the same instance of the MODULE. The only way I can see to do this is to pass the instance of the MODULE as a parameter to the FUNCTIONMACRO.\\n\\nBut do I need to do this? Is there an equivalent to the ‘C++’ ‘this’ for referencing the MODULEs instances EXPORTED attributes.\\nWhat I have at the moment works:\\n
\\nTopLevel := MODULE\\n EXPORT VadidateSomething := ORDERED(action,action,...);\\n EXPORT fm(moduleHarness) := FUNCTIONMACRO\\n\\n RETURN WHEN(Dosomething,moduleHarness.VadidateSomething);\\n ENDMACRO;\\nEND;\\n
\\n\\nThen use with:\\n\\nm := TopLevel;\\nm.fm(m);\\n
\\n\\nBut it looks clunky.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2018-09-26 18:37:01\" },\n\t{ \"post_id\": 23733, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"rtaylor\", \"post_text\": \"mansfield_bitter,\\n\\nThanks for getting me to re-visit this one, since I have now come up with a much better solution that generates the transposed dataset into separate fields instead of a single comma-delimited string. \\n\\nHere's the code:
Transpose(ds, recCnt) := FUNCTIONMACRO \\n #EXPORTXML(Struct,ds); //generate xml structure to parse\\n\\n #DECLARE(code1); //create "code1" symbol \\n #SET(code1,'FldSet := ['); //initialize \\n #DECLARE(code2); \\n #SET(code2,''); //set defs \\n #DECLARE(code3); \\n #SET(code3,'ChooseSet(UNSIGNED C) := CHOOSE(C'); //set chooser\\n #DECLARE(code4); \\n #SET(code4,'OutRec := RECORD\\\\n STRING Col0;\\\\n'); //output RECORD structure\\n #DECLARE(code5); \\n #SET(code5,'DATASET(FldCnt,\\\\n TRANSFORM(OutRec,\\\\n RecSet := ChooseSet(COUNTER);\\\\n SELF.Col0 := fldSet[COUNTER]'); //DATASET to return \\n\\t\\t\\n #DECLARE(ctr); \\n #SET(ctr,0); //initialize to 0\\n #FOR (Struct) \\n #FOR (field) //for each "Field" tag in the generated xml\\n #SET(ctr,%ctr% + 1); //increment the "ctr" symbol\\n #IF (%ctr% = 1) //and detect first iteration\\n #APPEND(code1,'\\\\'' + %'{@name}'% + '\\\\'' ); \\n #ELSE\\n #APPEND(code1,',\\\\'' + %'{@name}'% + '\\\\'' ); \\n #END\\n #APPEND(code2,%'{@name}'% +'_set := SET(' + #TEXT(ds) + ',(STRING)' + %'{@name}'% + ');\\\\n'); \\n\\t\\t\\t\\t#APPEND(code3,',' + %'{@name}'% +'_set'); \\n #END\\n #END\\n #APPEND(code1,'];\\\\nFldCnt := COUNT(FldSet);\\\\n'); \\n #APPEND(code3,');\\\\n'); \\n\\n #DECLARE(Ndx)\\n #SET(Ndx, 1); //initialize Ndx to 1\\n #LOOP\\n #IF(%Ndx% > recCnt) \\n #BREAK // break out of the loop\\n #ELSE //otherwise\\n #APPEND(code4,' STRING Col' + %'Ndx'% + ';\\\\n'); \\n #APPEND(code5,',\\\\n SELF.Col' + %'Ndx'% + ' := RecSet[' + %'Ndx'% + ']'); \\n #SET (Ndx, %Ndx% + 1)\\n #END\\n #END\\n #APPEND(code4,'END;\\\\n'); \\n #APPEND(code5,'\\\\n ));\\\\n'); \\n\\n // RETURN %'code1'% + %'code2'% + %'code3'% + %'code4'% + %'code5'% ; //just to look at generated code\\n %code1% //generates the actual code\\n %code2% \\n %code3% \\n %code4% \\n RETURN %code5%\\n ENDMACRO;\\n\\nds1 := DATASET([{'March',1,4,7},\\n {'April',2,5,8},\\n {'July',13,16,19},\\n {'May',3,6,9},\\n {'June',10,11,12}],\\n {STRING10 Month, UNSIGNED1 Jeff, UNSIGNED1 Fred, UNSIGNED1 Marty});\\nOUTPUT(ds1,NAMED('input')); //look at input data\\nOUTPUT(Transpose(ds1,COUNT(ds1)),NAMED('Transposed'));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-11-30 23:08:31\" },\n\t{ \"post_id\": 23723, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"mansfield_bitter\", \"post_text\": \"Should be doable if there's a way to specify the RECORD definition properly. It's not too much trouble to hack the above into something that returns an inline dataset (see below) but the compiler is not happy about having the RECORD generated on the fly. Ideas? Would also mean I could solve an unstack function I've been troubling with. \\n\\n\\nRecFromSet(SET OF STRING s) := FUNCTION //rolls a set of strings to a single string\\n OutRec := {STRING out};\\n ds := DATASET(s,OutRec);\\n rolledDs := ROLLUP(ds, 1=1, TRANSFORM(OutRec, SELF.out := TRIM(LEFT.out) + '\\\\',\\\\'' + RIGHT.out))[1].out;\\n RETURN '\\\\'' + TRIM(rolledDs, LEFT, RIGHT) + '\\\\'';\\nEND;\\n\\nTranspose(ds) := FUNCTIONMACRO \\n #EXPORTXML(Struct,ds); //generate xml structure to parse\\n #DECLARE(recDef); \\n #SET(recDef,'{'); //initialize \\n #DECLARE(code1); //create "code1" symbol \\n #SET(code1,'FldSet := ['); //initialize \\n #DECLARE(code2); \\n #SET(code2,''); \\n #DECLARE(code3); \\n #SET(code3,'DATASET('); \\n #DECLARE(ctr); \\n #SET(ctr,0); //initialize to 0\\n #FOR (Struct) \\n #FOR (field) //for each "Field" tag in the generated xml\\n #SET(ctr,%ctr% + 1); //increment the "ctr" symbol\\n #IF (%ctr% = 1) //and detect first iteration\\n #APPEND(code1,'\\\\'' + %'{@name}'% + '\\\\'' ); \\n #APPEND(code3,'\\\\n [{#EXPAND(RecFromSet(' + %'{@name}'% +'_set))}'); \\n #APPEND(recDef, 'STRING x;')\\n #ELSE\\n #APPEND(code1,',\\\\'' + %'{@name}'% + '\\\\'' ); \\n #APPEND(code3,',\\\\n{#EXPAND(RecFromSet(' + %'{@name}'% +'_set))}'); \\n #APPEND(recDef, 'STRING #EXPAND(' + %'{@name}'% + '_set[1]);')\\n #END\\n #APPEND(code2,%'{@name}'% +'_set := SET(' + #TEXT(ds) + ',(STRING)' + %'{@name}'% + ');\\\\n'); \\n #END\\n #END\\n #APPEND(recDef,'}')\\n #APPEND(code1,'];\\\\nFldCnt := COUNT(FldSet);\\\\n'); \\n #APPEND(code3,'], '+ %'recDef'% +' );\\\\n'); \\n\\n RETURN %'code1'% + %'code2'% + %'code3'% ; //just to look at generated code\\n // %code1% //generates the actual code\\n // %code2% \\n // RETURN %code3%\\n ENDMACRO;\\n
\\n\\nThe above would also need the top row removing but you get the idea.\", \"post_time\": \"2018-11-30 15:28:03\" },\n\t{ \"post_id\": 23303, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"rtaylor\", \"post_text\": \"\", \"post_time\": \"2018-10-11 16:41:41\" },\n\t{ \"post_id\": 23283, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nActually your TRANSPOSE works a treat! Just what I needed.\\nPossible gold coin winging your way.\\n\\nYours\\nAllan\", \"post_time\": \"2018-10-11 10:29:19\" },\n\t{ \"post_id\": 23253, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"Allan\", \"post_text\": \"Yes Richard,\\n\\nFind for me, but I was thinking of a more self contained complete solution. One that could be uploaded to the 'Tips & Tricks' section of the forum.\\n\\nI'm mulling over an implementation myself. I think any production version should have an option to name the output fields. As it stands post transpose, there is no mapping between old and new data sets field names.\\n\\nYours\\nAllan\", \"post_time\": \"2018-10-10 09:28:37\" },\n\t{ \"post_id\": 23243, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,
Yes for me the result has to end up in a dataset.
You can take the existing result and write it to disk -- it's a CSV file at that point. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-09 16:57:37\" },\n\t{ \"post_id\": 23233, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"Allan\", \"post_text\": \"Hi Richard,\\n\\nThough I did not expect an answer from your, I kind of knew it would tickle your fancy.\\nPressure of work means I won't be able to follow this up immediately, but 'I'll be back' to quote Arnold Schwarzenegger.\\n\\nPS. Yes for me the result has to end up in a dataset.\\nWhen we (you) get this solid it could be placed on the 'tips and tricks' section of the forum as its of general use.\\n\\nCheers\\nAllan\", \"post_time\": \"2018-10-09 09:05:59\" },\n\t{ \"post_id\": 23223, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Re: Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nInteresting problem. I don't know of anyone that's done this, but here's my first take on it. Note that I'm assuming that CSV-style output will be sufficient. Here's my FUNCTION and FUNCTIONMACRO that do all the work:
\\nRecFromSet(SET OF STRING s) := FUNCTION //rolls a set of strings to a single string\\n OutRec := {STRING out};\\n ds := DATASET(s,OutRec);\\n RETURN ROLLUP(ds,1=1,\\n TRANSFORM(OutRec,SELF.out := TRIM(LEFT.out) + ',' + RIGHT.out))[1].out;\\nEND;\\n\\nTranspose(ds) := FUNCTIONMACRO\\t\\t\\t\\t\\t\\t\\t\\n #EXPORTXML(Struct,ds); //generate xml structure to parse\\n #DECLARE(code1); //create "code1" symbol \\n #SET(code1,'FldSet := ['); //initialize \\n #DECLARE(code2); \\n #SET(code2,''); \\n #DECLARE(code3); \\n #SET(code3,'DATASET(FldCnt,TRANSFORM({STRING Fld},\\\\n SELF.Fld := CHOOSE(COUNTER,'); \\n #DECLARE(ctr); \\n #SET(ctr,0); //initialize to 0\\n #FOR (Struct) \\n #FOR (field) //for each "Field" tag in the generated xml\\n #SET(ctr,%ctr% + 1); //increment the "ctr" symbol\\n #IF (%ctr% = 1) //and detect first iteration\\n #APPEND(code1,'\\\\'' + %'{@name}'% + '\\\\'' ); \\n #APPEND(code3,'\\\\n FldSet[COUNTER] + \\\\',\\\\' + RecFromSet(' + \\n %'{@name}'% +'_set)'); \\n #ELSE\\n #APPEND(code1,',\\\\'' + %'{@name}'% + '\\\\'' ); \\n #APPEND(code3,',\\\\n FldSet[COUNTER] + \\\\',\\\\' + RecFromSet(' + \\n %'{@name}'% +'_set)'); \\n #END\\n #APPEND(code2,%'{@name}'% +'_set := SET(' + #TEXT(ds) + ',(STRING)' +\\n %'{@name}'% + ');\\\\n'); \\n #END\\n #END\\n #APPEND(code1,'];\\\\nFldCnt := COUNT(FldSet);\\\\n'); \\n #APPEND(code3,')));\\\\n'); \\n\\n // RETURN %'code1'% + %'code2'% + %'code3'% ; //just to look at generated code\\n %code1% //generates the actual code\\n %code2% \\n RETURN %code3%\\nENDMACRO;
\\nThen I call the FUNCTIONMACRO to generate the code and do the work:\\nds1 := DATASET([{'March',1,4,7},\\n {'April',2,5,8},\\n {'May',3,6,9}],\\n {STRING10 Month, UNSIGNED1 Jeff, UNSIGNED1 Fred, UNSIGNED1 Marty});\\nOUTPUT(ds1,NAMED('input')); //look at input data\\nOUTPUT(Transpose(ds1),NAMED('Transposed'));
\\nAnd this produces a transposed result that looks like this:month,March,April,May \\njeff,1,2,3\\nfred,4,5,6\\nmarty,7,8,9
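If you want that transposed result as a logical file rather than just a workunit result, it can be written straight out - the filename here is only an example:

OUTPUT(Transpose(ds1),,'~thor::transpose_out',CSV(QUOTE('')),OVERWRITE);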
\\nAnd now I'll have to go have "a think" and see if I can make it put everything into separate fields instead of CSV-style results. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-10-08 19:17:06\" },\n\t{ \"post_id\": 23213, \"topic_id\": 5963, \"forum_id\": 10, \"post_subject\": \"Anyone written an generalized TRANSPOSE MACRO?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nAnyone written a genaralised TRANSPOSE MACRO (FUNCTIONMACRO)?\\nSomething that rotates a DATASET. See Excel TRANSPOSE for a description of what I'm after.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2018-10-06 12:50:33\" },\n\t{ \"post_id\": 23343, \"topic_id\": 5983, \"forum_id\": 10, \"post_subject\": \"Re: Spraying BLOB from ECL Watch\", \"username\": \"micevepay\", \"post_text\": \"I have created a JIRA ticket for this considering that it is telling me that there is an issue with "Spray Fixed" but I am spraying a BLOB.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-20758\", \"post_time\": \"2018-10-15 17:01:30\" },\n\t{ \"post_id\": 23323, \"topic_id\": 5983, \"forum_id\": 10, \"post_subject\": \"Spraying BLOB from ECL Watch\", \"username\": \"micevepay\", \"post_text\": \"Using version 6.4.8-1, whenever I try to spray as blob I get error \\n\\n
FileSpray.SprayFixed\\nSource network IP not specified
\\n\\nDoesn't happen when trying fixed or delimited spraying. Command line dfuplus doesn't work either. It will get to 100% with a status of "started" but when you open the workunit it says that the file has been deleted and never complete. I also looked through the changelog for newer versions and don't see this being addressed.\", \"post_time\": \"2018-10-12 03:05:42\" },\n\t{ \"post_id\": 23663, \"topic_id\": 6103, \"forum_id\": 10, \"post_subject\": \"Re: DICTIONARY lookup on a range of values.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nAs least I know now.\\n\\nYours\\nAllan\", \"post_time\": \"2018-11-21 10:07:59\" },\n\t{ \"post_id\": 23643, \"topic_id\": 6103, \"forum_id\": 10, \"post_subject\": \"Re: DICTIONARY lookup on a range of values.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI don't think DICTIONARY is applicable to this. If you're looking for better performance from the implicit indexing a DICTIONARY gives you, then I'd suggest using your "filtering a DATASET" approach, but use an INDEX instead of a DATASET.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-11-19 18:54:57\" },\n\t{ \"post_id\": 23623, \"topic_id\": 6103, \"forum_id\": 10, \"post_subject\": \"DICTIONARY lookup on a range of values.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI would like a DICTIONARY to return a value given a probe using a range of key values.\\ne.g. a RGB triplet the three supplied components could be within +- some range of values to return the same colour (e.g. blue)\\nOr with dates there is the concept of 'circa 1950' so I would want the value for key 1950 to be returned for any input say 1948 to 1952.\\nNow I can do this filtering a DATASET: e.g.:\\n\\nBOOLEAN GotEvent(integer tgtDate,INTEGER range = 3) := EXISTS(DATASET(date BETWEEN tgtDate-range and tgtDate+range));\\n
\\nand I can do this with a DICTIONARY if every item in the range is specified, e.g.\\n\\nDICTIONARY(\\nDATASET([{1948,'Festival of Britain'}\\n\\t,{1949,'Festival of Britain'}\\n\\t,{1950,'Festival of Britain'}\\n\\t,{1951,'Festival of Britain'}\\n\\t,{1952,'Festival of Britain'}],{integer date,string event})\\n {date => event});\\n
\\n\\nBut is there a way to use DICTIONARY economically. (I don't mean using a MACRO to expand a dataset to the structure above as that's not clear code nor economical.)\\n\\nYours\\nAllan\", \"post_time\": \"2018-11-19 15:10:43\" },\n\t{ \"post_id\": 24403, \"topic_id\": 6123, \"forum_id\": 10, \"post_subject\": \"Re: Introduction\", \"username\": \"Allan\", \"post_text\": \"So am I, 'Hello'\\n\\nAllan\", \"post_time\": \"2019-01-31 20:29:00\" },\n\t{ \"post_id\": 23703, \"topic_id\": 6123, \"forum_id\": 10, \"post_subject\": \"Introduction\", \"username\": \"maria stella\", \"post_text\": \"I am an developer.\", \"post_time\": \"2018-11-29 05:26:57\" },\n\t{ \"post_id\": 23873, \"topic_id\": 6143, \"forum_id\": 10, \"post_subject\": \"Re: Finding 1st occurrence of a character from a set in a st\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nSure. JIRA \\n\\nRichard\", \"post_time\": \"2018-12-21 16:51:39\" },\n\t{ \"post_id\": 23863, \"topic_id\": 6143, \"forum_id\": 10, \"post_subject\": \"Re: Finding 1st occurrence of a character from a set in a st\", \"username\": \"Allan\", \"post_text\": \"Ah Richard,\\n\\nI missed these replies.\\n\\nThanks very much. But this functionality still seems fundamental enough for it to have its own STD lib function?\\n\\nYours\\nAllan\", \"post_time\": \"2018-12-21 16:25:28\" },\n\t{ \"post_id\": 23773, \"topic_id\": 6143, \"forum_id\": 10, \"post_subject\": \"Re: Finding 1st occurrence of a character from a set in a st\", \"username\": \"rtaylor\", \"post_text\": \"And, of course, if you want to add case INsensitive searches to the mix, here's my "final" form for this function:
\\n
//Find the first or last instance of any of a set of characters within a string,\\n// case sensitive or insensitive\\n\\nIMPORT Std;\\nUpperIt(STRING s) := Std.Str.ToUpperCase(s); \\nFindCharFromSetInString(SET OF STRING SetStr,STRING str,\\n BOOLEAN FirstOne=TRUE,BOOLEAN Caseless=FALSE) := FUNCTION\\n UpperSet := SET(DATASET(COUNT(SetStr),\\n TRANSFORM({STRING1 char},\\n SELF.char := UpperIt(SetStr[COUNTER]))),\\n char);\\n RetDS := DATASET(LENGTH(str),\\n TRANSFORM({UNSIGNED2 Pos,BOOLEAN Fnd},\\n SELF.Pos := COUNTER,\\n SELF.Fnd := IF(Caseless,\\n UpperIt(str[COUNTER]) IN UpperSet,\\n str[COUNTER] IN SetStr)));\\n RetVal := IF(FirstOne,MIN(RetDS(Fnd=TRUE),Pos),MAX(RetDS(Fnd=TRUE),Pos)); \\n RETURN RetVal;\\nEND;\\n\\nFindCharFromSetInString(['a','b','d'],'Allan Bill Charlie'); // <= returns 4 \\nFindCharFromSetInString(['a','b','d'],'Allan Bill Charlie',FALSE); // <= returns 14\\nFindCharFromSetInString(['c','b','d'],'Allan Bill Charlie',,TRUE); // <= returns 7 \\nFindCharFromSetInString(['C','B','D'],'Allan Bill Charlie'); // <= returns 7 \\nFindCharFromSetInString(['C','B','D'],'Allan Bill Charlie',FALSE); // <= returns 12 \\nFindCharFromSetInString(['W','X','Y'],'Allan Bill Charlie'); // <= returns 0\\nFindCharFromSetInString(['W','X','Y'],'Allan Bill Charlie',FALSE); // <= returns 0 \\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-12-10 15:28:08\" },\n\t{ \"post_id\": 23763, \"topic_id\": 6143, \"forum_id\": 10, \"post_subject\": \"Re: Finding 1st occurrence of a character from a set in a st\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's how I would do it:FindOccurrenceOf(SET OF STRING SetStr,STRING str,BOOLEAN FirstOne=TRUE) := FUNCTION\\n RetDS := DATASET(LENGTH(str),\\n TRANSFORM({UNSIGNED2 Pos,BOOLEAN Fnd},\\n SELF.Pos := COUNTER,\\n SELF.Fnd := str[COUNTER] IN SetStr));\\n RetVal := IF(FirstOne,MIN(RetDS(Fnd=TRUE),Pos),MAX(RetDS(Fnd=TRUE),Pos)); \\n RETURN RetVal;\\nEND;\\n\\nFindOccurrenceOf(['C','B','D'],'Allan Bill Charlie'); // <= returns 7 \\nFindOccurrenceOf(['C','B','D'],'Allan Bill Charlie',FALSE); // <= returns 12 \\nFindOccurrenceOf(['W','X','Y'],'Allan Bill Charlie'); // <= returns 0 \\nFindOccurrenceOf(['W','X','Y'],'Allan Bill Charlie',FALSE); // <= returns 0
\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-12-10 14:22:02\" },\n\t{ \"post_id\": 23753, \"topic_id\": 6143, \"forum_id\": 10, \"post_subject\": \"Finding 1st occurrence of a character from a set in a strin\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nMaybe I've missed some functionality in the STD. But usually, in any harness, there is some capability to find an occurrence in a string of from a set of characters. (either 1st or last)\\n\\ne.g. along the lines of:\\nFindFirstOccurrenceOf([CBD],'Allan Bill Charlie') <= returns 7 if counting from 1
\\n.\\nI can do this in ECL, but in a very round about way, e.g.\\n\\ns := '../../dir/../Allan.exe';\\nLENGTH(s) - LENGTH(REGEXREPLACE('^([\\\\\\\\./]){1,}',s,''));\\n
\\nevaluates to '6', being the length of the wild-carded component to the pathname.\\nIs there a better way to find an occurrence in a string from a SET of characters?\\nAnd the NOT of above.\\n\\nYours\\nAllan\", \"post_time\": \"2018-12-10 11:36:07\" },\n\t{ \"post_id\": 23833, \"topic_id\": 6163, \"forum_id\": 10, \"post_subject\": \"Re: Compressing uncompressed files\", \"username\": \"rtaylor\", \"post_text\": \"sajish, I will have to get the list of uncompressed files and need to deal with the list one by one, figuring out the record structure and reading the file and then output as a uncompressed file
OK, this is not a complete solution, but it should help you get a long way down the road:IMPORT Std;\\n//Get list of all non-superfiles:\\nAllFiles := STD.File.LogicalFileList();\\n// NOTHOR(AllFiles);\\n\\n//Then filter out all the already-compressed files:\\nUnCompressed := \\n AllFiles(STD.File.GetLogicalFileAttribute('~'+name,'blockCompressed')='');\\n// NOTHOR(UnCompressedFiles);\\n\\n//Then filter out all the sub-files of superfiles \\n// (which can't be deleted without first removing them from their superfiles)\\nNonSF := UnCompressed(NOT EXISTS(STD.File.LogicalFileSuperowners('~'+name)));\\n// NOTHOR(NonSF); \\n\\n//Then get just the filenames, Record Structures, and file types:\\nNameStruct := \\n TABLE(NonSF,{name,\\n STRING RecStruct := STD.File.GetLogicalFileAttribute('~'+name,'ECL'),\\n STRING FileType := STD.File.GetLogicalFileAttribute('~'+name,'kind')\\n\\t });\\nNOTHOR(NameStruct);
That result gets you to the point of actually writing the code to do an OUTPUT,COMPRESSED on all these files and then delete the uncompressed files. Of course, once you've done that you'll also need to find all the ECL DATASET declarations for the deleted files and update them with the new filenames and the __COMPRESSED__ option.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-12-19 19:47:59\" },\n\t{ \"post_id\": 23823, \"topic_id\": 6163, \"forum_id\": 10, \"post_subject\": \"Re: Compressing uncompressed files\", \"username\": \"sajish\", \"post_text\": \"[quote="rtaylor":2awnfiqq]sajish,\\n\\nThe short answer to that is: yes and no. \\n\\nOne of the foundational principles of the HPCC platform is "never throw anything away (because you might need it later)" so you will find that you cannot overwrite on output a file that you used in that workunit as input. So no, you cannot compress a previously uncompressed logical file, but you CAN read that uncompressed data and write it to a new compressed logical file. Like this:
OUTPUT(uncompressedDataset,,'NewCompressedfilename',COMPRESSED);
Once you've done that, THEN you can delete the original uncompressed file if you need/want to.\\n\\nHTH,\\n\\nRichard\\n\\nThanks Richard, I already have zeroed in on this solution but perhaps wanted to find out if this process was readily available in any function, especially because I need to compress all the uncompressed files in the cluster, I will have to get the list of uncompressed files and need to deal with the list one by one, figuring out the record structure and reading the file and then output as a uncompressed file. \", \"post_time\": \"2018-12-18 14:53:34\" },\n\t{ \"post_id\": 23813, \"topic_id\": 6163, \"forum_id\": 10, \"post_subject\": \"Re: Compressing uncompressed files\", \"username\": \"rtaylor\", \"post_text\": \"sajish,\\n\\nThe short answer to that is: yes and no.
\\n\\nOne of the foundational principles of the HPCC platform is "never throw anything away (because you might need it later)" so you will find that you cannot overwrite on output a file that you used in that workunit as input. So no, you cannot compress a previously uncompressed logical file, but you CAN read that uncompressed data and write it to a new compressed logical file. Like this:
OUTPUT(uncompressedDataset,,'NewCompressedfilename',COMPRESSED);
Once you've done that, THEN you can delete the original uncompressed file if you need/want to.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-12-18 14:07:03\" },\n\t{ \"post_id\": 23803, \"topic_id\": 6163, \"forum_id\": 10, \"post_subject\": \"Compressing uncompressed files\", \"username\": \"sajish\", \"post_text\": \"Hi,\\nIs there a way to compress previously uncompressed logical files?\", \"post_time\": \"2018-12-17 20:23:43\" },\n\t{ \"post_id\": 24583, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Re: Getting a weighted sample form a DATASET\", \"username\": \"Allan\", \"post_text\": \"Here is a working example, with unit tests, as a generalised FUNCTIONMACRO\\n\\n/*\\n BucketSample returns a sample of 'TotalSampleSize' number of records from DATASET 'ds'.\\n Where the fraction of the total sample in any one bucket is dictated by a\\n SET OF UNSIGNED 'BucketPercents'. The SUM of all the elements of 'BucketPercents'\\n must equal 100.\\n The call-back FUNCTION 'BucketDescriminator' dictates which 'bucket' any one record\\n belongs in.\\n 'BucketDescriminator' FUNCTION must take two parameters\\n 1. RECORDOF(ds)\\n 2. 'BucketDescriminatorArgument'. This is a pass through argument that allows meta data\\n on the DATASET, or anything else for that matter, to be easily available to the callback.\\n 'BucketDescriminator' returns an UNSIGNED in the range 0 <= f(ds) <= COUNT(BucketPercents),\\n that indicates, as an index, which of the buckets the record belongs to.\\n If the Discriminator returns 0 (Zero) this indicates that the record has been explicitly excluded\\n from any sample.\\n\\n Note the input dataset 'ds' may well have to be read in its own graph, i.e. use INDEPENDENT.\\n*/\\nBucketSample(ds,TotalSampleSize,BucketPercents,BucketDescriminator,BucketDescriminatorArgument) := FUNCTIONMACRO\\n #UNIQUENAME(Id);\\n LOCAL Buckets := DATASET(BucketPercents,{UNSIGNED bucket});\\n LOCAL Chk100Percent := ASSERT(SUM(Buckets,bucket) = 100,'Total allocation to buckets must equal 100%.',FAIL);\\n LOCAL Checks := PARALLEL(Chk100Percent);\\n LOCAL\\n SampleSizes := PROJECT(Buckets\\n ,TRANSFORM({UNSIGNED %id%;UNSIGNED size}\\n ;SELF.%id% := COUNTER\\n ;SELF.size := LEFT.bucket / 100 * TotalSampleSize));\\n \\n // Use 'SKIP' in preference to filtering to reduce spilling to disk.\\n {UNSIGNED %Id%,RECORDOF(ds)} BucketUp(RECORDOF(ds) L) := TRANSFORM,SKIP(BucketDescriminator(L,BucketDescriminatorArgument) = 0)\\n SELF.%Id% := BucketDescriminator(L,BucketDescriminatorArgument);\\n SELF := L;\\n END;\\n LOCAL AllocatedRecsToBucket := PROJECT(ds,BucketUp(LEFT));\\n\\n {DATASET(RECORDOF(ds)) d} GatherSample(RECORDOF(SampleSizes) L) := TRANSFORM\\n SELF.d := PROJECT(ENTH(AllocatedRecsToBucket(%Id% = L.%Id%),L.size)\\n ,TRANSFORM(RECORDOF(ds);SELF := LEFT));\\n END;\\n LOCAL GatheredByBucket := PROJECT(SampleSizes,GatherSample(LEFT));\\n LOCAL Gathered := NORMALIZE(GatheredByBucket,LEFT.d,TRANSFORM(RIGHT));\\n RETURN WHEN(Gathered,Checks);\\nENDMACRO;\\n\\n/////////////////////////////////////////////////////\\n// Unit Test\\n/////////////////////////////////////////////////////\\n\\nINData := NORMALIZE(DATASET([{1}],{INTEGER1 x}),1000000,TRANSFORM({UNSIGNED someDataRow},SELF.someDataRow := COUNTER)) : INDEPENDENT;\\n\\n////////////////////////////////////////////////////////////////\\n// Example call-back \\n////////////////////////////////////////////////////////////////\\n\\nExcludeSet := {UNSIGNED ExcludeLowBound,UNSIGNED ExcludeHighBound};\\n\\nUNSIGNED db(RECORDOF(INData) 
rec,DATASET(ExcludeSet) Exclude) := FUNCTION\\n DontUse := EXISTS(PROJECT(Exclude,TRANSFORM({BOOLEAN bad};SELF.Bad := rec.someDataRow BETWEEN LEFT.ExcludeLowBound AND LEFT.ExcludeHighBound))(bad));\\n RETURN IF(DontUse,0,(rec.someDataRow % 5)+1);\\nEND;\\n\\nSmpl := BucketSample(INData,100000,[2,70,8,12,8],db,DATASET([{100,1000},{80000,90000}],ExcludeSet));\\n\\nExpectedResults := DATASET([{'Set ending 0 or 5',COUNT(Smpl(someDataRow % 5 = 0)) = 2000}\\n ,{'Set ending 1 or 6',COUNT(Smpl(someDataRow % 5 = 1)) = 70000}\\n ,{'Set ending 2 or 7',COUNT(Smpl(someDataRow % 5 = 2)) = 8000}\\n ,{'Set ending 3 or 8',COUNT(Smpl(someDataRow % 5 = 3)) = 12000}\\n ,{'Set ending 4 or 9',COUNT(Smpl(someDataRow % 5 = 4)) = 8000}\\n ,{'Set ending 4 or 9',COUNT(Smpl(someDataRow % 5 = 4)) = 8000}\\n ,{'Contains invalid records 100 to 1000',NOT EXISTS(Smpl(someDataRow BETWEEN 100 AND 1000))}\\n ,{'Contains invalid records 80000 to 90000',NOT EXISTS(Smpl(someDataRow BETWEEN 80000 AND 90000))}\\n ],{STRING Id;BOOLEAN Pass});\\n// Empty dataset is a PASS\\nOUTPUT(ExpectedResults(NOT Pass),NAMED('FAILED'));\\n
\\nNote an empty dataset result from the unit tests is a PASS.\\nYours\\nAllan\", \"post_time\": \"2019-02-26 10:37:43\" },\n\t{ \"post_id\": 24273, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Re: Getting a weighted sample form a DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Allan,I expect you meant SAMPLE(d,Interval) instead of SAMPLE(d,n).
You are absolutely correct! And notice that, for the small IDs the COUNT(d) and n values are the same, making the Interval=1, and SAMPLE(ds,1) returns all the records in the ds. \\n\\nTurning this all into a FUNCTIONMACRO is the next logical step, and you only need to pass in the dataset you want to sample from, the unique ID field, and the number of records you want returned, because the id/count dataset can be easily created for the rest of the code to work on with a simple TABLE, like this:\\n
TABLE(ds,{idfield, ctr := COUNT(GROUP)},idfield);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-23 15:24:33\" },\n\t{ \"post_id\": 24263, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Re: Getting a weighted sample form a DATASET\", \"username\": \"Allan\", \"post_text\": \"All,\\nThe example code above to actually generate the sample can take a VERY long time as you're filtering the entire input for every ID. IT can also blow the max size of spill files. (10Mb default)\\n\\nI've used good old PROCESS again. With the RIGHT transform keeping a running tally of the records processed for each ID. Then the LEFT transform just SKIPs if, for that LEFT's record ID, the running tally >= maxSampleSize for that ID.\\nWe're selecting sequential records from the input so there is no 'Interval', but it does complete in seconds, and there is no spill file to cause problems.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-01-23 14:55:11\" },\n\t{ \"post_id\": 24233, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Re: Getting a weighted sample form a DATASET\", \"username\": \"Allan\", \"post_text\": \"Brilliant Richard!\\n\\nAfter a bit of experimentation I decided 1/2 average was the 'best' cutoff between large and small sample size.\\nBy the way, small typo, in your example use of the distribution you calculate 'Interval', but don't use it, I expect you meant SAMPLE(d,Interval) instead of SAMPLE(d,n).\\n\\nI was wondering if a more general solution would be useful to the community.\\ne.g. a FUNCTIONMACRO whose inputs are:\\n 1. the input dataset\\n 2. a call back function which given a record returned the quartile/decile to place the record\\n 3. Total number of records to return in the sample\\n 4. SET OF 4 or 10 or 'n' percentages giving the max percentage of records to return in any one quartile/decile/<other number of buckets> Obviously if the input does not contain enough records fulfilling the criteria for a bucket then 100% of that bucket is returned. \\n\\nThe FUNCTIONMACRO then returns a DATASET (same record type as input) where the count of sample of records for each bucket is the percent of the total requested (param 3)\\nThere would have to be an ASSERT FAIL if the SUM (parameter 4) != 100\\n\\nParameter 2 would be tricky as the decision on which bucket to place the record might require meta data, as is the case in my COUNT of records. The callback would need to know the COUNT in the input dataset for that particular record type (in case above ID).\\nBut then again any such meta-data could be calculated by the user of the FUNCTIONMACRO and made available to the call-back function. It may be handy to have a parameter to the callback which is just a pass-through of meta-data passed into the FUNCTIONMACRO. That way the callback function will always have meta-data easily available to it. \\n\\nAnyway,\\n\\nThanks Very much Richard for helping me progress my project.\\nYours\\nAllan\", \"post_time\": \"2019-01-23 11:33:05\" },\n\t{ \"post_id\": 24213, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Re: Getting a weighted sample form a DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThanks, this was fun! \\n
SampleSize := 6000000; //six million\\nds := DATASET([{111,61617819},{222,57579975},{333,42477398},{8888,39820652},\\n {1234,26562216},{4321,26060023},{1122,17037490},{27409,14852329},\\n {18309,12606673},{27651,10219386},{32228,8268928},{27724,6577956},\\n {1378,5142707},{10573,4551204},{18560,3845789},{2440,2894748},\\n {15780,1899968},{19480,1101994},{20773,1049426},{33473,983285},\\n {23118,880476},{11214,679229},{19836,549526},{19060,488667},\\n {11356,267270},{19811,130791},{5004,51367},{13733,42917},{10884,2345}],\\n {UNSIGNED id,UNSIGNED ctr});\\nRecCnt := COUNT(ds);\\n\\n//What's the definition of "small" number (keep all recs)?\\n// SmallNum := ROUND(SampleSize/RecCnt); //average?\\t\\t\\t\\nSmallNum := ROUND((SampleSize/RecCnt)/2); //half average? \\t\\t\\t\\t\\t\\n\\nSmallSet := ds(ctr <= SmallNum);\\nLargeSet := ds(ctr > SmallNum);\\n\\nSmallSetSum := SUM(SmallSet,ctr);\\nLargeSetSum := SUM(LargeSet,ctr);\\nLargeTarget := SampleSize - SmallSetSum; //number to get from larger IDs\\n\\nOutRec := RECORD\\n ds;\\n UNSIGNED SampleCnt := ds.ctr;\\nEND;\\nSmallRes := TABLE(SmallSet,OutRec);\\nLargeRes := PROJECT(LargeSet,\\n TRANSFORM(OutRec,\\n Pct := LEFT.ctr/LargeSetSum; //percentage of total Large\\n SELF.SampleCnt := ROUND(LargeTarget * Pct),\\n SELF := LEFT));\\n\\nCntSum := SUM(SmallRes+LargeRes,SampleCnt);\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nDiff := CntSum - SampleSize; //How close are we to the number we want?\\n\\n//minor final adjustments to produce exact SampleSize number \\nLargeFinal(DATASET(OutRec) d,INTEGER n) := \\n PROJECT(d,\\n TRANSFORM(OutRec,\\n NewCnt := IF(n < 0,LEFT.SampleCnt+1,LEFT.SampleCnt-1);\\n SELF.SampleCnt := IF(ABS(n) >= COUNTER,NewCnt,LEFT.SampleCnt);\\n SELF := LEFT));\\nLargeAdjust := IF(Diff = 0,\\n LargeRes,\\n LargeFinal(SORT(LargeRes,-SampleCnt),Diff)); //modify largest first\\n\\nFinalRes := SmallRes & LargeAdjust;\\nFinalRes; //Exact numbers of records to get for each ID value\\nSUM(FinalRes,SampleCnt); //This should be the same as SampleSize defined above
Then you only need to write the code to get the precise number of sample recs defined for each ID value, something like this://get actual samples something like this:\\nds := DATASET( ... ); //the real dataset to get samples from\\n\\n//d = filtered dataset of one id, n = number of sample recs to get\\nGetSample(DATASET(rec) d,UNSIGNED n) := FUNCTION\\n Interval := TRUNCATE(COUNT(d)/n);\\n Samples := SAMPLE(d,n);\\n RETURN CHOOSEN(Samples,n);\\nEND;\\nPROJECT(FinalRecs,\\n TRANSFORM({FinalRecs,DATASET(RECORDOF(ds)) ChildData},\\n SELF.ChildData := GetSample(ds(ID = LEFT.ID),LEFT.SampleCnt),\\n SELF := LEFT));
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-22 21:33:24\" },\n\t{ \"post_id\": 24203, \"topic_id\": 6263, \"forum_id\": 10, \"post_subject\": \"Getting a weighted sample form a DATASET\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nMy math is not up to this.\\nI have to generate a sample of exactly 6 million records from the set summary below.\\nThe sample has to contain examples of records from every 'id'. But the count of records held be each 'id' varies enormously, so from id 10884 (bottom) all its 2345 records could be included in the sample, but where the counts of records are in the millions they obviously have to be cut down by some amount.\\nIts not a straight percentage from each bucket, as I said the smaller buckets can be accommodated completely in the sample.\\nerr - help\\nid,Count
\\n111,61617819
\\n222,57579975
\\n333,42477398
\\n8888,39820652
\\n1234,26562216
\\n4321,26060023
\\n1122,17037490
\\n27409,14852329
\\n18309,12606673
\\n27651,10219386
\\n32228,8268928
\\n27724,6577956
\\n1378,5142707
\\n10573,4551204
\\n18560,3845789
\\n2440,2894748
\\n15780,1899968
\\n19480,1101994
\\n20773,1049426
\\n33473,983285
\\n23118,880476
\\n11214,679229
\\n19836,549526
\\n19060,488667
\\n11356,267270
\\n19811,130791
\\n5004,51367
\\n13733,42917
\\n10884,2345
\", \"post_time\": \"2019-01-22 16:52:56\" },\n\t{ \"post_id\": 24703, \"topic_id\": 6383, \"forum_id\": 10, \"post_subject\": \"Re: Simple Python Question\", \"username\": \"rtaylor\", \"post_text\": \"Lmanaxi,\\n\\nThese forums are specific only to ECL and HPCC Systems. \\n\\nGeneral questions about how to do Python (in a Python environment) should be addressed to a Python support forum such as: https://www.python.org/community/forums/\\n\\nOnce you are to the point where you want to embed some Python code into ECL code, then please feel free to post those questions here.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-04 14:21:33\" },\n\t{ \"post_id\": 25183, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"omnibuzz\", \"post_text\": \"I believe template route will not work as it will require me to know the number of records at compile time.\\n\\nI think the core team might have intentionally kept the SET functionality out of child datasets, as managing a large set of big datasets may create its own issues.\\n\\nI will anyway submit a JIRA ticket. \\n\\nThanks for checking it out.\\nRegards\\nSrini\", \"post_time\": \"2019-03-12 15:27:52\" },\n\t{ \"post_id\": 25133, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nTry throwing in a bit of Template Language to generate the set definition, like this:Rec := RECORD\\n STRING1 Letter;\\n UNSIGNED1 DS;\\nEND;\\n\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\n\\nParentRec := RECORD\\n UNSIGNED RecID;\\n DATASET(Rec) Child;\\nEND;\\ndsParent := DATASET([{1,ds1},{2,ds2},{3,ds3},{4,ds4},{5,ds5}],ParentRec);\\n\\n #DECLARE (SetString)\\n #DECLARE (Ndx)\\n #SET (SetString, 'SetDS := ['); //initialize SetString \\n #SET (Ndx, 1); //initialize Ndx to 1\\n #LOOP\\n #IF (%Ndx% = COUNT(dsParent)) //if on last rec\\n #BREAK // break out of the loop\\n #ELSE //otherwise\\n #APPEND (SetString, 'dsParent[' + %'Ndx'% + '].Child,');\\n #SET (Ndx, %Ndx% + 1)\\n #END\\n #END\\n //add last element and closing ];\\n #APPEND (SetString, 'dsParent[' + %'Ndx'% + '].Child];'); \\n \\n %SetString%; //generate the SetDS definition\\n // This generates:\\n // SetDS := [dsParent[1].Child,dsParent[2].Child,dsParent[3].Child,\\n dsParent[4].Child,dsParent[5].Child];\\n\\nj1 := MERGEJOIN(SetDS,\\n STEPPED(LEFT.Letter=RIGHT.Letter),\\n SORTED(Letter));\\nj2 := MERGEJOIN(SetDS,\\n STEPPED(LEFT.Letter=RIGHT.Letter),\\n SORTED(Letter),LEFT OUTER);\\nj3 := MERGEJOIN(SetDS,\\n STEPPED(LEFT.Letter=RIGHT.Letter),\\n SORTED(Letter),LEFT ONLY);\\nj4 := MERGEJOIN(SetDS,\\n STEPPED(LEFT.Letter=RIGHT.Letter),\\n SORTED(Letter),MOFN(3));\\nj5 := MERGEJOIN(SetDS,\\n STEPPED(LEFT.Letter=RIGHT.Letter),\\n SORTED(Letter),MOFN(3,4));\\nOUTPUT(j1);\\nOUTPUT(j2);\\nOUTPUT(j3);\\nOUTPUT(j4);\\nOUTPUT(j5);
\\nAnd you could also submit a JIRA asking for the SET function to expand its functionality to include creating the set of datasets the easy way:\\nSetDS := SET(DsParent,Child);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-12 15:04:31\" },\n\t{ \"post_id\": 25053, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"omnibuzz\", \"post_text\": \"I need to pass it to the MERGEJOIN construct as a set of datasets to find matches between the children.\", \"post_time\": \"2019-03-12 00:57:22\" },\n\t{ \"post_id\": 25043, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nWhy do you need a set of child datasets when you can simply use the child dataset as if it were a separate dataset, like this:Rec := RECORD\\n STRING1 Letter;\\n UNSIGNED1 DS;\\nEND;\\n\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\n\\nParentRec := RECORD\\n DATASET(Rec) Child;\\nEND;\\ndsParent := DATASET([{ds1},{ds2},{ds3},{ds4},{ds5}],ParentRec);\\nOUTPUT(dsParent.Child); //produces all child recs
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-11 23:51:04\" },\n\t{ \"post_id\": 25033, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"omnibuzz\", \"post_text\": \"This definitely helped, Richard. Thanks a lot. I got what I wanted. I am, however, stuck with another issue in getting a set of datasets from a denormalized dataset. Do you have any trick to achieve this. \\n\\nHere is a setup for what I am trying to achieve. \\n\\nRec := RECORD\\n STRING1 Letter;\\n UNSIGNED1 DS;\\nEND;\\n\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\n\\nParentRec := RECORD\\n\\tDATASET(Rec) Child;\\nEND;\\ndsParent := DATASET([{ds1},{ds2},{ds3},{ds4},{ds5}],ParentRec);\\n\\n//Assuming dsParent is what I have to start with, I want to rewrite this line to get the set of datasets from dsparent\\nSET(dsParent,Child);\\n
\\n\\nThank you again for the help.\\nCheers\\nSrini\", \"post_time\": \"2019-03-11 21:03:06\" },\n\t{ \"post_id\": 25023, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Re: Slicing a set of dataset does not work\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nFor sets of DATASETs you need to use the RANGE function instead of the set-range-of-values syntax, like this:\\nRec := RECORD\\n STRING1 Letter;\\n UNSIGNED1 DS;\\nEND;\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\nSetDS := [ds1,ds2,ds3,ds4,ds5];\\nOutDS1 := RANGE(Setds,[1,3,5]);\\nOUTPUT(outDS1[1]); \\nOUTPUT(outDS1[2]); \\nOUTPUT(outDS1[3]); \\nOUTPUT(outDS1[1] & outDS1[2] & outDS1[3] & outDS1[4] & outDS1[5]);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-11 20:29:09\" },\n\t{ \"post_id\": 25013, \"topic_id\": 6523, \"forum_id\": 10, \"post_subject\": \"Slicing a set of dataset does not work\", \"username\": \"omnibuzz\", \"post_text\": \"I was playing with one of the documented examples and I came across an issue which i am trying to understand.\\nThe following code works and gives the result as [1,2,3]\\n\\nSetInt := [1,2,3,4,5];\\nSetInt[1..3];\\n
\\n\\nBut, the following does not work:\\n\\nRec := RECORD\\n STRING1 Letter;\\n UNSIGNED1 DS;\\nEND;\\n\\nds1 := DATASET([{'A',1},{'B',1},{'C',1},{'D',1},{'E',1}],Rec);\\nds2 := DATASET([{'A',2},{'B',2},{'H',2},{'I',2},{'J',2}],Rec);\\nds3 := DATASET([{'B',3},{'C',3},{'M',3},{'N',3},{'O',3}],Rec);\\nds4 := DATASET([{'A',4},{'B',4},{'R',4},{'S',4},{'T',4}],Rec);\\nds5 := DATASET([{'B',5},{'V',5},{'W',5},{'X',5},{'Y',5}],Rec);\\nSetDS := [ds1,ds2,ds3,ds4,ds5];\\nSetds[1..3];
\\n\\nAm I making a mistake here or is it because the set of dataset is internally not looked as a set but a syntax sugar to better express merge joins.\\nThanks\\nSrini\", \"post_time\": \"2019-03-11 18:33:34\" },\n\t{ \"post_id\": 25313, \"topic_id\": 6593, \"forum_id\": 10, \"post_subject\": \"Re: Having an issue importing a database\", \"username\": \"rtaylor\", \"post_text\": \"rrussell,\\n\\nIs that an Eclipse error code? If not, what is the error message text that accompanies that code?\\n\\nEither way, the only ECL code you show is a RECORD structure, which only defines the layout of fields in a file. To reference the file itself you also need a DATASET declaration. \\n\\nThis is all covered in the first ECL online training course, Intro to ECL, Part 1 (available here: https://learn.lexisnexis.com). Have you gone through that class yet?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-18 17:36:12\" },\n\t{ \"post_id\": 25293, \"topic_id\": 6593, \"forum_id\": 10, \"post_subject\": \"Having an issue importing a database\", \"username\": \"rrussell\", \"post_text\": \"I'm using Eclipse as my IDE, and I',m trying to basically create a table and import values from a dataset, but I keep getting an error.\\n\\nThe error I keep getting is Code: 4157.\\n\\nThis is what I have so far.\\nCODE:\\n\\n\\nexport new_file := RECORD\\n\\tSTRING2 Age;\\n\\tSTRING3 Attrition;\\n\\tSTRING25 BusinessTravel;\\n\\tSTRING5 DailyRate;\\n\\tSTRING25 Department;\\n\\tSTRING3 DistanceFromHome;\\n\\tSTRING2 Education;\\n\\tSTRING25 EducationField;\\n\\tSTRING2 EmployeeCount;\\n\\tSTRING3 EmployeeNumber;\\n\\tSTRING2 EnvironmentSatisfaction;\\n\\tSTRING10 Gender;\\n\\tSTRING3 HourlyRate;\\n\\tSTRING2 JobInvolvment;\\n\\tSTRING2 JobLevel;\\n\\tSTRING30 JobRole;\\n\\tSTRING2 JobSatisfaction;\\n\\tSTRING25 MaritalStatus;\\n\\tSTRING6 MonthlyIncome;\\n\\tSTRING6 MonthlyRate;\\n\\tSTRING2 NumCompaniesWorked;\\n\\tSTRING1 Over18;\\n\\tSTRING3 OverTime;\\n\\tSTRING3 PercentSalaryHike;\\n\\tSTRING2 PerformanceRating;\\n\\tSTRING2 RelationshipSatisfaction;\\n\\tSTRING2 StandardHours;\\n\\tSTRING2 StockOptionLevel;\\n\\tSTRING3 TotalWorkingYears;\\n\\tSTRING2 TrainingTimesLastYear;\\n\\tSTRING3 WorkLifeBalance;\\n\\tSTRING2 YearsAtCompany;\\n\\tSTRING2 YearsInCurrentRole;\\n\\tSTRING2 YearsSinceLastPromotion;\\n\\tSTRING2 YearsWithCurrManager;\\nEND;\\n\\n\\nCan someone help me out?\", \"post_time\": \"2019-03-18 14:47:18\" },\n\t{ \"post_id\": 25393, \"topic_id\": 6623, \"forum_id\": 10, \"post_subject\": \"Re: STORED and THOR\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard.\", \"post_time\": \"2019-03-20 11:09:11\" },\n\t{ \"post_id\": 25383, \"topic_id\": 6623, \"forum_id\": 10, \"post_subject\": \"Re: STORED and THOR\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou have two choices:
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-03-19 13:36:26\" },\n\t{ \"post_id\": 25363, \"topic_id\": 6623, \"forum_id\": 10, \"post_subject\": \"STORED and THOR\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThis could be a VERY silly question, but I have ECL query that's written for ROXIE and has arguments passed in using the STORED construct.\\n\\nI would like to run the same ECL on THOR with minimal change if any (preferably no change)\\n\\nAny ideas?\\n\\nYours\\nAllan\", \"post_time\": \"2019-03-19 09:36:21\" },\n\t{ \"post_id\": 25833, \"topic_id\": 6773, \"forum_id\": 10, \"post_subject\": \"Re: IPropertyTree: Ambiguous xpath used getProp\", \"username\": \"rati\", \"post_text\": \"Thanks Richard,\\nI created this error by re-naming a major input dataset. So what you are saying is very much possible in my case too.\\n\\nRegards,\\nRati\", \"post_time\": \"2019-04-18 22:47:38\" },\n\t{ \"post_id\": 25633, \"topic_id\": 6773, \"forum_id\": 10, \"post_subject\": \"Re: IPropertyTree: Ambiguous xpath used getProp\", \"username\": \"rtaylor\", \"post_text\": \"rati,\\n\\nI've seen that error message, and for me the problem has always been that the XPATHs in the RECORD structure are not correctly configured, such that the library is confused about exactly how to find the data. \\n\\nHere's my most recent example of that://trying to define this data:\\t\\nreq := '<ROW>'+\\n '<test:request> '+\\n '<dapp:IDD>16212321321j321n3kj21j3kn213</dapp:IDD>'+\\n '<dapp:Request>'+\\n '<dapp:RecordReqGetRec>'+\\n '<dapp:AfterTimeStamp>2018-03-11</dapp:AfterTimeStamp>'+\\n '<dapp:StatusCodes>'+\\n '<arr:string>NFFF</arr:string>'+\\n '<arr:string>NPPP</arr:string>'+\\n '</dapp:StatusCodes>'+\\n '</dapp:RecordReqGetRec>'+\\n '</dapp:Request>'+\\n '</test:request>'+\\n '</ROW>';
The "problem" here, of course, is getting the nested child dataset entries (arr:string) correctly defined.\\n\\nMy first attempt was to define it like this:layout := RECORD\\n STRING arrString{XPATH('arr:string')}; \\nEND;\\nrLex := RECORD\\n STRING dappIDD{XPATH('test:request/dapp:IDD')}; \\n STRING AfterTimeStamp{XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:AfterTimeStamp')}; \\n DATASET(Layout) StatusCodes{ XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:StatusCodes')};\\n STRING arrString1{XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:StatusCodes/arr:string[1]')};\\n STRING arrString2{XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:StatusCodes/arr:string[2]')};\\nEND;\\n\\nFROMXML(rLex,req);
Which did NOT work, and got the same error you're talking about.\\n\\nThe solution was to change the XPATHs this way:layout := RECORD\\n STRING arrString{XPATH('')}; // get self..\\nEND;\\nrLex := RECORD\\n STRING dappIDD{XPATH('test:request/dapp:IDD')}; \\n STRING AfterTimeStamp{XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:AfterTimeStamp')};\\n DATASET(Layout) StatusCodes{ XPATH('test:request/dapp:Request/dapp:RecordReqGetRec/dapp:StatusCodes/arr:string')};\\nEND;\\n\\nFROMXML(rLex,req);
This DOES work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-10 19:45:07\" },\n\t{ \"post_id\": 25623, \"topic_id\": 6773, \"forum_id\": 10, \"post_subject\": \"IPropertyTree: Ambiguous xpath used getProp\", \"username\": \"rati\", \"post_text\": \"what causes these kinds on errors on a cluster ? Is this related to the clusters’s package file ? \\n\\nIPropertyTree: Ambiguous xpath used getProp: ambiguous xpath "Alias[@id='abc.searchservice.16']"\\n \\n‘wsWorkunits.WUListQueries IpropertyTree: Ambigous Xpath used getProp: ambiguous xpath “Alias[@id=’xyz.searchservices.57’]”\\n\\nThis is actually causing the cluster to become unstable.\\n\\nIs there a way in which we can prevent this. Currently the only way we have been able to fix this is to delete the listed query.\", \"post_time\": \"2019-04-10 18:23:55\" },\n\t{ \"post_id\": 26103, \"topic_id\": 6803, \"forum_id\": 10, \"post_subject\": \"Re: Why is this grammar ambiguous?\", \"username\": \"Allan\", \"post_text\": \"OK I've had a reply from Gavin.\\n\\nIt is because it is ambiguous with single token look ahead.\\nE.g.\\nParsing "a" "a" "a".... should that match\\n["a" [ "a"] "a"]\\nor\\n["a" ["a" ["a" \\nYou cannot tell until all the tokens are parsed.\\nTry adding\\n#option ('debugNlp', 1);\\nto the query - it adds a comment to the c++ file describing the grammar.\\n
\\n\\nExample Dump of Grammar in said c++ file:\\nHuman readable form of the grammar\\n\\t\\nOptions: Match(First) Scan(Whole)\\nMatches:\\n\\texpr{3}\\n\\nFeatures:\\nTokens:\\n TOKEN<0> tok0 := 'a';\\n TOKEN<1> tok1 := 'b';\\n TOKEN<2> EOF ;\\nRules:\\n Rule<3> expr\\n CanBeNull(0) First[?] Follow[2 0 1]\\n Production<0>: [] CloningTransform := tok0 expr tok0\\n Production<1>: [] CloningTransform := tok1 expr tok1\\n Production<2>: [] := rule6\\n Rule<4> a\\n CanBeNull(0) First[?] Follow[]\\n Production<3>: [] := tok0\\n Rule<5> b\\n CanBeNull(0) First[?] Follow[]\\n Production<4>: [] := tok1\\n Rule<6> rule6\\n CanBeNull(0) First[?] Follow[2 0 1]\\n Production<5>: [] := tok0\\n Production<6>: [] := tok1\\n Rule<7> rule7\\n CanBeNull(0) First[?] Follow[]\\n Production<7>: [] := expr\\nLexer:\\n\\tEndOfToken: []\\n\\tToken DFA numStates="3" numTransitions="2"\\tSkip DFA numStates="2" numTransitions="48"\\nStates:\\n\\t[0] I={[7,0]} [0->1,1->2,3->3,6->4]\\n\\t[1] I={[0,1],[5,1]} [0->1,1->2,3->5,6->4]\\n\\t[2] I={[1,1],[6,1]} [0->1,1->2,3->6,6->4]\\n\\t[3] I={[7,1]} []\\n\\t[4] I={[2,1]} []\\n\\t[5] I={[0,2]} [0->7]\\n\\t[6] I={[1,2]} [1->8]\\n\\t[7] I={[0,3]} []\\n\\t[8] I={[1,3]} []\\nParser:\\nStates:\\n\\tRoot=0\\n\\t[0]\\t\\tS1\\tS2\\t\\t\\tGoto: 3->3 6->4 \\n\\t[1]\\t\\t{S1R5}\\t{S2R5}\\tR5\\t\\tGoto: 3->5 6->4 \\n\\t[2]\\t\\t{S1R6}\\t{S2R6}\\tR6\\t\\tGoto: 3->6 6->4 \\n\\t[3]\\t\\t\\t\\tA\\t\\tGoto: \\n\\t[4]\\t\\tR2\\tR2\\tR2\\t\\tGoto: \\n\\t[5]\\t\\tS7\\t\\t\\t\\tGoto: \\n\\t[6]\\t\\t\\tS8\\t\\t\\tGoto: \\n\\t[7]\\t\\tR0\\tR0\\tR0\\t\\tGoto: \\n\\t[8]\\t\\tR1\\tR1\\tR1\\t\\tGoto: \\nProductions:\\n\\t[0] rule:3 pop:3\\n\\t[1] rule:3 pop:3\\n\\t[2] rule:3 pop:1\\n\\t[3] rule:4 pop:1\\n\\t[4] rule:5 pop:1\\n\\t[5] rule:6 pop:1\\n\\t[6] rule:6 pop:1\\n\\t[7] rule:7 pop:1\\n
\", \"post_time\": \"2019-05-08 10:24:37\" },\n\t{ \"post_id\": 25723, \"topic_id\": 6803, \"forum_id\": 10, \"post_subject\": \"Why is this grammar ambiguous?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nTo get my head around the Tomita variant of parsing.\\nI've generated the very simplest grammar, (where there are lots of examples on the Web).\\nJust to parse palindromes. So I have:\\n\\nTOKEN a := 'a';\\nTOKEN b := 'b';\\n\\nattrRec := RECORD\\n STRING val;\\nEND;\\n\\nRULE(attrRec) expr := a USE(attrRec,expr) a \\n | b USE(attrRec,expr) b \\n | (a|b) TRANSFORM(attrRec,SELF.val := $1 );\\n\\ninfile := DATASET([{'aba'},{'a'},{'b'},{'ab'}],{ STRING line });\\n\\nOUTPUT(PARSE(infile,line,expr,{STRING Text := MATCHTEXT},FIRST,WHOLE,PARSE,SKIP([' ','\\\\t']+)));\\n
\\nThis all works, but I get this warning:\\n\\nWarning: The PARSE pattern for activity 3 is ambiguous. This may reduce the efficiency of the PARSE. (15, 8), 4537, \\n
\\nAnd I can't for the life of me work out why.\\n\\nAny ideas?\\n\\nAllan\", \"post_time\": \"2019-04-16 08:41:33\" },\n\t{ \"post_id\": 25923, \"topic_id\": 6843, \"forum_id\": 10, \"post_subject\": \"Re: Returning a Filter\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nNot sure If you've seen my post on 'Multi-dimensional, Multi-variable' searches.\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=41&t=6183\\n\\nAs I understand your post it seems to address your problem?\\n\\nYours\\nAllan\", \"post_time\": \"2019-04-25 12:47:26\" },\n\t{ \"post_id\": 25913, \"topic_id\": 6843, \"forum_id\": 10, \"post_subject\": \"Re: Returning a Filter\", \"username\": \"rtaylor\", \"post_text\": \"Artur,I tried to apply the approach you shown, but when setting TRUE to a string or to a unsigned, an error occur, cause it should be boolean.
Yes. Because a filter must always be a Boolean expression, I assumed that your FilterFunc01 and FilterFunc02 functions returned Boolean results. Replacing those functions with Boolean expressions is the appropriate thing to do, but you still need to use TRUE as your filter expression if no parameter is passed, like this:EXPORT myRoxieQuery () := FUNCTION\\n Parms := STORED($.iUserInfo);\\n myDataSet := DATASET('somefile', THOR);\\n filter01 := IF(iParams.name='', TRUE, name=iParams.name);\\n //all recs if no name passed, otherwise filter for passed name\\n filter02 := IF(iParams.age=0, TRUE, age=iParams.age);\\n //all recs if no age passed, otherwise filter for passed age\\n RETURN myDataSet(filter01, filter02);\\nEND
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-23 17:44:09\" },\n\t{ \"post_id\": 25863, \"topic_id\": 6843, \"forum_id\": 10, \"post_subject\": \"Re: Returning a Filter\", \"username\": \"abaruchi\", \"post_text\": \"Hello Richard,\\nThanks for your reply. I tried to apply the approach you shown, but when setting TRUE to a string or to a unsigned, an error occur, cause it should be boolean.\\n\\n\\nIncompatible types: expected Integer, given Boolean,\\n
\\n\\nI tried to use something like this:\\n\\n\\nEXPORT myRoxieQuery () := FUNCTION\\n Parms := STORED($.iUserInfo);\\n myDataSet := DATASET('somefile', THOR);\\n filter01 := IF(iParams.name='', name <> '', name=iParams.name);\\n filter02 := IF(iParams.age=0, age > 0, age=iParams.age);\\n RETURN myDataSet(filter01, filter02);\\nEND\\n
\\n\\nDon't know if it is the best way to do it, since I'm applying some filter (i.e. if a typo is made, and someone has a negative age, it should be returned).\\n\\nThanks,\\n\\n- Artur Baruchi\", \"post_time\": \"2019-04-22 13:05:17\" },\n\t{ \"post_id\": 25843, \"topic_id\": 6843, \"forum_id\": 10, \"post_subject\": \"Re: Returning a Filter\", \"username\": \"rtaylor\", \"post_text\": \"Artur ,[quote="abaruchi":1rfjjwxq]Hi guys,\\n\\nIm implementing a Roxie query and several filters should be applied to the query according to the options passed by the user (i.e. if a given parameter is not passed, the filter shouldnt be applied). \\nI was able to implement, but Im wondering a better way to do that and would like to use functions that return the filter. \\nExample:\\n\\n\\nEXPORT myRoxieQuery () := FUNCTION\\n Parms := STORED($.iUserInfo);\\n myDataSet := DATASET('somefile', THOR);\\n filter01 := FilterFunc01(iParams.name);\\n filter02 := FilterFunc02(iParams.age);\\n RETURN myDataSet(filter01, filter02); \\nEND;\\n
\\n\\nI would like to know if there is someway to implement something like this. The code would be much better (at least it would be cleaner).You can do it like this:\\n\\nEXPORT myRoxieQuery () := FUNCTION\\n Parms := STORED($.iUserInfo);\\n myDataSet := DATASET('somefile', THOR);\\n filter01 := IF(iParams.name='', TRUE,FilterFunc01(iParams.name));\\n filter02 := IF(iParams.age=0, TRUE,FilterFunc02(iParams.age);\\n RETURN myDataSet(filter01, filter02); \\nEND;
So if no value is passed for either filter condition it just defaults to TRUE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-19 14:13:43\" },\n\t{ \"post_id\": 25783, \"topic_id\": 6843, \"forum_id\": 10, \"post_subject\": \"Returning a Filter\", \"username\": \"abaruchi\", \"post_text\": \"Hi guys,\\n\\nIm implementing a Roxie query and several filters should be applied to the query according to the options passed by the user (i.e. if a given parameter is not passed, the filter shouldnt be applied). \\nI was able to implement, but Im wondering a better way to do that and would like to use functions that return the filter. \\nExample:\\n\\n\\nEXPORT myRoxieQuery () := FUNCTION\\n Parms := STORED($.iUserInfo);\\n myDataSet := DATASET('somefile', THOR);\\n filter01 := FilterFunc01(iParams.name);\\n filter02 := FilterFunc02(iParams.age);\\n RETURN myDataSet(filter01, filter02); \\nEND;\\n
\\n\\nI would like to know if there is someway to implement something like this. The code would be much better (at least it would be cleaner).\\n\\nThanks in advance,\\n\\n- Artur Baruchi\", \"post_time\": \"2019-04-18 17:00:49\" },\n\t{ \"post_id\": 25963, \"topic_id\": 6883, \"forum_id\": 10, \"post_subject\": \"Re: Code Documentation\", \"username\": \"rtaylor\", \"post_text\": \"Artur,I was wondering if there is some good practice for code documentation in ECL.
ECL is already a very terse and expressive language so it is predominantly self-documenting -- meaning the code itself tells you exactly what it is doing (remember, ECL code statements are definitions, not executable code). \\n\\nBecause of that, you will find very few comments in our production ECL code, and the vast majority of those comments are of the "I did it this way because ..." variety. \\n\\nIf you look at the ECL code for things like our Date standard library (in your Repository you can find that in the ecllibrary >> std >> Date.ecl file), you can see that we do use the JavaDoc standard format to document all the code in our Standard Library. You can also adopt this practice, if you choose to. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-04-26 13:46:22\" },\n\t{ \"post_id\": 25953, \"topic_id\": 6883, \"forum_id\": 10, \"post_subject\": \"Re: Code Documentation\", \"username\": \"abaruchi\", \"post_text\": \"Hi Allan,\\n\\nI think there is a lack of information regarding ECL code documentation. I found an ECL best practices sometime ago (about how to name variables, code indentation, etc) that may be useful for you.\\nDocument the code, inside the code, is one of the best practices a development team can have. I think it is faster, since you don't need to update two different places when creating new code and you can update the documentation while updating the code, so it is more accurate. \\n\\nLink for doc best practices:\\nhttp://cdn.hpccsystems.com/pdf/ecl_best_practices.pdf\\n\\nThanks for replying. I will keep this thread updated as I get more information about this.\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-04-26 13:20:01\" },\n\t{ \"post_id\": 25943, \"topic_id\": 6883, \"forum_id\": 10, \"post_subject\": \"Re: Code Documentation\", \"username\": \"rtaylor\", \"post_text\": \"Allan and Artur,\\n\\nThere were no replies because there are no tools we currently have for that purpose.\\n\\nHPCC is Open Source. Feel free to create one and contribute it. \\n\\nRichard\", \"post_time\": \"2019-04-26 13:03:38\" },\n\t{ \"post_id\": 25933, \"topic_id\": 6883, \"forum_id\": 10, \"post_subject\": \"Re: Code Documentation\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI asked a similar question on the availability of a 'Prittifier' for ECL.\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=5903\\n\\nAlso no replies.\", \"post_time\": \"2019-04-26 08:09:26\" },\n\t{ \"post_id\": 25883, \"topic_id\": 6883, \"forum_id\": 10, \"post_subject\": \"Code Documentation\", \"username\": \"abaruchi\", \"post_text\": \"Hello,\\n\\nI was wondering if there is some good practice for code documentation in ECL. As a python programmer, I usually document my code using Doc String (google doc string) and after run doxygen, for example, to generate some nice visualization for the project. \\nI know that ECL uses some C/C++ comment style, so I think that applying any good practice that already is in use for C/C++ could work. However, I think that it would be nice to hear from the community if ECL already has something in place for this.\\n\\nThanks,\\n\\n- Artur Baruchi\", \"post_time\": \"2019-04-22 16:54:36\" },\n\t{ \"post_id\": 26083, \"topic_id\": 6923, \"forum_id\": 10, \"post_subject\": \"Re: Override a Function of a Module\", \"username\": \"abaruchi\", \"post_text\": \"Hi Allan,\\n\\nThanks for your reply. Actually your message gave me an idea that worked
\\n\\nI read the ECL documentation again and it is possible to specify a single definition as VIRTUAL. So, using my first code as a reference, it would be something like this:\\n\\n
\\nEXPORT myModuleWithAFunction := MODULE\\n EXPORT VIRTUAL myFunction(UNSIGNED numA, UNSIGNED numB) := FUNCTION\\n RETURN numA + numB;\\n END;\\nEND;\\n
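\\n\\nWith the definition marked VIRTUAL, the mock can then inherit from the module and override just that one definition - roughly like this (the canned return value is only for illustration):\\nmyMockedModule := MODULE($.myModuleWithAFunction)\\n EXPORT myFunction(UNSIGNED numA, UNSIGNED numB) := FUNCTION\\n RETURN 42; //fixed value for testing\\n END;\\nEND;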
\\n\\nIt was a good solution, since I could keep the code with minimal changes.\\n\\nThanks,\\n\\nArtur Baruchi\", \"post_time\": \"2019-05-03 20:05:48\" },\n\t{ \"post_id\": 26063, \"topic_id\": 6923, \"forum_id\": 10, \"post_subject\": \"Re: Override a Function of a Module\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nRead up VIRTUAL MODULEs in the ECL Ref manual.\\nYour virtual MODULE can define the interface, then you have two CONCRETE MODULES inheriting from it.\\n\\nThe fact you mention 'non-virtual' means you already know about 'VIRTUAL' in ECL so why can't you use this functionality?\\nYours\\nAllan\\n\\nExample of Use:\\nAbstractMod := MODULE,VIRTUAL\\n EXPORT V1 := 42;\\n EXPORT STRING V2;\\nEND;\\n\\nMod1 := MODULE(AbstractMod)\\n EXPORT STRING V2 := 'Mod1';\\nEND;\\n\\nMod2 := MODULE(AbstractMod)\\n EXPORT STRING V2 := 'Mod2';\\nEND;\\n\\n{INTEGER V1,STRING V2} F(AbstractMod m) := FUNCTION\\n RETURN ROW({m.V1,m.V2},{INTEGER V1,STRING V2});\\nEND;\\n\\nF(Mod1);\\nF(Mod2);\\n
\", \"post_time\": \"2019-05-03 13:10:04\" },\n\t{ \"post_id\": 26053, \"topic_id\": 6923, \"forum_id\": 10, \"post_subject\": \"Re: Override a Function of a Module\", \"username\": \"abaruchi\", \"post_text\": \"Hello Allan,\\n\\nThanks for your reply.\\nWe do have mock data. What happens in my situation, is that this function that Im trying to mock, get some data from a service that returns a SOAPCALL and I don't want to perform any calls to a service outside my code. Im able to mock the dataset returned by this function, but when Im trying to override this function to return that mocked dataset, I got an error saying that Im not able to override a non-virtual module.\\n\\nThanks!\\n\\n- Artur Baruchi\", \"post_time\": \"2019-05-03 12:12:29\" },\n\t{ \"post_id\": 26043, \"topic_id\": 6923, \"forum_id\": 10, \"post_subject\": \"Re: Override a Function of a Module\", \"username\": \"Allan\", \"post_text\": \"Hi abaruchi,\\n\\nDepends upon what you want to achieve with a 'mock up'?\\nI myself and others in my team never try to mock up anything, but exercise unaltered production ready code.\\n\\nWe do this by having different banks of data (logical files/superfiles), which are all accessed via the same 'File' attributes. The MODULE encapsulating said attributes has an ENUM parameter something like:\\n\\n\\nRunType := ENUM(UNSIGNED1,UNIT_TEST,QA,CERT,PRODUCTION);\\nFiles(Runtype rt) := MODULE\\n...\\n
\\n\\nThen the same code gets exercised with UNIT_TEST data as will live production data.\\n\\nYour 'mock up' should be 'mock up' data, not mocked up functions?\\n\\nPerhaps I've got the wrong end of the stick.\\nYours\\nAllan\", \"post_time\": \"2019-05-03 11:30:06\" },\n\t{ \"post_id\": 26013, \"topic_id\": 6923, \"forum_id\": 10, \"post_subject\": \"Override a Function of a Module\", \"username\": \"abaruchi\", \"post_text\": \"Hi there,\\n\\nI would like to know if it is possible to override a function of a non-Virtual Module.\\nTake the following code as example:\\n\\n\\nEXPORT myModuleWithAFunction := MODULE\\n EXPORT myFunction(UNSIGNED numA, UNSIGNED numB) := FUNCTION\\n RETURN numA + numB\\n END;\\nEND;\\n
\\n\\nNow, suppose you need to mock this module and you should override the `myFunction` function. I tried something like this, but since myModuleWithAFunction is not a virtual module, I cannot override the myFunction.\\n\\n\\nmyMockedFunc(UNSIGNED a) := FUNCTION\\n RETURN MODULE($.myModuleWithAFunction)\\n EXPORT myFunction := a;\\n END;\\nEND;\\n
\\n\\nI tried with `SHARED myFunction := a`, but the error persist. So, the question is:\\n- Is it possible to override a function inside a non-virtual module? If yes, how to do it.\\n- If not, how would you test/mock the return of this function? \\n\\nObs.: I tried to simplify the problem to focus in the real error, which is overriding a function. The real code is much more complex, with several calls to other functions and services, the returning value is more complex too. The thing is.. All these complex calling functions is not scope of my code, so I just want to mock a return value and go on. \\n\\n\\nThanks in advance,\\n\\nArtur Baruchi\", \"post_time\": \"2019-05-02 22:50:25\" },\n\t{ \"post_id\": 26313, \"topic_id\": 6963, \"forum_id\": 10, \"post_subject\": \"Re: Adjusting # of Iterations for Template Language #LOOP\", \"username\": \"raja\", \"post_text\": \"hi Matthew,\\n for particular to this requirement, where you would want to call your randomsample for the noofiterations, I think template langauage is not required or it could be little more burden to achieve what we need. This requirement can be easily achieved with Dataset form as below and rollup all the records together to get the summed results out.\\n\\nI have shared the code below, for your reference. we believe this problem can be achived in many ways, this one is one of the simple solutions I could think of.\\n\\nplease let us know alternative solutions which is better in performance if any.\\n\\nprocessedDs := DATASET(\\n NbrIters,\\n TRANSFORM({typeof(ds_useme) x},\\n self.x := zz_mrumsey.FMAC_RandomSample(ds_useme, .25);\\n )\\n );\\n\\n rolledupDs := ROLLUP(processedDs, \\n\\t\\t TRUE, \\n TRANSFORM(RECORDOF(processedDs),\\n\\t\\t\\t SELF.x := IF(EXISTS(left.x), left.x + right.x, RIGHT.X);\\n\\t\\t\\t )\\n\\t );\\n\\nOUTPUT(rolledupDs[1].x);\\n\\nRegards,\\nRaja\", \"post_time\": \"2019-05-15 14:55:38\" },\n\t{ \"post_id\": 26253, \"topic_id\": 6963, \"forum_id\": 10, \"post_subject\": \"Re: Adjusting # of Iterations for Template Language #LOOP\", \"username\": \"bforeman\", \"post_text\": \"Matt, can you share your MACRO solution with the community please?\\nOr just an abstract if the code is proproetary.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2019-05-14 20:28:47\" },\n\t{ \"post_id\": 26233, \"topic_id\": 6963, \"forum_id\": 10, \"post_subject\": \"Re: Adjusting # of Iterations for Template Language #LOOP\", \"username\": \"mrumsey\", \"post_text\": \"UPDATE:\\n\\nI realize that code I had looked at previously had math (for # of iterations) based on constants supplied prior to compile-time, thereby satisfying the 'Constant Expression' requirement as it could be computed prior to code generation.\\n\\nI am hoping to get around this by using a BWR file but I need a way to generate or discover a record structure. \\n\\nDoes anyone know of a way to generate a full record-structure that is unknown at compile time? 
I know there is the 'LOOKUP' option for a DATASET call, but it drops fields that are unknown, which makes the code less flexible and less viable for projects other than my current one.\\n\\nI feel this should be possible, as I can get a record structure from HPCC (via ECL Watch) as well as the system telling me when my layout is incorrect.\\n\\nThanks again,\\n\\nMatt Rumsey\", \"post_time\": \"2019-05-13 16:06:54\" },\n\t{ \"post_id\": 26193, \"topic_id\": 6963, \"forum_id\": 10, \"post_subject\": \"Adjusting # of Iterations for Template Language #LOOP\", \"username\": \"mrumsey\", \"post_text\": \"I am trying to create basic bootstrap code in ECL, that can be configurable on run-time for a certain # of iterations (whatever is required to hit our target # of records). \\n\\nOriginally I tried using a count of the created bootstrapped dataset to generate a cutoff, but the code required a Constant Expression. I changed to a number of iterations method, which works...so long as I pass a hard-coded integer.\\n\\nIf I do a calculation to create the # of iterations (as in the code below), it gives me the same 'Constant Expression Expected' error. I have tried utilizing multiple castings, #STORED, #DECLARE, both inside and out of the FunctionMacro, but ECL seems deadset on not letting me have this run-to-run variable input.\\n\\nIs there any way to 'force' the Template Language to accept my simple math, or is there another way to adjust the # of iterations?\\n\\nThanks, \\n\\nMatt Rumsey\\n\\n#LOOP Code in FunctionMacro\\nIMPORT\\tzz_mrumsey;\\n\\nEXPORT FMAC_BootStrap_Basic(d_Input, FinCnt) := FUNCTIONMACRO\\n\\tLOADXML('<xml/>');\\n\\n\\tLOCAL\\tds_0\\t\\t\\t:=\\tDATASET([], RECORDOF(d_Input));\\n\\tLOCAL\\tcnt_Input\\t:=\\tCOUNT(d_Input);\\n\\tLOCAL\\tNumIters\\t:=\\tROUNDUP(FinCnt / (cnt_Input * .25)); //Calculates ~# of Samples Needed. Samples Taken At 25% of Input Dataset\\n\\t\\t\\n\\t#DECLARE(ds_xxx);\\n\\t#DECLARE(ds_xxxOld);\\n\\t#DECLARE(i);\\n\\t#DECLARE(j);\\n\\t\\n\\t#SET(i, 1);\\n\\t#SET(j, 0);\\t\\n\\n\\t#LOOP\\n\\t\\t#SET(ds_xxx, 'ds_')\\n\\t\\t#SET(ds_xxxOld, 'ds_')\\n\\t\\t\\n\\t\\t\\t#IF(%i% > NumIters)\\n\\t\\t\\t\\t#APPEND(ds_xxxOld, %'j'%);\\n\\t\\t\\t\\tRETURN\\t%ds_xxxOld%;\\n\\t\\t\\t\\t#BREAK\\n\\t\\t\\t#ELSE\\t\\t\\t\\n\\t\\t\\t\\t#APPEND\\t(ds_xxx, %'i'%)\\n\\t\\t\\t\\t#APPEND (ds_xxxOld, %'j'%);\\n\\n\\t\\t\\t\\t#UNIQUENAME(ds_Temp)\\n\\t\\t\\t\\t%ds_Temp%\\t:=\\tzz_mrumsey.FMAC_RandomSample(d_Input, .25);\\n\\t\\t\\t\\t%ds_xxx% := %ds_xxxOld% + %ds_Temp%;\\t\\t\\t\\n\\n\\t\\t\\t\\t#SET (j, %i%);\\n\\t\\t\\t\\t#SET (i, %i% + 1);\\n\\t\\t\\t#END\\n\\t#END\\nENDMACRO;
\", \"post_time\": \"2019-05-10 19:30:55\" },\n\t{ \"post_id\": 26583, \"topic_id\": 6993, \"forum_id\": 10, \"post_subject\": \"Re: Trouble loading JSON Dataset\", \"username\": \"rtaylor\", \"post_text\": \"andre,\\n\\nName/value for the attributes won't work because some of the "values" are themselves nested child datasets. I suggest that you either need to fully define all the possible nested child datasets in the RECORD structure, or abandon the effort to define it as a JSON DATASET and simply treat the JSON data as a single STRING field that you can then PARSE to extract the specific information you need.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-22 13:51:58\" },\n\t{ \"post_id\": 26543, \"topic_id\": 6993, \"forum_id\": 10, \"post_subject\": \"Re: Trouble loading JSON Dataset\", \"username\": \"andre.martins\", \"post_text\": \"Richard,\\n\\nI already did and revisited these courses looking for a way to solve this problem. But no success.\\n\\nI'm stuck because I have to load the childs datasets as name/value, check the following records.\\nExample:\\n\\nEXPORT AttributeRecord := RECORD\\n STRING name;\\n STRING value;\\nEND;\\n\\nEXPORT BusinessRecord := RECORD\\n STRING business_id {XPATH('business_id')};\\n STRING name {XPATH('name')}; \\n DATASET(AttributeRecord) Attributes {XPATH('attributes')};\\nEND;\\n
\\n\\nI did'nt find a way to work with this attributes that has N different types, since XPATH will only take the value of one attribute.\", \"post_time\": \"2019-05-20 17:59:15\" },\n\t{ \"post_id\": 26483, \"topic_id\": 6993, \"forum_id\": 10, \"post_subject\": \"Re: Trouble loading JSON Dataset\", \"username\": \"rtaylor\", \"post_text\": \"André,\\n\\nNested child datasets are covered in the Advanced ECL (Part 1) online course, and XML/JSON in Advanced ECL (Part 2). I think you'll find those helpful (http://learn.lexisnexis.com/hpcc).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-17 20:29:18\" },\n\t{ \"post_id\": 26473, \"topic_id\": 6993, \"forum_id\": 10, \"post_subject\": \"Trouble loading JSON Dataset\", \"username\": \"andre.martins\", \"post_text\": \"Hi there,\\n\\nThis is the first time I'm working with json. I am having troubles trying to load a json dataset file. I uploaded a file into the ECL Watch and now I am trying to load the file into a dataset to clean the data layout and then perform some queries using it.\\n\\nThe json file does not have a root, so it is one register per line. Something like the example below:\\n\\n{"id":"QXAEGFB4oI", "name":"Emerald Chinese Restaurant", "attributes":{"RestaurantsReservations":"True", "GoodForMeal":"{'dessert': False, 'latenight': False}}, "hours":{"Monday":"9:0-0:0","Tuesday":"9:0-0:0"}\\n
\\n\\nI read the documentation and I didn't just really did not understand how I could work with the datasets inside other datasets, like the attributes and the hours that is inside this file.\\nThe 'attributes' field is an array of strings, that each array element is an attribute (name, value), and the 'hours' field is an array of strings of business hours (day, hours).\\n\\nI was thinking about using the XPATH, but there are different attributes names depending of the record.\\nI'm just asking for a direction of how to proceed with this kind of data.\\n\\nThank you,\\nAndré.\", \"post_time\": \"2019-05-17 18:56:08\" },\n\t{ \"post_id\": 26653, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Re: Mixing single(FIRST) and multiple Pattern matching (PARS\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nJust to "throw a spanner into the works" here's a totally different approach:rec := {STRING line};\\ndatafile := DATASET([\\n {'1234567,Allan Wrobel,24-03-1958,6 Barkham Rd Woking Surry RG41 4DA.'}\\n ,{'8976 ,Anna White,20-01-1961,55 Walton Rd Cambs PO87 4RT.'}\\n ,{'45432 ,Nina Brown,28-04-1974,27 Alma Dr Chesham Bucks AM12 2WA.'}\\n ],rec);\\n\\nIMPORT Std;\\nres := RECORD\\n UNSIGNED Id1 ;\\n STRING name;\\n STRING dob;\\n RDate Date;\\n STRING address;\\nEND;\\n\\nPROJECT(datafile,\\n TRANSFORM(res,\\n SetVals := Std.Str.SplitWords(LEFT.line,',');\\t\\t\\t\\t\\n SELF.id1 := (UNSIGNED)SetVals[1];\\n SELF.name := SetVals[2];\\n SELF.dob := SetVals[3];\\n SELF.address := SetVals[4];\\n SetDate := Std.Str.SplitWords(SELF.dob,'-');\\n SELF.Date := ROW({SetDate[1],SetDate[2],SetDate[3]},RDate);\\n ));
\\nNo need to use PARSE at all! \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-22 17:17:24\" },\n\t{ \"post_id\": 26643, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Re: Mixing single(FIRST) and multiple Pattern matching (PARS\", \"username\": \"rtaylor\", \"post_text\": \"Alan,
In your 2nd example, you did not need to anchor 'id' to the start of line using FIRST.\\nCan leaving this out cause problems?
Frankly, I've never used FIRST in any PATTERN definition, so I expect the answer would be, "No, unless it's truly necessary." \\n\\nThe fact that my parse pattern "expr" is explicitly looking for "id sep txt sep dob sep txt;" (an id followed by a sep followed by a txt followed by a sep followed by a dob followed by a sep followed by a txt) eliminates the need for FIRST. \\n\\nI think FIRST is only useful if it's possible that the pattern you're looking to match could also be found somewhere in the middle of your search string and you really only want a match where it starts at the beginning of that string (which it can't in this case, because the search pattern maps the entire search string). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-22 16:37:12\" },\n\t{ \"post_id\": 26633, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Re: Mixing single(FIRST) and multiple Pattern matching (PARS\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nIn your 2nd example, you did not need to anchor 'id' to the start of line using FIRST.\\nCan leaving this out cause problems?\\n(I usually like to anchor my patterns somewhere so I don't get spurious matches from unexpected places.)\\n\\nI like the way your example makes use of instances of a match, e.g. num[1] and num[2].\\n\\nYours\\nAllan\", \"post_time\": \"2019-05-22 15:59:55\" },\n\t{ \"post_id\": 26623, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Re: Mixing single(FIRST) and multiple Pattern matching (PARS\", \"username\": \"Allan\", \"post_text\": \"Thanks Very much, Richard,\\n\\nI'll go away and inwardly digest your examples.\\n\\nYours\\nAllan\", \"post_time\": \"2019-05-22 15:54:37\" },\n\t{ \"post_id\": 26603, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Re: Mixing single(FIRST) and multiple Pattern matching (PARS\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nThe simple answer in this case is to change your RECORD structure, like this:results := RECORD\\n UNSIGNED Id1 := (UNSIGNED) MATCHTEXT(id);\\n STRING name := MATCHTEXT(name);\\n STRING dob := MATCHTEXT(dob);\\n // RDate Date := ROW({MATCHTEXT(day),MATCHTEXT(month),MATCHTEXT(year)},RDate);\\n RDate Date := ROW({MATCHTEXT(dob)[1..2],\\n MATCHTEXT(dob)[4..5],\\n MATCHTEXT(dob)[7..]},RDate);\\n STRING address := MATCHTEXT(address);\\nEND;
That will allow you to handle the same matching text the two ways you want.\\n\\nThe more generic answer would be to use a TRANSFORM instead of a RECORD structure to define your PARSE result. TRANSFORM provides much more flexibility in dealing with each and every bit of data extracted by PARSE.\\n\\nFWIW, here is the way I would have written this PARSE:rec := {STRING line};\\ndatafile := DATASET([\\n {'1234567,Allan Wrobel,24-03-1958,6 Barkham Rd Woking Surry RG41 4DA.'}\\n ,{'8976 ,Anna White,20-01-1961,55 Walton Rd Cambs PO87 4RT.'}\\n ,{'45432 ,Nina Brown,28-04-1974,27 Alma Dr Chesham Bucks AM12 2WA.'}\\n ],rec);\\n\\nPATTERN num := PATTERN('[0-9]');\\nPATTERN num2 := REPEAT(num,2);\\nPATTERN num4 := REPEAT(num,4);\\nPATTERN txt := ANY*;\\nPATTERN sep := OPT(' '+) ',';\\n\\nPATTERN id := num+;\\nPATTERN dob := num2 '-' num2 '-' num4 ;\\nPATTERN expr := id sep txt sep dob sep txt;\\n\\nRDate := RECORD\\n UNSIGNED1 day;\\n UNSIGNED1 month;\\n UNSIGNED2 year;\\nEND;\\n\\nresults := RECORD\\n UNSIGNED Id1 := (UNSIGNED) MATCHTEXT(id);\\n STRING name := MATCHTEXT(txt[1]);\\n STRING dob := MATCHTEXT(dob);\\n RDate Date := ROW({MATCHTEXT(dob/num2[1]),\\n MATCHTEXT(dob/num2[2]),\\n MATCHTEXT(dob/num4)},RDate);\\n STRING address := MATCHTEXT(txt[2]);\\nEND;\\n\\nPARSE(datafile,line,expr,results,BEST);
Fewer and more generic PATTERNs make the code a bit simpler to my mind.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-22 14:50:43\" },\n\t{ \"post_id\": 26563, \"topic_id\": 7033, \"forum_id\": 10, \"post_subject\": \"Mixing single(FIRST) and multiple Pattern matching (PARSE)\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nBit of an esoteric question here, where there is an obvious workaround, but the exercise is to improve my understanding of pattern matching with PARSE (non Tiomta variant).\\n\\nI have a simple CSV structure, field separator ',' where, for most fields, I just want the FIRST match. However for dob I want to match it two ways.\\nAs a single string 'DD-MM-YYYY', but also as three UNSIGNEDs DD MM and YYYY.\\nrec := {STRING line};\\ndatafile := DATASET([\\n {'1234567,Allan Wrobel,24-03-1958,6 Barkham Rd Woking Surry RG41 4DA.'}\\n,{'8976 ,Anna White,20-01-1961,55 Walton Rd Cambs PO87 4RT.'}\\n,{'45432 ,Nina Brown,28-04-1974,27 Alma Dr Chesham Bucks AM12 2WA.'}\\n ],rec);\\n\\nPATTERN content := ANY*;\\nPATTERN id := FIRST content;\\nPATTERN name := content;\\nPATTERN dob := content;\\nPATTERN address := content;\\nPATTERN day := PATTERN('[0-9]{2}');\\nPATTERN month := day;\\nPATTERN year := PATTERN('[0-9]{4}');\\nPATTERN exprA := id ',' name ',' dob ',' address;\\nPATTERN exprB := id ',' name ',' day '-' month '-' year ',' address;\\n\\nPATTERN expr := exprA OR exprB;\\n\\nRDate := RECORD\\n UNSIGNED1 day;\\n UNSIGNED1 month;\\n UNSIGNED2 year;\\nEND;\\n\\nresults := RECORD\\n UNSIGNED Id1 := (UNSIGNED) MATCHTEXT(id);\\n STRING name := MATCHTEXT(name);\\n STRING dob := MATCHTEXT(dob);\\n RDate Date := ROW({MATCHTEXT(day),MATCHTEXT(month),MATCHTEXT(year)},RDate);\\n STRING address := MATCHTEXT(address);\\nEND;\\n\\nPARSE(datafile,line,expr,results,FIRST);\\n
\\nBecause of the 'OR' in the 'expr' if I use FIRST the parser just takes pattern 'dob'.\\nIf I use 'ALL' I get all matches 'dob' and 'day'... but multiple times as the other fields match multiple times.\\nI started with the simpler:\\nPATTERN expr := id ',' name ',' (dob OR day '-' month '-' year) ',' address;\\n
\\nTo no avail, same behaviour observed.\\n\\nSo my question:\\nHow does one match multiple times on a component of the entire pattern, when for other components, you only want to match once?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-05-21 16:12:03\" },\n\t{ \"post_id\": 26843, \"topic_id\": 7113, \"forum_id\": 10, \"post_subject\": \"Re: Converting DATASET to HTML <Table>\", \"username\": \"rtaylor\", \"post_text\": \"Sure\", \"post_time\": \"2019-06-28 18:00:17\" },\n\t{ \"post_id\": 26823, \"topic_id\": 7113, \"forum_id\": 10, \"post_subject\": \"Re: Converting DATASET to HTML <Table>\", \"username\": \"Allan\", \"post_text\": \"Thanks, Richard\\n\\nCould this go into 'tips-and-tricks'?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-06-28 13:37:36\" },\n\t{ \"post_id\": 26813, \"topic_id\": 7113, \"forum_id\": 10, \"post_subject\": \"Re: Converting DATASET to HTML <Table>\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's an example that I wrote awhile ago to generate XML, and as you can see, have just modified to create your table:SetLtr := ['A','B','C','D','E','F','G','H','I','J','K','L'];\\nds := DATASET(12,TRANSFORM({UNSIGNED1 UID,STRING1 Ltr},\\n SELF.UID := COUNTER, \\n SELF.Ltr := SetLtr[COUNTER]));\\n// ds;\\n\\nRec := {STRING xmltxt};\\n// StartXML := '<XML>'; \\n// EndXML := '</XML>';\\n// BldRow(UNSIGNED1 uid, STRING1 Ltr) := \\n// '<row><uid>' + (STRING)uid + '</uid><ltr>' + ltr + '</ltr></row>';\\nStartXML := '<table>';\\nEndXML := '</table>';\\nBldRow(UNSIGNED1 uid, STRING1 Ltr) := \\n '<tr><td>' + (STRING)uid + '</td><td>' + ltr + '</td></tr>';\\np := PROJECT(ds,\\n TRANSFORM(Rec,\\n SELF.xmltxt := BldRow(LEFT.UID,LEFT.Ltr)));\\nr := ROLLUP(p,TRUE,\\n TRANSFORM(Rec,\\n SELF.xmltxt := LEFT.xmltxt + RIGHT.xmltxt\\n\\t\\t\\t\\t\\t\\t\\t\\t ));\\nStartXML + r[1].xmltxt + EndXML;
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-06-28 12:51:52\" },\n\t{ \"post_id\": 26803, \"topic_id\": 7113, \"forum_id\": 10, \"post_subject\": \"Converting DATASET to HTML <Table>\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI can't believe this has not been done, but I can't find anything on the forum.\\nI have a bog standard DATASET (containing all STRING fields) that I just want to convert to a HTML table construct, e.g.\\n<table><tr><td>r1 c1</td><td>r1 c2</td></tr><tr><td>r2 c1</td><td>r2 c2</td></tr></table>
\\n\\nAnyone already done this, so I'm not re-inventing the Wheel.\\n\\nThanks\\n\\nAllan\", \"post_time\": \"2019-06-28 09:51:41\" },\n\t{ \"post_id\": 28313, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"Allan\", \"post_text\": \"Janet,\\n\\nThinking on about this, you could run 'Fiddler'\\nhttps://www.telerik.com/download/fiddler\\n\\nRun it while logging in on the IDE and you will find out the HTTP request/responses the IDE is doing to attach to your particular code repository.\\n\\nYou then just have to reproduce those in your code to get the ECL file you want to parse directly without having to look-up old workunits, which, frankly, is Mad Hatter party time.\\n\\nI hope this helps.\\n\\nAllan\", \"post_time\": \"2019-11-20 17:02:02\" },\n\t{ \"post_id\": 28293, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"Allan\", \"post_text\": \"Janet,\\nIf you think about it, under the hood the IDE is just one huge set of SOAPCALLS.\\n\\nAnything you see in the IDE or can manipulate in the same, you can do programmatically and, as you want to do, implement additional functionality not shipped as standard by LN.\\nCheers\\nAllan\", \"post_time\": \"2019-11-20 16:45:28\" },\n\t{ \"post_id\": 28283, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"janet.anderson\", \"post_text\": \"Interesting. Thank you.\", \"post_time\": \"2019-11-20 16:38:57\" },\n\t{ \"post_id\": 28273, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"Allan\", \"post_text\": \"Hi Janet,\\n\\nJust noticed your post, there is an obscure way to access ECL.\\nJust use a SOAPCALL on:\\nSTD.File.GetEspUrl()/WsWorkunits/WUInfo
\\n\\nRequesting just IncludeECL
on an old workunit (its WUID is a parameter)\\nto get the ECL back in the SOAP response.\\n\\nActually trivial to implement, but a bit 'round the houses'.\\nYours\\nAllan\", \"post_time\": \"2019-11-20 16:31:14\" },\n\t{ \"post_id\": 27033, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Richard.\", \"post_time\": \"2019-07-24 13:05:08\" },\n\t{ \"post_id\": 27023, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nThe problem is that, although the IDE knows about the file, you're going to be running that PARSE on a Thor (or hThor) so to make the file available in the cluster you have to be able to define it as a DATASET in your ECL code. \\n\\nIf you can configure your environment to use the machine where your code lives as a Landing Zone (or automate some process to put it on an LZ every time it's updated), you could then define the ECL code file as a Landing Zone file (see the "Scope and Logical Filenames => Landing Zone Files" section in the ECL Language Reference) and then you can PARSE it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-07-23 21:44:55\" },\n\t{ \"post_id\": 27013, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"janet.anderson\", \"post_text\": \"Thanks, Richard. I am using git, so I do have the file locally. However, the IDE already knows where the file is and what it's contents are, so is there a way to avoid having to spray it and then read it as a dataset (like I would for a typical text file)? I'm trying to automate a reporting process to be a one click BWR, so is there a sleek way to do this?\", \"post_time\": \"2019-07-23 20:59:19\" },\n\t{ \"post_id\": 26993, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Re: Parsing an ECL file\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nSince an ECL code file is just a UTF8 text file, you can certainly use PARSE on it. But the real issue is getting to it.\\n\\nIt's easy if you are NOT using an old-school, pre-Open Source, central repository, because that means it's just a file on your local hard drive, and you can do anything you want with it. But, if you are using a central repository (a MySQL database to anybody not inside Lexis Nexis) then I have no idea how you can get to that,\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-07-23 20:18:30\" },\n\t{ \"post_id\": 26983, \"topic_id\": 7193, \"forum_id\": 10, \"post_subject\": \"Parsing an ECL file\", \"username\": \"janet.anderson\", \"post_text\": \"There is an ECL file in my repository that has a series of statements that assign a source description to a source code like:\\nexport src_ABC := 'AB';\\n\\nI run some stats grouped by the source codes in the ECL file, and I want to include the source description. So I would like to create a lookup table using the file (this is a living file, it changes, I shouldn't be hard coding anything). Is there a way to use PARSE on an ECL file in a repo? Is there a better way to go about this?\", \"post_time\": \"2019-07-23 19:46:01\" },\n\t{ \"post_id\": 27593, \"topic_id\": 7343, \"forum_id\": 10, \"post_subject\": \"Re: NAME BUILDINDEX or BUILD\", \"username\": \"bforeman\", \"post_text\": \"Yes, you can actually use the NAMED attribute on BUILD or BUILDINDEX similar to how you use it on OUTPUT. 
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2019-09-25 15:28:39\" },\n\t{ \"post_id\": 27583, \"topic_id\": 7343, \"forum_id\": 10, \"post_subject\": \"NAME BUILDINDEX or BUILD\", \"username\": \"newportm\", \"post_text\": \"Looking over the documentation I do not see an option to name the output of BUILD or BUILDINDEX. Can named('IndexName') be used? Is there another option?\", \"post_time\": \"2019-09-25 14:20:36\" },\n\t{ \"post_id\": 28073, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"harshdesai\", \"post_text\": \"Tried using httpcall and its working and generating at once to be used\\nIMPORT STD, india_ecl_common.ut.Base64;\\n\\n\\n// #WORKUNIT('name','GetTransactionID_BasicAuth');\\nEXPORT GetTransactionID_BasicAuth() := FUNCTION\\nrRequest := RECORD\\n STRING DummyField := '';\\nEND;\\n\\n rTransactionId := RECORD\\n \\tSTRING TransactionID {xpath('TransactionID')};\\t\\t\\n END;\\n\\n\\nrReponse := RECORD\\nSET OF STRING TransactionIDs {xpath('TransactionIDs/TransactionID')};\\nEND;\\n\\n/* rGetTransactionIdentifierResponse := RECORD\\n \\trReponse GetTransactionIdentifierResponse {xpath('GetTransactionIdentifierResponse')};\\n END;\\n*/\\n\\nSTRING IP := '10.224.105.125';\\nSTRING Port := '7280';\\nSTRING URL := 'http://' + IP + ':' + Port + '/WsUtility/GetTransactionIdentifier/?ver_=1.81&NumberToGenerate=5';\\n// STRING URL := 'http://' + IP + ':' + Port + '/WsUtility/GetTransactionIdentifier/?ver_=1.81';\\n\\n//Input Credentials\\nSTRING Username_Val := 'ind_ins_dev';\\nSTRING Password_Val := '1nd1nsDev';\\n\\nSTRING1 _encodeSep := ':';\\n\\n//Base64 encoding\\ncredentialData := (DATA)(Username_Val + _encodeSep + Password_Val);\\nbase64BasicAccessAuth := Base64.encode_data(credentialData);\\nSTRING HTTPHEADER_VAL := 'Basic ' + (STRING) base64BasicAccessAuth;\\n\\n/* lResponse\\t:=\\tsoapcall(URL, \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'GetTransactionIdentifier',\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rRequest,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rReponse,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t XPATH('GetTransactionIdentifierResponse'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t HTTPHEADER('Authorization', HTTPHEADER_VAL)\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n*/\\nlResponse\\t:=\\thttpcall(URL, \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'GET',\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'text',\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rReponse,\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t //XPATH('GetTransactionIdentifierResponse'),\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t HTTPHEADER('Authorization', HTTPHEADER_VAL)\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nRETURN lResponse.TransactionIDs;\\n\\nEND;\\n DS := GetTransactionID_BasicAuth();\\n DS;\", \"post_time\": \"2019-11-08 11:16:06\" },\n\t{ \"post_id\": 28063, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Harsh,\\n\\nYou seem to be mixing SOAP and URL parameters. 
We may support this some places, but it's not usually the best way.\\n\\nTry passing NumberToGenerate as a soap parameter instead:\\n\\nrRequest := RECORD\\nINTEGER1 NumberToGenerate {xpath('NumberToGenerate')} := 5;\\nEND;\\n\\nNote that the xpath is important to make sure we preserve case in the variable name ECL is itself case insensitive.\\n\\nLet me know if it works:\\n\\nregards,\\nTony\", \"post_time\": \"2019-11-04 23:00:23\" },\n\t{ \"post_id\": 28053, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"harshdesai\", \"post_text\": \"EXPORT GetTransactionID_BasicAuth() := FUNCTION\\nrRequest := RECORD\\n STRING DummyField := '';\\nEND;\\n\\n\\nrReponse := RECORD\\nSET OF STRING TransactionIDs {xpath('TransactionIDs/TransactionID')};\\nEND;\\n\\n\\nSTRING URL := 'http://10.224.105.XX:XXXX/WsUtility/GetTransactionIdentifier/?ver_=1.81&NumberToGenerate=5';\\n\\n//Input Credentials\\nSTRING Username_Val := 'iXXXX_dev';\\nSTRING Password_Val := '1XXsDev';\\n\\nSTRING1 _encodeSep := ':';\\n\\n//Base64 encoding\\ncredentialData := (DATA)(Username_Val + _encodeSep + Password_Val);\\nbase64BasicAccessAuth := Base64.encode_data(credentialData);\\nSTRING HTTPHEADER_VAL := 'Basic ' + (STRING) base64BasicAccessAuth;\\n\\nlResponse\\t:=\\tsoapcall(URL, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'GetTransactionIdentifier',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rRequest,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rReponse,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t XPATH('GetTransactionIdentifierResponse'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t HTTPHEADER('Authorization', HTTPHEADER_VAL)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nRETURN lResponse;\\n\\nEND;\\nGetTransactionID_BasicAuth();\\n\\n\\nTried as suggested didn't work still give me only one values in output\", \"post_time\": \"2019-11-04 04:09:47\" },\n\t{ \"post_id\": 28043, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"anthony.fishbeck\", \"post_text\": \"I think because of the way to specify the resulting dataset \\n\\nrReponse := RECORD\\nDataset(rTransactionId) TransactionIDs {xpath('TransactionIDs')};\\nEND;\\n
\\nit is looking for:\\n\\n<GetTransactionIdentifierResponse>\\n <TransactionIDs>\\n <TransactionID>Ec5rkFZ83H3x8T6GEg66qe</TransactionID>\\n </TransactionIDs>\\n <TransactionIDs>\\n <TransactionID>Ec5rkFaBZyjjW2aY7p7Yaz</TransactionID>\\n </TransactionIDs>\\n <TransactionIDs>\\n <TransactionID>Ec5rkFbYM5GpAMgpFS1t8U</TransactionID>\\n </TransactionIDs>\\n <TransactionIDs>\\n <TransactionID>Ec5rkFcpazkzYK5zkQHccx</TransactionID>\\n </TransactionIDs>\\n <TransactionIDs>\\n <TransactionID>Ec5rkFc6Ah5hks3xsJQt7n</TransactionID>\\n </TransactionIDs>\\n</GetTransactionIdentifierResponse>
\\n\\nThe easiest thing would probably be for you to use a set of strings:\\nrReponse := RECORD\\nSET OF STRING TransactionIDs {xpath('TransactionIDs/TransactionID')};\\nEND;\\n
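If it helps, a small sketch (names invented for the example, not from the thread) of expanding such a SET OF STRING result into one record per id, which can then be projected onto or joined with other data:

SET OF STRING txns := ['id-001','id-002','id-003'];   // stand-in for lResponse.TransactionIDs
txnDS := DATASET(COUNT(txns),
                 TRANSFORM({STRING TransactionID},
                           SELF.TransactionID := txns[COUNTER]));
OUTPUT(txnDS);   // three rows, one transaction id per row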
\", \"post_time\": \"2019-11-03 22:04:39\" },\n\t{ \"post_id\": 28033, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"harshdesai\", \"post_text\": \"Thanks it worked in generating 1 ESP transaction ID.\\nCurrently i am trying to fetch in bulk so can update accordingly against each record.\\n\\n// #WORKUNIT('name','GetTransactionID_BasicAuth');\\nEXPORT GetTransactionID_BasicAuth() := FUNCTION\\nrRequest := RECORD\\n STRING DummyField := '';\\nEND;\\n\\n rTransactionId := RECORD\\n \\tSTRING TransactionID {xpath('TransactionID')};\\t\\t\\n END;\\n\\n\\nrReponse := RECORD\\n\\tDataset(rTransactionId) TransactionIDs {xpath('TransactionIDs')};\\t\\t\\t\\n\\t\\t\\nEND;\\n\\nSTRING IP := '10.224.XX.XXX';\\nSTRING Port := '72XX';\\nSTRING URL := 'http://' + IP + ':' + Port + '/WsUtility/GetTransactionIdentifier/?ver_=1.81';\\n\\n//Input Credentials\\nSTRING Username_Val := 'XXXXX';\\nSTRING Password_Val := 'XXXX';\\n\\nSTRING1 _encodeSep := ':';\\n\\n//Base64 encoding\\ncredentialData := (DATA)(Username_Val + _encodeSep + Password_Val);\\nbase64BasicAccessAuth := Base64.encode_data(credentialData);\\nSTRING HTTPHEADER_VAL := 'Basic ' + (STRING) base64BasicAccessAuth;\\n\\nlResponse\\t:=\\tsoapcall(URL, \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t 'GetTransactionIdentifier',\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rRequest,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t rReponse,\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t XPATH('GetTransactionIdentifierResponse'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t HTTPHEADER('Authorization', HTTPHEADER_VAL)\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nRETURN lResponse.TransactionIDs;\\n\\n\\n\\n\\ncopy_Record :=\\n record\\n string50 product_transaction_id;\\n string policy_sequence_id;\\n string25 application_number;\\n string25 policy_number;\\n string plan_type;\\n string policy_term;\\n string current_sum_assured;\\n string date_added;\\n string60 user_added;\\n end;\\n \\n copy_Dataset :=\\n dataset([\\n {'', '1', 'OS00205370 ', '18491348 ', 'EN', '15', '140000.00', '2019-10-30 05:58:22', 'esprili_batch_prod '}, \\n {'', '1', 'LL41908432 ', '15448125 ', 'EN', '10', '115000.00', '2019-10-30 05:58:14', 'esprili_batch_prod '}, \\n], copy_Record);\\n copy_Dataset;\\n \\n \\tDS_transaction_log_request_policy := project(copy_Dataset,transform(copy_Record,\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tself.product_transaction_id := GetTransactionID_BasicAuth() ;\\n \\t self.policy_sequence_id := Left.policy_sequence_id;\\n \\t self.application_number := Left.application_number;\\n \\t self.policy_number := Left.policy_number;\\n \\t self.policy_term := Left.policy_term;\\n \\t self.current_sum_assured := Left.current_sum_assured;\\n \\t self.user_added := Left.user_added;\\n \\t self.plan_type := Left.plan_type;\\n \\t self.date_added := Left.date_added;\\n \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t ));\\n DS_transaction_log_request_policy;\\n\\nWhen i try to call in the below transform i get only one esp id .. Ideally since its in transform it should make 10 soap calls,but not able to fetch all 10 ,Even in Graph it says only one SOAP is made which is correct as its fetching in Bulk but i am not able to read the same .\\nEven tried using (http://10.224.XX.XXX:XXXX/WsUtility/Get ... 
Generate=5) this generates 5 transaction id at once but how can i fetch in dataset or accept this values .\\n\\nEG :- Below is when URL is directly called from browser.\\nhttp://10.224.XX.XXX:XXXX/WsUtility/Get ... oGenerate={1}\\n\\n<GetTransactionIdentifierResponse>\\n<TransactionIDs>\\n<TransactionID>Ec4vHrw7LF6h7Ry5MmB2SL</TransactionID>\\n</TransactionIDs>\\n</GetTransactionIdentifierResponse>\\n\\nhttp://10.224.XX.XXX:XXXX/WsUtility/Get ... Generate=5\\n\\n<GetTransactionIdentifierResponse>\\n<TransactionIDs>\\n<TransactionID>Ec5rkFZ83H3x8T6GEg66qe</TransactionID>\\n<TransactionID>Ec5rkFaBZyjjW2aY7p7Yaz</TransactionID>\\n<TransactionID>Ec5rkFbYM5GpAMgpFS1t8U</TransactionID>\\n<TransactionID>Ec5rkFcpazkzYK5zkQHccx</TransactionID>\\n<TransactionID>Ec5rkFc6Ah5hks3xsJQt7n</TransactionID>\\n</TransactionIDs>\\n</GetTransactionIdentifierResponse>\\n\\nHow can i use this .\\n\\nRegards\\nHarsh\", \"post_time\": \"2019-11-02 08:01:44\" },\n\t{ \"post_id\": 27743, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"Re: ESP Soap to generate Transaction ID\", \"username\": \"anthony.fishbeck\", \"post_text\": \"There are two ways you can pass credentials through SOAPCALL.\\n\\n1. Insert the credentials into the URL string. Gather the credentials from wherever they are stored and add them to the url in the following format:\\n\\nhttps://username:password@address:port/whatever\\n\\n2. Add the HTTP authentication header yourself:\\n\\n MY_USERNAME := TRIM(username, ALL);\\n MY_USER_PW := TRIM(userPW, LEFT, RIGHT);\\n\\n [color=#FF0000:1u20dcdt]ENCODED_BASIC_CREDENTIALS := IF (\\n MY_USERNAME != '',\\n 'Basic ' + Std.Str.EncodeBase64((DATA)(MY_USERNAME + ':' + MY_USER_PW)),\\n ''\\n );\\n\\n soapResponse := SOAPCALL(ds, ip, svc, inRecord, t(LEFT),DATASET(outRecord), ONFAIL(SKIP), [color=#FF0000:1u20dcdt]HTTPHEADER('Authorization',[color=#FF0000:1u20dcdt]ENCODED_BASIC_CREDENTIALS));\", \"post_time\": \"2019-10-09 13:54:18\" },\n\t{ \"post_id\": 27733, \"topic_id\": 7383, \"forum_id\": 10, \"post_subject\": \"ESP Soap to generate Transaction ID\", \"username\": \"harshdesai\", \"post_text\": \"Hi,\\ncan you please help to understand how can to make call to ESP via Thor/Roxie to fetch data as ESP user will user name password associated ,Couldn't find soap call with such example where it takes this as params.\\n\\nURL :- http://10.224.105.125:7280/WsUtility/Ge ... 
oductCode={0}&NumberToGenerate={1}\\nEspUserName:- XXXXX\\nEspPassword:- XXXXX\\nHow to can i fetch TransactionID same in soap call respective \\n<GetTransactionIdentifierResponse><TransactionIDs><TransactionID>XXXPPPAAAYV</TransactionID></TransactionIDs></GetTransactionIdentifierResponse>\", \"post_time\": \"2019-10-09 04:39:00\" },\n\t{ \"post_id\": 27933, \"topic_id\": 7413, \"forum_id\": 10, \"post_subject\": \"Re: Reading Excel (*.xls) formatted files into THOR.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nWorth an ask.\", \"post_time\": \"2019-10-22 09:54:13\" },\n\t{ \"post_id\": 27913, \"topic_id\": 7413, \"forum_id\": 10, \"post_subject\": \"Re: Reading Excel (*.xls) formatted files into THOR.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nNot that I have ever heard of.\\n\\nRichard\", \"post_time\": \"2019-10-21 16:31:52\" },\n\t{ \"post_id\": 27893, \"topic_id\": 7413, \"forum_id\": 10, \"post_subject\": \"Reading Excel (*.xls) formatted files into THOR.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nOne can always export a Excel spreadsheet as a CSV format and spray/read these files into THOR.\\nIs there a plugin / something that allows xls formatted files to be imported into THOR without the intermediate conversion to CSV format?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-10-21 16:18:45\" },\n\t{ \"post_id\": 28243, \"topic_id\": 7483, \"forum_id\": 10, \"post_subject\": \"Re: Conditional issue with MAP & IF\", \"username\": \"harshdesai\", \"post_text\": \"Hi \\nhttps://track.hpccsystems.com/browse/HPCC-23147 \\nZap Reports attached\", \"post_time\": \"2019-11-20 05:20:47\" },\n\t{ \"post_id\": 28193, \"topic_id\": 7483, \"forum_id\": 10, \"post_subject\": \"Re: Conditional issue with MAP & IF\", \"username\": \"ghalliday\", \"post_text\": \"Please can you open a jira.\\n\\nI am likely to need an archive of the query, and some details of which soapcall is being called incorrectly to be able to provide any help.\", \"post_time\": \"2019-11-19 12:30:16\" },\n\t{ \"post_id\": 28173, \"topic_id\": 7483, \"forum_id\": 10, \"post_subject\": \"Conditional issue with MAP & IF\", \"username\": \"harshdesai\", \"post_text\": \"Hi ALL,\\nFacing issue with Map and If/Iff it always call SOAP Function irrespective it falls in condition or not.I have to update DOPS(Params) on basis of Env which i pass but it initiates soapcall always so ideally soap calls fails with connection error, Tried with NoFold too it .\\n\\nSOAP call should run only for PROD env but MAP and IF not giving enough levarage to use it .\\n\\n\\nIMPORT india_ecl_common.dops;\\ndemoFunction := MODULE\\n\\tBoolean IsEnvProd := india_ecl_common.LoadConfig.CI.Constants.isPROD;;\\n\\nupdateBuildVersionKeys := Dops.UpdateVersion('LifePolicyKeys', trim('20191117',left,right), 'DataEngineering.India@lexisnexis.com,indiaroxiepackageteam@lexisnexis.com','Y','R',,'N',,,,, IsEnvProd);\\n updateBuildVersionKeys := NOFOLD(MAP(~IsEnvProd => \\tOUTPUT('Non-prod environment'),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tIsEnvProd => \\tDops.UpdateVersion('LifePolicyKeys', trim('20191117',left,right), 'DataEngineering.India@lexisnexis.com,indiaroxiepackageteam@lexisnexis.com','Y','R',,'N',,,,, IsEnvProd),\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\tOUTPUT('Undefined environment')));\\n\\n // action1 := MAP(IsEnvProd = TRUE => updateBuildVersionKeys, \\n\\t // IsEnvProd = FALSE 
=> OUTPUT('Non-prod environment'), OUTPUT('Undefined environment'));\\n\\t\\n\\t// EXPORT action := MAP(IsEnvProd = TRUE => updateBuildVersionKeys, IsEnvProd = FALSE => OUTPUT('Non-prod environment'), OUTPUT('Undefined environment'));\\n\\t// EXPORT action := nofold(iff(IsEnvProd, updateBuildVersionKeys));\\n\\tEXPORT action := updateBuildVersionKeys;\\nEND;\\n\\ndemoFunction.action;\", \"post_time\": \"2019-11-19 06:28:29\" },\n\t{ \"post_id\": 28333, \"topic_id\": 7493, \"forum_id\": 10, \"post_subject\": \"Re: Equivalent of lib_system.smtpserver in the standard libr\", \"username\": \"Allan\", \"post_text\": \"Will do Richard.\\n\\nAs an aside, I've downloaded the source of the STD from the HPCC community GitHub, and said items are not exposed.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2019-11-22 07:35:55\" },\n\t{ \"post_id\": 28323, \"topic_id\": 7493, \"forum_id\": 10, \"post_subject\": \"Re: Equivalent of lib_system.smtpserver in the standard libr\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nSubmit a JIRA ticket for that and I'll bet you a pint it gets implemented. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-11-21 13:00:43\" },\n\t{ \"post_id\": 28263, \"topic_id\": 7493, \"forum_id\": 10, \"post_subject\": \"Equivalent of lib_system.smtpserver in the standard library?\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nThe lib_system has exposed:\\n
lib_system.smtpserver\\n lib_system.smtpport\\n lib_system.emailAddress\\n
\\n\\nBut I can't find the same item exposed via the STD.\\nIt seems strange to be able to call:\\nSTD.System.Email.SendEmail
\\nBut then have to supply lib_system
items as parameters.\\nYours\\nAllan\", \"post_time\": \"2019-11-20 16:19:39\" },\n\t{ \"post_id\": 28613, \"topic_id\": 7503, \"forum_id\": 10, \"post_subject\": \"Re: Using REGEX in reverse?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI emailed you a .mod file containing my test data generator code. I built a pretty generic system where the only requirements were to be able to generate any number records with any number of fields of any simple data type, with or without a standard distribution curve for the generated data (Gaussian, random, Pareto, etc.). An additional challenge was to make the "random" data duplicable.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-12-31 04:47:22\" },\n\t{ \"post_id\": 28603, \"topic_id\": 7503, \"forum_id\": 10, \"post_subject\": \"Re: Using REGEX in reverse?\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nYour comment:\\nI did write a pretty complex test data generator a while back that I could let you have a look\\n\\nThis is precisely the job assigned to me!\\nI never like re-inventing the wheel, unfortunately I've gone a fare way down my implementation route, (which also has to service as a validator of incoming test data).\\n\\nI'll ping you the proposal I'm working to. (needs updating ha ha).\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2019-12-30 09:11:36\" },\n\t{ \"post_id\": 28593, \"topic_id\": 7503, \"forum_id\": 10, \"post_subject\": \"Re: Using REGEX in reverse?\", \"username\": \"Allan\", \"post_text\": \"No, it's ok Richard. As I said it's a wacky idea, but intriguing.\\nIf one could do it, it would make generating test data SSSSSOOOOO much easier.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2019-12-30 09:05:17\" },\n\t{ \"post_id\": 28543, \"topic_id\": 7503, \"forum_id\": 10, \"post_subject\": \"Re: Using REGEX in reverse?\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nNo, I've never heard of any code that generates test data from RegEx.\\n\\nI did write a pretty complex test data generator a while back that I could let you have a look at to see if it might meet your needs. 
PM me if you're interested.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-12-23 07:05:17\" },\n\t{ \"post_id\": 28343, \"topic_id\": 7503, \"forum_id\": 10, \"post_subject\": \"Using REGEX in reverse?\", \"username\": \"Allan\", \"post_text\": \"Hi Community,\\n\\nHas anyone written a 'reverse' Regular expression generator?\\n\\nNormal REGEX takes a candidate textual string and checks that is conforms to some expression.\\n\\nHas anyone generated test data by supplying a regular expression and returning some text that conforms to said input regular expression?\\n\\nMay have to return a DATASET of text as with ORs (|) there are multiple paths through the regular expression.\\n\\nWeird question perhaps, well I'll just put it out there.\\n\\nYours\\nAllan\", \"post_time\": \"2019-11-28 09:10:33\" },\n\t{ \"post_id\": 28363, \"topic_id\": 7513, \"forum_id\": 10, \"post_subject\": \"Re: Constructing records from combinations of text fragments\", \"username\": \"Allan\", \"post_text\": \"An elegant solution from Tony Kirk,\\nOne has to remember that the next iteration to LOOP holds what’s constructed in the previous iteration, |Tony's also incremented the field number it's joining on as you go along.\\nrIn :=\\nrecord\\n unsigned2 FieldPos;\\n string TextValue;\\nend;\\ndIn := dataset([{1, 'Allan'},\\n {1, 'Nina'},\\n {2, '5'},\\n {2, '6'},\\n {2, '7'},\\n {3, 'ABC'},\\n {3, 'DEF'}, // Added these\\n {4, '1A'},\\n {4, '2B'}\\n ], rIn\\n );\\n\\n//----------------------------------------------\\nrIn tJoin(rIn pLeft, rIn pRight) :=\\ntransform\\n self.FieldPos := pRight.FieldPos; // Sets up for next JOIN\\n self.TextValue := trim(pLeft.TextValue) + if(pRight.FieldPos <> 0, ',' + trim(pRight.TextValue), ''); // both TRIMs probably redundant\\nend;\\nfJoin(dataset(rIn) pStartDataset, unsigned2 pLeftFieldPos) := join(pStartDataset, dIn,\\n left.FieldPos = pLeftFieldPos and right.FieldPos = left.FieldPos + 1,\\n tJoin(left, right)\\n );\\n\\n// REQUIRES FieldPos values start with 1 and have no gaps (didn't test what happens if 1 only). Otherwise, \\n// it would require prep to align low value with 1 and each subsequent iterated value one higher and use *that* MAX.\\nlLoopCount := max(dIn, FieldPos) - 1;\\ndLoop := loop(dIn, lLoopCount, fJoin(rows(left), counter)); // Form 1 of LOOP\\n//----------------------------------------------\\n\\noutput(dLoop);\\n
\\n\\nNice one Tony\", \"post_time\": \"2019-11-29 15:08:05\" },\n\t{ \"post_id\": 28353, \"topic_id\": 7513, \"forum_id\": 10, \"post_subject\": \"Constructing records from combinations of text fragments.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI expect this is posted else where as this seems a straightforward question, but given a variable number of text fragments (with a known final position in the record) how does one generate all records for every combination?\\nExample\\n\\nField Position Text\\n1 Allan\\n1 Nina\\n2 5\\n2 6\\n2 7\\n3 ABC\\n
\\n\\nProduces 6 records:\\n\\n\\nAllan,5,ABC\\nAllan,6,ABC\\nAllan,7,ABC\\nNina,5,ABC\\nNina,6,ABC\\nNina,7,ABC\\n
\\nThe number of fields is also variable\\n\\nIn languages that allow recursion this is straightforward, but in ECL one can't reference a function from within itself. I've investigated LOOP but drawn a blank.\\n(Hum something like LOOP where the input record structure holds both the input child datasets AND the resultant string, slowly adding to the output and removing records from the child datasets until all child datasets are all empty????) \\n\\nThanks in advance\\n\\nAllan\", \"post_time\": \"2019-11-28 13:37:13\" },\n\t{ \"post_id\": 28383, \"topic_id\": 7523, \"forum_id\": 10, \"post_subject\": \"Re: Roxie Batch Calls\", \"username\": \"abaruchi\", \"post_text\": \"Updating:\\n\\nI found in documentation (pg 65 and 66 of "ECL Programmers Guide") how to do it using SOAPCALL. Basically, you create a dataset with several "search criteria" and submit to the roxie and you receive a dataset as response with the answer for each dataset used in the request.\\n\\nI think it is the way to do it, however, I need this to work as an API "call" (external system, not Thor, consuming this API and sending in a single XML several search requests). Still figuring out on how to do this part.\\n\\nThanks,\\n\\n--Artur\", \"post_time\": \"2019-12-06 21:23:49\" },\n\t{ \"post_id\": 28373, \"topic_id\": 7523, \"forum_id\": 10, \"post_subject\": \"Roxie Batch Calls\", \"username\": \"abaruchi\", \"post_text\": \"Hi,\\n\\nI would like to know if it is possible to perform a call to a Roxie Query, but instead of performing a single call, I would like to run a call with several entries and get the response of all them.\\nFor example, suppose I have a roxie that returns to me the street name. The input would be the zip code. Suppose I have a list with 5 thousand zipcodes. How could I call this roxie to return this list on single input and not doing it sequentially?\\n\\nThanks in advance.\\n\\nAtt.\\nArtur Baruchi\", \"post_time\": \"2019-12-04 21:03:39\" },\n\t{ \"post_id\": 28673, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Re: Request help parsing text.\", \"username\": \"Allan\", \"post_text\": \"This does the job:\\nds := DATASET([\\n {1,'item=Allan and Anna'},\\n {2,'item=\\\\'Allan and Anna\\\\''},\\n {3,'item="Nina Colin"'},\\n {4,'item=R\\\\'allan and anna\\\\''},\\n {5,'item=R"bill and Megan"'}],\\n{unsigned1 UID, string line});\\n\\nPATTERN alpha := PATTERN('[A-Za-z ]')+;\\nPATTERN qChar := ['\\\\'','"'];\\nPATTERN qStr := PATTERN('R');\\nPATTERN quote1 := OPT(qStr) qChar;\\nPATTERN start := 'item=';\\nRULE quoterule := start OPT(quote1) alpha OPT(qChar);\\n\\nRec := {unsigned1 UID, BOOLEAN quoted,BOOLEAN Qualified,STRING Txt};\\nRec XF(ds L) := TRANSFORM\\n SELF.UID := L.UID;\\n SELF.quoted := MATCHED(quote1);\\n SELF.Qualified := MATCHED(qStr);\\n SELF.Txt := MATCHTEXT(alpha);\\nEND;\\n\\nPARSE(ds,line,quoterule,XF(LEFT),FIRST);\\n
\", \"post_time\": \"2020-01-03 12:22:58\" },\n\t{ \"post_id\": 28663, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Re: Request help parsing text.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nNice example, I'll have to attempt to adjust to distinguish between an R' quoted string and an ' quoted string. (as the 'R' is OPT) Your example does not make the distinction.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-01-03 12:14:52\" },\n\t{ \"post_id\": 28623, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Re: Request help parsing text.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, purely as an academic exercise ( ), here's how I would approach it:
ds := DATASET([\\n {1,'item=Allan and Anna'},\\n {2,'item=\\\\'Allan and Anna\\\\''},\\n {3,'item="Nina Colin"'},\\n {4,'item=R\\\\'allan and anna\\\\''},\\n {5,'item=R"bill and Megan"'}],\\n {unsigned1 UID, string line});\\n\\nPATTERN alpha := PATTERN('[A-Za-z ]')+;\\nPATTERN qChar := ['\\\\'','"'];\\nPATTERN quote1 := OPT('R') qChar;\\nPATTERN start := 'item=';\\nRULE quoterule := start OPT(quote1) alpha OPT(qChar);\\n\\nRec := {unsigned1 UID, BOOLEAN quoted,STRING Txt};\\nRec XF(ds L) := TRANSFORM\\n SELF.UID := L.UID;\\n SELF.quoted := MATCHED(quote1);\\n SELF.Txt := MATCHTEXT(alpha);\\nEND;\\n\\nPARSE(ds,line,quoterule,XF(LEFT),FIRST);\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-12-31 08:00:56\" },\n\t{ \"post_id\": 28583, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Re: Request help parsing text.\", \"username\": \"Allan\", \"post_text\": \"Happy new year Richard!\\n\\nThe application I'm writing has to generate text with, or without the inputs enclosing quotes (if there are any). It's not as simple as just defining 'always output quotes if a string is quoted'. I need a BOOLEAN attached to a string to indicate 'enclose in quotes'.\\nSo if my input is:\\n\\nAn unquoted string\\n'A quoted, string'\\nR'A quoted, string'\\n
\\nThe output is:\\n\\nQuoted Text\\nFALSE An unquoted string\\nTRUE A quoted, string\\nFALSE A quoted, string\\n
\\n\\nActually this is now just an academic exercise as I've now implemented a completely different regime, but I would still be interested in understanding a parsing solution to this example.\\n\\nMany thanks\\nAllan\", \"post_time\": \"2019-12-30 09:00:52\" },\n\t{ \"post_id\": 28573, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Re: Request help parsing text.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nOK, I just looked up what a Python R string is, but I'm not quite understanding what your problem is. Can you show me example input text and the result you'd like to produce from that input, please?\\n\\nRichard\", \"post_time\": \"2019-12-23 08:31:33\" },\n\t{ \"post_id\": 28533, \"topic_id\": 7583, \"forum_id\": 10, \"post_subject\": \"Request help parsing text.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've hit the buffers on my understanding of parsing text.\\nI have text of the forms:\\n\\nitem=Allan and Anna\\nitem='Allan and Anna'\\nitem="Nina Colin"\\nitem=R'allan and anna'\\nitem=R"bill and Megan"\\n
\\nMuch like 'Python' I want to have an 'R' qualifier tied to strings.\\nhowever the 'R' is matching the normal unquoted string.\\nI've read up about 'pattern1' NOT IN 'pattern2' but after many experiments failed to \\nmatch R'text' to a single Pattern.\\n\\nErr please help?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-12-20 11:40:16\" },\n\t{ \"post_id\": 29011, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading a UTF-16 file\", \"username\": \"SChatman85\", \"post_text\": \"Thanks Both - have raised: https://track.hpccsystems.com/browse/HPCC-23282\", \"post_time\": \"2020-01-10 14:16:39\" },\n\t{ \"post_id\": 29001, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading a UTF-16 file\", \"username\": \"bforeman\", \"post_text\": \"Hi Stewart,\\n\\nI guess I am confusing locale with encoding. I was thinking this was what you needed to do:\\n\\ninput_lay := RECORD,LOCALE('utf16le')
\\n\\nIf that's not correct then I agree with Richard and a JIRA issue should be opened.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-01-10 14:04:15\" },\n\t{ \"post_id\": 28951, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading a UTF-16 file\", \"username\": \"SChatman85\", \"post_text\": \"Hi Bob,\\n\\nI tried adding the following:\\n\\ninput_lay := RECORD, [b]LOCALE('en')[/b]
\\n\\nThis was tried with the CSV definition with defaults, and with UNICODE - both produced the same.\\n\\nThe layout itself contained one field as STRING, one as UTF8 and one as UNICODE - none of them were parsed as expected.\\n\\nThe file was sprayed with the following encoding setting:\\n\\n,encoding := 'utf16le'
\", \"post_time\": \"2020-01-10 09:09:33\" },\n\t{ \"post_id\": 28921, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"rtaylor\", \"post_text\": \"Stewart,\\n\\nOK, confirmed -- 2-byte UNICODE characters is what it looks like.\\n\\nSo now it's time to submit a JIRA and get the developers involved. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-09 17:55:55\" },\n\t{ \"post_id\": 28911, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"bforeman\", \"post_text\": \"Stewart,\\nDid you try setting the LOCALE in the RECORD statement? What format did you use to spray it? UTF-16, UTF-32? Perhaps it's a matter of translation.\\n\\nBob\", \"post_time\": \"2020-01-09 17:54:26\" },\n\t{ \"post_id\": 28901, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"SChatman85\", \"post_text\": \"Hi Richard,\\n\\nHere's a hexdump, hope this format is ok?\\n\\n
\\n00000000 ff fe 22 00 44 00 41 00 54 00 45 00 5f 00 53 00 |..".D.A.T.E._.S.|\\n00000010 54 00 41 00 52 00 54 00 22 00 2c 00 22 00 44 00 |T.A.R.T.".,.".D.|\\n00000020 41 00 54 00 45 00 5f 00 45 00 4e 00 44 00 22 00 |A.T.E._.E.N.D.".|\\n
\\n\\nI believe I tried to define as UNICODE before I tried UTF8 on Bobs suggestion, based on what ECL Watch was showing in the record definition, as it renders there fine.\", \"post_time\": \"2020-01-09 17:02:56\" },\n\t{ \"post_id\": 28871, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"rtaylor\", \"post_text\": \"Stewart,Octal dump of raw file:\\n\\n0003760 " \\\\0 I \\\\0 n \\\\0 s \\\\0 t \\\\0 a \\\\0 l \\\\0 m \\\\0\\n0004000 e \\\\0 n \\\\0 t
Could you post a Hex Dump of this data? Your Octal dump looks to me like it's showing 2 bytes per character and each leading byte is a Hex 00. If that's the case, then instead of defining the fields with UTF8 I'd suggest you try using UNICODE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-09 14:59:14\" },\n\t{ \"post_id\": 28861, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"bforeman\", \"post_text\": \"Hi Stewart,\\n\\nIn the DATASET, the SEPARATOR and TERMINATOR options are probably not needed since you are using the Delimited spray defaults.\\n\\nTry adding a locale to the UTF8 field in the RECORD (or specify the proper Locale in the RECORD statement itself. According to the docs:\\n\\nThe optional locale specifies a valid unicode locale code, as specified in ISO standards 639 and 3166 (not needed if LOCALE is specified on the RECORD structure containing the field definition).\\n
\\n\\nIf you are still having trouble reading the file, I would suggest submitting a JIRA with all of the details and perhaps some sample data if possible. If you are reading the data in the ECL Watch properly, but not in the ECL IDE, there could be an issue there. I would also try the ECL command line and see what your result looks like in the console.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-01-09 14:16:56\" },\n\t{ \"post_id\": 28851, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"SChatman85\", \"post_text\": \"Hi Bob,\\n\\n1. Our Production cluster is currently running on 6.4.38.\\n2. The definition is showing in ECL watch as:\\n\\nRECORD\\n UTF8 field1;\\n UTF8 field2;\\n ...
\\n\\nI tried using this as my layout in the DATASET definition, but made no difference.\\n\\n\\nHi Richard.\\n\\nStarting with the DATASET definition I have been trying with the following variations:\\n\\nDATASET( 'logical_filename',input_lay, CSV( HEADING(1),SEPARATOR(','), TERMINATOR(['\\\\n', '\\\\r\\\\n']), MAXLENGTH(40000)))
\\nDATASET( 'logical_filename',input_lay, CSV( HEADING(1),SEPARATOR(','), TERMINATOR(['\\\\n', '\\\\r\\\\n']), UNICODE, MAXLENGTH(40000)))
\\n\\nI have now tried the 3 following Layout definitions:\\n\\ninput_lay := RECORD\\n UTF8 field1;\\n UTF8 field2;\\n UTF8 field3;\\n ...\\nEND;
\\n\\ninput_lay := RECORD\\n STRING field1;\\n STRING field2;\\n STRING field3;\\n ...\\nEND;
\\n\\ninput_lay := RECORD\\n UNICODE field1;\\n UNICODE field2;\\n UNICODE field3;\\n ...\\nEND;
\", \"post_time\": \"2020-01-09 12:00:36\" },\n\t{ \"post_id\": 28823, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"rtaylor\", \"post_text\": \"SChatman85,ECL IDE output of the dataset:\\n\\n"Instalment\\n\\nExpected output: Instalment
Can you show us your ECL definition of the RECORD structure and DATASET declaration that produced this result, please?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-08 18:16:31\" },\n\t{ \"post_id\": 28813, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"bforeman\", \"post_text\": \"I added the encoding value as mentioned above, so the data looks fine when viewing the Sprayed file in ECL Watch - but if I put it into a dataset definition, and output it, then I get extra characters which I am assuming is the extra byte.
\\n\\nWhat is the version of the HPCC cluster you are using? Can you look at the ECL tab of the sprayed file and see the RECORD structure generated? If you use that RECORD with your DATASET how does the OUTPUT look? \\n\\nBob\", \"post_time\": \"2020-01-08 17:19:38\" },\n\t{ \"post_id\": 28803, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"SChatman85\", \"post_text\": \"Hi Bob,\\n\\nThanks for the reply. I'm just looking to read a file which is provided in the format of:\\n\\nLittle-endian UTF-16 Unicode text, with very long lines, with CRLF, CR line terminators\\n\\nI added the encoding value as mentioned above, so the data looks fine when viewing the Sprayed file in ECL Watch - but if I put it into a dataset definition, and output it, then I get extra characters which I am assuming is the extra byte.\\n\\nOctal dump of raw file:\\n\\n0003760 " \\\\0 I \\\\0 n \\\\0 s \\\\0 t \\\\0 a \\\\0 l \\\\0 m \\\\0\\n0004000 e \\\\0 n \\\\0 t \\n\\nECL IDE output of the dataset:\\n\\n"Instalment\\n\\nExpected output: Instalment\\n\\nHope that makes sense.\", \"post_time\": \"2020-01-08 17:11:34\" },\n\t{ \"post_id\": 28793, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Re: Reading UTF-32 file\", \"username\": \"bforeman\", \"post_text\": \"Hi Stewart,\\n\\nIn the Language Reference Manual, there is support for UTF-16 using the UNICODE field value type in the RECORD structure. The ECL Watch allows delimited spraying using a variety of UTF options. There is also support for converting to/from UNICODE formats using the FROMUNICODE and TOUNICODE functions. \\n\\nSpecifically what are you trying to do?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-01-08 17:06:20\" },\n\t{ \"post_id\": 28773, \"topic_id\": 7633, \"forum_id\": 10, \"post_subject\": \"Reading a UTF-16 file\", \"username\": \"SChatman85\", \"post_text\": \"Hi all,\\n\\nI saw a thread from 2011 where it stated:\\n\\n\\nThe system doesn't currently support direct reading of utf16be/le, utf32 files. However the file spray does allow you to convert to/from utf16 to utf8.\\n\\nAnd please feel free to submit a feature request for directly reading utf16
\\n\\nDoes anyone know if this is still the case - of no support of UTF16?\\n\\nI have used encoding := 'utf16le' as part of my call to Fileservices.SprayVariable (seems our version doesn't support this in STD.File.SprayDelimited - even though the docs in that environment show it.)\\n\\nWhen I try and then read it from a DATASET definition the data is not clean, trying several options:\\n\\n\\nDATASET('logical_file_name', layout, CSV(HEADING(1),SEPARATOR(','))\\nDATASET('logical_file_name', layout, CSV(HEADING(1),SEPARATOR(','), UNICODE)\\nDATASET('logical_file_name', layout, CSV(HEADING(1),SEPARATOR(','), UNICODE16)\\n
\", \"post_time\": \"2020-01-08 16:00:58\" },\n\t{ \"post_id\": 28941, \"topic_id\": 7651, \"forum_id\": 10, \"post_subject\": \"Re: High counts of x id sharing same y\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi,\\n\\nA simple crosstab report should handle that for you. \\n\\nThese are covered in our free Introduction to ECL (Part 2) online eLearning course (https://learn.lexisnexis.com/Home/Catalog?track=HPCC%20Systems).\\n\\nThey are also discussed in depth in the Programmer's Guide (available here: https://hpccsystems.com/training/documentation/learning-ecl) article titled Cross-Tab Reports.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-09 18:10:54\" },\n\t{ \"post_id\": 28891, \"topic_id\": 7651, \"forum_id\": 10, \"post_subject\": \"High counts of x id sharing same y\", \"username\": \"mo0926\", \"post_text\": \"Hello everyone!\\n\\nHow can I find high counts of addresses sharing same id and then count them?\\n\\nBest\", \"post_time\": \"2020-01-09 16:55:42\" },\n\t{ \"post_id\": 29583, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"The core team pointed out that all this could most probably be accomplished with the built-in:\\n\\nREGEXFINDSET
\\n\\nI've not used it before, glad its been brought to my attention.\\n\\nAllan\", \"post_time\": \"2020-02-17 17:39:02\" },\n\t{ \"post_id\": 29553, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Raised 'Suggestion':\\nhttps://track.hpccsystems.com/browse/TS-23\\nwith the Core Team, to perhaps get a more advanced STD.Str.SplitWords\", \"post_time\": \"2020-02-14 07:43:04\" },\n\t{ \"post_id\": 29523, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nMuch better solution than I was considering. \\n\\nRichard\", \"post_time\": \"2020-02-13 18:58:35\" },\n\t{ \"post_id\": 29513, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"ok,\\nThis copes with, recognises and retains, empty fields at the end of records.\\nI appends an 'X' to the input stream to make sure the ITERATE gets run one further iteration, the 'X' itself is not used.\\nThen the test:\\n
LastItem := R.itm = COUNT(sd);\\n
\\nis still valid, but this time can put the new token EOT through the finite state machine.\\nresulting in a natural flow to the handling of the EOT.\\nInd := 'Allan,and anna,,,a,,"it Don\\\\'t mean, a thing, if it ain\\\\'t got that swing.",\\\\'an Inner "Double,Quote",'\\n +' with "another" hello, and "Another with, embedded comma"\\\\',"a quoted, string" , trailing , stuff.,,,';\\n\\nRIn := {UNSIGNED4 Itm;\\n UNSIGNED1 State;\\n STRING Text};\\nsd := DATASET(LENGTH(Ind)+1,\\n TRANSFORM(Rin;\\n SELF.Itm := COUNTER;\\n SELF.State := 0;\\n SELF.Text := IF(COUNTER > LENGTH(Ind),'X',Ind[COUNTER])));\\nOUTPUT(sd,NAMED('INPUT_AS_A_BYTE_STREAM'),ALL);\\n\\nDQ := 1;\\nSQ := 2;\\nSEP := 3;\\nREST := 4;\\nEOT := 5;\\n\\nFSM := DICTIONARY(DATASET([{0,DQ,5},{0,SQ,4},{0,SEP,1},{0,REST,0},{0,EOT,1}\\n ,{1,DQ,7},{1,SQ,6},{1,SEP,3},{1,REST,2},{1,EOT,3}\\n ,{2,DQ,5},{2,SQ,4},{2,SEP,1},{2,REST,0},{2,EOT,1}\\n ,{3,DQ,7},{3,SQ,6},{3,SEP,3},{3,REST,2},{3,EOT,3}\\n ,{4,DQ,4},{4,SQ,0},{4,SEP,4},{4,REST,4},{4,EOT,1}\\n ,{5,DQ,0},{5,SQ,5},{5,SEP,5},{5,REST,5},{5,EOT,1}\\n ,{6,DQ,4},{6,SQ,0},{6,SEP,4},{6,REST,4},{6,EOT,1}\\n ,{7,DQ,0},{7,SQ,5},{7,SEP,5},{7,REST,5},{7,EOT,1}\\n ],{UNSIGNED1 CurrentState,\\n UNSIGNED1 Tokn,\\n UNSIGNED1 NextState})\\n ,{CurrentState, Tokn => NextState});\\n\\nRIn SplitRecordsIntoFields(RIn L,RIn R) := TRANSFORM\\n LastItem := R.itm = COUNT(sd);\\n SELF.State:= FSM[L.State,IF(LastItem\\n ,EOT\\n ,CASE(R.Text[1],\\n '"' => DQ,\\n '\\\\'' => SQ,\\n ',' => SEP,\\n REST))].NextState;\\n SELF.Text := MAP(SELF.State IN [0,4,5] => L.Text + R.Text\\n ,SELF.State = 1 => L.Text\\n ,SELF.State IN [2,6,7] => R.Text\\n , /* [3] */ '');\\n SELF := R;\\nEND;\\n\\nAllStr := ITERATE(sd,SplitRecordsIntoFields(LEFT,RIGHT));\\nOUTPUT(AllStr,NAMED('OUTPUT_FROM_FSM'),ALL);\\n\\nFiltered := AllStr(State IN [1,3]);\\nOUTPUT(Filtered,NAMED('RECORD_CUT_INTO_FIELDS'));\\n
\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-02-13 18:22:03\" },\n\t{ \"post_id\": 29493, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Ah Richard,\\n\\nJust found out your version does not quite work for trailing field separators in a record\\ne.g. \\ntrailing , stuff.,,,
\\nAs you have unconditionally forced 'LastItem' to perform action 'L.Text + R.Text' but that is not the case for trailing separators.\\n\\nTricky things FSM \\n\\nThe 'proper' solution is to have an <end of text> token actually in the byte stream. \\nI'll post and amendment to your solution.\\n\\nJust thought I'd better mention it just in case, being published on the forum, someone actually uses this mad stuff as an example!\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-02-13 15:38:09\" },\n\t{ \"post_id\": 29483, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes, I realised I did not need the SORT but it was not the meat of the program , so just got left in.\\n\\nI like your approach to 'last item' it's tidyer than mine.\\n\\nI must admit, I did not think you would take it any further, just leave it as a curiosity.\\n\\nAny way, Thanks\\n\\nAllan\", \"post_time\": \"2020-02-13 08:27:54\" },\n\t{ \"post_id\": 29473, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's my version
:
Ind := 'Allan,and anna,,,a,,"it Don\\\\'t mean, a thing, if it ain\\\\'t got that swing.",\\\\'an Inner "Double,Quote", with "another" hello, and "Another with, embedded comma"\\\\',"a quoted, string" , trailing , stuff.';\\n\\nRIn := {UNSIGNED4 Itm;\\n UNSIGNED1 State;\\n STRING Text};\\nsd := DATASET(LENGTH(Ind),\\n TRANSFORM(Rin;\\n SELF.Itm := COUNTER;\\n SELF.State := 0;\\n SELF.Text := Ind[COUNTER]));\\nOUTPUT(sd,NAMED('INPUT_AS_A_BYTE_STREAM'),ALL);\\n\\nDQ := 1;\\nSQ := 2;\\nSEP := 3;\\nREST := 4;\\n\\nFSM := DICTIONARY(DATASET([{0,DQ,5},{0,SQ,4},{0,SEP,1},{0,REST,0}\\n ,{1,DQ,7},{1,SQ,6},{1,SEP,3},{1,REST,2}\\n ,{2,DQ,5},{2,SQ,4},{2,SEP,1},{2,REST,0}\\n ,{3,DQ,7},{3,SQ,6},{3,SEP,3},{3,REST,2}\\n ,{4,DQ,4},{4,SQ,0},{4,SEP,4},{4,REST,4}\\n ,{5,DQ,0},{5,SQ,5},{5,SEP,5},{5,REST,5}\\n ,{6,DQ,4},{6,SQ,0},{6,SEP,4},{6,REST,4}\\n ,{7,DQ,0},{7,SQ,5},{7,SEP,5},{7,REST,5}\\n ],{UNSIGNED1 CurrentState,\\n UNSIGNED1 Tokn,\\n UNSIGNED1 NextState})\\n ,{CurrentState, Tokn => NextState});\\n\\nRIn QuoteIt(RIn L,RIn R) := TRANSFORM\\n LastItem := R.itm = COUNT(sd);\\n SELF.State:= IF(LastItem,\\n 3,\\n FSM[L.State,CASE(R.Text[1],\\n '"' => DQ,\\n '\\\\'' => SQ, \\n ',' => SEP, \\n REST)].NextState);\\n SELF.Text := MAP(SELF.State IN [0,4,5] OR LastItem => L.Text + R.Text\\n ,SELF.State = 1 => L.Text\\n ,SELF.State IN [2,6,7] => R.Text\\n , /* [3] */ '');\\n SELF := R;\\nEND;\\n\\nAllStr := ITERATE(sd,QuoteIt(LEFT,RIGHT));\\nOUTPUT(AllStr,NAMED('OUTPUT_FROM_FSM'),ALL);\\n\\nFiltered := AllStr(State IN [1,3]);\\nOUTPUT(Filtered,NAMED('RECORD_CUT_INTO_FIELDS'));
You'll note that the problem you mentioned is gone with the addition of the LastItem definition. I also removed the SORT since your DATASET(cnt,TRANSFORM()) will build the records already sorted. And I changed your State fields to UNSIGNED1 since the range of possible values is only 0-7.\\n\\nOf course, my next step would be to take all this code and turn it into a FUNCTION that takes a single STRING parameter. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-12 23:42:59\" },\n\t{ \"post_id\": 29463, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n Unfortunately FUNCTIONMACRO still complains in the same way.\\nI even tried doing the work in embedded C++ but hit problems there, see other tickets.\\nBut with the Grit between my teeth, I was not going to be defeated.\\nGot The above functionality working, in standard ECL. You can split strings up by a field separator defined at runtime, plus is copes with quoted strings.\\nIs there a competition for the most impenetrable, unmaintainable ECL written? If there is I would like to submit the following.\\nIt splits a string into a DATASET of chars (a byte stream) and runs it through a finite state machine!! \\n
RIn := {UNSIGNED4 Itm;\\n UNSIGNED2 State;\\n STRING Text};\\n\\nInd := 'Allan,and anna,,,a,,"it Don\\\\'t mean, a thing, if it ain\\\\'t got that swing.",\\\\'an Inner "Double,Quote", with "another" hello, and "Another with, embedded comma"\\\\',"a quoted, string" , trailing , stuff.';\\n\\nd := DATASET(LENGTH(Ind),TRANSFORM(Rin;SELF.Itm := COUNTER;SELF.State := 0;SELF.Text := Ind[COUNTER]));\\nsd := SORT(d,Itm);\\nOUTPUT(sd,NAMED('INPUT_AS_A_BYTE_STREAM'),ALL);\\n\\nDQ := 1;\\nSQ := 2;\\nSEP := 3;\\nREST := 4;\\n\\nFiniteStateMachine := DICTIONARY(DATASET([{0,DQ,5},{0,SQ,4},{0,SEP,1},{0,REST,0}\\n ,{1,DQ,7},{1,SQ,6},{1,SEP,3},{1,REST,2}\\n ,{2,DQ,5},{2,SQ,4},{2,SEP,1},{2,REST,0}\\n ,{3,DQ,7},{3,SQ,6},{3,SEP,3},{3,REST,2}\\n ,{4,DQ,4},{4,SQ,0},{4,SEP,4},{4,REST,4}\\n ,{5,DQ,0},{5,SQ,5},{5,SEP,5},{5,REST,5}\\n ,{6,DQ,4},{6,SQ,0},{6,SEP,4},{6,REST,4}\\n ,{7,DQ,0},{7,SQ,5},{7,SEP,5},{7,REST,5}\\n ],{UNSIGNED2 CurrentState;UNSIGNED1 Tokn;UNSIGNED2 NextState})\\n ,{CurrentState,Tokn=> NextState});\\n\\nRIn QuoteIt(RIn L,RIn R) := TRANSFORM\\n\\n SELF.State:= FiniteStateMachine[L.State,CASE(R.Text[1],'"' => DQ,'\\\\'' => SQ, ',' => SEP, REST)].NextState;\\n SELF.Text := MAP(SELF.State IN [0,4,5] => L.Text + R.Text\\n ,SELF.State IN [1] => L.Text\\n ,SELF.State IN [2,6,7] => R.Text\\n , /* [3] */ '');\\n SELF := R;\\nEND;\\n\\nAllStr := ITERATE(sd,QuoteIt(LEFT,RIGHT));\\nOUTPUT(AllStr,NAMED('OUTPUT_FROM_FSM'),ALL);\\n\\nFiltered := AllStr(State IN [1,3]) & AllStr[COUNT(AllStr)];\\nOUTPUT(Filtered,NAMED('RECORD_CUT_INTO_FIELDS'));\\n
\\nThe slightly messy bit at the end (apart from all of it):\\n& AllStr[COUNT(AllStr)];\\n
\\nCould be made cleaner with an <end of text> character appended to the end of the byte stream, but again I did not want to second guess a character to use.\\n\\nAh - I never want to do that again\\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-02-12 19:57:54\" },\n\t{ \"post_id\": 29223, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYeah, using a FUNCTIONMACRO may just work.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-28 14:05:10\" },\n\t{ \"post_id\": 29213, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Thinking of trying the FUNCTIONMACRO route?\", \"post_time\": \"2020-01-28 10:08:54\" },\n\t{ \"post_id\": 29203, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nYes that was my 1st thought, but it's really sub-optimal.\\nThis could well be rolled out across the UK and Ireland and sods law says some bright spark will want to separate on something not in the list. \\nIts actually more complex than just having a list of separators as you have to ensure the separator is not in the list of allowed punctuation.\\n\\nAlso it offends my sense of Beauty I might have for a language.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2020-01-28 09:11:43\" },\n\t{ \"post_id\": 29183, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Re: Using a RunTime variable in a PATTERN\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI'd suggest you try pre-defining all the most common CSV delimiters as a set of strings, then pass the one to use as its position in the set to the function, something like this:
SetSeps := [',', '|', '\\\\t', ':'];\\nPATTERN Sep := SetSeps[pFieldDelimiter];
Let me know if that works. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-27 14:26:51\" },\n\t{ \"post_id\": 29173, \"topic_id\": 7683, \"forum_id\": 10, \"post_subject\": \"Using a RunTime variable in a PATTERN\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWith help from Richard I got a RULE together to parse CSV record type input where the field separator can be in the data, (so use of quotes)\\n
PATTERN AlphaNumeric := PATTERN('[[:alnum:]]');\\nPATTERN Space := ' ';\\nPATTERN Sep := ',';\\nPATTERN Punct := PATTERN('[-_+.]');\\nPATTERN AnyTxt := ANY*?;\\n\\nPATTERN OperandText := (AlphaNumeric | Punct | Space)+;\\nPATTERN OperandQuotedText := '\\\\'' AnyTxt '\\\\'';\\nPATTERN OperandDQuotedText := '"' AnyTxt '"';\\n\\nPATTERN Operand := OperandText | OperandQuotedText | OperandDQuotedText;\\n \\nRULE cmds := (Operand Sep) | ('' Sep) | (Operand LAST);\\n \\nd := DATASET([{'"It Don\\\\'t mean a thing, if it ain\\\\'t got that swing",\\\\'a comma, in an operand\\\\',1, 455445 ,,, ,,, Allan and Anna , Nina ,'}],{STRING txt});\\n\\nPARSE(d,txt,cmds,\\n TRANSFORM({STRING txt\\n ;UNSIGNED2 Len},\\n SELF.txt := MATCHTEXT(Operand);\\n SELF.Len :=MATCHLENGTH(Operand)),MANY MIN);\\n
\\nThis works just fine, but the character used as a field separator can vary, e.g. be a '|', So I pass the field separator as a parameter to this FUNCTION, but the compiler barfs if I attempt to use the parameter in defining a PATTERN:\\n\\ne.g.\\n\\n PATTERN Sep := pFieldDelimiter;\\n
\\nThe compiler errors with:\\nError: This expression cannot be included in a pattern (21, 30), 2285, \\n
\\n\\nErr - any ideas:\\n\\nAllan\", \"post_time\": \"2020-01-25 17:00:33\" },\n\t{ \"post_id\": 32273, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"McPP82\", \"post_text\": \"Okay, they have to be UNSIGNED, that's what I was doing wrong then. I've managed to fix my problem thanks to that, thanks!\", \"post_time\": \"2020-10-20 11:18:45\" },\n\t{ \"post_id\": 29983, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"Allan\", \"post_text\": \"If a date is a number its trivial to extract components of a date.\\nAssuming dates are of the form YYYYMMDD\\n\\nthen:\\nYear is just date DIV 10000\\nMonth is just (date DIV 100) % 100\\nDay is just date % 100\\n
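As a tiny worked example of the arithmetic above (the sample value is ours):

d  := 20051201;
yr := d DIV 10000;         // 2005
mo := (d DIV 100) % 100;   // 12
dy := d % 100;             // 1
OUTPUT(yr); OUTPUT(mo); OUTPUT(dy);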
\\n\\nAlso dates should be UNSIGNED not INTEGER.\", \"post_time\": \"2020-04-02 10:17:47\" },\n\t{ \"post_id\": 29503, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"mo0926\", \"post_text\": \"Thank you, Richard. That helped clear some of the errors I was getting.\", \"post_time\": \"2020-02-13 18:02:15\" },\n\t{ \"post_id\": 29403, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi,\\n\\nThis line of code: Unsigned8 dob := (Unsigned8)indob;
should more properly be this: STRING8 dob := (STRING8)indob;
because otherwise in the next three definitions the system has to implicitly cast your UNSIGNED8 dob to a STRING8 in order to do the string slicing operation. Better to do a single explicit cast than make the system do it implicitly three times.\\n\\nAlso, this line: Return (Unsigned8)(YY + MM + DD);
is actually changing the value of the input date. IOW, you're not stripping trailing zeroes, you're changing the numeric 20050000 value to 2005. That's why my example returned a STRING8, NOT an UNSIGNED8 value. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-10 18:54:44\" },\n\t{ \"post_id\": 29393, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"mo0926\", \"post_text\": \"Perfect! Your solution helped me arrive at this solution. \\n\\nLoseTrailingZeroes(Unsigned4 indob) := Function\\n Unsigned8 dob := (Unsigned8)indob;\\n String4 YY := dob[1..4];\\n String2 DD := if(dob[7..8]='00','',dob[7..8]);\\n String2 MM := if(dob[5..6]='00' AND DD='','',dob[5..6]);\\n Return (Unsigned8)(YY + MM + DD);\\nEnd;\\n\\nFilteredDobs := $.New_Persons.File( Not dob = 0);\\n\\nRec := Record\\n Lexid := Filtereddobs.lexid;\\n Dob := Filtereddobs.dob;\\n Src := Filtereddobs.src;\\nEnd;\\n\\nRec SlimRec( Filtereddobs Le ) := Transform\\n Self.dob := LoseTrailingZeroes(le.dob);\\n Self := Le;\\nEnd;\\n\\nShared RecSlim := Project(FilteredDobs, SlimRec(Left));\\n\\nNonBlankDob := Record\\n RecSlim.dob;\\nEnd;\\n\\nc_dob := Table(RecSlim, Nonblankdob, dob);\\ndc_dob := Distribute(c_dob, Hash(dob));\\nsdc_dob := Sort(dc_dob, dob, Local);\\nShared dsdc_dob := Dedup(sdc_dob, dob, Local);\\n\\ncrosstab_dob := Record\\n RecSlim.dob;\\n RecordCnt := Count(Group);\\nEnd;\\n\\nOut_Crosstab_dob := Table(RecSlim, crosstab_dob, dob);\\nExport S_Crosstab_dob := Sort(Out_Crosstab_dob, -RecordCnt);
\", \"post_time\": \"2020-02-10 18:21:02\" },\n\t{ \"post_id\": 29363, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi,\\n\\nHere's the same function, changed to take the UNSIGNED4 date and return a STRING8 date with the trailing zeroes removed:LoseTrailingZeroes(UNSIGNED4 indob) := FUNCTION\\n STRING8 dob := (STRING8)indob;\\n STRING4 YY := dob[1..4];\\n STRING2 DD := IF(dob[7..8]='00','',dob[7..8]);\\n STRING2 MM := IF(dob[5..6]='00' AND DD='','',dob[5..6]);\\n RETURN YY + MM + DD;\\nEND;\\n\\nLoseTrailingZeroes(20050000); //year only\\nLoseTrailingZeroes(20051000); //year+month\\nLoseTrailingZeroes(20051020); //full date\\nLoseTrailingZeroes(20050020); //year+day (invalid date)
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-06 18:19:07\" },\n\t{ \"post_id\": 29353, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"mo0926\", \"post_text\": \"More of a display issue where I am trying to output the dob column without the incomplete dobs. \\n\\nSo taking column A and transforming it to column B below:\\n\\n A. 1. 20051200 B.1. 200512 \\n 2. 19870000 2. 1987\\n 3. 20011011 3. 20011011\", \"post_time\": \"2020-02-06 16:29:52\" },\n\t{ \"post_id\": 29343, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi,\\n\\nYes, it does. If the date field is an integer type then you can't remove the trailing zeroes (doing that would change the numeric value of the field). You can only replace trailing zeroes with spaces in a STRING.\\n\\nWhat's the real problem you're trying to solve with this? If it's just a display issue, then you can cast the date to a STRING8 then strip the trailing zeroes and display that. \\n\\nBut if you want them stored that way, then you'd have to change the data type in the file to a STRING8 and live with the doubled storage requirement (4 bytes times billions of records is a lot of disk space).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-05 14:41:49\" },\n\t{ \"post_id\": 29323, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"mo0926\", \"post_text\": \"Thank you, Richard! In this case I am dealing with the header file where the date of birth is an integer. Does this change the solution?\", \"post_time\": \"2020-02-04 15:40:39\" },\n\t{ \"post_id\": 29313, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Re: Working with dates\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi,\\n\\nThat depends on what the input data type is and what you're trying to do with it. I have to assume that your output type is STRING, since you seem to want to get rid of trailing zeroes.\\n\\nHere's one way to approach the issue:LoseTrailingZeroes(STRING8 dob) := FUNCTION\\n STRING4 YY := dob[1..4];\\n STRING2 DD := IF(dob[7..8]='00','',dob[7..8]);\\n STRING2 MM := IF(dob[5..6]='00' AND DD='','',dob[5..6]);\\n RETURN (STRING8)(YY + MM + DD);\\nEND;\\n\\nLoseTrailingZeroes('20050000'); //year only\\nLoseTrailingZeroes('20051000'); //year+month\\nLoseTrailingZeroes('20051020'); //full date
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-04 14:53:48\" },\n\t{ \"post_id\": 29303, \"topic_id\": 7713, \"forum_id\": 10, \"post_subject\": \"Working with dates\", \"username\": \"mo0926\", \"post_text\": \"How does one eliminate zeros in an output of dobs? \\n\\nFor example: output all complete dobs such as 20051201... etc in a column and not partially incomplete dobs like 20050000\", \"post_time\": \"2020-02-03 22:15:39\" },\n\t{ \"post_id\": 29443, \"topic_id\": 7733, \"forum_id\": 10, \"post_subject\": \"Re: Returing references to STRINGs from C++\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nhttps://track.hpccsystems.com/browse/HPCC-23506\\n\\nCreated.\\n\\nAllan\", \"post_time\": \"2020-02-12 07:40:32\" },\n\t{ \"post_id\": 29433, \"topic_id\": 7733, \"forum_id\": 10, \"post_subject\": \"Re: Returing references to STRINGs from C++\", \"username\": \"rtaylor\", \"post_text\": \"Allan,Err what am I doing wrong?
IMO, nothing! \\n\\nI tried every way I could and got the same error in every instance, although this example with a BOOLEAN return type works fine:
BOOLEAN isUpper(const string mystring) := BEGINC++\\n size_t i=0;\\n while (i < lenMystring)\\n {\\n if (!isupper((byte)mystring[i]))\\n return false;\\n i++;\\n }\\n return true;\\nENDC++;\\nisUpper('JIM');\\nisUpper('Sue');
So I am of the opinion that it's most likely the combination of a STRING parameter and a STRING return type that's bollixing things up here. \\n\\nPlease submit a JIRA on this issue. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-11 22:04:46\" },\n\t{ \"post_id\": 29423, \"topic_id\": 7733, \"forum_id\": 10, \"post_subject\": \"Returing references to STRINGs from C++\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm attempting to return an address into a STRING from some C++, I have:\\n
STRING SplitUp(CONST STRING dat) := BEGINC++\\n __result = dat;\\n return;\\nENDC++;\\n\\nSplitUp('Abc');\\n
\\nBut the compiler complains with:\\nError: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive] (24, 15), 6003, W20200211-172713_1.cpp\\n
\\n\\nBut, for once, I've actually read the ECL Ref Manual, where it states:\\n\\nA function that takes a string parameter should also have the type prefixed by const in the ECL code so that modern\\ncompilers don't report errors when constant strings are passed to the function.\\n\\nSame Error if I leave out the CONST.\\n\\nErr what am I doing wrong?\\nCheers\\n\\nAllan\", \"post_time\": \"2020-02-11 17:44:13\" },\n\t{ \"post_id\": 32053, \"topic_id\": 7763, \"forum_id\": 10, \"post_subject\": \"Re: Truth, False, or Blank\", \"username\": \"mo0926\", \"post_text\": \"Thank you!\", \"post_time\": \"2020-09-21 20:46:38\" },\n\t{ \"post_id\": 29613, \"topic_id\": 7763, \"forum_id\": 10, \"post_subject\": \"Re: Truth, False, or Blank\", \"username\": \"rtaylor\", \"post_text\": \"mauricexxvi , \\n\\nYou could do it this way:\\nself.fname_match := MAP(Le.fname = '' AND Ri.fname = '' => '', \\n Le.fname = Ri.fname => 'T',\\n 'F');
\\nSo this code reads: IF the names are blank, return blank, ELSE IF the names match return 'T' ELSE return 'F'.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-18 17:02:05\" },\n\t{ \"post_id\": 29603, \"topic_id\": 7763, \"forum_id\": 10, \"post_subject\": \"Truth, False, or Blank\", \"username\": \"mo0926\", \"post_text\": \"Hi everyone, \\n\\nI was wondering how can I compare two files and check for matching records and flag each result accordingly with True, False, or Blank. \\n\\nSo for True or False I can easily use: \\n self.fname_match := if(Le.fname = Ri.fname, 'T', 'F');
\\n\\nBut how can I compare to get True, False or Blank when there is an empty string?\", \"post_time\": \"2020-02-18 16:53:54\" },\n\t{ \"post_id\": 30703, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Richard\\n\\nD\", \"post_time\": \"2020-05-19 12:12:42\" },\n\t{ \"post_id\": 30693, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nSpeed is almost always paramount, and a ~3X difference (38 vs 90 seconds) is always worth it. So I would keep the speed, in this circumstance. But if later code needs a different distribution for optimal performance, then you should do that, too.\\n\\nIOW, Instead of this:JOIN(...,LOOKUP,LEFT OUTER)
try this:DISTRIBUTE(JOIN(...,LOOKUP,LEFT OUTER),HASH32(ClientID))
so that your PERSIST writes the data in a less skewed manner that's already set up for your later operation.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-19 11:42:53\" },\n\t{ \"post_id\": 30663, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"David Dasher\", \"post_text\": \"Thanks, that makes sense Richard.\\n\\nOne further question from your example then, if you don't mind.\\n\\nTo get the best/near perfect distribution I'm doing a hash32 of the ID of that table before it even gets there, so it persists the distribution after the left lookup join; however, if I distribute it on the ClientId (which can have many rows) the distribution goes to about +10 / -10 and gives me slower performance, taking me to about 1 min 30 seconds instead of 38 seconds. Further down the code I'm doing some other joins and dedups, which because of the large number would be better done locally on the client id, but I'm always stuck with the worse distribution and overall slower performance, so it's actually been better to just do lookup joins and distribute where necessary.\\n\\nWould you say you should distribute for the best distribution or the best logical distribution according to how you're going to use the data further down the line, even if the performance is slightly worse? Or does speed trump everything else? (In this case it does for me but it just feels messy).\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2020-05-18 19:35:40\" },\n\t{ \"post_id\": 30653, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"rtaylor\", \"post_text\": \"David,a left only lookup join
OK, that makes a difference, because the skew numbers you're seeing in the graph are on the input records to the Lookup Join and the analyzer warning is on the disk write (where no skew numbers are shown in the graph), so the skewed data is the result of the LEFT ONLY JOIN (as in, just the non-matches). \\n\\nI don't see any way of predicting or controlling that beforehand. You could try throwing a DISTRIBUTE around that JOIN so the PERSIST would be on the re-distributed result, but if that doesn't work I wouldn't worry too much about it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-18 19:20:50\" },\n\t{ \"post_id\": 30643, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"David Dasher\", \"post_text\": \"Hi Richard\\n\\nI can confirm that I am not using a nested child dataset. I will submit a Jira, however the work unit analyser does show more suggestions if I'm doing a lookup other than other joins. \\n\\nSpeed wise I have it about 38 seconds on a left only lookup join using 1.8 billion rows on the left and 1.2 million on the right using a 144 node cluster, to be fair I'm pretty happy with that but if I can squeeze more, then cool.\\n\\nThanks for replying \\n\\nDavid\", \"post_time\": \"2020-05-18 18:56:31\" },\n\t{ \"post_id\": 30633, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Re: Skew and Distribution\", \"username\": \"rtaylor\", \"post_text\": \"David,Significant skew in child records causes uneven disk write time (w5:graph3:sg15:a19)
I added the emphasis, because that word is what I think might be the problem. It suggests to me that you're working with a nested child dataset. \\n\\nIf that's the case, then it appears to me that it's very possible the skew numbers shown in the graph may be being calculated just using parent record counts, while the workunit analyzer may be doing a more complex analysis using the actual amount of data in the nested child dataset. IFF that's the case, that would explain the discrepancy between the two.\\n\\nBut, if you're not using nested child datasets, then we'll have to explore further. Either way, this issue is worth submitting a JIRA ticket so the developers can decide which is right(er) -- the WU Analyzer or the graph.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-18 17:02:16\" },\n\t{ \"post_id\": 30613, \"topic_id\": 8083, \"forum_id\": 10, \"post_subject\": \"Skew and Distribution\", \"username\": \"David Dasher\", \"post_text\": \"Hi All\\n\\nI'm having a few issues getting the most of our platform due to Skew issues. I'm hoping someone can give me a few pointers. Having read various forum posts and blogs on Distribute and joins I've taken one of our main graphs down from 5 minutes to 1 minute, however, the work unit analyser is telling my I still have skew issues mainly around write times to various persists.\\n\\nI have attached two screen shots of a graph where the work unit analyser suggests we have skew in writing. Where I'm confused is the left side of the graph looks to be distributed well and I'm doing a lookup join from the right (I've tried a local join from the right but this causes further skew and slows the process)\\n\\nI've distributed the data on its Primary key and also the UserId which is actually more beneficial later on but slows the graph down significantly. I've tried a basic distribute with no hash which also gives a worse skew.\\n\\nI'm confused as to what I should be looking at to see the skew in the main graph or the hover data which tells a completely different picture. Even if I understood the hover data I'm not sure what else I can distribute the data with to get better performance.\\n\\nSignificant skew in child records causes uneven disk write time (w5:graph3:sg15:a19)\\n\\n[attachment=1:zolq4s3e]Screenshot 2020-05-15 at 15.39.23.png\\n\\n[attachment=0:zolq4s3e]Screenshot 2020-05-15 at 15.45.30.png\", \"post_time\": \"2020-05-15 15:00:59\" },\n\t{ \"post_id\": 30733, \"topic_id\": 8093, \"forum_id\": 10, \"post_subject\": \"Re: Error Code 1031 to Error Code 7\", \"username\": \"Roger Dev\", \"post_text\": \"Hi,\\nCould you please generate a ZAP Report from your work unit (using ECLWatch) and send it to me at Roger.Dev@lexisnexisrisk.com.\\nThanks,\\nRoger Dev\", \"post_time\": \"2020-05-19 18:39:14\" },\n\t{ \"post_id\": 30623, \"topic_id\": 8093, \"forum_id\": 10, \"post_subject\": \"Error Code 1031 to Error Code 7\", \"username\": \"tpay\", \"post_text\": \"Hi there, \\n\\nI have a data set that has millions of rows. I have been trying to run ML algorithms on it. 
I got the following errors while using Linear Regression.\\n\\nI first got the following error:\\n\\nSystem error: 1301: Graph graph1[278], diskread[288]: SLAVE #2 [XXX.XXX.XXX.XXX:20100]: Pool memory exhausted: pool id 4194464 exhausted, requested 153 heap(822/48140) global(822/48160), Pool memory exhausted: pool id 4194464 exhausted, requested 153 heap(822/48140) global(822/48160) - handling file: /mnt/var/lib/HPCCSystems/mythor/temp/20100/3__w20200517-054314._2_of_4 - caused by (1301, Pool memory exhausted: pool id 4194464 exhausted, requested 153 heap(822/48140) global(822/48160))\\n\\nThen after deleting some logical files to free up memory I got this second error:\\n\\nSystem error: -7: Graph graph1[299], localresultread[300]: SLAVE #1 [XXX.XXX.XXX.XXX:20100]: Jbuff: Out of Memory (134217728), - caused by (-7, Jbuff: Out of Memory (134217728))\\n\\nHowever, I am not quite sure what this second error is about.\", \"post_time\": \"2020-05-18 15:54:40\" },\n\t{ \"post_id\": 32113, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Re: Error 3000:\", \"username\": \"McPP82\", \"post_text\": \"I've run into a similar issue, I'll try to obtain crash logs from the company's admin and will report here, I hope it'll help mrumsey as well.\", \"post_time\": \"2020-09-30 17:11:24\" },\n\t{ \"post_id\": 31623, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Re: Error 3000:\", \"username\": \"mrumsey\", \"post_text\": \"I couldn't get ecl zapgen to work. I am not an admin, if that matters.\", \"post_time\": \"2020-08-05 14:32:45\" },\n\t{ \"post_id\": 31613, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Re: Error 3000:\", \"username\": \"jsmith\", \"post_text\": \"ok thanks for the update.\\n\\nA full Zap report (inc. slave logs) would be best, short of that, if you could send me the thorslave log (on the node indicated by the IP in the error), it might shed some more light on what the underlying problem was.\\n\\nThanks.\", \"post_time\": \"2020-08-05 14:12:32\" },\n\t{ \"post_id\": 31603, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Re: Error 3000:\", \"username\": \"mrumsey\", \"post_text\": \"I'm not sure if there has been an upgrade or not.\\n\\nI do know that I can read in the file, transform the file (simple PROJECT), and then output the results. 
I only get this error when I add in a line to output to disk for a particular file.\", \"post_time\": \"2020-08-05 13:01:21\" },\n\t{ \"post_id\": 31593, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Re: Error 3000:\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe error here is whilst reading the input file, unrelated to the write stage.\\n"assert(required <= maxAvailable()" usually is an indication of a record format mismatch or a file type mismatch.\\n\\n>This code worked for prior files of the same type \\n\\njust to clarify, do you mean previous "~FilePath::SubPath::FileName_*" files can still be read with this ECL, but a recently create ~FilePath::SubPath::FileName_xxxx file fails with this deserialization assert?\\n\\nWas there a HPCC upgrade recently when this started to happen?\\n\\nIf possible, could you send me a ZAP report (via EclWatch), including slave logs to me?\", \"post_time\": \"2020-08-04 22:35:47\" },\n\t{ \"post_id\": 31583, \"topic_id\": 8303, \"forum_id\": 10, \"post_subject\": \"Error 3000:\", \"username\": \"mrumsey\", \"post_text\": \"I am trying to write a file to disk and am getting the following error code.\\n\\nError: System error: 3000: Graph graph1[5], diskread[6]: SLAVE #400 [IPAddress]: assert(required <= maxAvailable()) failed - file: rtlcommon.hpp, line 137, assert(required <= maxAvailable()) failed - file: rtlcommon.hpp, line 137 - handling file: [FileName]._400_of_400 - caused by (3000, assert(required <= maxAvailable()) failed - file: rtlcommon.hpp, line 137) (0, 0), 3000,
\\n\\nMy code is:\\nOUTPUT(DISTRIBUTE(Pay2),,\\t'~FilePath::SubPath::FileName_'\\t+ Year + 'Q' + qtr_nbr, OVERWRITE, THOR, COMPRESSED);
\\n\\nThis code worked for prior files of the same type (spraying a THOR file archived in a Linux environment, transforming, and re-saving as THOR), but stopped working today.\\n\\nDoes anyone have any ideas on how to fix this error? I am on Version community_7.10.8-1.\\n\\nThanks,\\n\\nEdit: Removed IP
\", \"post_time\": \"2020-08-04 19:14:48\" },\n\t{ \"post_id\": 31853, \"topic_id\": 8343, \"forum_id\": 10, \"post_subject\": \"Re: Complex helper class\", \"username\": \"rtaylor\", \"post_text\": \"loki,\\n\\nContext is important. Can you post some example code that demonstrates the issue?\\n\\nRichard\", \"post_time\": \"2020-08-20 18:48:09\" },\n\t{ \"post_id\": 31843, \"topic_id\": 8343, \"forum_id\": 10, \"post_subject\": \"Complex helper class\", \"username\": \"loki\", \"post_text\": \"What is the significance of the complex helper class warning? The reason for it seems difficult to track down. (E.g., the activity might just be a PROJECT statement.)\\n\\nIt affects compile time, and perhaps execution time. And it is not linear. For example, if activity 1 has 20,000 bytes(?) and activity 2 has 14,000, Adding an IF statement creates more than 34,000.\\n\\nE.g.\\nY := IF (X,\\n activity 1, activity 2);\\n\\nIs it a problem? If so, can it be ameliorated?\", \"post_time\": \"2020-08-20 18:25:22\" },\n\t{ \"post_id\": 32363, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"McPP82\", \"post_text\": \"This is what I'm regularly doing, so, well, I can confirm that it works.\", \"post_time\": \"2020-10-24 14:28:22\" },\n\t{ \"post_id\": 32173, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"ghalliday\", \"post_text\": \"I think the way of achieving what you want is to have a file with all the potential fields. (PROJECT the input file if it has fewer fields.) Then have a PROJECT which clears any fields that are specified in the configuration file.\", \"post_time\": \"2020-10-12 15:30:29\" },\n\t{ \"post_id\": 32163, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"SChatman85\", \"post_text\": \"Thanks - have raised IDE-997\", \"post_time\": \"2020-10-07 08:07:52\" },\n\t{ \"post_id\": 32153, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Stuart,\\n\\nI think that one requires a JIRA ticket with full details of your code and which version(s) of ECL/HPCC you're using.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-06 19:29:02\" },\n\t{ \"post_id\": 32143, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"SChatman85\", \"post_text\": \"Thanks for joining the dots for me on this one! I hadn't thought of carrying out the PROJECT inside the FM - I had been focusing on just trying to get the layout to carry out the PROJECT outside.\\n\\nI had to make one small change to handle field type of boolean - then when I printed out the MACRO RECORD structure it looked good - however when I run the code I receive the error Error: ‘ctx’ was not declared in this scope (156, 21), 6003, W20201006-152704.cpp
\\n\\nI believe this is related to C++ but have no idea how I begin to look at finding what the issue is here?\", \"post_time\": \"2020-10-06 14:28:52\" },\n\t{ \"post_id\": 32133, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Stuart,\\n\\nOK, this should get you a little further down the road:profile_layout := RECORD\\n STRING fieldname;\\n BOOLEAN field_include;\\nEND;\\nprofile1 := DATASET([\\n {'id',TRUE},\\n {'previous_id',TRUE},\\n {'title',FALSE}\\n ], profile_layout\\n );\\nprofile2 := DATASET([\\n {'id',TRUE},\\n {'previous_id',FALSE},\\n {'title',TRUE}\\n ], profile_layout\\n );\\n\\nActual_Lay := RECORD\\n STRING ID;\\n STRING PREVIOUS_ID;\\n STRING TITLE;\\nEND;\\n\\nDS := DATASET([{'123','21', 'MR'}],Actual_Lay);\\n\\nProfileProject(inds, profds) := FUNCTIONMACRO\\n prof_layout := RECORD\\n STRING fieldname;\\n BOOLEAN field_include;\\n END;\\n\\n #DECLARE(outrec);\\n #SET(outrec,'OutputRecord(DATASET(prof_layout) P) := RECORD\\\\n');\\n #EXPORTXML(Fred,inds);\\n #FOR (Fred)\\n #FOR (Field) \\n #APPEND(outrec,' IFBLOCK(P(fieldname=\\\\'' + \\n %'{@label}'% + '\\\\')[1].field_include=TRUE)\\\\n'); \\n #IF(%'{@size}'% = '-15')\\n #APPEND(outrec,' ' + %'{@type}'% + ' ' + %'{@label}'% + ';\\\\n'); \\n #ELSE\\n #APPEND(outrec,' ' + %'{@type}'% + %'{@size}'% \\n + ' ' + %'{@label}'% + ';\\\\n'); \\n #END\\n #APPEND(outrec,' END;\\\\n');\\n #END;\\n #END;\\n #APPEND(outrec,'END;\\\\n'); \\n %outrec%; //generate the RECORD structure code\\n RETURN PROJECT(inds,OutputRecord(profds));\\nENDMACRO;\\n\\nProfileProject(DS,Profile1);\\nProfileProject(DS,Profile2);
NB -- the Profile field names are now all lowercase, because that's the way they're returned by #EXPORTXML.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-02 20:01:51\" },\n\t{ \"post_id\": 32123, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"SChatman85\", \"post_text\": \"Thanks for the suggestion Richard.\\n\\nIFBLOCK was not something I was aware of so will definitely take a look at what that offers - however, in terms of a solution I didn't explain the scale of whet I would need to do fully.\\n\\nThe profile definition may have around 100-200 rows (to match our dataset layout) and then I will have 40-50 of these profiles, one for each of the datafiles we want to process.\\n\\n\\nI would need something that works 'dynamically' to generate this as it will not be possible to explicitly code for each scenario.\\n\\nThanks,\\n\\nStuart\", \"post_time\": \"2020-10-01 10:25:32\" },\n\t{ \"post_id\": 32103, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"rtaylor\", \"post_text\": \"Stuart,\\n\\nOK, here's how I would do that:profile_layout := RECORD\\n STRING fieldname;\\n BOOLEAN field_include;\\nEND;\\nprofile1 := DATASET([\\n {'ID',TRUE},\\n {'PREVIOUS_ID',TRUE},\\n {'TITLE',FALSE}\\n ], profile_layout\\n );\\nprofile2 := DATASET([\\n {'ID',TRUE},\\n {'PREVIOUS_ID',FALSE},\\n {'TITLE',TRUE}\\n ], profile_layout\\n );\\n\\nActual_Lay := RECORD\\n STRING ID;\\n STRING PREVIOUS_ID;\\n STRING TITLE;\\nEND;\\n\\nDS := DATASET([{'123','21', 'MR'}],Actual_Lay);\\n\\nNew_layout(DATASET(profile_layout) P) := RECORD\\n IFBLOCK(P(fieldname='ID')[1].field_include=TRUE)\\n STRING ID;\\n END;\\n IFBLOCK(P(fieldname='PREVIOUS_ID')[1].field_include=TRUE)\\n STRING PREVIOUS_ID;\\n END;\\n IFBLOCK(P(fieldname='TITLE')[1].field_include=TRUE)\\n STRING TITLE;\\n END;\\nEND;\\n\\nPROJECT(DS,New_Layout(Profile1));\\nPROJECT(DS,New_Layout(Profile2));
Using an IFBLOCK structure around each field makes every field conditional, and passing the profile to use as a parameter to the new RECORD structure allows you to just use a simple PROJECT to the new structure. Notice the two results are different given different passed profiles.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-09-24 12:23:10\" },\n\t{ \"post_id\": 32093, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"SChatman85\", \"post_text\": \"Hi Richard,\\n\\nThanks for the reply.\\n\\nI have a dataset which acts as a configuration file (profile), in which there is a BOOLEAN field that can be set to TRUE or FALSE.\\n\\nThis configuration will be the layout of a logical file containing data that I need to carry out actions on.\\n\\nWhat I want to do is change that logical file, removing all fields which are set to FALSE in the configuration.\\n\\n\\nMy plan was to take all the rows in the profile dataset, where the value is TRUE, then take the field_name attribute and make that a layout I could then use within a PROEJECT/TRANSFORM.\\n\\nHope that all makes sense!\\n\\nStuart\", \"post_time\": \"2020-09-24 07:46:21\" },\n\t{ \"post_id\": 32083, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Re: Dynamic Layout using a dataset\", \"username\": \"rtaylor\", \"post_text\": \"SChatman85,\\n\\nIn order to provide the best answer, I first need to know what exactly you're trying to accomplish. IOW, what problem are you trying to solve here? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-09-23 19:15:17\" },\n\t{ \"post_id\": 32073, \"topic_id\": 8383, \"forum_id\": 10, \"post_subject\": \"Dynamic Layout using a dataset\", \"username\": \"SChatman85\", \"post_text\": \"Hi,\\n\\nI am trying to build a layout definition using information from within a dataset.\\n\\nThe below code should hopefully illustrate what I am trying to achieve, although it is in itself quite a messy way of trying to achieve it.\\n\\nThe code runs and produces the layout definition I expect - which is governed by the TRUE/FALSE filter in the profile dataset, but when I try and use it I run into the error:\\n\\nError: Constant expression expected (42, 12), 2071,
\\n\\nIs anyone able to either:\\n provide a better approach for achieving the layout\\n or, help resolve the above error\\n\\nThanks\\n\\nActual_Lay := RECORD\\n STRING ID;\\n STRING PREVIOUS_ID;\\n STRING TITLE;\\nEND;\\n\\nDS := DATASET([{'123','21', 'MR'}],Actual_Lay);\\nDS;\\n\\nprofile_layout := RECORD\\n STRING fieldname;\\n BOOLEAN field_include;\\nEND;\\n\\nprofile := DATASET([\\n {'ID',TRUE},\\n {'PREVIOUS_ID',TRUE},\\n {\\t'TITLE',FALSE}\\n ], profile_layout\\n );\\nprofile;\\n\\ntemp_lay := RECORD\\n profile;\\n STRING ecl_string;\\nEND;\\n\\nprofile(field_include=TRUE);\\ntemp_profile := PROJECT(profile(field_include=TRUE), TRANSFORM(temp_lay, SELF := LEFT; SELF := [];));\\n\\ntemp_lay t_field_append(temp_profile L, temp_profile R) := TRANSFORM\\n SELF.ecl_string := L.ecl_string + 'TYPEOF(DS.' + R.fieldname + ') ' + R.fieldname + ';';\\n SELF := [];\\nEND;\\n\\ntest := ITERATE(temp_profile, t_field_append(LEFT, RIGHT));\\ntest;\\noutput_layout := SORT(test, -LENGTH(ecl_string))[1];\\nOUTPUT('new_layout := RECORD ' + output_layout.ecl_string + ' END;');\\n\\n/* new_layout := RECORD \\n #EXPAND(output_layout.ecl_string)\\n END;\\n \\n ds_new := PROJECT(DS, TRANSFORM(new_layout, SELF := LEFT;));\\n ds_new;\\n*/
\", \"post_time\": \"2020-09-23 15:42:04\" },\n\t{ \"post_id\": 32443, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Re: Identifying positions of differences in two strings.\", \"username\": \"Allan\", \"post_text\": \"Thanks Again richard.\\nThis is going to be most useful in comparing layouts between environments and highlighting differences. \\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-10-26 19:17:39\" },\n\t{ \"post_id\": 32433, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Re: Identifying positions of differences in two strings.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nSo I had a thought that both scenarios would be easier if the function returned just one record for each mismatched word, so here's the one replacement definition required to do that:\\n DiffWords := JOIN(ds1(WordNum IN SetDiffs),ds2(WordNum IN SetDiffs),\\n LEFT.WordNum = RIGHT.WordNum, \\n TRANSFORM({UNSIGNED WordNum,\\n {WordRec AND NOT WordNum} Lword,\\n {WordRec AND NOT WordNum} Rword},\\n SELF.Lword := LEFT,\\t\\t\\t\\t\\t\\n SELF.Rword := RIGHT,\\t\\t\\t\\t\\t\\n SELF := LEFT));\\t\\t\\t\\t\\t\\n
\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-26 18:38:46\" },\n\t{ \"post_id\": 32423, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Re: Identifying positions of differences in two strings.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nI would post-process the result (using WordNum) to find any contiguous differences, if necessary. That keeps this a simpler tool, useful for both cases. \\n\\nRichard\", \"post_time\": \"2020-10-26 16:01:35\" },\n\t{ \"post_id\": 32413, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Re: Identifying positions of differences in two strings.\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nThis is great.\\n\\nJust one minor point, if consecutive words are different, like:\\n
The Fox jumped over the lazy Dog.\\nThe Dog jumped over the layy Fox.\\n
\\nCurrently, the difference detected in 'lazy' and 'Dog' come out as distinct differences but really it would be nice if they were merged into one reference.\\nThere is enough information in the output for the user of this function to do his own merge (given offset and length), but this could be done for them.\\n\\nThanks Richard, all the best\\n\\nAllan\", \"post_time\": \"2020-10-26 15:55:52\" },\n\t{ \"post_id\": 32383, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Re: Identifying positions of differences in two strings.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nHere's my quick take on it:IMPORT Std;\\nWordDiffs(STRING s1,STRING s2,BOOLEAN pNoCase=FALSE) := FUNCTION\\n ins1 := IF(pNoCase=FALSE,s1,Std.Str.ToUpperCase(s1));\\n ins2 := IF(pNoCase=FALSE,s2,Std.Str.ToUpperCase(s2));\\n FindWord(STRING w,STRING s) := Std.Str.Find(s,w,1);\\n WordSet1 := Std.Str.Splitwords(ins1,' ');\\n WordSet2 := Std.Str.Splitwords(ins2,' ');\\n WordRec := {UNSIGNED WordNum,STRING word,UNSIGNED StartPos,UNSIGNED WordLen};\\n WordRec WordXF(INTEGER C, STRING s, SET OF STRING ws) := TRANSFORM\\n SELF.WordNum := C;\\n SELF.word := ws[C];\\n SELF.WordLen := LENGTH(ws[C]);\\n SELF.StartPos := FindWord(TRIM(ws[C] + ' ' + ws[C+1]),s); \\n END;\\n ds1 := DATASET(COUNT(WordSet1),WordXF(COUNTER, ins1, WordSet1));\\n ds2 := DATASET(COUNT(WordSet2),WordXF(COUNTER, ins2, WordSet2));\\n // RETURN ds1+ds2; //just to test positions\\n j := JOIN(ds1,ds2,\\n LEFT.WordNum=RIGHT.WordNum,\\n TRANSFORM({UNSIGNED WordNum,STRING diff},\\n SELF.WordNum := LEFT.WordNum,\\n SELF.diff := ROWDIFF(LEFT,RIGHT)))(diff<>'');\\n SetDiffs := SET(j,WordNum);\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n DiffWords := ds1(WordNum IN SetDiffs) + ds2(WordNum IN SetDiffs);\\t\\t\\t\\t\\t\\n RETURN SORT(DiffWords,WordNum);\\nEND;\\n\\nt1 := 'The Fox jumped over the lazy Dog.';\\nt2 := 'The Dog jumped over the lazy Fox.';\\nt3 := 'The fox jumped over the lazy Fox.';\\n\\nWordDiffs(t1,t2);\\nWordDiffs(t1,t3,TRUE);
I solved the "possible duplicate words" issue by looking for the position of the word and its following word (look at case insensitive and "THE"). Let me know if you see any issues I missed. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-26 14:06:54\" },\n\t{ \"post_id\": 32343, \"topic_id\": 8433, \"forum_id\": 10, \"post_subject\": \"Identifying positions of differences in two strings.\", \"username\": \"Allan\", \"post_text\": \"Hi Given two strings, e.g.\\n\\nThe Fox jumped over the lazy Dog.\\nThe Dog jumped over the lazy Fox.\\nI would like a function to return say a dataset of start position of difference and length of difference. e.g\\n\\nStart Position Length\\n5 3\\n30 3\\n
\\nThere should be some criteria to merge differences within words. In the example above Fox and Dog share a common letter, but it's the whole word that is different, not just the individual characters. Kind of Merge differences occurring between white space.\\nThe return format does not have to be exactly as I show above. Preferable it should be suitable for the 'data Visualizations' library, enabling differences to be highlighted. \\n\\nThere is always the EMBED option to drop into other languages and library set, but hey this should be doable in pure ECL?\\n(P.S. Case Sensitivity, include punctuation, collapse white space, that kind of thing, can just be options to the FUNCTION, they don't effect the basic approach much) \\n\\nYours\\n\\nAllan\", \"post_time\": \"2020-10-23 12:01:55\" },\n\t{ \"post_id\": 32753, \"topic_id\": 8553, \"forum_id\": 10, \"post_subject\": \"Re: CSV Headers\", \"username\": \"mo0926\", \"post_text\": \"Sure, will do. Thanks.\", \"post_time\": \"2020-12-17 18:51:34\" },\n\t{ \"post_id\": 32743, \"topic_id\": 8553, \"forum_id\": 10, \"post_subject\": \"Re: CSV Headers\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,\\n\\nI don't need to see the data, but I would like to see your ECL code. You can send it to me in an email if it's company confidential.\\n\\nRichard\", \"post_time\": \"2020-12-17 18:47:35\" },\n\t{ \"post_id\": 32733, \"topic_id\": 8553, \"forum_id\": 10, \"post_subject\": \"Re: CSV Headers\", \"username\": \"mo0926\", \"post_text\": \"It hard to explain without visuals. I can't share the data. The outputs are fine in the ecl watch. The headers there checkout, but once you export onto a csv everything that used to be divided into 7 fields for example: datasetname, logicalkey, prev_version etc .... it is crunched together in line one: field line[datasetname, logicalkey, prev_version, etc..]. \\n\\nDoes that make sense?\", \"post_time\": \"2020-12-17 18:43:42\" },\n\t{ \"post_id\": 32723, \"topic_id\": 8553, \"forum_id\": 10, \"post_subject\": \"Re: CSV Headers\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,How can I fix this issue in the ecl code?
Without looking at your code, the only suggestion I have is that the problem most likely lies in the RECORD structure of the record set you're writing to disk.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-12-17 18:31:43\" },\n\t{ \"post_id\": 32713, \"topic_id\": 8553, \"forum_id\": 10, \"post_subject\": \"CSV Headers\", \"username\": \"mo0926\", \"post_text\": \"Hello I hope someone can help with this. I recently created a build that generates stats that eventually get used in Power Bi. I need to layout the headers nicely for the Power Bi import. However, right now when I export to csv from the ecl watch, it only converts with 2 columns rather than 7. It looks like when I am writing the csv file it only creates 2 columns: column 1 and line1 with all of the data in line1.\\n\\nHow can I fix this issue in the ecl code?\", \"post_time\": \"2020-12-17 17:51:46\" },\n\t{ \"post_id\": 33253, \"topic_id\": 8673, \"forum_id\": 10, \"post_subject\": \"Re: Polling HPCC system for events\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nWhen you open ECL Watch, it opens on the Activity tab of that home page. \\n\\nJust to the right of that Activity tab is the Event Scheduler tab, which lists all the workunits that are currently waiting for events. \\n\\nOn that Event Scheduler tab you can:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-03-24 17:59:08\" },\n\t{ \"post_id\": 33243, \"topic_id\": 8673, \"forum_id\": 10, \"post_subject\": \"Polling HPCC system for events\", \"username\": \"John Meier\", \"post_text\": \"Is there a way to poll the HPCC system for all the events that it is currently monitoring? An ECL job may execute a WAIT action looking for an event with no duration specified, so essentially HPCC will be monitoring for that event forever. Same thing with monitoring for a file. There doesn't seem to be a way to remove said monitoring or to even see if that monitoring is active or not. I can imagine that over time the system could get bogged down trying to monitor for items that are no longer going to trigger the events being monitored for.\", \"post_time\": \"2021-03-24 17:25:11\" },\n\t{ \"post_id\": 33663, \"topic_id\": 8723, \"forum_id\": 10, \"post_subject\": \"Re: Sort on Enumvalues rather than data.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nYou need a mapping between your codes and a sort order.\\nThis can be achieved using a lookup, e.g. DICTIONARY.\\nSomething like this:\\nMyRec := {STRING3 Value1,STRING3 Category1, STRING3 Category2};\\nSomeFile := DATASET([{'NVS','T','ABC'}\\n ,{'NVS','C','ABC'}\\n ,{'A','X','BAC'}\\n ,{'B','G','PQR'}\\n ,{'A','B','AAC'}\\n ,{'NVS','T','AAA'}],MyRec);\\n\\nSortOrder := DATASET([{'AAA',1},{'ABC',2},{'BAC',3},{'AAC',4},{'PQR',5}],{STRING Code,UNSIGNED1 SortOrder});\\n\\nDict := DICTIONARY(SortOrder,{Code => SortOrder});\\n\\nSrtd_values_ds := sort(SomeFile,Dict[Category2].SortOrder);\\nSrtd_values_ds;\\n\\n
\\nDoes this fit your needs?\\nAllan\", \"post_time\": \"2021-06-21 09:29:07\" },\n\t{ \"post_id\": 33403, \"topic_id\": 8723, \"forum_id\": 10, \"post_subject\": \"Sort on Enumvalues rather than data.\", \"username\": \"harshdesai\", \"post_text\": \"Hi ALL ,\\nIn case have to sort on enum values rather the column values as need is in specific order\\nand which has to be consistent and in control rather to be dependent on data.\\nIs there a way to sort\\n\\nMyRec := {STRING3 Value1,STRING3 Category1, STRING3 Category2};\\nSomeFile := DATASET([{'NVS','T','ABC'},{'NVS','C','ABC'},{'A','X','BAC'},\\n{'B','G','PQR'},{'A','B','AAC'},{'NVS','T','AAA'}],MyRec);\\n\\nsrtvalues := enum(unsigned1,AAA, ABC, BAC,AAC,PQR);\\n//need to sort on values of 3rd field which is static but can add latter\\nSrtd_values_ds := sort(SomeFile,srtvalues.AAA);\", \"post_time\": \"2021-04-21 09:01:01\" },\n\t{ \"post_id\": 33993, \"topic_id\": 8952, \"forum_id\": 10, \"post_subject\": \"Re: Error when running logic for a condition that isn't met\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nHere's the link: http://track.hpccsystems.com\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-08-11 13:39:53\" },\n\t{ \"post_id\": 33983, \"topic_id\": 8952, \"forum_id\": 10, \"post_subject\": \"Re: Error when running logic for a condition that isn't met\", \"username\": \"janet.anderson\", \"post_text\": \"Can you please provide the link to submit a JIRA ticket for HPCC?\", \"post_time\": \"2021-08-11 13:08:30\" },\n\t{ \"post_id\": 33933, \"topic_id\": 8952, \"forum_id\": 10, \"post_subject\": \"Re: Error when running logic for a condition that isn't met\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nNo clue here. I think a JIRA is in order.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-08-09 17:01:43\" },\n\t{ \"post_id\": 33892, \"topic_id\": 8952, \"forum_id\": 10, \"post_subject\": \"Error when running logic for a condition that isn't met\", \"username\": \"janet.anderson\", \"post_text\": \"I get the following error for the code below: "Error: Omitted parameter 2 has no default value (8, 123), 2062, ". Since #TEXT(pCarrierGeoStateField) is '' because I didn't pass that parameter into my_mac, it shouldn't be trying to call Monthly_Shop_Report_Datacube.modZipStates.macGetStateSet(pCarrierGeoDS, pCarrierGeoStateField) and I shouldn't be getting this error. 
Can someone a) tell me what I am misunderstanding about ECL, b) tell me a workaround?\\n\\n\\nmy_mac(pCarrierGeoDS = '', pCarrierGeoZipField = '', pCarrierGeoStateField = '') := FUNCTIONMACRO\\n\\nd_shop00 := choosen(Monthly_Shop_Report_Datacube.Files('202106').d_cumulative_auto_shop_summary, 100);\\n\\nd_shop0 := \\n\\tMAP(#TEXT(pCarrierGeoDS) <> '' AND #TEXT(pCarrierGeoZipField) <> '' AND #TEXT(pCarrierGeoStateField) <> '' =>\\n\\t\\t\\t\\td_shop00(idl_shop_zip in Monthly_Shop_Report_Datacube.modZipStates.macGetZipSet(pCarrierGeoDS, pCarrierGeoZipField)\\n\\t\\t\\t\\t\\t\\t AND idl_shop_state in Monthly_Shop_Report_Datacube.modZipStates.macGetStateSet(pCarrierGeoDS, pCarrierGeoStateField)),\\n\\t\\t\\t// #TEXT(pCarrierGeoDS) <> '' AND #TEXT(pCarrierGeoZipField) <> '' =>\\n\\t\\t\\t\\t// d_shop00(idl_shop_zip in Monthly_Shop_Report_Datacube.modZipStates.macGetZipSet(pCarrierGeoDS, pCarrierGeoZipField)),\\n\\t\\t\\t// #TEXT(pCarrierGeoDS) <> '' AND #TEXT(pCarrierGeoStateField) <> '' =>\\n\\t\\t\\t\\t// d_shop00(idl_shop_state in Monthly_Shop_Report_Datacube.modZipStates.macGetStateSet(pCarrierGeoDS, pCarrierGeoStateField)),\\n\\t\\t\\td_shop00(idl_shop_state IN Monthly_Shop_Report_Datacube.Files().us_states_plus_dc));\\n\\nRETURN d_shop0;\\n\\nENDMACRO;\\n\\nmy_mac(Monthly_Shop_Report_Datacube.Files().i_zipcityst, Zip5);\\n
\\n\\nThis is W20210727-142504 on Alpha Dev if more info is needed.\", \"post_time\": \"2021-07-27 18:34:35\" },\n\t{ \"post_id\": 33943, \"topic_id\": 8953, \"forum_id\": 10, \"post_subject\": \"Re: Creating a sample with thousands of criteria.\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYou might also do it this way:\\nIMPORT Std;\\nStd.Date.Date_t StartDate := 20210101;\\nStd.Date.Date_t EndDate := 20210110;\\nn := 10;\\n\\nnDays := Std.Date.DaysBetween(StartDate,EndDate) + 1;\\nNumRecs := nDays * n;\\nStd.Date.Days_t StartDateJ := Std.Date.FromGregorianDate(StartDate);\\n\\nOutRec := RECORD\\n Std.Date.Date_t ThisDate;\\n UNSIGNED1 WhichRec;\\n STRING10 OtherStuff;\\t\\nEND;\\nOutRec XF(INTEGER C) := TRANSFORM\\n Cmod := C % n;\\n Cday := IF(Cmod <> 0,TRUNCATE(C/10) + 1,TRUNCATE(C/10));\\n SELF.ThisDate := Std.Date.ToGregorianDate(StartDateJ + (Cday-1));\\n SELF.WhichRec := IF(Cmod <> 0,Cmod,n);\\n SELF.OtherStuff := 'junk ' + C;\\nEND;\\nds := DATASET(NumRecs,XF(COUNTER));\\nds;
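\\n\\nAnd, for comparison, the JOIN-with-KEEP idea mentioned in this thread might look roughly like this (a sketch only, assuming the thread's inputDS has a date field in YYYYMMDD form and that the wanted dates are listed in a small inline dataset):
n := 10;\\nDateList := DATASET([{20160101},{20160102},{20160103}],{UNSIGNED4 date});\\nSampled := JOIN(DateList,inputDS,\\n LEFT.date = RIGHT.date,\\n TRANSFORM(RIGHT),\\n KEEP(n)); //KEEP limits matches per LEFT row, so at most n input records survive per date\\nOUTPUT(Sampled);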
\\n \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-08-09 17:45:35\" },\n\t{ \"post_id\": 33913, \"topic_id\": 8953, \"forum_id\": 10, \"post_subject\": \"Re: Creating a sample with thousands of criteria.\", \"username\": \"Allan\", \"post_text\": \"Ah, of course!\\n\\nJust an inner JOIN against a dataset with a list of dates, with KEEP(n) as a qualifier.\", \"post_time\": \"2021-08-04 08:10:37\" },\n\t{ \"post_id\": 33903, \"topic_id\": 8953, \"forum_id\": 10, \"post_subject\": \"Creating a sample with thousands of criteria.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI have to create a sample DATASET where there are 'n' records for every day between 20160101 and 20210630, about 2008 days.\\nNow there are various ways I've thought of doing this.\\nI could write a MACRO to generate a CHOOSESETS that has a criterion for every day,\\ne.g.\\n
x := CHOOSESETS(inputDS,date = '20160101' => n\\n ,date = '20160102' => n\\netc\\n
\\nI could SORT by date then ITERATE, retaining a counter, and just SKIP once the input's counter passes 'n'.\\nBut one feels these are all too complicated and I'm missing a very simple way to do this.\\n\\nBy the way, I can error if there are no records for a day by a LEFT ONLY join on a dataset of the complete date range, so that's not an issue.\\nAny Ideas?\\n\\nAllan\", \"post_time\": \"2021-08-03 14:12:24\" },\n\t{ \"post_id\": 34083, \"topic_id\": 8973, \"forum_id\": 10, \"post_subject\": \"Re: JOIN options\", \"username\": \"rtaylor\", \"post_text\": \"John,I had to use LOCAL because ds_001 was distributed (which I couldn't understand why since it was only 25 records in size).
I assume you thought that because the compiler gave you a warning saying you "used DISTRIBUTE without LOCAL" but it was only a warning, so you did not have to use LOCAL. \\nAnd despite it being smaller, it was on the LEFT side of the JOIN because it was attempting to pull a name from the RIGHT dataset when the KEY_FIELDs matched.
Since you're just looking for matches between the two (an inner JOIN), it doesn't matter which order the two datasets are in for that logic. However, it does matter when you're talking about very large datasets where one is considerably larger than the other. In that case, it's typical to put the large one as the LEFT dataset (1st parm) and the small one as the RIGHT (2nd parm). Since your datasets are both very small, it doesn't matter which order they're in.\\nI tried all of the JOIN options (with the forced LOCAL) and none matched. I distributed ds_002 (1,256 records), dropped the options, kept the LOCAL and it worked. I then dropped the DISTRIBUTEs, dropped the options, switched the LOCAL to HASH and it worked. I have since reworked the code so it doesn't DISTRIBUTE.
Because the record set has only 25 records, DISTRIBUTE is unnecessary, and putting it in the second parm means that SMART has fewer records to copy to every node if it decides to make it an ALL JOIN for you.\\nI was looking to see if my assumptions about why the options weren't working made sense or not.
No, your assumption that it was the number of records in the two record sets was incorrect. Basically, you were just making a common mistake -- making the whole thing too complex (over-thinking the issue). Most of the options on JOIN are meant for working with huge datasets. With your data, a simple inner JOIN is all you really need, without any other options, because there's too little data to see any difference in performance with or without SMART or ALL or ...\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-09-09 14:09:30\" },\n\t{ \"post_id\": 34073, \"topic_id\": 8973, \"forum_id\": 10, \"post_subject\": \"Re: JOIN options\", \"username\": \"John Meier\", \"post_text\": \"BTW - the reworked JOIN uses the two non-DISTRIBUTEd files along with the SMART option.\", \"post_time\": \"2021-09-09 13:53:20\" },\n\t{ \"post_id\": 34063, \"topic_id\": 8973, \"forum_id\": 10, \"post_subject\": \"Re: JOIN options\", \"username\": \"John Meier\", \"post_text\": \"I had to use LOCAL because ds_001 was distributed (which I couldn't understand why since it was only 25 records in size). And despite it being smaller, it was on the LEFT side of the JOIN because it was attempting to pull a name from the RIGHT dataset when the KEY_FIELDs matched. I tried all of the JOIN options (with the forced LOCAL) and none matched. I distributed ds_002 (1,256 records), dropped the options, kept the LOCAL and it worked. I then dropped the DISTRIBUTEs, dropped the options, switched the LOCAL to HASH and it worked. I have since reworked the code so it doesn't DISTRIBUTE. I was looking to see if my assumptions about why the options weren't working made sense or not.\", \"post_time\": \"2021-09-09 13:28:24\" },\n\t{ \"post_id\": 34043, \"topic_id\": 8973, \"forum_id\": 10, \"post_subject\": \"Re: JOIN options\", \"username\": \"rtaylor\", \"post_text\": \"John,Am I correct in thinking the record size of ds_001 vs. ds_002 (left vs. right) is the reason?
The first thing I notice is your use of the LOCAL option when every other option you try (SMART/LOOKUP/ALL) creates implicitly local operations anyway. So I would start by using SMART and losing the LOCAL and see if that works for you. Then go on to trying the LOOKUP and ALL options (also without LOCAL present).\\n\\nOne other thing: the typical use of JOIN would have the "large" dataset as the first parameter and the smaller as the second. So try switching the two around.\\n\\nI would also eliminate the DISTRIBUTE, since you originally wanted SMART to pick your best option anyway.\\n\\nLet me know what results you get with those changes.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-09-08 17:56:52\" },\n\t{ \"post_id\": 34023, \"topic_id\": 8973, \"forum_id\": 10, \"post_subject\": \"JOIN options\", \"username\": \"John Meier\", \"post_text\": \"I'm working with some code that is demonstrating some unusual behavior.\\nI have two datasets: ds_001 (built from a TABLE statement) with 25 records.\\n Primary field STRING06 KEYFIELD.\\n ds_002 DEDUP(SORT( sourceData, KEYFIELD), KEYFIELD); with 1,256 recs\\n Primary field STRING06 KEYFIELD.\\nds_001 is distributed by KEYFIELD. ds_002 is not.\\n\\nJOIN_DATA := JOIN( ds_001\\n , ds_002\\n , LEFT.KEYFIELD = RIGHT.KEYFIELD\\n , TRANSFORM( RECORDOF(LEFT)\\n , SELF.CO_NAME := RIGHT.CO_NAME;\\n SELF := LEFT;\\n )\\n , SMART\\n , LOCAL // because ds_001 is distributed\\n );\\nI end up with no records in JOIN_DATA. I have displayed both files and I have proven both files contain matching KEYFIELD data. I attempted changing the SMART to LOOKUP and to MANY - all produced an empty file (no matches). These options basically say the right recordset (if the count is low enough) will be copied to all the nodes before the JOIN.\\n\\nI think ds_002 having 1,256 records, this would be low enough for that to be true.\\n\\nI have come to the conclusion that because ds_001 has only 25 records, that SMART/LOOKYP/MANY is ignored and the JOIN is attempted as distributed and since ds_002 is not distributed, NOTHING will match. If I pull the DISTRIBUTE off ds_001 and JOIN / HASH, I get a 100% match in JOIN_DATA. Same thing if I DISTRIBUTE ds_002 by KEYFIELD: SMART/LOOKYP/MANY gets a 100% match in JOIN_DATA.\\n\\nAm I correct in thinking the record size of ds_001 vs. ds_002 (left vs. right) is the reason?\", \"post_time\": \"2021-09-07 20:54:07\" },\n\t{ \"post_id\": 34143, \"topic_id\": 9013, \"forum_id\": 10, \"post_subject\": \"Re: Joining Unrelated Datasets\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,Hello, I was wondering if in ECL we have an easy way of combining two datasets without any relationship between the datasets?
The ONLY way to JOIN two datasets that actually have no relationship (in any language) would be to have a JOIN condition of TRUE. That would result in every record on the left matching every record on the right (i.e. if you have 10 recs on the left and 10 on the right you would end up with a 100 record result). Otherwise, you MUST have some data in both that can be used to match the records.\\nWhat would be the easiest way to add buildversion if the date falls between first_seen and last_seen?
Given this question, I have to assume that you have a date value that can be parsed from that buildversion string. Given that assumption, you could do it something like this://I'm assuming:\\n// all string dates are in YYYYMMDD format\\n// a date is in characters 10 through 17 of the buildversion -- YMMV :)\\nIMPORT Std;\\nBOOLEAN IsInRange(Std.Date.Date_t d, \\n Std.Date.Date_t f, \\n Std.Date.Date_t l) := \\n Std.Date.FromJulianDate(d) BETWEEN \\n Std.Date.FromJulianDate(f) AND\\n Std.Date.FromJulianDate(l); \\nJOIN(ds1,ds2,\\n IsInRange((Std.Date.Date_t)RIGHT.buildversion[10..17],\\n (Std.Date.Date_t)LEFT.first_seen,\\n (Std.Date.Date_t)LEFT.last_seen),\\n TRANSFORM(Output,SELF := LEFT,SELF := RIGHT));
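\\n\\nAnd, for completeness, the unconditional \"every record matches every record\" case described above needs a TRUE join condition plus the ALL option; a minimal sketch using the same layouts:
CrossAll := JOIN(ds1,ds2,\\n TRUE,\\n TRANSFORM(Output,SELF := LEFT,SELF := RIGHT),\\n ALL); //10 records on each side would produce 100 result records\\nOUTPUT(CrossAll);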
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-12 17:58:39\" },\n\t{ \"post_id\": 34133, \"topic_id\": 9013, \"forum_id\": 10, \"post_subject\": \"Joining Unrelated Datasets\", \"username\": \"mo0926\", \"post_text\": \"Hello, I was wondering if in ECL we have an easy way of combining two datasets without any relationship between the datasets? \\n\\nFor example:\\n\\nlayoyt1 :=record\\nstring phone\\nstring first_seen\\nstring last_seen\\nEnd\\nds1:=table(infile1, layout1, phone, first_seen, last_seen);\\n\\nlayout2 := record\\nstring buildversion;\\nend;\\nds2:= table(infile2, layout2, buildversion);\\n\\nWhat would be the easiest way to add buildversion if the date falls between first_seen and last_seen?\\n\\nOutput := record\\nstring phone\\nstring buildversion\\nstring first_seen\\nstring last_seen\\nEnd\\n\\nThanks for your help in advance.\", \"post_time\": \"2021-10-12 14:46:11\" },\n\t{ \"post_id\": 34243, \"topic_id\": 9033, \"forum_id\": 10, \"post_subject\": \"Re: Escaping Characters that cause issues in XML\", \"username\": \"Allan\", \"post_text\": \"ouch!\", \"post_time\": \"2021-10-20 17:16:52\" },\n\t{ \"post_id\": 34233, \"topic_id\": 9033, \"forum_id\": 10, \"post_subject\": \"Re: Escaping Characters that cause issues in XML\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nYes, I think it is fairly new -- been in the language now about 10+ years \\n\\nRichard\", \"post_time\": \"2021-10-20 15:19:57\" },\n\t{ \"post_id\": 34223, \"topic_id\": 9033, \"forum_id\": 10, \"post_subject\": \"Re: Escaping Characters that cause issues in XML\", \"username\": \"Allan\", \"post_text\": \"Ah Richard,\\n\\nWorks a treat!\\n\\nThat's a new built-in function to me.\\n\\nWill teach me to read the released version of the ECL ref manual!\\n\\nYours\\n\\nAllan\", \"post_time\": \"2021-10-20 15:16:20\" },\n\t{ \"post_id\": 34213, \"topic_id\": 9033, \"forum_id\": 10, \"post_subject\": \"Re: Escaping Characters that cause issues in XML\", \"username\": \"rtaylor\", \"post_text\": \"Allen, \\n\\nHave you tried the XMLENCODE() function?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-20 11:57:47\" },\n\t{ \"post_id\": 34203, \"topic_id\": 9033, \"forum_id\": 10, \"post_subject\": \"Escaping Characters that cause issues in XML\", \"username\": \"Allan\", \"post_text\": \"Hi Anyone,\\nThis question must have been asked before on this forum, but I can't find it.\\nIs there a function either in the STD or out there somewhere that escapes all those characters that are used as structure identifiers in XML?\\ne.g.\\n
\\n< & > ' \"\\n&lt; &amp; &gt; &apos; &quot;\\n
\\nSo given a string\\nAllan's
\\nwould return \\nAllan&apos;s
\\nYours\\nAllan\", \"post_time\": \"2021-10-20 10:36:53\" },\n\t{ \"post_id\": 34413, \"topic_id\": 9073, \"forum_id\": 10, \"post_subject\": \"Re: Smart Join\", \"username\": \"rtaylor\", \"post_text\": \"HPCC_KK,\\n\\nJust to test it, I created a SET containing 84,140 elements (a 10% sampling of the unique ID field from an 841,400 record dataset) then used that SET to filter the original dataset from which it came. It ran successfully in a bit over a minute on my 1-node VM environment. \\n\\nSo, IMO if it works well with an 84K SET, then a 10K SET will also work nicely, so I would personally still use that method.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-11-16 18:34:07\" },\n\t{ \"post_id\": 34403, \"topic_id\": 9073, \"forum_id\": 10, \"post_subject\": \"Re: Smart Join\", \"username\": \"HPCC_KK\", \"post_text\": \"CorrectionIndex has 300records as of last week, it will grow rapidly after few months and it could be 10k+ records. I dont think SET is not an option for 10+ records. I am thinking of a lookup join or hash or smart join. Please suggest.\\n\\nThanks Richard.\", \"post_time\": \"2021-11-16 03:03:29\" },\n\t{ \"post_id\": 34393, \"topic_id\": 9073, \"forum_id\": 10, \"post_subject\": \"Re: Smart Join\", \"username\": \"rtaylor\", \"post_text\": \"HPCC_KK,\\n\\nPersonally, given that your CorrectionIndex only has 300 records and is expected to grow slowly, I would not use JOIN at all. \\n\\nI'd do it more like this:SetCorrectionKey := SET(CorrectionIndex,DID);\\ncheckCorrectionKey := InputFile(DID IN SetCorrectionKey);
\\nYour inner JOIN is basically accomplishing exactly the same thing, since your TRANSFORM(LEFT) is just going to give you the records from InputFile that match the set of DID fields in the CorrectionIndex.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-11-15 19:35:12\" },\n\t{ \"post_id\": 34363, \"topic_id\": 9073, \"forum_id\": 10, \"post_subject\": \"Smart Join\", \"username\": \"HPCC_KK\", \"post_text\": \"checkCorrectionKey := join(InputFile, CorrectionIndex,\\n keyed(left.DID = right.DID),\\n transform(left), smart);\\n\\t\\t\\t\\t\\t\\t \\n"InputFile" may have one record or 15billion records( as per one dataset we have today in prod)\\n\\n"CorrectionIndex" has currently 300records.. this would eventually grow but not too fast or too big.\\n\\nWith this said, if smart option a good choice in the join or any other suggestions?\\n\\nAlso, when tested this join it gave warning:\\n\\nWarning: Smart specified on an unfiltered keyed join - was this intended? \", \"post_time\": \"2021-11-15 17:41:45\" },\n\t{ \"post_id\": 34593, \"topic_id\": 9153, \"forum_id\": 10, \"post_subject\": \"Re: Sorting a child dataset\", \"username\": \"rtaylor\", \"post_text\": \"John,\\n\\nGlad you found a solution. \\n\\nRichard\", \"post_time\": \"2022-01-04 18:42:43\" },\n\t{ \"post_id\": 34573, \"topic_id\": 9153, \"forum_id\": 10, \"post_subject\": \"Re: Sorting a child dataset\", \"username\": \"John Meier\", \"post_text\": \"I have found a solution: the GROUP function.\\n\\nI first PROJECT the 6.5+billion records into the smaller layout, then DISTRIBUTE the data by the attributes that would cluster related data together on the same node. I then do a LOCAL SORT and GROUP. Now I can do a LOCAL PROJECT where the TRANSFORM sorts the child dataset. It finished in 6:10.004\", \"post_time\": \"2022-01-03 15:26:46\" },\n\t{ \"post_id\": 34563, \"topic_id\": 9153, \"forum_id\": 10, \"post_subject\": \"Sorting a child dataset\", \"username\": \"John Meier\", \"post_text\": \"I have a file which is approximately 6.5 billion records (and growing) - each record has 70+ fields with a child dataset (a 4 field record occurring up to 18 times).\\nI can PROJECT the file into a shorter layout (10 fields and the child dataset) then DISTRIBUTE. I can then SORT the records so they are into groups. However, if I attempt to SORT the child dataset, I get a SYSTEM 4 failure. When I look at the graph, the source read has exploded well over 30+ billion (adding the up to 18 reads per record) until THOR aborts the job.\\n\\nSay the child dataset has a layout of:\\n {string4 code, unsigned4 cost1_limit, unsigned4 cost2_limit}\\nso say I have:\\nPREV.RECORD ChilDS[{'AA', 50000, 100000}, {'BB', 20000, 50000} ] and a \\nCURR.RECORD ChilDS[{'BB', 20000, 50000}, {'AA', 50000, 100000}]\\n\\nI could not compare PREV.RECORD.ChilDS = CURR.RECORD ChilDS as true. Even though they contain the same values, their order is different. If I had SORTed on code, then it would have tested true.\\n\\nGiven the size of the source file, is there a different way to SORT the contents of the child dataset?\", \"post_time\": \"2021-12-30 20:46:33\" },\n\t{ \"post_id\": 34711, \"topic_id\": 9161, \"forum_id\": 10, \"post_subject\": \"Re: WsSql user friendly GUI interface\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi, I'm glad we could help. Thanks for posting, and definitely let us know if you have any other question/concern. 
Rodrigo.\", \"post_time\": \"2022-01-19 14:04:01\" },\n\t{ \"post_id\": 34701, \"topic_id\": 9161, \"forum_id\": 10, \"post_subject\": \"Re: WsSql user friendly GUI interface\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Rodrigo,\\nNice workaround. I'll keep it in mind.\\nThank you very much!!\\nWarm Regards\", \"post_time\": \"2022-01-18 14:28:55\" },\n\t{ \"post_id\": 34691, \"topic_id\": 9161, \"forum_id\": 10, \"post_subject\": \"Re: WsSql user friendly GUI interface\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi, since the WsSQL jobs are processed as ECL, there should be a resulting workunit you can access via ECLWatch. The WsSQL result should report the Workunit ID which you can use to view the results via the ECLWatch tools. Thanks.\", \"post_time\": \"2022-01-18 14:03:59\" },\n\t{ \"post_id\": 34631, \"topic_id\": 9161, \"forum_id\": 10, \"post_subject\": \"WsSql user friendly GUI interface\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nIs there any user-friendly interface to use with wssql? (for a non-developer type of user)\\nThe default one, on port 8510 is a bit basic. \\nFor example, as far as I could test, it returns results in a raw XML format (as opposed to the WsECL that shows a pretty nice table)\\n\\nI understand that the feature is mainly meant to be used programmatically, but perhaps someone can recommend a way of working with it in a more interactive way with some external tool like the ones that exist for relational databases, that for example presents the results as a table, and allows to export the resultset as a csv, or other kind of flat-file.\\n\\nOr any kind of GUI that allows accessing the end-point and retrieving the results in a graphical way.\\nThank you!\", \"post_time\": \"2022-01-13 00:43:28\" },\n\t{ \"post_id\": 34951, \"topic_id\": 9241, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Unable to locate ECL Client Tools\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi DSC,\\nThank you very much for your answer.\\nTo make it simple, I reinstalled everything without modifying a thing in the defaults. Especially the default path for the clienttools.\\nAnd, it worked as expected. So, thank you very much!!!\\n\\nOne friendly suggestion, I lost many hours trying to make this work, unaware of this pre-existing issue. So to prevent this from happening to other people I kindly suggest someone update the documentation for HSQL, making this limitation clear, and required for the installation to work properly\\n\\nTo answer your other questions>\\n Windows 10\\n hpccsystems-eclide-community_8.4.12-1Windows-i386\\n\\nWarm regards!\\nRicardo\", \"post_time\": \"2022-02-08 17:05:10\" },\n\t{ \"post_id\": 34941, \"topic_id\": 9241, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Unable to locate ECL Client Tools\", \"username\": \"DSC\", \"post_text\": \"Hi Ricardo,\\n\\nThe problem you describe does not sound like it is related to HSQL. VS Code should be invoking the ECL extension for any .ecl file, no matter how it is created. And it sounds like that is indeed happening, based on the error message of not finding the client tools.\\n\\nA few questions:\\n\\n* What operating system are you working with?\\n* What version of the client tools did you install?\\n* Did you install the client tools in a location other than the one suggested by the installer?\\n\\nWe had a recent issue relating to that last question, for Windows systems (https://track.hpccsystems.com/browse/HPCC-26845). 
This may be related to your environment.\\n\\nDSC\", \"post_time\": \"2022-02-08 16:15:40\" },\n\t{ \"post_id\": 34891, \"topic_id\": 9241, \"forum_id\": 10, \"post_subject\": \"HSQL - Unable to locate ECL Client Tools\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nHow are you?\\nI'm giving HSQL a try (https://github.com/hpcc-systems/HSQL), as I found it very interesting, and the whole installation in VSCode went fine (except that I already had the clienttools installed so I skipped this step)\\nNow when I want to run the ecl from the SimpleTest.ecl I get "Unable to locate ECL Client Tools."\\nWhere should I configure the extension to be able to find the already installed client tools?\\nThank you!!!\", \"post_time\": \"2022-02-03 19:55:32\" },\n\t{ \"post_id\": 34991, \"topic_id\": 9251, \"forum_id\": 10, \"post_subject\": \"Re: Despray an index file for backup\", \"username\": \"rtaylor\", \"post_text\": \"Srini,\\n\\nDespray/re-spray to new cluster won't work with an index. I'd suggest you ramp up your "backup" cluster then try the Copy feature in ECL Watch and see if that will work for you.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-02-08 20:20:10\" },\n\t{ \"post_id\": 34931, \"topic_id\": 9251, \"forum_id\": 10, \"post_subject\": \"Despray an index file for backup\", \"username\": \"omnibuzz\", \"post_text\": \"We have a situation where we have a multipart index in a Roxie cluster that we want to backup. Is there a way we can despray the index into landing zone and bring up a new cluster and respray it there. If not, are there any suggestions? We are on the cloud and we only have ephemeral storage and no access to NAS volumes.\\nRegards,\\nSrini\", \"post_time\": \"2022-02-08 15:39:57\" },\n\t{ \"post_id\": 35061, \"topic_id\": 9261, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Tables - how to load from .csv and logical files?\", \"username\": \"Bahar\", \"post_text\": \"Hi Ricardo, \\nPlease try following: \\n\\nexport simpleLayout = CREATE TABLE (\\n personID INTEGER,\\n name string,\\n age integer,\\n wage real,\\n hasHouse integer\\n);\\n\\nexport simpleTable = select * from '~simpledata.csv' type csv layout simpleLayout as TestMe offset 1;\", \"post_time\": \"2022-02-09 21:25:44\" },\n\t{ \"post_id\": 35021, \"topic_id\": 9261, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Tables - how to load from .csv and logical files?\", \"username\": \"DSC\", \"post_text\": \"Thank you very much for the feedback, Ricardo! HSQL is a work-in-progress so some rough edges are to be expected, but some of the items you found (e.g. aliasing) are really a little rougher than they should be. I will get this feedback incorporated into our issue tracker.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2022-02-09 13:01:12\" },\n\t{ \"post_id\": 35001, \"topic_id\": 9261, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Tables - how to load from .csv and logical files?\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Dan,\\nIndeed it helps!\\nMy apologies for not having found these examples in the project tree before. Although as you said the syntax is different in more than one aspect.\\n\\nI tried the code and it complained about the file not having a table alias on this line:\\n
export simpleTable = select * from '~simpledata.csv' as aliasname type csv layout simpleLayout offset 1;
\\n\\n\\n[{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 2,\\n\\t"message": "No TABLE alias provided, using __r_action_0",\\n\\t"startLineNumber": 9,\\n\\t"startColumn": 36,\\n\\t"endLineNumber": 9,\\n\\t"endColumn": 70\\n}]
\\n\\nAnd as expected this is the ecl code generated:\\n\\nexport LoadFile := MODULE\\nEXPORT simpleLayout := {INTEGER personid,STRING name,INTEGER age,REAL wage,INTEGER hashouse};\\n__r_action_1 := FUNCTION\\n__r_action_0 := DATASET('~simpledata.csv',simpleLayout,CSV);\\n__r_action_2 := TABLE(__r_action_0,{ __r_action_0 });\\n__r_action_3 := __r_action_2[2..];\\nRETURN __r_action_3;\\nEND;\\nEXPORT simpleTable := __r_action_1;\\nEND;
\\n\\nI couldn't find a way to add that alias. Using as aliasname triggers a lot of other errors: (way above my understanding)\\n\\n[{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 8,\\n\\t"message": "No actions can be used if shared or export is used",\\n\\t"startLineNumber": 1,\\n\\t"startColumn": 1,\\n\\t"endLineNumber": 10,\\n\\t"endColumn": 1\\n},{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 8,\\n\\t"message": "no viable alternative at input ''simpledata.csv'as'",\\n\\t"startLineNumber": 9,\\n\\t"startColumn": 53,\\n\\t"endLineNumber": 9,\\n\\t"endColumn": 53\\n},{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 8,\\n\\t"message": "mismatched input 'as' expecting SEMICOLON",\\n\\t"startLineNumber": 9,\\n\\t"startColumn": 53,\\n\\t"endLineNumber": 9,\\n\\t"endColumn": 53\\n},{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 8,\\n\\t"message": "mismatched input 'type' expecting '='",\\n\\t"startLineNumber": 9,\\n\\t"startColumn": 67,\\n\\t"endLineNumber": 9,\\n\\t"endColumn": 67\\n},{\\n\\t"resource": "/f:/DataLake/HPCCSystems/HSQL-Projects/LoadFile.hsql",\\n\\t"owner": "_generated_diagnostic_collection_name_#1",\\n\\t"severity": 8,\\n\\t"message": "mismatched input 'offset' expecting '='",\\n\\t"startLineNumber": 9,\\n\\t"startColumn": 96,\\n\\t"endLineNumber": 9,\\n\\t"endColumn": 96\\n}]
\\n\\nI give you all this information just in case it helps improve something.\\n\\nIn summary:\\n\\n1. I compiled it anyway although it has the alias error.\\n2. It generated the ugly .ecl file\\n3. I defined a kind of BWR to execute the definitions\\n4. I ran the files a few times with no results, until I realized the file was expected to be in the cluster and not locally (I felt dumb for a few seconds )\\n5. I uploaded the .csv to the landing zone and sprayed it\\n6. Then it worked\\n\\nMy conclusion is that for using this you still have to have your share of ECL and HPCCSystems knowledge. (mine being still too weak)\\n\\nNow I will step forward into generating visuals and doing some ML. So, you can bet you'll hear from me again
.\\nAt least I hope it helps someone else running into the same problem.\\n\\nThank you very much!!!\\nAll the best !!\", \"post_time\": \"2022-02-09 00:26:00\" },\n\t{ \"post_id\": 34971, \"topic_id\": 9261, \"forum_id\": 10, \"post_subject\": \"Re: HSQL - Tables - how to load from .csv and logical files?\", \"username\": \"DSC\", \"post_text\": \"Hi Ricardo,\\n\\nThe following should work, assuming you have a CSV file with the given name:\\n\\n
\\nexport simpleLayout = CREATE TABLE (\\n personID INTEGER,\\n name string,\\n age integer,\\n wage real,\\n hasHouse integer\\n);\\n\\nexport simpleTable = select * from '~hsql::testfiles::simpledata.csv' type csv layout simpleLayout offset 1;\\n
\\n\\nThis was taken from one of the test files, but it was also modified (the file used LAYOUT as a record definition keyword rather than CREATE TABLE and the syntax was modified recently).\\n\\nHope this helps!\\n\\nDan\", \"post_time\": \"2022-02-08 19:27:31\" },\n\t{ \"post_id\": 34961, \"topic_id\": 9261, \"forum_id\": 10, \"post_subject\": \"HSQL - Tables - how to load from .csv and logical files?\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\n\\nCould anybody already working with this tool be so kind to provide (or point me to) a couple of small code examples on how to load flat csv or json files to a table, and the same with logical files inside HPCCSystems?\\nI couldn't find any complete example of these actions in the current documentation.\\n\\nIt will be much appreciated.\\nThank you !!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-02-08 17:12:53\" },\n\t{ \"post_id\": 35041, \"topic_id\": 9271, \"forum_id\": 10, \"post_subject\": \"Re: ECL Visualizer Bundle - cannot be parsed as a bundle\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Gordon,\\n\\nI followed your advice. (using 7.x)\\nIt worked !!\\n\\nThank you!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-02-09 14:44:18\" },\n\t{ \"post_id\": 35031, \"topic_id\": 9271, \"forum_id\": 10, \"post_subject\": \"Re: ECL Visualizer Bundle - cannot be parsed as a bundle\", \"username\": \"gsmith\", \"post_text\": \"This looks like its an issue with the "ecl bundle" command line program in the 8.x client tools.\\n\\nAs a workaround you can install 7.x and use it to install the bundle.\\n\\nTicket ref: https://track.hpccsystems.com/projects/ ... HPCC-27157\", \"post_time\": \"2022-02-09 13:58:14\" },\n\t{ \"post_id\": 35011, \"topic_id\": 9271, \"forum_id\": 10, \"post_subject\": \"ECL Visualizer Bundle - cannot be parsed as a bundle\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\n\\nI'm trying to install the Visualizer bundle and I'm getting the following:\\n\\nIn Ubuntu 20.04 - running as root\\n\\n\\nTrying to install Visualizer\\n\\nroot@vmi729290:~# ecl bundle install -v https://github.com/hpcc-systems/Visualizer.git\\nUsing eclcc path /opt/HPCCSystems/bin/eclcc\\nRunning /opt/HPCCSystems/bin/eclcc --nologfile -showpaths\\neclcc output:\\nCL_PATH=/usr\\nECLCC_ECLBUNDLE_PATH=/root/.HPCCSystems/bundles/\\nECLCC_ECLREPO_PATH=/root/.HPCCSystems/repos/\\nECLCC_ECLINCLUDE_PATH=.\\nECLCC_ECLLIBRARY_PATH=/opt/HPCCSystems/share/ecllibrary/\\nECLCC_INCLUDE_PATH=/opt/HPCCSystems/componentfiles/cl/include\\nECLCC_LIBRARY_PATH=/opt/HPCCSystems/lib:/opt/HPCCSystems/plugins:/opt/HPCCSystems/versioned/python3\\nECLCC_PLUGIN_PATH=/opt/HPCCSystems/plugins:/opt/HPCCSystems/versioned/python3\\nHPCC_FILEHOOKS_PATH=/opt/HPCCSystems/filehooks\\n\\nmkdir /tmp/tmp.qh2xn2\\nRunning git clone --depth=1 https://github.com/hpcc-systems/Visualizer.git "/tmp/tmp.qh2xn2/Visualizer"\\nRunning /opt/HPCCSystems/bin/eclcc - --nologfile --nostdinc -Me --nobundles "-I/tmp/tmp.qh2xn2/Visualizer"\\nwith input IMPORT Visualizer.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\n/opt/HPCCSystems/bin/eclcc return code was 2\\n(0,0): error C3000: Internal error: Expected a version number in the url 'https://registry.npmjs.org/@rollup/plugin-alias/-/plugin-alias-3.1.8.tgz'\\n1 error, 0 warning\\n\\neclcc 
reported:\\n\\nhttps://github.com/hpcc-systems/Visualizer.git cannot be parsed as a bundle\\n\\n
\\n\\n***************************************************************************************\\n\\nTrying to install any other bundle\\n\\n\\nroot@vmi729290:~# ecl bundle install -v https://github.com/hpcc-systems/MySqlImport\\nUsing eclcc path /opt/HPCCSystems/bin/eclcc\\nRunning /opt/HPCCSystems/bin/eclcc --nologfile -showpaths\\neclcc output:\\nCL_PATH=/usr\\nECLCC_ECLBUNDLE_PATH=/root/.HPCCSystems/bundles/\\nECLCC_ECLREPO_PATH=/root/.HPCCSystems/repos/\\nECLCC_ECLINCLUDE_PATH=.\\nECLCC_ECLLIBRARY_PATH=/opt/HPCCSystems/share/ecllibrary/\\nECLCC_INCLUDE_PATH=/opt/HPCCSystems/componentfiles/cl/include\\nECLCC_LIBRARY_PATH=/opt/HPCCSystems/lib:/opt/HPCCSystems/plugins:/opt/HPCCSystems/versioned/python3\\nECLCC_PLUGIN_PATH=/opt/HPCCSystems/plugins:/opt/HPCCSystems/versioned/python3\\nHPCC_FILEHOOKS_PATH=/opt/HPCCSystems/filehooks\\n\\nmkdir /tmp/tmp.oO8zLb\\nRunning git clone --depth=1 https://github.com/hpcc-systems/MySqlImport "/tmp/tmp.oO8zLb/MySqlImport"\\nRunning /opt/HPCCSystems/bin/eclcc - --nologfile --nostdinc -Me --nobundles "-I/tmp/tmp.oO8zLb/MySqlImport"\\nwith input IMPORT MySqlImport.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\nBundle info from ECL compiler: [U'MySqlImport', U'1.0.0', U'Create record and dataset definitions from MySQL tables', U'http://www.apache.org/licenses/LICENSE-2.0', U'Copyright (C) 2015 HPCC Systems', U'1', U'Gavin Halliday', U'0', U'']\\n\\nInstalling bundle MySqlImport version 1.0.0\\nUsing bundle path /root/.HPCCSystems/bundles/\\ncp /tmp/tmp.oO8zLb/MySqlImport/MySqlImport.ecl /root/.HPCCSystems/bundles/_versions/MySqlImport/V1_0_0/MySqlImport.ecl\\ncp /tmp/tmp.oO8zLb/MySqlImport/LICENSE /root/.HPCCSystems/bundles/_versions/MySqlImport/V1_0_0/LICENSE\\ncp /tmp/tmp.oO8zLb/MySqlImport/README.md /root/.HPCCSystems/bundles/_versions/MySqlImport/V1_0_0/README.md\\nMySqlImport 1.0.0 Create record and dataset definitions from MySQL tables\\nInstallation complete\\n\\n
\\n\\nIn Windows 10 - running in an elevated cmd\\n\\nTrying to install Visualizer\\n----------------------------\\n\\nC:\\\\Users\\\\Ric\\\\Downloads>ecl bundle install -v https://github.com/hpcc-systems/Visualizer.git\\nUsing eclcc path C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc --nologfile -showpaths\\neclcc output:\\nCL_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\componentfiles\\\\cl\\nECLCC_ECLBUNDLE_PATH=C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\\\nECLCC_ECLREPO_PATH=C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\repos\\\\\\nECLCC_ECLINCLUDE_PATH=.\\nECLCC_ECLLIBRARY_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\share\\\\ecllibrary\\\\\\nECLCC_INCLUDE_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\componentfiles\\\\cl\\\\include\\nECLCC_LIBRARY_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\lib;C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\plugins\\nECLCC_PLUGIN_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\plugins\\nHPCC_FILEHOOKS_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\filehooks\\n\\nmkdir C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\nRunning git clone --depth=1 https://github.com/hpcc-systems/Visualizer.git "C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\Visualizer"\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc - --nologfile --nostdinc -Me --nobundles "-IC:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\Visualizer"\\nwith input IMPORT Visualizer.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc return code was 2\\n(0,0): error C3000: Internal error: Expected a version number in the url 'https://registry.npmjs.org/@rollup/plugin-alias/-/plugin-alias-3.1.8.tgz'\\n1 error, 0 warning\\n\\neclcc reported:\\n\\nhttps://github.com/hpcc-systems/Visualizer.git cannot be parsed as a bundle
\\n\\nTrying to install any other bundle\\n----------------------------------\\n\\nC:\\\\Users\\\\Ric\\\\Downloads> ecl bundle install -v https://github.com/hpcc-systems/MySqlImport\\nUsing eclcc path C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc --nologfile -showpaths\\neclcc output:\\nCL_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\componentfiles\\\\cl\\nECLCC_ECLBUNDLE_PATH=C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\\\nECLCC_ECLREPO_PATH=C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\repos\\\\\\nECLCC_ECLINCLUDE_PATH=.\\nECLCC_ECLLIBRARY_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\share\\\\ecllibrary\\\\\\nECLCC_INCLUDE_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\componentfiles\\\\cl\\\\include\\nECLCC_LIBRARY_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\lib;C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\plugins\\nECLCC_PLUGIN_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\plugins\\nHPCC_FILEHOOKS_PATH=C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\filehooks\\n\\nmkdir C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\nRunning git clone --depth=1 https://github.com/hpcc-systems/MySqlImport "C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\MySqlImport"\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc - --nologfile --nostdinc -Me --nobundles "-IC:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\MySqlImport"\\nwith input IMPORT MySqlImport.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\nBundle info from ECL compiler: [U'MySqlImport', U'1.0.0', U'Create record and dataset definitions from MySQL tables', U'http://www.apache.org/licenses/LICENSE-2.0', U'Copyright (C) 2015 HPCC Systems', U'1', U'Gavin Halliday', U'0', U'']\\n\\nInstalling bundle MySqlImport version 1.0.0\\nUsing bundle path C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc - --nologfile --nostdinc -Me --nobundles "-IC:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles"\\nwith input IMPORT ML_Core.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\nBundle info from ECL compiler: [U'ML_Core', U'3.2.2', U'Common definitions for Machine Learning', U'See LICENSE.TXT', U'Copyright (C) 2019 HPCC Systems', U'1', U'HPCCSystems', U'0', U'6.2.0']\\n\\nRunning C:\\\\Program Files (x86)\\\\HPCCSystems\\\\8.4.12\\\\clienttools\\\\bin\\\\eclcc - --nologfile --nostdinc -Me --nobundles "-IC:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles"\\nwith input IMPORT Trigram.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\nBundle info from ECL compiler: 
[U'Trigram', U'1.0.2', U'Trigram string similarity for UNICODE and SBCS strings', U'http://www.apache.org/licenses/LICENSE-2.0', U'Copyright (C) 2014 HPCC Systems', U'1', U'John Holt', U'0', U'']\\n\\ncp C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\MySqlImport\\\\LICENSE C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\_versions\\\\MySqlImport\\\\V1_0_0\\\\LICENSE\\ncp C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\MySqlImport\\\\MySqlImport.ecl C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\_versions\\\\MySqlImport\\\\V1_0_0\\\\MySqlImport.ecl\\ncp C:\\\\Users\\\\Ric\\\\AppData\\\\Local\\\\Temp\\\\\\\\tmp.000041\\\\MySqlImport\\\\README.md C:\\\\Users\\\\Ric\\\\AppData\\\\Roaming\\\\HPCCSystems\\\\bundles\\\\_versions\\\\MySqlImport\\\\V1_0_0\\\\README.md\\nMySqlImport 1.0.0 Create record and dataset definitions from MySQL tables\\nInstallation complete\\necl 'bundle' command error 0
\\n\\nThe same happens from inside VSCode, which was in fact my first attempt, as I was trying to install it to use it in HSQL.\\n\\nI used the -v verbose option to see if I could get more information, and indeed the error seems consistent in that something might be missing or wrong in the Visualizer repository.\\nI will appreciate a hand to solve this. It seems to be related only to this bundle.\\n\\nThank you very much!!!\\nWarm regards!!\\nRicardo\", \"post_time\": \"2022-02-09 04:30:33\" },\n\t{ \"post_id\": 35141, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"rfernandez2007\", \"post_text\": \"Hello Jerry,\\n\\nYou are completely right. \\nWhile you were analyzing the issue, and after some time wandering around and reviewing things, I saw the text you mentioned and changed the value to the suggested value (db).\\nI did that in my Windows installation, and it didn't work. But, at this point, I don't trust that environment any more, as it could be a bit contaminated for so many trials and changes. (deep cleaning is on the way )\\n\\nSo, what I did was a fresh installation in Linux, fresh Docker installation, and fresh REAL-BI installation taking the "db" thing into consideration and also not changing almost anything else. (in my first attempts I had changed several ports)\\n\\nAnd, voilá. It stopped throwing errors and a login page appeared!!!! Yeeeeey!!\\nThen I tried to log in and it told me "wrong credentials".
\\nThen I remembered that this app was relying on the Auth-Service, and after a while, I figured out how to configure a user there. (I've never used it before)\\nAnd ... logged in !!!\\n\\nThank you !!!!! and also Thank you!!!\\nP.S. Unless you find some useful information in my previous attempt to install locally (not Docker), please disregard that post.\", \"post_time\": \"2022-02-11 23:01:36\" },\n\t{ \"post_id\": 35131, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"jjacob\", \"post_text\": \"Hi Ricardo,\\n\\nWe looked into the docker issue you faced and it looks like the DB_HOST is the root cause of the issue. When RealBI is setup as Docker containers, the DB_HOST value has to be 'db', which is the name of the database service in docker-compose. \\n\\nhttps://github.com/hpcc-systems/REAL-BI ... xample#L33\\n\\nIf you see errors connecting to MySQL database, please make sure the /db/data folder is deleted and the mysql container recreated. \\n\\nWe'll try to setup a meeting next week to answer your questions and get the application setup on your environment\\n\\nThanks\\nJerry\", \"post_time\": \"2022-02-11 20:34:15\" },\n\t{ \"post_id\": 35121, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"rfernandez2007\", \"post_text\": \"Don't worry at all, it happens to all of us
\\n\\nAn update:\\nWhile waiting for your answer, I tried a local installation in a new Linux server.\\nI'll show you the sequence of events according to the instructions: I generated a log for each step I could.\\n\\n1. Run git clone https://github.com/hpcc-systems/REAL-BI.git to copy the project into your computer.\\ngitclone.log\\n\\n2.You will need to intall dependencies for a project. While in a root folder run command npm install, it will install npm packages in root as well as /api and /client folders.\\nReturn: No such file or directory\\nSo it's assuming you already have npm installed.\\n\\nNext: apt-get install npm\\nnpmintallation.log\\n\\n3. In the root folder, rename the .env.example file to .env and fill in the empty values. You can find an explanation inside .env.example.\\n.env\\n\\n4.In /client rename the .env.example file to .env.development and fill in the empty values. You can find explanation inside .env.example .\\n.env.developement\\n\\n5. Create real_bi schema in your local MySQL database.\\nDone- User admin\\n\\n6. Go to /api and run npx sequelize db:migrate to build database tables.\\nIt generates an empty log file (it writes nothing to the log file) but on screen it shows the following.\\n\\n
root@vmi729625:/HPCCSystemsTools/REAL-BI/api# npx sequelize db:migrate > npx_sequelize_migrate.log\\nnpx: installed 21 in 4.375s\\ncommand not found: sequelize
\\n\\nand I had to do this: \\n\\n npm install --save sequelize\\n+ sequelize@6.16.1\\nadded 17 packages from 83 contributors and audited 293 packages in 4.047s\\n\\n57 packages are looking for funding\\n run `npm fund` for details\\n\\nfound 6 moderate severity vulnerabilities\\n run `npm audit fix` to fix them, or `npm audit` for details
\\n\\nand this\\n\\nroot@vmi729625:/HPCCSystemsTools/REAL-BI# npm install -g sequelize-cli\\n/usr/local/bin/sequelize -> /usr/local/lib/node_modules/sequelize-cli/lib/sequelize\\n/usr/local/bin/sequelize-cli -> /usr/local/lib/node_modules/sequelize-cli/lib/sequelize\\n+ sequelize-cli@6.4.1\\nadded 71 packages from 50 contributors in 5.326s
\\n\\nAnd only then it ran .... but:\\n\\nroot@vmi729625:/HPCCSystemsTools/REAL-BI/api# npx sequelize db:migrate > npx_sequelize_migrate.log\\nERROR: Error reading "src/config/database.js". Error: Error: Cannot find module 'dotenv'
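A hedged reading of this last failure, based only on the steps quoted above: step 2 (npm install in the repository root, which the instructions say also installs the /api and /client packages) originally failed because npm was not yet present, and it does not appear to have been repeated after npm was installed, so the /api dependencies -- dotenv among them -- were still missing when the migration ran. A minimal recovery sketch under that assumption:

    cd REAL-BI && npm install            # repeat step 2 now that npm exists (root, /api and /client deps)
    cd api && npx sequelize db:migrate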
\\n\\nEnd of the road for me!\\n\\nAgain, I might be messing up with something, in which case I take full responsibility for my ignorance because although I am an IT person, I'm not a developer, and many of these things are beyond my understanding.\\nBut being a tool intended for non-technical people the installation shouldn't be this stressful.\\n\\nI'm open to understanding whatever I need to, to make this work, but my friendly suggestion would be to polish/simplify this installation process a bit, to make it more friendly and not so dependent on everything being already installed, and aligned for this to work, or having the skills to fill all the gaps between the instructions and the reality. \\n\\nAnd again, thank you very much!!\\nWarm regards!\\nRicardo\", \"post_time\": \"2022-02-11 20:22:48\" },\n\t{ \"post_id\": 35111, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"jjacob\", \"post_text\": \"Sorry, my bad. I was looking at the "Joined" date
. \\n\\nWe'll look into this issue and get back to you today\", \"post_time\": \"2022-02-11 13:42:03\" },\n\t{ \"post_id\": 35101, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Jerry,\\nGlad to hear from you!\\n\\nOh no, the post is from yesterday late night, or today's early morning
( Fri Feb 11, 2022). \\nLet me know if I can provide any other information.\\n\\nThank you!!!\", \"post_time\": \"2022-02-11 13:34:36\" },\n\t{ \"post_id\": 35091, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"Re: REAL-BI - npm assorted errors on install\", \"username\": \"jjacob\", \"post_text\": \"Hi Ricardo,\\n\\nWe just noticed this post of yours. Looks like this is from last year. Please let us know if you are still having issues. In the meantime, we'll try to reproduce this at our end\\n\\nThanks\\nJerry\", \"post_time\": \"2022-02-11 13:29:59\" },\n\t{ \"post_id\": 35081, \"topic_id\": 9281, \"forum_id\": 10, \"post_subject\": \"REAL-BI - npm assorted errors on install\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\n\\nI'm in the process of installing REAL-BI.\\nI've followed the instructions for the Docker installation.\\n\\nIn Windows 10 I get the errors that I attach to the post. \\nIn Ubuntu 20.04 some other errors I also attach.\\n\\nWhat they seem to have in common is that they come from the API container and seem to be related to npm and not finding some file or script to execute.\\n\\nI'll really appreciate any clarification and help on how to move on with this installation. \\nThe developer intricacies are way above my understanding, and so I'm a bit (in fact completely
) lost at this point.\\n\\nI also attach my configuration files (.env renamed to .env_forum and docker-compose.yml), in case I'm doing something very wrong and accidentally triggering the errors.\\nIf you need any other information/files please let me know.\\n\\nThank you very much!!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-02-11 03:13:33\" },\n\t{ \"post_id\": 35415, \"topic_id\": 9365, \"forum_id\": 10, \"post_subject\": \"Configure AuthService with SSL\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nI'm trying to configure AuthService and I would like to use SSL.\\n\\nI'm not sure about what to put in the nginx configuration file: nginx.conf.template\\n\\n
ssl on;\\n ssl_certificate <cert_path.pem>;\\n ssl_certificate_key <cert_path.key>;\\n
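For illustration, a filled-in version of those two directives typically looks like the lines below; the certificate file name and location are hypothetical examples only and should point at whichever PEM certificate and key are actually deployed for the service. Absolute paths, including the file name, are the least ambiguous choice here.

    ssl_certificate     /etc/ssl/certs/authservice.pem;
    ssl_certificate_key /etc/ssl/certs/authservice.key;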
\\n\\nAs per documentation: https://github.com/hpcc-systems/Auth-Service\\n\\nFor SSL, provide CERT_PATH value and update nginx config, go to client/nginx/conf.d/nginx.conf.template fill in coresponding values on line 4 and 5.\\n\\nI've already created a key-pair that resides in the keys folder of the AuthService project. (this is a previous step in the installation but in the documentation it is not related to the SSL configuration)\\nIs that the path I'm supposed to use? or do I have to generate a different "certificate"?\\n\\nAlso, in the .env file there is the following: (which I'm not sure if/how it relates to the other instructions)\\n#SSL Cert locations \\nCERT_PATH=/etc/ssl/certs
\\n\\nFinally, it would help a lot to have an example of how this path looks like in a working nginx configuration file (to eliminate doubts about if it is relative, absolute, includes the key name, and that sort of things)\\n\\n\\nThank you very much!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-04-27 22:42:59\" },\n\t{ \"post_id\": 156, \"topic_id\": 44, \"forum_id\": 11, \"post_subject\": \"Re: SQL to HPCC\", \"username\": \"SkipSnow\", \"post_text\": \"Bob:\\n\\nI think the question has more to do with migrating the data definitions, and querries etc that support the data than how do you simply move the data.\\n\\nIf so, It is interesting to note that the earliest versions of HPCC did support at least a subset of SQL, and wonder if any of that old code could be 'awoken' and given to the community?\", \"post_time\": \"2011-07-14 19:52:39\" },\n\t{ \"post_id\": 85, \"topic_id\": 44, \"forum_id\": 11, \"post_subject\": \"Re: SQL to HPCC\", \"username\": \"robert.foreman@lexisnexis.com\", \"post_text\": \"Hi Helen,\\n\\nIn the HPCC, you have the option to input data in one of three ways:\\n\\nFixed Length\\nVariable length (known as CSV)\\nXML\\n\\nSo the first step would be to export your SQL data into one of the supported formats. I think that most platforms support the CSV format, so that would be my first choice. After that, you simply copy the exported data to the Landing Zone and then spray from there. The Data Tutorial PDF has more information regarding this process.\\n\\nHope this helps!\\n\\nRegards,\\n\\nBob Foreman\\nLexisNexis\", \"post_time\": \"2011-06-29 16:32:32\" },\n\t{ \"post_id\": 79, \"topic_id\": 44, \"forum_id\": 11, \"post_subject\": \"SQL to HPCC\", \"username\": \"Helen\", \"post_text\": \"My data is quickly out growing its current SQL based implementation (SQL Server / MySQL / Oracle etc). Is there any facilitation available to migrate to HPCC?\", \"post_time\": \"2011-06-29 14:57:45\" },\n\t{ \"post_id\": 213, \"topic_id\": 50, \"forum_id\": 11, \"post_subject\": \"Re: Directory of Zipped Logfiles to HPCC?\", \"username\": \"dabayliss\", \"post_text\": \"Well - I finally found the time to solve my own problem - recorded here for others with the same issue:\\n\\n1) 7zip is a program that will batch-unzip files for you - it also tackles .gz files which is useful for people dealing with weblogs coming from a Linux based apache. I extracted all my zips into a data directory - which gave me a gazillion little logs\\n\\n2) copy *.log all.xlog then produced one concatened csv file\\n\\n3) Uploaded to the landing zone\\n\\n4) Sprayed using the normal crlf stuff as a seperator\\n\\nDavid\", \"post_time\": \"2011-08-01 15:55:16\" },\n\t{ \"post_id\": 122, \"topic_id\": 50, \"forum_id\": 11, \"post_subject\": \"Re: Directory of Zipped Logfiles to HPCC?\", \"username\": \"bforeman\", \"post_text\": \"David, \\n\\nRichard Taylor also added:\\n\\nThe ProgGuide article “Working with BLOBs” tells how to spray multiple files to a single file in Thor, but I have no idea how to unzip them all easily – it would have to be done PRE-spray.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2011-07-11 20:18:54\" },\n\t{ \"post_id\": 121, \"topic_id\": 50, \"forum_id\": 11, \"post_subject\": \"Re: Directory of Zipped Logfiles to HPCC?\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI would definitely get the files unzipped and concatenated prior to the spray. 
That's just me, I go with what I know, but perhaps someone else out here knows of a more elegant way.\\n\\nCheers,\\n\\nBob\", \"post_time\": \"2011-07-11 20:03:35\" },\n\t{ \"post_id\": 91, \"topic_id\": 50, \"forum_id\": 11, \"post_subject\": \"Directory of Zipped Logfiles to HPCC?\", \"username\": \"dabayliss\", \"post_text\": \"Hey,\\n\\nI have a directory containing thousands of zipped weblog files. Each zip contains one named file - that file contains thousands of lines of weblog info (in a slightly hickey CSV format).\\n\\nWhat is the easiest way to get this (unzipped and) 'sprayed' into the VM? I don't mind if the unzipping happens pre or post spray and I don't mind when the concat happens - just looking for the easiest way to get the data in there ....\\n\\nDavid\", \"post_time\": \"2011-06-30 16:14:35\" },\n\t{ \"post_id\": 160, \"topic_id\": 65, \"forum_id\": 11, \"post_subject\": \"Re: Sort Benchmark\", \"username\": \"HPCC Staff\", \"post_text\": \"We have not, to this point, devoted the resources to producing an official entry although we do run the benchmark internally and produce extremely impressive – albeit not externally verified - results. \\n\\nAs a platform vendor we are really more concerned with how efficient a given architecture makes a particular set of machines; rather than how many machines we can put in a room. We believe that on identical hardware our system will go faster than anything else. Therefore we are releasing the ECL benchmark code we use soon – and we invite people to run the benchmarks and publish their results.\\n\\nThank you!\", \"post_time\": \"2011-07-15 14:14:26\" },\n\t{ \"post_id\": 159, \"topic_id\": 65, \"forum_id\": 11, \"post_subject\": \"Sort Benchmark\", \"username\": \"michaelatln\", \"post_text\": \"Why isn't HPCC a winner based on its benchmark on http://sortbenchmark.org/\", \"post_time\": \"2011-07-15 02:27:21\" },\n\t{ \"post_id\": 163, \"topic_id\": 66, \"forum_id\": 11, \"post_subject\": \"Re: Interaction between big and small data\", \"username\": \"dabayliss\", \"post_text\": \"Hi Jason,\\n\\nWelcome to my life This is very much the sort of system that we have (actually we have a number of them for different parts of the business). As I'm sure you appreciate it is difficult to design an 'optimal' system without more knowledge of the speeds and feeds - but let me outline a few facts for you that I think might help you get there.\\n\\n1) ALL clusters in a single environment (Dali instance) can read from and write to each others disks (in fact you can read between environments too). Where you choose to put your 'master' copy of the data is a matter of system design- but whereever it is everyone will be able to get to it.\\n\\n2) ECL has a notion of superfiles (and superkeys). These are logical files names that can be used by your ECL code - however they can refer to multiple actual files. Therefore it is possible to 'append' data to a file, or even to 'shift' a chunk of data between files without any actual data moving (only meta data)\\n\\n3) In terms of the 'little batches' - as long as you don't need genuine transactions in the computational sense (ie a record lock across multiple files) - there is no real downside to using the HPCC. We have many, many processes working on mini-batches - 10-100 records at a shot. 
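A minimal ECL sketch of the superfile 'append' from FACT 2 above, using the Standard Library superfile functions; the logical file names and layout are hypothetical placeholders, and it assumes the superfile was created once beforehand with STD.File.CreateSuperFile:

    IMPORT STD;
    Layout   := {UNSIGNED6 id, STRING20 name};
    superNm  := '~example::master';                     // the name queries read via DATASET(...)
    newBatch := DATASET([{1, 'new row'}], Layout);      // a freshly arrived mini-batch
    batchNm  := '~example::batch_' + WORKUNIT;          // unique sub-file name per run
    SEQUENTIAL(
      OUTPUT(newBatch, , batchNm, OVERWRITE),           // land the batch as its own sub-file
      STD.File.StartSuperFileTransaction(),
      STD.File.AddSuperFile(superNm, batchNm),          // metadata-only 'append'
      STD.File.FinishSuperFileTransaction());

The periodic roll-ups David goes on to describe are then just a matter of reading the superfile, consolidating (sort/dedup/re-write) into one new sub-file, and swapping the old sub-files out inside another superfile transaction.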
If you do need transactions we use what we call a delta-base - this is an ultra-thin SQL front end which handles transactions in flight and then we rip the data out of it every (say) 15 minutes.\\n\\nAddressing your two principle questions - I believe FACT 1 essentially puts your plan back in play.\\n\\nIn terms of your second point - we have a couple of major (in terms of size and criticality) processes that do this - we use a cascading rollup trick. Obviously you can tweak the numbers to suit your circumstance - but the idea is this -\\nOur 'model' is we have monthly, weekly, daily, hourly and 10 minute files. Our master file is really some collection of these wired together with a superfile. Our 'running' process has the job of spitting out 10 minutes files (every 10 minutes). We then have an hourly job which rolls the last 6 up into an hourly file. Then daily we roll 24 of those up into a daily file. We roll seven of those up into a weekly files etc.\\n\\nIn the 10 minute file a delete is just a record that says: delete this - our query processes WILL apply those deletes on the fly if they need to. Then at each rollup any deletes than can be applied are - others that are not 'paired' yet remain as deletes.\\n\\nNaturally we organize our workflow so that our daily, weekly, monthly rollups occur during those periods when our machinery would otherwise be less busy. (Note - we also have technology such as MERGE which can make the rollups MUCH faster than one might expect)\\n\\nI hope the above makes some sense; if not - or if you have some further questions - please feel free to ask.\\n\\nDavid\", \"post_time\": \"2011-07-15 16:30:59\" },\n\t{ \"post_id\": 161, \"topic_id\": 66, \"forum_id\": 11, \"post_subject\": \"Interaction between big and small data\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nSorry for the long post but I want to give an accurate description of the problems I'm facing. Any help is appreciated!\\n\\nI work for a healthcare decision support business. We're looking for a better solution than the current Oracle/.Net approach. We service dozens of clients, each using one or more: large batch processes (sets of 10-15 text files totaling 20-30GB); small batch processes (files totally 100MB-5GB); or near real-time TCP/IP streams.\\n\\nThe batch data is cleaned, aggregated, and delivered to reporting servers in monthly cycles. Our current technology drives the batch intervals; clients actually want to break the batches into smaller chunks (like daily instead of monthly) as their data tends to be stale by the end of the reporting month. The real-time data is cleaned, analyzed, and compared to previously received batches using processes that run in 15 minute, 1 hour, or 1 day intervals. The streams are tiny, less than 50MB per day of raw data. \\n\\nNote: the streams represents clinical data for active inpatient admissions which are only valuable for a few days. Inpatient data is not appended to any of the batched data. Hospitals have an internal process to alter/enhance the data after patients are discharged, this enhanced discharge data is sent to us in the large batches.\\n\\nI can definitely see the HPCC system solving most of the batch problems but I'm a little fuzzy on two things:\\n1)\\tHandling the streams - My approach would be to separate the batch data from the streams data using 2 THORs. THOR1 would handle large batches and publish to Roxie. THOR2 would handle the micro batches but here’s where my plan falls apart. 
The micro batches need read-access to data on Roxie, would this happen in THOR2 or would I need to publish to Roxie first? I’m assuming it’s possible for two THORs to publish to the same Roxie. Would I be better off managing that data in an OLTP database since it's not strictly "big data"? The problem with that approach is I would have to duplicate all of the parsing/cleanup/analysis rules in ECL and SQL/.Net.\\n\\n2)\\tReducing batch latency with more frequent smaller batches – batch files could contain new records, updates for existing records, “reversals” indicating deletes of existing data, or duplicated junk data that has already been processed. Since the final output is a blend of detailed and aggregated information we have to re-aggregate practically everything when any data changes. This is expensive in our current environment; the total “ingest, recompute, and publish time” represents our theoretical minimum process latency. HPCC seems to handle this better but I’m not seeing any sort of append/delete mechanism, does that suggest that each data deliverable (on Roxie) inherently recomputes everything, and if it does, do I even care?\\n\\nThanks for the help!\\nJason\", \"post_time\": \"2011-07-15 15:40:55\" },\n\t{ \"post_id\": 176, \"topic_id\": 70, \"forum_id\": 11, \"post_subject\": \"Re: HPCC as Database\", \"username\": \"dabayliss\", \"post_text\": \"We did have someone do that a long time ago; I believe he was successful - at least as a proof of concept.\\n\\nWe have not pursued the idea any further ourselves; ECL has native text indexing and searching capabilities - so we tend to use those for performance and simplicity and to allow us to integrate our text and non-text products.\\n\\nDavid\\n\\nIncidentally we do have a 'boolean search' module (which is built upon our text indexing capability) - that offers sophisticated text searching out of the box.\", \"post_time\": \"2011-07-19 20:36:48\" },\n\t{ \"post_id\": 172, \"topic_id\": 70, \"forum_id\": 11, \"post_subject\": \"HPCC as Database\", \"username\": \"Durai\", \"post_text\": \"Can we use HPCC as a Big data storage device and let lucene/solr search engine to index the data and make it searchable?\\n\\nIs there any attempt made earlier like this?\", \"post_time\": \"2011-07-19 18:09:19\" },\n\t{ \"post_id\": 317, \"topic_id\": 106, \"forum_id\": 11, \"post_subject\": \"Re: handling updated records\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks, David, that was very helpful.\", \"post_time\": \"2011-09-06 12:49:48\" },\n\t{ \"post_id\": 315, \"topic_id\": 106, \"forum_id\": 11, \"post_subject\": \"Re: handling updated records\", \"username\": \"dabayliss\", \"post_text\": \"Incidentally - 'Ingest' is one of the capabilities handled automatically by our SALT code generator ....\\n\\nDavid\", \"post_time\": \"2011-09-02 18:47:07\" },\n\t{ \"post_id\": 314, \"topic_id\": 106, \"forum_id\": 11, \"post_subject\": \"Re: handling updated records\", \"username\": \"dabayliss\", \"post_text\": \"Interestingly I was having a chat this morning with one of our senior guys about the correct approach to developers producing 'elegant' solutions - unfortunately it is forbidden under the constitution
\\n\\nYou ALWAYS want to go with brutally simple - until you can't.\\n\\nIn the case of Ingest there is a very simple fall-back from 'too brutally simple' - and that is the delta - or even cascading delta.\\n\\nThe concept is this: if the file being ingested is tiny then you can 'join' (anti-join etc etc) it against the bigger file very easily (eg lookup join, or PARTITION on a join etc). Thus you can very quickly annotate your 'little' file with all of the flags / notes / sequences you need. You then simply dump it (the little file) down on the disk and 'append' it to the larger file using a superfile (or superkey in the roxie delivery case). You can handle a delete by having a 'delete' record that kills its own instance during the read phase.\\n\\nThen every 'now and again' (obviously dependant upon speeds, feeds and thor cycles) you gather up the original big file, all the data in the little bits and you do the sort/dedup/re-write - and you are back down to one big 'perfect' file and you can start collecting your fragments again.\\n\\nIn the extreme case of a HUGE base file (Petabytes, or perhaps very high TB) with low latency update requirements (say - 5 minutes) then you can go to the 'cascading' version. Same idea - except you might only want to touch the huge file once a month but you don't want thousand of scratty little files laying around. So you (say) roll your 5 minute files into a 1 hour file, then roll your 1 hours files into a daily file, dailies into a weekly and then finally weeklies into the main file once a month. (Obviously I am picking arbitrary numbers - you can have a 27.32 hourly file if you really want)\", \"post_time\": \"2011-09-02 18:45:13\" },\n\t{ \"post_id\": 313, \"topic_id\": 106, \"forum_id\": 11, \"post_subject\": \"handling updated records\", \"username\": \"aintnomyth\", \"post_text\": \"I'm working on a data ingestion process and I keep getting tangled up on record "updates". As far as I know there is no mechanism for updating a logical file (in a SQL sense), correct? \\n\\nThe basic problem is that incoming files can have a mixture of never-before-seen new records, updates to existing records, duplicates of existing records (aka junk), reversal/delete identifiers, and garbage. We need to find the data changes and apply them to the production data repository that is constantly accumulating.\\n\\nThe plan is to structure the program something like this:\\n1. Spray new files as a working logical file\\n2. Compare the working file to the production file, identify New/Updated/Junk data in the working set\\n3. Apply the changed data to production\\n\\nFor step 3, it seems like we have basically two strategies but I keep bouncing between, "gee I don't want to overwork THOR so I better find an elegant way to handle updates", and, "it's THOR, tell it what you want and get out of the way, don't over think it". The strategies are:\\n\\nBrute Force\\n1. Create a filtered production recordset, anti-Join Production with the "working updates" recordset\\n2. Merge the updates and new records with the filtered production data\\n3. Output a new logical file, perform superfile maintenance as needed\\n\\nElegant \\n1. Use a record versioning field\\n2. When updates arrive we find the current record version and increment it\\n3. Hide prior versioned records from downstream processes using a group/filter attribute\\n4. 
Periodically adjust the dataset to remove hidden records, if desired \\n\\n"Brute force" feels wrong because a relatively small amount of incoming data is prompting the rebuild of a relatively large production file. \\n\\n"Elegant" feels wrong because it's practically a transaction log, and HPCC isn't exactly built for transactions.\\n\\nI keep thinking that I'm forgetting a big picture concept somewhere...it has been a couple years since my ECL classes. Any advice will be much appreciated!\\n\\nThanks,\\nJason\", \"post_time\": \"2011-09-02 17:24:08\" },\n\t{ \"post_id\": 368, \"topic_id\": 118, \"forum_id\": 11, \"post_subject\": \"Re: Roxie needs deployed queries to remain on Thor\", \"username\": \"flavio\", \"post_text\": \"Todd,\\n\\nTo maintain independent copies of the data, you can put Roxie on a separate Dali. This may be an EE only feature, though.\\n\\nHaving said this, you should be able to delete the redundant copy of the data from Thor without affecting the copy of the data in Roxie, but that's not the same as deleting the logical file. I need to check to see if the former is currently implemented and how.\\n\\nThe reason behind the change in Dali's behavior with respect to Roxie was to avoid having these "orphan" files, and also to provide consistency across the environment. This should ensure a more seamless user experience (tool-wise, for example).\\n\\nFlavio\", \"post_time\": \"2011-09-28 15:06:01\" },\n\t{ \"post_id\": 366, \"topic_id\": 118, \"forum_id\": 11, \"post_subject\": \"Re: Roxie needs deployed queries to remain on Thor\", \"username\": \"thildebrant\", \"post_text\": \"Flavio,\\nThank you for your reply.\\nWhat is the method to enable Roxie to maintain independent copies of the data?\\nAnd can you elaborate on the decision to change to only one logical file? What are the benefits?\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-09-28 12:19:33\" },\n\t{ \"post_id\": 365, \"topic_id\": 118, \"forum_id\": 11, \"post_subject\": \"Re: Roxie needs deployed queries to remain on Thor\", \"username\": \"flavio\", \"post_text\": \"Todd,\\n\\nDeleting a logical file deletes all copies of the file, including Roxie's and Thor's. Please keep in mind that Dali is aware of both, Thor and Roxie.\\n\\nIt probably wasn't obvious, but the platform was preventing the removal of the files in Roxie by keeping a lock on them. By rebooting the VM, you effectively circumvented that protection and allowed for the deletion process to complete.\\n\\nIn any case, there are two factors that may make this slightly confusing:\\n\\n
\\n1. If you were used to the old behavior (Roxie creating an independent copy of the data), the current platform can do this but this is not the default.\\n2. The delete process should have probably failed right away rather than just block on a lock until the lock was released, and then completing the removal of the file. It might be a good idea to change the current behavior to fail immediately if the user tries to delete a file for which there is a query deployed to Roxie.\\n
\\n\\nFlavio\", \"post_time\": \"2011-09-28 12:07:39\" },\n\t{ \"post_id\": 356, \"topic_id\": 118, \"forum_id\": 11, \"post_subject\": \"Roxie needs deployed queries to remain on Thor\", \"username\": \"thildebrant\", \"post_text\": \"built indexes in Thor, deployed the query to Roxie, then deleted the index from Thor. Unfortunately, this caused the Roxie query to stop working even though when\\n I deployed the query to Roxie the data got copied into the Roxie data directory and was still there after I had deleted it from Thor.\\n\\nTo re-create using _Certification:\\n1. \\nRun on Thor: _Certification.BuidDataFiles\\n2. \\nRun on Thor: _Certification.build_index\\n3. \\nCompile on Roxie, then Publish: _Certification.ReadIndexService\\n4. \\nVerify that the Roxie service works\\n5. \\nLog into the VM and verify that the index (full_test_distributed_index) exists in both:\\na. \\n/var/lib/HPCCSystems/hpcc-data/thor/certification\\nb. \\n/var/lib/HPCCSystems/hpcc-data/roxie/certification\\n6. \\nUsing ESP, browse to the files in certification and delete them.\\na. \\nThis step won’t actually complete while Roxie is up – it’s holding a lock on the files.\\nb. \\nRebooting the VM releases the lock, and the index gets deleted.\\n7. \\nReboot the VM.\\n8. \\nLog into the VM and verify that the full_test_distributed_index files now exist only in: /var/lib/HPCCSystems/hpcc-data/roxie/certification\\n9. \\nThe Roxie service now fails with error: \\na. \\nQuery readindexservice.1 is suspended because Could not resolve filename certification::full_test_distributed_index\\n\\nSince the index was copied to Roxie, and still exists in roxie’s data directory, the query should still work. (Right now, it appears data has to be double-stored in order for Roxie to work properly in the OSS VM.)\", \"post_time\": \"2011-09-24 13:49:23\" },\n\t{ \"post_id\": 1461, \"topic_id\": 329, \"forum_id\": 11, \"post_subject\": \"Re: Data Ingestion - Automation\", \"username\": \"Durai\", \"post_text\": \"Thanks Richard. I am looking into the details now.\", \"post_time\": \"2012-04-10 04:16:43\" },\n\t{ \"post_id\": 1456, \"topic_id\": 329, \"forum_id\": 11, \"post_subject\": \"Re: Data Ingestion - Automation\", \"username\": \"rtaylor\", \"post_text\": \"Durai,\\n\\nYou can automate data file sprays to Thor using either the DFUplus.exe command line utility or the STD.File.Spray... functions.\\n\\nThen you can automate the subsequent ETL processing of that data on Thor using the ECLplus.exe command line utility. \\n\\nFinally, you can automate Publishing to Roxie using the ECL.exe command line utility.\\n\\nThese command line utilities are all documented in the ClientTools.PDF available for download here: http://hpccsystems.com/community/docs/e ... leinttools\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-09 19:27:04\" },\n\t{ \"post_id\": 1451, \"topic_id\": 329, \"forum_id\": 11, \"post_subject\": \"Data Ingestion - Automation\", \"username\": \"Durai\", \"post_text\": \"Can I automate data ingestion to Thor cluster and move the ETL-ed data to Roxie?\\n\\nFor example, I want to direct the flume (Apached Flume) sources to HPCC (considering HPCC thor as sink). Then automate the ETL process with my ECL queries and refined data to be published to Roxie. \\n\\nWith this setup, it is possible to get near real time analytics with HPCC engine, any thoughts or references will help. 
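As a rough illustration of the DFUplus route Richard suggests above, a spray invocation typically looks like the line below; the option names come from the Client Tools documentation, while the server address, credentials, dropzone path and logical file name are placeholders:

    dfuplus action=spray server=http://192.168.56.1:8010 username=someuser password=somepwd \
            srcip=192.168.56.10 srcfile=/var/lib/HPCCSystems/mydropzone/daily_feed.csv \
            dstname=~example::daily_feed dstcluster=mythor format=csv overwrite=1

The same utility also covers the XML sprays discussed later in this section (format=xml with a rowtag= option in place of format=csv), which is what makes it suitable for scripted, scheduled ingestion.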
Thanks in Advance.\\n\\nRegards\\nDurai\", \"post_time\": \"2012-04-09 04:10:06\" },\n\t{ \"post_id\": 1723, \"topic_id\": 380, \"forum_id\": 11, \"post_subject\": \"Re: dfuplus dstname and file scope\", \"username\": \"jeremy\", \"post_text\": \"excellent. thanks!\", \"post_time\": \"2012-06-06 20:39:50\" },\n\t{ \"post_id\": 1722, \"topic_id\": 380, \"forum_id\": 11, \"post_subject\": \"Re: dfuplus dstname and file scope\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\nIt seems that the dstname parameter of dfuplus for spray actions ignores the "~" prefix to denote scope. For instance dstname="~foo::test" and dstname="foo::test" both send the data to the "test" folder of the "foo" scope... where I would expect the 2nd case to end up in "thor::foo::test" ( where "thor" is the default scope name ).
You are correct. DFUplus, the Standard Library spray functions, and the ECL Watch spray pages all create the name exactly as you specify and do not prepend the default scope name, ever.\\n\\nThe only place where a filename ever gets the default scope name automatically prepended if the leading tilde is missing is in ECL code (specifically, OUTPUT and BUILD).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-06 20:33:32\" },\n\t{ \"post_id\": 1719, \"topic_id\": 380, \"forum_id\": 11, \"post_subject\": \"dfuplus dstname and file scope\", \"username\": \"jeremy\", \"post_text\": \"Greetings,\\nIt seems that the dstname parameter of dfuplus for spray actions ignores the "~" prefix to denote scope. For instance dstname="~foo::test" and dstname="foo::test" both send the data to the "test" folder of the "foo" scope... where I would expect the 2nd case to end up in "thor::foo::test" ( where "thor" is the default scope name ).\\nThoughts?\\nThanks.\", \"post_time\": \"2012-06-06 19:07:30\" },\n\t{ \"post_id\": 6482, \"topic_id\": 416, \"forum_id\": 11, \"post_subject\": \"Re: Batch spraying several thousands of XML files\", \"username\": \"rtaylor\", \"post_text\": \"Johni,\\n\\nThe answer to your question was previously posted to this thread: the dfuplus client-side utility. It's documented in the Client Tools PDF (http://cdn.hpccsystems.com/install/docs ... tTools.pdf) \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-20 13:07:36\" },\n\t{ \"post_id\": 6475, \"topic_id\": 416, \"forum_id\": 11, \"post_subject\": \"Re: Batch spraying several thousands of XML files\", \"username\": \"Ali\", \"post_text\": \"Is there a way to batch spray XML files? The only way that I know is to go through ECLWatch and choose Spray-XML ... Choose file ... set row tag, etc.\", \"post_time\": \"2014-10-18 05:59:31\" },\n\t{ \"post_id\": 2568, \"topic_id\": 416, \"forum_id\": 11, \"post_subject\": \"Re: Batch spraying several thousands of XML files\", \"username\": \"wgsh\", \"post_text\": \"I use dfuplus to spray files to cluster, it seems like multiple process concurrent spraying operation will be put in a queue to execute one by one. how to speed up loading files?\\n\\nthanks,\\njason\", \"post_time\": \"2012-10-19 14:53:02\" },\n\t{ \"post_id\": 1900, \"topic_id\": 416, \"forum_id\": 11, \"post_subject\": \"Re: Batch spraying several thousands of XML files\", \"username\": \"DSC\", \"post_text\": \"[quote="michael-mason":2creee8s]Is there a way to batch spray XML files? The only way that I know is to go through ECLWatch and choose Spray-XML ... Choose file ... set row tag, etc. \\n\\nYou might want to check out the dfuplus client-side utility. It's documented in the Client Tools PDF (http://cdn.hpccsystems.com/install/docs ... tTools.pdf) beginning on page 74. You may have to write your own wrapper script to execute this utility the way you want, but I think you can get what you need one way or another.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-07-05 19:00:55\" },\n\t{ \"post_id\": 1892, \"topic_id\": 416, \"forum_id\": 11, \"post_subject\": \"Batch spraying several thousands of XML files\", \"username\": \"michael-mason\", \"post_text\": \"Hello,\\n\\n<not sure if this is the correct forum ...>\\n\\nIs there a way to batch spray XML files? The only way that I know is to go through ECLWatch and choose Spray-XML ... Choose file ... set row tag, etc. \\n\\nThanks!\\n-Mike\", \"post_time\": \"2012-07-05 14:20:26\" },\n\t{ \"post_id\": 1899, \"topic_id\": 417, \"forum_id\": 11, \"post_subject\": \"Re: Is there a way to change the scope/prefix after spraying\", \"username\": \"Tony Kirk\", \"post_text\": \"I'll assume you sprayed the file without any prefix or directory (i.e. no "::" at all), a la "my_file_name_in_one_string" or similar.\\n\\nWhen viewing the files in ECLWatch file lists, the "scope" displayed as ".::" (as a link) is probably to provide the means to select and filter by that root scope. When defined in your ECL, however, it is not needed, and the leading tilde will signal the name is fully qualified as is.\\n\\n
my_dataset := DATASET('~my_file_name_in_one_string'...);
\nFor what it's worth, providing the root scope also works, so that\n\nmy_dataset := DATASET('~.::my_file_name_in_one_string'...);
\\nappears to work, too.\\n\\nOf course, you always have the ability to rename, at which time you can completely change the name/scope of the logical file (look for RenameLogicalFile).\\n\\nIf I missed something or more questions, let me know.\\n\\n-Tony\", \"post_time\": \"2012-07-05 19:00:51\" },\n\t{ \"post_id\": 1893, \"topic_id\": 417, \"forum_id\": 11, \"post_subject\": \"Is there a way to change the scope/prefix after spraying?\", \"username\": \"michael-mason\", \"post_text\": \"Hi,\\n\\nI just sprayed some XML files, and I didn't specify a prefix for them. Now, when I look at the logical files, the scope is just '.'. So, to access the data, I guess I have to do something like: .:\\n\\nThis is ugly. Is there a way that I can change the scope after spraying?\\n\\nThanks,\\n-Mike\", \"post_time\": \"2012-07-05 14:33:47\" },\n\t{ \"post_id\": 2140, \"topic_id\": 472, \"forum_id\": 11, \"post_subject\": \"Re: Data synchronization and querying\", \"username\": \"rtaylor\", \"post_text\": \"As per my understanding, the ECL queries submitted to a Roxie cluster can be :\\n1. Executed on a remote Thor cluster which has all the BigData(in TB/PB/ZP)
Sorry, but that is not correct. \\n\\nQueries sent to a Roxie are executed on that Roxie -- they may either:
\\nYou could, of course, use SOAPCALL to have your Roxie query launch a Thor job, but that would be working against the system design and not with it.\\n\\n2. Executed on the same Roxie cluster itself, first by referring the remote data till it is getting copied on to Roxie and then, locally
Yes. That scenario is possible. You can have Roxie configured to access data remotely while the data is in the process of being copied from Thor to Roxie.
2. Assuming 2. is happening, \ni. Again, the query is identical to 1.\nii. Suppose the query processing is complete at t1 and there is already some new data added to the Thor cluster before t1. Now, how and when does this new data come to Roxie(synchronization)? Again, at t2, if the same/similar query comes in, will it be run on the 'latest' data set? In simple words, how is the data between Thor and Roxie 'synchronized' ?\n
This question presumes that HPCC operates like an RDBMS and can do OLTP -- this is not the case. HPCC is a batch-processing type of environment. Data files read in a job are never written to, therefore there is no "update" functionality. There are techniques that can be used to make an HPCC environment closely emulate an OLTP system, but accomplishing that requires a fairly complex design and implementation. \\n\\nThor and Roxie serve very different purposes:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-02 13:54:48\" },\n\t{ \"post_id\": 2133, \"topic_id\": 472, \"forum_id\": 11, \"post_subject\": \"Data synchronization and querying\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nAs per my understanding, the ECL queries submitted to a Roxie cluster can be :\\n1. Executed on a remote Thor cluster which has all the BigData(in TB/PB/ZP)\\n2. Executed on the same Roxie cluster itself, first by referring the remote data till it is getting copied on to Roxie and then, locally\\n\\nThere are a few queries I have here :\\n\\n1. Assuming 1. is happening, the query processing is taking several seconds,probably minutes, given the large data. Now, while these queries are in progress, some new data is sprayed onto this Thor cluster. Now, will the running query consider this new data set or will it continue on the 'older' data set and give results accordingly ?\\n2. Assuming 2. is happening, \\n i. Again, the query is identical to 1.\\n ii. Suppose the query processing is complete at t1 and there is already some new data added to the Thor cluster before t1. Now, how and when does this new data come to Roxie(synchronization)? Again, at t2, if the same/similar query comes in, will it be run on the 'latest' data set? In simple words, how is the data between Thor and Roxie 'synchronized' ?\\n\\nThanks and regards !\", \"post_time\": \"2012-08-02 09:07:27\" },\n\t{ \"post_id\": 3030, \"topic_id\": 662, \"forum_id\": 11, \"post_subject\": \"Re: Reducing Skew\", \"username\": \"rtaylor\", \"post_text\": \"sban,\\n\\nI would think your #1 would be your best/simplest/easiest option.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-12-21 18:54:08\" },\n\t{ \"post_id\": 3028, \"topic_id\": 662, \"forum_id\": 11, \"post_subject\": \"Reducing Skew\", \"username\": \"sban\", \"post_text\": \"Hello,\\n\\nI'm facing an issue wherein my data file is extremely skewed (+2900%, -100%)across slaves.\\n\\nThis is in spite of doing a hash32 distribute on two fields (one of which admittedly has lots of 0s, while the other is a mostly unique 38 digit integer).\\n\\nIs there anything that I can do to reduce this skew? I could:\\n1. Exclude the field with 0s from the hash key\\n2. Use some other hashing function\\n\\nWill be happy to provide whatever other information you need.\", \"post_time\": \"2012-12-20 22:30:15\" },\n\t{ \"post_id\": 3152, \"topic_id\": 686, \"forum_id\": 11, \"post_subject\": \"Re: Data Governance\", \"username\": \"Durai\", \"post_text\": \"Thanks Flavio,. The details are really helpful and exactly what I was looking for.\", \"post_time\": \"2013-01-23 15:30:35\" },\n\t{ \"post_id\": 3151, \"topic_id\": 686, \"forum_id\": 11, \"post_subject\": \"Re: Data Governance\", \"username\": \"flavio\", \"post_text\": \"Durai,\\n\\nI don't think we have any public paper on Data Governance and HPCC. We have, internally, a number of best practices on Data Governance, and a platform called Orbit, which integrates with HPCC and provides data provenance tracking, process workflow automation, QA/QC metrics, etc. 
The plan is to package and release this Orbit platform around Q2 2013, so we may be also creating some documentation to go with it.\\n\\nHaving said this, there are a number of standard components in our Data Governance best practices, including creating unique identifiers using our SALT process (which in our environment are called LexID), tracking data sources and contractual, legal and regulatory permissible use requirements across the data life-cycle, ensuring proper data disposal as required by contracts and regulations, verifying that data updates meet format and semantic metrics based on our existing data repository indicators, etc.\\n\\nPlease let me know if this helps,\\n\\nFlavio\", \"post_time\": \"2013-01-23 13:54:12\" },\n\t{ \"post_id\": 3125, \"topic_id\": 686, \"forum_id\": 11, \"post_subject\": \"Data Governance\", \"username\": \"Durai\", \"post_text\": \"Hi,\\n\\nWe are working on some proposals that involves Data Governance as primary goals. (Principles and Rules for Data Governance are derived from Enterprise Architecture Governance)\\n\\nIs there any case studies that you can share on 'Data Governance with HPCC'.?\\n\\nThanks\\nDurai\", \"post_time\": \"2013-01-18 06:55:55\" },\n\t{ \"post_id\": 3248, \"topic_id\": 713, \"forum_id\": 11, \"post_subject\": \"Re: Pulling data from social sites\", \"username\": \"bforeman\", \"post_text\": \"The PIPE ECL function is located in the Language Reference Manual.\\nYou can also just type in the word "PIPE" in any ECL file in the ECL IDE, and press the F1 key.\\n\\nI will have a look at your other post.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-30 15:39:40\" },\n\t{ \"post_id\": 3246, \"topic_id\": 713, \"forum_id\": 11, \"post_subject\": \"Re: Pulling data from social sites\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi bob,\\n\\nThanks for the reply
\\n\\nI'm totally clueless about the ECL PIPE which is faintly mentioned in the HPCC Introduction documentation - please refer the below question.\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=722&sid=22459bc0c057c631e3d7fc685ffe6fa3\\n\\nWhere do I get hold of ECL PIPE - documentation, examples etc. ?\\n\\nThanks and regards !\", \"post_time\": \"2013-01-30 15:12:55\" },\n\t{ \"post_id\": 3232, \"topic_id\": 713, \"forum_id\": 11, \"post_subject\": \"Re: Pulling data from social sites\", \"username\": \"bforeman\", \"post_text\": \"One of our developers has used the Twitter API (https://dev.twitter.com/docs/api) to collect tweets. \\n\\n1/ a linux app to harvest tweets \\n2/ a javascript app to do selective tweet gets\\n3/ a linux app callable from ECL using PIPE to do selective tweet gets\\n\\nShe basically used 2 different approaches, but since the Twitter API has been evolving, I am not sure whether both are still available/supported.\\n1/ She was repeatedly calling the API to get all the tweets. We needed that to create a reasonable training set for our ML classifier\\n2/ She calls the API a few times, passing in a specific term to filter the tweets by. We needed that to get a set of tweets associated with specific topic\\n\\nIn both cases, the app has to be written to keep calling the twitter api, and the code should be written in such a way not to get the twitter service upset
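A minimal, hypothetical sketch of the PIPE-as-a-data-source pattern described above (the "gettweets" command is a made-up placeholder for whatever harvesting app you build; PIPE simply runs the command on the cluster and reads its standard output into a dataset):

// Hypothetical example only -- "gettweets" is not a real HPCC tool.
TweetLine := RECORD
  STRING line;   // one raw tweet (e.g. a JSON document) per line of output
END;

// PIPE runs the external command and treats its stdout as the dataset source.
// SEPARATOR('') keeps each whole output line in the single "line" field.
rawTweets := PIPE('gettweets --query hpcc --count 100', TweetLine, CSV(SEPARATOR('')));

OUTPUT(rawTweets);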
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-29 17:03:01\" },\n\t{ \"post_id\": 3209, \"topic_id\": 713, \"forum_id\": 11, \"post_subject\": \"Pulling data from social sites\", \"username\": \"kaliyugantagonist\", \"post_text\": \"I went through the Introduction(white paper PDF) of HPCC and also through the Sentilyze use-case which uses a csv as input.\\n\\nThe Introduction pdf claims that HPCC can pull data from external web sites.\\n\\nHow can this be achieved for social sites like FB,Twitter etc.\\n\\nNote: How to specify the record structure for pulling such data?Will NLP support of HPCC be required?\\n\\nThanks and regards !\", \"post_time\": \"2013-01-28 10:24:19\" },\n\t{ \"post_id\": 3476, \"topic_id\": 762, \"forum_id\": 11, \"post_subject\": \"Re: Data Transform Question\", \"username\": \"DSC\", \"post_text\": \"The TABLE command may be what you are looking for, but I may be misunderstanding your question. If so, you may want to post some example input and desired output values.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-16 16:40:44\" },\n\t{ \"post_id\": 3442, \"topic_id\": 762, \"forum_id\": 11, \"post_subject\": \"Data Transform Question\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nI am still learning the ECL, and currently I have the pre-exist data format as A(string),B(string),C(numeric value),D(numeric value).\\n\\nI want to build a matrix having row as index of A, column as index of B, (C,D) are the cell value in the matrix.\\n\\nCould somebody tell me how could I do that in ECL? \\n\\nI can get the number of A and B, but I do not know how to convert from a table toward the matrix?\\n\\nThanks\\nBo\", \"post_time\": \"2013-02-13 14:13:01\" },\n\t{ \"post_id\": 4648, \"topic_id\": 1043, \"forum_id\": 11, \"post_subject\": \"Re: Excellent Video on Learning Pentaho or Kettle Transforma\", \"username\": \"Rehan1\", \"post_text\": \"nice video On Pentaho or Kettle Transformatio thank sonam\", \"post_time\": \"2013-09-26 09:46:32\" },\n\t{ \"post_id\": 4633, \"topic_id\": 1043, \"forum_id\": 11, \"post_subject\": \"Excellent Video on Learning Pentaho or Kettle Transformatio\", \"username\": \"sonam\", \"post_text\": \"Hi All,\\n \\nExcellent video for learning Pentaho / Kettle transformation steps. This explains all the concepts very well in a very simple manner. Hope this will be helpful to everyone.\\n \\nVideo Link - http://www.youtube.com/watch?v=ayFt9L0n_rM\", \"post_time\": \"2013-09-24 10:17:45\" },\n\t{ \"post_id\": 6500, \"topic_id\": 1213, \"forum_id\": 11, \"post_subject\": \"Re: Hadoop Admin Information Request\", \"username\": \"cheapfut15coins\", \"post_text\": \"Quite the opposite, If your availability of definitely. \\nhttp://www.proutcoins.com\", \"post_time\": \"2014-10-25 05:37:48\" },\n\t{ \"post_id\": 5237, \"topic_id\": 1213, \"forum_id\": 11, \"post_subject\": \"Re: Hadoop Admin Information Request\", \"username\": \"bforeman\", \"post_text\": \"This site showcases the HPCC Environment as an alternative to Hadoop. The information that we provide are the process and steps involved for developers moving from the Hadoop platform to HPCC. 
Here are links to some PDFs with that related information:\\n\\nhttp://hpccsystems.com/community/white-papers/ecl-for-hadoopers\\n\\nhttp://hpccsystems.com/community/white-papers/performing-pig-pen\\n\\nhttp://hpccsystems.com/community/white-papers/ecl-for-piggers\\n\\nHope this helps.\\n\\nBob\", \"post_time\": \"2014-02-19 16:54:50\" },\n\t{ \"post_id\": 5233, \"topic_id\": 1213, \"forum_id\": 11, \"post_subject\": \"Hadoop Admin Information Request\", \"username\": \"shashi24\", \"post_text\": \"What are some of the questions a hadoop admin shd ask or request information if someone asks him they want to setup a hadoop environment with data of size 1tb?\", \"post_time\": \"2014-02-19 15:23:54\" },\n\t{ \"post_id\": 6744, \"topic_id\": 1260, \"forum_id\": 11, \"post_subject\": \"Re: Spraying Word document in HPCC THOR\", \"username\": \"rtaylor\", \"post_text\": \"faari,\\n\\nThat sounds like a reasonable approach to use in HPCC, too.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-31 20:10:53\" },\n\t{ \"post_id\": 6736, \"topic_id\": 1260, \"forum_id\": 11, \"post_subject\": \"Re: Spraying Word document in HPCC THOR\", \"username\": \"faarisuman\", \"post_text\": \"I have about 10 files that are related by a 3 field composite key (roughly 45 bytes total). In the RDBMS world I would be inclined to convert the natural key into a numeric surrogate key to reduce the footprint and hopefully improve sorts and joins. The 3 key fields constitute 20-30% of the total data size.\\n\\n\\n______________\\nYou can easily check out our high quality itil which prepares you well for the ccent questions You can also get success in real Test-king Certification exam with the quality www.quincy.edu and [url=http://en.wikipedia.org/wiki/University_of_Saint_Joseph_(Connecticut):2h4dtl8u]University of Saint Joseph
IMPORT HeaderFile;\nEXPORT File_HeaderFile :=\nDATASET('~transactions::april2014::headerdata',HeaderFile.Layout_Header, CSV(SEPARATOR(',')));
\\n\\nRegards,\\nSameer\", \"post_time\": \"2014-04-04 08:49:21\" },\n\t{ \"post_id\": 5468, \"topic_id\": 1266, \"forum_id\": 11, \"post_subject\": \"Spraying Delimited CSV\", \"username\": \"kmier\", \"post_text\": \"I'm trying to spray a delimited file with integer values where null values equal to 0.\\nThe "Spray Delimited" function on ECL Watch seems to be ignoring the delimiter and it seems to reading the integers as base 256 instead of base 10 and is having issues with variable length data values. I tried using a comma delimited file as well as a tab delimited file and had the same issues with both.\\nSystem Specs:\\n
\\nData File: Transactions_April2014_HeaderData.csv\\n\\n
\\n35500555,2014,4,1,10,5,302009,0,02197881679,308,956\\n35904820,2014,4,1,9,1,352043,0,10152186187,404,0\\n10078085,2014,4,1,16,3,472017,54661600,10055782374,542,920\\n18701205,2014,4,1,14,1,235872,0,09440308519,748,0\\n32303558,2014,4,1,8,1,188447,0,0,523,0\\n
\\nSpray Delimited Options:\\n
\\nDFU Workunit Details\\n
\\n[attachment=1:25tgq8tf]ViewData.jpg\\nHeaderFile/Layout_Header\\nEXPORT Layout_Header := RECORD\\n\\tINTEGER4 TransID;\\n\\tINTEGER2 tYear;\\n\\tINTEGER1 tMonth;\\n\\tINTEGER1 tDay;\\n\\tINTEGER1 StoreID;\\n\\tINTEGER1 RegID;\\n\\tINTEGER3 TrxID;\\n\\tINTEGER4 AcctID;\\n\\tINTEGER5 CustID;\\n\\tINTEGER2 Cashier;\\n\\tINTEGER2 Manager;\\nEND;
\\nHeaderFile/File_HeaderFile\\nIMPORT HeaderFile;\\nEXPORT File_HeaderFile :=\\nDATASET('~transactions::april2014::headerdata',HeaderFile.Layout_Header,THOR);
\\nExecute HeaderFile/File_HeaderFile\\n\\n
\\nAdded 8 spaces to end of Transactions_April2014_HeaderData.csv to bypass error.\\nRe-uploaded and re-sprayed the file.\\nRe-executed HeaderFile/File_HeaderFile.\\nOutput:\\n[attachment=0:25tgq8tf]DataOutput.jpg\\nAll values are incorrect and number of records has doubled.\\nBased on the values it seems as if each character is being imported as an integer based on its ASCII value (base 256).\\nIs this the correct interpretation of what is going on?\\nIf so, is there a way to import integers without converting to base 256?\", \"post_time\": \"2014-04-03 19:41:24\" },\n\t{ \"post_id\": 6260, \"topic_id\": 1428, \"forum_id\": 11, \"post_subject\": \"Re: Saving Scheme with Record in description\", \"username\": \"rtaylor\", \"post_text\": \"househippo,\\n\\nSure you could do that for sprayed files. For all files created within HPCC, the DFU already has the structure saved in its metadata about the file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-29 16:02:26\" },\n\t{ \"post_id\": 6249, \"topic_id\": 1428, \"forum_id\": 11, \"post_subject\": \"Saving Scheme with Record in description\", \"username\": \"househippo\", \"post_text\": \"I see that every file has a "description" field where you can save a note about the file.\\nCould I have the scheme for the file in the description? or is there another place that I can save it.\", \"post_time\": \"2014-08-23 03:10:43\" },\n\t{ \"post_id\": 6711, \"topic_id\": 1546, \"forum_id\": 11, \"post_subject\": \"Re: Source code of Tweet Analysis in HPCC\", \"username\": \"pius_francis\", \"post_text\": \"ha ha ... i realised after posting Richard \\n
Thanks a lot
\", \"post_time\": \"2014-12-18 16:34:36\" },\n\t{ \"post_id\": 6710, \"topic_id\": 1546, \"forum_id\": 11, \"post_subject\": \"Re: Source code of Tweet Analysis in HPCC\", \"username\": \"rtaylor\", \"post_text\": \"Pius,\\n\\nThe read me and DOCX files that come in the ZIP download should explain everything. I have not yet run this code, so that's the extent of my knowledge.\\n\\nHTH,\\n\\nRichard (not Bob
)\", \"post_time\": \"2014-12-18 16:02:25\" },\n\t{ \"post_id\": 6709, \"topic_id\": 1546, \"forum_id\": 11, \"post_subject\": \"Re: Source code of Tweet Analysis in HPCC\", \"username\": \"pius_francis\", \"post_text\": \"Bob Thanks a lot for your info. Please let me know whether my understanding regarding Syntilyze is correct.\\nSteps:\\n1. Gather Tweets using Twitter API\\n2. Remove noisy tweets\\n3. Feed the input and output pattern to ML from Synsets and Familar words\\n4. Use Naive Bayes algorithm to classify the tweets\\n\\nPlease let me know whether my understanding is correct. If not kindly correct me\\nThanks Pius\", \"post_time\": \"2014-12-18 15:18:37\" },\n\t{ \"post_id\": 6708, \"topic_id\": 1546, \"forum_id\": 11, \"post_subject\": \"Re: Source code of Tweet Analysis in HPCC\", \"username\": \"rtaylor\", \"post_text\": \"Right here:http://hpccsystems.com/community/contributions/data-descriptors-and-simple-example-programs/sentilyze-twitter-sentiment-ana\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-18 14:29:51\" },\n\t{ \"post_id\": 6707, \"topic_id\": 1546, \"forum_id\": 11, \"post_subject\": \"Source code of Tweet Analysis in HPCC\", \"username\": \"pius_francis\", \"post_text\": \"I saw the Demo regarding seeing twitter sentiments. Where can i see the cource code of it?\", \"post_time\": \"2014-12-18 11:07:40\" },\n\t{ \"post_id\": 7598, \"topic_id\": 1728, \"forum_id\": 11, \"post_subject\": \"Stream processing engine\", \"username\": \"linhbngo\", \"post_text\": \"I have some questions regarding the stream processing capability of HPCC Systems.\\n\\nDoes HPCCSystems have any component that is similar to Apache Storm/S4? My understanding is that it is possible to use Kafka to stream data into HPCC, but how about processing the stream of data in real time? If there is no such component in HPCC, can HPCC accept incoming processed streams from Storm/S4?\\n\\nThanks!\", \"post_time\": \"2015-05-14 12:22:50\" },\n\t{ \"post_id\": 9010, \"topic_id\": 2096, \"forum_id\": 11, \"post_subject\": \"Re: Data Management (ingestion and more)\", \"username\": \"lpezet\", \"post_text\": \"Thanks for the reply!\\n\\nHmmm...I'm not sure I understand the wildcard idea. Maybe you're one step ahead of me. Care to elaborate?\\n\\nOtherwise that's correct: I'm looking for some guidelines/feedback/thoughts on how to go about downloading and pre-processing some files before loading them into my cluster (with or without further processing), through ECL actions (i.e. trying to avoid either ECL Watch/upload/spray files or logging into the server and running linux commands manually).\\n\\nI guess I'm having 2 problems/uncertainties.\\n\\nUn-parallel\\nData processing is (can be) massively parallel and HPCC Systems cluster(s) handle that just great. But I don't see (and not experiencing) massive parallelism in data ingestion in all cases, especially those where I'm pulling data from a third party. \\nI can load a file from a path at a given IP (Std.File.ExternalLogicalFilename) but I can't (true?) specify the IP address where to run "curl" (PIPE) to download that file from a url and store it at that same path. Am I missing something? Going about it wrong?\\nIf it's right, one thought I had was to make sure hThor is only setup on a single node. hThor being a single-node process, I'd still have to make sure it runs off the same node every time so I can load (Std.File.ExternalLogicalFilename) the data from there. 
Thoughts?\\n\\nSharing\\nNow let's say the issue mentioned above is not an issue (if it ever was) and I can provide someone with some ECL code and modules to load public school directory data as well as financial data, and it works on their cluster (either it works on any cluster setup, or that someone configured the cluster as stated above).\\n
IMPORT Education.NCES;\\nNCES.download_and_load_directory(2013);\\nNCES.download_and_load_financial(2013, ...);\\n// Then, in a different execution:\\n// dir := NCES.dsDirectory(2013);\\n// fin := NCES.dsFinancial(...);\\n// A := JOIN(dir, fin, LEFT.unitid = RIGHT.unitid, .....);\\n// TABLE(A, { ..... }, ..... );\\n
\\nIs there any other way (now or in the works?) to share that whole Education package besides versioning the ECL code and letting people clone that git repo (or subversion and such)?\\n\\n\\nThanks a lot!\\nLuke.\", \"post_time\": \"2016-01-05 20:46:58\" },\n\t{ \"post_id\": 9008, \"topic_id\": 2096, \"forum_id\": 11, \"post_subject\": \"Re: Data Management (ingestion and more)\", \"username\": \"HPCC Staff\", \"post_text\": \"Hi Luke, thanks for your post! It appears you are looking for a way to download, decompress and parse individual files contained in a remote site accessible to HTTP, but with a well-known path. Try the use of wildcards in the path if that would satisfy this requirement?\", \"post_time\": \"2016-01-05 19:26:27\" },\n\t{ \"post_id\": 8996, \"topic_id\": 2096, \"forum_id\": 11, \"post_subject\": \"Data Management (ingestion and more)\", \"username\": \"lpezet\", \"post_text\": \"Hi,\\n\\nI was wondering if there was any solution/product/plan regarding easier data ingestion, in a "pull fashion" (static data or even web service)?\\nLike downloading a file, maybe unzipping it, loading it and probably running some transformations on it to get something we can work from (so like a DownloadOnSteroid+ELT). Or querying MARTA Bus Real-time RESTful Web Service. And of course all within the coziness of my ECL IDE.\\n\\nI'm really not talking about grabbing data from an RDBMS, and I don't see why I'd need to use Flume for this kind of ingestion (not a big fan of squashing flies with a hammer).\\nBut I end up doing most (if not almost everything) through ECL IDE and I find it frustrating to have to switch to anything else to do data ingestion.\\n\\nLoading third party data is something I have to deal with a lot, but maybe I'm just the exception?\\n\\nI want to run ingestion against a single node and if possible always the same.I can provision extra space for it for data ingestion. Running ECL code against hThor would do then....no?\\n\\n\\nFor example:\\n\\nCurl := MODULE\\n EXPORT info_layout := RECORD\\n STRING content_type;\\n STRING http_code;\\n ...\\n END;\\n\\n EXPORT download( STRING url, STRING localUri, ..... ) := PIPE('curl -w \\\\'%{content_type}\\\\t....\\\\' -s -o ' + localUri + ' "' + url + '"', info_layout, CSV(SEPARATOR('\\\\t')) );\\n \\n ...\\nEND;\\n
\\n\\nand wrap other linux programs as well and end up with some library to use to help ingest data.\\n\\n\\ningest_download() := FUNCTION\\n oLocalPath := '/ingestion/NOAA/GHCN';\\n oLocalFile := oLocalPath + 'something_121415_122015_Weekly.zip';\\n RETURN SEQUENTIAL(\\n #OPTION('targetClusterType','hthor'),\\n OUTPUT( BinUtils.mkdir( oLocalPath, true ), NAMED('CreatePath')),\\n OUTPUT( Curl.download('http://...../something_121415_122015_Weekly.zip', oLocalFile, false), NAMED('Download')),\\n OUTPUT( BinUtils.checksum( oLocalFile ), NAMED('Checksum')),\\n ...\\n OUTPUT( Zip.unzip(oLocalFile, oLocalPath, true), NAMED('Unzipping')),\\n );\\nEND;\\n\\ningest_elt() := FUNCTION\\n ...\\n oDS := DATASET(std.File.ExternalLogicalFilename(LandingZone_IP, File_In), raw_nppes_layout, CSV(HEADING(1), SEPARATOR([',']), QUOTE(['"']), TERMINATOR(['\\\\n','\\\\r\\\\n','\\\\n\\\\r'])));\\n oDist := DISTRIBUTE(oDS, HASH(npi));\\n RETURN OUTPUT(oDist,, pLogicalFile, OVERWRITE);\\nEND;\\n
\\n\\nIs that just plain stupid?\\n\\nI've read a bit about Orbit (although can't find a lot and no release as far as I could see) and some initiative to make HPCC Modules easier to user/integrate or something of the sort.\\nThe vision here is a bit similar. I'd love to create some sort of (ECL) Module/Package that would manage access, ingestion, cataloging, updates, etc. to certain (public or internal) data (e.g. Weather Historical Data or Weather Forecasts Data), and someone would just need to install that module and run the necessary functions/queries from it to ingest data. "The rpm for data ingestion".\\n\\n\\nOne way would be through apt maybe, creating a Debian Package with ECL code in it, and calling it ( in a PIPE-against-hThor" kinda way) to install the code and run whatever I need against it to ingest whatever data it manages.\\nIn an apt way, it'd be like\\n\\napt install noaa-weather-forecast\\napt install noaa-weather-historical\\nnoaa-weather-forecast ingest 3days\\nnoaa-weather-historical ingest temp 2014\\n
\\n\\nand I could wrap each in some PIPE code to run any of those commands through hThor (if viable).\\n\\nThoughts? Comments? Criticism?\\n\\n\\nThanks\\nLuke.\", \"post_time\": \"2016-01-01 20:41:49\" },\n\t{ \"post_id\": 16793, \"topic_id\": 4173, \"forum_id\": 11, \"post_subject\": \"Re: First load\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nSpray is a "dumb" operation. The only requirement it has is to get the data as fast as possible to the nodes while ensuring that a single record never spans multiple nodes -- each record must be whole and complete on a single node.\\n \\nLet's say you have a 3 Gb file being sprayed to a 3-node cluster:\\n
This is part of the reason I suggested using flat files instead of CSV.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-21 18:00:48\" },\n\t{ \"post_id\": 16783, \"topic_id\": 4173, \"forum_id\": 11, \"post_subject\": \"Re: First load\", \"username\": \"lpezet\", \"post_text\": \"When Spraying, I've notived dfuserver/dafilesrv taking a while (like hours, if not days) before actually sending file content to slaves on cluster.\\nI see in the dfuserver logs "findSplitPoint( ... )" and some percentages in there before getting to the bunch of "Transferring part...." part.\\nAny details someone can share as to how dfuserver/dafilesrv work when spraying a file?\\nCan I spray multiple files at the same time or would the first spray block the rest (like a Thor job if I'm not mistaken)?\\n\\nJust thinking it would help me figure out maybe specs for my "spraying" node \", \"post_time\": \"2017-04-21 17:21:25\" },\n\t{ \"post_id\": 16773, \"topic_id\": 4173, \"forum_id\": 11, \"post_subject\": \"Re: First load\", \"username\": \"lpezet\", \"post_text\": \"Thank you Richard.\", \"post_time\": \"2017-04-21 15:11:54\" },\n\t{ \"post_id\": 16733, \"topic_id\": 4173, \"forum_id\": 11, \"post_subject\": \"Re: First load\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nIf you're using AWS for your clusters, then they have a feature called "snapshots" that allow you to save the data from the cluster you bring down and automatically load it into the next cluster you bring up.\\n\\nOther than that, I would simply suggest working with flat files instead of CSV, since they are generally more efficient. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-21 13:26:01\" },\n\t{ \"post_id\": 16683, \"topic_id\": 4173, \"forum_id\": 11, \"post_subject\": \"First load\", \"username\": \"lpezet\", \"post_text\": \"Dear HPCC Systems Team,\\n\\nI would like some advice on that very first time when I load sizable data into the cluster.\\n\\n1. I'm talking about one 1TB file here. Would you recommend splitting that file in the first place before any of the methods below?\\n\\n2. Is any of the following methods better when loading big files?\\n\\na. DATASET(std.File.ExternalLogicalFilename(LandingZone_IP, SomeFilePath ), MyLayout, CSV)\\nb. Std.File.SprayDelimited( ... )\\n\\nIs one better than the other? Any other method you would recommend?\\n\\n3. I run analysis on certain datasets, then shut down the cluster, and some time later bring up another cluster, load data, run different analysis, etc.\\nThe problem I'm facing right now is that every time I need to load all that data into my cluster and it takes a lot of time. I wish I could maintain some of that data somewhere and "just restore it" (as in a simple linux cp command vs. another "Spray") for the next cluster. Is that possible? Any recommendations here?\\n\\n\\nThank you for your help!\\nLuke.\", \"post_time\": \"2017-04-20 20:10:33\" },\n\t{ \"post_id\": 25983, \"topic_id\": 6893, \"forum_id\": 11, \"post_subject\": \"Re: Manual Spray\", \"username\": \"bforeman\", \"post_text\": \"Have you tried the remote copy option? 
I believe that's what we use to move logical files from one cluster to another.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2019-04-29 13:57:32\" },\n\t{ \"post_id\": 25973, \"topic_id\": 6893, \"forum_id\": 11, \"post_subject\": \"Manual Spray\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI've been told many (many) times that the spray feature is just a simple file-splitting+ship-file-parts-to-slave-nodes+logical-file-entry process (that doesn't seem THAT simple to me
).\\n\\nI was wondering if it would be possible for me to keep/create those file parts and, upon creating a new cluster, download those files parts onto the slave nodes, and manually create that logical file entry specifying where all those parts are.\\n\\nI've used dfuplus to spray/despray files before and saw the savexml action there. Is it what I should use?\\nWould you have a snippet+explanation/blog post/pdf/anything on how to use it to re-create a logical file? (I see how to use to export the xml, but how do I create a logical file with it?)\\n\\n\\nThanks for the help!\", \"post_time\": \"2019-04-27 17:07:34\" },\n\t{ \"post_id\": 32493, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"McPP82\", \"post_text\": \"Thanks for the advice! I've ended up with tons of .csv files after the data gathering part of an investment simulation in this Budapest real estate was over, and had to split them to make them easier to process by different members of my team, as having to deal with a larger file that contains unneeded information was difficult and ineffective. \\n\\nAnd now, I've managed to divide them thanks to your code, and my job has become much simpler.\", \"post_time\": \"2020-10-27 15:26:50\" },\n\t{ \"post_id\": 32483, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,\\n\\nAnd here's an even easier way to do it:
Half1 := address_file[1 .. halfoftotal];\\nHalf2 := address_file[halfoftotal+1 ..]; \\nHalf1; \\nHalf2;
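If you then want each half written out as its own logical file, a minimal sketch (the file names below are placeholders, not from the original post) would be:

// Placeholder file names -- adjust the scope to your environment.
OUTPUT(Half1, , '~class::example::address_half1', OVERWRITE);
OUTPUT(Half2, , '~class::example::address_half2', OVERWRITE);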
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-27 14:43:53\" },\n\t{ \"post_id\": 32473, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"mo0926\", \"post_text\": \"Nice!! Yeah, I noticed I was getting that one last record in the beginning of the next dataset. In the documentation for choosen it shows "NextFive := CHOOSEN(Person,5,6); // returns next 5 recs from Person" that number six should have meant a +1 for this. Thank you for clarifying that.\", \"post_time\": \"2020-10-27 13:18:10\" },\n\t{ \"post_id\": 32463, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,\\n\\nI think you need to tweak that code a bit. \\nRun this example:
Address_File := DATASET([{1},{2},{3},{4},{5}],{UNSIGNED1 r});\\nhalfoftotal := COUNT(address_file) DIV 2;\\n\\naddressHalf1 := choosen(address_file, halfoftotal );// first half records\\naddressHalf2 := choosen(address_file, halfoftotal, halfoftotal ); // second half\\naddressHalf1; // returns 1,2\\naddressHalf2; // returns 2,3\\n\\n//my version:\\nHalf1 := CHOOSEN(address_file, halfoftotal );\\nHalf2 := CHOOSEN(address_file, halfoftotal+1, halfoftotal+1 ); \\nHalf1; // returns 1,2\\nHalf2; // returns 3,4,5
As you see, your code gets records 1,2 in the "first half" and 2,3 as the "second half" while my rewrite gets records 1,2 in the "first half" and 3,4,5 as the "second half." Also note that I removed your ROUND function and replaced it with the DIV operator (integer division) to handle the odd number issue. \\n\\nOf course, assuming you're on a cluster with more than one node, you'll also want to add DISTRIBUTE() to each OUTPUT to disk so each file part has records (otherwise the file parts for the first half will be empty on the second half's nodes, and vice versa).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-27 12:44:52\" },\n\t{ \"post_id\": 32453, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"mo0926\", \"post_text\": \"Thank you for your help. I found this solution below that works too:\\n\\n\\nI round in case the total number of records is an odd number. \\n\\ntotalrecs := round(count(address_file));\\n\\nhalfoftotal := totalrecs / 2;\\n\\naddressHalf1 := choosen(address_file, halfoftotal );// first half records\\naddressHalf2 := choosen(address_file, halfoftotal, halfoftotal ); // second half\", \"post_time\": \"2020-10-26 20:48:36\" },\n\t{ \"post_id\": 32403, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"Tony Kirk\", \"post_text\": \"SAMPLE should be more exact for that purpose.\", \"post_time\": \"2020-10-26 14:28:09\" },\n\t{ \"post_id\": 32393, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"mo0926\", \"post_text\": \"Hi Richard, I was trying to evenly split one large file into two smaller files, and write them as such later. I think the option of using the enth function might me helpful here. However, the only problem I see with this solution is that I cannot be sure that some records won't be repeated in both final datasets.\", \"post_time\": \"2020-10-26 14:16:53\" },\n\t{ \"post_id\": 32373, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"rtaylor\", \"post_text\": \"mo0926,Is there a way of splitting one large CSV file into several parts before the output?
There are several fairly simple ways to do that, depending on how you want them split.\\n\\nIf you want all records with different values of some field(s), then simple filters for those values and a separate OUTPUT for each will do it.\\n\\nIf you just want to "evenly" split the number of records into separate files, then either the SAMPLE() or ENTH() function will do that, with a separate OUTPUT for each.\\n\\nIf what you want is a single OUTPUT but specific records "assigned" to each file part written to your nodes, then you can use the GROUP() function and OUTPUT that GROUPed recordset.\\n\\nSo, what exactly are you trying to accomplish?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-26 11:57:28\" },\n\t{ \"post_id\": 32353, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Re: Splitting Files with ECL\", \"username\": \"hwatanuki\", \"post_text\": \"Hello mo0926,\\n\\nThe first thing that came to my mind was to use the LOOP function to iterate across the CSV dataset and perform a loopbody operation. Have you already tried that? \\nMaybe if you want to share more details on what you are trying to accomplish (a code example for instance), we can think on some other possibilities as well. \\n\\nHTH\\nhwatanuki\", \"post_time\": \"2020-10-23 12:55:52\" },\n\t{ \"post_id\": 32333, \"topic_id\": 8423, \"forum_id\": 11, \"post_subject\": \"Splitting Files with ECL\", \"username\": \"mo0926\", \"post_text\": \"Is there a way of splitting one large CSV file into several parts before the output?\", \"post_time\": \"2020-10-22 22:05:11\" },\n\t{ \"post_id\": 71, \"topic_id\": 39, \"forum_id\": 12, \"post_subject\": \"Models for Big Data White Paper\", \"username\": \"HPCC Staff\", \"post_text\": \"We've posted an 18 page white paper written by our Chief Data Scientist, dealing with the issue of data models for Big Data: \\n\\nThe principal performance driver of a Big Data application is the data model in which the Big Data resides. The aim of this paper is to discuss some of the principle data models that exist and are imposed; and then to argue that an industrial strength Big Data solution needs to be able to move between these models with a minimum of effort.\\nhttp://hpccsystems.com/community/white-papers/models-big-data\", \"post_time\": \"2011-06-22 12:54:08\" },\n\t{ \"post_id\": 216, \"topic_id\": 77, \"forum_id\": 12, \"post_subject\": \"Re: Redundant data in raw files\", \"username\": \"aintnomyth\", \"post_text\": \"That definitely helps, thanks for the quick reply.\", \"post_time\": \"2011-08-01 18:41:17\" },\n\t{ \"post_id\": 215, \"topic_id\": 77, \"forum_id\": 12, \"post_subject\": \"Re: Redundant data in raw files\", \"username\": \"dabayliss\", \"post_text\": \"Well - I don't know if I would describe it as 'speculative pre-processing' but essentially yes. Whilst HPCC is probably the fastest thing out there - we are still bound by the laws of physics. In general you should get your data model correct and TIGHT as early in your processing as possible.\\n\\nBy TIGHT I mean:\\na) Fixed fields if possible (and as small as possible)\\nb) Into 'correct' types if possible (numbers as UNSIGNED/INTEGER etc)\\nc) Linking fields as UNSIGNED \\n\\nNow - there is a slightly 'greyer' trade-off with regard to some of the more exotic but compressed types such as QSTRING and UNSIGNED3 etc. It costs more cycles to get data in and out of those types but they are smaller (which means they come off disk faster, go across network links faster and consume less memory). 
My general rule of thumb is that fields I use 'all the time' I will allow a fatter type that is natural to the system (UNSIGNED4/UNSIGNED8 etc) - fields that are just carried around for occasional use I will squeeze down.\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2011-08-01 18:38:13\" },\n\t{ \"post_id\": 214, \"topic_id\": 77, \"forum_id\": 12, \"post_subject\": \"Redundant data in raw files\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nI have about 10 files that are related by a 3 field composite key (roughly 45 bytes total). In the RDBMS world I would be inclined to convert the natural key into a numeric surrogate key to reduce the footprint and hopefully improve sorts and joins. The 3 key fields constitute 20-30% of the total data size.\\n\\nFrom a performance perspective, does it make sense to do any sort of speculative pre-processing in HPCC?\\n\\nThanks!\", \"post_time\": \"2011-08-01 18:14:44\" },\n\t{ \"post_id\": 6860, \"topic_id\": 476, \"forum_id\": 12, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"ahmedvu153\", \"post_text\": \"One thing I noticed is that your code generates an INTEGER10 data type for your ISBN field -- you should modify it to make that a DECIMAL10 instead, since INTEGER10 is not a legal data type (the '10' portion defines the number of bytes the field occupies, not the number of digits in the number, so the range of valid values is only 1 through 8). \\n___________________\\n\\nahemd\", \"post_time\": \"2015-01-27 08:36:18\" },\n\t{ \"post_id\": 2153, \"topic_id\": 476, \"forum_id\": 12, \"post_subject\": \"Re: Spraying using DFUPlus\", \"username\": \"rtaylor\", \"post_text\": \"Ankita,1. In the above command, I have not specified record length anywhere - is this an issue? What if a huge data file, say of 10GB is to be sprayed?
Since you did not explicitly set the maxrecordsize option, it defaults to 8192. If you have records larger than 8192, then you must set the maxrecordsize option to whatever is appropriate (I have seen 10000000000 successfully used before).
2. The original csv is getting sprayed and de-sprayed correctly using ECL Watch - am I missing any steps?
I just tried a CSV spray and despray, and my files were all correct in my test. Try it again with a different file, documenting exactly what you do each step along the way so that, if you still see anomalous results, you can accurately report your exact process (which would help in trying to figure out what could be going wrong).
3. How does HPCC ensure that the file is 'each single record is always whole and complete on a single node'
In the case of CSV and XML files, by dividing the file using the record delimiters.
what if I upload a flat file of huge size whose structure I don't know?
As you would need to do in any other data processing environment, you would first need to explore the file to determine its structure (or ask the data provider).
Or one 'record' runs to multiple lines of the file?
I believe I answered that here: http://hpccsystems.com/bb/viewtopic.php?f=8&t=473&sid=91a5b96a6eea55fbf263bf4f30a3b436\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-03 14:19:51\" },\n\t{ \"post_id\": 2145, \"topic_id\": 476, \"forum_id\": 12, \"post_subject\": \"Spraying using DFUPlus\", \"username\": \"Ankita Singla\", \"post_text\": \"Hello,\\n\\nI have a 3-node HPCC set-up. The different nodes and the processes running on them are shown below :\\n\\nroot@cloudx-767-700:~# service hpcc-init status\\n mydafilesrv ( pid 21286 ) is running...\\n mydfuserver ( pid 6558 ) is running...\\n myeclagent ( pid 6639 ) is running...\\n myeclccserver ( pid 6720 ) is running...\\n myesp ( pid 6800 ) is running...\\n mysasha ( pid 6883 ) is running...\\n
\\n\\nroot@cloudx-798-730:~# service hpcc-init status\\n mydafilesrv ( pid 30555 ) is running...\\n myroxie ( pid 31107 ) is running...\\n
\\n\\nroot@cloudx-799-731:~# sudo service hpcc-init status\\n mydafilesrv ( pid 10293 ) is running...\\n mydali ( pid 10856 ) is running...\\n myeclscheduler ( pid 10963 ) is running...\\n mythor ( pid 16028 ) is running...\\n
\\n\\nI am trying to spray a file using DFUPlus on the cloudx-767-700 I.P is 172.25.37.10\\n\\nroot@cloudx-767-700:~# dfuplus action=spray srcip=172.25.37.10 srcfile=/var/lib/HPCCSystems/mydropzone/Emp.csv dstname=ankita::poc::dfuplus::sprayed dstcluster=mythor prefix=FILENAME,FILESIZE nosplit=1 server=http://172.25.37.10:8010 format=csv username=root password=newuser_123 overwrite=1 replicate=1\\nChecking for local Dali File Server\\n\\nSpraying from /var/lib/HPCCSystems/mydropzone/Emp.csv on 172.25.37.10:7100 to ankita::poc::dfuplus::sprayed\\nSubmitted WUID D20120803-215221\\nD20120803-215221 status: queued\\nD20120803-215221 Finished\\nTotal time taken 1 secs\\n
\\n\\nThe csv file is as follows :\\n\\n\\nName,PsNo,BU,Designation,addr\\nPrachi,10602210,COE,SET,Vashi\\nAnkita,10602192,MFG3,SET,Powai-II
\\n\\nUnder DFU Workunits->Browse, the D20120803-215221 is shown as finished.\\n\\nWhen I de-sprayed the file using the ECL Watch and DFU, the re-constructed file is showing junk values; also the size of this file is greater than the original csv that I uploaded. \\n\\nMy queries are :\\n\\n1. In the above command, I have not specified record length anywhere - is this an issue? What if a huge data file, say of 10GB is to be sprayed?\\n2. The original csv is getting sprayed and de-sprayed correctly using ECL Watch - am I missing any steps?\\n3. How does HPCC ensure that the file is 'each single record is always whole and complete on a single node' - what if I upload a flat file of huge size whose structure I don't know? Or one 'record' runs to multiple lines of the file?\\n\\nThanks and regards !\", \"post_time\": \"2012-08-03 11:13:56\" },\n\t{ \"post_id\": 2172, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"jamesb\", \"post_text\": \"Ok no problem, the only reason I ask is to get a fuller view of how valuable in time saving this feature might be and to help weight whether it is worth investing time in developing out.\\n\\nCan you give me contacts of people who have already worked on some of these ideas? I would love to hear why the feature was not completed out or chosen not to be implemented. \\n\\nI would like to learn about some of the reasoning and decision making processes that go on while deciding on features like this one.\\n\\nThanks for all the replies Richard!\", \"post_time\": \"2012-08-07 19:54:40\" },\n\t{ \"post_id\": 2171, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"rtaylor\", \"post_text\": \"Ah yes -- that would be a question about our actual products and the data they're built from. Sorry, but my area is the ECL language and HPCC platform (keeping to the "theoretical" side of things and away from "production" stuff) -- IOW, I don't know. \\n\\nRichard\", \"post_time\": \"2012-08-07 19:40:51\" },\n\t{ \"post_id\": 2170, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"jamesb\", \"post_text\": \"Ah that makes sense why there hasn't been much done for binary files. So if you were to give the 3 categories a percentage for how frequently each type is dealt with what would your spread be? That is how often do we see binary files, CSV, and XML files?\", \"post_time\": \"2012-08-07 17:56:21\" },\n\t{ \"post_id\": 2169, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nNot that I am aware of. We've done some exploration of this issue in the past, which is why I know CSV files are not too hard to handle (especially if you have a first rec with column headings in it, which is not always the case) and XML files can be parsed to produce a reasonable starting-point RECORD structure, too. \\n\\nBut binary flat files (comprising a major portion of the data receive) are very difficult to do this with, because there's nothing there but the data itself to try to work with. Just deciding what a field might be is an interesting exercise -- for example, is a byte containing ASCII 65 an integer 65 or a capital "A"? 
And where are the field boundaries -- is that ASCII 0 byte a null terminator on a string, or one of the bytes of a 4-byte integer, or ... ?\\n\\nRichard\", \"post_time\": \"2012-08-07 17:49:25\" },\n\t{ \"post_id\": 2168, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"jamesb\", \"post_text\": \"Thanks for the help Richard! I am curious if there has been any work on auto generating ECL code from files. Has anyone else worked on this or is there any future plans on developing out this feature?\", \"post_time\": \"2012-08-07 17:03:56\" },\n\t{ \"post_id\": 2164, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"rtaylor\", \"post_text\": \"James,\\n\\nGood work! That kind of structure generation is reasonably straight-forward with CSV files, a bit more complicated with XML files, but a LOT more difficult with binary flat files (fixed or variable-length).\\n\\nOne thing I noticed is that your code generates an INTEGER10 data type for your ISBN field -- you should modify it to make that a DECIMAL10 instead, since INTEGER10 is not a legal data type (the '10' portion defines the number of bytes the field occupies, not the number of digits in the number, so the range of valid values is only 1 through 8). \\n\\nMy first blog posting (http://hpccsystems.com/blog/adventures-machine-learning-part-1) contains some ECL code to help define the field sizes of CSV data. Since CSV is an inherently text-based format, there's no reason not to simply make each field a STRING.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-06 19:27:41\" },\n\t{ \"post_id\": 2156, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Re: Automatic ECL Record Generation from sprayed file\", \"username\": \"jamesb\", \"post_text\": \"here is a perl script which is kind of a proof of concept of what I am asking about here.\\n\\n\\n
#!/usr/bin/perl\\n\\nuse strict;\\nuse warnings;\\n\\nuse Data::Dumper;\\n\\nopen FILE, "<", "csv.txt" or die $!;\\n\\nmy @lines = <FILE>;\\n\\n\\n#could check for errors in the CVS file here. make sure that there are fields on the top\\n#of the file as well as that there are same amount of columns as there are row entry fields\\n#etc. more testing could be done before hand.\\n\\nmy @fields;\\nmy %field_data;\\nmy %args;\\nmy @all_tokens;\\nmy %possible_types;\\nforeach my $line (@lines) {\\n \\n unless($args{fields_flag}) {\\n chomp $line;\\n $line =~ s`"``ig; #remove any of the quotes around the fields.\\n @fields = split(',',$line);\\n $args{fields_flag} = 1; #set to 1 for true\\n #print Dumper(@fields);\\n }\\n else {\\n my @tokens = split(',',$line);\\n chomp @tokens;\\n for( my $index = 0; $index < @fields; ++$index) {\\n $field_data{$fields[$index]} .= $tokens[$index].", ";\\n }\\n }\\n}\\n\\nmy @type_identifier_subs = ( \\n #sub {return $_[0] =~ m`\\\\s*\\\\d+\\\\s*` ? 'arbitrary_length_number' : ''; }, #arbitray length number\\n #example : 5.95\\n sub {return $_[0] =~ m`\\\\s*\\\\d+\\\\.\\\\d+\\\\s*` ? 'single_decimal_number' : ''; }, #decimal\\n #example : 1985/01/21\\n sub {return $_[0] =~ m`\\\\s*(?:\\\\")?\\\\s*\\\\d+\\\\/\\\\d+\\\\/\\\\d+\\\\s*(?:\\\\")?\\\\s*` ? 'three_slash_delimited_number' : ''; }, #possible date\\n #example : Douglas Adams\\n sub {return $_[0] =~ m`\\\\s*[A-Z][a-z]+\\\\s+[A-Z][a-z]+\\\\s*` ? 'formal_first_last_name' : ''; }, #formal first and last name\\n #example : 0465026567\\n sub {return $_[0] =~ m`\\\\s*\\\\d{10}\\\\s*` ? 'ISBN_size_number' : ''; }, #ISBN\\n);\\n\\n#idenfity which type of data each column could possibly be.\\n#build a list of possiblities with weighted amounts per type\\n#identified. \\nforeach my $field (keys %field_data) {\\n my @data_tokens = split(',',$field_data{$field});\\n foreach my $data (@data_tokens) {\\n #print $data ."\\\\n";\\n foreach my $type_identifier (@type_identifier_subs) {\\n #print $cleaner->($data) ."\\\\n";\\n my $type = $type_identifier->($data);\\n $possible_types{$field}->{$type} += 1 if($type);\\n }\\n }\\n}\\n\\n#find the best weighted type out of all the filtered types\\n#identified.\\nmy %suggested_ecl_type;\\nforeach my $field (@fields) {\\n my $max_weight = 0;\\n my $best_type;\\n foreach my $type ( sort keys %{$possible_types{$field}}) {\\n if($max_weight < $possible_types{$field}{$type}){\\n $max_weight = $possible_types{$field}{$type};\\n $best_type = $type;\\n }\\n }\\n $suggested_ecl_type{$field} = $best_type;\\n}\\n#print out the potential Record Layout:\\n\\nmy %ecl_types = (\\n 'ISBN_size_number' => 'INTEGER10',\\n 'formal_first_last_name' => 'STRING30',\\n 'three_slash_delimited_number' => 'STRING8',\\n 'single_decimal_number' => 'DECIMAL2_2'\\n);\\n\\nmy $file_name = "csv";\\nmy $dataset_name = '~CLASS::BMF::AdvECL::';\\nprint_layout();\\nsub print_layout {\\n open FILE, ">", "eclcode.ecl" or die $!;\\n \\n print FILE "EXPORT " . $file_name . " := MODULE\\\\n\\\\n";\\n print FILE "EXPORT Layout := RECORD\\\\n";\\n foreach my $field (@fields) {\\n #make sure the fields are lower cased and that spaces are underscored.\\n #or make sure words are in camelcase depending on the specs. We can pull this\\n #information from the project's coding standards configureation file.\\n print FILE "\\\\t " . $ecl_types{$suggested_ecl_type{$field}} . " " . lc$field . "\\\\n";\\n }\\n \\n print FILE "END;\\\\n\\\\n";\\n print FILE "EXPORT File := DATASET(" . $dataset_name . 
$file_name .",Layout,CSV)\\\\n";\\n close FILE;\\n}\\n\\n\\nprint Dumper(\\\\%possible_types) . "\\\\n";\\n\\nopen FILE, "<", "eclcode.ecl" or die $!;\\n\\nprint $_ while(<FILE>);\\n\\nclose FILE;\\n
\\n\\n\\nHere is the CVS file needed by this script:\\n\\n"REVIEW_DATE","AUTHOR","ISBN","DISCOUNTED_PRICE"\\n"1985/01/21","Douglas Adams",0345391802,5.95\\n"1990/01/12","Douglas Hofstadter",0465026567,9.95\\n"1998/07/15","Timothy ""The Parser"" Campbell",0968411304,18.99\\n"1999/12/03","Richard Friedman",0060630353,5.95\\n"2001/09/19","Karen Armstrong",0345384563,9.95\\n"2002/06/23","David Jones",0198504691,9.95\\n"2002/06/23","Julian Jaynes",0618057072,12.50\\n"2003/09/30","Scott Adams",0740721909,4.95\\n"2004/10/04","Benjamin Radcliff",0804818088,4.95\\n"2004/10/04","Randel Helms",0879755725,4.50
\\n\\n\\nHere is the ECL code which is generated:\\n\\nEXPORT csv := MODULE\\n\\nEXPORT Layout := RECORD\\n\\t STRING20 REVIEW_DATE\\n\\t STRING30 AUTHOR\\n\\t INTEGER10 ISBN\\n\\t DECIMAL2_2 DISCOUNTED_PRICE\\nEND;\\n\\nEXPORT File := DATASET(~CLASS::BMF::AdvECL::csv,Layout,CSV)\\n
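For comparison, a hand-corrected version of that generated definition, with the missing semicolons added, quotes around the logical file name, and the ISBN switched to DECIMAL10 as noted in the earlier reply (the field sizes are rough guesses from the sample rows, so treat this as a sketch rather than the script's actual output):

EXPORT csv := MODULE

  EXPORT Layout := RECORD
    STRING10   review_date;       // e.g. 1985/01/21
    STRING30   author;
    DECIMAL10  isbn;              // DECIMAL10 rather than INTEGER10
    DECIMAL8_2 discounted_price;
  END;

  EXPORT File := DATASET('~CLASS::BMF::AdvECL::csv', Layout, CSV(HEADING(1)));

END;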
\", \"post_time\": \"2012-08-05 02:05:12\" },\n\t{ \"post_id\": 2155, \"topic_id\": 479, \"forum_id\": 12, \"post_subject\": \"Automatic ECL Record Generation from sprayed file\", \"username\": \"jamesb\", \"post_text\": \"Is there a way to automatically generate a Record Layout from analyzing a sprayed file?\\nUsing some fuzzy logic to get as close as possible? And then have a reviewer step through and make sure that all the automated data types are correct?\", \"post_time\": \"2012-08-03 19:56:02\" },\n\t{ \"post_id\": 3274, \"topic_id\": 718, \"forum_id\": 12, \"post_subject\": \"Re: Modelling data for a web application\", \"username\": \"HPCC Staff\", \"post_text\": \"This has been addressed in another forum post:\\n\\nviewtopic.php?t=717&p=3228#p3228\", \"post_time\": \"2013-01-31 15:46:33\" },\n\t{ \"post_id\": 3222, \"topic_id\": 718, \"forum_id\": 12, \"post_subject\": \"Modelling data for a web application\", \"username\": \"kaliyugantagonist\", \"post_text\": \"I'm a HPCC beginner.\\n\\nI sprayed multiple csv files using ECL and DFU and also have fetched the data using ECL record structure. I read that XML files can also be the input and that there is a JDBC driver too which supports read-only queries to the HPCC data.\\n\\nIn my Java-based web application, I'm currently using a MySQL database. It consists of several tables which have BLOB columns but the data arriving is huge and we have decided to switch to a HPCC for quick storage and retrieval.\\n\\nMy questions are as follows:\\n\\nHow do insert my data directly into HPCC - I don't have any kind of file with me(csv, flat, xml etc.)?
\nThe BLOB columns in the tables are the result of certain external operations which finish after the initial insert, i.e. the BLOBs are later 'updated' into the tables - how do I handle such a scenario in HPCC (ECL approach)?
\", \"post_time\": \"2013-01-29 04:17:15\" },\n\t{ \"post_id\": 6139, \"topic_id\": 1402, \"forum_id\": 12, \"post_subject\": \"Re: How to dynamically import data and model data?\", \"username\": \"bforeman\", \"post_text\": \"Thanks James, I have been working with and teaching ECL now for over 3 1/2 years, and am blessed to be surrounded by a team of very talented individuals. Welcome to the community, and always glad to help!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-29 20:10:36\" },\n\t{ \"post_id\": 6138, \"topic_id\": 1402, \"forum_id\": 12, \"post_subject\": \"Re: How to dynamically import data and model data?\", \"username\": \"rqg0717\", \"post_text\": \"Hi Bob,\\n\\nWow, you are really an expert. You answered all the questions I have asked recently. I will look into the option 3. Thanks a lot. \\n\\n[quote="bforeman":2l6f5fss]Hi James,\\n\\nThere are three ways to import data into THOR:\\n\\n1. Using the ECL Watch interface\\n2. Using the DFUPLUS Command Line Utility\\n3. Using the ECL Standard Function Libraries\\n\\nSo to dynamically import data into THOR, you probably want to use option 3. This is a standard operation that we do everyday, as we are always getting updates, some daily, some weekly, some monthly. The trick is to come up with a standard to detect that new data is on the landing zone and ready for import. One technique is to use a CRON job to schedule an ECL event that is triggered by the existence of a "trigger" or "dummy" file that signals the CRON job that data is ready for import. When that file is detected, the CRON job fires an event that starts the dynamic spray process.\\n\\nLook at CRON and WHEN and EVENT in the Language Reference Manual, as well as the Spray Libraries that can be found in the Standard Library Reference PDF.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-29 20:06:39\" },\n\t{ \"post_id\": 6137, \"topic_id\": 1402, \"forum_id\": 12, \"post_subject\": \"Re: How to dynamically import data and model data?\", \"username\": \"bforeman\", \"post_text\": \"Hi James,\\n\\nThere are three ways to import data into THOR:\\n\\n1. Using the ECL Watch interface\\n2. Using the DFUPLUS Command Line Utility\\n3. Using the ECL Standard Function Libraries\\n\\nSo to dynamically import data into THOR, you probably want to use option 3. This is a standard operation that we do everyday, as we are always getting updates, some daily, some weekly, some monthly. The trick is to come up with a standard to detect that new data is on the landing zone and ready for import. One technique is to use a CRON job to schedule an ECL event that is triggered by the existence of a "trigger" or "dummy" file that signals the CRON job that data is ready for import. When that file is detected, the CRON job fires an event that starts the dynamic spray process.\\n\\nLook at CRON and WHEN and EVENT in the Language Reference Manual, as well as the Spray Libraries that can be found in the Standard Library Reference PDF.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-29 20:03:07\" },\n\t{ \"post_id\": 6135, \"topic_id\": 1402, \"forum_id\": 12, \"post_subject\": \"How to dynamically import data and model data?\", \"username\": \"rqg0717\", \"post_text\": \"Hi all,\\n\\nI was wondering if there was a way to dynamically import data to Thor, since I am dealing with some real-time data. 
Thank you very much.\\n\\nSincerely,\\nJames\", \"post_time\": \"2014-07-29 18:18:29\" },\n\t{ \"post_id\": 6205, \"topic_id\": 1417, \"forum_id\": 12, \"post_subject\": \"Re: Do HPCC Systems support Ontologies?\", \"username\": \"HPCC Staff\", \"post_text\": \"Regarding additional modules, you can view a full list of what is available here:\\nhttp://hpccsystems.com/products-and-ser ... ts/modules\\n\\nAlso, there are Third Party plugins available here:\\nhttp://hpccsystems.com/products-and-ser ... ts/plugins\\n\\nThank you for your interest!\", \"post_time\": \"2014-08-08 17:44:49\" },\n\t{ \"post_id\": 6204, \"topic_id\": 1417, \"forum_id\": 12, \"post_subject\": \"Re: Do HPCC Systems support Ontologies?\", \"username\": \"dabayliss\", \"post_text\": \"We do not have specific support for owl ontologies. We have had many, many people import xml of various shades and flavors (including RDF) - but to my knowledge there has not been any formalized OWL work.\\n\\nNow - a later incarnation of KEL (0.6) has RDF / SPARQL support penciled in; at which point we would almost certainly have some degree of ontology support.\", \"post_time\": \"2014-08-08 17:16:36\" },\n\t{ \"post_id\": 6199, \"topic_id\": 1417, \"forum_id\": 12, \"post_subject\": \"Re: Do HPCC Systems support Ontologies?\", \"username\": \"rqg0717\", \"post_text\": \"I have found KEL Lite. Is this the only tool/module?\", \"post_time\": \"2014-08-08 14:36:23\" },\n\t{ \"post_id\": 6198, \"topic_id\": 1417, \"forum_id\": 12, \"post_subject\": \"Do HPCC Systems support Ontologies?\", \"username\": \"rqg0717\", \"post_text\": \"Dear all,\\n\\nI was wondering if HPCC Systems support ontologies e.g. OWL ontologies. If positive, is there any document or example I can follow? Thanks a lot.\\n\\nSincerely,\\nJames\", \"post_time\": \"2014-08-08 14:24:46\" },\n\t{ \"post_id\": 211, \"topic_id\": 76, \"forum_id\": 13, \"post_subject\": \"Re: examples of Roxie based apps?\", \"username\": \"dabayliss\", \"post_text\": \"I don't believe so - their front end can be ran off of a number of different SQL back ends (it operates through JDBC). We have done the Pentaho demo at a number of shows but to the best of my knowledge there is nothing up on the web yet for people to play with ...\\n\\nDavid\\n[quote="aintnomyth":2sflo0oo]David, thanks for the information. That gives me a lot to work with.\\n\\nYou mentioned the Pentaho demo, is Roxie actually feeding their BI demo at demo.pentaho.com? Their demo looks impressive, any idea how much data is behind that?\", \"post_time\": \"2011-07-29 22:00:41\" },\n\t{ \"post_id\": 210, \"topic_id\": 76, \"forum_id\": 13, \"post_subject\": \"Re: examples of Roxie based apps?\", \"username\": \"aintnomyth\", \"post_text\": \"David, thanks for the information. That gives me a lot to work with.\\n\\nYou mentioned the Pentaho demo, is Roxie actually feeding their BI demo at demo.pentaho.com? Their demo looks impressive, any idea how much data is behind that?\", \"post_time\": \"2011-07-29 20:46:55\" },\n\t{ \"post_id\": 206, \"topic_id\": 76, \"forum_id\": 13, \"post_subject\": \"Re: examples of Roxie based apps?\", \"username\": \"dabayliss\", \"post_text\": \"Gurgle - that is one of those open-ended questions I always answered badly at college! Let me try to narrow things down a little and I may be able to help:\\n\\n1) Roxie (at least used properly) expects you data -model- to be fairly fixed. 
Obviously you can change it day-to-day but not minute-to-minute (unless you are in the experimental stage)\\n\\n2) One use for which Roxie is extremely well suited is running a few (hundreds) pre-compiled queries billions of times a day on different smallish (megabytes) datasets. Think: use an index or two, retrieve a handful of data, smack it around, maybe do a couple more index fetches, smack it around some more and hand it back.\\n\\n3) Another use which -appears- the same in when it is possible to pre-build some aggregated statistics on large sets of data (gigabytes/terabytes) and then using those aggregated or partially aggregated results it is possible to -appear- to be running global reports billions of times a day.\\n\\n4) 'Roxie on demand' - The ECL system has the ability to 'compile and execute' a roxie query as a one-shot, one time deal via the ECL server. You can think of this as very similar to SQL <semantics, not syntax>. The ECL server will take your ECL, optimize it, compile it, deploy it and get back the results. The roxie itself executes at full speed; but there is overhead for the compile/optimize etc on the ECL server. This allows you to query ANYTHING you wish that the indexes you build support. Speed depends upon the number of ECL servers you have - but you now need to be thinking of low-seconds latency rather than tens of milliseconds.\\n\\n5)In memory roxie with an 'interpreter' front end: this is the way we did our Pentaho demo (an OLAP tool). We use PRELOAD to get the data (compressed) into memory in roxie. We then wrote ECL that 'interpreted' a range of queries that were passed to it - this range supported all of the query types required to support an OLAP front end.\\n\\n6) In my 'used properly' quote earlier you might have noticed a slice edge - there is another way to use Roxie and Thor that is extremely flexible IF you have ample resources for the volume of data you are processing. Your 'tiny' dataset would be an example of this. Essentially you can use the ECL Parse statement to process a 'language' of your design. You would typically represent the data using some completely generic data model (such as file/property/value triples) and you could process it according to the 'commands' in the language typed in. (The GRAPH statement is excellent for this - I have a simple example of using it here:http://www.dabhand.org/ECL/construct_a_simple_bible_searchII.htm)\\n\\nAs I said, I hope that helps - but I can probably give a more specific answer with more information to go on ....\\n\\nDavid\", \"post_time\": \"2011-07-28 14:52:53\" },\n\t{ \"post_id\": 204, \"topic_id\": 76, \"forum_id\": 13, \"post_subject\": \"examples of Roxie based apps?\", \"username\": \"aintnomyth\", \"post_text\": \"Not sure where to put this post...\\n\\nWe have legacy applications that build and execute dynamic SQL against a reporting database. I think Roxie could replace the reporting database but the implementation seems a little foreign to me, coming from the relational world. Can someone list a few Roxie implementation scenarios/examples to illustrate how it typically meshes with reporting apps?\\n\\nCould Roxie handle open ended decision support apps that traditionally use a data warehouse or OLAP? 
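A minimal sketch of what the pre-compiled "index fetch" pattern in point 2 above can look like once published to Roxie; the index name, its layout, and the stored parameter are hypothetical:

IMPORT STD;

// Query parameter exposed by the published service
STRING20 lastNameIn := '' : STORED('LastName');

// A key built on Thor and deployed alongside the query (name and layout are made up)
personKey := INDEX({STRING20 lastname, STRING20 firstname, UNSIGNED8 recpos},
                   '~roxie::key::person::byname');

// Keyed lookup: hit the index, pull back a handful of rows, hand them back
matches := personKey(KEYED(lastname = STD.Str.ToUpperCase(lastNameIn)));
OUTPUT(CHOOSEN(matches, 100));

The STORED value becomes the query's input field once the workunit is published as a Roxie query; the one-shot "Roxie on demand" route in point 4 submits the same kind of ECL through the ECL server instead.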
The data questions are unknown at design-time which seems to conflict with Roxie - this could be a misunderstanding on my part though.\\n\\nCould it be used as a dynamic computation engine for tiny datasets where all of the necessary data (and maybe even ECL) are input arguments?\\n\\nThanks in advance!\", \"post_time\": \"2011-07-28 14:14:13\" },\n\t{ \"post_id\": 1752, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"Thanks dabayliss,\\n\\nI will give that a shot and let you know how it works out.\\n\\nStetson\", \"post_time\": \"2012-06-08 18:33:29\" },\n\t{ \"post_id\": 1749, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"dabayliss\", \"post_text\": \"First a caveat - my knowledge of medicine is minimal - so it is plausible I have misunderstood some of your words. Notwithstanding:\\n\\n1) The only question here is whether or not you know your grouping criteria apriori. If you do then a simple project where you use a MAP to convert your data into categoric variables will work fine.\\nIf you do not know the grouping apriori then the Discretize routines inside ML allows you to categorize the various variables into either percentiles or evenly across a range.\\n\\n2) This is the bit i am least likely to understand. In order to compute survival rate (as I understand it) - you need to know who is entering (through diagnosis) and who is leaving (through death) the population. That is what my first and last is computing. If you have those two I -think- you can compute the survival rate.\\n\\n3) This is a straightforward clustering exercise. The agglomerative clustering piece of the ML libraries perform both the distance metric (we have about half a dozen) and the hierarchical clustering for you.\\n\\n4) The above all work at extreme scale ...\", \"post_time\": \"2012-06-08 18:07:04\" },\n\t{ \"post_id\": 1747, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"Thanks for response dabayliss, \\n\\nI am just going to attempt to clarify the spec for you because while the code you have put up is useful, it is not what I need to do.\\n\\nTo start at the beginning I have a set of patient data that concerns a type of cancer. This data contains not personal or identification information, but it contains relevant medical data like size of tumor, how defined the tumor was, what stage cancer it was and so on and so forth.\\n\\nTo get what I need out of this there are several steps.\\n1) I need to group the patients by the information that I wish to compare whether it be the size of the tumor or the stage of the cancer the patient was in, some other piece of information that is is the file or some combination of information so I can look at refined data sets for step 2.\\n\\n2) I need to compute the incidence rates and overall survival rate for each group I create in step 1. 
For more information I recommend looking up hazard ratio and survival rate on Wikipedia because those are the algorithms I need to implement for this step and I do not wish to make this post overly long\\n\\n3) I need to compute the distance between the clusters so I can cluster them in a Hierarchical fashion so people can look at how closely relate two groups are and then look at how their survival rates and hazard ratios over time differ\\n\\n4) This is more a problem for the future after I get steps 1-3 working but I need to figure out how to do this for arbitrarily amounts of unique patient groups.\\n\\nI hope this clarified things a little bit and if you have further questions please feel free to ask.\\n\\nStetson\", \"post_time\": \"2012-06-08 17:16:00\" },\n\t{ \"post_id\": 1743, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"dabayliss\", \"post_text\": \"I am still not 100% clear on the spec. However - the ECL way is to work bottom up from the data. So - firstly - suppose I want to know how many people turned up to the doctor in a given month...\\n\\nVisits := TABLE(patient,{month_id,Cnt := COUNT(GROUP)},month_id,FEW);\\n\\nNow - supposing you want to know when a patients first & last visit is:\\n\\nFirsts := TABLE(patient,{patient_id,Frst := MIN(GROUP,month_id),Lst := MAX(GROUP,month_id)},patient_id);\\n\\nNow - suppose you want to annotate the patient data with whether or not this was the first or last visit:\\n\\nSomeType TakeExtrema(Patient le,First ri) := TRANSFORM\\n SELF.IsFirst := le.month_id=ri.frst;\\n SELF.IsLast := le.month_id=ri.lst;\\n SELF := le;\\n END;\\n\\nPatient_Plus := JOIN(patient,firsts,LEFT.patient_id=RIGHT.patient_id,TakeExtrema(LEFT,RIGHT));\\n\\nI believe that following this methodology you will get the information you want in a format that is usable (and it will run with full parallelism.\\n\\nDAB\", \"post_time\": \"2012-06-08 15:43:53\" },\n\t{ \"post_id\": 1733, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"rtaylor\", \"post_text\": \"Stetson,\\nThe prototype version I have is in R and it takes the following steps:\\n1) Group the patients with the characteristics you want to compare\\n2) Compute the hazard rate and the survival rate over the 5 year period
I have a book on Machine Learning I'm reading now that uses R for all its example code, so I understand that R is a language specifically created for doing statistical analysis. \\n\\nOK, to me your step 1 simply means filtering the patient records to the set that you want to work with. Step 2 is what the NORMALIZE code previously posted can do.\\nFrom here on out I am still figuring out what the prototype is doing so it will get a little vague.\\n3) Compute the distance between the different clusters \\n4) Hierarchically cluster the various groups to see which groups are similar and which groups are different
Now here you're starting to get into what R is all about -- Machine Learning. I'm afraid that I'm a neophyte in that area (that's why I'm reading the book), so my best suggestion is to take a look at our Machine Learning resources, starting here http://hpccsystems.com/ml \\n\\nPerhaps someone with more Machine Learning experience in ECL than I can chime in at this point and teach us both. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-08 13:39:39\" },\n\t{ \"post_id\": 1731, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"So, the full problem I am trying to solve goes something as follows:\\n*Disclaimer: I will be as up front about all the steps I can be, but I have not figured out the fine details for some of these steps yet.\\n\\nI have a large collection of patient data that contains information about their cancer and the eventual outcome over a five year period. The goal is to take the patient data and extract information about survival and incident rates.\\n\\nThe prototype version I have is in R and it takes the following steps:\\n1) Group the patients with the characteristics you want to compare\\n2) Compute the hazard rate and the survival rate over the 5 year period\\n\\nFrom here on out I am still figuring out what the prototype is doing so it will get a little vague.\\n3) Compute the distance between the different clusters \\n4) Hierarchically cluster the various groups to see which groups are similar and which groups are different\\n\\nRight now I am looking solely at ECL, but there will be a front end user interface at the end of this project where the user can determine which statistics they wish to compare, so I need to keep in mind that the total number of clusters that are being compared is not a fixed number every time this code will be called.\\n\\nThanks in advance,\\n\\nStetson\", \"post_time\": \"2012-06-08 12:50:10\" },\n\t{ \"post_id\": 1728, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"rtaylor\", \"post_text\": \"pyrannis,
That is exactly what I need to get it started and running. However, if you or anyone else has a second, I could use some help understanding the difference between the two sections of code I was given, namely your NORMALIZE calls, because I do not really understand why the first one calls the code 60 times for each record while Richard's computes each step off of the full record.
The first parameter to Bob's NORMALIZE was his BaseFile dataset, which contained several records, while the first parameter to my NORMALIZE was the BlankDS dataset, which contained exactly one record. NORMALIZE always calls the TRANSFORM function the number of times specified in its second parameter (in this case, 60) for each record in the dataset specified as its first parameter. That's why my code called the TRANSFORM exactly 60 times.\\n\\nAlso, this is an abrupt change of topic, but is there a good tutorial on how to use the LEFT and RIGHT pointers anywhere? Something tells me I do not really want to create a new data set every time I need to compute something, because the next version of this algorithm will probably take a couple of iterations, and without using LEFT and RIGHT that could take up a lot of space.
First off, there is no such thing in ECL as a "pointer" -- LEFT and RIGHT are simply "disambiguators" that are used in circumstances where you are operating on a pair of records (or datasets) and need to qualify which record the specific field is from. For example:\\n\\nd := DEDUP(ds,LEFT.Field1 = RIGHT.Field2);
This code defines a deduped recordset, where "duplicates" are any records where the Field1 value in the first record matches the Field2 value in the second. The first rec (the LEFT) will be compared to the second (the RIGHT), and if the values match, the second rec will be thrown away...\\n\\nSecondly, when you say, "I do not really want to create a new data set every time I need to compute somethings ... that could take up a lot of space" you are showing me that you are thinking about ECL procedurally (a bad mistake to make). ECL is a DECLARATIVE, NON-PROCEDURAL language. That means that all your ECL code ever does is define what you want, not how the job gets done -- therefore you are never writing "executable code" when you write ECL. A definition is just that -- a definition. The executable code that actually does the work is generated for you by the compiler. These are fundamental concepts that we drill into students when they come to our ECL training classes (highly recommended).\\n\\nOK, given that clarification , if you would just fully describe the problem you're trying to solve, then we can make some suggestions as to the best "ECLish" approach to take.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-07 21:04:43\" },\n\t{ \"post_id\": 1727, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"Thank you Richard,\\n\\nThat is exactly what I need to get it started, and running. However if you or anyone else has a second I could use some help understanding the difference between the two sections of code I was given. namely your NORMALIZE calls because I do not really understand why the first one calls the code 60 times for each record and Richards acts computes each step off of the full record.\\n\\nAlso this is an abrupt change of topic, but is there is good tutorial how to use the LEFT and RIGHT pointers anywhere? Something tells me I do not really want to create a new data set every time I need to compute somethings because the next version of this algorithm will probably take of couple of iterations and without using LEFT and RIGHT that could take up a lot of space.\\n\\nThanks in advance.\", \"post_time\": \"2012-06-07 15:55:45\" },\n\t{ \"post_id\": 1724, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"rtaylor\", \"post_text\": \"OK, so if you want to iterate exactly 60 times, then your code should be something like this:
baseRec := RECORD\\n INTEGER1 Month;\\n INTEGER Casualty;\\nEND;\\n\\nBaseFile := DATASET([{1,10},{2,30},{3,50},{4,55}],baseRec);\\n\\nOutRec := RECORD\\n INTEGER1 Time;\\n REAL4 Hazard_Rate;\\nEND; \\nBlankDS := DATASET([{0,0}],OutRec); \\n\\nOutRec DoHazards(INTEGER t) := TRANSFORM\\n SELF.Time := t;\\n SELF.Hazard_Rate := COUNT(Basefile(Month = t))/COUNT(Basefile(Month >= t));\\nEND;\\n\\nnormout := NORMALIZE(BlankDS,60,DoHazards(COUNTER));\\nnormout;
Note the use of the single-record "BlankDS" for NORMALIZE. This allows you to set the specific number of iterations.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-06-06 20:56:03\" },\n\t{ \"post_id\": 1721, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"Thanks for the quick reply bforman, \\n\\nHowever, I believe your code performs the hazard calculation on each record 60 times, I need to perform that calculation once at each time step on every record in my data set since all I have is a list of patient data and the time that the they visited the doctor. \\n\\nIf I were to write this in C++ it would look something like\\n\\ndouble Hazard_Function[60];\\n\\nfor(i = 1; i <= 60; ++i)\\n{\\nHazard_Function[i] = Patients_This_Month(i)/Patients_Waiting(i);\\n}\\n\\nPatients_This_Month(i): function that goes through my list of patient data and counts the number of patients that go went to the doctor in that month\\n\\nPatients_Waiting(i): function that goes through my list of patient data and counts the patients that are going to the doctor's office this month and all patients that still have not gone to the doctor's office.\", \"post_time\": \"2012-06-06 20:31:05\" },\n\t{ \"post_id\": 1720, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Re: Iterating Data Analysis\", \"username\": \"bforeman\", \"post_text\": \"Essentially, what you need to do is to use a NORMALIZE,which gives you the ability to iterate a finite amount. Here's an inline example:\\n\\nbaseRec := RECORD\\n\\tINTEGER1 Time;\\n\\tINTEGER Casualty;\\nEND;\\n\\nBaseFile := DATASET([{1,10},{2,30},{3,50},{4,55}],baseRec);\\n\\t\\t\\t \\nOutRec \\t:= RECORD\\n\\tINTEGER1 Time;\\n\\tREAL4 Hazard_Rate;\\nEND;\\t\\t\\t\\t\\t\\t\\t\\t\\n\\nOutRec DoHazards(BaseFile Le,INTEGER t) := TRANSFORM\\n SELF.Time := Le.time;\\n SELF.Hazard_Rate := COUNT(BaseFile(Le.time = t))/COUNT(BaseFile(Le.time >= t));\\nEND;\\n\\n//I need to be able to assign a range 1:60\\n//to t and output the results to the user.\\n\\nnormout := NORMALIZE(BaseFile,60,DoHazards(LEFT,COUNTER));\\nnormout;
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-06 19:17:39\" },\n\t{ \"post_id\": 1718, \"topic_id\": 379, \"forum_id\": 13, \"post_subject\": \"Iterating Data Analysis\", \"username\": \"pyrannis\", \"post_text\": \"Greetings, I am relatively new to this board so I am not sure I am posting in the correct forum. However I have a large data set of a population that I need to compute the hazard rate for a fixed amount of time, and I need to output the survival rate as a function of time so I can plot it and compare to other populations. \\n\\nNow I am very new to programming in ECL so all advice is welcome, but the algorithm I need to use boils down to:\\n\\nHazard_Rate(t) = COUNT($.pop(time = t))/COUNT($.pop(time >= t)\\n\\nI need to be able to assign a range 1:60 to t and output the results to the user.\\n\\nThanks in advance for the help.\", \"post_time\": \"2012-06-06 18:02:25\" },\n\t{ \"post_id\": 4479, \"topic_id\": 1000, \"forum_id\": 13, \"post_subject\": \"Re: Challenges with JSON output from Roxie query\", \"username\": \"anthony.fishbeck\", \"post_text\": \"FYI, I've created two issues to track this:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-9874\\nhttps://track.hpccsystems.com/browse/HPCC-9875\", \"post_time\": \"2013-08-19 14:17:49\" },\n\t{ \"post_id\": 4468, \"topic_id\": 1000, \"forum_id\": 13, \"post_subject\": \"Re: Challenges with JSON output from Roxie query\", \"username\": \"gsmith\", \"post_text\": \"While I am not disagreeing with your comments, it is fairly easy to "flatten" the JSON structure (this code is off the top of my head):\\n\\n\\nvar flattenJSON = function (parent, obj, objectsToRemove) {\\n for (key in obj) {\\n flattenJSON(obj, obj[key], objectsToRemove);\\n if (objectsToRemove.indexOf(key) >= 0) {\\n // Need to test if parent already has property of the same name (and is not null).\\n parent.key = obj[key];\\n delete obj[key];\\n }\\n }\\n}\\n
\\n\\nAnd then to call:\\n\\nflattenJSON(null, rumsearchntileResponse.Results, ["Row"]);\\n
\", \"post_time\": \"2013-08-19 07:04:47\" },\n\t{ \"post_id\": 4466, \"topic_id\": 1000, \"forum_id\": 13, \"post_subject\": \"Re: Challenges with JSON output from Roxie query\", \"username\": \"benhastings\", \"post_text\": \"thank you, gsmith. That is a workaround for one half of the problem. The bigger difficulty is the overly nested nature of the JSON output. The object that is created after parsing the JSON is simply not usable.\\n\\nI think this is a defect - it is technically "valid JSON" but it's not _useful_ JSON. It almost looks like it's taking the same processing as the XML output but then missing some steps.\", \"post_time\": \"2013-08-19 01:40:15\" },\n\t{ \"post_id\": 4465, \"topic_id\": 1000, \"forum_id\": 13, \"post_subject\": \"Re: Challenges with JSON output from Roxie query\", \"username\": \"gsmith\", \"post_text\": \"I recently hit a similar issue (in my case it was parsing the XML result set from a WU - also for a mapping to a viz library). Until I parse the schema correctly and "know" that a field is supposed to be numeric I just popped this into my JavaScript:\\n\\n} else if (!isNaN(parseInt(item[mappings[key]]))) {\\n retVal[key] = parseInt(item[mappings[key]]);\\n} else {\\n retVal[key] = item[mappings[key]];\\n}\\n
\\n\\nIn the worst case it will convert some string numerics to numerics, but with JS's auto casting, it will just get converted back to a string as needs be.\\n\\n(Just a workaround),\\n\\nGordon.\", \"post_time\": \"2013-08-18 13:18:40\" },\n\t{ \"post_id\": 4464, \"topic_id\": 1000, \"forum_id\": 13, \"post_subject\": \"Challenges with JSON output from Roxie query\", \"username\": \"benhastings\", \"post_text\": \"our cluster isn't open to the world, so I won't post an endpoint, but I'll describe the problem:\\n\\nWe are deploying queries to Roxie to be consumed by visualization libraries and the like by our associates. We are going to build an interface to facilitate interaction with these summary datasets. For utility and efficiency, we are going to use the JSON output. However, I've run into a few challenges with the output as it exists. (EXAMPLE JSON BELOW)\\n\\nthey are:\\n1. UNSIGNEDx fields are being converted to character strings with quotes in the output\\n2. There are extra levels of hierarchy that appear extraneous in the output causing JSON.parse(<json_string>) to create an invalid object.\\n\\nPlease advise if there are changes I can make to alter the output or if there is more information required.\\n\\nReturned from Roxie\\n{\\n "rumsearchntileResponse": {\\n "Results": {\\n "Result_1": {\\n "Row": [\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": "201307",\\n "wk": "1372550400",\\n "dy": "1372651200",\\n "hr": "1372651200",\\n "minute": "1372651200",\\n "ntile": 5,\\n "metricname": "pgi",\\n "value": 802.0,\\n "__internal_fpos__": "0"\\n },\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": "201307",\\n "wk": "1372550400",\\n "dy": "1372651200",\\n "hr": "1372651200",\\n "minute": "1372651200",\\n "ntile": 5,\\n "metricname": "pgl",\\n "value": 3284.0,\\n "__internal_fpos__": "0"\\n },\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": "201307",\\n "wk": "1372550400",\\n "dy": "1372651200",\\n "hr": "1372651200",\\n "minute": "1372651200",\\n "ntile": 5,\\n "metricname": "ttfb",\\n "value": 47.0,\\n "__internal_fpos__": "0"\\n }\\n ]\\n } }\\n }\\n}
\\n\\nExpected output:\\n{\\n "Result_1": [\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": 201307,\\n "wk": 1372550400,\\n "dy": 1372651200,\\n "hr": 1372651200,\\n "minute": 1372651200,\\n "ntile": 5,\\n "metricname": "pgi",\\n "value": 802.0,\\n "__internal_fpos__": "0"\\n },\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": 201307,\\n "wk": 1372550400,\\n "dy": 1372651200,\\n "hr": 1372651200,\\n "minute": 1372651200,\\n "ntile": 5,\\n "metricname": "pgl",\\n "value": 3284.0,\\n "__internal_fpos__": "0"\\n },\\n {\\n "cpc": "SD",\\n "pagecat": "Content Delivery",\\n "zone": "APAC",\\n "country": "AU",\\n "ym": 201307,\\n "wk": 1372550400,\\n "dy": 1372651200,\\n "hr": 1372651200,\\n "minute": 1372651200,\\n "ntile": 5,\\n "metricname": "ttfb",\\n "value": 47.0,\\n "__internal_fpos__": "0"\\n }\\n ]\\n }
\", \"post_time\": \"2013-08-16 19:48:53\" },\n\t{ \"post_id\": 4939, \"topic_id\": 1108, \"forum_id\": 13, \"post_subject\": \"Re: Facebook open sources its SQL-on-Hadoop engine - Presto\", \"username\": \"HPCC Staff\", \"post_text\": \"It is always nice to see more options available to the open source community. We are not aware of any benchmarks comparing Roxie against Presto but welcome anyone wanting to run a test and share results.\", \"post_time\": \"2013-11-13 18:16:25\" },\n\t{ \"post_id\": 4909, \"topic_id\": 1108, \"forum_id\": 13, \"post_subject\": \"Facebook open sources its SQL-on-Hadoop engine - Presto\", \"username\": \"DQ\", \"post_text\": \"Hi,\\nThought of putting this up...if anyone has anything to say on this...like how does Roxie stand up to Presto...seems Presto is 10 times faster than Hive...\\n\\nhttp://gigaom.com/2013/11/06/facebook-o ... rejoices/#!\", \"post_time\": \"2013-11-08 22:15:39\" },\n\t{ \"post_id\": 7437, \"topic_id\": 1667, \"forum_id\": 13, \"post_subject\": \"Re: Can google analytics- TERM CLOUD be used in HPCC ?\", \"username\": \"pius_francis\", \"post_text\": \"Thanks a lot Bob\", \"post_time\": \"2015-04-23 09:25:27\" },\n\t{ \"post_id\": 7435, \"topic_id\": 1667, \"forum_id\": 13, \"post_subject\": \"Re: Can google analytics- TERM CLOUD be used in HPCC ?\", \"username\": \"bforeman\", \"post_text\": \"Yes, this is new to Version 5. You should upgrade at your first opportunity! At the very least you can see it on the HPCC VM.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-23 06:57:37\" },\n\t{ \"post_id\": 7434, \"topic_id\": 1667, \"forum_id\": 13, \"post_subject\": \"Re: Can google analytics- TERM CLOUD be used in HPCC ?\", \"username\": \"pius_francis\", \"post_text\": \"Thanks Bob. Unfortunately i am unable to find the 'visualize' tab. I am using version 4.0.2 -1.\", \"post_time\": \"2015-04-23 05:38:27\" },\n\t{ \"post_id\": 7282, \"topic_id\": 1667, \"forum_id\": 13, \"post_subject\": \"Re: Can google analytics- TERM CLOUD be used in HPCC ?\", \"username\": \"bforeman\", \"post_text\": \"In the workunit results you can look at results or use the “Visualize” tab where there is a word cloud that should be the same thing. You could use ML.Docs to generate the word frequencies to base the word sizing on that. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-07 18:30:20\" },\n\t{ \"post_id\": 7269, \"topic_id\": 1667, \"forum_id\": 13, \"post_subject\": \"Can google analytics- TERM CLOUD be used in HPCC ?\", \"username\": \"pius_francis\", \"post_text\": \"Can google analytics- TERM CLOUD be used in HPCC ?\", \"post_time\": \"2015-04-07 13:08:20\" },\n\t{ \"post_id\": 8636, \"topic_id\": 2016, \"forum_id\": 13, \"post_subject\": \"Re: SORT required for ITERATE?\", \"username\": \"bforeman\", \"post_text\": \"Hi Janet,\\n\\nSORT is not required for ITERATE, but in some cases, yes, it can help. There may be some uses of ITERATE (like sequencing records) where a SORT is definitely not needed. The success of the ITERATE depends on how it is used, and it is not always required to SORT. 
\\n\\nUsing a DEDUP, a sorted recordset based on the DEDUP condition is definitely required.\\n\\nBut yes, if ITERATE was to be used on a GROUPed dataset, the GROUP of course should be SORTed.\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2015-11-18 15:07:41\" },\n\t{ \"post_id\": 8632, \"topic_id\": 2016, \"forum_id\": 13, \"post_subject\": \"SORT required for ITERATE?\", \"username\": \"janet.anderson\", \"post_text\": \"I was reviewing the code of a co-worker who is a stat modeler not familiar with ECL. He was doing a GROUP and then an ITERATE. I was concerned because there was no SORT within his grouping and he was trying to determine the change in a value between consecutive records. But strangely the ECL Language Reference doesn't mention sorting with regards to ITERATE at all and the examples don't use SORT either. Am I missing something? Shouldn't ITERATE be as dependent on sort order as a function like DEDUP (where the documentation does mention SORT)?\\n\\nThanks for any feedback.\", \"post_time\": \"2015-11-17 22:27:02\" },\n\t{ \"post_id\": 8832, \"topic_id\": 2058, \"forum_id\": 13, \"post_subject\": \"Re: Difference between adjacent rows?\", \"username\": \"jcma\", \"post_text\": \"Thank you both for your help, I didn't realize we had the PROCESS function. Maybe should have paid attention better during ECL training . \\n\\nCheers,\\nJames\", \"post_time\": \"2015-12-11 19:50:03\" },\n\t{ \"post_id\": 8830, \"topic_id\": 2058, \"forum_id\": 13, \"post_subject\": \"Re: Difference between adjacent rows?\", \"username\": \"bforeman\", \"post_text\": \"I like Richard's solution better, I did not realize that the Row Transform in the PROCESS could be done inline
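A small sketch of the grouped-ITERATE point above; the record layout and values are made up. Sorting by the grouping key and a sequence field first is what makes "consecutive records" meaningful, and the GROUP makes the iteration (and its COUNTER) restart for each key:

rec := RECORD
  STRING1   acct;
  UNSIGNED2 seq;
  INTEGER4  value;
  INTEGER4  delta := 0;
END;
ds := DATASET([{'A',1,10,0},{'A',2,25,0},{'A',3,40,0},{'B',1,5,0},{'B',2,9,0}], rec);

rec diff(rec le, rec ri, UNSIGNED c) := TRANSFORM
  SELF.delta := IF(c = 1, 0, ri.value - le.value);  // first record of each group has no predecessor
  SELF := ri;
END;

srt := SORT(ds, acct, seq);   // without this, "consecutive" is whatever order the records happen to be in
grp := GROUP(srt, acct);      // ITERATE restarts at each group boundary
OUTPUT(ITERATE(grp, diff(LEFT, RIGHT, COUNTER)));

With the GROUP in place the comparison never crosses from one acct value to the next, which is the behaviour the modeler was assuming.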
\\n\\nThe beautiful thing about ECL is that there are many ways to get to the mountain top!\\n\\nThanks Richard!\\n\\nBob\", \"post_time\": \"2015-12-11 19:48:26\" },\n\t{ \"post_id\": 8828, \"topic_id\": 2058, \"forum_id\": 13, \"post_subject\": \"Re: Difference between adjacent rows?\", \"username\": \"rtaylor\", \"post_text\": \"jcma,\\n\\nITERATE won't work in this case, since you want to compare the original prior record to each subsequent record and produce a new recordset. What you want to use is the PROCESS function (it's an "iterate on steroids" function), like this:
rec := RECORD\\n INTEGER Col1;\\n INTEGER Col2;\\nEND;\\nds := DATASET([{2,3},{5,7},{7,7}],rec);\\n\\nrec DSxform(rec L,Rec R) := TRANSFORM\\n SELF.Col1 := L.Col1 - R.Col1;\\n SELF.Col2 := L.Col2 - R.Col2;\\nEND;\\t\\t\\t\\t\\t\\n\\np := PROCESS(ds,\\n ds[1],\\n DSxform(LEFT,RIGHT),\\n TRANSFORM(LEFT));\\nOUTPUT(p);
PROCESS is similar to ITERATE, but it allows you to specify the record that each record in the dataset is "banged" against. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-11 19:39:57\" },\n\t{ \"post_id\": 8826, \"topic_id\": 2058, \"forum_id\": 13, \"post_subject\": \"Re: Difference between adjacent rows?\", \"username\": \"bforeman\", \"post_text\": \"Here is the nicest way I can think of doing this:\\n\\n/* Given a dataset:\\n col1 col2\\n row1 2 3\\n row2 5 7\\n row3 7 7\\n \\n then if i run the iterate function, would like to get the following results.\\n \\n col1 col2\\n row1 0 0\\n row2 3 4\\n row3 2 0\\n*/\\nMyRec := RECORD\\n\\tINTEGER2 Value1;\\n\\tINTEGER2 Value2;\\nEND;\\n\\n\\nSomeFile := DATASET([{2,3},{5,7},{7,7}],MyRec);\\n\\nTempRec := RECORD\\n\\tSomeFile.Value1;\\n\\tSomeFile.Value2;\\n\\tINTEGER2 Value3 := 0;\\n\\tINTEGER2 Value4 := 0;\\t\\nEND;\\n\\nBaseRec := DATASET([{0,0,0,0}],TempRec); \\n\\nTempTbl := TABLE(SomeFile,TempRec);\\n\\nTempRec SubThem(TempRec Le, TempRec Ri) := TRANSFORM\\n SELF.value1 := Ri.Value1-Le.Value3;\\n\\tSELF.Value2 := Ri.Value2-Le.Value4; \\n\\tSELF.Value3 := Ri.Value1;\\n\\tSELF.Value4 := Ri.Value2;\\nEND;\\n\\nAddedRecs := ITERATE(TempTbl,SubThem(LEFT,RIGHT));\\n\\noutput(BaseRec+AddedRecs[2..],{Value1,value2});
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-12-11 19:39:17\" },\n\t{ \"post_id\": 8822, \"topic_id\": 2058, \"forum_id\": 13, \"post_subject\": \"Difference between adjacent rows?\", \"username\": \"jcma\", \"post_text\": \"Good Morning,\\n\\nI am trying to figure out how to take the difference between two adjacent rows in a dataset. What I have tried is as follows:\\n\\n1. make transform function such that new.column = R.column-L.column\\n2. iterate the function on the dataset.\\n\\nso for example, given a dataset:\\n col1 col2\\nrow1 2 3\\nrow2 5 7\\nrow3 7 7\\n\\nthen if i run the iterate function, would like to get the following results.\\n\\n col1 col2\\nrow1 0 0\\nrow2 3 4\\nrow3 2 0\\n\\nIs there a nice way to do this? Appreciate any help...\", \"post_time\": \"2015-12-11 15:40:43\" },\n\t{ \"post_id\": 1402, \"topic_id\": 25, \"forum_id\": 14, \"post_subject\": \"Re: Automation\", \"username\": \"kovacsbv\", \"post_text\": \"The link to the Data Handling guide above is broken. The Data handling guide is here:\\n\\nhttp://hpccsystems.com/community/docs/data-handling\", \"post_time\": \"2012-03-22 16:30:29\" },\n\t{ \"post_id\": 59, \"topic_id\": 25, \"forum_id\": 14, \"post_subject\": \"Re: Automation\", \"username\": \"JimD\", \"post_text\": \"The Data Handling Guide explains File Spraying and De-spraying and it shows how to load data onto your Landing Zone. \\n\\nBefore you begin, you must load the data to a Landing Zone (aka drop zone). The steps are detailed in the Data Handling PDF.\\n\\nTo Spray (or despray) data, there are three methods :\\n\\n1) The ECL Watch interface (documented in Data Handling PDF)\\n\\n2) The DFUPlus command line interface (CLI) which is installed with the ECL IDE (documented in the Client Tools manual).\\n\\n3) In ECL Code using FileServices library functions. (See the ECL Language Reference)\\n\\nYou can use DFUPlus in a script or as a CRON task\\n\\nIn addition, you can use the ECL Scheduler (using the WHEN Clause in ECL code) to submit a job that will wait for an event, such as a new file's arrival, and then automatically spray a file. (See the WHEN Workflow service topic in the ECL Language Reference)\", \"post_time\": \"2011-05-20 14:01:13\" },\n\t{ \"post_id\": 44, \"topic_id\": 25, \"forum_id\": 14, \"post_subject\": \"Re: Automation\", \"username\": \"ewadler\", \"post_text\": \"[quote="sort":30ho7bof]1. The landing zone is just a directory that we use in eclwatch, etc when doing things like spraying / despraying files. We create a directory (currently mydropzone) with the proper permissions. If you want to configure a different directory to use, and eclwatch cannot access it, then you need to make sure you have proper permissions and firewall settings.\\n\\n2. We have some command line tools that do similar work as our web base visual UI tools. These tools communicate to our back end services mostly via SOAP and not odbc / jdbc\\n\\n1. Ok, so are you referring to the "Network Path:" referenced in the Spray links? What do I need to look out for to connect to it? I am in Windows 7 and my firewall is turned off and my network sharing is turned on. I noticed that for Windows 7, the slashes are backwards, so copying and pasting causes a weird error. But, when I change the direction of the slashes, I get a "Windows cannot access {network drive address}", and it says it is an unspecified error. \\n\\n2. Fantastic, sounds like what I am looking for. Can you share them with me? 
The source might be helpful if possible, so I could write my own applications that didn't need to call command line tools from an app, but I can understand if these are copyright or controlled source tools.\\n\\nthanks\", \"post_time\": \"2011-05-17 19:53:25\" },\n\t{ \"post_id\": 43, \"topic_id\": 25, \"forum_id\": 14, \"post_subject\": \"Re: Automation\", \"username\": \"sort\", \"post_text\": \"1. The landing zone is just a directory that we use in eclwatch, etc when doing things like spraying / despraying files. We create a directory (currently mydropzone) with the proper permissions. If you want to configure a different directory to use, and eclwatch cannot access it, then you need to make sure you have proper permissions and firewall settings.\\n\\n2. We have some command line tools that do similar work as our web base visual UI tools. These tools communicate to our back end services mostly via SOAP and not odbc / jdbc\", \"post_time\": \"2011-05-13 21:11:57\" },\n\t{ \"post_id\": 42, \"topic_id\": 25, \"forum_id\": 14, \"post_subject\": \"Automation\", \"username\": \"ewadler\", \"post_text\": \"I would like to automate the process of uploading and spraying.\\n\\nI noticed that one can potentially use a network drive as the landing zone, but I cannot seem to connect to it, the documentation mentions discussing access to the network share with your administrator, but goes no further into the setup.\\n\\nAlso, I notice that spraying and ecl queries can accomplished via command line. Are there any other tools to accomplish this from within a custom application, such as a jdbc/odbc connector, or does a custom application that utilizes HPCC require sending automated commands to the command line of the host system?\\n\\nthanks\", \"post_time\": \"2011-05-13 20:53:09\" },\n\t{ \"post_id\": 84, \"topic_id\": 45, \"forum_id\": 14, \"post_subject\": \"Re: Recommended Hardware\", \"username\": \"jonburger\", \"post_text\": \"That sort of depends on what you want to do to the data. If you plan on indexing it, or linking it in certain ways which causes the data to grow (one to many relationships, etc), then you will need to size up accordingly. But I would recommend a good starting point is to have 2-3 times the amount of storage as you have data volume.\\n\\nThe other factor in sizing is speed in which you want to run certain tasks. Obviously the more # of nodes, the faster a particular job will perform. \\n\\nThe nice thing about HPCC is that if you find that you need additional space or more performance, you simply scale linearly.\\n\\nSo for instance while you can have a single server with 30TB of space, performance would be on par or possibly even less than a normal relationship database (think Oracle). However a much better solution for HPCC would be to have 20 servers each with 2TB of space (giving a total of 40TB usable). Now you have the potential of many cores, more RAM, more RAID controllers, cache, etc to process your data in a suitable timeframe. Want it faster? 
Scale it to 50 nodes, or 100, or 400.\\n\\n~Regards\\nJon Burger\\nManager, HPCC Engineering Team\", \"post_time\": \"2011-06-29 16:05:39\" },\n\t{ \"post_id\": 80, \"topic_id\": 45, \"forum_id\": 14, \"post_subject\": \"Recommended Hardware\", \"username\": \"Helen\", \"post_text\": \"I have roughly 10 TB worth of data in plaintext format, What kind of hardware do I need to host on HPCC?\", \"post_time\": \"2011-06-29 14:59:17\" },\n\t{ \"post_id\": 87, \"topic_id\": 48, \"forum_id\": 14, \"post_subject\": \"Community Edition Dependencies\", \"username\": \"bforeman\", \"post_text\": \"To everyone installing the latest Community Edition:\\n\\nMake sure to download and review thoroughly the latest "Installing & Running the HPCC System" PDF. Excellent documentation and required reading. \\n\\nOn Page 56, review the "Specific OS node software Installation commands" \\nFor CentosOS/RedHat, I also needed the following dependency installed:\\n\\nyum install libtool\\n\\nAfter that and the other dependencies already listed, you should be good to go.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-06-29 19:20:54\" },\n\t{ \"post_id\": 371, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"HPCC Staff\", \"post_text\": \"The topic he references in his earlier post may be from this thread. Let us know otherwise and we will get an answer for you.\\nhttp://hpccsystems.com/bb/viewtopic.php?f=14&t=59\", \"post_time\": \"2011-09-30 12:54:48\" },\n\t{ \"post_id\": 370, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"promax\", \"post_text\": \"Hi\\n\\nHow did you resolved this issue?\\n\\nWhat did you find ? or work around of the compiler.\\n\\nLooks like I have the exact problem \\n\\nThanks\\nSam\", \"post_time\": \"2011-09-30 10:27:00\" },\n\t{ \"post_id\": 268, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"HPCC Staff\", \"post_text\": \"No need to apologize! Thank you for letting us know the issue is resolved.
\", \"post_time\": \"2011-08-11 19:06:23\" },\n\t{ \"post_id\": 267, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"Hi All,\\n\\nI just want to let you know I have fixed the problem. It was caused by the compiler path. When I went through other topics, I saw this problem. Sorry for causing any confusion. Thank you so much for your help.\", \"post_time\": \"2011-08-11 18:25:53\" },\n\t{ \"post_id\": 261, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"Not very clear about what you mean. \\nWhen I configure my IDE for the connection, I just put the IP in it without any port number. Is it not right?\\n\\nOn windows machine, I already added IDE into the allowed program list.\\n\\nWhat does '[no subcode]' mean? And "An HTTP processing error occurred"? Does it necessarily mean a connection issue? or a communication protocol issue?\", \"post_time\": \"2011-08-11 14:18:43\" },\n\t{ \"post_id\": 227, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"Just to be clear, you can cannot to esp using a web browser, but when you configure your IDE by specifying just the ip in the server box in the preferences window, it does not work. I am trying to eliminate any possible firewall issues as the cause\", \"post_time\": \"2011-08-03 16:26:26\" },\n\t{ \"post_id\": 226, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"Then, I have no clue about why the IDE can not connect either of the two HPCC servers. It still reports the same Soap Error Message.\", \"post_time\": \"2011-08-03 16:17:01\" },\n\t{ \"post_id\": 225, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"the port 5480 issue is because that only is available via https (not http) and ONLY if you are running the HPCC VM image, not installing our binary installation file in your own VM\", \"post_time\": \"2011-08-03 15:55:42\" },\n\t{ \"post_id\": 224, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"Sorry, It was not working on Windows XP, either. The error message goes too quick on XP so that I thought the error did not happen.\\n\\nI guess we should figure out why http://172.27.3.20:5480/ is not working first.\\n\\nThis page does not work for both HPCC systems: one on my VM and one on an independent box.\", \"post_time\": \"2011-08-03 15:26:43\" },\n\t{ \"post_id\": 212, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"I have installed HPCC on an independent Linux Box. But, I got the same SOAP problem when I tried connecting it via ECL IDE on my Windows 7 machine.\\n\\nHowever, when I downloaded ECL IDE to a Windows XP machine and tried there, it worked through. So, it should be a Windows 7 related problem.\", \"post_time\": \"2011-08-01 15:00:30\" },\n\t{ \"post_id\": 208, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"I have added ECL IDE into allowed program list. 
But, it still does not work.\\n\\nI got an independent Linux box now and will try the whole system on it.\", \"post_time\": \"2011-07-28 15:49:47\" },\n\t{ \"post_id\": 207, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"OK. I was assuming you were installing and running the HPCC VM. That would explain not being able to access port 5480\\n\\nBased on my experiences I've noticed the following:\\n. Windows 7 and VMWare do not always work nicely together. I have network issues with VMWare installed (just installed, not actually running any vm sessions). I use Virtual Box on my windows 7 machine. I have a non windows 7 machine where I install VMWare\\n. Can you check your firewall settings. You may need to add the IDE to your allowed program list\", \"post_time\": \"2011-07-28 15:33:46\" },\n\t{ \"post_id\": 203, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"From the same windows, I CAN reach 172.27.3.20 with ping.\\n\\nYou are right. On my windows 7 machine, I am running VM with HPCC inside, running IDE and running browser. Through browser, I can access EclWatch. But, through IDE, I can not reach.\\n\\nOne thing here:\\nI did not use your VM version during installation but I directly installed HPCC deb packages onto my existing ubuntu linux VM.\\n\\nIs it a problem?\", \"post_time\": \"2011-07-28 13:58:41\" },\n\t{ \"post_id\": 201, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"that is interesting. can you ping your VM IP from a command prompt running on the same windows box as your VM. If you cannot connect to the URL we provide in the intial VM page that will point us in a direction to investigate...\\n\\nAssumptions I'm making:\\nYou are doing everything from the same windows machine\\n. Running browser\\n. Running IDE\\n. Running VM\\n. The IP I asked you to try 172.27.3.20 is the IP provided by HPCC after installing our VM (I got that IP from the ECL IDE prefence page you included in an earlier post).\", \"post_time\": \"2011-07-27 21:06:35\" },\n\t{ \"post_id\": 200, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"I changed the IP address but it still does not work (see the attached picture).\\n\\nUnfortunately, I can not open https://172.27.3.20:5480, which tells me 'Unable to connect'. Does that give hints?\", \"post_time\": \"2011-07-27 20:44:37\" },\n\t{ \"post_id\": 199, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"The problem is related to the IP being reported in eclwatch. It lists the IP as 127.0.1.1 - localhost. It should be reporting 172.27.3.20 (which is the IP you are specifying in the IDE)\\n\\nCan you go to https://172.27.3.20:5480 and see if the information there is correct?\\n\\nI have attached my initial VM screen that specifies the IP, version etc. \\n\\nCan you send me a screen shot with your info?\\nI am using VMWare player version 3.1.4 build-385536 and have not modified any HPCC settings\\n\\nWhile we investigate, you can try one of the following workarounds:\\n\\n1.\\n- Stop the entire system \\n- Modfiy the /etc/HPCCSystems/environment.xml file. Replace all occurrances of 127.0.1.1 with your actual IP\\n- restart the system.\\n\\nOR\\n\\n2. 
\\n- stop the entire system\\n- Use "configmgr" to modify the file - this tool is documented in "Installating and Running The HPCC Platform" and Using Configuration Manager"\\\\\\n- in the Hardware section replace the ip for 127.0.1.1 with your actual IP\\n- follow directions for copying the saved xml file to /etc/HPCCSystems/environment.xml\\n- restart\", \"post_time\": \"2011-07-27 19:54:43\" },\n\t{ \"post_id\": 198, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"It seems my settings are same as yours. Please see the attachments:[attachment=1:19w6yz33]server_setting.jpg\", \"post_time\": \"2011-07-27 19:10:44\" },\n\t{ \"post_id\": 197, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"sort\", \"post_text\": \"In the community edition you are correct, you can leave the user and password blank.\\n\\nI would like to help you troubleshoot your problem. What I would like to see is the exact information in the preference page for the ECL IDE. I need to know the way the server IP was entered (no http, no port) and how we filled in the rest of the information on the page. Also make sure the SSL check box is NOT checked\\n\\nI also would like to see the information regarding the configuration of ESP. Connect your browser to the ip using port 8010. Click on Topology/System Servers. Click on myesp. Under Network Address what exactly does the page display. What services are listed and what ports are they assigned to? Is everything configured as http.\\n\\nI have attached 2 screen shots from my machine as an example\\n\\nWe are investigating further - this information will greatly help us.\", \"post_time\": \"2011-07-27 18:55:44\" },\n\t{ \"post_id\": 194, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"hli\", \"post_text\": \"I got the same problem even after I restarted the HPCC server on my virtual machine (and rebooted my vm).\\n \\nThe problem is that when I tried to log on my server vis ECL IDE, I was asked username and password. As an user of community version, I do not have user/passwd. so, I just specified the IP and took the default option. Then, I got the same ECL IDE Soap Problems.\\n\\nI am sure the IP I used was correct and I could browse EclWatch through the IP.\", \"post_time\": \"2011-07-27 14:39:31\" },\n\t{ \"post_id\": 120, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"mjwalshe\", \"post_text\": \"[quote="arjuna chala":2shxccax]The Ports are:\\n\\n8010 and 8145\\n\\nchears - looks like it was some odity in centos. Rebooting the VM seemed to do the trick.\", \"post_time\": \"2011-07-11 16:04:16\" },\n\t{ \"post_id\": 119, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"arjuna chala\", \"post_text\": \"The Ports are:\\n\\n8010 and 8145\", \"post_time\": \"2011-07-11 14:22:54\" },\n\t{ \"post_id\": 118, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"mjwalshe\", \"post_text\": \"[quote="bforeman":de0arcjc]A dumb question perhaps, but did you verify that the IP address is correct in your Preferences?\\n\\nWell I used the ip that I sucsessfully opened ECL Watch on - maybe its some firewall issues - though I thought I had disabled that. 
\\n\\nAnyone know which ports the ECL Ide uses?\", \"post_time\": \"2011-07-11 08:12:03\" },\n\t{ \"post_id\": 114, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"Re: ECL IDE Soap Problems\", \"username\": \"bforeman\", \"post_text\": \"A dumb question perhaps, but did you verify that the IP address is correct in your Preferences?\", \"post_time\": \"2011-07-08 17:30:14\" },\n\t{ \"post_id\": 113, \"topic_id\": 56, \"forum_id\": 14, \"post_subject\": \"ECL IDE Soap Problems\", \"username\": \"mjwalshe\", \"post_text\": \"Installed everything can’t get ECL IDE to connect \\n\\nHi got a centos 64bit vm running on VM wareplayer (XP Pro is host) hpcc appears to be running and ECL watch is working fine.\\n\\nHowever when I fire up the ECL IDE I get a SOAP related error \\n\\n29: SOAP 1.1 Fault: SOAP-ENV:Client[no subcode]\\n“an HTTP processing error occurred” \\n\\nThe version of ECL is 6.0.0.6.682.1 – I grabbed the latest one from the eclwatch resources. I have used the hpccdemo and the actual root user \\n\\nMY thought is there is some SOAP component I am missing presumably from the Centos side as this laptop has conected to the test VM fine\", \"post_time\": \"2011-07-08 15:42:28\" },\n\t{ \"post_id\": 136, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"SkipSnow\", \"post_text\": \"I am not sure how to allow the installation to affect the path. It seems to me that I have no choice about this, should I be restarting the computer after an install, and before running q Builder?\", \"post_time\": \"2011-07-12 23:50:24\" },\n\t{ \"post_id\": 135, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"SkipSnow\", \"post_text\": \"I have configured the compiler as per its path:\\n c:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0 and Now the system is up and running! Thanks so much. Skip.\", \"post_time\": \"2011-07-12 21:50:53\" },\n\t{ \"post_id\": 134, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"SkipSnow\", \"post_text\": \"Arjuna:\\n\\nThanks so much for your reply. I did a couple of things: \\n1: I ensured that the qBuilder configuration was as you specified, and as the manual specified. \\n\\n2: I tried to push the 'reset' button on the compiler but nothing happened.\\n\\n3: Though I got my querry builder through the resources link from ECL watch I downloaded and reinstalled the qBuilder from the web site as you suggested, It looks like it is the same version as the one I had installed (6.0.0.6.682.1). \\n\\nStill I get the same error: Bye the way, I also have a warning in my 'Error window: Warning: SOAP -ENV: Client [no subcode] "An HTTP processing error occered" Detail, no detail. I believe there is another post on this issue. \\n\\nAny further advise? 
\\n\\nThanks\\n\\nSkip\", \"post_time\": \"2011-07-12 21:44:44\" },\n\t{ \"post_id\": 133, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"sort\", \"post_text\": \"The current version is 6.0.0.6.682.1.\\n\\neclcc.exe can be found at c:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0.\\n\\nI also let the installation process update my environment path\", \"post_time\": \"2011-07-12 21:31:25\" },\n\t{ \"post_id\": 132, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"HPCC Staff\", \"post_text\": \"Skip,\\n\\nIn addition are you using the latest ECL IDE that is available on the web site at -\\nhttp://hpccsystems.com/download/free-community-edition. Older versions of Query Builder will not work with the VM.\\n\\nIn addition, if it helps, please refer to the blog article http://arjunachala.blogspot.com/2011/07/ecl-part-i-loading-data.html.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2011-07-12 21:00:13\" },\n\t{ \"post_id\": 131, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Re: Compiler not configured\", \"username\": \"sort\", \"post_text\": \"Try the following (in the ECL IDE / querybuilder)\\n. go to preferences\\n. make sure the server IP is the IP of the eclwatch box (should be the IP of your VM instance)\\n. go to the compiler tab\\n. click the "reset" button\", \"post_time\": \"2011-07-12 20:12:12\" },\n\t{ \"post_id\": 129, \"topic_id\": 59, \"forum_id\": 14, \"post_subject\": \"Compiler not configured\", \"username\": \"SkipSnow\", \"post_text\": \"I downloaded the community edition, and I installed the VMWare player. I got ECL watch to load, and I was able to write the hello world, example and check its syntax in the query builder.\\n\\nHowever the compiler is not configured and I can not run the code, as the code can not be compiled. I can not find the file: eclcc.exe anywhere on my box. Any ideas?\", \"post_time\": \"2011-07-12 19:27:13\" },\n\t{ \"post_id\": 158, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"SkipSnow\", \"post_text\": \"Thanks for all of your help. It got fixed when I applied the changes you requested, and then pressed the reset button on the compiler page, after I applied the changes you recomended. The process to make the repository visible was:\\n1: create directories: \\n 'c:\\\\users\\\\public\\\\public documents\\\\HPCC Systems\\\\ECL\\\\My Files'\\n 'c:\\\\users\\\\public\\\\public documents\\\\HPCC Systems\\\\ECL\\\\Samples'\\n 'c:\\\\users\\\\public\\\\public documents\\\\HPCC Systems\\\\ECL\\\\wu'\\n2: configure the 'woring folder' in the compiler tab of the preferences dialog by browsing to directory: 'c:\\\\users\\\\public\\\\public documents\\\\HPCC Systems\\\\ECL\\\\wu' (I could have typed it instead)\\n\\n3: pushing the 'reset' button on the compiler tab. 
\\n\\nAfter doing these steps, the ECL folders 'My Files, and Samples' appeared in the ECL folders box in the compiler, and the samples folder got all the samples from the deployment.\", \"post_time\": \"2011-07-15 00:12:45\" },\n\t{ \"post_id\": 157, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"sort\", \"post_text\": \"you definitely need permissions in order to see / write\", \"post_time\": \"2011-07-14 19:59:27\" },\n\t{ \"post_id\": 155, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"HPCC Staff\", \"post_text\": \"Use of SVN should not matter. \\n\\nWe have the IDE developers looking at this issue and will get back to you. We suspect that it has something to do with you not having permissions to the "Documents" directory.\", \"post_time\": \"2011-07-14 17:51:24\" },\n\t{ \"post_id\": 154, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"SkipSnow\", \"post_text\": \"No, it did not help. Alas. Ihave managed to hack an attribute and module into the repository but I am sure I did it in the wrong place. It is in \\n'c:\\\\program files\\\\hpccSystems\\\\HPCC\\\\bin\\\\ver_3_0\\\\ecllibrary\\\\[myModule]\\\\[myAttribute].ecl'\\n\\nI will try to do the same thing in order to deploy a Roxi querry and see if this works.\\n\\nHowever I would like to put the data into the right place.\\n\\nIs it significant that I am using SVN? for other things that this computer is working on?\", \"post_time\": \"2011-07-14 17:44:04\" },\n\t{ \"post_id\": 143, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"arjuna chala\", \"post_text\": \"Hi Skip,\\n\\nWe need to investigate why your defaults are not right. In the meanwhile, in your ECL IDE preferences please do the following:\\n\\nWorking Folder = c:\\\\users\\\\public documents\\\\HPCC Systems\\\\ECL\\\\wu (please create if it does not exist)\\n\\nand then add \\n\\nc:\\\\users\\\\public documents\\\\HPCC Systems\\\\ECL\\\\My Files\\nc:\\\\users\\\\public documents\\\\HPCC Systems\\\\ECL\\\\Samples\\n\\nto the ECL Folders. Let us know if this helps.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2011-07-13 19:25:57\" },\n\t{ \"post_id\": 141, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"SkipSnow\", \"post_text\": \"I have folders \\n c:\\\\users\\\\public documents\\\\HPCC Systems\\\\ECL\\\\My Files\\n c:\\\\users\\\\public documents\\\\HPCC Systems\\\\ECL\\\\Samples\\n\\nMy Files was not there, I created it. Samples was there.\\n\\nI can not create a folder 'documents' Could this be the source of the problem?\\n\\nTo be clear, I am using a virgin computer that has never seen an IDE that is not the cummunity edition. 
So the folder that you refer to that I should rename to 'old' does not exist on my system.\", \"post_time\": \"2011-07-13 15:28:56\" },\n\t{ \"post_id\": 138, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Re: Can not save to the repository.\", \"username\": \"sort\", \"post_text\": \"Based on another forum post, I assume you are trying to run the ECL IDE on a windows 7 machine.\\nCan you verify that you have the following directories:\\nc:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\nc:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\Samples\\n\\nThey should be defined in the compiler tab in the IDE.\\n\\nIt also sounds like at some poit you may have upgraded from earlier version of the IDE. Can you stop the IDE, rename the following directory (replace xxxx with your username) from \\nc:\\\\Users\\\\xxxx\\\\AppData\\\\Roaming\\\\HPCC Systems\\\\eclide\\nto \\nc:\\\\Users\\\\xxxx\\\\AppData\\\\Roaming\\\\HPCC Systems\\\\eclide_old\\nand start the IDE again.\\n\\nI think this issue and the issue with locating the eclcc.exe issue are related\", \"post_time\": \"2011-07-13 00:39:47\" },\n\t{ \"post_id\": 137, \"topic_id\": 60, \"forum_id\": 14, \"post_subject\": \"Can not save to the repository.\", \"username\": \"SkipSnow\", \"post_text\": \"Repository does not allow me to add attributes or modules, and it is empty.\\n\\nI can spray in a file I get from a public data source (CSV) and create a data set and use it, but I can not save any code to the repository.\\n\\nI do not have a folder named 'myFiles' in the repository, which in so much as I can see I am supposed to have as per the manual? How can I use the repository? \\n\\nThanks in advance for the help.\", \"post_time\": \"2011-07-12 23:53:24\" },\n\t{ \"post_id\": 165, \"topic_id\": 67, \"forum_id\": 14, \"post_subject\": \"Re: Upgrading my Virtual Machine install - IP address\", \"username\": \"bforeman\", \"post_text\": \"OK, Thanks for the reply. So when a new IP address is assigned, we need to spray again. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-07-18 15:24:31\" },\n\t{ \"post_id\": 164, \"topic_id\": 67, \"forum_id\": 14, \"post_subject\": \"Re: Upgrading my Virtual Machine install - IP address\", \"username\": \"sort\", \"post_text\": \"Bob,\\n There is nothing in our VM image to prevent this, but as far as we know whenever you install a new VM, you get an new IP and new disk from VMWare\", \"post_time\": \"2011-07-18 14:18:41\" },\n\t{ \"post_id\": 162, \"topic_id\": 67, \"forum_id\": 14, \"post_subject\": \"Upgrading my Virtual Machine install - IP address\", \"username\": \"bforeman\", \"post_text\": \"Hi team,\\n\\nI want to upgrade my VM install to the latest 3.0.2. How do I salvage my files that are already sprayed to MyThor from the previous VM install? 
When I upgrade, the IPv4 address is autmatically incremented by one.\\n\\nExample: 192.168.159.130\\nAfter upgrade: 192.168.159.131 \\n\\nIn other words, can I simply reset my IPv4 Address back to the old address?\\n\\nIn the VMware Player, what option would I select to do that (if possible)?\\n\\nThanks!\\n\\nBob Foreman\", \"post_time\": \"2011-07-15 16:02:41\" },\n\t{ \"post_id\": 264, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"This is now working well - thank you ...\\n\\nDavid\", \"post_time\": \"2011-08-11 14:44:30\" },\n\t{ \"post_id\": 263, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"richardkchapman\", \"post_text\": \"It seems the root cause of the failure to start was that something (not one of our programs, as far as I can tell) had during a failed installation replaced the standard start-stop-daemon program with a dummy (leaving the original in start-stop-daemon.REAL)\\n\\nSome installs apparently do this to temporarily disable services from starting, but should not leave it in this state. Googling for start-stop-daemon.REAL brings up a number of examples of other people (and other programs) suffering from similar issues.\\n\\nWe can look into putting some sort of check in to detect this problem and make it more obvious what is happening (though we can't really detect EVERY way in which a system may have been messed up by other programs' failed installs...)\", \"post_time\": \"2011-08-11 14:37:50\" },\n\t{ \"post_id\": 232, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"You mean nobody -else- types filenames into Bash .... remember my lifetime Linux experience was <4 hours at the point I was doing that ....\", \"post_time\": \"2011-08-05 14:24:38\" },\n\t{ \"post_id\": 230, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"richardkchapman\", \"post_text\": \"[quote="dabayliss":2kui2xum]<btw guys - that is a HORRIBLE name if you are forcing people to type it all in!>\\n\\nNobody types in filenames in bash... The Tab key is your friend.\", \"post_time\": \"2011-08-05 14:17:30\" },\n\t{ \"post_id\": 185, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"/var/lib/HPCCSystems - drwxr-xr-x\\nall of the directories underneath drwxr-xr-x\\nexcept mydropzone drwxrwxrwx\\n\\n/var/log/HPCCSystems - drwxr-xr-x\\nall of the directories underneath drwxr-xr-x\\n\\n/var/run/HPCCSystems - this directory does not exist - I'm going to guess that is a bad thing: right?\\nI do have /var/run with permissions drwxr-xr-x\\n\\n/var/lock/HPCCSystems - I don't have this one either. 
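The missing /var/run/HPCCSystems and /var/lock/HPCCSystems directories noted above are normally created by the installer, so treat the following only as a rough recovery sketch; the hpcc:hpcc ownership matches what the start-stop-daemon command quoted in this thread expects.

# recreate the runtime directories and hand the HPCC trees back to the hpcc user
sudo mkdir -p /var/run/HPCCSystems /var/lock/HPCCSystems
sudo chown -R hpcc:hpcc /var/run/HPCCSystems /var/lock/HPCCSystems /var/lib/HPCCSystems /var/log/HPCCSystems
ls -ld /var/run/HPCCSystems /var/lock/HPCCSystems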
Again I do have the lock directory with permissions:drwxrwxrwx\\n\\nAll of the directories I had had owner and group hpcc\\n\\nHTH\\n\\nDavid\\n\\n[quote="pschwartz":1qq4hap9]Can you provide a list of permissions on the following directories and all subdirectories?\\n\\n/var/lib/HPCCSystems\\n/var/log/HPCCSystems\\n/var/run/HPCCSystems\\n/var/lock/HPCCSystems\", \"post_time\": \"2011-07-22 14:15:18\" },\n\t{ \"post_id\": 184, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"pschwartz\", \"post_text\": \"Can you provide a list of permissions on the following directories and all subdirectories?\\n\\n/var/lib/HPCCSystems\\n/var/log/HPCCSystems\\n/var/run/HPCCSystems\\n/var/lock/HPCCSystems\", \"post_time\": \"2011-07-22 12:54:11\" },\n\t{ \"post_id\": 183, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"Will not allow me to even try to start the script as user hpcc (claims I don't have priveleges. They all still fail - here is the mydali log; I can get any other logs needed ...\\n\\nCOMMAND:: /sbin//start-stop-daemon -S -p /var/run/HPCCSystems/mydali_init.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mydali -m -x /opt/HPCCSystems/bin/init_dali -b >>/var/log/HPCCSystems/mydali/mydali_init.log 2>&1 ::Issued at Thu Jul 21 19:54:27 EDT 2011\", \"post_time\": \"2011-07-22 00:04:44\" },\n\t{ \"post_id\": 181, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"Ok - I am using the Desktop.\\n\\nInitially I had used the software center to install - and it complained as stated.\\n\\nI then went and download a shell and used the dpkg and it all claimed to install nicely <btw guys - that is a HORRIBLE name if you are forcing people to type it all in!>\\n\\nThe I tried to start things using:\\n\\nsudo service hpcc-init start\\n\\nthe system attempts to install - stepping through every component - every one of which fails.\\n\\nThe docs say the logs will explain why it is not working; they probably do - but not to me. What should I be looking for? Am I supposed to switch to the hpcc user before running the startup?\", \"post_time\": \"2011-07-21 18:28:38\" },\n\t{ \"post_id\": 180, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"jonburger\", \"post_text\": \"Hmm.... That's going to be a problem. The install normally creates a user "HPCC" that has the uid of 1000. \\n\\nThree potential solutions I see:\\n\\n1. Skip on by the message as it should be fine once the post-install creates the HPCC user under uid 1000.\\n\\n2. Files get installed as root, then 'chown'ed to HPCC during post install script once the user gets created.\\n\\n3. 
Documentation might need to be modified to include creating the HPCC user with uid of 1000 prior to install.\\n\\nLikely you are not installing on Ubuntu "Server" and installing on Ubuntu "Desktop" which may have some extra checks for packages.\\n\\nCheers!\", \"post_time\": \"2011-07-21 18:11:07\" },\n\t{ \"post_id\": 179, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Re: Install under Ubuntu 11 - claims package is low quality\", \"username\": \"pschwartz\", \"post_text\": \"What dpkg command did you use to do the install?\\n\\nFor example did you use just `sudo dpkg -i <package>` or did you add other options to the command?\", \"post_time\": \"2011-07-21 17:57:10\" },\n\t{ \"post_id\": 178, \"topic_id\": 72, \"forum_id\": 14, \"post_subject\": \"Install under Ubuntu 11 - claims package is low quality\", \"username\": \"dabayliss\", \"post_text\": \"Ok - first a disclaimer - I installed Ubuntu so that I can install HPCC - so not exactly a power-user here ...\\n\\nHowever - I installed all of the dependencies (at least the ones in the docs) and then told it to install. It download and started but then claimed the package was low quality and told me to tell you this:\\nLintian check results for /tmp/hpccsystems-platform-community_3.0.3-3natty_amd64.deb:\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid etc/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid etc/HPCCSystems/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid etc/HPCCSystems/rpmnew/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid etc/HPCCSystems/rpmnew/environment.conf 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid etc/HPCCSystems/rpmnew/genenvrules.conf 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/LICENSE.txt 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/ 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/agentexec 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/backupnode 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/check_one_dafilesrv 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/combine 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/copyexp 1000/1000\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/bin/dafilesrv 1000/1000\\n\\n--- curtailed due to message size restrictions\\n\\nE: hpccsystems-platform: wrong-file-owner-uid-or-gid opt/HPCCSystems/share/ecllibrary/teststd/uni/TestWordCount.ecl 1000/1000\\n\\n\\nCan I do an 'ignore and continue' or do we have an issue? 
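Since the root cause eventually identified in this thread was a dummy start-stop-daemon left behind by some other package's failed install, a minimal check is sketched below; the paths assume a standard Ubuntu layout and the .REAL backup name mentioned earlier in the thread.

# see whether start-stop-daemon has been replaced by a placeholder
ls -l /sbin/start-stop-daemon /sbin/start-stop-daemon.REAL
file /sbin/start-stop-daemon
# only if a dummy is confirmed and the .REAL backup exists:
# sudo mv /sbin/start-stop-daemon.REAL /sbin/start-stop-daemon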
(Using Ubuntu 11.04)\\n\\nDavid\", \"post_time\": \"2011-07-21 17:32:28\" },\n\t{ \"post_id\": 1122, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"Re: SOAP problem again\", \"username\": \"gsmith\", \"post_text\": \"BTW These are my HPCCBIN/HPCCECL environment settings:\\n\\nD:\\\\temp>set HPCCBIN\\nHPCCBIN=C:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\\\n\\nD:\\\\temp>set HPCCECL\\nHPCCECL=C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\\", \"post_time\": \"2012-02-17 10:47:35\" },\n\t{ \"post_id\": 1121, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"Re: SOAP problem again\", \"username\": \"gsmith\", \"post_text\": \"I have attached two images from the prefs window. One is for the IP entry - note that it is the IP only...\\n\\nThe second is what your compiler window should roughly look like. If it is empty try pressing the "Reset" button to load defaults (these come from environment variables HPCCBIN and HPCCECL). If it is still empty you may need to reboot after the install or manually set HPCCBIN/HPCCECL.\\n\\nEdit: Please ignore the "-main" entry, it was for local testing only.\", \"post_time\": \"2012-02-17 10:43:06\" },\n\t{ \"post_id\": 1120, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"Re: SOAP problem again\", \"username\": \"vamshi123\", \"post_text\": \"hi,\\n am too facing the same problem, i checked with compiler path but it's not working. Even after reinstalling am unable to get output in ECL IDE. Please help me to solve it.\\n Thank you for your time.\", \"post_time\": \"2012-02-17 10:35:46\" },\n\t{ \"post_id\": 209, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"Re: SOAP problem again\", \"username\": \"richard.taylor@lexisnexis.com\", \"post_text\": \"Here's a wild guess -- when you configured the IDE with the IP to hit, did you include the port in that? If so, you should remove the port. The Preferences dialog's Server entry control only wants the IP to hit (the IDE already knows what ports it can use).\", \"post_time\": \"2011-07-28 20:23:49\" },\n\t{ \"post_id\": 196, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"Re: SOAP problem again\", \"username\": \"hli\", \"post_text\": \"Oh, I used 'hpccdemo' account and got the same error.\", \"post_time\": \"2011-07-27 18:51:32\" },\n\t{ \"post_id\": 195, \"topic_id\": 75, \"forum_id\": 14, \"post_subject\": \"SOAP problem again\", \"username\": \"hli\", \"post_text\": \"I posted my problem in the other thread about ECL IDE problem, but nobody answered it. I guess I'd better create a new one:\\n\\nI got the following SOAP problem when I tried to use ECL IDE:\\n\\nWarning: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\\n“An HTTP processing error occurred” \\nDetail: [no detail]\\n\\nI have the same problem even after I restarted the HPCC server on my virtual machine (and rebooted my vm).\\n\\nThe problem is that when I tried to log on my server via ECL IDE, I was asked username and password. As an user of community version, I do not have user/passwd. so, I just specified the IP and took the default option. 
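When the IDE reports the HTTP processing error but ECL Watch loads fine in a browser, it can also help to confirm, on the VM itself, that the esp process is the one listening on the web service port. A rough check, assuming the default port 8010:

# run on the HPCC VM
ps aux | grep [e]sp
sudo netstat -tlnp | grep 8010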
Then, I got the same ECL IDE Soap Problems.\\n\\nI am sure the IP I used was correct and I could browse EclWatch through the IP.\", \"post_time\": \"2011-07-27 17:48:08\" },\n\t{ \"post_id\": 234, \"topic_id\": 83, \"forum_id\": 14, \"post_subject\": \"VirtualBoxes.org has pre-built images of OSS systems\", \"username\": \"MAPstr\", \"post_text\": \"I'm not sure if you can use this, but...\\n\\nVirtualboxes has free ready-to-use virtual machine images of open-source operating systems, including CentOS, OpenSUSE and UBUNTU.\\n\\nhttp://virtualboxes.org/images/\", \"post_time\": \"2011-08-06 13:49:18\" },\n\t{ \"post_id\": 257, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"I've opened a bug report to handle cases like this better. We will default to using the first non-loopback interface that gives us a non-null ip address, and fall back to loopback if there is no such interface.\", \"post_time\": \"2011-08-11 13:10:36\" },\n\t{ \"post_id\": 255, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \"Ok - I edited the \\n/etc/HPCCSystems\\n.conf to use wlan0 and my system now seems to be up and happy ...\\n\\nThank you ...\\n\\nDavid\", \"post_time\": \"2011-08-11 12:40:26\" },\n\t{ \"post_id\": 254, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \">>/opt/HPCCSystems/sbin/get_ip_address.sh\\n\\nSays 'completed' but only outputs one blank character ...\\n\\n>>cat /env/HPCCSystems/environment.conf\\n\\nSays - error no such file or directory\\nI did a search - and I do have 3 environment.conf in other places - all have the interface=eth0 line. The places are:\\n/etc/HPCCSystems\\n/etc/HPCCSystems/rpmnew\\n/opt/HPCCSystems/etc/HPCCSystems\\n\\n>>hostname -I\\n192.168.2.4 \\n\\n>>ifconfig\\n\\neth0 Link encap:Ethernet HWaddr 18:a9:05:1a:9f:6b \\n UP BROADCAST MULTICAST MTU:1500 Metric:1\\n RX packets:0 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:0 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:1000 \\n RX bytes:0 (0.0 B) TX bytes:0 (0.0 B)\\n Interrupt:42 Base address:0xc000 \\n\\nlo Link encap:Local Loopback \\n inet addr:127.0.0.1 Mask:255.0.0.0\\n inet6 addr: ::1/128 Scope:Host\\n UP LOOPBACK RUNNING MTU:16436 Metric:1\\n RX packets:20 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:20 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:0 \\n RX bytes:1200 (1.2 KB) TX bytes:1200 (1.2 KB)\\n\\nwlan0 Link encap:Ethernet HWaddr 00:26:82:25:70:1d \\n inet addr:192.168.2.4 Bcast:192.168.2.255 Mask:255.255.255.0\\n inet6 addr: fe80::226:82ff:fe25:701d/64 Scope:Link\\n UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1\\n RX packets:4815 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:2816 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:1000 \\n RX bytes:3244579 (3.2 MB) TX bytes:537115 (537.1 KB)\\n\\nThis is a 'best buy' box; it comes with an ethernet port - but we don't use it. 
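The fix described in this thread is to point the platform at the interface that actually has an address (wlan0 in the ifconfig output above). A minimal sketch of that edit, assuming the stock /etc/HPCCSystems/environment.conf with an interface=eth0 line:

# switch the configured interface, then start the services again
sudo sed -i 's/^interface=.*/interface=wlan0/' /etc/HPCCSystems/environment.conf
sudo service hpcc-init start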
The machines are connected wirelessly.\\n\\nDavid\", \"post_time\": \"2011-08-11 12:30:27\" },\n\t{ \"post_id\": 252, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"The roxie error suggests that your machine's ip address has been expanded to 0.0.0.0 by the code that generates the RoxieTopology.xml file from your environment.xml\\n\\nCan you tell me what the following commands output (some may need to be run as root):\\n\\n/opt/HPCCSystems/sbin/get_ip_address.sh\\ncat /env/HPCCSystems/environment.conf\\nhostname -I\\nifconfig\\n\\nI suspect that the environment.conf file specifies interface=eth0, and that you don't have an eth0 interface for some reason. If I change my environment.conf to use an interface that does not exist I can reproduce the same behaviour you are observing. Setting interface= in environment.conf to the name of your primary network interface (as output by ifconfig) should allow you to proceed further (you can use interface=lo if you are only intending to run on a single node, if no other interface is appropriate).\", \"post_time\": \"2011-08-11 09:57:11\" },\n\t{ \"post_id\": 251, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"I just did the following:\\n\\nCreated a new virtual machine (bridged network, 8Gb HDD, 1Gb memory)\\nInstalled Ubuntu 11.04 Desktop - 64bit - using default options as far as possible\\nDownloaded hpccsystems-platform-community_3.0.4-8lucid_amd64.deb from hpccsystems.com using firefox (I selected the option to save rather than to open with the Ubuntu software centre).\\nOpened a terminal window and used the following commands:\\n\\n
\\ncd Downloads\\nsudo dpkg -i hpccsystems-platform-community_3.0.4-8lucid_amd64.deb\\n
\\nGot a message about some missing dependencies, so I ran\\n\\nsudo apt-get -f install\\n
\\nwhich installed the missing dependencies and ran the rest of the hpcc install process. Then I ran\\n\\nsudo service hpcc-init start\\n
\\nand everything started successfully.\\n\\nAnything different to what you did?\", \"post_time\": \"2011-08-11 08:47:29\" },\n\t{ \"post_id\": 249, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \"Linux ubuntu 2.6.38-10-generic #46-Ubuntu SMP Tue Jun 28 15:07:17 UTC 2011 x86_64 x86_64 x86_64 GNU/Linux\", \"post_time\": \"2011-08-09 19:44:18\" },\n\t{ \"post_id\": 247, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"What does uname -a say?\", \"post_time\": \"2011-08-09 16:09:14\" },\n\t{ \"post_id\": 246, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \"This was an absolutely clean - never installed HPCC - Ubuntu 11.04 Desktop\\n\\nI am going to try to 'undo' the other one this evening ...\\n\\nDavid\", \"post_time\": \"2011-08-09 15:28:48\" },\n\t{ \"post_id\": 245, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"And this was a clean install of Ubuntu, that you have not tried to install HPCC on before?\\n\\n(Ubuntu 11.04 Desktop, right?)\", \"post_time\": \"2011-08-09 12:59:32\" },\n\t{ \"post_id\": 244, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \"Everything is 'default out of the box' - Ubuntu and HPCC.\\n\\nIn the HPCC case I followed the instructions on the download page - so:\\n\\nInstalled the dependencies\\nsudo dpkg -i nasty-file-name\\nsudo path/keygen.sh\\nsudo service hpcc-init start\\n\\nhostname -i returns:\\n\\n127.0.1.1\\n\\nDavid\", \"post_time\": \"2011-08-09 12:55:58\" },\n\t{ \"post_id\": 243, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"richardkchapman\", \"post_text\": \"Had you configured the environment using configmgr or was this using the default 1-way environment straight from installing the .deb file?\\n\\nCan you tell me what hostname -i reports?\", \"post_time\": \"2011-08-09 12:40:41\" },\n\t{ \"post_id\": 242, \"topic_id\": 84, \"forum_id\": 14, \"post_subject\": \"Ubuntu 11 - Could not resolve address 0.0.0.0\", \"username\": \"dabayliss\", \"post_text\": \"Decided to 'Ubuntu-ize' a second machine to see if it had more success with the system.\\n\\nIt did - most of the systems initialized perfectly - two didn't - dali and roxie. I have attached the full logs but the germane bit seems to be:\\n\\n00000050 2011-08-08 18:31:23 3167 3167 "EXCEPTION: (1455): Could not resolve address 0.0.0.0"\\nIn the roxie log and\\n00000046 2011-08-08 19:16:09 8821 8821 "ERROR: Exception - Failed to load main store : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000047 2011-08-08 19:16:09 8821 8821 "Failed to load main store"\\n\\nI suspect it is the 0.0.0.0 that is giving a problem ....\\n\\nWhat do I need to tweak?\\n\\ndali:\\n00000000 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... daldap.cpp $ $Id: daldap.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000001 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
client.cpp $ $Id: daclient.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000002 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... acoven.cpp $ $Id: dacoven.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000003 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dacsds.cpp $ $Id: dacsds.cpp 62962 2011-03-04 11:10:12Z jsmith $"\\n00000004 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /dadfs.cpp $ $Id: dadfs.cpp 63465 2011-03-25 10:43:19Z nhicks $"\\n00000005 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... adiags.cpp $ $Id: dadiags.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000006 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /danqs.cpp $ $Id: danqs.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000007 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /dasds.cpp $ $Id: dasds.cpp 63291 2011-03-18 16:21:00Z jsmith $"\\n00000008 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dasess.cpp $ $Id: dasess.cpp 64657 2011-05-18 11:46:07Z jsmith $"\\n00000009 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dasubs.cpp $ $Id: dasubs.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000000A 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mtfile.cpp $ $Id: rmtfile.cpp 64457 2011-05-09 17:28:42Z yma $"\\n0000000B 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mtpass.cpp $ $Id: rmtpass.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000000C 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... tspawn.cpp $ $Id: rmtspawn.cpp 64028 2011-04-14 14:28:10Z nhicks $"\\n0000000D 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rmtssh.cpp $ $Id: rmtssh.cpp 64028 2011-04-14 14:28:10Z nhicks $"\\n0000000E 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/trunk/co ... mtsmtp.cpp $ $Id: rmtfile.cpp 59036 2010-08-31 17:54:39Z nhicks $"\\n0000000F 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... ckfile.cpp $ $Id: sockfile.cpp 62595 2011-02-17 14:30:45Z rchapman $"\\n00000010 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... hrpcmp.cpp $ $Id: hrpcmp.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000011 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... pcsock.cpp $ $Id: hrpcsock.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000012 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... pcutil.cpp $ $Id: hrpcutil.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000013 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mpbase.cpp $ $Id: mpbase.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000014 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mpcomm.cpp $ $Id: mpcomm.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000015 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /mplog.cpp $ $Id: mplog.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000016 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jarray.cpp $ $Id: jarray.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000017 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
vahash.cpp $ $Id: javahash.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000018 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... socket.cpp $ $Id: jbsocket.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000019 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jbuff.cpp $ $Id: jbuff.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000001A 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jcomp.cpp $ $Id: jcomp.cpp 65606 2011-06-20 19:10:47Z clo $"\\n0000001B 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jcrc.cpp $ $Id: jcrc.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000001C 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jdebug.cpp $ $Id: jdebug.cpp 65040 2011-06-01 14:47:31Z ghalliday $"\\n0000001D 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... except.cpp $ $Id: jexcept.cpp 65025 2011-06-01 11:23:50Z rchapman $"\\n0000001E 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jfile.cpp $ $Id: jfile.cpp 63177 2011-03-14 16:28:18Z nhicks $"\\n0000001F 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jhash.cpp $ $Id: jhash.cpp 62965 2011-03-04 12:34:40Z ghalliday $"\\n00000020 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jiface.cpp $ $Id: jiface.cpp 62933 2011-03-03 17:00:04Z nhicks $"\\n00000021 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... ib/jio.cpp $ $Id: jio.cpp 63504 2011-03-26 13:46:15Z nhicks $"\\n00000022 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jiter.cpp $ $Id: jiter.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000023 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlib.cpp $ $Id: jlib.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000024 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlog.cpp $ $Id: jlog.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000025 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlzw.cpp $ $Id: jlzw.cpp 64790 2011-05-20 15:20:41Z jsmith $"\\n00000026 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... emleak.cpp $ $Id: jmemleak.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000027 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jmisc.cpp $ $Id: jmisc.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000028 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jmutex.cpp $ $Id: jmutex.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000029 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... bserve.cpp $ $Id: jobserve.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002A 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jprop.cpp $ $Id: jprop.cpp 62965 2011-03-04 12:34:40Z ghalliday $"\\n0000002B 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jptree.cpp $ $Id: jptree.cpp 62962 2011-03-04 11:10:12Z jsmith $"\\n0000002C 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... regexp.cpp $ $Id: jregexp.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002D 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
b/jsem.cpp $ $Id: jsem.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002E 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jset.cpp $ $Id: jset.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002F 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rtsock.cpp $ $Id: jsmartsock.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000030 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jsort.cpp $ $Id: jsort.cpp 63168 2011-03-14 11:27:12Z nhicks $"\\n00000031 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jstats.cpp $ $Id: jstats.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000032 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... stream.cpp $ $Id: jstream.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000033 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... string.cpp $ $Id: jstring.cpp 62922 2011-03-03 14:18:20Z nhicks $"\\n00000034 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... erhash.cpp $ $Id: jsuperhash.cpp 64069 2011-04-18 14:15:50Z rchapman $"\\n00000035 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... thread.cpp $ $Id: jthread.cpp 64340 2011-05-03 09:41:15Z rchapman $"\\n00000036 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jtime.cpp $ $Id: jtime.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000037 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... nicode.cpp $ $Id: junicode.cpp 64453 2011-05-09 08:23:35Z ghalliday $"\\n00000038 2011-08-08 19:18:16 9104 9104 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
/jutil.cpp $ $Id: jutil.cpp 64815 2011-05-20 22:01:08Z sort $"\\n00000039 2011-08-08 19:18:16 9104 9104 "Checking backup location: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n0000003A 2011-08-08 19:18:16 9104 9104 "WARNING: Local path used for backup url: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n0000003B 2011-08-08 19:18:16 9104 9104 "Backup URL = //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n0000003C 2011-08-08 19:18:16 9104 9104 "Server Version = 3.7, required minimum client version 1.5"\\n0000003D 2011-08-08 19:18:16 9104 9104 "ERROR: /home/builds/64bit_11.04_community_3.0.4/ecl_community_3.0.4/dali/base/dacoven.cpp(515) : writing coven backup : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/dalicoven.xml"\\n0000003E 2011-08-08 19:18:16 9104 9104 "ERROR: /home/builds/64bit_11.04_community_3.0.4/ecl_community_3.0.4/dali/base/dacoven.cpp(515) : writing coven backup : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/dalicoven.xml"\\n0000003F 2011-08-08 19:18:16 9104 9104 "BackupHandler started, async=false"\\n00000040 2011-08-08 19:18:16 9104 9104 "loading store 0, storedCrc=0"\\n00000041 2011-08-08 19:18:16 9104 9104 "Store 0 does not exist, creating new store"\\n00000042 2011-08-08 19:18:16 9104 9104 "store loaded"\\n00000043 2011-08-08 19:18:16 9104 9104 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n00000044 2011-08-08 19:18:16 9104 9104 "Scanning store for external references"\\n00000045 2011-08-08 19:18:16 9104 9104 "External reference count = 0"\\n00000046 2011-08-08 19:18:16 9104 9104 "ERROR: Exception - Failed to load main store : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000047 2011-08-08 19:18:16 9104 9104 "Failed to load main store"\\n00000048 2011-08-08 19:18:16 9104 9104 "ERROR: /home/builds/64bit_11.04_community_3.0.4/ecl_community_3.0.4/dali/server/daserver.cpp(420) : Failed whilst starting servers : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000049 2011-08-08 19:18:16 9104 9104 "Suspending 6"\\n0000004A 2011-08-08 19:18:16 9104 9104 "Suspending 5"\\n0000004B 2011-08-08 19:18:16 9104 9104 "Suspending 4"\\n0000004C 2011-08-08 19:18:16 9104 9104 "Suspending 3"\\n0000004D 2011-08-08 19:18:16 9104 9104 "Suspending 2"\\n0000004E 2011-08-08 19:18:16 9104 9104 "Suspending 1"\\n0000004F 2011-08-08 19:18:16 9104 9104 "Suspending subscriptions"\\n00000050 2011-08-08 19:18:16 9104 9104 "Suspended subscriptions"\\n00000051 2011-08-08 19:18:16 9104 9104 "Suspending 0"\\n00000052 2011-08-08 19:18:16 9104 9104 "Stopping 6"\\n00000053 2011-08-08 19:18:16 9104 9104 "Stopping 5"\\n00000054 2011-08-08 19:18:16 9104 9104 "Stopping 4"\\n00000055 2011-08-08 19:18:16 9104 9104 "Stopping 3"\\n00000056 2011-08-08 19:18:16 9104 9104 "Stopping 2"\\n00000057 2011-08-08 19:18:16 9104 9104 "Stopping 1"\\n00000058 2011-08-08 19:18:16 9104 9104 "Stopping 0"\\n00000059 2011-08-08 19:18:16 9104 9111 "BackupHandler stopped"\\n0000005A 2011-08-08 19:18:16 9104 9104 "ERROR: /home/builds/64bit_11.04_community_3.0.4/ecl_community_3.0.4/dali/server/daserver.cpp(460) : Exception : CreateIFile cannot resolve //0.0.0.0/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n\\nRoxie:\\n00000001 2011-08-08 18:31:23 3167 3167 "Roxie starting"\\n00000002 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
server.cpp $ $Id: ccdserver.cpp 65353 2011-06-10 20:38:19Z pschwartz $"\\n00000003 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... litter.cpp $ $Id: csvsplitter.cpp 62567 2011-02-16 16:01:41Z rchapman $"\\n00000004 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... orstep.cpp $ $Id: thorstep.cpp 63546 2011-03-29 12:33:32Z ghalliday $"\\n00000005 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rstep2.cpp $ $Id: thorstep2.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000006 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mlread.cpp $ $Id: thorxmlread.cpp 65348 2011-06-10 20:25:49Z pschwartz $"\\n00000007 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... wujobq.cpp $ $Id: wujobq.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000008 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... eftype.cpp $ $Id: deftype.cpp 64094 2011-04-19 08:13:22Z ghalliday $"\\n00000009 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... fvalue.cpp $ $Id: defvalue.cpp 62965 2011-03-04 12:34:40Z ghalliday $"\\n0000000A 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... eclrtl.cpp $ $Id: eclrtl.cpp 64992 2011-05-31 12:36:24Z ghalliday $"\\n0000000B 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... ldistr.cpp $ $Id: rtldistr.cpp 64094 2011-04-19 08:13:22Z ghalliday $"\\n0000000C 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rtlint.cpp $ $Id: rtlint.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000000D 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... tlqstr.cpp $ $Id: rtlqstr.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000000E 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... tlrank.cpp $ $Id: rtlrank.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000000F 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... lfield.cpp $ $Id: rtlfield.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000010 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... tltype.cpp $ $Id: rtltype.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000011 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... alienv.cpp $ $Id: dalienv.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000012 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... client.cpp $ $Id: daclient.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000013 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... acoven.cpp $ $Id: dacoven.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000014 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dacsds.cpp $ $Id: dacsds.cpp 62962 2011-03-04 11:10:12Z jsmith $"\\n00000015 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /dadfs.cpp $ $Id: dadfs.cpp 63465 2011-03-25 10:43:19Z nhicks $"\\n00000016 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... adiags.cpp $ $Id: dadiags.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000017 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
/danqs.cpp $ $Id: danqs.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000018 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /dasds.cpp $ $Id: dasds.cpp 63291 2011-03-18 16:21:00Z jsmith $"\\n00000019 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dasess.cpp $ $Id: dasess.cpp 64657 2011-05-18 11:46:07Z jsmith $"\\n0000001A 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... dasubs.cpp $ $Id: dasubs.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000001B 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mtfile.cpp $ $Id: rmtfile.cpp 64457 2011-05-09 17:28:42Z yma $"\\n0000001C 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mtpass.cpp $ $Id: rmtpass.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000001D 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... tspawn.cpp $ $Id: rmtspawn.cpp 64028 2011-04-14 14:28:10Z nhicks $"\\n0000001E 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rmtssh.cpp $ $Id: rmtssh.cpp 64028 2011-04-14 14:28:10Z nhicks $"\\n0000001F 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/trunk/co ... mtsmtp.cpp $ $Id: rmtfile.cpp 59036 2010-08-31 17:54:39Z nhicks $"\\n00000020 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... ckfile.cpp $ $Id: sockfile.cpp 62595 2011-02-17 14:30:45Z rchapman $"\\n00000021 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... hrpcmp.cpp $ $Id: hrpcmp.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000022 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... pcsock.cpp $ $Id: hrpcsock.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000023 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... pcutil.cpp $ $Id: hrpcutil.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000024 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mpbase.cpp $ $Id: mpbase.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000025 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... mpcomm.cpp $ $Id: mpcomm.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000026 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /mplog.cpp $ $Id: mplog.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000027 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jarray.cpp $ $Id: jarray.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000028 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... vahash.cpp $ $Id: javahash.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000029 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... socket.cpp $ $Id: jbsocket.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002A 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jbuff.cpp $ $Id: jbuff.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002B 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jcomp.cpp $ $Id: jcomp.cpp 65606 2011-06-20 19:10:47Z clo $"\\n0000002C 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jcrc.cpp $ $Id: jcrc.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000002D 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
jdebug.cpp $ $Id: jdebug.cpp 65040 2011-06-01 14:47:31Z ghalliday $"\\n0000002E 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... except.cpp $ $Id: jexcept.cpp 65025 2011-06-01 11:23:50Z rchapman $"\\n0000002F 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jfile.cpp $ $Id: jfile.cpp 63177 2011-03-14 16:28:18Z nhicks $"\\n00000030 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jhash.cpp $ $Id: jhash.cpp 62965 2011-03-04 12:34:40Z ghalliday $"\\n00000031 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jiface.cpp $ $Id: jiface.cpp 62933 2011-03-03 17:00:04Z nhicks $"\\n00000032 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... ib/jio.cpp $ $Id: jio.cpp 63504 2011-03-26 13:46:15Z nhicks $"\\n00000033 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jiter.cpp $ $Id: jiter.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000034 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlib.cpp $ $Id: jlib.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000035 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlog.cpp $ $Id: jlog.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000036 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jlzw.cpp $ $Id: jlzw.cpp 64790 2011-05-20 15:20:41Z jsmith $"\\n00000037 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... emleak.cpp $ $Id: jmemleak.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000038 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jmisc.cpp $ $Id: jmisc.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000039 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jmutex.cpp $ $Id: jmutex.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000003A 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... bserve.cpp $ $Id: jobserve.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000003B 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jprop.cpp $ $Id: jprop.cpp 62965 2011-03-04 12:34:40Z ghalliday $"\\n0000003C 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jptree.cpp $ $Id: jptree.cpp 62962 2011-03-04 11:10:12Z jsmith $"\\n0000003D 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... regexp.cpp $ $Id: jregexp.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000003E 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jsem.cpp $ $Id: jsem.cpp 62376 2011-02-04 21:59:58Z sort $"\\n0000003F 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... b/jset.cpp $ $Id: jset.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000040 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... rtsock.cpp $ $Id: jsmartsock.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000041 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jsort.cpp $ $Id: jsort.cpp 63168 2011-03-14 11:27:12Z nhicks $"\\n00000042 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... jstats.cpp $ $Id: jstats.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000043 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... 
stream.cpp $ $Id: jstream.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000044 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... string.cpp $ $Id: jstring.cpp 62922 2011-03-03 14:18:20Z nhicks $"\\n00000045 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... erhash.cpp $ $Id: jsuperhash.cpp 64069 2011-04-18 14:15:50Z rchapman $"\\n00000046 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... thread.cpp $ $Id: jthread.cpp 64340 2011-05-03 09:41:15Z rchapman $"\\n00000047 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jtime.cpp $ $Id: jtime.cpp 62376 2011-02-04 21:59:58Z sort $"\\n00000048 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... nicode.cpp $ $Id: junicode.cpp 64453 2011-05-09 08:23:35Z ghalliday $"\\n00000049 2011-08-08 18:31:23 3167 3167 "$HeadURL: https://svn.br.seisint.com/ecl/tags/com ... /jutil.cpp $ $Id: jutil.cpp 64815 2011-05-20 22:01:08Z sort $"\\n0000004A 2011-08-08 18:31:23 3167 3167 "community_3.0.4-10 log = /var/log/HPCCSystems/myroxie/roxie.08_08_2011_18_31_23.log originalLogName = 08_08_2011_18_31_23 logBaseName = /var/log/HPCCSystems/myroxie/roxie alias = /var/log/HPCCSystems/myroxie/roxie.log haslogfile = 1 timescan = 1 timediff = 0 "\\n0000004B 2011-08-08 18:31:23 3167 3167 "RoxieMemMgr: Setting memory limit to 1073741824 bytes (1024 pages)"\\n0000004C 2011-08-08 18:31:23 3167 3167 "RoxieMemMgr: 1024 Pages successfully allocated for the pool - memsize=1073741824 base=0x7f2687f00000 alignment=1048576 bitmapSize=32"\\n0000004D 2011-08-08 18:31:23 3167 3167 "Current Hardware Info: CPUs=4, speed=2600 MHz, Mem=7742 MB , primDisk=0 GB, primFree=0 GB, secDisk=0 GB, secFree=0 GB, NIC=0"\\n0000004E 2011-08-08 18:31:23 3167 3171 "Background copy thread 693d30 starting"\\n0000004F 2011-08-08 18:31:23 3167 3172 "HandleCloser thread 693d30 starting"\\n00000050 2011-08-08 18:31:23 3167 3167 "EXCEPTION: (1455): Could not resolve address 0.0.0.0"\\n00000051 2011-08-08 18:31:23 3167 3171 "Background copy thread 693d30 exiting"\\n00000052 2011-08-08 18:31:23 3167 3172 "Handle closer thread 693d30 exiting"\", \"post_time\": \"2011-08-08 23:19:16\" },\n\t{ \"post_id\": 260, \"topic_id\": 87, \"forum_id\": 14, \"post_subject\": \"Re: Cluster name confusion\", \"username\": \"thildebrant\", \"post_text\": \"Bob,\\nThat's correct.\\n\\nTodd\", \"post_time\": \"2011-08-11 13:59:50\" },\n\t{ \"post_id\": 259, \"topic_id\": 87, \"forum_id\": 14, \"post_subject\": \"Re: Cluster name confusion\", \"username\": \"bforeman\", \"post_text\": \"Hi Todd,\\n\\nWhen you say "ECL IDE shows the name of the cluster as thor" are you are referring to the ECL IDE Target drop list? If so, I can confirm your report and will pass it to the development team. Thanks!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-08-11 13:44:22\" },\n\t{ \"post_id\": 256, \"topic_id\": 87, \"forum_id\": 14, \"post_subject\": \"Cluster name confusion\", \"username\": \"thildebrant\", \"post_text\": \"Using OSS community 3.0.1\\nI used the wizard to generate my cluster configuration. It popped out a cluster with a thor \\nnamed mythor. At least, that’s the name in the ESP page. ECL IDE shows the name of the \\ncluster as thor. When submitting jobs, it expects those\\n jobs to be submitted to queue “thor” and cluster “thor”. 
However, when calling \\nFileServices.Spray*, it expects the cluster name used to be “mythor”.\\n\\n \\n\\nThis name confusion could just be a peculiarity of the configuration generated by the wizard, \\nbut really, it shouldn’t do that. I don’t really care what the name is, but it ought to match \\neverywhere.\", \"post_time\": \"2011-08-11 12:53:52\" },\n\t{ \"post_id\": 369, \"topic_id\": 94, \"forum_id\": 14, \"post_subject\": \"Re: Firewall setup for HPCC Server\", \"username\": \"kovacsbv\", \"post_text\": \"If you're on Linux, you could netstat -pln to get a list of ports being listened to. This would show all open ports, not just the HPCC ones so you have to use discretion and look at the name of the executable opening the port. Also, It won't show any ports the box is trying to open in the outgoing direction, although that's probably not a big deal.\", \"post_time\": \"2011-09-29 14:58:37\" },\n\t{ \"post_id\": 283, \"topic_id\": 94, \"forum_id\": 14, \"post_subject\": \"Re: Firewall setup for HPCC Server\", \"username\": \"jonburger\", \"post_text\": \"I will try to track down a complete list of all ports used in a default setup (although you can change them or grep through the environment.xml).\\n\\nIn the meantime the only port used on nodes in a spray/despray is 7070 (dafilesrv) and that will be accessed via the dfuserver (7439).\\n\\nOther option is to check your firewall logs for what is being denied.\\n\\nI will work with documentation to get a complete listing of ports.\\n\\nThanks,\\n\\nJon\", \"post_time\": \"2011-08-22 15:59:27\" },\n\t{ \"post_id\": 279, \"topic_id\": 94, \"forum_id\": 14, \"post_subject\": \"Firewall setup for HPCC Server\", \"username\": \"hli\", \"post_text\": \"Hi,\\n\\nI have installed a single node HPCC server on my box. During the setup, I had a problem with firewall setup. Although I have set up firewall to allow all the HPCC used ports I knew such as 8010, 7070, 9876, 8008 and so on, the HPCC still could not spry the file in the dropzone to cluster node. When I completely disabled the firewall on the server, all the processes can be smoothly finished. So, could you guys provide an instruction about the firewall setup or give a full list of ports the firewall need open?\\n\\nThanks.\", \"post_time\": \"2011-08-17 14:56:02\" },\n\t{ \"post_id\": 282, \"topic_id\": 95, \"forum_id\": 14, \"post_subject\": \"Re: Multi node installation problem\", \"username\": \"jonburger\", \"post_text\": \"Couple of things you can try:\\n\\n1. Make sure the hostname of the machine(s) in question contain an ip listing for the ip you have listed for that machine in your config.\\n\\n2. On our IB systems our infiniband interface is listed as ib0 (or ib1) and it has no issue bonding to that interface as long as #1 above is done. Same will work for bonding, just make sure your hostname entry in /etc/hosts lists the ip of the bonding interface.\\n\\n3. Validate that it is indeed a binding issue by running a netstat -tanp and validating service is not bound to a different adapter.\\n\\n4. See if the services are running at all in your process list to make sure you don't have any other type of startup issues.\", \"post_time\": \"2011-08-22 15:53:50\" },\n\t{ \"post_id\": 280, \"topic_id\": 95, \"forum_id\": 14, \"post_subject\": \"Multi node installation problem\", \"username\": \"onur\", \"post_text\": \"Hello,\\n\\nI want to evaluate HPCC. I have 3 physical servers with 24core cpu, 256g memory each. \\n\\nFirst I try one node installation which work fine. 
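Pending the complete port list promised above, two rough ways to see what a default single-node setup is actually using: check the listeners for the ports already named in this thread, and pull port attributes out of the generated environment.xml (the grep pattern is only an approximation of how ports appear in that file).

# run on the HPCC node
sudo netstat -pln | grep -E '8010|8008|9876|7439|7070'
grep -oi 'port="[0-9]*"' /etc/HPCCSystems/environment.xml | sort -u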
Then I installed the system to the other nodes and use configuration wizard to make them work together. I restart all hpcc services each node.\\n\\nHowever when I try to login to ECL IDE it gives me SOAP error message. I even couldn't login to EclWatch web page. \\n\\nI guess the problem is our network interfaces. The servers are connected via infiniband in private(bond1), and ethernet in public network(bond0). \\n\\nex:\\nBelow bond0 has public ip and bond1 has private ip. We are not using eth0 at all.\\nIn the environment.xml I've changed the interface parameter eth0 to bond0 but it didn't work. Any idea?\\n\\nbond0 Link encap:Ethernet HWaddr 18:A9:05:59:FE:80\\n inet addr:10.210.232.91 Bcast:10.210.232.255 Mask:255.255.255.0\\n inet6 addr: fe80::1aa9:5ff:fe59:fe80/64 Scope:Link\\n UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1\\n RX packets:10009342 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:817714 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:0\\n RX bytes:5830590692 (5.4 GiB) TX bytes:94781126 (90.3 MiB)\\n\\nbond1 Link encap:InfiniBand HWaddr 80:00:00:48:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00\\n inet addr:182.2.1.3 Bcast:182.2.255.255 Mask:255.255.0.0\\n inet6 addr: fe80::223:7dff:ff94:5201/64 Scope:Link\\n UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1\\n RX packets:132893 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:117473 errors:0 dropped:1 overruns:0 carrier:0\\n collisions:0 txqueuelen:0\\n RX bytes:7183908 (6.8 MiB) TX bytes:5317338 (5.0 MiB)\", \"post_time\": \"2011-08-19 06:37:59\" },\n\t{ \"post_id\": 311, \"topic_id\": 103, \"forum_id\": 14, \"post_subject\": \"Re: Wizard Environment Setup\", \"username\": \"jsmith\", \"post_text\": \"Depending on the profile of the jobs ran and hardware setup, having >1 slave per node may increase your throughput.\\nThor will use all cpus available for intense operation like. sorting/joining, other actions tend to be more single threaded, e.g. read/write filter.\\nSo, increasing the # slaves per node, may increase cpu utilization if the jobs are for example mainly cpu intensive in transform actions.\\nYou might also see lift from other resource types, e.g. if your jobs are very disk bound and you have fast disks/FS, the concurrency of multiple slaves may help.\\n\\nHowever, if you define multiple slaves per node, you must also divide the physical memory up between them in your configuration. That will mean greater partitioning and a smaller memory workset for in-memory operations on each slave, which in turn may result in more spilling and an increased amount of merging.\\n\\nHope that helps.\", \"post_time\": \"2011-09-02 15:50:09\" },\n\t{ \"post_id\": 308, \"topic_id\": 103, \"forum_id\": 14, \"post_subject\": \"Wizard Environment MultiNode Setup (thor slaves per node)\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"One of the options in the Evironment setup is:\\n"Number of Thor slaves per node (default 1)"\\n\\nQuestion: Is this used when the nodes have multiple CPUs or is this configurable for other reasons, if so what are they.\", \"post_time\": \"2011-09-02 14:46:46\" },\n\t{ \"post_id\": 310, \"topic_id\": 104, \"forum_id\": 14, \"post_subject\": \"Re: Wizard Environment Setup (IPs vs Nodes)\", \"username\": \"sridhar.meda\", \"post_text\": \"Question 1: Does the number of IP addresses and number of nodes have to match?\\nAnswer: They do not have to match. 
You can configure the environment according to your needs but the number of Roxie nodes or Thor nodes cannot exceed the number of ip addresses provided. \\n\\nQuestion 2: Can there be 10 IP addresses for the system and 20 roxie nodes?\\nAnswer: This scenario is not allowed. In this case, an error message would be displayed saying that number of nodes cannot exceed the number of IP addresses provided.\\n\\nQuestion 3: Can a thor and roxie exist on the same IP address?\\nAnswer: This scenario is allowed but there may be performance implications.\", \"post_time\": \"2011-09-02 15:25:40\" },\n\t{ \"post_id\": 309, \"topic_id\": 104, \"forum_id\": 14, \"post_subject\": \"Wizard Environment MultiNode Setup (IPs vs Nodes)\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Multi-node setup:\\nThe Environment Setup Wizard asks for a list of IP addresses.\\nThen it ask for number of Thor and Roxie nodes.\\n\\nQuestion 1: Does the number of IP addresses and number of nodes have to match?\\nExample: 10 IP Addresses, and 10 Roxie nodes.\\nQuestion 2: Can there be 10 IP addresses for the system and 20 roxie nodes?\\nQuestion 3: Can a thor and roxie exist on the same IP address?\\nExample: 10 IP Addresses, and 10 Roxie nodes and 10 Thor nodes.\", \"post_time\": \"2011-09-02 15:05:46\" },\n\t{ \"post_id\": 327, \"topic_id\": 107, \"forum_id\": 14, \"post_subject\": \"Re: Centos 6 minimal install iso\", \"username\": \"pschwartz\", \"post_text\": \"Todd,\\n\\nIf you select the server install (no GUI), for CentOS 5.x you will get a very small foot print install. Once that is done, make sure you install our dependencies from both the base CentOS repository and EPEL, and you will be good to go with a minimal system.\\n\\nThe base server install includes the following:\\nCore CentOS + Kernel (all required packages for a base system)\\nSSH client tools\\n\\nIn a recent VM I have done an install on, this totaled about 500mb installed before our required packages.\\n\\n-Philip\", \"post_time\": \"2011-09-09 13:29:31\" },\n\t{ \"post_id\": 326, \"topic_id\": 107, \"forum_id\": 14, \"post_subject\": \"Re: Centos 6 minimal install iso\", \"username\": \"thildebrant\", \"post_text\": \"Phillip,\\nThank you for your reply.\\nI was using Centos6 mostly for the benefits of the minimal install ISO.\\nIs there a minimal recommended packages list (or checkboxes in the install GUI) for Centos 5.x?\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-09-09 13:22:05\" },\n\t{ \"post_id\": 325, \"topic_id\": 107, \"forum_id\": 14, \"post_subject\": \"Re: Centos 6 minimal install iso\", \"username\": \"pschwartz\", \"post_text\": \"Hello,\\n\\nThank you for your interest in using the HPCC Platform. Currently, we do not support CentOS 6. \\n\\nThe dependency issue you are seeing are due to the dependency versions from CentOS 5.x which our package is built for not being installed. 
In order to use our current packages for CentOS or Red Hat Enterprise Linux, you would need to use a version of 5.x.\\n\\nSincerely, \\nPhilip\", \"post_time\": \"2011-09-09 13:08:18\" },\n\t{ \"post_id\": 321, \"topic_id\": 107, \"forum_id\": 14, \"post_subject\": \"Centos 6 minimal install iso\", \"username\": \"thildebrant\", \"post_text\": \"Hello,\\nI tried to install hpccsystems-platform-community_3.0.4-10.el5.x86_64.rpm on a Centos6 from the minimal install ISO, and, after downloading the required packages I now see, after trying to install\\n[root@node1 ~]# rpm -Uvh hpccsystems-platform-community_3.0.4-10.el5.x86_64.rpm\\nerror: Failed dependencies:\\n libboost_regex.so.2()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libcrypto.so.6()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libicudata.so.36()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libicui18n.so.36()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libicuuc.so.36()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n liblber-2.3.so.0()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libldap_r-2.3.so.0()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libssl.so.6()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n libxerces-c.so.27()(64bit) is needed by hpccsystems-platform-community-3.0.4.x86_64\\n\\n\\n\\n[root@node1 ~]# rpm -qa | grep lib\\nncurses-libs-5.7-3.20090208.el6.x86_64\\nfile-libs-5.04-6.el6.x86_64\\nzlib-1.2.3-25.el6.x86_64\\nkrb5-libs-1.8.2-3.el6_0.7.x86_64\\nlibcom_err-1.41.12-3.el6.x86_64\\ndevice-mapper-libs-1.02.53-8.el6_0.4.x86_64\\nlibselinux-2.0.94-2.el6.x86_64\\nglib2-2.22.5-5.el6.x86_64\\nlibattr-2.4.44-4.el6.x86_64\\nlibcap-2.16-5.2.el6.x86_64\\nlibuser-0.56.13-4.el6_0.1.x86_64\\ntcp_wrappers-libs-7.6-56.3.el6.x86_64\\nlibudev-147-2.29.el6.x86_64\\nlibidn-1.18-2.el6.x86_64\\nelfutils-libelf-0.148-1.el6.x86_64\\nlibgcrypt-1.4.5-3.el6.x86_64\\nlibhbaapi-2.2-10.el6.x86_64\\nlibusb-0.1.12-23.el6.x86_64\\nlibutempter-1.1.5-4.1.el6.x86_64\\ncyrus-sasl-lib-2.1.23-8.el6.x86_64\\ne2fsprogs-libs-1.41.12-3.el6.x86_64\\nlibxml2-2.7.6-1.el6.x86_64\\nkeyutils-libs-1.4-1.el6.x86_64\\nlibgssglue-0.1-8.1.el6.x86_64\\nlibcurl-7.19.7-16.el6.x86_64\\nrpm-libs-4.8.0-12.el6.x86_64\\nfipscheck-lib-1.2.0-4.1.el6.x86_64\\npciutils-libs-3.1.4-9.el6.x86_64\\nlibffi-3.0.5-3.2.el6.x86_64\\nlibaio-0.3.107-10.el6.x86_64\\nlibpciaccess-0.10.9-2.el6.x86_64\\nnfs-utils-lib-1.1.5-1.el6.x86_64\\ncryptsetup-luks-libs-1.1.2-2.el6.x86_64\\nlibicu-4.2.1-9.el6.x86_64\\nglibc-common-2.12-1.7.el6_0.5.x86_64\\nglibc-devel-2.12-1.7.el6_0.5.x86_64\\nlibstdc++-devel-4.4.4-13.el6.x86_64\\nperl-libs-5.10.1-115.el6.x86_64\\nlibtool-2.2.6-15.5.el6.x86_64\\nlibuuid-2.17.2-6.el6_0.1.x86_64\\ndbus-libs-1.2.24-4.el6_0.x86_64\\npython-libs-2.6.5-3.el6_0.2.x86_64\\nlibgcc-4.4.4-13.el6.x86_64\\nlibblkid-2.17.2-6.el6_0.1.x86_64\\nlibcap-ng-0.6.4-3.el6_0.1.x86_64\\ndevice-mapper-multipath-libs-0.4.9-31.el6_0.3.x86_64\\ncracklib-dicts-2.8.16-4.el6.x86_64\\naudit-libs-2.0.4-1.el6.x86_64\\ndevice-mapper-event-libs-1.02.53-8.el6_0.4.x86_64\\nlibsepol-2.0.41-3.el6.x86_64\\nlvm2-libs-2.02.72-8.el6_0.4.x86_64\\nlibacl-2.2.49-4.el6.x86_64\\nlibcgroup-0.36.1-6.el6_0.1.x86_64\\nlibgpg-error-1.7-3.el6.x86_64\\nlibstdc++-4.4.4-13.el6.x86_64\\nxz-libs-4.999.9-0.3.beta.20091007git.el6.x86_64\\nlibselinux-utils-2.0.94-2.el6.x86_64\\ndbus-glib-0.86-5.el6.x86_64\\nlibconfig-1.3.2-1.1.el6.x86_64\\nlibnih-1.0.1-6.el6.x86_64\\n
libss-1.41.12-3.el6.x86_64\\nlibedit-2.11-4.20080712cvs.1.el6.x86_64\\ncoreutils-libs-8.4-9.el6.x86_64\\nlibtirpc-0.2.1-1.el6.x86_64\\nlibssh2-1.2.2-7.el6.x86_64\\nplymouth-core-libs-0.8.3-17.el6.centos.x86_64\\nlibsemanage-2.0.43-4.el6.x86_64\\nlibevent-1.4.13-1.el6.x86_64\\nlibhbalinux-1.0.10-1.el6.x86_64\\nlibdrm-2.4.20-2.el6.x86_64\\nglibc-2.12-1.7.el6_0.5.x86_64\\nlibgomp-4.4.4-13.el6.x86_64\\nglibc-headers-2.12-1.7.el6_0.5.x86_64\\ncracklib-2.8.16-4.el6.x86_64\\nbzip2-libs-1.0.5-7.el6_0.x86_64\", \"post_time\": \"2011-09-07 17:26:18\" },\n\t{ \"post_id\": 334, \"topic_id\": 109, \"forum_id\": 14, \"post_subject\": \"Re: yahoo references causes delays\", \"username\": \"thildebrant\", \"post_text\": \"this was actually in the config manager pages, part of 3.0.1,\\nIt now looks like it was fixed in https://github.com/hpcc-systems/HPCC-Platform/commit/632f2f1dfb872ef2f9a09de332112697cc2d1717\\n(which is now viewable).\\nI believe this is resolved, but will post if we find anything else.\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-09-13 12:48:35\" },\n\t{ \"post_id\": 333, \"topic_id\": 109, \"forum_id\": 14, \"post_subject\": \"Re: yahoo references causes delays\", \"username\": \"jo.prichard@lexisnexis.com\", \"post_text\": \"Hi Todd\\n\\nAre specific ESP pages having this issue or all ESP pages? \\n\\nIs it ECLWatch or WS ECL (or both!)?\\n\\nThanks in advance\\n\\nJo\", \"post_time\": \"2011-09-13 12:36:32\" },\n\t{ \"post_id\": 330, \"topic_id\": 109, \"forum_id\": 14, \"post_subject\": \"Re: yahoo references causes delays\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"The Yahoo YUI library files are meant to be stored and accessed locally from the ESP. \\n\\nWe may have missed something that is therefore being retrieved from Yahoo. I've tried sniffing my browser traffic as I use the system, but so far I haven't seen any attempts to access files remotely.\\n\\nDo you happen to have specifics about what URL it is trying to resolve at Yahoo? What made you realize the problem was with the Yahoo CSS?\\n\\nThanks and Regards,\\nTony\", \"post_time\": \"2011-09-12 23:51:35\" },\n\t{ \"post_id\": 328, \"topic_id\": 109, \"forum_id\": 14, \"post_subject\": \"yahoo references causes delays\", \"username\": \"thildebrant\", \"post_text\": \"Hello,\\nOn an install where the system cannot access the internet, the Yahoo related CSS causes significant delays when accessing the ESP pages.\\nIs there a way to change those references?\\n\\nThank you,\\nTodd\", \"post_time\": \"2011-09-10 19:34:17\" },\n\t{ \"post_id\": 357, \"topic_id\": 119, \"forum_id\": 14, \"post_subject\": \"intel processor version\", \"username\": \"HPCC Staff\", \"post_text\": \"A question came in from the community about downloading the community server version for an intel i5 processor computer rather than AMD since "amd64" is referenced in the filename of the download. \\n\\namd64 (also known as x86_64) refers to the architecture (64-bit extensions originally designed by AMD) not to a particular chip or manufacturer. 
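A quick way to confirm that a particular machine supports this architecture on any Linux install (a generic check, not an HPCC-specific step) is:

uname -m                                                      # prints x86_64 when a 64-bit (amd64) kernel is running
grep -qw lm /proc/cpuinfo && echo "CPU supports 64-bit (long mode)"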
The Intel i5 is compatible with this architecture, provided users install the 64-bit version of the chosen distro, rather than the 32-bit version.\\n\\nCurrently, pre-built 32-bit rpms is not available but will revisit this if there is demand from the community.\", \"post_time\": \"2011-09-26 13:36:55\" },\n\t{ \"post_id\": 408, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Got it.\\n\\nThe iptables were still stopping various connections from the components to dali. I opened iptables wide open and it works now. Of course I need to find a way to tighten it back down, but at least I know what's wrong.\\n\\nThanks for all your time, Flavio.\\n\\nVic\", \"post_time\": \"2011-10-05 18:23:05\" },\n\t{ \"post_id\": 407, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Ok, so I got the processes killed. All of them. Even the ones with no tcp sockets open.\\n\\nStartup looks normal:\\n\\nroot@LAB-HPCC-01:/var/log/HPCCSystems/mydali/server# time /etc/init.d/hpcc-init start\\nStarting mydafilesrv.... [ OK ]\\nStarting mydali.... [ OK ]\\nStarting mydfuserver.... [ OK ]\\nStarting myeclagent.... [ OK ]\\nStarting myeclccserver.... [ OK ]\\nStarting myeclscheduler.... [ OK ]\\nStarting myesp.... [ OK ]\\nStarting myroxie.... [ OK ]\\nStarting mysasha.... [ OK ]\\nStarting mythor.... [ OK ]\\n
\\n\\nThen the ports look normal:\\n\\nroot@LAB-HPCC-01:/var/log/HPCCSystems/mydali/server# netstat -tanp\\nActive Internet connections (servers and established)\\nProto Recv-Q Send-Q Local Address Foreign Address State PID/Program name\\ntcp 0 0 0.0.0.0:6500 0.0.0.0:* LISTEN 12636/thormaster_65\\ntcp 0 0 0.0.0.0:7365 0.0.0.0:* LISTEN 12086/eclscheduler\\ntcp 0 0 0.0.0.0:6600 0.0.0.0:* LISTEN 12633/thorslave_660\\ntcp 0 0 0.0.0.0:7368 0.0.0.0:* LISTEN 11929/agentexec\\ntcp 0 0 0.0.0.0:8877 0.0.0.0:* LISTEN 12329/saserver\\ntcp 0 0 0.0.0.0:7315 0.0.0.0:* LISTEN 12164/esp\\ntcp 0 0 0.0.0.0:9876 0.0.0.0:* LISTEN 12243/roxie\\ntcp 0 0 0.0.0.0:7444 0.0.0.0:* LISTEN 12008/eclccserver\\ntcp 0 0 0.0.0.0:7221 0.0.0.0:* LISTEN 11850/dfuserver\\ntcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 849/sshd\\ntcp 0 0 0.0.0.0:7353 0.0.0.0:* LISTEN 12243/roxie\\ntcp 0 0 0.0.0.0:7100 0.0.0.0:* LISTEN 11684/dafilesrv\\ntcp 0 0 0.0.0.0:7070 0.0.0.0:* LISTEN 11761/daserver\\ntcp 0 1 138.12.249.27:59729 138.12.249.27:7070 SYN_SENT 12636/thormaster_65\\ntcp 0 1 138.12.249.27:59735 138.12.249.27:7070 SYN_SENT 12164/esp\\ntcp 0 0 138.12.249.27:22 138.12.248.168:3802 ESTABLISHED 1075/sshd: kovacsvx\\ntcp 0 1 138.12.249.27:59734 138.12.249.27:7070 SYN_SENT 12086/eclscheduler\\ntcp 0 1 138.12.249.27:59730 138.12.249.27:7070 SYN_SENT 12633/thorslave_660\\ntcp 0 1 138.12.249.27:59731 138.12.249.27:7070 SYN_SENT 11850/dfuserver\\ntcp 0 1 138.12.249.27:59733 138.12.249.27:7070 SYN_SENT 12008/eclccserver\\ntcp 0 1 138.12.249.27:59732 138.12.249.27:7070 SYN_SENT 11929/agentexec\\ntcp 0 1 138.12.249.27:59736 138.12.249.27:7070 SYN_SENT 12329/saserver\\n
\\n\\nThen I get the login soap error:\\n[attachment: 03--Another error logging in after full restart.png]\\n\\nThen I note the time/date on the box:\\nroot@LAB-HPCC-01:/var/log/HPCCSystems/mydali/server# # Now I get the error logging in.\\nroot@LAB-HPCC-01:/var/log/HPCCSystems/mydali/server# date\\nWed Oct 5 10:31:28 EDT 2011\\n
\\n\\n\\nThen I check which logs changed around the time I got the login error:\\nroot@LAB-HPCC-01:/var/log/HPCCSystems# find -iname "*.log" | xargs grep -EHni "2011-10-05 10:(28|29|30)" *.log\\n\\n./mydali/server/DaServer.log:18:00000011 2011-10-05 10:28:52 11761 11765 "SYS: PU= 0% MU= 2% MAL=29619200 MMP=29364224 SBK=254976 TOT=30004K RAM=306820K SWP=0K"\\n./mydali/server/DaServer.log:19:00000012 2011-10-05 10:28:52 11761 11765 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.6 kw/s=2.6 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=1.9 rxk/s=0.2 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n./mydali/server/DaServer.log:20:00000013 2011-10-05 10:29:52 11761 11765 "SYS: PU= 0% MU= 2% MAL=29619200 MMP=29364224 SBK=254976 TOT=30004K RAM=303460K SWP=0K"\\n./mydali/server/DaServer.log:21:00000014 2011-10-05 10:29:52 11761 11765 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.5 kw/s=1.9 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=1.8 rxk/s=0.2 txp/s=0.1 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n./mydali/server/DaServer.log:22:00000015 2011-10-05 10:30:52 11761 11765 "SYS: PU= 0% MU= 2% MAL=29619200 MMP=29364224 SBK=254976 TOT=30004K RAM=303452K SWP=0K"\\n./mydali/server/DaServer.log:23:00000016 2011-10-05 10:30:52 11761 11765 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.4 kw/s=1.8 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=1.7 rxk/s=0.2 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n\\n./mysasha/saserver.log:6:00000005 2011-10-05 10:29:50 12329 12329 "Failed to connect to Dali Server 138.12.249.27:7070. Retrying..."\\n\\n./myeclscheduler/eclscheduler.log:5:00000004 2011-10-05 10:29:46 12086 12086 "Failed to connect to Dali Server 138.12.249.27:7070. Retrying..."\\n\\n./myroxie/roxie.log:63:0000003F 2011-10-05 10:28:09 12243 12754 "PING: 1 replies received, average delay 104"\\n./myroxie/roxie.log:64:00000040 2011-10-05 10:28:59 12243 12246 "SYS: PU= 0% MU= 2% MAL=1075227168 MMP=1074794496 SBK=432672 TOT=1050160K RAM=306824K SWP=0K"\\n./myroxie/roxie.log:65:00000041 2011-10-05 10:28:59 12243 12246 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.5 kw/s=1.8 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=2.1 rxk/s=0.2 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n./myroxie/roxie.log:66:00000042 2011-10-05 10:29:09 12243 12754 "PING: 0 replies received, average delay 0"\\n./myroxie/roxie.log:67:00000043 2011-10-05 10:29:59 12243 12246 "SYS: PU= 0% MU= 2% MAL=1075227424 MMP=1074794496 SBK=432928 TOT=1050160K RAM=303464K SWP=0K"\\n./myroxie/roxie.log:68:00000044 2011-10-05 10:29:59 12243 12246 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.6 kw/s=2.7 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=1.7 rxk/s=0.2 txp/s=0.1 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n./myroxie/roxie.log:69:00000045 2011-10-05 10:30:09 12243 12754 "PING: 0 replies received, average delay 0"\\n./myroxie/roxie.log:70:00000046 2011-10-05 10:30:59 12243 12246 "SYS: PU= 0% MU= 2% MAL=1075227648 MMP=1074794496 SBK=433152 TOT=1050160K RAM=303456K SWP=0K"\\n./myroxie/roxie.log:71:00000047 2011-10-05 10:30:59 12243 12246 "DSK: \\\\[sda\\\\] r/s=0.0 kr/s=0.0 w/s=0.3 kw/s=1.0 bsy=0 \\\\[sdb\\\\] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=1.7 rxk/s=0.2 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n\\n./myesp/esp.log:9:00000009 2011-10-05 10:29:48 12164 12164 "Failed to connect to Dali Server 138.12.249.27:7070. Retrying..."\\nroot@LAB-HPCC-01:/var/log/HPCCSystems#\\n
\\n\\nObviously, sasha, eclscheduler, roxie, et al. cannot connect to mydali.\\n\\nThen I go to the dali log, and everything looks fine!\\n\\nI look for dali processes with ps -ef | grep dali and there it is:\\n\\nhpcc 11743 1 0 10:25 pts/0 00:00:00 /bin/bash /opt/HPCCSystems/bin/init_dali\\n
\\n\\nThat's where I am now.\", \"post_time\": \"2011-10-05 18:13:22\" },\n\t{ \"post_id\": 388, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"flavio\", \"post_text\": \"Vic,\\n\\nJust feel free to kill the existing processes (or try issuing another "/etc/init.d/hpcc-init stop") and restart it.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-10-04 18:01:37\" },\n\t{ \"post_id\": 387, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"It looks like there is room. This was an newly installed\\nempty drive (see the df -h below).\\n\\nKeep in mind that this is a partially shut down HPCC.\\nI did the "/etc/init.d/hpcc-init stop" and haven't restarted\\nit. It's curious that HPCC is shut down but still\\nhas ports open and processes running.\\n\\nThe question was whether I should kill the existing processes,\\nor if there is a particular order, or if there is a "warm\\nstart" I can do to keep the pid/lock files intact, etc.\\n\\nIf you want, I can do a start too. Dali always seems to wait\\nabout one IP timeout before starting.\\n\\nI also su'd to hpcc and made/deleted a directory to make \\nsure it could write in the filesystem.\\n\\nAnother question: our system was tightened down to the\\npoint that the ethernet interface can't reach itself.\\nThis has been opened up in iptables, but pinging the\\nethernet interface from itself still doesn't work.\\n\\nroot@LAB-HPCC-01:/var/log/HPCCSystems/mydali/server# su hpcc\\n$ cd /mnt/\\n$ ls -l\\ntotal 4\\ndrwxr-xr-x 4 hpcc hpcc 4096 2011-09-30 12:48 hpcc_storage\\n$ cd hpcc_storage\\n$ ls -l\\ntotal 20\\ndrwxr-xr-x 7 hpcc hpcc 4096 2011-10-03 07:27 HPCCSystems\\ndrwx------ 2 hpcc hpcc 16384 2011-09-26 09:59 lost+found\\n$ df -h\\nFilesystem Size Used Avail Use% Mounted on\\n/dev/mapper/LAB--HPCC--01-root\\n 220G 1.2G 208G 1% /\\nnone 1.9G 260K 1.9G 1% /dev\\nnone 1.9G 4.0K 1.9G 1% /dev/shm\\nnone 1.9G 116K 1.9G 1% /var/run\\nnone 1.9G 0 1.9G 0% /var/lock\\nnone 1.9G 0 1.9G 0% /lib/init/rw\\n/dev/sda1 228M 20M 197M 9% /boot\\n/home/kovacsvx/.Private\\n 220G 1.2G 208G 1% /home/kovacsvx\\n/dev/sdb1 1.8T 204M 1.7T 1% /mnt/hpcc_storage\\n$ mkdir bleck\\n$ ls -l\\ntotal 24\\ndrwxr-xr-x 2 hpcc hpcc 4096 2011-10-04 13:12 bleck\\ndrwxr-xr-x 7 hpcc hpcc 4096 2011-10-03 07:27 HPCCSystems\\ndrwx------ 2 hpcc hpcc 16384 2011-09-26 09:59 lost+found\\n$ rmdir bleck\\n$\\n\\n
\", \"post_time\": \"2011-10-04 17:32:56\" },\n\t{ \"post_id\": 386, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"flavio\", \"post_text\": \"It seems that daserver is listening to 7070 already. And is thor still unable to start? There there may be something else going on... \\n\\nDo you have space in that filesystem?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-10-04 16:54:24\" },\n\t{ \"post_id\": 385, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Here it is.\\n\\nI did wait about 20 minutes and re-ran netstat, but I didn't notice any ports going away.\\n\\nroot:/var/log/HPCCSystems/mydali/server# netstat -tanp | grep 7070\\ntcp 0 0 0.0.0.0:7070 0.0.0.0:* LISTEN 23686/daserver\\ntcp 0 1 138.12.249.27:56151 138.12.249.27:7070 SYN_SENT 17339/eclccserver\\ntcp 0 1 138.12.249.27:56150 138.12.249.27:7070 SYN_SENT 19448/dfuserver\\ntcp 0 1 138.12.249.27:56149 138.12.249.27:7070 SYN_SENT 11549/eclccserver\\ntcp 0 1 138.12.249.27:56148 138.12.249.27:7070 SYN_SENT 11391/dfuserver\\n
\", \"post_time\": \"2011-10-04 15:38:50\" },\n\t{ \"post_id\": 384, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"flavio\", \"post_text\": \"Can you run a "netstat -tanp | grep 7070" as root to see which process is bound to 7070? \\n\\nIt may just be that the port is in time-wait or fin-wait mode and that you need to wait for a minute or so before trying to restart Dali.\\n\\nThe netstat above will tell you for sure if something is still bound to that port.\\n\\nFlavio\", \"post_time\": \"2011-10-04 15:21:33\" },\n\t{ \"post_id\": 383, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Ok,\\n\\nSo here's the short of the story:\\nI look at the dali log, and see that it can't open port 7070 because it's in use.\\nThen I shut down everything with /etc/init.d/hpcc-init stop\\nThen I do a netstat -pln and find a number of ports, including dali's 7070 is still open.\\nThe logs for dali give an error that the port is still open (no surprise).\\n\\nSo, the question is do I kill all the remaining processes that have ports open?\\nThe only thing I have open other than hpcc-related things is ipv4-ssh; could you take a look at the netstat below and determine how I shut down remaining processes?\\n\\nThanks,\\n\\nVic\\n\\n[size=50:tpfl8kfe]root:/var/log/HPCCSystems/mydali/server# tail DaServer.log\\n00000006 2011-10-04 10:05:20 11279 11279 "loading store 1, storedCrc=343383a"\\n00000007 2011-10-04 10:05:20 11279 11279 "Loading delta: /mnt/hpcc_storage/HPCCSystems/hpcc-data/dali/daliinc1.xml"\\n00000008 2011-10-04 10:05:20 11279 11279 "store loaded"\\n00000009 2011-10-04 10:05:20 11279 11279 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000A 2011-10-04 10:05:20 11279 11279 "Scanning store for external references"\\n0000000B 2011-10-04 10:05:20 11279 11279 "External reference count = 0"\\n0000000C 2011-10-04 10:05:20 11279 11279 "DASERVER[0] starting - listening to port 7070"\\n0000000D 2011-10-04 10:05:20 11279 11279 "ERROR: -7: /var/jenkins/workspace/Release-3.2.0/src/dali/server/daserver.cpp(465) : Exception : port in use\\nTarget: S>138.12.249.27, port = 7070, Raised in: /var/jenkins/workspace/Release-3.2.0/src/system/jlib/jsocket.cpp, line 869"\\n0000000E 2011-10-04 10:05:20 11279 11286 "BackupHandler stopped"\\n\\n\\n\\nroot:/var/log/HPCCSystems/mydali/server# netstat -pln\\nActive Internet connections (only servers)\\nProto Recv-Q Send-Q Local Address Foreign Address State PID/Program name\\ntcp 0 0 0.0.0.0:7360 0.0.0.0:* LISTEN 19448/dfuserver\\ntcp 0 0 0.0.0.0:6500 0.0.0.0:* LISTEN 12178/thormaster_65\\ntcp 0 0 0.0.0.0:7205 0.0.0.0:* LISTEN 11628/eclscheduler\\ntcp 0 0 0.0.0.0:6600 0.0.0.0:* LISTEN 12175/thorslave_660\\ntcp 0 0 0.0.0.0:8877 0.0.0.0:* LISTEN 11871/saserver\\ntcp 0 0 0.0.0.0:7245 0.0.0.0:* LISTEN 11391/dfuserver\\ntcp 0 0 0.0.0.0:7118 0.0.0.0:* LISTEN 11470/agentexec\\ntcp 0 0 0.0.0.0:7409 0.0.0.0:* LISTEN 17339/eclccserver\\ntcp 0 0 0.0.0.0:9876 0.0.0.0:* LISTEN 11785/roxie\\ntcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 849/sshd\\ntcp 0 0 0.0.0.0:7288 0.0.0.0:* LISTEN 11549/eclccserver\\ntcp 0 0 0.0.0.0:7418 0.0.0.0:* LISTEN 11706/esp\\ntcp 0 0 0.0.0.0:7164 0.0.0.0:* LISTEN 11785/roxie\\ntcp 0 0 0.0.0.0:7070 0.0.0.0:* LISTEN 23686/daserver\\nActive UNIX domain sockets (only servers)\\nProto RefCnt Flags Type State I-Node PID/Program name Path\\nunix 2 [ ACC ] STREAM LISTENING 3154 1/init 
@/com/ubuntu/upstart\\n\\n\\n\\n\\nroot:/var/log/HPCCSystems/mydali/server# /etc/init.d/hpcc-init stop\\nStopping mythor... [ OK ]\\nStopping mysasha... [ OK ]\\nStopping myroxie... [ OK ]\\nStopping myesp... [ OK ]\\nStopping myeclscheduler... [ OK ]\\nStopping myeclccserver... [ OK ]\\nStopping myeclagent... [ OK ]\\nStopping mydfuserver... [ OK ]\\nStopping mydali... [FAILED]\\nAlready Stopped\\n\\n\\n\\nroot:/var/log/HPCCSystems/mydali/server# netstat -pln\\nActive Internet connections (only servers)\\nProto Recv-Q Send-Q Local Address Foreign Address State PID/Program name\\ntcp 0 0 0.0.0.0:7360 0.0.0.0:* LISTEN 19448/dfuserver\\ntcp 0 0 0.0.0.0:7245 0.0.0.0:* LISTEN 11391/dfuserver\\ntcp 0 0 0.0.0.0:7409 0.0.0.0:* LISTEN 17339/eclccserver\\ntcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 849/sshd\\ntcp 0 0 0.0.0.0:7288 0.0.0.0:* LISTEN 11549/eclccserver\\ntcp 0 0 0.0.0.0:7070 0.0.0.0:* LISTEN 23686/daserver\\nActive UNIX domain sockets (only servers)\\nProto RefCnt Flags Type State I-Node PID/Program name Path\\nunix 2 [ ACC ] STREAM LISTENING 3154 1/init @/com/ubuntu/upstart\\n
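For anyone who hits the same situation (hpcc-init stop reports success but orphaned daemons keep ports such as 7070 open), a minimal cleanup sketch; it assumes the leftover listeners really are stale HPCC processes, as in the netstat output above, and the PIDs are only examples taken from that listing:

sudo netstat -tanp | grep -E 'daserver|dfuserver|eclccserver'   # confirm which stale daemons still hold ports
sudo kill 23686 11391 11549 17339 19448                         # PIDs from the listing above; substitute your own
sudo /etc/init.d/hpcc-init start                                # restart cleanly once nothing is bound to 7070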
\", \"post_time\": \"2011-10-04 15:16:48\" },\n\t{ \"post_id\": 382, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"flavio\", \"post_text\": \"Vic, \\n\\nIt's hard to say by the log fragment that you posted above, but it seems that Dali is not running (and that's why Thor cannot start). My guess is that either a mount point is missing or is read-only, or a corresponding directory within that mount point is missing.\\n\\nCan you please double check that?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-10-04 14:07:34\" },\n\t{ \"post_id\": 381, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Thanks, Flavio. It helped a lot.\\n\\nAfter fixing a few goofs, like having the system clock 3 hours off and creating a mount point that only root had access to (it's owned by hpcc:hpcc now), I ended up getting soap errors when trying to log in.\\n\\nLooking at the thor logs (which was the only log file I could see that had errors), it seems thor is shutting down.\\n\\n[size=50:3t6mjd0x]root@LAB-HPCC-01:/var/log/HPCCSystems# tail ./mythor/10_03_2011_06_44_58/THORMASTER.log\\n000000E0 2011-10-03 12:52:05 24575 3019 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.6 kw/s=2.4 bsy=0 [sdb] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=2.4 rxk/s=0.3 txp/s=0.7 txk/s=\\n000000E1 2011-10-03 12:53:05 24575 3019 SYS: PU= 0% MU= 1% MAL=253824 MMP=0 SBK=253824 TOT=364K RAM=275344K SWP=0K\\n000000E2 2011-10-03 12:53:05 24575 3019 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.5 kw/s=2.1 bsy=0 [sdb] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=2.1 rxk/s=0.3 txp/s=0.7 txk/s=\\n000000E3 2011-10-03 12:54:05 24575 3019 SYS: PU= 0% MU= 1% MAL=253824 MMP=0 SBK=253824 TOT=364K RAM=275564K SWP=0K\\n000000E4 2011-10-03 12:54:05 24575 3019 DSK: [sda] r/s=0.0 kr/s=0.0 w/s=0.7 kw/s=2.7 bsy=0 [sdb] r/s=0.0 kr/s=0.0 w/s=0.0 kw/s=0.0 bsy=0 NIC: rxp/s=4.1 rxk/s=0.7 txp/s=0.7 txk/s=\\n000000E5 2011-10-03 12:54:06 24575 24575 1: /var/jenkins/workspace/Release-3.2.0/src/dali/base/daclient.cpp(201) : CSDSServerStatus::stop : MP connect failed (138.12.249.27:7070)\\n000000E6 2011-10-03 12:54:06 24575 24575 Thor closing down 6\\n000000E7 2011-10-03 12:54:06 24575 24575 Thor closing down 5\\n000000E8 2011-10-03 12:54:06 24575 24575 Thor closing down 4\\n000000E9 2011-10-03 12:54:06 24575 24575 Thor closing down 3\\n
\", \"post_time\": \"2011-10-04 13:30:18\" },\n\t{ \"post_id\": 372, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"Re: From VM to separate Linux box\", \"username\": \"flavio\", \"post_text\": \"Vic,\\n\\nthere are a couple different ways to tell HPCC to use the other drive for the data store, after you formatted the device and mounted it under, let's say, /mnt/large_drive:\\n\\n1. You can run configmgr (/opt/HPCCSystems/sbin/configmgr) and use a graphical web browser interface (http://IP_address:8015/) to change the data and temp directories (under software->directories) to point at the new location (/mnt/large_drive/ in this case). Since configmgr saves the environment.xml file to /etc/HPCCSystems/source/environment.xml, please don't forget to copy /etc/HPCCSystems/source/environment.xml to /etc/HPCCSystems/environment.xml and restart the platform (/etc/init.d/hpcc-init restart);\\n\\n2. or you could just move your /var/lib/HPCCSystems directory tree to the new drive and use a symbolic link (ln -s /var/lib/HPCCSystems /mnt/large_drive) to tell HPCC to use the new drive instead (you'll need to restart the HPCC environment too).\\n\\nECL IDE defines the IP address (or hostname) of the cluster within the preferences window, and allows you to have even several of clusters defined to quickly switch between them as needed. The preferences section can be accessed from the ribbon bar (icon on the top left corner) or upon restarting ECL IDE (button at the bottom of the login window).\\n\\nI hope this helps,\\n\\nFlavio\", \"post_time\": \"2011-09-30 18:31:24\" },\n\t{ \"post_id\": 367, \"topic_id\": 122, \"forum_id\": 14, \"post_subject\": \"From VM to separate Linux box\", \"username\": \"kovacsbv\", \"post_text\": \"Hi, all:\\n\\nI was using the VM version of HPCC and got it to work.\\nNow I installed a version of HPCC on a separate linux\\nbox. Questions related to this:\\n\\n1. How do I tell the new box to use the large disk on /dev/hdb1\\nfor the DFU?\\n2. How do I know which system the ECL IDE environment is pointed at\\nand repoint it?\\n\\nThanks,\\n\\nVic Kovacs\", \"post_time\": \"2011-09-28 13:35:52\" },\n\t{ \"post_id\": 405, \"topic_id\": 126, \"forum_id\": 14, \"post_subject\": \"Re: VMware Player memory allocation issue\", \"username\": \"ejtstoker\", \"post_text\": \"The loading problem is resolved when folder sharing is disabled although the VMware player 64-bit warning appears at start up.\", \"post_time\": \"2011-10-05 15:44:43\" },\n\t{ \"post_id\": 404, \"topic_id\": 126, \"forum_id\": 14, \"post_subject\": \"Re: VMware Player memory allocation issue\", \"username\": \"ejtstoker\", \"post_text\": \"This problem has re-appeared on VMware Player 4 and seems to be related to\\na problem with the player not being able to run in 64-bit mode. In this case\\nincreasing memory allocation does not help. 
I will drop back to the previous version of VMware player.\\n\\nEd\", \"post_time\": \"2011-10-05 15:33:54\" },\n\t{ \"post_id\": 392, \"topic_id\": 126, \"forum_id\": 14, \"post_subject\": \"Re: VMware Player memory allocation issue\", \"username\": \"flavio\", \"post_text\": \"Ed, good finding!\\n\\nWe identified the same problem a few days ago and updated the "known issues" description in the downloads page, but haven't been able to release an updated version yet (waiting to release the new version with 3.2.2, which will have some additional improvements).\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-10-04 23:24:37\" },\n\t{ \"post_id\": 391, \"topic_id\": 126, \"forum_id\": 14, \"post_subject\": \"VMware Player memory allocation issue\", \"username\": \"ejtstoker\", \"post_text\": \"Folks:\\n\\nI had a problem first running the current version of HPCC (3.2.0.2) on\\nthe current version of VMware (3.1.4) onto 64-bit Windows 7 (64-bit\\nService Pack 1). The problem was a failure during\\n\\nChecking vami-sfcbd status: ............. failred, restarting vami-sfcbd.\\n\\nThe problem was corrected once I edited the memory allocation > than 1024 MB.\\n\\nThe problem did not occur when I installed these programs on Windows Vista 64-bit.\\n\\nThis is a heads up for others that may run into the same problem.\\n\\nEd\", \"post_time\": \"2011-10-04 21:28:01\" },\n\t{ \"post_id\": 470, \"topic_id\": 135, \"forum_id\": 14, \"post_subject\": \"Re: eclcc override default g++ version\", \"username\": \"gsmith\", \"post_text\": \"[quote="richardkchapman":1j9klyf7]You can control the path used to locate g++ using compilerPath= in the specs file (usually eclcc.ini) - the location [compilerPath]/bin/g++ is what will actually be used.\\n\\nI couldn't get the above to work (but didn't try too hard) - switching to "master" branch is probably the way to go (it was broke when I tried it last).\", \"post_time\": \"2011-10-17 14:17:56\" },\n\t{ \"post_id\": 468, \"topic_id\": 135, \"forum_id\": 14, \"post_subject\": \"Re: eclcc override default g++ version\", \"username\": \"richardkchapman\", \"post_text\": \"I would expect you'll get other issues if you try to run a DEB designed for an earlier version of ubuntu on Ubuntu 11.10 (the boost library version found by default has changed, for a start). 
The master branch on GitHub includes support for building an Ubuntu 11.10 system...\\n\\nHaving said that, I'm not sure that the issue you see here is caused by a compiler version mismatch (though I suppose it might be...)\\n\\nYou can control the path used to locate g++ using compilerPath= in the specs file (usually eclcc.ini) - the location [compilerPath]/bin/g++ is what will actually be used.\", \"post_time\": \"2011-10-17 13:13:15\" },\n\t{ \"post_id\": 467, \"topic_id\": 135, \"forum_id\": 14, \"post_subject\": \"eclcc override default g++ version\", \"username\": \"gsmith\", \"post_text\": \"How do I change which version eclcc will use (without changing for my entire OS)?\\n\\nBackground:\\nUsing Ubuntu 11.10 (default g++ is 4.6xxx)\\nHPCC-Platform, built using g++ 4.5xxx\\n\\nGordon.\\n\\nPS eclcc.log:\\n\\n00000000 2011-10-17 13:55:59 8328 8328 Adding library: eclrtl\\n00000001 2011-10-17 13:55:59 8328 8328 Adding library: a.out.res.o\\n00000002 2011-10-17 13:55:59 8328 8328 Compiling a.out\\n00000003 2011-10-17 13:55:59 8328 8328 Failed to compile a.out\\n00000004 2011-10-17 13:55:59 8328 8328 \\n---------- compiler output --------------\\na.out.o: In function `main':\\na.out.cpp:(.text+0x1c): undefined reference to `start_query(int, char const**)'\\na.out.o: In function `EclProcess::Link() const':\\na.out.cpp:(.text._ZNK10EclProcess4LinkEv[non-virtual thunk to EclProcess::Link() const]+0x1a): undefined reference to `RtlCInterface::Link() const'\\na.out.o: In function `EclProcess::Release() const':\\na.out.cpp:(.text._ZNK10EclProcess7ReleaseEv[non-virtual thunk to EclProcess::Release() const]+0x1a): undefined reference to `RtlCInterface::Release() const'\\ncollect2: ld returned 1 exit status\\n\\n--------- end compiler output -----------
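As a concrete illustration of the compilerPath= suggestion above: the specs file (usually eclcc.ini, picked up by eclcc) would contain a single line such as the one below, where /opt/gcc-4.5 is a hypothetical prefix chosen so that /opt/gcc-4.5/bin/g++ is the g++ 4.5 the platform was built with:

compilerPath=/opt/gcc-4.5

(Note that gsmith reports above he could not get this working on that build, so treat it as a sketch rather than a verified fix.)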
\", \"post_time\": \"2011-10-17 12:59:38\" },\n\t{ \"post_id\": 2473, \"topic_id\": 166, \"forum_id\": 14, \"post_subject\": \"Re: Cannot access ECL watch page or the Configuration page\", \"username\": \"jeeves\", \"post_text\": \"ERR: Permission denied (publickey,gssapi-with-mic,password)."
\\n\\nI know this is an old post..\\nI faced this problem while installing on CentOS 6.3. It got resolved after the I did the following\\n\\n1. Remove the .ssh folder in the hpcc user's home directory\\n2. Recreate the directory manually and regenerate the keys using\\n sudo /opt/HPCCSystems/sbin/keygen.sh\\n\\n3. make sure that only the hpcc user has read or write access to .ssh folder\\n sudo -u hpcc chmod -R 700 ~hpcc/.ssh\\n\\n4. If #3 does not work try\\n sudo -u hpcc chmod -R 700 ~hpcc\\n\\nIf all these fail you will have to set OpenSSH logging level to DEBUG1 and look at the logs.\", \"post_time\": \"2012-10-07 18:31:43\" },\n\t{ \"post_id\": 672, \"topic_id\": 166, \"forum_id\": 14, \"post_subject\": \"Re: Cannot access ECL watch page or the Configuration page\", \"username\": \"jitendrakalyan\", \"post_text\": \"Hi, \\n\\nThanks for the reply Flavio. I figured out that the problem was with the firewall blocking these ports. I completed installation on 6 nodes. \\n\\nNow I have a different problem. May be this should go under a new topic heading, but I will post it here first. \\n\\nI uploaded a CSV file. I unable to spray the file.I think it to do with SSH keys, but I made sure of distributing the keys to all nodes as described in the installation manual. The DFUServer logs are as follows: \\n\\n\\n\\nError message on ECL watch page:\\n==================================\\n\\nSummaryMessage\\t:\\tFailed: Timeout waiting for slave x.x.x.30,x.x.x.28,x.x.x.29,x.x.x.31 to respond\\n\\n\\n\\nDFUserver(x.x.x.28) logs:\\n=============\\n\\n0000002D 2011-11-28 18:33:40 9209 11610 "Try to connect to slave x.x.x.29:6411"\\n0000002C 2011-11-28 18:33:40 9209 11610 "Start connect to correct slave ( 4)"\\nERR: Permission denied (publickey,gssapi-with-mic,password)."\\n0000002B 2011-11-28 18:33:40 9209 11610 "ssh result(255):\\n0000002A 2011-11-28 18:33:40 9209 11608 "Try to connect to slave x.x.x.30:6417"\\n00000029 2011-11-28 18:33:40 9209 11608 "Start connect to correct slave ( 3)"\\nERR: Permission denied (publickey,gssapi-with-mic,password)."\\n00000028 2011-11-28 18:33:40 9209 11608 "ssh result(255):\\n00000027 2011-11-28 18:33:40 9209 11611 "Try to connect to slave x.x.x.31:6412"\\n00000026 2011-11-28 18:33:40 9209 11611 "Start connect to correct slave ( 2)"\\nERR: Permission denied (publickey,gssapi-with-mic,password)."\\n00000025 2011-11-28 18:33:40 9209 11611 "ssh result(255):\\n00000024 2011-11-28 18:33:40 9209 11609 "Try to connect to slave x.x.x.28:6407"\\n00000023 2011-11-28 18:33:40 9209 11609 "Start connect to correct slave ( 1)"\\nERR: Permission denied (publickey,gssapi-with-mic,password)."\\n00000022 2011-11-28 18:33:40 9209 11609 "ssh result(255):\\n00000021 2011-11-28 18:33:40 9209 11611 "Start generate part x.x.x.31 [0x13520d80]"\\n0000001F 2011-11-28 18:33:40 9209 11611 "Transferring part x.x.x.31 [0x13520d80]"\\n00000020 2011-11-28 18:33:40 9209 11610 "Start generate part x.x.x.29 [0x1351bdc0]"\\n0000001E 2011-11-28 18:33:40 9209 11610 "Transferring part x.x.x.29 [0x1351bdc0]"\\n0000001D 2011-11-28 18:33:40 9209 11609 "Start generate part x.x.x.28 [0x1351bb50]"\\n0000001C 2011-11-28 18:33:40 9209 11608 "Start generate part x.x.x.30 [0x1351c000]"\\n0000001B 2011-11-28 18:33:40 9209 11609 "Transferring part x.x.x.28 [0x1351bb50]"\\n0000001A 2011-11-28 18:33:40 9209 11608 "Transferring part x.x.x.30 [0x1351c000]"\\n"\\n00000019 2011-11-28 18:33:40 9209 9216 "Begin to transfer parts (4 threads)\\n00000018 2011-11-28 18:33:40 9209 9216 "Calculate CRC = 1"\\n00000017 2011-11-28 
18:33:40 9209 9216 "[3] Init 0[0]->0[0]"\\n00000016 2011-11-28 18:33:40 9209 9216 "[2] Init 0[0]->0[0]"\\n00000015 2011-11-28 18:33:40 9209 9216 "[1] Init 0[0]->0[0]"\\n00000014 2011-11-28 18:33:40 9209 9216 "[0] Init 0[0]->0[0]"\\n00000013 2011-11-28 18:33:40 9209 9216 "Progress:"\\n00000012 2011-11-28 18:33:40 9209 9216 "Partition /var/lib/HPCCSystems/dropzone/hpccdata.txt{0}[76441914 size 25480609]->//x.x.x.31:7100/var/lib/HPCCSystems/hpcc-data/thor/travelportdatacsv._4_of_4{3}[0 size 25480609]"\\n00000011 2011-11-28 18:33:40 9209 9216 "Partition /var/lib/HPCCSystems/dropzone/hpccdata.txt{0}[50961327 size 25480587]->//x.x.x.30:7100/var/lib/HPCCSystems/hpcc-data/thor/travelportdatacsv._3_of_4{2}[0 size 25480587]"\\n00000010 2011-11-28 18:33:40 9209 9216 "Partition /var/lib/HPCCSystems/dropzone/hpccdata.txt{0}[25480728 size 25480599]->//x.x.x.29:7100/var/lib/HPCCSystems/hpcc-data/thor/travelportdatacsv._2_of_4{1}[0 size 25480599]"\\n0000000F 2011-11-28 18:33:40 9209 9216 "Partition /var/lib/HPCCSystems/dropzone/hpccdata.txt{0}[0 size 25480728]->/var/lib/HPCCSystems/hpcc-data/thor/travelportdatacsv._1_of_4{0}[0 size 25480728]"\\n0000000E 2011-11-28 18:33:40 9209 9216 "Partition restored from recovery information"\\n0000000D 2011-11-28 18:33:40 9209 9216 "Finished gathering file sizes..."\\n0000000C 2011-11-28 18:33:40 9209 9216 "Start gathering file sizes..."\\n0000000B 2011-11-28 18:33:40 9209 9216 "Pull = 1 from recovery"\\n0000000A 2011-11-28 18:33:40 9209 9216 "Finished gathering file sizes..."\\n00000009 2011-11-28 18:33:40 9209 9216 "Gathering 1 file sizes on 1 threads"\\n00000008 2011-11-28 18:33:40 9209 9216 "Start gathering file sizes..."\\n00000007 2011-11-28 18:33:40 9209 9216 "Using transfer buffer size 65536"\\n00000006 2011-11-28 18:33:40 9209 9216 "DFS: import(hpccdata.txt,)"\\n00000005 2011-11-28 18:33:40 9209 9216 "DFU Server running job: D20111128-161542"\\n00000004 2011-11-28 17:36:47 9209 9209 "Creating sentinel file dfuserver.sentinel for rerun from script"\\n00000003 2011-11-28 17:36:47 9209 9209 "DFU monitor waiting on queue dfuserver_monitor_queue timeout 900000"\\n00000002 2011-11-28 17:36:47 9209 9209 "DFU server waiting on queue dfuserver_queue"\\n00000001 2011-11-28 17:36:47 9209 9209 "ftslave log dir set to /var/log/HPCCSystems/mydfuserver"\", \"post_time\": \"2011-11-30 13:33:15\" },\n\t{ \"post_id\": 649, \"topic_id\": 166, \"forum_id\": 14, \"post_subject\": \"Re: Cannot access ECL watch page or the Configuration page\", \"username\": \"flavio\", \"post_text\": \"Jitendra,\\n\\nI have a few questions: \\n\\nDid you start the hpcc services? (hpcc-init start)\\n\\nDo you have X installed too? If so, can you try to run a local web browser and connect to http://localhost:8010/? Does it open the ECLWatch web page? \\n\\nAlternatively you could try with lynx or links (text mode web browsers) or just telnet to localhost on port 8010.\\n\\nCan you run "netstat -tanp | grep 8010" and "iptables -L -n" and post what you get?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-11-29 14:28:38\" },\n\t{ \"post_id\": 626, \"topic_id\": 166, \"forum_id\": 14, \"post_subject\": \"Cannot access ECL watch page or the Configuration page\", \"username\": \"jitendrakalyan\", \"post_text\": \"Hi, \\n\\nI am trying to install the community edition on multiple nodes. I finished installing it on a single node. However, I have the following problems:\\n\\n1. Cannot open the ECL watch page.\\n2. Cannot open the configuration manager.\\n\\nI sense these are related. 
I have made sure I am using the correct ip address. Also pinged the IP and checked, the connection from my laptop seems to be fine. I am able to use putty, VNC and connect to the Linux server. However, can't access the pages mentioned above.\\n\\nAny help pointing me in the right direction is appreciated.\\n\\nAdditional details:\\nServer- Redhat Linux 2.6.18-238.12.1.el5\\nLet me know if more details are required.\\n\\nThank you.\\n\\n--\\nJitendra\", \"post_time\": \"2011-11-22 01:11:04\" },\n\t{ \"post_id\": 755, \"topic_id\": 193, \"forum_id\": 14, \"post_subject\": \"Re: environment.xml\", \"username\": \"jsmith\", \"post_text\": \"With that many cores/mem, it's probably best to run multiple slaves per physical node..\\nBut it would be matter of experimentation to see what drives the best throughput and will depend on disk speeds, contention and the nature of the jobs.\\n\\nNumber of slaves nodes for thor cluster:\\nIdeally you'd dedicate these 3 nodes to thorslaves, but obviously you need to run the other services, including thormaster, somewhere. If you have no 4th node to dedicate to the other services (which could be of considerably less power), then I suppose you should spread the other services out more or less evenly over the 3 nodes.\\n\\nNumber of slaves per node: I'd suggest trying with 12, each configured with 18G (leaving a healthy chunk for other services + OS).\\n\\nNumber of nodes for roxie cluster: If you're not intending to use, then you can leave it with default settings.\", \"post_time\": \"2011-12-22 16:24:55\" },\n\t{ \"post_id\": 754, \"topic_id\": 193, \"forum_id\": 14, \"post_subject\": \"environment.xml\", \"username\": \"onur\", \"post_text\": \"Hello,\\n\\nMy cluster has 3 boxes with 24core cpu 256G memory. What is the best configuration to achive best throughput.\\n\\nI am using configuration wizard.(Do I need to configure in advanced option?) Will you provide reasonable values for the following paramaters. No plan to use roxie cluster.\\n\\nNumber of nodes for roxie cluster: \\nNumber of slaves nodes for thor cluster:\\nNumber of thos slaves per node: \\n\\nRegards,\\nOnur\", \"post_time\": \"2011-12-20 15:32:50\" },\n\t{ \"post_id\": 785, \"topic_id\": 202, \"forum_id\": 14, \"post_subject\": \"Re: using dfuplus from windows app server\", \"username\": \"rtaylor\", \"post_text\": \"In order to use your local Windows box as a dropzone you would need a Windows version of dafileserv running on it, and there is only a Linux version currently being distributed. \\n\\nYou can interactively upload/download files (up to 2 Gb) from/to Windows machines, using a Linux dropzone, from the ECL Watch page.\", \"post_time\": \"2012-01-05 22:03:28\" },\n\t{ \"post_id\": 784, \"topic_id\": 202, \"forum_id\": 14, \"post_subject\": \"Re: using dfuplus from windows app server\", \"username\": \"bforeman\", \"post_text\": \"dfuplus was made for just this type of application. 
In the docs, for example, here is a fixed spray operation:\\n\\nC:\\\\>dfuplus action=spray srcip=10.150.50.14\\nsrcfile=c:\\\\import\\\\timezones.txt dstname=RTTEMP::timezones.txt\\ndstcluster=thor format=fixed recordsize=155\\n\\nAnd the DFUPLUS.INI file sppecifies where the "thor" is located:\\n\\nserver=http://10.150.50.12:8010\\nusername=rlor\\npassword=password\\noverwrite=1\\nreplicate=\\n\\nSo I think that you just need the real IP address instead of localhost, but what you are trying to do should work fine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-05 16:05:53\" },\n\t{ \"post_id\": 782, \"topic_id\": 202, \"forum_id\": 14, \"post_subject\": \"using dfuplus from windows app server\", \"username\": \"aintnomyth\", \"post_text\": \"Hello,\\nI have a Windows box with only the ECL IDE installed, would it be possible to use dfuplus to spray files directly from my local machine to a remote cluster? If not, is there some sort of dafilesrv component I can install locally or would I need to install all the hpcc/thor "stuff" to make it work?\\n\\nFyi, I've tried using dfuplus with localhost but it can't seem to find my file in localhost:7100...my filename.\\n\\nThanks!\", \"post_time\": \"2012-01-05 14:17:20\" },\n\t{ \"post_id\": 820, \"topic_id\": 204, \"forum_id\": 14, \"post_subject\": \"Re: Segmentation fault\", \"username\": \"HPCC Staff\", \"post_text\": \"Yes, we are planning for another release by end of the month. Thanks again!\", \"post_time\": \"2012-01-12 19:59:29\" },\n\t{ \"post_id\": 816, \"topic_id\": 204, \"forum_id\": 14, \"post_subject\": \"Re: Segmentation fault\", \"username\": \"shriram.soni\", \"post_text\": \"Seems this issue is fixed in the code. And will be available in next binary release?\", \"post_time\": \"2012-01-12 13:20:12\" },\n\t{ \"post_id\": 801, \"topic_id\": 204, \"forum_id\": 14, \"post_subject\": \"Re: Segmentation fault\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for reporting the error and attaching the accompanying log file. We are looking into this issue. \\n\\nTicket reference:\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/1198\", \"post_time\": \"2012-01-09 13:58:21\" },\n\t{ \"post_id\": 787, \"topic_id\": 204, \"forum_id\": 14, \"post_subject\": \"Segmentation fault\", \"username\": \"shriram.soni\", \"post_text\": \"The ECL watch stopped working and than we saw IDE also stopped. \\n\\nWe noticed that myEsp was not running on node. please see log file for the error mentioned.\", \"post_time\": \"2012-01-06 12:28:49\" },\n\t{ \"post_id\": 838, \"topic_id\": 213, \"forum_id\": 14, \"post_subject\": \"Re: Installation issue with windows7\", \"username\": \"bforeman\", \"post_text\": \"There is a couple of things to check.\\n\\nIs the VM Image running?\\n\\nDo you have a firewall installed that could be blocking those ports?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-17 16:13:14\" },\n\t{ \"post_id\": 835, \"topic_id\": 213, \"forum_id\": 14, \"post_subject\": \"Installation issue with windows7\", \"username\": \"selvaonline\", \"post_text\": \"Hi, \\n\\nI am trying to install the community edition on windows 7 machine. I finished installation. However, I have the following problems:\\n\\n1. Cannot open the ECL watch page.\\n2. Cannot open the configuration manager.\\n\\nhttps://192.168.184.128:5480\\nhttp://192.168.184.128:8010\\n\\nWhat will be the issue? 
Thanks a lot for your help in advance\\n\\nBest Regards,\\nSelva\", \"post_time\": \"2012-01-16 16:23:35\" },\n\t{ \"post_id\": 1131, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"brian.diekelman@lnssi.com\", \"post_text\": \"Regarding the ICU issue mentioned before, the symbols could not be linked because of the /Zc:wchar_t- additional parameter being passed to the compiler.\\n\\nThis disables wchar_t being treated as a native type, and so is expanded into an unsigned short.\\n\\nIf you look at the exported symbols in icuuc.lib in a .lib built with VC2005+, the signature is UCharCharacter::UCharCharacterIterator(wchar_t const*,int)\\n\\nWith /Zc:wchar_t- passed in, the generated object files try to import UCharCharacter::UCharCharacterIterator(unsigned short const*,int)\\n\\ncmake_modules\\\\commonSetup.cmake:111 enables /Zc:wchar_t- if ARCH64BIT is 0... which breaks in the case of compiling a 32-bit build on a 64-bit system.\\n\\nCommenting out that line, regenerate build files with cmake, and rebuilding succeeds with the latest ICU (4.8.1) on Win7-64 compiling a 32-bit target.\", \"post_time\": \"2012-02-18 21:34:13\" },\n\t{ \"post_id\": 1063, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"flavio\", \"post_text\": \"David,\\n\\nYou'll need to start the executables manually, as we don't have support to run them as Windows services.\\n\\nThe best order is:\\n\\ndali (daserver)\\ndfuserver (dfuserver)\\neclagent (eclagent)\\neclccserver (eclccserver)\\neclscheduler (eclscheduler)\\nesp (esp)\\nroxie (roxie)\\nsasha (sasha)\\nthor (thormaster, thorslave)\\n\\nAlthough we don't have windows scripts, you can use the linux versions as guidelines for additional information (for example, environmental variables that they expect).\\n\\nKeep in mind that you need to have a properly set environment.xml configuration file or some of these won't run correctly.\\n\\nPlease let me know your results.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-02-09 13:46:01\" },\n\t{ \"post_id\": 1044, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"OK.. I've found the time to install and I am having a problem .... how do I start the service? \\n\\nThe installer simply copies the files to a location on the disk but nothing is started. As far as I can tell no new Windows services have been configured. The only script files I can find in the installation are\\n\\nrun_keydiff\\nrun_keypatch\\nstart_eclagent\\n\\nand aside from all being bash shell scripts none seem suited to the task.\\n\\nFrom the Installing_and_RunningTheHPCCPlatform.pdf document there is an hpcc-init script for Linux but nothing for Windows\\n\\nDo I need to start the relevant exe's manually? If so what are they and what order should they be in?\\n\\nDavid\", \"post_time\": \"2012-02-07 14:31:33\" },\n\t{ \"post_id\": 1037, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Turns out I can avoid that error by enabling zlib in the 3.7 build. I have successfully compiled it now. 
Next, run the installer!\\n\\nI've kicked off a download of 3.6.x and I will fall back on that if I encounter any more problems\\n\\nDavid\", \"post_time\": \"2012-02-06 16:37:22\" },\n\t{ \"post_id\": 1035, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"That looks like a trunk build issue. switch to candidate-3.6.x and see how you get on!\", \"post_time\": \"2012-02-06 16:02:53\" },\n\t{ \"post_id\": 1034, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Hi\\n\\nUsing the nm tool, I inspected earlier versions of the ICU library and it turns out that the mangled names match those provide by ICU 3.4.1 \\nSo after correcting the ICU library I go pass that step and I am now encountering the following error:\\n\\nError\\t77\\terror C2065: 'ERRORID_ECLWATCH_TOPOLOGY' : undeclared identifier\\tc:\\\\hpcc\\\\esp\\\\services\\\\ws_topology\\\\ws_topologyService.cpp\\t486\\tws_topology\\n\\nSuggestions?\\n\\nDavid\", \"post_time\": \"2012-02-06 15:51:25\" },\n\t{ \"post_id\": 1029, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Thanks\\n\\nI checked the linker input settings and it does include the ICU libraries as you listed.\\n\\nIt seems as if the underlying problem is the ICU library. I used the nm tool (through cygwin ) to list the symbols in icuuc.lib and none of the mangled names listed in the library match the ones expected by Visual Studio's linker.\\n\\n\\nDavid\", \"post_time\": \"2012-02-06 14:00:33\" },\n\t{ \"post_id\": 1023, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"(I am building on windows, but could not work out what version of ICU I have...)\\n\\nI was building 3.4.2, but just checked trunk and eclrtl built there fine (I am building on Win7/64, but targeting Win32 so should be no diff there).\\n\\nIn VS2008 can you check the linker input settings for eclrtl and see if it includes:\\nX:\\\\some_path\\\\icu\\\\lib\\\\icuuc.lib\\nX:\\\\some_path\\\\icu\\\\lib\\\\icuin.lib\\n\\nI suspect the first lib is missing in which case its a cmake setting(s) issue.\\n\\nIf you put all your third party libraries in one folder ("externals" or such like) and set cmake EXTERNALS_DIRECTORY to point to that folder, the cmake find modules will check there.\\n\\nMy "externals" folder contains (just to give you an idea of what folder names it is expecting - also note my list has extra folders for my own needs):\\nagg\\nbinutils\\nbison\\nboost\\ncppunit\\nexpat\\nfirebreath\\nfreetype2\\nfuse\\ngraphviz2\\nicu\\ninxight\\nmysql\\nopenldap\\nopenssh\\nopenssl\\nsubversion\\nsybase\\nwinldap\\nwtl\\nxalan\\nxml-security-c\\nzlib
\\n\\nGordon.\", \"post_time\": \"2012-02-06 12:06:54\" },\n\t{ \"post_id\": 1022, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"ghalliday\", \"post_text\": \"I'll investigate the problem with it disabled.\", \"post_time\": \"2012-02-06 11:39:20\" },\n\t{ \"post_id\": 1021, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Unfortunately Linux isn't an option for me, I am sure the set-up would have been much easier... \\n\\nMy cmake settings are the same as yours. I had another go at a build using 4.4.2 however I am still getting the same problems. What version of the trunk did you use ( if you can recall)? My build tag is 'community_3.7.0-1trunk'\\n\\nAnother thing, I am building the code on a 32bit Windows install. Would that make a difference? \\n\\nRegards\\nDavid\", \"post_time\": \"2012-02-06 11:30:56\" },\n\t{ \"post_id\": 1020, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"I know on Linux I was building against 4.4.2-2 (I assume somthing similar on windows...)\\n\\nFWIW my windows cmake settings are:\\nICU_INCLUDE_DIR: x:\\\\some_path\\\\icu\\\\include\\nICU_LIBRARIES: x:\\\\some_path\\\\icu\\\\lib\\\\icuuc.lib\", \"post_time\": \"2012-02-06 10:42:36\" },\n\t{ \"post_id\": 1019, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Thanks!\\n\\nIt seems as if I am having a problem with ICU. When I enable ICU and try building the VS project I get quite a few linking errors below is one of them:\\n\\n\\nError\\t23\\terror LNK2019: unresolved external symbol "__declspec(dllimport) public: __thiscall icu_44::UCharCharacterIterator::UCharCharacterIterator(unsigned short const *,int)" (__imp_??0UCharCharacterIterator@icu_44@@QAE@PBGH@Z) referenced in function "void __cdecl escapeUnicode(unsigned int,unsigned short const *,class StringBuffer &)" (?escapeUnicode@@YAXIPBGAAVStringBuffer@@@Z)\\tC:\\\\hpcc\\\\builddir\\\\rtl\\\\eclrtl\\\\eclrtl.obj\\teclrtl\\n\\nwith ICU disabled I get several errors complaining that unicode/utf.h cannot be found, again I have pasted one of the errors below:\\n\\nError\\t104\\terror C1083: Cannot open include file: 'unicode/utf.h': No such file or directory\\tc:\\\\hpcc\\\\rtl\\\\eclrtl\\\\eclrtl.hpp\\t32\\t1\\tfileview2\\n\\nI attempted the compilation with both ICU4C 4.4.2 and ICU4C 4.8.1\\n\\nAny suggestions?\", \"post_time\": \"2012-02-06 10:17:58\" },\n\t{ \"post_id\": 989, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"[quote="davids":2rkypzl7]...I cant help but think the README file supplied with the source is missing an important step...\\n\\nIn my case (and on Linux) the missing step looked like this:\\n
sudo apt-get install g++ gcc make bison flex binutils-dev libldap2-dev libicu-dev libxalan110-dev zlib1g-dev libboost-regex-dev libssl-dev
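\nOn a RHEL/CentOS box the rough equivalent would be something like the following (package names are approximate and vary by distro release):\n
sudo yum install gcc gcc-c++ make bison flex binutils-devel openldap-devel libicu-devel zlib-devel boost-devel openssl-devel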
\\nIn Windows you have to forage for the libraries...\", \"post_time\": \"2012-02-02 17:13:50\" },\n\t{ \"post_id\": 988, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"I think you need to do something like: \\ncmake -DOPENLDAP_INCLUDE_DIR="D:/path/to/winldap/include" -DOPENLDAP_LIBRARIES="D:/path/to/winldap/lib/Wldap32.lib"
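\nOr, if you don't need LDAP support at all, it should be possible to switch it off on the same command line (USE_OPENLDAP is the option defined in commonSetup.cmake):\n
cmake c:\\hpcc -G "Visual Studio 9 2008" -DUSE_OPENLDAP=OFF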
\\nICU Is the unicode support libraries (http://site.icu-project.org/download). If you don't use it you can set USE_ICU to false for cmake.\\n\\nHere are some more "USE_" options:\\n USE_OPENLDAP\\n USE_CPPUNIT\\n USE_ICU\\n USE_XALAN\\n USE_XERCES\\n USE_ZLIB\\n USE_BOOST_REGEX\\n USE_OPENSSL\\n USE_MYSQL\\n \\nOn my windows box I have (set to true):\\n USE_ICU\\n USE_OPENLDAP\\n USE_OPENSSL\\n USE_XALAN\", \"post_time\": \"2012-02-02 17:05:44\" },\n\t{ \"post_id\": 986, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"I am still getting the same error. Explicitly setting OPENLDAP_LIBRARIES and OPENLDAP_INCLUDE_DIR as system variables doesn't help either. As a test I tried disabling OpenLDAP by editing commonSetup.cmake by changing\\n\\noption(USE_OPENLDAP "Enable OpenLDAP support (requires OpenLDAP)" ON)\\nto\\noption(USE_OPENLDAP "Enable OpenLDAP support (requires OpenLDAP)" OFF)\\n\\nHowever, cmake falls over at the next step and complains about not finding ICU (see bottom of post) even after downloading and explicitly setting the system variables.\\n\\nI cant help but think the README file supplied with the source is missing an important step.\\n\\n=================================================================================\\n\\nC:\\\\hpcc\\\\builddir>cmake c:\\\\hpcc -G "Visual Studio 9 2008"\\n-- Check for working C compiler using: Visual Studio 9 2008\\n-- Check for working C compiler using: Visual Studio 9 2008 -- works\\n-- Detecting C compiler ABI info\\n-- Detecting C compiler ABI info - done\\n-- Check for working CXX compiler using: Visual Studio 9 2008\\n-- Check for working CXX compiler using: Visual Studio 9 2008 -- works\\n-- Detecting CXX compiler ABI info\\n-- Detecting CXX compiler ABI info - done\\n-- Making Release system\\n-- 64bit architecture is 0\\n-- Looking for include files CMAKE_HAVE_PTHREAD_H\\n-- Looking for include files CMAKE_HAVE_PTHREAD_H - not found.\\n-- Found Threads: TRUE\\n-- Could NOT find ICU (missing: ICU_LIBRARIES ICU_INCLUDE_DIR)\\nCMake Error at cmake_modules/commonSetup.cmake:276 (message):\\n ICU requested but package not found\\nCall Stack (most recent call first):\\n CMakeLists.txt:123 (include)\\n\\n\\n-- Configuring incomplete, errors occurred!\", \"post_time\": \"2012-02-02 16:51:24\" },\n\t{ \"post_id\": 982, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"Thanks,\\n\\nI've kicked off an installation of the Windows SDK. I will attempt a build when its complete and post the outcome.\", \"post_time\": \"2012-02-02 14:41:36\" },\n\t{ \"post_id\": 977, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Re: Building Community Edition on Windows\", \"username\": \"gsmith\", \"post_text\": \"It is part of the Windows SDK I believe (http://social.msdn.microsoft.com/Forums ... 09ccbc75e9)\", \"post_time\": \"2012-02-02 11:54:19\" },\n\t{ \"post_id\": 976, \"topic_id\": 244, \"forum_id\": 14, \"post_subject\": \"Building Community Edition on Windows\", \"username\": \"davids\", \"post_text\": \"I am encountering an OpenLDAP dependency problem when attempting to create a Visual Studio 2008 solution using the most recent code.\\n\\nI have tried installing OpenLDAP via Cygwin but that doesnt fix the problem. 
\\nWhere can I find the relevant OpenLDAP library package for Windows?\\nCan HPCC be compiled without LDAP support?\\n\\nSee below for my error dump\\n\\nRegards\\nDavid\\n\\nC:\\\\hpcc\\\\builddir>cmake c:\\\\hpcc -G "Visual Studio 9 2008"\\n-- Check for working C compiler using: Visual Studio 9 2008\\n-- Check for working C compiler using: Visual Studio 9 2008 -- works\\n-- Detecting C compiler ABI info\\n-- Detecting C compiler ABI info - done\\n-- Check for working CXX compiler using: Visual Studio 9 2008\\n-- Check for working CXX compiler using: Visual Studio 9 2008 -- works\\n-- Detecting CXX compiler ABI info\\n-- Detecting CXX compiler ABI info - done\\n-- Making Release system\\n-- 64bit architecture is 0\\n-- Looking for include files CMAKE_HAVE_PTHREAD_H\\n-- Looking for include files CMAKE_HAVE_PTHREAD_H - not found.\\n-- Found Threads: TRUE\\n-- Could NOT find OpenLDAP (missing: OPENLDAP_LIBRARIES OPENLDAP_INCLUDE_DIR)\\nCMake Error at cmake_modules/commonSetup.cmake:256 (message):\\n OPENLDAP requested but package not found\\nCall Stack (most recent call first):\\n CMakeLists.txt:123 (include)\\n\\n\\n-- Configuring incomplete, errors occurred!\", \"post_time\": \"2012-02-02 11:15:04\" },\n\t{ \"post_id\": 1118, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Re: Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"Allan\", \"post_text\": \"Hi Gordon,\\n\\nNow Have version 6.0.4 With 3.4.2 server and all is now working again \\n\\nThanks for the guidance.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-15 20:49:01\" },\n\t{ \"post_id\": 1106, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Re: Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"gsmith\", \"post_text\": \"The simplest solution is to ensure you use the IDE that came from the ECL Watch page within that environment. IOW the client eclcc compiler version matches the server version.\\n\\nGordon.\", \"post_time\": \"2012-02-14 12:56:42\" },\n\t{ \"post_id\": 1104, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Re: Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"Allan\", \"post_text\": \"Hi Anyone,\\n\\nAny update on this?\\n\\nI cannot use my environment till this is fixed!\\n\\nPlease help.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-14 12:10:37\" },\n\t{ \"post_id\": 1094, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Re: Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"Allan\", \"post_text\": \"Forgot to mention - Yes the syntax check passes.\", \"post_time\": \"2012-02-11 19:06:12\" },\n\t{ \"post_id\": 1093, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Re: Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"gsmith\", \"post_text\": \"Does the local compiler pass? 
If so it looks like a case of the 3.4.2 compiler (or its associated libraries) not working with a 3.2.2 server.\", \"post_time\": \"2012-02-11 17:06:18\" },\n\t{ \"post_id\": 1092, \"topic_id\": 261, \"forum_id\": 14, \"post_subject\": \"Upgrade has not upgraded my 'lib_stringlib'\", \"username\": \"Allan\", \"post_text\": \"Hi, I've just upgraded my HPCC to: See attached image.\\n\\nNow code that used to work is failing.\\n\\nSee other attachment.\\n\\nIt seems the 'str.ecl' has been upgraded without the accompanying 'lib_stringlib' library.\\n\\nAny ideas?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-02-11 15:46:49\" },\n\t{ \"post_id\": 1199, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":i257amv6]Who said anything about bi-directional??
\\n\\nI did. Right up there. Didn't you see it?
\\n\\nSeriously: Pull data from a relational database into ECL, Do Magic(tm), push data back into the database. Awesomeness, defined.\", \"post_time\": \"2012-02-24 19:35:04\" },\n\t{ \"post_id\": 1196, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"rtaylor\", \"post_text\": \"Who said anything about bi-directional??
\", \"post_time\": \"2012-02-24 19:12:34\" },\n\t{ \"post_id\": 1194, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"DSC\", \"post_text\": \"Not for what I'm doing.\\n\\nIn general, relational database interfaces would be a wonderful feature! The rest of the business tends to run on RDBMs, so a live bi-directional connection would enable HPCC to truly integrate with more work flows. That would be fantastic.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-02-24 18:28:28\" },\n\t{ \"post_id\": 1192, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"rtaylor\", \"post_text\": \"Of course, there IS a JDBC driver in the works -- does that muddy the picture some?\", \"post_time\": \"2012-02-24 18:20:40\" },\n\t{ \"post_id\": 1190, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"DSC\", \"post_text\": \"That is good news from a number of different points of view.\\n\\nThanks, Richard!\\n\\nDan\", \"post_time\": \"2012-02-24 18:17:18\" },\n\t{ \"post_id\": 1189, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Re: Does HPCC rely on Java\", \"username\": \"rtaylor\", \"post_text\": \"Nope -- HPCC is DEcaffeinated.
\", \"post_time\": \"2012-02-24 18:15:42\" },\n\t{ \"post_id\": 1188, \"topic_id\": 280, \"forum_id\": 14, \"post_subject\": \"Does HPCC rely on Java\", \"username\": \"DSC\", \"post_text\": \"My test HPCC cluster is comprised entirely of RHEL5 nodes, all of which have Java 1.4.x installed. Does HPCC rely on Java in any way? Can I upgrade Java without breaking HPCC?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-24 14:50:07\" },\n\t{ \"post_id\": 1313, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"jeremy\", \"post_text\": \"Excellent! Thanks for all the info.\", \"post_time\": \"2012-03-09 22:58:28\" },\n\t{ \"post_id\": 1312, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\n
Per your previous replies, I'm unable to find any documentation on the "Backup Nodes" process or the "DISTRIBUTE" process. Any pointers?
I was just informed that Backup Nodes is new to the 3.6 open source version we just released, so docs have not yet been created for it (this is mostly a configuration issue that, in our pre-Open Source versions we would always pre-configure for the customer before delivering the system). I suggest you open a new thread on the topic and get some of the operations folks to explain what you need to do to set it up.\\n\\nDISTRIBUTE is an ECL function. Here's a link to the HTML version: http://hpccsystems.com/community/docs/e ... distribute\\n\\nOr you can find it in the ECLLanguageReference.PDF downloadable here: http://hpccsystems.com/community/docs/learning-ecl\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-09 22:26:45\" },\n\t{ \"post_id\": 1311, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"jeremy\", \"post_text\": \"Hi Richard,\\nThanks, and understood. I was mainly trying to build out a comparison with other noSQL systems during our evaluation.\\nPer your previous replies, I'm unable to find any documentation on the "Backup Nodes" process or the "DISTRIBUTE" process. Any pointers?\\nThanks,\\nJeremy\", \"post_time\": \"2012-03-09 21:44:35\" },\n\t{ \"post_id\": 1310, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\nOne last question then, is the Backup Nodes process ( or the redundancy system in general ) able to take advantage of geo-location? For instance, can I configure HPCC to put the copies in separate datacenters?
Wouldn't that be nice? \\n\\nNo, the purpose of data replication is for ensuring performance, not Disaster Recovery. For DR stuff you would need to do the same thing we do -- set up a separate data center with all the mission critical stuff replicated and have a fail over plan in place (which really gets into operational stuff that I am unqualified to address, since I am firmly on the software/language side of things here). \\n\\nHowever, if you would like to have some consultation on how best to set up your shop, I do know the right people to speak to. Just shoot me an email to rtaylor@hpccsystems.com\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-09 20:25:27\" },\n\t{ \"post_id\": 1309, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"jeremy\", \"post_text\": \"I see, thanks Richard!\\nOne last question then, is the Backup Nodes process ( or the redundancy system in general ) able to take advantage of geo-location? For instance, can I configure HPCC to put the copies in separate datacenters?\", \"post_time\": \"2012-03-09 20:15:28\" },\n\t{ \"post_id\": 1308, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\n
So that implies that if a host crashes and needs to be replaced, then even if we've chosen to have our data stored redundantly by HPCC, we have no guarantee that the lost copies of any data on that host will be written anywhere or moved to the new host automatically? Rather, we'd have to use the steps you've listed above?
No.\\n\\nEach individual file part for a logical dataset is always duplicated in another physical location (a separate machine). The system is set up so that if the node doesn't find the file part on its disk drive it automatically looks for it in the replicated location. AND, you can use RAID disk arrays on each node to make the small possibility of data loss infinitesimal, should you so choose. \\n\\nThis means that if a drive fails OR a single node fails, you don't lose any data. The only way possible to lose data is if both nodes with the same data die "at the same time" -- that is, before you get a chance to run your periodic Backup Nodes process that ensures all data is replicated in the proper locations. Backup Nodes is not an automatic process, but you can set it up to run periodically (we do it nightly).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-09 19:59:24\" },\n\t{ \"post_id\": 1307, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"jeremy\", \"post_text\": \"Interesting,\\nSo that implies that if a host crashes and needs to be replaced, then even if we've chosen to have our data stored redundantly by HPCC, we have no guarantee that the lost copies of any data on that host will be written anywhere or moved to the new host automatically? Rather, we'd have to use the steps you've listed above?\\nThanks,\\nJeremy\", \"post_time\": \"2012-03-09 17:19:48\" },\n\t{ \"post_id\": 1305, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"Re: How does rebalancing work?\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\n
I've not seen any documentation regarding how data is rebalanced when nodes are added or dropped from the cluster.
There is no automatic process for handling this. We've found re-sizing a cluster to be a rare occurrence. And, with an HPCC system, it is never done "on the fly" (unlike Hadoop).\\n\\nThere are a couple of ways of handling this:\\n\\n1) You can despray the data, reconfigure the cluster, then spray the data back on (overwriting the old files). \\n\\n2) You can ramp up the new sized cluster as a separate cluster within the same environment (yes, you can have as many Thor clusters as you want/need in a single environment) and simply read the data from the old cluster (running on the new one), DISTRIBUTE it and then write it to disk on the new cluster as a new dataset. \\n\\n3) If you are simply adding additional nodes, then you can leave the data in place, add the nodes and restart, then DISTRIBUTE the file (which will spread the data across all the nodes) and write it to disk again as a new dataset. \\n\\n4) If you are removing nodes, then you'll have to use either solution #1 or #2.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-09 16:17:30\" },\n\t{ \"post_id\": 1304, \"topic_id\": 305, \"forum_id\": 14, \"post_subject\": \"How does rebalancing work?\", \"username\": \"jeremy\", \"post_text\": \"I've not seen any documentation regarding how data is rebalanced when nodes are added or dropped from the cluster. Can you please describe? From what I can gather, adding or dropping nodes involves an edit of the environment.xml and system-wide restart. What happens then with respect to the underlying data?\\nThank you.\", \"post_time\": \"2012-03-09 15:20:31\" },\n\t{ \"post_id\": 1478, \"topic_id\": 333, \"forum_id\": 14, \"post_subject\": \"Re: install hpcc systems in linux cluster\", \"username\": \"sort\", \"post_text\": \"You can create as many of your own VMs and install hpcc to it (I personally run a 3 node VM using our ubuntu installation). You can NOT install multiple version of the HPCC VM and have them communicate with each ohter\", \"post_time\": \"2012-04-10 20:40:53\" },\n\t{ \"post_id\": 1476, \"topic_id\": 333, \"forum_id\": 14, \"post_subject\": \"install hpcc systems in linux cluster\", \"username\": \"szhou\", \"post_text\": \"Hi, \\n\\nI would like to have hpcc system/thor installed in a cluster with 4 nodes.\\nShall I use linux vm to install hpcc system?\\n\\nthanks,\\n\\nsjz\", \"post_time\": \"2012-04-10 20:16:26\" },\n\t{ \"post_id\": 1564, \"topic_id\": 344, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu12.04 Install\", \"username\": \"gsmith\", \"post_text\": \"Follow-up:\\n\\nTurns out this is an issue which affects packages created using cmake 2.8.7 – which happens to be the current version in Ubuntu 12.04 (this is true as of writing and one day before 12.04 is scheduled to go gold).\\n\\nThe platform download for Ubuntu 12.04 on this web site has been refreshed with a good package.\\n\\nAnyone wanting to create packages is advised to get a newer/older version of cmake (or build it from current sources).\\n\\nGordon.\", \"post_time\": \"2012-04-25 14:38:07\" },\n\t{ \"post_id\": 1547, \"topic_id\": 344, \"forum_id\": 14, \"post_subject\": \"Ubuntu12.04 Install\", \"username\": \"gsmith\", \"post_text\": \"I am getting the following error, trying to install on Ubuntu 12.04.\\n\\nI get the same error with a clean build + package from github sources OR via the downloaded package from this website.\\n\\nsudo dpkg --install hpccsystems-platform_community-3.6.2-2precise_amd64.deb \\n(Reading database ... 
317410 files and directories currently installed.)\\nUnpacking hpccsystems-platform (from hpccsystems-platform_community-3.6.2-2precise_amd64.deb) ...\\ndpkg: error processing hpccsystems-platform_community-3.6.2-2precise_amd64.deb (--install):\\n corrupted filesystem tarfile - corrupted package archive\\ndpkg-deb: error: subprocess paste was killed by signal (Broken pipe)\\nErrors were encountered while processing:\\n hpccsystems-platform_community-3.6.2-2precise_amd64.deb
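\nA quick sanity check to rule out a corrupted download (rather than a genuinely bad package) is something along these lines:\n
md5sum hpccsystems-platform_community-3.6.2-2precise_amd64.deb\ndpkg-deb --contents hpccsystems-platform_community-3.6.2-2precise_amd64.deb > /dev/null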
\\n\\nThoughts?\\n\\nGordon.\", \"post_time\": \"2012-04-22 08:19:06\" },\n\t{ \"post_id\": 1715, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nExcellent news! \\n\\nThe bug that Chris is referring to only shows up on the Upload/Download Files section. The drop zones should show up fine in the Spray areas of the ECL Watch.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-05 13:02:39\" },\n\t{ \"post_id\": 1714, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"OK, this is one of those times that you wish there was an undo command for real life.\\n\\nOne node -- the primary node I connect to with ECL Watch -- was using an outdated environment.xml file. All other nodes were using the updated version(s). This had to do with the fact that I was using a symlink to map /etc/HPPSystems/environment.xml -> source/environment.xml on the other nodes, but the primary node had an actual file instead.\\n\\nWhen I launched configmgr to make this dropzone change I noticed the output citing /etc/HPPSystems/source/environment.xml as the default environment file. So that was what I modified and pushed around the cluster (not remembering that that wasn't the file that was actually used at runtime). When I tested the environment.xml contents with md5sum, I tested the ones in the source/ subdirectory. Because all other nodes had a symlink to source/environment.xml, the config changes worked elsewhere. Some changes in the environment, like the replication option in Thor, were picked up and applied because the Thor master is running on a different node and it saw that change in its updated environment.xml file. That was why I asserted that other changes were being applied. This dropzone change affected the primary node, however, and that node did not have a symlink. It was reading the old file and obviously didn't see any of my changes.\\n\\nExecutive Summary: RTFM. Again.\\n\\nI haven't gone so far as to test Chris' problem of multiple dropzones not being selected properly, but I can at least confirm that all my dropzones are appearing in the popup lists.\\n\\nLive and learn.\\n\\nDan\", \"post_time\": \"2012-06-05 12:58:22\" },\n\t{ \"post_id\": 1713, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":27stnb4c]OK, the good news is that your environment.xml looks OK. So the HPCC team wants you to try this:\\n\\n1. Go to the etc folder and rename environment.xml to environment.bak\\n2. Go into the config manager and delete both drop zones.\\n3. Push out your changes and confirm that all drop zones are now removed.\\n4. Go back into the config manager and add your second drop zone first.\\n5. Push out the changes again and now see if you can see the drop zone.\\n6. Finally, go back to config mgr again and add the first drop zone and push your changes out again. Confirm that both drop zones are out there.\\n\\nDevelopment also asked how are you pushing the XML?\\n\\nI'm pushing the XML via the scp command line utility. 
Manually copying it from place to place, basically.\\n\\nI'll work on testing the configuration changes a little later today and get back to you with the results.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-05 12:19:17\" },\n\t{ \"post_id\": 1712, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"[quote="clo":dgrk3szr]1. While investigating your environment.xml, I noticed the build was 3.2.2. I'm assuming that you first started with build 3.2.2 and used the configmgr to build your current environment.xml. Is this assumption correct?\\n\\nI honestly don't remember. I thought I had started with 3.4.0 because all of the installation RPMs I've used are still archived on the system, and the earliest is 3.4.0. I saw the reference to 3.2.2 as well and chalked it up to an old attribute value leftover by the configuration utility.\\n\\n[quote="clo":dgrk3szr]2. Have you upgraded from 3.2.2 to 3.4.0 and then to 3.6.2.2?\\n\\nI have done in-place upgrades through the cluster's life. No wipe-and-reinstalls.\\n\\n[quote="clo":dgrk3szr]3. Did you copy your /etc/HPCCSystems/environment.xml out to all your nodes?\\n a. Did all your environment.xml have appropriate permissions?\\n b. Has the esp been restarted since you added the second dropzone?
\\n\\nI did fix a permission problem on one node, but that didn't seem to make a difference with this problem. Permissions in general are not a problem because I've made other changes that I've seen take effect, so updates (in general) are being applied. I use scp to copy the file, BTW. And yes, I've restarted the entire cluster (a couple of times, actually).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-05 12:16:36\" },\n\t{ \"post_id\": 1711, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"clo\", \"post_text\": \"Dan,\\n\\nHi Dan. I was wondering if you've made any progress with your system. I downloaded the xml that you provided and did a couple things with it for testing on my system.\\n1. Installing with your environment.xml\\n
 a. I changed the IP of the nodes in your environment.xml and used it as the configuration on my system. \\n b. I had clean nodes and installed 3.6.2.2. \\n c. I utilized the install-cluster.sh script that's in our documents to push the system out to all the nodes in my cluster and then started all components. \\n d. I went to verify that all components were up and running.\\n e. I noticed that I had the same error that I ran into before: https://github.com/hpcc-systems/HPCC-Pl ... ssues/2460\\n f. I did still see the second dropzone as being available from the Spray Fixed section.
\\n\\nSome questions for you.\\n1. While investigating your environment.xml, I noticed the build was 3.2.2. I'm assuming that you first started with build 3.2.2 and used the configmgr to build your current environment.xml. Is this assumption correct?\\n\\n2. Have you upgraded from 3.2.2 to 3.4.0 and then to 3.6.2.2?\\n\\n3. Did you copy your /etc/HPCCSystems/environment.xml out to all your nodes?\\n a. Did all your environment.xml have appropriate permissions?\\n b. Has the esp been restarted since you added the second dropzone?
\\n\\n\\nChris\", \"post_time\": \"2012-06-04 20:41:57\" },\n\t{ \"post_id\": 1710, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"bforeman\", \"post_text\": \"OK, the good news is that your environment.xml looks OK. So the HPCC team wants you to try this:\\n\\n1. Go to the etc folder and rename environment.xml to environment.bak\\n2. Go into the config manager and delete both drop zones.\\n3. Push out your changes and confirm that all drop zones are now removed.\\n4. Go back into the config manager and add your second drop zone first.\\n5. Push out the changes again and now see if you can see the drop zone.\\n6. Finally, go back to config mgr again and add the first drop zone and push your changes out again. Confirm that both drop zones are out there.\\n\\nDevelopment also asked how are you pushing the XML?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-04 19:14:01\" },\n\t{ \"post_id\": 1707, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":3o814dy4]Does the directory exists on the second landing zone?\\n\\ncomputer="node003"\\ndirectory="/var/lib/HPCCSystems/mydropzone"\\nname="mydropzone"\\n\\nv0gnsb003:/var/lib/HPCCSystems: ll\\ntotal 48\\ndrwxr-xr-x 11 hpcc hpcc 4096 Nov 10 2011 .\\ndrwxr-xr-x 25 root root 4096 Nov 7 2011 ..\\ndrwxr-xr-x 3 hpcc hpcc 4096 Dec 5 09:01 dllserver\\ndrwxrwxrwx 2 hpcc hpcc 4096 Jun 1 14:51 dropzone\\ndrwxrwxr-x 4 hpcc hpcc 4096 Jan 25 12:45 hpcc-data\\ndrwxr-xr-x 3 hpcc hpcc 4096 Nov 10 2011 hpcc-mirror\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 09:56 mydafilesrv\\ndrwxr-xr-x 4 hpcc hpcc 4096 Jun 4 09:56 myeclagent\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 09:56 myroxie\\ndrwxr-xr-x 3 hpcc hpcc 4096 Jun 4 11:55 mythor\\ndrwxr-xr-x 4 hpcc hpcc 4096 Nov 7 2011 queries
\\n\\n[quote="bforeman":3o814dy4]\\ncomputer="node002"\\n/var/lib/HPCCSystems/dropzone\\nname="dropzone_node2"\\n\\nv0gnsb002:/var/lib/HPCCSystems: ll\\ntotal 48\\ndrwxr-xr-x 11 hpcc hpcc 4096 Jun 1 12:35 .\\ndrwxr-xr-x 25 root root 4096 Nov 7 2011 ..\\ndrwxrwxrwx 2 hpcc hpcc 4096 Jun 1 13:50 dropzone\\ndrwxr-xr-x 4 hpcc hpcc 4096 Jan 25 12:55 hpcc-data\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 10:08 mydafilesrv\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 10:08 mydfuserver\\ndrwxrwxrwx 2 hpcc hpcc 4096 Jun 1 12:35 mydropzone\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 10:08 myroxie\\ndrwxr-xr-x 2 hpcc hpcc 4096 Jun 4 10:08 mysasha\\ndrwxr-xr-x 3 hpcc hpcc 4096 Jun 4 12:05 mythor\\ndrwxr-xr-x 4 hpcc hpcc 4096 Nov 7 2011 queries
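\nFor reference, creating a drop zone directory by hand with the ownership the platform expects would look something like this (path and owner taken from the listings above):\n
sudo mkdir -p /var/lib/HPCCSystems/dropzone\nsudo chown hpcc:hpcc /var/lib/HPCCSystems/dropzone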
\\n\\nIIRC, I manually created the mydropzone directory and I believe the system created dropzone. Or maybe the other way around. I don't think I created both, but I could very well be wrong.\\n\\nQuestion: Is the 'name' attribute value used only for display? That was my assumption. Perhaps that's where I'm going wrong.\\n\\nReminder: v0gnsb003 (node003) was the original, and I'm trying to add v0gnsb002 (node002).\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-04 17:52:21\" },\n\t{ \"post_id\": 1706, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"bforeman\", \"post_text\": \"Does the directory exists on the second landing zone?\\n\\ncomputer="node003"\\ndirectory="/var/lib/HPCCSystems/mydropzone"\\nname="mydropzone"\\n\\ncomputer="node002"\\n/var/lib/HPCCSystems/dropzone\\nname="dropzone_node2"\\n\\n\\nBob\", \"post_time\": \"2012-06-04 17:38:53\" },\n\t{ \"post_id\": 1705, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"My newly-added dropzone appears nowhere except in the configuration manager's screens. It doesn't appear anywhere in ECL Watch. I think I'm probably dealing with a simpler problem -- boneheaded configuration changes, most likely. My problem could be an artifact of upgrading from 3.4.0, though.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-04 17:32:37\" },\n\t{ \"post_id\": 1704, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI was performing my own tests with the 3.6.2.2 build. I created a 4node environment through configmgr's wizard that had 1 support node and 3 thor slaves. By default, my first dropzone was on node001 so I decided to add a second dropzone on node002. \\n\\nI then started up my system and immediately navigated to the Upload/Download file section on EclWatch. I noticed that the second dropzone appeared on the list of dropzones, but I wasn't actually able to select it. If I selected it, EclWatch would automatically reselect the default dropzone. However, if I navigate to the Spray Fixed section, the second dropzone was fully useable. I've opened a github issue already. https://github.com/hpcc-systems/HPCC-Pl ... ssues/2460 \\n\\nI'm wondering if you noticed something similar with your system.\\n\\nChris\", \"post_time\": \"2012-06-04 17:14:24\" },\n\t{ \"post_id\": 1702, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"Ha! My check did not include the MD5 calculation. I'm a dummy.\\n\\nOne node had incorrect permissions on the file and therefore did not get the update. However, I still have only one drop zone in the popup list after correcting this error (change permissions, recopy the file, restart the cluster). The MD5 signature is a7415eb650906f768246055c09f078eb, BTW.\\n\\nMy environment is enclosed. Any insight would be appreciated.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-04 15:13:04\" },\n\t{ \"post_id\": 1701, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI need you to do two things:\\n\\n1. Verify again that you are pushing the environment.xml to all nodes and verify using the MD5sum.\\n\\n2. 
Please post your environment.xml file so we can review it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-04 14:48:56\" },\n\t{ \"post_id\": 1700, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"Hi Bob,\\n\\nI've gone over the configuration and I don't see the problem.\\n\\nI have a four-cluster setup that was configured with default settings from version 3.4.0 or thereabouts (it's now running 3.6.2-2). That default configuration placed the Thor master on node002 and the drop zone on node003. I'm temporarily running short on disk space, and incoming files in the drop zone are take valuable space before they're sprayed into the cluster. Since space on node002 is underutilized, because it's the master, I wanted to add a drop zone there.\\n\\nSo: According to the configuration dafilesrv is running on all four nodes (this was from the default configuration). I added the second drop zone and selected node002 as the computer, and gave it a different name. Since the node was already part of the cluster, dafilesrv is already installed (and running, I checked), and the environment file is correctly copied (I checked) I would think that would be all I needed to do. What am I missing?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-04 12:33:35\" },\n\t{ \"post_id\": 1697, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Re: Multiple drop zones?\", \"username\": \"bforeman\", \"post_text\": \"From our HPCC team:\\n\\nTo add a new landing zone..\\n\\nAny machine designated as a landing zone needs dafilesrv running on it.\\n\\n1.\\tInstall the hpcc rpm to get it.\\n\\nAdd it to the environment\\n\\n1.\\tAdd the IP to the hardware section\\n2.\\tDefine landing zone in the environment section( select freshly added IP from the dropdown)\\n3.\\tPush Environment.xml to all nodes\\n4.\\tRestart ALL components not just the cluster\\n\\nProtip: validate the environment.xml matches the one in source directory\\n\\nmd5sum /etc/HPCCSystems/environment.xml /etc/HPCCSystems/source/environment.xml\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2012-06-01 19:17:03\" },\n\t{ \"post_id\": 1696, \"topic_id\": 376, \"forum_id\": 14, \"post_subject\": \"Multiple drop zones?\", \"username\": \"DSC\", \"post_text\": \"I went into my four-node cluster's configuration and added another drop zone (then copied the environment, restarted the cluster, etc.). The new zone fails to appear in the popup list when attempting to spray files into the cluster. 
Is there an additional step to making the new drop zone active?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-01 17:59:36\" },\n\t{ \"post_id\": 1746, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":34a4lt1c]Will let you know Dan, we're reviewing your log right now...\\n\\nBob\\n\\nThanks, Bob.\\n\\nI just tested the reconfigured host with two CPUs and there has been no change.\\n\\nDan\", \"post_time\": \"2012-06-08 17:07:42\" },\n\t{ \"post_id\": 1745, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"bforeman\", \"post_text\": \"Will let you know Dan, we're reviewing your log right now...\\n\\nBob\", \"post_time\": \"2012-06-08 16:16:48\" },\n\t{ \"post_id\": 1744, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":6kzqerjo]maxActivityCores is used by activities that sort only at the moment.\\nIf 0 (the default), it will use all cores available. \\nUnless you has some exotic hardware, it is unlikely that you have that many cores (e.g. <32) And that's not many threads.\\n\\nIt was a pure shot in the dark. I'm glad my expectations were low.\\n\\n[quote="bforeman":6kzqerjo]You may (for some reason) have a v. low max pthread count perhaps.\\nTo find out, on the slave that is logging these errors, use:\\n\\ncat /proc/sys/kernel/threads-max\\n\\n~: sudo cat /proc/sys/kernel/threads-max\\n81920
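\nSince pthread_create can also return EAGAIN when the per-user process/thread limit is hit (not just the kernel-wide one), the per-user limit is probably worth checking too, e.g.:\n
ulimit -u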
\\n\\n[quote="bforeman":6kzqerjo]If the hardware is the same on the 1 node system, it shouldn't be using more threads that the 4-way.\\n\\nFor sort it won't make any difference, the only thing that counts is # of cores.\\nIn fact for other things, a n-way system should use more threads than a 1-way.\\n\\nAre there any hardware and OS differences between your 4-way and the 1-way?\\n\\nAll of the nodes are actually virtual hosts running RHEL5. The blades running the hosts are virtually identical (tiny version mismatches abound, but nothing major so we can migrate hosts easily). All were imaged from the same base, and I installed HPCC and dependencies via RPMs. One difference is that my original cluster was originally v3.4.0 and I've upgraded it with the new releases, while the new node started out as v3.6.2.\\n\\nHa. I just found out from our Systems group that the new node is 4GB of RAM vs. 6GB in the originals and it was configured with only one CPU vs. two. I've asked for a reconfiguration of the CPUs. Would that make a real difference, here? I know that affects the total number of available cores, but shouldn't that something the code adapts to?\\n\\nDan\", \"post_time\": \"2012-06-08 15:48:16\" },\n\t{ \"post_id\": 1742, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"bforeman\", \"post_text\": \"Dan, it still works on HTHOR right? So a 1-way THOR and a 1-way HTHOR are equivalent.\\n\\nDevelopment also comments:\\n\\nmaxActivityCores is used by activities that sort only at the moment.\\nIf 0 (the default), it will use all cores available. \\nUnless you has some exotic hardware, it is unlikely that you have that many cores (e.g. <32) And that's not many threads.\\n\\nYou may (for some reason) have a v. low max pthread count perhaps.\\nTo find out, on the slave that is logging these errors, use:\\n\\ncat /proc/sys/kernel/threads-max\\n\\nIf the hardware is the same on the 1 node system, it shouldn't be using more threads that the 4-way.\\n\\nFor sort it won't make any difference, the only thing that counts is # of cores.\\nIn fact for other things, a n-way system should use more threads than a 1-way.\\n\\nAre there any hardware and OS differences between your 4-way and the 1-way?\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-08 15:30:42\" },\n\t{ \"post_id\": 1741, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":18u093r3]Good catch...I will forward the log and ask development about that setting.\\n\\nNot so fast. Of course things started failing again, with the same error, right after I posted that last message. I'm back to square one.\\n\\nDan\", \"post_time\": \"2012-06-08 14:27:20\" },\n\t{ \"post_id\": 1740, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nGood catch...I will forward the log and ask development about that setting.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-08 14:24:36\" },\n\t{ \"post_id\": 1739, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"Update: I reset my thor slave count to 1 and noticed a thor option named 'maxActivityCores' with a default value of 0 (unlimited). Since the pthread error seemed to be related, I set that value to 2 and ran some tests on thor. 
They all succeeded, and quite quickly, and all without any pthread_create errors.\\n\\nWas this change valid? Or is it just coincidence? If it's valid, what would an appropriate value for that option be?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-06-08 14:20:58\" },\n\t{ \"post_id\": 1738, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":37o34cv6]Dan, try publishing it to HTHOR. Does that make a difference?\\n\\nIt seems to. At least, it hasn't failed in the five trials I just ran. Where are the log files for hthor jobs? These seem to be rather more responsive, which makes me think that it's not running into the pthread_create EAGAIN issue, but I'd like to verify that.\\n\\nFWIW, I'm posting a thor log excerpt with what appears to be a crash. It includes the pthread_create problem as well. In this particular attempt, I changed the config to try two thor slaves on this one node just to see if anything changed. It did, but not in a good way.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-08 14:10:00\" },\n\t{ \"post_id\": 1737, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"bforeman\", \"post_text\": \"Dan, try publishing it to HTHOR. Does that make a difference?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-08 13:57:44\" },\n\t{ \"post_id\": 1736, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Re: Out of threads...\", \"username\": \"DSC\", \"post_text\": \"Update: This appears to be an intermittent problem. The job does sometimes complete, though the 'Out of threads, retrying' error continues to appear (sometimes only once, oftentimes multiple occurrences grouped together).\\n\\nDan\", \"post_time\": \"2012-06-08 13:50:55\" },\n\t{ \"post_id\": 1732, \"topic_id\": 384, \"forum_id\": 14, \"post_subject\": \"Out of threads...\", \"username\": \"DSC\", \"post_text\": \"I setup a new single-node system in order to offload someone else's work from my own cluster. The setup seemed to go well until I ported the working code from my cluster to that new node. Now, instead of actually working, it fails with a series of "pthread_create(11): Out of threads, retrying..." errors in the log file.\\n\\nBy "porting" I mean that I recompiled the source on the new node and published it as a Thor query. Again, this works fine on my four-node cluster.\\n\\nWhat should I be looking for in trying to resolve this problem? What can I check? There is nothing obviously different between this new node and any node in my existing cluster, other than a smaller amount of available disk space.\\n\\nAny hints would be appreciated!\\n\\nDan\", \"post_time\": \"2012-06-08 13:06:39\" },\n\t{ \"post_id\": 1817, \"topic_id\": 402, \"forum_id\": 14, \"post_subject\": \"Re: Download packages for 3.8.04rc are broken\", \"username\": \"sort\", \"post_text\": \"This issue has been resolved (and github issue closed). Thank you for bringing this to our attention. Please retry.\", \"post_time\": \"2012-06-20 19:49:20\" },\n\t{ \"post_id\": 1814, \"topic_id\": 402, \"forum_id\": 14, \"post_subject\": \"Download packages for 3.8.04rc are broken\", \"username\": \"cagatayk\", \"post_text\": \"I've created an issue in Github:\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/2615\\n\\nIs anyone else seeing this?\", \"post_time\": \"2012-06-20 14:51:53\" },\n\t{ \"post_id\": 1826, \"topic_id\": 404, \"forum_id\": 14, \"post_subject\": \"Re: VM - single node? 
HPCC install troubles on CENTOS\", \"username\": \"sort\", \"post_text\": \"Our VM image is a single node test system. If you create your own VM images and follow a standard install you can run the platform the same as it would on physical hardware.\\n\\nAs to your issues with dependencies, there is a requirement on the EPEL yum repository (http://fedoraproject.org/wiki/EPEL). Once this repository is installed, yum localinstall <package> (where <package> is the name of our installation file) will be able to download the required dependencies needed to install the platform.\", \"post_time\": \"2012-06-21 19:17:08\" },\n\t{ \"post_id\": 1823, \"topic_id\": 404, \"forum_id\": 14, \"post_subject\": \"VM - single node? HPCC install troubles on CENTOS\", \"username\": \"michael-mason\", \"post_text\": \"Hi, I just wanted to confirm that the VM install only supports single-node systems. Is this correct? \\n\\nWe tried to install the HPCC software on one of our linux nodes (CENTOS 5.6), but we have a great deal of trouble fulfilling the dependencies for the .rpm. Is this normal? Is there an HPCC .rpm that has all of the deps? Or, is there a yum repository that has HPCC install package so that yum and take care of the deps? I'm not sure if the isntall troubles that we're having is typical, or perhaps our OS install wasn't complete (i.e., not all of the usual packages were installed when the sysadmin installed the OS)?\\n\\nThanks guys!\", \"post_time\": \"2012-06-21 17:34:25\" },\n\t{ \"post_id\": 1859, \"topic_id\": 409, \"forum_id\": 14, \"post_subject\": \"Re: ARM processor version\", \"username\": \"HPCC Staff\", \"post_text\": \"Hi Allan, \\nWe have entertained this idea in the past, but currently do not have any resources assigned to the effort.\\n\\nWhile the code is mostly portable, there are probably some areas that would require some retooling.\\n\\nWe would gladly accept code contributions if there is enough interest in pursuing this.\\n\\nThank you!\", \"post_time\": \"2012-06-27 20:30:03\" },\n\t{ \"post_id\": 1845, \"topic_id\": 409, \"forum_id\": 14, \"post_subject\": \"ARM processor version\", \"username\": \"allanhagan\", \"post_text\": \"Are there any plans to produce an ARM version rather than just intel/AMD???\\n\\nAllan\", \"post_time\": \"2012-06-27 13:54:05\" },\n\t{ \"post_id\": 2037, \"topic_id\": 412, \"forum_id\": 14, \"post_subject\": \"Re: Queues in OSS\", \"username\": \"gsmith\", \"post_text\": \"In general no. But the namespace name did change...\\n\\nGordon.\", \"post_time\": \"2012-07-20 07:05:05\" },\n\t{ \"post_id\": 1876, \"topic_id\": 412, \"forum_id\": 14, \"post_subject\": \"Re: Queues in OSS\", \"username\": \"Monosij.D-R\", \"post_text\": \"[quote="sort":3csybb1y]What tool(s) are you using that are asking these questions.\\n\\nI'm using a Python library that sends SOAP calls directly to the ESP, so that ECL jobs can be composed on the fly and run from scripts. (written based off the equivalent Perl code). If you wish, I can send you a copy.\\n\\nThe library requires the queue name since the current SOAP header definitions require it. Have the SOAP headers changed for the OSS version?\", \"post_time\": \"2012-06-29 19:46:46\" },\n\t{ \"post_id\": 1872, \"topic_id\": 412, \"forum_id\": 14, \"post_subject\": \"Re: Queues in OSS\", \"username\": \"sort\", \"post_text\": \"In the OSS version, queries are submitted to target clusters (roxie, thor) and not to queues. 
When an cluster is set up, the topology section associates components like thor and roxie with eclccserver, eclagent, etc.\\n\\nWhat tool(s) are you using that are asking these questions.\", \"post_time\": \"2012-06-29 19:05:39\" },\n\t{ \"post_id\": 1866, \"topic_id\": 412, \"forum_id\": 14, \"post_subject\": \"Queues in OSS\", \"username\": \"Monosij.D-R\", \"post_text\": \"Hello -\\n\\nAm doing some testing on the OSS version (3.6 / 3.8) and am trying to post from cmd line or from Python.\\n----------\\nSo it needs the following information:\\nserver=x.x.x.x\\ncluster=?\\nqueue=?\\nowner=my username\\npassword=my password\\n----------\\nIn the OSS version do not see the Queue name in either the ECL IDE or when in the ECL Watch page on the server.\\n\\nFor people internally - servers are 10.194.17.1 (3.6) and 10.194.11.1 (3.8).\\n\\nUnlike the non-OSS servers, in ECL IDE the Clusters are listed under the Queues.\\n\\nIn the OSS version only the Clusters are listed; hthor, thor, roxie.\\n----------\\nSo wanted to check how to submit a job to the OSS clusters w/o a Queue name. Unless I am missing something.\\n\\nThanks.\\n\\nMonosij\", \"post_time\": \"2012-06-29 15:26:09\" },\n\t{ \"post_id\": 1938, \"topic_id\": 421, \"forum_id\": 14, \"post_subject\": \"Re: how many harddisks for a node is recommended in hpcc\", \"username\": \"jonburger\", \"post_text\": \"1. In our hadoop exp environment, every node has 8 or more hard disks for high IO bandwidth and no raid needed . How to config these nodes in hpcc to reach more io performance and less nodes to meet storage capacity?\\n\\n[color=#BF0000:1lsj3sug]> If you are optimizing for Thor, you need to concern yourself with high sequential read/write performance as it differs from the Hadoop DFS model. Your "optimum" configuration would be raid 0, but a RAID 5/6 will provide an additional layer of redundancy that you will find helpful in Thor. If you are optimizing for a Roxie cluster, random seek performance is your target. Therefore, a raid 0 (Roxie is redundant itself) is your best option. Remember, you are taking advantage of data locality in HPCC, so by it's nature it is going to perform better on IO as compared to HDFS assuming your network bandwidth is less than your sequential disk IO.\\n\\n2. Does hpcc compress target data on nodes ?\\n\\n[color=#BF0000:1lsj3sug]> Yes, you have the option to either compress or not, and its configurable on a file by file basis.\", \"post_time\": \"2012-07-11 13:49:19\" },\n\t{ \"post_id\": 1905, \"topic_id\": 421, \"forum_id\": 14, \"post_subject\": \"how many harddisks for a node is recommended in hpcc\", \"username\": \"wgsh\", \"post_text\": \"1. In our hadoop exp environment, every node has 8 or more hard disks for high IO bandwidth and no raid needed . How to config these nodes in hpcc to reach more io performance and less nodes to meet storage capacity?\\n2. Does hpcc compress target data on nodes ?\\nThx.\\nJason\", \"post_time\": \"2012-07-06 05:50:24\" },\n\t{ \"post_id\": 2088, \"topic_id\": 443, \"forum_id\": 14, \"post_subject\": \"Re: HPCC on a single node with multiple CPUs\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe spray is to the cluster group, which is implicitly defined by the Thor configuration. 
When a Thor has slavesPerNode set, everything else will see the Thor group as a group of : slavesPerNode * (slaves in Thor topology).\\n\\nSo yes, it will partition the file between the processes rather than just the nodes and therefore each slave process will be reading a separate partition.\\n\\nHTH.\", \"post_time\": \"2012-07-26 14:25:10\" },\n\t{ \"post_id\": 2081, \"topic_id\": 443, \"forum_id\": 14, \"post_subject\": \"Re: HPCC on a single node with multiple CPUs\", \"username\": \"yil43\", \"post_text\": \"Hi jsmith,\\n\\nThanks a lot for the answer; this is really helpful. Just a follow-up question: given the setup you mentioned, say I would like to spray a huge file on this node and sort it, will this file be split automatically among the slave processes? Or file splitting only occurs among nodes rather than processes?\\n\\nThank you for your time.\\nYing\", \"post_time\": \"2012-07-25 17:00:11\" },\n\t{ \"post_id\": 2060, \"topic_id\": 443, \"forum_id\": 14, \"post_subject\": \"Re: HPCC on a single node with multiple CPUs\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nThor and Roxie are highly multithreaded and will in many scenarios be using all the cores available to it. It depends on the nature of the job to some extent, there are some activities within each job that will utilize the cores better than others. Very simple linear tasks that for example only filter or project, tend to be single threaded, however normally they will be within a subgraph composed of many other activities that will overlap.\\n\\nOne approach on a node with many cores to spare, is to configure your Thor with >1 slave per node, so e.g. on your 1-way, depending on # cores and memory, you might want to set 'slavesPerNode' to some reasonable number. With that set, your job(s) will execute on all N slaves in parallel, each working on a proportion of the subgraph running.\\n\\nHope that helps.\", \"post_time\": \"2012-07-24 15:58:15\" },\n\t{ \"post_id\": 2030, \"topic_id\": 443, \"forum_id\": 14, \"post_subject\": \"HPCC on a single node with multiple CPUs\", \"username\": \"yil43\", \"post_text\": \"Hi, I am wondering if HPCC (installed, not VM) could take advantage of the parallelism of a single node with multiple CPUs or a multicore CPU? If so, is there any particular configuration needed to this end? I checked the docs but only found configurations for multiple nodes.\\n\\nThanks\\nYing\", \"post_time\": \"2012-07-19 17:49:21\" },\n\t{ \"post_id\": 3464, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Re: Concurrent Thor?\", \"username\": \"DSC\", \"post_text\": \"Thor slaves are the worker processes of the thor cluster. 
Increasing the number of slaves per (hardware) node therefore increases the size of the existing cluster; it doesn't create a new thor cluster.\\n\\nYou may be interested in an older forum post on this topic: http://hpccsystems.com/bb/viewtopic.php?f=14&t=567.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-15 12:32:34\" },\n\t{ \"post_id\": 3463, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Re: Concurrent Thor?\", \"username\": \"Neha Singh\", \"post_text\": \"I want to configure multiple thor target processes on the same cluster.I wanted to know as per the instructions mentioned in Installing and Running Hpcc Platform PDF(page 28),if we change the Number of thor slaves per node to 2,then can we have 2 thor target processess.Is this the way to configure multiple thor target processes or something else needs to be done.\", \"post_time\": \"2013-02-15 06:54:32\" },\n\t{ \"post_id\": 2074, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Re: Concurrent Thor?\", \"username\": \"jsmith\", \"post_text\": \"That's correct, as long as they are within the same environment, sharing the same Dali, they will have access to all files, whether the Thor clusters share the same nodes or not.\\n\\nIf the Thor's each have their own dedicated nodes, it will mean of course, that when reading a logical file on one of the other Thor's, it will be read across the network. Whereas if the Thor instances shared the same nodes (and therefore disk), it wouldn't make any difference which read/wrote the files.\", \"post_time\": \"2012-07-25 11:47:36\" },\n\t{ \"post_id\": 2063, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Re: Concurrent Thor?\", \"username\": \"DSC\", \"post_text\": \"That information helps a great deal. Thanks!\\n\\nSince the multiple Thor processes are running on the same cluster, they all have access to that cluster's files, correct? In my original description each process would need access to a fairly large superfile for reading and I would rather avoid duplicating that superfile if possible. Just making sure I understand the solution....\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-07-24 17:01:05\" },\n\t{ \"post_id\": 2058, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Re: Concurrent Thor?\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nA single Thor process can only run a single job, however it's possible to configure HPCC to have multiple Thor target processes on the same cluster, listening to the same queue (i.e. load balanced). In that scenario you'd have multiple jobs running concurrently. Typically each Thor target process would be configured to use a share of the total physical memory available, via configmgr and the globalMemorySize option.\\n\\nHowever, depending on the h/w setup (cores/memory/network), running concurrent jobs like this won't necessarily produce a higher throughput. Assuming the Thor targets are sharing the same h/w as I describe, then there will be contention for disk/network/cpu and memory. Overall this could lead to a slower overall throughput.\\n\\nOf course you could also have independent Thor procesess each using their own set of nodes, as you suggested, all listening to the same queue (i.e. 
still load balanced).\\n\\nHope that helps, or let me know if not.\", \"post_time\": \"2012-07-24 15:45:43\" },\n\t{ \"post_id\": 2039, \"topic_id\": 444, \"forum_id\": 14, \"post_subject\": \"Concurrent Thor?\", \"username\": \"DSC\", \"post_text\": \"I'm looking for any recommendations or best practices for setting up a solution to the following scenario. Both configuration and coding recommendations are welcome.\\n\\nSuppose we start with a large data store in Thor (multiple logical files, interrelated nicely, updated hourly, etc. etc.).\\n\\nNow suppose we need to ingest smaller incoming files. These files need to be ETL'd, have data points appended to them from the large data store (with a JOIN, obviously), analytics run on the result, saved, and indexes built. These incoming files will not be aggregated. In fact, except for reading the large data store, working on these files is a nicely isolated task (there will be no write conflicts). This is obviously a job for Thor, since new datasets and indexes have to be created. Accessing the resulting indexes, OTOH, will probably be a Roxie task due to performance and concurrency requirements.\\n\\nThe kicker, to me at least, is the timing and requirements of the incoming files. They can arrive at any time, and they can be of virtually any size (from insignificant up to a tenth of the size (record count) of the large data store). The expected volume is probably less than a thousand per day. The outside expectation is not that these files be turned around immediately, but that processing of them would not be unduly delayed, either.\\n\\nGiven all of that, I can see a bottleneck with Thor's single-process mode. If there was a concept of "maximum number of concurrent Thor work units" setting for the scheduler, I wouldn't be writing this post. Or is there such a setting and I've missed it?\\n\\nAre there any recommendations for handling this kind of scenario? I can envision a complicated set of separate Thor clusters, operating independently, with a master cluster pulling result files into it periodically, but that seems like over-engineering. I'm hoping that there is a simpler solution that I'm overlooking.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-07-20 11:55:46\" },\n\t{ \"post_id\": 2046, \"topic_id\": 448, \"forum_id\": 14, \"post_subject\": \"HPCC Cluster - recommendations required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Posting the link to my question http://hpccsystems.com/bb/viewtopic.php?f=15&t=446&sid=6095f88d19f444ae56a1815a43ed8d48\", \"post_time\": \"2012-07-23 12:45:13\" },\n\t{ \"post_id\": 2087, \"topic_id\": 454, \"forum_id\": 14, \"post_subject\": \"Re: Multi-node installation issue\", \"username\": \"pschwartz\", \"post_text\": \"[quote="kaliyugantagonist":1meugcmo]Hi clo,\\n\\nI'm sorry that I didn't mention that I had checked for SSH access for root before I posted my query :\\n\\nroot@cloudx-767-700:~# ssh root@localhost\\nThe authenticity of host 'localhost (127.0.0.1)' can't be established.\\nECDSA key fingerprint is 26:1c:e8:0c:9b:37:ea:13:2d:40:7d:f8:cd:1e:79:24.\\nAre you sure you want to continue connecting (yes/no)? 
yes\\nWarning: Permanently added 'localhost' (ECDSA) to the list of known hosts.\\nroot@localhost's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:10:37 IST 2012\\n\\n System load: 0.0 Processes: 92\\n Usage of /: 9.7% of 46.09GB Users logged in: 1\\n Memory usage: 53% IP address for eth0: 172.25.37.10\\n Swap usage: 1%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:09:33 2012 from 172.17.88.148\\n\\n\\n\\nroot@cloudx-767-700:~# ssh root@172.25.37.10\\nroot@172.25.37.10's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:12:01 IST 2012\\n\\n System load: 0.0 Processes: 92\\n Usage of /: 9.7% of 46.09GB Users logged in: 1\\n Memory usage: 53% IP address for eth0: 172.25.37.10\\n Swap usage: 1%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:11:49 2012 from 172.17.88.148\\n\\n\\n\\nroot@cloudx-767-700:~# ssh root@172.25.38.217\\nroot@172.25.38.217's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:16:06 IST 2012\\n\\n System load: 0.14 Processes: 74\\n Usage of /: 5.2% of 46.09GB Users logged in: 0\\n Memory usage: 2% IP address for eth0: 172.25.38.217\\n Swap usage: 0%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:15:51 2012 from 172.25.37.10
\\n\\nAs for the second test, please find below the result(the single-cluster service is shut-down while running this; also the remote machine viz. 172.25.38.217 is accessible, as shown in the output) :\\n\\nroot@cloudx-799-731:~# sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init status\\n172.25.37.10: Host is alive.\\n172.25.37.10: Running sudo /etc/init.d/hpcc-init status\\nmydafilesrv is stopped\\nmydali is stopped\\nmydfuserver is stopped\\nmyeclagent is stopped\\nmyeclccserver is stopped\\nmyeclscheduler is stopped\\nmyesp is stopped\\nmysasha is stopped\\nmythor is stopped\\n172.25.38.217: Host is alive.\\n172.25.38.217: Running sudo /etc/init.d/hpcc-init status\\nmydafilesrv is stopped
\\n\\nTHOUGHT :\\n\\nWhen the config. wizard creates the newenv.xml file, it also creates a user viz. hpcc on all the machines(in my case,on both cloudx-767-700 and cloudx-799-731). I think while running the install-cluster.sh, [color=#FF0000:1meugcmo]one has to log-in as hpcc AND NOT AS root. But I don't have the hpcc credentials(password), hence, could not try it out. The documentation, too, doesn't mention something like this but I have seen such steps in other,similar software.\\n\\nPlease guide about the same !\\n\\nThanks and regards !\\n\\nThe install of the platform creates the hpcc user as it is a secure password-less account that uses ssh keys to allow components of the HPCC to communicate between each other. \\n\\nThe install-cluster.sh script requires a user that is root or has sudo rights in order to work. Here is the flow the script uses:\\n\\n1. Get credentials from user.\\n2. Get list of IP's to install or upgrade on using /etc/HPCCSystems/environment.xml\\n3. Loop through each system and attempt to connect to them with hpcc user and local installed key to determine if platform/shared keys are already installed.\\n4. Deliver a payload with SCP including config files, install package, and ssh keys (if needed) to each node.\\n5. SSH into the each system using the credentials provided in step 1 and run a script to install the payload delivered in step 4.\\n\\nFrom the output you have displayed, it appears that these steps completed correctly. \\n\\nAlso, the failure to ssh as hpcc output that was in your log is correct as it is showing that it attempted to verify that user and keys were setup correctly and they were not on that attempted connection. (Prior to payload delivery and installation.)\\n\\nThis is expected output if the ssh keys or user do not exist.\\n\\n-Philip\", \"post_time\": \"2012-07-26 12:25:53\" },\n\t{ \"post_id\": 2084, \"topic_id\": 454, \"forum_id\": 14, \"post_subject\": \"Re: Multi-node installation issue\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi clo,\\n\\nI'm sorry that I didn't mention that I had checked for SSH access for root before I posted my query :\\n\\nroot@cloudx-767-700:~# ssh root@localhost\\nThe authenticity of host 'localhost (127.0.0.1)' can't be established.\\nECDSA key fingerprint is 26:1c:e8:0c:9b:37:ea:13:2d:40:7d:f8:cd:1e:79:24.\\nAre you sure you want to continue connecting (yes/no)? 
yes\\nWarning: Permanently added 'localhost' (ECDSA) to the list of known hosts.\\nroot@localhost's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:10:37 IST 2012\\n\\n System load: 0.0 Processes: 92\\n Usage of /: 9.7% of 46.09GB Users logged in: 1\\n Memory usage: 53% IP address for eth0: 172.25.37.10\\n Swap usage: 1%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:09:33 2012 from 172.17.88.148\\n\\n\\n\\nroot@cloudx-767-700:~# ssh root@172.25.37.10\\nroot@172.25.37.10's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:12:01 IST 2012\\n\\n System load: 0.0 Processes: 92\\n Usage of /: 9.7% of 46.09GB Users logged in: 1\\n Memory usage: 53% IP address for eth0: 172.25.37.10\\n Swap usage: 1%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:11:49 2012 from 172.17.88.148\\n\\n\\n\\nroot@cloudx-767-700:~# ssh root@172.25.38.217\\nroot@172.25.38.217's password:\\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Thu Jul 26 16:16:06 IST 2012\\n\\n System load: 0.14 Processes: 74\\n Usage of /: 5.2% of 46.09GB Users logged in: 0\\n Memory usage: 2% IP address for eth0: 172.25.38.217\\n Swap usage: 0%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Thu Jul 26 16:15:51 2012 from 172.25.37.10
\\n\\nAs for the second test, please find below the result(the single-cluster service is shut-down while running this; also the remote machine viz. 172.25.38.217 is accessible, as shown in the output) :\\n\\nroot@cloudx-799-731:~# sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init status\\n172.25.37.10: Host is alive.\\n172.25.37.10: Running sudo /etc/init.d/hpcc-init status\\nmydafilesrv is stopped\\nmydali is stopped\\nmydfuserver is stopped\\nmyeclagent is stopped\\nmyeclccserver is stopped\\nmyeclscheduler is stopped\\nmyesp is stopped\\nmysasha is stopped\\nmythor is stopped\\n172.25.38.217: Host is alive.\\n172.25.38.217: Running sudo /etc/init.d/hpcc-init status\\nmydafilesrv is stopped
\\n\\nTHOUGHT :\\n\\nWhen the config. wizard creates the newenv.xml file, it also creates a user viz. hpcc on all the machines (in my case, on both cloudx-767-700 and cloudx-799-731). I think while running the install-cluster.sh, [color=#FF0000]one has to log-in as hpcc AND NOT AS root. But I don't have the hpcc credentials (password), hence, could not try it out. The documentation, too, doesn't mention something like this but I have seen such steps in other, similar software.\\n\\nPlease guide about the same !\\n\\nThanks and regards !\", \"post_time\": \"2012-07-26 05:32:19\" },\n\t{ \"post_id\": 2078, \"topic_id\": 454, \"forum_id\": 14, \"post_subject\": \"Re: Multi-node installation issue\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI guess there are a couple of questions and tests I have for you.\\n\\n1. Does root have the permission to ssh to the other nodes?\\n - Just try a simple ssh root@YourIP and see if it works.\\n\\n2. Run the following command to see if you see all the components on all nodes.\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init status
\", \"post_time\": \"2012-07-25 14:44:37\" },\n\t{ \"post_id\": 2071, \"topic_id\": 454, \"forum_id\": 14, \"post_subject\": \"Multi-node installation issue\", \"username\": \"kaliyugantagonist\", \"post_text\": \"I have to set-up a two node HPCC cluster and the VM details are :\\n\\ncloudx-767-700 - HPCC already running in single-node mode\\ncloudx-799-731 - Fresh VM\\n\\nI have generated a new environment.xml file on cloudx-767-700 using the configuration manager.\\n\\nFor the HPCC installation on the fresh VM,as per the instructions in 'Installing_and_RunningTheHPCCPlatform.pdf', I'm running the following command on cloudx-767-700:\\n\\nroot@cloudx-767-700:/usr/share/dumphere/installers# /opt/HPCCSystems/sbin/install-cluster.sh -k hpccsystems-platform_community-3.8.0-1natty_amd64.deb > log.txt\\n[color=#FF0000]Input admin username:root\\nInput admin password[/color]:tar: Removing leading `/' from member names
\\n\\nThe log.txt is as follows :\\n\\n\\n\\nGenerating public/private rsa key pair.\\nYour identification has been saved in /usr/share/dumphere/installers/new_ssh/id_rsa.\\nYour public key has been saved in /usr/share/dumphere/installers/new_ssh/id_rsa.pub.\\nThe key fingerprint is:\\n49:8d:36:42:76:08:17:2d:24:8c:db:c4:da:c9:c1:5f root@cloudx-767-700\\nThe key's randomart image is:\\n+--[ RSA 2048]----+\\n| =oo*+. |\\n| . *=o.Eo |\\n| B +.o= . |\\n| o = .+ o |\\n| S |\\n| |\\n| |\\n| |\\n| |\\n+-----------------+\\n/tmp/remote_install/environment.conf\\n/tmp/remote_install/environment.xml\\n/tmp/remote_install/hpccsystems-platform_community-3.8.0-1natty_amd64.deb\\n/tmp/remote_install/new_keys/\\n/tmp/remote_install/new_keys/id_rsa\\n/tmp/remote_install/new_keys/id_rsa.pub\\n/tmp/remote_install/remote-install-engine.sh\\n[color=#FF0000]172.25.37.10: Host is alive.\\n172.25.37.10: Cannot SSH to host with key..\\n172.25.37.10: Connecting with password.\\nspawn scp /tmp/remote_install.tgz root@172.25.37.10:~\\nroot@172.25.37.10's password: [/color]\\n\\nremote_install.tgz 0% 0 0.0KB/s --:-- ETA\\nremote_install.tgz 100% 28MB 27.8MB/s 00:01 \\nspawn ssh root@172.25.37.10 cd /; tar -zxf ~/remote_install.tgz\\nroot@172.25.37.10's password: \\nspawn ssh root@172.25.37.10\\nroot@172.25.37.10's password: \\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Wed Jul 25 20:45:18 IST 2012\\n\\n System load: 0.08 Processes: 94\\n Usage of /: 10.6% of 46.09GB Users logged in: 1\\n Memory usage: 55% IP address for eth0: 172.25.37.10\\n Swap usage: 1%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Wed Jul 25 20:43:53 2012 from cloudx-767-700\\n\\n\u001b]0;root@cloudx-767-700: ~\u0007root@cloudx-767-700:~# /tmp/remote_install/remote-install-engine.sh /tmp/remote_install/hpccsystems-platform_community-3.8.0 \\n-1natty_amd64.deb\\nexit\\n\u001b]0;root@cloudx-767-700: ~\u0007root@cloudx-767-700:~# exit\\nlogout\\nConnection to 172.25.37.10 closed.\\n\\n172.25.37.10: Done.\\n172.25.38.217: Host is alive.\\n[color=#FF0000]172.25.38.217: Cannot SSH to host with key..\\n172.25.38.217: Connecting with password.\\nspawn scp /tmp/remote_install.tgz root@172.25.38.217:~\\nroot@172.25.38.217's password:[/color] \\n\\nremote_install.tgz 0% 0 0.0KB/s --:-- ETA\\nremote_install.tgz 43% 12MB 12.0MB/s 00:01 ETA\\nremote_install.tgz 83% 23MB 11.9MB/s 00:00 ETA\\nremote_install.tgz 100% 28MB 13.9MB/s 00:02 \\nspawn ssh root@172.25.38.217 cd /; tar -zxf ~/remote_install.tgz\\nroot@172.25.38.217's password: \\nspawn ssh root@172.25.38.217\\nroot@172.25.38.217's password: \\nWelcome to Ubuntu 11.04 (GNU/Linux 2.6.38-8-server x86_64)\\n\\n * Documentation: http://www.ubuntu.com/server/doc\\n\\n System information as of Wed Jul 25 20:45:29 IST 2012\\n\\n System load: 0.0 Processes: 74\\n Usage of /: 5.3% of 46.09GB Users logged in: 0\\n Memory usage: 3% IP address for eth0: 172.25.38.217\\n Swap usage: 0%\\n\\n Graph this data and manage this system at https://landscape.canonical.com/\\nLast login: Wed Jul 25 20:44:05 2012 from 172.25.37.10\\n\\n\u001b]0;root@cloudx-799-731: ~\u0007root@cloudx-799-731:~# /tmp/remote_install/remote-install-engine.sh /tmp/remote_install/hpccsystems-platform_community-3.8.0 \\n-1natty_amd64.deb\\nexit\\n\u001b]0;root@cloudx-799-731: ~\u0007root@cloudx-799-731:~# exit\\nlogout\\nConnection to 172.25.38.217 closed.\\n\\n172.25.38.217: Done.
\\n\\nThe output like 172.25.38.217: Cannot SSH to host with key..\\n172.25.38.217: Connecting with password.
hints that there is some problem with the user 'hpcc' which the installation probably uses, the keys generated for SSH etc.\\n\\nI noticed that a user hpcc is involved - is it so that I have to log-in as hpcc and then run the sh file for installation on other nodes? What is the password of the user hpcc (the root password doesn't work!)\\n\\nPlease guide as to how I must proceed.\\n\\nThanks and regards !\", \"post_time\": \"2012-07-25 10:03:45\" },\n\t{ \"post_id\": 7147, \"topic_id\": 471, \"forum_id\": 14, \"post_subject\": \"Re: HPCC - some basic queries\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\n1. Do I need a .SO file and if so, how do I get the local compile to create it?
The Local compile is the one exception -- it doesn't create a .SO file, and no, you don't need it.\\n\\n2. How do I get these files to the ECL Agent and tell it to execute it on THOR?
You don't. A Local compile is designed to give you an executable program to run locally. If you want the job to run on Thor then you need to target Thor (not Local).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-03-17 19:59:19\" },\n\t{ \"post_id\": 7137, \"topic_id\": 471, \"forum_id\": 14, \"post_subject\": \"Re: HPCC - some basic queries\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard, I noticed from your response to kaliyugantagonist that you say that the compile creates and .SO file.\\n\\nTarget of my ECL IDE is set to 'local'. So, when I hit the submit button the output of the C++ compile is saved to my local machine. But, I noticed that it doesn't create a .SO file. The following are the files it creates:\\nL20150313-092046-result.xml\\nL20150313-092046.xml\\nL20150313-092046.exe\\nL20150313-092046.exe.manifest\\n
\\n\\nSo, I have 2 questions:\\n\\n1. Do I need a .SO file and if so, how do I get the local compile to create it?\\n2. How do I get these files to the ECL Agent and tell it to execute it on THOR?\", \"post_time\": \"2015-03-13 16:26:52\" },\n\t{ \"post_id\": 2131, \"topic_id\": 471, \"forum_id\": 14, \"post_subject\": \"Re: HPCC - some basic queries\", \"username\": \"rtaylor\", \"post_text\": \"*****Data upload
To get data files into the HPCC environment you first place the file on a "Landing Zone" (AKA "DropZone") -- this is what the Upload/Download File page does in ECL Watch (for files up to 2 Gb), but you may use many other methods of placing files on a dropzone (like FTP, or copy/paste, or ...).\\n\\nOnce a file is on a Landing Zone (you may configure your environment to have several), it is available to be sprayed. You can spray files three ways:
\\nThe spray operation requires a utility program to be running on the Landing Zone (dafilesrv) which is part of DFU. This program distributes the file across all the nodes of the target cluster so that each single record is always whole and complete on a single node and the records are "evenly" distributed across the nodes. IOW, spraying a 3Mb file to a 3-node cluster results in a 1Mb physical file on each node that comprises a single logical entity (whose parts are kept track of by the DFU from then on).\\n\\nOnce sprayed, the file is available for use in your ECL code once you have defined its RECORD structure and DATASET declaration.\\n\\n*****Query execution
\\nThor and Roxie are separate cluster types, designed to handle different tasks. Therefore, the query execution process is different for each.\\n\\nThor is a "back-office" ETL-processing cluster that handles massive tasks one at a time. This is primarily a "data prep" tool. Its query execution flow is this:\\n
\\nRoxie is an end-user tool, designed to handle thousands of concurrent queries. Roxie queries are:
\\nThe Publish to Roxie step:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-01 13:39:34\" },\n\t{ \"post_id\": 2130, \"topic_id\": 471, \"forum_id\": 14, \"post_subject\": \"HPCC - some basic queries\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nI have a 3-node HPCC set-up. The different nodes and the processes running on them are shown below :\\n\\nroot@cloudx-767-700:~# service hpcc-init status\\nmydafilesrv ( pid 21286 ) is running...\\nmydfuserver ( pid 6558 ) is running...\\nmyeclagent ( pid 6639 ) is running...\\nmyeclccserver ( pid 6720 ) is running...\\nmyesp ( pid 6800 ) is running...\\nmysasha ( pid 6883 ) is running...
\\n\\n\\nroot@cloudx-798-730:~# service hpcc-init status\\nmydafilesrv ( pid 30555 ) is running...\\nmyroxie ( pid 31107 ) is running...\\n
\\n\\n\\nroot@cloudx-799-731:~# sudo service hpcc-init status\\nmydafilesrv ( pid 10293 ) is running...\\nmydali ( pid 10856 ) is running...\\nmyeclscheduler ( pid 10963 ) is running...\\nmythor ( pid 16028 ) is running...
\\n\\nUsing ECL Watch, I have sprayed a few files and also executed a few ECL queries. However, after closely observing the HPCC architecture diagram,I have started getting doubtful about whether I have understood the exact flow of ETL and query processing - I wish to ask/confirm the same !\\n\\n*****Basics\\n\\nAs seen, the different HPCC services run as Linux processes on different machines. How do the different components communicate with each other e.g 1) How does the DFU Server communicate to the Dali Server about the split info. etc. of a file uploaded, say via ECL Watch 2)How do Dali and Sasha servers communicate. I'm assuming that this is an Inter-Process Communication(IPC), hence, handled by the OS kernel. But is that actually, SSH is used for communication?\\n\\n*****Data upload\\n\\n1. A file is uploaded via ECL Watch. Now since, it is hosted by ESP server, it must be receiving the file(in a form of stream) - how is this file sent to the DFU for spraying?\\nI need a complete flow from file upload to the actual entry of the file into Thor cluster, hence, I assumed the following steps :\\n\\ni. The file uploaded through the ECL Watch reaches the ESP server when the user clicks 'upload'\\nii. This file is sent to the DFU server for spraying(HOW? And has the Landing Zone come into picture?)\\niii. The DFU 'sprays' the file onto the Thor cluster(HOW?)\\niv. The DFU also updates the Dali server(HOW?)\\n\\n\\n*****Query execution\\n\\n1. Suppose the user wants to query for a certain requirement and submits it via ECL IDE/ECL Watch. Now :\\ni. The ECL server compiles the ECL file to C++\\nLooking at the architecture diagram, I couldn't firmly establish the further steps - how does Thor/Roxie get the executable?Does DFU come into picture?How is Dali updated and by whom?When and how do ECL Agents come into picture, in case the query is sent to Roxie?\\n\\n2. I have learned that both Thor and Roxie can execute the user queries. In both the cases,who,respectively, is responsible for assigning the query to different nodes and aggregating the results? And how does Dali come into picture here?\\n\\nI need some document/threads etc. that explains in detail such flows and the roles played by Dali and DFU
.\\n\\nThanks and regards !\", \"post_time\": \"2012-08-01 10:27:11\" },\n\t{ \"post_id\": 4122, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"sbagaria\", \"post_text\": \"Sorry to revive an old thread but just wanted to cross-reference a related post for the archives - viewtopic.php?f=15&t=916\\n\\nThis post mentions using ESP as the implicit load balancer for Roxie queries. However I have found that doing this leads to the dali server becoming a bottleneck (even with a separate ESP farm). This may or may not be expected behaviour, so deserves a mention here.\\n\\nSid\", \"post_time\": \"2013-05-20 13:33:48\" },\n\t{ \"post_id\": 2221, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"Most of these problems occurred six days ago, and I went through a number of configuration changes that day while trying to resolve them. I see various issues in the logs, but I honestly cannot tell you which configuration caused which errors.\\n\\nI'll keep an eye on this and post to this thread again if/when the problems reappear.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-15 12:57:25\" },\n\t{ \"post_id\": 2218, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"sort\", \"post_text\": \"1. we will update the documentation.\\n2. As far as making the same request to different roxie nodes and getting errors on some of the nodes... Can you send us the roxie logs for 1 node that succeeded and 1 node that failed (do you have the error message handy?)\\n3. esp2 reporting it was already stopped. It sounds to me like it did not properly start. Had you actually run anything through esp2? It is possible that esp had an issue and did not auto start afterwards. Can you get us the esp.log as well as the myesp_init.log file after getting the error. Also can you check the time stamp of the log file (want to verify that the node actually got the request successfully)\", \"post_time\": \"2012-08-14 16:12:59\" },\n\t{ \"post_id\": 2217, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You would only use the VIPS tab if you have a load balancer between the ESP and the roxie. In that case you would add an entry specifying the roxie and the load balancer IP you want ws_ecl to use to connect to that roxie.\\n\\nIf not using a load balancer you shouldn't have to configure ws_ecl at all. It should find the addresses of the roxie nodes and automatically round robin between them.\\n\\nLet me see if I can get someone else to comment about your shutdown issues.\", \"post_time\": \"2012-08-14 14:50:27\" },\n\t{ \"post_id\": 2215, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"The VIPS tab is enclosed in the "ESP Service" section of the configuration manager. Unfortunately, the documentation skips that section entirely. In my setup, the VIPS tab is empty. I see that adding an entry requires a selection of a Roxie cluster ("myroxie" in my case) and that the associated VIP is a text entry field. Should that be an IPv4 address? An address of what? I confess to not being able to deduce this.\\n\\nIn my current (new) setup, I left the existing ws_ecl service alone (which cited one Roxie server instance) and added a new ESP with a ws_ecl citing my other three Roxie servers. 
That seems to work. In an attempt to clean things up, at one point I had deleted the original ws_ecl and, in my second ws_ecl, I cited all four Roxie servers. That seemed to break things -- queries intermittently failed -- and I don't understand why. Thoughts?\\n\\nAlso, when shutting down HPCC, I've noticed that *intermittently* the script reports that my second ESP (named esp2) has already been shut down. That's fairly confusing to me, since the two ESP (ws_ecl) services seem to be configured for entirely different nodes and shouldn't be sharing PIDs, locks, etc.. What part of the configuration can I look toward for solving this?\\n\\nThanks a million for your help and information!\\n\\nDan\", \"post_time\": \"2012-08-14 11:44:16\" },\n\t{ \"post_id\": 2212, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You did the right thing setting up a stand alone ESP for ws_ecl... adding server "instances" for each machine you want the process to run on.\\n\\nRunning ws_ecl on the same machines you have roxie running on is ok, but it all depends on how much throughput you want from your cluster. ws_ecl and roxie nodes will have very different utilization characteristics, so for production environments, or testing throughput, I would probably keep them separate.\\n\\nDoesn't look like there is much about configuring ws_ecl in the configmgr guide, but I think roxie VIPs are currently the only configurable option. The ws_ecl service configuration page should have a tab for VIPS. You can add entries to the list and associate the virtual IP of the load balancer with the roxie cluster. By default the port used will be 9876.\", \"post_time\": \"2012-08-13 19:39:38\" },\n\t{ \"post_id\": 2208, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"OK, that's doable. Is there any documentation regarding how this can be accomplished? Also, is there a downside to running the ws_ecl services on the same nodes as the Roxie servers?\\n\\nThanks for the information!\\n\\nDan\", \"post_time\": \"2012-08-10 17:01:01\" },\n\t{ \"post_id\": 2207, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Ah, sorry, roxie doesn't currently support JSON directly. \\n\\nYou will need some ws_ecl ESPs. You can configure as many instances of your ws_ecl configuration as you need and have it hit the load balancer you have in front of roxie.\", \"post_time\": \"2012-08-10 15:24:15\" },\n\t{ \"post_id\": 2201, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"Great information. Thanks!\\n\\nSo you're saying that in production I should be using port 9876 to talk directly to the Roxie server on each node, right? I will have a load balancer in front of the request, so if the query goes that route then I can be assured of no single-point-of-failure?\\n\\nI tried calling a specific Roxie server on port 9876 but I'm getting an error. I'm using JSON, by the way, not SOAP. Here is a working version of the query via curl:\\n\\n
curl -s -H "Content-Type: application/json; charset=UTF-8" -d '{"lookup_cities_within_state": {"country_id":"76","state":"tx"}}' http://onenode.mycompany.com:8002/WsEcl/json/query/myroxie/lookup_cities_within_state
\\n\\nSimply changing the port to 9876 results in this:\\n\\n<UnknownResponse xmlns="urn:hpccsystems:ecl:unknown"><Results><Result><Exception><Source>Roxie</Source><Code>2</Code><Message>Error - syntax error "Expecting "<"" [file offset 1]\\n{*ERROR*"lookup_cities_within_state": {"country_</Message></Exception></Result></Results></UnknownResponse>
\\n\\nDoes the URL change when switching ports? If so, is this documented somewhere? Or is only SOAP supported over this port?\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-08-09 13:15:23\" },\n\t{ \"post_id\": 2197, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"You're starting to get into areas where you can flexibly define your own rules for production environments based on how you want to scale, security concerns, etc. \\n\\nBut I can comment on what LN tends to do in production environments.\\n\\nTypically in our environments we have farms of ESP servers that are separate from our cluster nodes. In our case, these are actually specialized ESPs rather than WsECL, that provide security, logging, and other critical functions for our back end systems.\\n\\nIn any case the load characteristics are different for ESP vs Roxie nodes and you can probably use much fewer ESP server nodes compared to roxie nodes.\\n\\nIf you are using a recent HPCC Systems build then ESP load balancing across the roxie cluster should happen automatically. So you could have 2 ws_ecl ESP servers, in front of 10 roxie nodes, etc. ESP will balance the requests across all 10.\\n\\nYou just have to decide how many ESPs you want in front of the cluster, and configure the ESP to run on one or more servers.\\n\\nIn large environments we tend to have a load balancer between the ESPs and the roxie nodes, there is an ws_ecl VIP setting for that scenario.\\n\\nWsECL can be used to provide security, built in load balancing, etc, but that being said, if you aren't concerned with that layer of security (or the built in load balancing) your application can call roxie directly using SOAP requests sent to each roxie node on port 9876.\\n\\nIn that case you can use ws_ecl to help develop the application taking advantage of its client development tools, like WSDLs, XSDs, test pages, sample requests and responses, etc. but at runtime, send SOAP requests directly to roxie.\", \"post_time\": \"2012-08-08 19:48:16\" },\n\t{ \"post_id\": 2195, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"I'm the one that's probably confused.\\n\\nMy understanding is that part of the "handling thousands of queries per second" feature of Roxie was the ability to query multiple Roxie servers, all exposing the same queries and backend data. Such servers would be access through a load balancer of some sort with round-robin (or better) distribution. To me, this implied that all such servers needed to have port 8002 open. That led me down this path. My wizard-provided configuration had only one node opening port 8002, which seemed like a single point of failure that could be corrected.\\n\\nWhere did I go wrong?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-08 19:00:34\" },\n\t{ \"post_id\": 2193, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie servers\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nTypically the port 8002 is used for the ws_ecl service. This service is only available on the node where the corresponding esp is located. Each roxie does not have its own ws_ecl service. When you were using 8002, you were actually only running the query that was published to the Roxie. \\n\\nHowever, I'm more confused with how you actually want to use your system. 
Are you trying to run three separate Roxie processes (Roxie1, Roxie2, Roxie3) with each having its own target and running independently of each other? Or are you actually just trying to run 1 Roxie Process that has 3 servers?\\n\\nEither way, when you compile queries and subsequently publish them to the ws_ecl, then you only need to access the query from one location. \\n\\nI hope that helps,\\nChris\", \"post_time\": \"2012-08-08 18:31:28\" },\n\t{ \"post_id\": 2183, \"topic_id\": 486, \"forum_id\": 14, \"post_subject\": \"Adding Roxie servers\", \"username\": \"DSC\", \"post_text\": \"I have a four-node cluster that was originally set up using the wizard, then has been tweaked slightly for various off-topic reasons. I always used the query port (8002) on the 'primary' node to access Roxie, and for whatever reason assumed that Roxie was also available on the other three nodes. I recently discovered that that was not the case. 'myroxie' was running but port 8002 was not available.\\n\\nI spent too long reading the docs and playing with the configuration manager, trying to get port 8002 open on the other three nodes. What I ended up with was a second esp configuration that defines only myws_ecl on those three nodes, as it seemed that modifying the existing esp entry would add too many services to all nodes (rather than just Roxie to the three). That seems to work, but it seems clunky and frankly just bothers me. What is the correct way to enable Roxie query handling on those nodes?\\n\\n(Edit: The cluster was originally configured under v3.4.0CE using the wizard, if that makes a difference.)\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-08 14:53:37\" },\n\t{ \"post_id\": 2419, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"DSC\", \"post_text\": \"Not making the revised configuration active was indeed the problem. Changing monitorInterval to a value of 300 results in Std.File.MonitorFile() checking every five minutes, as expected.\\n\\nDespite the public airing of a dumb mistake, I truly wish all my problems were as easily solved.\\n\\nThanks for your help, Bob!\\n\\nDan\", \"post_time\": \"2012-09-21 16:59:03\" },\n\t{ \"post_id\": 2417, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"bforeman\", \"post_text\": \"Cool, no problem Dan, please keep us posted!\\n\\nBob\", \"post_time\": \"2012-09-21 14:34:36\" },\n\t{ \"post_id\": 2416, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"DSC\", \"post_text\": \"Gah. I hate doing stupid things, especially in public.\\n\\nBecause this particular test cluster was only one node (no distribution necessary), I almost dismissed this check. But I diff'd the running environment with the one the configmgr worked with and they were indeed different. I've promoted the modified one and will check again.\\n\\nIs it Monday? Feels like it.\\n\\nDan\", \"post_time\": \"2012-09-21 14:32:18\" },\n\t{ \"post_id\": 2415, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI've asked our HPCC team for clarification. What comes to mind is did you push the configuration changes to all of the appropriate places? 
It almost sounds like it never got updated properly.\\n\\nWill reply again when I have more info!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-21 14:23:35\" },\n\t{ \"post_id\": 2399, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"DSC\", \"post_text\": \"I reduced the DFU Server's monitorInterval from 900 to 300 seconds (5 minutes) but I do not see the right behavior in Std. File.MonitorFile(). Basically, the actual interval remains at 15 minutes. That is to say, new files are found only in 15 minute intervals and the desire is to 'see' them faster, like every five minutes.\\n\\nAm I doing something wrong? Is there perhaps a different configuration setting for this? If monitorInterval did not change this function's behavior, what did it change?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-09-19 22:20:54\" },\n\t{ \"post_id\": 2250, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nYes, I'm thinking that 900 was the "worse case scenario" that the developers encountered, and reducing it should not have any adverse effects. \\n\\nAt any rate, I am very interested in reducing the amount of time it takes the code to detect a new file, and it seems that monitorinterval is the way to go. I'm just hoping that there are no adverse side effects such as abdominal pain, neuropathy, nausea, vomiting, pancreatitis, rash, numbness, tingling, burning sensations, fatigue, chills, dizziness, headaches, or insomnia.\\n\\n
\\n\\nBut any interval lasting more than 4 hours should be reported immediately to your HPCC support team. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-08-22 13:12:04\" },\n\t{ \"post_id\": 2249, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"DSC\", \"post_text\": \"Through experimentation, it appears that File.MonitorFile() continues executing -- checking for files matching the pattern -- at monitorinterval intervals until a match is found. That's assuming this code:\\n\\n
IMPORT * FROM Std;\\n\\nfoundFileEventName := 'FoundAFile';\\n\\nFile.MonitorFile (\\n foundFileEventName,\\n '10.210.150.80',\\n '/var/lib/HPCCSystems/dropzone/foo*'\\n );\\n\\nOUTPUT(EVENTEXTRA) : WHEN(EVENT(foundFileEventName,'*'));
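For comparison with Dan's physical-file monitor above, a minimal sketch of the logical-file variant asked about in the original question. This is not from the original post; the event name and logical filename pattern are hypothetical, and the call follows the STD.File.MonitorLogicalFileName form documented in the Standard Library Reference.

IMPORT * FROM Std;

// Hypothetical example: fire an event when a matching logical file appears in the DFU.
foundLogicalFileEvent := 'FoundALogicalFile';

File.MonitorLogicalFileName (
    foundLogicalFileEvent,
    'incoming::daily::*'          // logical filename pattern to watch for (illustrative)
    );

// Re-arms on each event, just like the MonitorFile example above.
OUTPUT(EVENTEXTRA) : WHEN(EVENT(foundLogicalFileEvent, '*'));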
\\n\\nThe WHEN may have something to do with it repeating. At any rate, I am very interested in reducing the amount of time it takes the code to detect a new file, and it seems that monitorinterval is the way to go. I'm just hoping that there are no adverse side effects such as abdominal pain, neuropathy, nausea, vomiting, pancreatitis, rash, numbness, tingling, burning sensations, fatigue, chills, dizziness, headaches, or insomnia.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-08-22 13:05:02\" },\n\t{ \"post_id\": 2247, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"Re: DFU Server's monitorqueue parameter\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nThe monitorqueue parameter controls the name of the queue. The monitorinterval parameter controls the time interval to check for scheduling events. \\n\\nI am trying to confirm, but I would think that reducing the interval would reduce the window for events, and if an event were not detected perhaps shut down the process, but that's just an educated guess. So the time out is 15 minutes, it should not take more than 15 minutes between DFU events? I would try to gradually reduce the interval to see if there is an adverse affect in your process, or try something really short to see the effect.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2012-08-21 18:50:49\" },\n\t{ \"post_id\": 2238, \"topic_id\": 495, \"forum_id\": 14, \"post_subject\": \"DFU Server's monitorqueue parameter\", \"username\": \"DSC\", \"post_text\": \"My understanding is that DFU Server's monitorqueue parameter governs the interval between file monitoring invocations. Questions:\\n\\n1) Is this single parameter used for both File.MonitorFile() and File.MonitorLogicalFileName()?\\n\\n2) Is this parameter used for any other purposes? (Will changing the value adversely affect something else?)\\n\\n3) What is the downside to reducing the interval from the default (900 seconds) to something much smaller, like 60 seconds?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-08-20 18:03:49\" },\n\t{ \"post_id\": 2299, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"sbagaria\", \"post_text\": \"Thanks Richard.\\n\\nSee here - https://github.com/hpcc-systems/HPCC-Pl ... ssues/3300\", \"post_time\": \"2012-09-06 11:10:26\" },\n\t{ \"post_id\": 2298, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"rtaylor\", \"post_text\": \"Sid,\\n\\nOpen an issue on GitHub.\\n\\nRichard\", \"post_time\": \"2012-09-06 10:59:15\" },\n\t{ \"post_id\": 2296, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"sbagaria\", \"post_text\": \"I give up.\", \"post_time\": \"2012-09-06 08:00:10\" },\n\t{ \"post_id\": 2291, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"sbagaria\", \"post_text\": \"There is no direct way of compiling only the dafilesrv executable. I am going to try compiling everything on Windows. This is going to be painful... But I have started; let's see.\", \"post_time\": \"2012-09-05 12:01:21\" },\n\t{ \"post_id\": 2265, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"rtaylor\", \"post_text\": \"Sid,\\n\\nI presume so. 
You can try compiling the dafileserv source code from GitHub on Windows and see how well that works.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-08-29 17:48:27\" },\n\t{ \"post_id\": 2262, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"Re: dafilesrv on windows\", \"username\": \"sbagaria\", \"post_text\": \"This looks exactly like viewtopic.php?f=14&t=202\\n\\nIs the situation still the same?\", \"post_time\": \"2012-08-28 21:09:15\" },\n\t{ \"post_id\": 2255, \"topic_id\": 498, \"forum_id\": 14, \"post_subject\": \"dafilesrv on windows\", \"username\": \"sbagaria\", \"post_text\": \"I am trying to write batch scripts for my Windows users which will spray certain files for them from their Windows machine. Right now, I can scp the files to the cluster's local landing zone and spray from there using dfuplus.\\n\\nHowever, it will be nice to have a dafilesrv executable for Windows which will allow my Windows users to seamlessly spray and despray files to their local machines using dfuplus. This will be a much appreciated utility and simplify the process for early adapters using large files.\\n\\nThanks.\\n\\nSid\", \"post_time\": \"2012-08-28 04:54:09\" },\n\t{ \"post_id\": 2267, \"topic_id\": 500, \"forum_id\": 14, \"post_subject\": \"Re: ECL Startup scripts?\", \"username\": \"DSC\", \"post_text\": \"Ah, I did not know it would survive a restart. Thanks!\\n\\nDan\", \"post_time\": \"2012-08-29 20:12:45\" },\n\t{ \"post_id\": 2266, \"topic_id\": 500, \"forum_id\": 14, \"post_subject\": \"Re: ECL Startup scripts?\", \"username\": \"JimD\", \"post_text\": \"Dan,\\n\\nIf you submit a job to workflow services using :WHEN() it is "scheduled." The ECL Scheduler waits for the nominated event and then executes the code.\\n\\nSee WHEN in the Language Reference. \\n\\nThis scheduled job should remain in the ECL Scheduler's queue through a restart. 
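To make the scheduling point concrete, a minimal sketch (not from Jim's post) of a workunit that sits on the ECL Scheduler until a nominated event fires; the event name is a hypothetical example, and the commented NOTIFY shows one way to raise the event by hand for testing.

// Submit (don't just compile) this workunit: it stays scheduled and runs the OUTPUT
// each time the 'FoundAFile' event is received. Event name is illustrative only.
OUTPUT('Event received, extra data: ' + EVENTEXTRA) : WHEN(EVENT('FoundAFile', '*'));

// From another workunit, the same event can be raised manually for testing:
// NOTIFY(EVENT('FoundAFile', '/var/lib/HPCCSystems/dropzone/foo1.txt'));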
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2012-08-29 19:48:11\" },\n\t{ \"post_id\": 2263, \"topic_id\": 500, \"forum_id\": 14, \"post_subject\": \"ECL Startup scripts?\", \"username\": \"DSC\", \"post_text\": \"Does HPCC support the concept of ECL startup scripts or jobs?\\n\\nThe scenario I'm looking at is continual monitoring of a landing zone for incoming files, to be processed by Thor, and I want the job doing the monitoring to automatically restart if the cluster is restarted.\\n\\nI could see how this might be accomplished with shell scripts, but I would be concerned about the timing (making the sure the entire cluster is entirely, successfully, ready before scheduling a job).\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-08-29 13:12:46\" },\n\t{ \"post_id\": 5424, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermittent esp crash on Roxie query\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nAttached the ESP Log File which shows the error.\\n\\n\\nThanks,\\nksviswa\", \"post_time\": \"2014-03-25 13:49:44\" },\n\t{ \"post_id\": 5421, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermittent esp crash on Roxie query\", \"username\": \"richardkchapman\", \"post_text\": \"The issue that Dan reported was fixed in 3.10:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-3324\\n\\nSounds like your issue must be different (though with similar symptoms) if you are seeing it in 4.0\\nCan you search in the esp log files for any information about the fault (likely to start with lines looking something like this:\\n\\n[code]\\n0000006F 2012-09-07 21:56:06 14020 14057 "================================================"\\n00000070 2012-09-07 21:56:06 14020 14057 "Signal: 11 Segmentation fault"\\n{/code]\\n\\nThen paste the traceback information that follows it\\n\\nThanks\\n\\nRichard\", \"post_time\": \"2014-03-25 10:03:20\" },\n\t{ \"post_id\": 5420, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermittent esp crash on Roxie query\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI encounter the same error. \\nWhen there are many concurrent access to ESP on roxie query, the ESP crashes. Is this issue resolved ..? \\n\\nHPCC Version : community_4.0.2-2\\n\\nError :\\n\\n\\n00000356 2014-03-24 08:34:23.842 31142 31143 "KERN_INFO: [601274.387544] esp[25922]: segfault at 0 ip 00007f83f19b46fe sp 00007f83c67fba90 error 4 in libws_ecl.so[7f83f1997000+39000]"\\n
\\n\\nWhat parameter do we need to change in environment.xml for a temporary workaround..?\\n\\nKindly suggest.\\n\\nI have attached the log files too ( DAFILESRV and ROXIE logs )\", \"post_time\": \"2014-03-25 06:37:26\" },\n\t{ \"post_id\": 2323, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermittent esp crash on Roxie query\", \"username\": \"richardkchapman\", \"post_text\": \"Certainly looks like something in the ESP code for loading info from the workunit is not fully threadsafe.\\n\\nI have opened a github issue at \\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/3324\", \"post_time\": \"2012-09-11 13:38:15\" },\n\t{ \"post_id\": 2313, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermittent esp crash on Roxie query\", \"username\": \"DSC\", \"post_text\": \"It appears that modifying the configuration so that ESP spawns at most only one concurrent thread (unlimited concurrent threads is the default) is an effective workaround. At least, no segfaults appear.\\n\\nDan\", \"post_time\": \"2012-09-10 02:28:01\" },\n\t{ \"post_id\": 2311, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Re: Intermitten esp crash on Roxie query\", \"username\": \"DSC\", \"post_text\": \"Forgot to mention that the stack trace I provided earlier isn't the only one. Here is a fragment of the other one, and I've probably seen this one more often:\\n\\n0000012D 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2af2c7fbee06]"\\n0000012E 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2af2c7fbfe55]"\\n0000012F 2012-09-07 22:26:13 24942 25099 " /lib64/libpthread.so.0 [0x2af2c98e3b70]"\\n00000130 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_ZN9InitTable4exitEPv+0x6f) [0x2af2c7fdeacf]"\\n00000131 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_Z16FreeSharedObjectPv+0x9) [0x2af2c8057119]"\\n00000132 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_ZN12SharedObject6unloadEv+0x25) [0x2af2c8057155]"\\n00000133 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libdllserver.so(_ZN9HelperDllD0Ev+0x5a) [0x2aaaabaaaa7a]"\\n00000134 2012-09-07 22:26:13 24942 25099 " esp(_ZNK10CInterface7ReleaseEv+0x3c) [0x41050c]"\\n00000135 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libdllserver.so(_ZNK9HelperDll7ReleaseEv+0x9) [0x2aaaabaaafd9]"\\n00000136 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libwuwebview.so(_ZN9WuWebViewD0Ev+0xb4) [0x2aaab188c874]"\\n00000137 2012-09-07 22:26:13 24942 25099 " esp(_ZNK10CInterface7ReleaseEv+0x3c) [0x41050c]"\\n00000138 2012-09-07 22:26:13 24942 25111 "TxSummary[activeReqs=17;user=@172.16.1.245;total=2159ms;]"\\n00000139 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libwuwebview.so(_ZNK9WuWebView7ReleaseEv+0x9) [0x2aaab188aef9]"\\n0000013A 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libws_ecl.so(_ZN13CWsEclBinding14handleHttpPostEP12CHttpRequestP13CHttpResponse+0x577) [0x2aaab2626f47]"\\n0000013B 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libesphttp.so(_ZN14CEspHttpServer14processRequestEv+0x42a) [0x2af2c879fe2a]"\\n0000013C 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libesphttp.so(_ZN11CHttpThread9onRequestEv+0x19b) [0x2af2c879ad7b]"\\n0000013D 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libesphttp.so(_ZN18CEspProtocolThread3runEv+0x1a) [0x2af2c87ca0ba]"\\n0000013E 2012-09-07 22:26:13 24942 25099 " 
/opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2af2c8049277]"\\n0000013F 2012-09-07 22:26:13 24942 25099 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2af2c8049def]"
\\n\\nDan\", \"post_time\": \"2012-09-08 03:28:25\" },\n\t{ \"post_id\": 2310, \"topic_id\": 513, \"forum_id\": 14, \"post_subject\": \"Intermittent esp crash on Roxie query\", \"username\": \"DSC\", \"post_text\": \"I have an intermittent (30% of the time) esp crash when submitting Roxie queries. Here is the crash log:\\n\\n00000037 2012-09-07 21:56:04 14020 14057 "buffer_key=1"\\n00000038 2012-09-07 21:56:04 14020 14057 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000039 2012-09-07 21:56:04 14020 14057 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000003A 2012-09-07 21:56:04 14020 14058 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000003B 2012-09-07 21:56:04 14020 14059 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000003C 2012-09-07 21:56:04 14020 14058 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000003D 2012-09-07 21:56:04 14020 14059 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000003E 2012-09-07 21:56:04 14020 14060 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000041 2012-09-07 21:56:04 14020 14063 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000042 2012-09-07 21:56:04 14020 14063 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000043 2012-09-07 21:56:04 14020 14060 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000003F 2012-09-07 21:56:04 14020 14061 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000044 2012-09-07 21:56:04 14020 14061 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000045 2012-09-07 21:56:04 14020 14064 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000046 2012-09-07 21:56:04 14020 14064 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000040 2012-09-07 21:56:04 14020 14062 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000047 2012-09-07 21:56:04 14020 14062 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000048 2012-09-07 21:56:04 14020 14065 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000049 2012-09-07 21:56:04 14020 14065 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000004A 2012-09-07 21:56:04 14020 14066 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000004B 2012-09-07 21:56:04 14020 14066 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000004C 2012-09-07 21:56:04 14020 14067 
"HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000004D 2012-09-07 21:56:04 14020 14067 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000004E 2012-09-07 21:56:04 14020 14068 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000004F 2012-09-07 21:56:04 14020 14068 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000050 2012-09-07 21:56:04 14020 14069 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000051 2012-09-07 21:56:04 14020 14069 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000052 2012-09-07 21:56:04 14020 14070 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000053 2012-09-07 21:56:04 14020 14070 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000054 2012-09-07 21:56:04 14020 14071 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000055 2012-09-07 21:56:04 14020 14071 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000056 2012-09-07 21:56:04 14020 14072 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000057 2012-09-07 21:56:04 14020 14072 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000058 2012-09-07 21:56:04 14020 14073 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n00000059 2012-09-07 21:56:04 14020 14073 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000005A 2012-09-07 21:56:04 14020 14074 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000005B 2012-09-07 21:56:04 14020 14074 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000005C 2012-09-07 21:56:04 14020 14075 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000005D 2012-09-07 21:56:04 14020 14075 "POST /WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n0000005E 2012-09-07 21:56:04 14020 14076 "HTTP First Line: POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country HTTP/1.1"\\n0000005F 2012-09-07 21:56:04 14020 14076 "POST /WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country, from unknown@172.16.1.245"\\n00000060 2012-09-07 21:56:04 14020 14057 "soap from json req: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><search_top_companies_by_revenue_given_sic4_and_countryRequest><us_sic4>2911</us_sic4><country_id>76</country_id></search_top_companies_by_revenue_given_sic4_and_countryRequest></soap:Body></soap:Envelope>"\\n00000061 2012-09-07 21:56:05 
14020 14070 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000062 2012-09-07 21:56:05 14020 14071 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000063 2012-09-07 21:56:05 14020 14072 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000064 2012-09-07 21:56:05 14020 14073 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000065 2012-09-07 21:56:05 14020 14074 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000066 2012-09-07 21:56:05 14020 14075 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000067 2012-09-07 21:56:05 14020 14059 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000068 2012-09-07 21:56:05 14020 14076 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n00000069 2012-09-07 21:56:05 14020 14058 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n0000006A 2012-09-07 21:56:05 14020 14058 "soap from json req: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><search_top_cities_by_revenue_given_sic4_and_countryRequest><us_sic4>2911</us_sic4><country_id>76</country_id></search_top_cities_by_revenue_given_sic4_and_countryRequest></soap:Body></soap:Envelope>"\\n0000006B 2012-09-07 21:56:05 14020 14057 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n0000006C 2012-09-07 21:56:06 14020 14072 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n0000006D 2012-09-07 21:56:06 14020 14059 "soap from json req: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><search_top_cities_by_revenue_given_sic4_and_countryRequest><us_sic4>2911</us_sic4><country_id>76</country_id></search_top_cities_by_revenue_given_sic4_and_countryRequest></soap:Body></soap:Envelope>"\\n0000006E 2012-09-07 21:56:06 14020 14072 "soap from json req: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><search_top_cities_by_revenue_given_sic4_and_countryRequest><us_sic4>2911</us_sic4><country_id>76</country_id></search_top_cities_by_revenue_given_sic4_and_countryRequest></soap:Body></soap:Envelope>"\\n0000006F 2012-09-07 21:56:06 14020 14057 "================================================"\\n00000070 2012-09-07 21:56:06 14020 14057 "Signal: 11 Segmentation fault"\\n00000071 2012-09-07 21:56:06 14020 14057 "Fault IP: 0000003D4B079B60"\\n00000072 2012-09-07 21:56:06 14020 14057 "Accessing: 0000000000000000"\\n00000073 2012-09-07 21:56:06 14020 14057 "Registers:"\\n00000074 2012-09-07 21:56:06 14020 14057 "EAX:6569786F00323233 EBX:0000000000000001 ECX:0000000000000005 EDX:0000000000000000 ESI:00002AAAB4007D55 EDI:6569786F00323233"\\n00000075 2012-09-07 21:56:06 14020 14057 "CS:EIP:0033:0000003D4B079B60"\\n00000076 2012-09-07 21:56:06 14020 14057 " ESP:0000000054267888 EBP:6569786F00323233"\\n00000077 2012-09-07 21:56:06 14020 14057 "Stack[0000000054267888]: 00002AAAABAA8824 0000000000002AAA 0000000000000000 FFFF000000000000 83E4D20AFFFF0000 
0000000083E4D20A 0000000000000000 B40025E000000000"\\n00000078 2012-09-07 21:56:06 14020 14057 "Stack[00000000542678A8]: 00002AAAB40025E0 B400829000002AAA 00002AAAB4008290 B4007D2000002AAA 00002AAAB4007D20 0000000000002AAA 0000000000000000 FFFF000000000000"\\n00000079 2012-09-07 21:56:06 14020 14057 "Stack[00000000542678C8]: 83E4D20AFFFF0000 5426000083E4D20A 0000000054260000 B4007C6800000000 00002AAAB4007C68 B40039F000002AAA 00002AAAB40039F0 0000003900002AAA"\\n0000007A 2012-09-07 21:56:06 14020 14057 "Stack[00000000542678E8]: 0000004000000039 0000000100000040 0000000000000001 0000000200000000 0000000000000002 B400253000000000 00002AAAB4002530 ABAA43D600002AAA"\\n0000007B 2012-09-07 21:56:06 14020 14057 "Stack[0000000054267908]: 00002AAAABAA43D6 0000000100002AAA 0000000000000001 0000000100000000 0000000000000001 54267A7000000000 0000000054267A70 4B0325BE00000000"\\n0000007C 2012-09-07 21:56:06 14020 14057 "Stack[0000000054267928]: 0000003D4B0325BE B40073F00000003D 00002AAAB40073F0 F95A010100002AAA 00002AD2F95A0101 B400449000002AD2 00002AAAB4004490 B4007C6000002AAA"\\n0000007D 2012-09-07 21:56:06 14020 14057 "Stack[0000000054267948]: 00002AAAB4007C60 B4007C6000002AAA 00002AAAB4007C60 0000000800002AAA 0000000000000008 ABAA43B000000000 00002AAAABAA43B0 0000000200002AAA"\\n0000007E 2012-09-07 21:56:06 14020 14057 "Stack[0000000054267968]: 0000000000000002 0000000400000000 0000000000000004 54267A7000000000 0000000054267A70 0000000200000000 0000000000000002 B4007C7000000000"\\n0000007F 2012-09-07 21:56:06 14020 14057 "Backtrace:"\\n00000080 2012-09-07 21:56:06 14020 14067 "soap from json req: <?xml version="1.0" encoding="UTF-8"?><soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"> <soap:Body><search_top_companies_by_revenue_given_sic4_and_countryRequest><us_sic4>2911</us_sic4><country_id>76</country_id></search_top_companies_by_revenue_given_sic4_and_countryRequest></soap:Body></soap:Envelope>"\\n00000081 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2ad2f954de06]"\\n00000082 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2ad2f954ee55]"\\n00000083 2012-09-07 21:56:06 14020 14057 " /lib64/libpthread.so.0 [0x2ad2fae72b70]"\\n00000084 2012-09-07 21:56:06 14020 14057 " /lib64/libc.so.6(strlen+0x10) [0x3d4b079b60]"\\n00000085 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_ZN11DllLocation13queryLocationEv+0xa4) [0x2aaaabaa8824]"\\n00000086 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_Z14orderLocationsPP10IInterfaceS1_+0x26) [0x2aaaabaa43d6]"\\n00000087 2012-09-07 21:56:06 14020 14057 " /lib64/libc.so.6 [0x3d4b0325be]"\\n00000088 2012-09-07 21:56:06 14020 14057 " /lib64/libc.so.6 [0x3d4b03246d]"\\n00000089 2012-09-07 21:56:06 14020 14057 " /lib64/libc.so.6(qsort+0x291) [0x3d4b0329f1]"\\n0000008A 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_ZN8DllEntry15getBestLocationEv+0x7f) [0x2aaaabaa608f]"\\n0000008B 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_ZN9DllServer12getBestMatchEPKc+0x1f) [0x2aaaabaa537f]"\\n0000008C 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_ZN9DllServer14getBestMatchExEPKc+0x16) [0x2aaaabaa5476]"\\n0000008D 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libdllserver.so(_ZN9DllServer7loadDllEPKc15DllLocationType+0x2c) [0x2aaaabaa5d9c]"\\n0000008E 2012-09-07 
21:56:06 14020 14057 " /opt/HPCCSystems/lib/libwuwebview.so(_ZN9WuWebView7loadDllEb+0x85) [0x2aaab1888115]"\\n0000008F 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libwuwebview.so(_ZN9WuWebView13expandResultsEPKcR12StringBufferj+0x56) [0x2aaab1889a56]"\\n00000090 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libws_ecl.so(_ZN13CWsEclBinding14handleHttpPostEP12CHttpRequestP13CHttpResponse+0x564) [0x2aaab2625f34]"\\n00000091 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libesphttp.so(_ZN14CEspHttpServer14processRequestEv+0x42a) [0x2ad2f9d2ee2a]"\\n00000092 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libesphttp.so(_ZN11CHttpThread9onRequestEv+0x19b) [0x2ad2f9d29d7b]"\\n00000093 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libesphttp.so(_ZN18CEspProtocolThread3runEv+0x1a) [0x2ad2f9d590ba]"\\n00000094 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x37) [0x2ad2f95d8277]"\\n00000095 2012-09-07 21:56:06 14020 14057 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1f) [0x2ad2f95d8def]"\\n00000096 2012-09-07 21:56:06 14020 14057 " /lib64/libpthread.so.0 [0x2ad2fae6a73d]"\\n00000097 2012-09-07 21:56:06 14020 14057 " /lib64/libc.so.6(clone+0x6d) [0x3d4b0d44bd]"\\n00000098 2012-09-07 21:56:06 14020 14057 "ThreadList:\\n4225F940 1109784896 14021: CMPNotifyClosedThread\\n44260940 1143343424 14022: MP Connection Thread\\n48262940 1210460480 14024: CSocketSelectThread\\n46261940 1176901952 14025: CMemoryUsageReporter\\n4A263940 1244019008 14026: unknown\\n4C264940 1277577536 14027: unknown\\n4E265940 1311136064 14029: unknown\\n50266940 1344694592 14030: unknown\\n52267940 1378253120 14031: CSocketSelectThread\\n54268940 1411811648 14057: CEspProtocolThread\\n56269940 1445370176 14058: CEspProtocolThread\\n5826A940 1478928704 14059: CEspProtocolThread\\n5A26B940 1512487232 14060: CEspProtocolThread\\n5C26C940 1546045760 14061: CEspProtocolThread\\n5E26D940 1579604288 14062: CEspProtocolThread\\n6026E940 1613162816 14063: CEspProtocolThread\\n6226F940 1646721344 14064: CEspProtocolThread\\n64270940 1680279872 14065: CEspProtocolThread\\n66271940 1713838400 14066: CEspProtocolThread\\n68272940 1747396928 14067: CEspProtocolThread\\n6A273940 1780955456 14068: CEspProtocolThread\\n6C274940 1814513984 14069: CEspProtocolThread\\n6E275940 1848072512 14070: CEspProtocolThread\\n70276940 1881631040 14071: CEspProtocolThread\\n72277940 1915189568 14072: CEspProtocolThread\\n74278940 1948748096 14073: CEspProtocolThread\\n76279940 1982306624 14074: CEspProtocolThread\\n7827A940 2015865152 14075: CEspProtocolThread\\n7A27B940 2049423680 14076: CEspProtocolThread\\n"
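For reference, the requests in the log above are plain WsEcl JSON POSTs and can be reproduced with curl. A minimal sketch of the kind of concurrent driver used for the stress test described in the rest of this post, assuming the standard WsEcl port 8002, a placeholder ESP address, and a JSON body shape guessed from the "soap from json req" lines above (none of these details are taken from the original report):

  ESP="http://<esp-ip>:8002"
  for i in $(seq 1 20); do
    curl -s -X POST -H 'Content-Type: application/json' \
      -d '{"search_top_companies_by_revenue_given_sic4_and_countryRequest":{"us_sic4":"2911","country_id":"76"}}' \
      "$ESP/WsEcl/json/query/myroxie/search_top_companies_by_revenue_given_sic4_and_country" &
    curl -s -X POST -H 'Content-Type: application/json' \
      -d '{"search_top_cities_by_revenue_given_sic4_and_countryRequest":{"us_sic4":"2911","country_id":"76"}}' \
      "$ESP/WsEcl/json/query/myroxie/search_top_cities_by_revenue_given_sic4_and_country" &
  done
  wait

Each backgrounded curl opens its own connection to ESP, which is what produces the burst of concurrent CEspProtocolThread activity visible in the thread list above.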
\\n\\nThis was taken during a stress test, where another system was executing a pair of json Roxie queries (via curl) 20 times in the background, which is to say simultaneously as bash would allow. Sometimes all queries succeed, other times a segfault occurs, and when esp segfaults it could do it on the first processed query or any of them.\\n\\nThis particular test was performed on a four-node cluster where the esp server sits on one server and the other three have Roxie servers. However, I've duplicated this problem with single-node and dual-node clusters with various configurations. In all configurations, main storage is actually a SAN. Version 3.8.2CE is used on all clusters.\\n\\nThe queries themselves are rather boring. They're doing a simple lookup into an index (two parameters, exact match) and a subsequent FETCH into a dataset that contains variable-length records, each containing embedded child datasets. The results are always 10 top-level records, totalling perhaps 200K at the maximum. The index and underlying datasets are in the gigabyte area.\\n\\nI've changed the number of threads (master and slave) for Roxie, which seems to help but not cure the problem. I've increased Roxie's cache, and that seems to help as well. When I say "help" I mean that the segfaults are less frequent; they do not disappear.\\n\\nAy information on how to address this issue would be greatly appreciated.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-08 03:09:00\" },\n\t{ \"post_id\": 2488, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Re: Preventing Roxie from copying files\", \"username\": \"DSC\", \"post_text\": \"Excellent. Thanks!\", \"post_time\": \"2012-10-10 11:36:00\" },\n\t{ \"post_id\": 2487, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Re: Preventing Roxie from copying files\", \"username\": \"clo\", \"post_text\": \"I have been able to reproduce your issue in 3.8.4-1. I've opened an issue in JIRA to track this. http://track.hpccsystems.com/browse/HPCC-8012\", \"post_time\": \"2012-10-10 11:34:49\" },\n\t{ \"post_id\": 2477, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Re: Preventing Roxie from copying files\", \"username\": \"sort\", \"post_text\": \"We will need to investigate copyResources = false not prohibiting m_of_n file parts from being copied. It may need a jira issue\\n\\nuseTreeCopy is a parameter that should be false. It will not override the copyResources parameter. (we may eventually remove the parameter)\", \"post_time\": \"2012-10-08 19:59:18\" },\n\t{ \"post_id\": 2476, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Re: Preventing Roxie from copying files\", \"username\": \"DSC\", \"post_text\": \"Thanks for the info, Jim.\\n\\nI have set my options as copyResources=FALSE and useRemoteResources=TRUE but I'm still seeing files copied to the roxie directory (/var/lib/HPCCSystems/hpcc-data/roxie/) on my drives. Perhaps I'm misunderstanding how this works. (Hence, these questions.)\\n\\nBecause I'm working with limited disk space, my thinking was that these settings would prevent any copying to the roxie directory and that roxie would reach back to the thor data directory for everything. That's the desired behavior, anyway. 
Instead, I'm seeing copying-as-usual where files are copied in the background after publishing a query and chewing into my disk space.\\n\\nI'm using 3.8.4CE, and I've verified (via diff and md5sum) that the current configuration does contain those two settings set to the values described above, and that all nodes have exactly the same configuration.\\n\\nWild thought: Does the 'useTreeCopy' setting affect this? I noticed that I set it to TRUE at some point as a test and never turned it back. Could it override the other settings?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-10-08 19:07:28\" },\n\t{ \"post_id\": 2475, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Re: Preventing Roxie from copying files\", \"username\": \"JimD\", \"post_text\": \"Good question. \\nThe following information will be in a future version of the Roxie Reference Manual: \\n\\nThere are two settings in the Roxie configuration that control where Roxie looks for data and index files:\\ncopyResources\\t\\nCopies necessary data and key files from the current location when the query is published. \\nuseRemoteResources\\t\\nInstructs Roxie to look for data and key files in the current location after the query is published.\\n\\nThese options may appear to be mutually exclusive, but the chart below shows what each of the four possible combination means. \\n\\ncopyResources = T \\nuseRemoteResources=T \\nDirects the Roxie cluster to use the remote instance of the data until it can copy the data locally. This allows a query to be available immediately while the data is copied.\\n\\ncopyResources = T \\nuseRemoteResources=F\\nDirects the Roxie cluster to copy the data locally. The query cannot be executed until the data is copied. This ensures optimum performance after the data is copied.\\n\\ncopyResources = F\\nuseRemoteResources=T\\nDirects the Roxie cluster to load the data from a remote location. The query can be executed immediately, but performance is limited by network bandwidth. This allows queries to run without using any Roxie disk space, but reduces its throughput capabilities.\\n\\ncopyResources = F\\nuseRemoteResources=F\\nWill use data and indexes already loaded (placed on the Roxie cluster using DFU ) but will not copy or read remote data.\", \"post_time\": \"2012-10-08 18:52:45\" },\n\t{ \"post_id\": 2474, \"topic_id\": 541, \"forum_id\": 14, \"post_subject\": \"Preventing Roxie from copying files\", \"username\": \"DSC\", \"post_text\": \"According to some posts in the forum there is a way to prevent Roxie from copying any files, instead referencing them from their original Thor location. Is that actually possible? If so, what configuration settings are required? The advanced section of the configuration manager guide does not address any Roxie options at all.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-08 14:11:19\" },\n\t{ \"post_id\": 2550, \"topic_id\": 556, \"forum_id\": 14, \"post_subject\": \"Re: Configuration Wizard is not appearing - Blank Page\", \"username\": \"Durai\", \"post_text\": \"Thanks Gleb. I will take the 3.10.x for now.\", \"post_time\": \"2012-10-19 04:28:34\" },\n\t{ \"post_id\": 2549, \"topic_id\": 556, \"forum_id\": 14, \"post_subject\": \"Re: Configuration Wizard is not appearing - Blank Page\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi Durai,\\n\\nWe have been able to reproduce the problem, and a fix will be forthcoming to the Master Branch. In the interim, you can pull from the 3.10.x branch to do your builds. 
The 3.10.x branch is not affected by this issue. \\n\\nThe problem here is a packaging issue; some files have been omitted so the web page is not showing up. The missing files come up as errors in the browser developer console.\\n\\nI will post an update, once this issue has been resolved in the Master branch.\", \"post_time\": \"2012-10-18 15:53:24\" },\n\t{ \"post_id\": 2547, \"topic_id\": 556, \"forum_id\": 14, \"post_subject\": \"Re: Configuration Wizard is not appearing - Blank Page\", \"username\": \"Durai\", \"post_text\": \"Hi Gleb Aronsky, \\n\\nThanks for the response. \\n\\nThe configMgr is up and running and I see the HTML source in the browser. I used IE 9 and Chrome, none of them rendering the content. \\n\\nThanks\\nDurai\", \"post_time\": \"2012-10-18 14:23:17\" },\n\t{ \"post_id\": 2540, \"topic_id\": 556, \"forum_id\": 14, \"post_subject\": \"Re: Configuration Wizard is not appearing - Blank Page\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Durai,\\n\\nMake sure that the ConfigMgr is actually running on the node you are trying to connect to (i.e. nodeip). If it is, are you seeing a dialog with 4 radio buttons, one of which is labeled ‘Generate new environment using wizard’? Also, what browser are you using?\", \"post_time\": \"2012-10-18 13:21:00\" },\n\t{ \"post_id\": 2534, \"topic_id\": 556, \"forum_id\": 14, \"post_subject\": \"Configuration Wizard is not appearing - Blank Page\", \"username\": \"Durai\", \"post_text\": \"Hi,\\n\\nI took the latest from Github and built it for Ubuntu 12.04 on i686 configuration. \\n\\nThe build was successful and installation is done in individual node with proper SSH generations. However after initiating the configuration manager (runs successfully, as given in the installation guide)., when I try to get the configuration wizard (using http://nodeip:8015, the wizard is not appearing. \\n\\nNot sure what am I missing ? Can you please help! Thanks in advance.\", \"post_time\": \"2012-10-18 06:04:34\" },\n\t{ \"post_id\": 3009, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"Chris,\\n\\nThanks for the information. I will try adding two thors to the same cluster as well.\\n\\nI am just evaluating various options available so that when we set up our own HPCC lab I will have enough information on hand.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-12-14 05:18:34\" },\n\t{ \"post_id\": 3001, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"clo\", \"post_text\": \"Hi David,\\n\\nSetting the nodegroup to match will work. Additionally, it should be noted that with any change to the system, it's advised that dali is updated either by restarting the system or by running updtdalienv /etc/HPCCSystems/environment.xml
in order to update the dali about any changes to the environment.xml.\\n\\nAs for load balancing, Dan, in his original post, asked for two different clusters. If this had been two thors under the same cluster, then load balancing would happen automatically. However, with two separate clusters, load balancing will have to be managed manually by the user.\", \"post_time\": \"2012-12-13 12:47:08\" },\n\t{ \"post_id\": 3000, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"Chris,\\n\\nI looked at the second thor topology tab and found that the slave was missing. That was a oversight on my part. \\n\\nBut even after I fixed that, the second thor process would not startup and I kept getting the "Named group 'mythor2' not found" error in the logs. \\n\\nSo I set the nodeGroup in the second thor to 'mythor'. This made the error go away and I found that I could execute two jobs at the same time by running one on thor1 and the other on thor2.\\n\\nHowever I am not sure whether changing the nodeGroup was the right way to go.\\n\\nAlso I am not sure how to get automatic load balancing between the two thor clusters.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-12-13 10:26:36\" },\n\t{ \"post_id\": 2999, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"Chris,\\n\\nI looked at the second thor topology tab and found that the slave was missing. That was a oversight on my part. \\n\\nBut even after I fixed that the second thor process would not startup and I kept getting the "Named group 'mythor2' not found" error in the logs. \\n\\nAfter that I set the nodeGroup in the second thor to 'mythor'. This made the error go away and I found that I could execute two jobs at the same time by running one one thor1 and the other on thor2 via ECL ide.\\n\\nHowever I am not sure whether changing the nodeGroup was the right way to go.\\n\\nAlso I am still not sure how to get automatic load balancing between the two thor nodes.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-12-13 10:26:01\" },\n\t{ \"post_id\": 2971, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"clo\", \"post_text\": \"Hi jeeves, \\n\\nFirst, if you set the nodegroup to be thor, then you won't be accessing a different thor. Just to check that you have a slave node assigned, can you please navigate to your second thor, look at the topology tab for that cluster, and then look at that to see you have a master and a slave under that master. \\n\\nI'd like to try and recreate your issue on my local system to see what's going on. \\n\\nChris\", \"post_time\": \"2012-12-06 13:45:58\" },\n\t{ \"post_id\": 2959, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"Both of my thor clusters havs slaves per node set to 1.\\n\\n I have verified that the configuration attributes other than master port and slave port are identical.\\n\\n If I change nodeGroup(which is empty by default) in my second thor cluster to mythor(the name of the first thor cluster - created by default) the second thor cluster manages to startup and the "Named group 'mythor2' not found" error goes away.\\n\\n But the second cluster is stil dysfunctional. 
If I try to send a job to it using eclplus I get a "Thor cluster can not have 0 slave processes" error.\", \"post_time\": \"2012-12-05 11:31:16\" },\n\t{ \"post_id\": 2944, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"How many slavesPerNode do you have assigned in mythor2? It should be at least one.\\n\\nOne thing I did was flip between the two thor's configuration attributes, verifying that everything was the same except for the two ports you already changed. You might want to try that.\", \"post_time\": \"2012-12-04 14:09:57\" },\n\t{ \"post_id\": 2942, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"Dan,\\n\\nNow the environment file validates.\\nHowever when I startup HPCC the new thor cluster i created fails to start.\\n\\nStarting mydali.... [ OK ]\\nStarting mydfuserver.... [ OK ]\\nStarting myeclagent.... [ OK ]\\nStarting myeclagent2.... [ OK ]\\nStarting myeclccserver.... [ OK ]\\nStarting myeclccserver2.... [ OK ]\\nStarting myeclscheduler.... [ OK ]\\nStarting myeclscheduler2.... [ OK ]\\nStarting myesp.... [ OK ]\\nStarting mysasha.... [ OK ]\\nStarting mythor.... [ OK ]\\nStarting mythor2.... [FAILED]\\n
\\n\\nIn thormaster.2012.. log I see this \\n\\n\\n00000002 2012-12-05 00:33:13 15387 15387 Build community_3.8.6-4\\n00000003 2012-12-05 00:33:13 15387 15387 calling initClientProcess Port 13600\\n00000004 2012-12-05 00:33:13 15387 15387 Named group 'mythor2' not found\\n
\\nAfter that the log stops.\\n\\nIf I click on activity in ECL watch I get this\\nCode Message \\n5008 2012-12-04 19:11:00 GMT: CEnvironmentClusterInfo: Thor cluster can not have 0 slave processes \\n
\\n\\nWondering what went wrong..\", \"post_time\": \"2012-12-04 13:43:08\" },\n\t{ \"post_id\": 2940, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"My second thor's masterport is 13600 and the slaveport is 13610. The first thor has no values for those two attributes. That seemed to work, at least for my limited tests.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-04 12:47:15\" },\n\t{ \"post_id\": 2939, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"jeeves\", \"post_text\": \"I tried to create this setup using just one node. In other words two thor clusters sharing the same node.\\n\\nBut when trying to save I got this error.\\n\\nThere cannot be more than one ThorCluster ('mythor' and 'mythor2') with the same thor master '"node037129' and same thor master port '!\\n ] [5657: \\n]\\n
\\n\\nSo the question is how do I change the thor master port for the second cluster?\\nAnd will i have to make more changes? thor slave ports come to my mind.\", \"post_time\": \"2012-12-04 12:35:21\" },\n\t{ \"post_id\": 2754, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"For anyone reading this thread, I discovered that the two parameters in question are not scaled in bytes. They are scaled in pages instead, where a page is apparently 1048576 bytes. There is no mouseover help for those two parameters in configmgr, and the default entry is blank for both.\\n\\nMy nodes have only 6GB of RAM, so I used values of 2288 and 1716 for globalMemorySize and largeMemSize, respectively. It works, and seems to work better than leaving the default in place. There may be better values that could be used.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-09 14:55:58\" },\n\t{ \"post_id\": 2707, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"[quote="clo":3ntsucsl]One more gotcha: You might want to take into consideration the amount of memory available to handle both thors. There are two variables in globalMemorySize in the thor component's attribute tab: globalMemorySize and largeMemSize. \\n - You'll most likely want to use a formula as follows:\\n - Take the sum of all of the slaves per node you want to have, then global should account for 80% of the RAM. Take the value you get for global, and use 75% of that value for largemem.
\\n\\nI would like to make sure I understand this part correctly. If I'm running two Thor clusters with one slave per node each, and each node has 6GB of RAM, does that mean:\\n\\nglobalMemorySize = 4.8GB\\nlargeMemSize = 3.6GB\\n\\nOr am I misunderstanding? Also, how would this formula change if three Thor clusters were running instead?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-11-06 18:29:00\" },\n\t{ \"post_id\": 2601, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"Excellent info, Chris. I'll give it a whirl and let you know if I run into problems.\\n\\nDan\", \"post_time\": \"2012-10-23 15:02:20\" },\n\t{ \"post_id\": 2599, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Re: Configuring second Thor\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nFirst of all, is this setup actually possible?
\\n - Yes, this is possible.\\n\\nSecond, is merely having the two Thors refer to the same Dali enough to make them "share a file system" so to speak?
\\n - Yes this is true.\\n\\nThird, what recommendations are there for distributing masters and slaves? Keep the masters together, or make sure they are separate?
\\n - The easiest way is to add it through configmgr. It should be fine to have the masters on the same node as well. \\n - You'll need to specify another Thor component with a different name.\\n - In the navigator pane of configmgr, locate the topology section. When you're there, you'll need to right-click on the Topology and add a cluster. You can use your existing Thor cluster as a template to build out your new Thor cluster. \\n - Remember that you'll need to add an eclagentprocess, eclccserverprocess, and an eclschedulerprocess. The only thing that needs to be different is the cluster name, prefix, and where it says ThorCluster, specify the new thor cluster that you created earlier.
\\n\\n - One more gotcha: You might want to take into consideration the amount of memory available to handle both thors. There are two variables in globalMemorySize in the thor component's attribute tab: globalMemorySize and largeMemSize. \\n - You'll most likely want to use a formula as follows:\\n - Take the sum of all of the slaves per node you want to have, then global should account for 80% of the RAM. Take the value you get for global, and use 75% of that value for largemem.
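A rough worked example of that sizing rule, under one reading of it: assume a 6 GB node, one slave from each of the two Thors on it, and the one-MiB-per-unit ("page") scaling reported elsewhere in this thread. Splitting the 80% across the slaves on the node is an interpretation, not something stated above:

  RAM_MB=6144            # 6 GB node, expressed in MiB (one unit per "page")
  SLAVES_PER_NODE=2      # one slave from each of the two Thor clusters
  GLOBAL=$(( RAM_MB * 80 / 100 / SLAVES_PER_NODE ))   # ~2457 per slave
  LARGEMEM=$(( GLOBAL * 75 / 100 ))                   # ~1842
  echo "globalMemorySize=$GLOBAL largeMemSize=$LARGEMEM"

The 2288/1716 values reported elsewhere in this thread for the same 6 GB, two-Thor layout are in the same ballpark, just a little more conservative.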
\", \"post_time\": \"2012-10-23 14:54:58\" },\n\t{ \"post_id\": 2572, \"topic_id\": 567, \"forum_id\": 14, \"post_subject\": \"Configuring second Thor\", \"username\": \"DSC\", \"post_text\": \"I would like to configure a second, overlapping Thor in my four-node cluster. The purpose of the test is to assess the impact of concurrent Thor processes. In my case, one Thor would be responsible for handling updates destined for the "big bag of data" while another Thor would be responsible for merging the always-current "big bag of data" with smaller, customer-supplied files and creating new logical files. I'm fully aware that there will be a performance penalty. This is primarily an exercise in making sure this setup is possible.\\n\\nFirst of all, is this setup actually possible?\\n\\nSecond, is merely having the two Thors refer to the same Dali enough to make them "share a file system" so to speak?\\n\\nThird, what recommendations are there for distributing masters and slaves? Keep the masters together, or make sure they are separate?\\n\\nLastly, are there any known gotchas to this kind of setup?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-10-19 17:53:04\" },\n\t{ \"post_id\": 2610, \"topic_id\": 568, \"forum_id\": 14, \"post_subject\": \"Re: Authentication should be enabled\", \"username\": \"Durai\", \"post_text\": \"Thanks Richard. \\nI used hpccdemo user and it worked. I believe I dont need LDAP for me. \", \"post_time\": \"2012-10-24 15:01:40\" },\n\t{ \"post_id\": 2583, \"topic_id\": 568, \"forum_id\": 14, \"post_subject\": \"Re: Authentication should be enabled\", \"username\": \"rtaylor\", \"post_text\": \"Durai,\\n\\nIf your environment doesn't have an LDAP server then you can't enable authentication and you don't need to add users -- the system will simply be wide open. You can just log in using any user ID you want without a password. \\n\\nThat's the way I run my training clusters for ECL classes, so it's not a particular problem unless you have sensitive data. If you do need authentication, then you'll nee to add an LDAP server.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-21 19:36:19\" },\n\t{ \"post_id\": 2576, \"topic_id\": 568, \"forum_id\": 14, \"post_subject\": \"Authentication should be enabled\", \"username\": \"Durai\", \"post_text\": \"Hi,\\n\\nI have created a 3 node cluster (along with Roxie). \\n\\nNow I am able to see ESP site and when I click to "Users" options to add/manager users.\\n\\nI get the following message "In order to use this feature, authentication should be enabled".\\n\\nCan you please help on this ? This is a small cluster at home, I dont have LDAP server. \\n\\nThanks\\nDurai\", \"post_time\": \"2012-10-20 05:52:36\" },\n\t{ \"post_id\": 2661, \"topic_id\": 582, \"forum_id\": 14, \"post_subject\": \"Re: 3.8.6-4 Changelog\", \"username\": \"DSC\", \"post_text\": \"Got it. Thanks for the clarification.\\n\\nDan\", \"post_time\": \"2012-10-30 19:47:47\" },\n\t{ \"post_id\": 2660, \"topic_id\": 582, \"forum_id\": 14, \"post_subject\": \"Re: 3.8.6-4 Changelog\", \"username\": \"clo\", \"post_text\": \"Hi Dan,\\n\\nThe changelog listed in the section titled 'Comprehensive List of changes from 3.8.4 to 3.8.6' is indeed the changelog. I can see how the 3.8.6-4 seems vastly different than 3.8.4-1. However, I fear the -4 and the -1 might be a bit misleading. It's the last number in the numbers 3.8.6 and 3.8.4 that should be focused on. 3.8.6 was intended to fix a very limited set of issues. 
I hope that clarifies things for you.\", \"post_time\": \"2012-10-30 19:32:17\" },\n\t{ \"post_id\": 2657, \"topic_id\": 582, \"forum_id\": 14, \"post_subject\": \"3.8.6-4 Changelog\", \"username\": \"DSC\", \"post_text\": \"Version 3.8.6-4 (CE) was recently posted for download. The associated change log (http://hpccsystems.com/download/free-community-edition-known-limitations) seems to be specific to that version. Given the vast version number differences between this one and the last publicly-available version (3.8.4-1) I would assume that more items were actually changed than those cited in that change log. Could we see the full log for the intermediate versions, please? Or is the posted log really it?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-10-30 17:16:41\" },\n\t{ \"post_id\": 2730, \"topic_id\": 595, \"forum_id\": 14, \"post_subject\": \"Re: Component Definitions\", \"username\": \"DSC\", \"post_text\": \"[quote="rtaylor":zqr98ca7]Well, we do go over all that the first morning of the Intro to ECL class.
\\n\\nI'm working on it! I'm working on it!\\n\\nThanks a million.\\n\\nDan\", \"post_time\": \"2012-11-07 20:00:39\" },\n\t{ \"post_id\": 2729, \"topic_id\": 595, \"forum_id\": 14, \"post_subject\": \"Re: Component Definitions\", \"username\": \"rtaylor\", \"post_text\": \"Dan,
What are the major components of an HPCC cluster, and what do each of them do, in general?
Well, we do go over all that the first morning of the Intro to ECL class. \\n\\nThe major middleware components are:
\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-07 19:51:59\" },\n\t{ \"post_id\": 2727, \"topic_id\": 595, \"forum_id\": 14, \"post_subject\": \"Component Definitions\", \"username\": \"DSC\", \"post_text\": \"This is a fairly simple, silly question but I cannot find the answer in any piece of documentation.\\n\\nWhat are the major components of an HPCC cluster, and what do each of them do, in general? We all talk about Thor and Roxie, but what about their less-famous family members, Sash, Dali, etc.?\\n\\nPointers to the documentation I've apparently missed would be appreciated.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-11-07 18:57:45\" },\n\t{ \"post_id\": 2766, \"topic_id\": 596, \"forum_id\": 14, \"post_subject\": \"Re: Not installing on Ubuntu 12.10 / 64\", \"username\": \"pschwartz\", \"post_text\": \"monosij,\\n\\nThis is not expected to work. You have taken a package for Ubuntu 12.04 LTS and attempted to install it on Ubuntu 12.10. \\n\\nWe currently do not have packages available for Ubuntu 12.10, but will soon. In the mean time you are more then welcome to build the platform from source (https://github.com/hpccsystems/HPCC-Platform)\\n\\nPhilip\", \"post_time\": \"2012-11-12 18:41:45\" },\n\t{ \"post_id\": 2737, \"topic_id\": 596, \"forum_id\": 14, \"post_subject\": \"Not installing on Ubuntu 12.10 / 64\", \"username\": \"monosij\", \"post_text\": \"Trying to install the current 64-bit HPCC Ubuntu version:\\nhpccsystems-platform_community-3.8.6-4precise_amd64.deb\\n\\nUbuntu 12.10 64. Linux kernel 3.5.0-18-generic.\\n\\nError:\\nDependencyis not satisfiable: libboost-regex1.46.1\\n\\nThe Ubuntu 12.10 is a basic Ubuntu installation - not modified in anyway. It was released late Oct. - so don't know if it is certified for it yet?\\n\\nThank you for your help.\", \"post_time\": \"2012-11-07 22:38:51\" },\n\t{ \"post_id\": 3027, \"topic_id\": 631, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on existing server\", \"username\": \"jsmith\", \"post_text\": \"A couple of possible caveats with the above steps, which may have caused problems.\\n\\na) The cp and subsequent rm, will have moved all data, for all components on the node, if you only had thor on this node, then that's all you needed to stop, if you had other components sharing the same box, e.g. dali/eclccserver etc, they should all be stopped. If in doubt, stop all with e.g.:\\n\\nservice hpcc-init stop\\nservice dafilesrv stop
\\n\\nb) the chown step should be made recursive, i.e. should be:\\n\\nchown -R hpcc:hpcc /media/newdrive/HPCCSystems
\\n\\n\\nThe final step should be to restart all components:\\n\\nservice dafilesrv start\\nservice hpcc-init start
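Folding those caveats into the step-by-step procedure given elsewhere in this thread, the whole move amounts to roughly the following sketch (run as root; /media/newdrive is the example mount point used in the thread):

  service hpcc-init stop
  service dafilesrv stop
  cp -pvr /var/lib/HPCCSystems /media/newdrive
  chown -R hpcc:hpcc /media/newdrive/HPCCSystems
  ls -lhR /media/newdrive/HPCCSystems    # sanity-check the copy before deleting anything
  rm -rf /var/lib/HPCCSystems
  ln -s /media/newdrive/HPCCSystems /var/lib/HPCCSystems
  service dafilesrv start
  service hpcc-init start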
\", \"post_time\": \"2012-12-20 14:31:26\" },\n\t{ \"post_id\": 3026, \"topic_id\": 631, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on existing server\", \"username\": \"jandleman\", \"post_text\": \"Hi,\\n\\nWe have done all of this now, and all services except for mydali and mythor are starting.\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-12-19 18:42:47\" },\n\t{ \"post_id\": 2915, \"topic_id\": 631, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on existing server\", \"username\": \"ultima_centauri\", \"post_text\": \"I have added a new, much larger hard drive to an existing HPCC/Thor node and would like Thor to use this space. How can I move existing data file directories to a new location and reconfigure HPCC to take advantage of all this new space?
\\n\\nOne way doing it will be:\\n\\n1-. After formatting the drive, creating a filesystem, mounting the drive and creating the entry in the fstab file.\\n\\n2-. copy the HPCCSystems directory from /var/lib to the new drive, make sure to give the same ownership and permissions as the HPCCSystems directory located in /var/lib. In the below examples I use /media/newdrive as my new drive mount point, you can probably omit the last two steps, but if you want to make 100000% sure (paranoid approach) that the data was copied over you can run them \\n\\n cp -pvr /var/lib/HPCCSystems /media/newdrive\\n chown hpcc:hpcc /media/newdrive/HPCCSystems\\n cd /media/newdrive/HPCCSystems\\n ls -lhR \\n \\n\\n3-. stop the the thor and mydafilesrv processes.\\n\\n service hpcc-init -c mythor stop <<< Thor Master location\\n service dafilesrv stop <<< node that has the new drive\\n\\n4-.delete the HPCCSystems directory from /var/lib (you wont lose your data since we already copy it over to the new drive and you triple check it, right? \\n\\n rm -rf /var/lib/HPCCSystems\\n\\n5-.create a symbolic link inside the /var/lib directory and call it HPCCSystems\\n\\n ln -s /media/newdrive/HPCCSystems /var/lib/HPCCSystems\\n\\n6-. Start the hpcc components\\n\\n service hpcc-init -c mythor start <<< Thor Master location\\n service dafilesrv start <<< node that has the new drive\", \"post_time\": \"2012-11-30 20:04:24\" },\n\t{ \"post_id\": 2895, \"topic_id\": 631, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on existing server\", \"username\": \"HPCC Staff\", \"post_text\": \"Our operations team is currently reviewing this question. Thanks for the post!\", \"post_time\": \"2012-11-27 21:59:50\" },\n\t{ \"post_id\": 2856, \"topic_id\": 631, \"forum_id\": 14, \"post_subject\": \"Changing data file directories on existing server\", \"username\": \"jandleman\", \"post_text\": \"I have added a new, much larger hard drive to an existing HPCC/Thor node and would like Thor to use this space. How can I move existing data file directories to a new location and reconfigure HPCC to take advantage of all this new space?\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-11-26 05:36:26\" },\n\t{ \"post_id\": 2912, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"DSC\", \"post_text\": \"That would still be good, I think.\\n\\nFor those that didn't change from the default, they'll find the stuff in the indicated directory. Those that did change the defaults would presumably know they did it, why the did it, and be able to map between the two locations. Just a note saying that those are the default directories would be sufficient.\\n\\nEdit: An ultra-thorough version would include the configmgr setting for the directory in question, where applicable.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-11-29 18:57:07\" },\n\t{ \"post_id\": 2911, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"JimD\", \"post_text\": \"Good point! 
I will add that to my notes\\n\\nHowever, I can only include the default locations since most of these locations are configurable.\\n\\nJim\", \"post_time\": \"2012-11-29 18:47:27\" },\n\t{ \"post_id\": 2910, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"DSC\", \"post_text\": \"Addendum: For the future documentation, you may want to consider adding the explicit directory paths where appropriate, just to make it crystal-clear as to what needs to be backed up. That may make the docs a bit more fragile in regard to future changes, but it would greatly help the reader.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-11-29 16:24:01\" },\n\t{ \"post_id\": 2909, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"DSC\", \"post_text\": \"Jim, thank you very much for the comprehensive reply. I believe that completely answers my question, and then some.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-11-29 15:05:12\" },\n\t{ \"post_id\": 2908, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"JimD\", \"post_text\": \"Dan, \\n\\nHere are the notes I plan to use for a chapter to be written in the future. \\n\\nThis is a work in progress, and may not be 100% complete at the moment. \\n\\nI hope this helps you and I value your feedback.\\n\\nHPCC System Best Practices for Backups\\n\\n1.\\tDali\\n a.\\tDali Backup folder is specified in its configuration. \\n b.\\tAdditional backup of that backup folder can be implemented at a system level \\n(e.g., backup to offline storage)\\n\\n2.\\tSasha\\n a.\\tArchived Work units can (and probably should) be backed up at system level (e.g., backup to offline storage) \\n\\n3.\\tDFU Server\\n a.\\tNothing required.\\n\\n4.\\tECLCC Server\\n a.\\tNothing required.\\n\\n5.\\tECL Server (if you are using this optional component)\\n a.\\tNothing specific to ECLServer needs backing up, but:\\n b.\\tMySQL server Database (attribute repository) should be backed up using traditional methods for MySQL.\\n\\n6.\\tECL Agent\\n a.\\tTypically nothing required \\n b.\\tIf you have sprayed data to hThor or have output data to hThor, then you should backup that data using traditional system level backup methods. \\n\\n7.\\tECL Scheduler\\n a.\\tNothing required. (Scheduled jobs are stored in Dali)\\n\\n8.\\tESP Server\\n a.\\tTypically, nothing required.\\n b.\\tSSL certificates and keys should be backed up, if used.\\n\\n9.\\tThor\\n a.\\tTypical redundancy provides a backup via replication \\n b.\\tNightly backup via cron task\\n c.\\tBackup upon node swap or drive swap\\n\\n10.\\tRoxie\\na.\\tBackup by channel redundancy.\\nb.\\tAdditional backup by keeping original data files on Thor\\n\\n11.\\tLanding Zone\\na.\\tTraditional system level methods (same as one would use for an FTP server)\\n\\n12.\\tMiscellaneous \\na.\\tBackup of environment.xml and other saved config files you may want to backup.\\nb.\\tBackup of logs from each node, if your enterprise needs a persistent record of activity.\", \"post_time\": \"2012-11-29 14:40:49\" },\n\t{ \"post_id\": 2894, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Re: Support node backup\", \"username\": \"HPCC Staff\", \"post_text\": \"We've passed this on to our operations team to review and respond. 
Thanks for the question!\", \"post_time\": \"2012-11-27 21:56:35\" },\n\t{ \"post_id\": 2872, \"topic_id\": 634, \"forum_id\": 14, \"post_subject\": \"Support node backup\", \"username\": \"DSC\", \"post_text\": \"What is the best practice for providing redundancy/backup for the support processes (dali, sasha, et. al.)? Data redundancy seems to be handled several different ways, but what about the controlling nodes? I'm guessing mirroring selected (meta)data directories between working and backup servers would be sufficient, but that's really just a guess.\\n\\nAny pointers would be appreciated.\\n\\nDan\", \"post_time\": \"2012-11-26 16:23:53\" },\n\t{ \"post_id\": 2984, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"Re: build hpcc with recent gcc/g++ version failure -fPIC nee\", \"username\": \"pschwartz\", \"post_text\": \"[quote="markk":2gyeskh5]Hi,\\nok, many thanks. I am sorry to have asked you for help when the solution is not hpcc related. I should have read the error line more carefully.\\nthanks,\\nmark\\n\\nMark,\\n\\nNot a problem at all. If you need any other help, please feel free to ask.\\n\\n-Philip\", \"post_time\": \"2012-12-12 01:11:11\" },\n\t{ \"post_id\": 2982, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"Re: build hpcc with recent gcc/g++ version failure -fPIC nee\", \"username\": \"markk\", \"post_text\": \"Hi,\\nok, many thanks. I am sorry to have asked you for help when the solution is not hpcc related. I should have read the error line more carefully.\\nthanks,\\nmark\", \"post_time\": \"2012-12-12 00:15:26\" },\n\t{ \"post_id\": 2981, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"Re: build hpcc with recent gcc/g++ version failure -fPIC nee\", \"username\": \"pschwartz\", \"post_text\": \"[quote="markk":10a4yvbs]Hi,\\nthanks for your help.\\n1). Centos 5.7\\n2). gcc/g++ 4.7.2 installed locally\\n3). yum\\nI can build hpcc with an older gcc/g++ version but don't want to unless I have to.\\nthanks again,\\nmark\\n\\nOk, that gives me a little more information. Looking at the error and knowing that you have installed a newer gcc/g++ locally points out the issue. \\n\\nIt appears that the version of libiberty.a that you have on your system from your local install of gcc/g++ is not compiled -fPIC. You most likely will have to rebuild this library to correct the issue.\\n\\nAs to our platform, it is completely tested on the default install of gcc/g++ on Centos 5.x if you do not want to rebuild libraries that where not built with your local install with -fPIC.\\n\\n-Philip\", \"post_time\": \"2012-12-12 00:10:45\" },\n\t{ \"post_id\": 2980, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"Re: build hpcc with recent gcc/g++ version failure -fPIC nee\", \"username\": \"markk\", \"post_text\": \"Hi,\\nthanks for your help.\\n1). Centos 5.7\\n2). gcc/g++ 4.7.2 installed locally\\n3). 
yum\\nI can build hpcc with an older gcc/g++ version but don't want to unless I have to.\\nthanks again,\\nmark\", \"post_time\": \"2012-12-12 00:06:25\" },\n\t{ \"post_id\": 2979, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"Re: build hpcc with recent gcc/g++ version failure -fPIC nee\", \"username\": \"pschwartz\", \"post_text\": \"[quote="markk":1731lx2o]Hi,\\n\\nTrying to build with a recent gcc/g++ version and I get this failure -\\n\\n/sw/Benchmarks/GCC/4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../lib64/libiberty.a(hashtab.o): relocation R_X86_64_32S against `a local symbol' can not be used when making a shared object; recompile with -fPIC\\n\\nWhat file(s) can I add -fPIC to so a re-cmake/rebuild will use this ?\\n\\nthanks,\\nmark\\n\\nMark,\\n\\nI have a few questions to help you with this issue.\\n\\nWhat linux distribution are you using?\\n\\nAre you using the repository installed gcc/g++? If not, what version are you using?\\n\\nHow did you install the dependencies for building our source tree? apt, yum, local compile?\\n\\n-Philip\", \"post_time\": \"2012-12-11 13:46:36\" },\n\t{ \"post_id\": 2976, \"topic_id\": 653, \"forum_id\": 14, \"post_subject\": \"build hpcc with recent gcc/g++ version failure -fPIC needed\", \"username\": \"markk\", \"post_text\": \"Hi,\\n\\nTrying to build with a recent gcc/g++ version and I get this failure -\\n\\n/sw/Benchmarks/GCC/4.7.2/lib/gcc/x86_64-unknown-linux-gnu/4.7.2/../../../../lib64/libiberty.a(hashtab.o): relocation R_X86_64_32S against `a local symbol' can not be used when making a shared object; recompile with -fPIC\\n\\nWhat file(s) can I add -fPIC to so a re-cmake/rebuild will use this ?\\n\\nthanks,\\nmark\", \"post_time\": \"2012-12-10 15:22:21\" },\n\t{ \"post_id\": 3122, \"topic_id\": 679, \"forum_id\": 14, \"post_subject\": \"Re: Installation using --prefix in rpm command\", \"username\": \"pschwartz\", \"post_text\": \"[quote="jeeves":1h0qhlz4]Philip,\\n\\nWe are trying to install HPCC in a user's home directory. This is because the server is being shared by many users.\\n\\nThanks,\\n-David\\n\\n\\nDavid, \\n\\nWe currently do not support this type of install with our packages. But this can be done with a custom build. Our source is located at https://github.com/hpcc-systems/HPCC-Platform\\n\\nTake a look at the CMakeLists.txt file in the root of the source tree and cmake_modules/optionDefaults.cmake. You will see the following config options (and the defaults we use in optionDefaults.cmake)\\n\\nThe 5 options of interest to you are as follows:\\noption(PREFIX "Set the install prefix")\\noption(EXEC_PREFIX "Set the execution prefix")\\noption(CONFIG_PREFIX "Set the configuration prefix")\\noption(RUNTIME_USER "Set the runtime username")\\noption(RUNTIME_GROUP "Set the runtime group")\\n\\nUsing these options you can move the install locations of the platform. These will be used with a `make install` to place the files with the correct permissions in those locations. (You can create a custom package with `make package`, but I would not suggest this as it will force run our install scripts which will install symlinks in /etc/init.d and /usr/bin.)\\n\\nOnce the make install is done, the platform can be started using the init scripts that would normally be symlinked to /etc/init.d. 
They are located in PREFIX/HPCCSystems/etc/init.d.\\n\\n-Philip\", \"post_time\": \"2013-01-17 18:32:25\" },\n\t{ \"post_id\": 3109, \"topic_id\": 679, \"forum_id\": 14, \"post_subject\": \"Re: Installation using --prefix in rpm command\", \"username\": \"jeeves\", \"post_text\": \"Philip,\\n\\nWe are trying to install HPCC in a user's home directory. This is because the server is being shared by many users.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-01-16 10:36:07\" },\n\t{ \"post_id\": 3105, \"topic_id\": 679, \"forum_id\": 14, \"post_subject\": \"Re: Installation using --prefix in rpm command\", \"username\": \"pschwartz\", \"post_text\": \"David,\\n\\nAre you trying to use the prefix command to install the platform to a location that is not the standard for our installation?\\n\\nCurrently I am not sure if we can support this directly. I am going to run a few tests and if this does not work, I will provide you with steps to follow in order to do the install to a different location.\\n\\nPhilip\", \"post_time\": \"2013-01-15 13:48:27\" },\n\t{ \"post_id\": 3103, \"topic_id\": 679, \"forum_id\": 14, \"post_subject\": \"Re: Installation using --prefix in rpm command\", \"username\": \"bforeman\", \"post_text\": \"Our installation team is currently reviewing this.\\nThanks for your post!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-15 13:42:56\" },\n\t{ \"post_id\": 3102, \"topic_id\": 679, \"forum_id\": 14, \"post_subject\": \"Installation using --prefix in rpm command\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nOne of us tried to install HPCC in RHEL by setting a prefix using the --prefix option in rpm. But when an attempt was made to start HPCC from the specified path(newLocation/etc/init.d/hpcc-init) the following error was obtained.\\n\\n
\\n[hpcc@server]$ ./init.d/hpcc-init\\n./init.d/hpcc-init: line 92: /opt/HPCCSystems/etc/init.d/lock.sh: No such file or directory\\n./init.d/hpcc-init: line 93: /opt/HPCCSystems/etc/init.d/pid.sh: No such file or directory\\n./init.d/hpcc-init: line 94: /opt/HPCCSystems/etc/init.d/hpcc_common: No such file or directory\\n./init.d/hpcc-init: line 95: /opt/HPCCSystems/etc/init.d/init-functions: No such file or directory\\n./init.d/hpcc-init: line 96: /opt/HPCCSystems/etc/init.d/export-path: No such file or directory\\n./init.d/hpcc-init: line 106: set_environmentvars: command not found\\n./init.d/hpcc-init: line 112: /sbin/hpcc_setenv: No such file or directory\\n./init.d/hpcc-init: line 114: is_root: command not found\\n./init.d/hpcc-init: line 115: which_service: command not found\\n./init.d/hpcc-init: line 116: get_commondirs: command not found\\n./init.d/hpcc-init: line 119: check_user: command not found\\n user does not exits on the system. Exiting .....\\n
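The failures above come from the init scripts still looking under /opt/HPCCSystems, which is what the source-build relocation described elsewhere in this thread avoids. A minimal sketch of that approach; the option names come from that post, while the build directory, install prefix, and user are assumptions:

  git clone https://github.com/hpcc-systems/HPCC-Platform.git
  mkdir build && cd build
  cmake -DPREFIX=$HOME/hpcc -DEXEC_PREFIX=$HOME/hpcc \
        -DCONFIG_PREFIX=$HOME/hpcc/etc \
        -DRUNTIME_USER=$USER -DRUNTIME_GROUP=$USER \
        ../HPCC-Platform
  make
  make install
  # start from the relocated init scripts rather than /etc/init.d
  $HOME/hpcc/HPCCSystems/etc/init.d/hpcc-init start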
\\n\\nSo the question is: Can we use the --prefix option? Or is it better to use the default.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-01-15 12:20:34\" },\n\t{ \"post_id\": 3431, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"monosij\", \"post_text\": \"Hi Chris -\\n\\nThanks for the details and taking the time. I may try out the move and can let you know how it goes.\\n\\nBut yes - there seem to be quite a few inconsistencies.\\n\\n1. There is a file RoxieTopology.xml that points to Roxie Data being in hpcc-data/roxie as in Dali. However the 'directory' parameter in Roxie points to myroxie but may not be for data directory. However no 'directory' param for the other services - Dali et.al.\\n\\n2. It would be great to have the directories created as mentioned in the Directory param. Example only Dali is created under hpcc-data and not Roxie or the others. As are not the hpcc-data1, hpcc-data2 directories. This because if I write a script to do some checking I will be guessing with a lot of if/then conditions.\\n\\n3. The Roxie XML config file is called RoxieTopology.xml. However for Dali and Sasha it is called daliconf.xml and sashaconf.xml and for ESP just esp.xml. Some of the params start with Caps and some are Lower-case. Example the main 'Directory' and Roxie's 'directory'.\\n\\n4. Regards config file for script I meant if I could set all params and directories up front before executing the install - in a config file which the install program would read to set appropriate locations. And if no config script file then set everything to default.\\n\\n5. The Oracle scheme was just as a reference as most installs follow same pattern. A component, which has a location param set to a directory and which has the data param set to one or more directories. Then the data directory(s) can be 1,2,3 which has multiple data files all under one roof for all components (as you have) or totally separate locations under some scheme. For example in Oracle it is best to have TEMP and ROLLBACK in a separate directory (RAID et.al.) altogether from the storage DATA and INDEX directories (or rather locations) and such.\\n\\nAnyway I don't want to go about this anymore. It has been a great product. Just a few inconsistencies. Hopefully they can be resolved. If there is anything I can help with please let me know. Several folks at S&A know how to reach me. And I think we communicated while I was at LN as well.\\n\\nThanks again Chris.\", \"post_time\": \"2013-02-12 20:31:52\" },\n\t{ \"post_id\": 3419, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"clo\", \"post_text\": \"Hi Monosij,\\n\\nI'll try my best to answer these questions, but I might need to consult some of my colleagues as the subject matter is starting to become a tad more detailed.\\n\\n1. [NAME] refers to HPCCSystems [COMPONENT] refers to the component such as dali, esp, roxie. \\n\\n2. As far as data2 and data3, I'm not quite sure when they come into effect. I can investigate, but for the most part, hpcc-data and hpcc-mirror are the two main data directories.\\n\\n3. The directory for roxie is probably an issue that I need to open as well. It is where the files for the roxie component is installed. The data directory is actually /var/lib/HPCCSystems/hpcc-data/myroxie\\n\\n4. The data directories are created on startup. 
If you don't have any data you don't need, I would suggest cleaning out your system first and then continuing from there. That way, there wouldn't be as much confusion after you've put in the modified environment.xml\\n\\n5. They are called my* just as a convention. You may rename all of these as you'd like. I haven't tested with . in the name of the component. If you'd like to modify the name of a component, you just have to change it per component inside configmgr. I just ran a quick test and removed all the my in front of each component and started up the system fine. I'm not quite sure following an Oracle naming scheme would be the suggested as hpccsystems follows its own organization of directories. \\n\\n6. I'm not quite sure what you mean by using production install scripts and modifying them for 1 machine / 1 node. We have a lot of installation, startup scripts detailed in the documentation already. I would suggest you taking a look at this first. http://hpccsystems.com/download/docs/in ... c-platform \\n \\nChris\", \"post_time\": \"2013-02-11 16:43:55\" },\n\t{ \"post_id\": 3396, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"monosij\", \"post_text\": \"Hi Chris -\\n\\nThanks for the details. I appreciate it. Not confusion at all but a few more questions:\\n\\n1. So your state that no more params can be added. So we are restricted to just 3 'data' directories then? 'data', 'data2', 'data3'? And also I don't really see how the variables [NAME], [INST], [COMPONENT] (except for Dali) are being used. Do they come into effect for larger installs? Where is Roxie, Sasha, Thor data? Not in myroxie, mythor ...?\\n\\n2. Although I have 'data2' and 'data3' and corresponding dirs in '/var/lib', I only see '/var/lib/hpcc-data' and not the ones '/var/lib/{hpcc-data1,hpcc-data2}'. Where are they? Are they created on demand?\\n\\n3. So to really move any and all data off '/var/lib' - I should address all params under Directories ie - temp, data, data2, data3, mirror, query and maybe log. These are the only dirs that will grow. So dirs like mydali, myesp has no data files? Example - there is a 'directory' param for myroxie set to '/var/lib/HPCCSystems/myroxie/' but not for the others.\\n\\n4. Once I change the config should I move those data dirs over to the new base directory? Or will they be automatically created upon startup? Can I delete the data dirs including temp under /var/lib'?\\n\\n5. And a slightly disconnected question - why are the programs called mydali, myesp ... Is it possible for me to take out the 'my'? Is that the same in all your production environments? Do you have a setup that allows my setting up the environ vars in the way I wish. For example I would want the dirs to be '{machine_name}.dali'. Similarly I would like my 'data', 'temp' and such params to be '{machine_name}.data.01' and so on. Similar to how Oracle allows much flexibility as an example. Just thought would ask if there was an environ var to set to run setup from.\\n\\n6. Maybe larger production installs are completely different? If so, is it possible to get my hands on production install script and set it for 1 machine / 1 node. 
Can you have > 1 node on a machine?\\n\\nThank you for your patience and the details.\\n\\nMonosij\\nNB: Please excuse the Oracle comparison.\", \"post_time\": \"2013-02-08 22:12:01\" },\n\t{ \"post_id\": 3395, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"clo\", \"post_text\": \"Hi Monosij,\\n\\nFor editing an environment.xml in configmgr\\n - You need to check the box in the upper right hand corner that says 'Write Access'\\n\\nYou'll probably want to leave [NAME] and [INST] alone as those are used to pick up the component names. The only thing you'll want to edit is the /var/lib.\\n\\nAs far as changing data to data1, I don't think that options available yet. I've already brought this up with the developer and they're investigating it, but there's no timeline on when/if that feature will be put in.\\n\\nRegarding adding additional params, I've also discussed this with the developers and there's an issue open for that already.\\n\\nSo this brings us to best practices for moving environment.xmls around.\\n\\nBefore we begin, there's something you should realize. What the system looks for is a file under /etc/HPCCSystems/environment.xml.\\n\\nWhat you work with is under /etc/HPCCSystems/source/\\nAll the xmls that you edit or make changes to will live in the source directory. When copying your edited xml over to the working xml (/etc/HPCCSystems/environment.xml), you will need to replace it. you need to essentially run\\n\\ncp /etc/HPCCSystems/source/mynewenvironment.xml /etc/HPCCSystems/environment.xml
\\n\\nDo not call it /etc/HPCCSystems/mynewenvironment.xml because the system will not look for that.\\n\\nIt's always a good idea to stop your system before modifying your environment.xml.\\n\\nThere are essentially 2 scenarios: creating a new environment and modifying an existing environment.\\n\\nCreating a new environment:\\n - If you're just running the default configuration on a single node, then I'd suggest you start from scratch. Navigate to the main configmgr page and select Generate a New Environment with the config wizard. Simply input the IP of the node you're using, leave support nodes at 0, and add 1 node for Roxie if you'd like a Roxie to use. \\n\\nYou can then proceed to modify the environment however you like without worrying about changing your backup environment.xml. Once you're done with your edits, replace /etc/HPCCSystems/environment.xml with your new xml.\\n\\nModifying an existing environment.xml:\\n - Suppose you want to modify /etc/HPCCSystems/source/myMainEnv.xml (and this is also exactly the same as /etc/HPCCSystems/environment.xml).\\n - First, make sure your system is stopped. \\n - Next, make a copy of myMainEnv.xml \\ncp /etc/HPCCSystems/source/myMainEnv.xml /etc/HPCCSystems/source/myMainEnvModified.xml
\\n - Then change ownership of your new file to hpcc\\nsudo chown hpcc:hpcc /etc/HPCCSystems/source/myMainEnvModified.xml
\\n - Now start up configmgr, and select to open myMainEnvModified.xml in Advanced mode.\\n - Make your changes.\\n - Close configmgr.\\n - Copy your newly modified xml over to the working xml.\\ncp /etc/HPCCSystems/source/myMainEnvModified.xml /etc/HPCCSystems/environment.xml
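\\n\\nPutting the whole cycle together on a single node, a minimal sketch (assuming the stock hpcc-init service scripts and the file names used above):\\nsudo service hpcc-init stop\\nsudo cp /etc/HPCCSystems/source/myMainEnvModified.xml /etc/HPCCSystems/environment.xml\\nsudo service hpcc-init start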
\\n\\n\\nI hope this long-winded response didn't confuse you even more. I typically manage a lot of environment.xmls and I've found a few extra steps go a long way in making sure I didn't overwrite what I wasn't supposed to.\\n\\nChris\", \"post_time\": \"2013-02-08 21:25:08\" },\n\t{ \"post_id\": 3394, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"monosij\", \"post_text\": \"Hi Chris - Thanks for your quick reply.\\n\\nYes I did see the 'Directory' under 'Software' but could not edit it.\\n\\nOk one other question - all the paths have:\\n[NAME] and [INST] in them.\\n\\nI guess they are picked up as params from other parts of the environment file? And so I can leave them alone?\\nSo just need to change '/var/lib' and 'var/log' to other directories?\\n\\nAlso is there a way to change the parameters for the instance? I would like to change 'data' to 'data1'. Is that possible?\\n\\nAnd what about additional params - if I wanted to add 'data4'?\\nWould it be best then to create anew environment.xml and go from there?\\n...\\nOn a locate 'environment.xml' I see:\\n/etc/HPCCSystems/environment.xml\\n/etc/HPCCSystems/rpmnew/environment.xml\\n/etc/HPCCSystems/source/environment.xml\\n/opt/HPCCSystems/etc/HPCCSystems/rpmnew/environment.xml\\n...\\nSo if I create a 'newEnvironment.xml' I should only replace:\\n/etc/HPCCSystems/environment.xml\\nwith\\n/etc/HPCCSystems/source/newEnvironment.xml\\n...\\nThank you.\\n\\nMonosij\", \"post_time\": \"2013-02-08 20:34:00\" },\n\t{ \"post_id\": 3393, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nSo after navigating to :8015, you'll come to the main landing page for configmgr. You can either select Advanced View, or generate a new environment using the wizard. To keep the example short, let's say you decide to edit the default environment.xml with new settings in Advanced View. \\n\\nAfter you've opened your environment.xml in Advanced View, you'll see in the left Navigator pane, Environment, and under there you should see Software, and then under there you should see Directories.\\n\\nOnce you click on directories, you can change /var/lib to be anything you want.\\n\\nMake sure you select Write Access in the upper right hand corner and be sure to save once you're done editing.\\n\\nAfter you close configmgr through the command line, make sure you copy /etc/HPCCSystems/source/environment.xml to /etc/HPCCSystems/environment.xml otherwise your changes will not be recognized by the system. Keep in mind that configmgr works with xmls inside the /etc/HPCCSystems/source/ directory and not the working xml. \\n\\nI hope this helps.\", \"post_time\": \"2013-02-08 20:06:32\" },\n\t{ \"post_id\": 3392, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"monosij\", \"post_text\": \"Hi Chris - \\n\\nRegards your comments on changing data file directories I had a few questions. I am running on Ubuntu 12.04 and installing for single node.\\n\\n1. What option to start with? 'Advanced View' or 'Generate New Environ'?\\n...\\n2. You say in b. 
'Select the Directories component'.\\nI don't see a 'Directories component anywhere in the browser after I connect to localhost:8015 when running configmgr.\\n\\nIn main screen I see the options Summary, Advanced, New Environ, Blank Environ.\\n\\n'Advanced View' under Environment there are: {Hardware, Programs, EnvSettings, Software}.\\nEnvSettings has runtime set to /var/lib/HPCCSystems which I assume I need to change somehow.\\n...\\n?? Could I change this directly everywhere in /etc/environment.xml and move files, set permissions and resiart?\\n...\\nFrom your statements below it seems I should try 'Generate New Environ'.\\n\\nI tried that as well but only gave me 4 options after 'Manual Entry' of 127.0.0.1.\\n\\nThey are: {# Support Nodes (0), # Nodes for Roxie (0), # of Slave Nodes for Thor Clus (1), # Thor Slaves (1), Enable Roxie on Demand (checked).\\n\\nAnd when I select next it goes to 'Environment Summary'. And even going to 'Advanced View' here goes back to as listed above for 'Advanced View' and does not allow me to change anything and says Environment has been changed.\\n...\\nI also tried following this doc: http://cdn.hpccsystems.com/install/docs ... anager.pdf\\n\\nDid not provided any details on changing directories and customizing the configuration.\\n...\\nAnother option I thought of is save newEnvironment.xml. Change dir paths in it and then copy over environment.xml, change dirs and restart?\\n\\nPlease let me know if possible to get this info and optimal settings for a single machine for trial runs.\\n\\nThanks.\\n\\nMonosij\", \"post_time\": \"2013-02-08 19:51:16\" },\n\t{ \"post_id\": 3185, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"clo\", \"post_text\": \"As an alternative, you can change the paths of the directories inside configmgr.\\n\\n\\n 1. Install the platform on a clean system\\n 2. Open up the configmgr tool. \\n
 a. Configure your system as you normally would.\\n b. Select the Directories component.\\n c. For all the items listed that you'd like to modify, change /var/lib to /data \\n d. Save your environment.xml and close configmgr.\\n 3. Move your newly modified xml into the working directory.\\n a. Supposing your new xml is called new_xml.xml\\n sudo cp /etc/HPCCSystems/source/new_xml.xml /etc/HPCCSystems/environment.xml
\\n 4. Change the ownership of your /data directory to hpcc\\n sudo chown hpcc:hpcc /data
\\n 5. Now start up your system. \\n\\nAll of the folders you wanted in a different location should now be under /data instead. I hope this helps.\\n\\n - Chris\", \"post_time\": \"2013-01-25 15:48:17\" },\n\t{ \"post_id\": 3182, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Re: Changing data file directories on new install\", \"username\": \"DSC\", \"post_text\": \"I have had success moving the data directory by:\\n\\n1. Install and configure HPCC\\n\\n2. Start up the cluster. This configures everything in /var/lib/HPCCSystems.\\n\\n3. Shut down the cluster.\\n\\n4. Within /var/lib/HPCCSystems, replace hpcc-data and hpcc-mirror with symlinks to directories located on your alternate file system. Make sure the permissions are the same on your new directories (see the sketch after this list).\\n\\n5. Restart the cluster.
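\\n\\nFor step 4, a minimal sketch of the symlink swap (assuming the alternate file system is mounted at /data, the cluster is stopped, and the default directory names are in use):\\nsudo mv /var/lib/HPCCSystems/hpcc-data /data/hpcc-data\\nsudo mv /var/lib/HPCCSystems/hpcc-mirror /data/hpcc-mirror\\nsudo ln -s /data/hpcc-data /var/lib/HPCCSystems/hpcc-data\\nsudo ln -s /data/hpcc-mirror /var/lib/HPCCSystems/hpcc-mirror\\nsudo chown -R hpcc:hpcc /data/hpcc-data /data/hpcc-mirror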
\\n\\nThat is purely for the data, of course. You can do the same symlink trick with other directories as needed.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-01-25 12:54:59\" },\n\t{ \"post_id\": 3178, \"topic_id\": 703, \"forum_id\": 14, \"post_subject\": \"Changing data file directories on new install\", \"username\": \"jandleman\", \"post_text\": \"I would like to do a fresh install of HPCC and have it place the data files in a non-default directory. Currently, all data directories are placed under /var/lib/HPCCSystems, but that is in a small file system on this server. We would instead like to have all of these data directories placed under /data/HPCCSystems. We have tried moving directories after installation using a link in the /var/lib directory, and haven't had much luck with that. We also tried moving the directories, modifying a number of config files, and have been able to get everything to start except for dafileserve. It seems that the path is compiled into the configgen binary. Help!\\n\\nThanks,\\nJohn\", \"post_time\": \"2013-01-25 02:52:01\" },\n\t{ \"post_id\": 3199, \"topic_id\": 706, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Source Compilation OSX 64bits Issue\", \"username\": \"gsmith\", \"post_text\": \"See response in http://hpccsystems.com/bb/viewtopic.php?f=10&t=707&sid=ca83b119872ee1b04e17cb0ee8138a88\", \"post_time\": \"2013-01-27 08:48:13\" },\n\t{ \"post_id\": 3195, \"topic_id\": 706, \"forum_id\": 14, \"post_subject\": \"HPCC Source Compilation OSX 64bits Issue\", \"username\": \"buptkang\", \"post_text\": \"Hello there,\\n\\nI am a newbie on HPCC, currently I am trying to compile the source code of HPCC in order to customize something for my own need. \\n\\nMy OS is OSX 64bits, and I downloaded the source code HPCC 3.10.0 package.\\n\\nI saw the installation hints from :https://github.com/hpcc-systems/HPCC-Platform/tree/release-3.2.2\\n\\nAlso I got some other information from its CMakeLists.txt as well, \\n\\nbut no matter I run it using 32bits compiler or 64 bits compiler, I cannot run make properly with different error message. \\n\\nFor cmake -DCMAKE_C_FLAGS:STRING="-m32 -march=i386" -DCMAKE_CXX_FLAGS:STRING="-m32 -march=i386" ~/hpcc\\n\\nI got lots of error like below:\\nld: warning: could not create compact unwind for ***function
\\n\\n\\n\\nFor cmake ~/hpcc,\\n\\nI got the error as:\\n\\n[ 38%] Building CXX object common/remote/hooks/libarchive/CMakeFiles/archivefile.dir/archive.cpp.o\\nLinking CXX shared library ../../../../Release/libs/libarchivefile.dylib\\nUndefined symbols for architecture x86_64:\\n "_archive_read_next_header2", referenced from:\\n ArchiveDirectoryIterator::first() in archive.cpp.o\\n ArchiveFileIO::ArchiveFileIO(char const*)in archive.cpp.o
\\n\\nCurrently, my assumption is that utilizing Ubuntu 12.04 under 32 bits machine can guarantee to compile the code without problems. Can somebody give me some advice on it?\\n\\nThanks\\nBo\", \"post_time\": \"2013-01-26 23:09:47\" },\n\t{ \"post_id\": 3220, \"topic_id\": 712, \"forum_id\": 14, \"post_subject\": \"Re: Multi-node installation issue:4-node cluster\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Prachi,\\n\\nWhen configuring a multi-node cluster, you need to ensure that you have a sufficient number of nodes in your Topology so that Roxie Clusters and Support Nodes do not share the same node. The intention here is to make the greatest number of resources available to the Roxie Cluster to optimize performance/response time.\\n\\nTo fix your configuration, ensure that the number of Roxie Clusters + Support Nodes <= Total Number of Nodes in the System\\n\\nLet me know if you have any other questions.\", \"post_time\": \"2013-01-28 21:48:28\" },\n\t{ \"post_id\": 3208, \"topic_id\": 712, \"forum_id\": 14, \"post_subject\": \"Multi-node installation issue:4-node cluster\", \"username\": \"prachi\", \"post_text\": \"We tried with 2 node HPCC cluster which got install successfully and the VM details are :\\ncldx-1088-982 \\ncldx-1087-981\\n\\n\\nWe have to set-up a 4 node HPCC cluster with requirement:2 nodes for Roxie and 1 node for Thor and the VM details are :\\n\\ncldx-1088-982 - Master node\\ncldx-1087-981 - Fresh VM(involved in 2 node cluster setup)\\ncldx-1086-980 - Fresh VM\\ncldx-1085-979 - Fresh VM\\n\\nIn HPCC Configuration Manager we are facing some issue:\\n1.Whether to include master node IP in List of IPs?\\nFor now,4 IP addresses are added.\\n\\n2.Now,when we are trying to give :\\na.\\tNo. of support nodes : 3\\nb.\\tNo. of nodes for Roxie cluster : 2\\nc.\\tNo. of slave nodes for Thor cluster : 1\\nd.\\tNo. of Thor slaves per node : 2\\nwe get following error:\\nTotal Nodes: 4(3 Support Nodes + 1 Non-support nodes)\\nError:Cannot assign 2 number of nodes for roxie due to insufficient non-support nodes\\navailable. Please enter different values.\\n
\\n\\n3.It is allowing us for following configuration:\\na.\\tNo. of support nodes : 3\\nb.\\tNo. of nodes for Roxie cluster : 1\\nc.\\tNo. of slave nodes for Thor cluster : 1\\nd.\\tNo. of Thor slaves per node : 2\\n\\nIf we go with this configuration then one of our VM is running neither thor nor roxie.So this configuration is not useful for us.\\n\\nPlease guide as to how we must proceed.\\n\\nThanks and regards !\", \"post_time\": \"2013-01-28 10:23:54\" },\n\t{ \"post_id\": 3561, \"topic_id\": 734, \"forum_id\": 14, \"post_subject\": \"Re: 2-node cluster,can not start mythor\", \"username\": \"battleman\", \"post_text\": \"[quote="sort":twto40ql]Please make sure all nodes in the cluster have the same keys defined. Keys are used when communicating across nodes. The log below is from the thor master and it appears to be having issues communicating with the slave node\\n\\nbesides my communication between each other is ok ,what I have done to each node :\\n#vi /etc/sysconfig/selinux\\nSELINUX=disabled\\n#setenforce 0\\n#service iptables stop\\n#chkconfig iptables off\\n\\nand now ,my HPCC clusting system is functional. \", \"post_time\": \"2013-02-27 08:28:12\" },\n\t{ \"post_id\": 3344, \"topic_id\": 734, \"forum_id\": 14, \"post_subject\": \"Re: 2-node cluster,can not start mythor\", \"username\": \"sort\", \"post_text\": \"Please make sure all nodes in the cluster have the same keys defined. Keys are used when communicating across nodes. The log below is from the thor master and it appears to be having issues communicating with the slave node\", \"post_time\": \"2013-02-05 19:36:08\" },\n\t{ \"post_id\": 3311, \"topic_id\": 734, \"forum_id\": 14, \"post_subject\": \"2-node cluster,can not start mythor\", \"username\": \"battleman\", \"post_text\": \"I have to set-up a 2 node HPCC cluster,172.30.62.179(master),172.30.62.180.I failed to start "Thor" :\\n\\nStarting mydali.... [ OK ]\\nStarting mydfuserver.... [ OK ]\\nStarting myeclagent.... [ OK ]\\nStarting myeclccserver.... [ OK ]\\nStarting myeclscheduler.... [ OK ]\\nStarting myesp.... [ OK ]\\nStarting mysasha.... [ OK ]\\nStarting mythor.... 
[FAILED]\\n\\nThe communication between each other node is well functioned by ssh protocol,when I check out the log file thormaster.2013_02_04.log,I found this :\\n\\nCOMMAND:: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/myth or_init.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mythor -m -x /opt/HPCCSystems/b in/init_thor -b >>/var/log/HPCCSystems/mythor/mythor_init.log 2>&1 ::Issued at M on Feb 4 18:26:30 GMT-1 2013\\n[hpcc@iaas1013060-Computer mythor]$ cat thormaster.2013_02_04.log\\n00000001 2013-02-04 18:26:33 24787 24787 Opened log file //172.30.62.179/var/log /HPCCSystems/mythor/thormaster.2013_02_04.log\\n00000002 2013-02-04 18:26:33 24787 24787 Build community_3.10.0-1\\n00000003 2013-02-04 18:26:33 24787 24787 calling initClientProcess Port 20000\\n00000004 2013-02-04 18:26:33 24787 24787 Found file 'thorgroup', using to form t hor group\\n00000005 2013-02-04 18:26:33 24787 24787 Checking cluster replicate nodes\\n00000006 2013-02-04 18:26:33 24787 24792 jsocket(1,817) post_connect err = 113 : T>172.30.62.180\\n00000007 2013-02-04 18:26:33 24787 24792 multiConnect failed to 172.30.62.180:71 00 with 113\\n00000008 2013-02-04 18:26:33 24787 24787 /var/jenkins/workspace/CE-Candidate-3.1 0.0/CE/centos_6_x86_64/HPCC-Platform/thorlcr/master/thmastermain.cpp(382) : VALI DATE FAILED(1) 172.30.62.180 : Connect failure\\n00000009 2013-02-04 18:26:33 24787 24787 Cluster replicate nodes check completed in 8ms\\n0000000A 2013-02-04 18:26:33 24787 24787 /var/jenkins/workspace/CE-Candidate-3.1 0.0/CE/centos_6_x86_64/HPCC-Platform/thorlcr/master/thmastermain.cpp(606) : ERRO R: Validate failure(s) detected, exiting Thor \\n\\nAny pointers would be appreciated.\", \"post_time\": \"2013-02-04 10:51:17\" },\n\t{ \"post_id\": 3366, \"topic_id\": 737, \"forum_id\": 14, \"post_subject\": \"Re: Memory requirement\", \"username\": \"jsmith\", \"post_text\": \"We have in the past run clusters with 2GB, so yes it should be ok.\\nWithout a default 'globalMemorySize' setting, each slave will default to 75% of physical memory.\\nNB: It establishes the amount of physical memory on the Thor Master and assumes all nodes are the same. So if that's not the case, you should set 'globalMemorySize' manually for your cluster.\\n\\nPerforming any memory intensive operations (e.g. global sort) on a largish file (1TB), on a small cluster with a small amount of memory, is going to be less than efficient though, i.e. it's going to have to spill a lot of intermediate results to disk to process that 1TB, before it can merge to a global sort.\", \"post_time\": \"2013-02-07 09:34:22\" },\n\t{ \"post_id\": 3354, \"topic_id\": 737, \"forum_id\": 14, \"post_subject\": \"Re: Memory requirement\", \"username\": \"bforeman\", \"post_text\": \"My educated guess is that you should be OK, but the tradeoff is that you will suffer a bit with performance. The cluster can only operate as fast as your slowest node.\\n\\nDouble-checking my opinion with the installation team now...\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-06 13:34:43\" },\n\t{ \"post_id\": 3328, \"topic_id\": 737, \"forum_id\": 14, \"post_subject\": \"Memory requirement\", \"username\": \"Sunitha\", \"post_text\": \"We have a few unused machines that we might consider setting up as additional nodes. However, they only have 2GB RAM. The recommendation is to have 4GB RAM, so we are wondering if it is a good idea to go this route. We are looking at crunching a 1+TB file and most of our operations seem to be CPU intensive at this point. 
Any thoughts on this would help!\\n\\nThanks,\\nSunitha\", \"post_time\": \"2013-02-05 01:10:28\" },\n\t{ \"post_id\": 5630, \"topic_id\": 785, \"forum_id\": 14, \"post_subject\": \"Re: HPCC 3.10, OpenLDAP, ACI\", \"username\": \"ankit_kailaswar\", \"post_text\": \"I am facing similar issu with hpcc-4.2.0. Is this problem already solved ?\\ncould anyone provide answers to question asked by James or any workaround for this problem.\\n\\n-Thanks\", \"post_time\": \"2014-05-05 10:30:35\" },\n\t{ \"post_id\": 3541, \"topic_id\": 785, \"forum_id\": 14, \"post_subject\": \"HPCC 3.10, OpenLDAP, ACI\", \"username\": \"james.wiltshire@lnssi.com\", \"post_text\": \"Has HPCC 3.10 been recently tested with OpenLDAP?\\nI'm currently getting an error when ECLServer starts up:\\n...Exception: : ldap_add_ext_s error for ou=ecl,dc=xxxxxx: 17 Undefined attribute type\\n\\nAnd, in slapd logging, I'm seeing:\\n...RESULT tag=105 err=17 text=OpenLDAPaci: attribute type undefined\\n\\nI'm on slapd.2.4.23.\\nA few possible issues, maybe...\\nDo the most recent versions of OpenLDAP still use OpenLDAPaci? (Hard to tell from online OpenLDAP documentation, what there is of it.)\\nIf so, then safe to assume OpenLDAP must be compiled with the enable-aci?\\nOr, might it be possible to re-configure an existing OpenLDAP install to use ACI?\\n\\nAre there other non-obvious issues here?\\n\\nldapconnection.cpp seems to be generating the "OpenLDAPaci" attribute, in m_sdfieldname.\\n\\nThanks for any help.\", \"post_time\": \"2013-02-24 05:00:19\" },\n\t{ \"post_id\": 3551, \"topic_id\": 788, \"forum_id\": 14, \"post_subject\": \"Re: IMDB Tutorial\", \"username\": \"bforeman\", \"post_text\": \"You can find it here:\\n\\nhttp://hpccsystems.com/download/docs/six-degrees\\n\\nThis tutorial references the IMDB public data source.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-26 12:41:05\" },\n\t{ \"post_id\": 3547, \"topic_id\": 788, \"forum_id\": 14, \"post_subject\": \"IMDB Tutorial\", \"username\": \"johnlyn763\", \"post_text\": \"I just installed the virtual image (VM 3.10.2.1), and when I browse to the server, a "readme" doc appears - "Welcome to the HPCCSystems Demo VMware Image."\\n\\nIn that doc, there's a mention of a tutorial: "we recommend following the steps in the IMDB Tutorial."\\n\\nI can't seem to find that tutorial. I looked in my local installation (Windows) in the HPCC Systems group, and also online at hpccsystems.com\\n\\nDoes anyone know where the IMDB Tutorial can be found? Or, if it's superseded by another tutorial? Thanks!\", \"post_time\": \"2013-02-26 01:52:35\" },\n\t{ \"post_id\": 5922, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"atodor\", \"post_text\": \"Thank you. The issue was that I wasn't using git; initially I just copied the source code locally. Everything works now.\", \"post_time\": \"2014-06-19 15:37:27\" },\n\t{ \"post_id\": 5919, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"You need to run the initialize submodules command in the source directory (HPCC-Platform). 
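\\n\\nFor example, from the top of the clone (assuming the default checkout directory name):\\ncd HPCC-Platform\\ngit submodule update --init --recursive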
\\n\\n-Gleb\", \"post_time\": \"2014-06-19 14:40:14\" },\n\t{ \"post_id\": 5816, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"atodor\", \"post_text\": \"I get this error:\\n\\nfatal: Not a git repository (or any of the parent directories): .git\", \"post_time\": \"2014-06-02 19:07:08\" },\n\t{ \"post_id\": 5815, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"gsmith\", \"post_text\": \"Did you initialise submodules? (git submodule update --init --recursive)\", \"post_time\": \"2014-06-02 18:49:55\" },\n\t{ \"post_id\": 5813, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"atodor\", \"post_text\": \"Thank you, gsmith. Now I have a problem when I run make package:\\n\\n
CMake Error at /home/atodor/hpcc/build/esp/src/cmake_install.cmake:43 (message):\\n Can't find Dojo build tools -- did you initialise submodules? (git\\n submodule update --init --recursive)\\nCall Stack (most recent call first):\\n /home/atodor/hpcc/build/esp/cmake_install.cmake:41 (INCLUDE)\\n /home/atodor/hpcc/build/cmake_install.cmake:49 (INCLUDE)\\n\\n\\nCPack Error: Error when generating package: hpccsystems-platform\\nmake: *** [package] Error 1\\n
\", \"post_time\": \"2014-06-02 16:17:22\" },\n\t{ \"post_id\": 5791, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"gsmith\", \"post_text\": \"Quick answer:\\nsudo apt-get install libiberty-dev
\\n\\nLonger answer:\\nThe build wiki page (https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC) has a list of prerequisites for building, Ubuntu 14.04 (which I suspect your building for), was recently added/updated.\", \"post_time\": \"2014-06-01 06:07:48\" },\n\t{ \"post_id\": 5790, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"atodor\", \"post_text\": \"Hello,\\n\\nI have a similar problem. After installing all the pre-requisites, I get this message:\\n\\n-- Could NOT find BinUtils (missing: IBERTY_LIBRARIES) \\nCMake Error at cmake_modules/commonSetup.cmake:499 (message):\\n BINUTILS requested but package not found\\n
\\nCould someone please help?\", \"post_time\": \"2014-05-31 22:25:36\" },\n\t{ \"post_id\": 3670, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"ideal\", \"post_text\": \"I built the package hpccsystems-platform_community-3.10.4-5rcprecise_amd64.deb\\nI then retested my issue about the distribute child request, and it is OK now.\\n\\nThis closes this issue.\\n\\nJM.\", \"post_time\": \"2013-03-08 10:56:49\" },\n\t{ \"post_id\": 3667, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"gsmith\", \"post_text\": \"I updated the wiki entry to include "make"\", \"post_time\": \"2013-03-08 09:44:20\" },\n\t{ \"post_id\": 3666, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"ideal\", \"post_text\": \"Not exactly,\\n\\nAfter investigation, I concluded that the "make" package was missing, so some additional installation was required: \\nsudo apt-get install make
\\n\\nThere is a "building" chapter not clear about target machine considerations. I skepped it and tried command "make package", to build debian package, hope it will be right.\\n\\nDocumentation exists, it is really a good thing. But if it was clarified a little, it could save some precious hours.\\n\\nJM.\", \"post_time\": \"2013-03-08 09:35:05\" },\n\t{ \"post_id\": 3662, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"Re: CMAKE installation issues Ubuntu 12.04\", \"username\": \"gsmith\", \"post_text\": \"Sounds like your missing gcc and related tools?\\n\\nFrom the wiki at https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC\\n\\nPrerequisites: \\n\\nsudo apt-get install g++ gcc cmake bison flex binutils-dev libldap2-dev libcppunit-dev libicu-dev libxalan110-dev zlib1g-dev libboost-regex-dev libssl-dev libarchive-dev python2.7-dev libv8-dev openjdk-6-jdk libapr1-dev libaprutil1-dev\\n
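\\nWith those packages installed, the build itself is roughly as follows (a sketch, assuming the build directory sits alongside the source tree, as in the cmake command from the opening post of this thread):\\nmkdir build && cd build\\ncmake ../HPCC-Platform-candidate-3.10.4\\nmake package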
\", \"post_time\": \"2013-03-07 20:31:32\" },\n\t{ \"post_id\": 3660, \"topic_id\": 815, \"forum_id\": 14, \"post_subject\": \"CMAKE installation issues Ubuntu 12.04\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI'd like to install last source code version to get last fixes, on ubuntu 12.04 platform.\\nI follow this installation procedure and I get cmake errors : \\n\\n$ cmake ../HPCC-Platform-candidate-3.10.4\\nCMake Error: CMake was unable to find a build program corresponding to "Unix Makefiles". CMAKE_MAKE_PROGRAM is not set. You probably need to select a different build tool.\\nCMake Error: Error required internal CMake variable not set, cmake may be not be built correctly.\\nMissing variable is:\\nCMAKE_C_COMPILER_ENV_VAR\\nCMake Error: Error required internal CMake variable not set, cmake may be not be built correctly.\\nMissing variable is:\\nCMAKE_C_COMPILER\\nCMake Error: Could not find cmake module file:/home/ubuntu/build/CMakeFiles/CMakeCCompiler.cmake\\nCMake Error: Error required internal CMake variable not set, cmake may be not be built correctly.\\nMissing variable is:\\nCMAKE_CXX_COMPILER_ENV_VAR\\nCMake Error: Error required internal CMake variable not set, cmake may be not be built correctly.\\nMissing variable is:\\nCMAKE_CXX_COMPILER\\nCMake Error: Could not find cmake module file:/home/ubuntu/build/CMakeFiles/CMakeCXXCompiler.cmake\\nCMake Error: CMAKE_C_COMPILER not set, after EnableLanguage\\nCMake Error: CMAKE_CXX_COMPILER not set, after EnableLanguage\\n-- Configuring incomplete, errors occurred!\\n
\\n\\nIs there something wrong with documentation or am I missing something ?\\n\\nJM.\", \"post_time\": \"2013-03-07 15:14:36\" },\n\t{ \"post_id\": 3720, \"topic_id\": 823, \"forum_id\": 14, \"post_subject\": \"Re: Install error: Failed to join multicast channel 3\", \"username\": \"mry\", \"post_text\": \"thx, it works.\", \"post_time\": \"2013-03-12 15:43:50\" },\n\t{ \"post_id\": 3717, \"topic_id\": 823, \"forum_id\": 14, \"post_subject\": \"Re: Install error: Failed to join multicast channel 3\", \"username\": \"sort\", \"post_text\": \"The problem could be related to a few things (i.e. how the network stack, router, etc are configured).\\n\\nYou can try running without multicast. Run configmgr and modify the following roxie setting:\\nOn the UDP tab\\nSet roxieMulticastEnabled to false\\n\\nStop the cluster\\nCopy the environment.xml file to the appropiate location on all nodes\\nrestart the cluster\", \"post_time\": \"2013-03-12 15:14:13\" },\n\t{ \"post_id\": 3690, \"topic_id\": 823, \"forum_id\": 14, \"post_subject\": \"Install error: Failed to join multicast channel 3\", \"username\": \"mry\", \"post_text\": \"Hi,\\n\\nI want to install a 8-node hpcc cluster. But When starting myroxie process, I get this log, "EXCEPTION: (1406): Failed to join multicast channel 3 (239.1.1.4:8887)". \\n\\nWhat should I check?\\n\\n---------------------\\n/var/log/HPCCSystems/myroxie/roxie.log\\n00000001 2013-03-11 00:22:22 28362 28362 "Roxie starting, build = community_3.10.2-1"\\n00000002 2013-03-11 00:22:22 28362 28362 "RoxieMemMgr: Setting memory limit to 1073741824 bytes (1024 pages)"\\n00000003 2013-03-11 00:22:22 28362 28362 "RoxieMemMgr: 1024 Pages successfully allocated for the pool - memsize=1073741824 base=0x7f5dd0600000 alignment=1048576 bitmapSize=32"\\n00000004 2013-03-11 00:22:22 28362 28362 "Current Hardware Info: CPUs=4, speed=2660 MHz, Mem=16081 MB , primDisk=0 GB, primFree=0 GB, secDisk=0 GB, secFree=0 GB, NIC=0"\\n00000005 2013-03-11 00:22:22 28362 28366 "Background copy thread 0x629db0 starting"\\n00000006 2013-03-11 00:22:22 28362 28367 "HandleCloser thread 0x629db0 starting"\\n00000007 2013-03-11 00:22:22 28362 28362 "Roxie: multicast socket created port=8887 sockbuffsize=131071 actual 262142"\\n00000008 2013-03-11 00:22:22 28362 28362 "EXCEPTION: (1406): Failed to join multicast channel 3 (239.1.1.4:8887)"\\n00000009 2013-03-11 00:22:22 28362 28366 "Background copy thread 0x629db0 exiting"\\n0000000A 2013-03-11 00:22:22 28362 28367 "Handle closer thread 0x629db0 exiting"\", \"post_time\": \"2013-03-10 16:48:55\" },\n\t{ \"post_id\": 3945, \"topic_id\": 883, \"forum_id\": 14, \"post_subject\": \"Re: fatal error C1083: Cannot open include file: 'eclinclude\", \"username\": \"gsmith\", \"post_text\": \"If you hold the "shift" key down, while submitting query in the IDE it will echo the command line equivalent. I suspect your are missing some path or -L options.\", \"post_time\": \"2013-04-17 11:15:17\" },\n\t{ \"post_id\": 3943, \"topic_id\": 883, \"forum_id\": 14, \"post_subject\": \"Re: fatal error C1083: Cannot open include file: 'eclinclude\", \"username\": \"bohman\", \"post_text\": \"Thanks for the quick response. Your response triggered me to look at my preferences and the IDE documentation. For some reason, my default compiler options were not set. When I set them, via the IDE I am able to run the program locally and from the thor.\\n\\n[attachment=0:utzxnz9r]bohmanPreferences.jpg\\n\\n\\nHowever, I still get the error when I try to compile from the command line. 
See attachment.\\n\\n[attachment=1:utzxnz9r]bohmanCommandLine.jpg\\n\\nI'm not sure why it is not compiling via the command line.\", \"post_time\": \"2013-04-16 19:01:25\" },\n\t{ \"post_id\": 3942, \"topic_id\": 883, \"forum_id\": 14, \"post_subject\": \"Re: fatal error C1083: Cannot open include file: 'eclinclude\", \"username\": \"gsmith\", \"post_text\": \"Sounds like you trying to compile local (intentional or not). \\n\\nIn the IDE double check your server IP address:[attachment=1:es9l9myx]Prefs.PNG \\n\\nand then ensure you have picked a valid Target on the builder window:[attachment=0:es9l9myx]Terget.PNG\", \"post_time\": \"2013-04-16 17:35:26\" },\n\t{ \"post_id\": 3935, \"topic_id\": 883, \"forum_id\": 14, \"post_subject\": \"fatal error C1083: Cannot open include file: 'eclinclude.hpp\", \"username\": \"bohman\", \"post_text\": \"I am a newb to HPCC; I installed a single node on Ubuntu 12.04 LTS; I installed the IDE on Windows Vista; via the command line, when I try compiling the hello.ecl referenced on page 19 of the Installing and Running the HPCC Platform, HPCC Installation and Startup document I get the following error:\\n\\nfatal error C1083: Cannot open include file: 'eclinclude.hpp': No such file or directory\\n\\nI can't find the eclinclude.hpp file in any of the directories under the installed directory of C:\\\\Program Files (x86)\\\\HPCC Systems\\\\\\n\\nOther *.hpp files appear in the following folders:\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\bin\\\\ver_3_0\\\\cl\\\\include\\nC:\\\\Program Files (x86)\\\\HPCC Systems\\\\HPCC\\\\ver_3_6\\\\componentfiles\\\\cl\\\\include\\n\\nThe a.out.cpp file created has the following includes:\\n#include "eclinclude.hpp"\\n#include "eclrtl.hpp"\\n\\nInsights into where I can find the eclinclude.hpp or a nudge on what I need to do to work around this problem will be much appreciated.\\n\\nthanks!\", \"post_time\": \"2013-04-16 14:41:43\" },\n\t{ \"post_id\": 4033, \"topic_id\": 900, \"forum_id\": 14, \"post_subject\": \"Configuring memory and cpus on thor\", \"username\": \"nvasil\", \"post_text\": \"Hi I am setting up a thor cluster and I want to use all the memory of the machine for Thor\\n\\nCan you explain the following parameters?\\nglobalMemorySize\\nmasterMemorySize\\nmemorySpillAt\\nmultiThorMemoryThreshold\\n\\nWhat are the default values?\\n\\nIsMaxActivityCores the number of cores for Thor to use?\\n\\nIf I have 16GB per node what is the recommended distribution of memory over the above parameters?\", \"post_time\": \"2013-04-30 19:56:15\" },\n\t{ \"post_id\": 13743, \"topic_id\": 917, \"forum_id\": 14, \"post_subject\": \"Re: Where is HPCC Client Tools installed on Mac OS X?\", \"username\": \"Naveen\", \"post_text\": \"Could you please share the complete installation process of HPCC platform on Mac Os. I am currently using Mac Os Sierra 10.12.1\", \"post_time\": \"2016-12-13 05:32:07\" },\n\t{ \"post_id\": 4134, \"topic_id\": 917, \"forum_id\": 14, \"post_subject\": \"Re: Where is HPCC Client Tools installed on Mac OS X?\", \"username\": \"rhimbo\", \"post_text\": \"Thanks, Dan....\\n\\nHm, I guess there was something wrong with my regex when doing my "find". 
I do see the installation in /opt.\", \"post_time\": \"2013-05-21 17:48:46\" },\n\t{ \"post_id\": 4126, \"topic_id\": 917, \"forum_id\": 14, \"post_subject\": \"Re: Where is HPCC Client Tools installed on Mac OS X?\", \"username\": \"DSC\", \"post_text\": \"The installer puts everything into /opt/HPCCSystems, then a subdirectory for the version number (e.g. '3.10.8' for the current version).\\n\\nI've found it useful to create a symlink in that directory (ln -s 3.10.8 current) in order to standardize the path. That would need to be updated every time you install, of course, but it simplifies a number of other things like adjusting $PATH in your login script.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-05-21 11:34:56\" },\n\t{ \"post_id\": 4125, \"topic_id\": 917, \"forum_id\": 14, \"post_subject\": \"Where is HPCC Client Tools installed on Mac OS X?\", \"username\": \"rhimbo\", \"post_text\": \"Hi,\\n\\nWhat is the default installation directory for the HPCC Client tools on Mac OS X? I downloaded and installed from this location:\\nhttp://hpccsystems.com/download/free-co ... ols?os=181\\n\\nI cannot find the installation even though I successfully went through the whole installation process.\\n\\nI see nothing pertinent in my /Applications folder. I used Unix "find" from a shell to search for a reg exp with [Hh][Pp][Cc]1 in it but got no results..... \\n\\nI am on Mac OS X 10.8.3 Mountain Lion....\\n\\nThanks.....\", \"post_time\": \"2013-05-20 23:26:50\" },\n\t{ \"post_id\": 4207, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"sbagaria\", \"post_text\": \"So I think I am able to publish queries on to my new target cluster but the process cluster remains detached from Dali.\\n\\n\\n[sbagaria@node251004 ~]$ ecl roxie check myroxiedev\\nAll nodes have matching state hash\\n Hash [16439602216763471597] - 1 node(s)\\nAll nodes detached from DALI\\n1 Total node(s) reported\\n
\\n\\nWhen I query this new target cluster, I get a response saying 'Unknown query ...'. ESP on port 8002 presents me with the form correctly and so I can imagine that the query is stored correctly in Dali but somehow the request is not processed by Roxie. On querying directly to port 9876, I get the same error message in the response. Whereas my original process cluster is attached to dali and works just fine.\\n\\n\\n[sbagaria@node251004 ~]$ ecl roxie check myroxie\\nAll nodes have matching state hash\\n Hash [13781477476754310972] - 3 node(s)\\nAll nodes attached to DALI\\n3 Total node(s) reported\\n
\", \"post_time\": \"2013-06-14 18:58:49\" },\n\t{ \"post_id\": 4192, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"bforeman\", \"post_text\": \"That's great news! Thanks for the feedback!\", \"post_time\": \"2013-06-12 12:23:20\" },\n\t{ \"post_id\": 4190, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"sbagaria\", \"post_text\": \"I think it worked. So I defined a new cluster in Topology. Added a Roxie component (process cluster). And then mapped that new Roxie component to the new cluster in Topology, and kept the eclccserver and eclccscheduler components common with the Roxie cluster from before.\", \"post_time\": \"2013-06-11 21:00:54\" },\n\t{ \"post_id\": 4188, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"sbagaria\", \"post_text\": \"Is a target cluster defined through topology?\", \"post_time\": \"2013-06-11 19:45:52\" },\n\t{ \"post_id\": 4185, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"sbagaria\", \"post_text\": \"It could be possible, I will try again.\", \"post_time\": \"2013-06-11 19:36:25\" },\n\t{ \"post_id\": 4163, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Re: Multiple Roxie querysets\", \"username\": \"bforeman\", \"post_text\": \"Besides defining a ROXIE process cluster you also need to define one or more Target clusters that contain it. Is it possible that you created the process cluster but not a target cluster?\\n\\nBob\", \"post_time\": \"2013-05-30 17:42:09\" },\n\t{ \"post_id\": 4141, \"topic_id\": 920, \"forum_id\": 14, \"post_subject\": \"Multiple Roxie querysets\", \"username\": \"sbagaria\", \"post_text\": \"Is there a way to configure multiple Roxie querysets? I tried publishing multiple Roxie clusters, each with its own farm of servers (on different hardware using different ports). But I could see only one query set.\\n\\nBasically, I want to deploy a 1 node Roxie for testing and a multi-node Roxie for production but within the same environment. Is this possible? Or will I need two separate environments on different hardware?\", \"post_time\": \"2013-05-22 14:01:46\" },\n\t{ \"post_id\": 4173, \"topic_id\": 926, \"forum_id\": 14, \"post_subject\": \"Re: System Error: 1301 Memory Pool Exhausted\", \"username\": \"HPCC Staff\", \"post_text\": \"Thanks for circling back and sharing your conclusion!\", \"post_time\": \"2013-06-05 21:32:57\" },\n\t{ \"post_id\": 4171, \"topic_id\": 926, \"forum_id\": 14, \"post_subject\": \"Re: System Error: 1301 Memory Pool Exhausted\", \"username\": \"jritz\", \"post_text\": \"I figured it out. It helps when you run the cert on THOR and not HTHOR! \", \"post_time\": \"2013-06-05 17:34:12\" },\n\t{ \"post_id\": 4168, \"topic_id\": 926, \"forum_id\": 14, \"post_subject\": \"System Error: 1301 Memory Pool Exhausted\", \"username\": \"jritz\", \"post_text\": \"Error Message: System Error: 1301: Memory Pool Exhausted (In Disk Read G4 E6)(0,0)\\n\\nSystem Specs: I have 6 total nodes (1 LDAP, 1 Systems Server (contains dali, sasha, ect), 4 THOR slaves). Each node has a single cpu with 12 gb of ram. We are running HPCC 3.10.4-1 and I am using ECL IDE 6.10.8.101.3.8.\\n\\nProblem: I am trying to run the _Certification test and getting the above error when I run the Certify_DR.ecl. I am able to build the files just fine, but it chokes when I run the next step. 
I have played around with the configuration of THOR by adjusting the globalMemorySize and MasterMemorySize. My latest run had it set to 10Gb and it still failed. \\n\\nAny help would be greatly appreciated.\\n\\nThanks,\\n\\nJoe\", \"post_time\": \"2013-06-05 14:07:46\" },\n\t{ \"post_id\": 4199, \"topic_id\": 932, \"forum_id\": 14, \"post_subject\": \"Re: Thor slave IP addresses\", \"username\": \"sbagaria\", \"post_text\": \"Yes, this did the trick. I can see the changed IP addresses in ECL watch now. \\n\\nFor completeness, the output from my command looked like:\\n
\\n# /opt/HPCCSystems/bin/updtdalienv /etc/HPCCSystems/environment.xml -f\\n00000000 2013-06-12 12:03:42 17179 17179 "Environment and node groups updated in dali at 172.20.17.21:7070"\\n00000001 2013-06-12 12:03:42 17179 17179 "WARNING: Forcing new group layout for mythor [ matched active = false, matched old environment = true ]\\nNew cluster layout for cluster mythor\\nForcing new group layout for myroxie [ matched active = false, matched old environment = true ]\\nNew cluster layout for cluster myroxie\\nForcing new group layout for myroxie__farm1 [ matched active = false, matched old environment = true ]\\nNew cluster layout for cluster myroxie__farm1\\n"\\n
\", \"post_time\": \"2013-06-12 16:05:50\" },\n\t{ \"post_id\": 4198, \"topic_id\": 932, \"forum_id\": 14, \"post_subject\": \"Re: Thor slave IP addresses\", \"username\": \"jsmith\", \"post_text\": \"Try forcing dali to update the environment with:\\n\\nupdtdalienv /etc/HPCCSystems/environment.xml -f\", \"post_time\": \"2013-06-12 15:52:36\" },\n\t{ \"post_id\": 4189, \"topic_id\": 932, \"forum_id\": 14, \"post_subject\": \"Re: Thor slave IP addresses\", \"username\": \"sbagaria\", \"post_text\": \"Alight. So a bit of investigation reveals that these IP addresses are collected from dali through the 'daliadmin dfsgroup' command in the shell script run_thor. Now to find out how to remap these IP addresses in Dali.\", \"post_time\": \"2013-06-11 20:43:13\" },\n\t{ \"post_id\": 4186, \"topic_id\": 932, \"forum_id\": 14, \"post_subject\": \"Thor slave IP addresses\", \"username\": \"sbagaria\", \"post_text\": \"Where does HPCC store the IP addresses of the Thor slave nodes.\\n\\nManually changing the environment.xml does not change the IP addresses of the thor slaves which continue to refer to the old IP addresses. At least this is what the files in /var/lib/HPCCSystems/mythor[uslave|uslave.start|thorgroup] and ECL Watch (see below) tell me. When you start thor, you can see that thor is trying to communicate to its slaves on the old IP addresses (calling start_slaves by ssh-ing to the old IP addresses). \\n\\nWhen I go to the 'Target Clusters' view on ECL Watch, I can see that all components show the changed network addresses but thor slaves continue to show the old addresses. How do we change the network addresses for the Thor target cluster?\\n\\nMy HPCC version is 3.10.8-7.\", \"post_time\": \"2013-06-11 19:39:08\" },\n\t{ \"post_id\": 23953, \"topic_id\": 933, \"forum_id\": 14, \"post_subject\": \"Re: Mythor is not starting on Ubuntu 12.10\", \"username\": \"gfortil\", \"post_text\": \"Please try to start the mythor component separately with this command.\\nsudo /etc/init.d/hpcc-init -c mythor start\\nAssuming you used the init process to start the other components.\", \"post_time\": \"2019-01-08 16:47:08\" },\n\t{ \"post_id\": 23653, \"topic_id\": 933, \"forum_id\": 14, \"post_subject\": \"Re: Mythor is not starting on Ubuntu 18.04 LTS on single nod\", \"username\": \"Mohamad Ahtisham Wani\", \"post_text\": \"Starting mydali ... [ OK ] \\nStarting mydfuserver ... [ OK ] \\nStarting myeclagent ... [ OK ] \\nStarting myeclccserver ... [ OK ] \\nStarting myeclscheduler ... [ OK ] \\nStarting myesp ... [ OK ] \\nStarting myroxie ... [ OK ] \\nStarting mysasha ... [ OK ] \\nStarting mythor ... [ TIMEOUT ]\\n\\nIt worked fine the first time after installation but once I restarted the system, the mythor service shows timeout. I would really appreciate some help. 
Besides, I am running the platform on Ubuntu 18.04 LTS on a virtual machine.\", \"post_time\": \"2018-11-21 07:19:36\" },\n\t{ \"post_id\": 4203, \"topic_id\": 933, \"forum_id\": 14, \"post_subject\": \"Re: Mythor is not starting on Ubuntu 12.10\", \"username\": \"NewGuitar\", \"post_text\": \"I added globalMemorySize="8192 MB" attribute to ThorCluster configuration in environment.xml and it worked.\", \"post_time\": \"2013-06-13 19:48:50\" },\n\t{ \"post_id\": 4202, \"topic_id\": 933, \"forum_id\": 14, \"post_subject\": \"Mythor is not starting on Ubuntu 12.10\", \"username\": \"NewGuitar\", \"post_text\": \"I have installed fresh HPCC Server 3.10.8-9 on Ubuntu 12.10.\\nThe installation process was OK.\\nHowever when I start the system by "service hpcc-init start" mythor fails while all other components start with OK.\\n\\nthormaster log:\\nRoxieMemMgr: Setting memory limit to 13431209984 bytes (12809 pages)\\n1303: /home/clo/builds/HPCC-Platform/thorlcr/master/thmastermain.cpp(714) : ThorMaster : RoxieMemMgr: Unable to create heap\\n\\nstart_thor.log:\\nstarting thorslaves ...\\nrm: cannot remove `/vol/www/python/Test.wsgi': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/__init__.pyc': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/MySQLQuery.pyc': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/__init__.py': Permission denied\\nrm: cannot remove `/vol/www/python/python/Test.wsgi': Permission denied\\nrm: cannot remove `/vol/www/python/python/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/python/tests.py': Permission denied\\nrm: cannot remove `/vol/www/python/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/tests.py': Permission denied\\nrm: cannot remove `/vol/www/python/Test.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/__init__.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/MySQLQuery.pyc': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/__init__.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/__init__.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/MySQLQuery.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/__init__.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/Test.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/Test.wsgi': Permission denied\\ncp: cannot create regular file `/vol/www/python/tests.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/python/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/python/Test.wsgi': Permission denied\\ncp: cannot create regular file `/vol/www/python/python/tests.py': Permission denied\\n\\n\\nHow can I limit the amount of memory Roxie tries to allocate?\\nNow it is trying to allocate 13GB, despite the fact that totalMemoryLimit is set to "1073741824" in environment.xml.\", \"post_time\": \"2013-06-13 18:12:39\" },\n\t{ \"post_id\": 4225, \"topic_id\": 935, 
\"forum_id\": 14, \"post_subject\": \"Re: descend into write-protected directory\", \"username\": \"clo\", \"post_text\": \"Were there any error messages during the install? I'm wondering if something happened during the installation process where the permissions weren't changed properly. Would it be possible for you to uninstall and reinstall the platform to see what the output is?\", \"post_time\": \"2013-06-18 20:29:08\" },\n\t{ \"post_id\": 4224, \"topic_id\": 935, \"forum_id\": 14, \"post_subject\": \"Re: descend into write-protected directory\", \"username\": \"NewGuitar\", \"post_text\": \"Hi,\\nI didn't change anything. Just installed it and there it is all that stuff.\", \"post_time\": \"2013-06-18 18:58:51\" },\n\t{ \"post_id\": 4218, \"topic_id\": 935, \"forum_id\": 14, \"post_subject\": \"Re: descend into write-protected directory\", \"username\": \"clo\", \"post_text\": \"Hi, can you provide us with a little bit more information? I was wondering if you've changed the defaults for the platform configuration.\", \"post_time\": \"2013-06-18 13:04:31\" },\n\t{ \"post_id\": 4205, \"topic_id\": 935, \"forum_id\": 14, \"post_subject\": \"descend into write-protected directory\", \"username\": \"NewGuitar\", \"post_text\": \"I have installed fresh HPCC Server 3.10.8-9 on Ubuntu 12.10.\\nEverything works seemingly OK.\\nHowever there are a lot of errors in the start_thor.log:\\nstarting thorslaves ...\\nrm: cannot remove `/vol/www/python/Test.wsgi': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/__init__.pyc': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/MySQLQuery.pyc': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/mysqlquery/__init__.py': Permission denied\\nrm: cannot remove `/vol/www/python/python/Test.wsgi': Permission denied\\nrm: cannot remove `/vol/www/python/python/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/python/tests.py': Permission denied\\nrm: cannot remove `/vol/www/python/MySQLQuery.py': Permission denied\\nrm: cannot remove `/vol/www/python/tests.py': Permission denied\\nrm: cannot remove `/vol/www/python/Test.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/__init__.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/MySQLQuery.pyc': Permission denied\\ncp: cannot create regular file `/usr/lib/python2.7/mysqlquery/__init__.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/__init__.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/MySQLQuery.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/mysqlquery/__init__.pyc': Permission denied\\ncp: cannot create regular file `/vol/www/python/Test.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/Test.wsgi': Permission denied\\ncp: cannot create regular file `/vol/www/python/tests.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/python/MySQLQuery.py': Permission denied\\ncp: cannot create regular file `/vol/www/python/python/Test.wsgi': Permission denied\\ncp: cannot create regular file 
`/vol/www/python/python/tests.py': Permission denied\\n\\nAnd when I login to the server with ssh it asks a lot of questions like:\\nrm: descend into write-protected directory `/vol/www/python'?\\n\\nHow to fix this?\", \"post_time\": \"2013-06-14 05:40:31\" },\n\t{ \"post_id\": 4335, \"topic_id\": 946, \"forum_id\": 14, \"post_subject\": \"Re: thor cluster slave node not running mythor service!\", \"username\": \"clo\", \"post_text\": \"The way to tell if your thors are running are to monitor the thormaster process. \\n\\nAlso, you can go onto ECL Watch > Target Clusters > select your thor cluster > hit submit (with action = machine information).\\n\\nThis will return the status of your thorslaves as well as your thormaster.\\n\\nHope this helps.\", \"post_time\": \"2013-07-18 11:46:59\" },\n\t{ \"post_id\": 4334, \"topic_id\": 946, \"forum_id\": 14, \"post_subject\": \"Re: thor cluster slave node not running mythor service!\", \"username\": \"srbhkmr\", \"post_text\": \"Thanks for the reply. Yes, I did distribute the environment.xml on all nodes file before running the install.sh script. It seems to be working fine now. \\n\\nAlthough I was hoping that the slave node will have some mythor service up and running after the install, but the service hpcc-init --typelist
doesn't display any thor related services defined on slave node.\", \"post_time\": \"2013-07-18 11:43:31\" },\n\t{ \"post_id\": 4254, \"topic_id\": 946, \"forum_id\": 14, \"post_subject\": \"Re: thor cluster slave node not running mythor service!\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\n I just want to make sure I got the procedure of what you did exactly. I looked at the environment.xml you sent and it appears you configured a thorslave to be running on the second node.\\n\\nBefore you ran the install-cluster.sh script, had you already moved your env.txt over to your /etc/HPCCSystems/environment.xml? If you didn't and you tried to run /opt/HPCCSystems/sbin/install-cluster.sh -k, then you're going to be just installing the platform on the default node which is itself.\", \"post_time\": \"2013-06-25 20:01:30\" },\n\t{ \"post_id\": 4251, \"topic_id\": 946, \"forum_id\": 14, \"post_subject\": \"thor cluster slave node not running mythor service!\", \"username\": \"srbhkmr\", \"post_text\": \"I tried configuring a two-node (two different physical machines with different IP addresses) Thor cluster. \\nAfter going through the documentation and finishing up the configuration using the mentioned /sbin/install-cluster.sh, /sbin/hpcc-push.sh /sbin/hpcc-run.sh scripts etc.\\nI find that the slave node is not registering/running any Thor related service.\\n\\nsudo service hpcc-init --typelist
shows me:\\nNo components on this node as defined by /etc/HPCCSystems/environment.xml.
\\n\\nFor more details attached is the environment.xml file in use.\\nThe single node thor cluster works fine though.\\n\\nAny pointers are appreciated.\\nThanks,\", \"post_time\": \"2013-06-24 15:59:17\" },\n\t{ \"post_id\": 4297, \"topic_id\": 954, \"forum_id\": 14, \"post_subject\": \"Re: configuring SSL/HTTPS end point within ESP\", \"username\": \"manojgvr\", \"post_text\": \"[quote="anthony.fishbeck":1lhjfm3j]Hi Manoj,\\n\\nESP doesn't currently support verifying client certificates. You could open a feature request at https://track.hpccsystems.com if you'd like us to consider adding support for it.\\n\\nRegards,\\nTony\\n\\nThanks Tony for the answer ! \\nThanks\\nManoj\", \"post_time\": \"2013-07-09 09:01:35\" },\n\t{ \"post_id\": 4290, \"topic_id\": 954, \"forum_id\": 14, \"post_subject\": \"Re: configuring SSL/HTTPS end point within ESP\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Manoj,\\n\\nESP doesn't currently support verifying client certificates. You could open a feature request at https://track.hpccsystems.com if you'd like us to consider adding support for it.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-07-08 15:12:48\" },\n\t{ \"post_id\": 4276, \"topic_id\": 954, \"forum_id\": 14, \"post_subject\": \"Re: configuring SSL/HTTPS end point within ESP\", \"username\": \"manojgvr\", \"post_text\": \"[quote="clo":3qt3ynnr]Hi,\\n\\nI have a couple of questions so that I can make sure that I understand your scenario.\\n\\n1) What is the build that you're using. I just tested on 3.10.8 and it seems to be working for me.\\n\\n2) Did you follow the documentation to create the self-signing certificate and move them to the appropriate directories?\\n\\n3) Are you trying to reach the ssl enabled IP and port through http or https ?\\n\\nHi ,\\n\\n Thanks for the reply . I was able to configure SSL now in ESP end point . \\n Point 1 : Iam using v3.10 \\n Point 2 : I was not able to get the steps pertaining to SSL certificate creation and i was using keytool . Below URL helped me configure SSL certificate ( using OPEN SSL) and validate https://github.com/hpcc-systems/HPCC-Pl ... 3731/files \\n\\n Question : Is client side SSL authentication facility available in ESP server ? i.e can HPCC Server authenticate the client so that only valid clients can talk to Server ( for other client hitting the server , server should reject) ? \\n\\nThanks\\nManoj\", \"post_time\": \"2013-07-01 06:05:46\" },\n\t{ \"post_id\": 4272, \"topic_id\": 954, \"forum_id\": 14, \"post_subject\": \"Re: configuring SSL/HTTPS end point within ESP\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI have a couple of questions so that I can make sure that I understand your scenario.\\n\\n1) What is the build that you're using. I just tested on 3.10.8 and it seems to be working for me.\\n\\n2) Did you follow the documentation to create the self-signing certificate and move them to the appropriate directories?\\n\\n3) Are you trying to reach the ssl enabled IP and port through http or https ?\", \"post_time\": \"2013-06-28 16:36:10\" },\n\t{ \"post_id\": 4267, \"topic_id\": 954, \"forum_id\": 14, \"post_subject\": \"configuring SSL/HTTPS end point within ESP\", \"username\": \"manojgvr\", \"post_text\": \"Hi ,\\n Just wanted to confirm whether current latest community edition HPCC build has HTTPS support in ESP layer ? . 
I could see in the documentation for configuring the same and when i do try to configure i am getting the error below in ESP logs :\\n\\n00000041 2013-06-28 04:50:58 1969 1969 "ERROR: ESP Unhandled IException (-1 -- error loading certificate file /var/lib/HPCCSystems/myesp/certificate/cacert.cert - error:0906D06C:PEM routines:PEM_read_bio:no start line)"\\n\\n\\nAlso just wanted to know whether does HPCC - ESP layer supports client side SSL authentication also ? If so , what are the configuration steps ?\\n\\nHelp regarding this is highly appreciated .\\n\\nThanks\\nManoj\", \"post_time\": \"2013-06-28 05:04:26\" },\n\t{ \"post_id\": 4307, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"manojgvr\", \"post_text\": \"[quote="anthony.fishbeck":2c06kfkd]Hi Manoj,\\n\\nWsSecurity has an extensive set of features and ESP currently only supports that one simple profile.\\n\\nYou don't have to configure the WsSecurity part, but you do have to set up a back end system for ESP to use to authenticate and authorize the credentials provided.\\n\\nThe Community Edition allows you to set up ldap as your security back end. But the security manager is a pluggable component and I believe with Enterprise Edition we would help you integrate with other existing backend security systems.\\n\\nRegards,\\nTony\\n\\nHi Tony ,\\n\\nThanks for prompt reply & support !! Highly appreciated\\n\\nRegards\\nManoj\", \"post_time\": \"2013-07-11 04:44:41\" },\n\t{ \"post_id\": 4304, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Manoj,\\n\\nWsSecurity has an extensive set of features and ESP currently only supports that one simple profile.\\n\\nYou don't have to configure the WsSecurity part, but you do have to set up a back end system for ESP to use to authenticate and authorize the credentials provided.\\n\\nThe Community Edition allows you to set up ldap as your security back end. But the security manager is a pluggable component and I believe with Enterprise Edition we would help you integrate with other existing backend security systems.\\n\\nRegards,\\nTony\", \"post_time\": \"2013-07-10 15:07:30\" },\n\t{ \"post_id\": 4303, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"manojgvr\", \"post_text\": \"[quote="anthony.fishbeck":1xw4d6q8]Hi Manoj,\\n\\nI just meant that you can send username and password in the WsSecurity SOAP header format to ESP. \\n\\nYou don't have to configure anything. It can be used in place of HTTP level authentication.\\n\\n\\n <Security xmlns="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd">\\n <UsernameToken>\\n <Username>exampleuser</Username>\\n <Password>password</Password>\\n </UsernameToken>\\n </Security>\\n
\\n\\nAt this stage its only meant as a way of passing the information. SSL is relied on for encryption.\\n\\nOh thanks for this information !! I was under the impression that ESP doesn't support / understand WS-security ? Can UsernameToken profile be configured in HPCC config manager so that only valid clients ( who knows username/password already shared by HPCC admin folks )can connect to ESP . If so , is this available HPCC community edition ?\\n\\nThanks \\nManoj\", \"post_time\": \"2013-07-10 09:26:39\" },\n\t{ \"post_id\": 4301, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Manoj,\\n\\nI just meant that you can send username and password in the WsSecurity SOAP header format to ESP. \\n\\nYou don't have to configure anything. It can be used in place of HTTP level authentication.\\n\\n\\n <Security xmlns="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd">\\n <UsernameToken>\\n <Username>exampleuser</Username>\\n <Password>password</Password>\\n </UsernameToken>\\n </Security>\\n
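To make that concrete, here is a hedged sketch of posting a SOAP request that carries the UsernameToken header shown above. The host name, port 8002, the /WsEcl/soap/query/... URL form and the query name "myquery" are illustrative assumptions rather than values taken from this thread, so substitute your own deployment's details:

# request.xml -- SOAP envelope carrying the WsSecurity UsernameToken header
# (the query name and body fields are hypothetical placeholders)
cat > request.xml <<'EOF'
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Header>
    <Security xmlns="http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-secext-1.0.xsd">
      <UsernameToken>
        <Username>exampleuser</Username>
        <Password>password</Password>
      </UsernameToken>
    </Security>
  </soap:Header>
  <soap:Body>
    <myqueryRequest>
      <someInput>value</someInput>
    </myqueryRequest>
  </soap:Body>
</soap:Envelope>
EOF
# -k accepts a self-signed certificate; host, port and URL path are placeholders
curl -k -X POST -H "Content-Type: text/xml" --data @request.xml \
     "https://esp-host:8002/WsEcl/soap/query/roxie/myquery"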
\\n\\nAt this stage its only meant as a way of passing the information. SSL is relied on for encryption.\", \"post_time\": \"2013-07-09 15:04:58\" },\n\t{ \"post_id\": 4296, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"manojgvr\", \"post_text\": \"Thanks a lot Tony for the detailed explanation . I think this pieace of information will really help me to understand ESP component within HPCC better . \\n\\nRegarding your note "ESP currently only supports WsSecurity for passing credentials" -- by which mechanism do you recommend accomplishing this ? Where can i configure this / documentation if available will help me to explore more on this ?\\n\\nThanks in advance.\\n\\nThanks\\nManoj\", \"post_time\": \"2013-07-09 08:59:05\" },\n\t{ \"post_id\": 4293, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi Manoj,\\n\\nESP currently only supports WsSecurity for passing credentials.. not for message level, claims based authorization, or xml level encryption / signatures.\\n\\nAt this point if you need to fully integrate into a Federated Security environment I would consider putting something in front of the HPCC with complete WsSecurity support. That component could then call out to roxie (or ESP) after going through verification of identity and claims / authorization.\\n\\nWe could consider adding support for some of these WsSecurity features in our Enterprise Edition. We've done similar things internally for a SAML implementation, but not in a way that's directly applicable for exposing through WsSecurity.\\n\\nAs far as using ESP, it's specifically the WsECL component that's not currently optimized for high throughput... we tend to use WsECL internally only for development and QA so it does pre and post processing by accessing workunints. Accessing workunits is a bit slow... but optimizing WsECL is already on my to do list.\\n\\nBtw, We're actively working on adding another ESP component to the enterprise edition that we use internally for high throughput production queries. It's called Dynamic ESDL and is designed to create more robust web service interfaces for ECL queries.\\n\\nBesides better performance, using Dynamic ESDL the web service interface is defined separately from your ECL code allowing you to be much more explicit about what the client sees. (ECL code is generated to match that interface, and the ECL developer maps the query to that "contract").\\n\\nHTH, Regards,\\nTony\", \"post_time\": \"2013-07-08 18:58:01\" },\n\t{ \"post_id\": 4288, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"manojgvr\", \"post_text\": \"[quote="flavio":ta5r88fw]Manoj,\\n\\nSome of the most specific authentication, authorization and accounting components of ESP are not included in the Community Edition. They have been developed for authentication systems based on database tables used within LexisNexis, so they would still require some re-work to apply them to other environments.\\n\\nAre there any particular features that you were looking to have?\\n\\nThanks,\\n\\nFlavio\\n\\n\\nThanks Flavio for the information . Well I am specifically looking where Message level security ( WS-Security ) can be configured at ESP end point ? . I was able to configure SSL security ( HTTPS) in ESP end point . 
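For readers looking for the certificate-creation step itself, a minimal sketch using OpenSSL follows; the file names, subject and destination directory are illustrative assumptions and must match whatever the HTTPS binding configured in configmgr actually points at (the error quoted earlier in this topic shows ESP reading from /var/lib/HPCCSystems/myesp/certificate/):

# generate a private key plus a self-signed certificate valid for one year
# (file names and the CN are placeholders)
openssl req -x509 -newkey rsa:2048 -nodes -days 365 \
    -keyout privatekey.cer -out certificate.cer \
    -subj "/CN=esp.example.com"
# copy them to wherever the ESP configuration expects to find them
sudo mkdir -p /var/lib/HPCCSystems/myesp/certificate
sudo cp certificate.cer privatekey.cer /var/lib/HPCCSystems/myesp/certificate/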
Also for high throughput clusters , do you recommend using ESP ,since I have seen a post mentioning users facing problem with this approach ? https://hpccsystems.com/bb/viewtopic.php?f=15&t=916\\n\\nThanks\\nManoj\", \"post_time\": \"2013-07-05 06:05:58\" },\n\t{ \"post_id\": 4285, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"Re: ESP features community edition Vs. Enterprise edition\", \"username\": \"flavio\", \"post_text\": \"Manoj,\\n\\nSome of the most specific authentication, authorization and accounting components of ESP are not included in the Community Edition. They have been developed for authentication systems based on database tables used within LexisNexis, so they would still require some re-work to apply them to other environments.\\n\\nAre there any particular features that you were looking to have?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-07-03 12:53:59\" },\n\t{ \"post_id\": 4284, \"topic_id\": 958, \"forum_id\": 14, \"post_subject\": \"ESP features community edition Vs. Enterprise edition\", \"username\": \"manojgvr\", \"post_text\": \"It will be great if some one could help me clear below points ?\\n\\n1. Is there any difference in ESP features provided by community edition Vs. Enterprise edition in terms of security?\\n2. If point 1 is yes, what are those?\\n\\nHelp regarding this is greatly appreciated !\\n\\nThanks\\nManoj\", \"post_time\": \"2013-07-03 09:46:32\" },\n\t{ \"post_id\": 4363, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"Got it, thanks!\\n\\nDan\", \"post_time\": \"2013-07-24 21:32:29\" },\n\t{ \"post_id\": 4362, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"flavio\", \"post_text\": \"Dan,\\n\\nRichard Chapman says "Just pick something that does not overlap with any other cluster… It's not really a tuning thing" (he just doesn't have access to the forums at this time).\\n\\nFlavio\", \"post_time\": \"2013-07-24 20:09:45\" },\n\t{ \"post_id\": 4361, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"How should the multicast range be configured? There are default base and high address values, but I suspect they could be tuned in a single Roxie cluster environment.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-24 18:44:59\" },\n\t{ \"post_id\": 4360, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"richardkchapman\", \"post_text\": \"Turning off multicast does not mean Roxie uses tcp - it will still use UDP.\", \"post_time\": \"2013-07-24 16:18:31\" },\n\t{ \"post_id\": 4359, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"Well, turning off that option seemed to help some but the overall problem is still there and the same log messages noted above are still appearing, so I suspect that UDP is still heavily involved. We're still investigating. If we turn up anything I'll be sure to post our findings.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-23 19:40:38\" },\n\t{ \"post_id\": 4358, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"flavio\", \"post_text\": \"Try setting roxieMulticastEnabled to false. That should disable UDP multicast altogether. 
\\n\\nPerhaps not the best option from a performance standpoint, but falling back to TCP would use the TCP congestion control instead...\\n\\nFlavio\", \"post_time\": \"2013-07-23 18:16:57\" },\n\t{ \"post_id\": 4357, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"What is that setting? I didn't see it in configmgr.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-23 18:07:15\" },\n\t{ \"post_id\": 4356, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"flavio\", \"post_text\": \"Dan, the dropped packets are most likely due to overrunning those ports, so I don't think any settings would help much. Perhaps you could try to use TCP instead of UDP? There is a setting in the configuration file to do that. In any case, it may not help a lot. It would be better to move to 10GE or IB, instead.\\n\\nFlavio\", \"post_time\": \"2013-07-23 18:06:05\" },\n\t{ \"post_id\": 4355, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"It looks like there are a large number of dropped UDP packets. Is there a recommended set of OS-level network buffer parameters for Roxie?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-23 17:57:36\" },\n\t{ \"post_id\": 4346, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"flavio\", \"post_text\": \"I'm not sure what setting you're referring to, but only using the local slave would effectively require all of your data to fit in a single node (you could have as many copies of the data as Roxie nodes and increase concurrency that way).\\n\\nWould you be able to run a tcpdump and take a quick look to see if there are tcp retries and/or discarded udp packets? That could explain the slowness...\\n\\nIt would be also good for Richard Chapman to chime in on this one...\\n\\nFlavio\", \"post_time\": \"2013-07-19 19:25:24\" },\n\t{ \"post_id\": 4344, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"DSC\", \"post_text\": \"It's only a 1Gb network, but at least the cluster is on its own segment.\\n\\nI have the Roxie channels dialed down to 1. Shouldn't that mean that a server talks only to the farmer on the local system?\\n\\nI'm unable to reduce this response size, I'm constrained to using SOAP, and anyway there is a near-100% probabiliy that the response sizes could be 3-4x times that size (possibly larger). Is there a limitation on the response size in Roxie?\", \"post_time\": \"2013-07-19 19:00:57\" },\n\t{ \"post_id\": 4341, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Re: Roxie retries\", \"username\": \"flavio\", \"post_text\": \"Dan, what type of network do you have between the nodes? 143KB of data flowing to/from the farmer in a short period of time could cause contention and packet loss for a GigE network and exhibit these symptoms. A packet capture (tcpdump) should be able to show this.\\n\\nWould it be possible to reduce the result set, perhaps using JSON instead, or tokenizing some of the values?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-07-19 14:54:31\" },\n\t{ \"post_id\": 4330, \"topic_id\": 969, \"forum_id\": 14, \"post_subject\": \"Roxie retries\", \"username\": \"DSC\", \"post_text\": \"I have a Roxie query that can return a large response (up to 400K). It is called via SOAP and directly to the Roxie servers (port 9876). 
The query bangs up against a distributed index, so ALLNODES is part of the ECL.\\n\\nI've focused on one sample that returns a known value that is a little over 143K in size. Intermittently, under load, a query will take a much longer time to return (it is typically subsecond, but can balloon out to many seconds). I've traced this back to retries, presumably due to something I found in roxie.log:\\n\\n00000BEE 2013-07-17 02:17:17 45834 38098 "UdpCollator: CMessageCollator::GetNextResult timeout, 1 partial results"\\n00000BEF 2013-07-17 02:17:17 45834 38098 "[10.210.40.18:9876{1}@51] Input has stalled - retry required?"\\n00000BF0 2013-07-17 02:17:17 45834 38098 "[10.210.40.18:9876{1}@51] Resending packet size=1359: uid=0x00000007 activityId=51 pri=LOW queryHash=3ef85ec\\n52ae9757e ch=3 seq=0 cont=0 server=10.210.40.18 retries=4001 BROADCAST"
\\nI also found many instances of the following, but I don't know if this is related:\\n\\n00000BF1 2013-07-17 02:17:17 45834 45897 "Abandoning missing message 1288 from sender 2 - sender no longer has it"
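On the dropped-UDP-packet question raised earlier in this thread: no official buffer recommendation is given here, but the standard Linux knobs for UDP receive buffering are net.core.rmem_default and net.core.rmem_max, and watching the UDP error counters while re-running the soapUI load test shows whether raising them helps. The values below are purely illustrative, not an HPCC recommendation:

# inspect the current limits
sysctl net.core.rmem_default net.core.rmem_max
# raise them for the duration of a test (illustrative values only)
sudo sysctl -w net.core.rmem_default=1048576
sudo sysctl -w net.core.rmem_max=8388608
# watch UDP receive errors while the load test runs
netstat -su | grep -i error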
\\nWhen I say that this happens "under load" I mean that I can set up soapUI with one concurrent user, no downtime between requests, and execute the query for 60 seconds. Within that 60 seconds I will typically see 2-3 'retry' queries. If I increase the number of concurrent users the problem gets worse, but I don't know if it gets linearly worse.\\n\\nMy question is, is there a configuration change I can make that may alleviate this type of problem?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-17 15:29:32\" },\n\t{ \"post_id\": 4349, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Re: Dependencies for 4.0 embedded languages\", \"username\": \"clo\", \"post_text\": \"Hi Dan,\\n\\nRegarding R on centos6. You'll probably want to make sure you have this package from the portal and make sure that the md5sums match (as there was a bit of a mix up that was resolved on Friday of last week) HPCC Platform Centos6/RHEL6 with plugins http://hpccsystems.com/download/free-co ... r-platform \\n\\n\\nAs far as the dependencies are concerned. You'll need a couple things.\\nhttp://cran.r-project.org/src/contrib/R ... 0.4.tar.gz\\nhttp://cran.r-project.org/src/contrib/R ... .10.tar.gz\\nas well as the epel repository for yum.\\n\\nAfter you have those things, you will want to install:\\nyum install R-core\\nyum install R-devel\\nR CMD INSTALL Rcpp_0.10.4.tar.gz\\nR CMD INSTALL RInside_0.2.10.tar.gz\\n\\nHope that helps.\", \"post_time\": \"2013-07-22 16:18:09\" },\n\t{ \"post_id\": 4348, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Re: Dependencies for 4.0 embedded languages\", \"username\": \"DSC\", \"post_text\": \"I upgraded my 3.10.8-8 cluster to 4.0 (with plugins) and ran into a problem:\\n\\n00000028 2013-07-19 15:35:13.465 36869 36869 "Error loading /opt/HPCCSystems/plugins/libjavaembed.so: libmawt.so: cannot open shared object file: No such file or directory"\\n00000029 2013-07-19 15:35:13.465 36869 36869 "ERROR: 0: /var/lib/jenkins/workspace/CE-Candidate-4.0.0-1-with-plugins/CE/centos-6.4-x86_64/HPCC-Platform/common/dllserver/thorplugin.cpp(460) : Loading plugin : Failed to load plugin /opt/HPCCSystems/plugins/libjavaembed.so"
\\nThe above excerpt was taken from a roxie.log file.\\n\\nMost of the needed system libraries reside within /usr/lib/jvm/jre/lib/amd64/. On my systems, different versions of libmawt.so existed within subdirectories named 'headless', 'xawt' and 'motif21'. I created a symlink to the 'xawt' version:\\n\\ncd /usr/lib/jvm/jre/lib/amd64\\nsudo ln -s xawt/libmawt.so .
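Before picking one of the libmawt.so variants, it can help to confirm exactly which shared objects the plugin still fails to resolve; ldd and find are enough for that, using the plugin path from the log excerpt above:

# list any unresolved shared-library dependencies of the embedded-Java plugin
ldd /opt/HPCCSystems/plugins/libjavaembed.so | grep "not found"
# locate the libmawt.so copies shipped with the installed JRE
find /usr/lib/jvm -name libmawt.so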
\\nThat symlink allowed the plugin to start. I haven't fully tested this solution, however. Is there a different workaround that is known to work better? (I did try the headless version first, but then there was a complaint about a missing symbol.)\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-22 16:08:06\" },\n\t{ \"post_id\": 4345, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Re: Dependencies for 4.0 embedded languages\", \"username\": \"DSC\", \"post_text\": \"Excellent! I'll review this as well as pass this along to my team.\\n\\nThank you very much!\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-07-19 19:03:20\" },\n\t{ \"post_id\": 4343, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Re: Dependencies for 4.0 embedded languages\", \"username\": \"arjuna chala\", \"post_text\": \"Dan,\\n\\nPlease take a look at \\nhttps://github.com/arjunachala/arjunachala.github.io/wiki. Granted, my wiki post was for Ubuntu 13.04 but Redhat should not be too different.\\n\\nHope this helps.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2013-07-19 15:15:55\" },\n\t{ \"post_id\": 4342, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Re: Dependencies for 4.0 embedded languages\", \"username\": \"flavio\", \"post_text\": \"Dan, Arjuna and Dinesh have been working on documenting the requirements and configuration, and should be able to provide guidance in this respect.\\n\\nFlavio\", \"post_time\": \"2013-07-19 14:55:49\" },\n\t{ \"post_id\": 4337, \"topic_id\": 971, \"forum_id\": 14, \"post_subject\": \"Dependencies for 4.0 embedded languages\", \"username\": \"DSC\", \"post_text\": \"Is there any information on the dependencies that will need to be installed to support the other languages? I'm most interested in RPMs under RHEL6, for Java.\\n\\nI found a Jira entry talking about a number of R packages to install, but because it was actually a bug report it's pretty unclear as to exactly what the required packages should be.\\n\\nAlternatively, is there an easy way to determine what needs to be installed via the command line?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-19 13:16:58\" },\n\t{ \"post_id\": 6134, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":1o33joly]I restarted my router and the problem solved! Cannot believe it it is a Network Problem.
\\n\\nWell, obviously it was Glad you got it sorted out!\\n\\nRegards,\\n\\nBob\\n\\nThanks again for your help, Bob. I really appreciate it.\", \"post_time\": \"2014-07-29 17:23:53\" },\n\t{ \"post_id\": 6132, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"
I restarted my router and the problem was solved! Cannot believe it was a network problem.
\\n\\nWell, obviously it was Glad you got it sorted out!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-29 12:00:41\" },\n\t{ \"post_id\": 6130, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":36zkmtuh]Can the person that told you the cluster was operating normally do a ping from your machine to the Dali server? You might be blocked in some way.\\n\\nThe person who set up your cluster needs to probably take a look at the configuration of their system to see if it was set up properly. I have a feeling that they have multiple network connections for that one box and he didn’t specify properly in the environment.conf file which network card to use.\\n\\nRegards,\\n\\nBob\\n\\nI restarted my router and the problem solved! Cannot believe it it is a Network Problem.\", \"post_time\": \"2014-07-28 20:32:06\" },\n\t{ \"post_id\": 6129, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"Can the person that told you the cluster was operating normally do a ping from your machine to the Dali server? You might be blocked in some way.\\n\\nThe person who set up your cluster needs to probably take a look at the configuration of their system to see if it was set up properly. I have a feeling that they have multiple network connections for that one box and he didn’t specify properly in the environment.conf file which network card to use.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-28 20:02:50\" },\n\t{ \"post_id\": 6128, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":1vj13p00]No problem, what you are getting is highly unusual. If you can connect to the ECL Watch, the spray should just "work". I'm sure it's a bad configuration or possibly a firewall issue. I have emailed some of the techs here and will reply again if something jumps out.\\n\\nOr did you finally get it to work?
\\n\\n\\nBob\\n\\nNo luck still not working, and I was told the cluster is running normally. Dali, dfuserver, and dafilesrv are running.\", \"post_time\": \"2014-07-28 19:59:13\" },\n\t{ \"post_id\": 6127, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"No problem, what you are getting is highly unusual. If you can connect to the ECL Watch, the spray should just "work". I'm sure it's a bad configuration or possibly a firewall issue. I have emailed some of the techs here and will reply again if something jumps out.\\n\\nOr did you finally get it to work?
\\n\\n\\nBob\", \"post_time\": \"2014-07-28 19:54:30\" },\n\t{ \"post_id\": 6126, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":1elzv7pg]It looks like the cluster may be configured incorrectly. Can you contact the person who set up the cluster and show them your error. They are probably missing a step in the cluster configuration, or it is possible that the Dali Server may not be started.\\n\\nRegards,\\n\\nBob\\n\\n\\nOkay, I got it. Thanks so much for your help.\", \"post_time\": \"2014-07-28 19:46:40\" },\n\t{ \"post_id\": 6125, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"I just tired to use dfuplus to spray the data; however, I got this error: ERROR: Esp server url not specified.\\n\\n[quote="bforeman":1pigwx9f]Try using the srcip argument, like this:\\n\\n
//fixed spray example:\\ndfuplus action=spray srcip=10.150.50.14\\nsrcfile=c:\\\\import\\\\timezones.txt dstname=RTTEMP::timezones.txt\\ndstcluster=thor format=fixed recordsize=155
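The "ERROR: Esp server url not specified" message mentioned at the top of this post appears when dfuplus is run without being told which ESP to contact. Adding server= (the ECL Watch address on port 8010) to the same example should address it; the option name is taken from the Client Tools documentation as best I recall, so confirm it against dfuplus on your build, and add username=/password= on secured systems:

# the same spray, plus server= so dfuplus knows which ESP (ECL Watch) to talk to
dfuplus server=http://10.150.50.14:8010 action=spray srcip=10.150.50.14 \
    srcfile=c:\import\timezones.txt dstname=RTTEMP::timezones.txt \
    dstcluster=thor format=fixed recordsize=155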
\\n\\n...where srcip is the IP address of your target cluster.\\n\\nRegards,\\n\\nBob[/quote]\", \"post_time\": \"2014-07-28 19:45:24\" },\n\t{ \"post_id\": 6124, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"It looks like the cluster may be configured incorrectly. Can you contact the person who set up the cluster and show them your error. They are probably missing a step in the cluster configuration, or it is possible that the Dali Server may not be started.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-28 19:43:57\" },\n\t{ \"post_id\": 6123, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":12swxyqy]OK, and what is the IP address of your ECL Watch? It sounds like there is a problem with the cluster you are working with. The Dali server could actually be down. Are you connecting to a cluster that someone set up for you or are you using an HPCC VM cluster?\\n\\nI am connecting to a cluster that someone set up for me. The IP address of the cluster is 192.168.1.101 and I am using http://192.168.1.101:8010/ to access ECL Watch. Thank you.\", \"post_time\": \"2014-07-28 19:35:05\" },\n\t{ \"post_id\": 6122, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"OK, and what is the IP address of your ECL Watch? It sounds like there is a problem with the cluster you are working with. The Dali server could actually be down. Are you connecting to a cluster that someone set up for you or are you using an HPCC VM cluster?\", \"post_time\": \"2014-07-28 19:31:22\" },\n\t{ \"post_id\": 6121, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"After I click button "Spray", you can see the error msg in attached pics. \\n\\n\\n[quote="bforeman":26zmsjs7]OK, I see, you are trying to spray from the landing zone. \\n\\nYou should not need the DFUPlus command line tool to do this, you should be able to spray directly using the Fixed option shown on your screen shot. \\n\\nYou are trying to spray the file shown on the landing zone, correct? (OriginalPerson)\\n\\nThat file has a Fixed Length record. In the Fixed Spray option, you only need to enter what is shown on the top of page 9, and then press the Spray button.\\n\\nWhen do you see this error?\", \"post_time\": \"2014-07-28 19:28:12\" },\n\t{ \"post_id\": 6120, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"[quote="bforeman":2jom42gv]OK, I see, you are trying to spray from the landing zone. \\n\\nYou should not need the DFUPlus command line tool to do this, you should be able to spray directly using the Fixed option shown on your screen shot. \\n\\nYou are trying to spray the file shown on the landing zone, correct? (OriginalPerson)\\n\\nThat file has a Fixed Length record. In the Fixed Spray option, you only need to enter what is shown on the top of page 9, and then press the Spray button.\\n\\nWhen do you see this error?\", \"post_time\": \"2014-07-28 19:26:02\" },\n\t{ \"post_id\": 6119, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"OK, I see, you are trying to spray from the landing zone. 
\\n\\nYou should not need the DFUPlus command line tool to do this, you should be able to spray directly using the Fixed option shown on your screen shot. \\n\\nYou are trying to spray the file shown on the landing zone, correct? (OriginalPerson)\\n\\nThat file has a Fixed Length record. In the Fixed Spray option, you only need to enter what is shown on the top of page 9, and then press the Spray button.\\n\\nWhen do you see this error?\", \"post_time\": \"2014-07-28 19:15:19\" },\n\t{ \"post_id\": 6118, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"I am a new user and was doing the tutorial using ECL Watch (please see attached picture). Is dfuplus a programming tool please?\", \"post_time\": \"2014-07-28 19:03:04\" },\n\t{ \"post_id\": 6117, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"In the dfuplus command line.\\n\\nWhere are you trying to spray from?\", \"post_time\": \"2014-07-28 18:57:11\" },\n\t{ \"post_id\": 6116, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"Where should I run the code please? in ECL Playground? Thanks.\\n\\n[quote="bforeman":1opbqwg0]Try using the srcip argument, like this:\\n\\n//fixed spray example:\\ndfuplus action=spray srcip=10.150.50.14\\nsrcfile=c:\\\\import\\\\timezones.txt dstname=RTTEMP::timezones.txt\\ndstcluster=thor format=fixed recordsize=155
\\n\\n...where srcip is the IP address of your target cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-28 18:54:33\" },\n\t{ \"post_id\": 6113, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"Try using the srcip argument, like this:\\n\\n//fixed spray example:\\ndfuplus action=spray srcip=10.150.50.14\\nsrcfile=c:\\\\import\\\\timezones.txt dstname=RTTEMP::timezones.txt\\ndstcluster=thor format=fixed recordsize=155
\\n\\n...where srcip is the IP address of your target cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-28 18:18:51\" },\n\t{ \"post_id\": 6112, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"rqg0717\", \"post_text\": \"I got a similar error: Failed: Failed to connect to dafilesrv/daliservix on 10.109.96.143:7100\\n\\nWhere to change the IP of dafilesrv please?\", \"post_time\": \"2014-07-28 18:10:19\" },\n\t{ \"post_id\": 4507, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Re: Spray Error Invalid IP\", \"username\": \"bforeman\", \"post_text\": \"Hi Gordon,\\n\\nI see the same address when I use the ECL command line with packagemap - my work around there is to explicitly name my target IP using the -s parameter. I guess with DFU Plus you should be able to do that?\\n\\nBut if you are getting that error in the ECL watch interface, that sounds like a bug to me.\\n\\nI am using Community 4.0.0-9\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2013-08-29 12:46:02\" },\n\t{ \"post_id\": 4502, \"topic_id\": 1009, \"forum_id\": 14, \"post_subject\": \"Spray Error Invalid IP\", \"username\": \"gsmith\", \"post_text\": \"I am getting the following error when spraying an XML file from my DropZone:\\n\\nFailed: Failed to connect to dafilesrv/daliservix on 192.168.1.37:7100\\n\\nI don't have a machine at 192.168.1.37 and cannot find that IP in the environment file, where might I change it?\", \"post_time\": \"2013-08-29 09:13:22\" },\n\t{ \"post_id\": 4549, \"topic_id\": 1015, \"forum_id\": 14, \"post_subject\": \"Re: Install of 4.0.0-6\", \"username\": \"soyyo\", \"post_text\": \"Hi Joe-\\n\\nSounds like the install went awry since:\\n\\n\\t1. You had to manually create links\\n\\n\\n\\t2. You had ulimit issues.\\n\\n\\t-check /etc/security/limits.conf; There should be some hpcc lines appended to the end.\\n\\n\\n1.Validate your MD5SUM on the rpm (perhaps download again).\\n\\n2.Copy the /etc/HPCCSystems/environment.xml to /tmp (just in case)\\n\\n2.Uninstall the rpm\\n\\n3.Validate that it was uninstalled\\n\\n4. Use Yum to install rpm again and resolve dependencies.\\n\\n\\nIf it installed correctly, your links should be there and the limits.conf fine should have the aforementioned entries.\\n\\n\\nHTH\\n\\nF\", \"post_time\": \"2013-09-06 15:30:00\" },\n\t{ \"post_id\": 4521, \"topic_id\": 1015, \"forum_id\": 14, \"post_subject\": \"Install of 4.0.0-6\", \"username\": \"jritz\", \"post_text\": \"I have two issues with the install of 4.0.0-6.\\n\\n1. When I restart the hpcc-init service from my thor master/service node, none of the dafilesrv processes start up on my thor slaves. I had to manually create the links in /etc/rc.d/init.d for dafilesrv along with hpcc-init. Not sure if I installed it wrong, but all I did was install the rpm and try to start the service. \\n\\n2. When trying to spay a file (any file) it throws the error ulimit:open files: cannot modify limit: Operation not permitted. I raised the soft and hard file limits to 4096 and that did not help. Any ideas?\\n\\nThank you,\\n\\nJoe Ritz\", \"post_time\": \"2013-09-03 15:40:13\" },\n\t{ \"post_id\": 4547, \"topic_id\": 1023, \"forum_id\": 14, \"post_subject\": \"Re: Changing ESP Log level\", \"username\": \"DSC\", \"post_text\": \"I think logging levels are determined on a per-process basis and managed within the configmgr. 
I just looked at them, though, and there doesn't seem to be a standard way of dealing with them and many processes don't seem to have settings exposed. Here is what I found:\\n\\n* Dali: 'msgLevel' (defaults to 100)\\n\\n* DFUServer: No logging options found.\\n\\n* ECL Agent: 'traceLevel' (defaults to 0)\\n\\n* ECL CC Server: 'traceLevel' (defaults to 1)\\n\\n* ECL Scheduler: No logging options found.\\n\\n* ESP: 'logLevel' (defaults to 1); 'logRequests' (defaults to false); 'logResponses' (defaults to false)\\n\\n* Roxie: 'enableSysLog' (defaults to true); lots of logging options in Tracing tab.\\n\\n* Sasha: Log directory, but no level-oriented options found.\\n\\n* Thor: No logging options found.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-09-06 11:59:01\" },\n\t{ \"post_id\": 4542, \"topic_id\": 1023, \"forum_id\": 14, \"post_subject\": \"Changing ESP Log level\", \"username\": \"vsreedharan\", \"post_text\": \"Hi,\\n\\nI am using HPCC Community edition, I could see that my ESP log file grows fast. I assume that it logs lots of debug / Info level logs. Is there a way to change/check the log level for my ESP Server? \\n\\nThanks in advance!\\n\\n\\nRegards,\\nSree\", \"post_time\": \"2013-09-05 17:13:13\" },\n\t{ \"post_id\": 4649, \"topic_id\": 1042, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Environment: best option\", \"username\": \"rtaylor\", \"post_text\": \"can we install on Windows 8?
Good question -- to which someone who knows the answer will have to respond. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-26 15:19:44\" },\n\t{ \"post_id\": 4645, \"topic_id\": 1042, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Environment: best option\", \"username\": \"DQ\", \"post_text\": \"Thx Richard!\\nIt does help to know that one is moving forward in the right direction.\\nAppreciate it.\\nOk, now that I'm on my way to installing the stuff...VM player, etc...the pdf says Windows XP, Vista, 7...can we install on Windows 8?\", \"post_time\": \"2013-09-25 23:26:28\" },\n\t{ \"post_id\": 4643, \"topic_id\": 1042, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Environment: best option\", \"username\": \"rtaylor\", \"post_text\": \"DQ,\\n\\nGood idea. Starting with the VM gets you up and running with ECL and the HPCC environment really quickly. Once you've gotten a feel for it, moving your code to a real cluster is simple.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-09-25 19:02:30\" },\n\t{ \"post_id\": 4641, \"topic_id\": 1042, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Environment: best option\", \"username\": \"DQ\", \"post_text\": \"Looked at the intro slide & it says 'VM'...so going with it.\", \"post_time\": \"2013-09-25 16:19:26\" },\n\t{ \"post_id\": 4632, \"topic_id\": 1042, \"forum_id\": 14, \"post_subject\": \"HPCC Environment: best option\", \"username\": \"DQ\", \"post_text\": \"Being a newbie, what would be the best way to create HPCC environment from the following:\\n1]An HPCC Systems Cloud Services cluster.\\n2]An AWS cluster.\\n3]Own cluster using Community Edition.\\n4]VM environment.\\nAppreciate any advice.\\nRgds.\\nDQ\", \"post_time\": \"2013-09-24 03:20:14\" },\n\t{ \"post_id\": 4793, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"Rahul Jain\", \"post_text\": \"I have got the issue resolved. Below are some performance improvement factors -\\n\\nIssue 1 - JOIN existed within a transform. It hence read each record one by one instead of a complete dataset. Hence it was hitting the index key around 1000 times. \\nResolution - We brought the JOIN outside. Hence we read the index key only once.\\nConclusion - Try to keep out Transform outside a transform as far as possible.\\n\\nIssue 2 - Missing Keep and Limit in Joins\\nResolution - Added Keep and Limits wherever possible in the JOINS.\\n\\nIssue 3 - We were filtering the some of the dataset couple of times on different condition.\\nResolution - We did filtering in 1 go rather then filtering again and again wherever possible.\\n\\n\\nI have some questions now on graph as I have been working on New ECL Watch for sometime now -\\n\\nI see 4 words in RecordCount section of graph -Tiny, Few, Disk, Memory\\nWhat do they actually signify ?\", \"post_time\": \"2013-10-21 14:13:13\" },\n\t{ \"post_id\": 4654, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"Rahul Jain\", \"post_text\": \"But Yes I can share few things with you - My Service is a kind of Batch.To test in Builder window I used 1 recordset. Below are some points from my analysis -\\n\\n1. Total Service Time with 1 recordset as per graph is - 84.976. ( I think its in sec)\\n2. Within Timing Tab all graph looks good except the last subgraph which takes almost 99% of total graph - 84.077.Heat map is RED.So I believe the culprit is this subgraph. But this graph is too big and complicated to analyze.\\n3. 
I do see time for subgraph -84.077 but no timings for 100 of subgraphs within subgraph.\\n4. Hence as there are no timing I used recordsize some other probable means to analyze.I see Record size as 2438 and record count as [0..?]DISK within Activities tab. I also see other text as FEW,TINY. I am not sure that what they mean. Also I guess that label DISK is bad. Am I correct?\\n5.The Code associated with the recordsize 2438 shows up in the ECL column of activities. Its a project using complicated functions within transform. But whole point is I do not have any relevant point to believe what exactly is the issue within the subgraph.\", \"post_time\": \"2013-09-27 16:04:26\" },\n\t{ \"post_id\": 4653, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"Rahul Jain\", \"post_text\": \"I am sorry but I cannot share the file due to certain policies over this forum.\", \"post_time\": \"2013-09-27 15:40:27\" },\n\t{ \"post_id\": 4640, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"gsmith\", \"post_text\": \"If you can, would you open the graph that was causing the issue in the Tech Preview and select Advanced and Show XGMML and send me a copy of the XGMML and I will add to my list of "nasties" and see if I can see what is causing it?\", \"post_time\": \"2013-09-25 13:38:16\" },\n\t{ \"post_id\": 4639, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"Rahul Jain\", \"post_text\": \"Hi,\\n\\nI have already tried below things for Graph-\\n1. IE -8/9/10. and Mozilla 24 ( Both New ECL Watch and old ECL Watch). - Attached screenshot.\\n2. In ECL IDE also it breaks.\\n\\nThis are the option which worked for me- \\n1. Go to new ECL Watch Graphs tab. Enable checkbox for the required graph (Ex. graph2) and click on Open(Safe mode). That's helps and I am able to see \\n2. Use Playground.\", \"post_time\": \"2013-09-25 13:35:17\" },\n\t{ \"post_id\": 4635, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"Re: What is solution for Graph\", \"username\": \"gsmith\", \"post_text\": \"The bad news:\\nThere are some graphs which will simply break the graph layout algorithm (we re-use the AT&T graphviz software for the actual layout, which sometimes just chokes and dies).\\n\\nThe good news:\\nIn the 4.0.2 HPCC Platform version there is a "Tech Preview" of the next ECL Watch, in there the "new" graphs page has several options to limit the initial size of the graph, which should let the user still navigate it while not trying to layout and display the entire thing.\\n\\nYou can open the new ECL Watch via the old ECL Watch, look for "Tech Preview" on the left hand side.\", \"post_time\": \"2013-09-25 08:38:01\" },\n\t{ \"post_id\": 4634, \"topic_id\": 1044, \"forum_id\": 14, \"post_subject\": \"What is solution for Graph\", \"username\": \"Rahul Jain\", \"post_text\": \"Hi ,\\n\\nI am unable to open the graph anywhere. 
Below are the details - \\nECL IDE Version - community_4.0.0-2\\nOS - Windows 7.\\n\\nIn ECL IDE when I try to open the graph I get below error -\\n"ECL IDE Application has stopped working"\\nIn IE -10 and IE -8 IE stops and finally breaks\\nIn Mozilla - It says plugin crashed.\\n\\nI have the graph control view plugin.\", \"post_time\": \"2013-09-24 20:04:53\" },\n\t{ \"post_id\": 4662, \"topic_id\": 1047, \"forum_id\": 14, \"post_subject\": \"Re: Authentication\", \"username\": \"gsmith\", \"post_text\": \"Just checked and I do have the following line:\\n<Authenticate method="htpasswd" htpasswdFile="/etc/HPCCSystems/.htpasswd" workunitsBasedn="ou=workunits,ou=ecl">\\n\\nAnd ECL Watch does require me to login, but once logged in, when I click on Add User and complete the form and click Submit I get the following error:\\n\\nException(s) occurred:\\nReporter: ws_access::ADDUSER()\\nCode\\tMessage\\n20030\\t2013-09-27 20:17:28 GMT: Security manager is not found. Please check if the system authentication is set up correctly\", \"post_time\": \"2013-09-28 06:29:14\" },\n\t{ \"post_id\": 4655, \"topic_id\": 1047, \"forum_id\": 14, \"post_subject\": \"Re: Authentication\", \"username\": \"william.whitehead\", \"post_text\": \"First try looking in the ESP configuration file, located in /var/lib/HPCCSystems/myesp/esp.xml for the following string. \\n\\n<Authenticate method="htpasswd" htpasswdFile="/etc/HPCCSystems/.htpasswd"\\n\\nIf its not there but you did configure htpasswd, then its possible you did not copy the modified environment file from /etc/HPCCSystems/source/environment.xml to /etc/HPCCSystems/environment.xml. Hope that helps\", \"post_time\": \"2013-09-27 18:12:56\" },\n\t{ \"post_id\": 4651, \"topic_id\": 1047, \"forum_id\": 14, \"post_subject\": \"Authentication\", \"username\": \"gsmith\", \"post_text\": \"I have enabled htpasswd authentication per the docs and added one user via the htpasswd tool (so that I can log into ECL Watch)\\n\\nNow when I try to add a user I get the following error:\\n\\n
Exception(s) occurred:\\n\\nReporter: ws_access::ADDUSER()\\n\\nCode\\tMessage\\n20030\\t2013-09-26 15:57:31 GMT: Security manager is not found. Please check if the system authentication is set up correctly
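For reference, a minimal sketch of the htpasswd setup described in the replies above, assuming Apache's htpasswd utility is available and the environment was edited through configmgr (which writes to /etc/HPCCSystems/source/); the user name and password below are placeholders:

# create the password file and add the first user (-c creates the file; omit it for later users)
sudo htpasswd -c -b /etc/HPCCSystems/.htpasswd exampleuser examplepassword
# make sure the environment actually in use contains the htpasswd Authenticate method,
# then restart so ESP picks it up
sudo cp /etc/HPCCSystems/source/environment.xml /etc/HPCCSystems/environment.xml
sudo service hpcc-init restart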
\\n\\nWhat am I missing?\", \"post_time\": \"2013-09-26 16:21:05\" },\n\t{ \"post_id\": 4753, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"Re: unable to communicate with ECL Window\", \"username\": \"david.wheelock\", \"post_text\": \"Abhi,\\n\\nThere is a known issue in the VM instance where the IP address it presents to you is not the one you should be using to connect. If you open a command prompt and run ifconfig, it will give you the actual IP address of the VM. This is the address you want to use to connect.\\n\\n- David\", \"post_time\": \"2013-10-11 14:08:39\" },\n\t{ \"post_id\": 4751, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"Re: unable to communicate with ECL Window\", \"username\": \"bforeman\", \"post_text\": \"Abhi, I just downloaded the 64-bit VM for 4.0.2-2 and it is working perfectly - IP address given connects fine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-11 12:46:29\" },\n\t{ \"post_id\": 4749, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"Re: unable to communicate with ECL Window\", \"username\": \"bforeman\", \"post_text\": \"Try bumping it up two numbers, my VM Image shows 192.168.229.128 and I am connecting at 192.168.229.130:8010. I am using 4.0.0-9\", \"post_time\": \"2013-10-11 12:19:02\" },\n\t{ \"post_id\": 4748, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"Re: unable to communicate with ECL Window\", \"username\": \"abhi.datta\", \"post_text\": \"hi Bob - thanks for your response. i tried changing the URL as suggested but no luck \\n\\nAbhi\", \"post_time\": \"2013-10-11 12:13:46\" },\n\t{ \"post_id\": 4746, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"Re: unable to communicate with ECL Window\", \"username\": \"bforeman\", \"post_text\": \"Hi Abhi,\\n\\nTry changing the last number of your IP address up or down one number. I saw this problem on my Windows 7 Enterprise machine and that fixed it.\\n\\nFor example, the VM was showing 192.168.229.129 and I needed to change my URL to 192.168.229.130:8010 and I was able to connect OK.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-11 12:07:28\" },\n\t{ \"post_id\": 4745, \"topic_id\": 1068, \"forum_id\": 14, \"post_subject\": \"unable to communicate with ECL Window\", \"username\": \"abhi.datta\", \"post_text\": \"I am using the HPCC VM Image 4.0.2 , and VMPlayer 5.0.1\\n\\nThe ECL WATCH URL displayed by VM after loading the first time doesnt work\\n\\nso when i put http;//192.168.xxx.xxx:8010 I get the response Connection refused\", \"post_time\": \"2013-10-11 11:03:09\" },\n\t{ \"post_id\": 4899, \"topic_id\": 1105, \"forum_id\": 14, \"post_subject\": \"Re: HPCC compatiable with NIC teaming?\", \"username\": \"flavio\", \"post_text\": \"Markham,\\n\\nyes, it does.\\n\\nI should be able to find some information on the way we configure certain environments with teaming. \\n\\nIn general, we tend not to use teaming for the following reasons:\\n\\n1.\\tFor Roxie environments, which are 24x7 critical online and exposed to real time customer access, we prefer to have at least 2 (or 3) fully independent clusters and switches located in different areas of our datacenters (the only shared component is a pair of F5 load balancers in active/standby configuration providing round-robin load balancing of transactions to all nodes, and performing health checks), to ensure that even a catastrophic event in an area of the datacenter will not completely affect all our production systems. 
Even with teaming, a localized event (for example, heat or power) in the racks containing the Roxie nodes for a single cluster could bring your cluster down despite the different network switches;\\n\\n2.\\tFor Thor environments, since their workload is batch oriented, the downtime as a consequence of a potential switch port-blade failure can be easily tolerated. We have multiple Thor environments (and both, Thor and Roxie are capable of remotely reading data from other environments), so even if a Thor system is down for an hour, the overall impact on the data preparation process is very small.\\n\\nHaving said this, there are HPCC users and commercial customers who prefer to have a dual switch configuration, so we have validated the teaming setup in the past. HPCC doesn’t really care how the teaming is accomplished as long as the underlying operating system provides a single virtual NIC to bind ports to (all teaming setups provide this), and the failover process is transparent to user space (again, this is a basic property of all teaming setups). In sum, an standard interconnect supporting TCP and UDP would work (Ethernet or Infiniband).\\n\\nFlavio\", \"post_time\": \"2013-11-08 14:14:15\" },\n\t{ \"post_id\": 4896, \"topic_id\": 1105, \"forum_id\": 14, \"post_subject\": \"HPCC compatiable with NIC teaming?\", \"username\": \"mgreen\", \"post_text\": \"Hello Everybody,\\n\\nIs NIC teaming compatiable with HPCC? Any information would be great! Thanks in advance!!\", \"post_time\": \"2013-11-08 02:38:18\" },\n\t{ \"post_id\": 5037, \"topic_id\": 1138, \"forum_id\": 14, \"post_subject\": \"Re: Roxie Query Returning only 100 Records\", \"username\": \"bforeman\", \"post_text\": \"Hi Bhagwant,\\n\\nYes, that is correct. You can set the limit globally, or control it individually for each workunit. I always opt for the latter, as removing the limit for all queries can quickly use up resources on Dali, depending on how active your cluster is.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-10 12:33:48\" },\n\t{ \"post_id\": 5035, \"topic_id\": 1138, \"forum_id\": 14, \"post_subject\": \"Re: Roxie Query Returning only 100 Records\", \"username\": \"Bhagwant\", \"post_text\": \"Hi Bob,\\nActually i am fetching the Results using Web Service.\\nWe Changed the limit in ECL IDE Preferences from 100 to 0 and then again compiled and Published the Query.This solved our problem but wanted to know is this a proper way ?\\n\\nRegards,\\nBhagwant Bhobe\", \"post_time\": \"2013-12-10 05:45:51\" },\n\t{ \"post_id\": 5030, \"topic_id\": 1138, \"forum_id\": 14, \"post_subject\": \"Re: Roxie Query Returning only 100 Records\", \"username\": \"bforeman\", \"post_text\": \"How are you viewing the results? In the ECL IDE, there is a default setting of 100 records for each workunit set in the Results tab in your preferences, and I believe also in the WS ECL service there is a limit. 
You can bump up this setting to whatever you like, or to see everything simply OUTPUT the query result.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-06 20:52:39\" },\n\t{ \"post_id\": 5028, \"topic_id\": 1138, \"forum_id\": 14, \"post_subject\": \"Roxie Query Returning only 100 Records\", \"username\": \"Bhagwant\", \"post_text\": \"Hi,\\nWe have written Roxie Query, but when we execute the Query the Result Set which is returned is limited to 100 Records,Is there any configuration changes that is required if we want that Roxie query to written the entire dataset.\\n\\nRegards,\\nBhagwant Bhobe\", \"post_time\": \"2013-12-06 09:03:46\" },\n\t{ \"post_id\": 5034, \"topic_id\": 1140, \"forum_id\": 14, \"post_subject\": \"Cannot connect to eclwatch browser interface\", \"username\": \"es335sg\", \"post_text\": \"Hello, \\nI am attempting the most rudimentary functions of a basic single node HPCC install and encountering some issues. \\n\\nI have installed :\\nhpccsystems-platform-community-4.0.22.x86_64 on rhel 6.1\\n\\nstarted \\n/sbin/service hpcc-init start\\n\\nI then attempt to access the eclwatch web interface and fail\\n\\n\\niptables are turned off: \\n\\nChain INPUT (policy ACCEPT)\\ntarget prot opt source destination\\n\\nChain FORWARD (policy ACCEPT)\\ntarget prot opt source destination\\n\\nChain OUTPUT (policy ACCEPT)\\ntarget prot opt source destination\\n\\nnetstat indicates:\\ntcp 0 0 0.0.0.0:8010 0.0.0.0:* LISTEN 8827/esp\\n\\nI can : telnet 111.222.333.444 8010\\n\\nEX:\\n telnet mylab1 8010\\nTrying 111.222.333.444\\nConnected to mylab (111.222.333.444).\\nEscape character is '^]'.\\n\\nnetstat -tanp | grep 8010\\ntcp 0 0 0.0.0.0:8010 0.0.0.0:* LISTEN 8827/esp\\ntcp 0 0 111.222.333.444:8010 111.222.333.555:41430 ESTABLISHED 8827/esp <-- My telnet attempt\\n\\nI beleive I am not being blocked by my firewall. \\n\\nAny suggestions ? \\n\\nThx\\nSG\", \"post_time\": \"2013-12-09 18:58:23\" },\n\t{ \"post_id\": 5065, \"topic_id\": 1150, \"forum_id\": 14, \"post_subject\": \"Re: Set up HPCC without root privilege\", \"username\": \"ming\", \"post_text\": \"As I know the user should be in sudo list if not root. Installation (rpm/deb) and start/stop Linux service require run as root or through sudo\", \"post_time\": \"2013-12-18 12:40:33\" },\n\t{ \"post_id\": 5061, \"topic_id\": 1150, \"forum_id\": 14, \"post_subject\": \"Set up HPCC without root privilege\", \"username\": \"linhbngo\", \"post_text\": \"Is it possible to set up HPCC without root privilege in a manner similar to Hadoop/MR (just call on executable to start services)\", \"post_time\": \"2013-12-17 22:38:11\" },\n\t{ \"post_id\": 5216, \"topic_id\": 1160, \"forum_id\": 14, \"post_subject\": \"Re: unable to insall hpcc pkg\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"Here's a quote from page 17 of the Installing and Running the HPCC Platform - HPCC Installation and Startup documentation:\\n\\n
The installation and package that you download is different depending on the operating system you plan to use. The\\ninstallation packages will fail to install if their dependencies are missing from the target system.\\n
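One way to avoid forcing a follow-up apt-get -f install is to install the dependencies first and only then install the platform package; a minimal sketch using the package names listed in the dpkg output quoted in the original post:

# resolve the dependencies named in the dpkg error output up front
sudo apt-get install -y libboost-regex1.53.0 libicu48 libxalan-c111 libxerces-c3.1 \
    binutils g++ expect libarchive13
# the platform package should now configure cleanly
sudo dpkg -i hpccsystems-platform_community-4.2.0-3saucy_amd64.deb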
\\n\\nSo you must install the dependencies on your own. I've been down this path as well.\", \"post_time\": \"2014-02-17 16:01:31\" },\n\t{ \"post_id\": 5095, \"topic_id\": 1160, \"forum_id\": 14, \"post_subject\": \"unable to insall hpcc pkg\", \"username\": \"ankit_kailaswar\", \"post_text\": \"I am trying to install hpcc on Ubuntu 13.1. I am getting dependency error as,\\n\\n{code}\\ndpkg -i hpccsystems-platform_community-4.2.0-3saucy_amd64.deb Selecting previously unselected package hpccsystems-platform.\\n(Reading database ... 56670 files and directories currently installed.)\\nUnpacking hpccsystems-platform (from hpccsystems-platform_community-4.2.0-3saucy_amd64.deb) ...\\ndpkg: dependency problems prevent configuration of hpccsystems-platform:\\n hpccsystems-platform depends on libboost-regex1.53.0; however:\\n Package libboost-regex1.53.0 is not installed.\\n hpccsystems-platform depends on libicu48; however:\\n Package libicu48 is not installed.\\n hpccsystems-platform depends on libxalan-c111; however:\\n Package libxalan-c111 is not installed.\\n hpccsystems-platform depends on libxerces-c3.1; however:\\n Package libxerces-c3.1 is not installed.\\n hpccsystems-platform depends on binutils; however:\\n Package binutils is not installed.\\n hpccsystems-platform depends on g++; however:\\n Package g++ is not installed.\\n hpccsystems-platform depends on expect; however:\\n Package expect is not installed.\\n hpccsystems-platform depends on libarchive13; however:\\n Package libarchive13 is not installed.\\n\\ndpkg: error processing hpccsystems-platform (--install):\\n dependency problems - leaving unconfigured\\nErrors were encountered while processing:\\n hpccsystems-platform\\n{code}\\nI have to do foce install with apt-get (apt-get -f install) before running "dpkg -i hpccsystems-platform_community-4.2.0-3saucy_amd64.deb" and I want to avoid that. Is there any way to do so ?\", \"post_time\": \"2013-12-27 11:25:37\" },\n\t{ \"post_id\": 5262, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Re: Connect directly to Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Tony, that's excellent.\\n\\nI really appreciate your help.\\n\\nDavid\", \"post_time\": \"2014-02-20 21:41:38\" },\n\t{ \"post_id\": 5261, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Re: Connect directly to Roxie\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Yes the WSDL is the same, and the URL format does not need to be changed.\\n\\nJust update the IP and port to that of the selected roxie node.\\n\\nRegards,\\nTony\", \"post_time\": \"2014-02-20 21:36:43\" },\n\t{ \"post_id\": 5260, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Re: Connect directly to Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Many thanks for the replies Bob and Tony.\\n\\nDo you use the same wsdl and URL that you are using for ws_ecl but just on the Roxie farmer port?\\n\\nRegards\\n\\nDavid\", \"post_time\": \"2014-02-20 21:22:59\" },\n\t{ \"post_id\": 5254, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Re: Connect directly to Roxie\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi David,\\n\\nYes, you can send SOAP (or JSON) requests directly to ROXIE (other formats supported by WsEcl cannot currently be sent directly).\\n\\nA SOAP request can be sent to any of the roxie "farmer" nodes. By default the farmers will be listening on port 9876.\\n\\nThe main concern is load balancing. 
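As an illustration of the direct call described above, here is a hedged sketch of posting a SOAP envelope straight to a Roxie farmer node on the default port 9876; the node address, query name and input field are placeholders, and the element inside the Body is assumed to match the published query name:

# roxie-request.xml -- minimal SOAP envelope (query and field names are hypothetical)
cat > roxie-request.xml <<'EOF'
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Body>
    <myqueryRequest>
      <someInput>value</someInput>
    </myqueryRequest>
  </soap:Body>
</soap:Envelope>
EOF
# post it directly to one of the farmer nodes
curl -X POST -H "Content-Type: text/xml" --data @roxie-request.xml "http://10.0.0.1:9876/"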
You'll want to consider distributing requests to different nodes on the cluster. You can do this in your application, or by using some sort of external load balancer.\\n\\nRegards,\\nTony\", \"post_time\": \"2014-02-20 15:02:44\" },\n\t{ \"post_id\": 5250, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Re: Connect directly to Roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nConnecting to ROXIE directly can be done using WSDL. In the WS_ECL service, you will see a button that displays the WSDL. The front-end can then connect directly to the query via WSDL, bypassing WS_ECL.\\n\\nThere are some other threads on this forum that goes into this process in more detail.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-02-20 14:29:41\" },\n\t{ \"post_id\": 5231, \"topic_id\": 1211, \"forum_id\": 14, \"post_subject\": \"Connect directly to Roxie\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nWe are currently connecting our front end platform to WS_ECL via SOAP. Is there a way of connecting directly to Roxie without going via WS_ECL.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2014-02-19 14:10:44\" },\n\t{ \"post_id\": 5274, \"topic_id\": 1215, \"forum_id\": 14, \"post_subject\": \"Re: Single Server HPCC\", \"username\": \"dbd\", \"post_text\": \"soyyo,\\n\\nThank you!!!\\n\\n-dbd\", \"post_time\": \"2014-02-21 15:26:42\" },\n\t{ \"post_id\": 5272, \"topic_id\": 1215, \"forum_id\": 14, \"post_subject\": \"Re: Single Server HPCC\", \"username\": \"soyyo\", \"post_text\": \"The CentOS download from the website by default installs everything on one node and should work fine for very light usage. It does have to have access to internet in order to resolve dependencies.\\n\\nHTH\", \"post_time\": \"2014-02-21 15:07:14\" },\n\t{ \"post_id\": 5243, \"topic_id\": 1215, \"forum_id\": 14, \"post_subject\": \"Single Server HPCC\", \"username\": \"dbd\", \"post_text\": \"Folks,\\n\\nI was wondering if there is an equivalent way to install a single server version of HPCC running atop Centos, similar to the version that runs as a VM under Windows? I am not referring to another VM, but rather a cut down installation of the Linux-based package. This would be for pure evaluation and learning, clearly not for a real-world environment! I am simply trying to avoid items like an F2 load balancer, etc., at this point.\\n\\nRegards...\", \"post_time\": \"2014-02-19 21:38:09\" },\n\t{ \"post_id\": 5385, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"SuRFDownUnder\", \"post_text\": \"Hi Chris,\\n\\nThat worked. I now have the base node up and running. Thanks for the help.\\n\\nCheers,\\n Scott\", \"post_time\": \"2014-03-14 11:19:39\" },\n\t{ \"post_id\": 5384, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"SuRFDownUnder\", \"post_text\": \"Hi everyone,\\n\\nLooks like the entire SV crew is here. Good to hear from you all. As to the VM, I had that up and running, just wanted to get a bit more serious.\\n\\nCheers,\\n Scott\", \"post_time\": \"2014-03-14 08:12:10\" },\n\t{ \"post_id\": 5374, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"bforeman\", \"post_text\": \"Hi Scott,\\n\\nAn alternative idea for now to just to kick the tires on ECL would be to install the HPCC VM. It's easy and painless. 
Turns your computer into a single node THOR and ROXIE.\\n\\nAnd again, what Jim and Richard said, welcome aboard!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-11 16:52:39\" },\n\t{ \"post_id\": 5373, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"JimD\", \"post_text\": \"Welcome Scott!\\n\\nGood to see you here!! I hope Chris helped and I hope you like what you see. \\n\\nIf you have any other questions, we'll be here!\\n\\nJim\", \"post_time\": \"2014-03-11 15:02:09\" },\n\t{ \"post_id\": 5371, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"clo\", \"post_text\": \"Hi Scott,\\n\\nI'm sorry you're having some issues installing the dependencies. It can be a bit cryptic and tricky at times.\\n\\nTry using this instead:\\n\\nsudo yum install -y boost-regex.x86_64\\n\\n\\nLet me know if that helps.\\n\\nChris\", \"post_time\": \"2014-03-11 14:48:46\" },\n\t{ \"post_id\": 5369, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Re: Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"rtaylor\", \"post_text\": \"Scott,\\n\\nCan't help with your question, but I just wanted to welcome you to the HPCC community!\\n\\nHave fun with it, and any ECL questions you come up with, Bob and I will be glad to help.\\n\\nRichard Taylor\", \"post_time\": \"2014-03-11 01:49:10\" },\n\t{ \"post_id\": 5364, \"topic_id\": 1235, \"forum_id\": 14, \"post_subject\": \"Failed dependencies: libboost_regex-mt.so.5\", \"username\": \"SuRFDownUnder\", \"post_text\": \"I am building a machine to play with thor/roxie. I have installed CentOS 6.5 and am trying to install hpccsystems-platform_community-4.2.0-4.el6.x86_64.rpm. Using yum I have managed to install all the dependecies except for libboost_regex-mt.so.5()(64bit). When i do sudo yum -y install libboost_regex-mt.so.5 I get told "Package boost-regex-1.41.0-18.el6.i686 already installed and latest version". But when I do sudo rpm -Uvh /home/hpcc/Downloads/hpccsystems-platform_community-4.2.0-4.el6.x86_64.rpm I get the error "libboost_regex-mt.so.5()(64bit) is needed by hpccsystems-platform-community-4.2.04.x86_64"\\n\\nAny ideas how I can get past this?\", \"post_time\": \"2014-03-10 10:49:43\" },\n\t{ \"post_id\": 5466, \"topic_id\": 1259, \"forum_id\": 14, \"post_subject\": \"Re: ERR: Permission denied (password).\", \"username\": \"kmier\", \"post_text\": \"I re-installed and am no longer getting the error.\", \"post_time\": \"2014-04-03 16:03:36\" },\n\t{ \"post_id\": 5443, \"topic_id\": 1259, \"forum_id\": 14, \"post_subject\": \"Re: ERR: Permission denied (password).\", \"username\": \"kmier\", \"post_text\": \"I'm running on a single node Ubuntu 13.10 server.\", \"post_time\": \"2014-03-28 19:07:38\" },\n\t{ \"post_id\": 5442, \"topic_id\": 1259, \"forum_id\": 14, \"post_subject\": \"Re: ERR: Permission denied (password).\", \"username\": \"tlhumphrey2\", \"post_text\": \"Please, tell us a little about your server platform environment. 
Is your HPCC System a VM image or in an AWS cloud or Centos or Ubuntu, etc.?\", \"post_time\": \"2014-03-28 18:54:35\" },\n\t{ \"post_id\": 5441, \"topic_id\": 1259, \"forum_id\": 14, \"post_subject\": \"ERR: Permission denied (password).\", \"username\": \"kmier\", \"post_text\": \"When I attempt to spray a file I get the error ERR: Permission denied (password).\\nI don't have authentication enabled, never set a password and was never prompted for a password. What am I doing wrong?\", \"post_time\": \"2014-03-28 18:37:09\" },\n\t{ \"post_id\": 5486, \"topic_id\": 1263, \"forum_id\": 14, \"post_subject\": \"Re: Building HPCC-Platform on CygWin\", \"username\": \"tlhumphrey2\", \"post_text\": \"This comes from one of our expert hpcc architects:\\n\\nI suspect the best advice that can be given is to build it in a VM – even if you get the system to build you are not going to be able to run it – we don’t have the init scripts to start the system.\", \"post_time\": \"2014-04-08 13:35:39\" },\n\t{ \"post_id\": 5484, \"topic_id\": 1263, \"forum_id\": 14, \"post_subject\": \"Re: Building HPCC-Platform on CygWin\", \"username\": \"Keshav Shrikant\", \"post_text\": \"Hi,\\n I was able to build the project in Visual Studio after some effort but am still facing problems. I have to manually double-click the executables to start the hpcc environment. The "daserver" runs without a problem but "dfuserver" starts and is then immediately terminated. Also upon running "eclagent" the following problem pops up:\\nThe ordinal 4540 could not be located in the Dynamic Link Library LIBEAY32.dll.
\\n\\n I have used Xalan, Xerces, Zlib, Openssl, ICU and OpenLDAP libraries during the build. I tried replacing every conflicting Libeay32.dll file from the one in the openssl library, but that didn't work. Could you please elaborate on what seems to be the problem?\\n\\nThanks,\\nKeshav\", \"post_time\": \"2014-04-08 12:02:07\" },\n\t{ \"post_id\": 5472, \"topic_id\": 1263, \"forum_id\": 14, \"post_subject\": \"Re: Building HPCC-Platform on CygWin\", \"username\": \"Keshav Shrikant\", \"post_text\": \"Hi,\\n So I tried building with Visual Studio 10. I downloaded the required libraries and ran the following command on the command prompt:\\n\\ncmake -DXALAN_LIBRARIES="D:\\\\Users\\\\Documents\\\\XALANCPKG-11-31-VC100\\\\lib\\\\*.lib" -DXALAN_INCLUDE_DIR="D:\\\\Users\\\\Documents\\\\XALANCPKG-11-31-VC100\\\\include" -DXERCES_LIBRARIES="D:\\\\Users\\\\Documents\\\\xerces-c-3.1.1-x86-windows-vc-10.0\\\\lib\\\\*.lib" -DXERCES_INCLUDE_DIR="D:\\\\Users\\\\Documents\\\\xerces-c-3.1.1-x86-windows-vc-10.0\\\\include" -DZLIB_LIBRARIES="D:\\\\Users\\\\Documents\\\\zlib-1.2.3\\\\lib\\\\zdll.lib" -DZLIB_INCLUDE_DIR="D:\\\\Users\\\\Documents\\\\zlib-1.2.3\\\\include" -DOPENLDAP_LIBRARIES="C:\\\\cygwin\\\\lib\\\\libldap_r.dll.a" -DOPENLDAP_INCLUDE_DIR="C:\\\\OpenLDAP\\\\schema" \\n-DICU_LIBRARIES="D:\\\\Users\\\\Documents\\\\icu\\\\lib\\\\icuuc.lib" \\n-DICU_INCLUDE_DIR="D:\\\\Users\\\\Documents\\\\icu\\\\include" \\n-DOPENSSL_LIBRARIES="C:\\\\OpenSSL-Win32\\\\*.dll" \\n-DOPENSSL_INCLUDE_DIR="C:\\\\OpenSSL-Win32" \\n-DMYSQL_LIBRARIES="C:\\\\Program Files\\\\Microsoft SQL Server\\\\100\\\\SDK\\\\Lib\\\\x86\\\\sqlncli10.lib" \\n-DMYSQL_INCLUDE_DIR="C:\\\\Program Files\\\\Microsoft SQL Server\\\\100\\\\SDK\\\\Include" -DMSVC10_REDIST_DIR="D:\\\\Users\\\\Documents" ..\\\\HPCC-Platform -G "Visual Studio 10"
\\n\\nIt generated the hpccsystems-platform solution file for visual studio which I subsequently built in the IDE. But that resulted in 80 projects being built successfully, 64 failing and 8 being skipped. Nonetheless, executables were created as described and on clicking eclccserver.exe the following error popped up:\\n\\nThe Procedure entry point\\n?decodeXML@@YAPBDPBDAAVStringBuffer@@IPAPBDPAUIEntityHelp\\ner@@@Z could not be located in the dynamic link library jlib.dll
\\n\\nThanks,\\nKeshav\", \"post_time\": \"2014-04-04 10:00:00\" },\n\t{ \"post_id\": 5467, \"topic_id\": 1263, \"forum_id\": 14, \"post_subject\": \"Re: Building HPCC-Platform on CygWin\", \"username\": \"ming\", \"post_text\": \"Yes, I can re-produce the error. Will try to find a solution/suggestion.\\nIt probably will take some effort to make HPCC Platform compiled on CYGWIN.\\nCompile with Visual Studio is OK assume all libraries are resolved but no package generated.\", \"post_time\": \"2014-04-03 16:12:28\" },\n\t{ \"post_id\": 5451, \"topic_id\": 1263, \"forum_id\": 14, \"post_subject\": \"Building HPCC-Platform on CygWin\", \"username\": \"Keshav Shrikant\", \"post_text\": \"Hi,\\n I have been trying to build the HPCC-Platform(https://github.com/hpcc-systems/HPCC-Platform) on my windows machine and am using cygwin for the same. After some effort, I was able to cmake the project to generate the makefiles. But the subsequent command- make- to build the generated makefiles generates the following error:\\n\\n$ make\\n[ 1%] Built target processor\\n[ 2%] Built target ProcessFiles-initfiles-bash-etc-init.d\\n[ 2%] Built target ProcessFiles-initfiles-bash-sbin\\n[ 2%] Built target ProcessFiles-initfiles-bash-sbin-deb\\n[ 2%] Built target ProcessFiles-initfiles-bin\\n[ 3%] Built target ProcessFiles-initfiles-sbin\\n[ 3%] Built target ProcessFiles-initfiles-componentfiles-ftslave\\n[ 4%] Built target ProcessFiles-initfiles-componentfiles-thor\\n[ 4%] Building CXX object tools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/esdlgram.cpp.o\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: warning: -fPIC ignored for target (all code is position independent) [enabled by default]\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. */\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:248:0: warning: "_stdcall" redefined [enabled by default]\\n #define _stdcall\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: note: this is the location of the previous definition\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. */\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:249:0: warning: "__stdcall" redefined [enabled by default]\\n #define __stdcall\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: note: this is the location of the previous definition\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. */\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:250:0: warning: "_fastcall" redefined [enabled by default]\\n #define _fastcall\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: note: this is the location of the previous definition\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. 
*/\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:251:0: warning: "__fastcall" redefined [enabled by default]\\n #define __fastcall\\n ^\\nIn file included from /usr/include/stdio.h:35:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:5:\\n/usr/include/sys/cdefs.h:377:0: note: this is the location of the previous definition\\n #define __fastcall __attribute__((__fastcall__))\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:253:0: warning: "__cdecl" redefined [enabled by default]\\n #define __cdecl\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: note: this is the location of the previous definition\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. */\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:305:0: warning: "__declspec" redefined [enabled by default]\\n #define __declspec(dllexport)\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:1:0: note: this is the location of the previous definition\\n /* A Bison parser, made by GNU Bison 2.7.12-4996. */\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:366:0: warning: "_O_BINARY" redefined [enabled by default]\\n #define _O_BINARY 0\\n ^\\nIn file included from /usr/include/sys/fcntl.h:3:0,\\n from /usr/include/fcntl.h:14,\\n from /home/keshav/hpcc/system/include/platform.h:290,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/usr/include/sys/_default_fcntl.h:67:0: note: this is the location of the previous definition\\n #define _O_BINARY O_BINARY\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/home/keshav/hpcc/system/include/platform.h:368:0: warning: "_O_TEXT" redefined [enabled by default]\\n #define _O_TEXT 0\\n ^\\nIn file included from /usr/include/sys/fcntl.h:3:0,\\n from /usr/include/fcntl.h:14,\\n from /home/keshav/hpcc/system/include/platform.h:290,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdl_utils.hpp:21,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:11:\\n/usr/include/sys/_default_fcntl.h:66:0: note: this is the location of the previous definition\\n #define _O_TEXT O_TEXT\\n ^\\nIn file included from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:19:0:\\n/home/keshav/hpcc/tools/esdlcmd-xml/esdlcomp.h: In member function ‘void attribute::setNameF(const char*, ...)’:\\n/home/keshav/hpcc/tools/esdlcmd-xml/esdlcomp.h:197:49: error: ‘_vsnprintf’ was not declared in this scope\\n _vsnprintf(name_, MAX_IDENT,format, args);\\n ^\\nIn file included from /usr/include/stdlib.h:11:0,\\n from /home/keshav/hpcc/tools/esdlcmd-xml/esdlgram.y:4:\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp: At global scope:\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:204:6: error: expected identifier before ‘void’\\n _VOID = 282,\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:204:6: error: expected ‘}’ before 
‘void’\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:204:12: error: expected unqualified-id before ‘=’ token\\n _VOID = 282,\\n ^\\n/home/keshav/hpcc-platform/tools/esdlcmd-xml/esdlgram.cpp:233:4: error: expected declaration before ‘}’ token\\n };\\n ^\\ntools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/build.make:69: recipe for target 'tools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/esdlgram.cpp.o' failed\\nmake[2]: *** [tools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/esdlgram.cpp.o] Error 1\\nCMakeFiles/Makefile2:1617: recipe for target 'tools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/all' failed\\nmake[1]: *** [tools/esdlcmd-xml/CMakeFiles/esdl-xml.dir/all] Error 2\\nMakefile:146: recipe for target 'all' failed\\nmake: *** [all] Error 2\\n\\n\\nI would be very grateful if someone could help rectify this problem.\\n\\nThank you,\\nKeshav\", \"post_time\": \"2014-04-02 12:48:48\" },\n\t{ \"post_id\": 5490, \"topic_id\": 1271, \"forum_id\": 14, \"post_subject\": \"HPCC 4.x Roxie using eclserver and MySQL repository\", \"username\": \"jwilt\", \"post_text\": \"Can HPCC 4+ Roxie be configured with eclserver, so that a "classic" MySQL repository can be used?\\n\\nIf so, can someone give details on what that configuration looks like?\\n\\nThanks.\", \"post_time\": \"2014-04-08 17:32:34\" },\n\t{ \"post_id\": 6245, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Ahh. Got it.\\nActully they do not want to compile code on production machines.\\nIt will be really appreciable if the changes can come in earlier release.\\nPlease let me know if it is possible.\\n\\nThanks & Regards\\nNeelesh\", \"post_time\": \"2014-08-21 01:10:57\" },\n\t{ \"post_id\": 6243, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Neelesh, it looks like the fix won't be part of a build until probably 5.2. You could always pull the source and rebuild it yourself? Otherwise I will talk to the release lead about merging it into an earlier release.\\n\\nThanks,\\nRuss Whitehead\", \"post_time\": \"2014-08-20 18:06:18\" },\n\t{ \"post_id\": 6241, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Hi Russ,\\nI installed latest HPCC release hpccsystems-platform_community-5.0.0-3precise_amd64.deb on Ubuntu 12.04 LTS.\\nI configured LDAPserver on it.\\nI am facing same issue of LDAP error code 21 when I start hpcc system.\\nHowever when I manuall add DNs manually to Fedora DS389, then I can start hpcc system successfully.\\n\\nHere are last few lines of DaServer.log. For testing I had added DN ou=ecl manually to FedoraDS server. 
So in below errors it stoped at adding DN ou=modules,ou=ecl.\\n\\n00000021 2014-08-20 15:03:51.455 30071 30071 "clearing remaining sds locks"\\n00000022 2014-08-20 15:03:51.455 30071 30071 "waiting for transaction server to stop"\\n00000023 2014-08-20 15:03:51.455 30071 30071 "waiting for coalescer to stop"\\n00000024 2014-08-20 15:03:51.455 30071 30071 "Saving store"\\n00000025 2014-08-20 15:03:51.456 30071 30071 "Copying store to backup location"\\n00000026 2014-08-20 15:03:51.456 30071 30071 "Copy done"\\n00000027 2014-08-20 15:03:51.456 30071 30071 "Store saved"\\n00000028 2014-08-20 15:03:51.456 30071 30071 "Deleting old store: /var/lib/HPCCSystems/hpcc-data/dali/dalisds2.xml"\\n00000029 2014-08-20 15:03:51.456 30071 30071 "Stopping 1"\\n0000002A 2014-08-20 15:03:51.456 30071 30071 "Stopping 0"\\n0000002B 2014-08-20 15:03:51.457 30071 30079 "BackupHandler stopped"\\n0000002C 2014-08-20 15:03:51.465 30071 30071 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.0-3/CE/ubuntu-12.04-amd64/HPCC-Platform/dali/server/daserver.cpp(452) : Exception : ldap_add_ext_s error for ou=modules,ou=ecl,dc=members,dc=linode,dc=com: 21 Invalid syntax"\\n\\n\\nRegards\\nNeelesh Gurjar\", \"post_time\": \"2014-08-20 16:33:32\" },\n\t{ \"post_id\": 6001, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Hi Neelesh, as it turns out I found several issues and fixed them with multiple Github issues and Jira Pull Requests. They are not part of a published build yet, so the only way to get them would be to pull our HPCC-Platform sources from the "master" branch and build the package yourself. Its actually pretty easy, check out this link\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC\\n\\nand here is the source\\nhttps://github.com/hpcc-systems/HPCC-Platform\\n\\nLet me know if I can help with that\\nRuss\", \"post_time\": \"2014-07-01 12:51:37\" },\n\t{ \"post_id\": 6000, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Hi Russ,\\nAs per ticket, this issue is resolved.\\nHow can I get the updated stuff on my machine?\\nRegards\\nNeelesh\", \"post_time\": \"2014-07-01 10:25:51\" },\n\t{ \"post_id\": 5854, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Neelesh, I was able to repro the problem and have opened https://track.hpccsystems.com/browse/HPCC-11635 to address it. \\n\\nThanks\\nRuss Whitehead\", \"post_time\": \"2014-06-09 15:48:28\" },\n\t{ \"post_id\": 5846, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Thanks Russ.\\nJust to note that same thing works if HPCC & Fedora DS on CentOS.\\n\\nRegards\\nNeelesh\", \"post_time\": \"2014-06-06 15:48:02\" },\n\t{ \"post_id\": 5842, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Neelesh, I was able to recreate the problem last night. The failure occurs when we try to create all the ou=ecl branches, apparently Fedora389 does not support the standard LDAP "ldap_add_ext_s" API. 
I hope to have a fix soon!\\nRuss\", \"post_time\": \"2014-06-06 12:43:30\" },\n\t{ \"post_id\": 5840, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Hi\\n1. No, I did not create the Admin user manually.\\nWhen I executed setup-ds.pl, it asked me to enter a dn for the admin user.\\nIn that I mentioned cn=admin,ou=ecl and entered the password two times for it.\\n\\nThen I tested accessing the LDAP directory directly using the Jxplorer tool.\\nusing\\nbase dn: dc=members,dc=linode,dc=com\\n\\nUser dn: cn=admin,ou=ecl\\nPassword: which I entered while setting up Fedora DS.\\n\\nI could access the directory successfully.\\n\\n2. Yes, for HPCC I had to create all DNs manually, including ou=Sudoers.\\nThat's what my problem is.\\n\\nRegards\\nNeelesh Gurjar\", \"post_time\": \"2014-06-06 03:50:39\" },\n\t{ \"post_id\": 5835, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Hi Neelesh, I was able to get Fedora389 up and running but am having trouble getting HPCC to bind a user. Did you manually create the admin user you specified in the LDAPServer process, or is that the one that was created when you configured Fedora? Also I notice that the "systemBasedn" is set to ou=ecl, which is different than the default cn=users. Also, did you manually create all the hpcc ou or did you let dali/esp create them?\", \"post_time\": \"2014-06-04 22:12:51\" },\n\t{ \"post_id\": 5789, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Hi Russ,\\nThanks very much.\\nSteps to reproduce:\\nI installed Ubuntu 12.04LTS & Fedora 20 on 2 separate machines.\\nDisabled Firewall, SELinux and AppArmor on Ubuntu.\\n\\n1. Installed ds-389base using yum on the Fedora machine\\n2. Set up DS using the setup-ds.pl command.\\nI set the following info: base DN, admin cn with password, port\\n\\n3. I installed HPCC 4.2.4-3 on the Ubuntu machine.\\n4. Started Configmanager using this command:\\nsudo /opt/HPCCSystems/sbin/configmgr\\n5. Accessed http://<IP_of_HPCC_machine>:8015\\n6. Created a new environment with the Wizard\\n- In that I added Hardware. Prefix and IP of the LDAP machine\\n- I added the software component ldapserver\\n- added an instance in ldapserver\\n- And added attributes. I have attached a screenshot for this.\\n- Then I added in the Esp-myesp section -> Authentication tab. I have uploaded a screenshot of this.\\n- I added Dali Server - mydali --> LDAP Tab. Please see the attached screenshot.\\n\\n7. 
After this I started hpcc-system.\\nmydali service failed to restart.\\n\\n=============LDAP Logs ===============================\\n[31/May/2014:12:16:30 +0000] conn=111 fd=65 slot=65 connection from XXX.XXX.XXX.XXX to XXX.XXX.XXX.XXX\\n[31/May/2014:12:16:30 +0000] conn=111 op=0 BIND dn="" method=128 version=3\\n[31/May/2014:12:16:30 +0000] conn=111 op=0 RESULT err=0 tag=97 nentries=0 etime=0 dn=""\\n[31/May/2014:12:16:30 +0000] conn=111 op=1 SRCH base="" scope=0 filter="(objectClass=*)" attrs="namingContexts"\\n[31/May/2014:12:16:30 +0000] conn=111 op=1 RESULT err=0 tag=101 nentries=1 etime=0\\n[31/May/2014:12:16:30 +0000] conn=111 op=2 UNBIND\\n[31/May/2014:12:16:30 +0000] conn=111 op=2 fd=65 closed - U1\\n[31/May/2014:12:16:30 +0000] conn=112 fd=65 slot=65 connection from XXX.XXX.XXX.XXX to XXX.XXX.XXX.XXX\\n[31/May/2014:12:16:30 +0000] conn=112 op=0 BIND dn="cn=admin,ou=ecl" method=128 version=3\\n[31/May/2014:12:16:30 +0000] conn=112 op=0 RESULT err=0 tag=97 nentries=0 etime=0 dn="cn=admin,ou=ecl"\\n[31/May/2014:12:16:30 +0000] conn=112 op=1 ADD dn="dc=members,dc=linode,dc=com"\\n[31/May/2014:12:16:30 +0000] conn=112 op=1 RESULT err=68 tag=105 nentries=0 etime=0\\n[31/May/2014:12:16:30 +0000] conn=112 op=2 ADD dn="ou=ecl,dc=members,dc=linode,dc=com"\\n[31/May/2014:12:16:30 +0000] conn=112 op=2 RESULT err=21 tag=105 nentries=0 etime=0\\n[31/May/2014:12:16:30 +0000] conn=112 op=-1 fd=65 closed - B1\\n\\n========== In DaServer.log\\n0000002C 2014-05-31 12:16:31.065 21068 21068 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/dali/server/daserver.cpp(451) : Exception : ldap_add_ext_s error for ou=ecl,dc=members,dc=linode,dc=com: 21 Invalid syntax"\\n\\n\\nRegards\\nNeelesh\", \"post_time\": \"2014-05-31 12:22:37\" },\n\t{ \"post_id\": 5787, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Neelesh,\\nI will be happy to help you get Fedora389 working with HPCC. I will try standing up a similar environment, but in the meantime it would help if you could send me a screenshot of the ConfigManager setup for \\n\\n1) LDAPServer\\n2) ESP Authentication tab\\n\\nAlso the fragment of the /var/log/HPCCSystems/myesp/esp.log that contains a recreation of the error would greatly help.\\n\\nThanks\\nRuss\", \"post_time\": \"2014-05-30 19:38:39\" },\n\t{ \"post_id\": 5769, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ngurjar\", \"post_text\": \"Hi,\\nI have configured HPCC on CentOS6.5 with FedoraDS_389 without any issue.\\nAll services restarted successfully.\\nHowever I installed HPCC on Ubuntu 12.04 and tried to integrated FedoraDS_389 which is on Fedora20 Machine. When started hpcc services, mydali service failed saying Fail to add DN. LDAP err:21, which means invalid syntax.\\n\\nThen I added DNs including sudoers manually. Then I could start mydali and other services successfully.\\n\\n\\nAny pointers ??\\n\\nRegards\\nNeelesh\", \"post_time\": \"2014-05-29 10:25:57\" },\n\t{ \"post_id\": 5639, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"We have never tried ApacheDS so I would not recommend it. Our supported configurations are Microsoft Server Active Directory and OpenLDAP built with ACI support. 
Were you ever able to rebuild and deploy OpenLDAP ?\", \"post_time\": \"2014-05-05 17:35:13\" },\n\t{ \"post_id\": 5636, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"We have not done any testing with ApacheDS so I don't recommend you use it. We have had good luck using Microsoft Server Active Directory and OpenLDAP built with ACI. Were you able to rebuild and deploy OpenLDAP?\", \"post_time\": \"2014-05-05 15:36:46\" },\n\t{ \"post_id\": 5635, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"any update on this ?\\nPlease let me know if you need any othe info from me.\", \"post_time\": \"2014-05-05 13:48:15\" },\n\t{ \"post_id\": 5570, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"This is esp log after hpcc start\\n\\n0000002E 2014-04-28 12:31:51.999 15902 15902 "96.126.97.17:7070 Retrying..."\\n0000002F 2014-04-28 12:32:01.999 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000030 2014-04-28 12:32:11.999 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000031 2014-04-28 12:32:22.000 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000032 2014-04-28 12:32:32.000 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000033 2014-04-28 12:32:42.001 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000034 2014-04-28 12:32:52.001 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000035 2014-04-28 12:33:02.002 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000036 2014-04-28 12:33:12.002 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000037 2014-04-28 12:33:22.003 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000038 2014-04-28 12:33:32.003 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000039 2014-04-28 12:33:42.003 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000003A 2014-04-28 12:33:52.004 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000003B 2014-04-28 12:34:02.004 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000003C 2014-04-28 12:34:02.004 15902 15902 "96.126.97.17:7070 Retrying..."\\n0000003D 2014-04-28 12:34:12.005 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000003E 2014-04-28 12:34:22.005 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000003F 2014-04-28 12:34:32.006 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000040 2014-04-28 12:34:42.006 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000041 2014-04-28 12:34:52.007 15902 15902 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000042 2014-04-28 12:34:58.683 15902 15902 "ESP Abort Handler..."\\n00000043 2014-04-28 12:34:58.683 15902 15902 "================================================"\\n00000044 2014-04-28 12:34:58.683 15902 15902 "Signal: 11 Segmentation fault"\\n00000045 2014-04-28 12:34:58.683 15902 15902 "Fault IP: 000000000040DBC9"\\n00000046 2014-04-28 12:34:58.683 15902 15902 "Accessing: 0000000000000000"\\n00000047 2014-04-28 12:34:58.683 15902 15902 "Registers:"\\n00000048 2014-04-28 12:34:58.683 15902 15902 "EAX:00000000000000B1 
EBX:00007FFFCDCBE6A0 ECX:000000003FFFFFFF EDX:00007FF69EFF8798 ESI:0000000000000001 EDI:0000000000000000"\\n00000049 2014-04-28 12:34:58.683 15902 15902 "CS:EIP:E033:000000000040DBC9"\\n0000004A 2014-04-28 12:34:58.683 15902 15902 " ESP:00007FFFCDCBDEF0 EBP:00000000010450C0"\\n0000004B 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDEF0]: 0000000000000000 A0724BCA00000000 00007FF6A0724BCA 0104C98800007FF6 000000000104C988 00703A7800000000 0000000000703A78 0000000100000000"\\n0000004C 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDF10]: 0000000000000001 CDCBE41000000000 00007FFFCDCBE410 FFFFFF9200007FFF FFFFFFFFFFFFFF92 A0724BF5FFFFFFFF 00007FF6A0724BF5 CDCBDFD000007FF6"\\n0000004D 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDF30]: 00007FFFCDCBDFD0 9F00EBB000007FFF 00007FF69F00EBB0 0000000000007FF6 0000000000000000 0000000000000000 0000000000000000 0000000000000000"\\n0000004E 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDF50]: 0000000000000000 0000000200000000 0000000000000002 0000000000000000 0000000000000000 0104C98800000000 000000000104C988 FFFFFFFF00000000"\\n0000004F 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDF70]: 00000000FFFFFFFF CDCBE41000000000 00007FFFCDCBE410 0000020600007FFF 0000000000000206 0000000100000000 0000000000000001 CDCBE41000000000"\\n00000050 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDF90]: 00007FFFCDCBE410 FFFFFF9200007FFF FFFFFFFFFFFFFF92 0104C900FFFFFFFF 000000000104C900 0104C9B400000000 000000000104C9B4 0000018900000000"\\n00000051 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDFB0]: 0000000000000189 00703A7800000000 0000000000703A78 0104C98800000000 000000000104C988 0000000100000000 0000000000000001 FFFFFFFC00000000"\\n00000052 2014-04-28 12:34:58.683 15902 15902 "Stack[00007FFFCDCBDFD0]: FFFFFFFFFFFFFFFC FFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFF CDCBE3A0FFFFFFFF 00007FFFCDCBE3A0 9F00B03E00007FFF 00007FF69F00B03E 0000020600007FF6"\\n00000053 2014-04-28 12:34:58.683 15902 15902 "Backtrace:"\\n00000054 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x28) [0x7ff6a06e0f28]"\\n00000055 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP9siginfo_tPv+0x1fe) [0x7ff6a06e135e]"\\n00000056 2014-04-28 12:34:58.684 15902 15902 " /lib/x86_64-linux-gnu/libpthread.so.0(+0xfbb0) [0x7ff69f00ebb0]"\\n00000057 2014-04-28 12:34:58.684 15902 15902 " esp(_ZN16CEspAbortHandler7onAbortEv+0x29) [0x40dbc9]"\\n00000058 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libjlib.so(_Z13notifyOnAbortv+0x9a) [0x7ff6a0724bca]"\\n00000059 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libjlib.so(+0x113bf5) [0x7ff6a0724bf5]"\\n0000005A 2014-04-28 12:34:58.684 15902 15902 " /lib/x86_64-linux-gnu/libpthread.so.0(+0xfbb0) [0x7ff69f00ebb0]"\\n0000005B 2014-04-28 12:34:58.684 15902 15902 " /lib/x86_64-linux-gnu/libpthread.so.0(pthread_cond_timedwait+0x13e) [0x7ff69f00b03e]"\\n0000005C 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libjlib.so(_ZN9Semaphore4waitEj+0x95) [0x7ff6a0753095]"\\n0000005D 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libdalibase.so(_Z21registerClientProcessP13ICommunicatorRP6IGroupj14DaliClientRole+0x1ce) [0x7ff69fc895ae]"\\n0000005E 2014-04-28 12:34:58.684 15902 15902 " /opt/HPCCSystems/lib/libdalibase.so(_Z17initClientProcessP6IGroup14DaliClientRolejPKcS3_j+0x59) [0x7ff69fbf4329]"\\n0000005F 2014-04-28 12:34:58.684 15902 15902 " esp(_ZN10CEspConfig8initDaliEPKc+0x73) 
[0x408ac3]"\\n00000060 2014-04-28 12:34:58.684 15902 15902 " esp(_ZN10CEspConfigC1EP11IPropertiesP13IPropertyTreeS3_b+0xa23) [0x40a0b3]"\\n00000061 2014-04-28 12:34:58.684 15902 15902 " esp(_Z9init_mainiPPc+0x3a0) [0x40cd80]"\\n00000062 2014-04-28 12:34:58.684 15902 15902 " esp(main+0x10) [0x4086f0]"\\n00000063 2014-04-28 12:34:58.684 15902 15902 " /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x7ff69ec58de5]"\\n00000064 2014-04-28 12:34:58.684 15902 15902 " esp() [0x408731]"\\n00000065 2014-04-28 12:34:58.684 15902 15902 "ThreadList:\\n7FF69D513700 140697178027776 15903: CMPNotifyClosedThread\\n7FF69CD12700 140697169635072 15904: CSocketBaseThread\\n7FF69C511700 140697161242368 15905: MP Connection Thread\\n7FF69BD10700 140697152849664 20643: dasess.registerClientProcess\\n"\\n00000001 2014-04-28 12:35:50.605 21724 21724 "Esp starting community_4.2.0-4"\\n00000002 2014-04-28 12:35:50.605 21724 21724 "componentfiles are under /opt/HPCCSystems/componentfiles"\\n00000003 2014-04-28 12:35:50.605 21724 21724 "ESP process name [myesp]"\\n00000004 2014-04-28 12:35:50.605 21724 21724 "Initializing DALI client [servers = 96.126.97.17:7070]"\\n00000005 2014-04-28 12:35:55.607 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000006 2014-04-28 12:35:55.607 21724 21724 "96.126.97.17:7070 Retrying..."\\n00000007 2014-04-28 12:36:05.608 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000007 2014-04-28 12:36:05.608 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000008 2014-04-28 12:36:15.608 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000009 2014-04-28 12:36:25.609 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000000A 2014-04-28 12:36:25.609 21724 21724 "96.126.97.17:7070 Retrying..."\\n0000000B 2014-04-28 12:36:35.609 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000000C 2014-04-28 12:36:45.610 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000000D 2014-04-28 12:36:55.610 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000000E 2014-04-28 12:37:05.610 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n0000000F 2014-04-28 12:37:15.611 21724 21724 "Failed to connect to Dali Server 96.126.97.17:7070."\\n00000010 2014-04-28 12:37:15.611 21724 21724 "96.126.97.17:7070 Retrying..."\\n\\n
\", \"post_time\": \"2014-04-29 06:03:36\" },\n\t{ \"post_id\": 5555, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"No, you don't need to create the OUs, we do that. I would be more interested in seeing the ESP logfile if you can provide that ( /var/log/HPCCSystems/myesp/esp.log ) because it will have more information. Thanks\", \"post_time\": \"2014-04-28 14:51:56\" },\n\t{ \"post_id\": 5553, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"sorry for delay in reply,\\n\\nI was trying hpcc ldap authentication with apache DS. This time I am facing different issue.\\n\\nIn dali server log I can see that it is able to add ldap server but fails on ldap search,\\n\\ndali log\\n\\n00000000 2014-04-28 12:35:39.731 21252 21252 "Build community_4.2.0-4"\\n00000001 2014-04-28 12:35:39.732 21252 21252 "WARNING: Local path used for backup url: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000002 2014-04-28 12:35:39.732 21252 21252 "Backup URL = //96.126.97.17/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000003 2014-04-28 12:35:39.732 21252 21252 "Checking backup location: //96.126.97.17/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000004 2014-04-28 12:35:39.733 21252 21252 "Server Version = 3.10, required minimum client version 1.5"\\n00000005 2014-04-28 12:35:39.733 21252 21252 "DFS Server: numThreads=30"\\n00000006 2014-04-28 12:35:39.762 21252 21252 "BackupHandler started, async=false"\\n00000007 2014-04-28 12:35:39.763 21252 21252 "loading store 4, storedCrc=5146a460"\\n00000008 2014-04-28 12:35:39.765 21252 21252 "store loaded"\\n00000009 2014-04-28 12:35:39.765 21252 21252 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000A 2014-04-28 12:35:39.766 21252 21252 "Scanning store for external references"\\n0000000B 2014-04-28 12:35:39.767 21252 21252 "External reference count = 0"\\n0000000C 2014-04-28 12:35:39.829 21252 21252 "Added ldap server 96.126.97.17"\\n0000000D 2014-04-28 12:35:39.833 21252 21252 "ldap_search_s error: No such object"\\n0000000E 2014-04-28 12:35:39.833 21252 21252 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.0-4/CE/ubuntu-13.10-amd64/HPCC-Platform/dali/server/daldap.cpp(110) : LDAP server : getServerInfo error - No such object"\\n0000000F 2014-04-28 12:35:39.834 21252 21252 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.0-4/CE/ubuntu-13.10-amd64/HPCC-Platform/dali/server/daserver.cpp(421) : LDAP initialization error : getServerInfo error - No such object"\\n00000010 2014-04-28 12:35:39.834 21252 21252 "Suspending 6"\\n00000011 2014-04-28 12:35:39.834 21252 21252 "Suspending 5"\\n00000012 2014-04-28 12:35:39.834 21252 21252 "Suspending 4"\\n00000013 2014-04-28 12:35:39.834 21252 21252 "Suspending 3"\\n00000014 2014-04-28 12:35:39.834 21252 21252 "Suspending 2"\\n00000015 2014-04-28 12:35:39.834 21252 21252 "Suspending 1"\\n00000016 2014-04-28 12:35:39.834 21252 21252 "Suspending subscriptions"\\n00000017 2014-04-28 12:35:39.834 21252 21252 "Suspended subscriptions"\\n00000018 2014-04-28 12:35:39.834 21252 21252 "Suspending 0"\\n00000019 2014-04-28 12:35:39.834 21252 21252 "Stopping 6"\\n0000001A 2014-04-28 12:35:39.834 21252 21252 "Stopping 5"\\n0000001B 2014-04-28 12:35:39.834 21252 21252 "Stopping 4"\\n0000001C 2014-04-28 12:35:39.834 21252 21252 "Stopping 3"\\n0000001D 2014-04-28 12:35:39.834 21252 21252 
"Stopping 2"\\n0000001E 2014-04-28 12:35:39.834 21252 21252 "clearing remaining sds locks"\\n0000001F 2014-04-28 12:35:39.834 21252 21252 "waiting for transaction server to stop"\\n00000020 2014-04-28 12:35:39.834 21252 21252 "waiting for coalescer to stop"\\n00000021 2014-04-28 12:35:39.834 21252 21252 "Saving store"\\n00000022 2014-04-28 12:35:39.836 21252 21252 "Copying store to backup location"\\n00000023 2014-04-28 12:35:39.836 21252 21252 "Copy done"\\n00000024 2014-04-28 12:35:39.836 21252 21252 "Store saved"\\n00000025 2014-04-28 12:35:39.836 21252 21252 "Deleting old store: /var/lib/HPCCSystems/hpcc-data/dali/dalisds3.xml"\\n00000026 2014-04-28 12:35:39.837 21252 21252 "Stopping 1"\\n00000027 2014-04-28 12:35:39.837 21252 21252 "Stopping 0"\\n00000028 2014-04-28 12:35:39.837 21252 21260 "BackupHandler stopped"\\n00000029 2014-04-28 12:35:39.847 21252 21252 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.0-4/CE/ubuntu-13.10-amd64/HPCC-Platform/dali/server/daserver.cpp(451) : Exception : getServerInfo error - No such object"\\n\\n
\\n\\nDo I need to add all the OUs for the ldap server mentioned in the environment.xml file before starting hpcc?\", \"post_time\": \"2014-04-28 13:01:41\" },\n\t{ \"post_id\": 5550, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"So you rebuilt and redeployed OpenLDAP, and now you cannot connect to it using the OpenLDAP tools? I don't know much about OpenLDAP administration, but does the logfile acknowledge that your request reached the server and was denied for some reason or other? Are you able to ping the LDAP IP and port 389? I will ask around here if anyone has any ideas, meanwhile send any logging information and hopefully we can work through it. Once that has been worked out we bring HPCC into the picture and address any issues related to that.\\n\\nCheck out this link with information on upping the debug level on your ldapsearch, and setting the SSL certificates:\\n\\nhttp://www.linuxquestions.org/questions ... er-383602/\\n\\nRuss\", \"post_time\": \"2014-04-25 14:08:52\" },\n\t{ \"post_id\": 5548, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"Hi Russ,\\nI was on leave for the last two days and wasn't able to update. I was able to build the LDAP server with ACI enabled, but currently I am unable to contact the LDAP server. Simple queries like ldapsearch and ldapadd are throwing errors like "ldap_sasl_bind(SIMPLE): Can't contact LDAP server (-1)".\\n\\nI have also made the appropriate changes in ldap.conf so that the base dn and uri point to it, but it's still not working.\", \"post_time\": \"2014-04-25 07:55:19\" },\n\t{ \"post_id\": 5520, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Ankit, a good place to start would be the OpenLDAP website. I found the following page which describes how to build OpenLDAP with ACI support, and how to enable it once built. Give this a try and let me know how it works out for you.\\nhttp://www.openldap.org/faq/data/cache/634.html\\n\\nAlso, referring to the HPCC documentation on configuring LDAP using configmgr, make sure that when you configure the LDAP Server component, in the "Attributes" tab you need to enter a System User/system Password for an LDAP user account that already exists, and has been granted administrator privileges.\\n\\nHope this helps,\\nRuss\", \"post_time\": \"2014-04-21 12:52:34\" },\n\t{ \"post_id\": 5517, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"I am preparing a POC for LDAP authentication for our system, and for that I am using a test LDAP server. It is working fine with the other components of our system but I am facing this issue while configuring HPCC for LDAP authentication.\\nI am using "openldap-2.4.31" as the OpenLDAP test server. I don't know how to build ACI support into the openLDAP deployment. Could you please provide any pointers?\", \"post_time\": \"2014-04-21 09:29:16\" },\n\t{ \"post_id\": 5516, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP authentication problem\", \"username\": \"william.whitehead\", \"post_text\": \"Ankit,\\nSorry you are having trouble with OpenLDAP and HPCC, hopefully we can work through it. 
I see from the LDAP.LOG file the following error\\n\\n RESULT tag=105 err=17 text=aci: attribute type undefined\\n\\nFrom what I know about ACI, you have to build "ACI" support into your OpenLDAP deployment, and make sure it is configured for ACI. Did you build it from sources or is this a legacy OpenLDAP installation? Please let me know what you find out.\\n\\nThanks,\\nRuss\", \"post_time\": \"2014-04-18 18:55:35\" },\n\t{ \"post_id\": 5515, \"topic_id\": 1279, \"forum_id\": 14, \"post_subject\": \"HPCC LDAP authentication problem\", \"username\": \"ankit_kailaswar\", \"post_text\": \"I am trying to configure HPCC with LDAP authentication.\\n\\nmy ldap server and esp server config is as mentioned,\\n\\n <LDAPServerProcess build="_"\\n buildSet="ldapServer"\\n cacheTimeout="5"\\n description="LDAP server process"\\n filesBasedn="ou=files,ou=ecl"\\n groupsBasedn="ou=groups,ou=ecl"\\n ldapPort="389"\\n ldapSecurePort="636"\\n modulesBasedn="ou=modules,ou=ecl"\\n name="ldapserver"\\n sudoersBasedn="dc=members,dc=linode,dc=com"\\n systemBasedn="dc=members,dc=linode,dc=com"\\n systemCommonName="admin"\\n systemPassword="quOmuY55ftGrdcRi2y70eQ=="\\n systemUser="admin"\\n usersBasedn="ou=users,ou=ecl"\\n workunitsBasedn="ou=workunits,ou=ecl">\\n <Instance computer="ldapserver097017" name="s1" netAddress="96.126.97.17"/>\\n </LDAPServerProcess>\\n
\\n\\n\\n <EspProcess build="_"\\n buildSet="esp"\\n componentfilesDir="/opt/HPCCSystems/componentfiles"\\n daliServers="mydali"\\n description="ESP server"\\n enableSEHMapping="true"\\n formOptionsAccess="false"\\n httpConfigAccess="true"\\n logLevel="1"\\n logRequests="false"\\n logResponses="false"\\n maxBacklogQueueSize="200"\\n maxConcurrentThreads="0"\\n maxRequestEntityLength="8000000"\\n name="myesp"\\n perfReportDelay="60"\\n portalurl="http://hpccsystems.com/download">\\n <Authentication htpasswdFile="/etc/HPCCSystems/.htpasswd"\\n ldapAuthMethod="simple"\\n ldapConnections="10"\\n ldapServer="ldapserver"\\n method="ldap"\\n passwordExpirationWarningDays="10"/>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="myespsmc"\\n port="8010"\\n protocol="http"\\n resourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n service="EclWatch"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n </EspBinding>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="myws_ecl"\\n port="8002"\\n protocol="http"\\n resourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n service="myws_ecl"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </EspBinding>\\n
\\n\\nWhile restarting esp fails, following are the logs,\\n\\nesp log\\n\\n00000048 2014-04-18 06:22:17.123 24743 24743 "Plugin /opt/HPCCSystems/plugins/libpyembed.so exports getECLPluginDefinition but does not export ECL - not loading"\\n00000049 2014-04-18 06:22:17.123 24743 24743 "Error loading /opt/HPCCSystems/plugins/libv8embed.so: libv8.so.3.14.5: cannot open shared object file: No such file or directory"\\n0000004A 2014-04-18 06:22:17.123 24743 24743 "Loading plugin /opt/HPCCSystems/plugins/libauditlib.so[lib_auditlib] version = AUDITLIB 1.0.1"\\n0000004B 2014-04-18 06:22:17.123 24743 24743 "Loading plugin /opt/HPCCSystems/plugins/libworkunitservices.so[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.1"\\n0000004C 2014-04-18 06:22:17.205 24743 24743 "Initializing WsWorkunits_EclWatch_myesp service [process = myesp]"\\n0000004D 2014-04-18 06:22:17.232 24743 24743 "Authenticate method=LdapSecurity"\\n0000004E 2014-04-18 06:22:17.371 24743 24743 "Added ldap server 96.126.97.17"\\n0000004F 2014-04-18 06:22:17.372 24743 24743 "Queried 'dc=members,dc=linode,dc=com', selected basedn 'members'"\\n00000050 2014-04-18 06:22:17.373 24743 24743 "Connected to 'OpenLDAP' LdapServer 96.126.97.17 using protocol ldap"\\n00000051 2014-04-18 06:22:17.374 24743 24743 "ERROR: ESP Unhandled IException (-1 -- ldap_add_ext_s error for ou=ecl,dc=members,dc=linode,dc=com: 17 Undefined attribute type)"\\n\\n\\n
\\n\\nldap log\\n\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 fd=14 ACCEPT from IP=74.207.246.196:60729 (IP=0.0.0.0:389)\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=0 BIND dn="" method=128\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=0 RESULT tag=97 err=0 text=\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=1 SRCH base="" scope=0 deref=0 filter="(objectClass=*)"\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=1 SRCH attr=namingContexts\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=1 SEARCH RESULT tag=101 err=0 nentries=1 text=\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 op=2 UNBIND\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9630 fd=14 closed\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 fd=14 ACCEPT from IP=74.207.246.196:60730 (IP=0.0.0.0:389)\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=0 BIND dn="cn=admin,dc=members,dc=linode,dc=com" method=128\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=0 BIND dn="cn=admin,dc=members,dc=linode,dc=com" mech=SIMPLE ssf=0\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=0 RESULT tag=97 err=0 text=\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=1 ADD dn="dc=members,dc=linode,dc=com"\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=1 RESULT tag=105 err=68 text=\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=2 ADD dn="ou=ecl,dc=members,dc=linode,dc=com"\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 op=2 RESULT tag=105 err=17 text=aci: attribute type undefined\\nApr 18 06:22:17 li330-17 slapd[4157]: conn=9631 fd=14 closed (connection lost)\\n\\n
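\\nThe line that stands out to me is the second ADD being rejected with err=17: slapd does not recognise the "aci" attribute that is being set on ou=ecl. I have seen mentions that stock OpenLDAP only accepts that attribute when it was configured with the experimental per-object ACI option at build time, roughly:\\n\\n# sketch of an OpenLDAP source build with experimental ACI support - not something I have tried yet\\n./configure --enable-aci\\nmake depend && make\\nsudo make install\\n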
\\n\\n\\nI am not able to figure out what I am doing wrong.\", \"post_time\": \"2014-04-18 08:41:12\" },\n\t{ \"post_id\": 5728, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"gsmith\", \"post_text\": \"Also https://github.com/hpcc-systems/HPCC-Platform/pull/5898 fixes the canvas error report (the other fix ensured you still had a working website if an error like this was found).\", \"post_time\": \"2014-05-20 08:39:56\" },\n\t{ \"post_id\": 5727, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"This is the point, thanks.\", \"post_time\": \"2014-05-20 08:35:58\" },\n\t{ \"post_id\": 5724, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"gsmith\", \"post_text\": \"FYI there was a Jira ticket which fixed a very (very) similar issue: https://track.hpccsystems.com/browse/HPCC-11446\", \"post_time\": \"2014-05-19 20:38:59\" },\n\t{ \"post_id\": 5722, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI have done the whole thing again, from scratch in a freshly installed Ubuntu 12.04 server. This time node.js has been installed (the whole track is at the end of the post). \\n\\nThe 'stub.htm' not found issue was still there, preventing the use of ECLWatch. But after copying it by hand:\\n\\nsudo cp git/HPCC-Platform/esp/src/stub.htm /opt/HPCCSystems/componentfiles/files/\\n\\nit worked fine, and at least I was able to see the new ECLWatch UI look & feel (which is very cool by the way, it was worth the effort).\\n\\nThe track of the build is following:\\n\\n##### Begin of track\\n\\nmkdir build\\nmkdir git\\ncd git\\ngit clone https://github.com/hpcc-systems/HPCC-Platform.git\\ncd HPCC-Platform\\ngit submodule sync\\ngit submodule update -f --init\\n\\ncd ../../build\\n\\nsudo apt-get -y install cmake\\nsudo apt-get -y install make gcc g++\\nsudo apt-get -y install bison flex\\nsudo apt-get -y install binutils-dev\\nsudo apt-get -y install libldap2-dev\\nsudo apt-get -y install libicu-dev\\nsudo apt-get -y install libxerces-c-dev\\nsudo apt-get -y install libxalan110-dev\\nsudo apt-get -y install zlib1g-dev\\nsudo apt-get -y install libarchive-dev\\nsudo apt-get -y install libssl-dev\\nsudo apt-get -y install libapr1\\nsudo apt-get -y install libapr1-dev\\nsudo apt-get -y install libaprutil1\\nsudo apt-get -y install libaprutil1-dev\\nsudo apt-get -y install nodejs\\nsudo apt-get -y install nodejs-dev\\nsudo apt-get -y install expect\\nsudo apt-get -y install libboost-regex1.46-dev\\nsudo apt-get -y install python-dev\\nsudo apt-get -y install openjdk-7-jdk\\n\\ncmake ../git/HPCC-Platform/\\nmake\\nmake package\\n\\nsudo dpkg -i hpccsystems-platform_community-5.1.0-trunk0precise_amd64.deb\\n\\n### Then: \\n\\nsudo cp ../git/HPCC-Platform/esp/src/stub.htm /opt/HPCCSystems/componentfiles/files/\\n\\n###### End of track\\n\\neclwatch_build_err.txt is empty; but some errors or warnings can be found in eclwatch_build_out.txt, for example:\\n\\n...\\nwarn(224) A plugin dependency was encountered but there was no build-time plugin resolver. module: dgrid/util/has-css3; plugin: xstyle/css\\nwarn(205) Module not tagged as pure AMD yet it contains AMD API applications. 
module: d3/d3\\n...\\nerror(307) Failed to evaluate module tagged as pure AMD (fell back to processing with regular expressions). module: hpcc/viz/d3-cloud/d3.layout.cloud; error: ReferenceError: Canvas is not defined\\n...\\n\\n\\nBests.\", \"post_time\": \"2014-05-19 17:01:52\" },\n\t{ \"post_id\": 5695, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Yes I have produced successful builds a couple of 3 or 4 months ago, it was already version 4.2, and I did not need node at this time. I have noticed that some files (amongst which stub.htm) have been moved in the github repository since then.\\n\\nI am using Ubuntu Server 12.04 LTS.\\n\\nI have installed node and at least stub.htm is now found, and src/eclwatch_build_out.txt does not complain. \\n\\nI missed the info about the JDK in the wiki, indeed.\\n\\nECLWatch gives me a blank page now instead of the previous error page and I get a bunch of missing files mentioned in esp.log; I will continue to try.\\n\\nThierry.\", \"post_time\": \"2014-05-15 16:08:47\" },\n\t{ \"post_id\": 5693, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"gsmith\", \"post_text\": \"What OS are you on? \\n\\nBut yes either Java _or_ node needs to be installed for that part of the build process.\\n\\nI hadn't mentioned that because it sounded like you managed to build previously so I assumed you had that - I just checked the wiki and the jdk seems to be included in the prerequisites already.\", \"post_time\": \"2014-05-15 14:49:51\" },\n\t{ \"post_id\": 5692, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Related to the version of the source, cmake tells the following:\\n\\n"\\n-- Current release version is hpccsystems-platform_community-4.3.0-trunk1Debugprecise_amd64\\n-- Git tag is 'heads/master-0-g9daa43-dirty'\\n-- Build tag is 'community_4.3.0-trunk1Debug[heads/master-0-g9daa43-dirty]'\\n"\\n\\nin case this helps ...\\n\\nThierry\", \"post_time\": \"2014-05-14 10:16:56\" },\n\t{ \"post_id\": 5691, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI pulled the sources from github the 25th of april, but I guess you would need a more technically precise info about the version of the source I pulled. Is there a git command that I may issue to get this information ?\\n\\nesp/src/eclwatch_build_err.txt is empty, but src/eclwatch_build_out.txt contains the following content, that may be an explanation:\\n\\n"\\nBuilding application with /home/thierry/git/HPCC-Platform/esp/profiles/eclwatch.profile.js to /home/thierry/build/esp/src/build.\\nCleaning old files... Done\\nNeed node.js or Java to build!\\n"\\n\\nthough I did not notice any complain about that during the build. Is the node.js requirement a new one ?\\n\\nI will install node and rerun the build process.\\n\\nThierry.\", \"post_time\": \"2014-05-14 10:10:34\" },\n\t{ \"post_id\": 5690, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"gsmith\", \"post_text\": \"This sounds like a build/package issue, specifically with the "building" of new ECL Watch. 
\\n\\nIf you can send information about the build (what version of the sources etc.), plus the output from the build, plus the log files from the ECL Watch build I will take a look (eclwatch_build_out.txt and eclwatch_build.err.txt).\\n\\nFYI What I will be looking for during the package step is the following output:\\n\\n...\\n-- ECL Watch: Rebuilding Site\\n---- Build Target: /home/gordon/git/build/HPCC-Platform/esp/src/build\\n---- Output log: /home/gordon/git/build/HPCC-Platform/esp/src/eclwatch_build_out.txt\\n---- Error log: /home/gordon/git/build/HPCC-Platform/esp/src/eclwatch_build_err.txt\\nProcess finished normally\\n errors: 0\\n warnings: 74\\n build time: 115.159 seconds\\n...\\n
\\n\\nI am assuming that you are either getting "Process finished normally" but with some errors, or you are not getting the "Process finished normally" message at all, then based on that I will be looking inside the specific eclwatch build logs to see what the issue is (they can be very noisy, so it is not always obvious what the issue is).\\n\\nAlso, can you try the following:\\n\\ncd to_my_hpcc_src_folder\\nrm -r ./esp/src\\ngit reset --hard\\ngit submodule sync\\ngit submodule update -f --init (no need for the --recursive)\\n\\ncd to_my_build_folder\\ncmake --build . --target package -- -j4\\n
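A quick way to confirm whether stub.htm actually made it into the package before installing it (the .deb name below is just the example that appears earlier in this thread; substitute whatever your build produced):

# list the package contents and look for stub.htm
dpkg -c hpccsystems-platform_community-5.1.0-trunk0precise_amd64.deb | grep stub.htm

# after installing, the file should be present here
ls -l /opt/HPCCSystems/componentfiles/files/stub.htm

# workaround reported earlier in this thread if it is still missing
sudo cp ../git/HPCC-Platform/esp/src/stub.htm /opt/HPCCSystems/componentfiles/files/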
\\nAnd see how you get on.\", \"post_time\": \"2014-05-14 09:02:27\" },\n\t{ \"post_id\": 5684, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Thanks. That I seem to be alone running into this problem is bizarre, as I did nothing peculiar indeed.\", \"post_time\": \"2014-05-13 17:11:15\" },\n\t{ \"post_id\": 5683, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tlhumphrey2\", \"post_text\": \"I've asked one of our platform experts to look into this.\", \"post_time\": \"2014-05-13 17:03:24\" },\n\t{ \"post_id\": 5682, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Here it is; the suspicious line is at 109 I believe. Indeed the compiled package does not install stub.htm. ECLWatch tells the following:\\n\\n"\\nXML Parsing Error: no element found\\nLocation: http://172.16.0.45:8010/\\nLine Number 1, Column 1:\\n"\\n\\nThierry.\", \"post_time\": \"2014-05-13 15:40:04\" },\n\t{ \"post_id\": 5681, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tlhumphrey2\", \"post_text\": \"Yes it would help. I'm hoping the sequence of events shown in the log will provide additional detail that will better pinpoint the problem.\", \"post_time\": \"2014-05-13 15:03:45\" },\n\t{ \"post_id\": 5680, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"would esp.log helps ?\", \"post_time\": \"2014-05-13 09:40:27\" },\n\t{ \"post_id\": 5678, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"Re: EclWatch fails because stub.htm is not found\", \"username\": \"tlhumphrey2\", \"post_text\": \"It would help, if you could attach any log files created by the build.\", \"post_time\": \"2014-05-12 14:34:21\" },\n\t{ \"post_id\": 5640, \"topic_id\": 1298, \"forum_id\": 14, \"post_subject\": \"EclWatch fails because stub.htm is not found\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI built successfully the platform two months ago on an Ubuntu server, but now the same exercice fails with the current sources available on github.\\n\\nI do the usual steps:\\n\\ngit clone https://github.com/hpcc-systems/HPCC-Platform.git\\ngit submodule update --init --recursive\\n...\\ncmake ...\\nmake\\nmake package\\ndpkg -i ...\\n\\n'sudo service hpcc-init start' runs fine, but ECLWatch cannot be used because stub.htm is not found, as indicated in esp.log; indeed there is a bunch of files that do not appear anymore in the componentfiles/files directory (history of git shows some recent movements there).\\n\\nSo my question is simply: is there a an issue here, or is there a new way to build the platform ?\\n\\nThanks, Thierry.\", \"post_time\": \"2014-05-06 08:32:12\" },\n\t{ \"post_id\": 5729, \"topic_id\": 1317, \"forum_id\": 14, \"post_subject\": \"Re: building of package: plugin dependency warnings\", \"username\": \"gsmith\", \"post_text\": \"The build process for the new ECL Watch is inherently noisy, which is why the console out is redirected to that file. 
Typically you wouldn't want to look in that file, unless there were build errors (this is probably related to the other thread you opened, which is now resolved).\", \"post_time\": \"2014-05-21 13:35:55\" },\n\t{ \"post_id\": 5726, \"topic_id\": 1317, \"forum_id\": 14, \"post_subject\": \"building of package: plugin dependency warnings\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nAfter the build of the package eclwatch_build_err.txt is empty but some warns and errors are logged in eclwatch_build_out.txt (that at this moment seems to prevent stub.htm to be copied at the right place).\\n\\nWarns and errors in eclwatch_build_out.txt (attached) are of this kind:\\n\\n...\\nwarn(224) A plugin dependency was encountered but there was no build-time plugin resolver. module: dgrid/util/has-css3; plugin: xstyle/css\\nwarn(205) Module not tagged as pure AMD yet it contains AMD API applications. module: d3/d3\\n...\\nerror(307) Failed to evaluate module tagged as pure AMD (fell back to processing with regular expressions). module: hpcc/viz/d3-cloud/d3.layout.cloud; error: ReferenceError: Canvas is not defined\\n...\\n\\nIs there somewhere some documents or info about these issues ?\\n\\nThanks.\", \"post_time\": \"2014-05-20 08:35:12\" },\n\t{ \"post_id\": 5912, \"topic_id\": 1318, \"forum_id\": 14, \"post_subject\": \"Re: Getting error for Saving environment\", \"username\": \"ngurjar\", \"post_text\": \"Thanks for the reply.\\n\\nThis issue is resolved. There was problem in xml file.\\n\\nRegards\", \"post_time\": \"2014-06-19 05:28:56\" },\n\t{ \"post_id\": 5908, \"topic_id\": 1318, \"forum_id\": 14, \"post_subject\": \"Re: Getting error for Saving environment\", \"username\": \"Gleb Aronsky\", \"post_text\": \"This is a warning to indicate that the version of the ldapserver component configuration differs from the version that was used to generate the original environment.xml file. In version 4.2.0, if you are using the default environment.xml that was included in the install, or if you are using a configuration file that was generated in an older release, you may encounter this warning when adding a new component. In the former case, it is safe to ignore this warning.\", \"post_time\": \"2014-06-18 20:44:55\" },\n\t{ \"post_id\": 5730, \"topic_id\": 1318, \"forum_id\": 14, \"post_subject\": \"Getting error for Saving environment\", \"username\": \"ngurjar\", \"post_text\": \"Hi,\\nI have installed hpccsystems-platform_community-4.2.0-4saucy_amd64 on my ubuntu machine.\\nI have configured Fedora 389DS server on another machine.\\nWhen I configure that server and change configuration in ESP and save configuration it throws following error:\\n\\nCWsDeployFileInfo::saveEnvironment:Save operation was successful. 
However the following exceptions were raised.\\nProcess ldapserver has invalid build\\n\\nAny pointers to solve this?\\n\\nRegards\\nNeelesh\", \"post_time\": \"2014-05-21 17:15:30\" },\n\t{ \"post_id\": 5796, \"topic_id\": 1336, \"forum_id\": 14, \"post_subject\": \"Re: Are all end-user services available without ESP?\", \"username\": \"Lotus\", \"post_text\": \"Thanks,I will post another question to get understand the differences between two.\", \"post_time\": \"2014-06-01 23:40:25\" },\n\t{ \"post_id\": 5795, \"topic_id\": 1336, \"forum_id\": 14, \"post_subject\": \"Re: Are all end-user services available without ESP?\", \"username\": \"gsmith\", \"post_text\": \"I am not the correct person to answer this, but here is my understanding:\\n\\nI think the "purchase required" is for the additional enterprise ESP modules mentioned here: http://hpccsystems.com/products-and-ser ... atform-esp\\n\\nFurther ESP itself is included with the community edition and it is not crippled. The sources are available on GitHub along with the rest of the community platform.\", \"post_time\": \"2014-06-01 15:33:36\" },\n\t{ \"post_id\": 5794, \"topic_id\": 1336, \"forum_id\": 14, \"post_subject\": \"Re: Are all end-user services available without ESP?\", \"username\": \"Lotus\", \"post_text\": \"Thanks.Another questions:\\nAccording to: http://hpccsystems.com/products-and-ser ... odules,ESP is purchasing Required.\\n1,Does this mean user must purchase it before giving hpcc a try? \\n2,But it seems I can use ECL IDE do something now,am I using some temporary way to connect to hpcc?\\n3,Or I am using a real ESP to access hppc,but being during some trial period?\", \"post_time\": \"2014-06-01 13:14:55\" },\n\t{ \"post_id\": 5793, \"topic_id\": 1336, \"forum_id\": 14, \"post_subject\": \"Re: Are all end-user services available without ESP?\", \"username\": \"gsmith\", \"post_text\": \"Some history - \\nWhen the HPCC Platform was originally being developed the popular way to build client server communication layers was to use technologies like CORBA, RPC and DCOM, essentially they had matching client and server objects which allowed the client programmer to instantiate an object and call its methods without worrying about "where" the code was executed (on the server). They had many issues:\\n\\nComplex\\nHard to manage version changes\\nExpected 100% connectivity\\n
\\n\\nThen along came SOAP and Web Services, which essentially solved a lot of these problems.\\n\\nSo back to ESP:\\nESP started off life as a simple Web Services server, basically it could "talk" directly to the server components and expose an API to the clients via SOAP. Today it not only supports SOAP, but also HTTP Get, POST, JSON and REST type services.\\n\\nIt also has three main jobs:\\n1. To expose an API for all the client tools.\\n2. To expose an API for all published queries (written in ECL).\\n3. To work as a regular web server for things like ECL Watch.\\n\\nFinally to answer you question:\\nAll the client tools (ECL IDE, Eclipse ECL Plugin, new ECL Watch, DFU Plus, ECl PLus etc.) talk to the Platform with SOAP/REST style calls via the ESP. ESP Itself doesn't really provide any GUIs as such (not 100% true).\", \"post_time\": \"2014-06-01 12:22:05\" },\n\t{ \"post_id\": 5792, \"topic_id\": 1336, \"forum_id\": 14, \"post_subject\": \"Are all end-user services available without ESP?\", \"username\": \"Lotus\", \"post_text\": \"I have watched three videos about hpcc architecture.Just wondering:\\n1,Are all end-user services(Query builder,ECL Direct...etc.) available without ESP?If so,does this mean the main function of esp is packaging all these into more user friendly GUIs,and user still can access these services by other approaches like comment line? Or\\n2,All these services are only available by ESP?\", \"post_time\": \"2014-06-01 10:59:54\" },\n\t{ \"post_id\": 5814, \"topic_id\": 1337, \"forum_id\": 14, \"post_subject\": \"Re: What is difference between two ESPs?\", \"username\": \"Lotus\", \"post_text\": \"Thanks,and frustrated by this strategy.\", \"post_time\": \"2014-06-02 17:24:27\" },\n\t{ \"post_id\": 5806, \"topic_id\": 1337, \"forum_id\": 14, \"post_subject\": \"Re: What is difference between two ESPs?\", \"username\": \"arjuna chala\", \"post_text\": \"The ESP that is available for purchase is the complete C++ framework needed to create the services layer. The framework includes auto generated functionality to bind to Roxie services, integrates logging, billing and security.\\n\\n\\nThe ESP framework that is included with the current community version of the software refers to two items:\\n\\n1. The already compiled services to perform common tasks like the functions provided as part of ECL Watch\\n2. A lightweight framework to create the services (no binding to Roxie services, logging, billing etc.)\", \"post_time\": \"2014-06-02 13:46:12\" },\n\t{ \"post_id\": 5804, \"topic_id\": 1337, \"forum_id\": 14, \"post_subject\": \"Re: What is difference between two ESPs?\", \"username\": \"Lotus\", \"post_text\": \"Thanks,I thank my question didn't get understood clearly.\\n \\n I actually have checked that page and these two:\\n http://hpccsystems.com/products-and-ser ... ts/modules, \\n (Differences about two versions,it isn't small)\\n http://hpccsystems.com/products-and-ser ... atform-esp \\n (Introduction about ESP,and the stuffs here are attractive).\\n \\n And after checking the architecture of hpcc from here:\\n http://cdn.hpccsystems.com/whitepapers/ ... 
n_HPCC.pdf.\\n\\n I feel,if ESP isn't free,and all services provided by ESP can't be accessed by alternative ways,then almost all good stuffs that attract me is just gone,and community version is actually a relative limited basic version.\\n So I want to confirm this first.\", \"post_time\": \"2014-06-02 13:19:59\" },\n\t{ \"post_id\": 5800, \"topic_id\": 1337, \"forum_id\": 14, \"post_subject\": \"Re: What is difference between two ESPs?\", \"username\": \"bforeman\", \"post_text\": \"Hi Lotus,\\n\\nRegarding ESP there is not much difference that I can see between the Community and Enterprise versions.\\n\\nThis page might be helpful:\\n\\nhttp://hpccsystems.com/products-and-services/products/ee-ce-comparison\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-02 12:19:42\" },\n\t{ \"post_id\": 5797, \"topic_id\": 1337, \"forum_id\": 14, \"post_subject\": \"What is difference between two ESPs?\", \"username\": \"Lotus\", \"post_text\": \"I have asked a question here:\\nviewtopic.php?f=14&t=1336\\n\\nIt leads to this question:\\nWhat are differences between esp included in community version and Enterprise Service Platform (ESP)?\\n\\nCan I understand it like this:they provide some services essentially,and Enterprise Service Platform (ESP) package all services into more user friendly GUIs,and user still can access these services by other approaches like comment line?\", \"post_time\": \"2014-06-01 23:43:04\" },\n\t{ \"post_id\": 5867, \"topic_id\": 1348, \"forum_id\": 14, \"post_subject\": \"Re: HPCC platform update\", \"username\": \"jweeks\", \"post_text\": \"We have a package for Ubuntu 12.04 LTS (Version 4.2.4-3) at http://hpccsystems.com/download/free-co ... r-platform\\n\\nHPCC Platform Ubuntu 12.04 LTS\\nRelease Date: 05/07/2014\\nUbuntu 64bit\", \"post_time\": \"2014-06-11 15:02:45\" },\n\t{ \"post_id\": 5865, \"topic_id\": 1348, \"forum_id\": 14, \"post_subject\": \"HPCC platform update\", \"username\": \"adidassler2011\", \"post_text\": \"At my university, the HPCC platform was installed on Ubuntu 12.04.3 LTS (version 3.2.0-45) using source code. I need to update the server because it is running version 3.6.1 and I’m using 4.2.4 client tools for my ECL IDE. Is there a way to update hpcc on the cluster or do I have to re-download and build again?\", \"post_time\": \"2014-06-10 19:26:03\" },\n\t{ \"post_id\": 5923, \"topic_id\": 1357, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication\", \"username\": \"grphilar\", \"post_text\": \"Thanks for that suggestion! I'm able to enable authentication using htpasswd.\", \"post_time\": \"2014-06-19 16:02:22\" },\n\t{ \"post_id\": 5913, \"topic_id\": 1357, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication\", \"username\": \"fernando\", \"post_text\": \"Configure (authentication) prior to creating the image. Using configmgr on the authentication tab for the esp component.\\n\\nhtpassword method would probably work best in this case. 
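For reference, a minimal sketch of setting up that htpasswd file (assumes the stock htpasswd utility from apache2-utils/httpd-tools; the user name is just an example):

# create the file with an initial user (prompts for the password)
sudo htpasswd -c /etc/HPCCSystems/.htpasswd hpccdemo

# add further users without -c so the existing file is not overwritten
sudo htpasswd /etc/HPCCSystems/.htpasswd anotheruser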
It defaults to "/etc/HPCCSystems/.htpasswd" on disk.\\n\\nOnce configured to use authentication save and create the image.\\n \\nSet up the image with a default password , which the users could probably change after the initial login by modifying the <.htpassword> on disk.\", \"post_time\": \"2014-06-19 13:37:46\" },\n\t{ \"post_id\": 5911, \"topic_id\": 1357, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication\", \"username\": \"grphilar\", \"post_text\": \"Hi Gleb,\\n\\nThe intention is to have users spawn their own HPCC instances in the cloud, and that those cloud instances should have authentication enabled, so that only the authorized user can access port 8010.\\n\\nThanks!\", \"post_time\": \"2014-06-18 21:27:23\" },\n\t{ \"post_id\": 5909, \"topic_id\": 1357, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi grphilar,\\n\\nCan you please elaborate a bit more on what you are trying to do? Based on your post you seem to indicate that you have HPCC up and running, suggesting that you have a working configuration. Is the intention to have users spawn their own HPCC instances in the cloud, and that those cloud instance should have authentication enabled? Or, are multiple users just accessing existing spawned images, that require them to authenticate before using them? \\n\\n-Gleb\", \"post_time\": \"2014-06-18 21:10:49\" },\n\t{ \"post_id\": 5906, \"topic_id\": 1357, \"forum_id\": 14, \"post_subject\": \"HPCC authentication\", \"username\": \"grphilar\", \"post_text\": \"Hi,\\n\\nAt my university, I'm working on installing an HPCC image that any authorized user can boot in a cloud environment. I've created the image on RHEL 6.0 and users are able to boot a single node instance successfully. I'm having issues enabling authentication on the system. The steps in the installation guide require the user to launch the configuration manager to enable authentication. I'd like to do this without the user's involvement i.e the user should be first redirected to an authentication screen when he/she tries to access ecl_watch or any other subsystem.\\n\\nI'd appreciate some help here.\\n\\nThanks,\\nGautam\", \"post_time\": \"2014-06-18 20:11:34\" },\n\t{ \"post_id\": 6003, \"topic_id\": 1369, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication using LDAP\", \"username\": \"william.whitehead\", \"post_text\": \"You do have to choose either HTPASSWD or LDAP. However, if you choose LDAP and only want to use it for authentication, you do not have to configure groups, access rights, file scopes, etc in the ECLWatch Administrator screens. Just add the users through the ECLWatch "Users" interface and that's it. The other OUs specified in the configuration (groups, modules, files, etc) will be created, but for the most part they won't be populated, except of course the ou=users branch.\", \"post_time\": \"2014-07-01 14:31:18\" },\n\t{ \"post_id\": 5963, \"topic_id\": 1369, \"forum_id\": 14, \"post_subject\": \"Re: HPCC authentication using LDAP\", \"username\": \"bforeman\", \"post_text\": \"Comments from our Senior Systems Administrator:\\n\\nAFAIK it’s an all or nothing thing.\\n\\nDoes you have an Active Directory to use? If yes you need a Domain Admin account in order to create OUs etc.\\n\\nYou will probably get a lot of push back from the Windows Admins if the AD is a shared resource. \\n\\n
\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2014-06-25 12:12:05\" },\n\t{ \"post_id\": 5954, \"topic_id\": 1369, \"forum_id\": 14, \"post_subject\": \"HPCC authentication using LDAP\", \"username\": \"grphilar\", \"post_text\": \"Hi,\\n\\nI'm trying to set up HPCC single node instances as VMs in a cloud environment. I have enabled authentication through htpasswd but will like to use LDAP too. I am thinking of using LDAP only for user authentication and not for any other services. In going through the installation guide, it looks like the LDAP section encompasses much more information than I actually need which can be misleading at times. So in short, I need some guidance setting up LDAP only for user authentication. Thoughts?\\n\\nThanks!\", \"post_time\": \"2014-06-24 15:51:44\" },\n\t{ \"post_id\": 6048, \"topic_id\": 1389, \"forum_id\": 14, \"post_subject\": \"Re: Installation of single node\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI'm sorry to hear that you're having some issues getting things up and running.\\n\\nA couple of things I'd like to point out. \\n\\nIn regards to the HPCCSystems Platform, we currently support several distributions of Linux including centos/red-hat and ubuntu. However, we do not provide the package for Windows. \\nIf you'd like to run the platform inside a VM on a Windows machine, the quick way is to download a VM with the HPCCSystems Platform code already installed on it. All you'd need to do is start up the VM in either VMWare or VirtualBox. The VM Image can be found here http://hpccsystems.com/download/hpcc-vm-image\\n\\nRegarding the ability to compile locally inside ECL IDE on a Windows machine, this requires the use of a c++ compiler on the Windows machine you're working on. However, if you downloaded the VM Image, you'd be able to connect the ECL IDE with the VM and compile jobs that way.\\n\\nHere is the documentation that can help with this process: http://hpccsystems.com/download/docs/running-hpcc-vm\\n\\nAdditionally, please make sure you have the version of the ECL IDE that matches the platform. This can be found here: http://hpccsystems.com/download/free-co ... on/ecl-ide\\n\\n\\nI hope this helps.\", \"post_time\": \"2014-07-14 18:46:31\" },\n\t{ \"post_id\": 6047, \"topic_id\": 1389, \"forum_id\": 14, \"post_subject\": \"Installation of single node\", \"username\": \"BIG.DATA.ANALYTICS.CONSULTANT\", \"post_text\": \"I installed ECL 2011 and then worked enough to get ECL education but this time I could not make ECL IDE to work (can edit but can not compile c++ code) what is mandatory in order to execute code. I suspect in to much choice of different unix and windows. I have windows 8 4 GB laptop with plan to install solid state disk if I make ECL to work. what is recommendation for download where my system will work. To many choices do not work for me. I need list of download steps from someone who made it to work. I am interested in developing ECL code. I am interested also to be partner in company selling "TURN KEY SINGLE NODE SYSTEM" for people like me (Laptop/Surface/ or USB). 
I am mainframe developer for 45 years and I am not interested to learn UNIX and Windows more than I need.\\n\\nVladimir.vujic@aol.com\", \"post_time\": \"2014-07-14 16:50:47\" },\n\t{ \"post_id\": 6070, \"topic_id\": 1394, \"forum_id\": 14, \"post_subject\": \"Re: How to get C++ compiler switches to it.\", \"username\": \"ghalliday\", \"post_text\": \"See comment on https://track.hpccsystems.com/browse/HPCC-11985\", \"post_time\": \"2014-07-21 12:23:08\" },\n\t{ \"post_id\": 6064, \"topic_id\": 1394, \"forum_id\": 14, \"post_subject\": \"How to get C++ compiler switches to it.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I can do the following on eclccserver:\\n\\neclcc -platform=hthor embedjson.ecl -Wc,-Igithub/rapidjson/include -Wc,-D__STDC_CONSTANT_MACROS -shared\\n\\necl run --target=hthor liba.out.so
\\nand the program runs correctly.\\n\\nBut, I want to be able to place my program, embedjson.ecl, into ECL IDE and run it. What must I do so I can run this program from my ECL IDE?\\n\\nBy the way, I know I have to put the rapidjson/includes on eclccserver and have done so. But, I don’t know what to do about “-Wc,-D__STDC_CONSTANT_MACROS”. I tried to add it as an argument to eclcc in the compiler tab of perferences of my ECL IDE, but that didn’t work.\\n\\nAny ideas?\", \"post_time\": \"2014-07-18 13:38:30\" },\n\t{ \"post_id\": 6855, \"topic_id\": 1420, \"forum_id\": 14, \"post_subject\": \"Re: Single Node Installation Error\", \"username\": \"jmritz\", \"post_text\": \"I fixed this error by installing the correct rpm version for my OS! Please make sure that you know what version your OS is. \", \"post_time\": \"2015-01-26 13:54:44\" },\n\t{ \"post_id\": 6318, \"topic_id\": 1420, \"forum_id\": 14, \"post_subject\": \"Re: Single Node Installation Error\", \"username\": \"shank\", \"post_text\": \"in /etc/HPCCSystems/environment.conf file, I have replaced * in interface=* with IP of RHEL VM. Even then I'm getting the same error "no components configured to run on this node".\\n\\nAlso, I have tried to run configmgr utility using the command sudo /opt/HPCCSystems/sbin/configmgr for editing interface in environment.conf file.\\n\\nPFB the error I got:\\n\\nVerifying configmgr startup ...Failure\\nExiting configMgr\\n/opt/HPCCSystems/bin/start-stop-daemon: warning: failed to kill 48729: No such process\\n1 pids were not killed\\nNo process in pidfile '/var/run/HPCCSystems/configmgr_init.pid' found running; none killed\", \"post_time\": \"2014-09-17 12:41:42\" },\n\t{ \"post_id\": 6220, \"topic_id\": 1420, \"forum_id\": 14, \"post_subject\": \"Re: Single Node Installation Error\", \"username\": \"Gleb Aronsky\", \"post_text\": \"The error indicates that based on the active configuration (usually in /etc/HPCCSystems/environment.xml) the IP address of current box does not match any IP in the HPCC System configuration. In other words, no HPCC process is configured to run on that box.\\n\\nI suspect you may have multiple NICs. In environment.conf (/etc/HPCCSystems/environment.conf) find ‘interface=*’ and replace the ‘*’ with the appropriate NIC. Leaving it as ‘*’ selects the first card.\", \"post_time\": \"2014-08-15 13:56:09\" },\n\t{ \"post_id\": 6216, \"topic_id\": 1420, \"forum_id\": 14, \"post_subject\": \"Single Node Installation Error\", \"username\": \"jmritz\", \"post_text\": \"I am trying to install hpcc on my CentOS 6 box. 
I ran the rpm as follows:\\n\\nsudo rpm -Uvh --nodeps hpccsystems-platform_community-5.0.0-3.el5.x86_64.rpm\\n\\nEverything install OK, but when I try to start the service, I get "There are no components configured to run on this node..."\\n\\nAny ideas?\\n\\nThanks,\\n\\nJoe\", \"post_time\": \"2014-08-13 15:55:40\" },\n\t{ \"post_id\": 6321, \"topic_id\": 1444, \"forum_id\": 14, \"post_subject\": \"Re: Clean Ubuntu14.04 Install Fails\", \"username\": \"BobSmith\", \"post_text\": \"In case anyone else hits this error:\\nThe error was a simple mismatch between the deb package and ubuntu version (The ubuntu version I picked on the Azure cloud was not a "pure" 12.04 version - not even close as it turned out!)\\n\\nBob.\", \"post_time\": \"2014-09-18 14:54:33\" },\n\t{ \"post_id\": 6301, \"topic_id\": 1444, \"forum_id\": 14, \"post_subject\": \"Re: Clean Ubuntu14.04 Fails\", \"username\": \"BobSmith\", \"post_text\": \"Hmmm - I am now unable to remove the broken install:\\n\\nRemoving hpccsystems-platform (5.0.03) ...\\ndpkg: error processing package hpccsystems-platform (--remove):\\n subprocess installed post-removal script returned error exit status 1\\nErrors were encountered while processing:\\n hpccsystems-platform\\n\\nBob.\", \"post_time\": \"2014-09-13 14:26:34\" },\n\t{ \"post_id\": 6300, \"topic_id\": 1444, \"forum_id\": 14, \"post_subject\": \"Clean Ubuntu14.04 Install Fails\", \"username\": \"BobSmith\", \"post_text\": \"Installing HPCC for Ubuntu 14.04 is failing with the following errors:\\n\\ndpkg: dependency problems prevent configuration of hpccsystems-platform:\\n hpccsystems-platform depends on libboost-regex1.40.0; however:\\n Package libboost-regex1.40.0 is not installed.\\n hpccsystems-platform depends on libicu42; however:\\n Package libicu42 is not installed.\\n hpccsystems-platform depends on libxalan110; however:\\n Package libxalan110 is not installed.\\n hpccsystems-platform depends on libarchive1; however:\\n Package libarchive1 is not installed.\\n\\nAttempting to install the latest boost (as 1.40 isn't available in the repo) produces the following error:\\n\\nhpccsystems-platform : Depends: libboost-regex1.40.0 but it is not installable\\n Depends: libicu42 but it is not installable\\n Depends: libxalan110 but it is not installable\\n Depends: libarchive1 but it is not installable\\n\\nBob.\", \"post_time\": \"2014-09-13 14:23:34\" },\n\t{ \"post_id\": 6375, \"topic_id\": 1450, \"forum_id\": 14, \"post_subject\": \"Re: SSL 256-bit encryption\", \"username\": \"anthony.fishbeck\", \"post_text\": \"After a bit more testing using ESPs running on both AWS and linux, and browsers on linux and windows, it clearly seems to come down to the precedence order the browser gives to the ciphers. Windows browsers negotiated with a preference for some 128 bit ciphers above 256 bit ones and I ended up with 128 bit connections unless I forced it through browser config.\\n\\nUsing linux browsers I ended up with 256 bit connections without changing anything.\\n\\nThe browsers may be ranking for performance, or some ciphers may be considered to have safer schemes even if their encryption is not as strong, I'm not sure.\", \"post_time\": \"2014-09-24 14:53:23\" },\n\t{ \"post_id\": 6370, \"topic_id\": 1450, \"forum_id\": 14, \"post_subject\": \"Re: SSL 256-bit encryption\", \"username\": \"anthony.fishbeck\", \"post_text\": \"No, I was just narrowing down the options for an explicit test. 
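A minimal sketch of that kind of explicit cipher test from the command line (host and port are placeholders; 18010 is the usual ESP HTTPS port, adjust to your binding):

# ask the ESP HTTPS endpoint to negotiate a specific 256-bit cipher
openssl s_client -connect myesp.example.com:18010 -cipher AES256-SHA < /dev/null
# the "Cipher" line in the handshake output shows what was actually negotiated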
By default my Ubuntu firefox seems to negotiate Camellia-256 and my Ubuntu chrome negotiates AES_256_CBC.\\n\\nThis website will tell you what your browser is listing as ciphers (in order of it's preference).\\n\\nhttps://www.ssllabs.com/ssltest/viewMyClient.html\", \"post_time\": \"2014-09-22 19:22:15\" },\n\t{ \"post_id\": 6369, \"topic_id\": 1450, \"forum_id\": 14, \"post_subject\": \"Re: SSL 256-bit encryption\", \"username\": \"lpezet\", \"post_text\": \"I'm using the HPCC Instant Cloud for AWS, with Community Edition 4.2.2-1.\\nI'm not trying to use any specific ciphers.\\nUsing Chrome on Mac to access ESP over HTTPS just gave me an AES128 encryption by default (Chrome can use AES-256 just fine with other web sites).\\n\\nAre you saying you had to force Firefox to do 256-bit encryption?\\n\\n\\nThanks for your help!\\nLuc.\", \"post_time\": \"2014-09-22 18:27:39\" },\n\t{ \"post_id\": 6368, \"topic_id\": 1450, \"forum_id\": 14, \"post_subject\": \"Re: SSL 256-bit encryption\", \"username\": \"anthony.fishbeck\", \"post_text\": \"On my Ubuntu 12.10 box, with ESP HTTPS configured following the steps in the PDF, I was able to use AES256-SHA.\\n\\nI tested with both the openssl s_client and by configuring firefox to only use that specific cipher.\\n\\nCan you tell me which OS you are using, how you are testing, and which specific ciphers you are trying to use?\\n\\nRegards,\\nTony\", \"post_time\": \"2014-09-22 17:53:09\" },\n\t{ \"post_id\": 6324, \"topic_id\": 1450, \"forum_id\": 14, \"post_subject\": \"SSL 256-bit encryption\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI setup HTTPS for ESP a while back and it works great!\\nOne thing I noticed though is that the SSL is "only" (AES) 128-bit encrypted.\\nI see on the server openssl has ciphers and all for AES 256 bits yet it doesn't seem to be using it...?\\n\\nDid I miss something in HTTPS setup?\\nI used the following PDF (or something like it...looks like it's now for 5.0 but I have the Community Edition 4.2.2 and I was using a similar doc at the time I setup HTTPs).\\nhttp://cdn.hpccsystems.com/releases/CE- ... .0.0-1.pdf\\n\\nThanks!\\nLuc.\", \"post_time\": \"2014-09-18 17:18:02\" },\n\t{ \"post_id\": 6343, \"topic_id\": 1455, \"forum_id\": 14, \"post_subject\": \"Re: HPCC installation error\", \"username\": \"ming\", \"post_text\": \"Does HPCC Platform install run successfully without error?\\nFor HedRed/CentOS if installing hpccsystems-platform-community_with_plugins-xxx make sure using rpm option "--nodeps". Otherwise install will fail or incomplete.\", \"post_time\": \"2014-09-19 15:43:31\" },\n\t{ \"post_id\": 6336, \"topic_id\": 1455, \"forum_id\": 14, \"post_subject\": \"HPCC installation error\", \"username\": \"shank\", \"post_text\": \"While starting the HPCC server in RHEL VM using the command, \\nsudo /sbin/service hpcc-init start I am getting the below error \\n\\n
No components configured to run on this node
.\\n\\nI have tried tried changing the interface=* value in /etc/HPCCSystems/environment.conf file using configmgr utility. But got the below error while starting(sudo /opt/HPCCSystems/sbin/configmgr) the utility\\n\\nVerifying configmgr startup ...Failure\\nExiting configMgr\\n/opt/HPCCSystems/bin/start-stop-daemon: warning: failed to kill 48729: No such process\\n1 pids were not killed\\nNo process in pidfile '/var/run/HPCCSystems/configmgr_init.pid' found running; none killed.\\n\\nIs this because I am trying to installing HPCC in virtual machine?\", \"post_time\": \"2014-09-19 08:42:11\" },\n\t{ \"post_id\": 6383, \"topic_id\": 1461, \"forum_id\": 14, \"post_subject\": \"Re: HPCC on Microsoft Azure\", \"username\": \"David Dasher\", \"post_text\": \"Fantastic. \\n\\nThank you. \\n\\nDavid\", \"post_time\": \"2014-09-26 15:04:05\" },\n\t{ \"post_id\": 6382, \"topic_id\": 1461, \"forum_id\": 14, \"post_subject\": \"Re: HPCC on Microsoft Azure\", \"username\": \"tlhumphrey2\", \"post_text\": \"That folder, temp, is in the list of directories of environment.xml, see below.\\n\\n <Directories name="HPCCSystems">\\n <Category dir="/var/log/[NAME]/[INST]" name="log"/>\\n <Category dir="/var/lib/[NAME]/[INST]" name="run"/>\\n <Category dir="/etc/[NAME]/[INST]" name="conf"/>\\n <Category dir="/var/lib/[NAME]/[INST]/temp" name="temp"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data/[COMPONENT]" name="data"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data2/[COMPONENT]" name="data2"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data3/[COMPONENT]" name="data3"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data4/[COMPONENT]" name="data4"/>\\n <Category dir="/var/lib/[NAME]/hpcc-mirror/[COMPONENT]" name="mirror"/>\\n <Category dir="/var/lib/[NAME]/queries/[INST]" name="query"/>\\n <Category dir="/var/lock/[NAME]/[INST]" name="lock"/>\\n </Directories>\\n\\nSo, I'm thinking you should be able to set that directory's IP address to that of the SDD's using HPCCSystem/configmgr.\", \"post_time\": \"2014-09-26 14:52:32\" },\n\t{ \"post_id\": 6379, \"topic_id\": 1461, \"forum_id\": 14, \"post_subject\": \"HPCC on Microsoft Azure\", \"username\": \"David Dasher\", \"post_text\": \"Hello all\\n\\nOur HPCC platform is hosted on Microsoft Azure. Azure now offers SSD drives for local temp space (Non persisted), how can I configure my platform to use the SSDs (or a folder on it) for all my Intermediate spill and/or persist files?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-09-26 10:39:43\" },\n\t{ \"post_id\": 6465, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Server on Linux VM\", \"username\": \"fernando\", \"post_text\": \"Hi Biswanath-\\n\\nI recommend you work out the connectivity issues prior to installing the HPCC rpm.\\n\\nAssuming they have valid IPs and you can ssh from one VM to another you should be fine.\\n\\nIOW make sure you Virtual Machines are on the network first, then install your HPCC rpm. \\n\\n\\nHTH\\n\\nF\", \"post_time\": \"2014-10-16 16:07:11\" },\n\t{ \"post_id\": 6464, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Server on Linux VM\", \"username\": \"tlhumphrey2\", \"post_text\": \"Yes. This is possible. 
The following link shows a video telling how to do it.\\n\\nhttp://hpccsystems.com/community/training-videos/hpcc-linux\\n\\nThe number of nodes your system has will be limited.\", \"post_time\": \"2014-10-16 16:03:29\" },\n\t{ \"post_id\": 6461, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Server on Linux VM\", \"username\": \"biswanath_c\", \"post_text\": \"Thanks for the inputs. But I would like to know if a full HPCC server setup (and not the HPCC VM) can be done inside a Linux VM (Linux VM running on a Windows server and not an actual Linux server). \\n\\nI want an actual HPCC server installation (and not a HPCC VM) to be done inside a Linux VM (and not an actual Linux OS). And I would like to access this HPCC server from another Windows machine (present in the same LAN as the Windows server running the Linux VM).\\n\\nIs this kind of setup possible?\\n\\nThanks\\nBiswanath\", \"post_time\": \"2014-10-16 15:29:22\" },\n\t{ \"post_id\": 6439, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Server on Linux VM\", \"username\": \"tlhumphrey2\", \"post_text\": \"The following link will give you more information: http://hpccsystems.com/download/hpcc-vm-image.\", \"post_time\": \"2014-10-14 14:32:36\" },\n\t{ \"post_id\": 6435, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Server on Linux VM\", \"username\": \"rtaylor\", \"post_text\": \"Yes. Our VM runs in VMware or Virtual Box on Windows.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-10-14 13:30:02\" },\n\t{ \"post_id\": 6407, \"topic_id\": 1472, \"forum_id\": 14, \"post_subject\": \"HPCC Server on Linux VM\", \"username\": \"biswanath_c\", \"post_text\": \"Hi,\\n\\nI have a basic question. Can HPCC server be setup on a Linux Virtual Machine (running on Windows server)? Is this kind of setup possible?\\n\\nThanks\\nBiswa\", \"post_time\": \"2014-10-08 14:55:09\" },\n\t{ \"post_id\": 6422, \"topic_id\": 1476, \"forum_id\": 14, \"post_subject\": \"Re: how to launch a small node?\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you are using windows then VMware is the way to go. Appendix A of the documentation gives you very specific details about how to download the vm player and how to download Ubuntu 12.04 vm image.\", \"post_time\": \"2014-10-10 12:29:44\" },\n\t{ \"post_id\": 6417, \"topic_id\": 1476, \"forum_id\": 14, \"post_subject\": \"Re: how to launch a small node?\", \"username\": \"chennapooja\", \"post_text\": \"Thank you...\\n\\nBut I have a doubt, is Juju charm applicable only for Ubuntu Linux machine? If I am using windows, then should I use VMware? Is my understanding correct, can you please confirm?\", \"post_time\": \"2014-10-09 17:36:19\" },\n\t{ \"post_id\": 6416, \"topic_id\": 1476, \"forum_id\": 14, \"post_subject\": \"Re: how to launch a small node?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I assume you are using InstantCloud to deploy your HPCC System to AWS. If so, there is no way to change the instance type with InstantCloud.\\n\\nI attempted to attach a document that tells you how to use the HPCC Charm with juju charm to configure and deploy an HPCC System to AWS (With it you can change the instance type as well as other things). But, the file was too large to attach. Only files smaller than 512KB can be attached. So, I've placed it on github in the following repository, https://github.com/tlhumphrey2/HPCCtS3. \\n\\nThe file is a pdf document called UsingHPCCCharm.pdf. 
This document gives you a lot of detail about how to use HPCC Charm with juju charm to configure and deploy an HPCC System to AWS. Plus, it tells you how to setup for using it.\", \"post_time\": \"2014-10-09 17:10:07\" },\n\t{ \"post_id\": 6415, \"topic_id\": 1476, \"forum_id\": 14, \"post_subject\": \"how to launch a small node?\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n I am using Amazon AWS services for launching a micro instance of type t2. Now, when I use my security credentials and login using aws.hpcc website, I do not find an option to launch small nodes, it always launches large nodes and I am incurring charges for using amazon large nodes. \\n\\n I am a student and learning ECL ML, so I do not need large instances, small, micro instances are enough for me, can someone help me in installing and launching a micro instance using hpcc.\\n\\n Thanks in advance.\", \"post_time\": \"2014-10-09 16:16:13\" },\n\t{ \"post_id\": 6445, \"topic_id\": 1480, \"forum_id\": 14, \"post_subject\": \"Re: Function for calculating CRC\", \"username\": \"rtaylor\", \"post_text\": \"The function call itself is probably somewhere in the DFU code in GitHub. That would be my next place to look \", \"post_time\": \"2014-10-14 18:28:47\" },\n\t{ \"post_id\": 6444, \"topic_id\": 1480, \"forum_id\": 14, \"post_subject\": \"Re: Function for calculating CRC\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm looking for the function that is executed by dfuplus when it created the metadata xml file for a logical file. When dfuplus does this, it calculates the CRC for the logical file and all its file parts.\\n\\nAs I mentioned earlier, I thought this might be crc32 or cksum (both command line utilities found in Linux). But, both of these give a different crc than what I see in a metadata xml file.\", \"post_time\": \"2014-10-14 18:08:08\" },\n\t{ \"post_id\": 6443, \"topic_id\": 1480, \"forum_id\": 14, \"post_subject\": \"Re: Function for calculating CRC\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nDid you try our HASHCRC function?\\n\\nRichard\", \"post_time\": \"2014-10-14 17:35:55\" },\n\t{ \"post_id\": 6441, \"topic_id\": 1480, \"forum_id\": 14, \"post_subject\": \"Function for calculating CRC\", \"username\": \"tlhumphrey2\", \"post_text\": \"The metadata xml file for a logical file on THOR provides the CRC for the logical file and all its file parts. What function is used to calculate that CRC? I thought it might be crc32 or cksum, but both give values different than the values in the metadata file.\", \"post_time\": \"2014-10-14 15:48:03\" },\n\t{ \"post_id\": 6609, \"topic_id\": 1513, \"forum_id\": 14, \"post_subject\": \"Re: WsSQL_ESP_Services\", \"username\": \"bobl\", \"post_text\": \"Thans, It's works now\", \"post_time\": \"2014-11-19 06:18:37\" },\n\t{ \"post_id\": 6605, \"topic_id\": 1513, \"forum_id\": 14, \"post_subject\": \"Re: WsSQL_ESP_Services\", \"username\": \"DSC\", \"post_text\": \"configmgr works on only one copy of environment.xml (or whatever you call the configuration file you're editing), located on the same system you accessed configrmgr. Once you make changes, you need to copy the changed environment.xml to all other nodes in your cluster. All of this is best done when HPCC is shutdown (though you can do otherwise if you know what you're doing), then you start the cluster back up after the configuration file has been propagated.\\n\\nAlso, port 8015 is open only when configmgr is running. 
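A minimal sketch of that copy-to-every-node step (node names are placeholders; assumes you can write to /etc/HPCCSystems on the other nodes):

sudo service hpcc-init stop
# push the edited configuration to each of the other nodes
for host in node2 node3 node4; do
    scp /etc/HPCCSystems/environment.xml $host:/etc/HPCCSystems/environment.xml
done
sudo service hpcc-init start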
Normally, it is not: You shut it down after you make changes save the file.\\n\\nDid you do all that? It sounds like you may have missed a step somewhere.\\n\\nDan\", \"post_time\": \"2014-11-18 11:59:13\" },\n\t{ \"post_id\": 6603, \"topic_id\": 1513, \"forum_id\": 14, \"post_subject\": \"WsSQL_ESP_Services\", \"username\": \"bobl\", \"post_text\": \"Hi,\\n\\nI run into a problem Configure Using Wizard. I finished the configured the environment.xml and restart the system. but I can't access the WsSQL service in my borwser , I can only access the port 8015. See the attached screenshot.\", \"post_time\": \"2014-11-18 07:28:47\" },\n\t{ \"post_id\": 6622, \"topic_id\": 1520, \"forum_id\": 14, \"post_subject\": \"Re: RAID and file system - Recommendation\", \"username\": \"omnibuzz\", \"post_text\": \"Perfect. Thank you, Bob.\\n-Srini\", \"post_time\": \"2014-11-20 20:37:41\" },\n\t{ \"post_id\": 6621, \"topic_id\": 1520, \"forum_id\": 14, \"post_subject\": \"Re: RAID and file system - Recommendation\", \"username\": \"bforeman\", \"post_text\": \"Hi Srini,\\n\\nFrom the HPCC team:\\n\\nSoftware Raid 64k chunk \\nHardware 1 M\\nFor questions 2 and 3 we run ext4 with a 4K block size\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-11-20 20:20:22\" },\n\t{ \"post_id\": 6617, \"topic_id\": 1520, \"forum_id\": 14, \"post_subject\": \"RAID and file system - Recommendation\", \"username\": \"omnibuzz\", \"post_text\": \"I am installing HPCC in a linux box. I am looking at doing a RAID0 or RAID10 across 8 SSD drives.\\nIs there a recommended chuck size for the RAID setup I am thinking of doing a 64Kb now.\\nWhat about the filesystem. I am planning to use ext4 with blocksize of 4KB.\\n\\nJust wanted to see if there is any recommendations and suggestions here for:\\n1. RAID Chunk size (For mdadm RAID0 and RAID10)\\n2. File system preference\\n3. block size\\n\\nThanks\\nSrini\", \"post_time\": \"2014-11-20 16:24:48\" },\n\t{ \"post_id\": 6631, \"topic_id\": 1523, \"forum_id\": 14, \"post_subject\": \"Re: HPCC LDAP Authentication:ldap anonymous bind error\", \"username\": \"william.whitehead\", \"post_text\": \"Bob, thanks for the inquiry, hopefully we can get this straightened out. From what I see in the logfiles, the problem is that HPCC (both DALI and ESP) are unable to contact the LDAP Server\\n\\n"Failed to connect to Dali Server 192.168.230.157:7070."\\n"192.168.230.157:7070 Retrying..."\\n"Failed to connect to Dali Server 192.168.230.157:7070."\\n"Failed to connect to Dali Server 192.168.230.157:7070."\\n\\nI am sure you have tried the obvious things like pinging the LDAP server. Are you saying that it did work for several days and then stopped? Or were you never able to make them connect? If you use the Apache Directory Studio and attach to that LDAP Server, can you confirm whether HPCC was able to connect and create all the initial OUs (ou=users,ou=ecl and ou=groups,ou=ecl and others). If so it could be some type of network issue, perhaps you could look at the LDAP Server log to see why the connect is failing. Anyway let me know if it ever connected and we can start from there.\\n\\nRuss Whitehead\", \"post_time\": \"2014-11-24 20:49:55\" },\n\t{ \"post_id\": 6627, \"topic_id\": 1523, \"forum_id\": 14, \"post_subject\": \"HPCC LDAP Authentication:ldap anonymous bind error\", \"username\": \"bobl\", \"post_text\": \"Hi,\\n\\nI am trying to configure HPCC with LDAP authentication. 
After 2 days, I ran into a problem; the error says &quot;ldap anonymous bind error (-1) - Can't contact LDAP server&quot;.\\n\\nMy esp server and ldap server config is as follows:\\n
<EspProcess build="_"\\n buildSet="esp"\\n componentfilesDir="/opt/HPCCSystems/componentfiles"\\n daliServers="mydali"\\n description="ESP server"\\n enableSEHMapping="true"\\n formOptionsAccess="false"\\n httpConfigAccess="true"\\n logLevel="1"\\n logRequests="false"\\n logResponses="false"\\n maxBacklogQueueSize="200"\\n maxConcurrentThreads="0"\\n maxRequestEntityLength="8000000"\\n name="myesp"\\n perfReportDelay="60"\\n portalurl="http://hpccsystems.com/download">\\n <Authentication htpasswdFile="/etc/HPCCSystems/.htpasswd"\\n ldapAuthMethod="simple"\\n ldapConnections="10"\\n ldapServer="ldapserver"\\n method="ldap"/>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="smc"\\n port="8010"\\n protocol="http"\\n resourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n service="EclWatch"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to SMC service"\\n path="/"\\n required="Read"\\n resource="SmcAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to SMC service"\\n path="SmcAccess"\\n resource="SmcAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to thor queues"\\n path="ThorQueueAccess"\\n resource="ThorQueueAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to roxie control commands"\\n path="RoxieControlAccess"\\n resource="RoxieControlAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to super computer environment"\\n path="ConfigAccess"\\n resource="ConfigAccess"\\n service="ws_config"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU"\\n path="DfuAccess"\\n resource="DfuAccess"\\n service="ws_dfu"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU XRef"\\n path="DfuXrefAccess"\\n resource="DfuXrefAccess"\\n service="ws_dfuxref"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to machine information"\\n path="MachineInfoAccess"\\n resource="MachineInfoAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to SNMP metrics information"\\n path="MetricsAccess"\\n resource="MetricsAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to remote execution"\\n path="ExecuteAccess"\\n resource="ExecuteAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU workunits"\\n path="DfuWorkunitsAccess"\\n resource="DfuWorkunitsAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU exceptions"\\n path="DfuExceptionsAccess"\\n resource="DfuExceptions"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to spraying files"\\n path="FileSprayAccess"\\n resource="FileSprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to despraying of files"\\n path="FileDesprayAccess"\\n resource="FileDesprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to dkcing of key files"\\n path="FileDkcAccess"\\n resource="FileDkcAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to file upload"\\n path="FileUploadAccess"\\n resource="FileUploadAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to files in 
dropzone"\\n path="FileIOAccess"\\n resource="FileIOAccess"\\n service="ws_fileio"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to permissions for file scopes"\\n path="FileScopeAccess"\\n resource="FileScopeAccess"\\n service="ws_access"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to Roxie queries and files"\\n path="RoxieQueryAccess"\\n resource="RoxieQueryAccess"\\n service="ws_roxiequery"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to cluster topology"\\n path="ClusterTopologyAccess"\\n resource="ClusterTopologyAccess"\\n service="ws_topology"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to ECL direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n </EspBinding>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="ws_ecl"\\n port="8002"\\n protocol="http"\\n resourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n service="ws_ecl"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </EspBinding>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="ws_sql"\\n port="8510"\\n protocol="http"\\n resourcesBasedn="ou=WsSql,ou=EspServices,ou=ecl"\\n service="ws_sql"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to WS SQL service"\\n path="/"\\n required="Read"\\n resource="WsSqlAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS SQL service"\\n path="WsSQLAccess"\\n resource="WsSqlAccess"\\n service="ws_sql"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_sql"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_sql"/>\\n </EspBinding>\\n <HTTPS acceptSelfSigned="true"\\n CA_Certificates_Path="ca.pem"\\n certificateFileName="certificate.cer"\\n city=""\\n country="US"\\n daysValid="365"\\n enableVerification="false"\\n organization="Customer of HPCCSystems"\\n organizationalUnit=""\\n passphrase=""\\n privateKeyFileName="privatekey.cer"\\n regenerateCredentials="false"\\n requireAddressMatch="false"\\n state=""\\n trustedPeers="anyone"/>\\n <Instance computer="localhost"\\n directory="/var/lib/HPCCSystems/myesp"\\n FQDN=""\\n name="s1"\\n netAddress="."/>\\n </EspProcess>\\n <EspService ActivityInfoCacheSeconds="10"\\n allowNewRoxieOnDemandQuery="false"\\n AWUsCacheTimeout="15"\\n build="_"\\n buildSet="espsmc"\\n description="ESP services for SMC"\\n 
disableUppercaseTranslation="false"\\n enableSystemUseRewrite="false"\\n excludePartitions="/,/dev*,/sys,/usr,/proc/*"\\n monitorDaliFileServer="false"\\n name="EclWatch"\\n pluginsPath="/opt/HPCCSystems/plugins"\\n syntaxCheckQueue=""\\n viewTimeout="1000"\\n warnIfCpuLoadOver="95"\\n warnIfFreeMemoryUnder="5"\\n warnIfFreeStorageUnder="5">\\n <Properties defaultPort="8010"\\n defaultResourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n defaultSecurePort="18010"\\n type="WsSMC">\\n <Authenticate access="Read"\\n description="Root access to SMC service"\\n path="/"\\n required="Read"\\n resource="SmcAccess"/>\\n <AuthenticateFeature description="Access to SMC service"\\n path="SmcAccess"\\n resource="SmcAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to thor queues"\\n path="ThorQueueAccess"\\n resource="ThorQueueAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to roxie control commands"\\n path="RoxieControlAccess"\\n resource="RoxieControlAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to super computer environment"\\n path="ConfigAccess"\\n resource="ConfigAccess"\\n service="ws_config"/>\\n <AuthenticateFeature description="Access to DFU"\\n path="DfuAccess"\\n resource="DfuAccess"\\n service="ws_dfu"/>\\n <AuthenticateFeature description="Access to DFU XRef"\\n path="DfuXrefAccess"\\n resource="DfuXrefAccess"\\n service="ws_dfuxref"/>\\n <AuthenticateFeature description="Access to machine information"\\n path="MachineInfoAccess"\\n resource="MachineInfoAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to SNMP metrics information"\\n path="MetricsAccess"\\n resource="MetricsAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to remote execution"\\n path="ExecuteAccess"\\n resource="ExecuteAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to DFU workunits"\\n path="DfuWorkunitsAccess"\\n resource="DfuWorkunitsAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to DFU exceptions"\\n path="DfuExceptionsAccess"\\n resource="DfuExceptions"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to spraying files"\\n path="FileSprayAccess"\\n resource="FileSprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to despraying of files"\\n path="FileDesprayAccess"\\n resource="FileDesprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to dkcing of key files"\\n path="FileDkcAccess"\\n resource="FileDkcAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to file upload"\\n path="FileUploadAccess"\\n resource="FileUploadAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to files in dropzone"\\n path="FileIOAccess"\\n resource="FileIOAccess"\\n service="ws_fileio"/>\\n <AuthenticateFeature description="Access to permissions for file scopes"\\n path="FileScopeAccess"\\n resource="FileScopeAccess"\\n service="ws_access"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n <AuthenticateFeature description="Access to Roxie queries and files"\\n path="RoxieQueryAccess"\\n resource="RoxieQueryAccess"\\n service="ws_roxiequery"/>\\n <AuthenticateFeature description="Access to cluster topology"\\n path="ClusterTopologyAccess"\\n resource="ClusterTopologyAccess"\\n service="ws_topology"/>\\n <AuthenticateFeature description="Access to own workunits"\\n 
path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to ECL direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n <ProcessFilters>\\n <Platform name="Windows">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n </Platform>\\n <Platform name="Linux">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n <ProcessFilter name="GenesisServerProcess">\\n <Process name="httpd"/>\\n <Process name="atftpd"/>\\n <Process name="dhcpd"/>\\n </ProcessFilter>\\n </Platform>\\n </ProcessFilters>\\n </Properties>\\n </EspService>\\n <EspService build="_"\\n buildSet="ws_ecl"\\n description="WS ECL Service"\\n name="ws_ecl">\\n <Properties bindingType="ws_eclSoapBinding"\\n defaultPort="8002"\\n defaultResourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n defaultSecurePort="18002"\\n plugin="ws_ecl"\\n type="ws_ecl">\\n <Authenticate access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </Properties>\\n </EspService>\\n <EspService build="_"\\n buildSet="ws_sql"\\n description=""\\n name="ws_sql">\\n <Properties bindingType="ws_sqlSoapBinding"\\n defaultPort="8510"\\n defaultResourcesBasedn="ou=WsSql,ou=EspServices,ou=ecl"\\n defaultSecurePort="18510"\\n plugin="ws_sql"\\n type="ws_sql">\\n <Authenticate access="Read"\\n description="Root access to WS SQL service"\\n path="/"\\n required="Read"\\n resource="WsSqlAccess"/>\\n <AuthenticateFeature description="Access to WS SQL service"\\n path="WsSQLAccess"\\n resource="WsSqlAccess"\\n service="ws_sql"/>\\n <AuthenticateFeature description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_sql"/>\\n <AuthenticateFeature description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_sql"/>\\n </Properties>\\n </EspService>\\n
\\n\\n\\n <LDAPServerProcess build="_"\\n buildSet="ldapServer"\\n cacheTimeout="5"\\n description="LDAP server process"\\n filesBasedn="ou=files,ou=ecl"\\n groupsBasedn="ou=groups,ou=ecl"\\n ldapPort="389"\\n ldapSecurePort="636"\\n modulesBasedn="ou=modules,ou=ecl"\\n name="ldapserver"\\n sudoersBasedn="cn=Manager,dc=example,dc=com"\\n systemBasedn="cn=Manager,dc=example,dc=com"\\n systemCommonName="root"\\n systemPassword="pGrUieiINv7nuIZa93yCtQ=="\\n systemUser="root"\\n usersBasedn="ou=users,ou=ecl"\\n workunitsBasedn="ou=workunits,ou=ecl">\\n <Instance computer="ldap001037" name="s1" netAddress="192.168.1.37"/>\\n </LDAPServerProcess>\\n
\\n\\nWhile accessing ecl_watch services and esp web services fails, following are the logs,\\n\\nesp log:\\n0000010C 2014-11-22 16:01:36.397 3966 3966 "Stack[00007FFF6D83A7D0]: FFFFFFFFFFFFFFFC FFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFF 6D83ACE0FFFFFFFF 00007FFF6D83ACE0 9949BB9D00007FFF 00007FCB9949BB9D 0000029300007FCB"\\n0000010D 2014-11-22 16:01:36.397 3966 3966 "Backtrace:"\\n0000010E 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x28) [0x7fcb9ab854c8]"\\n0000010F 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP9siginfo_tPv+0x1fe) [0x7fcb9ab858fe]"\\n00000110 2014-11-22 16:01:36.463 3966 3966 " /lib/x86_64-linux-gnu/libpthread.so.0(+0x10340) [0x7fcb9949c340]"\\n00000111 2014-11-22 16:01:36.463 3966 3966 " esp(_ZN16CEspAbortHandler7onAbortEv+0x29) [0x40dc09]"\\n00000112 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libjlib.so(_Z13notifyOnAbortv+0x9a) [0x7fcb9abc92ca]"\\n00000113 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libjlib.so(+0x1162f5) [0x7fcb9abc92f5]"\\n00000114 2014-11-22 16:01:36.463 3966 3966 " /lib/x86_64-linux-gnu/libpthread.so.0(+0x10340) [0x7fcb9949c340]"\\n00000115 2014-11-22 16:01:36.463 3966 3966 " /lib/x86_64-linux-gnu/libpthread.so.0(__nanosleep+0x2d) [0x7fcb9949bb9d]"\\n00000116 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libjlib.so(_Z10MilliSleepj+0x85) [0x7fcb9ac2fcd5]"\\n00000117 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libdalibase.so(_Z21registerClientProcessP13ICommunicatorRP6IGroupj14DaliClientRole+0x323) [0x7fcb9a127403]"\\n00000118 2014-11-22 16:01:36.463 3966 3966 " /opt/HPCCSystems/lib/libdalibase.so(_Z17initClientProcessP6IGroup14DaliClientRolejPKcS3_j+0x59) [0x7fcb9a089e49]"\\n00000119 2014-11-22 16:01:36.463 3966 3966 " esp(_ZN10CEspConfig8initDaliEPKc+0x73) [0x408ae3]"\\n0000011A 2014-11-22 16:01:36.463 3966 3966 " esp(_ZN10CEspConfigC1EP11IPropertiesP13IPropertyTreeS3_b+0xa23) [0x40a0d3]"\\n0000011B 2014-11-22 16:01:36.463 3966 3966 " esp(_Z9init_mainiPPc+0x3a0) [0x40cdc0]"\\n0000011C 2014-11-22 16:01:36.463 3966 3966 " esp(main+0x10) [0x408710]"\\n0000011D 2014-11-22 16:01:36.463 3966 3966 " /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf5) [0x7fcb990e7ec5]"\\n0000011E 2014-11-22 16:01:36.463 3966 3966 " esp() [0x408751]"\\n0000011F 2014-11-22 16:01:36.463 3966 3966 "ThreadList:\\n7FCB979A8700 140512398575360 3967: CMPNotifyClosedThread\\n7FCB971A7700 140512390182656 3968: CSocketBaseThread\\n7FCB969A6700 140512381789952 3969: MP Connection Thread\\n7FCB959A4700 140512365004544 13015: dasess.registerClientProcess\\n"\\n00000001 2014-11-22 16:03:28.397 3910 3910 "Esp starting community_5.0.2-1"\\n00000002 2014-11-22 16:03:28.409 3910 3910 "componentfiles are under /opt/HPCCSystems/componentfiles"\\n00000003 2014-11-22 16:03:28.409 3910 3910 "ESP process name [myesp]"\\n00000004 2014-11-22 16:03:28.409 3910 3910 "Initializing DALI client [servers = 192.168.230.157:7070]"\\n00000005 2014-11-22 16:03:33.411 3910 3910 "Failed to connect to Dali Server 192.168.230.157:7070."\\n00000006 2014-11-22 16:03:33.411 3910 3910 "192.168.230.157:7070 Retrying..."\\n00000007 2014-11-22 16:03:43.414 3910 3910 "Failed to connect to Dali Server 192.168.230.157:7070."\\n00000008 2014-11-22 16:03:53.415 3910 3910 "Failed to connect to Dali Server 192.168.230.157:7070."\\n00000009 2014-11-22 16:04:03.417 3910 3910 "Failed to connect to Dali Server 192.168.230.157:7070."\\n0000000A 2014-11-22 16:04:03.417 3910 3910 "192.168.230.157:7070 
Retrying..."\\n0000000B 2014-11-22 16:04:13.418 3910 3910 "Failed to connect to Dali Server 192.168.230.157:7070."\\n
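\\n\\nThe repeated "Failed to connect to Dali Server 192.168.230.157:7070" entries above only say that ESP cannot reach Dali; the Dali log below shows why Dali itself keeps going down. A quick way to confirm whether Dali is actually up and listening on that port (hypothetical commands, run on the Dali node):\\n\\nps -ef | grep daserver\\nss -ltn | grep 7070\\n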
\\n\\nmydali server log,\\n\\n00000000 2014-11-22 16:01:12.149 12081 12081 "Build community_5.0.2-1"\\n00000001 2014-11-22 16:01:12.150 12081 12081 "WARNING: Local path used for backup url: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000002 2014-11-22 16:01:12.151 12081 12081 "Backup URL = //192.168.230.157/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000003 2014-11-22 16:01:12.151 12081 12081 "Checking backup location: //192.168.230.157/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000004 2014-11-22 16:01:12.166 12081 12081 "Checking for existing daserver instances"\\n00000005 2014-11-22 16:01:12.179 12081 12081 "Server Version = 3.12, required minimum client version 1.5"\\n00000006 2014-11-22 16:01:12.186 12081 12081 "DFS Server: numThreads=30"\\n00000007 2014-11-22 16:01:12.224 12081 12081 "BackupHandler started, async=false"\\n00000008 2014-11-22 16:01:12.239 12081 12081 "loading store 15, storedCrc=ccc95595"\\n00000009 2014-11-22 16:01:12.244 12081 12081 "store loaded"\\n0000000A 2014-11-22 16:01:12.244 12081 12081 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000B 2014-11-22 16:01:12.248 12081 12081 "Scanning store for external references"\\n0000000C 2014-11-22 16:01:12.249 12081 12081 "External reference count = 0"\\n0000000D 2014-11-22 16:01:12.288 12081 12081 "WARNING: Active cluster 'mythor' group layout does not match environment [matched old environment=true]"\\n0000000E 2014-11-22 16:01:12.288 12081 12081 "WARNING: New cluster layout for cluster mythor_spares"\\n0000000F 2014-11-22 16:01:12.288 12081 12081 "WARNING: Active cluster 'myroxie' group layout does not match environment [matched old environment=true]"\\n00000010 2014-11-22 16:01:12.292 12081 12081 "DFS group initialization : Active cluster 'mythor' group layout does not match environment [matched old environment=true]\\nNew cluster layout for cluster mythor_spares\\nActive cluster 'myroxie' group layout does not match environment [matched old environment=true]\\n"\\n00000011 2014-11-22 16:01:12.300 12081 12081 "Added ldap server 192.168.1.37"\\n00000012 2014-11-22 16:01:33.300 12081 12081 "ldap anonymous bind error (-1) - Can't contact LDAP server"\\n00000013 2014-11-22 16:01:36.301 12081 12081 "Server 192.168.1.37 temporarily unreachable."\\n00000014 2014-11-22 16:01:36.301 12081 12081 "Retrying with 192.168.1.37..."\\n00000015 2014-11-22 16:01:46.419 12081 12081 "Suspending 6"\\n00000016 2014-11-22 16:01:46.419 12081 12081 "Suspending 5"\\n00000017 2014-11-22 16:01:46.419 12081 12081 "Suspending 4"\\n00000018 2014-11-22 16:01:46.419 12081 12081 "Suspending 3"\\n00000019 2014-11-22 16:01:46.419 12081 12081 "Suspending 2"\\n0000001A 2014-11-22 16:01:46.419 12081 12081 "Suspending 1"\\n0000001B 2014-11-22 16:01:46.419 12081 12081 "Suspending subscriptions"\\n0000001C 2014-11-22 16:01:46.419 12081 12081 "Suspended subscriptions"\\n0000001D 2014-11-22 16:01:46.419 12081 12081 "Suspending 0"\\n0000001E 2014-11-22 16:01:46.419 12081 12081 "Stopping 6"\\n0000001F 2014-11-22 16:01:46.420 12081 12081 "Stopping 5"\\n00000020 2014-11-22 16:01:46.420 12081 12081 "Stopping 4"\\n00000021 2014-11-22 16:01:46.421 12081 12081 "Stopping 3"\\n00000022 2014-11-22 16:01:46.421 12081 12081 "Stopping 2"\\n00000023 2014-11-22 16:01:46.421 12081 12081 "clearing remaining sds locks"\\n00000024 2014-11-22 16:01:46.421 12081 12081 "waiting for transaction server to stop"\\n00000025 2014-11-22 16:01:46.421 12081 12081 "waiting for coalescer to stop"\\n00000026 2014-11-22 16:01:46.421 12081 12081 "Saving 
store"\\n00000027 2014-11-22 16:01:46.430 12081 12081 "Copying store to backup location"\\n00000028 2014-11-22 16:01:46.431 12081 12081 "Copy done"\\n00000029 2014-11-22 16:01:46.431 12081 12081 "Store saved"\\n0000002A 2014-11-22 16:01:46.431 12081 12081 "Deleting old store: /var/lib/HPCCSystems/hpcc-data/dali/dalisds14.xml"\\n0000002B 2014-11-22 16:01:46.433 12081 12081 "Stopping 1"\\n0000002C 2014-11-22 16:01:46.433 12081 12081 "Stopping 0"\\n0000002D 2014-11-22 16:01:46.434 12081 12089 "BackupHandler stopped"\\n00000000 2014-11-22 16:03:22.415 3466 3466 "Build community_5.0.2-1"\\n00000001 2014-11-22 16:03:22.427 3466 3466 "WARNING: Local path used for backup url: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000002 2014-11-22 16:03:22.427 3466 3466 "Backup URL = //192.168.230.157/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000003 2014-11-22 16:03:22.427 3466 3466 "Checking backup location: //192.168.230.157/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000004 2014-11-22 16:03:22.436 3466 3466 "Checking for existing daserver instances"\\n00000005 2014-11-22 16:03:22.437 3466 3466 "Server Version = 3.12, required minimum client version 1.5"\\n00000006 2014-11-22 16:03:22.446 3466 3466 "DFS Server: numThreads=30"\\n00000007 2014-11-22 16:03:22.469 3466 3466 "BackupHandler started, async=false"\\n00000008 2014-11-22 16:03:22.470 3466 3466 "loading store 16, storedCrc=b2f2ede"\\n00000009 2014-11-22 16:03:22.482 3466 3466 "store loaded"\\n0000000A 2014-11-22 16:03:22.482 3466 3466 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000B 2014-11-22 16:03:22.483 3466 3466 "Scanning store for external references"\\n0000000C 2014-11-22 16:03:22.484 3466 3466 "External reference count = 0"\\n0000000D 2014-11-22 16:03:22.487 3466 3466 "WARNING: Active cluster 'mythor' group layout does not match environment [matched old environment=true]"\\n0000000E 2014-11-22 16:03:22.487 3466 3466 "WARNING: New cluster layout for cluster mythor_spares"\\n0000000F 2014-11-22 16:03:22.487 3466 3466 "WARNING: Active cluster 'myroxie' group layout does not match environment [matched old environment=true]"\\n00000010 2014-11-22 16:03:22.490 3466 3466 "DFS group initialization : Active cluster 'mythor' group layout does not match environment [matched old environment=true]\\nNew cluster layout for cluster mythor_spares\\nActive cluster 'myroxie' group layout does not match environment [matched old environment=true]\\n"\\n00000011 2014-11-22 16:03:22.506 3466 3466 "Added ldap server 192.168.1.37"\\n00000012 2014-11-22 16:03:43.511 3466 3466 "ldap anonymous bind error (-1) - Can't contact LDAP server"\\n00000013 2014-11-22 16:03:46.512 3466 3466 "Server 192.168.1.37 temporarily unreachable."\\n00000014 2014-11-22 16:03:46.512 3466 3466 "Retrying with 192.168.1.37..."\\n00000015 2014-11-22 16:04:07.506 3466 3466 "ldap anonymous bind error (-1) - Can't contact LDAP server"\\n00000016 2014-11-22 16:04:10.506 3466 3466 "Server 192.168.1.37 temporarily unreachable."\\n00000017 2014-11-22 16:04:10.506 3466 3466 "Retrying with 192.168.1.37..."\\n00000018 2014-11-22 16:04:22.448 3466 3471 "SYS: PU= 15% MU= 26% MAL=31335840 MMP=29364224 SBK=1971616 TOT=32776K RAM=550808K SWP=0K"\\n00000019 2014-11-22 16:04:31.507 3466 3466 "ldap anonymous bind error (-1) - Can't contact LDAP server"\\n0000001A 2014-11-22 16:04:34.517 3466 3466 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.2-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daldap.cpp(111) : LDAP server : getServerInfo error - Can't contact 
LDAP server"\\n0000001B 2014-11-22 16:04:34.518 3466 3466 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.2-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daserver.cpp(410) : LDAP initialization error : getServerInfo error - Can't contact LDAP server"\\n0000001C 2014-11-22 16:04:34.518 3466 3466 "Suspending 6"\\n0000001D 2014-11-22 16:04:34.518 3466 3466 "Suspending 5"\\n0000001E 2014-11-22 16:04:34.518 3466 3466 "Suspending 4"\\n
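\\n\\nBoth Dali shutdowns above trace back to the same "Can't contact LDAP server" error against 192.168.1.37, so it is worth ruling out plain reachability before touching the basedn settings. A minimal check from the Dali node (hypothetical commands, assuming the OpenLDAP client utilities are installed):\\n\\nping -c 3 192.168.1.37\\nldapsearch -x -H ldap://192.168.1.37:389 -b "dc=example,dc=com" -s base\\n\\nIf this anonymous base search also fails, the problem is network connectivity or the slapd service itself rather than the HPCC-side LDAP settings.\\n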
\\n\\nI'm not sure how to set sudoersBasedn, systemBasedn, systemCommonName, systemPassword, systemUser.\\n\\nFollowing is the slapd.conf:\\n\\ndatabase bdb\\nsuffix "dc=example,dc=com"\\nrootdn "cn=Manager,dc=example,dc=com"\\n# Cleartext passwords, especially for the rootdn, should\\n# be avoided. See slappasswd(8) and slapd.conf(5) for details.\\n# Use of strong authentication encouraged.\\n# rootpw secret\\nrootpw {SSHA}AcUebwPWREEwxQYplKYSGxlCQ56aF34F\\n\\n# The database directory MUST exist prior to running slapd AND\\n# should only be accessible by the slapd and slap tools.\\n# Mode 700 recommended.\\ndirectory /var/lib/ldap\\n\\n\\n
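\\n\\nWhatever values end up in systemUser, systemCommonName and systemPassword, they have to be credentials the directory will accept for a bind, so one sanity check is to confirm that the rootdn above can bind from the HPCC node (hypothetical command, assuming the OpenLDAP client utilities such as ldap-utils are installed; it prompts for the rootpw):\\n\\nldapwhoami -x -H ldap://192.168.1.37:389 -D "cn=Manager,dc=example,dc=com" -W\\n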
\\n\\nPlease let me know if I missed anything.\\n\\nThanks\\nBob\", \"post_time\": \"2014-11-24 01:16:27\" },\n\t{ \"post_id\": 6658, \"topic_id\": 1531, \"forum_id\": 14, \"post_subject\": \"Re: Adding Roxie instances to cluster\", \"username\": \"JimD\", \"post_text\": \"I am assuming you are trying to add nodes to an existing Roxie cluster. It is much easier to delete the old cluster and create a new one of the desired size. \\n\\nYou will then need to publish your queries to the new cluster.\\n\\nHTH, \\n\\nJim\", \"post_time\": \"2014-12-03 18:15:31\" },\n\t{ \"post_id\": 6645, \"topic_id\": 1531, \"forum_id\": 14, \"post_subject\": \"Adding Roxie instances to cluster\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nIs there any documentation on how to add instances to an existing Roxie Cluster?\\nI went through that process but I must have forgotten something along the way.\\nI could publish queries and all, but when just running those queries (or any ECL code for that matter) I'd get the following error message (with varying differences):\\n\\nError: Failed to get response from slave(s) for uid=0x00000003 activityId=5 pri=LOW queryHash=e9239df0e75272e5 ch=2 seq=0 cont=0 server=172.31.40.254 retries=0003 in activity 5 (in Index Read 5)\\n
\\n\\nWhat did I do wrong?\\n\\nThanks!\\nLuke.\", \"post_time\": \"2014-12-01 05:14:56\" },\n\t{ \"post_id\": 6673, \"topic_id\": 1535, \"forum_id\": 14, \"post_subject\": \"Re: Updating HPCC\", \"username\": \"bforeman\", \"post_text\": \"Compare the checksum, which should be available on the portal download site, to make sure the file is not corrupted.\\n\\nThe message you posted means the download was not finished.\\n\\nIf the download hangs, check the network and internet policy.\\n\\nSometimes we have seen an issue when downloading the HPCC package from the portal side through the wget/curl command.\\n\\nTry running "sudo ip link set mtu 1400 dev eth0" before the download on the command line.\\n\\nBob\", \"post_time\": \"2014-12-05 15:04:35\" },\n\t{ \"post_id\": 6672, \"topic_id\": 1535, \"forum_id\": 14, \"post_subject\": \"Re: Updating HPCC\", \"username\": \"KunalA\", \"post_text\": \"What distribution of Ubuntu are you attempting?\", \"post_time\": \"2014-12-05 14:57:36\" },\n\t{ \"post_id\": 6671, \"topic_id\": 1535, \"forum_id\": 14, \"post_subject\": \"Re: Updating HPCC\", \"username\": \"micevepay\", \"post_text\": \"Sorry I meant 5.0.2-1 but I redownloaded last night and still not installing.\", \"post_time\": \"2014-12-05 14:48:16\" },\n\t{ \"post_id\": 6670, \"topic_id\": 1535, \"forum_id\": 14, \"post_subject\": \"Re: Updating HPCC\", \"username\": \"bforeman\", \"post_text\": \"The portal has 5.0.2-1 available and you are reporting 5.0.2-2 which is still a development build. Is that correct?\\n\\nRegarding the "dpkg-split" message that you are getting, I believe that may be due to an incorrect download of the package. You will need to do a file compare to see if there is a difference in file size, and try a re-download of the platform.\\n\\nRegards,\\n\\nBob (on behalf of the HPCC installation team)\", \"post_time\": \"2014-12-05 14:42:53\" },\n\t{ \"post_id\": 6669, \"topic_id\": 1535, \"forum_id\": 14, \"post_subject\": \"Updating HPCC\", \"username\": \"micevepay\", \"post_text\": \"Hi,\\n\\nI have been trying to update my HPCC cluster to 5.0.2-2 but keep getting the following errors.\\n\\n sudo dpkg -i hpccsystems-platform_community-5.0.2-1trusty_amd64.deb \\ndpkg-split: error: unable to read part file `hpccsystems-platform_community-5.0.2-1trusty_amd64.deb': Permission denied
\\n\\nNow I am stuck with no cluster because nothing will install.\", \"post_time\": \"2014-12-04 22:48:51\" },\n\t{ \"post_id\": 6837, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting up.\", \"username\": \"Dimon\", \"post_text\": \"Michael you are the Man!\\nThanks so much!!! Study time for me now, woohooo! \\n\\n
[root@myhost src]# /etc/init.d/hpcc-init start\\nDependent service dafilesrv, mydafilesrv is already running.\\nStarting mydali.... [ OK ]\\nStarting mydfuserver.... [ OK ]\\nStarting myeclagent.... [ OK ]\\nStarting myeclccserver.... [ OK ]\\nStarting myeclscheduler.... [ OK ]\\nStarting myesp.... [ OK ]\\nStarting myroxie.... [ OK ]\\nStarting mysasha.... [ OK ]\\nStarting mythor.... [ OK ]\\n
\", \"post_time\": \"2015-01-22 23:58:03\" },\n\t{ \"post_id\": 6835, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting up.\", \"username\": \"mgardner\", \"post_text\": \"Dimon thanks for attaching those log files to the Jira. The issue is that your system doesn't have rsync installed, which normally gets added as a dependency to our debian and redhat packages.\\n\\nsudo pacman -S rsync expect
\\n\\nThat should fix your current issue. Great job getting it up and running on arch btw. I use Arch on my laptop and love it. Best distro ever!\", \"post_time\": \"2015-01-22 21:39:05\" },\n\t{ \"post_id\": 6833, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting up.\", \"username\": \"Dimon\", \"post_text\": \"Hi bforeman and Michael!\\n\\nIt's so great to hear from humans on this forum! I thought this place wasn't that active... Thanks for the help on this issue. My 8-node HPCC deployment is on Arch, which is lean, fast and therefore the best Linux distribution around here \\nI'd definitely like to work with your team on this and have updated Jira with all log files and my build process (which runs w/o any issues). It may be something Arch-related, as by default Arch doesn't use init.d scripts, and I had to create the /etc/init.d directory manually.\\n\\n\\nThanks!!!\\nDimon.\", \"post_time\": \"2015-01-22 18:56:05\" },\n\t{ \"post_id\": 6816, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting up.\", \"username\": \"mgardner\", \"post_text\": \"Dimon, thanks for posting this. I've made a Jira ticket for it at https://track.hpccsystems.com/browse/HPCC-12884. The sentinel file is purposely absent as it is used to help determine the health of a component. The thor code only writes out the sentinel once all slaves have connected to the master. If you have any logs you'd like to attach (like the thormaster*.log and start_slaves*.log) please do so on the Jira ticket and I'll take a look at them.\\n\\nMichael\", \"post_time\": \"2015-01-20 21:50:58\" },\n\t{ \"post_id\": 6815, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting up.\", \"username\": \"bforeman\", \"post_text\": \"Checking now with the HPCC team regarding your messages, but have you run the Configuration Manager and created an Environment.XML to push to the other nodes?\\n\\nSee: http://hpccsystems.com/download/docs/using-config-manager\\n\\nOur guess is that it could be an Arch Linux issue. We are not sure if the Platform will work correctly with it. \\n\\nI would try to compile it on a supported distribution and see if the error persists.\", \"post_time\": \"2015-01-20 20:48:45\" },\n\t{ \"post_id\": 6802, \"topic_id\": 1566, \"forum_id\": 14, \"post_subject\": \"[solved] mythor not starting up.\", \"username\": \"Dimon\", \"post_text\": \"Hi Everyone!\\n\\nI'm new to the forum, and to HPCC for that matter
I've compiled HPCC from the git repo and found no major issues. However, when I try to start HPCC I'm getting this:\\n\\n
[root@boob1 HPCCSystems]# /etc/init.d/hpcc-init start\\nDependent service dafilesrv, mydafilesrv is already running.\\nStarting mydali.... [ OK ]\\nStarting mydfuserver.... [ OK ]\\nStarting myeclagent.... [ OK ]\\nStarting myeclccserver.... [ OK ]\\nStarting myeclscheduler.... [ OK ]\\nStarting myesp.... [ OK ]\\nStarting myroxie.... [ OK ]\\nStarting mysasha.... [ OK ]\\nStarting mythor.... [FAILED]\\nmythor has timed out, but may still be starting
\\n\\nI do see thor is starting up, but then after 3-5 minutes it dies:\\n\\n[root@boob1 HPCCSystems]# ps -eaf|grep thor\\nhpcc 25082 1 0 23:14 pts/2 00:00:00 /bin/bash /opt/HPCCSystems/bin/init_thor\\nhpcc 25108 25082 0 23:14 pts/2 00:00:00 /bin/bash /opt/HPCCSystems/bin/start_thor /opt/HPCCSystems/bin\\nhpcc 25179 25108 0 23:14 pts/2 00:00:00 /bin/bash /opt/HPCCSystems/bin/run_thor\\nhpcc 25417 25179 0 23:14 pts/2 00:00:00 /var/lib/HPCCSystems/mythor/thormaster_mythor MASTER=192.168.1.100:20000
\\n\\nI've inspected the thor log at /var/log/HPCCSystems/mythor/thormaster.2015_01_16.log and have found nothing in there that would point me to why thor is dying. I've searched this forum and the whole Internet via Google for more info on this, with no luck. Here is the end of the thor log file. Where else can I look? Please help!\\n\\n0000007 2015-01-16 23:49:27.856 28856 28856 "Transparent huge pages used for roxiemem heap"\\n00000008 2015-01-16 23:49:27.856 28856 28856 "Memory released to OS in 32768k blocks"\\n00000009 2015-01-16 23:49:27.856 28856 28856 "RoxieMemMgr: 512 Pages successfully allocated for the pool - memsize=536870912 base=0x94400000 alignment=1048576 bitmapSize=16"\\n0000000A 2015-01-16 23:49:27.858 28856 28856 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 0 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSystems/mythor/temp = 3005 MB"\\n0000000B 2015-01-16 23:49:27.865 28856 28856 "Starting watchdog"\\n0000000D 2015-01-16 23:49:27.865 28856 28856 "ThorMaster version 4.1, Started on 192.168.1.100:20000"\\n0000000C 2015-01-16 23:49:27.865 28856 28864 "Started watchdog"\\n0000000E 2015-01-16 23:49:27.865 28856 28856 "Thor name = mythor, queue = thor.thor,thor_roxie.thor, nodeGroup = mythor"\\n0000000F 2015-01-16 23:49:27.866 28856 28856 "Waiting for 1 slaves to register"\\n00000010 2015-01-16 23:49:27.866 28856 28856 "Verifying connection to slave 1"\\n00000011 2015-01-16 23:49:37.866 28856 28856 "Still Waiting for minimum 1 slaves to connect"\\n00000012 2015-01-16 23:49:37.866 28856 28856 "waiting for slave 1 (192.168.1.100:20100)"\\n00000013 2015-01-16 23:49:37.866 28856 28856 "Verifying connection to slave 1"\\n00000014 2015-01-16 23:49:47.866 28856 28856 "Still Waiting for minimum 1 slaves to connect"\\n00000015 2015-01-16 23:49:47.866 28856 28856 "waiting for slave 1 (192.168.1.100:20100)"\\n00000016 2015-01-16 23:49:47.866 28856 28856 "Verifying connection to slave 1"
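\\n\\nAt this point the master is simply waiting for slave 1 on 192.168.1.100:20100 to register, so the slave side is the thing to look at. A couple of quick checks (hypothetical commands; the start_slaves log is assumed to sit alongside the thormaster log under /var/log/HPCCSystems/mythor/):\\n\\nps -eaf | grep thorslave\\nss -ltn | grep 20100\\ntail -n 50 /var/log/HPCCSystems/mythor/start_slaves*.log\\n\\nIf no thorslave process ever appears, the failure is in the slave start-up rather than in the master itself.\\n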
\\n\\nUpdate:\\nAfter tracing the script execution I have found that the reason for this is that the thor.sentinel file is missing. All the others are present:\\n\\n[root@boob1 ~]# ls -1 /var/lib/HPCCSystems/*/*senti*\\n/var/lib/HPCCSystems/mydafilesrv/dafilesrv.sentinel\\n/var/lib/HPCCSystems/mydali/daserver.sentinel\\n/var/lib/HPCCSystems/mydfuserver/dfuserver.sentinel\\n/var/lib/HPCCSystems/myeclagent/agentexec.sentinel\\n/var/lib/HPCCSystems/myeclccserver/eclccserver.sentinel\\n/var/lib/HPCCSystems/myeclscheduler/eclscheduler.sentinel\\n/var/lib/HPCCSystems/myesp/esp.sentinel\\n/var/lib/HPCCSystems/myroxie/roxie.sentinel\\n/var/lib/HPCCSystems/mysasha/saserver.sentinel
\\n\\nDoes anybody know why this might happen? I'm afraid I will need to peek into the codebase... The other thing I'm considering is to give thor more time to start. The default is 2 minutes. I'll give it a shoot at 30. And this didn't work.\\n\\nUpdate #2:\\n\\nI've rebuild the whole thing on 64 bit platform and same issue. Is this because I use 3.18 (latest) linux kernel? Btw: I'm using Arch Linux, if that helps...\", \"post_time\": \"2015-01-17 05:03:43\" },\n\t{ \"post_id\": 6907, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"These are the credentials that will be used internally to access and manipulate the LDAP hierarchy (adding new OUs, querying exising ones, etc). So to make things easy for now, lets try your LDAP creds.\", \"post_time\": \"2015-02-05 19:48:23\" },\n\t{ \"post_id\": 6906, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nTalking with Doug - \\n\\nYou mentioned: "...and then set systemCommonName and systemName both to your Admin name..."\\nWhat exactly does the Admin name need to be? (See previous post.)\\n\\nAt some point - would a phone call make sense?\\n\\nThanks so much.\", \"post_time\": \"2015-02-05 19:43:01\" },\n\t{ \"post_id\": 6903, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"Here are the summarized steps to enable LDAP on HPCC\\n\\n1) Add the hardware type under hardware/Computers. That just needs the IP address and a meaningful name.\\n\\n2) Add an LDAPServer. Right click Software, and select New Component (make sure you select Write Access in upper right first) and select LDAPServer. Browse the prepopulated firelds to ensure they are correct, and then set systemCommonName and systemName both to your Admin name. Set the systemPassword as well. If you have 5.2, you would select the serverType and pick the closest match (A.D., 389DS, etc). Click on the Instances tab, and bind the LDAPServer to the hardware component you added in step 1.\\n\\n3) Add authentication to the ESP component. Click Esp , and select the Authentication tab. For ldapServer, select the LDAPServer you created in step 2. Method should be set to ldap.\\n\\n4) Once complete, exit the configmanager and copy the new environment.xml file from the work area /etc/HPCCSystems/source/environment.xml to /etc/HPCCSystems/environment.xml. Restart HPCC (sudo service hpcc-systems restart) and hopefully all works. Check the ESP Log for errors if not feel free to post the LDAP stuff here.\\nGood luck!\", \"post_time\": \"2015-02-05 13:01:43\" },\n\t{ \"post_id\": 6901, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"Russ,\\n\\nWas able to connect to it using JXplorer.\\n\\nif the Basedn is dc=ql1,dc=colo and the user dn is cn=Manager,dc=ql1,dc=colo , what settings do I use in the LDAP setup fields in HPCC?\", \"post_time\": \"2015-02-04 22:13:01\" },\n\t{ \"post_id\": 6900, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"Doug, I understand and agree about moving away from AD. I have had good success with 389DirectoryServer but like I said you need 5.2 to make it work. 
Also, I am wondering if you been able to install Apache Directory Studio or similar tool and been able to log on to 389DS successfully? I am still not convinced that your setup is correct\\nRuss\", \"post_time\": \"2015-02-04 13:57:00\" },\n\t{ \"post_id\": 6899, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"Thanks Russ. We are currently using AD but want to get rid of our Windows server since maintenance and patching is a bit of a headache for us. We are principally a Linux shop and would like to make the whole HPCC system linux (except for ECLIDE but we are discussing long-term move to Eclipse). Jim reached out via Brian E. to Fernando so I'll keep this blog up to date if I hear anything.\", \"post_time\": \"2015-02-04 01:58:16\" },\n\t{ \"post_id\": 6896, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"I can help you configure HPCC to work with any supported LDAP server configuration in a matter of minutes. I highly recommend Microsoft Active Directory since that's what we use the most. Alternately, I will reach out to our operations team to identify which other offerings they have had success with. I will update this thread when they reply, but do think about Active Directory\\nThanks,\\nRuss\", \"post_time\": \"2015-02-03 20:58:37\" },\n\t{ \"post_id\": 6875, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"I need a formal release.\\n\\nCan I get an OpenLDAP configuration instead? we don't have to use 389\\n\\nThanks\", \"post_time\": \"2015-01-29 02:08:38\" },\n\t{ \"post_id\": 6862, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"The more I think about it, the more it sounds like there is an issue with your 389DS setup. Have you been able to install Apache Directory Studio or similar tool and been able to log on successfully? Let me know\\nRuss\", \"post_time\": \"2015-01-27 14:02:56\" },\n\t{ \"post_id\": 6856, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"You are going to need to wait for HPCC V5.2 to get 389DS to work. Are you set up to download (from GitHub) and build it yourself, otherwise you have to wait until we release it\", \"post_time\": \"2015-01-26 14:03:58\" },\n\t{ \"post_id\": 6852, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"I tried OpenLDAP in the configuration - same issue.\\n\\nIt also tried changing over to OpenLDAP instead of 389 but can't seem to get that to work either .\\n\\nIs there a functioning ldif file and slapd.conf that I can use as an example to get things working?\\n\\nThanks\", \"post_time\": \"2015-01-26 01:38:35\" },\n\t{ \"post_id\": 6828, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"Its a long shot, but try setting the LDAPServer type OpenLDAP. 
I will do some research and find which HPCC release added the 389DirectoryServer to the choices.\\n\\nRuss\", \"post_time\": \"2015-01-22 01:29:31\" },\n\t{ \"post_id\": 6827, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"Russ - thanks for the feedback. 389 Directory is not an option in the pulldown for serverType - only ActiveDirectory, OpenLDAP, Fedora389, and iPlanet.\\n\\nI am using version 5.0.4 (enterprise edition) - I got it from Jim WIltshire. We currently use Active Directory on site and are hoping to change to a linux-based LDAP (prefer 389, but can use OpenLDAP if that works).\\n\\nThe accounts and passwords I used to connect were the 389 Administrator for the configuration directory server and the one for Directgory Manager DN, Neither worked.\", \"post_time\": \"2015-01-22 01:13:48\" },\n\t{ \"post_id\": 6826, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"Also, in the LDAPServer, you can enter a username and password which will be created as an HPCC Admin. To do this, initldap will prompt you for the LDAP Admin credentials that you used when you stood up the directory server. Just want to make sure you are clear on the differences between the 2 sets of creds\\nRuss\", \"post_time\": \"2015-01-21 21:18:04\" },\n\t{ \"post_id\": 6825, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"Re: initldap not connectiong to 389ldap\", \"username\": \"william.whitehead\", \"post_text\": \"Hello, sorry you are having trouble but I think I can get you up and running. First off, it looks like you may have the wrong LDAP Server type specified in the LDAPServer component. I cannot see what you specified in your screenshot, but from the output from initldap it looks like you specified Fedora389. I think you want to specify 389 Directory server? Please try this and let me know how it goes\\nThanks,\\nRuss Whitehead\", \"post_time\": \"2015-01-21 21:14:54\" },\n\t{ \"post_id\": 6817, \"topic_id\": 1569, \"forum_id\": 14, \"post_subject\": \"initldap not connectiong to 389ldap\", \"username\": \"dshibla\", \"post_text\": \"I'm sure I am doing something wrong since I know very little about LDAP, but i set up a 389LDAP using the setup-ds-admin.pl and then tried to run initldap but it can't properly authenticate.\\n\\nthe HPCC setup:\\n[attachment=2:2lov96tk]LDAP_snaps.jpg\\n[attachment=1:2lov96tk]LDAP_snaps_0001.jpg\\n[attachment=0:2lov96tk]LDAP_snaps_0002.jpg\\n\\nAnd here is the command sequence:\\n[root@pxe /]# /usr/sbin/setup-ds-admin.pl \\n\\n==============================================================================\\nThis program will set up the 389 Directory and Administration Servers.\\n\\nIt is recommended that you have "root" privilege to set up the software.\\nTips for using this program:\\n - Press "Enter" to choose the default and go to the next screen\\n - Type "Control-B" then "Enter" to go back to the previous screen\\n - Type "Control-C" to cancel the setup program\\n\\nWould you like to continue with set up? [yes]: \\n\\n==============================================================================\\nYour system has been scanned for potential problems, missing patches,\\netc. 
The following output is a report of the items found that need to\\nbe addressed before running this software in a production\\nenvironment.\\n\\n389 Directory Server system tuning analysis version 23-FEBRUARY-2012.\\n\\nNOTICE : System is x86_64-unknown-linux2.6.32-431.29.2.el6.x86_64 (2 processors).\\n\\nWould you like to continue? [yes]: \\n\\n==============================================================================\\nChoose a setup type:\\n\\n 1. Express\\n Allows you to quickly set up the servers using the most\\n common options and pre-defined defaults. Useful for quick\\n evaluation of the products.\\n\\n 2. Typical\\n Allows you to specify common defaults and options.\\n\\n 3. Custom\\n Allows you to specify more advanced options. This is \\n recommended for experienced server administrators only.\\n\\nTo accept the default shown in brackets, press the Enter key.\\n\\nChoose a setup type [2]: \\n\\n==============================================================================\\nEnter the fully qualified domain name of the computer\\non which you're setting up server software. Using the form\\n<hostname>.<domainname>\\nExample: eros.example.com.\\n\\nTo accept the default shown in brackets, press the Enter key.\\n\\nWarning: This step may take a few minutes if your DNS servers\\ncan not be reached or if DNS is not configured correctly. If\\nyou would rather not wait, hit Ctrl-C and run this program again\\nwith the following command line option to specify the hostname:\\n\\n General.FullMachineName=your.hostname.domain.name\\n\\nComputer name [pxe.ql1.colo]: \\n\\n==============================================================================\\nThe servers must run as a specific user in a specific group.\\nIt is strongly recommended that this user should have no privileges\\non the computer (i.e. a non-root user). The setup procedure\\nwill give this user/group some permissions in specific paths/files\\nto perform server-specific operations.\\n\\nIf you have not yet created a user and group for the servers,\\ncreate this user and group using your native operating\\nsystem utilities.\\n\\nSystem User [nobody]: \\nSystem Group [nobody]: \\n\\n==============================================================================\\nServer information is stored in the configuration directory server.\\nThis information is used by the console and administration server to\\nconfigure and manage your servers. If you have already set up a\\nconfiguration directory server, you should register any servers you\\nset up or create with the configuration server. To do so, the\\nfollowing information about the configuration server is required: the\\nfully qualified host name of the form\\n<hostname>.<domainname>(e.g. hostname.example.com), the port number\\n(default 389), the suffix, the DN and password of a user having\\npermission to write the configuration information, usually the\\nconfiguration directory administrator, and if you are using security\\n(TLS/SSL). If you are using TLS/SSL, specify the TLS/SSL (LDAPS) port\\nnumber (default 636) instead of the regular LDAP port number, and\\nprovide the CA certificate (in PEM/ASCII format).\\n\\nIf you do not yet have a configuration directory server, enter 'No' to\\nbe prompted to set up one.\\n\\nDo you want to register this software with an existing\\nconfiguration directory server? [no]: \\n\\n==============================================================================\\nPlease enter the administrator ID for the configuration directory\\nserver. 
This is the ID typically used to log in to the console. You\\nwill also be prompted for the password.\\n\\nConfiguration directory server\\nadministrator ID [admin]: \\nPassword: \\nPassword (confirm): \\n\\n==============================================================================\\nThe information stored in the configuration directory server can be\\nseparated into different Administration Domains. If you are managing\\nmultiple software releases at the same time, or managing information\\nabout multiple domains, you may use the Administration Domain to keep\\nthem separate.\\n\\nIf you are not using administrative domains, press Enter to select the\\ndefault. Otherwise, enter some descriptive, unique name for the\\nadministration domain, such as the name of the organization\\nresponsible for managing the domain.\\n\\nAdministration Domain [ql1.colo]: \\n\\n==============================================================================\\nThe standard directory server network port number is 389. However, if\\nyou are not logged as the superuser, or port 389 is in use, the\\ndefault value will be a random unused port number greater than 1024.\\nIf you want to use port 389, make sure that you are logged in as the\\nsuperuser, that port 389 is not in use.\\n\\nDirectory server network port [389]: \\n\\n==============================================================================\\nEach instance of a directory server requires a unique identifier.\\nThis identifier is used to name the various\\ninstance specific files and directories in the file system,\\nas well as for other uses as a server instance identifier.\\n\\nDirectory server identifier [pxe]: \\n\\n==============================================================================\\nThe suffix is the root of your directory tree. The suffix must be a valid DN.\\nIt is recommended that you use the dc=domaincomponent suffix convention.\\nFor example, if your domain is example.com,\\nyou should use dc=example,dc=com for your suffix.\\nSetup will create this initial suffix for you,\\nbut you may have more than one suffix.\\nUse the directory server utilities to create additional suffixes.\\n\\nSuffix [dc=ql1, dc=colo]: \\n\\n==============================================================================\\nCertain directory server operations require an administrative user.\\nThis user is referred to as the Directory Manager and typically has a\\nbind Distinguished Name (DN) of cn=Directory Manager.\\nYou will also be prompted for the password for this user. The password must\\nbe at least 8 characters long, and contain no spaces.\\nPress Control-B or type the word "back", then Enter to back up and start over.\\n\\nDirectory Manager DN [cn=Directory Manager]: cn=qadmin\\nPassword: \\nPassword (confirm): \\n\\n==============================================================================\\nThe Administration Server is separate from any of your web or application\\nservers since it listens to a different port and access to it is\\nrestricted.\\n\\nPick a port number between 1024 and 65535 to run your Administration\\nServer on. You should NOT use a port number which you plan to\\nrun a web or application server on, rather, select a number which you\\nwill remember and which will not be used for anything else.\\n\\nAdministration port [9830]: \\n\\n==============================================================================\\nThe interactive phase is complete. The script will now set up your\\nservers. 
Enter No or go Back if you want to change something.\\n\\nAre you ready to set up your servers? [yes]: \\nCreating directory server . . .\\nYour new DS instance 'pxe' was successfully created.\\nCreating the configuration directory server . . .\\nBeginning Admin Server creation . . .\\nCreating Admin Server files and directories . . .\\nUpdating adm.conf . . .\\nUpdating admpw . . .\\nRegistering admin server with the configuration directory server . . .\\nUpdating adm.conf with information from configuration directory server . . .\\nUpdating the configuration for the httpd engine . . .\\nError: command 'getsebool httpd_can_connect_ldap' failed - output [getsebool: SELinux is disabled] error []Starting admin server . . .\\noutput: Starting dirsrv-admin: \\noutput: [ OK ]\\nThe admin server was successfully started.\\nAdmin server was successfully created, configured, and started.\\nExiting . . .\\nLog file is '/tmp/setuppEfmyA.log'\\n\\n:1.2.11.15-34.el6_5 \\n 389-ds-console.noarch 0:1.2.6-1.el6 \\n\\nComplete!\\n\\n[root@pxe /]# /opt/HPCCSystems/bin/initldap\\n00000000 2015-01-20 18:48:21.542 9557 9557 "configgen: Creating PIPE program process : '/opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -listldapservers' - hasinput=0, hasoutput=1 stderrbufsize=0"\\n00000001 2015-01-20 18:48:21.566 9557 9557 "configgen: Pipe: process 9559 complete 0"\\n\\nEnter the 'Fedora389' LDAP Admin User name on '192.168.1.200'...qadmin\\nEnter the LDAP Admin user 'qadmin' password...password\\n\\nReady to initialize HPCC LDAP Environment, using the following settings\\n\\tLDAP Server : 192.168.1.200\\n\\tLDAP Type : Fedora389\\n\\tHPCC Admin User : hadmin\\nProceed? y/n y\\n00000002 2015-01-20 18:48:36.951 9557 9557 "Added ldap server 192.168.1.200"\\n00000003 2015-01-20 18:48:36.953 9557 9557 "Queried 'dc=ql1,dc=colo', selected basedn 'ql1'"\\n00000004 2015-01-20 18:48:36.955 9557 9557 "LDAP bind error for user ql1\\\\qadmin with 32 - No such object. "\\n00000005 2015-01-20 18:48:36.955 9557 9557 "LDAP: sysuser bind failed - No such object"\\nERROR: Unable to create security manager : Connecting/authenticating to ldap server failed\\n\\n[root@pxe /]# /opt/HPCCSystems/bin/initldap\\n00000000 2015-01-20 18:49:53.652 9563 9563 "configgen: Creating PIPE program process : '/opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -listldapservers' - hasinput=0, hasoutput=1 stderrbufsize=0"\\n00000001 2015-01-20 18:49:53.674 9563 9563 "configgen: Pipe: process 9565 complete 0"\\n\\nEnter the 'Fedora389' LDAP Admin User name on '192.168.1.200'...admin\\nEnter the LDAP Admin user 'admin' password...password\\n\\nReady to initialize HPCC LDAP Environment, using the following settings\\n\\tLDAP Server : 192.168.1.200\\n\\tLDAP Type : Fedora389\\n\\tHPCC Admin User : hadmin\\nProceed? y/n y\\n00000002 2015-01-20 18:50:03.043 9563 9563 "Added ldap server 192.168.1.200"\\n00000003 2015-01-20 18:50:03.044 9563 9563 "Queried 'dc=ql1,dc=colo', selected basedn 'ql1'"\\n00000004 2015-01-20 18:50:03.045 9563 9563 "LDAP bind error for user ql1\\\\admin with 32 - No such object. 
"\\n00000005 2015-01-20 18:50:03.045 9563 9563 "LDAP: sysuser bind failed - No such object"\\n\\nERROR: Unable to create security manager : Connecting/authenticating to ldap server failed\", \"post_time\": \"2015-01-21 00:17:30\" },\n\t{ \"post_id\": 6841, \"topic_id\": 1572, \"forum_id\": 14, \"post_subject\": \"Re: mydali won't start\", \"username\": \"Lee_Meadows\", \"post_text\": \"Issue found.\\n\\n It turned out to be a permissions issue. Some directories were owned by root with group hpcc. Not hpcc:hpcc.\\n\\n It didn't have anything to do with configmgr, setting myecl authentication to htpasswd. \\n\\n Once we fixed the owner issue, we were able to start all the services and spray in a file.\", \"post_time\": \"2015-01-23 14:06:24\" },\n\t{ \"post_id\": 6836, \"topic_id\": 1572, \"forum_id\": 14, \"post_subject\": \"mydali won't start\", \"username\": \"Lee_Meadows\", \"post_text\": \"Hi all.\\n\\n I'm working on a 10 node system, and started with just 1 node. First off, I don't have full root access, so I have to go to the admins to request specific commands that hpcc can execute as sudo. I also can not do screen shots or copy paste any info.\\n\\n I got my initial build installed and all my services started. I was able to go to the machine:8010 and submit some code in ecl playground. I couldn't spray any files in, due to permission error, so I thought that has to do with authentication.\\n\\n I run the configmgr and go to machine:8015 and select htpasswd, I created an htpasswd with a user "hpcc". I ctrl-c and get back to command line. I copied my new environment.xml file from source to /etc/HPCCSystems.\\n\\n On starting up the services, mydafilesrv starts, but them mydali pauses (about 5 seconds) and failes. Then all the other processes start OK. But machine:8010 won't respond. I can run configmgr again and get back to 8015.\\n\\n The log for mydali just shows the command submitted and the time, no other information.\\n\\nAny idea what might have happened?\\n\\nversion 5.0.4-1 el6\", \"post_time\": \"2015-01-22 21:55:42\" },\n\t{ \"post_id\": 6870, \"topic_id\": 1573, \"forum_id\": 14, \"post_subject\": \"Re: [solved] Admin page showing blank\", \"username\": \"mgardner\", \"post_text\": \"We're looking into that error https://track.hpccsystems.com/browse/HPCC-12948. Thanks for the heads up!\\n\\nEdit: Merged and fixed. You should no longer see that error building from an updated copy of our repo.\", \"post_time\": \"2015-01-28 18:34:29\" },\n\t{ \"post_id\": 6859, \"topic_id\": 1573, \"forum_id\": 14, \"post_subject\": \"Re: Admin page showing blank\", \"username\": \"Dimon\", \"post_text\": \"Hi Michael and Everyone,\\n\\nI've build with nodejs and I now can see the GUI at 8010. Awesome!!!\\nThe only issues I've found are 3 errors and some warnings during "make package". Looking into this:\\n\\n\\n...\\n...\\nwarn(216) dojo/has plugin resource could not be resolved during build-time. plugin resource id: touch?./_DnD-touch-autoscroll; reference module id: dgrid/extensions/DnD\\nwarn(224) A plugin dependency was encountered but there was no build-time plugin resolver. module: dgrid/extensions/DnD; plugin: xstyle/css\\nwarn(224) A plugin dependency was encountered but there was no build-time plugin resolver. module: dgrid/extensions/Pagination; plugin: xstyle/css\\nerror(311) Missing dependency. module: hpcc/viz/DojoD32DChart; dependency: ./DojoD3\\nerror(311) Missing dependency. 
module: hpcc/viz/DojoD32DChart; dependency: src/chart/MultiChartSurface\\nerror(311) Missing dependency. module: hpcc/viz/DojoD32DChart; dependency: d3/d3\\nstarting executing global optimizations...\\nstarting writing resources...\\nstarting cleaning up...\\nwaiting for the optimizer runner to finish...\\nstarting reporting...\\nReport written to /data/src/HPCC-Platform/build/esp/src/build/build-report.txt\\nProcess finished normally.\\n errors: 3\\n warnings: 75\\n build time: 144.424 seconds\\n
\\n\\nThanks!\\nDimon.\", \"post_time\": \"2015-01-27 05:29:04\" },\n\t{ \"post_id\": 6854, \"topic_id\": 1573, \"forum_id\": 14, \"post_subject\": \"Re: Admin page showing blank\", \"username\": \"mgardner\", \"post_text\": \"You're attempting to run the 5.2 release that we're about to put out right? make sure you install nodejs.\\n\\nsudo pacman -S nodejs
\\n\\nDocumentation can be found here at https://github.com/joyent/node/wiki/Ins ... arch-linux.\\n\\nIt's a new build dependency for 5.2 and we have documented it under prerequisites at https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC\\n\\nTry rebuilding with nodejs and give me a heads up if there are any issues.\\n\\nMichael\", \"post_time\": \"2015-01-26 13:53:21\" },\n\t{ \"post_id\": 6843, \"topic_id\": 1573, \"forum_id\": 14, \"post_subject\": \"Re: Admin page showing blank\", \"username\": \"gsmith\", \"post_text\": \"I was just wondering if we supported "make install" currently - I suspect you just answered my question...\\n\\nYou need to "make package" and then dpkg -i "name of package" (Assuming you're on Ubuntu or such like).\\n\\nGordon.\", \"post_time\": \"2015-01-23 17:02:05\" },\n\t{ \"post_id\": 6838, \"topic_id\": 1573, \"forum_id\": 14, \"post_subject\": \"[solved] Admin page showing blank\", \"username\": \"Dimon\", \"post_text\": \"Hi Everyone!\\n\\nI've installed HPCC from source via "make install" and started a single-server HPCC stack (thanks to the HPCC team) - so far so good. But when I navigate to the admin URL (http://localhost:8010) I see a blank screen. I went into FF debugging mode and figured out that not all resources were present for the web service and some URLs were returning a 404 code. So I had to manually copy them from the source tree:\\n\\ncd /opt/HPCCSystems/componentfiles/files\\ncp -r /data/src/HPCC-Platform/esp/src/eclwatch .\\ncp -r /data/src/HPCC-Platform/esp/src/dijit .\\ncp -r /data/src/HPCC-Platform/esp/src/dojo .\\ncp -r /data/src/HPCC-Platform/esp/src/dojox .\\ncp -r /data/src/HPCC-Platform/esp/src/dgrid .\\ncp /data/src/HPCC-Platform/esp/src/eclwatch/nls/sr/hpcc.js eclwatch/\\ncp -r /data/src/HPCC-Platform/esp/src/put-selector .\\ncp -r /data/src/HPCC-Platform/esp/src/xstyle .\\n
\\n\\nThis gave me little progress and I now see loading bar (png attached) and that's all. Am I missing some other steps? From what see it looks like "make install" is broken in HPCC. \\n\\nThanks!\\nDimon.\", \"post_time\": \"2015-01-23 02:39:05\" },\n\t{ \"post_id\": 9332, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Ignacio\", \"post_text\": \"Hi all, \\n\\nNow that I saw some improvements in the SORT function for the upcoming version 6.0\\nhttps://hpccsystems.com/resources/blog/ ... 600-beta-2\\n\\nIt reminds my I've always been curious to know whether we have recent about the original paper :\\nhttp://cdn.hpccsystems.com/pdf/terasort_results.pdf\\nRelated to the know sort benchmark. \\nhttp://sortbenchmark.org/\\n\\nWhen I talk to people about HPCC (meetups, potential hires, etc.) they often ask me about how performant it is compared with other BigData technologies, and sometimes they explicitly mention that benchmark. \\n\\nI wonder whether as a community we could come up with a verifiable up to date result on one of the different categories, maybe on MinuteSort (Amount of data that can be sorted in 60.00 seconds or less), now that TeraByte Sort looks like it was deprecated.\\n\\nI feel this would be an awesome way to give evidence of the power of HPCC, both externally (external professionals who may be interested on it) and people from the community (internal engagement).\", \"post_time\": \"2016-03-15 09:19:06\" },\n\t{ \"post_id\": 7177, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Lee_Meadows\", \"post_text\": \"How much ram to allocate:\\n To do all of the sort job in memory, without spilling to disk for intermediate steps, you need to have enough ram for 2x the file part (1TB/7 * 2). 350 GB ram allocated was enough for me on a box with 512GB ram. To calculate the minimum amount of ram is kind of guess work, but I ran the job while I've been typing this and watching my top output. Each slave process was using about 7.2 GB (27 slaves per node), so that would ~200 GB per node. Your results may differ. \\n\\n For your disk space, compressed data assume compression factor of .80, for just your file data, .2TB / 7 slaves * 2 copies * 2 (unsorted|sorted). So, your 300GB is more than enough. \\n\\n For your master, 2TB is fine as long as you delete your original unsorted 1TB file after you spray it onto the cluster, otherwise, you'll run out of space when you despray your sorted file back out. (assuming your original 1TB file is uncompressed)\\n\\nIt took 12 minutes and 7 seconds to sort the 1TB file and write out the file parts using 27 slaves per node (189 file parts total). My nodes have 32 cpu. \\n\\nI'd be curious to hear your setup and results. Good luck!\", \"post_time\": \"2015-03-24 11:14:17\" },\n\t{ \"post_id\": 7175, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"kps_mani\", \"post_text\": \"I have a 1 TB file which needs to be processed. I am looking for guidance on choosing the proper cluster setup. Hence, I have posted this question. Please help on determining the cluster size along with memory.\\n\\nI have got the 1 TB file with me. I would like to get the sorted output for 1 TB. 
\\n\\nHere is what I am thinking of having my Thor cluster.\\n\\n7 Node Cluster - 500 GB EBS on each\\n1 Master Node - 2 TB EBS on it\\n\\nI am planning to use the Compression option while uploading the file to Master node and spraying across the cluster. After sorting, I would like to de-spray it from Cluster and get the output in the Master Node.\\n\\nI am planning to keep replication of 2 across the cluster.\\n\\nIs the above configuration good enough for doing the exercise?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-23 19:44:49\" },\n\t{ \"post_id\": 7172, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Lee_Meadows\", \"post_text\": \"You shouldn't have to upload a file.\\n\\nJust run the generate ecl code and it will write out each file part(s) to the slave nodes. Then run your sort and output.\\n\\nSpraying in a 32 GB file into a 7 slave node cluster with 27 slaves/node with compression turned ON took 5m:53sec. \\n\\nCreating a 1 TB file (not spraying it), on the same setup only takes 6m:14sec with compression turned OFF.\", \"post_time\": \"2015-03-23 15:57:38\" },\n\t{ \"post_id\": 7171, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Lee_Meadows\", \"post_text\": \"I just requested it, doesn't mean they'll do it
\", \"post_time\": \"2015-03-23 15:48:05\" },\n\t{ \"post_id\": 7169, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"kps_mani\", \"post_text\": \"Thanks Lee for taking this up. I have got the 1 TB file with me. I would like to get the sorted output for 1 TB. \\n\\nHere is what I am thinking of having my Thor cluster.\\n\\n7 Node Cluster - 500 GB EBS on each\\n1 Master Node - 2 TB EBS on it\\n\\nI am planning to use the Compression option while uploading the file to Master node and spraying across the cluster. After sorting, I would like to de-spray it from Cluster and get the output in the Master Node.\\n\\nI am planning to keep replication of 2 across the cluster.\\n\\nIs the above configuration good enough for doing the exercise?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-23 15:22:31\" },\n\t{ \"post_id\": 7165, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Lee_Meadows\", \"post_text\": \"Subbu, \\n\\n It should only cost about $700 to test out.\\n\\nDatabricks used 207 VMs at a cost of $6.820 per Hour, for a half hour.\\n\\n On my testing of Spark, on same machines as HPCC, using 7 nodes each with 27 containers, HPCC is faster than Spark on a 1TB file.\\n\\n I've requested from LN to do the test on the same nodes that databricks used and publish the results. If I had the space to do a 100TB file, I'd do it.
\", \"post_time\": \"2015-03-23 11:09:22\" },\n\t{ \"post_id\": 7163, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"kps_mani\", \"post_text\": \"Hi Mark,\\nThere is claim of 100 TB sort by 24 Mins in the 207 Node cluster for Apache Spark. It holds the current record of Tera Byte benchmark. Do we have any such benchmark done in HPCC to prove that we hold the highest record in Terabyte sorting?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-22 17:03:19\" },\n\t{ \"post_id\": 6937, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"mkellyhpcc\", \"post_text\": \"Dimon, hi\\n\\nTo try and answer your previous post questions:\\n\\n1). There is a setting in the environment.xml file called "maxActivityCores" that when placed in the ThorCluster section should limit the number of cores used to less than the number of physical cores during graph activities.\\n\\n2). You can adjust thor master and slave memory usage with the "masterMemoryLimit" (in MB, for thor master) and "globalMemoryLimit" (in MB, for thor slaves [and master if masterMemoryLimit is not specified]) settings in the environment.xml file in the ThorCluster section. There are some allocations outside of these settings, but these can impact the sort data size.\\n\\n3). Yes using huge pages can help performance. A larger memory system/config will show a bigger gain in performance with HP. There is a setting in the environment.xml called "heapUseHugePages" in the ThorCluster section that can be set to true but also the OS needs to be configured to allow for huge page allocations (this step is no required in upcoming releases!) with sysctl.conf settings:\\n\\n vm.nr_hugepages = 128\\n vm.nr_overcommit_hugepages = YYYYYY # max setting for your config\\n\\nYou can check huge page usage from the /proc/meminfo values while the sort is running.\\n\\nMake sure to copy any updated environment.xml to all nodes (you can use the hpcc-*.sh convenience scripts for this) before starting HPCC.\\n\\nGreat to hear of your success with HPCC, let us know in the future how we can help out in any way.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-10 16:43:46\" },\n\t{ \"post_id\": 6935, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Dimon\", \"post_text\": \"I'd like to report on terasort result on using Hadoop. It's not great... It's awful...\\n\\nOk, so I installed latest hadoop on the same cluster as HPCC. Compiled it from source. Time and effort wise Hadoop takes much longer time to install:\\n- you need jdk 7\\n- you need to configure 5 xml files and 2 shell scripts with environment variables.\\n- you need to manually setup ssh host equivalence.\\n- running java commands to manage hdfs is weird and not intuitive.\\n\\nAfter all is set and ready I've used commands from hadoop tutorial to prepare:\\n\\n$ time hadoop jar hadoop-*examples*.jar teragen 10000000 /tera10m\\n$ time hadoop jar hadoop-*examples*.jar teragen 100000000 /tera100m\\n$ time hadoop jar hadoop-*examples*.jar teragen 1000000000 /tera1000m\\n\\nand to run terasort:\\n\\n$ time hadoop jar hadoop-*examples*.jar terasort /tera10m /teraout10m\\n$ time hadoop jar hadoop-*examples*.jar terasort /tera100m /teraout100m\\n\\nResults:\\n\\n
\\nrows     prep time (s)   sort time (s)\\n10mln    26              62.8\\n100mln   97.6            >780\\n1bln     851             ???\\n
\\n\\nThe first test ran just fine, while the second one never actually finished. It kept spilling Java stack traces, and I noticed the YARN manager was constantly re-spawning the failed jobs, eventually running out of space on the HDFS partition. Below is a portion of the log for the curious... What I can say is that I'm glad and thankful that HPCC open sourced their platform. It beats Hadoop by a wide margin: it is much more stable and much easier to set up and use. And what a pleasure it is not to read pages of Java stack traces! That last error looks pretty strange to me, considering that all storage across all 10 nodes is 27TB, of which 25TB is free. Yet, an "Out of space" error. I decided not to troubleshoot it and stopped right there - time is the most precious resource...\\n\\nHPCC Team, thank you so much for such a great product!!! It is clearly a winner (at least for me) on performance and manageability compared to Hadoop. Let's continue working it out, add features to it and beat the hell out of Hadoop and Spark for that matter! Great, awesome job!\\n\\nDimon.\\n\\n
\\n15/02/10 00:52:22 INFO mapreduce.Job: map 100% reduce 29%\\n15/02/10 00:52:25 INFO mapreduce.Job: map 100% reduce 31%\\n15/02/10 00:52:28 INFO mapreduce.Job: map 100% reduce 100%\\n15/02/10 00:52:30 INFO mapreduce.Job: Task Id : attempt_1423545537643_0006_r_000000_2000, Status : FAILED\\nError: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in OnDiskMerger - Thread to merge on-disk map-outputs\\n at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)\\n at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)\\n at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:169)\\n at java.security.AccessController.doPrivileged(Native Method)\\n\\n\\n15/02/10 00:55:37 INFO mapreduce.Job: map 100% reduce 31%\\n15/02/10 00:55:40 INFO mapreduce.Job: Task Id : attempt_1423545537643_0006_r_000000_2002, Status : FAILED\\nError: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in OnDiskMerger - Thread to merge on-disk map-outputs\\n at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)\\n at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)\\n at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:169)\\n at java.security.AccessController.doPrivileged(Native Method)\\n at javax.security.auth.Subject.doAs(Subject.java:415)\\n at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1669)\\n at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:164)\\nCaused by: org.apache.hadoop.fs.FSError: java.io.IOException: No space left on device\\n at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.write(RawLocalFileSystem.java:248)\\n at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)\\n
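To make the environment.xml settings mark describes a couple of posts above concrete, here is a minimal sketch; the attribute names come from his post, but the placement shown and the example numbers are assumptions rather than recommendations, and environment.xml is normally edited through configmgr and then copied to every node.

# ThorCluster section of /etc/HPCCSystems/environment.xml (illustrative values only):
#   maxActivityCores="8"       limit cores used during graph activities
#   masterMemoryLimit="2048"   thor master memory, in MB
#   globalMemoryLimit="12288"  thor slave memory, in MB (also used by the master if masterMemoryLimit is unset)
#   heapUseHugePages="true"    enable huge pages for the Thor heap
# Huge pages also need OS support; in /etc/sysctl.conf (values depend on your config):
#   vm.nr_hugepages = 128
#   vm.nr_overcommit_hugepages = <max setting for your config>
sudo sysctl -p              # apply the sysctl.conf changes
grep -i huge /proc/meminfo  # watch huge page usage while the sort runs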
\", \"post_time\": \"2015-02-10 06:25:15\" },\n\t{ \"post_id\": 6933, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Dimon\", \"post_text\": \"Hi Mark,\\n\\nThanks! Great points. Few questions:\\n\\n1. Do we have control on how many threads each thor slave can spawn?\\n2. Do we have control on memory size for thor slave?(From what I got on reading hpcc jira, you removed this option).\\n3. Does it makes sense use huge pages? Since kernel allocates so much memory and having 4KB page is going carry an overhead in managing all those pages. If answer is yes,then how?\\n\\nThanks,\\nDimon.\", \"post_time\": \"2015-02-09 22:28:21\" },\n\t{ \"post_id\": 6928, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nI am back from a work trip, sorry for my delay.\\nI would probably suggest one or at most two thor slaves per node. Thor slaves already sort in parallel using threads and having fewer streams competing for I/O BW on each node should help. Adding nodes (memory) always helps to fit more of the entire dataset into memory and reduce I/O.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-09 19:24:51\" },\n\t{ \"post_id\": 6886, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Dimon\", \"post_text\": \"Hi Lee!\\n\\nI see where I'm wrong now... Thanks for pointing out that important difference 100GB vs 1000GB! Seems like my numbers are actually more or less correct and withing the bucket of other people. 1TB sort in 98min... on 10 node cluster vs 22 minutes on 20 nodes. This is certainly comparable. I'll continue my testing.\\n\\nAn update on my tests:\\n\\nrows prep (s) sort(s)\\n------- -------- -------\\n10mln: 2.9\\t 3.3\\n100mln: 5.1\\t 18.5\\n1bln: 39.4\\t 173\\n
\\nP.S. I cheated by disabling thor mirroring, but considering that all is run on md RAID0, I think this is not bad. I'm happy now! \\n\\nThanks,\\nDimon.\", \"post_time\": \"2015-02-02 21:17:27\" },\n\t{ \"post_id\": 6885, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terasort Benchmark\", \"username\": \"Lee_Meadows\", \"post_text\": \"I'm not on the HPCC Team, but the test they did that was in 98 seconds was 100GB, not a terabyte.\\n\\nSo where you have:\\n24 hdds x 120MB/sec = 2.8GB/sec\\n1024GB of data / 2.8GB/sec = 365 seconds just to read the data.\\nit should be\\n100GB of data /2.8GB/sec = 35.8 seconds just to read the data.\\n\\nFor my benchmarking, I started off generating a 10,000 record file (1MB) and doubling the file size each run.\\n\\nI can't give specs on my machines, but I do have more memory per node than they did, and I have 7 thorslaves each with 27 processes (189 threads)\\n\\nIt takes 28 seconds for me to create a test file of 131GB (1,310,720,000 records) and 99.688 seconds to sort.\\n\\nMy 524GB file took 129 seconds to create and 769 seconds to sort.\\n\\nGood luck with your testing.\", \"post_time\": \"2015-02-02 20:46:39\" },\n\t{ \"post_id\": 6884, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terrasort Benchmark\", \"username\": \"Dimon\", \"post_text\": \"Thanks Mark!\\n\\nI've run two Terrasort benchmarks and so far it seems I can't claim great performance (yet!). I knew it won't be easy
\\n\\nHere is my configuration: 10 nodes HPCC cluster!\\n\\nEach node: \\nDual Xeon Quad-Core L5420 2.5ghz 16GB\\n8 full cores\\n4 HDD 7.5K rpm Western Digital 750GB\\n2 1Gbps NICs\\n\\nhdparm -tT /dev/sd[a-b] gives 90-110MB/sec single drive I/O throughput \\nhdparm -tT /dev/md1 gives 250MB/sec when hdds configured as RAID5/RAID0\\n\\nAll nodes connected to TP LINK TL-SG2216 16 Port Gig Smart Switch, which gives 110MB/sec thoughput during single file transfer between two nodes. According to manufacturer site this device gives 32Gbps backplane throughput or 4 GB/sec theoretical - More then enough to handle storage load from all 10 servers.\\n\\nI've run two tests with different storage and thor slaves configuration, for both tests generating dataset (terasortprep.ecl) took about 3 minutes.\\n\\nOS Settings:\\n\\nnoatime is set on all HDD filesystems and deadline scheduler is used for each HDD:\\n\\n
# cat /etc/fstab\\n# /dev/sda1 LABEL=root\\nUUID=e44279d6-70e4-40b7-a359-94e05576ac52 / ext4 rw,noatime,data=ordered 0 1\\n# /dev/sda2 LABEL=sda2\\nUUID=3c2515d8-7e4c-453e-984e-058654c631ef /sda2 ext4 rw,noatime,data=ordered 0 2\\n# /dev/md1 LABEL=data\\nUUID=196ecc51-68da-40cf-b5a2-5870ed096b46 /data ext4 rw,noatime,stripe=48,data=ordered 0 2\\n\\n# echo deadline > /sys/block/sda/queue/scheduler\\n# echo deadline > /sys/block/sdb/queue/scheduler\\n# echo deadline > /sys/block/sdc/queue/scheduler\\n# echo deadline > /sys/block/sdd/queue/scheduler\\n
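A quick way to confirm the settings above actually took effect on each node (a minimal sketch; the device names assume the sda-sdd layout shown here):

# the active scheduler is shown in [brackets]
for d in sda sdb sdc sdd; do
    echo -n "$d: "; cat /sys/block/$d/queue/scheduler
done
# confirm the noatime mounts are live
mount | grep noatime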
\\n\\nTest #1: All thor files are on the same RAID5 partition.\\n\\t 9 Thor nodes, 4 Thor slaves per node.\\n\\nStorage setup: HDDs were partitioned into 3GB and 740GB partitions and configured as Linux md1/2 RAID5s.\\n\\nmdadm --create --verbose --level=5 --metadata=1.2 --chunk=256 --raid-devices=4 /dev/md1 /dev/sda1 /dev/sdb1 /dev/sdc1 /dev/sdd1\\nmdadm --create --verbose --level=5 --metadata=1.2 --chunk=256 --raid-devices=4 /dev/md2 /dev/sda2 /dev/sdb2 /dev/sdc2 /dev/sdd2\\n\\n/dev/md1\\t/ \\tpartition: 12GB\\t\\t\\n/dev/md2\\t/data\\tpartition: 740GB for thor temp and spray area\\t\\n\\n# time ecl run terasort.ecl --target=thor --server=.\\n\\n<Result>\\n<Dataset name='Result 1'>\\n</Dataset>\\n</Result>\\n\\nreal 96m37.476s\\nuser 0m0.093s\\nsys 0m0.027s
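The per-disk utilisation and await figures in the observations below were presumably collected with something like iostat (an assumption, the post does not name the tool); a minimal way to watch them during a run:

# extended per-device statistics every 5 seconds (requires the sysstat package)
iostat -dx 5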
\\n\\nObservations during test:\\n1. 600 - 1100 iops per node, 9000 peak iops per cluster, - impressive!\\n2. 110 - 200 MB/sec combined (R+W) throughput per node, 1.8GB/sec peak - impressive! \\n3. /dev/sda,b,c,d utilization: 82%-90% await: 28-40 ms <-- this is too high\\n4. HPCC does sorting in three phases:\\n a) first the sprayed 1TB file is read into about 15 1.1GB chunks into thor temp area on each node: \\n\\t <Category dir="/data/data/hpcc/[NAME]/[INST]/temp" name="temp"/>\\n...\\n...\\n-rw-r--r-- 1 hpcc hpcc 1119784639 Feb 1 20:53 thtmp2219_16__srtspill_3.tmp\\n-rw-r--r-- 1 hpcc hpcc 1120811083 Feb 1 20:55 thtmp2219_17__srtspill_3.tmp\\n-rw-r--r-- 1 hpcc hpcc 295295281 Feb 1 20:55 thtmp2219_18__srtspill_3.tmp\\n\\n b) Then these chunks are read into srtmrg.tmp file:\\n...\\n...\\n-rw-r--r-- 1 hpcc hpcc 1119784639 Feb 1 20:53 thtmp2219_16__srtspill_3.tmp\\n-rw-r--r-- 1 hpcc hpcc 1120811083 Feb 1 20:55 thtmp2219_17__srtspill_3.tmp\\n-rw-r--r-- 1 hpcc hpcc 295295281 Feb 1 20:55 thtmp2219_18__srtspill_3.tmp\\n-rw-r--r-- 1 hpcc hpcc 17303601152 Feb 1 22:09 thtmp2219_19__srtmrg.tmp\\t<-- This file created\\n\\n c) strmrg.tmp files merged back into data area\\n\\n5. Why such a good I/O didn't translate to better sorting times?\\n\\nAfter this test observing high await times, I though it would be good idea to separate temp area from data area like so:\\n\\n <Directories name="HPCCSystems">\\n <Category dir="/var/log/[NAME]/[INST]" name="log"/>\\n <Category dir="/var/lib/[NAME]/[INST]" name="run"/>\\n <Category dir="/etc/[NAME]/[INST]" name="conf"/>\\n <Category dir="/sda2/data/hpcc/[NAME]/[INST]/temp" name="temp"/>\\t\\t<-- different physical disk\\n <Category dir="/data/data/hpcc/[NAME]/hpcc-data/[COMPONENT]" name="data"/>\\t<-- Sits on RAID0 on remaining 3 HDDs\\n <Category dir="/data/data/hpcc/[NAME]/hpcc-data2/[COMPONENT]" name="data2"/>\\n <Category dir="/data/data/hpcc/[NAME]/hpcc-data3/[COMPONENT]" name="data3"/>\\n <Category dir="/var/lib/[NAME]/hpcc-mirror/[COMPONENT]" name="mirror"/>\\n <Category dir="/var/lib/[NAME]/queries/[INST]" name="query"/>\\n <Category dir="/var/lock/[NAME]/[INST]" name="lock"/>\\n </Directories>
\\n\\nAnd apparently this was bad idea!\\n\\nTest #2: Thor Temp area is on separate single drive: /dev/sda2, spraying area is RAID0 over remaining 3 drives:\\n\\t 9 Thor nodes, 6 Thor slaves per node.\\n\\nStorage setup:\\n\\nmdadm --create --verbose --level=0 --metadata=1.2 --chunk=64 --raid-devices=3 /dev/md1 /dev/sdb1 /dev/sdc1 /dev/sdd1\\n\\n/dev/sda1\\t/ partition\\n/dev/sda2\\t/sda2 partition for thor temp area\\n/dev/md1\\t/data partition for thor spray area\\n\\n# time ecl run terasort.ecl --target=thor --server=.\\n\\nSOAP rpc error[errorCode = -6 message = timeout expired\\nTarget: C!192.168.1.101, Raised in: /data/src/HPCC-Platform/system/jlib/jsocket.cpp, line 1600\\n]\\n\\nreal 120m0.761s\\nuser 0m0.103s\\nsys 0m0.020s\\n# echo $?\\n2
\\n\\nObservations:\\n1. /dev/sda2 utilization: 90%-100%\\n2. await: 400-800 ms <-- this is way too high\\n3. Step b) performed on a single HDD, and this totally killed performance.\\n4. Error during execution. Test didn't finish.\\n\\nMy questions to HPCC team: \\n\\nYou run your test: http://cdn.hpccsystems.com/pdf/terasort_results.pdf on 4 node cluster, each node had 6 HDDs, which gave you 4x6x150iops = 3600 IOPS for cluster.\\nYou had 6 x 4 = 24 cores or 48 threads in the cluster\\n\\nIn my test I have 10 nodes, 4 HDD per node, which gives me 10 x 4 x 150iops = 6000 IOPS per cluster.\\nIn my test I have 10 x 8 = 80 full cores in the cluster.\\n\\nWhy I don't see 98 seconds run time for sorting? I don't even think 98 seconds is achievable on the configuration mentioned in this PDF, because assuming there are no processing overhead and no network latency overhead in the processing pipeline and we do the sort in one pass while reading 1TB of data it comes to:\\n\\n24 hdds x 120MB/sec = 2.8GB/sec\\n1024GB of data / 2.8GB/sec = 365 seconds just to read the data. Double that to account for writing it back to storage and it comes to 730 seconds. This is an ideal number, the real one would be 2-3 times more.\\n\\nSo either your HDDs in this test were 7.5 times faster or numbers are incorrect.\\nHPCC team, can you please look at this article and either correct it or provide exact specs for your hardware in this test?\\nCan you also help me figuring out where I do wrong in my test? I'm not shooting for 98 seconds, but for 30 minutes should do it taking into account IO, CPU and network overheads.\\n\\nSorry for the long post!\\n\\nThanks!!!\\nDimon.\", \"post_time\": \"2015-02-02 05:07:09\" },\n\t{ \"post_id\": 6842, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Re: Terrasort Benchmark\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nThe source dir HPCC-Platform/testing/benchmarks/ecl contains files:\\n\\nterasortprep.ecl\\nterasort.ecl\\n\\nwhich we can send to you directly. Run the prep first and then the sort ecl. Use thor. The number of thor slaves per node and memory per slave are very important, as is the file system configuration/performance (number of spindles, etc.)\\n\\nthanks,\\nmark\", \"post_time\": \"2015-01-23 16:40:31\" },\n\t{ \"post_id\": 6839, \"topic_id\": 1574, \"forum_id\": 14, \"post_subject\": \"Terasort Benchmark\", \"username\": \"Dimon\", \"post_text\": \"I'm positing this on installation because this seems to be where most people are... I would like to repeat Terasort benchmark:\\n\\nhttp://hpccsystems.com/Why-HPCC/HPCC-vs-Hadoop\\nhttp://cdn.hpccsystems.com/pdf/terasort_results.pdf\\n\\nand would appreciate if someone points me out for details how to do this properly. I plan to run it on 10 nodes HPCC system. What I'm looking for is \\n- the data source\\n- specifics on HPCC configuration (ex: how many Thor and Roxies nodes, etc)\\n- specifics on Linux configuration besides ext4 parameters.\\n- examples of those 4 lines of ECL code.\\n- any other information or online resource how to set HPCC and this test case.\\n\\nI'd appreciate if someone either from hpccsystems or from the community would help me on this. I will post results on this forum. \\n\\nThanks!\\nDimon.\", \"post_time\": \"2015-01-23 06:26:19\" },\n\t{ \"post_id\": 6874, \"topic_id\": 1583, \"forum_id\": 14, \"post_subject\": \"Re: Questions on components interaction\", \"username\": \"rtaylor\", \"post_text\": \"Emmanuel,
How is data actually moved between nodes?
I've been told that this is one of those things where there is one answer today, but there will be a different answer in the near future as the infrastructure will be changing to eliminate ECL Agent altogether.\\n\\nlook at the cmake dependencies when compiling the code, you can see that memcached is an optional dependency
I've been told that this is feature that will be added in the 5.2 release.\\n\\nHTH, \\n\\nRichard\", \"post_time\": \"2015-01-28 21:41:33\" },\n\t{ \"post_id\": 6872, \"topic_id\": 1583, \"forum_id\": 14, \"post_subject\": \"Re: Questions on components interaction\", \"username\": \"eamaro\", \"post_text\": \"With that said, your real answer to these kinds of questions can be found in the platform source code, available for download from GitHub (https://github.com/hpcc-systems/HPCC-Platform).
\\n\\nI understand. However, I was looking to get some context before diving into the source.\\n\\nFor global operations, data is automatically moved between nodes by the cluster infrastructure as needed by the specific activity.
\\n\\nHow is data actually moved between nodes?\\n\\nCan you please tell me where you found this information? I'd like to understand the context before answering.
\\n\\nWell, if you look at the cmake dependencies when compiling the code, you can see that memcached is an optional dependency. A google search also returned this: https://track.hpccsystems.com/browse/HPCC-12766.\\n\\nI guess I'll also try on the dev-mail list.\\n\\nThanks,\\nEmmanuel\\nGeorgia Tech\", \"post_time\": \"2015-01-28 19:20:05\" },\n\t{ \"post_id\": 6869, \"topic_id\": 1583, \"forum_id\": 14, \"post_subject\": \"Re: Questions on components interaction\", \"username\": \"rtaylor\", \"post_text\": \"Emmanuel,In [1], rtaylor briefly explains the Thor query execution flow. I am specifically interested in the step “ECL Agent picks up the compiled workunit and passes it on to the targeted Thor cluster”. Can you provide more details on how this works? For example, assuming the “master” node of the Thor cluster is the one that first receives the work unit from the ACL agent: Which is the component listening on incoming requests on the “master” node? Furthermore, which is the component listening on the “slave” nodes of the Thor cluster, for incoming requests from the “master” node? Is a particular protocol used to send/receive requests from ACL Agent->Master node->Slave node?
When I get in a car to drive somewhere, as long as the user interface is standard (steering wheel, accelerator, brakes, etc.), it is irrelevant to me whether that car's motive power is a 4-cylinder, Straight-6, V-8, Wankel, or battery-operated Electric -- they will all get me from point A to point B and my driving will be the same. \\n\\nI know the developers of this platform well. Therefore, I also know that, within the HPCC infrastructure code, whatever mechanism is used today will be replaced when a better mechanism comes along. And I know that that infrastructure change will be transparent to me (like switching out the 4-cylinder for a V-8), and if I notice the change at all it will just be the improved performance. So I don't "need to know" exactly how it does what it does, I just need to know how to "drive" it. For me, ECL is my user interface (steering wheel, accelerator, brakes, etc.) that I use to "drive" my data from point A to point B.\\n\\nWith that said, your real answer to these kinds of questions can be found in the platform source code, available for download from GitHub (https://github.com/hpcc-systems/HPCC-Platform).For this second question, please keep in mind that I am not very familiar with ECL.
That circumstance is easily changed. We have FREE online eLearning courses available here: http://learn.lexisnexis.com/hpcc They're self-paced and provide a good overview of the fundamental syntax and concepts of the ECL/HPCC platform.
In [2], it is mentioned that “Processing steps defined in an ECL job can specify local (data processed separately on each node) or global (data is processed across all nodes) operation”. Are there specific semantics in ECL to specify whether the work unit that is sent to each slave node, will only be concerned about data available locally? If not, how is this determined?
On the HPCC platform, unlike Hadoop, a workunit always executes on every node in the cluster. Specific activities within the workunit may be specified for global (the default) or local (using the LOCAL option) operation. For global operations, data is automatically moved between nodes by the cluster infrastructure as needed by the specific activity. \\n\\nThe LOCAL option on an activity tells the compiler that the ECL programmer knows that each node will already have the data it needs to do its work on that activity separately and independently. That could be because the programmer has used the DISTRIBUTE function to re-distribute the data across the nodes, or that the programmer understands that the LOCAL operation results will be globally aggregated in a subsequent activity.Finally, I noticed HPCC supports memcached as a plugin, but I couldn’t find any documentation about it. Can you mention where the memcached sits in the flow, and which components it interacts with?
Can you please tell me where you found this information? I'd like to understand the context before answering.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-01-28 15:22:56\" },\n\t{ \"post_id\": 6867, \"topic_id\": 1583, \"forum_id\": 14, \"post_subject\": \"Questions on components interaction\", \"username\": \"eamaro\", \"post_text\": \"Hello,\\n\\nI tried to find the right sub-forum to ask these questions - I hope I did decent job.\\n\\nIn [1], rtaylor briefly explains the Thor query execution flow. I am specifically interested in the step “ECL Agent picks up the compiled workunit and passes it on to the targeted Thor cluster”. Can you provide more details on how this works? For example, assuming the “master” node of the Thor cluster is the one that first receives the work unit from the ACL agent: Which is the component listening on incoming requests on the “master” node? Furthermore, which is the component listening on the “slave” nodes of the Thor cluster, for incoming requests from the “master” node? Is a particular protocol used to send/receive requests from ACL Agent->Master node->Slave node?\\n\\nFor this second question, please keep in mind that I am not very familiar with ECL. In [2], it is mentioned that “Processing steps defined in an ECL job can specify local (data processed separately on each node) or global (data is processed across all nodes) operation”. Are there specific semantics in ECL to specify whether the work unit that is sent to each slave node, will only be concerned about data available locally? If not, how is this determined? If a work unit needs global data, how is the data transferred between nodes? Suppose a work unit is executed with only local data on slave nodes, does the master node aggregate the results at the end, and then sends them back to the ACL agent?\\n\\nFinally, I noticed HPCC supports memcached as a plugin, but I couldn’t find any documentation about it. Can you mention where the memcached sits in the flow, and which components it interacts with?\\n\\n[1] http://hpccsystems.com/bb/viewtopic.php?f=14&t=471\\n[2] http://hpccsystems.com/Why-HPCC/features\\n\\nThank you,\\nEmmanuel\\nGeorgia Tech\", \"post_time\": \"2015-01-28 05:08:45\" },\n\t{ \"post_id\": 7022, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"David Dasher\", \"post_text\": \"Hi Jim\\n\\nThat's really helpful. Thanks\\n\\nDavid\", \"post_time\": \"2015-02-25 14:58:25\" },\n\t{ \"post_id\": 7000, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"JimD\", \"post_text\": \"David,\\n\\nThese are the default ports you may need to open for external access:\\n\\n8010 (ECL Watch)\\n8002 (WsECL)\\n8015 (Config Manager)\\n9876 (Direct access to Roxie)\\n8510 (WsSQL, if installed)\\n\\nRemember that these are configurable, so if you add or change one in Config Manager, you'll need to remember to allow access.\\n\\nYou should not need to access Dali over port 7070 externally. \\n\\nFor inter-node communication, you can just open all ports (1-65535). This ensures slaves can communicate with each other and with Dali and other system servers. I assume this can be limited to a group of nodes, so it shouldn't be an issue. In AWS, we did this using Security groups. 
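On a stock Ubuntu host, that list translates into firewall rules along these lines (a sketch only; on Azure or AWS the equivalent is a security group / network security group rule, as noted above):

sudo ufw allow 8010/tcp   # ECL Watch
sudo ufw allow 8002/tcp   # WsECL
sudo ufw allow 8015/tcp   # Config Manager
sudo ufw allow 9876/tcp   # direct access to Roxie
sudo ufw allow 8510/tcp   # WsSQL, if installed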
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-02-19 21:12:56\" },\n\t{ \"post_id\": 6996, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"David Dasher\", \"post_text\": \"Thanks for the info\\n\\nHow does the destination roxie that you are deploying the package to get the indexes from the source Dali? what ports need to be open for that transfer to happen\\n\\nDavid\", \"post_time\": \"2015-02-19 19:27:01\" },\n\t{ \"post_id\": 6993, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"sort\", \"post_text\": \"FYI... whether you use packages or not to get files to roxie has no bearing on any ports. Packages are used as an xml way to define superkeys instead of getting the information from dali. Actual file usage is the same\", \"post_time\": \"2015-02-19 16:04:19\" },\n\t{ \"post_id\": 6981, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"David Dasher\", \"post_text\": \"Hi Bob\\n\\nThanks for that.\\n\\nYes, we host everything on Microsoft Azure. We are just testing their new Premium Drive service attached to a VM which should give us 20,000 IOPS. We want to test a roxie service to see what the initial performance is like before we commission a thor environment. As it's in a different region to where our current services are it's on another network.\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2015-02-18 21:02:04\" },\n\t{ \"post_id\": 6980, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Re: Ports\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nDefault ports are assigned by the configuration manager. The default for Dali is 7070.\\n\\nYou can specify a port when specifying the daliip but it should default to 7070 as well.\\n\\nSounds like you are working in a secure environment and you need to open the port to allow access?\\n\\nRegards, \\n\\nBob\", \"post_time\": \"2015-02-18 18:56:18\" },\n\t{ \"post_id\": 6975, \"topic_id\": 1603, \"forum_id\": 14, \"post_subject\": \"Ports\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nIf we are using Packages to copy indexes to a Roxie, what ports need to be open on the Roxie and the source Dali?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-02-18 12:26:18\" },\n\t{ \"post_id\": 7138, \"topic_id\": 1638, \"forum_id\": 14, \"post_subject\": \"How to configuring eclccserver in ECL IDE?\", \"username\": \"tlhumphrey2\", \"post_text\": \"In ECL IDE, how to I configure an eclccserver?\", \"post_time\": \"2015-03-13 18:00:37\" },\n\t{ \"post_id\": 7184, \"topic_id\": 1648, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 14.04 installation problem\", \"username\": \"Gleb Aronsky\", \"post_text\": \"It doesn't appear to be installed properly. Try running \\n\\nsudo dpkg --list | grep node\\n\\nI would expect that you don't have node installed. 
If that is that case, I would recommend again running \\n\\ncurl -sL https://deb.nodesource.com/setup | sudo bash -\\n\\nthen \\n\\nsudo apt-get install -y nodejs\", \"post_time\": \"2015-03-24 21:50:18\" },\n\t{ \"post_id\": 7183, \"topic_id\": 1648, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 14.04 installation problem\", \"username\": \"siddharth7\", \"post_text\": \"yes, I ran this command sudo apt-get install -y nodejs\\n\\nif I do node --version, it gives this:\\nThe program 'node' can be found in the following packages:\\n * node\\n * nodejs-legacy\\nTry: sudo apt-get install <selected package>\\n\\nif I do nodejs --version, it gives:\\nv0.10.25\", \"post_time\": \"2015-03-24 20:51:42\" },\n\t{ \"post_id\": 7174, \"topic_id\": 1648, \"forum_id\": 14, \"post_subject\": \"Re: Ubuntu 14.04 installation problem\", \"username\": \"gsmith\", \"post_text\": \"Did you follow these instructions: https://github.com/joyent/node/wiki/Ins ... tributions\\n\\nIf so what do you see when you type node --version?\", \"post_time\": \"2015-03-23 18:23:56\" },\n\t{ \"post_id\": 7166, \"topic_id\": 1648, \"forum_id\": 14, \"post_subject\": \"Ubuntu 14.04 installation problem\", \"username\": \"siddharth7\", \"post_text\": \"I am following the steps from the readme of the below website:\\n\\nhttps://github.com/hpcc-systems/HPCC-Platform\\n\\nI did 1,2,3,4b,5, i am getting this error on step 7,\\nERROR: node.js is required to build - see https://github.com/joyent/node/wiki/Ins ... ge-manager\\n\\nI even installed this, but it is still giving me this error,\\n\\nUsing Ubuntu 14.04\\n\\nRegards\\nSiddharth\", \"post_time\": \"2015-03-23 12:43:42\" },\n\t{ \"post_id\": 7361, \"topic_id\": 1679, \"forum_id\": 14, \"post_subject\": \"Re: Installation and deployment of HPCC without root privile\", \"username\": \"bforeman\", \"post_text\": \"Our HPCC team has contacted you and will be working with you to resolve this issue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-15 15:03:59\" },\n\t{ \"post_id\": 7315, \"topic_id\": 1679, \"forum_id\": 14, \"post_subject\": \"Installation and deployment of HPCC without root priviledge\", \"username\": \"linhbngo\", \"post_text\": \"In this post I would like to detail the steps taken to install and deploy HPCC as a non-root user into non-default accounts. The motivation for this comes from the demand to provide isolated HPCC environments on which computer science students can not only learn to use HPCC but also become familiar with configuration, performance, and other administrative aspects of the framework. \\n\\nAt the current time, I am able to install and deploy a single non-root instance of HPCC. While I was able to fire up an HPCC cluster using the cluster deployment scripts, the thor and hthor aren't able to communicate with the manager yet. \\n=========================================================================================\\nThe installation/deployment is done on HPCC community version 4.3.0 using gcc version 4.8.1 on Clemson University's Palmetto Supercomputer, a shared research cluster. \\n\\nStep 1: Dependency:\\nWhen installing HPCC with administrative priviledges, many of the dependencies can easily be resolved. It is not the case with a non-root account. Our initial try results with errors from support libraries such as BINUTILS, ICU, ZLIB ... on the shared research computing cluster. 
\\n\\nBINUTILS: version 2.24 is downloaded to /home/lngo/hpcc/software\\ncd /home/lngo/hpcc/software\\nmkdir binutils-build\\ncd binutils-build\\n../binutils-2.24/configure --prefix=/home/lngo/hpcc/lib --enable-shared\\nmake\\nmake install
\\n\\nAt first I thought I needed to set BINUTILS_LIBRARIES and BINUTILS_INCLUDE_DIR. This did not help with the installation error. After looking into the instruction on cmake's find_path(), it turns out that I need to add the paths to BINUTILS' lib and include to the system $PATH. Another alternative is the direct specification of the BINUTILS'\\npaths to the parameters of find_path().\\n\\nICU: version 51.1 is downloaded to /home/lngo/hpcc/software\\n./configure --prefix=/home/lngo/hpcc/lib\\nmake\\nmake install
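One way to make a non-root prefix such as /home/lngo/hpcc/lib visible to the toolchain and to cmake's find_path()/find_library() searches is sketched below; this is an illustration, not necessarily what was done here:

export PREFIX=/home/lngo/hpcc/lib
export PATH=$PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$PREFIX/lib:$LD_LIBRARY_PATH
# cmake consults CMAKE_PREFIX_PATH before its hard-coded system paths
export CMAKE_PREFIX_PATH=$PREFIX:$CMAKE_PREFIX_PATH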
\\n\\nAs /home/lngo/hpcc/lib was already added to $PATH, cmake automatically finds the necessary paths for ICU\\n\\nXALAN: xalan_c-1.11 is downloaded to /home/lngo/hpcc/lib\\nXALAN requires XERCES (xerces-c-3.1.1) to be installed first:\\n./configure --prefix=/home/lngo/hpcc/lib\\nmake\\nmake install\\nexport XERCESCROOT=/home/lngo/hpcc/lib
\\n\\nAfter XERCES is installed, go back installing XALAN:\\nexport XALANCROOT=/home/lngo/hpcc/software/xalan-c-1.11/c/\\n./runConfigure -p linux -P /home/lngo/hpcc/lib/\\nmake\\nmake install
\\n\\nLIBARCHIVE: libarchive-3.1.2 is downloaded to /home/lngo/hpcc/software\\n./configure --prefix=/home/lngo/hpcc/lib\\nmake\\nmake install
\\n\\nMYSQL: Use the generic linux binary version: mysql-5.6.15-linux-glibc2.5-x86_64.tar.gz\\nUntar this file\\nCopy the contents of the include directory to /home/lngo/hpcc/lib/include\\nCopy the contents of the lib directory to /home/lngo/hpcc/lib/lib\\n\\nAPR: apr-1.5.0 is downloaded to /home/lngo/hpcc/software\\n./configure --prefix=/home/lngo/hpcc/lib\\nmake\\nmake install
\\n\\nHPCC could not find this installation of APR because the default search paths in FindAPR.cmake are only /usr/local/include/apr-1, /usr/local/include/apr-1.0, /usr/include/apr-1, and /usr/include/apr-1.0.\\n\\nAfter /home/lngo/hpcc/lib/include/apr-1.0 and /home/lngo/hpcc/lib/lib are added to find_path() and find_library(), HPCC was able to find APR. \\n\\nAPRUTILS: apr-util-1.5.3 is downloaded to /home/lngo/hpcc/software\\n./configure --prefix=/home/lngo/hpcc/lib --with-apr=/home/lngo/hpcc/lib\\nmake\\nmake install
\\n\\nEdits similar to the case of APR are also needed to be made to FindAPR.cmake in the Find APRUTIL section of the files. \\n\\nAfter these steps, we were able to configure HPCC to prepare for installation. \\n\\nStep 2: Installation:\\nIn the cmake process, we first ran into this problem:\\n[ 23%] Building CXX object common/dllserver/CMakeFiles/dllserver.dir/thorplugin.cpp.o\\nIn file included from /home/lngo/hpcc/HPCC-Platform/common/dllserver/thorplugin.cpp:27:0:\\n/home/lngo/hpcc/lib/include/bfd.h:35:2: error: #error config.h must be included before this header\\n #error config.h must be included before this header\\n ^\\nmake[2]: *** [common/dllserver/CMakeFiles/dllserver.dir/thorplugin.cpp.o] Error 1\\nmake[1]: *** [common/dllserver/CMakeFiles/dllserver.dir/all] Error 2\\nmake: *** [all] Error 2
\\n\\nFirst, I tried to modify thorplugin.cpp to include "config.h" before "bfd.h". This did not work, as now the compile has no idea what is config.h. After some searches on just the string "bfd.h: #error config.h must be included before this header", this seems to be an issue with newer version of BINUTILS (https://sourceware.org/bugzilla/show_bug.cgi?id=14243). We retried the fix by retaining the modification from the first solution, copy the file config.h from\\n/home/lngo/hpcc/software/binutils-build/binutils into /home/lngo/hpcc/lib/include\\n\\nThis leads to a different error:\\n\\n[ 23%] Building CXX object common/dllserver/CMakeFiles/dllserver.dir/thorplugin.cpp.o\\nIn file included from /home/lngo/hpcc/HPCC-Platform/common/dllserver/thorplugin.cpp:27:0:\\n/home/lngo/hpcc/lib/include/config.h:7:4: error: #error config.h must be #included before system headers\\n # error config.h must be #included before system headers\\n ^\\nmake[2]: *** [common/dllserver/CMakeFiles/dllserver.dir/thorplugin.cpp.o] Error 1\\nmake[1]: *** [common/dllserver/CMakeFiles/dllserver.dir/all] Error 2\\nmake: *** [all] Error 2
\\n\\nNow we move #include "config.h" to top of the file ... and it works!\\nThis link (http://stackoverflow.com/questions/1174 ... nfig-h-now) suggests another fix, probably simpler, but I don't understand what's needed to be done. \\n\\nA second location where the code must be modified is at ecl/hqlcpp/hqlres.cpp:27:0\\nWith these modifications, we were able to finish the installation process. Below is the installation script:\\n\\n#!/bin/bash\\nrm -Rf /home/lngo/hpcc/build/*\\nmkdir /home/lngo/hpcc/build\\ncd /home/lngo/hpcc/build\\nrm -Rf /home/lngo/hpcc/HPCC-Platform/CMakeFiles\\nrm -Rf /home/lngo/hpcc/HPCC-Platform/CMakeCache.txt\\nrm -Rf /home/lngo/hpcc/HPCC-Platform/Testing\\nrm -Rf /local_scratch/lngo/hpcc/HPCCSystems\\n\\ncmake -DPREFIX=/local_scratch/lngo/hpcc -DEXEC_PREFIX=/local_scratch/lngo/hpcc -\\nDCONFIG_PREFIX=/local_scratch/lngo/hpcc -DENV_XML_FILE=environment.xml -DENV_CON\\nF_FILE=environment.conf -DLOG_DIR=log -DPID_DIR=pid -DLOCK_DIR=lock /home/lngo/h\\npcc/HPCC-Platform -DRUNTIME_USER=lngo -DRUNTIME_GROUP=bigdata\\n\\nmake\\nmake install
\\n\\nStep 3: Deployment:\\nDuring the initial deployment process, we ran into issues with path conflicts and root privilege requirements. A number of modifications were made to the source code, requiring re-installation to fix these issues: \\n\\n1. The is_root check in several files, including hpcc-init, hpcc_common, and dafilesrv. This can be commented out. \\n\\n2. In hpcc-init, $configs and $environment are absolute paths, but $envfile appends the two variables, so the path becomes duplicated. The same problem appears in hpcc_setenv, where we have: HPCC_CONFIG=${HPCC_CONFIG:-${CONFIG_DIR}/${ENV_CONF_FILE}}
. \\n\\n3. some of the calls to ulimit, more specically -n, -Hr, and -r cannot be executed as non-root. \\n\\n4. several hard-coded calls to /etc/init.d/hpcc-init and /etc/init.d/dafilesrv\\n\\n5. Conflicting paths to INSTALL_DIR, CONFIG_DIR, ENV_XML_FILE, ENV_CONF_FILE, PID_DIR, LOCK_DIR, and LOG_DIR. To correct for these, we edit configmgr.in to include ${INSTALL_DIR} in the source calls, and also modify HPCC_CONFIG path:\\n\\nsource ${INSTALL_DIR}/etc/init.d/lock.sh\\nsource ${INSTALL_DIR}/etc/init.d/pid.sh\\nsource ${INSTALL_DIR}/etc/init.d/hpcc_common\\nsource ${INSTALL_DIR}/etc/init.d/init-functions\\nsource ${INSTALL_DIR}/etc/init.d/export-path\\n\\n#HPCC_CONFIG=${HPCC_CONFIG:-${CONFIG_DIR}/${ENV_CONF_FILE}}\\nHPCC_CONFIG=${HPCC_CONFIG:-${ENV_CONF_FILE}}\\n
\\n\\nAlso, in initfiles/etc/DIR_NAME/CMakeLists.txt and initfiles/etc/DIR_NAME/confmgr/CMakeLists.txt, remove the "." in front of ${CONFIG_DIR}\\n\\n\\n6. Unable to find the executable configesp (have to use esp instead) in init_configesp\\n#nohup configesp 1>/dev/null 2>/dev/null &\\nnohup esp 1>/dev/null 2>/dev/null &
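A quick way to see which ESP executable actually exists under the install prefix before editing init_configesp (illustrative; the prefix path is the one passed to cmake above):

find /local_scratch/lngo/hpcc -maxdepth 3 -type f -name '*esp*' 2>/dev/null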
\\n\\nThese can be fixed by going back and changing the relevant scripts (hpcc-init.in, hpcc_common.in, init_configesp, init_dafilesrv, init_dali, init_dfuserver, init_eclagent.in, init_eclccserver, init_esp, init_roxi, init_sasha, run_ftslave, run_thor, start_slaves, start_thor, and hpcc_setenv.in) from the source code, and reinstall HPCC. \\n\\n7. The final edit that must be made is for when configmgr is started, we need to go into my_thor configuration page and change the user/group from hpcc:hpcc to the user/group specified in the installation script in part 2. Somehow that did not make into the generation of my_thor.xml file. \\n\\nWith these change, I was able to spawn a one-node HPCC as a non-root user on the Palmetto cluster. \\n==========\\nOngoing Work:\\n\\nI have not been able to get a cluster up and running. Everything seems to work, with the individual processes are up and running on all the nodes. However, the HPCC web interface did not recognize the thor slaves instances. I am having the following errors:\\n00000006 2015-01-21 10:22:58.980 51016 51016 "multiConnect failed to 10.125.8.166:7100 with -1"\\n00000007 2015-01-21 10:22:58.980 51016 51016 "multiConnect failed to 10.125.2.26:7100 with -1"\\n00000008 2015-01-21 10:22:58.980 51016 51016 "multiConnect failed to 10.125.3.56:7100 with -1"\\n00000009 2015-01-21 10:22:58.981 51016 51016 "ERROR: /home/lngo/hpcc/HPCC-Platform/thorlcr/master/thmaster\\nmain.cpp(390) : VALIDATE FAILED(1) 10.125.8.166 : Connect failure"\\n0000000A 2015-01-21 10:22:58.981 51016 51016 "ERROR: /home/lngo/hpcc/HPCC-Platform/thorlcr/master/thmaster\\nmain.cpp(390) : VALIDATE FAILED(1) 10.125.2.26 : Connect failure"\\n0000000B 2015-01-21 10:22:58.981 51016 51016 "ERROR: /home/lngo/hpcc/HPCC-Platform/thorlcr/master/thmaster\\nmain.cpp(390) : VALIDATE FAILED(1) 10.125.3.56 : Connect failure"\\n0000000C 2015-01-21 10:22:58.981 51016 51016 "Cluster replicate nodes check completed in 60184ms"\\n0000000D 2015-01-21 10:22:58.981 51016 51016 "ERROR: /home/lngo/hpcc/HPCC-Platform/thorlcr/master/thmaster\\nmain.cpp(614) : ERROR: Validate failure(s) detected, exiting Thor"\\n00000001 2015-01-21 14:52:32.594 54291 54291 "Opened log file //10.125.8.166/local_scratch/lngo/hpcc/log/H\\nPCCSystems/mythor/thormaster.2015_01_21.log"
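For the multiConnect / VALIDATE FAILED errors above, a first sanity check is whether the thor master can reach each slave node on port 7100 at all (the port and IPs below are the ones shown in the log; this is a sketch, not a full diagnosis):

for ip in 10.125.8.166 10.125.2.26 10.125.3.56; do
    nc -zv -w 5 $ip 7100   # -z: probe only, -v: report the result, -w 5: 5 second timeout
done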
\", \"post_time\": \"2015-04-11 03:30:25\" },\n\t{ \"post_id\": 7449, \"topic_id\": 1686, \"forum_id\": 14, \"post_subject\": \"Re: Setting up Roxie Cluster\", \"username\": \"lpezet\", \"post_text\": \"That was it!\\n\\nThanks clo!\", \"post_time\": \"2015-04-24 19:29:37\" },\n\t{ \"post_id\": 7440, \"topic_id\": 1686, \"forum_id\": 14, \"post_subject\": \"Re: Setting up Roxie Cluster\", \"username\": \"clo\", \"post_text\": \"Hi Luc,\\n\\nI see that you're trying to do some work with Roxie in an AWS environment. \\n\\nOut of curiosity, can you please check your environment.xml in configmgr for the following setting?\\n\\nRoxie Cluster > UDP > roxieMulticastEnabled.\\n\\nThis variable should be set to false in an AWS environment.\", \"post_time\": \"2015-04-23 17:29:56\" },\n\t{ \"post_id\": 7389, \"topic_id\": 1686, \"forum_id\": 14, \"post_subject\": \"Re: Setting up Roxie Cluster\", \"username\": \"lpezet\", \"post_text\": \"Here's some more info.\\nI allowed all traffic within my VPC (I can ping all instances within the VPC).\\nThis is what's running in my cluster:\\n\\n172.31.48.185 hpcc-init status :\\nmydafilesrv ( pid 4626 ) is running...\\nmydali ( pid 4732 ) is running...\\nmydfuserver ( pid 4852 ) is running...\\nmyeclagent ( pid 4977 ) is running...\\nmyeclccserver ( pid 5089 ) is running...\\nmyeclscheduler ( pid 5218 ) is running...\\nmyesp ( pid 5332 ) is running...\\nmysasha ( pid 5450 ) is running...\\nmythor ( pid 13903 ) is running...\\n\\n172.31.48.186 hpcc-init status :\\nmydafilesrv ( pid 2068 ) is running...\\nmyroxie ( pid 2442 ) is running...\\n\\n172.31.48.187 hpcc-init status :\\nmydafilesrv ( pid 2181 ) is running...\\nmyroxie ( pid 2288 ) is running...\\n\\n172.31.48.188 hpcc-init status :\\nmydafilesrv ( pid 2167 ) is running...\\n
\\n\\n\\nChecking some of my roxie.log, I can see the following error messages:\\n000023F2 2015-04-16 20:52:31.316 2442 2568 "PING: 1 replies received, average delay 285"\\n000023F3 2015-04-16 20:53:01.415 2442 7789 "RoxieMemMgr: Heap size 4096 pages, 4095 free, largest block 4095, heapLWM 0, dataBuffersActive=38, dataBufferPages=1"\\n000023F4 2015-04-16 20:53:11.428 2442 7790 "UdpCollator: CMessageCollator::GetNextResult timeout, 0 partial results"\\n000023F5 2015-04-16 20:53:11.428 2442 7790 "[172.31.48.186:9876{69}@11] Input has stalled - retry required?"\\n000023F6 2015-04-16 20:53:11.428 2442 7790 "[172.31.48.186:9876{69}@11] Resending packet size=74: uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0001"\\n000023F7 2015-04-16 20:53:11.428 2442 7790 "[172.31.48.186:9876{69}@11] Resending packet size=74: uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=1 cont=0 server=172.31.48.186 retries=0001"\\n000023F8 2015-04-16 20:53:21.428 2442 7790 "UdpCollator: CMessageCollator::GetNextResult timeout, 0 partial results"\\n000023F9 2015-04-16 20:53:21.428 2442 7790 "[172.31.48.186:9876{69}@11] Input has stalled - retry required?"\\n000023FA 2015-04-16 20:53:21.428 2442 7790 "[172.31.48.186:9876{69}@11] Resending packet size=74: uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0002"\\n000023FB 2015-04-16 20:53:21.428 2442 7790 "[172.31.48.186:9876{69}@11] Resending packet size=74: uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=1 cont=0 server=172.31.48.186 retries=0002"\\n000023FC 2015-04-16 20:53:31.138 2442 2445 "SYS: PU= 0% MU= 5% MAL=1080382896 MMP=1076576256 SBK=3806640 TOT=1058392K RAM=218312K SWP=0K RMU= 1% RMX=1023M"\\n000023FD 2015-04-16 20:53:31.138 2442 2445 "NIC: rxp/s=24.8 rxk/s=0.0 txp/s=112.3 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n000023FE 2015-04-16 20:53:31.317 2442 2568 "PING: 1 replies received, average delay 260"\\n000023FF 2015-04-16 20:53:31.428 2442 7790 "UdpCollator: CMessageCollator::GetNextResult timeout, 0 partial results"\\n00002400 2015-04-16 20:53:31.428 2442 7790 "[172.31.48.186:9876{69}@11] Input has stalled - retry required?"\\n00002401 2015-04-16 20:53:31.428 2442 7790 "[172.31.48.186:9876{69}@11] ERROR: 1406: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.2.0-1/CE/ubuntu-14.10-amd64/HPCC-Platform/roxie/ccd/ccdserver.cpp(3209) : Failed to get response from slave(s) for uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0003 in activity 11: CRemoteResultAdaptor::retry"\\n00002402 2015-04-16 20:53:31.428 2442 7790 "[172.31.48.186:9876{69}@11] makeWrappedException - Failed to get response from slave(s) for uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0003 in activity 11 (in Index Read 11)"\\n00002403 2015-04-16 20:53:31.429 2442 4243 "[172.31.48.186:9876{69}] Exception thrown in query - cleaning up: 1406: Failed to get response from slave(s) for uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0003 in activity 11 (in Index Read 11)"\\n00002404 2015-04-16 20:53:31.429 2442 4243 "[172.31.48.186:9876{69}] Done cleaning up"\\n00002405 2015-04-16 20:53:31.429 2442 4243 "[172.31.48.186:9876{69}] ERROR: 1406: Failed to get response from slave(s) for uid=0x00000012 activityId=11 pri=LOW 
queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0003 in activity 11 (in Index Read 11) (in item 3)"\\n00002406 2015-04-16 20:53:31.429 2442 4243 "[172.31.48.186:9876{69}] FAILED: <episodecostsbyei><period_length>30</period_length><results>100</results><providers/><initiator_type/><offset>1</offset><service_line/><first_episode_begin_date>20130101</first_episode_begin_date><pgp_tin_eins/><state_code/><npis/><last_episode_begin_date>20140101</last_episode_begin_date><ms_drgs/><ei_bpids/><bpids>1000</bpids></episodecostsbyei>"\\n00002407 2015-04-16 20:53:31.429 2442 4243 "EXCEPTION: Failed to get response from slave(s) for uid=0x00000012 activityId=11 pri=LOW queryHash=57c13af5551f37db ch=2 seq=0 cont=0 server=172.31.48.186 retries=0003 in activity 11 (in Index Read 11)"\\n
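The fix that resolved this thread (see clo's reply above) was turning off Roxie multicast for the AWS/VPC environment; a quick way to confirm what the active configuration says, assuming the default config path:

grep -o 'roxieMulticastEnabled="[^"]*"' /etc/HPCCSystems/environment.xml
# expected on networks without multicast support, such as an AWS VPC:
#   roxieMulticastEnabled="false"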
\", \"post_time\": \"2015-04-16 21:09:17\" },\n\t{ \"post_id\": 7351, \"topic_id\": 1686, \"forum_id\": 14, \"post_subject\": \"Setting up Roxie Cluster\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nI'm trying to setup a Roxie Cluster from (pretty much) scratch.\\nI tried before just adding instances to an existing Roxie cluster but had problems. I was advised to just create a brand new Roxie cluster, which I just did, but experienced the same problem.\\n\\nHere's what I did:\\n1. Created an image with Ubuntu Utopic (14.10) and HPCC 5.2.0-1 installed (AWS EC2 environment)\\n2. Booted up 4 instances (1 support node, 1 Thor, 2 Roxie)\\n3. Started Config Manager and created a brand new setup using first the wizard (specifying all the private IPs), then switched to Advanced View to setup SSL and Authentication (using htpasswd).\\n4. Saved config, copied it over /etc/HPCCSystems/environment.xml, pushed it to all nodes then started up everything.\\n\\nOpened up ECL Watch in my browser (auth and SSL all good), homepage saying the cluster is fine (like nothing detached or anything like it).\\nI was even able to Remote Copy stuff (to myroxie), and compile and publish queries against Roxie.\\n\\nBut when I run a query against Roxie, I get the following error:\\nFailed to get response from slave(s) for uid=0x00000007 activityId=10 pri=LOW queryHash=57c13af5551f37db ch=1 seq=0 cont=0 server=172.31.48.187 retries=0003 in activity 10 (in Index Read 10)
\\n\\nThis is similar to the problem I mentioned above about adding nodes to an existing Roxie cluster:\\nhttp://hpccsystems.com/bb/viewtopic.php?f=14&t=1531&p=6645&hilit=Roxie+slave&sid=98c8d0f35a97dee9b8175845d818715e&sid=010a4fcdd00af2621a37246e72e10f30#p6645\\n\\n\\nWhat did I do wrong?\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-04-15 01:36:45\" },\n\t{ \"post_id\": 7569, \"topic_id\": 1712, \"forum_id\": 14, \"post_subject\": \"Re: Single Node Installation Running DHCP\", \"username\": \"azukas\", \"post_text\": \"I used localhost during setup. Could not find the old IP in any of the config files under /opt or /etc per your step 1. Tried your steps 2 and 3 which worked.\\n\\nAlso found the answer in viewtopic.php?f=14&t=932 just before your post appeared. \\n\\nThx\", \"post_time\": \"2015-05-09 09:48:15\" },\n\t{ \"post_id\": 7542, \"topic_id\": 1712, \"forum_id\": 14, \"post_subject\": \"Re: Single Node Installation Running DHCP\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nIf your IP changes you should:\\n\\n1). if you did not use localhost, update the environment.xml file to have the new IP address everywhere - do this before starting any HPCC services.\\n2). run this cmd after starting dali:\\n /opt/HPCCSystems/bin/updtdalienv /etc/HPCCSystems/environment.xml -f\\n3). start up HPCC again\\n\\nthanks,\\nmark\", \"post_time\": \"2015-05-05 20:49:05\" },\n\t{ \"post_id\": 7508, \"topic_id\": 1712, \"forum_id\": 14, \"post_subject\": \"Single Node Installation Running DHCP\", \"username\": \"azukas\", \"post_text\": \"I successfully installed the latest community version of hpcc. All services started up correctly until a shutdown and reboot of the machine. After reboot all services start correctly with the exception of mythor. Need to say system is running dhcp. Ubuntu 14.04\\n\\nmythor times out while trying to connect the to the mythor slave which never happens because it is looking at the previous IP address that was in place during the install process. A grep through the config files does not reveal a line in a configuration file that determines what IP address the thor master should use when trying to connect to the thor slave. All config files point to localhost. Messages in the mythor/thormaster log file are shown below:\\n\\n"Waiting for 1 slaves to register"\\n"Verify connection to slave 1"\\n "Still waiting for minimum 1 slaves to connect"\\n"waiting for slave 1 (xxx.xxx.1.17:20100"\\n\\nThe current IP is xxx.xxx.1.15 after the reboot. Some place in the config file tree there is a reference to the previous IP address rather than localhost.\\n\\nAny thoughts where to look?\", \"post_time\": \"2015-04-30 21:15:56\" },\n\t{ \"post_id\": 7700, \"topic_id\": 1753, \"forum_id\": 14, \"post_subject\": \"Multi nodes Installation\", \"username\": \"sky_tech\", \"post_text\": \"Hello,\\n\\nI'm a french student, I need to use HPCC Systems for my internship. \\n\\nI have been following the installation guide : http://cdn.hpccsystems.com/releases/CE- ... .0.0-1.pdf\\n\\nI use a virtual machine with Ubuntu 14.04 to install the HPCC Systems environment.\\n\\nMy first problem, during the installation of multi nodes, when I execute this script : \\n\\nsudo /opt/HPCCSystems/sbin/install-cluster.sh -k <package-file-name>
\\n\\nI get this error : \\n\\nExecution progress: 100%, running: 0, in queue: 0, succeed: 49, failed: 52\\n\\n Error found during install-hpcc.exp execution. \\n Reference following log for more information: \\n /var/log/HPCCSystems/cluster/cc_install-hpcc.exp_20150601_151419.log
\\n\\nI tried a lot of configuration, with 2 nodes, 4 nodes or just adding a Thor node, the same problem during the RSA key generating.\\n\\nAnd for the next stape : \\n\\n17. Copy the /etc/HPCCSystems/environment.xml to /etc/HPCCSystems/ on every node.\\nYou may want to create a script to push out the XML file to all nodes. A sample script is provided with HPCC. The following command copies the XML files out to all nodes as required:\\n
\\n\\nOk but where is the location of nodes ? \\n\\nThank in advance \\n\\nPS : Sorry for my bad english.\", \"post_time\": \"2015-06-01 15:04:25\" },\n\t{ \"post_id\": 7875, \"topic_id\": 1797, \"forum_id\": 14, \"post_subject\": \"Re: Getting error "An HTTP processing error occurred"\", \"username\": \"gsmith\", \"post_text\": \"FYI - It is simply the IDE trying to work out if there is a remote repository, in your case not (all SOAP errors get reported in the warnings window).\", \"post_time\": \"2015-07-06 08:55:10\" },\n\t{ \"post_id\": 7874, \"topic_id\": 1797, \"forum_id\": 14, \"post_subject\": \"Re: Getting error "An HTTP processing error occurred"\", \"username\": \"avinash454\", \"post_text\": \"Thanks Bob. Its working after submitting.\", \"post_time\": \"2015-07-06 03:56:59\" },\n\t{ \"post_id\": 7873, \"topic_id\": 1797, \"forum_id\": 14, \"post_subject\": \"Re: Getting error "An HTTP processing error occurred"\", \"username\": \"bforeman\", \"post_text\": \"This is simply a warning and is harmless, you should still be able to submit code successfully in spite of it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-07-05 16:01:36\" },\n\t{ \"post_id\": 7872, \"topic_id\": 1797, \"forum_id\": 14, \"post_subject\": \"Getting error "An HTTP processing error occurred"\", \"username\": \"avinash454\", \"post_text\": \"Hi,\\n\\nI am getting following error while submitting :\\nWARNING: SOAP 1.1 fault: SOAP-ENV:Client[no subcode]\\n"An HTTP processing error occurred"\\nDetail: [no detail]\\n\\nI have installed HPCC VM image 64 bit on windows 7 and ran through VM player version: 5.2.4-1\\n\\nECL IDE installed version is :community_5.2.4-1\\n\\nPlease suggest how to make it work.\", \"post_time\": \"2015-07-03 15:15:59\" },\n\t{ \"post_id\": 8388, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Dimon\", \"post_text\": \"There are few things worth trying when this happens again:\\n\\n1. Verify environment.xml directories are presented on all hosts and have promptly owned by hpcc:hpcc\\n\\n2. chown -R hpcc:hpcc /var/lib/HPCCSystems (or your directory)\\n\\nI've noticed many hpcc processes output into /dev/null. So, it makes sense to change the startup scripts (those which fail) to output into a tangible file to see what the errors are. This helped me to discover above points.\\n\\nThanks, D.\", \"post_time\": \"2015-10-29 04:04:44\" },\n\t{ \"post_id\": 7916, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Hi Tim,\\n\\nI was able to install another version, hpccsystems -platform_community-5.0.16-1.el6.x86_64 successfully using the steps that you had mentioned.\\n\\nThanks a lot for your help.\\n\\nJerry\", \"post_time\": \"2015-07-15 10:12:01\" },\n\t{ \"post_id\": 7908, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jerry,\\n\\nI'm not sure what the problem is. I've asked one of our hardware architects to look at your post.\\n\\nIf I were having these problems, I would bring up an HPCC System without roxie (to do this make "roxienodes" 0 in the envgen command). Try this and see if you get any errors.\\n\\nTim\", \"post_time\": \"2015-07-14 13:00:35\" },\n\t{ \"post_id\": 7907, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Hi Tim,\\n\\nI did the following\\n\\n1. 
Uninstalled HPCC in both nodes\\n2. Executed \\nsudo yum localinstall http://cdn.hpccsystems.com/releases/CE- ... x86_64.rpm on both the nodes\\n3. Created new environment.xml using \\nsudo /opt/HPCCSystems/sbin/envgen -env /etc/HPCCSystems/source/newly_created_environment.xml -ipfile ipaddress -supportnodes 1 -thornodes 1 -roxienodes 1 -slavesPerNode 8 -roxieondemand 1\\n*ipaddress is the filename containing ipaddresses(Master first followed by Slave)\\nthere were two warnings\\n sudo /opt/HPCCSystems/sbin/hpcc-push.sh <sourcefile> <destinationfile>
\\nSee the appendix for more information on using this script.\\n\\n00000000 2015-07-14 11:49:49.162 39542 39542 "WARNING: Couldn't delete file @temp/EclWatch.xml+EclWatch"\\n00000001 2015-07-14 11:49:49.162 39542 39542 "WARNING: Couldn't delete file @temp/myws_ecl2.xml+myws_ecl"
\\n4. Copied the new environment.xml file using\\nsudo /opt/HPCCSystems/sbin/hpcc-push.sh -s /etc/HPCCSystems/source/newly_created_environment.xml -t /etc/HPCCSystems/environment.xml\\nAlso verified the timestamp of environment.xml file in both nodes to confirm this.\\n5. Executed \\nsudo /sbin/service hpcc-init start \\nin both nodes\\n\\nBut I am still facing the same issue. Roxie logs show Segmentation fault and core dumps are created every few seconds.\\n\\nJerry\", \"post_time\": \"2015-07-14 07:12:50\" },\n\t{ \"post_id\": 7905, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"tlhumphrey2\", \"post_text\": \"Make sure the single node HPCC is stopped before you configure a multi-node HPCC System, i.e. run: sudo service hpcc-init stop (run this on every machine of your HPCC System). Then, check to see if the HPCC System has stopped using "sudo service hpcc-init status".\\n\\nThere is an easier way to configure an HPCC System with multiple thor slave nodes per machine. From the master machine, execute the following command from a Linux terminal window:\\nsudo /opt/HPCCSystems/sbin/envgen \\\\\\n -env /etc/HPCCSystems/source/newly_created_environment.xml \\\\\\n -ipfile $ip_file \\\\\\n -supportnodes 1 \\\\\\n -thornodes $thornodes \\\\\\n -roxienodes $roxienodes \\\\\\n -slavesPerNode $slavesPerNode \\\\\\n -roxieondemand 1\\n
\\nwhere $ip_file is the name of a file (full path) containing the IP addresses of your HPCC System machines. Make sure the 1st IP is the master's and put one IP per line. $thornodes is the number of machines that have thor slave nodes on them. $roxienodes is the number of nodes in your roxie cluster (I've always made this the same as $thornodes). $slavesperNode is the number of thor slave nodes on each of the thor slave node machines.\\n\\nThe above command makes a new environment.xml file, /etc/HPCCSystems/source/newly_created_environment.xml. Distribute this to all machines in your HPCC System (in this case just your B machine). Use the following command to distribute the new environment.xml file on each of your machines (in your base execute the following on the master (A) and machine B).\\nsudo /opt/HPCCSystems/sbin/hpcc-push.sh \\\\\\n -s /etc/HPCCSystems/source/newly_created_environment.xml \\\\\\n -t /etc/HPCCSystems/environment.xml\\n
\\n\\nWhen you have finished distributing the new environment.xml file, restart the HPCC System with the following command (DO THIS ON EVERY MACHINE):\\nsudo service hpcc-init start
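\\n\\nFor illustration only (the IP addresses and file path below are placeholders, not values from this thread), a two-node ip_file (call it /home/hpcc/ips.txt, master first, one IP per line) would just contain:\\n192.168.1.10\\n192.168.1.11\\n\\nand the matching envgen call for your setup (1 support node, 1 thor node, 1 roxie node, 8 slaves per node) would be:\\nsudo /opt/HPCCSystems/sbin/envgen \\\\\\n -env /etc/HPCCSystems/source/newly_created_environment.xml \\\\\\n -ipfile /home/hpcc/ips.txt \\\\\\n -supportnodes 1 \\\\\\n -thornodes 1 \\\\\\n -roxienodes 1 \\\\\\n -slavesPerNode 8 \\\\\\n -roxieondemand 1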
\\n\\nLet me know if this works.\", \"post_time\": \"2015-07-13 15:27:10\" },\n\t{ \"post_id\": 7904, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Hi Tim,\\n\\nYes, I was able to execute the sample program given in the playground.\\nI was also able to publish the query to Roxie.\\nWhen I execute the published query, a few times I got the following message (maybe because Roxie is restarting constantly):\\nException\\nReported by: WsEcl\\nMessage: Roxie cluster communication error: roxie
\\nOther times, the published query executed correctly.\\n\\nI used the rpm given in the following path\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-5.2.4/bin/platform/hpccsystems-%20platform-community_5.2.4-1.el6.x86_64.rpm\\n\\nJerry\", \"post_time\": \"2015-07-13 14:49:41\" },\n\t{ \"post_id\": 7903, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jerry,\\n\\nSuccess to me means you were able to execute a small ECL program on the system ( I usually execute one the ECL programs provided in the playground on ECL Watch).\\n\\nTim\", \"post_time\": \"2015-07-13 14:31:36\" },\n\t{ \"post_id\": 7902, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jerry,\\n\\nWere you successful at setting-up a single node HPCC System? If so, which version of the HPCC Platform did you use (provide a link to it, please)?\\n\\nTim\", \"post_time\": \"2015-07-13 14:29:04\" },\n\t{ \"post_id\": 7901, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Hi Tim,\\n\\nI was referring to the document which talks about how to install HPCC.\\nhttp://hpccsystems.com/download/docs/installing-running-hpcc-platform\", \"post_time\": \"2015-07-13 14:02:18\" },\n\t{ \"post_id\": 7900, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jerry,\\n\\nProvide a link to the pdf document you used, please.\\n\\nTim\", \"post_time\": \"2015-07-13 13:43:23\" },\n\t{ \"post_id\": 7899, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Update\\n\\nI reinstalled HPCC in both the machines and followed the steps mentioned the PDF.\\nThis time I am not facing any problem with thor slaves.\\n(I probably might have messed up some steps during the previous installation)\\n\\nBut now I am facing another issue.\\nAfter installation when I was going through Roxie logs I found out that logs are showing segmentation fault.\\n\\n00000040 2015-07-13 17:42:33.814 17890 17890 "Loading empty package for QuerySet roxie"\\n00000041 2015-07-13 17:42:33.815 17890 17902 "AutoReloadThread 0x6842e8 starting"\\n00000042 2015-07-13 17:42:33.816 17890 17890 "UdpReceiver: rcv_data_socket created port=9001 requested sockbuffsize=131071 actual sockbuffsize=262142"\\n00000043 2015-07-13 17:42:33.816 17890 17890 "UdpReceiver: rcv_flow_socket created port=9000 sockbuffsize=131071 actual 262142"\\n00000044 2015-07-13 17:42:33.818 17890 17890 "UdpReceiver: receive_sniffer port open 239.1.1.4:9003"\\n00000045 2015-07-13 17:42:33.818 17890 17903 "UdpReceiver: CPacketCollator::run"\\n00000046 2015-07-13 17:42:33.818 17890 17904 "UdpReceiver: receive_data started"\\n00000047 2015-07-13 17:42:33.818 17890 17904 "priority set id=139999460706048 policy=2 pri=4 PID=17890"\\n00000048 2015-07-13 17:42:33.819 17890 17905 "UdpReceiver: ReceiveFlowManager started"\\n00000049 2015-07-13 17:42:33.820 17890 17906 "UdpReceiver: receive_receive_flow started"\\n0000004A 2015-07-13 17:42:33.820 17890 17906 "priority set id=139999439726336 policy=2 pri=3 PID=17890"\\n0000004B 2015-07-13 17:42:33.820 17890 17907 "UdpReceiver: sniffer started"\\n0000004C 2015-07-13 17:42:33.836 17890 17890 "UdpSender: sendbuffer set for local socket (size=131071)"\\n0000004D 2015-07-13 17:42:33.836 17890 
17890 "UdpSender: added entry for ip=10.242.48.244 to receivers table at index=0 - send_flow_port=9000"\\n0000004E 2015-07-13 17:42:33.836 17890 17908 "UdpSender: send_data started"\\n0000004F 2015-07-13 17:42:33.836 17890 17908 "priority set id=139999213250304 policy=2 pri=1 PID=17890"\\n00000050 2015-07-13 17:42:33.836 17890 17909 "UdpSender: send_send_flow started - node=0"\\n00000051 2015-07-13 17:42:33.836 17890 17890 "UdpSender: rcv_flow_socket created port=9002 sockbuffsize=131071 actualsize=262142"\\n00000052 2015-07-13 17:42:33.836 17890 17910 "UdpSender: send_receive_flow started"\\n00000053 2015-07-13 17:42:33.836 17890 17910 "priority set id=139999192270592 policy=2 pri=2 PID=17890"\\n00000054 2015-07-13 17:42:33.843 17890 18001 "priority set id=139998237693696 policy=2 pri=3 PID=17890"\\n00000055 2015-07-13 17:42:33.843 17890 18001 "RoxieSocketQueueManager::run() starting: doIbytiDelay=YES minIbytiDelay=0 initIbytiDelay=100"\\n00000056 2015-07-13 17:42:33.843 17890 17890 "Creating Roxie socket listener, pool size 30, listen queue 200"\\n00000057 2015-07-13 17:42:33.843 17890 18003 "RoxieSocketListener (30 threads) listening to socket on port 9876"\\n00000058 2015-07-13 17:42:33.844 17890 17890 "Creating Roxie workunit listener, pool size 30"\\n00000059 2015-07-13 17:42:33.844 17890 17890 "Creating sentinel file roxie.sentinel for rerun from script"\\n0000005A 2015-07-13 17:42:33.844 17890 17890 "Waiting for queries"\\n0000005B 2015-07-13 17:42:33.847 17890 18004 "roxie: Waiting on queue(s) 'roxie.roxie'"\\n0000005C 2015-07-13 17:42:42.338 17890 18001 "================================================"\\n0000005D 2015-07-13 17:42:42.338 17890 18001 "Signal: 11 Segmentation fault"\\n0000005E 2015-07-13 17:42:42.338 17890 18001 "Fault IP: 00007F5487A54AD4"\\n0000005F 2015-07-13 17:42:42.338 17890 18001 "Accessing: 00007F5487D7C5DC"\\n00000060 2015-07-13 17:42:42.338 17890 18001 "Registers:"\\n00000061 2015-07-13 17:42:42.338 17890 18001 "EAX:00007F5487D4C5E0 EBX:00000000006BCBE0 ECX:000000000000BFFF EDX:0000000000000000 ESI:00000000006BCBE0 EDI:000000000000BFFF"\\n00000062 2015-07-13 17:42:42.338 17890 18001 "CS:EIP:0033:00007F5487A54AD4"\\n00000063 2015-07-13 17:42:42.338 17890 18001 " ESP:00007F53E13A0AA0 EBP:000000000066C270"\\n00000064 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0AA0]: 00007F5487757000 4D00DFF000007F54 000000304D00DFF0 0000000500000030 0000000000000005 0000000000000000 0000000000000000 0000000000000000"\\n00000065 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0AC0]: 0000000000000000 8779670800000000 00007F5487796708 0000000000007F54 0000000000000000 0066C39000000000 000000000066C390 0066C27000000000"\\n00000066 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0AE0]: 000000000066C270 0066C62000000000 000000000066C620 0000FFFF00000000 000000000000FFFF 4D01472500000000 000000304D014725 0000000000000030"\\n00000067 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0B00]: 0000000000000000 0066C62000000000 000000000066C620 0066C39000000000 000000000066C390 0066C39000000000 000000000066C390 0066C27000000000"\\n00000068 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0B20]: 000000000066C270 0066C62000000000 000000000066C620 0000FFFF00000000 000000000000FFFF 87491D8000000000 00007F5487491D80 006BCBE000007F54"\\n00000069 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0B40]: 00000000006BCBE0 87A55BEE00000000 00007F5487A55BEE E13A0B8000007F54 00007F53E13A0B80 E39FF95200007F53 00007FFFE39FF952 0000000100007FFF"\\n0000006A 
2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0B60]: 0000000000000001 0000000000000000 0000000000000000 E13A0C6000000000 00007F53E13A0C60 E13A0D3C00007F53 00007F53E13A0D3C E13A0B9000007F53"\\n0000006B 2015-07-13 17:42:42.338 17890 18001 "Stack[00007F53E13A0B80]: 00007F53E13A0B90 E39FFA4700007F53 00007FFFE39FFA47 E13A0BD000007FFF 00007F53E13A0BD0 4E803E4600007F53 000000304E803E46 8775552000000030"\\n0000006C 2015-07-13 17:42:42.338 17890 18001 "Backtrace:"\\n0000006D 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libjlib.so(_Z16printStackReportv+0x28) [0x7f5487411198]"\\n0000006E 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x9ca) [0x7f5487411eca]"\\n0000006F 2015-07-13 17:42:42.339 17890 18001 " /lib64/libpthread.so.0() [0x304d80f710]"\\n00000070 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libccd.so(_ZN23RoxieSocketQueueManager7doIbytiER17RoxiePacketHeaderR10RoxieQueueP11IThreadPool+0x84) [0x7f5487a54ad4]"\\n00000071 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libccd.so(_ZN23RoxieSocketQueueManager14processMessageER12MemoryBufferR17RoxiePacketHeaderR10RoxieQueueP11IThreadPool+0x18e) [0x7f5487a55bee]"\\n00000072 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libccd.so(_ZN23RoxieSocketQueueManager3runEv+0xd9) [0x7f5487a564d9]"\\n00000073 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x2f) [0x7f54874ba32f]"\\n00000074 2015-07-13 17:42:42.339 17890 18001 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1c) [0x7f54874b917c]"\\n00000075 2015-07-13 17:42:42.339 17890 18001 " /lib64/libpthread.so.0() [0x304d8079d1]"\\n00000076 2015-07-13 17:42:42.339 17890 18001 " /lib64/libc.so.6(clone+0x6d) [0x304d4e886d]"\\n00000077 2015-07-13 17:42:42.340 17890 18001 "ThreadList:\\n
\\nI also found highly frequent core dumps in /var/lib/HPCCSystems/myroxie \\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:45 core.19465\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:45 core.19584\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:45 core.19709\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:45 core.19829\\n-rw-------. 1 hpcc hpcc 2287730688 Jul 13 17:45 core.19948\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:45 core.20069\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:46 core.20188\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:46 core.20311\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:46 core.20434\\n-rw-------. 1 hpcc hpcc 2277384192 Jul 13 17:46 core.20555\\n-rw-------. 1 hpcc hpcc 2277384192 Jul 13 17:46 core.20673\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:46 core.20792\\n-rw-------. 1 hpcc hpcc 2287874048 Jul 13 17:47 core.20917\\n-rw-------. 1 hpcc hpcc 2287730688 Jul 13 17:47 core.21039\\n-rw-------. 1 hpcc hpcc 2287730688 Jul 13 17:47 core.21160\\n-rw-rw-r--. 1 hpcc hpcc 1932 Jul 13 17:47 07_13_2015_17_23_52.stdout\\n-rw-rw-r--. 1 hpcc hpcc 25411 Jul 13 17:47 07_13_2015_17_23_52.stderr\\n-rw-r--r--. 1 hpcc hpcc 6222 Jul 13 17:47 RoxieTopology.xml\\n-rw-r--r--. 1 hpcc hpcc 5 Jul 13 17:47 roxie.sentinel\\n-rw-------. 1 hpcc hpcc 1179332608 Jul 13 17:47 core.21287\\n
\\n\\nI also checked Roxie health via ECL Watch -> Cluster Processes and found out that the 'Up Time' for Roxie is getting reset every few seconds.\\n\\nObviously something is wrong here.\\nIt would be a big help if someone could shed some light on this.\\n\\nThanks in advance\\n\\nJerry\", \"post_time\": \"2015-07-13 12:40:14\" },\n\t{ \"post_id\": 7893, \"topic_id\": 1801, \"forum_id\": 14, \"post_subject\": \"mythor not starting\", \"username\": \"Jerry\", \"post_text\": \"Hi,\\n\\nI am trying to install a 2-node HPCC cluster on CentOS 6.5.\\nI have done the following:\\n1. Installed HPCC_5.2.4-1 on nodes A and B\\n2. Created environment.xml on machine A under the 'hpcc' user and copied it to machine B\\n I have configured machine A to be the thor master and machine B to be the thor slave (8 slaves). The Roxie cluster also resides on machine B.\\n3. Generated .ssh keys on machine A and copied them to machine B\\n4. Started all the services on both machines\\n\\nBut for mythor I am getting an error\\n\\nStarting mythor.... [FAILED]\\nmythor has timed out, but may still be starting\\n
\\n\\nI went to ECLWatch and checked in 'Cluster Processes' and found out that\\na. Thor Master has started but thor slaves are down\\nb. Roxie is up\\n(After some time both Thor Master and Roxie also went down)\\n\\nI also checked thormaster logs\\n\\n00000001 2015-07-09 17:15:32.666 58469 58469 "Opened log file //10.242.48.242/var/log/HPCCSystems/mythor/thormaster.2015_07_09.log"\\n00000002 2015-07-09 17:15:32.666 58469 58469 "Build community_5.2.4-1"\\n00000003 2015-07-09 17:15:32.666 58469 58469 "calling initClientProcess Port 20000"\\n00000004 2015-07-09 17:15:32.670 58469 58469 "Found file 'thorgroup', using to form thor group"\\n00000005 2015-07-09 17:15:32.670 58469 58469 "Checking cluster replicate nodes"\\n00000006 2015-07-09 17:15:32.675 58469 58469 "Cluster replicate nodes check completed in 5ms"\\n00000007 2015-07-09 17:15:32.677 58469 58469 "Sharing globalMemorySize(36170 MB), between 8 slave. 4521 MB each"\\n00000008 2015-07-09 17:15:32.677 58469 58469 "Global memory size = 36170 MB"\\n00000009 2015-07-09 17:15:32.677 58469 58469 "RoxieMemMgr: Setting memory limit to 37926993920 bytes (144680 pages)"\\n0000000A 2015-07-09 17:15:32.678 58469 58469 "Transparent huge pages are not supported on this kernel. Requires kernel version > 2.6.38."\\n0000000B 2015-07-09 17:15:32.679 58469 58469 "Memory released to OS on each 256k 'page'"\\n0000000C 2015-07-09 17:15:32.679 58469 58469 "RoxieMemMgr: 144704 Pages successfully allocated for the pool - memsize=37933285376 base=0x7f88aee00000 alignme\\nnt=262144 bitmapSize=4522"\\n0000000D 2015-07-09 17:15:32.681 58469 58469 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 0 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/\\nHPCCSystems/mythor/temp = 153068 MB"\\n0000000E 2015-07-09 17:15:32.686 58469 58469 "Starting watchdog"\\n0000000F 2015-07-09 17:15:32.686 58469 58488 "Started watchdog"\\n00000010 2015-07-09 17:15:32.686 58469 58469 "ThorMaster version 4.1, Started on 10.242.48.242:20000"\\n00000011 2015-07-09 17:15:32.687 58469 58469 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000012 2015-07-09 17:15:32.687 58469 58469 "Waiting for 8 slaves to register"\\n00000013 2015-07-09 17:15:32.688 58469 58469 "Verifying connection to slave 7"\\n00000014 2015-07-09 17:15:42.687 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000015 2015-07-09 17:15:42.688 58469 58469 "Verifying connection to slave 7"\\n00000016 2015-07-09 17:15:52.687 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000017 2015-07-09 17:15:52.687 58469 58469 "Verifying connection to slave 3"\\n00000018 2015-07-09 17:16:03.937 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000019 2015-07-09 17:16:03.937 58469 58469 "Verifying connection to slave 5"\\n0000001A 2015-07-09 17:16:20.812 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n0000001B 2015-07-09 17:16:20.812 58469 58469 "Verifying connection to slave 8"\\n0000001C 2015-07-09 17:16:46.124 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n0000001D 2015-07-09 17:16:46.124 58469 58469 "Verifying connection to slave 2"\\n0000001E 2015-07-09 17:17:24.092 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n0000001F 2015-07-09 17:17:24.092 58469 58469 "Verifying connection to slave 2"\\n00000020 2015-07-09 17:18:21.044 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000021 2015-07-09 17:18:21.044 58469 58469 "Verifying connection to slave 5"\\n00000022 2015-07-09 17:19:46.472 58469 58469 "Still Waiting 
for minimum 8 slaves to connect"\\n00000023 2015-07-09 17:19:46.472 58469 58469 "Verifying connection to slave 2"\\n00000024 2015-07-09 17:21:54.614 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000025 2015-07-09 17:21:54.614 58469 58469 "Verifying connection to slave 5"\\n00000026 2015-07-09 17:25:06.827 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000027 2015-07-09 17:25:06.827 58469 58469 "Verifying connection to slave 1"\\n00000028 2015-07-09 17:29:55.146 58469 58469 "Still Waiting for minimum 8 slaves to connect"\\n00000029 2015-07-09 17:29:55.146 58469 58469 "Verifying connection to slave 4"\\n0000002A 2015-07-09 17:30:33.688 58469 58469 "priority set id=140263242077952 policy=0 pri=0 PID=58469"\\n0000002B 2015-07-09 17:30:33.688 58469 58469 "Stopping watchdog"\\n0000002C 2015-07-09 17:30:33.688 58469 58469 "Stopped watchdog"\\n0000002D 2015-07-09 17:30:33.688 58469 58469 "ERROR: 10056: /var/lib/jenkins/workspace/CE-Candidate-5.2.4-1/CE/centos-6.4-x86_64/HPCC-Platform/thorlcr/master/thmastermain.cpp(807) : ThorMaster : Have waited over 15 minutes for all slaves to connect, quitting."\\n0000002E 2015-07-09 17:30:33.699 58469 58469 "Thor closing down 5"\\n0000002F 2015-07-09 17:30:33.699 58469 58469 "Thor closing down 4"\\n00000030 2015-07-09 17:30:33.700 58469 58469 "Thor closing down 3"\\n00000031 2015-07-09 17:30:33.700 58469 58469 "Thor closing down 2"\\n00000032 2015-07-09 17:30:33.714 58469 58469 "Thor closing down 1"\\n
\\n\\nI went through the forums and found out that there is a thread with a similar issue\\nhttp://hpccsystems.com/bb/viewtopic.php?f=14&t=1566&p=6802&hilit=slaves+not+starting&sid=34667b3bf6f19e0c8ccfb0aee2d009af&sid=34667b3bf6f19e0c8ccfb0aee2d009af#p6802\\nThere it was mentioned that the issue was because 'rsync' was not installed.\\nBut I have rsync on both my machines.\\nWould someone please help me with this?\\n\\nThanks in advance\\n\\nJerry\", \"post_time\": \"2015-07-09 12:51:08\" },\n\t{ \"post_id\": 7950, \"topic_id\": 1815, \"forum_id\": 14, \"post_subject\": \"Re: Can I disable the Thor Cluster idle restart period?\", \"username\": \"JimD\", \"post_text\": \"It doesn't look like setting it to 0 would disable it. \\n\\nSet it to something high to effectively disable it (but make sure that it doesn't overflow 32 bits when converted to milliseconds).\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-07-28 15:52:11\" },\n\t{ \"post_id\": 7933, \"topic_id\": 1815, \"forum_id\": 14, \"post_subject\": \"Re: Can I disable the Thor Cluster idle restart period?\", \"username\": \"bforeman\", \"post_text\": \"Hi Shah,\\n Can I disable this idle restart period (or make it a very large value)?\\n
\\n\\nSome settings in the Configuration Manager use zero (0) as a way of turning off that feature. Try that first; otherwise, setting it to a larger number will do it.\\n\\nWhat is the purpose of this idle restart (i.e., is it necessary)?
\\n\\nI think it was a safety feature for when a THOR cluster goes unused for any extended length of time. THOR clusters should always be in use. \\nThe restart is needed. I'm not really sure why; I think it releases memory…something related to a memory leak, I'm sure.\\n\\n
...slaves go into a bad state
\\n\\nIf you move to the 5.2.4 build I believe your restart should work correctly.\\n\\nBob\", \"post_time\": \"2015-07-22 12:06:30\" },\n\t{ \"post_id\": 7932, \"topic_id\": 1815, \"forum_id\": 14, \"post_subject\": \"Can I disable the Thor Cluster idle restart period?\", \"username\": \"shah\", \"post_text\": \"I'm new to HPCC so some of my terminology may be off.\\n\\nI've set up a cluster with 1 master and 4 slaves. Every 8 hours, our Thor Cluster restarts and the slaves go into a bad state, see this issue.\\n\\nThe Thor Master log shows this:\\n\\n"ERROR: 10097: /var/lib/jenkins/workspace/CE-Candidate-5.2.2-1/CE/centos-7.0-x86_64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(792) : abortThor : Thor has been idle for 480 minutes, restarting"
\\n\\nI believe this is because the idleRestartPeriod value defaults to 480. \\n\\nTwo questions:\\n\\n[list=1]I want to know if it is possible to cluster 2 personal computers or laptops together to increase the speed and power of the computers?
That's exactly what HPCC is all about. Read this doc and it will tell you how to accomplish that: http://cdn.hpccsystems.com/releases/CE-Candidate-5.4.8/docs/Installing_and_RunningTheHPCCPlatform-5.4.8-1.pdf\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-02-26 14:36:12\" },\n\t{ \"post_id\": 9276, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"yew\", \"post_text\": \"Hi all,\\n\\nI want to know if it is possible to cluster 2 personal computers or laptops together to increase the speed and power of the computers?\", \"post_time\": \"2016-02-26 05:00:08\" },\n\t{ \"post_id\": 8786, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"MauricioNO\", \"post_text\": \"A tip for anyone trying to build the Platform on Windows: make sure you are using the right versions of the dependencies (for instance, the versions of Xalan and Xerces I mentioned above), and the rest of the errors should already be ironed out on candidate 6.0.0.\\n\\nThank you to everyone involved!\", \"post_time\": \"2015-12-09 19:34:32\" },\n\t{ \"post_id\": 8372, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"ming\", \"post_text\": \"Sorry, usually we don't build the HPCC Platform on Windows; we only build the Client Tools. I will try to build it and investigate the new error messages.\", \"post_time\": \"2015-10-22 19:27:31\" },\n\t{ \"post_id\": 8368, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"bforeman\", \"post_text\": \"Mauricio,\\n\\nI was told that one of our HPCC staff is now in touch with you. If you have a resolution to this, please post your feedback at that time.\\n\\nThanks!\\n\\nBob\", \"post_time\": \"2015-10-22 16:35:54\" },\n\t{ \"post_id\": 8358, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"MauricioNO\", \"post_text\": \"I actually found these two pages about this error:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14342\\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... f56253b80a\\n\\nAnd in my version the #define statement is already in the .hpp file instead of the .ipp file, and the error still happened \", \"post_time\": \"2015-10-22 00:27:39\" },\n\t{ \"post_id\": 8356, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Build on Windows - Errors\", \"username\": \"MauricioNO\", \"post_text\": \"I actually got through these errors by switching over to Xalan and Xerces instead, using versions 1.11 and 3.1 respectively.\\n\\nNow the only error before a successful build is\\n
error C2491: 'getComponentStatusFactory' : definition of dllimport function not allowed\\tD:\\\\HPCC-Platform\\\\esp\\\\services\\\\ws_machine\\\\componentstatus.cpp\\tLine: 335
\\n\\nI've done a bit of research about it, and it seems that the function shouldn't be defined that way, since it is declared as dllimport.\\n\\nHas anybody ever gone through this?\", \"post_time\": \"2015-10-22 00:07:08\" },\n\t{ \"post_id\": 8352, \"topic_id\": 1950, \"forum_id\": 14, \"post_subject\": \"HPCC Build on Windows - Errors\", \"username\": \"MauricioNO\", \"post_text\": \"Hello everyone, my name is Mauricio. I worked at LexisNexis (in Atlanta) this past summer as an intern, and I am currently working from home until I graduate.\\n\\nI've been trying to install HPCC Systems on my Windows computer and I've been running into some difficulties that I am pretty sure relate to dependency library versions.\\n\\nI am working on candidate 6.0.0 and this is the cmake command I ran to generate the build files:\\n\\ncmake -G "Visual Studio 12 2013 Win64" -DCMAKE_BUILD_TYPE=Release -DUSE_NATIVE_LIBRARIES=OFF -DCHECK_GIT_TAG=0 -DEXTERNALS_DIRECTORY=D:/Dependencies -DUSE_APR=OFF -DUSE_LIBARCHIVE=OFF -DUSE_OPENLDAP=OFF -DUSE_XALAN=OFF -DUSE_LIBXSLT=ON -DUSE_ZLIB=OFF -DWITH_PLUGINS=OFF ../
\\n\\nThis is a part of the errors I've been getting when trying to build it on VS 2013.\\n\\n\\n\\nNow, after looking at the names (xml and xslt), I used DUMPBIN on both my libsxslt.lib and libxml2.lib to see their symbols and saw that all of these unresolved external symbols are within these two files, so maybe my version of them is not the right one?\\nI downloaded libxml2 2.7.8 and libxslt 1.1.26.\\n\\nAny tips? Maybe regarding which dependencies I should and shouldn't use, or their versions; anything is appreciated.\\nThank you!\", \"post_time\": \"2015-10-21 15:37:59\" },\n\t{ \"post_id\": 8428, \"topic_id\": 1976, \"forum_id\": 14, \"post_subject\": \"Re: mythor not starting and unable to change thor port numbe\", \"username\": \"JimD\", \"post_text\": \"After changing the Thor masterport setting to the new value, did you save and copy the new environment.xml from the source directory to the /etc/HPCCSystems directory? \\n\\nThe file you edit in Config Manager is never the live file. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-11-05 21:01:19\" },\n\t{ \"post_id\": 8426, \"topic_id\": 1976, \"forum_id\": 14, \"post_subject\": \"mythor not starting and unable to change thor port number\", \"username\": \"sbasodi1\", \"post_text\": \"Hi,\\nI'm trying to install a single-node HPCC platform and client on an Ubuntu 15.04 64-bit system. So far, the installation went fine. But whenever I start the client, it fails. I looked into the logs and found that mythor starts on port "20000", which my machine is already using for some other process. So I changed the mythor port to 20010, but mythor is still trying to use the old port 20000. \\n\\nCould you please let me know what exactly the issue is? How do I change the port for thor? I will add screenshots and log details in some time. \\n\\nAny help is appreciated. Thank you.\", \"post_time\": \"2015-11-05 17:16:18\" },\n\t{ \"post_id\": 10141, \"topic_id\": 1978, \"forum_id\": 14, \"post_subject\": \"Re: Multi Node Setup Error\", \"username\": \"kkagia\", \"post_text\": \"I am new to the Linux environment. I tried using scp to share the ssh keys to the nodes, but this was not successful. If you do not mind, could you guide me on how you distributed the keys to the nodes?\", \"post_time\": \"2016-07-28 20:25:50\" },\n\t{ \"post_id\": 10131, \"topic_id\": 1978, \"forum_id\": 14, \"post_subject\": \"Re: Multi Node Setup Error\", \"username\": \"kps_mani\", \"post_text\": \"Yes, I did generate the SSH keys and shared them across the nodes.\", \"post_time\": \"2016-07-28 18:30:32\" },\n\t{ \"post_id\": 10101, \"topic_id\": 1978, \"forum_id\": 14, \"post_subject\": \"Re: Multi Node Setup Error\", \"username\": \"kkagia\", \"post_text\": \"Hello,\\n\\nI think we have a similar issue. But ours looks like it is SSH related. Did you generate the SSH keys and distribute them to the nodes?\", \"post_time\": \"2016-07-28 07:59:33\" },\n\t{ \"post_id\": 8434, \"topic_id\": 1978, \"forum_id\": 14, \"post_subject\": \"Re: Multi Node Setup Error\", \"username\": \"kps_mani\", \"post_text\": \"I have resolved this error by myself. It looks like we have to use the private IP while configuring the cluster instead of the public IP.\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-11-06 03:07:12\" },\n\t{ \"post_id\": 8430, \"topic_id\": 1978, \"forum_id\": 14, \"post_subject\": \"Multi Node Setup Error\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI am trying to set up an HPCC cluster in AWS with 2 nodes. 
I have got 2 m4.large machines and installed the HPCC Platform on both the machines. I have tested the HPCC platform after installation and it was working find on both the machines. I have followed the instruction given in the Installing & Running the HPCC Platform PDF for multi node setup. \\n\\nI have used the Configuration Manager to setup the New Environment XML with No of Slave Nodes for Thor Cluster as 1 and Number of Thor Slaves per node as 1. I have deployed the New Environment XML in both the machines. After deploying the XML, I restarted the Service in both the machines and ended up having the error message - "There are no components configured to run on this node" \\n\\nWhen I copy back the Enviornment.xml came with HPCC installation, I was able to start the HPCC Service. I am wondering whether it is related to Network IP addresses that needs to be used. I have used the Public IP Address of my EC2 instances while Configuring XML. Same Public IP Address was used to access ECL Watch when it ran standalone mode. \\n\\nNote: I have masked the Public IP Address of both machine in the below XML.\\n\\nHere is the Environment XML\\n<?xml version="1.0" encoding="UTF-8"?>\\n<!-- Edited with ConfigMgr on ip X.X.X.X on 2015-11-06T00:05:38 -->\\n<Environment>\\n <EnvSettings>\\n <allow_pgcache_flush>true</allow_pgcache_flush>\\n <blockname>HPCCSystems</blockname>\\n <classpath>/opt/HPCCSystems/classes</classpath>\\n <configs>/etc/HPCCSystems</configs>\\n <environment>environment.xml</environment>\\n <group>hpcc</group>\\n <home>/home</home>\\n <interface>*</interface>\\n <lock>/var/lock/HPCCSystems</lock>\\n <log>/var/log/HPCCSystems</log>\\n <logfields>TIM+DAT+MLT+MID+PID+TID+COD+QUO+PFX</logfields>\\n <mpEnd>7500</mpEnd>\\n <mpSoMaxConn>128</mpSoMaxConn>\\n <mpStart>7101</mpStart>\\n <path>/opt/HPCCSystems</path>\\n <pid>/var/run/HPCCSystems</pid>\\n <runtime>/var/lib/HPCCSystems</runtime>\\n <sourcedir>/etc/HPCCSystems/source</sourcedir>\\n <use_epoll>true</use_epoll>\\n <user>hpcc</user>\\n </EnvSettings>\\n <Hardware>\\n <Computer computerType="linuxmachine"\\n domain="localdomain"\\n name="node167218"\\n netAddress="1X.2X.3X.4X"/>\\n <Computer computerType="linuxmachine"\\n domain="localdomain"\\n name="node204100"\\n netAddress="5X.6X.7X.8X"/>\\n <ComputerType computerType="linuxmachine"\\n manufacturer="unknown"\\n name="linuxmachine"\\n nicSpeed="1000"\\n opSys="linux"/>\\n <Domain name="localdomain" password="" username=""/>\\n <Switch name="Switch"/>\\n </Hardware>\\n <Programs>\\n <Build name="_" url="/opt/HPCCSystems">\\n <BuildSet installSet="deploy_map.xml"\\n name="dafilesrv"\\n path="componentfiles/dafilesrv"\\n processName="DafilesrvProcess"\\n schema="dafilesrv.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="dali"\\n path="componentfiles/dali"\\n processName="DaliServerProcess"\\n schema="dali.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="dfuplus"\\n overide="no"\\n path="componentfiles/dfuplus"\\n processName="DfuplusProcess"\\n schema="dfuplus.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="dfuserver"\\n path="componentfiles/dfuserver"\\n processName="DfuServerProcess"\\n schema="dfuserver.xsd"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="DropZone"\\n path="componentfiles/DropZone"\\n processName="DropZone"\\n schema="dropzone.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="eclagent"\\n path="componentfiles/eclagent"\\n processName="EclAgentProcess"\\n schema="eclagent_config.xsd"/>\\n <BuildSet 
installSet="deploy_map.xml"\\n name="eclminus"\\n overide="no"\\n path="componentfiles/eclminus"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="eclplus"\\n overide="no"\\n path="componentfiles/eclplus"\\n processName="EclPlusProcess"\\n schema="eclplus.xsd"/>\\n <BuildSet installSet="eclccserver_deploy_map.xml"\\n name="eclccserver"\\n path="componentfiles/configxml"\\n processName="EclCCServerProcess"\\n schema="eclccserver.xsd"/>\\n <BuildSet installSet="eclscheduler_deploy_map.xml"\\n name="eclscheduler"\\n path="componentfiles/configxml"\\n processName="EclSchedulerProcess"\\n schema="eclscheduler.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="esp"\\n path="componentfiles/esp"\\n processName="EspProcess"\\n schema="esp.xsd"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="espsmc"\\n path="componentfiles/espsmc"\\n processName="EspService"\\n schema="espsmcservice.xsd">\\n <Properties defaultPort="8010"\\n defaultResourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n defaultSecurePort="18010"\\n type="WsSMC">\\n <Authenticate access="Read"\\n description="Root access to SMC service"\\n path="/"\\n required="Read"\\n resource="SmcAccess"/>\\n <AuthenticateFeature description="Access to SMC service"\\n path="SmcAccess"\\n resource="SmcAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to thor queues"\\n path="ThorQueueAccess"\\n resource="ThorQueueAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to roxie control commands"\\n path="RoxieControlAccess"\\n resource="RoxieControlAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to super computer environment"\\n path="ConfigAccess"\\n resource="ConfigAccess"\\n service="ws_config"/>\\n <AuthenticateFeature description="Access to DFU"\\n path="DfuAccess"\\n resource="DfuAccess"\\n service="ws_dfu"/>\\n <AuthenticateFeature description="Access to DFU XRef"\\n path="DfuXrefAccess"\\n resource="DfuXrefAccess"\\n service="ws_dfuxref"/>\\n <AuthenticateFeature description="Access to machine information"\\n path="MachineInfoAccess"\\n resource="MachineInfoAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to SNMP metrics information"\\n path="MetricsAccess"\\n resource="MetricsAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to remote execution"\\n path="ExecuteAccess"\\n resource="ExecuteAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to DFU workunits"\\n path="DfuWorkunitsAccess"\\n resource="DfuWorkunitsAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to DFU exceptions"\\n path="DfuExceptionsAccess"\\n resource="DfuExceptions"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to spraying files"\\n path="FileSprayAccess"\\n resource="FileSprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to despraying of files"\\n path="FileDesprayAccess"\\n resource="FileDesprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to dkcing of key files"\\n path="FileDkcAccess"\\n resource="FileDkcAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to files in dropzone"\\n path="FileIOAccess"\\n resource="FileIOAccess"\\n service="ws_fileio"/>\\n <AuthenticateFeature description="Access to permissions for file scopes"\\n path="FileScopeAccess"\\n resource="FileScopeAccess"\\n service="ws_access"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n 
path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n <AuthenticateFeature description="Access to cluster topology"\\n path="ClusterTopologyAccess"\\n resource="ClusterTopologyAccess"\\n service="ws_topology"/>\\n <AuthenticateFeature description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to ECL direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n <ProcessFilters>\\n <Platform name="Windows">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n </Platform>\\n <Platform name="Linux">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n <ProcessFilter name="GenesisServerProcess">\\n <Process name="httpd"/>\\n <Process name="atftpd"/>\\n <Process name="dhcpd"/>\\n </ProcessFilter>\\n </Platform>\\n </ProcessFilters>\\n </Properties>\\n </BuildSet>\\n <BuildSet installSet="deploy_map.xml"\\n name="ftslave"\\n path="componentfiles/ftslave"\\n processName="FTSlaveProcess"\\n schema="ftslave_linux.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="hqltest"\\n overide="no"\\n path="componentfiles/hqltest"\\n processName="HqlTestProcess"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="ldapServer"\\n path="componentfiles/ldapServer"\\n processName="LDAPServerProcess"\\n schema="ldapserver.xsd"/>\\n <BuildSet installSet="roxie_deploy_map.xml"\\n name="roxie"\\n path="componentfiles/configxml"\\n processName="RoxieCluster"\\n schema="roxie.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="sasha"\\n path="componentfiles/sasha"\\n processName="SashaServerProcess"\\n schema="sasha.xsd"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="SiteCertificate"\\n overide="no"\\n path="componentfiles/SiteCertificate"\\n processName="SiteCertificate"\\n schema="SiteCertificate.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="soapplus"\\n overide="no"\\n path="componentfiles/soapplus"\\n processName="SoapPlusProcess"\\n schema="soapplus.xsd"/>\\n <BuildSet installSet="deploy_map.xml"\\n name="thor"\\n path="componentfiles/thor"\\n processName="ThorCluster"\\n schema="thor.xsd"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="topology"\\n path="componentfiles/topology"\\n processName="Topology"\\n schema="topology.xsd"/>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="ws_ecl"\\n path="componentfiles/ws_ecl"\\n processName="EspService"\\n schema="esp_service_wsecl2.xsd">\\n <Properties bindingType="ws_eclSoapBinding"\\n defaultPort="8002"\\n defaultResourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n defaultSecurePort="18002"\\n plugin="ws_ecl"\\n type="ws_ecl">\\n <Authenticate 
access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </Properties>\\n </BuildSet>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="ecldirect"\\n path="componentfiles/ecldirect"\\n processName="EspService"\\n schema="esp_service_ecldirect.xsd">\\n <Properties bindingType="EclDirectSoapBinding"\\n defaultPort="8008"\\n defaultResourcesBasedn="ou=EclDirectAccess,ou=EspServices,ou=ecl"\\n defaultSecurePort="18008"\\n plugin="ecldirect"\\n type="ecldirect">\\n <Authenticate access="Read"\\n description="Root access to ECL Direct service"\\n path="/"\\n required="Read"\\n resource="EclDirectAccess"/>\\n <AuthenticateFeature description="Access to ECL Direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n </Properties>\\n </BuildSet>\\n <BuildSet deployable="no"\\n installSet="deploy_map.xml"\\n name="DynamicESDL"\\n path="componentfiles/esdlsvcengine"\\n processName="EspService"\\n schema="esdlsvcengine.xsd">\\n <Properties bindingType="EsdlBinding"\\n defaultPort="8043"\\n defaultResourcesBasedn="ou=EspServices,ou=ecl"\\n defaultSecurePort="18043"\\n plugin="esdl_svc_engine"\\n type="DynamicESDL"/>\\n </BuildSet>\\n </Build>\\n </Programs>\\n <Software>\\n <DafilesrvProcess build="_"\\n buildSet="dafilesrv"\\n description="DaFileSrv process"\\n name="mydafilesrv"\\n parallelRequestLimit="20"\\n throttleCPULimit="75"\\n throttleDelayMs="5000"\\n version="1">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/mydafilesrv"\\n name="s1"\\n netAddress="1X.2X.3X.4X"\\n parallelRequestLimit="20"\\n throttleCPULimit="75"\\n throttleDelayMs="5000"/>\\n <Instance computer="node204100"\\n directory="/var/lib/HPCCSystems/mydafilesrv"\\n name="s2"\\n netAddress="5X.6X.7X.8X"\\n parallelRequestLimit="20"\\n throttleCPULimit="75"\\n throttleDelayMs="5000"/>\\n </DafilesrvProcess>\\n <DaliServerProcess build="_"\\n buildSet="dali"\\n environment="/etc/HPCCSystems/environment.xml"\\n name="mydali"\\n recoverFromIncErrors="true">\\n <Instance computer="node204100"\\n directory="/var/lib/HPCCSystems/mydali"\\n name="s1"\\n netAddress="5X.6X.7X.8X"\\n port="7070"/>\\n </DaliServerProcess>\\n <DfuServerProcess build="_"\\n buildSet="dfuserver"\\n daliServers="mydali"\\n description="DFU Server"\\n monitorinterval="900"\\n monitorqueue="dfuserver_monitor_queue"\\n name="mydfuserver"\\n queue="dfuserver_queue"\\n transferBufferSize="65536">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/mydfuserver"\\n name="s1"\\n netAddress="1X.2X.3X.4X"/>\\n <SSH SSHidentityfile="$HOME/.ssh/id_rsa"\\n SSHpassword=""\\n SSHretries="3"\\n SSHtimeout="0"\\n SSHusername="hpcc"/>\\n </DfuServerProcess>\\n <Directories name="HPCCSystems">\\n <Category dir="/var/log/[NAME]/[INST]" name="log"/>\\n <Category dir="/var/lib/[NAME]/[INST]" name="run"/>\\n <Category dir="/etc/[NAME]/[INST]" name="conf"/>\\n <Category dir="/var/lib/[NAME]/[INST]/temp" name="temp"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data/[COMPONENT]" name="data"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data2/[COMPONENT]" name="data2"/>\\n <Category dir="/var/lib/[NAME]/hpcc-data3/[COMPONENT]" name="data3"/>\\n <Category dir="/var/lib/[NAME]/hpcc-mirror/[COMPONENT]" name="mirror"/>\\n <Category dir="/var/lib/[NAME]/queries/[INST]" name="query"/>\\n <Category 
dir="/var/lock/[NAME]/[INST]" name="lock"/>\\n </Directories>\\n <DropZone build="_"\\n buildSet="DropZone"\\n computer="node204100"\\n description="DropZone process"\\n directory="/var/lib/HPCCSystems/mydropzone"\\n name="mydropzone"/>\\n <EclAgentProcess allowedPipePrograms="*"\\n build="_"\\n buildSet="eclagent"\\n daliServers="mydali"\\n defaultMemoryLimitMB="300"\\n description="EclAgent process"\\n heapRetainMemory="false"\\n heapUseHugePages="false"\\n heapUseTransparentHugePages="true"\\n name="myeclagent"\\n pluginDirectory="/opt/HPCCSystems/plugins/"\\n thorConnectTimeout="600"\\n traceLevel="0"\\n wuQueueName="myeclagent_queue">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/myeclagent"\\n name="s1"\\n netAddress="1X.2X.3X.4X"/>\\n </EclAgentProcess>\\n <EclCCServerProcess build="_"\\n buildSet="eclccserver"\\n daliServers="mydali"\\n description="EclCCServer process"\\n enableSysLog="true"\\n generatePrecompiledHeader="true"\\n maxEclccProcesses="4"\\n name="myeclccserver"\\n traceLevel="1">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/myeclccserver"\\n name="s1"\\n netAddress="1X.2X.3X.4X"/>\\n </EclCCServerProcess>\\n <EclSchedulerProcess build="_"\\n buildSet="eclscheduler"\\n daliServers="mydali"\\n description="EclScheduler process"\\n name="myeclscheduler">\\n <Instance computer="node204100"\\n directory="/var/lib/HPCCSystems/myeclscheduler"\\n name="s1"\\n netAddress="5X.6X.7X.8X"/>\\n </EclSchedulerProcess>\\n <EspProcess build="_"\\n buildSet="esp"\\n componentfilesDir="/opt/HPCCSystems/componentfiles"\\n daliServers="mydali"\\n description="ESP server"\\n enableSEHMapping="true"\\n formOptionsAccess="false"\\n httpConfigAccess="true"\\n logLevel="1"\\n logRequests="false"\\n logResponses="false"\\n maxBacklogQueueSize="200"\\n maxConcurrentThreads="0"\\n maxRequestEntityLength="8000000"\\n name="myesp"\\n perfReportDelay="60"\\n portalurl="http://hpccsystems.com/download">\\n <Authentication htpasswdFile="/etc/HPCCSystems/.htpasswd"\\n ldapAuthMethod="kerberos"\\n ldapConnections="10"\\n ldapServer=""\\n method="none"\\n passwordExpirationWarningDays="10"/>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="myespsmc"\\n port="8010"\\n protocol="http"\\n resourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n service="EclWatch"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to SMC service"\\n path="/"\\n required="Read"\\n resource="SmcAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to SMC service"\\n path="SmcAccess"\\n resource="SmcAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to thor queues"\\n path="ThorQueueAccess"\\n resource="ThorQueueAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to roxie control commands"\\n path="RoxieControlAccess"\\n resource="RoxieControlAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to super computer environment"\\n path="ConfigAccess"\\n resource="ConfigAccess"\\n service="ws_config"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU"\\n path="DfuAccess"\\n resource="DfuAccess"\\n service="ws_dfu"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU XRef"\\n path="DfuXrefAccess"\\n resource="DfuXrefAccess"\\n service="ws_dfuxref"/>\\n <AuthenticateFeature authenticate="Yes"\\n 
description="Access to machine information"\\n path="MachineInfoAccess"\\n resource="MachineInfoAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to SNMP metrics information"\\n path="MetricsAccess"\\n resource="MetricsAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to remote execution"\\n path="ExecuteAccess"\\n resource="ExecuteAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU workunits"\\n path="DfuWorkunitsAccess"\\n resource="DfuWorkunitsAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to DFU exceptions"\\n path="DfuExceptionsAccess"\\n resource="DfuExceptions"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to spraying files"\\n path="FileSprayAccess"\\n resource="FileSprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to despraying of files"\\n path="FileDesprayAccess"\\n resource="FileDesprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to dkcing of key files"\\n path="FileDkcAccess"\\n resource="FileDkcAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to files in dropzone"\\n path="FileIOAccess"\\n resource="FileIOAccess"\\n service="ws_fileio"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to permissions for file scopes"\\n path="FileScopeAccess"\\n resource="FileScopeAccess"\\n service="ws_access"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to cluster topology"\\n path="ClusterTopologyAccess"\\n resource="ClusterTopologyAccess"\\n service="ws_topology"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to ECL direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n </EspBinding>\\n <EspBinding defaultForPort="true"\\n defaultServiceVersion=""\\n name="myws_ecl"\\n port="8002"\\n protocol="http"\\n resourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n service="myws_ecl"\\n workunitsBasedn="ou=workunits,ou=ecl"\\n wsdlServiceAddress="">\\n <Authenticate access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature authenticate="Yes"\\n description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </EspBinding>\\n <HTTPS acceptSelfSigned="true"\\n CA_Certificates_Path="ca.pem"\\n certificateFileName="certificate.cer"\\n city=""\\n country="US"\\n daysValid="365"\\n enableVerification="false"\\n organization="Customer of HPCCSystems"\\n organizationalUnit=""\\n passphrase=""\\n privateKeyFileName="privatekey.cer"\\n regenerateCredentials="false"\\n requireAddressMatch="false"\\n state=""\\n trustedPeers="anyone"/>\\n <Instance computer="node167218"\\n 
directory="/var/lib/HPCCSystems/myesp"\\n FQDN=""\\n name="s1"\\n netAddress="1X.2X.3X.4X"/>\\n </EspProcess>\\n <EspService ActivityInfoCacheSeconds="10"\\n allowNewRoxieOnDemandQuery="false"\\n AWUsCacheTimeout="15"\\n build="_"\\n buildSet="espsmc"\\n clusterQueryStateThreadPoolSize="25"\\n defaultTargetCluster=""\\n description="ESP services for SMC"\\n disableUppercaseTranslation="false"\\n enableSystemUseRewrite="false"\\n excludePartitions="/dev*,/sys,/proc/*"\\n monitorDaliFileServer="false"\\n name="EclWatch"\\n pluginsPath="/opt/HPCCSystems/plugins"\\n serverForArchivedECLWU=""\\n syntaxCheckQueue=""\\n viewTimeout="1000"\\n warnIfCpuLoadOver="95"\\n warnIfFreeMemoryUnder="5"\\n warnIfFreeStorageUnder="5">\\n <Properties defaultPort="8010"\\n defaultResourcesBasedn="ou=SMC,ou=EspServices,ou=ecl"\\n defaultSecurePort="18010"\\n type="WsSMC">\\n <Authenticate access="Read"\\n description="Root access to SMC service"\\n path="/"\\n required="Read"\\n resource="SmcAccess"/>\\n <AuthenticateFeature description="Access to SMC service"\\n path="SmcAccess"\\n resource="SmcAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to thor queues"\\n path="ThorQueueAccess"\\n resource="ThorQueueAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to roxie control commands"\\n path="RoxieControlAccess"\\n resource="RoxieControlAccess"\\n service="ws_smc"/>\\n <AuthenticateFeature description="Access to super computer environment"\\n path="ConfigAccess"\\n resource="ConfigAccess"\\n service="ws_config"/>\\n <AuthenticateFeature description="Access to DFU"\\n path="DfuAccess"\\n resource="DfuAccess"\\n service="ws_dfu"/>\\n <AuthenticateFeature description="Access to DFU XRef"\\n path="DfuXrefAccess"\\n resource="DfuXrefAccess"\\n service="ws_dfuxref"/>\\n <AuthenticateFeature description="Access to machine information"\\n path="MachineInfoAccess"\\n resource="MachineInfoAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to SNMP metrics information"\\n path="MetricsAccess"\\n resource="MetricsAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to remote execution"\\n path="ExecuteAccess"\\n resource="ExecuteAccess"\\n service="ws_machine"/>\\n <AuthenticateFeature description="Access to DFU workunits"\\n path="DfuWorkunitsAccess"\\n resource="DfuWorkunitsAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to DFU exceptions"\\n path="DfuExceptionsAccess"\\n resource="DfuExceptions"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to spraying files"\\n path="FileSprayAccess"\\n resource="FileSprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to despraying of files"\\n path="FileDesprayAccess"\\n resource="FileDesprayAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to dkcing of key files"\\n path="FileDkcAccess"\\n resource="FileDkcAccess"\\n service="ws_fs"/>\\n <AuthenticateFeature description="Access to files in dropzone"\\n path="FileIOAccess"\\n resource="FileIOAccess"\\n service="ws_fileio"/>\\n <AuthenticateFeature description="Access to permissions for file scopes"\\n path="FileScopeAccess"\\n resource="FileScopeAccess"\\n service="ws_access"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n <AuthenticateFeature description="Access to cluster topology"\\n path="ClusterTopologyAccess"\\n resource="ClusterTopologyAccess"\\n 
service="ws_topology"/>\\n <AuthenticateFeature description="Access to own workunits"\\n path="OwnWorkunitsAccess"\\n resource="OwnWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to others' workunits"\\n path="OthersWorkunitsAccess"\\n resource="OthersWorkunitsAccess"\\n service="ws_workunits"/>\\n <AuthenticateFeature description="Access to ECL direct service"\\n path="EclDirectAccess"\\n resource="EclDirectAccess"\\n service="ecldirect"/>\\n <ProcessFilters>\\n <Platform name="Windows">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n </Platform>\\n <Platform name="Linux">\\n <ProcessFilter name="any">\\n <Process name="dafilesrv"/>\\n </ProcessFilter>\\n <ProcessFilter multipleInstances="true" name="DfuServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EclCCServerProcess"/>\\n <ProcessFilter multipleInstances="true" name="EspProcess">\\n <Process name="dafilesrv" remove="true"/>\\n </ProcessFilter>\\n <ProcessFilter name="GenesisServerProcess">\\n <Process name="httpd"/>\\n <Process name="atftpd"/>\\n <Process name="dhcpd"/>\\n </ProcessFilter>\\n </Platform>\\n </ProcessFilters>\\n </Properties>\\n </EspService>\\n <EspService build="_"\\n buildSet="ws_ecl"\\n description="WS ECL Service"\\n name="myws_ecl"\\n roxieTimeout="300"\\n workunitTimeout="600">\\n <Properties bindingType="ws_eclSoapBinding"\\n defaultPort="8002"\\n defaultResourcesBasedn="ou=WsEcl,ou=EspServices,ou=ecl"\\n defaultSecurePort="18002"\\n plugin="ws_ecl"\\n type="ws_ecl">\\n <Authenticate access="Read"\\n description="Root access to WS ECL service"\\n path="/"\\n required="Read"\\n resource="WsEclAccess"/>\\n <AuthenticateFeature description="Access to WS ECL service"\\n path="WsEclAccess"\\n resource="WsEclAccess"\\n service="ws_ecl"/>\\n </Properties>\\n </EspService>\\n <FTSlaveProcess build="_"\\n buildSet="ftslave"\\n description="FTSlave process"\\n name="myftslave"\\n version="1">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/myftslave"\\n name="s1"\\n netAddress="1X.2X.3X.4X"\\n program="/opt/HPCCSystems/bin/ftslave"/>\\n <Instance computer="node204100"\\n directory="/var/lib/HPCCSystems/myftslave"\\n name="s2"\\n netAddress="5X.6X.7X.8X"\\n program="/opt/HPCCSystems/bin/ftslave"/>\\n </FTSlaveProcess>\\n <SashaServerProcess autoRestartInterval="0"\\n build="_"\\n buildSet="sasha"\\n cachedWUat="* * * * *"\\n cachedWUinterval="24"\\n cachedWUlimit="100"\\n coalesceAt="* * * * *"\\n coalesceInterval="1"\\n dafsmonAt="* * * * *"\\n dafsmonInterval="0"\\n dafsmonList="*"\\n daliServers="mydali"\\n description="Sasha Server process"\\n DFUrecoveryAt="* * * * *"\\n DFUrecoveryCutoff="4"\\n DFUrecoveryInterval="12"\\n DFUrecoveryLimit="20"\\n DFUWUat="* * * * *"\\n DFUWUcutoff="14"\\n DFUWUduration="0"\\n DFUWUinterval="24"\\n DFUWUlimit="1000"\\n DFUWUthrottle="0"\\n ExpiryAt="* 3 * * *"\\n ExpiryDefault="14"\\n ExpiryInterval="24"\\n keepResultFiles="false"\\n LDSroot="LDS"\\n logDir="."\\n minDeltaSize="50000"\\n name="mysasha"\\n PersistExpiryDefault="7"\\n recoverDeltaErrors="false"\\n suspendCoalescerDuringXref="true"\\n thorQMonInterval="1"\\n thorQMonQueues="*"\\n thorQMonSwitchMinTime="0"\\n WUat="* * * * *"\\n WUbackup="0"\\n 
WUcutoff="8"\\n WUduration="0"\\n WUinterval="6"\\n WUlimit="1000"\\n WUretryinterval="7"\\n WUthrottle="0"\\n xrefAt="* 2 * * *"\\n xrefCutoff="1"\\n xrefEclWatchProvider="true"\\n xrefInterval="672"\\n xrefList="*"\\n xrefMaxMemory="4096"\\n xrefMaxScanThreads="500">\\n <Instance computer="node167218"\\n directory="/var/lib/HPCCSystems/mysasha"\\n name="s1"\\n netAddress="1X.2X.3X.4X"\\n port="8877"/>\\n </SashaServerProcess>\\n <ThorCluster autoCopyBackup="false"\\n build="_"\\n buildSet="thor"\\n compressInternalSpills="true"\\n computer="node167218"\\n daliServers="mydali"\\n description="Thor process"\\n fileCacheLimit="1800"\\n heapRetainMemory="false"\\n heapUseHugePages="false"\\n heapUseTransparentHugePages="true"\\n localThor="false"\\n monitorDaliFileServer="true"\\n name="mythor"\\n pluginsPath="/opt/HPCCSystems/plugins/"\\n replicateAsync="true"\\n replicateOutputs="true"\\n slavesPerNode="1"\\n watchdogEnabled="true"\\n watchdogProgressEnabled="true">\\n <Debug/>\\n <SSH SSHidentityfile="$HOME/.ssh/id_rsa"\\n SSHpassword=""\\n SSHretries="3"\\n SSHtimeout="0"\\n SSHusername="hpcc"/>\\n <Storage/>\\n <SwapNode/>\\n <ThorMasterProcess computer="node167218" name="m1"/>\\n <ThorSlaveProcess computer="node204100" name="s1"/>\\n </ThorCluster>\\n <Topology build="_" buildSet="topology" name="topology">\\n <Cluster name="hthor" prefix="hthor">\\n <EclAgentProcess process="myeclagent"/>\\n <EclCCServerProcess process="myeclccserver"/>\\n <EclSchedulerProcess process="myeclscheduler"/>\\n </Cluster>\\n <Cluster name="thor" prefix="thor">\\n <EclAgentProcess process="myeclagent"/>\\n <EclCCServerProcess process="myeclccserver"/>\\n <EclSchedulerProcess process="myeclscheduler"/>\\n <ThorCluster process="mythor"/>\\n </Cluster>\\n </Topology>\\n </Software>\\n</Environment>\", \"post_time\": \"2015-11-06 00:43:22\" },\n\t{ \"post_id\": 8556, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"Thank you for your help!\", \"post_time\": \"2015-11-10 16:42:59\" },\n\t{ \"post_id\": 8554, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"JimD\", \"post_text\": \"Yes.\\n\\nInstall the IDE on your Windows workstation, then configure the IDE to communicate with the server on AWS (using the public IP of the server running ECLWatch).\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-11-10 16:39:05\" },\n\t{ \"post_id\": 8552, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"So I should install ECL IDE on Windows and then I will be able to use it in Amazon AWS??\", \"post_time\": \"2015-11-10 15:51:53\" },\n\t{ \"post_id\": 8548, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"JimD\", \"post_text\": \"Since I saw in another post that you are running on AWS, the easiest thing for you to do is to start over. \\n\\nUsing the HPCC Systems® Instant Cloud for AWS, select the number of nodes you want to use. That will create a multi-node system and replication will be enabled.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-11-10 15:37:26\" },\n\t{ \"post_id\": 8546, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"I got it. 
Now how can I add nodes??\", \"post_time\": \"2015-11-10 15:04:46\" },\n\t{ \"post_id\": 8540, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"rtaylor\", \"post_text\": \"Sipan,\\n\\nReplication means the data on node #1's C: drive is replicated on node #2's D: drive, the data on node #2's C: drive is replicated on node #3's D: drive, ... \\n\\nSo, with only one node there can be no replication. That's why you can't check the box.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-10 14:52:52\" },\n\t{ \"post_id\": 8536, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"I installed HPCC Systems on Ubuntu. So one node.\", \"post_time\": \"2015-11-10 14:47:21\" },\n\t{ \"post_id\": 8534, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"rtaylor\", \"post_text\": \"Are you spraying to a 1-node cluster, or hThor?\", \"post_time\": \"2015-11-10 14:46:15\" },\n\t{ \"post_id\": 8528, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"Still I am not able to check it.\", \"post_time\": \"2015-11-10 11:15:58\" },\n\t{ \"post_id\": 8526, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"sipan4434\", \"post_text\": \"Still I am not able to check it.\", \"post_time\": \"2015-11-10 11:14:41\" },\n\t{ \"post_id\": 8442, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Re: Replication\", \"username\": \"rtaylor\", \"post_text\": \"Sipan,\\n\\nI just check the "replication" box for the spray.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-06 15:51:45\" },\n\t{ \"post_id\": 8438, \"topic_id\": 1984, \"forum_id\": 14, \"post_subject\": \"Replication\", \"username\": \"sipan4434\", \"post_text\": \"I want to spray the data but replicate box is unchecked. How to enable the replication on the system?\", \"post_time\": \"2015-11-06 11:01:47\" },\n\t{ \"post_id\": 8560, \"topic_id\": 1996, \"forum_id\": 14, \"post_subject\": \"Re: Adding nodes\", \"username\": \"JimD\", \"post_text\": \"You should find all the information here:\\n\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=27\\n\\n\\nHTH,\\nJim\", \"post_time\": \"2015-11-11 19:16:05\" },\n\t{ \"post_id\": 8558, \"topic_id\": 1996, \"forum_id\": 14, \"post_subject\": \"Adding nodes\", \"username\": \"sipan4434\", \"post_text\": \"After installing HPCC on ubuntu 14.04 I want to add nodes of cluster to the System. How can I do it and what do I need?\", \"post_time\": \"2015-11-11 15:14:54\" },\n\t{ \"post_id\": 8640, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"lpezet\", \"post_text\": \"So by only specifying say superfiles in the packagemap (former example with the ::all.ndx in it), Roxie will rely on Dali to get the list of files to use?\\nSo in my case here when I re-activated the packagemap, Roxie asked Dali again and somehow Dali returned the "old" (cached?) 
content for it, even though I did change the content of the ::all.ndx superfile.\\nAm I understanding this correctly?\\n\\nAlright, I'll have to generate new packagemaps then every time I create new versions of my indices.\\n\\nThanks a lot for the help!\\nLuc.\", \"post_time\": \"2015-11-18 17:29:49\" },\n\t{ \"post_id\": 8638, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"sort\", \"post_text\": \"Package files for roxie are meant to provide the definition of the superkey used in your query. You will need to specify the subfiles for each super. Think of packages as an xml way of telling roxie the the subfiles used in superfiles (just like if you used thor or dfu to create and maintain superkey information in dali). If I understand your example, then you should use the second example (weather::forecast as the super and the *.ndx files will be the indexes actually used in your query when weather::forecast is referenced)\\n\\n...and that instead we should have all files under ~weather::forecast and the following packagemap:\\nCode: \\n<RoxiePackages>\\n <Package id="weather_forecast.1">\\n <Base id="weather_data"/>\\n </Package>\\n \\n <Package id="weather_data">\\n <SuperFile id="~weather::forecast">\\n <SubFile value="~weather::forecast::ga_20150101.ndx"/>\\n <SubFile value="~weather::forecast::ny_20150101.ndx"/>\\n <SubFile value="~weather::forecast::ma_20150101.ndx"/>\\n <SubFile value="~weather::forecast::co_20150101.ndx"/>\\n </SuperFile>\\n </Package>\\n</RoxiePackages>\\n\\n>>>Q: Why not directly (if PackegMap were to permit it) list all subfiles without even mentioning the superfile?\\nA: In order for packages to work, your ecl needs to refer to the superfile and NOT to subfiles. At runtime roxie will lookup the query and superfile information to see what subfiles to use. If you just listed the subfiles without the superfile we would have no way to associate the correct files at query execution time. Packages are meant to be a way to control the association between query / superfile / subfile without looking at the superfile definition in dali.\\n\\n>>>>Q:How come I can use the superfile/superfile structure from earlier, the system will find the files underneath the first time and never (as far as I could tell) later on when I update the content of those superfiles? (I'm guessing it's cached...any reason we can't refresh that cache?)\\nThat would make updating data actually easier: we can script adding/removing files to superfiles in ECL, but it's much more difficult to tweak XML and impossible (?) to push packagemaps directly from ECL (without using a PIPE).\\n\\nA: You can only have 1 package active at a time. If you publish a second package, make sure you make it active then roxie is supposed to switch to the new package. Also you specify a specific version of the query (i.e. weather_forecast.1). If you ever publish a new version of that query (i.e. weather_forecast.2), then your package will not update the superfile information for that query. We usually specify the default name (i.e. weather_forecast) and not a specific instance (btw - roxie internally adds the number to the end of the queryname). \\n\\nIf you prefer using ECL to directly update the contents of your superfiles, then maybe you would prefer just using the dali definition of your superfiles. 
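For that ECL-side approach, a minimal sketch using the Standard Library superfile transaction functions (the file names reuse the examples quoted above; they are placeholders, not files from this thread):

IMPORT STD;

// Placeholder names for illustration only.
superName := '~weather::forecast';
oldSub    := '~weather::forecast::ga_20150101.ndx';
newSub    := '~weather::forecast::ga_20150601.ndx';

// Swap one subfile inside a transaction so readers never see a half-updated superkey.
SEQUENTIAL(
  STD.File.StartSuperFileTransaction(),
  STD.File.RemoveSuperFile(superName, oldSub),
  STD.File.AddSuperFile(superName, newSub),
  STD.File.FinishSuperFileTransaction()
);

Maintained this way, the superkey membership lives in Dali (just as it would if Thor or DFU were used to maintain it) rather than in the packagemap.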
Unfortunately I am not familiar enough with ECL, but I believe people use ECL to generate package files\\n\\n\\nHTH\\nStu\", \"post_time\": \"2015-11-18 17:19:45\" },\n\t{ \"post_id\": 8618, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"lpezet\", \"post_text\": \"Hi sort,\\n\\n\\nThis is pretty much what we have re. packagemaps, but we're specifying only superfiles and not files in it. When activating such packagemap, Roxie (or whichever) actually finds all the files under those superfiles (the first time only) and I can see those files listed in the "Logical Files" and "SuperFiles" tabs in ECL Watch for the queries assigned for it.\\n\\nSo you are saying we HAVE to specify the exact files to use in the packagemap instead?\\n\\nIn the following packagemap, ~weather::forecast::all.ndx is a superfile with actual files under it.\\nAre you then saying that this is *wrong*?\\n
\\n<RoxiePackages>\\n\\t<Package id="weather_forecast.1">\\n\\t\\t<Base id="weather_data"/>\\n\\t</Package>\\n\\t\\n\\t<Package id="weather_data">\\n\\t\\t<SuperFile id="~weather::forecast">\\n\\t\\t\\t<SubFile value="~weather::forecast::all.ndx"/>\\n\\t\\t</SuperFile>\\n\\t</Package>\\n</RoxiePackages>\\n
\\n\\n\\n...and that instead we should have all files under ~weather::forecast and the following packagemap:\\n\\n<RoxiePackages>\\n\\t<Package id="weather_forecast.1">\\n\\t\\t<Base id="weather_data"/>\\n\\t</Package>\\n\\t\\n\\t<Package id="weather_data">\\n\\t\\t<SuperFile id="~weather::forecast">\\n\\t\\t\\t<SubFile value="~weather::forecast::ga_20150101.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::ny_20150101.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::ma_20150101.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::co_20150101.ndx"/>\\n\\t\\t</SuperFile>\\n\\t</Package>\\n</RoxiePackages>\\n
\\n\\nThen when updating the data with say 20150601 data, we create a new package map like the following and activate it:\\n\\n<RoxiePackages>\\n\\t<Package id="weather_forecast.1">\\n\\t\\t<Base id="weather_data"/>\\n\\t</Package>\\n\\t\\n\\t<Package id="weather_data">\\n\\t\\t<SuperFile id="~weather::forecast">\\n\\t\\t\\t<SubFile value="~weather::forecast::ga_20150601.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::ny_20150601.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::ma_20150601.ndx"/>\\n\\t\\t\\t<SubFile value="~weather::forecast::co_20150601.ndx"/>\\n\\t\\t</SuperFile>\\n\\t</Package>\\n</RoxiePackages>\\n
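The new packagemap does not have to be edited by hand each time: as Stu notes above, people also generate the package file text from ECL. A rough sketch of that idea (the subfile names reuse the example above; everything else is an assumption, not code from this thread):

SubRec  := {STRING subname};
LineRec := {STRING txt};

// The subfiles that should sit behind the superkey for this data version.
subs := DATASET([{'~weather::forecast::ga_20150601.ndx'},
                 {'~weather::forecast::ny_20150601.ndx'},
                 {'~weather::forecast::ma_20150601.ndx'},
                 {'~weather::forecast::co_20150601.ndx'}], SubRec);

// Turn each subfile name into a <SubFile/> element, then roll them up into one string.
subElems := PROJECT(subs, TRANSFORM(LineRec,
                    SELF.txt := '    <SubFile value="' + LEFT.subname + '"/>\n'));
joined   := ROLLUP(subElems, TRUE, TRANSFORM(LineRec,
                    SELF.txt := LEFT.txt + RIGHT.txt));

packageMapXml :=
  '<RoxiePackages>\n' +
  '  <Package id="weather_forecast.1">\n    <Base id="weather_data"/>\n  </Package>\n' +
  '  <Package id="weather_data">\n    <SuperFile id="~weather::forecast">\n' +
  joined[1].txt +
  '    </SuperFile>\n  </Package>\n' +
  '</RoxiePackages>';

OUTPUT(packageMapXml);

The generated text still has to be published and activated (for example with the ecl packagemap command-line tool), so this only removes the hand-editing step.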
\\n\\nDid I understand correctly?\\n\\nIf so, what's even the point of having (or specifying in packagemap) the superfile ~weather::forecast? Do we actually need to have those subfiles added to the superfile or just specify this relationship in the packagemap only?\\nWhy not directly (if PackegMap were to permit it) list all subfiles without even mentioning the superfile?\\n\\nHow come I can use the superfile/superfile structure from earlier, the system will find the files underneath the first time and never (as far as I could tell) later on when I update the content of those superfiles? (I'm guessing it's cached...any reason we can't refresh that cache?)\\nThat would make updating data actually easier: we can script adding/removing files to superfiles in ECL, but it's much more difficult to tweak XML and impossible (?) to push packagemaps directly from ECL (without using a PIPE).\\n\\n\\nThank you!\\n\\n\\nPS: I don't understand "BTW - we do not usually use "::" in our package defintions" since the Package "thor::MyData_Key" has "::" in it.\", \"post_time\": \"2015-11-17 15:27:58\" },\n\t{ \"post_id\": 8614, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"sort\", \"post_text\": \"Packages are an xml way to define the contents of superkeys that roxie will use at runtime without using a definition in dali. Roxie will copy the files to disk if / when they need them. As far as your package definition,\\n<Package id="my::superfile">\\n <SuperFile id="~my::superfile.ndx">\\n <SubFile value="~my::sub_superfile.ndx"/>\\n </SuperFile>\\n</Package>\\n\\nMake sure that the <Package id> is referenced in the package definition that tells the query what to load. (BTW - we do not usually use "::" in our package defintions.\\nFrom our documentation http://cdn.hpccsystems.com/releases/CE-Candidate-5.4.4/docs/RDDERef-5.4.4-1.pdf\\n...\\n<RoxiePackages>\\n <Package id="MyQuery">\\n <Base id="thor::MyData_Key"/>\\n </Package>\\n\\n <Package id="thor::MyData_Key">\\n <SuperFile id="~thor::MyData_Key">\\n <SubFile value="~thor::Mysubfile1"/>\\n <SubFile value="~thor::Mysubfile2"/>\\n </SuperFile>\\n </Package>\\n</RoxiePackages>\", \"post_time\": \"2015-11-17 14:57:01\" },\n\t{ \"post_id\": 8600, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"bforeman\", \"post_text\": \"Checking with the ROXIE team, but I am pretty sure that if you are doing a remote copy of the sub files of the superkey than the package map is not even needed.\\n\\nBob\", \"post_time\": \"2015-11-16 18:22:25\" },\n\t{ \"post_id\": 8598, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"lpezet\", \"post_text\": \"Thanks Bob!\\n\\nI'm actually using a completely separate Roxie cluster for it (which doesn't even have Thor on it).\\nWe're doing remote copies from the Thor cluster to Roxie cluster (again, 2 different clusters, as in Thor configuration has nothing about that Roxie cluster configuration).\\n\\nAm I missing something?\", \"post_time\": \"2015-11-16 18:11:39\" },\n\t{ \"post_id\": 8594, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Re: Refresh Roxie\", \"username\": \"bforeman\", \"post_text\": \"Hi Luc,\\n\\nIs the query published to ROXIE using the same Dali as the THOR cluster? I think that the problem lies in the fact that you are updating the superkey on the THOR cluster, but the ROXIE cluster is not getting updated. 
Even on deactivate/activate, it sounds like the data itself is never getting updated.\\n\\nTo get your package maps to work the way you want to, you will need to publish your queries on a ROXIE that uses a different Dali than the one on your THOR cluster. When you publish using the remote dali option, then your updates of the superkey on the remote THOR will be reflected in the ROXIE query.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-16 17:03:06\" },\n\t{ \"post_id\": 8592, \"topic_id\": 2006, \"forum_id\": 14, \"post_subject\": \"Refresh Roxie\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nI'm having a hard time figuring out Roxie...in a sys admin way \\n\\nSay I have a PackageMap pointing to Superfiles like so:\\n
\\n<Package id="my::superfile">\\n <SuperFile id="~my::superfile.ndx">\\n <SubFile value="~my::sub_superfile.ndx"/>\\n </SuperFile>\\n</Package>\\n
\\n\\nNow ~my::sub_superfile.ndx is actually a superfile with actual files underneath.\\nI update that superfile with new subfiles from time to time (I remove then add new ones).\\nIt works fine (i.e. I don't get any locking that way, vs. specifying ~my::superfile.ndx and listing all subfiles underneath in the packagemap).\\n\\nHowever, I can't seem to tell Roxie to take into account those changes I made (and see) in ~my::sub_superfile.ndx. I tried to deactivate then re-activate the packagemap, re-upload the same packagemap but in vain. I check my published queries and I see they are still using the old files (the ones I removed from ~my::sub_superfile.ndx).\\nRight now to have it take into account the new content in ~my::sub_superfile.ndx, I need to restart Roxie.\\n\\n\\nWhat am I doing wrong? Am I misunderstanding the function of the packagemap?\\n\\n\\nThank you for your help!\\nLuc.\", \"post_time\": \"2015-11-16 15:55:38\" },\n\t{ \"post_id\": 8838, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"I posted the way I managed to fix it on the other thread.\\n\\nThe problem was indeed differences my DATASET structure and the Response one, but I fixed that by performing a transform that dint't actually change anything, so I don't really know what happened behind the curtains.\\n\\n \", \"post_time\": \"2015-12-13 20:58:45\" },\n\t{ \"post_id\": 8790, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"JimD\", \"post_text\": \"I would try two things:\\n\\n1) I don't think you need ROW. In our AddThis example, we wanted a scalar result so we wrapped it in a ROW function.\\n\\n2) Make sure your DATASET structure matches your Response structure.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-12-09 20:19:50\" },\n\t{ \"post_id\": 8784, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"Since the topic became ECL development, I created a new topic under that forum. I feel like this way it's going to be easier for other users with similar issues to find information about it.\\n\\nviewtopic.php?f=8&t=2046\", \"post_time\": \"2015-12-09 19:24:44\" },\n\t{ \"post_id\": 8778, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"Hello Rodrigo thanks for the reply.\\n\\nI did as you suggested and added another field in the search\\n\\nFrom the wsNAME.ecl\\n
\\nexport t_ComplaintSearchRequest := record\\n\\tstring zipCode {xpath('zipCode')};\\n\\tstring state {xpath('state')};\\nend;\\n
\\n\\nAnd edited the query code to look like this:(now it looks for matches of zip OR state\\n\\n//INTERFACE definition\\nIMPORT ConsumerComplaints as CC;\\nIMPORT iesp;\\n\\nrec_in := iesp.wsNAME.t_ComplaintSearchRequest;\\nFirst_row := ROW([], rec_in) : STORED('ComplaintSearchRequest', FEW);\\n\\n\\nCSVRecord := CC.LayoutComplaints;\\nIFilterArgs := INTERFACE //defines passed parameters\\n EXPORT DATASET(CSVRecord) ds;\\n EXPORT STRING searchZip;\\n EXPORT STRING searchState;\\nEND;\\nFilterLibIface2(IFilterArgs args) := INTERFACE\\n EXPORT DATASET(CSVRecord) matches;\\n// EXPORT DATASET(CSVRecord) others;\\nEND;\\n\\n//MODULE Definition\\nFilterDsLib2(IFilterArgs args) := MODULE,LIBRARY(FilterLibIface2)\\n EXPORT matches := args.ds(zipcode = args.searchzip OR state = args.searchState);\\n //EXPORT others := args.ds(zipcode != args.search); //No need to fetch data that doesnt match input criteria in this case\\nEND;\\n\\n\\n//Using the library\\nComplaints := DATASET('~MN::ProcessedComplaints', CSVRecord, Thor);\\n\\nSearchArgs := MODULE(IFilterArgs)\\n EXPORT DATASET(CSVRecord) ds := Complaints;\\n EXPORT STRING searchzip := First_row.zipcode;\\n EXPORT STRING searchstate := First_row.state;\\nEND;\\nlib3 := LIBRARY(INTERNAL(FilterDsLib2),FilterLibIface2(SearchArgs));\\n//ds_out := DATASET(lib3.matches, iesp.wsNAME.t_ComplaintSearchResponse);\\n//ds_out := ROW({lib3.matches}, iesp.wsNAME.t_ComplaintSearchResponse);\\n\\nOUTPUT(lib3.matches, NAMED('ComplaintSearchResponse'));\\n
\\n\\nAs the output is just the regular lib3.matches it looks and works fine if I go straight to :8002 and test it. I can see all of the complaints on a State and the ones on a specific zipcode.\\n\\nBut still if I uncomment the line\\n\\n//ds_out := ROW({lib3.matches}, iesp.wsNAME.t_ComplaintSearchResponse);\\n
\\nI get the same error "Initializer for field records has the wrong type".\\n\\nAnd if I uncomment the line\\n\\n//ds_out := DATASET(lib3.matches, iesp.wsNAME.t_ComplaintSearchResponse);\\n
\\nI get "Error: syntax error near "t_ComplaintSearchResponse" : expected datarow, identifier, macro-name (36, 48), 3002"\\n\\nSeems like something is wrong between the data type of lib3.matches and what t_ComplaintSearchResponse expects...\", \"post_time\": \"2015-12-09 17:52:41\" },\n\t{ \"post_id\": 8776, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Mauricio, the issue might have to do with the ComplaintSearchRequest structure. It might be treated as a scalar since it only contains a single field and it doesn't make sense to create a ROW from the request. Try declaring First_row as a string instead of a ROW, or if you expect the request structure to be more complex, try adding another field to it.\", \"post_time\": \"2015-12-09 14:57:08\" },\n\t{ \"post_id\": 8774, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"Hey Jim, I am having some trouble writing the support ECL code(the querty itself) for the generated ECL from my ECM definition.\\n\\nI created a service called wsNAME that will have two methods, AddThis(which is the one implemented on the pdf you linked and works fine) and ComplaintSearch.\\nComplaintSearch is supposed to fetch data from a logical file on my HPCC Platform based on a zipcode search by the user.\\n\\nThe ECM definition for the service:\\n\\nESPservice wsNAME\\n{\\n ESPmethod AddThis(AddThisRequest, AddThisResponse);\\n ESPmethod ComplaintSearch(ComplaintSearchRequest, ComplaintSearchResponse);\\n};\\n\\n//AddThis method\\nESPrequest AddThisRequest\\n{\\n int FirstNumber;\\n int SecondNumber;\\n};\\n\\nESPresponse AddThisResponse\\n{\\n int Answer;\\n};\\n\\n//ComplaintSearch Method\\nESPrequest ComplaintSearchRequest\\n{\\n\\tstring zipCode;\\n};\\n\\nESPstruct ComplaintSearchRecord\\n{\\n\\tstring complaintid;\\n\\tstring product;\\n\\tstring subproduct;\\n\\tstring issue;\\n\\tstring subissue;\\n\\tstring state; \\n\\tstring zipcode; \\n\\tstring submittedvia;\\n\\tstring datereceived;\\n\\tstring datesent;\\n\\tstring company;\\n\\tstring response;\\n\\tstring timely;\\n\\tstring disputed;\\n};\\n\\nESPresponse ComplaintSearchResponse\\n{\\n\\tESParray<ESPstruct ComplaintSearchRecord, Complaint> Records;\\n};\\n
\\n\\nThe ECL it generates:\\n\\n/*** Not to be hand edited (changes will be lost on re-generation) ***/\\n/*** ECL Interface generated by esdl2ecl version 1.0 from wsNAME.xml. ***/\\n/*===================================================*/\\n\\nexport wsNAME := MODULE\\n\\nexport t_ComplaintSearchRecord := record\\n\\tstring complaintid {xpath('complaintid')};\\n\\tstring product {xpath('product')};\\n\\tstring subproduct {xpath('subproduct')};\\n\\tstring issue {xpath('issue')};\\n\\tstring subissue {xpath('subissue')};\\n\\tstring state {xpath('state')};\\n\\tstring zipcode {xpath('zipcode')};\\n\\tstring submittedvia {xpath('submittedvia')};\\n\\tstring datereceived {xpath('datereceived')};\\n\\tstring datesent {xpath('datesent')};\\n\\tstring company {xpath('company')};\\n\\tstring response {xpath('response')};\\n\\tstring timely {xpath('timely')};\\n\\tstring disputed {xpath('disputed')};\\nend;\\n\\nexport t_AddThisRequest := record\\n\\tinteger FirstNumber {xpath('FirstNumber')};\\n\\tinteger SecondNumber {xpath('SecondNumber')};\\nend;\\n\\nexport t_ComplaintSearchRequest := record\\n\\tstring zipCode {xpath('zipCode')};\\nend;\\n\\nexport t_AddThisResponse := record\\n\\tinteger Answer {xpath('Answer')};\\nend;\\n\\nexport t_ComplaintSearchResponse := record\\n\\tdataset(t_ComplaintSearchRecord) Records {xpath('Records/Complaint'), MAXCOUNT(1)};\\nend;\\n\\n\\nend;\\n\\n/*** Not to be hand edited (changes will be lost on re-generation) ***/\\n/*** ECL Interface generated by esdl2ecl version 1.0 from wsNAME.xml. ***/\\n/*===================================================*/\\n\\n\\n
\\n\\nThe Query where the problem is happening:\\n\\n//INTERFACE definition\\nIMPORT ConsumerComplaints as CC;\\nIMPORT iesp;\\n\\nrec_in := iesp.wsNAME.t_ComplaintSearchRequest;\\nFirst_row := ROW([], rec_in) : STORED('ComplaintSearchRequest', FEW);\\n\\n\\nCSVRecord := CC.LayoutComplaints;\\nIFilterArgs := INTERFACE //defines passed parameters\\n EXPORT DATASET(CSVRecord) ds;\\n EXPORT STRING search;\\nEND;\\nFilterLibIface2(IFilterArgs args) := INTERFACE\\n EXPORT DATASET(CSVRecord) matches;\\n EXPORT DATASET(CSVRecord) others;\\nEND;\\n\\n//MODULE Definition\\nFilterDsLib2(IFilterArgs args) := MODULE,LIBRARY(FilterLibIface2)\\n EXPORT matches := args.ds(zipcode = args.search);\\n EXPORT others := args.ds(zipcode != args.search);\\nEND;\\n\\n\\n//Using the library\\nComplaints := DATASET('~MN::ProcessedComplaints', CSVRecord, Thor);\\n\\nSearchArgs := MODULE(IFilterArgs)\\n EXPORT DATASET(CSVRecord) ds := Complaints;\\n EXPORT STRING search := First_row.zipcode;\\nEND;\\nlib3 := LIBRARY(INTERNAL(FilterDsLib2),FilterLibIface2(SearchArgs));\\nds_out := ROW({lib3.matches}, iesp.wsNAME.t_ComplaintSearchResponse);\\n\\nOUTPUT(ds_out, NAMED('ComplaintSearchResponse'));\\n
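For reference, the pass-through TRANSFORM that MauricioNO reports as the eventual fix earlier in this topic would look roughly like the sketch below. It continues from the query above and assumes the field names in CC.LayoutComplaints line up one-to-one with the generated t_ComplaintSearchRecord:

// Project the matches into the generated record so the nested dataset has
// exactly the type the response record expects, then build the response row.
matchesOut := PROJECT(lib3.matches,
                      TRANSFORM(iesp.wsNAME.t_ComplaintSearchRecord, SELF := LEFT));

ds_out := ROW({matchesOut}, iesp.wsNAME.t_ComplaintSearchResponse);
OUTPUT(ds_out, NAMED('ComplaintSearchResponse'));

Note that the generated Records field carries MAXCOUNT(1), which may also need revisiting if more than one match should be returned.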
\\n\\nThe error that I get when checking for errors on ECL IDE is:\\nError: Initializer for field records has the wrong type (34, 15), 3123\\n(34, 15) is this part: ds_out := ROW(
.\\n\\nDo you know what could be wrong?\\nIf I just output lib3.matches I have exactly what I want(several entries with all the fields [complaintid, product, subproduct, etc...]) so I would expect that to match what\\nt_ComplaintSearchResponse is expecting.\\n\\nAs I have never worked with Dynamic ESDL queries before any tips and directions are appreciated.\\nThank you!\", \"post_time\": \"2015-12-08 21:45:16\" },\n\t{ \"post_id\": 8752, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"JimD\", \"post_text\": \"Excellent! Please post your impressions, experiences, and any feedback here. \\n\\nThis will help us plan future enhancements.\\n\\nAll the best,\\n\\nJim\", \"post_time\": \"2015-12-07 21:08:39\" },\n\t{ \"post_id\": 8746, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"Thanks Jim!\\n\\nI think the Dynamic ESDL will serve the purpose.\", \"post_time\": \"2015-12-07 19:51:53\" },\n\t{ \"post_id\": 8724, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Re: Quick ESP question(adding new services)\", \"username\": \"JimD\", \"post_text\": \"The easiest way to develop your own Web services to access queries is Dynamic ESDL.\\n\\nDynamic ESDL (Enterprise Service Description Language)is a methodology that helps you develop and manage web-based query interfaces quickly and consistently.\\n\\nDynamic ESDL takes an interface-first development approach. It leverages the ESDL Language to create a common interface “contract” that both Roxie Query and Web interface developers will adhere to. It is intended to allow developers to create production web services, with clean interfaces that can evolve and grow over time without breaking existing applications.\\n\\nhttps://hpccsystems.com/download/docume ... namic-esdl\\n\\nAn alternative method is to use WsECL to expose your queries, then write SOAP or RESTful applications to consume them. This is shown in the Accessing your Roxie Queries section of Using Roxie.\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=36\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-12-04 13:43:22\" },\n\t{ \"post_id\": 8718, \"topic_id\": 2036, \"forum_id\": 14, \"post_subject\": \"Quick ESP question(adding new services)\", \"username\": \"MauricioNO\", \"post_text\": \"Hi, I just need to clear something up.\\n\\nSay I have the OSS ESP installed and I would like to create a new webservice ws_WSname and then create a new method wsm_methodname that will hit a query published in a(any) Roxie(granted I configure the esp.xml accordingly).\\n\\nCan this even be done on the OSS ESP? Does it have what it takes to handle Roxie requests?\\nIf not, can this be done on the Purchasable module ESP?\\n\\nThanks.\", \"post_time\": \"2015-12-03 22:59:23\" },\n\t{ \"post_id\": 8928, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"DSC\", \"post_text\": \"Vince, for what it's worth, I followed your example with a simple hello world .ecl file and it worked. I did have to manually fix the libraries as I described earlier, and the performance was far from stellar (apparently due to a call to gethostbyname), but I actually saw 'Hello, World!' as a command-line output. 
The difference, perhaps, may be that the last round of updates to my OS X (10.11.2) included Xcode updates and something changed there.\\n\\nAgain, if running ECL code on your Mac client is important then I would suggest opening a Jira ticket.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2015-12-18 16:59:52\" },\n\t{ \"post_id\": 8910, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"vin\", \"post_text\": \"Further update. I copied the platform.h file from github to the suggested directory on my Ubuntu 14.04 VM and the compile succeeded.\\n\\nI would still like to resolve this on OS X but at least it is not blocking.\\n\\nThanks,\\n+vince\", \"post_time\": \"2015-12-17 22:24:59\" },\n\t{ \"post_id\": 8908, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"vin\", \"post_text\": \"Dan, thanks for the help. However, I didn't get very far.\\n\\nIn step 1, I downloaded platform.h from github into the destination directory you suggested.\\n\\nFor step 2 I compiled hello.ecl -- just OUTPUT('Hello, World!');\\n\\nBut this failed with the following compiler errors.\\n$ eclcc -o hello hello.ecl\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/platform.h(1,1): error C6003: expected unqualified-id\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(187,31): error C6003: expected class name\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(192,13): error C6003: unknown type name 'IInterface'\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(193,13): error C6003: unknown type name 'IInterface'\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(197,32): error C6003: expected class name\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(209,9): error C6003: unknown type name 'IInterface'\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(210,15): error C6003: unknown type name 'IInterface'; did you mean 'IInterfacePtr'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(211,16): error C6003: unknown type name 'IInterface'; did you mean 'IInterfacePtr'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(235,48): error C6003: unknown class name 'IInterface'; did you mean 'IInterfacePtr'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp(235,48): error C6003: base specifier must name a class\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(76,28): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(81,26): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(81,59): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(81,92): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(82,62): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(82,77): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(89,28): error C6003: unknown type name 'size32_t'; did 
you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(94,26): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp(94,60): error C6003: unknown type name 'size32_t'; did you mean 'size_t'?\\nhello(0,0): error C3000: Compile/Link failed for hello (see '//10.139.66.21/Users/vin/Work/ecl/eclcc.log' for details)\\n\\n---------- compiler output --------------\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:59:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/platform.h:1:1: error: expected unqualified-id\\n--2015-12-17 15:53:25-- https://raw.githubusercontent.com/hpcc-systems/HPCC-Platform/master/system/include/platform.h\\n^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:21:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:187:31: error: expected class name\\ninterface IIterator : extends IInterface\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:192:13: error: unknown type name 'IInterface'\\n virtual IInterface & query() = 0;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:193:13: error: unknown type name 'IInterface'\\n virtual IInterface & get() = 0;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:197:32: error: expected class name\\ninterface IIteratorOf : public IInterface\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:209:9: error: unknown type name 'IInterface'\\ntypedef IInterface * IInterfacePtr;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:210:15: error: unknown type name 'IInterface'; did you mean 'IInterfacePtr'?\\ntypedef Owned<IInterface> OwnedIInterface;\\n ^~~~~~~~~~\\n IInterfacePtr\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:209:22: note: 'IInterfacePtr' declared here\\ntypedef IInterface * IInterfacePtr;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:211:16: error: unknown type name 'IInterface'; did you mean 'IInterfacePtr'?\\ntypedef Linked<IInterface> LinkedIInterface;\\n ^~~~~~~~~~\\n IInterfacePtr\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:209:22: note: 'IInterfacePtr' declared here\\ntypedef IInterface * IInterfacePtr;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:235:48: error: unknown class name 'IInterface'; did you mean 'IInterfacePtr'?\\ninterface jlib_thrown_decl IException : public IInterface\\n ^~~~~~~~~~\\n IInterfacePtr\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:209:22: note: 'IInterfacePtr' declared here\\ntypedef IInterface * IInterfacePtr;\\n ^\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/jscm.hpp:235:48: error: base specifier must name a class\\ninterface jlib_thrown_decl IException : public IInterface\\n ~~~~~~~^~~~~~~~~~\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:76:28: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual 
void getMatchX(size32_t & outlen, char * & out, unsigned n = 0) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:81:26: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:81:59: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:81:92: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void replace(size32_t & outlen, char * & out, size32_t slen, char const * str, size32_t rlen, char const * replace) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:82:62: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:82:77: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual IStrRegExprFindInstance * find(const char * str, size32_t from, size32_t len, bool needToKeepSearchString) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:89:28: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void getMatchX(size32_t & outlen, UChar * & out, unsigned n = 0) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from 
/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:94:26: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void replace(size32_t & outlen, UChar * & out, size32_t slen, UChar const * str, size32_t rlen, UChar const * replace) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nIn file included from hello.cpp:2:\\nIn file included from /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:62:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclrtl.hpp:94:60: error: unknown type name 'size32_t'; did you mean 'size_t'?\\n virtual void replace(size32_t & outlen, UChar * & out, size32_t slen, UChar const * str, size32_t rlen, UChar const * replace) const = 0;\\n ^~~~~~~~\\n size_t\\n/usr/include/sys/_types/_size_t.h:30:32: note: 'size_t' declared here\\ntypedef __darwin_size_t size_t;\\n ^\\nfatal error: too many errors emitted, stopping now [-ferror-limit=]\\n20 errors generated.\\nApple LLVM version 7.0.2 (clang-700.1.81)\\nTarget: x86_64-apple-darwin15.2.0\\nThread model: posix\\nclang: error: no such file or directory: 'hello.cpp.o'\\n\\n--------- end compiler output -----------\\n20 errors, 0 warning\\n
\\n\\nThanks,\\n+vince\", \"post_time\": \"2015-12-17 21:09:20\" },\n\t{ \"post_id\": 8898, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"DSC\", \"post_text\": \"I recreated this problem, but since I didn't know what your ECL looked like I just used a random sample I had lying around. More on that later.\\n\\nIn my run I had to perform several steps manually to get a working executable (this is on a fully-updated 10.11.2 OS X):\\n\\n1) The platform.h file is indeed missing. The file can be found in the HPCC Platform's source tree, which is at https://github.com/hpcc-systems/HPCC-Platform. The file is within the system/include subdirectory within the source tree. So, once you've obtained the source, copy the file to the right place in the client tools directory:\\n\\n: sudo cp HPCC-Platform/system/include/platform.h /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include
\\n2) Compile the file as before (my executable will be named foo):\\n\\n: eclcc -o foo RandomCode.ecl
\\n3) If the code compiles it probably now has a problem finding some shared libraries to link against when launched. Here is what I found:\\n\\n: otool -L foo\\nfoo:\\n\\t@loader_path/../lib/libeclrtl.dylib (compatibility version 0.0.0, current version 0.0.0)\\n\\t@loader_path/../lib/libhthor.dylib (compatibility version 0.0.0, current version 0.0.0)\\n\\t/usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 120.1.0)\\n\\t/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1226.10.1)
\\nThe two libraries with '@loader_path' will not be found. You can correct them one at a time:\\n\\n: install_name_tool -change @loader_path/../lib/libeclrtl.dylib /opt/HPCCSystems/5.4.6/clienttools/lib/libeclrtl.dylib foo\\n\\n: install_name_tool -change @loader_path/../lib/libhthor.dylib /opt/HPCCSystems/5.4.6/clienttools/lib/libhthor.dylib foo
\\nNow the libraries look right:\\n\\n: otool -L foo\\nfoo:\\n\\t/opt/HPCCSystems/5.4.6/clienttools/lib/libeclrtl.dylib (compatibility version 0.0.0, current version 0.0.0)\\n\\t/opt/HPCCSystems/5.4.6/clienttools/lib/libhthor.dylib (compatibility version 0.0.0, current version 0.0.0)\\n\\t/usr/lib/libc++.1.dylib (compatibility version 1.0.0, current version 120.1.0)\\n\\t/usr/lib/libSystem.B.dylib (compatibility version 1.0.0, current version 1226.10.1)
\\nAt this point you should have a runnable executable.\\n\\nHowever:\\n\\nCreating executables from ECL is something that really happens on the cluster platform rather than on a client system. The HPCC platform is officially supported on a number of operating systems (mostly Linux), but OS X isn't one of them. Your executable may or may not work; it depends on the ECL. My example did actually run, and it used SOAPCALL and several aggregations.\\n\\nI would encourage you to open a Jira ticket (https://track.hpccsystems.com/) if this feature is important to you.\\n\\nHope this helps!\\n\\nDan\", \"post_time\": \"2015-12-17 18:51:00\" },\n\t{ \"post_id\": 8896, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"vin\", \"post_text\": \"Not there.\\n\\nOn Linux:\\n $ cat find\\n$ ls /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/\\neclhelper_base.hpp eclrtl.hpp rtlbcd.hpp rtlfield_imp.hpp\\neclhelper.hpp eclrtl_imp.hpp rtldistr.hpp rtlkey.hpp\\neclinclude4.hpp jscm.hpp rtlds_imp.hpp
\\n\\nOn Mac:\\n$ ls /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include\\neclhelper.hpp eclinclude4.hpp eclrtl_imp.hpp rtlbcd.hpp rtlds_imp.hpp rtlkey.hpp\\neclhelper_base.hpp eclrtl.hpp jscm.hpp rtldistr.hpp rtlfield_imp.hpp\\n
\\n\\nNote: those are the same set of files; the display is not identical because the terminal widths and sort order are different.\", \"post_time\": \"2015-12-17 16:38:34\" },\n\t{ \"post_id\": 8894, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"vin\", \"post_text\": \"Since my post I installed client tools on an Ubuntu 14.04 VM and confirmed the same error on Ubuntu.\\n\\nNo, I haven't seen that document. I installed the dependencies indicated on the wiki page for both Mac and Linux. The problem persists.\\n\\nThe C++ compiler cannot find a kernel header file. Debugging the Linux VM first. The file exists:\\n\\n$ find /usr/src/linux-headers-3.19.0-25/arch -name platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/mips/include/asm/mach-loongson1/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/mips/include/asm/mach-jz4740/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/xtensa/include/asm/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/arm/mach-lpc32xx/include/mach/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/arm/mach-ep93xx/include/mach/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/arm/mach-versatile/include/mach/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/arm/mach-realview/include/mach/platform.h\\n/usr/src/linux-headers-3.19.0-25/arch/arm/mach-ixp4xx/include/mach/platform.h\\n
\\n\\nCould this be a missing path or env var? It is (was) a virgin install of 14.04 in a guest VM.\", \"post_time\": \"2015-12-17 16:34:00\" },\n\t{ \"post_id\": 8889, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"Re: eclcc -- platform.h not found\", \"username\": \"richardkchapman\", \"post_text\": \"It should have been installed to the same location as eclinclude4.hpp, i.e. /opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include\\n\\nRichard\", \"post_time\": \"2015-12-17 15:04:33\" },\n\t{ \"post_id\": 8874, \"topic_id\": 2066, \"forum_id\": 14, \"post_subject\": \"eclcc -- platform.h not found\", \"username\": \"vin\", \"post_text\": \"Installed HPCC Client tool on macbook, OS X 10.11.1 (El Capitan). Attempted to compile but "platform.h" could not be found.\\n\\n$ eclcc -o hello hello.ecl\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:59(10,0): error C6003: fatal error: 'platform.h' file not found\\nhello(0,0): error C3000: Compile/Link failed for hello (see '//192.168.2.5/Users/vin/Work/ecl/eclcc.log' for details)\\n\\n---------- compiler output --------------\\nIn file included from hello.cpp:2:\\n/opt/HPCCSystems/5.4.6/clienttools/componentfiles/cl/include/eclinclude4.hpp:59:10: fatal error: 'platform.h' file not found\\n#include "platform.h"\\n ^\\n1 error generated.\\nApple LLVM version 7.0.2 (clang-700.1.81)\\nTarget: x86_64-apple-darwin15.0.0\\nThread model: posix\\nclang: error: no such file or directory: 'hello.cpp.o'\\n\\n--------- end compiler output -----------\\n2 errors, 0 warning
\\n\\nFound one such file:\\n\\n$ locate platform.h\\n/System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/cl_platform.h\\n/System/Library/Frameworks/Tcl.framework/Versions/8.5/Resources/Documentation/Reference/Tcl/TclCmd/platform.htm\\n/usr/include/tidy/platform.h\\n
\\n\\nNot sure this is the file I need and don't know how to get it included.\\n\\nDuring the install process the only step was opening the Mac dmg. Had to search the drive to find the bin directory holding the executables. Then in order to get to this error I had to resolve two missing libraries (libssl and libcrypto.\\n\\nI cannot find any instructions on installing client tools. If there are such instructions, can some one point me to them? If not any ideas what steps to take?\\n\\nThanks,\\n+vince\", \"post_time\": \"2015-12-17 03:10:17\" },\n\t{ \"post_id\": 9132, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Great to hear! Thanks for letting us know.\", \"post_time\": \"2016-01-22 15:06:37\" },\n\t{ \"post_id\": 9128, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"JimD\", \"post_text\": \"I believe that when a new option (organizational unit or OU) is added to LDAP, the default is to allows access to member of the Authenticated Users group.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-01-22 13:50:28\" },\n\t{ \"post_id\": 9126, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"longly\", \"post_text\": \"Thanks Rodrigo. Your solution works perfectly. But we only need to add that ESDLConfigAccess. It will work even if I don't grant that permission for the account.\", \"post_time\": \"2016-01-22 06:31:29\" },\n\t{ \"post_id\": 9122, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi, if the problem you're encountering is indeed related to https://track.hpccsystems.com/browse/HPCC-14673, I have a work-around for you to use until the fix is rolled out.\\nI'm not sure if it is related because I didn't see a "Permission denied" entry in your logs.\\n\\nAt any rate, the work-around is to manually create an "ESP Features for SMC" permission named "ESDLConfigAccess" and assign it to your user.\\n\\nYou have to be logged on to eclwatch as an Admin user, under "operations" -> "Security" -> "Permissions" -> "Add"\\nType: "ESP Features for SMC"\\nName: "ESDLConfigAccess"\\n\\nThen, under "operations" -> "Security" -> "Users" -> choose the target user -> right click -> "Edit" -> "User Permissions" tab -> find ESDLConfigAccess and check "allow full", and save it.\\n\\nThe user should now be able to utilize wsesdlconfig features.\", \"post_time\": \"2016-01-21 15:24:55\" },\n\t{ \"post_id\": 9116, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"longly\", \"post_text\": \"Thanks Rodrigo. I have the same problem with both normal account and admin account.\\nBest Regards,\\nLong Ly\", \"post_time\": \"2016-01-21 04:26:29\" },\n\t{ \"post_id\": 9114, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Re: Failed to Publish ESDL Service definition. Permission de\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"This looks to be related to https://track.hpccsystems.com/browse/HPCC-14673 which has been resolved and should be made available soon. 
Unfortunately there is no documented work-around for issue (does your user account belong to admin group?).\\n\\nWe're trying to push this fix for the new platform release, hopefully this doesn't cause a huge inconvenience until it's made available, thanks.\", \"post_time\": \"2016-01-20 17:14:18\" },\n\t{ \"post_id\": 9106, \"topic_id\": 2126, \"forum_id\": 14, \"post_subject\": \"Failed to Publish ESDL Service definition. Permission denied\", \"username\": \"longly\", \"post_text\": \"Hi All,\\nI have a 5.4.6 HPCC cluster with 4 nodes(the problem happen for 5.4.2 too, but it is ok on other server without LDAP). The cluster use LDAP to authenticate. Every is fine except that when I try to use esdl to publish a service. Please let me know how to fix it. I already grant that user all the permissions.\\n\\nesdl publish AManagerWs AManager.ecm -u xx --password xxx -s x.x.x.x -p 8010 --version 1\\n\\nTime taken for adding XML ESDL definition: 5127608 cycles (5M) = 2 msec\\nTime taken for EsdlDefinition::getDependencies: 930016 cycles (0M) = 0 msec\\nTime taken for serializing EsdlObjects to XML: 345744 cycles (0M) = 0 msec\\njsocket(9,2258) shutdown err = 57 : C!192.168.22.150\\n\\nException(s):\\n20009: 2016-01-20 08:12:10 GMT: Failed to Publish ESDL Service definition. Permission denied.\\n\\nBelow is my esp log :\\n\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002AE 2016-01-20 12:40:41.940 41062 41796 "HTTP First Line: POST /WsESDLConfig HTTP/1.1"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002AF 2016-01-20 12:40:41.941 41062 41796 "SOAP method <ListESDLBindingsRequest> from admin@127.0.0.1."\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002B1 2016-01-20 12:40:41.941 41062 41796 "WARNING: Exception(s) in WsESDLConfig::ListESDLBindings - [ -1: Unable to connect to ESDL Service definition information in dali '/ESDL/Definitions/'] \\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002D5 2016-01-20 12:51:50.607 41062 41062 " /opt/HPCCSystems/lib/libesdl_svc_engine.so(_ZN15EsdlBindingImpl24CESDLBindingSubscriptionD0Ev+0x42) [0x7fddeafa4ae2]"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002D6 2016-01-20 12:51:50.607 41062 41062 " /opt/HPCCSystems/lib/libesdl_svc_engine.so(_ZNK15EsdlBindingImpl24CESDLBindingSubscription7ReleaseEv+0x53) [0x7fddeafa5943]"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002EA 2016-01-20 12:51:50.607 41062 41062 " /opt/HPCCSystems/lib/libesdl_svc_engine.so(_ZN15EsdlBindingImpl27CESDLDefinitionSubscriptionD0Ev+0x42) [0x7fddeafa4bb2]"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:000002EB 2016-01-20 12:51:50.607 41062 41062 " /opt/HPCCSystems/lib/libesdl_svc_engine.so(_ZNK15EsdlBindingImpl27CESDLDefinitionSubscription7ReleaseEv+0x53) [0x7fddeafa5803]"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:0000012D 2016-01-20 12:52:17.779 43600 43600 "ESDL Binding AManagerWs is subscribing to all /ESDL/Bindings/Binding dali changes"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:0000012E 2016-01-20 12:52:17.779 43600 43600 "ESDL Binding AManagerWs is subscribing to all /ESDL/Bindings/Definition dali changes"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:0000012F 2016-01-20 12:52:17.779 43600 43600 "Unable to connect to ESDL Service binding information in dali /ESDL/Bindings/"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:00000130 2016-01-20 12:52:17.779 43600 43600 "ESDL Binding: Could not fetch ESDL binding AManagerWs for ESP Process 
myesp"\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:00000146 2016-01-20 12:52:17.780 43600 43600 "ESDL Binding: adding service 'DynamicESDL' on host 0.0.0.0 and port 8003 on AManagerWs binding."\\n/var/log/HPCCSystems/myesp/esp_main.2016_01_20.log:00000147 2016-01-20 12:52:17.780 43600 43600 "ESDL Binding: Error adding service 'DynamicESDL': ESDL binding configuration not available"\", \"post_time\": \"2016-01-20 08:32:12\" },\n\t{ \"post_id\": 9130, \"topic_id\": 2128, \"forum_id\": 14, \"post_subject\": \"Re: Upgrading from 5.0.16-1 to 5.4.6-1 -- Dependencies issue\", \"username\": \"ming\", \"post_text\": \"Could you try "yum update --nogpgcheck -y <local package file name>"?\\nOn HPCC 5.4.6-1 we implemented automatically resolve dependencies with yum on CentOS.\", \"post_time\": \"2016-01-22 13:51:52\" },\n\t{ \"post_id\": 9120, \"topic_id\": 2128, \"forum_id\": 14, \"post_subject\": \"Upgrading from 5.0.16-1 to 5.4.6-1 -- Dependencies issue\", \"username\": \"Bhanu Pratap Singh Sikarwar\", \"post_text\": \"Hi Team \\n\\nWe have 2 different HPCC server setups :\\nHPCC Server 1 has centos 6.5 installed on all the nodes (master + slaves) \\nHPCC Server 2 has centos 6.3 installed on all the nodes (master + slaves) \\n \\nWe have upgraded the HPCC server 1 to 5.4.6-1.\\n\\nBut when we are trying to upgrade the Server 2, we were able to upgrade only the master node (which runs on Centos 6.3). \\nThe upgradation on subsequent node (slave - which also runs on centos 6.3) runs into errors that a particular dependency is not on the node .\\nWe have already downloaded these dependencies :\\n\\ndevice-mapper-1.02.67-2.el5.i386.rpm \\nglibc-2.17-105.el7.x86_64.rpm \\nlibsepol-2.1.9-3.el7.i686.rpm\\ne2fsprogs-libs-1.39-37.el5.i386.rpm \\nglibc-common-2.17-105.el7.x86_64.rpm \\nmcstrans-0.3.4-5.el7.x86_64.rpm\\nglibc-2.17-105.el7.i686.rpm \\nlibselinux-1.33.4-5.7.el5.i386.rpm \\nopenssl-1.0.1e-42.el6.i686.rpm
\\n\\nBut it does not stop here and it keeps asking for more and more dependencies.\\n\\nCould someone help us understand the reason for these errors??\\n\\nRegards\\nBhanu Pratap\", \"post_time\": \"2016-01-21 13:12:28\" },\n\t{ \"post_id\": 9292, \"topic_id\": 2170, \"forum_id\": 14, \"post_subject\": \"Core File are getting generated every minute\", \"username\": \"Bhanu Pratap Singh Sikarwar\", \"post_text\": \"Hi,\\n\\nWe have a 8 node HPCC cluster in CentOS 6.3 and was having 5.0.16-1 version installed in it. \\nBut the issue was that the myroxie was restarting every minute and each time when it restarted , it was generating a core(dump) file. \\n\\nWe have found that the below log is entered every minute in /var/lib/HPCCSystems/myroxie/<date>.stderr file.\\n"Removing roxie.sentinel and loading topology file roxietopology.xml"\\n\\nBecause of this dump file generation every minute, we run out of storage space very often.\\n\\nAnyway now we have installed 5.4.6-1 version but We are curious to know the reason behind this behavior i.e. the generation of core(dump) files?\\n\\nRegards,\\nBhanu Pratap\", \"post_time\": \"2016-03-03 07:38:47\" },\n\t{ \"post_id\": 9722, \"topic_id\": 2306, \"forum_id\": 14, \"post_subject\": \"Re: Migrating data from one hpcc to a 2nd\", \"username\": \"JimD\", \"post_text\": \"The communication from your browser to the ESP is encrypted, however, I am not certain about the path of the data copy. \\n\\nI will discuss with developers to make sure and let you know more.\\n\\nJim\", \"post_time\": \"2016-06-08 18:01:45\" },\n\t{ \"post_id\": 9720, \"topic_id\": 2306, \"forum_id\": 14, \"post_subject\": \"Re: Migrating data from one hpcc to a 2nd\", \"username\": \"JimD\", \"post_text\": \"All of those tools have the option to run using SSL (or TLS as it is now known) but that requires an ESP server configured to use HTTPS for ECL Watch.\\n\\nIt would need a certificate installed, too. \\n\\nOnce this is set up, you would either:\\n* access ECL Watch using the HTTPS URL or\\n* Specify the ECLWatch server URL using HTTPS in DFUPlus or\\n* Specify the HTTPS protocol in the espserverIPport parameter for STD.File.Copy\\n\\nConfiguring ESP server to use SSL is detailed in the Installing and Running guide.\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=79\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-06-08 17:02:39\" },\n\t{ \"post_id\": 9718, \"topic_id\": 2306, \"forum_id\": 14, \"post_subject\": \"Re: Migrating data from one hpcc to a 2nd\", \"username\": \"tlhumphrey2\", \"post_text\": \"Jim, Thanks.\\n\\nDo you know if the data is encrypted, i.e. ssl, when the copy is done? This is important, since the internet will be used to connect the 2 systems.\", \"post_time\": \"2016-06-08 16:47:59\" },\n\t{ \"post_id\": 9716, \"topic_id\": 2306, \"forum_id\": 14, \"post_subject\": \"Re: Migrating data from one hpcc to a 2nd\", \"username\": \"JimD\", \"post_text\": \"ECL Watch has a function called Remote Copy. This allows you to copy from one environment to another. \\n\\n"DFUPlus copy" also provides the ability to remote copy by specifying a srcdali.\\n\\nSTD.File.Copy has a similar parameter -- sourceDali . You might also look at STD.File.RemotePull. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-06-08 16:03:01\" },\n\t{ \"post_id\": 9712, \"topic_id\": 2306, \"forum_id\": 14, \"post_subject\": \"Migrating data from one hpcc to a 2nd\", \"username\": \"tlhumphrey2\", \"post_text\": \"Is there a tool for transferring THOR files from on hpcc system to another? 
If so, what is its name? Also, does the tool use SSL?\", \"post_time\": \"2016-06-08 12:58:37\" },\n\t{ \"post_id\": 9738, \"topic_id\": 2314, \"forum_id\": 14, \"post_subject\": \"Re: Differences in 2 environment.xml\", \"username\": \"JimD\", \"post_text\": \"The Roxie setting (Environment.Software.RoxieCluster:copyResources) is explained here:\\n\\nhttp://cdn.hpccsystems.com/releases/CE- ... df#page=10\\n\\nIt is basically a setting telling it how to handle data when a query is published. It can read data from the original location or copy it to the Roxie itself (or a combination of both)\\n\\nIn a single node implementation, using remote makes the most sense. In a production setting, copying to the Roxie is usually best.\\n\\nThe Sasha port is used to communicate with other components. Each component uses a port or a range of ports. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-06-10 19:30:34\" },\n\t{ \"post_id\": 9736, \"topic_id\": 2314, \"forum_id\": 14, \"post_subject\": \"Re: Differences in 2 environment.xml\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is another question about an environment.xml file. In one environment.xml file I saw the following assignment.Environment.Software.SashaServerProcess.Instance:port="8877"
\\nSo, what does the sasha instance use port 8877 for?\", \"post_time\": \"2016-06-10 18:32:02\" },\n\t{ \"post_id\": 9734, \"topic_id\": 2314, \"forum_id\": 14, \"post_subject\": \"Differences in 2 environment.xml\", \"username\": \"tlhumphrey2\", \"post_text\": \"Can you tell me what these differences mean?\\n\\nThe following shows differences in two environment.xml (lines beginning with "<" come from the 1st environment.xml and lines beginning with ">" come from the 2nd environment.xml. The differences are shown in pairs -- one from each environment.xml file.\\n\\nFor example, the 1st pair of differences show values for umask in DtopZone. So, I'm wondering what the 2 different values for umask mean.\\n\\n--------------------------------------------\\n< Environment.Software.DropZone:umask="002"\\n---\\n> Environment.Software.DropZone:umask="022"\\n--------------------------------------------\\n\\n--------------------------------------------\\n< Environment.Software.RoxieCluster:channelsPerNode="2"\\n---\\n> Environment.Software.RoxieCluster:channelsPerNode="1"\\n--------------------------------------------\\n\\n--------------------------------------------\\n< Environment.Software.RoxieCluster:copyResources="true"\\n---\\n> Environment.Software.RoxieCluster:copyResources="false"\\n--------------------------------------------\\n\\n--------------------------------------------\\n< Environment.Software.RoxieCluster:lazyOpen="false"\\n---\\n> Environment.Software.RoxieCluster:lazyOpen="true"\\n--------------------------------------------
\", \"post_time\": \"2016-06-10 18:26:41\" },\n\t{ \"post_id\": 9744, \"topic_id\": 2320, \"forum_id\": 14, \"post_subject\": \"Time synchronization problem\", \"username\": \"subba\", \"post_text\": \"Problem background: We have a 10 node cluster up and running. But the date is not in sync with the actual date. It is about 10 hours ahead. Since whenever I wanted to know the time it took for a particular job to complete, I had to do a bit of arithmetic, I have reset the date on ONLY the master node. That was completely thoughtless.\\n\\nAfter the time reset on master node, the cluster seem to behave differently. It runs some jobs. Some jobs go into blocked mode forever.\\n\\nIt is a production cluster. Has 100s of datasets sprayed. Has many superfiles. It has been alive for months.\\n\\nCouple of questions that I have:\\n1) What is the impact of the change?\\n1) What are the options available so that nothing on the cluster gets lost?\\n\\nThanks in advance,\\nsubba\", \"post_time\": \"2016-06-13 16:35:53\" },\n\t{ \"post_id\": 9784, \"topic_id\": 2326, \"forum_id\": 14, \"post_subject\": \"Re: Time sync problem on cluster\", \"username\": \"fernando\", \"post_text\": \"Having the nodes on different times may lead to all kinds of unforeseen issues.\\n\\nIt would be best to set up a centralized time server and have the all the nodes synchronize their time to it. Keep ALL the servers including the middleware (dali, eclccserver etc,) components in-sync with the same time for best results.\\n\\nStop all the components, sync the time all the nodes, restart components.\", \"post_time\": \"2016-06-16 10:19:27\" },\n\t{ \"post_id\": 9752, \"topic_id\": 2326, \"forum_id\": 14, \"post_subject\": \"Time sync problem on cluster\", \"username\": \"subba\", \"post_text\": \"We have a 10 node production cluster. It has been alive for months and over the time accumulated hundreds of sprayed data and many superfiles.\\n\\nThe time that has been set on every node of the cluster is not in sync with the actual time. So it requires a bit of arithmetic everytime I need to look at for how long a job has been running.\\n\\nAlmost on a whim, I have reset the date ONLY on master node today. It seemed to be causing some issues. Some jobs run. But some jobs seem to be blocked forever. And the time change seemed to be the cause behind it.\\n\\nI could not reset the date to original as I do not have it. I could take time from one of the slave nodes and use it to set the time on master but I doubt if it solves the problem.\\n\\nFew questions in this regard:\\n1) What is the impact of date reset on one node of the cluster?\\n2) Would I lose any of the work that has been done so far if I had to reconfigure the cluster?\\n3) Are there solutions that can ensure no loss of data?\\n\\nThanks in advance,\\nsubba\", \"post_time\": \"2016-06-14 04:53:53\" },\n\t{ \"post_id\": 12373, \"topic_id\": 2340, \"forum_id\": 14, \"post_subject\": \"Re: Mythor not starting after 6.0.0-x Install\", \"username\": \"rqg0717\", \"post_text\": \"Dear all,\\n\\nI have encounted the same issue. After "sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start" called, all components can be started successfully, except mythor. Then I called "sudo service hpcc-init restart" on the master node and I got:\\n\\nubuntu@ubuntu01:/etc/HPCCSystems$ sudo service hpcc-init restart\\n*****************************************\\nStopping entire system for a full restart\\n*****************************************\\nStopping mythor... 
[ OK ]\\nStopping mysasha... [ OK ]\\nStopping myesp... [ OK ]\\nStopping myeclscheduler... [ OK ]\\nStopping myeclccserver... [ OK ]\\nStopping myeclagent... [ OK ]\\nStopping mydfuserver... [ OK ]\\nStopping mydali... [ OK ]\\n***************************************************\\nStarting the entire System\\n***************************************************\\nStarting mydali ... [ OK ]\\nStarting mydfuserver ... [ OK ]\\nStarting myeclagent ... [ OK ]\\nStarting myeclccserver ... [ OK ]\\nStarting myeclscheduler ... [ OK ]\\nStarting myesp ... [ OK ]\\nStarting mysasha ... [ OK ]\\nStarting mythor ... [ FAILED ]\\n\\n
\\n\\nI checked "/var/log/HPCCSystems/mythor/init_mythor_<timestamp>.log" and it shows: \\n\\n2016-10-24T21:09:33: Starting mythor\\n2016-10-24T21:09:33: removing any previous sentinel file\\n2016-10-24T21:09:33: Ensuring a clean working environment ...\\n2016-10-24T21:09:33: Killing slaves\\n2016-10-24T21:09:35: Error 255 in frunssh\\n2016-10-24T21:09:35: Please check /var/log/HPCCSystems/frunssh for more details\\n2016-10-24T21:09:35: Stopping mythor\\n
\\nand in "/var/log/HPCCSystems/frunssh/*.log" it shows:\\n\\n1: ssh(0):\\n2: ssh(0):\\n3: ssh(0):\\nERROR: /mnt/disk1/jenkins/workspace/CE-Candidate-6.0.6-1/CE/ubuntu-14.04-amd64/HPCC-Platform/services/runagent/frunssh.cpp(84) : frunssh : [255: ]\\nERROR: /mnt/disk1/jenkins/workspace/CE-Candidate-6.0.6-1/CE/ubuntu-14.04-amd64/HPCC-Platform/services/runagent/frunssh.cpp(84) : frunssh : [255: ]\\nERROR: /mnt/disk1/jenkins/workspace/CE-Candidate-6.0.6-1/CE/ubuntu-14.04-amd64/HPCC-Platform/services/runagent/frunssh.cpp(84) : frunssh : [255: ]\\n
\\nThe complete log files can be found in the attachment.\\n\\nPlease advise. Thank you.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-24 21:23:08\" },\n\t{ \"post_id\": 9900, \"topic_id\": 2340, \"forum_id\": 14, \"post_subject\": \"Re: Mythor not starting after 6.0.0-x Install\", \"username\": \"mgardner\", \"post_text\": \"Hi demills,\\n\\nAny chance you can check on the status of rsync on your system? If you can't find the rsync package in your $PATH, but the package manager says it's installed, can you please print out your $PATH for us? There could possibly be an issue with your environment variables.\\n\\nRsync should be installed as a dependency when you install the platform package. Also, did you install the package (on the slave) the standard way, with 'dpkg -i hpccsystems-platform..., apt-get install -f'? \\n\\nBest regards,\\n\\nMichael Gardner\", \"post_time\": \"2016-07-06 16:11:48\" },\n\t{ \"post_id\": 9888, \"topic_id\": 2340, \"forum_id\": 14, \"post_subject\": \"Re: Mythor not starting after 6.0.0-x Install\", \"username\": \"jsmith\", \"post_text\": \"I think rsync is installed on most distros by default, but to fix this you'll need to install it.\\n\\nOn CentOS you can install with:\\n\\nsudo yum -y install rsync\\n
\\nOn Ubuntu you can install with:\\n\\nsudo apt-get -y install rsync\\n
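\n\nA quick way to confirm the install worked and that the binary is visible to the HPCC init scripts (a minimal check, assuming a standard shell environment on the node):\n\n    # verify rsync resolves on the PATH the init scripts will use\n    which rsync          # should print something like /usr/bin/rsync\n    rsync --version      # confirms the binary actually runs\n    echo $PATH           # if rsync is installed but still reported as "not found", the PATH is the likely culprit\n\nIf the package manager reports rsync as installed but which finds nothing, compare the PATH of the account that starts the platform with the output above.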
\", \"post_time\": \"2016-07-04 12:16:07\" },\n\t{ \"post_id\": 9886, \"topic_id\": 2340, \"forum_id\": 14, \"post_subject\": \"Re: Mythor not starting after 6.0.0-x Install\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe slaves file will not normally exist after Thor has stopped or failed to start, since it is a temporary file that's cleared up on exit.\\n\\nIn start_slaves_01_22_2015_13_26_44.log, I notice:\\n\\n/opt/HPCCSystems/bin/start_slaves: line 77: rsync: command not found\\ncat: /var/lib/HPCCSystems/mythor/thorgroup.slave: No such file or directory\\n
\\n\\nI suspect that's the problem. It's expecting rsync to be installed, but it appears not to be. The use of rsync here isn't new to 6.0.0-x though as far as I know.\", \"post_time\": \"2016-07-04 12:07:03\" },\n\t{ \"post_id\": 9818, \"topic_id\": 2340, \"forum_id\": 14, \"post_subject\": \"Mythor not starting after 6.0.0-x Install\", \"username\": \"demills\", \"post_text\": \"Hi,\\n\\nOn recent installs of both 6.0.0-1 and 6.0.0-2, I've encountered identical problems preventing the startup of mythor, and affecting it's connections to the mythor slaves.\\nAfter a "sudo service hpcc-init restart" call, all components start successfully, except mythor. I've attached the relevant log files, but I believe the originating issues are in /var/log/HPCCSystems/frunssh/*.log, reading:ERROR: /var/lib/jenkins2/workspace/CE-Candidate-6.0.0-1/CE/ubuntu-14.04amd64/HPCC-Platform/services/runagent/frunssh.cpp(73) : frunssh : Failed to open slaves file /var/lib/HPCCSystems/mythor/slaves
\\nand in /var/log/HPCCSystems/mythor/init_mythor_<timestamp>.log: \\n2016-06-23T20:12:08: Killing slaves\\n2016-06-23T20:12:08: Error 255 in frunssh\\n2016-06-23T20:12:08: Please check /var/log/frunssh for more details\\n2016-06-23T20:12:08: Stopping mythor
\\n\\nI've also noticed that the file /var/lib/HPCCSystems/mythor/slaves does not exist, but a file /var/lib/HPCCSystems does. On a whim, changing the name of this "uslaves" file to "slaves" changes the output of the "init_mythor..." log to: \\n1: starting 10.10.1.57 (0 of 2 finished)\\n0: starting 10.10.1.55 (0 of 2 finished)\\nResults: (2 of 2 finished)\\n2: 10.10.1.57(255): \\n1: 10.10.1.55(255): \\n1: starting 10.10.1.55 (0 of 2 finished)\\n0: starting 10.10.1.57 (0 of 2 finished)\\nResults: (2 of 2 finished)\\n2: 10.10.1.55(255): \\n1: 10.10.1.57(255):\\n
\\n\\nAny ideas?\\n\\nThanks,\\nDaniel\", \"post_time\": \"2016-06-23 20:36:34\" },\n\t{ \"post_id\": 9882, \"topic_id\": 2354, \"forum_id\": 14, \"post_subject\": \"Re: LDAP Setup\", \"username\": \"JimD\", \"post_text\": \"If you want simple "All or Nothing" authentication, you can use HTPassword authentication. This form of security simply provides a list of users who are allowed in.\\n\\nIf you want to do the same but use LDAP, then just add all you users to a Group and grant FULL access to that Group for all the settings. Any new users added will inherit that. Groups allow you to carefully choose your settings and then quickly add people to use those settings.\\n\\nPermissions are complex because they provide a lot of flexibility. We know that the process is a little difficult and are working to improve it.\\n\\nhth,\\n\\nJim\", \"post_time\": \"2016-07-01 13:29:15\" },\n\t{ \"post_id\": 9876, \"topic_id\": 2354, \"forum_id\": 14, \"post_subject\": \"LDAP Setup\", \"username\": \"bbrown57\", \"post_text\": \"The LDAP configuration example in the installation document for HPCC is very confusing. I only need to authenticate users and most of the fields don't pertain to that ability. What fields do I need for LDAP user authentication exclusively? I also don't need HPCC to make changes to LDAP and it doesn't need a username or password as our LDAP is anonymous.\", \"post_time\": \"2016-07-01 12:23:39\" },\n\t{ \"post_id\": 10483, \"topic_id\": 2413, \"forum_id\": 14, \"post_subject\": \"Re: New program that creates environment.xml\", \"username\": \"bforeman\", \"post_text\": \"Thank you Tim!\", \"post_time\": \"2016-08-10 20:34:34\" },\n\t{ \"post_id\": 10033, \"topic_id\": 2413, \"forum_id\": 14, \"post_subject\": \"New program that creates environment.xml\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have created a program that creates an environment.xml file. It is called tlh_envgen.pl. It is in the following github repository: https://github.com/tlhumphrey2/hpcc-environment-processing.\\n\\nThe program takes as input a configuration file like the following. And produces an environment.xml file.\\n\\nthor names are: thor1\\nroxie name is: roxie1\\n\\n# IPs COMPONENTS\\n 10.0.0.179 master:thor1\\n 10.0.0.225 slave:thor1\\n 10.0.0.226 slave:thor1\\n\\n 10.0.0.89 roxie:roxie1\\n 10.0.0.107 roxie:roxie1\\n\\n 10.0.0.178 middleware dali\\n 10.0.0.178 middleware dfu\\n 10.0.0.178 middleware eclagent\\n 10.0.0.178 middleware eclcc\\n 10.0.0.178 middleware eclsch\\n 10.0.0.178 middleware esp\\n 10.0.0.178 middleware sasha\\n 10.0.0.178 middleware dropzone\\n\\nSoftware.ThorCluster.ahead:slavesPerNode="4"
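\n\nOnce tlh_envgen.pl has produced an environment.xml, one way to sanity-check the result before copying it to /etc/HPCCSystems on the cluster is to run it through the stock configgen utility (a sketch; adjust the path to wherever the generated file lives):\n\n    # list every component and the node it was assigned to\n    /opt/HPCCSystems/sbin/configgen -env /path/to/generated/environment.xml -listall2\n    # or show only the components destined for a single node, e.g. the thor1 master above\n    /opt/HPCCSystems/sbin/configgen -env /path/to/generated/environment.xml -ip 10.0.0.179\n\nIf a node you expect to host a component comes back empty, the generated file (or the IP list fed to the script) is worth a second look.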
\\n\\nThe README.md file of the repository gives more information about the program and other content in the repository.\", \"post_time\": \"2016-07-25 15:27:38\" },\n\t{ \"post_id\": 10473, \"topic_id\": 2423, \"forum_id\": 14, \"post_subject\": \"Re: Setting-up a JAILED SFTP server on your landing zone\", \"username\": \"bforeman\", \"post_text\": \"Tim, this is nice! We should FAQ or Wiki this!\\nRegards,\\nBob\", \"post_time\": \"2016-08-10 20:33:37\" },\n\t{ \"post_id\": 10043, \"topic_id\": 2423, \"forum_id\": 14, \"post_subject\": \"Setting-up a JAILED SFTP server on your landing zone\", \"username\": \"tlhumphrey2\", \"post_text\": \"You may have people you want to place data on your landing zone; but, you don't want them to access any other part of your system. A JAILED SFTP server is the answer.\\n\\nWhat does JAILED SFTP mean?\\n\\nIt means that each SFTP user has access to ONE AND ONLY ONE DIRECTORY.\\n\\nThis post shows you how setup SFTP server on your landing zone where each user has access to ONE AND ONLY ONE directory on the landing zone.\\n\\nThe instructions given below are bash commands executed on the linux box of your landing zone. If you need additional help with these commands, don't hesitate to post.\\n\\n#1. Create a new group on the landing zone instance that will be used by SFTP.\\nsudo groupadd sftponly\\n\\n#2. For each SFTP user do the following:\\n NEWUSER=<username>\\n PASSWORD=<user-password>\\n NEWUSERHOME=<path-to-landingzone>/$NEWUSER\\n\\n # Make user's landing zone directory\\n sudo mkdir -p $NEWUSERHOME\\n # Add user specifying his home directory, that he is in the group sftponly and that he has no login script (i.e. he can't login).\\n sudo useradd -d $NEWUSERHOME -G sftponly -s /bin/false $NEWUSER\\n\\n # Add user's password to password file\\n echo $PASSWORD |sudo passwd --stdin $NEWUSER\\n\\n #Check to make sure the user is in the sftponly group and has a password in the password file\\n grep "sftponly" /etc/group\\n grep $NEWUSER /etc/passwd\\n\\n#3. Make changes to the sshd configuration file, sshd_config so SFTP server is activated the next time the sshd service is started. Here are the changes you should make:\\n Uncomment "Protocol 2".\\n Uncomment "PasswordAuthentication yes"\\n Comment "PasswordAuthentication no"\\n Comment "Subsystem sftp /usr/libexec/openssh/sftp-server"\\n Add "Subsystem sftp internal-sftp"\\n #At the end of the file, add the following lines:\\n Match Group sftponly\\n ChrootDirectory /var/mydropzone/%u\\n X11Forwarding no\\n AllowTCPForwarding no\\n ForceCommand internal-sftp\\n\\n# Then, save the changes you made\\n\\n# And, test the configuation\\nsudo sshd -t\\n# And, restart sshd\\nsudo service sshd reload\\n\\n#4. Setup the sub-directories, upload and download. And set permissions and owners for these sub-directories.\\nsudo mkdir -p /var/mydropzone/$NEWUSER/download\\nsudo mkdir -p /var/mydropzone/$NEWUSER/upload\\nsudo chmod 777 /var/mydropzone/$NEWUSER/upload\\nsudo chmod 777 /var/mydropzone/$NEWUSER/download\\nsudo chown $NEWUSER:hpcc /var/mydropzone/$NEWUSER/upload\\nsudo chown $NEWUSER:hpcc /var/mydropzone/$NEWUSER/download\\n\\n#5. Test to use if user can SFTP into his landing zone directory\\nsftp $NEWUSER@<IP-of-landing-zone> # for sftp on linux box\\n\\n# If the SFTP server is working correctly, next the user will be prompted for their password. Once the user is in the service, the following command should show the 2 sub-directories: upload and download.\\nls
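\n\nFor a quick end-to-end test from the user's point of view, a session would look roughly like this (a sketch; the user name and file name are placeholders):\n\n    sftp $NEWUSER@<IP-of-landing-zone>   # prompts for the password set in step 2\n    sftp> ls                             # should show only: download  upload\n    sftp> cd upload\n    sftp> put mydata.csv                 # drop a file ready for spraying\n    sftp> exit\n\nBecause the session is chrooted to /var/mydropzone/<username>, cd .. never takes the user outside their own directory, which is exactly the jail behaviour this setup is meant to enforce.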
\", \"post_time\": \"2016-07-25 17:38:27\" },\n\t{ \"post_id\": 12761, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Thank You clo,\\n\\nIt worked after adding username & password. I thought uid/pwd is not mandatory since cqlsh doesn't ask for one.\", \"post_time\": \"2016-11-01 17:12:38\" },\n\t{ \"post_id\": 12751, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"I don't see a username and password in your settings. I'm assuming you either removed it so I didn't see it or you didn't add it in the first place. \\n\\nMine looks something like this:\\n\\n <Option name="user" value="editedusername"/>\\n <Option name="password" value="editedpassword"/>\\n\\nwhere edited username and passwords are what I set.\\n\\nAlso, I see that your server is pointing to 127.0.0.1.\\n\\nThat's your loopback address and could present issues. I would suggest using the address listed under 192.168.56.10x instead.\", \"post_time\": \"2016-11-01 16:34:23\" },\n\t{ \"post_id\": 12741, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Attached my environment.xml\", \"post_time\": \"2016-11-01 15:41:03\" },\n\t{ \"post_id\": 12731, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Hmm. I configured a cassandra workunit store from within a 6.0.4-1 vm which I downloaded from hpccsystems.com How do you have you cassandra configured? Also, would you be able to provide your environment.xml to see how your environment is set up?\", \"post_time\": \"2016-11-01 14:26:09\" },\n\t{ \"post_id\": 12721, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Hi, we're trying to work out a reason as to why this might not be working with the cassandra workunit store.\", \"post_time\": \"2016-11-01 11:25:45\" },\n\t{ \"post_id\": 12263, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Yes I am able to submit workunit once I restore the old environment.\\nAlso I am able to run cassandra commands from ECL.\\nI haven't setup userid/password for accessing Cassandra.\", \"post_time\": \"2016-10-20 12:31:46\" },\n\t{ \"post_id\": 12253, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI'm glad increasing the memory has helped some. Would you be able to turn off cassandra as the workunit store and restart your system to see if you still encounter this issue?\", \"post_time\": \"2016-10-20 12:01:31\" },\n\t{ \"post_id\": 12223, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Increased the VM memory and now all looks good but, still getting the error message when I submit workunits. 
thor and roxie is up , cassandra is up and configured as workunti storage.\\n\\nWsWorkunits.WUCreate\\n\\n<h3>Cannot read property 'Workunit' of undefined</h3><p>TypeError: Cannot read property 'Workunit' of undefined at Object.load (http://192.168.56.101:8010/esp/files/ec ... js:651:441) at http://192.168.56.101:8010/esp/files/ec ... js:470:446 at g (http://192.168.56.101:8010/esp/files/ec ... c.js:103:1) at m (http://192.168.56.101:8010/esp/files/ec ... js:102:430) at k.resolve (http://192.168.56.101:8010/esp/files/ec ... js:104:332) at a (http://192.168.56.101:8010/esp/files/ec ... js:103:295) at g (http://192.168.56.101:8010/esp/files/ec ... js:103:124) at m (http://192.168.56.101:8010/esp/files/ec ... js:102:430) at k.resolve (http://192.168.56.101:8010/esp/files/ec ... js:104:332) at a (h...\\n... ... ...\\n\\nbelow tables are created in cassandra\\nwustatistics wuexceptions workunits uniquesearchvalues\\nfilessearchvalues wugraphs wutemporaries wugraphrunning\\nworkunitssearch wufileswritten wufilesread\\nwuqueries wuvariables wugraphstate\\nwugraphprogress version wuresults\", \"post_time\": \"2016-10-19 20:21:20\" },\n\t{ \"post_id\": 11853, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"it's 64 bit\\nVirtualBox version 5.1.6\\nHPCCSystemsVM-amd64-6.0.4-1\\nCassandra 2.1\", \"post_time\": \"2016-10-06 12:23:27\" },\n\t{ \"post_id\": 11843, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"I think your underlying problem is still going to be that your VM doesn't have enough memory resource to be able to handle all the things you're trying to do. You can try to increase the amount of memory from the VM box manager, however I'll need to consult with a subject matter expert in order to give you better information.\\n\\nFor reference, what is the version of the hpccsystems platform are you using? Is it 32bit or 64bit?\", \"post_time\": \"2016-10-05 18:47:24\" },\n\t{ \"post_id\": 11823, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Yes I am using VM version. I got WU create error while executing in roxie which is up and running.\", \"post_time\": \"2016-10-05 17:06:24\" },\n\t{ \"post_id\": 11793, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Are you trying to do all this on one of the vms we provide? 
If so, the resources on the vm itself might be limiting your progress.\\n\\nIt looks like the thor ran out of memory:\\n\\n00000001 2016-10-05 13:11:13.586 4251 4251 "Opened log file //192.168.56.101/var/log/HPCCSystems/mythor/thormaster.2016_10_05.log"\\n00000002 2016-10-05 13:11:13.587 4251 4251 "Build community_6.0.4-1"\\n00000003 2016-10-05 13:11:13.587 4251 4251 "calling initClientProcess Port 20000"\\n00000004 2016-10-05 13:11:13.591 4251 4251 "Global memory size = 374 MB"\\n00000005 2016-10-05 13:11:13.591 4251 4251 "RoxieMemMgr: Setting memory limit to 392167424 bytes (1496 pages)"\\n00000006 2016-10-05 13:11:13.591 4251 4251 "RoxieMemMgr: posix_memalign (alignment=262144, size=394264576) failed - ret=12 (ENOMEM There was insufficient memory to fulfill the allocation request.)"\\n00000007 2016-10-05 13:11:13.591 4251 4251 "ERROR: 1303: /var/lib/jenkins2/workspace/CE-Candidate-6.0.4-1/CE/ubuntu-14.04-amd64/HPCC-Platform/thorlcr/master/thmastermain.cpp(753) : ThorMaster : RoxieMemMgr: Unable to create heap"\", \"post_time\": \"2016-10-05 16:09:43\" },\n\t{ \"post_id\": 11783, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Thank You clo\\n\\nLooks like some issue with cassandra 3.x. Once I installed 2.1 I was able to make the configuration work.\\n\\nNow I am unable run any workunit. Please refer attachment.\\nLooks like the below issue is not resolved\\nhttps://track.hpccsystems.com/browse/HPCC-15124\\n\\nBTW my thor is down. attached the log file. \\n\\nPlease help\", \"post_time\": \"2016-10-05 14:55:05\" },\n\t{ \"post_id\": 11733, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"I see this one line in the system.log.\\n\\nINFO [main] 2016-09-28 12:34:07,106 CassandraDaemon.java:471 - Not starting RPC server as requested. Use JMX (StorageService->startRPCServer()) or nodetool (enablethrift) to start it\\n\\nI'm not familiar enough with Cassandra to recommend a fix but I do see some articles out there on stackoverflow that might suggest some possible solutions.\", \"post_time\": \"2016-10-04 13:49:43\" },\n\t{ \"post_id\": 11723, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"Still I couldn't make it work.Is there any workaround someone can suggest? Please let me know if any other information is needed.\", \"post_time\": \"2016-10-04 12:43:18\" },\n\t{ \"post_id\": 11583, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"I didn't stop the cassandra service. 
it is still up and running.\\nAttached cassandra system log.\", \"post_time\": \"2016-09-28 15:40:44\" },\n\t{ \"post_id\": 11563, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"richardkchapman\", \"post_text\": \"Hmmm, ok - the protocol question was probably a red herring then.\\n\\nYou did start the cassandra service BEFORE you tried to start hpcc ?\", \"post_time\": \"2016-09-28 13:02:12\" },\n\t{ \"post_id\": 11553, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"attaching the entire log\", \"post_time\": \"2016-09-28 12:59:08\" },\n\t{ \"post_id\": 11543, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"with option 2\\n\\n00000021 2016-09-28 12:52:00.108 14959 14959 "ERROR: 0: /var/lib/jenkins2/workspace/CE-Candidate-6.0.4-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daserver.cpp(439) : Exception : cassandra: failed to connect (No hosts available for the control connection)"\", \"post_time\": \"2016-09-28 12:53:05\" },\n\t{ \"post_id\": 11533, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"richardkchapman\", \"post_text\": \"Looks like the version of the cassandra client library included in 6.0.4 only supports protocol versions 1 and 2.\\n\\n\\nprotocol_version=2 might work?\", \"post_time\": \"2016-09-28 12:47:07\" },\n\t{ \"post_id\": 11523, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"0000001C 2016-09-28 12:39:22.550 6117 6117 "Stopping 1"\\n0000001D 2016-09-28 12:39:22.550 6117 6117 "Stopping 0"\\n0000001E 2016-09-28 12:39:22.550 6117 6125 "BackupHandler stopped"\\n0000001F 2016-09-28 12:39:22.550 6117 6117 "ERROR: 0: /var/lib/jenkins2/workspace/CE-Candidate-6.0.4-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daserver.cpp(439) : Exception : cassandra: While setting option protocol_version: Bad parameters"\", \"post_time\": \"2016-09-28 12:42:26\" },\n\t{ \"post_id\": 11513, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"richardkchapman\", \"post_text\": \"This could be related to an issue setting the proper Cassandra protocol version - there is some info in\\nhttps://track.hpccsystems.com/browse/HPCC-15879 \\n\\nIf so, adding an option "protocol_version" with value 3 to the dali cassandra plugin settings may help\", \"post_time\": \"2016-09-28 09:15:50\" },\n\t{ \"post_id\": 11453, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"hpccdemo@HPCCSystemsVM-amd64-6:~$ sudo service cassandra start\\nhpccdemo@HPCCSystemsVM-amd64-6:~$ cqlsh\\nConnected to Test Cluster at 127.0.0.1:9042.\\n[cqlsh 5.0.1 | Cassandra 3.2.1 | CQL spec 3.4.0 | Native protocol v4]\\nUse HELP for help.\\ncqlsh> use hpcc;\\ncqlsh:hpcc> exit\\nhpccdemo@HPCCSystemsVM-amd64-6:~$ sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start\\n192.168.56.101: Running sudo /etc/init.d/hpcc-init start\\nStarting mydafilesrv ... [ OK ]\\nStarting mydali ... [ FAILED ]\\nStarting mydfuserver ... 
[ OK ]\\nWaiting on sentinel file creation\\nStarting myeclagent ... [ OK ]\\nWaiting on sentinel file creation\\nStarting myeclccserver ...\\n\\n-----------------------------------------------------------------------------------\\nalso tried this to ensure its running\\nhpccdemo@HPCCSystemsVM-amd64-6:~$ nodetool status\\nDatacenter: datacenter1\\n=======================\\nStatus=Up/Down\\n|/ State=Normal/Leaving/Joining/Moving\\n-- Address Load Tokens Owns Host ID Rack\\nUN 127.0.0.1 108.91 KB 256 ? 2b7dd353-8462-41eb-a93a-cee1167581ac rack1\\n\\nNote: Non-system keyspaces don't have the same replication settings, effective ownership information is meaningless\\nhpccdemo@HPCCSystemsVM-amd64-6:~$\", \"post_time\": \"2016-09-27 16:58:16\" },\n\t{ \"post_id\": 11443, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Just to be thorough, can you please verify that our cassandra is running?\\n\\nTry `sudo service cassandra start` and then restart the hpccsystems platform.\", \"post_time\": \"2016-09-27 16:23:24\" },\n\t{ \"post_id\": 11423, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"this are the last few lines of the log\\n\\n00000009 2016-09-27 15:40:45.823 8251 8251 "store loaded"\\n0000000A 2016-09-27 15:40:45.823 8251 8251 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000B 2016-09-27 15:40:50.853 8251 8260 "cassandra: ERROR - Host 127.0.0.1 had the following error on startup: 'Connection timeout'"\\n0000000C 2016-09-27 15:40:50.854 8251 8260 "cassandra: WARN - Lost connection on host 127.0.0.1"\\n0000000D 2016-09-27 15:40:50.857 8251 8251 "ERROR: 0: Exception - Failed to load main store : cassandra: failed to connect (No hosts available for the control connection)"\\n0000000E 2016-09-27 15:40:50.857 8251 8251 "Failed to load main store"\\n0000000F 2016-09-27 15:40:50.857 8251 8251 "ERROR: 0: /var/lib/jenkins2/workspace/CE-Candidate-6.0.4-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daserver.cpp(398) : Failed whilst starting servers : cassandra: failed to connect (No hosts available for the control connection)"\\n00000016 2016-09-27 15:40:50.858 8251 8251 "Suspending subscriptions"\\n00000017 2016-09-27 15:40:50.858 8251 8251 "Suspended subscriptions"\\n00000018 2016-09-27 15:40:50.858 8251 8251 "Suspending 0"\\n00000019 2016-09-27 15:40:50.858 8251 8251 "Stopping 6"\\n0000001A 2016-09-27 15:40:50.858 8251 8251 "Stopping 5"\\n0000001B 2016-09-27 15:40:50.858 8251 8251 "Stopping 4"\\n0000001C 2016-09-27 15:40:50.858 8251 8251 "Stopping 3"\\n0000001D 2016-09-27 15:40:50.858 8251 8251 "Stopping 2"\\n0000001E 2016-09-27 15:40:50.858 8251 8251 "Stopping 1"\\n0000001F 2016-09-27 15:40:50.859 8251 8251 "Stopping 0"\\n00000020 2016-09-27 15:40:50.860 8251 8259 "BackupHandler stopped"\\n00000021 2016-09-27 15:40:50.862 8251 8251 "ERROR: 0: /var/lib/jenkins2/workspace/CE-Candidate-6.0.4-1/CE/ubuntu-14.04-amd64/HPCC-Platform/dali/server/daserver.cpp(439) : Exception : cassandra: failed to connect (No hosts available for the control connection)"\", \"post_time\": \"2016-09-27 15:53:20\" },\n\t{ \"post_id\": 11413, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Re: Cassandra for Workunit Storage configuration help\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nWould you be able to post the contents of the dali log? 
It should be located here:\\n\\n/var/log/HPCCSystems/mydali/server/DaServer.log\", \"post_time\": \"2016-09-27 14:36:40\" },\n\t{ \"post_id\": 11353, \"topic_id\": 2843, \"forum_id\": 14, \"post_subject\": \"Cassandra for Workunit Storage configuration help\", \"username\": \"balajisampath\", \"post_text\": \"I am trying to setup Cassandra as dali store. I have completed the configuration as per the documentation. Dali is not starting after making the changes. \\n\\nPlease help me figure out what I am missing. I have attached the steps as screen shots.\\n\\nCassandra is up and running.\\n\\nVirtualBox version 5.1.6\\nHPCCSystemsVM-amd64-6.0.4-1\\nCassandra 3.2.1\", \"post_time\": \"2016-09-26 14:19:37\" },\n\t{ \"post_id\": 11463, \"topic_id\": 2883, \"forum_id\": 14, \"post_subject\": \"Debug output when running HPCC client tools\", \"username\": \"GercoDries\", \"post_text\": \"Hello!\\n\\nWhenever I run any ECL client tools, I always get some extra lines of debug output on one system and on another system I don't get that extra output. It's messing up some extra output parsing I do so I'd like to get rid of it. The extra debug output looks like this:\\n\\n\\n/opt/HPCCSystems/6.0.4/clienttools/bin/dfuplus action=list server=http://my-server:8010\\nList *\\n00000000 2016-09-27 13:16:17.183 95060 -1 "jsocket(9,2266) shutdown err = 57 : C!127.0.0.1"\\nesthor::openfield::data::csv\\n
\\n\\nThe only difference I can imagine between the two systems is that on the one that's generating debug output, I previously compiled the HPCC client tools myself but I've since cleaned up that installation and even deleted /opt/HPCCSystems completely and reinstalled the client tools from the website.\\n\\nBoth systems are running Mac OS X 10.10.5.\", \"post_time\": \"2016-09-27 17:58:27\" },\n\t{ \"post_id\": 12383, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"linhbngo\", \"post_text\": \"Thank you for the suggestion, I finally got it to work. The error lies with the permission of mythor directory within /var/lib/HPCCSystems. In our settings, the hpcc account does not have administrative right, so I have to manually go in and set the permission for this directory itself. Once the correct permission is in place, rsync happens without a hitch and the cluster came up correctly.\", \"post_time\": \"2016-10-25 13:08:46\" },\n\t{ \"post_id\": 12001, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"mgardner\", \"post_text\": \"We need to make sure that the thorgroup.slave file is getting populated on the slave machine. I would suggest checking under /var/lib/HPCCSystems/mythor for thorgroup.slave. If it isn't there, try doing something like ...\\n\\n\\nsudo -i\\nsu hpcc\\nrsync -e "ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no" 10.125.10.131:/var/lib/HPCCSystems/mythor/thorgroup /var/lib/HPCCSystems/mythor/thorgroup.slave\\nls -lart /var/lib/HPCCSystems/mythor/\\n
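\n\nIf that rsync fails with a permission error, it is also worth checking who owns /var/lib/HPCCSystems/mythor on the slave, since the Thor processes run as the hpcc user; this is the kind of fix that resolved the report earlier in this thread (a sketch; substitute whatever user and group your installation actually runs under):\n\n    # confirm the directory exists and is writable by the hpcc account\n    ls -ld /var/lib/HPCCSystems/mythor\n    # if ownership is wrong, hand it back to the hpcc user and group\n    sudo chown -R hpcc:hpcc /var/lib/HPCCSystems/mythor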
\", \"post_time\": \"2016-10-17 13:30:28\" },\n\t{ \"post_id\": 11943, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"linhbngo\", \"post_text\": \"When I run\\nsudo service hppc-init -c mythor restart
\\nthe mythor startup waits for a very long time, then it returns the following:\\n\\n[root@hpcc001 mythor]# sudo service hpcc-init -c mythor start\\nStarting mythor ... [ TIMEOUT ]\\n
\\n\\nThe content of the log file on hpcc001 shows:\\n\\n[root@hpcc001 mythor]# more thormaster.2016_10_11.log\\n00000001 2016-10-11 20:31:22.614 118730 118730 "Opened log file //10.125.10.131/var/log/HPCCSystems/mythor/thormaster.2016_10_11.log"\\n00000002 2016-10-11 20:31:22.614 118730 118730 "Build enterprise_5.4.8-1"\\n00000003 2016-10-11 20:31:22.614 118730 118730 "calling initClientProcess Port 20000"\\n00000004 2016-10-11 20:31:22.620 118730 118730 "Found file 'thorgroup', using to form thor group"\\n00000005 2016-10-11 20:31:22.620 118730 118730 "Checking cluster replicate nodes"\\n00000006 2016-10-11 20:31:22.621 118730 118730 "Cluster replicate nodes check completed in 2ms"\\n00000007 2016-10-11 20:31:22.622 118730 118730 "Global memory size = 193680 MB"\\n00000008 2016-10-11 20:31:22.622 118730 118730 "RoxieMemMgr: Setting memory limit to 203088199680 bytes (774720 pages)"\\n00000009 2016-10-11 20:31:22.623 118730 118730 "Transparent huge pages are not supported on this kernel. Requires kernel version > 2.6.38."\\n0000000A 2016-10-11 20:31:22.623 118730 118730 "Memory released to OS on each 256k 'page'"\\n0000000B 2016-10-11 20:31:22.623 118730 118730 "RoxieMemMgr: 774720 Pages successfully allocated for the pool - memsize=203088199680 base=0x7f39c2e00000 alignment=262144\\n bitmapSize=24210"\\n0000000C 2016-10-11 20:31:22.624 118730 118730 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 38490 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSy\\nstems/mythor/temp = 38490 MB"\\n0000000D 2016-10-11 20:31:22.627 118730 118730 "Starting watchdog"\\n0000000E 2016-10-11 20:31:22.627 118730 118745 "Started watchdog"\\n0000000F 2016-10-11 20:31:22.627 118730 118730 "ThorMaster version 4.1, Started on 10.125.10.131:20000"\\n00000010 2016-10-11 20:31:22.627 118730 118730 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000011 2016-10-11 20:31:22.628 118730 118730 "Waiting for 4 slaves to register"\\n00000012 2016-10-11 20:31:22.628 118730 118730 "Verifying connection to slave 1"\\n00000013 2016-10-11 20:31:32.627 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n00000014 2016-10-11 20:31:32.627 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n00000015 2016-10-11 20:31:32.627 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n00000016 2016-10-11 20:31:32.627 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n00000017 2016-10-11 20:31:32.627 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n00000018 2016-10-11 20:31:32.628 118730 118730 "Verifying connection to slave 2"\\n00000019 2016-10-11 20:31:42.627 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n0000001A 2016-10-11 20:31:42.627 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n0000001B 2016-10-11 20:31:42.627 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n0000001C 2016-10-11 20:31:42.627 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n0000001D 2016-10-11 20:31:42.627 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n0000001E 2016-10-11 20:31:42.627 118730 118730 "Verifying connection to slave 4"\\n0000001F 2016-10-11 20:31:53.877 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n00000020 2016-10-11 20:31:53.877 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n00000021 2016-10-11 20:31:53.877 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n00000022 2016-10-11 20:31:53.877 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n00000023 2016-10-11 
20:31:53.877 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n00000024 2016-10-11 20:31:53.877 118730 118730 "Verifying connection to slave 3"\\n00000025 2016-10-11 20:32:10.752 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n00000026 2016-10-11 20:32:10.752 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n00000027 2016-10-11 20:32:10.752 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n00000028 2016-10-11 20:32:10.752 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n00000029 2016-10-11 20:32:10.752 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n0000002A 2016-10-11 20:32:10.752 118730 118730 "Verifying connection to slave 4"\\n0000002B 2016-10-11 20:32:36.064 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n0000002C 2016-10-11 20:32:36.064 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n0000002D 2016-10-11 20:32:36.064 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n0000002E 2016-10-11 20:32:36.064 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n0000002F 2016-10-11 20:32:36.064 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n00000030 2016-10-11 20:32:36.064 118730 118730 "Verifying connection to slave 3"\\n00000031 2016-10-11 20:33:14.032 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n00000032 2016-10-11 20:33:14.032 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n00000033 2016-10-11 20:33:14.032 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n00000034 2016-10-11 20:33:14.032 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n00000035 2016-10-11 20:33:14.032 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n00000036 2016-10-11 20:33:14.032 118730 118730 "Verifying connection to slave 3"\\n00000037 2016-10-11 20:34:10.984 118730 118730 "Still Waiting for minimum 4 slaves to connect"\\n00000038 2016-10-11 20:34:10.984 118730 118730 "waiting for slave 1 (10.125.10.133:20100)"\\n00000039 2016-10-11 20:34:10.984 118730 118730 "waiting for slave 2 (10.125.10.134:20100)"\\n0000003A 2016-10-11 20:34:10.984 118730 118730 "waiting for slave 3 (10.125.10.135:20100)"\\n0000003B 2016-10-11 20:34:10.984 118730 118730 "waiting for slave 4 (10.125.10.136:20100)"\\n0000003C 2016-10-11 20:34:10.984 118730 118730 "Verifying connection to slave 3"\\n
\\n\\nsudo ps aux | grep thorslave
on the slave nodes does not return anything, and the log on one of the slaves (with correct time stamp) shows the following:\\n\\nmore init_thorslave_mythor_2016_10_11_20_31_20.log\\n2016-10-12T00:31:37: dependency dafilesrv started\\n2016-10-12T00:31:37: slave(10.125.10.133) init\\n2016-10-12T00:31:37: slave(s) starting\\n2016-10-12T00:31:37: rsync -e ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no 10.125.10.131:/var/lib/HPCCSystems/mythor/thorgroup /var/lib/HPCCSystems/mythor/thorgroup\\n.slave\\n
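\n\nOne way to see why that rsync never completes (a diagnostic sketch, assuming the platform runs under the default hpcc account) is to repeat the same steps by hand as the hpcc user on the slave, which exposes any ssh key or permission problem the init log hides:\n\n    # key-based ssh back to the master must work non-interactively\n    sudo -u hpcc ssh -o BatchMode=yes -o StrictHostKeyChecking=no 10.125.10.131 'echo ssh ok'\n    # then repeat the exact transfer the init script attempts\n    sudo -u hpcc rsync -e "ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no" 10.125.10.131:/var/lib/HPCCSystems/mythor/thorgroup /var/lib/HPCCSystems/mythor/thorgroup.slave\n\nIf the ssh step prompts for a password or exits with status 255, passwordless ssh for the hpcc user between the nodes needs to be fixed before Thor will start.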
\", \"post_time\": \"2016-10-12 00:40:50\" },\n\t{ \"post_id\": 11941, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"mgardner\", \"post_text\": \"But when I run service hpcc-init --componentlist, this is what I have:\\n[root@hpcc004 HPCCSystems]# service hpcc-init --componentlist\\nNo components on this node as defined by /etc/HPCCSystems/environment.xml.
\\n\\nYou're on node 134 according to your ifconfig output. 134 only has ftslave, dafilesrv, and a thorslave on it. None of those components are directly started by hpcc-init so --componentlist filters them out and you won't see them.\\n\\nTo start your thormaster what I would suggest is to go to 10.125.10.131 and run sudo service hpcc-init -c mythor restart
or from any node, run sudo /opt/HPCCSystems/sbin/hpcc-run.sh -c mythor restart
.\\n\\nOn 10.125.10.131 you should have output in your init_mythor log. Could you please post it? I'm worried that the slaves aren't being started remotely. After you start mythor, hop onto node 134 and check sudo ps aux | grep thorslave
.\", \"post_time\": \"2016-10-10 20:06:18\" },\n\t{ \"post_id\": 11903, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"linhbngo\", \"post_text\": \"This is my thormaster log:\\n\\n00000001 2016-10-07 21:36:23.951 120417 120417 "Opened log file //10.125.10.131/var/log/HPCCSystems/mythor/thormaster.2016_10_07.log"\\n00000002 2016-10-07 21:36:23.951 120417 120417 "Build enterprise_5.4.8-1"\\n00000003 2016-10-07 21:36:23.951 120417 120417 "calling initClientProcess Port 20000"\\n00000004 2016-10-07 21:36:23.957 120417 120417 "Found file 'thorgroup', using to form thor group"\\n00000005 2016-10-07 21:36:23.957 120417 120417 "Checking cluster replicate nodes"\\n00000006 2016-10-07 21:36:23.960 120417 120417 "Cluster replicate nodes check completed in 2ms"\\n00000007 2016-10-07 21:36:23.961 120417 120417 "Global memory size = 193680 MB"\\n00000008 2016-10-07 21:36:23.961 120417 120417 "RoxieMemMgr: Setting memory limit to 203088199680 bytes (774720 pages)"\\n00000009 2016-10-07 21:36:23.961 120417 120417 "Transparent huge pages are not supported on this kernel. Requires kernel version > 2.6.38."\\n0000000A 2016-10-07 21:36:23.961 120417 120417 "Memory released to OS on each 256k 'page'"\\n0000000B 2016-10-07 21:36:23.961 120417 120417 "RoxieMemMgr: 774720 Pages successfully allocated for the pool - memsize=203088199680 base=0x7ed4fae00000 alignment=262144 bitmapSize=24210"\\n0000000C 2016-10-07 21:36:23.963 120417 120417 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 38503 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSystems/mythor/temp = 3\\n8503 MB"\\n0000000D 2016-10-07 21:36:23.966 120417 120417 "Starting watchdog"\\n0000000E 2016-10-07 21:36:23.966 120417 120432 "Started watchdog"\\n0000000F 2016-10-07 21:36:23.966 120417 120417 "ThorMaster version 4.1, Started on 10.125.10.131:20000"\\n00000010 2016-10-07 21:36:23.966 120417 120417 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000011 2016-10-07 21:36:23.966 120417 120417 "Waiting for 4 slaves to register"\\n00000012 2016-10-07 21:36:23.966 120417 120417 "Verifying connection to slave 4"\\n00000013 2016-10-07 21:36:33.966 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n00000014 2016-10-07 21:36:33.966 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n00000015 2016-10-07 21:36:33.967 120417 120417 "waiting for slave 2 (10.125.10.134:20100)"\\n00000016 2016-10-07 21:36:33.967 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n00000017 2016-10-07 21:36:33.967 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n00000018 2016-10-07 21:36:33.967 120417 120417 "Verifying connection to slave 3"\\n00000019 2016-10-07 21:36:43.966 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n0000001A 2016-10-07 21:36:43.966 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n0000001B 2016-10-07 21:36:43.966 120417 120417 "waiting for slave 2 (10.125.10.134:20100)"\\n0000001C 2016-10-07 21:36:43.966 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n0000001D 2016-10-07 21:36:43.966 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n0000001E 2016-10-07 21:36:43.966 120417 120417 "Verifying connection to slave 1"\\n0000001F 2016-10-07 21:36:55.216 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n00000020 2016-10-07 21:36:55.216 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n00000021 2016-10-07 21:36:55.216 120417 120417 "waiting for slave 2 
(10.125.10.134:20100)"\\n00000022 2016-10-07 21:36:55.216 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n00000023 2016-10-07 21:36:55.216 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n00000024 2016-10-07 21:36:55.216 120417 120417 "Verifying connection to slave 2"\\n00000025 2016-10-07 21:37:12.091 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n00000026 2016-10-07 21:37:12.091 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n00000027 2016-10-07 21:37:12.091 120417 120417 "waiting for slave 2 (10.125.10.134:20100)"\\n00000028 2016-10-07 21:37:12.091 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n00000029 2016-10-07 21:37:12.091 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n0000002A 2016-10-07 21:37:12.091 120417 120417 "Verifying connection to slave 4"\\n0000002B 2016-10-07 21:37:37.403 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n0000002C 2016-10-07 21:37:37.403 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n0000002D 2016-10-07 21:37:37.403 120417 120417 "waiting for slave 2 (10.125.10.134:20100)"\\n0000002E 2016-10-07 21:37:37.403 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n0000002F 2016-10-07 21:37:37.403 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n00000030 2016-10-07 21:37:37.403 120417 120417 "Verifying connection to slave 3"\\n00000031 2016-10-07 21:38:15.371 120417 120417 "Still Waiting for minimum 4 slaves to connect"\\n00000032 2016-10-07 21:38:15.371 120417 120417 "waiting for slave 1 (10.125.10.133:20100)"\\n00000033 2016-10-07 21:38:15.371 120417 120417 "waiting for slave 2 (10.125.10.134:20100)"\\n00000034 2016-10-07 21:38:15.371 120417 120417 "waiting for slave 3 (10.125.10.135:20100)"\\n00000035 2016-10-07 21:38:15.371 120417 120417 "waiting for slave 4 (10.125.10.136:20100)"\\n00000036 2016-10-07 21:38:15.371 120417 120417 "Verifying connection to slave 2"\\n\\nand then thormaster reported a TIMEOUT in starting mythor\", \"post_time\": \"2016-10-08 01:40:42\" },\n\t{ \"post_id\": 11893, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi Linh,\\n\\nIf this is a Thor slave node, then the master would start the slave process. What errors are you having running Thor jobs? 
Does the Thor log indicate any errors?\", \"post_time\": \"2016-10-07 23:59:41\" },\n\t{ \"post_id\": 11883, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"linhbngo\", \"post_text\": \"When I scan through the environment.xml file, it shows the node's IP address:\\n[root@hpcc004 ~]# more /etc/HPCCSystems/environment.xml | grep 134\\n name="node010134"\\n netAddress="10.125.10.134"/>\\n <Instance computer="node010134"\\n netAddress="10.125.10.134"\\n <Instance computer="node010134"\\n netAddress="10.125.10.134"\\n <ThorSlaveProcess computer="node010134" name="s2"/>\\n[root@hpcc004 ~]# more /etc/HPCCSystems/environment.xml | grep s2\\n name="s2"\\n name="s2"\\n <ThorSlaveProcess computer="node010134" name="s2"/>\\n\\nand this is the ifconfig results:\\n[root@hpcc004 ~]# ifconfig\\neth3 Link encap:Ethernet HWaddr 00:8C:FA:5A:FA:85\\n inet addr:10.125.10.134 Bcast:10.125.255.255 Mask:255.255.0.0\\n inet6 addr: 2620:103:a006:1:28c:faff:fe5a:fa85/64 Scope:Global\\n inet6 addr: fe80::28c:faff:fe5a:fa85/64 Scope:Link\\n UP BROADCAST RUNNING MULTICAST MTU:9000 Metric:1\\n RX packets:39647692 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:0 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:1000\\n RX bytes:2689741060 (2.5 GiB) TX bytes:0 (0.0 b)\\n\\nIfconfig uses the ioctl access method to get the full address information, which limits hardware addresses to 8 bytes.\\nBecause Infiniband address has 20 bytes, only the first 8 bytes are displayed correctly.\\nIfconfig is obsolete! For replacement check ip.\\nib0 Link encap:InfiniBand HWaddr A0:00:02:20:FE:80:00:00:00:00:00:00:00:00:00:00:00:00:00:00\\n inet addr:10.128.1.217 Bcast:10.128.31.255 Mask:255.255.224.0\\n inet6 addr: fe80::f652:1403:73:70f1/64 Scope:Link\\n UP BROADCAST RUNNING MULTICAST MTU:65520 Metric:1\\n RX packets:69389 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:6 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:1024\\n RX bytes:5027610 (4.7 MiB) TX bytes:456 (456.0 b)\\n\\nlo Link encap:Local Loopback\\n inet addr:127.0.0.1 Mask:255.0.0.0\\n inet6 addr: ::1/128 Scope:Host\\n UP LOOPBACK RUNNING MTU:65536 Metric:1\\n RX packets:1105 errors:0 dropped:0 overruns:0 frame:0\\n TX packets:1105 errors:0 dropped:0 overruns:0 carrier:0\\n collisions:0 txqueuelen:0\\n RX bytes:67087 (65.5 KiB) TX bytes:67087 (65.5 KiB)\\n\\nbut when I run configgen with the flag -ip, it does not show anything:\\n[root@hpcc004 ~]# /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -ip 10.125.10.134\\n[root@hpcc004 ~]#\\n\\nI tested this against an xml file that was known to be working before, and the result is the same, configgen cannot derive the component type based on the ip address provided. I have also followed suggestions in another thread and change the interface flag in /etc/HPCCSystems/environment.conf from * to eth3, and still not able to identify the components. How does HPCCSystems acquire the ip address from the system to compare it against the ip address in environment.xml?\", \"post_time\": \"2016-10-07 22:11:51\" },\n\t{ \"post_id\": 11873, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Re: Cannot parse components from environment.xml\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi Linh,\\n\\nThe message "No components on this node as defined by /etc/HPCCSystems/environment.xml." 
means that the IP of the node that you are trying to run the HPCC cluster on is not listed in the environment.xml, though it may appear in the hardware section.\\n\\nVerify that the IP of the node in question appears in environment.xml and that some HPCC component is actually assigned to run on that node. Also, if you run configgen with the the flag -ip <IP> it should list the components that are configured to run on the node with the given IP.\\n\\n-Gleb\", \"post_time\": \"2016-10-07 21:00:13\" },\n\t{ \"post_id\": 11863, \"topic_id\": 2943, \"forum_id\": 14, \"post_subject\": \"Cannot parse components from environment.xml\", \"username\": \"linhbngo\", \"post_text\": \"When I run configgen directly, this is what I have:\\n\\n[root@hpcc004 HPCCSystems]# /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -listall2\\nEclAgentProcess,myeclagent,10.125.10.131,,/var/lib/HPCCSystems/myeclagent,\\nFTSlaveProcess,myftslave,10.125.10.131,,/var/lib/HPCCSystems/myftslave,\\nFTSlaveProcess,myftslave,10.125.10.132,,/var/lib/HPCCSystems/myftslave,\\nFTSlaveProcess,myftslave,10.125.10.133,,/var/lib/HPCCSystems/myftslave,\\nFTSlaveProcess,myftslave,10.125.10.134,,/var/lib/HPCCSystems/myftslave,\\nFTSlaveProcess,myftslave,10.125.10.135,,/var/lib/HPCCSystems/myftslave,\\nFTSlaveProcess,myftslave,10.125.10.136,,/var/lib/HPCCSystems/myftslave,\\nSashaServerProcess,mysasha,10.125.10.131,8877,/var/lib/HPCCSystems/mysasha,.\\nDaliServerProcess,mydali,10.125.10.131,7070,/var/lib/HPCCSystems/mydali,\\nDfuServerProcess,mydfuserver,10.125.10.131,,/var/lib/HPCCSystems/mydfuserver,\\nEclCCServerProcess,myeclccserver,10.125.10.131,,/var/lib/HPCCSystems/myeclccserver,\\nEspProcess,myesp,10.125.10.131,,/var/lib/HPCCSystems/myesp,\\nDafilesrvProcess,mydafilesrv,10.125.10.131,,/var/lib/HPCCSystems/mydafilesrv,\\nDafilesrvProcess,mydafilesrv,10.125.10.132,,/var/lib/HPCCSystems/mydafilesrv,\\nDafilesrvProcess,mydafilesrv,10.125.10.133,,/var/lib/HPCCSystems/mydafilesrv,\\nDafilesrvProcess,mydafilesrv,10.125.10.134,,/var/lib/HPCCSystems/mydafilesrv,\\nDafilesrvProcess,mydafilesrv,10.125.10.135,,/var/lib/HPCCSystems/mydafilesrv,\\nDafilesrvProcess,mydafilesrv,10.125.10.136,,/var/lib/HPCCSystems/mydafilesrv,\\nThorMasterProcess,mythor,10.125.10.131,,/var/lib/HPCCSystems/mythor,\\nThorSlaveProcess,mythor,10.125.10.133,,/var/lib/HPCCSystems/mythor,\\nThorSlaveProcess,mythor,10.125.10.134,,/var/lib/HPCCSystems/mythor,\\nThorSlaveProcess,mythor,10.125.10.135,,/var/lib/HPCCSystems/mythor,\\nThorSlaveProcess,mythor,10.125.10.136,,/var/lib/HPCCSystems/mythor,\\nEclSchedulerProcess,myeclscheduler,10.125.10.131,,/var/lib/HPCCSystems/myeclscheduler,\\n\\n\\nBut when I run service hpcc-init --componentlist, this is what I have:\\n[root@hpcc004 HPCCSystems]# service hpcc-init --componentlist\\nNo components on this node as defined by /etc/HPCCSystems/environment.xml.\\n\\nAny idea on what could cause this problem? At the same time, on a node with similar configuration, HPCCSystems was able to parse the .xml file correctly. 
All nodes have same hardware configuration, and all run LSB_VERSION=base-4.0-amd64:base-4.0-noarch:core-4.0-amd64:core-4.0-noarch:graphics-4.0-amd64:graphics-4.0-noarch:printing-4.0-amd64:printing-4.0-noarch\\nScientific Linux release 6.6 (Carbon)\\nScientific Linux release 6.6 (Carbon)\\n\\nThanks, \\n\\nLinh\", \"post_time\": \"2016-10-07 20:27:04\" },\n\t{ \"post_id\": 13843, \"topic_id\": 3213, \"forum_id\": 14, \"post_subject\": \"Re: hpccsystems platform package name\", \"username\": \"lily\", \"post_text\": \"[quote="mgardner":1etqn2i1]Morning Lily,\\n\\nThe install cluster script takes a local copy of the rpm or deb file on your system and pushes it to the remote machines and installs it. So you need to grab the actual rpm or deb file and have it available locally for the script to work.\\n\\nIt should look a little more like this.\\n\\n`install-cluster.sh -k /path/to/rpm/or/deb/file/hpccsystems-platform*.deb`\\n\\n\\n*************************************************************************************\\n\\nThank you so much @mgardner! I got HPCC system installed based on your reply! Lightening!\\n\\n-Lily\", \"post_time\": \"2016-12-18 20:20:50\" },\n\t{ \"post_id\": 12823, \"topic_id\": 3213, \"forum_id\": 14, \"post_subject\": \"Re: hpccsystems platform package name\", \"username\": \"mgardner\", \"post_text\": \"Morning Lily,\\n\\nThe install cluster script takes a local copy of the rpm or deb file on your system and pushes it to the remote machines and installs it. So you need to grab the actual rpm or deb file and have it available locally for the script to work.\\n\\nIt should look a little more like this.\\n\\n`install-cluster.sh -k /path/to/rpm/or/deb/file/hpccsystems-platform*.deb`\", \"post_time\": \"2016-11-04 15:09:43\" },\n\t{ \"post_id\": 12813, \"topic_id\": 3213, \"forum_id\": 14, \"post_subject\": \"hpccsystems platform package name\", \"username\": \"lily\", \"post_text\": \"Hi people,\\ndoes anyone know the package name of the hpcc system for installed??\\nI tried all the package name posted online but all failed...\\n\\nthe command that requires the package name is as below:\\n\\n$install-cluster.sh -k <package-file-name>\\n\\nthe package file name should be in below format according to the installation documentation:\\nhpccsystems-platform-xxxx-n.n.nnnn.rmp(or .deb)\", \"post_time\": \"2016-11-04 13:04:59\" },\n\t{ \"post_id\": 13793, \"topic_id\": 3463, \"forum_id\": 14, \"post_subject\": \"Re: SALT License and Enterprise Edition - Cost Details\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello Subbu, thanks for your post and interest in HPCC Systems and SALT. \\n\\nPricing varies and is based on two primary drivers: \\n- Size of system (number of nodes)\\n- Service level (Bronze, Silver, Gold) - see more at https://hpccsystems.com/enterprise-serv ... al-support \\n\\nThis is inclusive of HPCC Systems Enterprise license (https://hpccsystems.com/enterprise-services) and SALT (https://hpccsystems.com/enterprise-serv ... dules/SALT).\\n\\nPlease do contact us at sales@hpccsystems.com for pricing information and we can have a representative contact you. Thank you again for your interest!\", \"post_time\": \"2016-12-15 12:23:37\" },\n\t{ \"post_id\": 13783, \"topic_id\": 3463, \"forum_id\": 14, \"post_subject\": \"SALT License and Enterprise Edition - Cost Details\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nWe understand that SALT can be used only with Enterprise Edition Platform of HPCC Systems. 
We would like to know what would be the approximate Cost for below components?\\n\\nEnterprise Edition - License Cost, if any. We are not looking for Professional Services.\\nSALT License - Cost. Will it be based on per node in the Cluster? If yes, please provide us the cost of per node license for SALT.\\n\\nPlease provide me the contact point for checking on the same if this can't be discussed in the forum.\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-12-14 20:50:35\" },\n\t{ \"post_id\": 15473, \"topic_id\": 3853, \"forum_id\": 14, \"post_subject\": \"How to redirect ECL command line logging file?\", \"username\": \"jzuo\", \"post_text\": \"when I run ecl publish or ecl deploy in command line, it will create eclcc.log file in current location, and overwrite the previous log file.\\nHow can I redirect it a new file when I run ecl command?\", \"post_time\": \"2017-02-23 13:47:35\" },\n\t{ \"post_id\": 20243, \"topic_id\": 4733, \"forum_id\": 14, \"post_subject\": \"Re: mythor failed to start\", \"username\": \"tlhumphrey2\", \"post_text\": \"It would help if I knew what the error message is telling me:\\nDFS cluster topology for 'mythor', does not match existing DFS group layout for group
\\n\\nWhat I think it is telling me is that there is something in the Topology attribute of my environment.xml file that doesn't match something about 'mythor' that is stored someplace on /var/lib/HPCCSystems.\", \"post_time\": \"2017-12-12 18:40:47\" },\n\t{ \"post_id\": 20233, \"topic_id\": 4733, \"forum_id\": 14, \"post_subject\": \"Re: mythor failed to start\", \"username\": \"tlhumphrey2\", \"post_text\": \"My last post said I had fixed the problem. But the fix was removing the contents of /var/lib/HPCCSystems, and I really don't want to do that.\\n\\nI get this error after starting the cluster's instances on AWS. Every time. I can get rid of the problem by doing:\\nsudo rm -r /var/lib/HPCCSystems/*
which deletes the contents of /var/lib/HPCCSystems.\\n\\nBut, that isn't the best solution because I need data that is stored there.\\n\\nAnyone have any ideas???\", \"post_time\": \"2017-12-12 18:36:03\" },\n\t{ \"post_id\": 18793, \"topic_id\": 4733, \"forum_id\": 14, \"post_subject\": \"Re: mythor failed to start\", \"username\": \"tlhumphrey2\", \"post_text\": \"Fixed problem. I had /var/lib/HPCCSystems as a symbolic link which pointed to another directory. I did the following to fix the problem:\\nsudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init stop\\nsudo /opt/HPCCSystems/sbin/hpcc-run.sh -a dafilesrv stop\\nsudo rm /var/lib/HPCCSystems\\nsudo mkdir /var/lib/HPCCSystems\\nsudo chown hpcc:hpcc /var/lib/HPCCSystems\\nsudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start
\", \"post_time\": \"2017-09-12 15:51:42\" },\n\t{ \"post_id\": 18783, \"topic_id\": 4733, \"forum_id\": 14, \"post_subject\": \"mythor failed to start\", \"username\": \"tlhumphrey2\", \"post_text\": \"Looking in /var/log/mythor/init_mythor_2017_09_12_14_00_09.log, I see the following error message:\\n\\nERROR: DFS cluster topology for 'mythor', does not match existing DFS group layout for group 'mythor'\\n\\nHow do I correct this problem?\", \"post_time\": \"2017-09-12 15:27:08\" },\n\t{ \"post_id\": 21173, \"topic_id\": 4823, \"forum_id\": 14, \"post_subject\": \"Re: Memory Pool Exhausted error when passing 2 large dataset\", \"username\": \"bforeman\", \"post_text\": \"Hi Tim,\\n\\nThis might be something to post to the JIRA issue tracker, I just saw a similar post this morning with a developer attempting a Python script with PIPE. Could be related:\\n\\nhttps://hpccsystems.com/bb/viewtopic.php?f=8&t=5393\\n\\nRegards,\\nBob\", \"post_time\": \"2018-03-16 13:32:19\" },\n\t{ \"post_id\": 19173, \"topic_id\": 4823, \"forum_id\": 14, \"post_subject\": \"Memory Pool Exhausted error when passing 2 large datasets\", \"username\": \"tlhumphrey2\", \"post_text\": \"Below is the code. Currently, my embedded python doesn't do anything. The ECL reads in 2 large datasets and passes both to the embedded python, MatrixMultiply. The sizes of the 2 datasets are 1,600,040,000 and 1,600,080,000. I get the following error:Error: System error: 1301: Memory pool exhausted: pool id 4194314 (1216 pages) exhausted, requested 1 (in Disk Read G22 E23)
\\nimport python;\\n\\nrec0 := RECORD\\n REAL cell;\\nEND;\\n\\nrec := RECORD\\n DATASET(rec0) arow;\\nEND;\\n\\nDATASET(rec) MatrixMultiply(DATASET(rec) A, unsigned nrowsA, unsigned ncolsA,DATASET(rec) B, unsigned nrowsB, unsigned ncolsB) := embed(Python)\\n import numpy as np\\n import re\\n return A\\n\\nendembed;\\n\\nA:=DATASET('~hthor::tlh::AMatrix',rec,THOR);\\nB:=DATASET('~hthor::tlh::BMatrix',rec,THOR);\\nNRowsA:=COUNT(A);\\nNColsA:=COUNT(A[1].arow);\\nNRowsB:=COUNT(B);\\nNColsB:=COUNT(B[1].arow);\\n\\nMatrixMultiply(A,NRowsA,NColsA,B,NRowsB,NColsB);\\n
\\nI'm running this on hthor. I'm working on a machine that has 15GB of memory. So, I should have plenty of memory. Both datasets should fit entirely in memory. I want to make changes to my environment.xml file so more memory will be available to my workunit. I have added these 2 parameters to both EclAgentProcess and ThorCluster. But, I'm still getting the error.\\ndefaultMemoryLimit="10000000000"\\ntotalMemoryLimit="10000000000"\\n
\\nAng help would be most appreciated.\", \"post_time\": \"2017-09-29 14:51:55\" },\n\t{ \"post_id\": 19393, \"topic_id\": 4903, \"forum_id\": 14, \"post_subject\": \"Could NOT find OpenLDAP\", \"username\": \"balajisampath\", \"post_text\": \"I am getting below error while building 6.4.2-1 in VM . Please help\\n\\nhpccdemo@HPCCSystemsVM-amd64-6:~$ cmake -DREMBED=ON HPCC-Platform\\n-- Making Release system\\n-- 64bit architecture is 1\\nUsing compiler: GNU :: 4.8.4 :: ::\\n-- Could NOT find OpenLDAP (missing: OPENLDAP_LIBRARIES OPENLDAP_INCLUDE_DIR)\\nCMake Error at cmake_modules/commonSetup.cmake:699 (message):\\n OPENLDAP requested but package not found\\nCall Stack (most recent call first):\\n CMakeLists.txt:142 (include)\\n\\n\\n-- Configuring incomplete, errors occurred!\\nSee also "/home/hpccdemo/CMakeFiles/CMakeOutput.log".\\nSee also "/home/hpccdemo/CMakeFiles/CMakeError.log".\", \"post_time\": \"2017-10-10 21:36:24\" },\n\t{ \"post_id\": 19883, \"topic_id\": 5033, \"forum_id\": 14, \"post_subject\": \"Re: Multi node setup in Local VM Instance\", \"username\": \"ravishankar\", \"post_text\": \"Gotcha. Thanks Richard. \\n\\nSo basically I can also run 3 or 4 separate VMWare Virtual Machine instance and follow the Installing & Running the HPCC Platform document to set up Multi node cluster, Which can also be done by Oracle Virtual Box. \\n\\nBelieve Running Multiple HPCC instance on Oracle Virtual Box or in VMWare Virtual Machine behaves same way in this regards ? Please clarify and confirm the same.\", \"post_time\": \"2017-11-07 20:19:08\" },\n\t{ \"post_id\": 19873, \"topic_id\": 5033, \"forum_id\": 14, \"post_subject\": \"Re: Multi node setup in Local VM Instance\", \"username\": \"rtaylor\", \"post_text\": \"Ravi,\\n\\nThe HPCC VM you can download is not meant to do multiple-node clusters. It is designed for R&D POC-type use. \\n\\nTo do what you want, you need to read the Installing & Running the HPCC Platform doc (download from here: https://hpccsystems.com/training/documentation/installation-and-administration). Then you can just set up a separate VM for each node in your configuration (if you're using Oracle's VirtualBox, those docs would be from Oracle) and install HPCC the same way you would if they were physical boxes instead of VMs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-11-07 19:53:14\" },\n\t{ \"post_id\": 19863, \"topic_id\": 5033, \"forum_id\": 14, \"post_subject\": \"Multi node setup in Local VM Instance\", \"username\": \"ravishankar\", \"post_text\": \"I am working with Local Virtual Instance HPCCSystemVM-amd64-6.4.2.1. \\nFor a specific testing and learning, I want a multi node cluster to be set up in the local VM Instance. \\n\\nBelow is my physical system configuration\\n\\nProcessor: Intel(R) Core(TM) i5-6300 CPU @ 2.40 GHz 2.50 GHz\\nInstalled Memory(RAM): 16.0 GB (15.7 GB usable)\\nSystem Type: 64-bit Operating System,x64-based processor \\n\\nPlease let me know is it feasible to set up a Multi node setup in the Local Virtual Instance.\", \"post_time\": \"2017-11-07 17:49:36\" },\n\t{ \"post_id\": 19893, \"topic_id\": 5043, \"forum_id\": 14, \"post_subject\": \"Cluster thor not listening for workunits\", \"username\": \"ravishankar\", \"post_text\": \"Using Configuration manager I set up 4 node cluster with 1 master and 3 thor slave node.\\nI can able to spray and despray files. While spraying, the files parts are replicated into slave node as expected. 
I am able to access the sprayed files in HTHOR using ECL and all ECL workunits work successfully in HTHOR.\\n\\nBut in Thor I couldn't. It displays the below error message.\\n\\nCluster thor not listening for workunits; thor.thor: queue active;\\n\\nKindly help me to fix it. Attached the environment.xml for reference.\", \"post_time\": \"2017-11-09 17:15:43\" },\n\t{ \"post_id\": 20213, \"topic_id\": 5153, \"forum_id\": 14, \"post_subject\": \"Multi-Disk setup\", \"username\": \"micevepay\", \"post_text\": \"Hi,\\n\\nLet's say I have 1 physical node with 2 thor slaves. I believe in the typical setup, each slave has its own memory allocation but writes (or spills) to the same disk. Is it possible to have each slave write to a separate disk? Of course this is assuming that my physical node has more than one mounted volume.\\n\\nMike\", \"post_time\": \"2017-12-12 15:26:30\" },\n\t{ \"post_id\": 20483, \"topic_id\": 5203, \"forum_id\": 14, \"post_subject\": \"Re: error starting mythor\", \"username\": \"rtaylor\", \"post_text\": \"jibtga, I've downloaded virtual image from Web, also I create another virtual image
The VM on our portal is specifically designed to be a POC/test environment. It only creates a single-node Thor and single-node Roxie. It is not designed to be used as part of a VM-based multi-node environment.\\n\\nIf you want to create a multi-node VM-based environment then you need to create your VMs with just the OS and then follow the instructions to install any multi-node environment (VM or hardware-based) contained in this doc:\\n http://cdn.hpccsystems.com/releases/CE-Candidate-6.4.6/docs/Installing_and_RunningTheHPCCPlatform-6.4.6-1.pdf\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-16 14:42:39\" },\n\t{ \"post_id\": 20443, \"topic_id\": 5203, \"forum_id\": 14, \"post_subject\": \"error starting mythor\", \"username\": \"jibtga\", \"post_text\": \"I am trying to test hpcc (version 6.4.6-1). I've downloaded virtual image from Web, also I create another virtual image running over debian 9.3. When I start service......\\n\\nsudo /etc/init.d/hpcc-init start\\nDependent service dafilesrv, mydafilesrv is already running.\\nStarting mydali ... [ OK ] \\nStarting mydfuserver ... [ OK ] \\nStarting myeclagent ... [ OK ] \\nStarting myeclccserver ... [ OK ] \\nStarting myeclscheduler ... [ OK ] \\nStarting myesp ... [ OK ] \\nStarting myroxie ... [ OK ] \\nStarting mysasha ... [ OK ] \\nStarting mythor ... [ FAILED ] \\n\\nIn logs (init_mythor_xxxxxxx.log), it show:\\n \\nERROR: DFS cluster topology for 'mythor', does not match existing DFS group layout for group 'mythor'\\n\\nI am a bit lost.... any help?\\n\\nThanks\", \"post_time\": \"2018-01-12 13:29:46\" },\n\t{ \"post_id\": 20703, \"topic_id\": 5283, \"forum_id\": 14, \"post_subject\": \"Re: HPCC not start automatically after reboot\", \"username\": \"mgardner\", \"post_text\": \"Thanks for the question. Currently this isn't officially supported but if you want to attempt it, a great tutorial is located here at https://www.digitalocean.com/community/ ... l-examples. This feature will be a supported capability in upcoming releases though.\", \"post_time\": \"2018-02-02 15:09:35\" },\n\t{ \"post_id\": 20693, \"topic_id\": 5283, \"forum_id\": 14, \"post_subject\": \"HPCC not start automatically after reboot\", \"username\": \"eprado22\", \"post_text\": \"How I can set de hpcc-init for start after a crash or reboot. My server only boot up when i write manually \\n\\nsudo /etc/init.d/hpcc-init start\", \"post_time\": \"2018-02-02 00:07:52\" },\n\t{ \"post_id\": 20943, \"topic_id\": 5343, \"forum_id\": 14, \"post_subject\": \"Re: Deploying Roxie Package With Authentication\", \"username\": \"bforeman\", \"post_text\": \"Thank You Tony! Good to know!\\nBob\", \"post_time\": \"2018-03-02 13:59:07\" },\n\t{ \"post_id\": 20933, \"topic_id\": 5343, \"forum_id\": 14, \"post_subject\": \"Re: Deploying Roxie Package With Authentication\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Bob, Some projects automate file changes by making SOAPCALLs out to the package file management system.\\n\\nAntony,\\n\\nYou can add basic authentication credentials to a SOAPCALL through the URL.\\n\\nThe format is 'https://user:pw@myhost:18010/WsPackageProcess'.\\n\\nECL example:\\n\\nSTRING us := 'xx' : stored('username');\\nSTRING pw := 'zz' : stored('password', format(password));\\n\\nSTRING RoxieUrl := 'https://'+ us + ':' + pw + '@' + Host + ':18010/WsPackageProcess';
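A quick way to sanity-check that same user:pw@host URL form from a shell before wiring it into SOAPCALL (a minimal sketch, not from the original post; the placeholder credentials, host and port are simply the values used above, and curl accepts the identical URL shape):

    # expect a 200 (or at least not a 401) if the basic-auth credentials are accepted
    curl -k -o /dev/null -w '%{http_code}\n' 'https://xx:zz@myhost:18010/WsPackageProcess'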
\", \"post_time\": \"2018-03-02 00:15:04\" },\n\t{ \"post_id\": 20923, \"topic_id\": 5343, \"forum_id\": 14, \"post_subject\": \"Re: Deploying Roxie Package With Authentication\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nJust curious...\\n\\nPackage map deployment is usually done from the ECL command line, why do you need to use SOAPCALL? I would guess that authentication would be done using a "user=name,password=value" format.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2018-03-01 18:27:23\" },\n\t{ \"post_id\": 20903, \"topic_id\": 5343, \"forum_id\": 14, \"post_subject\": \"Deploying Roxie Package With Authentication\", \"username\": \"amillar\", \"post_text\": \"Hi all,\\n\\nI was wondering if someone can help with a quick authentication query,\\n\\nIn our package deployment ecl we have the following settings. We have implemented authentication and want to know how to send the username and password through when deploying the package to the roxie using SOAPCALL...\\n\\nSTRING RoxieUrl_WsWorkunits := 'https://' + Roxie_Hostname + ':18010/WsWorkunits?ver_=1.44';\\nSTRING RoxieUrl_WsPackageProcess := 'https://'+ Roxie_Hostname + ':18010/WsPackageProcess';\\nSTRING Roxie_Clustername := 'roxie';\\nSTRING Package_Name := 'globex.pkg';\\n\\nThanks in advance\\n\\nAntony\", \"post_time\": \"2018-02-28 20:57:44\" },\n\t{ \"post_id\": 21343, \"topic_id\": 5413, \"forum_id\": 14, \"post_subject\": \"Re: AWS HPCC Service Multinode cluster specification\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm assuming you are talking about what we call "Instant Cloud". If so, then there isn't away to change the instance type. But, if you have access to CloudFormation on the AWS console you can use the cloudformation templates and scripts given in the following github repository to launch an hpcc cluster on just about any instance type. The document in this repository will tell you how to use the cloudformation template and scripts:\\nhttps://github.com/tlhumphrey2/EasyFastHPCCoAWS\\n\\nI recommend you use the cloudformation template called MyHPCCCloudFormationTemplate.json\", \"post_time\": \"2018-03-29 12:53:54\" },\n\t{ \"post_id\": 21333, \"topic_id\": 5413, \"forum_id\": 14, \"post_subject\": \"Re: AWS HPCC Service Multinode cluster specification\", \"username\": \"rtaylor\", \"post_text\": \"see reply here: https://hpccsystems.com/bb/viewtopic.php?f=24&t=5403&p=21323#p21323\", \"post_time\": \"2018-03-29 11:04:31\" },\n\t{ \"post_id\": 21313, \"topic_id\": 5413, \"forum_id\": 14, \"post_subject\": \"AWS HPCC Service Multinode cluster specification\", \"username\": \"rsghatpa\", \"post_text\": \"The AWS HPCC service sets up an HPCC cluster on m1.large instance. These instances are not EBS backed and hence cannot be stopped to change the instance type. Is there anyway to change the instance type while setting up the cluster using AWS HPCC service?\", \"post_time\": \"2018-03-28 21:04:42\" },\n\t{ \"post_id\": 21503, \"topic_id\": 5433, \"forum_id\": 14, \"post_subject\": \"Re: Disk Capacity Planning\", \"username\": \"lpezet\", \"post_text\": \"Sorry, I made some assumption as to how the cluster was deployed.\\n\\nThis is extremely helpful. Thanks a ton jsmith!\", \"post_time\": \"2018-04-09 16:30:15\" },\n\t{ \"post_id\": 21483, \"topic_id\": 5433, \"forum_id\": 14, \"post_subject\": \"Re: Disk Capacity Planning\", \"username\": \"jsmith\", \"post_text\": \"Here is my current understanding of folders on each node type. 
Please correct me where I'm wrong.\\n\\nFor Slave nodes, /var/lib/HPCCSystems/hpcc-data and hpcc-mirror are used to store Logical Files (and replicas).\\nThe folder /var/lib/HPCCSystems/mythor will host ????. I see core.* files up to 4GBs in there, and mythor/temp can get quite huge (right now I have 129GB in there).
\\n\\nIn reality the large core files don't actually take up as much space as their size might suggest, because they are sparse files.\\ndu corefile will reveal the actual disk space consumed.\\n'mythor' is the component instance directory and is only used to hold its configuration and temporary small runtime files.\\nAnd, as you've noted, core files (if they are created), although you can configure core file behaviour and location in Linux so they could end up elsewhere.\\n\\nThe instance temp directory (e.g. mythor/temp) holds internal spilling files, e.g. when performing a sort or join that is larger than memory, the engines spill intermediate results to this directory. NB: Thor master will not use this temp directory for spilling, so its size on the Thor master node should be insignificant.\\n\\nNB: These directories can be reconfigured to other locations via the Directories section in the environment, e.g. the default for the component temp directory is: <Category dir=&quot;/home/jsmith/hpccdeb/var/lib/[NAME]/[INST]/temp&quot; name=&quot;temp&quot;/>\\n\\nhpcc-data and hpcc-mirror on the slave nodes are as you say, the root level storage areas for logical file parts.\\n\\nFor Master node, /var/lib/HPCCSystems/myeclccserver gets big overtime as it stores workunits (one .so file per workunit, is that right?).\\nFolders /var/lib/HPCCSystems/hpcc-data and hpcc-mirror don't seem to store actual Logical Files and do not follow Slaves' equivalent.
\\nthe eclccserver needn't necessarily be on the Thor master node, workunit query dll's and potentially other intermediate files will build up there, however Sasha should be configured to automatically archive old workunits, removing them from Dali and the related disk files, e.g. in the myeclccserver directory.\\nThat does mean however, that the Sasha folders (/var/lib/HPCCSystems/hpcc-data/sasha/Archive will grow.\\n\\nFor Support nodes, I don't know what's stored there (I see hpcc-data, mydafilesrv and myesp, none are meaningfully big.).
\\nAs mentioned above, the 'sasha' directory will grow to be substantially big overtime.\\nIf hthor jobs are being executed then files are written to '/var/lib/HPCCSysytems/hpcc-data/eclagent', so that can be arbitrarily large also.\\nThe dali data directory (e.g. /var/lib/HPCCSystems/hpcc-data/dali) will also grow overtime, since it keeps copies/snapshots of the database over time. The number of copies it keeps can be configured with the 'keepStores' setting.\\n\\nFor all nodes (or is it just Master?), the logs can also get quite big (with skewed JOINs I believe it becomes very verbose and drastically increase logs).\\n\\nAm I missing anything? While hpcc-data and hpcc-mirror (on slave nodes) is easy to grasp, I bumped into a disk space issue because mythor/temp overfilled the disk and query crashed. Runtime is more difficult to figure out, is what I'm saying.
\\nhpcc-data , hpcc-mirror and the temp directory will be the main consumers. You could reconfigure the temp directory to be under hpcc-data for simplicity.\\n\\nNow when trying to create mounts for some of those folders, some things don't work any longer.\\nhpcc-data and hpcc-mirror are "symbolic link" friendly. But not so much for mythor on the Master (something to do with .sentinel file???). If I create a symbolic link for /var/lib/HPCCSystems/mythor (to say /mnt/mythor), hpcc-init says it can't start mythor.
\\nI'm not sure why that would be.. I'd have to study the init logs to see why it's failing, but I wouldn't suggest relocating the instance directory itself as it should be tiny except the temp directory which can be independently reconfigured.\\n\\nWhich leads to yet another question: I see in the environment.xml that some folders are configured at the <Instance> level, while others are in the <Directories> element. What's the difference between the two?\\nI see <Roxie> can specify its own "directory" (attribute), yet <Thor> cannot (or at least I don't see it...haven't checked the XSD...sorry). I could mount an EBS volume directly into /var/lib/HPCCSystems/mythor, but I'd rather not if possible (trying to keep all mounts in same folder and symlink those).
\\n\\nIt should be possible to reconfigured the locations of everything via the <Directories> section, but the instance directories seem to bypass these directives.\\nHowever I think the rest can be via the Directories section.\\nYou can certainly use this section to relocate the main data directories and temp directories though.\", \"post_time\": \"2018-04-05 15:12:31\" },\n\t{ \"post_id\": 21403, \"topic_id\": 5433, \"forum_id\": 14, \"post_subject\": \"Disk Capacity Planning\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to figure out the "data load" vs. configuration on a Thor cluster to plan for disk capacity. Think AWS EC2 instances using EBS volumes for each prone-to-get-huge folders like hpcc-data on slave nodes.\\nIO need is different between Master/Slave/Support, so if possible, I'd rather not pay extra $$$ for say Support disks and rather allocate those $$$ on Slaves' disks to get better IO (separate IO capacity between actual data read/write and mirroring read/writes, and even yet a separate IO capacity for mythor/temp for instance).\\n\\nHere is my current understanding of folders on each node type. Please correct me where I'm wrong.\\n\\nFor Slave nodes, /var/lib/HPCCSystems/hpcc-data and hpcc-mirror are used to store Logical Files (and replicas).\\nThe folder /var/lib/HPCCSystems/mythor will host ????. I see core.* files up to 4GBs in there, and mythor/temp can get quite huge (right now I have 129GB in there).\\n\\nFor Master node, /var/lib/HPCCSystems/myeclccserver gets big overtime as it stores workunits (one .so file per workunit, is that right?).\\nFolders /var/lib/HPCCSystems/hpcc-data and hpcc-mirror don't seem to store actual Logical Files and do not follow Slaves' equivalent.\\n\\nFor Support nodes, I don't know what's stored there (I see hpcc-data, mydafilesrv and myesp, none are meaningfully big.).\\n\\nFor all nodes (or is it just Master?), the logs can also get quite big (with skewed JOINs I believe it becomes very verbose and drastically increase logs).\\n\\nAm I missing anything? While hpcc-data and hpcc-mirror (on slave nodes) is easy to grasp, I bumped into a disk space issue because mythor/temp overfilled the disk and query crashed. Runtime is more difficult to figure out, is what I'm saying.\\n\\nNow when trying to create mounts for some of those folders, some things don't work any longer.\\nhpcc-data and hpcc-mirror are "symbolic link" friendly. But not so much for mythor on the Master (something to do with .sentinel file???). If I create a symbolic link for /var/lib/HPCCSystems/mythor (to say /mnt/mythor), hpcc-init says it can't start mythor.\\nWhich leads to yet another question: I see in the environment.xml that some folders are configured at the <Instance> level, while others are in the <Directories> element. What's the difference between the two?\\nI see <Roxie> can specify its own "directory" (attribute), yet <Thor> cannot (or at least I don't see it...haven't checked the XSD...sorry). 
I could mount an EBS volume directly into /var/lib/HPCCSystems/mythor, but I'd rather not if possible (trying to keep all mounts in same folder and symlink those).\\n\\n\\n\\nThank you for your help!\\nLuke.\", \"post_time\": \"2018-04-03 15:19:10\" },\n\t{ \"post_id\": 23263, \"topic_id\": 5973, \"forum_id\": 14, \"post_subject\": \"Setup https on hpcc cluster without passphrase\", \"username\": \"tlhumphrey2\", \"post_text\": \"I want to setup https on an hpcc cluster using a cert that doesn't have a passphrase (the passphrase is something you would normally enter into the configmgr when setting up https). I believe there is a way to NOT use a passphrase, but I don't know how.\", \"post_time\": \"2018-10-10 14:58:35\" },\n\t{ \"post_id\": 24363, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"ming\", \"post_text\": \"On Windows Virtualbox doesn't switch to correct host-only adapter probably is due the host-only adapter names are different for Windows and Unix (Linux and Mac). Don't know why Virtualbox make this way. It is the case even before Oracle took over it. We build VM image on Linux.\\n\\nAs HPCC 7.0.6-1 and 7.0.2-1 VMs they shouldn't have any difference on Virtualbox settings. We tested several systems and they all work. \\n\\nWhat is the problem you experienced? Still not get ip?\", \"post_time\": \"2019-01-30 15:50:34\" },\n\t{ \"post_id\": 24323, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Update: I am still unable to run the 7.0.6-1 HPCC virtual image.\\n\\nBut I was able to fix the ip issue with the 7.0.2-1 image. \\n\\nI went into the network settings in windows and found the host-only network that had been set up for the virtual box. it had DHCP server as not enabled even though in my virtual box settings I had it enabled. I had to then switch the adapter settings to configure manually. Once I applied that change it fixed the settings in my windows network settings. Not sure why it hadn't switched on its own, but now it gives me an ip that I was able to connect to ECL watch with and successfully sprayed the original text file.\", \"post_time\": \"2019-01-28 16:29:01\" },\n\t{ \"post_id\": 24253, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Hi Ming,\\n\\nWith the 64x 7.0.6-1, the machine will not run at all. 
This is the session information from the machine from the last time I tried to run it:\\n\\n\\nRuntime attributes\\n\\nScreen Resolution\\n\\n720x400 @0,0\\nVM Uptime\\n\\n0d 00:10:55\\nClipboard Mode\\n\\nBidirectional\\nDrag and Drop Mode\\n\\nDisabled\\nVT-x/AMD-V\\n\\nInactive\\nNested Paging\\n\\nInactive\\nUnrestricted Execution\\n\\nInactive\\nParavirtualization Interface\\n\\nNone\\nGuest Additions\\n\\nNot Detected\\nGuest OS Type\\n\\nUbuntu (32-bit)\\nRemote Desktop Server Port\\n\\nNot Available\\n\\n\\nNetwork statistics\\n\\nAdapter 1\\n\\nData Transmitted\\n\\n0 B\\nData Received\\n\\n0 B\\nAdapter 2\\n\\nData Transmitted\\n\\n0 B\\nData Received\\n\\n0 B\\n\\n\\nStorage statistics\\n\\nController: SATAController\\n\\nSATA Port 0\\n\\nRequests\\n\\n901 \\nData Read\\n\\n14732288 B\\nData Written\\n\\n1024 B\\n\\nI tried the 32x just to see if it would run, and it does but gives me the same issue of no ip just like the 64x 7.0.2-1.\\n\\nWhen I try ifconfig eth1 there is no ip.\\n\\nThe adapter2 has been setup as you suggest the entire time, I deleted the original one and made a new one. The new adapter ip is now 192.168.148.1 with DHCP server enabled.\\n\\nThanks,\\nChris\", \"post_time\": \"2019-01-23 14:43:09\" },\n\t{ \"post_id\": 24243, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"ming\", \"post_text\": \"Hi Chris,\\n By default HPCC VM is configured to use network adapter 2 of the instance.\\nYou can check from the instance "Settings" -> "Network" ->"Adapter 2". Make sure it is attached to "Host-only Adapter" and "Name" field should look like "VirtualBox Host-Only Ethernet Adapter" or "VirtualBox Host-Only Ethernet Adapter #2", etc. For Host-only network configuration of Virtualbox you can check from "Host Network Manager" and pick the adapter assigned to your VM instance, for example, VirtualBox Host-Only Ethernet Adapter #2", It should have Adpater (with ip) and DHCP Server (with ip) defined and enabled.\\n\\nIf these all looks right but eth1 does get ip (sudo ifconfig eth1) you can try "sudo dhclient eth1". In HPCC VM documentation we tell user to configure "Adapter 2" to avoid a warning or user can click "change the network setting" when see the warning.\\n\\nIf the host-only network doesn't work (You may try other non-HPCC VM instance with host-only nextwork to check) you can try bridge network which will get ip from you internet provider.\\n\\nFor the earlier message about spray, do you get ip for eth1? VM 127.0.0.1 can't be reached from Windows host. 192.168.56.1 sounds like your adapter ip which is not the VM instance ip. Again "sudo ifconfig eth1" will tell.\\n\\nI run HPCC 7.0.6-1 VM on my Windows 10 pro and it is OK.\\n\\nLet me your progress and I can work with you to resolve this issue.\\n\\nThanks\", \"post_time\": \"2019-01-23 13:04:21\" },\n\t{ \"post_id\": 24193, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Hi Richard,\\n\\nToday I updated my virtualbox to the newest version and downloaded and setup the newest version of the virtual image (HPCCSystemsVM-amd64-7.0.6-1.ova) in the virtualbox manager. When I try to run it the box pops up, shows the virtualbox starting picture, and then goes black and nothing runs. I let it sit there for about 30 minutes and nothing happened. I also removed the old image (7.0.2-1) and then put it back in the virtualbox manager and tried to set it up again. 
That one will run but still does not give me an IP address.\\n\\nI am running Windows 10 [Version 10.0.17763.253]\\n\\nAny ideas?\\n\\nThank you for your help,\\nChris\", \"post_time\": \"2019-01-22 15:59:10\" },\n\t{ \"post_id\": 24183, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"rtaylor\", \"post_text\": \"Chris, &quot;When all else fails, punt.&quot;
\\nI would suggest that you simply download the latest VM image from the portal and start over fresh with a new VM. Once that's working, remove (delete) this troublesome VM. \\n\\nAlthough I have heard of a "missing IP" problem once before, it is very uncommon (perhaps a stray gamma ray hit your memory chip at exactly the wrong time?? ). \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-22 14:27:00\" },\n\t{ \"post_id\": 24163, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Hi ming,\\n\\nreminder: i can upload to landing zone but cannot spray.\\nthe message from a spray is:\\n
Failed: Localhost address used in remote file name: '/var/lib/HPCCSystems/mydropzone/2of12.txt'
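(Two quick checks that usually narrow this error down; a minimal sketch, assuming the default environment.xml location and the eth1 host-only adapter the VM image uses:)

    # does the VM actually have a host-only address?
    ip -4 addr show eth1 | grep inet
    # what address is the drop zone registered under? a localhost/127.0.0.1 value here is consistent with this message
    grep -i -A3 'DropZone' /etc/HPCCSystems/environment.xml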
\\n\\nthank you for your suggestions but we can't figure out what to do. we can ping 127.0.0.1, 192.168.56.1 (this is the ip address assigned by the virtualbox DHCP server). So we believe that the local host is configured correctly.\\n\\nIt appears the dfuserver is unable to communicate with the cluster. Since there is only one machine, and we verified that the networks are working, we don't know how to go forward. \\n\\nThanks,\\nChris\", \"post_time\": \"2019-01-21 20:30:12\" },\n\t{ \"post_id\": 24103, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"ming\", \"post_text\": \"Hi Chris,\\n\\nIf you run "ifconfig eth1" can you get ip? Sometime it is slow for some VM to get ip the first time. If still can't get ip you can try to reboot one more time. If still can't get ip let us know your environment: Virtualbox version and host OS. And what is your network. Usually HPCC VM should start even the host is offline.\\nAlso there is documentation about HPCC VM setup: "Running HPCC in a Virtual Machine"\\nMing\", \"post_time\": \"2019-01-16 14:36:56\" },\n\t{ \"post_id\": 24013, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Hi Jim,\\n\\nWhen I run the VM it gives me:\\nECLWatch: http://:8010\\n\\nIs this from some sort of network issue?\\nI followed the Running HPCC in a Virtual Machine documentation for installing the\\nHPCC systems image file, i have version 7.0.2-1.\\n\\nThank you,\\nChris\", \"post_time\": \"2019-01-14 21:19:55\" },\n\t{ \"post_id\": 24003, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"JimD\", \"post_text\": \"Chris,\\n\\nI just followed the steps you provided and the file sprayed successfully on my VM. \\n\\nThe error message says you are using localhost. I access ECL Watch on my VM using this url:\\nhttp://192.168.56.102:8010/ \\n\\nCan you try using the URL displayed when the VM starts?\\n\\nJim\", \"post_time\": \"2019-01-14 20:09:02\" },\n\t{ \"post_id\": 23993, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"It was the Installing & Running the HPCC System under more examples, Roxie Example: Anagram2, Spray the data file to your data refinery (Thor) cluster.\\n\\nIt gives me the error after I attempt the spray.\", \"post_time\": \"2019-01-11 15:03:15\" },\n\t{ \"post_id\": 23943, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Re: Issue with spray from first demo\", \"username\": \"gfortil\", \"post_text\": \"Please provide the name of the documentation you were following.\", \"post_time\": \"2019-01-08 16:36:04\" },\n\t{ \"post_id\": 23913, \"topic_id\": 6203, \"forum_id\": 14, \"post_subject\": \"Issue with spray from first demo\", \"username\": \"cmconnelly\", \"post_text\": \"Hi,\\n\\nI recently installed the ECL IDE and the VM environment and was trying to go through the demo to learn the system. 
When I had reached the step; Spray the Data File to your Data Refinery (Thor) Cluster, I got this error:\\n\\nFailed: Localhost address used in remote file name: '/var/lib/HPCCSystems/mydropzone/testfiles/2of12.txt'\\n\\nI originally tried the spray with the file in the mydropzone/ directory but had the same error so tried to create a folder and go from there and had the same result.\\n\\nAny help on what I can do from here would be greatly appreciated.\\n\\nThank you,\\nChris\", \"post_time\": \"2019-01-04 15:10:40\" },\n\t{ \"post_id\": 24293, \"topic_id\": 6273, \"forum_id\": 14, \"post_subject\": \"Re: How do i install hpcc on windows10\", \"username\": \"rtaylor\", \"post_text\": \"Shwetha,\\n\\nOnce you have the HPCC VM installed and running in VirtualBox, you don't need to do anything else in that Linux VM -- everything you need to do to USE that HPCC environment can be done in Windows. \\n\\nYou will either be using the ECL IDE (to write, save, and run ECL code in your HPCC environment) or your Web Browser (accessing your environment's ECL Watch page to upload/spray files or look at completed workunits, or ...). \\n\\nIOW, you ARE working in Windows. Only your HPCC environment has to run in Linux. You just need to use our Windows programs to interface to that HPCC environment.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-01-24 22:41:18\" },\n\t{ \"post_id\": 24283, \"topic_id\": 6273, \"forum_id\": 14, \"post_subject\": \"How do i install hpcc on windows10\", \"username\": \"Shwetha\", \"post_text\": \"I have done the installation of hpcc on virtual box. but i want it to be on my windows platform. I am just starting to understand hpcc. Please help me how do i achieve that?\", \"post_time\": \"2019-01-23 15:41:50\" },\n\t{ \"post_id\": 25193, \"topic_id\": 6553, \"forum_id\": 14, \"post_subject\": \"Re: Ambiguous Groups with 2 Thor Clusters\", \"username\": \"rtaylor\", \"post_text\": \"That's definitely JIRA report time!\", \"post_time\": \"2019-03-12 15:48:58\" },\n\t{ \"post_id\": 25173, \"topic_id\": 6553, \"forum_id\": 14, \"post_subject\": \"Re: Ambiguous Groups with 2 Thor Clusters\", \"username\": \"micevepay\", \"post_text\": \"Lastly, it seems as though just writing a dataset out if failing so the following code does not work.\\n\\nOutput(somedataset,,'~path::to::file')
\\n\\nSystem error: -1: getGroupName(): ambiguous groups mythor, mythor_2
\", \"post_time\": \"2019-03-12 15:26:43\" },\n\t{ \"post_id\": 25153, \"topic_id\": 6553, \"forum_id\": 14, \"post_subject\": \"Re: Ambiguous Groups with 2 Thor Clusters\", \"username\": \"micevepay\", \"post_text\": \"One thing to note is that if I make an in-line dataset I can write to both clusters using Output(in_line_dataset,,'~path::to::file',CLUSTER( 'mythor','mythor_2' ))
These two thor clusters do share a Dali. One of the things we want to do is to have core data available locally to both clusters. Other generated data not so much.\", \"post_time\": \"2019-03-12 15:12:38\" },\n\t{ \"post_id\": 25123, \"topic_id\": 6553, \"forum_id\": 14, \"post_subject\": \"Ambiguous Groups with 2 Thor Clusters\", \"username\": \"micevepay\", \"post_text\": \"Hi,\\n\\nI have a new setup with 2 Thor clusters. I'm having an issue where when I want to write a [non-inline] dataset to both clusters I get an ambiguous groups error.\\n\\nHere is my cluster topology:\\n[attachment: Topology.PNG]\\n\\nAnd when I try to run\\nOutput(somedataset,,'~path::to::file',CLUSTER( 'mythor','mythor_2' ))
\\n\\nOR\\n\\nOutput(somedataset,,'~path::to::file',CLUSTER( 'mythor' ))\\nOutput(somedataset,,'~path::to::file',CLUSTER( 'mythor_2' ))
\\n\\nI get this error\\n\\n\\tSystem error: -1: getGroupName(): ambiguous groups mythor, mythor_2
\\n\\nHow can I write a dataset to both clusters? \\n\\nI added to "Installation" group in case some configuration changes need to be made.\", \"post_time\": \"2019-03-12 14:42:15\" },\n\t{ \"post_id\": 26693, \"topic_id\": 7023, \"forum_id\": 14, \"post_subject\": \"Re: Installing Spark Plugin\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Justin, definitely open an issue regarding the sparkthor component not starting using systemcontrol.\\n\\nI'll discuss with the doc to be more explicit about the 2 components (server-side and client-side). On the server-side, the spark plugin will install an HPCC controlled instance of spark (therefore no spark install required), on the client-side, the spark-hpcc.jar has dependencies on spark provided libraries. Most users will not use the client side component directly, they'll use spark shell, or some notebook type interface. The jar is required if you plan to write a java application that exploits the hpcc-spark component features provided within. Thanks.\", \"post_time\": \"2019-05-23 18:05:04\" },\n\t{ \"post_id\": 26683, \"topic_id\": 7023, \"forum_id\": 14, \"post_subject\": \"Re: Installing Spark Plugin\", \"username\": \"rtaylor\", \"post_text\": \"jumou,\\n\\nYes, that would certainly be a bug. Please report it in JIRA (https://track.hpccsystems.com).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-05-23 13:19:44\" },\n\t{ \"post_id\": 26663, \"topic_id\": 7023, \"forum_id\": 14, \"post_subject\": \"Re: Installing Spark Plugin\", \"username\": \"jumou\", \"post_text\": \"It looks like this problem only occurs when using systemctl to start the cluster. I don't have this problem when I use the command\\n\\n/etc/init.d/hpcc-init start
\\n\\nThis may be a bug.\", \"post_time\": \"2019-05-22 19:36:52\" },\n\t{ \"post_id\": 26523, \"topic_id\": 7023, \"forum_id\": 14, \"post_subject\": \"Installing Spark Plugin\", \"username\": \"jumou\", \"post_text\": \"I'm having some trouble with Spark plugin installation. I'm not sure if I understand the documentation and need some guidance. I'm using the installation guide found here.\\n\\nI have a 3-node cluster running HPCC CE 7.2.10-1 on Ubuntu 16.04 LTS. I'm able to access ECL Watch on port 8010 and upload and spray files without issue. When I point my browser to port 8080, I get a page not found error. Further, I don't see Sparkthor listed on the ECL Watch System Servers page.\\n\\nI used the Configuration Manager to add the Sparkthor component and pushed the environment file to all machines. I've confirmed the following lines were added:\\n\\n\\n <SparkThorProcess build="_"\\n buildSet="sparkthor"\\n name="mysparkthor"\\n SPARK_EXECUTOR_CORES="1"\\n SPARK_EXECUTOR_MEMORY="1G"\\n SPARK_MASTER_PORT="7077"\\n SPARK_MASTER_WEBUI_PORT="8080"\\n SPARK_WORKER_CORES="1"\\n SPARK_WORKER_MEMORY="1G"\\n SPARK_WORKER_PORT="7071"\\n ThorClusterName="mythor">\\n <Instance computer="node001006"\\n directory="/var/lib/HPCCSystems/mysparkthor"\\n name="s1"\\n netAddress="192.168.1.6"/>\\n </SparkThorProcess>\\n
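(A quick way to check whether anything is actually listening on the ports configured above; a minimal sketch, where the IP and ports are simply the ones from the SparkThorProcess snippet:)

    # is the Spark master/worker bound on this node?
    ss -ltn | grep -E ':(7077|7071|8080)'
    # does the master web UI answer at the instance address?
    curl -sI http://192.168.1.6:8080 | head -n1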
\\n\\nI've also confirmed that Java is installed on all machines with the following output from java -version:\\n\\n\\nopenjdk version "1.8.0_212"\\nOpenJDK Runtime Environment (build 1.8.0_212-8u212-b03-0ubuntu1.16.04.1-b03)\\nOpenJDK 64-Bit Server VM (build 25.212-b03, mixed mode)\\n
\\n\\nIt wasn't clear to me whether I needed to manually install Spark, so I didn't at first. After the first failed installation attempt, however, I installed Spark, but the issue persists. This page mentions the following:\\n\\nThe HPCC Systems Spark Connector requires Spark 2.10 or 2.11 and the org.hpccsystems.wsclient library available from the Maven Repository, download now.\\n\\nFind the source code and examples in the spark-hpccsystems repository\\nGet the 7.2.12-1 JAR file from Maven Central Repository or download now\\nGet the javadocs from Maven Central Repository or download now
\\n\\nIt's also not clear to me whether these files are necessary except for specific applications, so I didn't download these files (I'm not sure how to anyway). Maybe that's where the problem lies.\", \"post_time\": \"2019-05-20 13:48:53\" },\n\t{ \"post_id\": 26703, \"topic_id\": 7053, \"forum_id\": 14, \"post_subject\": \"Re: Failed to connect to server\", \"username\": \"bforeman\", \"post_text\": \"Hi Bing,\\n\\nYou can try to update your VM, but sometimes a conflict with your network adapter can cause a failure to connect. Run a diagnostic on your network adapter first, and then if that doesn't help try to refresh your network settings from the Virtual Machine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2019-05-29 19:24:40\" },\n\t{ \"post_id\": 26593, \"topic_id\": 7053, \"forum_id\": 14, \"post_subject\": \"Failed to connect to server\", \"username\": \"BingH\", \"post_text\": \"Hello,\\n\\nI have no problem working with HPCC before. But these days failed to connect. When I run the VM, it seems everything is ok (I am using hpccdemo log in), but when I open the ECL watch, Activity shows Cluster thor(hthor/thro_roxie/roxie) not listening for workunits; thor.thor: queue active; it also shows EspProcess Error: check for myesp of type EspProcess ip:192.168.56.101:8510. The version is 6.4.24-1. Do I need to update? \\n\\nThank you!\", \"post_time\": \"2019-05-22 14:08:58\" },\n\t{ \"post_id\": 29033, \"topic_id\": 7613, \"forum_id\": 14, \"post_subject\": \"Re: Stopping & Starting HPCC V7 Cluster on Ubuntu 18.04\", \"username\": \"amillar\", \"post_text\": \"Hi Mgardner,\\n\\nThanks for getting back to me, this is really helpful and you have confirmed my suspicions that its part of a transition.\\n\\nI also came across this blog which helped : https://hpccsystems.com/blog/systemd\\n\\nI will be using systemctrl going forward to stop and start the platform and I will be keeping my eye out for new releases.\\n\\nI will definitely look at this script /opt/HPCCSystems/sbin/generate-hpccsystems-target.sh\\n\\nThis command is another handy one to know as well systemctl list-dependencies hpccsystems-platform.target`\\n\\nThanks again for your help.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2020-01-14 13:57:54\" },\n\t{ \"post_id\": 29023, \"topic_id\": 7613, \"forum_id\": 14, \"post_subject\": \"Re: Stopping & Starting HPCC V7 Cluster on Ubuntu 18.04\", \"username\": \"mgardner\", \"post_text\": \"Morning Amillar,\\n\\nMost systems have moved from sysV to systemd. What you're seeing is our transition along with the platforms we build HPCCSystems Platform on to that new init system. If you want to revert to the old sysV style of output. You can start/stop and check status with the hpcc-init script. But the sysV scripts will have no knowledge of the status of processes running under systemd and vice versa, so it's recommended to stick to one or the other.\\n\\nWith the new systemd, 'status' is just showing which components have been setup and systemd is controlling. If you look in the new eclwatch, you should be able to easily tell which components are properly started or if there are any warnings.\\n\\nsudo service hpcc-init -c mythor stop / Start\\n\\nis this still current on Ubuntu 18.04 & HPCC V7?
\\n\\nIt's recommended to stop using 'service' and start using 'systemctl'\\n\\n\\nsudo service mydafilesrv stop / start\\n\\nshould this now be :\\n\\nSudo systemctl start <service name>
\\n\\nYes, `sudo systemctl start roxie@myroxie.service` is the recommended way to start/stop components.\\n\\nAn extra command that you might want to play with is `systemctl list-dependencies <service>`\\n\\nWe also have a generated target that you can regenerate after deploying a new environment.xml with the /opt/HPCCSystems/sbin/generate-hpccsystems-target.sh. It will list all your declared components on that local node. You can then do `systemctl list-dependencies hpccsystems-platform.target` and see a list of all running components, which is more like the old hpcc-init status that you're used to.\", \"post_time\": \"2020-01-13 15:28:52\" },\n\t{ \"post_id\": 28753, \"topic_id\": 7613, \"forum_id\": 14, \"post_subject\": \"Stopping & Starting HPCC V7 Cluster on Ubuntu 18.04\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nWe have recently upgraded one of our HPCC Clusters to Version 7.4.8-1 running on Ubtunu 18.04.\\n\\nWith this being on Ubuntu 18.04 and HPCC V7 I just want to check if there is any differences with the commands to start / stop and check the status of the cluster.\\n\\nI have always run sudo bash start-hpcc.sh status / stop / start \\n\\nwhich runs : \\n\\n#!/bin/bash\\nsudo /opt/HPCCSystems/sbin/hpcc-run.sh $1\\n\\nThis command does work but the output looks like this\\n\\n192.168.20.35 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.36 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.37 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.39 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.40 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.41 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ 
ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.42 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.43 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.44 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.45 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.46 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.47 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.48 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.49 hpccsystems-platform.target status :\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\n192.168.20.50 hpccsystems-platform.target status 
:\\nhpccsystems-platform.target\\nâ ââdafilesrv@mydafilesrv.service\\nâ ââdali@mydali.service\\nâ ââdfuserver@mydfuserver.service\\nâ ââeclagent@myeclagent.service\\nâ ââeclccserver@myeclccserver.service\\nâ ââeclscheduler@myeclscheduler.service\\nâ ââesp@myesp.service\\nâ ââroxie@myroxie.service\\nâ ââsasha@mysasha.service\\nâ ââthor@mythor.service\\n \\nIs this to be expected? On HPCC V6 and Ubuntu 16.04 it was nice and clear of what didn’t start.\\n\\ne.g.\\n\\n 192.168.20.126 hpcc-init status :\\nmydafilesrv ( pid 1765 ) is running ...\\n192.168.20.132 hpcc-init status :\\nmydafilesrv ( pid 1707 ) is running ...\\nmydfuserver ( pid 1836 ) is running ...\\nmyeclagent ( pid 2766 ) is running ...\\nmyeclccserver ( pid 31624 ) is running ...\\nmyesp ( pid 3065 ) is running ...\\nmysasha ( pid 3220 ) is running ...\\nmythor ( pid 7600 ) is running with 48 slave process(es) ...\\n192.168.20.133 hpcc-init status :\\nmydafilesrv ( pid 1710 ) is running ...\\nmydali ( pid 1839 ) is running ...\\nmyeclscheduler ( pid 4018 ) is running ...\\n\\n192.168.20.134 hpcc-init status :\\nmydafilesrv ( pid 1788 ) is running ...\\n\\n192.168.20.135 hpcc-init status :\\nmydafilesrv ( pid 1663 ) is running ...\\n\\n192.168.20.136 hpcc-init status :\\nmydafilesrv ( pid 1649 ) is running ...\\n\\n192.168.20.137 hpcc-init status :\\nmydafilesrv ( pid 1664 ) is running ...\\n\\n192.168.20.138 hpcc-init status :\\nmydafilesrv ( pid 1673 ) is running ...\\n\\n192.168.20.139 hpcc-init status :\\nmydafilesrv ( pid 1666 ) is running ...\\n\\n192.168.20.140 hpcc-init status :\\nmydafilesrv ( pid 1656 ) is running ...\\n\\n192.168.20.141 hpcc-init status :\\nmydafilesrv ( pid 1660 ) is running ...\\n\\nIn addition to the commands to stop and start the cluster sometimes I have to stop and start certain components.\\n\\nFor example : \\n\\nsudo service hpcc-init -c mythor stop / Start \\n\\nis this still current on Ubuntu 18.04 & HPCC V7?\\n\\nAlso sometimes I need to stop the Dali server, on Version 6 and Ubuntu 16.04 I used to run :\\n\\nsudo service mydafilesrv stop / start \\n\\nshould this now be :\\n\\nSudo systemctl start <service name> \\n\\nAre there any other commands since V7 I should be aware of?\\n\\nThanks in advance\", \"post_time\": \"2020-01-08 14:36:32\" },\n\t{ \"post_id\": 29783, \"topic_id\": 7783, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools does not install on Ubuntu 18.04\", \"username\": \"ming\", \"post_text\": \"HPCC Systems Platform needs BLAS for matrix operations in Machine Learning. The BLAS package we currently build with probably requires other third party libraries, such as FORTRAN. There probably is a reason why we choose ATLAS, example, it is open-source or widely used by the community, etc.\", \"post_time\": \"2020-03-10 19:59:31\" },\n\t{ \"post_id\": 29753, \"topic_id\": 7783, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools does not install on Ubuntu 18.04\", \"username\": \"JimD\", \"post_text\": \"The question of why should be a separate Jira issue. The issue I created was for documentation. \\n\\nWe encourage you to open issues for things you are interested in / concerned about. Do you have a Jira account?\\n\\nJim\", \"post_time\": \"2020-03-09 17:01:19\" },\n\t{ \"post_id\": 29743, \"topic_id\": 7783, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools does not install on Ubuntu 18.04\", \"username\": \"vin\", \"post_text\": \"Thanks, Jim. But that doesn't answer my question. 
The question isn't how but why?\\n\\nI do not wish to install all those software packages on my machine. Moreover, I find it hard to believe that BLAS and FORTRAN are necessary to execute a CLI.\\n\\nThanks for the Jira issue. Please add this detail to the issue.\\n\\nBest,\\n-vince\", \"post_time\": \"2020-03-09 15:17:45\" },\n\t{ \"post_id\": 29733, \"topic_id\": 7783, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools does not install on Ubuntu 18.04\", \"username\": \"JimD\", \"post_text\": \"After installing the package, you should run the following to update any dependencies.\\nsudo apt-get install -f
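For completeness, the usual sequence on Ubuntu 18.04 looks roughly like this (a sketch only; the package file name is the one quoted in the original post below):
# install the package; dpkg will list any missing dependencies
sudo dpkg -i /tmp/hpccsystems-clienttools-community_7.6.30-1bionic_amd64.deb

# pull in and configure everything the package declared as a dependency
sudo apt-get install -f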
\\n\\nI am opening a Jira issue to add this information to the installation instructions for Client Tools (https://track.hpccsystems.com/browse/HPCC-23681)\", \"post_time\": \"2020-03-09 15:04:39\" },\n\t{ \"post_id\": 29703, \"topic_id\": 7783, \"forum_id\": 14, \"post_subject\": \"Clienttools does not install on Ubuntu 18.04\", \"username\": \"vin\", \"post_text\": \"There are a multitude of failed dependencies when installing clienttools on Ubuntu 18.04.\\n\\n$ sudo dpkg -i /tmp/hpccsystems-clienttools-community_7.6.30-1bionic_amd64.deb\\nSelecting previously unselected package hpccsystems-clienttools-7.6.\\n(Reading database ... 301595 files and directories currently installed.)\\nPreparing to unpack .../hpccsystems-clienttools-community_7.6.30-1bionic_amd64.deb ...\\nUnpacking hpccsystems-clienttools-7.6 (7.6.30-1) ...\\ndpkg: dependency problems prevent configuration of hpccsystems-clienttools-7.6:\\n hpccsystems-clienttools-7.6 depends on openssh-server; however:\\n Package openssh-server is not installed.\\n hpccsystems-clienttools-7.6 depends on expect; however:\\n Package expect is not installed.\\n hpccsystems-clienttools-7.6 depends on libapr1; however:\\n Package libapr1 is not installed. \\n hpccsystems-clienttools-7.6 depends on libaprutil1 (>= 1.2.7+dfsg); however:\\n Package libaprutil1 is not installed.\\n hpccsystems-clienttools-7.6 depends on libatlas3-base; however:\\n Package libatlas3-base is not installed.\\n hpccsystems-clienttools-7.6 depends on libboost-regex1.65.1; however:\\n Package libboost-regex1.65.1 is not installed.\\n hpccsystems-clienttools-7.6 depends on libmemcached11; however:\\n Package libmemcached11 is not installed.\\n hpccsystems-clienttools-7.6 depends on libmemcachedutil2; however:\\n Package libmemcachedutil2 is not installed.\\n \\ndpkg: error processing package hpccsystems-clienttools-7.6 (--install):\\n dependency problems - leaving unconfigured \\nErrors were encountered while processing:\\n hpccsystems-clienttools-7.6\\n
\\n\\nIf I understand this I have to install a lot of libraries, eg, 'libatlas3-base'. According to https://packages.debian.org/sid/libatlas3-base "ATLAS is an approach for the automatic generation and optimization of numerical software. Currently ATLAS supplies optimized versions for the complete set of linear algebra kernels known as the Basic Linear Algebra Subroutines (BLAS), and a subset of the linear algebra routines in the LAPACK library."\\n\\nFurthermore, libatlas has a whole load of dependencies itself, such as fortran.\\n\\nIt appears that I have to install BLAS and FORTRAN in order to use the CLI. Surely, I must be mistaken.\\n\\nPlease confirm or correct my (mis)understanding.\", \"post_time\": \"2020-03-06 21:08:44\" },\n\t{ \"post_id\": 29813, \"topic_id\": 7823, \"forum_id\": 14, \"post_subject\": \"Re: Virtualbox - vboxnet0 error\", \"username\": \"ming\", \"post_text\": \"You can create vboxnet0 or vboxnet1 from VirtualBox -> File -> Host Network Manager\\nMake sure "DHCP Server" is enabled\", \"post_time\": \"2020-03-13 13:25:51\" },\n\t{ \"post_id\": 29803, \"topic_id\": 7823, \"forum_id\": 14, \"post_subject\": \"Re: Virtualbox - vboxnet0 error\", \"username\": \"rtaylor\", \"post_text\": \"brianv,\\n\\nI have had this experience also when I made the mistake of upgrading to VirtualBox 6.0 from the 5.2 I had been using. I "cured" it by uninstalling and going back to the 5.2 version.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-03-13 12:45:20\" },\n\t{ \"post_id\": 29793, \"topic_id\": 7823, \"forum_id\": 14, \"post_subject\": \"Virtualbox - vboxnet0 error\", \"username\": \"brianv\", \"post_text\": \"Hello,\\n\\nI have installed Virtualbox and imported the HPCC Platform VM Image 64bit. However when I try to run it I get an error\\n\\nCould not start the machine HPCCSystemsVM-amd64-7.6.30-1 because the following physical network interfaces were not found:\\n\\nvboxnet0 (adapter 2)\\nHost\\nYou can either change the machine's network settings or stop the machine.\\n\\n
\\n\\n\\nMy OS is Windows 10. As shown on the tutorial videos, the platform settings for the Network->Adapter 2 are "Attached to: Host-only Adapter" and "Name: VirtualBox Host-Only Ethernet Adapter".\\n\\nI have tried a few solutions I found through google, but no luck. Any help would be very much appreciated.\\n\\nThank you.\", \"post_time\": \"2020-03-12 15:18:41\" },\n\t{ \"post_id\": 30773, \"topic_id\": 8123, \"forum_id\": 14, \"post_subject\": \"Re: Listing Files Details from Data Store\", \"username\": \"rtaylor\", \"post_text\": \"Artur,\\n\\nYou can use ECL code to do this, too. The Standard Library has a GetLogicalFileAttribute() function that gets all the attributes of any file. The one you're interested in is this:\\nSTD.File.GetLogicalFileAttribute(filename,'clusterName')
\\nHere's a function I wrote using that function to return all the attributes of any set of files:\\nIMPORT STD;\\nGetLogicalFileAllAttributes(SET OF STRING files) := FUNCTION\\n rec := RECORD\\n\\t\\tSTRING filename; \\n\\t\\tSTRING recordSize; \\n\\t\\tSTRING recordCount; \\n\\t\\tSTRING size; \\n\\t\\tSTRING clusterName; \\n\\t\\tSTRING directory; \\n\\t\\tSTRING numparts; \\n\\t\\tSTRING owner; \\n\\t\\tSTRING description; \\n\\t\\tSTRING partmask; \\n\\t\\tSTRING name; \\n\\t\\tSTRING modified; \\n\\t\\tSTRING protected; \\n\\t\\tSTRING format; \\n\\t\\tSTRING job; \\n\\t\\tSTRING checkSum; \\n\\t\\tSTRING kind; \\n\\t\\tSTRING csvSeparate; \\n\\t\\tSTRING csvTerminate; \\n\\t\\tSTRING csvEscape; \\n\\t\\tSTRING headerLength; \\n\\t\\tSTRING footerLength; \\n\\t\\tSTRING rowtag; \\n\\t\\tSTRING workunit; \\n\\t\\tSTRING accessed; \\n\\t\\tSTRING expireDays; \\n\\t\\tSTRING maxRecordSize; \\n\\t\\tSTRING csvQuote; \\n\\t\\tSTRING blockCompressed; \\n\\t\\tSTRING compressedSize; \\n\\t\\tSTRING fileCrc; \\n\\t\\tSTRING formatCrc;\\n\\t\\tSTRING ECL; \\n\\tEND; \\n\\n ds := DATASET(files,{STRING file});\\n\\n RetDS := NOTHOR(\\n PROJECT(ds,TRANSFORM(rec,\\n ThisFile := '~' + LEFT.file;\\n GetAttr(STRING attr) := \\n STD.File.GetLogicalFileAttribute(ThisFile,\\n attr);\\n SELF.filename := LEFT.file;\\n SELF.recordSize := GetAttr('recordSize');\\n SELF.recordCount := GetAttr('recordCount');\\n SELF.size := GetAttr('size');\\n SELF.clusterName := GetAttr('clusterName');\\n SELF.directory := GetAttr('directory');\\n SELF.numparts := GetAttr('numparts');\\n SELF.owner := GetAttr('owner');\\n SELF.description := GetAttr('description');\\n SELF.partmask := GetAttr('partmask');\\n SELF.name := GetAttr('name');\\n SELF.modified := GetAttr('modified');\\n SELF.protected := GetAttr('protected');\\n SELF.format := GetAttr('format');\\n SELF.job := GetAttr('job');\\n SELF.checkSum := GetAttr('checkSum');\\n SELF.kind := GetAttr('kind');\\n SELF.csvSeparate := GetAttr('csvSeparate');\\n SELF.csvTerminate := GetAttr('csvTerminate');\\n SELF.csvEscape := GetAttr('csvEscape');\\n SELF.headerLength := GetAttr('headerLength');\\n SELF.footerLength := GetAttr('footerLength');\\n SELF.rowtag := GetAttr('rowtag');\\n SELF.workunit := GetAttr('workunit');\\n SELF.accessed := GetAttr('accessed');\\n SELF.expireDays := GetAttr('expireDays');\\n SELF.maxRecordSize := GetAttr('maxRecordSize');\\n SELF.csvQuote := GetAttr('csvQuote');\\n SELF.blockCompressed := GetAttr('blockCompressed');\\n SELF.compressedSize := GetAttr('compressedSize');\\n SELF.fileCrc := GetAttr('fileCrc');\\n SELF.formatCrc := GetAttr('formatCrc');\\n SELF.ECL := GetAttr('ECL'))));\\n RETURN RetDS;\\nEND;\\t\\n\\nfilelist := NOTHOR(STD.File.LogicalFileList());\\n// MyFiles := filelist(owner='rtaylor' and name[1..5]='class');\\nMyFiles := filelist(Std.Str.find(name,'lookupcsz',1)<>0);\\n\\nGetLogicalFileAllAttributes(SET(MyFiles,name));
\\nI'm using NOTHOR because this is only working with the DFU's metadata so there's no need to run on Thor. I'm also using the STD.File.LogicalFileList() function to get all the filenames, then filtering that result to get just the files I'm interested in.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-20 12:44:08\" },\n\t{ \"post_id\": 30743, \"topic_id\": 8123, \"forum_id\": 14, \"post_subject\": \"Listing Files Details from Data Store\", \"username\": \"abaruchi\", \"post_text\": \"Hi guys,\\n\\nI need to develop a tool to keep tracking the existence of files in OS level and also in DFU. In order to list files from data store Im using the dfuplus command as following:\\n\\n\\ndfuplus action=list name=* server=http://<ecl_watch:8010> username=<myuser> password=<mypass>\\n
\n\nThe command executes fine, however, in my configuration I have four clusters being managed by the same ECL Watch and I would like to gather more details about these files, at least which cluster they are stored on. Does somebody know a command or another parameter to get this kind of information?\n\nThanks in advance.\n\nAtt.\nArtur Baruchi\", \"post_time\": \"2020-05-19 21:41:20\" },\n\t{ \"post_id\": 32323, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"McPP82\", \"post_text\": \"I've checked, the new update solves the problem, thanks for your work!\", \"post_time\": \"2020-10-21 15:37:53\" },\n\t{ \"post_id\": 32313, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"ming\", \"post_text\": \"We fixed the dependent library which caused the problem. Check again in the latest Clienttools, for example 7.8.50+, 7.10.22+ or 7.12.0+.\", \"post_time\": \"2020-10-20 22:55:16\" },\n\t{ \"post_id\": 32253, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"McPP82\", \"post_text\": \"Yes, discrepancies in the installation path of some binaries, due to what Windows automatically decides to do, have already caused issues... I've fortunately managed to solve them by myself, but it's still unpleasant.\", \"post_time\": \"2020-10-16 16:20:45\" },\n\t{ \"post_id\": 30883, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"Gurman\", \"post_text\": \"Thank you,\n\nInstalling the redistributables from https://www.microsoft.com/en-us/downloa ... x?id=40784 fixed the issue.\n\nThe .exe at clienttools/tmp did not work as another version of that redistributable product was already installed.\", \"post_time\": \"2020-05-22 16:26:38\" },\n\t{ \"post_id\": 30873, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"ming\", \"post_text\": \"I notice the missing library is MSVRC120.dll, which I remember is from Visual Studio 2013. That means the clienttools binary is old, not 7.8.12-1. This is probably due to an old clienttools binary still being in the path while most of its libraries have already been uninstalled. For example, Windows may copy certain binaries to c:/Windows\", \"post_time\": \"2020-05-22 13:21:37\" },\n\t{ \"post_id\": 30863, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"ming\", \"post_text\": \"Is there a file vcredist_x86 under c:/Program Files(x86)/HPCCSystems/7.8.12/clienttools/tmp? This is the Visual Studio runtime library, which should be automatically installed during clienttools installation. You can manually install it again to see if it fixes your problem or not. Let us know\", \"post_time\": \"2020-05-22 13:16:40\" },\n\t{ \"post_id\": 30853, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Re: Clienttools Installation on Windows\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the error report - sounds like a missing dependency on our build server.\n\nAs a workaround you could try installing the runtimes from: https://www.microsoft.com/en-us/downloa ... 
x?id=40784\", \"post_time\": \"2020-05-22 12:48:35\" },\n\t{ \"post_id\": 30823, \"topic_id\": 8133, \"forum_id\": 14, \"post_subject\": \"Clienttools Installation on Windows\", \"username\": \"Gurman\", \"post_text\": \"Client tools are not running on windows after installation.\\nTried version 7.8.12.\\nGetting the following error message:\\n\\n"The code execution cannot proceed because MSVRC120.dll was not found. Reinstalling the program may fix this problem." (PFA Err.jpg)\\n\\nTried reinstalling multiple times with same issue.\", \"post_time\": \"2020-05-21 16:28:52\" },\n\t{ \"post_id\": 31343, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Re: Certificate Problem - The remote certificate is invalid\", \"username\": \"amillar\", \"post_text\": \"Hi Yanrui,\\n\\nThanks for the update, that's great, we will get this Roxie's upgraded and let you know if we have any further problems.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2020-07-07 12:11:57\" },\n\t{ \"post_id\": 31233, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Re: Certificate Problem - The remote certificate is invalid\", \"username\": \"yma\", \"post_text\": \"Hi Antony,\\n\\nSorry for the delay in responding to you, for some reason I didn't get a notification of your posts. \\n\\nThe version you use 6.2.4-1 doesn't support certificate chain. We added the support starting in 7.6, so all the 7.6.x (and later) builds do have the support.\\n\\nThanks,\\n\\nYanrui\", \"post_time\": \"2020-06-26 19:15:03\" },\n\t{ \"post_id\": 31203, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Re: Certificate Problem - The remote certificate is invalid\", \"username\": \"amillar\", \"post_text\": \"Hi Yanrui,\\n\\njust checking in if you had a chance to look into this issue further for us?\\n\\nDo you need anything else from me?\\n\\nLet me know when you can.\\n\\nThanks in advance.\\n\\nAntony\", \"post_time\": \"2020-06-25 16:18:25\" },\n\t{ \"post_id\": 31163, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Re: Certificate Problem - The remote certificate is invalid\", \"username\": \"amillar\", \"post_text\": \"Hi Yanrui,\\n\\nThanks for getting in touch.\\n\\nin answer to your questions.\\n\\n1. From your azure web app, are you trying to talk to esp or roxie itself? Which port are you trying to connect to?\\n\\nOur Azure app is talking to the Roxie directly, on port 8002, the Roxie is running HPCC V 6.2.4-1, on Ubuntu 14.04\\n\\n2. Is your roxie/esp running in azure also, or somewhere else?\\n\\nThe Roxie, is on premise, so the connection from Azure is whitelisted on the App service IP, and is then Nat'd through our on premise Firewall.\\n\\nWe have multiple Roxie's set-up, on premise talking to various Azure App services all of which are on premise.\\n\\nAll seems to work ok, and has been for many years, but recently it appears that the SSL / TLS connection from the App Server (which will be a server running IIS10) is having problems verifying the certificate chain.\\n\\nWhen you go direct to the Roxie URL, the computers browser / windows machine seems to validate the chain from its own certificate store.\\n\\nI did have this problem with Wordpress a while ago, and had to add a line to the apache config which pointed to the intermediate certificate.\\n\\nIs it possible to do this from config manager by using the "CA_Certificates_Path"?\\n\\nIf so, does this need to be a folder location rather than an absolute file path? 
\\n\\nare there any restrictions on the HPCC user accessing certain locations? \\n\\n for example I have tried: \\n\\n/var/lib/HPCCSystems/myesp and /var/lib/HPCCSystems/myesp/cachain.cer\\n\\nMy public and private certificate are in this folder /var/lib/HPCCSystems/myesp \\n\\nI have also tried putting the Cert bundles in /usr/local/share/ca-certificates\\n\\nthen running : sudo update-ca-certificates\\n\\nI see in the config manager for the public and private certificates you specify a file name only.\\n\\ncertificateFileName\\nprivateKeyFileName\\n\\nI have used various tools on the internet to verify the certificate, e.g. SSL Labs and whatsmycertchan\\n\\nand each do come back with a mismatch. as mentioned previously I have "chained" the certificate into one .cer file. e.g. Certificate - Intermediate - Root \\n\\nI have also restarted the ESP component, the whole cluster and restarted the OS to make sure these changes where getting picked up.\\n\\nIts also worth noting when running OpenSSL tests I do also get a cert validation error if I do not pass a CA file e.g.\\n\\nopenssl s_client -connect localhost:8002. (this fails with unable to get first certificate)\\n\\nbut this passes the test, when the CA file is specified \\n\\nopenssl s_client -CAfile /var/lib/HPCCSystems/myesp/certificate.cer -connect localhost:8002\\n\\nAny help would be greatly appreciated, and if you have any more questions or queries don't hesitate to ask, and I will do my best to answer.\\n\\nThanks\\n\\nAntony\", \"post_time\": \"2020-06-16 09:08:07\" },\n\t{ \"post_id\": 31153, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Re: Certificate Problem - The remote certificate is invalid\", \"username\": \"yma\", \"post_text\": \"Hi, I have a couple of questions about your post that would help us understand the issue better:\\n1. From your azure web app, are you trying to talk to esp or roxie itself? Which port are you trying to connect to?\\n2. Is your roxie/esp running in azure also, or somewhere else?\\n\\nThanks,\\n\\nYanrui\", \"post_time\": \"2020-06-15 15:12:04\" },\n\t{ \"post_id\": 31143, \"topic_id\": 8203, \"forum_id\": 14, \"post_subject\": \"Certificate Problem - The remote certificate is invalid\", \"username\": \"amillar\", \"post_text\": \"Hi there,\\n\\nAs of the last few days I am having problems talking to my Roxie over HTTPS from a web App in Azure, the error I am getting is : \\n\\nThe remote certificate is invalid according to the validation procedure.\\n\\n[WebException: The underlying connection was closed: Could not establish trust relationship for the SSL/TLS secure channel.]\\n\\nFrom what I understand the web server is unable to obtain the certificate chain. 
It is also worth pointing out that the certificate has been issued from GoDaddy and is not a self signed one.\\n\\nI have been using open SSL to verify the certificate and can confirm this is passing the tests, when I pass either a -CAPath or -CAfile to the certificate itself, or the general CA store.\\n\\nI have also made sure that my certificate has the intermediates “chained” inside the same certificate.\\n\\nI have even tried putting the intermediates and root certificate within the /var/lib/HPCCSystems/myesp folder.\\n\\nLooking at the config manager I see a setting for CA_Certificates_Path under HTTPS settings within the ESP Process.\\n\\nI have tried changing this to a directory, and a complete file path including the .cer file but it does not seem to have solved my issue.\\n\\nNormally in Apache I would specify the CA chain in the sites conf, but I don’t know where to do that in HPCC .\\n\\ncan you let me know how would get around this issue?\\n\\nAlso can you tell me what web server you use e.g. tomcat, Ngix etc? to help me get a better understanding.\\n\\nThanks in advance.\", \"post_time\": \"2020-06-12 15:59:50\" },\n\t{ \"post_id\": 32563, \"topic_id\": 8473, \"forum_id\": 14, \"post_subject\": \"Re: Error: Could not locate a supported version of visual st\", \"username\": \"rtaylor\", \"post_text\": \"lpezet,\\n\\nGordon just suggested to me that you might want to try re-selecting your target cluster from the droplist. Apparently the server queue names sometimes change.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-11-16 16:56:21\" },\n\t{ \"post_id\": 32553, \"topic_id\": 8473, \"forum_id\": 14, \"post_subject\": \"Re: Error: Could not locate a supported version of visual st\", \"username\": \"lpezet\", \"post_text\": \"Hi Richard!\\n\\nAs I mentioned, I didn't touch anything else besides ECL code. I know the "L" stands for local, but my target was still "thor" in that dropdown in the IDE.\\n\\nI went ahead and looked into installing more VisualStudio stuff (didn't install anything though), but I don't understand why I should be doing that since, like I said, it worked before.\\nI started up my computer yesterday and it's back working again.\\n\\nCouple things:\\n\\n1) I'll try but I think I can replicate the behavior in the IDE by messing up with the eclcc.exe. You can then see what I mean by this "L"ocal tab in the IDE even though the Target is set to "thor" in the IDE. If there's a problem at the eclcc.exe level, this is how the IDE responds to it.\\n\\n2) I wish I could get something more verbose from eclcc.exe to help with "Could not locate a supported version of visual studio." the next time it happens (happened to me on 2 different computers already). Would you have anything for this?\\n\\nThanks Richard!\", \"post_time\": \"2020-11-16 16:42:48\" },\n\t{ \"post_id\": 32543, \"topic_id\": 8473, \"forum_id\": 14, \"post_subject\": \"Re: Error: Could not locate a supported version of visual st\", \"username\": \"rtaylor\", \"post_text\": \"lpezet,\\n\\nThe fact that the workunit starts with "L" indicates that your target is set to "Local" and that is the reason the compiler is looking for Visual Studio. The generated C++ code needs a standard C++ compiler, and when you target Thor/hThor/ROXIE, that C++ compiler is in your environment's infrastructure. 
However, when your target is "local" you're specifying that you want to create an EXE to run on your local machine, and that's why it's looking for Visual Studio to compile the generated C++ code.\\n\\nSo, the real question is, what is your intention? If you want to run on your environment and your Target has somehow gotten switched to Local, then reset your target to Thor/hThor/ROXIE. If you don't see those listed as targets, then reboot the ECL IDE and try resetting the target then.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-11-16 14:18:16\" },\n\t{ \"post_id\": 32533, \"topic_id\": 8473, \"forum_id\": 14, \"post_subject\": \"Error: Could not locate a supported version of visual studio\", \"username\": \"lpezet\", \"post_text\": \"I just did a fresh install of HPCC Systems ECL IDE version 7.12.4-1 on Windows 10.\\nI was cruising along, fixing my crazy MARCOs when all of a sudden compiling (through the IDE) ECL code was returning an error (in the IDE, it looks like a local execution, like "L-20201113-...." for the name of the new tab).\\nI went into the clientools to execute eclcc myself and sure enough it gave me an error:\\n\\nC:\\\\>eclcc Test.ecl\\nFailed to compile a.out\\na.out(0,0): error C3000: Compile/Link failed for a.out (see '\\\\\\\\192.168.56.1\\\\c$\\\\eclcc.log' for details)\\n\\n---------- compiler output --------------\\nError: Could not locate a supported version of visual studio.\\n\\n--------- end compiler output -----------\\n1 error, 0 warning\\n
\\nI attached the eclcc.log file, if that helps.\\n\\nTo be clear, I was really just doing ECL stuff, it was working just fine and then all of a sudden it stopped working.\\nI haven't uninstalled anything re. Visual Studio, or did anything else at the time (e.g. I didn't reboot or anything).\\nAfter this happened, I tried to reboot, uninstall/reinstall ECL IDE, but I'm still getting the same "Could not locate a supported version of visual studio." error.\\n\\nWhy is this happening all of a sudden? How do I fix this?\\n\\nThanks for the help!\", \"post_time\": \"2020-11-14 03:23:09\" },\n\t{ \"post_id\": 33603, \"topic_id\": 8823, \"forum_id\": 14, \"post_subject\": \"Re: File Spray from Thor to Roxie - Access Denied\", \"username\": \"abaruchi\", \"post_text\": \"Hi,\\n\\nThanks for your replies. Looks like, after adding the ESP server in White List, the error disappeared. I still not able to copy the index, but I think it is a different error now. I'm investigating the problem right now, however, I really appreciate your help.\\n\\nRegards,\\nArtur\", \"post_time\": \"2021-05-27 13:53:28\" },\n\t{ \"post_id\": 33593, \"topic_id\": 8823, \"forum_id\": 14, \"post_subject\": \"Re: File Spray from Thor to Roxie - Access Denied\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthis is being caused by Dali's AllowList mechanism, which prevents unknown/unauthorized clients from accessing Dali meta data (this feature was added in version 7.4).\\n\\nBy default only clients within the same environment are permitted (they are implicitly added to the AllowList).\\n\\nHowever you can add other clients to the AllowList, or disable the feature.\\nTo disable the feature, you would need to add:\\n<AllowList enabled="false"/>
\\n\\nas a property under DaliServerProcess in the environment.xml\\n\\nThere is more info in the HPCC Systems® Administrator's Guide, under the section 'The AllowList in Dali'.\\n\\nHope that helps.\", \"post_time\": \"2021-05-26 08:31:36\" },\n\t{ \"post_id\": 33583, \"topic_id\": 8823, \"forum_id\": 14, \"post_subject\": \"Re: File Spray from Thor to Roxie - Access Denied\", \"username\": \"ghalliday\", \"post_text\": \"In recent versions of HPCC access to dali is protected with a whitelist. You will need to add an exception for the azure cluster to the whitelist.\\n\\nSee https://track.hpccsystems.com/browse/HPCC-22355 and linked issues for more details.\\n\\nIn future versions (probably 8.4) the need to directly connect to dali will be removed and the remote copying will be routed through esp.\", \"post_time\": \"2021-05-26 08:25:11\" },\n\t{ \"post_id\": 33563, \"topic_id\": 8823, \"forum_id\": 14, \"post_subject\": \"File Spray from Thor to Roxie - Access Denied\", \"username\": \"abaruchi\", \"post_text\": \"Hi Guys,\\n\\nI'm installing a roxie cluster into Azure (installation using VMs) and I'm running a Thor cluster where I built the necessary keys to my queries. I want to be able to have several Thor clusters where I can copy files to this Roxie running in Azure. However, when I try to perform a remote copy, I'm facing an error, saying that my ESP Server (client) doesn't have permission to copy from Thor's dali server (attached the error into this post).\\n\\nProbably I'm missing a very small detail for this problem, however, I've tried several solutions and didn't work. What I've tried:\\n- Added the thor node into hardware list of my roxie cluster\\n- Added authentication (same user) in thor and roxie ECLWatch/ESP \\n- Disabled UDP Multicast (Azure doesn't support Multicast - I though this could be impacting the communication between nodes, but It was a shot in the dark).\\n\\nThanks for your help,\\n\\nArtur Baruchi\", \"post_time\": \"2021-05-24 18:16:43\" },\n\t{ \"post_id\": 34293, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Re: Thor Slave won't start\", \"username\": \"rtaylor\", \"post_text\": \"lpezet,\\n\\nYou should be able to simply click on that link and get to the ticket. JIRA will ask you to login, so if you do not yet have a JIRA account you can just sign up to get one. Remember, this is Open Source so the JIRA tickets are visible to everybody.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-10-27 13:09:14\" },\n\t{ \"post_id\": 34273, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Re: Thor Slave won't start\", \"username\": \"lpezet\", \"post_text\": \"Could I get access to that JIRA ticket?\\nhttps://track.hpccsystems.com/browse/HPCC-26258\\n\\nI'm at it again trying to run version 8.4 on Ubuntu 20.04 LTS and I'm still having issues with that new "systemctl" way of things. I'd like to check on that ticket if there's anything I could try to make it work.\\n\\nThanks!\", \"post_time\": \"2021-10-26 17:14:56\" },\n\t{ \"post_id\": 33853, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Re: Thor Slave won't start\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI would say thorslaves-exec.sh runs fine, and it's something with /opt/HPCCSystems/bin/thorslave_lcr. 
When I try to run manually /opt/HPCCSystems/bin/thorslave_lcr I can't get much from it (besides its usage if I don't pass the right parameters): exit code is always 0 and no std/error output.\", \"post_time\": \"2021-07-26 06:28:34\" },\n\t{ \"post_id\": 33843, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Re: Thor Slave won't start\", \"username\": \"mgardner\", \"post_text\": \"Ipezet, thanks for bringing this up. I've opened a Jira ticket and I'll be investigating the issue. https://track.hpccsystems.com/browse/HPCC-26258\\n\\nReading the info you provided, it looks like there isn't actually an issue with the ssh call going through, the error is in the thorslaves-exec.sh script?\", \"post_time\": \"2021-07-23 22:42:31\" },\n\t{ \"post_id\": 33833, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Re: Thor Slave won't start\", \"username\": \"lpezet\", \"post_text\": \"I've now gone down all the way to HPCCSystems 7.8 on Ubuntu 20.04 and still getting the same behavior WHEN USING systemctl (as mentioned in the doc: https://cdn.hpccsystems.com/releases/CE ... .2.2-1.pdf).\\nNow I went back to HPCCSystems 8.2/Ubuntu 20.04, but this time using the old school /etc/init.d/hpcc-init start and it worked!\\nHere are the processes I get for hpcc user:\\n\\nhpcc 25704 0.0 0.0 9672 4372 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_dafilesrv\\nhpcc 25743 0.0 0.1 138536 16424 pts/0 Sl 19:06 0:00 dafilesrv -L /var/log/HPCCSystems -I mydafilesrv\\nhpcc 25887 0.0 0.0 9672 4388 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_dali\\nhpcc 25924 0.0 0.2 764380 47800 pts/0 Sl 19:06 0:00 daserver\\nhpcc 26085 0.0 0.0 9672 4392 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_dfuserver\\nhpcc 26122 0.0 0.1 604864 24360 pts/0 Sl 19:06 0:00 dfuserver\\nhpcc 26283 0.0 0.0 9672 4464 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_eclagent\\nhpcc 26323 0.0 0.0 421156 14400 pts/0 Sl 19:06 0:00 agentexec\\nhpcc 26472 0.0 0.0 9672 4432 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_eclccserver\\nhpcc 26509 0.0 0.0 576856 14688 pts/0 Sl 19:06 0:00 eclccserver\\nhpcc 26674 0.0 0.0 9672 4444 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_eclscheduler\\nhpcc 26711 0.0 0.0 601496 14356 pts/0 Sl 19:06 0:00 eclscheduler\\nhpcc 26866 0.0 0.0 9672 4228 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_esp\\nhpcc 26903 0.0 0.3 756612 57512 pts/0 Sl 19:06 0:00 esp snmpid=26866\\nhpcc 27392 0.0 0.0 9672 4292 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_roxie\\nhpcc 27434 0.0 0.2 1771348 44128 pts/0 Sl 19:06 0:00 roxie --topology=RoxieTopology.xml --logfile --restarts=0 --stdlog=0\\nhpcc 27608 0.0 0.0 9672 4340 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_sasha\\nhpcc 27645 0.0 0.0 617888 14940 pts/0 Sl 19:06 0:00 saserver\\nhpcc 27807 0.0 0.0 9672 4396 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_thor\\nhpcc 27952 0.0 0.1 8577428 27952 pts/0 Sl 19:06 0:00 ./thorslave_mythor --master=172.32.5.233:20000 --slave=.:20100 --slavenum=1 --slaveprocessnum=0 --logDir=/var/log/HPCCSystems/mythor\\nhpcc 27957 0.0 0.1 4701252 28336 pts/0 Sl 19:06 0:00 /var/lib/HPCCSystems/mythor/thormaster_mythor --master=172.32.5.233:20000\\nhpcc 28135 0.0 0.0 9672 4364 pts/0 S 19:06 0:00 /bin/bash /opt/HPCCSystems/bin/init_toposerver\\nhpcc 28172 0.0 0.0 87080 8876 pts/0 Sl 19:06 0:00 toposerver\\n
\n\nPreflight/certification is all good too.\nWhy, oh why?\", \"post_time\": \"2021-07-23 19:14:01\" },\n\t{ \"post_id\": 33832, \"topic_id\": 8932, \"forum_id\": 14, \"post_subject\": \"Thor Slave won't start\", \"username\": \"lpezet\", \"post_text\": \"Hello!\n\nI've been trying to install a new version of HPCCSystems Platform on Ubuntu 20.x but I'm facing an issue where the Thor Slave just won't start.\nI tried HPCCSystems Platform 8.2, 8.0 and 7.12 (latest for each) on Ubuntu 20.04 and 20.10 but I get the same behavior.\nEvery time I simply do:\n\ndpkg -i hpccsystems-platform....\napt install -f\nsystemctl start hpccsystems-platform.service\n
\\nWhen I run preflight certification, it shows Thor Slave is not ready.\\nDoing simple ps auxwww | grep hpcc I get the following:\\n\\nhpcc 50730 0.0 0.0 130820 6532 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/dafilesrv -L /var/log/HPCCSystems -I mydafilesrv -D\\nhpcc 50745 0.0 0.0 577052 9004 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/eclccserver --daemon myeclccserver\\nhpcc 50748 0.0 0.0 355816 8732 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/agentexec --daemon myeclagent\\nhpcc 50754 0.0 0.2 2108300 38356 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/daserver --daemon mydali\\nhpcc 50755 0.0 0.1 540264 19944 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/dfuserver --daemon mydfuserver\\nhpcc 50757 0.0 0.2 2279816 39692 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/roxie --topology=RoxieTopology.xml --logfile --restarts=2 --stdlog=0 --daemon myroxie\\nhpcc 50760 0.0 0.0 536128 8444 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/eclscheduler --daemon myeclscheduler\\nhpcc 50769 0.0 0.0 86972 3352 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/toposerver --daemon mytoposerver\\nhpcc 50775 0.0 0.2 993604 46228 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/esp --daemon myesp\\nhpcc 51046 0.0 0.1 4292344 22896 ? Ssl 16:20 0:00 /opt/HPCCSystems/bin/thormaster_lcr --daemon mythor MASTER=172.32.5.210:20000\\n
\\n\\nThe content of the /var/log/HPCCSystems/mythor/thorslaves-launch.debug is like this:\\n\\n+ [[ -z mythor ]]\\n+ [[ -z start ]]\\n++ pwd\\n+ cwd=/var/lib/HPCCSystems/mythor\\n+ [[ /var/lib/HPCCSystems/mythor != \\\\/\\\\v\\\\a\\\\r\\\\/\\\\l\\\\i\\\\b\\\\/\\\\H\\\\P\\\\C\\\\C\\\\S\\\\y\\\\s\\\\t\\\\e\\\\m\\\\s\\\\/\\\\m\\\\y\\\\t\\\\h\\\\o\\\\r ]]\\n+ source mythor.cfg\\n++ PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin:/opt/HPCCSystems/bin:/opt/HPCCSystems/sbin:/var/lib/HPCCSystems/mythor\\n++ THORNAME=mythor\\n++ THORMASTER=172.32.5.210\\n++ THORMASTERPORT=20000\\n++ THORSLAVEPORT=20100\\n++ localthorportinc=20\\n++ slavespernode=1\\n++ channelsperslave=1\\n++ DALISERVER=172.32.5.210:7070\\n++ localthor=true\\n++ breakoutlimit=3600\\n++ refreshrate=3\\n++ autoSwapNode=false\\n++ SSHidentityfile=/home/hpcc/.ssh/id_rsa\\n++ SSHusername=hpcc\\n++ SSHpassword=\\n++ SSHtimeout=0\\n++ SSHretries=3\\n++ SSHsudomount=\\n+ slaveIps=($(/opt/HPCCSystems/bin/daliadmin server=$DALISERVER clusternodes ${THORNAME} slaves timeout=2 1>/dev/null 2>&1; uniq slaves))\\n++ /opt/HPCCSystems/bin/daliadmin server=172.32.5.210:7070 clusternodes mythor slaves timeout=2\\n++ uniq slaves\\n+ [[ -z 172.32.5.210 ]]\\n+ [[ -z 172.32.5.210 ]]\\n+ numOfNodes=1\\n+ (( i=0 ))\\n+ (( i<1 ))\\n+ (( c=0 ))\\n+ (( c<1 ))\\n+ __slavePort=20100\\n+ __slaveNum=1\\n+ ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no -o BatchMode=yes -i /home/hpcc/.ssh/id_rsa hpcc@172.32.5.210 '/bin/bash -c '\\\\''/opt/HPCCSystems/sbin/thorslaves-exec.sh start thorslave_mythor_1 20100 1 mythor 172.32.5.210 20000'\\\\'''\\n(...)\\n+ exit 0\\n
\\n(had to remove some lines from it to be able to submit this post).\\n\\nI can run manually the ssh command from above, or even directly the thorslaves-exec.sh (with all the right values) but nothing shows up (no errors, no output). I ran the command that thorslaves-exec.sh runs, systemctl start thorslave@thorslave_mythor_1.service, and here is its status:\\n● thorslave@thorslave_mythor_1.service - thorslave_mythor_1\\n Loaded: loaded (/etc/systemd/system/thorslave@.service; static)\\n Active: failed (Result: exit-code) since Fri 2021-07-23 16:30:48 UTC; 10min ago\\n Process: 53104 ExecStart=/opt/HPCCSystems/bin/thorslave_lcr --daemon thorslave_mythor_1 master=${THORMASTER}:${THORMASTERPORT} slave=.:${SLAVEPORT} slavenum=${SLAVENUM} logDir=/var/log/HPCCSystems/${THORNAME} (code=exited, status=1/FAILURE)\\n Main PID: 53104 (code=exited, status=1/FAILURE)\\n\\nJul 23 16:30:48 ip-172-32-5-210 systemd[1]: Started thorslave_mythor_1.\\nJul 23 16:30:48 ip-172-32-5-210 systemd[1]: thorslave@thorslave_mythor_1.service: Main process exited, code=exited, status=1/FAILURE\\nJul 23 16:30:48 ip-172-32-5-210 systemd[1]: thorslave@thorslave_mythor_1.service: Failed with result 'exit-code'.
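For reference, the systemd side of the story can usually be pulled out with something along these lines (the unit name is the one from the status output above):
# everything journald captured for the failing unit, including anything it wrote to stderr
journalctl -u thorslave@thorslave_mythor_1.service --no-pager

# and the unit definition systemd is actually using
systemctl cat thorslave@thorslave_mythor_1.service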
\\n\\nAny idea why the slave would not start? Any idea how I could get more logs here to understand what's going on?\\n\\nThanks!\", \"post_time\": \"2021-07-23 16:56:19\" },\n\t{ \"post_id\": 33901, \"topic_id\": 8942, \"forum_id\": 14, \"post_subject\": \"Re: Dfuplus: problem with srcIP?\", \"username\": \"JimD\", \"post_text\": \"Thanks for calling this to our attention. I will get more details and update the latest DFUPlus documentation. \\n\\nI opened a Jira:\\nhttps://track.hpccsystems.com/browse/HPCC-26298\\n\\nJim\", \"post_time\": \"2021-07-30 18:26:11\" },\n\t{ \"post_id\": 33882, \"topic_id\": 8942, \"forum_id\": 14, \"post_subject\": \"Re: Dfuplus: problem with srcIP?\", \"username\": \"lpezet\", \"post_text\": \"Figured it out.\\nIf I add srcplane=mydropzone to the dfuplus command for 8.2, it works:\\n\\n$ /usr/bin/dfuplus action=spray srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=test::something dstcluster=mythor server=127.0.0.1 srcplane=mydropzone\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on mydropzone to test::something\\nSubmitted WUID D20210727-113924\\n0% Done\\nD20210727-113924 Finished\\nTotal time taken 0 secs\\n
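To script a whole batch of files with that option, something along these lines should do it (a sketch only; the logical-file prefix is just an example):
# spray every CSV sitting in the dropzone, one logical file per CSV
for f in /var/lib/HPCCSystems/mydropzone/*.csv; do
  name=$(basename "$f" .csv)
  /usr/bin/dfuplus action=spray srcfile="$f" srcplane=mydropzone format=csv \
      dstname="test::${name}" dstcluster=mythor server=127.0.0.1
done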
\\nI saw that new option in the dfuplus usage and on github but not in the doc (https://cdn.hpccsystems.com/releases/CE ... .2.4-1.pdf).\\nHad to check some tests to figure out what to set it to (https://github.com/hpcc-systems/HPCC-Pl ... t.ecl#L164).\", \"post_time\": \"2021-07-27 15:58:38\" },\n\t{ \"post_id\": 33872, \"topic_id\": 8942, \"forum_id\": 14, \"post_subject\": \"Re: Dfuplus: problem with srcIP?\", \"username\": \"lpezet\", \"post_text\": \"I might be confused between srcip and server. I think I always omitted srcip (since I always run dfuplus on the instance itself).\\nRegardless, I went ground zero to test things on equal footing.\\nI downloaded HPCCSystems 8.2.0-2 VM (VirtualBox) and HPCCSystems 7.10.70-1 VM (VirtualBox).\\nI simply run the VMs one at a time when performing my tests here (I run one, perform the test, shut it down, and run the other one). I am NOT changing any settings (no configmgr, no editing OS files, nada, nichts, rien).\\nI ran the same exact command on both VMs.\\nWith 7.10, I get the following:\\n\\n$ /usr/bin/dfuplus action=spray srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=test::something dstcluster=mythor server=127.0.0.1\\n\\nsrcip not specified - assuming spray from local machine\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on 192.168.56.101:7100 to test::something\\nSubmitted WUID D20210727-110739\\nD20210727-110739 status: queued\\nD20210727-110739 Finished\\nTotal time taken 0 secs\\n
\\nWith 8.2:\\n\\n$ /usr/bin/dfuplus action=spray srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=test::something dstcluster=mythor server=127.0.0.1\\n\\nsrcip not specified - assuming spray from local machine\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on http://127.0.0.1:8010/FileSpray (3) to test::something\\n[20052: 2021-07-27 15:21:40 GMT: SprayVariable to test::something: cannot resolve source network IP from http://127.0.0.1:8010/FileSpray (3).] \\n
\\nSo, what am I doing wrong?\\n\\nThanks for the help!\", \"post_time\": \"2021-07-27 15:24:13\" },\n\t{ \"post_id\": 33862, \"topic_id\": 8942, \"forum_id\": 14, \"post_subject\": \"Dfuplus: problem with srcIP?\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to use dfuplus from the command line to spray (import) from CSV files.\\nI'm guessing I'm missing some configuration/options here because it doesn't seem to stick to the srcIP I'm passing (with some exceptions) and tries to...resolve?/do something with those IP during the process.\\nMy setup is:\\nsingle node, \\nHPCC Systems 8.2.0-2,\\nDFU Version: 7 community_8.2.0-2 (exact output of /usr/bin/dfuplus --version),\\nUbuntu 20.10,\\nI run /usr/bin/dfuplus on the instance itself.\\n
\\n\\n\\nI said some exceptions because when I use localhost or 127.0.0.1, it uses 127.0.0.1 just fine:\\n\\n$ /usr/bin/dfuplus action=spray srcip=localhost srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=some::thing dstcluster=mythor\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on 127.0.0.1:7100 to some::thing\\nSubmitted WUID D20210726-211856\\nD20210726-211856 status: queued\\nFailed: No Drop Zone on '127.0.0.1' configured at '/var/lib/HPCCSystems/mydropzone/something.csv'.\\n
\\n\\n$ /usr/bin/dfuplus action=spray srcip=127.0.0.1 srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=some::thing dstcluster=mythor\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/archwayha-cms/lds/2019/extracted/dme_claimsk_lds_5_2019q1.csv on 127.0.0.1:7100 to archwayha-cms::lds::2019::dme::q1\\nSubmitted WUID D20210726-204329\\nD20210726-204329 status: queued\\nFailed: No Drop Zone on '127.0.0.1' configured at '/var/lib/HPCCSystems/mydropzone/something.csv'.\\n
\\n\\nSo I understand my Drop Zone is not "on" 127.0.0.1, and it expects the private IP here (please correct me if I'm wrong...and where is this defined exactly? I don't see it in my /etc/HPCCSystems/environment.xml).\\n\\nBut when I use my private IP I get the following:\\n\\n$ /usr/bin/dfuplus action=spray srcip=172.32.5.210 srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=some::thing dstcluster=mythor\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on�V to some::thing\\n[20052: 2021-07-26 20:49:40 GMT: SprayVariable to some::thing: cannot resolve source network IP from ÅV.]\\n
\\nI'm not sure what's going on (I don't get any logs when doing this), but I wonder if it has something to do about that IP address somehow garbled here into this mess of either �V or ÅV (and it changes every time I run that command).\\nAny idea what I'm missing or did wrong?\\nAny idea how I can troubleshoot further?\\n\\n\\nBy the way, I tried with my IPv6 address (just to see what would happen), but I get the same result as with localhost/127.0.0.1 (which might be normal then):\\n\\n$ /usr/bin/dfuplus action=spray srcip=fe80::106f:6cff:fec2:2c7f srcfile=/var/lib/HPCCSystems/mydropzone/something.csv format=csv dstname=some::thing dstcluster=mythor\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /var/lib/HPCCSystems/mydropzone/something.csv on fe80::106f:6cff:fec2:2c7f to some::thing\\nSubmitted WUID D20210726-205038\\nD20210726-205038 status: queued\\nFailed: No Drop Zone on 'fe80::106f:6cff:fec2:2c7f' configured at '/var/lib/HPCCSystems/mydropzone/something.csv'.\\n
\\n\\nWhen I use the Spray feature in ECL Watch it works just fine, but I would like to do it from the command line (need to script the spraying of a bunch of files).\\n\\nThanks!\", \"post_time\": \"2021-07-26 21:21:01\" },\n\t{ \"post_id\": 34323, \"topic_id\": 9053, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Platform 8.2.26 on Ubuntu 20.04 LTS\", \"username\": \"mgardner\", \"post_text\": \"Thanks for you Feedback and the Jiras that you created. The following thorslave, dafilesrv and dali issues have been resolved and will likely be included in 8.4.10-1.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-26750\\nhttps://track.hpccsystems.com/browse/HPCC-26757\\nhttps://track.hpccsystems.com/browse/HPCC-26761\", \"post_time\": \"2021-11-02 13:51:46\" },\n\t{ \"post_id\": 34313, \"topic_id\": 9053, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Platform 8.2.26 on Ubuntu 20.04 LTS\", \"username\": \"lpezet\", \"post_text\": \"Here's another clue.\\nThe Dali service starts daserver as a daemon.\\nNow if I start it manually, it works just fine, like so:\\n/opt/HPCCSystems/bin/daserver
\nI can telnet to its port and everything is peachy.\nWhen I run it as a daemon, manually, nothing happens really (I do expect it to return right away, but no logs, nothing):\n/opt/HPCCSystems/bin/daserver --daemon mydali
\\n(exit code for that command is 0 though)\\nAnd nothing listening on any of the expected ports.\\n\\nAny idea what I can do now to make it work in daemon mode?\", \"post_time\": \"2021-10-27 16:25:52\" },\n\t{ \"post_id\": 34303, \"topic_id\": 9053, \"forum_id\": 14, \"post_subject\": \"Re: HPCC Platform 8.2.26 on Ubuntu 20.04 LTS\", \"username\": \"lpezet\", \"post_text\": \"I tried something else. Just to be clear, I'm not attached to any specific version of HPCC Platform here, I just want a latest-ish version working.\\nI downloaded the source code, checked out the community_8.2.26-1 tag and built it all on Ubuntu 20.04.2 LTS successfully.\\nI then installed the package and ran\\nsystemctl start hpccsystems-platform.target
\\nBut the Dali server keeps crashing like before:\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: dali@mydali.service: Scheduled restart job, restart counter is at 92.\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: Stopped mydali.\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: Starting generating configuration files in mydali instance directory...\\nOct 27 15:29:42 ip-192-168-247-8 check-component-exists.sh[57894]: Found component: mydali\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: hpcc-conf@mydali.service: Succeeded.\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: Finished generating configuration files in mydali instance directory.\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: Starting mydali...\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: Started mydali.\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: dali@mydali.service: Main process exited, code=exited, status=1/FAILURE\\nOct 27 15:29:42 ip-192-168-247-8 systemd[1]: dali@mydali.service: Failed with result 'exit-code'.
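One generic way to watch what the daemonised process does before it exits silently (a sketch; it assumes strace is installed and reuses the same binary and component name as above):
# trace the daemon and every child it forks, with timestamps, into a file
sudo -u hpcc strace -f -tt -o /tmp/daserver.strace /opt/HPCCSystems/bin/daserver --daemon mydali

# then inspect the tail of the trace for the last calls before it exited
tail -n 50 /tmp/daserver.strace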
\\n\\nIs it possible to get more logs out of that daserver program?\\n\\nThanks!\", \"post_time\": \"2021-10-27 15:31:47\" },\n\t{ \"post_id\": 34283, \"topic_id\": 9053, \"forum_id\": 14, \"post_subject\": \"HPCC Platform 8.2.26 on Ubuntu 20.04 LTS\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to install HPCC Platform 8.2.26 on Ubuntu 20.04 LTS (in AWS).\\nI literally do this:\\n\\nwget https://d2wulyp08c6njk.cloudfront.net/releases/CE-Candidate-8.2.26/bin/platform/hpccsystems-platform-community_8.2.26-1focal_amd64.deb\\ndpkg -i hpccsystems-platform-community_8.2.26-1focal_amd64.deb\\napt install -f\\nsystemctl start hpccsystems-platform.service\\n
\\nAnd it's a mess!\\nDali starts but seems to exit right away (from /var/log/syslog):\\n\\nOct 26 21:18:33 ip-192-168-247-151 systemd[1]: Starting generating configuration files in mydali instance directory...\\nOct 26 21:18:34 ip-192-168-247-151 check-component-exists.sh[17410]: Found component: mydali\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: hpcc-conf@mydali.service: Succeeded.\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: Finished generating configuration files in mydali instance directory.\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: Starting mydali...\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: Started mydali.\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: dali@mydali.service: Main process exited, code=exited, status=1/FAILURE\\nOct 26 21:18:34 ip-192-168-247-151 systemd[1]: dali@mydali.service: Failed with result 'exit-code'.\\n
\\n\\nAnd MyESP just crashes I believe (from /var/log/HPCCSystems/myesp/esp.log):\\n\\n00000032 OPR 2021-10-26 20:53:00.204 8319 8319 "ERROR: Failed to connect to Dali Server 192.168.247.151:7070."\\n00000033 USR 2021-10-26 20:53:04.626 8319 8319 "ESP Abort Handler..."\\n00000034 USR 2021-10-26 20:53:04.627 8319 8319 "================================================"\\n00000035 USR 2021-10-26 20:53:04.627 8319 8319 "Program: 192.168.247.151:/opt/HPCCSystems/bin/esp"\\n00000036 USR 2021-10-26 20:53:04.627 8319 8319 "Signal: 11 Segmentation fault"\\n00000037 USR 2021-10-26 20:53:04.627 8319 8319 "Fault IP: 0000558126BD8C21"\\n00000038 USR 2021-10-26 20:53:04.627 8319 8319 "Accessing: 0000000000000000"\\n00000039 PRG 2021-10-26 20:53:04.627 8319 8319 "Backtrace:"\\n0000003A PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0x15c21) [0x558126bd8c21]"\\n0000003B PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/lib/libjlib.so(+0x117dc2) [0x7f1a3df83dc2]"\\n0000003C PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/lib/libjlib.so(+0x117df9) [0x7f1a3df83df9]"\\n0000003D PRG 2021-10-26 20:53:04.627 8319 8319 " /lib/x86_64-linux-gnu/libpthread.so.0(+0x153c0) [0x7f1a3dc533c0]"\\n0000003E PRG 2021-10-26 20:53:04.627 8319 8319 " /lib/x86_64-linux-gnu/libc.so.6(clock_nanosleep+0xdf) [0x7f1a3db2a3bf]"\\n0000003F PRG 2021-10-26 20:53:04.627 8319 8319 " /lib/x86_64-linux-gnu/libc.so.6(nanosleep+0x17) [0x7f1a3db30047]"\\n00000040 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/lib/libjlib.so(_Z10MilliSleepj+0x8b) [0x7f1a3e05846b]"\\n00000041 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/lib/libdalibase.so(+0x1038c6) [0x7f1a3e2a28c6]"\\n00000042 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/lib/libdalibase.so(_Z17initClientProcessP6IGroup14DaliClientRolejPKcS3_jb+0x74) [0x7f1a3e1e5c74]"\\n00000043 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0xdca1) [0x558126bd0ca1]"\\n00000044 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0x1017e) [0x558126bd317e]"\\n00000045 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0x13ab0) [0x558126bd6ab0]"\\n00000046 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0xae4d) [0x558126bcde4d]"\\n00000047 PRG 2021-10-26 20:53:04.627 8319 8319 " /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf3) [0x7f1a3da710b3]"\\n00000048 PRG 2021-10-26 20:53:04.627 8319 8319 " /opt/HPCCSystems/bin/esp(+0xaf5e) [0x558126bcdf5e]"\\n00000049 USR 2021-10-26 20:53:04.627 8319 8319 "Registers:"\\n0000004A USR 2021-10-26 20:53:04.627 8319 8319 "EAX:0000000000000000 EBX:00007FFF8C9A9E30 ECX:0000000000000002 EDX:0000558126BD7B70 ESI:0000558127827B40 EDI:0000558127761022"\\n0000004B USR 2021-10-26 20:53:04.627 8319 8319 "R8 :0000000000000001 R9 :0000000000000017 R10:00007F1A3E09C0F7 R11:0000000000000246"\\n0000004C USR 2021-10-26 20:53:04.627 8319 8319 "R12:0000000000000000 R13:00007F1A39E70240 R14:0000000000000000 R15:00007F1A3E137C20"\\n0000004D USR 2021-10-26 20:53:04.627 8319 8319 "CS:EIP:0033:0000558126BD8C21"\\n0000004E USR 2021-10-26 20:53:04.627 8319 8319 " ESP:00007FFF8C9A8CF0 EBP:0000000000000000"\\n0000004F USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8CF0]: 00007F1A3DC40968 0000000000007F1A 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 39E7024000000000"\\n00000050 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8D10]: 00007F1A39E70240 0000000000007F1A 0000000000000000 3E137C2000000000 
00007F1A3E137C20 3DF83DC200007F1A 00007F1A3DF83DC2 8C9A8DC400007F1A"\\n00000051 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8D30]: 00007FFF8C9A8DC4 0000000100007FFF 0000000000000001 0000000000000000 0000000000000000 8C9A9AD000000000 00007FFF8C9A9AD0 0000000000007FFF"\\n00000052 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8D50]: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 3DF83DF900000000 00007F1A3DF83DF9 027F5EE700007F1A"\\n00000053 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8D70]: 00000000027F5EE7 3DC533C000000000 00007F1A3DC533C0 0000000700007F1A 0000000000000007 0000000000000000 0000000000000000 0000000000000000"\\n00000054 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8D90]: 0000000000000000 0000000200000000 00007FFF00000002 0000000000007FFF 0000000000000000 0000000000000000 0000000000000000 8C9A9AA800000000"\\n00000055 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8DB0]: 00007FFF8C9A9AA8 0000000000007FFF 0000000000000000 0000029300000000 0000000000000293 8C9A9AD000000000 00007FFF8C9A9AD0 0000000000007FFF"\\n00000056 USR 2021-10-26 20:53:04.627 8319 8319 "Stack[00007FFF8C9A8DD0]: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000"\\n00000057 USR 2021-10-26 20:53:04.627 8319 8319 "ThreadList:\\n7F1A3959A700 139750608054016 8324: CMPNotifyClosedThread\\n7F1A38D99700 139750599661312 10912: dasess.registerClientProcess\\n
\\n\\nWhat did I do wrong or what am I missing?\\n\\nThanks!\", \"post_time\": \"2021-10-26 21:24:37\" },\n\t{ \"post_id\": 34553, \"topic_id\": 9133, \"forum_id\": 14, \"post_subject\": \"Re: Raspberry PI\", \"username\": \"lpezet\", \"post_text\": \"I commented out that line #843 in hqlfold.cpp (not sure why it would trip my compilation in ARM/64 and not trip the usual workflow in Git???).\\nIt compiled just fine, created package, installed it and started the platform successfully. I had to comment out the ws_sql service and binding in /etc/HPCCSystems/environment.xml (does -DUSE_MYSQL=OFF skip the missing lib libws_sql.so???).\\nNow ECL Watch is up, Playground code ran fine (hthor) and now to run some more tests...\", \"post_time\": \"2021-12-28 00:07:12\" },\n\t{ \"post_id\": 34543, \"topic_id\": 9133, \"forum_id\": 14, \"post_subject\": \"Re: Raspberry PI\", \"username\": \"lpezet\", \"post_text\": \"All this time I was using Raspberry Pi OS...\\nI just switched to Ubuntu 20.03.3 LTS 64-bits and now using pristine branch candidate-8.4.20.\\n\\n$ cmake -DUSE_AWS=OFF -DUSE_AERON=OFF -DUSE_AZURE=OFF -DUSE_CASSANDRA=OFF -DUSE_JAVA=OFF -DUSE_MYSQL=OFF -DUSE_NUMA=OFF -DUSE_TBB=OFF ../HPCC-Platform/\\n-- The C compiler identification is GNU 9.3.0\\n-- The CXX compiler identification is GNU 9.3.0\\n-- Check for working C compiler: /usr/bin/cc\\n-- Check for working C compiler: /usr/bin/cc -- works\\n-- Detecting C compiler ABI info\\n-- Detecting C compiler ABI info - done\\n-- Detecting C compile features\\n-- Detecting C compile features - done\\n-- Check for working CXX compiler: /usr/bin/c++\\n-- Check for working CXX compiler: /usr/bin/c++ -- works\\n-- Detecting CXX compiler ABI info\\n-- Detecting CXX compiler ABI info - done\\n-- Detecting CXX compile features\\n-- Detecting CXX compile features - done\\nCMake Deprecation Warning at cmake_modules/commonSetup.cmake:42 (cmake_policy):\\n The OLD behavior for policy CMP0026 will be removed from a future version\\n of CMake.\\n\\n The cmake-policies(7) manual explains that the OLD behaviors of all\\n policies are deprecated and that a policy should be set to OLD only under\\n specific short-term circumstances. 
Projects should be ported to the NEW\\n behavior and not rely on setting a policy to OLD.\\nCall Stack (most recent call first):\\n CMakeLists.txt:145 (include)\\n\\n\\n-- Found LIBMEMCACHED: /usr/lib/aarch64-linux-gnu/libmemcached.so (Required is at least version "1.0.10") \\n-- Making Release system\\n-- 64bit architecture is 1\\n-- Looking for pthread.h\\n-- Looking for pthread.h - found\\n-- Performing Test CMAKE_HAVE_LIBC_PTHREAD\\n-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed\\n-- Looking for pthread_create in pthreads\\n-- Looking for pthread_create in pthreads - not found\\n-- Looking for pthread_create in pthread\\n-- Looking for pthread_create in pthread - found\\n-- Found Threads: TRUE \\nUsing compiler: GNU :: 9.3.0 :: :: \\n-- GLIBC version: 2.31\\nCMake Warning at cmake_modules/commonSetup.cmake:671 (message):\\n USE_OPTIONAL set - missing dependencies for optional features will\\n automatically disable them\\nCall Stack (most recent call first):\\n CMakeLists.txt:145 (include)\\n\\n\\n-- Found Bison v3.5.1\\n-- Found OpenLDAP: /usr/lib/aarch64-linux-gnu/libldap_r.so \\n-- Found ICU: /usr/lib/aarch64-linux-gnu/libicuuc.so \\n-- version: 66 unicode: 13.0\\n-- Found Libxslt: /usr/lib/aarch64-linux-gnu/libxslt.so \\n-- Found Libxml2: /usr/lib/aarch64-linux-gnu/libxml2.so \\n-- Found CBLAS: /usr/lib/aarch64-linux-gnu/libcblas.so \\n-- Found ZLIB: /usr/lib/aarch64-linux-gnu/libz.so \\n-- Found libarchive: /usr/lib/aarch64-linux-gnu/libarchive.so \\n-- Found Boost: /usr/lib/aarch64-linux-gnu/cmake/Boost-1.71.0/BoostConfig.cmake (found suitable version "1.71.0", minimum required is "1.34.0") found components: regex \\n-- Found BOOST_REGEX: Boost::regex \\nBOOST_REGEX_VERSION is 107100\\n-- BOOST_REGEX enabled\\n-- Found OPENSSL: /usr/lib/aarch64-linux-gnu/libssl.so \\n-- Found APR: /usr/lib/aarch64-linux-gnu/libapr-1.so\\n-- Found APRUTIL: /usr/lib/aarch64-linux-gnu/libaprutil-1.so\\n-- Looking for dlopen in dl\\n-- Looking for dlopen in dl - found\\n-- Looking for crypt in crypt\\n-- Looking for crypt in crypt - found\\n-- Found OpenSSL: /usr/lib/aarch64-linux-gnu/libcrypto.so (found suitable version "1.1.1f", minimum required is "1.0.2") \\n-- Found CURL: /usr/lib/aarch64-linux-gnu/libcurl.so (found version "7.68.0") \\nCMake Warning (dev) at initfiles/bash/etc/systemd/system/CMakeLists.txt:53 (if):\\n Policy CMP0054 is not set: Only interpret if() arguments as variables or\\n keywords when unquoted. Run "cmake --help-policy CMP0054" for policy\\n details. Use the cmake_policy command to set the policy and suppress this\\n warning.\\n\\n Quoted variables like "thor" will no longer be dereferenced when the policy\\n is set to NEW. Since the policy is not set the OLD behavior will be used.\\nThis warning is for project developers. 
Use -Wno-dev to suppress it.\\n\\n-- Process file: dafilesrv.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/dafilesrv\\n-- Process file: install-init.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/install-init\\n-- Process file: hpcc-init.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/hpcc-init\\n-- Process file: hpcc_common.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/hpcc_common\\n-- Process file: uninstall-init.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/uninstall-init\\n-- Process file: setupPKI.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/setupPKI\\n-- Process file: distributePKI.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/distributePKI\\n-- Process file: safe_copyPKI.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/etc/init.d/safe_copyPKI\\n-- Process file: bash_postinst.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bash/sbin/bash_postinst\\n-- Process file: init_dafilesrv.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_dafilesrv\\n-- Process file: init_eclagent.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_eclagent\\n-- Process file: init_dali.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_dali\\n-- Process file: init_thor.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_thor\\n-- Process file: init_configesp.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_configesp\\n-- Process file: init_dfuserver.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_dfuserver\\n-- Process file: init_eclccserver.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_eclccserver\\n-- Process file: init_eclscheduler.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_eclscheduler\\n-- Process file: init_esp.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_esp\\n-- Process file: init_roxie.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_roxie\\n-- Process file: init_sasha.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_sasha\\n-- Process file: init_thorslave.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_thorslave\\n-- Process file: init_toposerver.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/bin/init_toposerver\\n-- Process file: hpcc_setenv.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/hpcc_setenv\\n-- Process file: complete-uninstall.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/complete-uninstall.sh\\n-- Process file: keygen.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/keygen.sh\\n-- Process file: update-keys\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/update-keys\\n-- Process file: add_conf_settings.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/add_conf_settings.sh\\n-- Process file: rm_conf_settings.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/rm_conf_settings.sh\\n-- Process file: configmgr.in\\n-- Output 
file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/configmgr\\n-- Process file: config2mgr.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/config2mgr\\n-- Process file: install-cluster.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/install-cluster.sh\\n-- Process file: hpcc-push.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/hpcc-push.sh\\n-- Process file: hpcc-run.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/hpcc-run.sh\\n-- Process file: remote-install-engine.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/remote-install-engine.sh\\n-- Process file: deploy-java-files.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/deploy-java-files.sh\\n-- Process file: check-component-exists.sh.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/sbin/check-component-exists.sh\\n-- Process file: run_ftslave.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/componentfiles/ftslave/run_ftslave\\n-- Process file: start_backupnode.in\\n-- Output file: /media/disk1/HPCC/HPCC-Platform-Build2/initfiles/componentfiles/thor/start_backupnode\\n-- Updated CPACK_DEBIAN_PACKAGE_DEPENDS to xterm\\n----INSTALLING esdlcomp\\n----INSTALLING ESDL-XML\\n-- Building start-stop-daemon\\n-- Found Python3: /usr/bin/python3.8 (found suitable version "3.8.10", minimum required is "3.6") found components: Interpreter Development \\n-- Auto Detecting Packaging type\\n-- distro uses DEB, revision is focal_aarch64\\n-- Current release version is hpccsystems-platform-community_8.4.20-rc1focal_aarch64\\n-- Git tag is 'community_8.4.20-rc1'\\n-- Build tag is 'community_8.4.20-rc1'\\n-- Base build tag is ''\\n-- Will build DEB package\\n-- Packing BASH installation files\\n-- Updated CPACK_DEBIAN_PACKAGE_DEPENDS to xterm, g++, openssh-client, openssh-server, expect, rsync, libapr1, python2, python3, psmisc, curl\\n-- Configuring done\\n-- Generating done\\n-- Build files have been written to: /media/disk1/HPCC/HPCC-Platform-Build2\\n
\\n\\nWhen running make, I get:\\n\\n/media/disk1/HPCC/HPCC-Platform/ecl/hql/hqlfold.cpp: In function ‘IValue* doFoldExternalCall(IHqlExpression*, unsigned int, const char*, const char*, void*)’:\\n/media/disk1/HPCC/HPCC-Platform/ecl/hql/hqlfold.cpp:843:11: error: unused variable ‘strbuf’ [-Werror=unused-variable]\\n 843 | char* strbuf = fstack.getMem();\\n | ^~~~~~\\n
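(A less invasive workaround than editing hqlfold.cpp may be to stop GCC from promoting that one warning to an error at configure time. Whether the platform's own warning settings end up overriding it can vary, so treat this as a sketch only:

cmake -DCMAKE_CXX_FLAGS="-Wno-error=unused-variable" -DUSE_AWS=OFF -DUSE_AERON=OFF -DUSE_AZURE=OFF -DUSE_CASSANDRA=OFF -DUSE_JAVA=OFF -DUSE_MYSQL=OFF -DUSE_NUMA=OFF -DUSE_TBB=OFF ../HPCC-Platform/

If the project's own flags still force the error, commenting out the unused variable, as done in the follow-up above, remains the fallback.)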
\", \"post_time\": \"2021-12-27 22:18:51\" },\n\t{ \"post_id\": 34523, \"topic_id\": 9133, \"forum_id\": 14, \"post_subject\": \"Re: Raspberry PI\", \"username\": \"lpezet\", \"post_text\": \"I made some progress but still can't build everything in the end.\\nI had to skip a bunch of (I think) optional stuff for now.\\nHere's where I'm at right now.\\nI have "HPCC-Platform" folder with all the source code, and "HPCC-Platform-Build" folder as my build folder.\\nIn my build folder, I run the following:\\n\\ncmake -DUSE_AWS=OFF -DUSE_AERON=OFF -DUSE_AZURE=OFF -DUSE_CASSANDRA=OFF -DUSE_ELASTICSTACK_CLIENT=OFF ../HPCC-Platform/\\nmake -j6\\n
\\n\\nAt that point, I get errors like:\\n\\n/usr/bin/ld: CMakeFiles/jlib.dir/jstats.cpp.o: in function `std::__atomic_base<unsigned long long>::load(std::memory_order) const':\\n/usr/include/c++/10/bits/atomic_base.h:426: undefined reference to `__atomic_load_8'\\n
\\n\\nI don't know if I did something wrong before all this, but the linker needs the "-latomic" flag for things to compile in "jlib". For now I edited the CMakeLists.txt in system/jlib like so:\\n\\ndiff --git a/system/jlib/CMakeLists.txt b/system/jlib/CMakeLists.txt\\nindex f66c3173b..ac7847607 100644\\n--- a/system/jlib/CMakeLists.txt\\n+++ b/system/jlib/CMakeLists.txt\\n@@ -235,6 +235,7 @@ elseif (("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin" ))\\n else ()\\n target_link_libraries ( jlib rt )\\n endif ()\\n+target_link_libraries ( jlib atomic )\\n \\n if (NOT PLUGIN)\\n if (WIN32)\\n
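(For later, if the same tree also needs to keep building cleanly on x86, that extra link line could be guarded so it only applies to ARM builds; this is a sketch, and CMAKE_SYSTEM_PROCESSOR naming differs slightly between distros, hence the regex:

if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64")
    target_link_libraries ( jlib atomic )
endif ()

A configure-time probe with check_library_exists would be tidier, but the hard-coded guard is enough to get past the __atomic_load_8 link error.)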
\\nI'm just trying to make it work right now (not trying to detect things or use some EXTRA flags to make it more flexible for example).\\n\\nCleaning things up (deleting build folder and re-creating it) and running the same exact cmake and make from above (with VERBOSE=1), I end up with this kind of error:\\n\\ncd /media/pi/Data/HPCC/HPCC-Platform-Build/system/jlib && /usr/bin/cmake -E cmake_link_script CMakeFiles/jlib.dir/link.txt --verbose=1\\n/usr/bin/c++ -fPIC -frtti -fPIC -fmessage-length=0 -Werror=format -Wformat-security -Wformat-nonliteral -pthread -Wuninitialized -Werror=return-type -Werror=format-nonliteral -Wno-psabi -Wparentheses -std=c++11 -Wall -Wextra -Wno-switch -Wno-unused-parameter -Werror -Wno-error=delete-non-virtual-dtor -O3 -DNDEBUG -g -fno-inline-functions -g -fno-default-inline -rdynamic -Wl,-z,defs -shared -Wl,-soname,libjlib.so -o ../../Release/libs/libjlib.so CMakeFiles/jlib.dir/jargv.cpp.o CMakeFiles/jlib.dir/jarray.cpp.o CMakeFiles/jlib.dir/javahash.cpp.o CMakeFiles/jlib.dir/jbsocket.cpp.o CMakeFiles/jlib.dir/jbuff.cpp.o CMakeFiles/jlib.dir/jcomp.cpp.o CMakeFiles/jlib.dir/jcrc.cpp.o CMakeFiles/jlib.dir/jdebug.cpp.o CMakeFiles/jlib.dir/jencrypt.cpp.o CMakeFiles/jlib.dir/jexcept.cpp.o CMakeFiles/jlib.dir/jfile.cpp.o CMakeFiles/jlib.dir/jflz.cpp.o CMakeFiles/jlib.dir/jhash.cpp.o CMakeFiles/jlib.dir/jiface.cpp.o CMakeFiles/jlib.dir/jio.cpp.o CMakeFiles/jlib.dir/jiter.cpp.o CMakeFiles/jlib.dir/jkeyboard.cpp.o CMakeFiles/jlib.dir/jlib.cpp.o CMakeFiles/jlib.dir/jlog.cpp.o CMakeFiles/jlib.dir/jlz4.cpp.o CMakeFiles/jlib.dir/jlzma.cpp.o CMakeFiles/jlib.dir/jlzw.cpp.o CMakeFiles/jlib.dir/jmd5.cpp.o CMakeFiles/jlib.dir/jmemleak.cpp.o CMakeFiles/jlib.dir/jmetrics.cpp.o CMakeFiles/jlib.dir/jmisc.cpp.o CMakeFiles/jlib.dir/jmutex.cpp.o CMakeFiles/jlib.dir/jobserve.cpp.o CMakeFiles/jlib.dir/jprop.cpp.o CMakeFiles/jlib.dir/jptree.cpp.o CMakeFiles/jlib.dir/jqueue.cpp.o CMakeFiles/jlib.dir/jregexp.cpp.o CMakeFiles/jlib.dir/jrowstream.cpp.o CMakeFiles/jlib.dir/jsecrets.cpp.o CMakeFiles/jlib.dir/jsem.cpp.o CMakeFiles/jlib.dir/jset.cpp.o CMakeFiles/jlib.dir/jsmartsock.cpp.o CMakeFiles/jlib.dir/jsocket.cpp.o CMakeFiles/jlib.dir/jsort.cpp.o CMakeFiles/jlib.dir/jstats.cpp.o CMakeFiles/jlib.dir/jstream.cpp.o CMakeFiles/jlib.dir/jstring.cpp.o CMakeFiles/jlib.dir/jsuperhash.cpp.o CMakeFiles/jlib.dir/jthread.cpp.o CMakeFiles/jlib.dir/jtime.cpp.o CMakeFiles/jlib.dir/junicode.cpp.o CMakeFiles/jlib.dir/jutil.cpp.o CMakeFiles/jlib.dir/__/globalid/lnuid.cpp.o CMakeFiles/jlib.dir/__/codesigner/codesigner.cpp.o CMakeFiles/jlib.dir/__/codesigner/gpgcodesigner.cpp.o CMakeFiles/jlib.dir/__/security/cryptohelper/cryptocommon.cpp.o CMakeFiles/jlib.dir/__/security/cryptohelper/digisign.cpp.o CMakeFiles/jlib.dir/__/security/cryptohelper/pke.cpp.o CMakeFiles/jlib.dir/__/security/cryptohelper/ske.cpp.o -Wl,-rpath,/media/pi/Data/HPCC/HPCC-Platform-Build/Release/libs::::::::::::::::::::::: ../../Release/libs/liblzma.a ../../Release/libs/liblz4.a ../../Release/libs/liblibbase58.a ../../Release/libs/libyaml.so -ldl -lcrypt /usr/lib/arm-linux-gnueabihf/libssl.so /usr/lib/arm-linux-gnueabihf/libcrypto.so -lrt -latomic \\n/usr/bin/ld: CMakeFiles/jlib.dir/jarray.cpp.o:(.rodata+0xc): multiple definition of `typeinfo name for CSimpleInterfaceOf<CEmptyClass>'; CMakeFiles/jlib.dir/jargv.cpp.o:(.rodata+0xc): first defined here\\n/usr/bin/ld: CMakeFiles/jlib.dir/jarray.cpp.o:(.data.rel.ro+0x0): multiple definition of `typeinfo for CSimpleInterfaceOf<CEmptyClass>'; CMakeFiles/jlib.dir/jargv.cpp.o:(.data.rel.ro+0x0): 
first defined here\\n/usr/bin/ld: CMakeFiles/jlib.dir/javahash.cpp.o:(.rodata+0x0): multiple definition of `typeinfo name for CSimpleInterfaceOf<CEmptyClass>'; CMakeFiles/jlib.dir/jargv.cpp.o:(.rodata+0xc): first defined here\\n...bunch more...\\ncollect2: error: ld returned 1 exit status\\nmake[2]: *** [system/jlib/CMakeFiles/jlib.dir/build.make:904: Release/libs/libjlib.so] Error 1\\nmake[2]: Leaving directory '/media/pi/Data/HPCC/HPCC-Platform-Build'\\nmake[1]: *** [CMakeFiles/Makefile2:7830: system/jlib/CMakeFiles/jlib.dir/all] Error 2\\nmake[1]: Leaving directory '/media/pi/Data/HPCC/HPCC-Platform-Build'\\nmake: *** [Makefile:182: all] Error 2\\n
\\n\\nNot sure how to deal with that right now...\", \"post_time\": \"2021-12-25 22:07:44\" },\n\t{ \"post_id\": 34513, \"topic_id\": 9133, \"forum_id\": 14, \"post_subject\": \"Raspberry PI\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI remember an old (I'm old too) blog post about the HPCC Systems team building the source code of HPCC-Platform for ARM to make it work on Raspberry PI (an old version now). I remember some plugin/options were disabled to make things happen more quickly.\\n\\nNow years later, I'm trying to do the same on a R-PI 4 but I'm having some difficulties.\\nIs there anyone with some documentation/guidance/hints to accomplish this?\\n(I can build source code on Ubuntu just fine).\\n\\nHere's an example of something failing at the moment, in HPCC-Platform/system/aeron/aeron-driver/src/main/c/concurrent/aeron_atomic.h:\\n\\n#if defined(AERON_COMPILER_GCC)\\n #if defined(AERON_CPU_X64)\\n #include <concurrent/aeron_atomic64_gcc_x86_64.h>\\n #else\\n #include <concurrent/aeron_atomic64_gcc_c11.h>\\n #endif\\n
\\nThat concurrent/aeron_atomic64_gcc_c11.h is simply missing.\\nThis is just an example, and I can start ironing one kink at a time, but if there's something out there to help speed things up, that'd be great!\\n\\nThanks!\", \"post_time\": \"2021-12-18 03:45:15\" },\n\t{ \"post_id\": 35051, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Jerry,\\nHow are you?\\n\\nWell, a few things:\\n\\n1. You were right, there was a problem either with the quotes or the spaces. \\n2. After adjusting everything according to your instructions I was still unable to connect, but the error message changed to something related to the private/public key-pair.\\n3. I was working for a while on that, and tried generating the keys with different tools (puttygen, openssh, openssl) and the error message kept changing but always on the key-pair subject.\\n4. I opted for generating them both as .pem (not only the public one), but it kept failing.\\n5. Given the command I was using, openssl was forcing me to use a passphrase to encrypt the private key, and that turned out to be the final problem. I changed the command to generate a plain text private key, and ... hallelujah!!!! it started working.\\n\\nSo, thank you very much for all your help!!! You put me on the right path to get to the solution.\\n\\nI'll just attach a couple of screenshots, one from the working page , and the other from the MySQL errors that are still showing, and although I don't know if they are relevant, I thought it would be good that you knew about them.\\n\\nBest regards!!\\nRicardo\", \"post_time\": \"2022-02-09 17:35:11\" },\n\t{ \"post_id\": 34981, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"jjacob\", \"post_text\": \"Hi Ricardo,\\n\\nThanks for sharing your config files. It looks like the .env file contains spaces and single quotes which could be causing the issue. For e.g: the DB_USERNAME value has a leading space and enclosed in single quotes. Please remove the leading spaces and quotes for property values and rebuild the containers. \\n\\nPlease note that docker may have preserved some of these values in the volumes. Please make sure you follow the below steps to restart the containers. \\n\\n1. Stop the containers - docker stop <container id-1> <container id-2> <container id-3>\\n2. Run docker-compose rm -v\\n3. Delete mysql-data directory under the Auth-Service installed directory\\n4. docker-compose up -d\\n\\nPlease report back if you run into further issues, we can get on a call with the team to troubleshoot the issues and get you up and running\\n\\nThanks\\nJerry\", \"post_time\": \"2022-02-08 20:04:38\" },\n\t{ \"post_id\": 34921, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hello Jerry,\\n\\nA quick update.\\nAll the previous information is from my attempts to try the tool in my test environment. 
That is Docker Desktop on Windows 10.\\n\\nJust in case I tried on my real target system which is ubuntu 20.04, using the same configuration and I got a similar but not equal scenario.\\nFirst, I got some warning messages during the installation, and after that same behavior, but different messages in the logs.\\nI attach a summary of the messages, in case they help in clarifying the situation.\\n\\nThank you\\nRegards\\nRicardo\", \"post_time\": \"2022-02-08 03:37:01\" },\n\t{ \"post_id\": 34911, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hello Jerry,\\nNice to meet you! and thank you very much for your answer!\\n\\nWe are getting closer
\\nThe good news first.\\nNow when I go to http://localhost:3003/login it shows the screen with the login fields.\\nGreat!\\n\\nThe not so good ones (although maybe it's just me missing something or doing something wrong in the configuration files)\\nThe problem is that no matter what password I use, it displays a message saying login failed.\\nI tried many different things in the .env file and also in the ..admin-user.js, like using quotes, not using quotes and things like that.\\nI also connected to the MySQL database, the user admin is there and the password is obviously masked.\\n\\nI'm attaching the logs from the containers with the errors, and my configuration files, to see if you are able to spot where is the mistake, or if this has its root cause somewhere else.\\n\\nThe error in the MySQL container remains, and maybe, there lies the persistent problem.\\n\\n
2022-02-07T23:40:21.971260Z 2 [Note] Got an error reading communication packets\\n\\n2022-02-07T23:40:22.489549Z 4 [Note] Aborted connection 4 to db: 'authservicedb' user: 'user' host: '172.26.0.3' (Got an error reading communication packets)
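(Those "error reading communication packets" notes generally just mean a client opened a connection and dropped it before finishing the MySQL handshake, so they are often a symptom rather than the root cause. One quick sanity check - assuming the MySQL service is named db in the Auth-Service docker-compose.yml and using the credentials from .env - is to confirm a manual login from inside the stack works:

docker-compose exec db mysql -u user -p authservicedb -e "SELECT 1;"

If that succeeds, the problem is more likely on the web container side, such as the quoting and key-pair issues that turned out to be the culprit in this thread.)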
\\n\\nIf I can provide any other information to help in the resolution, please let me know.\\n\\nThank you!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-02-08 00:08:50\" },\n\t{ \"post_id\": 34901, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"jjacob\", \"post_text\": \"Hello Ricardo,\\n\\nThanks for reporting this issue. We were able to reproduce it at our end. It looks like an incorrect file formatting is causing the issue and we have applied a patch for it. Please pull the latest Auth-Service code and rebuild the containers. Since the changes are only in the web container, you can execute the following command to rebuild only the web container\\n\\ndocker-compose up -d --no-deps --build web\\n\\nPlease feel free to reach out to us if you need further assistance in setting it up\\n\\nThanks\\nJerry\", \"post_time\": \"2022-02-04 19:24:49\" },\n\t{ \"post_id\": 34881, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Russ,\\nGreat! \\nI'll be waiting for their contact to be able to continue with the project.\\nThank you!!\", \"post_time\": \"2022-02-03 17:15:49\" },\n\t{ \"post_id\": 34861, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"william.whitehead\", \"post_text\": \"Thanks for the update Ricardo. I have reached out to the subject matter experts and hopefully they will respond soon.\", \"post_time\": \"2022-02-02 21:00:46\" },\n\t{ \"post_id\": 34851, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi Russ,\\nThank you very much for your answer!\\nI think I failed to explain the problem correctly.\\nSorry for that, and I'll start again:\\n\\nI'm trying to use HPCCSystems Auth-Service: \\nhttps://github.com/hpcc-systems/Auth-Service\\n\\nI chose this one because I want to install HPCCSystems RealBI :\\nhttps://github.com/hpcc-systems/REAL-BI\\n\\nand\\n\\nHPCCSystems Tombolo:\\nhttps://github.com/hpcc-systems/Tombolo\\n\\nAnd from what I understood, both of them rely on having this Auth-Service installed for authentication. They have other authentication options but this HPCCSystems Auth-Service is the one they have in common.\\nI'm far from being an expert in security, therefore I'm quite a bit lost here.\\n\\nBut, if you tell me that there is an HPCCSystems LDAP that could cover the same functionality and works with these other two tools I want to install, I'll be more than happy to follow your instructions to install it and configure it.\\n\\nHere is some additional information to try to further clarify the problem:\\n\\n**************************************************************************************\\n[size=150:2ou1tas5]For RealBI these are the parameters available to configure authentication in the .env file of the project.\\n\\n# HPCC Auth Service (https://github.com/hpcc-systems/Auth-Service)\\n#Required only if REACT_APP_AUTH_METHOD is set to AUTH\\nAUTH_URL= [ HPCC Auth Service url ]\\nAUTH_PORT= [ HPCC Auth Service port ]\\nAUTH_CLIENT_ID= [ This is a unique identifier for an application in Auth Service. 
Will be set up through Auth Service when a new Application is registered ]\\n\\n#Microsoft AD server side token validation.\\n#Required only if REACT_APP_AUTH_METHOD is set to ADFS\\nAZURE_TENANT_ID= [ Azure Tenant ID used by passport-azure-ad package for tokens validation ]\\nAZURE_CLIENT_ID= [ Azure Client ID used by passport-azure-ad package for tokens validation ]
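(Filled in, that section would look something like this - the host, port and id below are purely illustrative placeholders, and the real AUTH_CLIENT_ID is whatever Auth-Service assigns when REAL-BI is registered as an application:

AUTH_URL=http://auth.example.com
AUTH_PORT=3003
AUTH_CLIENT_ID=<client id issued by Auth-Service for REAL-BI>

The AZURE_* values can stay empty when REACT_APP_AUTH_METHOD is set to AUTH rather than ADFS.)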
\\n\\nAnd these are the Notes for the application:\\n\\nThis application relies on:\\nA running instance of Auth Service to handle user authentication and JWT generation.\\nApplication can use Microsoft Active Directory for authentication and authorization\\nAn HPCC cluster containing data files.\\n**************************************************************************************\\n\\n[size=150:2ou1tas5]For Tombolo these are the parameters in the .env file :\\n\\n## Auth Service details\\n#For authentication, Tombolo uses AuthService module, which needs to be setup separetly.\\nAUTH_SERVICE_URL=<protocol>://<host_name>:<port>/api/auth\\nAUTHSERVICE_TOMBOLO_CLIENT_ID=\\nsecret=
\\n\\nAnd these are the notes referring to authentication:\\n\\nAUTH_SERVICE_URL - ( Tombolo uses Auth Service for user authentication. An existing Auth Service can be used or you may set up Auth Service separately. You can find the Authservice setup instructions here. Once you have an instance of Authservice up and running, update this value. Eg - ://<host_name>:/api/auth)\\nAUTHSERVICE_TOMBOLO_CLIENT_ID - (Unique id of Tombolo app in Auth Service. This will be used in the communication between Tombolo and AuthService)
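(As a concrete illustration of the documented pattern - the host and port here are placeholders, not defaults:

AUTH_SERVICE_URL=http://auth.example.com:3003/api/auth
AUTHSERVICE_TOMBOLO_CLIENT_ID=<client id issued when Tombolo is registered in Auth Service>
secret=<matching secret for that client - an assumption, since the notes above don't spell this value out>

The client id comes from registering Tombolo as an application inside Auth-Service, as the notes above describe.)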
\\n\\nAll the installations I'm doing are of "on-premise" kind. By that I mean that I'm installing everything from scratch, mostly in Docker and some directly on Linux for server components, and some client tools in Windows, and I'm not using hosted services in third party providers like Azure or AWS, for databases or any other applications)\\n\\nI'll really appreciate your help in troubleshooting the issue! And I'm really open to listen to options, if there are any.\\nThank you very much!!\\nWarm regards!\\nRicardo\", \"post_time\": \"2022-02-02 19:19:31\" },\n\t{ \"post_id\": 34841, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Re: Auth-Service installation issue with MySQL DB\", \"username\": \"william.whitehead\", \"post_text\": \"Hello, and thanks giving HPCC a spin. I am not familiar with the Docker AuthService that you refer to, but there are a lot of HPCC specific security managers available to users. The most robust being our LDAP security manager. What are your security requirements, and hopefully I can assist you in choosing and configuring one that best suits your needs\\nRuss\", \"post_time\": \"2022-02-02 15:55:44\" },\n\t{ \"post_id\": 34831, \"topic_id\": 9231, \"forum_id\": 14, \"post_subject\": \"Auth-Service installation issue with MySQL DB\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\nI'm trying to install AuthService in Docker to be able to authenticate other applications. \\n\\nI followed the steps in the github page, and after a while the three containers are running.\\nBut when I try to access the UI it does not respond.\\nAfter looking at the logs I see that although the MySQL container is running is displaying these messages:\\n\\nauth_mysql_db_1 | 2022-02-02T03:02:48.521696Z 4 [Note] Aborted connection 4 to db: 'authservice' user: 'admin' host: '172.22.0.3' (Got an error reading communication packets)\\n\\nI've investigated a bit but there seem to be many different causes for this error depending on many different factors.\\nSo, I'll really appreciate any help on troubleshooting this particular scenario.\\nI attach the logs.\\n\\nThank you!\\nRegards\", \"post_time\": \"2022-02-02 03:51:42\" },\n\t{ \"post_id\": 35191, \"topic_id\": 9291, \"forum_id\": 14, \"post_subject\": \"Re: Tombolo - Zookeeper and Kafka\", \"username\": \"rfernandez2007\", \"post_text\": \"Hello Jerry,\\n\\nOk, that's good to know, and I'll take it into consideration for design purposes.\\n\\nThank you!\\nRegards!\\nRicardo\", \"post_time\": \"2022-02-15 16:24:05\" },\n\t{ \"post_id\": 35181, \"topic_id\": 9291, \"forum_id\": 14, \"post_subject\": \"Re: Tombolo - Zookeeper and Kafka\", \"username\": \"jjacob\", \"post_text\": \"Hello,\\n\\nKafka and Zookeeper setup are optional for Tombolo. At some point, Tombolo used Kafka and Zookeeper for Orchestrating workflows, but the architecture has changed and they are not used for Orchestration currently. However, a Kafka instance is required if you want to trigger a job based on Kafka messages. \\n\\nThe following configurations are no longer needed except for START_JOB_TOPIC, which is the Topic Tombolo is listening to trigger Kafka based jobs. 
However, for basic Orchestration it is not needed\\n\\nJOB_COMPLETE_TOPIC=\\nSTART_JOB_TOPIC\\nJOB_COMPLETE_GROUP_ID=\\nJOB_COMPLETE_TOPIC=\\n\\nThanks\", \"post_time\": \"2022-02-15 14:33:02\" },\n\t{ \"post_id\": 35151, \"topic_id\": 9291, \"forum_id\": 14, \"post_subject\": \"Tombolo - Zookeeper and Kafka\", \"username\": \"rfernandez2007\", \"post_text\": \"Hi,\\n\\nI have a few questions about Tombolo installation.\\n\\nIn the installation process, particularly in Docker there are Zookeeper and Kafka configuration sections. \\n\\n1. Are they a mandatory part of the Tombolo architecture?\\n2. In case they are not mandatory, how do you remove them without breaking the installation?\\n3. In case they are mandatory, is it possible to configure them without all the security features, like for example certificates, keystores, etc? How would you achieve this?\\n4. In case it is not possible to avoid installing zookeeper and kafka as part of the stack, and all the security must be configured, could you please provide sample configuration values that help understand the relationship between all the parts, and allow to configure a working installation.\\n\\nI've previously worked with zookeeper and kafka in other Docker configurations, but the setup was much simpler.\\n\\n5. Also, there are some variables to be filled that are not clear (to me) to where they belong and what's their function:\\nJOB_COMPLETE_TOPIC=\\nSTART_JOB_TOPIC\\nJOB_COMPLETE_GROUP_ID=\\nJOB_COMPLETE_TOPIC=\\n\\n\\nAny help in configuring/understanding this will be much appreciated!!\\n\\nThank you very much!!\\nWarm regards\\nRicardo\", \"post_time\": \"2022-02-12 15:14:15\" },\n\t{ \"post_id\": 35411, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Re: Persistent Storage for a Local Deployment\", \"username\": \"JimD\", \"post_text\": \"The updated manual is now available on the web site. \\n\\nhttps://hpccsystems.com/training/docume ... d-Platform\\n\\nJim\", \"post_time\": \"2022-04-26 15:05:19\" },\n\t{ \"post_id\": 35385, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Re: Persistent Storage for a Local Deployment\", \"username\": \"JimD\", \"post_text\": \"Matt, \\n\\nThanks for calling this to our attention. I worked with you offline and you indicated that the issue is resolved and you have a running cluster with persistent local storage.\\n\\nI am working on releasing an updated manual with the updated steps. I will reply to this post when that updated manual is available. \\n\\nRegards,\\nJim\", \"post_time\": \"2022-04-19 20:08:40\" },\n\t{ \"post_id\": 35375, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Re: Persistent Storage for a Local Deployment\", \"username\": \"mrumsey\", \"post_text\": \"Update: I made sure those folders existed and I am still not getting an environment to start up.\\n\\nThis gets an environment:\\n.\\\\helm install mycluster hpcc/hpcc
\\n\\nThis does not:\\n.\\\\helm install mycluster hpcc/hpcc -f examples/local/values-localfile.yaml
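(When the pods hang like that, the scheduler usually says why; as a quick sketch of where to look:

kubectl get pvc
kubectl describe pod <name of a stuck pod>

Unbound PersistentVolumeClaims are the thing to watch for here - as noted in the reply below, missing host folders behind the hpcc-localfile chart are the most common cause of this kind of startup failure.)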
\\n\\nI'm not very familiar with .yaml files or helm. Could I need to adjust something? I just used the stuff located in the documentation/git repo.\", \"post_time\": \"2022-04-18 20:15:44\" },\n\t{ \"post_id\": 35365, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Re: Persistent Storage for a Local Deployment\", \"username\": \"mrumsey\", \"post_text\": \"I had most of them. hpccdata/debug wasn't in my documentation. Dropzone was hpccdata/mydropzone.\\n\\nI'll see if the updated folders does anything.\", \"post_time\": \"2022-04-18 19:25:50\" },\n\t{ \"post_id\": 35355, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Re: Persistent Storage for a Local Deployment\", \"username\": \"JimD\", \"post_text\": \"Have you created the required folders under c:/hpcccdata? The example helm charts for localfile does not create the folders. Missing folders is the most common cause of this type of deployment to fail to start.\\n\\nFor Windows, use these commands:\\nmkdir c:\\\\hpccdata\\nmkdir c:\\\\hpccdata\\\\dalistorage\\nmkdir c:\\\\hpccdata\\\\hpcc-data\\nmkdir c:\\\\hpccdata\\\\debug\\nmkdir c:\\\\hpccdata\\\\queries\\nmkdir c:\\\\hpccdata\\\\sasha\\nmkdir c:\\\\hpccdata\\\\dropzone
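(Since Docker Desktop is on the WSL 2 backend here, the same tree can also be created in one go from a WSL shell - a bash equivalent, assuming the C: drive is mounted at /mnt/c as usual:

mkdir -p /mnt/c/hpccdata/{dalistorage,hpcc-data,debug,queries,sasha,dropzone}

Either way, the folders must exist before mycluster is installed.)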
\\n\\nThis portion of the documentation is being updated. But is under review before publication. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2022-04-18 18:25:15\" },\n\t{ \"post_id\": 35345, \"topic_id\": 9335, \"forum_id\": 14, \"post_subject\": \"Persistent Storage for a Local Deployment\", \"username\": \"mrumsey\", \"post_text\": \"I am trying to get a single-node local deployment going for a small personal project. I have Docker Desktop (WSL2) and I am using Powershell with Helm.\\n\\nSo far I have been able to get an instance of HPCC running and I can successfully submit code and access the environment via ECLWatch (localhost:8010) and the ECL-IDE. What I need is to be able to access files on my local machine and, preferable, write to disk and access my finished files after my work is done.\\n\\nI tried following the instructions on the Containerized HPCC System Platform documentation, but I am running into some problems. \\n\\nI have downloaded the helm charts from https://github.com/hpcc-systems/helm-chart and extracted the /examples folder into my folder from which I make helm calls. I then run the following script from the documentation:\\n\\n.\\\\helm install hpcc-localfile examples/local/hpcc-localfile --set common.hostpath=/run/desktop/mnt/host/c/hpccdata
\\n\\nThis works fine and I see hpcc-localfile running with a ./helm list call.\\n\\nAfter this, the next step should be to open an instance of HPCC pointing to the .yaml mapping file and setting the default path to my local drive in the files created earlier (C:\\\\hpccdata\\\\...). I use the following code to access this:\\n\\n.\\\\helm install mycluster hpcc/hpcc -f examples/local/values-localfile.yaml
\\n\\nWhen I run this, I see mycluster when I call ./helm list, but all of the processes in kubectl get pods never get up and running. Most of them stay at processing, with a few getting to the Running.... state, but never actually starting. I don't see any containers get created, but some of the processes have multiple restarts as time goes on. The rest never come fully online.\\n\\nAm I missing a vital step from the documentation, or could there be something else going on here?\\n\\nThanks,\\n\\nMatt Rumsey\", \"post_time\": \"2022-04-16 18:24:56\" },\n\t{ \"post_id\": 1437, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"soyyo\", \"post_text\": \"1. Is each node in charge of running its own backup_node process?\\n\\nThe process is initiated from the master node and spawns an instance "backupnode" on every node that is part of that cluster, with 10 active threads at a time.\\n\\n2. I think the above post means that start_backupnode populates its own primary data from its backup node if necessary, as well as verifies it has complete backup data for the node it is backing up. Is that right?\\n\\nIt compiles a list from the metadata on the Dali of what file parts belong to it and where they reside. It then validates that it can physically find those files, any missing files are replaced.\\n\\nIt is of interest to note, if the data is lost in the primary location on node n, the backupnode process running on node n+1 restores the file part(s). \\n\\nIf the data is lost on the mirror or replicate location, the backupnode process on the n-1 node restore the data. \\n\\nHTH\", \"post_time\": \"2012-04-04 18:30:10\" },\n\t{ \"post_id\": 1427, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\nAny info on the first 2 questions from my post?
Sorry, but those are hardware/operations type of questions -- I'm firmly ensconced on the software/programming side of things. \\n\\nRichard\", \"post_time\": \"2012-04-04 14:08:50\" },\n\t{ \"post_id\": 1422, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"jeremy\", \"post_text\": \"Thanks Richard... apologies, I should have quoted you directly. That makes sense. Any info on the first 2 questions from my post?\\nThanks again,\\nJeremy\", \"post_time\": \"2012-04-03 23:52:30\" },\n\t{ \"post_id\": 1421, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"rtaylor\", \"post_text\": \"Jeremy,\\n\\n
I've heard elsewhere on the forum that node backups are for performance reasons and not for disaster recovery. Is this true? If so, how are the backups used for performance?
You "heard" that from me, so let me explain what I meant.\\n\\n"Disaster Recovery" is generally thought of as "nothing is working due to ... (some disastrous circumstance, like power outage or hurricane or something)" so you need to failover to an alternate site (usually somewhere far away).\\n\\nWhen I said "performance reasons" what I was getting at is the ability to continue with useful work despite "minor mishaps (like bad disk drive or motherboard or whatever)" -- and that's what backup nodes are for. If a single node goes down, the backup node still has the data that node should have, so the system can continue performing its function -- that's the "performance reasons" I was talking about.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-04-03 21:03:37\" },\n\t{ \"post_id\": 1419, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"jeremy\", \"post_text\": \"Thank you for the response... a few follow up questions:\\n1. Is each node in charge of running its own backup_node process?\\n2. I think the above post means that start_backupnode populates its own primary data from its backup node if necessary, as well as verifies it has complete backup data for the node it is backing up. Is that right?\\n3. I've heard elsewhere on the forum that node backups are for performance reasons and not for disaster recovery. Is this true? If so, how are the backups used for performance?\", \"post_time\": \"2012-04-03 15:46:24\" },\n\t{ \"post_id\": 1418, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Re: Backup Nodes\", \"username\": \"soyyo\", \"post_text\": \"Under normal operating conditions in a multi-node setup, thor file reads and writes are performed on the data directory of each node. Additionally, any writes also go to a mirror data directory on the next node in the sequence has a copy, which is the backup of the previous listed node. \\n\\nIf the data is not found at the primary(for example drive failed and was swapped out), it goes to the mirror directory to read the data. Any writes will go to the primary and then to the mirror.\\n\\nFor example on a three node system, node 2 has node 1s' mirror directory, node 3 has node 2s' and node 1 has node 3s'.\\n\\nAfter a node failure, the backup process (/opt/HPCCSystems/bin/start_backupnode thor) copies all files from the backup node’s location to the replacement node’s primary data location. The mirror'ed directory also receives all the new files that have been written to the "new drive" first.\\n\\nAdditionally, a cron job can be setup to run every night to backup the operator in case he forgets to manually kick off the backup process.\\n\\nSomething like...\\n0 1 * * * /bin/su - hpcc -c "/opt/HPCCSystems/bin/start_backupnode thor"\\n\\nIn this example, the cron has been setup on root crontab to run as the hpcc user.\\n\\nHTH\", \"post_time\": \"2012-04-03 15:10:28\" },\n\t{ \"post_id\": 1414, \"topic_id\": 321, \"forum_id\": 15, \"post_subject\": \"Backup Nodes\", \"username\": \"jeremy\", \"post_text\": \"Greetings,\\nElsewhere on this forum, I was told that there was a process called "Backup Nodes" that could be run periodically to ensure proper redundancy levels on data files in HPCC. 
Can someone elaborate on its use cases and operation?\\nThanks,\\nJeremy\", \"post_time\": \"2012-03-28 15:48:30\" },\n\t{ \"post_id\": 2075, \"topic_id\": 446, \"forum_id\": 15, \"post_subject\": \"Re: HPCC Cluster - recommendations required\", \"username\": \"clo\", \"post_text\": \"Hi, \\n\\nLet's see if I can clarify some of your concerns.\\n\\nBasic queries:\\n1. If you're referring to the first page of the configuration wizard and whether you need to supply the IP of cloudx-767-700, then the answer is 'yes'. You should supply the IPs of all the nodes you intend on having the wizard configure for you.\\n\\n2. You can verify that the components are allocated properly through two methods. If you're using the wizard to configure your 3 VMs (which I would suggest), then you will be given a summary of your environment. If you look there, you'll notice that the only the support components are allocated to the first VM and that mythor, myftslave, myroxie are allocated to the other two VMs. (Keep in mind that mythor refers to both the thormaster and the thorslaves so it will list all three IPs for this component). \\n\\nYou can also get a Summary View of your configured XML from the initial landing page for the ConfigMgr. This is the first option. \\n\\nOnly the components that are allocated to a node will get started/stopped/restarted when you call the start script for all nodes.\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start
\\n\\n3. You're correct. In your setup, you only really use 2 of your VMs. You can verify this\\n\\nI would suggest, in order to take advantage of 2 non-support-node VMs, to set it up like the following instead:\\n\\nNo. of support nodes : 1\\nNo. of nodes for Roxie cluster : 2\\nNo. of slave nodes for Thor cluster : 2\\nNo. of Thor slaves per node : 1\\n\\n This will create ONE Roxie server process & ONE Thorslave process on each non-support-node VM. (The wording can be a tad confusing).\\n \\nFurther queries:\\n1. Typically, the first IP(s) you supply is going to allocated to being your support nodes. If you specify to have more than 1 support node in the configuration wizard, then the system will decide which nodes to put what supporting component. Keep in mind, that however many number of support nodes you decide to have, these nodes will not be able to be used by the Config Wizard to configure thorslaves or roxie servers. (Example: if you have 5 nodes total and would like to configure 3 support nodes, you only have 2 nodes left for allocating to the roxie cluster and thorslave nodes in the Config Wizard)\\n\\n2. In Advanced mode, you're able to allocate a component to whichever hardware node you like. You can do more detailed customization in Advanced Mode. For the most part, if you wish you configure a working system quickly, then the Wizard should be all that you need. I've been able to get full systems up and running in a relatively short amount of time this way. The Config Wizard takes care of a lot of the tedious tasks of allocating a component to the proper hardware and such. \\n\\nI hope that clears up some of your queries.\\n\\nChris\", \"post_time\": \"2012-07-25 14:05:37\" },\n\t{ \"post_id\": 2070, \"topic_id\": 446, \"forum_id\": 15, \"post_subject\": \"Re: HPCC Cluster - recommendations required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi clo,\\n\\nI'm really thankful for your precise response !\\n\\nI have three VM as follows :\\n\\ncloudx-767-700 - HPCC running in single-node mode\\ncloudx-799-731 - Fresh VM\\ncloudx-798-730 - Fresh VM\\n\\nAll have Ubuntu 11.04.\\n\\nAs per your suggestion and my constraints, I have decided to use the following :\\n\\nNo. of support nodes : 1\\nNo. of nodes for Roxie cluster : 1\\nNo. of slave nodes for Thor cluster : 1\\nNo. of Thor slaves per node : 2\\n\\nBasic queries :\\n\\n1. Do I have to supply the IP address of cloudx-767-700 too - HPCC is already running there, hence, I was wondering is it required/may cause any issues ? \\n\\n2. Now, [color=#FF0000:1u2i5lel]will it be ensured that the fresh VM will have only Thor slaves and Roxie and NOT any support components?\\n\\n3. When I say 'No. of Thor slaves per node' = 2, do I mean that two software processes pertaining to Thor slaves running on ONE hardware VM ?\\n\\n\\nAfter reading your reply and going through 'Using ConfigMgr in Advanced Mode' in 'UsingConfigManager.pdf', I have a few queries :\\n\\n1. As you wrote 'The support node will have all the components except the Thor Slave Processes and the Roxie Cluster. The Thormaster will be on the support node as well.'. Does this mean that in that while using 'Generate new environment using wizard', HPCC itself decides what to install on which 'support node' i.e I simply provide the no. of support nodes - 1,2 or more and it simply abstracts the support component installation decisions?\\n2. In the 'Advance View' of the configuration manager, I saw that the components - Dali Server etc. 
can be installed on separate hardware nodes. Is this what is being abstracted in the normal config(point 1. I raised)\\n\\nThanks and regards !\", \"post_time\": \"2012-07-25 04:04:06\" },\n\t{ \"post_id\": 2065, \"topic_id\": 446, \"forum_id\": 15, \"post_subject\": \"Re: HPCC Cluster - recommendations required\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nSupposing you have 3 VMs to work with, then I would suggest the following. Using the Configmgr, you'll be able to use generate a new environment using the wizard. In here, you have the option of setting up something called a support node. Since you have a limited number of VMs to work with, one node would be suggested for this. The support node will have all the components except the Thor Slave Processes and the Roxie Cluster. The Thormaster will be on the support node as well. The main reasoning for this is so that the thorslaves won't fight for the same resources as the supporting components.\\n\\nYou can then finish off by allocating 2 nodes for the Roxie Cluster (if you'd like a roxie) and 2 nodes for the Thorslaves.\\n\\nHope that helps,\\nChris\", \"post_time\": \"2012-07-24 18:25:42\" },\n\t{ \"post_id\": 2044, \"topic_id\": 446, \"forum_id\": 15, \"post_subject\": \"HPCC Cluster - recommendations required\", \"username\": \"kaliyugantagonist\", \"post_text\": \"I went through the HPCC introduction and installation docs. And I have a single-node set-up running(Ubuntu 11.04 64-bit).\\n\\nFor my understanding, I have listed the various software components/directories of HPCC as follows :\\n\\n1.\\tThor – Master-Slave nodes\\n2.\\tRoxie\\n3.\\tDFU Server\\n4.\\tLanding Zone ¬– a directory named ‘mydropzone’ where all uploaded data resides was created under HPCC installation\\n5.\\tDali Server – primary and back-up\\n6.\\tECL Server(related to Roxie)\\n7.\\tECL Agent(related to Roxie)\\n8.\\tECL Scheduler\\n9.\\tECL CC Server\\n10.\\tESP Server\\n11.\\tSasha Server\\n\\nNow, I want to move to a multi-node set-up but [color=#FF0000:19ovdqad]there is a restriction on the no. of VM available to me – 2 or 3 at the maximum. My core query is as to what should/shouldn’t be on different machines.\\n\\nPlease guide me on the following points :\\n\\n1.\\tWhat is the ‘recommended’ design of an HPCC cluster? E.g : Should Dali and its backup be on the same or different VM? Should the Sasha server run on the VM same as Dali? Which of the servers – DFU, ECL X must/must not reside on the same VM?\\n2.\\tGiven the no. of total VM available to me as 3(includes the current single-node set-up machine), how should be my cluster configuration\\n\\nThanks and regards !\", \"post_time\": \"2012-07-23 11:27:18\" },\n\t{ \"post_id\": 2079, \"topic_id\": 455, \"forum_id\": 15, \"post_subject\": \"Re: Multi-node installation issue\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you! 
A response has been posted in the following forum:\\n\\nviewtopic.php?f=14&t=454&sid=3ed7c58d25674617245f99e4417fee43#p2078\", \"post_time\": \"2012-07-25 14:55:16\" },\n\t{ \"post_id\": 2072, \"topic_id\": 455, \"forum_id\": 15, \"post_subject\": \"Multi-node installation issue\", \"username\": \"kaliyugantagonist\", \"post_text\": \"The issue is posted here : http://hpccsystems.com/bb/viewtopic.php?f=14&t=454&sid=3ed7c58d25674617245f99e4417fee43\", \"post_time\": \"2012-07-25 10:06:12\" },\n\t{ \"post_id\": 2318, \"topic_id\": 514, \"forum_id\": 15, \"post_subject\": \"Re: Server requirements for hpcc community edition installat\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nThe number of servers is entirely left up to the user's preference and availability of hardware. The HPCC system can be installed on just one node. However, the performance boosts will be most easily noticed in multiple nodes. The Configmgr can aid in the easy configuration of multiple nodes.\", \"post_time\": \"2012-09-10 17:20:30\" },\n\t{ \"post_id\": 2312, \"topic_id\": 514, \"forum_id\": 15, \"post_subject\": \"Server requirements for hpcc community edition installation\", \"username\": \"abinaya\", \"post_text\": \"Hi...........I want to install and work on Hpcc community edition.Though I went through the installation documentation I do not have a clear idea for the number of servers required. I want to know how many servers do we need for this.Please let me know.\", \"post_time\": \"2012-09-08 03:54:36\" },\n\t{ \"post_id\": 2423, \"topic_id\": 531, \"forum_id\": 15, \"post_subject\": \"Re: No user script available on AWS ?\", \"username\": \"bforeman\", \"post_text\": \"Hi JM,\\n\\nHere is where it currently resides:\\n\\nhttps://s3-us-west-2.amazonaws.com/hpcc ... ripts.html\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-24 12:52:18\" },\n\t{ \"post_id\": 2421, \"topic_id\": 531, \"forum_id\": 15, \"post_subject\": \"No user script available on AWS ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nThe user script http://s3.amazonaws.com/hpccsystems-ins ... ripts.html, for AWS Thor cluster setup is no more available and documentation has not been updated. What is the alternate link ?\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-09-24 08:01:33\" },\n\t{ \"post_id\": 2950, \"topic_id\": 625, \"forum_id\": 15, \"post_subject\": \"Re: Typical Datawarehousing environment -- HPCC best practic\", \"username\": \"jonburger\", \"post_text\": \"Your understanding is correct about the thor process. There are a couple of different scenarios that can be used.\\n\\nIt's important to remember one thing, you are constrained only by the ability of your hardware to reach your SLA's. Regardless of the method, whatever your hardware will support will guide your method regardless of the software. For instance, if your hardware is capable of producing a set batch job in say 10 minutes, you can run multiple thors on the same hardware and process two jobs at a time, but very likely they will not finish in 10 minutes - more like 15. This only gives you benefit to a point. At some point, dependent on your hardware, you will reach diminishing returns. Basically you will have say 3 jobs running simultaneously and they will finish in > 30 minutes, at that point you are better using 2. SAS drives and higher bandwidth network help. 
Using 6 drive RAID10 12 cores, 48GB RAM, 10Gb/sec ethernet, allows us to run 6 jobs concurrently on the same hardware - but your mileage may vary.\\n\\nSo to answer your question, yes you can have multiple thors running on both the same hardware or different hardware reading from the same queue. Depending on the number of "thors" you can process that number of jobs simultaneously. Whether that is your best method of producing the fastest returns entirely depends on your hardware.\\n\\nAs far as answering questions to people around SLA's as it relates to thor - I don't know that your answer would be any different using any other type of solution. If you cannot properly predict the future inflow of jobs and you have a static set of hardware, how could you possibly predict an SLA based on completion. For instance if a particular oracle query produced a batch result in 10 minutes, and suddenly you had 50 of those jobs, I am doubtful they will all finish within that same 10 minutes.\\n\\nYou would make the best prediction of inbound volume, test it, produce a slightly padded SLA and scale your hardware if your volume increases in order to meet it.\\n\\nHTH\\n\\nJon\", \"post_time\": \"2012-12-04 15:52:11\" },\n\t{ \"post_id\": 2896, \"topic_id\": 625, \"forum_id\": 15, \"post_subject\": \"Re: Typical Datawarehousing environment -- HPCC best practic\", \"username\": \"HPCC Staff\", \"post_text\": \"Our operations team is currently reviewing this. Thanks for the post!\", \"post_time\": \"2012-11-27 22:03:05\" },\n\t{ \"post_id\": 2841, \"topic_id\": 625, \"forum_id\": 15, \"post_subject\": \"Typical Datawarehousing environment -- HPCC best practices\", \"username\": \"arunarav\", \"post_text\": \"My understanding is that Thor is single process - i.e. a given Thor process can only run a single job (aka work unit) at a time. \\n\\n(I also gathered that as a mitigation, it is possible to configure HPCC to have multiple Thor target processes on the same cluster - these Thor processes will have access to the same set of cluster's files.)\\n\\nI need some clarifications around best practices especially around the following potential concerns. Please correct the concerns wherever applicable if they are incorrectly stated:\\n\\n\\nConcern 1: (Typical Data Warehousing environment)\\n\\nMultiple business users may issue requests to run "on-demand" batch jobs.\\n\\nA typical Data warehousing environment could have a combination of:\\n> short interactive jobs\\n> large batch jobs\\n> Guaranteed-capacity production jobs\\n\\nAlso there would be a mix of:\\n\\n> interesting R&D experiments on dataset\\n> Mission critical jobs\\n\\nA key ability is to allow any combination of the above types of jobs to be issued concurrently.\\n\\nSince Thor cannot handle concurrent jobs, the business user's request would be queued or blocked for an indeterminate length of time.\\n\\nWhat are the potential solutions for the above scenario? The above limitation inhibits the IT team's ability to promise or adhere to an SLA since we cannot predict how many concurrent batch jobs would arrive at a given point in time.\\n \\n(Re: Running multiple Thor processes. While this ability is available, we still have a ceiling of running as many concurrent batch jobs as the number of available Thor processes. Other qs: \\n(a) How do we load balance and route the next batch job request to the optimal (least utilized) Thor process? 
\\n(b) How do we even arrive at the optimal since we may not know the expected completion time of the currently running batch job? \\n(c) How do we re-route jobs to other Thor processes if they have already been submitted? \\nAll of this in an automated fashion)\\n\\n\\nConcern 2 (Development environment): Constrains parallel team development\\n\\n\\tEven if we have a sufficiently powerful development cluster (eg a 50 node cluster), only one developer can issue a given work-unit at a time. The batch jobs issued by other developers will be "blocked" or waiting in queue affecting their productivity. \\n\\nConcern 3 (Production environment): Under-utilizes cluster horsepower\\n\\n\\tIn production a sufficiently powerful cluster (eg 500 node cluster) could be running a batch job and have sufficient headroom (CPU, memory) to accommodate other batch jobs. Even through there is sufficient headroom to perform other batch jobs, it is not possible, given the single process constraint. This causes the the investment on cluster hardware to be under-utilized.\\n\\n\\nConcern 4 (Development and Production): Increased need for monitoring\\n\\n\\tSince only one workunit can run at a given time, there is a need to invest in cluster administrators who have to be diligent in monitoring the currently executing workunit and potentially killing jobs if there are other critical jobs waiting in queue. This results in increased overhead in manual co-ordination. While monitoring is applicable for any cluster, it is particularly accentuated due to the single workunit constraint. \\n\\n________\\n\\nThanks\\nArun\", \"post_time\": \"2012-11-23 09:37:18\" },\n\t{ \"post_id\": 3252, \"topic_id\": 726, \"forum_id\": 15, \"post_subject\": \"Re: Data Exceeded Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi..\\n\\nThanks a lot Bob..\\n\\nI think i had missed the replication option while spraying, but since the default option for replicate is 1 , it should have replicated the data in another node. Correct me if i am wrong.\\n\\nYeah pretty sure that the dafilesrv was down, once restarted i was able to spray and perform some other operations.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2013-01-30 18:11:22\" },\n\t{ \"post_id\": 3251, \"topic_id\": 726, \"forum_id\": 15, \"post_subject\": \"Re: Data Exceeded Error\", \"username\": \"bforeman\", \"post_text\": \"BTW, are you sure that the dafilesrv was down? One of our HPCC managers asked me to clarify with you.\", \"post_time\": \"2013-01-30 18:02:28\" },\n\t{ \"post_id\": 3250, \"topic_id\": 726, \"forum_id\": 15, \"post_subject\": \"Re: Data Exceeded Error\", \"username\": \"bforeman\", \"post_text\": \"
What will happen to the data if one node goes down and then restarted again..? Will it be replicated or archived anywhere and can any other operation be performed..?
\\n\\nIn a typical THOR configuration, if one of your 10 slave nodes went down and you had sprayed your data with replication, it is possible that the ECL operation can continue, since the data is replicated on another node.
\\nIn this case, since the server that went down was one of the HPCC System servers, you have no alternative but to stop and restart the cluster.\", \"post_time\": \"2013-01-30 16:57:51\" },\n\t{ \"post_id\": 3241, \"topic_id\": 726, \"forum_id\": 15, \"post_subject\": \"Data Exceeded Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am working on a POC with 10 dedicated thor cluster. Sprayed the data using the dfu plus since there were many similar files in a given path and the data is sprayed in a distributed fashion across all the 10 nodes.\\n\\nWhat will happen to the data if one node goes down and then restarted again..? Will it be replicated or archived anywhere and can any other operation be performed..?\\n\\nI encountered this error when one node went down and was restarted :\\n\\nError : “Error: System error: 0: Graph[1], csvread[2]: SLAVE 10.xxx.xx.xx:6600: File ~abc::def contained a line of length greater than 10485760 bytes. (0, 0), 0,”\\n\\nThe node which was down was dafilesrv. How to avoid errors in such scenario..?\\n\\nKindly suggest.\\n\\nThanks and Regards\\nksviswa\", \"post_time\": \"2013-01-30 12:35:05\" },\n\t{ \"post_id\": 3362, \"topic_id\": 741, \"forum_id\": 15, \"post_subject\": \"Re: Switch Requirements\", \"username\": \"bforeman\", \"post_text\": \"Jon Burger, our Director of Technology, replied:\\n\\nWe recommend a non-blocking architecture – so yes a 1:1 ratio.
\", \"post_time\": \"2013-02-06 17:32:55\" },\n\t{ \"post_id\": 3339, \"topic_id\": 741, \"forum_id\": 15, \"post_subject\": \"Switch Requirements\", \"username\": \"Durai\", \"post_text\": \"Hi, \\n\\nThis is regarding switch requirements, \\n\\nIn a typical tree topology, do you recommend 1:1 subscription at Leaf and Spine level. Isn't it very expensive? \\n\\nCan you please guid with some network topology for HPCC If I have to build production cluster ? \\n\\nThanks in advance.\\n\\nRegards\\nDurai\", \"post_time\": \"2013-02-05 15:47:20\" },\n\t{ \"post_id\": 3346, \"topic_id\": 742, \"forum_id\": 15, \"post_subject\": \"Re: Thor slaves running serially?\", \"username\": \"Sunitha\", \"post_text\": \"Hi Richard,\\n\\nI have included the code and a sample data file (you can probably replicate the lines to get a bigger input file). \\n\\nOne thing to point out is that each input line is double url decoded and we have an embedded c++ function that does that. Not sure if that is something that can hold up the pipeline, but each input line can be independently parsed.\\n\\nThanks,\\nSunitha\", \"post_time\": \"2013-02-05 20:09:42\" },\n\t{ \"post_id\": 3345, \"topic_id\": 742, \"forum_id\": 15, \"post_subject\": \"Re: Thor slaves running serially?\", \"username\": \"rtaylor\", \"post_text\": \"Can you show me your code? Maybe I can see something to suggest, or at least try to duplicate the issue on one of my clusters.\", \"post_time\": \"2013-02-05 19:39:45\" },\n\t{ \"post_id\": 3343, \"topic_id\": 742, \"forum_id\": 15, \"post_subject\": \"Re: Thor slaves running serially?\", \"username\": \"Sunitha\", \"post_text\": \"Thanks for the quick response Richard!\\n\\nOur project just parses the file, pattern matches some key value pairs and writes it out. We think this should be parallelizable.\\n\\nBTW, I forgot to mention earlier, we have a one node config with 2 thor slaves.\", \"post_time\": \"2013-02-05 19:32:23\" },\n\t{ \"post_id\": 3341, \"topic_id\": 742, \"forum_id\": 15, \"post_subject\": \"Re: Thor slaves running serially?\", \"username\": \"rtaylor\", \"post_text\": \"Sunitha,\\n\\nWhat is your code doing? Is it possibly a PROJECT that uses a COUNTER?\\n\\nRichard\", \"post_time\": \"2013-02-05 19:16:57\" },\n\t{ \"post_id\": 3340, \"topic_id\": 742, \"forum_id\": 15, \"post_subject\": \"Thor slaves running serially?\", \"username\": \"Sunitha\", \"post_text\": \"Hi,\\n\\nWe have a CPU bound process running on a really small subset of our data and with one slave it took ~45 mins to crunch a 7GB file. We are running on a machine that has 24cores and 64G RAM. To speed things up we reconfiged to have 2 slaves and reran the script. Everything that I can currently verify seems right: we have 2 pieces of sprayed file, I see 2 thor slaves running etc. BUT the slaves seem to be running serially. For about 30mins, one slave process ran at a 100% on one of the cores; and now the other slave has taken over. \\n\\nLooks like I have gotten some config wrong somewhere. Any quick pointers would be appreciated. \\n\\nThanks,\\nSunitha\", \"post_time\": \"2013-02-05 18:48:25\" },\n\t{ \"post_id\": 3348, \"topic_id\": 743, \"forum_id\": 15, \"post_subject\": \"Re: How long should file spray take?\", \"username\": \"Sunitha\", \"post_text\": \"Never mind, I see progress now. 
The file is not on a local drive, so may be thats slowing it down.\", \"post_time\": \"2013-02-06 02:04:22\" },\n\t{ \"post_id\": 3347, \"topic_id\": 743, \"forum_id\": 15, \"post_subject\": \"How long should file spray take?\", \"username\": \"Sunitha\", \"post_text\": \"I am trying to spray a 140GB file on a one node cluster with 8 slaves on the node. The spray process has been running for about 45 mins now and the progress status is still at 0%. I see a dfuprocess running and taking more than 50% cpu occasionally. This is the output of the dfuserver log:\\n\\n0000036 2013-02-05 16:54:50 6467 6479 "Start gathering file sizes..."\\n00000037 2013-02-05 16:54:50 6467 6479 "Gathering 1 file sizes on 1 threads"\\n00000038 2013-02-05 16:54:50 6467 6479 "Finished gathering file sizes..."\\n00000039 2013-02-05 16:54:50 6467 6479 "Use pull operation as default"\\n0000003A 2013-02-05 16:54:50 6467 6479 "Start gathering file sizes..."\\n0000003B 2013-02-05 16:54:50 6467 6479 "Finished gathering file sizes..."\\n0000003C 2013-02-05 16:54:50 6467 6479 "Calculate partition information"\\n0000003D 2013-02-05 16:54:50 6467 6479 "Calculating N:M partition"\\n0000003E 2013-02-05 16:54:50 6467 6479 "Partition 0(//10.0.5.108/data/HPCCSystems/mydropzone/file.txt)"\\n\\nHow long should something like this take? (BTW, to sanity check the 8slave configuration I played with a 1GB subset of the file and everything worked fine).\", \"post_time\": \"2013-02-06 01:38:40\" },\n\t{ \"post_id\": 3762, \"topic_id\": 781, \"forum_id\": 15, \"post_subject\": \"Re: HPCC cluster sizing\", \"username\": \"jeeves\", \"post_text\": \"Dan,\\n\\nThanks for you detailed reply. The information you provided was really useful.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-03-18 07:43:27\" },\n\t{ \"post_id\": 3520, \"topic_id\": 781, \"forum_id\": 15, \"post_subject\": \"Re: HPCC cluster sizing\", \"username\": \"DSC\", \"post_text\": \"I don't have an answer, but I can provide a bit of insight gained through experience.\\n\\nThe short form is: It isn't as simple as knowing the size of your data. The real linchpin for determining cluster configuration is knowing what you want to do with the data versus the performance you need for each activity.\\n\\nAt one end of the spectrum, if your work involves ingesting your 1TB of data infrequently to create relatively small lookup-style index files that are only infrequently accessed through Roxie, and you don't have a requirement on how long it takes to perform that ingest->index step, you could get away with a one or two nodes that total 3-4TB of storage (you need extra storage for maintaining the data and for making copies of the data for Roxie). Those nodes could run both Thor and Roxie processes. In other words, pretty much the tiniest footprint you can get away with.\\n\\nIf your requirements dictate faster ingest/ETL then increase Thor's size by either adding hardware nodes or by increasing Thor's slave count per node, or both. The latter can be done if you have enough cores on the hardware nodes to support the extra activity. Basically, dividing the ingest/ETL work among more Thor nodes will make it go faster. (You can, however, write bad ECL and prevent that from happening.)\\n\\nIf your Roxie queries are of extremely high volume, you may need to increase the Roxie cluster size, but that depends on what you're doing inside a Roxie query. 
If all queries are simple lookup-into-an-index activity then you can get away with a very small Roxie cluster unless you have a very high query load. If your queries do a lot of processing then you may need to increase the size of your Roxie cluster in order to spread the workload around.\\n\\nThere are a ton of configuration parameters that can be tuned to make a smaller cluster behave like a bigger cluster for a particular workload. That can be a fragile optimization though, because your requirements may change. Mine do, constantly.\\n\\nThere are really many factors that go into determining cluster sizing and many of those factors depend entirely on exactly what you are trying to do. Bottom line, sizing a cluster accurately is hard.\\n\\nAll that said, I have had great success with this general configuration:\\n\\n* 10 hardware nodes\\n* 24-cores per node (2x12, I believe)\\n* 32GB of RAM\\n* 2TB local storage per node\\n* Nine nodes are used for both Thor and Roxie, one node for everything else.\\n* 20 Thor slaves per node (which gives me a 180-way Thor cluster)\\n* Gigabit Ethernet connecting all nodes\\n\\nThat cluster seems to handily consume everything I throw at it pretty easily. What I use it for is almost certainly wildly different than what you would use it for, but at least it's something to look at and ponder. My previous cluster was a 4-node, dual-core, virtual host thing using a SAN for storage. It worked, and the performance was impressive when compared with other technologies, but my new cluster makes the old one look positively crippled. I'm a performance junkie, so this new cluster makes me happy.\\n\\nWhile I can't answer your question, I hope this kind of information at least provides some food for thought.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-02-21 13:15:12\" },\n\t{ \"post_id\": 3515, \"topic_id\": 781, \"forum_id\": 15, \"post_subject\": \"HPCC cluster sizing\", \"username\": \"jeeves\", \"post_text\": \"Are there simple thumb rules to provide an approximate estimate of the size of a HPCC cluster?\\nAssuming a maximum size of 1TB, what should be the..\\n\\n1. No of thor nodes\\n\\n2. No of roxie nodes\\n\\n3. HDD capacity and RAM size per node.\\n\\n4. No of CPU cores per node.\", \"post_time\": \"2013-02-21 06:04:26\" },\n\t{ \"post_id\": 3566, \"topic_id\": 790, \"forum_id\": 15, \"post_subject\": \"Re: can not access configmgr\", \"username\": \"bforeman\", \"post_text\": \"Cool! Thanks for the feedback and sharing with the rest of the forum group!\\n\\nBob\", \"post_time\": \"2013-02-27 13:06:06\" },\n\t{ \"post_id\": 3560, \"topic_id\": 790, \"forum_id\": 15, \"post_subject\": \"Re: can not access configmgr\", \"username\": \"battleman\", \"post_text\": \"[quote="bforeman":25hppbyf]Could it be a firewall issue perhaps?\\n\\nMake sure that the 8015 port is enabled.\\n\\nRegards,\\n\\nBob\\n\\nThanks for your tip!!\\nAnd what I have done:\\n#vi /etc/sysconfig/selinux\\nSELINUX=disabled\\n#setenforce 0\\n#service iptables stop\\n#chkconfig iptables off\\nIt works!! 
Thank you bro!\", \"post_time\": \"2013-02-27 07:49:21\" },\n\t{ \"post_id\": 3553, \"topic_id\": 790, \"forum_id\": 15, \"post_subject\": \"Re: can not access configmgr\", \"username\": \"bforeman\", \"post_text\": \"Could it be a firewall issue perhaps?\\n\\nMake sure that the 8015 port is enabled.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-26 13:28:05\" },\n\t{ \"post_id\": 3550, \"topic_id\": 790, \"forum_id\": 15, \"post_subject\": \"can not access configmgr\", \"username\": \"battleman\", \"post_text\": \"After I have installed the HPCC system on one vm(172.30.62.180),I start the configmgr :\\n\\n[hpcc@iaas1013062-Computer root]$ sudo /opt/HPCCSystems/sbin/configmgr\\n[sudo] password for hpcc: \\nUsing default filename /etc/HPCCSystems/source/environment.xml and default port "8015"\\nVerifying configmgr startup ... Success\\nExit by pressing ctrl-c...\\n\\nbut I cannot access this configuration service by http://172.30.62.180:8015\\nAny advice? \\nThanks and regards !\", \"post_time\": \"2013-02-26 10:47:02\" },\n\t{ \"post_id\": 3948, \"topic_id\": 882, \"forum_id\": 15, \"post_subject\": \"Re: ECL IDE taking long time to load with a 10 node cluster\", \"username\": \"bforeman\", \"post_text\": \"The time of the ping should also be long.\\n\\nLet me check with our cloud team to see what could introduce a delay.\\n\\nBob\", \"post_time\": \"2013-04-17 15:14:28\" },\n\t{ \"post_id\": 3947, \"topic_id\": 882, \"forum_id\": 15, \"post_subject\": \"Re: ECL IDE taking long time to load with a 10 node cluster\", \"username\": \"GK\", \"post_text\": \"ECL watch is also taking more time than before.\\n\\nPing is working fine.\\n\\nThanks\\nGK\", \"post_time\": \"2013-04-17 14:21:13\" },\n\t{ \"post_id\": 3934, \"topic_id\": 882, \"forum_id\": 15, \"post_subject\": \"Re: ECL IDE taking long time to load with a 10 node cluster\", \"username\": \"bforeman\", \"post_text\": \"Do you get the same delay accessing the ECl Watch through a browser? The ECL IDE simply will try to connect to the cluster IP address in the same fashion. It sounds like a possible issue on the AWS side, have you tried a simple ping on the command line?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-04-16 12:48:41\" },\n\t{ \"post_id\": 3933, \"topic_id\": 882, \"forum_id\": 15, \"post_subject\": \"ECL IDE taking long time to load with a 10 node cluster\", \"username\": \"GK\", \"post_text\": \"We have a 10 node cluster running continuously for last 10 days. The cluster was created using "One Click Thor" on AWS. From yesterday, we are facing an issue while launching ECL IDE that it takes more than 15mins to get loaded. We have cleaned up all the unwanted data and work units. But still we are facing this issue. \\n\\nAny solution/suggestion would be greatly appreciated.\", \"post_time\": \"2013-04-16 10:22:43\" },\n\t{ \"post_id\": 4605, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"DSC\", \"post_text\": \"Nice!\\n\\nThen the documentation is perfectly correct and I was the one failing to connect the dots. 
Learning moment.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-09-17 21:10:06\" },\n\t{ \"post_id\": 4604, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Correct.\", \"post_time\": \"2013-09-17 21:06:42\" },\n\t{ \"post_id\": 4603, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"DSC\", \"post_text\": \"To summarize to make sure I understand: The URLs for both SOAP and JSON requests are identical. Roxie examines the HTTP content type to determine how to both parse the request and generate the response. It Just Works.\\n\\nIs that right?\", \"post_time\": \"2013-09-17 21:04:11\" },\n\t{ \"post_id\": 4602, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Sending JSON requests directly to roxie is supported in 4.0. The trick is not in the url used but rather in setting the HTTP content type to "application/json" and formatting the JSON request correctly.\\n\\nEven though you're not planning on using WsEcl in production, it can still be a great\\ndevelopment and QA tool, showing you the format of the request you should send to roxie and allowing you to test your own requests, etc.\\n\\nNavigate to the WsEcl form for your query. Fill in any values you would like populated and change the drop down box below the form to read "JSON Test" and click submit.\\n\\nThe Request box should be populated with a well formatted JSON request. Click submit to see what the roxie response will look like.\\n\\nFYI In the latest release sending JSON and SOAP requests to roxie through WsEcl has also been optimized, minimizing the overhead. It no longer gets workunit resources from DALI for pre and post processing. So if you get benefit from using WsEcl you can reconsider using it for SOAP and JSON requests.\", \"post_time\": \"2013-09-17 20:59:36\" },\n\t{ \"post_id\": 4599, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"DSC\", \"post_text\": \"I think that v4.0 does support it. The RDDERef-4.0.0-9 manual talks about it (page 37) and the release notes cite it ("HPCC-8953 Add roxie support for JSON/HTTP queries").\\n\\nI've used the direct SOAP interface without a problem. I'm hoping to do the same thing with JSON.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-09-17 19:49:39\" },\n\t{ \"post_id\": 4598, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"sbagaria\", \"post_text\": \"I have not tried with v4.0. But for previous versions, just send your SOAP query on port 9876 without any path.\\n\\nAFAIK, there is no JSON support when querying Roxie directly. ESP provides the additional JSON layer to Roxie. I don't think this has changed in v4.0.\", \"post_time\": \"2013-09-17 19:44:02\" },\n\t{ \"post_id\": 4596, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"DSC\", \"post_text\": \"[quote="sbagaria":1bqwxq0t]Resolved with a hint from Richard Chapman. wsECL on port 8002 is to be used strictly for testing. 
For high performance systems, we should use Roxie directly on port 9876.\\n\\nFor example, if previously the SOAP query URL looked like this\\nhttp://localhost:8002/wsECL/soap/query/roxie/queryname\\nit should now look like this\\nhttp://localhost:9876/\\n\\nThe actual SOAP query and response will remain the same.\\n\\nWhat does a URL look like for querying a Roxie server directly with JSON, with v4.0 of HPCC? The RDDERef documentation leaves out that important factoid.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-09-17 19:41:03\" },\n\t{ \"post_id\": 4142, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for sharing the post and how it was resolved.\", \"post_time\": \"2013-05-22 14:18:51\" },\n\t{ \"post_id\": 4121, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"sbagaria\", \"post_text\": \"Related post - http://hpccsystems.com/bb/viewtopic.php?f=14&t=486\", \"post_time\": \"2013-05-20 13:30:00\" },\n\t{ \"post_id\": 4119, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"sbagaria\", \"post_text\": \"This also means that you don't need multiple ESP instances. You can query any of the Roxie servers on your cluster directly. This can be put behind an external load balancer if you wish.\", \"post_time\": \"2013-05-20 13:25:46\" },\n\t{ \"post_id\": 4118, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"Re: High throughput Roxie clusters\", \"username\": \"sbagaria\", \"post_text\": \"Resolved with a hint from Richard Chapman. wsECL on port 8002 is to be used strictly for testing. For high performance systems, we should use Roxie directly on port 9876.\\n\\nFor example, if previously the SOAP query URL looked like this\\nhttp://localhost:8002/wsECL/soap/query/roxie/queryname\\nit should now look like this\\nhttp://localhost:9876/\\n\\nThe actual SOAP query and response will remain the same.\", \"post_time\": \"2013-05-20 13:12:50\" },\n\t{ \"post_id\": 4116, \"topic_id\": 916, \"forum_id\": 15, \"post_subject\": \"High throughput Roxie clusters\", \"username\": \"sbagaria\", \"post_text\": \"I am setting up a high throughput Roxie cluster but it seems like the daserver process on the master node is the bottleneck. \\n\\nI have turned up the logging on my Roxie servers and they now record the resource stats for each query. I can see from the Roxie logs that the query needed only 10 ms to execute. However, the end to end turnaround time for the query (executed on localhost) was 60 ms. On some more investigation, I found that the extra time was for the post processing by daserver and esp processes. I was successful in setting up multiple instances for esp and load balancing externally between those ESP instances, but I am not sure if that helped. Sometimes my ESP servers will just stop communicating new requests to Roxie until Dali is restarted. In the ESP logs, I see the following lines:\\n\\n00000302 2013-05-17 20:52:32 20428 23613 "WARNING: Excessive concurrent Dali SDS client transactions. Transaction delayed."\\n
\\nfollowed by \\n\\n00000680 2013-05-17 20:57:31 20428 23614 "Sending SOAP Fault(1315): <?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext"><soap:Body><soap:Fault><faultcode>1</faultcode><faultstring>[ 1: SDS: Lock timeout\\nSDS Reply Error : SDS: Lock timeout\\nFailed to establish lock to QuerySets/\\nExisting lock status: Locks on path: /QuerySets/\\nEndpoint |SessionId |ConnectionId |mode |time(duration)]\\n\\n10.38.251.1:7234 |630000042a |6300009cf8 |242 |2013-05-18T00:52:33(298986 ms)\\n] </faultstring><faultactor>Esp</faultactor><detail><Exceptions xmlns="urn:hpccsystems:ecl:soap" xsi:schemaLocation="urn:hpccsystems:ecl:soap http://10.38.251.1:8002/WsEcl/soap?xsd"><Source>Esp</Source><Exception><Code>1</Code><Audience>user</Audience><Message>SDS: Lock timeout\\nSDS Reply Error : SDS: Lock timeout\\nFailed to establish lock to QuerySets/\\nExisting lock status: Locks on path: /QuerySets/\\nEndpoint |SessionId |ConnectionId |mode |time(duration)]\\n\\n10.38.251.1:7234 |630000042a |6300009cf8 |242 |2013-05-18T00:52:33(298986 ms)\\n</Message></Exception></Exceptions></detail></soap:Fault></soap:Body></soap:Envelope>"\\n
\\n\\nWhat are the best practices around setting up a high throughput Roxie cluster? It seems like the daserver process is a botleneck at the moment.\\n\\nMy HPCC version is 3.10.8-3.\", \"post_time\": \"2013-05-18 01:04:56\" },\n\t{ \"post_id\": 4459, \"topic_id\": 987, \"forum_id\": 15, \"post_subject\": \"Re: cluster redundancy\", \"username\": \"jonburger\", \"post_text\": \"I'm not sure I totally understand your layout, but I will post best practices as far as redundancy is concerned. I will go service by service:\\n\\nDali - this has to be run in an active/passive configuration. My suggestion is to use standard clustering with a quorum and a takover VIP. Move the VIP and data directory over to the other node and restart the dali service upon failure. Think standard clustering setup (shared array) or potentially DRBD with pacemaker/heartbeat. You need a lot of RAM for dali.\\n\\nDFUServer - this can be run active/active/active. There is no need for a load balancer or VIP. Each instance routinely queries the dali for "spray/despray" workunits. Should one fail, the other(s) will continue to pull new workunits. \\n\\nECLccServer - this can be run active/active/active. No need for a load balancer or VIP for this either. Works just like DFUServer.\\n\\nESP/ECLWatch/WsECL - For redundancy, these *will* need to be behind a VIP of some sort. For an active/active design you will need a load balancer. For active/passive you can use pacemaker/heartbeat. If you run active/active you will want to use sticky bits and keep a single client on a single server for the life of their session for the ECLWatch service (port 8010). The others (8002,8003) you are not required to use sticky bits.\\n\\nECLAgent - These can be run active/active/active. No need for a load balancer or VIP. Works like DFUServer.\\n\\nFtSlave - One per node - so nothing needed here.\\n\\nSasha - Sasha will have to be set up similar to dali. See above. We are working on having active/active sashas.\\n\\nDafilesrv - One per node - nothing needed.\\n\\nECLScheduler - No need for load balancer, runs active/active works like DFUServer.\\n\\nThormaster - You want this set up like dali in an active/passive configuration. No load balancer. Failover VIP.\\n\\nDropzone - this is just a fileserver that runs the dafilesrv process. So configure it in the same fashion as you would any active/passive fileserver. Like the dali.\\n\\n\\nMy only other thoughts are you need to make sure you give significant resources to key components. Dali is very RAM intensive, eclagent and eclserver are very processor dependent. Thor nodes need a minimum of 4G RAM per.\", \"post_time\": \"2013-08-14 18:33:34\" },\n\t{ \"post_id\": 4431, \"topic_id\": 987, \"forum_id\": 15, \"post_subject\": \"Re: cluster redundancy\", \"username\": \"JSJ\", \"post_text\": \"Thanks... I have updated my original post, and I will continue to do so until I have a working config.\", \"post_time\": \"2013-08-05 16:44:21\" },\n\t{ \"post_id\": 4424, \"topic_id\": 987, \"forum_id\": 15, \"post_subject\": \"Re: cluster redundancy\", \"username\": \"sort\", \"post_text\": \"We will be looking into writing the document regarding "high-available/auto-failover setups?". \\n\\n\\nASSUMPTIONS:\\na. mydali - Sevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\nThis is not valid. 
There is 1 dali per environment (same for sasha)\", \"post_time\": \"2013-08-02 16:57:12\" },\n\t{ \"post_id\": 4417, \"topic_id\": 987, \"forum_id\": 15, \"post_subject\": \"Re: cluster redundancy\", \"username\": \"DSC\", \"post_text\": \"I'm interested in this as well. Any documentation on high-available/auto-failover setups?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-08-01 19:50:39\" },\n\t{ \"post_id\": 4397, \"topic_id\": 987, \"forum_id\": 15, \"post_subject\": \"cluster redundancy\", \"username\": \"JSJ\", \"post_text\": \"I am working out the high availability capabilities of HPCC. The only comprehensive post I could find was from 2011/06 I've setup the following test configuration. Could anyone verify my assumptions?\\n\\n\\nSETUP:\\n==Node===Type===Components\\n================================================================\\n1\\n
\\nVM\\n
\\nmydropzone\\nmydali \\nmydfuserver \\nmyeclccserver \\nmyesp \\nmyeclagent \\nmyftslave \\nmysasha \\nmydafilesrv \\nmythor \\nmyeclscheduler\\n
\\n2\\n
\\nPhy\\n
\\nmydfuserver \\nmyeclccserver \\nmyesp \\nmyeclagent \\nmyftslave \\nmysasha \\nmydafilesrv \\nmythor \\nmyeclscheduler\\n
\\n3\\n
\\nPhy\\n
\\nmyroxie\\nmyftslave\\nmydafilesrv\\nmythor\\n
\\n4\\n
\\nPhy\\n
\\nmyroxie\\nmyftslave\\nmydafilesrv\\nmythor\\n
\\n5\\n
\\nPhy\\n
\\nmyroxie\\nmyftslave\\nmydafilesrv\\nmythor\\n
\\n6\\n
\\nPhy\\n
\\nmyroxie\\nmyftslave\\nmydafilesrv\\nmythor\\n
\\n7\\n
\\n================================================================\\n\\nASSUMPTIONS:\\nPhy\\n
\\nmyroxie\\nmyftslave\\nmydafilesrv\\nmythor\\n
\\na.\\tmydali\\n
\\n\\t\\ni.\\tMydali runs on a VM. The virtual hard drive Is replicated to separated storage accessible from the virtualization cluster. Recovery via booting the standby VM assigned to the replicated virtual drive \\n
b.\\tmydfuserver\\n
\\n\\t\\ni.\\tSevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\n
c.\\tMyeclccserver\\n
\\n\\t\\ni.\\tSevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\n
d.\\tMyesp\\n
\\n\\t\\ni.\\t Sevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\n
e.\\tMyeclagent\\n
\\n\\t\\ni.\\tSevice active/active on 2 nodes with a load balancer (node 1 and node 2)\\n
f.\\tMyftslave\\n
\\n\\t\\ni.\\tNo redundancy needed for failed node \\n
g.\\tMysasha\\n
\\n\\t\\ni.\\tSevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\n
h.\\tMyeclscheduler\\n
\\n\\t\\ni.\\tSevice active/active on 2 nodes with a load balancer (node 1 and node 2) \\n
i.\\tMydafilesrv\\n
\\n\\t\\ni.\\tNo redundancy needed for failed node \\n
j.\\tMythor\\n
\\n\\t\\ni.\\tThor master runs on a VM. The virtual hard drive Is replicated to separated storage accessible from the virtualization cluster. Recovery via booting the standby VM assigned to the replicated virtual drive \\nii.\\tThor slaves mirror data between themselves \\niii.\\tnode 2 is a thor slave swap node \\n
k.\\tMyroxie\\n
\\n\\t\\ni.\\tRoxie is configured with “full redundancy” in order to mirror data between nodes. \\n
l.\\tMydropzone\\n
\", \"post_time\": \"2013-07-31 14:56:53\" },\n\t{ \"post_id\": 4472, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Re: Thor swap failback\", \"username\": \"richardkchapman\", \"post_text\": \"Shouldn't the backupnode process have replicated the files anyway ?\\n\\nHow/when do we run backupnode on these systems - is it something the user has to do manually (or set up via cron), or is it automatic? Jake?\", \"post_time\": \"2013-08-19 10:35:55\" },\n\t{ \"post_id\": 4443, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Re: Thor swap failback\", \"username\": \"JSJ\", \"post_text\": \"Yes, replication is enabled, but I think I found the problem. It looks like replication was disabled in the DFU jobs that placed the files into the thor cluster. I will have the DFU jobs run again with replication enabled and try again.\", \"post_time\": \"2013-08-08 04:01:06\" },\n\t{ \"post_id\": 4441, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Re: Thor swap failback\", \"username\": \"jsmith\", \"post_text\": \"Is/was replication turned on in the Thor Cluster?\\n(replicateOutputs and replicateAsync would need setting to true in configmgr)\\n\\nThe error ('No physical file part for logical file'), should list the location it looked for the part.. and if replication is on, it should state the path of the primary part and the path to replicate part on the buddy node..\", \"post_time\": \"2013-08-07 15:00:17\" },\n\t{ \"post_id\": 4437, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Re: Thor swap failback\", \"username\": \"JSJ\", \"post_text\": \"-I shut down thor and forced the swap manually from ESP.\\n-Yes, the system was working on the spare.\\n-I tried to configure the original server as a spare, but that did not seem to work. I received an error like the one below when I submitted a job that accessed the existing logical files. Do I need to force the cluster to copy the mirrored data back to this node somehow? (This also raises questions about why the mirror is not working)\\n\\n10004: System error: 10004: Graph[1], SLAVE xxx.xxx.xxx.xxx:20100: Graph[1], csvread[2]: No physical file part for logical file\", \"post_time\": \"2013-08-07 01:09:59\" },\n\t{ \"post_id\": 4435, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Re: Thor swap failback\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nWe need to clarify a couple things before I can offer a proper response. \\n\\nHow did you run the swap node?\\nDoes your system work now that you've swapped in the standby node for the failed node?\\nIf it's working, then you can just add the repaired node back in as a standby node.\", \"post_time\": \"2013-08-06 18:01:23\" },\n\t{ \"post_id\": 4432, \"topic_id\": 993, \"forum_id\": 15, \"post_subject\": \"Thor swap failback\", \"username\": \"JSJ\", \"post_text\": \"I have successfully swapped a failed Thor node for my backup node. Now the hard drive is repaired in the failed Thor node. How do I put it back in service?\", \"post_time\": \"2013-08-05 17:14:20\" },\n\t{ \"post_id\": 7692, \"topic_id\": 1208, \"forum_id\": 15, \"post_subject\": \"Re: Performance with Dfuserver\", \"username\": \"jwilt\", \"post_text\": \"Is the fix referred to above the "quotedTerminator" option in dfuplus?\\n\\nThe dfuplus usage statement shows:\\n\\nspray options:\\n ... 
\\n options for csv/delimited:\\n ...\\n quotedTerminator=1|0 -- optional, default is 1 (quoted terminators in rows)\\n\\nThanks again.\", \"post_time\": \"2015-05-29 02:36:10\" },\n\t{ \"post_id\": 7689, \"topic_id\": 1208, \"forum_id\": 15, \"post_subject\": \"Re: Performance with Dfuserver\", \"username\": \"alex\", \"post_text\": \"Bumping this thread...\\n\\nI see in the source code for the DFU server that there is something called "QuickPartitioner", which seems like the implementation asked about in the OP. How do I take advantage of this? Is there an argument to STD.File.SprayVariable or something? A flag to dfuplus?\", \"post_time\": \"2015-05-28 20:51:22\" },\n\t{ \"post_id\": 6351, \"topic_id\": 1208, \"forum_id\": 15, \"post_subject\": \"Re: Performance with Dfuserver\", \"username\": \"AttilaV\", \"post_text\": \"Hi,\\n\\nIn early May 2014 it is implemented in HPCC 5.0. \\n\\nAttila\", \"post_time\": \"2014-09-19 18:02:02\" },\n\t{ \"post_id\": 5227, \"topic_id\": 1208, \"forum_id\": 15, \"post_subject\": \"Performance with Dfuserver\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nIt seems like dfuserver takes a while at the very beginning to scan large files to identify the offsets for each node. Of course, this involves I/O and network time and can naturally take a while.\\n\\nI'm wondering if searching for terminators (and separators) in quoted strings forces dfuserver to do a full scan on the file? \\nIf so... does dfuserver have an option to indicate "quoted terminators" don't exist in the in-coming file - which would allow dfuserver to do a more streamlined generation of offsets (for a 10-node cluster, seek to 10%, find the next terminator, have offset... repeat...).\\n\\nThanks.\", \"post_time\": \"2014-02-18 16:57:07\" },\n\t{ \"post_id\": 5537, \"topic_id\": 1274, \"forum_id\": 15, \"post_subject\": \"Re: HPCC Cluster Sizing\", \"username\": \"bforeman\", \"post_text\": \"Hi Lokesh,\\n\\nThe HPCC Community Wiki has some interesting topics regarding this:\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/Sample+Sizing+Guide+for+HPCC+-+Heavy+processing+on+low+data+volume\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/Sample+Sizing+Guide+for+HPCC+-+High+Data+volume+-+Typical+scenario\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/How+to+expand+a+HPCC+Cluster\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-04-23 13:40:51\" },\n\t{ \"post_id\": 5504, \"topic_id\": 1274, \"forum_id\": 15, \"post_subject\": \"HPCC Cluster Sizing\", \"username\": \"lokesh\", \"post_text\": \"Hi,\\n\\nI am trying to gauge the estimates for cluster size and I have some queries regarding the same:\\n\\n1. How many slaves per node should be configured ? How is it limited by use of number of cores and memory.\\n\\n2. Is there any rule of thumb/guideline to check what kind of processing power would be required for particular task - specifically my most expensive tasks would be ARM/Logistic Regression (both from ML library).\", \"post_time\": \"2014-04-11 06:50:52\" },\n\t{ \"post_id\": 5866, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Thank you! This did the trick. 
The cluster is up and running.\", \"post_time\": \"2014-06-10 21:51:25\" },\n\t{ \"post_id\": 5864, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"jsmith\", \"post_text\": \"It's a HPCC environment.xml setting, which you can configure using the config manager.\", \"post_time\": \"2014-06-10 17:02:26\" },\n\t{ \"post_id\": 5863, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Thank you for the reply! When you say "set the property" do you mean that it is an environment variable in the system, or do I set it using the configuration manager?\", \"post_time\": \"2014-06-10 16:59:39\" },\n\t{ \"post_id\": 5862, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"jsmith\", \"post_text\": \"Right, the handling of that error looks like it could certainly be improved (I've opened a new JIRA issue [HPCC-11651] to track)\\n\\nThor automatically configures the amount of memory the slaves use by examining the amount of physical memory and dedicating 75% of it for itself.\\nIt assumes that the master/slaves are all homogeneous.\\n\\nLooks like your master has 12GB in this case (Thor decided to use ~9GB of it), which seems to be more than yours slaves have.\\n\\nYou can manually configure how much nodes use by setting 'globalMemorySize' in the environment. You probably want to set it to 75% of the physical memory of your slave).\\nThe master will use the same property if set, or you can override by defining 'masterMemorySize'\\n\\nHope that helps.\", \"post_time\": \"2014-06-10 16:44:03\" },\n\t{ \"post_id\": 5860, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"You are right! Here is the error in the log:\\n\\n0000000A 2014-06-10 09:28:33.871 23215 23215 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 871987 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 871987 MB, /var/lib/HPCCSystems/mythor/temp = 871987 MB"\\n0000000B 2014-06-10 09:28:33.871 23215 23215 "ThorSlave Version LCR - 4.1 started"\\n0000000C 2014-06-10 09:28:33.871 23215 23215 "Slave 128.2.219.77:20100 - temporary dir set to : /var/lib/HPCCSystems/mythor/temp/"\\n0000000D 2014-06-10 09:28:33.871 23215 23215 "Using querySo directory: /var/lib/HPCCSystems/queries/mythor_20100"\\n0000000E 2014-06-10 09:28:33.871 23215 23215 "WARNING: Slave has less memory than master node"\\n0000000F 2014-06-10 09:28:33.871 23215 23215 "RoxieMemMgr: Setting memory limit to 9445572608 bytes (9008 pages)"\\n00000010 2014-06-10 09:28:33.872 23215 23215 "RoxieMemMgr: posix_memalign (alignment=1048576, size=9462349824) failed - ret=12 (ENOMEM There was insufficient memory to fulfill the allocation request.)"\\n00000011 2014-06-10 09:28:33.872 23215 23215 "ERROR: 1303: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/thorlcr/slave/thslavemain.cpp(417) : ThorSlave : RoxieMemMgr: Unable to create heap"\\n00000012 2014-06-10 09:28:33.872 23215 23215 "temp directory cleared"\", \"post_time\": \"2014-06-10 13:30:16\" },\n\t{ \"post_id\": 5858, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"jsmith\", \"post_text\": \"i.\\tMydropzone runs on a VM. 
The virtual hard drive Is replicated to separated storage accessible from the virtualization cluster. Recovery via booting the standby VM assigned to the replicated virtual drive \\n
0000001C 2014-06-09 14:51:38.188 8000 8000 "Listening for graph"\\n0000001D 2014-06-09 14:51:38.191 8000 8013 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"
\\n\\nI think that suggests that the slave started, registered and then immediately exited/crashed.\\n\\nCan you attach some slave logging around this time frame?\", \"post_time\": \"2014-06-10 08:00:04\" },\n\t{ \"post_id\": 5857, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"[quote="clo":2drsta27]Hi, I was wondering what version of the platform you're currently running as well.\\n\\nSorry, I must have missed this! I am running "community_4.2.4-3".\", \"post_time\": \"2014-06-09 23:55:31\" },\n\t{ \"post_id\": 5856, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Just another piece of information regarding this problem:\\n\\nIt seems that it switches from "ThorCluster - thor" to "Cluster not attached" for several hours. Then, after hours it decides to permanently stay in the "ThorCluster - thor" position. The problem here is that, even though it looks like everything is in good shape, when I submit a job it simply says "RUNNING" and then goes to the "FAILED" state after about 30 minutes. When it is in the "RUNNING" state, nothing is going on in any of the servers (none are using any CPU).\", \"post_time\": \"2014-06-09 23:52:28\" },\n\t{ \"post_id\": 5855, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Find the last few lines from my log file below. It is over the 256KiB limit, so I cannot upload. You are right, it looks like it is not seeing the slave correctly. Thanks for any insight you can shed onto this.\\n\\n\\n------------------------------------------------------------------------\\n\\n0000000E 2014-06-09 14:51:38.153 8000 8000 "ThorMaster version 4.1, Started on 128.2.218.180:20000"\\n0000000D 2014-06-09 14:51:38.153 8000 8012 "Started watchdog"\\n0000000F 2014-06-09 14:51:38.153 8000 8000 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000010 2014-06-09 14:51:38.153 8000 8000 "Creating sentinel file thor.sentinel for rerun from script"\\n00000011 2014-06-09 14:51:38.153 8000 8000 "Waiting for 1 slaves to register"\\n00000012 2014-06-09 14:51:38.153 8000 8000 "Verifying connection to slave 1"\\n00000013 2014-06-09 14:51:38.187 8000 8000 "verified connection with 128.2.219.77:20100"\\n00000014 2014-06-09 14:51:38.187 8000 8000 "Slaves connected, initializing.."\\n00000015 2014-06-09 14:51:38.187 8000 8000 "Initialization sent to slave group"\\n00000016 2014-06-09 14:51:38.188 8000 8000 "Registration confirmation from 128.2.219.77:20100"\\n00000017 2014-06-09 14:51:38.188 8000 8000 "Slave 1 (128.2.219.77:20100) registered"\\n00000018 2014-06-09 14:51:38.188 8000 8000 "Slaves initialized"\\n00000019 2014-06-09 14:51:38.188 8000 8000 "verifying mp connection to rest of cluster"\\n0000001A 2014-06-09 14:51:38.188 8000 8000 "verified mp connection to rest of cluster"\\n0000001B 2014-06-09 14:51:38.188 8000 8000 ",Progress,Thor,Startup,mythor,mythor,thor.thor,//128.2.218.180/var/log/HPCCSystems/mythor/thormaster.2014_06_09.log"\\n0000001C 2014-06-09 14:51:38.188 8000 8000 "Listening for graph"\\n0000001D 2014-06-09 14:51:38.191 8000 8013 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 
128.2.219.77:20100"\\n0000001E 2014-06-09 14:51:38.191 8000 8008 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"\\n0000001F 2014-06-09 14:51:38.191 8000 8012 "ERROR: 10056: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(787) : abortThor : Watchdog has lost connectivity with Thor slave: 128.2.219.77:20100 (Process terminated or node down?)"\\n00000020 2014-06-09 14:51:38.191 8000 8012 "abortThor called"\\n00000021 2014-06-09 14:51:38.191 8000 8012 "Stopping jobManager"\\n00000022 2014-06-09 14:51:38.191 8000 8012 "aborting any current active job"\\n00000023 2014-06-09 14:51:38.191 8000 8012 "Watchdog : Unknown Machine! [0.0.0.0]"\\n00000024 2014-06-09 14:51:38.193 8000 8000 ",Progress,Thor,Terminate,mythor,mythor,thor.thor"\\n00000025 2014-06-09 14:51:38.193 8000 8000 "ThorMaster terminated OK"\\n00000026 2014-06-09 14:51:39.194 8000 8000 "priority set id=140199223068416 policy=0 pri=0 PID=8000"\\n00000027 2014-06-09 14:51:39.194 8000 8000 "Stopping watchdog"\\n00000028 2014-06-09 14:51:39.194 8000 8000 "Stopped watchdog"\\n00000029 2014-06-09 14:51:39.205 8000 8000 "Thor closing down 6"\\n0000002A 2014-06-09 14:51:39.205 8000 8000 "Thor closing down 5"\\n0000002B 2014-06-09 14:51:39.205 8000 8000 "Thor closing down 4"\\n0000002C 2014-06-09 14:51:39.205 8000 8000 "Thor closing down 3"\\n0000002D 2014-06-09 14:51:39.205 8000 8000 "Thor closing down 2"\\n0000002E 2014-06-09 14:51:39.216 8000 8000 "Thor closing down 1"\\n00000002 2014-06-09 14:51:40.598 8205 8205 "Opened log file //128.2.218.180/var/log/HPCCSystems/mythor/thormaster.2014_06_09.log"\\n00000003 2014-06-09 14:51:40.598 8205 8205 "Build community_4.2.4-3"\\n00000004 2014-06-09 14:51:40.598 8205 8205 "calling initClientProcess Port 20000"\\n00000005 2014-06-09 14:51:40.599 8205 8205 "Found file 'thorgroup', using to form thor group"\\n00000006 2014-06-09 14:51:40.599 8205 8205 "Checking cluster replicate nodes"\\n00000007 2014-06-09 14:51:40.603 8205 8205 "Cluster replicate nodes check completed in 4ms"\\n00000008 2014-06-09 14:51:40.604 8205 8205 "Global memory size = 9008 MB"\\n00000009 2014-06-09 14:51:40.604 8205 8205 "RoxieMemMgr: Setting memory limit to 9445572608 bytes (9008 pages)"\\n0000000A 2014-06-09 14:51:40.604 8205 8205 "RoxieMemMgr: 9024 Pages successfully allocated for the pool - memsize=9462349824 base=0x7f834bf00000 alignment=1048576 bitmapSize=282"\\n0000000B 2014-06-09 14:51:40.606 8205 8205 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 1403102 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSystems/mythor/temp = 1403102 MB"\\n0000000C 2014-06-09 14:51:40.610 8205 8205 "Starting watchdog"\\n0000000E 2014-06-09 14:51:40.610 8205 8205 "ThorMaster version 4.1, Started on 128.2.218.180:20000"\\n0000000D 2014-06-09 14:51:40.610 8205 8217 "Started watchdog"\\n0000000F 2014-06-09 14:51:40.610 8205 8205 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000010 2014-06-09 14:51:40.610 8205 8205 "Creating sentinel file thor.sentinel for rerun from script"\\n00000011 2014-06-09 14:51:40.611 8205 8205 "Waiting for 1 slaves to register"\\n00000012 2014-06-09 14:51:40.611 8205 8205 "Verifying connection to slave 1"\\n00000013 2014-06-09 14:51:40.627 8205 8205 "verified connection with 128.2.219.77:20100"\\n00000014 2014-06-09 14:51:40.627 8205 8205 "Slaves 
connected, initializing.."\\n00000015 2014-06-09 14:51:40.628 8205 8205 "Initialization sent to slave group"\\n00000016 2014-06-09 14:51:40.628 8205 8205 "Registration confirmation from 128.2.219.77:20100"\\n00000017 2014-06-09 14:51:40.628 8205 8205 "Slave 1 (128.2.219.77:20100) registered"\\n00000018 2014-06-09 14:51:40.628 8205 8205 "Slaves initialized"\\n00000019 2014-06-09 14:51:40.628 8205 8205 "verifying mp connection to rest of cluster"\\n0000001A 2014-06-09 14:51:40.628 8205 8205 "verified mp connection to rest of cluster"\\n0000001B 2014-06-09 14:51:40.628 8205 8205 ",Progress,Thor,Startup,mythor,mythor,thor.thor,//128.2.218.180/var/log/HPCCSystems/mythor/thormaster.2014_06_09.log"\\n0000001C 2014-06-09 14:51:40.629 8205 8205 "Listening for graph"\\n0000001D 2014-06-09 14:51:40.631 8205 8213 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"\\n0000001E 2014-06-09 14:51:40.631 8205 8217 "ERROR: 10056: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(787) : abortThor : Watchdog has lost connectivity with Thor slave: 128.2.219.77:20100 (Process terminated or node down?)"\\n0000001F 2014-06-09 14:51:40.631 8205 8217 "abortThor called"\\n00000020 2014-06-09 14:51:40.631 8205 8218 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"\\n00000021 2014-06-09 14:51:40.631 8205 8217 "Stopping jobManager"\\n00000022 2014-06-09 14:51:40.632 8205 8217 "aborting any current active job"\\n00000023 2014-06-09 14:51:40.632 8205 8217 "Watchdog : Unknown Machine! 
[0.0.0.0]"\\n00000024 2014-06-09 14:51:40.632 8205 8205 ",Progress,Thor,Terminate,mythor,mythor,thor.thor"\\n00000025 2014-06-09 14:51:40.632 8205 8205 "ThorMaster terminated OK"\\n00000026 2014-06-09 14:51:41.634 8205 8205 "priority set id=140211580438272 policy=0 pri=0 PID=8205"\\n00000027 2014-06-09 14:51:41.634 8205 8205 "Stopping watchdog"\\n00000028 2014-06-09 14:51:41.634 8205 8205 "Stopped watchdog"\\n00000029 2014-06-09 14:51:41.644 8205 8205 "Thor closing down 6"\\n0000002A 2014-06-09 14:51:41.644 8205 8205 "Thor closing down 5"\\n0000002B 2014-06-09 14:51:41.644 8205 8205 "Thor closing down 4"\\n0000002C 2014-06-09 14:51:41.644 8205 8205 "Thor closing down 3"\\n0000002D 2014-06-09 14:51:41.645 8205 8205 "Thor closing down 2"\\n0000002E 2014-06-09 14:51:41.655 8205 8205 "Thor closing down 1"\\n00000002 2014-06-09 14:51:43.046 8412 8412 "Opened log file //128.2.218.180/var/log/HPCCSystems/mythor/thormaster.2014_06_09.log"\\n00000003 2014-06-09 14:51:43.046 8412 8412 "Build community_4.2.4-3"\\n00000004 2014-06-09 14:51:43.046 8412 8412 "calling initClientProcess Port 20000"\\n00000005 2014-06-09 14:51:43.048 8412 8412 "Found file 'thorgroup', using to form thor group"\\n00000006 2014-06-09 14:51:43.048 8412 8412 "Checking cluster replicate nodes"\\n00000007 2014-06-09 14:51:43.049 8412 8412 "Cluster replicate nodes check completed in 1ms"\\n00000008 2014-06-09 14:51:43.050 8412 8412 "Global memory size = 9008 MB"\\n00000009 2014-06-09 14:51:43.050 8412 8412 "RoxieMemMgr: Setting memory limit to 9445572608 bytes (9008 pages)"\\n0000000A 2014-06-09 14:51:43.050 8412 8412 "RoxieMemMgr: 9024 Pages successfully allocated for the pool - memsize=9462349824 base=0x7fe563f00000 alignment=1048576 bitmapSize=282"\\n0000000B 2014-06-09 14:51:43.050 8412 8412 "Disk space: /var/lib/HPCCSystems/hpcc-data/thor = 1403101 MB, /var/lib/HPCCSystems/hpcc-mirror/thor = 0 MB, /var/lib/HPCCSystems/mythor/temp = 1403101 MB"\\n0000000C 2014-06-09 14:51:43.052 8412 8412 "Starting watchdog"\\n0000000E 2014-06-09 14:51:43.052 8412 8412 "ThorMaster version 4.1, Started on 128.2.218.180:20000"\\n0000000D 2014-06-09 14:51:43.052 8412 8424 "Started watchdog"\\n0000000F 2014-06-09 14:51:43.052 8412 8412 "Thor name = mythor, queue = thor.thor, nodeGroup = mythor"\\n00000010 2014-06-09 14:51:43.053 8412 8412 "Creating sentinel file thor.sentinel for rerun from script"\\n00000011 2014-06-09 14:51:43.053 8412 8412 "Waiting for 1 slaves to register"\\n00000012 2014-06-09 14:51:43.053 8412 8412 "Verifying connection to slave 1"\\n00000013 2014-06-09 14:51:43.086 8412 8412 "verified connection with 128.2.219.77:20100"\\n00000014 2014-06-09 14:51:43.086 8412 8412 "Slaves connected, initializing.."\\n00000015 2014-06-09 14:51:43.086 8412 8412 "Initialization sent to slave group"\\n00000016 2014-06-09 14:51:43.087 8412 8412 "Registration confirmation from 128.2.219.77:20100"\\n00000017 2014-06-09 14:51:43.087 8412 8412 "Slave 1 (128.2.219.77:20100) registered"\\n00000018 2014-06-09 14:51:43.087 8412 8412 "Slaves initialized"\\n00000019 2014-06-09 14:51:43.087 8412 8412 "verifying mp connection to rest of cluster"\\n0000001A 2014-06-09 14:51:43.087 8412 8412 "verified mp connection to rest of cluster"\\n0000001B 2014-06-09 14:51:43.087 8412 8412 ",Progress,Thor,Startup,mythor,mythor,thor.thor,//128.2.218.180/var/log/HPCCSystems/mythor/thormaster.2014_06_09.log"\\n0000001C 2014-06-09 14:51:43.087 8412 8412 "Listening for graph"\\n0000001D 2014-06-09 14:51:43.090 8412 8420 "WARNING: 
/var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"\\n0000001E 2014-06-09 14:51:43.090 8412 8425 "WARNING: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/system/mp/mpcomm.cpp(2225) : CInterCommunicator: ignoring closed endpoint: 128.2.219.77:20100"\\n0000001F 2014-06-09 14:51:43.090 8412 8424 "ERROR: 10056: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-4.2.4-3/CE/ubuntu-12.04-amd64/HPCC-Platform/thorlcr/master/thgraphmanager.cpp(787) : abortThor : Watchdog has lost connectivity with Thor slave: 128.2.219.77:20100 (Process terminated or node down?)"\\n00000020 2014-06-09 14:51:43.090 8412 8424 "abortThor called"\\n00000021 2014-06-09 14:51:43.090 8412 8424 "Stopping jobManager"\\n00000022 2014-06-09 14:51:43.090 8412 8424 "aborting any current active job"\\n00000023 2014-06-09 14:51:43.090 8412 8424 "Watchdog : Unknown Machine! [0.0.0.0]"\\n00000024 2014-06-09 14:51:43.091 8412 8412 ",Progress,Thor,Terminate,mythor,mythor,thor.thor"\\n00000025 2014-06-09 14:51:43.091 8412 8412 "ThorMaster terminated OK"\", \"post_time\": \"2014-06-09 18:54:19\" },\n\t{ \"post_id\": 5853, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"jsmith\", \"post_text\": \""ThorCluster - thorCluster not attached"\\n\\nThat is probably an indication that thormaster is not running.\\nNormally thormaster (and most other components) will auto restart though, so whilst it wouldn't be surprising to see '... not attached' for a short period, if a component, e.g. Thor recycled, for it to be sustained would be.\\nFor example, if you deliberately kill the thormaster process, it is possible to briefly reproduce the "ThorCluster - thorCluster not attached", but in a few seconds it will be rerun.\\n\\nSo if it's consistently '.. not attached' for some time, it sounds like something is preventing the thormaster starting again, or it's alive and the process is defunct in some way.\\n\\nI suspect the current thormaster log, when the system is in this state, will shed some clues on what's going on.\\n\\nCan you attach here?\\nThanks.\", \"post_time\": \"2014-06-09 15:32:21\" },\n\t{ \"post_id\": 5852, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"clo\", \"post_text\": \"Hi, I was wondering what version of the platform you're currently running as well.\", \"post_time\": \"2014-06-09 15:24:32\" },\n\t{ \"post_id\": 5850, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Thanks for the help with this issue. I thought this was causing the other issues I was seeing in ECL Watch, but since this is expected behavior I'll explain what else is going on.\\n\\nWhen I go to ECL Watch, and click "target clusters", "ThorCluster - thor" switches between a green light saying everything is OK and a warning sign saying "ThorCluster - thorCluster not attached". 
When I click the error message, it says "0\\t2014-06-09 13:25:31 GMT: Cannot connect to SDS cluster mythor".\\n\\nAlso, any workunit I submit to the cluster gets blocked and stays in the blocked state until it times out.\\n\\nAny idea why this might be happening?\", \"post_time\": \"2014-06-09 13:27:13\" },\n\t{ \"post_id\": 5849, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"jsmith\", \"post_text\": \">When I go to start the services on the slave machine it says "No components on this node as defined by /etc/HPCCSystems/environment.xml". \\n\\n\\nThat sounds correct.\\n\\nThe thorslaves are directly managed, started and stopped by the thormaster, not by the service.\\nSo unless there are other components on the slave node, a 'No components on this node..' is expected.\\n\\nIt might be slightly clearer perhaps, if that message said 'No components to start on this node ..' or similar.\", \"post_time\": \"2014-06-09 10:33:28\" },\n\t{ \"post_id\": 5847, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Thank you for the pointers, my machine now can see the other node. I have one more hitch that is preventing me form running thor.\\n\\nWhen I go to start the services on the slave machine it says "No components on this node as defined by /etc/HPCCSystems/environment.xml". When I inspect the file manually, it appears to indicate that the node is a mythor slave. I've attached the configuration file. It is called environment.xml on the server, I had to add the .txt extension to get past the forum filters.\", \"post_time\": \"2014-06-06 16:09:47\" },\n\t{ \"post_id\": 5843, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"bforeman\", \"post_text\": \"Just to add to what Dan said from our HPCC team:\\n\\n1.\\tValidate the environment xml\\nmd5sum /etc/HPCCSystems/source/<modified xml> vs md5sum /etc/HPCCSystems/environment.xml
\\n(the running xml the system reads)\\n\\n2.\\tMake sure you restart all the components to read in the new xml. (Not just the THOR) ECLWatch gets the information about the components/Environment from DALI.-this is probably the root cause\\n\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2014-06-06 13:19:11\" },\n\t{ \"post_id\": 5841, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Re: Cluster not updating with new environment.xml\", \"username\": \"DSC\", \"post_text\": \"You might want to check out this older forum thread:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=14&t=932\\n\\nIf that doesn't work, you can try a heavy-handed approach. The system copies excerpts of the environment.xml file to another location for runtime purposes (this allows environment.xml to be updated without affecting a running cluster). Those copies are supposed to be rebuilt during startup, but it's possible that they are not in this case. So, you can try shutting down the cluster, deleting /var/lib/HPCCSystems/mythor/slaves on each of your nodes, then starting the cluster back up. That file is one of the excerpts, and it will be rebuilt if missing.\\n\\nHope one of these helps.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-06-06 11:50:48\" },\n\t{ \"post_id\": 5839, \"topic_id\": 1346, \"forum_id\": 15, \"post_subject\": \"Cluster not updating with new environment.xml\", \"username\": \"fmorstatter\", \"post_text\": \"Hello,\\n\\nI am trying to set up a 2-node HPCC cluster. I have followed the steps in the "Configuring a Multinode System" section of the HPCC Installation guide. When I initially pushed out the environment.xml to all of the nodes, I had the wrong IP address for one of the machines.\\n\\nTo mitigate this issue, I pushed out a new environment.xml (with the correct IP address) to the machines and restarted them. When starting the cluster, thor will not start.\\n\\nUpon further inspection, it seems the thor cluster is still looking for the machine with the wrong IP address. When I look at the thor cluster in ECL Watch, it clearly lists the wrong IP address in its list of machines, but does not have the new, correct IP address. \\n\\nI believe the old IP address is somehow cached in the system. What can I do to make HPCC read in the new IP address?\\n\\nThanks for your help.\", \"post_time\": \"2014-06-05 20:45:35\" },\n\t{ \"post_id\": 6315, \"topic_id\": 1446, \"forum_id\": 15, \"post_subject\": \"Re: ERROR:No component confirgured to run on this node\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Selecting an alternate NIC is discussed here: viewtopic.php?t=1420&p=6220#p6220\\n\\n-Gleb\", \"post_time\": \"2014-09-16 13:11:20\" },\n\t{ \"post_id\": 6314, \"topic_id\": 1446, \"forum_id\": 15, \"post_subject\": \"Re: ERROR:No component confirgured to run on this node\", \"username\": \"bforeman\", \"post_text\": \"Hi Shank,\\nI relayed your report to our HPCC team.\\n\\nIt sounds like a localhost issue in the config.\\nDo you have multiple interfaces on the box?\\n\\nTry using the actual IP of the interface you want to use..(just guessing here) in the environment.xml.\\n\\nThe actual config the system uses is in /etc/HPCCSystems/environment.xml .\\n\\nUse configmgr (start in /opt/HPCCSystems/sbin/configmgr and going in the browser to the IP:8015) to edit the one in /etc/HPCCSystems/source/environment.xml and pushing it to /etc/HPCCSystems/environment.xml\\n\\nIf that fails uninstall the rpm\\n\\n
\\n\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-16 13:05:48\" },\n\t{ \"post_id\": 6312, \"topic_id\": 1446, \"forum_id\": 15, \"post_subject\": \"ERROR:No component confirgured to run on this node\", \"username\": \"shank\", \"post_text\": \"I have installed the HPCC Server on a RHEL machine. While starting the server using this command - "sudo/sbin/service hpcc-init start", I am getting a "no component configured to run on this node error"\\n\\nCan you please help me resolving this problem\\n\\nRegards,\\nShank\", \"post_time\": \"2014-09-16 05:48:05\" },\n\t{ \"post_id\": 6374, \"topic_id\": 1459, \"forum_id\": 15, \"post_subject\": \"Re: configuration\", \"username\": \"bforeman\", \"post_text\": \"Hi Keren,\\n\\nI have an email out to the HPCC team regarding your question. What you are doing is pretty close to our training cluster, and we have configured all of the System Servers on a single box, and that seems to work pretty well.\\n\\nI will reply again when I hear back from the team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-09-23 13:44:38\" },\n\t{ \"post_id\": 6373, \"topic_id\": 1459, \"forum_id\": 15, \"post_subject\": \"configuration\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI would like to optimize the configuration of HPCC on a cluster of ten nodes with 4 cores and 4 disks. My jobs will be executing joins, sort, etc\\nShould I use 4 thor slaves? I have 12G of memory, so I was thinking leaving 2GB for the OS and giving 2.5G for each thor slave. Is that configuration optimal?\\nAlso, how should I divide the other procs (sasha, dali,etc) among the nodes?\\n\\nThanks a lot,\\nKeren\", \"post_time\": \"2014-09-23 06:15:23\" },\n\t{ \"post_id\": 6393, \"topic_id\": 1465, \"forum_id\": 15, \"post_subject\": \"Re: storage configuration\", \"username\": \"bforeman\", \"post_text\": \"Hi Keren,\\n\\nI was told that some members of the HPCC team have already been working with you, and it would seem that the best course at this time would be to submit a feature request in the Community Issue Tracker https://track.hpccsystems.com/secure/Dashboard.jspa That way we can track your request properly and the development team can evaluate what needs to be done.\\n\\nThank You,\\n\\nBob\", \"post_time\": \"2014-10-02 12:32:06\" },\n\t{ \"post_id\": 6391, \"topic_id\": 1465, \"forum_id\": 15, \"post_subject\": \"storage configuration\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am in the process of comparing Big Data systems and solutions on a cluster where each node has multiple disks that are directly accessible. As the cluster is also used for Hadoop/HDFS, the disks are mounted as JBOD; i.e., they are mounted as separate Linux volumes and are not utilizing any sort of abstraction between the OS and disk besides the filesystem itself (i.e., no RAID or LVM). For many systems I have encountered, this is one of their many acceptable hardware configurations, with this type of configuration being geared towards newer systems such as Hadoop/HDFS that take on the tasks of replication and failover in software. However, for HPCC this appears not to be a configuration where I can fully utilize the hardware, as it seems that with HPCC I must have one (and only one?!) location allocated for my data, homogeneously across the entire cluster. Using RAID is not a choice in my situation, as the cluster's hardware and OS are shared with other (Hadoop/HDFS) users and they are not mine to reconfigure. 
(I would expect similar situations to arise with the Big Data clusters of many enterprises today.) I am trying to understand if there is a simple way that this type of hardware configuration could be better accommodated. For example, something as simple as supporting an HPCC node process startup parameter that points at a configuration file might work. There could then be multiple processes, one per disk volume, coexisting on the same machine; this is how systems like MongoDB deal with multiple volumes, for example, when in non-RAIDed configurations.\\n\\nThank you,\\nKeren Ouaknine\", \"post_time\": \"2014-10-01 00:43:09\" },\n\t{ \"post_id\": 14463, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"ultima_centauri\", \"post_text\": \"Can you stop all the hpcc processes on the cluster with sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init stop and sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a dafilesrv stop\\n\\nIf have a linux desktop, I suggest you to install the python module radssh via pip install radssh http://radssh.readthedocs.io/en/v1.1.0/ and run it python -m radssh.shell --username=ubuntu 192.168.5.23-26 this will facilitate troubleshooting\\n\\nValidate that all the processes are down with ps aux|grep hpcc if you installed radssh you can run it in parallel otherwise you will need to check each node individually or use a for loop; change the slaveport to 21000 in the environment.xml on one of the nodes; once you are done editing the file push it from the node where you made the changes with sudo /opt/HPCCSystems/sbin/hpcc-push.sh -s /etc/HPCCSystems/source/<edited file> -t /etc/HPCCSystems/environment.xml \\n\\nIf you installed radssh you can start the processes by running service dafilesrv start and service hpcc-init start else run sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init stop on one of the nodes\\n\\nIf still fails please include the last log files from the thormaster (init log and thormaster log) and the log files of the thorslave node and the output from radssh (if installed).\", \"post_time\": \"2017-01-10 20:03:02\" },\n\t{ \"post_id\": 14453, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"I've attached the contents of the xml file as text file.\", \"post_time\": \"2017-01-10 17:51:43\" },\n\t{ \"post_id\": 14443, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"ultima_centauri\", \"post_text\": \"Can you share the mythor attributes from the environment.xml\", \"post_time\": \"2017-01-10 17:45:08\" },\n\t{ \"post_id\": 14433, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"Tried it, same error as before.\", \"post_time\": \"2017-01-10 17:12:10\" },\n\t{ \"post_id\": 14403, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"OK, so with that process stopped, please try restarting the thormaster again\", \"post_time\": \"2017-01-10 13:48:14\" },\n\t{ \"post_id\": 14393, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"After killing the process \\n\\nubuntu@hpcc-3:~$ sudo ps 
-ef|grep thor\\nsudo: unable to resolve host hpcc-3\\nubuntu 6864 6161 0 23:08 pts/0 00:00:00 grep --color=auto thor\\nubuntu@hpcc-3:~$ sudo netstat -plan|grep 20100\\nsudo: unable to resolve host hpcc-3\\nubuntu@hpcc-3:~$\", \"post_time\": \"2017-01-09 23:09:37\" },\n\t{ \"post_id\": 14373, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"It may be a delay issue here. Can you try this one more time please?\\n\\nKill the process again, wait like a minute or 2 and run the same 2 commands again\\n\\nps -ef|grep thor\\nnetstat -plan|grep 20100 \\n\\non the bad node\\n\\nThanks!\", \"post_time\": \"2017-01-09 20:27:04\" },\n\t{ \"post_id\": 14343, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"ubuntu@hpcc-3:~$ sudo netstat -plan|grep 20100\\nsudo: unable to resolve host hpcc-3\\ntcp 0 0 0.0.0.0:20100 0.0.0.0:* LISTEN 31264/thorslave_myt\\nubuntu@hpcc-3:~$ ^C\\nubuntu@hpcc-3:~$ sudo ps -ef|grep thor\\nsudo: unable to resolve host hpcc-3\\nubuntu 1909 29212 0 20:00 pts/0 00:00:00 grep --color=auto thor\\nhpcc 31264 1 0 19:43 ? 00:00:00 ./thorslave_mythor master=192.168.5.23:20000 slave=.:20100 slavenum=1 logDir=/var/log/HPCCSystems/mythor\", \"post_time\": \"2017-01-09 20:00:41\" },\n\t{ \"post_id\": 14333, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"Hmmm....\\nSomething is holding that port...what is the output of \\nps -ef|grep thor \\nand \\nnetstat -plan|grep 20100 \\non the bad node?\", \"post_time\": \"2017-01-09 19:57:21\" },\n\t{ \"post_id\": 14323, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"killall worked on all nodes except 191.168.5.26 where it said hpcc:no process found\\nKilled the following processes running with user hpcc\\nhpcc 30365 0.0 0.0 145644 14928 ? Sl 19:04 0:00 ./thorslave_mythor master=192.168.5.\\n\\nhpcc 29451 0.0 0.0 119416 6680 ? Sl 18:59 0:00 dafilesrv -L /var/log/HPCCSystems -I\\n\\n\\nhpcc 25709 0.0 0.0 42620 4544 ? Ss Jan07 0:00 /lib/systemd/systemd --user\\nhpcc 25710 0.0 0.0 58356 1504 ? S Jan07 0:00 (sd-pam)\\nhpcc 26061 0.0 0.0 12196 4240 ? S Jan07 0:00 /bin/bash /opt/HPCCSystems/bin/init_\\n\\nThor still wont start. Looks like same error logs\", \"post_time\": \"2017-01-09 19:45:41\" },\n\t{ \"post_id\": 14313, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"OK, looks like the same error at the end. 
\\n\\nTry:\\nkillall -9 –u hpcc \\nor \\nkill -9 <PID>\\n\\nThen please try a restart.\", \"post_time\": \"2017-01-09 19:32:31\" },\n\t{ \"post_id\": 14303, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"hpcc-init.log\\n2017-01-09T19:02:27: --------------------------\\n2017-01-09T19:02:27: --------------------------\\n2017-01-09T19:02:27: The following components have been located:\\n2017-01-09T19:02:27: ---> mydali\\n2017-01-09T19:02:27: ---> mydfuserver\\n2017-01-09T19:02:27: ---> myeclagent\\n2017-01-09T19:02:27: ---> myeclccserver\\n2017-01-09T19:02:27: ---> myeclscheduler\\n2017-01-09T19:02:27: ---> myesp\\n2017-01-09T19:02:27: ---> mysasha\\n2017-01-09T19:02:27: ---> mythor\\n2017-01-09T19:02:27: --------------------------\\n2017-01-09T19:02:27: Debug log written to /var/log/HPCCSystems/hpcc-init.debug\\n2017-01-09T19:02:27: Attempting to execute stop argument on specified components\\n2017-01-09T19:02:27: --------------------------\\n2017-01-09T19:02:27: mythor ---> stop\\n2017-01-09T19:02:27: Already stopped\\n2017-01-09T19:02:27: stop_component mythor ---> Exit status 0\\n2017-01-09T19:02:27: --------------------------\\n2017-01-09T19:02:27: mysasha ---> stop\\n2017-01-09T19:02:27: mysasha ---> Waiting on Sentinel\\n2017-01-09T19:02:27: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_mysasha.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:27: mysasha ---> Waiting on Sentinel\\n2017-01-09T19:02:28: Lock file /var/lock/HPCCSystems/mysasha/mysasha.lock does not exist\\n2017-01-09T19:02:28: Pid file doesn't exist\\n2017-01-09T19:02:28: stop_component mysasha ---> Exit status 0\\n2017-01-09T19:02:28: --------------------------\\n2017-01-09T19:02:28: myesp ---> stop\\n2017-01-09T19:02:28: myesp ---> Waiting on Sentinel\\n2017-01-09T19:02:28: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_myesp.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:28: myesp ---> Waiting on Sentinel\\n2017-01-09T19:02:29: Lock file /var/lock/HPCCSystems/myesp/myesp.lock does not exist\\n2017-01-09T19:02:29: Pid file doesn't exist\\n2017-01-09T19:02:29: stop_component myesp ---> Exit status 0\\n2017-01-09T19:02:29: --------------------------\\n2017-01-09T19:02:29: myeclscheduler ---> stop\\n2017-01-09T19:02:29: myeclscheduler ---> Waiting on Sentinel\\n2017-01-09T19:02:29: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_myeclscheduler.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:29: myeclscheduler ---> Waiting on Sentinel\\n2017-01-09T19:02:30: Lock file /var/lock/HPCCSystems/myeclscheduler/myeclscheduler.lock does not exist\\n2017-01-09T19:02:30: Pid file doesn't exist\\n2017-01-09T19:02:30: stop_component myeclscheduler ---> Exit status 0\\n2017-01-09T19:02:30: --------------------------\\n2017-01-09T19:02:30: myeclccserver ---> stop\\n2017-01-09T19:02:30: myeclccserver ---> Waiting on Sentinel\\n2017-01-09T19:02:30: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_myeclccserver.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:30: myeclccserver ---> Waiting on Sentinel\\n2017-01-09T19:02:31: Lock file /var/lock/HPCCSystems/myeclccserver/myeclccserver.lock does not exist\\n2017-01-09T19:02:31: Pid file doesn't exist\\n2017-01-09T19:02:31: stop_component myeclccserver ---> Exit status 0\\n2017-01-09T19:02:31: --------------------------\\n2017-01-09T19:02:31: myeclagent ---> stop\\n2017-01-09T19:02:31: myeclagent ---> Waiting on 
Sentinel\\n2017-01-09T19:02:31: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_myeclagent.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:31: myeclagent ---> Waiting on Sentinel\\n2017-01-09T19:02:32: Lock file /var/lock/HPCCSystems/myeclagent/myeclagent.lock does not exist\\n2017-01-09T19:02:32: Pid file doesn't exist\\n2017-01-09T19:02:32: stop_component myeclagent ---> Exit status 0\\n2017-01-09T19:02:32: --------------------------\\n2017-01-09T19:02:32: mydfuserver ---> stop\\n2017-01-09T19:02:32: mydfuserver ---> Waiting on Sentinel\\n2017-01-09T19:02:32: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_mydfuserver.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:32: mydfuserver ---> Waiting on Sentinel\\n2017-01-09T19:02:33: Lock file /var/lock/HPCCSystems/mydfuserver/mydfuserver.lock does not exist\\n2017-01-09T19:02:33: Pid file doesn't exist\\n2017-01-09T19:02:33: stop_component mydfuserver ---> Exit status 0\\n2017-01-09T19:02:33: --------------------------\\n2017-01-09T19:02:33: mydali ---> stop\\n2017-01-09T19:02:33: mydali ---> Waiting on Sentinel\\n2017-01-09T19:02:33: /opt/HPCCSystems/bin/start-stop-daemon -K -p /var/run/HPCCSystems/init_mydali.pid >> tmp.txt 2>&1\\n2017-01-09T19:02:33: mydali ---> Waiting on Sentinel\\n2017-01-09T19:02:49: Lock file /var/lock/HPCCSystems/mydali/mydali.lock does not exist\\n2017-01-09T19:02:49: Pid file doesn't exist\\n2017-01-09T19:02:49: stop_component mydali ---> Exit status 0\\n2017-01-09T19:02:49: mydafilesrv ---> Waiting on Sentinel\\n2017-01-09T19:02:49: mydafilesrv ---> Sentinel Up\\n2017-01-09T19:02:49: mydafilesrv ---> Running ( pid 14949 )\\n2017-01-09T19:02:49: Service dafilesrv, mydafilesrv is still running.\\n2017-01-09T19:03:40: --------------------------\\n2017-01-09T19:03:40: --------------------------\\n2017-01-09T19:03:40: The following components have been located:\\n2017-01-09T19:03:40: ---> mydali\\n2017-01-09T19:03:40: ---> mydfuserver\\n2017-01-09T19:03:40: ---> myeclagent\\n2017-01-09T19:03:40: ---> myeclccserver\\n2017-01-09T19:03:40: ---> myeclscheduler\\n2017-01-09T19:03:40: ---> myesp\\n2017-01-09T19:03:40: ---> mysasha\\n2017-01-09T19:03:40: ---> mythor\\n2017-01-09T19:03:40: --------------------------\\n2017-01-09T19:03:40: Debug log written to /var/log/HPCCSystems/hpcc-init.debug\\n2017-01-09T19:03:40: Attempting to execute status argument on specified components\\n2017-01-09T19:03:41: mydafilesrv ---> Waiting on Sentinel\\n2017-01-09T19:03:41: mydafilesrv ---> Sentinel Up\\n2017-01-09T19:03:41: mydafilesrv ---> Running ( pid 14949 )\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: mydali ---> status\\n2017-01-09T19:03:41: mydali ---> Sentinel Down\\n2017-01-09T19:03:41: mydali ---> Stopped\\n2017-01-09T19:03:41: status_component mydali ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: mydfuserver ---> status\\n2017-01-09T19:03:41: mydfuserver ---> Sentinel Down\\n2017-01-09T19:03:41: mydfuserver ---> Stopped\\n2017-01-09T19:03:41: status_component mydfuserver ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: myeclagent ---> status\\n2017-01-09T19:03:41: myeclagent ---> Sentinel Down\\n2017-01-09T19:03:41: myeclagent ---> Stopped\\n2017-01-09T19:03:41: status_component myeclagent ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: myeclccserver ---> status\\n2017-01-09T19:03:41: myeclccserver ---> Sentinel Down\\n2017-01-09T19:03:41: 
myeclccserver ---> Stopped\\n2017-01-09T19:03:41: status_component myeclccserver ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: myeclscheduler ---> status\\n2017-01-09T19:03:41: myeclscheduler ---> Sentinel Down\\n2017-01-09T19:03:41: myeclscheduler ---> Stopped\\n2017-01-09T19:03:41: status_component myeclscheduler ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: myesp ---> status\\n2017-01-09T19:03:41: myesp ---> Sentinel Down\\n2017-01-09T19:03:41: myesp ---> Stopped\\n2017-01-09T19:03:41: status_component myesp ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: mysasha ---> status\\n2017-01-09T19:03:41: mysasha ---> Sentinel Down\\n2017-01-09T19:03:41: mysasha ---> Stopped\\n2017-01-09T19:03:41: status_component mysasha ---> Exit status 1\\n2017-01-09T19:03:41: --------------------------\\n2017-01-09T19:03:41: mythor ---> status\\n2017-01-09T19:03:41: mythor ---> Sentinel Down\\n2017-01-09T19:03:41: mythor ---> Stopped\\n2017-01-09T19:03:41: status_component mythor ---> Exit status 1\\n2017-01-09T19:04:01: --------------------------\\n2017-01-09T19:04:01: --------------------------\\n2017-01-09T19:04:01: The following components have been located:\\n2017-01-09T19:04:01: ---> mydali\\n2017-01-09T19:04:01: ---> mydfuserver\\n2017-01-09T19:04:01: ---> myeclagent\\n2017-01-09T19:04:01: ---> myeclccserver\\n2017-01-09T19:04:01: ---> myeclscheduler\\n2017-01-09T19:04:01: ---> myesp\\n2017-01-09T19:04:01: ---> mysasha\\n2017-01-09T19:04:01: ---> mythor\\n2017-01-09T19:04:01: --------------------------\\n2017-01-09T19:04:01: Debug log written to /var/log/HPCCSystems/hpcc-init.debug\\n2017-01-09T19:04:01: Attempting to execute start argument on specified components\\n2017-01-09T19:04:01: Creating dropzone\\n2017-01-09T19:04:01: mydafilesrv ---> Waiting on Sentinel\\n2017-01-09T19:04:01: mydafilesrv ---> Sentinel Up\\n2017-01-09T19:04:01: mydafilesrv ---> Running ( pid 14949 )\\n2017-01-09T19:04:01: Dependent service dafilesrv, mydafilesrv is already running.\\n2017-01-09T19:04:01: --------------------------\\n2017-01-09T19:04:01: mydali ---> start\\n2017-01-09T19:04:01: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c mydali\\n2017-01-09T19:04:01: compType = dali\\n2017-01-09T19:04:01: mydali ---> Sentinel Down\\n2017-01-09T19:04:01: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_mydali.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mydali -m -x /opt/HPCCSystems/bin/init_dali -b\\n2017-01-09T19:04:02: mydali ---> Waiting on Sentinel\\n2017-01-09T19:04:02: mydali ---> Sentinel Up\\n2017-01-09T19:04:02: start_component mydali ---> Exit status 0\\n2017-01-09T19:04:02: --------------------------\\n2017-01-09T19:04:02: mydfuserver ---> start\\n2017-01-09T19:04:02: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c mydfuserver\\n2017-01-09T19:04:03: compType = dfuserver\\n2017-01-09T19:04:03: mydfuserver ---> Sentinel Down\\n2017-01-09T19:04:03: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_mydfuserver.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mydfuserver -m -x /opt/HPCCSystems/bin/init_dfuserver -b\\n2017-01-09T19:04:04: mydfuserver ---> Waiting on Sentinel\\n2017-01-09T19:04:04: mydfuserver ---> Sentinel Up\\n2017-01-09T19:04:04: start_component 
mydfuserver ---> Exit status 0\\n2017-01-09T19:04:04: --------------------------\\n2017-01-09T19:04:04: myeclagent ---> start\\n2017-01-09T19:04:04: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c myeclagent\\n2017-01-09T19:04:04: compType = eclagent\\n2017-01-09T19:04:04: myeclagent ---> Sentinel Down\\n2017-01-09T19:04:04: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_myeclagent.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/myeclagent -m -x /opt/HPCCSystems/bin/init_eclagent -b\\n2017-01-09T19:04:05: myeclagent ---> Waiting on Sentinel\\n2017-01-09T19:04:05: myeclagent ---> Sentinel Up\\n2017-01-09T19:04:05: start_component myeclagent ---> Exit status 0\\n2017-01-09T19:04:05: --------------------------\\n2017-01-09T19:04:05: myeclccserver ---> start\\n2017-01-09T19:04:05: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c myeclccserver\\n2017-01-09T19:04:05: compType = eclccserver\\n2017-01-09T19:04:05: myeclccserver ---> Sentinel Down\\n2017-01-09T19:04:05: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_myeclccserver.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/myeclccserver -m -x /opt/HPCCSystems/bin/init_eclccserver -b\\n2017-01-09T19:04:06: myeclccserver ---> Waiting on Sentinel\\n2017-01-09T19:04:06: myeclccserver ---> Sentinel Up\\n2017-01-09T19:04:06: start_component myeclccserver ---> Exit status 0\\n2017-01-09T19:04:06: --------------------------\\n2017-01-09T19:04:06: myeclscheduler ---> start\\n2017-01-09T19:04:06: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c myeclscheduler\\n2017-01-09T19:04:06: compType = eclscheduler\\n2017-01-09T19:04:06: myeclscheduler ---> Sentinel Down\\n2017-01-09T19:04:06: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_myeclscheduler.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/myeclscheduler -m -x /opt/HPCCSystems/bin/init_eclscheduler -b\\n2017-01-09T19:04:07: myeclscheduler ---> Waiting on Sentinel\\n2017-01-09T19:04:07: myeclscheduler ---> Sentinel Up\\n2017-01-09T19:04:07: start_component myeclscheduler ---> Exit status 0\\n2017-01-09T19:04:07: --------------------------\\n2017-01-09T19:04:07: myesp ---> start\\n2017-01-09T19:04:07: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c myesp\\n2017-01-09T19:04:07: compType = esp\\n2017-01-09T19:04:07: myesp ---> Sentinel Down\\n2017-01-09T19:04:07: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_myesp.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/myesp -m -x /opt/HPCCSystems/bin/init_esp -b\\n2017-01-09T19:04:08: myesp ---> Waiting on Sentinel\\n2017-01-09T19:04:08: myesp ---> Sentinel Up\\n2017-01-09T19:04:08: start_component myesp ---> Exit status 0\\n2017-01-09T19:04:08: --------------------------\\n2017-01-09T19:04:08: mysasha ---> start\\n2017-01-09T19:04:08: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c mysasha\\n2017-01-09T19:04:08: compType = sasha\\n2017-01-09T19:04:08: mysasha ---> Sentinel Down\\n2017-01-09T19:04:08: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_mysasha.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mysasha 
-m -x /opt/HPCCSystems/bin/init_sasha -b\\n2017-01-09T19:04:09: mysasha ---> Waiting on Sentinel\\n2017-01-09T19:04:09: mysasha ---> Sentinel Up\\n2017-01-09T19:04:09: start_component mysasha ---> Exit status 0\\n2017-01-09T19:04:09: --------------------------\\n2017-01-09T19:04:09: mythor ---> start\\n2017-01-09T19:04:10: /opt/HPCCSystems/sbin/configgen -env /etc/HPCCSystems/environment.xml -od /var/lib/HPCCSystems -id /opt/HPCCSystems/componentfiles/configxml -c mythor\\n2017-01-09T19:04:10: compType = thor\\n2017-01-09T19:04:10: mythor ---> Sentinel Down\\n2017-01-09T19:04:10: /opt/HPCCSystems/bin/start-stop-daemon -S -p /var/run/HPCCSystems/init_mythor.pid -c hpcc:hpcc -d /var/lib/HPCCSystems/mythor -m -x /opt/HPCCSystems/bin/init_thor -b\\n2017-01-09T19:04:12: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:12: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:13: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:13: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:14: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:14: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:15: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:15: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:16: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:16: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:17: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:17: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:18: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:18: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:19: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:19: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:20: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:20: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:21: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:21: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:22: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:22: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:23: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:23: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:24: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:24: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:25: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:25: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:26: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:26: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:27: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:27: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:28: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:28: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:29: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:29: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:30: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:30: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:31: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:31: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:32: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:32: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:33: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:33: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:34: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:34: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:35: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:35: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:36: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:36: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:37: mythor ---> Waiting on 
Sentinel\\n2017-01-09T19:04:37: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:38: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:38: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:39: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:39: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:40: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:40: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:41: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:41: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:42: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:42: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:43: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:43: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:44: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:44: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:45: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:45: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:46: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:46: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:47: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:47: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:49: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:49: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:50: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:50: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:51: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:51: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:52: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:52: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:53: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:53: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:54: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:54: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:55: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:55: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:56: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:56: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:57: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:57: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:58: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:58: mythor ---> Currently Unhealthy\\n2017-01-09T19:04:59: mythor ---> Waiting on Sentinel\\n2017-01-09T19:04:59: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:00: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:00: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:01: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:01: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:02: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:02: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:03: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:03: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:04: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:04: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:05: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:05: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:06: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:06: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:07: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:07: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:08: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:08: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:09: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:09: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:10: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:10: 
mythor ---> Currently Unhealthy\\n2017-01-09T19:05:11: mythor ---> Waiting on Sentinel\\n2017-01-09T19:05:11: mythor ---> Currently Unhealthy\\n2017-01-09T19:05:12: mythor failed to start cleanly\\n2017-01-09T19:05:12: Refer to the log file for the binary mythor for more information\\n2017-01-09T19:05:12: Pid file doesn't exist\\n2017-01-09T19:05:12: start_component mythor ---> Exit status 1\\nubuntu@hpcc-master:~$\\n\\nthormaster.log\\n00000001 2017-01-09 19:04:11.615 26123 26123 "Opened log file //192.168.5.23/var/log/HPCCSystems/mythor/thormaster.2017_01_09.log"\\n00000002 2017-01-09 19:04:11.615 26123 26123 "Build community_5.4.6-1"\\n00000003 2017-01-09 19:04:11.615 26123 26123 "calling initClientProcess Port 20000"\\n00000004 2017-01-09 19:04:11.618 26123 26123 "Found file 'thorgroup', using to form thor group"\\n00000005 2017-01-09 19:04:11.619 26123 26123 "Checking cluster replicate nodes"\\n00000006 2017-01-09 19:05:11.620 26123 26123 "multiConnect failed to 192.168.5.26:7100 with -1"\\n00000007 2017-01-09 19:05:11.621 26123 26123 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/master/thmastermain.cpp(393) : VALIDATE FAILED(1) 192.168.5.26 : Connect failure"\\n00000008 2017-01-09 19:05:11.621 26123 26123 "Cluster replicate nodes check completed in 60002ms"\\n00000009 2017-01-09 19:05:11.621 26123 26123 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/master/thmastermain.cpp(632) : ERROR: Validate failure(s) detected, exiting Thor"\\n\\ninit_thorslave_mythor.log\\nubuntu@hpcc-3:~$ cat /var/log/HPCCSystems/mythor/init_thorslave_mythor_2017_01_09_19_04_10.log\\n2017-01-09T19:04:11: dependency dafilesrv started\\n2017-01-09T19:04:11: slave(192.168.5.26) init\\n2017-01-09T19:04:11: slave(s) starting\\n2017-01-09T19:04:11: rsync -e ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no 192.168.5.23:/var/lib/HPCCSystems/mythor/thorgroup /var/lib/HPCCSystems/mythor/thorgroup.slave\\n2017-01-09T19:04:11: thorslave_mythor master=192.168.5.23:20000 slave=.:20100 slavenum=1 logDir=/var/log/HPCCSystems/mythor\\n2017-01-09T19:04:11: slave pid 30365 started\\n\\nthorslave.log\\nubuntu@hpcc-3:~$ cat /var/log/HPCCSystems/mythor/thorslave.1.2017_01_09.log\\n00000000 2017-01-09 15:09:59.138 28652 28652 "Opened log file //192.168.5.26/var/log/HPCCSystems/mythor/thorslave.1.2017_01_09.log"\\n00000001 2017-01-09 15:09:59.138 28652 28652 "Build community_5.4.6-1"\\n00000002 2017-01-09 15:09:59.140 28652 28652 "ERROR: -7: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/slave/thslavemain.cpp(424) : ThorSlave : port in use\\nTarget: S>192.168.5.26, port = 20100, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 912"\\n00000003 2017-01-09 15:09:59.140 28652 28652 "temp directory cleared"\\n00000004 2017-01-09 15:09:59.140 28652 28652 "Unregistering slave : 192.168.5.26:20100"\\n00000005 2017-01-09 15:09:59.140 28652 28652 "ERROR: Failed to unregister slave : 192.168.5.26:20100"\\n00000000 2017-01-09 19:04:11.608 30365 30365 "Opened log file //192.168.5.26/var/log/HPCCSystems/mythor/thorslave.1.2017_01_09.log"\\n00000001 2017-01-09 19:04:11.608 30365 30365 "Build community_5.4.6-1"\\n00000002 2017-01-09 19:04:11.610 30365 30365 "registering 192.168.5.26:20100 - master 192.168.5.23:20000"\", \"post_time\": \"2017-01-09 19:26:36\" },\n\t{ 
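A rough sketch of the port checks discussed in this thread, run on the problem slave node (192.168.5.26 here). It assumes the thor slave port is 20100 and dafilesrv listens on its usual port 7100, as the logs above show:

# is anything still bound to the thor slave port?
sudo netstat -plan | grep 20100
# note the PID of any leftover thorslave_mythor process
ps -ef | grep thorslave
# stop the stale slave before restarting the cluster (replace <PID> with the id found above)
sudo kill -9 <PID>

# from the thormaster node, confirm dafilesrv on the slave is reachable
# ("multiConnect failed to 192.168.5.26:7100" in the thormaster log points at this port)
nc -zv 192.168.5.26 7100    # requires netcat; any port-reachability check will do

If that last check fails, the firewall between the nodes probably also needs 20000 (thormaster), 20100 (thorslave) and 7100 (dafilesrv) opened, in addition to the 8010 and 8015 already mentioned.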
\"post_id\": 14293, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"Anything new in the log? Perhaps a different error?\", \"post_time\": \"2017-01-09 19:20:33\" },\n\t{ \"post_id\": 14283, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"port 20100 had thorslave, killed it, Tried init-start again, mythor failed to start again\", \"post_time\": \"2017-01-09 19:06:46\" },\n\t{ \"post_id\": 14273, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"I don't think so, find out what process is using that port on your system and if it is thorslave simply kill it.\", \"post_time\": \"2017-01-09 18:53:58\" },\n\t{ \"post_id\": 14263, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"ubuntu@hpcc-3:~$ cat /var/log/HPCCSystems/mythor/thorslave.1.2017_01_09.log\\n00000000 2017-01-09 15:09:59.138 28652 28652 "Opened log file //192.168.5.26/var/log/HPCCSystems/mythor/thorslave.1.2017_01_09.log"\\n00000001 2017-01-09 15:09:59.138 28652 28652 "Build community_5.4.6-1"\\n00000002 2017-01-09 15:09:59.140 28652 28652 "ERROR: -7: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/slave/thslavemain.cpp(424) : ThorSlave : port in use\\nTarget: S>192.168.5.26, port = 20100, Raised in: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/system/jlib/jsocket.cpp, line 912"\\n00000003 2017-01-09 15:09:59.140 28652 28652 "temp directory cleared"\\n00000004 2017-01-09 15:09:59.140 28652 28652 "Unregistering slave : 192.168.5.26:20100"\\n00000005 2017-01-09 15:09:59.140 28652 28652 "ERROR: Failed to unregister slave : 192.168.5.26:20100"\\n\\n\\nubuntu@hpcc-master:~$ cat /var/log/HPCCSystems/mythor/thormaster.2017_01_09.log\\n00000001 2017-01-09 15:09:59.139 21007 21007 "Opened log file //192.168.5.23/var/log/HPCCSystems/mythor/thormaster.2017_01_09.log"\\n00000002 2017-01-09 15:09:59.139 21007 21007 "Build community_5.4.6-1"\\n00000003 2017-01-09 15:09:59.139 21007 21007 "calling initClientProcess Port 20000"\\n00000004 2017-01-09 15:09:59.142 21007 21007 "Found file 'thorgroup', using to form thor group"\\n00000005 2017-01-09 15:09:59.142 21007 21007 "Checking cluster replicate nodes"\\n00000006 2017-01-09 15:10:59.143 21007 21007 "multiConnect failed to 192.168.5.26:7100 with -1"\\n00000007 2017-01-09 15:10:59.144 21007 21007 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/master/thmastermain.cpp(393) : VALIDATE FAILED(1) 192.168.5.26 : Connect failure"\\n00000008 2017-01-09 15:10:59.144 21007 21007 "Cluster replicate nodes check completed in 60002ms"\\n00000009 2017-01-09 15:10:59.144 21007 21007 "ERROR: /var/lib/jenkins/workspace/CE-Candidate-withplugins-5.4.6-1/CE/ubuntu-15.04-amd64/HPCC-Platform/thorlcr/master/thmastermain.cpp(632) : ERROR: Validate failure(s) detected, exiting Thor"\\n\\nFound some error logs.\\nI may have notice noticed something, in our environment, I need explicitly enable access to ports, I've added 8010 and 8015 for eclwatch and the configmgr. 
Do I need to add any other ports?\", \"post_time\": \"2017-01-09 18:27:35\" },\n\t{ \"post_id\": 14253, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"Ok, still checking with the team to see what else we can do. I still haven't seen a log from you with a specific ERROR in it, you might want to browse the other logs to see if you can find anything. I checked our issue tracker and there was an issue reported a while ago in 5.4.0 that was fixed in later releases, but not sure it applies to your configuration or version.\\n\\nBob\", \"post_time\": \"2017-01-09 18:18:50\" },\n\t{ \"post_id\": 14243, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"Yes, can ssh to all nodes from thormaster and vice versa\", \"post_time\": \"2017-01-09 17:42:25\" },\n\t{ \"post_id\": 14233, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"It's in the ECL Watch, from your screen shot I can see it:\\n192.168.5.23\", \"post_time\": \"2017-01-09 17:33:18\" },\n\t{ \"post_id\": 14213, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"How do I find the IP address of the thormaster, I don't see it in the environment.xml file\", \"post_time\": \"2017-01-09 17:25:09\" },\n\t{ \"post_id\": 14203, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"On suspected bad node \\nsu - hpcc then ssh <thormaster>\\n\\nOn thormaster \\nsu - hpcc then ssh <suspected bad node>\", \"post_time\": \"2017-01-09 17:20:14\" },\n\t{ \"post_id\": 14183, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"How would I do that?\", \"post_time\": \"2017-01-09 16:57:05\" },\n\t{ \"post_id\": 14173, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"Are you able to become hpcc user and ssh into the thormaster and viceversa?\", \"post_time\": \"2017-01-09 16:53:51\" },\n\t{ \"post_id\": 14143, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"ubuntu@hpcc-3:~$ cat /var/log/HPCCSystems/mythor/init_thorslave_mythor_2017_01_09_15_09_57.log\\n2017-01-09T15:09:58: dependency dafilesrv started\\n2017-01-09T15:09:58: slave(192.168.5.26) init\\n2017-01-09T15:09:58: slave(s) starting\\n2017-01-09T15:09:58: rsync -e ssh -o LogLevel=QUIET -o StrictHostKeyChecking=no 192.168.5.23:/var/lib/HPCCSystems/mythor/thorgroup /var/lib/HPCCSystems/mythor/thorgroup.slave\\n2017-01-09T15:09:59: thorslave_mythor master=192.168.5.23:20000 slave=.:20100 slavenum=1 logDir=/var/log/HPCCSystems/mythor\\n2017-01-09T15:09:59: slave pid 28652 started\", \"post_time\": \"2017-01-09 16:18:14\" },\n\t{ \"post_id\": 14133, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"What is in the log files on 192.168.5.26 where 
you are having issues?\", \"post_time\": \"2017-01-09 16:14:29\" },\n\t{ \"post_id\": 14103, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"Hello,\\nDid the preflight, mydafilesrv is running without any issues. Screenshots of the preflight [attachment=0:htqdhsgo]Error2.PNG[attachment=1:htqdhsgo]Error1.PNG\\n\\nand generated output when I attempt to start the cluster. Also now I get the same error for Roxie as well\\n\\nubuntu@hpcc-master:~$ sudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start\\n192.168.5.23: Running sudo /etc/init.d/hpcc-init start\\nsudo: unable to resolve host hpcc-master\\nDependent service dafilesrv, mydafilesrv is already running.\\nStarting mydali ... [ OK ]\\nStarting mydfuserver ... [ OK ]\\nStarting myeclagent ... [ OK ]\\nStarting myeclccserver ... [ OK ]\\nStarting myeclscheduler ... [ OK ]\\nStarting myesp ... [ OK ]\\nStarting mysasha ... [ OK ]\\nStarting mythor ... [ FAILED ]\\n\\nhpcc-init start in the cluster ...\\n\\nTotal hosts to process: 3\\n\\nExecution progress: 100%, running: 0, in queue: 0, succeed: 3, failed: 0\\n\\nhpcc-init_start_20004 run successfully on all hosts in the cluster\\n\\n\\n192.168.5.24 hpcc-init start :\\nStarting myroxie ... [ OK ]\\n\\n192.168.5.25 hpcc-init start :\\n\\n192.168.5.26 hpcc-init start :\", \"post_time\": \"2017-01-09 15:18:13\" },\n\t{ \"post_id\": 14043, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"Does the cluster has the mydafilesrv process up and running? You might want to do a preflight on the system:\\n\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-6.2.0/docs/The_ECL_Watch_Manual-6.2.0-1.pdf \\n\\nSee pages 101 and 104\", \"post_time\": \"2017-01-09 14:04:11\" },\n\t{ \"post_id\": 14013, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"vchinta\", \"post_text\": \"Hello, \\n\\nI get the same error running hpcc 5.4.6-1 on ubuntu 15.04. I checked the xml file on all nodes, checksum is same on all of them.\\nBelow is the generated log file\\nubuntu@hpcc-master:~$cat /var/log/HPCCSystems/mythor/init_mythor_2017_01_07_20_34_50.log\\n2017-01-07T20:34:50: Starting mythor\\n2017-01-07T20:34:50: removing any previous sentinel file\\n2017-01-07T20:34:50: Ensuring a clean working environment ...\\n2017-01-07T20:34:50: Killing slaves\\n2017-01-07T20:34:50: --------------------------\\n2017-01-07T20:34:50: starting thorslaves ...\\n2017-01-07T20:34:51: thormaster cmd : /var/lib/HPCCSystems/mythor/thormaster_mythor MASTER=192.168.5.23:20000\\n2017-01-07T20:34:51: thormaster_lcr process started pid = 23790\\n2017-01-07T20:35:51: Thormaster (23790) Exited cleanly\\nubuntu@hpcc-master:~$\\n\\nAny idea what is wrong? Thanks for your help.\\n\\nVishnu\", \"post_time\": \"2017-01-07 20:38:26\" },\n\t{ \"post_id\": 6666, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Re: Multinode cluster issue: thorCluster not attached\", \"username\": \"bforeman\", \"post_text\": \"The first place to start is by looking at the thor logs:\\n\\nrpm –e $(rpm –qa|grep hpcc)
\\n\\n(the command above removes the HPCC rpm if you need to uninstall and re-install)\\n\\nThe thor logs themselves are found under:\\n\\n/var/log/HPCCSystems/<name of thor>
\\n\\nSee what the error is in the thormaster log. It's possible the error is in the environment.xml file. As you already know, the environment.xml needs to be on all machines.\\n\\nWhen you edit the environment file, the file in question is found in:\\n\\n/etc/HPCCSystems/source/environment.xml
\\n\\nTo "push" it out it needs to be located in:\\n \\n/etc/HPCCSystems/environment.xml
\\n\\n\\nIn addition, on both machines, you need to check to make sure that the md5sums match.\\n\\nFor example:\\n\\nOn the 1st node (assuming the edit happened on that node):\\n\\nmd5sum /etc/HPCCSystems/environment.xml /etc/HPCCSystems/source/*.xml
\\n\\nNext, go onto the second node and run\\n\\nmd5sum /etc/HPCCSystems/environment.xml
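Putting the steps above together, a rough sketch of the edit / push / verify cycle (using the configmgr and hpcc-push.sh commands quoted elsewhere in this thread; paths and ports are the stock ones):

# 1. edit the source copy of the configuration with configmgr
sudo /opt/HPCCSystems/sbin/configmgr        # then browse to http://<node-ip>:8015

# 2. push the edited file out so it becomes the running configuration on every node
sudo /opt/HPCCSystems/sbin/hpcc-push.sh -s /etc/HPCCSystems/source/environment.xml -t /etc/HPCCSystems/environment.xml

# 3. confirm the checksums agree (run the second command on each remaining node)
md5sum /etc/HPCCSystems/environment.xml /etc/HPCCSystems/source/environment.xml
md5sum /etc/HPCCSystems/environment.xml

# 4. restart all components, not just Thor, so the new xml is read in
sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init restart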
\\n\\nHTH,\\n\\nBob and the HPCC team\", \"post_time\": \"2014-12-04 14:42:37\" },\n\t{ \"post_id\": 6654, \"topic_id\": 1533, \"forum_id\": 15, \"post_subject\": \"Multinode cluster issue: thorCluster not attached\", \"username\": \"soniaghanekar\", \"post_text\": \"Hi,\\n\\nI am trying to create a 2-node cluster on Ubuntu as given in the installation guide. \\n1) Installed HPCC 5.0.2.1 on both the machines (master A and slave B). \\n2) Created an environment.xml using configmgr. Copied the environment over to the second machine (Machine B) with hpcc user credentials.\\n3) Copied over .ssh folder (id_rsa, id_rsa.pub, authorized key) from hpcc user's home from machine A to B.\\n4) Now I start the hpcc process using "sudo service hpcc-init". But on ECL watch, it says "thorCluster is not attached".\\n\\nAm I missing something?\", \"post_time\": \"2014-12-02 19:20:43\" },\n\t{ \"post_id\": 7113, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nWe are interested in the output from starting, if you can capture that. It seems after the start cmd one of the services must still report as stopped so your script goes into the restart - which then sends the CTRL-C to thor.\\n\\nIf you could send output from this master start up script where we can see which other service was still stopped it would help us debug this.\\nPerhaps also better than restarting would be to just issue another start cmd, as that would just try to start just the service(s) that are not yet up. Also probably a good idea to add a sleep 20 or so after the first start, before checking for any stopped services.\\n\\nIn the next version of HPCC (5.2) we have improved the startup flow and reporting so this should go smoother, but until then sending the output from your start script and adding the sleep 20 and changing restart to start should help.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-03-09 19:52:10\" },\n\t{ \"post_id\": 7105, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"I have attached the script that is executed only at the master.\\n\\n-Lakshman Naresh\", \"post_time\": \"2015-03-09 16:30:48\" },\n\t{ \"post_id\": 7104, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Lakshman, hi\\n\\n"... my script will check the status of the services and if needed will issue a restart"\\n\\nCan you send us this script ? I am thinking that somehow a restart is done when it is not really needed.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-03-09 16:10:41\" },\n\t{ \"post_id\": 7103, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Mark,\\nI'm able to use HPCC services. My script starts HPCC services at the master using the below command.\\nsudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start\\nAfter the execution of this command, my script will check the status of the services and if needed will issue a restart. 
[sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init restart]\\n\\nRegards,\\nLakshman Naresh\", \"post_time\": \"2015-03-09 15:59:34\" },\n\t{ \"post_id\": 7095, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nFrom these logs it appears all is up and running ok.\\nCan you use HPCC ok now ?\\n\\nThere is a CTRL-C event received about 30 seconds after thormaster starts - but the process is re-started after this and the second time around it continues to run ok. The thor slave connects and registers and it appears all is ok.\\n\\nWe will continue to try and solve why the CTRL-C event, but can you confirm if you are able to use HPCC now ? \\n\\nFor the CTRL-C issue - could it be possible another start up of HPCC occurred at or near the same time ? Or there was a stop of HPCC attempted somehow ? What cmds on each host do you use to start HPCC ?\\n\\nthanks,\\nmark\", \"post_time\": \"2015-03-06 21:15:06\" },\n\t{ \"post_id\": 7075, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Please find the remaining log files.\", \"post_time\": \"2015-03-04 17:24:50\" },\n\t{ \"post_id\": 7074, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi,\\nI have attached the log files.\\n\\nRegards,\\nLakshman\", \"post_time\": \"2015-03-04 17:24:03\" },\n\t{ \"post_id\": 7067, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mgardner\", \"post_text\": \"Can you post the start_thor log file that is found under /var/log/HPCCSystems/mythor that coresponds with the time of March 2, 15:17 ?\\n\\nWe're trying to figure out why the control-C got caught in the first place. But again it seems that it's up and working.\", \"post_time\": \"2015-03-03 17:17:36\" },\n\t{ \"post_id\": 7064, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nIt appears from these logs that thor is up and running ok.\\nThere is a failure at first, but then it tries again and looks ok the second time. The failure msg was:\\n\\n0000001D 2015-03-02 15:17:38.240 5488 5488 "CTRL-C detected"\\n\\nSo after about 30 seconds of thor master and slave being up and ok a CTRL-C event was detected, but then thor restarted automatically. \\nHow are you starting up HPCC ?\\n\\nAre you able to use HPCC now ?\\n\\nthanks,\\nmark\", \"post_time\": \"2015-03-03 16:23:08\" },\n\t{ \"post_id\": 7062, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi,\\nSorry, I have uploaded the wrong one. Please find the latest log files.\\n\\nRegards,\\nLakshman\", \"post_time\": \"2015-03-03 15:51:13\" },\n\t{ \"post_id\": 7055, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nIt shows the interface is set to * in the xml. It should be set to eth1.\\nThese files all look like the ones from last month (02-03 instead of 03-02).\\nCan you double check the files. 
Make sure interface=eth1 is specified in the conf file on all 3 nodes before starting HPCC.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-03-02 21:58:34\" },\n\t{ \"post_id\": 7053, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Mark,\\nEven after changing the interface in config file the problem didn't resolved. I have attached the log files for your reference.\\n\\nRegards,\\nLakshman Naresh\", \"post_time\": \"2015-03-02 20:37:09\" },\n\t{ \"post_id\": 7023, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nIt appears you want to use the public IP addresses, but these are not the first interfaces listed from the ifconfig scans. Can you edit the environment.conf file on all machines and change the interface line to be:\\n\\ninterface=eth1\\n\\nSo this matches the IP addresses you have specified in the configs.\\nStop HPCC, make this change for all 3 machines and then start HPCC up again and let us know the status and log file(s).\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-25 22:01:34\" },\n\t{ \"post_id\": 7011, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Mark,\\nBelow is the mapping for new set of IPs.\\nX.X.X.167 => X.X.X.221 => 152.X.X.67\\nX.X.X.70 => X.X.X.190 => 152.X.X.51\\nX.X.X.240 => X.X.X.255 => 152.X.X.7\\n\\n.221, .190, .255 are public IPs.\\n\\nI have attached the output of the two commands and environment.xml file for this set of IPs.\\n\\nRegards,\\nLakshman Naresh\", \"post_time\": \"2015-02-23 18:09:11\" },\n\t{ \"post_id\": 6994, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nCan you send us the output from these two commands:\\n\\nifconfig\\niptables -L\\n\\non each of the 3 machines (.221, .190, .255)\\n\\nIs the .221, .190, .255 the private or public IPs ?\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-19 16:12:51\" },\n\t{ \"post_id\": 6992, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi,\\nAs I mentioned earlier whenever I request VMs I will get a new set of machines. So the IP addresses got changed this time. Below is a mapping of the old to new IP addresses and also I have attached the new environment file for your reference.\\n\\nX.X.X.221 => X.X.X.167\\nX.X.X.190 => X.X.X.70\\nX.X.X.255 => X.X.X.240\\n\\nI executed the below command in X.X.X.190 and X.X.X.221\\nsudo /opt/HPCCSystems/bin/daliadmin X.X.X.221 dfsgroup mythor\\nThe output was X.X.X.221 in both the machines.\\n\\nRegards,\\nLakshman Naresh C A\", \"post_time\": \"2015-02-19 15:52:45\" },\n\t{ \"post_id\": 6962, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mgardner\", \"post_text\": \"Also, if you could please run this command on .70 and .167 (the thormaster and thorslave.) Then post the output. I'm assuming that X.X.X.167 is the ip of your dali node according to the xml you gave us earlier.\\n\\nsudo /opt/HPCCSystems/bin/daliadmin X.X.X.167 dfsgroup mythor
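A quick sketch of checking and setting the interface on each node (eth1 is just the name used in this thread; substitute whichever NIC carries the IPs in your environment.xml, and this assumes the stock environment.conf with a single interface= line):

# show which interface HPCC is currently told to bind to (the default is *)
grep '^interface' /etc/HPCCSystems/environment.conf

# point it at the NIC that matches the configured IP addresses, then repeat on every node
sudo sed -i 's/^interface=.*/interface=eth1/' /etc/HPCCSystems/environment.conf

# restart the platform so the change takes effect
sudo /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init restart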
\", \"post_time\": \"2015-02-17 14:58:37\" },\n\t{ \"post_id\": 6961, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nCan you send the output from\\n\\nifconfig\\n\\non all 3 machines ? This info will help\\nto configure which interface to use on all\\n3 machines.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-17 14:48:01\" },\n\t{ \"post_id\": 6959, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Mark,\\nThere are two NICs for each machine, one of them is public facing NIC and another one is internal. Below are the pair of IP for each node.\\nMaster - X.X.X.69/X.X.X.70\\nSlave - X.X.X.68/X.X.X.167\\n\\nThanks,\\nLakshman Naresh\", \"post_time\": \"2015-02-16 16:10:10\" },\n\t{ \"post_id\": 6950, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nI may be looking at out of date log files but I do not understand the IP addresses. Thormaster log shows:\\n\\n0000000C 2015-02-03 18:05:33.968 5481 5481 "ThorMaster version 4.1, Started on X.X.X.69:20000"\\n\\nwhich suggests its IP address is X.X.X.69\\n\\nAnd it is trying to connect with a thorslave on X.X.X.167:\\n\\n00000012 2015-02-03 18:05:33.973 5481 5481 "verified connection with X.X.X.167:20100"\\n\\nBut Thorslave log shows:\\n\\n00000002 2015-02-03 18:05:33.828 3850 3850 "registering X.X.X.68:20100 - master X.X.X.70:20000"\\n\\nwhich suggests it is X.X.X.68 and the master is X.X.X.70. Can we verify all hosts and IPs again ?\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-12 15:03:08\" },\n\t{ \"post_id\": 6946, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for posting the files. The team is still reviewing and will circle back soon.\", \"post_time\": \"2015-02-11 21:38:55\" },\n\t{ \"post_id\": 6898, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Michael,\\nI haven't taken backup of the environment.xml file for the set of IPs that I posted earlier. Now I have created the same scenario with a new set of machines because the VMs I work with are temporary ones i.e., whenever I request VMs I will get a new set of machines. The thor cluster configuration remains the same. I have attached thormaster log, thorslave log, environment.xml file. \\n\\nBelow are the services running at each machine.\\nX.X.X.240 hpcc-init status :\\nmydafilesrv ( pid 3175 ) is running...\\nmydfuserver ( pid 5924 ) is running...\\nmyeclscheduler ( pid 6023 ) is running...\\n\\nX.X.X.70 hpcc-init status :\\nmydafilesrv ( pid 3266 ) is running...\\nmyeclagent ( pid 9561 ) is running...\\nmyesp ( pid 9657 ) is running...\\nmysasha ( pid 9758 ) is running...\\nmythor ( pid 10721 ) is running...\\n\\nX.X.X.167 hpcc-init status :\\nmydafilesrv ( pid 3099 ) is running...\\nmydali ( pid 6871 ) is running...\\nmyeclccserver ( pid 6975 ) is running...\\n\\nThanks Michael..\", \"post_time\": \"2015-02-03 23:30:47\" },\n\t{ \"post_id\": 6897, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"mgardner\", \"post_text\": \"Can you please attach a copy of your environment.xml and the thorslave log? 
I'll try to get to the bottom of this for you.\\n\\nMichael\", \"post_time\": \"2015-02-03 21:10:45\" },\n\t{ \"post_id\": 6895, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Bob,\\nI have attached thormaster log.\\nBelow is the configuration parameters that are passed to envgen script to generate environment.xml.\\nnumber of thor nodes: 1\\nnumber of thor slaves per node: 1\", \"post_time\": \"2015-02-03 19:12:36\" },\n\t{ \"post_id\": 6889, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"Re: MyThor is not running in cluster\", \"username\": \"bforeman\", \"post_text\": \"The HPCC team took a look at your post, but we need some more information.\\n\\nHow are you configuring your THOR cluster with regards to the number of slave nodes?\\n\\nAlso, if you have the thormaster log, we would like to take a look at that as well.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-03 13:54:28\" },\n\t{ \"post_id\": 6888, \"topic_id\": 1586, \"forum_id\": 15, \"post_subject\": \"MyThor is not running in cluster\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi,\\nI have 3 VM reservations installed with HPCC 5.0.2.1. I'm building a script that automates HPCC cluster formation when these three machines boots up.\\n\\nExchange of SSH Keys, and environment.xml files are successful. But when I try to start the service hpcc-init using /opt/HPCCSystems/sbin/hpcc-run.sh script for the first time, except mythor service all other services are running. However, when I restart the hpcc-init service using the same script, all the services are running.\\n\\nTo successfully start mythor service, atleast one restart of the entire hpcc services is required. Why doesn't mythor service run at the first start? Can it be resolved? Because it takes some time to restart hpcc service in all the machines. 
This delays the service availability to the end user.\\n\\nBelow is the status of the services in each machine after first start.\\nX.X.X.154 hpcc-init status :\\nmydafilesrv ( pid 2954 ) is running...\\nmydfuserver ( pid 3044 ) is running...\\nmyeclscheduler ( pid 3143 ) is running...\\n\\nX.X.X.153 hpcc-init status :\\nmydafilesrv ( pid 2391 ) is running...\\nmydali ( pid 2481 ) is running...\\nmyeclccserver ( pid 2585 ) is running...\\n\\nX.X.X.63 hpcc-init status :\\nmydafilesrv ( pid 3260 ) is running...\\nmyeclagent ( pid 3354 ) is running...\\nmyesp ( pid 3450 ) is running...\\nmysasha ( pid 3548 ) is running...\\nmythor is stopped\\n\\n\\nAfter the first start, when I try to check the status of the services, hpcc-run.sh script print the below statement.\\nError found during hpcc-init_status_3795 execution.\\nReference following log for more information:\\n/var/log/HPCCSystems/cluster/cc_hpcc-init_status_3795_20150203_012107.log\\n\\nThese are the last few lines of the log.\\n2015-02-03 01:21:12,385 - hpcc.cluster.ScriptTask.2 - ERROR - X.X.X.63: Host is alive.\\nX.X.X.63: Running sudo /etc/init.d/hpcc-init status\\n\\n2015-02-03 01:21:12,385 - hpcc.cluster.ScriptTask.2 - INFO - result: FAILED\\n2015-02-03 01:21:14,128 - hpcc.cluster - INFO - script execution done.\", \"post_time\": \"2015-02-03 06:47:19\" },\n\t{ \"post_id\": 6905, \"topic_id\": 1587, \"forum_id\": 15, \"post_subject\": \"Re: Record size restriction by RAM?\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nIf you have individual records that are larger than the amount of RAM you have on each node, then I would strongly suggest adding more RAM per node until you can at least fit one record in RAM (and more, if possible). \\n\\nWith 64-bit Linux you can put a lot more RAM one each box than previously possible. I've seen boxes that handle up to 256Gb RAM, and I'm pretty sure I'm probably out of date on that figure. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-05 16:55:42\" },\n\t{ \"post_id\": 6904, \"topic_id\": 1587, \"forum_id\": 15, \"post_subject\": \"Re: Record size restriction by RAM?\", \"username\": \"bforeman\", \"post_text\": \"Hi James,\\n\\nWhat happens if the record size exceeds the RAM is you get a disk spill, in other words, part of the disk drive is used as temporary RAM. You can see this by looking at the graph, it will clearly show "disk spill" in the process. Of course, this means that your job may slow down due to the additional I/O.\\n\\nThe rule of HPCC is that a record structure never overlaps onto another node. 
\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2015-02-05 13:14:50\" },\n\t{ \"post_id\": 6902, \"topic_id\": 1587, \"forum_id\": 15, \"post_subject\": \"Record size restriction by RAM?\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nFor parsing very large text files in Thor - \\nIf files are loaded 1 file/record - does the node require sufficient RAM to hold the entire record at once?\\nOr would a record larger than RAM fail the workunit?\\n\\nNaturally, if splitting the raw file into smaller parts across multiple records is possible, that would presumably avoid this issue (though possibly still spilling to disk).\\n\\nThanks.\", \"post_time\": \"2015-02-05 01:44:17\" },\n\t{ \"post_id\": 6960, \"topic_id\": 1597, \"forum_id\": 15, \"post_subject\": \"Re: My Roxie is not starting in cluster environment.\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi Mark,\\nAfter increasing the physical memory and net.core.wmem_max/net.core.rmem_max values, the roxie service started running in the cluster environment. Thanks for the help.\\n\\n-Lakshman Naresh.\", \"post_time\": \"2015-02-16 16:13:48\" },\n\t{ \"post_id\": 6949, \"topic_id\": 1597, \"forum_id\": 15, \"post_subject\": \"Re: My Roxie is not starting in cluster environment.\", \"username\": \"mkellyhpcc\", \"post_text\": \"Hi,\\n\\nFrom the roxie log file is:\\n\\n00000038 2015-02-11 13:42:58.578 6441 6441 "/proc/sys/net/core/rmem_max value 124928 is less than 131071"\\n00000039 2015-02-11 13:42:58.578 6441 6441 "EXCEPTION: (1455): System socket max read buffer is less than 131071"\\n\\nThe socket buffer size is set to 128kb and the kernel needs to have at least this much as well (ideally should have more than this).\\n\\nCan you increase your kernel socket buffer sizes with some settings in /etc/sysctl.conf. You can see what these are all set to with:\\n\\nsudo sysctl -a | grep core | grep mem_max\\n\\nYou need to increase:\\n\\nnet.core.wmem_max\\nnet.core.rmem_max\\n\\nTo be > 128kb. I would suggest 256kb (262144).\\n\\nI have used much larger settings for those and these below:\\n\\nnet.core.wmem_default\\nnet.core.rmem_default\\nnet.core.optmem_max\\nnet.ipv4.tcp_mem\\nnet.ipv4.tcp_wmem\\nnet.ipv4.tcp_rmem\\nnet.ipv4.udp_mem\\n\\nBut it appears your system does not have much physical memory (only 1 GB - is that right ??) 
and so you may not want to increase these settings (other than the required net.core.[r,w]mem_max mentioned above).\\n\\nOn that note, roxie memory is set to 1 GB -\\n\\n00000002 2015-02-11 12:24:15.691 3555 3555 "RoxieMemMgr: Setting memory limit to 1073741824 bytes (1024 pages)"\\n\\nwhich suggests you really want more physical memory.\\n\\nthanks,\\nmark\", \"post_time\": \"2015-02-12 14:25:48\" },\n\t{ \"post_id\": 6948, \"topic_id\": 1597, \"forum_id\": 15, \"post_subject\": \"Re: My Roxie is not starting in cluster environment.\", \"username\": \"sort\", \"post_text\": \"Roxie is not starting due to the following error\\n00000037 2015-02-11 13:35:32.959 2546 2546 "Loading empty package for QuerySet roxie"\\n00000038 2015-02-11 13:35:32.962 2546 2546 "/proc/sys/net/core/rmem_max value 124928 is less than 131071"\\n00000039 2015-02-11 13:35:32.963 2546 2568 "AutoReloadThread 0xc13548 starting"\\n0000003A 2015-02-11 13:35:32.963 2546 2546 "EXCEPTION: (1455): System socket max read buffer is less than 131071"\\n\\nPlease modify the rmem_max value and retry\", \"post_time\": \"2015-02-12 14:16:50\" },\n\t{ \"post_id\": 6945, \"topic_id\": 1597, \"forum_id\": 15, \"post_subject\": \"Re: My Roxie is not starting in cluster environment.\", \"username\": \"lakshmannaresh\", \"post_text\": \"Remaining log files.\", \"post_time\": \"2015-02-11 19:12:51\" },\n\t{ \"post_id\": 6944, \"topic_id\": 1597, \"forum_id\": 15, \"post_subject\": \"My Roxie is not starting in cluster environment.\", \"username\": \"lakshmannaresh\", \"post_text\": \"Hi,\\nIn a 4 node cluster, myroxie service is not starting up even after restarting the entire HPCC service 3 times. The cluster configuration is\\nnumber of support nodes: 1\\nnumber of roxie nodes: 2\\nnumber of thor nodes: 2\\nnumber of thor slaves per node: 1\\n\\nBelow are the services running at each node.\\nX.X.X.221 hpcc-init status :\\nmydafilesrv ( pid 2451 ) is running...\\n\\nX.X.X.241 hpcc-init status :\\nmydafilesrv ( pid 3323 ) is running...\\nmyroxie is stopped\\n\\nX.X.X.139 hpcc-init status :\\nmydafilesrv ( pid 3424 ) is running...\\nmydali ( pid 12375 ) is running...\\nmydfuserver ( pid 12479 ) is running...\\nmyeclagent ( pid 12590 ) is running...\\nmyeclccserver ( pid 12686 ) is running...\\nmyeclscheduler ( pid 12787 ) is running...\\nmyesp ( pid 12886 ) is running...\\nmysasha ( pid 12988 ) is running...\\nmythor ( pid 13226 ) is running...\\n\\nX.X.X.14 hpcc-init status :\\nmydafilesrv ( pid 2455 ) is running...\\nmyroxie is stopped\\n\\nI have attached the environment.xml and log files for reference.\\n\\nThanks,\\nLakshman\", \"post_time\": \"2015-02-11 19:11:29\" },\n\t{ \"post_id\": 7725, \"topic_id\": 1755, \"forum_id\": 15, \"post_subject\": \"Re: Support nodes\", \"username\": \"jeeves\", \"post_text\": \"Richard,\\n\\nThanks for the answer. It helped.\\n\\nThanks,\\n-David\", \"post_time\": \"2015-06-03 13:37:53\" },\n\t{ \"post_id\": 7713, \"topic_id\": 1755, \"forum_id\": 15, \"post_subject\": \"Re: Support nodes\", \"username\": \"rtaylor\", \"post_text\": \"David,
Would 1 support node be sufficient for a 10 node Thor cluster processing around 3 TB of data?
Probably, depending on how much continuous work you're doing with the system. For production systems it is good to have separation, even if that separation is achieved by having the infrastructure run in separate VMs on the same box.\\n\\nFor example, in our production environments we usually keep each support process (Dali, DFU Server, etc.) on it's own "box" (which may be physical or virtual, depending on the environment) for separation of duties and fastest performance. In some cases we have multiple servers setup for each of the support services. So we may have as many as 15-20 actual support nodes working to support a single HPCC environment containing multiple 400-node clusters and any number of additional clusters of other various sizes (200-node, 100-node, 50-node, etc.). This is how we daily deal with the multiple Petabytes of data that we work with.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-02 20:05:05\" },\n\t{ \"post_id\": 7710, \"topic_id\": 1755, \"forum_id\": 15, \"post_subject\": \"Support nodes\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nIs there a rule of thumb for determining the no of support nodes required for a Thor cluster?\\n\\nWould 1 support node be sufficient for a 10 node Thor cluster processing around 3 TB of data?\\n\\nThanks,\\n-David\", \"post_time\": \"2015-06-02 18:33:40\" },\n\t{ \"post_id\": 8390, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"bforeman\", \"post_text\": \"Yes!\", \"post_time\": \"2015-10-29 11:19:44\" },\n\t{ \"post_id\": 8360, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"amillar\", \"post_text\": \"Hi Bob,\\n\\nthanks for the quick reply its very much appreciated,\\n\\nI have one more question though if you don't mind\\n\\nShould we clear off all the hpcc-data off the node before we put it back in?\\n\\nThanks in advance\", \"post_time\": \"2015-10-22 08:56:56\" },\n\t{ \"post_id\": 8346, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"fernando\", \"post_text\": \"Make sure that after running "swap node", you sync up your data by running the backupnode utility from the thormaster node.\\n\\n /opt/HPCCSystems/bin/start_backupnode \\nusage: /opt/HPCCSystems/bin/start_backupnode thor_cluster_name\\n\\nIn this example, the name of the thor cluster is thor200_100\\n\\n/opt/HPCCSystems/bin/start_backupnode thor200_100\\n------------------------------\\nstarting backupnode ...\\nUsing backupnode directory /var/lib/HPCCSystems/hpcc-data/backupnode/last_backup\\nReading slaves file /var/lib/HPCCSystems/thor200_100/backupnode.slaves\\nScanning files from dali ...\\n------------------------------\\n------------------------------\\nWaiting for backup to complete\\n✔ complete at 11:30:54 \\n\\nnote: This process could take some time to complete depending on the amount of data to be restored.\\n\\n1- The data in the hpcc-mirror from the **replicate** node gets copied to the primary location on the "new node".\\n2- The hpcc-mirror directory on the "new node" gets populated with **replicate** data from the appropriate node.\\n\\n\\n\\nAdditionally, you should make sure the "fixed" node has:\\n1- The OS installed along with the matching HPCCSystems software build.\\n2- Empty thor data directories\\n \\n/var/lib/HPCCSystems/hpcc-data \\n/var/lib/HPCCSystems/hpcc-mirror
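A quick pre-check along those lines on the repaired node might look like this (a sketch only; the package query assumes a Debian/Ubuntu install, use rpm -qa on RPM-based systems, and mythor is the cluster name used in this thread):

# confirm the installed build matches the rest of the cluster
dpkg -l | grep hpccsystems
# both thor data directories should be empty before the node rejoins
ls -A /var/lib/HPCCSystems/hpcc-data
ls -A /var/lib/HPCCSystems/hpcc-mirror
# then, from the thormaster, resync the data as described above
/opt/HPCCSystems/bin/start_backupnode mythor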
\", \"post_time\": \"2015-10-20 15:39:18\" },\n\t{ \"post_id\": 8342, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"bforeman\", \"post_text\": \"From our HPCC Systems team, here is the process:\\n\\n1. Use configmgr tool to set up the node that was swapped out as a spare node. \\n\\n2. Push out the change ( copy the updated environment.xml to all the nodes).\\n\\n3. You must restart the components to make them aware of the **change**.\\n\\n4. You may be able to run the “updtdalienv” cmd line tool in order to avoid restarting the dali.\\n\\n\\n/opt/HPCCSystems/bin/updtdalienv <path to the environment-xml-file> [-i <dali-ip>] \\n\\nAssuming that the updated environment.xml file has been copied to </etc/HPCCSystems/environment.xml> on all the nodes.\\n\\nThe command should look like\\n\\n[fernanux@node010241012201 ~]$ sudo /opt/HPCCSystems/bin/updtdalienv /etc/HPCCSystems/environment.xml -i 10.nnn.nnn.nnn\\n00000000 2015-10-20 07:05:09.710 52484 52484 "Environment and node groups updated in dali at 10.nnn.nnn.nnn:7070"\\n00000001 2015-10-20 07:05:09.710 52484 52484 "WARNING: New cluster layout for cluster thorxxx_spares\\nNew cluster layout for cluster thorxxx_spares\\n\\n\\nHTH,\\n\\nBob (for Fernando)\", \"post_time\": \"2015-10-20 12:02:46\" },\n\t{ \"post_id\": 8332, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nthanks for getting back to me, I have followed your instructions to get the information, if you need anything else then please let me know.\\n\\nThanks in advance\\n\\nPlatform Version :5.2.0-1\\n\\nTopology :\\n\\nnode020021\\tThor Master\\t192.168.20.21\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 1]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 2]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 3]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 4]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 5]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 6]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 7]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 8]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 9]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 10]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 11]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 12]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 13]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 14]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 15]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 16]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 17]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 18]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 19]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 
20]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 21]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 22]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 23]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 24]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 25]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 26]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 27]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 28]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 29]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 30]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 31]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 32]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 33]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 34]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 35]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 36]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 37]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 38]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 39]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 40]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 41]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 42]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 43]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 44]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 45]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 46]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 47]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 48]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 49]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 50]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 51]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 52]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 53]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 54]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020024\\tThor Slave\\n[mythor, 55]\\tSwap Node\\t192.168.20.24\\tlocaldomain\\tLinux\\nnode020025\\tThor Slave\\n[mythor, 56]\\tSwap Node\\t192.168.20.25\\tlocaldomain\\tLinux\\nnode020026\\tThor Slave\\n[mythor, 57]\\tSwap Node\\t192.168.20.26\\tlocaldomain\\tLinux\\nnode020027\\tThor Slave\\n[mythor, 58]\\tSwap Node\\t192.168.20.27\\tlocaldomain\\tLinux\\nnode020028\\tThor Slave\\n[mythor, 
59]\\tSwap Node\\t192.168.20.28\\tlocaldomain\\tLinux\\nnode020023\\tThor Slave\\n[mythor, 60]\\tSwap Node\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020023\\tThor Spare\\t192.168.20.23\\tlocaldomain\\tLinux\\nnode020030\\tThor Spare\\t192.168.20.30\\tlocaldomain\\tLinux\", \"post_time\": \"2015-10-19 10:22:23\" },\n\t{ \"post_id\": 8322, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Re: Swapping Back in a Failed Node\", \"username\": \"clo\", \"post_text\": \"Hi, I'm trying to verify proper procedure at the moment. In the meantime, would it be possible for you to respond with the version of the platform that you're running as well as the topology of the cluster?\\n\\nTo get the topology of the thor, please do the following:\\n\\n1. Click on the Operations button in the top row of icons\\n2. Click on Target Clusters on the second row of options that appears.\\n3. Click on the name of the thor that you're trying to investigate and it should expand the list of components that are attached to that Thor.\\n\\nThanks.\", \"post_time\": \"2015-10-16 14:21:53\" },\n\t{ \"post_id\": 8320, \"topic_id\": 1940, \"forum_id\": 15, \"post_subject\": \"Swapping Back in a Failed Node\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nWe recently had an issue on one of the thor nodes and had to use swap node to get the cluster back in action, which worked perfectly.\\n\\nWe have rectified the issue on the original node and we want to swap it back again. \\n\\nWhen we look in ECLWatch at Cluster Processes -> mythor, it does not have the original box in the list of processes.\\n\\nIt has it on the list of machines. (.29) but it isn't a spare so we can't use it as an option to swap back to.\\n\\nDoes anyone know the steps to swap a node back in? (back to the original that was swapped out). It is the same original IP.\\n\\nSee Attached Screen shot.\\n\\nThanks in advance\\n\\n[attachment=0:3oh5w4ny]image.png\", \"post_time\": \"2015-10-16 09:05:28\" },\n\t{ \"post_id\": 8486, \"topic_id\": 1986, \"forum_id\": 15, \"post_subject\": \"Re: Marrying Thor Cluster with Roxie Cluster\", \"username\": \"kps_mani\", \"post_text\": \"Thanks Bob\", \"post_time\": \"2015-11-09 16:22:39\" },\n\t{ \"post_id\": 8464, \"topic_id\": 1986, \"forum_id\": 15, \"post_subject\": \"Re: Marrying Thor Cluster with Roxie Cluster\", \"username\": \"bforeman\", \"post_text\": \"Hi Subbu,\\n\\nIsn’t this just a matter of opening the Configuration Manager for the Thor, adding the IP addresses for the ROXIE cluster, and then pushing the Environment.XML to the appropriate nodes?\\n\\nI think the documentation here might be helpful:\\n\\n[url]http://cdn.hpccsystems.com/releases/CE-Candidate-5.4.4/docs/UsingConfigManager-5.4.4-1.pdf\\n[/url]\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-09 14:22:49\" },\n\t{ \"post_id\": 8450, \"topic_id\": 1986, \"forum_id\": 15, \"post_subject\": \"Marrying Thor Cluster with Roxie Cluster\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI have setup a Thor Cluster with 2 nodes and Roxie Cluster with 2 nodes independently. Now, I would like to make these 2 clusters talk each other. Hence, I can deploy the query on the Roxie cluster and allow the end process access it. I could not find any documentation on how to tie up the Thor and Roxie Cluster. What are the steps that needs to be followed to make both of these clusters talk each other? I understand that it would be more with configuration file. 
Can you please help me on the same?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-11-06 22:09:47\" },\n\t{ \"post_id\": 8488, \"topic_id\": 1988, \"forum_id\": 15, \"post_subject\": \"Re: ESP JSON REST API - Cross Origin Resource Sharing\", \"username\": \"kps_mani\", \"post_text\": \"Thanks Gordan!\", \"post_time\": \"2015-11-09 16:23:06\" },\n\t{ \"post_id\": 8460, \"topic_id\": 1988, \"forum_id\": 15, \"post_subject\": \"Re: ESP JSON REST API - Cross Origin Resource Sharing\", \"username\": \"gsmith\", \"post_text\": \"I have opened an issue for this: https://track.hpccsystems.com/browse/HPCC-14493\\n\\nI know it was discussed a few years ago and at the time jsonp support was added (which was enough for my cross origin work to continue).\\n\\nIf you add jsonp="nameOfMyJSFunc" to your request then the response will be wrapped in a "nameOfMyJSFunc({...})".\\n\\nhttps://en.wikipedia.org/wiki/JSONP\", \"post_time\": \"2015-11-09 13:25:50\" },\n\t{ \"post_id\": 8452, \"topic_id\": 1988, \"forum_id\": 15, \"post_subject\": \"ESP JSON REST API - Cross Origin Resource Sharing\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI am trying to use the REST API from Roxie Cluster to display the data in the Web Application. It seems that HTTP Header is not carrying the below header.\\n\\nAccess-Control-Allow-Origin: *\\n\\nHence, when the API is accessed from the Web Application (Domain 1) to Roxie Cluster (Domain 2), it is failing to return the Results.\\n\\nCan you please look into it and enable to the ESP JSON REST API to add the above header as well?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-11-09 02:51:05\" },\n\t{ \"post_id\": 9364, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"A quick update. The parameter channelsPerSlave has nothing to do with the replication mechanism. I find out the following configuration can work. n is an integer greater than 0.\\n\\n[Cyclic mode]\\n slaveConfig="cyclic redundancy"\\n numDataCopies=n\\n\\n[Full mode]\\n slaveConfig="full redundancy"\\n numDataCopies=n\\n\\nHowever, the overloaded configuration did not produce replication.\\n\\n[Overloaded mode]\\n slaveConfig="overloaded"\\n channelsPerNode=n\\n numDataCopies=n\", \"post_time\": \"2016-03-21 03:36:53\" },\n\t{ \"post_id\": 9362, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"As mentioned before, here attached the two configuration files. The major difference is the channelsPerSlave parameter in Thor. 
Can this affect the replication behavior in Roxie?\\n\\n\\nHere is the diff result (diff new.xml old.xml):\\n\\n2c2\\n< <!-- Edited with ConfigMgr on ip xx.xx.xx.xx on 2016-03-20T17:48:03 -->\\n---\\n> <!-- Edited with ConfigMgr on ip xx.xx.xx.xx on 2016-02-17T13:36:03 -->\\n218a219,222\\n> <AuthenticateFeature description="Access to ESDL configuration service"\\n> path="ESDLConfigAccess"\\n> resource="ESDLConfigAccess"\\n> service="ws_esdlconfig"/>\\n644a649,653\\n> <AuthenticateFeature authenticate="Yes"\\n> description="Access to ESDL configuration service"\\n> path="ESDLConfigAccess"\\n> resource="ESDLConfigAccess"\\n> service="ws_esdlconfig"/>\\n800a810,813\\n> <AuthenticateFeature description="Access to ESDL configuration service"\\n> path="ESDLConfigAccess"\\n> resource="ESDLConfigAccess"\\n> service="ws_esdlconfig"/>\\n989c1002\\n< slaveConfig="cyclic redundancy"\\n---\\n> slaveConfig="cyclic"\\n1105a1119\\n> channelsPerSlave="1"\\n1114a1129\\n> localThorPortInc="200"\\n1120a1136\\n> slaveport="20100"\\n1130,1131c1146\\n< <Storage/>\\n< <SwapNode/>\\n---\\n> <SwapNode AutoSwapNode="false"/>\", \"post_time\": \"2016-03-20 21:05:35\" },\n\t{ \"post_id\": 9360, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"Hi Bob and Richard,\\n\\nI have the chance to rerun the test and do a fresh install on the our cluster. I carefully follow the instruction and now the replication mechanism works as expected, as shown in Figure A and Figure B. Thanks a lot.\\n\\nI compare the new configuration (which works) and the old configuration. I found the major difference comes from channelsPerSlave in Thor. I attached the two configurations in another post due to attachment limitation.\\n\\nThanks,\\nChin-Jung\", \"post_time\": \"2016-03-20 20:55:51\" },\n\t{ \"post_id\": 9344, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"bforeman\", \"post_text\": \"Hi Chin-Jung,\\n\\nI am wondering if your configuration experiments might have changed the configuration to not create replication on your ROXIE. I just ran a test of my 2-node ROXIE training cluster and for a given index, this is what I see:\\n\\n\\nLooking at my configuration file on each node, I see:\\n\\ncyclicOffset="1"\\nnumChannels="2"\\nnumDataCopies="2"\\n\\nSo my cluster seems to be working as documented regarding replication. My server version is the latest 5.4.10-1\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-03-16 14:04:19\" },\n\t{ \"post_id\": 9336, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"[quote="richardkchapman":3hcq76t1]You can get some insights into which slaves are retrieving what data using roxie control queries such as control:indexmetrics.\\n\\n\\nBTW, the information obtained from that command seems identical to that from ECL watch. \\n\\nThanks,\\nChin-Jung\", \"post_time\": \"2016-03-15 15:49:21\" },\n\t{ \"post_id\": 9334, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"Thank you for your responses. It makes some things clear. But we are still unclear about a basic concept. 
Maybe we are asking a much simpler question than you think.\\n\\nThe data presented above was for Roxie cluster cyclic with number_of_data_copies = 2 and a 4-node Thor cluster with default config.\\n\\nFigure 4 shows the data distribution on a 4-node Thor cluster. It has 2 replicas (and appears to be cyclic).\\n\\nTable 1 shows the channel assignments (extracted from the logs) for our 4-node Roxie cluster. This is what we expected for cyclic with 2 copies.\\n\\nFigure 1, which shows the index distribution of our Roxie cluster, has no replicas of the index parts. We didn't expect this because it is contrary to the config.\\n\\nWe do not understand the relationship between index parts and channel assignments. Specifically:\\n\\nQ1: Are channels related to index parts? We thought that there was some correspondence between channels and index parts.\\n\\nQ2: Consider n1, which has index part 1 and channels 1 and 4. What queries can the slave process on n1 service?\\n\\nReally appreciate your time answering our questions.\\n\\nThanks,\\nChin-Jung\", \"post_time\": \"2016-03-15 15:47:05\" },\n\t{ \"post_id\": 9330, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"richardkchapman\", \"post_text\": \"Data partitioning is determined by the thor used to build the data - Roxie uses the partitioning information stored in the top level key to decide which channel(s) to communicate with to retrieve data. Each roxie channel will handle a number of index file parts, and each channel may be implemented by several roxie slave nodes.\\n\\nIndexes are not loaded into RAM on the slaves (they are too large) but are heavily cached.\\n\\nYou can get some insights into which slaves are retrieving what data using roxie control queries such as control:indexmetrics.\\n\\nRichard\", \"post_time\": \"2016-03-14 09:11:56\" },\n\t{ \"post_id\": 9328, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"Hi Bob,\\n\\nThank you for quick reply, and detailed description. I made a mistake to upload the wrong Figure 1 and the correct one should be Figure 4 (as attached here). In Figure 1, I manually upload the DATA part to verify data distribution. In our case, DATA is not copied to the Roxie cluster as shown in Figure 4. Does this mean the INDEX has payload?\\n[attachment=1:3r6pg4wo]s1_data_on_thor.png\\n\\nWith the following configuration:\\n1. 4-node cluster\\n2. channel mode: cyclic (as in Figure 3 in my previous post)\\n3. numChannels: 5\\n4. numDataCopies: 2\\n\\nINDEX partition and distribution\\n[attachment=0:3r6pg4wo]Screen Shot 2016-03-11 at 10.24.14 AM.png\\n\\nQuestions\\nGiven a particular case in the following, how does Roxie select the slave process to handle a query? For example, when the server process on node 1 (n1) receives a Roxie query, the node n1 forwards this query to other slaves (via multicast channel in default setting). In this case, node 1 can communicate with only node 2 (via channel 1) and node 4 (via channel 4). What if the query involves INDEX partition i3 on node 3?\\n\\n\\nIt is still not clear to me about how it works: the multicast channels and data management in Roxie. 
I really appreciate your response.\", \"post_time\": \"2016-03-11 15:36:59\" },\n\t{ \"post_id\": 9326, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"Re: How Roxie channels affect data/partition distribution?\", \"username\": \"bforeman\", \"post_text\": \"Hi Chin,\\n\\nLet me answer each question individually.\\n\\n
1. Does the channel mode (as described in Figure 3) affect how Roxie stores data? Our results do not indicate this relationship.\\n
\\nWhat you are looking at is the actual INDEX in Figure 2. Each ROXIE node divides the INDEX into four pieces, and also stores a meta-key (that's the 32K piece) on each node. Since your INDEX was a non-payload, or standard index, the DATA part that was copied to ROXIE is shown in Figure 1, and reflects what is described in Figure 3. \\n\\n2. How a Roxie server process picks the slave process to run the Roxie query? It seems partition layout is fixed (as shown in Figure 2). The multicast channel does not help in this case?
\\n\\nA copy of the query is stored on each ROXIE node. The load balancer built in to the ROXIE software decides which Farmer will process the query, and then the Farmer will decide which Agent to use to retrieve the result. If one is currently busy, the replicated channel can sometimes be used.\\n\\n3. Are the index files loaded to memory already? Does this mean I look the wrong place in my experiments?
\\n\\nThe way that I understand it, the index files indeed are loaded into RAM at the start of the query, and remain there for its life cycle.\\n\\n4. Is it possible to know which slave nodes handle a certain query? Is the statistics (taken from the output of a query) used for the server or slave process?
\\n\\nTo be honest, I have never needed to dig this deep into the specific nodes, but I would imagine that the Ganglia monitoring tool can give you that information.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-03-11 14:12:29\" },\n\t{ \"post_id\": 9324, \"topic_id\": 2194, \"forum_id\": 15, \"post_subject\": \"How Roxie channels affect data/partition distribution?\", \"username\": \"chsu6\", \"post_text\": \"Hi,\\n\\nI am a PhD student, and we are working on adding elasticity to Roxie. I am trying to understand how Roxie manage data. I setup a 4-node Thor cluster (not including the Thor master) and a 4-node Roxie cluster. To understand How Roxie works, I tried different channel mode (slaveConfig in environment.xml), including cyclic, overloaded and simple. I also tried different combinations of the following parameters.\\n\\nConfigurations\\nCyclic mode: numDataCopies=[1, 2, 4]\\nOverload mode: channelsPerNode=[1, 2, 4]\\nSimple mode: numDataCopies[1, 2, 4]\\n\\nRoxie Application\\nSix Degree from the official tutorial\\n\\nExperiment Steps\\n1. Setup Roxie against one configuration combination from above\\n2. Upload data to landing zone\\n3. Spray data to Thor (observed two replicas per data partition) - see Figure 1\\n4. Publish the query to Roxie (observed only one replica per index partition, but metadata file is replicated to all Roxie nodes) - see Figure 2\\n5. Repeat the same experiment with remaining configurations\\n\\nQuestions\\n1. Does the channel mode (as described in Figure 3) affect how Roxie stores data? Our results do not indicate this relationship.\\n2. How a Roxie server process picks the slave process to run the Roxie query? It seems partition layout is fixed (as shown in Figure 2). The multicast channel does not help in this case?\\n3. Are the index files loaded to memory already? Does this mean I look the wrong place in my experiments?\\n4. Is it possible to know which slave nodes handle a certain query? Is the statistics (taken from the output of a query) used for the server or slave process?\\n\\n<Statistic c="roxie"\\n count="1"\\n creator="myroxie@10.25.11.102"\\n desc="Graph graph1"\\n kind="TimeElapsed"\\n s="graph"\\n scope="graph1"\\n ts="1457679003666831"\\n unit="ns"\\n value="296060166"/>\\n <Statistic c="roxie"\\n count="1"\\n creator="myroxie@10.25.11.102"\\n kind="TimeElapsed"\\n s="global"\\n scope="workunit"\\n ts="1457679003666855"\\n unit="ns"\\n value="296060166"/>\\n <Statistic c="summary"\\n count="1"\\n creator="roxie"\\n desc="Total cluster time"\\n kind="TimeElapsed"\\n s="global"\\n scope="workunit"\\n ts="1457679003666871"\\n unit="ns"\\n value="296060166"/>\\n\\n\\nI would appreciate any feedback. Sorry for the very long questions.\\n\\n-chin\", \"post_time\": \"2016-03-11 06:59:58\" },\n\t{ \"post_id\": 13883, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"nawazkhan\", \"post_text\": \"Hi\\n\\nCan someone help me to find the existing environment details, like how many support nodes, slave nodes of thor, thor slaves in each node are configured?\\n\\nThanks.\\n\\nRegards Nawaz\", \"post_time\": \"2016-12-22 07:03:14\" },\n\t{ \"post_id\": 13703, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"nawazkhan\", \"post_text\": \"Thanks for the detailed explanation.\\nI have one more question. \\n\\nIs there any detailed document to understand the below step in better way. I have referred reference document it is not explained more. 
Is there any criteria for defining the nodes and slave nodes for roxie and thor? \\n\\nEnter\\tnumber of support nodes - What is it referring as support components?\\nNumber nodes for roxie cluster - \\nNumber of salve nodes for thor cluster - \\nNumber of thor slaves per node -\\n \\nRegards Nawaz\", \"post_time\": \"2016-12-07 14:14:38\" },\n\t{ \"post_id\": 13573, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"fernando\", \"post_text\": \"1. Why can not the Thor cluster be up when a node fails?\\n\\nAll the nodes need to be up, if a node fails, the job will fail. Once the master or one of the slaves loses connection to the failed node, an **MP link closed** error will trigger a job abort.\\n\\nThe intent of the hpcc-mirror directory is to prevent data loss due to catastrophic RAID / disk failure. \\n\\nOnce the node is replaced and back online, the system will look for the file in the primary location ( hpcc-data ), then look for it in the replicate location (hpcc-mirror). \\n\\nnote: It is recommended after such an event to run the backupnode utility to restore the data. Additionally, best practices it to have run nightly via cron.\\n\\n2. Is it mandatory that we need to salvage the sprayed data manually by de spray?\\nJust wanted to understand how the data will be lost, because just we are doing configuration alone and there is nothing change with folders? how the sprayed files will be deleted.\\n\\nResizing or redefining the **width** of the thor will effectively break your dataset, as it was originally defined to have 8 parts. The metadata will not know where to find the missing data. \\n\\nTo clean the **bad data** you should be able to delete via the eclwatch interface.\\n\\nOr alternatively you can bring up a clean dali( basically lose all the metadata regarding files, workunits run etc), by renaming or deleting the "hpcc-data/dali" directory. The data on disk can be deleted using the "XREF" utility in the ECLWatch interface. They will show as files on disk that which are not part of the metadata in the dali system store. Alternatively, you can delete the hpcc-data directory on all the thorslaves. The directories will get recreated once the thor restarts.\\n\\nIf you choose to pick any of the **delete** options the system must be down.\\n\\nHpcc_Mirror:\\n\\nBy default and recommended setting, the write to the replicate location (hpcc-mirror) happens asynchronously, so after the write has happened to the primary location (hpcc-data) directory.\", \"post_time\": \"2016-12-01 15:49:42\" },\n\t{ \"post_id\": 13563, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"nawazkhan\", \"post_text\": \"Hi bob,\\n\\nIs it mandatory that we need to salvage the sprayed data manually by de spray?\\nJust wanted to understand how the data will be lost, because just we are doing configuration alone and there is nothing change with folders? how the sprayed files will be deleted.\\n\\nHpcc_Mirror:\\nwhat is the frequency of this mirror happing or only during the write process alone? 
also why it is been storing as part1 and part2 as two copies?\\n\\nRegards Nawaz\", \"post_time\": \"2016-12-01 10:08:49\" },\n\t{ \"post_id\": 9482, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"bforeman\", \"post_text\": \"Hi Ramesh,\\n\\nIf you wanted to salvage the data, it's important that you have replication on for the cluster configuration.\\n\\nIn that case, you would have the complete data set, because the missing files would be in the hpcc-mirror directory on the n+1 node.\\n\\nOutside of HPCC, you could certainly “stitch” the data set together. (getting the missing file parts from the mirror)\\n\\nAnd before redefining the environment together ( in this example for 8 to 7), I would try a despray back to the landing zone to salvage the data.\\n\\nBut once you redefine the cluster, it ( the system )thinks the datasets are composed of 7 file parts instead of 8 and "breaks".\\n\\nSo the important part is to salvage your data prior to changing the configuration.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2016-04-05 15:45:32\" },\n\t{ \"post_id\": 9478, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi Bob,\\n\\nThanks for your response.\\n\\nActually we had a 8 node cluster setup for learning purpose.\\n\\nRecently one of nodes went down and was not in a state to get repaired. Since it is a cluster for learning, we did not want to replace it with a new node. Finally We were left with the option to bring the size of the cluster to 7 nodes. so We had modified environment.xml for 7 node cluster setup and which led to the loss of data present in the cluster. We were fine with the data loss because it is the cluster for learning. \\n\\nWe want to know the right approach that could have followed to avoid the data loss when repairing or replacing the failed node option is ruled out.\\n\\nKindly share your thoughts. \\n\\nRegards,\\nRamesh\", \"post_time\": \"2016-04-05 09:39:15\" },\n\t{ \"post_id\": 9456, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Re: Handling Node Failure\", \"username\": \"bforeman\", \"post_text\": \"Hi Ramesh,\\n\\n1. Why can not the Thor cluster be up even if a node fails?
\\nThe way I understand it, if a node drops out in the middle of a job, the job will try to complete using the replicated node. After that, you would then need to replace the node.\\n\\n What should we do, if the failed node goes to an unrecoverable state?
\\nBecause THOR is the development cluster, the best practice is to take the cluster down, replace or repair the node, and then restart the cluster. I have been told that a "hot swap" (replacing a node while the cluster is still running) can be done but it is just safer to stop the cluster and replace after that.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-03-31 16:11:36\" },\n\t{ \"post_id\": 9424, \"topic_id\": 2224, \"forum_id\": 15, \"post_subject\": \"Handling Node Failure\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi,\\n\\nIt is said that the node failures in HPCC cluster are handled by replicating data in other nodes. I assumed that even if a node goes down, the cluster will still be up. \\n\\nBut what I understood recently is that, if a node goes down then the Thor cluster as whole also goes down.\\n\\nSo my questions are,\\n\\t1. Why can not the Thor cluster be up even if a node fails?\\n\\t2. What should we do, if the failed node goes to an unrecoverable state?\\n\\nThanks,\\nRamesh\", \"post_time\": \"2016-03-30 05:38:38\" },\n\t{ \"post_id\": 19123, \"topic_id\": 2226, \"forum_id\": 15, \"post_subject\": \"Re: Increasing or Decreasing the cluster size\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nFYI, this code DOES run on my 6.4, 3-node training cluster:OUTPUT(ds,,'~RTTEST::test::fileparts2',CLUSTER('mythor[1-2]'));
\\nIt DOES limit the number of nodes the data is written to, but it does NOT change the total number of file parts written. IOW, on my 3-node cluster I get file part 1 on node 1, and parts 2 & 3 both on node 2. \\n\\nThat means, running the above code on a 400-node cluster would put file part 1 on node 1, and all the rest of the parts on node 2 (possibly running out of disk space). \\n\\nTherefore I have to score this workaround as "works, but dangerous" \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-27 13:46:52\" },\n\t{ \"post_id\": 19103, \"topic_id\": 2226, \"forum_id\": 15, \"post_subject\": \"Re: Increasing or Decreasing the cluster size\", \"username\": \"jwilt\", \"post_text\": \"Just happened on this - \\nFor future reference, this ECL will write a dataset to only selected nodes in a cluster:\\nOUTPUT(ds, , '~thor::myfile', CLUSTER('thor[1-4]'));\\n\\nI.e., the file could be re-written to only the first 4 nodes, before node 5 is removed.\", \"post_time\": \"2017-09-26 21:23:35\" },\n\t{ \"post_id\": 9452, \"topic_id\": 2226, \"forum_id\": 15, \"post_subject\": \"Re: Increasing or Decreasing the cluster size\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi Richard,\\n\\nThanks for your response. It is helpful.\\n\\nRegards,\\nRamesh\", \"post_time\": \"2016-03-31 04:17:17\" },\n\t{ \"post_id\": 9428, \"topic_id\": 2226, \"forum_id\": 15, \"post_subject\": \"Re: Increasing or Decreasing the cluster size\", \"username\": \"rtaylor\", \"post_text\": \"Ramesh,
I like to know, how to perform below operations in a existing HPCC cluster without data loss.
HPCC clusters are always pre-configured, so changing the number of nodes always implies bringing the cluster down, re-configuring, then bringing it back up in the new configuration.
1. Add a node
Since HPCC data files are always distributed across the nodes and the DFU keeps track of where all the data file parts are, changing a 4-node cluster to a 5-node simply means adding the new node. The new configuration will still have all the data (as 4 file parts instead of 5) and all you need to do is use the files on the new configuration, redistributing the data to make use of all 5 nodes.
2. Remove a node
This is the tough one. You would need to despray the data, reconfigure your cluster, then spray the files again to the new configuration.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-03-30 17:35:27\" },\n\t{ \"post_id\": 9426, \"topic_id\": 2226, \"forum_id\": 15, \"post_subject\": \"Increasing or Decreasing the cluster size\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi,\\n\\nI like to know, how to perform below operations in a existing HPCC cluster without data loss.\\n1. Add a node\\n2. Remove a node\\n\\nThanks,\\nRamesh\", \"post_time\": \"2016-03-30 05:56:46\" },\n\t{ \"post_id\": 12563, \"topic_id\": 3143, \"forum_id\": 15, \"post_subject\": \"HPCC cluster fault tolerance\", \"username\": \"rqg0717\", \"post_text\": \"Dear all,\\n\\nCan THOR nodes be removed from the HPCC cluster dynamically and HPCC THOR or ROXIE continues to operate without interruption please? I have configured a cluster with 5 THOR slave nodes. This morning I lost one node due to the hardware failure and I realized that I am not able to query or read logical files from THOR. I was wondering if I could recover the logical files. Please advise. Thank you.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-27 14:47:51\" },\n\t{ \"post_id\": 21693, \"topic_id\": 3403, \"forum_id\": 15, \"post_subject\": \"Re: Mythor failing to start\", \"username\": \"jsmith\", \"post_text\": \"It shouldn't be anything to do with moving the storage directories, but it sounds like you've lost (or moved) some of the installation files (was /etc/HPCCSystems relocated?) \\n\\ni.e. /etc/HPCCSystems/environment.conf must be present and it's owner and group should be 'hpcc'.\\n\\nFrom the error you've pasted, it looks like it's now missing on either the master and/or some of the slave machines. 
OR in theory it (the scripts running as user hpcc) no longer have rights (permissions) to access it...\\n\\nI would look at the master and all slave nodes and look to see 1st if this file is present everywhere, and if user hpcc can access it.\", \"post_time\": \"2018-04-17 16:03:46\" },\n\t{ \"post_id\": 21661, \"topic_id\": 3403, \"forum_id\": 15, \"post_subject\": \"Re: Mythor failing to start\", \"username\": \"eprado22\", \"post_text\": \"My Logs\\n\\n[hpcc@nodoa mythor]$ vi init_mythor_2018_04_13_20_11_31.log\\n2018_04_13_20_11_31: Starting mythor\\n2018_04_13_20_11_31: removing any previous sentinel file\\n2018_04_13_20_11_31: Ensuring a clean working environment ...\\n2018_04_13_20_11_31: Killing slaves\\n2018_04_13_20_11_31: --------------------------\\n2018_04_13_20_11_31: starting thorslaves ...\\n2018_04_13_20_11_32: Error 255 in frunssh\\n2018_04_13_20_11_32: Please check /var/log/HPCCSystems/frunssh for more details\\n2018_04_13_20_11_33: Stopping mythor\\n2018_04_13_20_11_33: mythor Stopped\\n2018_04_13_20_11_33: Killing slaves\\n2018_04_13_20_11_34: Frunssh successful\\n2018_04_13_20_11_34: removing init.pid file and slaves file\\n\\n\\nvi frunssh.2018_04_13.log\\n\\n1: ssh(0): STDERR: cat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\ncat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\ncat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\n/opt/HPCCSystems/etc/init.d/hpcc_common: línea 266: cfg.section.DEFAULT: no se encontró la orden\\nunable to write to /var/log/HPCCSystems/mythor/init_thorslave_mythor_2018_04_13_20_11_31.log\\n2: ssh(0): STDERR: cat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\ncat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\ncat: /etc/HPCCSystems/environment.conf: No existe el fichero o el directorio\\n/opt/HPCCSystems/etc/init.d/hpcc_common: línea 266: cfg.section.DEFAULT: no se encontró la orden\\nunable to write to /var/log/HPCCSystems/mythor/init_thorslave_mythor_2018_04_13_20_11_31.log\", \"post_time\": \"2018-04-14 01:33:57\" },\n\t{ \"post_id\": 21651, \"topic_id\": 3403, \"forum_id\": 15, \"post_subject\": \"Re: Mythor failing to start\", \"username\": \"eprado22\", \"post_text\": \"Hi,\\nI have the same problem, any solution for this?\\n\\nThanks\", \"post_time\": \"2018-04-14 00:58:53\" },\n\t{ \"post_id\": 13403, \"topic_id\": 3403, \"forum_id\": 15, \"post_subject\": \"Mythor failing to start\", \"username\": \"bbrown57\", \"post_text\": \"I recently had to change a local storage location on my compute slaves to a remote nfs share due to space issues. After doing this, mythor fails to start no matter what I do. \\nThe mythor init log looks like this:\\n# cat init_mythor_2016_11_28_10_59_29.log\\n2016-11-28T15:59:29: Starting mythor\\n2016-11-28T15:59:29: removing any previous sentinel file\\n2016-11-28T15:59:29: Ensuring a clean working environment ...\\n2016-11-28T15:59:29: Killing slaves\\n2016-11-28T15:59:29: Error 255 in frunssh\\n2016-11-28T15:59:29: Please check /var/log/HPCCSystems/frunssh for more details\\n2016-11-28T15:59:29: Stopping mythor\\n\\nShort of rebuilding my cluster, I'm not sure what to do at this point. 
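A sketch of the per-node check jsmith suggests above (the host names below are hypothetical placeholders, substitute the master and every slave IP):

for host in node1 node2 node3; do
  ssh $host ls -l /etc/HPCCSystems/environment.conf
done
# the file should exist on every node and be owned by hpcc:hpcc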
The location that was remapped to an nfs share is /var/lib/HPCCSystems/hpcc-data.\", \"post_time\": \"2016-11-28 16:05:01\" },\n\t{ \"post_id\": 21631, \"topic_id\": 5491, \"forum_id\": 15, \"post_subject\": \"Re: Node-wise Workload Distribution\", \"username\": \"rsghatpa\", \"post_text\": \"You are spot on @RTaylor.\\n\\nI have changed my target to thor and now I see different metrics. Thanks for the help.\", \"post_time\": \"2018-04-11 16:14:20\" },\n\t{ \"post_id\": 21621, \"topic_id\": 5491, \"forum_id\": 15, \"post_subject\": \"Re: Node-wise Workload Distribution\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa,\\n\\nECL Agent is also known as hThor. So without seeing your actual code, this sounds like the job is running on hThor, not Thor. \\n\\nEven if you target your Thor cluster, ECL Agent will "hijack" jobs when the code is really simple. If you have targeted your Thor, and think your code is complex enough that it should be running on Thor, then please post it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-11 13:29:23\" },\n\t{ \"post_id\": 21611, \"topic_id\": 5491, \"forum_id\": 15, \"post_subject\": \"Node-wise Workload Distribution\", \"username\": \"rsghatpa\", \"post_text\": \"Hi guys, I have 1 master node and 2 slave nodes.\\n\\nProcesses running on Master :\\nmydafilesrv ( pid 2252 ) is running ...\\nmyeclagent ( pid 3533 ) is running ...\\nmyesp ( pid 5159 ) is running ...\\nmysasha ( pid 6796 ) is running ...\\nmythor ( pid 21232 ) is running with 2 slave process(es) ...\\n\\nProcesses running on Slave :\\nmydafilesrv ( pid 2096 ) is running ...\\nmydali ( pid 3349 ) is running ...\\nmyeclccserver ( pid 4779 ) is running ...\\n\\nInitially, the CPU utilization for all 3 nodes is 0% (100% idle state). \\n\\nWhen I start running a dataGeneration ECL script, the CPU utilization of master node reaches 40-50% however CPU utilization on Slaves continues to remain 0%. \\nEven the network utilization on all 3 nodes is similar and the disk utilization for just the master seems very high.\\n\\nI am confused. I expected the slave processes to be doing all the data generation and writing it to the disk work. But low-level metrics don't indicate that. Can someone please share some insight on the same?\\n\\nOnly HPCC related user processes are running on these 3 instances.\\nI am consistently seeing eclagent to be the top process on master.\\nI am periodically seeing daserver & thorslave to be the top process on slave\", \"post_time\": \"2018-04-11 12:31:14\" },\n\t{ \"post_id\": 21793, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"tlhumphrey2\", \"post_text\": \"rsghatpa,\\n\\nGlad to hear it worked. Did you basically do the dfuplus command as I show it above?\\n\\nTim\", \"post_time\": \"2018-04-30 15:01:17\" },\n\t{ \"post_id\": 21783, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"rsghatpa\", \"post_text\": \"Hi Tim, it worked. I changed my dropzone location. Added my files to it. 
And using ECL Watch did the necessary spray jobs.\", \"post_time\": \"2018-04-30 14:22:22\" },\n\t{ \"post_id\": 21773, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"tlhumphrey2\", \"post_text\": \"rsghatpa,\\n\\nLet us know if you got your spray to work and how, please.\\n\\nTim\", \"post_time\": \"2018-04-24 15:03:45\" },\n\t{ \"post_id\": 21763, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"tlhumphrey2\", \"post_text\": \"From a linux (actually it was my master instance) I placed a file in my home directory, /home/ubuntu, and was able to spray it with the following:\\nsudo dfuplus action=spray srcip=10.106.73.212 \\\\\\nsrcfile=/home/ubuntu/myfile_head4.csv \\\\\\ndstname=tlh::myfile_head4 dstcluster=mythor \\\\\\nserver=http://10.106.73.212 format=csv\\n
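For the 30-file set in the original question, the same form can be wrapped in a loop once the files sit in a registered drop zone (a sketch only; the drop-zone path below is the default location and the IPs are the ones quoted in this thread, so adjust to match what ECL Watch shows):

for i in $(seq 1 30); do
  sudo dfuplus action=spray srcip=172.31.33.152 \
    srcfile=/var/lib/HPCCSystems/mydropzone/kmeans_30GB/file$i \
    dstname=kmeans::dataset::file$i dstcluster=mythor \
    server=http://172.31.45.14 format=csv
done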
\", \"post_time\": \"2018-04-23 18:00:29\" },\n\t{ \"post_id\": 21753, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"tlhumphrey2\", \"post_text\": \"First, take the port number off the server. Second, use the IP addresses found on your ecl watch under the Operations icon then click on System Servers.\", \"post_time\": \"2018-04-23 17:43:12\" },\n\t{ \"post_id\": 21743, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Re: Dfuplus fails to spray files\", \"username\": \"JimD\", \"post_text\": \"rsghatpa,\\n\\nYou can only spray from locations that are "registered drop zones" in your environment. \\n\\nYou can either move the file(s) to your drop zone or reconfigure your environment so that \\nmnt/var/lib/HPCCSystems/dataset/kmeans_30GB/ on 172.31.45.14 is a registered drop zone. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2018-04-23 16:56:48\" },\n\t{ \"post_id\": 21733, \"topic_id\": 5493, \"forum_id\": 15, \"post_subject\": \"Dfuplus fails to spray files\", \"username\": \"rsghatpa\", \"post_text\": \"I have been trying to spray data using dfuplus command.\\n\\nHere are the details of my HPCC setup.\\n\\nMaster IP Address :172.31.45.14\\nSlave 1 IP Address : 172.31.33.152 \\nSlave 2 IP Address : 172.31.42.187\\n\\nOn my master node, I have 30 GB of kmeans related dataset in csv format spread across 30 files.\\nThe directory is : /mnt/var/lib/HPCCSystems/dataset/kmeans_30GB/\\n\\nUsing the dfuplus command, I am trying to spray the same data across my slave nodes. \\n\\nHere is the command I am running for an individual file.\\n\\n$ sudo dfuplus action=spray srcip=172.31.45.14 srcfile=/mnt/var/lib/HPCCSystems/dataset/kmeans_30GB/file1 dstname=kmeans::dataset::file1 dstcluster=mythor server=http://172.31.45.14:8010 format=csv\\n\\nChecking for local Dali File Server on port 7100\\n\\nVariable spraying from /mnt/var/lib/HPCCSystems/dataset/kmeans_30GB/file1 on 172.31.45.14:7100 to kmeans::dataset::file1\\nSubmitted WUID D20180423-152513\\nD20180423-152513 status: queued\\nFailed: No Drop Zone on '172.31.45.14' configured at '/mnt/var/lib/HPCCSystems/dataset/kmeans_30GB/file1'.\\n\\n\\nThe job fails stating that\\nNo Drop Zone on '172.31.45.14' configured at '/mnt/var/lib/HPCCSystems/dataset/kmeans_30GB/file1'.\\n\\nMy ECL Watch console shows a single mydropzone entry of 172.31.33.152.\", \"post_time\": \"2018-04-23 15:37:30\" },\n\t{ \"post_id\": 21863, \"topic_id\": 5563, \"forum_id\": 15, \"post_subject\": \"Re: CLI to manage cluster\", \"username\": \"ming\", \"post_text\": \"This is very good.\\nAlso we will provide generic tools to deploy HPCC cluster\\n1) Docker Compose. \\n2) Kubernetes. \\n 2.1) local Virtualbox with minikube\\n 2.2) AWS\\n 2.3) Google Computing Engine if we have the environment\\n3) Juju charm\\n 3.1) local Linux\\n 3.2) AWS\\n 3.3) Azure if we have the environment\\n 3.4) Google Computing Engine if we have the environment \\n\\nWe have all of three in the past. But need some re-work and update.\", \"post_time\": \"2018-05-07 19:23:26\" },\n\t{ \"post_id\": 21853, \"topic_id\": 5563, \"forum_id\": 15, \"post_subject\": \"Re: CLI to manage cluster\", \"username\": \"tlhumphrey2\", \"post_text\": \"I would be interested in your CLI. 
Do you have the code for it on github?\", \"post_time\": \"2018-05-07 12:13:59\" },\n\t{ \"post_id\": 21843, \"topic_id\": 5563, \"forum_id\": 15, \"post_subject\": \"Re: CLI to manage cluster\", \"username\": \"lpezet\", \"post_text\": \"Here's what a configuration file might look like:\\n\\n---\\nAWS:\\n Profile: someprofile\\n Region: us-east-1\\n # run: "aws iam get-user" to get your username\\n Username: first.last\\n S3Bucket: bucket-where-cluster-files-will-be-uploaded\\n# If true, most operations won't trigger any AWS action, or will trigger AWS actions with "DryRun" parameter.\\nDryRun: true\\n\\n# Used for notifications\\nEmail: 'youremail@domain.com'\\n\\nVpc:\\n # default or dedicated\\n Tenancy: default\\n # form of x.x.x.x/xx\\n CidrBlock: 192.168.0.10/24\\n # usually in the form of subnet-xxxxxxxx\\n SubnetId: subnet-12345678\\n # usually in the form of sg-xxxxxxxx\\n SecurityGroupId: sg-12345678\\n\\nCluster:\\n # 0-45\\n Slaves: 1\\n # usually 1 or 2\\n Supports: 1\\n # Name of cluster\\n Name: hpccv3-fun-name-auto-generated\\n \\n# Default settings applied to all type of instances (Master, Support, and Slave).\\nInstance:\\n # Key Pair name from the region. Will be imported into instance and used when ssh into.\\n KeyName: hpcc-cluster \\n # Depends on region. Best is to either check AWS documentation or try to create a simple instance with AWS Console and see the image id being used.\\n # Some hints for now: us-east-1 (a) : ami-0b33d91d, us-east-2 : ami-c55673a0\\n ImageId: ami-0b33d91d\\n # Role to be used by EC2 instances. This is importat as resources will be downloaded using "aws" cli, which will use this role for permissions.\\n IamRole: hpcc-cluster\\n # Valid EC2 Instance Type for region AND availability zone (!!!). Check AWS documentation for list of valid types.\\n Type: m5.xlarge\\n Volumes:\\n - # required\\n DeviceName: /dev/xvdf\\n # required if type != ephemeral. Size in GB\\n Size: 1\\n # optional, defaults to gp2. Valid EBS Volume Type: gp2, io1, st1, etc. Check AWS documentation for exhaustive and up-to-date list of valid values.\\n # ephemeral is also possible for instance store.\\n Type: gp2\\n # optional, defaults to ext4. Values depend on OS but most supported ones are ext2, ext3, ext4, and xfs.\\n FSType: ext4\\n # optional, defaults to false. Either true or false\\n Encrypted: true\\n # required for io1 type volume.\\n #Iops: 1000\\n # optional. Where the device will be mounted.\\n Mount: /volumes/vol-data\\n # optional. Will create a symbolic link to the mount\\n MapsTo: /var/lib/HPCCSystems/hpcc-data\\n # optional. If part of an array, specify array device name\\n #RaidDeviceName: /dev/md0\\n # RAID devices\\n # NB: RAIDs on ephemeral devices has not been tested yet.\\n Raids:\\n - # required\\n DeviceName: /dev/md0\\n # required\\n Name: MyRaid0\\n # required\\n Level: 0\\n # optional, defaults to ext4. Values depend on OS but most supported ones are ext2, ext3, ext4, and xfs.\\n FSType: ext4\\n # required. Where the device will be mounted.\\n Mount: /volumes/vol-data\\n # optional. 
Will create a symbolic link to the mount\\n MapsTo: /var/lib/HPCCSystems/hpcc-data\\n \\nMasterInstance:\\n Volumes:\\n - DeviceName: /dev/xvdf\\n Size: 10\\n Type: gp2\\n Encrypted: true\\n Mount: /volumes/vol-data\\n MapsTo: /var/lib/HPCCSystems/hpcc-data\\n - DeviceName: /dev/xvdz\\n Type: gp2\\n # Specify SnapshotId only or Size and Encrypted for new volume\\n Size: 10\\n Encrypted: true\\n #SnapshotId: snap-12345678901234567\\n Mount: /volumes/vol-dropzone\\n MapsTo: /var/lib/HPCCSystems/mydropzone\\n \\nSlaveInstance:\\n # could be of a different type\\n Type: m5.xlarge\\n Volumes:\\n - DeviceName: /dev/xvdf\\n Size: 80\\n Type: gp2\\n Encrypted: true\\n Mount: /volumes/vol-data\\n MapsTo: /var/lib/HPCCSystems/hpcc-data\\n - DeviceName: /dev/xvdg\\n Size: 50\\n Type: gp2\\n Encrypted: true\\n Mount: /volumes/vol-thor\\n MapsTo: /var/lib/HPCCSystems/mythor\\n - DeviceName: /dev/xvdh\\n Size: 10\\n Type: gp2\\n Encrypted: true\\n Mount: /volumes/vol-hpcc-mirror\\n MapsTo: /var/lib/HPCCSystems/hpcc-mirror\\n\\nSupportInstance:\\n # could be of a different type\\n Type: m5.xlarge\\n Volumes:\\n - DeviceName: /dev/xvdf\\n Size: 80\\n Type: gp2\\n Encrypted: true\\n Mount: /volumes/vol-data\\n MapsTo: /var/lib/HPCCSystems/hpcc-data\\n
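\\n\\nFor anyone wondering what the Volumes entries boil down to on a node, the manual equivalent is roughly the following (a sketch only, using the device and paths from the example above):\\n\\nsudo mkfs -t ext4 /dev/xvdf\\nsudo mkdir -p /volumes/vol-data\\nsudo mount /dev/xvdf /volumes/vol-data\\nsudo ln -s /volumes/vol-data /var/lib/HPCCSystems/hpcc-data\\n\\nThe Size/Type/Encrypted settings describe the EBS volume itself, Mount is where it gets attached, and MapsTo is the symbolic link that puts it where the platform expects its data directory.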
\", \"post_time\": \"2018-05-04 21:21:01\" },\n\t{ \"post_id\": 21833, \"topic_id\": 5563, \"forum_id\": 15, \"post_subject\": \"CLI to manage cluster\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nNot sure where to post this...\\nI was wondering if anyone from the community feels like a CLI might be useful to ease creation, sharing and administration of HPCC Systems Clusters (be it Thor or Roxie or both)?\\nThink "vagrant", like vagrant init to initialize configuration file(s) for cluster to be brought up, and vagrant up to create actual cluster according to specs.\\n\\nI am aware of the https://aws.hpccsystems.com/ and templates from Timothy Humphrey (tlhumphrey2?) on github (https://github.com/tlhumphrey2/EasyFastHPCCoAWS).\\nThis CLI is inspired by both and I mostly use it to quickly boot up clusters (once one has a configuration file, it's fast to copy it over and tweak it) and run ECL code. Right now it only works with AWS.\\n\\nOne big benefit here is that I can commit those cluster configuration files for someone else to checkout and/or copy, and easily boot similar cluster and tweak as needed.\\nI personally use it extensively to run benchmarks and figure out best configuration within AWS (EBS volumes are everything but AWS-ome).\\n\\nIn a nutshell:\\n\\nmkdir mycluster\\ncd mycluster\\n# initializes the current directory to be an HPCC Systems environment by creating an initial cluster.config file if one does not already exist\\nhpcc-cluster init\\n# configure cluster\\nvi cluster.config\\n# (optional) validate configuration\\nhpcc-init validate\\n# create cluster\\nhpcc-cluster up\\n# start configmgr. Will open browser to http://....:8015/ page if started successfully\\nhpcc-cluster configmgr\\n# stop configmgr\\nhpcc-cluster configmgr\\n# first make sure hpcc-init stopped, then copy environment.xml file to all nodes\\nhpcc-cluster hpcc-init update\\n# now start cluster\\nhpcc-cluster hpcc-init start\\n\\n# do stuff...\\n\\n# stop cluster cause it's time to go to bed and I'd be wasting money just leaving it up and running...\\nhpcc-cluster halt\\n\\n# after good long night's sleep, start cluster back:\\nhpcc-cluster resume\\n
\\n\\nAnother thing it does is use aliases for nodes to help target them. \\nFor example, to ssh into the 6th slave (using consecutive private IP addresses):\\nhpcc-cluster ssh @slave005
\\n\\nTo simply ssh into the Master node:\\nhpcc-cluster ssh
\\n\\nTo open the ECL Watch page:\\nhpcc-cluster eclwatch
\\n\\n\\nHere's the help page from this cli:\\n\\n Usage: hpcc-cluster [options] [command]\\n\\n For manual, use man hpcc-cluster\\n\\n Options:\\n\\n -V, --version output the version number\\n -d, --debug <level> Specify log level (default: info)\\n -p, --profile <profile> Specify AWS Configuration Profile to use.\\n -r, --region <region> Specify AWS region to use (default: us-east-1)\\n -h, --help output usage information\\n\\n Commands:\\n\\n init [options] Initialize cluster configuration.\\n up|create Create new cluster or Update existing cluster based on configuration.\\n validate Validate template using cluster configuration. This is mostly for debugging purposes when updating the cluster template/configuration.\\n resume Resume cluster previously halted.\\n halt Halt current cluster. Cluster can be resumed thereafter.\\n destroy|terminate Destroy current cluster. Cluster CAN NOT be stopped nor resumed thereafter.\\n status Display status of current cluster.\\n help Display help.\\n estimate Estimate the costs of your current configuration.\\n eclwatch Open ECL Watch page.\\n run [options] <target> <cmd> Run command in target(s). Example: run slave* "sudo resize2fs /dev/xvdf".\\n hpcc-init <cmd> [ip_or_node] HPCC Cluster itself. Possible commands: start, stop, restart, status, stopAll (stops dafilesrv as well) and update which stops cluster, copy source/environment.xml file and push to all nodes.\\n configmgr [ip_or_node] Start/Stop HPCC Config Manager\\n ssh [ip_or_node] SSH into node of current cluster\\n scp <source> <target> SCP files from/to node. Just prefix remote with ":", like "scp local_file.txt @slave000:/tmp/remote_file.txt".\\n
\\n\\nIt doesn't show here but there's a lot of things happening behind the scene. For example, one can specify an ephemeral volume to be used on a node (say for mythor temporary/spill files in slaves) but those volumes need to be "re-setup" after stop/start of instance, and not when doing a soft reboot. \\nIt's just not as easy as putting those in /etc/fstab, especially when one wants to use those in a RAID setting (which this tool supports).\\n\\nAnyone interested in such CLI?\\nIf there's enough interest I'll open source it.\\n\\nThanks!\", \"post_time\": \"2018-05-04 21:20:10\" },\n\t{ \"post_id\": 32633, \"topic_id\": 8523, \"forum_id\": 15, \"post_subject\": \"Re: Multinode Roxie Setup\", \"username\": \"rtaylor\", \"post_text\": \"Antony,\\n\\nLet me start with the general questions:Server – what is the definition of a Roxie Server
In general, a multi-node ROXIE has n nodes (physical or virtual) where each node runs both a "Server" process and an "Agent" (slave) process. The Server process handles the queries themselves (each node in the ROXIE can handle all queries published to that ROXIE). The Agent (slave) process handles all the data access for the queries.\\nSlave – what is the definition of a Roxie Slave
The Agent (slave) process handles all the data access for the queries. In a multi-node ROXIE, the data is distributed across all the nodes in "channels" so that a single piece of data exists only in a single channel. So each node of the ROXIE contains only a portion of all the data, and access to that portion is controlled by the Agent (slave) processes.\\n\\nRedundancy – can you explain in more detail what each of these redundancy modes do?
Each data channel contains a portion of the data. The data for a single channel is duplicated on two (or more) nodes, and the Agent (slave) processes on those nodes determine which actual node delivers the data for each request (the less busy node usually wins that battle). The most common form of Redundancy is "Cyclic" wherein a single piece of data goes to one channel -- and the channel stores its data on both nodes n and n+1. \\n\\nSo, here's an example, assuming I have a 12-node Thor:
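\\n\\n(A rough sketch rather than an exact recipe: a 12-part file built on that Thor, deployed to a 12-channel ROXIE with cyclic redundancy, puts part 1 on channel 1 held by nodes 1 and 2, part 2 on channel 2 held by nodes 2 and 3, and so on around to part 12 on channel 12 held by nodes 12 and 1; losing any single node still leaves every part served by its neighbour.)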
\\nThat is a brief overview of how a multi-node ROXIE operates. I'll let others chime in with responses to your configuration questions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-12-09 19:56:36\" },\n\t{ \"post_id\": 32623, \"topic_id\": 8523, \"forum_id\": 15, \"post_subject\": \"Multinode Roxie Setup\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nI am looking for some help configuring a multi Node Roxie, we have been using HPCC for many years now and have always deployed multiple single node Roxies and used our Firewall as the load balancer.\\n\\nI am now looking at configuring Roxies more efficiently but have a few queries based on the documentation and working through the config manager.\\n\\nSet-up : \\n\\nTHOR Cluster :\\n\\n16x Physical Servers configured with multiple Thor Instances and queues :\\n\\nTHOR_L1 – 144 Nodes (12 Physical machines – configured with 12 Slaves Per Node)\\nTHOR_L2 - 144 Nodes (12 Physical machines– configured with 12 Slaves Per Node))\\n\\nTHOR_S1 – 36 Nodes (3 physical Machines– configured with 12 Slaves Per Node)) \\nTHOR_S2 – 36 Nodes (3 physical Machines– configured with 12 Slaves Per Node)) \\n\\nThor Master – one Physical machine for the above.\\n\\nThese Thor Clusters will publish to the new Roxies once configured : \\n\\nMulti-Node Roxie (plan)\\n\\n3 x Physical Machines\\n3 x Physical Machines\\n\\nBehind a load balancer using Round Robin.\\n\\nQuestions : \\n\\nRoxie Cluster : \\n\\nServer – what is the definition of a Roxie Server – my understanding is it will be the server that hosts the relevant services and endpoints for applications to talk to e.g. ECL Watch, WsECL, Dali etc? is this correct?\\n\\nSlave – what is the definition of a Roxie Slave – my understanding is these are physical computers that host the data, perform lookups etc (similar to a THOR slave) is this correct?\\n\\nRedundancy – can you explain in more detail what each of these redundancy modes do? Do you have some real world examples where each would be applicable?\\n\\n• Simple Redundancy - One channel per slave. Most commonly used for a single node Roxie.\\n• Full Redundancy - More slaves than the number of channels. Multiple slaves host each channel.\\n• Overloaded Redundancy - There are multiple channels per slave.\\n• Cyclic Redundancy - Each node hosts multiple channels in rotation. The most commonly used configuration.\\n\\nAfter reading through all of the documentation and cannot seem to see where you would configure the following : \\n\\nFor maximum performance, you should configure your cluster so slave nodes perform most jobs in memory. \\n\\n where in the config manager are these settings?\\n\\nFor maximum performance, you should configure your cluster so slave nodes perform most jobs in memory. For example, if a query uses three data files with a combined file size of 60 GB, a 40-channel cluster is a good size, while a 60-channel is probably better. \\n\\n – how do I configure this to suit my set-up? Am I right in thinking a “Channel” is a disk on the Physical Server? So in order to get 60 channels I would need 30 machines configured with two hard disks in each? How do are those hard disks then configured for Roxie?\\n\\nAnother consideration is the size of the Thor cluster creating the data files and index files to be loaded. Your target Roxie cluster should be the same size as the Thor on which the data and index files are created or a number evenly divisible by the size of your Roxie cluster. 
For example, a 100-way Thor to a 20-way Roxie would be acceptable.\\n\\n-\\tWe have a 144 Node Thor, and 6 Physical machines to use as Roxies, the plan here would be to set-up 2 X 3 node Roxies ( 1 Server + 2 Slaves) and put these behind an external load balancer - is the Divisible number the total number of nodes e.g. 3? Or the total number of Slave nodes? E.g 2? - I appreciate this needs to be correct so each node gets an equal distribution of data. \\n\\nThe final consideration is the number of Server processes in a cluster. Each slave must also be a Server, but you can dedicate additional nodes to be only Server processes. This is useful for queries that require processing on the Server after results are returned from slaves. Those Server-intensive queries could be sent only to dedicated Server IP addresses so the load is removed from nodes acting as both Server and slave.\\n\\n-\\tI have set all three in Config manager as Servers under Roxie Cluster – myroxie – Servers tab. but how do a set a server as a server only? Or a slave only? \\n\\nnode020025\\n192.168.20.25\\nmytoposerver myroxie mydali mydfuserver myeclccserver myesp myeclagent myftslave mysasha mydafilesrv myeclscheduler\\n\\nnode020026\\n192.168.20.26\\nmyroxie myftslave mydafilesrv\\n\\nnode020027\\n192.168.20.27\\nmyroxie myftslave mydafilesrv\\n\\nThe documentation also goes on to say :\\n\\nThe most typical scenario for HPCC Systems is utilizing it with a high volume of data. This suggested sample sizing would be appropriate for a site with large volumes of data. A good policy is to set the Thor size to 4 times the source data on your HPCC Systems. Typically, Roxie would be about 1/4 the size of Thor. This is because the data is compressed and the system does not hold any transient data in Roxie. Remember that you do not want the number of Roxie nodes to exceed the number of Thor nodes.\\n\\n-\\tWhat is the best practice? ¼ size of Thor? Equal to THOR? Or Divisible by the size of the Roxie Cluster? \\n\\nRoxie keeps most of its data in memory, so you should allocate plenty of memory for Roxie. Calculate the approximate size of your data, and allocate appropriately. You should either increase the number of nodes, or increase the amount of memory.\\n\\n-\\tHow do I allocate memory to Roxie? Is this physically installing more memory or is there a setting? Is HPCC’s definition of a NODE a Physical Server?\\n\\nConfig Manager \\n\\nLazy Open - what is the difference between True, False, and Smart?\\nLocal Slave – FALSE – is this similar to the Thor Master not running slaves? If this is set to FALSE will the slaves only run on the Roxies not running the other HPCC services? \\n\\nCPU Affinity & Cores Per Query - these are set to default, does that mean the system will use all available cores per query? Would you only set this if you had many large queries and wanted to reserve CPU?\\n\\nAny help or advice would be greatly appreciated.\\n\\nThanks\\n\\nAntony\", \"post_time\": \"2020-12-08 15:35:42\" },\n\t{ \"post_id\": 86, \"topic_id\": 46, \"forum_id\": 16, \"post_subject\": \"Re: Redundancy\", \"username\": \"jonburger\", \"post_text\": \"Ah, simple question, complex answer.\\n\\nWell, redundancy is achieved multiple ways depending on the component and/or budget \\n\\nFirst, I'll address the Roxie Engine (data delivery engine), In most cases, this is the component that is "customer facing", requires 99% uptime, and delivers high speed results based on user queries (think select). 
Roxie has internal redundancy which is achieved through a n+1 mirror. When Roxie is configured for redundancy (or replicate), each node has two data locations. Location #1 holds the nodes 'primary' data piece. Location #2 holds the next nodes (n+1) 'primary' data piece. The Roxie engine runs active/active and the Roxie service is capable of answering queries from both Location1 and Location2. In the event of a single server failure, the Roxie node next to it is still able to answer queries.\\n\\nSecond, I'll address the Thor Engine. Generally, this is primarily for index building and data munging and doesn't have the 99% uptime requirement, but there are cases where this also needs to be fully redundant. This can be achieved through multiple ways. \\n\\nFirst, when configured for redundancy, Thor also utilizes a (n+1) mirror. Each node backs up the prior nodes data. \\n\\nSecond, internal to Thor is the ability to replicate new data automatically, or file-by-file based on the ECL programmers choice. This would be useful in cases where there are many intermediary temporary files that don't need to be written twice but the final resulting file needs to be on a mirror. \\n\\nAdditionally, the ECL programmer is allowed to create 'break points' in long running jobs that in case of a hard failure the job can resume from the last break point. Internal to Thor is something called auto-swap-node, that if there is an available standby server (configured), that server will take over operations of the failed node and continue from the last break point.\\n\\nIn large enterprise setups, a common method of redundancy is achieved by running multiple Thor engines and reading from a single queue. Any available Thor engine can then read from this queue and process the ECL job. This can also be combined with internal Thor auto-swap-node.\\n\\nAdministrative components, namely Dali, Sasha, Thormaster require dual server/shared disk/VIP to become redundant (Normal HA setup), or potentially something like DRBD.\\n\\nAdministrative auxiliary components, ECLServer, ECLAgent, ECLScheduler, DFUServer, ESP can exist in active/active multiples.\\n\\nHopefully that answered your question
\\n\\n~Jon Burger\\nManager HPCC Engineering Team\", \"post_time\": \"2011-06-29 17:39:48\" },\n\t{ \"post_id\": 81, \"topic_id\": 46, \"forum_id\": 16, \"post_subject\": \"Redundancy\", \"username\": \"Helen\", \"post_text\": \"Am I OK if a node goes down? How does that redundancy work?\", \"post_time\": \"2011-06-29 15:00:52\" },\n\t{ \"post_id\": 123, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"richardkchapman\", \"post_text\": \"It's the sort of utility where the documentation says:\\n\\n"If instructed to run this by tech support, do. Otherwise, don't"\\n\\n...\\n\\nSeriously, I think there is documentation in the works for it but it's generally safest to avoid using it if at all possible.\", \"post_time\": \"2011-07-11 20:34:10\" },\n\t{ \"post_id\": 117, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"thildebrant\", \"post_text\": \"above was mentioned the use of daliadmin, is there any documentation for the use of this utility?\", \"post_time\": \"2011-07-08 20:12:55\" },\n\t{ \"post_id\": 106, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"sort\", \"post_text\": \"Shortly we will be releasing a fix to the "search roxie query" problem you encountered. Running tests of my own, I was able to remove the previously deleted workunit from a queryset via eclwatch by clicking on QuerySets / Browse and then finding the workunit.\\n\\nWe did open a new bug for a missing piece of functionality. Search Roxie Queries allows you to see the queries on roxie, but does not allow you to delete a query.\", \"post_time\": \"2011-07-07 12:53:08\" },\n\t{ \"post_id\": 104, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"richardkchapman\", \"post_text\": \"For now you should be sure to remove the query from the QuerySet before deleting the workunit.\\n\\nAnd if you hate vi, try nano ...\", \"post_time\": \"2011-07-06 20:46:50\" },\n\t{ \"post_id\": 103, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"dabayliss\", \"post_text\": \"Excellent! That worked perfectly (although it did remind me just how much I hate VI).\\n\\nThanks\\n\\nSo; was there a way I could have deleted the roxie query without screwing everything up? (Or resorting to VI!)\", \"post_time\": \"2011-07-06 20:43:40\" },\n\t{ \"post_id\": 102, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"Re: How do I recover from a deleted Roxie workunit?\", \"username\": \"richardkchapman\", \"post_text\": \"Hmm, looks like you found a bug (more than one actually).\\n\\n1. You should not be allowed to delete a workunit that is in a QuerySet (or else it should remove it from the QuerySet if you do)\\n2. WsECL on port 8002 should cope more gracefully with missing workunits in datasets\\n3. Search Roxie Queries on port 8010 should cope more gracefully with missing workunits in datasets.\\n\\nYou can recover using daliadmin as follows:\\n\\n1. Assuming you don't have a windows copy of daliadmin lying around, you'll need to use putty or similar to log in to one of the linux boxes in the cluster that has hpcc-init installed.\\n2. Run \\n\\n/opt/HPCCSystems/bin/daliadmin . export QuerySets tmp.xml\\n\\n3. 
Edit tmp.xml to remove the reference to the deleted workunit\\n4. run \\n\\n/opt/HPCCSystems/bin/daliadmin . import QuerySets tmp.xml \\n\\nto reimport the info to dali.\\n\\nThe above assumes that you logged into the box that is running dali (or you are on a single-node cluster). If you logged into a different box you'll have to replace the . in the above commands with the ip of the dali node.\\n\\nI'll make sure bugs are raised for the above issues.\", \"post_time\": \"2011-07-06 19:23:41\" },\n\t{ \"post_id\": 101, \"topic_id\": 52, \"forum_id\": 16, \"post_subject\": \"How do I recover from a deleted Roxie workunit?\", \"username\": \"dabayliss\", \"post_text\": \"Ok,\\n\\nI have a service which I compiled and then published to roxie (very cool btw) which was working great until I accidentally deleted the WU that I had published.\\n\\nNow I cannot 'search' my roxie queries from ECL Watch (it complains about the missing WU) - and I cannot delete my now-dead query (at least - no method I can find).\\n\\nSo - is there any way I can extricate myself from this mess?\\n\\nDavid\", \"post_time\": \"2011-07-06 17:41:35\" },\n\t{ \"post_id\": 287, \"topic_id\": 96, \"forum_id\": 16, \"post_subject\": \"Re: Blocked Workunits\", \"username\": \"richardkchapman\", \"post_text\": \"Can you locate the thormaster and thorslave log files (they will be in subdirectories of /var/log/HPCCSystems/mythor) and send me the last few lines of each\", \"post_time\": \"2011-08-22 21:24:14\" },\n\t{ \"post_id\": 286, \"topic_id\": 96, \"forum_id\": 16, \"post_subject\": \"Re: Blocked Workunits\", \"username\": \"jeffk\", \"post_text\": \"[quote="richardkchapman":3o425nyo]When you restarted the system via service hpcc-init start, did all components report they had started ok?\\nYes, however, the mythor component took a long time to restart. Some time longer than 5 minutes but shorter than going out for coffee.\\n
How many thor slaves in your cluster?
\\n1\\nDoes the eclwatch 'preflight' page (I think it's called 'view system servers' show any clues?
\\nNothing jumps out at me.\", \"post_time\": \"2011-08-22 19:19:48\" },\n\t{ \"post_id\": 285, \"topic_id\": 96, \"forum_id\": 16, \"post_subject\": \"Re: Blocked Workunits\", \"username\": \"richardkchapman\", \"post_text\": \"When you restarted the system via service hpcc-init start, did all components report they had started ok?\\n\\nHow many thor slaves in your cluster?\\n\\nDoes the eclwatch 'preflight' page (I think it's called 'view system servers' show any clues?\", \"post_time\": \"2011-08-22 19:05:30\" },\n\t{ \"post_id\": 284, \"topic_id\": 96, \"forum_id\": 16, \"post_subject\": \"Blocked Workunits\", \"username\": \"jeffk\", \"post_text\": \"Hi,\\n\\nMy system is unresponsive to new work unit requests to thor. They are showing up on the activity webpage as blocked, and after 10 minutes they fail with this error message:\\n\\nError: System error: 0: Query W20110822-131046 failed to start within specified timelimit (600) seconds (0, 0), 0, \\n\\nI restarted all of my system services via hpcc-init, but the problem persists.\\n\\nAs background, I believe the trouble started when I attempted to load a 1.3 GB dataset and count the number of records, with this error message:\\n\\nError: System error: 11: Graph[1], diskcount[2]: SLAVE 10.200.1.105:6600: JMalloc Heap error 11 ((nil)), JMalloc Heap error 11 ((nil)) - handling file: /var/lib/HPCCSystems/hpcc-data/thor/test/jk/original._1_of_1 (0, 0), 11, \\n\\nI uploaded approximately 400k files via the dfuplus utility as a flat file, 1 record per file. This may be the issue, but would like to resolve the unresponsive thor process first.\\n\\nThanks\", \"post_time\": \"2011-08-22 17:58:53\" },\n\t{ \"post_id\": 403, \"topic_id\": 123, \"forum_id\": 16, \"post_subject\": \"Re: Blocked workunits\", \"username\": \"HPCC Staff\", \"post_text\": \"Also the log from /var/log/HPCCSystems/myeclccserver\\n\\nThank you\", \"post_time\": \"2011-10-05 14:42:13\" },\n\t{ \"post_id\": 402, \"topic_id\": 123, \"forum_id\": 16, \"post_subject\": \"Re: Blocked workunits\", \"username\": \"HPCC Staff\", \"post_text\": \"Allan,\\n\\nNot sure if you can reproduce the problem. If you do, can you please attach the logs under /var/log/HPCCSystems/mythor and /var/log/HPCCSystems/myesp for that day?\\n\\nThank You\", \"post_time\": \"2011-10-05 13:54:45\" },\n\t{ \"post_id\": 376, \"topic_id\": 123, \"forum_id\": 16, \"post_subject\": \"Re: Blocked workunits\", \"username\": \"Allan\", \"post_text\": \"Ok Seemed to have cleared the blockage, by deleting every workunit I could find. Those on thor and Roxie, published and unpublished. \\n\\nDon't really understand the problem, but at least I'm moving again.\", \"post_time\": \"2011-10-02 15:04:16\" },\n\t{ \"post_id\": 374, \"topic_id\": 123, \"forum_id\": 16, \"post_subject\": \"Blocked workunits\", \"username\": \"Allan\", \"post_text\": \"Hi\\n\\nI'm very new to HPCC and ECL.\\nI've downloade and installed the HPCC VM and have the ECL IDE working.\\n\\nI've been plowing though the tutorials supplied on the hpccsystems website OK.\\n(e.g. the 'HPCC Data Tutorial' example), plus one of my own projects.\\n\\nHowever, today when I submit any Work Unit it moves into a BLOCKED state.\\nI've deleted / cleared out previous workunits to no avail. I expected as much.\\n\\nI'm running on Windows 7 and have not found the other thread on BLOCKED workunits any help. 
So have opened this one.\\n\\nAny idea's anyone?\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-01 19:40:39\" },\n\t{ \"post_id\": 462, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"Allan\", \"post_text\": \"Hi Philip,\\n\\nBy uninstalling everything, that's VMWare and the entire HPCC system. Plus deleting any directories left lieing around, then reinstalling everything from scratch, I seem to be up and working again.\\n\\nThe HPCCSystems VM 3.2.2.1 (a later version than the one I was using before) is repeatably throwing the error:\\n\\nChecking vami-sfcbd status: .......... failed, restarting vami-sfcbd\\nStarting vami-sfcbd: done.\\nChecking vami-sfcbd status: .......... failed, restarting vami-sfcbd\\nStarting vami-sfcbd: done.\\nChecking vami-sfcbd status: .......... failed, restarting vami-sfcbd\\nStarting vami-sfcbd: done.\\n
\\n\\nBut this does not seem to be affecting any work.(It's early days, not exercised much)\\n\\nThe moral to learn seems to be don't do a forced shutdown.\\nGiven windows propensity to lock up, this may be a issue many others will also have, and should be looked at by LN.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-15 14:52:41\" },\n\t{ \"post_id\": 461, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"Allan\", \"post_text\": \"Hi Philip,\\n\\nUnfortunately I cannot log into VMWare any more. I used to be able to.\\nBut the login: prompt no longer appears.\\n\\nI don't mind starting from scratch if its what it takes.\\n\\nAny pointers as to what to uninstall?\\n\\nYours\", \"post_time\": \"2011-10-14 17:18:35\" },\n\t{ \"post_id\": 460, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"pschwartz\", \"post_text\": \"[quote="Allan":29xbktkr]\\nI've now uninstalled and re-installed VMWare to no avail.\\n\\n\\nDid you uninstall and re-install just VMWare or the VM image also?\\n\\nIf the VM image starts up, please login to a shell in the vm window with the hpccdemo user. Once in a shell run the following command to check the status of the HPCC platform.\\n\\n`sudo service hpcc-init status`\\n\\nAlso run the following to see what ports are open and listening.\\n\\n`netstat -ntulp`\\n\\nIf you can run those commands and tell us what the results are, it will be very helpful in debugging your issue.\\n\\nPhilip\", \"post_time\": \"2011-10-14 13:19:44\" },\n\t{ \"post_id\": 458, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've now uninstalled and re-installed VMWare to no avail.\\nObviously the uninstall does not clean up everything.\\n\\nAny idea's anyone?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-14 08:40:48\" },\n\t{ \"post_id\": 455, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"Allan\", \"post_text\": \"Hi Philip,\\n\\nNo luck - I'd already done all that.\\n\\nInterestingly the MVWare box, on starting, no longer shows that its restoring a state, nor is the IP address shown any more.\\n\\nSo still stuck \\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-13 19:02:56\" },\n\t{ \"post_id\": 454, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Re: Recover from a forced system shutdown. (Windows7)\", \"username\": \"pschwartz\", \"post_text\": \"Allan,\\n\\nI am sorry to hear that you are experiencing issues currently with your VM.\\n\\nI have personally seen this happen twice. Both times it was related to the VMWare virtual network driver in the operating system. Below is the steps which corrected it for me.\\n\\n1. Shutdown the VM completely (do not pause or suspend it.)\\n2. Close all copies of VMWare that are running. \\n3. Reboot the computer VMWare is running on.\\n4. Open VMWare and start up the VM.\\n\\nPlease let me know if this works to correct the issue for you. 
Also make sure that the ip address that is displayed in the VM is the one that you are using to connect to ECLWatch and inside of the IDE as when this happens to the virtual network driver, the ip address of the VM can change.\\n\\nPhilip\", \"post_time\": \"2011-10-13 16:53:13\" },\n\t{ \"post_id\": 453, \"topic_id\": 134, \"forum_id\": 16, \"post_subject\": \"Recover from a forced system shutdown. (Windows7)\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nMy computer locked up I had to force a shutdown.\\n\\nOn re-boot all works except for any HPCC system.\\nI can boot the HPCCSystem VM 3.2.0.2 ok, but any attempt to log in using the ECL/IDE is rejected with 'Cannot connect to server'\\n\\nAnd neither the ECLWatch (Port 8010) or ESP (Port 8002) connect.\\n\\nSomething's broken - I don't know what nor what to do about it.\\n\\nAny idea's would be gratefully received.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-13 16:11:55\" },\n\t{ \"post_id\": 502, \"topic_id\": 138, \"forum_id\": 16, \"post_subject\": \"Re: Unable to Delete a logical file.\", \"username\": \"Allan\", \"post_text\": \"I've deleted all ECL and DFU workunits. No luck the files still don't delete.\\n\\nI logged onto the VM as 'hpcc' and manually deleted the files mentioned above, They still appear in ECL Watch even after a bounce of the VM.\\n\\nI'm giving up with this one.\\n\\nRest of system still seems ok and I can continue learning the language and system.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-20 15:33:28\" },\n\t{ \"post_id\": 493, \"topic_id\": 138, \"forum_id\": 16, \"post_subject\": \"Re: Unable to Delete a logical file.\", \"username\": \"bforeman\", \"post_text\": \"In the ECL Watch, are there any workunits perhaps that could be locking those files? Otherwise I'm not sure why you would not be able to simply delete them.\", \"post_time\": \"2011-10-19 19:47:04\" },\n\t{ \"post_id\": 492, \"topic_id\": 138, \"forum_id\": 16, \"post_subject\": \"Re: Unable to Delete a logical file.\", \"username\": \"Allan\", \"post_text\": \"Humm...\\n\\nVia WinSCP I can see three files under thor/genesis\\nbibleindex.1_of_2\\nbibleindex.2_of_2\\nlayout_verse.1_of_1\\n\\nVia ECLWatch on only see two entries under thor/genesis that's\\nbibleindex and layout_verse. I suppose that's expected.\\n\\nIf I attempt to delete these files via WinSCP I get a permissions failour.\\nI could log into the VM as hpccdemo (or root?) chmod and rm that way but I feel I'm fighting the system doing that and should not be doing those kind of things.\\n\\nBy the way these are just normal FLAT(thor) files, I'm new to all this and am not doing anything fancy.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-19 18:41:44\" },\n\t{ \"post_id\": 489, \"topic_id\": 138, \"forum_id\": 16, \"post_subject\": \"Re: Unable to Delete a logical file.\", \"username\": \"bforeman\", \"post_text\": \"If you are using a VM, you can use a program like WinSCP to open the local port and then scan your hpcc-data folders and look for those files. I'm not sure why you can't delete them, what kind of files are you talking about? 
THOR, CSV, XML, Indexes, persisted files, etc?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2011-10-19 18:13:52\" },\n\t{ \"post_id\": 483, \"topic_id\": 138, \"forum_id\": 16, \"post_subject\": \"Unable to Delete a logical file.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI'm unable to delete some logical files via ECL Watch.\\n\\nIt accepts the 'delete' command, with a 'are you sure' dialog box, but then does nothing except put up the hour glass.I'm using windows 7.\\n\\nI can delete the workunit that created the logical file, but not the file itself.\\n\\nLogical files I've created recently (like today) are deleted.\\n\\nAny idea's?\\n\\nYours\\nAllan\", \"post_time\": \"2011-10-18 18:59:28\" },\n\t{ \"post_id\": 1090, \"topic_id\": 260, \"forum_id\": 16, \"post_subject\": \"Re: Thor workunits\", \"username\": \"bforeman\", \"post_text\": \"Thanks Dan, I have passed this info to the documentation team and they are working on it!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-10 19:24:09\" },\n\t{ \"post_id\": 1089, \"topic_id\": 260, \"forum_id\": 16, \"post_subject\": \"Re: Thor workunits\", \"username\": \"DSC\", \"post_text\": \"Hi Bob,\\n\\nThanks for the information. It would be great if there were a set of workflow service options that governed this behavior. Basically, stuff that defines what happens to the workunit, and when, after the job either completes or fails. A future capability, perhaps?\\n\\nAnother request ties in to the above and would probably be easier to implement: Add explicit archive and delete functions to the workunit portion of the standard library. The library already supports search, and if it allowed archiving and deleting then developers could concoct their own periodic cleanup scripts.\\n\\n[quote="bforeman":1w3jmlli]The only doc that I knw of online regarding the specific settings is the Using Configuration Manager PDF, available in the Documentation section.\\n\\nYou mean the Sasha stuff, right? The current document (http://cdn.hpccsystems.com/install/docs ... anager.pdf) needs some help. Take a look at the SashaServer Process Archiver, beginning on page 39. The description for all of the options is, literally, "SashaServer Archiver Process description." I think that section needs a little TLC.\\n\\nThanks again!\\n\\nDan\", \"post_time\": \"2012-02-10 18:55:16\" },\n\t{ \"post_id\": 1088, \"topic_id\": 260, \"forum_id\": 16, \"post_subject\": \"Re: Thor workunits\", \"username\": \"bforeman\", \"post_text\": \"
Are workunits submitted to thor eventually deleted?
\\n\\nWorkunits are periodically archived (not deleted) to the Sasha server from Dali. It is up to the HPCC administrator to delete any workunits when needed.\\n\\n Or, better yet, is there a mechanism for indicating when a completed/failed workunit should be automatically deleted?
\\n\\nNot that I know of, Sasha only archives workunits and it is the responsibility of the HPCC administrator to delete them if needed. \\n\\nThe only doc that I knw of online regarding the specific settings is the Using Configuration Manager PDF, available in the Documentation section.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-10 16:52:26\" },\n\t{ \"post_id\": 1087, \"topic_id\": 260, \"forum_id\": 16, \"post_subject\": \"Thor workunits\", \"username\": \"DSC\", \"post_text\": \"Are workunits submitted to thor eventually deleted? Or, better yet, is there a mechanism for indicating when a completed/failed workunit should be automatically deleted?\\n\\nLastly, please let me know if the answers to those questions are documented somewhere so I'll know where to look next time.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-02-10 15:19:12\" },\n\t{ \"post_id\": 6040, \"topic_id\": 297, \"forum_id\": 16, \"post_subject\": \"Re: dafilesrv: stops and starts\", \"username\": \"rtaylor\", \"post_text\": \"Considering that you're running version 3.10 and we just released version 5.0, I would suggest that you upgrade to the latest release.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-07-11 19:19:27\" },\n\t{ \"post_id\": 6039, \"topic_id\": 297, \"forum_id\": 16, \"post_subject\": \"Re: dafilesrv: stops and starts\", \"username\": \"davidt\", \"post_text\": \"One of our HPCC clusters was failing today which happens once in a while probably due to running version community_3.10. We found that there were 2 mydafilesrv processes running and the init script would not stop them. We rebooted the nodes and then everything was fine.\", \"post_time\": \"2014-07-11 18:09:38\" },\n\t{ \"post_id\": 1254, \"topic_id\": 297, \"forum_id\": 16, \"post_subject\": \"Re: dafilesrv: stops and starts\", \"username\": \"bforeman\", \"post_text\": \"Dafilesrv should be stopped and started by: \\n\\nsudo service dafilerv stop/start/restart\\n\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2012-03-06 14:30:06\" },\n\t{ \"post_id\": 1250, \"topic_id\": 297, \"forum_id\": 16, \"post_subject\": \"dafilesrv: stops and starts\", \"username\": \"DSC\", \"post_text\": \"The dafilesrv process is started by 'sudo service hpcc-init start' (on RHEL5, at least) but it is not shut down by 'sudo service hpcc-init stop'. Why? Should it be?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-03-06 13:23:26\" },\n\t{ \"post_id\": 1314, \"topic_id\": 302, \"forum_id\": 16, \"post_subject\": \"Re: log files size running into GBs\", \"username\": \"vkumars\", \"post_text\": \"Thanks for your response. But I am not sure why we are getting this Network unreachable issue because I am able to ping to this IP which is local subnet ip. And this single node environment is working fine in all other aspects. 
This is a remote single node.\\n\\nThanks.\\n\\n[quote="jsmith":1eqpl31u]I'm not sure, error 101 means "Network is unreachable"\\n\\nIs "my server ip here" - on the local single node?\\nIs this IP a local subnet ip?\\nDo you have multiple network interfaces on this box, or none?\\nIs your environment in other respects working?\\nCan you ping this IP?\\n\\nIt sounds like a network/or firewall issue of some kind.\", \"post_time\": \"2012-03-11 17:38:51\" },\n\t{ \"post_id\": 1306, \"topic_id\": 302, \"forum_id\": 16, \"post_subject\": \"Re: log files size running into GBs\", \"username\": \"jsmith\", \"post_text\": \"I'm not sure, error 101 means "Network is unreachable"\\n\\nIs "my server ip here" - on the local single node?\\nIs this IP a local subnet ip?\\nDo you have multiple network interfaces on this box, or none?\\nIs your environment in other respects working?\\nCan you ping this IP?\\n\\nIt sounds like a network/or firewall issue of some kind.\", \"post_time\": \"2012-03-09 16:32:31\" },\n\t{ \"post_id\": 1280, \"topic_id\": 302, \"forum_id\": 16, \"post_subject\": \"log files size running into GBs\", \"username\": \"vkumars\", \"post_text\": \"I am running a community license version 3.4.0-1 in Cent OS-5 box in a single node single cluster setup.\\n\\nThe log files under the following locations are running into 20-50GB size each.\\n\\n/var/log/HPCCSystems/mythor\\n/var/log/HPCCSystems/myeclscheduler\\n/var/log/HPCCSystems/mydfuserver\\n/var/log/HPCCSystems/mysasha\\n/var/log/HPCCSystems/myesp\\n\\nIn all these files the line "jsocket(1,796) pre_connect err = 101 : T><my server ip here>" is repeated again and again which created such huge files of GBs. \\n\\nAny idea what could be the cause?\\n\\nThanks in advance\\nVasantha\", \"post_time\": \"2012-03-08 13:56:14\" },\n\t{ \"post_id\": 2628, \"topic_id\": 391, \"forum_id\": 16, \"post_subject\": \"Re: Managing dead files\", \"username\": \"ghalliday\", \"post_text\": \"It looks like it is a bug - the files should be being removed.\\n\\nSee http://track.hpccsystems.com/browse/HPCC-2558 for the issue, and updates.\", \"post_time\": \"2012-10-26 11:24:58\" },\n\t{ \"post_id\": 1777, \"topic_id\": 391, \"forum_id\": 16, \"post_subject\": \"Re: Managing dead files\", \"username\": \"DSC\", \"post_text\": \"Thanks for the info.\\n\\nI presume that the system should have been removing the unused binaries and code files all along? If so, then that looks like a small bug of some kind.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-06-14 11:39:03\" },\n\t{ \"post_id\": 1776, \"topic_id\": 391, \"forum_id\": 16, \"post_subject\": \"Re: Managing dead files\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI can't answer why they are not automatically removed, but if you know that they are "dead" files I do not see any problem with you manually removing them.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-13 18:35:06\" },\n\t{ \"post_id\": 1772, \"topic_id\": 391, \"forum_id\": 16, \"post_subject\": \"Managing dead files\", \"username\": \"DSC\", \"post_text\": \"While debugging a compiler error I discovered that /var/lib/HPCCSystems/myeclccserver/ on one of my nodes was full of dead files. Unused binaries, orphan .cpp files (due to failed compilations), etc.. Is this cruft not automatically removed by the system? If not, what is the recommended method for managing this stuff? 
Is there anything wrong with simply deleting files?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-13 13:43:09\" },\n\t{ \"post_id\": 2489, \"topic_id\": 545, \"forum_id\": 16, \"post_subject\": \"Re: mythor stop and start automatically\", \"username\": \"pschwartz\", \"post_text\": \"Dongliang,\\n\\nIf Thor is coming up and then stopping, the problem could be configuration but seeing to the fact that you said it was running, I do not believe this is the case.\\n\\nThe next step would be to trouble shoot the reason for the start/stop of Thor. Can you take a look at the Thor log files at /var/log/HPCCSystems/mythor/. If you do not think you see anything that might be reporting an issue, feel free to paste/attach the log files here or use a paste service like http://paste2.org and I will help you to determine the issue.\\n\\n-Philip\", \"post_time\": \"2012-10-10 12:21:11\" },\n\t{ \"post_id\": 2486, \"topic_id\": 545, \"forum_id\": 16, \"post_subject\": \"Re: mythor stop and start automatically\", \"username\": \"dsun\", \"post_text\": \"And it does not work by doing 'sudo service hpcc-init restart', even reinstall HPCC!\", \"post_time\": \"2012-10-09 16:54:38\" },\n\t{ \"post_id\": 2485, \"topic_id\": 545, \"forum_id\": 16, \"post_subject\": \"mythor stop and start automatically\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nI install the HPCC system on Ubuntu 11.10, at first it works quite well.\\n\\nBut when I restart the machine and 'sudo service hpcc-init start', the 'mythor' does not work any more, it stop and start automatically all the time, I use 'sudo service hpcc-init status', found that the 'mythor' sometimes is stopped, sometimes started (with always different PID).\\n\\nWhat caused the problem? Bug or configuration problem?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-10-09 16:34:58\" },\n\t{ \"post_id\": 6038, \"topic_id\": 552, \"forum_id\": 16, \"post_subject\": \"Re: Cluster monitoring\", \"username\": \"davidt\", \"post_text\": \"we use prtg http monitor to load a roxie query page with a json payload as sometimes the cluster reports running fine yet even though roxie queries are taking over 30 seconds and clearly something is wrong.\", \"post_time\": \"2014-07-11 17:40:34\" },\n\t{ \"post_id\": 2536, \"topic_id\": 552, \"forum_id\": 16, \"post_subject\": \"Re: Cluster monitoring\", \"username\": \"jeeves\", \"post_text\": \"thank you!\", \"post_time\": \"2012-10-18 11:06:16\" },\n\t{ \"post_id\": 2518, \"topic_id\": 552, \"forum_id\": 16, \"post_subject\": \"Re: Cluster monitoring\", \"username\": \"sort\", \"post_text\": \"You can use a URL like the following to get the wsdl information\\nhttp://IP:8010/ws_machine/GetMachineInfoEx?wsdl\", \"post_time\": \"2012-10-12 15:03:01\" },\n\t{ \"post_id\": 2517, \"topic_id\": 552, \"forum_id\": 16, \"post_subject\": \"Re: Cluster monitoring\", \"username\": \"sort\", \"post_text\": \"all web services can be access via soap using the wsdl\", \"post_time\": \"2012-10-12 14:56:16\" },\n\t{ \"post_id\": 2509, \"topic_id\": 552, \"forum_id\": 16, \"post_subject\": \"Cluster monitoring\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nDoes HPCC provde any API to programmatically monitor the health of the nodes and the jobs running on those nodes. I know that we can get this information from ECL watch web app. 
But we are looking at ways to create a customized monitoring application with advanced notification and graphical monitoring capabilities.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-10-12 13:18:43\" },\n\t{ \"post_id\": 2781, \"topic_id\": 600, \"forum_id\": 16, \"post_subject\": \"Re: HPCC Architecture\", \"username\": \"JimD\", \"post_text\": \"You can find log files in: \\n\\n /var/log/HPCCSystems/\\n\\nThat directory contains a subdirectory for each component running on that node:\\n\\nThese could include:\\n/configmgr\\n/mydafilesrv\\n/mydali\\n/mydfuserver\\n/myeclagent\\n/myeclccserver\\n/myeclscheduler\\n/myesp\\n/myroxie\\n/mysasha\\n/mythor\\n\\nHope this helps,\\n\\nJim\", \"post_time\": \"2012-11-15 14:19:29\" },\n\t{ \"post_id\": 2761, \"topic_id\": 600, \"forum_id\": 16, \"post_subject\": \"Re: HPCC Architecture\", \"username\": \"HPCC Staff\", \"post_text\": \"Hi, as a starter, take a look at the HPCC architecture online video. Part II takes a deeper dive into Sasha and Dali.\\n\\nhttp://hpccsystems.com/community/traini ... -roxie-ecl\\n\\nI'll ask the team for a good reference on the other areas. Thanks for your post!\", \"post_time\": \"2012-11-11 23:03:30\" },\n\t{ \"post_id\": 2753, \"topic_id\": 600, \"forum_id\": 16, \"post_subject\": \"HPCC Architecture\", \"username\": \"jeeves\", \"post_text\": \"I am looking for resources(links/pdfs) which will help me understand the HPCC system better. I am looking for answers to questions like\\n\\n1. What does the Sasha(and the other) servers do? Can I change the ports on which they are listening?\\n3. Where can I find log files when something goes wrong\\n4. What are the guidelines for administering a HPCC cluster\\n\\netc.\", \"post_time\": \"2012-11-09 13:05:26\" },\n\t{ \"post_id\": 2798, \"topic_id\": 609, \"forum_id\": 16, \"post_subject\": \"Re: Could not open source file\", \"username\": \"anag\", \"post_text\": \"sudo -u hpcc head /root/events.txt\\n\\nsudo: unable to resolve host ip-172-25-30-102\\nhead: cannot open `/root/events.txt' for reading: Permission denied\", \"post_time\": \"2012-11-16 08:14:30\" },\n\t{ \"post_id\": 2797, \"topic_id\": 609, \"forum_id\": 16, \"post_subject\": \"Could not open source file\", \"username\": \"anag\", \"post_text\": \"I'm having a Dilbert moment, and I'm trying to spray a file from the command line and I get the following error\\n\\n>> dfuplus action=spray server=172.25.30.102 srcfile=/root/events.txt dstname=anag::test::events.txt dstcluster=mythor format=csv encoding=ascii seperator="\\\\t"\\nsrcip not specified - assuming spray from local machine\\nChecking for local Dali File Server\\n\\nSpraying from /root/events.txt on 172.25.30.102:7100 to anag::test::events.txt\\nSubmitted WUID D20121116-071748\\nD20121116-071748 status: queued\\nFailed: Could not open source file //172.25.30.102:7100/root/events.txt\\n\\nThe entry from the dfuserver.log is the following\\n\\n000000BE 2012-11-16 07:17:48 15548 15560 "DFU Server running job: D20121116-071748"\\n000000BF 2012-11-16 07:17:48 15548 15560 "DFS: import(events.txt,)"\\n000000C0 2012-11-16 07:17:48 15548 15560 "Using transfer buffer size 65536"\\n000000C1 2012-11-16 07:17:48 15548 15560 "Start gathering file sizes..."\\n000000C2 2012-11-16 07:17:48 15548 15560 "Gathering 1 file sizes on 1 threads"\\n000000C3 2012-11-16 07:17:48 15548 15560 "Finished gathering file sizes..."\\n000000C4 2012-11-16 07:17:48 15548 15560 "Use pull operation as default"\\n000000C5 2012-11-16 07:17:48 15548 15560 "Start gathering file 
sizes..."\\n000000C6 2012-11-16 07:17:48 15548 15560 "Gathering 1 file sizes on 1 threads"\\n000000C7 2012-11-16 07:17:48 15548 15560 "ERROR: 8050: /var/jenkins/workspace/CE-Candidate-3.8.6/CE/ubuntu_12_04_x86_64/HPCC-Platform/dali/dfu/dfurun.cpp(1610) : DFURUN Exception: : Could not open source file //172.25.30.102:7100/root/events.txt"\\n000000C8 2012-11-16 07:17:48 15548 15560 "DFU Server finished job: D20121116-071748"\\n000000C9 2012-11-16 07:17:48 15548 15560 "DFU Server waiting on queue dfuserver_queue"\\n \\n\\nI checked the permissions on the file. They seem to be fine. What am I doing wrong here? I'm pretty sure I've used this command in a different cluster successfully.\\n\\nI'm using version community_3.8.6-4\", \"post_time\": \"2012-11-16 07:35:13\" },\n\t{ \"post_id\": 2829, \"topic_id\": 616, \"forum_id\": 16, \"post_subject\": \"Re: are spills to disk compressed?\", \"username\": \"DSC\", \"post_text\": \"I don't know of a published schedule, sorry. Version 3.6.0.1 was released March 5 and version 3.8.0.1 was released July 9, so I would expect version 3.10.x to arrive relatively soon. I'm just guessing though.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-20 18:31:36\" },\n\t{ \"post_id\": 2828, \"topic_id\": 616, \"forum_id\": 16, \"post_subject\": \"Re: are spills to disk compressed?\", \"username\": \"anag\", \"post_text\": \"Thank you, Dan.\\n\\nI seem to have seen it somewhere, but can't seem to find the release schedule for 3.10. Can you please point me to it?\\n\\nAN\", \"post_time\": \"2012-11-20 18:21:41\" },\n\t{ \"post_id\": 2823, \"topic_id\": 616, \"forum_id\": 16, \"post_subject\": \"Re: are spills to disk compressed?\", \"username\": \"DSC\", \"post_text\": \"Not currently, but they will be in the next release (3.10). See https://track.hpccsystems.com/browse/HPCC-3097.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-20 12:29:30\" },\n\t{ \"post_id\": 2821, \"topic_id\": 616, \"forum_id\": 16, \"post_subject\": \"are spills to disk compressed?\", \"username\": \"anag\", \"post_text\": \"While I know that HPCC uses compression to store the sprayed file, during processing, for spills and writes to disk, are those intermediate data files compressed as well?\", \"post_time\": \"2012-11-20 07:13:15\" },\n\t{ \"post_id\": 2907, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"Re: spray compressed file\", \"username\": \"DSC\", \"post_text\": \"Ha! I only remembered it because it's on my Wish List, too. That would be an incredibly handy feature, and one that is relatively easy to implement (if you constrain the compression to gzip, especially).\\n\\nDan\", \"post_time\": \"2012-11-29 13:14:39\" },\n\t{ \"post_id\": 2906, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"Re: spray compressed file\", \"username\": \"bforeman\", \"post_text\": \"Thanks Dan, I knew we had a prior thread on this somewwhere \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-29 13:10:16\" },\n\t{ \"post_id\": 2905, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"Re: spray compressed file\", \"username\": \"DSC\", \"post_text\": \"I think he's talking about having a compressed file on the landing zone and spraying that directly into the cluster without decompressing first. 
It may already be on the Wish List:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-1371\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-29 12:53:15\" },\n\t{ \"post_id\": 2903, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"Re: spray compressed file\", \"username\": \"anag\", \"post_text\": \"[quote="bforeman":uwxvgmwd]Hi Anag,\\n\\nYou can do that already I think, just use the Spray CSV option and remove any default delimiters. In other words, just spray the file as a single record document.\\n\\nRegards,\\n\\nBob\\n\\nThanks Bob,\\nI don't think I understand you. Could you give me an example?\\n\\nan\", \"post_time\": \"2012-11-29 10:49:54\" },\n\t{ \"post_id\": 2866, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"Re: spray compressed file\", \"username\": \"bforeman\", \"post_text\": \"Hi Anag,\\n\\nYou can do that already I think, just use the Spray CSV option and remove any default delimiters. In other words, just spray the file as a single record document.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 14:28:45\" },\n\t{ \"post_id\": 2826, \"topic_id\": 619, \"forum_id\": 16, \"post_subject\": \"spray compressed file\", \"username\": \"anag\", \"post_text\": \"Is there any plan in any of the future releases to have the functionality to spray zipped files? This is a huge problem for us. \\n\\nAN\", \"post_time\": \"2012-11-20 17:06:03\" },\n\t{ \"post_id\": 2972, \"topic_id\": 646, \"forum_id\": 16, \"post_subject\": \"Re: XRef report 'Not Found'\", \"username\": \"jsmith\", \"post_text\": \"My reply crossed with your post..\\n\\nYes, I was just replying to say that this is probably the issue you are hitting.\", \"post_time\": \"2012-12-06 13:46:18\" },\n\t{ \"post_id\": 2970, \"topic_id\": 646, \"forum_id\": 16, \"post_subject\": \"Re: XRef report 'Not Found'\", \"username\": \"DSC\", \"post_text\": \"I saw the issue https://track.hpccsystems.com/browse/HPCC-8412 this morning and reduced the max memory setting to under 2GB. The report executed and finished.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-12-06 13:16:10\" },\n\t{ \"post_id\": 2962, \"topic_id\": 646, \"forum_id\": 16, \"post_subject\": \"Re: XRef report 'Not Found'\", \"username\": \"DSC\", \"post_text\": \"I'm skeptical. The status is still the same ("Not Found") and I would suspect that it should be done by now. It's been 15 hours or so. Thoughts?\", \"post_time\": \"2012-12-05 16:35:29\" },\n\t{ \"post_id\": 2961, \"topic_id\": 646, \"forum_id\": 16, \"post_subject\": \"Re: XRef report 'Not Found'\", \"username\": \"bforeman\", \"post_text\": \"Dan,\\n\\nOur developer looked at the underlying code, and thinks that the error is harmless and that the system may still be working on the report.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-12-05 16:30:25\" },\n\t{ \"post_id\": 2946, \"topic_id\": 646, \"forum_id\": 16, \"post_subject\": \"XRef report 'Not Found'\", \"username\": \"DSC\", \"post_text\": \"I have a new cluster on which I attempted to run a Thor Xref report. The SuperFiles report runs fine, but the Thor report's 'Last Message' status finishes with 'Not Found' (after saying 'Submitted' for a time) and no report can be loaded. 
Can someone point me in a direction for troubleshooting this?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-12-04 14:44:33\" },\n\t{ \"post_id\": 3193, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Re: Multiple slaves per node and disk configuration\", \"username\": \"flavio\", \"post_text\": \"Sid,\\n\\nAnd you are probably right about the decompression process. In that particular case, having several slaves per node may help.\\n\\nFlavio\", \"post_time\": \"2013-01-25 21:15:11\" },\n\t{ \"post_id\": 3022, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Re: Multiple slaves per node and disk configuration\", \"username\": \"sbagaria\", \"post_text\": \"Thanks. Most of this question is coming from a conversation I had with Jake. My main concern is that having one slave per node will result in a bottleneck when decompressing the file. Because the decompression will be run in a single thread. So we definitely want as many slaves/nodes as we can.\\n\\nI think there is intrinsic value in having multiple slaves as this helps you further (horizontally) fragment your data towards the number of cores you have, and not just the number of nodes. This value should be most apparent when you have a slow computing node in the dataflow and several fast ones, and the data still fits in memory, so no spilling happens. But this is all conjecture.\\n\\nI told Jake that I will be doing some tests to build up my understanding of how this actually works.\\n\\nThanks again Flavio!!\", \"post_time\": \"2012-12-18 17:54:18\" },\n\t{ \"post_id\": 3020, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Re: Multiple slaves per node and disk configuration\", \"username\": \"flavio\", \"post_text\": \"Well, I wouldn't jump to the automatic conclusion that your 10 slaves are CPU bound (or, even if they be for the task at hand, it's not necessarily something that you can lightly generalize for other jobs), but I would also reasonably assume that you don't want to run more slaves than the total number of cores since task switching does bring an overhead (branch prediction and cache thrashing, etc.). It is also worth mentioning that trading I/O by CPU using compression is beneficial with modern CPU's.\\n\\nBut, as an alternative to running multiple slaves, you could try having just one slave per box, and each slave should be able to use all of your available cores, for activities that can take advantage of them. Or even run a handful of slaves per node and let Thor know how many cores per slave it should use.\\n\\nJake and Richard would be better than me to provide information about the best specific configuration for a particular architecture, but if you have some time, these tests may render interesting results.\\n\\nFlavio\", \"post_time\": \"2012-12-18 16:04:02\" },\n\t{ \"post_id\": 3018, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Re: Multiple slaves per node and disk configuration\", \"username\": \"sbagaria\", \"post_text\": \"Hi Flavio!!\\n\\nThank you very much for your very erudite reply. We are currently using a 3GHz 24-core, 6xRAID6 configuration. To assess the impact of disk I/O on my runtime, I tried reading a 80 GB (100 M rows) CSV file (sprayed for 10 slaves on 3 nodes each) and counting the number of unique values in one of the fields (sort -> dedup -> count).\\n\\nWhen the raw file was sprayed as compressed (compression ratio obtained was 1:3), I had a runtime of 1m24s. When uncompressed, I had a runtime of 1m34s. 
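A minimal ECL sketch of the test just described (sort -> dedup -> count of unique values in one field); the logical file name, separator and record layout here are illustrative, not the actual ones used:

// Illustrative layout - the real 80 GB file has more fields than this
EventRec := RECORD
    STRING20 keyField;   // the field whose unique values are counted
    STRING   restOfRow;
END;

events := DATASET('~test::events_100m', EventRec, CSV(SEPARATOR('\t')));

// sort -> dedup -> count, as described above
uniqueValues := COUNT(DEDUP(SORT(events, keyField), keyField));

OUTPUT(uniqueValues, NAMED('UniqueKeyFieldCount'));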
A simple count operation also showed a difference of 10 seconds.\\n\\nSo 10 slaves on these machines are actually proving to be CPU bound when my files are compressed. Since most disk spills are compressed (sort will use compressed disk spills from 3.10), if I increase the number of slaves, I expect better overall performance. Does this make sense?\", \"post_time\": \"2012-12-18 15:05:58\" },\n\t{ \"post_id\": 3017, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Re: Multiple slaves per node and disk configuration\", \"username\": \"flavio\", \"post_text\": \"Sid, you are absolutely right in your assessment. If you configure multiple slaves per node, each slave will be performing sequential reads and writes of its own file parts, but as these I/O requests interleave, the storage system will be effectively executing more random seeks than if you had a single slave. \\n\\nIf you are planning on having more than a handful of slaves per node, you should seriously consider using SAS drives (for example 10K RPM 2.5' 900GB Seagate SAS drives) instead of SATA drives (for Thor clusters running just one or two slaves per node, 7200RPM 3.5' 3TB drives are fine and very cost effective). And if you have determined that you will utilize more than just a couple of drives per node, a RAID 5 container with an interleave of 1MB or so should help spread the disk activity and make storage performance more predictable, while helping cope with a single drive failure without interrupting the job being executed (a RAID 0 would work better from a performance standpoint, but it may increase the rate of job disruptive failures significantly). If you decide to go the RAID 5 route, I do recommend that you ensure that your controller has write-back enabled (it will probably need a battery), otherwise your write performance may be less than optimal.\\n\\nSolid State Drives are still not at a price/capacity point where they can be considered as the main storage for Thor. While there could be some use for them as an add-on to the main storage (like, for example, when you have a number of small files that you regularly perform lookup joins against), they wouldn't be my first choice to replace mechanical hard drives in Thor just yet (Roxie is a whole different place, though).\\n\\nPlease let me know if this helps.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-12-18 14:41:31\" },\n\t{ \"post_id\": 3015, \"topic_id\": 659, \"forum_id\": 16, \"post_subject\": \"Multiple slaves per node and disk configuration\", \"username\": \"sbagaria\", \"post_text\": \"Hi!\\n\\nSo I have been debating this question and I have asked a few people about it. But better to put it on the forum so that others can be aware of it too.\\n\\nMy understanding is that when we configure multiple slaves per node, all of these perform I/O from the file system at the same time, i.e. reading/writing the part files or spilling the temporary files at the same time. This means that if there are 12 slaves per node, I am essentially running at least 12 concurrent I/O threads. This has an impact on the throughput of my disk.\\n\\nIf I am using SSDs or if my sequential throughput on the HDD is good with a large buffer limit, then these are less of a concern, specifically when the files are compressed. But what would you recommend in a typical setting. I understand that the compression algorithm (or an absence of one) will have an impact. So let's consider a scenario where I have to read a file and sort on one field. 
Right now, version 3.8.6-4 does not spill compressed files when sorting.\\n\\nWhat should be my ideal disk configuration (in terms of striped RAID sets, SSDs, etc.) when the large (sprayed) raw file is compressed and when it is not compressed.\\n\\nSid\", \"post_time\": \"2012-12-17 11:28:25\" },\n\t{ \"post_id\": 3342, \"topic_id\": 740, \"forum_id\": 16, \"post_subject\": \"Re: dfuplus action=dkc\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThe dkc action is ancient (about 10 years, I would guesstimate), back to the days when Thor could build indexes but not use them. Back then we used a 400-node Thor to build our index files (401 file parts), then the dkc action would despray and properly "stitch together" the file parts into a usable single-file index on the dropzone so the index could be used in a Unix Sun/EMC environment.\\n\\nIt probably should be removed from the docs. I'll alert the proper authorities.
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-02-05 19:24:01\" },\n\t{ \"post_id\": 3338, \"topic_id\": 740, \"forum_id\": 16, \"post_subject\": \"dfuplus action=dkc\", \"username\": \"DSC\", \"post_text\": \"I stumbled across the dkc action in dfuplus and tried to play with it. For my troubles, I get an error message that seems to indicate that the option is not really available.\\n\\n
~: dfuplus action=dkc server=127.0.0.1 srcname='index::foo' dstip=127.0.0.1 dstfile=/tmp/index_foo\\n\\nUnknown dfuplus action
\\n\\nIs this option supposed to work? If it does, what actions can one take on the resulting file? (My hope is that it can be sprayed into a Roxie cluster in another environment.)\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-02-05 15:32:47\" },\n\t{ \"post_id\": 3453, \"topic_id\": 761, \"forum_id\": 16, \"post_subject\": \"Re: Workunits getting blocked/failed\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nWhen you stop all the components, what command do you use to stop the system?\\n\\nI typically like to run service hpcc-init stop
.\\n\\nAfter you stop your system, can you verify that all the processes are stopped for the system by running a ps on the nodes?\\n\\nJust to help diagnose, what do the thorslave logs say during this time?\\n\\n- Chris\", \"post_time\": \"2013-02-13 19:18:49\" },\n\t{ \"post_id\": 3441, \"topic_id\": 761, \"forum_id\": 16, \"post_subject\": \"Workunits getting blocked/failed\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nWe are using a 4-node HPCC cluster - the partial config is shown in Cluster.JPG\\n\\nWe have been using this cluster for two weeks without any hiccups and we have not changed any configuration since the installation. But suddenly:\\nThe workunits started getting blocked
\\nThe status of the workunits later changed to failed
\\n\\nPlease refer to the attached images.\\n\\nWhen we restart the cluster/services on each node, the cluster functions smoothly for some time, but then the same issue recurs - some blocking/locking issue, I believe.\\n\\nThanks and regards!\", \"post_time\": \"2013-02-13 12:52:50\" },\n\t{ \"post_id\": 3887, \"topic_id\": 860, \"forum_id\": 16, \"post_subject\": \"Re: Availability, Backup & Recovery - basic queries\", \"username\": \"ultima_centauri\", \"post_text\": \"You might be able to find some info in here:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=8&t=859&sid=4f155baf9842fc3e6ebcca0f31d4e8de\", \"post_time\": \"2013-04-05 00:12:49\" },\n\t{ \"post_id\": 3830, \"topic_id\": 860, \"forum_id\": 16, \"post_subject\": \"Availability, Backup & Recovery - basic queries\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nWe have a 4-node HPCC cluster running. \\nThe Thor data includes several super-files with a large no. of sub-files. There are payload INDEXes built on super-files, and super-keys using these (see the ECL sketch after this list)
\\nThe Roxie cluster has queries that use the super-keys
\\nThe payload INDEXes from Thor are often sent to Roxie using packagemap
\\nA web application accesses the Roxie queries via a WS client
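A minimal, hypothetical ECL sketch of the superfile / payload-index setup described in the list above; every logical file name and field here is invented for illustration:

IMPORT STD;

SubRec := RECORD
    STRING20 id;
    STRING50 payload;
END;

// Superfile gathering the daily subfiles (CreateSuperFile fails if it already exists)
SEQUENTIAL(
    STD.File.CreateSuperFile('~thor::super::daily_all'),
    STD.File.StartSuperFileTransaction(),
    STD.File.AddSuperFile('~thor::super::daily_all', '~thor::data::daily_20130326'),
    STD.File.FinishSuperFileTransaction()
);

// Payload INDEX built over the superfile; the built key would in turn be
// added to a super-key that the Roxie queries (and packagemap) reference.
allDays := DATASET('~thor::super::daily_all', SubRec, THOR);
byId    := INDEX(allDays, {id}, {payload}, '~thor::key::byid_20130326');
BUILD(byId, OVERWRITE);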
\\n\\nGiven the usage scenario, we are trying to figure out the sturdiest back-up strategy. Some scenarios are as follows:\\nThe machine hosting the Thor master goes down.\\nIn the advanced configuration, the Thor cluster screen has two tabs - SwapNode and Backup. I could assume only one thing - a machine can be used as a passive Thor master which will be updated continuously by the active Thor master. But where to put its IP, whether the swapping is manual or automatic, and many such questions remain unanswered!\\n
\\nAll the machines of the Thor cluster go down, or the network segment containing the Thor cluster machines is unreachable - how can another Thor cluster be made available without any manual intervention?
\\nThe machine hosting the Dali server goes down.\\nIn the advanced configuration, the Dali cluster screen has a tab named 'Backup' which shows a backupComputer field. Is this the machine which will act as the Dali backup? How and where to add this machine's IP?
\\nThe machine(s) hosting the Roxie cluster go down.\\nThe advanced configuration doesn't provide anything pertaining to backup for Roxie.\\n
\\n\\nI referred to the documentation (only the installation guide is relevant, and only to a small extent) and the threads below:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=14&t=634\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=15&t=321\\n\\nWhile I have mentioned a few scenarios above, there can be many unforeseen ones! The core questions are:\\n\\nIs 'hot-swap' possible for some/all components of an HPCC cluster?
\\nWhat are the file types/formats in which a component is backed up, and which scripts must be used on these files to restore the cluster? Ex. /opt/HPCCSystems/bin/start_backupnode
\\nWhere can we find the official guide to backup and recovery?
\\n\\nThanks and regards !\", \"post_time\": \"2013-03-26 08:34:13\" },\n\t{ \"post_id\": 3917, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Re: Intermittent long despray operation\", \"username\": \"DSC\", \"post_text\": \"Here is a slightly clearer version of the timestamp information I tried to supply yesterday. This time I also made absolutely sure that all clocks were synchronized to within a tenth of a second of each other.\\n\\nFirst, the warnings that appear in dfuserver.log:\\n\\n0000727D 2013-04-11 07:33:29 50016 50028 "Gathering 180 file sizes on 13 threads"\\n0000727E 2013-04-11 07:33:39 50016 50028 "WARNING: Waiting for file: //10.210.40.11/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._180_of_180"\\n0000727F 2013-04-11 07:33:49 50016 50028 "WARNING: Waiting for file: //10.210.40.19/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._179_of_180"\\n00007280 2013-04-11 07:33:59 50016 50028 "WARNING: Waiting for file: //10.210.40.18/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._178_of_180"\\n00007281 2013-04-11 07:34:09 50016 50028 "WARNING: Waiting for file: //10.210.40.13/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._164_of_180"\\n00007282 2013-04-11 07:34:19 50016 50028 "WARNING: Waiting for file: //10.210.40.18/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._160_of_180"\\n00007283 2013-04-11 07:34:29 50016 50028 "WARNING: Waiting for file: //10.210.40.12/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_records_by_industry_report._163_of_180"\\n00007284 2013-04-11 07:34:37 50016 50028 "Finished gathering file sizes..."
\\nNext, the corresponding connection logs from the slave nodes. These are sorted by timestamps and the node's IP address is appended to each line to make it easier to match up with the warning entries.\\n\\n2013-04-11 07:33:33 22185 44712 "Connect from 10.210.40.10:33767" 10.210.40.11\\n2013-04-11 07:33:36 63375 25048 "Connect from 10.210.40.10:37677" 10.210.40.19\\n2013-04-11 07:33:39 60097 18614 "Connect from 10.210.40.10:56286" 10.210.40.18\\n2013-04-11 07:33:43 26667 49692 "Connect from 10.210.40.10:39802" 10.210.40.17\\n2013-04-11 07:33:47 16627 39653 "Connect from 10.210.40.10:49578" 10.210.40.15\\n2013-04-11 07:33:52 40007 63390 "Connect from 10.210.40.10:52135" 10.210.40.16\\n2013-04-11 07:33:55 36284 59662 "Connect from 10.210.40.10:55171" 10.210.40.14\\n2013-04-11 07:33:56 4149 29054 "Connect from 10.210.40.10:45142" 10.210.40.13\\n2013-04-11 07:33:57 12046 34983 "Connect from 10.210.40.10:44332" 10.210.40.12\\n2013-04-11 07:33:58 22185 44712 "Connect from 10.210.40.10:33779" 10.210.40.11\\n2013-04-11 07:34:02 63375 25048 "Connect from 10.210.40.10:37689" 10.210.40.19\\n2013-04-11 07:34:03 60097 18614 "Connect from 10.210.40.10:56297" 10.210.40.18\\n2013-04-11 07:34:09 26667 50654 "Connect from 10.210.40.10:39814" 10.210.40.17\\n2013-04-11 07:34:10 40007 63390 "Connect from 10.210.40.10:52145" 10.210.40.16\\n2013-04-11 07:34:15 16627 39653 "Connect from 10.210.40.10:49590" 10.210.40.15\\n2013-04-11 07:34:17 36284 59662 "Connect from 10.210.40.10:55182" 10.210.40.14\\n2013-04-11 07:34:21 12046 34983 "Connect from 10.210.40.10:44342" 10.210.40.12\\n2013-04-11 07:34:22 63375 25048 "Connect from 10.210.40.10:37697" 10.210.40.19\\n2013-04-11 07:34:24 40007 63390 "Connect from 10.210.40.10:52150" 10.210.40.16\\n2013-04-11 07:34:28 26667 49692 "Connect from 10.210.40.10:39821" 10.210.40.17\\n2013-04-11 07:34:32 4149 29054 "Connect from 10.210.40.10:45158" 10.210.40.13\\n2013-04-11 07:34:35 36284 61189 "Connect from 10.210.40.10:55189" 10.210.40.14\\n2013-04-11 07:34:37 4149 29054 "Connect from 10.210.40.10:45160" 10.210.40.13\\n2013-04-11 07:35:00 4149 29054 "Connect from 10.210.40.10:45218" 10.210.40.13\\n2013-04-11 07:35:00 4149 29054 "Connect from 10.210.40.10:45236" 10.210.40.13\\n2013-04-11 07:35:00 12046 34983 "Connect from 10.210.40.10:44408" 10.210.40.12\\n2013-04-11 07:35:00 12046 34983 "Connect from 10.210.40.10:44419" 10.210.40.12\\n2013-04-11 07:35:00 12046 34983 "Connect from 10.210.40.10:44424" 10.210.40.12\\n2013-04-11 07:35:00 16627 39653 "Connect from 10.210.40.10:49665" 10.210.40.15\\n2013-04-11 07:35:00 16627 39653 "Connect from 10.210.40.10:49672" 10.210.40.15\\n2013-04-11 07:35:00 16627 39653 "Connect from 10.210.40.10:49673" 10.210.40.15\\n2013-04-11 07:35:00 22185 44712 "Connect from 10.210.40.10:33863" 10.210.40.11\\n2013-04-11 07:35:00 22185 44712 "Connect from 10.210.40.10:33864" 10.210.40.11\\n2013-04-11 07:35:00 26667 49692 "Connect from 10.210.40.10:39886" 10.210.40.17\\n2013-04-11 07:35:00 26667 49692 "Connect from 10.210.40.10:39889" 10.210.40.17\\n2013-04-11 07:35:00 36284 59662 "Connect from 10.210.40.10:55250" 10.210.40.14\\n2013-04-11 07:35:00 36284 61189 "Connect from 10.210.40.10:55252" 10.210.40.14\\n2013-04-11 07:35:00 40007 63390 "Connect from 10.210.40.10:52212" 10.210.40.16\\n2013-04-11 07:35:00 40007 63390 "Connect from 10.210.40.10:52226" 10.210.40.16\\n2013-04-11 07:35:00 60097 18614 "Connect from 10.210.40.10:56375" 10.210.40.18\\n2013-04-11 07:35:00 60097 18614 "Connect from 10.210.40.10:56377" 10.210.40.18\\n2013-04-11 07:35:00 63375 
25048 "Connect from 10.210.40.10:37766" 10.210.40.19\\n2013-04-11 07:35:00 63375 25048 "Connect from 10.210.40.10:37775" 10.210.40.19
\\nAnd just to prove that I'm not crazy, I submitted the job again and here is the snippet from dfuserver.log:\\n\\n0000759A 2013-04-11 07:44:36 50016 50028 "Gathering 180 file sizes on 13 threads"\\n0000759B 2013-04-11 07:44:36 50016 50028 "Finished gathering file sizes..."
\\nJust from looking at these timestamps, I would guess that the node running dfuserver is the source of the problems, whatever those are. Most telling is the delay between the "Gathering..." log entry and the first connection entry from a slave node. The dfuserver node is a decent hardware node running everything except Thor and Roxie. It's frankly bored most of the time.\\n\\nDan\", \"post_time\": \"2013-04-11 12:58:12\" },\n\t{ \"post_id\": 3911, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Re: Intermittent long despray operation\", \"username\": \"DSC\", \"post_text\": \"Thanks, Jake and Richard. I did some checking along your suggestions.\\n\\nThe drives aren't configured to spin down, and I doubt they would be idle at this point even if they were so configured. The warning I saw was at the tail end of a Thor process that exercised all the nodes and that process would spin them up, I would think. If nothing else, the warning was citing an attempted read of a temporary file, so that means the file would have been flushed to disk, forcing a spin up. (Probably.)\\n\\n/var/log/HPCCSystems/mydafilesrv files on the target nodes show only usual activity: Ping-style entries every 10 minutes, then what I think are normal entries showing this read activity. Specifically, here is an excerpt from the first slave node matching the activity I originally posted:\\n\\n00004D54 2013-04-01 09:08:09 22185 22186 "SYS: PU= 3% MU= 17% MAL=182656 MMP=0 SBK=182656 TOT=1320K RAM=8825280K SWP=19108K"\\n00004D55 2013-04-01 09:08:09 22185 22186 "DSK: [sda] r/s=0.0 kr/s=0.0 w/s=2.2 kw/s=63.6 bsy=0 NIC: rxp/s=180.2 rxk/s=130.4 txp/s=134.8 txk/s=19.7 CPU: usr=2 sys=0 iow=0 idle=96"\\n00004D56 2013-04-01 09:10:09 22185 44712 "Connect from 10.210.40.10:48526"\\n00004D57 2013-04-01 09:10:51 22185 44712 "Connect from 10.210.40.10:48553"\\n00004D58 2013-04-01 09:11:08 22185 44712 "Connect from 10.210.40.10:48635"\\n00004D59 2013-04-01 09:11:08 22185 44712 "Connect from 10.210.40.10:48642"\\n00004D5A 2013-04-01 09:11:08 22185 45680 "Connect from 10.210.40.10:48645"\\n00004D5B 2013-04-01 09:18:09 22185 22186 "SYS: PU= 3% MU= 19% MAL=183360 MMP=0 SBK=183360 TOT=1320K RAM=9615812K SWP=19108K"\\n00004D5C 2013-04-01 09:18:09 22185 22186 "DSK: [sda] r/s=0.0 kr/s=0.8 w/s=2.3 kw/s=65.0 bsy=0 NIC: rxp/s=181.3 rxk/s=130.8 txp/s=137.2 txk/s=19.9 CPU: usr=2 sys=0 iow=0 idle=96"
\\nLooking at this got me thinking about the overall flow of getting remote file sizes. I extracted the "Connect from" log entries from all the slave nodes and sorted them according to log entry timestamp. In the process I discovered that my node-9 has a ntpd issue and its clock is off. I compensated for that in what follows:\\n\\n2013-04-01 09:09:47 60097 18614 "Connect from 10.210.40.10:42813" node-9\\n2013-04-01 09:10:09 22185 44712 "Connect from 10.210.40.10:48526" node-2\\n2013-04-01 09:10:10 60097 18614 "Connect from 10.210.40.10:42823" node-9\\n2013-04-01 09:10:11 63375 25048 "Connect from 10.210.40.10:52436" node-10\\n2013-04-01 09:10:16 26667 49692 "Connect from 10.210.40.10:54560" node-8\\n2013-04-01 09:10:22 40007 63390 "Connect from 10.210.40.10:38660" node-7\\n2013-04-01 09:10:23 16627 39653 "Connect from 10.210.40.10:36105" node-6\\n2013-04-01 09:10:28 36284 59662 "Connect from 10.210.40.10:41697" node-5\\n2013-04-01 09:10:32 4149 29054 "Connect from 10.210.40.10:59902" node-4\\n2013-04-01 09:10:36 60097 18614 "Connect from 10.210.40.10:42925" node-9\\n2013-04-01 09:10:36 60097 18614 "Connect from 10.210.40.10:42937" node-9\\n2013-04-01 09:10:38 12046 34983 "Connect from 10.210.40.10:59092" node-3\\n2013-04-01 09:10:41 63375 25048 "Connect from 10.210.40.10:52448" node-10\\n2013-04-01 09:10:46 26667 49692 "Connect from 10.210.40.10:54572" node-8\\n2013-04-01 09:10:47 36284 59662 "Connect from 10.210.40.10:41705" node-5\\n2013-04-01 09:10:51 22185 44712 "Connect from 10.210.40.10:48553" node-2\\n2013-04-01 09:10:53 40007 63390 "Connect from 10.210.40.10:38681" node-7\\n2013-04-01 09:10:56 16627 39653 "Connect from 10.210.40.10:36126" node-6\\n2013-04-01 09:10:59 4149 29054 "Connect from 10.210.40.10:59921" node-4\\n2013-04-01 09:11:02 12046 34983 "Connect from 10.210.40.10:59112" node-3\\n2013-04-01 09:11:08 4149 29054 "Connect from 10.210.40.10:60005" node-4\\n2013-04-01 09:11:08 4149 29054 "Connect from 10.210.40.10:60017" node-4\\n2013-04-01 09:11:08 12046 34983 "Connect from 10.210.40.10:59195" node-3\\n2013-04-01 09:11:08 12046 34983 "Connect from 10.210.40.10:59198" node-3\\n2013-04-01 09:11:08 16627 39653 "Connect from 10.210.40.10:36207" node-6\\n2013-04-01 09:11:08 16627 39653 "Connect from 10.210.40.10:36210" node-6\\n2013-04-01 09:11:08 22185 44712 "Connect from 10.210.40.10:48635" node-2\\n2013-04-01 09:11:08 22185 44712 "Connect from 10.210.40.10:48642" node-2\\n2013-04-01 09:11:08 22185 45680 "Connect from 10.210.40.10:48645" node-2\\n2013-04-01 09:11:08 26667 49692 "Connect from 10.210.40.10:54676" node-8\\n2013-04-01 09:11:08 26667 49692 "Connect from 10.210.40.10:54678" node-8\\n2013-04-01 09:11:08 26667 49692 "Connect from 10.210.40.10:54679" node-8\\n2013-04-01 09:11:08 26667 49692 "Connect from 10.210.40.10:54681" node-8\\n2013-04-01 09:11:08 36284 59662 "Connect from 10.210.40.10:41799" node-5\\n2013-04-01 09:11:08 36284 59662 "Connect from 10.210.40.10:41809" node-5\\n2013-04-01 09:11:08 36284 59662 "Connect from 10.210.40.10:41812" node-5\\n2013-04-01 09:11:08 40007 63390 "Connect from 10.210.40.10:38761" node-7\\n2013-04-01 09:11:08 40007 63390 "Connect from 10.210.40.10:38780" node-7\\n2013-04-01 09:11:08 40007 64354 "Connect from 10.210.40.10:38781" node-7\\n2013-04-01 09:11:08 63375 25048 "Connect from 10.210.40.10:52541" node-10\\n2013-04-01 09:11:08 63375 25048 "Connect from 10.210.40.10:52546" node-10\\n
\\nIf find this timestamp pattern interesting, if not particularly informative, when you consider that 13 threads were supposedly running simultaneously.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-10 12:58:13\" },\n\t{ \"post_id\": 3910, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Re: Intermittent long despray operation\", \"username\": \"jsmith\", \"post_text\": \"It also might be worth checking the DaFileSrv logs on the target nodes at the corresponding times, to see if any warnings there.\", \"post_time\": \"2013-04-10 08:58:13\" },\n\t{ \"post_id\": 3909, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Re: Intermittent long despray operation\", \"username\": \"richardkchapman\", \"post_text\": \"It's not possible that your disks have spun down is it?\", \"post_time\": \"2013-04-10 07:53:06\" },\n\t{ \"post_id\": 3907, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Re: Intermittent long despray operation\", \"username\": \"DSC\", \"post_text\": \"Any ideas?\\n\\nMy earlier assertion about the "first despray of the day exhibits the problem" appears to be true. Reliably, the first execution of the code in the morning coughs up the warnings and subsequent executions display no warnings.\\n\\nDan\", \"post_time\": \"2013-04-09 18:59:41\" },\n\t{ \"post_id\": 3861, \"topic_id\": 865, \"forum_id\": 16, \"post_subject\": \"Intermittent long despray operation\", \"username\": \"DSC\", \"post_text\": \"I have some code that desprays a small recordset in XML format to my cluster's landing zone. The recordset is very small -- the resulting XML is 30K -- but it's the result of a 180-way Thor process running across nine physical nodes. Usually, the despray process takes only a second or two to complete. Intermittently, it takes much longer (1-2 minutes).\\n\\nHere is an excerpt from /var/log/HPCCSystems/mydfuserver/dfuserver.log:\\n\\n0000220F 2013-04-01 09:10:07 57462 57474 "Gathering 180 file sizes on 13 threads"\\n00002210 2013-04-01 09:10:17 57462 57474 "WARNING: Waiting for file: //10.210.40.11/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_presence_of_data_rep\\nort._180_of_180"\\n00002211 2013-04-01 09:10:27 57462 57474 "WARNING: Waiting for file: //10.210.40.19/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_presence_of_data_rep\\nort._179_of_180"\\n00002212 2013-04-01 09:10:37 57462 57474 "WARNING: Waiting for file: //10.210.40.18/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_presence_of_data_rep\\nort._178_of_180"\\n00002213 2013-04-01 09:10:47 57462 57474 "WARNING: Waiting for file: //10.210.40.15/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_presence_of_data_rep\\nort._166_of_180"\\n00002214 2013-04-01 09:10:57 57462 57474 "WARNING: Waiting for file: //10.210.40.11/var/lib/HPCCSystems/hpcc-data/thor/tmp/id_bar_presence_of_data_rep\\nort._162_of_180"\\n00002215 2013-04-01 09:11:02 57462 57474 "Finished gathering file sizes..."
\\nThese warnings, with the associated 10-second delay, appears to be the source of the intermittent problem.\\n\\nAnecdotally, it seems that the delay pops up whenever the despray operation has not run recently. If I turn around and run this process again, it will typically fly right along.\\n\\nHow can I debug this issue? Any recommendations on perhaps a configuration parameter I could change? I noticed in the log that there are 13 threads allocated to gathering file sizes. Can that be increased? Or is that the wrong thing to be looking at?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-04-01 14:46:45\" },\n\t{ \"post_id\": 3900, \"topic_id\": 871, \"forum_id\": 16, \"post_subject\": \"Re: Channel 1 blocked by flow control\", \"username\": \"sbagaria\", \"post_text\": \"I suppose it was bad ECL. I am running a 1 node roxie server with all files background copied to the roxie node. In my ECL job submitted through wsECL, there is a transform which performs 5 index lookups (through an index filter inside the transform) per row. This transform is then applied to a dataset at least a few thousand rows in size. This must be generating the high number of roxie agent requests.\\n\\nAll other queries are working fine. I have since fixed the query to be more optimized by using keyed joins instead of the lookups inside a transform.\", \"post_time\": \"2013-04-08 20:27:09\" },\n\t{ \"post_id\": 3899, \"topic_id\": 871, \"forum_id\": 16, \"post_subject\": \"Re: Channel 1 blocked by flow control\", \"username\": \"sort\", \"post_text\": \"This error is caused by the udp layer on the given channel not responding fast enough. This can happen for a few reasons:\\n1. All slave nodes are not running causing some nodes to do more than others\\n2. Misconfiguration of the udp layer. Please make sure that the multicast ip settings in the environment.xml file are unique across different roxie clusters\\n3. Bad ecl causing too many roxie agent requests.\\n\\nHow are you running your roxie queries - i.e. are you sending requests directly to roxie, or are you going through an esp service like wsecl.\\n\\nYou can try to disable udp - go to configmgr, find the roxie component. Go to the upd tab and set RoxieMulticastEnabled to false\", \"post_time\": \"2013-04-08 20:10:51\" },\n\t{ \"post_id\": 3892, \"topic_id\": 871, \"forum_id\": 16, \"post_subject\": \"Channel 1 blocked by flow control\", \"username\": \"sbagaria\", \"post_text\": \"What does this error message mean? - \\n\\n000007EC 2013-04-07 23:36:30 6590 27098 "[10.38.251.4:9876{26}@3] Channel 1 blocked by flow control: uid=0x00000104 activityId=3 pri=LOW queryHash=d01275399d72f218 ch=1 seq=53 cont=0 server=10.38.251.4"\\n\\nI never saw this before and now all of a sudden these are showing up in my Roxie logs and my Roxie queries are failing (timeout).\\n\\nSid\", \"post_time\": \"2013-04-08 03:51:46\" },\n\t{ \"post_id\": 4097, \"topic_id\": 912, \"forum_id\": 16, \"post_subject\": \"Re: Multiple ECLAgent processes\", \"username\": \"william.whitehead@lexisnexis.com\", \"post_text\": \"Yes, you can look at the eclagent logfiles, which are normally located in /var/log/HPCCSystems/myeclagent. Feel free to attach it to this thread and I can have a look as well\", \"post_time\": \"2013-05-15 14:42:47\" },\n\t{ \"post_id\": 4094, \"topic_id\": 912, \"forum_id\": 16, \"post_subject\": \"Re: Multiple ECLAgent processes\", \"username\": \"sbagaria\", \"post_text\": \"Thanks for clarifying that. 
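Picking up the keyed-join rewrite described in the 'Channel 1 blocked by flow control' thread above: a minimal, hypothetical ECL sketch of that change (layouts, file names and field names are invented; the poster's actual query is not shown in the thread):

Key := INDEX({STRING20 id}, {STRING100 detail}, '~roxie::key::detail_by_id');

InRec := RECORD
    STRING20 id;
    STRING50 other;
END;
inRows := DATASET('~thor::in::rows', InRec, THOR);

OutRec := RECORD(InRec)
    STRING100 detail;
END;

// Instead of probing the index inside a TRANSFORM once per row (which turns
// into many independent Roxie agent requests), express the lookup as a
// single keyed JOIN against the index:
joined := JOIN(inRows, Key,
               KEYED(LEFT.id = RIGHT.id),
               TRANSFORM(OutRec,
                         SELF.detail := RIGHT.detail,
                         SELF := LEFT),
               LEFT OUTER);

OUTPUT(joined);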
I don't have any active workunits running, so this may very well be zombie processes which failed to abort. BUT their CPU time has increased since yesterday (from 10 seconds to 11 seconds) which means they are lightweight processes still active and not showing up in the activity page. I don't have anything scheduled either, so they can not be polling threads used by the scheduler. Is there any logfile I can examine to check for active processes? I would like to know what these processes are doing.\", \"post_time\": \"2013-05-15 14:16:42\" },\n\t{ \"post_id\": 4093, \"topic_id\": 912, \"forum_id\": 16, \"post_subject\": \"Re: Multiple ECLAgent processes\", \"username\": \"william.whitehead@lexisnexis.com\", \"post_text\": \"Hello Sid, this could be normal. The way it works is that the agentexec process spawns an ECLAGENT instance for each active workunit (eithor THOR or HTHOR). When the workunits complete, the eclagent process terminates. So, if you open ECLWatch and look at the "Activity" or "Browse Workunits" screen, it is likely that you will see there are active workunits.\", \"post_time\": \"2013-05-15 14:10:46\" },\n\t{ \"post_id\": 4082, \"topic_id\": 912, \"forum_id\": 16, \"post_subject\": \"Multiple ECLAgent processes\", \"username\": \"sbagaria\", \"post_text\": \"Hi,\\n\\nI noticed that somehow my system has spun up many eclagent processes. Is this normal? My version is 3.10.4-1. A list of all running processes under the hpcc user:\\n\\n\\n PID TTY TIME CMD\\n 2384 ? 00:00:10 eclagent\\n 2673 ? 00:00:10 eclagent\\n 2697 ? 00:00:10 eclagent\\n 2834 ? 00:00:10 eclagent\\n 2846 ? 00:00:10 eclagent\\n 2982 ? 00:00:10 eclagent\\n 3033 ? 00:00:11 eclagent\\n 3183 ? 00:00:10 eclagent\\n 3326 ? 00:00:10 eclagent\\n 3476 ? 00:00:10 eclagent\\n 3607 ? 00:00:10 eclagent\\n 3760 ? 00:00:10 eclagent\\n 3931 ? 00:00:10 eclagent\\n 4149 ? 00:00:10 eclagent\\n 4302 ? 00:00:10 eclagent\\n 4451 ? 00:00:10 eclagent\\n 4614 ? 00:00:10 eclagent\\n 4777 ? 00:00:10 eclagent\\n 4924 ? 00:00:10 eclagent\\n 5074 ? 00:00:10 eclagent\\n 5230 ? 00:00:10 eclagent\\n 5369 ? 00:00:10 eclagent\\n 5528 ? 00:00:10 eclagent\\n 5667 ? 00:00:10 eclagent\\n 5838 ? 00:00:10 eclagent\\n 5980 ? 00:00:10 eclagent\\n 5992 ? 00:00:10 eclagent\\n 6125 ? 00:00:10 eclagent\\n 6137 ? 00:00:10 eclagent\\n 6274 ? 00:00:11 eclagent\\n 6286 ? 00:00:10 eclagent\\n 6812 ? 00:00:10 eclagent\\n 7335 ? 00:00:10 eclagent\\n 7857 ? 00:00:10 eclagent\\n10732 ? 00:00:00 init_dali\\n10764 ? 07:52:38 daserver\\n10872 ? 00:00:00 init_dfuserver\\n10905 ? 00:04:50 dfuserver\\n11017 ? 00:00:00 init_eclagent\\n11050 ? 00:00:08 agentexec\\n11151 ? 00:00:00 init_eclccserve\\n11183 ? 00:00:10 eclccserver\\n11284 ? 00:00:00 init_eclschedul\\n11316 ? 00:00:06 eclscheduler\\n11420 ? 00:00:00 init_esp\\n11452 ? 00:13:38 esp\\n11555 ? 00:00:00 init_roxie\\n11588 ? 13:52:41 roxie\\n11696 ? 00:00:00 init_sasha\\n11731 ? 00:00:42 saserver\\n11937 ? 00:00:00 init_thor\\n11971 ? 00:00:00 start_thor\\n12067 ? 00:00:00 run_thor\\n14767 ? 00:00:00 init_dafilesrv\\n14796 ? 00:26:20 dafilesrv\\n29073 ? 00:02:30 thormaster_myth\\n
\", \"post_time\": \"2013-05-14 15:18:55\" },\n\t{ \"post_id\": 4601, \"topic_id\": 1032, \"forum_id\": 16, \"post_subject\": \"Re: eclplus list option: need all workunits\", \"username\": \"DSC\", \"post_text\": \"That looks like a better tool to use anyway. Thanks!\\n\\nDan\", \"post_time\": \"2013-09-17 20:20:07\" },\n\t{ \"post_id\": 4600, \"topic_id\": 1032, \"forum_id\": 16, \"post_subject\": \"Re: eclplus list option: need all workunits\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI received this information from Jake:\\n\\nDon't know about eclplus, but you could use daliadmin\\navailable in the package, and available on your path along with some other things if you do:\\n\\n# source /opt/HPCCSystems/sbin/hpcc_setenv\\n\\nyou'll see the syntax / usage if you just daliadmin on it's own, you can get it to list some or all with a wildcard etc.\\ne.g.\\n\\ndaliadmin <daliip> listworkunits @state=failed\\n\\n\\nI just noticed the <workunit-mask> in the usage is a bit misleading, I will fix that.\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-17 20:18:26\" },\n\t{ \"post_id\": 4580, \"topic_id\": 1032, \"forum_id\": 16, \"post_subject\": \"eclplus list option: need all workunits\", \"username\": \"DSC\", \"post_text\": \"Issuing the following command on my management node works, but only shows the first 100 workunits:\\n\\neclplus action=list server=127.0.0.1
\\nIs there a way to force eclplus to show all the workunits? Better yet, can a filter be supplied for state (competed, compiled, etc.) or job name?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-09-16 19:03:34\" },\n\t{ \"post_id\": 5716, \"topic_id\": 1313, \"forum_id\": 16, \"post_subject\": \"Re: HPCC on Azure\", \"username\": \"bforeman\", \"post_text\": \"Hi David, \\n\\nI forgot to mention, there is also a Timings section for every workunit you submit. Just click on the ECL Watch tab and then look for the Timings link. The timings page is VERY detailed \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-19 15:16:20\" },\n\t{ \"post_id\": 5715, \"topic_id\": 1313, \"forum_id\": 16, \"post_subject\": \"Re: HPCC on Azure\", \"username\": \"David Dasher\", \"post_text\": \"Hi Bob\\n\\nMany thanks for your reply, I will take a good look.\\n\\nWe are mainly looking for a tool that can profile how fast the data is coming off disk for HPCC and also what kind of internode network speed between the Thor nodes.\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-05-19 15:10:05\" },\n\t{ \"post_id\": 5714, \"topic_id\": 1313, \"forum_id\": 16, \"post_subject\": \"Re: HPCC on Azure\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nI'm still checking with the HPCC development team, but on my level I just normally use the Graphs on each ECL process to identify any hot spots or skew. As long as your nodes are relatively homogeneous, there shouldn't be any significant speed difference between them. But you can also look at the cluster processes in the ECL watch Topology section and monitor the logs.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-05-19 13:54:45\" },\n\t{ \"post_id\": 5710, \"topic_id\": 1313, \"forum_id\": 16, \"post_subject\": \"HPCC on Azure\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nWe are running HPCC on Azure and I am wondering if anyone can recommend a way to test internode communication speeds on a multinode THOR cluster?\\n\\nWe need to run this from time to time to track if we are seeing any significant skews in terms of speed due to sudden changes in network speed. \\n\\nAny suggestions would be appreciated.\\n\\nDavid\", \"post_time\": \"2014-05-18 19:11:09\" },\n\t{ \"post_id\": 6579, \"topic_id\": 1507, \"forum_id\": 16, \"post_subject\": \"Re: Roxie Query Frequency (Platform 5)\", \"username\": \"David Dasher\", \"post_text\": \"Thanks Bob\\n\\nDavid\", \"post_time\": \"2014-11-07 18:38:46\" },\n\t{ \"post_id\": 6578, \"topic_id\": 1507, \"forum_id\": 16, \"post_subject\": \"Re: Roxie Query Frequency (Platform 5)\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nThe best tools for that are Ganglia and Nagios. 
\\n\\nhttp://hpccsystems.com/download/free-community-edition/monitoring\\n\\nhttp://hpccsystems.com/download/docs/hpcc-monitoring-and-reporting-technical-preview\\n\\nCheers,\\n\\nBob\", \"post_time\": \"2014-11-07 18:18:13\" },\n\t{ \"post_id\": 6577, \"topic_id\": 1507, \"forum_id\": 16, \"post_subject\": \"Roxie Query Frequency (Platform 5)\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nIs there an easy way I can see the number of times each Roxie query is being hit in ECLWatch?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2014-11-07 14:37:32\" },\n\t{ \"post_id\": 6662, \"topic_id\": 1534, \"forum_id\": 16, \"post_subject\": \"Re: Roxie response slow, lots of errors in log\", \"username\": \"kevinLv\", \"post_text\": \"Thanks bforeman, your post addressed why so many error messages were raised. Yesterday this slave node went down automatically, but the master node stayed alive.\\nCould you please help find, from the attached log, the reason why this slave node automatically shuts down several times a day? Our Roxie cluster has 1 master and only 1 slave, and it's in a production environment, so frequent shutdowns are a big risk for us.\", \"post_time\": \"2014-12-04 06:23:53\" },\n\t{ \"post_id\": 6659, \"topic_id\": 1534, \"forum_id\": 16, \"post_subject\": \"Re: Roxie response slow, lots of errors in log\", \"username\": \"bforeman\", \"post_text\": \"The [unregistered query] error is usually caused when the Roxie is getting a new data package deployment. \\n\\nI think it's related to some nodes having Roxie up and responding to requests while other nodes are not yet responding. When we first started using OSS some people were running the "for" loop to start the cluster sequentially instead of concurrently. We received these errors when we started Roxie every time like that. Once we began running startup concurrently on every node we stopped getting these errors on startup. Since the deployment doesn’t finish at the same time on every node you can get these errors after almost every deployment. \\n\\nTry stopping and restarting the cluster concurrently.\\n\\nBob\", \"post_time\": \"2014-12-03 19:42:58\" },\n\t{ \"post_id\": 6656, \"topic_id\": 1534, \"forum_id\": 16, \"post_subject\": \"Roxie response slow, lots of errors in log\", \"username\": \"kevinLv\", \"post_text\": \"Dear big guys,\\n Today we found that Roxie response is very slow, and a slave node is down, so we checked roxie.log and found lots of errors about "Roxie slave received request for unregistered query".\\n Is it caused by a code bug or an incorrect cluster configuration? Please give some ideas on how to read and fix it, thanks.\\n The Roxie log is attached; I just kept the log tail because of the upload size limitation.\\nBR,\\nKevin\", \"post_time\": \"2014-12-03 08:34:19\" },\n\t{ \"post_id\": 7458, \"topic_id\": 1698, \"forum_id\": 16, \"post_subject\": \"Re: Thor Cluster won't start\", \"username\": \"bforeman\", \"post_text\": \"Hi David,\\n\\nNot sure if this will help, but it sounds like a similar situation. \\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=15&t=1533&p=6666&hilit=thor+cluster+not+attached&sid=43e6605b36ed126210255392b876a434&sid=43e6605b36ed126210255392b876a434#p6666\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2015-04-24 23:22:01\" },\n\t{ \"post_id\": 7452, \"topic_id\": 1698, \"forum_id\": 16, \"post_subject\": \"Thor Cluster won't start\", \"username\": \"David Dasher\", \"post_text\": \"Hi all\\n\\nMy thor cluster won't start and I can't spot what the issue is. 
On the activity page it shows\\n\\n"Cluster thor not attached; thor.thor: queue active;"\\n\\nI am including my environment and thormaster log.\\n\\nCan anyone please help?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-04-24 19:58:38\" },\n\t{ \"post_id\": 7574, \"topic_id\": 1724, \"forum_id\": 16, \"post_subject\": \"Re: Error on 5.2\", \"username\": \"David Dasher\", \"post_text\": \"Hi\\n\\nThanks very much for the reply. \\n\\nIt does indeed include a distribute. I'll try the solution mentioned. \\n\\nThanks again\\n\\nDavid\", \"post_time\": \"2015-05-11 13:28:16\" },\n\t{ \"post_id\": 7573, \"topic_id\": 1724, \"forum_id\": 16, \"post_subject\": \"Re: Error on 5.2\", \"username\": \"jsmith\", \"post_text\": \"Hi David,\\n\\nspeculating, but this could be case of : https://track.hpccsystems.com/browse/HPCC-13477\\n\\nDoes the subgraph it occurred in (161) involve a distribute?\", \"post_time\": \"2015-05-11 13:20:35\" },\n\t{ \"post_id\": 7570, \"topic_id\": 1724, \"forum_id\": 16, \"post_subject\": \"Error on 5.2\", \"username\": \"David Dasher\", \"post_text\": \"Hello all\\n\\nWe are having some trouble with 5.2, can anyone help with this error?\\n\\nSystem error: -1: Graph[161], diskwrite[165]: SLAVE #15 [10.12.0.13:20300]: CMemoryBufferSerialStream::get read past end of stream (1300,40), \\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-05-11 05:52:59\" },\n\t{ \"post_id\": 7697, \"topic_id\": 1752, \"forum_id\": 16, \"post_subject\": \"Re: Thor shutting down\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nOur Operations guys took a look at your logs and told me: "both logs point to 10.12.0.24 having problems...I would try and reboot that node ..if that does not correct the issue re-install the software"\\n\\nAnd replacing a node is covered in our Systems Administrators Guide (PDF downloadable here: http://hpccsystems.com/download/docs/installation-and-administration). I found it on page 90.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-01 13:51:53\" },\n\t{ \"post_id\": 7696, \"topic_id\": 1752, \"forum_id\": 16, \"post_subject\": \"Re: Thor shutting down\", \"username\": \"David Dasher\", \"post_text\": \"Hello\\n\\nCan someone tell us what might be causing the segfault? Slavelog attached.\\n\\nAlso, how do we replace a node with a new one?\\n\\nAre there instructions?\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-05-29 21:25:31\" },\n\t{ \"post_id\": 7695, \"topic_id\": 1752, \"forum_id\": 16, \"post_subject\": \"Thor shutting down\", \"username\": \"David Dasher\", \"post_text\": \"Hi\\n\\nWe seem to be having some issues with our Thor cluster. The system seems to be shutting down periodically. I have attached our thormaster Log. If somebody can help point me in the right direction it would be a great help.\\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2015-05-29 20:00:11\" },\n\t{ \"post_id\": 8204, \"topic_id\": 1904, \"forum_id\": 16, \"post_subject\": \"Re: Can't delete superfile\", \"username\": \"alex\", \"post_text\": \"Thanks Bob/Fernando. 
That did the trick.\", \"post_time\": \"2015-09-24 14:59:37\" },\n\t{ \"post_id\": 8200, \"topic_id\": 1904, \"forum_id\": 16, \"post_subject\": \"Re: Can't delete superfile\", \"username\": \"bforeman\", \"post_text\": \"Hi Alex,\\n\\nTry the following command\\n[root@node010241020205 ~]# /opt/HPCCSystems/bin/daliadmin <dali ip> checksuperfile <superfilename>\\n\\nThe output will look like this if its good..\\n\\nSuperfile <superfilename> OK - contains 0 subfiles\\n\\nIf it comes back as corrupt run the same command but tack “fix=true” at the end\\n\\nEg.\\n\\n/opt/HPCCSystems/bin/daliadmin <dali ip> checksuperfile <superfilename> fix=true\\n\\n\\nUsage:\\n\\n[root@node010241020205 ~]# /opt/HPCCSystems/bin/daliadmin |grep check\\n checksuperfile <superfilename> [fix=true|false] -- check superfile links consistent and optionally fix\\n checksubfile <subfilename> -- check subfile links to parent consistent\\n [deletefiles=<true|false>]-- perform some checks on dali meta data an optionally fix or remove redundant info\\n\\nHTH,\\n\\nBob (via Fernando)\", \"post_time\": \"2015-09-24 14:42:08\" },\n\t{ \"post_id\": 8196, \"topic_id\": 1904, \"forum_id\": 16, \"post_subject\": \"Can't delete superfile\", \"username\": \"alex\", \"post_text\": \"When trying to delete a superfile, I get the following error:\\n\\n"Cannot delete <superfilename>:CDristributedSuperfile: Superfile <superfilename>: corrupt subfile file <subfilename> cannot be found\\n\\nThe subfile in this case was another superfile, if that matters. Seems like daliadmin is the way to go here, but when I do:\\n\\ndaliadmin <ip> delete <superfilename>\\n\\nI get "can't connect to <superfilename>", so I gather my syntax is wrong, but I can't figure out what's correct.\\n\\nHow do I get rid of this file?\", \"post_time\": \"2015-09-23 20:21:20\" },\n\t{ \"post_id\": 8674, \"topic_id\": 2024, \"forum_id\": 16, \"post_subject\": \"Re: Fileservices and 702 to HPCC\", \"username\": \"alex\", \"post_text\": \"Thanks Richard. \\n\\nhttps://track.hpccsystems.com/browse/HPCC-14569\", \"post_time\": \"2015-11-24 17:29:01\" },\n\t{ \"post_id\": 8670, \"topic_id\": 2024, \"forum_id\": 16, \"post_subject\": \"Re: Fileservices and 702 to HPCC\", \"username\": \"rtaylor\", \"post_text\": \"alex,\\n\\nThis should be reported through JIRA.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-24 17:19:24\" },\n\t{ \"post_id\": 8660, \"topic_id\": 2024, \"forum_id\": 16, \"post_subject\": \"Fileservices and 702 to HPCC\", \"username\": \"alex\", \"post_text\": \"We currently support a legacy 702 cluster in addition to HPCC systems running 5.0.2. and 5.2.4. 
We thought we would upgrade the 5.0 system to match the 5.2.4, but FileServices calls don't seem to behave properly when pointed at the 5.2.4 cluster.\\n\\nFileServices.LogicalFileList only returns empty datasets (even when we know the files exist) and SuperFileContents errors with "IPropertyTree: invalid tag name"\\n\\nShould I take from this that it would be unwise to change from version 5.0.2 as long as I have to support the 702 cluster?\", \"post_time\": \"2015-11-23 20:17:37\" },\n\t{ \"post_id\": 8844, \"topic_id\": 2050, \"forum_id\": 16, \"post_subject\": \"Re: HPCC Components - Service Types & Processes\", \"username\": \"amillar\", \"post_text\": \"Hi Ming and Gleb,\\n\\nThanks for getting back to me, I now understand the process a little bit better, and can confirm that hpcc-init and dafilesrv are now being monitored via Pulseway, which is great.\\n\\nI do want to implement some more sophisticated monitoring in the future and have been looking at Ganglia and Nagios. \\n\\nNow that I have at least something in place, it’s going to give me some breathing space to look into these solutions properly.\\n\\nThanks again\\n\\nAntony\", \"post_time\": \"2015-12-15 09:19:17\" },\n\t{ \"post_id\": 8834, \"topic_id\": 2050, \"forum_id\": 16, \"post_subject\": \"Re: HPCC Components - Service Types & Processes\", \"username\": \"Gleb Aronsky\", \"post_text\": \"You may want to examine our offerings for monitoring and alerting with Ganglia and Nagios.\\n\\nSome of the monitoring utilities released for Nagios may be useful for Pulseway monitoring. The utilities provided do some basic health checks on HPCC components. Other tools provided in the package generate configuration files from the HPCC environment.xml configuration file directly for Nagios. \\n\\nThe utility configgen, released as part of the platform, can also list ports and configured nodes. Just check the command line help.\\n\\nYou can find monitoring/alerting docs here: https://hpccsystems.com/download/docume ... -technical\\n\\nAnd the source code here:\\nhttps://github.com/hpcc-systems/nagios-monitoring\\nhttps://github.com/hpcc-systems/ganglia-monitoring\", \"post_time\": \"2015-12-11 19:57:02\" },\n\t{ \"post_id\": 8824, \"topic_id\": 2050, \"forum_id\": 16, \"post_subject\": \"Re: HPCC Components - Service Types & Processes\", \"username\": \"ming\", \"post_text\": \"The HPCC Systems Platform uses SYSVINIT for service management.\\nThe registered services are hpcc-init and dafilesrv. Most components are managed through service hpcc-init or /opt/HPCCSystems/etc/init.d/hpcc-init. hpcc-init doesn't stay as a daemon on the system; instead it starts a group of init-<component> daemons (defined under /opt/HPCCSystems/bin). These init-<component> processes start/monitor/restart each component.\", \"post_time\": \"2015-12-11 19:26:20\" },\n\t{ \"post_id\": 8806, \"topic_id\": 2050, \"forum_id\": 16, \"post_subject\": \"HPCC Components - Service Types & Processes\", \"username\": \"amillar\", \"post_text\": \"Good Afternoon,\\n\\nWe have been using a monitoring service named Pulseway on Windows machines for a few years now and are very impressed with it. 
\\n\\nThis week I have tried implementing the monitoring agent on one of our Ubuntu 12.04 LTS boxes which are running hpccsystems-platform_community-5.0.2-1trusty_amd64\\n\\nI have pretty much configured it so it will tell me if drive space is low, CPU usage is high, free memory is low, whether the machine is off or on etc which is great\\n\\nbut I am having problems getting it to tell me of these HPCC components are running :\\n\\nmydafilesrv \\nmydali \\nmydfuserver \\nmyeclagent \\nmyeclccserver \\nmyeclscheduler \\nmyesp \\nmyroxie \\nmysasha \\nmythor \\n\\nI have been trying to establish initially if these are actually services rather than processes, can anyone give me anymore information on this?\\n\\nIt seems Ubuntu has three service types : \\n\\nSYSVINIT, UPSTART or SYSTEMD\\n\\nif they are services, does anyone know which type?\\n\\nThe agent is configured via XML, and to give you an example I can get the service SSH monitored like this :\\n\\n<Service Name="ssh" DisplayName="SSH Daemon" IsDaemon="true" DaemonType="UPSTART" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n\\nand the NTP process using this :\\n\\n<Service Name="ntpd" DisplayName="NTPD Process" IsDaemon="false" DaemonType="NONE" Path="/usr/sbin/ntpd" StartParameters="-p /var/run/ntp/ntpd.pid -g -u ntp:ntp -i /var/lib/ntp -c /etc/ntp.conf" CanBeStopped="true" Enabled="false" />\\n\\nbut any HPCC services, show up on the app as stopped, even when they are started so its probably a configuration error on my part.\\n\\nHere are some service variations I have tried without success :\\n\\n<Service Name="myesp" DisplayName="myesp" IsDaemon="true" DaemonType="UPSTART" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n<Service Name="myroxie" DisplayName="myroxie" IsDaemon="true" DaemonType="NONE" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n<Service Name="mysasha" DisplayName="mysasha" IsDaemon="true" DaemonType="SYSVINIT" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n<Service Name="mythor" DisplayName="mythor" IsDaemon="true" DaemonType="SYSTEMD" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n\\nI have done the same with processes e.g. \\n\\n<Service Name="mythor" DisplayName="mythor" IsDaemon="false" DaemonType="NONE" Path="" StartParameters="" CanBeStopped="true" Enabled="true" />\\n\\nThe only catch here is I am not passing any start-up parameters like in the NTP example I found.\\n\\nIf these HPCC components are processes, does anyone know what parameters I need to pass?\\n\\nI have had a good look around the internet but cannot find much information, and my Ubuntu knowledge is limited \\n\\nAny help would be greatly appreciated.\\n\\nThanks in advance\", \"post_time\": \"2015-12-10 16:46:08\" },\n\t{ \"post_id\": 9754, \"topic_id\": 2328, \"forum_id\": 16, \"post_subject\": \"Backup of workunit data\", \"username\": \"subba\", \"post_text\": \"Is there a way to backup workunit data before reconfiguring the cluster?\\n\\nI looked into /var/log/HPCCSystems and /opt/HPCCSystems. 
But the storage of workunit data does not seem to be as plain as it appears on the ECL watch.\\n\\nThanks in advance,\\nsubba\", \"post_time\": \"2016-06-14 05:53:43\" },\n\t{ \"post_id\": 9820, \"topic_id\": 2338, \"forum_id\": 16, \"post_subject\": \"Re: Missing dfu workunits\", \"username\": \"Puneet\", \"post_text\": \"Yes Richard, I searched for archived only workunits as well but there were no results.\\n\\nRegards,\\nPuneet\", \"post_time\": \"2016-06-24 06:20:36\" },\n\t{ \"post_id\": 9814, \"topic_id\": 2338, \"forum_id\": 16, \"post_subject\": \"Re: Missing dfu workunits\", \"username\": \"rtaylor\", \"post_text\": \"Puneet,\\n\\nIn the DFU Workunits list in ECL Watch, have you tried checking the "Archived Only" box on the Filter to see if there are any archived DFU workunits at all?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-23 08:21:58\" },\n\t{ \"post_id\": 9812, \"topic_id\": 2338, \"forum_id\": 16, \"post_subject\": \"Missing dfu workunits\", \"username\": \"Puneet\", \"post_text\": \"When looking at files which were sprayed a few months ago and trying to figure out the source .csvs for them, I noticed that the wuid visible when looking at the file details on the ECL watch no longer exists.\\n\\nI tried searching for the same workunit using both the dfuplus tool as well as through the ECL Watch UI but couldn't find any trace of it. \\n\\nOn further inspection, I found that none of the workunits over two months old could be found. Is there a configuration setting I'm missing somewhere that tells HPCC to delete old DFU workunits instead of archiving it?\\n\\nThis was on the community edition 5.4.2\\n\\nThanks in advance.\\n~Puneet\", \"post_time\": \"2016-06-23 05:27:37\" },\n\t{ \"post_id\": 20343, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"KunalA\", \"post_text\": \"Hi Anthony,\\n\\nThe GangliaMonitoring-Plugin to ECLWatch is provided as a convenience, so that you have access to ganglia information without having to leave ECLWatch. It also enables a (red/yellow/green) indication light in the banner of ECLWatch. However, the Ganglia interface, may provide more features.\\n\\nThanks,\\nKunal\", \"post_time\": \"2017-12-19 18:30:44\" },\n\t{ \"post_id\": 20323, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi Kunal, \\n\\nThanks for your very quick response, it's really appreciated. \\n\\nHere's a little explanation of our current setup.\\n\\nWe currently have 4 Roxie clusters all being used by different external applications/clients.\\n\\nOn the Ganglia host we are running 4 different Gmond instances to act as the "Cluster" master for each of our Roxie Clusters. We are doing this by running gmond in Docker all running on different ports to separate them out between the different clusters.\\n\\nWe do this to keep all Ganglia data central to one host as we sometimes reload/upgrade hardware on our Roxie nodes.\\n\\nThese 4 different Ganglia instances running in Docker that then talk to the main Ganglia Master which runs another instance of Gmond and gmetad.\\n\\nMy Thoughts/Plan to enable the MultiNodes to work are as follows...\\n1. Install Ganglia gmond and HPCC Ganglia plugin on all nodes within the MultiNode.\\n2. Create a new Cluster for the MultiNode on the Ganglia Master host \\n3. Ensure that the Cluster gmond speaks to the Grid/Gmetad on the Ganglia Master host\\n4. 
Ensure that all nodes within the MultiNode speak to the newly created Cluster\\n\\nSo far, the same steps and process that has been followed for all previous instances.\\n\\nAs the gmond can send data to more than one host we would setup a gmetad instance on the Master node for the MultiNode. Once this is setup/configured we could then use ECL Watch on the Master node for the MultiNode to view all requests that the MultiNode is performing.\\n\\nOn another note:\\nAs we are using a single host for all of our Ganglia data, we do not have the data rendering in ECL Watch. We have the plugin installed on each of our nodes but do not use the frontend provided by ECL Watch. Are there any benefits/additional capabilities of using the ECL Watch frontend over the Ganglia Frontend interface we are currently using?\\n\\nThanks,\\nAntony\", \"post_time\": \"2017-12-19 09:56:44\" },\n\t{ \"post_id\": 20313, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"KunalA\", \"post_text\": \"Hi Anthony, \\n\\nWe generally suggest to follow Ganglia's documentation for install. \\n\\nThat being said, Gmond is generally installed across all nodes, and Gmetad is installed on your master node. \\n\\nOnce, you have that configured per Ganglia's or your own systems requirements, the Ganglia-Monitoring Plugin provided by HPCC should work to display data in ECLWatch. \\n\\nThanks,\\nKunal\", \"post_time\": \"2017-12-18 16:27:46\" },\n\t{ \"post_id\": 20283, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi,\\n\\nWe've now got Ganglia Monitoring on all of our production our single-node Roxies.\\n\\nNow that we have this, we would like to roll ganglia out to our Multi-Node Roxies. 
\\n\\nWhat are the best practices for monitoring multi-nodes through Ganglia?\\n\\nDo we only need to install Ganglia on the node which clients hit (Master node of the Multi-Node cluster) or do we need to install this on all nodes within the multi-node cluster?\\n\\nThanks,\\nAntony\", \"post_time\": \"2017-12-18 15:14:22\" },\n\t{ \"post_id\": 12101, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi Gleb,\\n\\nApologies with the late response to your post, I unfortunately had to work on some other projects over the last few weeks.\\n\\nI haven’t had a chance to look into the remote mount approach yet, but will be getting back into this over the next few days.\\n\\nHowever, I have had a strange problem occur on one of our live Roxies that I have Ganglia installed on.\\n\\nAll of a sudden last week, the Ganglia service / Gmond was using up 100% CPU which caused the Roxie to become unresponsive.\\n\\nI have rebooted it, but the problem occurs pretty much straight away and nothing has been changed since we last spoke in August.\\n\\nI can get around the problem by running sudo service ganglia-monitor stop but would like to know if you have come across this before?\\n\\nI have been hunting around for log files that may shed some light on this but have found nothing at the moment, \\n\\nDo you know of any log files that may contain Gmond / Ganglia information?\\n\\nI have attached a screen shot of process list for you as well.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2016-10-18 11:51:46\" },\n\t{ \"post_id\": 11071, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi Antony,\\n\\nI would recommend that you install gmetad on any EclWatch server that is internal to the cluster that you want to view graphs on. In larger environments it would be expected to have multiple gmetad services running, often arranged in hierarchical structure. The gmetad service running internally in your cluster can be viewed as a convenient way to allow for metrics to be viewed from within EclWatch. The external monitoring node(s) likely wouldn’t have HPCC installed, and would use the ganglia web interface to surface metrics to the user, including the HPCC node and Roxie metrics. In our HPCC VM download you can view some basic HPCC customizations to the ganglia web interface.\\n\\nIf you do a remote mount approach, you will have to mount it to the default path for EclWatch to pull up the graphs (helpful link I found on remote mounts: https://www.digitalocean.com/community/ ... untu-14-04).\\n\\nThe esp.log would only contain entries relating to ganglia when initially binding the ws_rrd service at startup and when the ganglia graphs are generated for users in EclWatch. ws_rrd is the ESP service responsible for displaying ganglia graphs in EclWatch. 
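A quick way to sanity-check the gmond/gmetad side of this setup, independent of EclWatch: both daemons dump their current state as XML to anything that connects to their TCP port (8649 for gmond, 8651 for gmetad by default). A minimal Python sketch, with the host and port as placeholders to adjust:

# Hedged sketch: read the XML telemetry a gmond (port 8649) or gmetad (8651)
# instance serves on connect, and list the hosts it is currently reporting.
# If the DOCTYPE header in the dump upsets your parser, strip it before parsing.
import socket
import xml.etree.ElementTree as ET

HOST, PORT = "192.168.20.125", 8649   # example values only -- use your own node

def fetch_xml(host, port):
    chunks = []
    s = socket.create_connection((host, port), timeout=10)
    try:
        while True:
            data = s.recv(8192)
            if not data:
                break
            chunks.append(data)
    finally:
        s.close()
    return b"".join(chunks)

root = ET.fromstring(fetch_xml(HOST, PORT))
for cluster in root.iter("CLUSTER"):
    print("CLUSTER %s" % cluster.get("NAME"))
    for host in cluster.iter("HOST"):
        print("  HOST %-24s %d metrics" % (host.get("NAME"), len(host.findall("METRIC"))))

If a node you expect is missing from that output, the problem is upstream of EclWatch and ws_rrd, most likely in the udp_send_channel/udp_recv_channel settings of that node's gmond.conf.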
Gmond and gmetad would have their own log files, but I found just examining the gmond and gemetad traffic helpful in debugging connectivity issues.\", \"post_time\": \"2016-08-31 14:50:44\" },\n\t{ \"post_id\": 11053, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi Gleb,\\n\\nSorry the late reply, \\n\\nWe have a test HPCC cluster here which is currently not doing anything data wise so I have made that the “external monitoring node” and that has – gmetad,RRDtool, Ganglia monitor and Ganglia web front end installed, by running this command \\n\\nsudo apt-get install -y ganglia-monitor rrdtool gmetad ganglia-webfrontend\\n\\nThe monitored Roxie cluster includes – two test roxie’s from my test cluster (I did this to practice the steps getting this set-up), and one live Roxie from my Live cluster (to test we are receiving some real metrics) each of these monitored nodes only has Ganglia Monitor installed by running this command:\\n\\nsudo apt-get install -y ganglia-monitor\\n\\nHowever, all machines have the HPCC monitoring agent installed by running these commands: \\n\\n1. sudo dpkg -i hpccsystems-ganglia-monitoring-5.2.2-1trusty_amd64.deb\\n2. sudo apt-get update\\n3. sudo apt-get install -f\\n4. sudo dpkg -i hpccsystems-ganglia-monitoring-5.2.2-1trusty_amd64.deb\\n5. sudo service ganglia-monitor restart && sudo service gmetad restart && sudo service apache2 restart (on master node)\\n6. . sudo service ganglia-monitor restart (on monitored nodes)\\n\\n\\nI was unware that ECL Watch is looking locally for the files, but that would explain all the graph errors, do you know the steps involved to remote mount to another machine on Ubuntu 14.04?\\n\\nOr is the preferred method to install Gmetad and change the config to point to the new RRDS file location?\\n\\n# Where gmetad stores its round-robin databases\\n# default: "/var/lib/ganglia/rrds"\\n# rrd_rootdir "/some/other/place"\\n\\nIf so could you send over an example?\\n\\nBelow is some of the ESP log file from the 22nd, I couldn't upload it all with it being close to 1mb, you will see a few calls from IP : 192.168.20.72 which is my PC I am using to connect to ECL watch.\\n\\n00000356 2016-08-22 14:26:49.907 12301 12306 "SYS: PU= 0% MU= 1% MAL=5406816 MMP=1564672 SBK=3842144 TOT=7316K RAM=707236K SWP=0K"\\n00000357 2016-08-22 14:26:49.907 12301 12306 "DSK: [sda] r/s=0.1 kr/s=1.5 w/s=6.4 kw/s=43.1 bsy=0 NIC: rxp/s=0.0 rxk/s=0.0 txp/s=0.0 txk/s=0.0 CPU: usr=0 sys=0 iow=0 idle=99"\\n00000358 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.897389] init: tty4 main process (1227) killed by TERM signal"\\n00000359 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.897661] init: tty5 main process (1230) killed by TERM signal"\\n0000035A 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.897909] init: tty2 main process (1235) killed by TERM signal"\\n0000035B 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.898168] init: tty3 main process (1236) killed by TERM signal"\\n0000035C 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.898419] init: tty6 main process (1238) killed by TERM signal"\\n0000035D 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.898665] init: cron main process (1279) killed by TERM signal"\\n0000035E 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.899106] init: tty1 main process (3088) killed by TERM signal"\\n0000035F 2016-08-22 14:26:49.908 12301 
12306 "KERN_UNKNOWN: <12>[10295.899985] init: irqbalance main process (27775) killed by TERM signal"\\n00000360 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.900647] init: ganglia-monitor main process (25128) killed by TERM signal"\\n00000361 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.900837] init: gmetad main process (25144) killed by TERM signal"\\n00000362 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.901206] init: plymouth-upstart-bridge main process (14247) terminated with status 1"\\n00000363 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.901219] init: plymouth-upstart-bridge main process ended, respawning"\\n00000364 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.945201] init: plymouth-upstart-bridge main process (14272) terminated with status 1"\\n00000365 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.945211] init: plymouth-upstart-bridge main process ended, respawning"\\n00000366 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.958314] init: plymouth-upstart-bridge main process (14276) terminated with status 1"\\n00000367 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.958327] init: plymouth-upstart-bridge main process ended, respawning"\\n00000368 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.965898] init: plymouth-upstart-bridge main process (14279) terminated with status 1"\\n00000369 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.965910] init: plymouth-upstart-bridge main process ended, respawning"\\n0000036A 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.970521] init: plymouth-upstart-bridge main process (14281) terminated with status 1"\\n0000036B 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.970533] init: plymouth-upstart-bridge main process ended, respawning"\\n0000036C 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.973068] init: plymouth-upstart-bridge main process (14283) terminated with status 1"\\n0000036D 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.973080] init: plymouth-upstart-bridge main process ended, respawning"\\n0000036E 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.977678] init: plymouth-upstart-bridge main process (14285) terminated with status 1"\\n0000036F 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.977692] init: plymouth-upstart-bridge main process ended, respawning"\\n00000370 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.983208] init: plymouth-upstart-bridge main process (14288) terminated with status 1"\\n00000371 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.983222] init: plymouth-upstart-bridge main process ended, respawning"\\n00000372 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.990099] init: plymouth-upstart-bridge main process (14291) terminated with status 1"\\n00000373 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.990112] init: plymouth-upstart-bridge main process ended, respawning"\\n00000374 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.994984] init: plymouth-upstart-bridge main process (14293) terminated with status 1"\\n00000375 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10295.994998] init: plymouth-upstart-bridge main process ended, respawning"\\n00000376 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10296.000381] init: plymouth-upstart-bridge main process (14296) terminated with status 
1"\\n00000377 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10296.000395] init: plymouth-upstart-bridge respawning too fast, stopped"\\n00000378 2016-08-22 14:26:49.908 12301 12306 "KERN_UNKNOWN: <12>[10296.063247] init: wait-for-state (rcplymouth-shutdown) main process (14295) killed by TERM signal"\\n00000379 2016-08-22 14:26:50.785 12301 12301 "ESP Abort Handler..."\\n0000037A 2016-08-22 14:26:50.785 12301 12301 "select handler stopped."\\n00000001 2016-08-22 14:31:31.898 2280 2280 "Esp starting community_5.0.2-1"\\n00000002 2016-08-22 14:31:31.907 2280 2280 "componentfiles are under /opt/HPCCSystems/componentfiles"\\n00000003 2016-08-22 14:31:31.907 2280 2280 "ESP process name [myesp]"\\n00000004 2016-08-22 14:31:31.907 2280 2280 "Initializing DALI client [servers = 192.168.20.125:7070]"\\n00000005 2016-08-22 14:31:31.913 2280 2280 "Configuring Esp Platform..."\\n00000006 2016-08-22 14:31:31.913 2280 2280 "loadServices"\\n00000007 2016-08-22 14:31:32.139 2280 2280 "queueLabel=dfuserver_queue"\\n00000008 2016-08-22 14:31:32.139 2280 2280 "monitorQueueLabel=dfuserver_monitor_queue"\\n00000009 2016-08-22 14:31:32.139 2280 2280 "rootFolder=/c$/thordata"\\n0000000A 2016-08-22 14:31:32.292 2280 2280 "Initializing WsDfuXRef_EclWatch_myesp service [process = myesp]"\\n0000000B 2016-08-22 14:31:32.292 2280 2280 "Initializing WsDfu_EclWatch_myesp service [process = myesp]"\\n0000000C 2016-08-22 14:31:32.312 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libpyembed.so"\\n0000000D 2016-08-22 14:31:32.312 2280 2280 "Current reported version is Python2.7 Embed Helper 1.0.0"\\n0000000E 2016-08-22 14:31:32.312 2280 2280 "Compatible version Python2.7 Embed Helper 1.0.0"\\n0000000F 2016-08-22 14:31:32.319 2280 2280 "Error loading /opt/HPCCSystems/plugins/libv8embed.so: libv8.so.3.14.5: cannot open shared object file: No such file or directory"\\n00000010 2016-08-22 14:31:32.319 2280 2280 "ERROR: 0: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.2-1/CE/ubuntu-14.04-amd64/HPCC-Platform/common/dllserver/thorplugin.cpp(487) : Loading plugin : Failed to load plugin /opt/HPCCSystems/plugins/libv8embed.so"\\n00000011 2016-08-22 14:31:32.321 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libworkunitservices.so"\\n00000012 2016-08-22 14:31:32.321 2280 2280 "Current reported version is WORKUNITSERVICES 1.0.1"\\n00000013 2016-08-22 14:31:32.321 2280 2280 "Compatible version WORKUNITSERVICES 1.0 "\\n00000014 2016-08-22 14:31:32.321 2280 2280 "Compatible version WORKUNITSERVICES 1.0.1"\\n00000015 2016-08-22 14:31:32.329 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libauditlib.so"\\n00000016 2016-08-22 14:31:32.329 2280 2280 "Current reported version is AUDITLIB 1.0.1"\\n00000017 2016-08-22 14:31:32.329 2280 2280 "Compatible version AUDITLIB 1.0.0 [29933bc38c1f07bcf70f938ad18775c1]"\\n00000018 2016-08-22 14:31:32.329 2280 2280 "Compatible version AUDITLIB 1.0.1"\\n00000019 2016-08-22 14:31:32.424 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libfileservices.so"\\n0000001A 2016-08-22 14:31:32.424 2280 2280 "Current reported version is FILESERVICES 2.1.3"\\n0000001B 2016-08-22 14:31:32.431 2280 2280 "Compatible version FILESERVICES 2.1 [a68789cfb01d00ef6dc362e52d5eac0e]"\\n0000001C 2016-08-22 14:31:32.431 2280 2280 "Compatible version FILESERVICES 2.1.1"\\n0000001D 2016-08-22 14:31:32.431 2280 2280 "Compatible version FILESERVICES 2.1.2"\\n0000001E 2016-08-22 14:31:32.431 2280 2280 "Compatible version FILESERVICES 2.1.3"\\n0000001F 2016-08-22 14:31:32.436 2280 2280 "Loaded DLL 
/opt/HPCCSystems/plugins/liblogging.so"\\n00000020 2016-08-22 14:31:32.436 2280 2280 "Current reported version is LOGGING 1.0.1"\\n00000021 2016-08-22 14:31:32.436 2280 2280 "Compatible version LOGGING 1.0.0 [66aec3fb4911ceda247c99d6a2a5944c]"\\n00000022 2016-08-22 14:31:32.436 2280 2280 "Compatible version LOGGING 1.0.1"\\n00000023 2016-08-22 14:31:32.444 2280 2280 "Error loading /opt/HPCCSystems/plugins/libRembed.so: libR.so: cannot open shared object file: No such file or directory"\\n00000024 2016-08-22 14:31:32.444 2280 2280 "ERROR: 0: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.2-1/CE/ubuntu-14.04-amd64/HPCC-Platform/common/dllserver/thorplugin.cpp(487) : Loading plugin : Failed to load plugin /opt/HPCCSystems/plugins/libRembed.so"\\n00000025 2016-08-22 14:31:32.448 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libdebugservices.so"\\n00000026 2016-08-22 14:31:32.448 2280 2280 "Current reported version is DEBUGSERVICES 1.0.1"\\n00000027 2016-08-22 14:31:32.449 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libparselib.so"\\n00000028 2016-08-22 14:31:32.449 2280 2280 "Current reported version is PARSELIB 1.0.1"\\n00000029 2016-08-22 14:31:32.449 2280 2280 "Compatible version PARSELIB 1.0.0 [fa9b3ab8fad8e46d8c926015cbd39f06]"\\n0000002A 2016-08-22 14:31:32.449 2280 2280 "Compatible version PARSELIB 1.0.1"\\n0000002B 2016-08-22 14:31:32.452 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libunicodelib.so"\\n0000002C 2016-08-22 14:31:32.452 2280 2280 "Current reported version is UNICODELIB 1.1.06"\\n0000002D 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.01 [64d78857c1cecae15bd238cd7767b3c1]"\\n0000002E 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.01 [e8790fe30d9627997749c3c4839b5957]"\\n0000002F 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.02"\\n00000030 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.03"\\n00000031 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.04"\\n00000032 2016-08-22 14:31:32.452 2280 2280 "Compatible version UNICODELIB 1.1.05"\\n00000033 2016-08-22 14:31:32.453 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libsqlite3embed.so"\\n00000034 2016-08-22 14:31:32.453 2280 2280 "Current reported version is SqLite3 Embed Helper 1.0.0"\\n00000035 2016-08-22 14:31:32.453 2280 2280 "Compatible version SqLite3 Embed Helper 1.0.0"\\n00000036 2016-08-22 14:31:32.461 2280 2280 "Loaded DLL /opt/HPCCSystems/plugins/libstringlib.so"\\n00000037 2016-08-22 14:31:32.461 2280 2280 "Current reported version is STRINGLIB 1.1.14"\\n00000038 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.06 [fd997dc3feb4ca385d59a12b9dc4beab]"\\n00000039 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.06 [f8305e66ca26a1447dee66d4a36d88dc]"\\n0000003A 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.07"\\n0000003B 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.08"\\n0000003C 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.09"\\n0000003D 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.10"\\n0000003E 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.11"\\n0000003F 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.12"\\n00000040 2016-08-22 14:31:32.461 2280 2280 "Compatible version STRINGLIB 1.1.13"\\n00000041 2016-08-22 14:31:32.470 2280 2280 "Error loading /opt/HPCCSystems/plugins/libjavaembed.so: libjvm.so: cannot open shared object 
file: No such file or directory"\\n00000042 2016-08-22 14:31:32.471 2280 2280 "ERROR: 0: /var/lib/jenkins/workspace/CE-Candidate-with-plugins-5.0.2-1/CE/ubuntu-14.04-amd64/HPCC-Platform/common/dllserver/thorplugin.cpp(487) : Loading plugin : Failed to load plugin /opt/HPCCSystems/plugins/libjavaembed.so"\\n00000043 2016-08-22 14:31:32.471 2280 2280 "Plugin /opt/HPCCSystems/plugins/libpyembed.so exports getECLPluginDefinition but does not export ECL - not loading"\\n00000044 2016-08-22 14:31:32.471 2280 2280 "Error loading /opt/HPCCSystems/plugins/libv8embed.so: libv8.so.3.14.5: cannot open shared object file: No such file or directory"\\n00000045 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libworkunitservices.so[lib_WORKUNITSERVICES] version = WORKUNITSERVICES 1.0.1"\\n00000046 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libauditlib.so[lib_auditlib] version = AUDITLIB 1.0.1"\\n00000047 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libfileservices.so[lib_fileservices] version = FILESERVICES 2.1.3"\\n00000048 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/liblogging.so[lib_logging] version = LOGGING 1.0.1"\\n00000049 2016-08-22 14:31:32.471 2280 2280 "Error loading /opt/HPCCSystems/plugins/libRembed.so: libR.so: cannot open shared object file: No such file or directory"\\n0000004A 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libdebugservices.so[lib_debugservices] version = DEBUGSERVICES 1.0.1"\\n0000004B 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libparselib.so[lib_parselib] version = PARSELIB 1.0.1"\\n0000004C 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libunicodelib.so[lib_unicodelib] version = UNICODELIB 1.1.06"\\n0000004D 2016-08-22 14:31:32.471 2280 2280 "Plugin /opt/HPCCSystems/plugins/libsqlite3embed.so exports getECLPluginDefinition but does not export ECL - not loading"\\n0000004E 2016-08-22 14:31:32.471 2280 2280 "Loading plugin /opt/HPCCSystems/plugins/libstringlib.so[lib_stringlib] version = STRINGLIB 1.1.14"\\n0000004F 2016-08-22 14:31:32.471 2280 2280 "Error loading /opt/HPCCSystems/plugins/libjavaembed.so: libjvm.so: cannot open shared object file: No such file or directory"\\n00000050 2016-08-22 14:31:33.015 2280 2280 "Initializing WsWorkunits_EclWatch_myesp service [process = myesp]"\\n00000051 2016-08-22 14:31:33.207 2280 2280 "CSmartSocketFactory::CSmartSocketFactory(192.168.20.125:9876)"\\n00000052 2016-08-22 14:31:33.254 2280 2280 "Load binding WsSMC_smc_myesp (type: ws_smcSoapBinding, process: myesp) succeeded"\\n00000053 2016-08-22 14:31:33.266 2280 2280 "Load binding WsWorkunits_smc_myesp (type: ws_workunitsSoapBinding, process: myesp) succeeded"\\n00000054 2016-08-22 14:31:33.278 2280 2280 "Load binding WsTopology_smc_myesp (type: ws_topologySoapBinding, process: myesp) succeeded"\\n00000055 2016-08-22 14:31:33.283 2280 2280 "Load binding WsDfu_smc_myesp (type: ws_dfuSoapBinding, process: myesp) succeeded"\\n00000056 2016-08-22 14:31:33.286 2280 2280 "Load binding WsDfuXRef_smc_myesp (type: ws_dfuxrefSoapBinding, process: myesp) succeeded"\\n00000057 2016-08-22 14:31:33.287 2280 2280 "Load binding ecldirect_smc_myesp (type: EclDirectSoapBinding, process: myesp) succeeded"\\n00000058 2016-08-22 14:31:33.296 2280 2280 "Load binding FileSpray_Serv_smc_myesp (type: FileSpray_Bind, process: myesp) succeeded"\\n00000059 2016-08-22 14:31:33.297 2280 2280 "Load 
binding WsFileIO_smc_myesp (type: WsFileIO, process: myesp) succeeded"\\n0000005A 2016-08-22 14:31:33.310 2280 2280 "Load binding WsPackageProcess_smc_myesp (type: WsPackageProcessSoapBinding, process: myesp) succeeded"\\n0000005B 2016-08-22 14:31:33.312 2280 2280 "Load binding ws_machine_smc_myesp (type: ws_machineSoapBinding, process: myesp) succeeded"\\n0000005C 2016-08-22 14:31:33.313 2280 2280 "Load binding ws_account_smc_myesp (type: ws_accountSoapBinding, process: myesp) succeeded"\\n0000005D 2016-08-22 14:31:33.314 2280 2280 "Load binding ws_access_smc_myesp (type: ws_accessSoapBinding, process: myesp) succeeded"\\n0000005E 2016-08-22 14:31:33.314 2280 2280 "Load binding ws_config_smc_myesp (type: ws_configSoapBinding, process: myesp) succeeded"\\n0000005F 2016-08-22 14:31:33.315 2280 2280 "Load binding ws_ecl_ws_ecl_myesp (type: ws_eclSoapBinding, process: myesp) succeeded"\\n00000060 2016-08-22 14:31:33.333 2280 2280 "binding WsSMC_smc_myesp, on 0.0.0.0:8010"\\n00000061 2016-08-22 14:31:33.333 2280 2280 " created server socket(14)"\\n00000062 2016-08-22 14:31:33.335 2280 2280 " Socket(14) listening."\\n00000063 2016-08-22 14:31:33.340 2280 2280 "binding WsWorkunits_smc_myesp, on 0.0.0.0:8010"\\n00000064 2016-08-22 14:31:33.340 2280 2280 "binding WsTopology_smc_myesp, on 0.0.0.0:8010"\\n00000065 2016-08-22 14:31:33.340 2280 2280 "binding WsDfu_smc_myesp, on 0.0.0.0:8010"\\n00000066 2016-08-22 14:31:33.340 2280 2280 "binding WsDfuXRef_smc_myesp, on 0.0.0.0:8010"\\n00000067 2016-08-22 14:31:33.340 2280 2280 "binding ecldirect_smc_myesp, on 0.0.0.0:8010"\\n00000068 2016-08-22 14:31:33.340 2280 2280 "binding FileSpray_Serv_smc_myesp, on 0.0.0.0:8010"\\n00000069 2016-08-22 14:31:33.340 2280 2280 "binding WsFileIO_smc_myesp, on 0.0.0.0:8010"\\n0000006A 2016-08-22 14:31:33.340 2280 2280 "binding WsPackageProcess_smc_myesp, on 0.0.0.0:8010"\\n0000006B 2016-08-22 14:31:33.340 2280 2280 "binding ws_machine_smc_myesp, on 0.0.0.0:8010"\\n0000006C 2016-08-22 14:31:33.340 2280 2280 "binding ws_account_smc_myesp, on 0.0.0.0:8010"\\n0000006D 2016-08-22 14:31:33.340 2280 2280 "binding ws_access_smc_myesp, on 0.0.0.0:8010"\\n0000006E 2016-08-22 14:31:33.340 2280 2280 "binding ws_config_smc_myesp, on 0.0.0.0:8010"\\n0000006F 2016-08-22 14:31:33.340 2280 2280 "binding ws_ecl_ws_ecl_myesp, on 0.0.0.0:8002"\\n00000070 2016-08-22 14:31:33.340 2280 2280 " created server socket(15)"\\n00000071 2016-08-22 14:31:33.340 2280 2280 " Socket(15) listening."\\n00000072 2016-08-22 14:31:33.340 2280 2280 "Creating sentinel file esp.sentinel for rerun from script"\\n00000073 2016-08-22 14:31:33.340 2280 2280 "ESP server started."\\n00000074 2016-08-22 14:31:43.992 2280 3041 "HTTP First Line: POST /WsSMC/Activity.json HTTP/1.1"\\n00000075 2016-08-22 14:31:43.992 2280 3041 "POST /WsSMC/Activity.json, from unknown@192.168.20.72"\\n00000076 2016-08-22 14:31:44.004 2280 3041 "CWsSMCEx::getActivityInfo - rebuild cached information"\\n00000077 2016-08-22 14:31:44.024 2280 3041 "Time taken for createActivityInfo: 46570230 cycles (46M) =\\n\\nlet me know if you need anymore information\\n\\nThanks again\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2016-08-30 14:21:33\" },\n\t{ \"post_id\": 10893, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Hi Antony,\\n\\nI am not sure if I totally understand your exact configuration. 
In a typical small environment you should have an external monitoring node located outside the cluster you intend to monitor. The monitored cluster would have gmond running on each of the nodes. The external monitoring node would have the gmetad daemon, EclWatch, and/or the Apache Ganglia web interface running to view the graphs. \\n\\nEclWatch looks locally for the rrd files, therefore an instance of gmetad or a remote mount of the rrd files is needed. An ECLWatch instance running in the cluster being monitored would need local access to the rrd files to view the graphs. Though if the ESP goes down in the cluster you wouldn't be able to access the graphs, so the external monitoring node is needed.\\n\\nCan you provide ESP logs for the case where it fails to start with the ganglia monitoring plugin installed?\\n\\n-Gleb", "post_time": "2016-08-23 14:08:20" },\n\t{ "post_id": 10843, "topic_id": 2593, "forum_id": 16, "post_subject": "Re: Installing Ganglia Ubuntu 14.04", "username": "amillar", "post_text": "Hi Gleb,\\n\\nThanks for the link for the HPCC Monitoring agent
\\n\\nThis morning I have re-configured Ganglia to use the name "TEST_HPCC" and purged the rrds folder. This seems to have solved the problem with the custom graphs so thanks for that.\\n\\nI am still having problems capturing information from one of our Live Roxies though. It is on a different subnet from my test cluster, but both can communicate ok on a network level, however when I got to 8010 on my Roxie, and click the plugin icon, no graphs display and I get around 15 errors which all seem to relate to opening files /var/lib/ganglia/rrds/__sumaryinfo__/ - my thought here is that ECL watch is trying to open these locally, rather than from the Ganglia master which has the rrds files. Do you know where the IP / Host information is configured for this? Should it not get this configuration from the gmond.conf file?\\n\\nI thought the different subnet may be an issue so I set-up a new Ganglia monitor on another test machine on the same subnet, but I can’t get ESP to start when the monitor is installed, when I remove it, it starts ok. I can confirm that the HPCC agent and the HPCC platform version are 5.2.0.1.\\n\\nI did have a good look around the internet, and Ganglia mention having the hostname configured in /etc/hosts can cause issues, so I have now entered this value on all the hosts configured for Ganglia "127.0.0.1 *sysname* localhost.localdomain localhost" I gave them all a restart but I am still getting the error opening graphs on the live roxie, however I can see metrics coming in on http://IPADDRESS/Ganglia. So I think communication between all the nodes is ok. Just the ECL watch plugin I am having problems with.\\n\\nAny help would be greatly appreciated.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2016-08-22 13:53:28\" },\n\t{ \"post_id\": 10673, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"To find hpcc-ganglia-monitoring 5.4.2-1 on the download page please select Previous for the version type. Here is the direct link: http://wpc.423a.rhocdn.net/00423A/relea ... _amd64.deb\\n\\nIt looks like the issue has to do with the space in the name of your cluster. Please change the cluster name from "TEST HPCC" to something like "TEST_HPCC". That should fix your issue with the missing graphs.\\n\\nAs far as the graph generation on the command line goes, the first part "RRD TOOL GRAPH CMD -->" should not be include as part of the command line. The command should start with /usr/bin/rrdtool. Sorry, I wasn't clear enough previously. However, I think that once you change your cluster name, the issue will be resolved.\\n\\nIt would also be a good idea to delete all the existing .rrds file before restarting ganglia with the new cluster name.\", \"post_time\": \"2016-08-17 21:07:32\" },\n\t{ \"post_id\": 10663, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi Gleb,\\n\\nThanks for your continued help on this.\\n\\nI have just had a look for the 5.4.2 package on https://hpccsystems.com/download/Monitoring but I can’t seem to find it, do you have a location I can download it from?\\n\\nGreat news on the RRD files, I will keep an eye on this as we do have limited space on our cluster.\\n\\nTo get things moving forward, I have just added the GMOND and the HPCC Ganglia monitor to one of our live roxies. 
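Since several of the EclWatch errors in this thread boil down to the ESP node not being able to read the .rrd files locally, a quick local check can narrow things down before touching gmond or gmetad. A minimal sketch, assuming the default gmetad layout of /var/lib/ganglia/rrds/<cluster>/<host>/<metric>.rrd mentioned above:

# Hedged sketch: confirm the rrd files EclWatch needs are present and readable
# on this node. Assumes the default layout /var/lib/ganglia/rrds/<cluster>/<host>/<metric>.rrd.
import os

RRD_ROOT = "/var/lib/ganglia/rrds"

if not os.path.isdir(RRD_ROOT):
    print("No %s on this node -- gmetad is not writing here, so EclWatch graphs "
          "cannot work locally (a local gmetad or a remote mount is needed)." % RRD_ROOT)
else:
    for cluster in sorted(os.listdir(RRD_ROOT)):
        cpath = os.path.join(RRD_ROOT, cluster)
        if not os.path.isdir(cpath):
            continue
        for host in sorted(os.listdir(cpath)):
            hpath = os.path.join(cpath, host)
            if not os.path.isdir(hpath):
                continue
            rrds = [f for f in os.listdir(hpath) if f.endswith(".rrd")]
            unreadable = [f for f in rrds if not os.access(os.path.join(hpath, f), os.R_OK)]
            print("%s/%s: %d rrd files, %d unreadable" % (cluster, host, len(rrds), len(unreadable)))

An empty or missing tree here means the graphs cannot work on that node no matter how the plugin is configured; gmetad has to write locally, or the rrds directory has to be remote-mounted as Gleb suggests. Also keep in mind the cluster-name-with-a-space issue discussed elsewhere in the thread, since the directory name has to match what ws_rrd puts on the rrdtool command line.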
This is running on platform 5.2.0-1 so I installed that version of the HPCC monitor also.\\n\\nCurrently this live Roxie is logging directly to my test cluster, which is on a 5.4.8-1 – I imagine that won’t make any difference though as the Ganglia versions are the same, please let me know if that is incorrect.\\n\\nHowever currently no graphs are showing on this one, I have attached a screen shot of the errors, which I still get 30mins or so later.\\n\\nHere is a sample of ESP log from my test cluster you ask for :\\n\\n000B44 2016-08-17 13:58:41.276 19165 17958 "RRDTOOL GRAPH CMD --> /usr/bin/rrdtool graph /tmp/hpcc_ws_rrd_graphsJs0Sfx/graphhlXVNz -a SVG --start 1471350866 --end 1471354466 DEF:ds11=/var/lib/ganglia/rrds/TEST HPCC/localhost/mem_free.rrd:sum:AVERAGE LINE1:ds11#0000FF:mem_free -w 300 -h 120 -t 'TEST HPCC:localhost:mem_free' <--"\\n00000B45 2016-08-17 13:58:41.281 19165 17958 "================================================"\\n00000B46 2016-08-17 13:58:41.281 19165 17958 "Signal: 11 Segmentation fault"\\n00000B47 2016-08-17 13:58:41.281 19165 17958 "Fault IP: 00007FA465B3BD63"\\n00000B48 2016-08-17 13:58:41.281 19165 17958 "Accessing: 0000000000000004"\\n00000B49 2016-08-17 13:58:41.281 19165 17958 "Registers:"\\n00000B4A 2016-08-17 13:58:41.281 19165 17958 "EAX:0000000000000004 EBX:0000000000000001 ECX:00007FA465B52620 EDX:0000000000000064 ESI:0000000000000022 EDI:0000000000000004"\\n00000B4B 2016-08-17 13:58:41.281 19165 17958 "CS:EIP:0033:00007FA465B3BD63"\\n00000B4C 2016-08-17 13:58:41.281 19165 17958 " ESP:00007FA43B5A1DF8 EBP:00007FA42C0036A0"\\n00000B4D 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1DF8]: 00007FA43BDEBA68 3BDEE06F00007FA4 00007FA43BDEE06F 2C0029E000007FA4 00007FA42C0029E0 0000000000007FA4 0000000000000000 0000000200000000"\\n00000B4E 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1E18]: 0000000000000002 3B5A1EC000000000 00007FA43B5A1EC0 2C0029E000007FA4 00007FA42C0029E0 3B5A1F2000007FA4 00007FA43B5A1F20 0000000000007FA4"\\n00000B4F 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1E38]: 0000000000000000 0000000000000000 0000000000000000 0000000100000000 0000000000000001 0000000100000000 0000000000000001 0000000400000000"\\n00000B50 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1E58]: 0000000000000004 2C00275000000000 00007FA42C002750 2C0036A000007FA4 00007FA42C0036A0 2C00277800007FA4 00007FA42C002778 0000000100007FA4"\\n00000B51 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1E78]: 0000000000000001 0000000100000000 0000000000000001 3B5A1EE000000000 00007FA43B5A1EE0 3B5A294000007FA4 00007FA43B5A2940 2C00293000007FA4"\\n00000B52 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1E98]: 00007FA42C002930 0000000000007FA4 0000000000000000 0000000000000000 0000000000000000 2C00473000000000 00007FA42C004730 0000003600007FA4"\\n00000B53 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1EB8]: 0000080000000036 0000000000000800 0000000000000000 0000000000000000 0000000000000000 2C003F2000000000 00007FA42C003F20 0000010200007FA4"\\n00000B54 2016-08-17 13:58:41.281 19165 17958 "Stack[00007FA43B5A1ED8]: 0000080000000102 0000000000000800 0000000000000000 0000000000000000 0000000000000000 2C0036D000000000 00007FA42C0036D0 0000002900007FA4"\\n00000B55 2016-08-17 13:58:41.281 19165 17958 "Backtrace:"\\n00000B56 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libjlib.so(+0xe2ff8) [0x7fa466898ff8]"\\n00000B57 2016-08-17 13:58:41.282 19165 17958 " 
/opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP9siginfo_tPv+0x22c) [0x7fa46689aa7c]"\\n00000B58 2016-08-17 13:58:41.282 19165 17958 " /lib/x86_64-linux-gnu/libpthread.so.0(+0x10330) [0x7fa465e8a330]"\\n00000B59 2016-08-17 13:58:41.282 19165 17958 " /lib/x86_64-linux-gnu/libc.so.6(+0x86d63) [0x7fa465b3bd63]"\\n00000B5A 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libws_rrd.so(_ZN16CRRDGraphWrapper8getGraphEP12MemoryBufferRK11StringArrayS4_S4_lliiPKcbS6_+0x798) [0x7fa43bdeba68]"\\n00000B5B 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libws_rrd.so(_ZN9Cws_rrdEx13ongetGraphSVGER11IEspContextR23IEspGraphSVGDataRequestR24IEspGraphSVGDataResponse+0xc4) [0x7fa43bdea474]"\\n00000B5C 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libws_rrd.so(_ZN6ws_rrd18Cws_rrdSoapBinding17onGetInstantQueryER11IEspContextP12CHttpRequestP13CHttpResponsePKcS8_+0x6fd) [0x7fa43bddfeed]"\\n00000B5D 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libesphttp.so(_ZN14EspHttpBinding5onGetEP12CHttpRequestP13CHttpResponse+0x1f8) [0x7fa46766f1d8]"\\n00000B5E 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libesphttp.so(_ZN14CEspHttpServer14processRequestEv+0x5e7) [0x7fa467679fd7]"\\n00000B5F 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libesphttp.so(_ZN11CHttpThread9onRequestEv+0x164) [0x7fa467675ab4]"\\n00000B60 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libesphttp.so(_ZN18CEspProtocolThread3runEv+0x31) [0x7fa4676a7fb1]"\\n00000B61 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread5beginEv+0x2d) [0x7fa46693e1ad]"\\n00000B62 2016-08-17 13:58:41.282 19165 17958 " /opt/HPCCSystems/lib/libjlib.so(_ZN6Thread11_threadmainEPv+0x1e) [0x7fa46693f97e]"\\n00000B63 2016-08-17 13:58:41.282 19165 17958 " /lib/x86_64-linux-gnu/libpthread.so.0(+0x8184) [0x7fa465e82184]"\\n00000B64 2016-08-17 13:58:41.282 19165 17958 " /lib/x86_64-linux-gnu/libc.so.6(clone+0x6d) [0x7fa465baf37d]"\\n00000B65 2016-08-17 13:58:41.282 19165 17958 "ThreadList:\\n7FA46384F700 140344021022464 19166: CMPNotifyClosedThread\\n7FA46304E700 140344012629760 19167: CSocketBaseThread\\n7FA46284D700 140344004237056 19168: MP Connection Thread\\n7FA46204C700 140343995844352 19170: CMemoryUsageReporter\\n7FA4595B6700 140343850526464 19171: unknown\\n7FA44F780700 140343684630272 19172: unknown\\n7FA44B647700 140343616239360 19173: CDaliPublisherClient\\n7FA4427E7700 140343466948352 19174: unknown\\n7FA4411F5700 140343443937024 19175: unknown\\n7FA43BDA4700 140343355524864 19176: CSocketBaseThread\\n7FA43B5A3700 140343347132160 17958: CEspProtocolThread\\n\\nI also tried running this directly via putty on the host master (hopefully I have done this correctly)\\n\\n@HPCC-T1:~$ RRDTOOL GRAPH CMD --> /usr/bin/rrdtool graph /tmp/hpcc_ws_rrd_graphsJs0Sfx/graphhlXVNz -a SVG --start 1471350866 --end 1471354466 DEF:ds11=/var/lib/ganglia/rrds/TEST HPCC/localhost/mem_free.rrd:sum:AVERAGE\\n-bash: /usr/bin/rrdtool: Permission denied\\n\\nThen again with Sudo :\\n\\n@HPCC-T1:~$ sudo RRDTOOL GRAPH CMD --> /usr/bin/rrdtool graph /tmp/hpcc_ws_rrd_graphsJs0Sfx/graphhlXVNz -a SVG --start 1471350866 --end 1471354466 DEF:ds11=/var/lib/ganglia/rrds/TEST HPCC/localhost/mem_free.rrd:sum:AVERAGE\\n-bash: /usr/bin/rrdtool: Permission denied\\n\\nLet me know if you need anymore information\\n\\nThanks in advance\\n\\nAntony\", \"post_time\": \"2016-08-17 15:05:31\" },\n\t{ \"post_id\": 10653, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 
14.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"I installed hpccsystems-ganglia-monitoring 5.2.2 on Ubuntu 14.04 with HPCC 5.4.8-1, and I was not able to reproduce your issue with custom monitoring. The fact that you get some graphs and not custom graphs is a bit strange. Btw there is a ganglia-monitoring 5.4.2 package on the portal, though I don’t think that is the issue you are seeing. \\n\\nCan you please provide a portion of the esp log (/var/log/HPCCSystems/myesp/esp.log) that deals with the custom graph call. You can tail the log and you will see entries starting with “RRDTOOL GRAPH CMD -->” for every graph that is generated. If there is an error I would expect to see it after that entry. You can also try copying the command from the log and running it directly on the command line to see if you get an error. In general, if the rrd file is there and the ESP can access the file for reading, then I would expect the command to generate a graph.\\n \\nIn regards to size, the round robin database stores data with progressively less resolution. So data that is 6 months old is less granular than data from the last hour. The RRD file should reach a fixed size and write over itself, so you won’t have to worry about cycling the file out. I am not sure how to map the defined resolution, the number of metrics, and the types of data stored (such as avg and running count) to a fixed file size. You may need to research RR databases further to get a concrete answer, but some experimentation will probably give some insight to the max file size you could expect.\", \"post_time\": \"2016-08-16 21:17:57\" },\n\t{ \"post_id\": 10633, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi Gleb,\\n\\nThanks for the quick reply that’s been a great help for me to understand how all these components work together.\\n\\nThis morning I completely removed and purged all of the ganglia components and folders, one to see if I can safely remove everything ok, and two to start the setup from fresh so I am more familiar with it.\\n\\nI have now re-setup the Ganglia cluster, by just installing Ganglia monitor, RRDtool, Gmeatad, Ganglia Web Frontend on the Ganglia master node, and then just the Ganglia monitor (Gmond) on the other nodes. A quick config of the gmetad.conf and gmond.conf on the master, and the gmond.conf on the other nodes and all seems to be working ok.\\n\\nI then proceeded to install the HPCC monitoring component ( hpccsystems-ganglia-monitoring-5.2.2-1trusty_amd64.deb), on all the nodes and then ran sudo bash start-hpcc.sh restart at the end of the process, and I can see all the nodes and metrics via http://IPADDRESS/Ganglia, the ECL watch plugin as well as the /etc/ganglia/conf.d and /etc/ganglia/.pyconf files
\\n\\nThe only part in the ECL watch that does not seem to be working is the “custom monitoring” I can use the drop down to select the cluster and metrics e.g. free mem over the last hour, but when I hit “generate graph” nothing seems to happen, no errors or anything – is this just a case of waiting for metrics to be generated over the next few hours?\\n\\nAlso this time around I have completely bypassed the install_graphs_helper.sh script, as I think that was complicating my set-up and it I don’t think it’s needed, but do let me know if I am wrong on this.\\n\\nI think I may have jumped the gun a bit, with these errors:\\n\\ncould not find rrd file for /var/lib/ganglia/rrds/__summaryindo__/disk_total.rrd three times.\\n\\nAs after around 10mins (thanks to your post) these errors do go away.
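One general note on the rrds disk usage discussed in this thread: with RRDtool, retention and file size are fixed by the RRA definitions in gmetad.conf at the time each .rrd file is created, so the way to cap disk use is to pick coarser or shorter RRAs (and recreate the files), not a cron job that prunes old data. A rough retention calculator for the default RRAs quoted from gmetad.conf further down, using standard RRDtool semantics (retention = step x steps-per-row x rows) and assuming the default 15-second polling step:

# Hedged sketch: estimate how far back each RRA definition reaches, assuming a
# 15-second polling step (the default quoted in the gmetad.conf comments).
STEP_SECONDS = 15

# RRA:<CF>:<xff>:<steps_per_row>:<rows> -- the "new default" RRAs from gmetad.conf
RRAS = ["RRA:AVERAGE:0.5:1:5856",
        "RRA:AVERAGE:0.5:4:20160",
        "RRA:AVERAGE:0.5:40:52704"]

for rra in RRAS:
    _, cf, xff, steps, rows = rra.split(":")
    resolution = STEP_SECONDS * int(steps)
    seconds = resolution * int(rows)
    print("%-28s resolution %4ds, keeps %6.1f days (%d rows)"
          % (rra, resolution, seconds / 86400.0, int(rows)))

With those defaults each metric keeps roughly one day at 15-second resolution, two weeks at one-minute resolution, and about a year at ten-minute resolution, after which the file simply overwrites itself -- which matches Gleb's point that the files reach a fixed size.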
\\n\\nThe next stage for me now is keeping tabs on the RRDS folder, I see that only after a few hours on a test cluster not doing very much the space used is over 1GB.\\n\\nDo you have any steps I can follow to limit the size of this database? E.g. keep the last three months of data? Or possibly a CRON job to delete files older than a certain date (if possible)?\\n\\nI took a look at the gmetad.conf about the RR archives, but am having trouble working out how to set it to limit the database size :\\n\\n#\\n# Round-Robin Archives\\n# You can specify custom Round-Robin archives here (defaults are listed below)\\n#\\n# Old Default RRA: Keep 1 hour of metrics at 15 second resolution. 1 day at 6 minute\\n# RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \\\\\\n# "RRA:AVERAGE:0.5:5760:374"\\n# New Default RRA\\n# Keep 5856 data points at 15 second resolution assuming 15 second (default) polling. That's 1 day\\n# Two weeks of data points at 1 minute resolution (average)\\n#RRAs "RRA:AVERAGE:0.5:1:5856" "RRA:AVERAGE:0.5:4:20160" "RRA:AVERAGE:0.5:40:52704"\\n\\n\\nThe next stage after this is to tackle Nagios!\\n\\nThanks again for your help.\\n\\nBest Regards\\n\\nAntony\", \"post_time\": \"2016-08-16 14:11:30\" },\n\t{ \"post_id\": 10613, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Re: Installing Ganglia Ubuntu 14.04\", \"username\": \"Gleb Aronsky\", \"post_text\": \"It looks like you are fairly close. \\n\\nThe short answer is that the specific version of ganglia you are running on Ubuntu 14.04 doesn’t support the disk_total metric. You can remove it or replace it with other metric(s) that provide the information that you want.\\n\\nIn more detail…\\n\\nOn the node running ECLWatch with ganglia, there is file called ganglia.json located in /opt/HPCCSystems/componentfiles/files/ganglia/. It contains the graph and metric definitions that are used by the ECLWatch plugin. The default install contains a suggested predefined set of metrics, but it can be modified to meet your specific needs. Not all distros and versions of ganglia support all non-HPCC specific metrics. There are a number of 3rd party ganglia plugins available to allow for the monitoring of various metrics and parameters. The hpcc-ganglia-monitoring package includes plugins to monitor Roxie metrics and to surface those metrics (and possible others) in EclWatch.\\n\\nThe file disk_total.rrd is the round robin database file that stores the disk_total metric. The error is correct in that the file can’t be opened by the EclWatch ganglia plugin. Typically this is caused by something as simple as not waiting long enough for the metric to be populated. Allow some time to pass for all the metrics to be populated and updated. It is expected that not all metrics would be visible when initially starting ganglia monitoring.\\n\\nAlternatively, the ganglia plugin responsible for collecting the metric data could be failing or not configured properly. In the case of disk_total, that metric is provided by the ganglia package, and is not an HPCC specific metric. You can examine gmond.conf, and in there you should see the entry for disk_total. In 14.04 it does not appear that disk_total is supported. 
If you drill down through the configs you will find that the python code is missing for this metric.\\n\\nIn general to debug gmond you may want to telnet to port 8649 (gmond default) to see which metrics are being gathered by the gmond processing running on the node of interest.\\n\\nTo debug the gmetad aggregation service you can telnet to port 8651 (default) on the EclWatch node running gmetad.\\n\\nIf you simple don’t care about a particular metric, you can remove it from the ganglia.json and gmond.conf files.\\n\\nAs far as the install_graphs_helper.sh script, that file is used as part of our internal VM build process. Some of the steps in that script may not be applicable to all users. While others may want use their own customizations. It is intended primarily as a sample or template for users to use or examine.\\n\\nLet me know if you have any other questions.\", \"post_time\": \"2016-08-15 20:13:49\" },\n\t{ \"post_id\": 10563, \"topic_id\": 2593, \"forum_id\": 16, \"post_subject\": \"Installing Ganglia Ubuntu 14.04\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nI am having problems installing Ganglia on my Test Cluster which is no doubt probably down to me and my limited Ubuntu skills. I have followed the HPCC Monitoring and reporting document from the HPCC website as best I can, and was hoping someone could have a look at my process and give me some help.\\n\\nHere is how I have installed Ganglia on the master node in my cluster :\\n\\nsudo apt-get update && sudo apt-get -y upgrade\\nsudo apt-get install -y ganglia-monitor rrdtool gmetad ganglia-webfrontend\\nsudo cp /etc/ganglia-webfrontend/apache.conf /etc/apache2/sites-enabled/ganglia.conf\\n\\nI then edit this file : sudo vi /etc/ganglia/gmetad.conf adding in my cluster name and the frequency data is collected : \\n\\ndata_source "my cluster" 60 localhost\\n\\nI then edit this file sudo vi /etc/ganglia/gmond.conf comment out the mcast_join and add my local hosts, as per the gmetad.conf file.\\n\\nudp_send_channel {\\n #mcast_join = 239.2.11.71\\n host = localhost\\n port = 8649\\n ttl = 1\\n\\nudp_recv_channel {\\n #mcast_join = 239.2.11.71 ## comment out\\n port = 8649\\n #bind = 239.2.11.71 ## comment out\\n}\\n\\nOnce complete I then restart the services : sudo service ganglia-monitor restart && sudo service gmetad restart && sudo service apache2 restart\\n\\nI can connect to Ganglia no problem once I have completed these steps by going to : http://IPADDRESS/Ganglia\\n\\nThe part I am getting stuck on, is bit of the document :\\n\\nIf you have a Ganglia monitoring server running in your environment, you already have the required components and\\nprerequisites. Verify that you have /etc/ganglia/conf.d and /etc/ganglia/.pyconf files in place and then add the Roxie\\nnodes you wish to monitor. You can do that by installing the Ganglia components and HPCC Monitoring components\\non to each Roxie node.\\nIf you do not have Ganglia, or want to install it, read the Ganglia documentation provided at the above link, and install\\nit and any system dependencies. You will then need to download and install the HPCC Monitoring component.\\n\\nThese two files : /etc/ganglia/conf.d and /etc/ganglia/.pyconf - do not exist on my test box, so have I maybe missed something out?\\n\\nI have downloaded the files from : http://sourceforge.net/apps/trac/gangli ... on_modules\\n\\nbut I am unsure how to proceed correctly from here correctly step by step. 
\\n\\nOur HPCC version is : community_5.4.8-1 but I can only find a Ganglia monitoring tool for 5.2.0, 5.2.2, 5.6 or 6+ - will any of these work ok?\\n\\nI ended up installing 5.2.2 as it was the closed I could get before 5.6 using these steps :\\n\\n1. sudo dpkg -i hpccsystems-ganglia-monitoring-5.2.2-1trusty_amd64.deb\\n2. sudo apt-get update\\n3. sudo apt-get install -f\\n4. sudo dpkg -i hpccsystems-ganglia-monitoring-5.2.2-1trusty_amd64.deb\\n5. sudo service ganglia-monitor restart && sudo service gmetad restart && sudo service apache2 restart\\n\\nWhen I go back to Ganglia I can see there are now more metrics added for things like roxie - which is great and looks promising.\\n\\nI then ran : install_graphs_helper.sh after modified the following lines :\\n\\n# echo "Alias /ganglia /usr/share/ganglia-webfrontend" >> /etc/apache2/apache2.conf; \\\\\\n\\nas the alias was already specified in : /etc/apache2/sites-enabled/ganglia.conf\\n\\nand commented this part, as I have previously configured gmetad.conf \\n\\n#sed 's/my cluster\\\\" localhost/VM Cluster\\\\" localhost/g' < /etc/ganglia/gmetad.conf > /tmp/gmetad.conf; mv /tmp/gmetad.conf /etc/ganglia/gmetad.conf; \\\\\\n\\nI then restarted the cluster : sudo bash start-hpcc.sh restart.\\n\\nWhen I connect to ECL on port 8010 I can see the plug, but do get the following errors :\\n\\ncould not find rrd file for /var/lib/ganglia/rrds/__summaryindo__/disk_total.rrd three times.\\n\\nIt looks as though I am nearly there but clearly have a few steps I may have gotten wrong, any advice would be greatly appreciated. Once I have this configured correctly I will then move on to adding more nodes to Ganglia.\\n\\nThanks in Advance.\", \"post_time\": \"2016-08-14 10:27:14\" },\n\t{ \"post_id\": 16553, \"topic_id\": 4103, \"forum_id\": 16, \"post_subject\": \"Re: Sasha\", \"username\": \"JimD\", \"post_text\": \"There shouldn't be a need to stop or restart anything. After a WU is archived, it is just a file on disk. \\n\\nSince you can search archived workunits in ECL Watch, you could delete while a search is active, but that is unlikely and should be harmless. \\n\\nThe search is on-demand, so you could do your housekeeping to off hours.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-04-18 12:59:42\" },\n\t{ \"post_id\": 16223, \"topic_id\": 4103, \"forum_id\": 16, \"post_subject\": \"Re: Sasha\", \"username\": \"David Dasher\", \"post_text\": \"Hi Jim\\n\\nI hope you are well.\\n\\nDo we need to stop any components before we ssh into the box and delete all the archived workunit xmls?\\n\\nD\", \"post_time\": \"2017-04-10 12:54:13\" },\n\t{ \"post_id\": 16213, \"topic_id\": 4103, \"forum_id\": 16, \"post_subject\": \"Re: Sasha\", \"username\": \"JimD\", \"post_text\": \"One of Sasha's main functions is archiving workunits. It removes workunits from the System Data Store (SDS) and archives them to disk on the Sasha server. Since the SDS is held in Dali memory, this reduces Dali resourse needs.\\n\\nOnce archived, these workunits are still available and can be found using the filter mechanism in ECL Watch. If needed, archived WUs can be restored from disk back into the main SDS. \\n\\nOnce WUs are archived, it is up to you what to do with the disk files. 
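To put that housekeeping into practice, here is a minimal sketch that moves archived workunit files older than a cutoff to secondary storage rather than deleting them. The archive directory below is a placeholder, not a real default: point it at wherever your Sasha server writes its archived workunit files, and leave DRY_RUN on until the listing looks right.

# Hedged sketch: age-based housekeeping for Sasha's archived workunit files.
# ARCHIVE_DIR and DEST_DIR are placeholders -- set them for your own Sasha node.
import os
import shutil
import time

ARCHIVE_DIR = "/path/to/sasha/archive"      # placeholder: your Sasha archive location
DEST_DIR = "/mnt/backup/sasha-archive"      # placeholder: cheaper or remote storage
MAX_AGE_DAYS = 365
DRY_RUN = True

cutoff = time.time() - MAX_AGE_DAYS * 86400
for root, dirs, files in os.walk(ARCHIVE_DIR):
    for name in files:
        path = os.path.join(root, name)
        if os.path.getmtime(path) < cutoff:
            target = os.path.join(DEST_DIR, os.path.relpath(path, ARCHIVE_DIR))
            print("%s -> %s" % (path, target))
            if not DRY_RUN:
                os.makedirs(os.path.dirname(target), exist_ok=True)
                shutil.move(path, target)

Workunits moved out of the archive location presumably can no longer be found by the ECL Watch archived-workunit search, or restored, until they are moved back, so keep that in mind when choosing what to relocate.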
You can delete if you are certain they are no longer needed or you can move them to another storage location.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-04-10 12:21:54\" },\n\t{ \"post_id\": 16203, \"topic_id\": 4103, \"forum_id\": 16, \"post_subject\": \"Sasha\", \"username\": \"David Dasher\", \"post_text\": \"Hi all\\n\\nMy network team are asking about storage on Sasha. It's currently taking 42gb of storage, is there anyway to clean this up?\\n\\nThanks\\n\\nDavid\", \"post_time\": \"2017-04-10 11:58:21\" },\n\t{ \"post_id\": 22213, \"topic_id\": 5683, \"forum_id\": 16, \"post_subject\": \"Re: Thor Architecture\", \"username\": \"daviddasher\", \"post_text\": \"That's perfect, thanks Richard.\\n\\nDavid\", \"post_time\": \"2018-06-26 07:53:14\" },\n\t{ \"post_id\": 22203, \"topic_id\": 5683, \"forum_id\": 16, \"post_subject\": \"Re: Thor Architecture\", \"username\": \"rtaylor\", \"post_text\": \"David,\\n\\nSince the 6.0 release, package files have had the ability to be split into multiple files (http://cdn.hpccsystems.com/releases/CE-Candidate-7.0.0/docs/EN_US/RoxieReference_EN_US-7.0.0-beta2.pdf#page=30). \\n\\nIf you're using superkeys in your queries, then updating the data for a query just means updating the correct part file for the package for the query that uses that data. \\n\\nIf your Roxie and Thor clusters are all in the same HPCC environment, that should be all you need to do. But if your Roxie is in its own separate environment, you'll need to remote copy the new index files from whichever Thor to the Roxie before you update the package.\\n\\nOr did i misunderstand the issue?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-06-25 15:41:39\" },\n\t{ \"post_id\": 22183, \"topic_id\": 5683, \"forum_id\": 16, \"post_subject\": \"Thor Architecture\", \"username\": \"daviddasher\", \"post_text\": \"Hello\\n\\nWe would like to know if it is possible to \\n\\nDeploy two (or more) separate roxie package files to be active at the same time on a roxie.\\n•\\t with indexes coming from two separate ip addresses \\n•\\t where query names are in both package files so they can access indexes referenced from both packages.\\n\\nWe want separate out our index builds into two separate Thors but then deploy the indexes onto a single roxie. We have upwards of 200 indexes, some of which are relatively small but the build time still adds up. If you have any recommendations of how we can architect this differently. \\n\\nKind regards\\n\\nDavid\", \"post_time\": \"2018-06-25 08:07:28\" },\n\t{ \"post_id\": 26763, \"topic_id\": 7083, \"forum_id\": 16, \"post_subject\": \"Re: Removing Unused Roxie Indexes Issue\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Hi, yes, understood. I was just trying to help determine if:\\n\\n1. This is a timing issue where roxie just hasn't released the locks yet but soon will. \\n--This is just a question about how soon you are calling the delete after changing the packagemap or removing queries.\\n\\n2. If there may be a bug in what we are reporting as unused files. \\n-- If after restarting roxie you still can't delete the files this may be the case.\\n\\n3. If there may be an issue with roxie holding locks.\\n-- If you've waited a long time, but still can't delete, but restarting roxie allows you to delete the files.\\n\\nIn any case detaching roxie should allow you to delete. But if the problem is something like #2 then it could be dangerous. 
That's the reason I asked the questions.\\n\\nAnother possibility would be that you have other clusters (roxie or otherwise) using the same DALI, accessing the same files.\\n\\nTo help determine that, as another sanity check (if there are multiple clusters on the same DALI), and if you have access to the DALI box you could use the daliadmin command to show the locks on the files.\\n\\n"/opt/HPCCSystems/bin/daliadmin . dalilocks"\\n\\nThen look for one of the locked file paths and check the IP of the server(s) locking the file.\", \"post_time\": \"2019-06-18 14:35:26\" },\n\t{ \"post_id\": 26753, \"topic_id\": 7083, \"forum_id\": 16, \"post_subject\": \"Re: Removing Unused Roxie Indexes Issue\", \"username\": \"amillar\", \"post_text\": \"Hi Anthony,\\n\\nThanks for the quick reply its very much appreciated.\\n\\nWhen we pull the unused file list it also won’t let us remove those files even though they are listed as unused.\", \"post_time\": \"2019-06-17 10:36:47\" },\n\t{ \"post_id\": 26743, \"topic_id\": 7083, \"forum_id\": 16, \"post_subject\": \"Re: Removing Unused Roxie Indexes Issue\", \"username\": \"anthony.fishbeck\", \"post_text\": \"First, just as a sanity check, to make sure that the files are not actually in use, can you verify that the files listed by unused-files can be deleted if you do restart roxie? That would tell us that the locks really aren't needed, and help weed out the possibility that we are dealing with an issue determining which files are actually in use.\\n\\nThere can be a delay before roxie releases the locks are you trying to run the cleanup immediately after updating the packagemap?\\n\\nDetaching roxie should release the locks, but make sure there isn't an issue with which files are being listed before using detach as a way of forcing the ability to delete them. You don't want to reattach roxie just to find out the wrong files were deleted.\\n\\nWhen roxie is detached it can still use files it had already copied and files it's opened remotely, but it won't be able to be updated with new queries or packagemaps until it is reattached. It's basically in a frozen state until it's re-attached to dali.\\n\\nReload should try to re-resolve files and start copying files that it can. If you can verify that after making space reload doesn't seem to cause files to be copied as you would expect please open a JIRA.\", \"post_time\": \"2019-06-14 15:15:00\" },\n\t{ \"post_id\": 26733, \"topic_id\": 7083, \"forum_id\": 16, \"post_subject\": \"Removing Unused Roxie Indexes Issue\", \"username\": \"amillar\", \"post_text\": \"Hi All, \\n\\nI am hoping someone can help with the below.\\n\\nWhen I check my roxie for unused indexes using :\\n\\necl roxie unused-files myroxie\\n\\nand then I take that list and attempt to use \\n\\ndfuplus action=remove \\n\\nto attempt to remove the indexes from the list it returned, it just sits and both won't remove the index AND the nowait appears to be ignored. \\n\\nI think Roxie is locking those indexes even though the package has updated and doesn't reference them any more. \\n\\n1. Is there a way to get Roxie to release them so that we can clean them out properly? \\n\\n2. What does ecl roxie detach and attach do?\\n\\n3. If we detach does it mean all the queries stop working? (can we detach from dali and the queries still know where all the indexes are?)\\n\\nAlso, there are cases where the roxie runs out of space and can't copy index parts to local. 
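Tying the commands from this thread together, a hedged automation sketch: list the files roxie reports as unused, try to remove each one, and collect the ones that stay locked so they can be checked with the daliadmin dalilocks call quoted above. The dfuplus arguments and the assumption that unused-files prints one logical name per line should both be verified against your own output before turning DRY_RUN off.

# Hedged sketch: clean up files "ecl roxie unused-files" reports, via dfuplus.
# Connection options for the ecl client are omitted here (it will use its local
# configuration); add -s/--server and credentials as your environment requires.
import subprocess

CLUSTER = "myroxie"
ESP = "192.168.20.125"      # example ESP address -- adjust
DRY_RUN = True              # flip to False once the listed files look right

# 1. Ask roxie which files its current queries/packagemaps no longer reference.
out = subprocess.check_output(["ecl", "roxie", "unused-files", CLUSTER])
files = [line.strip() for line in out.decode().splitlines() if line.strip()]

failed = []
for lf in files:
    cmd = ["dfuplus", "action=remove", "name=" + lf, "server=http://%s:8010" % ESP]
    print(" ".join(cmd))
    if not DRY_RUN and subprocess.call(cmd) != 0:
        failed.append(lf)

# 2. Anything that will not delete is probably still locked; check who holds it
#    with "/opt/HPCCSystems/bin/daliadmin . dalilocks" on the dali node.
if failed:
    print("Still locked or failed to remove:\n  " + "\n  ".join(failed))

As Anthony notes above, anything that still will not delete after roxie has had time to release its locks deserves investigation (or a JIRA) before resorting to detach.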
Once we clean things up we want to poke the roxie to force it to try and copy the indexes the package references. The only successful way we have managed it is to stop and start the roxie, which takes it out of commission for a while and we would like to avoid this. \\n\\n1. Is there a way to trigger the roxie to try to refresh all the local indexes without deploying a new package or stop and starting roxie?\\n\\n2. What does reload do? (We have tried using that to force roxie to continue to copy index parts from the thor but it doesn't seem to do anything).\\n\\nThanks in advance\", \"post_time\": \"2019-06-14 10:12:33\" },\n\t{ \"post_id\": 26783, \"topic_id\": 7093, \"forum_id\": 16, \"post_subject\": \"Re: Performing Stress Test on Roxie\", \"username\": \"rtaylor\", \"post_text\": \"Artur,\\n\\nThe way I would approach this would be to create your query and the data you intend to place on that Roxie. Before you put that Roxie into production do your stress testing.\\n\\nRoxie is configured with the number of threads each Roxie Server process will run. That number specifies the number of concurrent queries each node is capable of handling. If that number is 30, then each node can handle 30 simultaneous queries. And if it's a 10-node Roxie, that means the Roxie cluster can handle 300 simultaneous queries. And if your average response time for that query is 1/4 of a second (as some of our really complex queries do), then the Roxie could handle 1200 of those queries per second (or if your query only takes 1/10 of a second, that would be 3000/second total -- YMMV). \\n\\nSo to stress that 10-node Roxie, using that 1/4-second query, you would need to send more than 1200 queries per second. That suggests to me using Thor. \\n\\nYou would need to set up a Thor job that uses SOAPCALL to run that Roxie query. A simple PROJECT through a large dataset would do as the test case, calling SOAPCALL once for each record in that test dataset, each record containing "randomized" query parameters to run (so you're not just repeating the same query each time).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-06-19 15:09:44\" },\n\t{ \"post_id\": 26773, \"topic_id\": 7093, \"forum_id\": 16, \"post_subject\": \"Performing Stress Test on Roxie\", \"username\": \"abaruchi\", \"post_text\": \"Hi Guys,\\n\\nI would like to know if some of you have an script or something to perform a stress test in a Roxie Cluster. The servers we just received is to run Roxie Queries only and we would like to check its performance. \\nI was thinking in deploy a Roxie Query with some mocked data and run several queries per second using some script (bash or python or anything else) in order to stress the cluster and try to reach its limit. However, if someone have any other idea or have done this before, I really appreciate any help.\\n\\nThanks in advance!\", \"post_time\": \"2019-06-18 22:48:10\" },\n\t{ \"post_id\": 27313, \"topic_id\": 7263, \"forum_id\": 16, \"post_subject\": \"Re: Question relating to "dafilesrv"\", \"username\": \"mgardner\", \"post_text\": \"Good day Simon, thanks for the question. The issue you're seeing is a functional change between version 6 and 7 of the platform that occurred due to the migration towards systemd as the default init system on many linux distributions.\\n\\nThe 'service' call on 18.04 acts as a wrapper that will parse service files that it sees in initV form and modify them to be called as systemd units. 
The problem with this is that our old initV files for HPCCSystems-Platform acted as a driver for several different components that were started depending on your configuration files. When automatically converted to a systemd unit, you lose the granularity of control that our initV scripts allowed for.\n\nIf you want the old initV-style control, we recommend calling /etc/init.d/hpcc-init or /etc/init.d/dafilesrv directly. I would, however, recommend using the newer systemd unit and hpccsystems-platform.target files. But pick one or the other, because initV and systemd won't be aware of each other's state.\n\nTo use the newer systemd service files, do the following:\n1. Modify your environment.xml as necessary for your environment.\n2. Generate a new hpccsystems-platform.target file: the script will parse your environment.xml, generate custom systemd unit files for your named components, and produce a fresh hpccsystems-platform.target file that has those unit files as dependencies of the target. The script can be found at /opt/HPCCSystems/sbin/generate-hpccsystems-target.sh\n3. Start the platform using
systemctl start hpccsystems-platform.target
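For reference, the whole sequence (steps 1-3) can be scripted. This is only a sketch: it assumes the default /etc/HPCCSystems/environment.xml location and a standard /opt/HPCCSystems install.

# 1. Edit the environment definition for your cluster
sudo vi /etc/HPCCSystems/environment.xml

# 2. Regenerate the per-component systemd unit files and the platform target
sudo /opt/HPCCSystems/sbin/generate-hpccsystems-target.sh

# 3. Start the whole platform through systemd and confirm it is up
sudo systemctl start hpccsystems-platform.target
sudo systemctl status hpccsystems-platform.target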
\\n\\n\\nA more detailed explanation of how to use the new systemd unit files can be found in our installing and running documentation at https://hpccsystems.com/training/documentation/installation-and-administration\", \"post_time\": \"2019-08-16 14:04:40\" },\n\t{ \"post_id\": 27303, \"topic_id\": 7263, \"forum_id\": 16, \"post_subject\": \"Question relating to "dafilesrv"\", \"username\": \"SimonEdwardsCPL\", \"post_text\": \"Hi\\n\\nWe are currently using Pulseway to monitor our HPCC installations, but since we have upgraded to Ubuntu 18.04.02 and HPCC 7 Gold configured as "Thor" in a cluster of HPe Blade servers I do not seem to be able to get pulseway to monitor dafilesrv, when I run the command "sudo service dafilesrv status" it tells me that the service is inactive (dead), but when i status all the services using "sudo service --status-all" it is showing dafilesrv as an active services. \\n\\nDoes "dafilesrv" run constantly on all the servers in the cluster, or does it only start when there is a workload to deal with?\\n\\nI have managed to get pulseway to monitor the "hpcc-init" service under "SYSTEMD" but not the "dafilesrv"\\n\\nHopefully someone can shed some light on this\\n\\nRegards\\n\\nSimon Edwards\\nCPL Online\", \"post_time\": \"2019-08-15 13:35:05\" },\n\t{ \"post_id\": 28961, \"topic_id\": 7623, \"forum_id\": 16, \"post_subject\": \"Re: THOR Log & Information THOR component keeps stopping\", \"username\": \"amillar\", \"post_text\": \"Hi Fernando,\\n\\nThanks for getting back to me.\\n\\nWe have been having problems over the last 24hrs, so while I was waiting I have upgraded the platform from 7.4.8-1 to 7.6.16-1 to give it a try, I was still experiencing the same problems, THOR starts and then STOPS.\\n\\nI have had a look in /var/lib/HPCCSystems/mythor and there is a file named core - its dated 15th Aug 19 and is 0 bytes - is that to be expected? \\n\\nI have also looked here /var/log/HPCCSystems/mythor - initially the issue seemed to be that the slaves failed to initialise\\n\\n8379 2020_01_03_16_09_59: Starting mythor\\n8379 2020_01_03_16_09_59: removing any previous sentinel file\\n8379 2020_01_03_16_09_59: Ensuring a clean working environment ...\\n8379 2020_01_03_16_09_59: Killing slaves\\n8379 2020_01_03_16_09_59: --------------------------\\n8379 2020_01_03_16_09_59: starting thorslaves ...\\n8379 2020_01_03_16_10_02: thormaster cmd : /var/lib/HPCCSystems/mythor/thormaster_mythor MASTER=192.168.20.35:20000\\n8379 2020_01_03_16_10_02: thormaster_lcr process started pid = 9577\\n8379 2020_01_03_16_10_05: Thormaster (9577) Slaves failed to initialize\\n8379 2020_01_03_16_10_05: Shutting down\\n8379 2020_01_03_16_10_05: Stopping mythor\\n8379 2020_01_03_16_10_05: mythor Stopped\\n8379 2020_01_03_16_10_05: Killing slaves\\n8379 2020_01_03_16_10_07: Frunssh successful\\n8379 2020_01_03_16_10_07: removing init.pid file and slaves file\\n \\nhowever after stopping PID's under HPCC user, and closing open ports on the other nodes I did get the platform to start.\\n\\nSo far everything seems to be stable. 
\\n\\nThanks for your help.\\n\\nAntony\", \"post_time\": \"2020-01-10 10:47:11\" },\n\t{ \"post_id\": 28833, \"topic_id\": 7623, \"forum_id\": 16, \"post_subject\": \"Re: THOR Log & Information THOR component keeps stopping\", \"username\": \"fernando\", \"post_text\": \"Would you please check for cores in /var/lib/HPCCSystems/<name of your thor>\\n\\nAlso would you post the contents of /var/log/HPCCSystems/<name of your \\n\\nthor>/init_thorXXXX\\n\\nand the thormaster.log of when the thor is going down.\\n\\n\\n\\n\\nthanks\\n\\n-F\", \"post_time\": \"2020-01-08 18:52:36\" },\n\t{ \"post_id\": 28783, \"topic_id\": 7623, \"forum_id\": 16, \"post_subject\": \"Re: THOR Log & Information THOR component keeps stopping\", \"username\": \"rtaylor\", \"post_text\": \"amillar,\\n\\nThis is something you should report in JIRA. That will get it directly to the attention of the developers. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-08 16:45:56\" },\n\t{ \"post_id\": 28763, \"topic_id\": 7623, \"forum_id\": 16, \"post_subject\": \"THOR Log & Information THOR component keeps stopping\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nI am having some problems with one of my Clusters, this is the only one we have running on :\\n\\nUbuntu 18.04\\nHPCC Community 7.4.8-1\\n\\nEvery couple or days the THOR service will stop and I have to run \\n\\nsudo service hpcc-init -c mythor stop / Start\\n\\nSometimes I need to run this command many times for THOR to start and stay started.\\n\\nI am trying to fund out why this might be happening \\n\\nIn ECL watch I am only getting errors like : \\n\\nSource\\tSeverity\\tCode\\tMessage\\tFileName\\tLineNo\\tColumn\\tid\\neclagent\\tError\\t0\\tAbort: 0: Workunit abort request received\\t\\t0\\t0\\t0\\neclagent\\tWarning\\t0\\tAbort takes precedence over error: 0: Query W20200107-135137 cancelled (1) (in item 10)\\t\\t0\\t0\\t1\\neclagent\\tInfo\\t0\\tPERSIST('~XXX::special::XXXidentdedup3') is up to date\\t\\t0\\t0\\t2\\n\\nI am looking for more detailed information to see why.\\n\\nI have had a look in these directories & log files but can’t see anything that helps.\\n\\n/var/log/HPCCSystems/mythor\\n\\n/var/log/HPCCSystem/hpcc-init.log\\n\\n/var/log/HPCCSystems/cluster\\n\\nI have also tried to see whats entered into the Sys log :\\n\\nsudo cat /var/log/syslog |tail\\n\\nCan you help point me in the right direction to get more detailed information?\\n\\nThanks in advance.\", \"post_time\": \"2020-01-08 14:47:59\" },\n\t{ \"post_id\": 29413, \"topic_id\": 7723, \"forum_id\": 16, \"post_subject\": \"Re: HPCC System Performance Monitoring & Nagios\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Anthony, the ganglia feature is limited to system health and roxie specific metrics. We're currently working on a non-ganglia mechanism for component health metrics reporting, but I can't provide any timeline on that right now.\\n\\nThere are ways to fetch metrics off of our component logs via filebeats -> Elastic stack. Let me know if you're interested going down that road. 
Thanks.\", \"post_time\": \"2020-02-10 21:22:22\" },\n\t{ \"post_id\": 29373, \"topic_id\": 7723, \"forum_id\": 16, \"post_subject\": \"HPCC System Performance Monitoring & Nagios\", \"username\": \"amillar\", \"post_text\": \"Hi There,\\n\\nI am looing to get more performance out of my HPCC cluster and am trying find out where the performance issues could be coming from CPU, MEM or Disk.\\n\\nOur THOR Cluster set-up is :\\n\\n10 x HP BL460c G7 Blade machines - 2 x 6 Core CPU (Xeon X5650 2.67ghz), 48GB MEM, 480GB SSD (mirrored)\\n\\nHPCC V6.4.2-1 running on Ubuntu 14.04.02\\n1 Machine is the THOR master, ECL server etc.\\n1 Machine is configured as a Spare.\\n\\nOur Environment is set with the defaults for memory 75% and each physical node is running 6 Slaves. (slavesPerNode="6")\\n\\nI am looking for some tools that will help me see where the potential slowdowns could be, our Data Science team have worked through the ECL code to make it as efficient as possible over the last 6months or so.\\n\\nI have been using the ECL watch to look at the Graphs and I notice a lot of “spills” which to me says we are running out of memory, however when I run tools such at HTOP and IOSTAT I don’t see the memory being exhausted, and the CPU load is normally pretty low, with the odd momentary peak at around 50% when Work units are running.\\n\\nI have tried to follow the documentation on the Nagios install reading HPCC Monitoring and Reporting 6.4 but I cannot seem to be able to execute all of the commands.\\n\\nSuch as :\\n\\nGenerate a host groups configuration for Nagios.\\n \\n/opt/HPCCSystem/bin/hpcc-nagios-tools -env \\\\ \\n/etc/HPCCSystems/environment.xml -g -out /etc/nagios3/config.d/hpcc_hostgroups.cfg \\n\\nGenerate a services configuration file. \\n\\n/opt/HPCCSystem/bin/hpcc-nagios-tools -env \\\\ \\n/etc/HPCCSystems/environment.xml -s -out /etc/nagios3/config.d/hpcc_services.cfg \\n\\nGenerate an escalation notifications file. \\n\\n./hpcc-nagios-tools -ec -env /etc/HPCCSystems/environment.xml \\\\ -enable_host_notify -enable_service_notify -set_url localhost/nagios3 \\\\ -disable_check_all_disks –out /etc/nagios3/conf.d/hpcc_notifications.cfg\\n\\n\\nI have since installed Nagios Core onto another server and installed NRPE so I can monitor – CPU, DISK, MEM, SWAP, CPU load on remote hosts, I have been struggling getting all of this to work over the last few days with only the Free Mem and CPU stats still not working.\\n\\nI am really keen to see the Dali, DFU, ECL Agent, CC, Scheduler stats within Nagios.\\n\\nI am trying to create a dashboard of my system so I can see the trends overtime to spot contention points and action them.\\n\\nI was wondering if someone can help with my Nagios set-up or whether I should be looking into another solution?\\n\\nI am happy to share my environment config as there maybe areas that needs tweaking, setting or enabling. I am also happy to try and suopply any tool output if that helps?\\n\\nAny advice or feedback would be greatly appreciated.\\n\\nThanks\\n\\nAntony\", \"post_time\": \"2020-02-07 16:05:42\" },\n\t{ \"post_id\": 30113, \"topic_id\": 7933, \"forum_id\": 16, \"post_subject\": \"Re: Clearing Sasha Archives\", \"username\": \"JimD\", \"post_text\": \"You cannot delete Archived WUs or Archived DFU-WUs from ECLWatch. You will have to use your favorite Linux utility or command line. Keep in mind that most people do not want to delete these. Most enterprises move them to a long term storage location. 
\\n\\nThe default directory Sasha uses for archives is:\\n/var/lib/HPCCSystems/hpcc-data/sasha/Archive
\n\nUnder that directory are subdirectories for each type of archived item; for example, WUs are in:\n/var/lib/HPCCSystems/hpcc-data/sasha/Archive/WorkUnits/
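As a purely illustrative sketch of moving older archives to long-term storage (the destination path and the 90-day cutoff are assumptions, not platform defaults), something like the following could be run by hand or from cron:

# Hypothetical long-term storage location -- adjust for your site
SRC=/var/lib/HPCCSystems/hpcc-data/sasha/Archive
DEST=/mnt/longterm/sasha-archives

mkdir -p "$DEST"
# Bundle anything older than 90 days into a dated tarball, then remove the originals
find "$SRC" -type f -mtime +90 -print0 | \
  tar --null --files-from=- --remove-files -czf "$DEST/sasha-archive-$(date +%Y%m%d).tar.gz"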
\\n\\nHTH,\\nJim\", \"post_time\": \"2020-04-09 20:06:35\" },\n\t{ \"post_id\": 30103, \"topic_id\": 7933, \"forum_id\": 16, \"post_subject\": \"Re: Clearing Sasha Archives\", \"username\": \"micevepay\", \"post_text\": \"So to properly clear it, do I just clear the contents of /var/lib/HPCCSystems/mysasha, a command line tool that should be invoked, or is there a way to clear it via the ECL Watch?\", \"post_time\": \"2020-04-09 16:05:42\" },\n\t{ \"post_id\": 30093, \"topic_id\": 7933, \"forum_id\": 16, \"post_subject\": \"Re: Clearing Sasha Archives\", \"username\": \"JimD\", \"post_text\": \"How you archive the files is completely up to you.\\n\\nWhen WUs and DFU-WUs are archived, they are removed from Dali's datastore and written to disk at the location specified in Sasha's configuration (usually on the Sasha server).\\n\\nIf you want to clear them, merely delete the files.\\n\\nIf you want to move them, you can use the Linux mv command. You could, if desired, use tar or zip to encapsulate and compress before moving them. You could write a script to do this and set it up as a cron job to automate the process.\\n\\nWe recommend you do not disable Sasha's archiving. If you do, that can fill up Dali's memory and eventually cause problems.\\n\\nIf still want to disable Sasha's archiving, there are several settings in Sasha's configuration you can set to zero to disable:\\n\\nWUlimit, WUinterval, DFUrecoveryLimit, DFUrecoveryInterval, DFUWUlimit, DFUWUinterval, cachedWUlimit, cachedWUinterval\\n\\nSetting all of those to zero disables Sasha's archiving of WUs, DFU-WUs, cached WUs, and recovered DFU-WUs. Again, we advise against this. \\n\\nHTH,\\n\\nJim\", \"post_time\": \"2020-04-09 15:21:21\" },\n\t{ \"post_id\": 30023, \"topic_id\": 7933, \"forum_id\": 16, \"post_subject\": \"Clearing Sasha Archives\", \"username\": \"micevepay\", \"post_text\": \"Is there a standard approach to clearing all of Sasha's archived data? Also, how do I prevent Sasha from archiving at all? My HPCC usage does not require going back and retrieving anything from Sasha. I'm using 6.4.8-1\", \"post_time\": \"2020-04-08 20:33:28\" },\n\t{ \"post_id\": 168, \"topic_id\": 68, \"forum_id\": 17, \"post_subject\": \"Re: Spraying flat text file\", \"username\": \"mjwalshe\", \"post_text\": \"[quote="bforeman":32bswf67]Hi Maurice,\\n\\nTo spray a variable length flat file, simply use the CSV option, and clear out the seperator and quote values. We actually use this techniques in one of our ECL training classes to spray a flat file that also contains nested child datasets, which make it variable length.\\n\\nRegards,\\n\\nBob Foreman\\nchears ill try that when I get a chance tomorow\", \"post_time\": \"2011-07-18 16:29:43\" },\n\t{ \"post_id\": 167, \"topic_id\": 68, \"forum_id\": 17, \"post_subject\": \"Re: Spraying flat text file\", \"username\": \"bforeman\", \"post_text\": \"Hi Maurice,\\n\\nTo spray a variable length flat file, simply use the CSV option, and clear out the seperator and quote values. 
We actually use this techniques in one of our ECL training classes to spray a flat file that also contains nested child datasets, which make it variable length.\\n\\nRegards,\\n\\nBob Foreman\", \"post_time\": \"2011-07-18 16:26:37\" },\n\t{ \"post_id\": 166, \"topic_id\": 68, \"forum_id\": 17, \"post_subject\": \"Spraying flat text file\", \"username\": \"mjwalshe\", \"post_text\": \"I am trying to impliment a quick wordcount program in ECl to get started with ECL.\\n\\nI think I understand how I define a record for a text file but I am a little unclear on how do you spray a flat file ecl watch has fixed,CSV and html - but how does one say spray one or more varaible length text files?\\n\\nrgds Maurice\", \"post_time\": \"2011-07-18 16:17:57\" },\n\t{ \"post_id\": 482, \"topic_id\": 137, \"forum_id\": 17, \"post_subject\": \"Re: Viewing progress of submitted workunits.\", \"username\": \"Allan\", \"post_text\": \"Thanks Richard,\\n\\nActually while running 'IMDB.KeysKevinBacon.BuildAll; I noticed results being completed one by one, so could estimate progress that way for that particular build.\\n\\nI suppose it just depends on the particular build/query one is doing.\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-18 17:36:08\" },\n\t{ \"post_id\": 481, \"topic_id\": 137, \"forum_id\": 17, \"post_subject\": \"Re: Viewing progress of submitted workunits.\", \"username\": \"richardkchapman\", \"post_text\": \"It's not generally possible for the system to predict the estimated completion time of an ECL job. You can however view the graph as it executes and see how many records have been processed by each activity, and what activities have completed so far, which will give you some idea of the rate of progress (or otherwise) Of your query.\", \"post_time\": \"2011-10-18 15:44:30\" },\n\t{ \"post_id\": 480, \"topic_id\": 137, \"forum_id\": 17, \"post_subject\": \"Viewing progress of submitted workunits.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nWhen spraying I can view progress of the process from ECL Watch (as a % bar) All very nice.\\n\\nHowever when submitting a Workunit from the ECL IDE, though I can access the ECL Watch from the IDE and also the running workunit from the ECLWatch pane itself, I can't find a way of viewing % complete. \\n\\nThe actual instance I have in mind is the construction of '~temp::IMDB::ActorsInMovies' as part of the tutorial 'Six Degrees of Kevin Bacon'. It took 15 minutes on my laptop, but could have been hours as far as I could tell.\\n\\nPerhaps runtime for these workunits cannot be determined, I don't know?\\n\\nYours\\n\\nAllan\", \"post_time\": \"2011-10-18 13:35:33\" },\n\t{ \"post_id\": 723, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"pschwartz\", \"post_text\": \"[quote="jitendrakalyan":1q09m9ts]1. If I am running the system on a single node, why do I require ssh in the first place? \\n\\n2. Is it absolutely necessary that I have HPCC user created on the machine? Can I use some other user account, if I make necessary changes in environment file?\\n\\n\\nSSH is required because of how some of the HPCC communication is preformed.\\n\\nIf you want to use a user other then hpcc, you need to modify 4 things.\\n\\n1. /etc/HPCCSystems/environment.conf (set the user and group correctly)\\n\\n2. Modify the environment.xml with configgen or by hand to change all instances of the hpcc user and group to the same user/group set in the environment.conf.\\n\\n3. 
Modify /etc/security/limits.conf to have the following settings:\\n<username> hard nofile 8192\\n<username> soft core unlimited\\n<username> hard core unlimited
\\n\\n4. Modify /etc/sudoers to have the following settings or full no password sudo access:\\nCmnd_Alias HPCC_DAFILESRV = /etc/init.d/dafilesrv\\nCmnd_Alias HPCC_HPCCINIT = /etc/init.d/hpcc-init\\n<username> ALL = NOPASSWD: HPCC_DAFILESRV, HPCC_HPCCINIT\\nDefaults:<username> !requiretty\\n
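A minimal sketch of items 3 and 4 for a hypothetical runtime user named hpccops (the user name, and the use of a /etc/sudoers.d drop-in instead of editing /etc/sudoers directly, are my assumptions; the entries themselves are the ones listed above):

# Item 3: raise the open-file limit and allow core dumps for the runtime user
cat <<'EOF' | sudo tee -a /etc/security/limits.conf
hpccops hard nofile 8192
hpccops soft core unlimited
hpccops hard core unlimited
EOF

# Item 4: allow the HPCC init scripts to run without a password prompt
cat <<'EOF' | sudo tee /etc/sudoers.d/hpccops
Cmnd_Alias HPCC_DAFILESRV = /etc/init.d/dafilesrv
Cmnd_Alias HPCC_HPCCINIT = /etc/init.d/hpcc-init
hpccops ALL = NOPASSWD: HPCC_DAFILESRV, HPCC_HPCCINIT
Defaults:hpccops !requiretty
EOF
sudo chmod 0440 /etc/sudoers.d/hpccops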
\n\nOnce this is done, that user must have ssh keys for passwordless authentication between the nodes.\", \"post_time\": \"2011-12-06 14:21:30\" },\n\t{ \"post_id\": 722, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"pschwartz\", \"post_text\": \"To determine where the user's home directory is, please do the following.\n\n/usr/sbin/useradd -D
\n\nLook for the HOME variable in the output, then cd to {value of HOME}/hpcc.\n\nOnce in this directory, run the following command.\n\nchmod -R 400 .ssh
\\n\\nAfter this is complete on all nodes, verify the ability to ssh between them as the hpcc user with the following.\\n\\nsudo -u hpcc ssh localhost
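To check every node rather than just the local box, a small loop along these lines can help (a sketch only; nodes.txt is an assumed file containing one node IP per line):

# Report any node that still prompts for a password
while read -r node; do
  sudo -u hpcc ssh -n -o BatchMode=yes "$node" hostname >/dev/null \
    || echo "passwordless ssh to $node FAILED"
done < nodes.txt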
\n\nIf the ssh worked correctly, your problem should be fixed. If not, the key files between the nodes do not match, causing the ssh commands that use the hpcc user's keys to fail. At this point you would need to create and distribute new keys in the manner you are most comfortable with, or by using the command provided by clo and following the on-screen directions.\n\n-Philip\", \"post_time\": \"2011-12-06 14:14:18\" },\n\t{ \"post_id\": 721, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"jitendrakalyan\", \"post_text\": \"1. If I am running the system on a single node, why do I require ssh in the first place? \n\n2. Is it absolutely necessary that I have the HPCC user created on the machine? Can I use some other user account, if I make the necessary changes in the environment file?\", \"post_time\": \"2011-12-06 14:07:54\" },\n\t{ \"post_id\": 720, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"clo\", \"post_text\": \"Hi, it's possible that the ssh keys were not properly distributed or generated on your system. Please verify that the user hpcc exists on your system. Typically, the user ssh keys are located somewhere like /Users/hpcc/.ssh \n\nIf it's true that your system does not have the proper keys, you can run \nsudo /opt/HPCCSystems/sbin/keygen.sh
to generate a new key\", \"post_time\": \"2011-12-06 12:15:30\" },\n\t{ \"post_id\": 718, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"jitendrakalyan\", \"post_text\": \"Hi, \\n\\nAttached are the results. Please let me know what the errors mean. Thank you.\\n\\n--\\nJitendra\\n\\nMachine Information\\n \\n \\nroxie \\n\\n\\tLocation\\tComponent\\tDescription\\nx.x.x.27\\n/var/lib/HPCCSystems/myroxie\\tRoxie Server\\n[myroxie]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclccserver\\tEcl CC Server\\n[myeclccserver]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclscheduler\\tEcl Scheduler\\n[myeclscheduler]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n\\n \\n \\nthor \\n\\n\\tLocation\\tComponent\\tDescription\\nx.x.x.27\\n/var/lib/HPCCSystems/mythor\\tThor Master\\n[mythor]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/mythor\\tThor Slave\\n[mythor]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclccserver\\tEcl CC Server\\n[myeclccserver]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclagent\\tAgent Exec\\n[myeclagent]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclscheduler\\tEcl Scheduler\\n[myeclscheduler]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n\\n \\n \\nhthor \\n\\n\\tLocation\\tComponent\\tDescription\\nx.x.x.27\\n/var/lib/HPCCSystems/myeclccserver\\tEcl CC Server\\n[myeclccserver]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclagent\\tAgent Exec\\n[myeclagent]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclscheduler\\tEcl Scheduler\\n[myeclscheduler]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). 
\\n\\nFetched: 12/05/11 21:36:42\\n\\n\\n\\nSystem servers\\n=================\\n\\nMachine Information\\n \\nLocation \\tComponent \\tDescription \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/myesp\\tEsp\\n[myesp]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclscheduler\\tEcl Scheduler\\n[myeclscheduler]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclagent\\tAgent Exec\\n[myeclagent]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/myeclccserver\\tEcl CC Server\\n[myeclccserver]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/mysasha\\tSasha Server\\n[mysasha]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/mydfuserver\\tDfu Server\\n[mydfuserver]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password). \\n \\nx.x.x.27\\n/var/lib/HPCCSystems/mydali\\tDali Server\\n[mydali]\\tSystem command(s) has been executed. Response: Permission denied, please try again. Permission denied, please try again. Permission denied (publickey,gssapi-with-mic,password).\", \"post_time\": \"2011-12-05 21:41:44\" },\n\t{ \"post_id\": 717, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"Re: cannot spray the file- ERR: Permission denied (publickey\", \"username\": \"sort\", \"post_text\": \"Can you please log into eclwatch (ESP) on port 8010.\\n\\nCan you verify that all system components are running. Click on Clusters (under Topology) and scroll to the bottom and click submit. Do the same after for System Servers.\", \"post_time\": \"2011-12-05 21:15:11\" },\n\t{ \"post_id\": 715, \"topic_id\": 184, \"forum_id\": 17, \"post_subject\": \"cannot spray the file- ERR: Permission denied (publickey,gss\", \"username\": \"jitendrakalyan\", \"post_text\": \"I uploaded a CSV file. I am unable to spray the file. I am running this on a single node, however the ssh communication is not happening as per the error. See below the complete error message. \\n\\nIf you are wondering if the ports are open, I have made sure they are. I can't think of anything else. Appreciate your guidance. 
Thank you.\\n\\n\\n\\nError message on ECL watch page:\\n==================================\\n\\nSummaryMessage\\t:\\tFailed: Timeout waiting for slave \\n\\n\\nDFUserver(x.x.x.27) logs:\\n=============\\n\\n\\n00000036 2011-12-05 20:33:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000035 2011-12-05 20:33:37 14568 14576 "WARNING: No response from any slaves in last 840 seconds."\\n00000034 2011-12-05 20:32:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000033 2011-12-05 20:32:37 14568 14576 "WARNING: No response from any slaves in last 780 seconds."\\n00000032 2011-12-05 20:31:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000031 2011-12-05 20:31:37 14568 14576 "WARNING: No response from any slaves in last 720 seconds."\\n00000030 2011-12-05 20:30:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n0000002F 2011-12-05 20:30:37 14568 14576 "WARNING: No response from any slaves in last 660 seconds."\\n0000002E 2011-12-05 20:29:38 14568 29851 "Try to connect to slave x.x.x.27:6406"\\nTarget: T>x.x.x.27, Raised in: /var/jenkins/workspace/Candidate-3.4.0/HPCC-Platform/system/jlib/jsocket.cpp, line 1347"\\n0000002D 2011-12-05 20:29:38 14568 29851 "-3: Failed to connect to slave (1) (try again): : connection failed\\n0000002C 2011-12-05 20:29:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n0000002B 2011-12-05 20:29:37 14568 14576 "WARNING: No response from any slaves in last 600 seconds."\\n0000002A 2011-12-05 20:28:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000029 2011-12-05 20:28:37 14568 14576 "WARNING: No response from any slaves in last 540 seconds."\\n00000028 2011-12-05 20:27:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000027 2011-12-05 20:27:37 14568 14576 "WARNING: No response from any slaves in last 480 seconds."\\n00000026 2011-12-05 20:26:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000025 2011-12-05 20:26:37 14568 14576 "WARNING: No response from any slaves in last 420 seconds."\\n00000024 2011-12-05 20:25:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000023 2011-12-05 20:25:37 14568 14576 "WARNING: No response from any slaves in last 360 seconds."\\n00000022 2011-12-05 20:24:38 14568 29851 "Try to connect to slave x.x.x.27:6406"\\nTarget: T>x.x.x.27, Raised in: /var/jenkins/workspace/Candidate-3.4.0/HPCC-Platform/system/jlib/jsocket.cpp, line 1347"\\n00000021 2011-12-05 20:24:37 14568 29851 "-3: Failed to connect to slave (1) (try again): : connection failed\\n00000020 2011-12-05 20:24:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n0000001F 2011-12-05 20:24:37 14568 14576 "WARNING: No response from any slaves in last 300 seconds."\\n0000001E 2011-12-05 20:23:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n0000001D 2011-12-05 20:23:37 14568 14576 "WARNING: No response from any slaves in last 240 seconds."\\n0000001C 2011-12-05 20:22:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n0000001B 2011-12-05 20:22:37 14568 14576 "WARNING: No response from any slaves in last 180 seconds."\\n0000001A 2011-12-05 20:21:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000019 2011-12-05 20:21:37 14568 14576 "WARNING: No response from any slaves in last 120 seconds."\\n00000018 2011-12-05 20:20:37 14568 14576 "WARNING: Still waiting for slave x.x.x.27"\\n00000017 2011-12-05 20:20:37 14568 14576 "WARNING: No response from any slaves in last 60 seconds."\\n00000016 2011-12-05 20:19:37 14568 29851 "Try to 
connect to slave x.x.x.27:6406"\\n00000015 2011-12-05 20:19:37 14568 29851 "Start connect to correct slave ( 1)"\\nERR: Permission denied (publickey,gssapi-with-mic,password)."\\n00000014 2011-12-05 20:19:37 14568 29851 "ssh result(255):\\n00000013 2011-12-05 20:19:37 14568 29851 "Start generate part x.x.x.27 [0x17573a10]"\\n00000012 2011-12-05 20:19:37 14568 29851 "Transferring part x.x.x.27 [0x17573a10]"\\n"\\n00000011 2011-12-05 20:19:37 14568 14576 "Begin to transfer parts (1 threads)\\n00000010 2011-12-05 20:19:37 14568 14576 "Calculate CRC = 1"\\n0000000F 2011-12-05 20:19:37 14568 14576 "Partition /var/lib/HPCCSystems/mydropzone/hpccdata.txt{0}[0 size 101922523]->/var/lib/HPCCSystems/hpcc-data/thor/travelportdatacsv._1_of_1{0}[0 size 101922523]"\\n0000000E 2011-12-05 20:19:37 14568 14576 "Setting up one2One partition"\\n0000000D 2011-12-05 20:19:37 14568 14576 "Calculate partition information"\\n0000000C 2011-12-05 20:19:37 14568 14576 "Finished gathering file sizes..."\\n0000000B 2011-12-05 20:19:37 14568 14576 "Start gathering file sizes..."\\n0000000A 2011-12-05 20:19:37 14568 14576 "Use pull operation as default"\\n00000009 2011-12-05 20:19:37 14568 14576 "Finished gathering file sizes..."\\n00000008 2011-12-05 20:19:37 14568 14576 "Gathering 1 file sizes on 1 threads"\\n00000007 2011-12-05 20:19:37 14568 14576 "Start gathering file sizes..."\\n00000006 2011-12-05 20:19:37 14568 14576 "Using transfer buffer size 65536"\\n00000005 2011-12-05 20:19:37 14568 14576 "DFS: import(hpccdata.txt,)"\\n00000004 2011-12-05 20:19:37 14568 14576 "DFU Server running job: D20111205-201937"\", \"post_time\": \"2011-12-05 20:36:46\" },\n\t{ \"post_id\": 874, \"topic_id\": 226, \"forum_id\": 17, \"post_subject\": \"Re: Bug in generated WSDL\", \"username\": \"DSC\", \"post_text\": \"I just figured out that the workaround in my case is to provide an explicit name for my output (I'm using Thor, so I can use OUTPUT and the NAMED option).\\n\\nStill, encoding that name is probably a Good Idea.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-01-23 14:14:59\" },\n\t{ \"post_id\": 873, \"topic_id\": 226, \"forum_id\": 17, \"post_subject\": \"Bug in generated WSDL\", \"username\": \"DSC\", \"post_text\": \"After publishing a small work unit I looked at the SOAP WSDL as generated by WsECL (the port 8002 interface). There is an error that results in an unusable WSDL. The XSD for the result is declared as the following in my example:\\n\\n<xsd:import namespace="urn:hpccsystems:ecl:generate_analytics:result:result_1" schemaLocation="../result/Result 1.xsd" />
\\n\\nNote that the schemaLocation value contains a space. That space should be encoded, probably as '%20' for best compatibility. If you leave the space there then tools such as soapUI cannot parse the WSDL.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-01-23 13:48:34\" },\n\t{ \"post_id\": 4584, \"topic_id\": 230, \"forum_id\": 17, \"post_subject\": \"Re: spray latency\", \"username\": \"soyyo\", \"post_text\": \"The type of file being sprayed is a significant factor on the "lag time" before initiating "work".\\n\\nThe fastest sprays will always be fixed length files.\\n\\nVariable length files will always take longer to initiate. \\n\\nThe system calculates the file positions before beginning the spray, to avoid splitting the file a within record when it is distributing the file accross the cluster. It spins through the entire file first before initiating the spray. The larger the variable length the longer the calculation(s) will take.\", \"post_time\": \"2013-09-17 11:37:58\" },\n\t{ \"post_id\": 902, \"topic_id\": 230, \"forum_id\": 17, \"post_subject\": \"Re: spray latency\", \"username\": \"aintnomyth\", \"post_text\": \"That's good info, I'm mostly using ECL watch. I'll use the ECL libraries and report back if it has any latency.\", \"post_time\": \"2012-01-26 15:09:26\" },\n\t{ \"post_id\": 901, \"topic_id\": 230, \"forum_id\": 17, \"post_subject\": \"Re: spray latency\", \"username\": \"bforeman\", \"post_text\": \"How are you spraying? Via the ECL watch, DFUPlus, or through the ECL Libraries?\\nECL watch may give you a delay, but using DFUPlus or ECL code you cn perhaps speed things up a bit.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-01-26 13:25:48\" },\n\t{ \"post_id\": 899, \"topic_id\": 230, \"forum_id\": 17, \"post_subject\": \"spray latency\", \"username\": \"aintnomyth\", \"post_text\": \"I noticed the spray job sometimes takes a minute or two to start doing work. Is this something I can speed up?\\n\\n\\n[edit]\\nBy "doing work" I mean the percent done shows 0% for a little while and then it speeds through the file.\", \"post_time\": \"2012-01-25 20:55:33\" },\n\t{ \"post_id\": 965, \"topic_id\": 238, \"forum_id\": 17, \"post_subject\": \"Re: how to structure roxie queries\", \"username\": \"aintnomyth\", \"post_text\": \"Thanks Richard, I'll check out that section in the programmer's guide.\", \"post_time\": \"2012-02-01 14:05:55\" },\n\t{ \"post_id\": 964, \"topic_id\": 238, \"forum_id\": 17, \"post_subject\": \"Re: how to structure roxie queries\", \"username\": \"rtaylor\", \"post_text\": \"How about a single "GetHospitals" query that takes all the possible parameters that could be passed to any of your existing queries, then calls the appropriate one by determining which parameters it got "this time". Assuming they all return the same data format, you could conceivably have just the one Service that calls any number of functions to fetch the right result set. Take a look at the Programmer's Guide articles in the Working with Roxie section -- especially the article on Query Libraries. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-01-31 20:50:35\" },\n\t{ \"post_id\": 963, \"topic_id\": 238, \"forum_id\": 17, \"post_subject\": \"how to structure roxie queries\", \"username\": \"aintnomyth\", \"post_text\": \"I'm starting to get a lot of Roxie queries. \\n\\nThey are named such that services with similar functionality are grouped together like this:\\n
gethospitalsbyclient\\ngethospitalsbyclientbedsize\\ngethospitalsbyclientbedsizename\\ngethospitalsbybedsize\\n...more
\\n\\nIf I keep going down this path the list of queries will be huge, is this normal or am I overlooking the obvious query-grouping mechanism?\", \"post_time\": \"2012-01-31 19:44:56\" },\n\t{ \"post_id\": 1052, \"topic_id\": 253, \"forum_id\": 17, \"post_subject\": \"Re: Roxie "could not open workunit"\", \"username\": \"bforeman\", \"post_text\": \"Can you recompile and publish it again from the ECL IDE? \\nIs this a query in myws_ecl that is posting this error when you try to open it?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-02-07 20:58:31\" },\n\t{ \"post_id\": 1046, \"topic_id\": 253, \"forum_id\": 17, \"post_subject\": \"Roxie "could not open workunit"\", \"username\": \"aintnomyth\", \"post_text\": \"I started getting the "could not open workunit" exception for Roxie queries I published weeks ago. Is that because I did not click the "Protected" check box on the publish screen?\", \"post_time\": \"2012-02-07 15:34:51\" },\n\t{ \"post_id\": 25613, \"topic_id\": 285, \"forum_id\": 17, \"post_subject\": \"Re: submitting ecl workunits through soap\", \"username\": \"anthony.fishbeck\", \"post_text\": \"Can you describe the functionality you are trying to provide? For example is this a standalone client or part of a query or service? Do you want to wait asynchronously for results? How long will each workunit take to complete?\\n\\nThere are several ways of running jobs via SOAP and what you use depends on the client behavior you are trying to achieve.\", \"post_time\": \"2019-04-10 18:12:33\" },\n\t{ \"post_id\": 25583, \"topic_id\": 285, \"forum_id\": 17, \"post_subject\": \"Re: submitting ecl workunits through soap\", \"username\": \"harshdesai\", \"post_text\": \"Hi,\\nCan you please help with excat steps and how we will be using this soap calls.\", \"post_time\": \"2019-04-10 05:02:40\" },\n\t{ \"post_id\": 1219, \"topic_id\": 285, \"forum_id\": 17, \"post_subject\": \"Re: submitting ecl workunits through soap\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Yes, in fact both the ECL IDE and eclplus make SOAP calls to WsWorkunits in order to create and run workunits.\\n\\nWsWorkunits has a lot of SOAP operations and quite a few parameters on top of those, and it isn't currently documented, but the basic steps would be:\\n\\n\\n
\n\nYou can get individual wsdls for each operation: /WsWorkunits/<operation_name>?wsdl\n\nYou can also access a test form and test the operations via browser: /WsWorkunits/<operation_name>?form\", \"post_time\": \"2012-02-28 00:47:17\" },\n\t{ \"post_id\": 1215, \"topic_id\": 285, \"forum_id\": 17, \"post_subject\": \"submitting ecl workunits through soap\", \"username\": \"Verticon\", \"post_text\": \"We can retrieve work unit information over SOAP utilizing the WSDL available from ECL Watch (/wsworkunits/?ver_=1.34&wsdl). Is there a way to submit an ECL query through SOAP using a combination of the methods available, essentially simulating the ECL IDE submit button and the eclplus asynchronous submission?\", \"post_time\": \"2012-02-27 17:46:41\" },\n\t{ \"post_id\": 1278, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"That worked perfectly. Well, I can't really vouch for Roxie On Demand working the way it is supposed to, but at least my orphaned workunits are gone.\n\nThanks!\n\nDan\", \"post_time\": \"2012-03-08 13:34:33\" },\n\t{ \"post_id\": 1267, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"sridhar.meda\", \"post_text\": \"To enable roxie on demand using ConfigMgr, please follow these steps.\n\n1. Using ConfigMgr, open the environment you are working with in Advanced mode.\n\n2. Select the Roxie Cluster in the navigator and you should see its definition on the right-hand side. Click on the Servers tab.\n\n3. To verify if roxie on demand is already enabled, check if there are two farms with the same servers (clicking on the arrow icon before the farm name will expand the farm to display its servers), one with port "9876" and the second with port "0". If yes, then roxie on demand is enabled.\n\n4. If roxie on demand is not already enabled, follow these steps:\n\n
\\n
a. Acquire write access by clicking on the "Write Access" check box.\nb. Assuming you are still in the "Servers" tab, right-click on the "Roxie Cluster" node and click on the "Add Farm" context menu. A "Select computers" dialog should pop up. Select the same computers that you have in your existing farm and click OK.\nc. The new farm should appear in the list of farms. Now select the newly added row in the table and click in the "port" column to edit the value. Change the port to 0 and click outside the edit box or press Enter.\nd. Save the environment and follow the steps to update the runtime (i.e., stop all the components, copy the updated environment over to /etc/HPCCSystems on all the nodes, restart all the components, etc.); a sketch of this step follows below.\n
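A minimal sketch of step d for a small cluster (the node list is hypothetical, passwordless sudo on the nodes is assumed, and pushing the file via /tmp plus a sudo copy is just one way to get a root-owned file onto each node):

NODES="10.0.0.11 10.0.0.12 10.0.0.13"   # hypothetical node IPs

# Stop the platform everywhere
for node in $NODES; do ssh $node sudo service hpcc-init stop; done

# Push the updated environment file to every node
for node in $NODES; do
  scp /etc/HPCCSystems/environment.xml $node:/tmp/environment.xml
  ssh $node sudo cp /tmp/environment.xml /etc/HPCCSystems/environment.xml
done

# Restart the platform everywhere
for node in $NODES; do ssh $node sudo service hpcc-init start; done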
\", \"post_time\": \"2012-03-07 16:43:40\" },\n\t{ \"post_id\": 1266, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Right, the Queue xml you found in the Dali/SDS is the 'roxie on demand' queue I was talking about.\\n\\nI think we should add a "clear" queue mechanism to EclWatch for the roxie queue like we have for Thor (i.e. click on the gear icon of the thor queue). I'll open an issue on GitHub for that.\\n\\nThe idea of running the configmgr wizard again would be to get the cluster set up with the 3.6 defaults instead of whatever the defaults were when the cluster was first set up. The new defaults should include roxie on demand, and turning on roxie on demand should prompt roxie to try and run those workunits. After not finding them it should remove them from the queue.\\n\\nI don't think the 'Roxie on demand' setting does what what one would expect, I'm not sure how its even used anymore.\\n\\nI'll ask someone more familiar with configmgr details to comment.\", \"post_time\": \"2012-03-07 15:55:09\" },\n\t{ \"post_id\": 1265, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"I wasn't able to clear this issue with configmgr, but then again I didn't want to fool around with it too much. Others are using this cluster and I didn't want to accidentally fubar their backend. I did change the 'Roxie on demand' setting from false to true and restarted everything, without result. I restored the original configuration (and restarted) afterwards.\\n\\nI did some digging and found references to the queues on one of the nodes (filename dalisds181.xml, to give you an idea of where I'm looking). Here's an excerpt:\\n\\n<SDS>\\n\\t<JobQueues>\\n\\t\\t<Queue count="3" name="roxie.roxie" state="active">\\n\\t\\t\\t<Item enqueuedt="2012-01-27T18:24:45" node="10.210.150.81:7108" num="1" port="0" priority="0" session="38654713212" wuid="W20120127-122441" />\\n\\t\\t\\t<Item enqueuedt="2012-02-09T18:25:14" node="10.210.150.81:7138" num="2" port="0" priority="0" session="47244644521" wuid="W20120209-122513" />\\n\\t\\t\\t<Item enqueuedt="2012-02-22T15:30:29" node="10.210.150.81:7108" num="3" port="0" priority="0" session="68719484103" wuid="W20120222-093024" />\\n\\t\\t\\t<Edition>\\n\\t\\t\\t\\t4\\n\\t\\t\\t</Edition>\\n\\t\\t</Queue>\\n\\t</JobQueues>\\n</SDS>
\\n\\nThose items do cite my three missing workunits. The other queues listed in this file are:\\n\\ndfuserver_queue\\ndfuserver_monitor_queue\\nhthor.agent\\nthor.agent\\nroxie.agent\\nhthor.eclserver\\nthor.eclserver\\nroxie.eclserver\\nthor.thor\\nroxie.thor\\nhthor.hthor.eclserver\\nthor.hthor.eclserver\\nroxie.hthor.eclserver\\n
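(For anyone chasing the same thing: a quick grep will show which Dali store files still reference an orphaned workunit. The path below is an assumption; use whatever directory your dalisds*.xml files actually live in.)

# List the SDS store files that still mention the missing workunit
grep -l 'W20120127-122441' /var/lib/HPCCSystems/mydali/dalisds*.xml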
\\n\\nThe workunits are also cited in <GeneratedDll> nodes. The shared libraries do exist on the file systems of the referenced node.\\n\\nWhere can I go from here? If it's back to the configmgr, can you give me a few more pointers on what needs to be changed?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-07 14:42:18\" },\n\t{ \"post_id\": 1261, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Most likely the three missing workunits were created by accidentally trying to run the ecl directly rather than compiling, publishing, and using the forms.\\n\\nI think if you run the configmgr wizard again and generate a new config it will include roxie on demand and roxie will clean up the queue.\", \"post_time\": \"2012-03-06 22:21:34\" },\n\t{ \"post_id\": 1260, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"Well....\\n\\nThere are a total of three 'missing' workunits. From the names, they were created about a week apart from each other. I've been writing Roxie queries and testing them via the form submission page all along with nothing going missing (to my knowledge). That includes several rounds of writing/debugging/testing today.\\n\\nI also have a four-node cluster, configured with the defaults provided at the time (version 3.4, perhaps).\\n\\nDoes any of that information change your recommendation/diagnosis?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-03-06 21:34:19\" },\n\t{ \"post_id\": 1259, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Roxie runs in two modes, one mode runs published queries and the other runs workunits on demand via the queue you are seeing on the activities page.\\n\\nMy first guess is that you had submitted those workunits to the roxie queue, but for some reason (most likely related to the series of upgrades) your roxie is not configured to run workunits on demand. \\n\\nIf that's the case, you can update your config to enable roxie on demand it should clean up the queue automatically.\\n\\nThe documentation for configmgr can be found here: http://hpccsystems.com/community/docs/i ... nistration\\n\\nIf you are using a default single node system you should be able to use the configmgr wizard to regenerate an xml file which will support both modes.\\n\\nOtherwise you can use the configmgr advanced view to configure an identical roxie cluster running on port 0.\\n\\nOthers could probably give more detailed descriptions of how that works.\\n\\nAnother possibility would be that the name of the roxie queue has changed and therefore nothing is processing the old queue... but I'd verify the first theory first.\", \"post_time\": \"2012-03-06 21:24:01\" },\n\t{ \"post_id\": 1257, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"rtaylor\", \"post_text\": \"Any ideas?
None -- sorry. \", \"post_time\": \"2012-03-06 15:24:30\" },\n\t{ \"post_id\": 1256, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"Ha. I knew those numbers looked familiar!\\n\\nThe workunits in question are now boring and replaced by far more interesting (read: useful) ones. I'm simply interested in getting rid of the messages at this point. Any ideas?\", \"post_time\": \"2012-03-06 15:00:06\" },\n\t{ \"post_id\": 1255, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"rtaylor\", \"post_text\": \"
"Could not open workunit W20120127-122441". \\n...\\nI really don't know how long the messages may have existed. They could have been there for weeks, for all I know.
The workunit ID tells you when it was created (Jan 27, about noon), so you know it had to have been lost since then -- most likely when you did the upgrade. But you are correct in that it may have been your upgrade from 3.2 to 3.4 that caused the issue (if that was done after 1/27).\\n\\nAs to why -- someone else will have to posit the possibilities. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-03-06 14:56:06\" },\n\t{ \"post_id\": 1253, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"No such luck. I really can't find these workunits. They're not in query sets, not visible via browse or search functionality within ECL Watch, not visible by browsing the workunits in the IDE. I've refrained from grepping through files on the system. I'm sure I'll find them there, but knowing what to do with that information is another problem.\\n\\nDan\", \"post_time\": \"2012-03-06 13:58:14\" },\n\t{ \"post_id\": 1252, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"Re: 'Could not open workunit' message\", \"username\": \"bforeman\", \"post_text\": \"Could it be that you have an outdated Query Set in the Roxie that may need to be deleted?\", \"post_time\": \"2012-03-06 13:54:11\" },\n\t{ \"post_id\": 1249, \"topic_id\": 296, \"forum_id\": 17, \"post_subject\": \"'Could not open workunit' message\", \"username\": \"DSC\", \"post_text\": \"I just upgraded to community_3.6.0-1. In the Roxie section of the Clusters -> Activity display I see three messages along the lines of "Could not open workunit W20120127-122441". I cannot find these workunits anywhere. How can I get rid of the messages?\\n\\nBTW, this portion of the display was not displaying at all in the previous release (broken link symbol in Chrome), so I really don't know how long the messages may have existed. They could have been there for weeks, for all I know.\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2012-03-06 12:53:43\" },\n\t{ \"post_id\": 1524, \"topic_id\": 339, \"forum_id\": 17, \"post_subject\": \"Re: Unable to spray tab-delimited file in 3.6.0 CE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,
This does seem like a bug.
I agree. \\n\\nI will report it.\\n\\nRichard\", \"post_time\": \"2012-04-13 18:18:50\" },\n\t{ \"post_id\": 1521, \"topic_id\": 339, \"forum_id\": 17, \"post_subject\": \"Re: Unable to spray tab-delimited file in 3.6.0 CE\", \"username\": \"DSC\", \"post_text\": \"OK, I understand the workaround. But what about code that relies on the documented behavior for the separator value?\\n\\n
Optional. The field delimiter. If omitted, the default is a comma (',') or the delimiter specified in the spray operation that put the file on disk.
\\n\\nThis does seem like a bug.\\n\\nThanks for the workaround!\\n\\nDan\", \"post_time\": \"2012-04-13 15:12:32\" },\n\t{ \"post_id\": 1520, \"topic_id\": 339, \"forum_id\": 17, \"post_subject\": \"Re: Unable to spray tab-delimited file in 3.6.0 CE\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nI created a 3-record tab-delimited file, then uploaded and sprayed it to my VMware HPCC.\\n\\nThe Logical File Details page for the sprayed file shows the incorrect field separator -- it displays comma where I definitely sprayed with a \\\\t.\\n\\nHowever, the spray operation doesn't care about field delimiters, so the file itself is still good, and all I need to do is explicitly define the \\\\t as the delimiter in my DATASET declaration and it works. Here's my code:\\nr := record\\n integer f1;\\n integer f2;\\n integer f3;\\nend;\\n\\nds := dataset('~TEST::RT::TabDelimit',r,CSV(SEPARATOR('\\\\t')));\\n\\nds;
Which produces this result:1\\t2\\t3\\n4\\t5\\t6\\n7\\t8\\t9\\n
HTH,\\n\\nRichard\", \"post_time\": \"2012-04-13 15:05:50\" },\n\t{ \"post_id\": 1514, \"topic_id\": 339, \"forum_id\": 17, \"post_subject\": \"Unable to spray tab-delimited file in 3.6.0 CE\", \"username\": \"DSC\", \"post_text\": \"Using the ECL Watch web interface I am unable to spray a tab-delimited file into my cluster. The spray happens, but if you look at the details of the sprayed file you'll see that a comma was used as the field separator rather than a tab. The resulting file is, of course, unusable. Spraying does work just fine with ECL standard library commands.\\n\\nIs this a known issue with 3.6.0 CE that will be addressed? Or am I somehow doing something wrong?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-04-13 13:28:10\" },\n\t{ \"post_id\": 1561, \"topic_id\": 343, \"forum_id\": 17, \"post_subject\": \"Re: Saving Filters, for browsing datasets.\", \"username\": \"bforeman\", \"post_text\": \"Allan, feedback from development:\\n\\n"Internet browsers control how to save the input history. The input history will be saved if using HTML form/submit button. For other button types, the input history will not be saved. Unfortunately, the Search Logical File screen does not use the form/submit button for certain reasons. So, we need to do some research about how to work around the problem."\\n\\nBest regards,\\n\\nBob\", \"post_time\": \"2012-04-24 13:10:31\" },\n\t{ \"post_id\": 1556, \"topic_id\": 343, \"forum_id\": 17, \"post_subject\": \"Re: Saving Filters, for browsing datasets.\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the input Allan, I've passed this message to a member of our development team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-04-23 17:33:18\" },\n\t{ \"post_id\": 1545, \"topic_id\": 343, \"forum_id\": 17, \"post_subject\": \"Saving Filters, for browsing datasets.\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI notice that the 'Label' field in the spray screens saves previously entered names.\\n\\nIt would be handy if the 'File Name Pattern' field in the 'Search Logical Files' screen did the same. (or some interface that allowed access to saved filters)\\n\\nIts just that I find myself having to retype in the same old filters again and again to examine logical files.\\n\\n(There a lot of logical files in the system, the browsing is out of the question.)\\n\\nYours\\n\\nAllan\", \"post_time\": \"2012-04-21 08:45:58\" },\n\t{ \"post_id\": 1703, \"topic_id\": 375, \"forum_id\": 17, \"post_subject\": \"Re: Spraying XML files - no compress option?\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nCompress for XML is not supported, but it is available for Fixed and CSV. The command line DFUPLUS /? lists compress as a general option, but it looks to be missing from the documentation as you note. \\n\\nI have notified the development team. Thanks for pointing this out!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-06-04 15:34:58\" },\n\t{ \"post_id\": 1695, \"topic_id\": 375, \"forum_id\": 17, \"post_subject\": \"Spraying XML files - no compress option?\", \"username\": \"DSC\", \"post_text\": \"When using ECL Watch, there is no 'Compress' option visible. The others (Overwrite, Replicate and No Split) are all present, and 'Compress' is available with the other spray options. Is this a bug or a feature? 
(If the latter, then the documentation needs to be updated.)\\n\\nI'm using 3.6.2CE.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-06-01 17:10:46\" },\n\t{ \"post_id\": 1891, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"michael-mason\", \"post_text\": \"Hi,\\n\\nIt turned out that the problem that we were having had to do with the number of inodes in our system. We reached the maximum limit of the number of inodes in our system, so, even through there was space left, we couldn't create any *new* files.\\n\\nThe daemon process daliadmin created over 1,000,000 files in the /var/log directory. We've only had HPCC running for ~2 months. I deleted the old log files and everything started working.\\n\\nThanks for your help.\\n\\n-Mike\", \"post_time\": \"2012-07-05 14:17:50\" },\n\t{ \"post_id\": 1885, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"pschwartz\", \"post_text\": \"[quote="michael-mason":2l8drndo]Hi,\\n\\nI believe my issues are related to the fact that the VM is having problems creating files on the system. Even though 'df' reports several gigabytes available, when I do something as simple as 'touch test.txt', I get a 'No space left on device' error. I have an email out to our sysadmin/VM guy, so hopefully he can figure out what is going on. I'm not a VM guy, but I wonder if the host system isn't allowing the VM to grow its disk space usage or something.\\nThank you all for your help. HPCC is a very cool system, and I'm really enjoying working with it. It seems to make very hard problems possible and hard problems easy.\\n\\nThanks!\\n\\n\\nHow much space is being listed as available?\\n\\nPlease paste a `df -h`\", \"post_time\": \"2012-07-02 19:15:36\" },\n\t{ \"post_id\": 1884, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"michael-mason\", \"post_text\": \"Hi,\\n\\nI believe my issues are related to the fact that the VM is having problems creating files on the system. Even though 'df' reports several gigabytes available, when I do something as simple as 'touch test.txt', I get a 'No space left on device' error. I have an email out to our sysadmin/VM guy, so hopefully he can figure out what is going on. I'm not a VM guy, but I wonder if the host system isn't allowing the VM to grow its disk space usage or something.\\nThank you all for your help. HPCC is a very cool system, and I'm really enjoying working with it. It seems to make very hard problems possible and hard problems easy.\\n\\nThanks!\", \"post_time\": \"2012-07-02 16:54:45\" },\n\t{ \"post_id\": 1881, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"michael-mason\", \"post_text\": \"Hi everyone,\\n\\nI'm currently trying to gather the information that you requested. Part of the problem is that the machine that I'm working on is located in a different city and the people that set it up are not in my group. So, I'm trying to see what information I can gather while waiting for replies to emails.\\n\\nWe are running the VM version that was downloaded from the website. 
They started the VM up on a Windows machine (not sure which version).\\n\\nThank you so much for your help!\", \"post_time\": \"2012-07-02 15:08:26\" },\n\t{ \"post_id\": 1877, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nI was wondering which virtual machine program you are using to run the VM. I've typically run it on VMWare.\", \"post_time\": \"2012-06-29 20:03:12\" },\n\t{ \"post_id\": 1875, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"sort\", \"post_text\": \"Just to be clear. You are running the lastest official HPCC OSS vm build\\nHPCC VM Image Release Date: 05/01/2012 3.6.2.3.CE\\n\\nThis image is meant to be self contained and auto started when the VM starts up.\\n\\nIf you are running one of our binary installation files in your own VM, then we need to trouble shoot differently\", \"post_time\": \"2012-06-29 19:37:13\" },\n\t{ \"post_id\": 1874, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"michael-mason\", \"post_text\": \"Hello,\\n\\nThanks for your reply. I will reply again with the information you requested, however I wanted to first say that there are other problems going on as well that *may* explain these issues. Inside ECLWatch, I went to:\\n\\nTopology, Systems Servers\\n\\nand hit Update to see the latest status, and I'm getting messages that myeclagent, myeclccserver, mysasha, and mydfuserver aren't running. I'm currently searching for a way to get these started. (Is there some script that launches these, or do we start these daemons by hand?)\\n\\nAlso, I meant to write "I ran df" up above when I was checking for the amount of free disk space. I will return with the information you requested shortly.\\n\\nThanks,\", \"post_time\": \"2012-06-29 19:26:38\" },\n\t{ \"post_id\": 1873, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"sort\", \"post_text\": \"also what version of Virtual Box are you using (you are having other VM issues that we will be looking into)\", \"post_time\": \"2012-06-29 19:06:46\" },\n\t{ \"post_id\": 1871, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"anthony.fishbeck@lexisnexis.com\", \"post_text\": \"Can you also post the tail end of your esp log (/var/log/HPCCSystems/myesp/esp.log -- replace "myesp" if the component name is different).\\n\\nAfter both trying to upload, and after submitting a job as they may be related.\", \"post_time\": \"2012-06-29 19:05:02\" },\n\t{ \"post_id\": 1870, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Re: Cannot upload file\", \"username\": \"kevin.wang@lexisnexis.com\", \"post_text\": \"Could you please let me know the version of your HPCC and your Internet Explorer?\", \"post_time\": \"2012-06-29 18:51:34\" },\n\t{ \"post_id\": 1867, \"topic_id\": 413, \"forum_id\": 17, \"post_subject\": \"Cannot upload file\", \"username\": \"michael-mason\", \"post_text\": \"In ECL Watch, I go to Upload/download file (on the left) and choose browse. I have a small XML file (~200k) that I want to upload, so I hit "browse", choose the file, hit Open, and when I'm back on the ECLWatch page, I choose "Upload Now" and the page reloads, and Internet Explorer cannot connect. It says "\\n\\nInternet Explorer cannot display the webpage \\n\\nAs if I typed an invalid address or something. As far as I can tell, HPCC is running. 
I can go through the other areas of ECLWatch and look at the different queues, jobs, nodes, etc. \\n\\n**** not sure if this is related, but ...\\n\\nI am having a (possibly) separate issue that I already posted a topic about. I'm getting a strange error in ECL IDE when I Submit a job:\\n\\nWARNING: ESP Exception - CSoapResponseBinding: 2012-06-29 10:56:45 GMT: SDS: Dirty client cache members used\\nSDS Reply Error : CFile::open /var/lib/HPCCSystems/hpcc-data/dali/delta.progress, Disk full (D:\\\\hpccsystems\\\\src\\\\eclide\\\\comms\\\\Dali.cpp, 1281) SoapUtil.h(419)\\n\\nWe're running on a VM, and as far as I can tell, we have space left on the VM. When I do a du, I see that we have several gigabytes available.\\n\\nCan anyone help?\", \"post_time\": \"2012-06-29 15:54:27\" },\n\t{ \"post_id\": 2106, \"topic_id\": 462, \"forum_id\": 17, \"post_subject\": \"Re: Function of View Data File\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi jprichard....\\nThanx for the reply....\\nBut it is giving "No records"...\", \"post_time\": \"2012-07-30 06:22:53\" },\n\t{ \"post_id\": 2105, \"topic_id\": 462, \"forum_id\": 17, \"post_subject\": \"Re: Function of View Data File\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi Bob..\\nThanx for the reply...\\nIt helps me 2 get informatn....\", \"post_time\": \"2012-07-30 06:21:27\" },\n\t{ \"post_id\": 2101, \"topic_id\": 462, \"forum_id\": 17, \"post_subject\": \"Re: Function of View Data File\", \"username\": \"jprichard\", \"post_text\": \"Hi Ankita\\n\\nTry searching for logical files with a mask.\\n\\nIn ECLWatch, Under DFU Files. Pick Search Logical Files,\\n\\nIn the Filename pattern put *originalperson* and click find.\\n\\nThis will look and see if you have any logical files for OriginalPerson.\\n\\nIf there are you can click on the upside down triangle icon and select view data file.\\n\\nLet me know if this helps?\", \"post_time\": \"2012-07-27 16:16:50\" },\n\t{ \"post_id\": 2098, \"topic_id\": 462, \"forum_id\": 17, \"post_subject\": \"Re: Function of View Data File\", \"username\": \"bforeman\", \"post_text\": \"Hi Ankita,\\n\\nFiles that are OUTPUT using ECL code can be viewed using the View Data File option. Some files that are sprayed variable length and XML can also be viewed directly with the View Data File option. I think that the only files that do not have the record description information are the THOR Fixed length files that are sprayed onto the cluster, and View Data File cannot read those files. \\n\\nAt least that's what my testing tells me before I posted this reply.
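A minimal sketch of how to make a sprayed fixed-length file viewable, following the point above that files written by OUTPUT can be viewed: read the file with its layout and write it to a new logical file, which stores the record description in the DFU. The layout and logical names here are hypothetical stand-ins, and the field widths must add up to the record length used at spray time.

// Hypothetical layout for the sprayed fixed-length file
PersonLayout := RECORD
  STRING15 firstname;
  STRING25 lastname;
  STRING15 city;
  STRING2  state;
  STRING5  zip;
END;

sprayedPersons := DATASET('~tutorial::ps::originalperson', PersonLayout, THOR);

// Writing the data back out records the layout in the DFU,
// so the View Data File option can then display it
OUTPUT(sprayedPersons,, '~tutorial::ps::originalperson_viewable', OVERWRITE);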
\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2012-07-27 15:55:45\" },\n\t{ \"post_id\": 2096, \"topic_id\": 462, \"forum_id\": 17, \"post_subject\": \"Function of View Data File\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi\\n\\nI am new to ECL....\\nI want to know the need of View Data File tab under DFU Files..\\nAs if i am giving file name or logical file name its giving error as\\n\\nMessage:2012-07-27 17:45:00 GMT: Could not find file OriginalPerson.\\nwhen file name is entered\\n\\nMessage:2012-07-27 17:46:55 GMT: DFS did not contain record description for 'tutorial::ps::neworiginalper'\\nwhen logical filename is entered...\\n\\nplzz help..\", \"post_time\": \"2012-07-27 12:21:52\" },\n\t{ \"post_id\": 2114, \"topic_id\": 464, \"forum_id\": 17, \"post_subject\": \"Re: Need of Remote Copy\", \"username\": \"Ankita Singla\", \"post_text\": \"Thanx for replying...\\n\\nNow i get somevat idea to proceed with Remote Copying...
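As a concrete sketch of the kind of scenario Remote Copy addresses, when you only need to read data that lives on another environment rather than copy it: ECL can address a file on a different Dali directly through a foreign file reference. The IP address, layout and logical names below are placeholders.

Layout := RECORD
  STRING20 name;
  STRING10 id;
END;

// '~foreign::<remote-dali-ip>::<logical-name>' reads straight from the other
// environment; Remote Copy (or STD.File.Copy) is the better choice when the
// data will be used repeatedly, since the records then live on the local cluster
remoteDS := DATASET('~foreign::10.0.0.50::some::remote::file', Layout, THOR);

OUTPUT(CHOOSEN(remoteDS, 100));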
\", \"post_time\": \"2012-07-31 06:49:55\" },\n\t{ \"post_id\": 2109, \"topic_id\": 464, \"forum_id\": 17, \"post_subject\": \"Re: Need of Remote Copy\", \"username\": \"clo\", \"post_text\": \"Hi, \\n\\nIn the case when you have two separate systems (different dali and completely different set of information), Remote Copy on ECL Watch can be used to copy a logical file from one environment to another. In the case when spraying the logical file or generating the file on another system can be difficult or time consuming, remote copy will enable a user to simply copy the file from another system. \\n\\nI hope that clears it up a bit.\\n\\nChris\", \"post_time\": \"2012-07-30 13:07:52\" },\n\t{ \"post_id\": 2108, \"topic_id\": 464, \"forum_id\": 17, \"post_subject\": \"Need of Remote Copy\", \"username\": \"Ankita Singla\", \"post_text\": \"Hi........\\nCan any one plzz explain the need of Remote Copy in ECL....\\nAlso get me the idea about the different scenarios where ‘Remote Copy’ can come in handy.\\nplzz help\", \"post_time\": \"2012-07-30 07:44:11\" },\n\t{ \"post_id\": 2315, \"topic_id\": 509, \"forum_id\": 17, \"post_subject\": \"Re: Replicate option does not appear in ecl watch in spray \", \"username\": \"bforeman\", \"post_text\": \"I am seeing the replicate option in the Spray CSV ECL watch page. It is a checkbox near the bottom.\\n\\nThe error that you refer to has nothing to do with spraying, in ECL if you have an EXPORT definition you cannot have an action in the same file.\\n\\nIf you temporarily remove the EXPORT scope than the action should work just fine.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-10 14:55:40\" },\n\t{ \"post_id\": 2306, \"topic_id\": 509, \"forum_id\": 17, \"post_subject\": \"Replicate option does not appear in ecl watch in spray csv\", \"username\": \"abinaya\", \"post_text\": \"Hi....\\n\\nThe replicate option does not occur in ecl watch when I try to spray csv. Anyone please suggest me a solution to this.Due to this i got this error\\nError: Definition contains actions after the EXPORT has been defined (43, 1), 2325,\", \"post_time\": \"2012-09-07 07:45:49\" },\n\t{ \"post_id\": 2433, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"bforeman\", \"post_text\": \"Dan,\\n\\nI received a message this morning that this issue has been fixed in the next update. Thanks again for your report.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-25 12:05:03\" },\n\t{ \"post_id\": 2431, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"DSC\", \"post_text\": \"Huh.\\n\\nWhen I had the problem earlier, before finding the workaround with File.DeleteSuperfile(), clicking on the Details option for the superfile generated an error rather than the superfile details. This time, when File.DeleteSuperfile() segfaults, Details works and I can delete everything without a problem.\\n\\nGo figure.\\n\\nI guess the take-away here is to always get you guys involved, burning time and bandwidth, and the solution will just present itself.\\n\\nCheers!\\n\\nDan\", \"post_time\": \"2012-09-24 21:31:26\" },\n\t{ \"post_id\": 2430, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nYou can maintain superfiles in a GUI interface through ECL Watch, too. 
So you can open the sub-superfile's Logical File Details page and remove any sub-file references there, too.\\n\\nIt also might help to know that superfiles themselves don't really exist!
\\nThey are simply meta-data in the DFU that are treated as if they were actual logical files.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-09-24 20:56:15\" },\n\t{ \"post_id\": 2429, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nI have an issue opened with the development team, and one of us will reply back as soon as we have a solution. Meanwhile simply ignore that file on your cluster, it is simply an entry and is not taking up any resources.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-24 19:00:26\" },\n\t{ \"post_id\": 2428, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"DSC\", \"post_text\": \"Well, now I've run into a superfile that refuses to be deleted, even through code. This particular superfile believes that it contains a sub-superfile that itself contains data. Executing File.DeleteSuperFile() now results in a segfault:\\n\\n
00000005 2012-09-24 13:14:16 6991 6992 AgentExec: Executing 'start_eclagent WUID=W20120924-131415 DALISERVERS=10.210.150.113:7070'\\n00000006 2012-09-24 13:14:16 6991 6992 Execution started\\n00000007 2012-09-24 13:14:16 6991 6992 AgentExec: Waiting on queue(s) 'hthor.agent,thor.agent'\\n00000000 2012-09-24 13:14:16 7580 7580 Logging to /var/log/HPCCSystems/myeclagent/eclagent.2012_09_24.log\\n00000001 2012-09-24 13:14:16 7580 7580 ECLAGENT build community_3.8.2-1\\n00000002 2012-09-24 13:14:16 7580 7580 Waiting for workunit lock\\n00000003 2012-09-24 13:14:16 7580 7580 Obtained workunit lock\\n00000004 2012-09-24 13:14:17 7580 7580 Loading dll (libW20120924-131415.so) from location /var/lib/HPCCSystems/myeclccserver/libW20120924-131415.so\\n00000005 2012-09-24 13:14:17 7580 7580 Starting process\\n00000006 2012-09-24 13:14:17 7580 7580 RoxieMemMgr: Setting memory limit to 314572800 bytes (300 pages)\\n00000007 2012-09-24 13:14:17 7580 7580 RoxieMemMgr: 320 Pages successfully allocated for the pool - memsize=335544320 base=0x2aaaab600000 alignment=1048576 bitmapSize=10\\n00000008 2012-09-24 13:14:17 7580 7580 Waiting for run lock\\n00000009 2012-09-24 13:14:17 7580 7580 Obtained run lock\\n0000000A 2012-09-24 13:14:17 7580 7580 CDistributedSuperFile: SuperFile test_poc::test_data::all_data is missing sub-file file test_poc::test_data::update_data\\n0000000B 2012-09-24 13:14:17 7580 7580 ================================================\\n0000000C 2012-09-24 13:14:17 7580 7580 Signal: 11 Segmentation fault\\n0000000D 2012-09-24 13:14:17 7580 7580 Fault IP: 00002B80756F1A0F\\n0000000E 2012-09-24 13:14:17 7580 7580 Accessing: 0000000000000000\\n0000000F 2012-09-24 13:14:17 7580 7580 Registers:\\n00000010 2012-09-24 13:14:17 7580 7580 EAX:0000000017191A4C EBX:0000000000000000 ECX:0000000017191A40 EDX:0000000000000000 ESI:0000000000000000 EDI:0000000000000000\\n00000011 2012-09-24 13:14:17 7580 7580 CS:EIP:0033:00002B80756F1A0F\\n00000012 2012-09-24 13:14:17 7580 7580 ESP:00007FFF439ACCF0 EBP:000000001719221C\\n00000013 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACCF0]: 0000000000001D9C 73FAC9D800000000 00002B8073FAC9D8 0000000100002B80 0000000000000001 7454FAC300000000 00002B807454FAC3 1719142000002B80\\n00000014 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACD10]: 0000000017191420 439ACDB000000000 00007FFF439ACDB0 439ACDA000007FFF 00007FFF439ACDA0 1719177800007FFF 0000000017191778 439ACD6000000000\\n00000015 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACD30]: 00007FFF439ACD60 0000271000007FFF 01002B8000002710 1719172001002B80 0000000017191720 FFFFFFFF00000000 00000001FFFFFFFF 1719169800000001\\n00000016 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACD50]: 0000000017191698 0037D46000000000 000000000037D460 171914A000000000 00000000171914A0 0000001600000000 0000000000000016 0000000000000000\\n00000017 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACD70]: 0000000000000000 0000000000000000 0000000000000000 7448000000000000 00002B8074480000 0000000000002B80 0000000000000000 171930B000000000\\n00000018 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACD90]: 00000000171930B0 0000007A00000000 000008000000007A 0000000200000800 0000000000000002 1718CD1000000000 000000001718CD10 1719180000000000\\n00000019 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACDB0]: 0000000017191800 0000002100000000 0000004000000021 0000000000000040 0000000000000000 0000000000000000 0000000000000000 0000000000000000\\n0000001A 2012-09-24 13:14:17 7580 7580 Stack[00007FFF439ACDD0]: 0000000000000000 0000271000000000 
0000000000002710 171910D000000000 00000000171910D0 0000000100000000 0000000000000001 1719168000000000\\n0000001B 2012-09-24 13:14:17 7580 7580 Backtrace:\\n0000001C 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libjlib.so(_Z16PrintStackReportv+0x26) [0x2b80756a6e06]\\n0000001D 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP7siginfoPv+0x295) [0x2b80756a7e55]\\n0000001E 2012-09-24 13:14:17 7580 7580 /lib64/libpthread.so.0 [0x3d4bc0eb70]\\n0000001F 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libjlib.so(_Z18createPTreeFromIPTPK13IPropertyTree9ipt_flags+0x1f) [0x2b80756f1a0f]\\n00000020 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFile12loadSubFilesEbP27IDistributedFileTransactionj+0x353) [0x2b807454fac3]\\n00000021 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libdalibase.so(_ZN21CDistributedSuperFileC1EP25CDistributedFileDirectoryP17IRemoteConnectionRK19CDfsLogicalFileNameP15IUserDescriptorbP27IDistributedFileTransactionbj+0xf4) [0x2b807455bc24]\\n00000022 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libdalibase.so(_ZN25CDistributedFileDirectory8dolookupERK19CDfsLogicalFileNameP15IUserDescriptorbP27IDistributedFileTransactionbj+0x165) [0x2b80745423d5]\\n00000023 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libdalibase.so(_ZN25CDistributedFileDirectory15lookupSuperFileEPKcP15IUserDescriptorP27IDistributedFileTransactionbj+0x6e) [0x2b807454286e]\\n00000024 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libdalibase.so(_ZN27CDistributedFileTransaction15lookupSuperFileEPKcbj+0x95) [0x2b8074553f95]\\n00000025 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/plugins/libfileservices.so [0x2aaaaae74f52]\\n00000026 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/plugins/libfileservices.so(fsDeleteSuperFile+0x65) [0x2aaaaae76545]\\n00000027 2012-09-24 13:14:17 7580 7580 /var/lib/HPCCSystems/myeclccserver/libW20120924-131415.so [0x2aaaab3e8fb6]\\n00000028 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11performItemEjj+0x54) [0x2b80751a31c4]\\n00000029 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine13doExecuteItemER20IRuntimeWorkflowItemj+0x3f) [0x2b80751a3b3f]\\n0000002A 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine11executeItemEjj+0x26a) [0x2b80751a35da]\\n0000002B 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libeclrtl.so(_ZN15WorkflowMachine7performEP18IGlobalCodeContextP11IEclProcess+0x139) [0x2b80751a41b9]\\n0000002C 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent10runProcessEP11IEclProcess+0x14a) [0x2b8072aa503a]\\n0000002D 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libhthor.so(_ZN8EclAgent9doProcessEv+0x283) [0x2b8072aa6fd3]\\n0000002E 2012-09-24 13:14:17 7580 7580 /opt/HPCCSystems/lib/libhthor.so(_Z13eclagent_mainiPPKcP12StringBufferb+0x6af) [0x2b8072aabf0f]\\n0000002F 2012-09-24 13:14:17 7580 7580 eclagent(main+0x61) [0x4011a1]\\n00000030 2012-09-24 13:14:17 7580 7580 /lib64/libc.so.6(__libc_start_main+0xf4) [0x3d4b01d994]\\n00000031 2012-09-24 13:14:17 7580 7580 eclagent(__gxx_personality_v0+0xe9) [0x401079]\\n00000032 2012-09-24 13:14:17 7580 7580 ThreadList:\\n439D1940 1134369088 7581: CMPNotifyClosedThread\\n459D2940 1167927616 7582: MP Connection Thread\\n499D4940 1235044672 7584: CSocketSelectThread\\n479D3940 1201486144 7585: LogMsgParentReceiver\\n4B9D5940 1268603200 7587: LogMsgFilterReceiver\\n4D9D6940 1302161728 7588: EclAgent 
Abort Monitor\\n4F9D7940 1335720256 7589: CDaliPublisherClient\\n\\n00000033 2012-09-24 13:14:17 7580 7580 SIG: Segmentation fault(11), accessing 0000000000000000, IP=00002B80756F1A0F
\\n\\ntest_poc::test_data::update_data (the superfile that is the referenced subfile) did exist at one time.\\n\\nIs there any way to clear this thing from my list?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-24 18:18:35\" },\n\t{ \"post_id\": 2427, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nYour file system should still be rock solid. I believe the issue is in the Delete function of the ECL Watch. We are currently investigating this issue as you always have been able to Delete a Superfile in the ECL Watch, even if the sub file had been removed, but we just noticed this in the most recent 3.8 version.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-24 17:55:29\" },\n\t{ \"post_id\": 2426, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Re: Superfile with missing subfile\", \"username\": \"DSC\", \"post_text\": \"It turns out that File.DeleteSuperFile() does remove the superfile from the logical file list, as long as I don't pass TRUE for the second parameter ('delete subfiles'). I don't know if this leaves the file system in an inconsistent state, though. Any information along those lines would be appreciated.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-09-24 17:33:55\" },\n\t{ \"post_id\": 2425, \"topic_id\": 532, \"forum_id\": 17, \"post_subject\": \"Superfile with missing subfile\", \"username\": \"DSC\", \"post_text\": \"I'm building a set of superfiles and subfiles to be used in a data update scenario. In working through the ECL I've made several mistakes, and I now have a peculiar situation. I have a superfile that, according to the 'Browse Logical Files' screen contains one subfile but that subfile doesn't actually exist. Furthermore, when trying to either delete the superfile or to even view its details, I get the following error:\\n\\n Reporter: WsDfu::DFUARRAYACTION()\\n\\n-1\\t2012-09-24 15:53:22 GMT: CDistributedSuperFile: SuperFile poc::test_data::data is missing sub-file file poc::test_data::update_data
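For reference, the workaround discussed earlier in this thread can be expressed in ECL roughly as follows; the logical names are the ones from the log above, and the Standard Library calls should be checked against the reference for your release (this is a sketch, not the platform fix).

IMPORT Std;

// The workaround noted above: delete the superfile entry itself without
// passing TRUE for the second ('delete subfiles') parameter
dropSuper := Std.File.DeleteSuperFile('~test_poc::test_data::all_data');

// Alternative: detach the dangling sub-file reference inside a transaction,
// leaving the (now empty) superfile in place
detachSub := SEQUENTIAL(
  Std.File.StartSuperFileTransaction(),
  Std.File.RemoveSuperFile('~test_poc::test_data::all_data',
                           '~test_poc::test_data::update_data'),
  Std.File.FinishSuperFileTransaction());

dropSuper;  // run one or the other, not both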
\\n\\nHow can I get rid of this superfile?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-09-24 15:57:54\" },\n\t{ \"post_id\": 2542, \"topic_id\": 555, \"forum_id\": 17, \"post_subject\": \"Re: Zipped files in landing zone\", \"username\": \"bforeman\", \"post_text\": \"Cool, thanks Gordon, I should have searched! \\n\\nBob\", \"post_time\": \"2012-10-18 13:27:23\" },\n\t{ \"post_id\": 2541, \"topic_id\": 555, \"forum_id\": 17, \"post_subject\": \"Re: Zipped files in landing zone\", \"username\": \"gsmith\", \"post_text\": \"I think someone beat you to it:\\nhttp://track.hpccsystems.com/browse/HPCC-1371\", \"post_time\": \"2012-10-18 13:23:59\" },\n\t{ \"post_id\": 2537, \"topic_id\": 555, \"forum_id\": 17, \"post_subject\": \"Re: Zipped files in landing zone\", \"username\": \"bforeman\", \"post_text\": \"Hi Thierry,\\n\\nPerhaps the best venue for this type of request would be at the Community Issue Tracker:\\n\\nhttp://track.hpccsystems.com/secure/Dashboard.jspa\\n\\nFrom there you can submit a simple feature request and the entire development team will be able to track the request and comment.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-18 11:49:47\" },\n\t{ \"post_id\": 2533, \"topic_id\": 555, \"forum_id\": 17, \"post_subject\": \"Zipped files in landing zone\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nit would be so nice to have an "unzip" option in the "Spray CSV" page of ECL watch, so that upload time to the landing zone could be reduced (user does not always have the necessary access to the LZ to manually unzip him/herself)\\n\\nThierry.\", \"post_time\": \"2012-10-18 06:02:13\" },\n\t{ \"post_id\": 2758, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"rtaylor\", \"post_text\": \"[quote="hzhang":13y9tc64]Thanks, Dan. \\n\\nThe problem might exist in the VM server internet connection. In the VM console, I pinged 192.168.xx.xx (my local VM server), I got reply. However, once I pinged the remote server, it showed "Network not reachable".\\n\\nDo you know how to set up the network connection for the VM HPCC server?\\n\\nThanks.\\n\\nhzhang,\\n\\nThe default setting for the VM is host-only networking. This means the VM cannot connect out to a network address or be accessed by a network address from anything but the system the VM is running on.\\n\\nIf you want to be able to connected out from the VM you will have to stop the VM and change its networking to Bridged or NAT. Once this is done, you will have to verify that the new IP the VM receives can be accessed correctly from outside your system. You might also have to open up local firewall settings on your system and setup NAT rules if you pick NAT.\\n\\nPhilip\", \"post_time\": \"2012-11-09 19:54:15\" },\n\t{ \"post_id\": 2752, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"DSC\", \"post_text\": \"[quote="hzhang":1injd4gt]Do you know how to set up the network connection for the VM HPCC server?\\n\\nI haven't used the VM, sorry. Maybe Richard or one of the other HPCC folks can shed some light on this one.\\n\\nDan\", \"post_time\": \"2012-11-09 03:07:53\" },\n\t{ \"post_id\": 2748, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"hzhang\", \"post_text\": \"Thanks, Dan. \\n\\nThe problem might exist in the VM server internet connection. In the VM console, I pinged 192.168.xx.xx (my local VM server), I got reply. 
However, once I pinged the remote server, it showed "Network not reachable".\\n\\nDo you know how to set up the network connection for the VM HPCC server?\\n\\nThanks.\", \"post_time\": \"2012-11-09 01:01:23\" },\n\t{ \"post_id\": 2747, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"DSC\", \"post_text\": \"Well, that sounds like the system you're copying from can't find a route to the host. Can you ping that address from the client? Maybe telnet to port 8010 and see if you get an open connection?\\n\\nJust some ideas to try. I'll have to let the HPCC folks chime in on this one, I think. The MP Link failure could be prior to an actual connection, indicating a connection failure, or after connection, indicating a communication failure. I don't know how how to diagnose which except through ping or telnet.\\n\\nDan\", \"post_time\": \"2012-11-09 00:05:47\" },\n\t{ \"post_id\": 2746, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"hzhang\", \"post_text\": \"Thanks for your reply, Dan. I did try using no split option. The problem still exists: waiting for 192.168.xx.xx (my local VM). Then, 10 minutes later, error message shows up at ECL watch as follows:\\n\\nException(s) occured:\\n\\nReporter: FileSpray::COPY()\\n\\nCode\\tMessage\\n1\\t2012-11-08 23:34:49 GMT: MP connect failed (0.0.0.0:7070)\", \"post_time\": \"2012-11-08 23:36:46\" },\n\t{ \"post_id\": 2745, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Re: Remote Copy?\", \"username\": \"DSC\", \"post_text\": \"What error message are you seeing? (You may have to dig into the logs for this, within /var/log/HPCCSystems/mydfuserver/).\\n\\nThere is a known, outstanding issue with copying logical files with variable records lengths to a cluster that has a different number of nodes than the original. Setting the "no split" option to true is a workaround for that issue.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-11-08 19:33:34\" },\n\t{ \"post_id\": 2744, \"topic_id\": 597, \"forum_id\": 17, \"post_subject\": \"Remote Copy?\", \"username\": \"hzhang\", \"post_text\": \"Hi, \\n\\nI installed HPCC VM on my local machine. I was trying to remote copy some logic files I have created on a HPCC server. I input the following in the "remote copy" in ECL Watch:\\nSource\\nLogical file: logic file name in the remote HPCC server. xx::xx::xx\\nsource Dali: the ip address of the remote server: 172.xx.xx.xx:8010\\nSource Username: my user name on remote server\\nPwd: my pwd on remote server\\n\\nDestination\\ngroup: mythor\\nlogical name: xx::xx::xx (I used the same as the source logical name)\\n\\nThen, I clicked "submit". It started showing that "waiting for 192.168.xx.xx (the virtual IP address of my local VM). Then, after about 10 minutes, it showed "failed".\\n\\nCan anybody help me on this?\\n\\nThanks.\", \"post_time\": \"2012-11-08 19:06:00\" },\n\t{ \"post_id\": 2860, \"topic_id\": 612, \"forum_id\": 17, \"post_subject\": \"Re: Metrics / info - roxie queries\", \"username\": \"bforeman\", \"post_text\": \"There is a nice section on Metrics in the Using Roxie PDF:\\n\\nhttp://hpccsystems.com/download/docs/roxie-guide\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 13:38:44\" },\n\t{ \"post_id\": 2807, \"topic_id\": 612, \"forum_id\": 17, \"post_subject\": \"Metrics / info - roxie queries\", \"username\": \"arunarav\", \"post_text\": \"Are there features within HPCC where various metrics on queries served by Roxie are available? 
Examples:No of queries served in a time interval, time taken within HPCC to service the request etc.\\n\\nthanks\\narun\", \"post_time\": \"2012-11-18 15:43:49\" },\n\t{ \"post_id\": 3021, \"topic_id\": 660, \"forum_id\": 17, \"post_subject\": \"Re: Export of graphs\", \"username\": \"tdelbecque\", \"post_text\": \"Thanks a lot for that.\\n\\nI am not the administrator of the cluster on which I work, so I cannot install this addon myself, but I will look at the javascript in it and at the activeX code also to see what I can do client side.\\n\\nbest regards, Thierry.\", \"post_time\": \"2012-12-18 16:30:00\" },\n\t{ \"post_id\": 3019, \"topic_id\": 660, \"forum_id\": 17, \"post_subject\": \"Re: Export of graphs\", \"username\": \"gsmith\", \"post_text\": \"Short answer: No.\\nLonger answer: Maybe...\\nAs you (probably) know the graph view in ECL Watch is displayed using a plugin/ActiveX (source code available at https://github.com/hpcc-systems/GraphControl).\\n\\nThis control is fed XGMML from the server, performs the layout and then displays the graph. \\n\\nTo see its API have a look at: https://github.com/hpcc-systems/GraphCo ... ntrolAPI.h\\n\\nThere you will see a "GetSVG" method!\\n\\n---the following hack is based on the 3.8.6 release---\\nI have attached a modified version of the ECL Playground. It adds a new button “Get SVG” which will fetch the SVG for the displayed graph and put it in the ECL Editor. To make it work just extract the contents of the zip file into:\\n/opt/HPCCSystems/componentfiles/files\\nThere is no need to restart the server, but you will need to refresh your ECL Watch Page.\\nNext, just open the ECL Playground view for your WU of choice and press “Get SVG”.\\n\\nGordon.\\n\\nPS I have added a feature request for this at https://track.hpccsystems.com/browse/HPCC-8487\", \"post_time\": \"2012-12-18 15:26:01\" },\n\t{ \"post_id\": 3016, \"topic_id\": 660, \"forum_id\": 17, \"post_subject\": \"Export of graphs\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nIs there a way to export the graphs of WU in graphical formats such as SVG ? They are indeed very suitable for illustrating technical documentation of WU code.\\n\\nThanks, Thierry.\", \"post_time\": \"2012-12-17 22:28:14\" },\n\t{ \"post_id\": 3174, \"topic_id\": 701, \"forum_id\": 17, \"post_subject\": \"Re: Workunit Graph\", \"username\": \"bforeman\", \"post_text\": \"Hi JS,\\n\\nThere is a little documentation at the following link:\\n\\nhttp://hpccsystems.com/download/docs/ecl-ide-client-tools\\n\\nRegarding skew, assume that you have a three node cluster with 300 records, distributed evenly on each node. We would expect 100 records per node.\\n\\nThe skew percentage always identifies the maximum and minimum conditions. So in the example cluster above, a skew of +200%, -100% means:\\n\\n"There is one cluster processing 200% over the nominal 100 records: 100 + 200 = 300. Another cluster is operating at -100%, so 100-100 is zero." The net result is that one cluster is doing all of the work, and the other two are idle.\\n\\nSo skews with small percentages (in single digits) means that your distribution of data across the nodes are even and well optimized. Indeed, sometime a skew is expected, especially near the final output where the workunit is spitting out the first 100 records from a single node.\\n\\nYes, graphs are great visual tools, used with the timings they can help to identify "hot spots" in your code where improvement might be needed. 
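As a concrete example of acting on a large skew reading (dataset and field names are hypothetical): redistribute on a well-spread key before the heavy step, so each node holds roughly the same share of the records.

Layout := RECORD
  STRING25 lastname;
  STRING15 firstname;
  STRING2  state;
END;

people := DATASET('~test::people', Layout, THOR);

// Hashing a well-spread key evens out the per-node record counts
balanced := DISTRIBUTE(people, HASH32(lastname, firstname));

// A LOCAL sort is now both correct (matching names are co-located) and
// evenly loaded, so the graph should show only a small skew on this activity
sortedPeople := SORT(balanced, lastname, firstname, LOCAL);

OUTPUT(CHOOSEN(sortedPeople, 100));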
The sub-graph I think simply means that the job was split into a smaller process that contributed to the overall result.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2013-01-24 21:57:56\" },\n\t{ \"post_id\": 3171, \"topic_id\": 701, \"forum_id\": 17, \"post_subject\": \"Workunit Graph\", \"username\": \"jacksock\", \"post_text\": \"Is there documentation describing how to use the workunit graph? I am specifically interesting in learning about min skew, max skew etc., and how to use this information to tune my ECL program.\\n\\nIn addition:\\n\\n1. How can I read the graph to understand how the data is distributed and processed? For example if I perform a PROJECT on a 1 million data set that is distributed across 4 nodes, I would like to see how the PROJECT behaved and which nodes participated in the operation and to what extent.\\n\\n2. What is the significance of the sub graph? \\n\\nAny help is appreciated\", \"post_time\": \"2013-01-24 18:55:55\" },\n\t{ \"post_id\": 3646, \"topic_id\": 748, \"forum_id\": 17, \"post_subject\": \"Re: Queued up workunits timing out\", \"username\": \"battleman\", \"post_text\": \"[quote="Sunitha":lm9cl4e8]We are trying to run quite a few workunits on thor, some of them long running (probably run for > hour), so we just queued them up. Unfortunately it appears that there is a 10min timeout and the queued up jobs fail. Is there any way to increase the timeout or disable it?\\n\\nQuick help will be really appreciated!\\n\\nYou can always check out the log in /var/log/HPCCSystems/. That would be helpful.\", \"post_time\": \"2013-03-06 13:58:32\" },\n\t{ \"post_id\": 3405, \"topic_id\": 748, \"forum_id\": 17, \"post_subject\": \"Re: Queued up workunits timing out\", \"username\": \"bforeman\", \"post_text\": \"Each workunit has a Max Runtime setting behind the More button, have you tried to increase the setting there?\", \"post_time\": \"2013-02-10 15:52:25\" },\n\t{ \"post_id\": 3383, \"topic_id\": 748, \"forum_id\": 17, \"post_subject\": \"Queued up workunits timing out\", \"username\": \"Sunitha\", \"post_text\": \"We are trying to run quite a few workunits on thor, some of them long running (probably run for > hour), so we just queued them up. Unfortunately it appears that there is a 10min timeout and the queued up jobs fail. Is there any way to increase the timeout or disable it?\\n\\nQuick help will be really appreciated!\", \"post_time\": \"2013-02-07 17:42:37\" },\n\t{ \"post_id\": 3663, \"topic_id\": 814, \"forum_id\": 17, \"post_subject\": \"Re: spray csv,spray xml succeed ,but can not identify any re\", \"username\": \"bforeman\", \"post_text\": \"What is the error that you are seeing in ECL code?\\n\\nThe job of a spray is to get a file from the landing zone and distribute it to the cluster. Each setting, from the row tag in XML and the separators and terminators in the CSV spray can have a big effect on what you see.\\n\\nThe only reason that you see records in a fixed length spray is a result of a simple math operation - taking the file size and dividing it by the record length that you enter. \\n\\nIn nearly all cases, the RECORD and DATASET statements in your ECL code should match the parameters of your spray. So when you say that the file is not readable, you have just defined it incorrectly.
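A sketch of what "matching the spray" means in practice; the layouts and logical names are invented for illustration, and the options in each DATASET must mirror the settings used when the file was sprayed.

// Delimited (CSV) spray: separator, terminator and quote in the DATASET
// must match what was used at spray time
CsvLayout := RECORD
  STRING name;
  STRING city;
  STRING amount;
END;
csvDS := DATASET('~test::people_csv', CsvLayout,
                 CSV(SEPARATOR(','), TERMINATOR(['\n','\r\n']), QUOTE('"')));

// XML spray: the row tag given at spray time ('Row' here) must match the
// path in the XML option, and fields map onto elements via XPATH
XmlLayout := RECORD
  STRING name {XPATH('Name')};
  STRING city {XPATH('City')};
END;
xmlDS := DATASET('~test::people_xml', XmlLayout, XML('Dataset/Row'));

OUTPUT(csvDS);
OUTPUT(xmlDS);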
\\n\\n
What kind of csv file, xml file, fixed file is available for HPCC System?
\\n\\nCSV is any variable length file\\nXML is any file that contains well-formed XML\\nFixed files conform to a fixed length record file, but when nested child datasets are added, a THOR file can also be variable length.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-07 21:16:46\" },\n\t{ \"post_id\": 3656, \"topic_id\": 814, \"forum_id\": 17, \"post_subject\": \"spray csv,spray xml succeed ,but can not identify any record\", \"username\": \"battleman\", \"post_text\": \"Hi :\\nI create a csv and a xml file ,after I sprayed them ,in the ECL Watch--> Browse Logical Files,the records count value under the "Records" column is empty .And of cause ,I get errors when I read them by ECL code .I have a question : What kind of csv file ,xml file ,fixed file is available for HPCC System ? What's the standard ? How can I know my file is not readable before spraying?\\nAny guidance ?\\nregards !\", \"post_time\": \"2013-03-07 09:32:02\" },\n\t{ \"post_id\": 4377, \"topic_id\": 977, \"forum_id\": 17, \"post_subject\": \"Re: Roxie "Cluster Stopped"\", \"username\": \"richardkchapman\", \"post_text\": \"I suspect this is related to \\n\\nhttps://track.hpccsystems.com/browse/HPCC-8963\\n\\nWhich hopefully will get fixed in 4.2. Basically the information given about the state of the Roxie cluster can be misleading.\", \"post_time\": \"2013-07-30 15:29:27\" },\n\t{ \"post_id\": 4364, \"topic_id\": 977, \"forum_id\": 17, \"post_subject\": \"Roxie "Cluster Stopped"\", \"username\": \"jwilt\", \"post_text\": \"...when it is running "OK", apparently.\\n\\nIn EclWatch, I'm seeing:\\nRoxieCluster - roxie Cluster stopped\\n<Queue running - Cluster stopped>\\nBut I'm able to publish queries to Roxie, and they run fine.\\n\\nI may have something wrong with my configuration.\\nenterprise_3.10.8-4\\n\\nThanks.\", \"post_time\": \"2013-07-26 01:31:28\" },\n\t{ \"post_id\": 4373, \"topic_id\": 982, \"forum_id\": 17, \"post_subject\": \"Re: Access DropZone via FTP\", \"username\": \"DSC\", \"post_text\": \"There is no FTP host software installed by HPCC, as far as I know.\\n\\nI use SFTP, accessing the system with a username/password created on that system. The default dropzone is located at /var/lib/HPCCSystems/mydropzone, but you can adjust that (and create others) in the configuration manager.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-07-30 13:45:32\" },\n\t{ \"post_id\": 4371, \"topic_id\": 982, \"forum_id\": 17, \"post_subject\": \"Access DropZone via FTP\", \"username\": \"siddharth\", \"post_text\": \"I'm running HPCC systems in a VM. I want to upload files to dropzone (LandingZone) via FTP. I can't connect to IP of dropzone displayed under 'Upload/download Files' in ESP. Where can I find the ftp path to upload to?\", \"post_time\": \"2013-07-30 11:59:56\" },\n\t{ \"post_id\": 4405, \"topic_id\": 985, \"forum_id\": 17, \"post_subject\": \"Re: Resetting Roxie Metrics\", \"username\": \"JimD\", \"post_text\": \"It is on my list, now. Thanks\", \"post_time\": \"2013-07-31 16:23:06\" },\n\t{ \"post_id\": 4404, \"topic_id\": 985, \"forum_id\": 17, \"post_subject\": \"Re: Resetting Roxie Metrics\", \"username\": \"DSC\", \"post_text\": \"Great information!\\n\\nIs this documented anywhere? 
If not, can we get it added to the "to be documented" list?\\n\\nThanks!\\n\\nDan\", \"post_time\": \"2013-07-31 16:22:16\" },\n\t{ \"post_id\": 4403, \"topic_id\": 985, \"forum_id\": 17, \"post_subject\": \"Re: Resetting Roxie Metrics\", \"username\": \"richardkchapman\", \"post_text\": \"You can send control messages to Roxie using testsocket:\\n\\n\\ntestsocket roxieip "<control:resetindexmetrics/>"\\ntestsocket roxieip "<control:resetMetrics/>\\ntestsocket roxieip "<control:resetquerystats/>"\\ntestsocket roxieip "<control:resetquerystats><Query id='queryId'/></control:resetquerystats>"\\n
\\n\\nWe should probably provide access via the eclwatch gui...\", \"post_time\": \"2013-07-31 15:53:11\" },\n\t{ \"post_id\": 4394, \"topic_id\": 985, \"forum_id\": 17, \"post_subject\": \"Re: Resetting Roxie Metrics\", \"username\": \"JimD\", \"post_text\": \"Good question. \\n\\nI am researching this for you and someone reply here. After we have an answer, I will add that info to the Using Roxie Manual.\\n\\nYour question also made me think of another question. I would like to know how to view metrics for time periods previous to any restart. \\n\\nStay tuned...\", \"post_time\": \"2013-07-31 14:01:24\" },\n\t{ \"post_id\": 4382, \"topic_id\": 985, \"forum_id\": 17, \"post_subject\": \"Resetting Roxie Metrics\", \"username\": \"DSC\", \"post_text\": \"In the "Cluster Processes" section of ECL Watch, you can choose to view Roxie Metrics for a Roxie cluster. Is there a way to reset those statistics for testing/evaluation purposes? Restarting the Roxie cluster performs a reset, but I was hoping for something a little less drastic.\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-07-31 03:29:45\" },\n\t{ \"post_id\": 4854, \"topic_id\": 1096, \"forum_id\": 17, \"post_subject\": \"Re: Count of records shown in Result Section\", \"username\": \"bforeman\", \"post_text\": \"Hi Rajesh,\\n\\nThe short answer is that "Yes", this is expected behavior, because you requested an indexed recordset result, so the compiler wanted to accommodate you and overwrite the 100 record limit and stream to you all of the records that you requested. A filtered request like the example in Step 1 will use the 100 record limit that was set.\\n\\nAlso, you may notice that any OUTPUT to a logical file (third parameter of OUTPUT) will also stream all of the results of the logical file. \\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-10-30 13:27:15\" },\n\t{ \"post_id\": 4850, \"topic_id\": 1096, \"forum_id\": 17, \"post_subject\": \"Count of records shown in Result Section\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Hi,\\n\\nBelow is the sequence of lines on my ecl file. On my ECL IDE editor, I have set the Limit/count of returned rows to be shown on ECL watch as 100.\\n1. STEP1: Create a Record set of a DataSet,which contains 40844 rows and OUTPUT\\n FloridaPersons :=(Tutorial.File_OriginalPerson(Tutorial.File_OriginalPerson.state = 'FL'));\\n OUTPUT(FloridaPersons,NAMED('FloridaPersons'));\\n2. STEP2: Create an Indexed recordset as below and OUTPUT that:\\n AllFloridaPersonsExceptFirst:= FloridaPersons[2..] ; //all recs except the first\\n OUTPUT(AllFloridaPersonsExceptFirst,NAMED('AllFloridaPersonsExceptFirst'));\\n\\nOn execution, ECL watch "Result Section" displayed count as 100 rows for STEP1 result, whereas 40843 for STEP2 result.\\n\\nMy expectation was count of rows will be displayed as 100 for both STEP1 and STEP2 because the Limit of rows has been set as 100 on my ECL IDE.\\n\\nIs this difference an expected behavior? i.e., result of Indexed RecordSet/DataSet will display the total count of returned records irrespective of Limit set on ECL IDE. \\n\\nRegards,\\nRajesh\", \"post_time\": \"2013-10-30 09:40:35\" },\n\t{ \"post_id\": 4929, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Gordon! Problem solved. \\n\\nThis time, it did tell me in Dev tools that it is running in compatibility view because 'intranet sites' are asked to (no indicator in the address bar, though!). 
I unchecked the compatibility view option for intranet and it started displaying fine. Now it works from ECL watch too. \\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-13 05:11:15\" },\n\t{ \"post_id\": 4920, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"gsmith\", \"post_text\": \"The issue you are seeing has been fixed in a later build and has to do with the use of "default" in a qualified object identifier:\\n if (response.TpLogicalClusterQueryResponse.default)...\\n\\nThe reason I am a little confused is that this only affected (I thought) IE-8 (hence the question about the compatibility mode).\\n\\nCan you try the following:\\n1. Open the playground in its own web page: http://<VMIP>:8010/esp/files/stub.htm?Widget=ECLPlaygroundWidget\\n2. Press F12 and ensure your browser is in IE10/Standards mode (see attachement).\\n\\nThanks,\\n\\nGordon.\", \"post_time\": \"2013-11-12 11:21:36\" },\n\t{ \"post_id\": 4918, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Compatibility View isn't ON.\", \"post_time\": \"2013-11-12 06:11:35\" },\n\t{ \"post_id\": 4917, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"gsmith\", \"post_text\": \"Is "compatability mode" on?\\n\\n\\nIf so can you turn it off and see if that helps?\", \"post_time\": \"2013-11-12 06:00:45\" },\n\t{ \"post_id\": 4916, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"1. I am using http://<VMIP>:8010/ to connect to ECL Watch\\n2. I am using IE version 10\\n3. Yes, there are error messages in the console - given below\\n\\nSCRIPT1010: Expected identifier \\nTargetSelectWidget.js, line 198 character 68\\n\\nSCRIPT1010: Expected identifier \\nESPWorkunit.js, line 217 character 81\\n\\ndojo/parser::parse() error[object Error]\\n\\nIn both cases, it seems to error out in the lines referring to 'TpLogicalClusterQueryResponse'\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-12 05:45:52\" },\n\t{ \"post_id\": 4914, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"gsmith\", \"post_text\": \"I was going to ask:\\n1. What URL are you using?\\n2. What browser version are you using?\\n\\nAre there any console messages (Press F12 and click on the console tab - available in most browsers).\", \"post_time\": \"2013-11-11 13:43:23\" },\n\t{ \"post_id\": 4913, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"Re: ECL Playground is blank\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nI just tested my community_4.0.2-2 VM, and the ECL Playground is showing up fine in both Firefox and IE. I don't see any open issues reported. Have you tried a different browser?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-11 13:40:21\" },\n\t{ \"post_id\": 4910, \"topic_id\": 1109, \"forum_id\": 17, \"post_subject\": \"ECL Playground is blank\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Ever since I started using VM-4.0.2-2, my ECL Playground page is showing as blank after a brief 'loading' message. 
Any ideas why?\\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-11-11 11:25:35\" },\n\t{ \"post_id\": 5984, \"topic_id\": 1374, \"forum_id\": 17, \"post_subject\": \"Re: Changed IP address to an aliase\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Hi Smith,\\nThanks!\\nMy bad, all the services were stopped. Once I started, I am able to access with alias name and also with IP address.\\nThanks for your suggestion.\", \"post_time\": \"2014-06-30 07:07:07\" },\n\t{ \"post_id\": 5983, \"topic_id\": 1374, \"forum_id\": 17, \"post_subject\": \"Re: Changed IP address to an aliase\", \"username\": \"gsmith\", \"post_text\": \"To check if its running:\\nsudo service hpcc-init status\\n\\nTo Start\\nsudo service hpcc-init start\", \"post_time\": \"2014-06-30 07:00:48\" },\n\t{ \"post_id\": 5982, \"topic_id\": 1374, \"forum_id\": 17, \"post_subject\": \"Re: Changed IP address to an aliase\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Hi Smith,\\nI tried accessing ECL Watch on the same machine as ESP (http://localhost:8010/). It didn't work.\\nCould you please let me know on how to check whether ESP has started.\", \"post_time\": \"2014-06-30 06:59:33\" },\n\t{ \"post_id\": 5981, \"topic_id\": 1374, \"forum_id\": 17, \"post_subject\": \"Re: Changed IP address to an aliase\", \"username\": \"gsmith\", \"post_text\": \"This sounds like a network configuration issue.\\n\\nIf you are unable to access ECL Watch via http://xx.xxx.xx.xxx:8010/ then I would:\\n1. Check ESP is started\\n2. Check you can access ECL Watch on the same machine as ESP (http://localhost:8010/)\\n3. Double check the IP address for ESP (it may have changed when you changed the alias?).\\n4. On the remote machine do a "tracert" to the IP address (this will ensure your network connectivity allows you to access the remote machine).\\n5. Ensure there is no firewalls preventing access to the IP:PORT adddress\\n\\nOnce you have regained access to http://xx.xxx.xx.xxx:8010/ then I would look at testing the alias.\\n\\nNote: If you can't access ECL Watch in the web browser, then the IDE will NOT be able to access the WebServices either (IOW get the browser working first).\", \"post_time\": \"2014-06-30 06:48:09\" },\n\t{ \"post_id\": 5980, \"topic_id\": 1374, \"forum_id\": 17, \"post_subject\": \"Changed IP address to an aliase\", \"username\": \"rajesh.dorairaj\", \"post_text\": \"Dear Team,\\n\\nRecently we created an alias for LINUX machine where HPCCSystem is installed. \\nI am not able to access ESP from the time alias was created. \\nI tried by accessing using IP http://xx.xxx.xx.xxx:8010/ and also using http://alias:8010. In any case I am not able to access ESP and not able to login to ECL IDE.\\n\\nPlease help me to resolve that.\", \"post_time\": \"2014-06-30 06:39:00\" },\n\t{ \"post_id\": 6031, \"topic_id\": 1383, \"forum_id\": 17, \"post_subject\": \"HPCC Systems 5.0 new ECL Watch\", \"username\": \"HPCC Staff\", \"post_text\": \"There are some major changes to the ECL Watch interface in HPCC Systems 5.0 which is now displayed as the default version in this release. \\n\\nTo help you adjust to the new interface, we have created the HPCC ECL Watch 5.0 Transition Guide. https://wiki.hpccsystems.com/display/hp ... tion+Guide\\n\\nExisting users will find managing the transition from the legacy version easier using the Quick guide for users upgrading from HPCC 4.x. 
It is designed to help you find features which have changed location and provides information about new features we have added: https://wiki.hpccsystems.com/display/hp ... m+HPCC+4.x \\n\\nNew ECL Watch 5.0 Feature Highlights\\n\\n•\\tAccess to different areas of the system while viewing workunit’s/files is now seamless. For example, when viewing an ECL Workunit/DFU Workunit, additional tabs and menus provide access to actions and graphs, results, logical files, query information etc without having to close windows or retrace your steps. \\n•\\tMultiple workunit’s can be opened which are displayed on tabs and you can move between them also in a seamless fashion keeping as many open as you need to complete your work.\\n•\\tSignificantly improved queries area with icons showing status, and tabs giving direct access to WU and logical file information.\\n•\\tDirect access to test pages such as Legacy WS-ECL form for published queries\\n•\\tSuperfiles and logical files associated with published queries are now clearly listed. This information is available from the workunit associated with a query, the query details page and when viewing the query details page using the logical file details page.\\n•\\tVisualisations of results for a number of chart types\\n•\\tMultiple files of the same type can be sprayed in one action.\\n•\\tView by Scope switch for toggling between viewing the list of logical files by logical name or by scope.\\n•\\tGlobal search facility\\n•\\tHex previewer for viewing the contents of files on the landing zone\\n•\\tAdd File utility for adding files stored on another machine or HPCC landing zone\\n•\\tImproved Permissions area which is now located in the Operations section.\", \"post_time\": \"2014-07-11 13:09:01\" },\n\t{ \"post_id\": 6069, \"topic_id\": 1393, \"forum_id\": 17, \"post_subject\": \"Re: Not able to access graphs from Workunits\", \"username\": \"bforeman\", \"post_text\": \"The graph control should only be installed on the computer with the web browser. What version is your cluster and what browser are you using? I would try a re-install first.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-07-21 12:21:49\" },\n\t{ \"post_id\": 6062, \"topic_id\": 1393, \"forum_id\": 17, \"post_subject\": \"Not able to access graphs from Workunits\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am not able to access the graphs from the "Browse Workunits" section in ESP Page.\\n\\nEach time i click on the graphs , get a pop up telling \\n\\n
\\n"Graph Control needs to be installed to visualize activity graphs"\\n
\\n\\nI have installed the graph control plugin and placed the same in installation folder of ECL IDE, still get the same error.\\n\\nDo we need to do any other additional setting changes to view the graphs from the ESP page ? \\n\\nI am able to view the graphs from ECL IDE.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-07-17 16:16:31\" },\n\t{ \"post_id\": 6176, \"topic_id\": 1408, \"forum_id\": 17, \"post_subject\": \"Re: Spray Delimited Not Parsing Fields\", \"username\": \"rtaylor\", \"post_text\": \"fmorstatter,When I spray the file and look at it in ECL watch, I see only two fields: "line", and "__fileposition__".
Spray is a "dumb" operation. Its mission is to get the data onto your cluster as quickly as possible and its only real intelligence is to make sure that a single record never spans multiple nodes.\\n\\nWhen you are spraying a CSV file, the Spray operation itself doesn't know or care what the field structure of the file is. Therefore, when you use ECL Watch to "View Data File" the DFU has no metadata about the field structure, which is why you see the data just as "Line" and "fileposition."\\n\\nAs Bob pointed out, in 5.0 the Delimited spray (AKA: CSV) now has the option of reading the first record for the field names and giving you a RECORD structure that you can copy and use in your ECL code that works with that data, saving you having to type it all in, but it still does not put that information into the DFU's metadata about the file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-08-05 14:35:18\" },\n\t{ \"post_id\": 6171, \"topic_id\": 1408, \"forum_id\": 17, \"post_subject\": \"Re: Spray Delimited Not Parsing Fields\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nWhen I sprayed your sample using " as the Quote character, I was able to read in your fields of your sample file correctly. \\n\\nIf you are using the 5.0 ECL Watch, the Delimited Spray Option has a check box that says "Record Structure Present". Checking that box for the spray yielded the following result from your sample file:\\n\\nRECORD\\n STRING created_at;\\n STRING entities_user_mentions;\\n STRING entities_hashtags;\\n STRING entities_urls;\\n STRING favorite_count;\\n STRING favorited;\\n STRING filter_level;\\n STRING geotagged;\\n STRING id_str;\\n STRING in_reply_to_screen_name;\\n STRING in_reply_to_status_id_str;\\n STRING in_reply_to_user_id_str;\\n STRING lang;\\n STRING lat;\\n STRING lng;\\n STRING place_country;\\n STRING place_country_code;\\n STRING place_full_name;\\n STRING place_id;\\n STRING place_name;\\n STRING place_place_type;\\n STRING place_url;\\n STRING possibly_sensitive;\\n STRING retweet_count;\\n STRING retweeted;\\n STRING retweeted_status_created_at;\\n STRING retweeted_status_entities_user_mentions;\\n STRING retweeted_status_entities_hashtags;\\n STRING retweeted_status_entities_urls;\\n STRING retweeted_status_favorite_count;\\n STRING retweeted_status_favorited;\\n STRING retweeted_status_geotagged;\\n STRING retweeted_status_id_str;\\n STRING retweeted_status_in_reply_to_screen_name;\\n STRING retweeted_status_in_reply_to_status_id_str;\\n STRING retweeted_status_in_reply_to_user_id_str;\\n STRING retweeted_status_lang;\\n STRING retweeted_status_lat;\\n STRING retweeted_status_lng;\\n STRING retweeted_status_place_country;\\n STRING retweeted_status_place_country_code;\\n STRING retweeted_status_place_full_name;\\n STRING retweeted_status_place_id;\\n STRING retweeted_status_place_name;\\n STRING retweeted_status_place_place_type;\\n STRING retweeted_status_place_url;\\n STRING retweeted_status_possibly_sensitive;\\n STRING retweeted_status_retweet_count;\\n STRING retweeted_status_retweeted;\\n STRING retweeted_status_source;\\n STRING retweeted_status_text;\\n STRING retweeted_status_truncated;\\n STRING retweeted_status_user_contributors_enabled;\\n STRING retweeted_status_user_created_at;\\n STRING retweeted_status_user_default_profile;\\n STRING retweeted_status_user_default_profile_image;\\n STRING retweeted_status_user_description;\\n STRING retweeted_status_user_favourites_count;\\n STRING retweeted_status_user_followers_count;\\n STRING 
retweeted_status_user_following;\\n STRING retweeted_status_user_friends_count;\\n STRING retweeted_status_user_geo_enabled;\\n STRING retweeted_status_user_id_str;\\n STRING retweeted_status_user_is_translation_enabled;\\n STRING retweeted_status_user_is_translator;\\n STRING retweeted_status_user_lang;\\n STRING retweeted_status_user_listed_count;\\n STRING retweeted_status_user_location;\\n STRING retweeted_status_user_name;\\n STRING retweeted_status_user_notifications;\\n STRING retweeted_status_user_profile_background_color;\\n STRING retweeted_status_user_profile_background_image_url;\\n STRING retweeted_status_user_profile_background_image_url_https;\\n STRING retweeted_status_user_profile_background_tile;\\n STRING retweeted_status_user_profile_banner_url;\\n STRING retweeted_status_user_profile_image_url;\\n STRING retweeted_status_user_profile_link_color;\\n STRING retweeted_status_user_profile_sidebar_border_color;\\n STRING retweeted_status_user_profile_sidebar_fill_color;\\n STRING retweeted_status_user_profile_text_color;\\n STRING retweeted_status_user_profile_use_background_image;\\n STRING retweeted_status_user_protected;\\n STRING retweeted_status_user_screen_name;\\n STRING retweeted_status_user_statuses_count;\\n STRING retweeted_status_user_time_zone;\\n STRING retweeted_status_user_url;\\n STRING retweeted_status_user_utc_offset;\\n STRING retweeted_status_user_verified;\\n STRING source;\\n STRING text;\\n STRING truncated;\\n STRING user_contributors_enabled;\\n STRING user_created_at;\\n STRING user_default_profile;\\n STRING user_default_profile_image;\\n STRING user_description;\\n STRING user_favourites_count;\\n STRING user_follow_request_sent;\\n STRING user_followers_count;\\n STRING user_following;\\n STRING user_friends_count;\\n STRING user_geo_enabled;\\n STRING user_id_str;\\n STRING user_is_translation_enabled;\\n STRING user_is_translator;\\n STRING user_lang;\\n STRING user_listed_count;\\n STRING user_location;\\n STRING user_name;\\n STRING user_notifications;\\n STRING user_profile_background_color;\\n STRING user_profile_background_image_url;\\n STRING user_profile_background_tile;\\n STRING user_profile_image_url;\\n STRING user_profile_link_color;\\n STRING user_profile_sidebar_border_color;\\n STRING user_profile_sidebar_fill_color;\\n STRING user_profile_text_color;\\n STRING user_profile_use_background_image;\\n STRING user_protected;\\n STRING user_screen_name;\\n STRING user_statuses_count;\\n STRING user_time_zone;\\n STRING user_url;\\n STRING user_utc_offset;\\n STRING user_verified;\\n STRING field127;\\nEND;
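As a minimal sketch of how that generated structure might be used (the logical file name below is hypothetical, not taken from your spray), the copied RECORD can be pasted into your ECL and referenced from a CSV DATASET declaration, with HEADING(1) skipping the field-name row and QUOTE matching the quote character used for the spray:

// Hypothetical logical file name; substitute the name you sprayed to.
Layout_Tweets := RECORD
  STRING created_at;
  STRING id_str;
  // ...remaining fields copied from the generated structure...
  STRING field127;
END;
tweets := DATASET('~thor::in::tweets_2012_01', Layout_Tweets, CSV(HEADING(1), QUOTE('"')));
OUTPUT(CHOOSEN(tweets, 10));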
\\n\\nNow in versions prior to 5.0, when you look at the Details of the sprayed sample, you will see Field1, Field2, ....Field127 in the ECL Watch details instead.\\n\\nI am checking to see if there was an option in the DFUPlus utility that enabled this result.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-08-05 12:25:34\" },\n\t{ \"post_id\": 6157, \"topic_id\": 1408, \"forum_id\": 17, \"post_subject\": \"Spray Delimited Not Parsing Fields\", \"username\": \"fmorstatter\", \"post_text\": \"Hello,\\n\\nI am trying to spray a CSV file. When I spray the file and look at it in ECL watch, I see only two fields: "line", and "__fileposition__". Once I read it in through ECL and Output the dataset again, the fields are parsed. Another thing I've noticed is that the original file I spray is 9,806 bytes, while the re-sprayed file is 78,930 bytes.\\n\\nDo you have any what could cause this behavior? Can I make it so that the fields are parsed on the first spray?\\n\\n-Fred\\n\\nExtra materials: \\n1) A sample of 20 lines from the file I'm trying to spray: 2012_01.20lines.csv.txt. I had to add the extension .txt to satisfy the forum system.\\n2) The ECL code I use to "re-spray" the file, which causes the output file to be parsed:\\n\\nimport $;\\nOUTPUT(DISTRIBUTE($.tweets, HASH32($.tweets.id_str)) ,, 'all6');\\n\\n3) A screenshot of the initial "two-field" configuration: http://i.imgur.com/0vVfBlC.png.\", \"post_time\": \"2014-08-04 14:27:13\" },\n\t{ \"post_id\": 6537, \"topic_id\": 1492, \"forum_id\": 17, \"post_subject\": \"Re: Timings\", \"username\": \"bforeman\", \"post_text\": \"Hi Keren,\\n\\nThere is a new video on the HPCC Systems web site that discusses the new 5.0 Graph control:\\n\\nhttp://hpccsystems.com/community/training-videos/ecl-watch-interface\\n\\nAs the video shows, the new "heat" graph can be used to identify hot spots and select timings directly. It's quite useful and very in depth.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-10-30 14:20:20\" },\n\t{ \"post_id\": 6529, \"topic_id\": 1492, \"forum_id\": 17, \"post_subject\": \"Timings\", \"username\": \"kereno\", \"post_text\": \"Hello,\\n\\nI am looking at a query graph in HPCC v 5 and would be interested to see the timings for each subcomponent of the graph. For example, how long did the query spend in the "local sort" component, or in the distribute? \\n\\nAlso, is there documentation about the graph visualization tool?\\n\\nThank you,\\nKeren\", \"post_time\": \"2014-10-29 19:04:56\" },\n\t{ \"post_id\": 6879, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Re: Status during spray\", \"username\": \"Lee_Meadows\", \"post_text\": \"I submitted. I can not do any screenshots from my systems.\\n\\nhttps://track.hpccsystems.com/browse/HD-2\", \"post_time\": \"2015-01-30 21:01:54\" },\n\t{ \"post_id\": 6877, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Re: Status during spray\", \"username\": \"rtaylor\", \"post_text\": \"Lee,\\n\\nYou should submit a JIRA ticket on that.\\n\\nRichard\", \"post_time\": \"2015-01-29 21:52:13\" },\n\t{ \"post_id\": 6876, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Re: Status during spray\", \"username\": \"Lee_Meadows\", \"post_text\": \"JimD, \\n\\n Are you sure about that? Because I did sprays with replication on and off.\\n\\n My Percent Done will be at 100% when my Progress is at 10%. If it were replication, wouldn't it be 100% / 50%.\\n I just pulled up Legacy ECLwatch and Percent Done and Progress Message match. 
Switch over to new ECL watch and it's at 100% while Progress still going.\\n \\n Looks like to me that it's a code bug with a decimal point in the wrong place. \\n\\nusing community v5.0.4-1 el6\\n\\nLee\", \"post_time\": \"2015-01-29 21:46:57\" },\n\t{ \"post_id\": 6650, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Re: Status during spray\", \"username\": \"JimD\", \"post_text\": \"The Percent Done is for the current task and the Progress is for the entire task. \\n\\nWhen replication is enabled, spraying a file has two steps--It first sprays to the primary location, then copies from that node to the mirror(s).\\n\\nIn other words, in a typical multi-node Thor with N+1 replication, when the file spray completes to the primary data location, the Percent Done would be 100% and the Progress is 50% until it copies to the mirror.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2014-12-01 20:47:56\" },\n\t{ \"post_id\": 6647, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Re: Status during spray\", \"username\": \"bforeman\", \"post_text\": \"They should both mean the same thing Srini, one is text based and the other graphical. The disparity of the display may be due to the frequency of the refresh. You might want to open a JIRA issue to solicit comments from the developers.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-12-01 13:27:43\" },\n\t{ \"post_id\": 6626, \"topic_id\": 1522, \"forum_id\": 17, \"post_subject\": \"Status during spray\", \"username\": \"omnibuzz\", \"post_text\": \"[attachment=0:2fgili94]Progress.png\\nAs you see above, when I am spraying a file, I get 2 statuses. Percent done and Progress message. \\nWhat does each mean? I see the percent done quickly reaching 100% but the progress is slower. \\nThanks\\nSrini\", \"post_time\": \"2014-11-21 21:19:42\" },\n\t{ \"post_id\": 6632, \"topic_id\": 1524, \"forum_id\": 17, \"post_subject\": \"Re: Ws_Sql Invalid table name or file type not supported: te\", \"username\": \"bobl\", \"post_text\": \"This problem has been resolved\", \"post_time\": \"2014-11-25 01:03:13\" },\n\t{ \"post_id\": 6628, \"topic_id\": 1524, \"forum_id\": 17, \"post_subject\": \"Ws_Sql Invalid table name or file type not supported: test\", \"username\": \"bobl\", \"post_text\": \"Hi,\\n\\nI run into a problem with Ws_SQL Web Services. When I use the PrepareSQL method soap test. I can't query table.\\n\\nRequest:\\n
<?xml version="1.0" encoding="UTF-8"?>\\n<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns="urn:hpccsystems:ws:ws_sql">\\n <soap:Body>\\n <PrepareSQLRequest>\\n <SqlText>select * from test</SqlText>\\n <TargetCluster>hthor</TargetCluster>\\n <TargetQuerySet/>\\n <Wait>-1</Wait>\\n </PrepareSQLRequest>\\n </soap:Body>\\n</soap:Envelope>
\\n\\nResponse:\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext">\\n <soap:Body>\\n <PrepareSQLResponse xmlns="urn:hpccsystems:ws:ws_sql">\\n <Exceptions>\\n <Exception>\\n <Code>-1</Code>\\n <Audience>user</Audience>\\n <Source>CSoapResponseBinding</Source>\\n <Message>2014-11-24 06:32:30 GMT: Invalid table name or file type not supported: test\\n</Message>\\n </Exception>\\n </Exceptions>\\n </PrepareSQLResponse>\\n </soap:Body>\\n</soap:Envelope>
\\n\\nfollowing are the GetDBMetaData\\n<?xml version="1.0" encoding="utf-8"?>\\n<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:wsse="http://schemas.xmlsoap.org/ws/2002/04/secext">\\n <soap:Body>\\n <GetDBMetaDataResponse xmlns="urn:hpccsystems:ws:ws_sql">\\n <TableCount>0</TableCount>\\n <QuerySets>\\n <QuerySet>\\n <Name>hthor</Name>\\n <QuerySetQueries>\\n <QuerySetQuery>\\n <Name>test</Name>\\n <Id>test.1</Id>\\n <Wuid>W20141124-101734</Wuid>\\n <Suspended>1</Suspended>\\n <Signature>\\n <ResultSets>\\n <ResultSet>\\n <name>Result 1</name>\\n <OutParams>\\n <OutParam>\\n <Name>personid</Name>\\n <Type>unsigned1</Type>\\n </OutParam>\\n <OutParam>\\n <Name>name</Name>\\n <Type>string25</Type>\\n </OutParam>\\n <OutParam>\\n <Name>age</Name>\\n <Type>integer8</Type>\\n </OutParam>\\n <OutParam>\\n <Name>height</Name>\\n <Type>integer8</Type>\\n </OutParam>\\n <OutParam>\\n <Name>address</Name>\\n <Type>string25</Type>\\n </OutParam>\\n </OutParams>\\n </ResultSet>\\n </ResultSets>\\n </Signature>\\n </QuerySetQuery>\\n <QuerySetQuery>\\n <Name>bobtest</Name>\\n <Id>bobtest.1</Id>\\n <Wuid>W20141124-134421</Wuid>\\n <Suspended>0</Suspended>\\n <Signature>\\n <ResultSets>\\n <ResultSet>\\n <name>Result 1</name>\\n <OutParams>\\n <OutParam>\\n <Name>personid</Name>\\n <Type>unsigned1</Type>\\n </OutParam>\\n <OutParam>\\n <Name>firstname</Name>\\n <Type>string15</Type>\\n </OutParam>\\n <OutParam>\\n <Name>lastname</Name>\\n <Type>string25</Type>\\n </OutParam>\\n </OutParams>\\n </ResultSet>\\n </ResultSets>\\n </Signature>\\n </QuerySetQuery>\\n </QuerySetQueries>\\n <QuerySetAliases>\\n <QuerySetAlias>\\n <Id>test.1</Id>\\n <Name>test</Name>\\n </QuerySetAlias>\\n <QuerySetAlias>\\n <Id>bobtest.1</Id>\\n <Name>bobtest</Name>\\n </QuerySetAlias>\\n </QuerySetAliases>\\n </QuerySet>\\n <QuerySet>\\n <Name>thor</Name>\\n </QuerySet>\\n <QuerySet>\\n <Name>roxie</Name>\\n </QuerySet>\\n <QuerySet>\\n <Name>thor_roxie</Name>\\n </QuerySet>\\n </QuerySets>\\n <ClusterNames>\\n <ClusterName>hthor</ClusterName>\\n <ClusterName>thor</ClusterName>\\n <ClusterName>roxie</ClusterName>\\n <ClusterName>thor_roxie</ClusterName>\\n </ClusterNames>\\n </GetDBMetaDataResponse>\\n </soap:Body>\\n</soap:Envelope>
\\n\\nfollowing are the create 'test' table code,\\n\\nLayout_Person := RECORD\\n UNSIGNED1 PersonID;\\n STRING25 Name;\\n INTEGER age;\\n INTEGER height;\\n STRING25 address;\\n \\nEND;\\n\\nt := DATASET([ {1,'bob',23,180,'xian'},\\n {2,'ivy',28,160,'gaolin'},\\n {3,'leo',28,180,'xian'},\\n {4,'neal',35,175,'tangyanlu'},\\n {5,'owen',32,170,'tianshui'},\\n {6,'jake',34,170,'wuhanxiangyang'}],Layout_Person);\\n\\ntest := t;\\n\\n// Outputs ---\\ntest;
\\n\\nI am not able to figure out what I am doing wrong. and I don't know how to set the tablename. \\n\\nThanks\\nBob\", \"post_time\": \"2014-11-24 06:40:48\" },\n\t{ \"post_id\": 7706, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"georgeb2d\", \"post_text\": \"Baggett, Don\", \"post_time\": \"2015-06-01 17:32:44\" },\n\t{ \"post_id\": 7705, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"rtaylor\", \"post_text\": \"One of our guys wants to look at your screen on Lync. Can you please give us your full name so he can contact you?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2015-06-01 17:31:37\" },\n\t{ \"post_id\": 7704, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"georgeb2d\", \"post_text\": \"No change. Thanks for the suggestion.\", \"post_time\": \"2015-06-01 17:28:45\" },\n\t{ \"post_id\": 7703, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"bforeman\", \"post_text\": \"I'm glad at least you can see files in the Legacy ECL Watch.\\n\\nTry clearing your cache on your browser (i.e., F5)- that worked for some of my students in the classroom.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-01 16:53:16\" },\n\t{ \"post_id\": 7702, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"georgeb2d\", \"post_text\": \"I just tried another computer. Same result. I was able to go to legacy and get the file list.\", \"post_time\": \"2015-06-01 15:21:15\" },\n\t{ \"post_id\": 7701, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"rtaylor\", \"post_text\": \"Does it happen when you login on another computer, or just yours?\", \"post_time\": \"2015-06-01 15:11:29\" },\n\t{ \"post_id\": 7699, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"georgeb2d\", \"post_text\": \"It seems to be peculiar to my login. My login has been deleted and restored. Still no change.\", \"post_time\": \"2015-06-01 14:38:35\" },\n\t{ \"post_id\": 7698, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Re: Logical Files is Blank\", \"username\": \"bforeman\", \"post_text\": \"What ECL watch version are you using? 4.x or 5.x?\\nIf you are using the 5.x version, try Browsing Logical Files from the Legacy ECL Watch.\\n\\nI just tested it, and although I saw a delay in Loading, the files eventually did display.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-01 14:24:41\" },\n\t{ \"post_id\": 7678, \"topic_id\": 1748, \"forum_id\": 17, \"post_subject\": \"Logical Files is Blank\", \"username\": \"georgeb2d\", \"post_text\": \"I am on Alpha_DEV_Thor.\\n\\nAbout 5 PM yesterday I needed to go to logical files and they went blank. I am not able to see any logical files. It worked for about five minutes this morning. Then I went to a particular file and then it went blank again. \\n\\nTHis occurs on Chrome, IE, and Waterfox. \\n\\nApparently everyone else is fine. \\n\\nThis is driving me crazy. 
Please assist.\", \"post_time\": \"2015-05-28 14:42:20\" },\n\t{ \"post_id\": 7752, \"topic_id\": 1765, \"forum_id\": 17, \"post_subject\": \"Re: Changing ECL Watch language\", \"username\": \"Mauricio\", \"post_text\": \"[quote="gsmith":1f4uxpeb]The ECL Watch locale is triggered by the language that the Client Web Browser uses, not what the server uses.\\n\\nThank you! \", \"post_time\": \"2015-06-13 16:24:20\" },\n\t{ \"post_id\": 7751, \"topic_id\": 1765, \"forum_id\": 17, \"post_subject\": \"Re: Changing ECL Watch language\", \"username\": \"gsmith\", \"post_text\": \"The ECL Watch locale is triggered by the language that the Client Web Browser uses, not what the server uses.\", \"post_time\": \"2015-06-13 15:08:09\" },\n\t{ \"post_id\": 7750, \"topic_id\": 1765, \"forum_id\": 17, \"post_subject\": \"Changing ECL Watch language\", \"username\": \"Mauricio\", \"post_text\": \"Hello, I have just recently started learning how to use the HPCC System but I wanted to use it in another language rather than English.\\n\\nI have been using HPCCSystemsVM-5.2.2-1-vmx.ova on VMWare.\\n\\nI listed every language option by running \\n
locale -a
\\n\\nthen\\n\\nsudo locale-gen pt_BR.UTF-8
\\n\\nto add Brazilian Portuguese and then finally changed the file /etc/default/locale to:\\n\\n\\nLANG=pt_BR.utf8\\nLANGUAGE=pt_BR:\\n
\\n\\nBut even after all that it is still using the //opt/HPCCSystem/componentfiles/files/eclwatch/nls/hpcc_en-us.js file to display every text on ECL Watch.\\n\\nAny suggestions?\\n\\nThank you!\", \"post_time\": \"2015-06-13 13:48:38\" },\n\t{ \"post_id\": 7794, \"topic_id\": 1769, \"forum_id\": 17, \"post_subject\": \"Re: WsWorkunitsGetGraph\", \"username\": \"bforeman\", \"post_text\": \"First, try the query again on another cluster if possible, and see if you get the same error.\\n\\nSecond, try another query on the same cluster where you saw the error and see if it is consistent for all queries on that cluster.\\n\\nHow does your ECL IDE version differ from the ECL Watch version?\\n\\nThe error text suggests that the server is missing some files / bad build – but I am doubtful that this would be the case here. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-18 13:00:58\" },\n\t{ \"post_id\": 7767, \"topic_id\": 1769, \"forum_id\": 17, \"post_subject\": \"WsWorkunitsGetGraph\", \"username\": \"georgeb2d\", \"post_text\": \"In my ECL IDE I am able to see the graph 11 for my current Workunit. W20150615-134201. \\n\\nHowever on ECL watch for Alpha_Dev_thor on the same graph this is what I get:\\nWsWorkunits.WUGetGraph\\n\\n<h3>this._plugin.mergeXGMML is not a function</h3><p>.cache["hpcc/GraphWidget"]/</<.loadXGMML@http://10.194.10.2:8010/esp/files/eclwatch/hpcc.js:693:19 .mergeGraphFromXGMML@http://10.194.10.2:8010/esp/files/eclwatch/GraphPageWidget.js:21:468 .refreshGraphFromWU/<@http://10.194.10.2:8010/esp/files/eclwatch/GraphPageWidget.js:23:311 .cache["hpcc/ESPWorkunit"]/</g<.fetchGraphXgmml/<.load@http://10.194.10.2:8010/esp/files/eclwatch/hpcc.js:594:158 .cache["hpcc/ESPRequest"]/</s<._send/<@http://10.194.10.2:8010/esp/files/eclwatch/hpcc.js:394:444 .cache["dojo/Deferred"]/</m@http://10.194.10.2:8010/esp/files/eclwatch/hpcc.js:103:1 .cache["dojo/Deferred"]/</r@http://10.194.10.2:8010/esp/files/eclwatch/hpcc.js:102:430 .cache["dojo/Deferred"]/</s/this.resolve@http://10.194.10.2:8010/esp/files/eclwatch...
\\n\\nWhat do I need to do?\", \"post_time\": \"2015-06-15 18:58:22\" },\n\t{ \"post_id\": 8066, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"Re: ECLWatch will not delete a logical file\", \"username\": \"kovacsbv\", \"post_text\": \"Turns out this was a bug. It's fixed in JIRA 13981, which is incorporated in version 5.4.0. You cannot delete logical files starting with a digit in versions of HPCC before this fix without using special tools.\", \"post_time\": \"2015-09-01 21:21:46\" },\n\t{ \"post_id\": 7799, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"Re: ECLWatch will not delete a logical file\", \"username\": \"kovacsbv\", \"post_text\": \"No.\", \"post_time\": \"2015-06-18 13:35:06\" },\n\t{ \"post_id\": 7798, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"Re: ECLWatch will not delete a logical file\", \"username\": \"rtaylor\", \"post_text\": \"Next question: Is this file used by a query that you have published to a Roxie in the same environment?\", \"post_time\": \"2015-06-18 13:33:33\" },\n\t{ \"post_id\": 7797, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"Re: ECLWatch will not delete a logical file\", \"username\": \"kovacsbv\", \"post_text\": \"I went into ECLWatch, and had two superfiles in Thor (which are normally transitory). I cleared them both out, and filtered by "Not in superfiles". The same file was available. I then tried re-deleting it, and got the same error.\", \"post_time\": \"2015-06-18 13:26:03\" },\n\t{ \"post_id\": 7796, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"Re: ECLWatch will not delete a logical file\", \"username\": \"rtaylor\", \"post_text\": \"The word "SuperOwnerLock" in the error message makes me wonder if the file you're trying to delete is a sub-file in a superfile. If so, that's the reason. You need to first remove the file from the superfile before deleting it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-06-18 13:10:43\" },\n\t{ \"post_id\": 7793, \"topic_id\": 1775, \"forum_id\": 17, \"post_subject\": \"ECLWatch will not delete a logical file\", \"username\": \"kovacsbv\", \"post_text\": \"Hi, I am trying to delete a logical file from ECLWatch and I get errors when trying to do so.\\n\\n.::2015061106.boca__browse\\n\\nCannot delete .::2015061106.boca__browse on thor: [ 25: SDS: IPropertyTree exception SDS Reply Error : SDS: IPropertyTree exception IPropertyTree: xpath parse error XPath Exception: Qualifier expected e.g. [..] in xpath = 2015061106_46boca_95_95browse ^ in xpath '/_Locks/SuperOwnerLock/2015061106_46boca_95_95browse']
\\n\\n\\nI tried it with dfuplus as well:\\n\\n\\n\\n$ dfuplus action=remove server=http://10.0.1.2 name=.::2015061106.boca__browse dstcluster=thor username="username" password='secret'\\n\\nRemoving .::2015061106.boca__browse\\nCannot delete .::2015061106.boca__browse: [ 25: SDS: IPropertyTree exception\\nSDS Reply Error : SDS: IPropertyTree exception\\nIPropertyTree: xpath parse error\\nXPath Exception: Qualifier expected e.g. [..]\\nin xpath = 2015061106_46boca_95_95browse\\n ^ in xpath '/_Locks/SuperOwnerLock/2015061106_46boca_95_95browse']\\n\\n
\\n\\nI also tried it with ~.::, ~::, and ~ for the prefix.\\n\\nCan anybody suggest a solution? Anybody know what this error means?\", \"post_time\": \"2015-06-18 12:58:30\" },\n\t{ \"post_id\": 7807, \"topic_id\": 1777, \"forum_id\": 17, \"post_subject\": \"Re: Accessing log files\", \"username\": \"alex\", \"post_text\": \"Ok, I found it. I see the list of names for the log, but when I select it I get a Page Not Found.\\n\\nETA: Turns out if I double click it instead of pressing the select button, I can open the file (that's not intuitive). So why doesn't it work from the Topology page?\", \"post_time\": \"2015-06-19 14:22:38\" },\n\t{ \"post_id\": 7806, \"topic_id\": 1777, \"forum_id\": 17, \"post_subject\": \"Re: Accessing log files\", \"username\": \"bforeman\", \"post_text\": \"When you open Cluster Processes, click on the appropriate cluster link to open it. the icon to open the logs is in the Name column.\\n\\nBob\", \"post_time\": \"2015-06-19 14:18:01\" },\n\t{ \"post_id\": 7805, \"topic_id\": 1777, \"forum_id\": 17, \"post_subject\": \"Re: Accessing log files\", \"username\": \"alex\", \"post_text\": \"Hi Bob. \\n\\nI don't see a way to get to the logs from Operations->Cluster Processes. It shows me what directory the logs are saved in, but that's it. I only see a way to access the logs through Operations->Topology (which is what doesn't work).\", \"post_time\": \"2015-06-19 14:13:57\" },\n\t{ \"post_id\": 7804, \"topic_id\": 1777, \"forum_id\": 17, \"post_subject\": \"Re: Accessing log files\", \"username\": \"bforeman\", \"post_text\": \"Hi Alex,\\n\\nDid you try accessing the THOR logs under the Cluster Processes menu item?\\nOperations >> Cluster Processes.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-19 13:57:53\" },\n\t{ \"post_id\": 7803, \"topic_id\": 1777, \"forum_id\": 17, \"post_subject\": \"Accessing log files\", \"username\": \"alex\", \"post_text\": \"ECL Watch in 5.2 seems to have the useful ability to let me look at the various log files on any node. However, for the thor nodes, when I try to view one I get an error: WsTopology.TpLogFile Cannot open file in /var/log/.../<file>.log\\n\\nThis seems like it might be a permissions issue, but I don't know what permissions are needed by ECL Watch to get these files.\\n\\nFor the system server logs, I get a different error. For example:\\nFileSpray.FileList CreateIFile cannot resolve //0.0.0.0/var/log/HPCCSystems/myeclccserver\", \"post_time\": \"2015-06-18 19:25:57\" },\n\t{ \"post_id\": 8018, \"topic_id\": 1837, \"forum_id\": 17, \"post_subject\": \"Re: Suggestion for New TAB (Timings/Stats)\", \"username\": \"greg.whitaker\", \"post_text\": \"Ok, done. 
\\nJIRA Ticket: HPCC-14081\", \"post_time\": \"2015-08-17 15:30:00\" },\n\t{ \"post_id\": 8011, \"topic_id\": 1837, \"forum_id\": 17, \"post_subject\": \"Re: Suggestion for New TAB (Timings/Stats)\", \"username\": \"rtaylor\", \"post_text\": \"Greg,\\n\\nGreat suggestion, but the best way to bring this to the attention of the developers is to submit it through JIRA: https://track.hpccsystems.com/secure/Dashboard.jspa\\nThat way it's in their queue to be considered and discussed, and more importantly, tracked all the way through to its final resolution.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-08-12 10:18:21\" },\n\t{ \"post_id\": 8004, \"topic_id\": 1837, \"forum_id\": 17, \"post_subject\": \"Suggestion for New TAB (Timings/Stats)\", \"username\": \"greg.whitaker\", \"post_text\": \"I would like to make a recommendation for a new tab when viewing a query.\\nReason: I think we all need a quick method of viewing/finding the processIDs that are taking the longest amount of time, and know what ecl is responsible.\\n\\nCurrent tabs include:\\nSummary, Error/Status, logical files, superfiles, lib used, graphs, resources , testpages, wu.\\n\\nNew tab name: Timings/Stats\\nThis tab would pull the existing information found in 2 areas of the existing graphs (activities.localtime, properties).\\n\\nList contains 2 fields: ActivityID, LocalTime (longest time on top).\\nWhen selecting a activityid a dynamic window would display the properties for that activityID (same window in graphs, that contains ecl, attribute name, line#, etc).\", \"post_time\": \"2015-08-11 13:46:43\" },\n\t{ \"post_id\": 8020, \"topic_id\": 1841, \"forum_id\": 17, \"post_subject\": \"Re: Command line ECL\", \"username\": \"JimD\", \"post_text\": \"I do not see a way to get this information from the ecl command line tool. \\n\\nI have submitted a feature request (improvement) in our issue tracking system.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-14088\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2015-08-18 14:19:44\" },\n\t{ \"post_id\": 8019, \"topic_id\": 1841, \"forum_id\": 17, \"post_subject\": \"Command line ECL\", \"username\": \"alex\", \"post_text\": \"Not sure where else to put this question, but:\\n\\nIs there any way to use either ecl or eclplus from the command line to get a list of which superfiles are in use by a roxie query?\\n\\nusing:\\necl queries files roxie myquery
\\n\\nonly shows subfiles, not the superfile that contains them. It appears that I could use "ecl queries list" to find the original workunit and then use eclplus to dump the XML and parse that (although you'd have to then do some work to figure out what is a super file and what is just a regular file).\\n\\nECLWatch, under the queries tab, has this information in plain sight. So is there any way to get it from the command line?\", \"post_time\": \"2015-08-17 19:39:31\" },\n\t{ \"post_id\": 8040, \"topic_id\": 1848, \"forum_id\": 17, \"post_subject\": \"Re: localtime meaning\", \"username\": \"bforeman\", \"post_text\": \"Hi Greg,\\n\\nI have a message out to RKC on this one...\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-08-26 13:35:40\" },\n\t{ \"post_id\": 8035, \"topic_id\": 1848, \"forum_id\": 17, \"post_subject\": \"localtime meaning\", \"username\": \"greg.whitaker\", \"post_text\": \"In the graphs properties section there is a column titled "localTime" and another titled "totalTime". \\n\\nQuestion1: what is the value found in localTime\\nQuestion2: is the local time the place I should look for the longest running processes or is there another place to look?\\n\\nI was under the impression that localTime was the total time that single processID took to process. (activityID=processID)\\n\\nOther observations:\\nSome processIds have the same time in both localTime and totalTime.\\nSome processIds have no value at all in either localTime or totalTime.\\n\\nJust looking for more insight into what values are stored here and why they would be blank.\", \"post_time\": \"2015-08-24 20:28:16\" },\n\t{ \"post_id\": 8522, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"https://track.hpccsystems.com/browse/HPCC-14496\\n\\nThanks Richard and Bob.\", \"post_time\": \"2015-11-09 21:28:44\" },\n\t{ \"post_id\": 8520, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,\\n\\nTime to submit a JIRA so the developers can look at the issue.\\n\\nRichard\", \"post_time\": \"2015-11-09 21:26:37\" },\n\t{ \"post_id\": 8518, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Again, hand typed so any typos are my own:\\n\\n\\n"ERROR: 1410: /var/lib/jenkins/workspace/EE-Candidate-withplugins-5.2.4-1/LN/centos-6.4-x86_64/HPCC-Platform/roxie/ccd/ccdstate.cpp(980): Failed to load query remotedalitest.1 from libW20151109-154925.so : Could not resolve filename 120way::Country_to_ISO3"\\n
\", \"post_time\": \"2015-11-09 21:23:17\" },\n\t{ \"post_id\": 8516, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,It's only when publishing it on the remote cluster that the file fails to resolve.
Then I suggest the possibility that there may be firewall issues between the two clusters. Since the query is compiled in the Roxie environment and only the file needs to be pulled over from the Thor, that's the only other possibility that occurs to me.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-09 20:57:01\" },\n\t{ \"post_id\": 8514, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"OK, one last request please, can you post the Roxie log that contains the error? Since the error is reflected on every node, any log from any node will do, as long as it contains the error. \\n\\nThanks,\\n\\nBob\", \"post_time\": \"2015-11-09 20:56:27\" },\n\t{ \"post_id\": 8512, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"In response to your 2 points:\\n\\n1) Names have been sanitized for public viewing; I typed in "120way" but that's not the actual name of the cluster. The actual path in my code is correct; publishing the query to the cluster that actually contains the file works ok. It's only when publishing it on the remote cluster that the file fails to resolve.\\n\\n2) Wrapping the code in a function or macro has no effect on the ability to resolve the file.\", \"post_time\": \"2015-11-09 20:52:52\" },\n\t{ \"post_id\": 8510, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,\\n\\nI see a couple of possible things:
\nHere's my query code:\n\nIMPORT $;\nEXPORT TestPublishRemoteDaliService() := FUNCTION\n  STRING30 fname_value := '' : STORED('FirstName');\n  STRING30 lname_value := '' : STORED('LastName');\n  Fetched := IF(fname_value <> '',\n    $.File_People.IDX_LFname(LastName = lname_value, FirstName = fname_value),\n    $.File_People.IDX_LFname(LastName = lname_value));\n  RETURN Fetched;\nEND;
And this is the supporting code:\n\nEXPORT File_People := MODULE\n  Layout := RECORD\n    UNSIGNED8 ID;\n    STRING15 FirstName;\n    STRING25 LastName;\n    STRING15 MiddleName;\n    STRING2 NameSuffix;\n    STRING8 FileDate;\n    UNSIGNED2 BureauCode;\n    STRING1 MaritalStatus;\n    STRING1 Gender;\n    UNSIGNED1 DependentCount;\n    STRING8 BirthDate;\n    STRING42 StreetAddress;\n    STRING20 City;\n    STRING2 State;\n    STRING5 ZipCode;\n  END;\n  EXPORT File := DATASET('~CLASS::RT::Intro::Persons',Layout,FLAT);\n  EXPORT IDX_LFname := INDEX(File,{Lastname,Firstname},{ID},\n    '~RTTEST::RemoteTest::KEY::People::LFname');\n  EXPORT BLD_IDX_LFname := BUILD(IDX_LFname,OVERWRITE);\nEND;
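As a sketch of the build step this example relies on (assuming the module above is saved as File_People next to the query), the index is built once on the Thor system that owns the data, and only then is the query compiled and published from the other system with that Thor's Dali supplied as the Remote Dali:

IMPORT $;
// Run on the Thor side that owns '~CLASS::RT::Intro::Persons';
// it materializes the index the published query reads.
$.File_People.BLD_IDX_LFname;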
\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-09 20:38:00\" },\n\t{ \"post_id\": 8508, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Easy enough:\\n\\nState: suspended\\nError: Could not resolve filename 120way::Country_To_ISO3\\n\\nI've fiddled with tildes; the error is the same if I leave it out of the code, but filename is obviously wrong since its appends the cluster name to the front of the path.\\n\\nETA: As I mentioned, if I use "foreign::<ip address>" in front of the path in the DATASET, it resolves the file ok. So it's not a path issue in that respect.\", \"post_time\": \"2015-11-09 20:30:32\" },\n\t{ \"post_id\": 8506, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"Also, what we would really like to see is the exact content of the error. It would usually have a logical file name in it.\\n\\nThe reason its failing could be for so many reasons. Like incorrect use of ~ or other naming issues. Or perhaps it can’t get to the dali, but the error could help us diagnose.\\n\\nBob\", \"post_time\": \"2015-11-09 20:26:05\" },\n\t{ \"post_id\": 8504, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"This is hand-typed so there might be typos; My HPCCs are not on an Internet-connected network.\\n\\n\\nstring3 codein1 := '' : stored('ISO3');\\nstring40 namein1 := '' : stored('Name');\\n\\ncountries := DATASET('~120way::Country_To_ISO3', Layout_Country_to_ISO3, thor);\\nd1 := countries(\\n if( codein1 != '', (trim(codein1) = iso3_code), true),\\n if( namein1 != '', stringlib.stringfind(trim(country), trim(namein1), 1) >0, true));\\n\\noutput(d1, named('Matching_Countries'));\\n\\n
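For comparison, a sketch of the explicit foreign-file form of the same declaration (the Dali IP below is a placeholder), which is the syntax that did resolve the file when tried directly:

// Hypothetical Dali IP of the cluster that owns the file; layout as above.
countries_foreign := DATASET('~foreign::10.0.0.1::120way::country_to_iso3',
                             Layout_Country_to_ISO3, thor);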
\", \"post_time\": \"2015-11-09 20:22:01\" },\n\t{ \"post_id\": 8502, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,\\n\\nCan you upload all your test code so we can look at it, please?\\n\\nRichard\", \"post_time\": \"2015-11-09 20:11:05\" },\n\t{ \"post_id\": 8500, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"The only difference I see between what you're doing and what I'm doing is that in my simple experiment, my file is thor file and not an index. Other than that my steps are exactly the same as yours.\\n\\nI presume that shouldn't make a difference, though.\", \"post_time\": \"2015-11-09 20:00:56\" },\n\t{ \"post_id\": 8498, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,\\n\\nOK, I just tested this. Here are the steps I took:
\\nTherefore, it is not necessary to use "Foreign file" naming (I didn't) to accomplish this.\\n\\nSo, next you'll want to duplicate my experiment with your code and data, step-by-step, and see if you still get an error.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-09 19:43:50\" },\n\t{ \"post_id\": 8494, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"I was hoping I would be able to keep the code agnostic about where the file lives and just use the "remote dali" feature of published queries.\\n\\nI know using foreign::<remote dali ip>::path::to::file works, as I've tested it (so the file is definitely where I think it is, and authorization doesn't seem to be an issue). I'm specifically trying avoid having to do that, though.\", \"post_time\": \"2015-11-09 19:06:26\" },\n\t{ \"post_id\": 8490, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"rtaylor\", \"post_text\": \"Alex,Yes, the error is "Can't resolve file".
Then the most probable issue will be the way you named the logical file in the DATASET declaration for your query. Since the file is on a remote cluster you should probably use the "foreign file" naming syntax, discussed here: https://hpccsystems.com/download/documentation/ecl-language-reference/html/Foreign_Files.html \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-11-09 18:50:09\" },\n\t{ \"post_id\": 8484, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Yes, the error is "Can't resolve file". I created this file just for this test, so I do not believe it's being used by any other queries.\", \"post_time\": \"2015-11-09 15:53:16\" },\n\t{ \"post_id\": 8482, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"If you open up the ECL Watch, there is a section named Published Queries, and the specific error as to why it is suspended should be available from there. I wonder if it is possible that another query might have that file locked.\\n\\nBob\", \"post_time\": \"2015-11-09 15:38:46\" },\n\t{ \"post_id\": 8480, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Yes, both clusters are using LDAP. My credentials are the same on both (they use the same LDAP server, even). Using the command line tool I do have to provide user name and password. It publishes the query but suspends it, so presumably authentication to the Roxie cluster is ok.\\n\\nThanks; I'll open a ticket.\", \"post_time\": \"2015-11-09 15:34:05\" },\n\t{ \"post_id\": 8478, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"Right on both.\\n\\nIs the ROXIE cluster LDAP protected? If so, you may need to add the user and password on your command line publish.\\n\\nBut it sounds like you are doing things correctly (not user error).\\n\\nBob\", \"post_time\": \"2015-11-09 15:29:21\" },\n\t{ \"post_id\": 8476, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Yes, it's just the IP with no port. And it's the Dali IP, not ESP or anything, right?\\n\\nI didn't want to open a JIRA until I ruled out user error, but it's sounding more like a cluster configuration problem than something I'm doing wrong.\", \"post_time\": \"2015-11-09 15:24:29\" },\n\t{ \"post_id\": 8474, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"Hi Alex,\\n\\nWhat are you putting in for the Remote Dali address? It should only be the base address of the cluster (no ports needed). Also, could it be a security issue (not having remote access or a firewall issue). 
If you run out of options I would suggest that you open a report in our issue tracker and get more community members to look at this.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-09 15:21:19\" },\n\t{ \"post_id\": 8470, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"Yes, that is what I'm doing.\\n\\nCluster A has all our thors and contains the dataset I care about.\\n\\nCluster B has the roxies.\\n\\nI compile the ECL on cluster B, publish it to the Roxies (either through ECL Watch or the command line; behavior is no different) specifying that the Remote Dali is the dali server IP for cluster A. The query immediately suspends because it can't resolve the file that lives on Cluster A.\", \"post_time\": \"2015-11-09 14:42:44\" },\n\t{ \"post_id\": 8468, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Re: Publishing queries with remote dali\", \"username\": \"bforeman\", \"post_text\": \"Hi Alex,\\n\\nI think the design flow is to build all of your data on the remote dali server, and then create your query on the other server where you are publishing. Remember, when you create a ROXIE query you only need to compile, and then when you publish the remote dali should correctly point to your indexes and files that you need.\\n\\nI have a lesson in the Advanced ROXIE Online course that walks you through this process.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-09 14:35:39\" },\n\t{ \"post_id\": 8462, \"topic_id\": 1992, \"forum_id\": 17, \"post_subject\": \"Publishing queries with remote dali\", \"username\": \"alex\", \"post_text\": \"I can't find any instructions in the documentation on how to do this. Can someone explain how Roxie queries that reference a remote dali work? I assumed I would write the ECL as normal:\\n\\nds := DATASET('~file_on_other_cluster', layout, flat)\\n\\nand then put the other cluster's dali IP address in the "Remote Dali" field of the ECL Watch "Publish" pop-up. When I do this my query suspends because it can't resolve the file, though.\\n\\nCredentials on both clusters are the same. Am I doing something else wrong?\", \"post_time\": \"2015-11-09 13:57:58\" },\n\t{ \"post_id\": 8650, \"topic_id\": 2018, \"forum_id\": 17, \"post_subject\": \"Re: CORS support for wsworkunit server\", \"username\": \"bforeman\", \"post_text\": \"There was a previous discussion of this here: \\nhttps://hpccsystems.com/bb/viewtopic.php?t=1988&p=8452#p8452\\n\\nAnd a JIRA was opened: https://track.hpccsystems.com/browse/HPCC-14493\\n\\nNote that it was also recommended to try using jsonp, which may help you as well.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-11-19 17:49:13\" },\n\t{ \"post_id\": 8644, \"topic_id\": 2018, \"forum_id\": 17, \"post_subject\": \"CORS support for wsworkunit server\", \"username\": \"lillianm\", \"post_text\": \"I am trying to access the information from one of our workunits with javascript to generate reports. This used to work fine when I was running it on the same server as ECL watch. Since I have moved my javascript to another server I am running into errors related to the same-origin policy. These are indicating that the ECL watch server does not support CORS (the mechanism that supports cross domain requests). 
Is there any way to configure the server to support cross domain requests (via CORS) from my server?\", \"post_time\": \"2015-11-19 12:28:24\" },\n\t{ \"post_id\": 9502, \"topic_id\": 2240, \"forum_id\": 17, \"post_subject\": \"Re: Compressing when spraying\", \"username\": \"tlhumphrey2\", \"post_text\": \"I found it. When I clicked on the name of the sprayed file, it shows me both the original size and the compressed size.\", \"post_time\": \"2016-04-07 19:08:57\" },\n\t{ \"post_id\": 9500, \"topic_id\": 2240, \"forum_id\": 17, \"post_subject\": \"Compressing when spraying\", \"username\": \"tlhumphrey2\", \"post_text\": \"On LZ, I have a cvs file whose size is 260,409,757. In ecl watch, I did a delimited spray with compress checked. When I look at the sprayed file, ecl watch says its size is 260,409,757 (the same size as the original file). I want to know the size of the compressed file. How can I get that?\", \"post_time\": \"2016-04-07 19:05:34\" },\n\t{ \"post_id\": 9674, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"vin\", \"post_text\": \"More test results\\n\\n(1) [Mac/Chrome]\\n\\n(a) Delete “cookies and other site and plugin data” from “the beginning of time"(in chrome>settings>show advanced setting>clear browser data)\\n\\n - ECL watch - DOES NOT logout\\n - every other site I examined, including this forum — DOES logout\\n\\n(b) [Mac/Chrome] Close chrome (without deleting cookies as in (1))\\n\\n - ECL watch - DOES logout\\n - every other site I examined, including this forum — DOES NOT logout\\n\\nRepeated the above tests in several other OS/browser combos.\\n\\n(2) [Mac/Safari] Same as (1)\\n\\n(3) [Mac/Opera] Same as (1)\\n\\n(4) [Linux/Chrome] Same as (1)\\n\\n(5) [Linux/Firefox] deleting cookies DOES logout ECL and other apps.\\n\\n(6) [Windows/Chrome] deleting cookies DOES logout ECL and other apps.\\n\\nSummary, in 4 of the 6 OS/browser combis, ECL watch behaved differently than “normal” sites, such as the HPCC forums. Mac and/or chrome may do something odd, but other sites have figured out how to play well. I don't think it is too much to ask ECL Watch play well with others as well. This isn’t something that a logout button fixes.\", \"post_time\": \"2016-05-29 20:44:18\" },\n\t{ \"post_id\": 9656, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"Gleb Aronsky\", \"post_text\": \"Another way to logout is to change the url to use a different username/password. For example change the url to be http://user:pw@10.1.1.10:8010. 
user:pw can be replaced with phony account info, and the IP with the IP of eclwatch.\", \"post_time\": \"2016-05-24 19:22:04\" },\n\t{ \"post_id\": 9654, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"I've discovered that one workaround to the logoff issue is to have an administrator change the user password out from under you, so that the saved value will fail next time you try to get in.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-24 19:08:17\" },\n\t{ \"post_id\": 9652, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"Gleb Aronsky\", \"post_text\": \"EclWatch uses basic access authentication.\", \"post_time\": \"2016-05-24 18:41:40\" },\n\t{ \"post_id\": 9644, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"vin\", \"post_text\": \"I was collecting data for Stuart and Kevin. I first deleted all cookies and other browser date. Refresh ECL watch page -- still logged in and it regenerated a cookie.\\n\\nI tried an idea I had while talking with Flavio today: shutdown the browser app (Chrome, BTW) then restart. Presto! I was logged off.\\n\\nSo ECL watch uses some browser identification thing other than cookies to maintain my log in session. BTW, the something is more specific than my machine because I can log in another user to the same ECL watch using a different browser or an incognito window. It doesn't use my IP either because I remain logged in when my laptop comes home.\\n\\nI am very curious what information is being maintained.\\n\\nThanks,\\n+vince\", \"post_time\": \"2016-05-20 00:41:08\" },\n\t{ \"post_id\": 9598, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"vin\", \"post_text\": \"The JIRA is 2 years old. It doesn't like is will be fix any time soon.\\n\\nAm I the only one who finds this (a) a bafflingly omission and (b) a huge inconvenience?\\n\\nIf I open an incognito window, I'm not logged on. So there is a simple (inconvenient) workaround: always log in to ECL incognito.\\n\\nJust curious: I cleared all browser data and I'm still logged on. What mechanism is being used?\\n\\nThanks,\\n+vince\", \"post_time\": \"2016-05-06 15:13:48\" },\n\t{ \"post_id\": 9596, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"JimD\", \"post_text\": \"There is a Jira issue already:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-11612\", \"post_time\": \"2016-05-04 15:49:00\" },\n\t{ \"post_id\": 9590, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Re: Logoff ECL Watch\", \"username\": \"rtaylor\", \"post_text\": \"Vince,\\n\\nYeah, I don't see a way to do that, either. I had thought the old ECL Watch interface had that capability, but when I open the legacy version I don't see it there, either.\\n\\nTime to submit a JIRA \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-05-04 13:42:19\" },\n\t{ \"post_id\": 9586, \"topic_id\": 2274, \"forum_id\": 17, \"post_subject\": \"Logoff ECL Watch\", \"username\": \"vin\", \"post_text\": \"How can I logout of ECL Watch?\\n\\nI would like to logout as one user and log in as a new user. I do not see a button on ECL Watch. 
I remove cookies to no avail.\\n\\nThanks,\\n+vince\", \"post_time\": \"2016-05-03 21:56:08\" },\n\t{ \"post_id\": 9704, \"topic_id\": 2298, \"forum_id\": 17, \"post_subject\": \"Re: DFUPlus Not Overwriting Files During Spray\", \"username\": \"AttilaV\", \"post_text\": \"I try to reproduce this problem:\\n\\nIf the target file doesn't belong to any superfile, then it can be replaced by re-spray.\\n\\nIf it belongs to at least one master then the refusal behaviour is correct, the user can't replace existing subfile with re-spray. But the generated error message is not correct. I raised a JIRA ticket to fix it.\", \"post_time\": \"2016-06-06 15:17:30\" },\n\t{ \"post_id\": 9702, \"topic_id\": 2298, \"forum_id\": 17, \"post_subject\": \"Re: DFUPlus Not Overwriting Files During Spray\", \"username\": \"demills\", \"post_text\": \"JimD,\\n\\nNo, but these are sub-files of super-files. I've checked the "ECL Programmer's Guide" on working with super-files, and it only mentions deleting and adding new sub-files to super-files. Is overwriting sub-files allowed in 5.6.2?\\n\\nThanks,\\nDaniel\", \"post_time\": \"2016-06-03 23:07:28\" },\n\t{ \"post_id\": 9700, \"topic_id\": 2298, \"forum_id\": 17, \"post_subject\": \"Re: DFUPlus Not Overwriting Files During Spray\", \"username\": \"JimD\", \"post_text\": \"Is the file used in a published query? \\n\\nPublishing a query locks a file which would prevent overwriting it even with overwrite specified.\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2016-06-03 19:19:39\" },\n\t{ \"post_id\": 9698, \"topic_id\": 2298, \"forum_id\": 17, \"post_subject\": \"Re: DFUPlus Not Overwriting Files During Spray\", \"username\": \"rtaylor\", \"post_text\": \"demills,\\n\\nYou should use JIRA (https://track.hpccsystems.com) to report the issue to the developers.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-06-03 18:31:15\" },\n\t{ \"post_id\": 9690, \"topic_id\": 2298, \"forum_id\": 17, \"post_subject\": \"DFUPlus Not Overwriting Files During Spray\", \"username\": \"demills\", \"post_text\": \"Hello,\\n\\nI've made some changes to files in the the dropzone, and I'm trying to respray them to update the changes. I'm using DFUPlus and I've changed the "overwrite" flag to "1", but all spray jobs fail. The error I'm getting says:
Failed: DFS Exception: 1: logical name nfl::2013::11::pa::2013111710.csv already exists.
The workunit for the failed spray operation registers that the overwrite flag is true (picture attached), so I don't understand why the server complains about the filename already existing. That's the intention! Below are the values passed to DFUPlus.\\n\\naction=spray\\nserver=<removed>\\ndstcluster=mythor\\nusername=<removed>\\npassword=<removed>\\nsrcfile=/var/lib/HPCCSystems/mydropzone/nfl/2013/11/pa/2013111710.csv\\ndstname=nfl::2013::11::pa::2013111710.csv\\nformat=delimited\\nseparator=|\\noverwrite=1\\nnowait=0\\nautorecover=2\\n\\nThis is using client-tools version 5.6.2-1 and platform version 5.6.2-1 on Ubuntu 14.04 LTS.\\n\\nThanks,\\nDaniel\", \"post_time\": \"2016-06-01 22:03:59\" },\n\t{ \"post_id\": 9916, \"topic_id\": 2356, \"forum_id\": 17, \"post_subject\": \"Re: Cannot Connect to ECL Watch\", \"username\": \"John Meier\", \"post_text\": \"Thank you for your responses. I had to turn virtualization on my laptop (Windows 10 has this option disabled by default). This was no easy feat since key access during boot-up to access the BIOS proved impossible! There is a bit of an arduous journey through the SYSTEMS panel, but I did enable it. I did flip the eth0 and eth1 values as suggested. At this point I sync'd the VM and the internal HPCC machine so everybody was speaking the same connections, rebooting the VirtualBox I don't know how many times...but success!!\\nI can now connect to ECL Watch and I ran a "Hello World" syntax check and execution and everything worked as it should.\\n\\nI've worked on command-line Unix systems for years and even though Ubuntu hhas a GUI, I didn't use it. I did use the "ifconfig" and the "sudo service" commands, which were helpful in identifying that the system wasn't running but hung (which led to the BIOS check), as well as the IP addressing. The documentation I had showed the GUI, which threw me off \\n\\nAgain - thanks for the assist!\", \"post_time\": \"2016-07-07 15:18:20\" },\n\t{ \"post_id\": 9914, \"topic_id\": 2356, \"forum_id\": 17, \"post_subject\": \"Re: Cannot Connect to ECL Watch\", \"username\": \"ming\", \"post_text\": \"NET ip is only accessible from VM not from host system which is why 10.0.3.15 is not reachable.\\n\\nBy default host-only card should be configured as second network card in VM unless something changed on the system or by user. In VM documentation it shows "VM VirtualBox Network Adapter 2".\\n\\nTo fix the problem there are two options:\\n1) Shutdown VM. Switch two network cards settings: make adapter 1 as NET and adapter 2 as host-only. Start VM\\n2) Change "Interface=eth1" to Interface=eth0" in /etc/environment.conf. Restart HPCC: sudo service hpcc-init restart.\\n\\n1) is recommended method.\\nIn either case access ECLWatch with host-only adapter ip (adapter 2).\\n\\nThanks\", \"post_time\": \"2016-07-07 14:13:24\" },\n\t{ \"post_id\": 9902, \"topic_id\": 2356, \"forum_id\": 17, \"post_subject\": \"Re: Cannot Connect to ECL Watch\", \"username\": \"sort\", \"post_text\": \"Sorry to hear you have been having problems. We are updating our documentation to alert users about the virtualization setting in the bios (another user has this issue last week). If there are other settings that are missing in our documentation please let us know.\\n\\nCan you verify that you can ping 192.168.56.101 from your desktop?\\nCan you run "sudo service hpcc-init status" to make sure everything is still running. When the VM starts we auto start everything- running hpcc-init status will let us know that everything is still ok. 
You should see all components - i.e.\\nmyesp (pid 3030 ) is running ...\", \"post_time\": \"2016-07-06 18:31:41\" },\n\t{ \"post_id\": 9884, \"topic_id\": 2356, \"forum_id\": 17, \"post_subject\": \"Cannot Connect to ECL Watch\", \"username\": \"John Meier\", \"post_text\": \"I have just installed VirtualBox 5.0.20 r106931 and the image for the Ubuntu 64bit HPCC on my HP laptop running Windows10 (8GB ram and 1TB on disk). This has not been a straight-forward installation! After enabling virtualization on the BIOS and such, I have finally achieved a successful startup of the HPCC server. But when I enter http://10.0.3.15:8010 to access ECL Watch from my browser, it cannot find it. Then I thought "VirtualBox shows the hostonly network ip addresses within a (low) 192.168.56.101 and (hi) 192.168.56.254. I tried a number of different addresses with the port :8010 tacked on...again "not found". Then I ran an ifconfig on the Ubuntu server itself: eth0 = 192.168.56.101 eth1 = 10.0.3.15 (the one the documentation points to) lo(localhost) = 127.0.0.1 I've tried all of these with the port :8010 - NOTHING! I'm sure I'm missing some indiscriminate switch or something, but I've no idea where to look. Anyone have any suggestions?\", \"post_time\": \"2016-07-01 15:54:26\" },\n\t{ \"post_id\": 11773, \"topic_id\": 2933, \"forum_id\": 17, \"post_subject\": \"Re: Publishing a query with a remote dali--authentication?\", \"username\": \"JimD\", \"post_text\": \"Confirmed, This is expected behavior.\", \"post_time\": \"2016-10-04 17:43:57\" },\n\t{ \"post_id\": 11763, \"topic_id\": 2933, \"forum_id\": 17, \"post_subject\": \"Re: Publishing a query with a remote dali--authentication?\", \"username\": \"JimD\", \"post_text\": \"According to the Jira below, we currently require the user to have identical credentials.\\n\\nhttps://track.hpccsystems.com/browse/HPCC-10739\\n\\nI will investigate further to see if this is still the case.\\n\\nJim\", \"post_time\": \"2016-10-04 16:44:08\" },\n\t{ \"post_id\": 11743, \"topic_id\": 2933, \"forum_id\": 17, \"post_subject\": \"Publishing a query with a remote dali--authentication?\", \"username\": \"drealeed\", \"post_text\": \"I have a query that was compiled on http://10.173.147.1:8010, cluster roxie.\\n\\nThe indexes were generated on http://10.241.100.159:8010, which has different userid/pwd authentication.\\n\\nWhen I attempt to publish the query via ECL Watch on http://10.173.147.1:8010 and specify 10.241.100.159 as the remote dali, the files are not copied over and the query is published as suspended. I assume this is because a username and password is required; but I don't see any inputs for the remote dali userid/pwd in ECL Watch or in the wsdl for WUPublishWorkunit. How do I indicate the username/password to use to connect to the remote dali?\", \"post_time\": \"2016-10-04 14:54:25\" },\n\t{ \"post_id\": 13723, \"topic_id\": 3461, \"forum_id\": 17, \"post_subject\": \"Re: Missing Record Structures for sprayed files\", \"username\": \"Puneet\", \"post_text\": \"[quote="JimD":32w7gsih]How many nodes in the cluster to which you are spraying? \\n\\nThere was an issue (which was fixed in 6.2) where the structure would not appear when spraying to a single node cluster. 
\\n\\nhttps://track.hpccsystems.com/browse/HPCC-16165 \\n\\nHTH,\\nJim\\n\\nThanks Jim!\\n\\nThis was exactly it.\", \"post_time\": \"2016-12-12 05:02:50\" },\n\t{ \"post_id\": 13713, \"topic_id\": 3461, \"forum_id\": 17, \"post_subject\": \"Re: Missing Record Structures for sprayed files\", \"username\": \"JimD\", \"post_text\": \"How many nodes in the cluster to which you are spraying? \\n\\nThere was an issue (which was fixed in 6.2) where the structure would not appear when spraying to a single node cluster. \\n\\nhttps://track.hpccsystems.com/browse/HPCC-16165 \\n\\nHTH,\\nJim\", \"post_time\": \"2016-12-07 17:40:14\" },\n\t{ \"post_id\": 13681, \"topic_id\": 3461, \"forum_id\": 17, \"post_subject\": \"Missing Record Structures for sprayed files\", \"username\": \"Puneet\", \"post_text\": \"Hi,\\n\\nI find that in some instances the record structures for sprayed files do not show up even when the RecordStructurePresent checkbox is checked when spraying a csv file.\\n\\nVersion = 6.0\\n\\nAttached are screen shots of some suspicious looking things.\\n\\n[attachment=0:1j7655af]Selection_003.png\\n\\n[attachment=2:1j7655af]Selection_001.png\\n\\n[attachment=1:1j7655af]Selection_002.png\\n\\nHas anyone encountered similar issues? More importantly is there a fix?\\n\\nRegards,\\nPuneet\", \"post_time\": \"2016-12-07 09:05:38\" },\n\t{ \"post_id\": 21723, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"BrianB644\", \"post_text\": \"All ... thank you for the quick responses. I've verified the issue is fixed by updating the software.\", \"post_time\": \"2018-04-19 14:39:47\" },\n\t{ \"post_id\": 21713, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"william.whitehead\", \"post_text\": \"This was a problem with the HTPASSWD security manager, and is confirmed to be fixed in release 6.4.6 and newer. Hope that helps\\nRuss\", \"post_time\": \"2018-04-19 12:29:57\" },\n\t{ \"post_id\": 21703, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"JimD\", \"post_text\": \"Brian,\\n\\nThanks for letting us know about this. A Jira issue has been entered:\\n\\nhttps://track.hpccsystems.com/browse/HPCC-19488\\n\\nJim\", \"post_time\": \"2018-04-18 17:51:13\" },\n\t{ \"post_id\": 21691, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"JimD\", \"post_text\": \"Brian, \\n\\nWhich field(s) are showing as <hidden>?\\n\\nJim\", \"post_time\": \"2018-04-16 18:22:43\" },\n\t{ \"post_id\": 21681, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"BrianB644\", \"post_text\": \"For the systems I work with on a daily basis (6.2.10, 6.4.2, and 6.4.6), I see <Hidden> in the main EclWatch workunit page on every system where we use htpasswd authentication. I've followed the htpasswd documentation carefully.\\n\\nMuch of the discussion above focuses on how to properly configure systems using LDAP, but that isn't our case. I've looked for a JIRA ticket concerning this issue, but didn't find one.\\n\\nIs there a workaround I can configure on our existing systems?\\n\\nIs a fix for the software and/or the documentation in the pipeline? ... 
when will it be available?\\n\\nCheers!\", \"post_time\": \"2018-04-16 14:16:07\" },\n\t{ \"post_id\": 14493, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"g-pan\", \"post_text\": \"Luke. A couple of things you can look at here. \\nFirst, I am assuming you have more than one user on your system. \\nSince the <Hidden> attribute is typically triggered by the file ownership. \\nNext, if you are an Admin on your system, you can check for some of the permission settings through ECL Watch. \\n\\nSign into ECL Watch (with Administrator level permissions). \\nClick on Operations link (as illustrated on Page 94 of Using ECL Watch http://cdn.hpccsystems.com/releases/CE- ... df#Page=94) then press the Security link (as illustrated on page 110)\\nExpand the ESP features for SMC (as illustrated on page 120).\\n\\nThere are couple of settings to look at there:\\n OthersWorkunitAccess \\n OwnWorkunitAccess\\n\\nHave a look at the settings there. Adjusting the settings there may be what is suppressing display of the Workunit details. \\n\\nSee if that helps.\", \"post_time\": \"2017-01-11 19:53:35\" },\n\t{ \"post_id\": 14483, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"lpezet\", \"post_text\": \"Thanks for confirming.\\n\\nThis thread is about the fact that the workunit information is not filled in when landing on that page in ECL Watch. \\nAnd looks like it happens only when site is secured (when not secured, method=none, all info is filled in). \\nHere's the screenshot again:\\nhttps://s3.amazonaws.com/archwayha-temp/Screen+Shot+2016-12-15+at+10.17.26+AM.png\\n\\nYou can see most info blank and the State is "<Hidden>", except for the one workunit I previously clicked on to check its details (the very first one in screenshot).\", \"post_time\": \"2017-01-11 16:29:03\" },\n\t{ \"post_id\": 14473, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"g-pan\", \"post_text\": \"Sorry for the duplicate answer. \\nJim posted before mine displayed. \\nAt least it was the same reply.
\\n\\nHope that helped.\", \"post_time\": \"2017-01-11 13:14:35\" },\n\t{ \"post_id\": 14383, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"JimD\", \"post_text\": \"None means No Authentication.\\n\\nTo enable htpassword authentication, follow the steps in the System Administrator Guide (http://cdn.hpccsystems.com/releases/CE- ... .2.0-1.pdf).\\n\\nHTH,\\nJim\", \"post_time\": \"2017-01-09 21:19:21\" },\n\t{ \"post_id\": 14353, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"g-pan\", \"post_text\": \"Luke. \\nThank you for your post. Looks like you found an issue with our documentation. I will open a JIRA to address that immediately. \\n\\nTo answer your question, if you set the method to "none" then you will have NO security enabled at all. \\nTo use the .htpasswd security please follow the steps in the System Administrator doc. http://cdn.hpccsystems.com/releases/CE- ... .2.0-1.pdf\\n\\nAs indicated previously the method described in the Configuration Manager doc is incorrect, we will be correcting that immediately. Thank you for pointing that out, your contribution helps to improve our documentation.\", \"post_time\": \"2017-01-09 20:09:19\" },\n\t{ \"post_id\": 14193, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"lpezet\", \"post_text\": \"In the Authentication settings in the Configuration Manager, we changed method from secMgrPlugin to none and that did the trick. We do (want to) use authentication with .htpasswd file.\\n\\nWe're also seeing 2 different things in the Configuration Manager (http://cdn.hpccsystems.com/releases/CE-Candidate-6.2.0/docs/UsingConfigManager-6.2.0-1.pdf) and the System Administrator (http://cdn.hpccsystems.com/releases/CE-Candidate-6.2.0/docs/HPCCSystemAdministratorsGuide-6.2.0-1.pdf) docs.\\nThe former has the htpasswdFile attribute (p.43) where the latter has the Htpasswd Security Manager module in it (p.51).\\n\\nI'm confused now. What would none then do in method in the Authentication settings for myesp?\\n\\n\\nThanks for your help,\\nLuke.\", \"post_time\": \"2017-01-09 17:00:05\" },\n\t{ \"post_id\": 13833, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"lpezet\", \"post_text\": \"That's just the attachment being truncated here.\\nHere's a link to the screenshot:\\nhttps://s3.amazonaws.com/archwayha-temp/Screen+Shot+2016-12-15+at+10.17.26+AM.png\\n\\nEven so, you can see the Owner not being filled in and it should be (there's one and it's the same as the first row).\\nIt will show up in that list of workunits if I open each individual workunit.\", \"post_time\": \"2016-12-15 21:34:03\" },\n\t{ \"post_id\": 13823, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Re: Missing information when loading all workunits in ECL Wa\", \"username\": \"JimD\", \"post_text\": \"Is it possible that you accidentally removed the State column from the view? \\n\\nClick on the + sign icon at right side of the column headers to see which columns are enabled and which are hidden. 
\\n\\nSee Screen Shot attached.\\n\\nHTH,\\nJim\", \"post_time\": \"2016-12-15 20:07:20\" },\n\t{ \"post_id\": 13813, \"topic_id\": 3483, \"forum_id\": 17, \"post_subject\": \"Missing information when loading all workunits in ECL Watch\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nWe're testing HPCC Systems 6.0.8-1 and we're seeing something new (to me at least) in the ECL Watch Workunits screen.\\nIt loads all the workunits but only displays the WUID and the State is "<Hidden>".\\nI have to open a workunit and go back to see all its details in the list of workunits. I've attached a screenshot.\\n\\nWhat did I miss or do wrong?\\n\\nThank you for your help,\\nLuke.\", \"post_time\": \"2016-12-15 17:23:49\" },\n\t{ \"post_id\": 15343, \"topic_id\": 3793, \"forum_id\": 17, \"post_subject\": \"Re: New HTML page hosted on ECL Watch\", \"username\": \"ravishankar\", \"post_text\": \"Thanks a lot. It served the purpose.\", \"post_time\": \"2017-02-16 16:09:51\" },\n\t{ \"post_id\": 15333, \"topic_id\": 3793, \"forum_id\": 17, \"post_subject\": \"Re: New HTML page hosted on ECL Watch\", \"username\": \"gsmith\", \"post_text\": \"Try: http://X.X.X.X:8010/esp/files/demo.html\", \"post_time\": \"2017-02-16 05:47:49\" },\n\t{ \"post_id\": 15323, \"topic_id\": 3793, \"forum_id\": 17, \"post_subject\": \"Re: New HTML page hosted on ECL Watch\", \"username\": \"ravishankar\", \"post_text\": \"Thanks for the response.\\nPlaced the demo.html file in the path /opt/HPCCSystems/componentfiles/files and tried accessing http://10.134.198.XXX:8010/demo.html - But still it takes me to http://10.134.198.XXX:8010/WsSMC/\", \"post_time\": \"2017-02-15 20:08:52\" },\n\t{ \"post_id\": 15313, \"topic_id\": 3793, \"forum_id\": 17, \"post_subject\": \"Re: New HTML page hosted on ECL Watch\", \"username\": \"gsmith\", \"post_text\": \"If you place demo.html in
/opt/HPCCSystems/componentfiles/files
you can open it with: http://10.XXX.XXX.XXX:8010/demo.html\", \"post_time\": \"2017-02-15 19:20:01\" },\n\t{ \"post_id\": 15273, \"topic_id\": 3793, \"forum_id\": 17, \"post_subject\": \"New HTML page hosted on ECL Watch\", \"username\": \"ravishankar\", \"post_text\": \"Can we able to deploy/host new HTML page on ECL Watch or in ESP and I want to access it like\\nhttp://10.XXX.XXX.XXX:8010/demo.html or\\nhttp://10.XXX.XXX.XXX:8002/demo.html\\n\\nECL watch or ESP should be deployed in some web server/container inside HPCC Systsem and since HPCC System is open source - Can I able to deploy/host my own HTML pages in there.\\nI tried through all the folders in HPCC clusters - All thor, hthor, dali servers folders I can't able to find the web container folder where I can deploy my HTML file.\\n\\nCould you please help me to get in there to deploy my own HTML page and access them like ECL Watch and ESP as mentioned below.\\nhttp://10.XXX.XXX.XXX:8010/demo.html or\\nhttp://10.XXX.XXX.XXX:8002/demo.html\", \"post_time\": \"2017-02-14 20:03:53\" },\n\t{ \"post_id\": 15483, \"topic_id\": 3833, \"forum_id\": 17, \"post_subject\": \"Re: Delete of a logical file does not create workunit\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,\\n\\nRKC suggested you ask Jake Smith -- he should be able to tell you. Hopefully he'll repsond in the JIRA ticket.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-23 14:08:26\" },\n\t{ \"post_id\": 15453, \"topic_id\": 3833, \"forum_id\": 17, \"post_subject\": \"Re: Delete of a logical file does not create workunit\", \"username\": \"oscar.foley\", \"post_text\": \"Richard... seems that it is a "feature" \\n\\nCould you tell me where are these logs?\\n\\nThanks,\\nOscar\", \"post_time\": \"2017-02-23 10:01:50\" },\n\t{ \"post_id\": 15443, \"topic_id\": 3833, \"forum_id\": 17, \"post_subject\": \"Re: Delete of a logical file does not create workunit\", \"username\": \"oscar.foley\", \"post_text\": \"JIRA bug raised: https://track.hpccsystems.com/browse/HPCC-17124\", \"post_time\": \"2017-02-22 16:34:51\" },\n\t{ \"post_id\": 15433, \"topic_id\": 3833, \"forum_id\": 17, \"post_subject\": \"Re: Delete of a logical file does not create workunit\", \"username\": \"rtaylor\", \"post_text\": \"Oscar,
Is this a bug or a feature?
Good question. How can I see who and when a logical file was deleted?
I'm sure it's probably in some log file somewhere, but I have no idea which or how you would easily access the information. Looks to me like a great candidate for a JIRA report. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-02-22 14:31:05\" },\n\t{ \"post_id\": 15413, \"topic_id\": 3833, \"forum_id\": 17, \"post_subject\": \"Delete of a logical file does not create workunit\", \"username\": \"oscar.foley\", \"post_text\": \"I have a logical file. From ECL Watch I can copy it, rename it, respray it, spray it back. All these operations generates a D type workunits (i.e: D20170222-115852)\\n[attachment=0:2upsnzje]Capture1.PNG\\n\\nBut I can see that no D type workunit is generated when I delete logical files.\\n\\n- Is this a bug or a feature?\\n- How can I see who and when a logical file was deleted?\", \"post_time\": \"2017-02-22 12:06:09\" },\n\t{ \"post_id\": 17021, \"topic_id\": 4231, \"forum_id\": 17, \"post_subject\": \"Re: Query publishing using remote files\", \"username\": \"dsanchez\", \"post_text\": \"I couldn't get it to work...\\n\\nThis is the RoxieTopology file in the cluster, I changed those two parameters, restarted the cluster but still the files were copied over. The parameter in roxie shows now the right values but when I publish the query it copies the files again or if I specify in the publishing script the options "--no-files --allow-foreign --daliip=$remoteDali" the query gets suspended because it cannot find the file.\\n\\nAny Idea what I could be missing? Platform is on version 5.6.8-1 by the way.\\n\\nThanks!\\n\\nEDIT*\", \"post_time\": \"2017-05-11 13:30:34\" },\n\t{ \"post_id\": 17011, \"topic_id\": 4231, \"forum_id\": 17, \"post_subject\": \"Re: Query publishing using remote files\", \"username\": \"dsanchez\", \"post_text\": \"This was really helpful. Thanks Jim.\\n\\nI will test this and thank you one more time if it works!\", \"post_time\": \"2017-05-11 08:41:26\" },\n\t{ \"post_id\": 17001, \"topic_id\": 4231, \"forum_id\": 17, \"post_subject\": \"Re: Query publishing using remote files\", \"username\": \"JimD\", \"post_text\": \"This is achieved in the way your Roxie os configured. \\n\\nThere are two settings in the Roxie configuration that control where Roxie looks for data and index files:\\n\\ncopyResources Copies necessary data and key files from the current location when the query is published.\\n\\nuseRemoteResources Instructs Roxie to look for data and key files in the current location after the query is published.\\n\\nFor the scenarion you descriobe, you should set copyResources to FALSE and useRemoteResources to TRUE.\\n\\nThis directs the Roxie cluster to load the data from a remote location. \\nThe query can be executed immediately, but performance is limited by network bandwidth.\\nThis allows queries to run without using any Roxie disk space but reduces its throughput capabilities.\\n\\nYou can find more details about this and other Roxie configuration settings in this manual:\\nhttp://cdn.hpccsystems.com/releases/CE- ... 
df#page=10\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2017-05-10 21:05:22\" },\n\t{ \"post_id\": 16991, \"topic_id\": 4231, \"forum_id\": 17, \"post_subject\": \"Query publishing using remote files\", \"username\": \"dsanchez\", \"post_text\": \"Hi all,\\n\\nI am trying to publish a query into a Roxie cluster but avoiding the copy of any file from Thor and instead using them remotely.\\n\\nI've been reading this thread but I would like to discard other options before I dive into the configuration files:\\nviewtopic.php?f=17&t=1992\\n\\nAs him, I have an index created in my Thor cluster A, I have some ECL I want to deploy in my Roxie Cluster B.\\nThe line I use to publish is as follows:\\n
ecl publish -v --name="$folderName.$file" -s $server --target=$target -u $user -pw $password --allow-foreign --main="$folderName.$file" --memoryLimit=2GB --timeLimit=30000 --warnTimeLimit=5000 --daliip=$remoteDali --no-files --allow-foreign
\\nA and B don't share a dali, and this works if I publish normally (Without the last 2 options). The problem is that this is a small development Roxie and it doesn't have a lot of disk space so we can't afford having Roxie to copy the files it needs.\\n\\nAny Idea on how can we achieve this?\\n\\nThanks for the help!\\nDaniel.\", \"post_time\": \"2017-05-10 14:23:32\" },\n\t{ \"post_id\": 17603, \"topic_id\": 4403, \"forum_id\": 17, \"post_subject\": \"Re: Having issues with Spraying my CSV file\", \"username\": \"JimD\", \"post_text\": \"Yes, exactly. \\nHere is some code I have handy (might not be the best code, but it shows you what I mean)\\n\\n\\n//Assumes this is a CSV file (comma separated, new line terminated) exists and \\n// sprayed to Thor as ~test::testme.csv\\n/* \\n1,312,"Joe","Smith"\\n2,433,"John","Doe"\\n3,513,"Jim","Jones"\\n*/\\nMyFileLayout := RECORD\\nSTRING num;\\nSTRING id;\\nSTRING Fname;\\nSTRING LastName;\\nEND;\\t\\t\\t\\t\\t\\t\\t\\n\\nMyDataset := dataset ('~test::testme.csv', MyFileLayout,CSV(separator(',')));\\nOUTPUT(MyDataset);\\n
\\n\\nThis is the result in the IDE:\\n[attachment=1:2ehlqao0]TestCSVOutput.png\\n\\nHTH, \\nJim\", \"post_time\": \"2017-06-21 21:23:25\" },\n\t{ \"post_id\": 17593, \"topic_id\": 4403, \"forum_id\": 17, \"post_subject\": \"Re: Having issues with Spraying my CSV file\", \"username\": \"KatyChow\", \"post_text\": \"Hi Jim,\\n\\nEven when I look at the data structure, it's only 1 string field. Are you saying it will be different when I output this file?\\n\\nTHanks!\\n\\nKaty\", \"post_time\": \"2017-06-21 19:35:12\" },\n\t{ \"post_id\": 17583, \"topic_id\": 4403, \"forum_id\": 17, \"post_subject\": \"Re: Having issues with Spraying my CSV file\", \"username\": \"JimD\", \"post_text\": \"Katy,\\n\\nI think your issue is in the manner in which it displays in ECL watch and not with the spray.\\n\\nTry OUTPUT(yourdataset) and you should see your columns.\\n\\nThere is a Jira issue for the display in ECL Watch:\\nhttps://track.hpccsystems.com/browse/HPCC-11535zz\\n\\nHTH, \\nJim\", \"post_time\": \"2017-06-21 19:30:11\" },\n\t{ \"post_id\": 17573, \"topic_id\": 4403, \"forum_id\": 17, \"post_subject\": \"Having issues with Spraying my CSV file\", \"username\": \"KatyChow\", \"post_text\": \"Hi there!\\n\\nI've been trying to spray my CSV file for most of the afternoon. I think my settings are correct, but somehow once my file is done spraying the columns are not separated as I was expecting.\\n\\nHere are the settings when I spray:\\nFormat ASCII\\nMax Record Length 8192\\nSeparators ,\\nLine Terminators \\\\n,\\\\r\\\\n\\nQuotes "\\n\\nWhen I view context after my file has sprayed, it looks like it did not recognize my separator. Am I missing something here?\\n\\nThanks!\\n\\nKaty\", \"post_time\": \"2017-06-20 20:18:37\" },\n\t{ \"post_id\": 19423, \"topic_id\": 4883, \"forum_id\": 17, \"post_subject\": \"Re: Force abort of stuck workunit?\", \"username\": \"JimD\", \"post_text\": \"If this happens again and you are able to reproduce it, you should report it in Jira -- our issue tracking system: \\n\\n JIRA (https://track.hpccsystems.com). \\n\\nregards,\\n\\nJim\", \"post_time\": \"2017-10-11 17:38:00\" },\n\t{ \"post_id\": 19373, \"topic_id\": 4883, \"forum_id\": 17, \"post_subject\": \"Re: Force abort of stuck workunit?\", \"username\": \"BGehalo\", \"post_text\": \"Thanks Jim but that wasn't working either, the workunits were completely unresponsive to client commands whether through ECL, ECL Watch, or ECL Plus.\\n\\nThey worked themselves out in time, just took overnight. I imagine the only way to force an abort when the client tools are unresponsive is to access the underlying OS directly to issue something like a kill -9 command. I'm going to play around with my VM to see what I can do.\", \"post_time\": \"2017-10-10 20:17:47\" },\n\t{ \"post_id\": 19363, \"topic_id\": 4883, \"forum_id\": 17, \"post_subject\": \"Re: Force abort of stuck workunit?\", \"username\": \"JimD\", \"post_text\": \"In ECL Watch:\\n\\n\\n* Open the list of Workunits\\n* Select the Workunit you want to abort. The Abort action button is now enabled.\\n* Press the Abort button.\\n
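If the Abort button in ECL Watch is itself unresponsive, the same request can be sent directly to the WsWorkunits service that ECL Watch calls. The following is only a sketch: the WUAbort method name and the Wuids_i0 form encoding are assumptions to verify against the WsWorkunits service page on your own ESP before relying on them, and the workunit id shown is a placeholder.
# Sketch only: ask WsWorkunits to abort a workunit (placeholder WUID and ESP address).
# Confirm that WUAbort and the Wuids_i0 array encoding exist on your ESP version first.
curl -u myuser:mypassword "http://192.168.56.101:8010/WsWorkunits/WUAbort?Wuids_i0=W20171009-123456"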
\\n[attachment=0:1ahgnuhm]Abort.jpg\", \"post_time\": \"2017-10-10 20:09:24\" },\n\t{ \"post_id\": 19323, \"topic_id\": 4883, \"forum_id\": 17, \"post_subject\": \"Force abort of stuck workunit?\", \"username\": \"BGehalo\", \"post_text\": \"I have a workunit that is stuck alternating between aborting and compiling. Is there some way to force it to abort so I can delete it and allow the system to process other jobs.\\n\\nThis is on a single node cluster.\\n\\nI tried going through the command line using both ECL and ECLPlus but neither of them get the job done.\", \"post_time\": \"2017-10-09 20:36:56\" },\n\t{ \"post_id\": 21513, \"topic_id\": 5443, \"forum_id\": 17, \"post_subject\": \"Re: Query Time\", \"username\": \"lpezet\", \"post_text\": \"Thanks Richard!\", \"post_time\": \"2018-04-09 16:30:45\" },\n\t{ \"post_id\": 21473, \"topic_id\": 5443, \"forum_id\": 17, \"post_subject\": \"Re: Query Time\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nIn this case, the timing for any single OUTPUT could be computed by going through the graphs backwards,starting with the OUTPUT activity and adding up the time each graph that contributed to that result took.\\n\\nOr the simple way would be to run the job again, commenting out the OUTPUTs you don't want to see, and then the total time is the total time for the single OUTPUT. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-05 14:42:45\" },\n\t{ \"post_id\": 21463, \"topic_id\": 5443, \"forum_id\": 17, \"post_subject\": \"Re: Query Time\", \"username\": \"lpezet\", \"post_text\": \"Hi Richard!\\n\\nIf I just generate 1 OUTPUT I understand the Total Cluster Time would be the time it took for that one OUTPUT.\\nBut say I OUTPUT 3 different results. I'd like to know how long it took (in seconds or so) for each OUTPUT to complete.\\nIn the screenshot, and just to be more visual in my explanations, I'd like to know how long each output here took, not just how many results they have. For example, I'd like to know that Query3A took 65 seconds, Query3B took 100 seconds and Query3C took 555 seconds. Total Cluster Time will tell me it's 555 seconds only and Timers are...well, cryptic at best as they are right now. (NB: Don't get me wrong, I use Timers a lot, to figure out where my queries spent the most time for example. They just don't provide what what I'm asking for here directly, that's all).\\nSince this "elapsed time" for each output is not there in ECL Watch, I'm guessing that information is not provided today (through any of the ECL Watch API endpoint) and some calculations are needed. Am I wrong?\\n\\n[attachment=0:1b42kumh]Screen Shot 2018-04-05 at 8.22.17 AM.png\", \"post_time\": \"2018-04-05 14:27:20\" },\n\t{ \"post_id\": 21453, \"topic_id\": 5443, \"forum_id\": 17, \"post_subject\": \"Re: Query Time\", \"username\": \"rtaylor\", \"post_text\": \"Luke,\\n\\nThe Timers tab displays a number of timings for the workunit, including the total TimeElapsed, which tells you the total time it took for your whole Thor to do the job. The individual graph timings are also on a whole-Thor basis, not per-node.\\n\\nSo, why would you want timings on a per-node basis? 
What problem are you trying to solve?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-05 08:10:59\" },\n\t{ \"post_id\": 21443, \"topic_id\": 5443, \"forum_id\": 17, \"post_subject\": \"Query Time\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nIs there an easy way today to get the "time period" of a result?\\nFor example, if I open ECL Watch on a workunit, and go to the Outputs tab, I'd love to see the "time period" for each result (say as a separate column).\\nBy "time period" I mean from start to finish, vs. adding up each second spent on each node when running ECL code. So "2s time period" could be actually "6s total time spent" with 3 nodes spending each 2s.\\nNow I don't see it (today) but maybe it's somewhere else?\\n\\nIf it's not available today, is there a way to calculate it? I was thinking of getting the graph of the WU (WUGetGraph.json?) and just go bottom-up (from leaf to root) and add up the TimeMaxLocalExecute to get a good approx. of "time period" for a given leaf (i.e. result). Is that the right approach?\\nI understand with this approach I may get more seconds than the total number of seconds spent by cluster if there's more than 1 result.\\n\\n\\nThanks!\", \"post_time\": \"2018-04-04 15:46:06\" },\n\t{ \"post_id\": 21543, \"topic_id\": 5463, \"forum_id\": 17, \"post_subject\": \"Re: ECL Watch - Individual nodes on cluster\", \"username\": \"rtaylor\", \"post_text\": \"Replied here: https://hpccsystems.com/bb/viewtopic.php?f=8&t=5453\\n\\nPlease, post your questions once and re-post only if you don't get a response.\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2018-04-09 18:39:29\" },\n\t{ \"post_id\": 21523, \"topic_id\": 5463, \"forum_id\": 17, \"post_subject\": \"ECL Watch - Individual nodes on cluster\", \"username\": \"rsghatpa\", \"post_text\": \"How to check what process is running on what node in a cluster environment through ECL Watch?\", \"post_time\": \"2018-04-09 17:09:51\" },\n\t{ \"post_id\": 23963, \"topic_id\": 6213, \"forum_id\": 17, \"post_subject\": \"Re: myeclagent temp folder\", \"username\": \"amillar\", \"post_text\": \"Hi Jim,\\n\\nThanks for getting back to me that makes sense as the date stamps on these files are spread out over the year, and we did have some hardware issues last year which would have led to the cluster / work units terminating unexpectedly. \\n\\nAll the best\\n\\nAntony\", \"post_time\": \"2019-01-09 10:04:31\" },\n\t{ \"post_id\": 23933, \"topic_id\": 6213, \"forum_id\": 17, \"post_subject\": \"Re: myeclagent temp folder\", \"username\": \"JimD\", \"post_text\": \"For the short term, after you make sure that no jobs are running, you can safely delete these files.\\n\\nTypically, these temp files ae automatically deleted, but for some reasno you have some that were left behind. We suspect that the jobs that created these temp files terminated abnormally. 
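In the meantime, a hedged shell sketch of the kind of one-off cleanup mentioned above, assuming the default /var/lib/HPCCSystems/eclagent/temp path from the question and that you have first confirmed no jobs are running:
# Sketch only: list eclagent spill files older than 30 days before deleting anything.
# Assumes the default temp path from the question; run only when no workunits are active.
find /var/lib/HPCCSystems/eclagent/temp -maxdepth 1 -name 'W20*' -mtime +30 -ls
# once the listing looks right, re-run with -delete instead of -ls to remove the files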
\\n\\nCan you check if the workunits named in the temp files did indeed terminate unexpectedly?\\n\\nHTH, \\n\\nJim\", \"post_time\": \"2019-01-08 15:17:26\" },\n\t{ \"post_id\": 23923, \"topic_id\": 6213, \"forum_id\": 17, \"post_subject\": \"myeclagent temp folder\", \"username\": \"amillar\", \"post_text\": \"Hi there,\\n\\nI was wondering if someone can help answer this query for me.\\n\\nWe are trying to keep our disk space usage nice and clean and regularly perform clean-ups\\n\\nHowever I have just noticed We have around 36,000 files and 100Gig of data in the :\\n\\n/var/lib/HPCCSystems/eclagent/temp folder.\\n\\nwith file names such as : \\n\\nW20180314-142635.spill_sort_0xc863d0.45_140659541950464_3266\\nW20180312-112025.~spill__scope__7__W20180312-112025\\n\\nCan we remove everything that is in that folder without causing any issues?\\n\\nThanks in advance\", \"post_time\": \"2019-01-04 15:54:14\" },\n\t{ \"post_id\": 28931, \"topic_id\": 7641, \"forum_id\": 17, \"post_subject\": \"Re: Fix Download to CSV function\", \"username\": \"rtaylor\", \"post_text\": \"dczajak,
If there is another place to submit bugs besides this forum, please let me know.
Bug reporting/feature requests are submitted in JIRA (https://track.hpccsystems.com)\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-01-09 18:02:38\" },\n\t{ \"post_id\": 28881, \"topic_id\": 7641, \"forum_id\": 17, \"post_subject\": \"Fix Download to CSV function\", \"username\": \"dczajak\", \"post_text\": \"I'd like to request that the "download to CSV" function for the Logical Files tab of the ECL Watch be fixed. Currently, when you try to use it the Records and Size columns are shifted because the values contain commas. I'd like to request that the commas be stripped out of the field values before downloading to CSV so that the files will open correctly in Excel and not skew the output. \\n\\nIf there is another place to submit bugs besides this forum, please let me know.\", \"post_time\": \"2020-01-09 16:21:54\" },\n\t{ \"post_id\": 29923, \"topic_id\": 7873, \"forum_id\": 17, \"post_subject\": \"Re: Determining size of workunit dataset\", \"username\": \"ghalliday\", \"post_text\": \"I don't think there is directly. \\n\\nHowever, I would recommend taking a look at the DataPatterns bundle that is now merged into the standard library (Std.DataPatterns). It provides the functionality of DISTRIBUTION plus much more.\", \"post_time\": \"2020-03-25 14:05:14\" },\n\t{ \"post_id\": 29893, \"topic_id\": 7873, \"forum_id\": 17, \"post_subject\": \"Determining size of workunit dataset\", \"username\": \"newportm\", \"post_text\": \"I'm running some large DISTRIBUTION functions and we have some concerns over the size of the output in the Workunit. Is there a way for me to tell the size of a workunit output? DISTRIBUTION does not give the option to save to a logical file. So as we move to the cloud, I want to make sure we limit the size of our outputs where possible. \\n\\nThanks,\\nTim N, Sr Data Eng, LN\", \"post_time\": \"2020-03-24 14:31:54\" },\n\t{ \"post_id\": 97, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Re: Available White Papers\", \"username\": \"dabayliss\", \"post_text\": \"@Gavin\\nYes - that is true - but as this is an introductory text:\\na) I wanted to show how it actually would execute\\nb) I wasn't quite ready to admit that you would pretty much ignore them and build the graph you throught they should have written \", \"post_time\": \"2011-07-01 13:34:26\" },\n\t{ \"post_id\": 96, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Re: Available White Papers\", \"username\": \"ghalliday\", \"post_text\": \"In this case (and many others) if you didn't add ,LOCAL to the ROLLUP then the code generator will automatically spot that all the matching records must be on the same node, and will automatically add the LOCAL attribute for you.\", \"post_time\": \"2011-07-01 08:10:12\" },\n\t{ \"post_id\": 95, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Re: Available White Papers\", \"username\": \"dabayliss\", \"post_text\": \"You are correct. The SORT will distribute the data evenly - but it does ensure that all of the records with the same value for the key are on the same node.\\nGIVEN that the rollup condition contained an equality on the whole key from the sort I could guarantee that a rollup would never need to pull records from the following node. 
I thus used the ,LOCAL flag so that all nodes could act independantly.\", \"post_time\": \"2011-06-30 22:08:16\" },\n\t{ \"post_id\": 94, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Re: Available White Papers\", \"username\": \"udetelx\", \"post_text\": \"The LOCAL is on a ROLLUP, not DEDUP.\", \"post_time\": \"2011-06-30 20:04:25\" },\n\t{ \"post_id\": 93, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Re: Available White Papers\", \"username\": \"udetelx\", \"post_text\": \"I noticed on page 6 of wp_ecl_for_hadoopers.pdf that the LOCAL was absent from the sort to help with possible skewing but the following DEDUP had LOCAL included. Is that what you intended since LOCAL will keep the focus of the operation on a per node basis? Thanks.\", \"post_time\": \"2011-06-30 19:56:15\" },\n\t{ \"post_id\": 70, \"topic_id\": 38, \"forum_id\": 19, \"post_subject\": \"Available White Papers\", \"username\": \"HPCC Staff\", \"post_text\": \"In response to recent social media posts comparing HPCC/ECL to Hadoop/MapReduce/PIG, below are references to three helpful white papers that provide an introduction, comparison and benchmarks on the technology. \\n\\nhttp://hpccsystems.com/community/white-papers/ecl-for-piggers \\nhttp://hpccsystems.com/community/white-papers/ecl-for-hadoopers\\nhttp://hpccsystems.com/community/white-papers/performing-pig-pen\", \"post_time\": \"2011-06-21 20:18:26\" },\n\t{ \"post_id\": 153, \"topic_id\": 64, \"forum_id\": 19, \"post_subject\": \"Re: Can Roxie work with hadoop?\", \"username\": \"dabayliss\", \"post_text\": \"Hey There,\\n\\nRoxie works from keys (or indexes) that are built for it by Thor. It is theoretically possible for someone to build roxie keys from Hadoop but it would be a substantial undertaking.\\n\\nWhat I would recommend would be installing THOR on the nodes used for hadoop (the systems can co-reside). Do any processing you wish in hadoop; then use a very short THOR process to build the roxie keys. Then you would be able to use Roxie as your search engine and Hadoop for the 'bulk of' your batch work.\\n\\nAt the moment you would want the 'end' of your hadoop process to write out regular Linux files (perhaps in CSV) to allow them to be directly read in THOR. There is a rather more automated HDFS->THOR module in the works: currently slated for Q4\\n\\nhttp://hpccsystems.com/products-and-services/products/modules/hadoop-to-roxie-data-export\", \"post_time\": \"2011-07-14 14:29:33\" },\n\t{ \"post_id\": 152, \"topic_id\": 64, \"forum_id\": 19, \"post_subject\": \"Can Roxie work with hadoop?\", \"username\": \"hli\", \"post_text\": \"Hi,\\n\\nI am just new to HPCC. Currently, we are running hadoop for data process and storage and like what hadoop can do so far. For query part, we are wondering whether we can put Roxie in front of hadoop for searching. Is it possible and easy?\\n\\nThanks,\", \"post_time\": \"2011-07-14 13:57:01\" },\n\t{ \"post_id\": 250, \"topic_id\": 85, \"forum_id\": 19, \"post_subject\": \"Re: question on improvisational complex queries\", \"username\": \"dabayliss\", \"post_text\": \"Well I cannot really comment on : "Why Hadoop doesn't let them ..."\\n\\nI suspect the issue they are hitting is that Hadoop is generally coded in Java - and it generally requires a lot of Java - so it more lends itself to 'projects' that 'dynamic complex queries'. 
There are (of course) multiple things in the Hadoop community designed to attack that (eg Pig, Cascalog) - but again - I cannot really tell you what will or won't work for your client.\\n\\nThe ECL answer is rather easier: ECL is a dictionary based language; it is designed to that the heavy hitters go in first and create functions and words that the end users can they use dynamically. The concept is that it is 'simple in the end' and the complexity is buried in the lower layers.\\n\\nIn the case that they queries are FAIRLY dynamic but not entirely free form then the ECL Template language can be used to actually generate the ECL for you.\\n\\nEven more specialized it is possible to use the ECL graph statement and the PARSE statement in Tomita mode to construct a 'mini-language' which is then parsed and executed by roxie at run-time.\\n\\nHTH - but as is usually the case - the more specific the question the more helpful the answer ....\\n\\nDavid\", \"post_time\": \"2011-08-09 20:02:18\" },\n\t{ \"post_id\": 248, \"topic_id\": 85, \"forum_id\": 19, \"post_subject\": \"question on improvisational complex queries\", \"username\": \"ingeniera_maria\", \"post_text\": \"Hello,\\n\\nI am supporting a team where hadoop/mapreduce is being used. From what I understand, the current implementation does not allow for the end-user (non-developer) to automatically send complex queries to the system. Can you please help me understand why this is and how HPCC addresses this? \\n\\nThanks.\", \"post_time\": \"2011-08-09 16:33:38\" },\n\t{ \"post_id\": 1173, \"topic_id\": 89, \"forum_id\": 19, \"post_subject\": \"Re: Comparing HPCC to Hadoop\", \"username\": \"rtaylor\", \"post_text\": \"
HPCC IDE is a matured one, it was developed by Borland.
Not exactly. More accurately, SOME of the HPCC development team were part of the original Borland compiler team about 25 years ago, but left Borland about 25 years ago.\\n\\nHowever, regarding the "maturity" issue -- yes, the same core members of the development team have been together all this time and the technology is thoroughly "mature."\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-22 18:41:10\" },\n\t{ \"post_id\": 1130, \"topic_id\": 89, \"forum_id\": 19, \"post_subject\": \"Re: Comparing HPCC to Hadoop\", \"username\": \"mhgopal\", \"post_text\": \"The Key feature that I like are the real time query capabilities of HPCC. Agreed that the vast majority of people will go out and venture with Hadoop rightly so because of the volume of attention Hadoop is getting.\\n\\nIn my opinion the cost of running a Hadoop system to a HPCC system will be significant. \\n\\nOver time HPCC will come out as a clear alternative.\\n\\nHPCC IDE is a matured one, it was developed by Borland. Its a state of the art in terms of compiler checking and what not.\\n\\nOrganisations can actually spend money on Data Scientists, Business Intelligence & Hardware. Instead of spending all the dollars on getting a hadoop system up and running and later realizing that it does not have a real time analysis feature.\", \"post_time\": \"2012-02-18 13:57:27\" },\n\t{ \"post_id\": 269, \"topic_id\": 89, \"forum_id\": 19, \"post_subject\": \"Comparing HPCC to Hadoop\", \"username\": \"HPCC Staff\", \"post_text\": \"The four key factors that differentiate HPCC from Hadoop: \\n\\n1. Powerful Enterprise Control Language\\n2. Roxie Delivery Engine\\n3. Enterprise Ready\\n4. Beyond MapReduce\\n\\nRead more at http://hpccsystems.com/Why-HPCC/HPCC-vs-Hadoop\\n\\nAlso check out The Road from Pig to ECL: The PigMix Benchmark on HPCC\\nhttp://hpccsystems.com/Why-HPCC/HPCC-vs ... pigmix_ecl\", \"post_time\": \"2011-08-12 11:58:23\" },\n\t{ \"post_id\": 337, \"topic_id\": 112, \"forum_id\": 19, \"post_subject\": \"Re: Pig Hadoop scripts to ECL using Bacon\", \"username\": \"HPCC Staff\", \"post_text\": \"This topic is also being discussed in the Programming forum: \\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=111&sid=72cc738c55ee96f7f3c3cece456bf97c\", \"post_time\": \"2011-09-13 13:55:12\" },\n\t{ \"post_id\": 332, \"topic_id\": 112, \"forum_id\": 19, \"post_subject\": \"Pig Hadoop scripts to ECL using Bacon\", \"username\": \"mayurchoubey\", \"post_text\": \"Hi,\\n\\nI am new to HPCC. I need to convert my pig scripts to ECL. Currently using VM image and not be able to find or locate BACON command line tool.\\n\\nPlease suggest how this can be achieved.\\n\\nThanks in advance.\", \"post_time\": \"2011-09-13 07:49:42\" },\n\t{ \"post_id\": 729, \"topic_id\": 186, \"forum_id\": 19, \"post_subject\": \"Re: How to prove HPCC is truly parallel?\", \"username\": \"HPCC Staff\", \"post_text\": \"There are multiple causes to the “long tails” problem in Hadoop. 
Some of these causes, related for example to data skews and slow nodes, get amplified by the fact that multiple MapReduce cycles are normally serialized over a single data workflow (when, for example, performing a multi-join, working through a graph traversal problem or executing a clustering algorithm).\\n\\nHPCC utilizes several mechanisms to minimize the lasting effect of these long tails, including the additional parallelization that was described in the previous post, a record oriented filesystem which ensures that each node receives an approximate similar load (in terms of number of data records processed by each node, even for variable length and/or XML record layouts) and enough instrumentation to make the user aware of the data skew levels at each step in the data workflow execution graph.\\n\\nPlease let us know if you need more information.\", \"post_time\": \"2011-12-08 15:19:20\" },\n\t{ \"post_id\": 728, \"topic_id\": 186, \"forum_id\": 19, \"post_subject\": \"Re: How to prove HPCC is truly parallel?\", \"username\": \"HPCC Staff\", \"post_text\": \"Followup question from a community member:\\n\\nSpecific to "which contributes to the well-known “long tail problem” in Hadoop", \\n\\nDo you have any examples, diagram or description to explain why HPCC doesn't have the long tail problem?\", \"post_time\": \"2011-12-08 15:18:05\" },\n\t{ \"post_id\": 726, \"topic_id\": 186, \"forum_id\": 19, \"post_subject\": \"Re: How to prove HPCC is truly parallel?\", \"username\": \"HPCC Staff\", \"post_text\": \"The fundamental design concepts in HPCC are not based in the MapReduce paradigm postulated by Google in 2004. As a matter of fact, HPCC predates that paper by a several years.\\n\\nThe idea behind the way data workflows are architected in HPCC is based on high level data primitives (SORT, PROJECT, DISTRIBUTE, JOIN, etc.), exposed through the ECL language, and a powerful optimizer which, at ECL compile time, determines how these operations can be parallelized during execution, and what the execution strategy should be to achieve the highest performance in the system.\\n\\nECL is a declarative language, so ideally the programmer doesn’t need to define the control flow of the program. A large number of data operations are commutative in nature, and since transferring (big) data is normally very expensive, the optimizer can, for example, move a filter closer to the beginning to reduce the amount of data that is carried over in subsequent operations. Other optimizations such as lazy execution are also utilized to eliminate throwaway code and data structures.\\n\\nThe specific execution plans vary, depending on how the particular data workflow (ECL program) looks like, and the system provides for a graphical display of the exact execution plan that the optimizer determined to be the most appropriate for that workflow. Once you submit a workunit from the ECL IDE, you can visualize the execution plan for that workunit, and even key metrics in each intermediate step which include number of data records processed, data skews and the specific operation represented. 
As you can see, a complex execution graph is normally subdivided in multiple subgraphs, and many of those operations are parallelized if there is no need for a synchronization barrier (or if the optimizer thinks that excessive parallelization will affect the overall performance negatively).\\n\\nIt is recommended that you download the Virtual Machine and/or binaries of the platform, http://hpccsystems.com/download, and play with some of the examples that we provide in our portal, to understand how this all works in practice. Although in real life you would never need to tinker with the platform itself, if you feel inclined to seeing how things work under the hood, please feel free to download the C++ source code of the HPCC platform from our GIT repository, https://github.com/hpcc-systems, and take a look at the inner implementation details of the platform and ECL compiler and optimizer.\\n\\nAnother source of reference is the PigMix Benchmark on HPCC:\\nhttp://hpccsystems.com/Why-HPCC/HPCC-vs ... pigmix_ecl\\n\\nPlease post a reply if you need any help, or if you have any other questions.\", \"post_time\": \"2011-12-08 13:57:56\" },\n\t{ \"post_id\": 725, \"topic_id\": 186, \"forum_id\": 19, \"post_subject\": \"How to prove HPCC is truly parallel?\", \"username\": \"HPCC Staff\", \"post_text\": \"This question was submitted from a community member and is a great topic to add in this forum. \\n\\n\\nHow to prove HPCC is truly parallel?\\nFrom the Beyond MapReduce section, http://hpccsystems.com/Why-HPCC/HPCC-vs-Hadoop/Components#beyondmapreduce, there is the following description:\\n\\nTruly parallel: Unlike Hadoop, nodes of a datagraph can be processed in parallel as data seamlessly flows through them. In Hadoop MapReduce (Java, Pig, Hive, Cascading, etc.) almost every complex data transformation requires a series of MapReduce cycles; each of the phases for these cycles cannot be started until the previous phase has completed for every record, which contributes to the well-known “long tail problem” in Hadoop. HPCC effectively avoids this, which effectively results in higher and predictable performance.\\n\\nIt told us that every complex data transformation requires a series of MapReduce cycles. But it didn't say HPCC how to avoid this issue. \\n\\nWould you tell me HPCC how to avoid complex data transformation? Is there any diagram like Hadoop MapReduce to show its process flow for HPCC? Is there any example to compare them?\", \"post_time\": \"2011-12-08 13:53:09\" },\n\t{ \"post_id\": 1100, \"topic_id\": 252, \"forum_id\": 19, \"post_subject\": \"Re: Hortonworks HDP\", \"username\": \"thildebrant\", \"post_text\": \"Thank you Richard.\\nDoes anyone else have insight into the differences? \\nThe Roxie capability is a Hadoop shortcoming, I was more interested in a comparison of the HDP capabilities vs. the HPCC total system, in terms of integration and sysadmin-type concerns.\\nThank you,\\nTodd\", \"post_time\": \"2012-02-13 13:42:32\" },\n\t{ \"post_id\": 1049, \"topic_id\": 252, \"forum_id\": 19, \"post_subject\": \"Re: Hortonworks HDP\", \"username\": \"rtaylor\", \"post_text\": \"Todd,\\n\\nThe way I read it, essentially all the Hortonworks bundle does is create a Hadoop version of our Thor platform in one package instead of eight, making installation and configuration issues easier to handle. 
Hadoop still has nothing to compare to our Roxie delivery platform, and our fully-integrated ease of use and demonstrated performance advantages all still apply.\\n\\nI'm sure other more Hadoop-knowledgeable folks will chime in with specifics.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-02-07 16:24:52\" },\n\t{ \"post_id\": 1045, \"topic_id\": 252, \"forum_id\": 19, \"post_subject\": \"Hortonworks HDP\", \"username\": \"thildebrant\", \"post_text\": \"The HDP is declaring itself as a 'complete package' for large data problems. http://hortonworks.com/technology/horto ... aplatform/ \\nHow does the HPCC solution compete against each of the HDP's features?\\n\\nThank you,\\nTodd\", \"post_time\": \"2012-02-07 15:20:17\" },\n\t{ \"post_id\": 1200, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop/Cloudera and HPCC coexisting\", \"username\": \"DSC\", \"post_text\": \"That is exactly the information I was looking for. Thanks, guys!\\n\\nDan\", \"post_time\": \"2012-02-24 19:36:19\" },\n\t{ \"post_id\": 1198, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop/Cloudera and HPCC coexisting\", \"username\": \"flavio\", \"post_text\": \"Dan,\\n\\nWe have a test cluster internally, where we run both, HPCC and Hadoop, alternatively, to benchmark them. You wouldn't need to remove one in order to run the other, but you wouldn't probably want both running at the same time while testing, either.\\n\\nKeep in mind that, for example, the jvm can hold a substantial amount of memory, even while Hadoop seems not to be doing much (or anything at all). \\n\\nYou want to also verify, in between runs, that you don't have significant paging (and you may also want to flush out filesystem cache, for fairness).\\n\\nAnd you shouldn't forget to configure your memory allocation settings (for both, HPCC and Hadoop), to values that effectively utilize the available hardware.\\n\\nFlavio\", \"post_time\": \"2012-02-24 19:32:38\" },\n\t{ \"post_id\": 1197, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop/Cloudera and HPCC coexisting\", \"username\": \"rtaylor\", \"post_text\": \"I'm just saying I've SEEN it -- I was not the guy doing it so I have no idea what chuckholes and pitfalls there may be -- sorry. \", \"post_time\": \"2012-02-24 19:14:45\" },\n\t{ \"post_id\": 1193, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop/Cloudera and HPCC coexisting\", \"username\": \"DSC\", \"post_text\": \"Let me clarify a bit.\\n\\nThis is purely for testing. We are still evaluating big data solutions and it makes a great deal of sense to normalize as much of the environment as possible when comparing competing products. It struck me that actually using the same nodes would be ideal. What would then make it *easy* would be the case where neither HPCC nor Hadoop conflict with each other; that way, we don't have to explicitly shutdown or remove one product to test the other.\\n\\nSo, I'm hoping that it's possible to make them coexist. You're telling me they will, right? No conflicts at runtime with things like TCP/IP ports or other system resources?\", \"post_time\": \"2012-02-24 18:24:23\" },\n\t{ \"post_id\": 1191, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop/Cloudera and HPCC coexisting\", \"username\": \"rtaylor\", \"post_text\": \"I have seen it done here, but that was POC code I was viewing at the time. 
Whether you would want to do it in a production system is a question someone else will need to answer.\", \"post_time\": \"2012-02-24 18:18:50\" },\n\t{ \"post_id\": 1187, \"topic_id\": 279, \"forum_id\": 19, \"post_subject\": \"Hadoop/Cloudera and HPCC coexisting\", \"username\": \"DSC\", \"post_text\": \"Does anyone have any experience with installing both HPCC and Cloudera's Hadoop distro on the same systems and running them simultaneously? Any conflicts or gotchas?\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-02-24 14:48:18\" },\n\t{ \"post_id\": 3146, \"topic_id\": 361, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop Data Integration Connector\", \"username\": \"HPCC Staff\", \"post_text\": \"A new version is now available! This includes both a libhdfs (native API provided by Hadoop) based connector and a webhdfs (web based API provided by Hadoop) implementation. \\n\\nhttp://hpccsystems.com/h2h\", \"post_time\": \"2013-01-22 20:30:06\" },\n\t{ \"post_id\": 2062, \"topic_id\": 361, \"forum_id\": 19, \"post_subject\": \"Hadoop Data Integration Connector Podcast\", \"username\": \"HPCC Staff\", \"post_text\": \"Have questions about the Hadoop Data Integration Connector? Listen to the latest podcast where Jim DeFabia and Rodrigo Pastrana from LexisNexis have a Q&A discussion about this cool utility!\\n\\nhttp://hpccsystems.com/podcasts?order=f ... &sort=desc\", \"post_time\": \"2012-07-24 16:58:31\" },\n\t{ \"post_id\": 1688, \"topic_id\": 361, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop Data Integration Connector\", \"username\": \"Durai\", \"post_text\": \"Good to see this long-awaited connector. Thank You.\", \"post_time\": \"2012-06-01 05:53:31\" },\n\t{ \"post_id\": 1616, \"topic_id\": 361, \"forum_id\": 19, \"post_subject\": \"Hadoop Data Integration Connector\", \"username\": \"HPCC Staff\", \"post_text\": \"The Hadoop Data Integration Connector is now available in beta! This connector provides a way to seamlessly access data stored in HDFS from within the Thor component of the HPCC platform and also allows writing to HDFS from within Thor. More at http://hpccsystems.com/H2H\\n\\nRead the latest blog post from Flavio Villanustre explaining how it works and how it can be used to leverage certain HPCC components from within existing Hadoop clusters.\\nhttp://hpccsystems.com/blog/hpcchadoop- ... w-elephant\", \"post_time\": \"2012-05-16 19:16:51\" },\n\t{ \"post_id\": 1822, \"topic_id\": 401, \"forum_id\": 19, \"post_subject\": \"Re: Features for NLP with Hadoop and in HPCC\", \"username\": \"chargil\", \"post_text\": \"[quote="Jeniba":3f1ox31k]What are the features of Natural Processing Language(NLP) with Hadoop?and why?\\nWhat are the features of NLP in HPCC?and why?\\n\\nWith HPCC, ECL contains functions for NLP. There is a section detailing what those functions are and how to use them in the ECL Language Reference documentation. Also, the Machine Learning library contains a Docs module that allows for common NLP tasks like tokenization/collocation discovery/etc.\\n\\nWith Hadoop, there is a lot written about how to use third-party libraries like NLTK and Stanford NLP with Hadoop. 
However Hadoop itself does not have NLP features built in to it.\", \"post_time\": \"2012-06-21 17:30:57\" },\n\t{ \"post_id\": 1809, \"topic_id\": 401, \"forum_id\": 19, \"post_subject\": \"Features for NLP with Hadoop and in HPCC\", \"username\": \"Jeniba\", \"post_text\": \"What are the features of Natural Processing Language(NLP) with Hadoop?and why?\\nWhat are the features of NLP in HPCC?and why?\", \"post_time\": \"2012-06-20 09:25:43\" },\n\t{ \"post_id\": 2097, \"topic_id\": 461, \"forum_id\": 19, \"post_subject\": \"Re: Issue with the H2H connector\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi Greg, thanks for contacting us regarding your concern.\\n\\nIf you're using double quote to encapsulate the contents of all your fields have you tried setting the QUOTE attribute? CSV(SEPARATOR('|'), QUOTE('\\\\"'))\\n\\nAnyway, the CSV attributes are acknowledged by the ECL PIPE command as passed in through the "HadoopFileFormat" parameter.\\n\\nIf you looked at the PipeIn macro, the "TERMINATOR" attribute is explicitly gleaned and passed to the "hdfspipe" command, but the entire "HadoopFileFormat" is passed in to PIPE().\\n\\nLet me know if that helps. Thanks.\\n-Rodrigo\", \"post_time\": \"2012-07-27 14:09:32\" },\n\t{ \"post_id\": 2094, \"topic_id\": 461, \"forum_id\": 19, \"post_subject\": \"Issue with the H2H connector\", \"username\": \"gkrasnow\", \"post_text\": \"The H2H documentation seems to imply that you can set the CSV format parameters in the PipeIn command:\\n\\n
\\nDataConnectors.HDFSConnector.PipeIn(MyDataFile,\\n'/user/Administrator/test/MyData1',\\nLayout_CSV, CSV(SEPARATOR('|')),\\n'192.168.56.120',\\n54310);\\n
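For reference, here is the same call with the QUOTE attribute that Rodrigo suggests earlier in this thread. This is only a sketch; as noted below, which CSV attributes the connector actually honours depends on the HDFS Connector version, so test it on your own cluster before relying on it.
// Sketch only: the PipeIn call above with QUOTE added, per the earlier suggestion.
// Whether QUOTE is passed through to PIPE depends on the connector version in use.
DataConnectors.HDFSConnector.PipeIn(MyDataFile,
    '/user/Administrator/test/MyData1',
    Layout_CSV, CSV(SEPARATOR('|'), QUOTE('"')),
    '192.168.56.120',
    54310);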
\\n\\nHowever I have found that in the PipeIn macro the only CSV parameter that actually is looked at is terminator and the rest are thrown away. I have a file where all the fields are quoted with double quotes and the PipeIn is not detecting that and thus all my fields in my result dataset from the pipein retain the double quotes. Thanks.\\n\\n- Greg\", \"post_time\": \"2012-07-26 22:02:45\" },\n\t{ \"post_id\": 5240, \"topic_id\": 1212, \"forum_id\": 19, \"post_subject\": \"Re: Hadoop Admin Information Request\", \"username\": \"bforeman\", \"post_text\": \"This site showcases the HPCC Environment as an alternative to Hadoop. The information that we provide are the process and steps involved for developers moving from the Hadoop platform to HPCC. Here are links to some PDFs with that related information:\\n\\nhttp://hpccsystems.com/community/white-papers/ecl-for-hadoopers\\n\\nhttp://hpccsystems.com/community/white-papers/performing-pig-pen\\n\\nhttp://hpccsystems.com/community/white-papers/ecl-for-piggers\\n\\nHope this helps.\\n\\nBob\", \"post_time\": \"2014-02-19 19:47:50\" },\n\t{ \"post_id\": 5232, \"topic_id\": 1212, \"forum_id\": 19, \"post_subject\": \"Hadoop Admin Information Request\", \"username\": \"shashi24\", \"post_text\": \"What are some of the questions a hadoop admin shd ask or request information if someone asks him they want to setup a hadoop environment with data of size 1tb?\", \"post_time\": \"2014-02-19 14:54:33\" },\n\t{ \"post_id\": 5357, \"topic_id\": 1229, \"forum_id\": 19, \"post_subject\": \"Re: Embedded Pig\", \"username\": \"bforeman\", \"post_text\": \"Hi Jeniba,\\n\\nThis forum is for the migration of Hadoop to the HPCC platform using ECL. The only reference that I could find on the web regarding your question said this:\\n\\nBasically it is using PigServer to register your query/jar. You could use control flow construct to chain jobs thereafter.
\\n\\nI think that you would probably have more success posting this question in a Java forum. Sorry I can't be more helpful!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-03-08 14:30:46\" },\n\t{ \"post_id\": 5353, \"topic_id\": 1229, \"forum_id\": 19, \"post_subject\": \"Embedded Pig\", \"username\": \"Jeniba Johnson\", \"post_text\": \"Can someone help with an example for embedded pig programming in java.\\nIllustrate with the steps ?.I have already read the document related to Embedded Pig from Apache site.Need help in running simple pig commands through Java.Explain with an example ?\", \"post_time\": \"2014-03-06 17:31:35\" },\n\t{ \"post_id\": 5861, \"topic_id\": 1347, \"forum_id\": 19, \"post_subject\": \"Re: using ECL in Hadoop\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hello cmlakhan, \\nTake a look at our HDFS connector: http://hpccsystems.com/products-and-ser ... ntegration\\n\\nIt provides a mechanism for streaming HDFS data to HPCC for processing using ECL.\\n\\nthanks, and let us know if you have any further questions.\", \"post_time\": \"2014-06-10 14:22:07\" },\n\t{ \"post_id\": 5859, \"topic_id\": 1347, \"forum_id\": 19, \"post_subject\": \"using ECL in Hadoop\", \"username\": \"cmlakhan\", \"post_text\": \"I have a Hadoop cluster and am wondering if it would be possible to use ECL on top of my current Hadoop system. I would prefer to keep my data in HDFS and use it as an alternative to Pig. Is that possible?\", \"post_time\": \"2014-06-10 13:02:57\" },\n\t{ \"post_id\": 6712, \"topic_id\": 1544, \"forum_id\": 19, \"post_subject\": \"Re: Reading in Sequence files using H2H Connector\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Lee, the H2H commercial only supports FLAT/CSV files as you pointed out. I'm not familiar with the particular file type you're working with, can it be treated as a fixed length record file? If it's a variable length record file, is there a delimiter which denotes the end of the record?\", \"post_time\": \"2014-12-19 18:50:00\" },\n\t{ \"post_id\": 6702, \"topic_id\": 1544, \"forum_id\": 19, \"post_subject\": \"Reading in Sequence files using H2H Connector\", \"username\": \"Lee_Meadows\", \"post_text\": \"Anyone have an example on reading a sequence file from HDFS into HPCC?\\n\\nWe tried using the pipe connector, but the options are only FLAT or CSV and the HDFS file is default codec compression and the Key Value is Text:Text\\n\\nThe file is generated by the wordcount benchmark. You can download the source at https://github.com/intel-hadoop/HiBench\\n\\nThanks,\\nLee\", \"post_time\": \"2014-12-16 14:30:56\" },\n\t{ \"post_id\": 6857, \"topic_id\": 1580, \"forum_id\": 19, \"post_subject\": \"Re: About latest Benchmark comparison between Hadoop and HPC\", \"username\": \"Lee_Meadows\", \"post_text\": \"An interesting benchmark that we are working on is the HiBench https://github.com/intel-hadoop/HiBench\\n\\nWe are using that to compare Spark to Hadoop. 
I'm also in the process of setting up HPCC cluster to throw that into the mix.\", \"post_time\": \"2015-01-26 15:01:39\" },\n\t{ \"post_id\": 6851, \"topic_id\": 1580, \"forum_id\": 19, \"post_subject\": \"About latest Benchmark comparison between Hadoop and HPCC\", \"username\": \"LY\", \"post_text\": \"Hello there,\\n\\nI am a graduate student and am interested in HPCC.\\n\\nSince the only available benchmark comparison between Hadoop and HPCC on the website is published on 2011 (I believe it used Hadoop 1 for benchmark testing), some friends and I are planning to do a benchmark comparison between the latest Hadoop 2 and the latest HPCC. We are planing to use PUMA (https://engineering.purdue.edu/~puma/pumabenchmarks.htm) as testing scripts.\\n\\nWe don't want to repeat what someone has done, so we are wondering if there is any similar work having been done. If there is such one, could you please let us know the comparison result and how the benchmark testing is done (scripts, dataset, environment configuration)?\\n\\nThanks in advance.\\n\\nLY\", \"post_time\": \"2015-01-25 18:26:37\" },\n\t{ \"post_id\": 6991, \"topic_id\": 1604, \"forum_id\": 19, \"post_subject\": \"Re: HPCC vs CDH\", \"username\": \"rtaylor\", \"post_text\": \"Luc,\\n\\nI am not aware of any such comparison having been done so far, but it sounds like a good idea for someone to do. You are most welcome to contribute one (or more) such comparison. \\n\\nRichard\", \"post_time\": \"2015-02-19 15:00:40\" },\n\t{ \"post_id\": 6984, \"topic_id\": 1604, \"forum_id\": 19, \"post_subject\": \"HPCC vs CDH\", \"username\": \"lpezet\", \"post_text\": \"Hi!\\n\\nI've seen the comparisons between HPCC Systems and Hadoop.\\nLike those:\\n
\\n\\nIs there any other comparison with technology sitting on top of Hadoop?\\nFor example:\\n\\n
\\n\\nThanks!\\nLuc.\", \"post_time\": \"2015-02-19 03:41:32\" },\n\t{ \"post_id\": 7646, \"topic_id\": 1741, \"forum_id\": 19, \"post_subject\": \"Re: Call for workload benchmarks hpcc/hadoop/spark\", \"username\": \"flavio\", \"post_text\": \"Outside of SORTs, other benchmarks that quickly come to mind are:\\n\\n1.\\tJoin two datasets, and you can try different types: 2 large datasets on a sort-merge-join in Hadoop, a large left hand and a small right hand for a lookup join, a large left hand and an indexed right hand for a keyed join, etc. – Hadoop vs. Thor\\n2.\\tPlay with SALT’s scored search for real-time slicing and dicing in memory in Roxie – Spark vs. Roxie\\n3.\\tGraph processing using KEL (you can follow the tutorial in David’s Blog here: http://hpccsystems.com/blog/dabayliss) – Hadoop and Spark vs. HPCC\\n\\nI hope this helps.\", \"post_time\": \"2015-05-22 16:38:16\" },\n\t{ \"post_id\": 7645, \"topic_id\": 1741, \"forum_id\": 19, \"post_subject\": \"Call for workload benchmarks hpcc/hadoop/spark\", \"username\": \"Lee_Meadows\", \"post_text\": \"*Caveat Some of you may know I used to work for LN.*\\n\\nHPCC vs Hadoop vs Spark over Mellanox IB on x86\\n\\nI am about to get Mellanox Unstructured Data Acceleration (UDA) for Hadoop on the same cluster I have HPCC installed on. (http://www.mellanox.com/page/hpcc http://www.mellanox.com/page/hadoop)\\n\\nI'll be able to do an apples to apples comparison on the same hardware. I can't go into specifics on the clusters, but high number of cpus and more than 512GB ram on each node, for a 8 node system (1 master, 7 slaves)\\n\\nI'll be testing spinny disk and SanDisk FusionIO.\\n\\nI'd like to get ideas on honest workload benchmarks so I can document performance. My gut says the C++ will be faster, but I want to have defensible work to show a true apples to apples comparison. \\n\\nMy goal is to show results for HPCC, MapReduce, Spark jobs.\", \"post_time\": \"2015-05-22 15:34:59\" },\n\t{ \"post_id\": 58, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"ewadler\", \"post_text\": \"For me it was probably an error somewhere between the user and the keyboard. Not sure what, though. \", \"post_time\": \"2011-05-19 14:12:21\" },\n\t{ \"post_id\": 57, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"thildebrant\", \"post_text\": \"I was able to get it to work successfully on VMWare workstation 7.1.4 on Windows 7 professional 64bit. The VM (1.0.1.5 RC2) worked without a problem.\", \"post_time\": \"2011-05-19 13:17:54\" },\n\t{ \"post_id\": 41, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"ewadler\", \"post_text\": \"Ok, I swear I did nothing different than before (Actually, I am sure this was a user error by me, oh well), but just to be sure, I started fresh, and it worked in Workstation 7.1.4.\\n\\nAlso in Workstation 7.1.4, I tried opening the VM that I successfully ran in VMWare Player, and it worked as well. Much ado about nothing I guess.\\n\\nI hope if anyone else does whatever it is that I did to get the same issue, they can find some help here.\\n\\nThanks!\", \"post_time\": \"2011-05-13 20:14:11\" },\n\t{ \"post_id\": 28, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"sort\", \"post_text\": \"I am glad you got it working with one of our supported configurations. 
Since you got it working with a supported configuration we’re going to close this issue, but here’s what we found out with our testing...\\n\\nWe downloaded the 30 day trial of VMWare Workstation 7.1.4 for Windows and Linux in order to try and recreate the issue.\\n\\nOn linux 32 and 64 bit (both systems running Ubuntu 10.04 LTS), we were able to install version 7.1.4 and load the current VM from the portal without issue.\\n\\nOn WinXP 32 bit and WinServer 2k8 64 bit, we were able to install version 7.1.4 and load the current VM from the portal without issue.\\n\\nProcess used to test. \\n\\n1.\\tDownload the VMWare Workstation 7.1.4 installer from VMWare’s website for the OS platform.\\n2.\\tInstall VMWare Workstation 7.1.4\\n3.\\tDownload the current VM directly from hpccsystems.com\\n4.\\tUnzip the VM zip file. \\n5.\\tOpen VMWare Workstaion\\n6.\\tClick on “Open an Existing VM” and select the VMX file from the uncompressed VM zip.\\n7.\\tClick “Power on VM”\\n8.\\tWait for Blue start screen to appear and display IP address.\\n9.\\tLogin to ECLWatch to verify VM system is running.\\n\\nThis process was used for all 4 of the systems above without issue. \\n\\nThe only way we were able to duplicate the issue was by modifying the VM and setting the network mode to Bridge. On our secure network due to firewall issues, we have the issue where no IP address is provided as the user reported. \\n\\nQUESTION: Was the network mode changed from Host Only?\\n\\nThe VM is configured to run with Host only networking in order for a greater audience to use the VM without the chance of interference from their network infrastructure. If users wish to use Bridged networking, they must be on a network that supports advanced DHCP routing.\", \"post_time\": \"2011-05-11 20:38:38\" },\n\t{ \"post_id\": 27, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"ewadler\", \"post_text\": \"[quote="sort":194myhbx]My team and I will investigate. \\n\\nWe currently support VMWare Server and VMWare player. I currently run VMWare player 3.1.5 build 385536 on windows server 2003, windows XP and windows 7.\\n\\nWe do not support this configuration, but we will look into it.\\n\\nStu\\n\\nOk, I was trying to download and install VMWare Player 3.1.4 and I noticed that VMWare Workstation 7.1.4 also comes with VMWare Player 3.1.4. I just tried it on that, and it appears to run fine.\\n\\nI generally prefer using VMWare Workstation for managing all of my VM's, but Player will do fine.\\n\\nThanks for your help!\", \"post_time\": \"2011-05-11 15:55:22\" },\n\t{ \"post_id\": 16, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"Re: VM does not work in VMWare 7.1.4\", \"username\": \"sort\", \"post_text\": \"My team and I will investigate. \\n\\nWe currently support VMWare Server and VMWare player. I currently run VMWare player 3.1.5 build 385536 on windows server 2003, windows XP and windows 7.\\n\\nWe do not support this configuration, but we will look into it.\\n\\nStu\", \"post_time\": \"2011-05-11 14:02:10\" },\n\t{ \"post_id\": 14, \"topic_id\": 7, \"forum_id\": 20, \"post_subject\": \"VM does not work in VMWare 7.1.4\", \"username\": \"ewadler\", \"post_text\": \"I downloaded the NIMvm-1.0.1.5-RC2 VM and it worked in VMWare Fusion on the Mac, but it did not work in VMWare v7.1.4 for Windows. My guess is that the VMWare Tools are for VMWare v6.x. \\nThe symptom was that Ubuntu could not find the eth0 network adapter (or any adapter, for that matter). 
It retried vami-sfcbd during the boot sequence 10 painfully long times and then finally booted with no IP address.\\n\\nI have screenshots, but they are not on a publicly available web server and I cannot post images (that I know of) on this forum, so feel free to request screenshots and I will send them to you.\\n\\nCan someone please help?\", \"post_time\": \"2011-05-11 03:35:40\" },\n\t{ \"post_id\": 65, \"topic_id\": 34, \"forum_id\": 20, \"post_subject\": \"Re: Upgrading to a new VM Image - Migrating Sprayed Data\", \"username\": \"robert.foreman@lexisnexis.com\", \"post_text\": \"After a little research and confering with colleagues, you will need to respray and files that you are using to your new landing zone when upgrading the HPCC VM install. It's fast and easy to sync up again. \\n\\nAlso, just deleting the folder of the older VM Image Install will effectively uninstall it.\", \"post_time\": \"2011-05-31 17:11:47\" },\n\t{ \"post_id\": 64, \"topic_id\": 34, \"forum_id\": 20, \"post_subject\": \"Upgrading to a new VM Image - Migrating Sprayed Data\", \"username\": \"robert.foreman@lexisnexis.com\", \"post_text\": \"Hi team,\\n\\nI just upgraded my test machine to the latest VM 3.0.0.2 RC1. Since my VM Player was already installed, I simply unzipped the download and then double-clicked on the new VMX file. The install and initialization worked great.\\n\\nMy question is regarding my data that I had sprayed to my earlier install. The latest HPCC VM assigned a new IP address for me. If I start the OLD HPCC VM version and then login to my old IP address using my WINSCP tool I can see my old DropZone and sprayed data. But of course logging in to my new VM location using the new IP address shows an empty dropzone and no sprayed data as I anticipated.\\n\\nIs there an easy way to migrate the sprayed data from my older VM install, or will I just have to "bite the bullet" and respray everything again from my new drop zone? \\n\\nAlso, after I have migrated all of my data to the new VM IP location, what is the best way to remove the older install that I no longer need? Can I just delete the older install VM folder that contains the vmem and vmdk files? \\n\\nThanks in advance!\\n\\nBob Foreman\", \"post_time\": \"2011-05-31 13:33:45\" },\n\t{ \"post_id\": 239, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"kovacsbv\", \"post_text\": \"Very nice. \\nThat version cleaned it all up.\\nThanks for your help.\\n\\nVic Kovacs\", \"post_time\": \"2011-08-08 14:29:06\" },\n\t{ \"post_id\": 238, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"pschwartz\", \"post_text\": \"Vic,\\n\\nThank you for taking the time to alert us of the issue. In the release on 08/05 a few of the issues you are reporting have been addressed. Please try the 3.0.4 release at your convenience.\\n\\nThe main issue at hand is the lack of an ip address being assigned on the first boot of the VM on your instance of VMware Player. I have personally seen this issue before and here are a few things I have done to correct it, one of which might help you.\\n\\n1. Verify that the VMWare vnet device is functional.\\n2. Reboot your host machine to allow the VMWare vnet driver to reload.\\n3. Verify that the windows firewall is allowing network communications for VMWare Player.\\n4. Reinstall VMWare Player. 
\\n\\nvami-sfcbd is a vmware component that is set to use the localhost as its resolving address which if the ip address is not received correctly is not set in /etc/hosts causing the component to fail to start and sometimes become stuck in an infinite restart loop. I have seen a few open trouble tickets on the VMWare support site that indicate that this issue is already being looked at by VMWare and will be corrected in future versions of their software.\\n\\nAs for the issue of port 8010 not being accessible, the usage of 3.0.4 should resolve this issue for you.\", \"post_time\": \"2011-08-08 13:54:31\" },\n\t{ \"post_id\": 237, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"richardkchapman\", \"post_text\": \"port 8010 will be opened by esp executable, log file will be in /var/log/HPCCSystems/myesp/esp.log\", \"post_time\": \"2011-08-08 13:40:19\" },\n\t{ \"post_id\": 236, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"kovacsbv\", \"post_text\": \"The errors are beginning to confirm something I've been suspecting: The VM wants a FQDN that DNS can resolve into an IP. Maybe I can set up a hosts entry. I'll look for alternatives, but it looks a bit like I might need a static IP with a DNS entry.\\n\\nRight now, it looks like initialization reads /etc/hostname and uses that to set the host name which it then tries to look up with DNS, and can't.\\n\\nI'll play with /etc/resolv.conf and /etc/host* and see if it can be fixed.\\n\\nOf course, this may be completely unrelated to why port 8010 won't open up. Can anybody tell me which executable opens the port and listens, and where its log file is?\", \"post_time\": \"2011-08-08 13:20:25\" },\n\t{ \"post_id\": 235, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"kovacsbv\", \"post_text\": \"Ok, This error was seen before on VMWare's forum at http://communities.vmware.com/message/1712897?tstart=0.\\n\\nThis page refers to the vami-sfcb log to look for errors:\\n\\nI found the errors in /opt/vmware/var/log/vami/vami-sfcb.log\\n\\n
05/08/2011 15:35:11 [info] [process id: 2526] Starting a new initialization script.\\n05/08/2011 15:35:11 [info] [process id: 2526] Pids passed in are 2523 2522 2521 2519. Sleeping for 300 (secs).\\n05/08/2011 15:40:11 [info] [process id: 2526] ran command: ps -p 2523 2522 2521 2519\\n05/08/2011 15:40:11 [info] [process id: 2526] ps returned process: PID TTY STAT TIME COMMAND\\n05/08/2011 15:40:11 [info] [process id: 2526] ps returned process: 2519 ? Ssl 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 15:40:11 [info] [process id: 2526] ps returned process: 2521 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 15:40:11 [info] [process id: 2526] ps returned process: 2522 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 15:40:11 [info] [process id: 2526] ps returned process: 2523 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 15:40:11 [info] [process id: 2526] Number of sfcbd processes: 4\\n05/08/2011 15:40:11 [info] [process id: 2526] VAMI Scheduler provider is present on the system.\\n05/08/2011 15:40:11 [ERROR] [process id: 2526] Failed to start Scheduler Service. Unknown error.\\n05/08/2011 15:40:11 [ERROR] [process id: 2526] Traceback (most recent call last):\\n File "/opt/vmware/share/vami/vami_sfcb_initialize", line 86, in main\\n rettuple = vami_cim_util.invokeNonStaticMethod('root/vami', 'VAMI_SchedulerService', 'StartService')\\n File "/opt/vmware/share/vami/vami_cim_util.py", line 36, in invokeNonStaticMethod\\n insts = cliconn.EnumerateInstanceNames(className)\\n File "/opt/vmware/lib/python/site-packages/pywbem/cim_operations.py", line 382, in EnumerateInstanceNames\\n **params)\\n File "/opt/vmware/lib/python/site-packages/pywbem/cim_operations.py", line 173, in imethodcall\\n raise CIMError(0, str(arg))\\nCIMError: (0, 'Socket error: [Errno -3] Temporary failure in name resolution')\\n05/08/2011 16:19:56 [info] [process id: 2463] Starting a new initialization script.\\n05/08/2011 16:19:56 [info] [process id: 2463] Pids passed in are 2460 2459 2458 2456. Sleeping for 300 (secs).\\n05/08/2011 16:24:56 [info] [process id: 2463] ran command: ps -p 2460 2459 2458 2456\\n05/08/2011 16:24:56 [info] [process id: 2463] ps returned process: PID TTY STAT TIME COMMAND\\n05/08/2011 16:24:56 [info] [process id: 2463] ps returned process: 2456 ? Ssl 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 16:24:56 [info] [process id: 2463] ps returned process: 2458 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 16:24:56 [info] [process id: 2463] ps returned process: 2459 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n05/08/2011 16:24:56 [info] [process id: 2463] ps returned process: 2460 ? S 0:00 /opt/vmware/sbin/vami-sfcbd -d\\n
\", \"post_time\": \"2011-08-08 13:02:50\" },\n\t{ \"post_id\": 233, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"Re: VM Does not work in VM Player\", \"username\": \"sort\", \"post_text\": \"Thank you for pointing this out to us. \\n\\nWe have made some code changes in our platform that should make the "getAddrInfo" error no longer occur. Certain ports like 8010, 8002 need to be open\\n\\nWe have not made any changes yet to the script in our most recent release that address the syntax errors. We will look to correct our problem across all the distros we support. We appreciate the code you provided in your post\\n\\nWe will be releasing our source code in the near future and look forward to you submitting code to us.\\n\\nThank you\\nStu\", \"post_time\": \"2011-08-06 13:27:55\" },\n\t{ \"post_id\": 231, \"topic_id\": 82, \"forum_id\": 20, \"post_subject\": \"VM Does not work in VM Player\", \"username\": \"kovacsbv\", \"post_text\": \"I tried to open HPCCSystemsVM-3.0.3.3.vmx, and when the machine started up, I got:\\n00000000 2011-08-04 12:51:02 1041 "jsocket(1,2778) getaddrinfo failed err = -3 : localhost.localdom"\\n/opt/HPCCSystems/etc/init.s/hpcc_common: eval: line 69: syntax error near unexpected token `)'\\n\\nThe error repeats on line 121\\nLooking into the script, there are some quotes that need to be escaped if you want to run the script like that:\\n\\neval "$(echo "${cmp[*]}")"
\\n\\n Should be more like:\\n\\neval "$(echo \\\\"${cmp[*]}\\\\")"
\\n\\nThen I get the vami-sfcbd restarting 10 times (slowly).\\n\\nIn the end, netstat -pln reveals that port 8010 is not open, although 3 548x ports are open for IPv6 only.\\n\\nVersions:\\nOS: XP 32\\nHPCC: 3.0.3.3\\nVMWare: Player 3.1.4
\\n\\n\\nThanks,\\n\\nVic Kovacs\", \"post_time\": \"2011-08-05 14:20:59\" },\n\t{ \"post_id\": 818, \"topic_id\": 129, \"forum_id\": 20, \"post_subject\": \"Re: VMWare Player Version 4\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you Aviv for sharing and posting this information!\", \"post_time\": \"2012-01-12 14:26:21\" },\n\t{ \"post_id\": 815, \"topic_id\": 129, \"forum_id\": 20, \"post_subject\": \"Re: VMWare Player Version 4\", \"username\": \"Aviv\", \"post_text\": \"For those of you still struggling with VMWare, there is an alternative provided by the open-source option, VirtualBox. (https://www.virtualbox.org/)\\n\\nOnce installed, create a new virtual machine. \\nSelect the "Linux" option from the Operating System drop-down box and for OS Type choose "Ubuntu".\\nYou will need to add at least 2 Gigs of memory allocated for the virtual machine.\\nUse an existing hard disk by browsing to the system.vmdk file supplied in your HPCC virtual image download package. Make sure to select the Boot Hard Disk check box.\\n\\nIf you do not have proper assigning of a private IP Address for your virtual box, try changing the setting from NAT to Bridge Adapter with default settings.\\n\\nIf the web interface hangs, try restarting box. This has happened to me a few times from first loading of the box.\", \"post_time\": \"2012-01-11 18:04:51\" },\n\t{ \"post_id\": 431, \"topic_id\": 129, \"forum_id\": 20, \"post_subject\": \"Re: VMWare Player Version 4\", \"username\": \"HPCC Staff\", \"post_text\": \"After further research, we have verified there is no problem with our VM under VMWare Player Version 4. As with any other VM image, please shutdown the virtual image completely (power off) before proceeding with the VMWare upgrade.\", \"post_time\": \"2011-10-10 16:16:30\" },\n\t{ \"post_id\": 427, \"topic_id\": 129, \"forum_id\": 20, \"post_subject\": \"VMWare Player Version 4\", \"username\": \"HPCC Staff\", \"post_text\": \"We have received reports of people having problems with the HPCC VM Image after they have upgraded their VMWare Player to version 4. \\n\\nWe are currently researching this issue.\", \"post_time\": \"2011-10-07 21:37:28\" },\n\t{ \"post_id\": 1560, \"topic_id\": 345, \"forum_id\": 20, \"post_subject\": \"Re: Connect to VM from remote machine\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"Problem fixed - firewall was blocking.\", \"post_time\": \"2012-04-24 12:50:14\" },\n\t{ \"post_id\": 1555, \"topic_id\": 345, \"forum_id\": 20, \"post_subject\": \"Re: Connect to VM from remote machine\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"I'm sorry but my posting must not have been clear.\\n\\nI know the VM supplies only a one-node instance. What I am trying to do is this:\\n\\n1. Start the VM on Box A\\n2. Run client tools (ECL IDE, ECL Watch, etc.) on Box B and connect via a URL pointing to the VM on Box A (nnn.nnn.nnn.nnn:8010, nnn.nnn.nnn.nnn:5480)\", \"post_time\": \"2012-04-23 16:00:28\" },\n\t{ \"post_id\": 1554, \"topic_id\": 345, \"forum_id\": 20, \"post_subject\": \"Re: Connect to VM from remote machine\", \"username\": \"sort\", \"post_text\": \"The HPCC VM that we release is meant to be run as a 1 node configuration. We do not support it connecting to other VMs.\\n\\nIf you want to set up a multi node system using VMs (I use Virtual Box), you will need to create a VM and install a Linux OS that we support. Then install the appropriate binary release (get it from the portal). 
You can use our scripts to copy the binary install file and install on multiple machines (documetation can be found on the portal "Installing & Running the HPCC Platform")\", \"post_time\": \"2012-04-23 15:52:03\" },\n\t{ \"post_id\": 1551, \"topic_id\": 345, \"forum_id\": 20, \"post_subject\": \"Connect to VM from remote machine\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"I have two machines at my work desk (same subnet) and would like to run the HPCC Systems VM (using VMWare Player) on one machine and connect to it from the other machine. How do I configure the VM network settings? Is it possible? I have tried the VM Network Settings (Bridged, NAT, etc.) but they don't seem to work.\", \"post_time\": \"2012-04-23 13:13:48\" },\n\t{ \"post_id\": 4187, \"topic_id\": 400, \"forum_id\": 20, \"post_subject\": \"Re: Bridged Networking and DHCP\", \"username\": \"sbagaria\", \"post_text\": \"Related post: viewtopic.php?f=14&t=932\", \"post_time\": \"2013-06-11 19:43:31\" },\n\t{ \"post_id\": 1810, \"topic_id\": 400, \"forum_id\": 20, \"post_subject\": \"Re: Bridged Networking and DHCP\", \"username\": \"pschwartz\", \"post_text\": \"[quote="gkrasnow":xcbu2ud7]If I install into a VM which is using Bridged Networking and my the IP address which I get through DHCP may periodically change, which files will need to be updated to show the new IP address? I find that when my IP address changes, any thor slave processes showup in ECL Watch with the old IP address even though the thor master process shows the correct IP address. Thanks.\\n\\nAre you installing into multiple nodes in VM's or a single node?\\n\\nFrom the sound of the question, I think that you are running multiple nodes in VM's. If this is the case and the IP address of some of the nodes are changing via DHCP leases expiring, you would need to reconfigure the system to have the correct IP addresses in the environment.xml. This can either be done by hand or with ConfigMgr (sudo /opt/HPCCSystems/sbin/configmgr). You would then need to distribute the updated environment.xml to all nodes and restart the HPCC platform on all nodes.\\n\\nThe machines section of environment.xml when configured contains the actual IP address of the nodes which are displayed in ECLWatch.\\n\\nMy suggestion to get around this in the future is to allow your node with esp to have 2 network devices, one bridged and one host only. All other nodes should have a host only device. Then you would configure the system to use the host only network. This will prevent any issues if the public IP address controlled by DHCP is changed as esp will listen on all devices for connections.\\n\\n-Philip\", \"post_time\": \"2012-06-20 10:33:40\" },\n\t{ \"post_id\": 1800, \"topic_id\": 400, \"forum_id\": 20, \"post_subject\": \"Bridged Networking and DHCP\", \"username\": \"gkrasnow\", \"post_text\": \"If I install into a VM which is using Bridged Networking and my the IP address which I get through DHCP may periodically change, which files will need to be updated to show the new IP address? I find that when my IP address changes, any thor slave processes showup in ECL Watch with the old IP address even though the thor master process shows the correct IP address. 
Thanks.\", \"post_time\": \"2012-06-19 16:31:28\" },\n\t{ \"post_id\": 1879, \"topic_id\": 414, \"forum_id\": 20, \"post_subject\": \"Re: mydafilesrv and mydali failing to start\", \"username\": \"pschwartz\", \"post_text\": \"[quote="michael.krumlauf@lexisnexis.com":1h34wbll]Upon rebooting my VM image the mydafilesrv and mydali processes are failing to start.\\n\\nAlso I just noticed that vami-sfcbd fails the status check.\\n\\nAny ideas?\\n\\nThis has been a commonly seen issue when between reboots VMWare has had an issue trying to reconnect previous network settings. \\n\\nThe only options to correct it are to do one of the following:\\n\\n1. Reboot the vm a few times until VMWare allows the network connection (sometimes helps to change the network type to host-only).\\n\\n2. Destroy and recreate the VM from a fresh download of the image.\", \"post_time\": \"2012-07-02 12:41:53\" },\n\t{ \"post_id\": 1868, \"topic_id\": 414, \"forum_id\": 20, \"post_subject\": \"mydafilesrv and mydali failing to start\", \"username\": \"michael.krumlauf@lexisnexis.com\", \"post_text\": \"Upon rebooting my VM image the mydafilesrv and mydali processes are failing to start.\\n\\nAlso I just noticed that vami-sfcbd fails the status check.\\n\\nAny ideas?\", \"post_time\": \"2012-06-29 18:27:48\" },\n\t{ \"post_id\": 1889, \"topic_id\": 415, \"forum_id\": 20, \"post_subject\": \"Re: Too many daliadmin log files\", \"username\": \"pschwartz\", \"post_text\": \"[quote="michael-mason":rbrh3py1]We have the VM image running, and I kept getting 'No space left on device' errors even when 'df -h' showed that we had plenty of space left. It turns out that 'df -i' showed that we'd used 100% of our allowed inodes. While looking around to see which directory was the culprit, I found that /var/log/HPCCSystems/daliadmin had over 1 million files in it. The files are owned by 'hpcc', and I don't have hpcc or root's password. I don't know what version of the VM we're running, but I do know that we downloaded and installed it about 1 month ago. How can I figure out this info if you guys need it to troubleshoot this problem? \\n\\nTwo questions:\\n\\nA. How can I delete some of these files? Do you make the password for hpcc available? I've looked and only found the password for hpccdemo.\\nB. What settings can I adjust so that this doesn't happen again?\\n\\nThanks,\\n\\nThe hpccdemo user has full sudo rights. You can use the sudo command to do the deletes or do `sudo su hpcc` to get an hpcc shell.\\n\\n-Philip\", \"post_time\": \"2012-07-03 12:58:31\" },\n\t{ \"post_id\": 1886, \"topic_id\": 415, \"forum_id\": 20, \"post_subject\": \"Too many daliadmin log files\", \"username\": \"michael-mason\", \"post_text\": \"We have the VM image running, and I kept getting 'No space left on device' errors even when 'df -h' showed that we had plenty of space left. It turns out that 'df -i' showed that we'd used 100% of our allowed inodes. While looking around to see which directory was the culprit, I found that /var/log/HPCCSystems/daliadmin had over 1 million files in it. The files are owned by 'hpcc', and I don't have hpcc or root's password. I don't know what version of the VM we're running, but I do know that we downloaded and installed it about 1 month ago. How can I figure out this info if you guys need it to troubleshoot this problem? \\n\\nTwo questions:\\n\\nA. How can I delete some of these files? Do you make the password for hpcc available? I've looked and only found the password for hpccdemo.\\nB. 
What settings can I adjust so that this doesn't happen again?\\n\\nThanks,\", \"post_time\": \"2012-07-02 21:21:01\" },\n\t{ \"post_id\": 1920, \"topic_id\": 420, \"forum_id\": 20, \"post_subject\": \"Re: IDE crashes on VM\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for posting. This issue has been moved to the ECL IDE forum:\\nviewtopic.php?t=422&p=1918#p1918\", \"post_time\": \"2012-07-09 12:32:01\" },\n\t{ \"post_id\": 1914, \"topic_id\": 420, \"forum_id\": 20, \"post_subject\": \"Re: IDE crashes on VM\", \"username\": \"eric.scott\", \"post_text\": \"Turns out this is an IDE problem rather than a VM issue. I have the same problem when I point the IDE at an instance deployed on Amazon Web Services.\", \"post_time\": \"2012-07-07 13:48:23\" },\n\t{ \"post_id\": 1904, \"topic_id\": 420, \"forum_id\": 20, \"post_subject\": \"IDE crashes on VM\", \"username\": \"eric.scott\", \"post_text\": \"The system my team is using is going down for maintenance, and I'm hoping to use the VM version in the meantime. I used the VM version several months ago without problem.\\n\\nNow when I run the IDE against the VM, I get a crash. Message: 'A crash has been detected by BugTrap'.\\n\\nI've uninstalled/reinstalled my previous system (currently 3.6.2.3). The VMWare player is 4.0.2. I also deleted the Application Data/HPCCSystems folder.\\n\\nAny help appreciated.\", \"post_time\": \"2012-07-06 00:56:32\" },\n\t{ \"post_id\": 2069, \"topic_id\": 445, \"forum_id\": 20, \"post_subject\": \"Re: Accessing internet data\", \"username\": \"mrudul\", \"post_text\": \"Thanks Bob, that was helpful. I have been going through various documentation that's available on the site.\\n\\nI am trying to do a small proof of concept and have a question on the SOAPCALL.\\n\\nI have some data in data files which I will load in THOR and build queries to fetch the data. I also want to invoke a SOAP service (external service on the internet) to fetch another set of data and then process the same.\\n\\nI was reading about the SOAPCALL and wanted to know if this function works only with SOAP service or does it also work with XML over HTTP. \\n\\nThank you once again for replying to the post.\", \"post_time\": \"2012-07-25 01:52:28\" },\n\t{ \"post_id\": 2048, \"topic_id\": 445, \"forum_id\": 20, \"post_subject\": \"Re: Accessing internet data\", \"username\": \"bforeman\", \"post_text\": \"ECL has powerful language tools that support a variety of parsing options. If the data extracted from the internet is in XML format, you can spray and parse XML directly. If the internet data is raw text, you can use PARSE for free form text parsing. Any file can be sprayed as a variable length file, and then parsing applied to it using ECL. To quote the Language Reference Manual:\\n\\nNatural Language Parsing is accomplished in ECL by combining pattern definitions with an output RECORD structure specifically designed to receive the parsed values, then using the PARSE function to perform the operation.\\nPattern definitions are used to detect "interesting" text within the data. Just as with all other attribute definitions, these patterns typically define specific parsing elements and may be combined to form more complex patterns, tokens,\\nand rules.\\nThe output RECORD structure (or TRANSFORM function) defines the format of the resulting recordset. It typically contains specific pattern matching functions that return the "interesting" text, its length or position.\\nThe PARSE function implements the parsing operation. 
It returns a recordset that may then be post-processed as needed using standard ECL syntax, or simply output.
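
To make that concrete, here is a minimal sketch of those three pieces (pattern definitions, an output RECORD with a matching function, and the PARSE call itself). The sample data, patterns and names are purely illustrative, not taken from the Language Reference:

// Illustrative free-form text to parse
textDS := DATASET([{'red apples and green pears'}], {STRING txt});

// Pattern definitions describe the "interesting" text
PATTERN ws     := ' ';
PATTERN noun   := PATTERN('[a-zA-Z]+');
PATTERN colour := 'red' | 'green';
PATTERN phrase := colour ws noun;

// The output RECORD captures the matched values
outRec := RECORD
  STRING colourText := MATCHTEXT(colour);
  STRING nounText   := MATCHTEXT(noun);
END;

// PARSE performs the operation and returns a recordset
OUTPUT(PARSE(textDS, txt, phrase, outRec, SCAN));

Here the SCAN flag resumes scanning after each match, so both "red apples" and "green pears" come back as rows that can then be post-processed with ordinary ECL.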
\\n\\nThere are a number of resources on this site to help get you started. \\n\\nhttp://hpccsystems.com/download/docs/six-degrees%20\\nShows how to parse and format an IMDB movie file.\\n\\nhttp://hpccsystems.com/Why-HPCC/case-studies/engauge-pinterest\\nLinks to an article where a partner of ours uses ECL to process sentiment data extracted from twitter\\n\\nhttp://hpccsystems.com/download/docs/machine-learning\\nThe Machine Learning libraries also have a section on document parsing if you are interested.\\n\\nFinally, refer to the Language Reference and the PARSE statement for some great examples, and also review the section on PATTERN, RULE and TOKEN. \\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2012-07-23 15:34:41\" },\n\t{ \"post_id\": 2040, \"topic_id\": 445, \"forum_id\": 20, \"post_subject\": \"Accessing internet data\", \"username\": \"mrudul\", \"post_text\": \"I have completed HPCC data tutorial. I have gone through the HPCC documentation (HPCCDataHandling.pdf) and understand that HPCC works with data files.\\n\\nI want to know if there is way a to configure HPCC to work or to access internet data.\", \"post_time\": \"2012-07-21 00:17:26\" },\n\t{ \"post_id\": 4841, \"topic_id\": 538, \"forum_id\": 20, \"post_subject\": \"Re: Is there any way to increase disk space in the VM Image?\", \"username\": \"BrianB644\", \"post_text\": \"If the HPCC Systems VM isn't configured appropriately for your needs, I think the first thing to consider would be to rebuild a VM or system that was better suited to your needs. It seems to be fairly straight-forward to do so, and the resulting system will probably save time, effort, and frustration in the long run.\\n\\nThat being said, I recently found myself in a situation where I had a short-term need and circumstances conspired so that the desktops we needed to run on were isolated and couldn't get access to the resources required to create a reconfigured VM.\\n\\nIn the end what was easy and safe to configure was the following. I loaded the VM-ware tools in the VM, and defined a shared folder on the VM Host System with sub-directories for my dropzone, sprayed files, and result files. Within the VM Guest ... I verified the shared folder was working correctly, and I used symbolic links (man 'ln -s') to link the directories into the HPCC file system. Using this configuration, I was able to: 1) copy files into the shared folder on the host and have them "appear" on the landing zone (no VM disk space is used), 2) spray the files normally and have the results written to the shared folder on the host (no VM disk space used), and 3) have the result files written to the shared folder on the host (no VM disk space used). Spill and temporary files still used the VM disk ... which I imagined to be faster than writing the files externally.\\n\\nUsing this configuration we were able to write and test quite a bit of code before our full system was available. We also did some full file data analysis, and join trials. 
When our full hardware was configured, we were able to hit the ground running.\\n\\nFYI - in our case, we also updated the VM to add a CPU and bumped memory a bit to better leverage the resources we had available.\\n\\nBrian\", \"post_time\": \"2013-10-29 12:00:18\" },\n\t{ \"post_id\": 2455, \"topic_id\": 538, \"forum_id\": 20, \"post_subject\": \"Re: Is there any way to increase disk space in the VM Image?\", \"username\": \"bforeman\", \"post_text\": \"Hi,\\n\\nThe HPCC VM Edition runs on a single node, has a limit of 20 GB in it workspace, and doesn't support custom configurations.\\n\\nThat said, you can still use the HPCC VM Image to deeply evaluate ECL and ETL on a single node. My advice is that if you are running low on disk space, try filtering the output to a smaller recordset result. Anything larger, you should probably build a test cluster of 3 or 4 nodes. If you have an Amazon Web Services account, you can test larger datasets using the one-click THOR cluster for a nominal fee.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-09-28 12:51:34\" },\n\t{ \"post_id\": 2454, \"topic_id\": 538, \"forum_id\": 20, \"post_subject\": \"Is there any way to increase disk space in the VM Image?\", \"username\": \"dsun\", \"post_text\": \"Hi,\\n\\nSince it will produce much more data(output and temp) after running the ecl code, there is no enough space in the disk, is it possible that we can increase the disk space?\\n\\nThanks,\\nDongliang\", \"post_time\": \"2012-09-28 12:42:33\" },\n\t{ \"post_id\": 4534, \"topic_id\": 1014, \"forum_id\": 20, \"post_subject\": \"Re: Import of VM image to player fails\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob. \\n\\nIt worked this time when I continued installation after the message but then, I had to reinstall VMware Player too. Else, it would stop abruptly after the message.\", \"post_time\": \"2013-09-05 04:25:04\" },\n\t{ \"post_id\": 4533, \"topic_id\": 1014, \"forum_id\": 20, \"post_subject\": \"Re: Import of VM image to player fails\", \"username\": \"bforeman\", \"post_text\": \"I've also seen that message in Windows XP, but I simply continue and everything gets installed OK. But I don't think it's a Windows 8 issue.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-04 19:30:39\" },\n\t{ \"post_id\": 4520, \"topic_id\": 1014, \"forum_id\": 20, \"post_subject\": \"Import of VM image to player fails\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"When I try to import the latest VM image HPCCSystemsVM-4.0.0-9.ova, it fails with the following error message:\\n\\n[color=#000080:3etexmo1]The import failed because <folderpath>\\\\HPCCSystemsVM-4.0.0-9.ova did not pass OVF specification conformance or virtual hardware compliance checks.\\n\\nClick Retry to relax OVF specification and virtual hardware compliance checks and try the import again, or click Cancel to cancel the import. If you retry the import, you might not be able to use the virtual machine in VMware Player.\\n\\nThe earlier 3.x versions have worked for me with no hassles. The only difference I note is that I recently moved to Windows 8 OS. Could that be a problem?\", \"post_time\": \"2013-09-03 11:44:41\" },\n\t{ \"post_id\": 4840, \"topic_id\": 1073, \"forum_id\": 20, \"post_subject\": \"Re: Queued jobs time out after 600 seconds\", \"username\": \"BrianB644\", \"post_text\": \"Dan,\\n\\nThanks for the follow-up post. With your explanation,I better understand the best practice for administering the configuration. 
It makes sense.\\n\\nThank You,\\n\\nBrian\", \"post_time\": \"2013-10-29 10:51:26\" },\n\t{ \"post_id\": 4767, \"topic_id\": 1073, \"forum_id\": 20, \"post_subject\": \"Re: Queued jobs time out after 600 seconds\", \"username\": \"DSC\", \"post_text\": \"Your experience may be different than mine, but I learned that it is usually not a good idea to muck around with the environment.xml file outside of the configmgr interface. I've brought a whole lot of pain into my life, goofing around with the configuration that way.\\n\\nconfigmgr uses environment files in /etc/HPCCSystems/source as a scratch pad. When you edit a configuration, that is the directory in which the configuration is stored. To make a configuration live, you copy it up to /etc/HPCCSystems/environment.xml on the local system and then duplicate it on all nodes in the cluster. Personally, I keep source/environment.xml a mirror of /etc/HPCCSystems/environment.xml, but retain a bunch of other configurations within that source subdirectory as backups or whatever. Once I've edited the source/environment.xml through configmgr, I use a variation of this command to push the file to all my nodes, including the local system:\\n\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-push.sh /etc/HPCCSystems/source/environment.xml /etc/HPCCSystems
\\nThis works as long as your cluster's nodes are unchanged from one configuration to another, as it uses the current environment.xml to determine the IP addresses of all the nodes. So if you're trying to add a node, you'll have to propagate the environment.xml file a little differently. The push script works well when you're just experimenting, though.\\n\\nJust throwing out something to consider.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-10-14 18:04:42\" },\n\t{ \"post_id\": 4765, \"topic_id\": 1073, \"forum_id\": 20, \"post_subject\": \"Re: Queued jobs time out after 600 seconds\", \"username\": \"BrianB644\", \"post_text\": \"DSC ... Thanks! ... Once I figured out how to change the parameter it fixed my problem.\\n\\nUnfortunately, I had already found the thorConnectTimeout parameter earlier in the day and attempted to modify the value by changing the .xml file under myeclagent ... not knowing better. Of course it didn't work and I just assumed it was because it was the wrong parameter ... and ended up writing my original post.\\n\\nBecause of your suggestion, I went back and checked and discovered the error of my ways. I now know that the source of all configuration knowledge is in the environment.xml file and I also now know where to find it ... /etc/HPCCSystems (don't be fooled by the copy in the sub-directory named 'source') ... Thanks Again!\\n\\nAs a side note, I've generated a lot of code in the past, and I would suggest that everyone benefits when generated files contains a comment saying that it was generated ... and perhaps a pointer concerning what it was generated from. Such a comment would have shortened my current work day by several hours.\\n\\nCheers,\\n\\nBrian B.\", \"post_time\": \"2013-10-14 12:45:28\" },\n\t{ \"post_id\": 4764, \"topic_id\": 1073, \"forum_id\": 20, \"post_subject\": \"Re: Queued jobs time out after 600 seconds\", \"username\": \"DSC\", \"post_text\": \"I believe that behavior is governed by the 'thorConnectTimeout' Option configuration parameter within the EclAgent component. I don't know if it's possible to disable the timeout, but you should be able to at least set it to a very high value.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-10-14 10:25:43\" },\n\t{ \"post_id\": 4763, \"topic_id\": 1073, \"forum_id\": 20, \"post_subject\": \"Queued jobs time out after 600 seconds\", \"username\": \"BrianB644\", \"post_text\": \"For the moment, I need to use the VM Image to do real work while our full system is being set up. In the VM environment, my jobs are going to take a while to run, and I'd like to queue up a series of them to run sequentially ... so that I can go home overnight and return in the morning to find a few jobs done and the system beavering away on yet another job. However, jobs waiting on the queue time out after 10 minutes with a message like this ...\\n\\n 0: System error: 0: Query W201310xx-xxxxxx failed to start within specified timelimit (600) seconds.\\n\\nWhere can I configure or disable this timeout?\\n\\nThanks for your help in advance,\\n\\nBrian B.\\n\\nThere is probably a better forum for this question, but I am experiencing this issue in the VM Image environment.\", \"post_time\": \"2013-10-14 08:26:51\" },\n\t{ \"post_id\": 5245, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Re: Can I access a virtual machine from my LAN\", \"username\": \"flavio\", \"post_text\": \"The IP address needs to be assigned to the guest Linux OS. A command that adds or lists an IP address is "ifconfig". 
Alternatively, "ip addr list" will show the existing IP addresses in that guest Linux OS. You will need to connect to the IP address that corresponds to the bridged interface.\\n\\nFlavio\", \"post_time\": \"2014-02-19 22:05:29\" },\n\t{ \"post_id\": 5244, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Re: Can I access a virtual machine from my LAN\", \"username\": \"BenJones\", \"post_text\": \"Computers plugged into my LAN automatically get an IP address so I guess it has DHCP. However, I still can't reach the Virtual Box from another computer. It is unclear how I would assign an IP address to the Virtual Box when Bridged is selected because there is no field to enter it.\", \"post_time\": \"2014-02-19 22:00:42\" },\n\t{ \"post_id\": 5242, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Re: Can I access a virtual machine from my LAN\", \"username\": \"flavio\", \"post_text\": \"You can change either one, but keep in mind that you will need to assign an IP address that is valid within your network (if you have a DHCP server, it may be automatically done for you).\\n\\nAfter this, you will need to configure the client tools to hit this new IP address (alternatively you could add an A record in your DNS server that assigns a name to this IP address, and use that name to access the VM).\\n\\nI hope this helps.\\n\\nFlavio\", \"post_time\": \"2014-02-19 21:02:52\" },\n\t{ \"post_id\": 5241, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Re: Can I access a virtual machine from my LAN\", \"username\": \"BenJones\", \"post_text\": \"It is not clear how to do that. There are two adapters shown under Settings in Virtual Box. One is NAT and one is Host Only. I tried selecting Bridged instead of Host Only and then the client tools couldn't find it even on the local machine. I tried Bridged instead of NAT and my other computer on the LAN still couldn't find it.\", \"post_time\": \"2014-02-19 20:51:17\" },\n\t{ \"post_id\": 5238, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Re: Can I access a virtual machine from my LAN\", \"username\": \"flavio\", \"post_text\": \"Ben, \\n\\nYou would need to set it to "bridged", but you will need to either have a DHCP server in your network or set the IP address for that virtual adapter manually.\\n\\nPlease let me know if this works for you.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2014-02-19 18:35:06\" },\n\t{ \"post_id\": 5215, \"topic_id\": 1202, \"forum_id\": 20, \"post_subject\": \"Can I access a virtual machine from my LAN\", \"username\": \"BenJones\", \"post_text\": \"Running the Virtual Machine makes my computer run very sluggishly. I'd like to connect to it from another computer on the same LAN but I can't see the Virtual Machine IP addresses from my other computer. The Virtual Machine (Virtual Box) has two network adapters, which the instructions say should be set to NAT and Host-only Adapter respectively. What do I need to change them to in order to be able to see my virtual machine from another computer other than the one it is running in?\", \"post_time\": \"2014-02-17 14:42:42\" },\n\t{ \"post_id\": 6109, \"topic_id\": 1400, \"forum_id\": 20, \"post_subject\": \"Re: ECL Watch not loading\", \"username\": \"micevepay\", \"post_text\": \"Just redownloaded... Works fine now. 
Will update if problem arises again.\", \"post_time\": \"2014-07-25 04:28:32\" },\n\t{ \"post_id\": 6108, \"topic_id\": 1400, \"forum_id\": 20, \"post_subject\": \"ECL Watch not loading\", \"username\": \"micevepay\", \"post_text\": \"Downloaded VM for 5.0.0-2. Worked fine until I tried to move files to dropzone. Everything froze. I tried to refresh but ECL Watch never came back. Stopped and restarted hpcc but to no avail. Even restarted the VM. And yes I am at the correct IP address. May be a bug.\", \"post_time\": \"2014-07-25 02:46:41\" },\n\t{ \"post_id\": 7027, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"ming\", \"post_text\": \"I notice you set adapter1 to host-only. What is your adapter 2 setting?\\nFor some version virtualbox adapter 1 set to NAT is necessary. \\n\\nYou can try to set NAT on adapter 1 and host-only on adapter 2 as our HPCC VM build settings.\\n\\nActually you must set adapter 2 since in /etc/HPCCSystems/environment.conf we set network adapter for eth1. If you want to use adapter 1 you need at least change the seeting to eth0 in environment.conf\", \"post_time\": \"2015-02-26 13:32:30\" },\n\t{ \"post_id\": 7024, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"Still having this issue - \\nFirewall turned off\\nVirus checker off.\\nProper setting for Host Only adapters\\nstill cannot ping the 192.68.56.101 address nor able to access ECL watch at 192.168.56.101:8010\\nHave made sure DHCP is active on VM host\", \"post_time\": \"2015-02-26 02:05:35\" },\n\t{ \"post_id\": 6943, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"Please note that I have installed this on about 8 other machines without any issues such as this. It is puzzling.\", \"post_time\": \"2015-02-11 00:04:36\" },\n\t{ \"post_id\": 6942, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"sudo iptables --list returns results \\n\\nifconfig eth1 generates an error (see screens).\\n\\nI cannot ping 192.168.56.1 or 192.168,56.100 to the VM machine.\\nI can ping other devices on my router (printers, another machine) on the 192.168.xx.xxx mask.\", \"post_time\": \"2015-02-11 00:03:10\" },\n\t{ \"post_id\": 6941, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"ming\", \"post_text\": \"Can you display "sudo iptables --list" and "ifconfig eth1" from the VM?\\nAlso can you ping 192.168.56.1 or 192.168.56.100 from your host system?\\nI still think either your host firewall or VM firewall block the traffic.\\n\\nAlso make sure when run ipconfig (assume on Windows) you can see \\n"VisualBox Host-Only Network:". For example,\\nEthernet adapter VirtualBox Host-Only Network:\\n\\n Connection-specific DNS Suffix . :\\n Link-local IPv6 Address . . . . . : fe80::9460:accd:5344:1513%18\\n IPv4 Address. . . . . . . . . . . : 192.168.56.25\\n Subnet Mask . . . . . . . . . . . : 255.255.255.0\\n Default Gateway . . . . . . . . . 
:\\n\\nOtherwise you need to fix the network at Windows (troubleshoot network,etc)\", \"post_time\": \"2015-02-10 23:05:25\" },\n\t{ \"post_id\": 6940, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"Thanks - I checked and the DHCP is checked already.\\n\\nI next tried setting adapter 2 to Bridged. That did not work either.\", \"post_time\": \"2015-02-10 22:46:49\" },\n\t{ \"post_id\": 6936, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"ming\", \"post_text\": \"The setting is not for each VM instance but for virtualbox.[attachment=0:jvxrs7iv]vm_preference.png\", \"post_time\": \"2015-02-10 13:29:09\" },\n\t{ \"post_id\": 6934, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"No DHCP tab under those settings, if I am looking in the right place.\", \"post_time\": \"2015-02-10 00:26:28\" },\n\t{ \"post_id\": 6932, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"ming\", \"post_text\": \"Is the "DHCP Server" enabled? It is in "Preferences" -> "Network"-> "Host-only Networks" -> select the "Host-only Network" device you are using (Usually should be vbooxnet0)-> press the "Edit" -> check "DHCP Server" tab.\\n\\nAlternatively you can try "Bridged Adapter" for "Adapter 2" if possible.\", \"post_time\": \"2015-02-09 21:42:02\" },\n\t{ \"post_id\": 6931, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"No, this is when I am totally 'clean' - no VPN.\\n\\nSo far I have turned off everything:\\nWindows Firewall\\nAvast Anti-virus\\nNetgear router firewall.\\nAlso, running the following from the command line on the linux side confirms that all services are running (sudo service hpcc-init status) See hpcc_status.jpg\", \"post_time\": \"2015-02-09 21:04:08\" },\n\t{ \"post_id\": 6930, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"bforeman\", \"post_text\": \"yes, that's it! You can't run the HPCC VM while another VPN is connected.\", \"post_time\": \"2015-02-09 20:18:46\" },\n\t{ \"post_id\": 6929, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"aintnomyth\", \"post_text\": \"I have this same issue when I'm running Cisco VPN. I see the VM guest running but I cannot connect to the ECL watch page unless I disconnect from the VPN.\\n\\nAre you on a VPN?\", \"post_time\": \"2015-02-09 20:12:38\" },\n\t{ \"post_id\": 6919, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Re: Can't access vm ecl watch or ping the adresss\", \"username\": \"bforeman\", \"post_text\": \"Hi Bill,\\n\\nIn the VM window, login using the hpccdemo username and password, then check the hpcc-init status to see if the cluster is actually running. I believe you would use:\\n\\nsudo service hpcc-init status\\n\\nAlso, is this the first time you installed on this machine? 
The IP address might be out of sync (incorrect).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-02-09 13:53:16\" },\n\t{ \"post_id\": 6918, \"topic_id\": 1591, \"forum_id\": 20, \"post_subject\": \"Can't access vm ecl watch or ping the adresss\", \"username\": \"billwright2\", \"post_text\": \"Even though I have tried turning off the Windows firewall and avast anti-virus, I cannot access ecl watch on the virtual machine nor ping the address.\\nI have attached an image hppc_2.jpg) of the adapter settings (the same across all 4 adapters).\\nThe other attached image (hppc_4.jpg) shows the vm address and the ping attempts.\\nThis is all on my Windows 7, AMD 6 core 64 bit machine.\", \"post_time\": \"2015-02-07 15:53:56\" },\n\t{ \"post_id\": 18813, \"topic_id\": 4743, \"forum_id\": 20, \"post_subject\": \"Roxie pool memory exhausted\", \"username\": \"chuajo01\", \"post_text\": \"Hi,\\n\\nI sometimes get the "Pool memory exhausted" error below when running a Roxie query on my HPCC demo VM (amd64, v.6.2.14):\\n\\nresult:odgvizpulsepocgetshoppingrateaggregates_02Response:Results:Exception:Array(1):0\\nCode:1301\\nMessage:\\n"Pool memory exhausted: pool id 4194325 exhausted, requested 1 heap(406/4294967295) global(4096/4096)"\\nSource:"Roxie"
\\n\\nIt seems to happen when my Node/Electron app makes multiple parallel queries. My workaround at the moment is to queue the queries and limit the hits to roxie.\\n\\nI have tried adding and enabling a reasonably large swap partition, but that didn't seem to have helped. There was plenty of memory available (the datasets I'm working with are not very large) but Roxie doesn't seem to utilise them.\\n\\nI was advised there are some Roxie configuration parameters which could be adjusted, but I'm not sure which one would be pertinent:\\n\\ndefaultMemoryLimit: Maximum amount of memory available for row data in any single active query (if not overridden)\\ntotalMemoryLimit: Maximum amount of memory available for row data in all active queries \\nheapUseHugePages Allow roxie to use memory from huge pages if they have been configured. heapUseTransparentHugePages Allow roxie to use memory from transparent huge pages. \\nheapRetainMemory Retain and do not return unused memory to the operating system.
\\n\\nAny advice would be much appreciated.\\n\\nThanks,\\nJoey\", \"post_time\": \"2017-09-14 17:28:21\" },\n\t{ \"post_id\": 3995, \"topic_id\": 671, \"forum_id\": 21, \"post_subject\": \"Re: Migration tools\", \"username\": \"swapna\", \"post_text\": \"Hi Miller,\\n\\nWe are also working in migration from Mainframe to HPCC.\\nCould you share the PERL Script for converting COBOL(copy book)definition to ecl record layout? which help us a lot.\\n\\nThanks,\\nSwapna\", \"post_time\": \"2013-04-26 14:21:44\" },\n\t{ \"post_id\": 3076, \"topic_id\": 671, \"forum_id\": 21, \"post_subject\": \"Re: Migration tools\", \"username\": \"jeeves\", \"post_text\": \"Thanks Flavio and Miller. This really helped!\", \"post_time\": \"2013-01-09 06:32:46\" },\n\t{ \"post_id\": 3075, \"topic_id\": 671, \"forum_id\": 21, \"post_subject\": \"Re: Migration tools\", \"username\": \"miller\", \"post_text\": \"I did a significant amount of COBOL to ECL. The only thing I automated was creating ECL record definitions from COBOL definitions. I did this with some PERL and not very well (written to the coding standards used in the COBOL I was porting). The records were used for porting data from the COBOL system to the ECL system. \\n\\nA proper port involves understanding the COBOL and what it accomplishes, then laying it out in ECL. The procedural COBOL does not translate directly into ECL.\", \"post_time\": \"2013-01-08 21:34:41\" },\n\t{ \"post_id\": 3068, \"topic_id\": 671, \"forum_id\": 21, \"post_subject\": \"Re: Migration tools\", \"username\": \"flavio\", \"post_text\": \"David,\\n\\nThere are no automated code migration tools from COBOL or JCL, that I know of. In the majority of the cases (and every situation we encountered) is significantly more efficient to re-implement the logic in ways that it can be more optimal, concise, readable and easier to maintain, rather than trying to copy the code semi-verbatim. \\n\\nSeveral people in our team have participated in migrating applications and code from Mainframes to HPCC. The best people to speak about their personal experiences are, possibly, David Miller and John Holt.\\n\\nLet's see if they can chime in and tell us a bit about their insights.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-01-08 13:17:15\" },\n\t{ \"post_id\": 3066, \"topic_id\": 671, \"forum_id\": 21, \"post_subject\": \"Migration tools\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\n1. Are there any tools to generate ECL code from COBOL or JCL?\\n\\n2. If anyone here has been part of a migration from mainframe to HPCC and has some insights to share it will be greatly appreciated.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-01-08 10:21:39\" },\n\t{ \"post_id\": 3149, \"topic_id\": 681, \"forum_id\": 21, \"post_subject\": \"Re: Scheduling jobs\", \"username\": \"sunilatCTS\", \"post_text\": \"Thank you bforeman. The details provided will be really useful for any migration projects. Good to know that the document on scheduling is due for release very soon. Eagerly waiting for it.\\n\\nThanks you.\\nSunil\", \"post_time\": \"2013-01-23 08:47:30\" },\n\t{ \"post_id\": 3112, \"topic_id\": 681, \"forum_id\": 21, \"post_subject\": \"Re: Scheduling jobs\", \"username\": \"bforeman\", \"post_text\": \"Hi Sunil,\\n\\nThe answer is YES!, HPCC and ECL indeed support job scheduling. Some ECL developers live by it \\n\\nThe ECL Scheduler is a component process installed with the HPCC system platform. It is typically started up with the platform. 
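
Before the details below, a minimal hedged sketch of the WHEN / NOTIFY / EVENT / CRON pattern this post describes; the event names and the schedule are purely illustrative, and each line would normally be submitted as its own workunit:

// A workunit that waits for an event; after running twice it leaves the Schedule list
OUTPUT('MyEvent handled') : WHEN(EVENT('MyEvent', '*'), COUNT(2));

// A separate workunit (or a DFU file monitor) fires the event
NOTIFY(EVENT('MyEvent', 'trigger'));

// Time-based scheduling, e.g. every day at 01:00 (cron fields: minute hour day month weekday)
OUTPUT('nightly job ran') : WHEN(CRON('0 1 * * *'));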
An interface to the scheduler is available through ECL Watch. The ECL Scheduler interface allows you to see a\\nlist of scheduled workunits. It can also trigger an event. An Event is a case-insensitive string constant naming the event to trap.\\n\\nA command line tool is available on the server installed in /opt/HPCCSystems/bin.\\n\\nECL Scheduling\\nECL Scheduling provides a means of automating processes within ECL code or to chain processes together to work in sequence.\\n\\nFor example, you can write ECL code that watches a landing zone for the arrival of a file, and when it arrives, sprays it to Thor, processes it, builds an index, and then adds it to a superfile.\\n\\nHow it Works\\nECL Scheduling is event-based. The ECL Scheduler monitors a Schedule list containing registered Workunits and Events and executes any Workunits associated with an Event when that Event is triggered.\\n\\nYou write ECL Code that will execute when an Event is triggered. You can also write code to trigger an Event.\\n\\nSubmit code containing a WHEN clause and the Event and Workunit is registered in the Schedule list. When that Event is triggered, the Workunit is compiled and executed. If the Workunit is completed, ECL Scheduler removes it from the Schedule list.\\n\\nFor example, if you submit a Workunit using WHEN('Event1','MyEvent', COUNT(2)) in the appropriate place, it will execute twice (The value of COUNT) before the ECL Scheduler removes it from the Schedule list and the Workunit is marked as completed.\\n\\nI believe that more documentation regarding the ECL Scheduler is due for release very soon. See the Language Reference manual for WHEN, NOTIFY, EVENT, and CRON.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-16 16:55:41\" },\n\t{ \"post_id\": 3108, \"topic_id\": 681, \"forum_id\": 21, \"post_subject\": \"Scheduling jobs\", \"username\": \"sunilatCTS\", \"post_text\": \"Hi, \\n\\nI come from AS/400 background. Mainframe guys might also have a similar question regarding scheduling jobs. \\n\\n1. Lets say i have a COBOL program that is basically a report and at the end of it, it calls a job, how do i replicate this in HPCC. I would write a ELC program similar to the report but how do i invoke the mainframe job from the HPCC environment or through ECL ?\\n\\n2. Overnight jobs have a lot of reports running in mainframe. These jobs are scheduled using Control M scheduler. How can i replicate this in HPCC ? How can i schedule an ECL job to run, say at 1 am ?\\n\\nThe answers for these questions or an alternate approach would help us a lot to know what can and what cannot be done when taking up migration projects, dealing with either mainframe or AS/400.\\n\\nThanks,\\nSunil\", \"post_time\": \"2013-01-16 10:35:26\" },\n\t{ \"post_id\": 275, \"topic_id\": 90, \"forum_id\": 22, \"post_subject\": \"Re: Monolithic server HPCC/VM\", \"username\": \"dabayliss\", \"post_text\": \"I don't know of anyone -quite- having done that. We did do some extensive testing a couple of years back with a 3 node HPCC against a 3 node Oracle cluster; we were 9.something x faster (I remember AE complaining that we hadn't hit 10x)\\n\\nThe 'default' Linux install is single machine - I have it running on 6 CPU & 4CPU machines. It defaults to a single node thor and roxie - that CAN have advantages (no skew etc) - although we have sometimes found that running multiple thor 'nodes' on a single machine (with multiple CPUs) can provide some lift too.\\n\\nOne thing you don't mention is your disks. 
Depending upon what you are doing Roxie can do quite a few disk hits - if you only have one or two disks then you will need to be a little careful in how you lay out your roxie keys - not a problem - but you need to plan.\\n\\nThe only other thing I can think of is that (obviously) only having one machine rather limits your redundancy - you will need some other way to keep your data backed up - and you will need SLAs that work within the limits of the machine.\", \"post_time\": \"2011-08-12 17:44:57\" },\n\t{ \"post_id\": 274, \"topic_id\": 90, \"forum_id\": 22, \"post_subject\": \"Re: Monolithic server HPCC/VM\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello, thanks for the post. We have moved it to this new category based on your input. Thank you!\", \"post_time\": \"2011-08-12 17:31:44\" },\n\t{ \"post_id\": 270, \"topic_id\": 90, \"forum_id\": 22, \"post_subject\": \"Monolithic server HPCC/VM\", \"username\": \"aintnomyth\", \"post_text\": \"Sorry, this question fits into the Migration area but it is not related to Hadoop.\\n\\nWe are looking to convert our Oracle system to HPCC. Our business handles dozens of clients locally but we have remote deployments for several clients too. Some of the remote clients already run Oracle, but some purchased servers specifically to run our Oracle-based solutions.\\n\\nI'm trying to formulate a plan for the existing remote clients, hopefully one that involves re-using their existing hardware to run HPCC.\\n\\nI realize HPCC is designed to be clustered but has anyone performed a head-to-head comparison between a DBMS and HPCC, where HPCC is running on the same single-server hardware as the DBMS? Something like 16-24 cores and 64+GB RAM.\\n\\nIf not, does the VM have any built-in restrictions that would prevent me from testing this myself?\\n\\nThanks\", \"post_time\": \"2011-08-12 14:46:57\" },\n\t{ \"post_id\": 4221, \"topic_id\": 105, \"forum_id\": 22, \"post_subject\": \"Re: SQL to ECL tool?\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"HPCC currently provides a JDBC Driver which supports a specific subset of SQL. More information can be found here: http://hpccsystems.com/products-and-ser ... DBC-Driver.\\n\\nThere is a related ODBC driver planned, but please note that the ODBC driver will only support a specific subset of SQL (much like the JDBC driver). Progress is ongoing and status is reflected in the roadmap document: http://cdn.hpccsystems.com/pdf/HPCC_Project_Roadmap.pdf \\n\\n\\nthanks.\", \"post_time\": \"2013-06-18 15:04:49\" },\n\t{ \"post_id\": 4220, \"topic_id\": 105, \"forum_id\": 22, \"post_subject\": \"Re: SQL to ECL tool?\", \"username\": \"rtaylor\", \"post_text\": \"Praveen\\n\\nIt's in our JDBC driver: http://hpccsystems.com/products-and-services/products/plugins/JDBC-Driver\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-06-18 14:55:07\" },\n\t{ \"post_id\": 4215, \"topic_id\": 105, \"forum_id\": 22, \"post_subject\": \"Re: SQL to ECL tool?\", \"username\": \"Praveen\", \"post_text\": \"Any update on SQL to ECL?\", \"post_time\": \"2013-06-18 11:08:55\" },\n\t{ \"post_id\": 316, \"topic_id\": 105, \"forum_id\": 22, \"post_subject\": \"Re: SQL to ECL tool?\", \"username\": \"HPCC Staff\", \"post_text\": \"A read-only SQL interface for our Roxie delivery system is planned as a future extension (module) to our Enterprise Edition. 
Unfortunately there isn't a release date yet.\", \"post_time\": \"2011-09-02 20:14:41\" },\n\t{ \"post_id\": 312, \"topic_id\": 105, \"forum_id\": 22, \"post_subject\": \"SQL to ECL tool?\", \"username\": \"aintnomyth\", \"post_text\": \"I believe I saw a SQL to ECL converter tool on the list of features for Enterprise Edition, where can I find more information about it?\", \"post_time\": \"2011-09-02 15:57:22\" },\n\t{ \"post_id\": 5763, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"Re: SQL "LIKE"\", \"username\": \"rtaylor\", \"post_text\": \"househippo,\\n\\nThis previous thread: http://hpccsystems.com/bb/viewtopic.php?f=8&t=1193&p=5185&hilit=+LIKE&sid=45bef542c61fec074e57dfc7f991132c&sid=45bef542c61fec074e57dfc7f991132c#p5185 also addresses the issue of how to do an SQL LIKE.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-28 14:37:31\" },\n\t{ \"post_id\": 5754, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"Re: SQL "LIKE"\", \"username\": \"DSC\", \"post_text\": \"Std.Str.Contains() would be another candidate to check out. There are a few others in the string library that may meet your needs as well, such as WildMatch().\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-05-28 10:31:13\" },\n\t{ \"post_id\": 5753, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"Re: SQL "LIKE"\", \"username\": \"househippo\", \"post_text\": \"Dan thanks for the response.\\nRegular expression can be expensive process in terms of CPU and memory when the string are large.\\nIs there other tools to use besides REGEXFIND() on these large strings when they are MB or GB size?\\nEX.\\n
SELECT * FROM `table` WHERE `html` LIKE "%iPad2%"
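For a plain substring test like the LIKE above, one possible lighter-weight alternative to REGEXFIND() is the standard library's substring search. The sketch below is illustrative only: the record layout and logical filename are assumptions, not something from this thread.

IMPORT Std;

// Assumed layout: one large STRING field holding the page HTML.
PageRec := RECORD
  STRING html;
END;
pages := DATASET('~thor::pages', PageRec, THOR); // illustrative filename

// Rough equivalent of LIKE "%iPad2%": Std.Str.Find returns the position of the
// Nth occurrence of the substring (0 if it is absent), so no regex engine is involved.
hits := pages(Std.Str.Find(html, 'iPad2', 1) > 0);
OUTPUT(hits);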
\", \"post_time\": \"2014-05-28 06:46:59\" },\n\t{ \"post_id\": 5751, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"Re: SQL "LIKE"\", \"username\": \"DSC\", \"post_text\": \"I just realized that my response was a little specific, focusing on testing the end of a string. Check out the REGEXFIND() built-in function. It would be used in a filter, like the Std.Str.EndsWith() above, and would provide SQL LIKE-like functionality (and more).\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-05-27 21:03:17\" },\n\t{ \"post_id\": 5746, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"Re: SQL "LIKE"\", \"username\": \"DSC\", \"post_text\": \"There is a standard library function that you can use as a recordset filter to do this:\\n\\nIMPORT Std;\\n\\nDataRec := RECORD\\n STRING email;\\nEND;\\n\\nmyTable := DATASET\\n (\\n [\\n {'one_address@yahoo.com'},\\n {'second_address@gmail.com'},\\n {'third_address@hotmail.com'}\\n ],\\n DataRec\\n );\\n\\nr := myTable(Std.Str.EndsWith(email, '@gmail.com'));\\n\\nOUTPUT(r);
\\nCheers,\\n\\nDan\", \"post_time\": \"2014-05-27 12:49:56\" },\n\t{ \"post_id\": 5745, \"topic_id\": 1327, \"forum_id\": 22, \"post_subject\": \"SQL "LIKE"\", \"username\": \"househippo\", \"post_text\": \"The wiki give some great examples of basic SQL to ECL code samples. What would be the ECL equivalent to SQL: \\n\\n SELECT * FROM `table` WHERE `email` LIKE "%@gmail.com"
\", \"post_time\": \"2014-05-27 08:43:46\" },\n\t{ \"post_id\": 2273, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"HPCC Staff\", \"post_text\": \"From an earlier response from dabayliss:\\nWe have had PMML suggested before. At the moment we have not written any code to assist that. HOWEVER - all of our models have a meta-data specification - so if someone wants to write the code - there is nothing stopping them.
\\n\\nThank you\", \"post_time\": \"2012-08-30 20:52:16\" },\n\t{ \"post_id\": 2252, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"Durai\", \"post_text\": \"Hi, \\n\\nI am interested in writing a export to PMML from a model. (for now)\\nBut unsure on where to start. it would be very helpful if you could provide some pointers around. \\n\\nThanks\\nDurai\", \"post_time\": \"2012-08-23 13:51:35\" },\n\t{ \"post_id\": 1057, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"Jeff\", \"post_text\": \"Good point! I will look at the code on GitHub. \", \"post_time\": \"2012-02-08 16:24:35\" },\n\t{ \"post_id\": 1055, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"dabayliss\", \"post_text\": \"We have had PMML suggested before. At the moment we have not written any code to assist that. HOWEVER - all of our models have a meta-data specification - so if someone wants to write the code - there is nothing stopping them
\\n\\nWe have not started any SVM implementation yet. Our sister company (USLM) is in the process of porting an SVM implementation to HPCC - so once that is complete I am hoping to snaffle it ...\", \"post_time\": \"2012-02-08 12:22:40\" },\n\t{ \"post_id\": 1054, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"Jeff\", \"post_text\": \"I was super excited to hear about these new machine learning features. I'm from Oklahoma City and we have a couple of text classification processes currently in production. I've often thought to myself that ML and HPCC would be a great fit together. I'm really looking forward to reading the docs and getting my hands dirty. By the way, nice touch with Stanford's robot in the docs! I love that image.\\n\\nFlavio's questions got me thinking about additional features that would enhance and build on these capabilities. The first thing that crossed my mind was a vendor-independent model export feature to facilitate the exchange of predictive models between HPCC and other applications. This is a perfect use case for an industry standard like the Predictive Model Markup Language (PMML).\\n\\nWe use the Support Vector Machine algorithm in OKC for our text classification products because of the extremely high dimensionality. It has worked very well for us. Has there been any discussion or thought given to how SVM might be implemented within the distributed HPCC environment?\\n\\nAgain, I can't say how glad I am that these two worlds finally converged. Thanks for all your hard work making it happen!\\n\\nJeff\", \"post_time\": \"2012-02-08 07:04:00\" },\n\t{ \"post_id\": 690, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"Re: My impression from reviewing HPCC ML capabilities\", \"username\": \"dabayliss\", \"post_text\": \"The classify module currently has perceptrons; essentially a single layer forward learning neural 'net'; next step is to build up the multi-layer. One particularly interesting twist is that I am using an online learning approach - partly just to demonstrate how to do it - partly because it gives people a possible Roxie approach.\\n\\nI suspect that when I eventually dust of my textbooks and go for the backwards learning mechanism I will want to use a batch learning system - it is a far more natural way to use massive parallelism. I have also been rolling restricted Boltzman machines around my mind; I have a hunch that they might be a very good front end for a more general neural net in the case of highly dimensions input.\\n\\nAll the above said; we are currently trying to start closing down towards a first public release. Therefore I am trying -not- to think about all that exciting cool stuff so I can focus on the hum-drum of rounding out the interfaces and getting some documentation together!\\n\\nDavid\", \"post_time\": \"2011-12-02 14:18:53\" },\n\t{ \"post_id\": 689, \"topic_id\": 181, \"forum_id\": 23, \"post_subject\": \"My impression from reviewing HPCC ML capabilities\", \"username\": \"flavio\", \"post_text\": \"This is less of a question and more of a possible start of a discussion around our current and future capabilities in the Machine Learning area. I have been recently testing some of HPCC functionality around Machine Learning and I can truly say that I'm impressed!\\n\\nUsed to Matlab/Octave but not being an ECL expert myself didn't hamper my ability to use regression, classifiers and clustering, at all. 
Data structures are very intuitive (at least for people familiar with matrix representation in other systems), and very little (if any) of the ECL glue is required to use this functionality. What is also very impressive is the fact that all algorithms are fully distributed.\\n\\nI know that we are currently working on supporting some of these functions from within the graphical workflow design interface in Pentaho Kettle/Spoon, so I don't see how it could get any easier
\\n\\nI would personally like to see more in the neural networks area (including forward and back propagation and temporal difference learning) and integration with our graphing capabilities from our web services interfaces.\\n\\nWhat do others think about this? Which areas would you like to see worked on next? What are HPCC ML developers working on at the moment?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2011-12-02 14:00:53\" },\n\t{ \"post_id\": 1114, \"topic_id\": 265, \"forum_id\": 23, \"post_subject\": \"Re: K-Means,AggloN Custering Methods\", \"username\": \"flavio\", \"post_text\": \"K-means and AggloN algorithms collectively known as Clustering, which are part of the Unsupervised Learning set of machine learning techniques.\\n\\nBoth will allow to automatically group different elements of an item set based on certain characteristics, and identify "clusters" of related items.\\n\\nIn the case of k-means, for example, you pass a set of items with a number of features (or "dimensions") and a set of tentative centroids (usually randomly selected to match some of the items). After running several iterations, the algorithm will converge to a local minimum, which represents the positions of these centroids that minimize the distance to the items in the set. \\n\\nSince the convergence is to a local minimum and selecting the number of initial centroids can be tricky, it's not uncommon to execute several runs with different number of centroids and random startup locations, and calculate the convergence using certain measurements (F-measure, Dunn Index, etc.), using the "elbow rule" to identify the most efficient number of starting centroids.\\n\\nAgglomerative clustering (AggloN) uses a hierarchical agglomerative model but it's otherwise similar in principle.\\n\\nFlavio\", \"post_time\": \"2012-02-14 17:56:08\" },\n\t{ \"post_id\": 1109, \"topic_id\": 265, \"forum_id\": 23, \"post_subject\": \"K-Means,AggloN Custering Methods\", \"username\": \"Bhagwant\", \"post_text\": \"What next after creating these custers?\\nWhat is the use of these Custers?\\nHow can i get the prediction for a particular data?\", \"post_time\": \"2012-02-14 15:20:31\" },\n\t{ \"post_id\": 2115, \"topic_id\": 266, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning\", \"username\": \"pablo07\", \"post_text\": \"It's strongly depend to your data. \\nAccording to the number of instance, the dimension of data, the type of data, or the number of classes you must adapt your classifier to obtain the best results.\\n\\n\\n---\\nSmart Me Up, the machine learning company\\n---\", \"post_time\": \"2012-07-31 07:46:29\" },\n\t{ \"post_id\": 1113, \"topic_id\": 266, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning\", \"username\": \"flavio\", \"post_text\": \"There are different ways that you can classify data according to certain groups or categories. \\n\\nAssuming that you have a items that have already been classified, you could use the features of these items to train a classifier, either by creating a Naives Bayes model, or by calculating the coefficients of a logistic regression classifier.\\n\\nAfter that, you can run any new items through these models, for the items to be automatically classified (1 if >= 0.5, 0 otherwise).\\n\\nIn the particular case of logistic regression, a single model will only allow to separate two categories. If you need to handle more than two categories and plan on using logistic regression, you'll need to use an "all vs. 
all" approach, essentially using as many models as independent categories, and selecting the outcome that ranks the highest for that item.\\n\\nThese techniques are also known as Supervised Learning.\\n\\nIf, instead, you have a set of items and you want to automatically group them based on (currently unknown) commonalities, you will need to use a different technique, for example clustering (k-means clustering is also part of the ML library).\\n\\nIf you are looking for "anomalies" in a set, you'll need to use yet a different technique (univariate or multi-variate Gaussian distributions, for example, perhaps combined with Principal Component Analysis).\\n\\nPlease let me know if this helps.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-02-14 17:43:39\" },\n\t{ \"post_id\": 1110, \"topic_id\": 266, \"forum_id\": 23, \"post_subject\": \"Machine Learning\", \"username\": \"Bhagwant\", \"post_text\": \"What is actual power of Machine Learning?\\nIf i want to my data to be classified according to my groups? Which algorithm should i use? why?\", \"post_time\": \"2012-02-14 15:23:31\" },\n\t{ \"post_id\": 1510, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"dabayliss\", \"post_text\": \"We are currently working with the graphlab people to try to come up with a fair benchmark across a range of single and multi-node ML systems\\n\\nKeren Ouaknine is spearheading the work from our side ....\\n\\nDavid\", \"post_time\": \"2012-04-12 23:35:22\" },\n\t{ \"post_id\": 1508, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"flavio\", \"post_text\": \"I agree! Please take a look at PaperBoat and let me know if we can work together on a fair benchmark between ECL-ML/PaperBoat and Mahout...\\n\\nFlavio\", \"post_time\": \"2012-04-12 18:07:56\" },\n\t{ \"post_id\": 1507, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"szhou\", \"post_text\": \"The numerical calculation performance with python and matlab could be bad if they did not use c libraries internally. Another fair comparison for hpcc/ML could be against java/hadoop/mahout. The integrating paperboat with ecl is interesting.\\n\\nThanks,\\n\\nShujia\", \"post_time\": \"2012-04-12 18:03:57\" },\n\t{ \"post_id\": 1506, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"flavio\", \"post_text\": \"Szhou,\\n\\nI'm not familiar with the implementations that you mention below, but I did run some comparisons with Matlab/Octave (and also some code that I wrote in Python) and our current ECL implementation was faster, even for reasonably small data.\\n\\nIn addition to our ECL-ML libraries, we have some current alpha/beta state HPCC integration with Ismion's PaperBoat ML library (http://ismion.com/documentation/paperboat/introduction.html) in case you want to check it out too: http://ismion.com/documentation/ecl-pb/index.html.\\n\\nPlease let me know.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-04-12 17:50:34\" },\n\t{ \"post_id\": 1489, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"szhou\", \"post_text\": \"Hi,\\n\\nLooking at the source of hpcc/ML/k-means, I wonder how long for you to implement it? 
Have you compared the absolute performance against the corresponding serial c code and/or c with mpi code?\", \"post_time\": \"2012-04-11 16:57:10\" },\n\t{ \"post_id\": 1486, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"dabayliss\", \"post_text\": \"To be clear - the performance scales linearly with the number of NODES - so 10x as many nodes = 10x faster.\\n\\nThe algorithm itself is NOT linear. If you look at the different distance metrics we support; some of them are significantly lower than the standard kN (especially for sparse data).\\n\\nNote: we have decision tree support on the master branch - it is not yet in our official release ...\\n\\nDavid\", \"post_time\": \"2012-04-11 15:31:37\" },\n\t{ \"post_id\": 1484, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"arjuna chala\", \"post_text\": \"Szhou,\\n\\nSource code is available here - https://github.com/hpcc-systems/ecl-ml.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2012-04-11 14:54:56\" },\n\t{ \"post_id\": 1483, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"szhou\", \"post_text\": \"Hi, Flavio:\\n\\nTypically we will use k-means and decision tree for analysis. If HPCC ML has a linear scalability for them, that would be great. Do you have the source codes for HPCC ML/k-means? I am curious how it is implemented and how it achieves a linear scalability.\\n\\nThanks,\\n\\n\\nSJZ\", \"post_time\": \"2012-04-11 14:39:48\" },\n\t{ \"post_id\": 1482, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"flavio\", \"post_text\": \"Szhou,\\n\\nI don't have personally much experience with SAS in large scale analytics, and I don't even know if it performs properly (or at all) in distributed computing platforms.\\n\\nWe did benchmark scalability on ECL-ML, and it does scale linearly with the number of nodes in the cluster, achieving almost perfect parallelism.\\n\\nDo you have any specific algorithms in mind? If so, and if you can provide some base cases, we could help you run some benchmarks too.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-04-11 13:32:17\" },\n\t{ \"post_id\": 1477, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"Re: HPCC Systems ML library in action\", \"username\": \"szhou\", \"post_text\": \"Is there any performance and scalability about this ML library? In general, how HPCC systems with ML compares with SAS's? Is there any solid performance and feature comparison?\\n\\nThanks,\\n\\n\\nsjz\", \"post_time\": \"2012-04-10 20:28:21\" },\n\t{ \"post_id\": 1335, \"topic_id\": 311, \"forum_id\": 23, \"post_subject\": \"HPCC Systems ML library in action\", \"username\": \"HPCC Staff\", \"post_text\": \"Below is a link to a Pinterest POV conducted by Engauge. They used the HPCC Systems platform, ECL to process the data for analysis and several ML methods to generate results. \\n\\nCheck it out!\\nhttp://hpccsystems.com/Why-HPCC/case-st ... -pinterest\", \"post_time\": \"2012-03-13 18:19:26\" },\n\t{ \"post_id\": 1487, \"topic_id\": 334, \"forum_id\": 23, \"post_subject\": \"Re: How to create a matrix based on a CSV file?\", \"username\": \"dabayliss\", \"post_text\": \"Good news - your data is already in the right format! If you look in the Types attribute of the MAT sub-directory you will find the matrix format. \\n\\n
\\nEXPORT Element := RECORD\\n t_Index x; // X is rows\\n t_Index y; // Y is columns\\n t_value value;\\nEND;
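As an illustration, a "row, col, value" CSV can be declared directly against that layout; the logical filename below is an assumption, and the qualified name of the layout may differ slightly between ecl-ml versions:

IMPORT ML;

// Sketch: read the sprayed CSV straight into the x/y/value Element layout shown above.
m := DATASET('~thor::my_matrix', ML.Mat.Types.Element, CSV);
OUTPUT(m);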
\\n\\nYou should be able to read your data in directly to that format using the CSV attribute on a DATASET statement.\\n\\n[quote="jhr1021":2n6lhys4]Hello, \\n\\nI'd like to read a matrix from a CSV file. \\n\\nCSV format\\nrow, col, value => \\n1, 2, 10\\n1, 3, 14\\n...\\n\\nHowever, I don't know how to define a record for the file and convert it to matrix format used in ML. \\n\\nThank you.\", \"post_time\": \"2012-04-11 15:35:12\" },\n\t{ \"post_id\": 1481, \"topic_id\": 334, \"forum_id\": 23, \"post_subject\": \"Re: How to create a matrix based on a CSV file?\", \"username\": \"flavio\", \"post_text\": \"jhr,\\n\\nthere are two ECL macros, which will do the conversion to and from the internal matrix format representation for you.\\n\\nFor example, with an inline dataset, if you wanted to create a matrix with 4 columns and a REAL in each cell, you could define a record layout like the one below,where the first column contains a row ID, and each subsequent column contains a value (I use generic column names for clarity in this example, but you should probably use better mnemonics there):\\n\\nMyRecordLayout := RECORD\\n UNSIGNED RowId;\\n REAL column1;\\n REAL column2;\\n REAL column3;\\n REAL column4;\\nEND;\\n\\nAnd then use that record layout for your dataset, as I do below with this inline dataset definition (you could be loading the CSV file from the filesystem, if you wanted):\\n\\nX2 := DATASET([\\t\\n{1, 1, 5, 2.4, 5.2},\\n{2, 5, 7, 9.7, 1.4},\\n{3, 8, 1, 3.3, 6.1},\\n{4, 5, 2, 9.5, 3.2},\\n{5, 9, 3, 8.9, 1.7},\\n{6, 1, 4, 1.1, 2.8},\\n{7, 9, 4, 2.4, 6.8}], MyRecordLayout);\\n\\nAnd last, but not least, you can call ml.ToField() to convert your dataset into the internal matrix representation format (the first parameter is the input dataset name and the second is the output dataset in the internal matrix format):\\n\\nml.ToField(X2,fX2);\\n\\nAfter processing, you can convert back from the internal format to your original record layout using ml.FromField():\\n\\nml.FromField(fX3.result(), MyRecordLayout, X3);\\n\\nI hope this example is clear, but please chime in if you need more help.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-04-11 13:26:34\" },\n\t{ \"post_id\": 1479, \"topic_id\": 334, \"forum_id\": 23, \"post_subject\": \"How to create a matrix based on a CSV file?\", \"username\": \"jhr1021\", \"post_text\": \"Hello, \\n\\nI'd like to read a matrix from a CSV file. \\n\\nCSV format\\nrow, col, value => \\n1, 2, 10\\n1, 3, 14\\n...\\n\\nHowever, I don't know how to define a record for the file and convert it to matrix format used in ML. \\n\\nThank you.\", \"post_time\": \"2012-04-11 01:25:49\" },\n\t{ \"post_id\": 1648, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"Re: solution suggestion on a use case\", \"username\": \"flavio\", \"post_text\": \"Szhou,\\n\\nYou are correct. If you only have a few cases and/or training examples, depending on the method that decide to use, you don't need much scalability. However, since you can still run HPCC in a VM or a single node, there may not be any drawback in using HPCC either, and you could reuse your code if/when your number of cases grows by one or two orders of magnitude (keep in mind that the exact same ECL code that you use in a one-node HPCC system will run in 10 or 100 nodes, with the same efficiency). 
\\n\\nThe other element to consider, also with respect to scalability, is that some of these algorithms (clustering is one of them) have a quadratic big "O" computational complexity (O(n^2)), so if the number of cases grows tenfold, the total theoretical computing time would be 100 times higher. In this case, you may want to have a solution that will scale accordingly.\\n\\nOn a related topic, the original response to your post led me into writing this short blog entry, which you may find useful too: http://hpccsystems.com/blog/short-guide ... s-platform.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-05-24 12:22:50\" },\n\t{ \"post_id\": 1642, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"Re: solution suggestion on a use case\", \"username\": \"szhou\", \"post_text\": \"Flavio:\\n\\n\\nGood extensive discussions. Since those clustering techniques are also available outside hpcc, what is the advantage of using hpcc? (If the number of samples are less than 10000, I assume that there is no need to use the parallel computers.)\\n\\nsjz\", \"post_time\": \"2012-05-23 20:56:55\" },\n\t{ \"post_id\": 1641, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"Re: solution suggestion on a use case\", \"username\": \"bforeman\", \"post_text\": \"For an extended blog on this topic:\\n\\nhttp://hpccsystems.com/blog/short-guide-anomaly-detection-hpcc-systems-platform\\n\\nGreat content Flavio!!\\n\\nBob\", \"post_time\": \"2012-05-23 17:26:38\" },\n\t{ \"post_id\": 1634, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"Re: solution suggestion on a use case\", \"username\": \"arjuna chala\", \"post_text\": \"PCA is now supported by the HPCC ECL-ML library.\\n\\nThanks\\n\\nArjuna\", \"post_time\": \"2012-05-23 13:49:16\" },\n\t{ \"post_id\": 1629, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"Re: solution suggestion on a use case\", \"username\": \"flavio\", \"post_text\": \"Szhou,\\n\\nIn order to answer your question, I would probably need a little bit more context. It seems to me that what you are trying to accomplish is some sort of anomaly detection (a particular trader has anomalous activity, which could be defined as activity which doesn't correlate well with what other traders do). \\n\\nThere are multiple machine learning techniques that could be used, although either semi-supervised learning using some variation of PCA and/or multi-gaussian analysis, and unsupervised learning using clustering, are the most common.\\n\\nOne way to tackle this problem on HPCC is by using an unsupervised machine learning method known as agglomerative hierarchical clustering, which should work with this type of multi-dimensional data, to identify points which don't agglomerate well after a number of iterations (looking for isolated points and/or belonging to small clusters). This would require our ECL-ML modules. http://hpccsystems.com/ml\\n\\nAnother option would be to leverage our Paperboat integration and use Paperboat's PCA implementation to identify the primary components of the vast majority of normal cases, and identify outliers that don't correlate properly. This would require our Paperboat integration. 
http://ismion.com/documentation/ecl-pb/index.html\\n\\nOf course, if you have a way to properly define which particular behavior is indicative of this anomalous activity, you could also build a rules based system, but knowing all possible rules tends to be a feat on itself.\\n\\nPlease let me know if you need any help.\\n\\nThanks,\\n\\nFlavio\\n\\n[quote="szhou":2x3feazh]Hi,\\n\\nI am looking for solving a problem with hpcc/ecl/ML. Here is the rough description of the problem:\\n\\none trader set a lot of buy orders around the target stock to artificially increase the trading volume. How to quickly identify this trader with one-day, -week, -month, -year transaction records. (one day transaction record can be 30 (messages)x one million transactions.)\\n\\nIs hpcc/ecl sufficient to do it? Is ML/cluster needed? What is the advantage of using hpcc?\\n\\nThanks,\\n\\n\\nsjz\", \"post_time\": \"2012-05-21 20:00:56\" },\n\t{ \"post_id\": 1628, \"topic_id\": 364, \"forum_id\": 23, \"post_subject\": \"solution suggestion on a use case\", \"username\": \"szhou\", \"post_text\": \"Hi,\\n\\nI am looking for solving a problem with hpcc/ecl/ML. Here is the rough description of the problem:\\n\\none trader set a lot of buy orders around the target stock to artificially increase the trading volume. How to quickly identify this trader with one-day, -week, -month, -year transaction records. (one day transaction record can be 30 (messages)x one million transactions.)\\n\\nIs hpcc/ecl sufficient to do it? Is ML/cluster needed? What is the advantage of using hpcc?\\n\\nThanks,\\n\\n\\nsjz\", \"post_time\": \"2012-05-21 17:41:26\" },\n\t{ \"post_id\": 1671, \"topic_id\": 371, \"forum_id\": 23, \"post_subject\": \"Re: Incomplete Subgrams Table?\", \"username\": \"david.wheelock@lexisnexis.com\", \"post_text\": \"Yes, I have been aware of this bug. The fix will be included in an update to the CoLocation module that I will be pushing to git in the near future.\\n\\nIf you have a pressing need for this function in the meantime, the fix is a simple one -- just add a "LEFT OUTER" to the JOIN embedded in the RETURN statement.\\n\\n- David W.\", \"post_time\": \"2012-05-30 13:33:11\" },\n\t{ \"post_id\": 1670, \"topic_id\": 371, \"forum_id\": 23, \"post_subject\": \"Re: Incomplete Subgrams Table?\", \"username\": \"bforeman\", \"post_text\": \"Thank you for your post, I will pass this on to the ML development team for comment.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-05-30 12:31:24\" },\n\t{ \"post_id\": 1669, \"topic_id\": 371, \"forum_id\": 23, \"post_subject\": \"Incomplete Subgrams Table?\", \"username\": \"cfgilbert\", \"post_text\": \"The Subgrams function in ML.Docs.Colocation produces a table for n-grams where n = max-n instead of producing a table for all n-grams where n > 1 as specified in the ML-Library documentation.\", \"post_time\": \"2012-05-29 21:56:13\" },\n\t{ \"post_id\": 1717, \"topic_id\": 378, \"forum_id\": 23, \"post_subject\": \"ML-Library Function Reference\", \"username\": \"chargil\", \"post_text\": \"Has been posted in the ecl-ml git repo wiki here.\\n\\nThe reference page covers most functions in the library (pretty much all the useful ones).\", \"post_time\": \"2012-06-05 21:39:37\" },\n\t{ \"post_id\": 1815, \"topic_id\": 397, \"forum_id\": 23, \"post_subject\": \"Re: A machine learning problem - what method to select\", \"username\": \"nvasil\", \"post_text\": \"Ok let's see another thing. 
Do you really care to know about the rules or you care about the accuracy of the prediction?\\nIf you care about the rules then you should look at inductive logic programming, which is a super category of constructive induction. These are difficult problems and hard to scale. The one I prefer is the Markov Logic Networks.\\nIf you don't care then let's start with the simplest model:\\nUnfold X into a vector \\\\hat{X} (I think it 60000 dimensions). Then Run an L1 or L2 regularized Support Vector machine. If ECL-ML does not support it, use vopal wabbit (http://hunch.net/~vw/). \\nIf the accuracy is not acceptable then you should try interaction terms. That means you should augment \\\\hat{X} with \\\\hat{X}_i\\\\hat{X}_j terms and then run linear SVM. Now for 60000 terms in \\\\hat{X} will result in 36e+8 elements which is huge. You can use a greedy algorithm to pick them. There is an approach to this using Genetic Algorithms (http://www.amazon.com/Adaptive-Learning ... l+networks)\\n\\nAnother way to deal with the nonlinearities is to use Deep Belief networks/Restricted Boltzman Machines (or in general deep learning networks).\\nAn excellent tutorial can be found here http://www.iro.umontreal.ca/~bengioy/pa ... l_book.pdf\\n\\nI hope I was helpful \\n\\nNick\", \"post_time\": \"2012-06-20 14:52:02\" },\n\t{ \"post_id\": 1808, \"topic_id\": 397, \"forum_id\": 23, \"post_subject\": \"Re: A machine learning problem - what method to select\", \"username\": \"tomas\", \"post_text\": \"Hello\\n\\nYep I was not very clear. When I want to build a decision tree I first need to select attributes (features) before the being able to run a decision tree building algorithm.\\n\\nYou are right the basic thing to do would be to search the neighborhood around the variable that I am trying to predict (I am doing that right now using Weka). However it seems to me that there are other dependencies in the problem not necessarily in the neighborhood. The problem is I don't know exactly what they are.\\n\\nHaving done some research on the internet, I guess that I am dealing with a problem that is called "constructive induction". It seems to me that I could use the Multifactor dimensionality reduction algorithm that is part of the Weka GC package. \\n\\nDo you have any experience with this? Do you think it would work?\\n\\n(To give a little background, the two arrays are solutions -linear relaxation, discrete - of a complicated real world model that takes quite some time to solve using exact mathematical optimization methods. On the other hand a human expert is capable of finding a solution through his understanding of the underlying relations of the model elements that are not necessarily well described in the model. Unfortunately I don't really have sufficient access to pick the brain of the expert so I would like to discover some rules that could be implemented in a heuristic solution algorithm that works at least part of the time...)\\n\\nThanks a LOT!\", \"post_time\": \"2012-06-20 08:22:40\" },\n\t{ \"post_id\": 1803, \"topic_id\": 397, \"forum_id\": 23, \"post_subject\": \"Re: A machine learning problem - what method to select\", \"username\": \"nvasil\", \"post_text\": \"Help me a little bit here:\\n\\nWhat do you mean " I need to select attributes before running"?\\nIt seems to me that you are trying to predict \\nY[i,j,k]=\\\\sum_{n=-N}^{N}\\\\sum_{m=-M}^{m=M}\\\\sum_{l=-L}^{l=L} f(X[i-n, j-m, k-l])\\n\\nand your problem is that you have to choose N,M,L and also you have no clue about f. 
In theory f can be any nonlinear mapping such as SVM, Neural Network, nonparametric regression etc. It seems to me that your problem is how to choose the neighborhood around i,j,k. Am I right?\\n\\nNick\", \"post_time\": \"2012-06-19 19:15:45\" },\n\t{ \"post_id\": 1792, \"topic_id\": 397, \"forum_id\": 23, \"post_subject\": \"A machine learning problem - what method to select\", \"username\": \"tomas\", \"post_text\": \"Hello everybody\\n\\nI would like to solve a machine learning problem and I am looking for some advice on what method to use(took some ML classes in college a while ago but have not done much since).\\n\\nI have the following problem, I have to arrays :\\n1) X(i,j,k) with values between 0 and 1, including fractional values\\n2) Y(i,k,k) with discreet values 0 and 1\\n\\ni ~= 1..100,j ~= 1..30, k ~= 1..20\\n(these arays represent the linear relaxation (X array) and near optimal binary solution (Y) of a difficult real world problem)\\n\\nI am convinced that there is some structure in this problem that could be extracted as rules such as :\\nIF X(i-1,j,k)==1 AND X(i+1,j,k)==1 AND X(i,j,k-1)==1\\nTHEN \\nY(i,j,k)==1\\n\\nIt seems to me that these kind of rules could be extracted through building a decision tree (I have done a quick test with Weka and it seems to work). However what bothers me is that I need to select attributes before running the decision tree learning algorithm. I would like to explore a wider range of possible rules. My ideal ML algorithm would first select a suitable set of attributes and then give me the rule that I want (or do it at the same time). \\n\\nWhat algorithm would you use for such a problem?\\n\\nI have done some things with genetic algorithms, it seems to my that it might work in this case (gene = attribute, random gene generation, fitness function based on the predictive quality of the rule). Would this work?\\n\\nThx a lot\", \"post_time\": \"2012-06-18 13:04:39\" },\n\t{ \"post_id\": 1919, \"topic_id\": 419, \"forum_id\": 23, \"post_subject\": \"Re: Large scale linear regression\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for posting! This issue and all related responses can be found in Github:\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/2788\", \"post_time\": \"2012-07-09 12:27:48\" },\n\t{ \"post_id\": 1906, \"topic_id\": 419, \"forum_id\": 23, \"post_subject\": \"Re: Large scale linear regression\", \"username\": \"sbagaria\", \"post_text\": \"I have also posted this as an issue on github. Sorry for the duplicate posting; I thought my github post did not work. \\n\\nhttps://github.com/hpcc-systems/HPCC-Pl ... ssues/2788\", \"post_time\": \"2012-07-06 09:32:55\" },\n\t{ \"post_id\": 1902, \"topic_id\": 419, \"forum_id\": 23, \"post_subject\": \"Large scale linear regression\", \"username\": \"sbagaria\", \"post_text\": \"On small datasets, linear regression runs fine for me although slower than anticipated but gives accurate results. But on larger datasets, I run into errors.\\n\\nThe dataset I am using is http://archive.ics.uci.edu/ml/datasets/ ... dictionMSD\\n\\nWhen running on a cluster,\\nError: System error: -1: Graph[44], SLAVE 172.20.7.11:20700: Graph[44], diskwrite[48]: sequential writes only on compressed file
\\n\\nWhen running a locally compiled executable,\\nSystem error: 0: Exceeded disk write size limit of 10737418240 while writing file ~spill::S__WLOCAL_27519
\\n\\nI understand that ECL is generating huge temporary files even for small datasets (about 10G for a 10000x90 data table). What am I doing wrong?\\n\\nThe subgraph where this failure happens is \\n\\n
IMPORT ML;\\n\\nLayout_Row := RECORD \\n\\tINTEGER Year;\\n\\tREAL Val1;\\n\\tREAL Val2;\\n\\tREAL Val3;\\n\\tREAL Val4;\\n\\tREAL Val5;\\n\\tREAL Val6;\\n\\tREAL Val7;\\n\\tREAL Val8;\\n\\tREAL Val9;\\n\\tREAL Val10;\\n\\tREAL Val11;\\n\\tREAL Val12;\\n\\tREAL Val13;\\n\\tREAL Val14;\\n\\tREAL Val15;\\n\\tREAL Val16;\\n\\tREAL Val17;\\n\\tREAL Val18;\\n\\tREAL Val19;\\n\\tREAL Val20;\\n\\tREAL Val21;\\n\\tREAL Val22;\\n\\tREAL Val23;\\n\\tREAL Val24;\\n\\tREAL Val25;\\n\\tREAL Val26;\\n\\tREAL Val27;\\n\\tREAL Val28;\\n\\tREAL Val29;\\n\\tREAL Val30;\\n\\tREAL Val31;\\n\\tREAL Val32;\\n\\tREAL Val33;\\n\\tREAL Val34;\\n\\tREAL Val35;\\n\\tREAL Val36;\\n\\tREAL Val37;\\n\\tREAL Val38;\\n\\tREAL Val39;\\n\\tREAL Val40;\\n\\tREAL Val41;\\n\\tREAL Val42;\\n\\tREAL Val43;\\n\\tREAL Val44;\\n\\tREAL Val45;\\n\\tREAL Val46;\\n\\tREAL Val47;\\n\\tREAL Val48;\\n\\tREAL Val49;\\n\\tREAL Val50;\\n\\tREAL Val51;\\n\\tREAL Val52;\\n\\tREAL Val53;\\n\\tREAL Val54;\\n\\tREAL Val55;\\n\\tREAL Val56;\\n\\tREAL Val57;\\n\\tREAL Val58;\\n\\tREAL Val59;\\n\\tREAL Val60;\\n\\tREAL Val61;\\n\\tREAL Val62;\\n\\tREAL Val63;\\n\\tREAL Val64;\\n\\tREAL Val65;\\n\\tREAL Val66;\\n\\tREAL Val67;\\n\\tREAL Val68;\\n\\tREAL Val69;\\n\\tREAL Val70;\\n\\tREAL Val71;\\n\\tREAL Val72;\\n\\tREAL Val73;\\n\\tREAL Val74;\\n\\tREAL Val75;\\n\\tREAL Val76;\\n\\tREAL Val77;\\n\\tREAL Val78;\\n\\tREAL Val79;\\n\\tREAL Val80;\\n\\tREAL Val81;\\n\\tREAL Val82;\\n\\tREAL Val83;\\n\\tREAL Val84;\\n\\tREAL Val85;\\n\\tREAL Val86;\\n\\tREAL Val87;\\n\\tREAL Val88;\\n\\tREAL Val89;\\n\\tREAL Val90;\\nEND;\\n\\nFile_MSD := \\nDATASET('~testdata::sb::msd',Layout_Row,CSV);\\n\\nLayout_Row_ID := RECORD \\n\\tINTEGER8 ID := 0;\\n\\tLayout_Row;\\nEND;\\n\\nLayout_Row_ID AddID(Layout_Row l, INTEGER c) := TRANSFORM\\n\\tSELF.ID := c;\\n\\tSELF := l;\\nEND;\\n\\nFIle_MSD_ID := PROJECT(File_MSD,AddID(LEFT,COUNTER));\\n\\nML.ToField(File_MSD_ID[1..500000],songs);\\n\\nX := songs(Number >1);\\nY := songs(Number =1);\\n\\nReg := ML.Regression.OLS(X,Y);\\nB := Reg.Beta();\\nB;\\n\\n//Reg.ModelY;\\n//Reg.Extrapolate(X,B);\\n\\nReg.RSquared;\\nReg.Anova;
\", \"post_time\": \"2012-07-05 21:43:40\" },\n\t{ \"post_id\": 5702, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"tlhumphrey2\", \"post_text\": \"It isn't ready, see https://track.hpccsystems.com/browse/ML-216.\\n\\nBut, Victor believes he knows what is causing the problem of ML-216. So, we expect that this problem will be fixed, soon.\\n\\nFurthermore, we haven't tested DecisionTree on large datasets. So, we don't know what the execution speed might be.\\n\\nTim\", \"post_time\": \"2014-05-16 12:42:21\" },\n\t{ \"post_id\": 5701, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"sunil3loq\", \"post_text\": \"Thanks Victor for sharing the link to the code to build decision tree.\\n\\nCan somebody confirm if the decision tree functions are ready to use? I am posing the question as decision tree building functions are not referred to in the ML document's classification section. That could mean that they are in process of development.\\n\\nIt would be very helpful if we get to know that they could be used without any fear of being wrong.\\n\\nSunil\", \"post_time\": \"2014-05-16 04:07:49\" },\n\t{ \"post_id\": 2773, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"Victor Herrera\", \"post_text\": \"Hello,\\nTake a look at this code from: https://github.com/hpcc-systems/ecl-ml/ ... onTree.ecl\\n\\n...\\ntrainer1:= Classify.DecisionTree.GiniImpurityBased(5, 1); \\nmodel1:= trainer1.LearnD(Indep, Dep);\\ntrainer2:= Classify.DecisionTree.C45(FALSE); // Unpruned\\nmodel2:= trainer2.LearnD(Indep, Dep);\\n\\nOUTPUT(model1, NAMED('Model1'));\\nOUTPUT(SORT(trainer1.Model(model1), level, node_id), NAMED('DecTree_1'), ALL);\\nOUTPUT(model2, NAMED('Model2'));\\nOUTPUT(SORT(trainer2.Model(model2), level, node_id), NAMED('DecTree_2'), ALL);\\n...\\n//Classifying independent test data and comparing with dependent test data \\nresults1:= trainer1.ClassifyD(indep_t, model1);\\nresults11:= Classify.Compare(dep_t, results1);\\nresults2:= trainer2.ClassifyD(indep_t, model2);results21:= Classify.Compare(dep_t, results2);\\n...\\n//Showing Results\\nOUTPUT(results11.CrossAssignments, NAMED('CrossAssig1'));\\nOUTPUT(results11.RecallByClass, NAMED('RecallByClass1'));\\nOUTPUT(results11.Accuracy, NAMED('Accur1'));\\nOUTPUT(results21.CrossAssignments, NAMED('CrossAssig2'));\\nOUTPUT(results21.RecallByClass, NAMED('RecallByClass2'));\\nOUTPUT(results21.Accuracy, NAMED('Accur2'));\\n\\nYou can find Decision Tree classifier at the end of https://github.com/hpcc-systems/ecl-ml/ ... 
assify.ecl\\n\\nRegards\\nVictor\", \"post_time\": \"2012-11-13 22:46:42\" },\n\t{ \"post_id\": 1968, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"HPCC Staff\", \"post_text\": \"[quote="chhaya":2dfhc5yv]hi\\n\\ndabayliss from your reply what i understood is classifier based on Decision tree is not yet developed right ?\\n\\nCorrect, it is not yet developed.\", \"post_time\": \"2012-07-13 13:03:50\" },\n\t{ \"post_id\": 1947, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"chhaya\", \"post_text\": \"Thanks Richard \", \"post_time\": \"2012-07-12 10:57:39\" },\n\t{ \"post_id\": 1946, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"chhaya\", \"post_text\": \"hi\\n\\ndabayliss from your reply what i understood is classifier based on Decision tree is not yet developed right ?\", \"post_time\": \"2012-07-12 10:51:33\" },\n\t{ \"post_id\": 1943, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"nvasil\", \"post_text\": \"For nearest neighbors look at this answer here, it might give you the intuition you need to understand decision tree.\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=23&t=428&sid=7ae1b244e23f5cf11685dcf7dbf21291\", \"post_time\": \"2012-07-12 04:15:33\" },\n\t{ \"post_id\": 1941, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"dabayliss\", \"post_text\": \"Both of our 'Tree' pieces of code are really there to show how to handle Trees in ECL - there are two separate classifiers that can be built from them (KNN & Decision tree). The plan is to build the classifiers using the unified classifier interface in the Classify module - but that has not happened yet.\\n\\nTo answer your question more directly - the closest to what you are looking for is the Splits output - this records for each node of the tree - which variable is to be interrogated and the new node dependent upon the value of the discrete variable.\", \"post_time\": \"2012-07-11 20:36:58\" },\n\t{ \"post_id\": 1937, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"rtaylor\", \"post_text\": \"
yea i understood that but my concern is which output out of 8 is giving classification.\\n\\nIn NaiveBayes first we create model then that model is used to predict dependent of input provide how can i do same for decision tree?
Someone else will have to answer that -- I know ECL but do not have a background in ML (as I said in my first blog post ) so I'm unfamiliar with the algorithms being implemented.\\n\\nHopefully, someone more ML-knowledgeable will educate us both on this issue. \\n\\nRichard\", \"post_time\": \"2012-07-11 12:47:42\" },\n\t{ \"post_id\": 1935, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\nyea i understood that but my concern is which output out of 8 is giving classification.\\n\\nIn NaiveBayes first we create model then that model is used to predict dependent of input provide how can i do same for decision tree?\", \"post_time\": \"2012-07-11 07:16:15\" },\n\t{ \"post_id\": 1927, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Re: Decision tree query\", \"username\": \"rtaylor\", \"post_text\": \"What you're getting is actually 8 separate results from your call to ML.Trees.Decision because Decision is a MODULE structure in which there are 8 EXPORT definitions that all produce recordset results.\\n\\nIf you're interested in working with, say, splits, then your code should be:
Result := ML.Trees.Decision(RoundDS(Number<=4),RoundDS(Number=5)).splits; \\noutput(Result);
\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-07-10 17:15:15\" },\n\t{ \"post_id\": 1926, \"topic_id\": 425, \"forum_id\": 23, \"post_subject\": \"Decision tree query\", \"username\": \"chhaya\", \"post_text\": \"Hi,\\nWe are trying to explore the algorithm Decision Tree but i'm not able to understand what is the final resultSet this algorithm returns and how to use it.\\n\\nRight now i'm giving it a training DataSet as input so it returns me a resultSet,\\nnow my concern is if i want to use this trained tree on my input data to predict target value.\\n\\nHere, is the code i'm using,\\n\\n\\nIMPORT ML;\\n\\nvalue_record := RECORD\\nUNSIGNED rid;\\nINTEGER outlook; //Sunny = 1 , Overcast = 2 , Rainy = 3\\nINTEGER temperature; //where Hot = 1 , Mild = 2 , Cool = 3\\nINTEGER humidity; //where 1 = High \\n //and 0 = Normal\\nINTEGER wind; //where 1 = True, 0 = False \\nINTEGER playtennins; //where 1 = yes the conditions are good to play tennis\\n //and 0 = No the conditions are not suitable to play tennis \\nEND;\\n\\nInputDS := DATASET([{1,1,1,1,2,0},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{2,1,1,1,1,0},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{3,2,1,1,2,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{4,3,2,1,2,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{5,3,3,2,2,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{6,3,3,2,1,0},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{7,2,3,2,1,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{8,1,2,1,2,0},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{9,1,3,2,2,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{10,3,2,2,2,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{11,1,2,2,1,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{12,2,1,2,1,1},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t{13,3,2,1,1,0}]\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t,value_record);\\n\\nOUTPUT(InputDS);\\nML.ToField(InputDS,OutDS);\\n\\nRoundDS := ML.Discretize.ByRounding(OutDS);\\n\\nResult := ML.Trees.Decision(RoundDS(Number<=4),RoundDS(Number=5)); \\noutput(Result);
\\n\\nlooking at the output i'm not able to understand classification of it what exactly its giving.\\nis format of input provided correct?\", \"post_time\": \"2012-07-10 10:53:51\" },\n\t{ \"post_id\": 2004, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"nvasil\", \"post_text\": \"Let me clarify some things\\nThe build process starts by making balanced splits up to the MedianDepth and then starts making splits according to the spans of the dimensions as I explained in a previous post. Now the Depth means that the building process will keep splitting until it reaches the Depth you set. I have already explained that before.You can identify a node from a node_id. If you read the documentation inside Tree.ecl it explains how node_id is translated. In short node_id is an integer. \\n\\nRead this to find out how the node_id is interpreted read this\\n\\n\\n/* The NodeIds within a KdTree follow a natural pattern - all the node-ids will have the same number of bits - corresponding to the\\n depth of the tree+1. The left-most will always be 1. Moving from left to right a 0 always implies taking the 'low' decision at a node\\n and a 1 corresponds to taking a 'high'. Thus an ID of 6 = 110 has been split twice; and this group is in the high then low group\\n The Splits show the number and value used to split at each point\\n*/\\n
\", \"post_time\": \"2012-07-18 14:46:48\" },\n\t{ \"post_id\": 1998, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n Thanks,Now I came to know that how to give input to the kdTree but in input parameters we gave default values i.e.,t_level Depth=10,t_level MedianDepth=0 .What is that MedianDepth mean and why we need this? And also in output how do we identify the root node,internal nodes and leaf nodes? And which result is the final output?\", \"post_time\": \"2012-07-18 08:49:47\" },\n\t{ \"post_id\": 1997, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"nvasil\", \"post_text\": \"So here are the parameters a KdTree receives:\\n\\n\\n\\nKdTree(DATASET(ML.Types.NumericField) f,t_level Depth=10,t_level MedianDepth=0)
\\n\\nYour dataset must be of this type\\n\\n\\n ML.Types.NumericField\\n //this is how it is defined inside ecl-ml/ML/Types.ecl\\n EXPORT NumericField := RECORD\\n t_RecordID id;\\n\\tt_FieldNumber number;\\n\\tt_FieldReal value;\\n END;\\n
\\nThe Depth of the tree is defined by Depth which by default is 10. This should be roughly log_2(length(f))\\nbut it doesn't have to be exactly that. In fact you want it less.\\n\\nNow what do you mean by dimensionality of your tree?\\n\\nYou are expected to have N d-dimensional data points as an input. \\nLet's say we have two three dimensional points\\n[0.3 0.5 0.1] which has a unique identifier let's call it 0\\nand\\n[-0.2 -0.11 -0.4] which has a unique identifier let's call it 4\\n\\nThe unique identifiers can be any UNSIGNED8 they don't have to be contiguous. \\nNow let's see how we create a dataset out of these 2 points that ecl-ml understands \\nd=DATASET(\\n {0, 0, 0.3}, {0, 1, 0.5}, {0, 2, 0.1},\\n {4, 0, -0.2}, {4, 1, -0.11}, {4, 2, -0.4}, \\n ML.Types.NumericField);\\n\\nMake sure you understand how we did it\", \"post_time\": \"2012-07-18 06:48:20\" },\n\t{ \"post_id\": 1996, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n One more thing I wanted to ask you that is it a right way of giving input to the kdTree algorithm ?\\n\\n\\nvalue_record := RECORD\\ninteger id;\\ninteger t_depth;\\ninteger m_depth;\\nEND;\\n\\nd := dataset([{1,2,1},{2,4,5},{3,9,7},{4,6,9},{5,7,1},{6,3,8},{7,7,9},{8,5,5},{9,8,8}],value_record);\\nd;\\n
\\n\\nHere the first field refers to the id, and the second and third fields refer to the data points. How can I pass the dimension of a tree as an input? Am I missing anything in the input field?\\n\\nPlease help us in this regard...\", \"post_time\": \"2012-07-18 06:15:12\" },\n\t{ \"post_id\": 1990, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"nvasil\", \"post_text\": \"The kdtree does unbalanced splits. It finds the dimension with the highest span inside a node. Let's say the dimension spans [lo, hi]; then it computes the midpoint mid=(lo+hi)/2. The points with value less than mid go to the left subtree and the others go to the right. We call these splitting points pivot points or splits. You can find them here:\\n\\nEXPORT Splits := Res(id=0); // The split points used to partition each node id\\n\\n\\nThe nodes of the tree can be found here:\\nEXPORT Partitioned := Res(id<>0); // The training data - all partitioned\", \"post_time\": \"2012-07-17 14:22:58\" },\n\t{ \"post_id\": 1979, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n   Here in the KdTree algorithm, as I said, I'm getting 6 results as output and I would like to know which output to refer to for further analysis. Also I would like to know how it is splitting the nodes and assigning the node_ids.\", \"post_time\": \"2012-07-16 07:04:02\" },\n\t{ \"post_id\": 1949, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"nvasil\", \"post_text\": \"So as I mentioned before, the algorithm just computes the kdtree, not the nearest neighbors. In my previous response I tried to show how to compute an approximation of the neighbors. \\n\\nAs you can see, the output of the kdtree is a sorted dataset of nodes.\\nThis is what a node is:\\n\\n  EXPORT Node := RECORD\\n    t_node node_id;      // The node-id for a given point\\n    t_level level;       // The level for a given point\\n    ML.Types.NumericField;\\n  END;\\n
\\n\\nIt has a node_id\\nand a NumericField.\\nThe NumericField has the information of your point, such as the id, the dimension and the value.\\n\\nAre we OK up to here?\", \"post_time\": \"2012-07-12 13:50:19\" },\n\t{ \"post_id\": 1945, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"sapthashree\", \"post_text\": \"Hi, I know the basic concepts of KdTree, but the problem is I'm not understanding the output of this algorithm: how it partitions the data points and finds the nearest neighbor points for a particular given set of points as an input.\", \"post_time\": \"2012-07-12 07:59:03\" },\n\t{ \"post_id\": 1942, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"Re: KdTree Query\", \"username\": \"nvasil\", \"post_text\": \"Hi there\\n\\nFirst of all I am not sure if you are familiar with the concept of the kdtree. You can find the definition of a kd-tree here http://www.wikipedia.org/wiki/K-d_tree. In short, kd-trees will partition the space where your data live into a hierarchical set of boxes. It is something like sorting your data, so that most of the time data points that are next to each other are very likely to be nearest neighbors. This is a very simplified explanation but it is probably sufficient if you have no idea about them.\\nHere http://www.ismion.com/documentation/paperboat/all_nearest_neighbors/nearest_neighbor_theory.html you can find the description of the algorithm that computes the nearest neighbor based on a kd-tree.\\n\\nAlthough we have not implemented the nearest neighbor algorithm yet (it is on the to-do list), you can get a good estimate of the nearest neighbors for a particular point id by querying the Partitioned dataset which is exported.\\nSo first you find the node (the leaf of the tree) that contains the record_id you are looking for. All the points in that node are candidate nearest neighbors.\\n\\nHope to have answered your question. Let me know if you need more.\", \"post_time\": \"2012-07-12 04:12:06\" },\n\t{ \"post_id\": 1934, \"topic_id\": 428, \"forum_id\": 23, \"post_subject\": \"KdTree Query\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nI'm trying to explore the KdTree algorithm but am not understanding its output.\\n\\nimport ML.Classify;\\nIMPORT ML,ML.Mat;\\n\\nvalue_record := RECORD\\n integer id;\\n integer t_depth;\\n integer m_depth;\\nEND;\\n\\nd := dataset([{1,2,1},{2,4,5},{3,9,7},{4,6,9},{5,7,1},{6,3,8},{7,7,9},{8,5,5},{9,8,8}],value_record);\\nd;\\n\\n//Turn into regular NumericField file (with continuous variables)\\nml.ToField(d,o);\\no;\\nm1 := ML.Trees.KdTree(o);\\noutput(m1); \\n\\n
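\\nA rough sketch of how candidate nearest neighbours for one point (say id 3) could be pulled from this output, following the advice elsewhere in this thread about querying the exported Partitioned dataset (this assumes m1 exposes Partitioned with the Node fields shown above):\\n\\nleaf := m1.Partitioned(id = 3)[1].node_id;       // leaf node that contains point 3\\ncandidates := m1.Partitioned(node_id = leaf);    // points in the same leaf are candidate neighbours\\noutput(candidates);\\n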
\\n\\nSo, we would like to know about the 6 results this algorithm gives as output.\\nWhat do all these results analyse individually, and which output should we refer to for further analysis?\\n\\nPlease help us in this regard.\", \"post_time\": \"2012-07-11 06:34:24\" },\n\t{ \"post_id\": 2200, \"topic_id\": 485, \"forum_id\": 23, \"post_subject\": \"Re: Reinforcement learning for power management\", \"username\": \"flavio\", \"post_text\": \"Umairalipathan,\\n\\nUnfortunately I don't have any experience in Matlab, but I (or someone else here) could probably help you if this was done in ECL, on the HPCC Systems platform.\\n\\nHave you tried posting this question in a Matlab forum, instead?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-08-09 11:25:00\" },\n\t{ \"post_id\": 2199, \"topic_id\": 485, \"forum_id\": 23, \"post_subject\": \"Re: Reinforcement learning for power management\", \"username\": \"umairalipathan\", \"post_text\": \"This simulation is being done in MATLAB. It's a long piece of code. What I need is just a hint about the correct method.\", \"post_time\": \"2012-08-09 08:24:13\" },\n\t{ \"post_id\": 2186, \"topic_id\": 485, \"forum_id\": 23, \"post_subject\": \"Re: Reinforcement learning for power management\", \"username\": \"flavio\", \"post_text\": \"Umairalipathan,\\n\\nAre you coding this in ECL? Would you mind posting a code snippet so that I have better context?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2012-08-08 16:02:46\" },\n\t{ \"post_id\": 2182, \"topic_id\": 485, \"forum_id\": 23, \"post_subject\": \"Reinforcement learning for power management\", \"username\": \"umairalipathan\", \"post_text\": \"I am working on a power management problem where I control the power management of a computing board based on the occurrence of events. I am using Reinforcement Learning (the traditional Q-learning) for power management, where the computing board works as a Service Provider (SP) for processing requests (images). The SP is connected to a smart camera and the Power Manager (PM) algorithm runs on the camera, where it issues appropriate power commands (sleep, wake-up) to the SP. The smart camera captures images (requests) based on the occurrence of an event and maintains a Service Queue (SQ) for the requests (images). I also have an ANN-based workload estimator that classifies the current workload as low or high. The state space for the Q-learning algorithm therefore comprises a composite state for Q(s,a), where s=(SR, SQ, SP). SR is the state of the workload. SQ is the state of the service queue and SP is the state of the service provider. Based on the current workload, the state of the queue and the state of the service provider, the PM issues certain commands to the SP (sleep, wake-up). The decision is taken at the following stages:\\n\\n1. SP is idle\\n2. SP just entered the sleep state and SQ>=1\\n3. SP is in the sleep state and SQ transits from 0 to 1.\\n\\nFor each action, a cost is assigned which consists of a weighted sum of the average power consumption and the average latency per request caused by the action. Both the average power consumption and the average latency caused by an action are assigned relative weights as follows:\\n\\nc(s,a)=lambda*p_avg + (1-lambda)*avg_latency\\n\\nwhere lambda is a power-performance parameter. In both the sleep state and the idle state, the action comprises selecting some time-out values from a list of pre-defined time-out values. 
My problem is as follows:\\n\\nUsing the above mentioned cost, it always favors small time-out values in sleep state, because the avg_latency for small time-out values is always less. Hence the cost function for small timeout values is always small. I expect that if I increase the power-performance parameter, lambda, the learning should go for higher power saving at the expense of higher latency. It should, then, select higher time-out values in sleep state. How can I modify the cost function?\", \"post_time\": \"2012-08-08 14:02:29\" },\n\t{ \"post_id\": 2465, \"topic_id\": 519, \"forum_id\": 23, \"post_subject\": \"Re: Using hierarchical clustering to detect user communities\", \"username\": \"HPCC Staff\", \"post_text\": \"A follow up discussion on this topic was handled offline.\", \"post_time\": \"2012-10-04 13:03:49\" },\n\t{ \"post_id\": 2452, \"topic_id\": 519, \"forum_id\": 23, \"post_subject\": \"Re: Using hierarchical clustering to detect user communities\", \"username\": \"jandleman\", \"post_text\": \"Hi Edin,\\n\\nI was able to replace my messy algorithm with the ML Hierarchical Clustering algorithm, but I don't think it will be able to handle then amount of data I am trying to throw at it. I have about 340,000 pairs of accounts + hardware ids. The AggloN function is doubling this number of data items and then self-joining it. That means it will produce a dataset with about 460 billion rows! I am running this on a single-node installation and have realized that I don't have enough disk space for this, plus it would probably need to run for at least a week or two to finish. I either need a more efficient algorithm or more hardware! \\n\\nCan you think of any way to process this more efficiently?\\n\\nThanks,\\nJohn\", \"post_time\": \"2012-09-28 00:42:37\" },\n\t{ \"post_id\": 2356, \"topic_id\": 519, \"forum_id\": 23, \"post_subject\": \"Re: Using hierarchical clustering to detect user communities\", \"username\": \"edin.muharemagic@lexisnexis.com\", \"post_text\": \"Hi John,\\n\\nYou could do something like this:\\n\\nIMPORT * FROM ML;\\n\\nd := DATASET([{1,1,1},{1,2,1},{2,2,1},{2,3,1},{3,4,1},{3,5,1},{4,4,1},{4,6,1},{5,1,1},{5,3,1},{6,7,1}], Types.NumericField);\\n\\nA := ML.Cluster.AggloN(d,4);\\nA.Dendrogram;\\n\\nThe dataset d reprsents your sample data packaged into the NumericField format, where id field represents your account ids, number field represents your HW ids, and value indicates that the account id and HW id showed up together.\\n\\nEdin\", \"post_time\": \"2012-09-14 19:47:40\" },\n\t{ \"post_id\": 2338, \"topic_id\": 519, \"forum_id\": 23, \"post_subject\": \"Using hierarchical clustering to detect user communities\", \"username\": \"jandleman\", \"post_text\": \"I am looking for an efficient way to detect SAAS user communities. For each user session, we collect data including the account id and computer hardware identifier (mac address, etc). Using records consisting of account id + hardware identifiers, I am looking for communities of users. Here is a simplified example:\\n\\naccount 1 used computers A and B. \\nAccount 2 used computers B and C.\\nAccount 3 used computers D and E.\\nAccount 4 used computers E and F.\\nAccount 5 used computers A and C.\\nAccount 6 used computer G.\\n\\nI want to find clusters based on shared computer usage. So, in this example, accounts 1, 2, and 5 form one cluster, and accounts 3 and 4 form another. Account 6 is in a cluster by itself.\\n\\nI have written an algorithm in ECL which does this:\\n\\n1. 
self join the file of account+hw id to itself where left HW id = right HW id.\\n2. organize the pairs of accounts (left and right) with the lowest ordered account on the left.\\n3. deduplicate this set of account pairs.\\n4. Repeatedly self join and then deduplicate the pairs of accounts until the number of pairs stops increasing.\\n5. Deduplicate the final set so that each right account is associated with the lowest valued left account. I now have "clusters" of accounts identified by a common "ancestor" account.\\n\\nIn reality, the algorithm is a bit more complicated than this, but this summarizes the important details. Also, I am starting with hundreds of millions of account+hw id pairs. As you can imagine, this runs for a long time! I have really been putting Thor to work with his large hammer! \\n\\nFinally, my question: Can I use the AggloN function to do this more efficiently? If so, how? I have read through the machine learning documentation, and I am fuzzy on whether the algorithm is designed to do this, and if so, how to run it. \\n\\nThanks,\\nJohn\", \"post_time\": \"2012-09-14 06:09:19\" },\n\t{ \"post_id\": 2791, \"topic_id\": 607, \"forum_id\": 23, \"post_subject\": \"Re: bigdata applications\", \"username\": \"arjuna chala\", \"post_text\": \"Nikovv,\\n\\nWe will be glad to help you. I have some pointers, but it would be best to discuss more via email (info@hpccsystems.com) to really understand your data. \\n\\nIn general, in the mobile phone industry, you would be interested in analytics to better understand the customers and networks. You would also be interested in finding patterns and revelations. For example, network congestion during certain times of the day, coverage issues, predicting customer enrollment if coverage is expanded. For sales you can generate attrition models, up-selling models, etc.\\n\\nPlease feel free to drop us an email and we can set up a call to discuss.\\n\\nThank You\\n\\nArjuna Chala\", \"post_time\": \"2012-11-15 21:04:37\" },\n\t{ \"post_id\": 2789, \"topic_id\": 607, \"forum_id\": 23, \"post_subject\": \"bigdata applications\", \"username\": \"nikovv\", \"post_text\": \"Hi,\\nI am new to big data and I don't know exactly what kind of applications can be done with HPCC. \\nOur company does consulting services for a large mobile phone company. What kind of products can be sold to it? We have access to all the traffic customers generate, sales localized geographically, customer profiles and, you name it, lots of data.\\nWe want to put up a demo server so we can show some demos, but the problem is, we don't have a customer need; how do we generate it for big data applications?\\n\\nThanks in advance\", \"post_time\": \"2012-11-15 18:52:07\" },\n\t{ \"post_id\": 14933, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"rtaylor\", \"post_text\": \"Jeroen,\\n\\nHere's a slightly different approach:
rec := RECORD\\n STRING1 Ltr;\\n STRING10 City;\\n UNSIGNED1 age;\\nEND;\\n\\nds := DATASET([ {'A','Boston',23},\\n {'B','Boston',43},\\n {'C','Boston',29},\\n {'D','Chicago',15},\\n {'E','NY',12},\\n {'F','NY',55},\\n {'G','NY',57},\\n {'H','NY',11}],rec);\\n\\nDNrec := RECORD\\n STRING10 City;\\n DATASET({UNSIGNED1 age}) ages;\\nEND;\\npds := PROJECT(TABLE(ds,{city},city),\\n TRANSFORM(DNrec,SELF.Ages := [],SELF := LEFT));\\ndnds := DENORMALIZE(pds, ds,\\n LEFT.city = RIGHT.city,\\n TRANSFORM(DNrec,\\n SELF.Ages := LEFT.Ages + \\n ROW({RIGHT.age},{UNSIGNED1 age});\\n SELF := LEFT));\\nOutrec := RECORD\\n STRING10 City;\\n UDECIMAL10_1 MedianAge;\\nEND;\\nOutRec XF2(dnds L) := TRANSFORM\\n AgeCnt := COUNT(L.ages);\\n S_ages := SORT(L.ages,age);\\n MidRec := AgeCnt - (AgeCnt DIV 2); \\n SELF.MedianAge := IF(AgeCnt % 2 = 1,\\n S_ages[MidRec].age, \\n (S_ages[MidRec].age + S_ages[MidRec+1].age)/2);\\n SELF := L;\\t\\nEND;\\nPROJECT(dnds,XF2(LEFT));
I created a nested child dataset (using your example data) of cities and their ages to achieve the required data grouping. All the real work is done in the TRANSFORM for the final PROJECT.\\n\\nI ran each version several times, and this version seems to be consistently faster than your original code (although that could change given large amounts of data -- so YMMV );\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-01-31 15:51:46\" },\n\t{ \"post_id\": 14883, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"jeroenbaas\", \"post_text\": \"Perhaps not most efficient, but in a situation where I couldn't directly use the ML package I decided to calculate the median by taking the middle value(s):\\n
\\nrec := RECORD\\n STRING1 Ltr;\\n STRING10 City;\\n unsigned1 age;\\nEND;\\n\\nds := dataset([ {'A','Boston',23},\\n{'B','Boston',43},\\n{'C','Boston',29},\\n{'D','Chicago',15},\\n{'E','NY',12},\\n{'F','NY',55},\\n{'G','NY',57},\\n{'H','NY',11}],rec);\\n\\n// count records per city\\nt_counts_format := RECORD\\n\\tSTRING10 City:=ds.City;\\n\\tUNSIGNED3 recordCount:=COUNT(GROUP);\\nEND;\\nt_counts := TABLE(\\n\\tds,\\n\\tt_counts_format,\\n\\tds.City\\n);\\n// join ds with count\\nds_city_count_layout:=RECORD\\n STRING1 Ltr;\\n STRING10 City;\\n unsigned1 age;\\n\\tUNSIGNED3 recordCount;\\nEND;\\nds_city_count_layout join_ds_city_count(ds L, t_counts R):=TRANSFORM\\n\\tSELF:=L;\\n\\tSELF.recordCount:=R.recordCount;\\nEND;\\nds_city_count:=JOIN(\\n\\tds,\\n\\tt_counts,\\n\\tLEFT.City=RIGHT.City\\n);\\n\\n//iterate and only keep the median(s): 1 value for uneven recordCount, 2 values if the group count is even.\\nds_city_count_grouped_city := GROUP(SORT(ds_city_count,City,age),City); //then group them\\nds_city_count_layout MedianValues(ds_city_count_grouped_city L, INTEGER C) :=\\n\\tTRANSFORM, SKIP((L.recordCount % 2 = 0 AND (C < L.recordCount/2 OR C > ((L.recordCount/2)+1))) OR (L.recordCount % 2 != 0 AND C != ((L.recordCount+1)/2)))\\n\\tSELF := L;\\nEND;\\nds_city_count_grouped_city_median_values := PROJECT(ds_city_count_grouped_city,\\n MedianValues(LEFT,COUNTER));\\n// median is now average of values in the group.\\ncity_median_age_format:=RECORD\\n\\t\\tSTRING10 City:=ds_city_count_grouped_city_median_values.City;\\t\\t\\n\\t\\tUDECIMAL10_5 median_age:=AVE(GROUP,ds_city_count_grouped_city_median_values.age);\\nEND;\\ncity_median_age:=TABLE(\\n\\tds_city_count_grouped_city_median_values,\\n\\tcity_median_age_format,\\n\\tds_city_count_grouped_city_median_values.City\\n);\\nOUTPUT(city_median_age);\\n
\", \"post_time\": \"2017-01-30 19:17:39\" },\n\t{ \"post_id\": 2957, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot..\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-12-05 07:49:59\" },\n\t{ \"post_id\": 2948, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nTry this:
IMPORT ML,STD;\\n\\nrec := RECORD\\n STRING1 Ltr;\\n STRING10 City;\\n\\tunsigned1 age;\\nEND;\\n\\nds := dataset([ {'A','Boston',23},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'B','Boston',43},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'C','Boston',29},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'D','Chicago',15},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'E','NY',12},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'F','NY',55},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'G','NY',57},\\n\\t\\t\\t\\t\\t\\t\\t\\t{'H','NY',61}],rec);\\n\\nt1 := SORT(TABLE(ds,{city},City),City);\\t\\t\\t\\t\\t\\t\\t\\t\\n\\nCityNums := PROJECT(t1,TRANSFORM({UNSIGNED4 Num,STRING10 City},SELF.Num := COUNTER;SELF := LEFT));\\n\\nML.Types.NumericField XF(ds L, integer C) := TRANSFORM\\n SELF.id := C;\\n\\tSELF.number := CityNums(City = L.City)[1].Num;\\n\\tSELF.value := L.age;\\nEND;\\n\\nP := PROJECT(ds,XF(LEFT,COUNTER));\\n\\nML.FieldAggregates(P).Medians;\\n
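\\nWith the sample data above this should return one median per city number: Boston 29, Chicago 15 and NY 56 (the NY ages are 12, 55, 57 and 61, so the two middle values average to (55+57)/2 = 56, assuming the even-count case is averaged).\\n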
HTH,\\n\\nRichard\", \"post_time\": \"2012-12-04 15:17:13\" },\n\t{ \"post_id\": 2938, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"ksviswa\", \"post_text\": \"Thanks Richard and Edin,\\n\\nThe recent code from github solved the median calculation issue for a particular column..\\n\\nHow the same can be applied to a group..?\\n\\nSample Input to calculate median based on a grouped value.\\n\\n\\n\\n'A','Boston',23\\n'B','Boston',43\\n'C','Boston',29\\n\\n'D','Chicago',15\\n\\n'E','NY',12\\n'F','NY',55\\n'G','NY',57\\n'H','NY',61\\n\\n
\\n\\nProblem Statement : Median age per city\\n\\nSample Output :\\n\\n\\n\\ncity MedianAge\\n---------- -----------\\nBoston 29\\nChicago 15\\nNY 56\\n\\n
\\n\\n\\nCan we use FieldAggregates.Medians to compute this result or any other logic to be implemented.\\n\\nAny pointers regarding the same would be highly appreciated.\\n\\nThanks a lot in advance.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-12-04 06:02:59\" },\n\t{ \"post_id\": 2937, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"edin.muharemagic@lexisnexis.com\", \"post_text\": \"Hi ksviswa,\\n\\nHere's how you can use the FieldAggregates.Medians:\\n\\nIMPORT ML;\\n \\nmyData := DATASET([{1,1,1}, {2,2,1}, {3,5,2}, {4,7,6}],{UNSIGNED rid; REAL f1; REAL f2;}); \\n// Turn into regular NumericField file (with continuous variables)\\nML.ToField(myData,o);\\nML.FieldAggregates(o).Medians;\\n\\nThe output will have median values for both f1 and f2 columns.\\n\\nPlease make sure your ecl-ml code is up to date. You can get the most recent version at : https://github.com/hpcc-systems/ecl-ml\\n\\nEdin\", \"post_time\": \"2012-12-03 23:02:54\" },\n\t{ \"post_id\": 2933, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Re: Calculate Median\", \"username\": \"rtaylor\", \"post_text\": \"ksviswa,\\n\\nThis code shows you how it works:IMPORT ML,STD;\\n\\nds := DATASET([{20120101},{20120102},{20120103},{20120104},{20120105},{20120106}],{integer d});\\n\\nML.Types.NumericField XF(ds L, integer C) := TRANSFORM\\n SELF.id := C;\\n\\tSELF.number := 1;\\n\\tSELF.value := STD.Date.FromJulianDate(L.D);\\nEND;\\n\\nP := PROJECT(ds,XF(LEFT,COUNTER));\\n\\nAVG2(REAL L, REAL R) := AVE(L,R);\\nDateStr(REAL jdate) := STD.Date.ToString(STD.Date.ToJulianDate(ROUND(jdate)),'%Y-%m-%d');\\n\\nSimples := ML.FieldAggregates(P).Simple;\\nMinval := Simples[1].minval;\\nMaxval := Simples[1].maxval;\\nMeanval := Simples[1].mean;\\n\\nQuarts := ML.FieldAggregates(P).NTileRanges(4);\\nQuart1 := AVG2(Quarts[1].max,Quarts[2].min);\\n// Quart2 := AVG2(Quarts[2].max,Quarts[3].min);\\nQuart2 := ML.FieldAggregates(P).Medians[1].median;\\nQuart3 := AVG2(Quarts[3].max,Quarts[4].min);\\n\\nOutDS := DATASET([{'Min',DateStr(Minval)},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'1st Qu',DateStr(Quart1)},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Median',DateStr(Quart2)},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Mean',DateStr(Meanval)},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'3rd Qu',DateStr(Quart3)},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t{'Max',DateStr(Maxval)}],\\n {STRING10 Prompt,STRING10 Val});\\nOutDS;
HTH,\\n\\nRichard\", \"post_time\": \"2012-12-03 20:40:58\" },\n\t{ \"post_id\": 2916, \"topic_id\": 641, \"forum_id\": 23, \"post_subject\": \"Calculate Median\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow to calculate the median for a particular column..?\\n\\nTried looking at the concept of FieldAggregates.Medians, but not clear about the usage.\\n\\nCan anybody share a example on how to use Median..?\\n\\nCan median be used for cross tab reports (to calculate median based on a grouped value)...?\\n\\nKindly help me regarding this.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nksviswa\", \"post_time\": \"2012-12-01 16:51:46\" },\n\t{ \"post_id\": 6754, \"topic_id\": 652, \"forum_id\": 23, \"post_subject\": \"Re: Compute 95th Percentile\", \"username\": \"john holt\", \"post_text\": \"I'd like to extend Edin's remark a bit.\\n\\nIn your particular case, suppose that you had 25 observations. There is an observation at the 92nd, 96th, and 100th percentile, but not at the 95th. So, the real question is what do you want the answer to be when there is nothing at the 95th percentile? \\n\\nI usually want to be the closest to 95, which would be 96 in the 25 observation case. You can determine the formulae to set the target percentile by noting that you will want to record at either the position of CEILING(94*Record_Count/100) or FLOOR(94*Record_Count/100).\", \"post_time\": \"2015-01-06 15:09:43\" },\n\t{ \"post_id\": 2978, \"topic_id\": 652, \"forum_id\": 23, \"post_subject\": \"Re: Compute 95th Percentile\", \"username\": \"edin.muharemagic@lexisnexis.com\", \"post_text\": \"Hi Ksviswa,\\n\\nThe ML Library Reference (http://cdn.hpccsystems.com/pdf/machinelearning.pdf) reads:\\nThe NTiles are closely related to terms like ‘percentiles’, ‘deciles’ and ‘quartiles’, which allow you to grade each score according the a ‘percentile’ of the population. The name ‘N’ tile is there because you get to pick the number of\\ngroups the population is split into. Use NTile(4) for quartiles, NTile(10) for deciles and NTile(100) for percentiles. NTile(1000) can be used if you want to be able to split populations to one tenth of a percent. Every group (or Tile)\\nwill have the same number of records within it (unless your data has a lot of duplicate values because identical values land in the same tile). The following example demonstrates the possible use of NTiling.
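\\n\\nThat example is not reproduced in the quote above, but a minimal sketch of NTiling, reusing the ToField and FieldAggregates calls shown elsewhere in this thread, might look like this:\\n\\nIMPORT ML;\\nmyData := DATASET([{1,23},{2,43},{3,29},{4,15},{5,12},{6,55},{7,57},{8,61}],{UNSIGNED rid; REAL age;});\\nML.ToField(myData,o);\\nquartiles := ML.FieldAggregates(o).NTiles(4); // assigns each value to one of 4 tiles\\nOUTPUT(quartiles);\\n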
\\n\\nYou may want to split your set of 100 numbers into 100 groups where each group will have the same number of elements (1 in this case), but you cannot split the set of 20 numbers into 100 groups and expect that every group will have the same number of elements, as 80 groups will have zero elements.\\n\\nHTH,\\nEdin\", \"post_time\": \"2012-12-10 20:49:07\" },\n\t{ \"post_id\": 2975, \"topic_id\": 652, \"forum_id\": 23, \"post_subject\": \"Compute 95th Percentile\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHow do we compute the 95th percentile for a given set of values?\\n\\nTried using the following code:\\n\\n\\n\\nx := ML.FieldAggregates(y).NTiles(100);\\n\\nz := x(ntile = 95);\\n\\n
\\n\\nThe code above gives me the 95th percentile, but in cases where the set of values is much smaller than 100, there may not be a particular ntile value for 95. How do we compute it in such scenarios?\\n\\n\\n\\n\\nSet of 100 numbers : 1,2,3,...100\\n\\n95th Percentile : 95.0\\n\\n\\nSet of 20 Numbers : 1,2,3...20\\n\\n95th Percentile : 19 or Blank depending on the value of ntile.\\n\\n
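\\nOne possible workaround, sketched from the CEILING/FLOOR position idea suggested elsewhere in this thread (assuming y is the NumericField dataset above and that field number 1 holds the values):\\n\\nvals := SORT(y(number = 1), value);\\nn := COUNT(vals);\\npos := ROUNDUP(95 * n / 100);  // position of the 95th percentile value in the sorted set\\np95 := vals[pos].value;\\nOUTPUT(p95);\\n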
\\n\\nKindly help regarding the same.\\n\\nThanks and Regards,\\nksviswa\", \"post_time\": \"2012-12-10 12:45:09\" },\n\t{ \"post_id\": 4987, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"Tankatanka\", \"post_text\": \"Thank you all.\", \"post_time\": \"2013-12-02 09:41:28\" },\n\t{ \"post_id\": 3056, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"flavio\", \"post_text\": \"This presentation is also quite general, and may be useful: http://www.math.ucdavis.edu/~tracy/cour ... ussian.pdf\\n\\nFlavio\", \"post_time\": \"2013-01-04 13:52:47\" },\n\t{ \"post_id\": 3048, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"Tankatanka\", \"post_text\": \"Thank you very much flavio for those links. I have gone through them and they are very useful. Do you have any more links or materials on similar tutorials. I am presently trying to learn probabilistic modeling uisng Gaussian distribution, especially the multivariate one.\\n\\nThanks.\", \"post_time\": \"2013-01-04 03:49:24\" },\n\t{ \"post_id\": 3045, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"flavio\", \"post_text\": \"There are a number of those in Matlab Central, for example, this: http://www.mathworks.com/matlabcentral/ ... stribution. And there is also this blog with some code samples: http://mrmartin.net/?p=223.\\n\\nFlavio\", \"post_time\": \"2013-01-03 13:13:23\" },\n\t{ \"post_id\": 3044, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"Tankatanka\", \"post_text\": \"Thank you very much flavio. I am actually a complete beginner in Matlab and i am doing some research in machine learning. 
Do you know where i can get tutorials on matlab implementation of univariate and multivariate Gaussian distribution to fine mean, variance and co-variance matrix of multivariate gaussian.\\n\\nThanks.\", \"post_time\": \"2013-01-03 03:08:33\" },\n\t{ \"post_id\": 3043, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"flavio\", \"post_text\": \"In that case, I suggest that you take a look at this Matlab reference guide section: http://www.mathworks.com/help/matlab/ref/cov.html.\\n\\nFlavio\", \"post_time\": \"2013-01-03 03:01:48\" },\n\t{ \"post_id\": 3042, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"Tankatanka\", \"post_text\": \"I need the implementation code in matlab please.\", \"post_time\": \"2013-01-03 02:31:51\" },\n\t{ \"post_id\": 3039, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Re: Covariance matrix of an n-dimensional random vector\", \"username\": \"flavio\", \"post_text\": \"Tankatanka,\\n\\nthe algorithm to calculate the covariance metric would be the same in any programming language, being this Matlab/Octave, Python or ECL.\\n\\nDo you need help implementing a covariance matrix in ECL?\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-01-02 14:43:32\" },\n\t{ \"post_id\": 3031, \"topic_id\": 663, \"forum_id\": 23, \"post_subject\": \"Covariance matrix of an n-dimensional random vector\", \"username\": \"Tankatanka\", \"post_text\": \"Consider an n-dimensional random vector X=(X_1,X_2,...X_n) whose\\ncoordinates X_i are independent Gaussians with mean 0 and variance i.\\nWhat is the covariance matrix of X? Can anyone help me with this answer Please.\", \"post_time\": \"2012-12-27 23:17:28\" },\n\t{ \"post_id\": 3539, \"topic_id\": 780, \"forum_id\": 23, \"post_subject\": \"Re: Errors when using ML.Discretize in transforms\", \"username\": \"tdelbecque\", \"post_text\": \"By the way, may be it is worth also to look at why using the ML attributes directly in TransformDiscretize transformation wind up in a compilation error (this is why I had to write the DoDiscretize function)\\n\\nThanks, Thierry.\", \"post_time\": \"2013-02-22 17:42:25\" },\n\t{ \"post_id\": 3538, \"topic_id\": 780, \"forum_id\": 23, \"post_subject\": \"Re: Errors when using ML.Discretize in transforms\", \"username\": \"tdelbecque\", \"post_text\": \"Thanks.\\n\\nThierry.\", \"post_time\": \"2013-02-22 17:22:27\" },\n\t{ \"post_id\": 3537, \"topic_id\": 780, \"forum_id\": 23, \"post_subject\": \"Re: Errors when using ML.Discretize in transforms\", \"username\": \"edin.muharemagic\", \"post_text\": \"Hello tdelbecque,\\n\\nThank you for reporting this error. Your code uses the ML.Discretize correctly, and ML.Discretize.ByTiling (x, 10) should not have produced values greater than 10.\\n\\nYour ECL code sample may have uncovered a platform problem, and I have opened an issue to track it (https://track.hpccsystems.com/browse/HPCC-8846).\\n\\nIn the meantime, if you run your code against the hThor target, you will be getting correct results.\\n\\nThanks,\\nEdin\", \"post_time\": \"2013-02-22 17:19:48\" },\n\t{ \"post_id\": 3507, \"topic_id\": 780, \"forum_id\": 23, \"post_subject\": \"Errors when using ML.Discretize in transforms\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI am wondering if I am misusing ML.Discretize, or if there is a bug in the library, may be related to the macros. 
I guess the first option is the right one ...\\n\\nI have a dataset DS1 that contains a child dataset CHILD in its layout. The child dataset contains values that I want to discretize. I tried the obvious way of applying a PROJECT on DS1 using a transformation that would compute a ML.Discretize.ByTiling (x, 10) on each CHILD element (that is to say, I am interested in the deciles). Doing this results indeed in values greater than 10, which is incorrect imho for deciles. Certainly I am missing something, or is it possible that the macro mechanism is failing there ? I did not have time yet to investigate this point. If someone is interested, following is a code snippet that makes the problem appear. Here for example is an output of one run of this code snippet, in which discvalue=11 or 12 should not have happened.\\n\\n discvalue\\tn\\n1\\t12\\t2\\n2\\t11\\t11\\n3\\t10\\t30\\n4\\t9\\t38\\n5\\t8\\t49\\n6\\t7\\t60\\n7\\t6\\t50\\n8\\t5\\t50\\n9\\t4\\t60\\n10\\t3\\t50\\n11\\t2\\t50\\n12\\t1\\t50\\n\\nThanks, Thierry.\\n\\n/************************************************************************\\n Begin of code snippet\\n*************************************************************************/\\nIMPORT ML;\\nIMPORT ML.Discretize;\\n\\n/*\\n * Fake dataset creation\\n */\\nR := RECORD\\n UNSIGNED8 value;\\n UNSIGNED1 grp := 0; // aka segments\\n END;\\n\\nempty := DATASET ([{0,0}], R);\\n\\nR T (R x) := TRANSFORM\\n SELF.value := RANDOM(); // simulated value\\n END;\\n\\n// ds is the fakesimulated dataset of segmented data. We artificially create 10 segments.\\n\\nds := PROJECT ( NORMALIZE (empty, 500, T(LEFT)),\\n TRANSFORM ( R, \\n SELF.grp := LEFT.value - 10*(UNSIGNED)(LEFT.value/10); // 10 segments\\n SELF := LEFT));\\n\\n/*\\n * now we denormalize the data, to get a new data set\\n * with one record per segment, and per segment values in a child dataset\\n */\\n\\nRDENORM := RECORD // The denormalized record \\n UNSIGNED1 grp;\\n DATASET({UNSIGNED8 value}) recs := DATASET ([], {UNSIGNED8 value});\\n END;\\n\\nsegments := PROJECT (TABLE (ds, {ds.grp}, grp), TRANSFORM (RDENORM, SELF := LEFT));\\n\\nRDENORM TDenormalize (RDENORM x, DATASET(R) y) := TRANSFORM \\n SELF.recs := SORT (TABLE (y, {y.value}), value);\\n SELF := x;\\nEND;\\n\\n// segments 2 is the denormalized dataset\\nsegments2 := DENORMALIZE (segments, ds, LEFT.grp = RIGHT.grp, GROUP, TDenormalize (LEFT, ROWS(RIGHT)));\\n\\n\\n// Just a layout to gather the results for each segments\\nRDENORMX := RECORD(RDENORM)\\n DATASET ({UNSIGNED id, UNSIGNED8 value}) recsx;\\n END;\\n\\n \\n// We must put the call to ML macros outside of the TransformDiscretize or the code just do not compile:\\n// This function performs a discretization par tiling of its input\\n\\nDoDiscretize (DATASET ({UNSIGNED8 value}) x) := FUNCTION\\n // Just add \\n {UNSIGNED id, UNSIGNED8 value} d := PROJECT ( x, \\n TRANSFORM ( {UNSIGNED id, UNSIGNED8 value},\\n SELF.id := RANDOM(); // mandatory for ML\\n SELF := LEFT));\\n ML.ToField (d, o, id, 'value');\\n RETURN ML.Discretize.ByTiling (o, 10);\\nEND;\\n\\n\\nRDENORMX TransformDiscretize (RDENORM x) := TRANSFORM\\n SELF.recsx := PROJECT (DoDiscretize (x.recs), TRANSFORM ({UNSIGNED id, UNSIGNED8 value}, SELF := LEFT));\\n SELF := x;\\n END;\\n\\n// Now we compute discretization for each segment:\\nsegments3 := PROJECT (segments2, TransformDiscretize (LEFT));\\n\\n// Histogram of the discretized values.\\n// Some values in recsx are greater than 10, that should not happen ...\\ndisc := NORMALIZE (segments3, 
LEFT.recsx, TRANSFORM ({UNSIGNED grp, UNSIGNED discvalue}, SELF.grp := LEFT.grp; SELF.discvalue := RIGHT.value));\\ntbl := SORT (TABLE (disc, {disc.discvalue, UNSIGNED n := COUNT(GROUP)}, discvalue), -discvalue);\\nOUTPUT (tbl);\\n\\n//OUTPUT(segments3);\\n\\n//OUTPUT (disc);\\n\\n/************************************************************************\\n End of code snippet\\n*************************************************************************/\", \"post_time\": \"2013-02-20 15:36:36\" },\n\t{ \"post_id\": 7482, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"jwilt\", \"post_text\": \"One (painful) way to address this...\\nFlatten modules from, e.g.:\\n ML.Mat.Add\\nto:\\n ML.Mat_Add\\n\\nThis requires both changing of attribute names, as well as replacing references in code.\\nPossibly script-able.\\n\\nThe issue seems to be - the (older) eclserver (which supports the legacy MySQL code repository) only partially supports deeper modules. Note, it *does* support deeper module levels sometimes, but it doesn't seem to support them when in a called attribute (?).\\n\\nAgain, any alternative solutions would be more-than-welcome.\", \"post_time\": \"2015-04-29 04:15:58\" },\n\t{ \"post_id\": 7481, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"jwilt\", \"post_text\": \"Was there ever found another solution to this, besides a separate file-system code repository?\\nE.g., with IMPORTs, or compiler options, or #OPTION, ...?\\nThanks!\", \"post_time\": \"2015-04-29 02:19:46\" },\n\t{ \"post_id\": 5026, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"Hello John,\\n\\nThank you. It seems that you are right indeed ! \\n\\nI did as you suggested, and the code compiles and executes when I set the attributes server to blank and use a local copy of ML. \\n\\nSo it seems that the issue comes from the pre-OSS version of the repository, finaly.\\n\\nThierry.\", \"post_time\": \"2013-12-03 21:40:03\" },\n\t{ \"post_id\": 5025, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"john holt\", \"post_text\": \"Theirry, \\nI think that the problem is an incompatibility between using the old shared repository on the pre-OSS world and the OSS. \\n\\nTo verify that this is indeed the problem, I would like you to clone your ECLIDE configuration, but in the clone make the Attribute server entry blank. You may need to selected Advanced to allow the entries to be changed.\\n\\nYou should of course have a copy of the attributes locally for the ECLIDE.\", \"post_time\": \"2013-12-03 20:54:01\" },\n\t{ \"post_id\": 5024, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"I thought of that and I checked, and it seems not. 
Also I tried from too distinct machines, so too distincts ECLIDE installations, and got the same problem.\\n\\nBut I am going to check again, we never know ...\", \"post_time\": \"2013-12-03 16:51:45\" },\n\t{ \"post_id\": 5023, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque,\\n\\nDo you have more than one ML visable in the ECLIDE repository(or possibly a subfolder that appears more than once)? I'm thinking that might be your problem.\\n\\nIf this is the case, then you need to remove the path of one ML library from "ECL Folders:" text box in the "compiler" tab of preferences.\\n\\nTim\", \"post_time\": \"2013-12-03 16:42:12\" },\n\t{ \"post_id\": 5022, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"Thanks. \\n\\nI am still investigating a bit on this funny problem: for example, if I rename Mul and remove Add in the Each attribute, then it works ... This makes me believe that there is something happening in the code generation and the name resolution. Should it be possible that it comes from our common repository ? \\n\\nBests\", \"post_time\": \"2013-12-03 16:20:14\" },\n\t{ \"post_id\": 5021, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque\\n\\nHere are very high level instructions on how to install the ML library, or at least how I installed it.\\n\\nFirst, get the ML library on your PC, the one where you run ECLIDE. For example, the path where I have the full ML library structure is: C:\\\\Users\\\\humphrtl\\\\Documents\\\\github\\\\ecl-ml (By the way, on my PC, I’m running Microsoft Windows 7).\\n\\nTo setup ECLIDE so it will show the full ML library as part of the repository, go into “preferences” of the ECLIDE and click on the “compiler” tab. There you will find a text box titled “ECL Folders:”. Add the path to the ML library structure to that text box.\\n\\nAfter doing that you will see the ML library structure in the repository.\\n\\nAnd, in fact you will probably see two versions of it: 1) the one you have already installed and 2) the one that you just told ECLIDE about. You will have to remove the path of the currently installed ML library from “ECL Folders” text box.\\n\\nLet me know if you need additional details.\", \"post_time\": \"2013-12-03 15:57:00\" },\n\t{ \"post_id\": 5019, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"I am also absolutly sure that the example is fine, and that there is something specific to my settings.\\n\\nI am indeed working on an Elsevier project with an Elsevier team, but as an external. My e-mail is thierry.delbecque (at) sodad.com.\\n\\nBests, Thierry.\", \"post_time\": \"2013-12-03 15:17:59\" },\n\t{ \"post_id\": 5018, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque,\\n\\nI don't know what your problem is. 
But, I'm trying to find someone who might know.\\n\\nI'm fairly sure the problem isn't in the ML usage example you got from the ecl-samples repository, because I ran it yesterday and this morning without any problems.\\n\\nCurrently, I'm thinking something is wrong with the way the ML library is installed.\\n\\nDo you work for a Reed-Elsevier company? If so, would you give me your work email address, so we can communicate faster?\\n\\nTim\", \"post_time\": \"2013-12-03 15:10:00\" },\n\t{ \"post_id\": 5015, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"I have not solved my problem yet, and of course I will let you know if I find something.\\n\\nAs for my environment:\\n\\n- I am running a Thor cluster in Boca, provided and maintained by LexisNexis (we are a team working on it). So this is a rather orthodox cluster, though I do not know much about it;\\n\\n- the common repository is hosted there;\\n\\n- I am using ECLIDE Version 6.10.4.101.3.8, but the problem is still there with the latest version;\\n\\n- I got the latest ML package from github, and put it in the repository with AMT, but the same problem existed with a previous installation of the ML package.\\n\\nNothing exotic, as you can see.\\n\\nBests.\", \"post_time\": \"2013-12-03 10:01:58\" },\n\t{ \"post_id\": 5009, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque,\\n\\nPlease post and let everyone know how you fixed the problem. I just ran ML_use.Mat.Has.Stats.ecl on my hpcc and it ran fine.\", \"post_time\": \"2013-12-02 19:49:47\" },\n\t{ \"post_id\": 5004, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"You shouldn't be getting all those errors unless something went wrong with your installation of the new ML package.\\n\\nYou mentioned that you tried kmeans, which is not in ML.Mat, and had no trouble. So, I'm thinking something happened to ML.Mat during the installation.\\n\\nTell me about your HPCC environment. Is this a virtual hpcc cluster that you are running on? Or, exactly what is your HPCC environment?\", \"post_time\": \"2013-12-02 18:44:06\" },\n\t{ \"post_id\": 5003, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"Sure, following is the whole content.\\n\\nI have tried to find the problem by disabling some parts of some modules in ML, and it is as if the compiler gets confused between each.mul and ml.mul ... \\n\\nAs you do not have this problem, I guess this is due to my own configuration, though I merely put the latest ML package in the repository. 
Bizarre bizarre ...\\n\\nThanks.\\n\\n\\neclserver \\tml.mat.mul (22,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (26,18) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (28,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (39,5) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (48,4) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (52,17) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (61,19) : 3002: syntax error near "mT" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (63,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (66,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (67,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (68,34) : 3002: syntax error near "." : expected ')'\\neclserver \\tml.mat.mul (70,34) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,16) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,56) : 2167: Unknown identifier "Mul_Default"\\neclserver \\tml.mat.mul (74,5) : 3002: syntax error : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.add (5,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (6,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (7,32) : 3002: syntax error near "." : expected ')'\\neclserver \\tml.mat.add (15,32) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (17,14) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (5,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (6,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (7,32) : 3002: syntax error near "." 
: expected ')'\\neclserver \\tml.mat.add (15,32) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (17,14) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (22,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (26,18) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (28,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (39,5) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (48,4) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (52,17) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (61,19) : 3002: syntax error near "mT" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (63,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (66,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (67,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (68,34) : 3002: syntax error near "." : expected ')'\\neclserver \\tml.mat.mul (70,34) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,16) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,56) : 2167: Unknown identifier "Mul_Default"\\neclserver \\tml.mat.mul (74,5) : 3002: syntax error : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.add (5,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (6,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (7,32) : 3002: syntax error near "." 
: expected ')'\\neclserver \\tml.mat.add (15,32) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (17,14) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (5,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (6,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.add (7,32) : 3002: syntax error near "." : expected ')'\\neclserver \\tml.mat.add (15,32) : 2073: Expected boolean expression\\neclserver \\tml.mat.add (17,14) : 2073: Expected boolean expression\\neclserver \\tml.mat.has (19,25) : 2167: Unknown identifier "Each"\\neclserver \\tml.mat.mul (22,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (26,18) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (28,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (39,5) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (48,4) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (52,17) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (61,19) : 3002: syntax error near "mT" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (63,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (66,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (67,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (68,34) : 3002: syntax error near "." 
: expected ')'\\neclserver \\tml.mat.mul (70,34) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,16) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,56) : 2167: Unknown identifier "Mul_Default"\\neclserver \\tml.mat.mul (74,5) : 3002: syntax error : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (22,11) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (26,18) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (28,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (39,5) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (48,4) : 3002: syntax error near ":=" : expected ';'\\neclserver \\tml.mat.mul (52,17) : 3002: syntax error near "T" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (61,19) : 3002: syntax error near "mT" : expected DICTIONARY, RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '['\\neclserver \\tml.mat.mul (63,1) : 3002: syntax error near "END" : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\tml.mat.mul (66,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (67,13) : 2167: Unknown identifier "Has"\\neclserver \\tml.mat.mul (68,34) : 3002: syntax error near "." 
: expected ')'\\neclserver \\tml.mat.mul (70,34) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,16) : 2073: Expected boolean expression\\neclserver \\tml.mat.mul (72,56) : 2167: Unknown identifier "Mul_Default"\\neclserver \\tml.mat.mul (74,5) : 3002: syntax error : expected APPLY, _ARRAY_, BIG_ENDIAN, BUILD, DICTIONARY, DISTRIBUTION, EMBEDDED, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, _LINKCOUNTED_, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, <typename>, STREAMED, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, '^', ';', '$'\\neclserver \\t(19,23) : 2171: Object 'mat' does not have a member named 'has'\\neclserver \\t(19,23) : 2167: Unknown identifier "Has"\", \"post_time\": \"2013-12-02 18:34:44\" },\n\t{ \"post_id\": 5002, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque,\\n\\nYou getting this error doesn't make sense to me, either. But, would you provide all the error lines you got (in you 1st post you mentioned that you didn't include all the lines.).\", \"post_time\": \"2013-12-02 18:23:17\" },\n\t{ \"post_id\": 5001, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nThere is really nothing else than the content of Use_ML.ML.Mat.Has.Stats.ecl, that I have cut and pasted in an empty builder. I know that there is no reference to ml.mat.mul there, this is why I find this rather strange. Adding the IMPORT do not solve it.\\n\\nThanks.\", \"post_time\": \"2013-12-02 18:14:36\" },\n\t{ \"post_id\": 5000, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"tdelbecque,\\n\\nYour error message, "Error: Unknown identifier "Has" (22, 11 - ml.mat.mul)", indicates that the error occurred on line 22 of ml.mat.mul.\\n\\nI looked at the code of "Use_ML.ML.Mat.Has.Stats.ecl" and there isn't a reference to ml.mat.mul in it. So, you must have other code than just that of "Use_ML.ML.Mat.Has.Stats.ecl". But, with that said, try placing the following line at the top of the code and see if the error goes away:\\n\\nIMPORT * FROM ML.Mat;\\n\\ntlhumphrey2\", \"post_time\": \"2013-12-02 18:08:25\" },\n\t{ \"post_id\": 4995, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tdelbecque\", \"post_text\": \"Hello,\\n\\nI am trying to run your example:\\n\\nUse_ML.ML.Mat.Has.Stats.ecl\\n\\nbut I keep getting this error:\\n\\nError: Unknown identifier "Has" (22, 11 - ml.mat.mul)\\n...(I don't copy the whole bunch of lines)\\n\\nwhich I find weird. I have installed the latest ML package, and I am able to run ML algorithms such as Kmeans for example, so I don't think this is an installation problem. \\n\\nI have spent some amount of time to understand what was going on, but I did not find the problem. 
Did you ever run into this problem, or do you have some kind of idea about the problem ?\\n\\nThanks in advance,\\n\\nBests, Thierry.\", \"post_time\": \"2013-12-02 17:24:43\" },\n\t{ \"post_id\": 4982, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"24 new machine learning library usage examples have been added to those already in https://github.com/hpcc-systems/ecl-sam ... geExamples. Plus, all of those examples already there that had syntax errors have been corrected.\", \"post_time\": \"2013-11-30 20:14:14\" },\n\t{ \"post_id\": 4720, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Re: Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"If anyone has questions about the ML usage examples I've placed on github, please post here. I now have a feed to the Machine Learning forum and should get instant notification when someone posts.\", \"post_time\": \"2013-10-01 15:04:27\" },\n\t{ \"post_id\": 4560, \"topic_id\": 1028, \"forum_id\": 23, \"post_subject\": \"Machine Learning (ML) Usage Examples\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have placed Machine Learning (ML) example programs on github: https://github.com/hpcc-systems/ecl-samples. They are in the folder MLUsageExamples.\\n\\nThese are small example ECL programs that illustrate how to use the functions and macros in the Machine Learning (ML) library. Each of these examples are stand-alone except for the need of ML. To execute any of them, you only need the ML library in your repository.\\n\\nThere are more than 200 example programs. Furthermore, the number of example programs will increase over time. I will post again when I add additional programs.\\n\\nIn MLUsageExamples, there is a README.txt file that tells a little about the organization of these example programs.\\n\\nORGANIZATION\\n\\nMLUsageExamples is organized like the ML library's folder structure. For example an example of how to use ML.Mat.Add will be found in the Mat folder because ML.Mat.Add.ecl is in the ML/Mat folder of the ML library. Furthermore, modules of the ML library will have examples in a folder named after the module. For example, examples of how to use the functions of the ML.Mat.Each.ecl module will be found in the folder MLUsageExamples/Mat/Each.\", \"post_time\": \"2013-09-11 17:34:56\" },\n\t{ \"post_id\": 4988, \"topic_id\": 1135, \"forum_id\": 23, \"post_subject\": \"How to sample points from a multivarete t - Distribution\", \"username\": \"Tankatanka\", \"post_text\": \"I was wondering if there is any Matlab implementation that sample points from a multivariate t-distribution. I have checked here but could not find any. Does any one knows how to do this or have a matlab implementation of it? I would also like someone to point me to resources also where i can read about parameter estimation(ML) of a multivariate t-dsitribution. 
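For the sampling question above, one standard construction (the usual normal/chi-square mixture, stated here for reference rather than taken from this thread) is: draw $Z \sim \mathcal{N}_p(0, \Sigma)$ and, independently, $U \sim \chi^2_\nu$; then

$$X = \mu + Z\,\sqrt{\nu / U}$$

follows a multivariate t distribution with location $\mu$, scale matrix $\Sigma$ and $\nu$ degrees of freedom, so repeating the two draws gives independent samples of $X$.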
I know one can use EM algorithm to do the estimation, do any one have a very good tutorials on how to do this with EM?\\n\\nThanks.\", \"post_time\": \"2013-12-02 09:50:17\" },\n\t{ \"post_id\": 5155, \"topic_id\": 1182, \"forum_id\": 23, \"post_subject\": \"How well do the ML functions work?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm trying to determine how well the following ML functions work on large matrices.\\n\\n Random Forests, \\n Decision Trees,\\n Linear Regression,\\n Logistic Regression,\\n Arima, Naïve Bayes,\\n K-Mean, KD Trees,\\n Agglomerative/Hierarchical,\\n SVD, \\n PCA\\n\\nIf you have used any of the above ML functions with large matrices, would you respond by answering the following questions.\\n\\n 1. How large were the matrices?\\n 2. Did the ML function(s) work as expected?\\n 3. If they didn't work as you expected, e.g. they were too slow or you got errors, how did they malfunction?\\n\\nThank you\", \"post_time\": \"2014-01-24 21:26:39\" },\n\t{ \"post_id\": 5321, \"topic_id\": 1227, \"forum_id\": 23, \"post_subject\": \"Machine Learning Java Development Job in London\", \"username\": \"catqualserv\", \"post_text\": \"Hi everyone, \\n\\nI am currently recruiting for a Machine Learning Java Developer in London. It is a great opportunity to work for the world leader of intent based solutions. \\n\\nThey are looking for someone to join their small team of high calibre developers, designers and machine-learning experts to solve difficult yet interesting problems. \\n\\nYou will need to have a 2:1 degree or above in a field such as Engineering, Computer Science, Statistics or Maths, solid Java programming skills and Machine Learning expertise in one or more of the following areas:\\n\\nReinforment Learning, Bandit Algorithms, Linear/Logistic Regression, Decision Trees, Multivariate Regression, Bayesian Methods, Statistical Methods or General online learning methods. \\n\\nYou will also need to have hands on experience applying Machine Learning Techniques. \\n\\nFor more information please reply to this message, or email catherine.scambler@qualserv.co.uk\", \"post_time\": \"2014-03-04 13:56:02\" },\n\t{ \"post_id\": 5381, \"topic_id\": 1239, \"forum_id\": 23, \"post_subject\": \"Types of Machine Learning Problems\", \"username\": \"Pavani\", \"post_text\": \"Hi,\\n\\nI found this blog ''BEyond'' where the author explains about different types of Machine Learning problems with examples. He also explained very clearly the thousand feet view of Machine Learning. And the lecture is very interesting. People who really want to know more about Machine Learning can view this video with the link below. It would be really a great help to you.\\n\\nhttp://beyond.insofe.edu.in/fast-track- ... -problems/\\n\\nAll The Best \", \"post_time\": \"2014-03-12 07:40:14\" },\n\t{ \"post_id\": 5519, \"topic_id\": 1281, \"forum_id\": 23, \"post_subject\": \"Very well explained Machine Learning Algorithms in these Vid\", \"username\": \"nishu.intellipaat\", \"post_text\": \"Hi All,\\n\\nFound good video tutorial for Machine Learning and Mahout. 
Hope this will be helpful to all.\\n\\nURL – http://tinyurl.com/jwf3u9g\", \"post_time\": \"2014-04-21 12:48:30\" },\n\t{ \"post_id\": 7015, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"AdhoreHum2no\", \"post_text\": \"The first is telling you that your compiler is a different version than the target server, and the others are simply optimizing messages on JOINs performed by the compiler\\n\\n\\n_________________\\nhttp://www.test-king.com/cert-RHCE.htm\\nhttp://www.tabor.edu/\\nhttp://test-king.com/vendor-Oracle.htm\\nhttp://hsbc.edu/\\nhttp://www.callutheran.edu/\", \"post_time\": \"2015-02-24 06:04:55\" },\n\t{ \"post_id\": 5851, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"tlhumphrey2\", \"post_text\": \"We have done testing of SVD on large matrices and performance is slow. We plan to re-implement SVD using PBBlas which lets one do matrice operations after the matrices have been partiioned in blocks and the blocks distributed.\\n\\nThe new implementation will enable us to have perform most matrix operations locally (on individual nodes of a thor cluster).\", \"post_time\": \"2014-06-09 14:29:23\" },\n\t{ \"post_id\": 5848, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"sunil3loq\", \"post_text\": \"Thank you!\\n\\nThe code is running in version rc12.\\n\\nOn a side note, you said ' both SVD and PCA need to be tuned so they run faster'. Can you throw more light on what is meant by tuning?\\n\\nThanks in advance,\\nSunil\", \"post_time\": \"2014-06-09 06:50:36\" },\n\t{ \"post_id\": 5820, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"tlhumphrey2\", \"post_text\": \"You may need to get a more recent HPCC platform. Because I know they made a change to the platform that fixed problems with SVD and PCA. I believe the fix was in version 4.2.2 rc10.\", \"post_time\": \"2014-06-03 14:35:58\" },\n\t{ \"post_id\": 5819, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I just ran the same code on a 20 node thor cluster and got good output results. total thor time was 47 seconds.\\n\\nBy the way, both SVD and PCA need to be tuned so they run faster. But, they are giving me the correct results.\", \"post_time\": \"2014-06-03 13:48:01\" },\n\t{ \"post_id\": 5818, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"bforeman\", \"post_text\": \"In my tests I am not seeing that:\\n\\n
x y value\\n\\nUmat:\\n1\\t1\\t0.5354048905103566\\n2\\t2\\t-0.447944920844673\\n3\\t2\\t0.7959365909975218\\n3\\t1\\t0.6053799989352813\\n1\\t2\\t-0.4072226553133227\\n2\\t1\\t0.5889453795613923\\n\\n\\nSmat:\\n1\\t1\\t10.05304674560946\\n2\\t2\\t0.4198227371058382\\n\\nVmat:\\n\\n1\\t1\\t0.397990352021638\\n2\\t2\\t-0.5537641296810254\\n3\\t2\\t-0.0431935002001337\\n3\\t1\\t0.7417840000174529\\n1\\t2\\t0.8315525300419226\\n2\\t1\\t0.5397778960051997\\n
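A quick way to sanity-check decomposition output like the above is to multiply the three components back together and compare with the original matrix. A minimal sketch, assuming the library's ML.Mat.Mul and ML.Mat.Trans attributes do ordinary matrix multiply and transpose (worth confirming against your local copy):

IMPORT ML;
A := DATASET([{1,1,2.0},{1,2,3.0},{1,3,4.0},
              {2,1,2.2},{2,2,3.3},{2,3,4.4},
              {3,1,2.7},{3,2,3.1},{3,3,4.5}], ML.Mat.Types.Element);
Dec := ML.Mat.Svd(A);
// Rebuild U * S * V^T; the values should match A up to rounding error
Rebuilt := ML.Mat.Mul(ML.Mat.Mul(Dec.UComp, Dec.SComp), ML.Mat.Trans(Dec.VComp));
OUTPUT(Rebuilt, NAMED('Rebuilt_A'));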
\\nI downloaded the very latest ML libraries from the HPCC Systems web site, and my compiler version is:\\n\\nVersion:\\tcommunity_4.2.4-1\\nServer:\\t\\tinternal_4.2.2-1\\nCompiler:\\t4.2.2 community_4.2.2-1\\n\\nBy the way, my total THOR time was only 18 seconds on a three-node THOR, so I'm not sure what is going on with your cluster.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2014-06-03 13:33:21\" },\n\t{ \"post_id\": 5817, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"sunil3loq\", \"post_text\": \"Thanks Bob!\\n\\nBut Why are the resulting matrices have all 'nan' values?\", \"post_time\": \"2014-06-03 03:36:41\" },\n\t{ \"post_id\": 5801, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Re: Problem with SVD and PCA in ML?\", \"username\": \"bforeman\", \"post_text\": \"The warnings that you show are not critical. The first is telling you that your compiler is a different version than the target server, and the others are simply optimizing messages on JOINs performed by the compiler. It should not affect your results or performance.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-02 12:23:24\" },\n\t{ \"post_id\": 5798, \"topic_id\": 1338, \"forum_id\": 23, \"post_subject\": \"Problem with SVD and PCA in ML?\", \"username\": \"sunil3loq\", \"post_text\": \"Can someone direct me as why the following is happening and what is the best step for me to take?\\n\\nI tried to execute sample codes given in the examples of the ML document. Strangely I seem to get warnings and resultant set contained values as nan.\\n\\nThe code I ran-\\nimport ML;\\n\\nA := dataset([{1,1,2.0},{1,2,3.0},{1,3,4.0},\\n{2,1,2.2}, {2,2,3.3},{2,3,4.4},\\n{3,1,2.7},{3,2,3.1}, {3,3,4.5}], ML.Mat.Types.Element);\\n\\noutput(a,named('mainmat'));\\n\\numat := ML.Mat.Svd(A).UComp;\\noutput(umat,named('umat'));\\nsmat := ML.Mat.Svd(A).SComp;\\noutput(smat,named('smat'));\\nvmat := ML.Mat.Svd(A).VComp;\\noutput(vmat,named('vmat'));\\n\\nResults-\\n\\neclcc: unknown(0,0): Warning C3118: Mismatch in subminor version number (4.2.4 v 4.2.2)\\neclcc: /opt/HPCCSystems/share/ecllibrary/ML/Mat/Mul.ecl(13,8): Warning C4531: JOIN condition folded to constant, converting to an ALL join\\neclcc: /opt/HPCCSystems/share/ecllibrary/ML/Mat/Mul.ecl(13,8): Warning C4531: JOIN condition folded to constant, converting to an ALL join\\neclcc: /opt/HPCCSystems/share/ecllibrary/ML/Mat/Mul.ecl(13,8): Warning C4531: JOIN condition folded to constant, converting to an ALL join\\neclcc: /opt/HPCCSystems/share/ecllibrary/ML/Mat/Mul.ecl(13,8): Warning C4531: JOIN condition folded to constant, converting to an ALL join\\n0 error(s), 5 warning(s)\\n\\nThe value column of umat, smat and vmat is all nan. On the top of it the code too 12 mins to run!\\n\\nSimilar is the situation when you use PCA.\\n\\nRegards\\nSunil\", \"post_time\": \"2014-06-02 07:23:40\" },\n\t{ \"post_id\": 5826, \"topic_id\": 1342, \"forum_id\": 23, \"post_subject\": \"Re: SVM Classifier\", \"username\": \"bforeman\", \"post_text\": \"If I may add, if you have the C++ source code, you can use the BEGINC++ embed structure to place it inline with your ECL.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-03 17:47:36\" },\n\t{ \"post_id\": 5824, \"topic_id\": 1342, \"forum_id\": 23, \"post_subject\": \"Re: SVM Classifier\", \"username\": \"tlhumphrey2\", \"post_text\": \"Sorry, no we don't. 
But, this is open source so feel free to make one.\", \"post_time\": \"2014-06-03 16:45:46\" },\n\t{ \"post_id\": 5822, \"topic_id\": 1342, \"forum_id\": 23, \"post_subject\": \"SVM Classifier\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nDo we have a SVM Classifier module available in HPCC ML..? Tried searching the same in Github , but couldnt find it.\\n\\nKindly share if there is any available for SVM Classifier along with any examples. \\n\\nCan we call the C++ SVM library source code directly from HPCC ?\\n\\nKindly advise.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-06-03 16:18:21\" },\n\t{ \"post_id\": 6868, \"topic_id\": 1581, \"forum_id\": 23, \"post_subject\": \"Re: Is there online video sessions regarding the ML IN ECL?\", \"username\": \"pius_francis\", \"post_text\": \"Currently i am starting to learn ML in ECL. So i thought there might me video sessions as in THOR ROXIE and SALT. Surely will get back if i have any queries.\", \"post_time\": \"2015-01-28 05:47:47\" },\n\t{ \"post_id\": 6863, \"topic_id\": 1581, \"forum_id\": 23, \"post_subject\": \"Re: Is there online video sessions regarding the ML IN ECL?\", \"username\": \"tlhumphrey2\", \"post_text\": \"Currently there aren't any videos.\\n\\nIs there are particular issue/problem that we can help you with?\", \"post_time\": \"2015-01-27 14:40:59\" },\n\t{ \"post_id\": 6861, \"topic_id\": 1581, \"forum_id\": 23, \"post_subject\": \"Is there online video sessions regarding the ML IN ECL?\", \"username\": \"pius_francis\", \"post_text\": \"Please let me know if there is online video sessions to learn Machine Learning in ECL. Currently i am going through the machinelearning pdf provided in hpccsystems website.\", \"post_time\": \"2015-01-27 09:25:56\" },\n\t{ \"post_id\": 7260, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\n Any update here please. Thanks in advance.\\n Also I would like to know which kind of deep learning is implemented, Is it like Deep Belief Network or Boltzmann machine or convolutional networks etc...Are there any further proposals in the deep learning.\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-04-06 18:11:18\" },\n\t{ \"post_id\": 7232, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Dear Tim,\\n\\n Even for the example NeuralNetworks_test.ecl, I am getting same error. To ssh the log, should I have some secure key or user id, password? How can I check the log, please update. Thanks in advance.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-03-31 15:57:51\" },\n\t{ \"post_id\": 7221, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Tim, will check it out.\", \"post_time\": \"2015-03-29 19:54:42\" },\n\t{ \"post_id\": 7220, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"tlhumphrey2\", \"post_text\": \"chennapooja,\\n\\nSorry for the late response. I haven't been around for a week.\\n\\nYou need to attach the eclcc.log if you want help with this specific problem. If you decide to go this route, also let me know if you can ssh into the THOR node where the eclcc.log file exists, i.e. 192.168.56.129. 
\\n\\nBut if you are just interested in testing ML.NeuralNetworks, look at the test case ML.Tests.Explanatory.NeuralNetworks_test.ecl.\\n\\nIf you are using ECL IDE, open the folder, ML/Tests/Explanatory, and right click on NeuralNetworks_test and select "Open in Builder Window". This puts NeuralNetworks_test in a builder window so you can execute it.\\n\\nThen, you only have to "Submit" it to have it executed.\\n\\nAlso, you might want to compare NeuralNetworks_test to your code. You probably will learn something that will help you solve your problem.\\n\\nTim\", \"post_time\": \"2015-03-29 19:20:34\" },\n\t{ \"post_id\": 7218, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Dear team,\\n\\n Please throw me some light on above questions.\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-03-27 22:40:32\" },\n\t{ \"post_id\": 7168, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\n Please check if there is something wrong in below code as I am getting error "Error: Compile/Link failed for W20150322-004815 (see '//192.168.56.129/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0), 3000, W20150322-004815"\\n\\n I am trying to test neural networks - multi layer perceptron.\\n <code>IMPORT * FROM ML;\\nIMPORT ML.Mat;\\n//IMPORT ML.Tests.Explanatory as TE;\\n\\n//This is the tennis-weather dataset transformed to discrete number values.\\nmlpRecord := RECORD\\n\\tTypes.t_RecordID id;\\n\\tTypes.t_FieldNumber layer_num;\\n\\tTypes.t_FieldNumber default;\\n\\tTypes.t_FieldNumber nodes;\\nEND;\\n\\nmlp_Data := DATASET([\\n{1,1,1,4},\\n{2,2,1,2},\\n{3,3,1,5}],\\nmlpRecord);\\n\\nweatherRecord := RECORD\\n\\tTypes.t_RecordID id;\\n\\tTypes.t_FieldNumber outlook;\\n\\tTypes.t_FieldNumber temperature;\\n\\tTypes.t_FieldNumber humidity;\\n\\tTypes.t_FieldNumber windy;\\n\\tTypes.t_FieldNumber play;\\nEND;\\n\\nweather_Data := DATASET([\\n{1,0,0,1,0,0},\\n{2,0,0,1,1,0},\\n{3,1,0,1,0,1},\\n{4,2,1,1,0,1},\\n{5,2,2,0,0,1},\\n{6,2,2,0,1,0},\\n{7,1,2,0,1,1},\\n{8,0,1,1,0,0},\\n{9,0,2,0,0,1},\\n{10,2,1,0,0,1},\\n{11,0,1,0,1,1},\\n{12,1,1,1,1,1},\\n{13,1,0,0,0,1},\\n{14,2,1,1,1,0}],\\nweatherRecord);\\n\\nindep_data1:= TABLE(weather_Data,{id, outlook, temperature, humidity, windy});\\ndep_data:= TABLE(weather_Data,{id, play});\\n\\nToField(indep_data1, pr_indep1);\\nindep1 := ML.Discretize.ByRounding(pr_indep1);\\nToField(dep_data, pr_dep);\\ndep := ML.Discretize.ByRounding(pr_dep);\\n\\nindep_data:= TABLE(mlp_Data,{id, layer_num, default, nodes});\\n\\nToField(indep_data, pr_indep);\\nindep := ML.Discretize.ByRounding(pr_indep);\\n\\n\\ntrainer2:= ML.NeuralNetworks(indep); // Unpruned\\nmodel2:= trainer2.NNLearn(pr_indep1, pr_dep, trainer2.IntWeights,trainer2.IntBias);\\nresults2:= trainer2.NNClassify(pr_indep1, model2);\\n\\n//trainer2;\\nOUTPUT(model2, NAMED('Model2'));\\n//OUTPUT(SORT(trainer2.Model(model2), level, node_id), NAMED('DecTree_2'), ALL);\\nresults2;</code>\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-03-23 15:16:17\" },\n\t{ \"post_id\": 7162, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Dear Tlhumphrey,\\n\\n Thanks for the information. I did not have that in my local copy. 
I will go through it and post if I face any confusions.\\n\\n It will be grateful if any documents related to the algorithm development are shared.\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-03-20 21:57:46\" },\n\t{ \"post_id\": 7160, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Re: Multilayer perceptron\", \"username\": \"tlhumphrey2\", \"post_text\": \"We have a multilayer neural network. See ML.NeuralNetworks.ecl.\\n\\nIt is fairly new. So, you may not have it in your local copy of the ecl-ml repository. But, it is in the github repository.\", \"post_time\": \"2015-03-19 13:58:40\" },\n\t{ \"post_id\": 7153, \"topic_id\": 1643, \"forum_id\": 23, \"post_subject\": \"Multilayer perceptron\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\n I have this idea of implementing multi layer perceptron, I have just checked how perceptron is working. I need some guidance and inputs for starting with it. Please provide if pre requisites any are required, like if I need to know Roxie for it or can I start with only ecl. \\n Also please suggest if it can be done for Google summer competition, or proposals have to be submitted only for the listed topics.\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-03-19 00:11:54\" },\n\t{ \"post_id\": 7219, \"topic_id\": 1655, \"forum_id\": 23, \"post_subject\": \"Re: First ML Program - Stuck with an Error\", \"username\": \"bforeman\", \"post_text\": \"Hi Subbu,\\n\\nWell, it's not the same error, but a similar error \\nThe call to KMeans is expecting a DATASET using the Types.NumericField format, which is defined as UNSIGNED, UNSIGNED4, REAL8. The DATASET passed has a typo I believe, and when I correct the typo (dDocumentsMatrix to ddocumentMatrix) and change the lMatrix definition to match the Types.NumericField format:\\n\\n
lMatrix:=ML.Types.NumericField;
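// (Types.NumericField is the library's {id, number, value} layout - UNSIGNED, UNSIGNED4, REAL8 - so this definition simply reuses that layout for the coordinate data.)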
\\n\\nThen the example will compile and run correctly.\\n\\nI have included the corrected code for your convenience.\\n\\nIMPORT ML;\\n\\nlMatrix:=ML.Types.NumericField;\\n\\ndDocumentMatrix:=DATASET([\\n{1,2.4639,7.8579},\\n{2,0.5573,9.4681},\\n{3,4.6054,8.4723},\\n{4,1.24,7.3835},\\n{5,7.8253,4.8205},\\n{6,3.0965,3.4085},\\n{7,8.8631,1.4446},\\n{8,5.8085,9.1887},\\n{9,1.3813,0.515},\\n{10,2.7123,9.2429},\\n{11,6.786,4.9368},\\n{12,9.0227,5.8075},\\n{13,8.55,0.074},\\n{14,1.7074,3.9685},\\n{15,5.7943,3.4692},\\n{16,8.3931,8.5849},\\n{17,4.7333,5.3947},\\n{18,1.069,3.2497},\\n{19,9.3669,7.7855},\\n{20,2.3341,8.5196},\\n{21,0.5004,2.2394},\\n{22,6.5147,1.8744},\\n{23,5.1284,2.0043},\\n{24,3.555,1.3365},\\n{25,1.9224,8.0774},\\n{26,6.6664,9.9721},\\n{27,2.5007,5.2815},\\n{28,8.7526,6.6125},\\n{29,0.0898,3.9292},\\n{30,1.2544,9.5753},\\n{31,1.5462,8.4605},\\n{32,3.723,4.1098},\\n{33,9.8581,8.0831},\\n{34,4.0208,2.7462},\\n{35,4.6232,1.3271},\\n{36,1.5694,2.168},\\n{37,1.8174,4.779},\\n{38,9.2858,3.3175},\\n{39,7.1321,2.2322},\\n{40,2.9921,3.2818},\\n{41,7.0561,9.2796},\\n{42,1.4107,2.6271},\\n{43,5.1149,8.3582},\\n{44,6.8967,7.6558},\\n{45,0.0982,8.2855},\\n{46,1.065,4.9598},\\n{47,0.3701,3.7443},\\n{48,3.1341,8.8177},\\n{49,3.1314,7.3348},\\n{50,9.6476,3.3575},\\n{51,6.1636,5.3563},\\n{52,8.9044,7.8936},\\n{53,9.7695,9.6457},\\n{54,2.3383,2.229},\\n{55,5.9883,9.3733},\\n{56,9.3741,4.4313},\\n{57,8.4276,2.9337},\\n{58,8.2181,1.0951},\\n{59,3.2603,6.9417},\\n{60,3.0235,0.8046},\\n{61,1.0006,9.4768},\\n{62,8.5635,9.2097},\\n{63,5.903,7.6075},\\n{64,4.3534,7.5549},\\n{65,8.2062,3.453},\\n{66,9.0327,8.9012},\\n{67,8.077,8.6283},\\n{68,4.7475,5.5387},\\n{69,2.4441,7.106},\\n{70,8.1469,1.1593},\\n{71,5.0788,5.315},\\n{72,5.1421,9.8605},\\n{73,7.7034,2.019},\\n{74,3.5393,2.2992},\\n{75,2.804,1.3503},\\n{76,4.7581,2.2302},\\n{77,2.6552,1.7776},\\n{78,7.4403,5.5851},\\n{79,2.6909,9.7426},\\n{80,7.2932,5.4318},\\n{81,5.7443,4.3915},\\n{82,3.3988,9.8385},\\n{83,2.5105,3.6425},\\n{84,4.3386,4.9175},\\n{85,6.5916,5.7468},\\n{86,2.7913,7.4308},\\n{87,9.3152,5.4451},\\n{88,9.3501,3.9941},\\n{89,1.7224,4.6733},\\n{90,6.6617,1.6269},\\n{91,3.0622,1.9185},\\n{92,0.6733,2.4744},\\n{93,1.355,1.0267},\\n{94,3.75,9.499},\\n{95,7.2441,0.5949},\\n{96,3.3434,4.9163},\\n{97,8.7538,5.3958},\\n{98,7.4316,2.6315},\\n{99,3.6239,5.3696},\\n{100,3.2393,3.0533}\\n],lMatrix);\\n\\n// dDocumentMatrix := DATASET('~tmp::keren::randomTest',{UNSIGNED ID, REAL A, REAL B, REAL C, REAL D, REAL E}, Flat);\\n\\ndCentroidMatrix:=DATASET([\\n{1,1,1,1,1,1},\\n{2,2,2,2,2,2},\\n{3,3,3,3,3,3},\\n{4,4,4,4,4,4}\\n],{UNSIGNED ID, REAL A, REAL B, REAL C, REAL D, REAL E});\\n\\n//ML.ToField(dDocumentMatrix,dDocuments);\\nML.ToField(dCentroidMatrix,dCentroids);\\n\\n // EXAMPLES\\nKMeans:=ML.Cluster.KMeans(dDocumentMatrix,dCentroids,30,.3); // Set up KMeans with a maximum of 30 iterations and .3 as a convergence threshold\\nKMeans.Allresults; // The table that contains the results of each iteration\\nKMeans.Convergence; // The number of iterations it took to converge\\nKMeans.Result(12); // The results of iteration 12\\nKMeans.Delta(5,15); // The distance every centroid travelled across each axis from iterations 5 to 15\\nKMeans.Delta(0); // The total distance the centroids travelled on each axis\\nKMeans.DistanceDelta(5,15); // The straight-line distance travelled by each centroid from iterations 5 to 15\\nKMeans.DistanceDelta(0); // The total straight-line distance each centroid travelled \\nKMeans.DistanceDelta();
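One small follow-on that may be useful once the job runs (a sketch inferred from the Result() call used elsewhere in this forum, not part of the original post): calling Result() with no argument appears to return only the final, converged centroids, which can be output directly:

FinalCentroids := KMeans.Result();   // final centroid positions after convergence
OUTPUT(FinalCentroids, NAMED('FinalCentroids'));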
\", \"post_time\": \"2015-03-28 14:55:53\" },\n\t{ \"post_id\": 7217, \"topic_id\": 1655, \"forum_id\": 23, \"post_subject\": \"Re: First ML Program - Stuck with an Error\", \"username\": \"kps_mani\", \"post_text\": \"I have attached the folder directory for your reference. I was trying to run the Cluster_Doc file which is available in the TESTS folder. It also threw the same error. Please look into it and let me know what needs to be done here.\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-27 19:57:54\" },\n\t{ \"post_id\": 7203, \"topic_id\": 1655, \"forum_id\": 23, \"post_subject\": \"Re: First ML Program - Stuck with an Error\", \"username\": \"bforeman\", \"post_text\": \"I believe it is how your ML folders are organized. This code compiles file using the folder organization attached below:\\n\\nIMPORT ML;\\n//Define my record layout\\nMyRecordLayout := RECORD\\n UNSIGNED RecordId;\\n REAL XCoordinate;\\n REAL YCoordinate;\\n END;\\n//My dataset\\nX2 := DATASET([{1, 1, 5},\\n {2, 5, 7},\\n {3, 8, 1},\\n {4, 0, 0},\\n {5, 9, 3},\\n {6, 1, 4},\\n {7, 9, 4}], MyRecordLayout);\\n//Three candidate centroids\\nCentroidCandidates := DATASET([{1, 1, 5},{2, 5, 7},{3, 9, 4}], MyRecordLayout);\\n//Convert them to our internal field format\\nML.ToField(X2,fX2);\\nml.ToField(CentroidCandidates, fCentroidCandidates);\\n//Run K-Means for, at most, 10 iterations and stop if delta < 0.3 between iterations\\nfX3 := ML.Cluster.Kmeans(fX2, fCentroidCandidates, 10, 0.3);\\n//Convert the final centroids to the original layout\\nml.FromField(fX3.result(), MyRecordLayout, X3);\\n//Display the results\\nOUTPUT(X3);
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-26 12:43:06\" },\n\t{ \"post_id\": 7197, \"topic_id\": 1655, \"forum_id\": 23, \"post_subject\": \"First ML Program - Stuck with an Error\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI am trying to write my first ML program with the below BWR file. I am getting the below error. Can you please help?\\n\\nError: Unknown identifier before "." (expected :=) (25, 3), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MachineLearning\\\\BWR_TestML.ecl\\nError: Unknown identifier before "." (expected :=) (26, 3), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MachineLearning\\\\BWR_TestML.ecl\\nError: Unknown identifier "Kmeans" (28, 8), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MachineLearning\\\\BWR_TestML.ecl\\nError: Unknown identifier "X3" (32, 8), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\MachineLearning\\\\BWR_TestML.ecl\\n\\n\\nIMPORT * FROM ML;\\nIMPORT * FROM ML.Cluster;\\nIMPORT * FROM ML.Types;\\n//Define my record layout\\nMyRecordLayout := RECORD\\nUNSIGNED RecordId;\\nREAL XCoordinate;\\nREAL YCoordinate;\\nEND;\\n//My dataset\\nX2 := DATASET([\\n{1, 1, 5},\\n{2, 5, 7},\\n{3, 8, 1},\\n{4, 0, 0},\\n{5, 9, 3},\\n{6, 1, 4},\\n{7, 9, 4}], MyRecordLayout);\\n//Three candidate centroids\\nCentroidCandidates := DATASET([\\n{1, 1, 5},\\n{2, 5, 7},\\n{3, 9, 4}], MyRecordLayout);\\n//Convert them to our internal field format\\nml.ToField(X2, fX2);\\nml.ToField(CentroidCandidates, fCentroidCandidates);\\n//Run K-Means for, at most, 10 iterations and stop if delta < 0.3 between iterations\\nfX3 := Kmeans(fX2, fCentroidCandidates, 10, 0.3);\\n//Convert the final centroids to the original layout\\nml.FromField(fX3.result(), MyRecordLayout, X3);\\n//Display the results\\nOUTPUT(X3);\", \"post_time\": \"2015-03-25 17:51:09\" },\n\t{ \"post_id\": 7386, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"Does anyone know how to get a readable copy of the thormaster log off of the HPCC VM image? The screenshot that Cambchen got is unreadable.\", \"post_time\": \"2015-04-16 15:40:15\" },\n\t{ \"post_id\": 7367, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"I could not find the exact string "MP Link CLosed" in the thormaster. But I could find "MP Exception" (shown below) and "Error" (shown in previous screenshot):\\n\\n[attachment=0:q144v7xo]Error 2.PNG\", \"post_time\": \"2015-04-15 18:49:06\" },\n\t{ \"post_id\": 7366, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Selecting hthor worked. Thank you! But any pointers as to why it wouldn't work on thor?\\n\\nAm attaching the log below; it looks mangled though:\\n\\n[attachment=0:390xqc4s]Error.PNG\", \"post_time\": \"2015-04-15 18:44:09\" },\n\t{ \"post_id\": 7360, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you can ssh into the thor master then cd into where the logs are, i.e. cd /var/lib/HPCCSystems/mythor, and take a screenshot of the area of the log where you see your error, i.e. MP link closed error. The log you want is the one that begins with 'thormaster'. 
Then, add it to this thread.\", \"post_time\": \"2015-04-15 15:03:27\" },\n\t{ \"post_id\": 7359, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"Try running on hthor instead of thor and let us know if you get the same error, please.\", \"post_time\": \"2015-04-15 14:44:52\" },\n\t{ \"post_id\": 7356, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"Please try running on hthor instead of thor and let us know if you still get the same error.\", \"post_time\": \"2015-04-15 13:36:47\" },\n\t{ \"post_id\": 7350, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Here it is ...\\n\\n[attachment=0:cp9sc1nk]Compiler Preferences.PNG\", \"post_time\": \"2015-04-14 20:13:58\" },\n\t{ \"post_id\": 7349, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"Would you also attach a screenshot of the compiler tab under preferences, please.\", \"post_time\": \"2015-04-14 20:11:07\" },\n\t{ \"post_id\": 7348, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"[attachment=0:1ogdjg5y]My Repository.PNG\", \"post_time\": \"2015-04-14 20:03:15\" },\n\t{ \"post_id\": 7347, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"[attachment=0:w1mt4jzk]WU Error.PNG\", \"post_time\": \"2015-04-14 20:02:28\" },\n\t{ \"post_id\": 7346, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI have attached the snapshot of another ML code that failed for me.\\n\\nThanks\\nchan\", \"post_time\": \"2015-04-14 19:58:06\" },\n\t{ \"post_id\": 7345, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"tlhumphrey2\", \"post_text\": \"If you are running in ECL IDE, would you should us what your code repository looks like. And, the compiler tab in preferences (Ideally a screenshot of both would be nice. Although, screenshots may be too large to attach)?\", \"post_time\": \"2015-04-14 18:52:51\" },\n\t{ \"post_id\": 7322, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Any code that says IMPORT ML doesn't seem to work. Any other code works fine.\\n\\nFor ex., i tried running Association.ecl file that's present in My Files->ML->Tests->Explanatory folder that shows up in the repository of ECL IDE.\", \"post_time\": \"2015-04-12 17:48:02\" },\n\t{ \"post_id\": 7321, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"bforeman\", \"post_text\": \"OK, what specific file are you trying to Submit? \\nI will try to reproduce on my HPCC VM.\\n\\nBob\", \"post_time\": \"2015-04-12 16:54:00\" },\n\t{ \"post_id\": 7319, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Do you mean c:\\\\Temp ? I cleared that folder an tried again. 
But getting the same error.\", \"post_time\": \"2015-04-11 17:46:04\" },\n\t{ \"post_id\": 7291, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"bforeman\", \"post_text\": \"Try cleaning up your TEMP files as well...\", \"post_time\": \"2015-04-08 20:29:18\" },\n\t{ \"post_id\": 7290, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI just checked again. I am not running out of memory or disk space. But I get this error consistently. I have 8GB RAM and when I ran this WU the memory consumed as shown by the task manager was only 53%.\\n\\nThanks\", \"post_time\": \"2015-04-08 19:38:08\" },\n\t{ \"post_id\": 7283, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Re: Unable to run ML code\", \"username\": \"bforeman\", \"post_text\": \"See the following forum thread:\\n\\nhttp://hpccsystems.com/bb/viewtopic.php?f=10&t=1278&hilit=MP+Link+closed&sid=ea5bf855a2197f888c53c3e776013931\\n\\nIt could be that your machine where your VM is installed is simply running out of memory.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-04-07 20:36:18\" },\n\t{ \"post_id\": 7252, \"topic_id\": 1662, \"forum_id\": 23, \"post_subject\": \"Unable to run ML code\", \"username\": \"chanbchen\", \"post_text\": \"Hi,\\n\\nI just downloaded VMWare and the HPCC VM image. I was able to start the HPCC VM and run some ECL sample code. But I am not able to run any ML code (I downloaded the ML library and tried running the sample code shown on the HPCC ML homepage [k means sample]).\\n\\nI am getting an "MP Link closed" error consistently when I try to run the ML code. But I am able to run non-ML code tough.\\n\\nThanks\", \"post_time\": \"2015-04-04 20:16:20\" },\n\t{ \"post_id\": 7320, \"topic_id\": 1682, \"forum_id\": 23, \"post_subject\": \"Machine Learning book searched\", \"username\": \"Helveticus\", \"post_text\": \"Hi everybody\\n\\nI'm searching a good (and compact) book about multivariate pattern analysis in images with machine learning techniques. I took a machine learning course and used for it the Bishop book but I found it not so great (sometimes difficult).\\n\\nNow, I don't need the book for a lecture but for a project, so the book should perhaps be more practical oriented and related to multivariate pattern analysis in images (image processing). Proofs are not important. More important is that different algorithms/techniques are presented (e.g. SVM etc.) in an easy way with some explanations and perhaps implementation details.\\n\\nDoes somebody know a good book?\\n\\nWhat about the the book "Pattern Classification" from R. Duda, P. Hart, and D. Stork?\", \"post_time\": \"2015-04-11 18:41:34\" },\n\t{ \"post_id\": 7492, \"topic_id\": 1705, \"forum_id\": 23, \"post_subject\": \"Re: ML with eclserver\", \"username\": \"john holt\", \"post_text\": \"I don't see how you can fit multiple folder levels into a single folder repository without changing the attributes. 
Nested modules where the module is a single "file" is all that is possible.\\n\\nSo, you are stuck with needing to modify the current code base to flatten the structure.\\n\\nThe strategy that you are employing, mentioned in ( https://track.hpccsystems.com/browse/ML-250 ), of taking ML.Mat.Add attribute and creating an ML.Mat_Add attribute will indeed be tedious and error prone.\\n\\nConsider a different approach, one that does not require you to add a bunch of files in the ML repository together into a single large file.\\n\\nYour classic repository will need a top level folder for each folder in the repository. I suspect that you will need to make the name unique by adding a suffix or prefix. You will then use MODULE atttributes to export aliases. For instance, in the ML folder you will have a single Mat attribute. This attribute will be a MODULE and EXPORT each of the Mat attributes, such as Add, as EXPORT Add(...args...) := Mat_mod.Add. \\n\\nYou will then need to make some changes to the references to use the prefix or suffix. You should be able to use a sed or Perl script to make these changes.\\n\\nWhat you gain is that you will no longer need to glob up massive attributes.\", \"post_time\": \"2015-04-30 11:35:48\" },\n\t{ \"post_id\": 7487, \"topic_id\": 1705, \"forum_id\": 23, \"post_subject\": \"ML with eclserver\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nHaving trouble running some ML (Mat, specifically) with the older eclserver (along with the legacy MySQL code repository).\\n\\nThere *seems* to be a problem reading deeper nested modules.\\n\\nShould this work?\\nWill be moving to a folder-based repo eventually - and, therefore, eclccserver - but not yet.\\n\\nThanks.\", \"post_time\": \"2015-04-30 04:38:15\" },\n\t{ \"post_id\": 7524, \"topic_id\": 1713, \"forum_id\": 23, \"post_subject\": \"Logistic Regresion / SVM / Random Forest Implementation in M\", \"username\": \"Helveticus\", \"post_text\": \"Hi\\n\\nI would like to implement (L2-regularized) Logistic Regression, (L2 regularized) SVM and Random Forest for muticlass classification in Matlab (without using a toolbox or the corresponding functions in Matlab).\\n\\nDoes somebody know easy implementable pseudocode or easy Code (in Java, R, Python etc.) which I can use for my implementation in Matlab.\", \"post_time\": \"2015-05-01 22:50:54\" },\n\t{ \"post_id\": 7647, \"topic_id\": 1733, \"forum_id\": 23, \"post_subject\": \"Re: Datasets\", \"username\": \"bforeman\", \"post_text\": \"Hi Pooja,\\n\\nMy colleague Dinesh has sent you an email with the appropriate links.\\n\\nFor Cancer dataset, I downloaded the data from here http://seer.cancer.gov/data/ . The page has instructions on how to download the data.\\nLet me know if you have any questions.\\n
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-05-24 16:00:20\" },\n\t{ \"post_id\": 7616, \"topic_id\": 1733, \"forum_id\": 23, \"post_subject\": \"Datasets\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\n Can anyone share me the links to the datasets used for testing sparse auto encoders and stacked sparse autoencoders algorithms in machine learning. I could see examples tested with cancer data set and MNSIT dataset. So please share me the links to these two datasets and I would like an explanation for the output for hardcoded net example SparseAutoencoder_test.ecl. Is it finding patterns for the data after finding weights for 2 iterations? \\n\\n Please help me, thanks in advance.\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-05-20 15:48:21\" },\n\t{ \"post_id\": 7626, \"topic_id\": 1735, \"forum_id\": 23, \"post_subject\": \"Learn Data Science and Participate in Contests\", \"username\": \"solver26\", \"post_text\": \"Those of you looking to participate in analytics/data science contests and win prize money can do so by signing up at CrowdAnalytix.com. One contest is now live, whose link is below.\\n\\nhttps://www.crowdanalytix.com/contests/ ... ion-claims\\n\\nIf you work at a Non Profit or University, you can host a Competition for free at CrowdANALYTIX-a community of data experts, where the solvers of the community will help you solve your problem . You can use the community to help with your research or project.You get multiple solutions to your problem and your students and the solvers learn new skills and develop even better solutions as all content generated (reports, algorithms etc.) will be made available for everyone to access.\\n\\nTake a look at the FAQs below for more information on how to submit your idea.\\n\\nhttp://info.crowdanalytix.com/host-your ... ontest-faq\", \"post_time\": \"2015-05-21 12:24:27\" },\n\t{ \"post_id\": 7719, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"iMikePayne\", \"post_text\": \"Not necessarily. They say in order to use you must flatten the directories.\\n\\n\\nhttps://track.hpccsystems.com/browse/ML-250\\nhttp://hpccsystems.com/bb/viewtopic.php?f=23&t=1705&sid=68d58a4ee73b55dd0f1e5c884f99761e\", \"post_time\": \"2015-06-02 20:34:43\" },\n\t{ \"post_id\": 7718, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"tlhumphrey2\", \"post_text\": \"Do the other threads give you a solution to the problem when the cluster has an attacjed repository? If not, I'll see if I can find an answer, too.\", \"post_time\": \"2015-06-02 20:32:31\" },\n\t{ \"post_id\": 7716, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"iMikePayne\", \"post_text\": \"I think the problem is trying to use the ML library on clusters with repositories attached. At least that's what I'm gathering from the other thread.\", \"post_time\": \"2015-06-02 20:15:12\" },\n\t{ \"post_id\": 7715, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"tlhumphrey2\", \"post_text\": \"I forgot to mention that I just downloaded the latest version of the ML library. 
So, we should be using the same code.\", \"post_time\": \"2015-06-02 20:09:25\" },\n\t{ \"post_id\": 7714, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"tlhumphrey2\", \"post_text\": \"I just tried the following code on the ML development cluster and it worked fine.\\n\\n IMPORT * FROM ML;\\n A := DATASET([\\n {1,1,1},{1,2,2},{1,3,3},\\n {2,1,4},{2,2,5},{2,3,6},\\n {3,1,7},{3,2,8},{3,3,9}\\n ], ML.Mat.Types.Element);\\n OUTPUT(A,NAMED('A'));\\n HasProperties := ML.Mat.Has(A);\\n OUTPUT(HasProperties);\\n
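If the bare module output above is not what you are after, the attribute exercised by the usage example discussed earlier in this forum (Use_ML.ML.Mat.Has.Stats.ecl) can be requested explicitly. A minimal sketch, assuming Stats is an exported member of ML.Mat.Has:

IMPORT ML;
A := DATASET([{1,1,1},{1,2,2},{1,3,3},
              {2,1,4},{2,2,5},{2,3,6},
              {3,1,7},{3,2,8},{3,3,9}], ML.Mat.Types.Element);
OUTPUT(ML.Mat.Has(A).Stats, NAMED('A_Stats'));   // summary statistics for the matrix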
\", \"post_time\": \"2015-06-02 20:08:10\" },\n\t{ \"post_id\": 7712, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"Re: ML Library Import\", \"username\": \"iMikePayne\", \"post_text\": \"I guess this is the same problem listed here and there will be no fix.\\n\\nviewtopic.php?f=23&t=1705\", \"post_time\": \"2015-06-02 19:47:41\" },\n\t{ \"post_id\": 7711, \"topic_id\": 1756, \"forum_id\": 23, \"post_subject\": \"ML Library Import\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI just imported the latest ML code to my repository but some of the code doesn't execute. Specifically, anything that uses ML.Mat.Has or ML.Mat.Each will not compile. \\n\\nFor example, when doing a syntax check on ML.Mat.Mul I get an error\\n\\nError: Unknown identifier "Has" (5, 11), 2167, ML.Mat.Add\\nError: Unknown identifier "Has" (6, 11), 2167, ML.Mat.Add\\nError: syntax error near "." : expected ')' (7, 32), 3002, ML.Mat.Add\\nError: Expected boolean expression (15, 32), 2073, ML.Mat.Add\\nError: Expected boolean expression (17, 14), 2073, ML.Mat.Add\\nError: Unknown identifier "Each" (19, 25), 2167, ML.Mat.Has\\nError: Unknown identifier "Has" (22, 11), 2167, ML.Mat.Mul\\nError: syntax error near "T" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (26, 18), 3002, ML.Mat.Mul\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (28, 1), 3002, ML.Mat.Mul\\nError: syntax error near ":=" : expected ';' (39, 5), 3002, ML.Mat.Mul\\nError: syntax error near ":=" : expected ';' (48, 4), 3002, ML.Mat.Mul\\nError: syntax error near "T" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (52, 17), 3002, ML.Mat.Mul\\nError: syntax error near "mT" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (61, 19), 3002, ML.Mat.Mul\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (63, 1), 3002, ML.Mat.Mul\\nError: Unknown identifier "Has" (66, 13), 2167, ML.Mat.Mul\\nError: Unknown identifier "Has" (67, 13), 2167, ML.Mat.Mul\\nError: syntax error near "." 
: expected ')' (68, 34), 3002, ML.Mat.Mul\\nError: Expected boolean expression (70, 34), 2073, ML.Mat.Mul\\nError: Expected boolean expression (72, 16), 2073, ML.Mat.Mul\\nError: Unknown identifier "Mul_Default" (72, 56), 2167, ML.Mat.Mul\\nError: syntax error : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (74, 7), 3002, ML.Mat.Mul\\nError: Unknown identifier "Has" (5, 11), 2167, ML.Mat.Add\\nError: Unknown identifier "Has" (6, 11), 2167, ML.Mat.Add\\nError: syntax error near "." : expected ')' (7, 32), 3002, ML.Mat.Add\\nError: Expected boolean expression (15, 32), 2073, ML.Mat.Add\\nError: Expected boolean expression (17, 14), 2073, ML.Mat.Add\\nError: Unknown identifier "Each" (19, 25), 2167, ML.Mat.Has\\nError: Unknown identifier "Has" (22, 11), 2167, ML.Mat.Mul\\nError: syntax error near "T" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (26, 18), 3002, ML.Mat.Mul\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (28, 1), 3002, ML.Mat.Mul\\nError: syntax error near ":=" : expected ';' (39, 5), 3002, ML.Mat.Mul\\nError: syntax error near ":=" : expected ';' (48, 4), 3002, ML.Mat.Mul\\nError: syntax error near "T" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (52, 17), 3002, ML.Mat.Mul\\nError: syntax error near "mT" : expected RANGE, ROWSET, SELF, SUCCESS, datarow, dataset, dictionary, module-name, identifier, identifier, function-name, identifier, macro-name, '^', '(', '[' (61, 19), 3002, ML.Mat.Mul\\nError: syntax error near "END" : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (63, 1), 3002, ML.Mat.Mul\\nError: Unknown identifier "Has" (66, 13), 2167, ML.Mat.Mul\\nError: Unknown identifier "Has" (67, 13), 2167, ML.Mat.Mul\\nError: syntax error near "." 
: expected ')' (68, 34), 3002, ML.Mat.Mul\\nError: Expected boolean expression (70, 34), 2073, ML.Mat.Mul\\nError: Expected boolean expression (72, 16), 2073, ML.Mat.Mul\\nError: Unknown identifier "Mul_Default" (72, 56), 2167, ML.Mat.Mul\\nError: syntax error : expected APPLY, BIG_ENDIAN, BUILD, DISTRIBUTION, expression, EXPORT, FEATURE, IMPORT, KEYDIFF, KEYPATCH, LITTLE_ENDIAN, LOADXML, NOTIFY, OUTPUT, PACKED, PARALLEL, PATTERN, __PLATFORM__, RANGE, RECORD, RETURN, ROWSET, RULE, SET, SHARED, type-name, SUCCESS, TOKEN, TRANSFORM, TYPEOF, UNSIGNED, UPDATE, WAIT, WILD, <, datarow, identifier, action, constant, #CONSTANT, #OPTION, #WORKUNIT, #STORED, #LINK, #ONWARNING, #WEBSERVICE, '^', ';', '$' (74, 7), 3002, ML.Mat.Mul\", \"post_time\": \"2015-06-02 19:22:17\" },\n\t{ \"post_id\": 7764, \"topic_id\": 1768, \"forum_id\": 23, \"post_subject\": \"Matrix Transpose\", \"username\": \"chennapooja\", \"post_text\": \"Hello,\\n\\nI have some issues in getting the transpose for a dataset after converting into matrix..\\n\\nActually I have to get the result for below equation:\\n\\n wnew = w + ALPHA(a2trans-a3trans)\\n \\n where wnew, w, a2trans, a3trans are of type PBblas.Types.Layout_Part and ALPHA is some constant value\\n\\n actually I have dataset results a2 and a3, I have to make transpose those to get a2trans and a3trans. Can someone help me with right PBblas functions to be used to get the result wnew? \\n actually I checked PBBlas.PB_dgemm and PBBlas.PB_daxpy but could not get proper result. I don't have transpose option in daxpy and dgemm is something different.\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-06-15 16:22:22\" },\n\t{ \"post_id\": 7859, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n I am still doubtful with data I am using though I have csv format file from byte file format now. So is it possible to attach the dataset input training and test files with which I can check once.\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-06-30 13:54:00\" },\n\t{ \"post_id\": 7858, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n Its working perfectly fine with a simple dataset. I sprayed it and tested. I just extracted few records from MNIST dataset also, to be specific 5 records and put them into separate csv, then sprayed. It is also working good but the output probabilities are not that good, almost all the outputs have same probability. I understand its possible because training and no of iterations is not sufficient. But when I test with whole dataset, facing these kind of memory and thor slave related errors which I am not able to understand.\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-06-30 13:42:56\" },\n\t{ \"post_id\": 7857, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"Hi,\\n\\nDid you try to spray just a very simple cvs file?\\nfor example:\\n1,2,3\\n4,5,6\\n\\nand then upload and spray the data. 
\\nuse a simple program as below to make sure you can read the data:\\n\\nvalue_record := RECORD\\nreal\\tf1\\t;\\nreal\\tf2\\t;\\nreal\\tf3\\t;\\nEND;\\n\\ninput_data_tmp := DATASET('~::SimpleUploadedFile', value_record, CSV);\\n\\nOUTPUT(input_data_tmp);\\n\\n\\n\\nLook at this manual to see how to upload and spary the daya :\\nhttp://cdn.hpccsystems.com/install/docs ... ndling.pdf\\n\\nyou first upload the data, then you use "spray delimited" to spray the data.\\n\\nI hope these help!\\n\\n-Maryam\", \"post_time\": \"2015-06-30 13:37:59\" },\n\t{ \"post_id\": 7848, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n As thor was down, could not test with "csv" sprayed file as input from 2 days, today when I tested getting below error:\\n\\n System error: -1: Graph[326], localresultwrite[332]: SLAVE #1 [10.0.1.1:20100]: CMemoryBufferSerialStream::get read past end of stream (15680000,7421865), \\n\\n and these kind of errors are not understandable at all. It will be of great help if any suggestions given to overcome above error.\\n\\nThanks and Regards,\\nPooja.\", \"post_time\": \"2015-06-29 14:43:33\" },\n\t{ \"post_id\": 7839, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n Thanks, I guess that might be the problem, I was spraying that train-images.idx3-ubyte file directly. \\n\\n Actually, I tried similar thing yesterday, I copied my result of 100 samples from input_data_tmp into a csv and then sprayed, I got the result(may be wrong as input is corrupted - not sure) but the conf value (confidence) was almost same in all the output records.\\n\\n Please correct me if I am wrong - I included NNClassify also in the code to get the classified result. What I need to check here? Since there are 10 output neurons, I have to check one with highest probability or conf as the classified output right? Is this understanding correct or output represents something else?\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-06-25 17:18:49\" },\n\t{ \"post_id\": 7838, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"My Guess is that the problem is your data, either you have not spared it correctly or something else.\\nMake a simple text file with a toy dataset, for example as simple as the below data:\\n1,0,2\\n3,4,6\\n3,7,8\\n4,9,8\\n\\nThen try to spare and read the data. And then use it with stacked sparse autoencoder to see if it works. \\nAlso you can't spare "train-images.idx3-ubyte" directly. First you have to convert it to a comma separated text file and then spray it. You can use "loadMNISTImages.m" from Stanford deep learning files to do that.\", \"post_time\": \"2015-06-25 15:50:35\" },\n\t{ \"post_id\": 7826, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n When I aborted my previous execution and re-execute it I am getting the same error again "System error: 0: Graph[122], hashdistribute[126]: SLAVE #1 [10.0.1.1:20100]: FastLZExpander - corrupt data(1) 0 0, Received from node: 10.0.1.3:20100". I have checked my input_data_tmp, its same. I am getting the output for indepDataC but later its failing with above error. 
I am not sure why its failing when the same code did not give even an error before.\\n At times I am getting this error. Can you guide me through what the reason could be ?\\n\\nThanks,\\nPooja.\", \"post_time\": \"2015-06-24 21:27:39\" },\n\t{ \"post_id\": 7824, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"with 2 iterations it takes almost 2 minutes\", \"post_time\": \"2015-06-24 15:22:29\" },\n\t{ \"post_id\": 7823, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Its only 2 iterations. Anyway I will try with 1 and check it out.\\n\\n\\nThanks a lot,\\nPooja.\", \"post_time\": \"2015-06-24 14:59:27\" },\n\t{ \"post_id\": 7821, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"what is the maximum iteration number you are using?\\nbegin with low numbers and make sure the algorithm works and then increase the maximum iterations number.\", \"post_time\": \"2015-06-24 14:35:04\" },\n\t{ \"post_id\": 7819, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Thanks Maryam.\\n\\nBut I have done the same, removed labels and executed but its taking a lot of time to execute....not getting any errors and no output too....so I have aborted the execution after 800 minutes....does it take too long for getting output? I have checked that other sample example which has 6 input samples, that is being executed thoroughly.\", \"post_time\": \"2015-06-24 14:09:25\" },\n\t{ \"post_id\": 7818, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"zeros are fine, they belong to the background in the digit images.\\nyes, you can remove labels from the value_record type. Please look at Stacked_SparseAutoencoder_test to see an example on a toy dataset.\\nstack sparse autoencoder algorithm does not need labels. however I had sprayed the Mnist along with the labels so I had to separate them first. But you don't need to. Please look at "Stacked_SparseAutoencoder_test" to see what you should do.\", \"post_time\": \"2015-06-24 14:00:47\" },\n\t{ \"post_id\": 7815, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Maryam,\\n\\n Thanks for the inputs.\\n \\n When I print the input_data_tmp, I could find zeroes everywhere. Will the pixel features do not change for different digits? Does it mean that data is corrupted?\\n Actually I have a doubt, I sprayed only images files "train-images.idx3-ubyte" to the thor not the labels file "train-labels.idx3-ubyte". So can I execute removing label from the value_record type? \\n I tried executing and I did not get output or errors even after 2 hours. How much time it usually takes for execution?\\n\\nThanks in advance,\\nPooja.\\n\\n\\nRegards,\\nPooja.\", \"post_time\": \"2015-06-23 22:58:02\" },\n\t{ \"post_id\": 7814, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Re: Memory pool exhausted error\", \"username\": \"maryamregister\", \"post_text\": \"Hi Pooja,\\n\\nPlease make sure you are reading the data correctly. 
Run the stack autoencoder program until line 795 when it reads the data and then simply print the output (OUTPUT(input_data_tmp,named('data'));) and make sure the data is read correctly.There should features f1 through f784 and label features shown in each row of the output.\\n\\n-Maryam\", \"post_time\": \"2015-06-23 16:57:36\" },\n\t{ \"post_id\": 7813, \"topic_id\": 1780, \"forum_id\": 23, \"post_subject\": \"Memory pool exhausted error\", \"username\": \"chennapooja\", \"post_text\": \"Dear Team,\\n\\n When I am trying to run stacked auto-encoders code for MNIST dataset, I am getting below error:\\n\\n Error: System error: 1301: Memory pool exhausted: pool (1216 pages) exhausted, requested 61 (in Rollup Group G85 E89) (0, 0), 1301\\n\\n Can someone help me in resolving this? Thanks in advance. Any specific requirements to run this code?\\n\\nThanks Again,\\nPooja\", \"post_time\": \"2015-06-22 20:23:35\" },\n\t{ \"post_id\": 9956, \"topic_id\": 1850, \"forum_id\": 23, \"post_subject\": \"Re: ML - calculate Euclidean distance\", \"username\": \"vivekaxl\", \"post_text\": \"\\nREAL euclidean_distance(DATASET(Types.NumericField) a, DATASET(Types.NumericField) b):= FUNCTION\\n temp := JOIN(a, b, LEFT.number = RIGHT.number, TRANSFORM(Types.NumericField, \\n SELF.id := -1;\\n SELF.number := LEFT.number;\\n SELF.value := POWER(LEFT.value-RIGHT.value, 2)\\n ));\\n return (SQRT(SUM(temp, temp.value)));\\nEND;\\n
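For reference, a minimal usage sketch of the euclidean_distance FUNCTION above, on two made-up points (the ids and values are invented purely for illustration; it assumes IMPORT ML and the FUNCTION definition are in scope):

IMPORT ML;
// Two hypothetical points in NumericField form (id, number, value)
pointA := DATASET([{1, 1, 1.0}, {1, 2, 2.0}, {1, 3, 3.0}], ML.Types.NumericField);
pointB := DATASET([{2, 1, 4.0}, {2, 2, 6.0}, {2, 3, 8.0}], ML.Types.NumericField);
// (1-4)^2 + (2-6)^2 + (3-8)^2 = 9 + 16 + 25 = 50, so the result should be SQRT(50) ~ 7.07
OUTPUT(euclidean_distance(pointA, pointB));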
\", \"post_time\": \"2016-07-13 22:37:37\" },\n\t{ \"post_id\": 8041, \"topic_id\": 1850, \"forum_id\": 23, \"post_subject\": \"Re: ML - calculate Euclidean distance\", \"username\": \"tlhumphrey2\", \"post_text\": \"You might be able to use ML.Cluster.Distances, but I have a feeling it will be difficult because that function was setup for only those clustering algorithms is ML.Cluster.\", \"post_time\": \"2015-08-26 16:12:18\" },\n\t{ \"post_id\": 8039, \"topic_id\": 1850, \"forum_id\": 23, \"post_subject\": \"Re: ML - calculate Euclidean distance\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Yes Tim, I want to implement supervised learning using KNN and I am using Euclidean distance for measurement. I have 2 labelled sets - training set and test set. \\n\\nI want to use training set to learn and predict for the test set so that I can cross-verify predictions with labels from test set. \\n\\nThis is what I want to do:\\nfor each row of test set\\n{\\n Compute Euc distance (for all features) with every row of training set
Take k closest distances
\\nAssign the most frequent label among the k neighbours to the current row's label
\\n}\\n\\nTo implement this, given a training Matrix X and a test Matrix Y, for each row y in Y, I need to compute sqrt((x1-y1)^2 + (x2-y2)^2...). Will I be able to achieve this using ML.Cluster.Distances?\\n\\n- Gayathri\", \"post_time\": \"2015-08-26 10:08:51\" },\n\t{ \"post_id\": 8038, \"topic_id\": 1850, \"forum_id\": 23, \"post_subject\": \"Re: ML - calculate Euclidean distance\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your steps 1 and 2 are correct. But, I believe you want a KNN supervised learning algorithm, i.e. you use your training set with a learning algorithm to learn some kind of model which you then use for classification. And, with KNN the model is actually all the rows of your training set. Then, the classifier compares new rows (of independent variables (or features or X)) with those of your training set. And, the class (or Y or dependent variable) that is assigned to each row will be the closest.\\n\\nLook at ML.Tests.Explanatory.KNN_KDTree.ecl which is an example using KNN_KDTree (in ML.Lazy.ecl). Also, in the same module is KNN. You use it just like KNN_KDTree.\", \"post_time\": \"2015-08-25 15:55:19\" },\n\t{ \"post_id\": 8037, \"topic_id\": 1850, \"forum_id\": 23, \"post_subject\": \"ML - calculate Euclidean distance\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I have just started exploring HPCC ML module. I am trying to use KNN for classification of my test dataset (contains 4 feature fields). Please let me know if my approach is right:\\n\\n1. Read my training and test data files into datasets
\\n2. use ML.ToField on the dataset
\\n3. Call ML.Cluster.Distances with training and test datasets as parameters - this computes Euclidean distance (this is the default for 3rd param to distances?) for every row in left with every row in right, taking into account all features?
\\n4. Call ML.Cluster.Closest for result from previous step - this computes closest neighbour for each row? How do I pass x to this to get x closest neighbours?
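The loop described in the steps above can also be sketched as a brute-force nearest-neighbour vote in plain ECL, without ML.Cluster. The sketch below is illustrative only: the toy values, K, and field names are invented, and it assumes the ecl-ml Types module is importable as ML.Types; the KNN / KNN_KDTree attributes mentioned earlier in the thread remain the library route.

IMPORT ML;

// Tiny made-up data: two features per observation.
// trainX/testX follow ML.Types.NumericField (id, number, value);
// trainY holds one class label per training id as ML.Types.DiscreteField.
trainX := DATASET([{1,1,1.0},{1,2,1.0},
                   {2,1,5.0},{2,2,5.0},
                   {3,1,1.2},{3,2,0.8}], ML.Types.NumericField);
trainY := DATASET([{1,1,1},{2,1,2},{3,1,1}], ML.Types.DiscreteField);
testX  := DATASET([{10,1,1.1},{10,2,1.1}], ML.Types.NumericField);

K := 3;  // number of neighbours (assumption for illustration)

PairRec := RECORD
  UNSIGNED test_id;
  UNSIGNED train_id;
  REAL     d2;
END;

// Squared difference for every (test row, train row, feature) combination.
// For large inputs you would also need to think about distribution/LOCAL.
diffs := JOIN(testX, trainX, LEFT.number = RIGHT.number,
              TRANSFORM(PairRec,
                        SELF.test_id  := LEFT.id,
                        SELF.train_id := RIGHT.id,
                        SELF.d2       := POWER(LEFT.value - RIGHT.value, 2)));

// Sum over features: squared Euclidean distance for each (test, train) pair.
dists := TABLE(diffs, {test_id, train_id, REAL dist := SUM(GROUP, d2)},
               test_id, train_id);

// Keep the K nearest training rows per test row (smallest distances first).
nearest := UNGROUP(TOPN(GROUP(SORT(dists, test_id), test_id), K, dist));

LabelRec := RECORD
  UNSIGNED test_id;
  INTEGER  class_label;
END;

// Attach the training labels, then take the most frequent label per test row.
labelled := JOIN(nearest, trainY, LEFT.train_id = RIGHT.id,
                 TRANSFORM(LabelRec,
                           SELF.test_id     := LEFT.test_id,
                           SELF.class_label := RIGHT.value));
votes   := TABLE(labelled, {test_id, class_label, UNSIGNED cnt := COUNT(GROUP)},
                 test_id, class_label);
predict := DEDUP(SORT(votes, test_id, -cnt), test_id);

OUTPUT(predict);  // expected: test id 10 classified as label 1

The TOPN over the grouped distance table keeps the K smallest distances per test row, and the final SORT/DEDUP keeps the most frequent neighbour label as the prediction.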
\\n\\n- Gayathri\", \"post_time\": \"2015-08-25 12:48:04\" },\n\t{ \"post_id\": 8708, \"topic_id\": 2032, \"forum_id\": 23, \"post_subject\": \"Clustering on large data samples\", \"username\": \"sameermsc\", \"post_text\": \"Hi,\\n\\nUsing default settings ran AggloN clustering on a dataset containing around 10K text sentences and the clusters generated are not at all convincing, it ends up with around 9K clusters\\n\\nOne observation is that when the input dataset is of smaller size (few hundred sentences) it does a good job in clustering the data, but on a larger dataset it gives strange output \\n\\nAnother observation is that on the same input dataset i get to see different cluster formations on a 10 node and 100 node cluster, this is strange, does anyone else encountered similar situation ?\\n\\nWhy is this behavior\\n\\nThanks\\nSameer\", \"post_time\": \"2015-11-26 17:01:39\" },\n\t{ \"post_id\": 8870, \"topic_id\": 2064, \"forum_id\": 23, \"post_subject\": \"Re: Big data tutorial\", \"username\": \"jcma\", \"post_text\": \"Thank you Richard for the fast reply and help. \", \"post_time\": \"2015-12-16 18:28:58\" },\n\t{ \"post_id\": 8866, \"topic_id\": 2064, \"forum_id\": 23, \"post_subject\": \"Re: Big data tutorial\", \"username\": \"rtaylor\", \"post_text\": \"jcma,\\n\\nThis kind of request would be best handled through email, either to training@hpccsystems.com (goes to several of us) or directly to me: richard.taylor@lexisnexis.com\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-12-16 14:10:47\" },\n\t{ \"post_id\": 8864, \"topic_id\": 2064, \"forum_id\": 23, \"post_subject\": \"Big data tutorial\", \"username\": \"jcma\", \"post_text\": \"Good Afternoon, \\n\\nI am an employee in Insurance Analytics. I was wondering if any experts would be available to train my group (1hr ish) to better leverage HPCC for machine learning or big data in general. We have varying level of skill - mostly have done the beginner ECL training while others are are advanced. We love big data but perhaps we are missing some key things. Would particularly love to hear about a real/practical way we are currently using the ML libraries.\\n\\nIs there anyone that would be best to contact?\\n\\nWould appreciate any help!\", \"post_time\": \"2015-12-15 21:53:32\" },\n\t{ \"post_id\": 9172, \"topic_id\": 2142, \"forum_id\": 23, \"post_subject\": \"Re: ML.Regress_Poly_X\", \"username\": \"rtaylor\", \"post_text\": \"Vivek,\\n\\nNote that these are only warnings, your workunit is still being run. If you run your code on hThor, you won't get those warnings.\\n\\nThese warnings come from the "set of datasets" form of JOIN, which is only supported on Thor as LOCAL, which is what the warning is telling you. You could also edit the ECL code files where the warnings are from and add the LOCAL option to those JOINs and that would also make the warnings disappear.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-01-29 14:48:52\" },\n\t{ \"post_id\": 9170, \"topic_id\": 2142, \"forum_id\": 23, \"post_subject\": \"ML.Regress_Poly_X\", \"username\": \"vivekaxl\", \"post_text\": \"Hello ,\\n\\nI am trying to run an example code for linear regression\\n\\n
\\nIMPORT ML;\\nR := RECORD\\nINTEGER rid;\\nINTEGER Recs;\\nREAL Time;\\nEND;\\nd := DATASET([{1,50000,1.00},{2,500000,2.29}, {3,5000000,16.15},\\n{4,25000000,80.2},{5,50000000,163},{6,100000000,316},\\n{7,10,0.83},{8,1500000,5.63}],R);\\nML.ToField(d,flds);\\nP := ML.Regress_Poly_X(flds(number=1),flds(number=2));\\nP.Beta;\\nP.RSquared\\n
\\n\\n and I am running into this error:\\n\\n\\n\\nWarning: Only LOCAL versions of JOIN are currently supported on THOR (115, 13 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ecl-ml-master\\\\PBblas\\\\PB_dgetrf.ecl)\\nWarning: Only LOCAL versions of JOIN are currently supported on THOR (147, 16 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ecl-ml-master\\\\PBblas\\\\PB_dtrsm.ecl)\\nWarning: Only LOCAL versions of JOIN are currently supported on THOR (147, 16 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ecl-ml-master\\\\PBblas\\\\PB_dtrsm.ecl)\\nWarning: (0,0): error C6003: C++ link error: cannot find -lcblas\\nError: Compile/Link failed for W20160129-134350 (see '//192.168.56.101/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0 - W20160129-134350)\\nWarning: \\nWarning: ---------- compiler output --------------\\nWarning: /usr/bin/ld: cannot find -lcblas\\nWarning: collect2: ld returned 1 exit status\\nWarning: \\nWarning: --------- end compiler output -----------\\n
\\n\\nI am not sure what is going wrong. Any help would be very helpful. \\n\\nRegards, \\nVivek\", \"post_time\": \"2016-01-29 09:54:50\" },\n\t{ \"post_id\": 9230, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"To checkout FieldAggregates, try running ML.Tests.Explanatory.FieldAggregate.\", \"post_time\": \"2016-02-04 14:22:47\" },\n\t{ \"post_id\": 9228, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"JimD\", \"post_text\": \"I submitted ML.FieldAggregates and it worked on my server running 5.4.4-1(gold)\\n\\nHTH,\\nJim\", \"post_time\": \"2016-02-04 12:59:25\" },\n\t{ \"post_id\": 9224, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"balajisampath\", \"post_text\": \"Download the latest ML folder and try to compile ML.FieldAggregates\", \"post_time\": \"2016-02-03 22:51:40\" },\n\t{ \"post_id\": 9222, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Sorry. I miss-spoke. You don't need the blue Examples folder shown at the bottom of my file structure attachment.\", \"post_time\": \"2016-02-03 17:24:08\" },\n\t{ \"post_id\": 9220, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have attempted to setup my file structure to look like yours -- hoping to duplicate the error you are getting. But, I couldn't duplicate the error. The attachment is the file structure I currently have which should look similar to yours where the Examples and ML folders are contained in "My Files". You should also have the 3 blue folders I have at the bottom, i.e. Examples, ecllibrary, and plugins.\", \"post_time\": \"2016-02-03 16:33:40\" },\n\t{ \"post_id\": 9218, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"balajisampath\", \"post_text\": \"Error: Unknown identifier "FieldAggregates" (3, 14), 2167, C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\ML\\\\Correlate.ecl\\n\\nExamples.Sentilyze.NaiveBayes.Classify calls ML.Discretize which in turn calls ML.FieldAggregates.\\n\\nI am getting error when I try to compile ML.FieldAggregates or ML.Correlate\", \"post_time\": \"2016-02-03 15:18:21\" },\n\t{ \"post_id\": 9214, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Where is the "unknown identifier FieldAggregates" error coming from? It isn't coming from Sentilyze.NaiveBayes.Classify because that function doesn't use FieldAggregates. So, the error must be coming from something that this function uses.\\n\\nIn the Syntax error window use the horizontal scroll bar and scroll over the right of the error. There you should see the name of the function/module the error comes from. Please let me know the function's name.\", \"post_time\": \"2016-02-03 14:32:30\" },\n\t{ \"post_id\": 9210, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"balajisampath\", \"post_text\": \"Yes it is. Please refer the image attached in my above reply. 
Scroll to right you can see my folder structure and files.\", \"post_time\": \"2016-02-02 21:43:10\" },\n\t{ \"post_id\": 9204, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your ML folder should look like the following. You should see FieldAggregates in it.\", \"post_time\": \"2016-02-02 20:47:54\" },\n\t{ \"post_id\": 9196, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"balajisampath\", \"post_text\": \"I tried the same but didn't post the complete code.\\nNow just tried your code and got the same error\\n[attachment=0:fxzf3i89]error.jpg\", \"post_time\": \"2016-02-02 19:55:03\" },\n\t{ \"post_id\": 9192, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Re: Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your attempt to call Examples.Sentilyze.NaiveBayes.Classify, should look something like the following:\\nIMPORT ML;\\nIMPORT Examples.Sentilyze AS Sentilyze;\\nd:=DATASET('~gilbch::sentilyze::trainer::english',ML.Docs.Types.Raw,THOR);\\nSentilyze.NaiveBayes.Classify(d);\\n
\", \"post_time\": \"2016-02-02 19:24:51\" },\n\t{ \"post_id\": 9184, \"topic_id\": 2150, \"forum_id\": 23, \"post_subject\": \"Examples.Sentilyze.NaiveBayes.Classify error\", \"username\": \"balajisampath\", \"post_text\": \"Getting error when I try to run Classification example\\nExamples.Sentilyze.NaiveBayes.Classify(<dataset>);\\n\\nError: Unknown identifier "FieldAggregates" (3, 14), 2167, \\n\\nLooks like there is a circular reference\\nML.Classify calls ML.Discretize-> ML.FieldAggregates-> ML.Correlate-> ML.FieldAggregates\\n\\nNot sure if I am missing something.\", \"post_time\": \"2016-02-02 17:00:46\" },\n\t{ \"post_id\": 9532, \"topic_id\": 2258, \"forum_id\": 23, \"post_subject\": \"Random forest implementation\", \"username\": \"jcma\", \"post_text\": \"Hi,\\n\\nThis question may be better suited to someone in technology, but I was wondering about the feasibility of doing the following. We are working on a project in Analytics that seems very computationally demanding. Is this something we could potentially do in production or is this just too much? \\n\\nHere are the parameters:\\n- Each person will have about 100 data points that will need to be refreshed periodically, every month.\\n- We need to fit one random forest model per person.\\n- There are 50,000+ users and thus 50,000+ models.\\n- We need to score future data points based on the tree model for each user.\\n- We need to both train the model in production and also score the model in production.\\n\\n\\nThank you.\", \"post_time\": \"2016-04-14 16:09:32\" },\n\t{ \"post_id\": 9724, \"topic_id\": 2308, \"forum_id\": 23, \"post_subject\": \"convolutional neural network (CNN) in ML package\", \"username\": \"elaheh\", \"post_text\": \"Hi,\\nI couldn't find any implementation of Convolutional Neural Network in ML package. Does anybody has any idea whether implementation of CNN is available for HPCC systems or not?\\nAnd If it is not available yet, where is the good point to start its implementation (e.g. RBM)?\\nThanks\", \"post_time\": \"2016-06-09 20:25:27\" },\n\t{ \"post_id\": 9788, \"topic_id\": 2322, \"forum_id\": 23, \"post_subject\": \"Re: CNN (convolutional neural network) in ML package\", \"username\": \"tlhumphrey2\", \"post_text\": \"No, we don't. The neural network algorithms we have are backpropagation in NeuralNetworks, deep learning in DeepLearning, and perceptron in Classify.\", \"post_time\": \"2016-06-16 12:23:01\" },\n\t{ \"post_id\": 9748, \"topic_id\": 2322, \"forum_id\": 23, \"post_subject\": \"CNN (convolutional neural network) in ML package\", \"username\": \"elaheh\", \"post_text\": \"Hi guys\\nIs CNN available in Machine Learning package? I have seen Neural Networks are available, but I couldn't find the CNN.\\nThank you all\", \"post_time\": \"2016-06-13 20:17:13\" },\n\t{ \"post_id\": 11003, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"tlhumphrey2\", \"post_text\": \"yes\", \"post_time\": \"2016-08-24 12:25:15\" },\n\t{ \"post_id\": 10993, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"kps_mani\", \"post_text\": \"I have downloaded them as CSV file and I believe that you will be still able to load them in the Dataset and use it directly for ML with required transformation. 
Do you still need ECL code?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-08-23 20:49:50\" },\n\t{ \"post_id\": 10983, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"tlhumphrey2\", \"post_text\": \"The 2 files you attached aren't in the format needed for input into the ML classifiers. Attach you ecl code, I will modify it so the dependent and independent data is in the correct format and then attach the changed code to my next post.\", \"post_time\": \"2016-08-23 20:44:48\" },\n\t{ \"post_id\": 10973, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"kps_mani\", \"post_text\": \"I have attached the Dependent and Independent Variables which are used for training dataset for your reference. Can you please look at it and comment on why classification is not working properly for Classification Id 10 & 20?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-08-23 20:34:11\" },\n\t{ \"post_id\": 10963, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nHere is the outcome of the Model with the training data set for the classification id (10, 20, 30)..\\n\\nclassfier c_actual c_modeled cnt \\n1\\t10\\t10\\t33\\n1\\t10\\t20\\t566\\n1\\t10\\t30\\t30\\n1\\t20\\t10\\t37\\n1\\t20\\t20\\t1660\\n1\\t20\\t30\\t84\\n1\\t30\\t10\\t38\\n1\\t30\\t20\\t49\\n1\\t30\\t30\\t2196\\n\\nclassifier c_modeled precision\\n1\\t10\\t30.55555555555556\\n1\\t20\\t72.96703296703296\\n1\\t30\\t95.06493506493507\\n\\nclassifier accuracy\\n1\\t82.86810142765822\\n\\nWhy am I not seeing better precision and modeling for Classification Id 10 & 20 whereas I see better precision and modeling for Classification Id 30? Any idea or suggestions?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-08-23 20:17:28\" },\n\t{ \"post_id\": 10953, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"tlhumphrey2\", \"post_text\": \"The following shows a tree diagram of ecl-ml. Notice where the folder Explanatory is at. Naive_Bayes.ecl is there.\\n+---docs\\n| \\\\---images\\n+---Examples\\n| \\\\---Sentilyze\\n| +---KeywordCount\\n| \\\\---NaiveBayes\\n+---ML\\n| +---DMat\\n| +---Docs\\n| +---LDA\\n| +---Mat\\n| +---Regression\\n| | +---Dense\\n| | \\\\---Sparse\\n| +---StepRegression\\n| +---StepwiseLogistic\\n| +---SVM\\n| | \\\\---LibSVM\\n| | \\\\---Test\\n| \\\\---Tests\\n| +---Benchmarks\\n| +---Deprecated\\n| +---Explanatory\\n| \\\\---Validation\\n+---PBblas\\n| +---BLAS\\n| +---Block\\n| +---LAPACK\\n| \\\\---Tests\\n+---TS\\n| \\\\---Demo\\n\\\\---VL\\n \\\\---XSLT
\", \"post_time\": \"2016-08-23 18:52:20\" },\n\t{ \"post_id\": 10943, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"tlhumphrey2\", \"post_text\": \"I shortened the example code, ML.Tests.Explanatory.NaiveBayes.ecl, to the following:\\n\\nIMPORT ML;\\n//NaiveBayes classifier\\ntrainer:= ML.Classify.NaiveBayes();\\n\\n// Monk Dataset - Discrete dataset 124 instances x 6 attributes + class\\nMonkData:= ML.Tests.Explanatory.MonkDS.Train_Data;\\nML.ToField(MonkData, fullmds, id);\\nfull_mds:=PROJECT(fullmds, TRANSFORM(ML.Types.DiscreteField, SELF:= LEFT));\\nindepDataD:= full_mds(number>1);\\ndepDataD := full_mds(number=1);\\n// Learning Phase\\nD_Model:= trainer.LearnD(indepDataD, depDataD);\\ndmodel:= trainer.Model(D_model);\\n// Classification Phase\\nD_classDist:= trainer.ClassProbDistribD(indepDataD, D_Model); // Class Probalility Distribution\\nD_results:= trainer.ClassifyD(indepDataD, D_Model);\\nOUTPUT(D_results);\\n// Performance Metrics\\nD_compare:= ML.Classify.Compare(depDataD, D_results); // Comparing results with original class\\nOUTPUT(D_compare);
\\n\\nThe last line of this code is "OUTPUT(D_compare);". It outputs statistics that show the accuracy of the predicted vs the training set's dependent dataset. You should notice that the accuracy is around 80%.\", \"post_time\": \"2016-08-23 15:49:49\" },\n\t{ \"post_id\": 10933, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI could not see the ML.Tests.Explanatory.Naive_Bayes.ecl in the ML Beta version of Library. Can you please let me know if you have latest version of ML?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-08-23 15:21:47\" },\n\t{ \"post_id\": 10923, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Re: Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"tlhumphrey2\", \"post_text\": \"Before using your own data, I would run the example, ML.Tests.Explanatory.Naive_Bayes.ecl. And, make sure the model produced does a good job classifying the training set (when I ran it, it did).\\n\\nThen, use the example as a guide to setting up your code to train and classify your own data.\", \"post_time\": \"2016-08-23 14:59:08\" },\n\t{ \"post_id\": 10623, \"topic_id\": 2613, \"forum_id\": 23, \"post_subject\": \"Naive Bayes Algorithm - Challenges in Prediction\", \"username\": \"kps_mani\", \"post_text\": \"Hi,\\nI have used the ML module to implement the Naive Bayes algorithm for the Classification problem. I have created the Independent Variables (in my case, Word bag) and Dependent variable (in my case, Classification id). It is similar to Sentiment analysis. I have created a training dataset (around 10K records) with 3 classification id. I have created the model with the training dataset (Independent and Dependent variables). After creating the Model, I have executed against the original dataset (with 400K records which includes the Training dataset as well). I was expecting the Training Dataset information should have been classified correctly. However, when I looked at the results, my training dataset records were not classified correctly. How do I resolve this issue and improve the accuracy of Predicted Classification?\\n\\nRegards,\\nSubbu\", \"post_time\": \"2016-08-15 22:04:26\" },\n\t{ \"post_id\": 21933, \"topic_id\": 4913, \"forum_id\": 23, \"post_subject\": \"Re: Random Seed Persistence\", \"username\": \"JamesHolmes\", \"post_text\": \"Thank you for sharing useful thread.\", \"post_time\": \"2018-05-17 10:51:24\" },\n\t{ \"post_id\": 19463, \"topic_id\": 4913, \"forum_id\": 23, \"post_subject\": \"Re: Random Seed Persistence\", \"username\": \"john holt\", \"post_text\": \"The random seed was not reused, rather you only computed one dataset of random values instead of the three datasets you wanted.\\n\\nIt is useful to recall that the ECL language statements are definitions and that there are action statements that cause the computation to occur.\\n\\nIf you examine the graph, you will see that the 3 "GenData" definitions have been recognized as the same definition. Since x1, x2, and x3 definitions are the same, the x1, x2, and x3 actions (and the AVE actions) can use the common definitions.\\n\\nWhen I look at the graph generated on a 6.2.22 version platform, I see two sub-graphs. The first has activities 2-9 and correspond to the Distribution definition. 
This sub-graph produces 2 values, a 10,000 record dataset and a single number; and these are used by the GenData definitions.\\n\\nThe second sub-graph has activities 11-30, and includes the 6 output actions.\\n\\nIf I change the definitions to be:\\nx1 := ML.Distribution.GenData(100,a1,1);\\nx2 := ML.Distribution.GenData(101,a1,1);\\nx3 := ML.Distribution.GenData(102,a1,1);\\nI get 4 sub-graphs. The first sub-graph is the same, and the second sub-graph has activities 11-26 which writes the x1 dataset and the x1 dataset average. The 3rd and 4th are for the x2 and x3 respectively.\\n\\nNote that you can use CHOOSEN(...) to make the definitions produce three datasets with 100 records each:\\nx1 := ML.Distribution.GenData(100,a1,1);\\nx2 := CHOOSEN(ML.Distribution.GenData(101,a1,1), 100);\\nx3 := CHOOSEN(ML.Distribution.GenData(102,a1,1), 100);\\n\\n\\nNow the "GenData" definitions are all different, so you will get 3 different random sequences.\\n\\nBest,\", \"post_time\": \"2017-10-12 21:17:05\" },\n\t{ \"post_id\": 19433, \"topic_id\": 4913, \"forum_id\": 23, \"post_subject\": \"Random Seed Persistence\", \"username\": \"tlitherland\", \"post_text\": \"In testing out the Machine Learning functionalities of HPCC, I encounter an issue one might term “The Persistence of Random Seeds.” Whereas the usual problem is how to initialize random seeds for reproducibility purposes from run to run, I’m having the opposite problem of a given WorkUnit’s random generation calls all giving me the same results. \\n\\nFor example, the following code is intended to create 3 standard normal random samples of 100 observations each (x1, x2, and x3). However, I find that all three of these results are in fact identical. (They do differ from run to run – just not among themselves within a given WorkUnit.)\\n\\nIMPORT * FROM ML;\\nIMPORT * FROM ML.Cluster;\\nIMPORT * FROM ML.Types;\\n\\na1 := ML.Distribution.Normal(0.0,1,10000);\\n\\nx1 := ML.Distribution.GenData(100,a1,1);\\nx2 := ML.Distribution.GenData(100,a1,1);\\nx3 := ML.Distribution.GenData(100,a1,1);\\n\\nx1;\\nx2;\\nx3;\\n\\nave(x1,value);\\nave(x2,value);\\nave(x3,value);\\n\\nThe issue appears to be fundamental in that if I instead select three different mean and standard deviations pairs, the x1, x2, and x3 are precisely translates/rescalings of one another, i.e., they still apparently rely on a common underlying random number stream.\\n\\nAny help is appreciated.\", \"post_time\": \"2017-10-12 15:07:51\" },\n\t{ \"post_id\": 20833, \"topic_id\": 5223, \"forum_id\": 23, \"post_subject\": \"Re: Regression tutorial\", \"username\": \"lpezet\", \"post_text\": \"Hi John!\\n\\nSo I checked and looks like SAS calculates the F statistics for each independent variable at each step when using the forward method. So it's a different approach.\\n\\nI found another example of step regression and reported here (contains ECL code too to reproduce):\\nhttps://github.com/lpezet/hpcc_vs_sas/t ... Prediction\\n\\nNow I'm inclined to look into ForwardRegression.ecl and implement the p-value way just for fun \\n\\nThanks.\", \"post_time\": \"2018-02-17 03:03:35\" },\n\t{ \"post_id\": 20473, \"topic_id\": 5223, \"forum_id\": 23, \"post_subject\": \"Re: Regression tutorial\", \"username\": \"john holt\", \"post_text\": \"I have not compared the version in the ecl-ml repository to SAS. 
I would expect some differences with something like a step-wise procedure because there are several choices for the criteria to use for selecting the best variable to add in a step and there are several different approaches to stepwise. The attributes in ML/StepRegression use AIC to select the best variable.\\n\\nThere are several analytic techniques that have been re-built (such as Multiple Linear Regression and Random Forests) as supported bundles. The bundle documentation describes which external implementations were used in a validation role and in a performance comparison role.\\n\\nAssuming that the SAS implementation of Stepwise is using AIC, and your SAS model is using one of the three forms of step-wise that are provided in the ecl-ml repository, I would be interested in looking at the issue. Please create a Jira report describing what you are finding and please provide a link to the data. \\n\\nThanks.\", \"post_time\": \"2018-01-16 13:15:09\" },\n\t{ \"post_id\": 20463, \"topic_id\": 5223, \"forum_id\": 23, \"post_subject\": \"Regression tutorial\", \"username\": \"lpezet\", \"post_text\": \"Dear Awesome HPCC Team,\\n\\nI was in the middle of writing a tutorial on how to use the ML library for modeling (regression analysis) when something turned up.\\nNow I'm not even remotely close to being a figment of authority in either SAS nor Modeling/Regression Analysis, so I'm porting over a SAS(TM) tutorial instead.\\nHere's the link:\\nhttps://stats.idre.ucla.edu/sas/webbook ... egression/\\n\\nMy goal is 2-fold:\\n
\\n- Show a concrete tutorial with multiple techniques (methods) to understand data and create a model\\n- Compare HPCC ML vs. SAS (and show how to replicate what SAS does in ECL)\\n
\\n\\nMy problem is that I'm having a hard time replicating SAS results.\\nI do get the same results in simple regression between api00 and enroll (using elemapi2, the cleaned up data file) for example.\\nBut the parameter acs_k3 (average class size in kindergarten through 3rd grade) is negative in SAS, yet I end up with a very positive one (~16.7 with ML and -0.71 in SAS) when running the model of "api00 = acs_k3 meals full".\\n\\nMy concern here is that, if I let ML run a step regression to find the best model, how do I know HPCC will find the best one (like SAS would for example, not in absolute)?\\nI tried it using ML.StepRegression.ForwardRegression
and SAS and HPCC find 2 different sets of independent variables for api00.\\n\\nHas anyone done some comparison between SAS and ML? This would help me understand the differences (I don't know much about SAS and documentation is pretty opaque too).\\nAny similar work than what I'm trying to do here already out there?\\n\\n\\nThanks!\", \"post_time\": \"2018-01-15 20:30:39\" },\n\t{ \"post_id\": 20643, \"topic_id\": 5243, \"forum_id\": 23, \"post_subject\": \"Re: Matrix Multiply Embedded Python and GPU\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard,\\n\\nI like your code. It does everything I wanted to do inside MakeMatrix. And, it is much shorter than my code.\\n\\nI did notice one odd behavior of the code -- matrices A and B have the same values. Again, I don't understand this behavior. I entered a jira asking why (HPCC-19025).\\n\\nTim\", \"post_time\": \"2018-01-29 21:10:36\" },\n\t{ \"post_id\": 20633, \"topic_id\": 5243, \"forum_id\": 23, \"post_subject\": \"Re: Matrix Multiply Embedded Python and GPU\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nI'm not entirely sure why your code was getting the same row repeated each time, but here's my version of your code that DOES generate a different row for each record.\\n\\nMakeMatrix(UNSIGNED NRows, UNSIGNED NCols) := FUNCTION\\n rec0 := {REAL cell};\\n matrec := {SET OF REAL arow};\\n REAL RealRandom := RANDOM()/4294967295;\\n RealDS := DATASET(NRows*NCols,TRANSFORM(rec0,SELF.cell := RealRandom));\\n\\n MatRec DS2Set(INTEGER C) := TRANSFORM\\n EndRec := C*NCols; \\n BegRec := (EndRec - NCols) + 1; \\n SELF.arow := SET(RealDS[BegRec..EndRec],cell);\\n END;\\n RETURN DATASET(NRows,DS2Set(COUNTER));\\nEND;\\n\\nNRows1:= 10;\\nNCols1:= 20;\\nNRows2:= NCols1;\\nNCols2:= NRows1;\\n\\nA:=MakeMatrix(NRows1, NCols1);\\nOUTPUT(A,,'tlh::AMatrix',OVERWRITE);\\n\\nB:=MakeMatrix(NRows2, NCols2);\\nOUTPUT(B,,'tlh::BMatrix',OVERWRITE);
Note that I'm just generating all the cells in one pass, then just splitting them into their separate recs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-29 20:56:15\" },\n\t{ \"post_id\": 20623, \"topic_id\": 5243, \"forum_id\": 23, \"post_subject\": \"Re: Matrix Multiply Embedded Python and GPU\", \"username\": \"tlhumphrey2\", \"post_text\": \"Richard,\\n\\nNo, numbers in different rows should not be the same. I'm surprised they are.\\n\\nCould the problem be the NORMALIZE I have in the transform, genMatrix, of a 2nd NORMALIZE? Plus, maybe I'm thinking procedurally instead of declaratively???\\n\\nTim\", \"post_time\": \"2018-01-29 16:21:26\" },\n\t{ \"post_id\": 20613, \"topic_id\": 5243, \"forum_id\": 23, \"post_subject\": \"Re: Matrix Multiply Embedded Python and GPU\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nI ran your MakeMatrix() code (changing to 10 rows and 20 columns) and saw that each row ends up with exactly the same data. Is that intentional? \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-01-29 15:22:33\" },\n\t{ \"post_id\": 20593, \"topic_id\": 5243, \"forum_id\": 23, \"post_subject\": \"Matrix Multiply Embedded Python and GPU\", \"username\": \"tlhumphrey2\", \"post_text\": \"Does anyone know a better data structure for matrices being passed to embedded python where a GPU will multiply them? By better I mean the conversion to a GPU format will take the least amount of time.\\n\\nIn the following implementation, it takes the embedded python code approximately 21 seconds to convert to GPU format the 2 passed matrices that are 10,000 by 20,000 and 20,000 by 10,000. But, it only takes the GPU approximately 3 1/3 seconds to multiply the 2 matrices once the matrices are converted.\\n\\nIn the following implementation, the embedded python code computes the amount of time is takes to a) convert the 2 matrices to GPU format; b) do matrix multiplication, and c) convert the resultant matrix to a ecl dataset format.\\nThe embedded python code puts the times in a file called TimeOfMatrixMultiply.txt. The directory where the file is stored is /var/lib/HPCCSystms/myeclagent.\\n\\n(NOTE: All the ecl code I show below was ran on hthor.)\\n\\nNOTE:I deployed my hpcc system on AWS using a g2.2xlarge instance type, which comes with a GPU. 
Also in region us-west-2, I used the AMI, ami-638c1e03, which comes with all the software you will need, except hpcc.\\n\\nimport python;\\n\\nmatrec := RECORD\\n SET OF REAL arow;\\nEND;\\n\\nDATASET(matrec) MatrixMultiply(DATASET(matrec) A, unsigned nrowsA, unsigned ncolsA,DATASET(matrec) B, unsigned nrowsB, unsigned ncolsB) := embed(Python)\\n import numpy as np\\n import re\\n import cudamat as cm\\n import gnumpy as gp\\n\\n import sys\\n sys.stdout=open('TimeOfMatrixMultiply.txt','w')\\n \\n def ECLDataset2NPArray(s,r,c):\\n zarray=np.empty([r,c],dtype=float)\\n i=0\\n for row in s:\\n zarray[i]=np.asarray(row)\\n i+=1\\n return zarray\\n \\n from timeit import default_timer as timer\\n\\n start1 = timer()\\n Aarray=ECLDataset2NPArray(A,nrowsA,ncolsA)\\n Barray=ECLDataset2NPArray(B,nrowsB,ncolsB)\\n cm.cublas_init()\\n cm.CUDAMatrix.init_random()\\n A_cm = gp.garray(Aarray)\\n B_cm = gp.garray(Barray)\\n ecl2cm_time=timer() - start1\\n print("Time to just convert 2 ecl matrices (%d X %d) and (%d X %d) to cm was %f seconds" % (nrowsA,ncolsA,nrowsB,ncolsB,ecl2cm_time))\\n\\n time2=timer()\\n C_cm = A_cm.dot(B_cm)\\n mm_time=timer() - time2\\n print("Just GPU matrix multiple of 2 matrices (%d X %d) and (%d X %d) took %f seconds" % (nrowsA,ncolsA,nrowsB,ncolsB,mm_time))\\n\\n time3=timer()\\n cm.cublas_shutdown()\\n Carray=gp.as_numpy_array(C_cm)\\n ecl_Carray=Carray.tolist()\\n cm2ecl_time=timer() - time3\\n print("Time to just convert 2 cm matrices (%d X %d) and (%d X %d) to ecl was %f seconds" % (nrowsA,ncolsA,nrowsB,ncolsB,cm2ecl_time))\\n return ecl_Carray\\n\\nendembed;\\n\\n//The A and B matrices (datasets) were created by "make2MatricesAndStoreOnDisk_b.ecl".\\nA:=DATASET('~hthor::tlh::AMatrix',matrec,THOR);\\nB:=DATASET('~hthor::tlh::BMatrix',matrec,THOR);\\nNRowsA:=COUNT(A);\\nNColsA:=COUNT(A[1].arow);\\nNRowsB:=COUNT(B);\\nNColsB:=COUNT(B[1].arow);\\n\\nMM:=MatrixMultiply(A,NRowsA,NColsA,B,NRowsB,NColsB);\\n\\nOUTPUT(MM,,'tlh::AMatrixAndBMatrixProduct',OVERWRITE)\\n
\\nThe following embedded python will get the timings.\\nimport python;\\n\\ntimings := RECORD\\n STRING line;\\nEND;\\n\\nDATASET(timings) readTimings() := EMBED(python)\\n timings = open('/var/lib/HPCCSystems/myeclagent/TimeOfMatrixMultiply.txt').readlines()\\n return timings\\nENDEMBED;\\n\\nOUTPUT(readTimings());\\n
\\nThe following ecl code makes the 2 matrices that are multiplied.\\nmatrec := RECORD\\n SET OF REAL arow;\\nEND;\\n\\nMakeMatrix(UNSIGNED NRows, UNSIGNED NCols) := FUNCTION\\n \\n REAL RealRandom() := FUNCTION\\n r:= RANDOM()/4294967295;\\n return r;\\n END;\\n\\n rec0 := RECORD\\n REAL cell;\\n END;\\n\\t\\n rec1 := RECORD\\n DATASET(rec0) arow;\\n END;\\n\\n rec1 genMatrix(rec1 r, UNSIGNED row_num, UNSIGNED ncols) := TRANSFORM\\n SELF.arow:=NORMALIZE(DATASET([{0}],rec0), ncols, TRANSFORM(rec0,SELF.cell:=RealRandom()));\\n END;\\n\\n rec1DS:=NORMALIZE(DATASET([{DATASET([{0}],rec0)}],rec1),NRows,genMatrix(LEFT,COUNTER,NCols));\\n\\n matrec Dataset2Set(rec1 L) := TRANSFORM\\n SELF.arow := SET(L.arow,cell);\\n END;\\n \\n RETURN PROJECT(rec1DS,Dataset2Set(LEFT));\\nEND;\\n\\nNRows1:= 10000;\\nNCols1:= 20000;\\nA:=MakeMatrix(NRows1, NCols1);\\nOUTPUT(A,,'tlh::AMatrix',OVERWRITE);\\n\\nNRows2:= NCols1;\\nNCols2:= NRows1;\\nB:=MakeMatrix(NRows2, NCols2);\\nOUTPUT(B,,'tlh::BMatrix',OVERWRITE);\\n
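A small sanity check one might run after the multiply job above (illustrative only: the logical filename prefix assumes the hthor scope used for the A and B matrices earlier in this post; adjust it if your files were written under a different scope):

matrec := RECORD
  SET OF REAL arow;
END;
// Read the product back and confirm its shape.
C := DATASET('~hthor::tlh::AMatrixAndBMatrixProduct', matrec, THOR);
OUTPUT(COUNT(C), NAMED('product_rows'));          // expect NRows1 (10000)
OUTPUT(COUNT(C[1].arow), NAMED('product_cols'));  // expect NCols2 (10000)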
\", \"post_time\": \"2018-01-26 15:44:36\" },\n\t{ \"post_id\": 21583, \"topic_id\": 5473, \"forum_id\": 23, \"post_subject\": \"Re: Installing ML libraries on Ubuntu 16.04\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa, It asks us to download the ML Library from the following page : https://hpccsystems.com/ml. However, that page no longer exists.
You can download the ML repository code from the link on this page: \\nhttps://hpccsystems.com/download/free-modules/ecl-ml\\n\\nThis link will download the hpcc-systems-ecl-ml.zip file, which you then need to extract into your ECL Repository directory structure.\\n\\nCould someone share the process of downloading and installing HPCC ML libraries in Linux? And then compiling some ML ECL code? (Without ECL IDE support)
I would suggest you first use the ECL IDE (or Eclipse or VS Code, whichever IDE "floats your boat") to try running some examples. Only after that should you try doing a standalone compile. IOW, first you make it work, THEN you make it work the way you want it to. That way you're only trying to solve one problem at a time.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-10 14:37:18\" },\n\t{ \"post_id\": 21573, \"topic_id\": 5473, \"forum_id\": 23, \"post_subject\": \"Re: Installing ML libraries on Ubuntu 16.04\", \"username\": \"rsghatpa\", \"post_text\": \"I am trying :\\n eclcc -I /home/ubuntu/ecl-ml/ /home/ubuntu/ecl-ml/ML/Tests/Explanatory/Naive_Bayes.ecl\\n\\nError: Include paths -I '/home/ubuntu' and '/home/ubuntu/ecl-ml' overlap\\n\\nHow to resolve the same?\", \"post_time\": \"2018-04-10 14:01:45\" },\n\t{ \"post_id\": 21553, \"topic_id\": 5473, \"forum_id\": 23, \"post_subject\": \"Installing ML libraries on Ubuntu 16.04\", \"username\": \"rsghatpa\", \"post_text\": \"I have cloned the following project : \\nhttps://github.com/hpcc-systems/ecl-ml\\n\\nI tried compiling the following code as follows :\\n\\neclcc ~/ecl-ml/blob/master/ML/Tests/Explanatory/Naive_Bayes.ecl\\n\\nwhich gives an error log :\\nNaive_Bayes.ecl(1,14): error C2081: Import names unknown module "ML"\\n\\n\\nI have been following the associated Machine Learning guide provided by HPCC : https://github.com/hpcc-systems/ecl-ml/ ... arning.pdf for installing ML libraries used in the above examples. It asks us to download the ML Library from the following page : https://hpccsystems.com/ml. However, that page no longer exists.\\n\\nCould someone share the process of downloading and installing HPCC ML libraries in Linux? And then compiling some ML ECL code? (Without ECL IDE support)\", \"post_time\": \"2018-04-09 21:48:52\" },\n\t{ \"post_id\": 22933, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"lpezet\", \"post_text\": \"Oh wow. 
That was it.\\nAs you said, I basically TABLEd my data to filter the columns I needed between independent and dependent variables into 2 separate datasets before using ML_Core.ToField() and now it works.\\n\\nThanks a lot Tim!\", \"post_time\": \"2018-09-11 14:48:30\" },\n\t{ \"post_id\": 22923, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"tlhumphrey2\", \"post_text\": \"I believe there must be a difference between ML and ML_Core because you didn't have this problem with ML.\\n\\nIn ML_Core, OLS uses PBblas to multiply matrices and since your X basically looks like the following, where all elements of the 3rd column (Oxygen) are missing:\\n44, 89.47, , 11.37, 62, 178, 182\\n40, 75.07, , 10.07, 62, 185, 185\\n44, 85.84, , 8.65, 45, 156, 168\\n42, 68.15, , 8.17, 40, 166, 172\\n38, 89.02, , 9.22, 55, 178, 180\\n47, 77.45, , 11.63, 58, 176, 176\\n40, 75.98, , 11.95, 70, 176, 180\\n43, 81.19, , 10.85, 64, 162, 170\\n44, 81.42, , 13.08, 63, 174, 176\\n38, 81.87, , 8.63, 48, 170, 186\\n44, 73.03, , 10.13, 45, 168, 168\\n45, 87.66, , 14.03, 56, 186, 192\\n45, 66.45, , 11.12, 51, 176, 176\\n47, 79.15, , 10.60, 47, 162, 164\\n54, 83.12, , 10.33, 50, 166, 170\\n49, 81.42, , 8.95, 44, 180, 185\\n51, 69.63, , 10.95, 57, 168, 172\\n51, 77.91, , 10.00, 48, 162, 168\\n48, 91.63, , 10.25, 48, 162, 164\\n49, 73.37, , 10.08, 67, 168, 168\\n57, 73.37, , 12.63, 58, 174, 176\\n54, 79.38, , 11.17, 62, 156, 165\\n52, 76.32, , 9.63, 48, 164, 166\\n50, 70.87, , 8.92, 48, 146, 155\\n51, 67.25, , 11.08, 48, 172, 172\\n54, 91.63, , 12.88, 44, 168, 172\\n51, 73.71, , 10.47, 59, 186, 188\\n57, 59.08, , 9.93, 49, 148, 155\\n49, 76.32, , 9.40, 56, 186, 188\\n48, 61.24, , 11.50, 52, 170, 176\\n52, 82.78, , 10.50, 53, 170, 172
\\n\\nPBblas treats missing elements as zeros. So to it, column 3 contains all zeros which makes X NOT positive definite.\\n\\nOne way to get around this problem is to break oRawData into oRawX and oRawY before you use ToField to convert them to a NumericField dataset.\\n\\nRoger Dev will have to give you more details. He is the expert.\", \"post_time\": \"2018-09-11 12:46:26\" },\n\t{ \"post_id\": 22913, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"lpezet\", \"post_text\": \"Hi Tim!\\n\\nI'm not sure I understand. X and Y are simply the data for my independent and dependent variables respectively. X will just have data for Age (1), Weight (2), RunTime (4), RunPulse (5), RestPulse (6), and MaxPulse (7). And my dependent variable is Oxygen (3).\\n\\nIs there something different then in that regard between ECL-ML and ML_Core?\\n\\n\\nThanks!\", \"post_time\": \"2018-09-10 23:56:14\" },\n\t{ \"post_id\": 22903, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"tlhumphrey2\", \"post_text\": \"Your problem is caused by this line of code:\\nX := oFields( Number IN [ 1, 2, 4, 6, 5, 7 ] );\\n
\\n\\nBasically, the above line of code creates a sparse matrix where all elements of the 3rd column are zero. Why? Because missing elements are considered to be elements with value zero and all the elements of the 3rd column are missing in X. \\n\\nSo, you have created a matrix that is NOT positive definite.\", \"post_time\": \"2018-09-10 19:13:57\" },\n\t{ \"post_id\": 22893, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"tlhumphrey2\", \"post_text\": \"lpezet,\\n\\nI found LinearRegression.\\n\\nTim\", \"post_time\": \"2018-09-10 14:59:11\" },\n\t{ \"post_id\": 22883, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Re: Converting from old ML library to new ML_Core one\", \"username\": \"tlhumphrey2\", \"post_text\": \"lpezet,\\n\\nI'm attempting to duplicate the error you are getting. But, in your MainMLCore.ecl of your github repo, you are IMPORTing LinearRegression. But, I don't see it either in ecl-ml or ML_Core.\\n\\nTim\", \"post_time\": \"2018-09-10 14:49:59\" },\n\t{ \"post_id\": 22863, \"topic_id\": 5883, \"forum_id\": 23, \"post_subject\": \"Converting from old ML library to new ML_Core one\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nI'm trying to convert my ECL code from using old ML library to the new one (ML_Core and such).\\nI have a very simple code available on github: https://github.com/lpezet/hpcc_vs_sas/tree/master/AerobicFitnessPrediction\\n\\nMain.ecl uses the old ML library, while MainMLCore.ecl is the conversion to ML_Core. \\nThe former works perfectly. With the latter (ML_Core) I'm getting a "Not a positive definite matrix" error.\\n\\nDoes anyone have any hints on how to convert from old ML to ML_Core (like any catches???)? \\nOr maybe something about the "Not a positive definite matrix" error?\\nI do see differences between ML and ML_Core (like the new "work item" field when calling ToField()), but I still can't figure out what's wrong with my code.\\n\\n\\nThanks for the help!\", \"post_time\": \"2018-09-08 02:46:16\" },\n\t{ \"post_id\": 677, \"topic_id\": 179, \"forum_id\": 24, \"post_subject\": \"Welcome to the Cloud Forums\", \"username\": \"HPCC Staff\", \"post_text\": \"This is the place to ask questions about your experiences with AWS and HPCC. Please refer to the documentation at http://hpccsystems.com/community/docs/aws-install-thor for more information on how you can install and use HPCC on AWS.\", \"post_time\": \"2011-11-30 14:47:04\" },\n\t{ \"post_id\": 735, \"topic_id\": 189, \"forum_id\": 24, \"post_subject\": \"Amazon AWS - Single node in cluster fails to start\", \"username\": \"HPCC Staff\", \"post_text\": \"Under some circumstances, Thor will not start if the Thormaster is on the same node as the Dropzone. If Thor fails to start and you have already configured your system, please do the following on the node containing the dropzone:\\n\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init stop \\nsudo chown --recursive hpcc /mnt\\nsudo -u hpcc /opt/HPCCSystems/sbin/hpcc-run.sh -a hpcc-init start\\n\\nIf you have not configured your cluster yet, to avoid this known issue please use the following updated steps instead of the section from the beta1 documentation:\\n\\nIf you are using an Instance Store AMI, as recommended, you must tell the HPCC Systems Thor Platform where your instance storage is located. 
\\n\\nIn the Configuration Manager browser window:\\n\\n• Press the Advanced View button.\\n• Check the Write Access checkbox at the top to enable write-mode.\\n• Click on the Directories link on the left.\\n• Prepend the following directories with /mnt\\n \\n • temp\\n • data\\n • data2\\n • data3\\n • mirror\\n • query\\n
\\n• Click on the Drop Zone link on the left and change the directory entry to /mnt/mydropzone.\\n• Click on the Save icon at the top.\\n• Close this browser window or tab.\\n\\nIf the situation arises where the thorslave does not start, a restart of the whole system might be needed.\\n\\nPlease refer to the new version of the Beta 2 documentation:\\nhttp://hpccsystems.com/community/docs/aws-install-thor\", \"post_time\": \"2011-12-08 21:51:08\" },\n\t{ \"post_id\": 1096, \"topic_id\": 262, \"forum_id\": 24, \"post_subject\": \"Re: Scripted Deployment\", \"username\": \"HPCC Staff\", \"post_text\": \"Hi Anoop, the code is not available at this time. Thank you for reaching out.\", \"post_time\": \"2012-02-13 13:23:13\" },\n\t{ \"post_id\": 1095, \"topic_id\": 262, \"forum_id\": 24, \"post_subject\": \"Scripted Deployment\", \"username\": \"Anoop Patel\", \"post_text\": \"Hi,\\n\\nIs the code\\\\scripting behind your aws management page available for download? I would like to deploy your technology onto AWS using an aws manager I have already developed.\\n\\nAnoop\", \"post_time\": \"2012-02-12 03:10:40\" },\n\t{ \"post_id\": 1601, \"topic_id\": 357, \"forum_id\": 24, \"post_subject\": \"Re: Using hpccsystems AMI\", \"username\": \"ckaminski\", \"post_text\": \"Hi Eric,\\n\\nYou are correct. The detailed instructions you found are designed to walk you through a configuration process for a Thor Cluster on Amazon from beginning to end. As you mentioned, it assumes you start with a bare-bones Ubuntu AMI. \\n\\nIt sounds like you also found the AMI we use to support our One-Click Thor portal. You can find the One-Click Thor Portal at https://aws.hpccsystems.com \\n\\nThe easiest way to configure a Thor Cluster on Amazon is to use our One-Click Thor portal. You can usually have a single or multi-node cluster provisioned, configured, tested, and running in under 5 minutes. I recommend you try the One-Click Thor portal first before attempting to configure a cluster on your own. You may not even need to do the latter. You can always come back to the PDF when you want more insight into cluster configuration.\\n\\nThe AMI you found is intended to be used with the portal I mentioned and is not configured "out of the box".\\n\\n-Charles\", \"post_time\": \"2012-05-11 20:22:06\" },\n\t{ \"post_id\": 1599, \"topic_id\": 357, \"forum_id\": 24, \"post_subject\": \"Using hpccsystems AMI\", \"username\": \"eric.scott\", \"post_text\": \"I am new to both HPCC and Amazon Web Services, so please excuse the very elementary questions.\\n\\nI'm also accessing the account through a linux laptop (not windows), in case that is relevant.\\n\\nI've managed to launch an instance in AWS using the public hpcc AMI (version 3.6.0). \\n\\nI can connect through a terminal and navigate the shell locally. I was able to run the configuration manager and access it through a browser, per the instructions in <http://cdn.hpccsystems.com/pdf/RunningHPCCwithinAWS_EC2_Beta1.pdf>, but these instructions seem to assume that I'm starting with a bare-bones linux AMI. 
\\n\\nAm I correct in assuming that the hpcc-systems-community-3.6.0-1-ubuntu-yadda-yadda AMI is already configured?\\n\\nI have more questions, but they all hinge on the answer to the one above.\\n\\nThanks,\", \"post_time\": \"2012-05-11 18:51:44\" },\n\t{ \"post_id\": 2361, \"topic_id\": 487, \"forum_id\": 24, \"post_subject\": \"Re: Hadoop Round Table sign up and learn\", \"username\": \"samisam\", \"post_text\": \"I need to buy this table, how i can brought it in North London?\\n\\n\\n\\nround dining room table\", \"post_time\": \"2012-09-17 11:34:00\" },\n\t{ \"post_id\": 2198, \"topic_id\": 487, \"forum_id\": 24, \"post_subject\": \"Hadoop Round Table sign up and learn\", \"username\": \"jamesb\", \"post_text\": \"Thought this might be interesting to watch and learn about Hadoop, to possibly leverage or find places we can out do Hadoop where Hadoop fails. That is if this round table is worth its weight.\\n\\nhttp://pro.gigaom.com/webinars/mapr-con ... the-cloud/\", \"post_time\": \"2012-08-08 20:24:46\" },\n\t{ \"post_id\": 6729, \"topic_id\": 515, \"forum_id\": 24, \"post_subject\": \"Re: One Click THOR version\", \"username\": \"alison3492\", \"post_text\": \"Thanks for the report, researching testking MB7-702 exam with the documentation and development teams.\\n\\nRegards,\", \"post_time\": \"2014-12-29 18:09:23\" },\n\t{ \"post_id\": 2643, \"topic_id\": 515, \"forum_id\": 24, \"post_subject\": \"Re: One Click THOR version\", \"username\": \"jcoleman\", \"post_text\": \"The AWS one-click site has been updated with the HPCC 3.8.2-2 images. The site has also been re-branded and is now called "HPCC Systems Instant Cloud for AWS".\", \"post_time\": \"2012-10-28 17:55:34\" },\n\t{ \"post_id\": 2329, \"topic_id\": 515, \"forum_id\": 24, \"post_subject\": \"Re: One Click THOR version\", \"username\": \"jcoleman\", \"post_text\": \"The most recent version of the HPCC (3.8.2) will be available on the AWS 1-Click site in the next few days after some internal validation. I will keep you posted. \\n\\nThanks.\\n\\nJack Coleman\", \"post_time\": \"2012-09-12 19:43:30\" },\n\t{ \"post_id\": 2325, \"topic_id\": 515, \"forum_id\": 24, \"post_subject\": \"One Click THOR version\", \"username\": \"curtkohler\", \"post_text\": \"I recently started up a AWS HPCC cluster and noticed it appears to be using version 3.6.0-1. Is there any way to leverage a more recent version of the software in this Beta?\", \"post_time\": \"2012-09-12 14:55:46\" },\n\t{ \"post_id\": 2492, \"topic_id\": 544, \"forum_id\": 24, \"post_subject\": \"Re: Thor on Amazon with on-premise Roxie\", \"username\": \"bforeman\", \"post_text\": \"Once you log into AWS and then Launch Cluster, you should see the entry field for adding any number of Roxie nodes that you need. On the one-click interface they are created together. Rule of thumb is that your THOR nodes should be a multiple of your Roxie nodes, for example if you create 10 THOR nodes, 5 Roxie nodes would be a good choice, and adding 2 support nodes you come up with 17 nodes total created by the one-click.\", \"post_time\": \"2012-10-10 15:34:22\" },\n\t{ \"post_id\": 2491, \"topic_id\": 544, \"forum_id\": 24, \"post_subject\": \"Re: Thor on Amazon with on-premise Roxie\", \"username\": \"arunarav\", \"post_text\": \"Bob: Thanks for the update. Will there be an equivalent of one-click Thor for Roxie? 
Could you kindly point me to the documentation to get Roxie instances going on AWS?\\n\\nThanks\\n-Arun\", \"post_time\": \"2012-10-10 15:26:04\" },\n\t{ \"post_id\": 2484, \"topic_id\": 544, \"forum_id\": 24, \"post_subject\": \"Re: Thor on Amazon with on-premise Roxie\", \"username\": \"bforeman\", \"post_text\": \"I just did a quick check, and it looks like Roxie nodes are now available on the one-click THOR AWS cluster. That would be your best bet.\\n\\nThe rule of thumb for the ratio of THOR to ROXIE clusters is that it be evenly divisible. Typically in a 400-node production cluster you will see a 100 node Roxie.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2012-10-09 13:04:08\" },\n\t{ \"post_id\": 2483, \"topic_id\": 544, \"forum_id\": 24, \"post_subject\": \"Thor on Amazon with on-premise Roxie\", \"username\": \"arunarav\", \"post_text\": \"We would like to create some proof-of-concepts on varying dataset size (ranging from 300 GB to 10 TB). \\n\\nIn order to get a quick ramp-up, we are looking to leverage the cloud. My understanding is that Thor is available on Amazon AWS but Roxie is not yet available. \\n\\nRequest some insight into the following questions:\\n\\n(1) Is this a feasible option (ie Thor on Amazon with on-premise Roxie?)\\n\\n(2) What are the suggestions around using Thor on Amazon with on-premise Roxie? What network and infrastructure would be required to demonstrate ETL on Thor with low latency querying on Roxie? \\n\\n(3) I read on the blog that a thumb rule for estimating Thor disk space is usually about 3 times the size of the aggregated input data. How should we go about estimating the # of Roxie nodes? \\n\\nThanks\", \"post_time\": \"2012-10-09 12:40:27\" },\n\t{ \"post_id\": 2590, \"topic_id\": 564, \"forum_id\": 24, \"post_subject\": \"Re: Spraying a file takes a long time to initiate on AWS\", \"username\": \"bforeman\", \"post_text\": \"Something doesn't add up with your stats displayed (could it be a typo?)\\n\\nIf it's an 8GB file then :\\n@82618KB/s , is not ~ 9m12s ( more like 96s ) \\n\\nDo you know if the data is being pulled or pushed? I assume pulled since I think that is the default.\\n\\nThere should be log files – e.g., ftslave.log (one for each target node). That should tell you when the process was started to read the data and some progress information. There should be a master log (in the dali log?) that traces what happens on the master node.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-22 20:06:05\" },\n\t{ \"post_id\": 2559, \"topic_id\": 564, \"forum_id\": 24, \"post_subject\": \"Spraying a file takes a long time to initiate on AWS\", \"username\": \"arunarav\", \"post_text\": \"Spraying an 8GB file on a 10 node cluster on AWS takes a total of 9 minutes+ but there is a long wait for almost 7 minutes where it stays at 0%. (The actual spraying finishes within 2 minutes once it starts)\\n\\n\\nTime Taken\\t:\\t9m 21s\\nKB Per Second\\t:\\t82618\\nKB Per Second Average\\t:\\t82618\\nProgress Message\\t:\\t100% Done, 0 secs left (987/987MB @82618KB/s) current rate=82618KB/s [10/10nodes]\\nSummary Message\\t:\\tTotal time taken 9m 21s, Average transfer 82618Kb/sec
\\n\\n1. Are there ways of speeding up the initial wait where it stays at 0%?\\n2. Are there any logs we could 'tail' to observe the activity throughout the spraying process?\\n\\nRegards\\nArun\", \"post_time\": \"2012-10-19 12:23:12\" },\n\t{ \"post_id\": 2624, \"topic_id\": 571, \"forum_id\": 24, \"post_subject\": \"Re: Where is spot instance AWS EC2 cluster data user script \", \"username\": \"ideal\", \"post_text\": \"Thanks for your reply. \\n\\nTo be clear, the current link in documentation is : http://s3.amazonaws.com/hpccsystems-ins ... ripts.html\\nIt is located page 24 of document "Running the HPCC Systems Thor Platform within Amazon Web Services" in tutorials topic. \\n\\nThanks,\\nJM.\", \"post_time\": \"2012-10-25 17:18:24\" },\n\t{ \"post_id\": 2609, \"topic_id\": 571, \"forum_id\": 24, \"post_subject\": \"Re: Where is spot instance AWS EC2 cluster data user script \", \"username\": \"jcoleman\", \"post_text\": \"Here is the current link to the user data script.\\n\\nhttps://s3-us-west-2.amazonaws.com/hpcc ... ripts.html\", \"post_time\": \"2012-10-24 13:32:54\" },\n\t{ \"post_id\": 2608, \"topic_id\": 571, \"forum_id\": 24, \"post_subject\": \"Re: Where is spot instance AWS EC2 cluster data user script \", \"username\": \"bforeman\", \"post_text\": \"Thanks for the report, researching with the documentation and development teams.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-24 12:29:17\" },\n\t{ \"post_id\": 2588, \"topic_id\": 571, \"forum_id\": 24, \"post_subject\": \"Where is spot instance AWS EC2 cluster data user script ?\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nIn documentation, there is a manual procedure describing how to install HPCC spot instances cluster on amazon EC2. In this paper, there is a link (http://www.google.com/url?q=http%3A%2F%2Fs3.amazonaws.com%2Fhpccsystems-installs%2Fuserdatascripts.html&sa=D&sntz=1&usg=AFQjCNHidPAPwQlrvY_3T2BhMD-HhfzG5A) to a user data script file. \\n\\nThis link is not available any more, since a few weeks.\\n\\nJM.\", \"post_time\": \"2012-10-22 17:29:36\" },\n\t{ \"post_id\": 2626, \"topic_id\": 572, \"forum_id\": 24, \"post_subject\": \"Re: Cluster Compute and GPU AWS AMI\", \"username\": \"HPCC Staff\", \"post_text\": \"We haven’t developed AMIs for the Cluster Compute instances yet as this is a complex topic with lots of options. We are looking at different approaches to create an AMI and will share our results when ready.\\n\\nThank you!\", \"post_time\": \"2012-10-25 18:15:15\" },\n\t{ \"post_id\": 2607, \"topic_id\": 572, \"forum_id\": 24, \"post_subject\": \"Cluster Compute and GPU AWS AMI\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nIs there some Amazon AMI available, with HPCC systems already installed, that could be launched on AWS "cluster compute instances" and "cluster GPU instances" ?\\n\\nThanks,\\nJM.\", \"post_time\": \"2012-10-23 19:16:10\" },\n\t{ \"post_id\": 2690, \"topic_id\": 579, \"forum_id\": 24, \"post_subject\": \"Re: How to optimize elapsed time to read CSV File - Amazon A\", \"username\": \"arunarav\", \"post_text\": \"Any advice or input on this query would be appreciated since we need to explain this behavior.\", \"post_time\": \"2012-11-03 13:38:58\" },\n\t{ \"post_id\": 2654, \"topic_id\": 579, \"forum_id\": 24, \"post_subject\": \"Re: How to optimize elapsed time to read CSV File - Amazon A\", \"username\": \"arunarav\", \"post_text\": \"Bob,\\n\\n> have you looked at the timings in the ECL Watch and tried to identify the process that eats up the time? 
\\n\\nI've attached a screenshot of ECL Watch in the link below which shows the CSV read as consuming 8+ mins out of the total ~11 minutes. The CSV write operation takes about 3 minutes.\\n\\nhttps://www.dropbox.com/s/tagyzpebruxay ... 20Read.png\\n\\n\\n> which realm are you using?\\n\\nOregon (US West)\\n\\nThanks\\nArun\", \"post_time\": \"2012-10-30 13:21:23\" },\n\t{ \"post_id\": 2653, \"topic_id\": 579, \"forum_id\": 24, \"post_subject\": \"Re: How to optimize elapsed time to read CSV File - Amazon A\", \"username\": \"bforeman\", \"post_text\": \"Also, which realm are you using? US-West uses newer hardware and may produce better results.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-30 12:59:05\" },\n\t{ \"post_id\": 2651, \"topic_id\": 579, \"forum_id\": 24, \"post_subject\": \"Re: How to optimize elapsed time to read CSV File - Amazon A\", \"username\": \"bforeman\", \"post_text\": \"Hi Arun,\\n\\nI'm curious, have you looked at the timings in the ECL Watch and tried to identify the process that eats up the time? \\n\\nIf this is simply an OUTPUT of a file to the cluster, I'm not sure that anything can be done and I think that point number 3 is the reason as you stated.\\n\\nI will throw this question to our "Instant Cloud" team and post back if I have additional information.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-10-30 12:40:32\" },\n\t{ \"post_id\": 2639, \"topic_id\": 579, \"forum_id\": 24, \"post_subject\": \"How to optimize elapsed time to read CSV File - Amazon AWS\", \"username\": \"arunarav\", \"post_text\": \"I am running a proof-of-concept on Amazon (one click thor / 10 thor nodes / no roxie nodes) that reads CSV files of varying sizes and then outputs it back to the disk. \\n\\nSample code as follows:\\n\\n\\nSomeRecordStruct := RECORD\\n\\n INTEGER ID;\\n // 50 snippets of varying sizes of XML in each field\\n STRING XMLSnippet1;\\n STRING XMLSnippet2;\\n ...\\n ...\\n STRING XMLSnippet50;\\n END;\\n\\nSome_Recordset := DATASET('~thor::db::Sample.CSV',SomeRecordStruct,CSV);\\n\\noutput(Some_Recordset,,'~thor::db::output_test.CSV',CSV,OVERWRITE);\\n
\\n \\nThe logical file is perfectly/ evenly distributed across 10 nodes. Typically, it takes the following elapsed times :\\n\\nSize of logical file/Elapsed time for dataset read:\\n\\n> 60 GB/7 minutes\\n> 200 GB/ 20 minutes\\n> 320 GB/ 35 minutes\\n\\nI also tried running the same test on extra large instance with EBS optimized but I did not get appreciably better results. I can post them here as a follow up.\\n\\n1. Are the above numbers typical of the type of elapsed times to read a file?\\n2. Are there ways of speeding up on AWS One click thor (default large instance)? \\n3. Is this because of the inherent characteristic (I/O and network throughput) of the nature of Amazon AWS? (I have not tried running on a dedicated environment yet to compare results).\\n\\nIf the experts could shed light on the above, it would be very helpful.\\n\\nThanks\\nArun\", \"post_time\": \"2012-10-27 16:38:38\" },\n\t{ \"post_id\": 2756, \"topic_id\": 598, \"forum_id\": 24, \"post_subject\": \"Re: multiple Thor processes - same cluster - One click thor\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nAlso take a look at this thread: http://hpccsystems.com/bb/viewtopic.php?f=14&t=567&sid=fdac010aaefa2a18949322cba9229ad6\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-11-09 19:38:38\" },\n\t{ \"post_id\": 2755, \"topic_id\": 598, \"forum_id\": 24, \"post_subject\": \"Re: multiple Thor processes - same cluster - One click thor\", \"username\": \"rtaylor\", \"post_text\": \"Arun,\\n\\nI do not believe that is possible with a "One-click" Amazon cluster -- you're pretty much limited to single Thor and/or Roxie clusters and they boot with default configurations. \\n\\nIt may be possible if you setup your own Amazon cluster -- take a look here: http://hpccsystems.com/download/docs/aws-install-thor\\n\\nIt is definitely possible if you load the Community Edition onto your own set of boxes and configure them for multiple instances of Thor on each box.\\n\\nRichard\", \"post_time\": \"2012-11-09 19:32:06\" },\n\t{ \"post_id\": 2749, \"topic_id\": 598, \"forum_id\": 24, \"post_subject\": \"multiple Thor processes - same cluster - One click thor\", \"username\": \"arunarav\", \"post_text\": \"I am trying to setup a proof of concept which needs multiple Thor processes running on the same cluster (sharing the same Dali and thereby having access to the cluster's files). \\n\\nIs this possible with 'One click Thor'? If not, what is the most optimal way of achieving this on the AWS cloud?\\n\\nThanks\\n-Arun\", \"post_time\": \"2012-11-09 01:56:54\" },\n\t{ \"post_id\": 2800, \"topic_id\": 608, \"forum_id\": 24, \"post_subject\": \"Re: Load Balancer for Roxie - One click thor\", \"username\": \"jcoleman\", \"post_text\": \"Clusters started from the one-click solution (or Instant Cloud as we now call it), are no different traditional clusters. You get all of the benefits of an HPCC Systems cluster with Instant Cloud, including ESP load balancing across Roxie nodes.\", \"post_time\": \"2012-11-16 14:11:43\" },\n\t{ \"post_id\": 2792, \"topic_id\": 608, \"forum_id\": 24, \"post_subject\": \"Load Balancer for Roxie - One click thor\", \"username\": \"arunarav\", \"post_text\": \"Does One click thor provide the ability to hit a load balancer IP address whereby queries are subsequently routed to multiple Roxie instances? 
If not, do I have to hit each specific Roxie IP address?\\n\\nI noticed the following statement in this thread - viewtopic.php?f=14&t=486: \\n\\n"If you are using a recent HPCC Systems build then ESP load balancing across the roxie cluster should happen automatically."\\n\\nIs the above available in one click thor?\\n\\nthanks\\narun\", \"post_time\": \"2012-11-16 06:17:04\" },\n\t{ \"post_id\": 2922, \"topic_id\": 620, \"forum_id\": 24, \"post_subject\": \"Re: Roxie throughput on AWS\", \"username\": \"jeeves\", \"post_text\": \"Bob,\\n\\nI will run the tests again and get back.\\n\\nThanks,\\nDavid\", \"post_time\": \"2012-12-03 09:24:58\" },\n\t{ \"post_id\": 2890, \"topic_id\": 620, \"forum_id\": 24, \"post_subject\": \"Re: Roxie throughput on AWS\", \"username\": \"bforeman\", \"post_text\": \"From our HPCC team:\\n\\nRoxie itself is not a very CPU intensive process, disk I/O yes but not CPU. So if this is simply an index hit/data retrieval then something is amiss. If you are doing quite a bit of post data processing using ECL then that could also be the issue.\\n\\nThat said, performance on AWS is slower than a standard cluster, but it should be better than 250kb/sec.\\n\\nThe CPU peg is suspect. Which process is taking the CPU when pegged? Is it CCD?\\n\\nRun a 'top' program on the node during this test to see which process is consuming all the CPU.\\n\\nIf it is CCD then we need to look at the ECL. If it apache or some java app then there is not much we can do there.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-27 14:56:14\" },\n\t{ \"post_id\": 2831, \"topic_id\": 620, \"forum_id\": 24, \"post_subject\": \"Roxie throughput on AWS\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nWe are performance testing a simple Roxie query on AWS. The query itself simply extracts one row from a logical file with 100 rows based on an unique identifier.\\nThe amount of data in one row is around 100 KB.\\n\\nThe issue we are seeing is that the throughput is very low. It is only 2.5 queries per second. I am sure this is below par performance. Can anyone through any light on \\nthis? Could we be seeing this because we are running on a shared environment( AWS)?\\n\\nThe CPU usage shoots to 100% the moment we have two concurrent threads(Apache Jmeter) running.\\n\\nDetails:\\n\\nnodes in roxie cluster : 1\\n\\nnode details:\\n\\ncores - 2 virtual cores\\nRAM 7.5 GB\\nDisk 850 GB\\n\\nPerformance test results:(Attached)\", \"post_time\": \"2012-11-21 10:10:42\" },\n\t{ \"post_id\": 2880, \"topic_id\": 632, \"forum_id\": 24, \"post_subject\": \"Re: Roxie in AWS\", \"username\": \"bforeman\", \"post_text\": \"Can you post the logs related to this query please?\\nThis is similar to another repost we received over the holiday weekend.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-11-26 18:03:46\" },\n\t{ \"post_id\": 2857, \"topic_id\": 632, \"forum_id\": 24, \"post_subject\": \"Roxie in AWS\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nWhen trying to invoke a query published in Roxie(Amazon AWS - One Click Thor) we got error provided below.\\n\\nThis error occurs only when we have multiple thor nodes. 3 node roxie + 1 node thor works fine. However 3 node roxie + 7 node thor always gives the error provided below. 
\\n\\n<online_retrieval_v1Response>\\n<Result>\\n<Exception>\\n<Source>Roxie</Source>\\n<Code>1406</Code>\\n<Message>Failed to get response from slave(s) for uid=0x00000003 activityId=2 pri=LOW queryHash=3cbd15d724d3e54c ch=1 seq=0 cont=0 server=10.244.152.25 retries=400f BROADCAST in activity 2</Message>\\n</Exception>\\n</Result>\\n</online_retrieval_v1Response>
\\n\\nIs anyone familiar with this kind of error.\\n\\nThanks,\\n-David\", \"post_time\": \"2012-11-26 08:29:25\" },\n\t{ \"post_id\": 3255, \"topic_id\": 728, \"forum_id\": 24, \"post_subject\": \"Re: aborted workunit issue\", \"username\": \"bforeman\", \"post_text\": \"Hi Jacob,\\n\\nSend me the IP address of the cluster in a private email and I will look at it. This caused a problem for Richard one time when he had paused a workunit inadvertently.\\n\\nIf I can't resolve it I will ask a member of our HPCC team to take a look.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-01-30 21:57:07\" },\n\t{ \"post_id\": 3253, \"topic_id\": 728, \"forum_id\": 24, \"post_subject\": \"aborted workunit issue\", \"username\": \"jkatzen\", \"post_text\": \"Wasn't sure where to post this, but since I'm running an AWS HPCC cluster, I figured here would be appropriate.\\n\\nAnyway, I have a cluster running a workunit that I cannot seem to get rid of. On the ECL Watch page, the workunit is under dfuserver_queue as running, however going to the workunit, the state says 'failed.' I have tried a number of different things to get rid of it, which includes going onto the DFU server and running dfuplus abort on the workunit. The command comes back as having aborted, however it remains there. Even when I browse DFU Workunits and try to delete it, the page will hang and then fail to delete the workunit after a minute or so.\\n\\nAny helpful input would be welcome. I think this is also causing an issue with the dropzone because the upload/download file page now hangs and the popup page when you try to spray a file also hangs. I know the drive and files are still present though because I ssh connected to the dropzone server and saw that it was still mounted and checked that all the files were still there.\", \"post_time\": \"2013-01-30 21:45:01\" },\n\t{ \"post_id\": 4070, \"topic_id\": 904, \"forum_id\": 24, \"post_subject\": \"Re: Spraying from S3\", \"username\": \"joe.chambers\", \"post_text\": \"You have to unzip it first. You can upload it and unzip with ECL pipe.\\n\\nHere is an example of how to unzip it with ECL pipe:\\n\\nstring filename := 'stocks_20130417.csv.gz' : STORED('filename');\\nboolean gunzip := true : STORED('gunzip');\\nboolean bzip := false : STORED('bzip');\\n\\nrTest := record\\nstring1000 s;\\nend;\\n\\ndTest := PIPE(MAP(gunzip=>'gunzip', 'bzip2 -d ') + ' /var/lib/HPCCSystems/dropzone/' + filename, rTest, csv);\\noutput(dTest);\\n
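\\n\\nA related sketch (the file name comes from the example above; the server and landing-zone addresses are only placeholders): if you would rather decompress on the landing zone itself and then spray the result as an ordinary CSV, something along these lines should work from a shell:\\n\\ngunzip /var/lib/HPCCSystems/dropzone/stocks_20130417.csv.gz\\ndfuplus action=spray server=http://<esp-ip>:8010 srcip=<landingzone-ip> \\\\\\n  srcfile=/var/lib/HPCCSystems/dropzone/stocks_20130417.csv \\\\\\n  dstname=thor::stocks dstcluster=mythor format=csv overwrite=1\\n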
\", \"post_time\": \"2013-05-09 18:36:12\" },\n\t{ \"post_id\": 4069, \"topic_id\": 904, \"forum_id\": 24, \"post_subject\": \"Re: Spraying from S3\", \"username\": \"joe.chambers\", \"post_text\": \"There are a few ways you can spray from S3 but you can't spray directly from S3. The easiest is to use one of the amazon utilities such as s3fs which will allow you to mount your s3 bucket as a drive and use it as the landing zone. Depending on your file count and size this is usually a decent solution. In many cases its quicker to copy the file over and work with it than using the s3 bucket as a mounted drive though.\\n\\nI'm checking into the gz question, there has been talk about implementing this but I haven't tried it on the newest release.\", \"post_time\": \"2013-05-09 18:20:27\" },\n\t{ \"post_id\": 4067, \"topic_id\": 904, \"forum_id\": 24, \"post_subject\": \"Spraying from S3\", \"username\": \"nvasil\", \"post_text\": \"Is it possible to spray data directly from s3\\nalso my files are in gz format is it possible to spray them directly without unzipping them?\", \"post_time\": \"2013-05-09 04:20:58\" },\n\t{ \"post_id\": 4458, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"jcoleman\", \"post_text\": \"If you are timing out, then most likely you are getting blocked by a corporate firewall.\", \"post_time\": \"2013-08-14 16:33:46\" },\n\t{ \"post_id\": 4457, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Thanks for all the help:\\nOK good thru step 5 (no prompt for yes/no) but successfull.\\n Passphrase updated in keychain: keyxxxx.pem\\n Identity added: keyxxx.pem finished adding, all is good.\\n\\nStep 6, times out.\\n ssh ubuntu@ip.address.of.landingzone.node (accept the prompts with "yes")\", \"post_time\": \"2013-08-14 16:19:10\" },\n\t{ \"post_id\": 4454, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"jcoleman\", \"post_text\": \"Try this:\\n1. Download the .pem key (say, to /Users/greg)\\n2. Open the OSX terminal and issue the following commands:\\n3. cd /Users/greg \\n4. chmod 400 Hpcc-key1.pem (use your key name)\\n5. ssh-add -K Hpcc-key1.pem (accept the prompts with "yes")\\n6. ssh ubuntu@ip.address.of.node (accept the prompts with "yes")\\n\\nLet me know if this works for you.\\n\\nJack\", \"post_time\": \"2013-08-14 01:26:03\" },\n\t{ \"post_id\": 4453, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"MacBookPro User: SSH key downloaded and trying to use for Secure Shell (ssh) to access landing zone ip.\\n1) Downloaded .PEM file.\\n2) Trying to store downloaded .PEM file in keychain. 
\\n Error: Unable to import an item.\\n The contents of this item cannot be retrieved.\\n\\nI found this info:\\nKeyChain Access recognize a certificate file:\\nPEM encoded, extension .p7r, .p7b, .p7m, .p7c, or .p7s\\n\\nCould this be the reason, the PEM file is not encoded or extension is invalid.\", \"post_time\": \"2013-08-14 01:01:41\" },\n\t{ \"post_id\": 4209, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"jcoleman\", \"post_text\": \"Sorry for the confusion.\\n\\nThe user ID that you should use is "ubuntu". When connecting to a node in AWS, you will need to download the SSH key as described on page 15, and use it to connect with your SSH client. \\n\\nLet me know if you need more details.\", \"post_time\": \"2013-06-14 20:51:53\" },\n\t{ \"post_id\": 4208, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Re: Install and Configure S3 packages on your Landing Zone n\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Even if you don't know Mac...if you know the USERID to use that would be helpful.\", \"post_time\": \"2013-06-14 19:38:06\" },\n\t{ \"post_id\": 4204, \"topic_id\": 934, \"forum_id\": 24, \"post_subject\": \"Install and Configure S3 packages on your Landing Zone node\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Reading the documentation it states:\\n"To move files to or from S3 storage, the S3 packages must be installed and configured on your Landing Zone node.\\n1. Open a console window and connect to the Landing Zone (LZ) node"\\n\\nOK, I know the IP address for the landing zone node, but this simple connection process I need help with....\\n\\nMy request for help is:\\nI'm using a Terminal session on a Mac and was wondering if the steps could be outlined a bit:\\n\\nExample: Using Terminal on a Mac\\n1) select "Shell/New Remote Connection\\n2) select "Secure Shell" and add the landing zones IP address as a "server"\\n3) enter the UserID that you use to...etc\", \"post_time\": \"2013-06-13 21:35:52\" },\n\t{ \"post_id\": 5163, \"topic_id\": 1185, \"forum_id\": 24, \"post_subject\": \"Re: Terminating a cluster on AWS\", \"username\": \"bforeman\", \"post_text\": \"Latest Docs:\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-4.2.0/docs/InstantCloud_for_AWS-4.2.0-1.pdf\\n\\nThe AWS page also has good docs on EC2 and Elastic Block Storage.\\n\\n\\nFinally, there is our own Cloud service:\\nhttp://hpccsystems.com/products-and-services/services/cloud\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-01-29 18:52:45\" },\n\t{ \"post_id\": 5162, \"topic_id\": 1185, \"forum_id\": 24, \"post_subject\": \"Re: Terminating a cluster on AWS\", \"username\": \"Greg\", \"post_text\": \"Thanks Bob.\\n\\nIs there documentation this by any chance?\\n\\nAlso, do you see any other alternatives to AWS?\", \"post_time\": \"2014-01-29 18:41:20\" },\n\t{ \"post_id\": 5161, \"topic_id\": 1185, \"forum_id\": 24, \"post_subject\": \"Re: Terminating a cluster on AWS\", \"username\": \"bforeman\", \"post_text\": \"Since it's not currently configured for static IP, i'm afraid if I terminate, the following will be lost:\\n\\n1: The IP address it's currently sitting on\\n2: The drop-zone data? Or are these retained in the volume, even after terminated?\\nAny suggestions?
\\n\\nIn order to stop paying you have to terminate the cluster, and yes, you lose the IP address and all data on the cluster and drop zone.\\n\\nWhat we do in training is to create a snapshot using the EB2 protocol, and then store all of the data to spray on that volume. After we start up the cluster, we attach that EB2 storage snapshot at the initialization phase, and then after the cluster is started we use the Spray library functions to automatically spray any data needed to the newly created cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-01-29 15:51:46\" },\n\t{ \"post_id\": 5160, \"topic_id\": 1185, \"forum_id\": 24, \"post_subject\": \"Terminating a cluster on AWS\", \"username\": \"Greg\", \"post_text\": \"Is there a way to simply stop the instances as opposed to terminating?\\n\\nSince it's not currently configured for static IP, i'm afraid if I terminate, the following will be lost:\\n\\n1: The IP address it's currently sitting on\\n2: The drop-zone data? Or are these retained in the volume, even after terminated?\\n\\nBasically, I don't want to have the nodes running unless they're being used (otherwise we're paying for nothing).\\n\\nAny suggestions?\", \"post_time\": \"2014-01-29 14:38:13\" },\n\t{ \"post_id\": 5966, \"topic_id\": 1358, \"forum_id\": 24, \"post_subject\": \"Re: EBS volume as root volume for instance\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm starting to learn how to use juju charm to setup an HPCC System on AWS. This is possible and it gives you more flexibility. So, if you want your root volume EBS. There is a learning curve (see https://jujucharms.com/). Also, there is a setup procedure on github (https://github.com/hpcc-systems/HPCC-Platform) for setting up juju charm on linux box. And, there is a step-by-step procedure in the readme.md file of the charm folder.\", \"post_time\": \"2014-06-25 15:07:39\" },\n\t{ \"post_id\": 5916, \"topic_id\": 1358, \"forum_id\": 24, \"post_subject\": \"Re: EBS volume as root volume for instance\", \"username\": \"mrashti\", \"post_text\": \"Thank you. Yes, I know that in general you could set an EBS volume as your instance's root. I just wanted to see if your Cloud setup script on AWS supports that or not. I still can attach my volume to the instance, which is helpful, but I need to re-install all other software that I need, every time I come back, since I need to terminate the cluster and not able to stop/restart it.\", \"post_time\": \"2014-06-19 14:10:06\" },\n\t{ \"post_id\": 5915, \"topic_id\": 1358, \"forum_id\": 24, \"post_subject\": \"Re: EBS volume as root volume for instance\", \"username\": \"tlhumphrey2\", \"post_text\": \"I stand corrected about setting an EBS volume as the root volume. A colleague of mine showed me the following link: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/RootDeviceStorage.html, which says an EBS volume can be a root volume.\\n\\nBut, currently our Instant Cloud doesn't use EBS.\", \"post_time\": \"2014-06-19 14:06:41\" },\n\t{ \"post_id\": 5914, \"topic_id\": 1358, \"forum_id\": 24, \"post_subject\": \"Re: EBS volume as root volume for instance\", \"username\": \"tlhumphrey2\", \"post_text\": \"I ask one of our CLOUD experts about your question. 
He said that "We cannot set an EBS volume as the root volume."\\n\\nWe have considered providing the capability to stop a cluster and have a snapshot of its current state saved so that later you can restart the cluster where you previously stopped it.\\n\\nBut, currently we don't have that capability.\", \"post_time\": \"2014-06-19 13:54:57\" },\n\t{ \"post_id\": 5907, \"topic_id\": 1358, \"forum_id\": 24, \"post_subject\": \"EBS volume as root volume for instance\", \"username\": \"mrashti\", \"post_text\": \"Is there a way that we can set an EBS volume as the root volume for the instances to be launched. I need to stop/start my instances and not need to re-install / reconfigure the OS every time by launching a new HPCC cluster. I want to launch once and stop when I do not need, resume back when needed.\\nAlso wanted to check if I can change the type of instances (currently they m1.large is launched).\", \"post_time\": \"2014-06-18 20:43:10\" },\n\t{ \"post_id\": 6252, \"topic_id\": 1425, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Systems® Instant Cloud for AWS Beta\", \"username\": \"greg.whitaker\", \"post_text\": \"Update: Eclipse Preferences - Install/Update - a not so obvious link that says UnInstall/Update - clicked on that and noticed the Ecl Language Version was still 4.2.xx.\\nSo I'll change that...\", \"post_time\": \"2014-08-23 23:42:17\" },\n\t{ \"post_id\": 6251, \"topic_id\": 1425, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Systems® Instant Cloud for AWS Beta\", \"username\": \"greg.whitaker\", \"post_text\": \"Ok, I can see the instant cloud is now 5.0.0-3, thanks for the quick turnaround.\\nI might need to do something local, still isn't working out for me.\\n\\nThe eclwatch test is successful, to that IP and port 8010 so the communication is there.\\n\\nHowever, still can't get WU onto instance. \\nI switched target to hthor then to roxie..no diff.\\n\\nIf all I do is switching back to 4.2.2 version of client tools in my compile overrides and re-run it functions correctly. \\n\\nChecked file permissions on directories and files in both 4.2.2 and 5.0.0 and they appear to be the same.\\nThe eclipse console seems to issue the same command whether in 4.2..and 5.0. other than pointing to the different directories.\\nNo errors show in the eclipse console for either method.\\n\\nRestarted Eclipse in between steps...no diff.\\nBut no wu ever shows up in eclwatch. I think I'm missing some basic step but thats why Im here asking, any suggestions would be greatly appreciated.\", \"post_time\": \"2014-08-23 22:03:40\" },\n\t{ \"post_id\": 6239, \"topic_id\": 1425, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Systems® Instant Cloud for AWS Beta\", \"username\": \"tlhumphrey2\", \"post_text\": \"Instant Cloud is now using HPCC 5.0.0-3.\", \"post_time\": \"2014-08-19 19:03:39\" },\n\t{ \"post_id\": 6232, \"topic_id\": 1425, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Systems® Instant Cloud for AWS Beta\", \"username\": \"tlhumphrey2\", \"post_text\": \"Yes. Instant Cloud needs to be updated to 5.0.0. I'll look into it. And post when it is updated.\", \"post_time\": \"2014-08-18 14:25:59\" },\n\t{ \"post_id\": 6231, \"topic_id\": 1425, \"forum_id\": 24, \"post_subject\": \"HPCC Systems® Instant Cloud for AWS Beta\", \"username\": \"greg.whitaker\", \"post_text\": \"Eclipse on MAC, accessing AWS instance.\\nUpdated to version 5.0. 
I noticed the HPCC Instant cloud for AWS deploys 4.2.x.\\nWhen I try to compile and run it isn't completing, no WU shows up in eclwatch.\\nIf I change the location of ECLCC Compiler installed Client Tools back to 4.2.X\\neverything works fine.\\n\\nIs this something where we just need to wait for the AWS deployment to get updated to 5.0?\", \"post_time\": \"2014-08-18 13:31:31\" },\n\t{ \"post_id\": 6286, \"topic_id\": 1438, \"forum_id\": 24, \"post_subject\": \"Re: Format of file parts of XML logical file.\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nWhen you spray a "well-formed" XML file, you end up with a "well-formed" XML file as each of its component parts of the HPCC logical file. For eaxmple, spraying this file to my 3-node training cluster:<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>201</code><description>PA Pennsylvania</description></area>\\n<area><code>202</code><description>OH Ohio</description></area>\\n<area><code>203</code><description>IL Illinois</description></area>\\n</Dataset>
\\nwill result in something like these files:\\nNode 1<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>201</code><description>PA Pennsylvania</description></area>\\n</Dataset>
Node 2<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>202</code><description>OH Ohio</description></area>\\n</Dataset>
Node 3<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>203</code><description>IL Illinois</description></area>\\n</Dataset>
\\nOr possibly like these (depending on exactly how spray decides to split the data):\\nNode 1<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>201</code><description>PA Pennsylvania</description></area>\\n<area><code>202</code><description>OH Ohio</description></area>\\n</Dataset>
Node 2<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n<area><code>203</code><description>IL Illinois</description></area>\\n</Dataset>
Node 3<?xml version="1.0" encoding="utf-8" ?>\\n<Dataset>\\n</Dataset>
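\\nFor reference, a minimal dfuplus sketch of the kind of spray that produces these file parts (the server, landing-zone IP, and names below are only placeholders):\\n\\ndfuplus action=spray server=http://<esp-ip>:8010 srcip=<landingzone-ip> \\\\\\n  srcfile=/var/lib/HPCCSystems/mydropzone/areas.xml \\\\\\n  dstname=thor::areas dstcluster=mythor format=xml rowtag=area overwrite=1\\n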
\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-09-10 13:48:41\" },\n\t{ \"post_id\": 6282, \"topic_id\": 1438, \"forum_id\": 24, \"post_subject\": \"Format of file parts of XML logical file.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm storing a XML file on S3 and want to break the file into file parts that look exactly like those file parts created when one does a spray. I believe, I know all the steps to do this EXCEPT the format of the content of the file parts files.\\n\\nCan someone tell me the format of an file parts of an XML file that is sprayed?\", \"post_time\": \"2014-09-09 20:05:15\" },\n\t{ \"post_id\": 6592, \"topic_id\": 1504, \"forum_id\": 24, \"post_subject\": \"Re: Remote Dali access in AWS\", \"username\": \"omnibuzz\", \"post_text\": \"That did help. Thanks a lot.\\n-Srini\", \"post_time\": \"2014-11-12 00:28:25\" },\n\t{ \"post_id\": 6575, \"topic_id\": 1504, \"forum_id\": 24, \"post_subject\": \"Re: Remote Dali access in AWS\", \"username\": \"jsmith\", \"post_text\": \"> 1. What is the protocol/port that is used to copy the data to a remote cluster. \\n\\nBy default Dali is on port 7070 (Tcp)\\n\\n2. Does the cluster need to have access to Dali or all the nodes and what kind of access.\\n\\nTo access the parts on the Thor slaves, it will need access to dafilesrv on those slave nodes. That is listening on port 7100 (Tcp)\\n\\n3. Do I give the private IP or the public IP when connecting to Dali.\\n\\nThat depends on how you've configured the cluster it's reading from.\\nBy default it will bind to the 1st interface, but you can define which interface the setup uses by changing 'interface=*' in /etc/HPCCSystems/environment.conf to the specific interface you want, e.g. to 'interface=eth0'\\n\\nHope that helps.\", \"post_time\": \"2014-11-06 18:24:36\" },\n\t{ \"post_id\": 6566, \"topic_id\": 1504, \"forum_id\": 24, \"post_subject\": \"Re: Remote Dali access in AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"I've asked an HPCC architect to answer your question.\", \"post_time\": \"2014-11-05 18:33:15\" },\n\t{ \"post_id\": 6563, \"topic_id\": 1504, \"forum_id\": 24, \"post_subject\": \"Remote Dali access in AWS\", \"username\": \"omnibuzz\", \"post_text\": \"Hi - I am running a Thor cluster and a Roxie cluster separately in AWS. After building the index in Thor, I would like to push it to Roxie by pointing to remote Dali. It errors out saying cannot connect to remote Dali. I have a few questions regarding that. \\n1. What is the protocol/port that is used to copy the data to a remote cluster. \\n2. Does the cluster need to have access to Dali or all the nodes and what kind of access.\\n3. Do I give the private IP or the public IP when connecting to Dali.\\n\\nIf there are any instructions to do it, that would be great too.\\n-Srini\", \"post_time\": \"2014-11-05 16:10:21\" },\n\t{ \"post_id\": 6642, \"topic_id\": 1530, \"forum_id\": 24, \"post_subject\": \"lighter instance for the support node?\", \"username\": \"omnibuzz\", \"post_text\": \"If I am running a 5 node cluster. using the config utility, I am able to effectively use only 4 nodes for thor (1 goes to support). Is it okay, if I keep the support node smaller in size when compared to the thor slaves? 
Or will there be any data intensive task going on in the support node too?\\nThanks\\nSrini\", \"post_time\": \"2014-11-30 17:14:56\" },\n\t{ \"post_id\": 6925, \"topic_id\": 1593, \"forum_id\": 24, \"post_subject\": \"Re: How many queries can Roxie handle simultaneously?\", \"username\": \"rtaylor\", \"post_text\": \"Tim,\\n\\nThe answer is, of course, the number of threads per Roxie node times the number of nodes in the Roxie. I've heard of 30 threads before (not 40) but this is configuration-based, so YMMV. \\n\\nSo, with 30 threads per node and a 100-node Roxie, that would be 3000 simultaneous queries. A 20-node Roxie with 40 threads per node gets you 800 simultaneous queries. ... \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-02-09 18:21:01\" },\n\t{ \"post_id\": 6923, \"topic_id\": 1593, \"forum_id\": 24, \"post_subject\": \"How many queries can Roxie handle simultaneously?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I'm placing this under CLOUD because the person who asked me the question wants to configure and deploy an HPCC System on AWS.\\n\\nThe answer, below, was given by an in-house expert.\\n\\nAnswer:\\n\\nAssuming you have configured your roxie as an N way roxie – the data will be spread across all N nodes. Again configuration dependent – there will often be 2 or more copies spread across the N nodes.\\n\\nEach roxie farmer can take some number of threads – again this is configuration based – and depends upon things such as memory each farmer needs. I have come across 40 threads per farmer.\\n\\nAdditionally it is common to have a farmer on every node fronted with some kind of virtual IP. On our prod roxies we often had 100 nodes each with at least a dozen threads allowing for 1200 threads simultaneously.\\n\\nOf course all of this is dependent upon the configuration (and query)\\n
\", \"post_time\": \"2015-02-09 18:09:48\" },\n\t{ \"post_id\": 7745, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"Lee_Meadows\", \"post_text\": \"@tlhumphrey2 \\n\\n Tim, sorry I missed your earlier posting towards me. Do you work at LN/RE ?\\n\\nIf so, Flavio has my email, you can get it form him and we can talk offline. I don't want to post up my email on here.\\n\\nLee\", \"post_time\": \"2015-06-11 11:30:22\" },\n\t{ \"post_id\": 7744, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"tlhumphrey2\", \"post_text\": \"Subbu,\\n\\nSorry for the really late response. I just saw your post.\\n\\nI would like to get more understanding on your finding. You have mentioned that you have used 1BG of data. I am little confused on what you have used to find out the results of 2 Min and 29 Seconds. Is it 1 TB data or 1 GB data?\\n
\\nI wasn't very clear with my numbers, was I. The total size of the data was 1TB. The size of each record was 100 bytes. So, there were 10 billion records.\\nAlso, you said that you have got 2 instances of EC2 (i2.8xlarge - 244 GB RAM - Total 488 GB RAM) which has 32 Thor Slave Nodes configured in total (16 on each instance). What was the slave node configuration? Are we missing something on the Sort Timings given that 488 GB RAM used?\\n
\\nThere are 8 x 800 SSD disk drives on the i2.8xlarge which were raided to make one large volume. So, during the sort there had to be some spilling to disk.\\nMy basic question: \\nI was under impression that you can configure only one Slave on given instance. How do we configure multiple Slave nodes in a instance? It would be great helpful if you have any pointers for the same
\\nYou can configure more than one slave node per instance. I've been using envgen to do so. Here is my envgen command:\\n/opt/HPCCSystems/sbin/envgen -env $created_environment_file -ipfile $private_ips \\\\\\n -supportnodes $supportnodes \\\\\\n -thornodes $thornodes \\\\\\n -roxienodes $roxienodes \\\\\\n -slavesPerNode $slavesPerNode \\\\\\n -roxieondemand 1\\n
\\nwhere $created_environment_file is the name of the new environment.xml file that this command creates; $private_ips is the name of the file containing the private ip addresses for the instances you launched where the 1st IP in the file is for the master and other support functions, the next $thornodes IPs in the file are the instances that will have thor slave nodes, and the final $roxienodes IPs in the file are the instances that will have roxie nodes.\\n\\nYou will notice that one of the parameters for this command is -slavesPerNode and $slavesPerNode is the number of thor slave nodes you want per instance.\\n\\nWhen you execute this command, make sure you have 1st stopped the HPCC System. And, after you have executed this command, use hpcc-push.sh to push the new environment file to every instance of your HPCC System. My hpcc-push.sh looks like:\\n /opt/HPCCSystems/sbin/hpcc-push.sh \\\\\\n -s $created_environment_file \\\\\\n -t /etc/HPCCSystems/environment.xml \\n
\\nwhere $created_environment_file is the same as in the envgen command.\\n\\nOnce this is all done, make sure you restart the HPCC System.\", \"post_time\": \"2015-06-10 16:24:42\" },\n\t{ \"post_id\": 7164, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"kps_mani\", \"post_text\": \"Hi Tim,\\nI would like to get more understanding on your finding. You have mentioned that you have used 1BG of data. I am little confused on what you have used to find out the results of 2 Min and 29 Seconds. Is it 1 TB data or 1 GB data?\\n\\nAlso, you said that you have got 2 instances of EC2 (i2.8xlarge - 244 GB RAM - Total 488 GB RAM) which has 32 Thor Slave Nodes configured in total (16 on each instance). What was the slave node configuration? Are we missing something on the Sort Timings given that 488 GB RAM used?\\n\\nMy basic question: \\nI was under impression that you can configure only one Slave on given instance. How do we configure multiple Slave nodes in a instance? It would be great helpful if you have any pointers for the same.\\n\\nRegards,\\nSubbu\", \"post_time\": \"2015-03-22 21:12:13\" },\n\t{ \"post_id\": 7052, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"tlhumphrey2\", \"post_text\": \"Lee,\\n\\nCan you post the results on got when you ran terasort? I know others would appreciate it. A table with 1) thor execution time, instance type, number of slave instances, and number of slaves per instance.\\n\\nBy the way, here is one result I got when executing the terasort with 1BG of data.\\n\\nIt ran 2 minutes and 29 seconds on a THOR that had 2 ec2 i2.8xlarge instances for slave nodes, each having 16 slave nodes per instance. This was better than our internal cloud, REIL100, which had 100 slave nodes.\\n\\nTim\", \"post_time\": \"2015-03-02 20:26:14\" },\n\t{ \"post_id\": 7046, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"Lee_Meadows\", \"post_text\": \"Ok great, that's the same code I'm using for the terasort.\\n\\nI actually have some interesting findings on timings on an 8 node cluster, each with 32 cores. 7 thor slave nodes with [1,4,8,16,27,30,32] slaves per node. The fastest was the 27 per node. (for a 1TB file)\\n\\nUsing Ganglia, it was really easy to see the stats of cpu_io wait across the cluster, along with all the other cpu,IO and memory utilization metrics.\\n\\nOf course this was just a test of terasort, so YMMV as other job profiles could be different.\", \"post_time\": \"2015-03-02 15:03:01\" },\n\t{ \"post_id\": 7041, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I did run the terasort described here: \\nhttp://www.ordinal.com/gensort.html, but I didn't record the times. 
The ecl code for creating a 1GB dataset and distributing it across all slave nodes of the THOR follows: // Generate standard terasort datafile\\n\\nunsigned8 numrecs := 1000000000/CLUSTERSIZE : stored('numrecs'); // rows per node\\n\\nrec := record\\n string10 key;\\n string10 seq;\\n string80 fill;\\n end;\\n\\nseed := dataset([{'0', '0', '0'}], rec);\\n\\nrec addNodeNum(rec L, unsigned4 c) := transform\\n SELF.seq := (string) (c-1);\\n SELF := L;\\n END;\\n\\none_per_node := distribute(normalize(seed, CLUSTERSIZE, addNodeNum(LEFT, COUNTER)), (unsigned) seq);\\n\\nrec fillRow(rec L, unsigned4 c) := transform\\n\\n SELF.key := (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32)+\\n (>string1<)(RANDOM()%95+32);\\n\\n unsigned4 n := ((unsigned4)L.seq)*numrecs+c;\\n SELF.seq := (string10)n;\\n unsigned4 cc := (n-1)*8;\\n string1 c1 := (>string1<)((cc)%26+65);\\n string1 c2 := (>string1<)((cc+1)%26+65);\\n string1 c3 := (>string1<)((cc+2)%26+65);\\n string1 c4 := (>string1<)((cc+3)%26+65);\\n string1 c5 := (>string1<)((cc+4)%26+65);\\n string1 c6 := (>string1<)((cc+5)%26+65);\\n string1 c7 := (>string1<)((cc+6)%26+65);\\n string1 c8 := (>string1<)((cc+7)%26+65);\\n SELF.fill := c1+c1+c1+c1+c1+c1+c1+c1+c1+c1+\\n c2+c2+c2+c2+c2+c2+c2+c2+c2+c2+\\n c3+c3+c3+c3+c3+c3+c3+c3+c3+c3+\\n c4+c4+c4+c4+c4+c4+c4+c4+c4+c4+\\n c5+c5+c5+c5+c5+c5+c5+c5+c5+c5+\\n c6+c6+c6+c6+c6+c6+c6+c6+c6+c6+\\n c7+c7+c7+c7+c7+c7+c7+c7+c7+c7+\\n c8+c8+c8+c8+c8+c8+c8+c8+c8+c8;\\n END;\\n\\noutdata := NORMALIZE(one_per_node, numrecs, fillRow(LEFT, counter)); \\n\\nOUTPUT(outdata,,'nhtest::terasort1',overwrite);\\n
.\\nAnd the ecl code for doing the sort is here: // Perform global terasort\\n\\n#option('THOR_ROWCRC', 0); // don't need individual row CRCs\\n\\nrec := record\\n string10 key;\\n string10 seq;\\n string80 fill;\\n end;\\n\\nin := DATASET('nhtest::terasort1',rec,FLAT);\\nOUTPUT(SORT(in,key,UNSTABLE),,'nhtest::terasort1out',overwrite);\\n
.\\n\\nSo that image has 16 cpu, did you only have 1 thor slave per node?
\\n\\nThe perl code and bash scripts in the repository puts 16 thor slaves per ec2 instance.\", \"post_time\": \"2015-03-01 19:17:15\" },\n\t{ \"post_id\": 7005, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Re: Setup HPCC on AWS that is fast.\", \"username\": \"Lee_Meadows\", \"post_text\": \"Did you perform any benchmarks to define 'fast'? Such as http://sortbenchmark.org/\\n\\nSo that image has 16 cpu, did you only have 1 thor slave per node?\\n\\nThanks for the pdf, very nice to have such a thorough walk through.\", \"post_time\": \"2015-02-20 20:03:45\" },\n\t{ \"post_id\": 6999, \"topic_id\": 1607, \"forum_id\": 24, \"post_subject\": \"Setup HPCC on AWS that is fast.\", \"username\": \"tlhumphrey2\", \"post_text\": \"In the github repository, https://github.com/tlhumphrey2/BestHPCCoAWS, is software that will configure and deploy an HPCC System that runs fast.\\n\\nIn this repository is also a document, SetupBestHPCCoAWS.pdf, that describes in great detail, how to use the software of this repository to setup an HPCC System on AWS that runs fast.\", \"post_time\": \"2015-02-19 21:00:05\" },\n\t{ \"post_id\": 7071, \"topic_id\": 1616, \"forum_id\": 24, \"post_subject\": \"Re: Reconfigure THOR when data on THOR slave nodes?\", \"username\": \"bforeman\", \"post_text\": \"More comments from the HPCC team (thanks Fernando!)\\n\\nIf you reconfigure thor…and you still want to access the previously created files…\\n\\nYou have to change the name of the cluster, or it will report the existing files as missing or extra files parts.(depending on your "persuation")\\n\\nWhen you change the name of the cluster, the existing files can be accessed without errors because it **knows** the existing files parts are part of a "thorgroup"\\ndefined to have 10 parts. Any new files generated, are part of the "new thor group" containing 40 file parts.\\n\\nFWIW on your system the file parts will wrap\\n\\nNode 1 Node 2 ..Node 10\\n\\nPart1 Part2 Part10\\nPart11 Part12 Part20\\nPart21 Part22 Part30 \\nPart31 Part32 Part40
\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2015-03-04 13:41:29\" },\n\t{ \"post_id\": 7070, \"topic_id\": 1616, \"forum_id\": 24, \"post_subject\": \"Re: Reconfigure THOR when data on THOR slave nodes?\", \"username\": \"bforeman\", \"post_text\": \"Hi Tim,\\n\\nI've always been told that when you update a cluster and add more nodes that the only option is a re-spray of the existing data, but let me confirm that with the HPCC installation team.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-04 13:19:21\" },\n\t{ \"post_id\": 7066, \"topic_id\": 1616, \"forum_id\": 24, \"post_subject\": \"Reconfigure THOR when data on THOR slave nodes?\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have configured a THOR using envgen with -slavesPerNode is 1. Plus, I sprayed a file to all 10 THOR slave nodes.\\n\\nCan I now use envgen to reconfigure my THOR so now -slavesPerNode is 4 (i.e. I now have 40 slave nodes)? If I can, how do the file parts on my slave nodes change? If I can't, do I get an error message from envgen telling me why?\", \"post_time\": \"2015-03-03 17:08:15\" },\n\t{ \"post_id\": 7529, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Re: Expanding or moving dropzone location volume\", \"username\": \"dnordahl\", \"post_text\": \"I ended up transferring my large file to a new folder I created in mnt where there was about 300GB allocated by the HPCC cluster creation. Then I placed a symbolic link in the mydropzone folder to the file I uploaded in the subfolder of mnt. \\n\\nLuckily the output files I needed to pull back into the drop zone after processing did not exceed the small size allocated to the partition where the mydropzone folder is located.\", \"post_time\": \"2015-05-04 13:06:18\" },\n\t{ \"post_id\": 7523, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Re: Expanding or moving dropzone location volume\", \"username\": \"tlhumphrey2\", \"post_text\": \"# Put the directory s3cache someplace where there is a lot of disk space. Here\\n# I put it under /var/lib/HPCCSystems because there is a lot of space there.\\nsudo mkdir /var/lib/HPCCSystems/s3cache\\nsudo chmod 777 /var/lib/HPCCSystems/s3cache\\nsudo chmod 777 /var/lib/HPCCSystems/mydropzone\\n\\n# Uncommenting 'user_allow_other' in /etc/fuse.conf so 'allow_other' works in f3fs\\nsudo sed "s/# *user_allow_other/user_allow_other/" /etc/fuse.conf > t\\nsudo mv t /etc/fuse.conf\\n\\n# Do mount onto /var/lib/HPCCSystems/mydropzone. Be sure to put your bucket name\\n# in bucket_name. Plus, make sure you have the environment variables: S3_ACCESS_KEY\\n# and S3_SECRET_KEY with yours.\\nbucket_name=<your s3 bucket name>\\ns3fs -o rw,allow_other,use_cache=/var/lib/HPCCSystems/s3cache,uid=33,gid=33 \\\\\\n $bucket_name /var/lib/HPCCSystems/mydropzone\\n
\", \"post_time\": \"2015-05-01 20:51:33\" },\n\t{ \"post_id\": 7503, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Re: Expanding or moving dropzone location volume\", \"username\": \"dnordahl\", \"post_text\": \"Yes, its on S3. I've figured out how to use the s3cmd tool to pull it over, but I'm running out of disk space. How to I mount the /mnt directory to the dropzone?\", \"post_time\": \"2015-04-30 18:24:45\" },\n\t{ \"post_id\": 7502, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Re: Expanding or moving dropzone location volume\", \"username\": \"tlhumphrey2\", \"post_text\": \"It sounds like you have the 45GB file in the directory, /mnt. So, you should be able to mount that directory onto the dropzone.\\n\\nIs the file in an S3 bucket?\", \"post_time\": \"2015-04-30 18:23:28\" },\n\t{ \"post_id\": 7501, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Re: Expanding or moving dropzone location volume\", \"username\": \"dnordahl\", \"post_text\": \"Another aspect that is adding to the difficulty of figuring out how to do this is that all the info I find online about re-sizing a partition involves shutting down a node first. But that option is not available for my individual instances and the only option on the cluster status screen is terminate.\", \"post_time\": \"2015-04-30 17:53:39\" },\n\t{ \"post_id\": 7497, \"topic_id\": 1708, \"forum_id\": 24, \"post_subject\": \"Expanding or moving dropzone location volume\", \"username\": \"dnordahl\", \"post_text\": \"I'm have a difficult time figuring out how to increase the dropzone folder capacity of the instance which is setup to be the drop zone location. \\n\\nI'm able to attach a new volume to the mnt partition, but not the device where where the dropzone is currently pointing to: xvda1 \\n\\nSo if I could just point the dropzone location to /mnt or attach a larger volume to the xvda1 device, I think I'd be good to go. \\n\\nThough when I spray the dropped file and when I run the job and it creates temp files, I'm probably going to need to increase disk space available for that as well. Is there an easy way to bump up the storage space where it needs to be increased? I'm trying to process an input file that is 45 GB XML. It would be handy if the cluster create command asked for the disk space needed up front in addition to the number of nodes.\", \"post_time\": \"2015-04-30 15:45:21\" },\n\t{ \"post_id\": 7976, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Installiation question on AWS single node\", \"username\": \"kovacsbv\", \"post_text\": \"I can't seem to get the dependencies resolved because hpcc depends on versions of libraries that are not available any more:\\n\\nYou might want to run 'apt-get -f install' to correct these:\\nThe following packages have unmet dependencies:\\n hpccsystems-platform : Depends: libboost-regex1.40.0 but it is not installable\\n Depends: libicu42 but it is not installable\\n Depends: libarchive1 but it is not installable\\n Depends: zip\\nE: Unmet dependencies. Try 'apt-get -f install' with no packages (or specify a solution).\\nroot@lab-hpcc-01:~# apt-get install zip\\nReading package lists... Done\\nBuilding dependency tree\\nReading state information... 
Done\\nYou might want to run 'apt-get -f install' to correct these:\\nThe following packages have unmet dependencies:\\n hpccsystems-platform : Depends: libboost-regex1.40.0 but it is not installable\\n Depends: libicu42 but it is not installable\\n Depends: libarchive1 but it is not installable\\nE: Unmet dependencies. Try 'apt-get -f install' with no packages (or specify a solution).\\nroot@lab-hpcc-01:~# apt-get install libarchive1\\nReading package lists... Done\\nBuilding dependency tree\\nReading state information... Done\\nPackage libarchive1 is not available, but is referred to by another package.\\nThis may mean that the package is missing, has been obsoleted, or\\nis only available from another source\\nHowever the following packages replace it:\\n libarchive-dev:i386 libarchive-dev\\n\\nE: Package 'libarchive1' has no installation candidate\\nroot@lab-hpcc-01:~# apt-get install libarchive-dev\\nReading package lists... Done\\nBuilding dependency tree\\nReading state information... Done\\nYou might want to run 'apt-get -f install' to correct these:\\nThe following packages have unmet dependencies:\\n hpccsystems-platform : Depends: libboost-regex1.40.0 but it is not installable\\n Depends: libicu42 but it is not installable\\n Depends: libarchive1 but it is not installable\\n Depends: zip\\n libarchive-dev : Depends: libarchive13 (= 3.1.2-7ubuntu2.1) but 3.1.2-7ubuntu2 is to be installed\\nE: Unmet dependencies. Try 'apt-get -f install' with no packages (or specify a solution).\\nroot@lab-hpcc-01:~# apt-get install libarchive-dev\\n
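\\nFor what it's worth, the usual sequence (just a sketch, assuming an hpccsystems-platform .deb built for this node's Ubuntu release; the filename below is a placeholder) is to let apt resolve the dependencies after dpkg:\\n\\nsudo dpkg -i hpccsystems-platform-community_<version>_amd64.deb\\nsudo apt-get -f install   # installs the dependencies dpkg reported as missing\\n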
\", \"post_time\": \"2015-07-31 20:52:41\" },\n\t{ \"post_id\": 7575, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Installiation question on AWS single node\", \"username\": \"balajisampath\", \"post_text\": \"No Jim,\\n\\nI didn't try. Will let you know soon.\", \"post_time\": \"2015-05-11 14:10:07\" },\n\t{ \"post_id\": 7568, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Installiation question on AWS single node\", \"username\": \"JimD\", \"post_text\": \"Did you try installing with apt-get?\\n\\nJim\", \"post_time\": \"2015-05-08 18:55:15\" },\n\t{ \"post_id\": 7561, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Installiation question on AWS single node\", \"username\": \"balajisampath\", \"post_text\": \"Thanks tlhumphrey2 ,\\n\\nI wish dependencies packaged as part of HPCC.\", \"post_time\": \"2015-05-08 13:18:14\" },\n\t{ \"post_id\": 7557, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"Re: HPCC Installiation question on AWS single node\", \"username\": \"tlhumphrey2\", \"post_text\": \"Make sure you have the correct version of the HPCC Platform because the one you need depends on the operating system on your instance. (see http://hpccsystems.com/bb/viewtopic.php?f=14&t=1160).\", \"post_time\": \"2015-05-07 18:48:19\" },\n\t{ \"post_id\": 7555, \"topic_id\": 1721, \"forum_id\": 24, \"post_subject\": \"HPCC Installiation question on AWS single node\", \"username\": \"balajisampath\", \"post_text\": \"I got dependency error while installing HPCC Ubuntu versions in AWS Ubuntu AMI. Is there any document which outlines the most compatible AMI? \\n\\nSteps I followed:\\n- started one micro ubuntu 14.x version instance\\n- downloaded hpcc ubuntu 14.x to my windows pc\\n- ftp the hpcc package to aws instance\\n- executed sudo dpkg -i <deb filename>\\n\\nI am aware of the HPCC AWS offering. I would like to install HPCC on my own in AWS and don't want to research on dependencies and install them. Please help\", \"post_time\": \"2015-05-07 14:16:15\" },\n\t{ \"post_id\": 7989, \"topic_id\": 1776, \"forum_id\": 24, \"post_subject\": \"New: Copy data to/from THOR from/to S3 buckets in parallel\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have added a new capability to the github repository, EasyFastHPCCoAWS, https://github.com/tlhumphrey2/EasyFastHPCCoAWS. This capability saves/restores data to/from S3 buckets from/to a THOR. And, the transfer of data to/from the S3 buckets from/to each THOR slave is done in parallel, which means Big Data can be transferred quickly.\\n\\nThere are two functions one uses:cp2S3FromMasterAndAllSlaves.pl (used to transfer data from THOR nodes to S3 buckets).\\ncpFromS3ToMasterAndAllSlaves.pl (used to transfer data from S3 buckets to THOR nodes).
\\nTo use the 2nd function to transfer data from S3 buckets to THOR nodes, the THOR must be compatible with the THOR from which the data originally came, i.e. the THOR that cp2S3FromMasterAndAllSlaves.pl copied the data from. Compatible means: 1) both THORs have the same number in instances, 2) both THORs have the same number of slave nodes per instance, and 3) the disk space of the receiving THOR must be large enough to hold the data.\\n\\nI haven’t, yet, written a document giving details on how to use these new functions. But in the README.md file of the repository, there is a brief explanation on how to use these functions.\", \"post_time\": \"2015-08-05 13:54:21\" },\n\t{ \"post_id\": 7870, \"topic_id\": 1776, \"forum_id\": 24, \"post_subject\": \"Re: Easy setup of a fast HPCC System on AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"On https://github.com/tlhumphrey2/EasyFastHPCCoAWS, I updated the document, EasyFastHPCCOnAWS.pdf to include a new appendix, Appendix D, that gives your AWS administrator detailed instructions for adding a new IAM group, Super-Power-Group, and adding you to that group.\\n\\nIf you are an AWS administrator, you don't need to do the instructions of Appendix D in order to use CloudFormation to build a stack that launches an HPCC System on AWS. And, if you are another IAM user, you only need to have your AWS administrator do the instructions of Appendix D if you get an error message while creating a stack that contains the words “not authorized to perform:iam:CreateRole”.\", \"post_time\": \"2015-07-02 15:16:07\" },\n\t{ \"post_id\": 7860, \"topic_id\": 1776, \"forum_id\": 24, \"post_subject\": \"Re: Easy setup of a fast HPCC System on AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"Added two new capabilities to EasyFastHPCCoAWS.\\n\\n1. htpasswd authentication.\\n2. ability to select HPCC Platform\\n
\\nhttps://github.com/tlhumphrey2/EasyFastHPCCoAWS\", \"post_time\": \"2015-06-30 13:56:08\" },\n\t{ \"post_id\": 7802, \"topic_id\": 1776, \"forum_id\": 24, \"post_subject\": \"Easy setup of a fast HPCC System on AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"You can use the HPCC CloudFormation (CF) template and accompanying scripts to configure and deploy an HPCC System on AWS from your Windows computer in two steps.\\n1. Copy the 14 accompanying scripts and your ssh pem file to an S3 bucket.\\n2. Use CloudFormation on the AWS console to do the rest.
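\\nA minimal sketch of step 1, assuming the AWS CLI is configured on the Windows machine (the bucket and key-pair names are only placeholders):\\n\\naws s3 cp scripts/ s3://my-hpcc-bucket/ --recursive\\naws s3 cp MyKeyPair.pem s3://my-hpcc-bucket/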
\\nThe following github repository has 1) the HPCC CF template, 2) the accompanying scripts, and 3) the document, EasyFastHPCCOnAWS.pdf, that provides details on the deployment process.\\n\\n\", \"post_time\": \"2015-06-18 16:10:47\" },\n\t{ \"post_id\": 7809, \"topic_id\": 1778, \"forum_id\": 24, \"post_subject\": \"Re: Roxie "Failed to get response from slave(s)" - SOLVED\", \"username\": \"tlhumphrey2\", \"post_text\": \"It would be nice if some expert would tell us why roxieMulticastEnabled should be set to false. That seems counter-intuitive.\\n\\nA related note: In the github repository: https://github.com/tlhumphrey2/EasyFastHPCCoAWS is a CloudFormation template and accompanying scripts, with documentation, that will configure and deploy a fast running HPCC System on AWS in just 2 steps. The scripts even set roxieMulticastEnabled to false if you indicate you want a roxie.\", \"post_time\": \"2015-06-20 19:00:47\" },\n\t{ \"post_id\": 7808, \"topic_id\": 1778, \"forum_id\": 24, \"post_subject\": \"Roxie "Failed to get response from slave(s)" - SOLVED\", \"username\": \"BrianB644\", \"post_text\": \"I recently ran across this issue while setting up a multi-Roxie system in AWS ... within a VPC I didn't control ... though one might have this issue in other envronments too.\\n\\nAfter publishing a query, when I tried to execute the query ... after bit of a wait ... I received an error message as follows:\\n\\nException\\nReported by: Roxie\\nMessage: Failed to get response from slave(s) for uid=0x00000003 activityId=8 (fetch part) pri=LOW queryHash=cd7737a0929de9b2 ch=8 seq=0 cont=0 server=XXX.XX.XX.159 retries=0003 in activity 8
\\n\\nExplanation:\\n\\nAt a high level this message says that the Roxie nodes "can't talk to one another". Roxie nodes often talk to one another using UDP and optionally using "multicast". The default setting in my environment.xml file was ... 'roxieMulticastEnabled="true"'.\\n\\nIn my environment, multicast requests were not being passed on my network ... so messages between Roxie nodes were never received. Once I configured Roxie to disable multicast (and ensured all necessary UDP networking was allowed between Roxie nodes) ... Roxie queries functioned normally.\\n\\nFix:\\n\\n1) In your HPCC "environment.xml" file (usually /etc/HPCCSystems/environment.xml) set 'roxieMulticastEnabled="false"'.
\\n2) Ensure you allow the necessary UDP connections between your Roxie nodes.
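\\n\\nFor reference, the end result is just one attribute flipped on the RoxieCluster element of environment.xml; a sketch with every other attribute elided:\\n\\n<RoxieCluster ... roxieMulticastEnabled="false" ... >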
\\n\\nLinux tools I found useful for helping to debug this problem:\\n\\nnetstat -l (ports being listened on)
\\nlsof (open files and what they are attached to ... including TCP/UDP ports)
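\\n\\nA couple of concrete, but only illustrative, invocations (the process name and ports to look for depend on your configuration):\\n\\nnetstat -ulpn | grep roxie (UDP ports the roxie process is listening on)\\nlsof -i UDP (open UDP sockets and the processes that own them)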
\\n\\nAdditional Note:\\n\\nEven after my change, I noticed that the actual Roxie process still opened the multicast address. I didn't experiment with configuration changes that would disable this.\", \"post_time\": \"2015-06-19 20:29:42\" },\n\t{ \"post_id\": 8082, \"topic_id\": 1849, \"forum_id\": 24, \"post_subject\": \"Re: HPCC on AWS:Not getting access to ECL Watch\", \"username\": \"Anjali\", \"post_text\": \"Hi, \\n\\nThe issue is resolved by changing the security group inbound permissions.\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-09-04 04:55:48\" },\n\t{ \"post_id\": 8036, \"topic_id\": 1849, \"forum_id\": 24, \"post_subject\": \"HPCC on AWS:Not getting access to ECL Watch\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nI tried setting up instant cloud on amazon and it was a success.This time i wanted to set up HPCC in AWS instance that i have launched already(Amazon Linux AMI 2015.03.1 (HVM), SSD Volume Type,64bit,t2.micro).\\n\\nFollowing the document 'Installing & Running the HPCC Platform'(http://hpccsystems.com/download/docs/installing-running-hpcc-platform), i could install HPCC in one of my machine and successfully started the single node instance as in the attachment.\\n\\nThen i tried to access the corresponding ECLWatch(http://<publicIP of AWS Instance>:8010),and i got connection timeout error saying 'web page not available'.\\n\\nHPCC Version 5.2.6 (hpccsystems-platform-community_5.2.6-1.el5.x86_64.rpm)\\n\\n\\nCan anyone help me figure out what's wrong?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-08-25 10:55:37\" },\n\t{ \"post_id\": 8296, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"Anjali\", \"post_text\": \"Sure...Thanks a lot \", \"post_time\": \"2015-10-15 11:54:54\" },\n\t{ \"post_id\": 8294, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"The following document is the best place to start: http://cdn.hpccsystems.com/pdf/RunningHPCCwithinAWS_EC2_Beta1.pdf.\\n\\nIt gives the basics. The github repository I sited is more advanced and therefore leaves out details you need to know.\", \"post_time\": \"2015-10-15 10:17:23\" },\n\t{ \"post_id\": 8292, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nThank you for your valuable comments..\\n\\nI hope the git hub link that you have shared can help me to sort out this.Let me try once:)\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-15 05:00:32\" },\n\t{ \"post_id\": 8290, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"Another alternative.\\nYou can have multiple HPCC nodes on the same instance. And, if the instance type is one with multiple cores, lots of memory and disk space, the HPCC System can run fairly fast.\", \"post_time\": \"2015-10-14 15:05:10\" },\n\t{ \"post_id\": 8288, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"tlhumphrey2\", \"post_text\": \"It is possible to have an HPCC System whose nodes are on different accounts. The setup process doesn't ask any information about the AWS accounts. I do have one caveat. 
If you want the HPCC System to be fast, the instances should be in the same AWS Placement Group, but it isn't a requirement. I don't know if you can have the same placement group in two accounts -- probably not.\\n\\nWith that said, the key to making all this work is getting the programs that set up the HPCC System onto all instances and making sure that all instances have the correct ports set up in their security group. You can get all the programs for setting up the HPCC System on github at https://github.com/tlhumphrey2/EasyFastHPCCoAWS.\\n\\nThe document in this github repository, EasyFastHPCCoAWS.pdf, tells you how to set up an HPCC System using the programs and the CloudFormation template in the repository. You won't be able to use CloudFormation since your HPCC System will be made up of instances from different accounts. But, you can use the programs.\", \"post_time\": \"2015-10-14 14:48:44\" },\n\t{ \"post_id\": 8282, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nThanks for your reply
\\n\\nYes, that is the scenario I have given. We are a study group with our own AWS free accounts. Just for knowledge purposes we are trying to set up a cluster between our free instances.\\n\\nSo is it possible to configure the multi-node system between instances of different accounts? Is it possible to have a connection between these free instances?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-14 05:07:15\" },\n\t{ \"post_id\": 8278, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Re: Configuring multi-node HPCC System in AWS\", \"username\": \"rtaylor\", \"post_text\": \"Anjali,
As part of study i was asked to set up a multi-node system on Amazon instances,in such a way that instance1 will be of AWS account1 and instance2 will be of AWS account2.
So you have two user accounts on AWS. You want to ramp up a single HPCC environment where the nodes are billed to two separate AWS accounts? How do you expect to do that? The nodes must talk to each other to work together. \\n\\nSo, my real question is: Why?\\n\\nRichard\", \"post_time\": \"2015-10-13 18:26:38\" },\n\t{ \"post_id\": 8276, \"topic_id\": 1928, \"forum_id\": 24, \"post_subject\": \"Configuring multi-node HPCC System in AWS\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nAs part of study i was asked to set up a multi-node system on Amazon instances,in such a way that instance1 will be of AWS account1 and instance2 will be of AWS account2.\\n\\nIs it possible to configure multi-node system on AWS instances that are of different account(here between instance1 and instance2)?\\n\\n\\nCan anyone provide a clarification?\\n\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-13 08:31:35\" },\n\t{ \"post_id\": 10553, \"topic_id\": 2583, \"forum_id\": 24, \"post_subject\": \"Re: hpcc 6.0.2 installation in AWS Ubuntu\", \"username\": \"yunchen\", \"post_text\": \"I just created a CentOS 7 AWS EC2 instance, and HPCC installation works well in CentOS. So it seems the data corruption problem only happens for ubuntu AWS.\", \"post_time\": \"2016-08-12 03:28:02\" },\n\t{ \"post_id\": 10543, \"topic_id\": 2583, \"forum_id\": 24, \"post_subject\": \"hpcc 6.0.2 installation in AWS Ubuntu\", \"username\": \"yunchen\", \"post_text\": \"Has anyone done a recent installation of hpcc 6.0.2 in AWS? I created an AWS EC2 instance Ubuntu Server 14.04 LTS (HVM), and scp hpccsystems-platform-community_6.0.2-1trusty_amd64.deb from desktop to AWS. However, there was an error below when doing sudo dpkg -i. Is there anything wrong within AWS setup?\\n\\n(Reading database ... 51172 files and directories currently installed.)\\nPreparing to unpack hpccsystems-platform-community_6.0.2-1trusty_amd64.deb ...\\nUnpacking hpccsystems-platform (6.0.2-1) ...\\ndpkg-deb (subprocess): decompressing archive member: lzma error: compressed data is corrupt\\ndpkg-deb: error: subprocess <decompress> returned error exit status 2\\ndpkg: error processing archive hpccsystems-platform-community_6.0.2-1trusty_amd64.deb (--install):\\n cannot copy extracted data for './opt/HPCCSystems/lib/libjhtree.so' to '/opt/HPCCSystems/lib/libjhtree.so.dpkg-new': unexpected end of file or stream\\nErrors were encountered while processing:\\n hpccsystems-platform-community_6.0.2-1trusty_amd64.deb\", \"post_time\": \"2016-08-12 01:18:56\" },\n\t{ \"post_id\": 13183, \"topic_id\": 3323, \"forum_id\": 24, \"post_subject\": \"Re: Error when trying to create clusters in aws\", \"username\": \"fcong922\", \"post_text\": \"Richard,\\n\\nThanks for your reply. I tried copy and paste from Notepad, and got the same error. I also tried manually type the access key id and secret access key two times, but no luck.\\n\\nI am wondering if I should grant any other permission to the access key than only ec2fullaccess permission. 
Or is that possible that the "HPCC Systems® Instant Cloud for AWS Beta" is temporarily unavailable?\\n\\nThanks\\nfcong922\", \"post_time\": \"2016-11-14 21:10:58\" },\n\t{ \"post_id\": 13173, \"topic_id\": 3323, \"forum_id\": 24, \"post_subject\": \"Re: Error when trying to create clusters in aws\", \"username\": \"rtaylor\", \"post_text\": \"fcong922,\\n\\nI have occasionally had such problems (not necessarily with AWS) and it usually has come down to including trailing spaces in the paste and the validation routine not trimming those spaces before comparison. Try it again, making sure you're only pasting in the characters themselves, without leading or trailing spaces. If that doesn't work, try typing them in manually.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-11-14 20:53:31\" },\n\t{ \"post_id\": 13163, \"topic_id\": 3323, \"forum_id\": 24, \"post_subject\": \"Error when trying to create clusters in aws\", \"username\": \"fcong922\", \"post_text\": \"I followed the instruction here: https://aws.hpccsystems.com/aws/login/ and created an access key in IAM of my aws account and granted EC2fullaccess permission to this access key. However, when I copied and pasted the access key ID and secret key to login. I got an error "AWS was not able to validate the provided access credentials". Does it happen to anyone else? Any suggestions?\\n\\nThanks!\", \"post_time\": \"2016-11-14 20:13:25\" },\n\t{ \"post_id\": 15493, \"topic_id\": 3863, \"forum_id\": 24, \"post_subject\": \"Spray data directly from S3 to Thor\", \"username\": \"chsu6\", \"post_text\": \"Hi,\\n\\nWe are trying to run Thor applications over at least several TB input dataset. We plan to use S3 as the persistent storage and provision the HPCC cluster with a size that fits the application requirement. Since the cluster size may change, simply backing up data to and from S3 might not work. However, fetching data to the landing zone and then spraying the data seems not efficient.\\n\\nWe found a tool, rnet-parspray that can spray data directly from S3 to the Thor cluster. This script supports only zipped XML files. Before we jump into any implementation, just want to hear any feedback on this scenario. Any suggestion on the best practice of running HPCC on AWS? Any good ways to handle large dataset on AWS? Thanks.\\n\\n-chin\", \"post_time\": \"2017-02-24 21:07:32\" },\n\t{ \"post_id\": 19593, \"topic_id\": 4833, \"forum_id\": 24, \"post_subject\": \"Re: envgen roxieMulticastEnabled\", \"username\": \"tlhumphrey2\", \"post_text\": \"The name you want just to the left of ",@" is the name of the appropriate BuildSet. In your case that would be "roxie". If you want to override a ThorCluster parameter, the BuildSet name is "thor".\", \"post_time\": \"2017-10-19 12:33:55\" },\n\t{ \"post_id\": 19583, \"topic_id\": 4833, \"forum_id\": 24, \"post_subject\": \"Re: envgen roxieMulticastEnabled\", \"username\": \"ming\", \"post_text\": \"buildSet is "roxie" instead of "RoxieCluster"\", \"post_time\": \"2017-10-18 17:25:21\" },\n\t{ \"post_id\": 19203, \"topic_id\": 4833, \"forum_id\": 24, \"post_subject\": \"envgen roxieMulticastEnabled\", \"username\": \"k8enorton\", \"post_text\": \"Hi,\\n\\nI am trying to write an envgen script which disables multicast for roxie, but I keep getting this error:\\nWarning: unable to override RoxieCluster,@roxieMulticastEnabled/false as override option needs 3 valid values to override.\\n
\\n\\nThe script I have written is:\\nsudo /opt/HPCCSystems/sbin/envgen \\\\\\n-env $environmentfile \\\\\\n-ipfile $ipfilename \\\\\\n-supportnodes 1 \\\\\\n-thornodes 1 \\\\\\n-roxienodes 1 \\\\\\n-override RoxieCluster,@roxieMulticastEnabled,false\\n
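\\n\\n(For reference, and as the replies above point out, envgen wants the buildSet name here rather than the component name, so the override would look roughly like\\n-override roxie,@roxieMulticastEnabled,false\\nwith the rest of the command unchanged.)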
\\n\\nI am wondering if there are other properties I need to override in order to change this setting.\\n\\nThanks,\\nKatie\", \"post_time\": \"2017-09-29 15:09:04\" },\n\t{ \"post_id\": 20823, \"topic_id\": 5323, \"forum_id\": 24, \"post_subject\": \"Re: Cannot access ecl watch of one instance cluster\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Tim, based on your write up, it sounds like the ESP loaded without issue which points to an AWS/networking issue but let's back up a bit.\\n\\nCan you view the ESP log (/var/log/HPCCSystems/<yourESPname>/esp.log and confirm the process started and loaded all libraries without issue.\\n\\nAlso, confirm that the ESP Process has eclwatch(wssmc) bound to it on port 8010.\\n\\nYou can also issue a command like this to ensure an ESP process is listening on the port you expect it to be on:\\nnetstat -pa |grep esp\\n\\nOnce you've confirmed that, let's look into connectivity...\\n\\nTry this from a remote machine:\\nssh -vp 8010 <your ECLWATCH ESP IP>\\n\\nit should should you've connected. Otherwise it would appear to be a network error.\", \"post_time\": \"2018-02-14 15:17:27\" },\n\t{ \"post_id\": 20813, \"topic_id\": 5323, \"forum_id\": 24, \"post_subject\": \"Cannot access ecl watch of one instance cluster\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have deployed a one instance hpcc system on AWS and I cannot access the cluster's ecl watch. Not sure what is wrong. The environment.xml file is the one that placed by the installation of the platform, which is 6.4.8. Here is the trouble shooting I've done.\\n\\nFirst, when I start the cluster, all components startup without any errors. I've checked the cluster's security group to make sure I have given my IP permission to access port 8010 (in fact, for a short period of time, I opened all inbound traffic to everyone, but I still couldn't access ecl watch).\", \"post_time\": \"2018-02-14 13:36:22\" },\n\t{ \"post_id\": 21373, \"topic_id\": 5403, \"forum_id\": 24, \"post_subject\": \"Re: AWS HPCC Service Multinode cluster specification\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa,\\n\\nSince this is an "Instant Cloud" instance, Tim Humphrey's reply to your same question on this thread is your solution: https://hpccsystems.com/bb/viewtopic.php?f=14&t=5413\\n\\nBTW, you only need to post a question once, because all the forums are monitored.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-04-03 08:08:11\" },\n\t{ \"post_id\": 21353, \"topic_id\": 5403, \"forum_id\": 24, \"post_subject\": \"Re: AWS HPCC Service Multinode cluster specification\", \"username\": \"rsghatpa\", \"post_text\": \"I used the following :\\n\\nhttps://aws.hpccsystems.com/aws/login/\\n\\nI started the cluster by stating the number of thor instances and clicking the launch cluster button.\", \"post_time\": \"2018-03-29 15:04:10\" },\n\t{ \"post_id\": 21323, \"topic_id\": 5403, \"forum_id\": 24, \"post_subject\": \"Re: AWS HPCC Service Multinode cluster specification\", \"username\": \"rtaylor\", \"post_text\": \"rsghatpa,\\n\\nHow did you create this AWS environment? Was it by pressing the "AWS Cloud" button on the HPCC Systems portal? 
Or did you configure it all yourself?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-03-29 11:03:54\" },\n\t{ \"post_id\": 21303, \"topic_id\": 5403, \"forum_id\": 24, \"post_subject\": \"AWS HPCC Service Multinode cluster specification\", \"username\": \"rsghatpa\", \"post_text\": \"How do I change the instance type of an ec2-cluster used for making an HPCC cluster?\\n\\nCurrently all instances are m1.xlarge and because they are not EBS backed, I cannot stop them and change the instance type. The only option is to reboot or terminate them. Can you tell me a fix for the same?\", \"post_time\": \"2018-03-28 17:41:04\" },\n\t{ \"post_id\": 21913, \"topic_id\": 5573, \"forum_id\": 24, \"post_subject\": \"Re: AWS instance for single-node HPCC\", \"username\": \"tlhumphrey2\", \"post_text\": \"A more complicated method is to use what is in the repository: https://github.com/tlhumphrey2/EasyFastHPCCoAWS. But, if you know how to use aws, it isn't very hard.\", \"post_time\": \"2018-05-11 16:45:05\" },\n\t{ \"post_id\": 21893, \"topic_id\": 5573, \"forum_id\": 24, \"post_subject\": \"Re: AWS instance for single-node HPCC\", \"username\": \"GeneDAngelo\", \"post_text\": \"Hi Richard,\\n\\nThanks for your response.\\n\\nYes, I actually discovered that option right after I posted this topic. However, I tried it with my personal (paid) AWS account and it failed. I sent the log files to info@hpccsystems.com two days ago, but haven't yet received a response. Is that the correct procedure to get help?\\n\\nGene\", \"post_time\": \"2018-05-11 14:16:09\" },\n\t{ \"post_id\": 21883, \"topic_id\": 5573, \"forum_id\": 24, \"post_subject\": \"Re: AWS instance for single-node HPCC\", \"username\": \"rtaylor\", \"post_text\": \"Gene,\\n\\nThe simplest way to do this is to use the "Instant Cloud" button on this page: https://hpccsystems.com/try-now.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-05-11 13:58:44\" },\n\t{ \"post_id\": 21873, \"topic_id\": 5573, \"forum_id\": 24, \"post_subject\": \"AWS instance for single-node HPCC\", \"username\": \"GeneDAngelo\", \"post_text\": \"I want to set up an inexpensive HPCC instance in AWS just for use in learning ECL. Can anyone tell me the minimum requirements? I tried installing a single-node HPCC cluster on a free instance with Red Hat Linux on a t2 micro machine with 1 vCPU, 1 GB RAM and 10 GB storage, and of course it failed. I don't know what needs to be changed - probably everything, but by how much? If the minimum is unknown, can anyone tell me the AWS specs for any relatively small single-node cluster that does work? Thanks for your help.\", \"post_time\": \"2018-05-09 17:39:49\" },\n\t{ \"post_id\": 26203, \"topic_id\": 6903, \"forum_id\": 24, \"post_subject\": \"Re: Failure to connect to the ESP for a new AWS Cluster\", \"username\": \"mwilmshurst\", \"post_text\": \"Hi Bob,\\nYes I did, thanks - Franz Nisswandt managed to help me debug the problem.\\n\\nThe ESP Page was being blocked by a particular VPN server in the UK. When I rerouted my connection to a US based server the connection was allowed. \\n\\nOn a separate point, is it possible to launch a smaller (Optional) AWS server instance. 
The current default is a Large and costs quite a bit for R&D,\\nThanks again\\nMark\", \"post_time\": \"2019-05-13 07:29:07\" },\n\t{ \"post_id\": 26163, \"topic_id\": 6903, \"forum_id\": 24, \"post_subject\": \"Re: Failure to connect to the ESP for a new AWS Cluster\", \"username\": \"bforeman\", \"post_text\": \"Hi Mark,\\n\\nSorry for the delay in reply, did you ever get this issue resolved?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2019-05-09 17:45:56\" },\n\t{ \"post_id\": 25993, \"topic_id\": 6903, \"forum_id\": 24, \"post_subject\": \"Failure to connect to the ESP for a new AWS Cluster\", \"username\": \"mwilmshurst\", \"post_text\": \"Hi there,\\n\\nI have tried to launch two different clusters in different regions. Both appear to be initialised OK. \\n\\nI launched them from the hpccsystems.com site and can see them running OK in AWS.\\n\\nHowever, any attempt to access the ESP on port 8010 for returns a server failure error.\\n\\nAre the default security groups being created OK or am I missing a step in the process.\\nHappy to provide more information if anyone has any ideas\\nAll the best\\nMark\", \"post_time\": \"2019-04-30 11:35:58\" },\n\t{ \"post_id\": 34821, \"topic_id\": 9221, \"forum_id\": 24, \"post_subject\": \"Re: elastic4hpcclogs fields on Kibana question\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"We've noticed this once but we're unable to recreate.\\nHowever, one possible solution is to explicitly provide field types for the generated hpcc.log.* fields in the hpccpipeline.\\n\\nIn Kibana, under Stack Management | Ingest Pipelines, find the pre-configured "hpccpipeline" and choose to edit it. Change the Grok to include field types.\\nIn this example we declare all fields as "strings":\\n{{[\\n{ "grok":\\n{ "field": "message", "patterns": [ "%\\\\{BASE16NUM:hpcc.log.sequence:string}\\n\\n\\\\\\\\s+%{HPCC_LOG_AUDIENCE:hpcc.log.audience:string}\\\\\\\\s+%{HPCC_LOG_CLASS:hpcc.log.class:string}\\\\\\\\s+%{TIMESTAMP_ISO8601:hpcc.log.timestamp:string}\\\\\\\\s+%{POSINT:hpcc.log.procid:string}\\\\\\\\s+%{POSINT:hpcc.log.threadid:string}\\\\\\\\s+%{HPCC_LOG_WUID:hpcc.log.jobid:string}\\ns+%{QUOTEDSTRING:hpcc.log.message:string}" ], "pattern_definitions":\\n{ "HPCC_LOG_WUID": "([A-Z][0-9]\\\\{8}\\n\\n-[0-9]{6})|(UNK)", "HPCC_LOG_CLASS": "DIS|ERR|WRN|INF|PRO|MET|UNK", "HPCC_LOG_AUDIENCE": "OPR|USR|PRG|AUD|UNK" }\\n}\\n}\\n]}}
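\\n\\nThe same processor with the line wrapping that crept into the pattern above removed (a best-effort reconstruction for readability; compare it against your own hpccpipeline before saving):\\n\\n[\\n{ "grok":\\n{ "field": "message",\\n "patterns": [ "%{BASE16NUM:hpcc.log.sequence:string}\\\\s+%{HPCC_LOG_AUDIENCE:hpcc.log.audience:string}\\\\s+%{HPCC_LOG_CLASS:hpcc.log.class:string}\\\\s+%{TIMESTAMP_ISO8601:hpcc.log.timestamp:string}\\\\s+%{POSINT:hpcc.log.procid:string}\\\\s+%{POSINT:hpcc.log.threadid:string}\\\\s+%{HPCC_LOG_WUID:hpcc.log.jobid:string}\\\\s+%{QUOTEDSTRING:hpcc.log.message:string}" ],\\n "pattern_definitions":\\n{ "HPCC_LOG_WUID": "([A-Z][0-9]{8}-[0-9]{6})|(UNK)",\\n "HPCC_LOG_CLASS": "DIS|ERR|WRN|INF|PRO|MET|UNK",\\n "HPCC_LOG_AUDIENCE": "OPR|USR|PRG|AUD|UNK" }\\n}\\n}\\n]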
\\n\\nSince the pipeline creates the hpcc.log fields on the target Elastic index(es), preexisting indexes will not update the field types. The field types will take affect on new indexes, pre-existing indexes will need to be removed.\\n\\nIf the issue continues, please let us know on the HPCC bug tracking system:\\nhttps://track.hpccsystems.com/browse/HPCC-27084\", \"post_time\": \"2022-01-31 18:44:25\" },\n\t{ \"post_id\": 34811, \"topic_id\": 9221, \"forum_id\": 24, \"post_subject\": \"elastic4hpcclogs fields on Kibana question\", \"username\": \"g-pan\", \"post_text\": \"I have Deployed elastic4hpcclogs 1.2.0 on an Azure AKS cluster, and found that the hpcc.log.* fields created don't seem to be acknowledged as searchable/filterable by Kibana, is there any way to fix this?\", \"post_time\": \"2022-01-31 18:28:28\" },\n\t{ \"post_id\": 35201, \"topic_id\": 9301, \"forum_id\": 24, \"post_subject\": \"Re: Helm install fails with int overflow error in hpcc/templ\", \"username\": \"Akhila\", \"post_text\": \"Hi,\\n\\nThe sigcheck tool helped in identifying the 32-bit helm which was installed by windows chocolatey package. It seems to be working as expected now.\\n\\nThank you for the help.\\n\\n\\nThanks,\\nAkhila\", \"post_time\": \"2022-02-15 20:54:32\" },\n\t{ \"post_id\": 35171, \"topic_id\": 9301, \"forum_id\": 24, \"post_subject\": \"Re: Helm install fails with int overflow error in hpcc/templ\", \"username\": \"jsmith\", \"post_text\": \"Hi,\\n\\nthe int in the template it is pointing to is a 64bit constant, the error does suggest it is larger than the native process can handle.\\n\\nIs it possible both the 32bit and 64bit version are installed, but it's picking up the 32bit version?\\nMS's sigcheck can confirm by pointing it at the exe : https://docs.microsoft.com/en-us/sysint ... 
s/sigcheck\\n\\n\\nAlso could you try a helm template on the chart, and see if that succeeds (I've just tried this myself a moment ago having just installed helm for windows), e.g.:\\n\\nhelm template myhpcc hpcc/hpcc > capturedoutput.yaml\\n\\nThanks.\", \"post_time\": \"2022-02-14 17:26:04\" },\n\t{ \"post_id\": 35161, \"topic_id\": 9301, \"forum_id\": 24, \"post_subject\": \"Helm install fails with int overflow error in hpcc/templates\", \"username\": \"Akhila\", \"post_text\": \"Hi,\\n\\nI have been running into overflows int for helm install as below -\\n \\n│ Error: template: hpcc/templates/thor.yaml:397:5: executing "hpcc/templates/thor.yaml" at <include "hpcc.addStubResources" ($commonCtx | merge (dict "instances" .maxGraphs))>: error calling include: template: hpcc/templates/_helpers.tpl:810:16: executing "hpcc.addStubResources" at <include "hpcc.bytesToK8sMemoryString" $totalBytes>: error calling include: template: hpcc/templates/_helpers.tpl:1548:14: executing "hpcc.bytesToK8sMemoryString" at <1152921504606846976>: 1152921504606846976 overflows int\\n\\nI was told that for the windows operating system, using a 64 bit Helm would fix but seems like it is not.\\n\\nI have installed windowsamdx64 bit of Helm 3.8 version and still running into the overflow int error.\\n\\nCould you please provide the steps to resolve this issue?\\n\\n\\nThanks,\\nAkhila\", \"post_time\": \"2022-02-14 03:24:42\" },\n\t{ \"post_id\": 2076, \"topic_id\": 457, \"forum_id\": 26, \"post_subject\": \"Demos now available\", \"username\": \"HPCC Staff\", \"post_text\": \"Be sure to check out these cool interactive demos on the key features and components within the HPCC Systems platform!\\n\\nData Profiling\\nThis demo shows capabilities of the ECL language for use in data analysis and profiling of raw data. \\nhttp://hpccsystems.com/demos/data-profiling-demo\\n\\nWikipedia Demo\\nThe demo shows the capabilities of Roxie queries to deliver data in the format and shape which make integrating Web network visualizations like Sigma.js easy.\\nhttp://hpccsystems.com/demos/wikidemo\", \"post_time\": \"2012-07-25 14:23:46\" },\n\t{ \"post_id\": 12543, \"topic_id\": 770, \"forum_id\": 26, \"post_subject\": \"Re: More demos available\", \"username\": \"JimD\", \"post_text\": \"Try:\\nhttps://hpccsystems.com/community/contr ... -sentiment \\n\\nHTH,\\nJim\", \"post_time\": \"2016-10-26 19:38:38\" },\n\t{ \"post_id\": 12483, \"topic_id\": 770, \"forum_id\": 26, \"post_subject\": \"Re: More demos available\", \"username\": \"rqg0717\", \"post_text\": \"Dear Admin,\\n\\nThe link is not working. Are these demos still availabble please?\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-25 21:38:48\" },\n\t{ \"post_id\": 3462, \"topic_id\": 770, \"forum_id\": 26, \"post_subject\": \"More demos available\", \"username\": \"HPCC Staff\", \"post_text\": \"Check out our latest demos showcasing data and sentiment analysis via the HPCC Systems platform. 
\\n\\n<b>Cancer Rate Demo</b>\\nThe SEER data was cleaned and analyzed using the HPCC Systems platform to derive a set of useful reports.\\n\\n<b>Twitter Sentiments</b>\\nThis demo allows the user to enter a search query and receive real-time tweets from twitter and sentiment using the HPCC Systems platform.\\n\\nhttp://hpccsystems.com/demos#twitter\", \"post_time\": \"2013-02-14 16:48:54\" },\n\t{ \"post_id\": 12493, \"topic_id\": 3113, \"forum_id\": 26, \"post_subject\": \"Data Streaming using Apache Kafka\", \"username\": \"rqg0717\", \"post_text\": \"Dear admin,\\n\\nI was wondering if there would be a demo shows how to use Apache Kafka with HPCC Systems. Please let me know. Thank you.\\n\\nSincerely,\\nJames\", \"post_time\": \"2016-10-25 21:40:57\" },\n\t{ \"post_id\": 1613, \"topic_id\": 360, \"forum_id\": 28, \"post_subject\": \"Re: Is there any Plugin development reference?\", \"username\": \"HPCC Staff\", \"post_text\": \"Hello! Please review the documentation in the zip file below. If this isn't what you are looking for, let us know. Thank you!\\n\\nhttp://cdn.hpccsystems.com/install/ecl- ... tation.zip\", \"post_time\": \"2012-05-15 12:32:53\" },\n\t{ \"post_id\": 1612, \"topic_id\": 360, \"forum_id\": 28, \"post_subject\": \"Is there any Plugin development reference?\", \"username\": \"fanliangze\", \"post_text\": \"Hi\\n\\nNow planning to develop some functions via plugin, there is little explanation at\\nExternal Service Implementation in ECL Language Reference.\\n\\nBut that's too simple, for example, where to location the .SO file, or how to load the library\\nin ECL code, and any configuration needed?\\n\\nIs there any detail manual about this?\\n\\nThanks \", \"post_time\": \"2012-05-15 06:42:19\" },\n\t{ \"post_id\": 4722, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"arjuna chala\", \"post_text\": \"Durai,\\n\\nYes, it should be embeddable. I guess it also depends on the complexity of the scoring code.\\n\\nThanks\", \"post_time\": \"2013-10-01 20:28:25\" },\n\t{ \"post_id\": 4721, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"Durai\", \"post_text\": \"Thanks for the details, Arjuna. I will check and revert. \\n\\nInterim, can we use the converted C++ as embedded program. I am quite interested in learning the manual effort involved in this requirement. (SAS Models->ECL->HPCC Analytics->ROXIE Queries)\", \"post_time\": \"2013-10-01 20:25:51\" },\n\t{ \"post_id\": 4657, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"arjuna chala\", \"post_text\": \"Durai,\\n\\nSAS models can be converted to C++ or Java scoring code which can in turn be executed in HPCC. I believe that the conversion feature is available in SAS Enterprise miner.\\n\\nPlease feel free to reach back if you any further questions.\\n\\nThank You\\n\\nArjuna\", \"post_time\": \"2013-09-27 19:24:24\" },\n\t{ \"post_id\": 4656, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"Durai\", \"post_text\": \"Hi, \\nWe are working on a data intensive computing solutions and having following questions on SAS capability in HPCC. It is quite attractive to see SAS capabilities. Can you please help on the following questions?\\nIs this converter available in enterprise edition now? 
Need some advice if the tools has capabilities on moving SAS statistical model to ECL with minimal or no manual fixes on converted code ? I am not from SAS background, one of the requirements is heavily depends this feature so any information on this would be appreciated. \\n\\nThanks\\nDurai\", \"post_time\": \"2013-09-27 18:26:21\" },\n\t{ \"post_id\": 3121, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"nick_montpetit\", \"post_text\": \"Thanks for your interest in the SAS-ECL Converter!\\n\\nThe SAS-ECL Converter translates SAS code into syntactically correct ECL code that is semantically as close as possible to the original SAS code. The current version of the SAS-ECL Converter is intended for internal HPCC Systems use only, but we will be releasing an enhanced version -- with more features and a user-friendly interface -- to the public in Q2 2013. \\n\\n-Nicholas Montpetit, HPCC Systems\\nSAS-ECL Converter Developer\", \"post_time\": \"2013-01-17 18:01:52\" },\n\t{ \"post_id\": 3104, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Re: Integration with SAS\", \"username\": \"HPCC Staff\", \"post_text\": \"I've asked the developer of this tool to reply back with a little more information. \\n\\nThanks for posting the question!\", \"post_time\": \"2013-01-15 13:45:24\" },\n\t{ \"post_id\": 3092, \"topic_id\": 677, \"forum_id\": 28, \"post_subject\": \"Integration with SAS\", \"username\": \"jeeves\", \"post_text\": \"I see that there is SAS to ECL converter here http://hpccsystems.com/products-and-services/products/modules/sas-ecl-converter\\n\\nBut I can find absolutely no documentation about it. Would be great if someone can throw some more light on the tool.\\n\\nThanks,\\n-David\", \"post_time\": \"2013-01-11 13:28:12\" },\n\t{ \"post_id\": 3946, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"It's not implemented yet. What I envision is a separate job where it has a entity that you select the output folder of a previous job and it will pick up the work unit ID from the files we generate and call the cluster to resubmit it. Or it will take the compiled ecl code and resubmit it, either approach would work, the second one may work better if you are including libraries from your local machine.\", \"post_time\": \"2013-04-17 13:21:46\" },\n\t{ \"post_id\": 3944, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi Joe,\\n\\nAs you told
One feature we have looked at is allowing you to choose a work unit ID and resubmit it via pentaho
How can we do that? (i.e. how to choose work unit ID and resubmit it via pentaho).\\nWill it really prevent repetitive code generation?\", \"post_time\": \"2013-04-17 07:15:21\" },\n\t{ \"post_id\": 3937, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"Publishing ROXIE queries and additional ROXIE features are on are list of features we would like to implement but we haven't gotten there yet.\\n\\nYes the code regeneration has been considered, it actually takes very little time. However it is one item we have looked at to increase efficiency. One feature we have looked at is allowing you to choose a work unit ID and resubmit it via pentaho. There are some limits within the spoon paradigm that we haven't addressed in order to prevent repetitive code generation. \\n\\nYou can call sub jobs using spoon. For the more complex jobs I usually break it into smaller jobs and then have a job that just calls the smaller jobs.\", \"post_time\": \"2013-04-16 15:02:38\" },\n\t{ \"post_id\": 3931, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nI have some doubts regarding code generation in pentaho kettle.\\nWhenever we run the job in pentaho kettle the ECL code is going to generate for the user everytime regardless of any changes made into a job.So my concern is that when i run the job 2nd time without any changes it should not generate ECL code again and should generate code only when there is change in the job.(i.e. it should not generate same ECL code again and again when there is no change/modification in the job). Is it possible??\", \"post_time\": \"2013-04-16 09:26:37\" },\n\t{ \"post_id\": 3501, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi Joe,\\nAs you said in the previous post for subjobs it would actually be best to output the data on thor as a thor file and then on the next job utilize this data.I tried with the same i.e.,saved the output of job1 as a thor file and used the same thor file in job2.If i do so untill and unless i run the job1,job2 doesn't get executed which throws error so i need to run job1 prior to job2(here i need to run both the jobs).But the thing which i wanted to do is to run only job2 which internally calls job1 .Can we establish dependencies between thses two jobs.Is there any alternative to do this?\\n\\n\\nAlso you said about web services lookup to point to the WSDL generated by ROXIE which would allow you to call roxie.But i'm trying to write and publish roxie using pentaho kettle but as you said pentaho kettle doesn't support ROXIE.\", \"post_time\": \"2013-02-20 10:02:45\" },\n\t{ \"post_id\": 3489, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"For ROXIE support there are a few built in features that would allow you to fetch data.\\n\\nTake a look at this http://wiki.pentaho.com/display/EAI/Web+services+lookup the experimental plugin they have listed here looks like you can point it to the WSDL generated by ROXIE which would allow you to call roxie. 
\\n\\nThere are probably additional SOAP interfaces as well as JSON plugins that may work.\", \"post_time\": \"2013-02-19 14:59:15\" },\n\t{ \"post_id\": 3488, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"No we don't currently support ROXIE.\\n\\nAs for subjobs it would actually be best to output your data on thor as a thor file and then on the next job utilize this data. If you try to use kettle to move the data between the jobs that will require it download the entire dataset and push it back to the cluster which wouldn't be the best approach.\\n\\nIf you need to move the data into a non thor pentaho mode then do an output which will write it out as a csv and in the next job you can pick up this CSV.\\n\\nThe moving data from the ecl plugins into the default kettle plugins does need a little refining and this is something we could implement in the future if there is enough demand. Let me know if the two solutions above will work for you.\", \"post_time\": \"2013-02-19 14:49:19\" },\n\t{ \"post_id\": 3484, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nCan we write roxie queries in pentaho kettle?If yes what should be the job entry needs to be used for writing roxie queries and publishing it.\\n\\nAlso i have one more query to ask, can we create dependencies between two jobs in pentaho kettle i.e.,suppose if i have two jobs say job1 & job2 ,can i send output of job1 as an input to job2(i.e.,if i run the job2 it should internally call job1 inorder to excecute both the jobs)\", \"post_time\": \"2013-02-19 11:17:00\" },\n\t{ \"post_id\": 3389, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"Can you post a small sample of your dataset you are using (a few lines of each) and I will test in more detail?\\n\\nThe one thing is in the IDE you export and import modules where as pentaho doesn't have this feature, everything is done in one "file".\\n\\nStaying with the use of custom code in the generic code job entry give this a try. Note this compiles within pentaho but since I don't have your data I haven't tested actual execution. 
However I would create the non module part using the spoon job entries.\\n\\n\\ntagModule := MODULE\\n\\ndataDictRec := RECORD\\n INTEGER Keynum;\\n STRING Parent;\\n STRING Key;\\n STRING Related;\\nEND;\\ndictInfoData := DATASET('~kettle::trial::dictdata', dataDictRec, THOR); \\nstrRec := RECORD\\nSTRING strTag;\\nEND;\\n\\nEXPORT STRING tagFunction(STRING strTitleToTag, STRING strLinkToTag) := FUNCTION\\ntitleLinkStr := Std.Str.ToUpperCase(strTitleToTag + ' ' +strLinkToTag);\\n \\nstrRec FindTag( dictInfoData L ) := TRANSFORM\\n SELF.strTag := IF ((REGEXFIND(L.Related, titleLinkStr)), L.Key, SKIP);\\nEND; \\nfoundTags := PROJECT( dictInfoData, FindTag(LEFT)); \\nRETURN foundTags[1].strTag;\\nEND;\\nEND;\\n\\n AssetMetadataRec := RECORD\\n STRING AssetId;\\n STRING AssetIdHash;\\n STRING Title;\\n STRING LastModifiedDateTime;\\n STRING InsertDateTime;\\n STRING assetTag;\\n END;\\n assetMetaDataDS := DATASET( '~kittle::trial::metadata',AssetMetadataRec,THOR );\\n AssetMetadataRec tagXfm(AssetMetadataRec L) := TRANSFORM\\n SELF.assetTag := tagModule.tagFunction(L.Title, L.AssetId);\\n SELF := L;\\n END;\\n taggedAssetMetaDataDS := PROJECT( assetMetaDataDS, tagXfm(LEFT) );\\n taggedAssetMetaDataDS;\\n\\n
\", \"post_time\": \"2013-02-08 16:50:34\" },\n\t{ \"post_id\": 3349, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\nActually when i entered the code in GENERIC CODE job entry it is saving the job ,as soon as i run the job it's saying "Unable to load the job from XML file [D:\\\\HPCC\\\\Pentaho_Kettle\\\\spoon_projects\\\\spoon\\\\re_trial.kjb]" and "Error reading information from input stream".\\n\\nActually what i have done in ecl is :\\nIn one builder i have written the function ,the code as below\\n\\nEXPORT tagModule := MODULE\\n\\ndataDictRec := RECORD\\n\\tINTEGER Keynum;\\n\\tSTRING Parent;\\n\\tSTRING Key;\\n\\tSTRING Related;\\nEND;\\ndictInfoData := DATASET('~kettle::trial::dictdata', dataDictRec, THOR); \\nstrRec := RECORD\\nSTRING strTag;\\nEND; \\n\\nEXPORT STRING tagFunction(STRING strTitleToTag, STRING strLinkToTag) := FUNCTION\\n titleLinkStr := Std.Str.ToUpperCase(strTitleToTag + ' ' +strLinkToTag);\\n\\t\\nstrRec FindTag( dictInfoData L ) := TRANSFORM\\n\\tSELF.strTag := IF ((REGEXFIND(L.Related, titleLinkStr)), L.Key, SKIP);\\nEND; \\nfoundTags := PROJECT( dictInfoData, FindTag(LEFT)); \\t\\nRETURN foundTags[1].strTag;\\nEND;\\nEND;\\n
\\n\\nAnd in another builder,\\n\\n\\nAssetMetadataRec := RECORD\\n\\tSTRING AssetId;\\n\\tSTRING AssetIdHash;\\n\\tSTRING Title;\\n\\tSTRING LastModifiedDateTime;\\n\\tSTRING InsertDateTime;\\n\\tSTRING assetTag;\\nEND;\\nassetMetaDataDS := DATASET( '~kittle::trial::metadata',AssetMetadataRec,THOR );\\nAssetMetadataRec tagXfm(AssetMetadataRec L) := TRANSFORM\\n\\t\\tSELF.assetTag := tagModule.tagFunction(L.Title, L.AssetId);\\n\\t\\tSELF := L;\\nEND;\\ntaggedAssetMetaDataDS := PROJECT( assetMetaDataDS, tagXfm(LEFT) );\\ntaggedAssetMetaDataDS;\\n
\\n\\nThe above code is working fine in ECL.I want to do the same thing in pentaho kettle with hpcc plugins.For that i have written the above whole code in a GENERIC CODE job entry and as i said its not loading onto pentaho spoon IDE. If i write only the fuction part in GENERIC CODE job entry and rest using other job entries like DATASET and PROJECT, the function(tagFunction) is not available to the transform(tagXfm).\\nHow it can be done?\", \"post_time\": \"2013-02-06 06:06:56\" },\n\t{ \"post_id\": 3337, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"Re: pentaho kettle with hpcc plugins\", \"username\": \"joe.chambers\", \"post_text\": \"When you entered the code in the Generic code job entry are you saying it didn't execute or it didn't save the code? \\n\\nIf it is an execution problem can you check your work unit in the ecl watch interface (http://[clusterip]:8010) and see if it included your custom code and post any error that it has listed.\\n\\nAs far as adding custom features please check the github account you can download the source and extend it as needed or if there are particular needs post them here, its no gurantee that someone working on the plugins will get to it but it will help us to know what features people are looking for.\\n\\nThe two related github projects are:\\nhttps://github.com/hpcc-systems/java-ecl-api\\nhttps://github.com/hpcc-systems/spoon-plugins\", \"post_time\": \"2013-02-05 14:39:50\" },\n\t{ \"post_id\": 3331, \"topic_id\": 738, \"forum_id\": 28, \"post_subject\": \"pentaho kettle with hpcc plugins\", \"username\": \"sapthashree\", \"post_text\": \"Hi,\\n\\nI'm using pentaho kettle with hpcc plugins where i get most of the options like SPRAY,DATASET,OUTPUT,PROJECT,DEDUP,GENERIC CODE and many more.\\n\\nHere i have sprayed two physical files from my local to hpcc system using SPRAY option in pentaho kettle which is working fine.Suppose if i want to write any function using pentaho kettle,can i do that ? I tried writting function using GENERIC CODE option where my whole logic resides but I'm unable to run the job as it is not loading onto the pentaho kettle IDE.\", \"post_time\": \"2013-02-05 11:28:30\" },\n\t{ \"post_id\": 5670, \"topic_id\": 1303, \"forum_id\": 28, \"post_subject\": \"Re: Authenticating with Kettle plugins\", \"username\": \"perekl\", \"post_text\": \"Fantastic, that fixed it. I also needed that Server IP override, so I was happy to see that as well. My spray to the cluster worked as expected, thanks so much for the help.\", \"post_time\": \"2014-05-08 14:19:18\" },\n\t{ \"post_id\": 5664, \"topic_id\": 1303, \"forum_id\": 28, \"post_subject\": \"Re: Authenticating with Kettle plugins\", \"username\": \"joe.chambers\", \"post_text\": \"In the install go to plugins/hpcc-common/properties/libaryInclude.properties and open that file in notepad and change includeML=true (you may need to change the permissions on the file so you can change it). Then reopen spoon and see if that fixed it.\", \"post_time\": \"2014-05-07 20:44:26\" },\n\t{ \"post_id\": 5661, \"topic_id\": 1303, \"forum_id\": 28, \"post_subject\": \"Re: Authenticating with Kettle plugins\", \"username\": \"perekl\", \"post_text\": \"Awesome! Thank you, I like the new look. 
I moved over the new files and now get 'Error Editing Job Entry' when I try to click OK after entering in the cluster credentials.\\n\\njava.lang.NullPointerException\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.updatePaths(ECLGlobalVariablesDialog.java:455)\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.access$500(ECLGlobalVariablesDialog.java:52)\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog$6.handleEvent(ECLGlobalVariablesDialog.java:305)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.runDeferredEvents(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.readAndDispatch(Unknown Source)\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.open(ECLGlobalVariablesDialog.java:427)\\n\\tat org.pentaho.di.ui.spoon.delegates.SpoonJobDelegate.editJobEntry(SpoonJobDelegate.java:285)\\n\\tat org.pentaho.di.ui.spoon.Spoon.editJobEntry(Spoon.java:8046)\\n\\tat org.pentaho.di.ui.spoon.job.JobGraph.editEntry(JobGraph.java:2727)\\n\\tat org.pentaho.di.ui.spoon.job.JobGraph.mouseDoubleClick(JobGraph.java:607)\\n\\tat org.eclipse.swt.widgets.TypedListener.handleEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.runDeferredEvents(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.readAndDispatch(Unknown Source)\\n\\tat org.pentaho.di.ui.spoon.Spoon.readAndDispatch(Spoon.java:1227)\\n\\tat org.pentaho.di.ui.spoon.Spoon.waitForDispose(Spoon.java:7368)\\n\\tat org.pentaho.di.ui.spoon.Spoon.start(Spoon.java:8673)\\n\\tat org.pentaho.di.ui.spoon.Spoon.main(Spoon.java:625)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)\\n\\tat java.lang.reflect.Method.invoke(Unknown Source)\\n\\tat org.pentaho.commons.launcher.Launcher.main(Launcher.java:134)\\n
\\n\\nIf I continue through the error and click OK again, I get a second error and then a "Continue with Spoon after fatal error?" dialogue box.\\njava.lang.NullPointerException\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.updatePaths(ECLGlobalVariablesDialog.java:455)\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.access$500(ECLGlobalVariablesDialog.java:52)\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog$6.handleEvent(ECLGlobalVariablesDialog.java:305)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.runDeferredEvents(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.readAndDispatch(Unknown Source)\\n\\tat org.pentaho.di.ui.spoon.Spoon.readAndDispatch(Spoon.java:1227)\\n\\tat org.pentaho.di.ui.spoon.Spoon.waitForDispose(Spoon.java:7368)\\n\\tat org.pentaho.di.ui.spoon.Spoon.start(Spoon.java:8673)\\n\\tat org.pentaho.di.ui.spoon.Spoon.main(Spoon.java:625)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)\\n\\tat java.lang.reflect.Method.invoke(Unknown Source)\\n\\tat org.pentaho.commons.launcher.Launcher.main(Launcher.java:134)
\\n\\nI am operating under the assumption that if I can get to the URL and log in from my browser, my local Spoon should be able to access the same location.\", \"post_time\": \"2014-05-07 19:39:56\" },\n\t{ \"post_id\": 5659, \"topic_id\": 1303, \"forum_id\": 28, \"post_subject\": \"Re: Authenticating with Kettle plugins\", \"username\": \"joe.chambers\", \"post_text\": \"Hi I just pushed a new version that supports authentication. The authentication will be in the global variables entry.\", \"post_time\": \"2014-05-07 19:19:34\" },\n\t{ \"post_id\": 5658, \"topic_id\": 1303, \"forum_id\": 28, \"post_subject\": \"Authenticating with Kettle plugins\", \"username\": \"perekl\", \"post_text\": \"I have an HPCC cluster that requires authentication to connect, but I don't see any way to pass my login info in with the HPCC plugins. I am trying to test out spraying a file that already exists on my LandingZone, and Pentaho is showing me success across the board, but nothing is actually happening on the cluster. Do these plugins only work for clusters that don't have authentication? \\n\\nThanks!\", \"post_time\": \"2014-05-07 18:17:59\" },\n\t{ \"post_id\": 6104, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"Alvindavid\", \"post_text\": \"Hiii, As we know that the pentaho is an open source hence the plug-in which you are using may not support the classic repository from where you are trying to connect it. For further detail on the same kindly view this free online tutorial. Its really informative\\n\\nhttps://www.youtube.com/watch?v=ayFt9L0n_rM\", \"post_time\": \"2014-07-23 14:16:38\" },\n\t{ \"post_id\": 5805, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"joe.chambers\", \"post_text\": \"The plugins were only designed to work with a local repository there hasn't been any extensive testing using the mysql repository. As you discovered the compiler flags in Global Variables will allow you to do pretty much anything you need. One other option is to copy your library down to the local machine and include its location in the compiler flags.\", \"post_time\": \"2014-06-02 13:24:55\" },\n\t{ \"post_id\": 5788, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"jwilt\", \"post_text\": \"Made a discovery...\\n\\nIn the "Global Variables", set:\\n Compile Flags ... -main ...\\n\\nThis (apparently) tells it to pass the ECL to the cluster and not use the local (C: drive) eclcc.\\n(This is actually similar to the ecl command line.)\\n\\nIn brief testing, this appears to allow the Pentaho plugin to use ECL from the classic (MySQL) repository.\\n\\nJust FYI.\", \"post_time\": \"2014-05-30 22:46:20\" },\n\t{ \"post_id\": 5785, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nYes, I understood that. The "classic" central repository will be going away and is only supported currently for our pre-OSS customers, both internal and external, to provide an easier migration path. 
Therefore, I believe the Pentaho plug-in was not designed to work with them.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-30 18:35:32\" },\n\t{ \"post_id\": 5783, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"jwilt\", \"post_text\": \"Sorry, I didn't say, but this is for HPCC 4.2.2-1, running with eclserver (vs. eclccserver).\", \"post_time\": \"2014-05-30 17:23:00\" },\n\t{ \"post_id\": 5779, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Re: Use Pentaho to call ECL from classic repository?\", \"username\": \"rtaylor\", \"post_text\": \"Jim,\\n\\nPentaho is an Open Source tool itself, so I don't believe our Pentaho plug-in was designed to work with anything but the Open Source version of HPCC. FWIW, the "classic" repository is soon to be deprecated and was really only supported for our 702 => OSS migration process.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-05-30 13:24:51\" },\n\t{ \"post_id\": 5777, \"topic_id\": 1335, \"forum_id\": 28, \"post_subject\": \"Use Pentaho to call ECL from classic repository?\", \"username\": \"jwilt\", \"post_text\": \"Hi - \\nIn my early testing with the Pentaho plug-in, it's not clear how to use ECL that calls attributes from a "classic" (MySQL, via eclserver) repository.\\nWhat am I missing?\\nAny help would be greatly appreciated.\\nThanks.\", \"post_time\": \"2014-05-30 02:06:15\" },\n\t{ \"post_id\": 6610, \"topic_id\": 1515, \"forum_id\": 28, \"post_subject\": \"Re: Hadoop Online Training\", \"username\": \"rtaylor\", \"post_text\": \"Thank you for sharing, sarirami, but since this forum is devoted to HPCC Systems (not Hadoop), let me remind everybody that we do have FREE online HPCC Systems ECL Training right here: http://learn.lexisnexis.com/hpcc\\n\\nRichard Taylor\\nChief Trainer\\nHPCC Systems\", \"post_time\": \"2014-11-19 14:19:11\" },\n\t{ \"post_id\": 6607, \"topic_id\": 1515, \"forum_id\": 28, \"post_subject\": \"Hadoop Online Training\", \"username\": \"sarirami\", \"post_text\": \"[url]HADOOP Online Training[/url] by iq online training with an excellent and real time faculty.\\nwe have started this hadoop classes with much passion of providing excellent best quality learning of hadoop learners.\\nHadoop is having very good demand in the market, huge number of job openings are there in the IT world.\\nOur Hadoop training is regarded as the best online training by our students and corporate clients. \\nWe train students from across all countries like USA, UK, Singapore, UAE, Australia, India. \\nOur Hadoop training is your one stop solution to Learn, Practice and build career in this field at the comfort of your Home with flexible class schedules.\\n\\nweb:http://www.iqonlinetraining.com/hadoop-online-training/\\nplease contact:\\nIndia :+91 95734 81637 USA: 732-475-4280 \\nMail:info@iqonlinetraining.com\", \"post_time\": \"2014-11-19 04:43:05\" },\n\t{ \"post_id\": 8402, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"Re: bug while running simple tutorial example\", \"username\": \"xue_fg\", \"post_text\": \"you can use 4.2.1 or 4.4.0\", \"post_time\": \"2015-11-02 03:05:16\" },\n\t{ \"post_id\": 7811, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"Re: bug while running simple tutorial example\", \"username\": \"vikas134\", \"post_text\": \"Hi Joe, \\n\\nCan you help me out here? 
Please let me know if there's any additional information you need.\\n\\nThanks,\\nVikash\", \"post_time\": \"2015-06-22 14:05:20\" },\n\t{ \"post_id\": 7753, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"Re: bug while running simple tutorial example\", \"username\": \"vikas134\", \"post_text\": \"Thanks for the response Joe.\\n\\nI am using version pdi-ce-5.4.0.0-128, downloaded the same from http://community.pentaho.com/projects/data-integration/. I am using spoon plugin distribution downloaded through http://hpccsystems.com/products-and-ser ... ntegration on an Ubuntu 14.04 LTS 64 bit installed system.\\n\\nPlease let me know if you need any more information. \\n\\nRegards,\\nVikash\", \"post_time\": \"2015-06-15 05:47:17\" },\n\t{ \"post_id\": 7749, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"Re: bug while running simple tutorial example\", \"username\": \"joe.chambers\", \"post_text\": \"What version of pentaho are you using the plugins with? It looks like a compatibility issue. The function (org.pentaho.di.ui.job.dialog.JobDialog.setShellImage) is there to set the icon for the popup window for the plugin. It looks like that function doesn't exist in that version of pentaho.\", \"post_time\": \"2015-06-12 21:14:41\" },\n\t{ \"post_id\": 7748, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"Re: bug while running simple tutorial example\", \"username\": \"vikas134\", \"post_text\": \"Hi,\\n\\nPlease let me know if you need any more information regarding this error message, I have been following the documentation given through http://hpccsystems.com/products-and-ser ... ntegration. I can't set any attributes in the "Global variable" module as a double click or edit the properties option throws the error specified above.\\n\\nThanks,\\nVikash\", \"post_time\": \"2015-06-12 10:11:39\" },\n\t{ \"post_id\": 7746, \"topic_id\": 1764, \"forum_id\": 28, \"post_subject\": \"bug while running simple tutorial example\", \"username\": \"vikas134\", \"post_text\": \"Hi,\\n\\nI am trying to get the hpcc plugin for Spoon to work in linux environment using Ubuntu 14.04. I am following the steps as specified in the "Simple Tutorials" example in the documentation. I keep getting a bug while double clicking on any of the modules under the hpcc systems plugin. 
Following is the stack trace of the same:\\n\\nAn unexpected error occurred in Spoon: \\norg.pentaho.di.ui.job.dialog.JobDialog.setShellImage(Lorg/eclipse/swt/widgets/Shell;Lorg/pentaho/di/job/entry/JobEntryInterface;)V\\njava.lang.NoSuchMethodError: org.pentaho.di.ui.job.dialog.JobDialog.setShellImage(Lorg/eclipse/swt/widgets/Shell;Lorg/pentaho/di/job/entry/JobEntryInterface;)V\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.open(ECLGlobalVariablesDialog.java:119)\\n\\tat org.pentaho.di.ui.spoon.delegates.SpoonJobDelegate.editJobEntry(SpoonJobDelegate.java:255)\\n\\tat org.pentaho.di.ui.spoon.Spoon.editJobEntry(Spoon.java:8560)\\n\\tat org.pentaho.di.ui.spoon.job.JobGraph.editEntry(JobGraph.java:2818)\\n\\tat org.pentaho.di.ui.spoon.job.JobGraph.mouseDoubleClick(JobGraph.java:634)\\n\\tat org.eclipse.swt.widgets.TypedListener.handleEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.runDeferredEvents(Unknown Source)\\n\\tat org.eclipse.swt.widgets.Display.readAndDispatch(Unknown Source)\\n\\tat org.pentaho.di.ui.spoon.Spoon.readAndDispatch(Spoon.java:1319)\\n\\tat org.pentaho.di.ui.spoon.Spoon.waitForDispose(Spoon.java:7939)\\n\\tat org.pentaho.di.ui.spoon.Spoon.start(Spoon.java:9190)\\n\\tat org.pentaho.di.ui.spoon.Spoon.main(Spoon.java:654)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\\n\\tat java.lang.reflect.Method.invoke(Method.java:606)\\n\\tat org.pentaho.commons.launcher.Launcher.main(Launcher.java:92)\\n\\n\\nIt seems it is a bug in the setShellImage function or may be there is some configuration I am missing. Can you advise me here on how to resolve the issue?\\n\\nThanks,\\nVikash\", \"post_time\": \"2015-06-11 13:43:28\" },\n\t{ \"post_id\": 8780, \"topic_id\": 2042, \"forum_id\": 28, \"post_subject\": \"Re: Workunit in Unknown status\", \"username\": \"Mragesh\", \"post_text\": \"Ok, I was able to identify the issue.\\n"Cluster" in the global variables was incorrect.\\n\\nHad to go through the plugin code to debug the issue and found that this error was never reported because in "java-ecl-api" (ECLSoap.java) the return code after submitting the workunit was not evaluated (although it was acknowledged in the comment, see below )\\n\\n
public void submitSoapCall(String wuid){\\n String xml = "<?xml version=\\\\"1.0\\\\" encoding=\\\\"UTF-8\\\\"?>"+\\n "<soapenv:Envelope xmlns:soapenv=\\\\"http://schemas.xmlsoap.org/soap/envelope/\\\\" xmlns:xsd=\\\\"http://www.w3.org/2001/XMLSchema\\\\" xmlns:xsi=\\\\"http://www.w3.org/2001/XMLSchema-instance\\\\">"+\\n "<soapenv:Body>"+\\n "<WUSubmit xmlns=\\\\"urn:hpccsystems:ws:wsworkunits\\\\">"+\\n "<Wuid>" + wuid + "</Wuid>"+\\n //"<MaxRunTime>" + maxRunTime + "<MaxRunTime>" +\\n "<Cluster>" + this.cluster + "</Cluster>"+\\n "</WUSubmit>"+\\n "</soapenv:Body>"+\\n "</soapenv:Envelope>";\\nString path = "/WsWorkunits/WUSubmit";\\n InputStream is2 = this.doSoap(xml, path);\\n //need to check for errors here
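// Note (annotation, not part of the original plugin code): doSoap() returns the raw
// WUSubmit SOAP response in is2, but nothing reads it, so a submit failure (for example
// an invalid Cluster name, as found above) is silently dropped and the workunit is left
// in an Unknown state. A fix would presumably parse is2 and surface any exception text
// back to the caller instead of discarding it.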
\\n\\nWill report this issue in JIRA. \\n\\nThanks.\", \"post_time\": \"2015-12-09 17:57:36\" },\n\t{ \"post_id\": 8742, \"topic_id\": 2042, \"forum_id\": 28, \"post_subject\": \"Workunit in Unknown status\", \"username\": \"Mragesh\", \"post_text\": \"Hi Everyone,\\n\\nI created a pdi job, which successfully submitted the workunit to HPCC. \\nHowever, the workunit always remains in an "unknown" status and gets stuck.\\n\\nWhen I manually set my workunit to "failed" and abort the job from ECL-Watch, this is the error I receive on pdi console:\\n\\n2015/12/07 11:51:07 - Execute - ERROR (version 4.4.0-stable, build 17588 from 2012-11-21 16.02.21 by buildguy) : Server Failed to compile code please refer to ECLWatch and verify your settings\\n\\nI am using the following HPCC version: internal_5.4.6-rc1\\nAnd Kettle Spoon Stable Release - 4.4.0\\n\\nLet me know if you want me to share any specific details from my workunit.\\n\\nThanks.\", \"post_time\": \"2015-12-07 17:05:23\" },\n\t{ \"post_id\": 9026, \"topic_id\": 2090, \"forum_id\": 28, \"post_subject\": \"Re: Project plugin with counter throws error\", \"username\": \"SuganthSelvan\", \"post_text\": \"Thanks Joe.\\nSo, will you be raising any JIRA request for fixing this issue?\", \"post_time\": \"2016-01-08 04:24:59\" },\n\t{ \"post_id\": 9000, \"topic_id\": 2090, \"forum_id\": 28, \"post_subject\": \"Re: Project plugin with counter throws error\", \"username\": \"joe.chambers\", \"post_text\": \"There wasn't a conflict with the version of HPCC that existed when the plugin was developed. We will need to update it.\", \"post_time\": \"2016-01-04 14:46:17\" },\n\t{ \"post_id\": 8978, \"topic_id\": 2090, \"forum_id\": 28, \"post_subject\": \"Project plugin with counter throws error\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\nI tried to use the project plugin and call a transform function with the counter.\\nFor this, I selected "yes" in the "Declare counter" dropdown in Project plugin's general tab.\\nWhen I executed the job with the above setup I got the error below.\\n"ERROR (version 4.4.0-stable, build 17588 from 2012-11-21 16.02.21 by buildguy) : (24,35): error C2397: Identifier 'count' clashes with a reserved symbol"\\n\\nBelow is the equivalent ECL code generated by the job.\\nnewPersonRS Trans(personDS input, integer count) := transform \\n\\tself.id := input.id;\\n\\tself.firstName := input.firstName;\\n\\tself.lastName := input.lastName;\\n\\tself.address := input.address;\\n\\tself.state := input.state;\\n\\tself.city := input.city;\\n\\tself.zip := input.zip;\\n\\tself.callcnt := 1;\\nend; \\nProjectedPersonDS := project(personDS,Trans(left, counter)); \\nOUTPUT( ProjectedPersonDS);\\n\\nWhy is the HPCC plugin generating code with the counter parameter named "count" (which is a reserved keyword) in the transform function definition?\\nIs there any way to override this issue or is this an existing issue which needs to be fixed?\\n\\nPlease, can anyone help me out with this issue? 
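For illustration only, a hand-written equivalent that avoids the reserved symbol compiles cleanly; the layout, sample values and the parameter name "c" below are made up for this sketch and are not what the plugin emits:

personRec := { UNSIGNED1 id, STRING20 firstName };
personDS := DATASET([{1, 'Ann'}, {2, 'Bob'}], personRec);
newPersonRS := { personRec, UNSIGNED4 callcnt };

newPersonRS Trans(personRec L, INTEGER c) := TRANSFORM   // 'c' instead of the reserved 'count'
    SELF.callcnt := c;   // the COUNTER value passed in by PROJECT
    SELF := L;           // copy the remaining fields unchanged
END;

ProjectedPersonDS := PROJECT(personDS, Trans(LEFT, COUNTER));
OUTPUT(ProjectedPersonDS);

So the clash appears to be only about the generated parameter name; any non-reserved identifier works.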
Pentaho version used: 4.4.0\", \"post_time\": \"2015-12-24 10:37:04\" },\n\t{ \"post_id\": 9056, \"topic_id\": 2106, \"forum_id\": 28, \"post_subject\": \"Re: Dataset plugin doesn't have XML filetype\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:18:49\" },\n\t{ \"post_id\": 9028, \"topic_id\": 2106, \"forum_id\": 28, \"post_subject\": \"Dataset plugin doesn't have XML filetype\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nThe Dataset plugin available has the option for selecting only CSV and THOR file types.\\nIs there any option for reading the XML file type, or is that option yet to be developed?\", \"post_time\": \"2016-01-08 04:37:16\" },\n\t{ \"post_id\": 9054, \"topic_id\": 2108, \"forum_id\": 28, \"post_subject\": \"Re: Issue with Keeper option in Dedup plugin\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:18:05\" },\n\t{ \"post_id\": 9030, \"topic_id\": 2108, \"forum_id\": 28, \"post_subject\": \"Issue with Keeper option in Dedup plugin\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nWhen using the Dedup plugin, if I provide the value RIGHT for the Keeper option (the default is LEFT), it doesn't keep the last record encountered. Instead it always keeps the first record even though I provide RIGHT in the Keeper option.\\n\\nBut when I use this option along with the KEEP n option, the KEEPER functionality works fine: if I provide RIGHT it correctly keeps the last record, and if I provide LEFT it keeps the first record.\\n\\nIs this an existing issue with the integration?\", \"post_time\": \"2016-01-08 04:52:28\" },\n\t{ \"post_id\": 9052, \"topic_id\": 2110, \"forum_id\": 28, \"post_subject\": \"Re: Functionality of Error plugin\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:17:06\" },\n\t{ \"post_id\": 9032, \"topic_id\": 2110, \"forum_id\": 28, \"post_subject\": \"Functionality of Error plugin\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nI tried to explore the Error plugin but I couldn't find a scenario where this plugin would be useful.\\nI tried to include the Error plugin in the flow and I find that this plugin is used to stop the execution of the workunit, and I receive a pop-up. 
But once I close the pop-up the execution resumes and I am able to get the result.\\n\\nThere is also no option within the Error plugin for including the Error message/Error Code.\\n\\nCan anyone please tell me a scenario where this plugin can be used within the PDI tool?\", \"post_time\": \"2016-01-08 04:58:54\" },\n\t{ \"post_id\": 9050, \"topic_id\": 2112, \"forum_id\": 28, \"post_subject\": \"Re: INDEX plugin doesn't work and it throws error\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:15:34\" },\n\t{ \"post_id\": 9034, \"topic_id\": 2112, \"forum_id\": 28, \"post_subject\": \"INDEX plugin doesn't work and it throws error\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nI tried to create an index using the INDEX plugin available, but it throws an error during execution.\\n\\nI checked the ECL code generated and inferred that the attribute name which holds the index definition is being generated as "index", and since that is a keyword, an error is produced when the code executes.\\n\\nThe ECL code generated by the plugin is something like: index := INDEX(......); BUILDINDEX(index); which causes the error.\\n\\nIs this an existing issue which needs to be fixed?\", \"post_time\": \"2016-01-08 05:05:18\" },\n\t{ \"post_id\": 9048, \"topic_id\": 2114, \"forum_id\": 28, \"post_subject\": \"Re: Limitations with JOIN plugin\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:14:45\" },\n\t{ \"post_id\": 9036, \"topic_id\": 2114, \"forum_id\": 28, \"post_subject\": \"Limitations with JOIN plugin\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nI tried to explore the join plugin available. Only the basic join condition/functionality works fine. I found the following limitations while using the JOIN plugin to perform a join operation.\\n\\nThe join plugin doesn’t have provision to include the join options below:\\n1. Multiple join conditions can't be formed using a single join plugin.\\n2. A transform function can't be defined.\\n3. Non-equality join conditions can't be formed. \\n4. Options for selecting join flags are not available.\\n5. A full-keyed join can't be formed since there is no provision for using the 'KEYED' option.\\n\\nAre these existing limitations of the JOIN plugin, or are these options still under development?\", \"post_time\": \"2016-01-08 05:19:27\" },\n\t{ \"post_id\": 9046, \"topic_id\": 2116, \"forum_id\": 28, \"post_subject\": \"Re: Rollup plugin doesn't work for Group option\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:13:53\" },\n\t{ \"post_id\": 9038, \"topic_id\": 2116, \"forum_id\": 28, \"post_subject\": \"Rollup plugin doesn't work for Group option\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nThe Rollup plugin works fine for ROLLUP forms 1 & 2 mentioned in the ECL reference. 
But when I tried to use form 3 by selecting the "yes" option in the Grouped dropdown available in the plugin, I get an error when the job is executed.\\n\\nWhen I checked the generated ECL code, I inferred that it is not compatible with ROLLUP form 3. The actual format must be "ROLLUP(datasetname, GROUP, transformName(LEFT,ROWS(LEFT)));". But the ECL code generated by the plugin is "ROLLUP(datasetname, GROUP, transformName(LEFT,RIGHT));".\\n\\nInstead of passing ROWS(LEFT), it passes RIGHT to the transform function, and the transform function definition generated by the plugin also doesn't receive the right parameter as a dataset; instead it receives the right parameter in the same format as the LEFT parameter.\\n\\nIs this an existing issue with the plugin?\", \"post_time\": \"2016-01-08 06:35:37\" },\n\t{ \"post_id\": 9044, \"topic_id\": 2118, \"forum_id\": 28, \"post_subject\": \"Re: Loop doesn't have PROJECT Transform option\", \"username\": \"bforeman\", \"post_text\": \"Please direct these issues to GitHub so that they can be better tracked: https://github.com/hpcc-systems/spoon-plugins\\n\\nThank You!\", \"post_time\": \"2016-01-08 17:11:13\" },\n\t{ \"post_id\": 9040, \"topic_id\": 2118, \"forum_id\": 28, \"post_subject\": \"Loop doesn't have PROJECT Transform option\", \"username\": \"SuganthSelvan\", \"post_text\": \"Hi,\\n\\nWhile exploring the Loop plugin, I tried to use a PROJECT transform as the loop body. But I didn't find any provision for defining the transform function separately, as provided for the other plugins like PROJECT.\\n\\nCan we only define an inline transform in the Loop body section, or is there another option available to include a PROJECT transform as the loop body within the Loop plugin?\", \"post_time\": \"2016-01-08 06:44:03\" },\n\t{ \"post_id\": 9236, \"topic_id\": 2130, \"forum_id\": 28, \"post_subject\": \"Re: Issue with updatePath() from eclglobalvariables\", \"username\": \"jwilt\", \"post_text\": \"It might be worth considering that many (most?) Pentaho users would want to be able to call other platforms/interfaces from the same workflows.\\n\\n(Safe to assume DSP would not fill that need?)\", \"post_time\": \"2016-02-10 03:18:07\" },\n\t{ \"post_id\": 9234, \"topic_id\": 2130, \"forum_id\": 28, \"post_subject\": \"Re: Issue with updatePath() from eclglobalvariables\", \"username\": \"joe.chambers\", \"post_text\": \"We have scaled back support for these plugins and focused development on a ground-up ETL tool. It appears that Pentaho no longer supports the 4.2.0-GA libs that the MVN scripts point to.\\n\\nI will try to block out some time in the next few weeks to finalize a branch with changes needed to run it against Kettle 5.2.\\n\\nThe changes will be posted here:\\nhttps://github.com/hpcc-systems/spoon-p ... al-release\\n\\nIf you are internal to the company, reach out to us about DSP, which will eventually replace most of the ELT-type tools we are using.\", \"post_time\": \"2016-02-09 22:26:29\" },\n\t{ \"post_id\": 9194, \"topic_id\": 2130, \"forum_id\": 28, \"post_subject\": \"Re: Issue with updatePath() from eclglobalvariables\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you for your question. 
The team is currently researching the issue.\", \"post_time\": \"2016-02-02 19:35:56\" },\n\t{ \"post_id\": 9138, \"topic_id\": 2130, \"forum_id\": 28, \"post_subject\": \"Issue with updatePath() from eclglobalvariables\", \"username\": \"jennifer.hughes@lnssi.com\", \"post_text\": \"JDK: 7\\nPentaho/kettle: (I think) 5.2.0.0\\nWindows 7 64-bit\\n\\nI cloned the HPCCPentahoPlugin from git. I had to make one change. The items from codehaus have moved to https://repository.mulesoft.org/nexus/c ... ies/public. I built via the command line; ran the one-time command then the main build command. Built the base package and extracted it into the plugins directory in my pentaho/kettle project.\\n\\nThe issue is when I open an existing ECLGlobalVariable job and click on "OK" I get a null pointer exception. \\n\\njava.lang.NullPointerException\\n\\tat org.hpccsystems.pentaho.job.eclglobalvariables.ECLGlobalVariablesDialog.updatePaths(ECLGlobalVariablesDialog.java:455)\\n...\\n\\nThe includeML combo variable is null at the point in updatePaths() that it is checked.\\n\\nIf I switch back to the downloaded plugin from your site it works fine. \\n\\nI am at a loss. \\n\\nThanks for any help...\", \"post_time\": \"2016-01-26 19:34:02\" },\n\t{ \"post_id\": 17081, \"topic_id\": 4251, \"forum_id\": 28, \"post_subject\": \"Pentaho Kettle ECL Execute Archive Plugin using HPCC J-API\", \"username\": \"srijanroy\", \"post_text\": \"[color=#008000:3w1k7tbj]Hi\\n\\nI tried to make a simple plugin which can execute a ECL Archive file in HPCC server from Pentaho Kettle. Below is the link. \\n\\nhttps://drive.google.com/open?id=0B3AJ1 ... m9qSVI3ODQ\\n\\nLet me know if I can help you further\\n\\nEmail: roy.srijon@gmail.com\\n\\nP.S. I am not a Java developer. I work on Mainframe Cobol platform. If there is any issue please feel free to suggest.\", \"post_time\": \"2017-05-16 18:33:59\" },\n\t{ \"post_id\": 1567, \"topic_id\": 347, \"forum_id\": 29, \"post_subject\": \"Welcome to the Contributor forum\", \"username\": \"ghalliday\", \"post_text\": \"This is a place for developers to ask questions about contributing to the open source code base. For example if you want an idea of a simple change/improvement that would help you get started working with the source code.\\n\\nDesign discussions, discussing and reporting issues, and submitting change requests are generally done on the github site where the sources are hosted\\n(see https://github.com/hpcc-systems/HPCC-Platform)\\nor on the hpcc-dev mailing list \\n(see http://hpccsystems.com/mailman/listinfo/hpcc-dev).\\n\\nFrom time to time we will also post information that may prove useful for understanding the code base.\", \"post_time\": \"2012-04-25 16:17:45\" },\n\t{ \"post_id\": 1570, \"topic_id\": 348, \"forum_id\": 29, \"post_subject\": \"Re: An introduction to optimizations\", \"username\": \"ghalliday\", \"post_text\": \"This isn't the correct forum to ask that question - this forum should be restricted to questions about modifying the system source.\\n\\nP.S. 3.8 is the first version that should include it.\", \"post_time\": \"2012-04-26 08:45:34\" },\n\t{ \"post_id\": 1569, \"topic_id\": 348, \"forum_id\": 29, \"post_subject\": \"Re: An introduction to optimizations\", \"username\": \"bforeman\", \"post_text\": \"Hi Gavin,\\n\\nIs the COUNTER operator coming in a later version? I just tried it with the latest release:\\n\\n
values := DATASET(100, TRANSFORM(COUNTER));\\nvalues;
\\n\\nWhen I syntax check I get:\\n\\nError: Unknown identifier "COUNTER" (1, 37), 2167, \\n\\nRegards,\\nBob\", \"post_time\": \"2012-04-25 18:38:18\" },\n\t{ \"post_id\": 1568, \"topic_id\": 348, \"forum_id\": 29, \"post_subject\": \"An introduction to optimizations\", \"username\": \"ghalliday\", \"post_text\": \"The following is copied from a hpcc-dev post and provides some background on optimizations. It was written with one particular optimization in mind, but should serve as a useful example for discussing some of the issues involved in adding new optimizations to the code generator.\\n\\nAn introduction to optimization\\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\\n\\nWith the introduction of the new operator\\nDATASET(<count>,transform(COUNTER)) there is an opportunity to use it to optimize some existing code. In particular a construct that has previously been used for generating test data is\\n\\nsimpleRow := DATASET([0], { unsigned value }); values := NORMALIZE(simpleRow, <count>, transform(COUNTER));\\n\\nThis can now be replaced with \\n\\nvalues := DATASET(<count>, transform(COUNTER));\\n\\nDifferent optimizers\\n~~~~~~~~~~~~~~~~~~~~\\nThere are two main expression optimizers in the system hqlfold and hqlopt.\\nWhich should the code for the new optimization go in?\\n\\nhqlfold is used for scalar optimizations. It is also used for optimizations which can be guaranteed to reduce the amount of work. (See example below)\\n\\nhqlopt is used for dataset optimizations. One key difference is that it keeps track of the number of times an expression is used to stop common expressions becoming duplicated.\\n\\nFor example with the following ECL:\\n\\nds1 := DATASET('x', rec, thor);\\nds2 := SORT(ds1, fieldx);\\nr1 := ds2(fieldy == 1);\\noutput(ds2);\\noutput(r1);\\n\\nIn general it is more efficient to filter a dataset before it is sorted, so the system contains an optimization to ensure that filters are moved before sorts. However if it is done on this example you would end up with\\n\\nds1 := DATASET('x', rec, thor);\\nds2 := SORT(ds1, fieldx);\\nr1 := ds1(fieldy == 1);\\nr1b := SORT(r1, fieldx);\\noutput(ds2);\\noutput(r1);\\n\\nwhich has duplicated the sort activity. To avoid this hqlopt keeps track of the number of times each dataset expression is referenced, and only moves filters over sort activities if the sort isn't shared.\\n\\n[Occasionally some of the complex transforms in hqlfold can cause scalar expressions to become duplicated, but generally hqlfold is relatively conservative, and the examples are few and far between. (Ideally there would be a scalar optimizer which also kept track of the number of times an expression was used to allow some more exotic scalar optimizations.)]\\n\\nAn example of a dataset optimization that IS done in hqlfold is removing a sort of a sort:\\n\\nds1 := DATASET('x', rec, thor);\\nds2 := SORT(ds1, fieldx);\\nds3 := SORT(ds2, fieldy);\\noutput(ds3);\\n\\nOptimizing to...\\n\\nds1 := DATASET('x', rec, thor);\\nds3 := SORT(ds1, fieldy);\\noutput(ds3);\\n\\nIf ds2 is used by another definition then the new graph will have two independent sorts - which could be done in parallel. If it isn't then you have removed a sort from your execution graph. 
Since neither outcome makes it worse the optimization can be made without checking if the datasets are shared.\\n\\nWhich optimizer?\\n~~~~~~~~~~~~~~~~\\nSo for the DATASET() optimization where should it go?\\n\\nsimpleRow := DATASET([0], { unsigned value }); values := NORMALIZE(simpleRow, <count>, transform(COUNTER));\\n------\\nvalues := DATASET(<count>, transform(COUNTER));\\n\\nThis optimization merges two nodes, so if there is also code that uses simpleRow, you will potentially duplicate its evaluation - once in simpleRow, and once as part of the new dataset operation. That would argue for the code going inside hqlopt rather than hqlfold. However, the cost of evaluating that datarow is unlikely to be very high, and often the transform for the normalize will not make any reference to LEFT - so there will be no actual duplication of simpleRow.\\n\\nStrictly speaking the optimization should go in hqlfold for cases where no values from the input dataset are used in the transform, or only constant values from the row. The general case could be implemented in hqlopt. In practice duplicating the row is highly unlikely to create extra work, so in this case it is good enough (and simplest) to always implement it in hqlfold. This has the advantage that multiple NORMALIZEs of the same input row will be converted (hqlopt would prevent this because the row would be shared).\\n\\n(Note expression commoning up means it is relatively likely the input row is commoned up across independent attributes.)\\n\\nThe optimization\\n~~~~~~~~~~~~~~~~\\n\\nThe code should probably be placed inside IHqlExpression * CExprFolderTransformer::doFoldTransformed(IHqlExpression * unfolded, IHqlExpression * original)\\n\\nAdding a case statement for the no_normalize. There are several different forms of row that we should match:\\n\\na) DATASET(ROW(transform));\\nb) DATASET([transform()]);\\nc) DATASET([value],{ myfield });\\n\\nThese have the following graph representations:\\n \\na) no_datasetfromrow(no_createrow(no_transform(...)))\\n\\nb) no_inlinetable(no_transformlist(no_transform(...)))\\n\\nc) Already converted into form (b) when the expression tree is normalized.\\n\\nThe optimization needs to check that the child graph is of one of those forms, and extract the transform. It then needs to substitute the values from that transform for any references to LEFT.\\nThat is achieved by creating a new graph node\\n no_newrow(no_createrow(transform))\\nand then calling\\n replaceSelector(expression, left-expression, new-row-expression) on the NORMALIZE's transform. \\n(no_newrow indicates that you are replacing a selector (i.e. a cursor onto an active dataset) with an explicit row). \\n\\nreplaceSelector() returns a new transform which can be used as the parameter to the new DATASET(count, transform) operation. The code also needs to inherit the attribute that uniquely identifies the counter from the NORMALIZE.\\n\\nThe other situation that can be optimized (d) is where the input dataset has a single row, and the transform doesn't use any values from LEFT. (E.g., DATASET(GLOBAL(row))).\\nUsing hasSingleRow() and exprReferencesDataset(transform, left) you can determine if that condition is met. 
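To make the rewrite concrete, here is a small hand-written example of the two equivalent forms; the record and field names are illustrative only and are not taken from the optimizer sources:

outRec := { UNSIGNED value };

// current idiom: a one-row seed dataset expanded with NORMALIZE
simpleRow := DATASET([{0}], outRec);
valuesOld := NORMALIZE(simpleRow, 100, TRANSFORM(outRec, SELF.value := COUNTER));

// what the optimization should produce: the count/transform form of DATASET
valuesNew := DATASET(100, TRANSFORM(outRec, SELF.value := COUNTER));

OUTPUT(valuesOld);
OUTPUT(valuesNew);

Both produce the values 1 through 100, so a regression test can simply compare the two outputs.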
\\n\\nYou need to add regression tests for cases a,b,c,d ensuring you have cases which do and don't get transformed.\", \"post_time\": \"2012-04-25 16:24:38\" },\n\t{ \"post_id\": 2304, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: "Sentilyze", Twitter Sentiment Classification - UPDATE\", \"username\": \"chargil\", \"post_text\": \"I've just uploaded a new version of Sentilyze to contributions. For those of you who are tracking Sentilyze on github, the updates should show up shortly. \\n\\n\\n
\", \"post_time\": \"2012-09-06 18:23:42\" },\n\t{ \"post_id\": 1842, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: Sentiment Classification,"Sentilyze" now on Github\", \"username\": \"chargil\", \"post_text\": \"I've added a Naive Bayes classifier to both github and the "Contributions" section on hpccsystems.com.\", \"post_time\": \"2012-06-26 21:25:50\" },\n\t{ \"post_id\": 1802, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: Sentiment Classification, From "SeeSaveSkip" to "Sentily\", \"username\": \"chargil\", \"post_text\": \"You can now find Sentilyze on github in the 'Examples' folder.\", \"post_time\": \"2012-06-19 17:04:35\" },\n\t{ \"post_id\": 1768, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: Sentiment Classification, From "SeeSaveSkip" to "Sentily\", \"username\": \"Durai\", \"post_text\": \"Thanks for the reply. I will look forward to get it from github. \", \"post_time\": \"2012-06-12 17:35:05\" },\n\t{ \"post_id\": 1767, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: Sentiment Classification, From "SeeSaveSkip" to "Sentily\", \"username\": \"chargil\", \"post_text\": \"I will get it into the hpccsystems/ecl-ml repository as soon as possible.\", \"post_time\": \"2012-06-12 17:33:36\" },\n\t{ \"post_id\": 1763, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \"Re: Sentiment Classification, From "SeeSaveSkip" to "Sentily\", \"username\": \"Durai\", \"post_text\": \"Can you please post this code into GitHub? So whomever wants to build the further enhancements can officially pull and extend it.\", \"post_time\": \"2012-06-11 19:41:59\" },\n\t{ \"post_id\": 1652, \"topic_id\": 368, \"forum_id\": 29, \"post_subject\": \""Sentilyze", Twitter Sentiment Classification - UPDATE\", \"username\": \"chargil\", \"post_text\": \""Sentilyze" Twitter Sentiment Classfication\\n\\nSentilyze on Github\\n\\nChanges:\\n
\\n
\\n\\nWhat's included in Sentilyze?\\n
\\n\\nHope more people find this useful with the new changes.\\n\\nAlso I am working on an additional Sentiment Classifier using Naive Bayes. So if you're interested in that, be sure to watch this post for more updates.\", \"post_time\": \"2012-05-24 19:27:52\" },\n\t{ \"post_id\": 3142, \"topic_id\": 687, \"forum_id\": 29, \"post_subject\": \"Re: Can you add some syntactic sugar?\", \"username\": \"rtaylor\", \"post_text\": \""Nothing special."
If it were common practice, then I would probably have the same complaint, but I don't, so...\\n\\n"Quite often you just need an array of strings or integers and you always have to wrap them in DATASETS"
I have not found this to be the case. When I need an array (a set, in ECL terms) I simply define a set of strings or integers or whatever.\\n\\n"I would have liked to have DATASET(STRING) or DATASET(INTEGER)"
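(For reference, a minimal sketch of the SET approach mentioned above, together with the usual one-field record wrapper; the names and values are made up for illustration:)

// a SET is ECL's built-in "array" of primitives
SET OF STRING names := ['Ann', 'Bob', 'Cal'];
SET OF INTEGER nums := [1, 2, 3];
OUTPUT(names[2]);         // sets are 1-based, so this is 'Bob'
OUTPUT('Cal' IN names);   // membership test

// when a true dataset is required, a one-field inline record is the usual wrapper
strDS := DATASET([{'Ann'}, {'Bob'}, {'Cal'}], {STRING s});
OUTPUT(strDS);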
I have no idea what it is you're trying to accomplish, which is precisely why I asked for a code example showing exactly what you are doing. \\n\\nIOW, right now I have no idea what syntax requirements you are complaining about. If you will please expand on your issue and show me what you are referring to, then communication will have been achieved. \\n\\nPlease show me the kind of code you are writing, then perhaps I can show you an alternative.\\n\\nRichard\", \"post_time\": \"2013-01-21 23:22:11\" },\n\t{ \"post_id\": 3141, \"topic_id\": 687, \"forum_id\": 29, \"post_subject\": \"Re: Can you add some syntactic sugar?\", \"username\": \"nvasil\", \"post_text\": \"Nothing special.Quite often you just need an array of strings or integers and you always have to wrap them in DATASETS\\nI would have liked to have DATASET(STRING) or DATASET(INTEGER)\", \"post_time\": \"2013-01-21 21:04:51\" },\n\t{ \"post_id\": 3139, \"topic_id\": 687, \"forum_id\": 29, \"post_subject\": \"Re: Can you add some syntactic sugar?\", \"username\": \"rtaylor\", \"post_text\": \"
"One of the things that I find annoying at ECL is when I need to use a DATASET of primitives, such as DATASET(STRING)"
Can you post a larger piece of example code so we can see exactly what you're doing within its context?\\n\\nThanks,\\n\\nRichard\", \"post_time\": \"2013-01-21 20:58:43\" },\n\t{ \"post_id\": 3128, \"topic_id\": 687, \"forum_id\": 29, \"post_subject\": \"Can you add some syntactic sugar?\", \"username\": \"nvasil\", \"post_text\": \"One of the things that I find annoying at ECL is when I need to use a DATASET of primitives, such as \\n\\nDATASET(STRING)\\n\\nIt seems to me that it should be easy to make the compiler accept that and easily generate the right code underneath:\\n\\nlike \\nSTRINC_REC = {\\n STRING _s;\\n};\\n\\nand automatically convert DATASET(STRING) to DATASET(STRING_REC)\", \"post_time\": \"2013-01-20 15:00:28\" },\n\t{ \"post_id\": 4976, \"topic_id\": 864, \"forum_id\": 29, \"post_subject\": \"Re: Expansion of the Git Hub Step-by-Step Instructions\", \"username\": \"tlhumphrey2\", \"post_text\": \"I've been having problems with using git especially when I'm doing something none standard. I felt I needed to know more about how it works. I found the following web page that does a very good job of explaining how it works: http://www.sbf5.com/~cduan/technical/git/\\n\\nThe explanation is fairly short and very understandable.\", \"post_time\": \"2013-11-27 14:14:51\" },\n\t{ \"post_id\": 3850, \"topic_id\": 864, \"forum_id\": 29, \"post_subject\": \"Expansion of the Git Hub Step-by-Step Instructions\", \"username\": \"HPCC Staff\", \"post_text\": \"Step 0 – Setup and Configuration\\nGit Hub has setup instructions on the https://help.github.com/articles/set-up-git page and instructions on creating your fork of a project repository, and your local clone of that fork on the https://help.github.com/articles/fork-a-repo page. We adopt the Git Hub recommendation and name the project repository upstream and the page supra explains how to configure the remote repository aliases.\\n\\nIt is very important to not introduce spurious differences, and so we have adopted some rules on white space, particularly the line endings, use of tab characters, and trailing white space. We provide a pre-commit script, and the developer should enable this script.\\n\\nStep 1- Resynchronization of your repository\\nIt is a good idea to bring your fork and local clone up to date with the main repository. The upstream alias points to remote main repository. You should be able to accomplish the re-synchronization with fast forward merges (simple file replacements). If you had previously diverged in a significant way, then you will need to perform more elaborate file merges. We also recommend that you push your local clone up to the Git Hub clone of your repository. We follow the Git Hub recommendation that the main development line has master as the branch name. We also create branches for release candidates, releases, and maintain stable as a branch that lags master but is considered to be stable code. \\n\\nStep 2 – Branching\\nA branch should be a logical packaging or work that has a high degree of cohesion. We prefer contributors to use separate branches for each bug or feature.\\n\\nStep 3 – Staging your changes\\nYou will need to tell Git about your changes by using the rm and add sub-commands. \\n\\nStep 4 – Committing your changes\\nWe provide guidelines for commit messages on the https://github.com/hpcc-systems/HPCC-Pl ... guidelines page. 
\\n\\nStep 5 – Rebase your repository\\nIf enough time has passed between step 1 and when you are ready to make your changes available, you will need to re-synchronize your changes with the then current code base. \\n\\nStep 6 – Push your changes up to your Git Hub clone.\\nYou should consider pushing your changes up to your Git Hub clone on a regular basis during development just to provide a copy for safe keeping. Whether you have or not, after the rebase you will need to push your changes again.\\n\\nStep 7 – Generate the Pull request\\nOn github.com on your repository page, you will see a pull request button at the top of the page. Use this to generate your pull request. This tells the maintainer that your changes are ready.\\n\\nAfterwards\\nThe https://github.com/hpcc-systems/HPCC-Platform/wiki wiki pages provide information on code reviews and how we integrate with Jira for issues tracking.\", \"post_time\": \"2013-03-29 15:35:00\" },\n\t{ \"post_id\": 4114, \"topic_id\": 911, \"forum_id\": 29, \"post_subject\": \"Re: ECL-WLAM\", \"username\": \"flavio\", \"post_text\": \"[quote="Sudha":3frui5mk]I have tried the tutorial WLAM log file parsing(https://github.com/hpcc-systems/ECL-WLAM) and used Apache log file in the below format.\\n[Sun Mar 7 17:27:37 2004] [info] [client 64.242.88.10] (104)Connection reset by peer: client stopped connection before send body completed.\\n\\nBut not getting any output.\\n\\nCould you provide Sample Apache log file for this tutorial?\\n\\nSudha,\\n\\nI just committed the Apache logs that I used to test the WLAM code. The file containing about 5MB of logs is called "Weblog example.zip" and is in the top directory of ECL-WLAM.\\n\\nPlease give it a try and let me know how it goes.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-05-17 19:40:43\" },\n\t{ \"post_id\": 4079, \"topic_id\": 911, \"forum_id\": 29, \"post_subject\": \"ECL-WLAM\", \"username\": \"Sudha\", \"post_text\": \"I have tried the tutorial WLAM log file parsing(https://github.com/hpcc-systems/ECL-WLAM) and used Apache log file in the below format.\\n[Sun Mar 7 17:27:37 2004] [info] [client 64.242.88.10] (104)Connection reset by peer: client stopped connection before send body completed.\\n\\nBut not getting any output.\\n\\nCould you provide Sample Apache log file for this tutorial?\", \"post_time\": \"2013-05-13 17:38:52\" },\n\t{ \"post_id\": 4429, \"topic_id\": 986, \"forum_id\": 29, \"post_subject\": \"Re: WS documentation\", \"username\": \"soumyadip\", \"post_text\": \"Yes, I guess I should make Fiddler my friend. Thanks a ton Gordon for reminding me about the Eclipse plugin source code.\", \"post_time\": \"2013-08-05 14:30:40\" },\n\t{ \"post_id\": 4428, \"topic_id\": 986, \"forum_id\": 29, \"post_subject\": \"Re: WS documentation\", \"username\": \"gsmith\", \"post_text\": \"FYI - If you open ECL Watch in a web browser open the "Debugging/Network" tab, you can see what params it passes to the various ESP services to do "things".\\n\\nFurther - You could similarly sniff the network calls that the IDE makes.\\n\\nFurther Still: If you look at the Eclipse Plugins sources (on gut hub), you will find a WSDL generated SOAP client layer which is making the calls...\\n\\nGordon.\", \"post_time\": \"2013-08-04 15:34:12\" },\n\t{ \"post_id\": 4402, \"topic_id\": 986, \"forum_id\": 29, \"post_subject\": \"Re: WS documentation\", \"username\": \"soumyadip\", \"post_text\": \"Thanks a lot Bob, I'll look forward to the documentation. 
Meanwhile, I'll root around in the code to figure out as much as I can.\", \"post_time\": \"2013-07-31 15:45:29\" },\n\t{ \"post_id\": 4400, \"topic_id\": 986, \"forum_id\": 29, \"post_subject\": \"Re: WS documentation\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the inquiry, this topic is under consideration by the HPCC team, and we will keep you posted.\\n\\nBob\", \"post_time\": \"2013-07-31 15:38:09\" },\n\t{ \"post_id\": 4396, \"topic_id\": 986, \"forum_id\": 29, \"post_subject\": \"WS documentation\", \"username\": \"soumyadip\", \"post_text\": \"I was looking at the Web Services (WsDfu, WsWorkunits, FileSpray, etc.). While a couple of web services have a form to enter sample parameters to figure out how the service works (FileSpray and ws_account), WsDfu, WsWorkunits and WsTopology do not. Is there any documentation on the web services (other than going through the code)?\", \"post_time\": \"2013-07-31 14:41:23\" },\n\t{ \"post_id\": 8722, \"topic_id\": 1134, \"forum_id\": 29, \"post_subject\": \"Re: Couchbase Plugin\", \"username\": \"househippo\", \"post_text\": \"Not to much work has been done on this transport b/c all the current method to get data into Thor or Roxie really can not keep up with a "streaming" data source i.e. millions of mobile devices.\\nBut looks like a binary protocol or plugin is in the works on the HPCC Systems side, plus HPCC Systems 5.4 has release for JSON support. So a plugin is still on the table to be developed.\", \"post_time\": \"2015-12-04 09:39:08\" },\n\t{ \"post_id\": 5896, \"topic_id\": 1134, \"forum_id\": 29, \"post_subject\": \"Re: Couchbase Plugin\", \"username\": \"househippo\", \"post_text\": \"Looks like plugin will be done closer to end of 2014 Q4 or 2015 Q1.\\nCouchbase 3.0 is still not released and native JSON support in HPCC is scheduled for2014 Q4.\\n\", \"post_time\": \"2014-06-14 19:24:14\" },\n\t{ \"post_id\": 5301, \"topic_id\": 1134, \"forum_id\": 29, \"post_subject\": \"Re: Couchbase Plugin\", \"username\": \"househippo\", \"post_text\": \"This plugin looks like will not be created until Couchbase releases version 3.0 in June.\", \"post_time\": \"2014-02-28 11:33:03\" },\n\t{ \"post_id\": 4983, \"topic_id\": 1134, \"forum_id\": 29, \"post_subject\": \"Couchbase Plugin\", \"username\": \"househippo\", \"post_text\": \"I started learning about HPCC a little while ago and very excited about it. I will be working on a plugin soon to use Couchbase XDCR(Cross Data Center Replication) to put data into HPCC. \\n\\nhttps://github.com/househippo/couchbase-hpcc-transport\\n\\nThe Github repo is empty right now. 
In a few months I'm hoping to have a basic json feed working.\", \"post_time\": \"2013-12-01 09:29:21\" },\n\t{ \"post_id\": 7048, \"topic_id\": 1250, \"forum_id\": 29, \"post_subject\": \"Re: What Programs and Libraries are Required to Build on Win\", \"username\": \"ming\", \"post_text\": \"Current HPCCSystems Platform can only compiled with Visual Studio 9 32bit.\\nWe don't package the HPCCSystems Platform since the management scripts are for Linux only.\\nBut you can build and package HPCCSystems Clienttools on Windows.\\n\\nIf you are still interested in it I can create a dependency packages for Visual Studio 9 32bit and post the CMake options\", \"post_time\": \"2015-03-02 16:36:56\" },\n\t{ \"post_id\": 5423, \"topic_id\": 1250, \"forum_id\": 29, \"post_subject\": \"What Programs and Libraries are Required to Build on Windows\", \"username\": \"BenJones\", \"post_text\": \"I'm trying to build the HPCC-Platform for Windows using Visual Studio. Following the instructions, I get errors because I need to have Bison, Flex, and OpenLDAP installed. I obtained Bison and Flex by virtue of having Cygwin installed on my PC but OpenLDAP is a library and I would need a Windows-style DLL. Do you have a recommended set of utilities and libraries to point me in order to get set up?\", \"post_time\": \"2014-03-25 13:31:43\" },\n\t{ \"post_id\": 5491, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"Re: How Do I View or Edit Docs for a Branch?\", \"username\": \"g-pan\", \"post_text\": \"BenJones: \\n Yes we have a CMAKE target, cmake_minimum_required(VERSION 2.8) PROJECT(docs)...\\njust check out the CMakeLists.txt files. \\n\\nIn addition There are some custom macros and other dependencies, most of which are all out there on github. There are some external components, such as XSLT Processor, the Apache FOP, which are going to be required, and configured to work with these sources.\\n\\nIt would be much easier, and a more effective use of your time, to just download the PDFs from the portal.\", \"post_time\": \"2014-04-08 18:56:42\" },\n\t{ \"post_id\": 5488, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"Re: How Do I View or Edit Docs for a Branch?\", \"username\": \"BenJones\", \"post_text\": \"Is there a make target I should use to build the documentation?\", \"post_time\": \"2014-04-08 13:52:05\" },\n\t{ \"post_id\": 5485, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"Re: How Do I View or Edit Docs for a Branch?\", \"username\": \"JimD\", \"post_text\": \"If you are mostly interested in viewing the documentation, then reading them in your WYSIWYG editor should work nicely but the intended formatting will not be 100% identical. You can also download the PDFs from the portal, too. \\n\\nWe build the outputs (PDF, CHM, HTML, etc) using transformation templates (XSLT) and formatting object processors (FOP) which are also in our open source repository. This produces the look and feel we want. WYSIWYG editors do not use our formatting templates or CSS files, so some of the custom formatting features our templates support won't display in an editor.\\n\\nIf you want to edit, you can contribute in the same manner as any of our source code or you can open Jira issues to request edits. We welcome feedback and contributions. 
\\n \\nJim\", \"post_time\": \"2014-04-08 13:29:20\" },\n\t{ \"post_id\": 5476, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"Re: How Do I View or Edit Docs for a Branch?\", \"username\": \"BenJones\", \"post_text\": \"At this point, I'm mostly interested in just viewing the documentation. I discovered that I can open individual XML files with LibreOffice in my Ubuntu system and it displays them in WYSIWYG.\\n\\nWhen I build the HPCC-Platform, the last step did a build of "docs". Does this generate some master file that allows me to browse the documents?\", \"post_time\": \"2014-04-04 20:40:37\" },\n\t{ \"post_id\": 5475, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"Re: How Do I View or Edit Docs for a Branch?\", \"username\": \"rtaylor\", \"post_text\": \"Ben,\\n\\nThose XML files are in standard Docbook format, so any docbook-compliant xml editor should work. We use XMLmind (http://www.xmlmind.com) for all of our editing.\\n\\nIf you plan on making significant contributions to the docs, then you should coordinate with Jim DeFabia to ensure that you follow all of our standard conventions. The docs are part of our automated build process, so introducing changes that don't conform to the established practice could potentially "break the world" on a nightly basis.
\\n\\nTherefore, if you simply intend to make minor changes (fix typos, add a few comments, etc.) then you should just submit a JIRA ticket against the docs and Jim's team will take care of it.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-04-04 19:33:57\" },\n\t{ \"post_id\": 5474, \"topic_id\": 1267, \"forum_id\": 29, \"post_subject\": \"How Do I View or Edit Docs for a Branch?\", \"username\": \"BenJones\", \"post_text\": \"I recently built the HPCC-Platform on Linux. I noticed that there is a docs subfolder and that it contains a lot of XML files that correspond to the sections of the various manuals. When I open the XML documents with the browser, I see the XML and a note that says no style-sheet found. Is there a program I should use in Linux to view and/or edit the documentation?\", \"post_time\": \"2014-04-04 19:14:47\" },\n\t{ \"post_id\": 7047, \"topic_id\": 1268, \"forum_id\": 29, \"post_subject\": \"Re: What needs to be installed in order to build HPCC-Platfo\", \"username\": \"ming\", \"post_text\": \"The build dependencies : https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC\", \"post_time\": \"2015-03-02 16:31:09\" },\n\t{ \"post_id\": 5477, \"topic_id\": 1268, \"forum_id\": 29, \"post_subject\": \"What needs to be installed in order to build HPCC-Platform?\", \"username\": \"BenJones\", \"post_text\": \"I recently built the HPCC-Platform on Linux. When I did the CMake, it kept bombing out saying that it needed this or that installed in order to continue. Some of these were very straight forward, such as bison and flex, and they were available right within my Linux distribution (Ubuntu 13). Others were not so easy and I had to consult with someone else who had previously built them. Some of the items to install had rather obscure names such as "lib...-dev". I was eventually able to build and install the whole thing.\\n\\nI think it would be very helpful if the CMake script error message indicated the precise name of the package to install or at the very least that the documentation on the build included a list of every package that needs to be installed on my system in order for the build to work.\", \"post_time\": \"2014-04-04 20:57:54\" },\n\t{ \"post_id\": 6190, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"gsmith\", \"post_text\": \"The docs don't exist yet, but I am in the process of adding some annotation to help in this matter.\\n\\nComments:\\nThe vertex_click is an event that user can hook.\\n\\nTo add/remove items, you need to maintain your own list of "vertices" and "edges", then you simply call .data({vertices: myVertices, edges:myEdges, merge: true}); To merge your changes into the viz.\\n\\nIt would be simple to expose some simple add/remove methods as well.\\n\\n(as you can see this is very early days, but any real world usage like this is excellent).\", \"post_time\": \"2014-08-07 08:35:30\" },\n\t{ \"post_id\": 6189, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"househippo\", \"post_text\": \"Gordan,\\n\\nGot it to work. 
\\nIs there a documentation for methods that I can use?\\n\\nI see some like\\n.vertex_click() \\n\\nare there others like:\\n.remove_edge()\", \"post_time\": \"2014-08-07 07:01:55\" },\n\t{ \"post_id\": 6188, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"househippo\", \"post_text\": \"Gordan,\\n\\nCan't get the require.js file to work right.\\nCould you create a single example page for the just Composition\\nand have onload="doTest()" execute on a jquery $(document).ready(function(){ doTest(); });\\n\\nThank you\", \"post_time\": \"2014-08-07 03:20:15\" },\n\t{ \"post_id\": 6180, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"gsmith\", \"post_text\": \"Disclaimer: I have a vested interest in getting people to use the new Viz framework!\\n\\nThe layout algorithm is independent to the visual rendering (and is from dagre), it does a really good job for these type of hierarchical flows.\\n\\nFurther - if you _did_ adopt the new Viz Widget Framework for each of your "nodes"/"edges" then they would be trivial to plug into a "Graph" view which can do the layout.\\n\\nFurther Again: The core viz libraries will be included in future versions of the platform.\\n\\nPS - Looking good!!!\", \"post_time\": \"2014-08-06 08:49:01\" },\n\t{ \"post_id\": 6179, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"househippo\", \"post_text\": \"Gordan,\\n\\nThanks for the recommend. I really like how you can zoom in and out the window with ease.\\nWas thinking about using jsPlumb. I'm liking the thick connectors ,but that just CSS.\\n\\nHere is the progress so far, a little more thought out. If anyone has any suggestions or features ideas let me know.\\n
\", \"post_time\": \"2014-08-06 08:38:57\" },\n\t{ \"post_id\": 6170, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"Re: ECL-PiPES\", \"username\": \"gsmith\", \"post_text\": \"If you are looking for some JavaScript which will take the "flow" diagram and do an automatic layout you will find it here: https://github.com/hpcc-systems/Visualization\\n\\nAnd an (under development) demo page is here: http://gordonsmith.github.io/Visualization/\", \"post_time\": \"2014-08-05 11:31:48\" },\n\t{ \"post_id\": 6166, \"topic_id\": 1410, \"forum_id\": 29, \"post_subject\": \"ECL-PiPES\", \"username\": \"househippo\", \"post_text\": \"[size=200:18oknp37]C++ (Hard) => ECL(Easy) => ECL-PIPES (Easiest)\\n\\n
\\n\\nDRAG & DROP to make ECL code\\n\\nHere are some rough GUI layouts:\\nhttps://846a6bcc15e6009871807cef600f8ea ... .09-AM.png\\n\\nhttps://846a6bcc15e6009871807cef600f8ea ... .21-AM.png\\n\\n
\", \"post_time\": \"2014-08-05 08:09:57\" },\n\t{ \"post_id\": 6915, \"topic_id\": 1436, \"forum_id\": 29, \"post_subject\": \"Re: Building HPCC sources in Visual Studio 2012\", \"username\": \"ming\", \"post_text\": \"You can build HPCC Platform with plugins on Windows. Here are the cmake options to generate Visual Studio Solution:\\n-G "Visual Studio 9 2008" -DUSE_NATIVE_LIBRARIES=OFF -DCHECK_GIT_TAG=0 -DDEVEL=ON -DEXTERNALS_DIRECTORY=${EXTERNALS_DIRECTORY} -DUSE_APR=OFF -DUSE_LIBARCHIVE=OFF -DUSE_ZLIB=OFF -DUSE_OPENLDAP=ON -DUSE_XALAN=ON -DUSE_PYTHON=OFF -DUSE_V8=OFF -DUSE_JNI=OFF -DUSE_RINSIDE=OFF -DUSE_MYSQL=OFF -DUSE_SQLITE3=OFF -DUSE_CASSANDRA=OFF -DUSE_MEMCACHED=OFF\\n\\n${EXTERNALS_DIRECTORY} contains required lib and header files. For example:\\n boost/include/boost/\\n windows-i386-vc90/lib\\n\\nYou can build it inside Visual Studio or from command line:\\ncmake --build . --config Release\\n\\nWe don't generate HPCC Platform installable package due to the management scripts are for Unix only. And there is no test of the HPCC Platform on Windows.\", \"post_time\": \"2015-02-06 17:01:55\" },\n\t{ \"post_id\": 6278, \"topic_id\": 1436, \"forum_id\": 29, \"post_subject\": \"Building HPCC sources in Visual Studio 2012\", \"username\": \"tinebp\", \"post_text\": \"Hi I'm having some difficulties building the HPCC Platform source in VS 2012.\\nMy cmake build process is complaining about missing dependencies.\\nIs there any tutorial online for doing that?\\nHas anyone gone through those steps that before?\", \"post_time\": \"2014-09-09 15:51:30\" },\n\t{ \"post_id\": 6850, \"topic_id\": 1579, \"forum_id\": 29, \"post_subject\": \"Re: EclWatch broken after a custom build installation\", \"username\": \"gsmith\", \"post_text\": \"1. There is a new prerequisite for ECL Watch (node.js), instructions are at https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC (https://github.com/joyent/node/wiki/Ins ... ge-manager) - Note: This is only a build requisite not a runtime requirement.\\n2. Make sure you have done a recent "git submodule update --init --recursive"\", \"post_time\": \"2015-01-25 09:29:27\" },\n\t{ \"post_id\": 6849, \"topic_id\": 1579, \"forum_id\": 29, \"post_subject\": \"EclWatch broken after a custom build installation\", \"username\": \"tinebp\", \"post_text\": \"Hi,\\n\\nI recently created a new build of hpcc systems platform (version 6.0.0).\\nI then installed the generated *.deb package on my single node Ubuntu 14.04.1 system and started all the services. The Eclwatch website would not load on my browser, even using localhost on the same system.\\n\\nAfter digging into this, I noticed that the esp log file under "/var/log/HPCCSystems/myesp/esp.log" had some "file not found" errors saying that the website files (javascript and css files) were missing.\\n\\nI fixed the problem by manually copying the missing files directly from my source enlistment using the following command:\\nsudo cp -u -r <hpcc-source-directory>/esp/src/* /opt/HPCCSystems/componentfiles/files\\n\\nCan someone please log this bug so that it gets resolved? \\nI'm not familiar with the process.\\n\\nThanks,\\n-Blaise\", \"post_time\": \"2015-01-25 00:44:39\" },\n\t{ \"post_id\": 7817, \"topic_id\": 1782, \"forum_id\": 29, \"post_subject\": \"Happy 4th Anniversary HPCC Systems Open Source\", \"username\": \"lchapman\", \"post_text\": \"Come and see why we're celebrating! Read my blog for news about the highlights and a sneak preview of 5.4.0: http://hpccsystems.com/blog/celebrating ... 
ce-project\", \"post_time\": \"2015-06-24 09:41:42\" },\n\t{ \"post_id\": 7962, \"topic_id\": 1827, \"forum_id\": 29, \"post_subject\": \"Re: shortcomings of HPCC\", \"username\": \"sumaira khalid\", \"post_text\": \"Richard Sir,\\nThank you for your reply it is very helpful for my research work.\", \"post_time\": \"2015-07-30 11:02:36\" },\n\t{ \"post_id\": 7954, \"topic_id\": 1827, \"forum_id\": 29, \"post_subject\": \"Re: shortcomings of HPCC\", \"username\": \"rtaylor\", \"post_text\": \"Sumaira,\\n\\nThis page contains the most negative posting I've ever seen about HPCC (and my response to the "anonymous" person who posted it): http://www.quora.com/What-is-the-technical-advantage-of-LexisNexis-HPCC-over-Hadoop (and be sure to expand and read the comments posted in response to my posting).\\nBTW, that thread was posted over two years ago and I've seen nothing like it since.\\n\\nNo software is perfect, and HPCC is no exception to that, but I do not have a list of shortcomings to share with you because any issues I find that could be described as such are reported and fixed in a timely fashion. \\n\\nTherefore, I would suggest you simply learn about and work with HPCC enough to form your own opinion. \\n\\nYou can start with these articles: http://hpccsystems.com/why-hpcc\\nThen these white papers: http://hpccsystems.com/community/white-papers\\nAnd these case studies: http://hpccsystems.com/why-HPCC/case-studies\\n\\nAnd then you can ask, in these forums, any specific questions you have.\\n\\nWe also offer free online courses to get you started actually working with the platform, here: http://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-29 14:10:13\" },\n\t{ \"post_id\": 7953, \"topic_id\": 1827, \"forum_id\": 29, \"post_subject\": \"shortcomings of HPCC\", \"username\": \"sumaira khalid\", \"post_text\": \"I am student and want to know about shortcomings of HPCC for my research work kindly help me to know about it.\", \"post_time\": \"2015-07-29 13:20:11\" },\n\t{ \"post_id\": 8392, \"topic_id\": 1964, \"forum_id\": 29, \"post_subject\": \"Want to contribute? Hints on how to get started.\", \"username\": \"lchapman\", \"post_text\": \"Do you want to contribute to the HPCC Systems Open Source Project? We'd love to hear from you and see your ideas and solutions whether you are a new or current user of HPCC Systems. We have some resources available which will help you get started if you are new. If not, and you want to look for ideas to code here are some ways to get started quickly:\\n\\n
\\n\\nWelcome to any students who are interested in working on a project shown on our GSoC/Interns Ideas List. But don't just look at this list, go and look at the rest of the GSoC/Intern wiki too. There is a lot of information in there about the students programs, projects on offer as well as links to other information you might useful: https://wiki.hpccsystems.com/x/MAB. \\n\\nWhile the student application period for GSoC does not open until 14th March 2016 and our internships don't start again until next June, this is a good time to familiarize yourself with the projects on offer in 2016. Check out the details provided including the deliverables. Where relevant, links to other resources you might find useful are also provided. Each project has already been assigned a mentor who is an expert in that area of our system. Email details are provided so you can contact them for more information. Keep checking back, there are more ideas to come.\\n\\nYou may have your own project idea. If so, we'd love to hear about it. Email Lorraine.Chapman@lexisnexis.com with details.\\n\\nNext steps...\\nGo ahead, download the system and play around with it. There are downloads here: http://hpccsystems.com/download/free-co ... r-platform. But we think you're going want to delve down into the code, in which case you will want to download and build the system yourself using the instructions on our development wiki here: https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC.\\n\\nOnce you have your system up and running, there are plenty of examples you can use to see what the system can do. Try out the HPCC Data Tutorial, http://hpccsystems.com/download/docs/da ... rial-guide or the Six Degrees of Kevin Bacon example, https://hpccsystems.com/download/docume ... cl-example. You will need to download the ECL IDE to do this which you can get here: https://hpccsystems.com/download/develo ... ls/ecl-ide.\\n\\nThen you may want to learn a bit of ECL. Have a go at the Introduction to ECL online training course. You will need to sign up first which you can do here: https://hpccsystems.com/getting-started ... ng-classes. If you decide you want to graduate onto any of the more advanced courses, we will supply you with a promotional code so that you can take it for free. To get a promotional code, email trish.mccall@lexisnexis.com. Be prepared to provide proof of your student status.\\n\\nFeel free to post comments or questions relating to GSoC here in the specific GSoC forum. You can also use the developer forums to post comments and questions about project ideas.\\n\\nHere are the contact details for the GSoC/Internship administrative organisers for HPCC Systems in case you have any general questions about the programs. \\n\\nBe inspired by the work completed by our students of 2015: https://wiki.hpccsystems.com/x/g4BR.\\n\\nTrish McCall - trish.mccall@lexisnexis.com\\nLorraine Chapman - Lorraine.Chapman@lexisnexis.com\", \"post_time\": \"2015-10-29 11:21:55\" },\n\t{ \"post_id\": 8654, \"topic_id\": 2020, \"forum_id\": 29, \"post_subject\": \"Re: This is a test to see if posts from Contributors ...\", \"username\": \"rtaylor\", \"post_text\": \"and are they? 
\", \"post_time\": \"2015-11-19 19:12:19\" },\n\t{ \"post_id\": 8652, \"topic_id\": 2020, \"forum_id\": 29, \"post_subject\": \"This is a test to see if posts from Contributors ...\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is a test to see if posts from Contributors are sent to my email.\", \"post_time\": \"2015-11-19 18:49:12\" },\n\t{ \"post_id\": 8726, \"topic_id\": 2038, \"forum_id\": 29, \"post_subject\": \"Re: EZ-ECL-Schema Maker\", \"username\": \"JimD\", \"post_text\": \"It is a nice tool! \\n\\nI have used it several times to quickly write my record layout for me. It is especially nice when there are a lot of columns. \\n\\nI eagerly await XML and JSON support. \\n\\nThanks,\\n\\nJim\", \"post_time\": \"2015-12-04 20:00:18\" },\n\t{ \"post_id\": 8720, \"topic_id\": 2038, \"forum_id\": 29, \"post_subject\": \"EZ-ECL-Schema Maker\", \"username\": \"househippo\", \"post_text\": \"With the new website the demo section was removed. There was a great tool to make ECL Schemas. I made a Javascript version of schema maker here: https://github.com/househippo/ez-ecl-schema\\n\\nJSON and XML schema making will be coming later.\\n\\nDownload it and try it out. Let me know if it has any bugs. \\n\\n
\", \"post_time\": \"2015-12-04 09:30:58\" },\n\t{ \"post_id\": 9144, \"topic_id\": 2136, \"forum_id\": 29, \"post_subject\": \"HPCC Systems 6.0.0 Beta 2 is on its way!\", \"username\": \"lchapman\", \"post_text\": \"You may have taken a look at the new features previewed in the HPCC Systems Beta version released in September 2015. The second beta, containing more new features and enhancements, will be available from the downloads page in March.\\n\\nWant to know more about what's coming? Read the blogs:\\n\\nHPCC Systems 6.0.0 Beta 1 feature details\\n\\nLook what's coming in HPCC Systems 6.0.0 Beta 2\", \"post_time\": \"2016-01-28 12:03:19\" },\n\t{ \"post_id\": 17383, \"topic_id\": 4323, \"forum_id\": 29, \"post_subject\": \"Re: Benchmarking against Hadoop\", \"username\": \"bforeman\", \"post_text\": \"Hi Vince,\\n\\nI know we have more recent benchmarks, and they are stellar. I will contact the HPCC team and ask a member to contact you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2017-06-14 15:09:35\" },\n\t{ \"post_id\": 17353, \"topic_id\": 4323, \"forum_id\": 29, \"post_subject\": \"Benchmarking against Hadoop\", \"username\": \"vin\", \"post_text\": \"In all discussions I have about HPCC, I am always asked to compare to Hadoop. I can enumerate reasons but I would like to give an empirical answer in addition. I found the terasort comparison, which is great but dated. Has anyone done a comparison of HPCC performance vs. Hadoop performance?\\n\\nThanks,\\n+vince\\n----\\nVincent W. Freeh\\nAssociate Professor, Computer Science, NCSU\", \"post_time\": \"2017-06-09 16:15:31\" },\n\t{ \"post_id\": 32843, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Manikandan, \\n\\nI am glad that your platform setup is working fine now. \\n\\nAs for the query on the INDEPENDENT workflow service, the simplest and best example to understand its functioning, in my opinion, is the one provided in the ECL Language reference and that compares the GLOBAL and INDEPEDENT workflow services. See the self-explanatory code example below (you can copy and paste it directly on your ECL IDE to play with it):\\n\\n
I := RANDOM() : INDEPENDENT; //calculated once, period\\nG := RANDOM() : GLOBAL; //calculated once in each graph\\n\\nds := \\n DATASET([{1,0,0,0},{2,0,0,0}],{UNSIGNED1 rec,UNSIGNED Ival, UNSIGNED Gval , UNSIGNED Aval });\\n\\nRECORDOF(ds) XF(ds L) := TRANSFORM\\n SELF.Ival := I;\\n SELF.Gval := G;\\n SELF.Aval := RANDOM(); //calculated each time used\\n SELF := L;\\nEND;\\n\\nP1 := PROJECT(ds,XF(left)) : PERSIST('~RTTEST::PERSIST::IndependentVsGlobal1');\\nP2 := PROJECT(ds,XF(left)) : PERSIST('~RTTEST::PERSIST::IndependentVsGlobal2');\\n\\nOUTPUT(P1); \\nOUTPUT(P2); //this gets the same Ival values as P1, but the Gval value is different than P1
\\n\\nI also find this forum question very useful as well: \\n\\nHTH,\\nHugoW\", \"post_time\": \"2021-01-11 15:02:16\" },\n\t{ \"post_id\": 32833, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi HugoW,\\n\\nCan i know about the Independent work flow service with a simple example please.\\nSo i can understand easily.If you can please attach the graph details before and after the independent service used.\\n\\nThanks,\\nManikandan N\", \"post_time\": \"2021-01-10 12:46:52\" },\n\t{ \"post_id\": 32831, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi HugoW,\\nThanks a lot for the timely help in all the time.\\n\\nAs you guided i have updated the client tool with the servion version.So it is working fine now.\\n\\nOnce again Thanks for your response and a digital Hug
.\\n\\nRegards,\\nManikandan N\", \"post_time\": \"2021-01-10 06:38:32\" },\n\t{ \"post_id\": 32823, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Manikandan,\\n\\nThank you for the additional details. It looks like the issue you are getting is being caused by a major version mismatch between your client tools version (v7.0.0) and your server side version of the platform (v6.2.10). \\n\\nFor instance: in v7.0.0, the ECL function RenameLogicalFile() under File.ecl expects two input parameters, whereas in v6.2.10, the same fuction receives a third default input parameter, hence the first error message in your ECL IDE Syntax Errors panel.\\n\\nTo avoid such issues, ideally, you should use a client tools version matching (or at least matching the major release) of the server side version. In your case, you can either install a v6.2.10 client tools on your PC https://d2wulyp08c6njk.cloudfront.net/releases/CE-Candidate-6.2.10/bin/clienttools/hpccsystems-clienttools-community_6.2.10-1Windows-i386.exe or upgrade the server side version to v7.0.0 https://hpccsystems.com/download/archive/version?v=7.0.0-1- the later being the preferred option.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2021-01-08 15:26:12\" },\n\t{ \"post_id\": 32813, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi HugoW,\\n\\nI am getting this error while i am using any kind of STD.File functions.\\n\\nI have attached the error screen shot when i am trying to create super file.\\n\\nWhile i am tring to use spray function also same error coming.\\n\\nPlease help me to eradicate this.\\n\\nMany Thanks,\\nManikandan N\", \"post_time\": \"2021-01-08 12:53:34\" },\n\t{ \"post_id\": 32803, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Manikandan,\\n\\nPer the error message you display, it seems like you were trying to use the STD.File.RenameLogicalFile() function by passing more than two input parameters. Is this the same error message you are getting accross different STD.File functions? 
If possible, could you please provide a code example for further troubleshooting?\\n\\nThanks,\\nHugoW\", \"post_time\": \"2020-12-31 03:19:08\" },\n\t{ \"post_id\": 32793, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Error while using STD functions\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Team,\\n\\nWhen i am using any of the STD file functions i am getting an error.\\nPlease help me to rectify this error.\\n\\nError message:\\nError: Too many parameters passed to function RenameLogicalFile (expected 2) (141, 71), 2061, C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.0.0\\\\clienttools\\\\share\\\\ecllibrary\\\\std\\\\File.ecl\\n\\nRegards,\\nManikandan N\", \"post_time\": \"2020-12-30 05:02:54\" },\n\t{ \"post_id\": 32763, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Hugo,\\nThanks a lot for the timely guidance.\\nAs you mentioned, I have checked my Windows system Options.Under the BIOS settings there is no options available in my system as you listed and options in attached sheet.\\nI am using Windows 7 professional 64bit OS,2 GB RAM,Processor- Inter(R) CPU A1018 @2.10GHz.\\nBut really this was helpful to know multiple new things that i don't know before.\\n\\nThanks,\\nManikandan N\", \"post_time\": \"2020-12-19 13:04:14\" },\n\t{ \"post_id\": 32683, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"Re: HPCC VM INSTALLATION ERROR\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Manikandan,\\n\\nCould you please confirm if hardware virtualization is enabled on your host machine? If you are using Windows on the host machine, you can quickly check that under the "Performance" tab in Task Manager (See screenshot attached).\\n\\nTo run the HPCC Systems platform in a virtual machine, hardware virtualization must be enabled in the BIOS of your host machine. This setting may be called VT-x or AMD-V, depending on the processor of the host machine you are using. In case needed, some general steps to manage the hardware virtualization settings in your host machine are provided here: https://bce.berkeley.edu/enabling-virtualization-in-your-pc-bios.html.\\n\\nHTH,\\nHugo\", \"post_time\": \"2020-12-15 13:03:26\" },\n\t{ \"post_id\": 32673, \"topic_id\": 8543, \"forum_id\": 29, \"post_subject\": \"HPCC VM INSTALLATION ERROR\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Team,\\n\\nI am trying to install hpcc VM server in my system.My system spec is 2GB RAM,64bit\\nI am almost reached the end of installation.Finally it showing an error.But i am unable to eradicate that.It is asking me to disable Hardware virtulisation in the system page\\nBut i am keeping disable option only and it is throwing the error still.I am attaching the that screen shot for your reference.\\n\\nPlease help me on this.\\nIn system page -- acceleration - Hardware virtualisation\\n\\nThanks in advance,\\nManikandan N\", \"post_time\": \"2020-12-15 05:24:15\" },\n\t{ \"post_id\": 32783, \"topic_id\": 8563, \"forum_id\": 29, \"post_subject\": \"Re: Difference between unicode and varunicode\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Manikandan, \\n\\nFrom a string length perspective, the main difference between UNICODE and VARUNICODE value types in ECL lies in the fact that the latter utilizes a null character to indicate its termination (i.e., it is null-terminated or C String), whereas the first does not. 
\\n\\nConsidering that the UNICODE value type in ECL refers to a UTF-16 encoded unicode character string, in practical terms, the VARUNICODE value type will require two extra bytes (due to the null character at the end of the string) for storing the same UNICODE string. See the example below:\\n\\n
MyRec := RECORD\\nUNICODE1 F1;\\nVARUNICODE1 F2;\\nEND;\\n\\nMyData := DATASET([{'A','A'}],MyRec);\\n\\nSIZEOF(MyData.F1); //result is 2 (16-bit code unit per char)\\nSIZEOF(MyData.F2); //result is 4 (16-bit code unit per char plus a 16-bit code unit for the null terminator)
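A similar minimal sketch for the STRING versus STRING[n] comparison discussed next (the record and field names here are hypothetical, just to make the storage difference concrete):

NamesRec := RECORD
  STRING20 FixedName; //always occupies 20 bytes, padded with trailing spaces
  STRING   VarName;   //variable length: stored with a leading 4-byte length count plus only the bytes actually needed
END;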
\\n\\nAs for the comparison between STRING and STRING[n] value types in ECL, again from a string length perspective; in the latter you are specifying the maximum length of the string in bytes (i.e., the 'n' value), whereas in the first you are leaving the maximum length information variable to the size needed to contain the passed value (this information is then stored as a leading 4-byte integer indicating the actual number of characters for the string, similar to a Pascal string). \\n\\nIn practical terms, and depending on the characteristics of your data, the right choice between these two value types can have an impact in both storage and memory usage of your data manipulations. See a more detailed discussion focused on this type of decision here: https://hpccsystems.com/bb/viewtopic.php?f=10&t=3493&p=13953&hilit=String+Field+Definitions#p13953\\n\\nHTH,\\nHugoW\", \"post_time\": \"2020-12-21 16:06:21\" },\n\t{ \"post_id\": 32773, \"topic_id\": 8563, \"forum_id\": 29, \"post_subject\": \"Difference between unicode and varunicode\", \"username\": \"Daniel_mani\", \"post_text\": \"Hi Team,\\n\\nCan u share the details of difference between unicode and varunicode,\\nString and string[n] with an example\\n\\nThanks in advance,\\nManikandan N.\", \"post_time\": \"2020-12-20 18:09:34\" },\n\t{ \"post_id\": 1630, \"topic_id\": 352, \"forum_id\": 32, \"post_subject\": \"Re: About the Challenge / Instructions\", \"username\": \"csekellick\", \"post_text\": \"How long does it take to get access? The challenge was announced on May 7; I sent the Supercomputer ID Request to my manager on May 8; a support ticket was opened on May 10; and nothing has happened since. Meanwhile, the first deadline has come and gone. \\n\\nAm I just unlucky, or are there other people who were intrigued by the challenge but stymied by the need for an ID?\", \"post_time\": \"2012-05-22 17:39:50\" },\n\t{ \"post_id\": 1584, \"topic_id\": 352, \"forum_id\": 32, \"post_subject\": \"About the Challenge / Instructions\", \"username\": \"HPCC Staff\", \"post_text\": \"[color=#0000FF:18w6btx6]Overview\\nECL is a terrific programming language for processing BIG DATA; however, it also helps to shine a light on those little everyday problems that occur when doing real work inside a BIG DATA company. One of these interesting problems is understanding what an organization name means. Of course, anyone can treat a name as a string, but we want to understand the meaning behind it. \\n\\nUnderstanding everything anyone can put into a name is clearly a broad and detailed problem. . . and, may not even be attainable. Fortunately, we don’t need to understand everything to be the best – we just need to understand more than everyone else and have a pathway to understanding everything.\\n\\nTo this end we are issuing “The ECL Challenge” for understanding a company name. The overall challenge will run for a number of months. It will consist of individual ‘problems’ issued on a weekly basis. These problems will start off fairly simple and will grow as we eat the lower hanging fruit. Each problem will run for two weeks and points will be allocated as follows:\\n\\n•\\t1st place winner – 10 points\\n•\\t2nd place winner – 7 points\\n•\\t3rd place winner – 5 points\\n•\\tEntrant that has at least some of their code (or data) used in the final result – 3 points\\n•\\tEntrant – 1 point\\n
\\n\\nEach first place winner for each problem will receive a $100 prize. At the end of the challenge, the winners and runners-up will be ranked by who has the most points, and prizes will be awarded as follows:\\n•\\t1st Place Prize – an iPad\\n•\\t2nd Place Prize - $250\\n•\\t3rd Place Prize - $100\\n
\\n\\nThis challenge is open for all Reed Elsevier employees and assumes knowledge of the ECL programming language. Use this forum to post questions and provide feedback.\\nNote: Company machinery may be used for all development work; however this is an ‘after hours’ challenge that should be completed primarily upon your own time. \\n\\n[color=#0000FF:18w6btx6]Winning Criteria for Each Problem\\nThe winners will be picked at the absolute discretion of the judging panel (David Bayliss, Charles Morton and John Holt). Essentially, the criteria is: “Do we want to use this code?” \\n\\nThings to consider:\\n1)\\tRecall is important: If you can only process half of the names and everyone else does three quarters of them, this will count against you;\\n2)\\tPrecision is even more important: Whilst the occasional wrong answer is not fatal, crimes of commission tend to be penalized more harshly than crimes of omission;\\n3)\\tMake the code maintainable and focus on the problem. We will be asking ourselves not only how easy it is to extend your code -- add words to word-tables, handle exceptions – but also how easy is it to read;\\n4)\\tWhilst external libraries are not forbidden, and calls to other parts of the repository are acceptable, we are really looking for small, focused point solutions;\\n5)\\tEach problem will be evaluated and rewarded based entirely on its own merits: Just because you won with a technique for one problem does not mean the same trick will work next time;\\n6)\\tRemember: You can get 3 points if any of your solution is used; so, just having good tables of synonyms or exceptions could be a way to get lots of points in the long haul.\\n
\\n\\n[color=#0000FF:18w6btx6]Participating\\nAn entry can be submitted by entering an attribute:\\nProblemY_XXX\\nInto the CompanyNameChallenge module, on boca-dataland, prior to the deadline for the given problem. In this case, ‘Y’ = ‘1’ indicates challenge problem ‘1’, and ‘XXX’ represents the submitter’s initials. \\n\\nA ‘template’ such as:\\nProblem1_XXX\\nWill be provided for each problem , along with a trivial solution.\\n\\nIf a problem needs an extended description, it will be entered into the same module as a text file without the XXX – thus:\\nProblem1\\nWill highlight a particular challenge. \\n\\nDavid Bayliss will post to his LexisNexis/Yammer account when a new problem is posted, as well as into the dedicated forum, so people can follow and be immediately alerted.\\n\\nNote: Access to boca-dataland can be requested via the attached Supercomputer ID Request Form.\", \"post_time\": \"2012-05-03 16:28:58\" },\n\t{ \"post_id\": 1726, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"Winners from Challenge Problem 1\", \"username\": \"HPCC Staff\", \"post_text\": \"Congrats to the winners from Round 1 of the ECL Challenge!\\n\\nJoint 1st and 2nd place winners – 8.5 points each: Laverne Bentley, Tina Gonsiewski\\n3rd place winner – 5 points: Keith Dues \\nHonorable mention – 3 points: Chirag Mandan\\n\\nThank you to all who participated!\", \"post_time\": \"2012-06-07 13:07:52\" },\n\t{ \"post_id\": 1617, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"UPDATE: Challenge Problem 1 - Deadline Extended!\", \"username\": \"HPCC Staff\", \"post_text\": \"The deadline for Challenge Problem 1 has been extended to Noon, GMT Tuesday, May 29, 2012.\", \"post_time\": \"2012-05-17 11:15:18\" },\n\t{ \"post_id\": 1611, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"Reminder: Challenge Problem 1 - Due May 29\", \"username\": \"HPCC Staff\", \"post_text\": \"UPDATE: Just a reminder entries for Challenge Problem 1 is due Noon, GMT Tuesday, May 29, 2012.\", \"post_time\": \"2012-05-14 18:44:15\" },\n\t{ \"post_id\": 1600, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"Re: Challenge Problem 1 - Due May 21\", \"username\": \"dabayliss\", \"post_text\": \"I think that as long as the 'hospital type' can validly fit into the fields given - then yes absolutely convert it if you can.\\n\\nRemember - the purpose is to 'understand' the company name - so if I have\\n\\nBoca Raton Doll Hospital\\nBoca Raton Community Hospital\\n\\nthen I ABSOLUTELY want to be able to know that if I need my Barbie fixed (or maybe just given a good meal) I know where to go ...\\n\\nThink of 'rejected' as: "the hospital processing code has no clue what this is"\\n\\nHTH\\n\\nDavid\", \"post_time\": \"2012-05-11 19:32:10\" },\n\t{ \"post_id\": 1598, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"Re: Challenge Problem 1 - Due May 21\", \"username\": \"dustin.skaggs@lexisnexis.com\", \"post_text\": \"I'm assuming that Animal Hospitals count as a valid hospital because your example has 'PET' as a hospital type. What about "hospitals" for inanimate objects? Does a 'Doll Hospital' count as a hospital? A 'Lawnmower Hospital'? Should these kind of "hospitals" be rejected?\", \"post_time\": \"2012-05-11 16:40:27\" },\n\t{ \"post_id\": 1585, \"topic_id\": 353, \"forum_id\": 32, \"post_subject\": \"Challenge Problem 1 - Due May 29\", \"username\": \"HPCC Staff\", \"post_text\": \"Challenge Problem 1 is all about hospitals. We give you a file of names with the word ‘hospital’ in them. 
All you have to do is split the names into the interesting parts.\\n\\nAs the template shows the required parts are:\\nR := RECORD\\nD;\\n BOOLEAN Hospital_Rejected; // Set this to true if you do not think this should be treated as a hospital\\n STRING Hospital_Type; // Pet, community, regional, children's etc etc\\n STRING Hospital_Geo; // Macon County, South Florida etc\\n STRING Hospital_Dept; // Oncology, radiology etc etc\\n STRING Hospital_Enumeration; // The '2' of 'Regional District 2' - sometimes a franchise number\\n STRING Hospital_Indicative; // Anything left deemed important\\n END;
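For anyone getting oriented, a rough sketch of the kind of trivial starting point the template provides (the hospitals file name and the TRANSFORM below are assumptions for illustration; the real template lives in boca-dataland). When assembling it, the D definition goes before the R record shown above:

D := DATASET('~thor_data400::base::CNC::Hospitals', {QSTRING120 company_name}, THOR); //assumed file name
R XF(D L) := TRANSFORM
  SELF.Hospital_Rejected := FALSE;                     //treat everything as a hospital for now
  SELF.Hospital_Indicative := (STRING)L.company_name;  //park the whole name until real parsing is added
  SELF := L;                                           //keep the original company_name
  SELF := [];                                          //remaining fields default to blank
END;
TrivialSolution := PROJECT(D, XF(LEFT));
OUTPUT(TrivialSolution, NAMED('Results'));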
\\n\\nNote: it is fine if you feel compelled to add extra field(s) to parse into, but these fields require a definable meaning and must be used consistently.\\n\\nThe template also has a small (hand-generated) example file, showing a way that the submitter may choose to parse out a handful of different hospital names. This template file is available in boca-dataland. \\n\\nThe bottom of the template has three outputs:\\n1.\\tStats: These are the basic ‘how much did you extract’ numbers that need to be copied to the comment at the top of the attribute.\\n2.\\tAllProfiles: This is a SALT Profile report on the parsed out data. It is one good way to get a first approximation to the quality of the extractions.\\n3.\\tResults: Nothing quite like eyeballing the data
\\n\\nNote: the data file being used is small, it is expected that testing will be possible using hthor/ecl-agent.\\n\\nNEW Deadline: Challenge problem 1 is due 12:00PM, GMT Tuesday May 29, 2012. Refer to previous post for entry instructions:\\nviewtopic.php?f=32&t=352\", \"post_time\": \"2012-05-03 16:52:24\" },\n\t{ \"post_id\": 1869, \"topic_id\": 363, \"forum_id\": 32, \"post_subject\": \"Challenge Problem #2 - Winners Announced\", \"username\": \"HPCC Staff\", \"post_text\": \"Congrats to the winners from Round 2 of the ECL Challenge!\\n\\n1st Place – Keith Dues – 10 pts\\n2nd Place – Chirag Mandan – 7 pts\\n3rd Place - Tina Gonsiewski – 5 pts\\n4th Place – Vern Bentley – 3 pts\\n\\nThings brings the overall scores to:\\n\\nKeith - 15\\nTina - 13.5\\nVern - 11.5\\nChirag - 10\\n\\nThank you to all who participated!\", \"post_time\": \"2012-06-29 18:33:56\" },\n\t{ \"post_id\": 1624, \"topic_id\": 363, \"forum_id\": 32, \"post_subject\": \"Challenge Problem #2 - Due Jun 4th\", \"username\": \"dabayliss\", \"post_text\": \"This challenge is all about banks - the full text can be found in the CompanyNameChallege.Problem2_XXX\\n\\n\\n/* Challenge Problem 2 - Bank Names\\n\\t Deadline: Entries must be submitted by 6:00 EST 4th June 2012\\n\\t Name - Jostling Beaver\\n\\t Results:\\n\\t Results:\\n\\t\\t\\t- Number extracted from: 00000\\n\\t\\t\\t- Number with Type : 00000\\n\\t\\t\\t- Number with Fka : 00000\\n\\t\\t\\t- Number with Parent : 00000\\n\\t\\t\\t- Number with Behavior : 00000\\n\\t\\t\\t- Number with Enum : 00000\\n - Number with Branch : 00000\\n - Number with Indicat : 00000\\n - Number with Geo : 00000\\n - Number rejected : 00000\\n*/\\n\\nD := dataset('~thor_data400::base::CNC::Banks', {qstring120 company_name}, thor);\\n\\n// Note - the presumption is that if the record is not rejected then 'everything we need to know'\\n// (regarding the company name) is in the fields OTHER than company name.\\n// Company name must be left AS IS to show the original string\\n\\nR := RECORD\\n D;\\n\\tBOOLEAN Bank_Rejected; // Set this to true if you do not think this should be treated as a bank\\n\\tSTRING Bank_Type; // Savings, Trust, Investment, Blood, Sperm etc etc\\n\\tSTRING Bank_Geo; // FIRST BANK OF FLORIDA\\n\\tSTRING Bank_FKA; // eg: BANK OF AMERICA, N.A. 
F/K/A SUNWEST BANK OF ALBUQUERQUE, N.A \\n\\tSTRING Bank_Parent; // eg: 1ST BANK A DIVISION OF FIRST NATIONAL BANK \\n\\tSTRING Bank_Behavior; // BANK OF AMERICA AS AGENT FOR CERTAIN LENDERS - Bank could also be the source or target of a C/O \\n\\tSTRING Bank_Enumeration; // 'First' from 'first national bank'\\n\\tSTRING Bank_Branch; // BANK LEUMI LE ISRAEL BM, CHICAGO BRANCH \\n\\tSTRING Bank_Indicative; // Anything left deemed important\\n// If you really need extra fields to capture new meaning - you may add them; but they must be used consistently\\n\\tEND;\", \"post_time\": \"2012-05-21 13:05:43\" },\n\t{ \"post_id\": 1725, \"topic_id\": 381, \"forum_id\": 32, \"post_subject\": \"Challenge Problem #3 - Due July 5th\", \"username\": \"dabayliss\", \"post_text\": \"/* Challenge Problem 3 - Churches Names\\n\\t Deadline: Entries must be submitted by 6:00 EST 5th July 2012\\n\\t Name - Jostling Beaver\\n\\t Results:\\n\\t Results:\\n\\t\\t\\t- Number extracted from: 00000\\n\\t\\t\\t- Number with Type : 00000\\n\\t\\t\\t- Number with Geo : 00000\\n\\t\\t\\t- Number with Nation : 00000\\n\\t\\t\\t- Number with Corp : 00000\\n\\t\\t\\t- Number with Enum : 00000\\n - Number with Indicat : 00000\\n - Number rejected : 00000\\n*/\\n\\nD := dataset('~thor_data400::base::CNC::Churches', {qstring120 company_name}, thor);\\n\\n// Note - the presumption is that if the record is not rejected then 'everything we need to know'\\n// (regarding the company name) is in the fields OTHER than company name.\\n// Company name must be left AS IS to show the original string\\n\\nR := RECORD\\n D;\\n\\tBOOLEAN Church_Rejected; // Set this to true if you do not think this should be treated as a church\\n\\tSTRING Church_Type; // Denomination, Baptist, Evangelical Lutheran etc\\n\\tSTRING Church_Geo; // 'Delray Beach' from FIRST BAPTIST CHURCH OF DELRAY BEACH\\n\\tSTRING Church_Nation; // 'Korean' from FIRST KOREAN BIBLE CHURCH\\n\\tSTRING Church_Corp; // INC / LLC etc\\n\\tSTRING Church_Enumeration; // 'First' from 'first baptist church'\\n\\tSTRING Church_Indicative; // Anything left deemed important\\n// If you really need extra fields to capture new meaning - you may add them; but they must be used consistently\\n\\tEND;\", \"post_time\": \"2012-06-07 12:37:58\" },\n\t{ \"post_id\": 3699, \"topic_id\": 826, \"forum_id\": 32, \"post_subject\": \"Demo: Using the ECL Watch Graph Control\", \"username\": \"HPCC Staff\", \"post_text\": \"Very informative video of a walkthrough demo on using the ECL Watch Graph Control\\n\\nhttp://cdn.hpccsystems.com/video/ecl-wa ... aining.mp4 \\n\\nPresented by Jill Luber, Sr Architect\", \"post_time\": \"2013-03-11 17:42:47\" },\n\t{ \"post_id\": 3891, \"topic_id\": 870, \"forum_id\": 32, \"post_subject\": \"Webex: SALT Profiling\", \"username\": \"HPCC Staff\", \"post_text\": \"This video includes a tutorial on how to do SALT Profiling along with an example use case.\\n\\nhttps://reedelsevier.webex.com/reedelse ... cba8ec846e\\n\\nPresented by Jill Luber, Sr Architect.\", \"post_time\": \"2013-04-05 17:30:44\" },\n\t{ \"post_id\": 4347, \"topic_id\": 975, \"forum_id\": 32, \"post_subject\": \"Online Training\", \"username\": \"HPCC Staff\", \"post_text\": \"Both the Introductory and Advanced ECL & Thor courses are available online in the LMS: http://learn.lexisnexis.com/hpcc\\n\\nThese are self-paced courses to learn ECL, the programming language designed and used for the HPCC Systems platform. 
The online Introductory to Thor course is a prerequisite to the online Advanced courses. Both Introductory courses are free to the public while the Advanced courses are $495 each, but complimentary to all Reed Elsevier employees.\\n\\nEmployees wishing to register for the online Advanced course will need the promo code from their manager. \\n\\nThe process for registering to the Instructor-led courses remain the same and the schedule is available on the portal:\\nhttp://hpccsystems.com/community/traini ... s/training\", \"post_time\": \"2013-07-19 21:30:33\" },\n\t{ \"post_id\": 4452, \"topic_id\": 996, \"forum_id\": 32, \"post_subject\": \"Automatic ROXIE updates\", \"username\": \"Dinesh_S\", \"post_text\": \"Problem Statement: Need to constantly update ROXIE queries with new data without having to a need for standby ROXIE or without taking down ROXIE.\\n\\nPre Requisites: HPCCSystems Enterprsie 4.0 with separate DALI’s for THOR and ROXIE. The THOR system will be an independent system with its own DALI and the ROXIE System will have its own DALI, ESP etc. \\n\\nSolution: Use Packages to update the ROXIE with reference to the new data. A package tells the ROXIE from where to get the data and which DALI to look up for the information about the files. \\nIf there is a packagemap, then any superfiles found in that packagemap are not retrieved from or locked in DALI-DFS. Roxie still goes to DALI-DFS to get information to locate and open logical files and they will still be locked. This issue is resolved by having separate DALI’s.\\n\\nHigh Level Steps:\\n \\nThese are the steps we perform for each incoming file:\\n1.\\tCreate a raw file after Spray\\n2.\\tCreate a Base File which contains the cleaned/parsed data. \\n3.\\tCreate a index on the sub file\\n4.\\tAdd the index to the SuperKey and Base file to Superfile\\n5.\\tCreate package XML for the queries deployed and publish the new data using packages (I do this using SOAPCALL from ECL. It can be done using ecl command line as well). \\n6.\\tRoxie Query will pick up the data from Superkey (which will be deployed using package)\\n7.\\tAfter specific time interval (1 hour, 6 hours or 1 day) do the following:\\na.\\tAll the sub files in a Superfile will be consolidated into a single sub file\\nb.\\tBuild one single index using the data in the superfile.\\nc.\\tClear up the SuperKey and add the index built in step 7(a) to the Superkey.\\nd.\\tClear the SuperFile.\\n\\nI am attaching the SOAPCALL code sample which I use to deploy packages to ROXIE.\\n\\nGuidelines:\\n\\n1.\\tTo build a package you will need to do the following 2 steps:\\n a.Get the list of all the queries deployed on Roxie\\n b.Get a list of Superfiles and Subfiles associated with each query and build a package\\n\\n2.\\tYou can get a list of all the webservices available using the below URL:\\nhttp:// HOSTNAME: 8010/WsWorkunits/?list_forms
\\n\\n3.\\tGet the list of all the queries deployed on Roxie:\\na.\\tYou can get the list of all the deployed queries in ROXIE using the below URL:\\nhttp://HOSTNAME:8010/WsWorkunits/WUQuerysetDetails?QuerySetName=roxie
\\n\\n4.\\tGet a list of Superfiles and Subfiles associated with each query and build a package. There are currently two ways, which behave slightly differently.\\n\\na. Calls out to roxie and gets the information based on what roxie currently has loaded. Requires roxie to be running, and the query not to be suspended.\\n\\nhttp://HOSTNAME:8010/WsWorkunits/WUQueryDetails?ver_=1.42&soap_builder_&.QueryId= QUERYNAME.VERSION&.QuerySet=roxie
\\n\\nForm:\\nhttp://HOSTNAME:8010/WsWorkunits/WUQueryDetails?form
\\n\\nb. Resolves query against package to figure out file information. Doesn’t interact with roxie. Will show files in the query that aren’t in the packagemap as well as what subfiles were mapped via the packagemap\\n\\nhttp://HOSTNAME:8010/WsPackageProcess/GetQueryFileMapping?ver_=1&soap_builder_&.QueryName= QUERYNAME.VERSION&.Target=roxie
\\n\\nForm:\\nhttp://HOSTNAME:8010/WsPackageProcess/GetQueryFileMapping?form
\\n\\nThe idea here is to build an xml (package) containing list of superfiles and subfiles and then use it to deploy the package using packagemap command. You will need to write a cron job to execute this command after a specific time interval.\\n\\n\\nIMPORT $;\\ncompletePackage := '<RoxiePackages><Package id="service_km_by_speed"><Base id="thor::superkey::by_speed"/></Package>' + \\n\\t\\t\\t\\t\\t\\t\\t\\t\\t '<Package id="service_accdec"> <Base id="thor::superkey::accdec"/> </Package> <Package id="thor::superkey::by_speed">'+\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t '<SuperFile id="~thor::superkey::by_speed"> <SubFile value="~thor::key::china_by_speed"/> </SuperFile> </Package>'+\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t '<Package id="thor::superkey::accdec"> <SuperFile id="~thor::superkey::accdec"> <SubFile value="~thor::key::china_acc_dec"/> '+\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t '</SuperFile></Package></RoxiePackages>';\\n\\nSTRING Roxie_Hostname := '192.168.40.16';\\nSTRING RoxieUrl_WsWorkunits := 'http://' + Roxie_Hostname + ':8010/WsWorkunits?ver_=1.44';\\nSTRING RoxieUrl_WsPackageProcess := 'http://'+ Roxie_Hostname + ':8010/WsPackageProcess';\\nSTRING Roxie_Clustername := 'roxie';\\nSTRING Package_Name := 'demo.pkg';\\nSTRING Dali_IP := '192.168.40.15'; // Change this field if your DALI IP is different\\n\\nRequestLayout := RECORD\\n STRING packageMapData {XPATH('Info')};\\n BOOLEAN overwritePackage {XPATH('OverWrite')};\\n BOOLEAN activatePackage {XPATH('Activate')};\\n STRING targetCluster {XPATH('Target')};\\n STRING packageMapID {XPATH('PackageMap')};\\n\\t\\tSTRING\\t\\t\\tProcess\\t\\t\\t\\t\\t\\t\\t\\t\\t{XPATH('Process')};\\n\\t\\tSTRING\\t\\t\\tDaliIp\\t\\t\\t\\t\\t\\t\\t\\t\\t{XPATH('DaliIp')};\\nEND;\\n\\nrequest := DATASET\\n (\\n [\\n {\\n completePackage,\\n TRUE,\\n TRUE,\\n $.constants.Roxie_Clustername,\\n $.constants.Package_Name,\\n\\t\\t\\t\\t\\t\\t\\t\\t'*',\\n\\t\\t\\t\\t\\t\\t\\t\\t$.constants.Dali_IP\\n }\\n ],\\n RequestLayout\\n );\\n\\nResponseLayout := RECORD\\n STRING code {XPATH('Code')};\\n STRING description {XPATH('Description')};\\nEND;\\n\\nDeployPackage := SOAPCALL\\n (\\n request,\\n $.constants.RoxieUrl_WsPackageProcess,\\n 'AddPackage',\\n RequestLayout,\\n TRANSFORM(LEFT),\\n DATASET(ResponseLayout),\\n XPATH('AddPackageResponse/status')\\n );\\nDeployPackage;\\n\\n
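To flesh out step 7 above, a minimal sketch (with assumed names, loosely based on the package XML earlier in this post) of swapping the newly built consolidated index into the superkey inside a standard superfile transaction:

IMPORT STD;
//'~thor::key::by_speed_consolidated' is an assumed name for the consolidated index; substitute your own.
SEQUENTIAL(
  STD.File.StartSuperFileTransaction(),
  STD.File.ClearSuperFile('~thor::superkey::by_speed'),                                  //empty the superkey
  STD.File.AddSuperFile('~thor::superkey::by_speed', '~thor::key::by_speed_consolidated'), //add the new index
  STD.File.FinishSuperFileTransaction()                                                  //commit atomically
);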
\", \"post_time\": \"2013-08-13 21:42:30\" },\n\t{ \"post_id\": 4687, \"topic_id\": 1013, \"forum_id\": 32, \"post_subject\": \"Re: New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have created a macro that creates the tool I previously described in this thread, which is now called "MAC_fmmpOfMatchSample" (fmmp in the name stands for "field match mismatch pattern"). The name of the macro that creates this tool is MAC_CreatorOf_MAC_fmmpOfMatchSample. It is in the module, BridgerIndividualsSaltLinking of the Boca dataland repository.\\n\\nThis macro works much like the SALT macro, MAC_Default_SPC. This is, its output is to the workunit and you need to copy and paste it into a new attribute you create in your module. The name of the new attribute MUST BE, MAC_fmmpToMatchSample.\\n\\nThe input to MAC_CreatorOf_MAC_fmmpOfMatchSample is the name of a MatchSample dataset.\", \"post_time\": \"2013-09-30 19:00:15\" },\n\t{ \"post_id\": 4516, \"topic_id\": 1013, \"forum_id\": 32, \"post_subject\": \"New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is for those doing or will be doing internal linking using SALT. I have made a tool that enables one to quickly decide what conf value would make a good threshold for clustering entities.\\n\\nThe tool is able to speedup the process of determining what conf value makes a good threshold because its field match mismatch string patterns (field fmmp in the tools output) basically summaries the field matches and mismatches in a single string pattern. Furthermore, the string of field matches and mismatches are ordered from left to right, where the most important fields (those with the highest specificity) are on the left. \\n\\nI have this tool in the boca dataland repository in\\nBridgerIndividualsSaltLinking.MatchSampleFieldMatchMismatchPatterns.\\nThe workunit, W20130830-093152, shows an example of how to use the tool and what it produces. In addition, there are comments at the top of the attribute that will further help you.\\n\\nAs a final note, I have requested this tool to be an enhancement to SALT and hopefully one day SALT will generate this code for you. But, currently it doesn’t.\", \"post_time\": \"2013-08-30 16:17:36\" },\n\t{ \"post_id\": 4752, \"topic_id\": 1067, \"forum_id\": 32, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"tlhumphrey2\", \"post_text\": \"What I'm doing is purely experimental. My "In" file is just those individuals that have been placed on a watchlist, specifically those on World Compliance's combined watchlist.\\n\\nIt is true that the specificities can be much less accurate when the "In" file is not representative of the total population. And, if the "In" file is much smaller than the total population, you would expect the specificities to be incurate. But, we don't know how this will affect the linking process. We don't know where the cutoff is, i.e. how small is too small.\\n\\nWe do know this, you can use just field average specificities in linking, i.e. no field value specificities are used. And, we know that averages taken from a relatively small sample can be fairly close to the population averages. 
So, it makes since to me that good linking is possible even with a relatively small "In" file.\", \"post_time\": \"2013-10-11 13:34:28\" },\n\t{ \"post_id\": 4750, \"topic_id\": 1067, \"forum_id\": 32, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"bforeman\", \"post_text\": \"Tim, Tony just replied to me:\\n\\nWell this would be a highly unusual situation that should only occur if you are working with a sample of data. Possibly Tim has some other circumstance with his entity. Typically in a SALT application your goal is to have significant coverage with your data SALT, the whole idea of a statistical linking method is based on this. If you do not have full coverage there will be issues, in SALT, the specificities will not be as accurate. Working with samples in SALT can be problematic. POPLULATION is used in calculating the default match threshold along with NINES and RECORDS. Even when using a sample you should always set these numbers to the full amount of data available in order to get the threshold set properly. You should also calculate specificities on the full data. The THRESHOLD statement is the recommended method to override the default threshold when needed.\\n\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2013-10-11 12:44:21\" },\n\t{ \"post_id\": 4747, \"topic_id\": 1067, \"forum_id\": 32, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback Tim, I have forwarded the info to our documentation man (Tony).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-11 12:13:07\" },\n\t{ \"post_id\": 4744, \"topic_id\": 1067, \"forum_id\": 32, \"post_subject\": \"SALT POPULATION can be larger than RECORDS\", \"username\": \"tlhumphrey2\", \"post_text\": \"I found out the other day, from David Bayliss, that POPULATION (a statement in SALT .spc files) can be larger than RECORDS (another statement in SALT .spc files). I mention this because 1) the SALT User Guide does not mention it; 2) for a SALT linking project I'm currently working on, the recommended match threshold was way low; and 3) the POPULATION statement of my .spc file was way low.\\n\\nIf most everyone is covered by the records of your "In" file then POPULATION will be smaller than RECORDS. But, if the number of records in the "In" file is small and does not represent the total population you want linking to work, then the POPULATION will be larger than RECORDS.\", \"post_time\": \"2013-10-10 13:49:57\" },\n\t{ \"post_id\": 4812, \"topic_id\": 1081, \"forum_id\": 32, \"post_subject\": \"Re: SALT question on BESTTYPE\", \"username\": \"tmiddleton\", \"post_text\": \"The EXTEND method on BESTTYPE extends those fields for matching which are part way there. For example if you are matching FRITCH to FRITSCH-MIDDLETON, and have specified LONGEST as the construction method on BESTTYPE, and EXTEND as the propagation method, FRITSCH will be replaced with FRITSCH-MIDDLETON during matching.\", \"post_time\": \"2013-10-23 13:21:48\" },\n\t{ \"post_id\": 4797, \"topic_id\": 1081, \"forum_id\": 32, \"post_subject\": \"SALT question on BESTTYPE\", \"username\": \"john holt\", \"post_text\": \"Does the EXTEND propagation method also work with HYPHEN1?\", \"post_time\": \"2013-10-22 13:38:53\" },\n\t{ \"post_id\": 4940, \"topic_id\": 1115, \"forum_id\": 32, \"post_subject\": \"Best place to get lat/long of address\", \"username\": \"tlhumphrey2\", \"post_text\": \"I asked this question but couldn't get a simple answer to it. 
So, since I knew the address cleaner does return lat/long when given an address, I thought I would run an experiment that would tell me how often the lat/long returned is very accurate.\\n\\nHere is the results of my experiment. I randomly selected 1 million Person Header addresses and passed them through the address cleaner. The following table shows the counts and percentages for each geo_match value ( 0 through 5), where the geo_match indicates the accuracy of the returned lat/long. The 1st column is the geo_match values. The 2nd is the count of the number of returned lat/longs where the accuracy specified by the geo_match value of column 1. And, the last column is the percentage.\\n\\n0\\t768596\\t76.9%\\n1\\t 95133\\t 9.5%\\n4\\t 8119\\t 0.8%\\n5\\t128152\\t12.8%\\n\\nHere are the meaning of each geo_match value:\\n\\n0 Matched in address level.\\n1 9-digit match in Centroid.\\n4 7-digit match in Centroid.\\n5 5-digit match in Centroid.\", \"post_time\": \"2013-11-13 21:31:24\" },\n\t{ \"post_id\": 5088, \"topic_id\": 1158, \"forum_id\": 32, \"post_subject\": \"Re: Interesting person header address stats\", \"username\": \"rtaylor\", \"post_text\": \"Then it sounds to me like the Address Cleaner may have a bug and/or "garbage data" for some locations. \", \"post_time\": \"2013-12-20 16:42:31\" },\n\t{ \"post_id\": 5087, \"topic_id\": 1158, \"forum_id\": 32, \"post_subject\": \"Re: Interesting person header address stats\", \"username\": \"tlhumphrey2\", \"post_text\": \"For all of these address pairs, geo_match was 0, from address cleaner. The value of geo_match tells how accurate the lat/long is. The following are the possible values for geo_match and their meanings.\\n\\n0 means matched in address level.\\n1 means 9-digit match in Centroid.\\n4 means 7-digit match in Centroid.\\n5 means 5-digit match in Centroid.\\n7 means no match in Centroid.\\n8 means not matched in Address level.\\n9 means both options tried, but no match in either.\", \"post_time\": \"2013-12-20 16:40:43\" },\n\t{ \"post_id\": 5086, \"topic_id\": 1158, \"forum_id\": 32, \"post_subject\": \"Re: Interesting person header address stats\", \"username\": \"rtaylor\", \"post_text\": \"So is the LatLong from the Address Cleaner actually each specific rooftop's location, or is it just the centroid of the Zip+4 (which is what I thought they were)?\", \"post_time\": \"2013-12-20 15:43:44\" },\n\t{ \"post_id\": 5085, \"topic_id\": 1158, \"forum_id\": 32, \"post_subject\": \"Re: Interesting person header address stats\", \"username\": \"tlhumphrey2\", \"post_text\": \"Forgot one thing. The workunit, W20131218-095743, on OSS boca dataland, has random samples of address pairs with each of the properties which I provide counts for.\\n\\nNAMING CONVENTION OF RESULTS: Any result name that begins with "c " is a count. The "lr " that a result name can begin with means left right. The result names are fairly intuitive. For example, the result set named "lr zip5s differ" gives a random sample of addresses pairs at the same location that have zip5s different.\", \"post_time\": \"2013-12-20 15:30:01\" },\n\t{ \"post_id\": 5084, \"topic_id\": 1158, \"forum_id\": 32, \"post_subject\": \"Interesting person header address stats\", \"username\": \"tlhumphrey2\", \"post_text\": \"I was surprised by the following statistics. I though that two addresses at the same location (same roof top lat/long) would always have the same prim_range, same city, same zip5, same state. 
But, these statistic say that isn't the case.\\n\\n1..1,902,765,816....Number of person header records that have zip and zip4\\n2........5,622,223....Unique address pairs with 1) different street names, but 2) same\\n...........................roof top lat/long, i.e. location (got lat/long from address cleaner)\\n3........3,305,073....Unique address pairs with same location and same prim_ranges\\n4........2,317,150....Unique address pairs with same location and different prim_ranges\\n5..........644,864....Line 2 address pairs with different city, state zip zip4\\n6..........386,255....Line 2 address pairs with different zip5s\\n7..........287,808....Line 2 address pairs with different city state\\n8..........287,186....Line 2 address pairs with different cities\\n9............3,025....Line 2 address pairs with different states\", \"post_time\": \"2013-12-20 14:41:10\" },\n\t{ \"post_id\": 1929, \"topic_id\": 418, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Ubuntu\", \"username\": \"gsmith\", \"post_text\": \"At the moment the quickest way to get the graph control working on Ubuntu is to build it your self, see instructions at:\\nhttps://github.com/GordonSmith/GraphControl\\n\\nWe are planning on getting build machines up and running to do this in the next release cycle (there are a myriad of combinations for the various Linux distos, 32/64bit etc.).\\n\\nGordon\", \"post_time\": \"2012-07-10 21:48:44\" },\n\t{ \"post_id\": 1898, \"topic_id\": 418, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Ubuntu\", \"username\": \"gkrasnow\", \"post_text\": \"Oops... I guessed I assumed that with a Linux Eclipse IDE there would also be a Linux version of the graph control plugin, but I guess there is still only the windows version.\", \"post_time\": \"2012-07-05 17:52:33\" },\n\t{ \"post_id\": 1897, \"topic_id\": 418, \"forum_id\": 33, \"post_subject\": \"Graph Control in Eclipse IDE for Ubuntu\", \"username\": \"gkrasnow\", \"post_text\": \"I installed the Graph Control for Ubuntu, but when I try to inspect the graph using Eclipse plugin I still get the javascript message that the plugin is not installed. Does the eclipse plugin use the browser settings from within eclipse or try to open its own browser window? If the latter, how do I make sure that the plugin .so file is in the correct location. Thanks.\\n\\n- Greg\", \"post_time\": \"2012-07-05 17:32:19\" },\n\t{ \"post_id\": 1931, \"topic_id\": 423, \"forum_id\": 33, \"post_subject\": \"Re: Controlling the ECLCC command line\", \"username\": \"gsmith\", \"post_text\": \"Depending on which version of the plugin you have:\\n[*]Newer: In the "run configurations" window, there should be a second tab for advabced eclcc settings.\\n[*]Older: In window/preferences there is asimilar ECL Section\", \"post_time\": \"2012-07-10 21:54:32\" },\n\t{ \"post_id\": 1922, \"topic_id\": 423, \"forum_id\": 33, \"post_subject\": \"Controlling the ECLCC command line\", \"username\": \"gkrasnow\", \"post_text\": \"When using the Eclipse ECL plugin, is it possible to manipulate the ECL CC command line? For instance, if I want to add more import and/or library folders where can I add them to the -I/-L command-line parameters? 
Thanks.\\n\\n- Greg\", \"post_time\": \"2012-07-09 17:22:58\" },\n\t{ \"post_id\": 3915, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gsmith\", \"post_text\": \"Looking at https://github.com/hpcc-systems/GraphControl/pull/119\\n\\nIt will be in the public 3.10.6 gold release.\", \"post_time\": \"2013-04-11 08:33:39\" },\n\t{ \"post_id\": 3913, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"jandleman\", \"post_text\": \"It looks like this will show up in the next RC which is v4.0. That's exciting! Is this coming soon?\", \"post_time\": \"2013-04-10 20:35:15\" },\n\t{ \"post_id\": 3904, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gsmith\", \"post_text\": \"Just to confirm this issue is resolved for OSX 10.8.3:\\nhttps://github.com/hpcc-systems/GraphControl/pull/118\\n\\nAnd should be available in next RC.\", \"post_time\": \"2013-04-09 12:20:53\" },\n\t{ \"post_id\": 3897, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"jandleman\", \"post_text\": \"I am running OSX 10.8.3, and have tried to use Graph Control with the latest Chrome browser, the latest Safari browser, and also Eclipse. None of them find the plugin. This morning, I installed Firefox. It finds the plugin, but when I try to display a graph, the browser just hangs. Activity monitor shows the browser as "not responding".\", \"post_time\": \"2013-04-08 18:42:18\" },\n\t{ \"post_id\": 3896, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gsmith\", \"post_text\": \"I have opened an issue for this: https://github.com/hpcc-systems/GraphControl/issues/117\\n\\nAnd will investigate further.\", \"post_time\": \"2013-04-08 14:22:14\" },\n\t{ \"post_id\": 3895, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"DSC\", \"post_text\": \"It looks like Graph Control is looking for a library specific to its build environment:\\n\\n
4/8/13 6:33:33.119 AM PluginProcess[55188]: Error loading /Library/Internet Plug-Ins/HPCCSystemsGraphViewControl_x86_64.plugin/Contents/MacOS/HPCCSystemsGraphViewControl_x86_64: dlopen(/Library/Internet Plug-Ins/HPCCSystemsGraphViewControl_x86_64.plugin/Contents/MacOS/HPCCSystemsGraphViewControl_x86_64, 262): Library not loaded: /opt/local/lib/libfreetype.6.dylib\\n Referenced from: /Library/Internet Plug-Ins/HPCCSystemsGraphViewControl_x86_64.plugin/Contents/MacOS/HPCCSystemsGraphViewControl_x86_64\\n Reason: image not found\\n
\\nThis was on a system running OS X 10.8.3 and Safari, with everything up-to-date.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-04-08 11:36:49\" },\n\t{ \"post_id\": 3894, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gsmith\", \"post_text\": \"Yes the graph control is working in OSX now.\\n\\nBUT (isn't there always)\\n\\nThe control that is available for download on the portal is 64bit only and thus will only work in 64bit Safari and 64bit FireFox (I believe chrome is 32bit only on OSX?).\\n\\nFurther (on windows at least) you need to have the matching version for eclipse (32bit control for 32bit Eclipse and 64bit for 64bit Eclipse).\\n\\nCan you share your setup?\", \"post_time\": \"2013-04-08 11:29:48\" },\n\t{ \"post_id\": 3893, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"jandleman\", \"post_text\": \"Is graph control still broken in Mac OS? I have installed the Eclipse IDE and all of its prerequisites. Everything works fine except I get an error when I try to display graphs. It tells me the plugin is not installed (I get the same message from ECL watch in my browser). I have installed graph control, but it is not working.\", \"post_time\": \"2013-04-08 04:35:45\" },\n\t{ \"post_id\": 1930, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Re: Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gsmith\", \"post_text\": \"The graph control is currently not working on OSX, but it is on the list for the next release cycle.\", \"post_time\": \"2012-07-10 21:49:57\" },\n\t{ \"post_id\": 1925, \"topic_id\": 424, \"forum_id\": 33, \"post_subject\": \"Graph Control in Eclipse IDE for Mac OS\", \"username\": \"gkrasnow\", \"post_text\": \"I see that there is now a clienttools for Mac OS, thus allowing the Eclipse plugin to run on Mac OS. However, I did not see the Graph Control plugin for Mac OS. Is this going to be available soon? Thanks.\\n\\n- Greg\", \"post_time\": \"2012-07-09 21:02:54\" },\n\t{ \"post_id\": 2013, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"Since we primarily build on Linux I would imagine you will always get more consistent results with gcc.\\n\\nWe haven't really done much build/packaging outside of Snow Leopard (and with pretty specific third party library versions) so this is all helpful feedback.\\n\\nGordon.\", \"post_time\": \"2012-07-18 19:37:19\" },\n\t{ \"post_id\": 2006, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"Gordon,\\n\\nAhh.. I see. I guess there does seem to be a packaging issue. (I am on Lion by the way). It does work when run directly from the build directory. I did find that I get a lot of strange warnings when using clang which I did not get when using gcc. Thanks.\\n\\n- Greg\\n\\n[quote="gsmith":g7g39ki5]I am not sure when the next official build will be done (I am on vacation at the moment).\\n\\n1. Did you build from my branch OSX_Plugins @ https://github.com/GordonSmith/HPCC-Platform.git?\\n2. You need to set CLIENTTOOLS_ONLY to true?\\n\\nIf so your issue may just be with some of the install patching (are you on Lion or Snow Leopard?). 
Try running eclcc directly from your build folder and see if that works (if building local you don't need to do whole install/package part).\\n\\nHTH\\n\\nGordon.\", \"post_time\": \"2012-07-18 16:21:19\" },\n\t{ \"post_id\": 1999, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"I am not sure when the next official build will be done (I am on vacation at the moment).\\n\\n1. Did you build from my branch OSX_Plugins @ https://github.com/GordonSmith/HPCC-Platform.git?\\n2. You need to set CLIENTTOOLS_ONLY to true?\\n\\nIf so your issue may just be with some of the install patching (are you on Lion or Snow Leopard?). Try running eclcc directly from your build folder and see if that works (if building local you don't need to do whole install/package part).\\n\\nHTH\\n\\nGordon.\", \"post_time\": \"2012-07-18 09:17:36\" },\n\t{ \"post_id\": 1970, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="gsmith":7kcycsbq]Ok that makes more sense. \\n\\nI have been able to reproduce the issue and have submitted a fix: \\nhttps://github.com/hpcc-systems/HPCC-Platform/pull/2829\\n\\nGordon.\\n\\nAny idea when a new binary will be built? I tried building from source trying both clang and a macport install of gcc-4.5. My final eclcc complains about a symbol _environ missing from one of the shared libraries. \\n\\n\\ndyld: Symbol not found: _environ\\n Referenced from: /opt/HPCCSystems/3.11.0/clienttools/bin/./../lib2/libbfd-2.21.dylib\\n Expected in: flat namespace\\n in /opt/HPCCSystems/3.11.0/clienttools/bin/./../lib2/libbfd-2.21.dylib\\nTrace/BPT trap: 5\\n
\\n\\nThanks.\\n\\n- Greg\", \"post_time\": \"2012-07-13 17:22:51\" },\n\t{ \"post_id\": 1964, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"Ok that makes more sense. \\n\\nI have been able to reproduce the issue and have submitted a fix: \\nhttps://github.com/hpcc-systems/HPCC-Platform/pull/2829\\n\\nGordon.\", \"post_time\": \"2012-07-12 21:35:12\" },\n\t{ \"post_id\": 1961, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="gsmith":if5dhyxk]I beleive lib_stringlib is unsupported on the OSS environment. Its successor is std.str:\\n\\nimport std.str;\\n\\nIF(str.Find('ABCDE', 'BC',1) = 2,\\n\\t'Success',\\n\\t'Failure - 1');
\\nGordon\\n\\nIt is used internally in the std.str ECL library:\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(12,22): error C2081: Import names unknown module "lib_stringlib"\", \"post_time\": \"2012-07-12 18:55:08\" },\n\t{ \"post_id\": 1960, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"I believe lib_stringlib is unsupported on the OSS environment. Its successor is std.str:\\n\\nimport std.str;\\n\\nIF(str.Find('ABCDE', 'BC',1) = 2,\\n\\t'Success',\\n\\t'Failure - 1');
\\nGordon\", \"post_time\": \"2012-07-12 18:52:07\" },\n\t{ \"post_id\": 1958, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="gsmith":235xc75q]I pushed an update earlier today, can you update your version of the plugin: "Help/Check for Updates" and report back?\\n\\nGordon.\\n\\nstill seeing: error C2081: Import names unknown module "lib_stringlib"\\n\\n ECL Language\\t1.0.0.201207121347\\n\\nThanks.\", \"post_time\": \"2012-07-12 18:41:19\" },\n\t{ \"post_id\": 1957, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"I pushed an update earlier today, can you update your version of the plugin: "Help/Check for Updates" and report back?\\n\\nGordon.\", \"post_time\": \"2012-07-12 18:28:32\" },\n\t{ \"post_id\": 1951, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"[quote="gsmith":ih441fma]I suspect you will need a newer version of the Eclipse Plugin, I will try and push one out to eclipse.hpccsystems.com/develop tomorrow.\\n\\nGordon.\\n\\nI am not seeing the NullPointerException anymore, but I am still seeing the first problem mentioned where the IMPORT lib_stringlib is failing during the syntax check. The eclcc compiler reports the following:\\n\\n\\n/opt/HPCCSystems/3.8.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/3.8.0/clienttools/bin/eclcc -M -I/Users/gkrasnow/Documents/workspace/VektorTurtorialHPCC /Users/gkrasnow/Documents/workspace/VektorTurtorialHPCC/ExternalAccessExamples/testPipe2.ecl\\n/opt/HPCCSystems/3.8.0/clienttools/bin/eclcc -fsyntaxcheck=1 -E -I/Users/gkrasnow/Documents/workspace/VektorTurtorialHPCC /Users/gkrasnow/Documents/workspace/VektorTurtorialHPCC/ExternalAccessExamples/testPipe2.ecl\\nErr: Error: File '' does not exist\\nErr: /Users/gkrasnow/Documents/workspace/VektorTurtorialHPCC/ExternalAccessExamples/testPipe2.ecl(1,22): error C2081: Import names unknown module "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(12,22): error C2081: Import names unknown module "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(24,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(47,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(56,58): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(68,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(80,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(90,55): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(90,24): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(100,52): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(100,21): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(114,3): error C2167: 
Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(113,33): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(128,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(127,33): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(142,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(141,24): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(150,42): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(150,26): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(158,42): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(158,26): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(168,44): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(168,28): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(178,42): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(178,26): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(187,38): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(187,22): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(198,3): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(197,26): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(207,58): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(207,22): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(217,42): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(217,26): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(258,62): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(258,30): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(269,91): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(280,95): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(280,32): error C2007: Incompatible 
types: can not assign Integer to Set of String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(291,70): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(291,27): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(304,5): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(317,5): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(328,5): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(339,5): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(338,25): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(348,41): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(348,25): error C2007: Incompatible types: can not assign Integer to String\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(361,47): error C2167: Unknown identifier "lib_stringlib"\\nErr: /opt/HPCCSystems/3.8.0/clienttools/share/ecllibrary/std/Str.ecl(361,25): error C2007: Incompatible types: can not assign Integer to Data\\n
\", \"post_time\": \"2012-07-12 15:56:10\" },\n\t{ \"post_id\": 1933, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Re: Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gsmith\", \"post_text\": \"I suspect you will need a newer version of the Eclipse Plugin, I will try and push one out to eclipse.hpccsystems.com/develop tomorrow.\\n\\nGordon.\", \"post_time\": \"2012-07-10 23:50:52\" },\n\t{ \"post_id\": 1932, \"topic_id\": 427, \"forum_id\": 33, \"post_subject\": \"Issues with Eclipse Plugin and Mac OSX\", \"username\": \"gkrasnow\", \"post_text\": \"I am seeing two issues when trying to use the Mac OSX clienttools with the Eclipse plugin:\\n\\n1) With both the old and new Eclipse plugin, I am seeing the "red X" on these two lines:\\nIMPORT lib_stringlib;\\nIMPORT std.str;\\n\\nIt appears (to me at least) that the difference in the organization of the clienttools in the Mac OSX (bin,lib,shared,etc..) seems to be confusing the compiler when run from the plugin and it cannot find the standard libraries.\\n\\n2) With the new plugin, the ECL Builder component in the plugin does not work correctly and gives a NullPointerException everytime it is run for the syntax checking. Submitting code to the cluster seems to work ok though.\\n\\n- Greg\", \"post_time\": \"2012-07-10 23:07:35\" },\n\t{ \"post_id\": 2014, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"gsmith\", \"post_text\": \"As you are on OSX, you may want to look at the other thread in this forum) as you may need to build the client tools yourself (for best results).\\n\\nGordon.\", \"post_time\": \"2012-07-18 19:39:34\" },\n\t{ \"post_id\": 2012, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"DSC\", \"post_text\": \"Great information. Thanks, Gordon.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-07-18 19:33:00\" },\n\t{ \"post_id\": 2011, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"gsmith\", \"post_text\": \"...no and the OSX version is still a work in progress (as is the eclipse plugin)...\\n\\nHowever, if you have the platform installed on ubuntu, it includes the needed client tools.\\n\\nAlternatively, you can build from sources from github.\\n\\nGordon. \\n\\nPS This is all a little rushed at the moment, but all of the above - OSX, WIndows + Linux Client Tools Packages are going to be part of our official build process soon (next non bug fix release cycle all going well), the same will be true for the Eclipse Plugin.\", \"post_time\": \"2012-07-18 19:30:14\" },\n\t{ \"post_id\": 2010, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"DSC\", \"post_text\": \"[quote="gsmith":2vyn0hrk]I would suggest using http://eclipse.hpccsystems.com/develop and to treat it as a beta. The stable URL probably won't be updated until an official release is made.\\n\\nI just switched to it and it's definitely an improvement from the stable version. I'm using Mac OS X, BTW.\\n\\nIs there a Ubuntu client-tools-only package for 3.8.0? The downloads page (http://hpccsystems.com/download/free-community-edition/ecl-ide-and-client-tools) shows only Windows and Mac versions. 
All of my other developers here are using Ubuntu and would like to use their existing Eclipse installations if possible.\\n\\nThanks, Gordon!\\n\\nDan\", \"post_time\": \"2012-07-18 19:01:37\" },\n\t{ \"post_id\": 2007, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"gsmith\", \"post_text\": \"I would suggest using http://eclipse.hpccsystems.com/develop and to treat it as a beta. The stable URL probably won't be updated until an official release is made.\\n\\nGordon.\", \"post_time\": \"2012-07-18 17:13:45\" },\n\t{ \"post_id\": 2003, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you Dan for your post and sharing the answer! You beat us to it. Let us know how it works out for you.\", \"post_time\": \"2012-07-18 13:32:43\" },\n\t{ \"post_id\": 2002, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin location?\", \"username\": \"DSC\", \"post_text\": \"Never mind. As is usually the case, after posting a very public question I found the answer by grepping through the documentation source XML. Should have done that first, right?\\n\\nFor anyone else with the same question: Use Eclipse's standard software installation scheme and point it toward http://eclipse.hpccsystems.com/stable.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-07-18 13:02:28\" },\n\t{ \"post_id\": 2001, \"topic_id\": 440, \"forum_id\": 33, \"post_subject\": \"Eclipse plugin location?\", \"username\": \"DSC\", \"post_text\": \"Where is the Eclipse plugin? Is it available only for enterprise customers?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-07-18 12:47:18\" },\n\t{ \"post_id\": 2086, \"topic_id\": 449, \"forum_id\": 33, \"post_subject\": \"Re: ECL deployments via Go\", \"username\": \"DSC\", \"post_text\": \"We use Go for virtually everything else, so it's entirely possible that we'll use it here as well. If so, I'll write something up and share.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2012-07-26 11:37:30\" },\n\t{ \"post_id\": 2082, \"topic_id\": 449, \"forum_id\": 33, \"post_subject\": \"Re: ECL deployments via Go\", \"username\": \"bforeman\", \"post_text\": \"Hi Dan,\\n\\nSeveral of us here were tossing it around but nobody has used it that we know of. Maybe you could be the trail blazer?
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2012-07-25 23:38:17\" },\n\t{ \"post_id\": 2050, \"topic_id\": 449, \"forum_id\": 33, \"post_subject\": \"ECL deployments via Go\", \"username\": \"DSC\", \"post_text\": \"Does anyone use Go (http://www.thoughtworks-studios.com/go-agile-release-management) to manage their ECL deployments? If so, are there any pointers or pitfalls that one should be aware of?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2012-07-23 18:08:48\" },\n\t{ \"post_id\": 3436, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"[quote="ideal":1kkqlxt3]It is related to outline, when project is building and there is a lot of components to display ! \\nThis behaviour can be avoided by closing outline window before building the project.\\n\\n\\nThe outline feature is still quite experimental - I may disable/hide it by default for now.\", \"post_time\": \"2013-02-13 07:46:50\" },\n\t{ \"post_id\": 3430, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"
Eclipse crashed and was not able to relaunch again. I don't know if there is a relation with the ECL plugin
\\n\\nIt is related to the outline: when the project is building and there are a lot of components to display!\\nThis behaviour can be avoided by closing the outline window before building the project.\\n\\nJM.\", \"post_time\": \"2013-02-12 20:15:59\" },\n\t{ \"post_id\": 3429, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"
\\n\\nYour answer about "Project References" concerns links between ECL projects. This is useful information; anyway, I was referring to links inside the same project and, I should have done it before, but when reading the compiler documentation more carefully, I read there is an option "-I" to include relative paths : I set the common compiler arguments field in the ECL preferences with -I "<my path>" and it worked.\\n\\nAs I was recompiling ECL files, one by one, after having changed the preferences (don't know why it didn't happen automatically), my Eclipse crashed and was not able to relaunch again. I don't know if there is a relation with the ECL plugin but if it happens, the best thing to do, to keep all eclipse content safe, is to delete (or move) the file C:\\\\Users\\\\XXXXX\\\\workspace\\\\.metadata\\\\.plugins\\\\org.eclipse.e4.workbench\\\\workbench.xmi and relaunch eclipse. \\n\\nJM.\", \"post_time\": \"2013-02-12 19:54:10\" },\n\t{ \"post_id\": 3428, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"[quote="ideal":2c8tgk0l]You are right, I am working with windows 32 bits. I guess, after having fixed ECL plugin, it will be downloadable from the usual eclipse repository http://eclipse.hpccsystems.com/develop.\\n\\nOnce the fix has been reviewed and accepted to upstream I will do a new build and publish, at which point your eclipse will automatically update (depending on your settings this could be on startup, daily or weekly etc.) or if you select "Help/Check for updates".\", \"post_time\": \"2013-02-12 16:32:06\" },\n\t{ \"post_id\": 3427, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"I have another question about configuration : I don't find the way to configurate what is called "ECL folders" in ECLIDE preferences, which would allow me to specify where ECL modules can see each other. The workaround I found was to prefix imported module names with their relative path in working directory.\\nDo you know a better way to do it ?
\\nFor this "feature" you need to go with the eclipse way of doing things, so:\\n1. Ensure that all relevant projects are included in your workspace.\\n2. Right click on the project that needs to "reference" a different project and select properties.\\n3. Select "Project References" as per the attached screen shot.I have another question about configuration : I don't find the way to configurate what is called "ECL folders" in ECLIDE preferences, which would allow me to specify where ECL modules can see each other. The workaround I found was to prefix imported module names with their relative path in working directory. \\nDo you know a better way to do it ?
\\n\\nFor this "feature" you need to go with the eclipse way of doing things, so:\\n1. Ensure that all relevant projects are included in your workspace.\\n2. Right click on the project that needs to "reference" a different project and select properties.\\n3. Select "Project References" as per the attached screen shot.\", \"post_time\": \"2013-02-12 16:27:32\" },\n\t{ \"post_id\": 3424, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"Hello Gordon,\\n\\nThank you so much for your help ! \\n\\nIt worked great on my sample source. I will try on my code but I guess it should be the same. You are right, I am working with windows 32 bits. I guess, after having fixed ECL plugin, it will be downloadable from the usual eclipse repository http://eclipse.hpccsystems.com/develop.\\n\\nI have another question about configuration : I don't find the way to configurate what is called "ECL folders" in ECLIDE preferences, which would allow me to specify where ECL modules can see each other. The workaround I found was to prefix imported module names with their relative path in working directory. \\nDo you know a better way to do it ? \\n\\nThanks,\\nJM.\", \"post_time\": \"2013-02-12 15:25:50\" },\n\t{ \"post_id\": 3420, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"That exception is being thrown while trying to "auto" locate the client tools (and then setup the default params).\\n\\nLooking at the code it is probably because you are running a 32bit version of windows (I will open a ticket and fix today: https://github.com/hpcc-systems/EclipsePlugin/pull/92).\\n\\nIn the meantime if you ensure that the defaults are correct, you should be able to continue (I have attached a screenshot to assist).\\n\\nGordon.\", \"post_time\": \"2013-02-12 09:32:43\" },\n\t{ \"post_id\": 3404, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"Hello again,\\n\\nAfter having made some changes in eclipse installation (new java and jdk version 7,axis2 tool plugin installation asked by hpcc plugin,new empty workspace), I get this error message during eclipse startup (in Error Log window) : \\nProblems occurred when invoking code from plug-in: "org.eclipse.equinox.preferences".\\nwith a "java.lang.NullPointerException" and the stack trace is : \\njava.lang.NullPointerException\\n\\tat org.hpccsystems.internal.data.ClientTools.findClientTools(ClientTools.java:163)\\n\\tat org.hpccsystems.internal.data.ClientTools.findNewestClientTool(ClientTools.java:193)\\n\\tat org.hpccsystems.eclide.preferences.ECLPreferenceInitializer.initializeDefaultPreferences(ECLPreferenceInitializer.java:38)\\n\\tat org.eclipse.core.internal.preferences.PreferenceServiceRegistryHelper$1.run(PreferenceServiceRegistryHelper.java:300)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.core.internal.preferences.PreferenceServiceRegistryHelper.runInitializer(PreferenceServiceRegistryHelper.java:303)\\n\\tat org.eclipse.core.internal.preferences.PreferenceServiceRegistryHelper.applyRuntimeDefaults(PreferenceServiceRegistryHelper.java:131)\\n\\tat org.eclipse.core.internal.preferences.PreferencesService.applyRuntimeDefaults(PreferencesService.java:368)\\n\\tat org.eclipse.core.internal.preferences.DefaultPreferences.applyRuntimeDefaults(DefaultPreferences.java:166)\\n\\tat 
org.eclipse.core.internal.preferences.DefaultPreferences.load(DefaultPreferences.java:237)\\n\\tat org.eclipse.core.internal.preferences.EclipsePreferences.create(EclipsePreferences.java:410)\\n\\tat org.eclipse.core.internal.preferences.EclipsePreferences.internalNode(EclipsePreferences.java:663)\\n\\tat org.eclipse.core.internal.preferences.EclipsePreferences.node(EclipsePreferences.java:805)\\n\\tat org.eclipse.core.internal.preferences.AbstractScope.getNode(AbstractScope.java:38)\\n\\tat org.eclipse.core.runtime.preferences.DefaultScope.getNode(DefaultScope.java:76)\\n\\tat org.eclipse.ui.preferences.ScopedPreferenceStore.getDefaultPreferences(ScopedPreferenceStore.java:250)\\n\\tat org.eclipse.ui.preferences.ScopedPreferenceStore.getPreferenceNodes(ScopedPreferenceStore.java:285)\\n\\tat org.eclipse.ui.preferences.ScopedPreferenceStore.internalGet(ScopedPreferenceStore.java:475)\\n\\tat org.eclipse.ui.preferences.ScopedPreferenceStore.getString(ScopedPreferenceStore.java:535)\\n\\tat org.hpccsystems.internal.data.ClientTools.<init>(ClientTools.java:83)\\n\\tat org.hpccsystems.internal.data.ClientTools.get(ClientTools.java:37)\\n\\tat org.hpccsystems.internal.data.Data.<init>(Data.java:45)\\n\\tat org.hpccsystems.internal.data.Data.get(Data.java:95)\\n\\tat org.hpccsystems.eclide.ui.viewer.platform.PlatformViewer.createPartControl(PlatformViewer.java:275)\\n\\tat org.eclipse.ui.internal.e4.compatibility.CompatibilityPart.createPartControl(CompatibilityPart.java:129)\\n\\tat org.eclipse.ui.internal.e4.compatibility.CompatibilityView.createPartControl(CompatibilityView.java:155)\\n\\tat org.eclipse.ui.internal.e4.compatibility.CompatibilityPart.create(CompatibilityPart.java:300)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\\n\\tat java.lang.reflect.Method.invoke(Method.java:601)\\n\\tat org.eclipse.e4.core.internal.di.MethodRequestor.execute(MethodRequestor.java:56)\\n\\tat org.eclipse.e4.core.internal.di.InjectorImpl.processAnnotated(InjectorImpl.java:859)\\n\\tat org.eclipse.e4.core.internal.di.InjectorImpl.processAnnotated(InjectorImpl.java:839)\\n\\tat org.eclipse.e4.core.internal.di.InjectorImpl.inject(InjectorImpl.java:111)\\n\\tat org.eclipse.e4.core.internal.di.InjectorImpl.internalMake(InjectorImpl.java:319)\\n\\tat org.eclipse.e4.core.internal.di.InjectorImpl.make(InjectorImpl.java:240)\\n\\tat org.eclipse.e4.core.contexts.ContextInjectionFactory.make(ContextInjectionFactory.java:161)\\n\\tat org.eclipse.e4.ui.internal.workbench.ReflectionContributionFactory.createFromBundle(ReflectionContributionFactory.java:102)\\n\\tat org.eclipse.e4.ui.internal.workbench.ReflectionContributionFactory.doCreate(ReflectionContributionFactory.java:71)\\n\\tat org.eclipse.e4.ui.internal.workbench.ReflectionContributionFactory.create(ReflectionContributionFactory.java:53)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.ContributedPartRenderer.createWidget(ContributedPartRenderer.java:141)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createWidget(PartRenderingEngine.java:896)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:630)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$6.run(PartRenderingEngine.java:518)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat 
org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:503)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.ElementReferenceRenderer.createWidget(ElementReferenceRenderer.java:74)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createWidget(PartRenderingEngine.java:896)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:630)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.StackRenderer.showTab(StackRenderer.java:1114)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.LazyStackRenderer.postProcess(LazyStackRenderer.java:98)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:646)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.processContents(SWTPartRenderer.java:59)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:642)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.processContents(SWTPartRenderer.java:59)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:642)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.processContents(SWTPartRenderer.java:59)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.PerspectiveRenderer.processContents(PerspectiveRenderer.java:59)\\n\\tat 
org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:642)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.PerspectiveStackRenderer.showTab(PerspectiveStackRenderer.java:103)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.LazyStackRenderer.postProcess(LazyStackRenderer.java:98)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.PerspectiveStackRenderer.postProcess(PerspectiveStackRenderer.java:77)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:646)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.processContents(SWTPartRenderer.java:59)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:642)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.processContents(SWTPartRenderer.java:59)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.WBWRenderer.processContents(WBWRenderer.java:639)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:642)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.safeCreateGui(PartRenderingEngine.java:732)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.access$2(PartRenderingEngine.java:703)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$7.run(PartRenderingEngine.java:697)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.createGui(PartRenderingEngine.java:682)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$9.run(PartRenderingEngine.java:968)\\n\\tat org.eclipse.core.databinding.observable.Realm.runWithDefault(Realm.java:332)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.run(PartRenderingEngine.java:923)\\n\\tat org.eclipse.e4.ui.internal.workbench.E4Workbench.createAndRunUI(E4Workbench.java:86)\\n\\tat 
org.eclipse.ui.internal.Workbench$5.run(Workbench.java:588)\\n\\tat org.eclipse.core.databinding.observable.Realm.runWithDefault(Realm.java:332)\\n\\tat org.eclipse.ui.internal.Workbench.createAndRunWorkbench(Workbench.java:543)\\n\\tat org.eclipse.ui.PlatformUI.createAndRunWorkbench(PlatformUI.java:149)\\n\\tat org.eclipse.ui.internal.ide.application.IDEApplication.start(IDEApplication.java:124)\\n\\tat org.eclipse.equinox.internal.app.EclipseAppHandle.run(EclipseAppHandle.java:196)\\n\\tat org.eclipse.core.runtime.internal.adaptor.EclipseAppLauncher.runApplication(EclipseAppLauncher.java:110)\\n\\tat org.eclipse.core.runtime.internal.adaptor.EclipseAppLauncher.start(EclipseAppLauncher.java:79)\\n\\tat org.eclipse.core.runtime.adaptor.EclipseStarter.run(EclipseStarter.java:353)\\n\\tat org.eclipse.core.runtime.adaptor.EclipseStarter.run(EclipseStarter.java:180)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\\n\\tat java.lang.reflect.Method.invoke(Method.java:601)\\n\\tat org.eclipse.equinox.launcher.Main.invokeFramework(Main.java:629)\\n\\tat org.eclipse.equinox.launcher.Main.basicRun(Main.java:584)\\n\\tat org.eclipse.equinox.launcher.Main.run(Main.java:1438)\\n
\\n\\nSession data is \\neclipse.buildId=M20120914-1800\\njava.version=1.7.0_13\\njava.vendor=Oracle Corporation\\nBootLoader constants: OS=win32, ARCH=x86, WS=win32, NL=fr_FR\\nCommand-line arguments: -os win32 -ws win32 -arch x86 -clean\\n
\\nWhat does it mean?\\n\\nI can create a new ECL project, and I can test the run configuration OK with the hpcc virtual image. When I save (compile) my example, I get this message in the console eclcc window : \\nC:\\\\Program Files\\\\HPCCSystems\\\\3.10.0\\\\clienttools\\\\bin\\\\eclcc --version\\nC:\\\\Program Files\\\\HPCCSystems\\\\3.10.0\\\\clienttools\\\\bin\\\\eclcc -I"C:\\\\Users\\\\XXXXX\\\\workspace2\\\\Interpreteur" "C:\\\\Users\\\\XXXXX\\\\workspace2\\\\Interpreteur\\\\essai\\\\new_file.ecl"\\n
\\nAfter having launched my run configuration on a simple example, when I try to reload or refresh the Workunits window, I don't get a new workunit entry (same by checking directly in my browser).\\nMy simple example is : \\nOUTPUT('hello World!');
\\n\\nMy preference ECL configuration is : \\n[attachment=0:1m9eklm8]ECL preferences.jpg\\n\\nIf someone has some idea, it would be helpful,\\nThanks,\\nJM.\", \"post_time\": \"2013-02-10 11:17:56\" },\n\t{ \"post_id\": 3336, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"As you can see, my run configuration is ok : \\n[attachment=0:15dqxz5s]my run configuration.jpg\\n\\nBut I have a lot of error messages in the error log, so I have to investigate further.\\n\\nThank you,\\nJM.\", \"post_time\": \"2013-02-05 14:35:47\" },\n\t{ \"post_id\": 3335, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"This is exactly what is not working in my eclipse configuration. Then if yours is working fine, I guess something is missing in mine but I don't know what
\\nDoes your Run Configuration look similar to mine?\\n\\nAlso could you try looking in the Error Log to see if there is any additional information: Window->Show View->Other...->Error Log\\n\\n(Before I go and download clean eclipse etc.).\", \"post_time\": \"2013-02-05 14:14:54\" },\n\t{ \"post_id\": 3334, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"Once submitted it creates a "Workunit" which is used to track the progress of the job
This is exactly what is not working in my eclipse configuration. Then if yours is working fine, I guess something is missing in mine but I don't know what.\\n\\nare you saying that if you also had a Java project that the Client Tools Package shows up there as well? (is this in the ECL Explorer or in the "Package Explorer")?
Yes this is what I can see in ECL explorer. There is definitely a problem in my configuration.\\n\\nIf you want to help, the first thing you have to do is to reproduce the bug under known conditions. Then, you can begin to analyse what the problem really is.\\n\\nIf you have time to do it, here is the procedure : \\n\\n- download the latest JUNO eclipse package\\n- unzip this package somewhere on your disk\\n- check if you have Microsoft visual studio 9.0 installed with vsvars32.bat exe in the Common7\\\\Tools directory.\\n- setup environment variables for your user\\n
\\n- launch eclipse\\n- install plugin from eclipse.hpccsystems.com/develop and restart eclipse\\n- prepare your run configuration with your hpcc server settings\\n- create a new ECL project with some test code\\n- run your configuration on your code\\n\\nAnd then tell me if a new workunit is created. At the same time, if you have a java project already in your eclipse workspace, check if your ECL package has been included in this project also.\\n\\n\\nJM.\", \"post_time\": \"2013-02-05 13:41:07\" },\n\t{ \"post_id\": 3332, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"run configuration does not display any result
\\nHaving created your run configuration, you then submit the ECL to the server via the "Run" button.\\nOnce submitted it creates a "Workunit" which is used to track the progress of the job.\\n\\nIn "Eclipse001.PNG" you can see a typical ECL development layout, with an (expanded) workunit with two results.\\n\\nIn "Eclipse002.PNG" you can see that clicking on the result will display the result web page within eclipse.\\n\\nClient Tools package fills all projects instead of ECL project only
\\nI am not 100% sure I understand - but are you saying that if you also had a Java project that the Client Tools Package shows up there as well? (is this in the ECL Explorer or in the "Package Explorer")?\", \"post_time\": \"2013-02-05 11:42:36\" },\n\t{ \"post_id\": 3330, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"Right now, there are two bugs in the ECL plugin : \\n\\n- run configuration does not display any result\\n- Client Tools package fills all projects instead of ECL project only\\n\\nJM.\", \"post_time\": \"2013-02-05 10:00:20\" },\n\t{ \"post_id\": 3329, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"Thanks for the information, I think all you now need to do is add a "Run Configuration":\\n1. Select "Run->Run Configurations"\\n2. Select "ECL Targets"\\n3. Press the "New Launch Configuration" Button.\\n4. Fill in your server details.\", \"post_time\": \"2013-02-05 09:05:57\" },\n\t{ \"post_id\": 3326, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \" Can you ensure you have latest version from eclipse.hpccsystems.com/develop
\\nWhat I read from eclipse for "ECL Language" plugin is : \\nECL Language\\t1.0.0.201301221331\\tECL_Language.feature.group\\tnull
\\nI assume it is the latest version.\\nVC or c++/linker etc. They are only needed if you want to create a standalone exe
Nice to know but I just followed the procedure explained at the beginning of this topic : The easy way is to use the software installer from within the Eclipse menu. Just go to Help -> Install New Software, click Add and type the following source: eclipse.hpccsystems.com/develop, and click Next.
.. and from there, tried to run ecl code, making the error messages mentioned above appear. \\n\\nAfter having fixed everything, I can currently read in the console output : C:\\\\Program Files\\\\HPCCSystems\\\\3.10.0\\\\clienttools\\\\bin\\\\eclcc --version\\nC:\\\\Program Files\\\\HPCCSystems\\\\3.10.0\\\\clienttools\\\\bin\\\\eclcc --version\\nC:\\\\Program Files\\\\HPCCSystems\\\\3.10.0\\\\clienttools\\\\bin\\\\eclcc -I"C:\\\\Users\\\\XXXXXX\\\\workspace\\\\XXXXXX" "C:\\\\Users\\\\XXXXXX\\\\workspace\\\\XXXXXX\\\\XXXXXX\\\\new_file.ecl"
\\n\\nCertainly, something is missing but I don't know what. Nothing appears in the console "Results" window. Only if I run the ecl code from ECLIDE can I, of course, get a new workunit in the "Workunits" section by reloading in eclipse.\\n\\nMaybe it would be better to take it from "eclipse.hpccsystems.com/stable" or from the github repository.\\n\\nJM.\", \"post_time\": \"2013-02-04 22:05:03\" },\n\t{ \"post_id\": 3324, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"ideal -\\n\\n1. Can you ensure you have latest version from eclipse.hpccsystems.com/develop - I am guessing from your comments you're using an old version.\\n\\n2. FYI - If you are targeting a HPCC-Platform server then there is no need for VC or c++/linker etc. They are only needed if you want to create a standalone exe from your ECL (which the eclipse plugin does not support currently).\", \"post_time\": \"2013-02-04 20:12:49\" },\n\t{ \"post_id\": 3317, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"I solved my issue concerning linkage by setting two variables in the windows environment : \\nLIB and LIBPATH, which must be updated or created in the user's environment, set with the path to the clienttool component files libraries, like this :\\n
\\n\\nNow, I am looking for results somewhere in eclipse (I guess in Result console window), but nothing appears.\\n\\nMy code is :\\nds:=DATASET([{1}],{INTEGER i});\\nOUTPUT(ds);
\\nI click in the editor of this code and run my ECL run configuration (validated by the test button). I use virtual image 3.10.0-1\\n\\nMy question is now : did I make some mistake somewhere?\\n\\n\\nps : I forgot to mention about installation : I confirm the windows Client tool package has to be installed in order to use the ECL plugin on windows, even if you use linux for the HPCC server.\", \"post_time\": \"2013-02-04 13:11:34\" },\n\t{ \"post_id\": 3316, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"There is also something weird : the "Client Tool community" package appears in all my non-ECL-related projects ..\", \"post_time\": \"2013-02-04 12:33:47\" },\n\t{ \"post_id\": 3312, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"I did have the following problems : \\n\\nError message during ECL compilation with cl.exe and the vsvars32 executable. \\n--> need to install Microsoft visual studio 9.0, the free express version.\\n\\nAnd also to set the windows environment variable VS90COMNTOOLS for an execution user other than system.\\n\\n\\nNow, I still have some troubles..\\n\\nLINK : fatal error LNK1181: failed to open input file 'eclrtl.lib'\\n(after translation)\\n\\nMy question : how is it possible to make the LINK executable reach the library file eclrtl.lib present in the componentfiles of the ECL installation?\\n\\nThanks\\nJM.\", \"post_time\": \"2013-02-04 11:05:16\" },\n\t{ \"post_id\": 3308, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"gsmith\", \"post_text\": \"How can I import source files ?
\\nAssuming you were using a local repository (not a legacy remote repository), then the easiest way to "import" your existing ECL is to:\\n1. Create a new ECL Project.\\n2. Create a new ECL File in that project.\\n3. Locate that file on your hard drive.\\n4. Copy your existing files to the same location.\\n5. In eclipse select "Refresh" for your project\\n\\nIf you were using a remote legacy repository:\\n1. Look in the ECLIDE install folder and you will find another application called AMT (Attribute Migration Tool).\\n2. Start AMT and select your legacy repository in the "Source" and pick a local folder as the "Target"\\n3. Export the ECL you want.\\n4. Once the files are local follow the steps above.\\n\\nHow can I configure remote cluster ?
\\nCreate a new "Run Configuration" and you can set the remote cluster information there.\\n\\nHow can I check and submit code ?
\\nSyntax check happens automatically every time you save a file.\\nYou submit ECL by using the "Run Configuration" above (the big "play" button).\\n\\nWhere is ECL eclipse plugin user manual ?
\\nWe are hoping to have some documentation ready at the same time we release the official beta version.\", \"post_time\": \"2013-02-04 07:16:50\" },\n\t{ \"post_id\": 3307, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nI'd like to import the source folder I used under ECLIDE, on windows, into my ECLIPSE workspace, to gain all the eclipse benefits while using ECL. \\nWhen I do the import after having created an ECL project, I get a window I don't understand. \\nThen I try to build a new file to test the ECL plugin but I don't know how to configure servers and submit ECL code.\\n\\nThen my questions : \\n\\n
\\n\\nThanks,\\nJM.\", \"post_time\": \"2013-02-04 00:26:02\" },\n\t{ \"post_id\": 3197, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"buptkang\", \"post_text\": \"Thanks for the help.\", \"post_time\": \"2013-01-26 23:17:06\" },\n\t{ \"post_id\": 3160, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse Plug In\", \"username\": \"flavio\", \"post_text\": \"Bo,\\n\\nThe easy way is to use the software installer from within the Eclipse menu. Just go to Help -> Install New Software, click Add and type the following source: eclipse.hpccsystems.com/develop, and click Next.\\n\\nYou may want to restart Eclipse to ensure that the installation went through properly.\\n\\nIf you want a more graphical tutorial, you can find one here: agile.csc.ncsu.edu/SEMaterials/tutorials/install_plugin/index_v35.html\\n\\nYou should probably double check that Eclipse -> Windows -> Preferences -> ECL -> Compiler is now populated with the correct path.\\n\\nPlease keep in mind that you need to download and install the HPCC Client Tools package for your Operating System and version (32 or 64 bits) for the Eclipse ECL plugin to work. You can find this package in the downloads section of our HPCC Systems portal.\\n\\nPlease let me know how it goes and if you have any trouble.\\n\\nThanks,\\n\\nFlavio\", \"post_time\": \"2013-01-23 19:43:03\" },\n\t{ \"post_id\": 3158, \"topic_id\": 696, \"forum_id\": 33, \"post_subject\": \"Eclipse Plug In\", \"username\": \"buptkang\", \"post_text\": \"Hello there, \\n\\nI am newbie toward ECL and its ecosystem, now I am wondering how could I integrate the the eclipse Plugin into my eclipse IDE in Windows environment. \\n\\nI have tried to put the github source code into the subfolder of eclipse plugins subfolder, but I do not see any changes or new items when I tried to create or import project in eclipse. \\n\\nAny comments or tips will be appreciated. \\n\\nThanks for help.\\n\\nBo\", \"post_time\": \"2013-01-23 18:39:13\" },\n\t{ \"post_id\": 3665, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"gsmith\", \"post_text\": \"But it does work the same way the comment/uncomment works in the java editor (so is more "eclipse" like).\\n\\nIn general I am trying to do things "the eclipse" way as appropriate...\\n\\nHaving said that it would be easy to add an "Invert Comment (Ctrl+Q)" option now (using https://github.com/hpcc-systems/Eclipse ... l/94/files as a template).\", \"post_time\": \"2013-03-08 09:33:45\" },\n\t{ \"post_id\": 3657, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"ideal\", \"post_text\": \"I have just tested updated plugin. Comment toggle is ok but not as good as in ECLIDE. \\nIn ECLIDE, when you have uncommented and commented lines then, toggle command uncomment commented lines and comment uncommented lines. 
With the ECL plugin, it is not the case any more.\\n\\nJM.\", \"post_time\": \"2013-03-07 10:00:20\" },\n\t{ \"post_id\": 3552, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"gsmith\", \"post_text\": \"https://github.com/hpcc-systems/EclipsePlugin/pull/94\", \"post_time\": \"2013-02-26 13:26:07\" },\n\t{ \"post_id\": 3473, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"ideal\", \"post_text\": \"Another limitation is : to visualize results, it has to be done in the ECL editor view. \\nIf not, the following error message appears : \\nAn error has occurred. See error log for more details.\\norg.eclipse.jdt.internal.ui.javaeditor.CompilationUnitEditor cannot be cast to org.hpccsystems.eclide.editors.ECLWindow
\\n\\nJM\", \"post_time\": \"2013-02-16 14:55:04\" },\n\t{ \"post_id\": 3470, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"ideal\", \"post_text\": \"There is some simple workaround. \\n\\nJust open ECL files in Java editor and then you have native CTRL-Shift-C or CTRL-/ to toggle comments. \\nYou can even customize eclipse to have CTRL-q bound to this command.\\nGo to windows/preferences/General/keys\\nFind "toggle Comment"\\nCopy the one with "Editing Java Source" in When column\\nBind it to CTRL-q by typing it in proper field\\n\\nLimitations : when not all lines in the selection are commented (or uncommented), does not toggle like in ECLIDE; in this case, it converts all lines to comments rather than toggling them.\\n\\nJM.\", \"post_time\": \"2013-02-16 13:34:53\" },\n\t{ \"post_id\": 3439, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"ideal\", \"post_text\": \"Ok, thanks,\\n\\nAs I need it, I will see if I can do something about it. \\n\\nJM.\", \"post_time\": \"2013-02-13 11:10:26\" },\n\t{ \"post_id\": 3438, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Re: Shortcuts in Eclipse plugin\", \"username\": \"gsmith\", \"post_text\": \"Quick answer:\\nSpecifically no.\\nGenerally yes.\\n\\nTo elaborate:\\nIt is our intention to implement all requested usability features as is practical (and this feature definitely falls into that category). \\n\\nThese requests will be manged with a combination of our online issue tracking system https://track.hpccsystems.com/browse/EPE and github https://github.com/hpcc-systems/EclipsePlugin. So unless an Issue/Feature is specifically listed it is not on the "todo" list.\\n\\nI will enter this specific request to get the ball rolling: https://track.hpccsystems.com/browse/EPE-1\\n\\n/Gordon.\", \"post_time\": \"2013-02-13 10:55:33\" },\n\t{ \"post_id\": 3437, \"topic_id\": 760, \"forum_id\": 33, \"post_subject\": \"Shortcuts in Eclipse plugin\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nShortcut CTRL-Q is very useful in ECLIDE to transform lines in comments. \\nIs it planned to include it in ECL plugin ? \\n\\nJM.\", \"post_time\": \"2013-02-13 10:00:52\" },\n\t{ \"post_id\": 3640, \"topic_id\": 813, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch does not refresh automatically\", \"username\": \"gsmith\", \"post_text\": \"https://track.hpccsystems.com/browse/EPE-14\", \"post_time\": \"2013-03-06 08:56:45\" },\n\t{ \"post_id\": 3639, \"topic_id\": 813, \"forum_id\": 33, \"post_subject\": \"ECL Watch does not refresh automatically\", \"username\": \"ideal\", \"post_text\": \"Hello,\\n\\nEach time I am modifying run configuration with a new public ip, I must restart Eclipse to be abble to see the new entry in "Servers" view. It does not refresh automatically.\\n\\nJM.\", \"post_time\": \"2013-03-06 08:51:58\" },\n\t{ \"post_id\": 3721, \"topic_id\": 834, \"forum_id\": 33, \"post_subject\": \"Eclipse IDE for HPCC Systems\", \"username\": \"HPCC Staff\", \"post_text\": \"A beta version of an ECL plugin for Eclipse is now available!\\n\\nMore information here:\\nhttp://hpccsystems.com/products-and-ser ... 
clipse-ide\", \"post_time\": \"2013-03-12 20:56:42\" },\n\t{ \"post_id\": 26953, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"aabha31\", \"post_text\": \"Excellent thread..!!!\\n\\nvfx companies in India\", \"post_time\": \"2019-07-23 12:11:47\" },\n\t{ \"post_id\": 3941, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"gsmith\", \"post_text\": \"I have opened an issue for this.\", \"post_time\": \"2013-04-16 17:30:55\" },\n\t{ \"post_id\": 3938, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Machine Learning code is running now.\\n\\nMy VL ecl demo is getting error : \\n\\nDescription\\tResource\\tPath\\tLocation\\tType\\nUNIMPLEMENTED feature at /Users/hpccbuild/jenkins/workspace/CE-Candidate-3.10.6-clienttools/CE/mac_osx_x86_64/HPCC-Platform/system/jlib/jutil.cpp(87)\\tvisualization_demo.ecl\\t/visualization_demo/demo\\tline 1\\tECL Problem\", \"post_time\": \"2013-04-16 15:30:34\" },\n\t{ \"post_id\": 3936, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"Added VL and ML as folders to my project and that appears to work as far as getting rid of the syntax check that the files weren't found.\\n\\nnot quite working yet but thats an issue with the thor deployment I'm setting up.\", \"post_time\": \"2013-04-16 15:01:29\" },\n\t{ \"post_id\": 3929, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"What about the ML and VL to import, how/where do I add them so they are found?\\n\\nbtw, I never used eclipse before so I'm a newbie.\", \"post_time\": \"2013-04-15 22:18:06\" },\n\t{ \"post_id\": 3927, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Re: Machine Learning and Graphics\", \"username\": \"gsmith\", \"post_text\": \"If (relative to the ECL file you are submitting) there is a folder/file called "files/manifest.xml" then the command line argument manifest=XXX is automatically added when compiling the ECL.\\n\\nOtherwise you can simply add "manifest=XXX" where XXX is the path to the manifest file to arguments list on the the Compiler Tab in the Run Configuration.\\n\\nGordon.\", \"post_time\": \"2013-04-15 14:37:18\" },\n\t{ \"post_id\": 3926, \"topic_id\": 880, \"forum_id\": 33, \"post_subject\": \"Machine Learning and Graphics\", \"username\": \"greg.whitaker@lexisnexis.com\", \"post_text\": \"What I've done so far:\\n1) I got ml and graphics working using Query Builder running on windows.\\nThe graphic needed the additional Arguments for the manifest file.\\n2) I got ecl code to run using Eclipse runing on a MAC.\\n\\nQuestion: Where are the documents/downloads for the ML lib and Graphic Lib for Eclipse runing on a MAC?\", \"post_time\": \"2013-04-15 14:21:11\" },\n\t{ \"post_id\": 4264, \"topic_id\": 952, \"forum_id\": 33, \"post_subject\": \"Podcast: Using the Eclipse IDE\", \"username\": \"HPCC Staff\", \"post_text\": \"A recent podcast featuring Gordon Smith, Sr Architect from LexisNexis and the developer of the Eclipse IDE, explains how the Eclipse plugin can be used to create and execute queries into your data on an HPCC Systems platform using ECL. \\n\\nListen in: http://cdn.hpccsystems.com/podcasts/201 ... 
se_IDE.mp3\", \"post_time\": \"2013-06-27 20:13:07\" },\n\t{ \"post_id\": 6227, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"JimD\", \"post_text\": \"Thanks for pointing this out! \\n\\nI have opened an issue in our tracking system and we will fix this as soon as possible. \\n\\nhttps://track.hpccsystems.com/browse/EPE-125\", \"post_time\": \"2014-08-15 15:56:43\" },\n\t{ \"post_id\": 6226, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"Steve.Steele\", \"post_text\": \"The documentation still refers to configuring via Windows->Preferences, so that should probably be updated too.\", \"post_time\": \"2014-08-15 15:43:28\" },\n\t{ \"post_id\": 4630, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"gsmith\", \"post_text\": \"I have opened a "no client tools located issue": https://track.hpccsystems.com/browse/EPE-70\\n\\nI suspect your issue isn't the auto find not locating anything (you can simply check this by expanding an ECL Project). But rather that clicking on the Run toolbar item, won't actually run anything until you have:\\n1. Created a Launch Configuration.\\n2. Actually Submitted ECL specifically to a Launch Configuration (this can be done by clicking on the "Run" drop down and select a Launch Configuration via the "Run As" item).\\n\\nFurther: There may be an issue with the plugin remembering the most recent launch configuration (I shall investigate further).\", \"post_time\": \"2013-09-23 14:31:02\" },\n\t{ \"post_id\": 4629, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"dabayliss\", \"post_text\": \"So given that when I do a run I get a null pointer exception; I'm guessing the bug should be: "autofind didn't"\\n\\nIf you are not able to reproduce this then let me know which details of my system you need.\", \"post_time\": \"2013-09-23 14:15:01\" },\n\t{ \"post_id\": 4628, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"gsmith\", \"post_text\": \"In the latest eclipse plugin there is no Window/Preferecnces/ECL options. It got removed when I added the "auto find" eclcc feature (since there is no need to nominate a default eclcc)...\", \"post_time\": \"2013-09-23 13:27:56\" },\n\t{ \"post_id\": 4620, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"rajunagarajan\", \"post_text\": \"Same error, can't see the preferences.\\n\\nAfter reviewing the plugins error log found this. 
Not sure if this is the reason or not..\\n\\njava.io.IOException: Unable to resolve plug-in "platform:/plugin/org.hpccsystems.eclide/icons/releng_gears.gif".\\n\\tat org.eclipse.core.internal.runtime.PlatformURLPluginConnection.parse(PlatformURLPluginConnection.java:64)\\n\\tat org.eclipse.core.internal.runtime.PlatformURLPluginConnection.resolve(PlatformURLPluginConnection.java:75)\\n\\tat org.eclipse.core.internal.boot.PlatformURLHandler.openConnection(PlatformURLHandler.java:67)\\n\\tat org.eclipse.osgi.framework.internal.protocol.URLStreamHandlerProxy.openConnection(URLStreamHandlerProxy.java:112)\\n\\tat java.net.URL.openConnection(Unknown Source)\\n\\tat org.eclipse.core.internal.runtime.PlatformURLConverter.toFileURL(PlatformURLConverter.java:33)\\n\\tat org.eclipse.core.runtime.FileLocator.toFileURL(FileLocator.java:206)\\n\\tat org.eclipse.jface.resource.URLImageDescriptor.getFilePath(URLImageDescriptor.java:137)\\n\\tat org.eclipse.jface.resource.URLImageDescriptor.createImage(URLImageDescriptor.java:157)\\n\\tat org.eclipse.jface.resource.ImageDescriptor.createImage(ImageDescriptor.java:227)\\n\\tat org.eclipse.jface.resource.ImageDescriptor.createImage(ImageDescriptor.java:205)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.SWTPartRenderer.getImage(SWTPartRenderer.java:193)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.StackRenderer.createTab(StackRenderer.java:680)\\n\\tat org.eclipse.e4.ui.workbench.renderers.swt.LazyStackRenderer.processContents(LazyStackRenderer.java:133)\", \"post_time\": \"2013-09-20 17:47:38\" },\n\t{ \"post_id\": 4619, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"dabayliss\", \"post_text\": \"Yes\", \"post_time\": \"2013-09-20 16:51:41\" },\n\t{ \"post_id\": 4618, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"JimD\", \"post_text\": \"Is your ECL Development Perspective open? \\n\\nWindow > Open Perspective > Other... Then select ECL Development\", \"post_time\": \"2013-09-20 16:28:29\" },\n\t{ \"post_id\": 4617, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"dabayliss\", \"post_text\": \"Done; same problem persists\", \"post_time\": \"2013-09-20 16:10:45\" },\n\t{ \"post_id\": 4616, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Re: Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"JimD\", \"post_text\": \"Please try Window -> Reset Perspective...\", \"post_time\": \"2013-09-20 15:59:01\" },\n\t{ \"post_id\": 4608, \"topic_id\": 1036, \"forum_id\": 33, \"post_subject\": \"Installed Eclipse plug-in; but no window preferences ...\", \"username\": \"dabayliss\", \"post_text\": \"Just followed the docs to install the eclipse plug-in. (I have a working 4.0 ECL-IDE already). It went through and found the .jar file and claimed to install it. Said it needed to reboot eclipse (which it did). 
But when Eclipse lumbered into life there was not an ECL option under window preferences.\\n\\nNow it DOES allow me to create a new ECL Project; but even in there I do not seem to have access to the Windows Preferences ....\\n\\nI followed the rest of the docs; most of the options are still there but when I get to the 'run' part I get an 'internal error- null pointer exception'\\n\\nHow do I set about finding out how to fix this?\\n\\nWindows 7\\nEclipse Platform\\n\\nVersion: 4.2.1.v20130118-173121-9MF7GHYdG0B5kx4E_SkfZV-1mNjVATf67ZAb7\\nBuild id: M20130204-1200\\n\\nBTW: ECL does not appear on the 'about' tab for eclipse\", \"post_time\": \"2013-09-18 18:40:52\" },\n\t{ \"post_id\": 4652, \"topic_id\": 1046, \"forum_id\": 33, \"post_subject\": \"Re: Problems with ECL Plug-in for Eclipse on Mac OS X\", \"username\": \"rhimbo\", \"post_text\": \"I created a new workspace and it fixed the problem with the errors on the ECL Development and ECL Watch perspectives. \\n\\nNow, back to the original question.... Where are the compiler settings for ECL? I don't see any \\n"Window -> Preferences" menu. \\n\\nUnder "Eclipse -> Preferences" I don't see any "ECL" menu as shown in the installation manual. Attached is a capture of my Preferences window. \\n\\nOf course, under "Java" there is a compiler menu, but this isn't the right one... \\n\\nThanks....\", \"post_time\": \"2013-09-26 17:08:29\" },\n\t{ \"post_id\": 4646, \"topic_id\": 1046, \"forum_id\": 33, \"post_subject\": \"Re: Problems with ECL Plug-in for Eclipse on Mac OS X\", \"username\": \"gsmith\", \"post_text\": \"Can you try the following things:\\n1. Double check you have the latest ECL Language plugin (Help/Check for Updates)\\n2. On the "ECL Developement" and "ECL Watch" perspectives (top right) right click and select "Reset" to reset them to default layouts.\\n3. If the above does not help can you try creating a new clean workspace (File/Switch Workspac/Other... and type in my new workspace name).\\n\\nIf after all that you are still getting this issue, can you click on the "Details" button and post the contents here for examination.\", \"post_time\": \"2013-09-26 08:19:12\" },\n\t{ \"post_id\": 4644, \"topic_id\": 1046, \"forum_id\": 33, \"post_subject\": \"Problems with ECL Plug-in for Eclipse on Mac OS X\", \"username\": \"rhimbo\", \"post_text\": \"Hi,\\n\\nMac OS X 10.8.5, Mountain Lion\\nEclipse Version: Kepler Release\\nBuild id: 20130614-0229\\n\\nInstalled Eclipse and then the ECL Plug-in from http://eclipse.hpccsystems.com/stable \\n\\nI'm following the instructions in the document "ECL_Plugin_for_EclipseIDE-4.0.0-1.pdf"\\n\\nOn page 9 there are instructions for checking the ECL Compiler Settings. The screen shot is quite a bit different than the menu on my Eclipse version. I've attached a screen shot of my preferences menu. Where are the ECL compiler settings now?\\n\\nAlso, I tried opening both the ECL Watch and ECL Development perspectives. I received errors for both. Screen shots are attached. 
\\n\\nAny ideas?\\n\\nThanks in advance....\", \"post_time\": \"2013-09-25 21:47:10\" },\n\t{ \"post_id\": 4682, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"Re: How to set Eclipse compiler preferences...?\", \"username\": \"gsmith\", \"post_text\": \"Thanks - there is an open issue on this: https://track.hpccsystems.com/browse/EPE-76\", \"post_time\": \"2013-09-30 14:43:28\" },\n\t{ \"post_id\": 4680, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"Re: How to set Eclipse compiler preferences...?\", \"username\": \"jeroenbaas\", \"post_text\": \"I entered here using a similar problem on locating the Eclipse compiler preferences in Windows. Just a headsup that the documentation still mentions this on page 9 today.\", \"post_time\": \"2013-09-30 14:24:23\" },\n\t{ \"post_id\": 4665, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"Re: How to set Eclipse compiler preferences...?\", \"username\": \"gsmith\", \"post_text\": \"You are mostly correct. In general, when you save (or press F7) eclipse will "syntax check" your ECL file, this is not quite the same as a full compile as it doesn't create the final binary output.\\n\\nFor your scenario I would suggest creating the Run Configuration for your VM, but then marking it as disabled (I suspect doing this will fix your g++ related error).\\n\\nSecondly, you have hit a known (fixed, but not released) issue: https://github.com/hpcc-systems/EclipsePlugin/pull/125 where the plugin is using a Java 1.7 feature (the Character.isAlphabetic call).\\n\\nI will revisit the issue and see when we can get a release for it out.\", \"post_time\": \"2013-09-29 07:11:27\" },\n\t{ \"post_id\": 4663, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"Re: How to set Eclipse compiler preferences...?\", \"username\": \"rhimbo\", \"post_text\": \"To simply compile some ECL code, I don't need to set up the "server" (my VM running the HPCC VM), correct?\\n\\nSo, upon saving an ECL file, the compiler should compile the source. If Eclipse knows to associate the compilation with the ECL compiler (in /opt/HPCCSystems/...) then a compilation should take place successfully, right. I only need a Run configuration to run the code, in which case I need to set up my VM and have it running.\\n\\nI created a simple project, hello_world. I typed one line of ECL source in it and saved the file. The attached window captures show the errors I'm getting in a) a dialog, and, b) my console output window upon saving the ECL source file. \\n\\nAm I missing something?\", \"post_time\": \"2013-09-29 00:03:33\" },\n\t{ \"post_id\": 4661, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"Re: How to set Eclipse compiler preferences...?\", \"username\": \"gsmith\", \"post_text\": \"In the latest version of the plugin, it will automatically locate and use the best matched compiler for your server. This removed the need for the global compiler settings that the documentation refers to.\\n\\nYou can still override the auto settings on the Run Configurations page on a per configuration basis.\\n\\nSo if you haven't done it yet, all you should need to do is:\\n1. Install the appropriate client tools for your server (from hpccsystems.com)\\n2. 
Create a run configuration for your server: Run/Run Configurations.\\n\\nAnd you should be good to go.\", \"post_time\": \"2013-09-28 06:12:48\" },\n\t{ \"post_id\": 4660, \"topic_id\": 1049, \"forum_id\": 33, \"post_subject\": \"How to set Eclipse compiler preferences...?\", \"username\": \"rhimbo\", \"post_text\": \"Mac OS X 10.8.5, Mountain Lion\\nEclipse Version: Kepler Release\\nBuild id: 20130614-0229\\n\\nHow does one set compiler preferences for ECL development? I see nothing in the menus. Has anyone successfully configured the compiler on Mac? I've attached some screen shots which should help describe my environment.\\n\\nI successfully installed the client tools in /opt/HPCCSystems. \\n\\nI saw a post at the URL below for a similar problem related to Windows platform installations. \\nviewtopic.php?f=33&t=1036\\n\\n\\nThanks....\", \"post_time\": \"2013-09-28 03:19:46\" },\n\t{ \"post_id\": 4705, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"gsmith\", \"post_text\": \"After the rename you will need to close the file and reopen it (so it opens in the ECL Editor and not the generic editor. \\n\\nThe ECL moniker may not have made it into a public release yet.\", \"post_time\": \"2013-10-01 10:24:05\" },\n\t{ \"post_id\": 4704, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"jeroenbaas\", \"post_text\": \"I've tried creating a new ECL Project and converting to ECL Project options, none of them make it so they look like that;\\n\\nas for the file issue: I think I was able to reproduce by creating a new file without extension, then rename the file to .ecl; the editor doesn't change if the file is still open.\", \"post_time\": \"2013-10-01 10:17:45\" },\n\t{ \"post_id\": 4703, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"gsmith\", \"post_text\": \"Like this:\", \"post_time\": \"2013-10-01 10:10:50\" },\n\t{ \"post_id\": 4701, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"jeroenbaas\", \"post_text\": \"Not sure what you mean by that:\\n\", \"post_time\": \"2013-10-01 10:07:49\" },\n\t{ \"post_id\": 4700, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"gsmith\", \"post_text\": \"Does your ECL project have an "ECL" moniker in the icon? \\nIf not right click and select "Convert to ECL Project".\", \"post_time\": \"2013-10-01 10:01:35\" },\n\t{ \"post_id\": 4699, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"jeroenbaas\", \"post_text\": \"All my ECL files have the extension .ecl;\\n\\nI'm a little puzzled. Eclipse was giving me this error continuously last night and this morning. 
I then switched perspective to PHP (also develop PHP with Eclipse) and now back again to ECL Development and I no longer have this error message.\", \"post_time\": \"2013-10-01 09:57:02\" },\n\t{ \"post_id\": 4697, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Re: Error: "org.eclipse.ui.editors.text.TextEditor cannot be\", \"username\": \"gsmith\", \"post_text\": \"I have opened an issue for this: https://track.hpccsystems.com/browse/EPE-79\\n\\nI suspect this is happening because the ECL is getting opened in the generic editor and not the ECL Editor. \\n\\nThis is probably happening because your ecl file is missing the ".ecl" file extension?\", \"post_time\": \"2013-10-01 09:25:00\" },\n\t{ \"post_id\": 4693, \"topic_id\": 1060, \"forum_id\": 33, \"post_subject\": \"Error: "org.eclipse.ui.editors.text.TextEditor cannot be cas\", \"username\": \"jeroenbaas\", \"post_text\": \"Whenever I try to run something from the Eclipse IDE, and click on a part of the result in the Workunits section, I see this error:\\n\\nAn error has occurred. See error log for more details.\\norg.eclipse.ui.editors.text.TextEditor cannot be cast to org.hpccsystems.eclide.editors.ECLWindow\", \"post_time\": \"2013-10-01 06:27:21\" },\n\t{ \"post_id\": 4711, \"topic_id\": 1061, \"forum_id\": 33, \"post_subject\": \"Re: OUTPUT fails on "Dataset too large to output to workunit\", \"username\": \"bforeman\", \"post_text\": \"Hi Jeroen,\\n\\nAn ECL file cannot have an exported definition and action (like OUTPUT) in the same file. In your code above, the EXPORTed dataset should be interpreted as an implicit action by the compiler, so no OUTPUT is really needed. If you want to use OUTPUT, run it in a separate builder window. For example:\\n\\n
IMPORT MyFolder;\\nOUTPUT(MyFolder.Persons);
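Putting that together with the outputlimit workaround mentioned elsewhere in this thread, a separate builder file might look like this (a hedged sketch, not the poster's actual code; MyFolder.Persons is the example definition above and 100 is only an illustrative limit in megabytes):

#OPTION('outputlimit', 100);   // raise the workunit result limit (MB, default 10) for this job only - illustrative value
IMPORT MyFolder;
OUTPUT(MyFolder.Persons);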
\\n\\nThe best practice for #OPTION is to place it at the top of the ECL file.\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2013-10-01 12:16:52\" },\n\t{ \"post_id\": 4698, \"topic_id\": 1061, \"forum_id\": 33, \"post_subject\": \"Re: OUTPUT fails on "Dataset too large to output to workunit\", \"username\": \"jeroenbaas\", \"post_text\": \"Thanks; Where am I supposed to put this line? I assume it is a string for the first parameter, right? so like this:\\n#OPTION('outputlimit',100);
\\n\\nIf I add this to my Persons.ecl file (anywhere before the OUTPUT), I get an error on the last line: \\nWHEN must be used to associate an action with a definition\", \"post_time\": \"2013-10-01 09:53:35\" },\n\t{ \"post_id\": 4696, \"topic_id\": 1061, \"forum_id\": 33, \"post_subject\": \"Re: OUTPUT fails on "Dataset too large to output to workunit\", \"username\": \"gsmith\", \"post_text\": \"I have opened an issue for this: https://track.hpccsystems.com/browse/EPE-78\\n\\nBackground:\\nWhen you submit a WU from the ECL IDE it forces a limit on the number of rows to output for all outputs which do not explicitly output to disk (this can be overridden by pressing "More").\\n\\nThe same is true for the eclipse plugin except it is currently defaulting to 0 (which means all).\\n\\nWorkaround: \\nadd #OPTION(outputlimit,<new value>) to your ECL.\", \"post_time\": \"2013-10-01 09:20:20\" },\n\t{ \"post_id\": 4694, \"topic_id\": 1061, \"forum_id\": 33, \"post_subject\": \"OUTPUT fails on "Dataset too large to output to workunit"\", \"username\": \"jeroenbaas\", \"post_text\": \"I tried running this ECL code on the class persons file:\\nLayoutPersons := RECORD\\n\\tINTEGER4\\tID;\\n\\tSTRING15\\tFirstName;\\n\\tSTRING25\\tLastName;\\n\\tSTRING15\\tMiddleName;\\n\\tSTRING2\\tNameSuffix;\\n\\tSTRING8\\tFileDate;\\n\\tUNSIGNED2\\tBureauCode;\\n\\tSTRING1\\tMaritalStatus;\\n\\tSTRING1\\tGender;\\n\\tUNSIGNED1\\tDependentCount;\\n\\tSTRING8\\tBirthDate;\\n\\tSTRING42\\tStreetAddress;\\n\\tSTRING20\\tCity;\\n\\tSTRING2\\tState;\\n\\tSTRING5\\tZipCode;\\t\\nEND;\\nEXPORT Persons := DATASET('~CLASS::AJB::INTRO::PERSONS',LayoutPersons,THOR);
\\n\\nFrom the ECL IDE, this runs fine. From Eclipe, it fails, with error:\\neclagent\\t10099: System error: 10099: Graph[1], workunitwrite[3]: Dataset too large to output to workunit (limit 10) megabytes, in result (sequence=0), Master exception\", \"post_time\": \"2013-10-01 06:29:54\" },\n\t{ \"post_id\": 5054, \"topic_id\": 1144, \"forum_id\": 33, \"post_subject\": \"Re: Fedora: installing\", \"username\": \"GregM\", \"post_text\": \"Great! I have this building on my machine. Makes the ".sh", "tar.gz", and "tar.Z" files, but no .rpm file. Should I be concerned about this?\\n\\nRunning the .sh file this "./hpccsystems-clienttools_community_4.3.0-trunk1Linux-x86_64/opt/HPCCSystems/4.3.0/clienttools/" directory is created. If I cd to it and try to run ./ecl I get this error:\\n\\n./ecl: error while loading shared libraries: libjlib.so: cannot open shared object file: No such file or directory
\\n\\nAm I doing something wrong? Do I need to add something to the "ldconfig"?\", \"post_time\": \"2013-12-16 12:47:41\" },\n\t{ \"post_id\": 5051, \"topic_id\": 1144, \"forum_id\": 33, \"post_subject\": \"Re: Fedora: installing\", \"username\": \"gsmith\", \"post_text\": \"I was able to do a Fedora 19 client tools build without too much difficulty.\\n\\nI updated the "build" wiki to include the prerequisites I needed: https://github.com/hpcc-systems/HPCC-Pl ... lding-HPCC\", \"post_time\": \"2013-12-12 10:25:58\" },\n\t{ \"post_id\": 5050, \"topic_id\": 1144, \"forum_id\": 33, \"post_subject\": \"Re: Fedora: installing\", \"username\": \"GregM\", \"post_text\": \"Fedora 19 on the desktop. Soon to be Fedora 20...\\n\\nI'd be prepared to package the client-tools for the OS from a tarball if there is one.\\n\\nThanks\", \"post_time\": \"2013-12-12 10:06:48\" },\n\t{ \"post_id\": 5046, \"topic_id\": 1144, \"forum_id\": 33, \"post_subject\": \"Re: Fedora: installing\", \"username\": \"gsmith\", \"post_text\": \"Not that I know of. Which version of fedora are you guys working on?\", \"post_time\": \"2013-12-11 22:20:41\" },\n\t{ \"post_id\": 5041, \"topic_id\": 1144, \"forum_id\": 33, \"post_subject\": \"Fedora: installing\", \"username\": \"GregM\", \"post_text\": \"Hi,\\n\\nIs there a build of "hpccsystems-clienttools_community" for fedora 19? The ones for CentOS don't work as they're linked to specific versions of libraries that have been superseded in fedora\\n\\n--> Processing Dependency: libarchive.so.2()(64bit) for package: hpccsystems-clienttools-4.2-community-4.2.01.x86_64\\n--> Processing Dependency: libboost_regex-mt.so.5()(64bit) for package: hpccsystems-clienttools-4.2-community-4.2.01.x86_64\\n--> Processing Dependency: libicudata.so.42()(64bit) for package: hpccsystems-clienttools-4.2-community-4.2.01.x86_64\\n--> Processing Dependency: libicui18n.so.42()(64bit) for package: hpccsystems-clienttools-4.2-community-4.2.01.x86_64\\n--> Processing Dependency: libicuuc.so.42()(64bit) for package: hpccsystems-clienttools-4.2-community-4.2.01.x86_64
\\n\\nThanks!\", \"post_time\": \"2013-12-10 18:03:17\" },\n\t{ \"post_id\": 5207, \"topic_id\": 1195, \"forum_id\": 33, \"post_subject\": \"Re: How do we bring in existing repositories into Eclipse\", \"username\": \"BenJones\", \"post_text\": \"Okay. I got it to work finally! I created a clean project called Legacy, which created a folder Legacy in my workspace. I then copied my existing repository folders into the Legacy folder. I then created a new XXX.ecl in Legacy to be like a builder window in the old ECL IDE. I put the actions I wanted to execute into it, then saved that file and then submitted it. That worked properly!\\n\\nThe thing that is a little different from the old ECL IDE is that there, I could create an unnamed builder window, put ECL actions in it, and then submit it without saving it first and it would work. When working in Eclipse, I have to create some XXX.ecl in the same project, put ECL actions into it, save it, and then submit it. If I don't save it before submitting it, it will give errors such as "Could not resolve attribute XXX".\", \"post_time\": \"2014-02-13 16:32:01\" },\n\t{ \"post_id\": 5199, \"topic_id\": 1195, \"forum_id\": 33, \"post_subject\": \"Re: How do we bring in existing repositories into Eclipse\", \"username\": \"BenJones\", \"post_text\": \"I was pretty much doing as you suggested. I have a single project and I added a linked folder pointing to "My Files", which contains several folders that are properly recognized as top-level modules in ECL IDE. However, they aren't recognized as such in Eclipse.\\n\\nNow, I noticed that under my project, there is a little orange box titled "Community Tools 4.2.0". It looks like an implicit folder. Underneath is another folder. Underneath that is the "Std" folder. Now, I can import "Std" just fine.\\n\\nI tried linking to the folder above "My Files" so that my module folders would be the same number of levels deep underneath the Project as "Std" was but that didn't help either.\\n\\nI also tried overriding the defaults in the Run Configuration Page, using a -I switch to point to "My Files". That didn't help either.\", \"post_time\": \"2014-02-12 14:03:54\" },\n\t{ \"post_id\": 5196, \"topic_id\": 1195, \"forum_id\": 33, \"post_subject\": \"Re: How do we bring in existing repositories into Eclipse\", \"username\": \"gsmith\", \"post_text\": \"There are many different ways to do what you are trying to do (and that is probably part of the problem).\\n\\nThe simplest way is to create a new ECL Project and call it "legacy" or "Existing" or such like. Then locate that folder on your hard drive (it will be in your workspace folder), next copy your ECL files/folders into that one and "refresh" the project in eclipse.\\n\\nAlternatively the link option should also work (but you may need a project to put the link in first), I use the import existing wizard when doing it this way.\\n\\nAlso the next way is to get the sources direct from git or svn etc. but that implies they are there already etc.\\n\\nNote: One other main difference (that may be tripping you up) is that you can have several Eclipse "Projects" and by default they will not be able to "see" each other. If you want one to use the contents of an other project, you need to right click the project and select Preferences and then add the other projects to the "references" list. 
(IOW You have to tell eclipse which projects reference other projects).\\n\\nHTH\", \"post_time\": \"2014-02-12 09:29:45\" },\n\t{ \"post_id\": 5193, \"topic_id\": 1195, \"forum_id\": 33, \"post_subject\": \"How do we bring in existing repositories into Eclipse\", \"username\": \"BenJones\", \"post_text\": \"I tried out the ECL plugin for Eclipse (http://eclipse.hpccsystems.com/stable) and tried the HelloWorld example. That worked just fine. However, it is not at all clear how I bring in an existing repository of code that I'd been working with in ECL IDE. In the ECL IDE, I would go to the Compiler tab in preferences and add my existing Repository folder (i.e. My Files) to the list of ECL Folders and then all existing module folders would magically appear in the Repository window in ECL IDE.\\n\\nIn Eclipse, it looks like we could create an overall project and then add a Linked Folder into that project to reference "ecllibraries" and "My Files" but it didn't seem to behave correctly as far as being able to "import xxx;" in ECL where "xxx" is the name of a folder found in "My Files".\\n\\nI also tried creating a project with the name of a folder "xxx", which would create the xxx folder relative to my "workspace" folder and then copy in the .ecl files from my older repository folder xxx but then when I created another project yyy and tried to create a .ecl file that did "import xxx;", it didn't like that either.\", \"post_time\": \"2014-02-11 14:26:26\" },\n\t{ \"post_id\": 5213, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"BenJones\", \"post_text\": \"Send it to benjamin.jones@lnssi.com\", \"post_time\": \"2014-02-14 15:55:56\" },\n\t{ \"post_id\": 5212, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"gsmith\", \"post_text\": \"Let me know where you would like me to send it to (the file upload limit here is too small)...\", \"post_time\": \"2014-02-14 15:52:25\" },\n\t{ \"post_id\": 5211, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"BenJones\", \"post_text\": \"I don't seem to be able to access the website you specified:\\n\\nhttp://1drv.ms/1bRCf0U\", \"post_time\": \"2014-02-14 15:25:31\" },\n\t{ \"post_id\": 5210, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"gsmith\", \"post_text\": \"Apologies I was under the impression you had already grabbed the packages.\\n\\nI have copied the contents of the update site to: http://1drv.ms/1bRCf0U let me know if you need anything else...\", \"post_time\": \"2014-02-14 14:35:14\" },\n\t{ \"post_id\": 5205, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"BenJones\", \"post_text\": \"We can bring things into the secure environment but JAR, EXE, DLL, etc. but they have to be quarantined and thoroughly scanned first to make sure they don't contain malware. That is why we don't have a direct connection.\\n\\nAnyway, we have other packages which were already brought it. For example, the Subclipse plugin for SVN. I looked at its directory and saw "site.xml", "artifacts.xml", and "features.xml". I tried accessing site.xml on your website and found it but it only contained one reference, to the "features/ECL_Language_4.2.0.101.jar" file. 
I tried to see if there were the other XML files I saw in in the Subclipse folder but couldn't find them. I don't have direct access to your website of course and don't know what other file names Eclipse might try to access. If you can tell me what other files are contained in your website, I can easily download them.\", \"post_time\": \"2014-02-13 15:29:24\" },\n\t{ \"post_id\": 5204, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"gsmith\", \"post_text\": \"Thought - \\n\\n1. Take a printout of the contents of your eclipse/plugins folder within the secure zone.\\n2. Setup a similar install outside your secure zone and install the ECL Plugin there.\\n3. Compare the contents of the eclipse/plugins folder with the one in the secure zone.\\n4. Take a copy of the missing packages to the secure zone \\n\\n(easy for me to say as I have no idea of how you get approval for eclipse packages - alternatively you could just take the entire eclipse folder, it amounts to the same thing from a security point of view I would have thought?)\", \"post_time\": \"2014-02-13 14:33:33\" },\n\t{ \"post_id\": 5203, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"Re: I'm trying to get the Eclipse plugin offline\", \"username\": \"gsmith\", \"post_text\": \"Hmmm - that is a little bit tricky as one of the nice things about the eclipse package system is that it looks after all the dependencies automatically.\\n\\nNow having that said that, I _think_ that the "Axis2 Tools" is the only additional package you will need and you can probably use the same trick to get the correct version as you did for the ECL Plugin.\\n\\nFWIW here is the calculated dependencies for the ECL Plugin:\\n javax.xml.rpc\\n org.apache.axis\\n org.eclipse.core.runtime\\n org.eclipse.ui,\\n org.eclipse.ui.console,\\n org.eclipse.ui.editors,\\n org.eclipse.ui.ide,\\n org.eclipse.ui.navigator,\\n org.eclipse.ui.views,\\n org.eclipse.core.runtime,\\n org.eclipse.help,\\n org.eclipse.jdt.ui,\\n org.eclipse.jface.text,\\n org.eclipse.debug.ui,\\n org.eclipse.core.resources\", \"post_time\": \"2014-02-13 14:25:27\" },\n\t{ \"post_id\": 5201, \"topic_id\": 1198, \"forum_id\": 33, \"post_subject\": \"I'm trying to get the Eclipse plugin offline\", \"username\": \"BenJones\", \"post_text\": \"The installation instructions for the Eclipse plugin to be installed in Eclipse from the website:\\n\\nhttp://eclipse.hpccsystems.com/stable
\\n\\nThe problem is that I need to bring this into a secure area so I don't have direct access to the website through the internet. I tried faking it by downloading the file "site.xml" from that website and seeing that it referenced:\\n\\nfeatures/ECL_Language_4.2.0.101.jar
\\n\\nSo I copied the jar file and the "site.xml" file to my computer and tried installing it locally. It almost worked but it bombed saying that it was missing a resource called:\\n\\norg.apache.axis 0.0.0
\\n\\nIs there anyway that I can get all the files I need in a zip file so that I can bring it into my site?\", \"post_time\": \"2014-02-12 21:42:06\" },\n\t{ \"post_id\": 7418, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"Looks like you were right, I updated client tools to 5.2 and removed the directory for the older version and the Project - Clean is using 5.2 eclcc for everything now.\\n\\nThe original issue is still there though, F7, Ctrl-S, and Edit/Check Syntax don't perform the syntax check and nothing shows in Console window.\\nI did notice that the F7 does save the file changes.\", \"post_time\": \"2015-04-20 12:06:15\" },\n\t{ \"post_id\": 7417, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"I am not sure what servers your targeting, but it might be a simple matter of removing your 5.0.4 ClientTools (or updating them to the latest 5.0.x).\", \"post_time\": \"2015-04-20 09:41:10\" },\n\t{ \"post_id\": 7413, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"Also, in my environment, Run - Run Configurations - Run does compile/syntax check correctly.\", \"post_time\": \"2015-04-19 14:13:35\" },\n\t{ \"post_id\": 7412, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"Additional info: This may be a separate issue...not sure.\\n\\nIf I select Project - Clean, it does compile/syntax all my *ecl files in my repository, I can see it in the console, but they are using the previous compiler I had been using 5.0.4. (I just upgraded to 5.2).\\n\\nExample console window: \\n/opt/HPCCSystems/5.0.4/clienttools/bin/eclcc -M -I/Users/gregwhitaker/Documents/workspace/ecl_repository /Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc/tester.ecl\\n/opt/HPCCSystems/5.0.4/clienttools/bin/eclcc -fsyntaxcheck=1 -E -I/Users/gregwhitaker/Documents/workspace/ecl_repository /Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc/tester.ecl\\n\\nI right-clicked on 1 attribute and selected Refresh and saw this in the console:\\n/opt/HPCCSystems/5.2.0/clienttools/bin/eclcc --version\\n\\nSo I right-clicked on the project folder and selected Refresh, then I selected Project - Clean.\\nThe console was showing 5.2.0 for the compile for all my files.\\n\\nI exited Eclipse and brought it back up and selected Project - Clean.\\nThe console went back to using 5.0.4 for the compile for all my files.\\n\\nPS, still isn't syntax checking in any other method: F7, Edit-Check Syntax, Ctrl-S.\", \"post_time\": \"2015-04-19 13:47:42\" },\n\t{ \"post_id\": 7411, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"Oh and I'm on OS X version 10.9.5\\n\\nEclipse IDE for Java Developers\\nVersion: Juno Service Release 2\\nBuild id: 20130225-0426\", \"post_time\": \"2015-04-19 13:04:14\" },\n\t{ \"post_id\": 7410, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"after typing "this" into one of my tester.ecl files the command line seemed to work fine. 
Then I removed "this" and reported no syntax errors so still worked fine.\\n\\ntim:~ gregwhitaker$ /opt/HPCCSystems/5.2.0/clienttools/bin/eclcc -fsyntaxcheck=1 -I/Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc /Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc/tester.ecl\\n/Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc/tester.ecl(14,1): error C2324: Unknown type 'this'\\n1 error, 0 warning\\ntim:~ gregwhitaker$ /opt/HPCCSystems/5.2.0/clienttools/bin/eclcc -fsyntaxcheck=1 -I/Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc /Users/gregwhitaker/Documents/workspace/ecl_repository/TIM/Misc/tester.ecl\\ntim:~ gregwhitaker$\", \"post_time\": \"2015-04-19 13:02:47\" },\n\t{ \"post_id\": 7327, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"In my case:\\n/opt/HPCCSystems/5.2.0/clienttools/bin/eclcc -fsyntaxcheck=1 -I/Users/gordon/Documents/workspace_ecl/TestPRJ /Users/gordon/Documents/workspace_ecl/TestPRJ/new_file.ecl
\\n\\nProduces:\\n/Users/gordon/Documents/workspace_ecl/TestPRJ/new_file.ecl(6,10): error C3002: syntax error near ";" : expected :=\\n1 error, 0 warning
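If eclcc reports errors correctly from the terminal like this but nothing appears in Eclipse on save or F7, one quick way to confirm whether the editor-side check is running at all is to save a deliberately broken file and watch the Console/Problems views. A hedged sketch (any malformed line will do; the exact error code and message may differ from the C3002 shown above):

// deliberately malformed ECL - the missing closing parenthesis must be flagged if the syntax check actually ran
OUTPUT('syntax check test'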
\", \"post_time\": \"2015-04-13 09:44:02\" },\n\t{ \"post_id\": 7325, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"(Back from vacation...)\\n\\nI have just retested this (on a clean installation of the latest OSX and the 5.2.0 Client Tools) and it appears to be working fine.\\n\\nCan those of you who are experiencing this issue, please try running the following command line in your terminal (adjust the names to match your files, FYI I copied this line from my Console Window):\\n\\n/opt/HPCCSystems/5.2.0/clienttools/bin/eclcc -fsyntaxcheck=1 -I/Users/gordon/Documents/runtime-EclipseApplication/TestPRJ /Users/gordon/Documents/runtime-EclipseApplication/TestPRJ/new_file.ecl\\n
\\n\\nAnd report back (with the version of OSX your using) - thx.\", \"post_time\": \"2015-04-13 09:39:30\" },\n\t{ \"post_id\": 7230, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I still have the same issue as well. The proposed resolution never changed anything. Even with the latest updates since then.\", \"post_time\": \"2015-03-31 13:44:36\" },\n\t{ \"post_id\": 7222, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"I Have the same issue about no syntax checking. It has worked in the past. I'm on 5.0.4 clienttools.\\nCtrl-S after changes, File/Save, F7, and Edit/Syntax check does nothing.\", \"post_time\": \"2015-03-30 12:52:47\" },\n\t{ \"post_id\": 6460, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"...no... But the following (finally does): https://track.hpccsystems.com/browse/HPCC-12420\\n\\nEssentially the stdout / stderr of eclcc are getting interrupted prematurely on OSX.\", \"post_time\": \"2014-10-16 12:15:42\" },\n\t{ \"post_id\": 6454, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Will this also fix the submit issue?\", \"post_time\": \"2014-10-15 17:53:43\" },\n\t{ \"post_id\": 6453, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Ok I was able to reproduce on an OSX machine here (My linux machine had a server installed, so it just worked...): https://track.hpccsystems.com/browse/EPE-135\\n\\nI will try and push a version to "http://eclipse.hpccsystems.com/test" in the morning.\", \"post_time\": \"2014-10-15 17:51:32\" },\n\t{ \"post_id\": 6452, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I have the updated Eclipse plugin and updated Client tools. Both are 5.0.2. Still get \\n\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/0g/9gr4m7cd2pbdgn5cn9kj01b40000gn/T"): error=2, No such file or directory\", \"post_time\": \"2014-10-15 12:02:40\" },\n\t{ \"post_id\": 6450, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Sanity Check - That fix is in the client tools version 4.2.10 and 5.0.2 (and not in the Eclipse Plugin)?\", \"post_time\": \"2014-10-15 07:04:08\" },\n\t{ \"post_id\": 6446, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Got the latest update...still not fixed...\", \"post_time\": \"2014-10-15 00:21:04\" },\n\t{ \"post_id\": 6263, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"That specific issue (https://track.hpccsystems.com/browse/HPCC-12145) has been resolved and will be included in the 5.0.2-rc1 release, which is currently scheduled for later this week.\\n\\nWhen the ECL plugin submits ECL to the server, it attempts to use the appropriate client tools for the job. It does this by comparing the version of the server with the version of the client tools. 
What you are seeing in the log is the "locating" of the available Client Tools.\", \"post_time\": \"2014-09-01 08:33:19\" },\n\t{ \"post_id\": 6262, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"I also noticed in the console window of eclipse when I click on my ecl project its looking for a laundry list of ECL compilers.\\n\\n/opt/HPCCSystems/4.2.4/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/bin/eclcc --version\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/76/7_pflr0x6v384ykb63bbb5y00000gn/T"): error=2, No such file or directory\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/bin/eclcc --version\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/76/7_pflr0x6v384ykb63bbb5y00000gn/T"): error=2, No such file or directory\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/4.2.4/clienttools/bin/eclcc --version\\n/Users/gregwhitaker/Downloads/4.2.4/clienttools/bin/eclcc --version\", \"post_time\": \"2014-08-30 18:05:05\" },\n\t{ \"post_id\": 6261, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"greg.whitaker\", \"post_text\": \"I upgraded to 5.0.0 on Mac I get same error on compiles:\\nhostname mymacname.local not resolved, using localhost\\n\\nSwitching back to 4.2.4 works fine.\", \"post_time\": \"2014-08-30 18:02:35\" },\n\t{ \"post_id\": 6258, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I am a registered user on both of my macs. Just for some clarification, I didn't have issues with OSX, Eclipse, and HPCC pre-5.0.0.\", \"post_time\": \"2014-08-25 17:07:19\" },\n\t{ \"post_id\": 6257, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"No I mean when you log onto your Mac - on my machine I have two accounts, Mine: "gordon" and a "Guest", Guest is a restricted and temporary account (OSX will delete all files when you log out again).\", \"post_time\": \"2014-08-25 17:04:30\" },\n\t{ \"post_id\": 6256, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I am not sure what you mean by guest. I am working on two clusters. The first is the ML Dev cluster where I use my LN information. The second is my cluster set up at the university. 
Both of which I have no problem when using a Windows machine with Eclipse and the exact same credentials I use on my Mac.\", \"post_time\": \"2014-08-25 16:59:13\" },\n\t{ \"post_id\": 6255, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"I have found the cause and opened an issue report: https://track.hpccsystems.com/browse/HPCC-12145\\n\\nIn my case I only get the issue when logged in as "Guest", are you running as a restricted user?\", \"post_time\": \"2014-08-25 16:53:45\" },\n\t{ \"post_id\": 6254, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Yes that is exactly what I get.\", \"post_time\": \"2014-08-25 15:21:44\" },\n\t{ \"post_id\": 6253, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"I think I may have reproduced your issue. Can you open a terminal and run:\\n\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -M -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n
\\n(Where the above corresponds to one of your files from earlier in the thread).\\n\\nOn the machine I have reproduced this on, I am seeing two unexpected errors prior to the expected output:\\n\\nhostname MacBook-OSX-6.local not resolved, using localhost\\nhostname MacBook-OSX-6.local not resolved, using localhost\\n...\\n
\\n\\nAre you seeing something similar?\", \"post_time\": \"2014-08-25 06:32:23\" },\n\t{ \"post_id\": 6250, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"[quote="gsmith":1rh62zyz]I am just coming from this thread: viewtopic.php?t=1425&p=6231#p6231 (where the OSX user is using plugin to target an older system)\\n\\nWhich made me revisit your setup.\\n\\nYour running 5.0.0-3 client tools and the server is 5.0.0-rc8.\\n\\nWhile mixing those two setups is probably fine for the most part, I am wondering if there is an issue with eclipse in the "auto find best compiler for server" logic. Can you grab the 5.0.0-rc8 client tools and remove your 5.0.0-3 ones and see if that helps?\\n\\n\\nIs there anything else I can try? Both are now 5.0.0-3 but the error still exists.\", \"post_time\": \"2014-08-23 22:02:14\" },\n\t{ \"post_id\": 6236, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Just updated the hpcc platform so that it is consistent with client tools... Both are 5.0.0-3\\nbut the issues still exist.\\n\\n\\nServer: community_5.0.0-3\\nCompiler: 5.0.0 community_5.0.0-3\", \"post_time\": \"2014-08-18 20:42:01\" },\n\t{ \"post_id\": 6235, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I don't know where I can get Client Tools 5.0.0-rc8 from. Let me see if someone will update the server instead.\", \"post_time\": \"2014-08-18 19:17:47\" },\n\t{ \"post_id\": 6234, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"I am just coming from this thread: viewtopic.php?t=1425&p=6231#p6231 (where the OSX user is using plugin to target an older system)\\n\\nWhich made me revisit your setup.\\n\\nYour running 5.0.0-3 client tools and the server is 5.0.0-rc8.\\n\\nWhile mixing those two setups is probably fine for the most part, I am wondering if there is an issue with eclipse in the "auto find best compiler for server" logic. Can you grab the 5.0.0-rc8 client tools and remove your 5.0.0-3 ones and see if that helps?\", \"post_time\": \"2014-08-18 17:24:18\" },\n\t{ \"post_id\": 6233, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"<Archive build="community_5.0.0-3"\\n eclVersion="5.0.0"\\n legacyImport="0"\\n legacyWhen="0">\\n <Query attributePath="tmp"/>\\n <Module key="" name="">\\n <Attribute key="tmp" name="tmp" sourcePath="./tmp.ecl">\\n 123 \\n </Attribute>\\n </Module>\\n</Archive>
\", \"post_time\": \"2014-08-18 14:59:36\" },\n\t{ \"post_id\": 6225, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Those screenshots all look good.\\n\\nCan you open a terminal and try the following:\\n\\n\\necho 123 > tmp.ecl\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -E ./tmp.ecl\\n
\\n\\nYou should get a XML structure echoed out to the terminal.\", \"post_time\": \"2014-08-15 15:09:47\" },\n\t{ \"post_id\": 6219, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Editor/Compiler/Platform\", \"post_time\": \"2014-08-14 15:10:17\" },\n\t{ \"post_id\": 6218, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Can I get a screen shot (for sanity) of:\\n1. Run Configuration / HPCC Platform\\n2. Run Configuration / ECLCC Compiler\\n3. Open Editor Window\\n\\n(I have attached mine for comparison)\", \"post_time\": \"2014-08-14 14:57:00\" },\n\t{ \"post_id\": 6215, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Tried updating Java and got the newest version of Eclipse Luna. The ECL plugin does not work on Mac.\", \"post_time\": \"2014-08-11 23:36:05\" },\n\t{ \"post_id\": 6203, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Here is the update. Just to clear some stuff up, as they are now clear to me:\\n\\nOverriding defaults does get rid of the message\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/0g/9gr4m7cd2pbdgn5cn9kj01b40000gn/T"): error=2, No such file or directory\\n\\nBut, it still does not run the syntax check even though it says it does.\\n\\nOUTPUT('Hello world');jaiofjoiaejfoajoiw\\n\\nIs not seen as an error. Output to the console is\\n\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n\\n\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -M -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n\\n\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -fsyntaxcheck=1 -E -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n\\nAlso, hitting submit still does not submit the workunit to the cluster.\", \"post_time\": \"2014-08-08 16:16:33\" },\n\t{ \"post_id\": 6202, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Here is the update. Just to clear some stuff up, as they are now clear to me:\\n\\nOverriding defaults does get rid of the message\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/0g/9gr4m7cd2pbdgn5cn9kj01b40000gn/T"): error=2, No such file or directory\\n\\nBut, it still does not run the syntax check even though it says it does.\\n\\nOUTPUT('Hello world');jaiofjoiaejfoajoiw\\n\\nIs not seen as an error. 
Output to the console is\\n\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -M -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -fsyntaxcheck=1 -E -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n\\nAlso, hitting submit still does not submit the workunit to the cluster.\", \"post_time\": \"2014-08-08 16:14:10\" },\n\t{ \"post_id\": 6197, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Can you try turning off the "Override Defaults" on ECLCC Compiler Tab on the Run Configuration?\", \"post_time\": \"2014-08-08 07:06:30\" },\n\t{ \"post_id\": 6194, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Hitting test in the Run Configurations does and always has worked properly. The path to eclcc is also correct in the compiler tab.\", \"post_time\": \"2014-08-07 16:41:09\" },\n\t{ \"post_id\": 6193, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"I am looking at this on a Mac now. I am also getting the error:\\n/opt/HPCCSystems/bin/eclcc --version\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/dm/wf0jns3j169c4gzfxzxbyy200000gn/T"): error=2, No such file or directory\\n
\\n\\nBut it can be ignored - for some reason it locating an eclcc at /opt/HPCCSystems/bin/eclcc (which I will fix).\\n\\nAlso the submitting is working fine for me. Can you confirm that can open ECL Watch on the Mac and that the "Test" button is working on the configuration page?\", \"post_time\": \"2014-08-07 13:01:32\" },\n\t{ \"post_id\": 6154, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"I can't even submit anything on my mac\", \"post_time\": \"2014-08-02 20:19:53\" },\n\t{ \"post_id\": 6153, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Seems to work now on window... \\n\\nBut on Mac (my primary machine) I get this message:\\n\\nCannot run program "/opt/HPCCSystems/bin/eclcc" (in directory "/var/folders/0g/9gr4m7cd2pbdgn5cn9kj01b40000gn/T"): error=2, No such file or directory\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc --version\\n/opt/HPCCSystems/5.0.0/clienttools/bin/eclcc -E -fapplyInstantEclTransformations=1 -fapplyInstantEclTransformationsLimit=100 -I/Users/micevepay/Documents/workspace/hello_world /Users/micevepay/Documents/workspace/hello_world/hello.ecl\\n\\nThough the folder var/folders/0g/9gr4m7cd2pbdgn5cn9kj01b40000gn/T does exist.\\n/opt/HPCCSystems/bin/eclcc exists as well.\", \"post_time\": \"2014-08-02 20:19:12\" },\n\t{ \"post_id\": 6091, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Some more things to try (one at a time):\\n1. Can you confirm that the run configurations->compiler tab is set to its defaults.\\n2. Can rename your "C:\\\\Program Files (x86)\\\\HPCCSystems\\\\4.2.4\\\\clienttools" folder to "C:\\\\Program Files (x86)\\\\XXXXSystems\\\\4.2.4\\\\clienttools" (so it is no longer auto found by eclipse.\\n3. Right click on the "ECL Development" perspective and click "reset".\\n4. Create a new empty workspace (file->Switch Workspace->other) and retest from clean environment...\", \"post_time\": \"2014-07-21 20:12:21\" },\n\t{ \"post_id\": 6090, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Nothing in the console window changes from above.\", \"post_time\": \"2014-07-21 20:06:34\" },\n\t{ \"post_id\": 6089, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"[attachment=0:19x97mqy]screen.PNG\", \"post_time\": \"2014-07-21 20:05:33\" },\n\t{ \"post_id\": 6088, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Image not uploading properly\", \"post_time\": \"2014-07-21 20:04:27\" },\n\t{ \"post_id\": 6087, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Can you:\\n1. Press F7 and see if the syntax check happens in the console window.\\n2. 
Select "Edit->Check Syntax" and see if the syntax check happens in the console view...\", \"post_time\": \"2014-07-21 20:03:17\" },\n\t{ \"post_id\": 6086, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Sorry didn't see the bottom of your screenshot\", \"post_time\": \"2014-07-21 20:00:39\" },\n\t{ \"post_id\": 6085, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Requested screenshot\", \"post_time\": \"2014-07-21 19:57:24\" },\n\t{ \"post_id\": 6078, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Can you send me a screenshot, similar to this:\\n[attachment=0:2wkga3hm]EclipseWhole.PNG\\nI have highlighted some areas that you may want to check...\", \"post_time\": \"2014-07-21 19:22:40\" },\n\t{ \"post_id\": 6073, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"micevepay\", \"post_text\": \"Syntax check is not happening on save or F7. I actually have to generate a workunit by hitting submit or compile. Hasn't worked since I started using ECL with Eclipse in April.\", \"post_time\": \"2014-07-21 18:20:38\" },\n\t{ \"post_id\": 5310, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Re: Syntax Check Failure\", \"username\": \"gsmith\", \"post_text\": \"Let me answer Question 2 first:\\nTo perform a fresh syntax check of your entire source tree, just select Project/Clean. (This may also fix your actual issue)\\n\\nQuestion 1:\\nI am not sure I have a quick fix solution here, but some explanation may help you resolve it: \\nThe plugin is designed to work with several servers (and consequently several different versions of server). These servers are added using the "Run Configurations" in Eclipse. \\nAssuming you have left the "ECLCC Compiler" settings at their defaults (for each run configuration), the plugin will attempt to locate and use the best matching ClientTools version on you local machine. \\nThis is where it can get fuzzy as typically you will only have the one source tree, but it is being potentially compiled and submitted using several versions of the compiler (worst case is you may have compile errors with version A of the compiler but not version B).\\n\\nNote: If in doubt the plugin will typically use the compiler with the highest version.\\n\\nOn startup it doesn't do any syntax check as such, but will have a persisted state from a previous run - now a "save file" syntax check versus a F7 syntax check versus a "submit to server" syntax check may have subtly different persistence - you may find touching the files in question and then saving them fixes your issue (for example)...\\n\\nFurther I would double check all your run configurations for valid eclcc settings (ideally set to default).\\n\\nHTH\\n\\nGordon.\", \"post_time\": \"2014-03-03 14:42:42\" },\n\t{ \"post_id\": 5309, \"topic_id\": 1224, \"forum_id\": 33, \"post_subject\": \"Syntax Check Failure\", \"username\": \"BenJones\", \"post_text\": \"Often when I start up Eclipse, a number of files in a given folder in the ECL Explorer Window show error flags. These go away if I do a syntax check on them individually. It is as though the system comes up in a weird state where a syntax check is done with the wrong settings but once I have connected to the particular cluster, the issues will go away. 
That looks like a bug of some sort. Meanwhile, is there any way to refresh the view so that the syntax check is done on all the files in a folder to clear those error flags.\", \"post_time\": \"2014-03-03 13:55:44\" },\n\t{ \"post_id\": 5547, \"topic_id\": 1284, \"forum_id\": 33, \"post_subject\": \"Re: Visualizing ECL Results in Eclipse\", \"username\": \"gsmith\", \"post_text\": \"When working in eclipse it should "just work" without the need to fiddle with compiler settings (as long as eclipse can auto detect the manifest file).\\n\\nTo test I simply:\\n\\nTook the ecl-samples visualizations example\\nOpened the google_charts/pie.ecl example\\nSubmitted it to my VM\\n
\\n\\nI then navigated to the outputs page and clicked on the "Views" link. (see screen shots)\\n[attachment=2:1yp78s5u]pie001.PNG\\n[attachment=1:1yp78s5u]pie002.PNG\\n[attachment=0:1yp78s5u]pie003.PNG\", \"post_time\": \"2014-04-25 04:52:38\" },\n\t{ \"post_id\": 5546, \"topic_id\": 1284, \"forum_id\": 33, \"post_subject\": \"Visualizing ECL Results in Eclipse\", \"username\": \"jacob\", \"post_text\": \"I've been trying to add data visualizations to my results in the Eclipse IDE. I tried adding the sample google charts manifest to the ECLCC Compiler Arguments with no success. I tried Common, Local Compile and Remote Compile. \\n\\n-manifest="C:\\\\Users\\\\me\\\\workspace\\\\ecl-samples-master\\\\visualizations\\\\google_charts\\\\files\\\\manifest.xml"\\n\\nThe documentation http://hpccsystems.com/download/docs/visualizing-ecl-results provides ECL IDE examples and mentions the ECLCC but I couldn't quite translate that into working for Eclipse.\", \"post_time\": \"2014-04-24 19:57:42\" },\n\t{ \"post_id\": 5786, \"topic_id\": 1332, \"forum_id\": 33, \"post_subject\": \"Re: Using a github repository with Eclipse IDE\", \"username\": \"JimD\", \"post_text\": \"Tim,\\n\\nI am working of instructions for that. I will send you a draft for review.\", \"post_time\": \"2014-05-30 18:39:17\" },\n\t{ \"post_id\": 5782, \"topic_id\": 1332, \"forum_id\": 33, \"post_subject\": \"Re: Using a github repository with Eclipse IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"gsmith,\\n\\nI've ran my first ECL program inside Eclipse. I'd like to set it up to use a repository on github. \\n\\nCan you give me some help or give me a link to some documentation that can help me?\\n\\nTim\", \"post_time\": \"2014-05-30 15:38:53\" },\n\t{ \"post_id\": 5775, \"topic_id\": 1332, \"forum_id\": 33, \"post_subject\": \"Re: Using a github repository with Eclipse IDE\", \"username\": \"gsmith\", \"post_text\": \"Git + Eclipse does work well together, if your new to eclipse it can be a little bit tricky to set up just the way you like it (well just the way I like it), but once your setup it works well.\", \"post_time\": \"2014-05-29 20:15:28\" },\n\t{ \"post_id\": 5770, \"topic_id\": 1332, \"forum_id\": 33, \"post_subject\": \"Using a github repository with Eclipse IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"Has anyone used a github code respository with Eclipse IDE?\\n\\nDo they work well together? \\n\\nHas anyone had problems and if so what kinds of problems?\", \"post_time\": \"2014-05-29 16:41:06\" },\n\t{ \"post_id\": 5781, \"topic_id\": 1334, \"forum_id\": 33, \"post_subject\": \"Re: No ECL Preferences in Eclipse IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"I attempted to get to the ECL Watch page through IE and couldn't. So, it appears there is a problem with the ML Dev Cluster server.\", \"post_time\": \"2014-05-30 14:03:39\" },\n\t{ \"post_id\": 5780, \"topic_id\": 1334, \"forum_id\": 33, \"post_subject\": \"Re: No ECL Preferences in Eclipse IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"OK, I'm now on the "Run Configurations" page, where I enter the server IP address, port, my username and password, etc. 
After entering everything down through the ECL Watch address (which is automatically filled in), I click on test (to see if ECL Watch comes up) and I get the following error message:\\n\\n; nested exception is: \\n\\tjava.net.SocketTimeoutException: connect timed out\", \"post_time\": \"2014-05-30 13:48:11\" },\n\t{ \"post_id\": 5774, \"topic_id\": 1334, \"forum_id\": 33, \"post_subject\": \"Re: No ECL Preferences in Eclipse IDE\", \"username\": \"gsmith\", \"post_text\": \"Those docs are a bit out of date, you can ignore that step and when you get to the "Launch Configuration" part you will similar setting there.\", \"post_time\": \"2014-05-29 20:13:49\" },\n\t{ \"post_id\": 5773, \"topic_id\": 1334, \"forum_id\": 33, \"post_subject\": \"No ECL Preferences in Eclipse IDE\", \"username\": \"tlhumphrey2\", \"post_text\": \"The Eclipse IDE documentation says in step 9 of the installation instructions, after installing the ECL plugin to go to preferences where one should see a screen that shows that ECL preferences. But, my screen looks like the attached file, i.e. not ECL preferences are shown.\", \"post_time\": \"2014-05-29 19:45:34\" },\n\t{ \"post_id\": 5836, \"topic_id\": 1344, \"forum_id\": 33, \"post_subject\": \"Re: Import two local git repositories into one project.\", \"username\": \"gsmith\", \"post_text\": \"[The following "help" is generic to eclipse and not specific to the ECL Plugin]\\n\\nWhat catches me each time I try to import a git repository, is that it looks like it works, but doesn't show up in my "Package Explorer" (or ECL Explorer).\\n\\nIt turns out they are imported into the "Git Repositories" view!\\n\\nSo for sanity, show the Git Repositories View: Window->Show View->Other...->Git->Git Repositories\\n\\nIn that window you may find some repos that you imported previously, or it might be empty.\\n\\nImport some git repositories (you can import from local git repositories as well as remote ones).\\n\\nOnce you have some repositories right click on the repository you want in your Explorer view and select "Import Projects..."\\n\\nIf you repositories have an eclipse project in them already select "Import Existing Projects". If not select "Import as general project".\\n\\nThe should now appear in your "Explorer" view!\\n\\nFinally, if this imported folder contains ECL, you can convert it to an ECL project by right clicking and selecting "Convert to ECL Project".\\n\\nGordon.\", \"post_time\": \"2014-06-05 13:49:27\" },\n\t{ \"post_id\": 5834, \"topic_id\": 1344, \"forum_id\": 33, \"post_subject\": \"Import two local git repositories into one project.\", \"username\": \"tlhumphrey2\", \"post_text\": \"Has anyone had any luck importing two or more local git repositories into a single project?\\n\\nI could import one but not two. The second, I had to import it as a File System.\", \"post_time\": \"2014-06-04 17:49:58\" },\n\t{ \"post_id\": 7229, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"greg.whitaker\", \"post_text\": \"Also, when I right-click where the image should be it only displays a button that says "Reload". Clicking the button has no effect.\\n\\nBTW, I consider this a very minor issue. 
The lack of syntax checking is a much bigger issue for me at least.\", \"post_time\": \"2015-03-30 16:43:08\" },\n\t{ \"post_id\": 7228, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"greg.whitaker\", \"post_text\": \"MAC: Eclipse Plug-in: \\nI have the same issue, ECL Watch from Workunit Tabs does not display.\\nEcl Watch does display in the 'ECL Watch' Perspective and it does display in \\nthe 'Run Configurations' window when I click the TEST button.\\n\\nHere is what I found:\\nEntries in error log when I click on WU tabs inside eclipse:\\n\\nMessage Unhandled event loop exception\\nPlug-in: org.eclipse.ui\\n \\nException stack trace: \\n org.eclipse.swt.SWTException: Widget is disposed\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4361)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4276)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4247)\\n\\tat org.eclipse.swt.widgets.Widget.error(Widget.java:775)\\n\\tat org.eclipse.swt.widgets.Widget.checkWidget(Widget.java:569)\\n\\tat org.eclipse.swt.widgets.Tree.getItems(Tree.java:1669)\\n\\tat org.eclipse.jface.viewers.TreeViewer.getChildren(TreeViewer.java:171)\\n\\tat org.hpccsystems.eclide.ui.viewer.platform.PlatformViewer$MyTreeViewer.getElements(PlatformViewer.java:77)\\n\\tat org.hpccsystems.eclide.ui.viewer.platform.WorkunitsViewer.select(WorkunitsViewer.java:32)\\n\\tat org.hpccsystems.eclide.editors.ECLWindow$3.widgetSelected(ECLWindow.java:250)\\n\\tat org.eclipse.swt.widgets.TypedListener.handleEvent(TypedListener.java:248)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(EventTable.java:84)\\n\\tat org.eclipse.swt.widgets.Display.sendEvent(Display.java:4136)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1458)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1481)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1466)\\n\\tat org.eclipse.swt.widgets.Widget.notifyListeners(Widget.java:1271)\\n\\tat org.eclipse.swt.custom.CTabFolder.setSelection(CTabFolder.java:3028)\\n\\tat org.eclipse.swt.custom.CTabFolder.onMouse(CTabFolder.java:1749)\\n\\tat org.eclipse.swt.custom.CTabFolder$1.handleEvent(CTabFolder.java:278)\\n\\tat org.eclipse.swt.widgets.EventTable.sendEvent(EventTable.java:84)\\n\\tat org.eclipse.swt.widgets.Display.sendEvent(Display.java:4136)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1458)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1481)\\n\\tat org.eclipse.swt.widgets.Widget.sendEvent(Widget.java:1466)\\n\\tat org.eclipse.swt.widgets.Widget.notifyListeners(Widget.java:1271)\\n\\tat org.eclipse.swt.widgets.Display.runDeferredEvents(Display.java:3982)\\n\\tat org.eclipse.swt.widgets.Display.readAndDispatch(Display.java:3621)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine$9.run(PartRenderingEngine.java:1053)\\n\\tat org.eclipse.core.databinding.observable.Realm.runWithDefault(Realm.java:332)\\n\\tat org.eclipse.e4.ui.internal.workbench.swt.PartRenderingEngine.run(PartRenderingEngine.java:942)\\n\\tat org.eclipse.e4.ui.internal.workbench.E4Workbench.createAndRunUI(E4Workbench.java:86)\\n\\tat org.eclipse.ui.internal.Workbench$5.run(Workbench.java:588)\\n\\tat org.eclipse.core.databinding.observable.Realm.runWithDefault(Realm.java:332)\\n\\tat org.eclipse.ui.internal.Workbench.createAndRunWorkbench(Workbench.java:543)\\n\\tat org.eclipse.ui.PlatformUI.createAndRunWorkbench(PlatformUI.java:149)\\n\\tat 
org.eclipse.ui.internal.ide.application.IDEApplication.start(IDEApplication.java:124)\\n\\tat org.eclipse.equinox.internal.app.EclipseAppHandle.run(EclipseAppHandle.java:196)\\n\\tat org.eclipse.core.runtime.internal.adaptor.EclipseAppLauncher.runApplication(EclipseAppLauncher.java:110)\\n\\tat org.eclipse.core.runtime.internal.adaptor.EclipseAppLauncher.start(EclipseAppLauncher.java:79)\\n\\tat org.eclipse.core.runtime.adaptor.EclipseStarter.run(EclipseStarter.java:353)\\n\\tat org.eclipse.core.runtime.adaptor.EclipseStarter.run(EclipseStarter.java:180)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n\\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)\\n\\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)\\n\\tat java.lang.reflect.Method.invoke(Method.java:597)\\n\\tat org.eclipse.equinox.launcher.Main.invokeFramework(Main.java:629)\\n\\tat org.eclipse.equinox.launcher.Main.basicRun(Main.java:584)\\n\\tat org.eclipse.equinox.launcher.Main.run(Main.java:1438)\\n\\n\\nSession Data:\\neclipse.buildId=M20130204-1200\\njava.version=1.6.0_65\\njava.vendor=Apple Inc.\\nBootLoader constants: OS=macosx, ARCH=x86_64, WS=cocoa, NL=en_US\\nFramework arguments: -product org.eclipse.epp.package.java.product -keyring /Users/XXXXXXXXXXXXXXXX/.eclipse_keyring -showlocation\\nCommand-line arguments: -os macosx -ws cocoa -arch x86_64 -product org.eclipse.epp.package.java.product -keyring /Users/XXXXXXXXXXXXXXXXXX/.eclipse_keyring -showlocation\", \"post_time\": \"2015-03-30 16:40:19\" },\n\t{ \"post_id\": 6084, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"gsmith\", \"post_text\": \"Probably weren't related so... Can you take a look at the response I posted in the Syntax Check thread? Bit of a long shot, but I might spot something when you post your equivalent window...\", \"post_time\": \"2014-07-21 19:54:11\" },\n\t{ \"post_id\": 6083, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"micevepay\", \"post_text\": \"Of course I accidentally cleared them. But here is what the error warnings are now. Created a new ECL project to see if those errors were recreated but they weren't.\", \"post_time\": \"2014-07-21 19:51:35\" },\n\t{ \"post_id\": 6082, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"gsmith\", \"post_text\": \"Can you double click one of those and paste the content here (there is a copy to clipboard button)?\", \"post_time\": \"2014-07-21 19:39:43\" },\n\t{ \"post_id\": 6081, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"micevepay\", \"post_text\": \"Error Log\", \"post_time\": \"2014-07-21 19:37:02\" },\n\t{ \"post_id\": 6080, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"gsmith\", \"post_text\": \"How very strange (the browser is there fine, but no URL...) - can you open: Window->Show View->Other...->General->Error Log \\nand see if there are any any related error messages?\", \"post_time\": \"2014-07-21 19:30:43\" },\n\t{ \"post_id\": 6079, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"micevepay\", \"post_text\": \"Works in the ECL Watch view and in Run Configurations when I test the address. Also works in a regular browser. 
Just doesn't work in the ECL Development view. \\n\\nIE properties\", \"post_time\": \"2014-07-21 19:27:22\" },\n\t{ \"post_id\": 6077, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"gsmith\", \"post_text\": \"If you right click where the browser should be, do you get a "IE" context menu?\\n\\nIf so can you select "properties" and let me know what the Address is (and can you try opening that address in a regular browser?\\n\\nJust to clarify, you said it works in the ECL Watch view? How about the run configurations/test?\", \"post_time\": \"2014-07-21 19:12:19\" },\n\t{ \"post_id\": 6076, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"micevepay\", \"post_text\": \"Yes I am using the stable version. I also just recently updated to 5.0.0 in eclipse and still no fix. I have one machine using Windows 7 Enterprise 64-bit and another with OSX Mavericks.\\n\\nIf I switch to the ECL Watch tab in the upper right I can look at it but this is the problem I am talking about.\", \"post_time\": \"2014-07-21 18:59:25\" },\n\t{ \"post_id\": 6075, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"Re: ECL Watch from Workunit Tab\", \"username\": \"gsmith\", \"post_text\": \"What OS are you on?\\n\\nAre you running latest from eclipse.hpccsystems.com/stable?\", \"post_time\": \"2014-07-21 18:43:37\" },\n\t{ \"post_id\": 6074, \"topic_id\": 1396, \"forum_id\": 33, \"post_subject\": \"ECL Watch from Workunit Tab\", \"username\": \"micevepay\", \"post_text\": \"Outside of the first time I used ECL with Eclipse, the ECL watch does not appear in the workunit tab. It is just blank. This occurs in Windows and OSX.\", \"post_time\": \"2014-07-21 18:23:40\" },\n\t{ \"post_id\": 6348, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... is Unreachable"\", \"username\": \"joe.chambers\", \"post_text\": \"If you have multiple versions of java installed make sure you are installing the certificate into the correct keystore.\\n\\nThe first time I did the keystore I installed the cert into a different version of java than my application was using. \\nThis may help though it is probably the same as the stackoverflow link above.\\nhttp://docs.oracle.com/javase/tutorial/ ... step2.html\\n\\nAlso there is a setting in Axis that allows for invalid certs, though I'm not sure if you could add this to your configuration.\\nAxisProperties.setProperty("axis.socketSecureFactory",\\n"org.apache.axis.components.net.SunFakeTrustSocketFactory");\", \"post_time\": \"2014-09-19 17:36:00\" },\n\t{ \"post_id\": 6345, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... is Unreachable"\", \"username\": \"gsmith\", \"post_text\": \"Lemme check with some of the folks here, as we hit this exact issue...\", \"post_time\": \"2014-09-19 16:14:11\" },\n\t{ \"post_id\": 6344, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... is Unreachable"\", \"username\": \"jh\", \"post_text\": \"gsmith,\\nNo that didn't work...\\n\\nI was able to add it fine the keystore. But I still get the error.\", \"post_time\": \"2014-09-19 16:12:33\" },\n\t{ \"post_id\": 6339, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... 
is Unreachable"\", \"username\": \"gsmith\", \"post_text\": \"I think self sign certs and Java is a general "issue", in so far as you have to manually add it to the keystore.\\n\\nQuick google led me here: http://stackoverflow.com/questions/6840 ... to-eclipse\\n\\nCan you post back if this works?\", \"post_time\": \"2014-09-19 13:40:25\" },\n\t{ \"post_id\": 6333, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... is Unreachable"\", \"username\": \"jh\", \"post_text\": \"gsmith,\\n\\nYes the test was within the configuration page. I also copied that URL and opened the site in a browser just to be 100% positive. \\n\\nI tried using the default location for the ECLCC Compiler and that didn't work with the SSL target/website but did work with our non-SSL site.\\n\\nSo improvement.
Thanks!\\n\\nSo now the question is how to get the plugin to work with a self-signed cert? (The assumption here being the self-signed cert is the issue at this point.)\", \"post_time\": \"2014-09-18 21:20:42\" },\n\t{ \"post_id\": 6332, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Re: Configuration issue on submit: "... is Unreachable"\", \"username\": \"gsmith\", \"post_text\": \"When you say tested ECL Watch Site - was that within the run Configuration page?\\n\\nFor the ECLCC Compiler tab on the run configurations page, can you leave the "Override Defaults" off and try again.\", \"post_time\": \"2014-09-18 21:10:13\" },\n\t{ \"post_id\": 6323, \"topic_id\": 1449, \"forum_id\": 33, \"post_subject\": \"Configuration issue on submit: "... is Unreachable"\", \"username\": \"jh\", \"post_text\": \"I have Eclipse (kepler) installed and installed the ECL plugin from hpccsystems.com (stable version per the instructions).\\n\\nI set up the configuration: SSL (with self-signed cert), correct IP address and port, test of ECL Watch successful. I pointed clienttools to the install directory (I tried the base directory and bin both failed).\\n\\nI tested the ECL Watch site and that shows correctly. \\n\\nWhen I go to run I get a message that says "... is Unreachable."\\n\\nAny thoughts on what I may have done wrong in the configuration?\\n\\nthanks for the help\\nWindows 7 64-bit\\nEclipse: Kepler service release 2\\nplugin: ECL_Language - 5.0.0.101\", \"post_time\": \"2014-09-18 16:21:29\" },\n\t{ \"post_id\": 6786, \"topic_id\": 1561, \"forum_id\": 33, \"post_subject\": \"Re: Exception During Launch Config Change Notification\", \"username\": \"benhastings\", \"post_text\": \"I was having problems, then tried reading through everything and finally started with a fresh eclipse installation and new ecl clienttools yesterday.\\n\\nAll I have to do to get those errors is to go to the "run as" configuration screen and select "run" from there and the errors occur.\\n\\nI've also included the console output... it looks likt it's _trying_ to do something!\\n\\n[attachment=2:2gwx852u]Run_Configurations_and_ECL_Development_-_ECLTest_hello_ecl_-_Eclipse_-__Users_hastinbx_Code_eclipse.png\\n\\n[attachment=1:2gwx852u]Run_Configurations_and_ECL_Development_ECLCC.png\\n\\n[attachment=0:2gwx852u]errors.png\", \"post_time\": \"2015-01-14 13:37:54\" },\n\t{ \"post_id\": 6785, \"topic_id\": 1561, \"forum_id\": 33, \"post_subject\": \"Re: Exception During Launch Config Change Notification\", \"username\": \"gsmith\", \"post_text\": \"Can you add some steps to reproduce.\\n\\nAlso if you could attach a screenshot of your configuration page that might show something...\", \"post_time\": \"2015-01-14 10:50:13\" },\n\t{ \"post_id\": 6784, \"topic_id\": 1561, \"forum_id\": 33, \"post_subject\": \"Exception During Launch Config Change Notification\", \"username\": \"benhastings\", \"post_text\": \"Trying to get up and running and am having problems submitting jobs to either of our clusters.\\n\\nI read through every Eclipse, OSX, and Eclipse+OSX thread I could find here and haven't seen anything like this. When I go to congifure a "run as..." server config, the test works fine, recognizes the cluster and my eclcc, however, when I pulled up the error log, I found the following messages repeatedly:\\n\\nMessage: An exception occurred during launch configuration change notification.\\n\\nStack Trace:\\n
org.eclipse.swt.SWTException: Invalid thread access\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4441)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4356)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4327)\\n\\tat org.eclipse.swt.widgets.Widget.error(Widget.java:783)\\n\\tat org.eclipse.swt.widgets.Widget.checkWidget(Widget.java:574)\\n\\tat org.eclipse.swt.widgets.Combo.getText(Combo.java:949)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.refreshServers(ECLEditorToolbar.java:191)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.refreshServers(ECLEditorToolbar.java:214)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.access$4(ECLEditorToolbar.java:212)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar$4.launchConfigurationChanged(ECLEditorToolbar.java:116)\\n\\tat org.eclipse.debug.internal.core.LaunchManager$ConfigurationNotifier.run(LaunchManager.java:229)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.debug.internal.core.LaunchManager$ConfigurationNotifier.notify(LaunchManager.java:209)\\n\\tat org.eclipse.debug.internal.core.LaunchConfigurationWorkingCopy.setDirty(LaunchConfigurationWorkingCopy.java:552)\\n\\tat org.eclipse.debug.internal.core.LaunchConfigurationWorkingCopy.setAttribute(LaunchConfigurationWorkingCopy.java:425)\\n\\tat org.eclipse.debug.internal.ui.DebugUIPlugin$8.run(DebugUIPlugin.java:1221)\\n\\tat org.eclipse.core.internal.jobs.Worker.run(Worker.java:54)
\\n\\nand\\n\\nMessage:Problems occurred when invoking code from plug-in: "org.eclipse.debug.core".\\n\\nStack Trace:\\norg.eclipse.swt.SWTException: Invalid thread access\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4441)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4356)\\n\\tat org.eclipse.swt.SWT.error(SWT.java:4327)\\n\\tat org.eclipse.swt.widgets.Widget.error(Widget.java:783)\\n\\tat org.eclipse.swt.widgets.Widget.checkWidget(Widget.java:574)\\n\\tat org.eclipse.swt.widgets.Combo.getText(Combo.java:949)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.refreshServers(ECLEditorToolbar.java:191)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.refreshServers(ECLEditorToolbar.java:214)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar.access$4(ECLEditorToolbar.java:212)\\n\\tat org.hpccsystems.eclide.editors.ECLEditorToolbar$4.launchConfigurationChanged(ECLEditorToolbar.java:116)\\n\\tat org.eclipse.debug.internal.core.LaunchManager$ConfigurationNotifier.run(LaunchManager.java:229)\\n\\tat org.eclipse.core.runtime.SafeRunner.run(SafeRunner.java:42)\\n\\tat org.eclipse.debug.internal.core.LaunchManager$ConfigurationNotifier.notify(LaunchManager.java:209)\\n\\tat org.eclipse.debug.internal.core.LaunchConfigurationWorkingCopy.setDirty(LaunchConfigurationWorkingCopy.java:552)\\n\\tat org.eclipse.debug.internal.core.LaunchConfigurationWorkingCopy.setAttribute(LaunchConfigurationWorkingCopy.java:425)\\n\\tat org.eclipse.debug.internal.ui.DebugUIPlugin$8.run(DebugUIPlugin.java:1221)\\n\\tat org.eclipse.core.internal.jobs.Worker.run(Worker.java:54)\\n
\\n\\nAny ideas?\", \"post_time\": \"2015-01-14 02:46:37\" },\n\t{ \"post_id\": 7334, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Re: Failed to start web browser - see error log - but where!\", \"username\": \"dabayliss\", \"post_text\": \"Yes to both. The whole 'compile/submit' thing works .... it is just the click on the work-unit window that fails with the error message\", \"post_time\": \"2015-04-13 23:41:56\" },\n\t{ \"post_id\": 7329, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Re: Failed to start web browser - see error log - but where!\", \"username\": \"gsmith\", \"post_text\": \"Make sure your local file has an ".ecl" file extension and make sure your project is an ECL Project...\", \"post_time\": \"2015-04-13 15:56:01\" },\n\t{ \"post_id\": 7328, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Re: Failed to start web browser - see error log - but where!\", \"username\": \"dabayliss\", \"post_text\": \"It is Ubuntu 14.10 (so yes - Linux).\\n\\nThe install of the gtk has solved the 'unable to start webbrowser'. Very cool. Thnx.\\n\\nStill getting the:\\n\\nAn error has occurred. See error log for more details.\\norg.eclipse.ui.editors.text.TextEditor cannot be cast to org.hpccsystems.eclide.editors.ECLWindow\\n\\nwhen clicking one a work unit in the work unit tab from the development window\", \"post_time\": \"2015-04-13 15:54:17\" },\n\t{ \"post_id\": 7324, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Re: Failed to start web browser - see error log - but where!\", \"username\": \"gsmith\", \"post_text\": \"Is this a Linux based machine? \\n\\nAssuming it is (and your localhost ECL Playground comment), you may need to install libwebkitgtk:\\n\\nsudo apt-get install libwebkitgtk-1.0-0
\", \"post_time\": \"2015-04-13 09:20:37\" },\n\t{ \"post_id\": 7318, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Re: Failed to start web browser - see error log - but where!\", \"username\": \"dabayliss\", \"post_text\": \"Possibly related - in 'Development' window - if I click on the work unit I get:\\n\\nAn error has occurred. See error log for more details.\\norg.eclipse.ui.editors.text.TextEditor cannot be cast to org.hpccsystems.eclide.editors.ECLWindow\", \"post_time\": \"2015-04-11 15:48:21\" },\n\t{ \"post_id\": 7314, \"topic_id\": 1678, \"forum_id\": 33, \"post_subject\": \"Failed to start web browser - see error log - but where!\", \"username\": \"dabayliss\", \"post_text\": \"I have an hpcc system installed on a laptop - and Eclipse Luna on the same laptop. Hpcc is latest (5.2) - Eclipse plugin is latest too (both installed today).\\n\\nECL is working find from playground - ecl watch working fine from localhost via Chrome.\\n\\nEclipse Ecl Developer view is able to submit job - and it runs fine (can see from ecl-watch in chrome).\\n\\nHOWEVER - when I switch to ECL Watch view in eclipse - I get the message "Failed to create Web Browsers - see Error Log"\\n\\nI looked in the Eclipse General Error Log and there was nothing there - so where should I look?\", \"post_time\": \"2015-04-11 01:56:46\" },\n\t{ \"post_id\": 8370, \"topic_id\": 1952, \"forum_id\": 33, \"post_subject\": \"Re: clienttool 5.4.2 can't run on Windows 10 64 bit\", \"username\": \"gsmith\", \"post_text\": \"You are correct (and it used to) - I have already reached out to the package maintainer with a link to this issue.\", \"post_time\": \"2015-10-22 16:39:22\" },\n\t{ \"post_id\": 8366, \"topic_id\": 1952, \"forum_id\": 33, \"post_subject\": \"Re: clienttool 5.4.2 can't run on Windows 10 64 bit\", \"username\": \"longly\", \"post_text\": \"Thank gsmith. That solved my issue. I tried vc 2012 redistribute x86 but it doesnt work. I think the installer should include this lib or at least check and ask user to install it.\", \"post_time\": \"2015-10-22 15:47:36\" },\n\t{ \"post_id\": 8364, \"topic_id\": 1952, \"forum_id\": 33, \"post_subject\": \"Re: clienttool 5.4.2 can't run on Windows 10 64 bit\", \"username\": \"gsmith\", \"post_text\": \"Try installing this package: http://www.microsoft.com/en-us/download ... px?id=5555\", \"post_time\": \"2015-10-22 15:20:14\" },\n\t{ \"post_id\": 8362, \"topic_id\": 1952, \"forum_id\": 33, \"post_subject\": \"clienttool 5.4.2 can't run on Windows 10 64 bit\", \"username\": \"longly\", \"post_text\": \"when I try to run any commandline tool, it says MSVCP100.dll and MSVCR100.dll is missing . ECL IDE can run but I can't submit any code to server. I checked Windows\\\\system32 and both dlls are there. However if I copy the dll to the same folder with clienttool it will crash .\\nPlease help.\", \"post_time\": \"2015-10-22 14:05:26\" },\n\t{ \"post_id\": 9946, \"topic_id\": 2366, \"forum_id\": 33, \"post_subject\": \"Re: Integrating the help files\", \"username\": \"vardha24\", \"post_text\": \"Will be watching out for this ... Thanks for Your update\", \"post_time\": \"2016-07-12 14:14:24\" },\n\t{ \"post_id\": 9940, \"topic_id\": 2366, \"forum_id\": 33, \"post_subject\": \"Re: Integrating the help files\", \"username\": \"JimD\", \"post_text\": \"Yes, \\n\\nI discovered the issue in Eclipse Mars after replying to your post. You can add yourself as a watcher of the issue to get notification when it is resolved. 
Thanks for posting the question that lead to our discovery of the issue.\\n\\nJim\", \"post_time\": \"2016-07-12 12:43:51\" },\n\t{ \"post_id\": 9936, \"topic_id\": 2366, \"forum_id\": 33, \"post_subject\": \"Re: Integrating the help files\", \"username\": \"gsmith\", \"post_text\": \"Looks like this may be an issue with newer versions of Eclipse: https://track.hpccsystems.com/browse/EPE-151\", \"post_time\": \"2016-07-12 10:53:22\" },\n\t{ \"post_id\": 9922, \"topic_id\": 2366, \"forum_id\": 33, \"post_subject\": \"Re: Integrating the help files\", \"username\": \"JimD\", \"post_text\": \"That has already been done. Place your cursor on any keyword in the Eclipse editor window, then press F1. \\n\\nThis opens the help file for the Language Reference/Standard Library Reference and should go right to the appropriate topic.\\n\\nHTH, \\n\\nJim\", \"post_time\": \"2016-07-08 13:28:15\" },\n\t{ \"post_id\": 9918, \"topic_id\": 2366, \"forum_id\": 33, \"post_subject\": \"Integrating the help files\", \"username\": \"vardha24\", \"post_text\": \"Hi \\n\\nI have to toggle from eclipse to pdf files to find help on the constructs / keywords.. \\n\\nJust wondering if there is a way to integrate the ECL reference & other help files with Eclipse ?\\n\\nRegards\\nVarad\", \"post_time\": \"2016-07-07 18:58:22\" },\n\t{ \"post_id\": 11143, \"topic_id\": 2753, \"forum_id\": 33, \"post_subject\": \"Re: Eclipse plugin with GIT repository\", \"username\": \"gsmith\", \"post_text\": \"If you add the "-legacy" option to the "common" section, it should resolve this (I would suggest you then go through an update your sources to the new $. syntax so you can remove the legacy option.\", \"post_time\": \"2016-09-08 19:30:34\" },\n\t{ \"post_id\": 11103, \"topic_id\": 2753, \"forum_id\": 33, \"post_subject\": \"Eclipse plugin with GIT repository\", \"username\": \"clanderson\", \"post_text\": \"I am in the process of migrating our source code out of MySQL to a git repository. This work is complete with a repository hosted within our LN gitlab site and I have created a local repository connected to this remote repository. I have been able to transfer all the code out of MySQL using mysql2git and populated the repository.\\n\\nI then opened up eclipse and opened the git repository where the attributes (code) is stored and created a ECL Project (convert to ECL project) - the compiler takes off checking syntax on all the files (there is a lot) - but in the end I have lots and lots of syntax errors and they all appear to be related to import statements not being found. For example - not able to locate and use thorlib. I see the file within the project folder but it is not resolved. My guess is my compiler on my windows machine is not configured correctly but I cannot find any information on this.\\n\\nAs a simple example I created a hello_world.ecl file with a simple assignment from the thorlib attribute to show the syntax error.\\n\\n[attachment=1:2l8xer7g]9-7-2016 12-59-41 PM.png\\n\\nNeed some guidance on my compiler setup perhaps?\\n[attachment=0:2l8xer7g]9-7-2016 1-55-27 PM.png\", \"post_time\": \"2016-09-07 17:57:52\" },\n\t{ \"post_id\": 11183, \"topic_id\": 2783, \"forum_id\": 33, \"post_subject\": \"Failed to create Web Browser ... 
Ubuntu 16.05 LTS\", \"username\": \"BrianB644\", \"post_text\": \"I was trying to run the Eclipse IDE under Ubuntu … My cluster ran and I could execute jobs … and could review them in the Firefox browser via the ESP … but couldn't see much of anything in the IDE.\\n \\nAny page with web-like content said ... “Warning: Failed to create Web Browser, see Error Log for further details."\\n \\nMy HPCC server version was 6.0.4-1 on Ubuntu 16.04 LTS ... I was running Eclipse Neon.\\n\\nThe Eclipse ... Window | Show View | Other | Internal Web Browser ... dialogue contained messages including ...\\n\\n"Embedded browser not available"\\n\\n"The embedded browser widget for this editor cannot be created. It is either not available for your operating system or the system needs to be configured in order to support embedded browser."\\n\\n"...=mozilla are not supported with GTK3 as XULRunner is not ported for GTK 3 yet ..."\\n\\nWith help, I found the solution to this problem was to install ... libwebkitgtk-3.00 ... and the problems appeared to be solved.\\n\\nHTH\", \"post_time\": \"2016-09-13 14:52:05\" },\n\t{ \"post_id\": 20493, \"topic_id\": 2923, \"forum_id\": 33, \"post_subject\": \"Re: C++ "expected unqualified-id before string constant" err\", \"username\": \"oscar.foley\", \"post_text\": \"Thanks a lot.\\n\\nI had a very similar problem with 120 characters limit and HPCC 5.4.2\\nBTW, I "think" this limit is fixed on HPCC 6.4.2\", \"post_time\": \"2018-01-22 17:48:16\" },\n\t{ \"post_id\": 11673, \"topic_id\": 2923, \"forum_id\": 33, \"post_subject\": \"C++ "expected unqualified-id before string constant" error\", \"username\": \"kpolicano\", \"post_text\": \"I'm curious what could be causing workunits with inline C++ to fail.\\n\\nHere's a very simple script, which completes successfully when submitted through the ECL Playground and the ECL IDE, but fails when submitted from Eclipse:\\nINTEGER4 add(INTEGER4 x, INTEGER4 y) := BEGINC++\\n\\t#option pure\\n\\t#body\\n\\treturn x + y;\\nENDC++;\\n\\nOUTPUT(add(10,20));
\\n\\nAnd the ECLCC log:\\n--------- end compiler output -----------\\ng++: error: W20160930-140246_1.o: No such file or directory\\nBWR_Test:2:3: error: expected unqualified-id before string constant\\n---------- compiler output --------------
\\n\\nEDIT:\\nIt turns out this was just a symptom of a different bug, and is not related to the Eclipse plugin.\\n\\nECL files that contain inline C++ will fail to compile if the path to the ECL file is more than 120 characters.\", \"post_time\": \"2016-09-30 14:31:15\" },\n\t{ \"post_id\": 18613, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Re: Unable to find ECLCC Compiler\", \"username\": \"Rush\", \"post_text\": \"Jim, \\n\\nThanks for the help. I was able to successfully download the\", \"post_time\": \"2017-08-22 20:04:23\" },\n\t{ \"post_id\": 18603, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Re: Unable to find ECLCC Compiler\", \"username\": \"Rush\", \"post_text\": \"Jim, \\n\\nThanks for the help. I was able to successfully download the client tools for Mac.\", \"post_time\": \"2017-08-22 20:04:04\" },\n\t{ \"post_id\": 18593, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Re: Unable to find ECLCC Compiler\", \"username\": \"JimD\", \"post_text\": \"We do support Client Tools for Mac OsX, but somehow the download page did not offer this as a download option. \\n\\nOur portal team has fixed the issue and the Client Tools for Mac OsX should be available now.\\n\\nHTH,\\nJim\", \"post_time\": \"2017-08-22 18:45:15\" },\n\t{ \"post_id\": 18583, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Re: Unable to find ECLCC Compiler\", \"username\": \"Rush\", \"post_text\": \"rtylaor, \\n\\nThanks for your response. So, you are suggestion to download the Windows Client Tool file which downloads a .exe file. I don't think exe file is executable on a Mac OS.\", \"post_time\": \"2017-08-22 16:07:18\" },\n\t{ \"post_id\": 18573, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Re: Unable to find ECLCC Compiler\", \"username\": \"rtaylor\", \"post_text\": \"Rush,\\n\\nYou'll need to use the Windows client tools. The only downloads for Mac OSX are the 32 and 64-bit VM immages.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-08-22 13:00:51\" },\n\t{ \"post_id\": 18563, \"topic_id\": 4653, \"forum_id\": 33, \"post_subject\": \"Unable to find ECLCC Compiler\", \"username\": \"Rush\", \"post_text\": \"I am not able to find Client tools for Mac OS in Version 6.4. Can anyone help if there is one for Mac?\", \"post_time\": \"2017-08-21 01:17:45\" },\n\t{ \"post_id\": 30033, \"topic_id\": 7743, \"forum_id\": 33, \"post_subject\": \"Re: Setting VS Code\", \"username\": \"newportm\", \"post_text\": \"I am able to run using VS code with no problems. Can you provide configurations at user level and workspace level?\", \"post_time\": \"2020-04-08 22:27:00\" },\n\t{ \"post_id\": 29563, \"topic_id\": 7743, \"forum_id\": 33, \"post_subject\": \"Setting VS Code\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi,\\n\\nI trying to configure ECL on VS Code and I did follow the instructions present in the following link but I am unable to figure where am I going wrong. It would be great if anyone can suggest a different method to configure ECL on VS code\\n\\nhttps://marketplace.visualstudio.com/it ... ystems.ecl\\n\\nAlso, I trying to be able to run any ECL code from vs code and then on to the cluster (private IP). 
It would be great if anyone can suggest with the fact that I am using a proxy to access the cluster IP?\", \"post_time\": \"2020-02-14 18:52:26\" },\n\t{ \"post_id\": 29853, \"topic_id\": 7843, \"forum_id\": 33, \"post_subject\": \"Eclipse ECL Plugin\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi,\\n\\nI have setup ECL environment using Eclipse ECL Plugin on MacOS Mojave and have been able to run my code and see results on my cluster. However, when I try to open the workunit results on Eclipse it just shows an empty white window (PFA image). This has happened earlier too and was able to see results by restarting eclipse but I am unable to do see any now. It would be great if anyone can help me solve this issue?\\n\\nThanks!\", \"post_time\": \"2020-03-22 02:47:36\" },\n\t{ \"post_id\": 30083, \"topic_id\": 7943, \"forum_id\": 33, \"post_subject\": \"Re: ECL Debug Log / Track submitted wuid from VS Code\", \"username\": \"gsmith\", \"post_text\": \"You can find it in the "Output" view under "ECL". \\n\\nSee attached screenshot...\", \"post_time\": \"2020-04-09 14:29:23\" },\n\t{ \"post_id\": 30043, \"topic_id\": 7943, \"forum_id\": 33, \"post_subject\": \"ECL Debug Log / Track submitted wuid from VS Code\", \"username\": \"newportm\", \"post_text\": \"Does anyone know where the "ecl.debugLogging" log is stored by VS Code? I was thinking it was under AppData/Code/Log but that's not really what I expected to see. \\n\\nIs there anything that tracks the debug console text for every execution? \\n\\nThanks in advance.\", \"post_time\": \"2020-04-08 22:31:59\" },\n\t{ \"post_id\": 30203, \"topic_id\": 8023, \"forum_id\": 33, \"post_subject\": \"Re: VS Code Hints and Questions\", \"username\": \"newportm\", \"post_text\": \"I'm sure there are more but here are a few things I use in VS code to make my life a bit easier. \\nWarnings/Hints: \\n•\\tDon’t try to name two folders the same under the same directory structure. So like I have an Prod folder and a Dev folder all with the same ECL repos but different environments. You can’t use both folders in the workspace. It will not run. So create a new workspace and place the workspace one level higher than the ECL Repo folder.\\n \\n•\\tYou may need to add imports inside of your bwr and macro definitions to syntax check or run code. ECL_IDE did not force this. I had the Import before the macro and that does not work in VS code. \\n \\n•\\tAs you see above you can do code compares in VS code. Live. A nice touch. Also it adds come merge choices to take current or take previous. \\n\\n•\\tWelcome Timeline View! You can now see the history of GIT changes for an attribute in the VS code IDE. From the Explorer Menu >> [Your Workspace Name] >> Select Attribute >> Right Click "Open Timeline" >> Look at commit history.\\n\\n•\\tVs Code has source control built into the UI so that’s handy. .gitignore is important to have setup! so you don't end up adding logs and other settings to the ecl repo.\\n\\n•\\tIn the code repo. I like to copy the file name and use in my code. This plugin will allow that. \\nhttps://marketplace.visualstudio.com/items?itemName=jack89ita.copy-filename \\n\\n•\\tIn the explore you can start typing and do a repo search for the attribute names.\\n\\n•\\tAdd a plugin to auto format your ecl to clean up extra white spaces at the end of each line. Remember to change your settings to format after save or set up a keyboard shortcut. \\n "editor.formatOnSave": true,
\\nhttps://marketplace.visualstudio.com/items?itemName=eetami.whitespace-formatter\\n \\n•\\tShift F1 will now open the online ECL reference. Remember to add hpccsystems to your trusted websites so it will auto open the link. This does not work 100%. IT will take you to the site but you may need to do a search. Try INDEX from VSCODE. Sad Day. \\n\\n•\\tLove control D to duplicate line. You will want this plugin as well. \\nhttps://marketplace.visualstudio.com/items?itemName=geeebe.duplicate\\n\\n•\\tWhen wanting to run multiple wuid at once you will have to stop the IDE from monitoring the last one you submitted. Unless you’re in a separate workspace. Stopping just stop monitoring, it does not affect your WU.\\n \\n•\\tThe output tab is for if you have debug ecl turned on. Change the dropdown to "ECL" to see the correct log.\\n\\n•\\tAlt + arrow (left , right) key will toggle you back and forth between tab you just left in order visited\\n\\n• If you set up the global settings first then they will import into your workspace. Keybinding is the keyboard shortcuts. I got used to Ctrl Q and Ctrl + Shift + Q for comments so I set them up in VS code to do the same thing. \\n\\n• F12 will open the highlighted code but it replaces the tab if your in the same folder. I have not yet can’t figure out how to go back from an F12 if it changes the tab. So you have to reopen the file. Alt F12 just opens up a peek window the can be closed. Much better for quick glance at something.\\n\\n• You can block highlight by holding down the middle mouse button.\\n\\n•\\tChange your Terminal to gitbash so you can use aliases. In settings:\\n"terminal.integrated.shell.windows": "C:\\\\\\\\Program Files\\\\\\\\Git\\\\\\\\bin\\\\\\\\bash.exe",\\n\\n•\\tWant to keep your regex up to par and your code pretty. This plugin allows you to line up selected text by any regex statement of highlighted text. There are some others out there that do automatic but those are more for java or python. Feel free to create one for ECL\\nhttps://marketplace.visualstudio.com/items?itemName=janjoerke.align-by-regex\\n\\nHave a complicated string you use a lot or always forget the escape chars add this to your user or global settings and just type the word into the command prompt. Don’t forget to set up a key shortcut I like CTRL + =
\\n \\n"align.by.regex.templates": {\\n "comma": ",", //lines up all commas by occurrence\\n "first comma": "^[^,]+", //align the first comma only\\n "comma word": "(.*?),", //first char after first comma\\n "last comma": "(,)[^,]*$",\\n // assigment operator not after a function, macro, module, record, transform definition\\n "assignment": ":=(?<!((FUNCTION|[Ff]unction|MACRO|[Mm]acro|[Mm]odule|MODULE|RECORD|[Rr]ecord|[Tt]ransfrom|TRANSFORM)(\\\\\\\\s*):=))",\\n "comment": "^[^//]+", //first comment string\\n "colon": "^([^:]|(::))+", //first solo colon not :: in our file names\\n }
\", \"post_time\": \"2020-04-16 14:28:52\" },\n\t{ \"post_id\": 30193, \"topic_id\": 8023, \"forum_id\": 33, \"post_subject\": \"VS Code Hints and Questions\", \"username\": \"newportm\", \"post_text\": \"Because I am somewhat greedy ... Let's start with the questions. \\n\\n1. Is there a way just to submit a compile, such that it generates a wuid from VS code without running the code. I use this to test macros as syntax is not great at it. Shhh don't tell Richard I'm using macros. \\n\\n2. intellisense for ECL is much better in VS code than it was in the ide. However, am I missing a setting to capture local attribute definitions? From modules I can grab exported and shared attributes from a module but local or recently used attributes. \\n
"editor.suggest.localityBonus": true,\\n "editor.suggestSelection": "recentlyUsed",\\n "editor.wordBasedSuggestions": true,\\n "editor.parameterHints.enabled": true,
\\n\\n3. Has anyone found an extension that can format key-words and such automatically? IMPORT, EXPORT, END, TRANSFORM , FUNCTION etc.\\n\\n4. "files.defaultLanguage": "ecl", does not change the default file type to .ecl when I go to save a new file. Is this a bug or am I missing something? \\n\\nTim Newport, Sr Data Eng. LexisNexis Risk\", \"post_time\": \"2020-04-16 14:28:22\" },\n\t{ \"post_id\": 31833, \"topic_id\": 8333, \"forum_id\": 33, \"post_subject\": \"Re: Broken http://eclipse.hpccsystems.com/stable\", \"username\": \"tlhumphrey2\", \"post_text\": \"There is link: https://hpccsystems.com/download/third-party-integrations/eclipse-ide\", \"post_time\": \"2020-08-17 13:40:56\" },\n\t{ \"post_id\": 31823, \"topic_id\": 8333, \"forum_id\": 33, \"post_subject\": \"Re: Broken http://eclipse.hpccsystems.com/stable\", \"username\": \"gsmith\", \"post_text\": \"FWIW the eclipse plugin has been deprecated in favor of the VSCode ECL Extension (https://marketplace.visualstudio.com/it ... ystems.ecl).\\n\\nWe recently switched CDN provider and didn't renew the eclipse domain prefix.\\n\\nI believe the following URL should be equivalent - https://cdn.hpccsystems.com/cdn/install/eclipse/stable\", \"post_time\": \"2020-08-17 12:29:22\" },\n\t{ \"post_id\": 31803, \"topic_id\": 8333, \"forum_id\": 33, \"post_subject\": \"Broken http://eclipse.hpccsystems.com/stable\", \"username\": \"emarquez\", \"post_text\": \"I know that the Eclipse IDE for HPCC Systems is no longer actively supported but I'm wondering if I could still get access to it. Currently, the link http://eclipse.hpccsystems.com/stable is broken. I just need it because I had to re-install Eclipse. \\n\\nThanks!\", \"post_time\": \"2020-08-15 03:24:52\" },\n\t{ \"post_id\": 32703, \"topic_id\": 8533, \"forum_id\": 33, \"post_subject\": \"Re: VS Code Setup - Improved Extension\", \"username\": \"gsmith\", \"post_text\": \"FYI I just pushed a fix to the VS Code Extension Marketplace (v2.8.0)\", \"post_time\": \"2020-12-15 13:51:53\" },\n\t{ \"post_id\": 32693, \"topic_id\": 8533, \"forum_id\": 33, \"post_subject\": \"Re: VS Code Setup - Improved Extension\", \"username\": \"gsmith\", \"post_text\": \"No its not expected.\\n\\nFWIW The result page you see in VSCode is hosted in a different process to VSCode itself, so I pass in all the "settings" as params and I bet I am bypassing the cached pw in memory and pulling from the launch config (which is empty).\\n\\nWell spotted - will try and get a fix out ASAP.\", \"post_time\": \"2020-12-15 13:10:03\" },\n\t{ \"post_id\": 32663, \"topic_id\": 8533, \"forum_id\": 33, \"post_subject\": \"Re: VS Code Setup - Improved Extension\", \"username\": \"flyer\", \"post_text\": \"Hi gmith,\\n\\nI was not able to reproduce the issue using the target you provided and I think I figured out why. \\n\\nCurrently, I have two configurations for target servers: Dataland/Production. For security purposes, I chose to not have my password in the launch.json file. I left it empty so that every time I logged in VSCode and wanted to submit a workunit, I would be prompted to enter the password (See attachment). However, I learned that if I do that, the server does not appear to send a response back. If I keep the password in the launch.json file, I no longer see the issue with the result viewer. I am able to see results.\\n\\nIs this behavior expected? 
\\n\\nThanks.\", \"post_time\": \"2020-12-14 13:54:19\" },\n\t{ \"post_id\": 32653, \"topic_id\": 8533, \"forum_id\": 33, \"post_subject\": \"Re: VS Code Setup - Improved Extension\", \"username\": \"gsmith\", \"post_text\": \"Thanks - What server version are you targeting?\\n\\nCan you reproduce when you target: https://play.hpccsystems.com:18010/\", \"post_time\": \"2020-12-14 11:33:29\" },\n\t{ \"post_id\": 32643, \"topic_id\": 8533, \"forum_id\": 33, \"post_subject\": \"VS Code Setup - Improved Extension\", \"username\": \"flyer\", \"post_text\": \"Hi,\\n\\nI just installed VS Code along with the ECL extension, but it does not look like the result viewer feature that was added recently is working for me. I am able to submit a workunit to a specific target cluster, and I am able to see the results out on the ECL Watch page. However, I am unable to see the results within VS Code. Please see attachments. \\n\\nCurious about whether there is some special setting that I am supposed to set in order to get that feature to be active. \\n\\nAny help would be appreciated.\", \"post_time\": \"2020-12-11 14:10:47\" },\n\t{ \"post_id\": 33143, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"flyer\", \"post_text\": \"This makes sense to me. Thank you so much!\", \"post_time\": \"2021-02-26 14:03:58\" },\n\t{ \"post_id\": 33113, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"hwatanuki\", \"post_text\": \"Thanks for the additional information, flyer. \\n\\nI think I was now able to reproduce the behavior and it looks like it is just related to how IMPORT statements work with workspaces in VSCode. \\n\\nWhen you add the myProject folder to the workspace in VSCode it seems to be equivalent of actually making it the root folder of your ECL repository, thus the IMPORT statements for the ecl code contained inside the myProject folder should start with the direct reference to the modules/subfolders within myProject (such as: "IMPORT services"). So in your case, the myService.ecl could look like this with an alias:\\n\\nIMPORT services AS X;\\n\\nEXPORT myService := FUNCTION\\n inMessage := X.modConstants.CONSTANT1;\\n RETURN inMessage;\\nEND;\\n
\\n\\nInstead, if you really want to use IMPORT statements that start with references to the myProject folder (such as: "IMPORT myProject.services"), then you would need to select the myProject´s parent folder (if there is one) when you create your workspace in VSCode. I have simulated this alternative option here and the "Go to Definition" feature (F12) worked fine for the CONSTANTS1 definition.\\n\\nHTH,\\nHugoW\", \"post_time\": \"2021-02-25 04:24:44\" },\n\t{ \"post_id\": 33093, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"flyer\", \"post_text\": \"I included two test constants to showcase what I am experiencing. \\n\\n\\nEXPORT modConstants := MODULE\\n EXPORT CONSTANT1 := 'TEST CONSTANT1';\\n EXPORT CONSTANT2 := 'TEST CONSTANT2';\\nEND;\\n
\\n\\nLet me know if this helps!\", \"post_time\": \"2021-02-24 16:24:02\" },\n\t{ \"post_id\": 33073, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"hwatanuki\", \"post_text\": \"Hello Flyer, \\n\\nIn my previous test above I was already using an exported definition inside a MODULE. Just to make sure we are on the same page, could you please share the content of your modConstants.ecl file?\\n\\nThanks,\\nHugoW\", \"post_time\": \"2021-02-24 14:25:20\" },\n\t{ \"post_id\": 33063, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"flyer\", \"post_text\": \"Hi,\\n\\nYes. I am currently using version 2.10.0 of the ECL extension. I just notice that aliases do work for some things such as the first example that I provided - apologies for not providing an example that would actually fail (grins). However, it doesn't seem to work with more deeply nested folder structures when referencing an export definition from a module? ([color=#800080:26avmvur]MODULE)\\n\\nNot very creative but I have attached something I just tried (see attachments). \\n\\nRundown of what I did:\\nTESTPROJECT - test workspace\\n MyProject - main project folder\\n common - common functions and other usable pieces of software\\n services - folder where services and service related functions would go\\n layouts - folder where several different layouts would go\\n myLayout - sample layout (alias worked on this)\\n myService - dummy service returns a message\\n modConstants - MODULE that contains several constant definitions\\n\\nWhen I tried to F12 on the constant defined in modConstants, VSCode says: no definition found for CONSTANT1. So it seems that the issue can be observed when trying to F12 on export definitions contained in [color=#800080:26avmvur]MODULEs.\\n\\nLet me know if you can reproduce the issue on your side. I am not sure if this behavior is expected, a bug, or a potential improvement that could be worked through. \\n\\nThanks!\", \"post_time\": \"2021-02-24 03:09:02\" },\n\t{ \"post_id\": 33053, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"Re: ECL Alias in VS Code\", \"username\": \"hwatanuki\", \"post_text\": \"Hello flyer,\\n\\nI have just tried a similar alias on VSCode with v2.10.0 of the ECL extension and it seemed to work. Can you please confirm if this is the same version you are using so I can try to reproduce the issue?\\n\\nThanks\\nHugoW\", \"post_time\": \"2021-02-23 23:37:29\" },\n\t{ \"post_id\": 33043, \"topic_id\": 8613, \"forum_id\": 33, \"post_subject\": \"ECL Alias in VS Code\", \"username\": \"flyer\", \"post_text\": \"I recently noticed that F12 does not seem to work on aliases (e.g. import statements) as shown in the example below: \\n\\n\\nIMPORT MainFolder.SubFolder.AllLayouts AS Layouts;\\n\\ninData := DATASET([], Layouts.outputLayout);\\n
\\n\\nIt appears that F12'ing on outputLayout doesn't take us to the MainFolder.SubFolder1.AllLayouts (outputlayout) object. Is this an available feature? If not, would it be possible to add it?\\n\\nThanks in advance.\", \"post_time\": \"2021-02-23 23:12:55\" },\n\t{ \"post_id\": 33173, \"topic_id\": 8623, \"forum_id\": 33, \"post_subject\": \"Re: Load ECL.mod file in VS Code\", \"username\": \"cliffjackie369\", \"post_text\": \"[quote="Eric Graves":16kz8d8g]For those interested in this topic, an issue has been created in the extension repository to add support for .mod files in VS Code:\\nhttps://github.com/hpcc-systems/vscode-ecl/issues/233\\n\\nGreat thanks for sharing!\", \"post_time\": \"2021-03-18 18:02:14\" },\n\t{ \"post_id\": 33163, \"topic_id\": 8623, \"forum_id\": 33, \"post_subject\": \"Re: Load ECL.mod file in VS Code\", \"username\": \"cliffjackie369\", \"post_text\": \"[quote="Eric Graves":mz655hsf]For those interested in this topic, an issue has been created in the extension repository to add support for .mod files in VS Code:\\n\\nhttps://github.com/hpcc-systems/vscode-ecl/issues/233\\n\\nGreat thanks for sharing!!\", \"post_time\": \"2021-03-18 14:55:26\" },\n\t{ \"post_id\": 33133, \"topic_id\": 8623, \"forum_id\": 33, \"post_subject\": \"Re: Load ECL.mod file in VS Code\", \"username\": \"gsmith\", \"post_text\": \"There is no way to do that currently, for others watching this is the open ticket: https://github.com/hpcc-systems/vscode-ecl/issues/233\", \"post_time\": \"2021-02-25 13:49:56\" },\n\t{ \"post_id\": 33123, \"topic_id\": 8623, \"forum_id\": 33, \"post_subject\": \"Re: Load ECL.mod file in VS Code\", \"username\": \"Eric Graves\", \"post_text\": \"For those interested in this topic, an issue has been created in the extension repository to add support for .mod files in VS Code:\\n\\nhttps://github.com/hpcc-systems/vscode-ecl/issues/233\", \"post_time\": \"2021-02-25 13:48:25\" },\n\t{ \"post_id\": 33083, \"topic_id\": 8623, \"forum_id\": 33, \"post_subject\": \"Load ECL.mod file in VS Code\", \"username\": \"Eric Graves\", \"post_text\": \"In the ECL IDE, I can load a .mod file and it will create a directory and all of the files as directed. How can I do this in VS Code?\\n\\nMy .mod file has special comments that look like this:\\n\\n//Import:rvs2005_0_1.Append_RVS2005_0_1_ModelLayouts\\n... ECL Code\\n//Import:rvs2005_0_1.RVS2005_0_1_OVERALL_Tree1\\n... etc..
\\n\\nWhen I open this file in the ECL IDE, this dialog pops up:\\n\\n\\n\\nHow can I do the same think in VS Code?\", \"post_time\": \"2021-02-24 15:19:02\" },\n\t{ \"post_id\": 34533, \"topic_id\": 9143, \"forum_id\": 33, \"post_subject\": \"ECL code formatter\", \"username\": \"Nathaniel\", \"post_text\": \"https://github.com/nathanielgraham/ecltidy\\n\\nI couldn't find a VSCode extension for formatting ECL code, so I decided to write one myself. ecltidy is a perl script that reads ECL file from STDIN and outputs the results to STDOUT. \\n\\nIt's very basic and doesn't offer many features, but it does integrate nicely into VSCode. See https://github.com/nathanielgraham/ecltidy for install instructions. \\n\\nCheers, Nathaniel\", \"post_time\": \"2021-12-27 20:29:10\" },\n\t{ \"post_id\": 2234, \"topic_id\": 494, \"forum_id\": 34, \"post_subject\": \"HPCC JDBC Driver\", \"username\": \"HPCC Staff\", \"post_text\": \"Now available! The HPCC JDBC Driver provides SQL-based access to HPCC Systems data files and published queries which allows developers to connect to the HPCC Systems platform through a JDBC client.\\n\\nDocumentation and Download information:\\nhttp://hpccsystems.com/products-and-ser ... DBC-Driver\", \"post_time\": \"2012-08-16 21:14:25\" },\n\t{ \"post_id\": 2620, \"topic_id\": 573, \"forum_id\": 34, \"post_subject\": \"Re: ODBC Driver\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Durai, there are third-party ODBC-JDBC bridge/gateways available which you could use in conjunction with the HPCC JDBC driver to gain access to HPCC via ODBC. Thanks.\", \"post_time\": \"2012-10-25 15:18:29\" },\n\t{ \"post_id\": 2612, \"topic_id\": 573, \"forum_id\": 34, \"post_subject\": \"Re: ODBC Driver\", \"username\": \"rtaylor\", \"post_text\": \"Durai,\\n\\nWe do not currently have an ODBC driver, but we do have a JDBC driver available here: http://hpccsystems.com/products-and-services/products/plugins/JDBC-Driver\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2012-10-24 15:42:00\" },\n\t{ \"post_id\": 2611, \"topic_id\": 573, \"forum_id\": 34, \"post_subject\": \"ODBC Driver\", \"username\": \"Durai\", \"post_text\": \"Hi, \\n\\nIs there a ODBC driver for Roxie to connect and query data. Do you have it in roadmap for future release (open source license/or enterprise license?\\n\\nany information will be of great help!\\n\\nThanks and Regards\\nDurai\", \"post_time\": \"2012-10-24 15:11:45\" },\n\t{ \"post_id\": 3350, \"topic_id\": 719, \"forum_id\": 34, \"post_subject\": \"Re: Record definition for a sprayed file\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello everyone,\\n\\nThanks for the inputs - they helped a lot !\\nPlease treat this question as closed.\\n\\nThanks and regards !\", \"post_time\": \"2013-02-06 09:26:07\" },\n\t{ \"post_id\": 3233, \"topic_id\": 719, \"forum_id\": 34, \"post_subject\": \"Re: Record definition for a sprayed file\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Kaliyugantagonist, as Richard pointed out, adding a record definition to a file involves defining the data structure, loading the data into an HPCC dataset, and OUTPUTing the dataset out to an HPCC file... 
\\n\\nsomething like this:\\n\\ntwitter_data_structure := RECORD\\n type fieldname1;\\n type fieldname2;\\n ...\\n type fieldnameN;\\n END;\\n\\ntwitter_data := DATASET('.::adcampaign_twitter_data', twitter_data_structure, CSV);\\nOUTPUT(twitter_data, , '.::hpcc::adcampaign_twitter_data');\\n\\nNow you should be able to query '.::hpcc::adcampaign_twitter_data' using SQL through the JDBC driver. Let us know if there are any questions/concerns/comments. Thanks.\", \"post_time\": \"2013-01-29 17:06:50\" },\n\t{ \"post_id\": 3231, \"topic_id\": 719, \"forum_id\": 34, \"post_subject\": \"Re: Record definition for a sprayed file\", \"username\": \"rtaylor\", \"post_text\": \"Metadata about a dataset is maintained by the DFU and gets written when the OUTPUT action writes the file to disk. To make the sprayed file available to the JDBC driver, just define the sprayed file then write it to disk (with a new filename) with OUTPUT -- this will give the JDBC driver the metadata it needs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-01-29 15:42:05\" },\n\t{ \"post_id\": 3227, \"topic_id\": 719, \"forum_id\": 34, \"post_subject\": \"Record definition for a sprayed file\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi,\\n\\nI have attached a file that has the record in my csv (as csv file upload is not allowed in the forum) that I have sprayed onto an HPCC cluster using ECLWatch. \\n\\nLogical Name: \\t.::adcampaign_twitter_data\\n\\nI'm using HPCC JDBC Driver to access the sprayed data. The query is:\\n\\n
String stmtsql = "select tbl.* from .::adcampaign_twitter_data tbl where User_ID='294766'";
\\n\\nThe error that I'm getting is as follows:\\n\\n2013/01/29 07:05:28.664 - HPCCStatementAttempting to process sql query: select tbl.* from .::adcampaign_twitter_data tbl where User_ID='294766'\\n2013/01/29 07:05:28.664 - INCOMING SQL: select tbl.* from .::adcampaign_twitter_data tbl where User_ID='294766'\\n2013/01/29 07:05:28.679 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=.::ADCAMPAIGN_TWITTER_DATA&rawxml_&filetype=Logical Files and Superfiles\\n2013/01/29 07:05:28.726 - .::adcampaign_twitter_data does not appear to contain a valid record definition.\\njava.sql.SQLException: HPCCStatement: Cannot execute SQL command\\n\\tInvalid or forbidden table found: .::ADCAMPAIGN_TWITTER_DATA\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.executeHPCCQuery(HPCCStatement.java:110)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:255)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:245)\\n\\tat org.hpccsystems.jdbcdriver.tests.HPCCDriverTest.main(HPCCDriverTest.java:1355)\\n2013/01/29 07:05:28.726 - Invalid or forbidden table found: .::ADCAMPAIGN_TWITTER_DATA
\\n\\nAs per the HPCC JDBC documentation :\\n\\nNote: The HPCC JDBC driver only supports files which contain the record definition in the logical file’s\\nmetadata. Sprayed files do not contain this metadata. This metadata exists on any file or index which is\\nwritten to the HPCC Distributed File System. Sprayed data files typically undergo some processing and an\\nOUTPUT of the transformed data to disk before use, so this should not interfere with the driver’s usefulness.\\n\\nIn my case, how must I upload the file onto HPCC - I used the word upload because sprayed files don't seem to be supported by HPCC-JDBC ! Shall I use ECL Plus, DFU etc. for upload?Or something else?\\n\\nThanks and regards !\", \"post_time\": \"2013-01-29 13:51:30\" },\n\t{ \"post_id\": 3466, \"topic_id\": 744, \"forum_id\": 34, \"post_subject\": \"Re: HPCC-JDBC driver to access superfiles\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Kaliyugantagonist, \\nDid you get a chance to try out the changes made for this issue?\\nhttps://github.com/hpcc-systems/hpcc-jdbc/issues/85\", \"post_time\": \"2013-02-15 22:12:13\" },\n\t{ \"post_id\": 3363, \"topic_id\": 744, \"forum_id\": 34, \"post_subject\": \"Re: HPCC-JDBC driver to access superfiles\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Kaliyugantagonist, \\nyou took the right approach, this forum is great for discussing potential issues. Actual issues are then reported and tracked on github. If you open the issue, you'll be able to track it. Otherwise I can open it, and you can add yourself as a watcher. \\n\\nAnyway, HPCCJDBCUtils contains a static instance of NumberFormat format. \\nOne quick way to resolve the threading issue is to declare the object as a ThreadLocal:\\n\\n\\npublic static final ThreadLocal <NumberFormat> format =\\n new ThreadLocal <NumberFormat>()\\n{\\n @Override\\n protected NumberFormat initialValue()\\n {\\n return NumberFormat.getInstance(Locale.US);\\n }\\n};\\n
\\n\\nEvery reference to format then needs to call get()\\n\\n\\n //format.parse(str);\\n format.get().parse(str);\\n
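\\n\\nFor anyone who wants to try the pattern outside the driver first, here is a minimal, standalone sketch of the same idea (the class and thread names are made up purely for illustration; only java.text.NumberFormat and ThreadLocal from the JDK are assumed):\\n\\nimport java.text.NumberFormat;\\nimport java.text.ParseException;\\nimport java.util.Locale;\\n\\npublic class ThreadLocalFormatSketch\\n{\\n    // one NumberFormat per thread instead of a single shared static instance\\n    private static final ThreadLocal<NumberFormat> format =\\n        new ThreadLocal<NumberFormat>()\\n        {\\n            @Override\\n            protected NumberFormat initialValue()\\n            {\\n                return NumberFormat.getInstance(Locale.US);\\n            }\\n        };\\n\\n    public static void main(String[] args) throws InterruptedException\\n    {\\n        Runnable parser = new Runnable()\\n        {\\n            public void run()\\n            {\\n                try\\n                {\\n                    // each thread parses with its own copy, so no shared state is touched\\n                    System.out.println(format.get().parse(&quot;1,234.56&quot;));\\n                }\\n                catch (ParseException e)\\n                {\\n                    e.printStackTrace();\\n                }\\n            }\\n        };\\n        Thread t1 = new Thread(parser);\\n        Thread t2 = new Thread(parser);\\n        t1.start();\\n        t2.start();\\n        t1.join();\\n        t2.join();\\n    }\\n}\\n\\nThe same get() call is what the driver code would then use everywhere it previously touched the static format field.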
\", \"post_time\": \"2013-02-06 17:55:34\" },\n\t{ \"post_id\": 3360, \"topic_id\": 744, \"forum_id\": 34, \"post_subject\": \"Re: HPCC-JDBC driver to access superfiles\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hi Rodrigo,\\n\\nThanks for your inputs.\\n\\nActually, the code cleaning/optimization is our next activity - right now, we are just trying things out.\\nBefore posting this issue, I went through list of open and closed issues on the GitHub link and was not able to find anything similar to what I have reported. I thought I must put the issue here first and depending upon the response may/may not report it !\\n\\nCan you specify in which class exactly is this non-thread safe usage of NumberFormat present so that I too can give a try to solve this issue?\\n\\nThanks and regards !\", \"post_time\": \"2013-02-06 17:05:50\" },\n\t{ \"post_id\": 3358, \"topic_id\": 744, \"forum_id\": 34, \"post_subject\": \"Re: HPCC-JDBC driver to access superfiles\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"kaliyugantagonist, I wasn't able to recreate the exact issue you're seeing, but the logs you provided lead me to what I think is the problem.\\n\\nBut first let me mention that using a new connection object for every query is very expensive, in practice you'd prob want to share HPCCPreparedStatement objects created from a single connection. \\n\\nAnyway it appears the problem stems from the shared use of a java.text.NumberFormat object. Turns out the NumberFormat object is not thread safe!\\n\\nIf you have github access I encourage you to report the issue there: \\nhttps://github.com/hpcc-systems/hpcc-jdbc/issues\\n\\nThanks.\", \"post_time\": \"2013-02-06 16:19:54\" },\n\t{ \"post_id\": 3351, \"topic_id\": 744, \"forum_id\": 34, \"post_subject\": \"HPCC-JDBC driver to access superfiles\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nWe have successfully implemented the following things:\\nA web application pushes business module-wise csv files at regular intervals which are added as sub-files to the corresponding, pre-created super-files
\\nThe same web application queries these super-files via the HPCC-JDBC driver. When the web application/sample java code generates only a single query, the data is fetched properly from the super-file
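\\n\\nFor reference, the single-query path that works boils down to roughly the sketch below; the connection properties, driver classes and superfile name are the same ones used in the attached test classes, and everything else is simplified:\\n\\nimport java.sql.Connection;\\nimport java.sql.ResultSet;\\nimport java.sql.Statement;\\nimport java.util.Properties;\\n\\nimport org.hpccsystems.jdbcdriver.HPCCDriver;\\n\\npublic class SingleQuerySketch\\n{\\n    public static void main(String[] args) throws Exception\\n    {\\n        Properties info = new Properties();\\n        info.put(&quot;ServerAddress&quot;, &quot;10.101.2.170&quot;);\\n        info.put(&quot;TargetCluster&quot;, &quot;thor&quot;);\\n        info.put(&quot;WsECLWatchPort&quot;, &quot;8010&quot;);\\n        info.put(&quot;WsECLDirectPort&quot;, &quot;8010&quot;);\\n        info.put(&quot;LazyLoad&quot;, &quot;true&quot;);\\n\\n        // one connection, one statement, one query against the superfile\\n        HPCCDriver driver = new HPCCDriver();\\n        Connection conn = driver.connect(&quot;&quot;, info);\\n        Statement stmt = conn.createStatement();\\n        ResultSet rs = stmt.executeQuery(\\n            &quot;select tbl.* from test::superfile::clientwebcrawldatabasefile tbl&quot;);\\n\\n        int rows = 0;\\n        while (rs.next())\\n            rows++;\\n        System.out.println(&quot;rows: &quot; + rows);\\n    }\\n}\\n\\nThe log of that successful single-threaded run is below: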
\\n\\n2013/02/06 01:43:15.816 - HPCC JDBC Driver registered.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@665753\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@665753\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@665753\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@665753\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? 
limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@665753\\n\\nHPCC Driver completed as expected - Verify results.\\nFor loop running for : REQ1\\n2013/02/06 01:43:15.832 - HPCCConnection jdbc url: \\n2013/02/06 01:43:15.832 - Couldn't determine log level, will log at default level: INFO\\n2013/02/06 01:43:15.832 - HPCCDriver::connect10.101.2.170\\n2013/02/06 01:43:15.847 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/06 01:43:15.847 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/06 01:43:16.222 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/06 01:43:16.222 - HPCCDatabaseMetaData initialized\\n2013/02/06 01:43:16.222 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/06 01:43:16.222 - HPCCConnection: createStatement( )\\n2013/02/06 01:43:16.222 - HPCCStatementConstructor(conn)\\nBefore Timestamp for REQ1 is: Wed Feb 06 13:43:16 IST 2013\\n2013/02/06 01:43:16.222 - HPCCStatementAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:16.222 - INCOMING SQL: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:16.238 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=TEST::SUPERFILE::CLIENTWEBCRAWLDATABASEFILE&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/06 01:43:17.097 - Found super file: test::superfile::clientwebcrawldatabasefile\\n2013/02/06 01:43:17.097 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=test::subfile::clientwebcrawldata1_1344234710&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/06 01:43:17.207 - Processing superfile: test::superfile::clientwebcrawldatabasefile\\n2013/02/06 01:43:17.207 - \\tUsing record definition from: test::subfile::clientwebcrawldata1_1344234710\\n2013/02/06 01:43:17.222 - NOT USING INDEX!\\n2013/02/06 01:43:17.222 - HPCC URL created: http://10.101.2.170:8010/EclDirect/RunEcl?Submit&cluster=thor\\n2013/02/06 01:43:17.222 - HPCCStatement: execute()\\n2013/02/06 01:43:17.222 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:17.222 - HPCCStatement: executeQuery()\\n2013/02/06 01:43:17.222 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:17.222 - Executing ECL: &eclText=\\nimport std;\\nTblDS0RecDef := RECORD varstring controller_id; varstring user_id; varstring url_link; varstring url_content; real8 url_sentiment; varstring url_date; integer8 unique_search_id; integer8 insertid; varstring statusid; varstring search_pattern; varstring word_ignored; varstring search_date; varstring detected_language; END; \\nTblDS0 := DATASET('~test::superfile::clientwebcrawldatabasefile', TblDS0RecDef,FLAT);\\nSelectStruct := RECORD\\n varstring user_id := TblDS0.user_id;\\n varstring url_date := TblDS0.url_date;\\n varstring url_link := TblDS0.url_link;\\n varstring search_date := TblDS0.search_date;\\n varstring url_content := TblDS0.url_content;\\n varstring controller_id := TblDS0.controller_id;\\n varstring statusid := TblDS0.statusid;\\n varstring search_pattern := TblDS0.search_pattern;\\n integer8 unique_search_id := TblDS0.unique_search_id;\\n varstring word_ignored := 
TblDS0.word_ignored;\\n real8 url_sentiment := TblDS0.url_sentiment;\\n varstring detected_language := TblDS0.detected_language;\\n integer8 insertid := TblDS0.insertid;\\nEND;\\nDSTable := TABLE( TblDS0, SelectStruct );\\nOUTPUT(CHOOSEN(DSTable,ALL),NAMED('JDBCSelectQueryResult'));\\n\\n2013/02/06 01:43:47.315 - Total elapsed http request/response time in milliseconds: 30093\\n2013/02/06 01:43:47.315 - Parsing results...\\n2013/02/06 01:43:47.487 - Results datsets found: 1\\n2013/02/06 01:43:47.487 - Finished Parsing results.\\n2013/02/06 01:43:47.487 - HPCCResultSet encapsulateDataSet\\n2013/02/06 01:43:47.519 - Results rows found: 9756\\n2013/02/06 01:43:47.800 - HPCCStatement: executeQuery(select tbl.* from test::superfile::clientwebcrawldatabasefile tbl )\\n2013/02/06 01:43:47.800 - HPCCStatementAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:47.800 - INCOMING SQL: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:47.800 - NOT USING INDEX!\\n2013/02/06 01:43:47.800 - HPCC URL created: http://10.101.2.170:8010/EclDirect/RunEcl?Submit&cluster=thor\\n2013/02/06 01:43:47.800 - HPCCStatement: executeQuery()\\n2013/02/06 01:43:47.800 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:43:47.800 - Executing ECL: &eclText=\\nimport std;\\nTblDS0RecDef := RECORD varstring controller_id; varstring user_id; varstring url_link; varstring url_content; real8 url_sentiment; varstring url_date; integer8 unique_search_id; integer8 insertid; varstring statusid; varstring search_pattern; varstring word_ignored; varstring search_date; varstring detected_language; END; \\nTblDS0 := DATASET('~test::superfile::clientwebcrawldatabasefile', TblDS0RecDef,FLAT);\\nSelectStruct := RECORD\\n varstring user_id := TblDS0.user_id;\\n varstring url_date := TblDS0.url_date;\\n varstring url_link := TblDS0.url_link;\\n varstring search_date := TblDS0.search_date;\\n varstring url_content := TblDS0.url_content;\\n varstring controller_id := TblDS0.controller_id;\\n varstring statusid := TblDS0.statusid;\\n varstring search_pattern := TblDS0.search_pattern;\\n integer8 unique_search_id := TblDS0.unique_search_id;\\n varstring word_ignored := TblDS0.word_ignored;\\n real8 url_sentiment := TblDS0.url_sentiment;\\n varstring detected_language := TblDS0.detected_language;\\n integer8 insertid := TblDS0.insertid;\\nEND;\\nDSTable := TABLE( TblDS0, SelectStruct );\\nOUTPUT(CHOOSEN(DSTable,ALL),NAMED('JDBCSelectQueryResult'));\\n\\n2013/02/06 01:44:18.315 - Total elapsed http request/response time in milliseconds: 30515\\n2013/02/06 01:44:18.315 - Parsing results...\\n2013/02/06 01:44:18.502 - Results datsets found: 1\\n2013/02/06 01:44:18.502 - Finished Parsing results.\\n2013/02/06 01:44:18.502 - HPCCResultSet encapsulateDataSet\\n2013/02/06 01:44:18.534 - Results rows found: 9756\\n**********result : 9756\\nAfter Timestamp is: Wed Feb 06 13:44:18 IST 2013
\\n\\n[color=#FF0000:1uj6cv0q]The real issue crops up when multiple threads try to access the same super-file via the same query\\n\\n2013/02/06 01:46:45.484 - HPCC JDBC Driver registered.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@3a6727\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@3a6727\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@3a6727\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@3a6727\\n\\nHPCC Driver completed as expected - Verify results.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? 
limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@3a6727\\n\\nHPCC Driver completed as expected - Verify results.\\nFor loop running for : REQ1\\n2013/02/06 01:46:45.500 - HPCCConnection jdbc url: \\n2013/02/06 01:46:45.500 - Couldn't determine log level, will log at default level: INFO\\nFor loop running for : REQ2\\n2013/02/06 01:46:45.515 - HPCCConnection jdbc url: \\n2013/02/06 01:46:45.515 - Couldn't determine log level, will log at default level: INFO\\n2013/02/06 01:46:45.515 - HPCCDriver::connect10.101.2.170\\n2013/02/06 01:46:45.515 - HPCCDriver::connect10.101.2.170\\n2013/02/06 01:46:45.515 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/06 01:46:45.515 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/06 01:46:45.515 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/06 01:46:45.515 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/06 01:46:45.531 - Could not fetch HPCC info.\\n2013/02/06 01:46:45.531 - Could not fetch HPCC info.\\n2013/02/06 01:46:45.531 - Could not fetch cluster information.\\n2013/02/06 01:46:45.531 - Could not fetch cluster information.\\n2013/02/06 01:46:45.531 - Could not fetch cluster information.\\n2013/02/06 01:46:45.531 - Could not fetch cluster information.\\n2013/02/06 01:46:45.531 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/06 01:46:45.531 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/06 01:46:45.531 - Could not query HPCC metadata check server address, cluster name, wsecl, and wseclwatch configuration.\\n2013/02/06 01:46:45.531 - HPCCDatabaseMetaData initialized\\n2013/02/06 01:46:45.531 - Could not query HPCC metadata check server address, cluster name, wsecl, and wseclwatch configuration.\\n2013/02/06 01:46:45.531 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/06 01:46:45.531 - HPCCDatabaseMetaData initialized\\n2013/02/06 01:46:45.531 - HPCCConnection: createStatement( )\\n2013/02/06 01:46:45.531 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/06 01:46:45.531 - HPCCConnection: createStatement( )\\n2013/02/06 01:46:45.531 - HPCCStatementConstructor(conn)\\n2013/02/06 01:46:45.531 - HPCCStatementConstructor(conn)\\nBefore Timestamp for REQ2 is: Wed Feb 06 13:46:45 IST 2013\\nBefore Timestamp for REQ1 is: Wed Feb 06 13:46:45 IST 2013\\n2013/02/06 01:46:45.531 - HPCCStatementAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.531 - HPCCStatementAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.531 - INCOMING SQL: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.531 - INCOMING SQL: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\njava.lang.IllegalArgumentException: timeouts can't be negative\\n\\tat sun.net.www.protocol.http.HttpURLConnection.setReadTimeout(HttpURLConnection.java:2358)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.createHPCCESPConnection(HPCCDatabaseMetaData.java:2892)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.fetchHPCCFileInfo(HPCCDatabaseMetaData.java:2202)\\n\\tat 
org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.tableExists(HPCCDatabaseMetaData.java:2720)\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.generateSelectECL(ECLEngine.java:129)\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.generateECL(ECLEngine.java:640)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.processQuery(HPCCStatement.java:72)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:243)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.queryHPCC(HPCCDriverTest.java:848)\\n\\tat org.hpccsystems.jdbcdriver.HPCCQueryRunnable.run(HPCCQueryRunnable.java:28)\\n\\tat java.lang.Thread.run(Thread.java:619)\\njava.lang.IllegalArgumentException: timeouts can't be negative\\n\\tat sun.net.www.protocol.http.HttpURLConnection.setReadTimeout(HttpURLConnection.java:2358)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.createHPCCESPConnection(HPCCDatabaseMetaData.java:2892)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.fetchHPCCFileInfo(HPCCDatabaseMetaData.java:2202)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData.tableExists(HPCCDatabaseMetaData.java:2720)\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.generateSelectECL(ECLEngine.java:129)\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.generateECL(ECLEngine.java:640)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.processQuery(HPCCStatement.java:72)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:243)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.queryHPCC(HPCCDriverTest.java:848)\\n\\tat org.hpccsystems.jdbcdriver.HPCCQueryRunnable.run(HPCCQueryRunnable.java:28)\\n\\tat java.lang.Thread.run(Thread.java:619)\\n2013/02/06 01:46:45.546 - Invalid or forbidden table found: TEST::SUPERFILE::CLIENTWEBCRAWLDATABASEFILE\\n2013/02/06 01:46:45.546 - Invalid or forbidden table found: TEST::SUPERFILE::CLIENTWEBCRAWLDATABASEFILE\\n2013/02/06 01:46:45.546 - HPCCStatement: execute()\\n2013/02/06 01:46:45.546 - HPCCStatement: execute()\\n2013/02/06 01:46:45.546 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.546 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.546 - HPCCStatement: executeQuery()\\n2013/02/06 01:46:45.546 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\n2013/02/06 01:46:45.546 - HPCCStatement: executeQuery()\\n2013/02/06 01:46:45.546 - \\tAttempting to process sql query: select tbl.* from test::superfile::clientwebcrawldatabasefile tbl \\njava.sql.SQLException: HPCCStatement: Cannot execute SQL command\\n\\tInvalid or forbidden table found: TEST::SUPERFILE::CLIENTWEBCRAWLDATABASEFILE\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.executeHPCCQuery(HPCCStatement.java:110)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:255)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:245)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.queryHPCC(HPCCDriverTest.java:848)\\n\\tat org.hpccsystems.jdbcdriver.HPCCQueryRunnable.run(HPCCQueryRunnable.java:28)\\n\\tat java.lang.Thread.run(Thread.java:619)\\njava.sql.SQLException: HPCCStatement: Cannot execute SQL command\\n\\tInvalid or forbidden table found: TEST::SUPERFILE::CLIENTWEBCRAWLDATABASEFILE\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.executeHPCCQuery(HPCCStatement.java:110)\\n\\tat 
org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:255)\\nAfter Timestamp is: Wed Feb 06 13:46:45 IST 2013\\nAfter Timestamp is: Wed Feb 06 13:46:45 IST 2013\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:245)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.queryHPCC(HPCCDriverTest.java:848)\\n\\tat org.hpccsystems.jdbcdriver.HPCCQueryRunnable.run(HPCCQueryRunnable.java:28)\\n\\tat java.lang.Thread.run(Thread.java:619)
\\n\\nI have attached the following components:\\nHPCCDriverTest.txt - actually, a java file
\\nHPCCQueryRunnable.java - actually, a java file
\\nTester.java - actually, a java file
\\n\\nI did some analysis of the exception and HPCC-JDBC driver code - the primary error is at HPCCDatabaseMetaData.fetchHPCCInfo() as the first error in the log is [color=#FF0000:1uj6cv0q]"Could not fetch HPCC info."\\n\\nIn the class which I have uploaded (HPCCQueryRunnable.java), I attempted to connect http://10.101.2.170:8010/WsSMC/Activity?rawxml_ as I was suspecting that this address is not accessible. But both the threads get a response !\\n\\nThis is a core and crucial issue at our end - any inputs are welcome !\\n\\nThanks and regards !\", \"post_time\": \"2013-02-06 10:43:22\" },\n\t{ \"post_id\": 3385, \"topic_id\": 746, \"forum_id\": 34, \"post_subject\": \"Re: Connection pooling and other performance considerations\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hello again kaliyugantagonist, \\nIn this case, if connection pooling necessary, it is the responsibility of the caller.\\nThere's currently no plan to integrate connection pooling into the driver( but remember contributions are always welcomed).\\n\\nThe HPCCConnection can be seen as a session. It fetches and retains information needed to handle incoming SQL requests, and user configuration info. It gathers db metadata from the HPCC system via web services in an on-demand manner.\\n\\nDepending on the situation, one HPCCConnection object should be created and either HPCCStatement(s) or HPCCPreparedStatement(s) should be created in order to execute the SQL queries. Each statement object communicates with the HPCC system via WebServices in an on-demand manner.\\n\\nIf your queries tend to follow a pattern, you should set-up an HPCCPreparedStatement with parameter placeholders, re-use the object while providing the input param values at execute time. Thanks.\", \"post_time\": \"2013-02-07 18:36:23\" },\n\t{ \"post_id\": 3367, \"topic_id\": 746, \"forum_id\": 34, \"post_subject\": \"Connection pooling and other performance considerations\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nIn the web applications where a database is the back-end, the connection pooling is handled either by the hosting application server or some third-party library e.g: Apache DBCP - rarely someone writes connection pooling logic these days \\n\\nAs per my understanding, following the flow when HPCC-JDBC driver is used :\\n
An instance of HPCCConnection is created
\\nInternally, a HTTP connection is established with the ESP server
\\nBased on the SQL query, the ECL code is generated and sent over the above connection for 'direct ecl' execution
\\n\\nOne can imagine that in case of 100s of simultaneous users, the web application will generate 1000s of SQL queries. Now, the following things are crucial for an acceptable performance:\\n\\nThere is a pool of connections to the HPCC cluster so that neither a new connection is created every time nor can unlimited connections be opened to the HPCC cluster
\\nThe Statement objects must be reused and cached
\\nSharing connection objects between different requests must be done so that a limited no. of connections can serve a relatively large no. of users
\\n\\nI flipped through the HPCC-JDBC driver's source code. Is there currently a way to ensure the listed things for performance tuning?If yes, what and where are the configurations located?If no, what can be done currently to optimize the performance?\\n\\nThanks and regards !\", \"post_time\": \"2013-02-07 10:27:37\" },\n\t{ \"post_id\": 3425, \"topic_id\": 752, \"forum_id\": 34, \"post_subject\": \"Re: Query for ROXIE\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi Prachi,\\nThe TargetCluster configuration entry is the name of the HPCC cluster on which to execute your "Select" queries. \\nOne quick way to find the available TargetClusters is here:\\nhttp:///<yourHPCCip>:<yourwseclwatchport>/WsSMC/Activity.\\nThe names follow the cluster types and are separated by a dash.\\n\\nThe QuerySet entry is the name of the queryset which contains the published query (stored procedure) you intend to execute.\\n\\nYou can find the list of querysets here:\\nhttp://<yourHPCCip>:<yourwseclwatchport>/WsWorkunits/WUQuerysets\\n\\nYou can also fully-qualify the published query if you'd like to override the "QuerySet" configuration value: call <yourquerysetname>::<yourpublishedqueryname(...);\\n\\nLet us know if there are any other questions. Thanks.\", \"post_time\": \"2013-02-12 15:34:47\" },\n\t{ \"post_id\": 3414, \"topic_id\": 752, \"forum_id\": 34, \"post_subject\": \"Query for ROXIE\", \"username\": \"prachi\", \"post_text\": \"Hi,\\nI have started with following steps:\\n\\nStep1:Create Superfile\\nStep2:Create one logical file with record structure and add it to Superfile as subfile\\nStep3:Create INDEX on Superfile\\nStep4:ECL code to publish file on ROXIE.Code is as follow:\\n\\nIMPORT STD;\\n\\nExport QueryToRunOnRoxie():=Function\\nSTRING100 insertid_value := '' : STORED('insertid');\\n\\nLayout_ClientWebCrawlData := RECORD\\n VARSTRING Controller_ID;\\n VARSTRING User_ID;\\n VARSTRING URL_Link;\\n VARSTRING URL_Content;\\n REAL URL_Sentiment;\\n VARSTRING URL_Date;\\n INTEGER Unique_Search_ID;\\n STRING100 InsertID;\\n VARSTRING StatusID;\\n VARSTRING Search_Pattern;\\n VARSTRING Word_Ignored;\\n VARSTRING Search_Date;\\n VARSTRING Detected_Language;\\n END;\\n\\t \\nVARSTRING Superfilename := '~roxietrail::superfile::clientwebcrawldata';\\nVARSTRING index_filename := '~roxietrail::superfile::index';\\n\\n// create dataset of superfile\\nSuperFile_Dataset := DATASET(Superfilename,{Layout_ClientWebCrawlData, \\nUNSIGNED8 fileposition {VIRTUAL(fileposition)}}, CSV);\\n\\n// create index on superfile\\nIDX_SuperFile := INDEX(SuperFile_Dataset,{InsertID,fileposition},index_filename);\\n\\n//fetch records \\nfetched_records := FETCH(SuperFile_Dataset, IDX_SuperFile(insertid=insertid_value), RIGHT.fileposition);\\n\\n//store output\\ngetOutput := OUTPUT(fetched_records);\\n\\nRETURN getOutput;\\nEND;\\n\\n
\\n\\nAbove code was published successfully!\\n\\n\\nNow when i am trying to call <published_file_name>(in my case it is \\tquerytorunonroxie) through java code error is generated as:\\n\\nCode1:\\ninfo.put("ServerAddress", "10.101.2.170");\\ninfo.put("LazyLoad", "true");\\ninfo.put("TargetCluster", "myroxie");\\ninfo.put("QuerySet", "myroxie");\\ninfo.put("WsECLWatchPort", "8010");\\ninfo.put("WsECLDirectPort", "8010");\\ninfo.put("EclResultLimit", "10");\\ninfo.put("PageSize", "");\\n\\ninfourl = "jdbc:hpcc;ServerAddress=10.101.2.170;TargetCluster=myroxie;EclResultLimit=8";\\n\\n\\nsuccess &= executeFreeHandSQL(info,"call QueryToRunOnRoxie( )",params);
\\n\\nError:(for Code1)\\n2013/02/11 04:55:55.083 - HPCCConnection jdbc url: \\n2013/02/11 04:55:55.083 - HPCCDriver::connect10.101.2.170\\n2013/02/11 04:55:55.083 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: myroxie eclwatch: http://10.101.2.170:8010\\n2013/02/11 04:55:55.083 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/11 04:55:55.380 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/11 04:55:55.380 - Invalid cluster name found: myroxie. using: hthor\\n2013/02/11 04:55:55.380 - Invalid query set name found: myroxie. using: hthor\\n2013/02/11 04:55:55.380 - HPCCDatabaseMetaData initialized\\n2013/02/11 04:55:55.380 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/11 04:55:55.380 - HPCCConnection: prepareStatement( call QueryToRunOnRoxie( ) )\\n2013/02/11 04:55:55.380 - HPCCStatementConstructor(conn)\\n2013/02/11 04:55:55.380 - HPCCPreparedStatement Constructor: Sqlquery: call QueryToRunOnRoxie( )\\n2013/02/11 04:55:55.380 - HPCCStatementAttempting to process sql query: call QueryToRunOnRoxie( )\\n2013/02/11 04:55:55.380 - INCOMING SQL: call QueryToRunOnRoxie( )\\n2013/02/11 04:55:55.443 - Invalid Stored Procedure found, verify name and QuerySet: QueryToRunOnRoxie\\nErrr: Invalid Stored Procedure found, verify name and QuerySet: QueryToRunOnRoxie\\n
\\n\\n\\nCode2:\\ninfo.put("ServerAddress", "10.101.2.170");\\ninfo.put("LazyLoad", "true");\\ninfo.put("TargetCluster", "roxie");\\ninfo.put("QuerySet", "roxie");\\ninfo.put("WsECLWatchPort", "8010");\\ninfo.put("WsECLDirectPort", "8010");\\ninfo.put("EclResultLimit", "10");\\ninfo.put("PageSize", "");\\n\\ninfourl = "jdbc:hpcc;ServerAddress=10.101.2.170;TargetCluster=roxie;EclResultLimit=8";\\n\\n\\nsuccess &= executeFreeHandSQL(info,"call QueryToRunOnRoxie( )",params);
\\n\\nError:(for Code2)\\n2013/02/11 05:14:14.154 - HPCCConnection jdbc url: \\n2013/02/11 05:14:14.169 - HPCCDriver::connect10.101.2.170\\n2013/02/11 05:14:14.169 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: roxie eclwatch: http://10.101.2.170:8010\\n2013/02/11 05:14:14.169 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/11 05:14:14.497 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/11 05:14:14.497 - HPCCDatabaseMetaData initialized\\n2013/02/11 05:14:14.497 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/11 05:14:14.497 - HPCCConnection: prepareStatement( call QueryToRunOnRoxie( ) )\\n2013/02/11 05:14:14.497 - HPCCStatementConstructor(conn)\\n2013/02/11 05:14:14.497 - HPCCPreparedStatement Constructor: Sqlquery: call QueryToRunOnRoxie( )\\n2013/02/11 05:14:14.497 - HPCCStatementAttempting to process sql query: call QueryToRunOnRoxie( )\\n2013/02/11 05:14:14.497 - INCOMING SQL: call QueryToRunOnRoxie( )\\n2013/02/11 05:14:14.560 - Invalid Stored Procedure found, verify name and QuerySet: QueryToRunOnRoxie\\nErrr: Invalid Stored Procedure found, verify name and QuerySet: QueryToRunOnRoxie
\\n\\n\\nI have attached screenshot of Target Clusters and Java code.\\n\\nQuestion:\\n1. Where is the problem in java code?\\n2. What name should be provided: myroxie/roxie?? Asked because when i use myroxie it says that Invalid query set name found: myroxie. using: hthor
\\n\\nThanks and Regards!!\", \"post_time\": \"2013-02-11 12:08:29\" },\n\t{ \"post_id\": 3422, \"topic_id\": 753, \"forum_id\": 34, \"post_subject\": \"Re: Queries on Roxie\", \"username\": \"bforeman\", \"post_text\": \"The issue seems to be that Roxie does not like the file name starting with “.” even though it is handled fine in hthor, and thor. We are looking into it, but try changing the filename to something else for now.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-02-12 12:59:09\" },\n\t{ \"post_id\": 3415, \"topic_id\": 753, \"forum_id\": 34, \"post_subject\": \"Queries on Roxie\", \"username\": \"kaliyugantagonist\", \"post_text\": \"Hello,\\n\\nI have sprayed a csv file via an ECL code and the logical name is .::subfile::cwcd_sub1 (the name is deceiving - it is NOT a sub/superfile but a normal logical file )\\n\\nI'm using the HPCCDriverTest.java provided along with the HPCC-JDBC driver. I have only replaced the parameters and the logical file name :\\n\\n
package org.hpccsystems.jdbcdriver;\\n\\nimport java.sql.Driver;import java.sql.DriverManager;\\nimport java.sql.DriverPropertyInfo;\\nimport java.sql.PreparedStatement;\\nimport java.sql.ResultSet;\\nimport java.sql.ResultSetMetaData;\\nimport java.sql.SQLException;\\nimport java.sql.Timestamp;\\nimport java.util.ArrayList;\\nimport java.util.Date;\\nimport java.util.List;\\nimport java.util.Properties;\\n\\nimport org.hpccsystems.jdbcdriver.HPCCConnection;\\nimport org.hpccsystems.jdbcdriver.HPCCDatabaseMetaData;\\nimport org.hpccsystems.jdbcdriver.HPCCDriver;\\nimport org.hpccsystems.jdbcdriver.HPCCJDBCUtils;\\nimport org.hpccsystems.jdbcdriver.HPCCPreparedStatement;\\nimport org.hpccsystems.jdbcdriver.HPCCResultSet;\\nimport org.hpccsystems.jdbcdriver.HPCCStatement;\\n\\npublic class HPCCDriverTest {\\n\\tstatic private HPCCDriver driver;\\n\\n\\tstatic {\\n\\t\\tdriver = new HPCCDriver();\\n\\t\\ttry {\\n\\t\\t\\tDriverPropertyInfo[] info = driver.getPropertyInfo("", null);\\n\\n\\t\\t\\t/*\\n\\t\\t\\t * System.out .println(\\n\\t\\t\\t * "-----------------Driver Properties----------------------------------"\\n\\t\\t\\t * );\\n\\t\\t\\t */\\n\\t\\t\\t/*\\n\\t\\t\\t * for (int i = 0; i < info.length; i++) System.out.println("\\\\t" +\\n\\t\\t\\t * info[i].name + ": " + info[i].description); System.out .println(\\n\\t\\t\\t * "\\\\n--------------------------------------------------------------------"\\n\\t\\t\\t * );\\n\\t\\t\\t */\\n\\t\\t} catch (SQLException e) {\\n\\t\\t\\t// TODO Auto-generated catch block\\n\\t\\t\\te.printStackTrace();\\n\\t\\t}\\n\\t}\\n\\n\\tprivate static boolean testLazyLoading(Properties conninfo) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tconninfo.put("LazyLoad", "true");\\n\\t\\t\\tHPCCConnection connection = (HPCCConnection) driver.connect("",\\n\\t\\t\\t\\t\\tconninfo);\\n\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("No query nor file loading should have occured yet.");\\n\\n\\t\\t\\tResultSet procs = connection.getMetaData().getProcedures(null,\\n\\t\\t\\t\\t\\tnull, null);\\n\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("Queries should be cached now. 
No files should be cached yet.");\\n\\n\\t\\t\\tSystem.out.println("procs found: ");\\n\\t\\t\\twhile (procs.next()) {\\n\\t\\t\\t\\tSystem.out.println(" " + procs.getString("PROCEDURE_NAME"));\\n\\t\\t\\t}\\n\\n\\t\\t\\tResultSet tables = connection.getMetaData().getTables(null, null,\\n\\t\\t\\t\\t\\t"%", new String[] { "" });\\n\\n\\t\\t\\tSystem.out.println("Tables found: ");\\n\\t\\t\\twhile (tables.next()) {\\n\\t\\t\\t\\tSystem.out.println(" " + tables.getString("TABLE_NAME")\\n\\t\\t\\t\\t\\t\\t+ " Remarks: \\\\'" + tables.getString("REMARKS") + "\\\\'");\\n\\t\\t\\t}\\n\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\te.printStackTrace();\\n\\t\\t\\tSystem.out.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static boolean createStandAloneDataMetadata(Properties conninfo) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCDatabaseMetaData dbmetadata = new HPCCDatabaseMetaData(conninfo);\\n\\t\\t\\tsuccess = getDatabaseInfo(dbmetadata);\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\te.printStackTrace();\\n\\t\\t\\tSystem.out.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static HPCCConnection connectViaProps(Properties conninfo) {\\n\\t\\tHPCCConnection connection = null;\\n\\t\\ttry {\\n\\t\\t\\tconnection = (HPCCConnection) driver.connect("", conninfo);\\n\\t\\t} catch (Exception e) {\\n\\t\\t}\\n\\t\\treturn connection;\\n\\t}\\n\\n\\tprivate static HPCCConnection connectViaUrl(String conninfo) {\\n\\t\\tHPCCConnection connection = null;\\n\\t\\ttry {\\n\\t\\t\\tconnection = (HPCCConnection) driver.connect(conninfo, null);\\n\\t\\t} catch (Exception e) {\\n\\t\\t}\\n\\t\\treturn connection;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printouttable(HPCCConnection connection,\\n\\t\\t\\tString tablename) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet table = connection.getMetaData().getTables(null, null,\\n\\t\\t\\t\\t\\ttablename, null);\\n\\n\\t\\t\\twhile (table.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + table.getString("TABLE_NAME"));\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printoutExportedKeys(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet keys = connection.getMetaData().getExportedKeys(null,\\n\\t\\t\\t\\t\\tnull, null);\\n\\n\\t\\t\\t// while (table.next())\\n\\t\\t\\t// System.out.println("\\\\t" + table.getString("TABLE_NAME"));\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printouttables(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet tables = connection.getMetaData().getTables(null, null,\\n\\t\\t\\t\\t\\t"%", null);\\n\\n\\t\\t\\tSystem.out.println("Tables found: ");\\n\\t\\t\\twhile (tables.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + tables.getString("TABLE_NAME"));\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printouttablecols(HPCCConnection connection,\\n\\t\\t\\tString tablename) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet tablecols = connection.getMetaData().getColumns(null,\\n\\t\\t\\t\\t\\tnull, 
tablename, "%");\\n\\n\\t\\t\\tSystem.out.println("Table cols found: ");\\n\\t\\t\\twhile (tablecols.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + tablecols.getString("TABLE_NAME")\\n\\t\\t\\t\\t\\t\\t+ "::" + tablecols.getString("COLUMN_NAME") + "( "\\n\\t\\t\\t\\t\\t\\t+ tablecols.getString("TYPE_NAME") + " )");\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static boolean printoutalltablescols(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet tablecols = connection.getMetaData().getColumns(null,\\n\\t\\t\\t\\t\\tnull, "%", "%");\\n\\n\\t\\t\\tSystem.out.println("Table cols found: ");\\n\\t\\t\\twhile (tablecols.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + tablecols.getString("TABLE_NAME")\\n\\t\\t\\t\\t\\t\\t+ "::" + tablecols.getString("COLUMN_NAME") + "( "\\n\\t\\t\\t\\t\\t\\t+ tablecols.getString("TYPE_NAME") + " )");\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printoutprocs(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet procs = connection.getMetaData().getProcedures(null,\\n\\t\\t\\t\\t\\tnull, null);\\n\\n\\t\\t\\tSystem.out.println("procs found: ");\\n\\t\\t\\twhile (procs.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + procs.getString("PROCEDURE_NAME"));\\n\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static boolean printouttypeinfo(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet types = connection.getMetaData().getTypeInfo();\\n\\n\\t\\t\\tSystem.out.println("ECL Types: ");\\n\\t\\t\\tint colcount = types.getMetaData().getColumnCount();\\n\\n\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\tSystem.out.print("[*****"\\n\\t\\t\\t\\t\\t\\t+ types.getMetaData().getColumnName(i) + "*****]");\\n\\t\\t\\t}\\n\\t\\t\\tSystem.out.println("");\\n\\n\\t\\t\\twhile (types.next()) {\\n\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\tSystem.out.print("[ " + types.getObject(i) + " ]");\\n\\t\\t\\t\\t}\\n\\t\\t\\t\\tSystem.out.println();\\n\\t\\t\\t}\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\t@SuppressWarnings("unused")\\n\\tprivate static boolean printoutproccols(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSet proccols = connection.getMetaData().getProcedureColumns(\\n\\t\\t\\t\\t\\tnull, null, null, null);\\n\\n\\t\\t\\tSystem.out.println("procs cols found: ");\\n\\t\\t\\twhile (proccols.next())\\n\\t\\t\\t\\tSystem.out.println("\\\\t" + proccols.getString("PROCEDURE_NAME")\\n\\t\\t\\t\\t\\t\\t+ proccols.getString("PROCEDURE_NAME") + "::"\\n\\t\\t\\t\\t\\t\\t+ proccols.getString("COLUMN_NAME") + " ("\\n\\t\\t\\t\\t\\t\\t+ proccols.getInt("COLUMN_TYPE") + ")");\\n\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static PreparedStatement createPrepStatement(\\n\\t\\t\\tHPCCConnection hpccconnection, String SQL) throws Exception {\\n\\t\\tif (hpccconnection == null)\\n\\t\\t\\tthrow new Exception("Could not connect with properties object");\\n\\n\\t\\treturn hpccconnection.prepareStatement(SQL);\\n\\t}\\n\\n\\tprivate static boolean reusePrepStatement(PreparedStatement p) 
{\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCResultSet qrs = (HPCCResultSet) ((HPCCPreparedStatement) p)\\n\\t\\t\\t\\t\\t.executeQuery();\\n\\n\\t\\t\\tResultSetMetaData meta = qrs.getMetaData();\\n\\t\\t\\tSystem.out.println();\\n\\n\\t\\t\\tint colcount = meta.getColumnCount();\\n\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\tSystem.out.print("[*****" + meta.getColumnName(i) + "*****]");\\n\\t\\t\\t}\\n\\t\\t\\tSystem.out.println("");\\n\\n\\t\\t\\twhile (qrs.next()) {\\n\\t\\t\\t\\tSystem.out.println();\\n\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\tSystem.out.print("[ " + qrs.getObject(i) + " ]");\\n\\t\\t\\t\\t}\\n\\t\\t\\t}\\n\\t\\t\\tSystem.out.println("\\\\nTotal Records found: " + qrs.getRowCount());\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tpublic static boolean testClosePrepStatementUse(Properties conninfo) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCConnection connectionprops = connectViaProps(conninfo);\\n\\t\\t\\tif (connectionprops == null)\\n\\t\\t\\t\\tthrow new Exception("Could not connect with properties object");\\n\\n\\t\\t\\tString SQL = "select tbl.* from Superfile::ClientWebCrawlDataBaseFile tbl where User_ID='294766'";\\n\\t\\t\\tHPCCPreparedStatement p = (HPCCPreparedStatement) createPrepStatement(\\n\\t\\t\\t\\t\\tconnectionprops, SQL);\\n\\n\\t\\t\\tp.close();\\n\\t\\t\\tp.execute();\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tpublic static boolean testPrepStatementReuse(Properties conninfo) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCConnection connectionprops = connectViaProps(conninfo);\\n\\t\\t\\tif (connectionprops == null)\\n\\t\\t\\t\\tthrow new Exception("Could not connect with properties object");\\n\\n\\t\\t\\tString SQL = "select tbl.* from Superfile::ClientWebCrawlDataBaseFile tbl where User_ID='294766'";\\n\\t\\t\\tHPCCPreparedStatement p = (HPCCPreparedStatement) createPrepStatement(\\n\\t\\t\\t\\t\\tconnectionprops, SQL);\\n\\n\\t\\t\\tfor (int i = 33445; i < 33448; i++) {\\n\\t\\t\\t\\tp.clearParameters();\\n\\t\\t\\t\\tp.setString(1, "'" + Integer.toString(i, 10) + "'");\\n\\t\\t\\t\\tsuccess &= reusePrepStatement(p);\\n\\t\\t\\t}\\n\\n\\t\\t\\t// what?\\n\\t\\t\\tSQL = "call myroxie::fetchpeoplebyzipservice(?)";\\n\\n\\t\\t\\tp = (HPCCPreparedStatement) createPrepStatement(connectionprops,\\n\\t\\t\\t\\t\\tSQL);\\n\\n\\t\\t\\tfor (int i = 33445; i < 33448; i++) {\\n\\t\\t\\t\\tp.clearParameters();\\n\\t\\t\\t\\tp.setString(1, Integer.toString(i, 10));\\n\\t\\t\\t\\tsuccess &= reusePrepStatement(p);\\n\\t\\t\\t}\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\n\\t}\\n\\n\\tpublic static boolean testPrepStatementReuseBadQuery(Properties conninfo) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCConnection connectionprops = connectViaProps(conninfo);\\n\\t\\t\\tif (connectionprops == null)\\n\\t\\t\\t\\tthrow new Exception("Could not connect with properties object");\\n\\n\\t\\t\\tString SQL = "call bogusSPname()";\\n\\t\\t\\tHPCCPreparedStatement p = null;\\n\\t\\t\\ttry {\\n\\t\\t\\t\\tp = (HPCCPreparedStatement) createPrepStatement(\\n\\t\\t\\t\\t\\t\\tconnectionprops, SQL);\\n\\t\\t\\t} catch (Exception e) 
{\\n\\t\\t\\t\\tSystem.out.println("Ignoring expected exception: "\\n\\t\\t\\t\\t\\t\\t+ e.getLocalizedMessage());\\n\\t\\t\\t}\\n\\n\\t\\t\\tp.executeQuery();\\n\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static void executeFreeHandSQL(Properties conninfo, String SQL,\\n\\t\\t\\tList<String> params, boolean expectPass, int minResults,\\n\\t\\t\\tString testName) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tHPCCConnection connectionprops = connectViaProps(conninfo);\\n\\t\\t\\tif (connectionprops == null)\\n\\t\\t\\t\\tthrow new Exception("Could not connect with properties object");\\n\\n\\t\\t\\tPreparedStatement p = connectionprops.prepareStatement(SQL);\\n\\t\\t\\tp.clearParameters();\\n\\n\\t\\t\\tfor (int i = 0; i < params.size(); i++) {\\n\\t\\t\\t\\tp.setObject(i + 1, params.get(i));\\n\\t\\t\\t}\\n\\n\\t\\t\\tHPCCResultSet qrs = (HPCCResultSet) ((HPCCPreparedStatement) p)\\n\\t\\t\\t\\t\\t.executeQuery();\\n\\n\\t\\t\\tResultSetMetaData meta = qrs.getMetaData();\\n\\t\\t\\tSystem.out.println();\\n\\n\\t\\t\\tint colcount = meta.getColumnCount();\\n\\t\\t\\tint resultcount = qrs.getRowCount();\\n\\n\\t\\t\\tif (resultcount > 0) {\\n\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\tSystem.out.print("[*****" + meta.getColumnName(i)\\n\\t\\t\\t\\t\\t\\t\\t+ "*****]");\\n\\t\\t\\t\\t}\\n\\t\\t\\t\\tSystem.out.println("");\\n\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\tSystem.out.print("[^^^^^" + meta.getColumnLabel(i)\\n\\t\\t\\t\\t\\t\\t\\t+ "^^^^^]");\\n\\t\\t\\t\\t}\\n\\t\\t\\t\\tSystem.out.println();\\n\\t\\t\\t\\tfor (int i = 1; i <= meta.getColumnCount(); i++) {\\n\\t\\t\\t\\t\\tSystem.out.print("[+++++"\\n\\t\\t\\t\\t\\t\\t\\t+ HPCCJDBCUtils.convertSQLtype2JavaClassName(meta\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t.getColumnType(i)) + "+++++]");\\n\\t\\t\\t\\t}\\n\\n\\t\\t\\t\\twhile (qrs.next()) {\\n\\t\\t\\t\\t\\tSystem.out.println();\\n\\t\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\t\\tSystem.out.print("[ " + qrs.getObject(i) + " ]");\\n\\t\\t\\t\\t\\t}\\n\\t\\t\\t\\t}\\n\\t\\t\\t}\\n\\n\\t\\t\\tSystem.out.println("\\\\nTotal Records found: " + resultcount);\\n\\n\\t\\t\\tsuccess = (resultcount >= minResults);\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\n\\t\\tif (!success && expectPass)\\n\\t\\t\\tthrow new RuntimeException(testName + " - FAILED!");\\n\\t\\telse if (success && !expectPass)\\n\\t\\t\\tthrow new RuntimeException(testName + " - UNEXPECTEDLY PASSED!");\\n\\t}\\n\\n\\tprivate static boolean testSelect1(HPCCConnection connection) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tPreparedStatement p = connection\\n\\t\\t\\t\\t\\t.prepareStatement("Select 1 AS ONE");\\n\\n\\t\\t\\tHPCCResultSet qrs = (HPCCResultSet) ((HPCCPreparedStatement) p)\\n\\t\\t\\t\\t\\t.executeQuery();\\n\\n\\t\\t\\tSystem.out.println("---------Testing Select 1---------------");\\n\\n\\t\\t\\twhile (qrs.next()) {\\n\\t\\t\\t\\tif (qrs.getInt(1) != 1)\\n\\t\\t\\t\\t\\tsuccess = false;\\n\\t\\t\\t}\\n\\n\\t\\t\\tSystem.out.println("\\\\tTest Success: " + success);\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.err.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tprivate static boolean getDatabaseInfo(HPCCConnection conn) {\\n\\t\\ttry {\\n\\t\\t\\treturn 
getDatabaseInfo((HPCCDatabaseMetaData) conn.getMetaData());\\n\\t\\t} catch (SQLException e) {\\n\\t\\t\\te.printStackTrace();\\n\\t\\t\\treturn false;\\n\\t\\t}\\n\\t}\\n\\n\\tprivate static boolean getDatabaseInfo(HPCCDatabaseMetaData dbmetadata) {\\n\\t\\tboolean success = true;\\n\\t\\ttry {\\n\\t\\t\\tString hpccname = dbmetadata.getDatabaseProductName();\\n\\t\\t\\tString hpccprodver = dbmetadata.getDatabaseProductVersion();\\n\\t\\t\\tint major = dbmetadata.getDatabaseMajorVersion();\\n\\t\\t\\tint minor = dbmetadata.getDatabaseMinorVersion();\\n\\t\\t\\tString sqlkeywords = dbmetadata.getSQLKeywords();\\n\\n\\t\\t\\tSystem.out.println("HPCC System Info:");\\n\\t\\t\\tSystem.out.println("\\\\tProduct Name: " + hpccname);\\n\\t\\t\\tSystem.out.println("\\\\tProduct Version: " + hpccprodver);\\n\\t\\t\\tSystem.out.println("\\\\tProduct Major: " + major);\\n\\t\\t\\tSystem.out.println("\\\\tProduct Minor: " + minor);\\n\\t\\t\\tSystem.out.println("\\\\tDriver Name: " + dbmetadata.getDriverName());\\n\\t\\t\\tSystem.out.println("\\\\tDriver Major: "\\n\\t\\t\\t\\t\\t+ dbmetadata.getDriverMajorVersion());\\n\\t\\t\\tSystem.out.println("\\\\tDriver Minor: "\\n\\t\\t\\t\\t\\t+ dbmetadata.getDriverMinorVersion());\\n\\t\\t\\tSystem.out.println("\\\\tSQL Key Words: " + sqlkeywords);\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.out.println(e.getMessage());\\n\\t\\t\\tsuccess = false;\\n\\t\\t}\\n\\t\\treturn success;\\n\\t}\\n\\n\\tpublic static synchronized boolean printOutResultSet(\\n\\t\\t\\tHPCCResultSet resultset, long threadid) {\\n\\t\\tSystem.out.println("Servicing thread id: " + threadid);\\n\\n\\t\\tint padvalue = 20;\\n\\t\\tboolean isSuccess = true;\\n\\t\\ttry {\\n\\t\\t\\tResultSetMetaData meta = resultset.getMetaData();\\n\\n\\t\\t\\tSystem.out.println("*****Printing column names*****");\\n\\n\\t\\t\\tint colcount = meta.getColumnCount();\\n\\t\\t\\tcolcount = 2;\\n\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\tString colname = meta.getColumnName(i);\\n\\t\\t\\t\\tSystem.out.print("[");\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (colname.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t: (padvalue - colname.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print(" ");\\n\\t\\t\\t\\tSystem.out.print(colname);\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (colname.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t: (padvalue - colname.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print(" ");\\n\\n\\t\\t\\t\\tSystem.out.print("]");\\n\\t\\t\\t}\\n\\t\\t\\tSystem.out.println("*****Printing column labels*****");\\n\\n\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\tString collabel = meta.getColumnLabel(i);\\n\\t\\t\\t\\tSystem.out.print("[");\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (collabel.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t: (padvalue - collabel.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print("^");\\n\\t\\t\\t\\tSystem.out.print(collabel);\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (collabel.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t: (padvalue - collabel.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print("^");\\n\\n\\t\\t\\t\\tSystem.out.print("]");\\n\\t\\t\\t}\\n\\t\\t\\tSystem.out.println("*****Printing column types*****");\\n\\n\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\tString coltype = HPCCJDBCUtils\\n\\t\\t\\t\\t\\t\\t.convertSQLtype2JavaClassName(meta.getColumnType(i));\\n\\t\\t\\t\\tSystem.out.print("[");\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (coltype.length() >= padvalue ? 
0\\n\\t\\t\\t\\t\\t\\t: (padvalue - coltype.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print(" ");\\n\\t\\t\\t\\tSystem.out.print(coltype);\\n\\n\\t\\t\\t\\tfor (int y = 0; y < (coltype.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t: (padvalue - coltype.length()) / 2); y++)\\n\\t\\t\\t\\t\\tSystem.out.print(" ");\\n\\n\\t\\t\\t\\tSystem.out.print("]");\\n\\t\\t\\t}\\n\\n\\t\\t\\twhile (resultset.next()) {\\n\\t\\t\\t\\tSystem.out.println();\\n\\t\\t\\t\\tfor (int i = 1; i <= colcount; i++) {\\n\\t\\t\\t\\t\\tString result = (String) resultset.getObject(i);\\n\\t\\t\\t\\t\\tSystem.out.print("[");\\n\\n\\t\\t\\t\\t\\tfor (int y = 0; y < (result.length() >= padvalue ? 0\\n\\t\\t\\t\\t\\t\\t\\t: padvalue - result.length()); y++)\\n\\t\\t\\t\\t\\t\\tSystem.out.print(" ");\\n\\t\\t\\t\\t\\tSystem.out.print(result);\\n\\t\\t\\t\\t\\tSystem.out.print("]");\\n\\t\\t\\t\\t}\\n\\t\\t\\t}\\n\\n\\t\\t\\tSystem.out.println("\\\\nTotal Records found: "\\n\\t\\t\\t\\t\\t+ resultset.getRowCount());\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\tSystem.out.println(e.getMessage());\\n\\t\\t\\tisSuccess = false;\\n\\t\\t}\\n\\t\\treturn isSuccess;\\n\\t}\\n\\n\\tpublic static void main(String[] args) {\\n\\n\\t\\torg.hpccsystems.jdbcdriver.HPCCDriverTest dt = new org.hpccsystems.jdbcdriver.HPCCDriverTest();\\n\\t\\ttry {\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("********************************************************************");\\n\\t\\t\\tSystem.out.println("HPCC JDBC Test Package Usage:");\\n\\t\\t\\tSystem.out.println(" Connection Parameters: paramname==paramvalue");\\n\\t\\t\\tSystem.out.println(" eg. ServerAddress==192.168.124.128");\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println(" Prepared Statement param value: \\\\"param\\\\"==paramvalue");\\n\\t\\t\\tSystem.out.println(" eg. param==\\\\'33445\\\\'");\\n\\t\\t\\tSystem.out.println();\\n\\t\\t\\tSystem.out.println(" By default full test is executed.");\\n\\t\\t\\tSystem.out.println(" To execute free hand sql:");\\n\\t\\t\\tSystem.out.println(" freehandsql==<SQL STATEMENT>");\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println(" eg. freehandsql==\\\\"select * from tablename where zip=? 
limit 100\\\\"");\\n\\t\\t\\tSystem.out.println();\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("********************************************************************\\\\n");\\n\\n\\t\\t\\tProperties info = new Properties();\\n\\t\\t\\tList<String> params = new ArrayList<String>();\\n\\n\\t\\t\\tDriver ldriver = DriverManager.getDriver("jdbc:hpcc");\\n\\n\\t\\t\\tSystem.out.println("*****ldriver : " + ldriver);\\n\\n\\t\\t\\tif (!(ldriver instanceof HPCCDriver))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Driver fetched with 'jdbc:hpcc' url is not of HPCCDriver type");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid lower case JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("JDBC:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid mixed case JDBC URL test1 failed");\\n\\t\\t\\tif (!driver.acceptsURL("JDBC:HPCC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid upper case JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:HPCC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid mixed case JDBC URL test2 failed");\\n\\t\\t\\tif (!driver.acceptsURL("jDbC:hPcC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid camel case JDBC URL test2 failed");\\n\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc;"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid seperator JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc;prop1=val1;prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid properties JDBC URL semicolon test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:prop1=val1:prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid properties JDBC URL colon test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid seperator JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL("jdbc : hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid spaces JDBC URL test passed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:prop1=val1;prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException("Valid JDBC URL test failed");\\n\\t\\t\\tif (driver.acceptsURL(" jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid spaces JDBC URL test2 passed");\\n\\t\\t\\tif (driver.acceptsURL("Garbage"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid garbage JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL("url:jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid prefix JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(""))\\n\\t\\t\\t\\tthrow new RuntimeException("Invalid empty JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(" "))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid singlespace JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(null))\\n\\t\\t\\t\\tthrow new RuntimeException("Invalid null JDBC URL test passed");\\n\\n\\t\\t\\tif (args.length <= 0) {\\n\\t\\t\\t\\tinfo.put("ServerAddress", "10.101.2.170"); // your HPCC address\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// here\\n\\t\\t\\t\\tinfo.put("LazyLoad", "true");\\n\\t\\t\\t\\tinfo.put("TraceToFile", "false");\\n\\t\\t\\t\\tinfo.put("TraceLevel", "boguslevel");\\n\\t\\t\\t\\tinfo.put("ReadTimeoutMilli", "30000"); // we have a couple 
of\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// long running queries\\n\\t\\t\\t\\tinfo.put("TargetCluster", "roxie"); // queries will run on\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// HPCC target cluster\\n\\t\\t\\t\\tinfo.put("QuerySet", "roxie"); // published HPCC queries will run\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// from this queryset\\n\\t\\t\\t\\tinfo.put("WsECLWatchPort", "8010"); // Target HPCC configured to\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// run WsECLWatch on this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// port\\n\\t\\t\\t\\t// info.put("WsECLDirectPort", "8008"); // Target HPCC\\n\\t\\t\\t\\t// configured\\n\\t\\t\\t\\t// to run WsECLDirect on\\n\\t\\t\\t\\t// this port\\n\\t\\t\\t\\tinfo.put("EclResultLimit", "ALL"); // I want all records\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// returned\\n\\t\\t\\t\\tinfo.put("PageSize", "20"); // GetTables and GetProcs will only\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// return 20 entries\\n\\t\\t\\t\\tdt.queryHPCC("1", info);\\n\\n\\t\\t\\t}\\n\\t\\t} catch (Exception e) {\\n\\t\\t\\te.printStackTrace();\\n\\t\\t\\tSystem.out.println(e.getMessage());\\n\\t\\t}\\n\\n\\t\\tSystem.out\\n\\t\\t\\t\\t.println("\\\\nHPCC Driver completed as expected - Verify results.");\\n\\t}\\n\\n\\tpublic Properties useDefaultCluster() {\\n\\t\\tProperties info = new Properties();\\n\\t\\ttry {\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("********************************************************************");\\n\\t\\t\\tSystem.out.println("HPCC JDBC Test Package Usage:");\\n\\t\\t\\tSystem.out.println(" Connection Parameters: paramname==paramvalue");\\n\\t\\t\\tSystem.out.println(" eg. ServerAddress==192.168.124.128");\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println(" Prepared Statement param value: \\\\"param\\\\"==paramvalue");\\n\\t\\t\\tSystem.out.println(" eg. param==\\\\'33445\\\\'");\\n\\t\\t\\tSystem.out.println();\\n\\t\\t\\tSystem.out.println(" By default full test is executed.");\\n\\t\\t\\tSystem.out.println(" To execute free hand sql:");\\n\\t\\t\\tSystem.out.println(" freehandsql==<SQL STATEMENT>");\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println(" eg. freehandsql==\\\\"select * from tablename where zip=? 
limit 100\\\\"");\\n\\t\\t\\tSystem.out.println();\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("********************************************************************\\\\n");\\n\\n\\t\\t\\tList<String> params = new ArrayList<String>();\\n\\n\\t\\t\\tDriver ldriver = DriverManager.getDriver("jdbc:hpcc");\\n\\n\\t\\t\\tSystem.out.println("*****ldriver : " + ldriver);\\n\\n\\t\\t\\tif (!(ldriver instanceof HPCCDriver))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Driver fetched with 'jdbc:hpcc' url is not of HPCCDriver type");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid lower case JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("JDBC:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid mixed case JDBC URL test1 failed");\\n\\t\\t\\tif (!driver.acceptsURL("JDBC:HPCC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid upper case JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:HPCC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid mixed case JDBC URL test2 failed");\\n\\t\\t\\tif (!driver.acceptsURL("jDbC:hPcC"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid camel case JDBC URL test2 failed");\\n\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc;"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid seperator JDBC URL test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc;prop1=val1;prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid properties JDBC URL semicolon test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:prop1=val1:prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid properties JDBC URL colon test failed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Valid seperator JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL("jdbc : hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid spaces JDBC URL test passed");\\n\\t\\t\\tif (!driver.acceptsURL("jdbc:hpcc:prop1=val1;prop2=val2"))\\n\\t\\t\\t\\tthrow new RuntimeException("Valid JDBC URL test failed");\\n\\t\\t\\tif (driver.acceptsURL(" jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid spaces JDBC URL test2 passed");\\n\\t\\t\\tif (driver.acceptsURL("Garbage"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid garbage JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL("url:jdbc:hpcc"))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid prefix JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(""))\\n\\t\\t\\t\\tthrow new RuntimeException("Invalid empty JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(" "))\\n\\t\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t\\t"Invalid singlespace JDBC URL test passed");\\n\\t\\t\\tif (driver.acceptsURL(null))\\n\\t\\t\\t\\tthrow new RuntimeException("Invalid null JDBC URL test passed");\\n\\n\\t\\t\\tinfo.put("ServerAddress", "10.101.2.170"); // your HPCC address\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// here\\n\\t\\t\\tinfo.put("LazyLoad", "true");\\n\\t\\t\\tinfo.put("TraceToFile", "false");\\n\\t\\t\\tinfo.put("TraceLevel", "boguslevel");\\n\\t\\t\\tinfo.put("ReadTimeoutMilli", "30000"); // we have a couple of\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// long running 
queries\\n\\t\\t\\tinfo.put("ConnectTimeoutMilli", "30000");\\n\\t\\t\\tinfo.put("TargetCluster", "roxie"); // queries will run on\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// HPCC target cluster\\n\\t\\t\\tinfo.put("QuerySet", "roxie"); // published HPCC queries will run\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// from this queryset\\n\\t\\t\\tinfo.put("WsECLWatchPort", "8010"); // Target HPCC configured to\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// run WsECLWatch on this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// port\\n\\t\\t\\t// info.put("WsECLDirectPort", "8008"); // Target HPCC\\n\\t\\t\\t// configured\\n\\t\\t\\t// to run WsECLDirect on\\n\\t\\t\\t// this port\\n\\t\\t\\tinfo.put("EclResultLimit", "ALL"); // I want all records\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// returned\\n\\t\\t\\tinfo.put("PageSize", "20"); // GetTables and GetProcs will only\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// return 20 entries\\n\\t\\t\\tSystem.out\\n\\t\\t\\t\\t\\t.println("\\\\nHPCC Driver completed as expected - Verify results.");\\n\\t\\t} catch (Exception re) {\\n\\t\\t\\tre.printStackTrace();\\n\\t\\t}\\n\\n\\t\\treturn info;\\n\\t}\\n\\n\\tpublic void queryHPCC(String i, Properties info) {\\n\\t\\tSystem.out.println("For loop running for : " + i);\\n\\t\\tHPCCConnection connectionprops = connectViaProps(info);\\n\\t\\tif (connectionprops == null)\\n\\t\\t\\tthrow new RuntimeException(\\n\\t\\t\\t\\t\\t"Could not connect with properties object");\\n\\n\\t\\tHPCCStatement stmt = null;\\n\\t\\ttry {\\n\\t\\t\\tstmt = (HPCCStatement) connectionprops.createStatement();\\n\\t\\t} catch (SQLException e) {\\n\\t\\t\\t// TODO Auto-generated catch block\\n\\t\\t\\te.printStackTrace();\\n\\t\\t}\\n\\t\\t// java.util.Date date= new java.util.Date();\\n\\t\\t// System.out.println("Before Timestamp is: "+new\\n\\t\\t// Timestamp(date.getTime()));\\n\\t\\tSystem.out.println("Before Timestamp for " + i + " is: "\\n\\t\\t\\t\\t+ new Date(System.currentTimeMillis()));\\n\\n\\t\\tString stmtsql = "select tbl.* from .::subfile::cwcd_sub1 tbl ";// where\\n\\t\\t// User_ID='293705'";\\n\\n\\t\\ttry {\\n\\t\\t\\tif (stmt.execute(stmtsql)) {\\n\\t\\t\\t\\tHPCCResultSet res1 = (HPCCResultSet) stmt.executeQuery(stmtsql);\\n\\t\\t\\t\\tSystem.out.println("**********result : " + res1.getRowCount());\\n\\t\\t\\t\\tif (res1.getRowCount() > 0) {\\n\\n\\t\\t\\t\\t\\t// System.out.println(res1.getString(1));\\n\\n\\t\\t\\t\\t}\\n\\n\\t\\t\\t} else\\n\\t\\t\\t\\tthrow new RuntimeException("HPCCStatement test failed.");\\n\\t\\t} catch (SQLException e) {\\n\\t\\t\\t// TODO Auto-generated catch block\\n\\t\\t\\te.printStackTrace();\\n\\t\\t}\\n\\n\\t\\tfinally {\\n\\t\\t\\ttry {\\n\\t\\t\\t\\tstmt.close();\\n\\t\\t\\t\\tconnectionprops.close();\\n\\t\\t\\t} catch (SQLException e) {\\n\\t\\t\\t\\t// TODO Auto-generated catch block\\n\\t\\t\\t\\te.printStackTrace();\\n\\t\\t\\t}\\n\\t\\t}\\n\\t\\tSystem.out.println("After Timestamp is: "\\n\\t\\t\\t\\t+ new Date(System.currentTimeMillis()));\\n\\t}\\n\\n}
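\\n\\nCondensed, the path this test program exercises is plain JDBC against the HPCC driver. The sketch below is not part of the original class; it only restates the connect-and-query flow with the same placeholder server address and logical file name, assuming the hpccsystems-jdbcdriver jar is on the classpath:\\n\\n
import java.sql.*;\\n
import java.util.Properties;\\n
\\n
public class MinimalHPCCQuery {\\n
    public static void main(String[] args) throws Exception {\\n
        // Loading the class registers org.hpccsystems.jdbcdriver.HPCCDriver with DriverManager.\\n
        Class.forName("org.hpccsystems.jdbcdriver.HPCCDriver");\\n
\\n
        Properties info = new Properties();\\n
        info.put("ServerAddress", "10.101.2.170"); // placeholder HPCC ESP address, as above\\n
        info.put("WsECLWatchPort", "8010");\\n
        info.put("TargetCluster", "thor"); // switching these two to "roxie" triggers the exception shown below\\n
        info.put("QuerySet", "thor");\\n
        info.put("EclResultLimit", "ALL");\\n
\\n
        try (Connection conn = DriverManager.getConnection("jdbc:hpcc", info);\\n
             Statement stmt = conn.createStatement();\\n
             ResultSet rs = stmt.executeQuery("select tbl.* from .::subfile::cwcd_sub1 tbl")) {\\n
            ResultSetMetaData meta = rs.getMetaData();\\n
            while (rs.next()) {\\n
                for (int i = 1; i <= meta.getColumnCount(); i++)\\n
                    System.out.print("[ " + rs.getObject(i) + " ]");\\n
                System.out.println();\\n
            }\\n
        }\\n
    }\\n
}\\n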
\\n\\nIn the full test program at the top of this post, when I use:\\n\\ninfo.put("TargetCluster", "thor"); \\ninfo.put("QuerySet", "thor");
\\n\\nthe output is correct.\\n\\nWhen I use:\\n\\ninfo.put("TargetCluster", "roxie"); \\ninfo.put("QuerySet", "roxie");
\\n\\nI get an exception: \\n\\n2013/02/11 07:05:14.672 - HPCC JDBC Driver registered.\\n********************************************************************\\nHPCC JDBC Test Package Usage:\\n Connection Parameters: paramname==paramvalue\\n eg. ServerAddress==192.168.124.128\\n Prepared Statement param value: "param"==paramvalue\\n eg. param=='33445'\\n\\n By default full test is executed.\\n To execute free hand sql:\\n freehandsql==<SQL STATEMENT>\\n eg. freehandsql=="select * from tablename where zip=? limit 100"\\n\\n********************************************************************\\n\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@901887\\nFor loop running for : 1\\n2013/02/11 07:05:14.688 - HPCCConnection jdbc url: \\n2013/02/11 07:05:14.688 - Couldn't determine log level, will log at default level: INFO\\n2013/02/11 07:05:14.688 - HPCCDriver::connect10.101.2.170\\n2013/02/11 07:05:14.688 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: roxie eclwatch: http://10.101.2.170:8010\\n2013/02/11 07:05:14.688 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/11 07:05:14.985 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/11 07:05:14.985 - HPCCDatabaseMetaData initialized\\n2013/02/11 07:05:14.985 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/11 07:05:14.985 - HPCCConnection: createStatement( )\\n2013/02/11 07:05:15.000 - HPCCStatementConstructor(conn)\\nBefore Timestamp for 1 is: Mon Feb 11 19:05:15 IST 2013\\n2013/02/11 07:05:15.000 - HPCCStatementAttempting to process sql query: select tbl.* from .::subfile::cwcd_sub1 tbl \\n2013/02/11 07:05:15.000 - INCOMING SQL: select tbl.* from .::subfile::cwcd_sub1 tbl \\n2013/02/11 07:05:15.000 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=.::SUBFILE::CWCD_SUB1&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/11 07:05:15.188 - NOT USING INDEX!\\n2013/02/11 07:05:15.188 - HPCC URL created: http://10.101.2.170:8010/EclDirect/RunEcl?Submit&cluster=roxie\\n2013/02/11 07:05:15.203 - HPCCStatement: execute()\\n2013/02/11 07:05:15.203 - \\tAttempting to process sql query: select tbl.* from .::subfile::cwcd_sub1 tbl \\n2013/02/11 07:05:15.203 - HPCCStatement: executeQuery()\\n2013/02/11 07:05:15.203 - \\tAttempting to process sql query: select tbl.* from .::subfile::cwcd_sub1 tbl \\n2013/02/11 07:05:15.203 - Executing ECL: &eclText=\\nimport std;\\nTblDS0RecDef := RECORD varstring controller_id; varstring user_id; varstring url_link; varstring url_content; real8 url_sentiment; varstring url_date; integer8 unique_search_id; integer8 insertid; varstring statusid; varstring search_pattern; varstring word_ignored; varstring search_date; varstring detected_language; END; \\nTblDS0 := DATASET('~.::subfile::cwcd_sub1', TblDS0RecDef,FLAT);\\nSelectStruct := RECORD\\n varstring url_content := TblDS0.url_content;\\n varstring search_pattern := TblDS0.search_pattern;\\n real8 url_sentiment := TblDS0.url_sentiment;\\n integer8 unique_search_id := TblDS0.unique_search_id;\\n varstring statusid := TblDS0.statusid;\\n varstring url_link := TblDS0.url_link;\\n varstring user_id := TblDS0.user_id;\\n varstring url_date := TblDS0.url_date;\\n varstring controller_id := TblDS0.controller_id;\\n varstring word_ignored := TblDS0.word_ignored;\\n varstring detected_language := TblDS0.detected_language;\\n varstring search_date := TblDS0.search_date;\\n integer8 insertid := TblDS0.insertid;\\nEND;\\nDSTable := 
TABLE( TblDS0, SelectStruct );\\nOUTPUT(CHOOSEN(DSTable,ALL),NAMED('JDBCSelectQueryResult'));\\n\\n2013/02/11 07:05:17.547 - Total elapsed http request/response time in milliseconds: 2344\\n2013/02/11 07:05:17.547 - Parsing results...\\njava.sql.SQLException: java.lang.Exception: HPCCJDBC: Error in response: 'Query W20120812-214700 is suspended because IPropertyTree: unsupported xpath syntax used\\nXPath Exception: \\nin xpath = .::subfile::cwcd_sub1\\n ^'\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.executeSelect(ECLEngine.java:930)\\n\\tat org.hpccsystems.jdbcdriver.ECLEngine.execute(ECLEngine.java:787)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.executeHPCCQuery(HPCCStatement.java:113)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:255)\\n\\tat org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:245)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.queryHPCC(HPCCDriverTest.java:849)\\n\\tat org.hpccsystems.jdbcdriver.HPCCDriverTest.main(HPCCDriverTest.java:700)\\nAfter Timestamp is: Mon Feb 11 19:05:17 IST 2013\\n\\nHPCC Driver completed as expected - Verify results.\\n
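\\n\\nFor reference, the free-hand path referred to next is just an ordinary parameterized PreparedStatement; the following is a minimal sketch (not the original executeFreeHandSQL body), using the illustrative table and parameter from the usage banner rather than a real file:\\n\\n
// conn is an open HPCCConnection, obtained as in the test program above.\\n
PreparedStatement p = conn.prepareStatement("select * from tablename where zip = ? limit 100");\\n
p.clearParameters();\\n
p.setObject(1, "33445"); // value bound to the '?' placeholder\\n
ResultSet rs = p.executeQuery();\\n
while (rs.next())\\n
    System.out.println(rs.getObject(1));\\n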
\\n\\nEven when I invoke the 'executeFreeHandSQL(...)' method, I'm getting the same error.\\n\\nWhat is the issue exactly?\", \"post_time\": \"2013-02-11 13:38:35\" },\n\t{ \"post_id\": 3444, \"topic_id\": 757, \"forum_id\": 34, \"post_subject\": \"Re: Do HPCC JDBC driver supports Mondrian?\", \"username\": \"mnaweed\", \"post_text\": \"The Connection string that we use is defined as follows:\\n\\ndriver=org.hpccsystems.jdbcdriver.HPCCDriver\\nlocation=jdbc:mondrian:Jdbc=jdbc:hpcc:ServerAddress=xxx.xxx.xxx:Cluster=default:WsECLDirectPort=8008:EclResultLimit=100:ReadTimeoutMilli=30000:QuerySet=thor:LazyLoad=false:PageSize=100:TraceLevel=ALL;Catalog=res:HPCC/Schema.xml;\\nusername=\\npassword=\", \"post_time\": \"2013-02-13 14:54:11\" },\n\t{ \"post_id\": 3443, \"topic_id\": 757, \"forum_id\": 34, \"post_subject\": \"Re: Do HPCC JDBC driver supports Mondrian?\", \"username\": \"mnaweed\", \"post_text\": \"1.Download the HPCC JDBC Driver from:\\nhttp://hpccsystems.com/products-and-ser ... DBC-Driver\\n\\n2.Read the documenation, it has the details on defining the connection string.\", \"post_time\": \"2013-02-13 14:33:12\" },\n\t{ \"post_id\": 3440, \"topic_id\": 757, \"forum_id\": 34, \"post_subject\": \"Re: Do HPCC JDBC driver supports Mondrian?\", \"username\": \"Apurv.Khare\", \"post_text\": \"I'm new to Mondrian, i have just mondrian zip file.\\nI tried the In built MS-Access database it worked with jPivot tables it shows me data and some graphs.\\n\\nI want to connect Mondrian with HPCC.I dont know about Jpivot or Pentaho.\", \"post_time\": \"2013-02-13 12:11:49\" },\n\t{ \"post_id\": 3423, \"topic_id\": 757, \"forum_id\": 34, \"post_subject\": \"Re: Do HPCC JDBC driver supports Mondrian?\", \"username\": \"mnaweed\", \"post_text\": \"What UI are you using Mondrian with? JPivot/Saiku/Pentaho etc..\", \"post_time\": \"2013-02-12 14:49:52\" },\n\t{ \"post_id\": 3421, \"topic_id\": 757, \"forum_id\": 34, \"post_subject\": \"Do HPCC JDBC driver supports Mondrian?\", \"username\": \"Apurv.Khare\", \"post_text\": \"Hi,\\n\\nI'm trying to use Mondrian and connect it with our HPCC cluster but not able to find a proper place to give the connection string.\\n\\nCurrently i'm using Mondrian-3.3.0.14703. 
\\nHave anyone tried with Mondrian??\\n\\nThanks,\\nApurv\", \"post_time\": \"2013-02-12 11:39:40\" },\n\t{ \"post_id\": 3458, \"topic_id\": 768, \"forum_id\": 34, \"post_subject\": \"Weird behaviour of HPCC-JDBC Driver\", \"username\": \"prachi\", \"post_text\": \"Hi,\\nOur general usage scenario is :\\n Via a web application multiple users(at a time) are adding sub-files to super-files and also reading the super-files via the HPCC-JDBC driver on THOR.\\n\\nCode snippet :\\n\\ninfo.put("ServerAddress", "10.101.2.170"); // your HPCC address\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// here\\n\\t\\t\\t\\tinfo.put("LazyLoad", "true");\\n\\t\\t\\t\\tinfo.put("TraceToFile", "false");\\n\\t\\t\\t\\tinfo.put("TraceLevel", "boguslevel");\\n\\t\\t\\t\\tinfo.put("ReadTimeoutMilli", "30000"); // we have a couple of\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// long running queries\\n\\t\\t\\t\\tinfo.put("TargetCluster", "thor"); // queries will run on\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// HPCC target cluster\\n\\t\\t\\t\\tinfo.put("QuerySet", "thor"); // published HPCC queries will run\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// from this queryset\\n\\t\\t\\t\\tinfo.put("WsECLWatchPort", "8010"); // Target HPCC configured to\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// run WsECLWatch on this\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// port\\n\\t\\t\\t\\t// info.put("WsECLDirectPort", "8008"); // Target HPCC\\n\\t\\t\\t\\t// configured\\n\\t\\t\\t\\t// to run WsECLDirect on\\n\\t\\t\\t\\t// this port\\n\\t\\t\\t\\tinfo.put("EclResultLimit", "ALL"); // I want all records\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// returned\\n\\t\\t\\t\\tinfo.put("PageSize", "20"); // GetTables and GetProcs will only\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t// return 20 entries\\n
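\\n\\nFor the multi-user scenario described above, each request ends up doing roughly the following on its own thread. This is a minimal sketch only (one connection per request is an assumption, not something the driver requires), with an abbreviated form of the superfile query that appears in the logs below:\\n\\n
import java.sql.*;\\n
import java.util.Properties;\\n
\\n
public class ConcurrentSuperfileRead {\\n
    public static void main(String[] args) throws Exception {\\n
        Class.forName("org.hpccsystems.jdbcdriver.HPCCDriver");\\n
\\n
        final Properties info = new Properties();\\n
        info.put("ServerAddress", "10.101.2.170");\\n
        info.put("TargetCluster", "thor");\\n
        info.put("QuerySet", "thor");\\n
        info.put("ReadTimeoutMilli", "30000");\\n
        info.put("ConnectTimeoutMilli", "30000");\\n
        info.put("EclResultLimit", "ALL");\\n
\\n
        // One connection per simulated user; connections are not shared between threads.\\n
        for (int i = 0; i < 3; i++) {\\n
            new Thread(new Runnable() {\\n
                public void run() {\\n
                    try {\\n
                        Connection conn = DriverManager.getConnection("jdbc:hpcc", info);\\n
                        Statement stmt = conn.createStatement();\\n
                        ResultSet rs = stmt.executeQuery(\\n
                                "SELECT Search_Keyword,Search_Date,TwitterUniqueID"\\n
                                + " FROM sapphire::superfile::buzzmonitoring where UserID='293705'");\\n
                        while (rs.next())\\n
                            System.out.println(rs.getString(1) + " | " + rs.getString(2));\\n
                        conn.close();\\n
                    } catch (SQLException e) {\\n
                        e.printStackTrace(); // the timeouts shown below surface here\\n
                    }\\n
                }\\n
            }).start();\\n
        }\\n
    }\\n
}\\n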
\\n\\n\\nBehaviour of HPCC-JDBC java code is different everytime.We are facing different issues such as:\\n\\nIssue 1:\\n\\nlocal user login\\nYou are authorized to proceed.293705\\ndomain is Retail Banking\\n2013/02/14 02:46:40.935 - HPCC JDBC Driver registered.\\n2013/02/14 02:46:40.935 - HPCC JDBC Driver registered.\\n-----------------Driver Properties----------------------------------\\n ServerAddress: Target HPCC ESP Address (used to contact WsECLWatch, WsECLDirect, or WsECL if override not specified).\\n WsECLWatchAddress: WsECLWatch address (required if different than ServerAddress).\\n WsECLWatchPort: WsECLWatch port (required if HPCC configuration does not use default port).\\n WsECLAddress: WsECLAddress Address (required if different than ServerAddress).\\n WsECLPort: WsECL port (required if HPCC configuration does not use default port).\\n WsECLDirectAddress: WsECLDirect Address (required if different than ServerAddress).\\n WsECLDirectPort: WsECLDirect port (required if HPCC configuration does not use default port).\\n username: HPCC username (*Use JDBC client secure interface if available*).\\n password: HPCC password (*Use JDBC client secure interface if available*).\\n PageOffset: Starting HPCC data file or HPCC published queries displayed.\\n PageSize: Number of HPCC data files (DB tables) or HPCC published queries (DB Stored Procs) displayed.\\n QuerySet: Queryset from which published query (Stored Procedure) is chosen.\\n TargetCluster: Target cluster on which to execute ECL code.\\n TraceToFile: false -> System.out, true -> D:\\\\Program Files\\\\eclipse\\\\HPCCJDBC.log\\n TraceLevel: Logging level (java.util.logging.level).\\n EclResultLimit: Default limit on number of result records returned.\\n LazyLoad: If disabled, all HPCC metadata loaded and cached at connect time; otherwise HPCC file, and published query info is loaded on-demand\\n ReadTimeoutMilli: HPCC requests connection read time out value in milliseconds.\\n ConnectTimeoutMilli: HPCC requests connection time out value in milliseconds.\\n\\n--------------------------------------------------------------------\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@68400e\\n2013/02/14 02:46:40.966 - HPCCConnection jdbc url: \\n2013/02/14 02:46:40.966 - HPCCConnection jdbc url: \\n2013/02/14 02:46:40.966 - Couldn't determine log level, will log at default level: INFO\\n2013/02/14 02:46:40.966 - Couldn't determine log level, will log at default level: INFO\\n2013/02/14 02:46:40.966 - HPCCDriver::connect10.101.2.170\\n2013/02/14 02:46:40.966 - HPCCDriver::connect10.101.2.170\\n2013/02/14 02:46:40.966 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/14 02:46:40.966 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/14 02:46:40.982 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/14 02:46:40.982 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/14 02:47:11.499 - Could not fetch HPCC info.\\n2013/02/14 02:47:11.499 - Could not fetch HPCC info.\\n2013/02/14 02:47:42.000 - Could not fetch cluster information.\\n2013/02/14 02:47:42.000 - Could not fetch cluster information.\\n2013/02/14 02:48:12.517 - Could not fetch cluster information.\\n2013/02/14 02:48:12.517 - Could not fetch cluster information.\\n2013/02/14 02:48:12.517 - HPCC file and published query info not pre-fetched (LazyLoad 
enabled)\\n2013/02/14 02:48:12.517 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/14 02:48:12.517 - Could not query HPCC metadata check server address, cluster name, wsecl, and wseclwatch configuration.\\n2013/02/14 02:48:12.517 - Could not query HPCC metadata check server address, cluster name, wsecl, and wseclwatch configuration.\\n2013/02/14 02:48:12.517 - HPCCDatabaseMetaData initialized\\n2013/02/14 02:48:12.517 - HPCCDatabaseMetaData initialized\\n2013/02/14 02:48:12.517 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/14 02:48:12.517 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/14 02:48:12.517 - HPCCConnection: createStatement( )\\n2013/02/14 02:48:12.517 - HPCCConnection: createStatement( )\\n2013/02/14 02:48:12.580 - HPCCStatementConstructor(conn)\\n2013/02/14 02:48:12.580 - HPCCStatementConstructor(conn)\\n2013/02/14 02:48:12.580 - HPCCStatementAttempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:12.580 - HPCCStatementAttempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:12.580 - INCOMING SQL: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:12.580 - INCOMING SQL: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:12.595 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=SAPPHIRE::SUPERFILE::BUZZMONITORING&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/14 02:48:12.595 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=SAPPHIRE::SUPERFILE::BUZZMONITORING&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/14 02:48:43.128 - Warning: Connection to HPCC timed out while fetching: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n2013/02/14 02:48:43.128 - Warning: Connection to HPCC timed out while fetching: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n2013/02/14 02:48:43.128 - Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n2013/02/14 02:48:43.128 - Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n2013/02/14 02:48:43.128 - HPCCStatement: execute()\\n2013/02/14 02:48:43.128 - HPCCStatement: execute()\\n2013/02/14 02:48:43.128 - Attempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:43.128 - Attempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:43.128 - HPCCStatement: executeQuery()\\n2013/02/14 02:48:43.128 - HPCCStatement: executeQuery()\\n2013/02/14 02:48:43.128 - Attempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 02:48:43.128 - Attempting to process sql query: SELECT 
Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\nException caught =HPCCStatement: Cannot execute SQL command\\n Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\njava.sql.SQLException: HPCCStatement: Cannot execute SQL command\\n Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n
\\n\\nIssue 2:\\n\\njava.sql.SQLException: HTTP Connection Response code: -1\\n
\\n\\n\\nIssue 3:\\n\\nlocal user login\\nYou are authorized to proceed.293705\\ndomain is Retail Banking\\n*****ldriver : org.hpccsystems.jdbcdriver.HPCCDriver@de1b7b\\n2013/02/14 05:20:25.689 - HPCCConnection jdbc url: \\n2013/02/14 05:20:25.689 - Couldn't determine log level, will log at default level: INFO\\n2013/02/14 05:20:25.689 - HPCCDriver::connect10.101.2.170\\n2013/02/14 05:20:25.689 - HPCCDatabaseMetaData ServerAddress: 10.101.2.170 TargetCluster: thor eclwatch: http://10.101.2.170:8010\\n2013/02/14 05:20:25.689 - HPCCDatabaseMetaData Fetching HPCC INFO: http://10.101.2.170:8010/WsSMC/Activity?rawxml_\\n2013/02/14 05:20:56.205 - Could not fetch HPCC info.\\n2013/02/14 05:21:26.721 - Could not fetch cluster information.\\n2013/02/14 05:21:57.222 - Could not fetch cluster information.\\n2013/02/14 05:21:57.222 - HPCC file and published query info not pre-fetched (LazyLoad enabled)\\n2013/02/14 05:21:57.222 - Could not query HPCC metadata check server address, cluster name, wsecl, and wseclwatch configuration.\\n2013/02/14 05:21:57.222 - HPCCDatabaseMetaData initialized\\n2013/02/14 05:21:57.222 - HPCCConnection initialized - server: 10.101.2.170\\n2013/02/14 05:21:57.222 - HPCCConnection: createStatement( )\\n2013/02/14 05:21:57.222 - HPCCStatementConstructor(conn)\\n2013/02/14 05:21:57.222 - HPCCStatementAttempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 05:21:57.222 - INCOMING SQL: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 05:21:57.237 - Fetching file information: http://10.101.2.170:8010/WsDfu/DFUQuery?LogicalName=SAPPHIRE::SUPERFILE::BUZZMONITORING&rawxml_&filetype=Logical Files and Superfiles\\n2013/02/14 05:22:27.753 - Warning: Connection to HPCC timed out while fetching: SAPPHIRE::SUPERFILE::BUZZMONITORING\\njava.sql.SQLException: HPCCStatement: Cannot execute SQL command\\n Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n at org.hpccsystems.jdbcdriver.HPCCStatement.executeHPCCQuery(HPCCStatement.java:110)\\n2013/02/14 05:22:27.753 - Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n2013/02/14 05:22:27.753 - HPCCStatement: execute()\\n2013/02/14 05:22:27.753 - Attempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\n2013/02/14 05:22:27.753 - HPCCStatement: executeQuery()\\n2013/02/14 05:22:27.753 - Attempting to process sql query: SELECT Search_Keyword,Search_Date,TwitterUniqueID FROM sapphire::superfile::buzzmonitoring where UserID='293705' group by TwitterUniqueID order by Search_Date desc\\nException caught =HPCCStatement: Cannot execute SQL command\\n Invalid or forbidden table found: SAPPHIRE::SUPERFILE::BUZZMONITORING\\n at org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:255)\\n at org.hpccsystems.jdbcdriver.HPCCStatement.execute(HPCCStatement.java:245)\\n at SapphireClient.hpcc.actions.cHPCCResultsetMaster.ResultSet(cHPCCResultsetMaster.java:147)\\n at SapphireClient.web.actions.cBuzzRedirectAction.execute(cBuzzRedirectAction.java:39)\\n at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\\n at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)\\n at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)\\n at java.lang.reflect.Method.invoke(Unknown Source)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invokeAction(DefaultActionInvocation.java:440)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invokeActionOnly(DefaultActionInvocation.java:279)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:242)\\n at com.opensymphony.xwork2.interceptor.DefaultWorkflowInterceptor.doIntercept(DefaultWorkflowInterceptor.java:163)\\n at com.opensymphony.xwork2.interceptor.MethodFilterInterceptor.intercept(MethodFilterInterceptor.java:87)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.validator.ValidationInterceptor.doIntercept(ValidationInterceptor.java:249)\\n at org.apache.struts2.interceptor.validation.AnnotationValidationInterceptor.doIntercept(AnnotationValidationInterceptor.java:68)\\n at com.opensymphony.xwork2.interceptor.MethodFilterInterceptor.intercept(MethodFilterInterceptor.java:87)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ConversionErrorInterceptor.intercept(ConversionErrorInterceptor.java:122)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ParametersInterceptor.doIntercept(ParametersInterceptor.java:195)\\n at com.opensymphony.xwork2.interceptor.MethodFilterInterceptor.intercept(MethodFilterInterceptor.java:87)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ParametersInterceptor.doIntercept(ParametersInterceptor.java:195)\\n at com.opensymphony.xwork2.interceptor.MethodFilterInterceptor.intercept(MethodFilterInterceptor.java:87)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.StaticParametersInterceptor.intercept(StaticParametersInterceptor.java:148)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.interceptor.CheckboxInterceptor.intercept(CheckboxInterceptor.java:93)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.interceptor.FileUploadInterceptor.intercept(FileUploadInterceptor.java:235)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ModelDrivenInterceptor.intercept(ModelDrivenInterceptor.java:89)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ScopedModelDrivenInterceptor.intercept(ScopedModelDrivenInterceptor.java:128)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.interceptor.ProfilingActivationInterceptor.intercept(ProfilingActivationInterceptor.java:104)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.interceptor.debugging.DebuggingInterceptor.intercept(DebuggingInterceptor.java:267)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at 
com.opensymphony.xwork2.interceptor.ChainingInterceptor.intercept(ChainingInterceptor.java:126)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.PrepareInterceptor.doIntercept(PrepareInterceptor.java:138)\\n at com.opensymphony.xwork2.interceptor.MethodFilterInterceptor.intercept(MethodFilterInterceptor.java:87)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.I18nInterceptor.intercept(I18nInterceptor.java:148)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.interceptor.ServletConfigInterceptor.intercept(ServletConfigInterceptor.java:164)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.AliasInterceptor.intercept(AliasInterceptor.java:128)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at com.opensymphony.xwork2.interceptor.ExceptionMappingInterceptor.intercept(ExceptionMappingInterceptor.java:176)\\n at com.opensymphony.xwork2.DefaultActionInvocation.invoke(DefaultActionInvocation.java:236)\\n at org.apache.struts2.impl.StrutsActionProxy.execute(StrutsActionProxy.java:52)\\n at org.apache.struts2.dispatcher.Dispatcher.serviceAction(Dispatcher.java:468)\\n at org.apache.struts2.dispatcher.FilterDispatcher.doFilter(FilterDispatcher.java:395)\\n at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243)\\n at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210)\\n at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:225)\\n at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:123)\\n at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:472)\\n at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:168)\\n at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:98)\\n at org.apache.catalina.valves.AccessLogValve.invoke(AccessLogValve.java:927)\\n at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118)\\n at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:407)\\n at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1001)\\n at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:579)\\n at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:312)\\n at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)\\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)\\n at java.lang.Thread.run(Unknown Source)\\n
\\n\\n\\nSometimes it works as required.\\n\\nWhere can be the cause?\\n\\nThanks and Regards!\", \"post_time\": \"2013-02-14 11:25:57\" },\n\t{ \"post_id\": 3673, \"topic_id\": 819, \"forum_id\": 34, \"post_subject\": \"Re: get no tables when I connected HPCC by Squirrel-sql\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"Hi, the JDBC documentation can be found here: \\nhttp://cdn.hpccsystems.com/install/jdbc ... 2beta1.pdf\\n\\nThe JDBC driver will only recognize HPCC files that contain a record definition (otherwise the driver won't know the structure of the file). See the note on page 7 in the doc. \\n\\nAlso, this thread might be helpful if your files do not contain rec defs:\\nhttp://hpccsystems.com/bb/viewtopic.php ... 3350#p3233\\n\\nOtherwise, there could be a connectivity issue, in which case you'll have to take a look at the log output (on windows, at the bottom of SQUIRREL, there's a monitor icon you can click to view logs).\\n\\nLook around this statement:\\nFetching tables: http://<yourIp>:8010/WsDfu/DFUQuery?LogicalName=*&PageSize=100&PageStartFrom=0&rawxml_&filetype=Logical Files and Superfiles\\n\\nHope that helps. Let us know if there are any issues. Thanks.\", \"post_time\": \"2013-03-08 14:00:17\" },\n\t{ \"post_id\": 3669, \"topic_id\": 819, \"forum_id\": 34, \"post_subject\": \"get no tables when I connected HPCC by Squirrel-sql\", \"username\": \"battleman\", \"post_text\": \"Hi:\\n When I connected to HPCC by Squirrel,I cannot see any tables . How can I view my tables? Is there any guidant PDF? \\n\\nRegards!\", \"post_time\": \"2013-03-08 10:06:33\" },\n\t{ \"post_id\": 3882, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"The name is hpcctry. The screen shot is showed below. And also the problem is that I can not get the table. The pentaho seems updateing data source and then runs forever.\\n\\n\\n[quote="mnaweed":yw9tdprt]What name did you give to the Database Connection?\\n\\n[quote="wei xu":yw9tdprt]Hi, I wonder if I can use spoon to create a dashboard? I use pentaho console because it can help to create a dashboard.\\n\\nHere is the screen short I get when use pentaho user console to connect HPCC. Do you have any idea that what the schema called HPCC System means? And why there seems one file name repeats twice under the available table ?\\n\\n\\n\\n[quote="mnaweed":yw9tdprt]On Spoon UI, Click on 'Tools' menu item. you have the Wizards and other database options available.\\n\\nGoogle 'getting started with pentaho data integration' for good documentation.\", \"post_time\": \"2013-04-03 21:17:49\" },\n\t{ \"post_id\": 3880, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"What name did you give to the Database Connection?\\n\\n[quote="wei xu":2ld4vdyj]Hi, I wonder if I can use spoon to create a dashboard? I use pentaho console because it can help to create a dashboard.\\n\\nHere is the screen short I get when use pentaho user console to connect HPCC. Do you have any idea that what the schema called HPCC System means? And why there seems one file name repeats twice under the available table ?\\n\\n\\n\\n[quote="mnaweed":2ld4vdyj]On Spoon UI, Click on 'Tools' menu item. 
you have the Wizards and other database options available.\\n\\nGoogle 'getting started with pentaho data integration' for good documentation.\", \"post_time\": \"2013-04-03 21:03:09\" },\n\t{ \"post_id\": 3879, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"Hi, I wonder if I can use spoon to create a dashboard? I use pentaho console because it can help to create a dashboard.\\n\\nHere is the screen short I get when use pentaho user console to connect HPCC. Do you have any idea that what the schema called HPCC System means? And why there seems one file name repeats twice under the available table ?\\n\\n\\n\\n[quote="mnaweed":3355mzf8]On Spoon UI, Click on 'Tools' menu item. you have the Wizards and other database options available.\\n\\nGoogle 'getting started with pentaho data integration' for good documentation.\", \"post_time\": \"2013-04-03 20:58:01\" },\n\t{ \"post_id\": 3878, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"On Spoon UI, Click on 'Tools' menu item. you have the Wizards and other database options available.\\n\\nGoogle 'getting started with pentaho data integration' for good documentation.\", \"post_time\": \"2013-04-02 16:52:52\" },\n\t{ \"post_id\": 3877, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"Thank you for your help. And I tried Pentaho Spoon and I can see the tables. Do not know where is the data source wizard. I wonder if I want to draw pictures out of the table output by HPCC, is there a way to do it in Spoon. What kind of document I should check? \\n\\nThank you very much.\\n\\n\\n[quote="mnaweed":1m68m6cs]Actually, I was trying to replicate what you are doing on my side. I use Pentaho(Spoon) but I am not seeing the Database wizard you are using. Let me know how to get to it in Spoon.\\n\\nOne thing that we noticed. On the Wizard, the image seems to indicate that Pentaho will show the 'Keys' in the box's. If so, HPCC dont define Primary/forign keys hence they'll be blank.\\n\\nCheck the log files for any errors being returned by the JDBC driver and let us know if you see any.\\n\\n\\n\\n\\n[quote="wei xu":1m68m6cs]Pentaho User Console.\\n\\n[quote="mnaweed":1m68m6cs]What tool are you using?\", \"post_time\": \"2013-04-02 15:54:20\" },\n\t{ \"post_id\": 3876, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"Actually, I was trying to replicate what you are doing on my side. I use Pentaho(Spoon) but I am not seeing the Database wizard you are using. Let me know how to get to it in Spoon.\\n\\nOne thing that we noticed. On the Wizard, the image seems to indicate that Pentaho will show the 'Keys' in the box's. 
If so, HPCC dont define Primary/forign keys hence they'll be blank.\\n\\nCheck the log files for any errors being returned by the JDBC driver and let us know if you see any.\\n\\n\\n\\n\\n[quote="wei xu":1e4nxnvx]Pentaho User Console.\\n\\n[quote="mnaweed":1e4nxnvx]What tool are you using?\", \"post_time\": \"2013-04-02 14:20:23\" },\n\t{ \"post_id\": 3875, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"Pentaho User Console.\\n\\n[quote="mnaweed":2xr1shwe]What tool are you using?\", \"post_time\": \"2013-04-02 14:08:55\" },\n\t{ \"post_id\": 3874, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"What tool are you using?\", \"post_time\": \"2013-04-02 14:01:55\" },\n\t{ \"post_id\": 3868, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"I went into this problem that when I connecting to the HPCC, I can see the available table list,but when I add it, I could not see the fields. Please see the attached screenshot.\\n\\n\\n[quote="rodrigo.pastrana@lexisnexis.com":22xcrzsv]In particular, take a look at the EclResultLimit connection parameter. By default it is set to return the first 100 result records. You can set this parameter to 'ALL'.\\n\", \"post_time\": \"2013-04-01 21:30:14\" },\n\t{ \"post_id\": 3867, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"In particular, take a look at the EclResultLimit connection parameter. By default it is set to return the first 100 result records. You can set this parameter to 'ALL'.\\n\\n[quote="mnaweed":1ftqh8me]The driver returns the results of the requested query but It'll not store the results so you'll not be able to see the previous queries results.\\n\\nYou can control the some information using the parameters. I suggest you refer to the following document for more information:\\nhttp://cdn.hpccsystems.com/install/jdbc ... 2beta1.pdf \\n\\n[quote="wei xu":1ftqh8me]Thank you,mnaweed. \\n\\nIt works. And I wonder if there is any parameters I can control to see all the results or see the recent results output by HPCC?\\n\\nThank you for your help.\\n \\n[quote="mnaweed":1ftqh8me]Yes, you can connect Pentaho with HPCC using JDBC driver. To do it:\\n\\nSelect 'Generic database' as Connection Type\\n\\nCustom Connection URL:\\njdbc:hpcc:ServerAddress=<server ip address>;Cluster=default:WsECLDirectPort=8008:EclResultLimit=100:QuerySet=thor:LazyLoad=true:PageSize=100:LogDebug=true:\\n\\nCustom Driver Class Name:\\norg.hpccsystems.jdbcdriver.HPCCDriver\", \"post_time\": \"2013-04-01 20:20:38\" },\n\t{ \"post_id\": 3866, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"The driver returns the results of the requested query but It'll not store the results so you'll not be able to see the previous queries results.\\n\\nYou can control the some information using the parameters. I suggest you refer to the following document for more information:\\nhttp://cdn.hpccsystems.com/install/jdbc ... 2beta1.pdf \\n\\n[quote="wei xu":1odmy31b]Thank you,mnaweed. \\n\\nIt works. 
And I wonder if there is any parameters I can control to see all the results or see the recent results output by HPCC?\\n\\nThank you for your help.\\n \\n[quote="mnaweed":1odmy31b]Yes, you can connect Pentaho with HPCC using JDBC driver. To do it:\\n\\nSelect 'Generic database' as Connection Type\\n\\nCustom Connection URL:\\njdbc:hpcc:ServerAddress=<server ip address>;Cluster=default:WsECLDirectPort=8008:EclResultLimit=100:QuerySet=thor:LazyLoad=true:PageSize=100:LogDebug=true:\\n\\nCustom Driver Class Name:\\norg.hpccsystems.jdbcdriver.HPCCDriver\", \"post_time\": \"2013-04-01 19:56:31\" },\n\t{ \"post_id\": 3865, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"Thank you,mnaweed. \\n\\nIt works. And I wonder if there is any parameters I can control to see all the results or see the recent results output by HPCC?\\n\\nThank you for your help.\\n \\n[quote="mnaweed":2iuigjxo]Yes, you can connect Pentaho with HPCC using JDBC driver. To do it:\\n\\nSelect 'Generic database' as Connection Type\\n\\nCustom Connection URL:\\njdbc:hpcc:ServerAddress=<server ip address>;Cluster=default:WsECLDirectPort=8008:EclResultLimit=100:QuerySet=thor:LazyLoad=true:PageSize=100:LogDebug=true:\\n\\nCustom Driver Class Name:\\norg.hpccsystems.jdbcdriver.HPCCDriver\", \"post_time\": \"2013-04-01 19:19:18\" },\n\t{ \"post_id\": 3864, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"mnaweed\", \"post_text\": \"Yes, you can connect Pentaho with HPCC using JDBC driver. To do it:\\n\\nSelect 'Generic database' as Connection Type\\n\\nCustom Connection URL:\\njdbc:hpcc:ServerAddress=<server ip address>;Cluster=default:WsECLDirectPort=8008:EclResultLimit=100:QuerySet=thor:LazyLoad=true:PageSize=100:LogDebug=true:\\n\\nCustom Driver Class Name:\\norg.hpccsystems.jdbcdriver.HPCCDriver\", \"post_time\": \"2013-04-01 17:21:49\" },\n\t{ \"post_id\": 3863, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Re: Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"joe.chambers\", \"post_text\": \"I've not used the JDBC driver but we have developed some plugins that allow you to interface Pentaho Spoon with HPCC directly. Which part of the Pentaho suite are you using?\", \"post_time\": \"2013-04-01 17:12:31\" },\n\t{ \"post_id\": 3862, \"topic_id\": 866, \"forum_id\": 34, \"post_subject\": \"Can I use JDBC driver to connect Pentaho with HPCC\", \"username\": \"wei xu\", \"post_text\": \"Hi,\\n\\nI wonder if I can connect Pentaho with HPCC using JDBC driver. Which database selection should I choose? Will the Generic Database work? What should the customer URL be?\\n\\nThank you very much.\", \"post_time\": \"2013-04-01 14:56:48\" },\n\t{ \"post_id\": 3958, \"topic_id\": 884, \"forum_id\": 34, \"post_subject\": \"Re: Order of params for stored procedures\", \"username\": \"sbagaria\", \"post_text\": \"Thanks! It's not critical. I had some logic defined for empty strings. 
I now changed it to an '_'.\", \"post_time\": \"2013-04-22 18:04:24\" },\n\t{ \"post_id\": 3957, \"topic_id\": 884, \"forum_id\": 34, \"post_subject\": \"Re: Order of params for stored procedures\", \"username\": \"rodrigo.pastrana@lexisnexis.com\", \"post_text\": \"The driver doesn't currently allow mismatching stored procedure signatures (it rejects the request if the number of in params don't match.)\\n\\nLet me know if this is an issue for you, we can discuss a change request if it makes sense. Thanks, Rodrigo.\", \"post_time\": \"2013-04-22 18:03:05\" },\n\t{ \"post_id\": 3940, \"topic_id\": 884, \"forum_id\": 34, \"post_subject\": \"Re: Order of params for stored procedures\", \"username\": \"sbagaria\", \"post_text\": \"OK. Got some hints. I have to skip the quote characters. They are considered a part of the string. The order is what appears in the wsECL interface or the columns information for the procedure.\\n\\nNow the only remaining question is how do I either skip a parameter so that the default value is taken from the ECL code or input an empty string as the param?\", \"post_time\": \"2013-04-16 17:25:21\" },\n\t{ \"post_id\": 3939, \"topic_id\": 884, \"forum_id\": 34, \"post_subject\": \"Order of params for stored procedures\", \"username\": \"sbagaria\", \"post_text\": \"I am trying to use the JDBC driver for using published queries. Everything works fine on queries with one or two params. But when there are multiple params, I can't seem to get back any results.\\n\\nDo you have examples on how best to handle multiple parameters for a query of different types?\", \"post_time\": \"2013-04-16 17:13:01\" },\n\t{ \"post_id\": 6668, \"topic_id\": 1511, \"forum_id\": 34, \"post_subject\": \"Re: connect via SquirrelSQL\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi, ideally you should take a look at the free online lessons Richard linked above.\\n\\nThe JDBC connector, and WsSQL do translate SQL to ECL, and it is possible to access the ECL(although I don't recommend it). \\n\\nAfter executing an SQL query via JDBC/WsSQL, you can browse your recent workunits via ECLWatch (on http://yourip:8010). Once you locate the workunit that corresponds to your recent query, in the workunit view, you can view the actual ECL query associated with that workunit.\\n\\nAgain, that is robotic ECL and shouldn't be mistaken as exemplary ECL, so take it with a grain a salt. Thanks.\", \"post_time\": \"2014-12-04 15:18:46\" },\n\t{ \"post_id\": 6667, \"topic_id\": 1511, \"forum_id\": 34, \"post_subject\": \"Re: connect via SquirrelSQL\", \"username\": \"rtaylor\", \"post_text\": \"But ECL is a Problem
We do have free self-paced, online courses for ECL. \\n\\nRegister here to get started: http://learn.lexisnexis.com/hpcc\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-12-04 14:43:49\" },\n\t{ \"post_id\": 6664, \"topic_id\": 1511, \"forum_id\": 34, \"post_subject\": \"Re: connect via SquirrelSQL\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nsorry for my very late answer, i had a little fight with my document and some sources for my text. i will now concentrate onto the setup.\\n\\n\\n[quote="rodrigo.pastrana":25ko38rs]Hi and welcome to the forum,\\nI can definitely help you connect to your HPCC system via Squirrel and the JDBC driver. \\n\\nFirst of all, can you describe what is the exact issue you are encountering, and what HPCC version you are using?\\n\\nthank you \\n\\nI've downloaded following packages and installed them:\\n\\nhpccsystems-clienttools_community-5.0.2-1trusty_amd64.deb\\nhpccsystems-jdbcdriver-0.2.4Beta.jar\\nhpccsystems-wssql-5.0.0-rc2precise_amd64.deb\\n\\n\\n\\n[quote="rodrigo.pastrana":25ko38rs]\\nBy default, the JDBC driver attempts to contact main HPCC web service at http://localhost:8010. Please verify you can access that url from your local browser.\\nIf you're running a local vm, the service is not running on localhost, but rather on the VM, which should have it's own ip address. Once you find the VM's ip address you will want to configure the JDBC to connect to that IP by updating the ServerAddress property. All the connection properties are described in the configuration section of the JDBC doc.\\n\\nthe server is up @localhost:8010\\n\\nI've already uploaded 3 CSV Files. There I stuck now
\\n\\n\\n\\n[quote="rodrigo.pastrana":25ko38rs]\\nAnyhow, JDBC connections are typically configured via one of two ways, via a JDBC connection url, or via a driver property (squirrel provides a mechanism for configuring both).\\n\\nWhen I use squirrel I find it much easier to use their built in Driver properties mechanism to configure the driver.\\n\\nIn the HPCC Driver configuration dialog, select the properties button, choose the Driver Properties tab, enable the "use driver properties" check box.\\nFind the "ServerAddress" line, and choose the "Specify" column, and enter the correct value (your VM's ip address).\\n\\nTry that and report any issues you experience. \\n\\nthank you! now Squirrel connected to HPCC! \\nthe localhost:8010 was the problem
\\n\\n
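For anyone who would rather test the connection from plain Java than from the SQuirreL dialogs, a minimal sketch is below. It reuses the driver class name and the connection-URL shape quoted elsewhere in these forums for Pentaho; the IP address and the logical file name in the query are placeholders, and the exact set of URL parameters accepted should be checked against the JDBC driver documentation.\n\nimport java.sql.Connection;\nimport java.sql.DriverManager;\nimport java.sql.ResultSet;\nimport java.sql.Statement;\n\npublic class HpccJdbcSmokeTest {\n    public static void main(String[] args) throws Exception {\n        // Register the HPCC JDBC driver (class name as given in this forum)\n        Class.forName("org.hpccsystems.jdbcdriver.HPCCDriver");\n\n        // Same URL shape as the Pentaho example quoted in these forums, with the\n        // VM's IP address instead of localhost; adjust ports and cluster names\n        // for your own environment.\n        String url = "jdbc:hpcc:ServerAddress=192.168.56.101;Cluster=default:"\n                   + "WsECLDirectPort=8008:EclResultLimit=100:QuerySet=thor:"\n                   + "LazyLoad=true:PageSize=100:LogDebug=true:";\n\n        try (Connection conn = DriverManager.getConnection(url);\n             Statement stmt = conn.createStatement();\n             // Placeholder logical file name; use one of your own ECL-processed files\n             ResultSet rs = stmt.executeQuery("select * from tutorial::yn::transactions")) {\n            while (rs.next()) {\n                System.out.println(rs.getString(1));\n            }\n        }\n    }\n}\n\nIf the driver-properties route in SQuirreL is preferred, the same keys (ServerAddress, Cluster, QuerySet, and so on) can be set there instead of in the URL.\n\n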
\\n\\n\\n\\n[quote="rodrigo.pastrana":25ko38rs]\\nAlso, please remember that currently this driver is read-only and cannot be used to create new database/tables in HPCC.\\nAlso, this driver will only acknowledge HPCC files which have been processed via ECL.\\n\\nRead-Only is ok. I just need Performance-Tests (SELECT's).\\nBut ECL is a Problem, ive read in the DOC, that there is a translator for SQL -> ECL? WsSQL maybe?\", \"post_time\": \"2014-12-04 13:05:27\" },\n\t{ \"post_id\": 6599, \"topic_id\": 1511, \"forum_id\": 34, \"post_subject\": \"Re: connect via SquirrelSQL\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi and welcome to the forum,\\nI can definitely help you connect to your HPCC system via Squirrel and the JDBC driver. \\n\\nFirst of all, can you describe what is the exact issue you are encountering, and what HPCC version you are using?\\n\\nBy default, the JDBC driver attempts to contact main HPCC web service at http://localhost:8010. Please verify you can access that url from your local browser.\\nIf you're running a local vm, the service is not running on localhost, but rather on the VM, which should have it's own ip address. Once you find the VM's ip address you will want to configure the JDBC to connect to that IP by updating the ServerAddress property. All the connection properties are described in the configuration section of the JDBC doc. \\n\\nAnyhow, JDBC connections are typically configured via one of two ways, via a JDBC connection url, or via a driver property (squirrel provides a mechanism for configuring both).\\n\\nWhen I use squirrel I find it much easier to use their built in Driver properties mechanism to configure the driver.\\n\\nIn the HPCC Driver configuration dialog, select the properties button, choose the Driver Properties tab, enable the "use driver properties" check box.\\nFind the "ServerAddress" line, and choose the "Specify" column, and enter the correct value (your VM's ip address).\\n\\nTry that and report any issues you experience. \\n\\nAlso, please remember that currently this driver is read-only and cannot be used to create new database/tables in HPCC.\\nAlso, this driver will only acknowledge HPCC files which have been processed via ECL.\", \"post_time\": \"2014-11-17 15:46:33\" },\n\t{ \"post_id\": 6596, \"topic_id\": 1511, \"forum_id\": 34, \"post_subject\": \"connect via SquirrelSQL\", \"username\": \"NSD\", \"post_text\": \"Hi,\\n\\nI am completely new to this topic and my knowledge about HPCC and Linux is nearly 0.\\n\\nCurrently I am writing my Masterthesis about HPCC on a Linux Machine (VM). I tried to connect to my (already installed
) HPCC Server at localhost, but I can't find the specific URL needed for SquirrelSQL.\\n\\nWhat I want to get to work: write SQL for large datasets (uploaded CSV-Files) and analyze the speed, because my time is too short to learn ECL :/\\n\\nHere's an img:\\n
\\n\\n\\ntl;dr: \\nI've read the Manual about the JDBC driver, but can't find the needed URL.\\n\\nHope, somebody can help me!\", \"post_time\": \"2014-11-15 10:25:36\" },\n\t{ \"post_id\": 6824, \"topic_id\": 1571, \"forum_id\": 34, \"post_subject\": \"Re: Production ready JDBC driver\", \"username\": \"jeeves\", \"post_text\": \"Rodrigo,\\n\\nNothing in particular besides basic join types and aggregations.\\n\\nI noticed that this link \\n\\nhttp://hpccsystems.com/products-and-ser ... DBC-Driver\\n\\nStill says that the driver is in Beta. So I Just wanted to know whether it is stable enough to be used in production.\\n\\nWhen I checked this driver more than a year back it sometimes mis-behaved when the order of tables in the 'from' clause was changed.\\n\\nThanks,\\n-David\", \"post_time\": \"2015-01-21 20:11:39\" },\n\t{ \"post_id\": 6823, \"topic_id\": 1571, \"forum_id\": 34, \"post_subject\": \"Re: Production ready JDBC driver\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi David, although the latest version is still considered "Technical Preview" it has been heavily tested and is stable.\\nAre there any features you're looking for in the JDBC connector? Thanks.\", \"post_time\": \"2015-01-21 18:47:09\" },\n\t{ \"post_id\": 6821, \"topic_id\": 1571, \"forum_id\": 34, \"post_subject\": \"Production ready JDBC driver\", \"username\": \"jeeves\", \"post_text\": \"Hi,\\n\\nis a non beta version of the JDBC driver available?\\n\\nThanks,\\n-David\", \"post_time\": \"2015-01-21 17:47:54\" },\n\t{ \"post_id\": 7237, \"topic_id\": 1659, \"forum_id\": 34, \"post_subject\": \"Re: Java Docs for HPCC JDBC Driver\", \"username\": \"Mragesh\", \"post_text\": \"Thanks Bob,\\n\\nI was actually looking for the java docs to understand all the classes and methods supported by HPCC JDBC Driver.\\n\\nI realized that the HPCC JDBC Driver has been created using the java JDBC API and hence most of the interfaces, classes and methods are implementation of existing ones in the JDBC API.\\n\\nThe following link has a list of the same:\\n\\n[url]\\nhttp://docs.oracle.com/javase/7/docs/ap ... mmary.html\\n[/url]\", \"post_time\": \"2015-04-01 15:13:45\" },\n\t{ \"post_id\": 7234, \"topic_id\": 1659, \"forum_id\": 34, \"post_subject\": \"Re: Java Docs for HPCC JDBC Driver\", \"username\": \"bforeman\", \"post_text\": \"The available doc is located here:\\n\\nhttp://cdn.hpccsystems.com/install/jdbc/stable/HPCC_JDBC_Driver-0.2.6-1Beta.pdf\\n\\n...and of course, the main product page contains additional links to the readme and source:\\n\\nhttp://hpccsystems.com/products-and-services/products/plugins/JDBC-Driver\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-03-31 18:41:03\" },\n\t{ \"post_id\": 7231, \"topic_id\": 1659, \"forum_id\": 34, \"post_subject\": \"Java Docs for HPCC JDBC Driver\", \"username\": \"Mragesh\", \"post_text\": \"Hi, \\n\\nDo you have JAVA docs for the driver?\\nWanted more information on some methods.\\n\\nThanks,\\nMragesh\", \"post_time\": \"2015-03-31 14:41:10\" },\n\t{ \"post_id\": 7289, \"topic_id\": 1664, \"forum_id\": 34, \"post_subject\": \"Re: Including Special Characters in SQL query\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi Mragesh, thanks for pointing out this issue. You'll be glad to know the next version of the JDBC Connector (0.3.0) will resolve this issue by ensuring all values which need to be encoded are indeed encoded. 
Thanks.\", \"post_time\": \"2015-04-08 17:19:45\" },\n\t{ \"post_id\": 7257, \"topic_id\": 1664, \"forum_id\": 34, \"post_subject\": \"Re: Including Special Characters in SQL query\", \"username\": \"Mragesh\", \"post_text\": \"Sorry for the unnecessary question, found the solution.\\nRealized that everything is passed to HPCC as an URL so used URL encoding to preserve the "+" sign.\\n\\n
\\np.setString(1, URLEncoder.encode(value.trim()));\\n
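\nA slightly fuller sketch of the same workaround, not from the original post: the query text is the one shown above, the wrapper class and method names are illustrative, and the explicit UTF-8 charset is only there to avoid the deprecated one-argument URLEncoder.encode overload.\n\nimport java.net.URLEncoder;\nimport java.nio.charset.StandardCharsets;\nimport java.sql.Connection;\nimport java.sql.PreparedStatement;\nimport java.sql.ResultSet;\n\npublic class EncodedParamExample {\n    // Bind a value containing characters such as '+' by URL-encoding it first,\n    // since the driver passes query parameters to the cluster as part of a URL.\n    public static ResultSet findByValue(Connection conn, String rawValue) throws Exception {\n        PreparedStatement p = conn.prepareStatement("select * from table where value = ?");\n        p.clearParameters();\n        p.setString(1, URLEncoder.encode(rawValue.trim(), StandardCharsets.UTF_8.name()));\n        return p.executeQuery();\n    }\n}\n\nAs noted in the reply above, JDBC connector 0.3.0 is expected to handle this encoding itself, so the explicit encode call should only be needed on older driver versions.\n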
\", \"post_time\": \"2015-04-06 17:24:17\" },\n\t{ \"post_id\": 7256, \"topic_id\": 1664, \"forum_id\": 34, \"post_subject\": \"Including Special Characters in SQL query\", \"username\": \"Mragesh\", \"post_text\": \"Hi,\\n\\nHere is the Java code that I use to access a dataset in HPCC:\\n\\n String mysql = "select * from table where value = ?";\\n PreparedStatement p;\\n p = (PreparedStatement) hpccTestObject .connectionByProperties.prepareStatement(mysql);\\n String value="abc+";\\n p.clearParameters();\\n p.setString(1, value.trim());\\n HPCCResultSet hrs = (HPCCResultSet) ((HPCCPreparedStatement) p).executeQuery();\\n
\\n\\nNow when I execute the query, the "+" gets dropped in the workunit created on my HPCC cluster.\\n\\n\\n STRING PARAM1 := 'abc '; //Dropped +\\nimport std;\\nTblDS0RecDef := RECORD string value END; \\nTblDS0 := DATASET('~abc::xyz::table', TblDS0RecDef,FLAT);\\nTblDS0Filtered := TblDS0( pat = PARAM1 );\\nSelectStruct := RECORD\\n string value := TblDS0Filtered.value;\\nEND;\\nTblDS0FilteredTable := TABLE( TblDS0Filtered, SelectStruct );\\nOUTPUT(CHOOSEN(TblDS0FilteredTable,ALL),NAMED('JDBCSelectQueryResult'));\\n\\n
\\n\\nHow do I preserve the "+" sign, I have tried using prepared statement and escaping the "+" in the string?\", \"post_time\": \"2015-04-06 17:13:16\" },\n\t{ \"post_id\": 10223, \"topic_id\": 2453, \"forum_id\": 34, \"post_subject\": \"Re: Unable to download hpccjapi/wsclient/wsclient-0.6.0.pom\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Hi, you’ll have to open an ISIT ticket to gain access to "https://mvnrisk.risk.regn.net" \\n\\nIf you’re in a hurry, you could also build the wsclient locally and change the pom to point to your local wsclient build:\\nhttps://github.com/hpcc-systems/HPCC-JAPIs\", \"post_time\": \"2016-08-02 21:21:21\" },\n\t{ \"post_id\": 10173, \"topic_id\": 2453, \"forum_id\": 34, \"post_subject\": \"Unable to download hpccjapi/wsclient/wsclient-0.6.0.pom\", \"username\": \"apohl\", \"post_text\": \"Hi,\\n\\nI've been attempting to compile a custom HPCC JDBC Driver using Maven. The problem I'm having is that one of the dependencies doesn't seem to exist or is at least unavailable at the following path...\\nhttp://mvnrisk.risk.regn.net/content/re ... -0.6.0.pom\\n\\nDoes anybody know of a workaround for this problem?\\n\\nThanks,\\nAaron\", \"post_time\": \"2016-08-02 15:29:53\" },\n\t{ \"post_id\": 10413, \"topic_id\": 2463, \"forum_id\": 34, \"post_subject\": \"Re: HPCCPreparedStatement not accepting Long/Integer as para\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Aaron, I'm glad to see you got your project building. \\nThanks for bringing up this issue, I'm currently working on the primitive type binding and will have a solution very soon. In the meantime you can add yourself as a watcher to this Jira so you can follow the progress. Thanks\\n\\nhttps://track.hpccsystems.com/browse/JDBC-49\\n\\nUpdate, just noticed you were the original reporter of the issue... the fix has been submitted and will the reviewed shortly. If you're in a hurry you can pull the commit from my repo: https://github.com/rpastrana/hpcc-jdbc/ ... ypeBinding\", \"post_time\": \"2016-08-09 15:28:38\" },\n\t{ \"post_id\": 10183, \"topic_id\": 2463, \"forum_id\": 34, \"post_subject\": \"HPCCPreparedStatement not accepting Long/Integer as params\", \"username\": \"apohl\", \"post_text\": \"Hi,\\n\\nTrying to run the following example code...\\nString myprepsql = "select k,s from ajp::temp::ds persons where k >= ? and k <= ?";\\nHPCCPreparedStatement prepstmt = (HPCCPreparedStatement)connection.prepareStatement(myprepsql);\\nprepstmt.setLong(1, (long)1);\\nprepstmt.setLong(2, (long)5);\\nHPCCResultSet qrs = (HPCCResultSet) ((HPCCPreparedStatement) prepstmt).executeQuery();\\nwhile(qrs.next()) {\\n\\tSystem.out.println(qrs.getInt("k"));\\n}
\\n\\nI get the following error message...\\n[color=#BF0000:3892fdu1]java.sql.SQLException: java.lang.Long cannot be cast to java.lang.String\\n\\tat org.hpccsystems.jdbcdriver.HPCCPreparedStatement.executeQuery(HPCCPreparedStatement.java:108)\\n\\nWhen looking at the HPCCPreparedStatement code it looks like any parameter passed in is type cast to a String. I'm wondering if this is something that could be changed in a future update?\\n\\nThanks,\\nAaron\", \"post_time\": \"2016-08-02 15:46:00\" },\n\t{ \"post_id\": 35395, \"topic_id\": 9345, \"forum_id\": 34, \"post_subject\": \"Realtime insert/update operation with JDBC on roxie\", \"username\": \"adityac\", \"post_text\": \"Hi Team,\\n\\nAfter research I came to know that HPCC can process million of data in real time with less time. I am trying to evaluate a process to do real time data insert or modify with jdbc driver in java. But no luck to crack for real time data writing using jdbc driver. I did it for read/select operation and it works. \\n\\nCan any one please help here to workout.\\n\\nWe are working with JDBC driver to fetch data from logical file in java.\\n\\nBelow are snap from our code-\\n\\nString sqlQuery ="select * from tutorial::yn::transactions";\\norg.hpccsystems.ws.client.utils.Connection connection = new Connection("http://localhost:8510");\\nHPCCWsSQLClient mywssqlclient = HPCCWsSQLClient.get(connection);\\nSystem.out.println(mywssqlclient.getResults(mywssqlclient.executeSQLWUIDResponse(sqlQuery, "roxie", ""), 0, 50));\\n\\nTill now we are ok with fetching data from file but we have real time requirement of INSERT/UPDATE operations on existing data of file.\\n\\nCan someone please suggest us that we can support realtime INSERT/UPDATE operation on logical file?\\nIf yes then how can we fulfill our requirements?\", \"post_time\": \"2022-04-22 09:24:07\" },\n\t{ \"post_id\": 35425, \"topic_id\": 9355, \"forum_id\": 34, \"post_subject\": \"Re: Realtime insert/update operation with JDBC driver on rox\", \"username\": \"rtaylor\", \"post_text\": \"adityac,Can someone please suggest us that we can support realtime INSERT/UPDATE operation on logical file?
The first thing to understand about the HPCC Systems platform is that it is NOT an RDBMS and was not originally designed for OLTP use. It is a Big Data processing platform that was created to do batch processing of huge amounts of data (too big for effective use on any RDBMS). The JDBC driver is designed to allow external access only, and does not support write operations.\\n\\nIf yes then how can we fulfill our requirements?
It IS possible to make an HPCC Systems platform emulate an OLTP system by adding in a "Deltabase" -- a small SQL database that contains all added/changed records until they can be rolled into the ROXIE data. \\n\\nAn intern in our Brazil office did a Deltabase project and submitted this to our 2020 Community Day conference Poster Competition: https://wiki.hpccsystems.com/display/hpcc/Johny+Chen+Jy+-+2020+Poster+Contest+Resources \\n\\nAnd Fujio Turner did a TechTalk presentation in 2017 that also discussed this technique: https://hpccsystems.com/blog/download-tech-talks-hpcc-systems-community-episode-2 \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2022-05-06 14:55:39\" },\n\t{ \"post_id\": 35405, \"topic_id\": 9355, \"forum_id\": 34, \"post_subject\": \"Realtime insert/update operation with JDBC driver on roxie\", \"username\": \"adityac\", \"post_text\": \"Hi Team,\\n\\nAfter research I came to know that HPCC can process million of data in real time with less time. I am trying to evaluate a process to do real time data insert or modify with jdbc driver in java. But no luck to crack for real time data writing using jdbc driver. I did it for read/select operation and it works. \\n\\nCan any one please help here to workout.\\n\\nWe are working with JDBC driver to fetch data from logical file in java.\\n\\nBelow are snap from our code-\\n\\n[color=#FFFF00:2m53iatw]String sqlQuery ="select * from tutorial::yn::transactions";\\norg.hpccsystems.ws.client.utils.Connection connection = new Connection("http://localhost:8510");\\nHPCCWsSQLClient mywssqlclient = HPCCWsSQLClient.get(connection);\\nSystem.out.println(mywssqlclient.getResults(mywssqlclient.executeSQLWUIDResponse(sqlQuery, "roxie", ""), 0, 50));\\n\\nTill now we are ok with fetching data from file but we have real time requirement of INSERT/UPDATE operations on existing data of file.\\n\\nCan someone please suggest us that we can support realtime INSERT/UPDATE operation on logical file?\\nIf yes then how can we fulfill our requirements?\", \"post_time\": \"2022-04-22 09:27:43\" },\n\t{ \"post_id\": 5608, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"tlhumphrey2\", \"post_text\": \"I just successfully install rHpcc in R version 3.0.2.\", \"post_time\": \"2014-05-01 18:06:48\" },\n\t{ \"post_id\": 4395, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"Dinesh_S\", \"post_text\": \"I think you are having version related issues here. You have RCurl and rHpcc which were built under R version 2.14.2 and package bitops built under R version 3.0. 
I am not really sure how to resolve this error unless you have everything built under one version (R version 2.14.2).\\n\\nI will test the rHpcc code for version 3.0 and let you know the results.\", \"post_time\": \"2013-07-31 14:13:57\" },\n\t{ \"post_id\": 4383, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"gnanasinghamos\", \"post_text\": \"Dinesh,\\n\\nIm getting this error when i try to install the package and loading.\\n\\n> install.packages("D:/Amos/Application/R/rHpcc_1.0.zip", repos = NULL)\\nWarning in install.packages :\\n package ‘D:/Amos/Application/R/rHpcc_1.0.zip’ is not available (for R version 2.14.1)\\nInstalling package(s) into ‘C:/Program Files/R/R-2.14.1/library’\\n(as ‘lib’ is unspecified)\\npackage ‘rHpcc’ successfully unpacked and MD5 sums checked\\n> install.packages("D:/Amos/Application/R/RCurl_1.95-4.1.zip", repos = NULL)\\nWarning in install.packages :\\n package ‘D:/Amos/Application/R/RCurl_1.95-4.1.zip’ is not available (for R version 2.14.1)\\nInstalling package(s) into ‘C:/Program Files/R/R-2.14.1/library’\\n(as ‘lib’ is unspecified)\\npackage ‘RCurl’ successfully unpacked and MD5 sums checked\\n> install.packages("D:/Amos/Application/R/XML_3.96-1.1.zip", repos = NULL)\\nWarning in install.packages :\\n package ‘D:/Amos/Application/R/XML_3.96-1.1.zip’ is not available (for R version 2.14.1)\\nInstalling package(s) into ‘C:/Program Files/R/R-2.14.1/library’\\n(as ‘lib’ is unspecified)\\npackage ‘XML’ successfully unpacked and MD5 sums checked\\n> library("rHpcc", lib.loc="C:/Program Files/R/R-2.14.1/library")\\nLoading required package: RCurl\\nLoading required package: bitops\\nError in eval(expr, envir, enclos) : \\n could not find function ".getNamespace"\\nIn addition: Warning messages:\\n1: package ‘rHpcc’ was built under R version 2.14.2 \\n2: package ‘RCurl’ was built under R version 2.14.2 \\n3: package ‘bitops’ was built under R version 3.0.0 \\nError : unable to load R code in package ‘bitops’\\nError: package ‘bitops’ could not be loaded\", \"post_time\": \"2013-07-31 04:37:57\" },\n\t{ \"post_id\": 4376, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"Dinesh_S\", \"post_text\": \"R version 2.14.1\", \"post_time\": \"2013-07-30 15:19:38\" },\n\t{ \"post_id\": 4375, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi Dinesh,\\n\\nLet me know the version of R you have installed so that it would be helpful for me to check the same.\", \"post_time\": \"2013-07-30 14:49:58\" },\n\t{ \"post_id\": 4374, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"Dinesh_S\", \"post_text\": \"Hi,\\n\\nrHPCC plugin has dependencies on the following packages:\\n1. RCurl\\n2. XML\\n\\nThis dependencies are resolved when you try to install the plugin. 
Are you downloading the package from CRAN or HPCCSystems Website?\\n\\nThis is how I do it from commandline:\\n\\n\\n> install.packages("rHpcc")\\n--- Please select a CRAN mirror for use in this session ---\\nalso installing the dependencies ‘RCurl’, ‘XML’\\n\\ntrying URL 'http://mirrors.nics.utk.edu/cran/bin/windows/contrib/2.14/RCurl_1.95-4.1.zip'\\nContent type 'application/zip' length 2839377 bytes (2.7 Mb)\\nopened URL\\ndownloaded 2.7 Mb\\n\\ntrying URL 'http://mirrors.nics.utk.edu/cran/bin/windows/contrib/2.14/XML_3.96-1.1.zip'\\nContent type 'application/zip' length 4254339 bytes (4.1 Mb)\\nopened URL\\ndownloaded 4.1 Mb\\n\\ntrying URL 'http://mirrors.nics.utk.edu/cran/bin/windows/contrib/2.14/rHpcc_1.0.zip'\\nContent type 'application/zip' length 247670 bytes (241 Kb)\\nopened URL\\ndownloaded 241 Kb\\n\\npackage ‘RCurl’ successfully unpacked and MD5 sums checked\\npackage ‘XML’ successfully unpacked and MD5 sums checked\\npackage ‘rHpcc’ successfully unpacked and MD5 sums checked\\n
\\n\\nIn RStudio I just click on install packages, Select CRAN as a repository and specify the package name. R is not able to pick up the installed RCURL package for some reason.\\n\\nTake a look at this post: https://stat.ethz.ch/pipermail/biocondu ... 28936.html\\n\\nLet me know if that works.\\n\\nThanks,\\nDinesh\", \"post_time\": \"2013-07-30 14:45:56\" },\n\t{ \"post_id\": 4354, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"DSC\", \"post_text\": \"It was worth a try. I know nothing about R, so I'll just sit back, open the popcorn, let the experts chime in.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-07-23 12:35:34\" },\n\t{ \"post_id\": 4353, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi Dan,\\nI'm using Windows machine. \\n\\nNow i downloaded R-2.14.1 and installed the rHPCC package but im getting below error when i try to load the package\\n\\n> utils:::menuInstallLocal()\\npackage ‘rHpcc’ successfully unpacked and MD5 sums checked\\n> local({pkg <- select.list(sort(.packages(all.available = TRUE)),graphics=TRUE)\\n+ if(nchar(pkg)) library(pkg, character.only=TRUE)})\\nLoading required package: RCurl\\nError: package ‘RCurl’ could not be loaded\\nIn addition: Warning message:\\nIn library(pkg, character.only = TRUE, logical.return = TRUE, lib.loc = lib.loc) :\\n there is no package called ‘RCurl’\", \"post_time\": \"2013-07-23 12:32:02\" },\n\t{ \"post_id\": 4352, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"DSC\", \"post_text\": \"Does the information in this forum post help? Chris posted some additional information for getting R up and running.\\n\\nDan\", \"post_time\": \"2013-07-23 12:14:23\" },\n\t{ \"post_id\": 4350, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"gnanasinghamos\", \"post_text\": \"Hi,\\n\\nI have downloaded the rHPCC plugins from HPCC SYSTEMS site. When i try to install the plugin i found the below mentioned error.\\n> install.packages("D:/Amos/RStudio/rHpcc_1.0.zip", repos = NULL)\\nWarning in install.packages :\\n package ‘D:/Amos/RStudio/rHpcc_1.0.zip’ is not available (for R version 3.0.1)\\npackage ‘rHpcc’ successfully unpacked and MD5 sums checked\\n> install.packages("D:/Amos/RStudio/rHpcc_1.0.tar.gz", repos = NULL, type = "source")\\nWarning in install.packages :\\n package ‘D:/Amos/RStudio/rHpcc_1.0.tar.gz’ is not available (for R version 3.0.1)\\nERROR: dependencies 'RCurl', 'XML' are not available for package 'rHpcc'\\n* removing 'C:/Program Files/R/R-3.0.1/library/rHpcc'\\nWarning in install.packages :\\n running command '"C:/PROGRA~1/R/R-30~1.1/bin/i386/R" CMD INSTALL -l "C:\\\\Program Files\\\\R\\\\R-3.0.1\\\\library" "D:/Amos/RStudio/rHpcc_1.0.tar.gz"' had status 1\\nWarning in install.packages :\\n installation of package ‘D:/Amos/RStudio/rHpcc_1.0.tar.gz’ had non-zero exit status.\\n\\nIt would be fine if you provide me the steps to integrate rHPCC and an sample code to test the integration.\", \"post_time\": \"2013-07-23 07:05:37\" },\n\t{ \"post_id\": 2597, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Re: R Integration\", \"username\": \"arjuna chala\", \"post_text\": \"Nick,\\n\\nWe have this item on the roadmap for 2013. The ability to execute R functions from ECL. 
We will keep you updated as we make progress.\\n\\nThank You\\n\\nArjuna\", \"post_time\": \"2012-10-23 13:45:49\" },\n\t{ \"post_id\": 2585, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"Executing R-functions on ECL Datasets\", \"username\": \"nvasil\", \"post_text\": \"So from R I can manipulate data in an HPCC workspace. I think it would be more interesting to be able to execute R-functions on the HPCC workspace and take advantage of the distributed engine of HPCC?\\nI am looking for something like that \\nhttps://github.com/RevolutionAnalytics/ ... utorial.md\", \"post_time\": \"2012-10-22 01:34:18\" },\n\t{ \"post_id\": 2233, \"topic_id\": 493, \"forum_id\": 35, \"post_subject\": \"R Integration\", \"username\": \"HPCC Staff\", \"post_text\": \"Now available! The rHPCC package provides an interface between R and HPCC Systems which allows R developers to integrate with the HPCC Systems platform by writing ECL queries using the R language.\\n\\nDocumentation and Download information:\\nhttp://hpccsystems.com/products-and-ser ... ntegration\", \"post_time\": \"2012-08-16 21:12:29\" },\n\t{ \"post_id\": 3949, \"topic_id\": 881, \"forum_id\": 35, \"post_subject\": \"Re: Issues in displaying the data\", \"username\": \"HPCC Staff\", \"post_text\": \"Jeniba, thank you for reaching out to us and posting this issue. Our development team is currently looking into it and will post a response soon.\", \"post_time\": \"2013-04-17 16:28:59\" },\n\t{ \"post_id\": 3932, \"topic_id\": 881, \"forum_id\": 35, \"post_subject\": \"Issues in displaying the data\", \"username\": \"Jeniba Johnson\", \"post_text\": \"Hi \\n\\nI am new to R programming.I have just started exploring "rHpcc" plugin and trying to use hpcc keywords in R.\\nThe problem I am facing is that the output is not displayed\\nEven if the ECLOutput function being used.Can anyone help me with an small example? \\nI have attached the code below..\\n\\nThanks\", \"post_time\": \"2013-04-16 10:00:12\" },\n\t{ \"post_id\": 4145, \"topic_id\": 919, \"forum_id\": 35, \"post_subject\": \"Re: Call published queries\", \"username\": \"arjuna chala\", \"post_text\": \"Great Ideas.\\n\\n"1. You need R to do sophisticated reporting (using R graphics packages, etc.) based on datasets computed from HPCC. The current typical apporach is to output and despray a CSV file which is then ingested into R. However, a direct import into R from HPCC workunits (run from the R plugin or run previously) is the most ideal scenario as we reduce the scope for using inconsistent (desprayed) files."\\n\\nWe had the same intention. Please take a look at:\\n\\nhttp://hpccsystems.com/products-and-ser ... ntegration\\n\\nWe stopped short of implementing data frames. The idea was to see if somebody in the community would continue the development.\\n\\n2. Calling HPCC web services (Roxie published queries) from within R, which at the moment is not very easy to do as SOAP support in R is not ideal. A roundabout way to do this is to use the stored procedure calls in HPCC JDBC driver but the SQL standard has its own limitations (eg. no support for nested datasets).\\n\\nAgain, please take a look at the integration above. All ROXIE and Thor queires expose a REST interface. 
Even though we did not integrate Roxie in this implementation, the idea of calling out to a service is the same.\", \"post_time\": \"2013-05-24 19:36:06\" },\n\t{ \"post_id\": 4140, \"topic_id\": 919, \"forum_id\": 35, \"post_subject\": \"Call published queries\", \"username\": \"sbagaria\", \"post_text\": \"IMO, the R plugin can be of tremendous advantage in two situations:\\n\\n1. You need R to do sophisticated reporting (using R graphics packages, etc.) based on datasets computed from HPCC. The current typical apporach is to output and despray a CSV file which is then ingested into R. However, a direct import into R from HPCC workunits (run from the R plugin or run previously) is the most ideal scenario as we reduce the scope for using inconsistent (desprayed) files.\\n\\n2. Calling HPCC web services (Roxie published queries) from within R, which at the moment is not very easy to do as SOAP support in R is not ideal. A roundabout way to do this is to use the stored procedure calls in HPCC JDBC driver but the SQL standard has its own limitations (eg. no support for nested datasets).\\n\\nIs there a plan to allow for accessing Roxie published queries directly from R?\", \"post_time\": \"2013-05-22 13:55:02\" },\n\t{ \"post_id\": 5833, \"topic_id\": 1333, \"forum_id\": 35, \"post_subject\": \"Re: EMBED(R) compile/link failed.\", \"username\": \"JimD\", \"post_text\": \"I believe your issue is that your server does not have the R plug-in or dependencies installed (Rembed).\\n\\nWhat OS are you using and what package did you install? \\n\\nIf your system is RPM-based, you need the one that says: \\n\\nhpccsystems-platform_community-with-plugins- \\nThis must be installed with the --nodeps option. \\n\\nYou must then install dependencies. There is more information in the appendix of Installing and Running... \\nhttp://hpccsystems.com/download/docs/in ... c-platform\", \"post_time\": \"2014-06-04 16:26:17\" },\n\t{ \"post_id\": 5803, \"topic_id\": 1333, \"forum_id\": 35, \"post_subject\": \"Re: EMBED(R) compile/link failed.\", \"username\": \"bforeman\", \"post_text\": \"Hi Tim,\\n\\nI can't find anything wrong with your code. You should probably open up an Issue in the Community Issue Tracker.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-02 12:33:32\" },\n\t{ \"post_id\": 5771, \"topic_id\": 1333, \"forum_id\": 35, \"post_subject\": \"EMBED(R) compile/link failed.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have the following code that attempts to use the ECL EMBED structure to embed R code:\\nIMPORT R;\\n\\ninteger add1(integer VAL) := EMBED(R)\\nVAL+1\\nENDEMBED;\\n\\nadd1(10);\\n
\\n\\nMy syntax check was good, i.e. no errors or warnings. But, when I submitted I got the following error: \\n\\nWarning: Mismatch in subminor version number (4.2.0 v 4.2.2) (0, 0 - unknown)\\nError: Compile/Link failed for W20140529-141047 (see '//10.239.40.4/mnt/disk1/var/lib/HPCCSystems/myeclccserver/eclcc.log' for details) (0, 0 - W20140529-141047)\\nWarning: \\nWarning: ---------- compiler output --------------\\nWarning: /usr/bin/ld: cannot find -lRembed\\nWarning: collect2: ld returned 1 exit status\\nWarning: \\nWarning: --------- end compiler output -----------
\", \"post_time\": \"2014-05-29 18:15:59\" },\n\t{ \"post_id\": 8348, \"topic_id\": 1946, \"forum_id\": 35, \"post_subject\": \"Re: External Language Support- R\", \"username\": \"bforeman\", \"post_text\": \"There is a great post on integrating external languages in this forum:\\nhttps://hpccsystems.com/bb/viewtopic.php?f=41&t=1509&p=6593&hilit=EMBED#p6593\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-10-20 20:47:18\" },\n\t{ \"post_id\": 8340, \"topic_id\": 1946, \"forum_id\": 35, \"post_subject\": \"External Language Support- R\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nI have recently done a sentiment analysis task on twitter data using R.\\nNow i wanted to integrate this with HPCC, using external language support feature of HPCC in R language.\\n\\nCan anyone provide a guidance on this?\\n\\nThanks,\\nAnjali\", \"post_time\": \"2015-10-20 11:26:42\" },\n\t{ \"post_id\": 8458, \"topic_id\": 1966, \"forum_id\": 35, \"post_subject\": \"Re: Using R packages in embedded codes\", \"username\": \"richardkchapman\", \"post_text\": \"I'm not really an expert on the use of R, but yes, you will need to use the standard R mechanisms to install R libraries on the cluster nodes.\\n\\nRichard\", \"post_time\": \"2015-11-09 13:22:37\" },\n\t{ \"post_id\": 8396, \"topic_id\": 1966, \"forum_id\": 35, \"post_subject\": \"Using R packages in embedded codes\", \"username\": \"Anjali\", \"post_text\": \"Hi,\\n\\nI was trying out external language support feature in R. I needed to call some functions in 'sentiment' package,for that i tried the below code\\n\\ndataset(out_Layout) sent_analysis(dataset(Message_Layout) old_file) := EMBED(R)\\n\\n library(sentiment)\\n out = old_file\\n txt<-out[,2]\\n txt = gsub("(RT|via)((?:\\\\\\\\b\\\\\\\\W*@\\\\\\\\w+)+)", "", txt)\\n txt = gsub("@\\\\\\\\w+", "", txt)\\n txt = gsub("[[:punct:]]", "", txt)\\n txt = gsub("[[:digit:]]", "", txt)\\n txt = gsub("http\\\\\\\\w+", "", txt)\\n txt = txt[!is.na(txt)]\\n names(txt) = NULL\\n -----\\n -----\\n txt_class_pol = classify_polarity(txt, algorithm="Bayes")\\n --------\\n -------\\n ENDEMBED;\\n\\t\\n sent_analysis(in);
\\n\\nHere i am getting evaluation error while running \\n\\nlibrary(sentiment)\\n------\\n-----\\nclassify_polarity(...)\\n\\nRest of the code works properly,without this function call.\\n\\n\\n\\nCan anyone tell,how can we include libraries in embedded R code. Is it necessary to explicitly install the required packages before using it?If so,how should i do it?\", \"post_time\": \"2015-10-30 04:46:24\" },\n\t{ \"post_id\": 10513, \"topic_id\": 2493, \"forum_id\": 35, \"post_subject\": \"Re: myesp failed to start after enabled "R support within HP\", \"username\": \"bforeman\", \"post_text\": \"From the HPCC team:\\nHi –\\n\\nFor our PXE booted nodes, here is how we go about installing the platform with R.\\n\\n1.\\tYum install the HPCCSystems platform RPM to resolve dependency issues.\\n2.\\tYum install R-core-devel package\\n3.\\tFetch copies of Rcpp (I’m using 0.12.1), RInside (0.2.12), and inline (0.3.14), I suspect the latest versions will work as well but I can’t say for sure.\\nhttps://cran.r-project.org/web/packages/Rcpp/index.html\\nhttps://cran.r-project.org/web/packages ... index.html\\nhttps://cran.r-project.org/web/packages ... index.html\\n4.\\tR CMD INSTALL Rcpp_0.12.1.tar.gz\\n5.\\tR CMD INSTALL RInside_0.2.12.tar.gz\\n6.\\tR CMD INSTALL inline_0.3.14.tar.gz\\n7.\\tStart up the HPCC services.\\n\\nThis is what you do on ALL nodes that run HPCC processes… it might be that it just needs doing on a servers running specific HPCC processes, but this gets done on everything in our world. That way it covers everything it might be needed on. Plus it then gives you flexibility if you redefine your env, you’ll already have R installed on all servers.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-08-11 15:26:34\" },\n\t{ \"post_id\": 10443, \"topic_id\": 2493, \"forum_id\": 35, \"post_subject\": \"Re: myesp failed to start after enabled "R support within HP\", \"username\": \"bforeman\", \"post_text\": \"Hi Ramesh,\\n\\nChecking with the HPCC team, you may have something wrong in your configuration changes.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-08-10 20:24:07\" },\n\t{ \"post_id\": 10233, \"topic_id\": 2493, \"forum_id\": 35, \"post_subject\": \"myesp failed to start after enabled "R support within HPCC"\", \"username\": \"rameshpachamuthu\", \"post_text\": \"Hi,\\n\\nTo explore R support within HPCC, I have followed instructions given in Installing_and_RunningTheHPCCPlatform-6.0.2-1(page no 120). I have installed both Rccp & RInside packages for R with out any issue. After this when I have restarted HPCC server, myesp component failed to start. I could find error message 'segfault at 0 ip 00000033e00ec942 sp 00007fff7c1dfbb8 error 4 in "libR.so"[33e0000000+27e000]' in esp.log file. Since libR.so is related to R, I have uninstalled the above two packages and tried to start myesp but it is failed again. \\n\\nKindly someone help me to identify what went wrong.\\n\\nNote: HPCC server was up when I have installed the two packages in R. I am not sure weather this could be the cause.\\n\\nRegards,\\nRamesh\", \"post_time\": \"2016-08-03 10:35:16\" },\n\t{ \"post_id\": 11073, \"topic_id\": 2723, \"forum_id\": 35, \"post_subject\": \"Installing an R package into the R plugin\", \"username\": \"tlhumphrey2\", \"post_text\": \"I need to install an R package, gemtc, in the R plugin. On my local machine, I have R and to install this package in it, I go into R and do the following:\\ninstall.packages('gemtc')\\n
\", \"post_time\": \"2016-09-06 14:51:57\" },\n\t{ \"post_id\": 11333, \"topic_id\": 2823, \"forum_id\": 35, \"post_subject\": \"Installing R packages\", \"username\": \"ceejac\", \"post_text\": \"Hi,\\n\\nI would like to know how we can install R packages in the cluster nodes in order to use them inside embed(r)\\n\\nRegards,\\nCeejac\", \"post_time\": \"2016-09-21 04:52:50\" },\n\t{ \"post_id\": 11603, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Re: Error during R plugin installation\", \"username\": \"ming\", \"post_text\": \"HPCCSystems uses Apache2.0 license and R uses a different license. This is the reason HPCCSystems VM build doesn't include R.\", \"post_time\": \"2016-09-28 16:16:06\" },\n\t{ \"post_id\": 11593, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Re: Error during R plugin installation\", \"username\": \"balajisampath\", \"post_text\": \"Yes it is VM but, didn't have Rembed.\\n\\nNot sure if I missed something.I was not able to run R embed until i installed rembed. \\nAlso based on below lines in CMakeLists.txt I thought its not available and I build it.\\n\\n# Component: Rembed\\n\\n#####################################################\\n# Description:\\n# ------------\\n# Cmake Input File for Rembed\\n# Note that as RInside and RCpp are GPL licensed,\\n# Rembed must not be packaged with any Apache licenced\\n# software such as the HPCC platform or client tools\\n#####################################################\", \"post_time\": \"2016-09-28 15:53:55\" },\n\t{ \"post_id\": 11573, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Re: Error during R plugin installation\", \"username\": \"clo\", \"post_text\": \"Hi,\\n\\nIf this is a VM from the downloads page, then it should have had R embed already installed. Was there some reason you tried to build it inside the VM?\", \"post_time\": \"2016-09-28 13:07:04\" },\n\t{ \"post_id\": 11503, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Re: Error during R plugin installation\", \"username\": \"balajisampath\", \"post_text\": \"Thank You mgardner\\nIt worked. I was able to build.\", \"post_time\": \"2016-09-27 21:41:56\" },\n\t{ \"post_id\": 11473, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Re: Error during R plugin installation\", \"username\": \"mgardner\", \"post_text\": \"The error you're getting is due to commenting out lines 133-144.\\n\\nCMake Error at cmake_modules/commonSetup.cmake:119 (message):\\nCannot declare multiple plugins in a plugin package\\n
\\n\\nThis was guarding against what happened in your build. You were somehow able to build Rembed on top of a cmake cache that already had kafka enabled, which then caused both the R and Kafka plugins to be built, and packaged together. This is why you're getting package installation conflicts in the pasted output.\\n\\nWhat I suggest is this. First remove your CMakeCache.txt file between builds. This insures that any settings from a previous build can't get carried over to a new one. Then hop into the HPCC-Platform directory and do a hard reset on your head so it matches up with master again. `git reset --hard origin/master` is generally the command, assuming origin is the hpcc-systems/HPCC-Platform.git repository. Then go and rerun CMAKE -DREMBED=ON HPCC-Platform.\", \"post_time\": \"2016-09-27 18:36:53\" },\n\t{ \"post_id\": 11363, \"topic_id\": 2853, \"forum_id\": 35, \"post_subject\": \"Error during R plugin installation\", \"username\": \"balajisampath\", \"post_text\": \"Please refer attachment for error message and help me to fix this\\n\\nVirtualBox version 5.1.6\\nHPCCSystemsVM-amd64-6.0.4-1\\n\\nNote: I modified HPCC-Platform/cmake_modules/commonSetup.cmake and commented line 133 to 144 as I got the below error\\n\\nhpccdemo@HPCCSystemsVM-amd64-6:~$ cmake -DREMBED=ON HPCC-Platform\\nCMake Error at cmake_modules/commonSetup.cmake:119 (message):\\n Cannot declare multiple plugins in a plugin package\\nCall Stack (most recent call first):\\n cmake_modules/commonSetup.cmake:149 (SET_PLUGIN_PACKAGE)\\n CMakeLists.txt:142 (include)\", \"post_time\": \"2016-09-26 19:10:09\" },\n\t{ \"post_id\": 20403, \"topic_id\": 5073, \"forum_id\": 35, \"post_subject\": \"Re: hpcc dataset and R data frame mapping\", \"username\": \"richardkchapman\", \"post_text\": \"Unfortunately I'm not really very familiar with R, and in particular the differences between lists and vectors and when you would want one rather than the other. Ideally the R embed plugin would have been written by an R expert (but we don't have one).\\n\\nIf someone can help me understand the differences and why a vector is what is wanted in this situation, I can take a look at changing it (though I'll also have to be a little careful that I don't break any existing code).\", \"post_time\": \"2018-01-08 09:03:46\" },\n\t{ \"post_id\": 19983, \"topic_id\": 5073, \"forum_id\": 35, \"post_subject\": \"hpcc dataset and R data frame mapping\", \"username\": \"balajisampath\", \"post_text\": \"hpcc dataset gets converted to data frames when passed in as parameter but, all columns in dataframes are as list. e.g. cyl becomes a list instead of vector producing the error\\n\\tSystem error: 0: Rembed: Rcpp error: invalid type (list) for variable 'cyl'
\\nIMPORT R,ML;\\nmtcarsrec := RECORD\\n real8 mpg;\\n unsigned1 cyl;\\n real8 disp;\\n unsigned2 hp;\\n real8 drat;\\n real8 wt;\\n real8 qsec;\\n boolean vs;\\n boolean am;\\n unsigned1 gear;\\n unsigned1 carb;\\nEND;\\nhpccmtcars:= dataset([{21.0,6,160.0,110,3.90,2.620,16.46,TRUE,FALSE,4,4}],mtcarsrec);\\n\\nSTRING run_r(dataset(RECORDOF(hpccmtcars)) ds):=EMBED(R)\\nrslt= function (ds) \\n{\\n my_modl = lm(cyl ~ ., data = ds)\\n coef(my_modl)\\n}\\nrawToChar(serialize(rslt(ds), NULL,ascii=TRUE),multiple = FALSE)\\nENDEMBED;\\nrun_r( hpccmtcars );
\\n\\nplease help if I am missing something\", \"post_time\": \"2017-11-16 16:16:49\" },\n\t{ \"post_id\": 3167, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"richardkchapman\", \"post_text\": \"[quote="rengolin":16rcqw1b]Starting the services go well until it reaches Thor, when it fails on the Roxie memory manager:\\n\\nHowever, oddly, restarting again works!\\n\\n\\nI think it will need 1.5Gb contiguous address space. You can set the amount it uses in the environment.xml - I think it defaults to 75% of total ram (on the expectation you are running thor on dedicated servers...)\", \"post_time\": \"2013-01-24 14:49:39\" },\n\t{ \"post_id\": 3166, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"richardkchapman\", \"post_text\": \"The libbfd code is used to attach the workunit info (graph) to the workunit .so\\n\\nUnfortunately the library it relies on seems to be very variable from distro to distro - I've not been able to get it fully working on OSX either, though I really should go back and revisit that sometime.\\n\\nIf you can think of a more portable way to achieve the same result, I'd be interested.\\n\\nI wouldn't get too hung up on the constant-folding of plugin calls issue. The system can be used quite happily without that ability - sol long as you don't want to be able to do things like #if (stringlib.trim(myoption) = 'I). In fact, I'm quite tempted to deprecate that ability in general.\", \"post_time\": \"2013-01-24 14:46:55\" },\n\t{ \"post_id\": 3162, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"rengolin\", \"post_text\": \"Oh, well... That can't be right... \\n\\n
\\nat src/ecl/hqlcpp/hqlres.cpp:423\\n
\\n\\n\\n bfd_init ();\\n bfd_set_default_target(target64bit ? "x86_64-unknown-linux-gnu" : "x86_32-unknown-linux-gnu");\\n const bfd_arch_info_type *temp_arch_info = bfd_scan_arch ("i386");\\n#if defined __APPLE__\\n file = bfd_openw(filename, NULL);//MORE: Quick fix to get working on OSX\\n#else\\n file = bfd_openw(filename, target64bit ? "elf64-x86-64" : NULL);//MORE: Test on 64 bit to see if we can always pass NULL\\n#endif\\n verifyex(file);\\n verifyex(bfd_set_arch_mach(file, temp_arch_info->arch, temp_arch_info->mach));\\n
\\n\\nHere's the debug trace...\\n\\n\\n(gdb) run\\nStarting program: /usr/bin/eclcc superfile5.ecl\\n[Thread debugging using libthread_db enabled]\\nUsing host libthread_db library "/lib/arm-linux-gnueabihf/libthread_db.so.1".\\n\\nProgram received signal SIGSEGV, Segmentation fault.\\n0x7694d8ec in ResourceManager::flush (this=0xb9cd8, filename=0x116668 "liba.out.res.o.so", flushText=false, target64bit=false)\\n at /home/user/devel/hpcc/src/ecl/hqlcpp/hqlres.cpp:423\\n423\\t verifyex(bfd_set_arch_mach(file, temp_arch_info->arch, temp_arch_info->mach));\\n(gdb) p temp_arch_info->arch\\nCannot access memory at address 0xc\\n(gdb) p temp_arch_info\\n$1 = (const bfd_arch_info_type *) 0x0\\n(gdb) bt\\n#0 0x7694d8ec in ResourceManager::flush (this=0xb9cd8, filename=0x116668 "liba.out.res.o.so", flushText=false, target64bit=false)\\n at /home/user/devel/hpcc/src/ecl/hqlcpp/hqlres.cpp:423\\n#1 0x7687849c in HqlCppInstance::flushResources (this=0xb9c90, filename=0x24a308 "a.out.res.o", ctxCallback=0x115528)\\n at /home/user/devel/hpcc/src/ecl/hqlcpp/hqlcpp.cpp:1326\\n#2 0x768cf0ec in HqlDllGenerator::flushResources (this=0xa1ee8) at /home/user/devel/hpcc/src/ecl/hqlcpp/hqlecl.cpp:562\\n#3 0x768cd506 in HqlDllGenerator::processQuery (this=0xa1ee8, parsedQuery=..., _generateTarget=EclGenerateExe) at /home/user/devel/hpcc/src/ecl/hqlcpp/hqlecl.cpp:216\\n#4 0x00015396 in EclCC::instantECL (this=0x7efff20c, instance=..., wu=0x51648, queryFullName=0x0, errs=0x5db58, outputFile=0x46470 "a.out")\\n at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:635\\n#5 0x00016670 in EclCC::processSingleQuery (this=0x7efff20c, instance=..., queryContents=0x7fd58, queryAttributePath=0x5d6b8 "superfile5")\\n at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:965\\n#6 0x0001746e in EclCC::processFile (this=0x7efff20c, instance=...) at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:1165\\n#7 0x0001855e in EclCC::processFiles (this=0x7efff20c) at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:1403\\n#8 0x00013e8c in doMain (argc=2, argv=0x7efff4a4) at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:358\\n#9 0x00013ff8 in main (argc=2, argv=0x7efff4a4) at /home/user/devel/hpcc/src/ecl/eclcc/eclcc.cpp:387\\n
\", \"post_time\": \"2013-01-24 00:05:05\" },\n\t{ \"post_id\": 3161, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"flavio\", \"post_text\": \"Wow! You're quickly making good progress! \\n\\nI'll ping Jake so that he can take a look at the 1.5GB number (I believe it comes from the auto-detection code and/or config file).\\n\\nFlavio\", \"post_time\": \"2013-01-23 23:20:41\" },\n\t{ \"post_id\": 3153, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"rengolin\", \"post_text\": \"Flavio, ARM 64 is just around the corner, with AMD and Dell actively working on it, it might not be too long until we have a board to play with. I'd call it Apple Pie (PoPeye, the strong sailor). \\n\\nAnyway, back to business. The ECL compiler segfaults, as Calxeda reported, probably due to the hack on external folding. I need debug symbols to know what's wrong, though.\\n\\nStarting the services go well until it reaches Thor, when it fails on the Roxie memory manager:\\n\\n
\\nBuild community_3.11.0-1trunk[heads/arm-hack-0-g33f624-dirty]\\ncalling initClientProcess Port 20000\\nFound file 'thorgroup', using to form thor group\\nGlobal memory size = 1514 MB\\nRoxieMemMgr: Setting memory limit to 1587544064 bytes (1514 pages)\\nRoxieMemMgr: posix_memalign (alignment=1048576, size=1610612736) failed - ret=12\\n/home/user/devel/hpcc/src/thorlcr/master/thmastermain.cpp(714) : ThorMaster : RoxieMemMgr: Unable to create heap\\n
\\n\\nFunny that the Chromebook has 2GB of RAM, of which 1.2GB are free. I don't know where this 1514MB number comes from...\\n\\n\\n$ free\\n total used free shared buffers cached\\nMem: 2067736 831660 1236076 0 54164 491552\\n-/+ buffers/cache: 285944 1781792\\nSwap: 0 0 0\\n
\\n\\nThat line (thmastermain.cpp:714) is the last catch on main(), on a large block of code, so it's hard to trace without debug symbols. I'll re-compile in debug mode, so I can run the unittests, which I think will catch the bug, too.\\n\\nHowever, oddly, restarting again works!\\n\\n\\nGlobal memory size = 1514 MB\\nRoxieMemMgr: Setting memory limit to 1587544064 bytes (1514 pages)\\nRoxieMemMgr: 1536 Pages successfully allocated for the pool - memsize=1610612736 base=0x14d00000 alignment=1048576 bitmapSize=48\\n
\\n\\nUnfortunately, I can't run any job on it now, since the compiler is not producing any binary output (though, it does generate C++ and the sources look correct, which indicates it's not the lack of external folding).\\n\\nNext step: Debug symbols!\\n\\nOn a side note, trying to run the regression, some of the Python modules are not available as an Ubuntu package, but installing them via CPAN was a breeze.\", \"post_time\": \"2013-01-23 15:39:30\" },\n\t{ \"post_id\": 3150, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"flavio\", \"post_text\": \"Renato, this is great work and quite a milestone, as it would be the first non x86 32-64 bits based platform that HPCC is being ported to.\\n\\nUnfortunately, there are still no 64 bits ARM CPU's out there yet, but with the announcements of the Cortex A53 and A57, and a number of hardware manufacturers planning on supporting the A64 instruction set in upcoming silicon, 64 bits capable parts can't be that far.\\n\\nKeep up the good work, and let us know how the port effort goes!\\n\\nFlavio\", \"post_time\": \"2013-01-23 12:35:02\" },\n\t{ \"post_id\": 3147, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"rengolin\", \"post_text\": \"After those changes, the system compiled to completion, and a package was generated and installed correctly.\\n\\nI haven't had a chance to test, since I upgraded a few packages and rebooted, only to find that I forgot the lid closed, in the server room, at the office. \\n\\nWhen I open the lid again, and re-start, I'll report on the usability.\", \"post_time\": \"2013-01-22 21:09:30\" },\n\t{ \"post_id\": 3145, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on Chromebook\", \"username\": \"rengolin\", \"post_text\": \"Another thing to remember when compiling on ARM: there is a va_list change in the ABI, and GCC warns on every single file that includes another with a va_list. In HPCC's case, that's pretty much all files.\\n\\nSo, configure your CMake with:\\n
-DCMAKE_CXX_FLAGS:String=-Wno-psabi
\\n\\nRegarding the two issues raised:\\n\\n
\\n\\nAfter these two fixes, the compilation goes on until 39%, when the hqlfold meets another x86 asm region. This is the one I was waiting for, and the one that will cause serious problems.\\n\\nThis is the part of the code that folds external calls (IValue * foldExternalCall()), where the external call will be called using the base C Procedure Call Standard (PCS), which is to populate the stack for the function arguments.\\n\\nIntel's x86 architecture used that extensively, and most architectures use it for variadic functions, but newer architectures (such as x86_64, AArch32, AArch64) use registers to pass the parameters.\\n\\nIt's much faster for functions with small number of parameters, but all architectures revert back if the function has more than a handful of them. This means following the ABI is not trivial.\\n\\nThere are two ways of fixing this:\\n\\n\\n
\\n\\nBoth solutions are hard work, and both will take a while to complete. If I could chose, I'd chose standard RPC.\\n\\nFor the moment, I'll comment out the external folding and continue...\", \"post_time\": \"2013-01-22 20:18:25\" },\n\t{ \"post_id\": 3144, \"topic_id\": 694, \"forum_id\": 38, \"post_subject\": \"HPCC on Chromebook\", \"username\": \"rengolin\", \"post_text\": \"Hi Folks,\\n\\nI've been trying to compile HPCC on a Chromebook. Just FYI, the Chromebook is a dual core A15 1.7GHz (Exynos 5) with 2GB RAM, which is about 4x faster than the Panda's dual A9 chip.\\n\\nSo, I got Ubuntu running on it (ChrUbuntu) and downloaded the HPCC's sources to it. Since it's Ubuntu, all packages are available via apt-get and getting it to configure is not hard.\\n\\nThe only package that was unavailable on ChrUbuntu was Xalan/Xerces, but I got them from the Panda's Ubuntu. Richard told me I could have used libxslt, which IS available in ChrUbuntu. If you're trying this at home, it might be easier, since HPCC will eventually use it by default on all platforms.\\n\\nTyping "make" on the build directory gets you to about 13% (according to CMake) when two errors occur:\\n\\nIn src/system/jlib/jexcept.cpp:\\n In function 'void excsighandler(int, siginfo_t*, void*)':\\n error: 'mcontext_t' has no member named 'gregs'\\n error: 'REG_EIP' was not declared in this scope\\n error: 'mcontext_t' has no member named 'gregs'\\n error: 'REG_ESP' was not declared in this scope\\n ...\\n REG_EAX, REG_EBX, REG_ECX etc.\\n\\nClearly, x86 register declarations.\\n\\nIn src/system/jlib/jdebug.cpp:\\n In function 'void calibrate_timing()':\\n error: impossible constraint in 'asm'\\n\\nClearly, x86 assembly on timing functions.\\n\\nThere are two ways to go, here:\\n 1. Replicate each instance that assumes x86 and add ARM assembly/directives, with appropriate #ifdef __arm__ wrapping around.\\n 2. Find libraries or function calls on the kernel that do the same without having to delve into specific code for specific architectures.\\n\\nThe way the code is structured today, solution 1 would be the easiest to add, but the hardest to maintain. Remember that ARM 64-bits (AArch64) is just around the corner and there are lots of changes.\\n\\nHowever, finding an exact match on all architectures x all OSs is not an easy task, and might not have a generic solution anyway.\\n\\nThe jexcept seems to be registering an exception handling and printing pretty much what the standard library already does, and looks like redundant code. Can this be removed?\\n\\nThe jdebug issue can possibly be fixed by skipping that section altogether and proceed directly to the calibration.\\n\\nI'll have a look at removing those parts of the code. Stay tuned! \\n\\nNot sure how relevant a Windows build would be for ARM, and they may very well have their own PCSs. The AAPCS is defined for bare-metal applications and GNU EABI follows it into the Linux world.\\n
).
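For reference, here is a minimal sketch of what the second option above (library/kernel calls instead of per-architecture code) could look like for the two offending spots: clock_gettime() in place of the inline asm in jdebug's calibrate_timing(), and glibc's backtrace()/backtrace_symbols_fd() in place of reading REG_EIP/REG_ESP out of mcontext_t in jexcept's excsighandler(). This only illustrates the approach; the helper names are made up and this is not the code that went into jlib.

// portable_sketch.cpp -- hypothetical illustration; build with: g++ portable_sketch.cpp
#include <execinfo.h>   // backtrace(), backtrace_symbols_fd()
#include <ctime>        // clock_gettime()
#include <cstdint>
#include <cstdio>
#include <unistd.h>     // STDERR_FILENO

// Architecture-neutral replacement for an rdtsc-style timing read.
static uint64_t nowNanos()
{
    timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

// Architecture-neutral stack dump: no REG_* names, no mcontext_t poking.
// backtrace_symbols_fd() writes straight to a file descriptor, so it can be
// used from a crash handler without calling malloc.
static void dumpStack()
{
    void *frames[64];
    int depth = backtrace(frames, 64);
    backtrace_symbols_fd(frames, depth, STDERR_FILENO);
}

int main()
{
    uint64_t t0 = nowNanos();
    dumpStack();
    uint64_t t1 = nowNanos();
    printf("dumpStack() took %llu ns\n", (unsigned long long)(t1 - t0));
    return 0;
}

Both calls exist on x86, ARM and AArch64 Linux, so no #ifdef __arm__ blocks are needed for this part (very old glibc may want -lrt for clock_gettime).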
\", \"post_time\": \"2013-01-22 18:09:01\" },\n\t{ \"post_id\": 7610, \"topic_id\": 1704, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on PPC64EL\", \"username\": \"Lee_Meadows\", \"post_text\": \"Jim,\\n\\n I'm working on some Power7 systems now, but haven't put HPCC over to there yet.\\n\\n Can you keep this thread updated with what you find?\\n\\nThanks,\\nLee\", \"post_time\": \"2015-05-18 15:46:40\" },\n\t{ \"post_id\": 7488, \"topic_id\": 1704, \"forum_id\": 38, \"post_subject\": \"Re: HPCC on PPC64EL\", \"username\": \"jsmith\", \"post_text\": \"The backtrace would appear to indicate a fault in CSimpleInterfaceOf::Link, which is calling atomic_inc.\\nIt may be worth trying to call atomic_inc in a test program to see if the implementation runs into problems..\", \"post_time\": \"2015-04-30 09:23:28\" },\n\t{ \"post_id\": 7485, \"topic_id\": 1704, \"forum_id\": 38, \"post_subject\": \"HPCC on PPC64EL\", \"username\": \"jimtom\", \"post_text\": \"Hey all...I'm trying to port HPCC to IBM POWER8 running Ubuntu 15.04. I have most of the base code compiled, but when I try to run, I am getting the following when the mydali daemon starts:\\n\\n
00000000 2015-04-28 17:46:40.558 27679 27679 "Build community_6.0.0-trunk0[heads/master-0-g6f2c8d-dirty]"\\n00000001 2015-04-28 17:46:40.558 27679 27679 "WARNING: Local path used for backup url: /var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000002 2015-04-28 17:46:40.558 27679 27679 "Backup URL = //9.3.3.23/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000003 2015-04-28 17:46:40.558 27679 27679 "Checking backup location: //9.3.3.23/var/lib/HPCCSystems/hpcc-mirror/dali/"\\n00000004 2015-04-28 17:46:40.560 27679 27679 "Checking for existing daserver instances"\\n00000005 2015-04-28 17:46:40.561 27679 27679 "Server Version = 3.12, required minimum client version 1.5"\\n00000006 2015-04-28 17:46:40.561 27679 27679 "DFS Server: numThreads=30"\\n00000007 2015-04-28 17:46:40.564 27679 27679 "BackupHandler started, async=false"\\n00000008 2015-04-28 17:46:40.565 27679 27679 "loading store 0, storedCrc=0"\\n00000009 2015-04-28 17:46:40.565 27679 27679 "Store 0 does not exist, creating new store"\\n0000000A 2015-04-28 17:46:40.565 27679 27679 "store loaded"\\n0000000B 2015-04-28 17:46:40.565 27679 27679 "loading external Environment from: /etc/HPCCSystems/environment.xml"\\n0000000C 2015-04-28 17:46:40.566 27679 27679 "Backtrace:"\\n0000000D 2015-04-28 17:46:40.567 27679 27679 " /opt/HPCCSystems/lib/libjlib.so(_Z16printStackReportv+0x50) [0x3fff7e04fdb0]"\\n0000000E 2015-04-28 17:46:40.567 27679 27679 " /opt/HPCCSystems/lib/libjlib.so(_Z13excsighandleriP9siginfo_tPv+0xf8) [0x3fff7e0503b8]"\\n0000000F 2015-04-28 17:46:40.567 27679 27679 " [0x3fff7e230478]"\\n00000010 2015-04-28 17:46:40.567 27679 27679 " /opt/HPCCSystems/lib/libjlib.so(_ZNK18CSimpleInterfaceOfI13IPropertyTreeE4LinkEv+0x10) [0x3fff7e0ef300]"\\n00000011 2015-04-28 17:46:40.567 27679 27679 " /opt/HPCCSystems/lib/libjlib.so(_ZN5PTree11addPropTreeEPKcP13IPropertyTree+0x3f8) [0x3fff7e0db6c8]"
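Picking up the suggestion a couple of posts above to try atomic_inc in a test program: here is a stand-alone stress-test sketch. It does not use jlib itself, and __sync_add_and_fetch is only an assumption about what atomic_inc boils down to on this platform. Build with g++ -std=c++11 -O2 -pthread; if either total comes out short, or the program faults the way daserver does in the backtrace above, the atomic implementation is a likely suspect.

// atomic_test.cpp -- hypothetical illustration, not part of the HPCC sources
#include <atomic>
#include <thread>
#include <vector>
#include <cstdio>

int main()
{
    const int threads = 8;
    const int perThread = 1000000;

    int rawCounter = 0;                 // incremented via the GCC builtin
    std::atomic<int> stdCounter(0);     // incremented via std::atomic

    std::vector<std::thread> pool;
    for (int t = 0; t < threads; ++t)
        pool.emplace_back([&]() {
            for (int i = 0; i < perThread; ++i)
            {
                __sync_add_and_fetch(&rawCounter, 1);           // assumed jlib-style primitive
                stdCounter.fetch_add(1, std::memory_order_relaxed);
            }
        });
    for (auto &worker : pool)
        worker.join();

    // Both totals should equal threads * perThread on a healthy platform.
    printf("builtin: %d  std::atomic: %d  expected: %d\n",
           rawCounter, stdCounter.load(), threads * perThread);
    return 0;
}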
\\n\\nas you can imagine this has all kinds of negative downstream effects. I'm wondering where to look next.\\n\\nThanks!\", \"post_time\": \"2015-04-29 20:08:12\" },\n\t{ \"post_id\": 3159, \"topic_id\": 697, \"forum_id\": 39, \"post_subject\": \"Looking for ECL developers?\", \"username\": \"HPCC Staff\", \"post_text\": \"This forum has been added to assist our community members who have a need for ECL developers or other HPCC Systems related resources to post job openings.\", \"post_time\": \"2013-01-23 18:47:43\" },\n\t{ \"post_id\": 3168, \"topic_id\": 699, \"forum_id\": 39, \"post_subject\": \"Comrise is Hiring! Job Openings in the U.S. and China\", \"username\": \"jkatzen\", \"post_text\": \"Jr. Data Scientist(s)- U.S.\\nhttps://jobs-comrise.icims.com/jobs/26391/job\\n\\nData Science Internship & Graduate Programme - China\\nhttps://jobs-comrise.icims.com/jobs/26393/job\\nhttps://jobs-comrise.icims.com/jobs/26392/job\\n\\n\\nAbout Comrise\\nEstablished in 1984, Comrise is a global consulting firm with headquarters in the U.S. and China. Our teams specialize in Managed IT, Big Data, and Workforce Solutions – Staff Augmentation, Recruiting, RPO, and Payrolling. With nearly 30 years of experience, Comrise provides local talent and resources on a global scale.\", \"post_time\": \"2013-01-24 15:48:18\" },\n\t{ \"post_id\": 27473, \"topic_id\": 755, \"forum_id\": 39, \"post_subject\": \"Re: Sr. Software Engineer - ECL expr or will train\", \"username\": \"Sayali Kulkarni\", \"post_text\": \"Excellent Thread!!\", \"post_time\": \"2019-08-29 09:41:07\" },\n\t{ \"post_id\": 6908, \"topic_id\": 755, \"forum_id\": 39, \"post_subject\": \"Re: Sr. Software Engineer - ECL expr or will train\", \"username\": \"uhappyy\", \"post_text\": \"Skills you will need:\\n1. Mastery of at least one high-level programming language (e.g. Java, C++, C#) \\nand/or interest and ability to think declarative rather than procedural computer logic.\\n2. Some query language skills useful\\n3. Understanding of relational database system design (e.g. SQL Server, Oracle, Teradata)\\n4. Understanding of ETL concepts in manipulation of data\\n5. Some knowledge of Hadoop data technology concepts a plus/....\\n\\n___________\\nGUL\", \"post_time\": \"2015-02-06 10:19:04\" },\n\t{ \"post_id\": 3417, \"topic_id\": 755, \"forum_id\": 39, \"post_subject\": \"Sr. Software Engineer - ECL expr or will train\", \"username\": \"SAS\", \"post_text\": \"[color=#800000:k7knr39o]Senior Software Engineer – Big Data Technology\\nAlpharetta, GA\\nLooking for an exciting opportunity to learn a new, dynamic technology built to utilize the power of "Big Data"? You will be translating and manipulating massive amounts of data utilizing ECL. You will be trained in ECL** -- our open source software language that maximizes the rapid utilization of our massive databases. If you are intrigued with learning a new data centric technology designed to rapidly access meaningful information from an enormous database of information, then this may be the job for you.\\n \\nSkills you will need:\\n1. Mastery of at least one high-level programming language (e.g. Java, C++, C#) \\nand/or interest and ability to think declarative rather than procedural computer logic.\\n2. Some query language skills useful\\n3. Understanding of relational database system design (e.g. SQL Server, Oracle, Teradata)\\n4. Understanding of ETL concepts in manipulation of data\\n5. 
Some knowledge of Hadoop data technology concepts a plus\\n\\nThis position exists to perform research, design and software development assignments within a software functional area. Provides meaningful input to project plans, schedules and approach in the development of cross-functional software products. \\n\\nQualifications: \\n1. Bachelor's degree in computer science, computer engineering, math or equivalent technical discipline or equivalent technical experience \\n2. 5 + years experience in full lifecycle development process including design, development, testing and implementation of moderate to advanced complexity systems. \\n3. Ability to work independently and collaborate with cross functional groups as required. \\n4. Strong verbal and written communication and presentation skills with both technical and non technical audiences including management and cross functional groups. \\nLexisNexis Employees: Please, for info on hiring manager e-mail shirley.schafer@lexisnexis.com\\nApply at:https://reedelsevier.taleo.net/careersection/jobdetail.ftl?job=241314&lang=en\", \"post_time\": \"2013-02-11 15:35:25\" },\n\t{ \"post_id\": 3418, \"topic_id\": 756, \"forum_id\": 39, \"post_subject\": \"Sr. Software Engineer - ECL or will train\", \"username\": \"SAS\", \"post_text\": \"[color=#800000:2dq1wpz6]Senior Software Engineer – Big Data\\nBoca Raton, FL \\nIf you enjoy coding and trouble shooting, this may be an opportunity for you. There are a diversity of projects that support every product we offer our customers. Additionally, you will be trained to use our open source language (ECL*) to support existing software products at LexisNexis Risk Solutions. You will also perform analysis, design and development tasks, and provide input to project plans, schedules throughout the software development lifecycle.\\n \\nTechnical Skill Required:\\n1. Strong expertise in at least one high-level programming language (e.g. Java, C++, C#) \\n2. Understanding OOP concept\\n3. Understanding the basic concepts of DBMS (transactions, type of joins, etc., at least some Knowledge of SQL (can be ODBC/JDBC))\\n4. Knowledge of abstract data structures and algorithms\\n5. Experience with any Source Control system (SVN, ClearCase, CVS, Perforce)\\nNice to Have:\\nKnowledge of XML schemas, XSLT, \\nExperience with any well-known RDBMS (MS SQL Server, Oracle, MySQL, etc.), \\nBasic knowledge/Understanding of network protocols and web services: TCP/IP, HTTP, SOAP, WSDL, etc.\\nKnowledge of Hadoop\\nHands on experience debugging complex problems.\\nPublic Records knowledge/experience.\\nVery nice to have: Hands-on experience or basic knowledge of ECL*, participation in Open Source development.\\nWhat you will do:\\n1. Interfaces with internal customers to gather requirements and provide recommendations on complex technical tasks.\\n2. Independently performs analysis to develop and implement high level designs for assigned projects.\\n3. Develops moderate-to advanced modules and system components and develops simple to moderately complex re-usable code.\\n\\nGeneral Qualifications: \\nPosition requires a bachelor's (Master’s degree preferred) degree in computer science, computer engineering, math or equivalent technical discipline OR equivalent technical experience.\\n5 + years experience in full lifecycle development process including design, development, testing and implementation of moderate to advanced complexity systems. 
\\nExpertise in the technologies used in the job.\\nAbility to work independently, with minimal supervision.\\nAbility to work in a cooperative team environment.\\nExcellent communication skills.\\nLexisNexisRisk Employees: PLEASE, for info on Hiring Manager e-mail shirley.schafer@lexisnexis.com \\nApply at: https://reedelsevier.taleo.net/careersection/jobdetail.ftl?job=238293&lang=en\", \"post_time\": \"2013-02-11 15:45:00\" },\n\t{ \"post_id\": 5056, \"topic_id\": 1147, \"forum_id\": 39, \"post_subject\": \"Sr. Software Engineer Data Warehousing\", \"username\": \"SAS\", \"post_text\": \"Sr. Software Engineer Data Warehousing\\nAlpharetta, GA\\nUtilize your extensive experience with Data Warehousing and data Marts.\\n\\nSkill you will need:Extensive experience with Data Warehousing / Data Marts. Formal Training in Data Warehouse Life Cycle a plus.\\n4+ years on Microsoft SQL Server 2005 and/or 2008.\\n4+ years experience with Dimensional Modeling.\\n4+ years experience with Microsoft SSAS, SSIS and SSRS.\\nExperience with MDX (plus).\\nExperience with Data Quality, Data Profiling, Data Analytics.\\nKnowledgeable with OLAP / MOLAP / ROLAP.\\nExperience with Design & Optimize Star and Snowflake schemas.\\nExperience with BI front-end reporting, dashboarding and visualization tools.\\nExperience writing ECL** code or will train.\\n\\nNice to have skills:Knowledge of C# \\nExperience with Open Source tools like Pentaho or Kettle \\nKnowledge of Kimball or Inmon Methodology. \\n\\n**Enterprise Control Language\\n\\nQualifications: \\nBachelor's degree in computer science, computer engineering, math or equivalent technical discipline or equivalent technical experience \\n5 + years experience in full lifecycle development process including design, development, testing and implementation of moderate to advanced complexity systems. \\nAbility to work independently and collaborate with cross functional groups as required.\", \"post_time\": \"2013-12-16 18:30:07\" },\n\t{ \"post_id\": 5057, \"topic_id\": 1148, \"forum_id\": 39, \"post_subject\": \"Sr. Software Engineer - Train in ECL for Big Data\", \"username\": \"SAS\", \"post_text\": \"Sr. Software Engineer \\nBoca Raton, FL or Dayton, OH or Alpharetta, GA\\n \\nAre you an analytical thinker who loves the challenges of "Big Data"? You will get the opportunity to work on a team that is critical to the future success of the Risk Solutions business. There are a variety of ongoing projects, but primary focus is on developing and supporting the products running in our Batch and real-time environments. Additionally, you will be trained in ECL** to create and enhance the queries that link customer provided data to the Lexis data we have stored in our HPCC environment.\\n\\nSkills you will need: \\nC++/C/Java, SQL, XML\\nSkills nice to have: \\nFamiliarity with ETL tools such as Pentaho Kettle (PDI CE), \\n\\nThis is an opportunity to learn ECL** our open source software language that maximizes the rapid utilization of our massive databases. You will be trained to use ECL (Enterprise Control Language) -ECL is a programming language designed and used with the HPCC system. It is specifically designed for data management and query processing. ECL code is written using the ECL IDE programming development tool. ECL is a transparent and implicitly parallel programming language which is both powerful and flexible. It is optimized for data-intensive operations, declarative, non-procedural and dataflow oriented. 
ECL uses intuitive syntax which is modular, reusable, extensible and highly productive. It combines data representation and algorithm implementation\\n\\n**ECL (Enterprise Control Language) is the powerful programming language that is ideally suited for the manipulation of Big Data. Go to http://hpccsystems.com/FAQ/what-is-ecl and check out the open source version:\\nTransparent and implicitly parallel programming language\\nNon-procedural and dataflow oriented\\nModular, reusable, extensible syntax\\nCombines data representation and algorithm implementation\\nEasily extend using C++ libraries\\nECL is compiled into optimized C++\\n\\nResponsibilities:\\n1.\\tAssist in development/maintenance of software specifications utilizing design constructs and standards. \\n2.\\tInteract with business analysts to understand business requirements and translate these into technical designs. \\n3.\\tInteract with production support personnel to ensure that critical and major production issues are resolved in a timely manner. \\n4.\\tProvide technical support and take technical direction from project lead. Perform design and code reviews with peers. \\n5.\\tCreate and execute unit test plans\\n\\nQualifications:\\n1. Bachelor's degree in computer science, computer engineering, math or equivalent technical discipline or equivalent technical experience \\n2. 5 + years experience in full lifecycle development process including design, development, testing and implementation of moderate to advanced complexity systems.\", \"post_time\": \"2013-12-16 18:45:34\" },\n\t{ \"post_id\": 5096, \"topic_id\": 1161, \"forum_id\": 39, \"post_subject\": \"Sr. Software Engineer - Data Warehouse and Data Marts\", \"username\": \"SAS\", \"post_text\": \"Use or learn ECL: Utilize your extensive experience with Data Warehousing and Data Marts in a rapidly growing Big Data business. \\n\\nSr. Software Engineer Data Warehousing\\nAlpharetta, GA\\n\\nSkills you will need:Extensive experience with Data Warehousing / Data Marts. Formal Training in Data Warehouse Life Cycle a plus.\\n4+ years on Microsoft SQL Server 2005 and/or 2008.\\n4+ years experience with Dimensional Modeling.\\n4+ years experience with Microsoft SSAS, SSIS and SSRS.\\nExperience with MDX (plus).\\nExperience with Data Quality, Data Profiling, Data Analytics.\\nKnowledgeable with OLAP / MOLAP / ROLAP.\\nExperience with Design & Optimize Star and Snowflake schemas.\\nExperience with BI front-end reporting, dashboarding and visualization tools.\\nExperience writing ECL** code a plus or interest in training\\n\\nNice to have skills:Knowledge of C# \\nExperience with Open Source tools like Pentaho or Kettle \\nKnowledge of Kimball or Inmon Methodology. \\n\\nQualifications: \\nBachelor's degree in computer science, computer engineering, math or equivalent technical discipline or equivalent technical experience \\n5 + years experience\\n\\nApply at: https://reedelsevier.taleo.net/careerse ... 
HY&lang=en\", \"post_time\": \"2013-12-27 17:22:45\" },\n\t{ \"post_id\": 5425, \"topic_id\": 1251, \"forum_id\": 39, \"post_subject\": \"Machine Learning Researcher/Scientist\", \"username\": \"Machine Learning Search Specialist\", \"post_text\": \"The ideal candidate will demonstrate in cutting edge research organization and will apply the highest level of technical competence, analytics and problem solving abilities in support of original and creative scientific projects of significant business impact.\\n\\nA PhD/Master's in Computer Science, Machine Learning, Data Mining, Information Retrieval/NLP or related disciplines with sound technical expertise in one or more of the above mentioned areas.\\nDemonstrated ability to formulate scientifically challenging problems and develop and implement novel solutions.\\nStrong track record of publications, excellent written and oral communication skills.\\n\\nInterested prospects, do write to me at ambika.cs@rinalytics.com for a confidential conversation.\", \"post_time\": \"2014-03-26 08:13:08\" },\n\t{ \"post_id\": 5426, \"topic_id\": 1252, \"forum_id\": 39, \"post_subject\": \"Manager-Data Science\", \"username\": \"Machine Learning Search Specialist\", \"post_text\": \"JOB SKILLS:\\nInterest in solving problems with big data and large volumes of data from multiple and varied data sources.\\nHas worked extensively in the fields of information retrieval, data mining, or related field.\\nFormulate and execute strategies for obtaining and handling raw data sets.\\nWork closely with software teams to deliver whole solutions to solve business problems.\\nIdentify imperfections in raw data and anticipate potential downstream problems\\nDealing with missing or partial data and also test data generation techniques.\\n\\nCANDIDATE REQUIREMENTS: \\n7+ years of experience in the field of data mining, data science and analytics.\\nExcellent problem solving abilities.\\nAbility to build and scale highly motivated teams.\\nAbility to do hands-on work to help the team achieve goals.\\nExperience of managing global multi-location team of data scientists to deliver projects.\\nAbility to mentor and guide data scientists at various levels (from senior to fresh graduates).\\nAbility to collaborate with industry experts, academicians and thought leaders in the data science space.\\n\\nInterested prospects, do write to me at ambika.cs@rinalytics.com for a confidential conversation.\", \"post_time\": \"2014-03-26 08:15:06\" },\n\t{ \"post_id\": 5427, \"topic_id\": 1253, \"forum_id\": 39, \"post_subject\": \"CTO - Machine Learning based Products\", \"username\": \"Machine Learning Search Specialist\", \"post_text\": \"Overview\\n\\nAs VP of Engineering/CTO you will take charge of this green-field project and start from scratch right from building the Engine’s Architecture, Product Road-map, Data Platform & Structure, Data Analytics, Data Mining strategies, Implement Recommendation Algorithms, Implement Machine Learning on a large Data set to deliver personalization.\\n\\nResponsibilities\\n\\n• Apply machine learning methods on large data sets\\n\\n• Run experiments with new algorithms and scale it up to large number of data points\\n\\n• Work with other stakeholders and prioritize data/ML initiatives across the product\\n\\n• Focus on user impact and how to build a better product\\n\\n• Be proactive and constantly pay attention to the scalability, performance and availability of our systems\\n\\n• Take an active interest in our features and our user 
happiness\\n\\n• Stay up to date on current data engineering trends, in particular distributed systems and large scale machine learning\\n\\n Requirements\\n\\n• PhD or MS Computer Science, in Algorithms, Data Analytics, Machine Learning or related fields\\n\\n• Strong Product Architecture & Development experience\\n\\n• Theoretical knowledge of Mathematical Modeling/Discrete Mathematics\\n\\n• Expertise on Information Extraction and Bid Data frameworks like Hadoop\\n\\n• C/C++ Programming skills. Python, PHP/Java knowledge will be a plus\\n\\n• Experience of using tools like Matlab, SAS, SPSS\\n\\n• Experience managing teams of engineers\\n\\nInterested prospects, do write to me at ambika.cs@rinalytics.com for a confidential conversation.\", \"post_time\": \"2014-03-26 08:16:45\" },\n\t{ \"post_id\": 5646, \"topic_id\": 1301, \"forum_id\": 39, \"post_subject\": \"Consulting or Sr. Software Engineer (ECL/HPCC) Dayton LN\", \"username\": \"geoffjwebb\", \"post_text\": \"Job Description\\nThis position exists to provide assistance and input to management; develop and lead large multi-functional development activities; solve complex technical problems, write complex code for computer systems and serve as a senior source of expertise.\\n \\nWe are looking for someone to work on our Big Data platform (HPCC) and create/implement complex algorithms against our content to produce value add enrichments. Learning and working with the ECL programming language that is part of HPCC will be a key element of this position.\\n \\nAs the Consulting Software Engineeryour key roles and responsibilities will include:\\nLeads major technical project design, engineering and integration efforts or develops complex code\\nConsults with internal and vendor resources on design, code and test reviews.\\nBuilds new high level designs as required; reviews vendor generated high level designs.\\nLeads major technical project design, engineering and integration efforts or develop complex code\\nRecommends and plans use of system and software features.\\nMaintains an understanding of current industry trends possibly through attendance at vendor seminars and/or training sessions.\\nDetermines feasibility of customer requirements and designs solutions, builds and integrates the implementation by working with in house or vendor resources.\\nProvides recommendations on the utilization of emerging and existing technologies.\\nWorks independently - very complex system problems using debugging tools or other diagnostic aids\\nRecommends technical strategy and direction in area(s) of expertise.\\nDemonstrates an ability to provide technical guidance in one or more center of expertise.\\nEstablishes work standards and guidelines.\\nReviews vendor generated high level designs\\nActs as a contact between the vendor engineering teams and LexisNexis to communicates to vendors additional features or products needed and works with vendors to effect system improvements.\\nAble to communicate with customers (internal and external) on new system features enhancements.\\nLeads major technical project design, engineering and integration efforts or develop complex code\\nLeads teams consisting of both employees and vendor resources in multiple locations and time zones.\\nConsults with cross-functional project teams to ensure integrity of existing fabrication processes and participates in implementation of new technologies.\\nConsults with stakeholders to find alternative workarounds during system outages.\\nConsults with vendor engineering 
teams and LexisNexis, answers questions from vendor(s) and provide technical guidance\\nPerforms engineering cost/benefit analysis to verify potential effectiveness of new projects.\\nProvides technical assessment and recommends acquisition and joint venture projects.\\nParticipates in development and evolution of architecture.\\nOther duties as assigned.\\n \\nLexisNexis Legal & Professional (http://www.lexisnexis.com) is a leading global provider of content and technology solutions that enable professionals in legal, corporate, tax, government, academic and non-profit organizations to make informed decisions and achieve better business outcomes. As a digital pioneer, the company was the first to bring legal and business information online with its Lexis� and Nexis� services. Today, LexisNexis Legal & Professional harnesses leading-edge technology and world-class content, to help professionals work in faster, easier and more effective ways. Through close collaboration with its customers, the company ensures organizations can leverage its solutions to reduce risk, improve productivity, increase profitability and grow their business. Part of Reed Elsevier, LexisNexis Legal & Professional serves customers in more than 100 countries with 10,000 employees worldwide.\\n \\nLexisNexis, a division of Reed Elsevier, is an equal opportunity employer: qualified applicants are considered for and treated during employment without regard to race, color, religion, sex, national origin, disability status, protected veteran status or any other characteristic protected by law. If a qualified individual with a disability or disabled veteran needs a reasonable accommodation to use or access our online system, that individual should please contact HR-Careers@lexisnexis.com\\nBasic Qualifications\\n10 years of experience in all phases of system and/or product development\\nBachelor's Degree in computer science, computer engineering, Business Administration or equivalent technical discipline or equivalent technical experience.\\n \\nWhat we are looking for in you:\\nAbility to work independently\\nAbility to work in a cooperative team environment with internal & vendor resources. 
Mentors less experienced staff on all aspects of multiple system environments, software and system operations.\\nReview impact analysis, design and test plans prepared by the vendor engineering teams.\\nAdvanced Technical Knowledge (coding, testing, system design, consulting), of a broader range of technology.\\nSells ideas to functional top management to gain commitment for system enhancements.\\nExcellent communications skills to be able to communicate with customers (internal and external) on new system features enhancements.\\nMakes decisions within their ability and authority.\\nCommunicates with recognized authority figures and proactively influences and educates members of technical and managerial staff on technology issues.\\nAbility to work through very complex problems and reach a workable solution\\nAbility to work in ambiguous situations on problems / project of large - advanced complexity\\n\\nCompetitive salary plus comprehensive benefits package to include:\\nMedical/Dental/Vision\\nQuarterly 401K Match\\nPaid Time Off\\nPaid Holidays\\nTwo Paid Volunteer Days\\nEmployee Stock Purchase Plan\\nEmployee Assistance Program\\nHealth Spending, Flexible Spending & Commuter Spending Accounts\\n \\nThis position is part of the LexisNexis Legal & Professional Global Technology Organization (GTO), which is responsible for the Legal & Professional global technology strategy, bringing together the company’s applications, product platforms and business systems to deliver LexisNexis world-class content to its customers in innovative ways. GTO is working to transform LexisNexis to leverage technology and processes globally, support new products/ services, and improve the customer experience through effective and efficient delivery of business architecture and strategy.\\n\\nFor more information or to apply please email me at geoff.webb@lexisnexis.com\", \"post_time\": \"2014-05-06 18:11:42\" },\n\t{ \"post_id\": 9318, \"topic_id\": 2188, \"forum_id\": 39, \"post_subject\": \"Students! Come and intern with us this summer.\", \"username\": \"lchapman\", \"post_text\": \"So we didn't get accepted for GSoC 2016, but hey, they accepted less than half the organisations that applied and spread the net wider for new open source organisations to get involved. We were lucky enough to be one of those in 2015 and we'll try our luck again in 2017.\\n\\nLast year, we also ran a summer intern program and it was such a success that we are going to do that again this year. So, if you've been looking at us already, keep on looking. We have some great projects available.\\n\\nI've written a blog about how you can get involved in the program so go and read it. It tells you what you need to know. If you have any questions, email lorraine.chapman@lexisnexis.com.\\n\\nhttps://hpccsystems.com/resources/blog/ ... d-find-out\", \"post_time\": \"2016-03-09 12:17:39\" },\n\t{ \"post_id\": 9376, \"topic_id\": 2208, \"forum_id\": 39, \"post_subject\": \"Revised deadline for non-machine learning intern proposals\", \"username\": \"lchapman\", \"post_text\": \"Good news students!\\n\\nThe HPCC Systems Summer Intern Program has extended the deadline for you to apply for non-machine learning projects! You now have a few extra weeks to get your proposal ready because the new deadline is Friday 15th April 2016.\\n\\nThere are a number of non-machine learning projects available including:\\n\\n
\\n\\nGo and look at our list of projects here: https://wiki.hpccsystems.com/x/zYBc; there's bound to be something that catches your eye.\\n\\nYou can find out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (25th March). Please note that after 25th March, we will not be accepting any more proposals for machine learning projects for summer 2016.\\n\\nQuestions? \\nEmail lorraine.chapman@lexisnexis.com\", \"post_time\": \"2016-03-22 12:25:12\" },\n\t{ \"post_id\": 9514, \"topic_id\": 2246, \"forum_id\": 39, \"post_subject\": \"Deadline for intern proposals is this Friday 15th April 7pm\", \"username\": \"lchapman\", \"post_text\": \"The deadline for submitting non-machine learning intern proposals is this Friday 15th April 7pm UTC.\\n\\nIf you haven't yet prepared your proposal, there is still time. Look what's on our list: https://wiki.hpccsystems.com/x/zYBc. Here's a taster...\\n\\n
\\n\\nFind out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (15th April). You still have time to make changes right up to the last minute if you need to. Just make sure you send your final version to lorraine.chapman@lexisnexis.com before the deadline.\\n\\nWe have already reviewed the machine learning proposals and all students will be informed whether they have been successful by 22nd April.\\n\\nGood luck!\", \"post_time\": \"2016-04-12 11:31:45\" },\n\t{ \"post_id\": 3698, \"topic_id\": 825, \"forum_id\": 40, \"post_subject\": \"Welcome!\", \"username\": \"rtaylor\", \"post_text\": \"The purpose of this forum is to support the HPCC Systems online eLearning courses, now available here: https://learn.lexisnexis.com/hpcc \\n\\nThis forum is the appropriate venue to ask about the courses, their content, any questions you may have about the exercises, or any other issues that may come up as you go through the courses. The HPCC Systems training team instructors will be monitoring and responding to your posts as quickly as possible.\\n\\nGood luck, and have fun learning ECL!\", \"post_time\": \"2013-03-11 17:18:39\" },\n\t{ \"post_id\": 3716, \"topic_id\": 833, \"forum_id\": 40, \"post_subject\": \"Code Correction - Introduction to ECL - Module 24\", \"username\": \"bforeman\", \"post_text\": \"In Tutorial #9, the suggested solution has a missing filter clause on Line 14:\\n\\nChange:\\n\\nOlderFemaleinMStates := COUNT($.Persons(State IN $.SetMStates,OlderPersons)) -\\n COUNT($.MeninMStatesPersons(OlderPersons));
\\n\\nTo:\\n\\nOlderFemaleinMStates := \\n COUNT($.Persons(State IN $.SetMStates,OlderPersons,Gender = 'F'));
\\n\\nIn the old code, we are also counting gender types that are "Unknown" or "None".\\n\\n\\nThanks to Lorraine Hill in Dayton for catching this! Kudos!\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-03-12 14:49:51\" },\n\t{ \"post_id\": 3788, \"topic_id\": 852, \"forum_id\": 40, \"post_subject\": \"Podcast Available!\", \"username\": \"HPCC Staff\", \"post_text\": \"Have questions on how to get started with the new Online Training program? Listen to the latest podcast featuring Bob Foreman, Sr Trainer, as he gives an overview of the curriculum and what to expect.\\n\\nhttp://hpccsystems.com/podcasts?order=f ... &sort=desc\", \"post_time\": \"2013-03-20 12:49:36\" },\n\t{ \"post_id\": 4723, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Dan!\", \"post_time\": \"2013-10-03 10:45:43\" },\n\t{ \"post_id\": 4712, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"DSC\", \"post_text\": \"[quote="bforeman":3fsdqs1r]Do we then need to compulsorily use SEQUENTIAL in all places where subsequent steps depend on values from prior assignment statements (in any conventional example)?
\\n\\nIt's a good practice, but you could always separate the process into different workunits, and then SEQUENTIAL of course would not be necessary. But if one action is dependent on another action that needs to be completed first, SEQUENTIAL is the way to go.\\nSEQUENTIAL can have some performance impacts if you over-use it. The ECL compiler typically does a fantastic job of finding "common code" and making sure that it is executed only once. However, if such code was referenced by two different statements within a SEQUENTIAL then it may not be commoned-up and you end up executing that code multiple times.\\n\\nAs a rule of thumb, I use SEQUENTIAL only in certain situations, usually when dealing with the file system. The compiler doesn't really understand dependencies in the file system, such as writing a file and then reading it later, or manipulating superfiles. In general, if you have one piece of code that "touches" an external resource (like a file) and then another piece of code that needs to access that touched resource, you need to wrap the statements in a SEQUENTIAL. If your subsequent statements reference only internal data or attributes, the compiler will always order things correctly.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-10-01 12:47:17\" },\n\t{ \"post_id\": 4710, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"bforeman\", \"post_text\": \"By the way, very good questions! Keep them coming, and get some sleep \\n\\nBob\", \"post_time\": \"2013-10-01 11:55:33\" },\n\t{ \"post_id\": 4708, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Got it! Thanks
\", \"post_time\": \"2013-10-01 11:53:38\" },\n\t{ \"post_id\": 4707, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"bforeman\", \"post_text\": \"
Do we then need to compulsorily use SEQUENTIAL in all places where subsequent steps depend on values from prior assignment statements (in any conventional example)?
\\n\\nIt's a good practice, but you could always separate the process into different workunits, and then SEQUENTIAL of course would not be necessary. But if one action is dependent on another action that needs to be completed first, SEQUENTIAL is the way to go.\\n\\nIn this example, all statements until SEQUENTIAL are attribute definition statements. In the absence of action statements, if there was no SEQUENTIAL, would the code still get executed and index get built?
\\n\\nNo. When any action is used in a definition, that definition needs to be explicitly referenced to be executed. Remember that the rule for any ECL file with regards to the compiler is that a file must have at least one EXPORT or SHARED definition, OR, any action (explicit using OUTPUT or SEQUENTIAL or another action statement, or implicit by just naming the definition).\\n\\nHope this helps! \\n\\nBob\", \"post_time\": \"2013-10-01 11:51:32\" },\n\t{ \"post_id\": 4702, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Do we then need to compulsorily use SEQUENTIAL in all places where subsequent steps depend on values from prior assignment statements (in any conventional example)?\\n\\nIn this example, all statements until SEQUENTIAL are attribute definition statements. In the absence of action statements, if there was no SEQUENTIAL, would the code still get executed and index get built? \\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-10-01 10:08:47\" },\n\t{ \"post_id\": 4676, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Re: Introduction to Roxie - index creation\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\n
When exactly does the index 'AlphaInStateCity' get built here? Is it\\ni) on execution of BUILD?\\nii) on execution of the following FETCH?\\niii) on execution of SEQUENTIAL?\\n\\n
\\non execution of SEQUENTIAL\\n\\nWhat if there was no SEQUENTIAL statement?
\\n\\nWithout SEQUENTIAL, all actions would be executed in parallel, and there is a chance that if the OUTPUT action did not completely write out the recordset, the BUILD could create a corrupted index, so the SEQUENTIAL action forces the actions to occur in sequence, where the output file is written first, and then the BUILD action creates the INDEX.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-30 13:16:10\" },\n\t{ \"post_id\": 4671, \"topic_id\": 1055, \"forum_id\": 40, \"post_subject\": \"Introduction to Roxie - index creation\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"In the FETCH_Example.ecl bundled along with Intro to Roxie online course material, I found the following piece of code (not complete):\\n\\nAlphaInStateCity := INDEX(Ptbl,\\n\\t\\t\\t{address,csz_id,lname,fname,RecPos},\\n\\t\\t\\tKeyFile);\\nBld := BUILD(AlphaInStateCity,OVERWRITE);\\nAlphaPeople := FETCH(Ptbl, \\n\\t\\t\\tAlphaInStateCity(WILD(address),\\n\\t\\t\\tWILD(csz_id),\\n\\t\\t\\tKEYED(Lname='WIK')\\n\\t\\t\\t), \\n\\t\\t\\tRIGHT.RecPos);\\nOutFile := OUTPUT(CHOOSEN(AlphaPeople,10));\\nSEQUENTIAL(PtblOut,Bld,OutFile)
\\n\\nWhen exactly does the index 'AlphaInStateCity' get built here? Is it\\ni) on execution of BUILD?\\nii) on execution of the following FETCH?\\niii) on execution of SEQUENTIAL? \\n\\nWhat if there was no SEQUENTIAL statement?\\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-09-30 11:18:16\" },\n\t{ \"post_id\": 4741, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,There is one statement in that post I am not in agreement with - The one who posted the question says \\n\\nWe just create index and all filters on record set containing indexed fields automatically uses index\\n\\nHow will the index be 'automatically' used if a field name is used? Unlike in an RDBMS, in HPCC, index would be used only if we explicitly use the attribute name (defined earlier for the index), isn't it? Pls. clarify.
Clarification is easy -- you're correct.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-08 13:07:42\" },\n\t{ \"post_id\": 4737, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Great! Just what I wanted. \\n\\nThere is one statement in that post I am not in agreement with - The one who posted the question says \\n\\nWe just create index and all filters on record set containing indexed fields automatically uses index\\n\\nHow will the index be 'automatically' used if a field name is used? Unlike in an RDBMS, in HPCC, index would be used only if we explicitly use the attribute name (defined earlier for the index), isn't it? Pls. clarify.\", \"post_time\": \"2013-10-08 09:28:00\" },\n\t{ \"post_id\": 4735, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,What is the difference between a DICTIONARY and an INDEX?
\\nTake a look at this thread that discusses that very point: http://hpccsystems.com/bb/viewtopic.php?f=10&t=1062&sid=464a43fde197e917f465bff5eb3015a9\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-07 12:58:57\" },\n\t{ \"post_id\": 4734, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob and Richard!! \\n\\nWhat is the difference between a DICTIONARY and an INDEX?\", \"post_time\": \"2013-10-07 04:30:00\" },\n\t{ \"post_id\": 4732, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the clarification Richard, when I said the "same" thing, I was referring to KEYED used in the JOIN condition, not as the JOIN flag, but it is important that you point that out. KEYED as the JOIN flag is used in a FULL KEYED JOIN, where the KEYED option in the JOIN condition is used as you described it.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-04 13:38:38\" },\n\t{ \"post_id\": 4731, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"rtaylor\", \"post_text\": \"Gayathri,\\n\\nKEYED as an option on JOIN and KEYED as used in an INDEX filter are two different things.\\n\\nThe KEYED option on JOIN nominates an INDEX into the right dataset for the JOIN so that that join condition is first applied against the INDEX as a filter, then the selected right dataset records are fetched to pass on to the TRANSFORM. This is called a "full-keyed" JOIN.\\n\\nWhen KEYED is used in an INDEX filter (along with WILD) you are simply specifying filtering by a trailing element of the key and wildcarding the leading element(s). That means the binary tree of the INDEX will be scanned (instead of walked) and the INDEX read will still be fast. Without KEYED/WILD in the filter, filtering by trailing elements causes the INDEX to be treated as a DATASET, and all the leaf nodes of the INDEX (the index "records") will be read to satisfy the filter (AKA a "full table scan").\\n\\nA JOIN condition is implicitly a filter on the records in the left and right datasets. Therefore, if the right dataset is actually an INDEX (usually a payload index -- making the JOIN "half-keyed"), then you can also use KEYED in the JOIN condition to ensure the INDEX is treated as an index and not a dataset. \\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2013-10-04 13:29:59\" },\n\t{ \"post_id\": 4730, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nYes, spot on, and essentially they are doing the same thing, but KEYED as you mention is used in different contexts (JOIN vs. FETCH). Usually the Half-Keyed JOIN is used with a payload index, optimizing the read, and FETCH is used when the INDEX is a non-payload type, and additional I/O is needed into the base dataset.\\n\\nHope this helps! \\n\\nBob\", \"post_time\": \"2013-10-04 12:32:27\" },\n\t{ \"post_id\": 4729, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Please let me know if my understanding here is correct:\\n\\nKEYED, when used in JOIN, indicates indexed access into record set - accepts index name alone - all fields part of index may be used for record access.\\n\\nKEYED, when used in FETCH, accepts an expression using field name(s??) 
and mainly filters the index, performs what is called as the 'bookmark lookup' in database world, to retrieve all fields for records satisfying the index key.\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-10-04 09:53:14\" },\n\t{ \"post_id\": 4709, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"bforeman\", \"post_text\": \"
In a multi-component key, do all leading fields that aren't used in the filter need to be mandatorily wild carded?\\n
\\n\\nYes, or you will receive a compiler warning.\\n\\n\\nBob\", \"post_time\": \"2013-10-01 11:54:10\" },\n\t{ \"post_id\": 4692, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"In a multi-component key, do all leading fields that aren't used in the filter need to be mandatorily wild carded?\\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-10-01 05:00:29\" },\n\t{ \"post_id\": 4677, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Re: Index Seek\", \"username\": \"bforeman\", \"post_text\": \"KEYED and WILD are used with multi-component keys, and is used to indicate to the compiler which of the leading index fields are used as filters (KEYED) or wild carded (WILD) so that the compiler can warn you if you’ve gotten it wrong. Trailing fields not used in the filter are ignored (always treated as wildcards).\\n\\nSo for example if I have a multi-component key with an ID, LastName, and Firstname fields, and I want to build a query by Lastname, I would WILD the ID, use KEYED on LastName, and nothing is needed for FirstName since it is a trailing field not used in the filter.\\n\\nSo with the use of KEYED and WILD, you are telling the compiler that this is the intention of your query, and you simply did not forget to include the ID as a search field.\\n\\nHope this helps!\\n\\nBob\", \"post_time\": \"2013-09-30 13:43:34\" },\n\t{ \"post_id\": 4672, \"topic_id\": 1056, \"forum_id\": 40, \"post_subject\": \"Index Seek\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"When is WILD used and when is KEYED used?\", \"post_time\": \"2013-09-30 11:20:32\" },\n\t{ \"post_id\": 4706, \"topic_id\": 1057, \"forum_id\": 40, \"post_subject\": \"Re: Executing Roxie query\", \"username\": \"bforeman\", \"post_text\": \"If the underlying ECL code changes, yes, the query would have to be republished. If the data behind the query was the only thing that changed, you could use packages to update the data without the need to republish (packages are covered in the Advanced ROXIE online course).\\n\\nThe strategy and best practice for deployment and production is to configure multiple ROXIEs. Use one for development and updating the queries and then periodically they can switch over to use the new cluster.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-01 11:44:06\" },\n\t{ \"post_id\": 4691, \"topic_id\": 1057, \"forum_id\": 40, \"post_subject\": \"Re: Executing Roxie query\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob! That clarifies it.\\n\\nIf underlying ECL code changes, does it have to be re-published to Roxie or is it sufficient to just recompile it?\", \"post_time\": \"2013-10-01 04:55:55\" },\n\t{ \"post_id\": 4678, \"topic_id\": 1057, \"forum_id\": 40, \"post_subject\": \"Re: Executing Roxie query\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nIn the OSS HPCC, you can now publish your query to hTHOR and THOR, and test it in WS_ECL just like you would for ROXIE. Or if you just want to test the query directly in the ECL IDE, add default values to the search parameters like this:\\n\\n STRING15 fname_value := '' : STORED('FirstName');\\n STRING25 lname_value := 'SMITH' : STORED('LastName');\\n STRING2 state_value := '' : STORED('State');\\n STRING1 sex_value := '' : STORED('Sex');
\\n\\n...and then call the function from a builder window or BWR file in your repository folder. For example, I created a file named BWR_TestService, and then in the file simply do this:\\n\\nIMPORT $;\\n\\n$.PeopleFileSearchService();
\\n\\n\\nWorks great! Just remember after testing to reset the STORED definitions back to blanks when you are ready to publish.\\n\\nIn the Advanced ROXIE we also introduce INTERFACES, which help to make these defaults even easier to use and seed.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-09-30 14:02:31\" },\n\t{ \"post_id\": 4673, \"topic_id\": 1057, \"forum_id\": 40, \"post_subject\": \"Executing Roxie query\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"In 'Intro to Roxie' online training, we published a function to Roxie and executed it from ws-ECL. \\n\\nIn this scenario, the function didn't accept any parameters - all were defined in the form of local variables within the function and exposed through STORED services. \\n\\nMy question is this:\\nHow do I execute this function from within ECL IDE? During the course of development, I may want to test it at every build stage to ensure correctness and publish to Roxie only when all is well and complete.\\n\\nThanks,\\nGayathri\", \"post_time\": \"2013-09-30 11:28:48\" },\n\t{ \"post_id\": 4782, \"topic_id\": 1079, \"forum_id\": 40, \"post_subject\": \"Re: Publishing half-keyed search query to Roxie\", \"username\": \"bforeman\", \"post_text\": \"What is the difference between RETURNing a dataset vs. returning OUTPUT(dataset), as function result, to Roxie?
\\n\\nHi Gayathri,\\n\\nRETURNing a DATASET will return all fields and all records of the DATASET unconditionally (unless you apply a filter to the DATASET itself).\\n\\nOUTPUT is available to give you a little more flexibility, as you can control the format of the output in the second parameter.\\n\\nSo you can return a partial number of fields from the dataset, only the ones that are interesting to you, instead of all them.\\n\\nHTH,\\n\\nBob \", \"post_time\": \"2013-10-18 12:20:05\" },\n\t{ \"post_id\": 4780, \"topic_id\": 1079, \"forum_id\": 40, \"post_subject\": \"Publishing half-keyed search query to Roxie\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"In Lab Exercise No.12, we publish results of a half-keyed search query to Roxie.\\n\\nWhat is the difference between RETURNing a dataset vs. returning OUTPUT(dataset), as function result, to Roxie?\", \"post_time\": \"2013-10-18 05:26:24\" },\n\t{ \"post_id\": 4955, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"It worked great Bob. Thanks so much!\", \"post_time\": \"2013-11-19 12:19:23\" },\n\t{ \"post_id\": 4943, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"Your code example works fine here. I think you just need to adjust your RoxieIP definition as follows:\\n\\n
RoxieIP := 'http://192.168.11.131:8002/WsEcl/soap/query/roxie/getwordcount';
\", \"post_time\": \"2013-11-14 15:03:10\" },\n\t{ \"post_id\": 4942, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"Logs are good, and I can forward them to the development team, and the ECL code is also good to send.\", \"post_time\": \"2013-11-14 14:15:37\" },\n\t{ \"post_id\": 4941, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Yes, this is how I am calling too. My function errored out even after I made it 9876. But, this time around, I noticed lot more activity in the log file - I'll send it to you in a mail - can you let me know if you can infer anything from the logs?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-14 14:01:11\" },\n\t{ \"post_id\": 4937, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"...and to add one more point\\n\\nTo access the query via WSECL you just need to extend the directory a bit more, like this:\\n\\nip := 'http://192.168.229.131:8002/WsEcl/soap/query/roxie/FetchPeopleByZipService'
\\n\\nAccessing the Roxie query through WsECL gives you an automatic round robin load distribution among your Roxie servers (farmers).\\n\\nThanks to Tony Fishbeck and Jim Defabia for this input.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-13 16:05:23\" },\n\t{ \"post_id\": 4936, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nYes, the secret to accessing your queries via SOAPCALL is to access the node directly using the 9876 default configuration port.\\n\\nFor example, here is the Data Tutorial ROXIE query that I am accessing via SOAPCALL:\\noutRecord := RECORD\\n STRING15 FirstName{xpath('firstname')};\\n STRING25 LastName{xpath('lastname')};\\n STRING15 MiddleName{xpath('middlename')};\\n STRING5 Zip{xpath('zip')};\\n STRING42 Street{xpath('street')};\\n STRING20 City{xpath('city')};\\n STRING2 State{xpath('state')};\\nEND;\\n\\nip := 'http://10.173.248.5:9876/'; //training cluster\\n//ip := 'http://192.168.229.131:9876/'; //my one-way ROXIE VM\\nsvc:= 'FetchPeopleByZipService';\\nOUTPUT(SOAPCALL(ip, svc,{STRING10 ZIPValue := '63033'},DATASET(outRecord)));
\\n\\nFor a single node VM ROXIE, the base address and port 9876 is all you need.\\n\\nFor physical clusters with multiple nodes, you need to access any one of the ROXIE nodes target IP address, and again use the 9876 port.\\n\\nDevelopers actually do their own "load balancing" by randomly selecting a node on each call. For example, on a 100-way ROXIE:\\n\\nip:='http://10.173.219.' + (STRING)(random() % 100 + 1) + ':9876';
\\n\\nAnyway, it works great when you have the right port! \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-13 15:01:40\" },\n\t{ \"post_id\": 4934, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"Gayathri,\\n\\nThis looks like you are trying to make a SOAPCALL to a local VM, correct? To be honest, I've never tried that before. Let me tinker with it here today. \\n\\nMy guess is that it may be either be blocked or possibly an incorrect port. What if you switch the port from 8002 to 9876?\\n\\nMeanwhile, let me test here and I will get back to you.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-13 13:11:49\" },\n\t{ \"post_id\": 4932, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"It failed with the same error
\", \"post_time\": \"2013-11-13 12:19:25\" },\n\t{ \"post_id\": 4925, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Re: Use SOAPCALL to call Roxie function with interface param\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\n
Error: System error: -3: Graph[1], SOAP_rowdataset[2]: SLAVE 192.168.253.131:20100: <Error><text>connection failed 192.168.11.131:8002</text><url>http://192.168.11.131:8002/WsEcl/soap/query/myroxie/personsfilesearchservice2</url></Error>,
\\n\\nYes, it may not be related to the interface question. What I would do is test your SOAPCALL with another published query that is NOT using an interface, and this will verify if the problem is in the RoxieIP and/or port.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-11-12 15:39:36\" },\n\t{ \"post_id\": 4919, \"topic_id\": 1111, \"forum_id\": 40, \"post_subject\": \"Use SOAPCALL to call Roxie function with interface params\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I am trying to get a hang of SOAPCALL usage. \\n\\nI picked up the Roxie function we published in the online Roxie trainings - PersonsFileSearchService2. This takes StateParams and NameParams, both interfaces, as parameters apart from a gender parameter. \\n\\nI used the following piece of code to call the Roxie service:\\n\\nfilteredRecSet := SOAPCALL(RoxieIP,\\n svc,\\n\\t\\t SearchServiceParams,\\n DATASET(OutRec1));
\\t \\n\\nwhere SearchServiceParams has been defined as \\n\\nSearchServiceParams := RECORD\\n\\tSTRING25 LastName := 'SMITH';\\n\\tSTRING15 FirstName := '';\\n\\t\\n\\tSTRING2 State := 'NJ';\\n\\tSTRING1 Sex := 'M';\\t\\nEND;
\\n\\nThe idea was to combine params from both interfaces into an input record structure and pass. But, apparently, that is wrong since it didn't work. How should I call it instead?\\n\\nAlso, the error message I am getting may not be because of the params. This is what I get:\\n\\nError: System error: -3: Graph[1], SOAP_rowdataset[2]: SLAVE 192.168.253.131:20100: <Error><text>connection failed 192.168.11.131:8002</text><url>http://192.168.11.131:8002/WsEcl/soap/query/myroxie/personsfilesearchservice2</url></Error>, \\n\\nPls help.\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-12 09:06:18\" },\n\t{ \"post_id\": 5016, \"topic_id\": 1128, \"forum_id\": 40, \"post_subject\": \"Re: Module parameters and interfaces\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"Thanks Bob!\\n\\nHope you had a good time off work \", \"post_time\": \"2013-12-03 11:21:11\" },\n\t{ \"post_id\": 4992, \"topic_id\": 1128, \"forum_id\": 40, \"post_subject\": \"Re: Module parameters and interfaces\", \"username\": \"bforeman\", \"post_text\": \"Hi Gayathri,\\n\\nFirst, our apologies for not getting back to you sooner, we just came back from a long holiday weekend over here
\\n\\nYes, to answer your question the MODULE is establishing a concrete instance of the INTERFACE (params1) and the parameters on the LHS can be used for anything inside the exported MyModule. So myModule can be used to change the initial settings in the INTERFACE if needed and also do any additional processing needed as established by the parameters.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-12-02 15:29:57\" },\n\t{ \"post_id\": 4975, \"topic_id\": 1128, \"forum_id\": 40, \"post_subject\": \"Module parameters and interfaces\", \"username\": \"Gayathri_Jayaraman\", \"post_text\": \"I came across a piece of code that looks like this:\\n\\n
\\nEXPORT myModule(string name='',Boolean IsActive = true):= MODULE(params1)\\n <processing code>\\nEND;\\n
\\n\\nHere, the portion in the RHS - after MODULE, is the parameter - the interface. What is the list of values passed to the LHS? More parameters, that aren't part of an interface?\\n\\nRegards,\\nGayathri\", \"post_time\": \"2013-11-27 12:03:08\" },\n\t{ \"post_id\": 27873, \"topic_id\": 1870, \"forum_id\": 40, \"post_subject\": \"Re: Complete Course PDFs Now Included in Downloads\", \"username\": \"bforeman\", \"post_text\": \"Hello,\\n\\nYou have to enroll in the appropriate class first, then you should see the download link on the target course page.\", \"post_time\": \"2019-10-16 01:04:11\" },\n\t{ \"post_id\": 27823, \"topic_id\": 1870, \"forum_id\": 40, \"post_subject\": \"Re: Complete Course PDFs Now Included in Downloads\", \"username\": \"Gallupstr\", \"post_text\": \"Hi! \\nWhere could I find the Downloads? Thank you.\", \"post_time\": \"2019-10-14 11:47:12\" },\n\t{ \"post_id\": 8108, \"topic_id\": 1870, \"forum_id\": 40, \"post_subject\": \"Complete Course PDFs Now Included in Downloads\", \"username\": \"bforeman\", \"post_text\": \"In the core ECL Online Courses, each course download now includes a PDF of the entire lesson slide presentations. This includes the following courses:\\n\\nIntroduction to ECL\\nIntroduction to THOR\\nAdvanced ECL\\nAdvanced THOR\\nIntroduction to ROXIE\\nAdvanced ROXIE\\n\\nYou can now use these PDFs as an offline reference whenever needed.\\n\\nBest Regards,\\n\\nThe HPCC Training Team\", \"post_time\": \"2015-09-11 18:03:04\" },\n\t{ \"post_id\": 18773, \"topic_id\": 4723, \"forum_id\": 40, \"post_subject\": \"Re: Trouble with ECL 1 training course\", \"username\": \"dnordahl\", \"post_text\": \"Ok, I checked ports in use but didn't see anything on 8010. I tried running VM VirtualBox as admin but that didn't make a difference. Next I tried changing eth1 to eth0 in /etc/network/interfaces, and that made it stop hanging on initializing network on the boot sequence, but now it hangs for awhile later on in the boot sequence, reports no failures starting HPCC services, but then still has the http://:8010 address.\\n\\nI also tried enabling and disabling the wireless adapter on my laptop which didn't make a difference either, and also tried changing the network adapter type in VM ware to PCnet-Fast III (from Intel PRO/1000 MT Desktop 82540EM).\\n\\nNext I'll try updating the VMWare client. Let me know if you have any other ideas or if there's a newer virtual machine version I should be using than 6.2.14-1\\n\\nT\", \"post_time\": \"2017-09-11 18:49:42\" },\n\t{ \"post_id\": 18763, \"topic_id\": 4723, \"forum_id\": 40, \"post_subject\": \"Re: Trouble with ECL 1 training course\", \"username\": \"rtaylor\", \"post_text\": \"dnordahl,Would the local network admin need to look if this port is blocked on my PC if this is happening?
\\nYes, absolutely. Also, the VM doesn't peacefully co-exist with VPN, so if you're on a VPN you should try turning that off before starting your VM cluster.\\nUsing my dev hpcc account, I’m able to spray the file successfully, but then running the code for Lesson 14, I get the error: “Error: Query does not contain any actions (0, 0), 3,”. I don’t see hthor_dev as an option to spray to though, just hthor_dev_eclagent.
hthor_dev_eclagent is the hthor target cluster. When I try running the code for lab exercise, I get the error “Error: Object does not have a member named 'Persons' (5, 3) Error: Unknown identifier "Persons" (5,..”\\nI’ve tried running the BWR_BasicQueries.ecl and Persons.ecl files from both locally saved files in the same folder and from checked in files on a repository folder on hthor_dev and have the same problem.
Does your dev system still have a central repository? If so, that's your problem. The courses are designed for non-central repository systems (IOW, they're designed for use on Open Source systems, not internal LN systems that haven't yet migrated). That's why we suggest using a VM for the courses.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-09-11 16:30:22\" },\n\t{ \"post_id\": 18723, \"topic_id\": 4723, \"forum_id\": 40, \"post_subject\": \"Trouble with ECL 1 training course\", \"username\": \"dnordahl\", \"post_text\": \"I’m having a number of issues when working to complete the ECL 1 training (as an employee out of OKC), and am looking for some guidance:\\n\\n1.\\tRunning the VM locally for HPCC, it has issues booting the network configuration, then it returns http://:8010 for the ECL watch address when it finally boots up. Would the local network admin need to look if this port is blocked on my PC if this is happening? It says I’m running VM version 6.2.14-1. Maybe I should try a different port somehow?\\n\\n2.\\tUsing my dev hpcc account, I’m able to spray the file successfully, but then running the code for Lesson 14, I get the error: “Error: Query does not contain any actions (0, 0), 3,”. I don’t see hthor_dev as an option to spray to though, just hthor_dev_eclagent.\\n\\n3.\\tWhen I try running the code for lab exercise, I get the error “Error: Object does not have a member named 'Persons' (5, 3) Error: Unknown identifier "Persons" (5,..”\\nI’ve tried running the BWR_BasicQueries.ecl and Persons.ecl files from both locally saved files in the same folder and from checked in files on a repository folder on hthor_dev and have the same problem.\", \"post_time\": \"2017-09-08 18:17:42\" },\n\t{ \"post_id\": 33523, \"topic_id\": 8783, \"forum_id\": 40, \"post_subject\": \"Podcast: Meet the trainers!\", \"username\": \"HPCC Staff\", \"post_text\": \"Want to get to know some of our trainers a little better? Watch the 10 Year anniversary podcast series featuring our training team and hear a little bit of history and what's ahead.\\n\\nRichard Taylor & Bob Foreman\\nhttps://wiki.hpccsystems.com/display/hpcc/10+Year+Anniversary+Podcast+Series#id-10YearAnniversaryPodcastSeries-trainingteam\\n\\nHugo Watanuki (& Claudio Amaral)\\nhttps://wiki.hpccsystems.com/display/hpcc/10+Year+Anniversary+Podcast+Series#id-10YearAnniversaryPodcastSeries-claudioandhugo\\n\\n\", \"post_time\": \"2021-05-14 12:27:06\" },\n\t{ \"post_id\": 3750, \"topic_id\": 843, \"forum_id\": 41, \"post_subject\": \"One method for dynamically updating superkeys\", \"username\": \"DSC\", \"post_text\": \"Cross-posted from another topic: One method for dynamically updating superkeys.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2013-03-14 19:59:17\" },\n\t{ \"post_id\": 4029, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"Noticed this was a cross-post: viewtopic.php?f=8&t=581\\n\\nAlso, the absolute path will only be needed for versions prior to 3.10.8. 3.10.8 and onwards will be able to search the standard path variable. If you are updating the doc, then please change the example to refer 'cat' instead of 'echo'.\\n\\nThanks
\", \"post_time\": \"2013-04-30 11:56:05\" },\n\t{ \"post_id\": 4028, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback, regarding the absolute path issue, ww will review the documentation and update as needed. Again, thanks very much!\\n\\nBob\", \"post_time\": \"2013-04-30 11:53:51\" },\n\t{ \"post_id\": 4027, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"As noted in the above Jira items, this is now resolved for 3.10.8.\", \"post_time\": \"2013-04-30 11:51:00\" },\n\t{ \"post_id\": 4023, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"Diagnosed the problem. Use the absolute path for the command. Also, the example you provided will not return anything as at least in Linux, echo repeats its arguments and not the standard input. cat was built to repeat standard input. So the correct example would be Std.System.Utils.CmdProcess('cat','George Jetson');\\n\\nSee issues \\nhttps://track.hpccsystems.com/browse/HPCC-9226\\nhttps://track.hpccsystems.com/browse/HPCC-9227\", \"post_time\": \"2013-04-30 00:15:01\" },\n\t{ \"post_id\": 4022, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"It's the same command as Jim posted. I tried it on 4.0.0rc5 running on Ubuntu and 3.10.4-1 running on CentOS. I will log a Jira item.\", \"post_time\": \"2013-04-29 21:51:46\" },\n\t{ \"post_id\": 4018, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"bforeman\", \"post_text\": \"There is no active issue (bug) reported for CmdProcess currently, if you are unable to use it you probably need to open an issue in the Community Issue Tracker, and include a code sample if it is reproducable.\\n\\nAre you getting this lock on the simple echo command that Jim posted, or are you trying to run something else?\\n\\nThanks,\\n\\nBob\", \"post_time\": \"2013-04-29 18:58:51\" },\n\t{ \"post_id\": 4017, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"Thanks. Unfortunately, the CmdProcess function is not working for me. The workunits do not terminate. The last line in the eclagent log says 'Obtained run lock'. Both for hthor and thor. Roxie also does not terminate but I don't have a log entry for roxie.\\n\\nPipe on the other hand works fine but it executes on the slave nodes whereas I want the command to be executed on the master node only.\", \"post_time\": \"2013-04-29 18:13:12\" },\n\t{ \"post_id\": 4016, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Re: Best way to trigger system commands\", \"username\": \"JimD\", \"post_text\": \"The ECL action to trigger an event is NOTIFY. This will trigger the event that a scheduled workunit can be set to wait for. .\\n\\nYou can find more details in the ECL Scheduler guide:\\n\\nhttp://hpccsystems.com/download/docs/ecl-scheduler\\n\\n\\nThe example on page 12 of this guide shows a good example. \\n\\nTo run a system command, use the standard library function: CmdProcess\\n\\nFor example:\\n\\n
STD.System.Util.CmdProcess('echo','George Jetson');
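As the follow-up posts in this topic point out, 'echo' ignores its standard input, so the call above returns nothing useful. A variant along these lines does return the text (a sketch only; it assumes cat is available at /bin/cat, and the absolute path is only needed on platform versions before 3.10.8):

IMPORT STD;
// 'cat' copies its standard input to its output, so the second argument
// comes straight back as the function's result.
OUTPUT(STD.System.Util.CmdProcess('/bin/cat','George Jetson'));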
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2013-04-29 18:03:20\" },\n\t{ \"post_id\": 4013, \"topic_id\": 899, \"forum_id\": 41, \"post_subject\": \"Best way to trigger system commands\", \"username\": \"sbagaria\", \"post_text\": \"I want to trigger a system command on the master node at the end of a workunit. I remember seeing a function which will call a particular command on a node but I can not recollect where I saw it. Anyone remembers?\", \"post_time\": \"2013-04-29 15:31:48\" },\n\t{ \"post_id\": 4754, \"topic_id\": 1069, \"forum_id\": 41, \"post_subject\": \"HPCC Systems Wiki and Red Book\", \"username\": \"HPCC Staff\", \"post_text\": \"Check out the HPCC Systems Wiki for best practices, sample code examples and tips, as well as the HPCC Systems Red Book which contains useful information to help users manage the transition between releases.\\n\\nhttps://wiki.hpccsystems.com\", \"post_time\": \"2013-10-11 14:36:37\" },\n\t{ \"post_id\": 5355, \"topic_id\": 1230, \"forum_id\": 41, \"post_subject\": \"Build HPCCSystem Platform with CMake 2.8.12.x\", \"username\": \"ming\", \"post_text\": \"CMake 2.8.12.0-2 introduced a bug for install path with '@'.\\nPrevious CMake versions do not have this issue.\\nWe already opened a bug report for CMake: http://www.cmake.org/Bug/view.php?id=14782#c35227. It is accepted and assigned and should be in future CMake release or service pack.\\n\\nIf you have CMake 2.8.12.x and want to compile HPCC Platform before CMake's official fix here is a work-around:\\n\\n1) Open <CMake Home>/Modules/CPackRPM.cmake
\\n2) comment out around lines 881: \\n set(CPACK_RPM_INSTALL_FILES_LIST "${CPACK_RPM_INSTALL_FILES}")\\n set(PROTECTED_AT "@")\\n string(REPLACE "@" "\\\\@PROTECTED_AT\\\\@" CPACK_RPM_INSTALL_FILES "${CPACK_RPM_INSTALL_FILES_LIST}")\\n set(CPACK_RPM_INSTALL_FILES_LIST "")\\n
\\n3) comment out around line 992: \\n unset(PROTECTED_AT)\\n
\", \"post_time\": \"2014-03-07 14:41:55\" },\n\t{ \"post_id\": 24743, \"topic_id\": 1486, \"forum_id\": 41, \"post_subject\": \"Re: Returning multiple values from a function\", \"username\": \"yugdewalkar\", \"post_text\": \"Thanks for this valuable tip. It will be great help for my project.\", \"post_time\": \"2019-03-06 04:51:51\" },\n\t{ \"post_id\": 24023, \"topic_id\": 1486, \"forum_id\": 41, \"post_subject\": \"Re: Returning multiple values from a function\", \"username\": \"sarahah\", \"post_text\": \"This post is useful to me as I was also returning multiple values from a function but was not able to do it.\", \"post_time\": \"2019-01-16 06:34:33\" },\n\t{ \"post_id\": 23893, \"topic_id\": 1486, \"forum_id\": 41, \"post_subject\": \"Re: Returning multiple values from a function\", \"username\": \"Allan\", \"post_text\": \"Hi Richard / All,\\n\\nI've found this:\\n\\nRETURN MODULE\\n EXPORT Results := h.SomeData;\\n EXPORT Report := h.ReportOnSaidData;\\nEND;\\n
\\nparticularly useful.\\nWhere I'm returning results, but also have to tie a report to those same results.\\n\\nYours\\nAllan\", \"post_time\": \"2019-01-02 13:08:36\" },\n\t{ \"post_id\": 6491, \"topic_id\": 1486, \"forum_id\": 41, \"post_subject\": \"Re: Returning multiple values from a function\", \"username\": \"rtaylor\", \"post_text\": \"Dan,\\n\\nThanks for bringing this up. It's one of those "little" things in ECL that can easily get overlooked, but can be quite useful. \\n\\nRichard\", \"post_time\": \"2014-10-22 14:13:12\" },\n\t{ \"post_id\": 6490, \"topic_id\": 1486, \"forum_id\": 41, \"post_subject\": \"Returning multiple values from a function\", \"username\": \"DSC\", \"post_text\": \"I just learned something today that is really neat and elegant. If you need to return multiple values from a function, simply wrap the values in a MODULE and make each value an exported attribute, then return the module.\\n\\nHere is an example, which is the code from Std.Date.ToGregorianYMD() in the 5.0.0 version of the standard library:\\n\\n
EXPORT ToGregorianYMD(Days_t days) := FUNCTION\\n //See Fliegel and van Flandern (1968) and other quoted sources (e.g., http://www.ortelius.de/kalender/calc_en.php)\\n //Process as 4, 100 and 400 year cycles.\\n daysIn4Years := 3*365+366;\\n daysIn100Years := 25*daysIn4Years-1;\\n daysIn400Years := 4*daysIn100Years+1;\\n\\n //Calulate days in each of the cycles.\\n adjustedDays := days - GregorianDateOrigin;\\n num400Years := adjustedDays div daysIn400Years;\\n rem400Years := adjustedDays % daysIn400Years;\\n\\n num100Years := ((rem400Years div daysIn100Years + 1) * 3) DIV 4;\\n rem100Years := rem400Years - num100Years * daysIn100Years;\\n\\n num4Years := rem100Years div daysIn4Years;\\n rem4Years := rem100Years % daysIn4Years;\\n\\n years := ((rem4Years div 365 + 1) * 3) DIV 4;\\n numdays := rem4Years - years * 365;\\n\\n //Now calculate the actual year, month day\\n y := num400Years * 400 + num100Years * 100 + num4Years * 4 + years;\\n m := (numdays * 5 + 308) div 153 - 2;\\n d := numdays - (m + 4) * 153 div 5 + 122;\\n result := MODULE\\n EXPORT year := (y + (m + 2) div 12) - YearDelta;\\n EXPORT month := (m + 2) % 12 + 1;\\n EXPORT day := d + 1;\\n END;\\n return result;\\nEND;
\\nThe example from the documentation illustrates how you use this:\\n\\nIMPORT STD;\\nINTEGER2 MyYear := 2012;\\nUNSIGNED1 MyMonth := 1;\\nUNSIGNED1 MyDay := 1;\\nJ := STD.Date.FromGregorianYMD(MyYear,MyMonth,MyDay);\\n //J contains 734503\\nY := STD.Date.ToGregorianYMD(J).Year; //Y contains 2012\\nM := STD.Date.ToGregorianYMD(J).Month; //M contains 1\\nD := STD.Date.ToGregorianYMD(J).Day; //D contains 1
\\nPretty neat.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2014-10-22 14:08:12\" },\n\t{ \"post_id\": 15593, \"topic_id\": 1509, \"forum_id\": 41, \"post_subject\": \"Re: Uses, abuses and internals of the EMBED feature\", \"username\": \"nawazkhan\", \"post_text\": \"It is so informative and the attached video is not able to open. can any one give more details about this discussion like how we can create context and invoking python function.\\n\\nthanks in advance.\", \"post_time\": \"2017-03-06 13:08:00\" },\n\t{ \"post_id\": 6593, \"topic_id\": 1509, \"forum_id\": 41, \"post_subject\": \"Uses, abuses and internals of the EMBED feature\", \"username\": \"LAChapman\", \"post_text\": \"In HPCC Systems release 5.0, we added SqLite and MySQL to the list of languages supported which already included C++, Java, Python, Javascript and R. We have also now extended the functionality of the EMBED feature to include streaming. \\n\\nGetting started with the basics\\nLet's look at how to use the EMBED syntax starting with the basics. The first thing to do is IMPORT the plugin for the language you want to embed. To use EMBED, declare a function (typically with parameters) and then for the body of the function where normally the ECL would go, simply type the code you want to embed in between the EMBED and ENDEMBED.\\n \\nThe following code illustrates a simple call to Python using the EMBED syntax to call the split function on the string that is passed to it and it will return a list corresponding to a set of string outputting ‘Once upon a time’ as separate strings:\\n\\nIMPORT python;\\nSET OF STRING split(STRING text) := EMBED(python)\\n return text.split()\\nENDEMBED;\\t\\nsplit('Once upon a time');
\\nThe use of IMPORT is similar to embedding, but it has no EMBED body. In this case, you would simply give the name of an external function you want to call. In the following example, ex2 is the name of the module and the tag is the name of the function in that module to be called. The IMPORT statement replaces the EMBED body. Note that the IMPORT keyword here should not be confused with the use of IMPORT to import other ECL modules; while their purposes are a little related, the syntax and usage are completely different.\\n\\nIn this example we are passing in a string and returning a dataset rather than a list:\\n\\nIMPORT python;\\nr := RECORD\\n STRING word;\\n UTF8 tags;\\nEND;\\nDATASET(R) tag(STRING text) := IMPORT(python, './ex2.tag');\\ntag('Once upon a time there was a boy called Richard');
\\nIt calls the following Python code (in ex2.py) which imports the Natural Language Toolkit (NLP processing tool written in Python) to assign a grammar tag to the different parts of the sentence shown above:\\n\\nimport nltk\\ntokenizer = None\\ntagger = None\\ndef init_nltk():\\n global tokenizer, tagger\\n tokenizer = nltk.tokenize.RegexpTokenizer(r'\\\\w+|[^\\\\w\\\\s]+')\\n tagger = nltk.UnigramTagger(nltk.corpus.brown.tagged_sents())\\ndef tag(text):\\n global tokenizer, tagger\\n if not tokenizer:\\n init_nltk()\\n tokenized = tokenizer.tokenize(text)\\n return tagger.tag(tokenized)
\\nThe result shows the sentence by word showing the grammar tag assigned by the Python Natural Language Toolkit:\\n\\nOnce,RB\\nupon,IN\\na,AT\\ntime,NN\\nthere,EX\\nwas,BEDZ\\na,AT\\nboy,NN\\ncalled,VBN\\nRichard,NP
\\nWhether ECL supports EMBED, IMPORT, or both depends on the target language, for example, Python supports both, but most target languages only support one or the other.\\n\\nNow let’s look at embedding Java. Java is slightly harder to call because it needs to be told the name of the function and also the signature of it. This is because, unlike Python, Java has function overloading. The rules for writing Java function signatures are not especially complex, but the simplest way to determine them is to use the javap tool (part of the standard Java toolset). Here is an example of how to get the signatures out easily showing the java function signature and the types of parameters and results. \\n\\nGiven the following java code: \\n\\nimport java.util.*;\\npublic class JavaCat\\n{\\n public static String cat(String a, String b)\\n {\\n return a + b;\\n }\\n}
\\nCompiled using the following command:\\n\\n$ javac JavaCat.java
\\nYou can use javap to report the signatures: \\n\\n$ javap -s JavaCat\\nCompiled from "JavaCat.java"\\npublic class JavaCat {\\npublic static java.lang.String cat(java.lang.String, java.lang.String);\\n Signature: (Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;\\n}
\\nYou then simply find the signature you want, paste it into your ECL code and call it:\\n\\nIMPORT java;\\nSTRING jcat(STRING a, STRING b) := \\n IMPORT(java,\\n 'JavaCat.cat:(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;' : classpath('/opt/HPCCSystems/classes'));\\n\\njcat('Hello ', 'world!');
\\nThe second IMPORT has the name of the function with a colon and the name of the signature. You can use a colon with attributes after it to pass additional information to the plugin – here it is used to set the Java classpath. This example calls a simple Java function which concatenates two strings and returns the result.\\n\\nGetting the most out of using EMBED as an advanced user\\nSo now you’ve seen basic usage of the EMBED syntax, but there is much more you can do as an advanced user depending on the language you are using.\\n\\nPassing/returning records and datasets is significantly improved in HPCC Systems 5.0. Previously, in release 4.x you could only pass/return scalars and SETs. If you wanted to return a list or to pass in a large quantity of data, you had to turn it into a scalar first which could be awkward and inefficient. Using 5.0, you can also pass and return records and datasets. The details of exactly how datasets and records in ECL correspond to the target language features vary for each target. See the table below:\\n\\n[attachment=2:rf5vhtu4]Records%20and%20Datasets.png\\nA note about Python generators...\\n\\nUsing a generator in Python is effectively the same as using lazy evaluation. So for example, if you pass in a dataset with a billion rows to some python embedded code designed to return the 3rd field of the 1st record, only the first record is evaluated and the rest are not fetched at all. This is similar to using a choosen(1) in ECL. You can return a generator from Python taking advantage of the same lazy evaluation or simply return a standard Python list if lazy evaluation is not important.\\n\\nNew in 5.0 – Streaming data\\nUsers of HPCC Systems 5.x can take advantage of the ability to stream data. However, be aware that:\\n\\n
\\nSince streaming varies between languages, use the following table to guide you:\\n\\n[attachment=1:rf5vhtu4]Streaming Data.png\\nUsing Transforms\\nTransforms can also be used with the EMBED syntax bearing in mind that:\\n\\n\\n
\\nLet’s look at an example of advanced use of the EMBED syntax. The following example creates a “keyed join” to MySQL. Data is coming in, represented by indata and is being passed into the Join function which is doing a SELECT from a MySQL table. A record is being passed in and the project is saying call the EMBED function is called for every record in indata and eventually one record will be selected and returned from the table:\\n\\nIMPORT MySQL;\\nstringrec := RECORD\\n string name\\nEND;\\nsqlrec := RECORD\\n string ssn;\\n string address;\\nEND;\\nRECORD(sqlrec) MySQLJoin(RECORD(stringrec) inrec) := EMBED(mysql)\\n SELECT FIRST 1 * from tbl1 where name = ?;\\nENDEMBED;\\nPROJECT(indata, MySQLJoin(LEFT));\\n
\\nA better way of achieving the same thing is to pass the whole dataset in and let the embedded plugin call the select for every record in the input. This is better because you can batch them up into a single transaction on the SQL which may be interesting for the update case and cannot be done using the PROJECT.\\n \\nAlso, the previous example returns a single record so it only works for a one to one join whereas in the following example, a given record of the input may actually return more than one match on the output side so it is a one to many left inner join: \\n\\nIMPORT MySQL;\\nstringrec := RECORD\\n string name\\nEND;\\nsqlrec := RECORD\\n string ssn;\\n string address;\\nEND;\\nDATASET(sqlrec) MySQLJoin(dataset(stringrec) inrecs) := EMBED(mysql)\\n SELECT * from tbl1 where name = ?;\\nENDEMBED;\\nMySQLJoin(indata);
\\nSo now you know how to use it, but before you do, be aware of the wider implications of using the EMBED syntax as there are a number of points to consider:\\n\\n[attachment=0:rf5vhtu4]Implications to Consider.png\\nWhat not to do…\\nThere is quite an overhead in using the call to EMBED. So use it to do something significant and not to do something you could have done with a couple of lines of ECL. In other words:\\n\\n
\\nThe only exception to this is C++. Since ECL is translated into C++, there is no overhead in calling C++ and there are some things that are more efficiently done in C++. String manipulation, for example, is often easier to code using a procedural paradigm.\\n\\nGo ahead - Contribute to HPCC Systems by implementing a new embedded language plugin \\nThe embedded language features are implemented as plugins which makes it easier to add new ones. A function call uses ECL record metadata to walk records in datasets:\\n\\n
\\nIf there is a language you want to use one that is not currently implemented, the answer is simple. Make an embedded language plugin yourself and check it in. We will review it and include it in the next major release.\\n\\nThis information is based on a presentation given by Richard Chapman, Vice President of Research and Development, recorded at the 2014 HPCC Systems Engineering Summit. The full recording of this presentation is available on YouTube: https://www.youtube.com/watch?v=ESXMcrNiXhQ&list=UUmySfVDlEUzlIiIdDc7oQbQ\", \"post_time\": \"2014-11-13 13:59:20\" },\n\t{ \"post_id\": 23013, \"topic_id\": 2200, \"forum_id\": 41, \"post_subject\": \"Re: Generate a Reverse Polish Stack\", \"username\": \"Allan\", \"post_text\": \"From comments back, this version replaces the output numeric operators with readable text:\\n\\nEXPORT GenerateReversePolishStack(STRING Text) := FUNCTION\\n\\n /*\\n Generates A reverse Polich Stack from text with operands of the form:\\n \\n operand "qualifier"\\n \\n And the operators (in order of precedence): NOT, AND, OR or XOR\\n Brackets can be used to enforce a different precedence.\\n \\n e.g.\\n \\n A"q1 with q4" AND (B"q2" OR C"q3")\\n \\n Generates output of the form:\\n \\n 1 S7037 A q1 with q4\\n 1 S6142 B q2\\n 1 S1873 C q3\\n 2 S8633 S6142 S1873\\n 3 S7435 S7037 S8633\\n \\n Returns an empty set if there is a syntax error.\\n */\\n infile := DATASET(ROW(transform({ string line }, self.line := Text)));\\n\\n ActionType := ENUM(UNSIGNED1,None,LogicalOr,LogicalAnd,LogicalXor,LogicalNot);\\n SET OF STRING ActionText := ['','Or','And','Xor','Not'];\\n\\n Symbol := RECORD\\n STRING Action;\\n STRING5 id;\\n STRING key;\\n STRING Qualifier;\\n END;\\n\\n Production := RECORD\\n DATASET(Symbol) itm;\\n END;\\n\\n TYPEOF(Symbol.id) GetID := 'S'+INTFORMAT(HASH32(STD.System.Util.GetUniqueInteger(),RANDOM())%10000,SIZEOF(Symbol.id)-1,1);\\n\\n PRULE := RULE TYPE (Production);\\n\\n PATTERN ws := PATTERN('[[:space:]]');\\n TOKEN wordpat := PATTERN('[A-Za-z][A-Za-z0-9_]*');\\n PATTERN firstchar := PATTERN('[[:alnum:]\\\\'*&%!~#;:@?<>=+\\\\\\\\-_(){},.[\\\\\\\\]]');\\n PATTERN subsequent := firstchar | ws;\\n PATTERN anything := firstchar+subsequent*;\\n PATTERN quotechar := '"';\\n TOKEN quotedword := quotechar anything quotechar;\\n\\n PRULE forwardExpr := USE(Production, 'ExpressionRule');\\n\\n PRULE op\\n := wordpat quotedword TRANSFORM(Production,\\n SELF.itm := ROW({ActionText[ActionType.None],GetID,$1,$2[2..length($2)-1]},Symbol);\\n )\\n | '(' forwardExpr ')'\\n | 'NOT' wordpat quotedword TRANSFORM(Production,\\n SELF.itm := ROW({ActionText[ActionType.LogicalNot],GetID,$2,$3[2..length($3)-1]},Symbol);\\n )\\n | 'NOT' '(' forwardExpr ')' TRANSFORM(Production,\\n SELF.itm := $3.itm & ROW({ActionText[ActionType.LogicalNot],GetID,$3.itm[COUNT($3.itm)].id,''},Symbol);\\n )\\n ;\\n PRULE factor\\n := op\\n | SELF 'AND' op TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionText[ActionType.LogicalAnd],GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n ;\\n PRULE term\\n := factor\\n | SELF 'OR' factor TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionText[ActionType.LogicalOr] ,GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n | SELF 'XOR' factor TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionText[ActionType.LogicalXor],GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n ; \\n PRULE expr\\n := term : DEFINE ('ExpressionRule');\\n\\n p1 := 
PARSE(infile,line,expr,TRANSFORM(Production,SELF := $1),FIRST,WHOLE,SKIP(ws+),NOCASE,PARSE);\\n n1 := NORMALIZE(p1,LEFT.itm,TRANSFORM(Symbol,SELF := RIGHT)) : INDEPENDENT;\\n\\n RETURN n1;\\n\\nEND;\\n
\", \"post_time\": \"2018-09-13 10:23:04\" },\n\t{ \"post_id\": 20423, \"topic_id\": 2200, \"forum_id\": 41, \"post_subject\": \"Re: Generate a Reverse Polish Stack\", \"username\": \"Allan\", \"post_text\": \"There is now STD.System.Util.GetUniqueInteger() which will eliminate any possibility of a clash in generated Identifier names.\\n\\nIMPORT STD;\\nRID := {UNSIGNED8 id};\\nRID GetId(RID L) := TRANSFORM\\n SELF.id := STD.System.Util.GetUniqueInteger();\\nEND;\\n\\nn := NORMALIZE(DATASET([{0}],RID),2000,GetId(LEFT));\\n\\nRT := RECORD\\n UNSIGNED8 id := n.id;\\n UNSIGNED8 cnt := COUNT(GROUP);\\nEND;\\n\\nTABLE(n,RT,id)(cnt > 1);\\n
\", \"post_time\": \"2018-01-10 10:41:04\" },\n\t{ \"post_id\": 15893, \"topic_id\": 2200, \"forum_id\": 41, \"post_subject\": \"Re: Generate a Reverse Polish Stack\", \"username\": \"Allan\", \"post_text\": \"Hi jwit,\\n\\nYes this is working fine for me/us. The MOD 10000 has not throw up a clash and generated non-unique id's. (so far).\\n\\nIf you're concerned you could always increase the MOD to reduce the likelihood further, but you do have a valid point. Perhaps a datetime component could be added to the construction to generate Id's of the form S20170101084522_4976, but even then there is a very small chance of a clash. I would like to know of a fall proof way of generating unique ID's. So I'll watch this space.\\n\\nCheers\\n\\nAllan\", \"post_time\": \"2017-03-17 08:08:56\" },\n\t{ \"post_id\": 15883, \"topic_id\": 2200, \"forum_id\": 41, \"post_subject\": \"Re: Generate a Reverse Polish Stack\", \"username\": \"jwilt\", \"post_text\": \"Great post. Thanks, Allan.\\n\\nAllan's algorithm is using this technique to generate a unique ID for tokens:\\nTYPEOF(Symbol.id) GetID := 'S'+INTFORMAT(HASH32(STD.System.Util.GetUniqueInteger(),RANDOM())%10000,SIZEOF(Symbol.id)-1,1);\\n\\nAllan - did this work sufficiently for your purposes?\\n\\nOthers - is there another way to do this? I.e., to mark each token found with a unique ID. \\n\\nThanks.\", \"post_time\": \"2017-03-16 20:34:07\" },\n\t{ \"post_id\": 9352, \"topic_id\": 2200, \"forum_id\": 41, \"post_subject\": \"Generate a Reverse Polish Stack\", \"username\": \"Allan\", \"post_text\": \"Had to implement this for an internal product but then thought it could be useful for others:\\n\\nEXPORT GenerateReversePolishStack(STRING Text) := FUNCTION\\n\\n /*\\n Generates A reverse Polich Stack from text with operands of the form:\\n \\n operand "qualifier"\\n \\n And the operators (in order of precedence): NOT, AND, OR or XOR\\n Brackets can be used to enforce a different precedence.\\n \\n e.g.\\n \\n A"q1 with q4" AND (B"q2" OR C"q3")\\n \\n Generates output of the form:\\n \\n 1 S7037 A q1 with q4\\n 1 S6142 B q2\\n 1 S1873 C q3\\n 2 S8633 S6142 S1873\\n 3 S7435 S7037 S8633\\n \\n Returns an empty set if there is a syntax error.\\n */\\n infile := DATASET(ROW(transform({ string line }, self.line := Text)));\\n\\n ActionType := ENUM(UNSIGNED1,None,LogicalOr,LogicalAnd,LogicalXor,LogicalNot);\\n\\n Symbol := RECORD\\n ActionType Action;\\n STRING5 id;\\n STRING key;\\n STRING Qualifier;\\n END;\\n\\n Production := RECORD\\n DATASET(Symbol) itm;\\n END;\\n\\n TYPEOF(Symbol.id) GetID := 'S'+INTFORMAT(HASH32(STD.System.Util.GetUniqueInteger(),RANDOM())%10000,SIZEOF(Symbol.id)-1,1);\\n\\n PRULE := RULE TYPE (Production);\\n\\n PATTERN ws := PATTERN('[[:space:]]');\\n TOKEN wordpat := PATTERN('[A-Za-z][A-Za-z0-9_]*');\\n PATTERN firstchar := PATTERN('[[:alnum:]\\\\'*&%!~#;:@?<>=+\\\\\\\\-_(){},.[\\\\\\\\]]');\\n PATTERN subsequent := firstchar | ws;\\n PATTERN anything := firstchar+subsequent*;\\n PATTERN quotechar := '"';\\n TOKEN quotedword := quotechar anything quotechar;\\n\\n PRULE forwardExpr := USE(Production, 'ExpressionRule');\\n\\n PRULE op\\n := wordpat quotedword TRANSFORM(Production,\\n SELF.itm := ROW({ActionType.None,GetID,$1,$2[2..length($2)-1]},Symbol);\\n )\\n | '(' forwardExpr ')'\\n | 'NOT' wordpat quotedword TRANSFORM(Production,\\n SELF.itm := ROW({ActionType.LogicalNot,GetID,$2,$3[2..length($3)-1]},Symbol);\\n )\\n | 'NOT' '(' forwardExpr ')' TRANSFORM(Production,\\n SELF.itm := $3.itm & 
ROW({ActionType.LogicalNot,GetID,$3.itm[COUNT($3.itm)].id,''},Symbol);\\n )\\n ;\\n PRULE factor\\n := op\\n | SELF 'AND' op TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionType.LogicalAnd,GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n ;\\n PRULE term\\n := factor\\n | SELF 'OR' factor TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionType.LogicalOr ,GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n | SELF 'XOR' factor TRANSFORM(Production,\\n SELF.itm := $1.itm & $3.itm & ROW({ActionType.LogicalXor,GetID,$1.itm[COUNT($1.itm)].id,$3.itm[COUNT($3.itm)].id},Symbol)\\n )\\n ; \\n PRULE expr\\n := term : DEFINE ('ExpressionRule');\\n\\n p1 := PARSE(infile,line,expr,TRANSFORM(Production,SELF := $1),FIRST,WHOLE,SKIP(ws+),NOCASE,PARSE);\\n n1 := NORMALIZE(p1,LEFT.itm,TRANSFORM(Symbol,SELF := RIGHT)) : INDEPENDENT;\\n\\n RETURN n1;\\n\\nEND;\\n
\", \"post_time\": \"2016-03-17 14:20:36\" },\n\t{ \"post_id\": 16901, \"topic_id\": 3643, \"forum_id\": 41, \"post_subject\": \"Re: Converting String to Date type\", \"username\": \"Rony Albert\", \"post_text\": \"[quote="rtaylor":x9awx12s]Janet,\\n\\nThis code works for me: date := '3/20/01 2:27 AM';\\nStd.Date.ConvertDateFormat(date, '%m/%d/%y', '%Y%m%d');
I think you were "confusing" the function with the time formatting characters when you're really only interested in the date.\\n\\nHTH,\\n\\nRichard\\n\\nNow it is understood thank you so much for helping me to get through it!\\n[size=1:x9awx12s][color=transparent:x9awx12s]tachophobia\\n[color=transparent:x9awx12s]ipledges\", \"post_time\": \"2017-05-01 07:05:00\" },\n\t{ \"post_id\": 16423, \"topic_id\": 3643, \"forum_id\": 41, \"post_subject\": \"Re: Converting String to Date type\", \"username\": \"rtaylor\", \"post_text\": \"Janet,\\n\\nThis code works for me: date := '3/20/01 2:27 AM';\\nStd.Date.ConvertDateFormat(date, '%m/%d/%y', '%Y%m%d');
I think you were "confusing" the function with the time formatting characters when you're really only interested in the date.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2017-04-12 19:20:38\" },\n\t{ \"post_id\": 16343, \"topic_id\": 3643, \"forum_id\": 41, \"post_subject\": \"Re: Converting String to Date type\", \"username\": \"Rony Albert\", \"post_text\": \"I am confused too in order to get it done!\\n[size=1:11lvjsuh][color=transparent:11lvjsuh]subchorionichemorrhage[color=transparent:11lvjsuh]health care tips\", \"post_time\": \"2017-04-11 23:34:51\" },\n\t{ \"post_id\": 16233, \"topic_id\": 3643, \"forum_id\": 41, \"post_subject\": \"Re: Converting String to Date type\", \"username\": \"janet.anderson\", \"post_text\": \"I ran across your post when trying to do a date conversion. I was able to use the function you demonstrate above to get my date conversion to work, but I am confused why my first attempt (code below) did not work? I tried it with a couple of different input formats with no success, so what I am misunderstanding about ConvertDateFormat?\\n\\n\\ndate := '3/20/01 2:27 AM';\\nStd.Date.ConvertDateFormat(date, '%m/%d/%y %I:%M %p', '%Y%m%d');\\n
\", \"post_time\": \"2017-04-10 17:15:50\" },\n\t{ \"post_id\": 14823, \"topic_id\": 3643, \"forum_id\": 41, \"post_subject\": \"Converting String to Date type\", \"username\": \"rtaylor\", \"post_text\": \"I got an email with this request:I need to convert incoming date like “Tue Jul 20 00:00:00 EDT 2010” into date type.
So here's the code I wrote to do that:IMPORT Std;\\nParseDate(STRING28 d) := FUNCTION\\n SetParts := Std.Str.SplitWords(d,' ');\\n str := SetParts[2] + ' ' + SetParts[3] + ' ' + SetParts[6];\\n fmt :='%b%t%d%t%Y';\\n RtnDate := STD.Date.FromStringToDate(str, fmt);\\n RETURN RtnDate;\\nEND;
This simply makes use of the Std.Str.SplitWords() function to parse out the discrete parts from the input date string which the STD.Date.FromStringToDate() function can then convert to the Date_t (AKA - UNSIGNED4) integer format.\\n\\nInDate := 'Tue Jul 20 00:00:00 EDT 2010';\\nParseDate(InDate); //returns an integer value of 20100720
\\nAs useful as this function is, the principle it demonstrates shows how easy it is to use the Date Standard Library functions to manipulate date/time values from one format to another. For example, this next version just stretches the function to also return the time:IMPORT Std;\\nParseDateTime(STRING28 d) := FUNCTION\\n SetParts := Std.Str.SplitWords(d,' ');\\n DateStr := SetParts[2] + ' ' + SetParts[3] + ' ' + SetParts[6];\\n DateFmt :='%b%t%d%t%Y';\\n TimeFmt :='%H:%M:%S';\\n RETURN MODULE\\n EXPORT Date := STD.Date.FromStringToDate(DateStr, DateFmt);\\n EXPORT Time := STD.Date.FromStringToTime(SetParts[4], TimeFmt);\\n END;\\nEND; \\n\\nInDate := 'Tue Jul 20 12:32:45 EDT 2010';\\nParseDateTime(InDate).Date; //returns 20100720\\nParseDateTime(InDate).Time; //returns 123245
\\nHTH, \\n\\nRichard\", \"post_time\": \"2017-01-23 20:27:05\" },\n\t{ \"post_id\": 22223, \"topic_id\": 5673, \"forum_id\": 41, \"post_subject\": \"Re: Generate SQL INSERT statements from a DATASET\", \"username\": \"Allan\", \"post_text\": \"Richard,\\n\\nThanks very much for this correction, will take on-board.\\n\\nI must admit it's reassuring we have You and Bob + others as backstop reviewers!\\n\\nYours\\nAllan\", \"post_time\": \"2018-06-26 13:40:03\" },\n\t{ \"post_id\": 22193, \"topic_id\": 5673, \"forum_id\": 41, \"post_subject\": \"Re: Generate SQL INSERT statements from a DATASET\", \"username\": \"rtaylor\", \"post_text\": \"Allan,\\n\\nCool code! \\n\\nI had to change lines 67 & 68 to eliminate this error: \\n"Error: WHEN must be used to associate an action with a definition (68, 12)"\\n
TooBig := ASSERT(COUNT(ds) < 1000,\\n 'DATASET too big -- you really should consider bulk insert.');\\n RETURN WHEN(PROJECT(ds,MakeSQLStatement(LEFT)),TooBig);\\n
So, I have to assume you're using "legacy" in your environment, otherwise you would also be getting this error on syntax check. So for anybody outside LNRS, they would also need to make this change.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2018-06-25 15:13:46\" },\n\t{ \"post_id\": 22173, \"topic_id\": 5673, \"forum_id\": 41, \"post_subject\": \"Generate SQL INSERT statements from a DATASET\", \"username\": \"Allan\", \"post_text\": \"Hi,\\n\\nI've just had to create a load of SQL INSERT statements from an ECL result (DATASET), thought it might come in handy for someone else.\\nIt does not cope with child datasets, SET OF or DATA field types, but does escape quotes and '\\\\'. Warns of DATASET is really too large to be used in this way, and suggests bulk inserts be done instead. I've also appended some example uses.\\n\\nIMPORT STD;\\n\\n/*\\n Constructs MySql INSERT statments from a DATASET input.\\n Does not cope with:\\n Child datasets\\n DATA fields\\n SET OF fields\\n*/\\nSHARED MAC_content(reference,lay) := MACRO\\n #UNIQUENAME(attrib)\\n #SET(attrib,'')\\n #UNIQUENAME(sep)\\n #SET(sep,'')\\n #UNIQUENAME(out)\\n #EXPORTXML(out, lay)\\n #FOR (out)\\n #FOR (Field)\\n #IF(REGEXFIND('table',%'{@type}'%))\\n #ERROR('Unable to process child datasets.')\\n #ELSEIF(REGEXFIND('data|set of',%'{@type}'%))\\n #ERROR('Unable to process '+%'{@type}'%)\\n #ELSEIF(REGEXFIND('boolean',%'{@type}'%))\\n #APPEND(attrib,%'sep'%+'IF('+#TEXT(reference)+'.'+%'{@label}'%+',\\\\'TRUE\\\\',\\\\'FALSE\\\\')') \\n #ELSEIF(REGEXFIND('decimal|integer|real|unsigned',%'{@type}'%))\\n #APPEND(attrib,%'sep'%+#TEXT(reference)+'.'+%'{@label}'%)\\n #ELSE\\n #APPEND(attrib,%'sep'%+'\\\\'\\\\\\\\\\\\'\\\\'+f((STRING)'+#TEXT(reference)+'.'+%'{@label}'%+')+\\\\'\\\\\\\\\\\\'\\\\'')\\n #END\\n #SET(sep,'+ \\\\',\\\\'+')\\n #END\\n #END\\n %attrib%\\nENDMACRO;\\n\\nSHARED MAC_makeFieldListFromLayout(lay) := MACRO\\n #UNIQUENAME(attrib)\\n #SET(attrib,'')\\n #UNIQUENAME(sep)\\n #SET(sep,'(')\\n #UNIQUENAME(out)\\n #EXPORTXML(out, lay)\\n #FOR (out)\\n #FOR (Field)\\n #APPEND(attrib,%'sep'%+%'{@label}'%)\\n #SET(sep,',')\\n #END\\n #END\\n %'attrib'%+')'\\nENDMACRO;\\n\\n\\nEXPORT MAC_GenerateSQLStatementsFromDATASET(ds,TgtSQLTable) := FUNCTIONMACRO\\n\\n\\n LOCAL f(STRING txt) := REGEXREPLACE('(\\\\'|"|\\\\\\\\\\\\\\\\)',txt,'\\\\\\\\\\\\\\\\\\\\\\\\1'); // For MySql interpretor, escapes any quote delimiters and the escape character itself that is inside any STRING fields.\\n\\n {STRING SQLStatement} MakeSQLStatement(RECORDOF(ds) L) := TRANSFORM\\n\\n SELF.SQLStatement := 'INSERT INTO '+ TgtSQLTable +' '\\n + MAC_makeFieldListFromLayout(RECORDOF(ds))\\n + ' VALUES ('\\n + MAC_content(L,RECORDOF(ds))\\n +');';\\n END;\\n\\n ASSERT(COUNT(ds) < 1000,'DATASET of a size that you really should consider bulk insert.');\\n RETURN PROJECT(ds,MakeSQLStatement(LEFT));\\n\\nENDMACRO;\\n\\n\\n////////////////////////\\n// Some Unit test cases:\\n////////////////////////\\n\\n// Test 1: Must fail indicating child datasets cannot be processed. 
But can process a child dataset from a de-referenced parent.\\nd1 := DATASET([{'ab\\\\\\\\c'}],{STRING txt});\\nd2 := DATASET([{'de\\\\'f'},{'gh\\\\"i'}],{STRING txt});\\ndt1 := DATASET([{FALSE,'',1.1,88.4,1,1.4,'',d1},{TRUE,'',1.2,99.52,2,2.5,'',d2},{FALSE,'',1.3,75.754,3,3.6,'',d1+d2}],{BOOLEAN b,QSTRING5 qs5,DECIMAL2 d2, UDECIMAL3 ud3,INTEGER5 id,REAL4 r4,UTF8 ut8,DATASET(RECORDOF(d1)) child});\\n//MAC_GenerateSQLStatementsFromDATASET(dt1,'Schema.TestA1');\\n//MAC_GenerateSQLStatementsFromDATASET(dt1.Child,'Schema.TestA2');\\n\\n\\n// Test 2: Must fail indicating DATA types cannot be processed\\ndt2 := DATASET([{'ab\\\\'c',1.1,88.4,1,1.4,'CRUMP BILL',x'ffed344a'},{' hhtryf',1.2,99.52,2,2.5,'',x'ffed344a'},{'treSDE',1.3,75.754,3,3.6,'ZASE',x'0011223344aa'}],{QSTRING5 qs5,DECIMAL2 d2, UDECIMAL3 ud3,INTEGER5 id,REAL4 r4,UTF8 ut8,DATA da1});\\n//MAC_GenerateSQLStatementsFromDATASET(dt2,'Schema.TestB1');\\n\\n// Test 3: Must fail indicating 'SET OF' types cannot be processed\\ndt3 := DATASET([{'ab\\\\'c',1.1,88.4,1,1.4,'CRUMP BILL',[1,2,3,4]},{' hhtryf',1.2,99.52,2,2.5,'',[1,2,3,4]},{'treSDE',1.3,75.754,3,3.6,'ZASE',[1,2,3,4]}],{QSTRING5 qs5,DECIMAL2 d2, UDECIMAL3 ud3,INTEGER5 id,REAL4 r4,UTF8 ut8,SET OF UNSIGNED sou});\\n//MAC_GenerateSQLStatementsFromDATASET(dt3,'Schema.TestC1');\\n\\n// Test 4: Success, must not fail.\\ndt4 := DATASET([{TRUE,'ab\\\\'c',1.1,88.4,1,1.4,'CRUMP BILL'},{FALSE,' hhtryf',1.2,99.52,2,2.5,''},{FALSE,'treSDE',1.3,75.754,3,3.6,'ZASE'}],{BOOLEAN b,QSTRING5 qs5,DECIMAL2 d2, UDECIMAL3 ud3,INTEGER5 id,REAL4 r4,UTF8 ut8});\\n//MAC_GenerateSQLStatementsFromDATASET(dt4,'Schema.TestD1');\\n
\", \"post_time\": \"2018-06-25 07:28:31\" },\n\t{ \"post_id\": 22523, \"topic_id\": 5783, \"forum_id\": 41, \"post_subject\": \"Bloom filters - What they are and how to use them\", \"username\": \"lchapman\", \"post_text\": \"Try out Bloom filters, which are a new feature included in our HPCC Systems 7.0.0 Beta Release, now available for download.\\n\\nTo find out more about Bloom filters and how to use them, read this blog written by Richard Chapman, VP Technology and leader of the HPCC Systems development team.\", \"post_time\": \"2018-07-19 11:58:27\" },\n\t{ \"post_id\": 23843, \"topic_id\": 6173, \"forum_id\": 41, \"post_subject\": \"Loading a DATASET with a Record Structure\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nI was having to flatten a complex data structure and realised a structure in a DATASET would be easy to use. I reproduce below as it may be of general use. (I just used it for my specific task, Richard Taylor has generalised by wrapping it inside a FUNCTIONMACRO)\\n\\n//************************************************************************\\nRecordStructureAsDATASET(RecStruct) := FUNCTIONMACRO \\n #DECLARE(xmlOfRecordStructure)\\n #EXPORT(xmlOfRecordStructure,RecStruct)\\n\\n RRec := RECORD\\n STRING ecltype := XMLTEXT('@ecltype');\\n BOOLEAN isRecord := (BOOLEAN) XMLTEXT('@isRecord');\\n STRING label := XMLTEXT('@label');\\n STRING name := XMLTEXT('@name');\\n STRING position := XMLTEXT('@position');\\n STRING rawtype := XMLTEXT('@rawtype');\\n STRING size := XMLTEXT('@size');\\n STRING ttype := XMLTEXT('@type');\\n BOOLEAN isEnd := (BOOLEAN) XMLTEXT('@isEnd');\\n END;\\n\\n OnRec := {STRING FldEntry};\\n RETURN PARSE(DATASET([{%'xmlOfRecordStructure'%}],OnRec),FldEntry,RRec,XML('Data/Field'));\\nENDMACRO;\\n//************************************************************************\\n\\nR1abc := RECORD\\n UNSIGNED1 u1;\\n STRING2 s2;\\n SET OF STRING3 ss3;\\nEND;\\n\\nR2def := RECORD\\n REAL4 r4;\\n UDECIMAL3_2 ud32;\\n R1abc;\\n R1abc InnerR1;\\nEND;\\n\\nR := RECORD\\n BOOLEAN b;\\n QSTRING5 qs5;\\n DATASET(R1abc) dsr1;\\n SET OF DATASET(R2def) sdsr2;\\nEND;\\n\\nOUTPUT(RecordStructureAsDATASET(R),NAMED('RecordStructureAsDATASET'));\\n
\\nRunning the above gives:\\n\", \"post_time\": \"2018-12-20 10:02:03\" },\n\t{ \"post_id\": 23853, \"topic_id\": 6183, \"forum_id\": 41, \"post_subject\": \"Multi-Variable, Multi-Dimensional Searches\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nBelow is an example of the use of the GRAPH built-in. A very handy function when you want to search some multi-dimensional space (In the example below I've used holiday selection with 3 search criteria/dimensions) with a range of values (an OR of values).\\n
\\nd := DATASET([{'A',1,100,0}\\n ,{'B',2,200,0}\\n ,{'C',2,200,2000}\\n ,{'D',2,100,2000}\\n ,{'E',2,100,0}\\n ,{'F',3,200,0}\\n ,{'G',3,200,2000}\\n ,{'H',3,100,2000}\\n ,{'I',3,100,0}\\n ,{'J',3,300,0}\\n ,{'K',3,300,3000}\\n ,{'L',3,100,3000}\\n ,{'M',3,100,0}\\n ],{STRING1 Holiday;UNSIGNED TourOperator,UNSIGNED Price,UNSIGNED HolidayClass});\\n\\nSelect(SET OF UNSIGNED pTourOperator = [],SET OF UNSIGNED pPrice = [],SET OF UNSIGNED pHolidayClass = []) := FUNCTION\\n\\n DFilters:= [DATASET(pTourOperator,{UNSIGNED itm}),DATASET(pPrice,{UNSIGNED itm}),DATASET(pHolidayClass,{UNSIGNED itm})];\\n \\n RunFilter(SET OF DATASET(RECORDOF(d)) ds,UNSIGNED c) := FUNCTION\\n\\n j := JOIN(ds[c-1],DFilters[c],CASE(C, 1 => LEFT.TourOperator = RIGHT.itm\\n , 2 => LEFT.Price = RIGHT.itm\\n , LEFT.HolidayClass = RIGHT.itm),TRANSFORM(LEFT),ALL);\\n RETURN IF(EXISTS(DFilters[c]),j,ds[c-1]);\\n END;\\n RETURN GRAPH(d,3,RunFilter(ROWSET(LEFT),COUNTER));\\nEND;\\n\\nSelect(,,);\\nSelect([3],,[3000,0]);\\nSelect(,[100],[3000,0]);\\n
\\nObviously easily extendable to as many dimensions as you like. Even with as low a number as 5 dimensions alternative approaches that don't use GRAPH start to look very messy.\\n\\nYouTube Video explaining GRAPH: https://youtu.be/O8L83FxAa6s \\n\\nYours\\nAllan\", \"post_time\": \"2018-12-21 09:53:44\" },\n\t{ \"post_id\": 23903, \"topic_id\": 6193, \"forum_id\": 41, \"post_subject\": \"Avoid unnecessary distributions.\", \"username\": \"Allan\", \"post_text\": \"If you know a logical file you are working with is distributed, it’s annoying to have to unconditionally DISTRIBUTE every time a workunit is run just to ensure the distribution aligns with the cluster size the said workunit is running on. Obviously workunits run on a specifically sized cluster have to redistribute any file built on a differently sized cluster, or that is not distributed at all.\\nThe trick being to detect when a re-distribution is NOT necessary.\\n\\nI have found the following test works for our business:\\n\\nIf <Number of Nodes in the cluster the Workunit is running on> = <Number of Nodes file was built on>\\nTHEN\\n Use file as is (i.e. no need for re-distribution)\\nELSE\\n Re-distribute file\\nEND\\n\\nPseudo ECL:\\nIMPORT STD;\\nSomefilename := ‘~folder::file’;\\nd := DATASET(Somefilename,layout,THOR);\\nn_nodes := STD.system.ThorLib.Nodes();\\n\\ndistributed_d := IF((INTEGER)NOTHOR(STD.File.GetLogicalFileAttribute(Somefilename,'numparts')) = n_nodes\\n ,DISTRIBUTED(d,HASH32(id))\\n ,DISTRIBUTE (d,HASH32(id)));\\n
\\nNote if the filename is known at compile time, the expression is evaluated at compile time. See the Graph for confirmation. \\n\\nOne can also check the ‘Queue’ names of the running workunit and the ‘Queue’ the file was built on, though this is more work as STD.File.GetLogicalFileAttribute does not have an option to return the ‘Queue’.\\nIt’s still possible as GetLogicalFileAttribute does have an option to return the WUid the file was built on, this WUid can be passed to the ESP service WsWorkunits/WUInfo which returns with the ‘Queue’ in the (confusingly called) <Workunit><Cluster> tag. This can then be compared in the same manner as above with the return from STD.System.Job.Target(). \\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-01-03 12:06:41\" },\n\t{ \"post_id\": 24933, \"topic_id\": 6423, \"forum_id\": 41, \"post_subject\": \"Re: multiple instance through single function\", \"username\": \"Allan\", \"post_text\": \"Hi snehalpatil_2391,\\n\\nCould I request this question be posted in say the 'Programming' folder. This folder 'Tips & Tricks' should have postings restricted to just that 'Tips & Tricks'.\\n\\nYours\\nAllan\", \"post_time\": \"2019-03-08 19:34:35\" },\n\t{ \"post_id\": 24833, \"topic_id\": 6423, \"forum_id\": 41, \"post_subject\": \"Re: multiple instance through single function\", \"username\": \"rtaylor\", \"post_text\": \"snehalpatil_2391,\\n\\nWhat exactly does this question have to do with ECL programming? Are you embedding Java code within your ECL? \\n\\nAnd what exactly do you mean by "multiple instances through function"?\\n\\nRichard\", \"post_time\": \"2019-03-07 14:38:38\" },\n\t{ \"post_id\": 25523, \"topic_id\": 6663, \"forum_id\": 41, \"post_subject\": \"Viewing ECL generated by MACROs\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nFor a long time one of my problems in using MACROS was that I could not see the ECL generated by the MACRO. If I could do that, developing MACROS would be a whole lot easier.\\nAnyway this has bugged me enough that I've worked on a solution. I expect its bugged others so I put it out here for use and comment. (Perhaps there is an easier way than I give here, in which case PLEASE post a reply.)\\nThe secret is to generate the ECL as a STRING then #EXPAND the STRING outside the MACRO.\\nDoing that allows you to do one run to output the STRING. One can then cut-and-paste the resultant STRING into its own BWR window, syntax check, or even run the ECL, from that BWR to find any problems. It then becomes a trivial task to correct the MACRO.\\nI've even known people to dispense with the MACRO completely once the generated ECL runs correctly.\\nAny way here is an example, of a fairly complex little MACRO where its easy to miss a bracket or something. 
\\n\\nEXPORT GenerateStatReport_Macro(d,outAttrib) := FUNCTIONMACRO\\n\\n #UNIQUENAME(totalNumberOfRecords);\\n #UNIQUENAME(GetPercent);\\n\\n Stats(dinner,AttributeName) := FUNCTIONMACRO\\n\\n RETURN 'ROW({'+#TEXT(AttributeName)\\n +',(STRING) COUNT('+dinner+'('+AttributeName+' = \\\\'Y\\\\'))'\\n +',(STRING) '+%'GetPercent'%+'(COUNT('+dinner+'('\\n +AttributeName+' = \\\\'Y\\\\')))'\\n +'},{'\\n +' STRING AttributeName'\\n +',STRING Match'\\n +',STRING Match_Percentage'\\n +'})\\\\n';\\n ENDMACRO;\\n\\n RFlat := RECORDOF(d);\\n\\n #EXPORTXML(out, RFlat);\\n\\n #DECLARE(sep);\\n #SET(sep,'');\\n #DECLARE(str);\\n\\n #SET(str,%'totalNumberOfRecords'%+' := COUNT('+#TEXT(d)+');\\\\n'\\n +%'GetPercent'%\\n +'(INTEGER iCnt) := (DECIMAL5_2) (( iCnt/'\\n +%'totalNumberOfRecords'%+')*100)+\\\\'%\\\\';\\\\n'\\n +#TEXT(outAttrib)+':=');\\n\\n #FOR (out)\\n #FOR (Field)\\n #APPEND(str,%'sep'%+Stats(#TEXT(d),%'{@label}'%))\\n #SET(sep,'&')\\n #END\\n #END\\n Res := %'str'%+';';\\n RETURN Res;\\n\\nENDMACRO;\\n
\\n\\nNote the return type is a STRING. #EXPAND not done in the FUNCTIONMACRO itself.\\nTo see if the generated ECL is correct, one first runs:\\n\\nInData := DATASET([{'Y','N','Y'}\\n ,{'Y','Y','Y'}\\n ,{'Y','Y','Y'}\\n ,{'N','N','Y'}\\n ,{'N','N','Y'}\\n ,{'N','N','Y'}\\n ,{'N','Y','Y'}\\n ],{STRING Included,STRING OutLier,STRING Extra});\\n\\n//#EXPAND(GenerateStatReport_Macro(InData,res))\\n//OUTPUT(res,NAMED('RESULTS'),ALL);\\nGenerateStatReport_Macro(InData,res);\\n
\\nThis will produce an output of:\\n\\n__totalNumberOfRecords__6591__ := COUNT(InData);\\n__GetPercent__6592__(INTEGER iCnt) := (DECIMAL5_2) (( iCnt/__totalNumberOfRecords__6591__)*100)+'%';\\nres:=ROW({'included',(STRING) COUNT(InData(included = 'Y')),(STRING) __GetPercent__6592__(COUNT(InData(included = 'Y')))},{ STRING AttributeName,STRING Match,STRING Match_Percentage})\\n&ROW({'outlier',(STRING) COUNT(InData(outlier = 'Y')),(STRING) __GetPercent__6592__(COUNT(InData(outlier = 'Y')))},{ STRING AttributeName,STRING Match,STRING Match_Percentage})\\n&ROW({'extra',(STRING) COUNT(InData(extra = 'Y')),(STRING) __GetPercent__6592__(COUNT(InData(extra = 'Y')))},{ STRING AttributeName,STRING Match,STRING Match_Percentage})\\n;\\n
\\ncut-and-paste this into a builder window, along with the definition of the input dataset and syntax that to find the errors.\\nOnce the MACRO is generating the correct ECL one can then change to use:\\n\\n#EXPAND(GenerateStatReport_Macro(InData,res))\\nOUTPUT(res,NAMED('RESULTS'),ALL);\\n
\\n\\nTo do the actual job.\\nBy the way I use #UNIQUENAME to allow the MACRO to be used multiple times in one compilation.\\n\\nI hope this will help people\\n\\nYours\\n\\nAllan\", \"post_time\": \"2019-03-27 11:31:40\" },\n\t{ \"post_id\": 26673, \"topic_id\": 7043, \"forum_id\": 41, \"post_subject\": \"Re: Getting all messages from a WU,\", \"username\": \"Allan\", \"post_text\": \"Raised ticket:\\nhttps://track.hpccsystems.com/browse/HPCC-22196\\nto get functionality incorporated into HPCC/ECL\", \"post_time\": \"2019-05-23 08:02:53\" },\n\t{ \"post_id\": 26613, \"topic_id\": 7043, \"forum_id\": 41, \"post_subject\": \"Re: Getting all messages from a WU,\", \"username\": \"rtaylor\", \"post_text\": \"Thanks for the contribution, Allan!!\", \"post_time\": \"2019-05-22 15:12:47\" },\n\t{ \"post_id\": 26573, \"topic_id\": 7043, \"forum_id\": 41, \"post_subject\": \"Getting all messages from a WU,\", \"username\": \"Allan\", \"post_text\": \"Hi,\\nECL built-in FAILMESSAGE just returns the LAST error message form a Workunit.\\n99% of the time this is just:\\n4294967295, eclagent, System error: -1: Abort execution\\n
\\nThe actual error is an earlier message. This is especially annoying when you crash out with an ASSERT...,FAIL. The ASSERTion message is NOT the last message. \\nThe function below returns all messages from a Workunit. You can also filter by the severity of the message, one of 'info', warning' or 'error'. (Note a message type of a particular severity, also returns all messages of a higher severity)\\nGetWUMessages(STRING Wuid,STRING MessageType = 'error') := FUNCTION\\n /*\\n Note a message type of a particular severity, also returns all messages of a higher severity.\\n */\\n\\n Chk := ASSERT(MessageType IN ['info','warning','error'],'MessageType must be one of \\\\'info\\\\', \\\\'warning\\\\' or \\\\'error\\\\'.',FAIL);\\n rWURequest\\t:=\\n record\\n string\\t\\tWuid{XPATH('Wuid')}\\t\\t\\t\\t\\t\\t\\t\\t := Wuid;\\n STRING \\tExceptionSeverity{XPATH('ExceptionSeverity')}\\t := MessageType;\\n end;\\n\\n rWUResponse\\t:=\\n record\\n string\\t\\tWuid{XPATH('Wuid'),maxlength(20)};\\n string \\t\\tResults{XPATH('Results')};\\n end;\\n\\n\\n dWUResult\\t:=\\tsoapcall( 'http://<Ip:port e.g. 1.2.3.4:8010>/WsWorkunits',\\n 'WUFullResult',\\n rWURequest,\\n rWUResponse,\\n XPATH('WUFullResultResponse')\\n );\\n\\n // Convert XML reponse into something readable.\\n\\n RLine := RECORD\\n STRING Code{xpath('Code')};\\n STRING Filename{xpath('Filename')};\\n STRING Line{xpath('Line')};\\n STRING Source{xpath('Source')};\\n STRING Message{xpath('Message')};\\n END;\\n\\n ROut := RECORD\\n DATASET(RLine) Results{XPATH('*')};\\n END;\\n\\n Out := DATASET(FROMXML(ROut,dWUResult.Results));\\n T(STRING s) := IF(s = '','',', '+s);\\n \\n RETURN MODULE\\n EXPORT AsXML := WHEN(dWUResult.Results,chk);\\n EXPORT AsDATASET := WHEN(NORMALIZE(Out,LEFT.Results,TRANSFORM(RIGHT)),chk);\\n EXPORT AsString := WHEN(NORMALIZE(Out,LEFT.Results,TRANSFORM({STRING Message}\\n ;SELF.Message := RIGHT.Code\\n + T(RIGHT.Filename)\\n + T(RIGHT.Line)\\n + T(RIGHT.Source)\\n + T(RIGHT.Message))),chk);\\n END;\\nEND;\\n
\\n\\nTo use:\\nf := GetWUMessages('W20190522-100123','error');\\nf.AsXML;\\nf.AsDATASET;\\nf.AsString;\\n
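\\nFor example (purely illustrative; the result name 'AllMessages' is just a label), the flattened form can be landed as a named result like any other recordset:\\nOUTPUT(f.AsString,NAMED('AllMessages'));\\n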
\\nMight save 10 seconds of people's lives.\\nYours\\nAllan\", \"post_time\": \"2019-05-22 10:48:15\" },\n\t{ \"post_id\": 29763, \"topic_id\": 7803, \"forum_id\": 41, \"post_subject\": \"Universal Workunit Scheduler\", \"username\": \"Allan\", \"post_text\": \"Attached is example code (use without restriction) for a workunit scheduler.\\n\\nEnjoy\\n\\n[attachment=2:1jmpvvhf]UniversalWUScheduler.ecl\\n[attachment=1:1jmpvvhf]Demo1.ecl\\n[attachment=0:1jmpvvhf]Demo2.ecl\\nSummary\\nThis document describes a harness for scheduling one or more streams of work within the HPCC system.\\nHPCC has the concept of a 'Workunit' on THOR. A 'Workunit' performs a task on THOR; it may be a build of a keyfile, a spray of data into THOR, an analysis of historic data or myriad other tasks. This is all well and good, but there are numerous scenarios where one workunit cannot, in itself, complete the entire job. The job requires multiple stages, for example:\\n\\n* (E)xtraction; (T)ransform; (L)oad of data into THOR is almost invariably a multi-stage process where the 'scrubbing' or 'Transforming' of data has to wait upon the extraction and presentation of its input.\\n* One Workunit may be 'watching' the run of another workunit and e-mailing out progress reports.\\n* Build of retro datasets almost always requires multiple builds of the same dataset, consequently multiple runs of the same workunit but for different dates.\\n* Multiple tasks (or workunits) can be dependent upon the same event (say presentation of data), all dependent tasks being able to run in parallel.\\n* The decision on what stream of work to do may be dependent upon results from a workunit, e.g. an unusually large amount of ingested data may require extra tasks inserted into the normal stream of work.\\n* Stats on, say, the 'process time' of a workunit obviously cannot be generated by the target workunit itself, as it has not yet completed.\\n* A build of a complex product can well be a choreography of builds of components from disparate teams: a final assembly of pre-fabricated components, all of which have to be in the right state at the right time.\\n
\\nThis scheduler harness is suitable for all the scenarios above.\\nThe basic idea is to implement each workunit as an ECL FUNCTION with the scheduler harness as a wrapper around that FUNCTION. So the harness both calls the target application function and is directed by that same function as to what to do next on return. Note: all target code must be accessible to the target THOR(s) (plural, as the workunits do not all have to run on the same THOR). \\n\\nAs pseudo code it is:\\nDaisyChain(TargetApplicationFUNCTION(<NextJob>));
\\nTo use the scheduling harness the only mandatory condition is that the target application takes, as a parameter, the 'Job Number' of the next job to perform in the sequence and returns a 'Job Number'.\\nThe FUNCTION has three options regarding the 'Job Number' it returns:\\n1. Just echo back the 'Job Number' supplied as the input parameter and let the harness work out what to do next.\\n2. Return 0 (zero). This unconditionally terminates the sequence of executed workunits regardless of any default behaviour the harness may have had planned. (But NOT any workunit sequences running in PARALLEL in the same finite state machine.)\\n3. Return a completely new/different 'Job Number'. This directs the harness to start a different WU from that specified by the defined machine, allowing the sequence of workunits to be decided at run time. With this option, one has to have detailed knowledge of the finite state machine driving the run-time operation of the harness.\\n\\nECL allows one to tie an action to a returned result thus:\\nTYPEOF(<NextJob>) TargetApplicationFUNCTION(TYPEOF(<NextJob>) <NextJob>) := FUNCTION\\n RETURN WHEN(<NextJob>,Action);\\nEND;
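\\nAs a purely illustrative sketch (the function, file and result names below are invented, not taken from the attached demos), a minimal target FUNCTION using option 1 might look like:\\nUNSIGNED4 IngestStep(UNSIGNED4 NextJob) := FUNCTION\\n // Do the real work of this stage.\\n InData := DATASET('~demo::in::people',{STRING20 name},THOR);\\n DoWork := OUTPUT(COUNT(InData),NAMED('PeopleCount'));\\n // Echo the supplied job number back and let the harness decide what runs next.\\n RETURN WHEN(NextJob,DoWork);\\nEND;\\n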
\\nDefining the Sequence of workunits\\nThe Record layout defining one workunit is:\\nOneTask := RECORD\\n STRING WUName; // The 'name' to give to the workunit via the #WORKUNIT('name'...) option.\\n STRING eclFUNCTION; // The full path to the target application FUNCTION\\nEND;
\\nThen a sequence of workunits is just a DATASET(OneTask), where the order of the records in the DATASET defines the order of execution.\\nThere is also the option to define PARALLEL running with a DATASET wrapper around the DATASET(OneTask).\\nIn pseudo code:\\nParallelRuns := RECORD\\n DATASET(DATASET(OneTask)) Queues;\\nEND;
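\\nAs an illustrative sketch (the module and attribute names are invented, not taken from the attached demos), a simple two-step sequence could be declared as:\\nMySequence := DATASET([{'Ingest job <NextJob>','MyETL.IngestStep'},\\n {'Scrub job <NextJob>','MyETL.ScrubStep'}],OneTask); // record order = execution order\\n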
\\nMacros are supplied to allocate <NextJob> identifiers and to generate initiating ECL that can be passed to \\n.../WsWorkunits/WUSubmit to programmatically start the whole sequence.\\nYou can also do everything by hand, defining the machine yourself and allocating your own <NextJob> identifiers. Note that within the 'OneTask' structure the placeholder 'NextJob' is signified by the text '<NextJob>', with a synonym of '<state>'.\\n\\nAdditional Features\\nAlong with the mandatory placeholder <NextJob>, one can also use the <ParentWUID> placeholder in either the WUName or the parameter to the target ECL function (its type being STRING). This allows communication from parent to child workunits using:\\nTYPEOF(<NextJob>) TargetApplicationFUNCTION(TYPEOF(<NextJob>) NextJob,STRING ParentWUID) := FUNCTION\\nSomeData := DATASET(WORKUNIT(ParentWUID, '<OUTPUT Identifier>'), {STRING Somedata})[1].Somedata;
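\\nFor completeness, a purely illustrative parent-side snippet (the result name 'HandoverValue' is invented) showing how such a value would be published for a child workunit to read back with DATASET(WORKUNIT(...)):\\nOUTPUT(DATASET([{'some value'}],{STRING Somedata}),NAMED('HandoverValue'));\\n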
\\nThe target ECL Function can also take, as a parameter, the entire machine driving the sequence of work, using the placeholder <fsm>. Obviously this has no meaning in the WUName.\\nIn pseudo code:\\n\\nTYPEOF(<NextJob>) TargetApplicationFUNCTION(TYPEOF(<NextJob>) NextJob,TYPEOF(<fsm>) fsm) := FUNCTION
\\nConclusions\\n\\nThis design is truly independent of, and agnostic to, the workings of any application.\\n\\nBeing based on a State Machine, there is no limit to its flexibility. For example, applications can themselves use the harness to initiate their own scheduling sequence, in effect allowing nested scheduling of workunits.\\n \\nThe harness wrapper for any individual workunit runs within that same workunit. Consequently, if the workunit crashes the wrapper is terminated as well; the machine just stops, without defunct child workunits left running or EVENTs left to be de-scheduled. (Note only the individual stream stops; other streams running in PARALLEL in the same State Machine are unaffected.)\\n
\\n\\nFinally my Thanks and Acknowledgement to:\\n Richard Taylor\\n Robert Foreman\\n Tony Kirk\\n Dan Camper\\n Charles Kaminski\\nWithout whose help this project would not have got over the line.\\n\\nYours\\nAllan\", \"post_time\": \"2020-03-09 19:47:01\" },\n\t{ \"post_id\": 29773, \"topic_id\": 7813, \"forum_id\": 41, \"post_subject\": \"Universal Workunit Scheduler (Continued)\", \"username\": \"Allan\", \"post_text\": \"It seems a post is limited to 3 three attachments, so here are the other Demos I have for scheduling workunits using the said harness.\\n\\n[attachment=2:2dyk75hr]Demo3.ecl\\n\\nThese two use STD.System.Email.SendEmail where the 'server', 'port' and 'sender' parameters are defaulted to environment variables.\\n\\n[attachment=1:2dyk75hr]Demo4.ecl\\n[attachment=0:2dyk75hr]Demo5.ecl\", \"post_time\": \"2020-03-09 19:56:15\" },\n\t{ \"post_id\": 4540, \"topic_id\": 992, \"forum_id\": 43, \"post_subject\": \"Re: ECL Bundles competition 2013!\", \"username\": \"DSC\", \"post_text\": \"I know the deadline for issuing a pull request was Sept. 3, but what is the deadline for code freeze? Feedback causes some code changes as well as simply the developer coming up with new ideas. Is there a deadline for stopping changes so the bundles can be officially judged?\\n\\nThanks,\\n\\nDan\", \"post_time\": \"2013-09-05 15:29:58\" },\n\t{ \"post_id\": 4500, \"topic_id\": 992, \"forum_id\": 43, \"post_subject\": \"Re: ECL Bundles competition 2013!\", \"username\": \"richardkchapman\", \"post_text\": \"We have our first entry! Head over to https://github.com/hpcc-systems/ecl-bundles/pulls to see the submission from Gordon.\\n\\nNote that early submission of pull requests is very much encouraged - that way you can get code-reviewed and have a chance to correct any issues ahead of judging time. Judging will take into account the development process as well as the end-product, so get collaborating!\", \"post_time\": \"2013-08-29 07:51:42\" },\n\t{ \"post_id\": 4430, \"topic_id\": 992, \"forum_id\": 43, \"post_subject\": \"ECL Bundles competition 2013!\", \"username\": \"flavio\", \"post_text\": \"Goal\\n\\nIn order to introduce the new bundle system in version 4.0 of the HPCC platform, we are encouraging attendees to the 2013 LexisNexis® Risk Solutions HPCC Engineering Summit to write their own bundles, the best of which will be added to a central 'Approved bundles' repository (and thus be available for any ECL programmer to use). \\n\\nNOTE: the very best of these will win prizes at the summit.\\n\\nIn the simplest terms, a bundle is just an ECL module, together with the metadata (defined by an exported ECL attribute called Bundle) required by the ECL bundle tool to properly install it. Bundles may have dependencies on other bundles (which the ECL bundle tool will check), and may specify versions, copyrights, etc. 
Bundles are designed to be distributed either as a single .ECL file (for the simplest cases), or as a source directory (optionally compressed into a zip or tgz file).\\n\\nA good bundle does a single thing well, with a consistent, well-defined, documented interface that is easy to use.\\n\\nOne or more example bundles supplied by the judges will be placed in the ecl-bundles repository before the competition is launched.\\n \\nRules\\n\\nThe competition is open to all conference attendees, other than the judges.\\n\\nSubmitted bundles must be able to be cleanly installed on an HPCC 4.0.0 community edition system.\\n\\nBundles should be submitted as pull requests to the GitHub 'ecl-bundles' repository, (https://github.com/hpcc-systems/ecl-bundles). While you CAN wait until the last minute to submit your pull request, it is strongly recommended that you submit them early and get feedback from other entrants, judges, and anyone else that might be watching the repository. Bear in mind that this is a public repository, so please don't submit code that is LN-proprietary, and most definitely do not include any PII data in your submissions.\\n\\nCooperation is encouraged, and a bundle may be entered into the competition with multiple listed authors. Feel free to ask anyone, including the judges, for advice and assistance in creating or designing your bundle, and to comment on pull requests from other entrants.\\n\\nDeadlines\\n\\nAll submissions must be sent as pull requests to the ECL Bundles Github repository (https://github.com/hpcc-systems/ecl-bundles) by 9/3/2013 to be considered for this contest. \\n\\nThe winners will be announced at the LexisNexis HPCC Systems Summit 2013 event, in September.\\n\\nJudges\\n\\nRichard Chapman, David Bayliss, Jill Luber, and Flavio Villanustre.\\n\\nJudging criteria\\n\\nEntries will be judged on usefulness, cleanliness, style, maintainability and efficiency, as the judges see fit.\\n\\nAny entry that meets the judges' minimum criteria (and does not duplicate functionality) will be merged into the github repository, (https://github.com/hpcc-systems/ecl-bundles), and thus made available for use by the ECL community.\\n\\nTeams submitting entries with potentially duplicate or overlapping functionality are encouraged to cooperate to form a single 'best of breed' entry for the central repository.\\n\\nPrizes\\n\\nAnyone whose bundle is accepted into the 'Approved bundles' repository gets an iTunes voucher.\\n\\nThe authors of the best bundles get an iPad mini!\", \"post_time\": \"2013-08-05 15:34:28\" },\n\t{ \"post_id\": 4496, \"topic_id\": 1005, \"forum_id\": 43, \"post_subject\": \"Re: 4.0.2 Bundle\", \"username\": \"richardkchapman\", \"post_text\": \"Forgot to address the documentation question.\\n\\nAgain - good question. I think a README.rst - as we use for documenting directories in the HPCC sources - would make sense (rst - ReStructured Text - is used for Python code documentation and is displayed sensibly by GitHub).\\n\\nYou should also be using the JavaDoc-style comments for inline documentation in a form that the IDE will automatically pick up (things like function parameter names/usage, etc).\", \"post_time\": \"2013-08-27 16:00:25\" },\n\t{ \"post_id\": 4495, \"topic_id\": 1005, \"forum_id\": 43, \"post_subject\": \"Re: 4.0.2 Bundle\", \"username\": \"richardkchapman\", \"post_text\": \"There isn't a way to specify the minimum 'platform' level at the moment, but it's a good idea. 
We could use a 'reserved' bundle name 'platform' for specifying dependencies on a platform version, though since the 4.0 version of teh ecl-bundle tool won't recognize that it would create some compatibility issues. We may need to use the Properties field instead.\\n\\nI'll raise a Jira, and have a think about the best way to add it without any compatibility issues.\\n\\nI don't think that requiring 4.0.2 would automatically disqualify a bundle from the competition (hopefully 4.0.2 will be gold by the time I need to judge )\\n\\nPlease do submit your entry as soon as you can for early feedback - I want the bundle contest to exercise the whole bundle development process - including reviewing, critique, and feedback - and bundles that have been peer-reviewed (and acted on any feedback) stand a better chance of doing well in the competition.\", \"post_time\": \"2013-08-27 15:40:27\" },\n\t{ \"post_id\": 4493, \"topic_id\": 1005, \"forum_id\": 43, \"post_subject\": \"4.0.2 Bundle\", \"username\": \"gsmith\", \"post_text\": \"For a bundle which will only work in 4.0.2 (or greater) is there:\\n1. Any way to specify a minimum server version?\\n2. Any way to submit it to the competition without being disqualified<g>?\\n\\nAlso is there any point in submitting early for feedback?\\n\\nFinally is there any preferred way of documenting the bundle? Can we include docs inside the bundle for example (what format is preferred etc.)?\\n\\nGordon.\", \"post_time\": \"2013-08-27 15:20:29\" },\n\t{ \"post_id\": 4532, \"topic_id\": 1020, \"forum_id\": 43, \"post_subject\": \"Bundles with more than one file\", \"username\": \"richardkchapman\", \"post_text\": \"If your bundle is a little more complex than the example 'Bloom.ecl', you can supply your bundle module as a directory (tree) of ecl files - just make sure that one of them is called bundle.ecl\\n\\nUnfortunately support for such bundles was not quite working in 4.0.0 - but it is in 4.0.2-rc2, available for download from the portal now.\\n\\nRichard\", \"post_time\": \"2013-09-04 15:34:14\" },\n\t{ \"post_id\": 32033, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"Re: ECL bundle users guide\", \"username\": \"JimD\", \"post_text\": \"The "error" message is a bit confusing--It is actually saying there is no error. \\n\\nI have opened a Jira to fix this. \\n\\nhttps://track.hpccsystems.com/browse/HPCC-24758\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2020-09-21 14:31:54\" },\n\t{ \"post_id\": 32013, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"ecl 'bundle' command error 0\", \"username\": \"arnoldmashava\", \"post_text\": \"Thanks a lot,\\n\\nI have been experimenting with the HPCC ECL system, removed the previous system, reinstalled to the C:\\\\ root directory, without spaces, added to PATH in the Environment Variables.\\n\\nIn this new installation to the C:\\\\ root directory, I have not yet added any specific bundles from the Git repository, just querying the system, for applicable commands and indeed any shell operation that involves the "ecl bundle" command, always returns the same painful error:\\n\\necl 'bundle' command error 0\\n\\nI don't know where I am losing it.\", \"post_time\": \"2020-09-19 21:51:57\" },\n\t{ \"post_id\": 32003, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"Re: ECL bundle users guide\", \"username\": \"JimD\", \"post_text\": \"I am not sure I understand your question. \\n\\nThe INFO message you posted is informational--it is letting you know the relative path to the compiler. 
\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2020-09-16 18:16:07\" },\n\t{ \"post_id\": 31911, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"Re: ECL bundle users guide\", \"username\": \"arnoldmashava\", \"post_text\": \"Error Log\\nINFO: eclcc.exe relative path: C:\\\\Program Files (x86)\\\\HPCCSystems\\\\7.10.8\\\\clienttools\\\\bin\\\\eclcc.exe\", \"post_time\": \"2020-09-05 01:57:30\" },\n\t{ \"post_id\": 5972, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"Re: ECL bundle users guide\", \"username\": \"jeeves\", \"post_text\": \"Thank You!. This helped\", \"post_time\": \"2014-06-26 06:12:39\" },\n\t{ \"post_id\": 5952, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"Re: ECL bundle users guide\", \"username\": \"rtaylor\", \"post_text\": \"jeeves,\\n\\nBundle functionality is documented in the Client Tools PDF as some of the options available for use through the ecl command line. There is no overview doc for them (yet). \\n\\nYou'll want to start with the "ecl bundle install" option. Once the bundle is installed, you simply use the bundle's functions the same way you would the standard library functions.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2014-06-24 15:24:56\" },\n\t{ \"post_id\": 5947, \"topic_id\": 1368, \"forum_id\": 43, \"post_subject\": \"ECL bundle users guide\", \"username\": \"jeeves\", \"post_text\": \"I see a bundle writers guide. But how is the bundle supposed to be used, say from the ECL IDE?\", \"post_time\": \"2014-06-24 07:08:54\" },\n\t{ \"post_id\": 32063, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Re: Error in ECL Bundle\", \"username\": \"rtaylor\", \"post_text\": \"arnoldmashava,\\n\\nAnswered here: https://hpccsystems.com/bb/viewtopic.php?f=43&t=1368\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-09-22 12:16:25\" },\n\t{ \"post_id\": 32023, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Re: Error in ECL Bundle\", \"username\": \"arnoldmashava\", \"post_text\": \"Thanks a lot,\\n\\nI have been experimenting with the HPCC ECL system, removed the previous system, reinstalled to the C:\\\\ root directory, without spaces, added to PATH in the Environment Variables.\\n\\nIn this new installation to the C:\\\\ root directory, I have not yet added any specific bundles from the Git repository, just querying the system, for applicable commands and indeed any shell operation that involves the "ecl bundle" command, always returns the same painful error:\\n\\necl 'bundle' command error 0\\n\\nI don't know where I am losing it.\", \"post_time\": \"2020-09-19 21:54:42\" },\n\t{ \"post_id\": 31901, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Re: Error in ECL Bundle\", \"username\": \"arnoldmashava\", \"post_text\": \"C:\\\\Program Files\\\\HPCCSystems\\\\7.10.8\\\\clienttools>ecl bundle list\\nBloom\\nCellFormatter\\nDataMgmt\\nDataPatterns\\nDataPull\\nGLM\\nKMeans\\nLearningTrees\\nLinearRegression\\nLogisticRegression\\nML_Core\\nMySqlImport\\nPBblas\\nPerformanceTesting\\nStringMatch\\nSupportVectorMachines\\nTextVectors\\nTrigram\\nVisualizer\\necl 'bundle' command error 0\\n\\nC:\\\\Program Files\\\\HPCCSystems\\\\7.10.8\\\\clienttools>ecl bundle info LinearRegression\\n\\nBundle LinearRegression could not be loaded\\necl 'bundle' command error 0\", \"post_time\": \"2020-09-05 01:37:31\" },\n\t{ \"post_id\": 6769, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Re: Error in ECL Bundle\", \"username\": \"lokesh\", \"post_text\": \"[quote="DSC":8mkjjlhj]Did you rename the module within the 
bundle file to match the bundle's filename? In general, they need to match, so you'll probably have to either rename the module to match the filename, or the filename to match the module name.\\n\\nCheers,\\n\\nDan\\n\\nThanks Dan,\\n\\nI did that.\\n\\nHere is my Bundle.ecl:\\n
IMPORT Std;\\n\\nEXPORT Bundle := MODULE(Std.BundleBase)\\n EXPORT name := 'test';\\n EXPORT description := 'ECL test Bundle';\\n EXPORT authors := ['Lokesh'];\\n EXPORT license := '';\\n EXPORT copyright := 'Copyright (C)';\\n EXPORT dependsOn := [];\\n EXPORT version := '0.0.1';\\n EXPORT platformversion := '5.0.4.1';\\nEND;\\n
\", \"post_time\": \"2015-01-09 05:58:20\" },\n\t{ \"post_id\": 6768, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Re: Error in ECL Bundle\", \"username\": \"DSC\", \"post_text\": \"Did you rename the module within the bundle file to match the bundle's filename? In general, they need to match, so you'll probably have to either rename the module to match the filename, or the filename to match the module name.\\n\\nCheers,\\n\\nDan\", \"post_time\": \"2015-01-08 21:53:01\" },\n\t{ \"post_id\": 6763, \"topic_id\": 1556, \"forum_id\": 43, \"post_subject\": \"Error in ECL Bundle\", \"username\": \"lokesh\", \"post_text\": \"I am facing an issue in creating the ECL bundle.\\n\\nThe Bundle is copied from the ML library and only name has been changed.\\n\\nMy directory structure is following:\\n\\n./test/Bundle.ecl\\n./test/Similarity.ecl\\n./test/SimilarityRecordDefinitions.ecl
\\n\\nWhen I move to ./test/ and run the following command:\\necl-bundle install -v Bundle.ecl
\\n\\nI get the following output and error:\\nRunning eclcc --nologfile -showpaths\\nRunning eclcc - --nologfile --nostdinc -Me --nobundles -I.\\nwith input IMPORT Bundle.Bundle as B; [ (UTF8) B.name, (UTF8) B.version, B.description, B.license, B.copyright ] + [ (UTF8) COUNT(b.authors) ] + B.authors + [ (UTF8) COUNT(B.dependsOn) ] + B.dependsOn + [ (UTF8) #IFDEFINED(B.platformVersion, '')]\\neclcc return code was 2, output to stderr:\\nstdin:(1,8): error C2171: Object 'Bundle' does not have a field named 'Bundle'\\nstdin:(1,37): error C2167: Unknown identifier "B"\\n2 errors, 0 warning\\nBundle.ecl cannot be parsed as a bundle
\\n\\nAny pointers on how to resolve this.\", \"post_time\": \"2015-01-08 14:32:08\" },\n\t{ \"post_id\": 31113, \"topic_id\": 3533, \"forum_id\": 43, \"post_subject\": \"Re: Image Processing Libraries\", \"username\": \"rtaylor\", \"post_text\": \"Vishal97,\\n\\nSome University students in India have made a contribution towards just that issue: https://github.com/TanmayH/HPCC-OPENCV\\n\\nThis was developed by university students so it should provide a starting point for your integration of HPCC and OpenCV. I've been told that it will need some work to get to production quality, but that's what Open Source is all about. And when you've gotten it working well, please contribute your updates to the community.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-06-09 14:57:57\" },\n\t{ \"post_id\": 31103, \"topic_id\": 3533, \"forum_id\": 43, \"post_subject\": \"Re: Image Processing Libraries\", \"username\": \"Vishal97\", \"post_text\": \"I'm looking for something like that have you got any solution.\", \"post_time\": \"2020-06-09 04:56:20\" },\n\t{ \"post_id\": 13913, \"topic_id\": 3533, \"forum_id\": 43, \"post_subject\": \"Image Processing Libraries\", \"username\": \"vchinta\", \"post_text\": \"Hi,\\n\\nI'm trying to do some image processing tasks(OpenCV) on HPCC, are there any bundles I can use for this. If not, how do I go about creating one?TIA.\\n\\nVishnu Chinta\", \"post_time\": \"2016-12-27 18:44:26\" },\n\t{ \"post_id\": 27063, \"topic_id\": 7183, \"forum_id\": 43, \"post_subject\": \"Re: DataMgmt GenData.WriteData problem\", \"username\": \"DSC\", \"post_text\": \"Thanks for the code.\\n\\nI was able to replicate the problem up through version 6.4.40, but the code succeeds with version 7.4.2. I believe the issue was addressed in https://track.hpccsystems.com/browse/HPCC-20328, which means the fix went into version 7.0.0. I believe that DataMgmt.GenData.AppendData() would also suffer from this same problem, as it relies on a VIRTUAL DATASET parameter.\\n\\nIf you want to use DataMgmt.GenData.WriteData() then you may be forced to upgrade your platform version. There is, however, a workaround: Create the subfile yourself and use DataMgmt.GenData.WriteFile() to place the file into the data store properly. DataMgmt's README talks a bit about using that function.\", \"post_time\": \"2019-07-24 13:54:55\" },\n\t{ \"post_id\": 27053, \"topic_id\": 7183, \"forum_id\": 43, \"post_subject\": \"Re: DataMgmt GenData.WriteData problem\", \"username\": \"lpezet\", \"post_text\": \"I'm using the example(s) from the github page:\\n\\nIMPORT DataMgmt;\\nMyRecLayout := { STRING name, UNSIGNED1 age };\\nnewData := DATASET([{'Bill', 35}], MyRecLayout);\\nDataMgmt.GenData.WriteData('~my_data_store', newData);
\\n\\nWhen I click on the "Check" button in the IDE, I get that error message I mentioned earlier.\\n\\nThanks!\", \"post_time\": \"2019-07-24 13:40:20\" },\n\t{ \"post_id\": 27003, \"topic_id\": 7183, \"forum_id\": 43, \"post_subject\": \"Re: DataMgmt GenData.WriteData problem\", \"username\": \"DSC\", \"post_text\": \"The error you cited is an internal parser error, which is itself is pretty unusual. Can you post the ECL you wrote that called into DataMgmt.GenData.WriteData()? That may give me a clue as to what is going wrong.\", \"post_time\": \"2019-07-23 20:19:01\" },\n\t{ \"post_id\": 26973, \"topic_id\": 7183, \"forum_id\": 43, \"post_subject\": \"DataMgmt GenData.WriteData problem\", \"username\": \"lpezet\", \"post_text\": \"Hello!\\n\\nNot sure where to ask for this, here it is.\\nCan I use DataMgmt ECL Bundle (https://github.com/hpcc-systems/DataMgmt) when using ClientTools/Platform 6.4.x?\\nInstallation didn't seem to complain (and site does say "HPCC 6.0.0 or later is required."), yet when I compile GenData.WriteData, I get the following error:\\n\\nError: assert(scope) failed - file: hqlexpr.cpp, line 9904 (7, 10), 3000, \\n
\\nGenData.Init works just fine.\\n\\nI tried with ClientTools 6.4.6 and 6.4.40. \\nAny idea?\\n\\nThanks!\", \"post_time\": \"2019-07-23 16:52:36\" },\n\t{ \"post_id\": 29973, \"topic_id\": 7913, \"forum_id\": 43, \"post_subject\": \"Re: Cannot install GNN bundles\", \"username\": \"bforeman\", \"post_text\": \"Have you installed any of the other bundles, or is this specific to GNN? In our training classroom I saw this error while trying to install the ML_Core and others. I eventually had to download the image from the GitHub site and then manually extracted the folder to the bundle location, and then I was able to use it. In other words, I had to bypass the Git method of installing on some machines. I think this might be an issue where the environment path is not pointing to the DLL location. \\nWindows 10, right?\\nRegards,\\n\\nBob\", \"post_time\": \"2020-03-30 15:19:32\" },\n\t{ \"post_id\": 29963, \"topic_id\": 7913, \"forum_id\": 43, \"post_subject\": \"Cannot install GNN bundles\", \"username\": \"pdutta3\", \"post_text\": \"Whenever I try to install GNN from client tools I get an error like "cannot find MVCR". However, I have reinstalled MVCR latest version and also the ECL client tools again. The snapshot of the detailed error is attached herewith.\", \"post_time\": \"2020-03-27 20:38:38\" },\n\t{ \"post_id\": 4800, \"topic_id\": 1083, \"forum_id\": 44, \"post_subject\": \"Webex: SALT Profiling\", \"username\": \"HPCC Staff\", \"post_text\": \"This video includes a tutorial on how to do SALT Profiling along with an example use case.\\n\\nhttps://reedelsevier.webex.com/reedelse ... cba8ec846e\\n\\nPresented by Jill Luber, Sr Architect.\", \"post_time\": \"2013-04-05 17:30:44\" },\n\t{ \"post_id\": 5192, \"topic_id\": 1084, \"forum_id\": 44, \"post_subject\": \"Re: New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"This tool has been added to SALT 2.9 which is now available as Beta 1. The tool is called MAC_MatchSamplePatterns. It is available in the BWR_iterate module as commented code.\\n\\nI haven't tried it since becoming part of SALT. But, it should work just like I have previously described it.\", \"post_time\": \"2014-02-10 15:26:00\" },\n\t{ \"post_id\": 4817, \"topic_id\": 1084, \"forum_id\": 44, \"post_subject\": \"Re: New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"If anyone wants a copy of MAC_CreatorOf_MAC_fmmpOfMatchSample, email me and I'll email it to you. My email is timothy.humphrey@lexisnexis.com\", \"post_time\": \"2013-10-23 14:14:44\" },\n\t{ \"post_id\": 4802, \"topic_id\": 1084, \"forum_id\": 44, \"post_subject\": \"Re: New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"I have created a macro that creates the tool I previously described in this thread, which is now called "MAC_fmmpOfMatchSample" (fmmp in the name stands for "field match mismatch pattern"). The name of the macro that creates this tool is MAC_CreatorOf_MAC_fmmpOfMatchSample. It is in the module, BridgerIndividualsSaltLinking of the Boca dataland repository.\\n\\nThis macro works much like the SALT macro, MAC_Default_SPC. This is, its output is to the workunit and you need to copy and paste it into a new attribute you create in your module. 
The name of the new attribute MUST BE, MAC_fmmpToMatchSample.\\n\\nThe input to MAC_CreatorOf_MAC_fmmpOfMatchSample is the name of a MatchSample dataset.\", \"post_time\": \"2013-09-30 19:00:15\" },\n\t{ \"post_id\": 4801, \"topic_id\": 1084, \"forum_id\": 44, \"post_subject\": \"New SALT tool for quickly finding best threshold.\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is for those doing or will be doing internal linking using SALT. I have made a tool that enables one to quickly decide what conf value would make a good threshold for clustering entities.\\n\\nThe tool is able to speedup the process of determining what conf value makes a good threshold because its field match mismatch string patterns (field fmmp in the tools output) basically summaries the field matches and mismatches in a single string pattern. Furthermore, the string of field matches and mismatches are ordered from left to right, where the most important fields (those with the highest specificity) are on the left. \\n\\nI have this tool in the boca dataland repository in\\nBridgerIndividualsSaltLinking.MatchSampleFieldMatchMismatchPatterns.\\nThe workunit, W20130830-093152, shows an example of how to use the tool and what it produces. In addition, there are comments at the top of the attribute that will further help you.\\n\\nAs a final note, I have requested this tool to be an enhancement to SALT and hopefully one day SALT will generate this code for you. But, currently it doesn’t.\", \"post_time\": \"2013-08-30 16:17:36\" },\n\t{ \"post_id\": 4806, \"topic_id\": 1085, \"forum_id\": 44, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"tlhumphrey2\", \"post_text\": \"What I'm doing is purely experimental. My "In" file is just those individuals that have been placed on a watchlist, specifically those on World Compliance's combined watchlist.\\n\\nIt is true that the specificities can be much less accurate when the "In" file is not representative of the total population. And, if the "In" file is much smaller than the total population, you would expect the specificities to be incurate. But, we don't know how this will affect the linking process. We don't know where the cutoff is, i.e. how small is too small.\\n\\nWe do know this, you can use just field average specificities in linking, i.e. no field value specificities are used. And, we know that averages taken from a relatively small sample can be fairly close to the population averages. So, it makes since to me that good linking is possible even with a relatively small "In" file.\", \"post_time\": \"2013-10-11 13:34:28\" },\n\t{ \"post_id\": 4805, \"topic_id\": 1085, \"forum_id\": 44, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"bforeman\", \"post_text\": \"Tim, Tony just replied to me:\\n\\nWell this would be a highly unusual situation that should only occur if you are working with a sample of data. Possibly Tim has some other circumstance with his entity. Typically in a SALT application your goal is to have significant coverage with your data SALT, the whole idea of a statistical linking method is based on this. If you do not have full coverage there will be issues, in SALT, the specificities will not be as accurate. Working with samples in SALT can be problematic. POPLULATION is used in calculating the default match threshold along with NINES and RECORDS. 
Even when using a sample you should always set these numbers to the full amount of data available in order to get the threshold set properly. You should also calculate specificities on the full data. The THRESHOLD statement is the recommended method to override the default threshold when needed.\\n\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2013-10-11 12:44:21\" },\n\t{ \"post_id\": 4804, \"topic_id\": 1085, \"forum_id\": 44, \"post_subject\": \"Re: SALT POPULATION can be larger than RECORDS\", \"username\": \"bforeman\", \"post_text\": \"Thanks for the feedback Tim, I have forwarded the info to our documentation man (Tony).\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2013-10-11 12:13:07\" },\n\t{ \"post_id\": 4803, \"topic_id\": 1085, \"forum_id\": 44, \"post_subject\": \"SALT POPULATION can be larger than RECORDS\", \"username\": \"tlhumphrey2\", \"post_text\": \"I found out the other day, from David Bayliss, that POPULATION (a statement in SALT .spc files) can be larger than RECORDS (another statement in SALT .spc files). I mention this because 1) the SALT User Guide does not mention it; 2) for a SALT linking project I'm currently working on, the recommended match threshold was way low; and 3) the POPULATION statement of my .spc file was way low.\\n\\nIf most everyone is covered by the records of your "In" file then POPULATION will be smaller than RECORDS. But, if the number of records in the "In" file is small and does not represent the total population you want linking to work, then the POPULATION will be larger than RECORDS.\", \"post_time\": \"2013-10-10 13:49:57\" },\n\t{ \"post_id\": 4814, \"topic_id\": 1088, \"forum_id\": 44, \"post_subject\": \"Re: SALT question on BESTTYPE\", \"username\": \"tmiddleton\", \"post_text\": \"The EXTEND method on BESTTYPE extends those fields for matching which are part way there. For example if you are matching FRITCH to FRITSCH-MIDDLETON, and have specified LONGEST as the construction method on BESTTYPE, and EXTEND as the propagation method, FRITSCH will be replaced with FRITSCH-MIDDLETON during matching.\", \"post_time\": \"2013-10-23 13:21:48\" },\n\t{ \"post_id\": 4813, \"topic_id\": 1088, \"forum_id\": 44, \"post_subject\": \"SALT question on BESTTYPE\", \"username\": \"john holt\", \"post_text\": \"Does the EXTEND propagation method also work with HYPHEN1?\", \"post_time\": \"2013-10-22 13:38:53\" },\n\t{ \"post_id\": 5181, \"topic_id\": 1191, \"forum_id\": 44, \"post_subject\": \"SALT Online Courses now available\", \"username\": \"HPCC Staff\", \"post_text\": \"The Introductory to SALT and Advanced SALT courses are now available in the Learning Management System at http://learn.lexisnexis.com/hpcc. The Introductory course requires completion of the Introduction to Thor online course. The Advanced course requires completion of the Introduction to SALT online course. \\n\\nReminder: Online courses are free to Reed Elsevier, LexisNexis employees, and partners and should use the approved promo code provided by their manager. 
Contact training@hpccsystems.com if you need assistance.\", \"post_time\": \"2014-02-06 15:50:56\" },\n\t{ \"post_id\": 5699, \"topic_id\": 1308, \"forum_id\": 44, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot David & Tim..\\n\\nYeah probably i need to attend an online course as most of my work is going to be in SALT.\\n\\nRegarding the child datasets linking what i meant was :\\n\\nSay for an example : A person has many books.\\n\\n\\n<person>\\n\\t<personid>1</personid>\\n\\t<book>\\n\\t\\t<bookid>1</bookid>\\n\\t\\t<booktitle>abc</booktitle>\\n\\t</book>\\n\\t<book>\\n\\t\\t<bookid>2</bookid>\\n\\t\\t<booktitle>def</booktitle>\\n\\t</book>\\n\\t<book>\\n\\t\\t<bookid>3</bookid>\\n\\t\\t<booktitle>ghi</booktitle>\\n\\t</book>\\n</person>\\t\\n
\\n\\nAttached a sample record.\\n\\nNow in case I need to link with another file with bookid field , How do I specify the same in the linkpath ? \\n\\nDo we need to normalize the base data and then specify the "bookid" field in the linkpath or can we specify book.bookid something of this type ?\\n\\nKindly excuse me if its a lame doubt, new to SALT and exploring different ways.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-15 20:25:26\" },\n\t{ \"post_id\": 5698, \"topic_id\": 1308, \"forum_id\": 44, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"dabayliss\", \"post_text\": \"Hey there,\\n\\nWe have some excellent online training classes that might help with this. In essence the way to think of it is this:\\n\\nINTERNAL linking is the way to cluster together all of -your- data that you can. We have internal linking processes with hundreds of consituent files - this is your header file.\\n\\nEXTERNAL linking gives you a way to append an ID to an EXTERNAL file that tells you which cluster it would be in IF you could add it to the header file. We called it external linking because it was for 'data we couldn't keep hold of'\\n\\nREMOTE linking is for the situation where you have two files NEITHER of which you can put into the header, and you cannot perform external linking on either of them.\\n\\nThe accuracy of the process is in the order listed; ie INTERNAL is best.\\n\\nEXTERNAL files are files you have appended an ID to for whatever purpose. At attributefile is an EXTERNAL file which you ALSO have fed back into either an internal or external linking process.\\n\\nSuppose you have a header file of names and addresses from 20 sources. You then get a file come in that has some interesting data. You (at least initially) externally link the new file (you don't trust it enough to put it in the header yet). The new files as some data on it that none of your other files have.\\n\\nSomeone then says - I want to search ALL of your data -but using the information ONLY on that one file. You don't want to add the 'weird' data to your ehader (most of the columns would be blank) so instead you add the file as an attribute file. This 'attribute file' adds a new column (attribute) that can be used in linkpaths (or in internal linking) but without actually having the column on each record.\\n\\nINITIAL matches the leading portion of a string to a fuller string. D = DAVID or DAV = DAVID\\n\\nABBR allows for one string to be fully contained (in the same sequence) in another: SGT = SerGeanT\\n\\nI suspect the child dataset case you are looking for is an attribute file with ,LIST specified.\\n\\nSeriously - is usually used after an fairly intensive 3 day training course - it is available online. If SALT is going to consume a chunk of your time - you need the course \", \"post_time\": \"2014-05-15 18:24:57\" },\n\t{ \"post_id\": 5697, \"topic_id\": 1308, \"forum_id\": 44, \"post_subject\": \"Re: SALT External Linking\", \"username\": \"tlhumphrey2\", \"post_text\": \"Viswa,\\n\\nI'm not qualified to answer your questions. But, if you haven't has a SALT class, I think you would benefit from attending one.\", \"post_time\": \"2014-05-15 18:19:50\" },\n\t{ \"post_id\": 5694, \"topic_id\": 1308, \"forum_id\": 44, \"post_subject\": \"SALT External Linking\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nHave few clarifications regarding external linking.\\n\\n1.) The external linking result will just tell which all records could be linked to the base file ..? 
\\n\\nIncase we want a collated result of both the files we need to perform a join separately based on the entity id ( base file ) and the unique id ( external file ) based on the linking result.\\n\\nFor Ex :\\n\\nConsider a base data file "abc" with these fields \\n\\nfield1 field2 field3 field4\\n123 abc xxx xyz\\n\\nExternal File With these fields :\\n\\nfield_1 field_2 field_3 field_4\\n1 xxx def zzz \\n\\nLinking of internal file and external file is based on field3 and field_2 respectively.\\n\\nDo we get the result of this format after linking ?\\n\\nfield1 field2 field3 field4 field_1 field_2 field_3 field_4\\n123 abc xxx xyz 1 xxx def zzz\\n\\n2.) What exactly is the difference between "External Files" and "Attribute Files"\\n\\n3.) If the base internal file and external file do not have many similarities , which is better to use External linking or remote linking ?\\n\\n4.) Can anybody explain with an example of "INITIAL , ABBR " which we specify in the FIELD statement in the specification file.\\n\\nEx : if "abc def" is present as abbr / initial , abc / def alone should be matched Is that correct ?\\n\\n5.) How do we link child datasets from the base file to a field in the external file ? Can anybody provide an example..?\\n\\nKindly advise.\\n\\nThanks a lot in advance.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-05-15 14:55:13\" },\n\t{ \"post_id\": 5872, \"topic_id\": 1349, \"forum_id\": 44, \"post_subject\": \"Re: Joins in SALT\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot Tony and Tim..\", \"post_time\": \"2014-06-12 09:02:58\" },\n\t{ \"post_id\": 5870, \"topic_id\": 1349, \"forum_id\": 44, \"post_subject\": \"Re: Joins in SALT\", \"username\": \"tlhumphrey2\", \"post_text\": \"The JOINs that SALT does are fairly complicated.\\n\\nIt might help you to read the wilipedia article on Record Linkage: http://en.wikipedia.org/wiki/Record_linkage. In that article, they talk about a technique called blocking. They said
Blocking attempts to restrict comparisons to just those records for which one or more particularly discriminating identifiers agree, which has the effect of increasing the positive predictive value (precision) at the expense of sensitivity (recall)
.\\n\\nMany of the JOINs in SALT are for blocking, that is to reduce the number of pairs of records that need to be compared without missing pairs of records that should be in the same cluster (i.e. without hurting recall).\\n\\nTo see exact details of the JOINs that SALT does, look at the attribute "matches" created by SALT.\", \"post_time\": \"2014-06-11 17:41:20\" },\n\t{ \"post_id\": 5869, \"topic_id\": 1349, \"forum_id\": 44, \"post_subject\": \"Re: Joins in SALT\", \"username\": \"tmiddleton\", \"post_text\": \"Hi,\\n\\nIn SALT all fields are initially considered independent, and specificities (weightings) for mapping are calculated independently for each field. However, as with most data, some fields have dependent relationships with other fields. For example, zip code and zip4. Names which are divided into individual name parts, and addresses also can be considered a dependent group of fields. SALT allows you to use a CONCEPT statement in your specification file to define this dependent group so the individual fields will not be overweighted during matching. So a CONCEPT for a full person name might appear as follows:\\n\\nCONCEPT:FULLNAME:FNAME:MNAME:LNAME:BAGOFWORDS:18,366\\n\\nNote that specificities are also calculated for CONCEPTs based on your data and should be added to the specification definition before linking.\\n\\nThe BAGOFWORDS option is what can solve your issue. It allows the individual fields of a CONCEPT to be matched in any order from record to record. BAGOFWORDS can also be used on a single string field, and allows the words in strings from two records to be matched in any order.\", \"post_time\": \"2014-06-11 17:12:49\" },\n\t{ \"post_id\": 5868, \"topic_id\": 1349, \"forum_id\": 44, \"post_subject\": \"Joins in SALT\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIn SALT , based on the specification file what type of joins are performed ?\\n\\nIs it an "AND" join with all the fields specified in the specification file taken in consideration or an "OR" Join ?\\n\\nI have specified all the fields in the specification file.\\n\\nFor Ex :\\n\\nI have a dataset with following fields :\\n\\n\\n\\nfname lname id\\nabc def 123\\na def 124\\nabc d 125\\ndef abc 123\\n\\n
\\n\\nBased on SALT specification file, i will be able to match the first 3 records in to a single entity , but the fourth record and the first record are the same with names interchanged Can they be linked using the SALT Internal Linking process.\\n\\nThanks a lot in advance..\\n\\nRegards,\\nViswa\", \"post_time\": \"2014-06-11 16:10:56\" },\n\t{ \"post_id\": 6028, \"topic_id\": 1371, \"forum_id\": 44, \"post_subject\": \"Re: SALT Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot bob.\\n\\nI did have very few fields , wanted to analyse for smaller set of records , how linking happens and then try the same for larger sets of data.\", \"post_time\": \"2014-07-11 11:26:51\" },\n\t{ \"post_id\": 5976, \"topic_id\": 1371, \"forum_id\": 44, \"post_subject\": \"Re: SALT Error\", \"username\": \"bforeman\", \"post_text\": \"Regarding your other issues, after consulting with the SALT gurus:\\n\\nThe minimum default specificity required for matching cannot be overridden. You may be trying to use SALT on data that is not in SALT’s "sweet spot". SALT is a statistical matching system (specificity), and the specificity characteristics of the data (number of fields, amount of specificity) need to be at a certain level for this to work. However, it could be some other problem, did you assign unique RIDs to every record? Make sure POPULATION, NINES, and RECORDS are set appropriately.\\n\\nPOPULATION is an educated guess at first it should reflect the number of expected entities (like unique people) in the data.\\n\\nIf you are having to lower the matching threshold using the THRESHOLD statement, there is probably something else wrong. Sounds like the SPC file is not right somewhere, or you have lots of low specificity data for the fields defined. i.e. everyone is named "John Smith".\\n\\nHope this helps,\\n\\nBob\", \"post_time\": \"2014-06-27 07:11:56\" },\n\t{ \"post_id\": 5974, \"topic_id\": 1371, \"forum_id\": 44, \"post_subject\": \"Re: SALT Error\", \"username\": \"bforeman\", \"post_text\": \"Well, before we go deeper down the rabbit hole it's possible that you have too few fields and a small amount of data with lots of duplicates as well. SALT is not designed for this (like killing a mosquito with a sledgehammer) so we may need to take a look at your data.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2014-06-26 13:33:34\" },\n\t{ \"post_id\": 5968, \"topic_id\": 1371, \"forum_id\": 44, \"post_subject\": \"SALT Error\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI was trying to run SALT for a very small set of files, encountered this error :\\n\\nError : \\n(29):No match joins created! Specificities are too low for matching\\n\\nCan we override the minimum default specificity value for matching ?\\n\\nFew Other Clarifications :\\n\\n1.) How do we determine the value for POPULATION in the specification file, as i dont know how many records would be linked and how many would not be linked. ?\\n\\n2.) Each time we change the threshold for each iterations during internal linking , we get additional matches.. 
How long we go on changing the threshold as the matches are all valid matches or we just specify a very low threshold in the config file ?\\n\\nfor ex : fuzzy name match and direct email match , we know that it belongs to a particular entity because there is a direct email match.\\n\\nKindly suggest.\\n\\nThanks and Regards,\\nViswa\", \"post_time\": \"2014-06-25 17:48:38\" },\n\t{ \"post_id\": 6043, \"topic_id\": 1381, \"forum_id\": 44, \"post_subject\": \"Re: SALT Error : Watchdog has lost contact\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot bob. Yeah it seems like it was an hardware issue in the cluster.\\n\\nRegards,\\nViswa\", \"post_time\": \"2014-07-14 11:38:53\" },\n\t{ \"post_id\": 6041, \"topic_id\": 1381, \"forum_id\": 44, \"post_subject\": \"Re: SALT Error : Watchdog has lost contact\", \"username\": \"bforeman\", \"post_text\": \"That error usually means that a node went down for some reason. Basically a hardware or platform issue, not a specific SALT issue. When it happens on our own cluster, we ask operations to look at the specific node and the system logs there to see what occurred.\\n\\nHTH,\\n\\nBob\", \"post_time\": \"2014-07-12 18:58:18\" },\n\t{ \"post_id\": 6029, \"topic_id\": 1381, \"forum_id\": 44, \"post_subject\": \"SALT Error : Watchdog has lost contact\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nIs it possible to get this error when we run SALT Internal linking for a large set of input data ? \\n\\nCurrently i have around 650 GB of Input File with ~200 Million records and running in a 50 node cluster and am trying to do internal linking based on email.\\n\\nSample Input File \\n\\n
\\n\\nentity_id unique_id lname fname email title keywords \\n1 1 abc def abc@def.com title1 test1,test2\\n2 2 sdd fgh sdd@fgh.com title2 test3,test4\\n\\n
\\n\\nI don't have access to the logs file. I could find only DAFILESRV related log files.\\n\\nNot sure if that could be of some help.\\n\\nKindly suggest.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-07-11 11:40:47\" },\n\t{ \"post_id\": 6131, \"topic_id\": 1392, \"forum_id\": 44, \"post_subject\": \"Re: SELF Join taking lot of time\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nThanks a lot Bob and Tony.\\n\\nI really appreciate the help and support.\\n\\nI have run the specificities initially and added the corresponding specificity values before internal linking.\\n\\nI am not sure if adding other fields would help much. Not sure though.\\n\\nI am trying to link 2 authors from 2 different sources. They could have the same article published in 2 different sources and i could use some additional parameters/fields for linking but they could have different articles published too in 2 different sources. \\n\\nExample 1 : \\n\\n\\n\\nSame person with different articles published : \\n\\nJohn Smith has published a article in source1 with DOI : 123 , ISSN : 2345 , Title : "abc" , Publication Year : 2000\\n\\nJohn Smith has published a article in source2 with DOI : 125 , ISSN : 7778 , Title : "DEF" , Publication Year : 2010\\n\\n
\\n\\nExample 2 :\\n\\n\\n\\nSame person with same articles published :\\n\\nJohn Smith has published a article in source1 with DOI : 123 , ISSN : 2345 , Title : "abc" , Publication Year : 2000\\n\\nJohn S has published a article in source2 with DOI : 123 , ISSN : 2345 , Title : "abc" , Publication Year : 2000\\n\\n
\\n\\nI am little confused if using other fields will help in the internal linking process in this scenario, hence considering only email , first name and last name for linking presently.\\n\\nAdditional Fuzzy matching on the name fields like EDIT could be used but there could be ambiguities right ?\\n\\nEx : \\n\\nJohn Smith may not be the same as Jon Smith , they could be 2 different authors.\\nZei Zhui may not be the same as Zei Zhiu.\\n\\n\\nI will tweak my specification file for name fields to not include ABBR and BAGOFWORDS for the name fields.\\n\\nKindly suggest.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-07-29 10:14:58\" },\n\t{ \"post_id\": 6111, \"topic_id\": 1392, \"forum_id\": 44, \"post_subject\": \"Re: SELF Join taking lot of time\", \"username\": \"bforeman\", \"post_text\": \"Hi Viswa,\\n\\nWell, the new definition should work better, this gives you 3 fields which will be used for matching: email, fname, and lname, and you accounted for the dependency of fname and lname with the fullname CONCEPT. Have you run specificities and added the specificity values to the specification file before you run internal linking? Also data profiling will help understand how well these fields are populated and the specificities will help determine how suitable the data is for matching. It seems like there are other fields there that could be used in the matching process.\\n\\nA key to using internal linking successfully is understanding the data and its relationships. If the entity we are linking is “person”, then we need to identify all the attributes of person that are relevant to matching, and these need to be defined in the specification file. Internal linking is not really something you build up a field at a time.\\n\\nYou may need some additional fuzzy matching on the name fields. Here is fuzzy matching we use for the person name in the insurance header:\\n\\nFIELD:FNAME:PROP:EDIT1:INITIAL:PreferredName:FORCE(--):9,130\\nFIELD:MNAME:PROP:INITIAL:EDIT2:6,137\\nFIELD:LNAME:PROP:INITIAL:HYPHEN2:EDIT2:11,221\\nCONCEPT:MAINNAME:FNAME:MNAME:LNAME:BAGOFWORDS:18,366\\n\\nI think you are misusing ABBR on the name fields and should take it off. It is meant for cases where something like IBM would match International Business Machines. Also not sure if you need BAGOFWORDS on the fname and lname fields unless it is likely that these fields contain multiple words that can be out of order. But you probably need EDIT1 or EDIT2 depending on how loose you want the fuzzy matching to be.\\n\\nI still think you may need more “facts” about your entity. 
For example, what if the name is John Smith and email address is blank?\\n\\nRegards,\\n\\nBob and Tony\", \"post_time\": \"2014-07-28 17:02:04\" },\n\t{ \"post_id\": 6110, \"topic_id\": 1392, \"forum_id\": 44, \"post_subject\": \"Re: SELF Join taking lot of time\", \"username\": \"ksviswa\", \"post_text\": \"Thanks a lot Bob and Tony.\\n\\nThere were multiple email address in the initial input file with the same email address in many records and hence the self join was taking a considerable amount of time.\\n\\nInitial Specification Definition:\\n\\n\\nMODULE:abc\\nOPTIONS:-gs2\\nFILENAME:Sample\\nIDFIELD:EXISTS:entity_id\\nRIDFIELD:unique_id\\nRECORDS:190000000\\nPOPULATION:50000000\\nNINES:3\\nFIELDTYPE:DEFAULT:LEFTTRIM:NOQUOTES("'):\\nFIELDTYPE:NUMBER:ALLOW(0123456789.-s):\\nFIELDTYPE:ALPHA:CAPS:ALLOW(ABCDEFGHIJKLMNOPQRSTUVWXYZ):\\nFIELDTYPE:WORDBAG:ALLOW(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):SPACES( <>{}[]-^=!+&,./@_£$%*):ONFAIL(IGNORE):\\nFIELDTYPE:TITLE:ALLOW(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'()<>{}[]-^=!+&,./@_£$%*\\\\):ONFAIL(IGNORE):\\nFIELD:email:LIKE(TITLE):0,0\\nFIELD:lname:CARRY:\\nFIELD:fname:CARRY:\\nFIELD:id:CARRY:\\nFIELD:user_id:CARRY:\\nFIELD:id1:CARRY:\\nFIELD:id2:CARRY:\\nFIELD:ISSN:CARRY:\\nFIELD:year:CARRY:\\nFIELD:doi:CARRY:\\nFIELD:title:CARRY:\\nFIELD:Pub_id:CARRY:\\nFIELD:keywords:CARRY:\\nFIELD:source:CARRY:\\n\\n
\\n\\nInput File Layout :\\n\\n\\n\\nLayout_Sample := RECORD\\n unsigned8 unique_id;\\n unsigned8 entity_id;\\n string300 lname;\\n string300 fname;\\n string255 email;\\n unsigned7 id;\\n unsigned7 uer_id;\\n unsigned7 id1;\\n string40 id2;\\n string50 issn;\\n unsigned2 year;\\n string255 doi;\\n string1000 title;\\n string50 pub_id;\\n string1000 keywords;\\n string25 source;\\nEND;\\n\\n
\\n\\nThe initial layout and specification definition which i had used for the same. Looks like i need to tweak the specification file and add other features along with email probably some fuzzy name match and direct email link.\\n\\nNew Specification File :\\n\\n\\n\\n\\nMODULE:abc\\nOPTIONS:-gs2\\nFILENAME:Sample\\nIDFIELD:EXISTS:entity_id\\nRIDFIELD:unique_id\\nRECORDS:190000000\\nPOPULATION:50000000\\nNINES:3\\nFIELDTYPE:DEFAULT:LEFTTRIM:NOQUOTES("'):\\nFIELDTYPE:NUMBER:ALLOW(0123456789.-s):\\nFIELDTYPE:ALPHA:CAPS:ALLOW(ABCDEFGHIJKLMNOPQRSTUVWXYZ):\\nFIELDTYPE:WORDBAG:ALLOW(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):SPACES( <>{}[]-^=!+&,./@_£$%*):ONFAIL(IGNORE):\\nFIELDTYPE:TITLE:ALLOW(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'()<>{}[]-^=!+&,./@_£$%*\\\\):ONFAIL(IGNORE):\\nFIELD:email:LIKE(TITLE):0,0\\nFIELD:lname:BAGOFWORDS:LIKE(WORDBAG):TYPE(STRING300):INITIAL:ABBR:HYPHEN1:0,0\\nFIELD:fname:BAGOFWORDS:LIKE(WORDBAG):TYPE(STRING300):INITIAL:ABBR:HYPHEN1:0,0\\nFIELD:id:CARRY:\\nFIELD:user_id:CARRY:\\nFIELD:id1:CARRY:\\nFIELD:id2:CARRY:\\nFIELD:ISSN:CARRY:\\nFIELD:year:CARRY:\\nFIELD:doi:CARRY:\\nFIELD:title:CARRY:\\nFIELD:Pub_id:CARRY:\\nFIELD:keywords:CARRY:\\nFIELD:source:CARRY:\\nCONCEPT:fullname:source_authors_lname:source_authors_fname:BAGOFWORDS:0,0\\n
\\n\\nI will share the results soon based on this new specification definition.\\n\\nKindly provide your suggestions on the new specification file. \\n\\nThanks a lot in advance.\\n\\nViswa\", \"post_time\": \"2014-07-28 11:19:51\" },\n\t{ \"post_id\": 6071, \"topic_id\": 1392, \"forum_id\": 44, \"post_subject\": \"Re: SELF Join taking lot of time\", \"username\": \"bforeman\", \"post_text\": \"Hi Viswa,\\n\\nWell, this self-join warning is an ECL warning, and is clearly related to the data that you are using.\\n\\nThe real clue here is that your specification file contains only 1 linkable field (assuming this is email) and that all the other fields are carried. I am surprised you are not getting a warning out of the SALT compile about not having enough specificity for internal linking. Make sure you check the GenerationDocs definition to be sure there are no SALT errors or warnings in there related to your specification file.\\n\\nThis is really not an appropriate use of SALT, if you only have 1 field to match on between records, you don’t need SALT anyway, you can just use a join (it would be a self-join since you are linking records in the same file, and not sure you would want much fuzzy matching on an email address), and then you don’t need all the SALT overhead. SALT is a statistical method and relies on having multiple facts about the entity where the average specificity adds up to enough to insure statistically significant results, that is the idea of specificities. You should always include your specification definition when posting a SALT question. The input file layout is also very helpful.\\n\\nAgain SALT does matching by creating self-joins based on field combinations of matching fields which add up to enough specificity to insure a statistically significant match based on the data. If you were doing this without SALT, you would be getting the same result on a self-join.\\n\\nTypically the reason a self-join will take a long time, is that you have a lot of duplicates on the join key. For example if there were a thousand records in the file with the same email addresses value, a self-join would end up creating 1000*1000 or a million results. If there are multiple email addresses in the file with a lot of records, it gets worse. If there were 10000 records with the same email address, well you can see what would happen and why it takes a long time.\\n\\nOne way to check the file is to run a simple crosstab report on the file using the email address, like:\\n\\nf := mydata;\\n\\nlayout_stat := record\\nf.email_address;\\ncnt := COUNT(GROUP);\\nend;\\n\\nf_stat := TOPN(TABLE(f, layout_stat, email_address), 1000, -cnt);\\n\\n
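For instance, a minimal follow-on sketch using the definitions above (the NAMED labels are illustrative and not from the original post; the 1000 cut-off echoes the threshold discussed just below):

OUTPUT(f_stat, NAMED('Top_Email_Counts'));        // inspect the heaviest email keys
heavy := f_stat(cnt > 1000);                      // keys likely to blow up a self-join
OUTPUT(COUNT(heavy), NAMED('Emails_Over_1000'));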
\\nIf you have many email values where the cnt is >1000, unless you can filter them out, it means long run times. SALT uses something called BasicMatch which attempts to pre-link the duplicates and avoid self-join problems in the match self-joins.\\n\\nHope this helps!\\n\\nBob (and Tony Middleton)\", \"post_time\": \"2014-07-21 14:12:58\" },\n\t{ \"post_id\": 6061, \"topic_id\": 1392, \"forum_id\": 44, \"post_subject\": \"SELF Join taking lot of time\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nType of Job : SALT Internal Linking \\n\\nI have many fields in the specification file around 17 fields with email being one of the fields and the only field used for linking and the rest are carried and specificities are not calculated for others. \\n\\nInitial Data File Size : ~630 GB , 191 Million Records.\\n\\nSample Data\\n\\n\\nemail name1 name2 field1 field2 field3 field4 field5\\nabc@1.com aaa bb xyz xyz xyz abc xyz\\n12@abc.com abc def ajk qw12 aqws aaa bcd\\n
\\n\\nI ran the internal linking for the first iteration, the job took around 2 hours to complete.\\n\\nI tried the second iteration by modifying the threshold slightly for additional linking , get this warning message and the job was continuously running for hours without giving any results. Had to abort the same after 6 to 7 hours.\\n\\nI have tried multiple iterations for smaller datasets and was able to get the corresponding results.\\n\\n\\nWarning Message :\\n\\n-1: Graph[236], selfjoin[238]: SELFJOIN: Warning 578287 preliminary matches, join will take some time\\n\\n
\\n\\nKindly help regarding the same.\\n\\nThanks,\\nViswa\", \"post_time\": \"2014-07-17 16:01:19\" },\n\t{ \"post_id\": 7961, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"leonarta\", \"post_text\": \"SALTTOOLS30.mac_Patch_SPC and SALTTOOLS30.mod_Soapcalls currently depend on the HPCC system in use having a central MySQL repository. SALT in general does not require this.\\n\\nThese macros implement a means of automatically updating the specificity and switch values associated with FIELD declarations in one's spec file. They use a SOAPCALL to retrieve the spec file attribute from said repository, parse the spec file and tweak its values in memory, and then optionally write it back to the repository with another SOAPCALL.\\n\\nI've submitted a SALT issue to investigate whether we can improve this, either by issuing a clear error message or ideally adding support for a non-central repository. (Issue #1421 in GitHub)\", \"post_time\": \"2015-07-29 21:25:08\" },\n\t{ \"post_id\": 7960, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"iMikePayne\", \"post_text\": \"Does this not work on cloud environments? Copying and pasting is very tedious.\", \"post_time\": \"2015-07-29 18:58:57\" },\n\t{ \"post_id\": 7897, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"iMikePayne\", \"post_text\": \"Even after removing that, anytime I try to use the SALTTOOLS I get this error:\\n\\nError: System error: -1: Graph[723], SOAP_rowdataset[724]: SLAVE #1 [10.0.1.133:6600]: <Error><text>socket not opened Target: C!10.242.70.7, Raised in: /var/lib/jenkins/workspace/LN-Candidate-withplugins-5.2.4-1/LN/centos-6.4-x86_64/HPCC-Platform/system/jlib/jsocket.cpp, line 1511</text><url>http://10.242.70.7:18145/WsAttributes</url></Error>,
\", \"post_time\": \"2015-07-09 18:33:12\" },\n\t{ \"post_id\": 7845, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"bforeman\", \"post_text\": \"Mike, you should be able to edit that file on the cluster, just comment out the #OPTION statement at the top.\\n\\nBob\", \"post_time\": \"2015-06-26 18:46:06\" },\n\t{ \"post_id\": 7844, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"iMikePayne\", \"post_text\": \"REIL 100 10.242.70.7:19010 and REIL 40 10.242.70.7:18010\", \"post_time\": \"2015-06-26 18:29:56\" },\n\t{ \"post_id\": 7843, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Re: Salt Tools Error\", \"username\": \"bforeman\", \"post_text\": \"Yes, it is because of the #option line at the beginning of that module. I thought we had fixed that, it just needs to be removed or commented out. Where is this happening? What cluster?\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-06-26 18:26:51\" },\n\t{ \"post_id\": 7842, \"topic_id\": 1786, \"forum_id\": 44, \"post_subject\": \"Salt Tools Error\", \"username\": \"iMikePayne\", \"post_text\": \"Can anyone explain what this is? This is from mod_SoapCalls in salt tools\\n30.\\n\\n\\nError: Cannot associate a side effect with a module - action will be lost (4, 1), 2395, SALTTOOLS30.mod_Soapcalls
\", \"post_time\": \"2015-06-26 17:51:40\" },\n\t{ \"post_id\": 8030, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"leonarta\", \"post_text\": \"[quote="leonarta":1mdjbbuf]would you please post the entire spec file? We'll need more context to figure out what's going on here.\\n\\nAlternately, if you can't post it here for whatever reason, please just send it to me via email and we'll get to the bottom of it.\", \"post_time\": \"2015-08-23 20:31:03\" },\n\t{ \"post_id\": 8029, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Did someone ever report this bug? I can't access the SALT Github page. Or can someone grant me access so I can report myself?\\n\\nIt only occurs when the linkpath ends with a single optional field. If you are using more than one it works fine.\", \"post_time\": \"2015-08-23 18:29:41\" },\n\t{ \"post_id\": 7968, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Its not repeated. That was a mistake I made when generalizing the problem.\", \"post_time\": \"2015-07-30 14:23:11\" },\n\t{ \"post_id\": 7967, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"leonarta\", \"post_text\": \"[quote="iMikePayne":1lx7fbwc]\\n\\nLINKPATH:pathname:field1:field2:field3:field4:field5:field5:?:field6\\n
\\n\\n\\nIs field5 actually being repeated in each of the LINKPATHs? Or is that just an artifact of the genericizing here?\\n\\nIf you are repeating fields... don't. If not, would you please post the entire spec file? We'll need more context to figure out what's going on here.\", \"post_time\": \"2015-07-30 14:22:08\" },\n\t{ \"post_id\": 7966, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Don't have access to this page.\", \"post_time\": \"2015-07-30 14:04:59\" },\n\t{ \"post_id\": 7965, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"rtaylor\", \"post_text\": \"Mike,\\n\\nNow that you have identified the culprit, you can report the issue here: https://github.com/hpcc-systems/SALT/issues\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2015-07-30 14:04:09\" },\n\t{ \"post_id\": 7959, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Just an update. Has nothing to do with bag of words. Only happens when ending with an optional field. Every single time without fail.\\n\\nDoesn't work\\nLINKPATH:pathname:field1:field2:field3:field4:field5:field5:?:field6\\n
\\nDoes work\\nLINKPATH:pathname:field1:field2:field3:field4:field5:field5:field6\\n
\\nDoes work\\nLINKPATH:pathname:field1:field2:field3:field4:field5:field5:+:field6\\n
\\nDoes work\\nLINKPATH:pathname:field1:field2:field3:field4:field5:field5:?:field6:+:field7\\n
\", \"post_time\": \"2015-07-29 18:44:24\" },\n\t{ \"post_id\": 7896, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Does anyone know what else could be causing this?\", \"post_time\": \"2015-07-09 18:27:01\" },\n\t{ \"post_id\": 7890, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"I took off all Bag of Words but still getting the same error.\", \"post_time\": \"2015-07-08 15:15:42\" },\n\t{ \"post_id\": 7888, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"leonarta\", \"post_text\": \"Following some offline investigation... The trigger for these error messages is having two BAGOFWORDS fields in the mandatory portion of a LINKPATH. SALT currently only handles one. I've submitted an issue to help clarify how best to proceed; either we'll add support for >1, or we'll report it as an error when compiling the spec file.\\n\\nIn the meantime, try making one of them optional.\", \"post_time\": \"2015-07-07 19:26:47\" },\n\t{ \"post_id\": 7887, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"bforeman\", \"post_text\": \"Hi Mike,\\n\\nDid you try to rebuild the indexes using the Build in the BWR_Specificities?\\n\\nBob\", \"post_time\": \"2015-07-07 19:17:12\" },\n\t{ \"post_id\": 7886, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"I had no problem with the dataset when came to BWR_Specificities before hand.\", \"post_time\": \"2015-07-07 17:00:00\" },\n\t{ \"post_id\": 7885, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Thanks Bob! I adjusted accordingly but getting an error in BWR_GoExternal. Say that Error: Object 'le' does not have a field named 'myfieldname2' (7, 57), 2171, SALT30.Mac_Expand_WordBag_Key\\nError: While expanding macro <param> (7, 56), 2171, SALT30.Mac_Expand_WordBag_Key\\nError: While expanding macro mac_expand_wordbag_key (29, 91), 2171, MyModule.Key_MyDataset_LinkPathName1
\\n\\nWhat I don't understand is that my field does exist in the dataset and in Layout_MyDataset.\\n\\nTaking a look at Key_MyDataset_LinkPathName1 it seems like myfieldname2 is not referenced there at all. As a matter of fact, it is missing is all of my Key_ files. \\n\\nMy LINKPATH is as follows:\\nLINKPATH:Name1:myfieldname1:myfieldname2:myfieldname3:myfieldname4:?:myfieldname5\\nLINKPATH:Name2:myfieldname1:myfieldname2:myfieldname5:myfieldname3:myfieldname4\\nLINKPATH:Name3:myfieldname1:myfieldname2:myfieldname5:myfieldname4:?:myfieldname5
\", \"post_time\": \"2015-07-07 16:10:28\" },\n\t{ \"post_id\": 7882, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"bforeman\", \"post_text\": \"Here is the detailed reply that I just received from my colleague, Todd Leonard (Thanks Todd!):\\n\\nThe field declared as the RIDFIELD is indeed intended to a unique record id (in fact it’s mandatory).\\n\\nSuppose we have some business data, linked by the “other program” as follows…\\n\\nIDFIELD:EXISTS:bdid\\nRIDFIELD:rcid\\n\\n\\nrcid bdid company_name addr\\n1 1 Smith’s Car Wash 123 Main St.\\n2 1 Smythe’s Car Wash 123 Main St.\\n3 4 Joe’s Pizza 1313 Mockingbird Ln.\\n4 4 Joe’s Pizza Shack 1313 Mockingbird\\n5 5 Susie’s Cupcakes 456 Madison Circle\\n5 5 Herbie’s BBQ 789 Jefferson Court\\n
\\n\\nRows 1-2 are well-formed – the rcid values are unique, and the bdid value which defines “the cluster” is equal to the lowest rcid value in the cluster.\\n\\nRows 3-4 are not well formed – their bdid value is not equal to the lowest rcid value. If internal linking had produced this in error, it would have reported that in the IdConsistency0 output. rcid would have a “belowparent0” error, and bdid would have an “unbased0” error.\\n\\nRows 5-5 are not well-formed – their rcid value is not unique. If internal linking had produced this in error, it would be reported in ValidityStatistics.duplicaterids0.\\n\\nExternal linking doesn’t run these integrity reports automatically, but it gives you the tools to do so:\\n\\nOUTPUT(modname.Fields.UIDConsistency(infile).Advanced0);
\\n\\nYou’d have to roll your own uniqueness test for the rcids, but it’s just something like:\\n\\nOUTPUT(COUNT(infile) - COUNT(TABLE(infile,{rcid},rcid,MERGE)));
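A slightly expanded sketch of the same check, reusing the infile and rcid names from the snippet above (the NAMED label is illustrative), which also lists the rcid values that are duplicated:

dupRids := TABLE(infile, {rcid, UNSIGNED8 cnt := COUNT(GROUP)}, rcid, MERGE)(cnt > 1);
OUTPUT(dupRids, NAMED('Duplicate_rcids'));        // an empty result means every rcid is unique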
\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-07-06 20:09:31\" },\n\t{ \"post_id\": 7881, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Yes that was a typo (fixed). So this linked data does have a cluster ID and record ID. Though I don't necessarily get what you mean by cluster’s IDFIELD has to be equal to the lowest RIDFIELD in the cluster
\\n\\nI see from example SALT data that the Min for bdid and rcid are the same. I'm sure that may be what you're alluding to but I still don't understand the reasoning behind it. So RIDFIELD are not meant to be 'unique' record id fields when doing internal linking? I did mean internal in the previous question. What is their relation to cluster IDs?\", \"post_time\": \"2015-07-06 19:06:37\" },\n\t{ \"post_id\": 7879, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Re: Requirements for External Linking\", \"username\": \"bforeman\", \"post_text\": \"The short answer is "No" and "Yes" \\n\\nAssuming you meant _external_ linking, you’re exactly right. (The context does seem to suggest internal was a typo.)\\n\\nAs long as you add the fields that SALT is looking for like the RECID and the Source, you can essentially link any dataset, that’s kind of what external linking is all about.\\n\\nIt’s probably important to mention the results of the other process can’t violate SALT’s id integrity rules for a linked header. For a single-level header (i.e. no IDPARENTS or IDCHILDREN) that just means RIDFIELD has to be non-null and unique among all records, and that a cluster’s IDFIELD has to be equal to the lowest RIDFIELD in the cluster. Multi-level integrity is more complex than that, but that’s unlikely to be of interest here.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2015-07-06 16:59:54\" },\n\t{ \"post_id\": 7878, \"topic_id\": 1799, \"forum_id\": 44, \"post_subject\": \"Requirements for External Linking\", \"username\": \"iMikePayne\", \"post_text\": \"Hello All,\\n\\nI have a question about the processes of external linking. I know that you have to have a previously linked dataset to link against. My question is, does that dataset have to be a result of a SALT internal linking process? Or can I have a dataset that has already been clustered by some other process and use that as my base file?\", \"post_time\": \"2015-07-06 15:04:50\" },\n\t{ \"post_id\": 7979, \"topic_id\": 1829, \"forum_id\": 44, \"post_subject\": \"Re: External Linking and Best Type\", \"username\": \"leonarta\", \"post_text\": \"[quote="iMikePayne":3lnuuei2]When doing external linking. Does resolution occur at the record or cluster level?\\n\\nFurthermore using BESTTYPE with external linking are do the propagations happen before the match process begins?\\n\\n I ask this because I am seeing entities resolved to a single instance of a cluster where it doesn't seem to have a value that should've been propagated/extended.\\n\\nThe SALT manual offers the term "entity resolution" as a description of the external linking process. We're trying to find which entity (a.k.a. cluster) in a header matches a given input record, and when we find it the entity id (i.e. IDFIELD) gets patched into the input file to form the output.\\n\\nSo, it can understandably be a little confusing when we say that matching occurs on a record level. This is how the code works, though... SALT has one particular header record in mind when it identifies a match. Even if more than one record scores highly enough to exceed the match threshold, only one is ultimately picked as _the_ match.\\n\\nWe do have a couple features clouding that picture a little:\\n\\nPropagation (from records in a cluster to another record in the same cluster) using either PROP or BESTTYPE occurs before matching gets underway (during the key build for external linking). It's also important to note that propagation is impermanent. 
You won't see propagated values in the linking output; they're just a temporary augmentation that allows the collective knowledge of their cluster to contribute a _little_ to the record-level matching. Propagation alone shouldn't be making huge differences in what matches and what doesn't, but it can give a little nudge that helps differentiate between two otherwise viable matches.\\n\\nThe weighting associated with a match is derived not only from the values of the specific matching record (which would have been the best match), but from a rollup of all matching records from that cluster.\", \"post_time\": \"2015-08-03 18:21:27\" },\n\t{ \"post_id\": 7969, \"topic_id\": 1829, \"forum_id\": 44, \"post_subject\": \"External Linking and Best Type\", \"username\": \"iMikePayne\", \"post_text\": \"When doing external linking. Does resolution occur at the record or cluster level?\\n\\nFurthermore using BESTTYPE with external linking are do the propagations happen before the match process begins?\\n\\n I ask this because I am seeing entities resolved to a single instance of a cluster where it doesn't seem to have a value that should've been propagated/extended.\", \"post_time\": \"2015-07-30 22:20:08\" },\n\t{ \"post_id\": 8386, \"topic_id\": 1954, \"forum_id\": 44, \"post_subject\": \"Re: Macro: SALT31.MAC_External_AddPcnt\", \"username\": \"HPCC Staff\", \"post_text\": \"I've moved this to the Salt forum which limits access to company accounts. Please don't post salt info to the public forums.\", \"post_time\": \"2015-10-28 16:01:00\" },\n\t{ \"post_id\": 8376, \"topic_id\": 1954, \"forum_id\": 44, \"post_subject\": \"Macro: SALT31.MAC_External_AddPcnt\", \"username\": \"batema01\", \"post_text\": \"In executing an ECL app, I received this error: \\n1300: System error: 1300: Graph[2316], project[2318]: SLAVE #329 [10.241.60.129:22050]: Memory limit exceeded: current 14000, requested 1, limit 14000, \\nThe source of the error was traced to macro \\nto SALT31.MAC_External_AddPcnt(24,16)\\n\\nThis was WU W20151015-150156 executing on the production thor.\", \"post_time\": \"2015-10-26 19:06:40\" },\n\t{ \"post_id\": 8588, \"topic_id\": 2000, \"forum_id\": 44, \"post_subject\": \"Re: Salt Including Numbers in Fields\", \"username\": \"bforeman\", \"post_text\": \"Mike,\\nThe Organization field is a BAGOFWORDS field, and BOW values are decorated that way to facilitate matching. The first number represents the weight for the whole BOW value, and numbers listed after every word represent specificities of those words.\\n\\nWhat output are you trying to post process? Is it possible to join the output to the original input file and get to the 'un-decorated' Organization field values that way?\\n\\nBob (thanks Edin!)\", \"post_time\": \"2015-11-12 15:33:08\" },\n\t{ \"post_id\": 8570, \"topic_id\": 2000, \"forum_id\": 44, \"post_subject\": \"Salt Including Numbers in Fields\", \"username\": \"iMikePayne\", \"post_text\": \"Hi,\\n\\nI am using SALT to do external linking but when I get results there are numbers in front of every word that doesn't go away. I need to do post processing on the output and don't want to use regex post-processing to remove all numbers because some of them I need (e.g. 529 Corporation).\", \"post_time\": \"2015-11-11 21:44:36\" },\n\t{ \"post_id\": 9200, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"Re: issue with -ga option in SALT specification (*.salt) fil\", \"username\": \"bforeman\", \"post_text\": \"I have been using 5.4.2 and SALT 3.2.2 with no problems. 
I will send you a private email with more info.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-02-02 19:59:31\" },\n\t{ \"post_id\": 9198, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"Re: issue with -ga option in SALT specification (*.salt) fil\", \"username\": \"jennifer.hughes@lnssi.com\", \"post_text\": \"Bob,\\n\\nI tried that and it still isn't working.\\n\\nThanks\", \"post_time\": \"2016-02-02 19:57:04\" },\n\t{ \"post_id\": 9190, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"Re: issue with -ga option in SALT specification (*.salt) fil\", \"username\": \"bforeman\", \"post_text\": \"Hi Jennifer.\\n\\nRun the new SALT install again, and verify that it is installing into the right path.\\n\\nAfter the install, all of your SALT stuff should be in:\\n\\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.2.4\\\\SALT\\n\\nnot in the Plugins folder as in the old method....\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-02-02 17:44:02\" },\n\t{ \"post_id\": 9188, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"Re: issue with -ga option in SALT specification (*.salt) fil\", \"username\": \"jennifer.hughes@lnssi.com\", \"post_text\": \"No this hasn't been resolved.\\n\\nSalt.bat is in: Program Files (x86)\\\\HPCCSystems\\\\5.4.2\\\\eclide\\\\bin\\\\plugin\\n\\nI was told as a fix to a prior issue to install salt at c:\\\\salt. That issue was SALT wasn't working at all via the ECL_IDE. \\n\\nI think you are right that there is still a setup issue, but I can't figure it out.\\n\\nI did try to reinstall SALT at the location you mentioned. Made sure there was only one copy of the SALT.bat and it was in the appropriate plugin directory. And it didn't work either.\\n\\nAny other ideas?\", \"post_time\": \"2016-02-02 17:41:11\" },\n\t{ \"post_id\": 9186, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"Re: issue with -ga option in SALT specification (*.salt) fil\", \"username\": \"bforeman\", \"post_text\": \"Hi Jennifer,\\n\\nWere you ever able to get this resolved? \\n\\nIf the generation works on the command line, but not in the ECL IDE, it might be an issue with the SALT batch file being in the wrong location.\\n\\nCheck your 5.4.2 install folder. Ensure SALT is installed as a sibling to the “eclide” installation directory. For example, if you installed IDE 5.2.4, then SALT's destination folder should be C:\\\\Program Files (x86)\\\\HPCCSystems\\\\5.2.4\\\\SALT\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2016-02-02 17:20:18\" },\n\t{ \"post_id\": 9140, \"topic_id\": 2132, \"forum_id\": 44, \"post_subject\": \"issue with -ga option in SALT specification (*.salt) file\", \"username\": \"jennifer.hughes@lnssi.com\", \"post_text\": \"Course: Advanced SALT, module 3\\nSALT: 3.0 (old but should be fine for the course I am taking)\\nVM: HPCCSystemsVM-amd64-5.2.4-1 ( 5.4.2 does not open via VMWare or VirtualBox, but I was told previously it should be fine for the course I am taking)\\nECL_IDE: 5.4.2\\n\\nThe course video says the -ga flag should create a BWR_Iterate file. 
And it isn't there.\\n\\nTop three lines out of my specification file:\\nMODULE:AdvSALT\\nOPTIONS:-ga -gs2\\nFILENAME:Sample\\n\\nrunning from the command line seems to generate code and documentation: \\nC:\\\\salt\\\\SALT.exe -ga -gs2 Sample_Input_Internal_AF_SPC.salt \\n\\nBoth the above and below generated the same code/documentation output.\\nC:\\\\salt\\\\SALT.exe Sample_Input_Internal_AF_SPC.salt \\n\\nI did have issues getting SALT to work with the ECL_IDE to begin with so I was wondering if maybe I still have set up issues?\\n\\nAny ideas/suggestions would help
\\n\\nThanks\", \"post_time\": \"2016-01-27 15:58:08\" },\n\t{ \"post_id\": 5027, \"topic_id\": 1137, \"forum_id\": 45, \"post_subject\": \"2014 Reed Elsevier Environmental Challenge\", \"username\": \"HPCC Staff\", \"post_text\": \"The Reed Elsevier Environmental Challenge is awarded to projects that best demonstrate how they can provide sustainable access to safe water where it is presently at risk and/or access to improved sanitation. Projects must have clear practical applicability, address identified need, and advance related issues such as health, education, or human rights.\\n\\nThere is a $50,000 prize for the first place entry and a $25,000 prize for the second place entry. For the second year, a $15,000 WASH Alliance prize will be given for the third place project. The WASH Alliance is a consortium of six Dutch NGOs promoting hygienic use of sustainable water and sanitation. The WASH Alliance will also provide all three winners with relevant training and professional development up to $2,500 each.\\n\\nApplicants are offered access to relevant Reed Elsevier products that can help in the preparation of their submissions. And for the first time, all applicants will be offered access to LexisNexis Risk Solution’s open source high performance computing (HPCC Systems) resource, to allow them to process large amounts of research data, supported by online training. The winning projects will be highlighted in the Reed Elsevier journal Water Research.\\n\\nView more information about the challenge and how to apply:\\nhttp://www.reedelsevier.com/corporatere ... /Home.aspx\", \"post_time\": \"2013-12-05 19:07:16\" },\n\t{ \"post_id\": 5073, \"topic_id\": 1155, \"forum_id\": 46, \"post_subject\": \"Welcome to the KEL forum!\", \"username\": \"eblood66\", \"post_text\": \"Welcome to the KEL forum. Please use this forum for questions and discussions about using KEL.\", \"post_time\": \"2013-12-19 22:31:07\" },\n\t{ \"post_id\": 5159, \"topic_id\": 1184, \"forum_id\": 46, \"post_subject\": \"Working with NULLs\", \"username\": \"dabayliss\", \"post_text\": \"For hardened ECL programmers one of the major new (and probably odd) features of KEL is that it supports NULLs. This is knowledge engineering; you need to know if you know something?\\n\\nAs a simple example\\n\\n
ePerson := ENTITY(FLAT(UID=DID\\n,INTEGER dob\\n,STRING fname\\n,STRING mname\\n,STRING lname\\n,STRING zip\\n));\\nUSE header . file_headers(FLAT,ePerson);\\n\\nePerson: dob > 0 => Age := 2014-dob DIV 10000;\\n\\nQUERY: zipstats <= ePerson{zip,Age$GROUP:Ave,Age$GROUP:Median};\\n
\\n\\nLook at the computation of Age, especially the precondition .... It can be read as:\\n\\nFor all ePerson, if dob > 0 then the Age is defined as ....\\n\\nThen when we come to compute the average age and the median; will will ONLY be dealing with those ages that are actually defined ....\", \"post_time\": \"2014-01-28 19:27:58\" },\n\t{ \"post_id\": 10083, \"topic_id\": 2378, \"forum_id\": 46, \"post_subject\": \"Re: KEL Compiler Limitation\", \"username\": \"schen\", \"post_text\": \"To publish multiple Roxie queries simultaneously, the queries should be put in one file and compile them together or to put them in different folders and compile them separately. The first one is an better option.\", \"post_time\": \"2016-07-27 14:03:48\" },\n\t{ \"post_id\": 9948, \"topic_id\": 2378, \"forum_id\": 46, \"post_subject\": \"KEL Compiler Limitation\", \"username\": \"vin\", \"post_text\": \"We are unable to simultaneously have two published and working Roxie queries that use the same entity but access different attributes. Because the KEL compiler generates the index file based on the entity, it uses the same name for the index file. Because the index file has the same name publishing the second query overwrites the file used by the first query rendering the first query inoperable.\\n\\nSpecific example.\\n\\nKEL code\\n\\nIMPORT model;\\n\\n// ROW Based Properties\\nDrive: => NumPlays := Play(PlayInDrive.drive.UID = Drive.UID):Count;\\nDrive: => NumPasses := Play(PlayInDrive.play.playtype='PASS' AND PlayInDrive.drive.UID = Drive.UID):Count;\\nDrive: => PercentPasses := Drive.NumPasses/Drive.NumPlays;\\nDrive: => Bucket := ROUNDUP(Drive.PercentPasses/0.1);\\n\\n// Get Histogram Bucket\\nQUERY: Bucket(DriveIDs) <= Drive(UID IN DriveIDs){Bucket, UID}; // <----------- Query A\\n\\n// Get Average Delta VF\\nQUERY: AvgVf(DriveIDs) <= Drive{vfdelta$Drive(UID IN DriveIDs):Ave}; // <----- Query B\\n
\\n\\nGenerated ECL code that generates index file for query A\\n\\nSHARED IDX_Drive_UID_Layout := RECORD\\n KEL.typ.uid UID;\\n __EE9349._sequence_;\\n __EE9349._possession_;\\n __EE9349._quarter_;\\n __EE9349._time_;\\n __EE9349._starttime_;\\n __EE9349._endtime_;\\n __EE9349._startyard_;\\n __EE9349._endyard_;\\n __EE9349._result_;\\n __EE9349._redzone_;\\n __EE9349._yardspenalized_;\\n __EE9349._yardsgained_;\\n __EE9349._vf_;\\n __EE9349._vfdelta_;\\n __EE9349.Bucket_;\\n END;\\n\\n SHARED IDX_Drive_UID_Projected := PROJECT(__EE9349,TRANSFORM(IDX_Drive_UID_Layout,SELF.UID:=__T(LEFT.UID),SELF:=LEFT));\\n\\n EXPORT IDX_Drive_UID := INDEX(IDX_Drive_UID_Projected,{UID},{IDX_Drive_UID_Projected},'~key::KEL::MOD::Drive::UID');\\n
\\n\\nNOTE: the persistent filename for the above index is ,~key::KEL::MOD::Drive::UID\\n\\nGenerated ECL code that generates index file for query B\\nSHARED IDX_Drive_UID_Layout := RECORD\\n KEL.typ.uid UID;\\n __EE412._sequence_;\\n __EE412._possession_;\\n __EE412._quarter_;\\n __EE412._time_;\\n __EE412._starttime_;\\n __EE412._endtime_;\\n __EE412._startyard_;\\n __EE412._endyard_;\\n __EE412._result_;\\n __EE412._redzone_;\\n __EE412._yardspenalized_;\\n __EE412._yardsgained_;\\n __EE412._vf_;\\n __EE412._vfdelta_;\\n END;\\n\\n SHARED IDX_Drive_UID_Projected := PROJECT(__EE412,TRANSFORM(IDX_Drive_UID_Layout,SELF.UID:=__T(LEFT.UID),SELF:=LEFT));\\n EXPORT IDX_Drive_UID := INDEX(IDX_Drive_UID_Projected,{UID},{IDX_Drive_UID_Projected},'~key::KEL::MOD::Drive::UID');\\n
\\n\\nNOTE: the persistent filename for this second index is also ,~key::KEL::MOD::Drive::UID\\n\\nAs a result there is the following error when publish A, then publish B, and then execute A:\\n\\nException\\nReported by: Roxie\\nMessage: Query test1.1 is suspended because Key layout mismatch detected for index key::KEL::MOD::Drive::UID\\n
\\n\\nRequest\\n\\nEither (1) generate more specific or unique names for persistent indexes or (2) add compiler flag allowing user to specify index file name.\\n\\nConclusion\\n\\nIf we cannot publish multiple Roxie queries for a given entity, I do not see how we can use KEL to create our application.\", \"post_time\": \"2016-07-12 15:07:51\" },\n\t{ \"post_id\": 10093, \"topic_id\": 2433, \"forum_id\": 46, \"post_subject\": \"Re: EC L Code Generation\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,How do we generate the ECL code from the KEL query ?
Once KEL is all set up correctly with the IDE you don't have to do anything to generate ECL code -- it happens automatically when you save the KEL code file.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-07-27 15:07:38\" },\n\t{ \"post_id\": 10073, \"topic_id\": 2433, \"forum_id\": 46, \"post_subject\": \"Re: EC L Code Generation\", \"username\": \"schen\", \"post_text\": \"The KEL need to be installed in the same folder in which your ECL IDE is installed. Since the version of your ECL IDE is different from the KEL, you need to change the default installation folder during KEL installation.\\nFor example change C:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.0.0\\\\KEL to C:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.0.2\\\\KEL\", \"post_time\": \"2016-07-27 12:43:30\" },\n\t{ \"post_id\": 10063, \"topic_id\": 2433, \"forum_id\": 46, \"post_subject\": \"EC L Code Generation\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nI am new to KEL and starting to learn the same. I installed KEL and trying to execute the first KEL query. Saving the KEL file did not generate the required ECL code.\\n\\nHow do we generate the ECL code from the KEL query ?\\n\\nMy KEL and ECL IDE are 2 different versions \\n\\nKEL : 6.0.0\\nECL IDE : 6.0.2\\n\\nTried copying the KEL.bat file in \\nC:\\\\Program Files (x86)\\\\HPCCSystems\\\\6.0.2\\\\clienttools\\\\bin , didnt help the same.\\n\\nPlease provide your suggestions.\\n\\nThanks,\\nViswa\", \"post_time\": \"2016-07-27 05:47:19\" },\n\t{ \"post_id\": 10433, \"topic_id\": 2553, \"forum_id\": 46, \"post_subject\": \"Re: KEL compiler does not work with relative paths\", \"username\": \"schen\", \"post_text\": \"Vin,\\nThank you for reporting this issue. We'll fix it in the next release.\\n\\nThanks,\\nShawn\", \"post_time\": \"2016-08-10 13:52:30\" },\n\t{ \"post_id\": 10423, \"topic_id\": 2553, \"forum_id\": 46, \"post_subject\": \"KEL compiler does not work with relative paths\", \"username\": \"vin\", \"post_text\": \"With a relative path:\\n% kel -D ../work VFDRatio.kel\\nHPCC Systems KEL Lite Compiler Version v0.7.0-3\\nParsing VFDRatio.kel...\\n<none>:0,0:error K1017 - internal - invalid internal compiler state
\\n\\nWith absolute path:\\n\\n% kel -D ~/Work/gim/src/work VFDRatio.kel\\nHPCC Systems KEL Lite Compiler Version v0.7.0-3\\nParsing VFDRatio.kel...\\nSuccess.
\\n\\nThis is on Mac OS X El Capitan 10.11.4, Darwin Kernel Version 15.4.0.\", \"post_time\": \"2016-08-09 21:36:06\" },\n\t{ \"post_id\": 12363, \"topic_id\": 3073, \"forum_id\": 46, \"post_subject\": \"Re: KEL Use Case\", \"username\": \"rtaylor\", \"post_text\": \"Viswa,1.) Is there a simple use case where we can clearly distinguish between ECL and KEL and why KEL is better than ECL ?
Just like SALT, KEL is a meta-language that generates ECL code. \\n\\nI have a simple 3-line KEL file (one ENTITY, one USE, and one QUERY) that generates 7 ECL files containing a total of 126 lines of ECL code. And, the generated ECL code is designed to implement best ECL coding practices. So, given that you can do it either way, would you rather type 3 lines or 126? \\n2.) Is KEL performance significantly better than ECL ?
Since KEL is an ECL code generator and not an alternative to ECL, the question is meaningless.\\n3.) Based on the problem solutions , what type of problem solutions can we use KEL directly. Is it only for entity relationships ?
KEL allows you to view your data in whatever way you choose, regardless of how the data is physically stored in your files. It is a language designed specifically for Data Scientists to mine their "data lake" in any way they choose, to find information that is otherwise hidden.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-10-24 20:53:12\" },\n\t{ \"post_id\": 12343, \"topic_id\": 3073, \"forum_id\": 46, \"post_subject\": \"KEL Use Case\", \"username\": \"ksviswa\", \"post_text\": \"Hi,\\n\\nAm pretty new to KEL and exploring the same.\\n\\nHave very few generic questions though :\\n\\n1.) Is there a simple use case where we can clearly distinguish between ECL and KEL and why KEL is better than ECL ?\\n\\n2.) Is KEL performance significantly better than ECL ?\\n\\n3.) Based on the problem solutions , what type of problem solutions can we use KEL directly. Is it only for entity relationships ?\\n\\nPlease provide your suggestions.\\n\\nThanks a lot in advance.\\n\\nRegards,\\nViswa\", \"post_time\": \"2016-10-24 15:31:25\" },\n\t{ \"post_id\": 16133, \"topic_id\": 4043, \"forum_id\": 46, \"post_subject\": \"Re: Comments section within KEL CHM\", \"username\": \"eblood66\", \"post_text\": \"I'm afraid you have found the wrong location for questions about the KEL CHM product. This forum is for the Knowledge Engineering Language for the HPCC big data platform. Nobody here is knowledgeable about help files. Sorry.\", \"post_time\": \"2017-04-05 13:48:18\" },\n\t{ \"post_id\": 16063, \"topic_id\": 4043, \"forum_id\": 46, \"post_subject\": \"Comments section within KEL CHM\", \"username\": \"calumbus53\", \"post_text\": \"Hello,\\n\\nI am trying to create a comments section within my "HELP" document, which I am creating within KEL CHM.\\n\\nBelow is the code:\\n\\n<html>\\n\\n\\t<head>\\n\\t\\t<link rel="stylesheet" href="style.css" type="text/css">\\n\\t</head>\\n\\n\\t<body>\\n\\t\\n\\t\\t<form action="com.txt" method="POST">\\n\\t\\t\\n\\t\\t\\t<label> Name:\\n\\t\\t\\t\\t<input type="text" name="Name" class="Input" style="width: 100%" required>\\n\\t\\t\\t</label>\\n\\t\\t\\t<br><br>\\n\\t\\t\\t<label> Comment: <br>\\n\\t\\t\\t\\t<textarea name="Comment" class="Input" style="width: 100%" required></textarea>\\n\\t\\t\\t</label>\\n\\t\\t\\t<br><br>\\n\\t\\t\\t<input type="submit" name="Submit" value="Submit Comment" class="Submit">\\n\\t\\t\\n\\t\\t</form>\\n\\t\\t\\n\\t</body>\\n\\t\\n</html>\\n\\n\\n<?php\\n\\n\\tif($_POST['Submit']){\\n\\t\\tprint "<h1>Your comment has been submitted!</h1>";\\n\\t\\t\\n\\t\\t$Name = $_POST['Name'];\\n\\t\\t$Comment = $_POST['Comment'];\\n\\t\\t\\n\\t\\t#Get old comments\\n\\t\\t$old = fopen("comments.txt", "r+t");\\n\\t\\t$old_comments = fread($old, 1024);\\n\\t\\t\\n\\t\\t#Delete everything, write down new and old comments\\n\\t\\t$write = fopen("comments.txt", "w+");\\n\\t\\t$string = "<b>".$Name."</b><br>".$Comment."<br>\\\\n".$old_comments;\\n\\t\\tfwrite($write, $string);\\n\\t\\tfclose($write);\\n\\t\\tfclose($old);\\n\\t}\\n\\t\\n\\t#Read comments\\n\\t$read = fopen("comments.txt", "r+t");\\n\\techo "<br><br>Comments<hr>".fread($read, 1024);\\n\\tfclose\\n\\t\\n?>\", \"post_time\": \"2017-03-28 11:55:51\" },\n\t{ \"post_id\": 22393, \"topic_id\": 1173, \"forum_id\": 49, \"post_subject\": \"Re: 欢迎您访问HPCC系统的中文论坛\", \"username\": \"jzuo\", \"post_text\": \"欢迎一起切磋 \", \"post_time\": \"2018-07-09 13:30:30\" },\n\t{ \"post_id\": 22373, \"topic_id\": 1173, \"forum_id\": 49, \"post_subject\": \"Re: 欢迎您访问HPCC系统的中文论坛\", \"username\": \"jzuo\", \"post_text\": 
\"你好,我也使用hpcc,很高兴有人一起讨论。\", \"post_time\": \"2018-07-09 13:12:37\" },\n\t{ \"post_id\": 22283, \"topic_id\": 1173, \"forum_id\": 49, \"post_subject\": \"Re: 欢迎您访问HPCC系统的中文论坛\", \"username\": \"hellowangzhi\", \"post_text\": \"HPCC中文论坛有版主吗?或是有人关注中文论坛吗?\", \"post_time\": \"2018-07-04 01:35:19\" },\n\t{ \"post_id\": 5132, \"topic_id\": 1173, \"forum_id\": 49, \"post_subject\": \"欢迎您访问HPCC系统的中文论坛\", \"username\": \"aquila\", \"post_text\": \"HPCC 是一个用集群服务器进行大数据分析的系统。本论坛是为HPCC系统的用户和研发人员提供信息交流和合作的平台。欢迎您上传有关HPCC系统及ECL语言的各种问题。也欢迎您对有关问题提供您的答复,看法和建议。\\n注: 任何人均可浏览本论坛.您若需要上传您的帖子,敬请先登录。\", \"post_time\": \"2014-01-15 21:24:26\" },\n\t{ \"post_id\": 8664, \"topic_id\": 2005, \"forum_id\": 49, \"post_subject\": \"Re: 在中国不能下载HPCC软件\", \"username\": \"xue_fg\", \"post_text\": \"目前在中国已经可以直接下载软件。谢谢\", \"post_time\": \"2015-11-24 02:04:53\" },\n\t{ \"post_id\": 8630, \"topic_id\": 2005, \"forum_id\": 49, \"post_subject\": \"Re: 在中国不能下载HPCC软件\", \"username\": \"JimD\", \"post_text\": \"这应该可以解决。如果您仍然有困难,请告诉我们。\\n\\nThis should be resolved. If you are still having difficulty, please let us know. \\n\\nJim\", \"post_time\": \"2015-11-17 20:06:31\" },\n\t{ \"post_id\": 8591, \"topic_id\": 2005, \"forum_id\": 49, \"post_subject\": \"在中国不能下载HPCC软件\", \"username\": \"xue_fg\", \"post_text\": \"提示不能访问\\nhttp://wpc.423a.edgecastcdn.net/00423A/ ... .4.4-1.ova\\n\\n能否解决?\", \"post_time\": \"2015-11-14 03:20:59\" },\n\t{ \"post_id\": 30393, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"Re: 有中文版主吗?我来签个到!\", \"username\": \"hellowangzhi\", \"post_text\": \"HPCC还是非常好用,使用方便快捷。\", \"post_time\": \"2020-04-29 03:06:05\" },\n\t{ \"post_id\": 27963, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"Re: 有中文版主吗?我来签个到!\", \"username\": \"hellowangzhi\", \"post_text\": \"对于一个新手,怎么开始HPCC之旅?\", \"post_time\": \"2019-10-24 01:47:17\" },\n\t{ \"post_id\": 22423, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"Re: 有中文版主吗?我来签个到!\", \"username\": \"tlhumphrey2\", \"post_text\": \"There isn't a Chinese version of hpcc systems. But, here you can try it out using the playground: play.hpccsystems.com. And, there is plenty of documentation about the system here: https://hpccsystems.com.\\n\\nAlso, there is a couple AWS cloud versions. One is here: https://aws.hpccsystems.com/aws/login. And, there is cloudformation templates and scripts for launching your own hpcc system on AWS here: https://github.com/tlhumphrey2/EasyFastHPCCoAWS.\\n\\nIf you have any addition questions, please post.\", \"post_time\": \"2018-07-09 14:15:32\" },\n\t{ \"post_id\": 22403, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"Re: 有中文版主吗?我来签个到!\", \"username\": \"jzuo\", \"post_text\": \"欢迎切磋
\", \"post_time\": \"2018-07-09 13:31:40\" },\n\t{ \"post_id\": 22363, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"Re: 有中文版主吗?我来签个到!\", \"username\": \"jzuo\", \"post_text\": \"你好,我不是版主,但我也使用HPCC,很高兴可以一起探讨。\", \"post_time\": \"2018-07-09 13:10:40\" },\n\t{ \"post_id\": 22293, \"topic_id\": 5723, \"forum_id\": 49, \"post_subject\": \"有中文版主吗?我来签个到!\", \"username\": \"hellowangzhi\", \"post_text\": \"我来学习使用HPCC!求懂中文的技术高手带个路!!!\", \"post_time\": \"2018-07-04 01:57:01\" },\n\t{ \"post_id\": 27983, \"topic_id\": 7433, \"forum_id\": 49, \"post_subject\": \"Re: 冒个泡,集合!\", \"username\": \"tlhumphrey2\", \"post_text\": \"Thank you for your nice comments about the HPCC Systems.\", \"post_time\": \"2019-10-24 12:43:17\" },\n\t{ \"post_id\": 27973, \"topic_id\": 7433, \"forum_id\": 49, \"post_subject\": \"冒个泡,集合!\", \"username\": \"hellowangzhi\", \"post_text\": \"召集一下,有多少国内的技术人员在用HPCC,请大家签个到,看看国内应用情况。\\n我是HPCC用户,使用HPCC系统3年以上,系统稳定,没出过问题!\", \"post_time\": \"2019-10-24 01:50:50\" },\n\t{ \"post_id\": 30491, \"topic_id\": 8053, \"forum_id\": 49, \"post_subject\": \"Re: 我已经成功在centos7部署了7.8.6\", \"username\": \"hellowangzhi\", \"post_text\": \"Thank you Jim & tlhumphrey2 .\\nI will try do it today.\", \"post_time\": \"2020-05-13 00:53:31\" },\n\t{ \"post_id\": 30473, \"topic_id\": 8053, \"forum_id\": 49, \"post_subject\": \"Re: 我已经成功在centos7部署了7.8.6\", \"username\": \"JimD\", \"post_text\": \"You can also read this manual:\\nThe ECL Scheduler\\nhttps://d2wulyp08c6njk.cloudfront.net/r ... .8.6-1.pdf\\n\\nJim\", \"post_time\": \"2020-05-12 12:21:18\" },\n\t{ \"post_id\": 30463, \"topic_id\": 8053, \"forum_id\": 49, \"post_subject\": \"Re: 我已经成功在centos7部署了7.8.6\", \"username\": \"tlhumphrey2\", \"post_text\": \"This is a good article about scheduling jobs: viewtopic.php?f=21&t=681\", \"post_time\": \"2020-05-12 12:08:36\" },\n\t{ \"post_id\": 30453, \"topic_id\": 8053, \"forum_id\": 49, \"post_subject\": \"我已经成功在centos7部署了7.8.6\", \"username\": \"hellowangzhi\", \"post_text\": \"我已经成功在centos7部署了7.8.6,运行良好。有谁知道如何创建定时任务?\", \"post_time\": \"2020-05-12 05:44:58\" },\n\t{ \"post_id\": 30591, \"topic_id\": 8081, \"forum_id\": 49, \"post_subject\": \"添加定时任务举例\", \"username\": \"hellowangzhi\", \"post_text\": \"我改写了官方文档中定时任务的例子,供大家参考:\\n
\\nIMPORT TutorialYourName;\\ntask := FUNCTION\\n RETURN BUILDINDEX(TutorialYourName.IDX_PeopleByZIP,OVERWRITE);\\nEND;\\nevents := MODULE\\n EXPORT dailyAtMidnight := CRON('0 0 * * *');\\n EXPORT dailyAtMinute(INTEGER minute=0) := EVENT('CRON',(STRING)minute + ' * * * *');\\n EXPORT dailyAt( INTEGER hour,INTEGER minute=0) := EVENT('CRON',(STRING)minute + ' ' + (STRING)hour + ' * * *');\\n EXPORT dailyAtMidday := dailyAt(12, 0);\\n EXPORT EveryThreeHours := CRON('0 0-23/3 * * *');\\nEND;\\ntask : WHEN(events.dailyAtMinute(36));\\n
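As a small editorial aside (not part of the original post), the same task definition could instead be bound to any of the other events declared in the module, for example:

task : WHEN(events.EveryThreeHours);              // fires at 00:00, 03:00, 06:00, ... each day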
\\nOnce this example is submitted, it will appear in the list of scheduled events.\", \"post_time\": \"2020-05-14 03:30:05\" },\n\t{ \"post_id\": 30763, \"topic_id\": 8103, \"forum_id\": 49, \"post_subject\": \"Re: How to adjust the HPCC time zone to UTC+8\", \"username\": \"hellowangzhi\", \"post_text\": \"Thanks, Richard!\", \"post_time\": \"2020-05-20 01:04:00\" },\n\t{ \"post_id\": 30723, \"topic_id\": 8103, \"forum_id\": 49, \"post_subject\": \"Re: How to adjust the HPCC time zone to UTC+8\", \"username\": \"rtaylor\", \"post_text\": \"hellowangzhi,\\n\\n
Normally, dates and times are just taken from the hardware the platform runs on. The ECL Language also has a large number of date and time functions in its Date Standard Library (in the ECL IDE look under ecllibrary >> std >> system for the Date.ecl file).\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-19 18:04:36\" },\n\t{ \"post_id\": 30673, \"topic_id\": 8103, \"forum_id\": 49, \"post_subject\": \"怎么调整HPCC时区为东八区\", \"username\": \"hellowangzhi\", \"post_text\": \"怎么调整HPCC时区为东八区\", \"post_time\": \"2020-05-19 04:32:01\" },\n\t{ \"post_id\": 30753, \"topic_id\": 8113, \"forum_id\": 49, \"post_subject\": \"Re: 如何实现多DataSet的join连接\", \"username\": \"hellowangzhi\", \"post_text\": \"Thank Richard!\\nI will try do it.\", \"post_time\": \"2020-05-20 01:03:27\" },\n\t{ \"post_id\": 30713, \"topic_id\": 8113, \"forum_id\": 49, \"post_subject\": \"Re: 如何实现多DataSet的join连接\", \"username\": \"rtaylor\", \"post_text\": \"hellowangzhi,\\nI would like to ask, how to achieve join connection for more than 2 DataSets?
If all the datasets have exactly the same structure, then you can use the "set of datasets" form of JOIN (the second form here: https://hpccsystems.com/training/documentation/ecl-language-reference/html/JOIN.html). \\n\\nOtherwise, you'll just need to do multiple JOINs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-19 17:52:36\" },\n\t{ \"post_id\": 30683, \"topic_id\": 8113, \"forum_id\": 49, \"post_subject\": \"如何实现多DataSet的join连接\", \"username\": \"hellowangzhi\", \"post_text\": \"请教一下,超过2张以上的DataSet,如何实现join连接?\", \"post_time\": \"2020-05-19 06:57:03\" },\n\t{ \"post_id\": 30843, \"topic_id\": 8143, \"forum_id\": 49, \"post_subject\": \"Re: 在ECL Watch中Security按钮不可用,怎么解决?\", \"username\": \"rtaylor\", \"post_text\": \"hellowangzhi,In ECL Watch, Security is gray and buttons are not available. What is the reason and how to solve it?
It's also gray on my training cluster, because my training cluster does not have LDAP set up for it. If you do have an LDAP set up, then it's probable that you just do not have Security access rights.\\n\\nEither way, it's a platform configuration issue.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-05-22 12:40:40\" },\n\t{ \"post_id\": 30833, \"topic_id\": 8143, \"forum_id\": 49, \"post_subject\": \"在ECL Watch中Security按钮不可用,怎么解决?\", \"username\": \"hellowangzhi\", \"post_text\": \"在ECL Watch中Security是灰色的,按钮不可用,这是什么原因,怎么解决?\", \"post_time\": \"2020-05-22 07:09:03\" },\n\t{ \"post_id\": 7060, \"topic_id\": 1615, \"forum_id\": 51, \"post_subject\": \"HPCC Systems is taking part in GSoC 2015\", \"username\": \"lchapman\", \"post_text\": \"The HPCC Systems platform team is delighted that we have been accepted as a mentoring organisation for GSoC 2015!\\n\\nWelcome to any students who are interested in working on a project on our ideas list located here: https://wiki.hpccsystems.com/x/cQB. \\n\\nWhile the student application period does not open until 16th March 2015, this is a good time to familiarize yourself with the projects on offer. Check out the details provided including the deliverables. Where relevant, links to other resources you might find useful are also provided. Each project has already been assigned a mentor who is an expert in that area of our system. Email details are provided so you can contact them for more information.\\n\\nWe have created a wiki which highlights the main things you need to know about us and GSoC here: https://wiki.hpccsystems.com/x/MAB. But if you have made it here, you will already know that there is wealth of information about HPCC Systems here on our website.\\n\\nGo ahead, download the system and play around with it. There are downloads here: http://hpccsystems.com/download/free-community-edition/server-platform. But we think you're going want to delve down into the code, in which case you will want to download and build the system yourself using the instructions on our development wiki here: https://github.com/hpcc-systems/HPCC-Platform/wiki/Building-HPCC.\\n\\nOnce you have your system up and running, there are plenty of examples you can use to see what the system can do. Try out the HPCC Data Tutorial, https://hpccsystems.com/download/documentation/tutorials/hpcc-data-tutorial-introduction-thor-and-roxie, or the Six Degrees of Kevin Bacon example https://hpccsystems.com/download/documentation/tutorials/six-degrees-kevin-bacon-ecl-example. You will need to download the ECL IDE to do this.\\n\\nThen you may want to learn a bit of ECL. Have a go at the Introduction to ECL online training course. You will need to sign up first which you can do here: https://hpccsystems.com/getting-started/training-classes. If you decide you want to graduate onto any of the more advanced courses, we will supply you with a promotional code so that you can take it for free. To get a promotional code, email trish.mccall@lexisnexis.com. Be prepared to provide proof of your student status.\\n\\nFeel free to post comments or questions relating to GSoC here. We will be opening a GSoC specific IRC channel shortly, details of which will be posted here. 
\\n\\nHere are the contact details for the GSoC administrative organisers for HPCC Systems in case you have any general questions: \\n\\nTrish McCall - trish.mccall@lexisnexis.com\\nLorraine Chapman - Lorraine.Chapman@lexisnexis.com\\n\\nMore to come...\", \"post_time\": \"2015-03-03 14:51:36\" },\n\t{ \"post_id\": 7077, \"topic_id\": 1619, \"forum_id\": 51, \"post_subject\": \"GSoC IRC Channel\", \"username\": \"lchapman\", \"post_text\": \"We have added a new IRC Channel specifically for GSoC here: #hpcc-gsoc at irc.freenode.net.\\n\\nIt's fine if you want to contact us via our development channel here: #hpcc-dev at irc.freenode.net. But be aware that it can get quite quite busy in there! \\n\\nIf you have a question or comment, we want to know about it and get back to you with guidance as soon as possible. Mentors will be watching both channels but your post has less chance of getting 'lost in the noise' in the GSoC channel.\\n\\nYou need to use an IRC client to be able to join an IRC Channel. For more information about this and how to use an IRC Channel look here: https://developers.google.com/open-sour ... ources/irc.\", \"post_time\": \"2015-03-04 18:49:16\" },\n\t{ \"post_id\": 7129, \"topic_id\": 1620, \"forum_id\": 51, \"post_subject\": \"Link to Recorded Meetup\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you to all who joined the online meetup. You can listen to the recorded playback here: https://reedelsevier.webex.com/reedelse ... e289d1d227\\n\\nView the slide of useful links here: http://cdn.hpccsystems.com/presentation ... s_2015.pdf\", \"post_time\": \"2015-03-11 16:30:05\" },\n\t{ \"post_id\": 7086, \"topic_id\": 1620, \"forum_id\": 51, \"post_subject\": \"GSoC Projects Meetup for Students Weds 11th March\", \"username\": \"lchapman\", \"post_text\": \"GSoC Students...\\n\\nCome and join us on Wednesday 11th March at 10.00am EDT. \\n\\nFlavio Villanustre (VP Technology) leads HPCC Systems and will open the meeting. HPCC System GSoC mentors will be talking about their projects and there will be an opportunity to ask questions. \\n\\nThis is a webex meeting details as follows:\\n\\n-------------------------------------------------------\\nMeetup information\\n-------------------------------------------------------\\nTopic: HPCC Systems GSoC Meetup\\nDate: Wednesday, March 11, 2015\\nTime: 10:00 am, Eastern Daylight Time (New York, GMT-04:00)\\nMeeting Number: 274 631 262\\nMeeting Password: (This meeting does not require a password.)\\nMeeting Link: https://reedelsevier.webex.com/reedelse ... 1cf2a5b76d\\n\\nProvide your phone number when you join the meeting to receive a call back. Alternatively, you can call:\\nCall-in toll-free number: 1-855-733-3266 FREE (US)\\nCall-in number: 1-937-610-2663 (US)\\nShow global numbers: http://www.mymeetingroom.com/cnums.asp? ... 7-610-2663\\nAttendee access code: 678 694 5772\\n\\n-------------------------------------------------------\\nFor assistance\\n-------------------------------------------------------\\n1. Go to https://reedelsevier.webex.com/reedelsevier/mc\\n2. On the left navigation bar, click "Support".\\nTo check whether you have the appropriate players installed for UCF (Universal Communications Format) rich media files, go to https://reedelsevier.webex.com/reedelse ... 
gnosis.php.\", \"post_time\": \"2015-03-04 22:32:42\" },\n\t{ \"post_id\": 7092, \"topic_id\": 1621, \"forum_id\": 51, \"post_subject\": \"Re: Regarding ML ideas in GSOC'15\", \"username\": \"sayan6\", \"post_text\": \"Thank you Sir,that was helpful. \\nI have another query,so I sent you a mail a few days back regarding the SVM classifier inclusion in addition to the statistical tests.So on a side note,I am curious to know whether the mentor team is thinking on this topic to be included in the ideas list,\\nsuch that this idea can be worked upon after the primary ideas are being executed.\\nThank you again for your help.
\", \"post_time\": \"2015-03-06 15:13:53\" },\n\t{ \"post_id\": 7091, \"topic_id\": 1621, \"forum_id\": 51, \"post_subject\": \"Re: Regarding ML ideas in GSOC'15\", \"username\": \"tlhumphrey2\", \"post_text\": \"We may change the formula for the OLS Rsquared. The decision is being worked-on now.\\n\\nIf we decide NOT to change the formula then there will be one less statistic to implement.\", \"post_time\": \"2015-03-06 15:10:59\" },\n\t{ \"post_id\": 7090, \"topic_id\": 1621, \"forum_id\": 51, \"post_subject\": \"Regarding ML ideas in GSOC'15\", \"username\": \"sayan6\", \"post_text\": \"I am starting to learn the ECL language and also want to participate in GSOC'15.So I was going through the ML algorithms listed on the github page,and I want to clarify something.The inclusion of adjusted Rsquared ( [https://wiki.hpccsystems.com/display/hpcc/Add+new+statistics+to+the+Linear+and+Logistic+Regression+Modules] ) is asked in the GSOC ideas page and the codebase (the OLS regression one) already has a Rsquared in it.Right?\\n
\", \"post_time\": \"2015-03-06 12:29:52\" },\n\t{ \"post_id\": 7101, \"topic_id\": 1623, \"forum_id\": 51, \"post_subject\": \"Re: Working on Issues ?\", \"username\": \"john holt\", \"post_text\": \"First, as a participant in GSoC it will be important to register and submit a proposal.\\n\\nThe Git Hub project is: https://github.com/hpcc-systems/ecl-ml\\n\\n\\nYou will notice Pull requests from contributors so you can see some contributions from some of out community members. \\n\\nThe GSoC 2015 calendar shows that we are hosting an online meeting Wednesday morning. It would be good if you could attend.\\n\\nBest,\", \"post_time\": \"2015-03-09 12:23:52\" },\n\t{ \"post_id\": 7099, \"topic_id\": 1623, \"forum_id\": 51, \"post_subject\": \"Re: Working on Issues ?\", \"username\": \"lchapman\", \"post_text\": \"Hi,\\n\\nI want to acknowledge your enquiry and will ask one of our ML experts to comment further.\\n\\nLorraine Chapman\\nGSoC Organisation Administrator\", \"post_time\": \"2015-03-08 14:39:52\" },\n\t{ \"post_id\": 7096, \"topic_id\": 1623, \"forum_id\": 51, \"post_subject\": \"Working on Issues ?\", \"username\": \"sarthakjain\", \"post_text\": \"Hello\\nI am interested in working on the Machine Projects for Gsoc 2015 and would like to know how I can contribute towards fixing bugs listed in track.hpccsystems.com/browse/ML, so that I can get an idea on how development works here ?\\nSarthak Jain\", \"post_time\": \"2015-03-07 13:18:07\" },\n\t{ \"post_id\": 7100, \"topic_id\": 1625, \"forum_id\": 51, \"post_subject\": \"Re: Machine Learning Algorithms in GSOC 2015\", \"username\": \"lchapman\", \"post_text\": \"Hi,\\n\\nWe have some online ECL training course that you can take for free. They can be found on this website here: https://hpccsystems.com/getting-started ... ng-classes. You will need to register to take a class.\\n\\nAdvanced classes are also available. If you whizz through the beginner class and want to progress further, you will need to get a promotional code to take the advanced class for free. You will need to provide proof of your student status. Contact trish.mccall@lexisnexis.com for the code.\\n\\nIf you haven't done this already, go to the ideas page for the ML projects we have on offer here: https://wiki.hpccsystems.com/x/dAB\\n\\nYou will find more information and contact details for the mentors overseeing these projects.\\n\\nGood luck!\", \"post_time\": \"2015-03-08 14:46:11\" },\n\t{ \"post_id\": 7098, \"topic_id\": 1625, \"forum_id\": 51, \"post_subject\": \"Machine Learning Algorithms in GSOC 2015\", \"username\": \"kyrs\", \"post_text\": \"Hi guys,\\nI wanted to contribute in Implementation of Gravitational Clustering in GSOC 2015.\\n\\n I am familiar with basic Machine Learning algorithm and have Implemented SVM and Nueral Network from scratch in C++ and python.Apart from that I have created my own Machine Learning algorithm which recently have been selected in Journal of Machine Learning and Cybernatics for publication for producing better result in minimum time when compared to algorithm like SVR and other.\\n \\n After going through your wiki I found that your codes are developed on ECL Language, Can you clarify what difficulty I would face, If I don't know the basic of this language. \\n\\nPlease suggest me place where I can learn more about ECL.\\n\\ncheers,\\nKumar Shubham
\", \"post_time\": \"2015-03-08 07:18:10\" },\n\t{ \"post_id\": 7128, \"topic_id\": 1633, \"forum_id\": 51, \"post_subject\": \"Link to Recorded Meetup\", \"username\": \"HPCC Staff\", \"post_text\": \"Thank you to all who joined the online meetup. You can listen to the recorded playback here: https://reedelsevier.webex.com/reedelse ... e289d1d227\\n\\nView the slide of useful links here: http://cdn.hpccsystems.com/presentation ... s_2015.pdf\", \"post_time\": \"2015-03-11 16:27:53\" },\n\t{ \"post_id\": 7125, \"topic_id\": 1633, \"forum_id\": 51, \"post_subject\": \"GSoC Student Meetup Agenda Weds 11th March 2015\", \"username\": \"lchapman\", \"post_text\": \"Don't forget to come to our meetup (details below). For those of you for whom the timing is not convenient, we will be posting a recording of the discussions. I'll post here with details of where you can find it after the meeting. If you can't attend and want to ask a question, email the details to me and I will ask it for you: Lorraine.Chapman@lexisnexis.com.\\n\\nAgenda\\n\\nWelcome \\nby Flavio Villanustre VP Technology and leader of the HPCC Systems project\\n\\nMentor Introductions\\nby Lorraine Chapman HPCC Systems GSoC Administrator\\n\\nProject Overviews by Mentors with questions taken after each project discussion\\nMachine learning projects - John Holt\\nWeb based visualisation framework - Gordon Smith\\nImprove Child Query Processing - Gavin Halliday\\nDFU spray from zip/gzip files - Attila Vamos\\nProvide unicode implementations for HPCC Systems standard library functions - John Holt\\nCluster deployment manager - Kevin Wang\\nECL debugger front end (Web Based) - Gordon Smith\\nUI improvements to the HPCC Systems configuration management tool - Kevin Wang\\nAllow multiple activities to be treated as a single meta-activity - Gavin Halliday\\nAdditional embedded languages - Richard Chapman\\n\\nClosing words by Lorraine Chapman\\n\\nEach project has a backup mentor who will also attend this meeting and may contribute to the discussion. This is an opportunity for you to get answers to any questions that you may have.\\n\\nMeeting details as follows:\\nThis is a webex meeting details as follows:\\n\\n-------------------------------------------------------\\nWebex Meeting Details \\n-------------------------------------------------------\\nTopic: HPCC Systems GSoC Meetup\\nDate: Wednesday, March 11, 2015\\nTime: 10:00 am, Eastern Daylight Time (New York, GMT-04:00)\\nMeeting Number:[masked]\\nMeeting Password: (This meeting does not require a password.)\\nMeeting Link: https://reedelsevier.webex.com/reedelse ... 1cf2a5b76d\\n\\nProvide your phone number when you join the meeting to receive a call back. Alternatively, you can call:\\nCall-in toll-free number:[masked] (US)\\nCall-in number:[masked] (US)\\nShow global numbers: http://www.mymeetingroom.com/cnums.asp? ... 7-610-2663\\nAttendee access code:[masked]\\n\\n-------------------------------------------------------\\nFor assistance\\n-------------------------------------------------------\\n1. Go to https://reedelsevier.webex.com/reedelsevier/mc\\n2. On the left navigation bar, click "Support".\\n3. To check whether you have the appropriate players installed for UCF (Universal Communications Format) rich media files, go to https://reedelsevier.webex.com/reedelse ... 
gnosis.php.\", \"post_time\": \"2015-03-10 19:33:03\" },\n\t{ \"post_id\": 7136, \"topic_id\": 1635, \"forum_id\": 51, \"post_subject\": \"Re: Project - Improve child query processing - github\", \"username\": \"ghalliday\", \"post_text\": \"Yes that repository contains the code that will need to be changed.\\n\\nI'm not sure how much familiarity you have with HPCC, but I would strongly recommend that you follow some of the tutorials on the HPCC systems web site and get an initial understanding of how ECL works.\\n\\nI would also strongly recommend reading the file ecl/eclcc/DOCUMENTATION.rst - it provides quite a lot of background information on the system especially the code generator.\", \"post_time\": \"2015-03-13 15:05:43\" },\n\t{ \"post_id\": 7131, \"topic_id\": 1635, \"forum_id\": 51, \"post_subject\": \"Project - Improve child query processing - github\", \"username\": \"sarthakjain\", \"post_text\": \"Hello\\nI would like to confirm if the following repository contains the majority of existing code for above project.\\nhttps://github.com/hpcc-systems/HPCC-Pl ... master/ecl\\nAlso , among the folders present in above repository, which ones would be more likely to contain code that needs to be modified for this project ?\\nRegards\\nSarthak Jain\", \"post_time\": \"2015-03-12 10:35:56\" },\n\t{ \"post_id\": 7146, \"topic_id\": 1640, \"forum_id\": 51, \"post_subject\": \"Re: DFU spray from zip/gzip GSOC 2015\", \"username\": \"AttilaV\", \"post_text\": \"Hello\\n\\nThank you for your inquire.\\n\\nFor this project you need:\\n- C++ programming language\\n- Knowledge about ZIP/GZIP archive (internal structure, archive handling, open source implementation)\\n- GitHub experience (clone, commit, push, pull request handling)\\n- How to build, install and run HPCC system, system architecture, components and their role\\n- Knowledge about the current Spray implementation ( how to spray, active components, workflow)\\n\\nIf you have any further question, please send an email for me: attila.vamos@lexisnexis.com\\n\\nThanks\", \"post_time\": \"2015-03-17 02:42:04\" },\n\t{ \"post_id\": 7143, \"topic_id\": 1640, \"forum_id\": 51, \"post_subject\": \"DFU spray from zip/gzip GSOC 2015\", \"username\": \"bhanusharma\", \"post_text\": \"Hello\\n\\nI am interested in DFU spray from zip/gzip files project.I am at 6th semester of btech(CSE) program and can code in c++,java,mysql.I also know basics of UML diagram and have working knowlege of rational rose. I want to know what i need to learn to do this project.\\n\\n\\nThanks\", \"post_time\": \"2015-03-16 05:15:33\" },\n\t{ \"post_id\": 7145, \"topic_id\": 1641, \"forum_id\": 51, \"post_subject\": \"GSoC student application period opens today\", \"username\": \"lchapman\", \"post_text\": \"From today at 19:00 UTC, GSoC students can start submitting proposals via Google's Melange interface here: https://www.google-melange.com/gsoc/hom ... e/gsoc2015.\\n\\nThe application period ends on Friday 27th March 19:00 UTC.\\n\\nWhat you need to do:\\n1. Get a Google account if you don't have one using this link: https://accounts.google.com/signup.\\n2. Login into Google's Melange interface using your Google account https://www.google-melange.com/gsoc/hom ... e/gsoc2015.\\n3. Create a profile.\\n4. Supply proof of your student status by uploading your enrollment form.\\n5. 
Find HPCC Systems in the list of accepted organisations and submit your proposal.\\n\\nThere are some additional pages on the HPCC Systems GSoC wiki designed to help students navigate the application phase here: https://wiki.hpccsystems.com/x/SQB\\n\\nGoogle also provides details about this in their student guide here: http://en.flossmanuals.net/melange/stud ... ion-phase/\\n\\nOnce you have submitted a proposal, don't forget to look and see whether a mentor has any comments that may help you improve your proposal. You can update your proposal at any time up to the deadline date.\\n\\nHere is a recording of HPCC Systems mentors talking about their projects at our recent meetup: https://reedelsevier.webex.com/cmp0307l ... 4529801808\\n\\nView the slide of useful links here: http://cdn.hpccsystems.com/presentation ... s_2015.pdf\\n\\nGood Luck!\", \"post_time\": \"2015-03-16 14:49:23\" },\n\t{ \"post_id\": 7335, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"dabayliss\", \"post_text\": \"John,\\n\\nJust an FYI - the exact algorithm can be sped up significantly (at least in the discretish case) by using TABLE,MERGE as a precursor.\\n\\nDavid\", \"post_time\": \"2015-04-13 23:48:39\" },\n\t{ \"post_id\": 7209, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"john holt\", \"post_text\": \"Artem,\\n You can determine the approximate n-tile to a specified maximum error without knowing the exact error. The claim is merely that the error will be no more than X. It could be a lot less than X or could even be zero.\", \"post_time\": \"2015-03-26 17:54:25\" },\n\t{ \"post_id\": 7206, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"artkom\", \"post_text\": \"One thing is left unclear to me: in order to find an approximate solution with a given tolerance, we have to compare it with the exact one, which involves a global redistribution anyhow. Therefore, in order to find a close solution, we have to run the exact one for every new dataset. That means the performance of our approximate solution will be worse than the exact one, right? If so, what is the point of doing this?\", \"post_time\": \"2015-03-26 15:41:34\" },\n\t{ \"post_id\": 7205, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"john holt\", \"post_text\": \"Artem,\\n Yes, a compare against the current exact implementation is a good idea.\", \"post_time\": \"2015-03-26 14:03:42\" },\n\t{ \"post_id\": 7198, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"artkom\", \"post_text\": \"Yes, it does. Thank you.\\n\\nOne more question about the tolerance in the NTiles_approx() function. We have to compare our approximate solution with the exact one (my assumption is that it should be ML.FieldAggregates(..)), right?\\n\\nIf not, what should we compare our approximate solution with?\", \"post_time\": \"2015-03-25 19:30:34\" },\n\t{ \"post_id\": 7187, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"john holt\", \"post_text\": \"Let me start with the last question first. I have sprayed a 10 million record dataset to 100 nodes in a 100 node cluster. Assume that the objective is to find the 2-tile (which is the median). 
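\\n\\n[Editor's aside, not part of the original thread: a minimal ECL sketch of the TABLE,MERGE precursor David suggests above for the exact n-tile. The record layout and logical file name are illustrative assumptions, not the ML library's actual code.
Rec := RECORD
  REAL8 val;
END;
ds := DATASET('~demo::ntile::numbers', Rec, THOR);
// MERGE aggregates locally on each node first and then combines the partial
// results, so duplicate values collapse to one row per distinct value.
counted := TABLE(ds, {val, UNSIGNED8 cnt := COUNT(GROUP)}, val, MERGE);
// The exact algorithm's global sort now redistributes only distinct values;
// running totals of cnt then locate each tile boundary.
OUTPUT(SORT(counted, val));
The approximate approach described next avoids even this reduced global sort by sharing only per-node summary records.]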
\\n\\nEach node sees only the data on that node. Each node can determine the median independently, but the medians may all be different. If I want to know the median for the total dataset, there needs to be some information sharing.\\n\\nObviously, the records could be shared. This is effectively the current implementation approach. If I perform a local sort and some data reduction, like reducing the original set to 100 records that each describe 1% of the local data with high and low values, I can share that data to all of the nodes so every node sees the 100*100 (10 thousand) records. \\n\\nEach node independently uses these 10 thousand records to approximate the global median. The degree of accuracy will be a function of the number of nodes and the number of summary records.\\n\\nDoes this make sense?\", \"post_time\": \"2015-03-25 05:41:22\" },\n\t{ \"post_id\": 7182, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"artkom\", \"post_text\": \"What does "fixed accuracy (m = n*#nodes will produce counts that are accurate within the number of nodes)" mean? \\nn - Ntiles,\\n#nodes = number of nodes? If so, it will always be a constant!\\n\\nWhy do we have to do a "local data reduction"? Let's imagine we sprayed our dataset on N nodes. We still have the same number of records, right?\", \"post_time\": \"2015-03-24 18:08:23\" },\n\t{ \"post_id\": 7173, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"john holt\", \"post_text\": \"The algorithm is definitely not an NP-complete type of problem. \\n\\nThere is an exact n-tile implementation in the ML.FieldAggregates attribute. The problem with this implementation is that it requires a global sort of the data. This in turn involves a complete re-distribution of the data.\\n\\nAn approximate solution will not require that global sort. Instead, there will be some local sorting and local data reduction, and then some distribution of the reduced dataset.\", \"post_time\": \"2015-03-23 16:32:54\" },\n\t{ \"post_id\": 7170, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Re: Implement an approximate n-tile algorithm\", \"username\": \"lchapman\", \"post_text\": \"Hi,\\n\\nI have alerted the mentor of this project to your request for information. Until then, there are some things you can do to get started using HPCC Systems and familiarise yourself with the ECL language as follows:\\n\\n•\\tDownload the software (server, client tools, graph control, ECL IDE) from here: http://hpccsystems.com/download/free-community-edition \\n\\n•\\tIf you want to delve into the platform code, download the sources from here: https://github.com/hpcc-systems/HPCC-Platform. The sources for the ML library can be found here: https://github.com/hpcc-systems/ecl-ml \\n\\n•\\tOnce you have an HPCC Systems environment up and running, run some simple examples. Try the HPCC Systems Data Tutorial here: http://hpccsystems.com/download/docs/da ... rial-guide or Six Degrees of Kevin Bacon here: http://hpccsystems.com/download/docs/six-degrees\\n\\n•\\tTake a basic online ECL training course here: http://hpccsystems.com/community/traini ... 
s/training\\n\\nI hope this helps for now.\\n\\nLorraine Chapman\\nHPCC Systems GSoC Administrator\", \"post_time\": \"2015-03-23 15:37:51\" },\n\t{ \"post_id\": 7167, \"topic_id\": 1649, \"forum_id\": 51, \"post_subject\": \"Implement an approximate n-tile algorithm\", \"username\": \"artkom\", \"post_text\": \"Hello,\\n\\nI am also interested in writing proposal for implementing an approximate n-tile algorithm. Could you, please, give more explanations about such kind of algorithm? Is it NP-hard problem? I found MAX-MIN tiling algorithm for two dimensional array. Is that what I need?\\n\\nThank you in advance.\", \"post_time\": \"2015-03-23 14:58:46\" },\n\t{ \"post_id\": 7193, \"topic_id\": 1653, \"forum_id\": 51, \"post_subject\": \"Two days left until the GSoC Student Proposal Deadline\", \"username\": \"lchapman\", \"post_text\": \"Just a reminder that the deadline is Friday 27th March at 19:00 UTC. \\n\\nIf you have already submitted a proposal...\\nPlease check that you have received answers to all your questions and have updated your proposal to include any information received from comments exchanged with mentors. \\n\\nOffline enquiries\\nIf you have been communicating with one of our mentors via email, make sure that you have converted your idea in to a proposal via Google's Melange interface before the deadline.\\n\\nRemember that we can't accept proposals via email, blog, forum or any other media channel.\\n\\nSee the HPCC Systems GSoC Wiki for guidance on how to submit your proposal: https://wiki.hpccsystems.com/x/SQB or the GSoC Student Guide: http://en.flossmanuals.net/GSoCStudentGuide/\\n\\nLorraine Chapman\\nHPCC Systems GSoC Administrator\", \"post_time\": \"2015-03-25 15:47:49\" },\n\t{ \"post_id\": 7371, \"topic_id\": 1657, \"forum_id\": 51, \"post_subject\": \"Re: Gravitational Clustering\", \"username\": \"jamienoss\", \"post_text\": \"Hi Rishab,\\n\\nIterating the entire procedure over G-deltaG space is a possible solution and would be regarded as a Monte-Carlo approach. In fact such computational 'runs' could be done in parallel. You could then look for boundaries of cluster convergence/divergence in this G-deltaG space, which would be a really interesting analysis of the algorithm itself - almost a must. However, using a Monte Carlo method as an actual implementation detail may not be desirable for this particular algorithm. I guess an incredible low resolution implementation of this phase space may be doable/useful. Another solution could be to compute a rough estimate for the fractal dimension of the data-space which would give you an estimate on the clustering/sparseness of the sample. This could then be used in a trivial calculation to determine adequate values for the actual algorithm's parameters.\\n\\nNone of the above methods would be considered 'tinkering'. 'Tinkering' refers to the need to cater such parameters on a sample by sample basis and 'by-hand' i.e. have the actual user test for a good set of parameters based on their particular dataset.\\n\\nKind regards,\\n Jamie\", \"post_time\": \"2015-04-16 11:31:09\" },\n\t{ \"post_id\": 7344, \"topic_id\": 1657, \"forum_id\": 51, \"post_subject\": \"Re: Gravitational Clustering\", \"username\": \"rgoel_0112\", \"post_text\": \"hi jamie
\\nMy name is rishab and i have applied for gsoc 2015 for the implementation of g-clustering algorithm.In the comment section you told me to think over certain points.I have pondered over them.you told that we need to do some pathological testing for the random sampling of the point and calculation of force between the current and the randomly selected point.I think the algorithm can go wrong but if we iterate over many a times with variation of the force parameters then we can find an optimum value.But we should be careful not to tinker with values to extent its supervised behaviour is affected.so we will need to create a dummy dataset to get range of parameters such that the algorithm works.\\nplease reply if I am right.\", \"post_time\": \"2015-04-14 18:41:26\" },\n\t{ \"post_id\": 7208, \"topic_id\": 1657, \"forum_id\": 51, \"post_subject\": \"Gravitational Clustering\", \"username\": \"jamienoss\", \"post_text\": \"The following two papers could prove useful:\\n\\nhttp://www.mpa-garching.mpg.de/gadget/gadget1-paper.pdf\\nhttp://www.mpa-garching.mpg.de/gadget/gadget2-paper.pdf\", \"post_time\": \"2015-03-26 17:50:31\" },\n\t{ \"post_id\": 7216, \"topic_id\": 1658, \"forum_id\": 51, \"post_subject\": \"GSoC Student Proposal Deadline is today\", \"username\": \"lchapman\", \"post_text\": \"The student proposal period closes at 19:00 UTC today. \\n\\nAll proposals must be entered into Melange by this time. Make sure you have included everything you want us to be able see to evaluate your ideas.\\n\\nAfter 19:00 UTC you will not be able to add a new proposal or edit any proposal you have already added.\\n\\nOnce we have properly reviewed proposals, mentors may post further comments and questions so keep checking the GSoC site. If we require a response from you and/or need you to update your proposal after today's deadline has passed, we will open up your proposal so you can edit it. This will be done on a case by case basis and only for those proposals where we need more details.\\n\\nThank you to all students who have submitted proposals to the HPCC Systems project. We will be in touch via the Melange interface soon.\\n\\nLorraine Chapman\\nHPCC Systems GSoC Adminstrator\", \"post_time\": \"2015-03-27 17:44:52\" },\n\t{ \"post_id\": 7483, \"topic_id\": 1703, \"forum_id\": 51, \"post_subject\": \"HPCC Systems® GSoC 2015 Accepted Projects\", \"username\": \"lchapman\", \"post_text\": \"The accepted proposal deadline has passed and now all accepted proposals have been announced. We were awarded 2 slots, so the competition was fierce. It was very difficult to choose but nonetheless, a choice had to be made. \\n\\nCongratulations and welcome aboard to the two students whose proposals we accepted. Congratulations also to all other students whose proposals were accepted into GSoC 2015.\\n\\nOur two slots went to these projects on our ideas list:\\n\\n1. Add statistics to the Linear and Logistic Regression modules in the HPCC Systems® Machine Learning Library\\n2. Expand the HPCC Systems® Visualization Framework. \\n\\nTo those who applied and were not successful, thank you for the interest you have shown in the HPCC Systems® project. Do not be disheartened. We had some excellent proposals but simply not enough slots to accept them all. We intend to apply again next year and hope to be accepted as a returning organisation, receiving more slots. 
\\n\\nDO try again next year!\\nDO check your email/proposal for hints on how you might improve your proposal next year and increase your chances of being accepted.\\nDO consider contributing to our source code. Email me at lorraine.chapman@lexisnexis.com if you have any questions about contributing. \\nDO visit our website and forums to keep in the loop with HPCC Systems®.\\nDO visit our ideas page next year for details of more interesting HPCC Systems® projects available for GSoC 2016.\\n\\nHave a great summer. We hope to see you again next year for GSoC 2016!\", \"post_time\": \"2015-04-29 11:29:38\" },\n\t{ \"post_id\": 8028, \"topic_id\": 1847, \"forum_id\": 51, \"post_subject\": \"GSoC is almost over what happens next?\", \"username\": \"lchapman\", \"post_text\": \"Today is the final pencils down date for GSoC. Final evaluations are also due in the next week and soon GSoC 2015 will be complete.\\n\\nAs I posted earlier, we were allocated 2 slots and both our GSoC projects have been successfully completed. We plan to showcase the work done by Anmol Jagetia and Sarthak Jain at the HPCC Systems® Engineering Summit at the end of September. \\n\\nTheir work will be available in the next major release, HPCC Systems® 6.0.0 which will be available early next year. \\n\\nIf you want to know more about their projects see the blogs about the GSoC Program which contain links to the student blogs:\\nhttp://bit.ly/1TkQvIZ\\nhttp://hpccsystems.com/blog/gsoc-final- ... nally-here\\n\\nWe also took on 4 interns this year to work on HPCC Systems® projects this summer and you can find out more about these students and their projects here:\\nhttp://hpccsystems.com/blog/results-hpc ... jects-2015\\nhttp://hpccsystems.com/blog/interns-wor ... alizations\\nhttp://bit.ly/1Pz1ieN\\n\\nWe will be applying to be an accepted organisation for GSoC 2016. We are already collecting project ideas and will be updating the HPCC Systems® GSoC Wiki to include these ideas later in the year. So keep checking our Ideas List 2016 here: https://wiki.hpccsystems.com/x/yIBc\\n\\nIf you are interested in finding out more about the HPCC Systems® Intern Program, email Lorraine Chapman or Molly O'Neal for more details.\\n\\nLorraine.Chapman@lexisnexis.com\\nmolly.oneal@lexisnexis.com\", \"post_time\": \"2015-08-21 17:16:04\" },\n\t{ \"post_id\": 8266, \"topic_id\": 1920, \"forum_id\": 51, \"post_subject\": \"Google opensource blogspot features HPCC Systems this week\", \"username\": \"lchapman\", \"post_text\": \"Google are featuring a GSoC organization every week on their open source blogspot. This week it's HPCC Systems® turn. Read our blog about how GSoC 2015 went for us here: http://google-opensource.blogspot.com/\", \"post_time\": \"2015-10-10 06:58:58\" },\n\t{ \"post_id\": 8298, \"topic_id\": 1932, \"forum_id\": 51, \"post_subject\": \"Wanted - Project Ideas for Students to complete in 2016!\", \"username\": \"lchapman\", \"post_text\": \"It's been announced! The closing date for organisations to apply to participate in Google Summer of Code 2016 is 19th February 2016.\\n\\nWe have already started compiling a list of project ideas. I want to reach out to everyone in our community for ideas to add to our 2016 Ideas List here: https://wiki.hpccsystems.com/x/yIBc. \\n\\nIs there a cool feature you think is missing or would like to see implemented in HPCC Systems? Perhaps there is a Machine Learning statistic that you would like to see added to our ML Library? More visualisations you would like HPCC Systems to provide? 
Or perhaps you need more IoT enabling features?\\n\\nThe chances are, if it will benefit you, it will benefit others. So tell us about it. You can either post here, email Lorraine.Chapman@lexisnexis.com, or create a new issue in our Community Issue Tracker (please say if it is in response to this call out): https://track.hpccsystems.com/secure/Dashboard.jspa. \\n\\nWhile we are looking for projects for students to complete over a 12 weeks period next summer, we'd love to hear about any ideas you have. So get in touch and have your say! Go and see what our students of 2015 contributed to the HPCC Systems project: https://wiki.hpccsystems.com/x/g4BR \\n\\nLorraine Chapman\", \"post_time\": \"2015-10-15 11:59:45\" },\n\t{ \"post_id\": 9238, \"topic_id\": 2154, \"forum_id\": 51, \"post_subject\": \"GSoC Accepted Org Application Period is Open\", \"username\": \"lchapman\", \"post_text\": \"So this is where it all starts for GSoC 2016!\\n\\nThe application period for organisations opened earlier this week and I am working on ours now! The website has changed quite a bit and I have to say it is definitely easier to use. There are quite a few changes to get used to, so I am being meticulous in the extreme to get it right! \\n\\nThe deadline for applications is February 19th but I'm planning to have ours complete way before then. Then we just have to wait until February 29th when the list of accepted organisations is published. We really want to be on that list! So keep your fingers crossed and wish us good luck!\\n\\nStudents - We've been surprised by the number of enquiries we have had already, even though we are only at the application stage. But I guess it makes sense to be prepared! Be like those students and get ahead of the game. \\n\\nGo and look at our Ideas List. If you're really keen, download the HPCC Systems Platform, take a tutorial and maybe even learn some ECL!\\n\\nQuestions? Email me: Lorraine.Chapman@lexisnexis.com\", \"post_time\": \"2016-02-11 13:21:09\" },\n\t{ \"post_id\": 9314, \"topic_id\": 2184, \"forum_id\": 51, \"post_subject\": \"Students - Come and intern with us this summer!\", \"username\": \"lchapman\", \"post_text\": \"So we didn't get accepted for GSoC 2016, but hey, they accepted less than half the organisations that applied and spread the net wider for new open source organisations to get involved. We were lucky enough to be one of those in 2015 and we'll try our luck again in 2017.\\n\\nLast year, we also ran a summer intern program and it was such a success that we are going to do that again this year. So, if you've been looking at us already, keep on looking. We have some great projects available.\\n\\nI've written a blog about how you can get involved in the program so go and read it. It tells you what you need to know. If you have any questions, email lorraine.chapman@lexisnexis.com.\\n\\nhttps://hpccsystems.com/resources/blog/ ... d-find-out\", \"post_time\": \"2016-03-08 18:52:53\" },\n\t{ \"post_id\": 9378, \"topic_id\": 2210, \"forum_id\": 51, \"post_subject\": \"Revised deadline for non-machine learning intern proposals\", \"username\": \"lchapman\", \"post_text\": \"Good news students!\\n\\nThe HPCC Systems Summer Intern Program has extended the deadline for you to apply for non-machine learning projects! You now have a few extra weeks to get your proposal ready because the new deadline is Friday 15th April 2016.\\n\\nThere are a number of non-machine learning projects available including:\\n\\n
\\n\\nGo and look at our list of projects here: https://wiki.hpccsystems.com/x/zYBc, there's bound to be something that catches your eye.\\n\\nYou can find out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (25th March). Please note that after 25th March, we will not be accepting any more proposal for machine learning projects for summer 2016.\\n\\nQuestions? \\nEmail lorraine.chapman@lexisnexis.com\", \"post_time\": \"2016-03-22 12:25:55\" },\n\t{ \"post_id\": 9512, \"topic_id\": 2244, \"forum_id\": 51, \"post_subject\": \"Deadline for intern proposals is this Friday 15th April 7pm\", \"username\": \"lchapman\", \"post_text\": \"The deadline for submitting non-machine learning intern proposals is this Friday 15th April 7pm UTC.\\n\\nIf you haven't yet prepared your proposal, there is still time. Look what's on our list: https://wiki.hpccsystems.com/x/zYBc. Here's a taster...\\n\\n
\\n\\nFind out more about the HPCC Systems Summer Internship Program here: https://wiki.hpccsystems.com/x/HwBm\\n\\nThanks to all students who have already submitted proposals ahead of the deadline this Friday (15th April). You still have time to make changes right up to the last minute if you need to. Just make sure you send your final version to lorraine.chapman@lexisnexis.com before the deadline.\\n\\nWe have already reviewed the machine learning proposals and all students will be informed whether they have been successful by 22nd April.\\n\\nGood luck!\", \"post_time\": \"2016-04-12 11:30:33\" },\n\t{ \"post_id\": 12593, \"topic_id\": 3153, \"forum_id\": 51, \"post_subject\": \"Google Summer of Code 2017 has been announced\", \"username\": \"lchapman\", \"post_text\": \"HPCC systems will be applying to be an accepted organisation for GSoC 2017. Google have announced the timeline and our preparations are underway.\\n\\nCalling all HPCC Systems community members and users...\\nOur Ideas List is a work in progress. If you have any ideas for suitable projects, email lorraine.chapman@lexisnexis.com with details.\\n\\nCalling all students...\\nVisit our GSoC Wiki to take a look at our Ideas List. We will be updating it regularly, so keep calling back.\\n\\nWe were an accepted organisation for GSoC 2015. Read about the HPCC Systems projects students completed as part of the program in 2015.\\n\\nWe're looking forward to 2017 and hoping to take part in a program which provides such wonderful, collaborative opportunities for students and open source projects like HPCC Systems!\", \"post_time\": \"2016-10-27 17:50:09\" },\n\t{ \"post_id\": 12613, \"topic_id\": 3173, \"forum_id\": 51, \"post_subject\": \"Looking for an internship for summer 2017? Look no further!\", \"username\": \"lchapman\", \"post_text\": \"We know some of you are early birds and like to get your internship organised as soon as you can. If you are a student who aspires to be a software developer or data scientist and you're looking for summer internship, we want to hear from you.\\n\\nThe HPCC Systems intern program is a specialist program. To get accepted, you need to choose a project, and submit a proposal demonstrating how you would complete it and why you are the right person for the job.\\n\\nEach project has a mentor and we recommend that you submit a first draft of your proposal to the mentor to get some feedback, so you can improve it before you submit your final version to lorraine.chapman@lexisnexis.com. You have until Monday 3rd April 2017, but don't wait until the last minute. We are accepting proposals already so take a look and let us know if something catches your eye. \\n\\nWant to know more about HPCC Systems, our intern program and the projects? Look here:\\n\\n
\\n\\nMore blogs will be posted on the HPCC Systems website presenting the work students completed this summer. Here's the first one featuring Column Level Security on HPCC Systems.\\n\\nHave questions? Email lorraine.chapman@lexisnexis.com\", \"post_time\": \"2016-10-27 18:47:37\" },\n\t{ \"post_id\": 15063, \"topic_id\": 3723, \"forum_id\": 51, \"post_subject\": \"Google Summer of Code 2017. Our application is in!\", \"username\": \"lchapman\", \"post_text\": \"It was a great experience to take part in GSoC 2015 so we've applied to do it all over again in 2017. No guarantees - we know they like to share the experience with new open source projects. We will know whether we have had the good fortune to be accepted on Feb 27th. So wish us luck and stay tuned for news!\\n\\nOur own summer intern programme was born out of our involvement with GSoC 2015, because we knew we could mentor more students than the 2 slots we were given. This program will run for the third consecutive year in 2017 and for the foreseeable future. So if you are a student, know someone who is or have contacts in the academic world, let them know we have places for students (Bachelors, Masters and PhD) with coding abilities who want to get experience working in a development environment. We support remote working, having accepted students working from the USA, Ireland and India in the past and we can accommodate the differences in semester dates just in case you are wondering.\\n\\nThinking about it but want more information? Contact Lorraine.Chapman@lexisnexisrisk.com.\\n\\nFind out about the achievements of our student contributors in 2015 and 2016, check out the list of projects for 2017 or suggest a project yourself. \\n\\nYou don't have to be a student applicant to suggest a project. Are you an HPCC Systems user who has an idea of a new feature or improvement that would help you and other community members? Contact Lorraine.Chapman@lexisnexisrisk.com with details and let's get it done!\", \"post_time\": \"2017-02-08 09:43:25\" },\n\t{ \"post_id\": 16163, \"topic_id\": 4083, \"forum_id\": 51, \"post_subject\": \"Intern Project Proposal Deadline Extension\", \"username\": \"lchapman\", \"post_text\": \"Good news if you're a bit late finalising your intern opportunity this year. We've extended the deadline for HPCC Systems Intern Program proposals to Saturday 22nd April.\\n\\nThe HPCC Systems intern program is a specialist program. To get accepted, you need to choose a project, and submit a proposal demonstrating how you would complete it and why you are the right person for the job.\\n\\nWant to know more about HPCC Systems, our intern program and the projects? Look here:\\n\\n
\\n\\nBlogs about student projects completed in 2016:\\n
\\n\\nHave questions? Email lorraine.chapman@lexisnexisrisk.com\", \"post_time\": \"2017-04-05 16:09:28\" },\n\t{ \"post_id\": 17453, \"topic_id\": 4343, \"forum_id\": 51, \"post_subject\": \"Meet the 2017 HPCC Systems interns\", \"username\": \"lchapman\", \"post_text\": \"We are pleased to announce that 5 students were awarded places on the 2017 HPCC Systems summer intern program.\\n\\nTo find out more about who they are, the projects they are working on and how it's going, read the blog: A very warm welcome to our 2017 intern students.\\n\\nThe proposal period for 2018 opens towards the end of September. Keep an eye on this forum for announcements and also on our available projects list. Remember, you can submit an proposal for an idea of your own, but it must leverage HPCC Systems and be of benefit to our open source community.\\n\\nWant to know more about our intern program? Visit our Student Wiki for more information or contact lorraine.chapman@lexisnexisrisk.com.\", \"post_time\": \"2017-06-19 14:29:26\" },\n\t{ \"post_id\": 19743, \"topic_id\": 4983, \"forum_id\": 51, \"post_subject\": \"2018 internship application period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are pleased to announce that we are now accepting proposal from students who would like complete an internship with HPCC Systems in the summer of 2018.\\n\\nThis paid program is open to high school students, undergraduate, masters or PhD students who are interested in joining our platform development team to get some real world experience coding a solution for a feature that will actively be used by our open source community in the future. \\n\\nInternships last for 12 weeks starting in late May running through to mid August and are available to students across the globe. As a result, we are flexible about start and end dates to take account of semester timings across different timezones.\\n\\nTake a look at the projects we are offering for internships in 2018. Students who want to suggest a project of their own may do so, however your project must:\\n\\n
\\nIf you choose to do this, let us know so we can assign a mentor with relevant experience to support you. \\n\\nOur Student Wiki provides more information about the program itself, how to prepare a proposal (including an example) and our expectations of interns. You may also want to read our blog about the program and find out about previously completed intern projects.\\n\\nNew to HPCC Systems? Familiarise yourself with our platform and how it works:\\n\\n\\n
\\nFinal deadline for proposals is April 6th 2018\\n\\nFinal versions of proposals must be sent to lorraine.chapman@lexisnexisrisk.com (not the mentor) by the deadline date. However, we strongly recommend that you discuss your proposal ideas with the project mentor before you submit your final version. \\n\\nWe reserve the right to make earlier offers to students who submit an excellent proposal we know we want to accept. \\n\\nFor more information contact lorraine.chapman@lexisnexisrisk.com\", \"post_time\": \"2017-10-26 12:16:02\" },\n\t{ \"post_id\": 21563, \"topic_id\": 5483, \"forum_id\": 51, \"post_subject\": \"2018 HPCC Systems Internship Deadline has passed\", \"username\": \"lchapman\", \"post_text\": \"Thank you to all students who have applied to the HPCC Systems summer intern program by a submitting a project proposal. Good luck to all applicants!\\n\\nThe review team is looking at all proposals this week and we will be back in touch as soon as possible to let applicants know whether they have been successful.\\n\\nFor those who are already thinking about intern opportunities in 2019, the proposal application period for the HPCC Systems summer intern program 2019 will open towards the end of this year in the early Autumn. \\n\\nKeep an eye on our available projects list. Get in touch with Lorraine.Chapman@lexisnexisrisk.com to suggest a project of your own for 2019. Remember, it must leverage HPCC Systems in some way and be of use and interest to our open source community.\\n\\nWe do make offers early to students who submit an excellent proposal we know we want to accept before the deadline date. So plan ahead and don't wait until the last minute!\", \"post_time\": \"2018-04-10 10:31:09\" },\n\t{ \"post_id\": 23483, \"topic_id\": 6053, \"forum_id\": 51, \"post_subject\": \"Intern with us in 2019 - The proposal period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2019. The deadline for proposals is Friday March 29th 2019. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. \\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2019!\", \"post_time\": \"2018-11-07 13:34:58\" },\n\t{ \"post_id\": 25443, \"topic_id\": 6653, \"forum_id\": 51, \"post_subject\": \"2019 Intern Proposal Deadline Friday 29th March 2019\", \"username\": \"lchapman\", \"post_text\": \"The deadline for proposals is this Friday. \\n\\nThere is still time to apply! See the projects on our list, or suggest one of your own that leverages HPCC Systems in some way and supports the work you or your school is doing.\\n\\nOur program is open to students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nSend your final proposal to Lorraine.Chapman@lexisnexisrisk.com by COB Friday 29th March.\", \"post_time\": \"2019-03-25 10:42:12\" },\n\t{ \"post_id\": 29013, \"topic_id\": 7063, \"forum_id\": 51, \"post_subject\": \"Re: Welcome to the students joining our intern program in 20\", \"username\": \"lchapman\", \"post_text\": \"You're most welcome! Please pass on the details of our 2020 intern program to anyone you know who may be interested in completing a project with us as an intern. See the post about this program here: viewtopic.php?f=51&t=7453.\\n\\nBest wishes\\nLorraine Chapman\\nHPCC Systems Intern Program Manager\", \"post_time\": \"2020-01-13 13:26:04\" },\n\t{ \"post_id\": 28971, \"topic_id\": 7063, \"forum_id\": 51, \"post_subject\": \"Re: Welcome to the students joining our intern program in 20\", \"username\": \"ricardos\", \"post_text\": \"Thank you for share it.\", \"post_time\": \"2020-01-10 12:54:42\" },\n\t{ \"post_id\": 26713, \"topic_id\": 7063, \"forum_id\": 51, \"post_subject\": \"Welcome to the students joining our intern program in 2019\", \"username\": \"lchapman\", \"post_text\": \"A warm welcome to the students joining the HPCC Systems Intern Program this summer!\\n\\nAs always, the standard of proposal submissions from students this year was extremely high. The review panel had a difficult task evaluating which proposals to accept, given the limited number places available. We know a lot of hard work goes into each proposal, so we'd like to say thank you to all students who took an interest in our intern program this year.\\n\\nFive students have started work on their projects already, with five more joining the team by the end of June. Read our blog Meet the students joining our intern program in 2019 to find out more about the students who have been accepted on to the HPCC Systems Intern Program this year.\\n\\nWe encourage our interns to get involved with our open source community in a number of ways. In addition to their contribution to our open source project we encourage students to:\\n\\n\\n
\\nEvery year, the students who join our intern program achieve a lot in what is a relatively short period of time (12 weeks). We greatly value their contribution to our open source platform and community and are proud of their achievements. \\n\\nEach student is mentored by one of our LexisNexis Risk Solutions colleagues and in many cases, additional mentoring is provided by university professors from our Academic Partners. We thank all our mentors for the encouragement and support they provide to our interns and for giving up their time to support our program. \\n\\nApplication to join the HPCC Systems Intern Program is by the submission of a proposal to complete a specific project during the 12 week internship period. Students can choose from our list of available projects, or they can suggest one of ther own, but it must leverage HPCC Systems in some way.\\n\\nThe proposal period reopens in the Fall for internships in 2020. Watch this forum for announcements. More information about the HPCC Systems Intern Program is available in our blog: Join the HPCC Systems team as an intern.\", \"post_time\": \"2019-06-06 16:08:32\" },\n\t{ \"post_id\": 28103, \"topic_id\": 7453, \"forum_id\": 51, \"post_subject\": \"Intern with us in 2020 - Proposal period is now open\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2020. The deadline for proposals is Friday March 20th 2020. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. \\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA. In 2019, we accepted students on to the program from Europe and Asia.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2020!\", \"post_time\": \"2019-11-14 10:46:25\" },\n\t{ \"post_id\": 28303, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"Thanks to all, for supporting my interest, I have working with lower version it working good.\", \"post_time\": \"2019-11-20 16:53:22\" },\n\t{ \"post_id\": 28233, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"Hi David, No Idea about, how to share my screen!\", \"post_time\": \"2019-11-19 17:05:52\" },\n\t{ \"post_id\": 28223, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"dehilster\", \"post_text\": \"Hello! This is David de Hilster. I'm the programmer in charge of the ECL IDE. If someone can reproduce this problem, then let's do a screen share so I can take a look. So far, this problem has occurred but doesn't seem reproducible.\", \"post_time\": \"2019-11-19 16:28:38\" },\n\t{ \"post_id\": 28213, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"HPCCSystemsVM-amd64-7.6.8-1\", \"post_time\": \"2019-11-19 16:26:22\" },\n\t{ \"post_id\": 28203, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"ming\", \"post_text\": \"What is HPCCSystems Platform version?\", \"post_time\": \"2019-11-19 13:30:27\" },\n\t{ \"post_id\": 28163, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"Rich,\\n\\nYes still it is not working properly.\", \"post_time\": \"2019-11-19 04:56:39\" },\n\t{ \"post_id\": 28153, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nI've also found it can happen if the connection to the Thor is lost. Try closing (powering down) the VM and the IDE, then restart them both and see if the problem still occurs.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-11-18 18:53:59\" },\n\t{ \"post_id\": 28143, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"Hello,\\n\\nRich, Thank you for this information, Since I was Running in oracle VM virtual box, I have not seen 'Local' in drop down list, even after selecting required target(Thor) same thing it doing('L' work unit). I was finding the thought to go through off this from past one week.\\n\\nThanks,\\nSuleman Shreef\", \"post_time\": \"2019-11-18 16:52:47\" },\n\t{ \"post_id\": 28133, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"Re: changing the workunit\", \"username\": \"rtaylor\", \"post_text\": \"Suleman,\\n\\nThe "L" or "W" workunit designator is dependent only on the Target you select (top-right corner droplist). 
You get "L" when "Local" is selected and "W" when any hThor, Thor, or Roxie cluster is selected from that droplist.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2019-11-18 13:39:07\" },\n\t{ \"post_id\": 28123, \"topic_id\": 7473, \"forum_id\": 51, \"post_subject\": \"changing the workunit\", \"username\": \"suleman Shreef\", \"post_text\": \"Hi Team,\\n\\nCould you please help me, I am not getting the result when i submit the job on ECL IDE,\\nBecause of 'L' work unit (Local) generating, if it possible to change to 'W' work unit,\\nCould you please suggest, what I need to change.\\n\\nThanks,\\nSuleman Shreef\", \"post_time\": \"2019-11-16 06:39:06\" },\n\t{ \"post_id\": 28493, \"topic_id\": 7573, \"forum_id\": 51, \"post_subject\": \"Re: DFU Plus Command\", \"username\": \"JimD\", \"post_text\": \"DFUPlus is part of the Client Tools package. You can download it from:\\nhttps://hpccsystems.com/download#HPCC-Platform. \\n\\nSelect the OS of the machine you want to run it on, then download and install.\\n\\nThe documentation is in the Client Tools manual:\\n\\nhttps://d2wulyp08c6njk.cloudfront.net/r ... 6.16-1.pdf\\n\\nHTH,\\n\\nJim\", \"post_time\": \"2019-12-16 13:52:35\" },\n\t{ \"post_id\": 28473, \"topic_id\": 7573, \"forum_id\": 51, \"post_subject\": \"DFU Plus Command\", \"username\": \"suleman Shreef\", \"post_text\": \"HI Team,\\n\\nCan you Please help me to download DFU plus command prompt .exe URL and also Demo How to use it.\\n\\nThanks,\\nS.Shreef\", \"post_time\": \"2019-12-14 13:24:25\" },\n\t{ \"post_id\": 29943, \"topic_id\": 7893, \"forum_id\": 51, \"post_subject\": \"2020 Intern Program - Proposal Period is now closed\", \"username\": \"lchapman\", \"post_text\": \"Thanks to all those who have submitted proposals. We are now moving through the review process and will be in touch with all applicants in the next two weeks.\\n\\nThe proposal period for 2021 will open in the fall. Check back here for details.\\n\\nBest wishes\\nLorraine Chapman\\nHPCC Systems Intern Program Manager\", \"post_time\": \"2020-03-26 14:02:32\" },\n\t{ \"post_id\": 30403, \"topic_id\": 7963, \"forum_id\": 51, \"post_subject\": \"Re: Regarding Summer Intern Oppurtunities\", \"username\": \"lchapman\", \"post_text\": \"Hi Vedant,\\n\\nIt's good to hear you have reached out to our LN corporate intern program. I wish you good luck with your applications. The HPCC Systems intern program runs separately from that program. It is specifically a coding program offering opportunities for students with an interest in developing their coding skills or extending their knowledge of data science. Our 2020 program is still running, but I'm afraid all places have now been taken.\\n\\nThe proposal application period for 2021 will open in the Fall, around the end of October, so check back here for announcements. I encourage you to apply for the 2021 program, which I hope will be back to full strength. In the meantime, if you don't know how the HPCC Systems program works, read our blog. Feel free to email me if you have any questions.\\n\\nBest wishes\\nLorraine Chapman\\n\\nConsulting Business Analyst\\nHPCC Systems Intern Program Manager\", \"post_time\": \"2020-05-04 10:58:52\" },\n\t{ \"post_id\": 30123, \"topic_id\": 7963, \"forum_id\": 51, \"post_subject\": \"Regarding Summer Intern Oppurtunities\", \"username\": \"vedant_dulori\", \"post_text\": \"Hey,\\n\\nAfter getting an opportunity to work on HPCC Systems via my undergraduate research at NCSU, I am really keen on working on similar projects in summer. 
Hence, I applied to summer intern positions at LexisNexis career jobs page earlier this month and I am curious to know if there are opportunities still open and if yes, how can I apply to them?\\n\\nThank you,\\nVedant Dulori\", \"post_time\": \"2020-04-13 08:44:00\" },\n\t{ \"post_id\": 32573, \"topic_id\": 8483, \"forum_id\": 51, \"post_subject\": \"NOW OPEN - 2021 HPCC Systems Intern Program Proposal Period\", \"username\": \"lchapman\", \"post_text\": \"We are now accepting applications from students who would like to join the HPCC Systems team as an intern in 2021. The deadline for proposals is Friday March 19th 2021. But we do award places in advance of the deadline date to students who submit an excellent proposal we know we want to accept. So don't miss out on a place on the HPCC Systems intern program, get started now!\\n\\nApplication is by proposal submission to complete a specific HPCC Systems related project. This can be a project from our list, one you have suggested yourself or a project that supports work your school is doing which leverages HPCC Systems in some way.\\n\\nOur intern program is aimed at students studying STEM related subjects and most projects require coding skills. Our projects range from working on a new feature or enhancement, providing the ability to connect to third party products that may be used alongside HPCC Systems, to machine learning related projects. \\n\\nWe welcome applications from students across the academic spectrum from high school to PhD. It's a 12 week program over the summer months, but we can be flexible about start and end dates to take account of differences in semester dates for students interested in working remotely from outside the USA.\\n\\nInterested in finding out more? \\n\\n\\n
\\n\\nWe also have a brochure you can use to send to your friends, professors and school teachers so pass this information on to anyone you know who may be interested in joining our intern program in 2021!\", \"post_time\": \"2020-11-17 15:26:14\" },\n\t{ \"post_id\": 33193, \"topic_id\": 8653, \"forum_id\": 51, \"post_subject\": \"Intern Program 2021 Proposal Period is now CLOSED\", \"username\": \"lchapman\", \"post_text\": \"The proposal period for students applying to join the HPCC Systems Intern Program in 2021 is now closed.\\n\\nProposals are now being reviewed. \\n\\nThank you to all students who have submitted proposals this year. Thanks also to all professors and school teachers who have encouraged their students to apply, as well as our LexisNexis Risk Solutions Groups colleagues who have supported students with their proposals.\\n\\nAll applicants will receive an email response whether they have been successful or not, within 2 weeks.\\n\\nMore information about students who have been accepted onto the program this summer will be provided via a blog post towards the end of May. Keep visiting the HPCC Systems blog on our website for updates: https://hpccsystems.com/blog\\n\\nThe proposal period for internships in 2022 will open in the Fall. Register as a member of our Community Forum to be notified when new posts about this and other subjects are available.\\n\\nAny educator who would like us to present to students about the HPCC Systems Intern program when the 2022 academic year starts, please contact academia@hpccsystems.com to arrange a date and time.\", \"post_time\": \"2021-03-22 11:26:59\" },\n\t{ \"post_id\": 10213, \"topic_id\": 2483, \"forum_id\": 53, \"post_subject\": \"Re: Global Variables\", \"username\": \"drealeed\", \"post_text\": \"You should use the %GLOBAL.DPPAPurpose% syntax. This allows validation to occur, so if your ecl is referencing a global variable not defined in the composition you’ll get a validation-time error rather than an inexplicable runtime error.\\n\\nAnother option is to have an input in your plugin with the default value set to GLOBAL.DPPAPurpose, if you want users to be able to override it. Like:\\n\\nSTRING DPPAPurpose:DEFAULT(^GLOBAL|DPPAPurpose),OPTIONAL,LABEL("DPPA PURPOSE")\\n\\nAnd then reference the input in your ecl.\", \"post_time\": \"2016-08-02 19:06:56\" },\n\t{ \"post_id\": 10203, \"topic_id\": 2483, \"forum_id\": 53, \"post_subject\": \"Global Variables\", \"username\": \"senthilkumar.periasamy\", \"post_text\": \"How do I pass global variables from hipie plugin to function macro.\", \"post_time\": \"2016-08-02 19:03:59\" },\n\t{ \"post_id\": 10303, \"topic_id\": 2523, \"forum_id\": 53, \"post_subject\": \"Re: Passing Global Variables in Search Service Soapcall\", \"username\": \"drealeed\", \"post_text\": \"Julie,\\n\\nUntil now the only inputs referencing global variables as default values were plugin inputs. You are the first person to use global variables as defaults for service input parameters. Congratulations!\\n\\nMy hunch is that you've come across a bug in HIPIE 1.7. 
We'll investigate and let you know.\", \"post_time\": \"2016-08-03 15:30:15\" },\n\t{ \"post_id\": 10293, \"topic_id\": 2523, \"forum_id\": 53, \"post_subject\": \"Passing Global Variables in Search Service Soapcall\", \"username\": \"jcarmigniani\", \"post_text\": \"Hi Drea,\\n\\nI am using the latest HIPIE you sent and so far it looks like all the little things I used to hardcode appear to be taken care of \\n\\nNow what I am trying to do is pass in a global variable through to my service soapcall so that Batch can pass in that global variable from the composition, but I am not entirely sure how I should be writing this.\\nSo far all I have accomplished is getting to set my parameter to a STRING that says ‘^GLOBAL|JOBID’ when the code is generated but I would like it to actually refer the global variable instead\", \"post_time\": \"2016-08-03 15:21:34\" },\n\t{ \"post_id\": 10353, \"topic_id\": 2533, \"forum_id\": 53, \"post_subject\": \"Re: hipie plugins with local files\", \"username\": \"lbentley\", \"post_text\": \"Thank you Drea, this is really good news. Could the salt version and location in the .cfg file be the default, but you could override those by passing in that information to the plugin? The reason I ask is because when we do this regression testing, sometimes we want to compare two different versions of salt. if we have both salt executables on our local machine, and we could pass in which one we want to execute, that would be nice. Is that possible?\", \"post_time\": \"2016-08-03 18:39:55\" },\n\t{ \"post_id\": 10343, \"topic_id\": 2533, \"forum_id\": 53, \"post_subject\": \"Re: hipie plugins with local files\", \"username\": \"drealeed\", \"post_text\": \"Currently, HIPIE SALT support includes generating a SALT scored search service.\\n\\nGENERATES SALT(dsInput):SCOREDSEARCH\\n\\nThis does use the version of salt you have installed on your local computer. If you're using the HIPIE ECL IDE plugin, You specify the salt version and the location of the executable in your %USERPROFILE%/hipie_eclide/repositories.cfg file.\\n\\nThere are plans for adding much more SALT capability to plugins in the upcoming HIPIE 1.8 release, currently under development. The Plugin documentation will be updated with the details of how to use salt to generate profiles, linking, etc. once the capability exists.\", \"post_time\": \"2016-08-03 18:25:17\" },\n\t{ \"post_id\": 10333, \"topic_id\": 2533, \"forum_id\": 53, \"post_subject\": \"hipie plugins with local files\", \"username\": \"lbentley\", \"post_text\": \"I have a few questions about HIPIE plugins. I am a complete novice, so these may be stupid questions. I have looked over some of the documentation and saw this ‘generates salt’ section.\\nhttps://gitlab.ins.risk.regn.net/HIPIE/ ... rates-salt\\n\\nHow does this work? Does it call the salt executable to generate salt code for the particular dataset passed in? If so, does it use the salt executable on your local machine?\\nAlso, it looks like HIPIE plugins can access local files on your machine. This interests me because I am working on an automated SALT regression testing suite. The big thing that at this time that can’t be automated is the salt.exe execution. But if HIPIE allowed me to write a plugin that could run the salt.exe on my local machine and return the resulting code in the .mod file as a string, I would be golden.\\n\\nWhat do you all think? 
Thanks for your help!\", \"post_time\": \"2016-08-03 18:20:42\" },\n\t{ \"post_id\": 11013, \"topic_id\": 2713, \"forum_id\": 53, \"post_subject\": \"How to reference child dataset in a form\", \"username\": \"senthilkumar.periasamy\", \"post_text\": \"How can I reference child dataset in a dsp form.\", \"post_time\": \"2016-08-25 17:32:46\" },\n\t{ \"post_id\": 10053, \"topic_id\": 2403, \"forum_id\": 63, \"post_subject\": \"Re: Can't access dashboards\", \"username\": \"HPCC Staff\", \"post_text\": \"Two things can make this occur\\n1. the workunit that contains the information has been archived off of thor, you can either re run the dashboard or go to thor and find the workunit and un archive it. In a upcoming release the error message will tell you this and give you the option to un-archive the last workunit. \\n\\n2. the workunit has completed, but the deployment of the Roxie is taking a long time to complete -- we have a solution for this early next year (2017) planned there isn't a good way of querying ROXY to discover the state today.\", \"post_time\": \"2016-07-26 13:13:21\" },\n\t{ \"post_id\": 10023, \"topic_id\": 2403, \"forum_id\": 63, \"post_subject\": \"Can't access dashboards\", \"username\": \"auburn34\", \"post_text\": \"When I try to access any dashboard on the DSP, including one I created myself, I get the following error: No process available to view Dashboard. I need to know what I need to do to access the Quality Analysis - Insurance Scoring Dashboard.\\n\\nSean Rousey\", \"post_time\": \"2016-07-25 14:40:15\" },\n\t{ \"post_id\": 10163, \"topic_id\": 2443, \"forum_id\": 63, \"post_subject\": \"Re: DSP versions\", \"username\": \"drealeed\", \"post_text\": \"Once you've logged in, click on the about DSP link at the top right of the screen. This will tell you what version of DSP, Hipie and Visualization the app is running\", \"post_time\": \"2016-08-02 13:00:09\" },\n\t{ \"post_id\": 10153, \"topic_id\": 2443, \"forum_id\": 63, \"post_subject\": \"DSP versions\", \"username\": \"TomCharman\", \"post_text\": \"Is there a way to see which version of DSP / Visualization is being used on DSP at the moment? When logging bugs to Github I'd like to be able to say which version I'm talking about rather than just "QA" or "DEV" which obviously will be out of date later.\", \"post_time\": \"2016-08-02 05:22:06\" },\n\t{ \"post_id\": 11061, \"topic_id\": 2573, \"forum_id\": 63, \"post_subject\": \"Re: DSP Composition Issue: Enabling "Deploy Roxie Services"\", \"username\": \"michel_amjadi\", \"post_text\": \"Thank you Tom Charman.\", \"post_time\": \"2016-08-31 13:14:26\" },\n\t{ \"post_id\": 11043, \"topic_id\": 2573, \"forum_id\": 63, \"post_subject\": \"Re: DSP Composition Issue: Enabling "Deploy Roxie Services"\", \"username\": \"TomCharman\", \"post_text\": \"Hi Michel--This is quite possibly pointless to your use case, so apologies for wasting your time I was talking about the possibility of having data in widget 3 that can't be sensibly aggregated and only makes sense when both widget 1 and widget 2 have a selection. \\n\\ne.g. (big simplification)\\n\\nWidget 1 can take values A or B\\nWidget 2 can take values Y or Z\\nWidget 3's data looks like this:\\n\\n
A, Y, 0.5, 15, 30\\nA, Z, 0.4, 4, 10\\nB, Y, 0.3, 6, 20\\nB, Z, 0.6, 6, 10
\\n\\n... where col 3 was col 4 / col 5.\\n\\nYou can't sensibly put col 3 in a widget until you've filtered with widgets 1 and 2... otherwise for example you might end up saying that the average of col 3 when filtered to 'B' is 0.45 when it's really 0.4. In this case, the current Roxie-enabled behaviour is actually kind of nice because it stops you from displaying incorrect numbers.\\n\\nBut of course, once there's the ability to make calculated fields in a dashboard (i.e. divide col 4 by col 5 on the fly) -- which I think is being worked on -- then there'll be no excuse for the Roxie-enabled behaviour you were complaining about.\", \"post_time\": \"2016-08-29 07:05:38\" },\n\t{ \"post_id\": 11033, \"topic_id\": 2573, \"forum_id\": 63, \"post_subject\": \"Re: DSP Composition Issue: Enabling "Deploy Roxie Services"\", \"username\": \"michel_amjadi\", \"post_text\": \"Tom Charman, I am not sure I understand the relevance of your response to my raised issue. Please explain.\", \"post_time\": \"2016-08-26 02:52:10\" },\n\t{ \"post_id\": 10873, \"topic_id\": 2573, \"forum_id\": 63, \"post_subject\": \"Re: DSP Composition Issue: Enabling "Deploy Roxie Services"\", \"username\": \"TomCharman\", \"post_text\": \"Amusingly, to me this is a feature, not a bug--because if your numbers involve any sort of aggregation / calculation (e.g. a percentage or an average), they won't be correct until the most granular set of filters is applied. I guess it would be nice to have the option to choose.\", \"post_time\": \"2016-08-23 05:18:37\" },\n\t{ \"post_id\": 10523, \"topic_id\": 2573, \"forum_id\": 63, \"post_subject\": \"DSP Composition Issue: Enabling "Deploy Roxie Services"\", \"username\": \"michel_amjadi\", \"post_text\": \"Dear DSP Support team:\\nThis issue relates to a target widget which data depends on several source widgets to be clicked.\\n\\nIn DSP Composition if I set:\\n\\n - Deploy Roxie Services : Checked, and\\n - Create new : Radio button is chosen\\n\\nThe interactions can only work when all of the source widgets are clicked(selected), before the target widget be updated with data.\\nFor example if I have 3 widgets and selected pie slices from widget 1 and 2 can filter data in widget 3 (a table):\\n A) When Deploy Roxie Services is unchecked I can click only widget 1 or 2 and widget 3 table get updated (populated) accordingly\\n\\n B) When Deploy Roxie Services is Checked, I must click on both widget 1 and widget 2 to get data in widget 3 table. Else widget 3 will remains empty.\\n\\nAttached please find a document illustrating this issue.\\n\\nPlease contact me for any further information you may need.\\n\\nMichel Amjadi\\nMichel.amjadi@lexisnexis.com\\n678.622.4200\", \"post_time\": \"2016-08-11 21:26:35\" },\n\t{ \"post_id\": 11011, \"topic_id\": 2663, \"forum_id\": 63, \"post_subject\": \"Re: SORT failed. Exceeded skew limit\", \"username\": \"drealeed\", \"post_text\": \"This appears to be an issue in the HIPIE generated code. An issue has been added:\\nhttps://github.com/hpcc-systems/HIPIE/issues/1992\\n\\nThe skew is occurring when sorting on SORT(client_data_source_desc, in_network). As a workaround, try distributing the data randomly on the hash of these fields when outputting the prepped/normalized file to be consumed by this dashboard.\", \"post_time\": \"2016-08-25 15:46:30\" },\n\t{ \"post_id\": 10913, \"topic_id\": 2663, \"forum_id\": 63, \"post_subject\": \"Re: SORT failed. 
Exceeded skew limit\", \"username\": \"rtaylor\", \"post_text\": \"dpearson_17,\\n\\nThe SORT function does have a SKEW option that allows you to override the default skew allowance. Perhaps the widget developer could add an option to set that SKEW limit for the SORT?\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2016-08-23 14:31:34\" },\n\t{ \"post_id\": 10773, \"topic_id\": 2663, \"forum_id\": 63, \"post_subject\": \"SORT failed. Exceeded skew limit\", \"username\": \"dpearson_17\", \"post_text\": \"Hello all. In testing a batch composition (Anthem Comparison Report), we are hitting a system error. The WUID is W20160818-184632 out on DEV DSP. Anthem is dealing with millions or records worth of data which is why I imagine we are hitting the skew error, but am wondering how we get around it as I believe that is part of the generated code.\\n\\nSystem error: 10084: Graph[25], sort[27]: SORT failed. Graph[25], sort[27]: Exceeded skew limit: 0.010000, estimated skew: 0.376647\", \"post_time\": \"2016-08-19 14:50:08\" },\n\t{ \"post_id\": 11083, \"topic_id\": 2733, \"forum_id\": 63, \"post_subject\": \"Sorting and interactions\", \"username\": \"TomCharman\", \"post_text\": \"On this dashboard, I've tried to set the various tables to be sorted by the count (descending) on the Edit page in each widget. However, they always end up sorted alphabetically by the first column. Does anyone else find this, or am I alone in this issue? \\n\\nhttps://dsp-qa.risk.regn.net:443/?compo ... ab3e8f13bc\", \"post_time\": \"2016-09-07 07:42:05\" },\n\t{ \"post_id\": 34493, \"topic_id\": 9113, \"forum_id\": 73, \"post_subject\": \"Vim plugin for ECL\", \"username\": \"Nathaniel\", \"post_text\": \"Here's a little vim plugin I wrote that provides ECL file detection and syntax highlighting.\\n\\nhttps://github.com/nathanielgraham/vim-ecl\\n\\nIt makes editing ECL files from the command-line a little bit easier on the eyes.\\n\\nCheers, \\nNathaniel\", \"post_time\": \"2021-12-10 19:17:19\" },\n\t{ \"post_id\": 23353, \"topic_id\": 5993, \"forum_id\": 83, \"post_subject\": \"HPCC Log visualization via Elastic (ELK)!\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"Want to make impactful visualizations based on your HPCC component logs?\\nTake a look at this write up based on the Elastic (ELK) stack:\\n\\nhttps://hpccsystems.com/blog/ELK_visualizations\", \"post_time\": \"2018-10-17 13:17:55\" },\n\t{ \"post_id\": 23363, \"topic_id\": 6003, \"forum_id\": 83, \"post_subject\": \"HPCC logs explained...\", \"username\": \"rodrigo.pastrana\", \"post_text\": \"The HPCC Systems Admin guide does a great great job discussing in detail the format of the HPCC component logs:\\n\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-7.0.0/docs/EN_US/HPCCSystemAdministratorsGuide_EN_US-7.0.0-rc3.pdf#page=26\\n\\nLog format configuration details are outlined here:\\n\\nhttp://cdn.hpccsystems.com/releases/CE-Candidate-7.0.0/docs/EN_US/HPCCSystemAdministratorsGuide_EN_US-7.0.0-rc3.pdf#page=52\", \"post_time\": \"2018-10-17 21:02:09\" },\n\t{ \"post_id\": 30283, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"bforeman\", \"post_text\": \"Vedant,\\n\\nWell, I worked a few values around and created some test data and I have a working graph in the code example below. Is this close to what you are trying to do?\\n\\nBob\\n\\n
import Std;\\nIMPORT Visualizer;\\n\\nLocalRecord := RECORD\\n STRING10 commodity;\\n UNSIGNED4 Date; //Std.Date.Date_t date;\\n STRING25 location;\\n DECIMAL8_2 price;\\nEND;\\n \\nfinalDataSet := DATASET([{'Corn',20200110,'New York',57.50},\\n\\t\\t\\t\\t\\t {'Soy',20191112,'London',27.30},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {'Soy',20191115,'Paris',37.30},\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t {'Soy',20191116,'Paris',47.30},\\n\\t\\t\\t\\t\\t {'Wheat',20200208,'Paris',22}, \\n\\t\\t\\t\\t\\t {'Soy',20191010,'Munich',27.50},\\n\\t\\t\\t\\t\\t {'Wheat',20200305,'New York',39.70}],LocalRecord);\\n\\nMNum := STD.Date.Month(finalDataSet.Date);\\nMName := CASE(Mnum,1 => 'Jan',2 => 'Feb',3 => 'Mar',4 =>'Apr',5 => 'May',6 => 'Jun',\\n 7 => 'Jul',8 => 'Aug',9 =>'Sep',10 => 'Oct',11 => 'Nov',12 => 'Dec','Unk');\\nmappings := DATASET([ {'Month', 'MName'},\\n {'Commodity','Commodity'}, \\n {'Price', 'Ave_Price'}], Visualizer.KeyValueDef);\\n // {'Commodity', 'commodity'}], Visualizer.KeyValueDef);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nOUTPUT(finalDataSet,NAMED('OriginalData'));\\nOUTPUT(TABLE(finalDataSet, {Commodity, MName,DECIMAL8_2 Ave_Price := Ave(GROUP, Price)},\\n Commodity,MName, FEW),NAMED('CrossTabData'));\\n\\n// Visualizer.MultiD.area('CommPrice_Viz', /*datasource*/, 'CommodityPrice_Month', /*mappings*/, /*filteredBy*/, /*dermatologyProperties*/ ); \\nVisualizer.MultiD.column('CommPrice_Viz', /*datasource*/, 'CrossTabData', mappings, /*filteredBy*/, /*dermatologyProperties*/ );\\n
\", \"post_time\": \"2020-04-20 19:07:33\" },\n\t{ \"post_id\": 30273, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"bforeman\", \"post_text\": \"Vedant,\\n\\nIf you want average prices than change the SUM function to use AVE.\\n\\nWhat does your data output look like and also do you see any graphs on the Visualize tab next to the OUTPUT? \\n\\nAlso what version of the IDE and target cluster are you using?\\n\\nBob\", \"post_time\": \"2020-04-20 17:02:09\" },\n\t{ \"post_id\": 30263, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi Bob,\\n\\nThank you for the help; however, when I tried to implement the above code however I am getting the following graph in the resource section. Also, I am trying to graph months vs average prices as prices are recorded from past 20 years. For reference, the second graph is similar to what I am trying to do.\", \"post_time\": \"2020-04-20 16:37:11\" },\n\t{ \"post_id\": 30253, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"bforeman\", \"post_text\": \"Vedant,\\n\\nSomething like this:\\n\\n\\n//Add after your finalDataSet definition\\nMNum := STD.Date.Month(finalDataSet.Date);\\nMName := CASE(Mnum,1 => 'Jan',2 => 'Feb',3 => 'Mar',4 =>'Apr',5 => 'May',6 => 'Jun',\\n 7 => 'Jul',8 => 'Aug',9 =>'Sep',10 => 'Oct',11 => 'Nov',12 => 'Dec','Unk');\\n\\n// Output "2D" dataset: "Mname" v "Price"\\nOUTPUT(TABLE(finalDataSet, {MName, UNSIGNED4 Sum_Price := SUM(GROUP, Price),Commodity}, Commodity, FEW), NAMED('CommodityPrice_Month'));\\n\\n// Create the visualization, giving it a uniqueID "bubble" and supplying the result name "CommPrice_Viz"\\nVisualizer.MultiD.column('CommPrice_Viz', /*datasource*/, 'CommodityPrice_Month', /*mappings*/, /*filteredBy*/, /*dermatologyProperties*/ );\\n
\\n\\n\\nBob\", \"post_time\": \"2020-04-20 16:02:06\" },\n\t{ \"post_id\": 30243, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"bforeman\", \"post_text\": \"Hi Vedant,\\nI see an issue with your Mappings definition:\\n\\nmappings := DATASET([ {'Location', 'date.month'},\\n {'Price', 'price'},\\n {'Commodity', 'commodity'}], Visualizer.KeyValueDef);\\nVisualizer.MultiD.area('myChart', /*datasource*/, 'MultiD__test', mappings, /*filteredBy*/, /*dermatologyProperties*/ );
\\n\\nI think "Location" needs to be "Date"\\n\\nThat said, have you tried running your finalDataset into a cross-tab report, and on your x-axis you have date and the y-axis would be price. Your cross-tab would break on date(month) and commodity. The cross-tab TABLE just has three fields, month, price and commodity. Once you have your data in that format it should be easy to visualize.\\n\\nI have an example in my archives somewhere so I will dig it up and post the code later today. \\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-04-20 14:47:42\" },\n\t{ \"post_id\": 30233, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"vedant_dulori\", \"post_text\": \"Thank you so much for you reply however I am still unable to get it working. The problem in my case is that I am creating a dataset with lineno, date (past 20 years), location and price of 3 different crops (wheat, soy and corn) and when I am creating chart using the standard method it gives me line no. vs date graph. However, I think I am not able to use mappings properly where I want month (jan-dec) vs price (of 3 different lines/gradient each representing different crop). Following is my code:\\n\\n\\nimport Std;\\nIMPORT Visualizer;\\n\\nLocalRecord := RECORD\\n\\tSTRING10 commodity;\\n Std.Date.Date_t date;\\n STRING25 location;\\n DECIMAL8_2 price;\\nEND;\\n// for CSV file\\nLocal_CSV_Record := RECORD\\n STRING lineno;\\n STRING date;\\n STRING location;\\n DECIMAL8_2 price;\\nEND;\\n\\n// soy\\nfname := '~::soy_local.csv';\\n\\n\\nds := DATASET(fname, Local_CSV_Record, CSV(HEADING(1)));\\n\\nLocalRecord tolocalrecord(Local_CSV_Record l) := TRANSFORM\\n date := Std.Date.FromStringToDate(l.date, '%Y-%m-%d');\\n self.commodity := 'Soy';\\n SELF.date := date; \\n SELF.location := l.location;\\n self.price := l.price;\\nEND;\\n\\n// wheat\\nfname2 := '~rawcsv::crop::wheat_updated.csv';\\n\\nds2 := DATASET(fname2, Local_CSV_Record, CSV(HEADING(1)));\\n\\nLocalRecord tolocalrecord2(Local_CSV_Record l) := TRANSFORM\\n date := Std.Date.FromStringToDate(l.date, '%e-%b-%y');\\n self.commodity := 'Wheat';\\n SELF.date := date; \\n SELF.location := l.location;\\n self.price := l.price;\\nEND;\\n// corn\\nfname3 := '~rawcsv::crop::corn_updated.csv';\\n\\nds3 := DATASET(fname3, Local_CSV_Record, CSV(HEADING(1)));\\n\\nLocalRecord tolocalrecord3(Local_CSV_Record l) := TRANSFORM\\n date := Std.Date.FromStringToDate(l.date, '%Y-%m-%d');\\n self.commodity := 'Corn';\\n SELF.date := date; \\n SELF.location := l.location;\\n self.price := l.price;\\nEND;\\n\\nlocalDataSet := PROJECT(ds, tolocalrecord(LEFT));\\nlocalDataSet2 := PROJECT(ds2, tolocalrecord2(LEFT));\\nlocalDataSet3 := PROJECT(ds3, tolocalrecord3(LEFT));\\n\\n// Merges all data corn + wheat + soy\\nfinalDataSet := localDataSet+localDataSet2+localDataSet3;\\n// Unique commodities\\nfilterData := finalDataSet(Commodity = 'Corn');\\nfilterData2 := finalDataSet(Commodity = 'Soy'); \\nfilterData3 := finalDataSet(Commodity = 'Wheat');\\ndata_exams := OUTPUT(finalDataSet, NAMED('MultiD__test'));\\nmappings := DATASET([ {'Location', 'date.month'}, \\n {'Price', 'price'}, \\n {'Commodity', 'commodity'}], Visualizer.KeyValueDef);\\nVisualizer.MultiD.area('myChart', /*datasource*/, 'MultiD__test', mappings, /*filteredBy*/, /*dermatologyProperties*/ );\\n\\n
\", \"post_time\": \"2020-04-20 13:09:16\" },\n\t{ \"post_id\": 30153, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"gsmith\", \"post_text\": \"In case you havn't seen it there is a brief tutorial here: https://github.com/hpcc-systems/Visuali ... r/tutorial \\n\\nStep 4 has an example of using the "mappings" to cherry pick certain columns of data for the visualization - the same will work with a regular chart (as long as you keep the first column as the X axis, the rest will be Y axises): https://github.com/hpcc-systems/Visuali ... Step04.ecl\\n\\nStep 5 has an example of the "filteredBy" option, in this case used to generate an interactive dashboard: https://github.com/hpcc-systems/Visuali ... Step05.ecl\", \"post_time\": \"2020-04-14 04:39:06\" },\n\t{ \"post_id\": 30143, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Re: Choosing axis for Visualized\", \"username\": \"bforeman\", \"post_text\": \"Hi Vedant!\\n\\nI have come to understanding that when visualizing a dataset, the visualizer chooses its x-axis and y-axis by using the first two data variables in the data set hence, I wanted to know how to let it choose which data variable should y-axis and x-axis represent?
\\nThere are two great examples that show how to control your graph mapping. Go to the Demos folder, and look at areaChart-mappings.ecl and areaChart-mappings-properties.ecl. That Demos folder is also packed with some other great examples. \\n\\nIf there are other funtion parameters that are not clear, you can always open an issue at https://github.com/hpcc-systems/Visualizer/issues.\\n\\nThere is also a new ECL Online lesson that discusses the Visualization support in ECL. Go to the Introduction to ECL (Part 2) Course and check out Lesson 6B.\\n\\nRegards,\\n\\nBob\", \"post_time\": \"2020-04-13 15:49:35\" },\n\t{ \"post_id\": 30133, \"topic_id\": 7973, \"forum_id\": 83, \"post_subject\": \"Choosing axis for Visualized\", \"username\": \"vedant_dulori\", \"post_text\": \"Hi\\n\\nAfter going through all documentations that are available to understand Visualizer, for instance https://github.com/hpcc-systems/Visualizer, I am facing difficulties implementing or understanding following things:\\n\\n- I have come to understanding that when visualizing a dataset, the visualizer chooses its x-axis and y-axis by using the first two data variables in the data set hence, I wanted to know how to let it choose which data variable should y-axis and x-axis represent?\\n- Also, can you help understand how to implement various function like mapping and filteredBy while declaring the visualizer. For instance, in the following code:\\nVisualizer.MultiD.area('myChart', /*datasource*/, 'Sales', mappings, /*filteredBy*/, /*dermatologyProperties*/ );\\n- It would be great if you can provide me links to resources for understanding Visualizer even better\\n\\nThank you,\\nVedant Dulori\", \"post_time\": \"2020-04-13 09:25:48\" },\n\t{ \"post_id\": 26903, \"topic_id\": 7143, \"forum_id\": 93, \"post_subject\": \"Re: Fórum em português está ativo!\", \"username\": \"JimD\", \"post_text\": \"Ótimas notícias!\\n\\n-Jim\", \"post_time\": \"2019-07-11 20:31:04\" },\n\t{ \"post_id\": 26893, \"topic_id\": 7143, \"forum_id\": 93, \"post_subject\": \"Fórum em português está ativo!\", \"username\": \"hwatanuki\", \"post_text\": \"Fórum em português está ativo!\", \"post_time\": \"2019-07-11 18:37:06\" },\n\t{ \"post_id\": 27043, \"topic_id\": 7173, \"forum_id\": 93, \"post_subject\": \"Re: SETS VERIFICATION\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Elimar,\\n\\nCompartilhei essa questão com o time de treinamento e o Richard propôs uma solução via a criação de datasets filho aninhados para cada categoria de palavras-chave. Em seguida, uma função que limpa, categoriza e divide o texto, retorna um NORMALIZE associando cada palavra-chave encontrada com sua respectiva categoria. 
Veja o código sugerido abaixo.\\n\\n\\n\\n\\nIMPORT STD;\\n\\nkeywords := MODULE;\\n FOOD := ['dinner', 'food', 'buffet', 'flavor', 'meal', 'meals', 'lunch'];\\n SERVICES:= ['service', 'server', 'waitress', 'waiter', 'bill'];\\n TOILET := ['toilet', 'sink', 'bathrooms', 'bathroom'];\\n PRICE := ['pay', 'cost', 'expensive', 'cheap', 'payless']; \\n PLACE := ['place', 'decor', 'ambiance', 'dirty', 'clean'];\\n\\n //nested child dataset -- all terms and their categories\\n Crec := {STRING Term}; //child record structure\\n EXPORT SearchTermsDS := DATASET([{'FOOD',DATASET(FOOD,Crec)},\\n {'SERVICES',DATASET(SERVICES,Crec)},\\n {'TOILET',DATASET(TOILET,Crec)},\\n {'PRICE',DATASET(PRICE,Crec)},\\n {'PLACE',DATASET(PLACE,Crec)}],\\n {STRING10 TermType, DATASET(Crec) Terms});\\n //the "garbage" characters to lose from the text\\n EXPORT BadChars := '?.$!';\\nEND;\\n\\nEvalReview(STRING r) := FUNCTION\\n //first lose the "garbage" characters and lowercase the text\\n CleanText := STD.Str.SubstituteIncluded(r,keywords.BadChars,'');\\n LowerText := STD.Str.ToLowerCase(CleanText);\\n ReviewSet := STD.STR.SplitWords(LowerText,' '); //create the set of words \\n \\n Found := keywords.SearchTermsDS.Terms(Term IN ReviewSet);\\n RETURN NORMALIZE(keywords.SearchTermsDS,\\n LEFT.Terms(Term IN SET(Found, Term)),\\n TRANSFORM({STRING TermType,STRING Term},\\n SELF.TermType := LEFT.TermType,\\n SELF.Term := RIGHT.Term));\\nEND;\\n\\nreview1 := 'Total bill for this horrible service? Over $8Gs. These crooks actually had the nerve to charge us $69 for 3 pills. I checked online the pills can be had for 19 cents EACH! Avoid Hospital ERs at all costs.';\\nreview2 := 'The price was outrageoous! The waiter and dinner were both terrible. NEVER coming back!!';\\n\\nEvalReview(review1);\\nEvalReview(review2);
\", \"post_time\": \"2019-07-24 13:05:56\" },\n\t{ \"post_id\": 26963, \"topic_id\": 7173, \"forum_id\": 93, \"post_subject\": \"SETS VERIFICATION\", \"username\": \"elimar.macena\", \"post_text\": \"Bom dia, em um dos meus estudos de data mining encontrei uma certa dificuldade.\\n\\nEm texto de avaliações de restaurantes eu gostaria de filtrar qual ponto é mais relevante ao avaliar um estabelecimento(comida, servico, ambiente e etc). Um modo que pensei para esta verificação é a criação de um SET contendo as palavras chaves relacionadas a cada um desses pontos.\\n\\nO problema surgiu no momento que preciso verificar se o texto da avaliação possui alguma das palavras existentes nos set anteriormente citados. Não é possivel fazer a utilização do texto atrelado com a operação IN, uma vez que o texto é tratado como "uma unica" palavra e entao nao coincide com nenhuma informação do SET de palavras chaves, e o outro modo que havia pensado era a criação de um SET baseado no texto da avaliação, separando as suas palavras pelo espaço em branco entre elas, porem, novamente, não é possivel fazer o uso da operação IN entre dois SETS.\\n\\nSegue um trecho do codigo para melhor visualização.\\n\\nIMPORT STD;\\n\\nkeywords_FOOD := ['dinner', 'food', 'buffet', 'flavor', 'meal', 'meals', 'lunch'];\\n\\nkeywords_SERVICE := ['service', 'server', 'waitress', 'waiter', 'bill'];\\n\\nkeywords_TOILET := ['toilet', 'sink', 'bathrooms', 'bathroom'];\\n\\nkeywords_PRICE := ['pay', 'cost', 'expensive', 'cheap', 'payless'];\\n\\nkeywords_place := ['place', 'decor', 'ambiance', 'dirty', 'clean'];\\n\\nreview := 'Total bill for this horrible service? Over $8Gs. These crooks actually had the nerve to charge us $69 for 3 pills. I checked online the pills can be had for 19 cents EACH! Avoid Hospital ERs at all costs.';\\nset_review := STD.STR.SplitWords(review,' ');\\n\\nOUTPUT(review);\\nOUTPUT(set_review);
\\n\\nGostaria de saber se existe algum modo no qual eu possa comparar dois SETS e afirmar se existe pelo menos 1 elemento presente em ambos ou algum modo no qual eu possa verificar se uma frase possui palavras presentes em um SET.\\n\\nDesde já agradeço.\", \"post_time\": \"2019-07-23 14:32:40\" },\n\t{ \"post_id\": 27293, \"topic_id\": 7223, \"forum_id\": 93, \"post_subject\": \"Re: Erro de incompatibilidade de formato\", \"username\": \"mateus.andrade\", \"post_text\": \"Obrigado pelo suporte, Hugo!\", \"post_time\": \"2019-08-14 17:10:37\" },\n\t{ \"post_id\": 27283, \"topic_id\": 7223, \"forum_id\": 93, \"post_subject\": \"Re: Erro de incompatibilidade de formato\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Mateus, \\n\\nSomente um adendo, os seguintes passos podem ser utilizados como alternativa enquanto o ticket do JIRA está aberto:\\n\\n1) Usar a opção "No Split" durante a operação de spray, e;\\n\\n2) Adicionar a seguinte opção de TEMPLATE LANGUAGE no início do arquivo .ecl:\\n\\n#option('validateFileType', false);\\n\\nAtt.,\\nHugo W\", \"post_time\": \"2019-08-14 17:07:47\" },\n\t{ \"post_id\": 27263, \"topic_id\": 7223, \"forum_id\": 93, \"post_subject\": \"Re: Erro de incompatibilidade de formato\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Mateus, \\n\\nAbrimos um ticket no JIRA para investigação desse problema: (HPCC-22623) Error reading sprayed variable-length THOR/FLAT file. \\n\\nTe mantenho informado sobre o progresso do ticket.\\n\\nObrigado.\\nHugo\", \"post_time\": \"2019-08-07 01:20:07\" },\n\t{ \"post_id\": 27223, \"topic_id\": 7223, \"forum_id\": 93, \"post_subject\": \"Erro de incompatibilidade de formato\", \"username\": \"mateus.andrade\", \"post_text\": \"Durante o curso de AppliedECL me deparei com o seguinte erro: "File format mismatch reading file: 'online::mfa::appliedecl::personsaccts'. Expected type 'flat', but file is type 'csv'".\\n\\nSegue em anexo o arquivo utilizado, bem como uma captura de tela das configurações de spray.\\n\\nÉ possivel ver o conteúdo do arquivo executando:\\n\\nIMPORT $;\\nOUTPUT($.File_PersonsAccounts.Persons);
\\n\\nPorém qualquer operação adicional causa o erro reportado acima, conforme o exemplo abaixo:\\n\\nIMPORT $;\\nOUTPUT($.File_PersonsAccounts.PersonsParent);
\\n\\nOBS: No arquivo [attachment=2:3q8tq2hn]OnlinePersonsAccts.txt foi adicionado a extensão '.txt' porque a plataforma não aceita arquivos sem extensão, favor desconsiderar.\", \"post_time\": \"2019-08-06 11:54:10\" },\n\t{ \"post_id\": 28523, \"topic_id\": 7563, \"forum_id\": 93, \"post_subject\": \"Re: Definir tempo de evento (em segundos) usando "CRON"\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Eudes, \\n\\nO parâmetro de tempo da função CRON em ECL segue o padrão de especificação do Unix e, nesse caso, o mínuto seria a menor unidade de tempo passível de especificação.\\n\\nUma alternativa para definir um período menor de tempo seria utilizar template language em ECL para, por meio de um loop iterativo, executar uma ação ou função a cada 10 segundos. Vide exemplo de código abaixo:\\n\\n\\nIMPORT STD;\\n\\n#DECLARE (N);\\n#SET (N,1);\\n#LOOP \\n\\t#IF (%N%>5)\\n\\t\\t#BREAK\\n\\t#ELSE \\n\\t\\tSTD.Date.CurrentTime(); \\n\\t\\tSTD.System.Debug.Sleep(10000);\\n\\t\\t#SET (N, %N%+1);\\n\\t#END;\\n#END;\\n
\\n\\nNesse caso, a CRON ainda poderia ser configurada para executar a cada minuto, mas invocando o código em template language acima para ser executado a cada 10 segundos.\\n\\nEspero ter ajudado.\\n\\nAtt.,\\nHugo W\", \"post_time\": \"2019-12-19 02:59:37\" },\n\t{ \"post_id\": 28463, \"topic_id\": 7563, \"forum_id\": 93, \"post_subject\": \"Definir tempo de evento (em segundos) usando "CRON"\", \"username\": \"eudes.edu\", \"post_text\": \"Olá estou estudando uma função cujo o evento é definido por, WHEN(CRON('* * * * *'), ou seja, a função ativa a cada 1 minuto.\\n\\nEu gostaria de definir um tempo menor, por exemplo, a cada "10 segundos", mas não encontrei uma forma de fazer isso, pois os parâmetros de tempo da CRON no HPCC são: minute, hour, dom, month, dow. Vocês conhecem alguma forma de alterar a condição de tempo para segundos?\\n\\nCRON: https://hpccsystems.com/training/docume ... /CRON.html\\n\\nDesde já agradeço,\\nEudes Santos\", \"post_time\": \"2019-12-13 13:53:49\" },\n\t{ \"post_id\": 29333, \"topic_id\": 7703, \"forum_id\": 93, \"post_subject\": \"Re: Função STD.Str.Contains talvez não esteja funcionando di\", \"username\": \"Hellesandro\", \"post_text\": \"Obrigado pela resposta!\", \"post_time\": \"2020-02-04 18:00:40\" },\n\t{ \"post_id\": 29293, \"topic_id\": 7703, \"forum_id\": 93, \"post_subject\": \"Re: Função STD.Str.Contains talvez não esteja funcionando di\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Hellesandro,\\n\\nSomente reforçando o que o Richard comentou acima, a função STD.str.Contains retorna um valor booleano que será TRUE toda vez que TODOS os caracteres fornecidos como segundo parâmetro da função forem detectados na STRING especificado no primeiro parâmetro da função, independentemente da ordem ou posição desses caracteres. \\n\\nNo caso de um código como:\\n\\n\\nIMPORT STD;\\n\\ndFruitNFamily := DATASET([{'Apple'}, {'Apricot'}, {'Avocado'}, {'Banana'},\\n {'Blackcurrant'}, {'Blackberry'}, {'Blueberry'}, {'Cherry'}, \\n {'Coconut'}, {'Fig'}, {'Grape'}, {'Kiwi Fruit'}, {'Lemon'}, {'Lime'},\\n {'Lychee'}, {'Mango'}, {'Nectarine'}, {'Orange'}, {'Papaya'},\\n {'Passion Fruit'}, {'Peach'}, {'Pear'}, {'Pineapple'}, {'Plum'},\\n {'Quince'}, {'Raspberry'}, {'Strawberry'}, {'Watermelon'}],\\n {STRING Name});\\n\\nOUTPUT(dFruitNFamily(STD.Str.Contains(Name, 'pe', true)),, NAMED('Contains_pe'));\\nOUTPUT(dFruitNFamily(STD.Str.Contains(Name, 'pp', true)),, NAMED('Contains_pp'));\\n\\n
\\n\\nA primeira saída retornará todos os registros do dataset dFruitNFamily que contêm simultaneamente os caracteres "p" e "e" ou "P" e "e" ou "p" e "E" ou "P" e "E" no campo Name, tais como 'Apple', 'Grape', 'Peach', 'Pineapple' e 'Raspberry'.\\nA segunda saída retornará todos os registros do dataset dFruitNFamily que contêm simultaneamente os caracteres "p" e "p" ou "p" e "P" ou "P" e "P" no campo Name, tais como 'Apple', 'Papaya', 'Pineapple'.\\n\\nPara o seu objetivo original, uma alternativa seria usar a função STD.Str.WildMatch. Por exemplo, na primeira saída, você poderia usar: \\n\\nOUTPUT (dFruitNFamily (STD.Str.WildMatch (Name, '* pe *', true)) ,, NAMED ('Contains_pe'));\\n
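// (Editor's note, not part of the original reply:) STD.Str.WildMatch compares the whole
// string against the wildcard pattern, so '* pe *' -- with the embedded spaces -- would
// match none of these fruit names. For "names containing the substring pe" the pattern is
// presumably meant to be '*pe*', e.g.:
// OUTPUT(dFruitNFamily(STD.Str.WildMatch(Name, '*pe*', TRUE)),, NAMED('Contains_pe'));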
\\n\\nEspero ter ajudado a esclarecer o uso dessa função.\\n\\nHugo Watanuki\", \"post_time\": \"2020-02-03 17:52:59\" },\n\t{ \"post_id\": 29283, \"topic_id\": 7703, \"forum_id\": 93, \"post_subject\": \"Re: Função STD.Str.Contains talvez não esteja funcionando di\", \"username\": \"rtaylor\", \"post_text\": \"Hellesandro,\\n\\nThe Contains function's documentation says: "The Contains functions return true if all the characters in the pattern appear in the source, otherwise they return false."
so it will return every fruit that contains all the specified letters, in any order.\\n\\nHere's the way I tested it for your scenario:\\n\\nIMPORT Std;\\ns := ['Apple', 'Grape', 'Papaya', 'Peach', 'Pear', 'Pineapple', 'Raspberry' ];\\nds := DATASET(s,{STRING w});\\n\\nds(Std.Str.Contains(w,'pe',0)); //Apple,Grape,Pineapple,Raspberry\\nds(Std.Str.Contains(w,'pe',1)); //Apple,Grape,Peach,Pear,Pineapple,Raspberry\\nds(Std.Str.Contains(w,'per',0)); //Grape,Raspberry\\nds(Std.Str.Contains(w,'per',1)); //Grape,Pear,Raspberry
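// (Editor's note, not part of the original reply:) the third argument is the
// case-insensitivity flag -- with 0 the lowercase pattern misses 'Peach' and 'Pear'
// (capital P), with 1 they are included, as the result comments above show.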
\\nIOW, this function is not looking for patterns, just the presence of all characters. If you need to look for specific patterns, then you should look at using regular expressions (like the REGEXFIND and REGEXREPLACE functions) or use ECL's PARSE technology.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-02-03 14:50:11\" },\n\t{ \"post_id\": 29263, \"topic_id\": 7703, \"forum_id\": 93, \"post_subject\": \"Função STD.Str.Contains talvez não esteja funcionando direit\", \"username\": \"Hellesandro\", \"post_text\": \"Estive utilizando a função STD.Str.Contains com o objetivo de identificar uma substring dentro de uma string mas o resultado não foi conforme o esperado, a função STD.Str.Contains está encontrando resultados em que não só o padrão informado foi encontrado, mas os caracteres individuais em ordens diferentes também.\\n\\nPara simplificar o problema, suponha que temos um dataset de frutas contendo apenas nomes de frutas como "Apple", "Grape", "Papaya", "Peach", "Pear", "Pineapple", "Raspberry" e outras.\\n\\nAplicando a função STD.Str.Contians com o padrão "pe", o que deveria ser retornado?\\n1) Grape, Peach, and Pear\\n2) Apple, Grape, Peach, Pear, Pineapple, Raspberry\\n\\nE para o padrão "pp"?\\n\\nVerifique a resposta aqui: http://10.173.248.1:8010/esp/files/stub ... 128-144013\\n\\nA função de fato deveria procurar pela presença de cada caractére da string ou encontrar todo o padrão/substring dentro da string? \\n\\nDê uma olhada em outro exemplo do uso dessa função na tentantiva de encontrar uma string que contenha a substring "HANDGUN": http://10.173.248.1:8010/esp/files/stub ... 128-134352\\n\\nEspecialmente as linhas 3, 7, 9, 10, 11, 12, 14 ...\", \"post_time\": \"2020-01-31 20:03:00\" },\n\t{ \"post_id\": 32233, \"topic_id\": 8393, \"forum_id\": 93, \"post_subject\": \"Re: "ERROR: JOIN 'denormedrecs' contains no equality conditi\", \"username\": \"hwatanuki\", \"post_text\": \"Olá Paulo, \\n\\nComo você está usando um intervalo de valores na condição de ligação entre os dois dataset´s (por meio do BETWEEN), é necessário incluir a opção "ALL" na função DENORMALIZE.\\n\\nVeja abaixo, um exemplo de código funcional para esse DERNOMALIZE baseado na proposta do Richard acima.\\n\\nHTH,\\nhwatanuki\\n\\nIMPORT $;\\n\\nDenormedRec := RECORD\\n $.File_BrunozziLblAnalise.Rec;\\n UNSIGNED1 NumRows ;\\n DATASET($.File_BrunozziAgroDB.Layout) Children ;\\nEND;\\n\\nBrunozziDB := $.File_BrunozziAgroDB.File;\\nSORT(BrunozziDB,CodGrupo);\\nBrunozziGRP := $.File_BrunozziLblAnalise.File;\\n\\nDenormedRec ParentLoad($.File_BrunozziLblAnalise.Rec Le) := TRANSFORM\\n SELF.NumRows := 0;\\n SELF.Children := [];\\n SELF := Le;\\nEND;\\nParentTbl:= PROJECT(BrunozziGRP,ParentLoad(LEFT));\\n\\nParentTbl DeNormThem(ParentTbl Le, BrunozziDB Ri , INTEGER Ct) := TRANSFORM\\n SELF.NumRows := Ct;\\n SELF.Children := Le.Children + Ri;\\n SELF := Le;\\nEND;\\n\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nLoseDots(STRING9 s) := s[1..2] + s[4..5] + s[7..9];\\nDeNormedRecs := DENORMALIZE(ParentTbl,BrunozziDB,\\n\\t\\t LoseDots(RIGHT.CodGrupo) BETWEEN \\n LoseDots(LEFT.LabelInicial) AND LoseDots(LEFT.LabelFinal),\\n\\t\\t\\t DeNormThem(LEFT,RIGHT,COUNTER), ALL);\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\nOUTPUT(DeNormedRecs,NAMED('BrunoziChildDataset'));
\", \"post_time\": \"2020-10-15 19:58:54\" },\n\t{ \"post_id\": 32223, \"topic_id\": 8393, \"forum_id\": 93, \"post_subject\": \"Re: "ERROR: JOIN 'denormedrecs' contains no equality conditi\", \"username\": \"PAULO FERNANDO BRUNOZI\", \"post_text\": \"Hi,\\nI have changed the DENORMALIZE logic using LoseDots string function, but got the same error message:\\n\\nError: JOIN denormedrecs contains no equality conditions - use ,ALL to allow (43, 17 - C:\\\\Users\\\\Public\\\\Documents\\\\HPCC Systems\\\\ECL\\\\My Files\\\\BrunozziAgro\\\\DenormalizeBrunozi.ecl)\\n\\nAny insight?\", \"post_time\": \"2020-10-15 19:35:31\" },\n\t{ \"post_id\": 32213, \"topic_id\": 8393, \"forum_id\": 93, \"post_subject\": \"Re: "ERROR: JOIN 'denormedrecs' contains no equality conditi\", \"username\": \"PAULO FERNANDO BRUNOZI\", \"post_text\": \"Hi Richard, Thanks a lot. I will test your suggestion that looks very simple and clear.\\n\\nIn my previous logic, I have realized error, so I have changed by MAP command detail bellow and tested ok on separeted test ECL, but still getting the same error message when run main ECL. This is the MAP logic is bellow \\n\\n*********\\n DeNormedRecs := DENORMALIZE(ParentTbl,BrunozziDB,\\n MAP(\\n (((INTEGER2)LEFT.LabelFinal[1..2]-(INTEGER2)LEFT.LabelInicial[1..2]) > ((INTEGER2)RIGHT.CodGrupo[1..2]-(INTEGER2)LEFT.LabelInicial[1..2])) AND ((INTEGER2)RIGHT.CodGrupo[1..2] > (INTEGER2)LEFT.LabelInicial[1..2]) => TRUE,\\n\\t (((INTEGER2)LEFT.LabelFinal[1..2]-(INTEGER2)LEFT.LabelInicial[1..2]) = ((INTEGER2)RIGHT.CodGrupo[1..2]-(INTEGER2)LEFT.LabelInicial[1..2])) AND ((INTEGER2)RIGHT.CodGrupo[4..5] >= (INTEGER2)LEFT.LabelInicial[4..5]) AND ((INTEGER2)RIGHT.CodGrupo[4..5] < (INTEGER2)LEFT.LabelFinal[4..5]) => TRUE,\\n\\t (((INTEGER2)LEFT.LabelFinal[1..2]-(INTEGER2)LEFT.LabelInicial[1..2]) = ((INTEGER2)RIGHT.CodGrupo[1..2]-(INTEGER2)LEFT.LabelInicial[1..2])) AND ((INTEGER2)RIGHT.CodGrupo[4..5] = (INTEGER2)LEFT.LabelFinal[4..5]) AND ((INTEGER2)LEFT.LabelInicial[7..9] <= (INTEGER2)RIGHT.CodGrupo[7..9]) AND ((INTEGER2)RIGHT.CodGrupo[7..9] <= (INTEGER2)LEFT.LabelFinal[7..9]) => TRUE,\\n\\t FALSE),\\n\\t DeNormThem(LEFT,RIGHT,COUNTER));\\n\\nBest regards,\", \"post_time\": \"2020-10-15 18:56:13\" },\n\t{ \"post_id\": 32203, \"topic_id\": 8393, \"forum_id\": 93, \"post_subject\": \"Re: "ERROR: JOIN 'denormedrecs' contains no equality conditi\", \"username\": \"rtaylor\", \"post_text\": \"Paulo,\\n\\nI can suggest one improvement:\\nLoseDots(STRING9 s) := s[1..2] + s[4..5] + s[7..9];\\n\\nDeNormedRecs := DENORMALIZE(ParentTbl,BrunozziDB,\\n LoseDots(RIGHT.CodGrupo) BETWEEN \\n LoseDots(LEFT.LabelInicial) AND LoseDots(LEFT.LabelFinal),\\n DeNormThem(LEFT,RIGHT,COUNTER));
The BETWEEN operator duplicates your comparison logic more succinctly, and eliminating the periods in the data makes it easy to compare just the numeric values.\\n\\nHTH,\\n\\nRichard\", \"post_time\": \"2020-10-15 15:15:50\" },\n\t{ \"post_id\": 32183, \"topic_id\": 8393, \"forum_id\": 93, \"post_subject\": \""ERROR: JOIN 'denormedrecs' contains no equality condition -\", \"username\": \"PAULO FERNANDO BRUNOZI\", \"post_text\": \"[attachment=2:29cxsmbd]DenormalizeBrunozi.eclAo executar a função DENORMALIZE, ocorre o erro: "ERROR: JOIN 'denormedrecs' contains no equality condition - use ALL to allow (26,17) error code 4162. It is attached the ecl files[code][/code] The error descrition looks clear, it is not been matching noone condition defined on the DENORMALIZE function in order to return the expected result. Please, could you check if the code is correct? If am I doing any error? I have used the DENORMALIZE example of workshop last Tuesday. Best regards. \\nAdditional information: Denormalize will use the range defined by columns LabelInicial and LabelFinal on the parent table BrunozziGRP see . The values of the column CodGrupo see of BrunozziDB (data base) will be validated if is in the range described before, in order to apply the denormalization.\\n\\nI realized my logic is not correct, however it should work for some itens, I think. Maybe a solution could be create a C++ function that could test.\", \"post_time\": \"2020-10-14 22:18:48\" },\n\t{ \"post_id\": 33283, \"topic_id\": 8683, \"forum_id\": 93, \"post_subject\": \"Re: Iterar pelas colunas de um dataset\", \"username\": \"AndreFB\", \"post_text\": \"O código funcionou muito bem. Muito obrigado!\", \"post_time\": \"2021-03-26 20:42:43\" },\n\t{ \"post_id\": 33273, \"topic_id\": 8683, \"forum_id\": 93, \"post_subject\": \"Re: Iterar pelas colunas de um dataset\", \"username\": \"OlivAl01\", \"post_text\": \"Olá André. \\nA abordagem recomendada para problemas nos quais não se pode referenciar diretamente campos do dataset (como linhas, colunas, etc) é utilizando template language: https://hpccsystems.com/training/documentation/ecl-language-reference/html/Templates.html#:~:text=The%20Template%20language%20is%20a,to%20implement%20the%20user's%20choices.\\n\\nAcredito que o código a seguir seja o que você procura.\\n\\n//*****************************\\n//This is test data -- requirement being #records = # of REAL fields \\nfilelayout := RECORD\\n UNSIGNED myid;\\n REAL f1;\\n REAL f2;\\n REAL f3;\\n REAL f4;\\nEND;\\n\\nGFred := DATASET([ {1, 11.0, 12.0, 13.0, 99.0},\\n {2, 21.0, 22.0, 23.0, 24.0},\\n {3, 31.0, 32.0, 33.0, 34.0},\\n {4, 41.0, 42.0, 43.0, 44.0}\\n ],filelayout);\\n\\t\\n// ThisDS := MyMod.Myfile;\\t\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n// ****************************\\n//makes the code not reliant on any specific file/record structure names\\nds := 'GFred'; \\t\\t\\t\\t\\t\\t\\t\\t\\t//constant for the file def name\\nthislayout := RECORDOF(GFred); \\t//replace ThisDS with whatever filename is actually used\\n\\n// ****************************\\n #EXPORTXML(Fred,thislayout);\\n #DECLARE(outputStr)\\n #DECLARE(ProjStr)\\n #DECLARE(FldStr)\\n #DECLARE (Ndx)\\n #SET (Ndx, 0); \\n #FOR (Fred)\\n #FOR (Field) \\n #SET (Ndx, %Ndx% + 1)\\n #IF ( %Ndx% = 1)\\n #SET(outputStr,'SetVals := [')\\n #SET(ProjStr,'thislayout XF(OneRec L, INTEGER C) := TRANSFORM\\\\n SELF.' + %'{@label}'% + ' := C;\\\\n')\\n #ELSEIF ( %Ndx% = 2)\\n #APPEND(outputstr,ds + '.' + %'{@label}'%);\\n #APPEND(projstr,' SELF.' 
+ %'{@label}'% + ' := CORRELATION(' + ds + ',SetVals[C],' + %'{@label}'% + ');\\\\n');\\n #ELSE \\n #APPEND(outputstr,',' + ds + '.' + %'{@label}'%);\\n #APPEND(projstr,' SELF.' + %'{@label}'% + ' := CORRELATION(' + ds + ',SetVals[C],' + %'{@label}'% + ');\\\\n');\\n #END\\n #END\\n #END\\n #APPEND(outputstr,'];\\\\nOutRecCnt := COUNT(SetVals);\\\\nOneRec := DATASET([{0}],{UNSIGNED1 h});');\\n #APPEND(projstr,'END;\\\\n P := NORMALIZE(OneRec,OutRecCnt,XF(LEFT,COUNTER));\\\\n');\\n %outputstr%;\\n %projstr%;\\n\\t\\t \\n GenCode := %'outputstr'% + '\\\\n' + %'projstr'%;\\n\\t //produces this code:\\n // SetVals := [GFred.f1,GFred.f2,GFred.f3,GFred.f4];\\n\\t\\t// OutRecCnt := COUNT(SetVals);\\n\\t\\t// OneRec := DATASET([{0}],{UNSIGNED1 h});\\n\\t\\t// thislayout XF(OneRec L, INTEGER C) := TRANSFORM\\n\\t\\t\\t// SELF.myid := C;\\n\\t\\t\\t// SELF.f1 := CORRELATION(GFred,SetVals[C],f1);\\n\\t\\t\\t// SELF.f2 := CORRELATION(GFred,SetVals[C],f2);\\n\\t\\t\\t// SELF.f3 := CORRELATION(GFred,SetVals[C],f3);\\n\\t\\t\\t// SELF.f4 := CORRELATION(GFred,SetVals[C],f4);\\n\\t\\t// END;\\n\\t\\t // P := NORMALIZE(OneRec,OutRecCnt,XF(LEFT,COUNTER));\\n\\nOUTPUT(GenCode);\\t\\nOUTPUT(P);\\t\\n
\\n\\nRepare que a estrutura record é exportada como XML (#EXPORTXML). Isso permite que seja possível utilizar o #FOR, uma estrutura de loop que percorre o arquivo XML exportado. Caso persistam dúvidas a respeito do trecho de código referente à exportação do arquivo como XML, há esse outro exemplo retirado da própria documentação da linguagem: https://hpccsystems.com/training/documentation/ecl-language-reference/html/_EXPORTXML.html\", \"post_time\": \"2021-03-25 13:42:11\" },\n\t{ \"post_id\": 33263, \"topic_id\": 8683, \"forum_id\": 93, \"post_subject\": \"Iterar pelas colunas de um dataset\", \"username\": \"AndreFB\", \"post_text\": \"Eu tenho um dataset com mais de 30 colunas e gostaria de calcular a correlação entre cada uma delas (e de preferência colocar os resultados em uma matriz de correlação). Existe alguma forma de criar um loop que roda a função CORRELATION() para todos os pares de colunas possíveis?\\n\\nUm exemplo para deixar mais claro:\\nDado um dataset de colunas y, x1, x2 e x3, gostaria de criar a seguinte tabela com as correlações entre cada uma (em anexo).[attachment=0:34ag2tb0]matriz de correlação.PNG\", \"post_time\": \"2021-03-24 18:06:42\" },\n\t{ \"post_id\": 33533, \"topic_id\": 8793, \"forum_id\": 93, \"post_subject\": \"Série de podcasts de 10 anos de HPCC Systems\", \"username\": \"HPCC Staff\", \"post_text\": \"Confira esta entrevista de nossa série de podcasts de aniversário de 10 anos apresentando Claudio Amaral, Diretor Sênior de Engenharia de Software, e Hugo Watanuki, Engenheiro de Software Sênior. Neste episódio, eles discutem como o HPCC Systems se envolveu com a academia no Brasil e o impacto que isso causou na comunidade internacional em geral.\\n\\nhttps://wiki.hpccsystems.com/display/hpcc/10+Year+Anniversary+Podcast+Series#id-10YearAnniversaryPodcastSeries-claudioandhugo\", \"post_time\": \"2021-05-14 16:10:24\" },\n\t{ \"post_id\": 33573, \"topic_id\": 8803, \"forum_id\": 93, \"post_subject\": \"Re: Iterar pelas colunas para rodar várias regressões\", \"username\": \"hwatanuki\", \"post_text\": \"Olá AndreFB,\\n\\nPensei em usar uma MACRO e recursos de template language (https://learn.lexisnexis.com/Activity/1342) para automatizar pelo menos o processo de geração do código para cada iteração do processo que você descreve. \\n\\nA MACRO do exemplo abaixo gera amostras de treinamento e teste para regressões logísticas baseadas em diferentes combinações das variáveis independentes e produz como resultado final a matriz de confusão de cada regressão. \\n\\nEsse exemplo considera que você esta na iteração 4 e, portanto, já estabeleceu as três primeiras variáveis independentes do modelo. Após rodar a iteração 4 e escolher a 4º variável independente do modelo, basta usar o mesmo código para incluir a quarta variável e rodar a iteração 5; e assim sucessivamente... O processo inverso também é valido para rodar iterações com menos variáveis.\\n\\nHTH,\\nhwatanuki\\n\\n\\nEXPORT MacGenStepWiseIter4 (v1,v2,v3):= MACRO //update the number of input tokens \\n //based on your iteration number. 
\\n //For instance, for iteration no 5, \\n //use "(v1,v2,v3,v4)"\\n\\n// sample dataset\\nmyrec := RECORD\\n UNSIGNED id; \\n REAL field1;\\n REAL field2;\\n REAL field3;\\n REAL field4;\\n REAL field5;\\n REAL label;\\nEND;\\n\\nmyds := DATASET([{1,5.0,6.0,7.0,8.0,9.0,0},\\n {2,10.0,12.0,14.0,16.0,18.0,0},\\n {3,15.0,18.0,21.0,24.0,27.0,0},\\n {4,20.0,18.0,21.0,24.0,27.0,1},\\n {5,25.0,30.0,21.0,24.0,27.0,1},\\n {6,30.0,36.0,42.0,24.0,27.0,1},\\n {7,35.0,42.0,49.0,56.0,63.0,0},\\n {8,40.0,42.0,49.0,56.0,63.0,1},\\n {9,45.0,54.0,49.0,56.0,63.0,1},\\n {10,50.0,60.0,70.0,80.0,90.0,0}\\n\\t\\t],myrec);\\n\\n// myds := $.MyFile; //update to use your own input DS\\n// myrec:=RECORDOF(myds); \\n\\t\\t\\t\\t\\t\\t\\t\\t \\n#EXPORTXML(layout,myrec);\\n#DECLARE(myset);\\n#DECLARE(Ndx);\\n#SET(Ndx,0); \\n#FOR(layout);\\n #FOR(Field);\\n #IF(%'{@label}'%<>'id'); // rename the 'id' string value to match your recid fieldname\\n #SET(Ndx,%Ndx% + 1);\\n #IF(%Ndx%=1);\\n #SET(myset,'IMPORT $; \\\\n IMPORT ML_Core; \\\\n IMPORT ML_Core.Types; \\\\n IMPORT LogisticRegression as LR; \\\\n MySetFld := ['); \\n\\t #APPEND(myset,'myds.'+%'{@label}'%);\\n\\t #ELSE\\n\\t #APPEND(myset,','+ 'myds.'+%'{@label}'%);\\n\\t #END\\n #END\\n #END\\n#END\\n#APPEND(myset,'];\\\\nCntFld := COUNT(MySetFld);');\\n\\n#DECLARE(Iteration);\\n#SET(Iteration,4); //update the symbol value for other iterations. \\n //For instance, for iteration no 5, use "#SET(Iteration,5);".\\n#DECLARE(Combination);\\n#SET(Combination,0);\\n#DECLARE(MainCode);\\n#SET(MainCode,'');\\n#DECLARE(var1);\\n#DECLARE(var2);\\n#DECLARE(var3); //include or remove var´s according to your iteration number. \\n //For instance, for iteration no 5, include "#DECLARE(var4);".\\n#DECLARE(TotVar);\\n#SET(var1,v1);\\n#SET(var2,v2);\\n#SET(var3,v3); //include or remove symbol values according to your \\n //iteration number. For instance, for iteration no 5, \\n //include "#SET(var4,v4);".\\n#SET(TotVar,%Iteration%+1);\\n#LOOP\\n #IF(%Combination%<%Ndx%-1);\\n #SET(Combination,%Combination% + 1);\\n #IF(%Combination%<>%Var1%);\\n #IF(%Combination%<>%Var2%);\\n #IF(%Combination%<>%Var3%); //include or remove var´s according to your \\n //iteration number. For instance, for iteration \\n //no 5, include "#IF(%Combination%<>%Var4%);".\\n #APPEND(MainCode,'myTrainData'+%'iteration'%+'_'+%'combination'%+' := \\n TABLE(myds[1..7],{myds.id, '+ //update the size of the \\n //training sample\\n\\t 'MySetFld['+%'var1'%+'], '+\\n\\t\\t\\t 'MySetFld['+%'var2'%+'], '+\\n\\t\\t\\t 'MySetFld['+%'var3'%+'], '+ //include or remove var´s \\n //according to your \\n //iteration number. 
For \\n //instance, for iteration \\n //no 5, include \\n //"'MySetFld['+%'var4'%+'],'+".\\n\\t\\t\\t 'MySetFld['+%'combination'%+'], '+\\n\\t 'MySetFld[CntFld]}); \\\\n');\\n #APPEND(MainCode,'ML_Core.ToField(myTrainData'+%'iteration'%+'_'+%'combination'%+', \\n myTrainDataNF'+%'iteration'%+'_'+%'combination'%+'); \\\\n');\\n #APPEND(MainCode,'XTrain'+%'iteration'%+'_'+%'combination'%+' := myTrainDataNF'+%'iteration'%+'_'+%'combination'%+'(number < '+%'TotVar'%+'); \\\\n' +\\n 'YTrain'+%'iteration'%+'_'+%'combination'%+' := PROJECT(myTrainDataNF'+%'iteration'%+'_'+%'combination'%+'(number =' +%'TotVar'%+'), '+\\n 'TRANSFORM(Types.DiscreteField, '+\\t\\t\\t\\t\\t\\t\\t \\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n 'SELF.number := 1, '+\\n 'SELF := LEFT)); \\\\n');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n #APPEND(MainCode,'mod_bi'+%'iteration'%+'_'+%'combination'%+' := LR.BinomialLogisticRegression().getModel(XTrain'+%'iteration'%+'_'+%'combination'%+', '+\\n\\t YTrain'+%'iteration'%+'_'+%'combination'%+');\\\\n'); \\n\\n #APPEND(MainCode,\\n\\t 'myTestData'+%'iteration'%+'_'+%'combination'%+' := TABLE(myds[8..10],{myds.id, '+ //update the size of the test sample\\n\\t 'MySetFld['+%'var1'%+'], '+\\n\\t 'MySetFld['+%'var2'%+'], '+\\n\\t 'MySetFld['+%'var3'%+'], '+ //include or remove var´s according to your \\n //iteration number. For instance, for \\n //iteration no 5, include \\n //"'MySetFld['+%'var4'%+'], '+".\\n\\t 'MySetFld['+%'combination'%+'], '+\\n\\t 'MySetFld[CntFld]});\\\\n');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n #APPEND(MainCode, \\n 'ML_Core.ToField(myTestData'+%'iteration'%+'_'+%'combination'%+', '+\\n \\t 'myTestDataNF'+%'iteration'%+'_'+%'combination'%+'); \\\\n');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\n #APPEND(MainCode,\\n\\t 'XTest'+%'iteration'%+'_'+%'combination'%+' := myTestDataNF'+%'iteration'%+'_'+%'combination'%+'(number <' +%'TotVar'%+'); \\\\n' +\\n 'YTest'+%'iteration'%+'_'+%'combination'%+' := PROJECT(myTestDataNF'+%'iteration'%+'_'+%'combination'%+'(number =' +%'TotVar'%+'), '+\\n\\t\\t'TRANSFORM(Types.DiscreteField, '+\\n\\t\\t\\t\\t\\t\\t'SELF.number := 1, '+\\n\\t\\t\\t\\t\\t\\t'SELF := LEFT)); \\\\n');\\n\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t \\n #APPEND(MainCode,\\n 'predict'+%'iteration'%+'_'+%'combination'%+' := \\nLR.BinomialLogisticRegression().Classify(mod_bi'+%'iteration'%+'_'+%'combination'%+', '+\\n'XTest'+%'iteration'%+'_'+%'combination'%+'); \\\\n');\\n \\n #APPEND(MainCode,\\n 'conf_matrix'+%'iteration'%+'_'+%'combination'%+' := \\n LR.Confusion(Ytest'+%'iteration'%+'_'+%'combination'%+','+\\n 'predict'+%'iteration'%+'_'+%'combination'%+'); \\\\n' +\\n 'bin_matrix'+%'iteration'%+'_'+%'combination'%+' := LR.BinomialConfusion(conf_matrix'+%'iteration'%+'_'+%'combination'%+'); \\\\n' +\\n 'OUTPUT(bin_matrix'+%'iteration'%+'_'+%'combination'%+', \\n NAMED(\\\\'Accuracy'+%'iteration'%+'_'+%'combination'%+'\\\\')); \\\\n');\\n #END\\n #END\\n\\t #END \\n #ELSE \\n\\t #BREAK\\n #END\\n #END\\n\\n\\nGenCode := %'myset'% + '\\\\n' + %'MainCode'%;\\nOUTPUT(GenCode);\\n\\n%myset%;\\n%MainCode%;\\n\\nENDMACRO;\\n
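// (Editor's note, not part of the original post:) a hypothetical invocation for iteration 4,
// assuming the macro is saved as MacGenStepWiseIter4.ecl in the caller's module folder and
// that fields 1, 2 and 3 were the independent variables fixed in the earlier iterations:
// IMPORT $;
// $.MacGenStepWiseIter4(1,2,3);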
\", \"post_time\": \"2021-05-25 02:38:55\" },\n\t{ \"post_id\": 33543, \"topic_id\": 8803, \"forum_id\": 93, \"post_subject\": \"Iterar pelas colunas para rodar várias regressões\", \"username\": \"AndreFB\", \"post_text\": \"Eu tenho um dataset com cerca de 30 colunas e gostaria de rodar diversos modelos de regressão logística, com diferentes combinações de colunas, para analisar seus resultados. Por exemplo, suponhamos que eu já tenha 3 variáveis fixas e queira rodar 27 modelos de regressão logística, cada um com as 3 fixas mais uma das restantes (etapa 1). Depois, escolho a melhor e rodo mais 26 modelos, cada um com 4 fixas mais uma das restantes (etapa 2). Existe alguma forma de automatizar esse processo? \\nA escolha da variável fixa não precisa necessariamente ser automatizada, se apenas cada etapa fosse automatizada, já me ajudaria bastante. Nesse caso seria importante que a matriz de confusão de cada regressão fosse mostrada na forma de um OUTPUT para que eu possa analisar manualmente qual será a próxima variável escolhida.\\nAgradeço desde já.\", \"post_time\": \"2021-05-21 18:49:06\" },\n\t{ \"post_id\": 33623, \"topic_id\": 8833, \"forum_id\": 93, \"post_subject\": \"Re: Conversão SET OF STRING para STRING\", \"username\": \"rtaylor\", \"post_text\": \"federal,\\n\\nHere's a simple function that does what you need:\\noneStr(SET OF STRING s) := FUNCTION\\n ds := DATASET(s,{STRING c}); //treat the set as a dataset\\n RETURN ROLLUP(ds,TRUE,TRANSFORM({STRING c},SELF.c := LEFT.c + RIGHT.c))[1].c;\\n //roll up to a one-record dataset and return the c field value from that record\\nEND;\\n\\nsetStr := ['A', 'B', 'C'];\\noneStr(setStr);
\\nHTH,\\n\\nRichard\", \"post_time\": \"2021-06-01 18:39:00\" },\n\t{ \"post_id\": 33613, \"topic_id\": 8833, \"forum_id\": 93, \"post_subject\": \"Conversão SET OF STRING para STRING\", \"username\": \"eidg\", \"post_text\": \"Olá,\\n\\nÉ possível converter um SET OF STRING para STRING de forma direta?\\n\\nExemplo:\\nEntrada -> ['A','B','C']\\nSaída (Esperada) -> 'ABC'\\n\\nCaso não seja possível de forma direta, existe alguma maneira para realizar esta operação?\\n\\nDesde já agradeço.\", \"post_time\": \"2021-06-01 12:32:37\" }\n];\n\nexport function getPosts() {\n\treturn posts;\n}","import React from \"react\";\r\nimport { Link, useParams, useSearchParams } from \"react-router-dom\";\r\nimport { XCircleIcon } from \"@primer/octicons-react\";\r\nimport DOMPurify from \"dompurify\";\r\nimport { getForums } from \"../data/forums\";\r\nimport { getTopics } from \"../data/topics\";\r\nimport { getPosts } from \"../data/posts\";\r\nimport sortBy from \"../util/sortBy\";\r\n\r\nexport default function Posts() {\r\n const params = useParams();\r\n // const [filter, setFilter] = React.useState(\"\");\r\n const [searchParams, setSearchParams] = useSearchParams();\r\n\r\n let posts = sortBy(\r\n getPosts()\r\n .filter(post => post.forum_id.toString() === params.forumId && post.topic_id.toString() === params.topicId)\r\n .filter(post => {\r\n const filter = searchParams.get(\"filter\");\r\n if (!filter) return true;\r\n const subject = post.post_subject.toLowerCase();\r\n const text = post.post_text.toLowerCase();\r\n let match = false;\r\n filter.split(\" \").forEach(w => {\r\n const word = w.toLowerCase();\r\n if (subject.includes(word) || text.includes(word)) {\r\n match = true;\r\n }\r\n });\r\n return match;\r\n }),\r\n { prop: \"post_time\", desc: false, parser: (d) => new Date(d) }\r\n );\r\n const topicName = getTopics().filter(topic => topic.topic_id.toString() === params.topicId)[0]?.topic_title ?? \"\";\r\n const forumName = getForums().filter(forum => forum.forum_id.toString() === params.forumId)[0]?.forum_name ?? \"\";\r\n\r\n let filterTimeout;\r\n const filterPosts = query => {\r\n clearTimeout(filterTimeout);\r\n if (!query) setSearchParams({});\r\n\r\n filterTimeout = setTimeout(() => {\r\n setSearchParams({ filter: query });\r\n }, 500)\r\n };\r\n\r\n const clearFilter = React.useCallback(() => {\r\n if (searchParams.get(\"filter\") !== \"\") {\r\n document.getElementById(\"filter\").value = \"\";\r\n setSearchParams({ filter: \"\" });\r\n }\r\n }, [searchParams, setSearchParams]);\r\n\r\n return (\r\n \r\n Forums\r\n » \r\n {forumName !== \"\" ? forumName : \"Topics\"}\r\n » \r\n {topicName}\r\n
\r\n {post.post_subject}
\r\n \n \n
Forum Archive
\n